Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

drm/xe: Introduce a new DRM driver for Intel GPUs

Xe is a new driver for Intel GPUs that supports both integrated and
discrete platforms, starting with Tiger Lake (the first Intel Xe Architecture platform).

The code is at a stage where it is already functional and has experimental
support for multiple platforms starting from Tiger Lake, with initial
support implemented in Mesa (for Iris and Anv, our OpenGL and Vulkan
drivers), as well as in NEO (for OpenCL and Level0).

The new Xe driver reuses a significant amount of code from i915.

As for display, the intent is to share the display code with the i915
driver so that there is maximum reuse there, but that code is not added
in this patch.

This initial work is a collaboration of many people and unfortunately
the big squashed patch won't fully honor the proper credits. But let's
gather some quick git stats so we can at least try to preserve some of
the credits:

Co-developed-by: Matthew Brost <matthew.brost@intel.com>
Co-developed-by: Matthew Auld <matthew.auld@intel.com>
Co-developed-by: Matt Roper <matthew.d.roper@intel.com>
Co-developed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Co-developed-by: Francois Dugast <francois.dugast@intel.com>
Co-developed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Co-developed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Co-developed-by: Philippe Lecluse <philippe.lecluse@intel.com>
Co-developed-by: Nirmoy Das <nirmoy.das@intel.com>
Co-developed-by: Jani Nikula <jani.nikula@intel.com>
Co-developed-by: José Roberto de Souza <jose.souza@intel.com>
Co-developed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Co-developed-by: Dave Airlie <airlied@redhat.com>
Co-developed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Co-developed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Co-developed-by: Mauro Carvalho Chehab <mchehab@kernel.org>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>

Authored by Matthew Brost, committed by Rodrigo Vivi (dd08ebf6 a60501d7)

+40575
+1
Documentation/gpu/drivers.rst
 vkms
 bridge/dw-hdmi
 xen-front
+xe/index
 afbc
 komeda-kms
 panfrost
+23
Documentation/gpu/xe/index.rst
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=======================
+drm/xe Intel GFX Driver
+=======================
+
+The drm/xe driver supports some future GFX cards with rendering, display,
+compute and media. Support for currently available platforms like TGL, ADL,
+DG2, etc is provided to prototype the driver.
+
+.. toctree::
+   :titlesonly:
+
+   xe_mm
+   xe_map
+   xe_migrate
+   xe_cs
+   xe_pm
+   xe_pcode
+   xe_gt_mcr
+   xe_wa
+   xe_rtp
+   xe_firmware
+8
Documentation/gpu/xe/xe_cs.rst
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+==================
+Command submission
+==================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_exec.c
+   :doc: Execbuf (User GPU command submission)
+34
Documentation/gpu/xe/xe_firmware.rst
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+========
+Firmware
+========
+
+Firmware Layout
+===============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_uc_fw_abi.h
+   :doc: Firmware Layout
+
+Write Once Protected Content Memory (WOPCM) Layout
+==================================================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_wopcm.c
+   :doc: Write Once Protected Content Memory (WOPCM) Layout
+
+GuC CTB Blob
+============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_guc_ct.c
+   :doc: GuC CTB Blob
+
+GuC Power Conservation (PC)
+===========================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_guc_pc.c
+   :doc: GuC Power Conservation (PC)
+
+Internal API
+============
+
+TODO
+13
Documentation/gpu/xe/xe_gt_mcr.rst
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+==============================================
+GT Multicast/Replicated (MCR) Register Support
+==============================================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_gt_mcr.c
+   :doc: GT Multicast/Replicated (MCR) Register Support
+
+Internal API
+============
+
+TODO
+8
Documentation/gpu/xe/xe_map.rst
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=========
+Map Layer
+=========
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_map.h
+   :doc: Map layer
+8
Documentation/gpu/xe/xe_migrate.rst
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=============
+Migrate Layer
+=============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_migrate_doc.h
+   :doc: Migrate Layer
+14
Documentation/gpu/xe/xe_mm.rst
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=================
+Memory Management
+=================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_bo_doc.h
+   :doc: Buffer Objects (BO)
+
+Pagetable building
+==================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_pt.c
+   :doc: Pagetable building
+14
Documentation/gpu/xe/xe_pcode.rst
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=====
+Pcode
+=====
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_pcode.c
+   :doc: PCODE
+
+Internal API
+============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_pcode.c
+   :internal:
+14
Documentation/gpu/xe/xe_pm.rst
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+========================
+Runtime Power Management
+========================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_pm.c
+   :doc: Xe Power Management
+
+Internal API
+============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_pm.c
+   :internal:
+20
Documentation/gpu/xe/xe_rtp.rst
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=========================
+Register Table Processing
+=========================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp.c
+   :doc: Register Table Processing
+
+Internal API
+============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp_types.h
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp.h
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp.c
+   :internal:
+14
Documentation/gpu/xe/xe_wa.rst
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+====================
+Hardware workarounds
+====================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_wa.c
+   :doc: Hardware workarounds
+
+Internal API
+============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_wa.c
+   :internal:
+2
drivers/gpu/drm/Kconfig
 
 source "drivers/gpu/drm/i915/Kconfig"
 
+source "drivers/gpu/drm/xe/Kconfig"
+
 source "drivers/gpu/drm/kmb/Kconfig"
 
 config DRM_VGEM
+1
drivers/gpu/drm/Makefile
 obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/
 obj-$(CONFIG_DRM_AMDGPU)+= amd/amdxcp/
 obj-$(CONFIG_DRM_I915) += i915/
+obj-$(CONFIG_DRM_XE) += xe/
 obj-$(CONFIG_DRM_KMB_DISPLAY) += kmb/
 obj-$(CONFIG_DRM_MGAG200) += mgag200/
 obj-$(CONFIG_DRM_V3D) += v3d/
+2
drivers/gpu/drm/xe/.gitignore
+# SPDX-License-Identifier: GPL-2.0-only
+*.hdrtest
+63
drivers/gpu/drm/xe/Kconfig
+# SPDX-License-Identifier: GPL-2.0-only
+config DRM_XE
+	tristate "Intel Xe Graphics"
+	depends on DRM && PCI && MMU
+	select INTERVAL_TREE
+	# we need shmfs for the swappable backing store, and in particular
+	# the shmem_readpage() which depends upon tmpfs
+	select SHMEM
+	select TMPFS
+	select DRM_BUDDY
+	select DRM_KMS_HELPER
+	select DRM_PANEL
+	select DRM_SUBALLOC_HELPER
+	select RELAY
+	select IRQ_WORK
+	select SYNC_FILE
+	select IOSF_MBI
+	select CRC32
+	select SND_HDA_I915 if SND_HDA_CORE
+	select CEC_CORE if CEC_NOTIFIER
+	select VMAP_PFN
+	select DRM_TTM
+	select DRM_TTM_HELPER
+	select DRM_SCHED
+	select MMU_NOTIFIER
+	help
+	  Experimental driver for Intel Xe series GPUs
+
+	  If "M" is selected, the module will be called xe.
+
+config DRM_XE_FORCE_PROBE
+	string "Force probe xe for selected Intel hardware IDs"
+	depends on DRM_XE
+	help
+	  This is the default value for the xe.force_probe module
+	  parameter. Using the module parameter overrides this option.
+
+	  Force probe the xe for Intel graphics devices that are
+	  recognized but not properly supported by this kernel version. It is
+	  recommended to upgrade to a kernel version with proper support as soon
+	  as it is available.
+
+	  It can also be used to block the probe of recognized and fully
+	  supported devices.
+
+	  Use "" to disable force probe. If in doubt, use this.
+
+	  Use "<pci-id>[,<pci-id>,...]" to force probe the xe for listed
+	  devices. For example, "4500" or "4500,4571".
+
+	  Use "*" to force probe the driver for all known devices.
+
+	  Use "!" right before the ID to block the probe of the device. For
+	  example, "4500,!4571" forces the probe of 4500 and blocks the probe of
+	  4571.
+
+	  Use "!*" to block the probe of the driver for all known devices.
+
+menu "drm/Xe Debugging"
+depends on DRM_XE
+depends on EXPERT
+source "drivers/gpu/drm/xe/Kconfig.debug"
+endmenu
+96
drivers/gpu/drm/xe/Kconfig.debug
+# SPDX-License-Identifier: GPL-2.0-only
+config DRM_XE_WERROR
+	bool "Force GCC to throw an error instead of a warning when compiling"
+	# As this may inadvertently break the build, only allow the user
+	# to shoot oneself in the foot iff they aim really hard
+	depends on EXPERT
+	# We use the dependency on !COMPILE_TEST to not be enabled in
+	# allmodconfig or allyesconfig configurations
+	depends on !COMPILE_TEST
+	default n
+	help
+	  Add -Werror to the build flags for (and only for) xe.ko.
+	  Do not enable this unless you are writing code for the xe.ko module.
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_DEBUG
+	bool "Enable additional driver debugging"
+	depends on DRM_XE
+	depends on EXPERT
+	depends on !COMPILE_TEST
+	default n
+	help
+	  Choose this option to turn on extra driver debugging that may affect
+	  performance but will catch some internal issues.
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_DEBUG_VM
+	bool "Enable extra VM debugging info"
+	default n
+	help
+	  Enable extra VM debugging info
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_DEBUG_MEM
+	bool "Enable passing SYS/LMEM addresses to user space"
+	default n
+	help
+	  Pass object location through uapi. Intended for extended
+	  testing and development only.
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_SIMPLE_ERROR_CAPTURE
+	bool "Enable simple error capture to dmesg on job timeout"
+	default n
+	help
+	  Choose this option when debugging an unexpected job timeout
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_KUNIT_TEST
+	tristate "KUnit tests for the drm xe driver" if !KUNIT_ALL_TESTS
+	depends on DRM_XE && KUNIT
+	default KUNIT_ALL_TESTS
+	select DRM_EXPORT_FOR_TESTS if m
+	help
+	  Choose this option to allow the driver to perform selftests under
+	  the kunit framework
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_LARGE_GUC_BUFFER
+	bool "Enable larger guc log buffer"
+	default n
+	help
+	  Choose this option when debugging guc issues.
+	  Buffer should be large enough for complex issues.
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+config DRM_XE_USERPTR_INVAL_INJECT
+	bool "Inject userptr invalidation -EINVAL errors"
+	default n
+	help
+	  Choose this option when debugging error paths that
+	  are hit during checks for userptr invalidations.
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+121
drivers/gpu/drm/xe/Makefile
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the drm device driver. This driver provides support for the
+# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
+
+# Add a set of useful warning flags and enable -Werror for CI to prevent
+# trivial mistakes from creeping in. We have to do this piecemeal as we reject
+# any patch that isn't warning clean, so turning on -Wall -Wextra (or W=1) we
+# need to filter out dubious warnings. Still it is our interest
+# to keep running locally with W=1 C=1 until we are completely clean.
+#
+# Note the danger in using -Wall -Wextra is that when CI updates gcc we
+# will most likely get a sudden build breakage... Hopefully we will fix
+# new warnings before CI updates!
+subdir-ccflags-y := -Wall -Wextra
+# making these call cc-disable-warning breaks when trying to build xe.mod.o
+# by calling make M=drivers/gpu/drm/xe. This doesn't happen in upstream tree,
+# so it was somehow fixed by the changes in the build system. Move it back to
+# $(call cc-disable-warning, ...) after rebase.
+subdir-ccflags-y += -Wno-unused-parameter
+subdir-ccflags-y += -Wno-type-limits
+#subdir-ccflags-y += $(call cc-disable-warning, unused-parameter)
+#subdir-ccflags-y += $(call cc-disable-warning, type-limits)
+subdir-ccflags-y += $(call cc-disable-warning, missing-field-initializers)
+subdir-ccflags-y += $(call cc-disable-warning, unused-but-set-variable)
+# clang warnings
+subdir-ccflags-y += $(call cc-disable-warning, sign-compare)
+subdir-ccflags-y += $(call cc-disable-warning, sometimes-uninitialized)
+subdir-ccflags-y += $(call cc-disable-warning, initializer-overrides)
+subdir-ccflags-y += $(call cc-disable-warning, frame-address)
+subdir-ccflags-$(CONFIG_DRM_XE_WERROR) += -Werror
+
+# Fine grained warnings disable
+CFLAGS_xe_pci.o = $(call cc-disable-warning, override-init)
+
+subdir-ccflags-y += -I$(srctree)/$(src)
+
+# Please keep these build lists sorted!
+
+# core driver code
+
+xe-y += xe_bb.o \
+	xe_bo.o \
+	xe_bo_evict.o \
+	xe_debugfs.o \
+	xe_device.o \
+	xe_dma_buf.o \
+	xe_engine.o \
+	xe_exec.o \
+	xe_execlist.o \
+	xe_force_wake.o \
+	xe_ggtt.o \
+	xe_gpu_scheduler.o \
+	xe_gt.o \
+	xe_gt_clock.o \
+	xe_gt_debugfs.o \
+	xe_gt_mcr.o \
+	xe_gt_pagefault.o \
+	xe_gt_sysfs.o \
+	xe_gt_topology.o \
+	xe_guc.o \
+	xe_guc_ads.o \
+	xe_guc_ct.o \
+	xe_guc_debugfs.o \
+	xe_guc_hwconfig.o \
+	xe_guc_log.o \
+	xe_guc_pc.o \
+	xe_guc_submit.o \
+	xe_hw_engine.o \
+	xe_hw_fence.o \
+	xe_huc.o \
+	xe_huc_debugfs.o \
+	xe_irq.o \
+	xe_lrc.o \
+	xe_migrate.o \
+	xe_mmio.o \
+	xe_mocs.o \
+	xe_module.o \
+	xe_pci.o \
+	xe_pcode.o \
+	xe_pm.o \
+	xe_preempt_fence.o \
+	xe_pt.o \
+	xe_pt_walk.o \
+	xe_query.o \
+	xe_reg_sr.o \
+	xe_reg_whitelist.o \
+	xe_rtp.o \
+	xe_ring_ops.o \
+	xe_sa.o \
+	xe_sched_job.o \
+	xe_step.o \
+	xe_sync.o \
+	xe_trace.o \
+	xe_ttm_gtt_mgr.o \
+	xe_ttm_vram_mgr.o \
+	xe_tuning.o \
+	xe_uc.o \
+	xe_uc_debugfs.o \
+	xe_uc_fw.o \
+	xe_vm.o \
+	xe_vm_madvise.o \
+	xe_wait_user_fence.o \
+	xe_wa.o \
+	xe_wopcm.o
+
+# XXX: Needed for i915 register definitions. Will be removed after xe-regs.
+subdir-ccflags-y += -I$(srctree)/drivers/gpu/drm/i915/
+
+obj-$(CONFIG_DRM_XE) += xe.o
+obj-$(CONFIG_DRM_XE_KUNIT_TEST) += tests/
+
+# header test
+always-$(CONFIG_DRM_XE_WERROR) += \
+	$(patsubst %.h,%.hdrtest, $(shell cd $(srctree)/$(src) && find * -name '*.h'))
+
+quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@)
+      cmd_hdrtest = $(CC) -DHDRTEST $(filter-out $(CFLAGS_GCOV), $(c_flags)) -S -o /dev/null -x c /dev/null -include $<; touch $@
+
+$(obj)/%.hdrtest: $(src)/%.h FORCE
+	$(call if_changed_dep,hdrtest)
+219
drivers/gpu/drm/xe/abi/guc_actions_abi.h
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_ACTIONS_ABI_H
+#define _ABI_GUC_ACTIONS_ABI_H
+
+/**
+ * DOC: HOST2GUC_SELF_CFG
+ *
+ * This message is used by Host KMD to set up the `GuC Self Config KLVs`_.
+ *
+ * This message must be sent as `MMIO HXG Message`_.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_HOST_                                |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | DATA0 = MBZ                                                  |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | ACTION = _`GUC_ACTION_HOST2GUC_SELF_CFG` = 0x0508            |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 | 31:16 | **KLV_KEY** - KLV key, see `GuC Self Config KLVs`_           |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | **KLV_LEN** - KLV length                                     |
+ *  |   |       |                                                              |
+ *  |   |       |   - 32 bit KLV = 1                                           |
+ *  |   |       |   - 64 bit KLV = 2                                           |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 2 |  31:0 | **VALUE32** - Bits 31-0 of the KLV value                     |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 3 |  31:0 | **VALUE64** - Bits 63-32 of the KLV value (**KLV_LEN** = 2)  |
+ *  +---+-------+--------------------------------------------------------------+
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_GUC_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_                        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  27:0 | DATA0 = **NUM** - 1 if KLV was parsed, 0 if not recognized   |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+#define GUC_ACTION_HOST2GUC_SELF_CFG			0x0508
+
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_LEN		(GUC_HXG_REQUEST_MSG_MIN_LEN + 3u)
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_0_MBZ		GUC_HXG_REQUEST_MSG_0_DATA0
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY		(0xffff << 16)
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN		(0xffff << 0)
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32		GUC_HXG_REQUEST_MSG_n_DATAn
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64		GUC_HXG_REQUEST_MSG_n_DATAn
+
+#define HOST2GUC_SELF_CFG_RESPONSE_MSG_LEN		GUC_HXG_RESPONSE_MSG_MIN_LEN
+#define HOST2GUC_SELF_CFG_RESPONSE_MSG_0_NUM		GUC_HXG_RESPONSE_MSG_0_DATA0
+
+/**
+ * DOC: HOST2GUC_CONTROL_CTB
+ *
+ * This H2G action allows Vf Host to enable or disable H2G and G2H `CT Buffer`_.
+ *
+ * This message must be sent as `MMIO HXG Message`_.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_HOST_                                |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | DATA0 = MBZ                                                  |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | ACTION = _`GUC_ACTION_HOST2GUC_CONTROL_CTB` = 0x4509         |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 | **CONTROL** - control `CTB based communication`_             |
+ *  |   |       |                                                              |
+ *  |   |       |   - _`GUC_CTB_CONTROL_DISABLE` = 0                           |
+ *  |   |       |   - _`GUC_CTB_CONTROL_ENABLE` = 1                            |
+ *  +---+-------+--------------------------------------------------------------+
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_GUC_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_                        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  27:0 | DATA0 = MBZ                                                  |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+#define GUC_ACTION_HOST2GUC_CONTROL_CTB			0x4509
+
+#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_LEN		(GUC_HXG_REQUEST_MSG_MIN_LEN + 1u)
+#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_0_MBZ		GUC_HXG_REQUEST_MSG_0_DATA0
+#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_1_CONTROL	GUC_HXG_REQUEST_MSG_n_DATAn
+#define   GUC_CTB_CONTROL_DISABLE			0u
+#define   GUC_CTB_CONTROL_ENABLE			1u
+
+#define HOST2GUC_CONTROL_CTB_RESPONSE_MSG_LEN		GUC_HXG_RESPONSE_MSG_MIN_LEN
+#define HOST2GUC_CONTROL_CTB_RESPONSE_MSG_0_MBZ		GUC_HXG_RESPONSE_MSG_0_DATA0
+
+/* legacy definitions */
+
+enum xe_guc_action {
+	XE_GUC_ACTION_DEFAULT = 0x0,
+	XE_GUC_ACTION_REQUEST_PREEMPTION = 0x2,
+	XE_GUC_ACTION_REQUEST_ENGINE_RESET = 0x3,
+	XE_GUC_ACTION_ALLOCATE_DOORBELL = 0x10,
+	XE_GUC_ACTION_DEALLOCATE_DOORBELL = 0x20,
+	XE_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE = 0x30,
+	XE_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x40,
+	XE_GUC_ACTION_FORCE_LOG_BUFFER_FLUSH = 0x302,
+	XE_GUC_ACTION_ENTER_S_STATE = 0x501,
+	XE_GUC_ACTION_EXIT_S_STATE = 0x502,
+	XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE = 0x506,
+	XE_GUC_ACTION_SCHED_CONTEXT = 0x1000,
+	XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET = 0x1001,
+	XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002,
+	XE_GUC_ACTION_SCHED_ENGINE_MODE_SET = 0x1003,
+	XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE = 0x1004,
+	XE_GUC_ACTION_SET_CONTEXT_PRIORITY = 0x1005,
+	XE_GUC_ACTION_SET_CONTEXT_EXECUTION_QUANTUM = 0x1006,
+	XE_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT = 0x1007,
+	XE_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008,
+	XE_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009,
+	XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES = 0x100B,
+	XE_GUC_ACTION_SETUP_PC_GUCRC = 0x3004,
+	XE_GUC_ACTION_AUTHENTICATE_HUC = 0x4000,
+	XE_GUC_ACTION_GET_HWCONFIG = 0x4100,
+	XE_GUC_ACTION_REGISTER_CONTEXT = 0x4502,
+	XE_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503,
+	XE_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
+	XE_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
+	XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
+	XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
+	XE_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507,
+	XE_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A,
+	XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR = 0x6000,
+	XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC = 0x6002,
+	XE_GUC_ACTION_PAGE_FAULT_RES_DESC = 0x6003,
+	XE_GUC_ACTION_ACCESS_COUNTER_NOTIFY = 0x6004,
+	XE_GUC_ACTION_TLB_INVALIDATION = 0x7000,
+	XE_GUC_ACTION_TLB_INVALIDATION_DONE = 0x7001,
+	XE_GUC_ACTION_TLB_INVALIDATION_ALL = 0x7002,
+	XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION = 0x8002,
+	XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003,
+	XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004,
+	XE_GUC_ACTION_NOTIFY_EXCEPTION = 0x8005,
+	XE_GUC_ACTION_LIMIT
+};
+
+enum xe_guc_rc_options {
+	XE_GUCRC_HOST_CONTROL,
+	XE_GUCRC_FIRMWARE_CONTROL,
+};
+
+enum xe_guc_preempt_options {
+	XE_GUC_PREEMPT_OPTION_DROP_WORK_Q = 0x4,
+	XE_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q = 0x8,
+};
+
+enum xe_guc_report_status {
+	XE_GUC_REPORT_STATUS_UNKNOWN = 0x0,
+	XE_GUC_REPORT_STATUS_ACKED = 0x1,
+	XE_GUC_REPORT_STATUS_ERROR = 0x2,
+	XE_GUC_REPORT_STATUS_COMPLETE = 0x4,
+};
+
+enum xe_guc_sleep_state_status {
+	XE_GUC_SLEEP_STATE_SUCCESS = 0x1,
+	XE_GUC_SLEEP_STATE_PREEMPT_TO_IDLE_FAILED = 0x2,
+	XE_GUC_SLEEP_STATE_ENGINE_RESET_FAILED = 0x3
+#define XE_GUC_SLEEP_STATE_INVALID_MASK 0x80000000
+};
+
+#define GUC_LOG_CONTROL_LOGGING_ENABLED	(1 << 0)
+#define GUC_LOG_CONTROL_VERBOSITY_SHIFT	4
+#define GUC_LOG_CONTROL_VERBOSITY_MASK	(0xF << GUC_LOG_CONTROL_VERBOSITY_SHIFT)
+#define GUC_LOG_CONTROL_DEFAULT_LOGGING	(1 << 8)
+
+#define XE_GUC_TLB_INVAL_TYPE_SHIFT 0
+#define XE_GUC_TLB_INVAL_MODE_SHIFT 8
+/* Flush PPC or SMRO caches along with TLB invalidation request */
+#define XE_GUC_TLB_INVAL_FLUSH_CACHE (1 << 31)
+
+enum xe_guc_tlb_invalidation_type {
+	XE_GUC_TLB_INVAL_FULL = 0x0,
+	XE_GUC_TLB_INVAL_PAGE_SELECTIVE = 0x1,
+	XE_GUC_TLB_INVAL_PAGE_SELECTIVE_CTX = 0x2,
+	XE_GUC_TLB_INVAL_GUC = 0x3,
+};
+
+/*
+ * 0: Heavy mode of Invalidation:
+ *	The pipeline of the engine(s) for which the invalidation is targeted to is
+ *	blocked, and all the in-flight transactions are guaranteed to be Globally
+ *	Observed before completing the TLB invalidation
+ * 1: Lite mode of Invalidation:
+ *	TLBs of the targeted engine(s) are immediately invalidated.
+ *	In-flight transactions are NOT guaranteed to be Globally Observed before
+ *	completing TLB invalidation.
+ *	Light Invalidation Mode is to be used only when it can be guaranteed (by SW)
+ *	that the address translations remain invariant for the in-flight transactions
+ *	across the TLB invalidation. In other words, this mode can be used when the
+ *	TLB invalidation is intended to clear out the stale cached translations that
+ *	are no longer in use. Light Invalidation Mode is much faster than the Heavy
+ *	Invalidation Mode, as it does not wait for the in-flight transactions to be
+ *	GOd.
+ */
+enum xe_guc_tlb_inval_mode {
+	XE_GUC_TLB_INVAL_MODE_HEAVY = 0x0,
+	XE_GUC_TLB_INVAL_MODE_LITE = 0x1,
+};
+
+#endif
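The dword layouts documented in this header can be illustrated with a small standalone packing sketch. The action value and the TLB shift/flag constants come from the header above; the numeric GUC_HXG_ORIGIN_HOST and GUC_HXG_TYPE_REQUEST encodings are not defined in this file, so the zero values below are assumptions for illustration, as is treating the TLB type/mode fields as a single combined dword:

```c
#include <assert.h>
#include <stdint.h>

/* Values taken from the header above. */
#define GUC_ACTION_HOST2GUC_SELF_CFG	0x0508u
#define XE_GUC_TLB_INVAL_TYPE_SHIFT	0
#define XE_GUC_TLB_INVAL_MODE_SHIFT	8
#define XE_GUC_TLB_INVAL_FLUSH_CACHE	(1u << 31)

/* Assumed HXG encodings (not defined in this header). */
#define GUC_HXG_ORIGIN_HOST	0u
#define GUC_HXG_TYPE_REQUEST	0u

/* Pack the 4-dword HOST2GUC_SELF_CFG request from the table above:
 * dw0 = origin/type/action (DATA0 in bits 27:16 is MBZ),
 * dw1 = KLV key and length, dw2/dw3 = low/high halves of the value. */
static void pack_self_cfg(uint32_t msg[4], uint16_t klv_key, uint16_t klv_len,
			  uint64_t value)
{
	msg[0] = (GUC_HXG_ORIGIN_HOST << 31) | (GUC_HXG_TYPE_REQUEST << 28) |
		 GUC_ACTION_HOST2GUC_SELF_CFG;
	msg[1] = ((uint32_t)klv_key << 16) | klv_len;
	msg[2] = (uint32_t)value;		/* VALUE32 */
	msg[3] = (uint32_t)(value >> 32);	/* VALUE64, used when KLV_LEN = 2 */
}

/* Combine a TLB invalidation type and mode with the optional cache-flush
 * flag, using the shifts defined above. */
static uint32_t tlb_inval_flags(uint32_t type, uint32_t mode, int flush_cache)
{
	uint32_t dw = (type << XE_GUC_TLB_INVAL_TYPE_SHIFT) |
		      (mode << XE_GUC_TLB_INVAL_MODE_SHIFT);

	if (flush_cache)
		dw |= XE_GUC_TLB_INVAL_FLUSH_CACHE;	/* also flush PPC/SMRO */
	return dw;
}
```

For a 64-bit KLV (KLV_LEN = 2), the value is split little-end-first across dwords 2 and 3, matching the VALUE32/VALUE64 rows of the table.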
+249
drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _GUC_ACTIONS_SLPC_ABI_H_
+#define _GUC_ACTIONS_SLPC_ABI_H_
+
+#include <linux/types.h>
+
+/**
+ * DOC: SLPC SHARED DATA STRUCTURE
+ *
+ *  +----+------+--------------------------------------------------------------+
+ *  | CL | Bytes| Description                                                  |
+ *  +====+======+==============================================================+
+ *  | 1  | 0-3  | SHARED DATA SIZE                                             |
+ *  |    +------+--------------------------------------------------------------+
+ *  |    | 4-7  | GLOBAL STATE                                                 |
+ *  |    +------+--------------------------------------------------------------+
+ *  |    | 8-11 | DISPLAY DATA ADDRESS                                         |
+ *  |    +------+--------------------------------------------------------------+
+ *  |    | 12:63| PADDING                                                      |
+ *  +----+------+--------------------------------------------------------------+
+ *  |    | 0:63 | PADDING(PLATFORM INFO)                                       |
+ *  +----+------+--------------------------------------------------------------+
+ *  | 3  | 0-3  | TASK STATE DATA                                              |
+ *  +    +------+--------------------------------------------------------------+
+ *  |    | 4:63 | PADDING                                                      |
+ *  +----+------+--------------------------------------------------------------+
+ *  |4-21|0:1087| OVERRIDE PARAMS AND BIT FIELDS                               |
+ *  +----+------+--------------------------------------------------------------+
+ *  |    |      | PADDING + EXTRA RESERVED PAGE                                |
+ *  +----+------+--------------------------------------------------------------+
+ */
+
+/*
+ * SLPC exposes certain parameters for global configuration by the host.
+ * These are referred to as override parameters, because in most cases
+ * the host will not need to modify the default values used by SLPC.
+ * SLPC remembers the default values which allows the host to easily restore
+ * them by simply unsetting the override. The host can set or unset override
+ * parameters during SLPC (re-)initialization using the SLPC Reset event.
+ * The host can also set or unset override parameters on the fly using the
+ * Parameter Set and Parameter Unset events
+ */
+
+#define SLPC_MAX_OVERRIDE_PARAMETERS		256
+#define SLPC_OVERRIDE_BITFIELD_SIZE \
+		(SLPC_MAX_OVERRIDE_PARAMETERS / 32)
+
+#define SLPC_PAGE_SIZE_BYTES			4096
+#define SLPC_CACHELINE_SIZE_BYTES		64
+#define SLPC_SHARED_DATA_SIZE_BYTE_HEADER	SLPC_CACHELINE_SIZE_BYTES
+#define SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO	SLPC_CACHELINE_SIZE_BYTES
+#define SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE	SLPC_CACHELINE_SIZE_BYTES
+#define SLPC_SHARED_DATA_MODE_DEFN_TABLE_SIZE	SLPC_PAGE_SIZE_BYTES
+#define SLPC_SHARED_DATA_SIZE_BYTE_MAX		(2 * SLPC_PAGE_SIZE_BYTES)
+
+/*
+ * Cacheline size aligned (Total size needed for
+ * SLPM_KMD_MAX_OVERRIDE_PARAMETERS=256 is 1088 bytes)
+ */
+#define SLPC_OVERRIDE_PARAMS_TOTAL_BYTES	(((((SLPC_MAX_OVERRIDE_PARAMETERS * 4) \
+						+ ((SLPC_MAX_OVERRIDE_PARAMETERS / 32) * 4)) \
+						+ (SLPC_CACHELINE_SIZE_BYTES - 1)) / SLPC_CACHELINE_SIZE_BYTES) * \
+						SLPC_CACHELINE_SIZE_BYTES)
+
+#define SLPC_SHARED_DATA_SIZE_BYTE_OTHER	(SLPC_SHARED_DATA_SIZE_BYTE_MAX - \
+						(SLPC_SHARED_DATA_SIZE_BYTE_HEADER \
+						+ SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO \
+						+ SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE \
+						+ SLPC_OVERRIDE_PARAMS_TOTAL_BYTES \
+						+ SLPC_SHARED_DATA_MODE_DEFN_TABLE_SIZE))
+
+enum slpc_task_enable {
+	SLPC_PARAM_TASK_DEFAULT = 0,
+	SLPC_PARAM_TASK_ENABLED,
+	SLPC_PARAM_TASK_DISABLED,
+	SLPC_PARAM_TASK_UNKNOWN
+};
+
+enum slpc_global_state {
+	SLPC_GLOBAL_STATE_NOT_RUNNING = 0,
+	SLPC_GLOBAL_STATE_INITIALIZING = 1,
+	SLPC_GLOBAL_STATE_RESETTING = 2,
+	SLPC_GLOBAL_STATE_RUNNING = 3,
+	SLPC_GLOBAL_STATE_SHUTTING_DOWN = 4,
+	SLPC_GLOBAL_STATE_ERROR = 5
+};
+
+enum slpc_param_id {
+	SLPC_PARAM_TASK_ENABLE_GTPERF = 0,
+	SLPC_PARAM_TASK_DISABLE_GTPERF = 1,
+	SLPC_PARAM_TASK_ENABLE_BALANCER = 2,
+	SLPC_PARAM_TASK_DISABLE_BALANCER = 3,
+	SLPC_PARAM_TASK_ENABLE_DCC = 4,
+	SLPC_PARAM_TASK_DISABLE_DCC = 5,
+	SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ = 6,
+	SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ = 7,
+	SLPC_PARAM_GLOBAL_MIN_GT_SLICE_FREQ_MHZ = 8,
+	SLPC_PARAM_GLOBAL_MAX_GT_SLICE_FREQ_MHZ = 9,
+	SLPC_PARAM_GTPERF_THRESHOLD_MAX_FPS = 10,
+	SLPC_PARAM_GLOBAL_DISABLE_GT_FREQ_MANAGEMENT = 11,
+	SLPC_PARAM_GTPERF_ENABLE_FRAMERATE_STALLING = 12,
+	SLPC_PARAM_GLOBAL_DISABLE_RC6_MODE_CHANGE = 13,
+	SLPC_PARAM_GLOBAL_OC_UNSLICE_FREQ_MHZ = 14,
+	SLPC_PARAM_GLOBAL_OC_SLICE_FREQ_MHZ = 15,
+	SLPC_PARAM_GLOBAL_ENABLE_IA_GT_BALANCING = 16,
+	SLPC_PARAM_GLOBAL_ENABLE_ADAPTIVE_BURST_TURBO = 17,
+	SLPC_PARAM_GLOBAL_ENABLE_EVAL_MODE = 18,
+	SLPC_PARAM_GLOBAL_ENABLE_BALANCER_IN_NON_GAMING_MODE = 19,
+	SLPC_PARAM_GLOBAL_RT_MODE_TURBO_FREQ_DELTA_MHZ = 20,
+	SLPC_PARAM_PWRGATE_RC_MODE = 21,
+	SLPC_PARAM_EDR_MODE_COMPUTE_TIMEOUT_MS = 22,
+	SLPC_PARAM_EDR_QOS_FREQ_MHZ = 23,
+	SLPC_PARAM_MEDIA_FF_RATIO_MODE = 24,
+	SLPC_PARAM_ENABLE_IA_FREQ_LIMITING = 25,
+	SLPC_PARAM_STRATEGIES = 26,
+	SLPC_PARAM_POWER_PROFILE = 27,
+	SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY = 28,
+	SLPC_MAX_PARAM = 32,
+};
+
+enum slpc_media_ratio_mode {
+	SLPC_MEDIA_RATIO_MODE_DYNAMIC_CONTROL = 0,
+	SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_ONE = 1,
+	SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_TWO = 2,
+};
+
+enum slpc_gucrc_mode {
+	SLPC_GUCRC_MODE_HW = 0,
+	SLPC_GUCRC_MODE_GUCRC_NO_RC6 = 1,
+	SLPC_GUCRC_MODE_GUCRC_STATIC_TIMEOUT = 2,
+	SLPC_GUCRC_MODE_GUCRC_DYNAMIC_HYSTERESIS = 3,
+
+	SLPC_GUCRC_MODE_MAX,
+};
+
+enum slpc_event_id {
+	SLPC_EVENT_RESET = 0,
+	SLPC_EVENT_SHUTDOWN = 1,
+	SLPC_EVENT_PLATFORM_INFO_CHANGE = 2,
+	SLPC_EVENT_DISPLAY_MODE_CHANGE = 3,
+	SLPC_EVENT_FLIP_COMPLETE = 4,
+	SLPC_EVENT_QUERY_TASK_STATE = 5,
+	SLPC_EVENT_PARAMETER_SET = 6,
+	SLPC_EVENT_PARAMETER_UNSET = 7,
+};
+
+struct slpc_task_state_data {
+	union {
+		u32 task_status_padding;
+		struct {
+			u32 status;
+#define SLPC_GTPERF_TASK_ENABLED	REG_BIT(0)
+#define SLPC_DCC_TASK_ENABLED		REG_BIT(11)
+#define SLPC_IN_DCC			REG_BIT(12)
+#define SLPC_BALANCER_ENABLED		REG_BIT(15)
+#define SLPC_IBC_TASK_ENABLED		REG_BIT(16)
+#define SLPC_BALANCER_IA_LMT_ENABLED	REG_BIT(17)
+#define SLPC_BALANCER_IA_LMT_ACTIVE	REG_BIT(18)
+		};
+	};
+	union {
+		u32 freq_padding;
+		struct {
+#define SLPC_MAX_UNSLICE_FREQ_MASK	REG_GENMASK(7, 0)
+#define SLPC_MIN_UNSLICE_FREQ_MASK	REG_GENMASK(15, 8)
+#define SLPC_MAX_SLICE_FREQ_MASK	REG_GENMASK(23, 16)
+#define SLPC_MIN_SLICE_FREQ_MASK	REG_GENMASK(31, 24)
+			u32 freq;
+		};
+	};
+} __packed;
+
+struct slpc_shared_data_header {
+	/* Total size in bytes of this shared buffer. */
+	u32 size;
+	u32 global_state;
+	u32 display_data_addr;
+} __packed;
+
+struct slpc_override_params {
+	u32 bits[SLPC_OVERRIDE_BITFIELD_SIZE];
+	u32 values[SLPC_MAX_OVERRIDE_PARAMETERS];
+} __packed;
+
+struct slpc_shared_data {
+	struct slpc_shared_data_header header;
+	u8 shared_data_header_pad[SLPC_SHARED_DATA_SIZE_BYTE_HEADER -
+				sizeof(struct slpc_shared_data_header)];
+
+	u8 platform_info_pad[SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO];
+
+	struct slpc_task_state_data task_state_data;
+	u8 task_state_data_pad[SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE -
+				sizeof(struct slpc_task_state_data)];
+
+	struct slpc_override_params override_params;
+	u8 override_params_pad[SLPC_OVERRIDE_PARAMS_TOTAL_BYTES -
+				sizeof(struct slpc_override_params)];
+
+	u8 shared_data_pad[SLPC_SHARED_DATA_SIZE_BYTE_OTHER];
+
+	/* PAGE 2 (4096 bytes), mode based parameter will be removed soon */
+	u8 reserved_mode_definition[4096];
+} __packed;
+
+/**
+ * DOC: SLPC H2G MESSAGE FORMAT
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_HOST_                                |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | DATA0 = MBZ                                                  |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | ACTION = _`GUC_ACTION_HOST2GUC_PC_SLPM_REQUEST` = 0x3003     |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:8 | **EVENT_ID**                                                 |
+ *  +   +-------+--------------------------------------------------------------+
+ *  |
| 7:0 | **EVENT_ARGC** - number of data arguments | 227 + * +---+-------+--------------------------------------------------------------+ 228 + * | 2 | 31:0 | **EVENT_DATA1** | 229 + * +---+-------+--------------------------------------------------------------+ 230 + * |...| 31:0 | ... | 231 + * +---+-------+--------------------------------------------------------------+ 232 + * |2+n| 31:0 | **EVENT_DATAn** | 233 + * +---+-------+--------------------------------------------------------------+ 234 + */ 235 + 236 + #define GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST 0x3003 237 + 238 + #define HOST2GUC_PC_SLPC_REQUEST_MSG_MIN_LEN \ 239 + (GUC_HXG_REQUEST_MSG_MIN_LEN + 1u) 240 + #define HOST2GUC_PC_SLPC_EVENT_MAX_INPUT_ARGS 9 241 + #define HOST2GUC_PC_SLPC_REQUEST_MSG_MAX_LEN \ 242 + (HOST2GUC_PC_SLPC_REQUEST_REQUEST_MSG_MIN_LEN + \ 243 + HOST2GUC_PC_SLPC_EVENT_MAX_INPUT_ARGS) 244 + #define HOST2GUC_PC_SLPC_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 245 + #define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID (0xff << 8) 246 + #define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC (0xff << 0) 247 + #define HOST2GUC_PC_SLPC_REQUEST_MSG_N_EVENT_DATA_N GUC_HXG_REQUEST_MSG_n_DATAn 248 + 249 + #endif
drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2014-2021 Intel Corporation
 */

#ifndef _ABI_GUC_COMMUNICATION_CTB_ABI_H
#define _ABI_GUC_COMMUNICATION_CTB_ABI_H

#include <linux/types.h>
#include <linux/build_bug.h>

#include "guc_messages_abi.h"

/**
 * DOC: CT Buffer
 *
 * Circular buffer used to send `CTB Message`_
 */

/**
 * DOC: CTB Descriptor
 *
 * +---+-------+--------------------------------------------------------------+
 * |   | Bits  | Description                                                  |
 * +===+=======+==============================================================+
 * | 0 |  31:0 | **HEAD** - offset (in dwords) to the last dword that was     |
 * |   |       | read from the `CT Buffer`_.                                  |
 * |   |       | It can only be updated by the receiver.                      |
 * +---+-------+--------------------------------------------------------------+
 * | 1 |  31:0 | **TAIL** - offset (in dwords) to the last dword that was     |
 * |   |       | written to the `CT Buffer`_.                                 |
 * |   |       | It can only be updated by the sender.                        |
 * +---+-------+--------------------------------------------------------------+
 * | 2 |  31:0 | **STATUS** - status of the CTB                               |
 * |   |       |                                                              |
 * |   |       |   - _`GUC_CTB_STATUS_NO_ERROR` = 0 (normal operation)        |
 * |   |       |   - _`GUC_CTB_STATUS_OVERFLOW` = 1 (head/tail too large)     |
 * |   |       |   - _`GUC_CTB_STATUS_UNDERFLOW` = 2 (truncated message)      |
 * |   |       |   - _`GUC_CTB_STATUS_MISMATCH` = 4 (head/tail modified)      |
 * +---+-------+--------------------------------------------------------------+
 * |...|       | RESERVED = MBZ                                               |
 * +---+-------+--------------------------------------------------------------+
 * | 15|  31:0 | RESERVED = MBZ                                               |
 * +---+-------+--------------------------------------------------------------+
 */

struct guc_ct_buffer_desc {
	u32 head;
	u32 tail;
	u32 status;
#define GUC_CTB_STATUS_NO_ERROR			0
#define GUC_CTB_STATUS_OVERFLOW			(1 << 0)
#define GUC_CTB_STATUS_UNDERFLOW		(1 << 1)
#define GUC_CTB_STATUS_MISMATCH			(1 << 2)
	u32 reserved[13];
} __packed;
static_assert(sizeof(struct guc_ct_buffer_desc) == 64);

/**
 * DOC: CTB Message
 *
 * +---+-------+--------------------------------------------------------------+
 * |   | Bits  | Description                                                  |
 * +===+=======+==============================================================+
 * | 0 | 31:16 | **FENCE** - message identifier                               |
 * |   +-------+--------------------------------------------------------------+
 * |   | 15:12 | **FORMAT** - format of the CTB message                       |
 * |   |       |  - _`GUC_CTB_FORMAT_HXG` = 0 - see `CTB HXG Message`_        |
 * |   +-------+--------------------------------------------------------------+
 * |   |  11:8 | **RESERVED**                                                 |
 * |   +-------+--------------------------------------------------------------+
 * |   |   7:0 | **NUM_DWORDS** - length of the CTB message (w/o header)      |
 * +---+-------+--------------------------------------------------------------+
 * | 1 |  31:0 | optional (depends on FORMAT)                                 |
 * +---+-------+                                                              |
 * |...|       |                                                              |
 * +---+-------+                                                              |
 * | n |  31:0 |                                                              |
 * +---+-------+--------------------------------------------------------------+
 */

#define GUC_CTB_HDR_LEN				1u
#define GUC_CTB_MSG_MIN_LEN			GUC_CTB_HDR_LEN
#define GUC_CTB_MSG_MAX_LEN			256u
#define GUC_CTB_MSG_0_FENCE			(0xffff << 16)
#define GUC_CTB_MSG_0_FORMAT			(0xf << 12)
#define   GUC_CTB_FORMAT_HXG			0u
#define GUC_CTB_MSG_0_RESERVED			(0xf << 8)
#define GUC_CTB_MSG_0_NUM_DWORDS		(0xff << 0)

/**
 * DOC: CTB HXG Message
 *
 * +---+-------+--------------------------------------------------------------+
 * |   | Bits  | Description                                                  |
 * +===+=======+==============================================================+
 * | 0 | 31:16 | FENCE                                                        |
 * |   +-------+--------------------------------------------------------------+
 * |   | 15:12 | FORMAT = GUC_CTB_FORMAT_HXG_                                 |
 * |   +-------+--------------------------------------------------------------+
 * |   |  11:8 | RESERVED = MBZ                                               |
 * |   +-------+--------------------------------------------------------------+
 * |   |   7:0 | NUM_DWORDS = length (in dwords) of the embedded HXG message  |
 * +---+-------+--------------------------------------------------------------+
 * | 1 |  31:0 |                                                              |
 * +---+-------+                                                              |
 * |...|       | [Embedded `HXG Message`_]                                    |
 * +---+-------+                                                              |
 * | n |  31:0 |                                                              |
 * +---+-------+--------------------------------------------------------------+
 */

#define GUC_CTB_HXG_MSG_MIN_LEN		(GUC_CTB_MSG_MIN_LEN + GUC_HXG_MSG_MIN_LEN)
#define GUC_CTB_HXG_MSG_MAX_LEN		GUC_CTB_MSG_MAX_LEN

/**
 * DOC: CTB based communication
 *
 * The CTB (command transport buffer) communication between Host and GuC
 * is based on a u32 data stream written to the shared buffer. One buffer can
 * be used to transmit data only in one direction (one-directional channel).
 *
 * The current status of each buffer is stored in the buffer descriptor.
 * The buffer descriptor holds tail and head fields that represent the active
 * data stream. The tail field is updated by the data producer (sender), and
 * the head field is updated by the data consumer (receiver)::
 *
 *      +------------+
 *      | DESCRIPTOR |          +=================+============+========+
 *      +============+          |                 | MESSAGE(s) |        |
 *      | address    |--------->+=================+============+========+
 *      +------------+
 *      | head       |          ^-----head--------^
 *      +------------+
 *      | tail       |          ^---------tail-----------------^
 *      +------------+
 *      | size       |          ^---------------size--------------------^
 *      +------------+
 *
 * Each message in the data stream starts with a single u32 treated as a
 * header, followed by an optional set of u32 data that makes up the
 * message-specific payload::
 *
 *      +------------+---------+---------+---------+
 *      |         MESSAGE                          |
 *      +------------+---------+---------+---------+
 *      |   msg[0]   |   [1]   |   ...   |  [n-1]  |
 *      +------------+---------+---------+---------+
 *      |   MESSAGE  |       MESSAGE PAYLOAD       |
 *      +   HEADER   +---------+---------+---------+
 *      |            |    0    |   ...   |    n    |
 *      +======+=====+=========+=========+=========+
 *      | 31:16| code|         |         |         |
 *      +------+-----+         |         |         |
 *      |  15:5|flags|         |         |         |
 *      +------+-----+         |         |         |
 *      |   4:0|  len|         |         |         |
 *      +------+-----+---------+---------+---------+
 *
 *                   ^-------------len-------------^
 *
 * The message header consists of:
 *
 * - **len**, indicates length of the message payload (in u32)
 * - **code**, indicates message code
 * - **flags**, holds various bits to control message handling
 */

/*
 * Definition of the command transport message header (DW0)
 *
 * bit[4..0]	message len (in dwords)
 * bit[7..5]	reserved
 * bit[8]	response (G2H only)
 * bit[8]	write fence to desc (H2G only)
 * bit[9]	write status to H2G buff (H2G only)
 * bit[10]	send status back via G2H (H2G only)
 * bit[15..11]	reserved
 * bit[31..16]	action code
 */
#define GUC_CT_MSG_LEN_SHIFT			0
#define GUC_CT_MSG_LEN_MASK			0x1F
#define GUC_CT_MSG_IS_RESPONSE			(1 << 8)
#define GUC_CT_MSG_WRITE_FENCE_TO_DESC		(1 << 8)
#define GUC_CT_MSG_WRITE_STATUS_TO_BUFF		(1 << 9)
#define GUC_CT_MSG_SEND_STATUS			(1 << 10)
#define GUC_CT_MSG_ACTION_SHIFT			16
#define GUC_CT_MSG_ACTION_MASK			0xFFFF

#endif
drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2014-2021 Intel Corporation
 */

#ifndef _ABI_GUC_COMMUNICATION_MMIO_ABI_H
#define _ABI_GUC_COMMUNICATION_MMIO_ABI_H

/**
 * DOC: GuC MMIO based communication
 *
 * The MMIO based communication between Host and GuC relies on special
 * hardware registers whose format can be defined by the software
 * (so called scratch registers).
 *
 * Each MMIO based message, both Host to GuC (H2G) and GuC to Host (G2H),
 * whose maximum length depends on the number of available scratch
 * registers, is directly written into those scratch registers.
 *
 * For Gen9+, there are 16 software scratch registers 0xC180-0xC1B8,
 * but no H2G command takes more than 4 parameters and the GuC firmware
 * itself uses a 4-element array to store the H2G message.
 *
 * For Gen11+, there are 4 additional registers 0x190240-0x19024C, which
 * are, despite the lower count, preferred over the legacy ones.
 *
 * The MMIO based communication is mainly used during the driver
 * initialization phase to set up the `CTB based communication`_ that will
 * be used afterwards.
 */

#define GUC_MAX_MMIO_MSG_LEN		4

/**
 * DOC: MMIO HXG Message
 *
 * Format of the MMIO messages follows definitions of `HXG Message`_.
 *
 * +---+-------+--------------------------------------------------------------+
 * |   | Bits  | Description                                                  |
 * +===+=======+==============================================================+
 * | 0 |  31:0 |                                                              |
 * +---+-------+                                                              |
 * |...|       | [Embedded `HXG Message`_]                                    |
 * +---+-------+                                                              |
 * | n |  31:0 |                                                              |
 * +---+-------+--------------------------------------------------------------+
 */

#endif
drivers/gpu/drm/xe/abi/guc_errors_abi.h
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2014-2021 Intel Corporation
 */

#ifndef _ABI_GUC_ERRORS_ABI_H
#define _ABI_GUC_ERRORS_ABI_H

enum xe_guc_response_status {
	XE_GUC_RESPONSE_STATUS_SUCCESS = 0x0,
	XE_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000,
};

enum xe_guc_load_status {
	XE_GUC_LOAD_STATUS_DEFAULT = 0x00,
	XE_GUC_LOAD_STATUS_START = 0x01,
	XE_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH = 0x02,
	XE_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH = 0x03,
	XE_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE = 0x04,
	XE_GUC_LOAD_STATUS_GDT_DONE = 0x10,
	XE_GUC_LOAD_STATUS_IDT_DONE = 0x20,
	XE_GUC_LOAD_STATUS_LAPIC_DONE = 0x30,
	XE_GUC_LOAD_STATUS_GUCINT_DONE = 0x40,
	XE_GUC_LOAD_STATUS_DPC_READY = 0x50,
	XE_GUC_LOAD_STATUS_DPC_ERROR = 0x60,
	XE_GUC_LOAD_STATUS_EXCEPTION = 0x70,
	XE_GUC_LOAD_STATUS_INIT_DATA_INVALID = 0x71,
	XE_GUC_LOAD_STATUS_PXP_TEARDOWN_CTRL_ENABLED = 0x72,
	XE_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_START,
	XE_GUC_LOAD_STATUS_MPU_DATA_INVALID = 0x73,
	XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID = 0x74,
	XE_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_END,

	XE_GUC_LOAD_STATUS_READY = 0xF0,
};

#endif
drivers/gpu/drm/xe/abi/guc_klvs_abi.h
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2021 Intel Corporation
 */

#ifndef _ABI_GUC_KLVS_ABI_H
#define _ABI_GUC_KLVS_ABI_H

#include <linux/types.h>

/**
 * DOC: GuC KLV
 *
 * +---+-------+--------------------------------------------------------------+
 * |   | Bits  | Description                                                  |
 * +===+=======+==============================================================+
 * | 0 | 31:16 | **KEY** - KLV key identifier                                 |
 * |   |       |  - `GuC Self Config KLVs`_                                   |
 * |   |       |  - `GuC VGT Policy KLVs`_                                    |
 * |   |       |  - `GuC VF Configuration KLVs`_                              |
 * |   |       |                                                              |
 * |   +-------+--------------------------------------------------------------+
 * |   |  15:0 | **LEN** - length of VALUE (in 32bit dwords)                  |
 * +---+-------+--------------------------------------------------------------+
 * | 1 |  31:0 | **VALUE** - actual value of the KLV (format depends on KEY)  |
 * +---+-------+                                                              |
 * |...|       |                                                              |
 * +---+-------+                                                              |
 * | n |  31:0 |                                                              |
 * +---+-------+--------------------------------------------------------------+
 */

#define GUC_KLV_LEN_MIN				1u
#define GUC_KLV_0_KEY				(0xffff << 16)
#define GUC_KLV_0_LEN				(0xffff << 0)
#define GUC_KLV_n_VALUE				(0xffffffff << 0)

/**
 * DOC: GuC Self Config KLVs
 *
 * `GuC KLV`_ keys available for use with HOST2GUC_SELF_CFG_.
 *
 * _`GUC_KLV_SELF_CFG_MEMIRQ_STATUS_ADDR` : 0x0900
 *      Refers to 64 bit Global Gfx address (in bytes) of memory based interrupts
 *      status vector for use by the GuC.
 *
 * _`GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR` : 0x0901
 *      Refers to 64 bit Global Gfx address (in bytes) of memory based interrupts
 *      source vector for use by the GuC.
 *
 * _`GUC_KLV_SELF_CFG_H2G_CTB_ADDR` : 0x0902
 *      Refers to 64 bit Global Gfx address of H2G `CT Buffer`_.
 *      Should be above WOPCM address but below APIC base address for native mode.
 *
 * _`GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR` : 0x0903
 *      Refers to 64 bit Global Gfx address of H2G `CTB Descriptor`_.
 *      Should be above WOPCM address but below APIC base address for native mode.
 *
 * _`GUC_KLV_SELF_CFG_H2G_CTB_SIZE` : 0x0904
 *      Refers to size of H2G `CT Buffer`_ in bytes.
 *      Should be a multiple of 4K.
 *
 * _`GUC_KLV_SELF_CFG_G2H_CTB_ADDR` : 0x0905
 *      Refers to 64 bit Global Gfx address of G2H `CT Buffer`_.
 *      Should be above WOPCM address but below APIC base address for native mode.
 *
 * _`GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR` : 0x0906
 *      Refers to 64 bit Global Gfx address of G2H `CTB Descriptor`_.
 *      Should be above WOPCM address but below APIC base address for native mode.
 *
 * _`GUC_KLV_SELF_CFG_G2H_CTB_SIZE` : 0x0907
 *      Refers to size of G2H `CT Buffer`_ in bytes.
 *      Should be a multiple of 4K.
 */

#define GUC_KLV_SELF_CFG_MEMIRQ_STATUS_ADDR_KEY		0x0900
#define GUC_KLV_SELF_CFG_MEMIRQ_STATUS_ADDR_LEN		2u

#define GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR_KEY		0x0901
#define GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR_LEN		2u

#define GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY		0x0902
#define GUC_KLV_SELF_CFG_H2G_CTB_ADDR_LEN		2u

#define GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY	0x0903
#define GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_LEN	2u

#define GUC_KLV_SELF_CFG_H2G_CTB_SIZE_KEY		0x0904
#define GUC_KLV_SELF_CFG_H2G_CTB_SIZE_LEN		1u

#define GUC_KLV_SELF_CFG_G2H_CTB_ADDR_KEY		0x0905
#define GUC_KLV_SELF_CFG_G2H_CTB_ADDR_LEN		2u

#define GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY	0x0906
#define GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_LEN	2u

#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY		0x0907
#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_LEN		1u

/*
 * Per context scheduling policy update keys.
 */
enum {
	GUC_CONTEXT_POLICIES_KLV_ID_EXECUTION_QUANTUM = 0x2001,
	GUC_CONTEXT_POLICIES_KLV_ID_PREEMPTION_TIMEOUT = 0x2002,
	GUC_CONTEXT_POLICIES_KLV_ID_SCHEDULING_PRIORITY = 0x2003,
	GUC_CONTEXT_POLICIES_KLV_ID_PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY = 0x2004,
	GUC_CONTEXT_POLICIES_KLV_ID_SLPM_GT_FREQUENCY = 0x2005,

	GUC_CONTEXT_POLICIES_KLV_NUM_IDS = 5,
};

/**
 * DOC: GuC VGT Policy KLVs
 *
 * `GuC KLV`_ keys available for use with PF2GUC_UPDATE_VGT_POLICY.
 *
 * _`GUC_KLV_VGT_POLICY_SCHED_IF_IDLE` : 0x8001
 *      This config sets whether strict scheduling is enabled, whereby any VF
 *      that doesn't have work to submit is still allocated a fixed execution
 *      time-slice, to ensure active VFs' execution is always consistent even
 *      during other VF reprovisioning / rebooting events. Changing this KLV
 *      impacts all VFs and takes effect on the next VF-Switch event.
 *
 *      :0: don't schedule idle (default)
 *      :1: schedule if idle
 *
 * _`GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD` : 0x8002
 *      This config sets the sample period for tracking adverse event counters.
 *      A sample period is the period in millisecs during which events are counted.
 *      This is applicable for all the VFs.
 *
 *      :0: adverse events are not counted (default)
 *      :n: sample period in milliseconds
 *
 * _`GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH` : 0x8D00
 *      This config controls whether the utilized HW engine is reset after a
 *      VF switch (i.e. to clean up stale HW register state left behind by the
 *      previous VF).
 *
 *      :0: don't reset (default)
 *      :1: reset
 */

#define GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY		0x8001
#define GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_LEN		1u

#define GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_KEY	0x8002
#define GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_LEN	1u

#define GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_KEY	0x8D00
#define GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_LEN	1u

/**
 * DOC: GuC VF Configuration KLVs
 *
 * `GuC KLV`_ keys available for use with PF2GUC_UPDATE_VF_CFG.
 *
 * _`GUC_KLV_VF_CFG_GGTT_START` : 0x0001
 *      A 4K aligned start GTT address/offset assigned to VF.
 *      Value is 64 bits.
 *
 * _`GUC_KLV_VF_CFG_GGTT_SIZE` : 0x0002
 *      A 4K aligned size of GGTT assigned to VF.
 *      Value is 64 bits.
 *
 * _`GUC_KLV_VF_CFG_LMEM_SIZE` : 0x0003
 *      A 2M aligned size of local memory assigned to VF.
 *      Value is 64 bits.
 *
 * _`GUC_KLV_VF_CFG_NUM_CONTEXTS` : 0x0004
 *      Refers to the number of contexts allocated to this VF.
 *
 *      :0: no contexts (default)
 *      :1-65535: number of contexts (Gen12)
 *
 * _`GUC_KLV_VF_CFG_TILE_MASK` : 0x0005
 *      For multi-tiled products, this field contains the bitwise-OR of tiles
 *      assigned to the VF. Bit-0-set means VF has access to Tile-0,
 *      Bit-31-set means VF has access to Tile-31, etc.
 *      At least one tile will always be allocated.
 *      If all bits are zero, VF KMD should treat this as a fatal error.
 *      For single-tile products, this KLV config is ignored.
 *
 * _`GUC_KLV_VF_CFG_NUM_DOORBELLS` : 0x0006
 *      Refers to the number of doorbells allocated to this VF.
 *
 *      :0: no doorbells (default)
 *      :1-255: number of doorbells (Gen12)
 *
 * _`GUC_KLV_VF_CFG_EXEC_QUANTUM` : 0x8A01
 *      This config sets the VF's execution quantum in milliseconds.
 *      GuC will attempt to obey the maximum values as much as HW is capable
 *      of, and this will never be perfectly exact (accumulated nano-second
 *      granularity) since the GPU's clock time runs off a different crystal
 *      from the CPU's clock. Changing this KLV on a VF that is currently
 *      running a context won't take effect until a new context is scheduled in.
 *      That said, when the PF is changing this value from 0xFFFFFFFF to
 *      something else, it might never take effect if the VF is running an
 *      infinitely long compute or shader kernel. In such a scenario, the
 *      PF would need to trigger a VM PAUSE and then change the KLV to force
 *      it to take effect. Such cases might typically happen on a 1PF+1VF
 *      Virtualization config enabled for heavier workloads like AI/ML.
 *
 *      :0: infinite exec quantum (default)
 *
 * _`GUC_KLV_VF_CFG_PREEMPT_TIMEOUT` : 0x8A02
 *      This config sets the VF's preemption timeout in microseconds.
 *      GuC will attempt to obey the minimum and maximum values as much as
 *      HW is capable of, and this will never be perfectly exact (accumulated
 *      nano-second granularity) since the GPU's clock time runs off a
 *      different crystal from the CPU's clock. Changing this KLV on a VF
 *      that is currently running a context won't take effect until a new
 *      context is scheduled in.
 *      That said, when the PF is changing this value from 0xFFFFFFFF to
 *      something else, it might never take effect if the VF is running an
 *      infinitely long compute or shader kernel.
 *      In this case, the PF would need to trigger a VM PAUSE and then change
 *      the KLV to force it to take effect. Such cases might typically happen
 *      on a 1PF+1VF Virtualization config enabled for heavier workloads like
 *      AI/ML.
 *
 *      :0: no preemption timeout (default)
 *
 * _`GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR` : 0x8A03
 *      This config sets the threshold for CAT errors caused by the VF.
 *
 *      :0: adverse events or errors will not be reported (default)
 *      :n: event occurrence count per sampling interval
 *
 * _`GUC_KLV_VF_CFG_THRESHOLD_ENGINE_RESET` : 0x8A04
 *      This config sets the threshold for engine resets caused by the VF.
 *
 *      :0: adverse events or errors will not be reported (default)
 *      :n: event occurrence count per sampling interval
 *
 * _`GUC_KLV_VF_CFG_THRESHOLD_PAGE_FAULT` : 0x8A05
 *      This config sets the threshold for page fault errors caused by the VF.
 *
 *      :0: adverse events or errors will not be reported (default)
 *      :n: event occurrence count per sampling interval
 *
 * _`GUC_KLV_VF_CFG_THRESHOLD_H2G_STORM` : 0x8A06
 *      This config sets the threshold for H2G interrupts triggered by the VF.
 *
 *      :0: adverse events or errors will not be reported (default)
 *      :n: time (us) per sampling interval
 *
 * _`GUC_KLV_VF_CFG_THRESHOLD_IRQ_STORM` : 0x8A07
 *      This config sets the threshold for GT interrupts triggered by the VF's
 *      workloads.
 *
 *      :0: adverse events or errors will not be reported (default)
 *      :n: time (us) per sampling interval
 *
 * _`GUC_KLV_VF_CFG_THRESHOLD_DOORBELL_STORM` : 0x8A08
 *      This config sets the threshold for doorbell rings triggered by the VF.
 *
 *      :0: adverse events or errors will not be reported (default)
 *      :n: time (us) per sampling interval
 *
 * _`GUC_KLV_VF_CFG_BEGIN_DOORBELL_ID` : 0x8A0A
 *      Refers to the start index of doorbells assigned to this VF.
 *
 *      :0: (default)
 *      :1-255: number of doorbells (Gen12)
 *
 * _`GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID` : 0x8A0B
 *      Refers to the start index in the context array allocated to this VF's use.
 *
 *      :0: (default)
 *      :1-65535: number of contexts (Gen12)
 */

#define GUC_KLV_VF_CFG_GGTT_START_KEY		0x0001
#define GUC_KLV_VF_CFG_GGTT_START_LEN		2u

#define GUC_KLV_VF_CFG_GGTT_SIZE_KEY		0x0002
#define GUC_KLV_VF_CFG_GGTT_SIZE_LEN		2u

#define GUC_KLV_VF_CFG_LMEM_SIZE_KEY		0x0003
#define GUC_KLV_VF_CFG_LMEM_SIZE_LEN		2u

#define GUC_KLV_VF_CFG_NUM_CONTEXTS_KEY		0x0004
#define GUC_KLV_VF_CFG_NUM_CONTEXTS_LEN		1u

#define GUC_KLV_VF_CFG_TILE_MASK_KEY		0x0005
#define GUC_KLV_VF_CFG_TILE_MASK_LEN		1u

#define GUC_KLV_VF_CFG_NUM_DOORBELLS_KEY	0x0006
#define GUC_KLV_VF_CFG_NUM_DOORBELLS_LEN	1u

#define GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY		0x8a01
#define GUC_KLV_VF_CFG_EXEC_QUANTUM_LEN		1u

#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY	0x8a02
#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_LEN	1u

#define GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR_KEY	0x8a03
#define GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR_LEN	1u

#define GUC_KLV_VF_CFG_THRESHOLD_ENGINE_RESET_KEY	0x8a04
#define GUC_KLV_VF_CFG_THRESHOLD_ENGINE_RESET_LEN	1u

#define GUC_KLV_VF_CFG_THRESHOLD_PAGE_FAULT_KEY		0x8a05
#define GUC_KLV_VF_CFG_THRESHOLD_PAGE_FAULT_LEN		1u

#define GUC_KLV_VF_CFG_THRESHOLD_H2G_STORM_KEY		0x8a06
#define GUC_KLV_VF_CFG_THRESHOLD_H2G_STORM_LEN		1u

#define GUC_KLV_VF_CFG_THRESHOLD_IRQ_STORM_KEY		0x8a07
#define GUC_KLV_VF_CFG_THRESHOLD_IRQ_STORM_LEN		1u

#define GUC_KLV_VF_CFG_THRESHOLD_DOORBELL_STORM_KEY	0x8a08
#define GUC_KLV_VF_CFG_THRESHOLD_DOORBELL_STORM_LEN	1u

#define GUC_KLV_VF_CFG_BEGIN_DOORBELL_ID_KEY	0x8a0a
#define GUC_KLV_VF_CFG_BEGIN_DOORBELL_ID_LEN	1u

#define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_KEY	0x8a0b
#define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_LEN	1u

#endif
drivers/gpu/drm/xe/abi/guc_messages_abi.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2014-2021 Intel Corporation 4 + */ 5 + 6 + #ifndef _ABI_GUC_MESSAGES_ABI_H 7 + #define _ABI_GUC_MESSAGES_ABI_H 8 + 9 + /** 10 + * DOC: HXG Message 11 + * 12 + * All messages exchanged with GuC are defined using 32 bit dwords. 13 + * First dword is treated as a message header. Remaining dwords are optional. 14 + * 15 + * +---+-------+--------------------------------------------------------------+ 16 + * | | Bits | Description | 17 + * +===+=======+==============================================================+ 18 + * | | | | 19 + * | 0 | 31 | **ORIGIN** - originator of the message | 20 + * | | | - _`GUC_HXG_ORIGIN_HOST` = 0 | 21 + * | | | - _`GUC_HXG_ORIGIN_GUC` = 1 | 22 + * | | | | 23 + * | +-------+--------------------------------------------------------------+ 24 + * | | 30:28 | **TYPE** - message type | 25 + * | | | - _`GUC_HXG_TYPE_REQUEST` = 0 | 26 + * | | | - _`GUC_HXG_TYPE_EVENT` = 1 | 27 + * | | | - _`GUC_HXG_TYPE_NO_RESPONSE_BUSY` = 3 | 28 + * | | | - _`GUC_HXG_TYPE_NO_RESPONSE_RETRY` = 5 | 29 + * | | | - _`GUC_HXG_TYPE_RESPONSE_FAILURE` = 6 | 30 + * | | | - _`GUC_HXG_TYPE_RESPONSE_SUCCESS` = 7 | 31 + * | +-------+--------------------------------------------------------------+ 32 + * | | 27:0 | **AUX** - auxiliary data (depends on TYPE) | 33 + * +---+-------+--------------------------------------------------------------+ 34 + * | 1 | 31:0 | | 35 + * +---+-------+ | 36 + * |...| | **PAYLOAD** - optional payload (depends on TYPE) | 37 + * +---+-------+ | 38 + * | n | 31:0 | | 39 + * +---+-------+--------------------------------------------------------------+ 40 + */ 41 + 42 + #define GUC_HXG_MSG_MIN_LEN 1u 43 + #define GUC_HXG_MSG_0_ORIGIN (0x1 << 31) 44 + #define GUC_HXG_ORIGIN_HOST 0u 45 + #define GUC_HXG_ORIGIN_GUC 1u 46 + #define GUC_HXG_MSG_0_TYPE (0x7 << 28) 47 + #define GUC_HXG_TYPE_REQUEST 0u 48 + #define GUC_HXG_TYPE_EVENT 1u 49 + #define GUC_HXG_TYPE_NO_RESPONSE_BUSY 3u 50 + 
#define GUC_HXG_TYPE_NO_RESPONSE_RETRY 5u 51 + #define GUC_HXG_TYPE_RESPONSE_FAILURE 6u 52 + #define GUC_HXG_TYPE_RESPONSE_SUCCESS 7u 53 + #define GUC_HXG_MSG_0_AUX (0xfffffff << 0) 54 + #define GUC_HXG_MSG_n_PAYLOAD (0xffffffff << 0) 55 + 56 + /** 57 + * DOC: HXG Request 58 + * 59 + * The `HXG Request`_ message should be used to initiate synchronous activity 60 + * for which confirmation or return data is expected. 61 + * 62 + * The recipient of this message shall use `HXG Response`_, `HXG Failure`_ 63 + * or `HXG Retry`_ message as a definite reply, and may use `HXG Busy`_ 64 + * message as a intermediate reply. 65 + * 66 + * Format of @DATA0 and all @DATAn fields depends on the @ACTION code. 67 + * 68 + * +---+-------+--------------------------------------------------------------+ 69 + * | | Bits | Description | 70 + * +===+=======+==============================================================+ 71 + * | 0 | 31 | ORIGIN | 72 + * | +-------+--------------------------------------------------------------+ 73 + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | 74 + * | +-------+--------------------------------------------------------------+ 75 + * | | 27:16 | **DATA0** - request data (depends on ACTION) | 76 + * | +-------+--------------------------------------------------------------+ 77 + * | | 15:0 | **ACTION** - requested action code | 78 + * +---+-------+--------------------------------------------------------------+ 79 + * | 1 | 31:0 | | 80 + * +---+-------+ | 81 + * |...| | **DATAn** - optional data (depends on ACTION) | 82 + * +---+-------+ | 83 + * | n | 31:0 | | 84 + * +---+-------+--------------------------------------------------------------+ 85 + */ 86 + 87 + #define GUC_HXG_REQUEST_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN 88 + #define GUC_HXG_REQUEST_MSG_0_DATA0 (0xfff << 16) 89 + #define GUC_HXG_REQUEST_MSG_0_ACTION (0xffff << 0) 90 + #define GUC_HXG_REQUEST_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD 91 + 92 + /** 93 + * DOC: HXG Event 94 + * 95 + * The `HXG Event`_ message 
should be used to initiate asynchronous activity 96 + * that does not involve immediate confirmation or data. 97 + * 98 + * Format of @DATA0 and all @DATAn fields depends on the @ACTION code. 99 + * 100 + * +---+-------+--------------------------------------------------------------+ 101 + * | | Bits | Description | 102 + * +===+=======+==============================================================+ 103 + * | 0 | 31 | ORIGIN | 104 + * | +-------+--------------------------------------------------------------+ 105 + * | | 30:28 | TYPE = GUC_HXG_TYPE_EVENT_ | 106 + * | +-------+--------------------------------------------------------------+ 107 + * | | 27:16 | **DATA0** - event data (depends on ACTION) | 108 + * | +-------+--------------------------------------------------------------+ 109 + * | | 15:0 | **ACTION** - event action code | 110 + * +---+-------+--------------------------------------------------------------+ 111 + * | 1 | 31:0 | | 112 + * +---+-------+ | 113 + * |...| | **DATAn** - optional event data (depends on ACTION) | 114 + * +---+-------+ | 115 + * | n | 31:0 | | 116 + * +---+-------+--------------------------------------------------------------+ 117 + */ 118 + 119 + #define GUC_HXG_EVENT_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN 120 + #define GUC_HXG_EVENT_MSG_0_DATA0 (0xfff << 16) 121 + #define GUC_HXG_EVENT_MSG_0_ACTION (0xffff << 0) 122 + #define GUC_HXG_EVENT_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD 123 + 124 + /** 125 + * DOC: HXG Busy 126 + * 127 + * The `HXG Busy`_ message may be used to acknowledge reception of the `HXG Request`_ 128 + * message if the recipient expects that its processing will take longer than the default 129 + * timeout. 130 + * 131 + * The @COUNTER field may be used as a progress indicator.
132 + * 133 + * +---+-------+--------------------------------------------------------------+ 134 + * | | Bits | Description | 135 + * +===+=======+==============================================================+ 136 + * | 0 | 31 | ORIGIN | 137 + * | +-------+--------------------------------------------------------------+ 138 + * | | 30:28 | TYPE = GUC_HXG_TYPE_NO_RESPONSE_BUSY_ | 139 + * | +-------+--------------------------------------------------------------+ 140 + * | | 27:0 | **COUNTER** - progress indicator | 141 + * +---+-------+--------------------------------------------------------------+ 142 + */ 143 + 144 + #define GUC_HXG_BUSY_MSG_LEN GUC_HXG_MSG_MIN_LEN 145 + #define GUC_HXG_BUSY_MSG_0_COUNTER GUC_HXG_MSG_0_AUX 146 + 147 + /** 148 + * DOC: HXG Retry 149 + * 150 + * The `HXG Retry`_ message should be used by the recipient to indicate that the 151 + * `HXG Request`_ message was dropped and it should be resent. 152 + * 153 + * The @REASON field may be used to provide additional information.
154 + * 155 + * +---+-------+--------------------------------------------------------------+ 156 + * | | Bits | Description | 157 + * +===+=======+==============================================================+ 158 + * | 0 | 31 | ORIGIN | 159 + * | +-------+--------------------------------------------------------------+ 160 + * | | 30:28 | TYPE = GUC_HXG_TYPE_NO_RESPONSE_RETRY_ | 161 + * | +-------+--------------------------------------------------------------+ 162 + * | | 27:0 | **REASON** - reason for retry | 163 + * | | | - _`GUC_HXG_RETRY_REASON_UNSPECIFIED` = 0 | 164 + * +---+-------+--------------------------------------------------------------+ 165 + */ 166 + 167 + #define GUC_HXG_RETRY_MSG_LEN GUC_HXG_MSG_MIN_LEN 168 + #define GUC_HXG_RETRY_MSG_0_REASON GUC_HXG_MSG_0_AUX 169 + #define GUC_HXG_RETRY_REASON_UNSPECIFIED 0u 170 + 171 + /** 172 + * DOC: HXG Failure 173 + * 174 + * The `HXG Failure`_ message shall be used as a reply to the `HXG Request`_ 175 + * message that could not be processed due to an error. 
176 + * 177 + * +---+-------+--------------------------------------------------------------+ 178 + * | | Bits | Description | 179 + * +===+=======+==============================================================+ 180 + * | 0 | 31 | ORIGIN | 181 + * | +-------+--------------------------------------------------------------+ 182 + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_FAILURE_ | 183 + * | +-------+--------------------------------------------------------------+ 184 + * | | 27:16 | **HINT** - additional error hint | 185 + * | +-------+--------------------------------------------------------------+ 186 + * | | 15:0 | **ERROR** - error/result code | 187 + * +---+-------+--------------------------------------------------------------+ 188 + */ 189 + 190 + #define GUC_HXG_FAILURE_MSG_LEN GUC_HXG_MSG_MIN_LEN 191 + #define GUC_HXG_FAILURE_MSG_0_HINT (0xfff << 16) 192 + #define GUC_HXG_FAILURE_MSG_0_ERROR (0xffff << 0) 193 + 194 + /** 195 + * DOC: HXG Response 196 + * 197 + * The `HXG Response`_ message shall be used as a reply to the `HXG Request`_ 198 + * message that was successfully processed without an error. 
199 + * 200 + * +---+-------+--------------------------------------------------------------+ 201 + * | | Bits | Description | 202 + * +===+=======+==============================================================+ 203 + * | 0 | 31 | ORIGIN | 204 + * | +-------+--------------------------------------------------------------+ 205 + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | 206 + * | +-------+--------------------------------------------------------------+ 207 + * | | 27:0 | **DATA0** - data (depends on ACTION from `HXG Request`_) | 208 + * +---+-------+--------------------------------------------------------------+ 209 + * | 1 | 31:0 | | 210 + * +---+-------+ | 211 + * |...| | **DATAn** - data (depends on ACTION from `HXG Request`_) | 212 + * +---+-------+ | 213 + * | n | 31:0 | | 214 + * +---+-------+--------------------------------------------------------------+ 215 + */ 216 + 217 + #define GUC_HXG_RESPONSE_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN 218 + #define GUC_HXG_RESPONSE_MSG_0_DATA0 GUC_HXG_MSG_0_AUX 219 + #define GUC_HXG_RESPONSE_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD 220 + 221 + /* deprecated */ 222 + #define INTEL_GUC_MSG_TYPE_SHIFT 28 223 + #define INTEL_GUC_MSG_TYPE_MASK (0xF << INTEL_GUC_MSG_TYPE_SHIFT) 224 + #define INTEL_GUC_MSG_DATA_SHIFT 16 225 + #define INTEL_GUC_MSG_DATA_MASK (0xFFF << INTEL_GUC_MSG_DATA_SHIFT) 226 + #define INTEL_GUC_MSG_CODE_SHIFT 0 227 + #define INTEL_GUC_MSG_CODE_MASK (0xFFFF << INTEL_GUC_MSG_CODE_SHIFT) 228 + 229 + enum intel_guc_msg_type { 230 + INTEL_GUC_MSG_TYPE_REQUEST = 0x0, 231 + INTEL_GUC_MSG_TYPE_RESPONSE = 0xF, 232 + }; 233 + 234 + #endif
+4
drivers/gpu/drm/xe/tests/Makefile
··· 1 + # SPDX-License-Identifier: GPL-2.0 2 + 3 + obj-$(CONFIG_DRM_XE_KUNIT_TEST) += xe_bo_test.o xe_dma_buf_test.o \ 4 + xe_migrate_test.o
+303
drivers/gpu/drm/xe/tests/xe_bo.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 AND MIT 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #include <kunit/test.h> 7 + 8 + #include "xe_bo_evict.h" 9 + #include "xe_pci.h" 10 + 11 + static int ccs_test_migrate(struct xe_gt *gt, struct xe_bo *bo, 12 + bool clear, u64 get_val, u64 assign_val, 13 + struct kunit *test) 14 + { 15 + struct dma_fence *fence; 16 + struct ttm_tt *ttm; 17 + struct page *page; 18 + pgoff_t ccs_page; 19 + long timeout; 20 + u64 *cpu_map; 21 + int ret; 22 + u32 offset; 23 + 24 + /* Move bo to VRAM if not already there. */ 25 + ret = xe_bo_validate(bo, NULL, false); 26 + if (ret) { 27 + KUNIT_FAIL(test, "Failed to validate bo.\n"); 28 + return ret; 29 + } 30 + 31 + /* Optionally clear bo *and* CCS data in VRAM. */ 32 + if (clear) { 33 + fence = xe_migrate_clear(gt->migrate, bo, bo->ttm.resource, 0); 34 + if (IS_ERR(fence)) { 35 + KUNIT_FAIL(test, "Failed to submit bo clear.\n"); 36 + return PTR_ERR(fence); 37 + } 38 + dma_fence_put(fence); 39 + } 40 + 41 + /* Evict to system. CCS data should be copied. */ 42 + ret = xe_bo_evict(bo, true); 43 + if (ret) { 44 + KUNIT_FAIL(test, "Failed to evict bo.\n"); 45 + return ret; 46 + } 47 + 48 + /* Sync all migration blits */ 49 + timeout = dma_resv_wait_timeout(bo->ttm.base.resv, 50 + DMA_RESV_USAGE_KERNEL, 51 + true, 52 + 5 * HZ); 53 + if (timeout <= 0) { 54 + KUNIT_FAIL(test, "Failed to sync bo eviction.\n"); 55 + return -ETIME; 56 + } 57 + 58 + /* 59 + * Bo with CCS data is now in system memory. Verify backing store 60 + * and data integrity. Then assign for the next testing round while 61 + * we still have a CPU map. 
62 + */ 63 + ttm = bo->ttm.ttm; 64 + if (!ttm || !ttm_tt_is_populated(ttm)) { 65 + KUNIT_FAIL(test, "Bo was not in expected placement.\n"); 66 + return -EINVAL; 67 + } 68 + 69 + ccs_page = xe_bo_ccs_pages_start(bo) >> PAGE_SHIFT; 70 + if (ccs_page >= ttm->num_pages) { 71 + KUNIT_FAIL(test, "No TTM CCS pages present.\n"); 72 + return -EINVAL; 73 + } 74 + 75 + page = ttm->pages[ccs_page]; 76 + cpu_map = kmap_local_page(page); 77 + 78 + /* Check first CCS value */ 79 + if (cpu_map[0] != get_val) { 80 + KUNIT_FAIL(test, 81 + "Expected CCS readout 0x%016llx, got 0x%016llx.\n", 82 + (unsigned long long)get_val, 83 + (unsigned long long)cpu_map[0]); 84 + ret = -EINVAL; 85 + } 86 + 87 + /* Check last CCS value, or at least last value in page. */ 88 + offset = xe_device_ccs_bytes(gt->xe, bo->size); 89 + offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1; 90 + if (cpu_map[offset] != get_val) { 91 + KUNIT_FAIL(test, 92 + "Expected CCS readout 0x%016llx, got 0x%016llx.\n", 93 + (unsigned long long)get_val, 94 + (unsigned long long)cpu_map[offset]); 95 + ret = -EINVAL; 96 + } 97 + 98 + cpu_map[0] = assign_val; 99 + cpu_map[offset] = assign_val; 100 + kunmap_local(cpu_map); 101 + 102 + return ret; 103 + } 104 + 105 + static void ccs_test_run_gt(struct xe_device *xe, struct xe_gt *gt, 106 + struct kunit *test) 107 + { 108 + struct xe_bo *bo; 109 + u32 vram_bit; 110 + int ret; 111 + 112 + /* TODO: Sanity check */ 113 + vram_bit = XE_BO_CREATE_VRAM0_BIT << gt->info.vram_id; 114 + kunit_info(test, "Testing gt id %u vram id %u\n", gt->info.id, 115 + gt->info.vram_id); 116 + 117 + bo = xe_bo_create_locked(xe, NULL, NULL, SZ_1M, ttm_bo_type_device, 118 + vram_bit); 119 + if (IS_ERR(bo)) { 120 + KUNIT_FAIL(test, "Failed to create bo.\n"); 121 + return; 122 + } 123 + 124 + kunit_info(test, "Verifying that CCS data is cleared on creation.\n"); 125 + ret = ccs_test_migrate(gt, bo, false, 0ULL, 0xdeadbeefdeadbeefULL, 126 + test); 127 + if (ret) 128 + goto out_unlock; 129 + 130 + 
kunit_info(test, "Verifying that CCS data survives migration.\n"); 131 + ret = ccs_test_migrate(gt, bo, false, 0xdeadbeefdeadbeefULL, 132 + 0xdeadbeefdeadbeefULL, test); 133 + if (ret) 134 + goto out_unlock; 135 + 136 + kunit_info(test, "Verifying that CCS data can be properly cleared.\n"); 137 + ret = ccs_test_migrate(gt, bo, true, 0ULL, 0ULL, test); 138 + 139 + out_unlock: 140 + xe_bo_unlock_no_vm(bo); 141 + xe_bo_put(bo); 142 + } 143 + 144 + static int ccs_test_run_device(struct xe_device *xe) 145 + { 146 + struct kunit *test = xe_cur_kunit(); 147 + struct xe_gt *gt; 148 + int id; 149 + 150 + if (!xe_device_has_flat_ccs(xe)) { 151 + kunit_info(test, "Skipping non-flat-ccs device.\n"); 152 + return 0; 153 + } 154 + 155 + for_each_gt(gt, xe, id) 156 + ccs_test_run_gt(xe, gt, test); 157 + 158 + return 0; 159 + } 160 + 161 + void xe_ccs_migrate_kunit(struct kunit *test) 162 + { 163 + xe_call_for_each_device(ccs_test_run_device); 164 + } 165 + EXPORT_SYMBOL(xe_ccs_migrate_kunit); 166 + 167 + static int evict_test_run_gt(struct xe_device *xe, struct xe_gt *gt, struct kunit *test) 168 + { 169 + struct xe_bo *bo, *external; 170 + unsigned int bo_flags = XE_BO_CREATE_USER_BIT | 171 + XE_BO_CREATE_VRAM_IF_DGFX(gt); 172 + struct xe_vm *vm = xe_migrate_get_vm(xe->gt[0].migrate); 173 + struct ww_acquire_ctx ww; 174 + int err, i; 175 + 176 + kunit_info(test, "Testing device %s gt id %u vram id %u\n", 177 + dev_name(xe->drm.dev), gt->info.id, gt->info.vram_id); 178 + 179 + for (i = 0; i < 2; ++i) { 180 + xe_vm_lock(vm, &ww, 0, false); 181 + bo = xe_bo_create(xe, NULL, vm, 0x10000, ttm_bo_type_device, 182 + bo_flags); 183 + xe_vm_unlock(vm, &ww); 184 + if (IS_ERR(bo)) { 185 + KUNIT_FAIL(test, "bo create err=%pe\n", bo); 186 + break; 187 + } 188 + 189 + external = xe_bo_create(xe, NULL, NULL, 0x10000, 190 + ttm_bo_type_device, bo_flags); 191 + if (IS_ERR(external)) { 192 + KUNIT_FAIL(test, "external bo create err=%pe\n", external); 193 + goto cleanup_bo; 194 + } 195 + 196 + 
xe_bo_lock(external, &ww, 0, false); 197 + err = xe_bo_pin_external(external); 198 + xe_bo_unlock(external, &ww); 199 + if (err) { 200 + KUNIT_FAIL(test, "external bo pin err=%pe\n", 201 + ERR_PTR(err)); 202 + goto cleanup_external; 203 + } 204 + 205 + err = xe_bo_evict_all(xe); 206 + if (err) { 207 + KUNIT_FAIL(test, "evict err=%pe\n", ERR_PTR(err)); 208 + goto cleanup_all; 209 + } 210 + 211 + err = xe_bo_restore_kernel(xe); 212 + if (err) { 213 + KUNIT_FAIL(test, "restore kernel err=%pe\n", 214 + ERR_PTR(err)); 215 + goto cleanup_all; 216 + } 217 + 218 + err = xe_bo_restore_user(xe); 219 + if (err) { 220 + KUNIT_FAIL(test, "restore user err=%pe\n", ERR_PTR(err)); 221 + goto cleanup_all; 222 + } 223 + 224 + if (!xe_bo_is_vram(external)) { 225 + KUNIT_FAIL(test, "external bo is not vram\n"); 226 + err = -EPROTO; 227 + goto cleanup_all; 228 + } 229 + 230 + if (xe_bo_is_vram(bo)) { 231 + KUNIT_FAIL(test, "bo is vram\n"); 232 + err = -EPROTO; 233 + goto cleanup_all; 234 + } 235 + 236 + if (i) { 237 + down_read(&vm->lock); 238 + xe_vm_lock(vm, &ww, 0, false); 239 + err = xe_bo_validate(bo, bo->vm, false); 240 + xe_vm_unlock(vm, &ww); 241 + up_read(&vm->lock); 242 + if (err) { 243 + KUNIT_FAIL(test, "bo valid err=%pe\n", 244 + ERR_PTR(err)); 245 + goto cleanup_all; 246 + } 247 + xe_bo_lock(external, &ww, 0, false); 248 + err = xe_bo_validate(external, NULL, false); 249 + xe_bo_unlock(external, &ww); 250 + if (err) { 251 + KUNIT_FAIL(test, "external bo valid err=%pe\n", 252 + ERR_PTR(err)); 253 + goto cleanup_all; 254 + } 255 + } 256 + 257 + xe_bo_lock(external, &ww, 0, false); 258 + xe_bo_unpin_external(external); 259 + xe_bo_unlock(external, &ww); 260 + 261 + xe_bo_put(external); 262 + xe_bo_put(bo); 263 + continue; 264 + 265 + cleanup_all: 266 + xe_bo_lock(external, &ww, 0, false); 267 + xe_bo_unpin_external(external); 268 + xe_bo_unlock(external, &ww); 269 + cleanup_external: 270 + xe_bo_put(external); 271 + cleanup_bo: 272 + xe_bo_put(bo); 273 + break; 274 + } 275 + 
276 + xe_vm_put(vm); 277 + 278 + return 0; 279 + } 280 + 281 + static int evict_test_run_device(struct xe_device *xe) 282 + { 283 + struct kunit *test = xe_cur_kunit(); 284 + struct xe_gt *gt; 285 + int id; 286 + 287 + if (!IS_DGFX(xe)) { 288 + kunit_info(test, "Skipping non-discrete device %s.\n", 289 + dev_name(xe->drm.dev)); 290 + return 0; 291 + } 292 + 293 + for_each_gt(gt, xe, id) 294 + evict_test_run_gt(xe, gt, test); 295 + 296 + return 0; 297 + } 298 + 299 + void xe_bo_evict_kunit(struct kunit *test) 300 + { 301 + xe_call_for_each_device(evict_test_run_device); 302 + } 303 + EXPORT_SYMBOL(xe_bo_evict_kunit);
+25
drivers/gpu/drm/xe/tests/xe_bo_test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #include <kunit/test.h> 7 + 8 + void xe_ccs_migrate_kunit(struct kunit *test); 9 + void xe_bo_evict_kunit(struct kunit *test); 10 + 11 + static struct kunit_case xe_bo_tests[] = { 12 + KUNIT_CASE(xe_ccs_migrate_kunit), 13 + KUNIT_CASE(xe_bo_evict_kunit), 14 + {} 15 + }; 16 + 17 + static struct kunit_suite xe_bo_test_suite = { 18 + .name = "xe_bo", 19 + .test_cases = xe_bo_tests, 20 + }; 21 + 22 + kunit_test_suite(xe_bo_test_suite); 23 + 24 + MODULE_AUTHOR("Intel Corporation"); 25 + MODULE_LICENSE("GPL");
+259
drivers/gpu/drm/xe/tests/xe_dma_buf.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 AND MIT 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #include <kunit/test.h> 7 + 8 + #include "xe_pci.h" 9 + 10 + static bool p2p_enabled(struct dma_buf_test_params *params) 11 + { 12 + return IS_ENABLED(CONFIG_PCI_P2PDMA) && params->attach_ops && 13 + params->attach_ops->allow_peer2peer; 14 + } 15 + 16 + static bool is_dynamic(struct dma_buf_test_params *params) 17 + { 18 + return IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY) && params->attach_ops && 19 + params->attach_ops->move_notify; 20 + } 21 + 22 + static void check_residency(struct kunit *test, struct xe_bo *exported, 23 + struct xe_bo *imported, struct dma_buf *dmabuf) 24 + { 25 + struct dma_buf_test_params *params = to_dma_buf_test_params(test->priv); 26 + u32 mem_type; 27 + int ret; 28 + 29 + xe_bo_assert_held(exported); 30 + xe_bo_assert_held(imported); 31 + 32 + mem_type = XE_PL_VRAM0; 33 + if (!(params->mem_mask & XE_BO_CREATE_VRAM0_BIT)) 34 + /* No VRAM allowed */ 35 + mem_type = XE_PL_TT; 36 + else if (params->force_different_devices && !p2p_enabled(params)) 37 + /* No P2P */ 38 + mem_type = XE_PL_TT; 39 + else if (params->force_different_devices && !is_dynamic(params) && 40 + (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) 41 + /* Pin migrated to TT */ 42 + mem_type = XE_PL_TT; 43 + 44 + if (!xe_bo_is_mem_type(exported, mem_type)) { 45 + KUNIT_FAIL(test, "Exported bo was not in expected memory type.\n"); 46 + return; 47 + } 48 + 49 + if (xe_bo_is_pinned(exported)) 50 + return; 51 + 52 + /* 53 + * Evict exporter. Note that the gem object dma_buf member isn't 54 + * set from xe_gem_prime_export(), and it's needed for the move_notify() 55 + * functionality, so hack that up here. Evicting the exported bo will 56 + * evict also the imported bo through the move_notify() functionality if 57 + * importer is on a different device. If they're on the same device, 58 + * the exporter and the importer should be the same bo. 
59 + */ 60 + swap(exported->ttm.base.dma_buf, dmabuf); 61 + ret = xe_bo_evict(exported, true); 62 + swap(exported->ttm.base.dma_buf, dmabuf); 63 + if (ret) { 64 + if (ret != -EINTR && ret != -ERESTARTSYS) 65 + KUNIT_FAIL(test, "Evicting exporter failed with err=%d.\n", 66 + ret); 67 + return; 68 + } 69 + 70 + /* Verify that the importer has also been evicted to SYSTEM */ 71 + if (!xe_bo_is_mem_type(imported, XE_PL_SYSTEM)) { 72 + KUNIT_FAIL(test, "Importer wasn't properly evicted.\n"); 73 + return; 74 + } 75 + 76 + /* Re-validate the importer. This should also move the exporter in. */ 77 + ret = xe_bo_validate(imported, NULL, false); 78 + if (ret) { 79 + if (ret != -EINTR && ret != -ERESTARTSYS) 80 + KUNIT_FAIL(test, "Validating importer failed with err=%d.\n", 81 + ret); 82 + return; 83 + } 84 + 85 + /* 86 + * If on different devices, the exporter is kept in system if 87 + * possible, saving a migration step, as the transfer is 88 + * likely just as fast from system memory. 89 + */ 90 + if (params->force_different_devices && 91 + params->mem_mask & XE_BO_CREATE_SYSTEM_BIT) 92 + KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, XE_PL_TT)); 93 + else 94 + KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type)); 95 + 96 + if (params->force_different_devices) 97 + KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(imported, XE_PL_TT)); 98 + else 99 + KUNIT_EXPECT_TRUE(test, exported == imported); 100 + } 101 + 102 + static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) 103 + { 104 + struct kunit *test = xe_cur_kunit(); 105 + struct dma_buf_test_params *params = to_dma_buf_test_params(test->priv); 106 + struct drm_gem_object *import; 107 + struct dma_buf *dmabuf; 108 + struct xe_bo *bo; 109 + 110 + /* No VRAM on this device?
*/ 111 + if (!ttm_manager_type(&xe->ttm, XE_PL_VRAM0) && 112 + (params->mem_mask & XE_BO_CREATE_VRAM0_BIT)) 113 + return; 114 + 115 + kunit_info(test, "running %s\n", __func__); 116 + bo = xe_bo_create(xe, NULL, NULL, PAGE_SIZE, ttm_bo_type_device, 117 + XE_BO_CREATE_USER_BIT | params->mem_mask); 118 + if (IS_ERR(bo)) { 119 + KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", 120 + PTR_ERR(bo)); 121 + return; 122 + } 123 + 124 + dmabuf = xe_gem_prime_export(&bo->ttm.base, 0); 125 + if (IS_ERR(dmabuf)) { 126 + KUNIT_FAIL(test, "xe_gem_prime_export() failed with err=%ld\n", 127 + PTR_ERR(dmabuf)); 128 + goto out; 129 + } 130 + 131 + import = xe_gem_prime_import(&xe->drm, dmabuf); 132 + if (!IS_ERR(import)) { 133 + struct xe_bo *import_bo = gem_to_xe_bo(import); 134 + 135 + /* 136 + * Did import succeed when it shouldn't due to lack of p2p support? 137 + */ 138 + if (params->force_different_devices && 139 + !p2p_enabled(params) && 140 + !(params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) { 141 + KUNIT_FAIL(test, 142 + "xe_gem_prime_import() succeeded when it shouldn't have\n"); 143 + } else { 144 + int err; 145 + 146 + /* Is everything where we expect it to be? */ 147 + xe_bo_lock_no_vm(import_bo, NULL); 148 + err = xe_bo_validate(import_bo, NULL, false); 149 + if (err && err != -EINTR && err != -ERESTARTSYS) 150 + KUNIT_FAIL(test, 151 + "xe_bo_validate() failed with err=%d\n", err); 152 + 153 + check_residency(test, bo, import_bo, dmabuf); 154 + xe_bo_unlock_no_vm(import_bo); 155 + } 156 + drm_gem_object_put(import); 157 + } else if (PTR_ERR(import) != -EOPNOTSUPP) { 158 + /* Unexpected error code. 
*/ 159 + KUNIT_FAIL(test, 160 + "xe_gem_prime_import failed with the wrong err=%ld\n", 161 + PTR_ERR(import)); 162 + } else if (!params->force_different_devices || 163 + p2p_enabled(params) || 164 + (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) { 165 + /* Shouldn't fail if we can reuse same bo, use p2p or use system */ 166 + KUNIT_FAIL(test, "dynamic p2p attachment failed with err=%ld\n", 167 + PTR_ERR(import)); 168 + } 169 + dma_buf_put(dmabuf); 170 + out: 171 + drm_gem_object_put(&bo->ttm.base); 172 + } 173 + 174 + static const struct dma_buf_attach_ops nop2p_attach_ops = { 175 + .allow_peer2peer = false, 176 + .move_notify = xe_dma_buf_move_notify 177 + }; 178 + 179 + /* 180 + * We test the implementation with bos of different residency and with 181 + * importers with different capabilities; some lacking p2p support and some 182 + * lacking dynamic capabilities (attach_ops == NULL). We also fake 183 + * different devices avoiding the import shortcut that just reuses the same 184 + * gem object. 
185 + */ 186 + static const struct dma_buf_test_params test_params[] = { 187 + {.mem_mask = XE_BO_CREATE_VRAM0_BIT, 188 + .attach_ops = &xe_dma_buf_attach_ops}, 189 + {.mem_mask = XE_BO_CREATE_VRAM0_BIT, 190 + .attach_ops = &xe_dma_buf_attach_ops, 191 + .force_different_devices = true}, 192 + 193 + {.mem_mask = XE_BO_CREATE_VRAM0_BIT, 194 + .attach_ops = &nop2p_attach_ops}, 195 + {.mem_mask = XE_BO_CREATE_VRAM0_BIT, 196 + .attach_ops = &nop2p_attach_ops, 197 + .force_different_devices = true}, 198 + 199 + {.mem_mask = XE_BO_CREATE_VRAM0_BIT}, 200 + {.mem_mask = XE_BO_CREATE_VRAM0_BIT, 201 + .force_different_devices = true}, 202 + 203 + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT, 204 + .attach_ops = &xe_dma_buf_attach_ops}, 205 + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT, 206 + .attach_ops = &xe_dma_buf_attach_ops, 207 + .force_different_devices = true}, 208 + 209 + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT, 210 + .attach_ops = &nop2p_attach_ops}, 211 + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT, 212 + .attach_ops = &nop2p_attach_ops, 213 + .force_different_devices = true}, 214 + 215 + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT}, 216 + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT, 217 + .force_different_devices = true}, 218 + 219 + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT, 220 + .attach_ops = &xe_dma_buf_attach_ops}, 221 + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT, 222 + .attach_ops = &xe_dma_buf_attach_ops, 223 + .force_different_devices = true}, 224 + 225 + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT, 226 + .attach_ops = &nop2p_attach_ops}, 227 + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT, 228 + .attach_ops = &nop2p_attach_ops, 229 + .force_different_devices = true}, 230 + 231 + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT}, 232 + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT, 233 + .force_different_devices = true}, 234 + 235 + {} 236 + }; 237 + 238 + static int dma_buf_run_device(struct 
xe_device *xe) 239 + { 240 + const struct dma_buf_test_params *params; 241 + struct kunit *test = xe_cur_kunit(); 242 + 243 + for (params = test_params; params->mem_mask; ++params) { 244 + struct dma_buf_test_params p = *params; 245 + 246 + p.base.id = XE_TEST_LIVE_DMA_BUF; 247 + test->priv = &p; 248 + xe_test_dmabuf_import_same_driver(xe); 249 + } 250 + 251 + /* A non-zero return would halt iteration over driver devices */ 252 + return 0; 253 + } 254 + 255 + void xe_dma_buf_kunit(struct kunit *test) 256 + { 257 + xe_call_for_each_device(dma_buf_run_device); 258 + } 259 + EXPORT_SYMBOL(xe_dma_buf_kunit);
+23
drivers/gpu/drm/xe/tests/xe_dma_buf_test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #include <kunit/test.h> 7 + 8 + void xe_dma_buf_kunit(struct kunit *test); 9 + 10 + static struct kunit_case xe_dma_buf_tests[] = { 11 + KUNIT_CASE(xe_dma_buf_kunit), 12 + {} 13 + }; 14 + 15 + static struct kunit_suite xe_dma_buf_test_suite = { 16 + .name = "xe_dma_buf", 17 + .test_cases = xe_dma_buf_tests, 18 + }; 19 + 20 + kunit_test_suite(xe_dma_buf_test_suite); 21 + 22 + MODULE_AUTHOR("Intel Corporation"); 23 + MODULE_LICENSE("GPL");
+378
drivers/gpu/drm/xe/tests/xe_migrate.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2020-2022 Intel Corporation 4 + */ 5 + 6 + #include <kunit/test.h> 7 + 8 + #include "xe_pci.h" 9 + 10 + static bool sanity_fence_failed(struct xe_device *xe, struct dma_fence *fence, 11 + const char *str, struct kunit *test) 12 + { 13 + long ret; 14 + 15 + if (IS_ERR(fence)) { 16 + KUNIT_FAIL(test, "Failed to create fence for %s: %li\n", str, 17 + PTR_ERR(fence)); 18 + return true; 19 + } 20 + if (!fence) 21 + return true; 22 + 23 + ret = dma_fence_wait_timeout(fence, false, 5 * HZ); 24 + if (ret <= 0) { 25 + KUNIT_FAIL(test, "Fence timed out for %s: %li\n", str, ret); 26 + return true; 27 + } 28 + 29 + return false; 30 + } 31 + 32 + static int run_sanity_job(struct xe_migrate *m, struct xe_device *xe, 33 + struct xe_bb *bb, u32 second_idx, const char *str, 34 + struct kunit *test) 35 + { 36 + struct xe_sched_job *job = xe_bb_create_migration_job(m->eng, bb, 37 + m->batch_base_ofs, 38 + second_idx); 39 + struct dma_fence *fence; 40 + 41 + if (IS_ERR(job)) { 42 + KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n", 43 + PTR_ERR(job)); 44 + return PTR_ERR(job); 45 + } 46 + 47 + xe_sched_job_arm(job); 48 + fence = dma_fence_get(&job->drm.s_fence->finished); 49 + xe_sched_job_push(job); 50 + 51 + if (sanity_fence_failed(xe, fence, str, test)) 52 + return -ETIMEDOUT; 53 + 54 + dma_fence_put(fence); 55 + kunit_info(test, "%s: Job completed\n", str); 56 + return 0; 57 + } 58 + 59 + static void 60 + sanity_populate_cb(struct xe_migrate_pt_update *pt_update, 61 + struct xe_gt *gt, struct iosys_map *map, void *dst, 62 + u32 qword_ofs, u32 num_qwords, 63 + const struct xe_vm_pgtable_update *update) 64 + { 65 + int i; 66 + u64 *ptr = dst; 67 + 68 + for (i = 0; i < num_qwords; i++) 69 + ptr[i] = (qword_ofs + i - update->ofs) * 0x1111111111111111ULL; 70 + } 71 + 72 + static const struct xe_migrate_pt_update_ops sanity_ops = { 73 + .populate = sanity_populate_cb, 74 + }; 75 + 76 + #define check(_retval, _expected, 
str, _test) \ 77 + do { if ((_retval) != (_expected)) { \ 78 + KUNIT_FAIL(_test, "Sanity check failed: " str \ 79 + " expected %llx, got %llx\n", \ 80 + (u64)(_expected), (u64)(_retval)); \ 81 + } } while (0) 82 + 83 + static void test_copy(struct xe_migrate *m, struct xe_bo *bo, 84 + struct kunit *test) 85 + { 86 + struct xe_device *xe = gt_to_xe(m->gt); 87 + u64 retval, expected = 0xc0c0c0c0c0c0c0c0ULL; 88 + bool big = bo->size >= SZ_2M; 89 + struct dma_fence *fence; 90 + const char *str = big ? "Copying big bo" : "Copying small bo"; 91 + int err; 92 + 93 + struct xe_bo *sysmem = xe_bo_create_locked(xe, m->gt, NULL, 94 + bo->size, 95 + ttm_bo_type_kernel, 96 + XE_BO_CREATE_SYSTEM_BIT); 97 + if (IS_ERR(sysmem)) { 98 + KUNIT_FAIL(test, "Failed to allocate sysmem bo for %s: %li\n", 99 + str, PTR_ERR(sysmem)); 100 + return; 101 + } 102 + 103 + err = xe_bo_validate(sysmem, NULL, false); 104 + if (err) { 105 + KUNIT_FAIL(test, "Failed to validate system bo for %s: %li\n", 106 + str, err); 107 + goto out_unlock; 108 + } 109 + 110 + err = xe_bo_vmap(sysmem); 111 + if (err) { 112 + KUNIT_FAIL(test, "Failed to vmap system bo for %s: %li\n", 113 + str, err); 114 + goto out_unlock; 115 + } 116 + 117 + xe_map_memset(xe, &sysmem->vmap, 0, 0xd0, sysmem->size); 118 + fence = xe_migrate_clear(m, sysmem, sysmem->ttm.resource, 0xc0c0c0c0); 119 + if (!sanity_fence_failed(xe, fence, big ? 
"Clearing sysmem big bo" : 120 + "Clearing sysmem small bo", test)) { 121 + retval = xe_map_rd(xe, &sysmem->vmap, 0, u64); 122 + check(retval, expected, "sysmem first offset should be cleared", 123 + test); 124 + retval = xe_map_rd(xe, &sysmem->vmap, sysmem->size - 8, u64); 125 + check(retval, expected, "sysmem last offset should be cleared", 126 + test); 127 + } 128 + dma_fence_put(fence); 129 + 130 + /* Try to copy 0xc0 from sysmem to lmem with 2MB or 64KiB/4KiB pages */ 131 + xe_map_memset(xe, &sysmem->vmap, 0, 0xc0, sysmem->size); 132 + xe_map_memset(xe, &bo->vmap, 0, 0xd0, bo->size); 133 + 134 + fence = xe_migrate_copy(m, sysmem, sysmem->ttm.resource, 135 + bo->ttm.resource); 136 + if (!sanity_fence_failed(xe, fence, big ? "Copying big bo sysmem -> vram" : 137 + "Copying small bo sysmem -> vram", test)) { 138 + retval = xe_map_rd(xe, &bo->vmap, 0, u64); 139 + check(retval, expected, 140 + "sysmem -> vram bo first offset should be copied", test); 141 + retval = xe_map_rd(xe, &bo->vmap, bo->size - 8, u64); 142 + check(retval, expected, 143 + "sysmem -> vram bo last offset should be copied", test); 144 + } 145 + dma_fence_put(fence); 146 + 147 + /* And other way around.. slightly hacky.. */ 148 + xe_map_memset(xe, &sysmem->vmap, 0, 0xd0, sysmem->size); 149 + xe_map_memset(xe, &bo->vmap, 0, 0xc0, bo->size); 150 + 151 + fence = xe_migrate_copy(m, sysmem, bo->ttm.resource, 152 + sysmem->ttm.resource); 153 + if (!sanity_fence_failed(xe, fence, big ?
				"Copying big bo vram -> sysmem" :
				"Copying small bo vram -> sysmem", test)) {
		retval = xe_map_rd(xe, &sysmem->vmap, 0, u64);
		check(retval, expected,
		      "vram -> sysmem bo first offset should be copied", test);
		retval = xe_map_rd(xe, &sysmem->vmap, bo->size - 8, u64);
		check(retval, expected,
		      "vram -> sysmem bo last offset should be copied", test);
	}
	dma_fence_put(fence);

	xe_bo_vunmap(sysmem);
out_unlock:
	xe_bo_unlock_no_vm(sysmem);
	xe_bo_put(sysmem);
}

static void test_pt_update(struct xe_migrate *m, struct xe_bo *pt,
			   struct kunit *test)
{
	struct xe_device *xe = gt_to_xe(m->gt);
	struct dma_fence *fence;
	u64 retval, expected;
	int i;

	struct xe_vm_pgtable_update update = {
		.ofs = 1,
		.qwords = 0x10,
		.pt_bo = pt,
	};
	struct xe_migrate_pt_update pt_update = {
		.ops = &sanity_ops,
	};

	/* Test xe_migrate_update_pgtables() updates the pagetable as expected */
	expected = 0xf0f0f0f0f0f0f0f0ULL;
	xe_map_memset(xe, &pt->vmap, 0, (u8)expected, pt->size);

	fence = xe_migrate_update_pgtables(m, NULL, NULL, m->eng, &update, 1,
					   NULL, 0, &pt_update);
	if (sanity_fence_failed(xe, fence, "Migration pagetable update", test))
		return;

	dma_fence_put(fence);
	retval = xe_map_rd(xe, &pt->vmap, 0, u64);
	check(retval, expected, "PTE[0] must stay untouched", test);

	for (i = 0; i < update.qwords; i++) {
		retval = xe_map_rd(xe, &pt->vmap, (update.ofs + i) * 8, u64);
		check(retval, i * 0x1111111111111111ULL, "PTE update", test);
	}

	retval = xe_map_rd(xe, &pt->vmap, 8 * (update.ofs + update.qwords),
			   u64);
	check(retval, expected, "PTE[0x11] must stay untouched", test);
}

static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
{
	struct xe_gt *gt = m->gt;
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_bo *pt, *bo = m->pt_bo, *big, *tiny;
	struct xe_res_cursor src_it;
	struct dma_fence *fence;
	u64 retval, expected;
	struct xe_bb *bb;
	int err;
	u8 id = gt->info.id;

	err = xe_bo_vmap(bo);
	if (err) {
		KUNIT_FAIL(test, "Failed to vmap our pagetables: %i\n", err);
		return;
	}

	big = xe_bo_create_pin_map(xe, m->gt, m->eng->vm, SZ_4M,
				   ttm_bo_type_kernel,
				   XE_BO_CREATE_VRAM_IF_DGFX(m->gt) |
				   XE_BO_CREATE_PINNED_BIT);
	if (IS_ERR(big)) {
		KUNIT_FAIL(test, "Failed to allocate bo: %li\n", PTR_ERR(big));
		goto vunmap;
	}

	pt = xe_bo_create_pin_map(xe, m->gt, m->eng->vm, GEN8_PAGE_SIZE,
				  ttm_bo_type_kernel,
				  XE_BO_CREATE_VRAM_IF_DGFX(m->gt) |
				  XE_BO_CREATE_PINNED_BIT);
	if (IS_ERR(pt)) {
		KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n",
			   PTR_ERR(pt));
		goto free_big;
	}

	tiny = xe_bo_create_pin_map(xe, m->gt, m->eng->vm,
				    2 * SZ_4K,
				    ttm_bo_type_kernel,
				    XE_BO_CREATE_VRAM_IF_DGFX(m->gt) |
				    XE_BO_CREATE_PINNED_BIT);
	if (IS_ERR(tiny)) {
		KUNIT_FAIL(test, "Failed to allocate tiny bo: %li\n",
			   PTR_ERR(tiny));
		goto free_pt;
	}

	bb = xe_bb_new(m->gt, 32, xe->info.supports_usm);
	if (IS_ERR(bb)) {
		KUNIT_FAIL(test, "Failed to create batchbuffer: %li\n",
			   PTR_ERR(bb));
		goto free_tiny;
	}

	kunit_info(test, "Starting tests, top level PT addr: %llx, special pagetable base addr: %llx\n",
		   xe_bo_main_addr(m->eng->vm->pt_root[id]->bo, GEN8_PAGE_SIZE),
		   xe_bo_main_addr(m->pt_bo, GEN8_PAGE_SIZE));

	/* First part of the test, are we updating our pagetable bo with a new entry? */
	xe_map_wr(xe, &bo->vmap, GEN8_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64, 0xdeaddeadbeefbeef);
	expected = gen8_pte_encode(NULL, pt, 0, XE_CACHE_WB, 0, 0);
	if (m->eng->vm->flags & XE_VM_FLAGS_64K)
		expected |= GEN12_PTE_PS64;
	xe_res_first(pt->ttm.resource, 0, pt->size, &src_it);
	emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt),
		 &src_it, GEN8_PAGE_SIZE, pt);
	run_sanity_job(m, xe, bb, bb->len, "Writing PTE for our fake PT", test);

	retval = xe_map_rd(xe, &bo->vmap, GEN8_PAGE_SIZE * (NUM_KERNEL_PDE - 1),
			   u64);
	check(retval, expected, "PTE entry write", test);

	/* Now try to write data to our newly mapped 'pagetable', see if it succeeds */
	bb->len = 0;
	bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
	xe_map_wr(xe, &pt->vmap, 0, u32, 0xdeaddead);
	expected = 0x12345678U;

	emit_clear(m->gt, bb, xe_migrate_vm_addr(NUM_KERNEL_PDE - 1, 0), 4, 4,
		   expected, IS_DGFX(xe));
	run_sanity_job(m, xe, bb, 1, "Writing to our newly mapped pagetable",
		       test);

	retval = xe_map_rd(xe, &pt->vmap, 0, u32);
	check(retval, expected, "Write to PT after adding PTE", test);

	/* Sanity checks passed, try the full ones! */

	/* Clear a small bo */
	kunit_info(test, "Clearing small buffer object\n");
	xe_map_memset(xe, &tiny->vmap, 0, 0x22, tiny->size);
	expected = 0x224488ff;
	fence = xe_migrate_clear(m, tiny, tiny->ttm.resource, expected);
	if (sanity_fence_failed(xe, fence, "Clearing small bo", test))
		goto out;

	dma_fence_put(fence);
	retval = xe_map_rd(xe, &tiny->vmap, 0, u32);
	check(retval, expected, "Command clear small first value", test);
	retval = xe_map_rd(xe, &tiny->vmap, tiny->size - 4, u32);
	check(retval, expected, "Command clear small last value", test);

	if (IS_DGFX(xe)) {
		kunit_info(test, "Copying small buffer object to system\n");
		test_copy(m, tiny, test);
	}

	/* Clear a big bo with a fixed value */
	kunit_info(test, "Clearing big buffer object\n");
	xe_map_memset(xe, &big->vmap, 0, 0x11, big->size);
	expected = 0x11223344U;
	fence = xe_migrate_clear(m, big, big->ttm.resource, expected);
	if (sanity_fence_failed(xe, fence, "Clearing big bo", test))
		goto out;

	dma_fence_put(fence);
	retval = xe_map_rd(xe, &big->vmap, 0, u32);
	check(retval, expected, "Command clear big first value", test);
	retval = xe_map_rd(xe, &big->vmap, big->size - 4, u32);
	check(retval, expected, "Command clear big last value", test);

	if (IS_DGFX(xe)) {
		kunit_info(test, "Copying big buffer object to system\n");
		test_copy(m, big, test);
	}

	test_pt_update(m, pt, test);

out:
	xe_bb_free(bb, NULL);
free_tiny:
	xe_bo_unpin(tiny);
	xe_bo_put(tiny);
free_pt:
	xe_bo_unpin(pt);
	xe_bo_put(pt);
free_big:
	xe_bo_unpin(big);
	xe_bo_put(big);
vunmap:
	xe_bo_vunmap(m->pt_bo);
}

static int migrate_test_run_device(struct xe_device *xe)
{
	struct kunit *test = xe_cur_kunit();
	struct xe_gt *gt;
	int id;

	for_each_gt(gt, xe, id) {
		struct xe_migrate *m = gt->migrate;
		struct ww_acquire_ctx ww;

		kunit_info(test, "Testing gt id %d.\n", id);
		xe_vm_lock(m->eng->vm, &ww, 0, true);
		xe_migrate_sanity_test(m, test);
		xe_vm_unlock(m->eng->vm, &ww);
	}

	return 0;
}

void xe_migrate_sanity_kunit(struct kunit *test)
{
	xe_call_for_each_device(migrate_test_run_device);
}
EXPORT_SYMBOL(xe_migrate_sanity_kunit);
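Aside: test_pt_update() above relies on a guard-value (canary) pattern — fill the whole pagetable with 0xf0 bytes, run the update over qwords [ofs, ofs + qwords), then verify the entries immediately before and after the range kept the canary. A minimal userspace sketch of the same idea, with fake_pt_update() as a hypothetical stand-in for the blitter job (not driver code):

```c
#include <stdint.h>

#define PT_QWORDS 512 /* one 4 KiB page of 8-byte entries */

/* Hypothetical stand-in for the GPU-side update: entry i gets
 * i * 0x1111111111111111, mirroring what the kunit test expects. */
static void fake_pt_update(uint64_t *pt, unsigned int ofs, unsigned int qwords)
{
	for (unsigned int i = 0; i < qwords; i++)
		pt[ofs + i] = i * 0x1111111111111111ULL;
}

/* Returns 0 if only [ofs, ofs + qwords) changed, -1 otherwise. */
int check_guarded_update(void)
{
	uint64_t pt[PT_QWORDS];
	const uint64_t guard = 0xf0f0f0f0f0f0f0f0ULL;
	const unsigned int ofs = 1, qwords = 0x10;

	/* Fill with a canary, like xe_map_memset() in the test */
	for (unsigned int i = 0; i < PT_QWORDS; i++)
		pt[i] = guard;

	fake_pt_update(pt, ofs, qwords);

	/* Entries outside the updated range must keep the canary */
	if (pt[0] != guard || pt[ofs + qwords] != guard)
		return -1;
	for (unsigned int i = 0; i < qwords; i++)
		if (pt[ofs + i] != i * 0x1111111111111111ULL)
			return -1;
	return 0;
}
```

The canary catches both off-by-one writes below ofs and overruns past ofs + qwords, which is exactly what the "must stay untouched" checks assert.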
+23
drivers/gpu/drm/xe/tests/xe_migrate_test.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright © 2022 Intel Corporation
 */

#include <kunit/test.h>

void xe_migrate_sanity_kunit(struct kunit *test);

static struct kunit_case xe_migrate_tests[] = {
	KUNIT_CASE(xe_migrate_sanity_kunit),
	{}
};

static struct kunit_suite xe_migrate_test_suite = {
	.name = "xe_migrate",
	.test_cases = xe_migrate_tests,
};

kunit_test_suite(xe_migrate_test_suite);

MODULE_AUTHOR("Intel Corporation");
MODULE_LICENSE("GPL");
+66
drivers/gpu/drm/xe/tests/xe_test.h
/* SPDX-License-Identifier: GPL-2.0 AND MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

#ifndef __XE_TEST_H__
#define __XE_TEST_H__

#include <linux/types.h>

#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
#include <linux/sched.h>
#include <kunit/test.h>

/*
 * For each test that provides a kunit private test structure, place a
 * test id here and point kunit->priv to an embedded struct xe_test_priv.
 */
enum xe_test_priv_id {
	XE_TEST_LIVE_DMA_BUF,
};

/**
 * struct xe_test_priv - Base class for test private info
 * @id: enum xe_test_priv_id to identify the subclass.
 */
struct xe_test_priv {
	enum xe_test_priv_id id;
};

#define XE_TEST_DECLARE(x) x
#define XE_TEST_ONLY(x) unlikely(x)
#define XE_TEST_EXPORT
#define xe_cur_kunit() current->kunit_test

/**
 * xe_cur_kunit_priv - Obtain the struct xe_test_priv pointed to by
 * current->kunit->priv if it exists and is embedded in the expected subclass.
 * @id: Id of the expected subclass.
 *
 * Return: NULL if the process is not a kunit test, or if the current
 * kunit->priv pointer is not pointing to an object of the expected
 * subclass. A pointer to the embedded struct xe_test_priv otherwise.
 */
static inline struct xe_test_priv *
xe_cur_kunit_priv(enum xe_test_priv_id id)
{
	struct xe_test_priv *priv;

	if (!xe_cur_kunit())
		return NULL;

	priv = xe_cur_kunit()->priv;
	return priv->id == id ? priv : NULL;
}

#else /* if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) */

#define XE_TEST_DECLARE(x)
#define XE_TEST_ONLY(x) 0
#define XE_TEST_EXPORT static
#define xe_cur_kunit() NULL
#define xe_cur_kunit_priv(_id) NULL

#endif
#endif
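Aside: xe_cur_kunit_priv() implements an id-gated downcast — a base struct carrying an enum id is embedded in each subclass, and the id is checked before the cast is trusted. A self-contained userspace sketch of the same pattern (names here are illustrative, not the driver's):

```c
#include <stddef.h>

/* Base class carrying a discriminator, as struct xe_test_priv does. */
enum test_priv_id { TEST_LIVE_DMA_BUF, TEST_OTHER };

struct test_priv {
	enum test_priv_id id;
};

/* Hypothetical subclass embedding the base as its first member, so a
 * plain cast is equivalent to container_of() on the base pointer. */
struct dma_buf_test_priv {
	struct test_priv base;
	int expected_pages;
};

/* The id gates the downcast: a mismatched subclass yields NULL rather
 * than a silently-wrong pointer. */
static struct dma_buf_test_priv *
to_dma_buf_priv(struct test_priv *priv)
{
	if (!priv || priv->id != TEST_LIVE_DMA_BUF)
		return NULL;
	return (struct dma_buf_test_priv *)priv;
}
```

In the kernel header the same check is split in two: xe_cur_kunit() handles the "not a kunit test" case, and the id comparison rejects a priv belonging to a different test.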
+97
drivers/gpu/drm/xe/xe_bb.c
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_bb.h"
#include "xe_sa.h"
#include "xe_device.h"
#include "xe_engine_types.h"
#include "xe_hw_fence.h"
#include "xe_sched_job.h"
#include "xe_vm_types.h"

#include "gt/intel_gpu_commands.h"

struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
{
	struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL);
	int err;

	if (!bb)
		return ERR_PTR(-ENOMEM);

	bb->bo = xe_sa_bo_new(!usm ? &gt->kernel_bb_pool :
			      &gt->usm.bb_pool, 4 * dwords + 4);
	if (IS_ERR(bb->bo)) {
		err = PTR_ERR(bb->bo);
		goto err;
	}

	bb->cs = xe_sa_bo_cpu_addr(bb->bo);
	bb->len = 0;

	return bb;
err:
	kfree(bb);
	return ERR_PTR(err);
}

static struct xe_sched_job *
__xe_bb_create_job(struct xe_engine *kernel_eng, struct xe_bb *bb, u64 *addr)
{
	u32 size = drm_suballoc_size(bb->bo);

	XE_BUG_ON((bb->len * 4 + 1) > size);

	bb->cs[bb->len++] = MI_BATCH_BUFFER_END;

	xe_sa_bo_flush_write(bb->bo);

	return xe_sched_job_create(kernel_eng, addr);
}

struct xe_sched_job *xe_bb_create_wa_job(struct xe_engine *wa_eng,
					 struct xe_bb *bb, u64 batch_base_ofs)
{
	u64 addr = batch_base_ofs + drm_suballoc_soffset(bb->bo);

	XE_BUG_ON(!(wa_eng->vm->flags & XE_VM_FLAG_MIGRATION));

	return __xe_bb_create_job(wa_eng, bb, &addr);
}

struct xe_sched_job *xe_bb_create_migration_job(struct xe_engine *kernel_eng,
						struct xe_bb *bb,
						u64 batch_base_ofs,
						u32 second_idx)
{
	u64 addr[2] = {
		batch_base_ofs + drm_suballoc_soffset(bb->bo),
		batch_base_ofs + drm_suballoc_soffset(bb->bo) +
		4 * second_idx,
	};

	BUG_ON(second_idx > bb->len);
	BUG_ON(!(kernel_eng->vm->flags & XE_VM_FLAG_MIGRATION));

	return __xe_bb_create_job(kernel_eng, bb, addr);
}

struct xe_sched_job *xe_bb_create_job(struct xe_engine *kernel_eng,
				      struct xe_bb *bb)
{
	u64 addr = xe_sa_bo_gpu_addr(bb->bo);

	BUG_ON(kernel_eng->vm && kernel_eng->vm->flags & XE_VM_FLAG_MIGRATION);
	return __xe_bb_create_job(kernel_eng, bb, &addr);
}

void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence)
{
	if (!bb)
		return;

	xe_sa_bo_free(bb->bo, fence);
	kfree(bb);
}
+27
drivers/gpu/drm/xe/xe_bb.h
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

#ifndef _XE_BB_H_
#define _XE_BB_H_

#include "xe_bb_types.h"

struct dma_fence;

struct xe_gt;
struct xe_engine;
struct xe_sched_job;

struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 size, bool usm);
struct xe_sched_job *xe_bb_create_job(struct xe_engine *kernel_eng,
				      struct xe_bb *bb);
struct xe_sched_job *xe_bb_create_migration_job(struct xe_engine *kernel_eng,
						struct xe_bb *bb, u64 batch_ofs,
						u32 second_idx);
struct xe_sched_job *xe_bb_create_wa_job(struct xe_engine *wa_eng,
					 struct xe_bb *bb, u64 batch_ofs);
void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence);

#endif
+20
drivers/gpu/drm/xe/xe_bb_types.h
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

#ifndef _XE_BB_TYPES_H_
#define _XE_BB_TYPES_H_

#include <linux/types.h>

struct drm_suballoc;

struct xe_bb {
	struct drm_suballoc *bo;

	u32 *cs;
	u32 len; /* in dwords */
};

#endif
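Aside: the xe_bb helpers above follow a simple lifecycle — xe_bb_new() sizes the suballocation as 4 * dwords + 4 bytes so a terminating MI_BATCH_BUFFER_END dword always fits, callers append dwords through cs/len, and job creation appends the end marker after a bounds check. A userspace sketch of that sizing and termination logic, under the assumption of plain malloc instead of the drm suballocator (the MI_BATCH_BUFFER_END value below is illustrative only):

```c
#include <stdint.h>
#include <stdlib.h>

#define MI_BATCH_BUFFER_END 0x05000000u /* illustrative encoding, not authoritative */

/* Userspace analogue of struct xe_bb: a dword stream plus a length. */
struct bb {
	uint32_t *cs;
	uint32_t len;  /* in dwords */
	uint32_t size; /* allocation size in bytes */
};

/* Like xe_bb_new(): reserve 4 * dwords + 4 bytes so there is always
 * room for the terminating end-of-batch dword. */
static struct bb *bb_new(uint32_t dwords)
{
	struct bb *bb = malloc(sizeof(*bb));

	if (!bb)
		return NULL;
	bb->size = 4 * dwords + 4;
	bb->cs = malloc(bb->size);
	if (!bb->cs) {
		free(bb);
		return NULL;
	}
	bb->len = 0;
	return bb;
}

/* Like __xe_bb_create_job(): check bounds, then terminate the batch. */
static int bb_finish(struct bb *bb)
{
	if (bb->len * 4 + 4 > bb->size)
		return -1; /* no room left for the end marker */
	bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
	return 0;
}
```

The migration-job variant takes a second_idx because one suballocation can hold two batch segments; addr[1] = base + 4 * second_idx points into the same dword stream, which is why BUG_ON(second_idx > bb->len) is the only extra check needed.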
+1698
drivers/gpu/drm/xe/xe_bo.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2021 Intel Corporation 4 + */ 5 + 6 + 7 + #include "xe_bo.h" 8 + 9 + #include <linux/dma-buf.h> 10 + 11 + #include <drm/drm_drv.h> 12 + #include <drm/drm_gem_ttm_helper.h> 13 + #include <drm/ttm/ttm_device.h> 14 + #include <drm/ttm/ttm_placement.h> 15 + #include <drm/ttm/ttm_tt.h> 16 + #include <drm/xe_drm.h> 17 + 18 + #include "xe_device.h" 19 + #include "xe_dma_buf.h" 20 + #include "xe_ggtt.h" 21 + #include "xe_gt.h" 22 + #include "xe_map.h" 23 + #include "xe_migrate.h" 24 + #include "xe_preempt_fence.h" 25 + #include "xe_res_cursor.h" 26 + #include "xe_trace.h" 27 + #include "xe_vm.h" 28 + 29 + static const struct ttm_place sys_placement_flags = { 30 + .fpfn = 0, 31 + .lpfn = 0, 32 + .mem_type = XE_PL_SYSTEM, 33 + .flags = 0, 34 + }; 35 + 36 + static struct ttm_placement sys_placement = { 37 + .num_placement = 1, 38 + .placement = &sys_placement_flags, 39 + .num_busy_placement = 1, 40 + .busy_placement = &sys_placement_flags, 41 + }; 42 + 43 + bool mem_type_is_vram(u32 mem_type) 44 + { 45 + return mem_type >= XE_PL_VRAM0; 46 + } 47 + 48 + static bool resource_is_vram(struct ttm_resource *res) 49 + { 50 + return mem_type_is_vram(res->mem_type); 51 + } 52 + 53 + bool xe_bo_is_vram(struct xe_bo *bo) 54 + { 55 + return resource_is_vram(bo->ttm.resource); 56 + } 57 + 58 + static bool xe_bo_is_user(struct xe_bo *bo) 59 + { 60 + return bo->flags & XE_BO_CREATE_USER_BIT; 61 + } 62 + 63 + static struct xe_gt * 64 + mem_type_to_gt(struct xe_device *xe, u32 mem_type) 65 + { 66 + XE_BUG_ON(!mem_type_is_vram(mem_type)); 67 + 68 + return xe_device_get_gt(xe, mem_type - XE_PL_VRAM0); 69 + } 70 + 71 + static void try_add_system(struct xe_bo *bo, struct ttm_place *places, 72 + u32 bo_flags, u32 *c) 73 + { 74 + if (bo_flags & XE_BO_CREATE_SYSTEM_BIT) { 75 + places[*c] = (struct ttm_place) { 76 + .mem_type = XE_PL_TT, 77 + }; 78 + *c += 1; 79 + 80 + if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID) 81 + 
bo->props.preferred_mem_type = XE_PL_TT; 82 + } 83 + } 84 + 85 + static void try_add_vram0(struct xe_device *xe, struct xe_bo *bo, 86 + struct ttm_place *places, u32 bo_flags, u32 *c) 87 + { 88 + struct xe_gt *gt; 89 + 90 + if (bo_flags & XE_BO_CREATE_VRAM0_BIT) { 91 + gt = mem_type_to_gt(xe, XE_PL_VRAM0); 92 + XE_BUG_ON(!gt->mem.vram.size); 93 + 94 + places[*c] = (struct ttm_place) { 95 + .mem_type = XE_PL_VRAM0, 96 + /* 97 + * For eviction / restore on suspend / resume objects 98 + * pinned in VRAM must be contiguous 99 + */ 100 + .flags = bo_flags & (XE_BO_CREATE_PINNED_BIT | 101 + XE_BO_CREATE_GGTT_BIT) ? 102 + TTM_PL_FLAG_CONTIGUOUS : 0, 103 + }; 104 + *c += 1; 105 + 106 + if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID) 107 + bo->props.preferred_mem_type = XE_PL_VRAM0; 108 + } 109 + } 110 + 111 + static void try_add_vram1(struct xe_device *xe, struct xe_bo *bo, 112 + struct ttm_place *places, u32 bo_flags, u32 *c) 113 + { 114 + struct xe_gt *gt; 115 + 116 + if (bo_flags & XE_BO_CREATE_VRAM1_BIT) { 117 + gt = mem_type_to_gt(xe, XE_PL_VRAM1); 118 + XE_BUG_ON(!gt->mem.vram.size); 119 + 120 + places[*c] = (struct ttm_place) { 121 + .mem_type = XE_PL_VRAM1, 122 + /* 123 + * For eviction / restore on suspend / resume objects 124 + * pinned in VRAM must be contiguous 125 + */ 126 + .flags = bo_flags & (XE_BO_CREATE_PINNED_BIT | 127 + XE_BO_CREATE_GGTT_BIT) ? 
128 + TTM_PL_FLAG_CONTIGUOUS : 0, 129 + }; 130 + *c += 1; 131 + 132 + if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID) 133 + bo->props.preferred_mem_type = XE_PL_VRAM1; 134 + } 135 + } 136 + 137 + static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, 138 + u32 bo_flags) 139 + { 140 + struct ttm_place *places = bo->placements; 141 + u32 c = 0; 142 + 143 + bo->props.preferred_mem_type = XE_BO_PROPS_INVALID; 144 + 145 + /* The order of placements should indicate preferred location */ 146 + 147 + if (bo->props.preferred_mem_class == XE_MEM_REGION_CLASS_SYSMEM) { 148 + try_add_system(bo, places, bo_flags, &c); 149 + if (bo->props.preferred_gt == XE_GT1) { 150 + try_add_vram1(xe, bo, places, bo_flags, &c); 151 + try_add_vram0(xe, bo, places, bo_flags, &c); 152 + } else { 153 + try_add_vram0(xe, bo, places, bo_flags, &c); 154 + try_add_vram1(xe, bo, places, bo_flags, &c); 155 + } 156 + } else if (bo->props.preferred_gt == XE_GT1) { 157 + try_add_vram1(xe, bo, places, bo_flags, &c); 158 + try_add_vram0(xe, bo, places, bo_flags, &c); 159 + try_add_system(bo, places, bo_flags, &c); 160 + } else { 161 + try_add_vram0(xe, bo, places, bo_flags, &c); 162 + try_add_vram1(xe, bo, places, bo_flags, &c); 163 + try_add_system(bo, places, bo_flags, &c); 164 + } 165 + 166 + if (!c) 167 + return -EINVAL; 168 + 169 + bo->placement = (struct ttm_placement) { 170 + .num_placement = c, 171 + .placement = places, 172 + .num_busy_placement = c, 173 + .busy_placement = places, 174 + }; 175 + 176 + return 0; 177 + } 178 + 179 + int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, 180 + u32 bo_flags) 181 + { 182 + xe_bo_assert_held(bo); 183 + return __xe_bo_placement_for_flags(xe, bo, bo_flags); 184 + } 185 + 186 + static void xe_evict_flags(struct ttm_buffer_object *tbo, 187 + struct ttm_placement *placement) 188 + { 189 + struct xe_bo *bo; 190 + 191 + if (!xe_bo_is_xe_bo(tbo)) { 192 + /* Don't handle scatter gather BOs */ 193 + if (tbo->type == 
ttm_bo_type_sg) { 194 + placement->num_placement = 0; 195 + placement->num_busy_placement = 0; 196 + return; 197 + } 198 + 199 + *placement = sys_placement; 200 + return; 201 + } 202 + 203 + /* 204 + * For xe, sg bos that are evicted to system just triggers a 205 + * rebind of the sg list upon subsequent validation to XE_PL_TT. 206 + */ 207 + 208 + bo = ttm_to_xe_bo(tbo); 209 + switch (tbo->resource->mem_type) { 210 + case XE_PL_VRAM0: 211 + case XE_PL_VRAM1: 212 + case XE_PL_TT: 213 + default: 214 + /* for now kick out to system */ 215 + *placement = sys_placement; 216 + break; 217 + } 218 + } 219 + 220 + struct xe_ttm_tt { 221 + struct ttm_tt ttm; 222 + struct device *dev; 223 + struct sg_table sgt; 224 + struct sg_table *sg; 225 + }; 226 + 227 + static int xe_tt_map_sg(struct ttm_tt *tt) 228 + { 229 + struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); 230 + unsigned long num_pages = tt->num_pages; 231 + int ret; 232 + 233 + XE_BUG_ON(tt->page_flags & TTM_TT_FLAG_EXTERNAL); 234 + 235 + if (xe_tt->sg) 236 + return 0; 237 + 238 + ret = sg_alloc_table_from_pages(&xe_tt->sgt, tt->pages, num_pages, 239 + 0, (u64)num_pages << PAGE_SHIFT, 240 + GFP_KERNEL); 241 + if (ret) 242 + return ret; 243 + 244 + xe_tt->sg = &xe_tt->sgt; 245 + ret = dma_map_sgtable(xe_tt->dev, xe_tt->sg, DMA_BIDIRECTIONAL, 246 + DMA_ATTR_SKIP_CPU_SYNC); 247 + if (ret) { 248 + sg_free_table(xe_tt->sg); 249 + xe_tt->sg = NULL; 250 + return ret; 251 + } 252 + 253 + return 0; 254 + } 255 + 256 + struct sg_table *xe_bo_get_sg(struct xe_bo *bo) 257 + { 258 + struct ttm_tt *tt = bo->ttm.ttm; 259 + struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); 260 + 261 + return xe_tt->sg; 262 + } 263 + 264 + static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, 265 + u32 page_flags) 266 + { 267 + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); 268 + struct xe_device *xe = xe_bo_device(bo); 269 + struct xe_ttm_tt *tt; 270 + int err; 271 + 272 + tt = kzalloc(sizeof(*tt), 
GFP_KERNEL); 273 + if (!tt) 274 + return NULL; 275 + 276 + tt->dev = xe->drm.dev; 277 + 278 + /* TODO: Select caching mode */ 279 + err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, 280 + bo->flags & XE_BO_SCANOUT_BIT ? ttm_write_combined : ttm_cached, 281 + DIV_ROUND_UP(xe_device_ccs_bytes(xe_bo_device(bo), 282 + bo->ttm.base.size), 283 + PAGE_SIZE)); 284 + if (err) { 285 + kfree(tt); 286 + return NULL; 287 + } 288 + 289 + return &tt->ttm; 290 + } 291 + 292 + static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt, 293 + struct ttm_operation_ctx *ctx) 294 + { 295 + int err; 296 + 297 + /* 298 + * dma-bufs are not populated with pages, and the dma- 299 + * addresses are set up when moved to XE_PL_TT. 300 + */ 301 + if (tt->page_flags & TTM_TT_FLAG_EXTERNAL) 302 + return 0; 303 + 304 + err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx); 305 + if (err) 306 + return err; 307 + 308 + /* A follow up may move this xe_bo_move when BO is moved to XE_PL_TT */ 309 + err = xe_tt_map_sg(tt); 310 + if (err) 311 + ttm_pool_free(&ttm_dev->pool, tt); 312 + 313 + return err; 314 + } 315 + 316 + static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt) 317 + { 318 + struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); 319 + 320 + if (tt->page_flags & TTM_TT_FLAG_EXTERNAL) 321 + return; 322 + 323 + if (xe_tt->sg) { 324 + dma_unmap_sgtable(xe_tt->dev, xe_tt->sg, 325 + DMA_BIDIRECTIONAL, 0); 326 + sg_free_table(xe_tt->sg); 327 + xe_tt->sg = NULL; 328 + } 329 + 330 + return ttm_pool_free(&ttm_dev->pool, tt); 331 + } 332 + 333 + static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt) 334 + { 335 + ttm_tt_fini(tt); 336 + kfree(tt); 337 + } 338 + 339 + static int xe_ttm_io_mem_reserve(struct ttm_device *bdev, 340 + struct ttm_resource *mem) 341 + { 342 + struct xe_device *xe = ttm_to_xe_device(bdev); 343 + struct xe_gt *gt; 344 + 345 + switch (mem->mem_type) { 346 + case XE_PL_SYSTEM: 347 + case XE_PL_TT: 348 + return 
0; 349 + case XE_PL_VRAM0: 350 + case XE_PL_VRAM1: 351 + gt = mem_type_to_gt(xe, mem->mem_type); 352 + mem->bus.offset = mem->start << PAGE_SHIFT; 353 + 354 + if (gt->mem.vram.mapping && 355 + mem->placement & TTM_PL_FLAG_CONTIGUOUS) 356 + mem->bus.addr = (u8 *)gt->mem.vram.mapping + 357 + mem->bus.offset; 358 + 359 + mem->bus.offset += gt->mem.vram.io_start; 360 + mem->bus.is_iomem = true; 361 + 362 + #if !defined(CONFIG_X86) 363 + mem->bus.caching = ttm_write_combined; 364 + #endif 365 + break; 366 + default: 367 + return -EINVAL; 368 + } 369 + return 0; 370 + } 371 + 372 + static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo, 373 + const struct ttm_operation_ctx *ctx) 374 + { 375 + struct dma_resv_iter cursor; 376 + struct dma_fence *fence; 377 + struct xe_vma *vma; 378 + int ret = 0; 379 + 380 + dma_resv_assert_held(bo->ttm.base.resv); 381 + 382 + if (!xe_device_in_fault_mode(xe) && !list_empty(&bo->vmas)) { 383 + dma_resv_iter_begin(&cursor, bo->ttm.base.resv, 384 + DMA_RESV_USAGE_BOOKKEEP); 385 + dma_resv_for_each_fence_unlocked(&cursor, fence) 386 + dma_fence_enable_sw_signaling(fence); 387 + dma_resv_iter_end(&cursor); 388 + } 389 + 390 + list_for_each_entry(vma, &bo->vmas, bo_link) { 391 + struct xe_vm *vm = vma->vm; 392 + 393 + trace_xe_vma_evict(vma); 394 + 395 + if (xe_vm_in_fault_mode(vm)) { 396 + /* Wait for pending binds / unbinds. 
*/ 397 + long timeout; 398 + 399 + if (ctx->no_wait_gpu && 400 + !dma_resv_test_signaled(bo->ttm.base.resv, 401 + DMA_RESV_USAGE_BOOKKEEP)) 402 + return -EBUSY; 403 + 404 + timeout = dma_resv_wait_timeout(bo->ttm.base.resv, 405 + DMA_RESV_USAGE_BOOKKEEP, 406 + ctx->interruptible, 407 + MAX_SCHEDULE_TIMEOUT); 408 + if (timeout > 0) { 409 + ret = xe_vm_invalidate_vma(vma); 410 + XE_WARN_ON(ret); 411 + } else if (!timeout) { 412 + ret = -ETIME; 413 + } else { 414 + ret = timeout; 415 + } 416 + 417 + } else { 418 + bool vm_resv_locked = false; 419 + struct xe_vm *vm = vma->vm; 420 + 421 + /* 422 + * We need to put the vma on the vm's rebind_list, 423 + * but need the vm resv to do so. If we can't verify 424 + * that we indeed have it locked, put the vma an the 425 + * vm's notifier.rebind_list instead and scoop later. 426 + */ 427 + if (dma_resv_trylock(&vm->resv)) 428 + vm_resv_locked = true; 429 + else if (ctx->resv != &vm->resv) { 430 + spin_lock(&vm->notifier.list_lock); 431 + list_move_tail(&vma->notifier.rebind_link, 432 + &vm->notifier.rebind_list); 433 + spin_unlock(&vm->notifier.list_lock); 434 + continue; 435 + } 436 + 437 + xe_vm_assert_held(vm); 438 + if (list_empty(&vma->rebind_link) && vma->gt_present) 439 + list_add_tail(&vma->rebind_link, &vm->rebind_list); 440 + 441 + if (vm_resv_locked) 442 + dma_resv_unlock(&vm->resv); 443 + } 444 + } 445 + 446 + return ret; 447 + } 448 + 449 + /* 450 + * The dma-buf map_attachment() / unmap_attachment() is hooked up here. 451 + * Note that unmapping the attachment is deferred to the next 452 + * map_attachment time, or to bo destroy (after idling) whichever comes first. 453 + * This is to avoid syncing before unmap_attachment(), assuming that the 454 + * caller relies on idling the reservation object before moving the 455 + * backing store out. Should that assumption not hold, then we will be able 456 + * to unconditionally call unmap_attachment() when moving out to system. 
457 + */ 458 + static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo, 459 + struct ttm_resource *old_res, 460 + struct ttm_resource *new_res) 461 + { 462 + struct dma_buf_attachment *attach = ttm_bo->base.import_attach; 463 + struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt, 464 + ttm); 465 + struct sg_table *sg; 466 + 467 + XE_BUG_ON(!attach); 468 + XE_BUG_ON(!ttm_bo->ttm); 469 + 470 + if (new_res->mem_type == XE_PL_SYSTEM) 471 + goto out; 472 + 473 + if (ttm_bo->sg) { 474 + dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL); 475 + ttm_bo->sg = NULL; 476 + } 477 + 478 + sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL); 479 + if (IS_ERR(sg)) 480 + return PTR_ERR(sg); 481 + 482 + ttm_bo->sg = sg; 483 + xe_tt->sg = sg; 484 + 485 + out: 486 + ttm_bo_move_null(ttm_bo, new_res); 487 + 488 + return 0; 489 + } 490 + 491 + /** 492 + * xe_bo_move_notify - Notify subsystems of a pending move 493 + * @bo: The buffer object 494 + * @ctx: The struct ttm_operation_ctx controlling locking and waits. 495 + * 496 + * This function notifies subsystems of an upcoming buffer move. 497 + * Upon receiving such a notification, subsystems should schedule 498 + * halting access to the underlying pages and optionally add a fence 499 + * to the buffer object's dma_resv object, that signals when access is 500 + * stopped. The caller will wait on all dma_resv fences before 501 + * starting the move. 502 + * 503 + * A subsystem may commence access to the object after obtaining 504 + * bindings to the new backing memory under the object lock. 505 + * 506 + * Return: 0 on success, -EINTR or -ERESTARTSYS if interrupted in fault mode, 507 + * negative error code on error. 
508 + */ 509 + static int xe_bo_move_notify(struct xe_bo *bo, 510 + const struct ttm_operation_ctx *ctx) 511 + { 512 + struct ttm_buffer_object *ttm_bo = &bo->ttm; 513 + struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); 514 + int ret; 515 + 516 + /* 517 + * If this starts to call into many components, consider 518 + * using a notification chain here. 519 + */ 520 + 521 + if (xe_bo_is_pinned(bo)) 522 + return -EINVAL; 523 + 524 + xe_bo_vunmap(bo); 525 + ret = xe_bo_trigger_rebind(xe, bo, ctx); 526 + if (ret) 527 + return ret; 528 + 529 + /* Don't call move_notify() for imported dma-bufs. */ 530 + if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach) 531 + dma_buf_move_notify(ttm_bo->base.dma_buf); 532 + 533 + return 0; 534 + } 535 + 536 + static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, 537 + struct ttm_operation_ctx *ctx, 538 + struct ttm_resource *new_mem, 539 + struct ttm_place *hop) 540 + { 541 + struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); 542 + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); 543 + struct ttm_resource *old_mem = ttm_bo->resource; 544 + struct ttm_tt *ttm = ttm_bo->ttm; 545 + struct xe_gt *gt = NULL; 546 + struct dma_fence *fence; 547 + bool move_lacks_source; 548 + bool needs_clear; 549 + int ret = 0; 550 + 551 + if (!old_mem) { 552 + if (new_mem->mem_type != TTM_PL_SYSTEM) { 553 + hop->mem_type = TTM_PL_SYSTEM; 554 + hop->flags = TTM_PL_FLAG_TEMPORARY; 555 + ret = -EMULTIHOP; 556 + goto out; 557 + } 558 + 559 + ttm_bo_move_null(ttm_bo, new_mem); 560 + goto out; 561 + } 562 + 563 + if (ttm_bo->type == ttm_bo_type_sg) { 564 + ret = xe_bo_move_notify(bo, ctx); 565 + if (!ret) 566 + ret = xe_bo_move_dmabuf(ttm_bo, old_mem, new_mem); 567 + goto out; 568 + } 569 + 570 + move_lacks_source = !resource_is_vram(old_mem) && 571 + (!ttm || !ttm_tt_is_populated(ttm)); 572 + 573 + needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) || 574 + (!ttm && ttm_bo->type == ttm_bo_type_device); 575 + 576 + if 
((move_lacks_source && !needs_clear) ||
	    (old_mem->mem_type == XE_PL_SYSTEM &&
	     new_mem->mem_type == XE_PL_TT)) {
		ttm_bo_move_null(ttm_bo, new_mem);
		goto out;
	}

	if (!move_lacks_source && !xe_bo_is_pinned(bo)) {
		ret = xe_bo_move_notify(bo, ctx);
		if (ret)
			goto out;
	}

	if (old_mem->mem_type == XE_PL_TT &&
	    new_mem->mem_type == XE_PL_SYSTEM) {
		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
						     DMA_RESV_USAGE_BOOKKEEP,
						     true,
						     MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0) {
			ret = timeout;
			goto out;
		}
		ttm_bo_move_null(ttm_bo, new_mem);
		goto out;
	}

	if (!move_lacks_source &&
	    ((old_mem->mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) ||
	     (resource_is_vram(old_mem) &&
	      new_mem->mem_type == XE_PL_SYSTEM))) {
		hop->fpfn = 0;
		hop->lpfn = 0;
		hop->mem_type = XE_PL_TT;
		hop->flags = TTM_PL_FLAG_TEMPORARY;
		ret = -EMULTIHOP;
		goto out;
	}

	if (bo->gt)
		gt = bo->gt;
	else if (resource_is_vram(new_mem))
		gt = mem_type_to_gt(xe, new_mem->mem_type);
	else if (resource_is_vram(old_mem))
		gt = mem_type_to_gt(xe, old_mem->mem_type);

	XE_BUG_ON(!gt);
	XE_BUG_ON(!gt->migrate);

	trace_xe_bo_move(bo);
	xe_device_mem_access_get(xe);

	if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
		/*
		 * Kernel memory that is pinned should only be moved on suspend
		 * / resume, some of the pinned memory is required for the
		 * device to resume / use the GPU to move other evicted memory
		 * (user memory) around. This likely could be optimized a bit
		 * further, where we find the minimum set of pinned memory
		 * required for resume, but for simplicity we do a memcpy for
		 * all pinned memory.
		 */
		ret = xe_bo_vmap(bo);
		if (!ret) {
			ret = ttm_bo_move_memcpy(ttm_bo, ctx, new_mem);

			/* Create a new VMAP once kernel BO back in VRAM */
			if (!ret && resource_is_vram(new_mem)) {
				void *new_addr = gt->mem.vram.mapping +
					(new_mem->start << PAGE_SHIFT);

				XE_BUG_ON(new_mem->start !=
					  bo->placements->fpfn);

				iosys_map_set_vaddr_iomem(&bo->vmap, new_addr);
			}
		}
	} else {
		if (move_lacks_source)
			fence = xe_migrate_clear(gt->migrate, bo, new_mem, 0);
		else
			fence = xe_migrate_copy(gt->migrate, bo, old_mem, new_mem);
		if (IS_ERR(fence)) {
			ret = PTR_ERR(fence);
			xe_device_mem_access_put(xe);
			goto out;
		}
		ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict, true,
						new_mem);
		dma_fence_put(fence);
	}

	xe_device_mem_access_put(xe);

out:
	return ret;
}

static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
				       unsigned long page_offset)
{
	struct xe_device *xe = ttm_to_xe_device(bo->bdev);
	struct xe_gt *gt = mem_type_to_gt(xe, bo->resource->mem_type);
	struct xe_res_cursor cursor;

	xe_res_first(bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor);
	return (gt->mem.vram.io_start + cursor.start) >> PAGE_SHIFT;
}

static void __xe_bo_vunmap(struct xe_bo *bo);

/*
 * TODO: Move this function to TTM so we don't rely on how TTM does its
 * locking, thereby abusing TTM internals.
692 + */ 693 + static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo) 694 + { 695 + bool locked; 696 + 697 + XE_WARN_ON(kref_read(&ttm_bo->kref)); 698 + 699 + /* 700 + * We can typically only race with TTM trylocking under the 701 + * lru_lock, which will immediately be unlocked again since 702 + * the ttm_bo refcount is zero at this point. So trylocking *should* 703 + * always succeed here, as long as we hold the lru lock. 704 + */ 705 + spin_lock(&ttm_bo->bdev->lru_lock); 706 + locked = dma_resv_trylock(ttm_bo->base.resv); 707 + spin_unlock(&ttm_bo->bdev->lru_lock); 708 + XE_WARN_ON(!locked); 709 + 710 + return locked; 711 + } 712 + 713 + static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo) 714 + { 715 + struct dma_resv_iter cursor; 716 + struct dma_fence *fence; 717 + struct dma_fence *replacement = NULL; 718 + struct xe_bo *bo; 719 + 720 + if (!xe_bo_is_xe_bo(ttm_bo)) 721 + return; 722 + 723 + bo = ttm_to_xe_bo(ttm_bo); 724 + XE_WARN_ON(bo->created && kref_read(&ttm_bo->base.refcount)); 725 + 726 + /* 727 + * Corner case where TTM fails to allocate memory and this BOs resv 728 + * still points the VMs resv 729 + */ 730 + if (ttm_bo->base.resv != &ttm_bo->base._resv) 731 + return; 732 + 733 + if (!xe_ttm_bo_lock_in_destructor(ttm_bo)) 734 + return; 735 + 736 + /* 737 + * Scrub the preempt fences if any. The unbind fence is already 738 + * attached to the resv. 739 + * TODO: Don't do this for external bos once we scrub them after 740 + * unbind. 
741 + */ 742 + dma_resv_for_each_fence(&cursor, ttm_bo->base.resv, 743 + DMA_RESV_USAGE_BOOKKEEP, fence) { 744 + if (xe_fence_is_xe_preempt(fence) && 745 + !dma_fence_is_signaled(fence)) { 746 + if (!replacement) 747 + replacement = dma_fence_get_stub(); 748 + 749 + dma_resv_replace_fences(ttm_bo->base.resv, 750 + fence->context, 751 + replacement, 752 + DMA_RESV_USAGE_BOOKKEEP); 753 + } 754 + } 755 + dma_fence_put(replacement); 756 + 757 + dma_resv_unlock(ttm_bo->base.resv); 758 + } 759 + 760 + static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo) 761 + { 762 + if (!xe_bo_is_xe_bo(ttm_bo)) 763 + return; 764 + 765 + /* 766 + * Object is idle and about to be destroyed. Release the 767 + * dma-buf attachment. 768 + */ 769 + if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) { 770 + struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, 771 + struct xe_ttm_tt, ttm); 772 + 773 + dma_buf_unmap_attachment(ttm_bo->base.import_attach, ttm_bo->sg, 774 + DMA_BIDIRECTIONAL); 775 + ttm_bo->sg = NULL; 776 + xe_tt->sg = NULL; 777 + } 778 + } 779 + 780 + struct ttm_device_funcs xe_ttm_funcs = { 781 + .ttm_tt_create = xe_ttm_tt_create, 782 + .ttm_tt_populate = xe_ttm_tt_populate, 783 + .ttm_tt_unpopulate = xe_ttm_tt_unpopulate, 784 + .ttm_tt_destroy = xe_ttm_tt_destroy, 785 + .evict_flags = xe_evict_flags, 786 + .move = xe_bo_move, 787 + .io_mem_reserve = xe_ttm_io_mem_reserve, 788 + .io_mem_pfn = xe_ttm_io_mem_pfn, 789 + .release_notify = xe_ttm_bo_release_notify, 790 + .eviction_valuable = ttm_bo_eviction_valuable, 791 + .delete_mem_notify = xe_ttm_bo_delete_mem_notify, 792 + }; 793 + 794 + static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo) 795 + { 796 + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); 797 + 798 + if (bo->ttm.base.import_attach) 799 + drm_prime_gem_destroy(&bo->ttm.base, NULL); 800 + drm_gem_object_release(&bo->ttm.base); 801 + 802 + WARN_ON(!list_empty(&bo->vmas)); 803 + 804 + if (bo->ggtt_node.size) 805 + 
xe_ggtt_remove_bo(bo->gt->mem.ggtt, bo); 806 + 807 + if (bo->vm && xe_bo_is_user(bo)) 808 + xe_vm_put(bo->vm); 809 + 810 + kfree(bo); 811 + } 812 + 813 + static void xe_gem_object_free(struct drm_gem_object *obj) 814 + { 815 + /* Our BO reference counting scheme works as follows: 816 + * 817 + * The gem object kref is typically used throughout the driver, 818 + * and the gem object holds a ttm_buffer_object refcount, so 819 + * that when the last gem object reference is put, which is when 820 + * we end up in this function, we put also that ttm_buffer_object 821 + * refcount. Anything using gem interfaces is then no longer 822 + * allowed to access the object in a way that requires a gem 823 + * refcount, including locking the object. 824 + * 825 + * driver ttm callbacks is allowed to use the ttm_buffer_object 826 + * refcount directly if needed. 827 + */ 828 + __xe_bo_vunmap(gem_to_xe_bo(obj)); 829 + ttm_bo_put(container_of(obj, struct ttm_buffer_object, base)); 830 + } 831 + 832 + static bool should_migrate_to_system(struct xe_bo *bo) 833 + { 834 + struct xe_device *xe = xe_bo_device(bo); 835 + 836 + return xe_device_in_fault_mode(xe) && bo->props.cpu_atomic; 837 + } 838 + 839 + static vm_fault_t xe_gem_fault(struct vm_fault *vmf) 840 + { 841 + struct ttm_buffer_object *tbo = vmf->vma->vm_private_data; 842 + struct drm_device *ddev = tbo->base.dev; 843 + vm_fault_t ret; 844 + int idx, r = 0; 845 + 846 + ret = ttm_bo_vm_reserve(tbo, vmf); 847 + if (ret) 848 + return ret; 849 + 850 + if (drm_dev_enter(ddev, &idx)) { 851 + struct xe_bo *bo = ttm_to_xe_bo(tbo); 852 + 853 + trace_xe_bo_cpu_fault(bo); 854 + 855 + if (should_migrate_to_system(bo)) { 856 + r = xe_bo_migrate(bo, XE_PL_TT); 857 + if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR) 858 + ret = VM_FAULT_NOPAGE; 859 + else if (r) 860 + ret = VM_FAULT_SIGBUS; 861 + } 862 + if (!ret) 863 + ret = ttm_bo_vm_fault_reserved(vmf, 864 + vmf->vma->vm_page_prot, 865 + TTM_BO_VM_NUM_PREFAULT); 866 + 867 + 
+		drm_dev_exit(idx);
+	} else {
+		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
+	}
+	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
+		return ret;
+
+	dma_resv_unlock(tbo->base.resv);
+	return ret;
+}
+
+static const struct vm_operations_struct xe_gem_vm_ops = {
+	.fault = xe_gem_fault,
+	.open = ttm_bo_vm_open,
+	.close = ttm_bo_vm_close,
+	.access = ttm_bo_vm_access
+};
+
+static const struct drm_gem_object_funcs xe_gem_object_funcs = {
+	.free = xe_gem_object_free,
+	.mmap = drm_gem_ttm_mmap,
+	.export = xe_gem_prime_export,
+	.vm_ops = &xe_gem_vm_ops,
+};
+
+/**
+ * xe_bo_alloc - Allocate storage for a struct xe_bo
+ *
+ * This function is intended to allocate storage to be used as input
+ * to __xe_bo_create_locked(), in the case a pointer to the bo to be
+ * created is needed before the call to __xe_bo_create_locked().
+ * If __xe_bo_create_locked() ends up never being called, the
+ * storage allocated with this function must be freed using
+ * xe_bo_free().
+ *
+ * Return: A pointer to an uninitialized struct xe_bo on success,
+ * ERR_PTR(-ENOMEM) on error.
+ */
+struct xe_bo *xe_bo_alloc(void)
+{
+	struct xe_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+
+	if (!bo)
+		return ERR_PTR(-ENOMEM);
+
+	return bo;
+}
+
+/**
+ * xe_bo_free - Free storage allocated using xe_bo_alloc()
+ * @bo: The buffer object storage.
+ *
+ * Refer to xe_bo_alloc() documentation for valid use-cases.
920 + */ 921 + void xe_bo_free(struct xe_bo *bo) 922 + { 923 + kfree(bo); 924 + } 925 + 926 + struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, 927 + struct xe_gt *gt, struct dma_resv *resv, 928 + size_t size, enum ttm_bo_type type, 929 + u32 flags) 930 + { 931 + struct ttm_operation_ctx ctx = { 932 + .interruptible = true, 933 + .no_wait_gpu = false, 934 + }; 935 + struct ttm_placement *placement; 936 + uint32_t alignment; 937 + int err; 938 + 939 + /* Only kernel objects should set GT */ 940 + XE_BUG_ON(gt && type != ttm_bo_type_kernel); 941 + 942 + if (!bo) { 943 + bo = xe_bo_alloc(); 944 + if (IS_ERR(bo)) 945 + return bo; 946 + } 947 + 948 + if (flags & (XE_BO_CREATE_VRAM0_BIT | XE_BO_CREATE_VRAM1_BIT) && 949 + !(flags & XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT) && 950 + xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) { 951 + size = ALIGN(size, SZ_64K); 952 + flags |= XE_BO_INTERNAL_64K; 953 + alignment = SZ_64K >> PAGE_SHIFT; 954 + } else { 955 + alignment = SZ_4K >> PAGE_SHIFT; 956 + } 957 + 958 + bo->gt = gt; 959 + bo->size = size; 960 + bo->flags = flags; 961 + bo->ttm.base.funcs = &xe_gem_object_funcs; 962 + bo->props.preferred_mem_class = XE_BO_PROPS_INVALID; 963 + bo->props.preferred_gt = XE_BO_PROPS_INVALID; 964 + bo->props.preferred_mem_type = XE_BO_PROPS_INVALID; 965 + bo->ttm.priority = DRM_XE_VMA_PRIORITY_NORMAL; 966 + INIT_LIST_HEAD(&bo->vmas); 967 + INIT_LIST_HEAD(&bo->pinned_link); 968 + 969 + drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size); 970 + 971 + if (resv) { 972 + ctx.allow_res_evict = true; 973 + ctx.resv = resv; 974 + } 975 + 976 + err = __xe_bo_placement_for_flags(xe, bo, bo->flags); 977 + if (WARN_ON(err)) 978 + return ERR_PTR(err); 979 + 980 + /* Defer populating type_sg bos */ 981 + placement = (type == ttm_bo_type_sg || 982 + bo->flags & XE_BO_DEFER_BACKING) ? 
&sys_placement : 983 + &bo->placement; 984 + err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type, 985 + placement, alignment, 986 + &ctx, NULL, resv, xe_ttm_bo_destroy); 987 + if (err) 988 + return ERR_PTR(err); 989 + 990 + bo->created = true; 991 + ttm_bo_move_to_lru_tail_unlocked(&bo->ttm); 992 + 993 + return bo; 994 + } 995 + 996 + struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_gt *gt, 997 + struct xe_vm *vm, size_t size, 998 + enum ttm_bo_type type, u32 flags) 999 + { 1000 + struct xe_bo *bo; 1001 + int err; 1002 + 1003 + if (vm) 1004 + xe_vm_assert_held(vm); 1005 + bo = __xe_bo_create_locked(xe, NULL, gt, vm ? &vm->resv : NULL, size, 1006 + type, flags); 1007 + if (IS_ERR(bo)) 1008 + return bo; 1009 + 1010 + if (vm && xe_bo_is_user(bo)) 1011 + xe_vm_get(vm); 1012 + bo->vm = vm; 1013 + 1014 + if (flags & XE_BO_CREATE_GGTT_BIT) { 1015 + XE_BUG_ON(!gt); 1016 + 1017 + err = xe_ggtt_insert_bo(gt->mem.ggtt, bo); 1018 + if (err) 1019 + goto err_unlock_put_bo; 1020 + } 1021 + 1022 + return bo; 1023 + 1024 + err_unlock_put_bo: 1025 + xe_bo_unlock_vm_held(bo); 1026 + xe_bo_put(bo); 1027 + return ERR_PTR(err); 1028 + } 1029 + 1030 + struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_gt *gt, 1031 + struct xe_vm *vm, size_t size, 1032 + enum ttm_bo_type type, u32 flags) 1033 + { 1034 + struct xe_bo *bo = xe_bo_create_locked(xe, gt, vm, size, type, flags); 1035 + 1036 + if (!IS_ERR(bo)) 1037 + xe_bo_unlock_vm_held(bo); 1038 + 1039 + return bo; 1040 + } 1041 + 1042 + struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_gt *gt, 1043 + struct xe_vm *vm, size_t size, 1044 + enum ttm_bo_type type, u32 flags) 1045 + { 1046 + struct xe_bo *bo = xe_bo_create_locked(xe, gt, vm, size, type, flags); 1047 + int err; 1048 + 1049 + if (IS_ERR(bo)) 1050 + return bo; 1051 + 1052 + err = xe_bo_pin(bo); 1053 + if (err) 1054 + goto err_put; 1055 + 1056 + err = xe_bo_vmap(bo); 1057 + if (err) 1058 + goto err_unpin; 1059 + 1060 + 
+	xe_bo_unlock_vm_held(bo);
+
+	return bo;
+
+err_unpin:
+	xe_bo_unpin(bo);
+err_put:
+	xe_bo_unlock_vm_held(bo);
+	xe_bo_put(bo);
+	return ERR_PTR(err);
+}
+
+struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_gt *gt,
+				     const void *data, size_t size,
+				     enum ttm_bo_type type, u32 flags)
+{
+	struct xe_bo *bo = xe_bo_create_pin_map(xe, gt, NULL,
+						ALIGN(size, PAGE_SIZE),
+						type, flags);
+	if (IS_ERR(bo))
+		return bo;
+
+	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
+
+	return bo;
+}
+
+/*
+ * XXX: This is in the VM bind data path, likely should calculate this once and
+ * store, with a recalculation if the BO is moved.
+ */
+static uint64_t vram_region_io_offset(struct xe_bo *bo)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	struct xe_gt *gt = mem_type_to_gt(xe, bo->ttm.resource->mem_type);
+
+	return gt->mem.vram.io_start - xe->mem.vram.io_start;
+}
+
+/**
+ * xe_bo_pin_external - pin an external BO
+ * @bo: buffer object to be pinned
+ *
+ * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
+ * BO. Unique call compared to xe_bo_pin as this function has its own set of
+ * asserts and code to ensure evict / restore on suspend / resume.
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
+int xe_bo_pin_external(struct xe_bo *bo)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	int err;
+
+	XE_BUG_ON(bo->vm);
+	XE_BUG_ON(!xe_bo_is_user(bo));
+
+	if (!xe_bo_is_pinned(bo)) {
+		err = xe_bo_validate(bo, NULL, false);
+		if (err)
+			return err;
+
+		if (xe_bo_is_vram(bo)) {
+			spin_lock(&xe->pinned.lock);
+			list_add_tail(&bo->pinned_link,
+				      &xe->pinned.external_vram);
+			spin_unlock(&xe->pinned.lock);
+		}
+	}
+
+	ttm_bo_pin(&bo->ttm);
+
+	/*
+	 * FIXME: If we always use the reserve / unreserve functions for locking
+	 * we do not need this.
+	 */
+	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
+
+	return 0;
+}
+
+int xe_bo_pin(struct xe_bo *bo)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+	int err;
+
+	/* We currently don't expect user BOs to be pinned */
+	XE_BUG_ON(xe_bo_is_user(bo));
+
+	/* Pinned object must be in GGTT or have pinned flag */
+	XE_BUG_ON(!(bo->flags & (XE_BO_CREATE_PINNED_BIT |
+				 XE_BO_CREATE_GGTT_BIT)));
+
+	/*
+	 * No reason we can't support pinning imported dma-bufs, we just don't
+	 * expect to pin an imported dma-buf.
+	 */
+	XE_BUG_ON(bo->ttm.base.import_attach);
+
+	/* We only expect at most 1 pin */
+	XE_BUG_ON(xe_bo_is_pinned(bo));
+
+	err = xe_bo_validate(bo, NULL, false);
+	if (err)
+		return err;
+
+	/*
+	 * For pinned objects on DGFX, we expect these objects to be in
+	 * contiguous VRAM memory. They require eviction / restore during
+	 * suspend / resume (forced restore to the same physical address).
+	 */
+	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
+	    bo->flags & XE_BO_INTERNAL_TEST)) {
+		struct ttm_place *place = &(bo->placements[0]);
+		bool lmem;
+
+		XE_BUG_ON(!(place->flags & TTM_PL_FLAG_CONTIGUOUS));
+		XE_BUG_ON(!mem_type_is_vram(place->mem_type));
+
+		place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE, &lmem) -
+			       vram_region_io_offset(bo)) >> PAGE_SHIFT;
+		place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
+
+		spin_lock(&xe->pinned.lock);
+		list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
+		spin_unlock(&xe->pinned.lock);
+	}
+
+	ttm_bo_pin(&bo->ttm);
+
+	/*
+	 * FIXME: If we always use the reserve / unreserve functions for locking
+	 * we do not need this.
+	 */
+	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
+
+	return 0;
+}
+
+/**
+ * xe_bo_unpin_external - unpin an external BO
+ * @bo: buffer object to be unpinned
+ *
+ * Unpin an external (not tied to a VM, can be exported via dma-buf / prime FD)
+ * BO. Unique call compared to xe_bo_unpin as this function has its own set of
+ * asserts and code to ensure evict / restore on suspend / resume.
+ */
+void xe_bo_unpin_external(struct xe_bo *bo)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+
+	XE_BUG_ON(bo->vm);
+	XE_BUG_ON(!xe_bo_is_pinned(bo));
+	XE_BUG_ON(!xe_bo_is_user(bo));
+
+	if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link)) {
+		spin_lock(&xe->pinned.lock);
+		list_del_init(&bo->pinned_link);
+		spin_unlock(&xe->pinned.lock);
+	}
+
+	ttm_bo_unpin(&bo->ttm);
+
+	/*
+	 * FIXME: If we always use the reserve / unreserve functions for locking
+	 * we do not need this.
+	 */
+	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
+}
+
+void xe_bo_unpin(struct xe_bo *bo)
+{
+	struct xe_device *xe = xe_bo_device(bo);
+
+	XE_BUG_ON(bo->ttm.base.import_attach);
+	XE_BUG_ON(!xe_bo_is_pinned(bo));
+
+	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
+	    bo->flags & XE_BO_INTERNAL_TEST)) {
+		XE_BUG_ON(list_empty(&bo->pinned_link));
+
+		spin_lock(&xe->pinned.lock);
+		list_del_init(&bo->pinned_link);
+		spin_unlock(&xe->pinned.lock);
+	}
+
+	ttm_bo_unpin(&bo->ttm);
+}
+
+/**
+ * xe_bo_validate() - Make sure the bo is in an allowed placement
+ * @bo: The bo
+ * @vm: Pointer to the vm the bo shares a locked dma_resv object with, or
+ * NULL. Used together with @allow_res_evict.
+ * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's
+ * reservation object.
+ *
+ * Make sure the bo is in an allowed placement, migrating it if necessary. If
+ * needed, other bos will be evicted. If bos selected for eviction share
+ * the @vm's reservation object, they can be evicted iff @allow_res_evict is
+ * set to true, otherwise they will be bypassed.
+ *
+ * Return: 0 on success, negative error code on failure. May return
+ * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal.
1266 + */ 1267 + int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) 1268 + { 1269 + struct ttm_operation_ctx ctx = { 1270 + .interruptible = true, 1271 + .no_wait_gpu = false, 1272 + }; 1273 + 1274 + if (vm) { 1275 + lockdep_assert_held(&vm->lock); 1276 + xe_vm_assert_held(vm); 1277 + 1278 + ctx.allow_res_evict = allow_res_evict; 1279 + ctx.resv = &vm->resv; 1280 + } 1281 + 1282 + return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx); 1283 + } 1284 + 1285 + bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo) 1286 + { 1287 + if (bo->destroy == &xe_ttm_bo_destroy) 1288 + return true; 1289 + 1290 + return false; 1291 + } 1292 + 1293 + dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, 1294 + size_t page_size, bool *is_lmem) 1295 + { 1296 + struct xe_res_cursor cur; 1297 + u64 page; 1298 + 1299 + if (!READ_ONCE(bo->ttm.pin_count)) 1300 + xe_bo_assert_held(bo); 1301 + 1302 + XE_BUG_ON(page_size > PAGE_SIZE); 1303 + page = offset >> PAGE_SHIFT; 1304 + offset &= (PAGE_SIZE - 1); 1305 + 1306 + *is_lmem = xe_bo_is_vram(bo); 1307 + 1308 + if (!*is_lmem) { 1309 + XE_BUG_ON(!bo->ttm.ttm); 1310 + 1311 + xe_res_first_sg(xe_bo_get_sg(bo), page << PAGE_SHIFT, 1312 + page_size, &cur); 1313 + return xe_res_dma(&cur) + offset; 1314 + } else { 1315 + struct xe_res_cursor cur; 1316 + 1317 + xe_res_first(bo->ttm.resource, page << PAGE_SHIFT, 1318 + page_size, &cur); 1319 + return cur.start + offset + vram_region_io_offset(bo); 1320 + } 1321 + } 1322 + 1323 + int xe_bo_vmap(struct xe_bo *bo) 1324 + { 1325 + void *virtual; 1326 + bool is_iomem; 1327 + int ret; 1328 + 1329 + xe_bo_assert_held(bo); 1330 + 1331 + if (!iosys_map_is_null(&bo->vmap)) 1332 + return 0; 1333 + 1334 + /* 1335 + * We use this more or less deprecated interface for now since 1336 + * ttm_bo_vmap() doesn't offer the optimization of kmapping 1337 + * single page bos, which is done here. 1338 + * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap 1339 + * to use struct iosys_map. 
1340 + */ 1341 + ret = ttm_bo_kmap(&bo->ttm, 0, bo->size >> PAGE_SHIFT, &bo->kmap); 1342 + if (ret) 1343 + return ret; 1344 + 1345 + virtual = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem); 1346 + if (is_iomem) 1347 + iosys_map_set_vaddr_iomem(&bo->vmap, (void __iomem *)virtual); 1348 + else 1349 + iosys_map_set_vaddr(&bo->vmap, virtual); 1350 + 1351 + return 0; 1352 + } 1353 + 1354 + static void __xe_bo_vunmap(struct xe_bo *bo) 1355 + { 1356 + if (!iosys_map_is_null(&bo->vmap)) { 1357 + iosys_map_clear(&bo->vmap); 1358 + ttm_bo_kunmap(&bo->kmap); 1359 + } 1360 + } 1361 + 1362 + void xe_bo_vunmap(struct xe_bo *bo) 1363 + { 1364 + xe_bo_assert_held(bo); 1365 + __xe_bo_vunmap(bo); 1366 + } 1367 + 1368 + int xe_gem_create_ioctl(struct drm_device *dev, void *data, 1369 + struct drm_file *file) 1370 + { 1371 + struct xe_device *xe = to_xe_device(dev); 1372 + struct xe_file *xef = to_xe_file(file); 1373 + struct drm_xe_gem_create *args = data; 1374 + struct ww_acquire_ctx ww; 1375 + struct xe_vm *vm = NULL; 1376 + struct xe_bo *bo; 1377 + unsigned bo_flags = XE_BO_CREATE_USER_BIT; 1378 + u32 handle; 1379 + int err; 1380 + 1381 + if (XE_IOCTL_ERR(xe, args->extensions)) 1382 + return -EINVAL; 1383 + 1384 + if (XE_IOCTL_ERR(xe, args->flags & 1385 + ~(XE_GEM_CREATE_FLAG_DEFER_BACKING | 1386 + XE_GEM_CREATE_FLAG_SCANOUT | 1387 + xe->info.mem_region_mask))) 1388 + return -EINVAL; 1389 + 1390 + /* at least one memory type must be specified */ 1391 + if (XE_IOCTL_ERR(xe, !(args->flags & xe->info.mem_region_mask))) 1392 + return -EINVAL; 1393 + 1394 + if (XE_IOCTL_ERR(xe, args->handle)) 1395 + return -EINVAL; 1396 + 1397 + if (XE_IOCTL_ERR(xe, args->size > SIZE_MAX)) 1398 + return -EINVAL; 1399 + 1400 + if (XE_IOCTL_ERR(xe, args->size & ~PAGE_MASK)) 1401 + return -EINVAL; 1402 + 1403 + if (args->vm_id) { 1404 + vm = xe_vm_lookup(xef, args->vm_id); 1405 + if (XE_IOCTL_ERR(xe, !vm)) 1406 + return -ENOENT; 1407 + err = xe_vm_lock(vm, &ww, 0, true); 1408 + if (err) { 1409 + 
xe_vm_put(vm); 1410 + return err; 1411 + } 1412 + } 1413 + 1414 + if (args->flags & XE_GEM_CREATE_FLAG_DEFER_BACKING) 1415 + bo_flags |= XE_BO_DEFER_BACKING; 1416 + 1417 + if (args->flags & XE_GEM_CREATE_FLAG_SCANOUT) 1418 + bo_flags |= XE_BO_SCANOUT_BIT; 1419 + 1420 + bo_flags |= args->flags << (ffs(XE_BO_CREATE_SYSTEM_BIT) - 1); 1421 + bo = xe_bo_create(xe, NULL, vm, args->size, ttm_bo_type_device, 1422 + bo_flags); 1423 + if (vm) { 1424 + xe_vm_unlock(vm, &ww); 1425 + xe_vm_put(vm); 1426 + } 1427 + 1428 + if (IS_ERR(bo)) 1429 + return PTR_ERR(bo); 1430 + 1431 + err = drm_gem_handle_create(file, &bo->ttm.base, &handle); 1432 + xe_bo_put(bo); 1433 + if (err) 1434 + return err; 1435 + 1436 + args->handle = handle; 1437 + 1438 + return 0; 1439 + } 1440 + 1441 + int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data, 1442 + struct drm_file *file) 1443 + { 1444 + struct xe_device *xe = to_xe_device(dev); 1445 + struct drm_xe_gem_mmap_offset *args = data; 1446 + struct drm_gem_object *gem_obj; 1447 + 1448 + if (XE_IOCTL_ERR(xe, args->extensions)) 1449 + return -EINVAL; 1450 + 1451 + if (XE_IOCTL_ERR(xe, args->flags)) 1452 + return -EINVAL; 1453 + 1454 + gem_obj = drm_gem_object_lookup(file, args->handle); 1455 + if (XE_IOCTL_ERR(xe, !gem_obj)) 1456 + return -ENOENT; 1457 + 1458 + /* The mmap offset was set up at BO allocation time. 
+ */
+	args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
+
+	xe_bo_put(gem_to_xe_bo(gem_obj));
+	return 0;
+}
+
+int xe_bo_lock(struct xe_bo *bo, struct ww_acquire_ctx *ww,
+	       int num_resv, bool intr)
+{
+	struct ttm_validate_buffer tv_bo;
+	LIST_HEAD(objs);
+	LIST_HEAD(dups);
+
+	XE_BUG_ON(!ww);
+
+	tv_bo.num_shared = num_resv;
+	tv_bo.bo = &bo->ttm;
+	list_add_tail(&tv_bo.head, &objs);
+
+	return ttm_eu_reserve_buffers(ww, &objs, intr, &dups);
+}
+
+void xe_bo_unlock(struct xe_bo *bo, struct ww_acquire_ctx *ww)
+{
+	dma_resv_unlock(bo->ttm.base.resv);
+	ww_acquire_fini(ww);
+}
+
+/**
+ * xe_bo_can_migrate - Whether a buffer object likely can be migrated
+ * @bo: The buffer object to migrate
+ * @mem_type: The TTM memory type intended to migrate to
+ *
+ * Check whether the buffer object supports migration to the
+ * given memory type. Note that pinning may affect the ability to migrate as
+ * returned by this function.
+ *
+ * This function is primarily intended as a helper for checking the
+ * possibility to migrate buffer objects and can be called without
+ * the object lock held.
+ *
+ * Return: true if migration is possible, false otherwise.
1501 + */ 1502 + bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type) 1503 + { 1504 + unsigned int cur_place; 1505 + 1506 + if (bo->ttm.type == ttm_bo_type_kernel) 1507 + return true; 1508 + 1509 + if (bo->ttm.type == ttm_bo_type_sg) 1510 + return false; 1511 + 1512 + for (cur_place = 0; cur_place < bo->placement.num_placement; 1513 + cur_place++) { 1514 + if (bo->placements[cur_place].mem_type == mem_type) 1515 + return true; 1516 + } 1517 + 1518 + return false; 1519 + } 1520 + 1521 + static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place) 1522 + { 1523 + memset(place, 0, sizeof(*place)); 1524 + place->mem_type = mem_type; 1525 + } 1526 + 1527 + /** 1528 + * xe_bo_migrate - Migrate an object to the desired region id 1529 + * @bo: The buffer object to migrate. 1530 + * @mem_type: The TTM region type to migrate to. 1531 + * 1532 + * Attempt to migrate the buffer object to the desired memory region. The 1533 + * buffer object may not be pinned, and must be locked. 1534 + * On successful completion, the object memory type will be updated, 1535 + * but an async migration task may not have completed yet, and to 1536 + * accomplish that, the object's kernel fences must be signaled with 1537 + * the object lock held. 1538 + * 1539 + * Return: 0 on success. Negative error code on failure. In particular may 1540 + * return -EINTR or -ERESTARTSYS if signal pending. 
+ */
+int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
+{
+	struct ttm_operation_ctx ctx = {
+		.interruptible = true,
+		.no_wait_gpu = false,
+	};
+	struct ttm_placement placement;
+	struct ttm_place requested;
+
+	xe_bo_assert_held(bo);
+
+	if (bo->ttm.resource->mem_type == mem_type)
+		return 0;
+
+	if (xe_bo_is_pinned(bo))
+		return -EBUSY;
+
+	if (!xe_bo_can_migrate(bo, mem_type))
+		return -EINVAL;
+
+	xe_place_from_ttm_type(mem_type, &requested);
+	placement.num_placement = 1;
+	placement.num_busy_placement = 1;
+	placement.placement = &requested;
+	placement.busy_placement = &requested;
+
+	return ttm_bo_validate(&bo->ttm, &placement, &ctx);
+}
+
+/**
+ * xe_bo_evict - Evict an object to evict placement
+ * @bo: The buffer object to migrate.
+ * @force_alloc: Set force_alloc in ttm_operation_ctx
+ *
+ * On successful completion, the object memory will be moved to evict
+ * placement. This function blocks until the object has been fully moved.
+ *
+ * Return: 0 on success. Negative error code on failure.
+ */
+int xe_bo_evict(struct xe_bo *bo, bool force_alloc)
+{
+	struct ttm_operation_ctx ctx = {
+		.interruptible = false,
+		.no_wait_gpu = false,
+		.force_alloc = force_alloc,
+	};
+	struct ttm_placement placement;
+	int ret;
+
+	xe_evict_flags(&bo->ttm, &placement);
+	ret = ttm_bo_validate(&bo->ttm, &placement, &ctx);
+	if (ret)
+		return ret;
+
+	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
+			      false, MAX_SCHEDULE_TIMEOUT);
+
+	return 0;
+}
+
+/**
+ * xe_bo_needs_ccs_pages - Whether a bo needs to back up CCS pages when
+ * placed in system memory.
1605 + * @bo: The xe_bo 1606 + * 1607 + * If a bo has an allowable placement in XE_PL_TT memory, it can't use 1608 + * flat CCS compression, because the GPU then has no way to access the 1609 + * CCS metadata using relevant commands. For the opposite case, we need to 1610 + * allocate storage for the CCS metadata when the BO is not resident in 1611 + * VRAM memory. 1612 + * 1613 + * Return: true if extra pages need to be allocated, false otherwise. 1614 + */ 1615 + bool xe_bo_needs_ccs_pages(struct xe_bo *bo) 1616 + { 1617 + return bo->ttm.type == ttm_bo_type_device && 1618 + !(bo->flags & XE_BO_CREATE_SYSTEM_BIT) && 1619 + (bo->flags & (XE_BO_CREATE_VRAM0_BIT | XE_BO_CREATE_VRAM1_BIT)); 1620 + } 1621 + 1622 + /** 1623 + * __xe_bo_release_dummy() - Dummy kref release function 1624 + * @kref: The embedded struct kref. 1625 + * 1626 + * Dummy release function for xe_bo_put_deferred(). Keep off. 1627 + */ 1628 + void __xe_bo_release_dummy(struct kref *kref) 1629 + { 1630 + } 1631 + 1632 + /** 1633 + * xe_bo_put_commit() - Put bos whose put was deferred by xe_bo_put_deferred(). 1634 + * @deferred: The lockless list used for the call to xe_bo_put_deferred(). 1635 + * 1636 + * Puts all bos whose put was deferred by xe_bo_put_deferred(). 1637 + * The @deferred list can be either an onstack local list or a global 1638 + * shared list used by a workqueue. 1639 + */ 1640 + void xe_bo_put_commit(struct llist_head *deferred) 1641 + { 1642 + struct llist_node *freed; 1643 + struct xe_bo *bo, *next; 1644 + 1645 + if (!deferred) 1646 + return; 1647 + 1648 + freed = llist_del_all(deferred); 1649 + if (!freed) 1650 + return; 1651 + 1652 + llist_for_each_entry_safe(bo, next, freed, freed) 1653 + drm_gem_object_free(&bo->ttm.base.refcount); 1654 + } 1655 + 1656 + /** 1657 + * xe_bo_dumb_create - Create a dumb bo as backing for a fb 1658 + * @file_priv: ... 1659 + * @dev: ... 1660 + * @args: ... 
1661 + * 1662 + * See dumb_create() hook in include/drm/drm_drv.h 1663 + * 1664 + * Return: ... 1665 + */ 1666 + int xe_bo_dumb_create(struct drm_file *file_priv, 1667 + struct drm_device *dev, 1668 + struct drm_mode_create_dumb *args) 1669 + { 1670 + struct xe_device *xe = to_xe_device(dev); 1671 + struct xe_bo *bo; 1672 + uint32_t handle; 1673 + int cpp = DIV_ROUND_UP(args->bpp, 8); 1674 + int err; 1675 + u32 page_size = max_t(u32, PAGE_SIZE, 1676 + xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K); 1677 + 1678 + args->pitch = ALIGN(args->width * cpp, 64); 1679 + args->size = ALIGN(mul_u32_u32(args->pitch, args->height), 1680 + page_size); 1681 + 1682 + bo = xe_bo_create(xe, NULL, NULL, args->size, ttm_bo_type_device, 1683 + XE_BO_CREATE_VRAM_IF_DGFX(to_gt(xe)) | 1684 + XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT); 1685 + if (IS_ERR(bo)) 1686 + return PTR_ERR(bo); 1687 + 1688 + err = drm_gem_handle_create(file_priv, &bo->ttm.base, &handle); 1689 + /* drop reference from allocate - handle holds it now */ 1690 + drm_gem_object_put(&bo->ttm.base); 1691 + if (!err) 1692 + args->handle = handle; 1693 + return err; 1694 + } 1695 + 1696 + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) 1697 + #include "tests/xe_bo.c" 1698 + #endif
drivers/gpu/drm/xe/xe_bo.h
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_BO_H_
+#define _XE_BO_H_
+
+#include "xe_bo_types.h"
+#include "xe_macros.h"
+#include "xe_vm_types.h"
+
+#define XE_DEFAULT_GTT_SIZE_MB	3072ULL /* 3GB by default */
+
+#define XE_BO_CREATE_USER_BIT		BIT(1)
+#define XE_BO_CREATE_SYSTEM_BIT		BIT(2)
+#define XE_BO_CREATE_VRAM0_BIT		BIT(3)
+#define XE_BO_CREATE_VRAM1_BIT		BIT(4)
+#define XE_BO_CREATE_VRAM_IF_DGFX(gt) \
+	(IS_DGFX(gt_to_xe(gt)) ? XE_BO_CREATE_VRAM0_BIT << gt->info.vram_id : \
+	 XE_BO_CREATE_SYSTEM_BIT)
+#define XE_BO_CREATE_GGTT_BIT		BIT(5)
+#define XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT	BIT(6)
+#define XE_BO_CREATE_PINNED_BIT		BIT(7)
+#define XE_BO_DEFER_BACKING		BIT(8)
+#define XE_BO_SCANOUT_BIT		BIT(9)
+/* this one is triggered internally only */
+#define XE_BO_INTERNAL_TEST		BIT(30)
+#define XE_BO_INTERNAL_64K		BIT(31)
+
+#define PPAT_UNCACHED			GENMASK_ULL(4, 3)
+#define PPAT_CACHED_PDE			0
+#define PPAT_CACHED			BIT_ULL(7)
+#define PPAT_DISPLAY_ELLC		BIT_ULL(4)
+
+#define GEN8_PTE_SHIFT			12
+#define GEN8_PAGE_SIZE			(1 << GEN8_PTE_SHIFT)
+#define GEN8_PTE_MASK			(GEN8_PAGE_SIZE - 1)
+#define GEN8_PDE_SHIFT			(GEN8_PTE_SHIFT - 3)
+#define GEN8_PDES			(1 << GEN8_PDE_SHIFT)
+#define GEN8_PDE_MASK			(GEN8_PDES - 1)
+
+#define GEN8_64K_PTE_SHIFT		16
+#define GEN8_64K_PAGE_SIZE		(1 << GEN8_64K_PTE_SHIFT)
+#define GEN8_64K_PTE_MASK		(GEN8_64K_PAGE_SIZE - 1)
+#define GEN8_64K_PDE_MASK		(GEN8_PDE_MASK >> 4)
+
+#define GEN8_PDE_PS_2M			BIT_ULL(7)
+#define GEN8_PDPE_PS_1G			BIT_ULL(7)
+#define GEN8_PDE_IPS_64K		BIT_ULL(11)
+
+#define GEN12_GGTT_PTE_LM		BIT_ULL(1)
+#define GEN12_USM_PPGTT_PTE_AE		BIT_ULL(10)
+#define GEN12_PPGTT_PTE_LM		BIT_ULL(11)
+#define GEN12_PDE_64K			BIT_ULL(6)
+#define GEN12_PTE_PS64			BIT_ULL(8)
+
+#define GEN8_PAGE_PRESENT
BIT_ULL(0) 59 + #define GEN8_PAGE_RW BIT_ULL(1) 60 + 61 + #define PTE_READ_ONLY BIT(0) 62 + 63 + #define XE_PL_SYSTEM TTM_PL_SYSTEM 64 + #define XE_PL_TT TTM_PL_TT 65 + #define XE_PL_VRAM0 TTM_PL_VRAM 66 + #define XE_PL_VRAM1 (XE_PL_VRAM0 + 1) 67 + 68 + #define XE_BO_PROPS_INVALID (-1) 69 + 70 + struct sg_table; 71 + 72 + struct xe_bo *xe_bo_alloc(void); 73 + void xe_bo_free(struct xe_bo *bo); 74 + 75 + struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, 76 + struct xe_gt *gt, struct dma_resv *resv, 77 + size_t size, enum ttm_bo_type type, 78 + u32 flags); 79 + struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_gt *gt, 80 + struct xe_vm *vm, size_t size, 81 + enum ttm_bo_type type, u32 flags); 82 + struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_gt *gt, 83 + struct xe_vm *vm, size_t size, 84 + enum ttm_bo_type type, u32 flags); 85 + struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_gt *gt, 86 + struct xe_vm *vm, size_t size, 87 + enum ttm_bo_type type, u32 flags); 88 + struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_gt *gt, 89 + const void *data, size_t size, 90 + enum ttm_bo_type type, u32 flags); 91 + 92 + int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, 93 + u32 bo_flags); 94 + 95 + static inline struct xe_bo *ttm_to_xe_bo(const struct ttm_buffer_object *bo) 96 + { 97 + return container_of(bo, struct xe_bo, ttm); 98 + } 99 + 100 + static inline struct xe_bo *gem_to_xe_bo(const struct drm_gem_object *obj) 101 + { 102 + return container_of(obj, struct xe_bo, ttm.base); 103 + } 104 + 105 + #define xe_bo_device(bo) ttm_to_xe_device((bo)->ttm.bdev) 106 + 107 + static inline struct xe_bo *xe_bo_get(struct xe_bo *bo) 108 + { 109 + if (bo) 110 + drm_gem_object_get(&bo->ttm.base); 111 + 112 + return bo; 113 + } 114 + 115 + static inline void xe_bo_put(struct xe_bo *bo) 116 + { 117 + if (bo) 118 + drm_gem_object_put(&bo->ttm.base); 119 + } 120 + 121 + static inline 
void xe_bo_assert_held(struct xe_bo *bo) 122 + { 123 + if (bo) 124 + dma_resv_assert_held((bo)->ttm.base.resv); 125 + } 126 + 127 + int xe_bo_lock(struct xe_bo *bo, struct ww_acquire_ctx *ww, 128 + int num_resv, bool intr); 129 + 130 + void xe_bo_unlock(struct xe_bo *bo, struct ww_acquire_ctx *ww); 131 + 132 + static inline void xe_bo_unlock_vm_held(struct xe_bo *bo) 133 + { 134 + if (bo) { 135 + XE_BUG_ON(bo->vm && bo->ttm.base.resv != &bo->vm->resv); 136 + if (bo->vm) 137 + xe_vm_assert_held(bo->vm); 138 + else 139 + dma_resv_unlock(bo->ttm.base.resv); 140 + } 141 + } 142 + 143 + static inline void xe_bo_lock_no_vm(struct xe_bo *bo, 144 + struct ww_acquire_ctx *ctx) 145 + { 146 + if (bo) { 147 + XE_BUG_ON(bo->vm || (bo->ttm.type != ttm_bo_type_sg && 148 + bo->ttm.base.resv != &bo->ttm.base._resv)); 149 + dma_resv_lock(bo->ttm.base.resv, ctx); 150 + } 151 + } 152 + 153 + static inline void xe_bo_unlock_no_vm(struct xe_bo *bo) 154 + { 155 + if (bo) { 156 + XE_BUG_ON(bo->vm || (bo->ttm.type != ttm_bo_type_sg && 157 + bo->ttm.base.resv != &bo->ttm.base._resv)); 158 + dma_resv_unlock(bo->ttm.base.resv); 159 + } 160 + } 161 + 162 + int xe_bo_pin_external(struct xe_bo *bo); 163 + int xe_bo_pin(struct xe_bo *bo); 164 + void xe_bo_unpin_external(struct xe_bo *bo); 165 + void xe_bo_unpin(struct xe_bo *bo); 166 + int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict); 167 + 168 + static inline bool xe_bo_is_pinned(struct xe_bo *bo) 169 + { 170 + return bo->ttm.pin_count; 171 + } 172 + 173 + static inline void xe_bo_unpin_map_no_vm(struct xe_bo *bo) 174 + { 175 + if (likely(bo)) { 176 + xe_bo_lock_no_vm(bo, NULL); 177 + xe_bo_unpin(bo); 178 + xe_bo_unlock_no_vm(bo); 179 + 180 + xe_bo_put(bo); 181 + } 182 + } 183 + 184 + bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo); 185 + dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, 186 + size_t page_size, bool *is_lmem); 187 + 188 + static inline dma_addr_t 189 + xe_bo_main_addr(struct xe_bo *bo, size_t 
page_size) 190 + { 191 + bool is_lmem; 192 + 193 + return xe_bo_addr(bo, 0, page_size, &is_lmem); 194 + } 195 + 196 + static inline u32 197 + xe_bo_ggtt_addr(struct xe_bo *bo) 198 + { 199 + XE_BUG_ON(bo->ggtt_node.size > bo->size); 200 + XE_BUG_ON(bo->ggtt_node.start + bo->ggtt_node.size > (1ull << 32)); 201 + return bo->ggtt_node.start; 202 + } 203 + 204 + int xe_bo_vmap(struct xe_bo *bo); 205 + void xe_bo_vunmap(struct xe_bo *bo); 206 + 207 + bool mem_type_is_vram(u32 mem_type); 208 + bool xe_bo_is_vram(struct xe_bo *bo); 209 + 210 + bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type); 211 + 212 + int xe_bo_migrate(struct xe_bo *bo, u32 mem_type); 213 + int xe_bo_evict(struct xe_bo *bo, bool force_alloc); 214 + 215 + extern struct ttm_device_funcs xe_ttm_funcs; 216 + 217 + int xe_gem_create_ioctl(struct drm_device *dev, void *data, 218 + struct drm_file *file); 219 + int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data, 220 + struct drm_file *file); 221 + int xe_bo_dumb_create(struct drm_file *file_priv, 222 + struct drm_device *dev, 223 + struct drm_mode_create_dumb *args); 224 + 225 + bool xe_bo_needs_ccs_pages(struct xe_bo *bo); 226 + 227 + static inline size_t xe_bo_ccs_pages_start(struct xe_bo *bo) 228 + { 229 + return PAGE_ALIGN(bo->ttm.base.size); 230 + } 231 + 232 + void __xe_bo_release_dummy(struct kref *kref); 233 + 234 + /** 235 + * xe_bo_put_deferred() - Put a buffer object with delayed final freeing 236 + * @bo: The bo to put. 237 + * @deferred: List to which to add the buffer object if we cannot put, or 238 + * NULL if the function is to put unconditionally. 239 + * 240 + * Since the final freeing of an object includes both sleeping and (!) 241 + * memory allocation in the dma_resv individualization, it's not ok 242 + * to put an object from atomic context nor from within a held lock 243 + * tainted by reclaim. 
In such situations we want to defer the final 244 + * freeing until we've exited the restricting context, or in the worst 245 + * case to a workqueue. 246 + * This function either puts the object if possible without the refcount 247 + * reaching zero, or adds it to the @deferred list if that was not possible. 248 + * The caller needs to follow up with a call to xe_bo_put_commit() to actually 249 + * put the bo iff this function returns true. It's safe to always 250 + * follow up with a call to xe_bo_put_commit(). 251 + * TODO: It's TTM that is the villain here. Perhaps TTM should add an 252 + * interface like this. 253 + * 254 + * Return: true if @bo was the first object put on the @deferred list, 255 + * false otherwise. 256 + */ 257 + static inline bool 258 + xe_bo_put_deferred(struct xe_bo *bo, struct llist_head *deferred) 259 + { 260 + if (!deferred) { 261 + xe_bo_put(bo); 262 + return false; 263 + } 264 + 265 + if (!kref_put(&bo->ttm.base.refcount, __xe_bo_release_dummy)) 266 + return false; 267 + 268 + return llist_add(&bo->freed, deferred); 269 + } 270 + 271 + void xe_bo_put_commit(struct llist_head *deferred); 272 + 273 + struct sg_table *xe_bo_get_sg(struct xe_bo *bo); 274 + 275 + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) 276 + /** 277 + * xe_bo_is_mem_type - Whether the bo currently resides in the given 278 + * TTM memory type 279 + * @bo: The bo to check. 280 + * @mem_type: The TTM memory type. 281 + * 282 + * Return: true iff the bo resides in @mem_type, false otherwise. 283 + */ 284 + static inline bool xe_bo_is_mem_type(struct xe_bo *bo, u32 mem_type) 285 + { 286 + xe_bo_assert_held(bo); 287 + return bo->ttm.resource->mem_type == mem_type; 288 + } 289 + #endif 290 + #endif
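The deferred-put scheme documented above can be modeled in plain userspace C. This is an illustrative sketch, not the kernel API: `obj`, `obj_put_deferred` and `obj_put_commit` are hypothetical stand-ins for xe_bo, xe_bo_put_deferred() and xe_bo_put_commit(), with a plain pointer list in place of llist and an int in place of the kref.

```c
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

/* Hypothetical userspace analog of xe_bo_put_deferred()/xe_bo_put_commit():
 * a put from a restricted context pushes the object onto a deferred list
 * instead of freeing it; a later commit step does the real free. */

struct obj {
	int refcount;       /* stand-in for the embedded kref */
	bool freed;         /* set by the commit step */
	struct obj *next;   /* stand-in for the llist node */
};

/* Returns true iff @o was the first object added to *@deferred,
 * mirroring the llist_add() return value used by the driver. */
static bool obj_put_deferred(struct obj *o, struct obj **deferred)
{
	if (!deferred) {            /* no list: put unconditionally */
		if (--o->refcount == 0)
			o->freed = true;
		return false;
	}

	if (--o->refcount > 0)      /* refcount still held elsewhere */
		return false;

	bool was_empty = (*deferred == NULL);
	o->next = *deferred;        /* push onto the deferred list */
	*deferred = o;
	return was_empty;
}

/* Commit step: walk the deferred list and do the real "free". */
static void obj_put_commit(struct obj **deferred)
{
	struct obj *o = *deferred;

	*deferred = NULL;
	while (o) {
		struct obj *next = o->next;

		o->freed = true;    /* real code would free memory here */
		o = next;
	}
}
```

A caller can thus batch any number of puts from a restricted context and run the commit step once after leaving it, which is the pattern the kernel-doc above describes.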
+179
drivers/gpu/drm/xe/xe_bo_doc.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_BO_DOC_H_ 7 + #define _XE_BO_DOC_H_ 8 + 9 + /** 10 + * DOC: Buffer Objects (BO) 11 + * 12 + * BO management 13 + * ============= 14 + * 15 + * TTM manages (placement, eviction, etc...) all BOs in XE. 16 + * 17 + * BO creation 18 + * =========== 19 + * 20 + * Create a chunk of memory which can be used by the GPU. Placement rules 21 + * (sysmem or VRAM region) are passed in at creation. TTM handles placement of 22 + * the BO and can trigger eviction of other BOs to make space for the new BO. 23 + * 24 + * Kernel BOs 25 + * ---------- 26 + * 27 + * A kernel BO is created as part of driver load (e.g. uC firmware images, GuC 28 + * ADS, etc...) or a BO created as part of a user operation which requires 29 + * a kernel BO (e.g. engine state, memory for page tables, etc...). These BOs 30 + * are typically mapped in the GGTT (any kernel BOs aside from memory for page 31 + * tables are in the GGTT), are pinned (can't move or be evicted at runtime), 32 + * have a vmap (XE can access the memory via the xe_map layer) and have 33 + * contiguous physical memory. 34 + * 35 + * More details on why kernel BOs are pinned and contiguous are below. 36 + * 37 + * User BOs 38 + * -------- 39 + * 40 + * A user BO is created via the DRM_IOCTL_XE_GEM_CREATE IOCTL. Once it is 41 + * created the BO can be mmap'd (via DRM_IOCTL_XE_GEM_MMAP_OFFSET) for user 42 + * access and it can be bound for GPU access (via DRM_IOCTL_XE_VM_BIND). All 43 + * user BOs are evictable and user BOs are never pinned by XE. The allocation of 44 + * the backing store can be deferred from creation time until first use, which 45 + * is either mmap, bind, or pagefault. 46 + * 47 + * Private BOs 48 + * ~~~~~~~~~~~ 49 + * 50 + * A private BO is a user BO created with a valid VM argument passed into the 51 + * create IOCTL.
If a BO is private it cannot be exported via prime FD and 52 + * mappings can only be created for the BO within the VM it is tied to. Lastly, 53 + * the BO dma-resv slots / lock point to the VM's dma-resv slots / lock (all 54 + * private BOs of a VM share common dma-resv slots / lock). 55 + * 56 + * External BOs 57 + * ~~~~~~~~~~~~ 58 + * 59 + * An external BO is a user BO created with a NULL VM argument passed into the 60 + * create IOCTL. An external BO can be shared with different UMDs / devices via 61 + * prime FD and the BO can be mapped into multiple VMs. An external BO has its 62 + * own unique dma-resv slots / lock. An external BO will be in an array of all 63 + * VMs which have a mapping of the BO. This allows VMs to look up and lock all 64 + * external BOs mapped in the VM as needed. 65 + * 66 + * BO placement 67 + * ~~~~~~~~~~~~ 68 + * 69 + * When a user BO is created, a mask of valid placements is passed indicating 70 + * which memory regions are considered valid. 71 + * 72 + * The memory region information is available via query uAPI (TODO: add link). 73 + * 74 + * BO validation 75 + * ============= 76 + * 77 + * BO validation (ttm_bo_validate) refers to ensuring a BO has a valid 78 + * placement. If a BO was swapped to temporary storage, a validation call will 79 + * trigger a move back to a valid (location where the GPU can access the BO) 80 + * placement. Validation of a BO may evict other BOs to make room for the BO 81 + * being validated. 82 + * 83 + * BO eviction / moving 84 + * ==================== 85 + * 86 + * All eviction (or in other words, moving a BO from one memory location to 87 + * another) is routed through TTM with a callback into XE. 88 + * 89 + * Runtime eviction 90 + * ---------------- 91 + * 92 + * Runtime eviction refers to TTM deciding, during normal operation, that it 93 + * needs to move a BO. Typically this is because TTM needs to make room for 94 + * another BO, and the evicted BO is the first BO on the LRU list that is not locked.
95 + * 96 + * An example of this is a new BO which can only be placed in VRAM but there is 97 + * no space in VRAM. There could be multiple BOs which have sysmem and VRAM 98 + * placement rules which currently reside in VRAM; TTM will trigger a move of 99 + * one (or multiple) of these BO(s) until there is room in VRAM to place the new 100 + * BO. The evicted BO(s) are valid but still need new bindings before the BO is 101 + * used again (exec or compute mode rebind worker). 102 + * 103 + * Another example would be when TTM can't find a BO to evict which has another 104 + * valid placement. In this case TTM will evict one (or multiple) unlocked BO(s) 105 + * to a temporary unreachable (invalid) placement. The evicted BO(s) are invalid 106 + * and before next use need to be moved to a valid placement and rebound. 107 + * 108 + * In both cases, moves of these BOs are scheduled behind the fences in the BO's 109 + * dma-resv slots. 110 + * 111 + * WW locking tries to ensure that if 2 VMs use 51% of the memory, forward 112 + * progress is made on both VMs. 113 + * 114 + * Runtime eviction uses a per-GT migration engine (TODO: link to migration 115 + * engine doc) to do a GPU memcpy from one location to another. 116 + * 117 + * Rebinds after runtime eviction 118 + * ------------------------------ 119 + * 120 + * When BOs are moved, every mapping (VMA) of the BO needs to be rebound before 121 + * the BO is used again. Every VMA is added to an evicted list of its VM when 122 + * the BO is moved. This is safe because of the VM locking structure (TODO: link 123 + * to VM locking doc). On the next use of a VM (exec or compute mode rebind 124 + * worker) the evicted VMA list is checked and rebinds are triggered. In the 125 + * case of a faulting VM, the rebind is done in the page fault handler.
126 + * 127 + * Suspend / resume eviction of VRAM 128 + * --------------------------------- 129 + * 130 + * During device suspend / resume VRAM may lose power, which means the contents 131 + * of VRAM are lost. Thus BOs present in VRAM at the time of 132 + * suspend must be moved to sysmem in order for their contents to be saved. 133 + * 134 + * A simple TTM call (ttm_resource_manager_evict_all) can move all non-pinned 135 + * (user) BOs to sysmem. External BOs that are pinned need to be manually 136 + * evicted with a simple loop + xe_bo_evict call. It gets a little trickier 137 + * with kernel BOs. 138 + * 139 + * Some kernel BOs are used by the GT migration engine to do moves, thus we 140 + * can't move all of the BOs via the GT migration engine. For simplicity, use a 141 + * TTM memcpy (CPU) to move any kernel (pinned) BO on either suspend or resume. 142 + * 143 + * Some kernel BOs need to be restored to the exact same physical location. TTM 144 + * makes this rather easy but the caveat is the memory must be contiguous. Again 145 + * for simplicity, we enforce that all kernel (pinned) BOs are contiguous and 146 + * restored to the same physical location. 147 + * 148 + * Pinned external BOs in VRAM are restored on resume via the GPU. 149 + * 150 + * Rebinds after suspend / resume 151 + * ------------------------------ 152 + * 153 + * Most kernel BOs have GGTT mappings which must be restored during the resume 154 + * process. All user BOs are rebound after validation on their next use. 155 + * 156 + * Future work 157 + * =========== 158 + * 159 + * Trim the list of BOs which is saved / restored via TTM memcpy on suspend / 160 + * resume. All we really need to save / restore via TTM memcpy is the memory 161 + * required for the GuC to load and the memory for the GT migrate engine to 162 + * operate. 163 + * 164 + * Do not require kernel BOs to be contiguous in physical memory / restored to 165 + * the same physical address on resume.
In all likelihood the only memory that 166 + * needs to be restored to the same physical address is memory used for page 167 + * tables. All of that memory is allocated 1 page at a time so the contiguous 168 + * requirement isn't needed. Some work on the vmap code would also need to be 169 + * done if kernel BOs are not contiguous. 170 + * 171 + * Make some kernel BOs evictable rather than pinned. An example of this would 172 + * be engine state; in all likelihood, if the dma-resv slots of these BOs were 173 + * properly used rather than pinning, we could safely evict + rebind these BOs 174 + * as needed. 175 + * 176 + * Some kernel BOs do not need to be restored on resume (e.g. GuC ADS as that is 177 + * repopulated on resume); add a flag to mark such objects as no save / restore. 178 + */ 179 + 180 + #endif
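The runtime-eviction walk described in the DOC comment above can be sketched as a toy userspace model: walk the LRU, skip locked BOs, and evict unlocked ones until there is room for the new allocation. This is purely illustrative (real TTM eviction also handles placement rules, fences, and WW locking); `toy_bo` and `evict_until_room` are hypothetical names, not driver API.

```c
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

/* Toy model of the LRU eviction walk: evict the first *unlocked*
 * BO(s) on the LRU until enough room exists for a new allocation. */

struct toy_bo {
	size_t size;
	bool locked;            /* locked BOs are skipped, not evicted */
	bool evicted;
	struct toy_bo *lru_next;
};

/* Returns true if @needed bytes could be made available within @total. */
static bool evict_until_room(struct toy_bo *lru, size_t total,
			     size_t used, size_t needed)
{
	for (struct toy_bo *bo = lru; bo; bo = bo->lru_next) {
		if (used + needed <= total)
			return true;        /* enough room already */
		if (bo->locked || bo->evicted)
			continue;           /* skip locked BOs, like TTM */
		bo->evicted = true;         /* real code: move to sysmem or
					     * an invalid placement */
		used -= bo->size;
	}
	return used + needed <= total;
}
```

As in the doc text, an "evicted" BO here is only displaced; in the driver it remains valid (or invalid) data that needs rebinding before next use.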
+225
drivers/gpu/drm/xe/xe_bo_evict.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #include "xe_bo.h" 7 + #include "xe_bo_evict.h" 8 + #include "xe_device.h" 9 + #include "xe_ggtt.h" 10 + #include "xe_gt.h" 11 + 12 + /** 13 + * xe_bo_evict_all - evict all BOs from VRAM 14 + * 15 + * @xe: xe device 16 + * 17 + * Evict non-pinned user BOs first (via GPU), evict pinned external BOs next 18 + * (via GPU), wait for evictions, and finally evict pinned kernel BOs via CPU. 19 + * All eviction magic done via TTM calls. 20 + * 21 + * Evict == move VRAM BOs to temporary (typically system) memory. 22 + * 23 + * This function should be called before the device goes into a suspend state 24 + * where the VRAM loses power. 25 + */ 26 + int xe_bo_evict_all(struct xe_device *xe) 27 + { 28 + struct ttm_device *bdev = &xe->ttm; 29 + struct ww_acquire_ctx ww; 30 + struct xe_bo *bo; 31 + struct xe_gt *gt; 32 + struct list_head still_in_list; 33 + u32 mem_type; 34 + u8 id; 35 + int ret; 36 + 37 + if (!IS_DGFX(xe)) 38 + return 0; 39 + 40 + /* User memory */ 41 + for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) { 42 + struct ttm_resource_manager *man = 43 + ttm_manager_type(bdev, mem_type); 44 + 45 + if (man) { 46 + ret = ttm_resource_manager_evict_all(bdev, man); 47 + if (ret) 48 + return ret; 49 + } 50 + } 51 + 52 + /* Pinned user memory in VRAM */ 53 + INIT_LIST_HEAD(&still_in_list); 54 + spin_lock(&xe->pinned.lock); 55 + for (;;) { 56 + bo = list_first_entry_or_null(&xe->pinned.external_vram, 57 + typeof(*bo), pinned_link); 58 + if (!bo) 59 + break; 60 + xe_bo_get(bo); 61 + list_move_tail(&bo->pinned_link, &still_in_list); 62 + spin_unlock(&xe->pinned.lock); 63 + 64 + xe_bo_lock(bo, &ww, 0, false); 65 + ret = xe_bo_evict(bo, true); 66 + xe_bo_unlock(bo, &ww); 67 + xe_bo_put(bo); 68 + if (ret) { 69 + spin_lock(&xe->pinned.lock); 70 + list_splice_tail(&still_in_list, 71 + &xe->pinned.external_vram); 72 + spin_unlock(&xe->pinned.lock); 73 + return ret; 
74 + } 75 + 76 + spin_lock(&xe->pinned.lock); 77 + } 78 + list_splice_tail(&still_in_list, &xe->pinned.external_vram); 79 + spin_unlock(&xe->pinned.lock); 80 + 81 + /* 82 + * Wait for all user BO to be evicted as those evictions depend on the 83 + * memory moved below. 84 + */ 85 + for_each_gt(gt, xe, id) 86 + xe_gt_migrate_wait(gt); 87 + 88 + spin_lock(&xe->pinned.lock); 89 + for (;;) { 90 + bo = list_first_entry_or_null(&xe->pinned.kernel_bo_present, 91 + typeof(*bo), pinned_link); 92 + if (!bo) 93 + break; 94 + xe_bo_get(bo); 95 + list_move_tail(&bo->pinned_link, &xe->pinned.evicted); 96 + spin_unlock(&xe->pinned.lock); 97 + 98 + xe_bo_lock(bo, &ww, 0, false); 99 + ret = xe_bo_evict(bo, true); 100 + xe_bo_unlock(bo, &ww); 101 + xe_bo_put(bo); 102 + if (ret) 103 + return ret; 104 + 105 + spin_lock(&xe->pinned.lock); 106 + } 107 + spin_unlock(&xe->pinned.lock); 108 + 109 + return 0; 110 + } 111 + 112 + /** 113 + * xe_bo_restore_kernel - restore kernel BOs to VRAM 114 + * 115 + * @xe: xe device 116 + * 117 + * Move kernel BOs from temporary (typically system) memory to VRAM via CPU. All 118 + * moves done via TTM calls. 119 + * 120 + * This function should be called early, before trying to init the GT, on device 121 + * resume. 
122 + */ 123 + int xe_bo_restore_kernel(struct xe_device *xe) 124 + { 125 + struct ww_acquire_ctx ww; 126 + struct xe_bo *bo; 127 + int ret; 128 + 129 + if (!IS_DGFX(xe)) 130 + return 0; 131 + 132 + spin_lock(&xe->pinned.lock); 133 + for (;;) { 134 + bo = list_first_entry_or_null(&xe->pinned.evicted, 135 + typeof(*bo), pinned_link); 136 + if (!bo) 137 + break; 138 + xe_bo_get(bo); 139 + list_move_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present); 140 + spin_unlock(&xe->pinned.lock); 141 + 142 + xe_bo_lock(bo, &ww, 0, false); 143 + ret = xe_bo_validate(bo, NULL, false); 144 + xe_bo_unlock(bo, &ww); 145 + if (ret) { 146 + xe_bo_put(bo); 147 + return ret; 148 + } 149 + 150 + if (bo->flags & XE_BO_CREATE_GGTT_BIT) 151 + xe_ggtt_map_bo(bo->gt->mem.ggtt, bo); 152 + 153 + /* 154 + * We expect validate to trigger a move VRAM and our move code 155 + * should setup the iosys map. 156 + */ 157 + XE_BUG_ON(iosys_map_is_null(&bo->vmap)); 158 + XE_BUG_ON(!xe_bo_is_vram(bo)); 159 + 160 + xe_bo_put(bo); 161 + 162 + spin_lock(&xe->pinned.lock); 163 + } 164 + spin_unlock(&xe->pinned.lock); 165 + 166 + return 0; 167 + } 168 + 169 + /** 170 + * xe_bo_restore_user - restore pinned user BOs to VRAM 171 + * 172 + * @xe: xe device 173 + * 174 + * Move pinned user BOs from temporary (typically system) memory to VRAM via 175 + * CPU. All moves done via TTM calls. 176 + * 177 + * This function should be called late, after GT init, on device resume. 
178 + */ 179 + int xe_bo_restore_user(struct xe_device *xe) 180 + { 181 + struct ww_acquire_ctx ww; 182 + struct xe_bo *bo; 183 + struct xe_gt *gt; 184 + struct list_head still_in_list; 185 + u8 id; 186 + int ret; 187 + 188 + if (!IS_DGFX(xe)) 189 + return 0; 190 + 191 + /* Pinned user memory in VRAM should be validated on resume */ 192 + INIT_LIST_HEAD(&still_in_list); 193 + spin_lock(&xe->pinned.lock); 194 + for (;;) { 195 + bo = list_first_entry_or_null(&xe->pinned.external_vram, 196 + typeof(*bo), pinned_link); 197 + if (!bo) 198 + break; 199 + list_move_tail(&bo->pinned_link, &still_in_list); 200 + xe_bo_get(bo); 201 + spin_unlock(&xe->pinned.lock); 202 + 203 + xe_bo_lock(bo, &ww, 0, false); 204 + ret = xe_bo_validate(bo, NULL, false); 205 + xe_bo_unlock(bo, &ww); 206 + xe_bo_put(bo); 207 + if (ret) { 208 + spin_lock(&xe->pinned.lock); 209 + list_splice_tail(&still_in_list, 210 + &xe->pinned.external_vram); 211 + spin_unlock(&xe->pinned.lock); 212 + return ret; 213 + } 214 + 215 + spin_lock(&xe->pinned.lock); 216 + } 217 + list_splice_tail(&still_in_list, &xe->pinned.external_vram); 218 + spin_unlock(&xe->pinned.lock); 219 + 220 + /* Wait for validate to complete */ 221 + for_each_gt(gt, xe, id) 222 + xe_gt_migrate_wait(gt); 223 + 224 + return 0; 225 + }
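The loop shape used by xe_bo_evict_all() and the restore functions above (pop one entry under the lock, move it to a private `still_in_list`, drop the lock for the sleeping work, reacquire, and finally splice back) can be sketched in userspace. Names and the int "spinlock" below are illustrative stand-ins only, assuming a single-threaded demonstration.

```c
#include <assert.h>
#include <stddef.h>

/* Sketch of the pop-under-lock iteration pattern: the lock is only
 * held while manipulating the list, never across the per-entry work. */

struct node {
	int processed;
	struct node *next;
};

static int list_lock;                   /* stand-in for xe->pinned.lock */
static void lock(void)   { assert(!list_lock); list_lock = 1; }
static void unlock(void) { assert(list_lock);  list_lock = 0; }

static void process_all(struct node **list)
{
	struct node *still_in_list = NULL, *tail = NULL;

	lock();
	for (;;) {
		struct node *n = *list;

		if (!n)
			break;
		*list = n->next;        /* pop under the lock */
		n->next = NULL;
		if (tail)               /* append to the private list */
			tail->next = n;
		else
			still_in_list = n;
		tail = n;
		unlock();

		n->processed = 1;       /* the "evict": may sleep here */

		lock();
	}
	*list = still_in_list;          /* splice back, as the driver does */
	unlock();
}
```

Moving entries to a private list before dropping the lock is what lets the walk restart safely: concurrent list changes can't invalidate the iterator, and on error the driver splices `still_in_list` back so no entry is lost.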
+15
drivers/gpu/drm/xe/xe_bo_evict.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_BO_EVICT_H_ 7 + #define _XE_BO_EVICT_H_ 8 + 9 + struct xe_device; 10 + 11 + int xe_bo_evict_all(struct xe_device *xe); 12 + int xe_bo_restore_kernel(struct xe_device *xe); 13 + int xe_bo_restore_user(struct xe_device *xe); 14 + 15 + #endif
+73
drivers/gpu/drm/xe/xe_bo_types.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_BO_TYPES_H_ 7 + #define _XE_BO_TYPES_H_ 8 + 9 + #include <linux/iosys-map.h> 10 + 11 + #include <drm/drm_mm.h> 12 + #include <drm/ttm/ttm_bo.h> 13 + #include <drm/ttm/ttm_device.h> 14 + #include <drm/ttm/ttm_execbuf_util.h> 15 + #include <drm/ttm/ttm_placement.h> 16 + 17 + struct xe_device; 18 + struct xe_vm; 19 + 20 + #define XE_BO_MAX_PLACEMENTS 3 21 + 22 + /** struct xe_bo - XE buffer object */ 23 + struct xe_bo { 24 + /** @ttm: TTM base buffer object */ 25 + struct ttm_buffer_object ttm; 26 + /** @size: Size of this buffer object */ 27 + size_t size; 28 + /** @flags: flags for this buffer object */ 29 + u32 flags; 30 + /** @vm: VM this BO is attached to, for extobj this will be NULL */ 31 + struct xe_vm *vm; 32 + /** @gt: GT this BO is attached to (kernel BO only) */ 33 + struct xe_gt *gt; 34 + /** @vmas: List of VMAs for this BO */ 35 + struct list_head vmas; 36 + /** @placements: valid placements for this BO */ 37 + struct ttm_place placements[XE_BO_MAX_PLACEMENTS]; 38 + /** @placement: current placement for this BO */ 39 + struct ttm_placement placement; 40 + /** @ggtt_node: GGTT node if this BO is mapped in the GGTT */ 41 + struct drm_mm_node ggtt_node; 42 + /** @vmap: iosys map of this buffer */ 43 + struct iosys_map vmap; 44 + /** @kmap: TTM bo kmap object for internal use only. Keep off.
*/ 45 + struct ttm_bo_kmap_obj kmap; 46 + /** @pinned_link: link to present / evicted list of pinned BO */ 47 + struct list_head pinned_link; 48 + /** @props: BO user controlled properties */ 49 + struct { 50 + /** @preferred_mem_class: preferred memory class for this BO */ 51 + s16 preferred_mem_class; 52 + /** @preferred_gt: preferred GT for this BO */ 53 + s16 preferred_gt; 54 + /** @preferred_mem_type: preferred memory type */ 55 + s32 preferred_mem_type; 56 + /** 57 + * @cpu_atomic: the CPU expects to do atomic operations to 58 + * this BO 59 + */ 60 + bool cpu_atomic; 61 + /** 62 + * @device_atomic: the device expects to do atomic operations 63 + * to this BO 64 + */ 65 + bool device_atomic; 66 + } props; 67 + /** @freed: List node for delayed put. */ 68 + struct llist_node freed; 69 + /** @created: Whether the bo has passed initial creation */ 70 + bool created; 71 + }; 72 + 73 + #endif
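struct xe_bo embeds its TTM base object as the first member, and helpers like ttm_to_xe_bo() recover the containing structure with container_of(). The same pattern can be shown with toy userspace types; `base_bo`, `derived_bo` and `to_derived` are hypothetical stand-ins for ttm_buffer_object, xe_bo and ttm_to_xe_bo(), and container_of() is re-derived here from offsetof() for illustration.

```c
#include <assert.h>
#include <stddef.h>

/* Minimal container_of(): step back from a member pointer to the
 * start of the structure that embeds it. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct base_bo {            /* stand-in for ttm_buffer_object */
	size_t base_size;
};

struct derived_bo {         /* stand-in for struct xe_bo */
	struct base_bo ttm; /* embedded base, like xe_bo.ttm */
	size_t size;
};

/* Analog of ttm_to_xe_bo(): recover the container from the base. */
static struct derived_bo *to_derived(struct base_bo *bo)
{
	return container_of(bo, struct derived_bo, ttm);
}
```

Because the base is embedded (not pointed to), the conversion is a constant-offset cast with no extra allocation or indirection, which is why TTM callbacks handing back a ttm_buffer_object can be mapped to the driver's own type so cheaply.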
+129
drivers/gpu/drm/xe/xe_debugfs.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #include <linux/string_helpers.h> 7 + 8 + #include <drm/drm_debugfs.h> 9 + 10 + #include "xe_bo.h" 11 + #include "xe_device.h" 12 + #include "xe_debugfs.h" 13 + #include "xe_gt_debugfs.h" 14 + #include "xe_step.h" 15 + 16 + #ifdef CONFIG_DRM_XE_DEBUG 17 + #include "xe_bo_evict.h" 18 + #include "xe_migrate.h" 19 + #include "xe_vm.h" 20 + #endif 21 + 22 + static struct xe_device *node_to_xe(struct drm_info_node *node) 23 + { 24 + return to_xe_device(node->minor->dev); 25 + } 26 + 27 + static int info(struct seq_file *m, void *data) 28 + { 29 + struct xe_device *xe = node_to_xe(m->private); 30 + struct drm_printer p = drm_seq_file_printer(m); 31 + struct xe_gt *gt; 32 + u8 id; 33 + 34 + drm_printf(&p, "graphics_verx100 %d\n", xe->info.graphics_verx100); 35 + drm_printf(&p, "media_verx100 %d\n", xe->info.media_verx100); 36 + drm_printf(&p, "stepping G:%s M:%s D:%s B:%s\n", 37 + xe_step_name(xe->info.step.graphics), 38 + xe_step_name(xe->info.step.media), 39 + xe_step_name(xe->info.step.display), 40 + xe_step_name(xe->info.step.basedie)); 41 + drm_printf(&p, "is_dgfx %s\n", str_yes_no(xe->info.is_dgfx)); 42 + drm_printf(&p, "platform %d\n", xe->info.platform); 43 + drm_printf(&p, "subplatform %d\n", 44 + xe->info.subplatform > XE_SUBPLATFORM_NONE ? 
xe->info.subplatform : 0); 45 + drm_printf(&p, "devid 0x%x\n", xe->info.devid); 46 + drm_printf(&p, "revid %d\n", xe->info.revid); 47 + drm_printf(&p, "tile_count %d\n", xe->info.tile_count); 48 + drm_printf(&p, "vm_max_level %d\n", xe->info.vm_max_level); 49 + drm_printf(&p, "enable_guc %s\n", str_yes_no(xe->info.enable_guc)); 50 + drm_printf(&p, "supports_usm %s\n", str_yes_no(xe->info.supports_usm)); 51 + drm_printf(&p, "has_flat_ccs %s\n", str_yes_no(xe->info.has_flat_ccs)); 52 + for_each_gt(gt, xe, id) { 53 + drm_printf(&p, "gt%d force wake %d\n", id, 54 + xe_force_wake_ref(gt_to_fw(gt), XE_FW_GT)); 55 + drm_printf(&p, "gt%d engine_mask 0x%llx\n", id, 56 + gt->info.engine_mask); 57 + } 58 + 59 + return 0; 60 + } 61 + 62 + static const struct drm_info_list debugfs_list[] = { 63 + {"info", info, 0}, 64 + }; 65 + 66 + static int forcewake_open(struct inode *inode, struct file *file) 67 + { 68 + struct xe_device *xe = inode->i_private; 69 + struct xe_gt *gt; 70 + u8 id; 71 + 72 + for_each_gt(gt, xe, id) 73 + XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); 74 + 75 + return 0; 76 + } 77 + 78 + static int forcewake_release(struct inode *inode, struct file *file) 79 + { 80 + struct xe_device *xe = inode->i_private; 81 + struct xe_gt *gt; 82 + u8 id; 83 + 84 + for_each_gt(gt, xe, id) 85 + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); 86 + 87 + return 0; 88 + } 89 + 90 + static const struct file_operations forcewake_all_fops = { 91 + .owner = THIS_MODULE, 92 + .open = forcewake_open, 93 + .release = forcewake_release, 94 + }; 95 + 96 + void xe_debugfs_register(struct xe_device *xe) 97 + { 98 + struct ttm_device *bdev = &xe->ttm; 99 + struct drm_minor *minor = xe->drm.primary; 100 + struct dentry *root = minor->debugfs_root; 101 + struct ttm_resource_manager *man; 102 + struct xe_gt *gt; 103 + u32 mem_type; 104 + u8 id; 105 + 106 + drm_debugfs_create_files(debugfs_list, 107 + ARRAY_SIZE(debugfs_list), 108 + root, minor); 109 + 110 + 
debugfs_create_file("forcewake_all", 0400, root, xe, 111 + &forcewake_all_fops); 112 + 113 + for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) { 114 + man = ttm_manager_type(bdev, mem_type); 115 + 116 + if (man) { 117 + char name[16]; 118 + 119 + sprintf(name, "vram%d_mm", mem_type - XE_PL_VRAM0); 120 + ttm_resource_manager_create_debugfs(man, root, name); 121 + } 122 + } 123 + 124 + man = ttm_manager_type(bdev, XE_PL_TT); 125 + ttm_resource_manager_create_debugfs(man, root, "gtt_mm"); 126 + 127 + for_each_gt(gt, xe, id) 128 + xe_gt_debugfs_register(gt); 129 + }
+13
drivers/gpu/drm/xe/xe_debugfs.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_DEBUGFS_H_ 7 + #define _XE_DEBUGFS_H_ 8 + 9 + struct xe_device; 10 + 11 + void xe_debugfs_register(struct xe_device *xe); 12 + 13 + #endif
+359
drivers/gpu/drm/xe/xe_device.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2021 Intel Corporation 4 + */ 5 + 6 + #include "xe_device.h" 7 + 8 + #include <drm/drm_gem_ttm_helper.h> 9 + #include <drm/drm_aperture.h> 10 + #include <drm/drm_ioctl.h> 11 + #include <drm/xe_drm.h> 12 + #include <drm/drm_managed.h> 13 + #include <drm/drm_atomic_helper.h> 14 + 15 + #include "xe_bo.h" 16 + #include "xe_debugfs.h" 17 + #include "xe_dma_buf.h" 18 + #include "xe_drv.h" 19 + #include "xe_engine.h" 20 + #include "xe_exec.h" 21 + #include "xe_gt.h" 22 + #include "xe_irq.h" 23 + #include "xe_module.h" 24 + #include "xe_mmio.h" 25 + #include "xe_pcode.h" 26 + #include "xe_pm.h" 27 + #include "xe_query.h" 28 + #include "xe_vm.h" 29 + #include "xe_vm_madvise.h" 30 + #include "xe_wait_user_fence.h" 31 + 32 + static int xe_file_open(struct drm_device *dev, struct drm_file *file) 33 + { 34 + struct xe_file *xef; 35 + 36 + xef = kzalloc(sizeof(*xef), GFP_KERNEL); 37 + if (!xef) 38 + return -ENOMEM; 39 + 40 + xef->drm = file; 41 + 42 + mutex_init(&xef->vm.lock); 43 + xa_init_flags(&xef->vm.xa, XA_FLAGS_ALLOC1); 44 + 45 + mutex_init(&xef->engine.lock); 46 + xa_init_flags(&xef->engine.xa, XA_FLAGS_ALLOC1); 47 + 48 + file->driver_priv = xef; 49 + return 0; 50 + } 51 + 52 + static void device_kill_persitent_engines(struct xe_device *xe, 53 + struct xe_file *xef); 54 + 55 + static void xe_file_close(struct drm_device *dev, struct drm_file *file) 56 + { 57 + struct xe_device *xe = to_xe_device(dev); 58 + struct xe_file *xef = file->driver_priv; 59 + struct xe_vm *vm; 60 + struct xe_engine *e; 61 + unsigned long idx; 62 + 63 + mutex_lock(&xef->engine.lock); 64 + xa_for_each(&xef->engine.xa, idx, e) { 65 + xe_engine_kill(e); 66 + xe_engine_put(e); 67 + } 68 + mutex_unlock(&xef->engine.lock); 69 + mutex_destroy(&xef->engine.lock); 70 + device_kill_persitent_engines(xe, xef); 71 + 72 + mutex_lock(&xef->vm.lock); 73 + xa_for_each(&xef->vm.xa, idx, vm) 74 + xe_vm_close_and_put(vm); 75 + 
mutex_unlock(&xef->vm.lock); 76 + mutex_destroy(&xef->vm.lock); 77 + 78 + kfree(xef); 79 + } 80 + 81 + static const struct drm_ioctl_desc xe_ioctls[] = { 82 + DRM_IOCTL_DEF_DRV(XE_DEVICE_QUERY, xe_query_ioctl, DRM_RENDER_ALLOW), 83 + DRM_IOCTL_DEF_DRV(XE_GEM_CREATE, xe_gem_create_ioctl, DRM_RENDER_ALLOW), 84 + DRM_IOCTL_DEF_DRV(XE_GEM_MMAP_OFFSET, xe_gem_mmap_offset_ioctl, 85 + DRM_RENDER_ALLOW), 86 + DRM_IOCTL_DEF_DRV(XE_VM_CREATE, xe_vm_create_ioctl, DRM_RENDER_ALLOW), 87 + DRM_IOCTL_DEF_DRV(XE_VM_DESTROY, xe_vm_destroy_ioctl, DRM_RENDER_ALLOW), 88 + DRM_IOCTL_DEF_DRV(XE_VM_BIND, xe_vm_bind_ioctl, DRM_RENDER_ALLOW), 89 + DRM_IOCTL_DEF_DRV(XE_ENGINE_CREATE, xe_engine_create_ioctl, 90 + DRM_RENDER_ALLOW), 91 + DRM_IOCTL_DEF_DRV(XE_ENGINE_DESTROY, xe_engine_destroy_ioctl, 92 + DRM_RENDER_ALLOW), 93 + DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW), 94 + DRM_IOCTL_DEF_DRV(XE_MMIO, xe_mmio_ioctl, DRM_RENDER_ALLOW), 95 + DRM_IOCTL_DEF_DRV(XE_ENGINE_SET_PROPERTY, xe_engine_set_property_ioctl, 96 + DRM_RENDER_ALLOW), 97 + DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl, 98 + DRM_RENDER_ALLOW), 99 + DRM_IOCTL_DEF_DRV(XE_VM_MADVISE, xe_vm_madvise_ioctl, DRM_RENDER_ALLOW), 100 + }; 101 + 102 + static const struct file_operations xe_driver_fops = { 103 + .owner = THIS_MODULE, 104 + .open = drm_open, 105 + .release = drm_release_noglobal, 106 + .unlocked_ioctl = drm_ioctl, 107 + .mmap = drm_gem_mmap, 108 + .poll = drm_poll, 109 + .read = drm_read, 110 + // .compat_ioctl = i915_ioc32_compat_ioctl, 111 + .llseek = noop_llseek, 112 + }; 113 + 114 + static void xe_driver_release(struct drm_device *dev) 115 + { 116 + struct xe_device *xe = to_xe_device(dev); 117 + 118 + pci_set_drvdata(to_pci_dev(xe->drm.dev), NULL); 119 + } 120 + 121 + static struct drm_driver driver = { 122 + /* Don't use MTRRs here; the Xserver or userspace app should 123 + * deal with them for Intel hardware. 
124 + */ 125 + .driver_features = 126 + DRIVER_GEM | 127 + DRIVER_RENDER | DRIVER_SYNCOBJ | 128 + DRIVER_SYNCOBJ_TIMELINE, 129 + .open = xe_file_open, 130 + .postclose = xe_file_close, 131 + 132 + .gem_prime_import = xe_gem_prime_import, 133 + 134 + .dumb_create = xe_bo_dumb_create, 135 + .dumb_map_offset = drm_gem_ttm_dumb_map_offset, 136 + .release = &xe_driver_release, 137 + 138 + .ioctls = xe_ioctls, 139 + .num_ioctls = ARRAY_SIZE(xe_ioctls), 140 + .fops = &xe_driver_fops, 141 + .name = DRIVER_NAME, 142 + .desc = DRIVER_DESC, 143 + .date = DRIVER_DATE, 144 + .major = DRIVER_MAJOR, 145 + .minor = DRIVER_MINOR, 146 + .patchlevel = DRIVER_PATCHLEVEL, 147 + }; 148 + 149 + static void xe_device_destroy(struct drm_device *dev, void *dummy) 150 + { 151 + struct xe_device *xe = to_xe_device(dev); 152 + 153 + destroy_workqueue(xe->ordered_wq); 154 + mutex_destroy(&xe->persitent_engines.lock); 155 + ttm_device_fini(&xe->ttm); 156 + } 157 + 158 + struct xe_device *xe_device_create(struct pci_dev *pdev, 159 + const struct pci_device_id *ent) 160 + { 161 + struct xe_device *xe; 162 + int err; 163 + 164 + err = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &driver); 165 + if (err) 166 + return ERR_PTR(err); 167 + 168 + xe = devm_drm_dev_alloc(&pdev->dev, &driver, struct xe_device, drm); 169 + if (IS_ERR(xe)) 170 + return xe; 171 + 172 + err = ttm_device_init(&xe->ttm, &xe_ttm_funcs, xe->drm.dev, 173 + xe->drm.anon_inode->i_mapping, 174 + xe->drm.vma_offset_manager, false, false); 175 + if (WARN_ON(err)) 176 + goto err_put; 177 + 178 + xe->info.devid = pdev->device; 179 + xe->info.revid = pdev->revision; 180 + xe->info.enable_guc = enable_guc; 181 + 182 + spin_lock_init(&xe->irq.lock); 183 + 184 + init_waitqueue_head(&xe->ufence_wq); 185 + 186 + mutex_init(&xe->usm.lock); 187 + xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC1); 188 + 189 + mutex_init(&xe->persitent_engines.lock); 190 + INIT_LIST_HEAD(&xe->persitent_engines.list); 191 + 192 + 
spin_lock_init(&xe->pinned.lock); 193 + INIT_LIST_HEAD(&xe->pinned.kernel_bo_present); 194 + INIT_LIST_HEAD(&xe->pinned.external_vram); 195 + INIT_LIST_HEAD(&xe->pinned.evicted); 196 + 197 + xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0); 198 + 199 + mutex_init(&xe->sb_lock); 200 + xe->enabled_irq_mask = ~0; 201 + 202 + err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL); 203 + if (err) 204 + goto err_put; 205 + 206 + mutex_init(&xe->mem_access.lock); 207 + return xe; 208 + 209 + err_put: 210 + drm_dev_put(&xe->drm); 211 + 212 + return ERR_PTR(err); 213 + } 214 + 215 + int xe_device_probe(struct xe_device *xe) 216 + { 217 + struct xe_gt *gt; 218 + int err; 219 + u8 id; 220 + 221 + xe->info.mem_region_mask = 1; 222 + 223 + for_each_gt(gt, xe, id) { 224 + err = xe_gt_alloc(xe, gt); 225 + if (err) 226 + return err; 227 + } 228 + 229 + err = xe_mmio_init(xe); 230 + if (err) 231 + return err; 232 + 233 + for_each_gt(gt, xe, id) { 234 + err = xe_pcode_probe(gt); 235 + if (err) 236 + return err; 237 + } 238 + 239 + err = xe_irq_install(xe); 240 + if (err) 241 + return err; 242 + 243 + for_each_gt(gt, xe, id) { 244 + err = xe_gt_init_early(gt); 245 + if (err) 246 + goto err_irq_shutdown; 247 + } 248 + 249 + err = xe_mmio_probe_vram(xe); 250 + if (err) 251 + goto err_irq_shutdown; 252 + 253 + for_each_gt(gt, xe, id) { 254 + err = xe_gt_init_noalloc(gt); 255 + if (err) 256 + goto err_irq_shutdown; 257 + } 258 + 259 + for_each_gt(gt, xe, id) { 260 + err = xe_gt_init(gt); 261 + if (err) 262 + goto err_irq_shutdown; 263 + } 264 + 265 + err = drm_dev_register(&xe->drm, 0); 266 + if (err) 267 + goto err_irq_shutdown; 268 + 269 + xe_debugfs_register(xe); 270 + 271 + return 0; 272 + 273 + err_irq_shutdown: 274 + xe_irq_shutdown(xe); 275 + return err; 276 + } 277 + 278 + void xe_device_remove(struct xe_device *xe) 279 + { 280 + xe_irq_shutdown(xe); 281 + } 282 + 283 + void xe_device_shutdown(struct xe_device *xe) 284 + { 285 + } 286 + 287 + void 
xe_device_add_persitent_engines(struct xe_device *xe, struct xe_engine *e) 288 + { 289 + mutex_lock(&xe->persitent_engines.lock); 290 + list_add_tail(&e->persitent.link, &xe->persitent_engines.list); 291 + mutex_unlock(&xe->persitent_engines.lock); 292 + } 293 + 294 + void xe_device_remove_persitent_engines(struct xe_device *xe, 295 + struct xe_engine *e) 296 + { 297 + mutex_lock(&xe->persitent_engines.lock); 298 + if (!list_empty(&e->persitent.link)) 299 + list_del(&e->persitent.link); 300 + mutex_unlock(&xe->persitent_engines.lock); 301 + } 302 + 303 + static void device_kill_persitent_engines(struct xe_device *xe, 304 + struct xe_file *xef) 305 + { 306 + struct xe_engine *e, *next; 307 + 308 + mutex_lock(&xe->persitent_engines.lock); 309 + list_for_each_entry_safe(e, next, &xe->persitent_engines.list, 310 + persitent.link) 311 + if (e->persitent.xef == xef) { 312 + xe_engine_kill(e); 313 + list_del_init(&e->persitent.link); 314 + } 315 + mutex_unlock(&xe->persitent_engines.lock); 316 + } 317 + 318 + #define SOFTWARE_FLAGS_SPR33 _MMIO(0x4F084) 319 + 320 + void xe_device_wmb(struct xe_device *xe) 321 + { 322 + struct xe_gt *gt = xe_device_get_gt(xe, 0); 323 + 324 + wmb(); 325 + if (IS_DGFX(xe)) 326 + xe_mmio_write32(gt, SOFTWARE_FLAGS_SPR33.reg, 0); 327 + } 328 + 329 + u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) 330 + { 331 + return xe_device_has_flat_ccs(xe) ? 
		DIV_ROUND_UP(size, NUM_BYTES_PER_CCS_BYTE) : 0;
}

void xe_device_mem_access_get(struct xe_device *xe)
{
	bool resumed = xe_pm_runtime_resume_if_suspended(xe);

	mutex_lock(&xe->mem_access.lock);
	if (xe->mem_access.ref++ == 0)
		xe->mem_access.hold_rpm = xe_pm_runtime_get_if_active(xe);
	mutex_unlock(&xe->mem_access.lock);

	/* The usage counter increased if device was immediately resumed */
	if (resumed)
		xe_pm_runtime_put(xe);

	XE_WARN_ON(xe->mem_access.ref == U32_MAX);
}

void xe_device_mem_access_put(struct xe_device *xe)
{
	mutex_lock(&xe->mem_access.lock);
	if (--xe->mem_access.ref == 0 && xe->mem_access.hold_rpm)
		xe_pm_runtime_put(xe);
	mutex_unlock(&xe->mem_access.lock);

	XE_WARN_ON(xe->mem_access.ref == U32_MAX);
}
+126
drivers/gpu/drm/xe/xe_device.h
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2021 Intel Corporation
 */

#ifndef _XE_DEVICE_H_
#define _XE_DEVICE_H_

struct xe_engine;
struct xe_file;

#include <drm/drm_util.h>

#include "xe_device_types.h"
#include "xe_macros.h"
#include "xe_force_wake.h"

#include "gt/intel_gpu_commands.h"

static inline struct xe_device *to_xe_device(const struct drm_device *dev)
{
	return container_of(dev, struct xe_device, drm);
}

static inline struct xe_device *pdev_to_xe_device(struct pci_dev *pdev)
{
	return pci_get_drvdata(pdev);
}

static inline struct xe_device *ttm_to_xe_device(struct ttm_device *ttm)
{
	return container_of(ttm, struct xe_device, ttm);
}

struct xe_device *xe_device_create(struct pci_dev *pdev,
				   const struct pci_device_id *ent);
int xe_device_probe(struct xe_device *xe);
void xe_device_remove(struct xe_device *xe);
void xe_device_shutdown(struct xe_device *xe);

void xe_device_add_persitent_engines(struct xe_device *xe, struct xe_engine *e);
void xe_device_remove_persitent_engines(struct xe_device *xe,
					struct xe_engine *e);

void xe_device_wmb(struct xe_device *xe);

static inline struct xe_file *to_xe_file(const struct drm_file *file)
{
	return file->driver_priv;
}

static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id)
{
	struct xe_gt *gt;

	XE_BUG_ON(gt_id >= XE_MAX_GT);
	gt = xe->gt + gt_id;
	XE_BUG_ON(gt->info.id != gt_id);
	XE_BUG_ON(gt->info.type == XE_GT_TYPE_UNINITIALIZED);

	return gt;
}

/*
 * FIXME: Placeholder until multi-gt lands. Once that lands, kill this function.
 */
static inline struct xe_gt *to_gt(struct xe_device *xe)
{
	return xe->gt;
}

static inline bool xe_device_guc_submission_enabled(struct xe_device *xe)
{
	return xe->info.enable_guc;
}

static inline void xe_device_guc_submission_disable(struct xe_device *xe)
{
	xe->info.enable_guc = false;
}

#define for_each_gt(gt__, xe__, id__) \
	for ((id__) = 0; (id__) < (xe__)->info.tile_count; (id__)++) \
		for_each_if((gt__) = xe_device_get_gt((xe__), (id__)))

static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt)
{
	return &gt->mmio.fw;
}

void xe_device_mem_access_get(struct xe_device *xe);
void xe_device_mem_access_put(struct xe_device *xe);

static inline void xe_device_assert_mem_access(struct xe_device *xe)
{
	XE_WARN_ON(!xe->mem_access.ref);
}

static inline bool xe_device_mem_access_ongoing(struct xe_device *xe)
{
	bool ret;

	mutex_lock(&xe->mem_access.lock);
	ret = xe->mem_access.ref;
	mutex_unlock(&xe->mem_access.lock);

	return ret;
}

static inline bool xe_device_in_fault_mode(struct xe_device *xe)
{
	return xe->usm.num_vm_in_fault_mode != 0;
}

static inline bool xe_device_in_non_fault_mode(struct xe_device *xe)
{
	return xe->usm.num_vm_in_non_fault_mode != 0;
}

static inline bool xe_device_has_flat_ccs(struct xe_device *xe)
{
	return xe->info.has_flat_ccs;
}

u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size);
#endif
+214
drivers/gpu/drm/xe/xe_device_types.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_DEVICE_TYPES_H_ 7 + #define _XE_DEVICE_TYPES_H_ 8 + 9 + #include <linux/pci.h> 10 + 11 + #include <drm/drm_device.h> 12 + #include <drm/drm_file.h> 13 + #include <drm/ttm/ttm_device.h> 14 + 15 + #include "xe_gt_types.h" 16 + #include "xe_platform_types.h" 17 + #include "xe_step_types.h" 18 + 19 + #define XE_BO_INVALID_OFFSET LONG_MAX 20 + 21 + #define GRAPHICS_VER(xe) ((xe)->info.graphics_verx100 / 100) 22 + #define MEDIA_VER(xe) ((xe)->info.media_verx100 / 100) 23 + #define GRAPHICS_VERx100(xe) ((xe)->info.graphics_verx100) 24 + #define MEDIA_VERx100(xe) ((xe)->info.media_verx100) 25 + #define IS_DGFX(xe) ((xe)->info.is_dgfx) 26 + 27 + #define XE_VRAM_FLAGS_NEED64K BIT(0) 28 + 29 + #define XE_GT0 0 30 + #define XE_GT1 1 31 + #define XE_MAX_GT (XE_GT1 + 1) 32 + 33 + #define XE_MAX_ASID (BIT(20)) 34 + 35 + #define IS_PLATFORM_STEP(_xe, _platform, min_step, max_step) \ 36 + ((_xe)->info.platform == (_platform) && \ 37 + (_xe)->info.step.graphics >= (min_step) && \ 38 + (_xe)->info.step.graphics < (max_step)) 39 + #define IS_SUBPLATFORM_STEP(_xe, _platform, sub, min_step, max_step) \ 40 + ((_xe)->info.platform == (_platform) && \ 41 + (_xe)->info.subplatform == (sub) && \ 42 + (_xe)->info.step.graphics >= (min_step) && \ 43 + (_xe)->info.step.graphics < (max_step)) 44 + 45 + /** 46 + * struct xe_device - Top level struct of XE device 47 + */ 48 + struct xe_device { 49 + /** @drm: drm device */ 50 + struct drm_device drm; 51 + 52 + /** @info: device info */ 53 + struct intel_device_info { 54 + /** @graphics_verx100: graphics IP version */ 55 + u32 graphics_verx100; 56 + /** @media_verx100: media IP version */ 57 + u32 media_verx100; 58 + /** @mem_region_mask: mask of valid memory regions */ 59 + u32 mem_region_mask; 60 + /** @is_dgfx: is discrete device */ 61 + bool is_dgfx; 62 + /** @platform: XE platform enum */ 63 + enum xe_platform platform; 64 + 
		/** @subplatform: XE subplatform enum */
		enum xe_subplatform subplatform;
		/** @devid: device ID */
		u16 devid;
		/** @revid: device revision */
		u8 revid;
		/** @step: stepping information for each IP */
		struct xe_step_info step;
		/** @dma_mask_size: DMA address bits */
		u8 dma_mask_size;
		/** @vram_flags: Vram flags */
		u8 vram_flags;
		/** @tile_count: Number of tiles */
		u8 tile_count;
		/** @vm_max_level: Max VM level */
		u8 vm_max_level;
		/** @media_ver: Media version */
		u8 media_ver;
		/** @supports_usm: Supports unified shared memory */
		bool supports_usm;
		/** @enable_guc: GuC submission enabled */
		bool enable_guc;
		/** @has_flat_ccs: Whether flat CCS metadata is used */
		bool has_flat_ccs;
		/** @has_4tile: Whether tile-4 tiling is supported */
		bool has_4tile;
	} info;

	/** @irq: device interrupt state */
	struct {
		/** @lock: lock for processing irq's on this device */
		spinlock_t lock;

		/** @enabled: interrupts enabled on this device */
		bool enabled;
	} irq;

	/** @ttm: ttm device */
	struct ttm_device ttm;

	/** @mmio: mmio info for device */
	struct {
		/** @size: size of MMIO space for device */
		size_t size;
		/** @regs: pointer to MMIO space for device */
		void *regs;
	} mmio;

	/** @mem: memory info for device */
	struct {
		/** @vram: VRAM info for device */
		struct {
			/** @io_start: start address of VRAM */
			resource_size_t io_start;
			/** @size: size of VRAM */
			resource_size_t size;
			/** @mapping: pointer to VRAM mappable space */
			void __iomem *mapping;
		} vram;
	} mem;

	/** @usm: unified memory state */
	struct {
		/** @asid_to_vm: look up VMs by ASID */
		struct xarray asid_to_vm;
		/** @next_asid: next ASID, used to cyclically allocate ASIDs */
		u32 next_asid;
		/** @num_vm_in_fault_mode: number of VM in fault mode */
		u32 num_vm_in_fault_mode;
		/** @num_vm_in_non_fault_mode: number of VM in non-fault mode */
		u32 num_vm_in_non_fault_mode;
		/** @lock: protects UM state */
		struct mutex lock;
	} usm;

	/** @persitent_engines: engines that are closed but still running */
	struct {
		/** @lock: protects persistent engines */
		struct mutex lock;
		/** @list: list of persistent engines */
		struct list_head list;
	} persitent_engines;

	/** @pinned: pinned BO state */
	struct {
		/** @lock: protects pinned BO list state */
		spinlock_t lock;
		/** @kernel_bo_present: pinned kernel BOs that are present */
		struct list_head kernel_bo_present;
		/** @evicted: pinned BOs that have been evicted */
		struct list_head evicted;
		/** @external_vram: pinned external BOs in vram */
		struct list_head external_vram;
	} pinned;

	/** @ufence_wq: user fence wait queue */
	wait_queue_head_t ufence_wq;

	/** @ordered_wq: used to serialize compute mode resume */
	struct workqueue_struct *ordered_wq;

	/** @gt: graphics tile */
	struct xe_gt gt[XE_MAX_GT];

	/**
	 * @mem_access: keep track of memory access in the device, possibly
	 * triggering additional actions when they occur.
	 */
	struct {
		/** @lock: protect the ref count */
		struct mutex lock;
		/** @ref: ref count of memory accesses */
		u32 ref;
		/** @hold_rpm: need to put rpm ref back at the end */
		bool hold_rpm;
	} mem_access;

	/** @d3cold_allowed: Indicates if d3cold is a valid device state */
	bool d3cold_allowed;

	/* For pcode */
	struct mutex sb_lock;

	u32 enabled_irq_mask;
};

/**
 * struct xe_file - file handle for XE driver
 */
struct xe_file {
	/** @drm: base DRM file */
	struct drm_file *drm;

	/** @vm: VM state for file */
	struct {
		/** @xa: xarray to store VMs */
		struct xarray xa;
		/** @lock: protects file VM state */
		struct mutex lock;
	} vm;

	/** @engine: Submission engine state for file */
	struct {
		/** @xa: xarray to store engines */
		struct xarray xa;
		/** @lock: protects file engine state */
		struct mutex lock;
	} engine;
};

#endif
+307
drivers/gpu/drm/xe/xe_dma_buf.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #include <linux/dma-buf.h> 7 + 8 + #include <drm/drm_device.h> 9 + #include <drm/drm_prime.h> 10 + 11 + #include <drm/ttm/ttm_tt.h> 12 + 13 + #include <kunit/test.h> 14 + #include <linux/pci-p2pdma.h> 15 + 16 + #include "tests/xe_test.h" 17 + #include "xe_bo.h" 18 + #include "xe_device.h" 19 + #include "xe_dma_buf.h" 20 + #include "xe_ttm_vram_mgr.h" 21 + #include "xe_vm.h" 22 + 23 + MODULE_IMPORT_NS(DMA_BUF); 24 + 25 + static int xe_dma_buf_attach(struct dma_buf *dmabuf, 26 + struct dma_buf_attachment *attach) 27 + { 28 + struct drm_gem_object *obj = attach->dmabuf->priv; 29 + 30 + if (attach->peer2peer && 31 + pci_p2pdma_distance(to_pci_dev(obj->dev->dev), attach->dev, false) < 0) 32 + attach->peer2peer = false; 33 + 34 + if (!attach->peer2peer && !xe_bo_can_migrate(gem_to_xe_bo(obj), XE_PL_TT)) 35 + return -EOPNOTSUPP; 36 + 37 + xe_device_mem_access_get(to_xe_device(obj->dev)); 38 + return 0; 39 + } 40 + 41 + static void xe_dma_buf_detach(struct dma_buf *dmabuf, 42 + struct dma_buf_attachment *attach) 43 + { 44 + struct drm_gem_object *obj = attach->dmabuf->priv; 45 + 46 + xe_device_mem_access_put(to_xe_device(obj->dev)); 47 + } 48 + 49 + static int xe_dma_buf_pin(struct dma_buf_attachment *attach) 50 + { 51 + struct drm_gem_object *obj = attach->dmabuf->priv; 52 + struct xe_bo *bo = gem_to_xe_bo(obj); 53 + 54 + /* 55 + * Migrate to TT first to increase the chance of non-p2p clients 56 + * can attach. 
57 + */ 58 + (void)xe_bo_migrate(bo, XE_PL_TT); 59 + xe_bo_pin_external(bo); 60 + 61 + return 0; 62 + } 63 + 64 + static void xe_dma_buf_unpin(struct dma_buf_attachment *attach) 65 + { 66 + struct drm_gem_object *obj = attach->dmabuf->priv; 67 + struct xe_bo *bo = gem_to_xe_bo(obj); 68 + 69 + xe_bo_unpin_external(bo); 70 + } 71 + 72 + static struct sg_table *xe_dma_buf_map(struct dma_buf_attachment *attach, 73 + enum dma_data_direction dir) 74 + { 75 + struct dma_buf *dma_buf = attach->dmabuf; 76 + struct drm_gem_object *obj = dma_buf->priv; 77 + struct xe_bo *bo = gem_to_xe_bo(obj); 78 + struct sg_table *sgt; 79 + int r = 0; 80 + 81 + if (!attach->peer2peer && !xe_bo_can_migrate(bo, XE_PL_TT)) 82 + return ERR_PTR(-EOPNOTSUPP); 83 + 84 + if (!xe_bo_is_pinned(bo)) { 85 + if (!attach->peer2peer || 86 + bo->ttm.resource->mem_type == XE_PL_SYSTEM) { 87 + if (xe_bo_can_migrate(bo, XE_PL_TT)) 88 + r = xe_bo_migrate(bo, XE_PL_TT); 89 + else 90 + r = xe_bo_validate(bo, NULL, false); 91 + } 92 + if (r) 93 + return ERR_PTR(r); 94 + } 95 + 96 + switch (bo->ttm.resource->mem_type) { 97 + case XE_PL_TT: 98 + sgt = drm_prime_pages_to_sg(obj->dev, 99 + bo->ttm.ttm->pages, 100 + bo->ttm.ttm->num_pages); 101 + if (IS_ERR(sgt)) 102 + return sgt; 103 + 104 + if (dma_map_sgtable(attach->dev, sgt, dir, 105 + DMA_ATTR_SKIP_CPU_SYNC)) 106 + goto error_free; 107 + break; 108 + 109 + case XE_PL_VRAM0: 110 + case XE_PL_VRAM1: 111 + r = xe_ttm_vram_mgr_alloc_sgt(xe_bo_device(bo), 112 + bo->ttm.resource, 0, 113 + bo->ttm.base.size, attach->dev, 114 + dir, &sgt); 115 + if (r) 116 + return ERR_PTR(r); 117 + break; 118 + default: 119 + return ERR_PTR(-EINVAL); 120 + } 121 + 122 + return sgt; 123 + 124 + error_free: 125 + sg_free_table(sgt); 126 + kfree(sgt); 127 + return ERR_PTR(-EBUSY); 128 + } 129 + 130 + static void xe_dma_buf_unmap(struct dma_buf_attachment *attach, 131 + struct sg_table *sgt, 132 + enum dma_data_direction dir) 133 + { 134 + struct dma_buf *dma_buf = attach->dmabuf; 135 + 
struct xe_bo *bo = gem_to_xe_bo(dma_buf->priv); 136 + 137 + if (!xe_bo_is_vram(bo)) { 138 + dma_unmap_sgtable(attach->dev, sgt, dir, 0); 139 + sg_free_table(sgt); 140 + kfree(sgt); 141 + } else { 142 + xe_ttm_vram_mgr_free_sgt(attach->dev, dir, sgt); 143 + } 144 + } 145 + 146 + static int xe_dma_buf_begin_cpu_access(struct dma_buf *dma_buf, 147 + enum dma_data_direction direction) 148 + { 149 + struct drm_gem_object *obj = dma_buf->priv; 150 + struct xe_bo *bo = gem_to_xe_bo(obj); 151 + bool reads = (direction == DMA_BIDIRECTIONAL || 152 + direction == DMA_FROM_DEVICE); 153 + 154 + if (!reads) 155 + return 0; 156 + 157 + xe_bo_lock_no_vm(bo, NULL); 158 + (void)xe_bo_migrate(bo, XE_PL_TT); 159 + xe_bo_unlock_no_vm(bo); 160 + 161 + return 0; 162 + } 163 + 164 + const struct dma_buf_ops xe_dmabuf_ops = { 165 + .attach = xe_dma_buf_attach, 166 + .detach = xe_dma_buf_detach, 167 + .pin = xe_dma_buf_pin, 168 + .unpin = xe_dma_buf_unpin, 169 + .map_dma_buf = xe_dma_buf_map, 170 + .unmap_dma_buf = xe_dma_buf_unmap, 171 + .release = drm_gem_dmabuf_release, 172 + .begin_cpu_access = xe_dma_buf_begin_cpu_access, 173 + .mmap = drm_gem_dmabuf_mmap, 174 + .vmap = drm_gem_dmabuf_vmap, 175 + .vunmap = drm_gem_dmabuf_vunmap, 176 + }; 177 + 178 + struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags) 179 + { 180 + struct xe_bo *bo = gem_to_xe_bo(obj); 181 + struct dma_buf *buf; 182 + 183 + if (bo->vm) 184 + return ERR_PTR(-EPERM); 185 + 186 + buf = drm_gem_prime_export(obj, flags); 187 + if (!IS_ERR(buf)) 188 + buf->ops = &xe_dmabuf_ops; 189 + 190 + return buf; 191 + } 192 + 193 + static struct drm_gem_object * 194 + xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage, 195 + struct dma_buf *dma_buf) 196 + { 197 + struct dma_resv *resv = dma_buf->resv; 198 + struct xe_device *xe = to_xe_device(dev); 199 + struct xe_bo *bo; 200 + int ret; 201 + 202 + dma_resv_lock(resv, NULL); 203 + bo = __xe_bo_create_locked(xe, storage, NULL, resv, dma_buf->size, 
				   ttm_bo_type_sg, XE_BO_CREATE_SYSTEM_BIT);
	if (IS_ERR(bo)) {
		ret = PTR_ERR(bo);
		goto error;
	}
	dma_resv_unlock(resv);

	return &bo->ttm.base;

error:
	dma_resv_unlock(resv);
	return ERR_PTR(ret);
}

static void xe_dma_buf_move_notify(struct dma_buf_attachment *attach)
{
	struct drm_gem_object *obj = attach->importer_priv;
	struct xe_bo *bo = gem_to_xe_bo(obj);

	XE_WARN_ON(xe_bo_evict(bo, false));
}

static const struct dma_buf_attach_ops xe_dma_buf_attach_ops = {
	.allow_peer2peer = true,
	.move_notify = xe_dma_buf_move_notify
};

#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)

struct dma_buf_test_params {
	struct xe_test_priv base;
	const struct dma_buf_attach_ops *attach_ops;
	bool force_different_devices;
	u32 mem_mask;
};

#define to_dma_buf_test_params(_priv) \
	container_of(_priv, struct dma_buf_test_params, base)
#endif

struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev,
					   struct dma_buf *dma_buf)
{
	XE_TEST_DECLARE(struct dma_buf_test_params *test =
			to_dma_buf_test_params
			(xe_cur_kunit_priv(XE_TEST_LIVE_DMA_BUF));)
	const struct dma_buf_attach_ops *attach_ops;
	struct dma_buf_attachment *attach;
	struct drm_gem_object *obj;
	struct xe_bo *bo;

	if (dma_buf->ops == &xe_dmabuf_ops) {
		obj = dma_buf->priv;
		if (obj->dev == dev &&
		    !XE_TEST_ONLY(test && test->force_different_devices)) {
			/*
			 * Importing a dmabuf exported from our own gem
			 * increases the refcount on the gem itself instead of
			 * the f_count of the dmabuf.
			 */
			drm_gem_object_get(obj);
			return obj;
		}
	}

	/*
	 * Don't publish the bo until we have a valid attachment, and a
	 * valid attachment needs the bo address.
So pre-create a bo before 271 + * creating the attachment and publish. 272 + */ 273 + bo = xe_bo_alloc(); 274 + if (IS_ERR(bo)) 275 + return ERR_CAST(bo); 276 + 277 + attach_ops = &xe_dma_buf_attach_ops; 278 + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) 279 + if (test) 280 + attach_ops = test->attach_ops; 281 + #endif 282 + 283 + attach = dma_buf_dynamic_attach(dma_buf, dev->dev, attach_ops, &bo->ttm.base); 284 + if (IS_ERR(attach)) { 285 + obj = ERR_CAST(attach); 286 + goto out_err; 287 + } 288 + 289 + /* Errors here will take care of freeing the bo. */ 290 + obj = xe_dma_buf_init_obj(dev, bo, dma_buf); 291 + if (IS_ERR(obj)) 292 + return obj; 293 + 294 + 295 + get_dma_buf(dma_buf); 296 + obj->import_attach = attach; 297 + return obj; 298 + 299 + out_err: 300 + xe_bo_free(bo); 301 + 302 + return obj; 303 + } 304 + 305 + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) 306 + #include "tests/xe_dma_buf.c" 307 + #endif
+15
drivers/gpu/drm/xe/xe_dma_buf.h
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

#ifndef _XE_DMA_BUF_H_
#define _XE_DMA_BUF_H_

#include <drm/drm_gem.h>

struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags);
struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev,
					   struct dma_buf *dma_buf);

#endif
+24
drivers/gpu/drm/xe/xe_drv.h
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2021 Intel Corporation
 */

#ifndef _XE_DRV_H_
#define _XE_DRV_H_

#include <drm/drm_drv.h>

#define DRIVER_NAME		"xe"
#define DRIVER_DESC		"Intel Xe Graphics"
#define DRIVER_DATE		"20201103"
#define DRIVER_TIMESTAMP	1604406085

/* Interface history:
 *
 * 1.1: Original.
 */
#define DRIVER_MAJOR		1
#define DRIVER_MINOR		1
#define DRIVER_PATCHLEVEL	0

#endif
+734
drivers/gpu/drm/xe/xe_engine.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2021 Intel Corporation 4 + */ 5 + 6 + #include "xe_engine.h" 7 + 8 + #include <drm/drm_device.h> 9 + #include <drm/drm_file.h> 10 + #include <drm/xe_drm.h> 11 + #include <linux/nospec.h> 12 + 13 + #include "xe_device.h" 14 + #include "xe_gt.h" 15 + #include "xe_lrc.h" 16 + #include "xe_macros.h" 17 + #include "xe_migrate.h" 18 + #include "xe_pm.h" 19 + #include "xe_trace.h" 20 + #include "xe_vm.h" 21 + 22 + static struct xe_engine *__xe_engine_create(struct xe_device *xe, 23 + struct xe_vm *vm, 24 + u32 logical_mask, 25 + u16 width, struct xe_hw_engine *hwe, 26 + u32 flags) 27 + { 28 + struct xe_engine *e; 29 + struct xe_gt *gt = hwe->gt; 30 + int err; 31 + int i; 32 + 33 + e = kzalloc(sizeof(*e) + sizeof(struct xe_lrc) * width, GFP_KERNEL); 34 + if (!e) 35 + return ERR_PTR(-ENOMEM); 36 + 37 + kref_init(&e->refcount); 38 + e->flags = flags; 39 + e->hwe = hwe; 40 + e->gt = gt; 41 + if (vm) 42 + e->vm = xe_vm_get(vm); 43 + e->class = hwe->class; 44 + e->width = width; 45 + e->logical_mask = logical_mask; 46 + e->fence_irq = &gt->fence_irq[hwe->class]; 47 + e->ring_ops = gt->ring_ops[hwe->class]; 48 + e->ops = gt->engine_ops; 49 + INIT_LIST_HEAD(&e->persitent.link); 50 + INIT_LIST_HEAD(&e->compute.link); 51 + INIT_LIST_HEAD(&e->multi_gt_link); 52 + 53 + /* FIXME: Wire up to configurable default value */ 54 + e->sched_props.timeslice_us = 1 * 1000; 55 + e->sched_props.preempt_timeout_us = 640 * 1000; 56 + 57 + if (xe_engine_is_parallel(e)) { 58 + e->parallel.composite_fence_ctx = dma_fence_context_alloc(1); 59 + e->parallel.composite_fence_seqno = 1; 60 + } 61 + if (e->flags & ENGINE_FLAG_VM) { 62 + e->bind.fence_ctx = dma_fence_context_alloc(1); 63 + e->bind.fence_seqno = 1; 64 + } 65 + 66 + for (i = 0; i < width; ++i) { 67 + err = xe_lrc_init(e->lrc + i, hwe, e, vm, SZ_16K); 68 + if (err) 69 + goto err_lrc; 70 + } 71 + 72 + err = e->ops->init(e); 73 + if (err) 74 + goto err_lrc; 75 + 76 + return e; 77 + 
78 + err_lrc: 79 + for (i = i - 1; i >= 0; --i) 80 + xe_lrc_finish(e->lrc + i); 81 + kfree(e); 82 + return ERR_PTR(err); 83 + } 84 + 85 + struct xe_engine *xe_engine_create(struct xe_device *xe, struct xe_vm *vm, 86 + u32 logical_mask, u16 width, 87 + struct xe_hw_engine *hwe, u32 flags) 88 + { 89 + struct ww_acquire_ctx ww; 90 + struct xe_engine *e; 91 + int err; 92 + 93 + if (vm) { 94 + err = xe_vm_lock(vm, &ww, 0, true); 95 + if (err) 96 + return ERR_PTR(err); 97 + } 98 + e = __xe_engine_create(xe, vm, logical_mask, width, hwe, flags); 99 + if (vm) 100 + xe_vm_unlock(vm, &ww); 101 + 102 + return e; 103 + } 104 + 105 + struct xe_engine *xe_engine_create_class(struct xe_device *xe, struct xe_gt *gt, 106 + struct xe_vm *vm, 107 + enum xe_engine_class class, u32 flags) 108 + { 109 + struct xe_hw_engine *hwe, *hwe0 = NULL; 110 + enum xe_hw_engine_id id; 111 + u32 logical_mask = 0; 112 + 113 + for_each_hw_engine(hwe, gt, id) { 114 + if (xe_hw_engine_is_reserved(hwe)) 115 + continue; 116 + 117 + if (hwe->class == class) { 118 + logical_mask |= BIT(hwe->logical_instance); 119 + if (!hwe0) 120 + hwe0 = hwe; 121 + } 122 + } 123 + 124 + if (!logical_mask) 125 + return ERR_PTR(-ENODEV); 126 + 127 + return xe_engine_create(xe, vm, logical_mask, 1, hwe0, flags); 128 + } 129 + 130 + void xe_engine_destroy(struct kref *ref) 131 + { 132 + struct xe_engine *e = container_of(ref, struct xe_engine, refcount); 133 + struct xe_engine *engine, *next; 134 + 135 + if (!(e->flags & ENGINE_FLAG_BIND_ENGINE_CHILD)) { 136 + list_for_each_entry_safe(engine, next, &e->multi_gt_list, 137 + multi_gt_link) 138 + xe_engine_put(engine); 139 + } 140 + 141 + e->ops->fini(e); 142 + } 143 + 144 + void xe_engine_fini(struct xe_engine *e) 145 + { 146 + int i; 147 + 148 + for (i = 0; i < e->width; ++i) 149 + xe_lrc_finish(e->lrc + i); 150 + if (e->vm) 151 + xe_vm_put(e->vm); 152 + 153 + kfree(e); 154 + } 155 + 156 + struct xe_engine *xe_engine_lookup(struct xe_file *xef, u32 id) 157 + { 158 + struct 
xe_engine *e; 159 + 160 + mutex_lock(&xef->engine.lock); 161 + e = xa_load(&xef->engine.xa, id); 162 + mutex_unlock(&xef->engine.lock); 163 + 164 + if (e) 165 + xe_engine_get(e); 166 + 167 + return e; 168 + } 169 + 170 + static int engine_set_priority(struct xe_device *xe, struct xe_engine *e, 171 + u64 value, bool create) 172 + { 173 + if (XE_IOCTL_ERR(xe, value > XE_ENGINE_PRIORITY_HIGH)) 174 + return -EINVAL; 175 + 176 + if (XE_IOCTL_ERR(xe, value == XE_ENGINE_PRIORITY_HIGH && 177 + !capable(CAP_SYS_NICE))) 178 + return -EPERM; 179 + 180 + return e->ops->set_priority(e, value); 181 + } 182 + 183 + static int engine_set_timeslice(struct xe_device *xe, struct xe_engine *e, 184 + u64 value, bool create) 185 + { 186 + if (!capable(CAP_SYS_NICE)) 187 + return -EPERM; 188 + 189 + return e->ops->set_timeslice(e, value); 190 + } 191 + 192 + static int engine_set_preemption_timeout(struct xe_device *xe, 193 + struct xe_engine *e, u64 value, 194 + bool create) 195 + { 196 + if (!capable(CAP_SYS_NICE)) 197 + return -EPERM; 198 + 199 + return e->ops->set_preempt_timeout(e, value); 200 + } 201 + 202 + static int engine_set_compute_mode(struct xe_device *xe, struct xe_engine *e, 203 + u64 value, bool create) 204 + { 205 + if (XE_IOCTL_ERR(xe, !create)) 206 + return -EINVAL; 207 + 208 + if (XE_IOCTL_ERR(xe, e->flags & ENGINE_FLAG_COMPUTE_MODE)) 209 + return -EINVAL; 210 + 211 + if (XE_IOCTL_ERR(xe, e->flags & ENGINE_FLAG_VM)) 212 + return -EINVAL; 213 + 214 + if (value) { 215 + struct xe_vm *vm = e->vm; 216 + int err; 217 + 218 + if (XE_IOCTL_ERR(xe, xe_vm_in_fault_mode(vm))) 219 + return -EOPNOTSUPP; 220 + 221 + if (XE_IOCTL_ERR(xe, !xe_vm_in_compute_mode(vm))) 222 + return -EOPNOTSUPP; 223 + 224 + if (XE_IOCTL_ERR(xe, e->width != 1)) 225 + return -EINVAL; 226 + 227 + e->compute.context = dma_fence_context_alloc(1); 228 + spin_lock_init(&e->compute.lock); 229 + 230 + err = xe_vm_add_compute_engine(vm, e); 231 + if (XE_IOCTL_ERR(xe, err)) 232 + return err; 233 + 234 + e->flags 
|= ENGINE_FLAG_COMPUTE_MODE; 235 + e->flags &= ~ENGINE_FLAG_PERSISTENT; 236 + } 237 + 238 + return 0; 239 + } 240 + 241 + static int engine_set_persistence(struct xe_device *xe, struct xe_engine *e, 242 + u64 value, bool create) 243 + { 244 + if (XE_IOCTL_ERR(xe, !create)) 245 + return -EINVAL; 246 + 247 + if (XE_IOCTL_ERR(xe, e->flags & ENGINE_FLAG_COMPUTE_MODE)) 248 + return -EINVAL; 249 + 250 + if (value) 251 + e->flags |= ENGINE_FLAG_PERSISTENT; 252 + else 253 + e->flags &= ~ENGINE_FLAG_PERSISTENT; 254 + 255 + return 0; 256 + } 257 + 258 + static int engine_set_job_timeout(struct xe_device *xe, struct xe_engine *e, 259 + u64 value, bool create) 260 + { 261 + if (XE_IOCTL_ERR(xe, !create)) 262 + return -EINVAL; 263 + 264 + if (!capable(CAP_SYS_NICE)) 265 + return -EPERM; 266 + 267 + return e->ops->set_job_timeout(e, value); 268 + } 269 + 270 + static int engine_set_acc_trigger(struct xe_device *xe, struct xe_engine *e, 271 + u64 value, bool create) 272 + { 273 + if (XE_IOCTL_ERR(xe, !create)) 274 + return -EINVAL; 275 + 276 + if (XE_IOCTL_ERR(xe, !xe->info.supports_usm)) 277 + return -EINVAL; 278 + 279 + e->usm.acc_trigger = value; 280 + 281 + return 0; 282 + } 283 + 284 + static int engine_set_acc_notify(struct xe_device *xe, struct xe_engine *e, 285 + u64 value, bool create) 286 + { 287 + if (XE_IOCTL_ERR(xe, !create)) 288 + return -EINVAL; 289 + 290 + if (XE_IOCTL_ERR(xe, !xe->info.supports_usm)) 291 + return -EINVAL; 292 + 293 + e->usm.acc_notify = value; 294 + 295 + return 0; 296 + } 297 + 298 + static int engine_set_acc_granularity(struct xe_device *xe, struct xe_engine *e, 299 + u64 value, bool create) 300 + { 301 + if (XE_IOCTL_ERR(xe, !create)) 302 + return -EINVAL; 303 + 304 + if (XE_IOCTL_ERR(xe, !xe->info.supports_usm)) 305 + return -EINVAL; 306 + 307 + e->usm.acc_granularity = value; 308 + 309 + return 0; 310 + } 311 + 312 + typedef int (*xe_engine_set_property_fn)(struct xe_device *xe, 313 + struct xe_engine *e, 314 + u64 value, bool create); 315 + 
316 + static const xe_engine_set_property_fn engine_set_property_funcs[] = { 317 + [XE_ENGINE_PROPERTY_PRIORITY] = engine_set_priority, 318 + [XE_ENGINE_PROPERTY_TIMESLICE] = engine_set_timeslice, 319 + [XE_ENGINE_PROPERTY_PREEMPTION_TIMEOUT] = engine_set_preemption_timeout, 320 + [XE_ENGINE_PROPERTY_COMPUTE_MODE] = engine_set_compute_mode, 321 + [XE_ENGINE_PROPERTY_PERSISTENCE] = engine_set_persistence, 322 + [XE_ENGINE_PROPERTY_JOB_TIMEOUT] = engine_set_job_timeout, 323 + [XE_ENGINE_PROPERTY_ACC_TRIGGER] = engine_set_acc_trigger, 324 + [XE_ENGINE_PROPERTY_ACC_NOTIFY] = engine_set_acc_notify, 325 + [XE_ENGINE_PROPERTY_ACC_GRANULARITY] = engine_set_acc_granularity, 326 + }; 327 + 328 + static int engine_user_ext_set_property(struct xe_device *xe, 329 + struct xe_engine *e, 330 + u64 extension, 331 + bool create) 332 + { 333 + u64 __user *address = u64_to_user_ptr(extension); 334 + struct drm_xe_ext_engine_set_property ext; 335 + int err; 336 + u32 idx; 337 + 338 + err = __copy_from_user(&ext, address, sizeof(ext)); 339 + if (XE_IOCTL_ERR(xe, err)) 340 + return -EFAULT; 341 + 342 + if (XE_IOCTL_ERR(xe, ext.property >= 343 + ARRAY_SIZE(engine_set_property_funcs))) 344 + return -EINVAL; 345 + 346 + idx = array_index_nospec(ext.property, ARRAY_SIZE(engine_set_property_funcs)); 347 + return engine_set_property_funcs[idx](xe, e, ext.value, create); 348 + } 349 + 350 + typedef int (*xe_engine_user_extension_fn)(struct xe_device *xe, 351 + struct xe_engine *e, 352 + u64 extension, 353 + bool create); 354 + 355 + static const xe_engine_set_property_fn engine_user_extension_funcs[] = { 356 + [XE_ENGINE_EXTENSION_SET_PROPERTY] = engine_user_ext_set_property, 357 + }; 358 + 359 + #define MAX_USER_EXTENSIONS 16 360 + static int engine_user_extensions(struct xe_device *xe, struct xe_engine *e, 361 + u64 extensions, int ext_number, bool create) 362 + { 363 + u64 __user *address = u64_to_user_ptr(extensions); 364 + struct xe_user_extension ext; 365 + int err; 366 + u32 idx; 367 + 
368 + if (XE_IOCTL_ERR(xe, ext_number >= MAX_USER_EXTENSIONS)) 369 + return -E2BIG; 370 + 371 + err = __copy_from_user(&ext, address, sizeof(ext)); 372 + if (XE_IOCTL_ERR(xe, err)) 373 + return -EFAULT; 374 + 375 + if (XE_IOCTL_ERR(xe, ext.name >= 376 + ARRAY_SIZE(engine_user_extension_funcs))) 377 + return -EINVAL; 378 + 379 + idx = array_index_nospec(ext.name, 380 + ARRAY_SIZE(engine_user_extension_funcs)); 381 + err = engine_user_extension_funcs[idx](xe, e, extensions, create); 382 + if (XE_IOCTL_ERR(xe, err)) 383 + return err; 384 + 385 + if (ext.next_extension) 386 + return engine_user_extensions(xe, e, ext.next_extension, 387 + ++ext_number, create); 388 + 389 + return 0; 390 + } 391 + 392 + static const enum xe_engine_class user_to_xe_engine_class[] = { 393 + [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER, 394 + [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY, 395 + [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE, 396 + [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE, 397 + [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE, 398 + }; 399 + 400 + static struct xe_hw_engine * 401 + find_hw_engine(struct xe_device *xe, 402 + struct drm_xe_engine_class_instance eci) 403 + { 404 + u32 idx; 405 + 406 + if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) 407 + return NULL; 408 + 409 + if (eci.gt_id >= xe->info.tile_count) 410 + return NULL; 411 + 412 + idx = array_index_nospec(eci.engine_class, 413 + ARRAY_SIZE(user_to_xe_engine_class)); 414 + 415 + return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id), 416 + user_to_xe_engine_class[idx], 417 + eci.engine_instance, true); 418 + } 419 + 420 + static u32 bind_engine_logical_mask(struct xe_device *xe, struct xe_gt *gt, 421 + struct drm_xe_engine_class_instance *eci, 422 + u16 width, u16 num_placements) 423 + { 424 + struct xe_hw_engine *hwe; 425 + enum xe_hw_engine_id id; 426 + u32 logical_mask = 0; 427 + 428 + if (XE_IOCTL_ERR(xe, width != 1)) 429 + return 
0; 430 + if (XE_IOCTL_ERR(xe, num_placements != 1)) 431 + return 0; 432 + if (XE_IOCTL_ERR(xe, eci[0].engine_instance != 0)) 433 + return 0; 434 + 435 + eci[0].engine_class = DRM_XE_ENGINE_CLASS_COPY; 436 + 437 + for_each_hw_engine(hwe, gt, id) { 438 + if (xe_hw_engine_is_reserved(hwe)) 439 + continue; 440 + 441 + if (hwe->class == 442 + user_to_xe_engine_class[DRM_XE_ENGINE_CLASS_COPY]) 443 + logical_mask |= BIT(hwe->logical_instance); 444 + } 445 + 446 + return logical_mask; 447 + } 448 + 449 + static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt, 450 + struct drm_xe_engine_class_instance *eci, 451 + u16 width, u16 num_placements) 452 + { 453 + int len = width * num_placements; 454 + int i, j, n; 455 + u16 class; 456 + u16 gt_id; 457 + u32 return_mask = 0, prev_mask; 458 + 459 + if (XE_IOCTL_ERR(xe, !xe_device_guc_submission_enabled(xe) && 460 + len > 1)) 461 + return 0; 462 + 463 + for (i = 0; i < width; ++i) { 464 + u32 current_mask = 0; 465 + 466 + for (j = 0; j < num_placements; ++j) { 467 + struct xe_hw_engine *hwe; 468 + 469 + n = j * width + i; 470 + 471 + hwe = find_hw_engine(xe, eci[n]); 472 + if (XE_IOCTL_ERR(xe, !hwe)) 473 + return 0; 474 + 475 + if (XE_IOCTL_ERR(xe, xe_hw_engine_is_reserved(hwe))) 476 + return 0; 477 + 478 + if (XE_IOCTL_ERR(xe, n && eci[n].gt_id != gt_id) || 479 + XE_IOCTL_ERR(xe, n && eci[n].engine_class != class)) 480 + return 0; 481 + 482 + class = eci[n].engine_class; 483 + gt_id = eci[n].gt_id; 484 + 485 + if (width == 1 || !i) 486 + return_mask |= BIT(eci[n].engine_instance); 487 + current_mask |= BIT(eci[n].engine_instance); 488 + } 489 + 490 + /* Parallel submissions must be logically contiguous */ 491 + if (i && XE_IOCTL_ERR(xe, current_mask != prev_mask << 1)) 492 + return 0; 493 + 494 + prev_mask = current_mask; 495 + } 496 + 497 + return return_mask; 498 + } 499 + 500 + int xe_engine_create_ioctl(struct drm_device *dev, void *data, 501 + struct drm_file *file) 502 + { 503 + struct xe_device *xe = 
to_xe_device(dev); 504 + struct xe_file *xef = to_xe_file(file); 505 + struct drm_xe_engine_create *args = data; 506 + struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE]; 507 + struct drm_xe_engine_class_instance __user *user_eci = 508 + u64_to_user_ptr(args->instances); 509 + struct xe_hw_engine *hwe; 510 + struct xe_vm *vm, *migrate_vm; 511 + struct xe_gt *gt; 512 + struct xe_engine *e = NULL; 513 + u32 logical_mask; 514 + u32 id; 515 + int len; 516 + int err; 517 + 518 + if (XE_IOCTL_ERR(xe, args->flags)) 519 + return -EINVAL; 520 + 521 + len = args->width * args->num_placements; 522 + if (XE_IOCTL_ERR(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE)) 523 + return -EINVAL; 524 + 525 + err = __copy_from_user(eci, user_eci, 526 + sizeof(struct drm_xe_engine_class_instance) * 527 + len); 528 + if (XE_IOCTL_ERR(xe, err)) 529 + return -EFAULT; 530 + 531 + if (XE_IOCTL_ERR(xe, eci[0].gt_id >= xe->info.tile_count)) 532 + return -EINVAL; 533 + 534 + xe_pm_runtime_get(xe); 535 + 536 + if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) { 537 + for_each_gt(gt, xe, id) { 538 + struct xe_engine *new; 539 + 540 + if (xe_gt_is_media_type(gt)) 541 + continue; 542 + 543 + eci[0].gt_id = gt->info.id; 544 + logical_mask = bind_engine_logical_mask(xe, gt, eci, 545 + args->width, 546 + args->num_placements); 547 + if (XE_IOCTL_ERR(xe, !logical_mask)) { 548 + err = -EINVAL; 549 + goto put_rpm; 550 + } 551 + 552 + hwe = find_hw_engine(xe, eci[0]); 553 + if (XE_IOCTL_ERR(xe, !hwe)) { 554 + err = -EINVAL; 555 + goto put_rpm; 556 + } 557 + 558 + migrate_vm = xe_migrate_get_vm(gt->migrate); 559 + new = xe_engine_create(xe, migrate_vm, logical_mask, 560 + args->width, hwe, 561 + ENGINE_FLAG_PERSISTENT | 562 + ENGINE_FLAG_VM | 563 + (id ? 
564 + ENGINE_FLAG_BIND_ENGINE_CHILD : 565 + 0)); 566 + xe_vm_put(migrate_vm); 567 + if (IS_ERR(new)) { 568 + err = PTR_ERR(new); 569 + if (e) 570 + goto put_engine; 571 + goto put_rpm; 572 + } 573 + if (id == 0) 574 + e = new; 575 + else 576 + list_add_tail(&new->multi_gt_list, 577 + &e->multi_gt_link); 578 + } 579 + } else { 580 + gt = xe_device_get_gt(xe, eci[0].gt_id); 581 + logical_mask = calc_validate_logical_mask(xe, gt, eci, 582 + args->width, 583 + args->num_placements); 584 + if (XE_IOCTL_ERR(xe, !logical_mask)) { 585 + err = -EINVAL; 586 + goto put_rpm; 587 + } 588 + 589 + hwe = find_hw_engine(xe, eci[0]); 590 + if (XE_IOCTL_ERR(xe, !hwe)) { 591 + err = -EINVAL; 592 + goto put_rpm; 593 + } 594 + 595 + vm = xe_vm_lookup(xef, args->vm_id); 596 + if (XE_IOCTL_ERR(xe, !vm)) { 597 + err = -ENOENT; 598 + goto put_rpm; 599 + } 600 + 601 + e = xe_engine_create(xe, vm, logical_mask, 602 + args->width, hwe, ENGINE_FLAG_PERSISTENT); 603 + xe_vm_put(vm); 604 + if (IS_ERR(e)) { 605 + err = PTR_ERR(e); 606 + goto put_rpm; 607 + } 608 + } 609 + 610 + if (args->extensions) { 611 + err = engine_user_extensions(xe, e, args->extensions, 0, true); 612 + if (XE_IOCTL_ERR(xe, err)) 613 + goto put_engine; 614 + } 615 + 616 + if (XE_IOCTL_ERR(xe, e->vm && xe_vm_in_compute_mode(e->vm) != 617 + !!(e->flags & ENGINE_FLAG_COMPUTE_MODE))) { 618 + err = -ENOTSUPP; 619 + goto put_engine; 620 + } 621 + 622 + e->persitent.xef = xef; 623 + 624 + mutex_lock(&xef->engine.lock); 625 + err = xa_alloc(&xef->engine.xa, &id, e, xa_limit_32b, GFP_KERNEL); 626 + mutex_unlock(&xef->engine.lock); 627 + if (err) 628 + goto put_engine; 629 + 630 + args->engine_id = id; 631 + 632 + return 0; 633 + 634 + put_engine: 635 + xe_engine_kill(e); 636 + xe_engine_put(e); 637 + put_rpm: 638 + xe_pm_runtime_put(xe); 639 + return err; 640 + } 641 + 642 + static void engine_kill_compute(struct xe_engine *e) 643 + { 644 + if (!xe_vm_in_compute_mode(e->vm)) 645 + return; 646 + 647 + down_write(&e->vm->lock); 648 + 
list_del(&e->compute.link); 649 + --e->vm->preempt.num_engines; 650 + if (e->compute.pfence) { 651 + dma_fence_enable_sw_signaling(e->compute.pfence); 652 + dma_fence_put(e->compute.pfence); 653 + e->compute.pfence = NULL; 654 + } 655 + up_write(&e->vm->lock); 656 + } 657 + 658 + void xe_engine_kill(struct xe_engine *e) 659 + { 660 + struct xe_engine *engine = e, *next; 661 + 662 + list_for_each_entry_safe(engine, next, &engine->multi_gt_list, 663 + multi_gt_link) { 664 + e->ops->kill(engine); 665 + engine_kill_compute(engine); 666 + } 667 + 668 + e->ops->kill(e); 669 + engine_kill_compute(e); 670 + } 671 + 672 + int xe_engine_destroy_ioctl(struct drm_device *dev, void *data, 673 + struct drm_file *file) 674 + { 675 + struct xe_device *xe = to_xe_device(dev); 676 + struct xe_file *xef = to_xe_file(file); 677 + struct drm_xe_engine_destroy *args = data; 678 + struct xe_engine *e; 679 + 680 + if (XE_IOCTL_ERR(xe, args->pad)) 681 + return -EINVAL; 682 + 683 + mutex_lock(&xef->engine.lock); 684 + e = xa_erase(&xef->engine.xa, args->engine_id); 685 + mutex_unlock(&xef->engine.lock); 686 + if (XE_IOCTL_ERR(xe, !e)) 687 + return -ENOENT; 688 + 689 + if (!(e->flags & ENGINE_FLAG_PERSISTENT)) 690 + xe_engine_kill(e); 691 + else 692 + xe_device_add_persitent_engines(xe, e); 693 + 694 + trace_xe_engine_close(e); 695 + xe_engine_put(e); 696 + xe_pm_runtime_put(xe); 697 + 698 + return 0; 699 + } 700 + 701 + int xe_engine_set_property_ioctl(struct drm_device *dev, void *data, 702 + struct drm_file *file) 703 + { 704 + struct xe_device *xe = to_xe_device(dev); 705 + struct xe_file *xef = to_xe_file(file); 706 + struct drm_xe_engine_set_property *args = data; 707 + struct xe_engine *e; 708 + int ret; 709 + u32 idx; 710 + 711 + e = xe_engine_lookup(xef, args->engine_id); 712 + if (XE_IOCTL_ERR(xe, !e)) 713 + return -ENOENT; 714 + 715 + if (XE_IOCTL_ERR(xe, args->property >= 716 + ARRAY_SIZE(engine_set_property_funcs))) { 717 + ret = -EINVAL; 718 + goto out; 719 + } 720 + 721 + idx 
= array_index_nospec(args->property, 722 + ARRAY_SIZE(engine_set_property_funcs)); 723 + ret = engine_set_property_funcs[idx](xe, e, args->value, false); 724 + if (XE_IOCTL_ERR(xe, ret)) 725 + goto out; 726 + 727 + if (args->extensions) 728 + ret = engine_user_extensions(xe, e, args->extensions, 0, 729 + false); 730 + out: 731 + xe_engine_put(e); 732 + 733 + return ret; 734 + }
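The placement validation in calc_validate_logical_mask() above treats the user's instance array as a width × num_placements matrix flattened as n = j * width + i, and requires each column of a parallel engine to sit exactly one logical instance above the previous one. A minimal userspace sketch of that core check (struct and function names here are simplified stand-ins, not the real uAPI types):

```c
#include <assert.h>
#include <stdint.h>

/* Simplified stand-in for drm_xe_engine_class_instance */
struct instance {
	uint16_t engine_instance;
};

/* Returns the logical mask of the first column, or 0 if the placement
 * matrix is invalid. Mirrors the contiguity rule in
 * calc_validate_logical_mask(): column i must be column i-1 shifted
 * left by one (current_mask == prev_mask << 1). */
static uint32_t calc_logical_mask(const struct instance *eci,
				  uint16_t width, uint16_t num_placements)
{
	uint32_t return_mask = 0, prev_mask = 0;

	for (uint16_t i = 0; i < width; ++i) {
		uint32_t current_mask = 0;

		for (uint16_t j = 0; j < num_placements; ++j) {
			uint16_t n = j * width + i;

			if (width == 1 || !i)
				return_mask |= 1u << eci[n].engine_instance;
			current_mask |= 1u << eci[n].engine_instance;
		}

		/* Parallel submissions must be logically contiguous */
		if (i && current_mask != prev_mask << 1)
			return 0;
		prev_mask = current_mask;
	}

	return return_mask;
}
```

For width 2 with two placements on instances {0,1} and {2,3}, the mask of first-slot instances (bits 0 and 2) comes back; swapping in a non-adjacent instance makes the contiguity check fail and the function return 0.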
+54
drivers/gpu/drm/xe/xe_engine.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2021 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_ENGINE_H_ 7 + #define _XE_ENGINE_H_ 8 + 9 + #include "xe_engine_types.h" 10 + #include "xe_vm_types.h" 11 + 12 + struct drm_device; 13 + struct drm_file; 14 + struct xe_device; 15 + struct xe_file; 16 + 17 + struct xe_engine *xe_engine_create(struct xe_device *xe, struct xe_vm *vm, 18 + u32 logical_mask, u16 width, 19 + struct xe_hw_engine *hw_engine, u32 flags); 20 + struct xe_engine *xe_engine_create_class(struct xe_device *xe, struct xe_gt *gt, 21 + struct xe_vm *vm, 22 + enum xe_engine_class class, u32 flags); 23 + 24 + void xe_engine_fini(struct xe_engine *e); 25 + void xe_engine_destroy(struct kref *ref); 26 + 27 + struct xe_engine *xe_engine_lookup(struct xe_file *xef, u32 id); 28 + 29 + static inline struct xe_engine *xe_engine_get(struct xe_engine *engine) 30 + { 31 + kref_get(&engine->refcount); 32 + return engine; 33 + } 34 + 35 + static inline void xe_engine_put(struct xe_engine *engine) 36 + { 37 + kref_put(&engine->refcount, xe_engine_destroy); 38 + } 39 + 40 + static inline bool xe_engine_is_parallel(struct xe_engine *engine) 41 + { 42 + return engine->width > 1; 43 + } 44 + 45 + void xe_engine_kill(struct xe_engine *e); 46 + 47 + int xe_engine_create_ioctl(struct drm_device *dev, void *data, 48 + struct drm_file *file); 49 + int xe_engine_destroy_ioctl(struct drm_device *dev, void *data, 50 + struct drm_file *file); 51 + int xe_engine_set_property_ioctl(struct drm_device *dev, void *data, 52 + struct drm_file *file); 53 + 54 + #endif
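The header above wraps engine lifetime in kref: xe_engine_get() takes a reference and xe_engine_put() drops one, with xe_engine_destroy() as the release callback invoked on the last put. A plain-C sketch of that pattern using C11 atomics (the struct and function names are illustrative; in the kernel this is struct kref with kref_get()/kref_put()):

```c
#include <stdatomic.h>

/* Minimal refcounted object mimicking the kref idiom: callers never
 * free the object directly, the last put runs the release callback. */
struct obj {
	atomic_int refcount;
	void (*release)(struct obj *obj);
};

static struct obj *obj_get(struct obj *obj)
{
	atomic_fetch_add(&obj->refcount, 1);
	return obj;
}

static void obj_put(struct obj *obj)
{
	/* fetch_sub returns the previous value: 1 means last reference */
	if (atomic_fetch_sub(&obj->refcount, 1) == 1)
		obj->release(obj);
}
```

This is why xe_engine_put() can be called from any context holding a reference: teardown happens exactly once, wherever the final reference is dropped.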
+208
drivers/gpu/drm/xe/xe_engine_types.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_ENGINE_TYPES_H_ 7 + #define _XE_ENGINE_TYPES_H_ 8 + 9 + #include <linux/kref.h> 10 + 11 + #include <drm/gpu_scheduler.h> 12 + 13 + #include "xe_gpu_scheduler_types.h" 14 + #include "xe_hw_engine_types.h" 15 + #include "xe_hw_fence_types.h" 16 + #include "xe_lrc_types.h" 17 + 18 + struct xe_execlist_engine; 19 + struct xe_gt; 20 + struct xe_guc_engine; 21 + struct xe_hw_engine; 22 + struct xe_vm; 23 + 24 + enum xe_engine_priority { 25 + XE_ENGINE_PRIORITY_UNSET = -2, /* For execlist usage only */ 26 + XE_ENGINE_PRIORITY_LOW = 0, 27 + XE_ENGINE_PRIORITY_NORMAL, 28 + XE_ENGINE_PRIORITY_HIGH, 29 + XE_ENGINE_PRIORITY_KERNEL, 30 + 31 + XE_ENGINE_PRIORITY_COUNT 32 + }; 33 + 34 + /** 35 + * struct xe_engine - Submission engine 36 + * 37 + * Contains all state necessary for submissions. Can either be a user object or 38 + * a kernel object. 39 + */ 40 + struct xe_engine { 41 + /** @gt: graphics tile this engine can submit to */ 42 + struct xe_gt *gt; 43 + /** 44 + * @hwe: A hardware engine of the same class. May (physical engine) or may 45 + * not (virtual engine) be where jobs actually end up running. Should never 46 + * really be used for submissions. 
47 + */ 48 + struct xe_hw_engine *hwe; 49 + /** @refcount: ref count of this engine */ 50 + struct kref refcount; 51 + /** @vm: VM (address space) for this engine */ 52 + struct xe_vm *vm; 53 + /** @class: class of this engine */ 54 + enum xe_engine_class class; 55 + /** @priority: priority of this exec queue */ 56 + enum xe_engine_priority priority; 57 + /** 58 + * @logical_mask: logical mask of where job submitted to engine can run 59 + */ 60 + u32 logical_mask; 61 + /** @name: name of this engine */ 62 + char name[MAX_FENCE_NAME_LEN]; 63 + /** @width: width (number BB submitted per exec) of this engine */ 64 + u16 width; 65 + /** @fence_irq: fence IRQ used to signal job completion */ 66 + struct xe_hw_fence_irq *fence_irq; 67 + 68 + #define ENGINE_FLAG_BANNED BIT(0) 69 + #define ENGINE_FLAG_KERNEL BIT(1) 70 + #define ENGINE_FLAG_PERSISTENT BIT(2) 71 + #define ENGINE_FLAG_COMPUTE_MODE BIT(3) 72 + #define ENGINE_FLAG_VM BIT(4) 73 + #define ENGINE_FLAG_BIND_ENGINE_CHILD BIT(5) 74 + #define ENGINE_FLAG_WA BIT(6) 75 + 76 + /** 77 + * @flags: flags for this engine, should statically setup aside from ban 78 + * bit 79 + */ 80 + unsigned long flags; 81 + 82 + union { 83 + /** @multi_gt_list: list head for VM bind engines if multi-GT */ 84 + struct list_head multi_gt_list; 85 + /** @multi_gt_link: link for VM bind engines if multi-GT */ 86 + struct list_head multi_gt_link; 87 + }; 88 + 89 + union { 90 + /** @execlist: execlist backend specific state for engine */ 91 + struct xe_execlist_engine *execlist; 92 + /** @guc: GuC backend specific state for engine */ 93 + struct xe_guc_engine *guc; 94 + }; 95 + 96 + /** 97 + * @persitent: persitent engine state 98 + */ 99 + struct { 100 + /** @xef: file which this engine belongs to */ 101 + struct xe_file *xef; 102 + /** @link: link in list of persitent engines */ 103 + struct list_head link; 104 + } persitent; 105 + 106 + union { 107 + /** 108 + * @parallel: parallel submission state 109 + */ 110 + struct { 111 + /** 
@composite_fence_ctx: context composite fence */ 112 + u64 composite_fence_ctx; 113 + /** @composite_fence_seqno: seqno for composite fence */ 114 + u32 composite_fence_seqno; 115 + } parallel; 116 + /** 117 + * @bind: bind submission state 118 + */ 119 + struct { 120 + /** @fence_ctx: context bind fence */ 121 + u64 fence_ctx; 122 + /** @fence_seqno: seqno for bind fence */ 123 + u32 fence_seqno; 124 + } bind; 125 + }; 126 + 127 + /** @sched_props: scheduling properties */ 128 + struct { 129 + /** @timeslice_us: timeslice period in micro-seconds */ 130 + u32 timeslice_us; 131 + /** @preempt_timeout_us: preemption timeout in micro-seconds */ 132 + u32 preempt_timeout_us; 133 + } sched_props; 134 + 135 + /** @compute: compute engine state */ 136 + struct { 137 + /** @pfence: preemption fence */ 138 + struct dma_fence *pfence; 139 + /** @context: preemption fence context */ 140 + u64 context; 141 + /** @seqno: preemption fence seqno */ 142 + u32 seqno; 143 + /** @link: link into VM's list of engines */ 144 + struct list_head link; 145 + /** @lock: preemption fences lock */ 146 + spinlock_t lock; 147 + } compute; 148 + 149 + /** @usm: unified shared memory state */ 150 + struct { 151 + /** @acc_trigger: access counter trigger */ 152 + u32 acc_trigger; 153 + /** @acc_notify: access counter notify */ 154 + u32 acc_notify; 155 + /** @acc_granularity: access counter granularity */ 156 + u32 acc_granularity; 157 + } usm; 158 + 159 + /** @ops: submission backend engine operations */ 160 + const struct xe_engine_ops *ops; 161 + 162 + /** @ring_ops: ring operations for this engine */ 163 + const struct xe_ring_ops *ring_ops; 164 + /** @entity: DRM sched entity for this engine (1 to 1 relationship) */ 165 + struct drm_sched_entity *entity; 166 + /** @lrc: logical ring context for this engine */ 167 + struct xe_lrc lrc[0]; 168 + }; 169 + 170 + /** 171 + * struct xe_engine_ops - Submission backend engine operations 172 + */ 173 + struct xe_engine_ops { 174 + /** @init: 
Initialize engine for submission backend */ 175 + int (*init)(struct xe_engine *e); 176 + /** @kill: Kill inflight submissions for backend */ 177 + void (*kill)(struct xe_engine *e); 178 + /** @fini: Fini engine for submission backend */ 179 + void (*fini)(struct xe_engine *e); 180 + /** @set_priority: Set priority for engine */ 181 + int (*set_priority)(struct xe_engine *e, 182 + enum xe_engine_priority priority); 183 + /** @set_timeslice: Set timeslice for engine */ 184 + int (*set_timeslice)(struct xe_engine *e, u32 timeslice_us); 185 + /** @set_preempt_timeout: Set preemption timeout for engine */ 186 + int (*set_preempt_timeout)(struct xe_engine *e, u32 preempt_timeout_us); 187 + /** @set_job_timeout: Set job timeout for engine */ 188 + int (*set_job_timeout)(struct xe_engine *e, u32 job_timeout_ms); 189 + /** 190 + * @suspend: Suspend engine from executing, allowed to be called 191 + * multiple times in a row before resume with the caveat that 192 + * suspend_wait returns before calling suspend again. 193 + */ 194 + int (*suspend)(struct xe_engine *e); 195 + /** 196 + * @suspend_wait: Wait for an engine to suspend executing, should be 197 + * called after suspend. 198 + */ 199 + void (*suspend_wait)(struct xe_engine *e); 200 + /** 201 + * @resume: Resume engine execution, engine must be in a suspended 202 + * state and the dma fence returned from the most recent suspend call must 203 + * be signalled when this function is called. 204 + */ 205 + void (*resume)(struct xe_engine *e); 206 + }; 207 + 208 + #endif
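struct xe_engine above ends with `struct xe_lrc lrc[0]`: one logical ring context per parallel-submission slot, allocated in the same block as the engine itself. A userspace sketch of that trailing-array layout, using the standard C99 flexible array member (types here are stand-ins for the real xe structs):

```c
#include <stdint.h>
#include <stdlib.h>

/* Stand-in for struct xe_lrc */
struct lrc {
	uint32_t ring_tail;
};

/* Stand-in for struct xe_engine: header fields followed by a trailing
 * array sized at allocation time. [0]-sized arrays are the older GNU
 * spelling of the same idiom. */
struct engine {
	uint16_t width;
	struct lrc lrc[]; /* one entry per submission width */
};

/* Single allocation covers the header plus width LRC entries, keeping
 * the LRCs contiguous with the engine. */
static struct engine *engine_alloc(uint16_t width)
{
	struct engine *e = calloc(1, sizeof(*e) + width * sizeof(struct lrc));

	if (e)
		e->width = width;
	return e;
}
```

A width-N engine then indexes e->lrc[0..N-1] without any extra pointer chasing or second allocation.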
+390
drivers/gpu/drm/xe/xe_exec.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #include <drm/drm_device.h> 7 + #include <drm/drm_file.h> 8 + #include <drm/xe_drm.h> 9 + 10 + #include "xe_bo.h" 11 + #include "xe_device.h" 12 + #include "xe_engine.h" 13 + #include "xe_exec.h" 14 + #include "xe_macros.h" 15 + #include "xe_sched_job.h" 16 + #include "xe_sync.h" 17 + #include "xe_vm.h" 18 + 19 + /** 20 + * DOC: Execbuf (User GPU command submission) 21 + * 22 + * Execs have historically been rather complicated in DRM drivers (at least in 23 + * the i915) because of a few things: 24 + * 25 + * - Passing in a list of BOs which are read / written to, creating implicit syncs 26 + * - Binding at exec time 27 + * - Flow controlling the ring at exec time 28 + * 29 + * In XE we avoid all of this complication by not allowing a BO list to be 30 + * passed into an exec, using the dma-buf implicit sync uAPI, having binds as 31 + * separate operations, and using the DRM scheduler to flow control the ring. 32 + * Let's deep dive on each of these. 33 + * 34 + * We can get away from a BO list by forcing the user to use in / out fences on 35 + * every exec rather than the kernel tracking dependencies of BOs (e.g. if the 36 + * user knows an exec writes to a BO and reads from the BO in the next exec, it 37 + * is the user's responsibility to pass in / out fences between the two execs). 38 + * 39 + * Implicit dependencies for external BOs are handled by using the dma-buf 40 + * implicit dependency uAPI (TODO: add link). To make this work, each exec must 41 + * install the job's fence into the DMA_RESV_USAGE_WRITE slot of every external 42 + * BO mapped in the VM. 43 + * 44 + * We do not allow a user to trigger a bind at exec time; rather we have a VM 45 + * bind IOCTL which uses the same in / out fence interface as exec. In that 46 + * sense, a VM bind is basically the same operation as an exec from the user 47 + * perspective. e.g.
If an exec depends on a VM bind, use the in / out fence 48 + * interface (struct drm_xe_sync) to synchronize, just like syncing between two 49 + * dependent execs. 50 + * 51 + * Although a user cannot trigger a bind, we still have to rebind userptrs in 52 + * the VM that have been invalidated since the last exec; likewise we also have 53 + * to rebind BOs that have been evicted by the kernel. We schedule these rebinds 54 + * behind any pending kernel operations on any external BOs in the VM or any BOs 55 + * private to the VM. This is accomplished by the rebinds waiting on the BOs' 56 + * DMA_RESV_USAGE_KERNEL slot (kernel ops) and kernel ops waiting on all BOs' 57 + * slots (inflight execs are in the DMA_RESV_USAGE_BOOKKEEP slot for private BOs 58 + * and in DMA_RESV_USAGE_WRITE for external BOs). 59 + * 60 + * Rebinds / dma-resv usage applies to non-compute mode VMs only, as for compute 61 + * mode VMs we use preempt fences and a rebind worker (TODO: add link). 62 + * 63 + * There is no need to flow control the ring in the exec as we write the ring at 64 + * submission time and set the DRM scheduler max job limit to SIZE_OF_RING / 65 + * MAX_JOB_SIZE. The DRM scheduler will then hold all jobs until space in the 66 + * ring is available. 67 + * 68 + * All of this results in a rather simple exec implementation. 69 + * 70 + * Flow 71 + * ~~~~ 72 + * 73 + * .. 
code-block:: 74 + * 75 + * Parse input arguments 76 + * Wait for any async VM bind passed as in-fences to start 77 + * <----------------------------------------------------------------------| 78 + * Lock global VM lock in read mode | 79 + * Pin userptrs (also finds userptr invalidated since last exec) | 80 + * Lock exec (VM dma-resv lock, external BOs dma-resv locks) | 81 + * Validate BOs that have been evicted | 82 + * Create job | 83 + * Rebind invalidated userptrs + evicted BOs (non-compute-mode) | 84 + * Add rebind fence dependency to job | 85 + * Add job VM dma-resv bookkeeping slot (non-compute mode) | 86 + * Add job to external BOs dma-resv write slots (non-compute mode) | 87 + * Check if any userptrs invalidated since pin ------ Drop locks ---------| 88 + * Install in / out fences for job 89 + * Submit job 90 + * Unlock all 91 + */ 92 + 93 + static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww, 94 + struct ttm_validate_buffer tv_onstack[], 95 + struct ttm_validate_buffer **tv, 96 + struct list_head *objs) 97 + { 98 + struct xe_vm *vm = e->vm; 99 + struct xe_vma *vma; 100 + LIST_HEAD(dups); 101 + int err; 102 + 103 + *tv = NULL; 104 + if (xe_vm_no_dma_fences(e->vm)) 105 + return 0; 106 + 107 + err = xe_vm_lock_dma_resv(vm, ww, tv_onstack, tv, objs, true, 1); 108 + if (err) 109 + return err; 110 + 111 + /* 112 + * Validate BOs that have been evicted (i.e. make sure the 113 + * BOs have valid placements possibly moving an evicted BO back 114 + * to a location where the GPU can access it). 
115 + */ 116 + list_for_each_entry(vma, &vm->rebind_list, rebind_link) { 117 + if (xe_vma_is_userptr(vma)) 118 + continue; 119 + 120 + err = xe_bo_validate(vma->bo, vm, false); 121 + if (err) { 122 + xe_vm_unlock_dma_resv(vm, tv_onstack, *tv, ww, objs); 123 + *tv = NULL; 124 + return err; 125 + } 126 + } 127 + 128 + return 0; 129 + } 130 + 131 + static void xe_exec_end(struct xe_engine *e, 132 + struct ttm_validate_buffer *tv_onstack, 133 + struct ttm_validate_buffer *tv, 134 + struct ww_acquire_ctx *ww, 135 + struct list_head *objs) 136 + { 137 + if (!xe_vm_no_dma_fences(e->vm)) 138 + xe_vm_unlock_dma_resv(e->vm, tv_onstack, tv, ww, objs); 139 + } 140 + 141 + int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 142 + { 143 + struct xe_device *xe = to_xe_device(dev); 144 + struct xe_file *xef = to_xe_file(file); 145 + struct drm_xe_exec *args = data; 146 + struct drm_xe_sync __user *syncs_user = u64_to_user_ptr(args->syncs); 147 + u64 __user *addresses_user = u64_to_user_ptr(args->address); 148 + struct xe_engine *engine; 149 + struct xe_sync_entry *syncs = NULL; 150 + u64 addresses[XE_HW_ENGINE_MAX_INSTANCE]; 151 + struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV]; 152 + struct ttm_validate_buffer *tv = NULL; 153 + u32 i, num_syncs = 0; 154 + struct xe_sched_job *job; 155 + struct dma_fence *rebind_fence; 156 + struct xe_vm *vm; 157 + struct ww_acquire_ctx ww; 158 + struct list_head objs; 159 + bool write_locked; 160 + int err = 0; 161 + 162 + if (XE_IOCTL_ERR(xe, args->extensions)) 163 + return -EINVAL; 164 + 165 + engine = xe_engine_lookup(xef, args->engine_id); 166 + if (XE_IOCTL_ERR(xe, !engine)) 167 + return -ENOENT; 168 + 169 + if (XE_IOCTL_ERR(xe, engine->flags & ENGINE_FLAG_VM)) 170 + return -EINVAL; 171 + 172 + if (XE_IOCTL_ERR(xe, engine->width != args->num_batch_buffer)) 173 + return -EINVAL; 174 + 175 + if (XE_IOCTL_ERR(xe, engine->flags & ENGINE_FLAG_BANNED)) { 176 + err = -ECANCELED; 177 + goto err_engine; 178 + } 179 + 180 
+ if (args->num_syncs) { 181 + syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); 182 + if (!syncs) { 183 + err = -ENOMEM; 184 + goto err_engine; 185 + } 186 + } 187 + 188 + vm = engine->vm; 189 + 190 + for (i = 0; i < args->num_syncs; i++) { 191 + err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs++], 192 + &syncs_user[i], true, 193 + xe_vm_no_dma_fences(vm)); 194 + if (err) 195 + goto err_syncs; 196 + } 197 + 198 + if (xe_engine_is_parallel(engine)) { 199 + err = __copy_from_user(addresses, addresses_user, sizeof(u64) * 200 + engine->width); 201 + if (err) { 202 + err = -EFAULT; 203 + goto err_syncs; 204 + } 205 + } 206 + 207 + /* 208 + * We can't install a job into the VM dma-resv shared slot before an 209 + * async VM bind passed in as a fence without the risk of deadlocking as 210 + * the bind can trigger an eviction which in turn depends on anything in 211 + * the VM dma-resv shared slots. Not an ideal solution, but we wait for 212 + * all dependent async VM binds to start (install correct fences into 213 + * dma-resv slots) before moving forward. 
214 + */ 215 + if (!xe_vm_no_dma_fences(vm) && 216 + vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS) { 217 + for (i = 0; i < args->num_syncs; i++) { 218 + struct dma_fence *fence = syncs[i].fence; 219 + if (fence) { 220 + err = xe_vm_async_fence_wait_start(fence); 221 + if (err) 222 + goto err_syncs; 223 + } 224 + } 225 + } 226 + 227 + retry: 228 + if (!xe_vm_no_dma_fences(vm) && xe_vm_userptr_check_repin(vm)) { 229 + err = down_write_killable(&vm->lock); 230 + write_locked = true; 231 + } else { 232 + /* We don't allow execs while the VM is in error state */ 233 + err = down_read_interruptible(&vm->lock); 234 + write_locked = false; 235 + } 236 + if (err) 237 + goto err_syncs; 238 + 239 + /* We don't allow execs while the VM is in error state */ 240 + if (vm->async_ops.error) { 241 + err = vm->async_ops.error; 242 + goto err_unlock_list; 243 + } 244 + 245 + /* 246 + * Extreme corner where we exit a VM error state with a munmap style VM 247 + * unbind inflight which requires a rebind. In this case the rebind 248 + * needs to install some fences into the dma-resv slots. The worker to 249 + * do this is queued; let that worker make progress by dropping vm->lock, 250 + * flushing the worker and retrying the exec. 251 + */ 252 + if (vm->async_ops.munmap_rebind_inflight) { 253 + if (write_locked) 254 + up_write(&vm->lock); 255 + else 256 + up_read(&vm->lock); 257 + flush_work(&vm->async_ops.work); 258 + goto retry; 259 + } 260 + 261 + if (write_locked) { 262 + err = xe_vm_userptr_pin(vm); 263 + downgrade_write(&vm->lock); 264 + write_locked = false; 265 + if (err) 266 + goto err_unlock_list; 267 + } 268 + 269 + err = xe_exec_begin(engine, &ww, tv_onstack, &tv, &objs); 270 + if (err) 271 + goto err_unlock_list; 272 + 273 + if (xe_vm_is_closed(engine->vm)) { 274 + drm_warn(&xe->drm, "Trying to schedule after vm is closed\n"); 275 + err = -EIO; 276 + goto err_engine_end; 277 + } 278 + 279 + job = xe_sched_job_create(engine, xe_engine_is_parallel(engine) ? 
280 + addresses : &args->address); 281 + if (IS_ERR(job)) { 282 + err = PTR_ERR(job); 283 + goto err_engine_end; 284 + } 285 + 286 + /* 287 + * Rebind any invalidated userptr or evicted BOs in the VM, non-compute 288 + * VM mode only. 289 + */ 290 + rebind_fence = xe_vm_rebind(vm, false); 291 + if (IS_ERR(rebind_fence)) { 292 + err = PTR_ERR(rebind_fence); 293 + goto err_put_job; 294 + } 295 + 296 + /* 297 + * We store the rebind_fence in the VM so subsequent execs don't get 298 + * scheduled before the rebinds of userptrs / evicted BOs is complete. 299 + */ 300 + if (rebind_fence) { 301 + dma_fence_put(vm->rebind_fence); 302 + vm->rebind_fence = rebind_fence; 303 + } 304 + if (vm->rebind_fence) { 305 + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, 306 + &vm->rebind_fence->flags)) { 307 + dma_fence_put(vm->rebind_fence); 308 + vm->rebind_fence = NULL; 309 + } else { 310 + dma_fence_get(vm->rebind_fence); 311 + err = drm_sched_job_add_dependency(&job->drm, 312 + vm->rebind_fence); 313 + if (err) 314 + goto err_put_job; 315 + } 316 + } 317 + 318 + /* Wait behind munmap style rebinds */ 319 + if (!xe_vm_no_dma_fences(vm)) { 320 + err = drm_sched_job_add_resv_dependencies(&job->drm, 321 + &vm->resv, 322 + DMA_RESV_USAGE_KERNEL); 323 + if (err) 324 + goto err_put_job; 325 + } 326 + 327 + for (i = 0; i < num_syncs && !err; i++) 328 + err = xe_sync_entry_add_deps(&syncs[i], job); 329 + if (err) 330 + goto err_put_job; 331 + 332 + if (!xe_vm_no_dma_fences(vm)) { 333 + err = down_read_interruptible(&vm->userptr.notifier_lock); 334 + if (err) 335 + goto err_put_job; 336 + 337 + err = __xe_vm_userptr_needs_repin(vm); 338 + if (err) 339 + goto err_repin; 340 + } 341 + 342 + /* 343 + * Point of no return, if we error after this point just set an error on 344 + * the job and let the DRM scheduler / backend clean up the job. 
345 + */ 346 + xe_sched_job_arm(job); 347 + if (!xe_vm_no_dma_fences(vm)) { 348 + /* Block userptr invalidations / BO eviction */ 349 + dma_resv_add_fence(&vm->resv, 350 + &job->drm.s_fence->finished, 351 + DMA_RESV_USAGE_BOOKKEEP); 352 + 353 + /* 354 + * Make implicit sync work across drivers, assuming all external 355 + * BOs are written as we don't pass in a read / write list. 356 + */ 357 + xe_vm_fence_all_extobjs(vm, &job->drm.s_fence->finished, 358 + DMA_RESV_USAGE_WRITE); 359 + } 360 + 361 + for (i = 0; i < num_syncs; i++) 362 + xe_sync_entry_signal(&syncs[i], job, 363 + &job->drm.s_fence->finished); 364 + 365 + xe_sched_job_push(job); 366 + 367 + err_repin: 368 + if (!xe_vm_no_dma_fences(vm)) 369 + up_read(&vm->userptr.notifier_lock); 370 + err_put_job: 371 + if (err) 372 + xe_sched_job_put(job); 373 + err_engine_end: 374 + xe_exec_end(engine, tv_onstack, tv, &ww, &objs); 375 + err_unlock_list: 376 + if (write_locked) 377 + up_write(&vm->lock); 378 + else 379 + up_read(&vm->lock); 380 + if (err == -EAGAIN) 381 + goto retry; 382 + err_syncs: 383 + for (i = 0; i < num_syncs; i++) 384 + xe_sync_entry_cleanup(&syncs[i]); 385 + kfree(syncs); 386 + err_engine: 387 + xe_engine_put(engine); 388 + 389 + return err; 390 + }
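The DOC comment in this file explains why the exec path needs no ring-space check: the scheduler's in-flight job limit is set to SIZE_OF_RING / MAX_JOB_SIZE, so even a worst-case job payload cannot overrun the ring. A toy model of that flow-control rule (the sizes and struct are illustrative, not the driver's real ring geometry):

```c
#include <stdint.h>

/* Illustrative sizes: cap = ring bytes / worst-case job bytes */
#define SIZE_OF_RING 16384u
#define MAX_JOB_SIZE 256u

struct ring {
	uint32_t inflight; /* jobs handed to hardware, not yet retired */
	uint32_t limit;    /* scheduler's in-flight job cap */
};

/* Returns 0 if the job may be submitted, -11 (-EAGAIN) if the scheduler
 * must hold it until a prior job retires. Because limit * MAX_JOB_SIZE
 * <= SIZE_OF_RING, the ring can never overflow. */
static int ring_try_submit(struct ring *r)
{
	if (r->inflight >= r->limit)
		return -11;
	r->inflight++;
	return 0;
}
```

With these numbers the cap is 64 in-flight jobs; the 65th submission is held by the scheduler rather than written past the ring tail.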
+14
drivers/gpu/drm/xe/xe_exec.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_EXEC_H_ 7 + #define _XE_EXEC_H_ 8 + 9 + struct drm_device; 10 + struct drm_file; 11 + 12 + int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file); 13 + 14 + #endif
+489
drivers/gpu/drm/xe/xe_execlist.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2021 Intel Corporation 4 + */ 5 + 6 + #include <drm/drm_managed.h> 7 + 8 + #include "xe_execlist.h" 9 + 10 + #include "xe_bo.h" 11 + #include "xe_device.h" 12 + #include "xe_engine.h" 13 + #include "xe_hw_fence.h" 14 + #include "xe_gt.h" 15 + #include "xe_lrc.h" 16 + #include "xe_macros.h" 17 + #include "xe_mmio.h" 18 + #include "xe_mocs.h" 19 + #include "xe_ring_ops_types.h" 20 + #include "xe_sched_job.h" 21 + 22 + #include "i915_reg.h" 23 + #include "gt/intel_gpu_commands.h" 24 + #include "gt/intel_gt_regs.h" 25 + #include "gt/intel_lrc_reg.h" 26 + #include "gt/intel_engine_regs.h" 27 + 28 + #define XE_EXECLIST_HANG_LIMIT 1 29 + 30 + #define GEN11_SW_CTX_ID_SHIFT 37 31 + #define GEN11_SW_CTX_ID_WIDTH 11 32 + #define XEHP_SW_CTX_ID_SHIFT 39 33 + #define XEHP_SW_CTX_ID_WIDTH 16 34 + 35 + #define GEN11_SW_CTX_ID \ 36 + GENMASK_ULL(GEN11_SW_CTX_ID_WIDTH + GEN11_SW_CTX_ID_SHIFT - 1, \ 37 + GEN11_SW_CTX_ID_SHIFT) 38 + 39 + #define XEHP_SW_CTX_ID \ 40 + GENMASK_ULL(XEHP_SW_CTX_ID_WIDTH + XEHP_SW_CTX_ID_SHIFT - 1, \ 41 + XEHP_SW_CTX_ID_SHIFT) 42 + 43 + 44 + static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc, 45 + u32 ctx_id) 46 + { 47 + struct xe_gt *gt = hwe->gt; 48 + struct xe_device *xe = gt_to_xe(gt); 49 + u64 lrc_desc; 50 + 51 + printk(KERN_INFO "__start_lrc(%s, 0x%p, %u)\n", hwe->name, lrc, ctx_id); 52 + 53 + lrc_desc = xe_lrc_descriptor(lrc); 54 + 55 + if (GRAPHICS_VERx100(xe) >= 1250) { 56 + XE_BUG_ON(!FIELD_FIT(XEHP_SW_CTX_ID, ctx_id)); 57 + lrc_desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id); 58 + } else { 59 + XE_BUG_ON(!FIELD_FIT(GEN11_SW_CTX_ID, ctx_id)); 60 + lrc_desc |= FIELD_PREP(GEN11_SW_CTX_ID, ctx_id); 61 + } 62 + 63 + if (hwe->class == XE_ENGINE_CLASS_COMPUTE) 64 + xe_mmio_write32(hwe->gt, GEN12_RCU_MODE.reg, 65 + _MASKED_BIT_ENABLE(GEN12_RCU_MODE_CCS_ENABLE)); 66 + 67 + xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); 68 + lrc->ring.old_tail = lrc->ring.tail; 69 + 
70 + /* 71 + * Make sure the context image is complete before we submit it to HW. 72 + * 73 + * Ostensibly, writes (including the WCB) should be flushed prior to 74 + * an uncached write such as our mmio register access, the empirical 75 + * evidence (esp. on Braswell) suggests that the WC write into memory 76 + * may not be visible to the HW prior to the completion of the UC 77 + * register write and that we may begin execution from the context 78 + * before its image is complete leading to invalid PD chasing. 79 + */ 80 + wmb(); 81 + 82 + xe_mmio_write32(gt, RING_HWS_PGA(hwe->mmio_base).reg, 83 + xe_bo_ggtt_addr(hwe->hwsp)); 84 + xe_mmio_read32(gt, RING_HWS_PGA(hwe->mmio_base).reg); 85 + xe_mmio_write32(gt, RING_MODE_GEN7(hwe->mmio_base).reg, 86 + _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE)); 87 + 88 + xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS(hwe->mmio_base).reg + 0, 89 + lower_32_bits(lrc_desc)); 90 + xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS(hwe->mmio_base).reg + 4, 91 + upper_32_bits(lrc_desc)); 92 + xe_mmio_write32(gt, RING_EXECLIST_CONTROL(hwe->mmio_base).reg, 93 + EL_CTRL_LOAD); 94 + } 95 + 96 + static void __xe_execlist_port_start(struct xe_execlist_port *port, 97 + struct xe_execlist_engine *exl) 98 + { 99 + struct xe_device *xe = gt_to_xe(port->hwe->gt); 100 + int max_ctx = FIELD_MAX(GEN11_SW_CTX_ID); 101 + 102 + if (GRAPHICS_VERx100(xe) >= 1250) 103 + max_ctx = FIELD_MAX(XEHP_SW_CTX_ID); 104 + 105 + xe_execlist_port_assert_held(port); 106 + 107 + if (port->running_exl != exl || !exl->has_run) { 108 + port->last_ctx_id++; 109 + 110 + /* 0 is reserved for the kernel context */ 111 + if (port->last_ctx_id > max_ctx) 112 + port->last_ctx_id = 1; 113 + } 114 + 115 + __start_lrc(port->hwe, exl->engine->lrc, port->last_ctx_id); 116 + port->running_exl = exl; 117 + exl->has_run = true; 118 + } 119 + 120 + static void __xe_execlist_port_idle(struct xe_execlist_port *port) 121 + { 122 + u32 noop[2] = { MI_NOOP, MI_NOOP }; 123 + 124 + 
xe_execlist_port_assert_held(port); 125 + 126 + if (!port->running_exl) 127 + return; 128 + 129 + printk(KERN_INFO "__xe_execlist_port_idle(%d:%d)\n", port->hwe->class, 130 + port->hwe->instance); 131 + 132 + xe_lrc_write_ring(&port->hwe->kernel_lrc, noop, sizeof(noop)); 133 + __start_lrc(port->hwe, &port->hwe->kernel_lrc, 0); 134 + port->running_exl = NULL; 135 + } 136 + 137 + static bool xe_execlist_is_idle(struct xe_execlist_engine *exl) 138 + { 139 + struct xe_lrc *lrc = exl->engine->lrc; 140 + 141 + return lrc->ring.tail == lrc->ring.old_tail; 142 + } 143 + 144 + static void __xe_execlist_port_start_next_active(struct xe_execlist_port *port) 145 + { 146 + struct xe_execlist_engine *exl = NULL; 147 + int i; 148 + 149 + xe_execlist_port_assert_held(port); 150 + 151 + for (i = ARRAY_SIZE(port->active) - 1; i >= 0; i--) { 152 + while (!list_empty(&port->active[i])) { 153 + exl = list_first_entry(&port->active[i], 154 + struct xe_execlist_engine, 155 + active_link); 156 + list_del(&exl->active_link); 157 + 158 + if (xe_execlist_is_idle(exl)) { 159 + exl->active_priority = XE_ENGINE_PRIORITY_UNSET; 160 + continue; 161 + } 162 + 163 + list_add_tail(&exl->active_link, &port->active[i]); 164 + __xe_execlist_port_start(port, exl); 165 + return; 166 + } 167 + } 168 + 169 + __xe_execlist_port_idle(port); 170 + } 171 + 172 + static u64 read_execlist_status(struct xe_hw_engine *hwe) 173 + { 174 + struct xe_gt *gt = hwe->gt; 175 + u32 hi, lo; 176 + 177 + lo = xe_mmio_read32(gt, RING_EXECLIST_STATUS_LO(hwe->mmio_base).reg); 178 + hi = xe_mmio_read32(gt, RING_EXECLIST_STATUS_HI(hwe->mmio_base).reg); 179 + 180 + printk(KERN_INFO "EXECLIST_STATUS %d:%d = 0x%08x %08x\n", hwe->class, 181 + hwe->instance, hi, lo); 182 + 183 + return lo | (u64)hi << 32; 184 + } 185 + 186 + static void xe_execlist_port_irq_handler_locked(struct xe_execlist_port *port) 187 + { 188 + u64 status; 189 + 190 + xe_execlist_port_assert_held(port); 191 + 192 + status = read_execlist_status(port->hwe); 193 + 
if (status & BIT(7)) 194 + return; 195 + 196 + __xe_execlist_port_start_next_active(port); 197 + } 198 + 199 + static void xe_execlist_port_irq_handler(struct xe_hw_engine *hwe, 200 + u16 intr_vec) 201 + { 202 + struct xe_execlist_port *port = hwe->exl_port; 203 + 204 + spin_lock(&port->lock); 205 + xe_execlist_port_irq_handler_locked(port); 206 + spin_unlock(&port->lock); 207 + } 208 + 209 + static void xe_execlist_port_wake_locked(struct xe_execlist_port *port, 210 + enum xe_engine_priority priority) 211 + { 212 + xe_execlist_port_assert_held(port); 213 + 214 + if (port->running_exl && port->running_exl->active_priority >= priority) 215 + return; 216 + 217 + __xe_execlist_port_start_next_active(port); 218 + } 219 + 220 + static void xe_execlist_make_active(struct xe_execlist_engine *exl) 221 + { 222 + struct xe_execlist_port *port = exl->port; 223 + enum xe_engine_priority priority = exl->active_priority; 224 + 225 + XE_BUG_ON(priority == XE_ENGINE_PRIORITY_UNSET); 226 + XE_BUG_ON(priority < 0); 227 + XE_BUG_ON(priority >= ARRAY_SIZE(exl->port->active)); 228 + 229 + spin_lock_irq(&port->lock); 230 + 231 + if (exl->active_priority != priority && 232 + exl->active_priority != XE_ENGINE_PRIORITY_UNSET) { 233 + /* Priority changed, move it to the right list */ 234 + list_del(&exl->active_link); 235 + exl->active_priority = XE_ENGINE_PRIORITY_UNSET; 236 + } 237 + 238 + if (exl->active_priority == XE_ENGINE_PRIORITY_UNSET) { 239 + exl->active_priority = priority; 240 + list_add_tail(&exl->active_link, &port->active[priority]); 241 + } 242 + 243 + xe_execlist_port_wake_locked(exl->port, priority); 244 + 245 + spin_unlock_irq(&port->lock); 246 + } 247 + 248 + static void xe_execlist_port_irq_fail_timer(struct timer_list *timer) 249 + { 250 + struct xe_execlist_port *port = 251 + container_of(timer, struct xe_execlist_port, irq_fail); 252 + 253 + spin_lock_irq(&port->lock); 254 + xe_execlist_port_irq_handler_locked(port); 255 + spin_unlock_irq(&port->lock); 256 + 257 + 
port->irq_fail.expires = jiffies + msecs_to_jiffies(1000); 258 + add_timer(&port->irq_fail); 259 + } 260 + 261 + struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe, 262 + struct xe_hw_engine *hwe) 263 + { 264 + struct drm_device *drm = &xe->drm; 265 + struct xe_execlist_port *port; 266 + int i; 267 + 268 + port = drmm_kzalloc(drm, sizeof(*port), GFP_KERNEL); 269 + if (!port) 270 + return ERR_PTR(-ENOMEM); 271 + 272 + port->hwe = hwe; 273 + 274 + spin_lock_init(&port->lock); 275 + for (i = 0; i < ARRAY_SIZE(port->active); i++) 276 + INIT_LIST_HEAD(&port->active[i]); 277 + 278 + port->last_ctx_id = 1; 279 + port->running_exl = NULL; 280 + 281 + hwe->irq_handler = xe_execlist_port_irq_handler; 282 + 283 + /* TODO: Fix the interrupt code so it doesn't race like mad */ 284 + timer_setup(&port->irq_fail, xe_execlist_port_irq_fail_timer, 0); 285 + port->irq_fail.expires = jiffies + msecs_to_jiffies(1000); 286 + add_timer(&port->irq_fail); 287 + 288 + return port; 289 + } 290 + 291 + void xe_execlist_port_destroy(struct xe_execlist_port *port) 292 + { 293 + del_timer(&port->irq_fail); 294 + 295 + /* Prevent an interrupt while we're destroying */ 296 + spin_lock_irq(&gt_to_xe(port->hwe->gt)->irq.lock); 297 + port->hwe->irq_handler = NULL; 298 + spin_unlock_irq(&gt_to_xe(port->hwe->gt)->irq.lock); 299 + } 300 + 301 + static struct dma_fence * 302 + execlist_run_job(struct drm_sched_job *drm_job) 303 + { 304 + struct xe_sched_job *job = to_xe_sched_job(drm_job); 305 + struct xe_engine *e = job->engine; 306 + struct xe_execlist_engine *exl = job->engine->execlist; 307 + 308 + e->ring_ops->emit_job(job); 309 + xe_execlist_make_active(exl); 310 + 311 + return dma_fence_get(job->fence); 312 + } 313 + 314 + static void execlist_job_free(struct drm_sched_job *drm_job) 315 + { 316 + struct xe_sched_job *job = to_xe_sched_job(drm_job); 317 + 318 + xe_sched_job_put(job); 319 + } 320 + 321 + static const struct drm_sched_backend_ops drm_sched_ops = { 322 + .run_job = 
execlist_run_job, 323 + .free_job = execlist_job_free, 324 + }; 325 + 326 + static int execlist_engine_init(struct xe_engine *e) 327 + { 328 + struct drm_gpu_scheduler *sched; 329 + struct xe_execlist_engine *exl; 330 + int err; 331 + 332 + XE_BUG_ON(xe_device_guc_submission_enabled(gt_to_xe(e->gt))); 333 + 334 + exl = kzalloc(sizeof(*exl), GFP_KERNEL); 335 + if (!exl) 336 + return -ENOMEM; 337 + 338 + exl->engine = e; 339 + 340 + err = drm_sched_init(&exl->sched, &drm_sched_ops, NULL, 1, 341 + e->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, 342 + XE_SCHED_HANG_LIMIT, XE_SCHED_JOB_TIMEOUT, 343 + NULL, NULL, e->hwe->name, 344 + gt_to_xe(e->gt)->drm.dev); 345 + if (err) 346 + goto err_free; 347 + 348 + sched = &exl->sched; 349 + err = drm_sched_entity_init(&exl->entity, 0, &sched, 1, NULL); 350 + if (err) 351 + goto err_sched; 352 + 353 + exl->port = e->hwe->exl_port; 354 + exl->has_run = false; 355 + exl->active_priority = XE_ENGINE_PRIORITY_UNSET; 356 + e->execlist = exl; 357 + e->entity = &exl->entity; 358 + 359 + switch (e->class) { 360 + case XE_ENGINE_CLASS_RENDER: 361 + sprintf(e->name, "rcs%d", ffs(e->logical_mask) - 1); 362 + break; 363 + case XE_ENGINE_CLASS_VIDEO_DECODE: 364 + sprintf(e->name, "vcs%d", ffs(e->logical_mask) - 1); 365 + break; 366 + case XE_ENGINE_CLASS_VIDEO_ENHANCE: 367 + sprintf(e->name, "vecs%d", ffs(e->logical_mask) - 1); 368 + break; 369 + case XE_ENGINE_CLASS_COPY: 370 + sprintf(e->name, "bcs%d", ffs(e->logical_mask) - 1); 371 + break; 372 + case XE_ENGINE_CLASS_COMPUTE: 373 + sprintf(e->name, "ccs%d", ffs(e->logical_mask) - 1); 374 + break; 375 + default: 376 + XE_WARN_ON(e->class); 377 + } 378 + 379 + return 0; 380 + 381 + err_sched: 382 + drm_sched_fini(&exl->sched); 383 + err_free: 384 + kfree(exl); 385 + return err; 386 + } 387 + 388 + static void execlist_engine_fini_async(struct work_struct *w) 389 + { 390 + struct xe_execlist_engine *ee = 391 + container_of(w, struct xe_execlist_engine, fini_async); 392 + struct xe_engine *e = 
ee->engine; 393 + struct xe_execlist_engine *exl = e->execlist; 394 + unsigned long flags; 395 + 396 + XE_BUG_ON(xe_device_guc_submission_enabled(gt_to_xe(e->gt))); 397 + 398 + spin_lock_irqsave(&exl->port->lock, flags); 399 + if (WARN_ON(exl->active_priority != XE_ENGINE_PRIORITY_UNSET)) 400 + list_del(&exl->active_link); 401 + spin_unlock_irqrestore(&exl->port->lock, flags); 402 + 403 + if (e->flags & ENGINE_FLAG_PERSISTENT) 404 + xe_device_remove_persitent_engines(gt_to_xe(e->gt), e); 405 + drm_sched_entity_fini(&exl->entity); 406 + drm_sched_fini(&exl->sched); 407 + kfree(exl); 408 + 409 + xe_engine_fini(e); 410 + } 411 + 412 + static void execlist_engine_kill(struct xe_engine *e) 413 + { 414 + /* NIY */ 415 + } 416 + 417 + static void execlist_engine_fini(struct xe_engine *e) 418 + { 419 + INIT_WORK(&e->execlist->fini_async, execlist_engine_fini_async); 420 + queue_work(system_unbound_wq, &e->execlist->fini_async); 421 + } 422 + 423 + static int execlist_engine_set_priority(struct xe_engine *e, 424 + enum xe_engine_priority priority) 425 + { 426 + /* NIY */ 427 + return 0; 428 + } 429 + 430 + static int execlist_engine_set_timeslice(struct xe_engine *e, u32 timeslice_us) 431 + { 432 + /* NIY */ 433 + return 0; 434 + } 435 + 436 + static int execlist_engine_set_preempt_timeout(struct xe_engine *e, 437 + u32 preempt_timeout_us) 438 + { 439 + /* NIY */ 440 + return 0; 441 + } 442 + 443 + static int execlist_engine_set_job_timeout(struct xe_engine *e, 444 + u32 job_timeout_ms) 445 + { 446 + /* NIY */ 447 + return 0; 448 + } 449 + 450 + static int execlist_engine_suspend(struct xe_engine *e) 451 + { 452 + /* NIY */ 453 + return 0; 454 + } 455 + 456 + static void execlist_engine_suspend_wait(struct xe_engine *e) 457 + 458 + { 459 + /* NIY */ 460 + } 461 + 462 + static void execlist_engine_resume(struct xe_engine *e) 463 + { 464 + xe_mocs_init_engine(e); 465 + } 466 + 467 + static const struct xe_engine_ops execlist_engine_ops = { 468 + .init = execlist_engine_init, 
469 + .kill = execlist_engine_kill, 470 + .fini = execlist_engine_fini, 471 + .set_priority = execlist_engine_set_priority, 472 + .set_timeslice = execlist_engine_set_timeslice, 473 + .set_preempt_timeout = execlist_engine_set_preempt_timeout, 474 + .set_job_timeout = execlist_engine_set_job_timeout, 475 + .suspend = execlist_engine_suspend, 476 + .suspend_wait = execlist_engine_suspend_wait, 477 + .resume = execlist_engine_resume, 478 + }; 479 + 480 + int xe_execlist_init(struct xe_gt *gt) 481 + { 482 + /* GuC submission enabled, nothing to do */ 483 + if (xe_device_guc_submission_enabled(gt_to_xe(gt))) 484 + return 0; 485 + 486 + gt->engine_ops = &execlist_engine_ops; 487 + 488 + return 0; 489 + }
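The GEN11/XEHP `SW_CTX_ID` macros at the top of xe_execlist.c pack a software context ID into the upper bits of the 64-bit LRC descriptor before it is written to the execlist submit queue. A minimal userspace sketch of that packing, with `GENMASK_ULL` and `FIELD_PREP` re-implemented in plain C (the helper names mirror the kernel ones, but everything here is a standalone illustration, not kernel code):

```c
#include <assert.h>
#include <stdint.h>

/* Minimal userspace re-implementation of the kernel's GENMASK_ULL(). */
#define GENMASK_ULL(h, l) \
	(((~0ULL) << (l)) & (~0ULL >> (63 - (h))))

#define XEHP_SW_CTX_ID_SHIFT 39
#define XEHP_SW_CTX_ID_WIDTH 16
#define XEHP_SW_CTX_ID \
	GENMASK_ULL(XEHP_SW_CTX_ID_WIDTH + XEHP_SW_CTX_ID_SHIFT - 1, \
		    XEHP_SW_CTX_ID_SHIFT)

/* FIELD_PREP-style helper: shift a value into the field described by mask. */
static uint64_t field_prep(uint64_t mask, uint64_t val)
{
	return (val << __builtin_ctzll(mask)) & mask;
}

/* Pack a software context ID into an LRC descriptor, as __start_lrc() does
 * on XEHP-class hardware. */
static uint64_t pack_ctx_id(uint64_t lrc_desc, uint32_t ctx_id)
{
	return lrc_desc | field_prep(XEHP_SW_CTX_ID, ctx_id);
}
```

With the XEHP layout the field occupies bits 39..54, so `pack_ctx_id(0, 1)` sets exactly bit 39.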
+21
drivers/gpu/drm/xe/xe_execlist.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2021 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_EXECLIST_H_ 7 + #define _XE_EXECLIST_H_ 8 + 9 + #include "xe_execlist_types.h" 10 + 11 + struct xe_device; 12 + struct xe_gt; 13 + 14 + #define xe_execlist_port_assert_held(port) lockdep_assert_held(&(port)->lock) 15 + 16 + int xe_execlist_init(struct xe_gt *gt); 17 + struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe, 18 + struct xe_hw_engine *hwe); 19 + void xe_execlist_port_destroy(struct xe_execlist_port *port); 20 + 21 + #endif
+49
drivers/gpu/drm/xe/xe_execlist_types.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_EXECLIST_TYPES_H_ 7 + #define _XE_EXECLIST_TYPES_H_ 8 + 9 + #include <linux/list.h> 10 + #include <linux/spinlock.h> 11 + #include <linux/workqueue.h> 12 + 13 + #include "xe_engine_types.h" 14 + 15 + struct xe_hw_engine; 16 + struct xe_execlist_engine; 17 + 18 + struct xe_execlist_port { 19 + struct xe_hw_engine *hwe; 20 + 21 + spinlock_t lock; 22 + 23 + struct list_head active[XE_ENGINE_PRIORITY_COUNT]; 24 + 25 + u32 last_ctx_id; 26 + 27 + struct xe_execlist_engine *running_exl; 28 + 29 + struct timer_list irq_fail; 30 + }; 31 + 32 + struct xe_execlist_engine { 33 + struct xe_engine *engine; 34 + 35 + struct drm_gpu_scheduler sched; 36 + 37 + struct drm_sched_entity entity; 38 + 39 + struct xe_execlist_port *port; 40 + 41 + bool has_run; 42 + 43 + struct work_struct fini_async; 44 + 45 + enum xe_engine_priority active_priority; 46 + struct list_head active_link; 47 + }; 48 + 49 + #endif
+203
drivers/gpu/drm/xe/xe_force_wake.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #include <drm/drm_util.h> 7 + 8 + #include "xe_force_wake.h" 9 + #include "xe_gt.h" 10 + #include "xe_mmio.h" 11 + #include "gt/intel_gt_regs.h" 12 + 13 + #define XE_FORCE_WAKE_ACK_TIMEOUT_MS 50 14 + 15 + static struct xe_gt * 16 + fw_to_gt(struct xe_force_wake *fw) 17 + { 18 + return fw->gt; 19 + } 20 + 21 + static struct xe_device * 22 + fw_to_xe(struct xe_force_wake *fw) 23 + { 24 + return gt_to_xe(fw_to_gt(fw)); 25 + } 26 + 27 + static void domain_init(struct xe_force_wake_domain *domain, 28 + enum xe_force_wake_domain_id id, 29 + u32 reg, u32 ack, u32 val, u32 mask) 30 + { 31 + domain->id = id; 32 + domain->reg_ctl = reg; 33 + domain->reg_ack = ack; 34 + domain->val = val; 35 + domain->mask = mask; 36 + } 37 + 38 + #define FORCEWAKE_ACK_GT_MTL _MMIO(0xdfc) 39 + 40 + void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw) 41 + { 42 + struct xe_device *xe = gt_to_xe(gt); 43 + 44 + fw->gt = gt; 45 + mutex_init(&fw->lock); 46 + 47 + /* Assuming gen11+ so assert this assumption is correct */ 48 + XE_BUG_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11); 49 + 50 + if (xe->info.platform == XE_METEORLAKE) { 51 + domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT], 52 + XE_FW_DOMAIN_ID_GT, 53 + FORCEWAKE_GT_GEN9.reg, 54 + FORCEWAKE_ACK_GT_MTL.reg, 55 + BIT(0), BIT(16)); 56 + } else { 57 + domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT], 58 + XE_FW_DOMAIN_ID_GT, 59 + FORCEWAKE_GT_GEN9.reg, 60 + FORCEWAKE_ACK_GT_GEN9.reg, 61 + BIT(0), BIT(16)); 62 + } 63 + } 64 + 65 + void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) 66 + { 67 + int i, j; 68 + 69 + /* Assuming gen11+ so assert this assumption is correct */ 70 + XE_BUG_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11); 71 + 72 + if (!xe_gt_is_media_type(gt)) 73 + domain_init(&fw->domains[XE_FW_DOMAIN_ID_RENDER], 74 + XE_FW_DOMAIN_ID_RENDER, 75 + FORCEWAKE_RENDER_GEN9.reg, 76 + FORCEWAKE_ACK_RENDER_GEN9.reg, 77 + 
BIT(0), BIT(16)); 78 + 79 + for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) { 80 + if (!(gt->info.engine_mask & BIT(i))) 81 + continue; 82 + 83 + domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j], 84 + XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j, 85 + FORCEWAKE_MEDIA_VDBOX_GEN11(j).reg, 86 + FORCEWAKE_ACK_MEDIA_VDBOX_GEN11(j).reg, 87 + BIT(0), BIT(16)); 88 + } 89 + 90 + for (i = XE_HW_ENGINE_VECS0, j =0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) { 91 + if (!(gt->info.engine_mask & BIT(i))) 92 + continue; 93 + 94 + domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j], 95 + XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j, 96 + FORCEWAKE_MEDIA_VEBOX_GEN11(j).reg, 97 + FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(j).reg, 98 + BIT(0), BIT(16)); 99 + } 100 + } 101 + 102 + void xe_force_wake_prune(struct xe_gt *gt, struct xe_force_wake *fw) 103 + { 104 + int i, j; 105 + 106 + /* Call after fuses have been read, prune domains that are fused off */ 107 + 108 + for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) 109 + if (!(gt->info.engine_mask & BIT(i))) 110 + fw->domains[XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j].reg_ctl = 0; 111 + 112 + for (i = XE_HW_ENGINE_VECS0, j =0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) 113 + if (!(gt->info.engine_mask & BIT(i))) 114 + fw->domains[XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j].reg_ctl = 0; 115 + } 116 + 117 + static void domain_wake(struct xe_gt *gt, struct xe_force_wake_domain *domain) 118 + { 119 + xe_mmio_write32(gt, domain->reg_ctl, domain->mask | domain->val); 120 + } 121 + 122 + static int domain_wake_wait(struct xe_gt *gt, 123 + struct xe_force_wake_domain *domain) 124 + { 125 + return xe_mmio_wait32(gt, domain->reg_ack, domain->val, domain->val, 126 + XE_FORCE_WAKE_ACK_TIMEOUT_MS); 127 + } 128 + 129 + static void domain_sleep(struct xe_gt *gt, struct xe_force_wake_domain *domain) 130 + { 131 + xe_mmio_write32(gt, domain->reg_ctl, domain->mask); 132 + } 133 + 134 + static int domain_sleep_wait(struct xe_gt *gt, 135 + struct xe_force_wake_domain 
*domain) 136 + { 137 + return xe_mmio_wait32(gt, domain->reg_ack, 0, domain->val, 138 + XE_FORCE_WAKE_ACK_TIMEOUT_MS); 139 + } 140 + 141 + #define for_each_fw_domain_masked(domain__, mask__, fw__, tmp__) \ 142 + for (tmp__ = (mask__); tmp__ ;) \ 143 + for_each_if((domain__ = ((fw__)->domains + \ 144 + __mask_next_bit(tmp__))) && \ 145 + domain__->reg_ctl) 146 + 147 + int xe_force_wake_get(struct xe_force_wake *fw, 148 + enum xe_force_wake_domains domains) 149 + { 150 + struct xe_device *xe = fw_to_xe(fw); 151 + struct xe_gt *gt = fw_to_gt(fw); 152 + struct xe_force_wake_domain *domain; 153 + enum xe_force_wake_domains tmp, woken = 0; 154 + int ret, ret2 = 0; 155 + 156 + mutex_lock(&fw->lock); 157 + for_each_fw_domain_masked(domain, domains, fw, tmp) { 158 + if (!domain->ref++) { 159 + woken |= BIT(domain->id); 160 + domain_wake(gt, domain); 161 + } 162 + } 163 + for_each_fw_domain_masked(domain, woken, fw, tmp) { 164 + ret = domain_wake_wait(gt, domain); 165 + ret2 |= ret; 166 + if (ret) 167 + drm_notice(&xe->drm, "Force wake domain (%d) failed to ack wake, ret=%d\n", 168 + domain->id, ret); 169 + } 170 + fw->awake_domains |= woken; 171 + mutex_unlock(&fw->lock); 172 + 173 + return ret2; 174 + } 175 + 176 + int xe_force_wake_put(struct xe_force_wake *fw, 177 + enum xe_force_wake_domains domains) 178 + { 179 + struct xe_device *xe = fw_to_xe(fw); 180 + struct xe_gt *gt = fw_to_gt(fw); 181 + struct xe_force_wake_domain *domain; 182 + enum xe_force_wake_domains tmp, sleep = 0; 183 + int ret, ret2 = 0; 184 + 185 + mutex_lock(&fw->lock); 186 + for_each_fw_domain_masked(domain, domains, fw, tmp) { 187 + if (!--domain->ref) { 188 + sleep |= BIT(domain->id); 189 + domain_sleep(gt, domain); 190 + } 191 + } 192 + for_each_fw_domain_masked(domain, sleep, fw, tmp) { 193 + ret = domain_sleep_wait(gt, domain); 194 + ret2 |= ret; 195 + if (ret) 196 + drm_notice(&xe->drm, "Force wake domain (%d) failed to ack sleep, ret=%d\n", 197 + domain->id, ret); 198 + } 199 + 
fw->awake_domains &= ~sleep; 200 + mutex_unlock(&fw->lock); 201 + 202 + return ret2; 203 + }
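`xe_force_wake_get()` and `xe_force_wake_put()` only touch the hardware on reference-count edges: the wake write happens when a domain's count goes 0 to 1, and the sleep write when it goes 1 to 0. A toy userspace model of a single domain (all names here are hypothetical; the boolean stands in for the ctl/ack MMIO traffic):

```c
#include <assert.h>
#include <stdbool.h>

/* Hypothetical model of one force wake domain: hardware is only touched
 * on the 0->1 and 1->0 reference edges, mirroring the get/put logic above. */
struct fw_domain {
	unsigned int ref; /* software reference count */
	bool hw_awake;    /* stands in for the ack bit in reg_ack */
};

static void fw_get(struct fw_domain *d)
{
	if (!d->ref++)
		d->hw_awake = true;  /* domain_wake() + domain_wake_wait() */
}

static void fw_put(struct fw_domain *d)
{
	if (!--d->ref)
		d->hw_awake = false; /* domain_sleep() + domain_sleep_wait() */
}

/* Two holders: the domain stays awake until the last reference drops. */
static bool fw_refcount_scenario(void)
{
	struct fw_domain d = { 0, false };

	fw_get(&d);
	fw_get(&d);
	fw_put(&d);
	if (!d.hw_awake || d.ref != 1)
		return false;
	fw_put(&d);
	return !d.hw_awake && d.ref == 0;
}
```

This is why nested `xe_force_wake_get()` calls on the same domain are cheap: only the first caller pays for the ack wait.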
+40
drivers/gpu/drm/xe/xe_force_wake.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_FORCE_WAKE_H_ 7 + #define _XE_FORCE_WAKE_H_ 8 + 9 + #include "xe_force_wake_types.h" 10 + #include "xe_macros.h" 11 + 12 + struct xe_gt; 13 + 14 + void xe_force_wake_init_gt(struct xe_gt *gt, 15 + struct xe_force_wake *fw); 16 + void xe_force_wake_init_engines(struct xe_gt *gt, 17 + struct xe_force_wake *fw); 18 + void xe_force_wake_prune(struct xe_gt *gt, 19 + struct xe_force_wake *fw); 20 + int xe_force_wake_get(struct xe_force_wake *fw, 21 + enum xe_force_wake_domains domains); 22 + int xe_force_wake_put(struct xe_force_wake *fw, 23 + enum xe_force_wake_domains domains); 24 + 25 + static inline int 26 + xe_force_wake_ref(struct xe_force_wake *fw, 27 + enum xe_force_wake_domains domain) 28 + { 29 + XE_BUG_ON(!domain); 30 + return fw->domains[ffs(domain) - 1].ref; 31 + } 32 + 33 + static inline void 34 + xe_force_wake_assert_held(struct xe_force_wake *fw, 35 + enum xe_force_wake_domains domain) 36 + { 37 + XE_BUG_ON(!(fw->awake_domains & domain)); 38 + } 39 + 40 + #endif
+84
drivers/gpu/drm/xe/xe_force_wake_types.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_FORCE_WAKE_TYPES_H_ 7 + #define _XE_FORCE_WAKE_TYPES_H_ 8 + 9 + #include <linux/mutex.h> 10 + #include <linux/types.h> 11 + 12 + enum xe_force_wake_domain_id { 13 + XE_FW_DOMAIN_ID_GT = 0, 14 + XE_FW_DOMAIN_ID_RENDER, 15 + XE_FW_DOMAIN_ID_MEDIA, 16 + XE_FW_DOMAIN_ID_MEDIA_VDBOX0, 17 + XE_FW_DOMAIN_ID_MEDIA_VDBOX1, 18 + XE_FW_DOMAIN_ID_MEDIA_VDBOX2, 19 + XE_FW_DOMAIN_ID_MEDIA_VDBOX3, 20 + XE_FW_DOMAIN_ID_MEDIA_VDBOX4, 21 + XE_FW_DOMAIN_ID_MEDIA_VDBOX5, 22 + XE_FW_DOMAIN_ID_MEDIA_VDBOX6, 23 + XE_FW_DOMAIN_ID_MEDIA_VDBOX7, 24 + XE_FW_DOMAIN_ID_MEDIA_VEBOX0, 25 + XE_FW_DOMAIN_ID_MEDIA_VEBOX1, 26 + XE_FW_DOMAIN_ID_MEDIA_VEBOX2, 27 + XE_FW_DOMAIN_ID_MEDIA_VEBOX3, 28 + XE_FW_DOMAIN_ID_GSC, 29 + XE_FW_DOMAIN_ID_COUNT 30 + }; 31 + 32 + enum xe_force_wake_domains { 33 + XE_FW_GT = BIT(XE_FW_DOMAIN_ID_GT), 34 + XE_FW_RENDER = BIT(XE_FW_DOMAIN_ID_RENDER), 35 + XE_FW_MEDIA = BIT(XE_FW_DOMAIN_ID_MEDIA), 36 + XE_FW_MEDIA_VDBOX0 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX0), 37 + XE_FW_MEDIA_VDBOX1 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX1), 38 + XE_FW_MEDIA_VDBOX2 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX2), 39 + XE_FW_MEDIA_VDBOX3 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX3), 40 + XE_FW_MEDIA_VDBOX4 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX4), 41 + XE_FW_MEDIA_VDBOX5 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX5), 42 + XE_FW_MEDIA_VDBOX6 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX6), 43 + XE_FW_MEDIA_VDBOX7 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX7), 44 + XE_FW_MEDIA_VEBOX0 = BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX0), 45 + XE_FW_MEDIA_VEBOX1 = BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX1), 46 + XE_FW_MEDIA_VEBOX2 = BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX2), 47 + XE_FW_MEDIA_VEBOX3 = BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX3), 48 + XE_FW_GSC = BIT(XE_FW_DOMAIN_ID_GSC), 49 + XE_FORCEWAKE_ALL = BIT(XE_FW_DOMAIN_ID_COUNT) - 1 50 + }; 51 + 52 + /** 53 + * struct xe_force_wake_domain - XE force wake domains 54 + */ 55 + struct xe_force_wake_domain { 56 + /** @id: domain 
force wake id */ 57 + enum xe_force_wake_domain_id id; 58 + /** @reg_ctl: domain wake control register address */ 59 + u32 reg_ctl; 60 + /** @reg_ack: domain ack register address */ 61 + u32 reg_ack; 62 + /** @val: domain wake write value */ 63 + u32 val; 64 + /** @mask: domain mask */ 65 + u32 mask; 66 + /** @ref: domain reference */ 67 + u32 ref; 68 + }; 69 + 70 + /** 71 + * struct xe_force_wake - XE force wake 72 + */ 73 + struct xe_force_wake { 74 + /** @gt: back pointer to the GT */ 75 + struct xe_gt *gt; 76 + /** @lock: protects everything in the force wake struct */ 77 + struct mutex lock; 78 + /** @awake_domains: mask of all domains awake */ 79 + enum xe_force_wake_domains awake_domains; 80 + /** @domains: force wake domains */ 81 + struct xe_force_wake_domain domains[XE_FW_DOMAIN_ID_COUNT]; 82 + }; 83 + 84 + #endif
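The two enums in xe_force_wake_types.h are deliberately linked: each `xe_force_wake_domains` bit is `BIT()` of the corresponding `xe_force_wake_domain_id`, which is why `xe_force_wake_ref()` can recover the `domains[]` index with `ffs(domain) - 1` and why `XE_FORCEWAKE_ALL` is simply `BIT(XE_FW_DOMAIN_ID_COUNT) - 1`. A reduced sketch of that mapping (a hypothetical three-domain version, plain userspace C):

```c
#include <assert.h>

/* Sequential domain ids, as in enum xe_force_wake_domain_id. */
enum fw_domain_id { FW_ID_GT, FW_ID_RENDER, FW_ID_MEDIA, FW_ID_COUNT };

#define BIT(n) (1U << (n))

/* One bit per id, as in enum xe_force_wake_domains. */
enum fw_domains {
	FW_GT     = BIT(FW_ID_GT),
	FW_RENDER = BIT(FW_ID_RENDER),
	FW_MEDIA  = BIT(FW_ID_MEDIA),
	FW_ALL    = BIT(FW_ID_COUNT) - 1,
};

/* Recover the array index from a single-domain mask, like
 * xe_force_wake_ref()'s ffs(domain) - 1. */
static int domain_to_id(enum fw_domains d)
{
	return __builtin_ffs(d) - 1;
}
```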
+304
drivers/gpu/drm/xe/xe_ggtt.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2021 Intel Corporation 4 + */ 5 + 6 + #include "xe_ggtt.h" 7 + 8 + #include <linux/sizes.h> 9 + #include <drm/i915_drm.h> 10 + 11 + #include <drm/drm_managed.h> 12 + 13 + #include "xe_device.h" 14 + #include "xe_bo.h" 15 + #include "xe_gt.h" 16 + #include "xe_mmio.h" 17 + #include "xe_wopcm.h" 18 + 19 + #include "i915_reg.h" 20 + #include "gt/intel_gt_regs.h" 21 + 22 + /* FIXME: Common file, preferably auto-gen */ 23 + #define MTL_GGTT_PTE_PAT0 BIT(52) 24 + #define MTL_GGTT_PTE_PAT1 BIT(53) 25 + 26 + u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset) 27 + { 28 + struct xe_device *xe = xe_bo_device(bo); 29 + u64 pte; 30 + bool is_lmem; 31 + 32 + pte = xe_bo_addr(bo, bo_offset, GEN8_PAGE_SIZE, &is_lmem); 33 + pte |= GEN8_PAGE_PRESENT; 34 + 35 + if (is_lmem) 36 + pte |= GEN12_GGTT_PTE_LM; 37 + 38 + /* FIXME: vfunc + pass in caching rules */ 39 + if (xe->info.platform == XE_METEORLAKE) { 40 + pte |= MTL_GGTT_PTE_PAT0; 41 + pte |= MTL_GGTT_PTE_PAT1; 42 + } 43 + 44 + return pte; 45 + } 46 + 47 + static unsigned int probe_gsm_size(struct pci_dev *pdev) 48 + { 49 + u16 gmch_ctl, ggms; 50 + 51 + pci_read_config_word(pdev, SNB_GMCH_CTRL, &gmch_ctl); 52 + ggms = (gmch_ctl >> BDW_GMCH_GGMS_SHIFT) & BDW_GMCH_GGMS_MASK; 53 + return ggms ? 
SZ_1M << ggms : 0; 54 + } 55 + 56 + void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte) 57 + { 58 + XE_BUG_ON(addr & GEN8_PTE_MASK); 59 + XE_BUG_ON(addr >= ggtt->size); 60 + 61 + writeq(pte, &ggtt->gsm[addr >> GEN8_PTE_SHIFT]); 62 + } 63 + 64 + static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size) 65 + { 66 + u64 end = start + size - 1; 67 + u64 scratch_pte; 68 + 69 + XE_BUG_ON(start >= end); 70 + 71 + if (ggtt->scratch) 72 + scratch_pte = xe_ggtt_pte_encode(ggtt->scratch, 0); 73 + else 74 + scratch_pte = 0; 75 + 76 + while (start < end) { 77 + xe_ggtt_set_pte(ggtt, start, scratch_pte); 78 + start += GEN8_PAGE_SIZE; 79 + } 80 + } 81 + 82 + static void ggtt_fini_noalloc(struct drm_device *drm, void *arg) 83 + { 84 + struct xe_ggtt *ggtt = arg; 85 + 86 + mutex_destroy(&ggtt->lock); 87 + drm_mm_takedown(&ggtt->mm); 88 + 89 + xe_bo_unpin_map_no_vm(ggtt->scratch); 90 + } 91 + 92 + int xe_ggtt_init_noalloc(struct xe_gt *gt, struct xe_ggtt *ggtt) 93 + { 94 + struct xe_device *xe = gt_to_xe(gt); 95 + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); 96 + unsigned int gsm_size; 97 + 98 + XE_BUG_ON(xe_gt_is_media_type(gt)); 99 + 100 + ggtt->gt = gt; 101 + 102 + gsm_size = probe_gsm_size(pdev); 103 + if (gsm_size == 0) { 104 + drm_err(&xe->drm, "Hardware reported no preallocated GSM\n"); 105 + return -ENOMEM; 106 + } 107 + 108 + ggtt->gsm = gt->mmio.regs + SZ_8M; 109 + ggtt->size = (gsm_size / 8) * (u64)GEN8_PAGE_SIZE; 110 + 111 + /* 112 + * 8B per entry, each points to a 4KB page. 113 + * 114 + * The GuC owns the WOPCM space, thus we can't allocate GGTT addresses in 115 + * this area. Even though we likely configure the WOPCM to less than the 116 + * maximum value, to simplify the driver load (no need to fetch HuC + 117 + * GuC firmwares and determine their sizes before initializing the GGTT) 118 + * just start the GGTT allocation above the max WOPCM size.
This might 119 + waste space in the GGTT (WOPCM is 2MB on modern platforms) but we can 120 + live with this. 121 + * 122 + * Another benefit of this is the GuC bootrom can't access anything 123 + below the WOPCM max size so anything the bootrom needs to access (e.g. 124 + an RSA key) needs to be placed in the GGTT above the WOPCM max size. 125 + Starting the GGTT allocations above the WOPCM max gives us the correct 126 + placement for free. 127 + */ 128 + drm_mm_init(&ggtt->mm, xe_wopcm_size(xe), 129 + ggtt->size - xe_wopcm_size(xe)); 130 + mutex_init(&ggtt->lock); 131 + 132 + return drmm_add_action_or_reset(&xe->drm, ggtt_fini_noalloc, ggtt); 133 + } 134 + 135 + static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt) 136 + { 137 + struct drm_mm_node *hole; 138 + u64 start, end; 139 + 140 + /* Display may have allocated inside ggtt, so be careful with clearing here */ 141 + mutex_lock(&ggtt->lock); 142 + drm_mm_for_each_hole(hole, &ggtt->mm, start, end) 143 + xe_ggtt_clear(ggtt, start, end - start); 144 + 145 + xe_ggtt_invalidate(ggtt->gt); 146 + mutex_unlock(&ggtt->lock); 147 + } 148 + 149 + int xe_ggtt_init(struct xe_gt *gt, struct xe_ggtt *ggtt) 150 + { 151 + struct xe_device *xe = gt_to_xe(gt); 152 + int err; 153 + 154 + ggtt->scratch = xe_bo_create_locked(xe, gt, NULL, GEN8_PAGE_SIZE, 155 + ttm_bo_type_kernel, 156 + XE_BO_CREATE_VRAM_IF_DGFX(gt) | 157 + XE_BO_CREATE_PINNED_BIT); 158 + if (IS_ERR(ggtt->scratch)) { 159 + err = PTR_ERR(ggtt->scratch); 160 + goto err; 161 + } 162 + 163 + err = xe_bo_pin(ggtt->scratch); 164 + xe_bo_unlock_no_vm(ggtt->scratch); 165 + if (err) { 166 + xe_bo_put(ggtt->scratch); 167 + goto err; 168 + } 169 + 170 + xe_ggtt_initial_clear(ggtt); 171 + return 0; 172 + err: 173 + ggtt->scratch = NULL; 174 + return err; 175 + } 176 + 177 + #define GEN12_GUC_TLB_INV_CR _MMIO(0xcee8) 178 + #define GEN12_GUC_TLB_INV_CR_INVALIDATE (1 << 0) 179 + #define PVC_GUC_TLB_INV_DESC0 _MMIO(0xcf7c) 180 + #define PVC_GUC_TLB_INV_DESC0_VALID (1 <<
0) 181 + #define PVC_GUC_TLB_INV_DESC1 _MMIO(0xcf80) 182 + #define PVC_GUC_TLB_INV_DESC1_INVALIDATE (1 << 6) 183 + 184 + void xe_ggtt_invalidate(struct xe_gt *gt) 185 + { 186 + /* TODO: vfunc for GuC vs. non-GuC */ 187 + 188 + /* TODO: i915 makes comments about this being uncached and 189 + * therefore flushing WC buffers. Is that really true here? 190 + */ 191 + xe_mmio_write32(gt, GFX_FLSH_CNTL_GEN6.reg, GFX_FLSH_CNTL_EN); 192 + if (xe_device_guc_submission_enabled(gt_to_xe(gt))) { 193 + struct xe_device *xe = gt_to_xe(gt); 194 + 195 + /* TODO: also use vfunc here */ 196 + if (xe->info.platform == XE_PVC) { 197 + xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1.reg, 198 + PVC_GUC_TLB_INV_DESC1_INVALIDATE); 199 + xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0.reg, 200 + PVC_GUC_TLB_INV_DESC0_VALID); 201 + } else 202 + xe_mmio_write32(gt, GEN12_GUC_TLB_INV_CR.reg, 203 + GEN12_GUC_TLB_INV_CR_INVALIDATE); 204 + } 205 + } 206 + 207 + void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix) 208 + { 209 + u64 addr, scratch_pte; 210 + 211 + scratch_pte = xe_ggtt_pte_encode(ggtt->scratch, 0); 212 + 213 + printk("%sGlobal GTT:", prefix); 214 + for (addr = 0; addr < ggtt->size; addr += GEN8_PAGE_SIZE) { 215 + unsigned int i = addr / GEN8_PAGE_SIZE; 216 + 217 + XE_BUG_ON(addr > U32_MAX); 218 + if (ggtt->gsm[i] == scratch_pte) 219 + continue; 220 + 221 + printk("%s ggtt[0x%08x] = 0x%016llx", 222 + prefix, (u32)addr, ggtt->gsm[i]); 223 + } 224 + } 225 + 226 + int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct drm_mm_node *node, 227 + u32 size, u32 align, u32 mm_flags) 228 + { 229 + return drm_mm_insert_node_generic(&ggtt->mm, node, size, align, 0, 230 + mm_flags); 231 + } 232 + 233 + int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, 234 + u32 size, u32 align) 235 + { 236 + int ret; 237 + 238 + mutex_lock(&ggtt->lock); 239 + ret = xe_ggtt_insert_special_node_locked(ggtt, node, size, 240 + align, DRM_MM_INSERT_HIGH); 241 + 
mutex_unlock(&ggtt->lock); 242 + 243 + return ret; 244 + } 245 + 246 + void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) 247 + { 248 + u64 start = bo->ggtt_node.start; 249 + u64 offset, pte; 250 + 251 + for (offset = 0; offset < bo->size; offset += GEN8_PAGE_SIZE) { 252 + pte = xe_ggtt_pte_encode(bo, offset); 253 + xe_ggtt_set_pte(ggtt, start + offset, pte); 254 + } 255 + 256 + xe_ggtt_invalidate(ggtt->gt); 257 + } 258 + 259 + int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) 260 + { 261 + int err; 262 + 263 + if (XE_WARN_ON(bo->ggtt_node.size)) { 264 + /* Someone's already inserted this BO in the GGTT */ 265 + XE_BUG_ON(bo->ggtt_node.size != bo->size); 266 + return 0; 267 + } 268 + 269 + err = xe_bo_validate(bo, NULL, false); 270 + if (err) 271 + return err; 272 + 273 + mutex_lock(&ggtt->lock); 274 + err = drm_mm_insert_node(&ggtt->mm, &bo->ggtt_node, bo->size); 275 + if (!err) 276 + xe_ggtt_map_bo(ggtt, bo); 277 + mutex_unlock(&ggtt->lock); 278 + 279 + return err; 280 + } 281 + 282 + void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node) 283 + { 284 + mutex_lock(&ggtt->lock); 285 + 286 + xe_ggtt_clear(ggtt, node->start, node->size); 287 + drm_mm_remove_node(node); 288 + node->size = 0; 289 + 290 + xe_ggtt_invalidate(ggtt->gt); 291 + 292 + mutex_unlock(&ggtt->lock); 293 + } 294 + 295 + void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) 296 + { 297 + if (XE_WARN_ON(!bo->ggtt_node.size)) 298 + return; 299 + 300 + /* This BO is not currently in the GGTT */ 301 + XE_BUG_ON(bo->ggtt_node.size != bo->size); 302 + 303 + xe_ggtt_remove_node(ggtt, &bo->ggtt_node); 304 + }
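`xe_ggtt_pte_encode()` builds each 8-byte GGTT entry from the page's address plus a present bit and, for VRAM-backed pages, the local-memory bit. A standalone sketch of that encoding (the bit positions mirror `GEN8_PAGE_PRESENT` and `GEN12_GGTT_PTE_LM`, but treat them here as illustrative values in a plain userspace model):

```c
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Illustrative bit values; the real ones come from the i915 headers. */
#define PTE_PRESENT (1ULL << 0) /* GEN8_PAGE_PRESENT */
#define PTE_LM      (1ULL << 1) /* GEN12_GGTT_PTE_LM: page lives in VRAM */

/* Sketch of xe_ggtt_pte_encode(): the PTE is the page-aligned address of
 * the backing page, OR'd with a present bit and, for local-memory pages,
 * the LM bit. (Platform PAT bits, as added for Meteor Lake, are omitted.) */
static uint64_t pte_encode(uint64_t page_addr, bool is_lmem)
{
	uint64_t pte = page_addr | PTE_PRESENT;

	if (is_lmem)
		pte |= PTE_LM;
	return pte;
}
```

`xe_ggtt_map_bo()` then walks the BO one 4KB page at a time, writing one such PTE per page before invalidating the TLBs.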
+28
drivers/gpu/drm/xe/xe_ggtt.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2021 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_GGTT_H_ 7 + #define _XE_GGTT_H_ 8 + 9 + #include "xe_ggtt_types.h" 10 + 11 + u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset); 12 + void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte); 13 + void xe_ggtt_invalidate(struct xe_gt *gt); 14 + int xe_ggtt_init_noalloc(struct xe_gt *gt, struct xe_ggtt *ggtt); 15 + int xe_ggtt_init(struct xe_gt *gt, struct xe_ggtt *ggtt); 16 + void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix); 17 + 18 + int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, 19 + u32 size, u32 align); 20 + int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, 21 + struct drm_mm_node *node, 22 + u32 size, u32 align, u32 mm_flags); 23 + void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node); 24 + void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); 25 + int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); 26 + void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); 27 + 28 + #endif
+28
drivers/gpu/drm/xe/xe_ggtt_types.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_GGTT_TYPES_H_ 7 + #define _XE_GGTT_TYPES_H_ 8 + 9 + #include <drm/drm_mm.h> 10 + 11 + struct xe_bo; 12 + struct xe_gt; 13 + 14 + struct xe_ggtt { 15 + struct xe_gt *gt; 16 + 17 + u64 size; 18 + 19 + struct xe_bo *scratch; 20 + 21 + struct mutex lock; 22 + 23 + u64 __iomem *gsm; 24 + 25 + struct drm_mm mm; 26 + }; 27 + 28 + #endif
+101
drivers/gpu/drm/xe/xe_gpu_scheduler.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2023 Intel Corporation 4 + */ 5 + 6 + #include "xe_gpu_scheduler.h" 7 + 8 + static void xe_sched_process_msg_queue(struct xe_gpu_scheduler *sched) 9 + { 10 + if (!READ_ONCE(sched->base.pause_submit)) 11 + queue_work(sched->base.submit_wq, &sched->work_process_msg); 12 + } 13 + 14 + static void xe_sched_process_msg_queue_if_ready(struct xe_gpu_scheduler *sched) 15 + { 16 + struct xe_sched_msg *msg; 17 + 18 + spin_lock(&sched->base.job_list_lock); 19 + msg = list_first_entry_or_null(&sched->msgs, struct xe_sched_msg, link); 20 + if (msg) 21 + xe_sched_process_msg_queue(sched); 22 + spin_unlock(&sched->base.job_list_lock); 23 + } 24 + 25 + static struct xe_sched_msg * 26 + xe_sched_get_msg(struct xe_gpu_scheduler *sched) 27 + { 28 + struct xe_sched_msg *msg; 29 + 30 + spin_lock(&sched->base.job_list_lock); 31 + msg = list_first_entry_or_null(&sched->msgs, 32 + struct xe_sched_msg, link); 33 + if (msg) 34 + list_del(&msg->link); 35 + spin_unlock(&sched->base.job_list_lock); 36 + 37 + return msg; 38 + } 39 + 40 + static void xe_sched_process_msg_work(struct work_struct *w) 41 + { 42 + struct xe_gpu_scheduler *sched = 43 + container_of(w, struct xe_gpu_scheduler, work_process_msg); 44 + struct xe_sched_msg *msg; 45 + 46 + if (READ_ONCE(sched->base.pause_submit)) 47 + return; 48 + 49 + msg = xe_sched_get_msg(sched); 50 + if (msg) { 51 + sched->ops->process_msg(msg); 52 + 53 + xe_sched_process_msg_queue_if_ready(sched); 54 + } 55 + } 56 + 57 + int xe_sched_init(struct xe_gpu_scheduler *sched, 58 + const struct drm_sched_backend_ops *ops, 59 + const struct xe_sched_backend_ops *xe_ops, 60 + struct workqueue_struct *submit_wq, 61 + uint32_t hw_submission, unsigned hang_limit, 62 + long timeout, struct workqueue_struct *timeout_wq, 63 + atomic_t *score, const char *name, 64 + struct device *dev) 65 + { 66 + sched->ops = xe_ops; 67 + INIT_LIST_HEAD(&sched->msgs); 68 + INIT_WORK(&sched->work_process_msg, 
xe_sched_process_msg_work); 69 + 70 + return drm_sched_init(&sched->base, ops, submit_wq, 1, hw_submission, 71 + hang_limit, timeout, timeout_wq, score, name, 72 + dev); 73 + } 74 + 75 + void xe_sched_fini(struct xe_gpu_scheduler *sched) 76 + { 77 + xe_sched_submission_stop(sched); 78 + drm_sched_fini(&sched->base); 79 + } 80 + 81 + void xe_sched_submission_start(struct xe_gpu_scheduler *sched) 82 + { 83 + drm_sched_wqueue_start(&sched->base); 84 + queue_work(sched->base.submit_wq, &sched->work_process_msg); 85 + } 86 + 87 + void xe_sched_submission_stop(struct xe_gpu_scheduler *sched) 88 + { 89 + drm_sched_wqueue_stop(&sched->base); 90 + cancel_work_sync(&sched->work_process_msg); 91 + } 92 + 93 + void xe_sched_add_msg(struct xe_gpu_scheduler *sched, 94 + struct xe_sched_msg *msg) 95 + { 96 + spin_lock(&sched->base.job_list_lock); 97 + list_add_tail(&msg->link, &sched->msgs); 98 + spin_unlock(&sched->base.job_list_lock); 99 + 100 + xe_sched_process_msg_queue(sched); 101 + }
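The xe_sched_add_msg/xe_sched_get_msg pair above implements a simple producer/consumer queue: the producer appends under the scheduler's job_list_lock and kicks the work item, and the worker pops one message at a time under the same lock. A self-contained userspace sketch of that locking pattern, with a pthread mutex standing in for the spinlock (all names here are illustrative, not driver code):

```c
#include <pthread.h>
#include <stddef.h>

/* Sketch of the locked message-queue pattern used by
 * xe_sched_add_msg()/xe_sched_get_msg(). */
struct msg {
	struct msg *next;
	unsigned int opcode;
};

struct msg_queue {
	pthread_mutex_t lock; /* stands in for base.job_list_lock */
	struct msg *head, *tail;
};

static void msgq_init(struct msg_queue *q)
{
	pthread_mutex_init(&q->lock, NULL);
	q->head = q->tail = NULL;
}

/* Producer side: append under the lock, as xe_sched_add_msg does
 * before queueing work_process_msg. */
static void msgq_add(struct msg_queue *q, struct msg *m)
{
	m->next = NULL;
	pthread_mutex_lock(&q->lock);
	if (q->tail)
		q->tail->next = m;
	else
		q->head = m;
	q->tail = m;
	pthread_mutex_unlock(&q->lock);
}

/* Consumer side: pop the first message or NULL, as xe_sched_get_msg does. */
static struct msg *msgq_get(struct msg_queue *q)
{
	struct msg *m;

	pthread_mutex_lock(&q->lock);
	m = q->head;
	if (m) {
		q->head = m->next;
		if (!q->head)
			q->tail = NULL;
	}
	pthread_mutex_unlock(&q->lock);
	return m;
}
```

In the driver the consumer additionally re-queues itself (xe_sched_process_msg_queue_if_ready) while more messages remain, so each work invocation only processes one message at a time.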
+73
drivers/gpu/drm/xe/xe_gpu_scheduler.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2023 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_GPU_SCHEDULER_H_ 7 + #define _XE_GPU_SCHEDULER_H_ 8 + 9 + #include "xe_gpu_scheduler_types.h" 10 + #include "xe_sched_job_types.h" 11 + 12 + int xe_sched_init(struct xe_gpu_scheduler *sched, 13 + const struct drm_sched_backend_ops *ops, 14 + const struct xe_sched_backend_ops *xe_ops, 15 + struct workqueue_struct *submit_wq, 16 + uint32_t hw_submission, unsigned hang_limit, 17 + long timeout, struct workqueue_struct *timeout_wq, 18 + atomic_t *score, const char *name, 19 + struct device *dev); 20 + void xe_sched_fini(struct xe_gpu_scheduler *sched); 21 + 22 + void xe_sched_submission_start(struct xe_gpu_scheduler *sched); 23 + void xe_sched_submission_stop(struct xe_gpu_scheduler *sched); 24 + 25 + void xe_sched_add_msg(struct xe_gpu_scheduler *sched, 26 + struct xe_sched_msg *msg); 27 + 28 + static inline void xe_sched_stop(struct xe_gpu_scheduler *sched) 29 + { 30 + drm_sched_stop(&sched->base, NULL); 31 + } 32 + 33 + static inline void xe_sched_tdr_queue_imm(struct xe_gpu_scheduler *sched) 34 + { 35 + drm_sched_tdr_queue_imm(&sched->base); 36 + } 37 + 38 + static inline void xe_sched_resubmit_jobs(struct xe_gpu_scheduler *sched) 39 + { 40 + drm_sched_resubmit_jobs(&sched->base); 41 + } 42 + 43 + static inline bool 44 + xe_sched_invalidate_job(struct xe_sched_job *job, int threshold) 45 + { 46 + return drm_sched_invalidate_job(&job->drm, threshold); 47 + } 48 + 49 + static inline void xe_sched_add_pending_job(struct xe_gpu_scheduler *sched, 50 + struct xe_sched_job *job) 51 + { 52 + list_add(&job->drm.list, &sched->base.pending_list); 53 + } 54 + 55 + static inline 56 + struct xe_sched_job *xe_sched_first_pending_job(struct xe_gpu_scheduler *sched) 57 + { 58 + return list_first_entry_or_null(&sched->base.pending_list, 59 + struct xe_sched_job, drm.list); 60 + } 61 + 62 + static inline int 63 + xe_sched_entity_init(struct xe_sched_entity *entity, 64 
+ struct xe_gpu_scheduler *sched) 65 + { 66 + return drm_sched_entity_init(entity, 0, 67 + (struct drm_gpu_scheduler **)&sched, 68 + 1, NULL); 69 + } 70 + 71 + #define xe_sched_entity_fini drm_sched_entity_fini 72 + 73 + #endif
+57
drivers/gpu/drm/xe/xe_gpu_scheduler_types.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2023 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_GPU_SCHEDULER_TYPES_H_ 7 + #define _XE_GPU_SCHEDULER_TYPES_H_ 8 + 9 + #include <drm/gpu_scheduler.h> 10 + 11 + /** 12 + * struct xe_sched_msg - an in-band (relative to GPU scheduler run queue) 13 + * message 14 + * 15 + * Generic enough for backend defined messages, backend can expand if needed. 16 + */ 17 + struct xe_sched_msg { 18 + /** @link: list link into the gpu scheduler list of messages */ 19 + struct list_head link; 20 + /** 21 + * @private_data: opaque pointer to message private data (backend defined) 22 + */ 23 + void *private_data; 24 + /** @opcode: opcode of message (backend defined) */ 25 + unsigned int opcode; 26 + }; 27 + 28 + /** 29 + * struct xe_sched_backend_ops - Define the backend operations called by the 30 + * scheduler 31 + */ 32 + struct xe_sched_backend_ops { 33 + /** 34 + * @process_msg: Process a message. Allowed to block, it is this 35 + * function's responsibility to free message if dynamically allocated. 36 + */ 37 + void (*process_msg)(struct xe_sched_msg *msg); 38 + }; 39 + 40 + /** 41 + * struct xe_gpu_scheduler - Xe GPU scheduler 42 + */ 43 + struct xe_gpu_scheduler { 44 + /** @base: DRM GPU scheduler */ 45 + struct drm_gpu_scheduler base; 46 + /** @ops: Xe scheduler ops */ 47 + const struct xe_sched_backend_ops *ops; 48 + /** @msgs: list of messages to be processed in @work_process_msg */ 49 + struct list_head msgs; 50 + /** @work_process_msg: processes messages */ 51 + struct work_struct work_process_msg; 52 + }; 53 + 54 + #define xe_sched_entity drm_sched_entity 55 + #define xe_sched_policy drm_sched_policy 56 + 57 + #endif
+830
drivers/gpu/drm/xe/xe_gt.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #include <linux/minmax.h> 7 + 8 + #include <drm/drm_managed.h> 9 + 10 + #include "xe_bb.h" 11 + #include "xe_bo.h" 12 + #include "xe_device.h" 13 + #include "xe_engine.h" 14 + #include "xe_execlist.h" 15 + #include "xe_force_wake.h" 16 + #include "xe_ggtt.h" 17 + #include "xe_gt.h" 18 + #include "xe_gt_clock.h" 19 + #include "xe_gt_mcr.h" 20 + #include "xe_gt_pagefault.h" 21 + #include "xe_gt_sysfs.h" 22 + #include "xe_gt_topology.h" 23 + #include "xe_hw_fence.h" 24 + #include "xe_irq.h" 25 + #include "xe_lrc.h" 26 + #include "xe_map.h" 27 + #include "xe_migrate.h" 28 + #include "xe_mmio.h" 29 + #include "xe_mocs.h" 30 + #include "xe_reg_sr.h" 31 + #include "xe_ring_ops.h" 32 + #include "xe_sa.h" 33 + #include "xe_sched_job.h" 34 + #include "xe_ttm_gtt_mgr.h" 35 + #include "xe_ttm_vram_mgr.h" 36 + #include "xe_tuning.h" 37 + #include "xe_uc.h" 38 + #include "xe_vm.h" 39 + #include "xe_wa.h" 40 + #include "xe_wopcm.h" 41 + 42 + #include "gt/intel_gt_regs.h" 43 + 44 + struct xe_gt *xe_find_full_gt(struct xe_gt *gt) 45 + { 46 + struct xe_gt *search; 47 + u8 id; 48 + 49 + XE_BUG_ON(!xe_gt_is_media_type(gt)); 50 + 51 + for_each_gt(search, gt_to_xe(gt), id) { 52 + if (search->info.vram_id == gt->info.vram_id) 53 + return search; 54 + } 55 + 56 + XE_BUG_ON("NOT POSSIBLE"); 57 + return NULL; 58 + } 59 + 60 + int xe_gt_alloc(struct xe_device *xe, struct xe_gt *gt) 61 + { 62 + struct drm_device *drm = &xe->drm; 63 + 64 + XE_BUG_ON(gt->info.type == XE_GT_TYPE_UNINITIALIZED); 65 + 66 + if (!xe_gt_is_media_type(gt)) { 67 + gt->mem.ggtt = drmm_kzalloc(drm, sizeof(*gt->mem.ggtt), 68 + GFP_KERNEL); 69 + if (!gt->mem.ggtt) 70 + return -ENOMEM; 71 + 72 + gt->mem.vram_mgr = drmm_kzalloc(drm, sizeof(*gt->mem.vram_mgr), 73 + GFP_KERNEL); 74 + if (!gt->mem.vram_mgr) 75 + return -ENOMEM; 76 + 77 + gt->mem.gtt_mgr = drmm_kzalloc(drm, sizeof(*gt->mem.gtt_mgr), 78 + GFP_KERNEL); 79 + if 
(!gt->mem.gtt_mgr) 80 + return -ENOMEM; 81 + } else { 82 + struct xe_gt *full_gt = xe_find_full_gt(gt); 83 + 84 + gt->mem.ggtt = full_gt->mem.ggtt; 85 + gt->mem.vram_mgr = full_gt->mem.vram_mgr; 86 + gt->mem.gtt_mgr = full_gt->mem.gtt_mgr; 87 + } 88 + 89 + gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0); 90 + 91 + return 0; 92 + } 93 + 94 + /* FIXME: These should be in a common file */ 95 + #define CHV_PPAT_SNOOP REG_BIT(6) 96 + #define GEN8_PPAT_AGE(x) ((x)<<4) 97 + #define GEN8_PPAT_LLCeLLC (3<<2) 98 + #define GEN8_PPAT_LLCELLC (2<<2) 99 + #define GEN8_PPAT_LLC (1<<2) 100 + #define GEN8_PPAT_WB (3<<0) 101 + #define GEN8_PPAT_WT (2<<0) 102 + #define GEN8_PPAT_WC (1<<0) 103 + #define GEN8_PPAT_UC (0<<0) 104 + #define GEN8_PPAT_ELLC_OVERRIDE (0<<2) 105 + #define GEN8_PPAT(i, x) ((u64)(x) << ((i) * 8)) 106 + #define GEN12_PPAT_CLOS(x) ((x)<<2) 107 + 108 + static void tgl_setup_private_ppat(struct xe_gt *gt) 109 + { 110 + /* TGL doesn't support LLC or AGE settings */ 111 + xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, GEN8_PPAT_WB); 112 + xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg, GEN8_PPAT_WC); 113 + xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg, GEN8_PPAT_WT); 114 + xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg, GEN8_PPAT_UC); 115 + xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg, GEN8_PPAT_WB); 116 + xe_mmio_write32(gt, GEN12_PAT_INDEX(5).reg, GEN8_PPAT_WB); 117 + xe_mmio_write32(gt, GEN12_PAT_INDEX(6).reg, GEN8_PPAT_WB); 118 + xe_mmio_write32(gt, GEN12_PAT_INDEX(7).reg, GEN8_PPAT_WB); 119 + } 120 + 121 + static void pvc_setup_private_ppat(struct xe_gt *gt) 122 + { 123 + xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, GEN8_PPAT_UC); 124 + xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg, GEN8_PPAT_WC); 125 + xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg, GEN8_PPAT_WT); 126 + xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg, GEN8_PPAT_WB); 127 + xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg, 128 + GEN12_PPAT_CLOS(1) | GEN8_PPAT_WT); 129 + xe_mmio_write32(gt, GEN12_PAT_INDEX(5).reg, 130 
+ GEN12_PPAT_CLOS(1) | GEN8_PPAT_WB); 131 + xe_mmio_write32(gt, GEN12_PAT_INDEX(6).reg, 132 + GEN12_PPAT_CLOS(2) | GEN8_PPAT_WT); 133 + xe_mmio_write32(gt, GEN12_PAT_INDEX(7).reg, 134 + GEN12_PPAT_CLOS(2) | GEN8_PPAT_WB); 135 + } 136 + 137 + #define MTL_PPAT_L4_CACHE_POLICY_MASK REG_GENMASK(3, 2) 138 + #define MTL_PAT_INDEX_COH_MODE_MASK REG_GENMASK(1, 0) 139 + #define MTL_PPAT_3_UC REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 3) 140 + #define MTL_PPAT_1_WT REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 1) 141 + #define MTL_PPAT_0_WB REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 0) 142 + #define MTL_3_COH_2W REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 3) 143 + #define MTL_2_COH_1W REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 2) 144 + #define MTL_0_COH_NON REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 0) 145 + 146 + static void mtl_setup_private_ppat(struct xe_gt *gt) 147 + { 148 + xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, MTL_PPAT_0_WB); 149 + xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg, 150 + MTL_PPAT_1_WT | MTL_2_COH_1W); 151 + xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg, 152 + MTL_PPAT_3_UC | MTL_2_COH_1W); 153 + xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg, 154 + MTL_PPAT_0_WB | MTL_2_COH_1W); 155 + xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg, 156 + MTL_PPAT_0_WB | MTL_3_COH_2W); 157 + } 158 + 159 + static void setup_private_ppat(struct xe_gt *gt) 160 + { 161 + struct xe_device *xe = gt_to_xe(gt); 162 + 163 + if (xe->info.platform == XE_METEORLAKE) 164 + mtl_setup_private_ppat(gt); 165 + else if (xe->info.platform == XE_PVC) 166 + pvc_setup_private_ppat(gt); 167 + else 168 + tgl_setup_private_ppat(gt); 169 + } 170 + 171 + static int gt_ttm_mgr_init(struct xe_gt *gt) 172 + { 173 + struct xe_device *xe = gt_to_xe(gt); 174 + int err; 175 + struct sysinfo si; 176 + u64 gtt_size; 177 + 178 + si_meminfo(&si); 179 + gtt_size = (u64)si.totalram * si.mem_unit * 3/4; 180 + 181 + if (gt->mem.vram.size) { 182 + err = xe_ttm_vram_mgr_init(gt, gt->mem.vram_mgr); 183 + if (err) 184 
+ return err; 185 + gtt_size = min(max((XE_DEFAULT_GTT_SIZE_MB << 20), 186 + gt->mem.vram.size), 187 + gtt_size); 188 + xe->info.mem_region_mask |= BIT(gt->info.vram_id) << 1; 189 + } 190 + 191 + err = xe_ttm_gtt_mgr_init(gt, gt->mem.gtt_mgr, gtt_size); 192 + if (err) 193 + return err; 194 + 195 + return 0; 196 + } 197 + 198 + static void gt_fini(struct drm_device *drm, void *arg) 199 + { 200 + struct xe_gt *gt = arg; 201 + int i; 202 + 203 + destroy_workqueue(gt->ordered_wq); 204 + 205 + for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) 206 + xe_hw_fence_irq_finish(&gt->fence_irq[i]); 207 + } 208 + 209 + static void gt_reset_worker(struct work_struct *w); 210 + 211 + int emit_nop_job(struct xe_gt *gt, struct xe_engine *e) 212 + { 213 + struct xe_sched_job *job; 214 + struct xe_bb *bb; 215 + struct dma_fence *fence; 216 + u64 batch_ofs; 217 + long timeout; 218 + 219 + bb = xe_bb_new(gt, 4, false); 220 + if (IS_ERR(bb)) 221 + return PTR_ERR(bb); 222 + 223 + batch_ofs = xe_bo_ggtt_addr(gt->kernel_bb_pool.bo); 224 + job = xe_bb_create_wa_job(e, bb, batch_ofs); 225 + if (IS_ERR(job)) { 226 + xe_bb_free(bb, NULL); 227 + return PTR_ERR(job); 228 + } 229 + 230 + xe_sched_job_arm(job); 231 + fence = dma_fence_get(&job->drm.s_fence->finished); 232 + xe_sched_job_push(job); 233 + 234 + timeout = dma_fence_wait_timeout(fence, false, HZ); 235 + dma_fence_put(fence); 236 + xe_bb_free(bb, NULL); 237 + if (timeout < 0) 238 + return timeout; 239 + else if (!timeout) 240 + return -ETIME; 241 + 242 + return 0; 243 + } 244 + 245 + int emit_wa_job(struct xe_gt *gt, struct xe_engine *e) 246 + { 247 + struct xe_reg_sr *sr = &e->hwe->reg_lrc; 248 + struct xe_reg_sr_entry *entry; 249 + unsigned long reg; 250 + struct xe_sched_job *job; 251 + struct xe_bb *bb; 252 + struct dma_fence *fence; 253 + u64 batch_ofs; 254 + long timeout; 255 + int count = 0; 256 + 257 + bb = xe_bb_new(gt, SZ_4K, false); /* Just pick a large BB size */ 258 + if (IS_ERR(bb)) 259 + return PTR_ERR(bb); 260 + 261 + 
xa_for_each(&sr->xa, reg, entry) 262 + ++count; 263 + 264 + if (count) { 265 + bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM(count); 266 + xa_for_each(&sr->xa, reg, entry) { 267 + bb->cs[bb->len++] = reg; 268 + bb->cs[bb->len++] = entry->set_bits; 269 + } 270 + } 271 + bb->cs[bb->len++] = MI_NOOP; 272 + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; 273 + 274 + batch_ofs = xe_bo_ggtt_addr(gt->kernel_bb_pool.bo); 275 + job = xe_bb_create_wa_job(e, bb, batch_ofs); 276 + if (IS_ERR(job)) { 277 + xe_bb_free(bb, NULL); 278 + return PTR_ERR(job); 279 + } 280 + 281 + xe_sched_job_arm(job); 282 + fence = dma_fence_get(&job->drm.s_fence->finished); 283 + xe_sched_job_push(job); 284 + 285 + timeout = dma_fence_wait_timeout(fence, false, HZ); 286 + dma_fence_put(fence); 287 + xe_bb_free(bb, NULL); 288 + if (timeout < 0) 289 + return timeout; 290 + else if (!timeout) 291 + return -ETIME; 292 + 293 + return 0; 294 + } 295 + 296 + int xe_gt_record_default_lrcs(struct xe_gt *gt) 297 + { 298 + struct xe_device *xe = gt_to_xe(gt); 299 + struct xe_hw_engine *hwe; 300 + enum xe_hw_engine_id id; 301 + int err = 0; 302 + 303 + for_each_hw_engine(hwe, gt, id) { 304 + struct xe_engine *e, *nop_e; 305 + struct xe_vm *vm; 306 + void *default_lrc; 307 + 308 + if (gt->default_lrc[hwe->class]) 309 + continue; 310 + 311 + xe_reg_sr_init(&hwe->reg_lrc, "LRC", xe); 312 + xe_wa_process_lrc(hwe); 313 + 314 + default_lrc = drmm_kzalloc(&xe->drm, 315 + xe_lrc_size(xe, hwe->class), 316 + GFP_KERNEL); 317 + if (!default_lrc) 318 + return -ENOMEM; 319 + 320 + vm = xe_migrate_get_vm(gt->migrate); 321 + e = xe_engine_create(xe, vm, BIT(hwe->logical_instance), 1, 322 + hwe, ENGINE_FLAG_WA); 323 + if (IS_ERR(e)) { 324 + err = PTR_ERR(e); 325 + goto put_vm; 326 + } 327 + 328 + /* Prime golden LRC with known good state */ 329 + err = emit_wa_job(gt, e); 330 + if (err) 331 + goto put_engine; 332 + 333 + nop_e = xe_engine_create(xe, vm, BIT(hwe->logical_instance), 334 + 1, hwe, ENGINE_FLAG_WA); 335 + if (IS_ERR(nop_e)) { 
336 + err = PTR_ERR(nop_e); 337 + goto put_engine; 338 + } 339 + 340 + /* Switch to different LRC */ 341 + err = emit_nop_job(gt, nop_e); 342 + if (err) 343 + goto put_nop_e; 344 + 345 + /* Reload golden LRC to record the effect of any indirect W/A */ 346 + err = emit_nop_job(gt, e); 347 + if (err) 348 + goto put_nop_e; 349 + 350 + xe_map_memcpy_from(xe, default_lrc, 351 + &e->lrc[0].bo->vmap, 352 + xe_lrc_pphwsp_offset(&e->lrc[0]), 353 + xe_lrc_size(xe, hwe->class)); 354 + 355 + gt->default_lrc[hwe->class] = default_lrc; 356 + put_nop_e: 357 + xe_engine_put(nop_e); 358 + put_engine: 359 + xe_engine_put(e); 360 + put_vm: 361 + xe_vm_put(vm); 362 + if (err) 363 + break; 364 + } 365 + 366 + return err; 367 + } 368 + 369 + int xe_gt_init_early(struct xe_gt *gt) 370 + { 371 + int err; 372 + 373 + xe_force_wake_init_gt(gt, gt_to_fw(gt)); 374 + 375 + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 376 + if (err) 377 + return err; 378 + 379 + xe_gt_topology_init(gt); 380 + xe_gt_mcr_init(gt); 381 + 382 + err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 383 + if (err) 384 + return err; 385 + 386 + xe_reg_sr_init(&gt->reg_sr, "GT", gt_to_xe(gt)); 387 + xe_wa_process_gt(gt); 388 + xe_tuning_process_gt(gt); 389 + 390 + return 0; 391 + } 392 + 393 + /** 394 + * xe_gt_init_noalloc - Init GT up to the point where allocations can happen. 395 + * @gt: The GT to initialize. 396 + * 397 + * This function prepares the GT to allow memory allocations to VRAM, but is not 398 + * allowed to allocate memory itself. This state is useful for display readout, 399 + * because the inherited display framebuffer will otherwise be overwritten as it 400 + * is usually put at the start of VRAM. 401 + * 402 + * Returns: 0 on success, negative error code on error. 
403 + */ 404 + int xe_gt_init_noalloc(struct xe_gt *gt) 405 + { 406 + int err, err2; 407 + 408 + if (xe_gt_is_media_type(gt)) 409 + return 0; 410 + 411 + xe_device_mem_access_get(gt_to_xe(gt)); 412 + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 413 + if (err) 414 + goto err; 415 + 416 + err = gt_ttm_mgr_init(gt); 417 + if (err) 418 + goto err_force_wake; 419 + 420 + err = xe_ggtt_init_noalloc(gt, gt->mem.ggtt); 421 + 422 + err_force_wake: 423 + err2 = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 424 + XE_WARN_ON(err2); 425 + xe_device_mem_access_put(gt_to_xe(gt)); 426 + err: 427 + return err; 428 + } 429 + 430 + static int gt_fw_domain_init(struct xe_gt *gt) 431 + { 432 + int err, i; 433 + 434 + xe_device_mem_access_get(gt_to_xe(gt)); 435 + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 436 + if (err) 437 + goto err_hw_fence_irq; 438 + 439 + if (!xe_gt_is_media_type(gt)) { 440 + err = xe_ggtt_init(gt, gt->mem.ggtt); 441 + if (err) 442 + goto err_force_wake; 443 + } 444 + 445 + /* Allow driver to load if uC init fails (likely missing firmware) */ 446 + err = xe_uc_init(&gt->uc); 447 + XE_WARN_ON(err); 448 + 449 + err = xe_uc_init_hwconfig(&gt->uc); 450 + if (err) 451 + goto err_force_wake; 452 + 453 + /* Enables per hw engine IRQs */ 454 + xe_gt_irq_postinstall(gt); 455 + 456 + /* Rerun MCR init as we now have hw engine list */ 457 + xe_gt_mcr_init(gt); 458 + 459 + err = xe_hw_engines_init_early(gt); 460 + if (err) 461 + goto err_force_wake; 462 + 463 + err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 464 + XE_WARN_ON(err); 465 + xe_device_mem_access_put(gt_to_xe(gt)); 466 + 467 + return 0; 468 + 469 + err_force_wake: 470 + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 471 + err_hw_fence_irq: 472 + for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) 473 + xe_hw_fence_irq_finish(&gt->fence_irq[i]); 474 + xe_device_mem_access_put(gt_to_xe(gt)); 475 + 476 + return err; 477 + } 478 + 479 + static int all_fw_domain_init(struct xe_gt *gt) 480 + { 481 + int err, i; 482 + 483 + 
xe_device_mem_access_get(gt_to_xe(gt)); 484 + err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 485 + if (err) 486 + goto err_hw_fence_irq; 487 + 488 + setup_private_ppat(gt); 489 + 490 + xe_reg_sr_apply_mmio(&gt->reg_sr, gt); 491 + 492 + err = xe_gt_clock_init(gt); 493 + if (err) 494 + goto err_force_wake; 495 + 496 + xe_mocs_init(gt); 497 + err = xe_execlist_init(gt); 498 + if (err) 499 + goto err_force_wake; 500 + 501 + err = xe_hw_engines_init(gt); 502 + if (err) 503 + goto err_force_wake; 504 + 505 + err = xe_uc_init_post_hwconfig(&gt->uc); 506 + if (err) 507 + goto err_force_wake; 508 + 509 + /* 510 + * FIXME: This should be ok as SA should only be used by gt->migrate and 511 + * vm->gt->migrate and both should be pointing to a non-media GT. But to 512 + * be really safe, convert gt->kernel_bb_pool to a pointer and point a media 513 + * GT to the kernel_bb_pool on a real tile. 514 + */ 515 + if (!xe_gt_is_media_type(gt)) { 516 + err = xe_sa_bo_manager_init(gt, &gt->kernel_bb_pool, SZ_1M, 16); 517 + if (err) 518 + goto err_force_wake; 519 + 520 + /* 521 + * USM has its own SA pool so it does not block behind user operations 522 + */ 523 + if (gt_to_xe(gt)->info.supports_usm) { 524 + err = xe_sa_bo_manager_init(gt, &gt->usm.bb_pool, 525 + SZ_1M, 16); 526 + if (err) 527 + goto err_force_wake; 528 + } 529 + } 530 + 531 + if (!xe_gt_is_media_type(gt)) { 532 + gt->migrate = xe_migrate_init(gt); 533 + if (IS_ERR(gt->migrate)) 534 + { err = PTR_ERR(gt->migrate); goto err_force_wake; } 535 + } else { 536 + gt->migrate = xe_find_full_gt(gt)->migrate; 537 + } 538 + 539 + err = xe_uc_init_hw(&gt->uc); 540 + if (err) 541 + goto err_force_wake; 542 + 543 + err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); 544 + XE_WARN_ON(err); 545 + xe_device_mem_access_put(gt_to_xe(gt)); 546 + 547 + return 0; 548 + 549 + err_force_wake: 550 + xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); 551 + err_hw_fence_irq: 552 + for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) 553 + xe_hw_fence_irq_finish(&gt->fence_irq[i]); 554 
+ xe_device_mem_access_put(gt_to_xe(gt)); 555 + 556 + return err; 557 + } 558 + 559 + int xe_gt_init(struct xe_gt *gt) 560 + { 561 + int err; 562 + int i; 563 + 564 + INIT_WORK(&gt->reset.worker, gt_reset_worker); 565 + 566 + for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) { 567 + gt->ring_ops[i] = xe_ring_ops_get(gt, i); 568 + xe_hw_fence_irq_init(&gt->fence_irq[i]); 569 + } 570 + 571 + err = xe_gt_pagefault_init(gt); 572 + if (err) 573 + return err; 574 + 575 + xe_gt_sysfs_init(gt); 576 + 577 + err = gt_fw_domain_init(gt); 578 + if (err) 579 + return err; 580 + 581 + xe_force_wake_init_engines(gt, gt_to_fw(gt)); 582 + 583 + err = all_fw_domain_init(gt); 584 + if (err) 585 + return err; 586 + 587 + xe_force_wake_prune(gt, gt_to_fw(gt)); 588 + 589 + err = drmm_add_action_or_reset(&gt_to_xe(gt)->drm, gt_fini, gt); 590 + if (err) 591 + return err; 592 + 593 + return 0; 594 + } 595 + 596 + int do_gt_reset(struct xe_gt *gt) 597 + { 598 + struct xe_device *xe = gt_to_xe(gt); 599 + int err; 600 + 601 + xe_mmio_write32(gt, GEN6_GDRST.reg, GEN11_GRDOM_FULL); 602 + err = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_FULL, 5); 603 + if (err) 604 + drm_err(&xe->drm, 605 + "GT reset failed to clear GEN11_GRDOM_FULL\n"); 606 + 607 + return err; 608 + } 609 + 610 + static int do_gt_restart(struct xe_gt *gt) 611 + { 612 + struct xe_hw_engine *hwe; 613 + enum xe_hw_engine_id id; 614 + int err; 615 + 616 + setup_private_ppat(gt); 617 + 618 + xe_reg_sr_apply_mmio(&gt->reg_sr, gt); 619 + 620 + err = xe_wopcm_init(&gt->uc.wopcm); 621 + if (err) 622 + return err; 623 + 624 + for_each_hw_engine(hwe, gt, id) 625 + xe_hw_engine_enable_ring(hwe); 626 + 627 + err = xe_uc_init_hw(&gt->uc); 628 + if (err) 629 + return err; 630 + 631 + xe_mocs_init(gt); 632 + err = xe_uc_start(&gt->uc); 633 + if (err) 634 + return err; 635 + 636 + for_each_hw_engine(hwe, gt, id) { 637 + xe_reg_sr_apply_mmio(&hwe->reg_sr, gt); 638 + xe_reg_sr_apply_whitelist(&hwe->reg_whitelist, 639 + hwe->mmio_base, gt); 640 + } 
641 + 642 + return 0; 643 + } 644 + 645 + static int gt_reset(struct xe_gt *gt) 646 + { 647 + struct xe_device *xe = gt_to_xe(gt); 648 + int err; 649 + 650 + /* We only support GT resets with GuC submission */ 651 + if (!xe_device_guc_submission_enabled(gt_to_xe(gt))) 652 + return -ENODEV; 653 + 654 + drm_info(&xe->drm, "GT reset started\n"); 655 + 656 + xe_device_mem_access_get(gt_to_xe(gt)); 657 + err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 658 + if (err) 659 + goto err_msg; 660 + 661 + xe_uc_stop_prepare(&gt->uc); 662 + xe_gt_pagefault_reset(gt); 663 + 664 + err = xe_uc_stop(&gt->uc); 665 + if (err) 666 + goto err_out; 667 + 668 + err = do_gt_reset(gt); 669 + if (err) 670 + goto err_out; 671 + 672 + err = do_gt_restart(gt); 673 + if (err) 674 + goto err_out; 675 + 676 + xe_device_mem_access_put(gt_to_xe(gt)); 677 + err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); 678 + XE_WARN_ON(err); 679 + 680 + drm_info(&xe->drm, "GT reset done\n"); 681 + 682 + return 0; 683 + 684 + err_out: 685 + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); 686 + err_msg: 687 + XE_WARN_ON(xe_uc_start(&gt->uc)); 688 + xe_device_mem_access_put(gt_to_xe(gt)); 689 + drm_err(&xe->drm, "GT reset failed, err=%d\n", err); 690 + 691 + return err; 692 + } 693 + 694 + static void gt_reset_worker(struct work_struct *w) 695 + { 696 + struct xe_gt *gt = container_of(w, typeof(*gt), reset.worker); 697 + 698 + gt_reset(gt); 699 + } 700 + 701 + void xe_gt_reset_async(struct xe_gt *gt) 702 + { 703 + struct xe_device *xe = gt_to_xe(gt); 704 + 705 + drm_info(&xe->drm, "Try GT reset\n"); 706 + 707 + /* Don't do a reset while one is already in flight */ 708 + if (xe_uc_reset_prepare(&gt->uc)) 709 + return; 710 + 711 + drm_info(&xe->drm, "Doing GT reset\n"); 712 + queue_work(gt->ordered_wq, &gt->reset.worker); 713 + } 714 + 715 + void xe_gt_suspend_prepare(struct xe_gt *gt) 716 + { 717 + xe_device_mem_access_get(gt_to_xe(gt)); 718 + XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), 
XE_FORCEWAKE_ALL)); 719 + 720 + xe_uc_stop_prepare(&gt->uc); 721 + 722 + xe_device_mem_access_put(gt_to_xe(gt)); 723 + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); 724 + } 725 + 726 + int xe_gt_suspend(struct xe_gt *gt) 727 + { 728 + struct xe_device *xe = gt_to_xe(gt); 729 + int err; 730 + 731 + /* For now suspend/resume is only allowed with GuC */ 732 + if (!xe_device_guc_submission_enabled(gt_to_xe(gt))) 733 + return -ENODEV; 734 + 735 + xe_device_mem_access_get(gt_to_xe(gt)); 736 + err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 737 + if (err) 738 + goto err_msg; 739 + 740 + err = xe_uc_suspend(&gt->uc); 741 + if (err) 742 + goto err_force_wake; 743 + 744 + xe_device_mem_access_put(gt_to_xe(gt)); 745 + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); 746 + drm_info(&xe->drm, "GT suspended\n"); 747 + 748 + return 0; 749 + 750 + err_force_wake: 751 + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); 752 + err_msg: 753 + xe_device_mem_access_put(gt_to_xe(gt)); 754 + drm_err(&xe->drm, "GT suspend failed: %d\n", err); 755 + 756 + return err; 757 + } 758 + 759 + int xe_gt_resume(struct xe_gt *gt) 760 + { 761 + struct xe_device *xe = gt_to_xe(gt); 762 + int err; 763 + 764 + xe_device_mem_access_get(gt_to_xe(gt)); 765 + err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 766 + if (err) 767 + goto err_msg; 768 + 769 + err = do_gt_restart(gt); 770 + if (err) 771 + goto err_force_wake; 772 + 773 + xe_device_mem_access_put(gt_to_xe(gt)); 774 + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); 775 + drm_info(&xe->drm, "GT resumed\n"); 776 + 777 + return 0; 778 + 779 + err_force_wake: 780 + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); 781 + err_msg: 782 + xe_device_mem_access_put(gt_to_xe(gt)); 783 + drm_err(&xe->drm, "GT resume failed: %d\n", err); 784 + 785 + return err; 786 + } 787 + 788 + void xe_gt_migrate_wait(struct xe_gt *gt) 789 + { 790 + xe_migrate_wait(gt->migrate); 791 + } 
792 + 793 + struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt, 794 + enum xe_engine_class class, 795 + u16 instance, bool logical) 796 + { 797 + struct xe_hw_engine *hwe; 798 + enum xe_hw_engine_id id; 799 + 800 + for_each_hw_engine(hwe, gt, id) 801 + if (hwe->class == class && 802 + ((!logical && hwe->instance == instance) || 803 + (logical && hwe->logical_instance == instance))) 804 + return hwe; 805 + 806 + return NULL; 807 + } 808 + 809 + struct xe_hw_engine *xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt, 810 + enum xe_engine_class class) 811 + { 812 + struct xe_hw_engine *hwe; 813 + enum xe_hw_engine_id id; 814 + 815 + for_each_hw_engine(hwe, gt, id) { 816 + switch (class) { 817 + case XE_ENGINE_CLASS_RENDER: 818 + case XE_ENGINE_CLASS_COMPUTE: 819 + if (hwe->class == XE_ENGINE_CLASS_RENDER || 820 + hwe->class == XE_ENGINE_CLASS_COMPUTE) 821 + return hwe; 822 + break; 823 + default: 824 + if (hwe->class == class) 825 + return hwe; 826 + } 827 + } 828 + 829 + return NULL; 830 + }
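The matching rule in xe_gt_any_hw_engine_by_reset_domain above encodes that render and compute engines share a reset domain, while every other class only matches itself. A small userspace sketch of that predicate (the enum values and function name are placeholders, not the driver's types):

```c
#include <stdbool.h>

/* Placeholder classes mirroring the xe engine classes used above. */
enum engine_class {
	CLASS_RENDER,
	CLASS_COMPUTE,
	CLASS_COPY,
	CLASS_VIDEO_DECODE,
};

/* Render and compute share a reset domain; any other class only
 * matches an engine of the same class, as in the switch statement in
 * xe_gt_any_hw_engine_by_reset_domain(). */
static bool same_reset_domain(enum engine_class wanted, enum engine_class hwe)
{
	switch (wanted) {
	case CLASS_RENDER:
	case CLASS_COMPUTE:
		return hwe == CLASS_RENDER || hwe == CLASS_COMPUTE;
	default:
		return hwe == wanted;
	}
}
```

This is why resetting a compute workload may return the first render engine found: either class is an acceptable representative of the shared domain.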
+64
drivers/gpu/drm/xe/xe_gt.h
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

#ifndef _XE_GT_H_
#define _XE_GT_H_

#include <drm/drm_util.h>

#include "xe_device_types.h"
#include "xe_hw_engine.h"

#define for_each_hw_engine(hwe__, gt__, id__) \
	for ((id__) = 0; (id__) < ARRAY_SIZE((gt__)->hw_engines); (id__)++) \
		for_each_if(((hwe__) = (gt__)->hw_engines + (id__)) && \
			    xe_hw_engine_is_valid((hwe__)))

int xe_gt_alloc(struct xe_device *xe, struct xe_gt *gt);
int xe_gt_init_early(struct xe_gt *gt);
int xe_gt_init_noalloc(struct xe_gt *gt);
int xe_gt_init(struct xe_gt *gt);
int xe_gt_record_default_lrcs(struct xe_gt *gt);
void xe_gt_suspend_prepare(struct xe_gt *gt);
int xe_gt_suspend(struct xe_gt *gt);
int xe_gt_resume(struct xe_gt *gt);
void xe_gt_reset_async(struct xe_gt *gt);
void xe_gt_migrate_wait(struct xe_gt *gt);

struct xe_gt *xe_find_full_gt(struct xe_gt *gt);

/**
 * xe_gt_any_hw_engine_by_reset_domain - scan the list of engines and return the
 * first that matches the same reset domain as @class
 * @gt: GT structure
 * @class: hw engine class to look up
 */
struct xe_hw_engine *
xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt, enum xe_engine_class class);

struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt,
				     enum xe_engine_class class,
				     u16 instance,
				     bool logical);

static inline bool xe_gt_is_media_type(struct xe_gt *gt)
{
	return gt->info.type == XE_GT_TYPE_MEDIA;
}

static inline struct xe_device *gt_to_xe(struct xe_gt *gt)
{
	return gt->xe;
}

static inline bool xe_gt_is_usm_hwe(struct xe_gt *gt, struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(gt);

	return xe->info.supports_usm && hwe->class == XE_ENGINE_CLASS_COPY &&
		hwe->instance == gt->usm.reserved_bcs_instance;
}

#endif
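The for_each_hw_engine() macro in xe_gt.h filters a fixed-size engine array by chaining a plain for loop with drm's for_each_if(). A minimal standalone sketch of the same pattern follows; the struct layout, validity flag, and helper names here are illustrative stand-ins, not the driver's types:

```c
#include <assert.h>
#include <stddef.h>

/* drm_util.h defines for_each_if() roughly like this: the filter keeps
 * the loop body as the `else` branch, so break/continue still apply to
 * the enclosing for loop. */
#define for_each_if(condition) if (!(condition)) {} else

#define N_SLOTS 8

/* Illustrative stand-in for struct xe_hw_engine */
struct hw_engine {
	int valid;
};

struct gt {
	struct hw_engine hw_engines[N_SLOTS];
};

/* Same shape as for_each_hw_engine(): walk every slot, skip holes */
#define for_each_hw_engine(hwe__, gt__, id__) \
	for ((id__) = 0; (id__) < N_SLOTS; (id__)++) \
		for_each_if(((hwe__) = &(gt__)->hw_engines[(id__)]) && \
			    (hwe__)->valid)

int count_valid_engines(struct gt *gt)
{
	struct hw_engine *hwe;
	size_t id;
	int n = 0;

	for_each_hw_engine(hwe, gt, id)
		n++;
	return n;
}
```

The two-level macro is what lets callers write a single-statement loop body after for_each_hw_engine() without braces, exactly as hw_engines() in xe_gt_debugfs.c does.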
drivers/gpu/drm/xe/xe_gt_clock.c (+83 lines)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "i915_reg.h"
#include "gt/intel_gt_regs.h"

#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_clock.h"
#include "xe_macros.h"
#include "xe_mmio.h"

static u32 read_reference_ts_freq(struct xe_gt *gt)
{
	u32 ts_override = xe_mmio_read32(gt, GEN9_TIMESTAMP_OVERRIDE.reg);
	u32 base_freq, frac_freq;

	base_freq = ((ts_override & GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK) >>
		     GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT) + 1;
	base_freq *= 1000000;

	frac_freq = ((ts_override &
		      GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >>
		     GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT);
	frac_freq = 1000000 / (frac_freq + 1);

	return base_freq + frac_freq;
}

static u32 get_crystal_clock_freq(u32 rpm_config_reg)
{
	const u32 f19_2_mhz = 19200000;
	const u32 f24_mhz = 24000000;
	const u32 f25_mhz = 25000000;
	const u32 f38_4_mhz = 38400000;
	u32 crystal_clock =
		(rpm_config_reg & GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >>
		GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;

	switch (crystal_clock) {
	case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ:
		return f24_mhz;
	case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ:
		return f19_2_mhz;
	case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ:
		return f38_4_mhz;
	case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ:
		return f25_mhz;
	default:
		XE_BUG_ON("NOT_POSSIBLE");
		return 0;
	}
}

int xe_gt_clock_init(struct xe_gt *gt)
{
	u32 ctc_reg = xe_mmio_read32(gt, CTC_MODE.reg);
	u32 freq = 0;

	/* Assuming gen11+ so assert this assumption is correct */
	XE_BUG_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11);

	if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) == CTC_SOURCE_DIVIDE_LOGIC) {
		freq = read_reference_ts_freq(gt);
	} else {
		u32 c0 = xe_mmio_read32(gt, RPM_CONFIG0.reg);

		freq = get_crystal_clock_freq(c0);

		/*
		 * Now figure out how the command stream's timestamp
		 * register increments from this frequency (it might
		 * increment only every few clock cycles).
		 */
		freq >>= 3 - ((c0 & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
			      GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT);
	}

	gt->info.clock_freq = freq;
	return 0;
}
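The crystal-clock branch of xe_gt_clock_init() can be modeled standalone: decode the crystal selector, then shift down because the command streamer's timestamp counter ticks only once every 2^(3 - shift) crystal cycles. The field offsets and selector encodings below are assumptions modeled on the GEN11 RPM_CONFIG0 layout, and the helper names are illustrative, not the driver's:

```c
#include <stdint.h>

/* Assumed field layout: crystal-clock selector in bits [5:3], CTC
 * shift parameter in bits [2:1] (illustrative, mirrors GEN11). */
#define CRYSTAL_FREQ_SHIFT	3
#define CRYSTAL_FREQ_MASK	(0x7u << CRYSTAL_FREQ_SHIFT)
#define CTC_SHIFT_SHIFT		1
#define CTC_SHIFT_MASK		(0x3u << CTC_SHIFT_SHIFT)

/* Assumed selector encoding: 0 = 24 MHz, 1 = 19.2 MHz, 2 = 38.4 MHz,
 * 3 = 25 MHz */
static uint32_t crystal_clock_hz(uint32_t sel)
{
	switch (sel) {
	case 0: return 24000000;
	case 1: return 19200000;
	case 2: return 38400000;
	case 3: return 25000000;
	default: return 0;
	}
}

/* Effective command-streamer timestamp frequency: the counter ticks
 * once every 2^(3 - shift) crystal cycles, hence the right shift. */
uint32_t cs_timestamp_hz(uint32_t rpm_config0)
{
	uint32_t sel = (rpm_config0 & CRYSTAL_FREQ_MASK) >> CRYSTAL_FREQ_SHIFT;
	uint32_t shift = (rpm_config0 & CTC_SHIFT_MASK) >> CTC_SHIFT_SHIFT;

	return crystal_clock_hz(sel) >> (3 - shift);
}
```

With a 24 MHz crystal and shift 0, for example, the timestamp register advances at 3 MHz, which is the value the driver stores in gt->info.clock_freq.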
drivers/gpu/drm/xe/xe_gt_clock.h (+13 lines)
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

#ifndef _XE_GT_CLOCK_H_
#define _XE_GT_CLOCK_H_

struct xe_gt;

int xe_gt_clock_init(struct xe_gt *gt);

#endif
drivers/gpu/drm/xe/xe_gt_debugfs.c (+160 lines)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include <drm/drm_debugfs.h>
#include <drm/drm_managed.h>

#include "xe_device.h"
#include "xe_force_wake.h"
#include "xe_gt.h"
#include "xe_gt_debugfs.h"
#include "xe_gt_mcr.h"
#include "xe_gt_pagefault.h"
#include "xe_gt_topology.h"
#include "xe_hw_engine.h"
#include "xe_macros.h"
#include "xe_uc_debugfs.h"

static struct xe_gt *node_to_gt(struct drm_info_node *node)
{
	return node->info_ent->data;
}

static int hw_engines(struct seq_file *m, void *data)
{
	struct xe_gt *gt = node_to_gt(m->private);
	struct xe_device *xe = gt_to_xe(gt);
	struct drm_printer p = drm_seq_file_printer(m);
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	int err;

	xe_device_mem_access_get(xe);
	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	if (err) {
		xe_device_mem_access_put(xe);
		return err;
	}

	for_each_hw_engine(hwe, gt, id)
		xe_hw_engine_print_state(hwe, &p);

	xe_device_mem_access_put(xe);
	err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	if (err)
		return err;

	return 0;
}

static int force_reset(struct seq_file *m, void *data)
{
	struct xe_gt *gt = node_to_gt(m->private);

	xe_gt_reset_async(gt);

	return 0;
}

static int sa_info(struct seq_file *m, void *data)
{
	struct xe_gt *gt = node_to_gt(m->private);
	struct drm_printer p = drm_seq_file_printer(m);

	drm_suballoc_dump_debug_info(&gt->kernel_bb_pool.base, &p,
				     gt->kernel_bb_pool.gpu_addr);

	return 0;
}

static int topology(struct seq_file *m, void *data)
{
	struct xe_gt *gt = node_to_gt(m->private);
	struct drm_printer p = drm_seq_file_printer(m);

	xe_gt_topology_dump(gt, &p);

	return 0;
}

static int steering(struct seq_file *m, void *data)
{
	struct xe_gt *gt = node_to_gt(m->private);
	struct drm_printer p = drm_seq_file_printer(m);

	xe_gt_mcr_steering_dump(gt, &p);

	return 0;
}

#ifdef CONFIG_DRM_XE_DEBUG
static int invalidate_tlb(struct seq_file *m, void *data)
{
	struct xe_gt *gt = node_to_gt(m->private);
	int seqno;
	int ret = 0;

	seqno = xe_gt_tlb_invalidation(gt);
	XE_WARN_ON(seqno < 0);
	if (seqno > 0)
		ret = xe_gt_tlb_invalidation_wait(gt, seqno);
	XE_WARN_ON(ret < 0);

	return 0;
}
#endif

static const struct drm_info_list debugfs_list[] = {
	{"hw_engines", hw_engines, 0},
	{"force_reset", force_reset, 0},
	{"sa_info", sa_info, 0},
	{"topology", topology, 0},
	{"steering", steering, 0},
#ifdef CONFIG_DRM_XE_DEBUG
	{"invalidate_tlb", invalidate_tlb, 0},
#endif
};

void xe_gt_debugfs_register(struct xe_gt *gt)
{
	struct drm_minor *minor = gt_to_xe(gt)->drm.primary;
	struct dentry *root;
	struct drm_info_list *local;
	char name[8];
	int i;

	XE_BUG_ON(!minor->debugfs_root);

	sprintf(name, "gt%d", gt->info.id);
	root = debugfs_create_dir(name, minor->debugfs_root);
	if (IS_ERR(root)) {
		XE_WARN_ON("Create GT directory failed");
		return;
	}

	/*
	 * Allocate local copy as we need to pass in the GT to the debugfs
	 * entry and drm_debugfs_create_files just references the drm_info_list
	 * passed in (e.g. can't define this on the stack).
	 */
#define DEBUGFS_SIZE	(ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list))
	local = drmm_kmalloc(&gt_to_xe(gt)->drm, DEBUGFS_SIZE, GFP_KERNEL);
	if (!local) {
		XE_WARN_ON("Couldn't allocate memory");
		return;
	}

	memcpy(local, debugfs_list, DEBUGFS_SIZE);
#undef DEBUGFS_SIZE

	for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i)
		local[i].data = gt;

	drm_debugfs_create_files(local,
				 ARRAY_SIZE(debugfs_list),
				 root, minor);

	xe_uc_debugfs_register(&gt->uc, root);
}
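xe_gt_debugfs_register() has to clone the const debugfs_list template because drm_debugfs_create_files() only keeps a reference to the array it is given, and each GT needs its own ->data back-pointer in every entry. A standalone sketch of that clone-and-repoint pattern, with illustrative stand-in types in place of drm_info_list and the GT:

```c
#include <stdlib.h>
#include <string.h>

/* Illustrative stand-in for struct drm_info_list */
struct info_entry {
	const char *name;
	int (*show)(void *data);
	void *data;
};

static int dummy_show(void *data)
{
	(void)data;
	return 0;
}

/* Const template shared by all instances, like debugfs_list above */
static const struct info_entry template_list[] = {
	{ "hw_engines", dummy_show, NULL },
	{ "topology", dummy_show, NULL },
};

#define N_ENTRIES (sizeof(template_list) / sizeof(template_list[0]))

/* Clone the template and point every entry at @owner; caller frees. */
struct info_entry *clone_info_list(void *owner, size_t *n)
{
	struct info_entry *local = malloc(sizeof(template_list));

	if (!local)
		return NULL;

	memcpy(local, template_list, sizeof(template_list));
	for (size_t i = 0; i < N_ENTRIES; i++)
		local[i].data = owner;

	*n = N_ENTRIES;
	return local;
}
```

The driver uses drmm_kmalloc() instead of malloc() so the copy is freed automatically when the drm device goes away.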
drivers/gpu/drm/xe/xe_gt_debugfs.h (+13 lines)
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

#ifndef _XE_GT_DEBUGFS_H_
#define _XE_GT_DEBUGFS_H_

struct xe_gt;

void xe_gt_debugfs_register(struct xe_gt *gt);

#endif
drivers/gpu/drm/xe/xe_gt_mcr.c (+552 lines)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_gt.h"
#include "xe_gt_mcr.h"
#include "xe_gt_topology.h"
#include "xe_gt_types.h"
#include "xe_mmio.h"

#include "gt/intel_gt_regs.h"

/**
 * DOC: GT Multicast/Replicated (MCR) Register Support
 *
 * Some GT registers are designed as "multicast" or "replicated" registers:
 * multiple instances of the same register share a single MMIO offset.  MCR
 * registers are generally used when the hardware needs to potentially track
 * independent values of a register per hardware unit (e.g., per-subslice,
 * per-L3bank, etc.).  The specific types of replication that exist vary
 * per-platform.
 *
 * MMIO accesses to MCR registers are controlled according to the settings
 * programmed in the platform's MCR_SELECTOR register(s).  MMIO writes to MCR
 * registers can be done in either a multicast manner (i.e., a single write
 * updates all instances of the register to the same value) or a unicast
 * manner (a write updates only one specific instance).  Reads of MCR
 * registers always operate in a unicast manner regardless of how the
 * multicast/unicast bit is set in MCR_SELECTOR.  Selection of a specific MCR
 * instance for unicast operations is referred to as "steering."
 *
 * If MCR register operations are steered toward a hardware unit that is
 * fused off or currently powered down due to power gating, the MMIO operation
 * is "terminated" by the hardware.  Terminated read operations will return a
 * value of zero and terminated unicast write operations will be silently
 * ignored.
 */

enum {
	MCR_OP_READ,
	MCR_OP_WRITE
};

static const struct xe_mmio_range xelp_l3bank_steering_table[] = {
	{ 0x00B100, 0x00B3FF },
	{},
};

/*
 * Although the bspec lists more "MSLICE" ranges than shown here, some of those
 * are of a "GAM" subclass that has special rules and doesn't need to be
 * included here.
 */
static const struct xe_mmio_range xehp_mslice_steering_table[] = {
	{ 0x00DD00, 0x00DDFF },
	{ 0x00E900, 0x00FFFF },	/* 0xEA00 - 0xEFFF is unused */
	{},
};

static const struct xe_mmio_range xehp_lncf_steering_table[] = {
	{ 0x00B000, 0x00B0FF },
	{ 0x00D880, 0x00D8FF },
	{},
};

/*
 * We have several types of MCR registers where steering to (0,0) will always
 * provide us with a non-terminated value.  We'll stick them all in the same
 * table for simplicity.
 */
static const struct xe_mmio_range xehpc_instance0_steering_table[] = {
	{ 0x004000, 0x004AFF },	/* HALF-BSLICE */
	{ 0x008800, 0x00887F },	/* CC */
	{ 0x008A80, 0x008AFF },	/* TILEPSMI */
	{ 0x00B000, 0x00B0FF },	/* HALF-BSLICE */
	{ 0x00B100, 0x00B3FF },	/* L3BANK */
	{ 0x00C800, 0x00CFFF },	/* HALF-BSLICE */
	{ 0x00D800, 0x00D8FF },	/* HALF-BSLICE */
	{ 0x00DD00, 0x00DDFF },	/* BSLICE */
	{ 0x00E900, 0x00E9FF },	/* HALF-BSLICE */
	{ 0x00EC00, 0x00EEFF },	/* HALF-BSLICE */
	{ 0x00F000, 0x00FFFF },	/* HALF-BSLICE */
	{ 0x024180, 0x0241FF },	/* HALF-BSLICE */
	{},
};

static const struct xe_mmio_range xelpg_instance0_steering_table[] = {
	{ 0x000B00, 0x000BFF },	/* SQIDI */
	{ 0x001000, 0x001FFF },	/* SQIDI */
	{ 0x004000, 0x0048FF },	/* GAM */
	{ 0x008700, 0x0087FF },	/* SQIDI */
	{ 0x00B000, 0x00B0FF },	/* NODE */
	{ 0x00C800, 0x00CFFF },	/* GAM */
	{ 0x00D880, 0x00D8FF },	/* NODE */
	{ 0x00DD00, 0x00DDFF },	/* OAAL2 */
	{},
};

static const struct xe_mmio_range xelpg_l3bank_steering_table[] = {
	{ 0x00B100, 0x00B3FF },
	{},
};

static const struct xe_mmio_range xelp_dss_steering_table[] = {
	{ 0x008150, 0x00815F },
	{ 0x009520, 0x00955F },
	{ 0x00DE80, 0x00E8FF },
	{ 0x024A00, 0x024A7F },
	{},
};

/* DSS steering is used for GSLICE ranges as well */
static const struct xe_mmio_range xehp_dss_steering_table[] = {
	{ 0x005200, 0x0052FF },	/* GSLICE */
	{ 0x005400, 0x007FFF },	/* GSLICE */
	{ 0x008140, 0x00815F },	/* GSLICE (0x8140-0x814F), DSS (0x8150-0x815F) */
	{ 0x008D00, 0x008DFF },	/* DSS */
	{ 0x0094D0, 0x00955F },	/* GSLICE (0x94D0-0x951F), DSS (0x9520-0x955F) */
	{ 0x009680, 0x0096FF },	/* DSS */
	{ 0x00D800, 0x00D87F },	/* GSLICE */
	{ 0x00DC00, 0x00DCFF },	/* GSLICE */
	{ 0x00DE80, 0x00E8FF },	/* DSS (0xE000-0xE0FF reserved) */
	{ 0x017000, 0x017FFF },	/* GSLICE */
	{ 0x024A00, 0x024A7F },	/* DSS */
	{},
};

/* DSS steering is used for COMPUTE ranges as well */
static const struct xe_mmio_range xehpc_dss_steering_table[] = {
	{ 0x008140, 0x00817F },	/* COMPUTE (0x8140-0x814F & 0x8160-0x817F), DSS (0x8150-0x815F) */
	{ 0x0094D0, 0x00955F },	/* COMPUTE (0x94D0-0x951F), DSS (0x9520-0x955F) */
	{ 0x009680, 0x0096FF },	/* DSS */
	{ 0x00DC00, 0x00DCFF },	/* COMPUTE */
	{ 0x00DE80, 0x00E7FF },	/* DSS (0xDF00-0xE1FF reserved) */
	{},
};

/* DSS steering is used for SLICE ranges as well */
static const struct xe_mmio_range xelpg_dss_steering_table[] = {
	{ 0x005200, 0x0052FF },	/* SLICE */
	{ 0x005500, 0x007FFF },	/* SLICE */
	{ 0x008140, 0x00815F },	/* SLICE (0x8140-0x814F), DSS (0x8150-0x815F) */
	{ 0x0094D0, 0x00955F },	/* SLICE (0x94D0-0x951F), DSS (0x9520-0x955F) */
	{ 0x009680, 0x0096FF },	/* DSS */
	{ 0x00D800, 0x00D87F },	/* SLICE */
	{ 0x00DC00, 0x00DCFF },	/* SLICE */
	{ 0x00DE80, 0x00E8FF },	/* DSS (0xE000-0xE0FF reserved) */
	{},
};

static const struct xe_mmio_range xelpmp_oaddrm_steering_table[] = {
	{ 0x393200, 0x39323F },
	{ 0x393400, 0x3934FF },
	{},
};

/*
 * DG2 GAM registers are a special case; this table is checked directly in
 * xe_gt_mcr_get_nonterminated_steering and is not hooked up via
 * gt->steering[].
 */
static const struct xe_mmio_range dg2_gam_ranges[] = {
	{ 0x004000, 0x004AFF },
	{ 0x00C800, 0x00CFFF },
	{ 0x00F000, 0x00FFFF },
	{},
};

static void init_steering_l3bank(struct xe_gt *gt)
{
	if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
		u32 mslice_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK,
						xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg));
		u32 bank_mask = REG_FIELD_GET(GT_L3_EXC_MASK,
					      xe_mmio_read32(gt, XEHP_FUSE4.reg));

		/*
		 * Group selects mslice, instance selects bank within mslice.
		 * Bank 0 is always valid _except_ when the bank mask is 010b.
		 */
		gt->steering[L3BANK].group_target = __ffs(mslice_mask);
		gt->steering[L3BANK].instance_target =
			bank_mask & BIT(0) ? 0 : 2;
	} else {
		u32 fuse = REG_FIELD_GET(GEN10_L3BANK_MASK,
					 ~xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg));

		gt->steering[L3BANK].group_target = 0;	/* unused */
		gt->steering[L3BANK].instance_target = __ffs(fuse);
	}
}

static void init_steering_mslice(struct xe_gt *gt)
{
	u32 mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK,
				 xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg));

	/*
	 * mslice registers are valid (not terminated) if either the meml3
	 * associated with the mslice is present, or at least one DSS associated
	 * with the mslice is present.  There will always be at least one meml3
	 * so we can just use that to find a non-terminated mslice and ignore
	 * the DSS fusing.
	 */
	gt->steering[MSLICE].group_target = __ffs(mask);
	gt->steering[MSLICE].instance_target = 0;	/* unused */

	/*
	 * LNCF termination is also based on mslice presence, so we'll set
	 * it up here.  Either LNCF within a non-terminated mslice will work,
	 * so we just always pick LNCF 0 here.
	 */
	gt->steering[LNCF].group_target = __ffs(mask) << 1;
	gt->steering[LNCF].instance_target = 0;		/* unused */
}

static void init_steering_dss(struct xe_gt *gt)
{
	unsigned int dss = min(xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0),
			       xe_dss_mask_group_ffs(gt->fuse_topo.c_dss_mask, 0, 0));
	unsigned int dss_per_grp = gt_to_xe(gt)->info.platform == XE_PVC ? 8 : 4;

	gt->steering[DSS].group_target = dss / dss_per_grp;
	gt->steering[DSS].instance_target = dss % dss_per_grp;
}

static void init_steering_oaddrm(struct xe_gt *gt)
{
	/*
	 * First instance is only terminated if the entire first media slice
	 * is absent (i.e., no VCS0 or VECS0).
	 */
	if (gt->info.engine_mask & (XE_HW_ENGINE_VCS0 | XE_HW_ENGINE_VECS0))
		gt->steering[OADDRM].group_target = 0;
	else
		gt->steering[OADDRM].group_target = 1;

	gt->steering[OADDRM].instance_target = 0;	/* unused */
}

static void init_steering_inst0(struct xe_gt *gt)
{
	gt->steering[INSTANCE0].group_target = 0;	/* unused */
	gt->steering[INSTANCE0].instance_target = 0;	/* unused */
}

static const struct {
	const char *name;
	void (*init)(struct xe_gt *);
} xe_steering_types[] = {
	{ "L3BANK", init_steering_l3bank },
	{ "MSLICE", init_steering_mslice },
	{ "LNCF", NULL },	/* initialized by mslice init */
	{ "DSS", init_steering_dss },
	{ "OADDRM", init_steering_oaddrm },
	{ "INSTANCE 0", init_steering_inst0 },
};

void xe_gt_mcr_init(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);

	BUILD_BUG_ON(ARRAY_SIZE(xe_steering_types) != NUM_STEERING_TYPES);

	spin_lock_init(&gt->mcr_lock);

	if (gt->info.type == XE_GT_TYPE_MEDIA) {
		drm_WARN_ON(&xe->drm, MEDIA_VER(xe) < 13);

		gt->steering[OADDRM].ranges = xelpmp_oaddrm_steering_table;
	} else if (GRAPHICS_VERx100(xe) >= 1270) {
		gt->steering[INSTANCE0].ranges = xelpg_instance0_steering_table;
		gt->steering[L3BANK].ranges = xelpg_l3bank_steering_table;
		gt->steering[DSS].ranges = xelpg_dss_steering_table;
	} else if (xe->info.platform == XE_PVC) {
		gt->steering[INSTANCE0].ranges = xehpc_instance0_steering_table;
		gt->steering[DSS].ranges = xehpc_dss_steering_table;
	} else if (xe->info.platform == XE_DG2) {
		gt->steering[MSLICE].ranges = xehp_mslice_steering_table;
		gt->steering[LNCF].ranges = xehp_lncf_steering_table;
		gt->steering[DSS].ranges = xehp_dss_steering_table;
	} else {
		gt->steering[L3BANK].ranges = xelp_l3bank_steering_table;
		gt->steering[DSS].ranges = xelp_dss_steering_table;
	}

	/* Select non-terminated steering target for each type */
	for (int i = 0; i < NUM_STEERING_TYPES; i++)
		if (gt->steering[i].ranges && xe_steering_types[i].init)
			xe_steering_types[i].init(gt);
}

/*
 * xe_gt_mcr_get_nonterminated_steering - find group/instance values that
 * will steer a register to a non-terminated instance
 * @gt: GT structure
 * @reg: register for which the steering is required
 * @group: return variable for group steering
 * @instance: return variable for instance steering
 *
 * This function returns a group/instance pair that is guaranteed to work for
 * read steering of the given register.  Note that a value will be returned
 * even if the register is not replicated and therefore does not actually
 * require steering.
 *
 * Returns true if the caller should steer to the @group/@instance values
 * returned.  Returns false if the caller need not perform any steering (i.e.,
 * the DG2 GAM range special case).
 */
static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
						 i915_mcr_reg_t reg,
						 u8 *group, u8 *instance)
{
	for (int type = 0; type < NUM_STEERING_TYPES; type++) {
		if (!gt->steering[type].ranges)
			continue;

		for (int i = 0; gt->steering[type].ranges[i].end > 0; i++) {
			if (xe_mmio_in_range(&gt->steering[type].ranges[i], reg.reg)) {
				*group = gt->steering[type].group_target;
				*instance = gt->steering[type].instance_target;
				return true;
			}
		}
	}

	/*
	 * All MCR registers should usually be part of one of the steering
	 * ranges we're tracking.  However there's one special case: DG2
	 * GAM registers are technically multicast registers, but are special
	 * in a number of ways:
	 *  - they have their own dedicated steering control register (they
	 *    don't share 0xFDC with other MCR classes)
	 *  - all reads should be directed to instance 1 (unicast reads against
	 *    other instances are not allowed), and instance 1 is already the
	 *    hardware's default steering target, which we never change
	 *
	 * Ultimately this means that we can just treat them as if they were
	 * unicast registers and all operations will work properly.
	 */
	for (int i = 0; dg2_gam_ranges[i].end > 0; i++)
		if (xe_mmio_in_range(&dg2_gam_ranges[i], reg.reg))
			return false;

	/*
	 * Not found in a steering table and not a DG2 GAM register?  We'll
	 * just steer to 0/0 as a guess and raise a warning.
	 */
	drm_WARN(&gt_to_xe(gt)->drm, true,
		 "Did not find MCR register %#x in any MCR steering table\n",
		 reg.reg);
	*group = 0;
	*instance = 0;

	return true;
}

#define STEER_SEMAPHORE		0xFD0

/*
 * Obtain exclusive access to MCR steering.  On MTL and beyond we also need
 * to synchronize with external clients (e.g., firmware), so a semaphore
 * register will also need to be taken.
 */
static void mcr_lock(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	int ret = 0;

	spin_lock(&gt->mcr_lock);

	/*
	 * Starting with MTL we also need to grab a semaphore register
	 * to synchronize with external agents (e.g., firmware) that now
	 * share the same steering control register.
	 */
	if (GRAPHICS_VERx100(xe) >= 1270)
		ret = wait_for_us(xe_mmio_read32(gt, STEER_SEMAPHORE) == 0x1, 10);

	drm_WARN_ON_ONCE(&xe->drm, ret == -ETIMEDOUT);
}

static void mcr_unlock(struct xe_gt *gt)
{
	/* Release hardware semaphore */
	if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270)
		xe_mmio_write32(gt, STEER_SEMAPHORE, 0x1);

	spin_unlock(&gt->mcr_lock);
}

/*
 * Access a register with specific MCR steering
 *
 * Caller needs to make sure the relevant forcewake wells are up.
 */
static u32 rw_with_mcr_steering(struct xe_gt *gt, i915_mcr_reg_t reg, u8 rw_flag,
				int group, int instance, u32 value)
{
	u32 steer_reg, steer_val, val = 0;

	lockdep_assert_held(&gt->mcr_lock);

	if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
		steer_reg = MTL_MCR_SELECTOR.reg;
		steer_val = REG_FIELD_PREP(MTL_MCR_GROUPID, group) |
			REG_FIELD_PREP(MTL_MCR_INSTANCEID, instance);
	} else {
		steer_reg = GEN8_MCR_SELECTOR.reg;
		steer_val = REG_FIELD_PREP(GEN11_MCR_SLICE_MASK, group) |
			REG_FIELD_PREP(GEN11_MCR_SUBSLICE_MASK, instance);
	}

	/*
	 * Always leave the hardware in multicast mode when doing reads
	 * (see comment about Wa_22013088509 below) and only change it
	 * to unicast mode when doing writes of a specific instance.
	 *
	 * No need to save old steering reg value.
	 */
	if (rw_flag == MCR_OP_READ)
		steer_val |= GEN11_MCR_MULTICAST;

	xe_mmio_write32(gt, steer_reg, steer_val);

	if (rw_flag == MCR_OP_READ)
		val = xe_mmio_read32(gt, reg.reg);
	else
		xe_mmio_write32(gt, reg.reg, value);

	/*
	 * If we turned off the multicast bit (during a write) we're required
	 * to turn it back on before finishing.  The group and instance values
	 * don't matter since they'll be re-programmed on the next MCR
	 * operation.
	 */
	if (rw_flag == MCR_OP_WRITE)
		xe_mmio_write32(gt, steer_reg, GEN11_MCR_MULTICAST);

	return val;
}

/**
 * xe_gt_mcr_unicast_read_any - reads a non-terminated instance of an MCR register
 * @gt: GT structure
 * @reg: register to read
 *
 * Reads a GT MCR register.  The read will be steered to a non-terminated
 * instance (i.e., one that isn't fused off or powered down by power gating).
 * This function assumes the caller is already holding any necessary forcewake
 * domains.
 *
 * Returns the value from a non-terminated instance of @reg.
 */
u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, i915_mcr_reg_t reg)
{
	u8 group, instance;
	u32 val;
	bool steer;

	steer = xe_gt_mcr_get_nonterminated_steering(gt, reg, &group, &instance);

	if (steer) {
		mcr_lock(gt);
		val = rw_with_mcr_steering(gt, reg, MCR_OP_READ,
					   group, instance, 0);
		mcr_unlock(gt);
	} else {
		/* DG2 GAM special case rules; treat as if unicast */
		val = xe_mmio_read32(gt, reg.reg);
	}

	return val;
}

/**
 * xe_gt_mcr_unicast_read - read a specific instance of an MCR register
 * @gt: GT structure
 * @reg: the MCR register to read
 * @group: the MCR group
 * @instance: the MCR instance
 *
 * Returns the value read from an MCR register after steering toward a specific
 * group/instance.
 */
u32 xe_gt_mcr_unicast_read(struct xe_gt *gt,
			   i915_mcr_reg_t reg,
			   int group, int instance)
{
	u32 val;

	mcr_lock(gt);
	val = rw_with_mcr_steering(gt, reg, MCR_OP_READ, group, instance, 0);
	mcr_unlock(gt);

	return val;
}

/**
 * xe_gt_mcr_unicast_write - write a specific instance of an MCR register
 * @gt: GT structure
 * @reg: the MCR register to write
 * @value: value to write
 * @group: the MCR group
 * @instance: the MCR instance
 *
 * Write an MCR register in unicast mode after steering toward a specific
 * group/instance.
 */
void xe_gt_mcr_unicast_write(struct xe_gt *gt, i915_mcr_reg_t reg, u32 value,
			     int group, int instance)
{
	mcr_lock(gt);
	rw_with_mcr_steering(gt, reg, MCR_OP_WRITE, group, instance, value);
	mcr_unlock(gt);
}

/**
 * xe_gt_mcr_multicast_write - write a value to all instances of an MCR register
 * @gt: GT structure
 * @reg: the MCR register to write
 * @value: value to write
 *
 * Write an MCR register in multicast mode to update all instances.
 */
void xe_gt_mcr_multicast_write(struct xe_gt *gt, i915_mcr_reg_t reg, u32 value)
{
	/*
	 * Synchronize with any unicast operations.  Once we have exclusive
	 * access, the MULTICAST bit should already be set, so there's no need
	 * to touch the steering register.
	 */
	mcr_lock(gt);
	xe_mmio_write32(gt, reg.reg, value);
	mcr_unlock(gt);
}

void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p)
{
	for (int i = 0; i < NUM_STEERING_TYPES; i++) {
		if (gt->steering[i].ranges) {
			drm_printf(p, "%s steering: group=%#x, instance=%#x\n",
				   xe_steering_types[i].name,
				   gt->steering[i].group_target,
				   gt->steering[i].instance_target);
			for (int j = 0; gt->steering[i].ranges[j].end; j++)
				drm_printf(p, "\t0x%06x - 0x%06x\n",
					   gt->steering[i].ranges[j].start,
					   gt->steering[i].ranges[j].end);
		}
	}
}
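The core of xe_gt_mcr_get_nonterminated_steering() is a walk over per-type range tables, where each table ends with a zero-filled sentinel entry. A standalone model of that table walk, using the xelp L3BANK range from above (the type and function names here are illustrative, not the driver's):

```c
#include <stdbool.h>
#include <stdint.h>

/* Illustrative stand-in for struct xe_mmio_range */
struct mmio_range {
	uint32_t start;
	uint32_t end;
};

/* The xelp L3BANK steering range from the table above */
static const struct mmio_range l3bank_table[] = {
	{ 0x00B100, 0x00B3FF },
	{},	/* sentinel: end == 0 terminates the walk */
};

/* Same test as xe_mmio_in_range(): inclusive on both ends */
static bool in_range(const struct mmio_range *r, uint32_t reg)
{
	return reg >= r->start && reg <= r->end;
}

/* Returns true if @reg falls in any range of @table and therefore
 * needs explicit group/instance steering before being read. */
bool table_steers_reg(const struct mmio_range *table, uint32_t reg)
{
	for (int i = 0; table[i].end > 0; i++)
		if (in_range(&table[i], reg))
			return true;
	return false;
}
```

In the driver, a hit copies the precomputed group_target/instance_target chosen by the init_steering_*() functions; a miss falls through to the DG2 GAM special case and finally to the warned 0/0 guess.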
drivers/gpu/drm/xe/xe_gt_mcr.h (+26 lines)
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

#ifndef _XE_GT_MCR_H_
#define _XE_GT_MCR_H_

#include "i915_reg_defs.h"

struct drm_printer;
struct xe_gt;

void xe_gt_mcr_init(struct xe_gt *gt);

u32 xe_gt_mcr_unicast_read(struct xe_gt *gt, i915_mcr_reg_t reg,
			   int group, int instance);
u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, i915_mcr_reg_t reg);

void xe_gt_mcr_unicast_write(struct xe_gt *gt, i915_mcr_reg_t reg, u32 value,
			     int group, int instance);
void xe_gt_mcr_multicast_write(struct xe_gt *gt, i915_mcr_reg_t reg, u32 value);

void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p);

#endif /* _XE_GT_MCR_H_ */
drivers/gpu/drm/xe/xe_gt_pagefault.c (+750 lines)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include <linux/circ_buf.h>

#include <drm/drm_managed.h>
#include <drm/ttm/ttm_execbuf_util.h>

#include "xe_bo.h"
#include "xe_gt.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_gt_pagefault.h"
#include "xe_migrate.h"
#include "xe_pt.h"
#include "xe_trace.h"
#include "xe_vm.h"

struct pagefault {
	u64 page_addr;
	u32 asid;
	u16 pdata;
	u8 vfid;
	u8 access_type;
	u8 fault_type;
	u8 fault_level;
	u8 engine_class;
	u8 engine_instance;
	u8 fault_unsuccessful;
};

enum access_type {
	ACCESS_TYPE_READ = 0,
	ACCESS_TYPE_WRITE = 1,
	ACCESS_TYPE_ATOMIC = 2,
	ACCESS_TYPE_RESERVED = 3,
};

enum fault_type {
	NOT_PRESENT = 0,
	WRITE_ACCESS_VIOLATION = 1,
	ATOMIC_ACCESS_VIOLATION = 2,
};

struct acc {
	u64 va_range_base;
	u32 asid;
	u32 sub_granularity;
	u8 granularity;
	u8 vfid;
	u8 access_type;
	u8 engine_class;
	u8 engine_instance;
};

static struct xe_gt *
guc_to_gt(struct xe_guc *guc)
{
	return container_of(guc, struct xe_gt, uc.guc);
}

static int send_tlb_invalidation(struct xe_guc *guc)
{
	struct xe_gt *gt = guc_to_gt(guc);
	u32 action[] = {
		XE_GUC_ACTION_TLB_INVALIDATION,
		0,
		XE_GUC_TLB_INVAL_FULL << XE_GUC_TLB_INVAL_TYPE_SHIFT |
		XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT |
		XE_GUC_TLB_INVAL_FLUSH_CACHE,
	};
	int seqno;
	int ret;

	/*
	 * XXX: The seqno algorithm relies on TLB invalidations being processed
	 * in order, which they currently are; if that changes, the algorithm
	 * will need to be updated.
	 */
	mutex_lock(&guc->ct.lock);
	seqno = gt->usm.tlb_invalidation_seqno;
	action[1] = seqno;
	gt->usm.tlb_invalidation_seqno = (gt->usm.tlb_invalidation_seqno + 1) %
		TLB_INVALIDATION_SEQNO_MAX;
	if (!gt->usm.tlb_invalidation_seqno)
		gt->usm.tlb_invalidation_seqno = 1;
	ret = xe_guc_ct_send_locked(&guc->ct, action, ARRAY_SIZE(action),
				    G2H_LEN_DW_TLB_INVALIDATE, 1);
	if (!ret)
		ret = seqno;
	mutex_unlock(&guc->ct.lock);

	return ret;
}

static bool access_is_atomic(enum access_type access_type)
{
	return access_type == ACCESS_TYPE_ATOMIC;
}

static bool vma_is_valid(struct xe_gt *gt, struct xe_vma *vma)
{
	return BIT(gt->info.id) & vma->gt_present &&
		!(BIT(gt->info.id) & vma->usm.gt_invalidated);
}

static bool vma_matches(struct xe_vma *vma, struct xe_vma *lookup)
{
	if (lookup->start > vma->end || lookup->end < vma->start)
		return false;

	return true;
}

static bool only_needs_bo_lock(struct xe_bo *bo)
{
	return bo && bo->vm;
}

static struct xe_vma *lookup_vma(struct xe_vm *vm, u64 page_addr)
{
	struct xe_vma *vma = NULL, lookup;

	lookup.start = page_addr;
	lookup.end = lookup.start + SZ_4K - 1;
	if (vm->usm.last_fault_vma) {	/* Fast lookup */
		if (vma_matches(vm->usm.last_fault_vma, &lookup))
			vma = vm->usm.last_fault_vma;
	}
	if (!vma)
		vma = xe_vm_find_overlapping_vma(vm, &lookup);

	return vma;
}

static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_vm *vm;
	struct xe_vma *vma = NULL;
	struct xe_bo *bo;
	LIST_HEAD(objs);
	LIST_HEAD(dups);
	struct ttm_validate_buffer tv_bo, tv_vm;
	struct ww_acquire_ctx ww;
	struct dma_fence *fence;
	bool write_locked;
	int ret = 0;
	bool atomic;

	/* ASID to VM */
	mutex_lock(&xe->usm.lock);
	vm = xa_load(&xe->usm.asid_to_vm, pf->asid);
	if (vm)
		xe_vm_get(vm);
	mutex_unlock(&xe->usm.lock);
	if (!vm || !xe_vm_in_fault_mode(vm))
		return -EINVAL;

retry_userptr:
	/*
	 * TODO: Avoid exclusive lock if VM doesn't have userptrs, or
	 * start out read-locked?
	 */
	down_write(&vm->lock);
	write_locked = true;
	vma = lookup_vma(vm, pf->page_addr);
	if (!vma) {
		ret = -EINVAL;
		goto unlock_vm;
	}

	if (!xe_vma_is_userptr(vma) || !xe_vma_userptr_check_repin(vma)) {
		downgrade_write(&vm->lock);
		write_locked = false;
	}

	trace_xe_vma_pagefault(vma);

	atomic = access_is_atomic(pf->access_type);

	/* Check if VMA is valid */
	if (vma_is_valid(gt, vma) && !atomic)
		goto unlock_vm;

	/* TODO: Validate fault */

	if (xe_vma_is_userptr(vma) && write_locked) {
		spin_lock(&vm->userptr.invalidated_lock);
		list_del_init(&vma->userptr.invalidate_link);
		spin_unlock(&vm->userptr.invalidated_lock);

		ret = xe_vma_userptr_pin_pages(vma);
		if (ret)
			goto unlock_vm;

		downgrade_write(&vm->lock);
		write_locked = false;
	}

	/* Lock VM and BOs dma-resv */
	bo = vma->bo;
	if (only_needs_bo_lock(bo)) {
		/* This path ensures the BO's LRU is updated */
		ret = xe_bo_lock(bo, &ww, xe->info.tile_count, false);
	} else {
		tv_vm.num_shared = xe->info.tile_count;
		tv_vm.bo = xe_vm_ttm_bo(vm);
		list_add(&tv_vm.head, &objs);
		if (bo) {
			tv_bo.bo = &bo->ttm;
			tv_bo.num_shared = xe->info.tile_count;
			list_add(&tv_bo.head, &objs);
		}
		ret = ttm_eu_reserve_buffers(&ww, &objs, false, &dups);
	}
	if (ret)
		goto unlock_vm;

	if (atomic) {
		if (xe_vma_is_userptr(vma)) {
			ret = -EACCES;
			goto unlock_dma_resv;
		}

		/* Migrate to VRAM, move should invalidate the VMA first */
		ret = xe_bo_migrate(bo, XE_PL_VRAM0 + gt->info.vram_id);
		if (ret)
			goto unlock_dma_resv;
	} else if (bo) {
		/* Create backing store if needed */
		ret = xe_bo_validate(bo, vm, true);
		if (ret)
			goto unlock_dma_resv;
	}

	/* Bind VMA only to the GT that has faulted */
	trace_xe_vma_pf_bind(vma);
	fence = __xe_pt_bind_vma(gt, vma, xe_gt_migrate_engine(gt), NULL, 0,
				 vma->gt_present & BIT(gt->info.id));
	if (IS_ERR(fence)) {
		ret = PTR_ERR(fence);
		goto unlock_dma_resv;
	}

	/*
	 * XXX: Should we drop the lock before waiting? This only helps if doing
	 * GPU binds which is currently only done if we have to wait for more
	 * than 10ms on a move.
	 */
	dma_fence_wait(fence, false);
	dma_fence_put(fence);

	if (xe_vma_is_userptr(vma))
		ret = xe_vma_userptr_check_repin(vma);
	vma->usm.gt_invalidated &= ~BIT(gt->info.id);

unlock_dma_resv:
	if (only_needs_bo_lock(bo))
		xe_bo_unlock(bo, &ww);
	else
		ttm_eu_backoff_reservation(&ww, &objs);
unlock_vm:
	if (!ret)
		vm->usm.last_fault_vma = vma;
	if (write_locked)
		up_write(&vm->lock);
	else
		up_read(&vm->lock);
	if (ret == -EAGAIN)
		goto retry_userptr;

	if (!ret) {
		/*
		 * FIXME: Doing a full TLB invalidation for now, likely could
		 * defer TLB invalidate + fault response to a callback of fence
		 * too
		 */
		ret = send_tlb_invalidation(&gt->uc.guc);
		if (ret >= 0)
			ret = 0;
	}
	xe_vm_put(vm);

	return ret;
}

static int send_pagefault_reply(struct xe_guc *guc,
				struct xe_guc_pagefault_reply *reply)
{
	u32 action[] = {
XE_GUC_ACTION_PAGE_FAULT_RES_DESC, 295 + reply->dw0, 296 + reply->dw1, 297 + }; 298 + 299 + return xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); 300 + } 301 + 302 + static void print_pagefault(struct xe_device *xe, struct pagefault *pf) 303 + { 304 + drm_warn(&xe->drm, "\n\tASID: %d\n" 305 + "\tVFID: %d\n" 306 + "\tPDATA: 0x%04x\n" 307 + "\tFaulted Address: 0x%08x%08x\n" 308 + "\tFaultType: %d\n" 309 + "\tAccessType: %d\n" 310 + "\tFaultLevel: %d\n" 311 + "\tEngineClass: %d\n" 312 + "\tEngineInstance: %d\n", 313 + pf->asid, pf->vfid, pf->pdata, upper_32_bits(pf->page_addr), 314 + lower_32_bits(pf->page_addr), 315 + pf->fault_type, pf->access_type, pf->fault_level, 316 + pf->engine_class, pf->engine_instance); 317 + } 318 + 319 + #define PF_MSG_LEN_DW 4 320 + 321 + static int get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf) 322 + { 323 + const struct xe_guc_pagefault_desc *desc; 324 + int ret = 0; 325 + 326 + spin_lock_irq(&pf_queue->lock); 327 + if (pf_queue->head != pf_queue->tail) { 328 + desc = (const struct xe_guc_pagefault_desc *) 329 + (pf_queue->data + pf_queue->head); 330 + 331 + pf->fault_level = FIELD_GET(PFD_FAULT_LEVEL, desc->dw0); 332 + pf->engine_class = FIELD_GET(PFD_ENG_CLASS, desc->dw0); 333 + pf->engine_instance = FIELD_GET(PFD_ENG_INSTANCE, desc->dw0); 334 + pf->pdata = FIELD_GET(PFD_PDATA_HI, desc->dw1) << 335 + PFD_PDATA_HI_SHIFT; 336 + pf->pdata |= FIELD_GET(PFD_PDATA_LO, desc->dw0); 337 + pf->asid = FIELD_GET(PFD_ASID, desc->dw1); 338 + pf->vfid = FIELD_GET(PFD_VFID, desc->dw2); 339 + pf->access_type = FIELD_GET(PFD_ACCESS_TYPE, desc->dw2); 340 + pf->fault_type = FIELD_GET(PFD_FAULT_TYPE, desc->dw2); 341 + pf->page_addr = (u64)(FIELD_GET(PFD_VIRTUAL_ADDR_HI, desc->dw3)) << 342 + PFD_VIRTUAL_ADDR_HI_SHIFT; 343 + pf->page_addr |= FIELD_GET(PFD_VIRTUAL_ADDR_LO, desc->dw2) << 344 + PFD_VIRTUAL_ADDR_LO_SHIFT; 345 + 346 + pf_queue->head = (pf_queue->head + PF_MSG_LEN_DW) % 347 + PF_QUEUE_NUM_DW; 348 + } else { 349 + ret 
= -1; 350 + } 351 + spin_unlock_irq(&pf_queue->lock); 352 + 353 + return ret; 354 + } 355 + 356 + static bool pf_queue_full(struct pf_queue *pf_queue) 357 + { 358 + lockdep_assert_held(&pf_queue->lock); 359 + 360 + return CIRC_SPACE(pf_queue->tail, pf_queue->head, PF_QUEUE_NUM_DW) <= 361 + PF_MSG_LEN_DW; 362 + } 363 + 364 + int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) 365 + { 366 + struct xe_gt *gt = guc_to_gt(guc); 367 + struct pf_queue *pf_queue; 368 + unsigned long flags; 369 + u32 asid; 370 + bool full; 371 + 372 + if (unlikely(len != PF_MSG_LEN_DW)) 373 + return -EPROTO; 374 + 375 + asid = FIELD_GET(PFD_ASID, msg[1]); 376 + pf_queue = &gt->usm.pf_queue[asid % NUM_PF_QUEUE]; 377 + 378 + spin_lock_irqsave(&pf_queue->lock, flags); 379 + full = pf_queue_full(pf_queue); 380 + if (!full) { 381 + memcpy(pf_queue->data + pf_queue->tail, msg, len * sizeof(u32)); 382 + pf_queue->tail = (pf_queue->tail + len) % PF_QUEUE_NUM_DW; 383 + queue_work(gt->usm.pf_wq, &pf_queue->worker); 384 + } else { 385 + XE_WARN_ON("PF Queue full, shouldn't be possible"); 386 + } 387 + spin_unlock_irqrestore(&pf_queue->lock, flags); 388 + 389 + return full ? 
-ENOSPC : 0; 390 + } 391 + 392 + static void pf_queue_work_func(struct work_struct *w) 393 + { 394 + struct pf_queue *pf_queue = container_of(w, struct pf_queue, worker); 395 + struct xe_gt *gt = pf_queue->gt; 396 + struct xe_device *xe = gt_to_xe(gt); 397 + struct xe_guc_pagefault_reply reply = {}; 398 + struct pagefault pf = {}; 399 + int ret; 400 + 401 + ret = get_pagefault(pf_queue, &pf); 402 + if (ret) 403 + return; 404 + 405 + ret = handle_pagefault(gt, &pf); 406 + if (unlikely(ret)) { 407 + print_pagefault(xe, &pf); 408 + pf.fault_unsuccessful = 1; 409 + drm_warn(&xe->drm, "Fault response: Unsuccessful %d\n", ret); 410 + } 411 + 412 + reply.dw0 = FIELD_PREP(PFR_VALID, 1) | 413 + FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) | 414 + FIELD_PREP(PFR_REPLY, PFR_ACCESS) | 415 + FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) | 416 + FIELD_PREP(PFR_ASID, pf.asid); 417 + 418 + reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) | 419 + FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance) | 420 + FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) | 421 + FIELD_PREP(PFR_PDATA, pf.pdata); 422 + 423 + send_pagefault_reply(&gt->uc.guc, &reply); 424 + } 425 + 426 + static void acc_queue_work_func(struct work_struct *w); 427 + 428 + int xe_gt_pagefault_init(struct xe_gt *gt) 429 + { 430 + struct xe_device *xe = gt_to_xe(gt); 431 + int i; 432 + 433 + if (!xe->info.supports_usm) 434 + return 0; 435 + 436 + gt->usm.tlb_invalidation_seqno = 1; 437 + for (i = 0; i < NUM_PF_QUEUE; ++i) { 438 + gt->usm.pf_queue[i].gt = gt; 439 + spin_lock_init(&gt->usm.pf_queue[i].lock); 440 + INIT_WORK(&gt->usm.pf_queue[i].worker, pf_queue_work_func); 441 + } 442 + for (i = 0; i < NUM_ACC_QUEUE; ++i) { 443 + gt->usm.acc_queue[i].gt = gt; 444 + spin_lock_init(&gt->usm.acc_queue[i].lock); 445 + INIT_WORK(&gt->usm.acc_queue[i].worker, acc_queue_work_func); 446 + } 447 + 448 + gt->usm.pf_wq = alloc_workqueue("xe_gt_page_fault_work_queue", 449 + WQ_UNBOUND | WQ_HIGHPRI, NUM_PF_QUEUE); 450 + if (!gt->usm.pf_wq) 451 + 
return -ENOMEM; 452 + 453 + gt->usm.acc_wq = alloc_workqueue("xe_gt_access_counter_work_queue", 454 + WQ_UNBOUND | WQ_HIGHPRI, 455 + NUM_ACC_QUEUE); 456 + if (!gt->usm.acc_wq) 457 + return -ENOMEM; 458 + 459 + return 0; 460 + } 461 + 462 + void xe_gt_pagefault_reset(struct xe_gt *gt) 463 + { 464 + struct xe_device *xe = gt_to_xe(gt); 465 + int i; 466 + 467 + if (!xe->info.supports_usm) 468 + return; 469 + 470 + for (i = 0; i < NUM_PF_QUEUE; ++i) { 471 + spin_lock_irq(&gt->usm.pf_queue[i].lock); 472 + gt->usm.pf_queue[i].head = 0; 473 + gt->usm.pf_queue[i].tail = 0; 474 + spin_unlock_irq(&gt->usm.pf_queue[i].lock); 475 + } 476 + 477 + for (i = 0; i < NUM_ACC_QUEUE; ++i) { 478 + spin_lock(&gt->usm.acc_queue[i].lock); 479 + gt->usm.acc_queue[i].head = 0; 480 + gt->usm.acc_queue[i].tail = 0; 481 + spin_unlock(&gt->usm.acc_queue[i].lock); 482 + } 483 + } 484 + 485 + int xe_gt_tlb_invalidation(struct xe_gt *gt) 486 + { 487 + return send_tlb_invalidation(&gt->uc.guc); 488 + } 489 + 490 + static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno) 491 + { 492 + if (gt->usm.tlb_invalidation_seqno_recv >= seqno) 493 + return true; 494 + 495 + if (seqno - gt->usm.tlb_invalidation_seqno_recv > 496 + (TLB_INVALIDATION_SEQNO_MAX / 2)) 497 + return true; 498 + 499 + return false; 500 + } 501 + 502 + int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) 503 + { 504 + struct xe_device *xe = gt_to_xe(gt); 505 + struct xe_guc *guc = &gt->uc.guc; 506 + int ret; 507 + 508 + /* 509 + * XXX: See above, this algorithm only works if seqno are always in 510 + * order 511 + */ 512 + ret = wait_event_timeout(guc->ct.wq, 513 + tlb_invalidation_seqno_past(gt, seqno), 514 + HZ / 5); 515 + if (!ret) { 516 + drm_err(&xe->drm, "TLB invalidation time'd out, seqno=%d, recv=%d\n", 517 + seqno, gt->usm.tlb_invalidation_seqno_recv); 518 + return -ETIME; 519 + } 520 + 521 + return 0; 522 + } 523 + 524 + int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) 525 + 
{ 526 + struct xe_gt *gt = guc_to_gt(guc); 527 + int expected_seqno; 528 + 529 + if (unlikely(len != 1)) 530 + return -EPROTO; 531 + 532 + /* Sanity check on seqno */ 533 + expected_seqno = (gt->usm.tlb_invalidation_seqno_recv + 1) % 534 + TLB_INVALIDATION_SEQNO_MAX; 535 + XE_WARN_ON(expected_seqno != msg[0]); 536 + 537 + gt->usm.tlb_invalidation_seqno_recv = msg[0]; 538 + smp_wmb(); 539 + wake_up_all(&guc->ct.wq); 540 + 541 + return 0; 542 + } 543 + 544 + static int granularity_in_byte(int val) 545 + { 546 + switch (val) { 547 + case 0: 548 + return SZ_128K; 549 + case 1: 550 + return SZ_2M; 551 + case 2: 552 + return SZ_16M; 553 + case 3: 554 + return SZ_64M; 555 + default: 556 + return 0; 557 + } 558 + } 559 + 560 + static int sub_granularity_in_byte(int val) 561 + { 562 + return (granularity_in_byte(val) / 32); 563 + } 564 + 565 + static void print_acc(struct xe_device *xe, struct acc *acc) 566 + { 567 + drm_warn(&xe->drm, "Access counter request:\n" 568 + "\tType: %s\n" 569 + "\tASID: %d\n" 570 + "\tVFID: %d\n" 571 + "\tEngine: %d:%d\n" 572 + "\tGranularity: 0x%x KB Region/ %d KB sub-granularity\n" 573 + "\tSub_Granularity Vector: 0x%08x\n" 574 + "\tVA Range base: 0x%016llx\n", 575 + acc->access_type ? 
"AC_NTFY_VAL" : "AC_TRIG_VAL", 576 + acc->asid, acc->vfid, acc->engine_class, acc->engine_instance, 577 + granularity_in_byte(acc->granularity) / SZ_1K, 578 + sub_granularity_in_byte(acc->granularity) / SZ_1K, 579 + acc->sub_granularity, acc->va_range_base); 580 + } 581 + 582 + static struct xe_vma *get_acc_vma(struct xe_vm *vm, struct acc *acc) 583 + { 584 + u64 page_va = acc->va_range_base + (ffs(acc->sub_granularity) - 1) * 585 + sub_granularity_in_byte(acc->granularity); 586 + struct xe_vma lookup; 587 + 588 + lookup.start = page_va; 589 + lookup.end = lookup.start + SZ_4K - 1; 590 + 591 + return xe_vm_find_overlapping_vma(vm, &lookup); 592 + } 593 + 594 + static int handle_acc(struct xe_gt *gt, struct acc *acc) 595 + { 596 + struct xe_device *xe = gt_to_xe(gt); 597 + struct xe_vm *vm; 598 + struct xe_vma *vma; 599 + struct xe_bo *bo; 600 + LIST_HEAD(objs); 601 + LIST_HEAD(dups); 602 + struct ttm_validate_buffer tv_bo, tv_vm; 603 + struct ww_acquire_ctx ww; 604 + int ret = 0; 605 + 606 + /* We only support ACC_TRIGGER at the moment */ 607 + if (acc->access_type != ACC_TRIGGER) 608 + return -EINVAL; 609 + 610 + /* ASID to VM */ 611 + mutex_lock(&xe->usm.lock); 612 + vm = xa_load(&xe->usm.asid_to_vm, acc->asid); 613 + if (vm) 614 + xe_vm_get(vm); 615 + mutex_unlock(&xe->usm.lock); 616 + if (!vm || !xe_vm_in_fault_mode(vm)) 617 + return -EINVAL; 618 + 619 + down_read(&vm->lock); 620 + 621 + /* Lookup VMA */ 622 + vma = get_acc_vma(vm, acc); 623 + if (!vma) { 624 + ret = -EINVAL; 625 + goto unlock_vm; 626 + } 627 + 628 + trace_xe_vma_acc(vma); 629 + 630 + /* Userptr can't be migrated, nothing to do */ 631 + if (xe_vma_is_userptr(vma)) 632 + goto unlock_vm; 633 + 634 + /* Lock VM and BOs dma-resv */ 635 + bo = vma->bo; 636 + if (only_needs_bo_lock(bo)) { 637 + /* This path ensures the BO's LRU is updated */ 638 + ret = xe_bo_lock(bo, &ww, xe->info.tile_count, false); 639 + } else { 640 + tv_vm.num_shared = xe->info.tile_count; 641 + tv_vm.bo = xe_vm_ttm_bo(vm); 642 
+ list_add(&tv_vm.head, &objs); 643 + tv_bo.bo = &bo->ttm; 644 + tv_bo.num_shared = xe->info.tile_count; 645 + list_add(&tv_bo.head, &objs); 646 + ret = ttm_eu_reserve_buffers(&ww, &objs, false, &dups); 647 + } 648 + if (ret) 649 + goto unlock_vm; 650 + 651 + /* Migrate to VRAM, move should invalidate the VMA first */ 652 + ret = xe_bo_migrate(bo, XE_PL_VRAM0 + gt->info.vram_id); 653 + 654 + if (only_needs_bo_lock(bo)) 655 + xe_bo_unlock(bo, &ww); 656 + else 657 + ttm_eu_backoff_reservation(&ww, &objs); 658 + unlock_vm: 659 + up_read(&vm->lock); 660 + xe_vm_put(vm); 661 + 662 + return ret; 663 + } 664 + 665 + #define make_u64(hi__, low__) ((u64)(hi__) << 32 | (u64)(low__)) 666 + 667 + static int get_acc(struct acc_queue *acc_queue, struct acc *acc) 668 + { 669 + const struct xe_guc_acc_desc *desc; 670 + int ret = 0; 671 + 672 + spin_lock(&acc_queue->lock); 673 + if (acc_queue->head != acc_queue->tail) { 674 + desc = (const struct xe_guc_acc_desc *) 675 + (acc_queue->data + acc_queue->head); 676 + 677 + acc->granularity = FIELD_GET(ACC_GRANULARITY, desc->dw2); 678 + acc->sub_granularity = FIELD_GET(ACC_SUBG_HI, desc->dw1) << 31 | 679 + FIELD_GET(ACC_SUBG_LO, desc->dw0); 680 + acc->engine_class = FIELD_GET(ACC_ENG_CLASS, desc->dw1); 681 + acc->engine_instance = FIELD_GET(ACC_ENG_INSTANCE, desc->dw1); 682 + acc->asid = FIELD_GET(ACC_ASID, desc->dw1); 683 + acc->vfid = FIELD_GET(ACC_VFID, desc->dw2); 684 + acc->access_type = FIELD_GET(ACC_TYPE, desc->dw0); 685 + acc->va_range_base = make_u64(desc->dw3 & ACC_VIRTUAL_ADDR_RANGE_HI, 686 + desc->dw2 & ACC_VIRTUAL_ADDR_RANGE_LO); 687 + } else { 688 + ret = -1; 689 + } 690 + spin_unlock(&acc_queue->lock); 691 + 692 + return ret; 693 + } 694 + 695 + static void acc_queue_work_func(struct work_struct *w) 696 + { 697 + struct acc_queue *acc_queue = container_of(w, struct acc_queue, worker); 698 + struct xe_gt *gt = acc_queue->gt; 699 + struct xe_device *xe = gt_to_xe(gt); 700 + struct acc acc = {}; 701 + int ret; 702 + 703 + 
ret = get_acc(acc_queue, &acc); 704 + if (ret) 705 + return; 706 + 707 + ret = handle_acc(gt, &acc); 708 + if (unlikely(ret)) { 709 + print_acc(xe, &acc); 710 + drm_warn(&xe->drm, "ACC: Unsuccessful %d\n", ret); 711 + } 712 + } 713 + 714 + #define ACC_MSG_LEN_DW 4 715 + 716 + static bool acc_queue_full(struct acc_queue *acc_queue) 717 + { 718 + lockdep_assert_held(&acc_queue->lock); 719 + 720 + return CIRC_SPACE(acc_queue->tail, acc_queue->head, ACC_QUEUE_NUM_DW) <= 721 + ACC_MSG_LEN_DW; 722 + } 723 + 724 + int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len) 725 + { 726 + struct xe_gt *gt = guc_to_gt(guc); 727 + struct acc_queue *acc_queue; 728 + u32 asid; 729 + bool full; 730 + 731 + if (unlikely(len != ACC_MSG_LEN_DW)) 732 + return -EPROTO; 733 + 734 + asid = FIELD_GET(ACC_ASID, msg[1]); 735 + acc_queue = &gt->usm.acc_queue[asid % NUM_ACC_QUEUE]; 736 + 737 + spin_lock(&acc_queue->lock); 738 + full = acc_queue_full(acc_queue); 739 + if (!full) { 740 + memcpy(acc_queue->data + acc_queue->tail, msg, 741 + len * sizeof(u32)); 742 + acc_queue->tail = (acc_queue->tail + len) % ACC_QUEUE_NUM_DW; 743 + queue_work(gt->usm.acc_wq, &acc_queue->worker); 744 + } else { 745 + drm_warn(&gt_to_xe(gt)->drm, "ACC Queue full, dropping ACC"); 746 + } 747 + spin_unlock(&acc_queue->lock); 748 + 749 + return full ? -ENOSPC : 0; 750 + }
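The wrap-aware comparison in `tlb_invalidation_seqno_past()` above can be exercised in isolation. A minimal userspace sketch, using plain `int` arguments in place of the `xe_gt` fields and `SEQNO_MAX` standing in for `TLB_INVALIDATION_SEQNO_MAX`:

```c
#include <assert.h>
#include <stdbool.h>

#define SEQNO_MAX 0x100000 /* stand-in for TLB_INVALIDATION_SEQNO_MAX */

/*
 * A sent seqno is considered "past" once the last received seqno has
 * caught up with it, or when the gap is larger than half the seqno
 * space, meaning the received counter must already have wrapped.
 */
static bool seqno_past(int recv, int seqno)
{
	if (recv >= seqno)
		return true;

	/* recv wrapped: seqno only looks "ahead" because of the wrap */
	if (seqno - recv > (SEQNO_MAX / 2))
		return true;

	return false;
}
```

The half-space heuristic is why the driver comment stresses that invalidations must complete in order: an out-of-order completion could make an old seqno look wrapped.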
+22
drivers/gpu/drm/xe/xe_gt_pagefault.h
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_PAGEFAULT_H_
+#define _XE_GT_PAGEFAULT_H_
+
+#include <linux/types.h>
+
+struct xe_gt;
+struct xe_guc;
+
+int xe_gt_pagefault_init(struct xe_gt *gt);
+void xe_gt_pagefault_reset(struct xe_gt *gt);
+int xe_gt_tlb_invalidation(struct xe_gt *gt);
+int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno);
+int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len);
+
+#endif	/* _XE_GT_PAGEFAULT_H_ */
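The G2H handlers declared here append fixed-size messages to DW rings and test for room with `CIRC_SPACE()` from `<linux/circ_buf.h>`. A userspace sketch of that full check, assuming a power-of-two ring size (`QUEUE_NUM_DW`/`MSG_LEN_DW` stand in for the driver's `PF_QUEUE_NUM_DW`/`PF_MSG_LEN_DW`):

```c
#include <assert.h>
#include <stdbool.h>

#define QUEUE_NUM_DW 128 /* stand-in for PF_QUEUE_NUM_DW; power of two */
#define MSG_LEN_DW 4     /* stand-in for PF_MSG_LEN_DW */

/*
 * Free DWs in the ring, matching the semantics of CIRC_SPACE() when
 * called as CIRC_SPACE(tail, head, size): one slot is always left
 * empty so that head == tail unambiguously means "empty".
 */
static int circ_space(int tail, int head, int size)
{
	return (head - tail - 1) & (size - 1);
}

/* Full when one more whole message would no longer fit */
static bool queue_full(int tail, int head)
{
	return circ_space(tail, head, QUEUE_NUM_DW) <= MSG_LEN_DW;
}
```

Because `<=` is used rather than `<`, the ring is declared full one message early, which keeps the producer from ever wrapping the tail onto the head.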
+55
drivers/gpu/drm/xe/xe_gt_sysfs.c
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <drm/drm_managed.h>
+#include "xe_gt.h"
+#include "xe_gt_sysfs.h"
+
+static void xe_gt_sysfs_kobj_release(struct kobject *kobj)
+{
+	kfree(kobj);
+}
+
+static struct kobj_type xe_gt_sysfs_kobj_type = {
+	.release = xe_gt_sysfs_kobj_release,
+	.sysfs_ops = &kobj_sysfs_ops,
+};
+
+static void gt_sysfs_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_gt *gt = arg;
+
+	kobject_put(gt->sysfs);
+}
+
+int xe_gt_sysfs_init(struct xe_gt *gt)
+{
+	struct device *dev = gt_to_xe(gt)->drm.dev;
+	struct kobj_gt *kg;
+	int err;
+
+	kg = kzalloc(sizeof(*kg), GFP_KERNEL);
+	if (!kg)
+		return -ENOMEM;
+
+	kobject_init(&kg->base, &xe_gt_sysfs_kobj_type);
+	kg->gt = gt;
+
+	err = kobject_add(&kg->base, &dev->kobj, "gt%d", gt->info.id);
+	if (err) {
+		kobject_put(&kg->base);
+		return err;
+	}
+
+	gt->sysfs = &kg->base;
+
+	err = drmm_add_action_or_reset(&gt_to_xe(gt)->drm, gt_sysfs_fini, gt);
+	if (err)
+		return err;
+
+	return 0;
+}
+19
drivers/gpu/drm/xe/xe_gt_sysfs.h
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_SYSFS_H_
+#define _XE_GT_SYSFS_H_
+
+#include "xe_gt_sysfs_types.h"
+
+int xe_gt_sysfs_init(struct xe_gt *gt);
+
+static inline struct xe_gt *
+kobj_to_gt(struct kobject *kobj)
+{
+	return container_of(kobj, struct kobj_gt, base)->gt;
+}
+
+#endif /* _XE_GT_SYSFS_H_ */
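`kobj_to_gt()` above is the standard embedded-kobject pattern: the `struct kobject` lives inside `struct kobj_gt`, so `container_of()` can walk back from the member to the wrapper. A userspace sketch of the same pointer arithmetic, with hypothetical `wrapper`/`inner` types standing in for `kobj_gt`/`kobject`:

```c
#include <assert.h>
#include <stddef.h>

struct inner { int dummy; };

/* Mirrors struct kobj_gt: an embedded member plus extra payload */
struct wrapper {
	struct inner base; /* embedded, like the kobject base */
	int payload;       /* like the struct xe_gt *gt member */
};

/* container_of(): recover the wrapper from a pointer to its member */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

static int payload_from_inner(struct inner *i)
{
	return container_of(i, struct wrapper, base)->payload;
}
```

This is why `xe_gt_sysfs_kobj_release()` can simply `kfree(kobj)`: freeing the embedded kobject's containing allocation frees the whole `kobj_gt`.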
+26
drivers/gpu/drm/xe/xe_gt_sysfs_types.h
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_SYSFS_TYPES_H_
+#define _XE_GT_SYSFS_TYPES_H_
+
+#include <linux/kobject.h>
+
+struct xe_gt;
+
+/**
+ * struct kobj_gt - A GT's kobject struct that connects the kobject and the GT
+ *
+ * When dealing with multiple GTs, this struct helps to understand which GT
+ * needs to be addressed on a given sysfs call.
+ */
+struct kobj_gt {
+	/** @base: The actual kobject */
+	struct kobject base;
+	/** @gt: A pointer to the GT itself */
+	struct xe_gt *gt;
+};
+
+#endif /* _XE_GT_SYSFS_TYPES_H_ */
+144
drivers/gpu/drm/xe/xe_gt_topology.c
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/bitmap.h>
+
+#include "xe_gt.h"
+#include "xe_gt_topology.h"
+#include "xe_mmio.h"
+
+#define XE_MAX_DSS_FUSE_BITS	(32 * XE_MAX_DSS_FUSE_REGS)
+#define XE_MAX_EU_FUSE_BITS	(32 * XE_MAX_EU_FUSE_REGS)
+
+#define XELP_EU_ENABLE				0x9134	/* "_DISABLE" on Xe_LP */
+#define XELP_EU_MASK				REG_GENMASK(7, 0)
+#define XELP_GT_GEOMETRY_DSS_ENABLE		0x913c
+#define XEHP_GT_COMPUTE_DSS_ENABLE		0x9144
+#define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT		0x9148
+
+static void
+load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
+{
+	va_list argp;
+	u32 fuse_val[XE_MAX_DSS_FUSE_REGS] = {};
+	int i;
+
+	if (drm_WARN_ON(&gt_to_xe(gt)->drm, numregs > XE_MAX_DSS_FUSE_REGS))
+		numregs = XE_MAX_DSS_FUSE_REGS;
+
+	va_start(argp, numregs);
+	for (i = 0; i < numregs; i++)
+		fuse_val[i] = xe_mmio_read32(gt, va_arg(argp, u32));
+	va_end(argp);
+
+	bitmap_from_arr32(mask, fuse_val, numregs * 32);
+}
+
+static void
+load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 reg = xe_mmio_read32(gt, XELP_EU_ENABLE);
+	u32 val = 0;
+	int i;
+
+	BUILD_BUG_ON(XE_MAX_EU_FUSE_REGS > 1);
+
+	/*
+	 * Pre-Xe_HP platforms inverted the bit meaning (disable instead
+	 * of enable).
+	 */
+	if (GRAPHICS_VERx100(xe) < 1250)
+		reg = ~reg & XELP_EU_MASK;
+
+	/* On PVC, one bit = one EU */
+	if (GRAPHICS_VERx100(xe) == 1260) {
+		val = reg;
+	} else {
+		/* All other platforms, one bit = 2 EU */
+		for (i = 0; i < fls(reg); i++)
+			if (reg & BIT(i))
+				val |= 0x3 << 2 * i;
+	}
+
+	bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS);
+}
+
+void
+xe_gt_topology_init(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct drm_printer p = drm_debug_printer("GT topology");
+	int num_geometry_regs, num_compute_regs;
+
+	if (GRAPHICS_VERx100(xe) == 1260) {
+		num_geometry_regs = 0;
+		num_compute_regs = 2;
+	} else if (GRAPHICS_VERx100(xe) >= 1250) {
+		num_geometry_regs = 1;
+		num_compute_regs = 1;
+	} else {
+		num_geometry_regs = 1;
+		num_compute_regs = 0;
+	}
+
+	load_dss_mask(gt, gt->fuse_topo.g_dss_mask, num_geometry_regs,
+		      XELP_GT_GEOMETRY_DSS_ENABLE);
+	load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs,
+		      XEHP_GT_COMPUTE_DSS_ENABLE,
+		      XEHPC_GT_COMPUTE_DSS_ENABLE_EXT);
+	load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss);
+
+	xe_gt_topology_dump(gt, &p);
+}
+
+unsigned int
+xe_gt_topology_count_dss(xe_dss_mask_t mask)
+{
+	return bitmap_weight(mask, XE_MAX_DSS_FUSE_BITS);
+}
+
+u64
+xe_gt_topology_dss_group_mask(xe_dss_mask_t mask, int grpsize)
+{
+	xe_dss_mask_t per_dss_mask = {};
+	u64 grpmask = 0;
+
+	WARN_ON(DIV_ROUND_UP(XE_MAX_DSS_FUSE_BITS, grpsize) > BITS_PER_TYPE(grpmask));
+
+	bitmap_fill(per_dss_mask, grpsize);
+	for (int i = 0; !bitmap_empty(mask, XE_MAX_DSS_FUSE_BITS); i++) {
+		if (bitmap_intersects(mask, per_dss_mask, grpsize))
+			grpmask |= BIT(i);
+
+		bitmap_shift_right(mask, mask, grpsize, XE_MAX_DSS_FUSE_BITS);
+	}
+
+	return grpmask;
+}
+
+void
+xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+	drm_printf(p, "dss mask (geometry): %*pb\n", XE_MAX_DSS_FUSE_BITS,
+		   gt->fuse_topo.g_dss_mask);
+	drm_printf(p, "dss mask (compute): %*pb\n", XE_MAX_DSS_FUSE_BITS,
+		   gt->fuse_topo.c_dss_mask);
+
+	drm_printf(p, "EU mask per DSS: %*pb\n", XE_MAX_EU_FUSE_BITS,
+		   gt->fuse_topo.eu_mask_per_dss);
+}
+
+/*
+ * Used to obtain the index of the first DSS.  Can start searching from the
+ * beginning of a specific dss group (e.g., gslice, cslice, etc.) if
+ * groupsize and groupnum are non-zero.
+ */
+unsigned int
+xe_dss_mask_group_ffs(xe_dss_mask_t mask, int groupsize, int groupnum)
+{
+	return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize);
+}
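On non-PVC platforms, `load_eu_mask()` above expands each fuse bit into two EU bits. A standalone sketch of that expansion (the loop bound of 16 reflects that `XELP_EU_MASK` limits the fuse field to the low bits, so the doubled shift stays within 32 bits):

```c
#include <assert.h>
#include <stdint.h>

/*
 * Expand a fuse register where one bit represents two EUs into a mask
 * with one bit per EU, as load_eu_mask() does for non-PVC platforms:
 * each set bit i becomes the pair of bits 2*i and 2*i + 1.
 */
static uint32_t expand_eu_fuse(uint32_t reg)
{
	uint32_t val = 0;

	for (int i = 0; i < 16; i++)
		if (reg & (1u << i))
			val |= 0x3u << (2 * i);

	return val;
}
```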
+20
drivers/gpu/drm/xe/xe_gt_topology.h
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __XE_GT_TOPOLOGY_H__
+#define __XE_GT_TOPOLOGY_H__
+
+#include "xe_gt_types.h"
+
+struct drm_printer;
+
+void xe_gt_topology_init(struct xe_gt *gt);
+
+void xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p);
+
+unsigned int
+xe_dss_mask_group_ffs(xe_dss_mask_t mask, int groupsize, int groupnum);
+
+#endif /* __XE_GT_TOPOLOGY_H__ */
+320
drivers/gpu/drm/xe/xe_gt_types.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_GT_TYPES_H_ 7 + #define _XE_GT_TYPES_H_ 8 + 9 + #include "xe_force_wake_types.h" 10 + #include "xe_hw_engine_types.h" 11 + #include "xe_hw_fence_types.h" 12 + #include "xe_reg_sr_types.h" 13 + #include "xe_sa_types.h" 14 + #include "xe_uc_types.h" 15 + 16 + struct xe_engine_ops; 17 + struct xe_ggtt; 18 + struct xe_migrate; 19 + struct xe_ring_ops; 20 + struct xe_ttm_gtt_mgr; 21 + struct xe_ttm_vram_mgr; 22 + 23 + enum xe_gt_type { 24 + XE_GT_TYPE_UNINITIALIZED, 25 + XE_GT_TYPE_MAIN, 26 + XE_GT_TYPE_REMOTE, 27 + XE_GT_TYPE_MEDIA, 28 + }; 29 + 30 + #define XE_MAX_DSS_FUSE_REGS 2 31 + #define XE_MAX_EU_FUSE_REGS 1 32 + 33 + typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(32 * XE_MAX_DSS_FUSE_REGS)]; 34 + typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(32 * XE_MAX_DSS_FUSE_REGS)]; 35 + 36 + struct xe_mmio_range { 37 + u32 start; 38 + u32 end; 39 + }; 40 + 41 + /* 42 + * The hardware has multiple kinds of multicast register ranges that need 43 + * special register steering (and future platforms are expected to add 44 + * additional types). 45 + * 46 + * During driver startup, we initialize the steering control register to 47 + * direct reads to a slice/subslice that are valid for the 'subslice' class 48 + * of multicast registers. If another type of steering does not have any 49 + * overlap in valid steering targets with 'subslice' style registers, we will 50 + * need to explicitly re-steer reads of registers of the other type. 51 + * 52 + * Only the replication types that may need additional non-default steering 53 + * are listed here. 54 + */ 55 + enum xe_steering_type { 56 + L3BANK, 57 + MSLICE, 58 + LNCF, 59 + DSS, 60 + OADDRM, 61 + 62 + /* 63 + * On some platforms there are multiple types of MCR registers that 64 + * will always return a non-terminated value at instance (0, 0). 
We'll 65 + * lump those all into a single category to keep things simple. 66 + */ 67 + INSTANCE0, 68 + 69 + NUM_STEERING_TYPES 70 + }; 71 + 72 + /** 73 + * struct xe_gt - Top level struct of a graphics tile 74 + * 75 + * A graphics tile may be a physical split (duplicate pieces of silicon, 76 + * different GGTT + VRAM) or a virtual split (shared GGTT + VRAM). Either way 77 + * this structure encapsulates of everything a GT is (MMIO, VRAM, memory 78 + * management, microcontrols, and a hardware set of engines). 79 + */ 80 + struct xe_gt { 81 + /** @xe: backpointer to XE device */ 82 + struct xe_device *xe; 83 + 84 + /** @info: GT info */ 85 + struct { 86 + /** @type: type of GT */ 87 + enum xe_gt_type type; 88 + /** @id: id of GT */ 89 + u8 id; 90 + /** @vram: id of the VRAM for this GT */ 91 + u8 vram_id; 92 + /** @clock_freq: clock frequency */ 93 + u32 clock_freq; 94 + /** @engine_mask: mask of engines present on GT */ 95 + u64 engine_mask; 96 + } info; 97 + 98 + /** 99 + * @mmio: mmio info for GT, can be subset of the global device mmio 100 + * space 101 + */ 102 + struct { 103 + /** @size: size of MMIO space on GT */ 104 + size_t size; 105 + /** @regs: pointer to MMIO space on GT */ 106 + void *regs; 107 + /** @fw: force wake for GT */ 108 + struct xe_force_wake fw; 109 + /** 110 + * @adj_limit: adjust MMIO address if address is below this 111 + * value 112 + */ 113 + u32 adj_limit; 114 + /** @adj_offset: offect to add to MMIO address when adjusting */ 115 + u32 adj_offset; 116 + } mmio; 117 + 118 + /** 119 + * @reg_sr: table with registers to be restored on GT init/resume/reset 120 + */ 121 + struct xe_reg_sr reg_sr; 122 + 123 + /** 124 + * @mem: memory management info for GT, multiple GTs can point to same 125 + * objects (virtual split) 126 + */ 127 + struct { 128 + /** 129 + * @vram: VRAM info for GT, multiple GTs can point to same info 130 + * (virtual split), can be subset of global device VRAM 131 + */ 132 + struct { 133 + /** @io_start: start address of 
VRAM */ 134 + resource_size_t io_start; 135 + /** @size: size of VRAM */ 136 + resource_size_t size; 137 + /** @mapping: pointer to VRAM mappable space */ 138 + void *__iomem mapping; 139 + } vram; 140 + /** @vram_mgr: VRAM TTM manager */ 141 + struct xe_ttm_vram_mgr *vram_mgr; 142 + /** @gtt_mr: GTT TTM manager */ 143 + struct xe_ttm_gtt_mgr *gtt_mgr; 144 + /** @ggtt: Global graphics translation table */ 145 + struct xe_ggtt *ggtt; 146 + } mem; 147 + 148 + /** @reset: state for GT resets */ 149 + struct { 150 + /** 151 + * @worker: work so GT resets can done async allowing to reset 152 + * code to safely flush all code paths 153 + */ 154 + struct work_struct worker; 155 + } reset; 156 + 157 + /** @usm: unified shared memory state */ 158 + struct { 159 + /** 160 + * @bb_pool: Pool from which batchbuffers, for USM operations 161 + * (e.g. migrations, fixing page tables), are allocated. 162 + * Dedicated pool needed so USM operations to not get blocked 163 + * behind any user operations which may have resulted in a 164 + * fault. 165 + */ 166 + struct xe_sa_manager bb_pool; 167 + /** 168 + * @reserved_bcs_instance: reserved BCS instance used for USM 169 + * operations (e.g. mmigrations, fixing page tables) 170 + */ 171 + u16 reserved_bcs_instance; 172 + /** 173 + * @tlb_invalidation_seqno: TLB invalidation seqno, protected by 174 + * CT lock 175 + */ 176 + #define TLB_INVALIDATION_SEQNO_MAX 0x100000 177 + int tlb_invalidation_seqno; 178 + /** 179 + * @tlb_invalidation_seqno_recv: last received TLB invalidation 180 + * seqno, protected by CT lock 181 + */ 182 + int tlb_invalidation_seqno_recv; 183 + /** @pf_wq: page fault work queue, unbound, high priority */ 184 + struct workqueue_struct *pf_wq; 185 + /** @acc_wq: access counter work queue, unbound, high priority */ 186 + struct workqueue_struct *acc_wq; 187 + /** 188 + * @pf_queue: Page fault queue used to sync faults so faults can 189 + * be processed not under the GuC CT lock. 
The queue is sized so 190 + * it can sync all possible faults (1 per physical engine). 191 + * Multiple queues exists for page faults from different VMs are 192 + * be processed in parallel. 193 + */ 194 + struct pf_queue { 195 + /** @gt: back pointer to GT */ 196 + struct xe_gt *gt; 197 + #define PF_QUEUE_NUM_DW 128 198 + /** @data: data in the page fault queue */ 199 + u32 data[PF_QUEUE_NUM_DW]; 200 + /** 201 + * @head: head pointer in DWs for page fault queue, 202 + * moved by worker which processes faults. 203 + */ 204 + u16 head; 205 + /** 206 + * @tail: tail pointer in DWs for page fault queue, 207 + * moved by G2H handler. 208 + */ 209 + u16 tail; 210 + /** @lock: protects page fault queue */ 211 + spinlock_t lock; 212 + /** @worker: to process page faults */ 213 + struct work_struct worker; 214 + #define NUM_PF_QUEUE 4 215 + } pf_queue[NUM_PF_QUEUE]; 216 + /** 217 + * @acc_queue: Same as page fault queue, cannot process access 218 + * counters under CT lock. 219 + */ 220 + struct acc_queue { 221 + /** @gt: back pointer to GT */ 222 + struct xe_gt *gt; 223 + #define ACC_QUEUE_NUM_DW 128 224 + /** @data: data in the page fault queue */ 225 + u32 data[ACC_QUEUE_NUM_DW]; 226 + /** 227 + * @head: head pointer in DWs for page fault queue, 228 + * moved by worker which processes faults. 229 + */ 230 + u16 head; 231 + /** 232 + * @tail: tail pointer in DWs for page fault queue, 233 + * moved by G2H handler. 
234 + */ 235 + u16 tail; 236 + /** @lock: protects access counter queue */ 237 + spinlock_t lock; 238 + /** @worker: to process access counters */ 239 + struct work_struct worker; 240 + #define NUM_ACC_QUEUE 4 241 + } acc_queue[NUM_ACC_QUEUE]; 242 + } usm; 243 + 244 + /** @ordered_wq: used to serialize GT resets and TDRs */ 245 + struct workqueue_struct *ordered_wq; 246 + 247 + /** @uc: micro controllers on the GT */ 248 + struct xe_uc uc; 249 + 250 + /** @engine_ops: submission backend engine operations */ 251 + const struct xe_engine_ops *engine_ops; 252 + 253 + /** 254 + * @ring_ops: ring operations for this hw engine (1 per engine class) 255 + */ 256 + const struct xe_ring_ops *ring_ops[XE_ENGINE_CLASS_MAX]; 257 + 258 + /** @fence_irq: fence IRQs (1 per engine class) */ 259 + struct xe_hw_fence_irq fence_irq[XE_ENGINE_CLASS_MAX]; 260 + 261 + /** @default_lrc: default LRC state */ 262 + void *default_lrc[XE_ENGINE_CLASS_MAX]; 263 + 264 + /** @hw_engines: hardware engines on the GT */ 265 + struct xe_hw_engine hw_engines[XE_NUM_HW_ENGINES]; 266 + 267 + /** @kernel_bb_pool: Pool from which batchbuffers are allocated */ 268 + struct xe_sa_manager kernel_bb_pool; 269 + 270 + /** @migrate: Migration helper for vram blits and clearing */ 271 + struct xe_migrate *migrate; 272 + 273 + /** @pcode: GT's PCODE */ 274 + struct { 275 + /** @lock: protecting GT's PCODE mailbox data */ 276 + struct mutex lock; 277 + } pcode; 278 + 279 + /** @sysfs: sysfs' kobj used by xe_gt_sysfs */ 280 + struct kobject *sysfs; 281 + 282 + /** @mocs: MOCS settings */ 283 + struct { 284 + /** @uc_index: UC index */ 285 + u8 uc_index; 286 + /** @wb_index: WB index, only used on L3_CCS platforms */ 287 + u8 wb_index; 288 + } mocs; 289 + 290 + /** @fuse_topo: GT topology reported by fuse registers */ 291 + struct { 292 + /** @g_dss_mask: dual-subslices usable by geometry */ 293 + xe_dss_mask_t g_dss_mask; 294 + 295 + /** @c_dss_mask: dual-subslices usable by compute */ 296 + xe_dss_mask_t c_dss_mask; 297 + 298 
+ /** @eu_mask_per_dss: EU mask per DSS */ 299 + xe_eu_mask_t eu_mask_per_dss; 300 + } fuse_topo; 301 + 302 + /** @steering: register steering for individual HW units */ 303 + struct { 304 + /** @ranges: register ranges used for this steering type */ 305 + const struct xe_mmio_range *ranges; 306 + 307 + /** @group_target: target to steer accesses to */ 308 + u16 group_target; 309 + /** @instance_target: instance to steer accesses to */ 310 + u16 instance_target; 311 + } steering[NUM_STEERING_TYPES]; 312 + 313 + /** 314 + * @mcr_lock: protects the MCR_SELECTOR register for the duration 315 + * of a steered operation 316 + */ 317 + spinlock_t mcr_lock; 318 + }; 319 + 320 + #endif
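The @pf_queue above is a single-producer/single-consumer ring of dwords: the G2H handler advances @tail as it copies fault messages in, and the worker advances @head as it consumes them, with @lock guarding both. A minimal userspace sketch of that head/tail arithmetic follows; the helper names and the fixed 4-dword message size are assumptions for illustration, not the driver's actual message format:

```c
#include <assert.h>

#define PF_QUEUE_NUM_DW 128
#define PF_MSG_LEN_DW   4	/* assumed fixed message size for this sketch */

struct pf_queue_sketch {
	unsigned int data[PF_QUEUE_NUM_DW];
	unsigned short head;	/* moved by the consumer (worker) */
	unsigned short tail;	/* moved by the producer (G2H handler) */
};

/* Producer side: returns 0 on success, -1 if there is not enough room. */
static int pf_queue_push(struct pf_queue_sketch *q, const unsigned int *msg)
{
	unsigned int used = (q->tail + PF_QUEUE_NUM_DW - q->head) %
			    PF_QUEUE_NUM_DW;

	/* Keep at least one dword free so head == tail always means "empty". */
	if (PF_QUEUE_NUM_DW - used <= PF_MSG_LEN_DW)
		return -1;

	for (int i = 0; i < PF_MSG_LEN_DW; i++)
		q->data[(q->tail + i) % PF_QUEUE_NUM_DW] = msg[i];
	q->tail = (q->tail + PF_MSG_LEN_DW) % PF_QUEUE_NUM_DW;
	return 0;
}

/* Consumer side: returns 0 and fills msg, or -1 if the ring is empty. */
static int pf_queue_pop(struct pf_queue_sketch *q, unsigned int *msg)
{
	if (q->head == q->tail)
		return -1;

	for (int i = 0; i < PF_MSG_LEN_DW; i++)
		msg[i] = q->data[(q->head + i) % PF_QUEUE_NUM_DW];
	q->head = (q->head + PF_MSG_LEN_DW) % PF_QUEUE_NUM_DW;
	return 0;
}
```

In the driver the sizing guarantee makes the full case moot: the queue is sized so it can hold all outstanding faults (one per physical engine), so the producer is never expected to find the ring full.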
drivers/gpu/drm/xe/xe_guc.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #include "xe_bo.h" 7 + #include "xe_device.h" 8 + #include "xe_guc.h" 9 + #include "xe_guc_ads.h" 10 + #include "xe_guc_ct.h" 11 + #include "xe_guc_hwconfig.h" 12 + #include "xe_guc_log.h" 13 + #include "xe_guc_reg.h" 14 + #include "xe_guc_pc.h" 15 + #include "xe_guc_submit.h" 16 + #include "xe_gt.h" 17 + #include "xe_platform_types.h" 18 + #include "xe_uc_fw.h" 19 + #include "xe_wopcm.h" 20 + #include "xe_mmio.h" 21 + #include "xe_force_wake.h" 22 + #include "i915_reg_defs.h" 23 + #include "gt/intel_gt_regs.h" 24 + 25 + /* TODO: move to common file */ 26 + #define GUC_PVC_MOCS_INDEX_MASK REG_GENMASK(25, 24) 27 + #define PVC_MOCS_UC_INDEX 1 28 + #define PVC_GUC_MOCS_INDEX(index) REG_FIELD_PREP(GUC_PVC_MOCS_INDEX_MASK,\ 29 + index) 30 + 31 + static struct xe_gt * 32 + guc_to_gt(struct xe_guc *guc) 33 + { 34 + return container_of(guc, struct xe_gt, uc.guc); 35 + } 36 + 37 + static struct xe_device * 38 + guc_to_xe(struct xe_guc *guc) 39 + { 40 + return gt_to_xe(guc_to_gt(guc)); 41 + } 42 + 43 + /* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */ 44 + #define GUC_GGTT_TOP 0xFEE00000 45 + static u32 guc_bo_ggtt_addr(struct xe_guc *guc, 46 + struct xe_bo *bo) 47 + { 48 + u32 addr = xe_bo_ggtt_addr(bo); 49 + 50 + XE_BUG_ON(addr < xe_wopcm_size(guc_to_xe(guc))); 51 + XE_BUG_ON(range_overflows_t(u32, addr, bo->size, GUC_GGTT_TOP)); 52 + 53 + return addr; 54 + } 55 + 56 + static u32 guc_ctl_debug_flags(struct xe_guc *guc) 57 + { 58 + u32 level = xe_guc_log_get_level(&guc->log); 59 + u32 flags = 0; 60 + 61 + if (!GUC_LOG_LEVEL_IS_VERBOSE(level)) 62 + flags |= GUC_LOG_DISABLED; 63 + else 64 + flags |= GUC_LOG_LEVEL_TO_VERBOSITY(level) << 65 + GUC_LOG_VERBOSITY_SHIFT; 66 + 67 + return flags; 68 + } 69 + 70 + static u32 guc_ctl_feature_flags(struct xe_guc *guc) 71 + { 72 + return GUC_CTL_ENABLE_SLPC; 73 + } 74 + 75 + static u32 guc_ctl_log_params_flags(struct 
xe_guc *guc) 76 + { 77 + u32 offset = guc_bo_ggtt_addr(guc, guc->log.bo) >> PAGE_SHIFT; 78 + u32 flags; 79 + 80 + #if (((CRASH_BUFFER_SIZE) % SZ_1M) == 0) 81 + #define LOG_UNIT SZ_1M 82 + #define LOG_FLAG GUC_LOG_LOG_ALLOC_UNITS 83 + #else 84 + #define LOG_UNIT SZ_4K 85 + #define LOG_FLAG 0 86 + #endif 87 + 88 + #if (((CAPTURE_BUFFER_SIZE) % SZ_1M) == 0) 89 + #define CAPTURE_UNIT SZ_1M 90 + #define CAPTURE_FLAG GUC_LOG_CAPTURE_ALLOC_UNITS 91 + #else 92 + #define CAPTURE_UNIT SZ_4K 93 + #define CAPTURE_FLAG 0 94 + #endif 95 + 96 + BUILD_BUG_ON(!CRASH_BUFFER_SIZE); 97 + BUILD_BUG_ON(!IS_ALIGNED(CRASH_BUFFER_SIZE, LOG_UNIT)); 98 + BUILD_BUG_ON(!DEBUG_BUFFER_SIZE); 99 + BUILD_BUG_ON(!IS_ALIGNED(DEBUG_BUFFER_SIZE, LOG_UNIT)); 100 + BUILD_BUG_ON(!CAPTURE_BUFFER_SIZE); 101 + BUILD_BUG_ON(!IS_ALIGNED(CAPTURE_BUFFER_SIZE, CAPTURE_UNIT)); 102 + 103 + BUILD_BUG_ON((CRASH_BUFFER_SIZE / LOG_UNIT - 1) > 104 + (GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT)); 105 + BUILD_BUG_ON((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) > 106 + (GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT)); 107 + BUILD_BUG_ON((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) > 108 + (GUC_LOG_CAPTURE_MASK >> GUC_LOG_CAPTURE_SHIFT)); 109 + 110 + flags = GUC_LOG_VALID | 111 + GUC_LOG_NOTIFY_ON_HALF_FULL | 112 + CAPTURE_FLAG | 113 + LOG_FLAG | 114 + ((CRASH_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_CRASH_SHIFT) | 115 + ((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_DEBUG_SHIFT) | 116 + ((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) << 117 + GUC_LOG_CAPTURE_SHIFT) | 118 + (offset << GUC_LOG_BUF_ADDR_SHIFT); 119 + 120 + #undef LOG_UNIT 121 + #undef LOG_FLAG 122 + #undef CAPTURE_UNIT 123 + #undef CAPTURE_FLAG 124 + 125 + return flags; 126 + } 127 + 128 + static u32 guc_ctl_ads_flags(struct xe_guc *guc) 129 + { 130 + u32 ads = guc_bo_ggtt_addr(guc, guc->ads.bo) >> PAGE_SHIFT; 131 + u32 flags = ads << GUC_ADS_ADDR_SHIFT; 132 + 133 + return flags; 134 + } 135 + 136 + static u32 guc_ctl_wa_flags(struct xe_guc *guc) 137 + { 138 + struct xe_device *xe = 
guc_to_xe(guc); 139 + struct xe_gt *gt = guc_to_gt(guc); 140 + u32 flags = 0; 141 + 142 + /* Wa_22012773006:gen11,gen12 < XeHP */ 143 + if (GRAPHICS_VER(xe) >= 11 && 144 + GRAPHICS_VERx100(xe) < 1250) 145 + flags |= GUC_WA_POLLCS; 146 + 147 + /* Wa_16011759253 */ 148 + /* Wa_22011383443 */ 149 + if (IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G10, STEP_A0, STEP_B0) || 150 + IS_PLATFORM_STEP(xe, XE_PVC, STEP_A0, STEP_B0)) 151 + flags |= GUC_WA_GAM_CREDITS; 152 + 153 + /* Wa_14014475959 */ 154 + if (IS_PLATFORM_STEP(xe, XE_METEORLAKE, STEP_A0, STEP_B0) || 155 + xe->info.platform == XE_DG2) 156 + flags |= GUC_WA_HOLD_CCS_SWITCHOUT; 157 + 158 + /* 159 + * Wa_14012197797 160 + * Wa_22011391025 161 + * 162 + * The same WA bit is used for both and 22011391025 is applicable to 163 + * all DG2. 164 + */ 165 + if (xe->info.platform == XE_DG2) 166 + flags |= GUC_WA_DUAL_QUEUE; 167 + 168 + /* 169 + * Wa_2201180203 170 + * GUC_WA_PRE_PARSER causes media workload hang for PVC A0 and PCIe 171 + * errors. Disable this for PVC A0 steppings. 172 + */ 173 + if (GRAPHICS_VER(xe) <= 12 && 174 + !IS_PLATFORM_STEP(xe, XE_PVC, STEP_A0, STEP_B0)) 175 + flags |= GUC_WA_PRE_PARSER; 176 + 177 + /* Wa_16011777198 */ 178 + if (IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G10, STEP_A0, STEP_C0) || 179 + IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G11, STEP_A0, 180 + STEP_B0)) 181 + flags |= GUC_WA_RCS_RESET_BEFORE_RC6; 182 + 183 + /* 184 + * Wa_22012727170 185 + * Wa_22012727685 186 + * 187 + * This WA is applicable to PVC CT A0, but causes media regressions. 188 + * Drop the WA for PVC. 
189 + */ 190 + if (IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G10, STEP_A0, STEP_C0) || 191 + IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G11, STEP_A0, 192 + STEP_FOREVER)) 193 + flags |= GUC_WA_CONTEXT_ISOLATION; 194 + 195 + /* Wa_16015675438, Wa_18020744125 */ 196 + if (!xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_RENDER)) 197 + flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST; 198 + 199 + /* Wa_1509372804 */ 200 + if (IS_PLATFORM_STEP(xe, XE_PVC, STEP_A0, STEP_C0)) 201 + flags |= GUC_WA_RENDER_RST_RC6_EXIT; 202 + 203 + 204 + return flags; 205 + } 206 + 207 + static u32 guc_ctl_devid(struct xe_guc *guc) 208 + { 209 + struct xe_device *xe = guc_to_xe(guc); 210 + 211 + return (((u32)xe->info.devid) << 16) | xe->info.revid; 212 + } 213 + 214 + static void guc_init_params(struct xe_guc *guc) 215 + { 216 + struct xe_device *xe = guc_to_xe(guc); 217 + u32 *params = guc->params; 218 + int i; 219 + 220 + BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32)); 221 + BUILD_BUG_ON(SOFT_SCRATCH_COUNT != GUC_CTL_MAX_DWORDS + 2); 222 + 223 + params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc); 224 + params[GUC_CTL_FEATURE] = guc_ctl_feature_flags(guc); 225 + params[GUC_CTL_DEBUG] = guc_ctl_debug_flags(guc); 226 + params[GUC_CTL_ADS] = guc_ctl_ads_flags(guc); 227 + params[GUC_CTL_WA] = guc_ctl_wa_flags(guc); 228 + params[GUC_CTL_DEVID] = guc_ctl_devid(guc); 229 + 230 + for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) 231 + drm_dbg(&xe->drm, "GuC param[%2d] = 0x%08x\n", i, params[i]); 232 + } 233 + 234 + /* 235 + * Initialise the GuC parameter block before starting the firmware 236 + * transfer. These parameters are read by the firmware on startup 237 + * and cannot be changed thereafter. 
238 + */ 239 + void guc_write_params(struct xe_guc *guc) 240 + { 241 + struct xe_gt *gt = guc_to_gt(guc); 242 + int i; 243 + 244 + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); 245 + 246 + xe_mmio_write32(gt, SOFT_SCRATCH(0).reg, 0); 247 + 248 + for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) 249 + xe_mmio_write32(gt, SOFT_SCRATCH(1 + i).reg, guc->params[i]); 250 + } 251 + 252 + #define MEDIA_GUC_HOST_INTERRUPT _MMIO(0x190304) 253 + 254 + int xe_guc_init(struct xe_guc *guc) 255 + { 256 + struct xe_device *xe = guc_to_xe(guc); 257 + struct xe_gt *gt = guc_to_gt(guc); 258 + int ret; 259 + 260 + guc->fw.type = XE_UC_FW_TYPE_GUC; 261 + ret = xe_uc_fw_init(&guc->fw); 262 + if (ret) 263 + goto out; 264 + 265 + ret = xe_guc_log_init(&guc->log); 266 + if (ret) 267 + goto out; 268 + 269 + ret = xe_guc_ads_init(&guc->ads); 270 + if (ret) 271 + goto out; 272 + 273 + ret = xe_guc_ct_init(&guc->ct); 274 + if (ret) 275 + goto out; 276 + 277 + ret = xe_guc_pc_init(&guc->pc); 278 + if (ret) 279 + goto out; 280 + 281 + guc_init_params(guc); 282 + 283 + if (xe_gt_is_media_type(gt)) 284 + guc->notify_reg = MEDIA_GUC_HOST_INTERRUPT.reg; 285 + else 286 + guc->notify_reg = GEN11_GUC_HOST_INTERRUPT.reg; 287 + 288 + xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); 289 + 290 + return 0; 291 + 292 + out: 293 + drm_err(&xe->drm, "GuC init failed with %d", ret); 294 + return ret; 295 + } 296 + 297 + /** 298 + * xe_guc_init_post_hwconfig - initialize GuC post hwconfig load 299 + * @guc: The GuC object 300 + * 301 + * Return: 0 on success, negative error code on error. 
302 + */ 303 + int xe_guc_init_post_hwconfig(struct xe_guc *guc) 304 + { 305 + return xe_guc_ads_init_post_hwconfig(&guc->ads); 306 + } 307 + 308 + int xe_guc_post_load_init(struct xe_guc *guc) 309 + { 310 + xe_guc_ads_populate_post_load(&guc->ads); 311 + 312 + return 0; 313 + } 314 + 315 + int xe_guc_reset(struct xe_guc *guc) 316 + { 317 + struct xe_device *xe = guc_to_xe(guc); 318 + struct xe_gt *gt = guc_to_gt(guc); 319 + u32 guc_status; 320 + int ret; 321 + 322 + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); 323 + 324 + xe_mmio_write32(gt, GEN6_GDRST.reg, GEN11_GRDOM_GUC); 325 + 326 + ret = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_GUC, 5); 327 + if (ret) { 328 + drm_err(&xe->drm, "GuC reset timed out, GEN6_GDRST=0x%8x\n", 329 + xe_mmio_read32(gt, GEN6_GDRST.reg)); 330 + goto err_out; 331 + } 332 + 333 + guc_status = xe_mmio_read32(gt, GUC_STATUS.reg); 334 + if (!(guc_status & GS_MIA_IN_RESET)) { 335 + drm_err(&xe->drm, 336 + "GuC status: 0x%x, MIA core expected to be in reset\n", 337 + guc_status); 338 + ret = -EIO; 339 + goto err_out; 340 + } 341 + 342 + return 0; 343 + 344 + err_out: 345 + 346 + return ret; 347 + } 348 + 349 + static void guc_prepare_xfer(struct xe_guc *guc) 350 + { 351 + struct xe_gt *gt = guc_to_gt(guc); 352 + struct xe_device *xe = guc_to_xe(guc); 353 + u32 shim_flags = GUC_ENABLE_READ_CACHE_LOGIC | 354 + GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA | 355 + GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA | 356 + GUC_ENABLE_MIA_CLOCK_GATING; 357 + 358 + if (GRAPHICS_VERx100(xe) < 1250) 359 + shim_flags |= GUC_DISABLE_SRAM_INIT_TO_ZEROES | 360 + GUC_ENABLE_MIA_CACHING; 361 + 362 + if (xe->info.platform == XE_PVC) 363 + shim_flags |= PVC_GUC_MOCS_INDEX(PVC_MOCS_UC_INDEX); 364 + 365 + /* Must program this register before loading the ucode with DMA */ 366 + xe_mmio_write32(gt, GUC_SHIM_CONTROL.reg, shim_flags); 367 + 368 + xe_mmio_write32(gt, GEN9_GT_PM_CONFIG.reg, GT_DOORBELL_ENABLE); 369 + } 370 + 371 + /* 372 + * Supporting MMIO & in memory RSA 
373 + */ 374 + static int guc_xfer_rsa(struct xe_guc *guc) 375 + { 376 + struct xe_gt *gt = guc_to_gt(guc); 377 + u32 rsa[UOS_RSA_SCRATCH_COUNT]; 378 + size_t copied; 379 + int i; 380 + 381 + if (guc->fw.rsa_size > 256) { 382 + u32 rsa_ggtt_addr = xe_bo_ggtt_addr(guc->fw.bo) + 383 + xe_uc_fw_rsa_offset(&guc->fw); 384 + xe_mmio_write32(gt, UOS_RSA_SCRATCH(0).reg, rsa_ggtt_addr); 385 + return 0; 386 + } 387 + 388 + copied = xe_uc_fw_copy_rsa(&guc->fw, rsa, sizeof(rsa)); 389 + if (copied < sizeof(rsa)) 390 + return -ENOMEM; 391 + 392 + for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++) 393 + xe_mmio_write32(gt, UOS_RSA_SCRATCH(i).reg, rsa[i]); 394 + 395 + return 0; 396 + } 397 + 398 + /* 399 + * Read the GuC status register (GUC_STATUS) and store it in the 400 + * specified location; then return a boolean indicating whether 401 + * the value matches either of two values representing completion 402 + * of the GuC boot process. 403 + * 404 + * This is used for polling the GuC status in a wait_for() 405 + * loop below. 406 + */ 407 + static bool guc_ready(struct xe_guc *guc, u32 *status) 408 + { 409 + u32 val = xe_mmio_read32(guc_to_gt(guc), GUC_STATUS.reg); 410 + u32 uk_val = REG_FIELD_GET(GS_UKERNEL_MASK, val); 411 + 412 + *status = val; 413 + return uk_val == XE_GUC_LOAD_STATUS_READY; 414 + } 415 + 416 + static int guc_wait_ucode(struct xe_guc *guc) 417 + { 418 + struct xe_device *xe = guc_to_xe(guc); 419 + u32 status; 420 + int ret; 421 + 422 + /* 423 + * Wait for the GuC to start up. 424 + * NB: Docs recommend not using the interrupt for completion. 425 + * Measurements indicate this should take no more than 20ms 426 + * (assuming the GT clock is at maximum frequency). So, a 427 + * timeout here indicates that the GuC has failed and is unusable. 428 + * (Higher levels of the driver may decide to reset the GuC and 429 + * attempt the ucode load again if this happens.) 
430 + * 431 + * FIXME: There is a known (but exceedingly unlikely) race condition 432 + * where the asynchronous frequency management code could reduce 433 + * the GT clock while a GuC reload is in progress (during a full 434 + * GT reset). A fix is in progress but there are complex locking 435 + * issues to be resolved. In the meantime bump the timeout to 436 + * 200ms. Even at slowest clock, this should be sufficient. And 437 + * in the working case, a larger timeout makes no difference. 438 + */ 439 + ret = wait_for(guc_ready(guc, &status), 200); 440 + if (ret) { 441 + struct drm_device *drm = &xe->drm; 442 + struct drm_printer p = drm_info_printer(drm->dev); 443 + 444 + drm_info(drm, "GuC load failed: status = 0x%08X\n", status); 445 + drm_info(drm, "GuC load failed: status: Reset = %d, " 446 + "BootROM = 0x%02X, UKernel = 0x%02X, " 447 + "MIA = 0x%02X, Auth = 0x%02X\n", 448 + REG_FIELD_GET(GS_MIA_IN_RESET, status), 449 + REG_FIELD_GET(GS_BOOTROM_MASK, status), 450 + REG_FIELD_GET(GS_UKERNEL_MASK, status), 451 + REG_FIELD_GET(GS_MIA_MASK, status), 452 + REG_FIELD_GET(GS_AUTH_STATUS_MASK, status)); 453 + 454 + if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) { 455 + drm_info(drm, "GuC firmware signature verification failed\n"); 456 + ret = -ENOEXEC; 457 + } 458 + 459 + if (REG_FIELD_GET(GS_UKERNEL_MASK, status) == 460 + XE_GUC_LOAD_STATUS_EXCEPTION) { 461 + drm_info(drm, "GuC firmware exception. 
EIP: %#x\n", 462 + xe_mmio_read32(guc_to_gt(guc), 463 + SOFT_SCRATCH(13).reg)); 464 + ret = -ENXIO; 465 + } 466 + 467 + xe_guc_log_print(&guc->log, &p); 468 + } else { 469 + drm_dbg(&xe->drm, "GuC successfully loaded"); 470 + } 471 + 472 + return ret; 473 + } 474 + 475 + static int __xe_guc_upload(struct xe_guc *guc) 476 + { 477 + int ret; 478 + 479 + guc_write_params(guc); 480 + guc_prepare_xfer(guc); 481 + 482 + /* 483 + * Note that GuC needs the CSS header plus uKernel code to be copied 484 + * by the DMA engine in one operation, whereas the RSA signature is 485 + * loaded separately, either by copying it to the UOS_RSA_SCRATCH 486 + * register (if key size <= 256) or through a ggtt-pinned vma (if key 487 + * size > 256). The RSA size and therefore the way we provide it to the 488 + * HW is fixed for each platform and hard-coded in the bootrom. 489 + */ 490 + ret = guc_xfer_rsa(guc); 491 + if (ret) 492 + goto out; 493 + /* 494 + * Current uCode expects the code to be loaded at 8k; locations below 495 + * this are used for the stack. 496 + */ 497 + ret = xe_uc_fw_upload(&guc->fw, 0x2000, UOS_MOVE); 498 + if (ret) 499 + goto out; 500 + 501 + /* Wait for authentication */ 502 + ret = guc_wait_ucode(guc); 503 + if (ret) 504 + goto out; 505 + 506 + xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_RUNNING); 507 + return 0; 508 + 509 + out: 510 + xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOAD_FAIL); 511 + return 0 /* FIXME: ret, don't want to stop load currently */; 512 + } 513 + 514 + /** 515 + * xe_guc_min_load_for_hwconfig - load minimal GuC and read hwconfig table 516 + * @guc: The GuC object 517 + * 518 + * This function uploads a minimal GuC that does not support submissions but 519 + * in a state where the hwconfig table can be read. Next, it reads and parses 520 + * the hwconfig table so it can be used for subsequent steps in the driver load. 521 + * Lastly, it enables CT communication (XXX: this is needed for PFs/VFs only). 
522 + * 523 + * Return: 0 on success, negative error code on error. 524 + */ 525 + int xe_guc_min_load_for_hwconfig(struct xe_guc *guc) 526 + { 527 + int ret; 528 + 529 + xe_guc_ads_populate_minimal(&guc->ads); 530 + 531 + ret = __xe_guc_upload(guc); 532 + if (ret) 533 + return ret; 534 + 535 + ret = xe_guc_hwconfig_init(guc); 536 + if (ret) 537 + return ret; 538 + 539 + ret = xe_guc_enable_communication(guc); 540 + if (ret) 541 + return ret; 542 + 543 + return 0; 544 + } 545 + 546 + int xe_guc_upload(struct xe_guc *guc) 547 + { 548 + xe_guc_ads_populate(&guc->ads); 549 + 550 + return __xe_guc_upload(guc); 551 + } 552 + 553 + static void guc_handle_mmio_msg(struct xe_guc *guc) 554 + { 555 + struct xe_gt *gt = guc_to_gt(guc); 556 + u32 msg; 557 + 558 + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); 559 + 560 + msg = xe_mmio_read32(gt, SOFT_SCRATCH(15).reg); 561 + msg &= XE_GUC_RECV_MSG_EXCEPTION | 562 + XE_GUC_RECV_MSG_CRASH_DUMP_POSTED; 563 + xe_mmio_write32(gt, SOFT_SCRATCH(15).reg, 0); 564 + 565 + if (msg & XE_GUC_RECV_MSG_CRASH_DUMP_POSTED) 566 + drm_err(&guc_to_xe(guc)->drm, 567 + "Received early GuC crash dump notification!\n"); 568 + 569 + if (msg & XE_GUC_RECV_MSG_EXCEPTION) 570 + drm_err(&guc_to_xe(guc)->drm, 571 + "Received early GuC exception notification!\n"); 572 + } 573 + 574 + void guc_enable_irq(struct xe_guc *guc) 575 + { 576 + struct xe_gt *gt = guc_to_gt(guc); 577 + u32 events = xe_gt_is_media_type(gt) ? 
578 + REG_FIELD_PREP(ENGINE0_MASK, GUC_INTR_GUC2HOST) : 579 + REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST); 580 + 581 + xe_mmio_write32(gt, GEN11_GUC_SG_INTR_ENABLE.reg, 582 + REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST)); 583 + if (xe_gt_is_media_type(gt)) 584 + xe_mmio_rmw32(gt, GEN11_GUC_SG_INTR_MASK.reg, events, 0); 585 + else 586 + xe_mmio_write32(gt, GEN11_GUC_SG_INTR_MASK.reg, ~events); 587 + } 588 + 589 + int xe_guc_enable_communication(struct xe_guc *guc) 590 + { 591 + int err; 592 + 593 + guc_enable_irq(guc); 594 + 595 + xe_mmio_rmw32(guc_to_gt(guc), GEN6_PMINTRMSK.reg, 596 + ARAT_EXPIRED_INTRMSK, 0); 597 + 598 + err = xe_guc_ct_enable(&guc->ct); 599 + if (err) 600 + return err; 601 + 602 + guc_handle_mmio_msg(guc); 603 + 604 + return 0; 605 + } 606 + 607 + int xe_guc_suspend(struct xe_guc *guc) 608 + { 609 + int ret; 610 + u32 action[] = { 611 + XE_GUC_ACTION_CLIENT_SOFT_RESET, 612 + }; 613 + 614 + ret = xe_guc_send_mmio(guc, action, ARRAY_SIZE(action)); 615 + if (ret) { 616 + drm_err(&guc_to_xe(guc)->drm, 617 + "GuC suspend: CLIENT_SOFT_RESET fail: %d!\n", ret); 618 + return ret; 619 + } 620 + 621 + xe_guc_sanitize(guc); 622 + return 0; 623 + } 624 + 625 + void xe_guc_notify(struct xe_guc *guc) 626 + { 627 + struct xe_gt *gt = guc_to_gt(guc); 628 + 629 + xe_mmio_write32(gt, guc->notify_reg, GUC_SEND_TRIGGER); 630 + } 631 + 632 + int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr) 633 + { 634 + u32 action[] = { 635 + XE_GUC_ACTION_AUTHENTICATE_HUC, 636 + rsa_addr 637 + }; 638 + 639 + return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action)); 640 + } 641 + 642 + #define MEDIA_SOFT_SCRATCH(n) _MMIO(0x190310 + (n) * 4) 643 + #define MEDIA_SOFT_SCRATCH_COUNT 4 644 + 645 + int xe_guc_send_mmio(struct xe_guc *guc, const u32 *request, u32 len) 646 + { 647 + struct xe_device *xe = guc_to_xe(guc); 648 + struct xe_gt *gt = guc_to_gt(guc); 649 + u32 header; 650 + u32 reply_reg = xe_gt_is_media_type(gt) ? 
651 + MEDIA_SOFT_SCRATCH(0).reg : GEN11_SOFT_SCRATCH(0).reg; 652 + int ret; 653 + int i; 654 + 655 + XE_BUG_ON(guc->ct.enabled); 656 + XE_BUG_ON(!len); 657 + XE_BUG_ON(len > GEN11_SOFT_SCRATCH_COUNT); 658 + XE_BUG_ON(len > MEDIA_SOFT_SCRATCH_COUNT); 659 + XE_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, request[0]) != 660 + GUC_HXG_ORIGIN_HOST); 661 + XE_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_TYPE, request[0]) != 662 + GUC_HXG_TYPE_REQUEST); 663 + 664 + retry: 665 + /* Not in critical data-path, just do if else for GT type */ 666 + if (xe_gt_is_media_type(gt)) { 667 + for (i = 0; i < len; ++i) 668 + xe_mmio_write32(gt, MEDIA_SOFT_SCRATCH(i).reg, 669 + request[i]); 670 + #define LAST_INDEX MEDIA_SOFT_SCRATCH_COUNT - 1 671 + xe_mmio_read32(gt, MEDIA_SOFT_SCRATCH(LAST_INDEX).reg); 672 + } else { 673 + for (i = 0; i < len; ++i) 674 + xe_mmio_write32(gt, GEN11_SOFT_SCRATCH(i).reg, 675 + request[i]); 676 + #undef LAST_INDEX 677 + #define LAST_INDEX GEN11_SOFT_SCRATCH_COUNT - 1 678 + xe_mmio_read32(gt, GEN11_SOFT_SCRATCH(LAST_INDEX).reg); 679 + } 680 + 681 + xe_guc_notify(guc); 682 + 683 + ret = xe_mmio_wait32(gt, reply_reg, 684 + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, 685 + GUC_HXG_ORIGIN_GUC), 686 + GUC_HXG_MSG_0_ORIGIN, 687 + 50); 688 + if (ret) { 689 + timeout: 690 + drm_err(&xe->drm, "mmio request 0x%08x: no reply 0x%08x\n", 691 + request[0], xe_mmio_read32(gt, reply_reg)); 692 + return ret; 693 + } 694 + 695 + header = xe_mmio_read32(gt, reply_reg); 696 + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) == 697 + GUC_HXG_TYPE_NO_RESPONSE_BUSY) { 698 + #define done ({ header = xe_mmio_read32(gt, reply_reg); \ 699 + FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) != \ 700 + GUC_HXG_ORIGIN_GUC || \ 701 + FIELD_GET(GUC_HXG_MSG_0_TYPE, header) != \ 702 + GUC_HXG_TYPE_NO_RESPONSE_BUSY; }) 703 + 704 + ret = wait_for(done, 1000); 705 + if (unlikely(ret)) 706 + goto timeout; 707 + if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) != 708 + GUC_HXG_ORIGIN_GUC)) 709 + goto proto; 710 + #undef done 711 + } 712 + 
713 + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) == 714 + GUC_HXG_TYPE_NO_RESPONSE_RETRY) { 715 + u32 reason = FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, header); 716 + 717 + drm_dbg(&xe->drm, "mmio request %#x: retrying, reason %u\n", 718 + request[0], reason); 719 + goto retry; 720 + } 721 + 722 + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) == 723 + GUC_HXG_TYPE_RESPONSE_FAILURE) { 724 + u32 hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, header); 725 + u32 error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, header); 726 + 727 + drm_err(&xe->drm, "mmio request %#x: failure %x/%u\n", 728 + request[0], error, hint); 729 + return -ENXIO; 730 + } 731 + 732 + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) != 733 + GUC_HXG_TYPE_RESPONSE_SUCCESS) { 734 + proto: 735 + drm_err(&xe->drm, "mmio request %#x: unexpected reply %#x\n", 736 + request[0], header); 737 + return -EPROTO; 738 + } 739 + 740 + /* Use data from the GuC response as our return value */ 741 + return FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, header); 742 + } 743 + 744 + static int guc_self_cfg(struct xe_guc *guc, u16 key, u16 len, u64 val) 745 + { 746 + u32 request[HOST2GUC_SELF_CFG_REQUEST_MSG_LEN] = { 747 + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | 748 + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | 749 + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, 750 + GUC_ACTION_HOST2GUC_SELF_CFG), 751 + FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY, key) | 752 + FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN, len), 753 + FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32, 754 + lower_32_bits(val)), 755 + FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64, 756 + upper_32_bits(val)), 757 + }; 758 + int ret; 759 + 760 + XE_BUG_ON(len > 2); 761 + XE_BUG_ON(len == 1 && upper_32_bits(val)); 762 + 763 + /* Self config must go over MMIO */ 764 + ret = xe_guc_send_mmio(guc, request, ARRAY_SIZE(request)); 765 + 766 + if (unlikely(ret < 0)) 767 + return ret; 768 + if (unlikely(ret > 1)) 769 + return -EPROTO; 770 + if 
(unlikely(!ret)) 771 + return -ENOKEY; 772 + 773 + return 0; 774 + } 775 + 776 + int xe_guc_self_cfg32(struct xe_guc *guc, u16 key, u32 val) 777 + { 778 + return guc_self_cfg(guc, key, 1, val); 779 + } 780 + 781 + int xe_guc_self_cfg64(struct xe_guc *guc, u16 key, u64 val) 782 + { 783 + return guc_self_cfg(guc, key, 2, val); 784 + } 785 + 786 + void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir) 787 + { 788 + if (iir & GUC_INTR_GUC2HOST) 789 + xe_guc_ct_irq_handler(&guc->ct); 790 + } 791 + 792 + void xe_guc_sanitize(struct xe_guc *guc) 793 + { 794 + xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); 795 + xe_guc_ct_disable(&guc->ct); 796 + } 797 + 798 + int xe_guc_reset_prepare(struct xe_guc *guc) 799 + { 800 + return xe_guc_submit_reset_prepare(guc); 801 + } 802 + 803 + void xe_guc_reset_wait(struct xe_guc *guc) 804 + { 805 + xe_guc_submit_reset_wait(guc); 806 + } 807 + 808 + void xe_guc_stop_prepare(struct xe_guc *guc) 809 + { 810 + XE_WARN_ON(xe_guc_pc_stop(&guc->pc)); 811 + } 812 + 813 + int xe_guc_stop(struct xe_guc *guc) 814 + { 815 + int ret; 816 + 817 + xe_guc_ct_disable(&guc->ct); 818 + 819 + ret = xe_guc_submit_stop(guc); 820 + if (ret) 821 + return ret; 822 + 823 + return 0; 824 + } 825 + 826 + int xe_guc_start(struct xe_guc *guc) 827 + { 828 + int ret; 829 + 830 + ret = xe_guc_submit_start(guc); 831 + if (ret) 832 + return ret; 833 + 834 + ret = xe_guc_pc_start(&guc->pc); 835 + XE_WARN_ON(ret); 836 + 837 + return 0; 838 + } 839 + 840 + void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p) 841 + { 842 + struct xe_gt *gt = guc_to_gt(guc); 843 + u32 status; 844 + int err; 845 + int i; 846 + 847 + xe_uc_fw_print(&guc->fw, p); 848 + 849 + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 850 + if (err) 851 + return; 852 + 853 + status = xe_mmio_read32(gt, GUC_STATUS.reg); 854 + 855 + drm_printf(p, "\nGuC status 0x%08x:\n", status); 856 + drm_printf(p, "\tBootrom status = 0x%x\n", 857 + (status & GS_BOOTROM_MASK) >> 
GS_BOOTROM_SHIFT); 858 + drm_printf(p, "\tuKernel status = 0x%x\n", 859 + (status & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT); 860 + drm_printf(p, "\tMIA Core status = 0x%x\n", 861 + (status & GS_MIA_MASK) >> GS_MIA_SHIFT); 862 + drm_printf(p, "\tLog level = %d\n", 863 + xe_guc_log_get_level(&guc->log)); 864 + 865 + drm_puts(p, "\nScratch registers:\n"); 866 + for (i = 0; i < SOFT_SCRATCH_COUNT; i++) { 867 + drm_printf(p, "\t%2d: \t0x%x\n", 868 + i, xe_mmio_read32(gt, SOFT_SCRATCH(i).reg)); 869 + } 870 + 871 + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 872 + 873 + xe_guc_ct_print(&guc->ct, p); 874 + xe_guc_submit_print(guc, p); 875 + }
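xe_guc_send_mmio() and guc_self_cfg() above build their requests by packing fields into a 32-bit HXG header with FIELD_PREP() and decode replies with FIELD_GET(). A standalone sketch of that mask arithmetic follows; the HXG_* mask values here are illustrative placeholders, not the real GuC ABI layout, which lives in the GuC interface headers:

```c
#include <assert.h>
#include <stdint.h>

/* Illustrative field masks; the real GUC_HXG_* layout comes from the GuC ABI. */
#define HXG_ORIGIN_MASK  0x80000000u	/* bit 31 */
#define HXG_TYPE_MASK    0x70000000u	/* bits 30:28 */
#define HXG_ACTION_MASK  0x0000ffffu	/* bits 15:0 */

/*
 * Minimal equivalents of the kernel's FIELD_PREP()/FIELD_GET(): the shift is
 * derived from the mask's lowest set bit, so callers only name the mask.
 */
static uint32_t field_prep(uint32_t mask, uint32_t val)
{
	return (val << __builtin_ctz(mask)) & mask;
}

static uint32_t field_get(uint32_t mask, uint32_t reg)
{
	return (reg & mask) >> __builtin_ctz(mask);
}
```

Deriving the shift from the mask is why the driver never hard-codes shift constants next to the masks: a field can move without touching any call site.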
drivers/gpu/drm/xe/xe_guc.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_GUC_H_ 7 + #define _XE_GUC_H_ 8 + 9 + #include "xe_hw_engine_types.h" 10 + #include "xe_guc_types.h" 11 + #include "xe_macros.h" 12 + 13 + struct drm_printer; 14 + 15 + int xe_guc_init(struct xe_guc *guc); 16 + int xe_guc_init_post_hwconfig(struct xe_guc *guc); 17 + int xe_guc_post_load_init(struct xe_guc *guc); 18 + int xe_guc_reset(struct xe_guc *guc); 19 + int xe_guc_upload(struct xe_guc *guc); 20 + int xe_guc_min_load_for_hwconfig(struct xe_guc *guc); 21 + int xe_guc_enable_communication(struct xe_guc *guc); 22 + int xe_guc_suspend(struct xe_guc *guc); 23 + void xe_guc_notify(struct xe_guc *guc); 24 + int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr); 25 + int xe_guc_send_mmio(struct xe_guc *guc, const u32 *request, u32 len); 26 + int xe_guc_self_cfg32(struct xe_guc *guc, u16 key, u32 val); 27 + int xe_guc_self_cfg64(struct xe_guc *guc, u16 key, u64 val); 28 + void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir); 29 + void xe_guc_sanitize(struct xe_guc *guc); 30 + void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p); 31 + int xe_guc_reset_prepare(struct xe_guc *guc); 32 + void xe_guc_reset_wait(struct xe_guc *guc); 33 + void xe_guc_stop_prepare(struct xe_guc *guc); 34 + int xe_guc_stop(struct xe_guc *guc); 35 + int xe_guc_start(struct xe_guc *guc); 36 + 37 + static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class class) 38 + { 39 + switch (class) { 40 + case XE_ENGINE_CLASS_RENDER: 41 + return GUC_RENDER_CLASS; 42 + case XE_ENGINE_CLASS_VIDEO_DECODE: 43 + return GUC_VIDEO_CLASS; 44 + case XE_ENGINE_CLASS_VIDEO_ENHANCE: 45 + return GUC_VIDEOENHANCE_CLASS; 46 + case XE_ENGINE_CLASS_COPY: 47 + return GUC_BLITTER_CLASS; 48 + case XE_ENGINE_CLASS_COMPUTE: 49 + return GUC_COMPUTE_CLASS; 50 + case XE_ENGINE_CLASS_OTHER: 51 + default: 52 + XE_WARN_ON(class); 53 + return -1; 54 + } 55 + } 56 + 57 + #endif
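xe_engine_class_to_guc_class() above maps the driver's engine classes onto the GuC's submission class IDs with a switch. The same mapping can be expressed as a lookup table; the sketch below uses stand-in enum names and made-up numeric values, since the real GUC_*_CLASS constants come from the GuC firmware ABI headers:

```c
#include <assert.h>

/* Stand-ins for xe's engine classes; names and values are illustrative only. */
enum sketch_engine_class {
	SKETCH_CLASS_RENDER,
	SKETCH_CLASS_VIDEO_DECODE,
	SKETCH_CLASS_VIDEO_ENHANCE,
	SKETCH_CLASS_COPY,
	SKETCH_CLASS_COMPUTE,
	SKETCH_CLASS_MAX,
};

/* Hypothetical GuC class IDs, mirroring GUC_RENDER_CLASS et al. */
static const int sketch_guc_class[SKETCH_CLASS_MAX] = {
	[SKETCH_CLASS_RENDER]        = 0,
	[SKETCH_CLASS_VIDEO_DECODE]  = 1,
	[SKETCH_CLASS_VIDEO_ENHANCE] = 2,
	[SKETCH_CLASS_COPY]          = 3,
	[SKETCH_CLASS_COMPUTE]       = 4,
};

static int sketch_class_to_guc(enum sketch_engine_class class)
{
	if (class >= SKETCH_CLASS_MAX)
		return -1;	/* unknown class, like the switch's default arm */
	return sketch_guc_class[class];
}
```

A table keeps the mapping data-driven, but loses the compiler's exhaustiveness warning that a switch over an enum provides, which is presumably why the driver prefers the switch.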
drivers/gpu/drm/xe/xe_guc_ads.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #include <drm/drm_managed.h> 7 + 8 + #include "xe_bo.h" 9 + #include "xe_gt.h" 10 + #include "xe_guc.h" 11 + #include "xe_guc_ads.h" 12 + #include "xe_guc_reg.h" 13 + #include "xe_hw_engine.h" 14 + #include "xe_lrc.h" 15 + #include "xe_map.h" 16 + #include "xe_mmio.h" 17 + #include "xe_platform_types.h" 18 + #include "gt/intel_gt_regs.h" 19 + #include "gt/intel_engine_regs.h" 20 + 21 + /* Slack of a few additional entries per engine */ 22 + #define ADS_REGSET_EXTRA_MAX 8 23 + 24 + static struct xe_guc * 25 + ads_to_guc(struct xe_guc_ads *ads) 26 + { 27 + return container_of(ads, struct xe_guc, ads); 28 + } 29 + 30 + static struct xe_gt * 31 + ads_to_gt(struct xe_guc_ads *ads) 32 + { 33 + return container_of(ads, struct xe_gt, uc.guc.ads); 34 + } 35 + 36 + static struct xe_device * 37 + ads_to_xe(struct xe_guc_ads *ads) 38 + { 39 + return gt_to_xe(ads_to_gt(ads)); 40 + } 41 + 42 + static struct iosys_map * 43 + ads_to_map(struct xe_guc_ads *ads) 44 + { 45 + return &ads->bo->vmap; 46 + } 47 + 48 + /* UM Queue parameters: */ 49 + #define GUC_UM_QUEUE_SIZE (SZ_64K) 50 + #define GUC_PAGE_RES_TIMEOUT_US (-1) 51 + 52 + /* 53 + * The Additional Data Struct (ADS) has pointers for different buffers used by 54 + * the GuC. One single gem object contains the ADS struct itself (guc_ads) and 55 + * all the extra buffers indirectly linked via the ADS struct's entries. 
56 + * 57 + * Layout of the ADS blob allocated for the GuC: 58 + * 59 + * +---------------------------------------+ <== base 60 + * | guc_ads | 61 + * +---------------------------------------+ 62 + * | guc_policies | 63 + * +---------------------------------------+ 64 + * | guc_gt_system_info | 65 + * +---------------------------------------+ 66 + * | guc_engine_usage | 67 + * +---------------------------------------+ 68 + * | guc_um_init_params | 69 + * +---------------------------------------+ <== static 70 + * | guc_mmio_reg[countA] (engine 0.0) | 71 + * | guc_mmio_reg[countB] (engine 0.1) | 72 + * | guc_mmio_reg[countC] (engine 1.0) | 73 + * | ... | 74 + * +---------------------------------------+ <== dynamic 75 + * | padding | 76 + * +---------------------------------------+ <== 4K aligned 77 + * | golden contexts | 78 + * +---------------------------------------+ 79 + * | padding | 80 + * +---------------------------------------+ <== 4K aligned 81 + * | capture lists | 82 + * +---------------------------------------+ 83 + * | padding | 84 + * +---------------------------------------+ <== 4K aligned 85 + * | UM queues | 86 + * +---------------------------------------+ 87 + * | padding | 88 + * +---------------------------------------+ <== 4K aligned 89 + * | private data | 90 + * +---------------------------------------+ 91 + * | padding | 92 + * +---------------------------------------+ <== 4K aligned 93 + */ 94 + struct __guc_ads_blob { 95 + struct guc_ads ads; 96 + struct guc_policies policies; 97 + struct guc_gt_system_info system_info; 98 + struct guc_engine_usage engine_usage; 99 + struct guc_um_init_params um_init_params; 100 + /* From here on, location is dynamic! Refer to above diagram. 
*/ 101 + struct guc_mmio_reg regset[0]; 102 + } __packed; 103 + 104 + #define ads_blob_read(ads_, field_) \ 105 + xe_map_rd_field(ads_to_xe(ads_), ads_to_map(ads_), 0, \ 106 + struct __guc_ads_blob, field_) 107 + 108 + #define ads_blob_write(ads_, field_, val_) \ 109 + xe_map_wr_field(ads_to_xe(ads_), ads_to_map(ads_), 0, \ 110 + struct __guc_ads_blob, field_, val_) 111 + 112 + #define info_map_write(xe_, map_, field_, val_) \ 113 + xe_map_wr_field(xe_, map_, 0, struct guc_gt_system_info, field_, val_) 114 + 115 + #define info_map_read(xe_, map_, field_) \ 116 + xe_map_rd_field(xe_, map_, 0, struct guc_gt_system_info, field_) 117 + 118 + static size_t guc_ads_regset_size(struct xe_guc_ads *ads) 119 + { 120 + XE_BUG_ON(!ads->regset_size); 121 + 122 + return ads->regset_size; 123 + } 124 + 125 + static size_t guc_ads_golden_lrc_size(struct xe_guc_ads *ads) 126 + { 127 + return PAGE_ALIGN(ads->golden_lrc_size); 128 + } 129 + 130 + static size_t guc_ads_capture_size(struct xe_guc_ads *ads) 131 + { 132 + /* FIXME: Allocate a proper capture list */ 133 + return PAGE_ALIGN(PAGE_SIZE); 134 + } 135 + 136 + static size_t guc_ads_um_queues_size(struct xe_guc_ads *ads) 137 + { 138 + struct xe_device *xe = ads_to_xe(ads); 139 + 140 + if (!xe->info.supports_usm) 141 + return 0; 142 + 143 + return GUC_UM_QUEUE_SIZE * GUC_UM_HW_QUEUE_MAX; 144 + } 145 + 146 + static size_t guc_ads_private_data_size(struct xe_guc_ads *ads) 147 + { 148 + return PAGE_ALIGN(ads_to_guc(ads)->fw.private_data_size); 149 + } 150 + 151 + static size_t guc_ads_regset_offset(struct xe_guc_ads *ads) 152 + { 153 + return offsetof(struct __guc_ads_blob, regset); 154 + } 155 + 156 + static size_t guc_ads_golden_lrc_offset(struct xe_guc_ads *ads) 157 + { 158 + size_t offset; 159 + 160 + offset = guc_ads_regset_offset(ads) + 161 + guc_ads_regset_size(ads); 162 + 163 + return PAGE_ALIGN(offset); 164 + } 165 + 166 + static size_t guc_ads_capture_offset(struct xe_guc_ads *ads) 167 + { 168 + size_t offset; 169 + 170 + 
offset = guc_ads_golden_lrc_offset(ads) + 171 + guc_ads_golden_lrc_size(ads); 172 + 173 + return PAGE_ALIGN(offset); 174 + } 175 + 176 + static size_t guc_ads_um_queues_offset(struct xe_guc_ads *ads) 177 + { 178 + u32 offset; 179 + 180 + offset = guc_ads_capture_offset(ads) + 181 + guc_ads_capture_size(ads); 182 + 183 + return PAGE_ALIGN(offset); 184 + } 185 + 186 + static size_t guc_ads_private_data_offset(struct xe_guc_ads *ads) 187 + { 188 + size_t offset; 189 + 190 + offset = guc_ads_um_queues_offset(ads) + 191 + guc_ads_um_queues_size(ads); 192 + 193 + return PAGE_ALIGN(offset); 194 + } 195 + 196 + static size_t guc_ads_size(struct xe_guc_ads *ads) 197 + { 198 + return guc_ads_private_data_offset(ads) + 199 + guc_ads_private_data_size(ads); 200 + } 201 + 202 + static void guc_ads_fini(struct drm_device *drm, void *arg) 203 + { 204 + struct xe_guc_ads *ads = arg; 205 + 206 + xe_bo_unpin_map_no_vm(ads->bo); 207 + } 208 + 209 + static size_t calculate_regset_size(struct xe_gt *gt) 210 + { 211 + struct xe_reg_sr_entry *sr_entry; 212 + unsigned long sr_idx; 213 + struct xe_hw_engine *hwe; 214 + enum xe_hw_engine_id id; 215 + unsigned int count = 0; 216 + 217 + for_each_hw_engine(hwe, gt, id) 218 + xa_for_each(&hwe->reg_sr.xa, sr_idx, sr_entry) 219 + count++; 220 + 221 + count += (ADS_REGSET_EXTRA_MAX + LNCFCMOCS_REG_COUNT) * XE_NUM_HW_ENGINES; 222 + 223 + return count * sizeof(struct guc_mmio_reg); 224 + } 225 + 226 + static u32 engine_enable_mask(struct xe_gt *gt, enum xe_engine_class class) 227 + { 228 + struct xe_hw_engine *hwe; 229 + enum xe_hw_engine_id id; 230 + u32 mask = 0; 231 + 232 + for_each_hw_engine(hwe, gt, id) 233 + if (hwe->class == class) 234 + mask |= BIT(hwe->instance); 235 + 236 + return mask; 237 + } 238 + 239 + static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads) 240 + { 241 + struct xe_device *xe = ads_to_xe(ads); 242 + struct xe_gt *gt = ads_to_gt(ads); 243 + size_t total_size = 0, alloc_size, real_size; 244 + int class; 245 + 246 
+ for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { 247 + if (class == XE_ENGINE_CLASS_OTHER) 248 + continue; 249 + 250 + if (!engine_enable_mask(gt, class)) 251 + continue; 252 + 253 + real_size = xe_lrc_size(xe, class); 254 + alloc_size = PAGE_ALIGN(real_size); 255 + total_size += alloc_size; 256 + } 257 + 258 + return total_size; 259 + } 260 + 261 + #define MAX_GOLDEN_LRC_SIZE (SZ_4K * 64) 262 + 263 + int xe_guc_ads_init(struct xe_guc_ads *ads) 264 + { 265 + struct xe_device *xe = ads_to_xe(ads); 266 + struct xe_gt *gt = ads_to_gt(ads); 267 + struct xe_bo *bo; 268 + int err; 269 + 270 + ads->golden_lrc_size = calculate_golden_lrc_size(ads); 271 + ads->regset_size = calculate_regset_size(gt); 272 + 273 + bo = xe_bo_create_pin_map(xe, gt, NULL, guc_ads_size(ads) + 274 + MAX_GOLDEN_LRC_SIZE, 275 + ttm_bo_type_kernel, 276 + XE_BO_CREATE_VRAM_IF_DGFX(gt) | 277 + XE_BO_CREATE_GGTT_BIT); 278 + if (IS_ERR(bo)) 279 + return PTR_ERR(bo); 280 + 281 + ads->bo = bo; 282 + 283 + err = drmm_add_action_or_reset(&xe->drm, guc_ads_fini, ads); 284 + if (err) 285 + return err; 286 + 287 + return 0; 288 + } 289 + 290 + /** 291 + * xe_guc_ads_init_post_hwconfig - initialize ADS post hwconfig load 292 + * @ads: Additional data structures object 293 + * 294 + * Recalculate golden_lrc_size & regset_size as the number of hardware engines may 295 + * have changed after the hwconfig was loaded. Also verify the new sizes fit in 296 + * the already allocated ADS buffer object. 297 + * 298 + * Return: 0 on success, negative error code on error.
299 + */ 300 + int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads) 301 + { 302 + struct xe_gt *gt = ads_to_gt(ads); 303 + u32 prev_regset_size = ads->regset_size; 304 + 305 + XE_BUG_ON(!ads->bo); 306 + 307 + ads->golden_lrc_size = calculate_golden_lrc_size(ads); 308 + ads->regset_size = calculate_regset_size(gt); 309 + 310 + XE_WARN_ON(ads->golden_lrc_size + 311 + (ads->regset_size - prev_regset_size) > 312 + MAX_GOLDEN_LRC_SIZE); 313 + 314 + return 0; 315 + } 316 + 317 + static void guc_policies_init(struct xe_guc_ads *ads) 318 + { 319 + ads_blob_write(ads, policies.dpc_promote_time, 320 + GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US); 321 + ads_blob_write(ads, policies.max_num_work_items, 322 + GLOBAL_POLICY_MAX_NUM_WI); 323 + ads_blob_write(ads, policies.global_flags, 0); 324 + ads_blob_write(ads, policies.is_valid, 1); 325 + } 326 + 327 + static void fill_engine_enable_masks(struct xe_gt *gt, 328 + struct iosys_map *info_map) 329 + { 330 + struct xe_device *xe = gt_to_xe(gt); 331 + 332 + info_map_write(xe, info_map, engine_enabled_masks[GUC_RENDER_CLASS], 333 + engine_enable_mask(gt, XE_ENGINE_CLASS_RENDER)); 334 + info_map_write(xe, info_map, engine_enabled_masks[GUC_BLITTER_CLASS], 335 + engine_enable_mask(gt, XE_ENGINE_CLASS_COPY)); 336 + info_map_write(xe, info_map, engine_enabled_masks[GUC_VIDEO_CLASS], 337 + engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_DECODE)); 338 + info_map_write(xe, info_map, 339 + engine_enabled_masks[GUC_VIDEOENHANCE_CLASS], 340 + engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE)); 341 + info_map_write(xe, info_map, engine_enabled_masks[GUC_COMPUTE_CLASS], 342 + engine_enable_mask(gt, XE_ENGINE_CLASS_COMPUTE)); 343 + } 344 + 345 + static void guc_prep_golden_lrc_null(struct xe_guc_ads *ads) 346 + { 347 + struct xe_device *xe = ads_to_xe(ads); 348 + struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), 349 + offsetof(struct __guc_ads_blob, system_info)); 350 + u8 guc_class; 351 + 352 + for (guc_class = 0; 
guc_class <= GUC_MAX_ENGINE_CLASSES; ++guc_class) { 353 + if (!info_map_read(xe, &info_map, 354 + engine_enabled_masks[guc_class])) 355 + continue; 356 + 357 + ads_blob_write(ads, ads.eng_state_size[guc_class], 358 + guc_ads_golden_lrc_size(ads) - 359 + xe_lrc_skip_size(xe)); 360 + ads_blob_write(ads, ads.golden_context_lrca[guc_class], 361 + xe_bo_ggtt_addr(ads->bo) + 362 + guc_ads_golden_lrc_offset(ads)); 363 + } 364 + } 365 + 366 + static void guc_mapping_table_init_invalid(struct xe_gt *gt, 367 + struct iosys_map *info_map) 368 + { 369 + struct xe_device *xe = gt_to_xe(gt); 370 + unsigned int i, j; 371 + 372 + /* Table must be set to invalid values for entries not used */ 373 + for (i = 0; i < GUC_MAX_ENGINE_CLASSES; ++i) 374 + for (j = 0; j < GUC_MAX_INSTANCES_PER_CLASS; ++j) 375 + info_map_write(xe, info_map, mapping_table[i][j], 376 + GUC_MAX_INSTANCES_PER_CLASS); 377 + } 378 + 379 + static void guc_mapping_table_init(struct xe_gt *gt, 380 + struct iosys_map *info_map) 381 + { 382 + struct xe_device *xe = gt_to_xe(gt); 383 + struct xe_hw_engine *hwe; 384 + enum xe_hw_engine_id id; 385 + 386 + guc_mapping_table_init_invalid(gt, info_map); 387 + 388 + for_each_hw_engine(hwe, gt, id) { 389 + u8 guc_class; 390 + 391 + guc_class = xe_engine_class_to_guc_class(hwe->class); 392 + info_map_write(xe, info_map, 393 + mapping_table[guc_class][hwe->logical_instance], 394 + hwe->instance); 395 + } 396 + } 397 + 398 + static void guc_capture_list_init(struct xe_guc_ads *ads) 399 + { 400 + int i, j; 401 + u32 addr = xe_bo_ggtt_addr(ads->bo) + guc_ads_capture_offset(ads); 402 + 403 + /* FIXME: Populate a proper capture list */ 404 + for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) { 405 + for (j = 0; j < GUC_MAX_ENGINE_CLASSES; j++) { 406 + ads_blob_write(ads, ads.capture_instance[i][j], addr); 407 + ads_blob_write(ads, ads.capture_class[i][j], addr); 408 + } 409 + 410 + ads_blob_write(ads, ads.capture_global[i], addr); 411 + } 412 + } 413 + 414 + static void 
guc_mmio_regset_write_one(struct xe_guc_ads *ads, 415 + struct iosys_map *regset_map, 416 + u32 reg, u32 flags, 417 + unsigned int n_entry) 418 + { 419 + struct guc_mmio_reg entry = { 420 + .offset = reg, 421 + .flags = flags, 422 + /* TODO: steering */ 423 + }; 424 + 425 + xe_map_memcpy_to(ads_to_xe(ads), regset_map, n_entry * sizeof(entry), 426 + &entry, sizeof(entry)); 427 + } 428 + 429 + static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads, 430 + struct iosys_map *regset_map, 431 + struct xe_hw_engine *hwe) 432 + { 433 + struct xe_hw_engine *hwe_rcs_reset_domain = 434 + xe_gt_any_hw_engine_by_reset_domain(hwe->gt, XE_ENGINE_CLASS_RENDER); 435 + struct xe_reg_sr_entry *entry; 436 + unsigned long idx; 437 + unsigned count = 0; 438 + const struct { 439 + u32 reg; 440 + u32 flags; 441 + bool skip; 442 + } *e, extra_regs[] = { 443 + { .reg = RING_MODE_GEN7(hwe->mmio_base).reg, }, 444 + { .reg = RING_HWS_PGA(hwe->mmio_base).reg, }, 445 + { .reg = RING_IMR(hwe->mmio_base).reg, }, 446 + { .reg = GEN12_RCU_MODE.reg, .flags = 0x3, 447 + .skip = hwe != hwe_rcs_reset_domain }, 448 + }; 449 + u32 i; 450 + 451 + BUILD_BUG_ON(ARRAY_SIZE(extra_regs) > ADS_REGSET_EXTRA_MAX); 452 + 453 + xa_for_each(&hwe->reg_sr.xa, idx, entry) { 454 + u32 flags = entry->masked_reg ? 
GUC_REGSET_MASKED : 0; 455 + 456 + guc_mmio_regset_write_one(ads, regset_map, idx, flags, count++); 457 + } 458 + 459 + for (e = extra_regs; e < extra_regs + ARRAY_SIZE(extra_regs); e++) { 460 + if (e->skip) 461 + continue; 462 + 463 + guc_mmio_regset_write_one(ads, regset_map, 464 + e->reg, e->flags, count++); 465 + } 466 + 467 + for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) { 468 + guc_mmio_regset_write_one(ads, regset_map, 469 + GEN9_LNCFCMOCS(i).reg, 0, count++); 470 + } 471 + 472 + XE_BUG_ON(ads->regset_size < (count * sizeof(struct guc_mmio_reg))); 473 + 474 + return count; 475 + } 476 + 477 + static void guc_mmio_reg_state_init(struct xe_guc_ads *ads) 478 + { 479 + size_t regset_offset = guc_ads_regset_offset(ads); 480 + struct xe_gt *gt = ads_to_gt(ads); 481 + struct xe_hw_engine *hwe; 482 + enum xe_hw_engine_id id; 483 + u32 addr = xe_bo_ggtt_addr(ads->bo) + regset_offset; 484 + struct iosys_map regset_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), 485 + regset_offset); 486 + 487 + for_each_hw_engine(hwe, gt, id) { 488 + unsigned int count; 489 + u8 gc; 490 + 491 + /* 492 + * 1. Write all MMIO entries for this engine to the table. No 493 + * need to worry about fused-off engines and when there are 494 + * entries in the regset: the reg_state_list has been zero'ed 495 + * by xe_guc_ads_populate() 496 + */ 497 + count = guc_mmio_regset_write(ads, &regset_map, hwe); 498 + if (!count) 499 + continue; 500 + 501 + /* 502 + * 2. 
Record in the header (ads.reg_state_list) the address 503 + * location and number of entries 504 + */ 505 + gc = xe_engine_class_to_guc_class(hwe->class); 506 + ads_blob_write(ads, ads.reg_state_list[gc][hwe->instance].address, addr); 507 + ads_blob_write(ads, ads.reg_state_list[gc][hwe->instance].count, count); 508 + 509 + addr += count * sizeof(struct guc_mmio_reg); 510 + iosys_map_incr(&regset_map, count * sizeof(struct guc_mmio_reg)); 511 + } 512 + } 513 + 514 + static void guc_um_init_params(struct xe_guc_ads *ads) 515 + { 516 + u32 um_queue_offset = guc_ads_um_queues_offset(ads); 517 + u64 base_dpa; 518 + u32 base_ggtt; 519 + int i; 520 + 521 + base_ggtt = xe_bo_ggtt_addr(ads->bo) + um_queue_offset; 522 + base_dpa = xe_bo_main_addr(ads->bo, PAGE_SIZE) + um_queue_offset; 523 + 524 + for (i = 0; i < GUC_UM_HW_QUEUE_MAX; ++i) { 525 + ads_blob_write(ads, um_init_params.queue_params[i].base_dpa, 526 + base_dpa + (i * GUC_UM_QUEUE_SIZE)); 527 + ads_blob_write(ads, um_init_params.queue_params[i].base_ggtt_address, 528 + base_ggtt + (i * GUC_UM_QUEUE_SIZE)); 529 + ads_blob_write(ads, um_init_params.queue_params[i].size_in_bytes, 530 + GUC_UM_QUEUE_SIZE); 531 + } 532 + 533 + ads_blob_write(ads, um_init_params.page_response_timeout_in_us, 534 + GUC_PAGE_RES_TIMEOUT_US); 535 + } 536 + 537 + static void guc_doorbell_init(struct xe_guc_ads *ads) 538 + { 539 + struct xe_device *xe = ads_to_xe(ads); 540 + struct xe_gt *gt = ads_to_gt(ads); 541 + 542 + if (GRAPHICS_VER(xe) >= 12 && !IS_DGFX(xe)) { 543 + u32 distdbreg = 544 + xe_mmio_read32(gt, GEN12_DIST_DBS_POPULATED.reg); 545 + 546 + ads_blob_write(ads, 547 + system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI], 548 + ((distdbreg >> GEN12_DOORBELLS_PER_SQIDI_SHIFT) 549 + & GEN12_DOORBELLS_PER_SQIDI) + 1); 550 + } 551 + } 552 + 553 + /** 554 + * xe_guc_ads_populate_minimal - populate minimal ADS 555 + * @ads: Additional data structures object 556 + * 557 + * This function populates a minimal ADS 
that does not support submissions but 558 + * enough so the GuC can load and the hwconfig table can be read. 559 + */ 560 + void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads) 561 + { 562 + struct xe_gt *gt = ads_to_gt(ads); 563 + struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), 564 + offsetof(struct __guc_ads_blob, system_info)); 565 + u32 base = xe_bo_ggtt_addr(ads->bo); 566 + 567 + XE_BUG_ON(!ads->bo); 568 + 569 + xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); 570 + guc_policies_init(ads); 571 + guc_prep_golden_lrc_null(ads); 572 + guc_mapping_table_init_invalid(gt, &info_map); 573 + guc_doorbell_init(ads); 574 + 575 + ads_blob_write(ads, ads.scheduler_policies, base + 576 + offsetof(struct __guc_ads_blob, policies)); 577 + ads_blob_write(ads, ads.gt_system_info, base + 578 + offsetof(struct __guc_ads_blob, system_info)); 579 + ads_blob_write(ads, ads.private_data, base + 580 + guc_ads_private_data_offset(ads)); 581 + } 582 + 583 + void xe_guc_ads_populate(struct xe_guc_ads *ads) 584 + { 585 + struct xe_device *xe = ads_to_xe(ads); 586 + struct xe_gt *gt = ads_to_gt(ads); 587 + struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), 588 + offsetof(struct __guc_ads_blob, system_info)); 589 + u32 base = xe_bo_ggtt_addr(ads->bo); 590 + 591 + XE_BUG_ON(!ads->bo); 592 + 593 + xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); 594 + guc_policies_init(ads); 595 + fill_engine_enable_masks(gt, &info_map); 596 + guc_mmio_reg_state_init(ads); 597 + guc_prep_golden_lrc_null(ads); 598 + guc_mapping_table_init(gt, &info_map); 599 + guc_capture_list_init(ads); 600 + guc_doorbell_init(ads); 601 + 602 + if (xe->info.supports_usm) { 603 + guc_um_init_params(ads); 604 + ads_blob_write(ads, ads.um_init_data, base + 605 + offsetof(struct __guc_ads_blob, um_init_params)); 606 + } 607 + 608 + ads_blob_write(ads, ads.scheduler_policies, base + 609 + offsetof(struct __guc_ads_blob, policies)); 610 + 
ads_blob_write(ads, ads.gt_system_info, base + 611 + offsetof(struct __guc_ads_blob, system_info)); 612 + ads_blob_write(ads, ads.private_data, base + 613 + guc_ads_private_data_offset(ads)); 614 + } 615 + 616 + static void guc_populate_golden_lrc(struct xe_guc_ads *ads) 617 + { 618 + struct xe_device *xe = ads_to_xe(ads); 619 + struct xe_gt *gt = ads_to_gt(ads); 620 + struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), 621 + offsetof(struct __guc_ads_blob, system_info)); 622 + size_t total_size = 0, alloc_size, real_size; 623 + u32 addr_ggtt, offset; 624 + int class; 625 + 626 + offset = guc_ads_golden_lrc_offset(ads); 627 + addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset; 628 + 629 + for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { 630 + u8 guc_class; 631 + 632 + if (class == XE_ENGINE_CLASS_OTHER) 633 + continue; 634 + 635 + guc_class = xe_engine_class_to_guc_class(class); 636 + 637 + if (!info_map_read(xe, &info_map, 638 + engine_enabled_masks[guc_class])) 639 + continue; 640 + 641 + XE_BUG_ON(!gt->default_lrc[class]); 642 + 643 + real_size = xe_lrc_size(xe, class); 644 + alloc_size = PAGE_ALIGN(real_size); 645 + total_size += alloc_size; 646 + 647 + /* 648 + * This interface is slightly confusing. We need to pass the 649 + * base address of the full golden context and the size of just 650 + * the engine state, which is the section of the context image 651 + * that starts after the execlists LRC registers. This is 652 + * required to allow the GuC to restore just the engine state 653 + * when a watchdog reset occurs. 654 + * We calculate the engine state size by removing the size of 655 + * what comes before it in the context image (which is identical 656 + * on all engines). 
657 + */ 658 + ads_blob_write(ads, ads.eng_state_size[guc_class], 659 + real_size - xe_lrc_skip_size(xe)); 660 + ads_blob_write(ads, ads.golden_context_lrca[guc_class], 661 + addr_ggtt); 662 + 663 + xe_map_memcpy_to(xe, ads_to_map(ads), offset, 664 + gt->default_lrc[class], real_size); 665 + 666 + addr_ggtt += alloc_size; 667 + offset += alloc_size; 668 + } 669 + 670 + XE_BUG_ON(total_size != ads->golden_lrc_size); 671 + } 672 + 673 + void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads) 674 + { 675 + guc_populate_golden_lrc(ads); 676 + }
+17
drivers/gpu/drm/xe/xe_guc_ads.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_GUC_ADS_H_ 7 + #define _XE_GUC_ADS_H_ 8 + 9 + #include "xe_guc_ads_types.h" 10 + 11 + int xe_guc_ads_init(struct xe_guc_ads *ads); 12 + int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads); 13 + void xe_guc_ads_populate(struct xe_guc_ads *ads); 14 + void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads); 15 + void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads); 16 + 17 + #endif
+25
drivers/gpu/drm/xe/xe_guc_ads_types.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_GUC_ADS_TYPES_H_ 7 + #define _XE_GUC_ADS_TYPES_H_ 8 + 9 + #include <linux/types.h> 10 + 11 + struct xe_bo; 12 + 13 + /** 14 + * struct xe_guc_ads - GuC additional data structures (ADS) 15 + */ 16 + struct xe_guc_ads { 17 + /** @bo: XE BO for GuC ads blob */ 18 + struct xe_bo *bo; 19 + /** @golden_lrc_size: golden LRC size */ 20 + size_t golden_lrc_size; 21 + /** @regset_size: size of register set passed to GuC for save/restore */ 22 + u32 regset_size; 23 + }; 24 + 25 + #endif
+1196
drivers/gpu/drm/xe/xe_guc_ct.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #include <linux/bitfield.h> 7 + #include <linux/circ_buf.h> 8 + #include <linux/delay.h> 9 + 10 + #include <drm/drm_managed.h> 11 + 12 + #include "xe_bo.h" 13 + #include "xe_device.h" 14 + #include "xe_gt.h" 15 + #include "xe_guc.h" 16 + #include "xe_guc_ct.h" 17 + #include "xe_gt_pagefault.h" 18 + #include "xe_guc_submit.h" 19 + #include "xe_map.h" 20 + #include "xe_trace.h" 21 + 22 + /* Used when a CT send wants to block and/or receive data */ 23 + struct g2h_fence { 24 + wait_queue_head_t wq; 25 + u32 *response_buffer; 26 + u32 seqno; 27 + u16 response_len; 28 + u16 error; 29 + u16 hint; 30 + u16 reason; 31 + bool retry; 32 + bool fail; 33 + bool done; 34 + }; 35 + 36 + static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer) 37 + { 38 + g2h_fence->response_buffer = response_buffer; 39 + g2h_fence->response_len = 0; 40 + g2h_fence->fail = false; 41 + g2h_fence->retry = false; 42 + g2h_fence->done = false; 43 + g2h_fence->seqno = ~0x0; 44 + } 45 + 46 + static bool g2h_fence_needs_alloc(struct g2h_fence *g2h_fence) 47 + { 48 + return g2h_fence->seqno == ~0x0; 49 + } 50 + 51 + static struct xe_guc * 52 + ct_to_guc(struct xe_guc_ct *ct) 53 + { 54 + return container_of(ct, struct xe_guc, ct); 55 + } 56 + 57 + static struct xe_gt * 58 + ct_to_gt(struct xe_guc_ct *ct) 59 + { 60 + return container_of(ct, struct xe_gt, uc.guc.ct); 61 + } 62 + 63 + static struct xe_device * 64 + ct_to_xe(struct xe_guc_ct *ct) 65 + { 66 + return gt_to_xe(ct_to_gt(ct)); 67 + } 68 + 69 + /** 70 + * DOC: GuC CTB Blob 71 + * 72 + * We allocate a single blob to hold both CTB descriptors and buffers: 73 + * 74 + * +--------+-----------------------------------------------+------+ 75 + * | offset | contents | size | 76 + * +========+===============================================+======+ 77 + * | 0x0000 | H2G CTB Descriptor (send) | | 78 + * 
+--------+-----------------------------------------------+ 4K | 79 + * | 0x0800 | G2H CTB Descriptor (g2h) | | 80 + * +--------+-----------------------------------------------+------+ 81 + * | 0x1000 | H2G CT Buffer (send) | n*4K | 82 + * | | | | 83 + * +--------+-----------------------------------------------+------+ 84 + * | 0x1000 | G2H CT Buffer (g2h) | m*4K | 85 + * | + n*4K | | | 86 + * +--------+-----------------------------------------------+------+ 87 + * 88 + * The size of each ``CT Buffer`` must be a multiple of 4K. 89 + * We don't expect too many messages in flight at any time, unless we are 90 + * using GuC submission. In that case each request requires a minimum of 91 + * 2 dwords, which gives us a maximum of 256 queued requests. Hopefully this is 92 + * enough space to avoid backpressure on the driver. We increase the size 93 + * of the receive buffer (relative to the send) to ensure a G2H response 94 + * CTB has a landing spot. 95 + */ 96 + 97 + #define CTB_DESC_SIZE ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K) 98 + #define CTB_H2G_BUFFER_SIZE (SZ_4K) 99 + #define CTB_G2H_BUFFER_SIZE (4 * CTB_H2G_BUFFER_SIZE) 100 + #define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 4) 101 + 102 + static size_t guc_ct_size(void) 103 + { 104 + return 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE + 105 + CTB_G2H_BUFFER_SIZE; 106 + } 107 + 108 + static void guc_ct_fini(struct drm_device *drm, void *arg) 109 + { 110 + struct xe_guc_ct *ct = arg; 111 + 112 + xa_destroy(&ct->fence_lookup); 113 + xe_bo_unpin_map_no_vm(ct->bo); 114 + } 115 + 116 + static void g2h_worker_func(struct work_struct *w); 117 + 118 + static void primelockdep(struct xe_guc_ct *ct) 119 + { 120 + if (!IS_ENABLED(CONFIG_LOCKDEP)) 121 + return; 122 + 123 + fs_reclaim_acquire(GFP_KERNEL); 124 + might_lock(&ct->lock); 125 + fs_reclaim_release(GFP_KERNEL); 126 + } 127 + 128 + int xe_guc_ct_init(struct xe_guc_ct *ct) 129 + { 130 + struct xe_device *xe = ct_to_xe(ct); 131 + struct xe_gt *gt = ct_to_gt(ct); 132 + struct
xe_bo *bo; 133 + int err; 134 + 135 + XE_BUG_ON(guc_ct_size() % PAGE_SIZE); 136 + 137 + mutex_init(&ct->lock); 138 + spin_lock_init(&ct->fast_lock); 139 + xa_init(&ct->fence_lookup); 140 + ct->fence_context = dma_fence_context_alloc(1); 141 + INIT_WORK(&ct->g2h_worker, g2h_worker_func); 142 + init_waitqueue_head(&ct->wq); 143 + 144 + primelockdep(ct); 145 + 146 + bo = xe_bo_create_pin_map(xe, gt, NULL, guc_ct_size(), 147 + ttm_bo_type_kernel, 148 + XE_BO_CREATE_VRAM_IF_DGFX(gt) | 149 + XE_BO_CREATE_GGTT_BIT); 150 + if (IS_ERR(bo)) 151 + return PTR_ERR(bo); 152 + 153 + ct->bo = bo; 154 + 155 + err = drmm_add_action_or_reset(&xe->drm, guc_ct_fini, ct); 156 + if (err) 157 + return err; 158 + 159 + return 0; 160 + } 161 + 162 + #define desc_read(xe_, guc_ctb__, field_) \ 163 + xe_map_rd_field(xe_, &guc_ctb__->desc, 0, \ 164 + struct guc_ct_buffer_desc, field_) 165 + 166 + #define desc_write(xe_, guc_ctb__, field_, val_) \ 167 + xe_map_wr_field(xe_, &guc_ctb__->desc, 0, \ 168 + struct guc_ct_buffer_desc, field_, val_) 169 + 170 + static void guc_ct_ctb_h2g_init(struct xe_device *xe, struct guc_ctb *h2g, 171 + struct iosys_map *map) 172 + { 173 + h2g->size = CTB_H2G_BUFFER_SIZE / sizeof(u32); 174 + h2g->resv_space = 0; 175 + h2g->tail = 0; 176 + h2g->head = 0; 177 + h2g->space = CIRC_SPACE(h2g->tail, h2g->head, h2g->size) - 178 + h2g->resv_space; 179 + h2g->broken = false; 180 + 181 + h2g->desc = *map; 182 + xe_map_memset(xe, &h2g->desc, 0, 0, sizeof(struct guc_ct_buffer_desc)); 183 + 184 + h2g->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2); 185 + } 186 + 187 + static void guc_ct_ctb_g2h_init(struct xe_device *xe, struct guc_ctb *g2h, 188 + struct iosys_map *map) 189 + { 190 + g2h->size = CTB_G2H_BUFFER_SIZE / sizeof(u32); 191 + g2h->resv_space = G2H_ROOM_BUFFER_SIZE / sizeof(u32); 192 + g2h->head = 0; 193 + g2h->tail = 0; 194 + g2h->space = CIRC_SPACE(g2h->tail, g2h->head, g2h->size) - 195 + g2h->resv_space; 196 + g2h->broken = false; 197 + 198 + g2h->desc = 
IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE); 199 + xe_map_memset(xe, &g2h->desc, 0, 0, sizeof(struct guc_ct_buffer_desc)); 200 + 201 + g2h->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2 + 202 + CTB_H2G_BUFFER_SIZE); 203 + } 204 + 205 + static int guc_ct_ctb_h2g_register(struct xe_guc_ct *ct) 206 + { 207 + struct xe_guc *guc = ct_to_guc(ct); 208 + u32 desc_addr, ctb_addr, size; 209 + int err; 210 + 211 + desc_addr = xe_bo_ggtt_addr(ct->bo); 212 + ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2; 213 + size = ct->ctbs.h2g.size * sizeof(u32); 214 + 215 + err = xe_guc_self_cfg64(guc, 216 + GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY, 217 + desc_addr); 218 + if (err) 219 + return err; 220 + 221 + err = xe_guc_self_cfg64(guc, 222 + GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY, 223 + ctb_addr); 224 + if (err) 225 + return err; 226 + 227 + return xe_guc_self_cfg32(guc, 228 + GUC_KLV_SELF_CFG_H2G_CTB_SIZE_KEY, 229 + size); 230 + } 231 + 232 + static int guc_ct_ctb_g2h_register(struct xe_guc_ct *ct) 233 + { 234 + struct xe_guc *guc = ct_to_guc(ct); 235 + u32 desc_addr, ctb_addr, size; 236 + int err; 237 + 238 + desc_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE; 239 + ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2 + 240 + CTB_H2G_BUFFER_SIZE; 241 + size = ct->ctbs.g2h.size * sizeof(u32); 242 + 243 + err = xe_guc_self_cfg64(guc, 244 + GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY, 245 + desc_addr); 246 + if (err) 247 + return err; 248 + 249 + err = xe_guc_self_cfg64(guc, 250 + GUC_KLV_SELF_CFG_G2H_CTB_ADDR_KEY, 251 + ctb_addr); 252 + if (err) 253 + return err; 254 + 255 + return xe_guc_self_cfg32(guc, 256 + GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY, 257 + size); 258 + } 259 + 260 + static int guc_ct_control_toggle(struct xe_guc_ct *ct, bool enable) 261 + { 262 + u32 request[HOST2GUC_CONTROL_CTB_REQUEST_MSG_LEN] = { 263 + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | 264 + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | 265 + 
FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, 266 + GUC_ACTION_HOST2GUC_CONTROL_CTB), 267 + FIELD_PREP(HOST2GUC_CONTROL_CTB_REQUEST_MSG_1_CONTROL, 268 + enable ? GUC_CTB_CONTROL_ENABLE : 269 + GUC_CTB_CONTROL_DISABLE), 270 + }; 271 + int ret = xe_guc_send_mmio(ct_to_guc(ct), request, ARRAY_SIZE(request)); 272 + 273 + return ret > 0 ? -EPROTO : ret; 274 + } 275 + 276 + int xe_guc_ct_enable(struct xe_guc_ct *ct) 277 + { 278 + struct xe_device *xe = ct_to_xe(ct); 279 + int err; 280 + 281 + XE_BUG_ON(ct->enabled); 282 + 283 + guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap); 284 + guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap); 285 + 286 + err = guc_ct_ctb_h2g_register(ct); 287 + if (err) 288 + goto err_out; 289 + 290 + err = guc_ct_ctb_g2h_register(ct); 291 + if (err) 292 + goto err_out; 293 + 294 + err = guc_ct_control_toggle(ct, true); 295 + if (err) 296 + goto err_out; 297 + 298 + mutex_lock(&ct->lock); 299 + ct->g2h_outstanding = 0; 300 + ct->enabled = true; 301 + mutex_unlock(&ct->lock); 302 + 303 + smp_mb(); 304 + wake_up_all(&ct->wq); 305 + drm_dbg(&xe->drm, "GuC CT communication channel enabled\n"); 306 + 307 + return 0; 308 + 309 + err_out: 310 + drm_err(&xe->drm, "Failed to enable CT (%d)\n", err); 311 + 312 + return err; 313 + } 314 + 315 + void xe_guc_ct_disable(struct xe_guc_ct *ct) 316 + { 317 + mutex_lock(&ct->lock); 318 + ct->enabled = false; 319 + mutex_unlock(&ct->lock); 320 + 321 + xa_destroy(&ct->fence_lookup); 322 + } 323 + 324 + static bool h2g_has_room(struct xe_guc_ct *ct, u32 cmd_len) 325 + { 326 + struct guc_ctb *h2g = &ct->ctbs.h2g; 327 + 328 + lockdep_assert_held(&ct->lock); 329 + 330 + if (cmd_len > h2g->space) { 331 + h2g->head = desc_read(ct_to_xe(ct), h2g, head); 332 + h2g->space = CIRC_SPACE(h2g->tail, h2g->head, h2g->size) - 333 + h2g->resv_space; 334 + if (cmd_len > h2g->space) 335 + return false; 336 + } 337 + 338 + return true; 339 + } 340 + 341 + static bool g2h_has_room(struct xe_guc_ct *ct, u32 g2h_len) 342 + { 343 +
lockdep_assert_held(&ct->lock); 344 + 345 + return ct->ctbs.g2h.space > g2h_len; 346 + } 347 + 348 + static int has_room(struct xe_guc_ct *ct, u32 cmd_len, u32 g2h_len) 349 + { 350 + lockdep_assert_held(&ct->lock); 351 + 352 + if (!g2h_has_room(ct, g2h_len) || !h2g_has_room(ct, cmd_len)) 353 + return -EBUSY; 354 + 355 + return 0; 356 + } 357 + 358 + static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len) 359 + { 360 + lockdep_assert_held(&ct->lock); 361 + ct->ctbs.h2g.space -= cmd_len; 362 + } 363 + 364 + static void g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h) 365 + { 366 + XE_BUG_ON(g2h_len > ct->ctbs.g2h.space); 367 + 368 + if (g2h_len) { 369 + spin_lock_irq(&ct->fast_lock); 370 + ct->ctbs.g2h.space -= g2h_len; 371 + ct->g2h_outstanding += num_g2h; 372 + spin_unlock_irq(&ct->fast_lock); 373 + } 374 + } 375 + 376 + static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len) 377 + { 378 + lockdep_assert_held(&ct->fast_lock); 379 + XE_WARN_ON(ct->ctbs.g2h.space + g2h_len > 380 + ct->ctbs.g2h.size - ct->ctbs.g2h.resv_space); 381 + 382 + ct->ctbs.g2h.space += g2h_len; 383 + --ct->g2h_outstanding; 384 + } 385 + 386 + static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len) 387 + { 388 + spin_lock_irq(&ct->fast_lock); 389 + __g2h_release_space(ct, g2h_len); 390 + spin_unlock_irq(&ct->fast_lock); 391 + } 392 + 393 + static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, 394 + u32 ct_fence_value, bool want_response) 395 + { 396 + struct xe_device *xe = ct_to_xe(ct); 397 + struct guc_ctb *h2g = &ct->ctbs.h2g; 398 + u32 cmd[GUC_CTB_MSG_MAX_LEN / sizeof(u32)]; 399 + u32 cmd_len = len + GUC_CTB_HDR_LEN; 400 + u32 cmd_idx = 0, i; 401 + u32 tail = h2g->tail; 402 + struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&h2g->cmds, 403 + tail * sizeof(u32)); 404 + 405 + lockdep_assert_held(&ct->lock); 406 + XE_BUG_ON(len * sizeof(u32) > GUC_CTB_MSG_MAX_LEN); 407 + XE_BUG_ON(tail > h2g->size); 408 + 409 + /* Command will wrap, 
   zero fill (NOPs), return and check credits again */
	if (tail + cmd_len > h2g->size) {
		xe_map_memset(xe, &map, 0, 0, (h2g->size - tail) * sizeof(u32));
		h2g_reserve_space(ct, (h2g->size - tail));
		h2g->tail = 0;
		desc_write(xe, h2g, tail, h2g->tail);

		return -EAGAIN;
	}

	/*
	 * dw0: CT header (including fence)
	 * dw1: HXG header (including action code)
	 * dw2+: action data
	 */
	cmd[cmd_idx++] = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) |
		FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) |
		FIELD_PREP(GUC_CTB_MSG_0_FENCE, ct_fence_value);
	if (want_response) {
		cmd[cmd_idx++] =
			FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
			FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
				   GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
	} else {
		cmd[cmd_idx++] =
			FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) |
			FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
				   GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
	}
	for (i = 1; i < len; ++i)
		cmd[cmd_idx++] = action[i];

	/* Write H2G, ensuring it is visible before the descriptor update */
	xe_map_memcpy_to(xe, &map, 0, cmd, cmd_len * sizeof(u32));
	xe_device_wmb(ct_to_xe(ct));

	/* Update local copies */
	h2g->tail = (tail + cmd_len) % h2g->size;
	h2g_reserve_space(ct, cmd_len);

	/* Update descriptor */
	desc_write(xe, h2g, tail, h2g->tail);

	return 0;
}

static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
				u32 len, u32 g2h_len, u32 num_g2h,
				struct g2h_fence *g2h_fence)
{
	int ret;

	XE_BUG_ON(g2h_len && g2h_fence);
	XE_BUG_ON(num_g2h && g2h_fence);
	XE_BUG_ON(g2h_len && !num_g2h);
	XE_BUG_ON(!g2h_len && num_g2h);
	lockdep_assert_held(&ct->lock);

	if (unlikely(ct->ctbs.h2g.broken)) {
		ret = -EPIPE;
		goto out;
	}

	if
(unlikely(!ct->enabled)) { 473 + ret = -ENODEV; 474 + goto out; 475 + } 476 + 477 + if (g2h_fence) { 478 + g2h_len = GUC_CTB_HXG_MSG_MAX_LEN; 479 + num_g2h = 1; 480 + 481 + if (g2h_fence_needs_alloc(g2h_fence)) { 482 + void *ptr; 483 + 484 + g2h_fence->seqno = (ct->fence_seqno++ & 0xffff); 485 + init_waitqueue_head(&g2h_fence->wq); 486 + ptr = xa_store(&ct->fence_lookup, 487 + g2h_fence->seqno, 488 + g2h_fence, GFP_ATOMIC); 489 + if (IS_ERR(ptr)) { 490 + ret = PTR_ERR(ptr); 491 + goto out; 492 + } 493 + } 494 + } 495 + 496 + xe_device_mem_access_get(ct_to_xe(ct)); 497 + retry: 498 + ret = has_room(ct, len + GUC_CTB_HDR_LEN, g2h_len); 499 + if (unlikely(ret)) 500 + goto put_wa; 501 + 502 + ret = h2g_write(ct, action, len, g2h_fence ? g2h_fence->seqno : 0, 503 + !!g2h_fence); 504 + if (unlikely(ret)) { 505 + if (ret == -EAGAIN) 506 + goto retry; 507 + goto put_wa; 508 + } 509 + 510 + g2h_reserve_space(ct, g2h_len, num_g2h); 511 + xe_guc_notify(ct_to_guc(ct)); 512 + put_wa: 513 + xe_device_mem_access_put(ct_to_xe(ct)); 514 + out: 515 + 516 + return ret; 517 + } 518 + 519 + static void kick_reset(struct xe_guc_ct *ct) 520 + { 521 + xe_gt_reset_async(ct_to_gt(ct)); 522 + } 523 + 524 + static int dequeue_one_g2h(struct xe_guc_ct *ct); 525 + 526 + static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, 527 + u32 g2h_len, u32 num_g2h, 528 + struct g2h_fence *g2h_fence) 529 + { 530 + struct drm_device *drm = &ct_to_xe(ct)->drm; 531 + struct drm_printer p = drm_info_printer(drm->dev); 532 + unsigned int sleep_period_ms = 1; 533 + int ret; 534 + 535 + XE_BUG_ON(g2h_len && g2h_fence); 536 + lockdep_assert_held(&ct->lock); 537 + 538 + try_again: 539 + ret = __guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, 540 + g2h_fence); 541 + 542 + /* 543 + * We wait to try to restore credits for about 1 second before bailing. 
 * In the case of H2G credits we have no choice but just to wait for the
	 * GuC to consume H2Gs in the channel so we use a wait / sleep loop. In
	 * the case of G2H we process any G2H in the channel, hopefully freeing
	 * credits as we consume the G2H messages.
	 */
	if (unlikely(ret == -EBUSY &&
		     !h2g_has_room(ct, len + GUC_CTB_HDR_LEN))) {
		struct guc_ctb *h2g = &ct->ctbs.h2g;

		if (sleep_period_ms == 1024)
			goto broken;

		trace_xe_guc_ct_h2g_flow_control(h2g->head, h2g->tail,
						 h2g->size, h2g->space,
						 len + GUC_CTB_HDR_LEN);
		msleep(sleep_period_ms);
		sleep_period_ms <<= 1;

		goto try_again;
	} else if (unlikely(ret == -EBUSY)) {
		struct xe_device *xe = ct_to_xe(ct);
		struct guc_ctb *g2h = &ct->ctbs.g2h;

		trace_xe_guc_ct_g2h_flow_control(g2h->head,
						 desc_read(xe, g2h, tail),
						 g2h->size, g2h->space,
						 g2h_fence ?
						 GUC_CTB_HXG_MSG_MAX_LEN :
						 g2h_len);

#define g2h_avail(ct) \
	(desc_read(ct_to_xe(ct), (&ct->ctbs.g2h), tail) != ct->ctbs.g2h.head)
		if (!wait_event_timeout(ct->wq, !ct->g2h_outstanding ||
					g2h_avail(ct), HZ))
			goto broken;
#undef g2h_avail

		if (dequeue_one_g2h(ct) < 0)
			goto broken;

		goto try_again;
	}

	return ret;

broken:
	drm_err(drm, "No forward progress on H2G, reset required");
	xe_guc_ct_print(ct, &p);
	ct->ctbs.h2g.broken = true;

	return -EDEADLK;
}

static int guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
		       u32 g2h_len, u32 num_g2h, struct g2h_fence *g2h_fence)
{
	int ret;

	XE_BUG_ON(g2h_len && g2h_fence);

	mutex_lock(&ct->lock);
	ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, g2h_fence);
	mutex_unlock(&ct->lock);

	return ret;
}

int xe_guc_ct_send(struct xe_guc_ct *ct,
		   const u32 *action, u32 len,
		   u32 g2h_len, u32 num_g2h)
{
	int ret;

	ret = guc_ct_send(ct, action, len, g2h_len, num_g2h, NULL);
	if (ret == -EDEADLK)
		kick_reset(ct);

	return ret;
}

int xe_guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
			  u32 g2h_len, u32 num_g2h)
{
	int ret;

	ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, NULL);
	if (ret == -EDEADLK)
		kick_reset(ct);

	return ret;
}

int xe_guc_ct_send_g2h_handler(struct xe_guc_ct *ct, const u32 *action, u32 len)
{
	int ret;

	lockdep_assert_held(&ct->lock);

	ret = guc_ct_send_locked(ct, action, len, 0, 0, NULL);
	if (ret == -EDEADLK)
		kick_reset(ct);

	return ret;
}

/*
 * Check if a GT reset is in progress or will occur and if the GT reset brought
 * the CT back up. Randomly picking 5 seconds as an upper limit for the GT
 * reset to complete.
 */
static bool retry_failure(struct xe_guc_ct *ct, int ret)
{
	if (!(ret == -EDEADLK || ret == -EPIPE || ret == -ENODEV))
		return false;

#define ct_alive(ct)	\
	(ct->enabled && !ct->ctbs.h2g.broken && !ct->ctbs.g2h.broken)
	if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct), HZ * 5))
		return false;
#undef ct_alive

	return true;
}

static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
			    u32 *response_buffer, bool no_fail)
{
	struct xe_device *xe = ct_to_xe(ct);
	struct g2h_fence g2h_fence;
	int ret = 0;

	/*
	 * We use a fence to implement blocking sends / receiving response data.
	 * The seqno of the fence is sent in the H2G, returned in the G2H, and
	 * an xarray is used as storage media with the seqno being the key.
	 * Fields in the fence hold success, failure, retry status and the
	 * response data. Safe to allocate on the stack as the xarray is the
	 * only reference and it cannot be present after this function exits.
	 */
retry:
	g2h_fence_init(&g2h_fence, response_buffer);
retry_same_fence:
	ret = guc_ct_send(ct, action, len, 0, 0, &g2h_fence);
	if (unlikely(ret == -ENOMEM)) {
		void *ptr;

		/* Retry allocation with GFP_KERNEL */
		ptr = xa_store(&ct->fence_lookup,
			       g2h_fence.seqno,
			       &g2h_fence, GFP_KERNEL);
		if (IS_ERR(ptr))
			return PTR_ERR(ptr);

		goto retry_same_fence;
	} else if (unlikely(ret)) {
		if (ret == -EDEADLK)
			kick_reset(ct);

		if (no_fail && retry_failure(ct, ret))
			goto retry_same_fence;

		if (!g2h_fence_needs_alloc(&g2h_fence))
			xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno);

		return ret;
	}

	ret = wait_event_timeout(g2h_fence.wq, g2h_fence.done, HZ);
	if (!ret) {
		drm_err(&xe->drm, "Timed out waiting for G2H, fence %u, action %04x",
			g2h_fence.seqno, action[0]);
		xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno);
		return -ETIME;
	}

	if (g2h_fence.retry) {
		drm_warn(&xe->drm, "Send retry, action 0x%04x, reason %d",
			 action[0], g2h_fence.reason);
		goto retry;
	}
	if (g2h_fence.fail) {
		drm_err(&xe->drm, "Send failed, action 0x%04x, error %d, hint %d",
			action[0], g2h_fence.error, g2h_fence.hint);
		ret = -EIO;
	}

	return ret > 0 ?
0 : ret; 730 + } 731 + 732 + int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len, 733 + u32 *response_buffer) 734 + { 735 + return guc_ct_send_recv(ct, action, len, response_buffer, false); 736 + } 737 + 738 + int xe_guc_ct_send_recv_no_fail(struct xe_guc_ct *ct, const u32 *action, 739 + u32 len, u32 *response_buffer) 740 + { 741 + return guc_ct_send_recv(ct, action, len, response_buffer, true); 742 + } 743 + 744 + static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len) 745 + { 746 + u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]); 747 + 748 + lockdep_assert_held(&ct->lock); 749 + 750 + switch (action) { 751 + case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE: 752 + case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE: 753 + case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE: 754 + case XE_GUC_ACTION_TLB_INVALIDATION_DONE: 755 + g2h_release_space(ct, len); 756 + } 757 + 758 + return 0; 759 + } 760 + 761 + static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) 762 + { 763 + struct xe_device *xe = ct_to_xe(ct); 764 + u32 response_len = len - GUC_CTB_MSG_MIN_LEN; 765 + u32 fence = FIELD_GET(GUC_CTB_MSG_0_FENCE, msg[0]); 766 + u32 type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]); 767 + struct g2h_fence *g2h_fence; 768 + 769 + lockdep_assert_held(&ct->lock); 770 + 771 + g2h_fence = xa_erase(&ct->fence_lookup, fence); 772 + if (unlikely(!g2h_fence)) { 773 + /* Don't tear down channel, as send could've timed out */ 774 + drm_warn(&xe->drm, "G2H fence (%u) not found!\n", fence); 775 + g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN); 776 + return 0; 777 + } 778 + 779 + XE_WARN_ON(fence != g2h_fence->seqno); 780 + 781 + if (type == GUC_HXG_TYPE_RESPONSE_FAILURE) { 782 + g2h_fence->fail = true; 783 + g2h_fence->error = 784 + FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, msg[0]); 785 + g2h_fence->hint = 786 + FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, msg[0]); 787 + } else if (type == GUC_HXG_TYPE_NO_RESPONSE_RETRY) { 788 + g2h_fence->retry = true; 789 + 
g2h_fence->reason = 790 + FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, msg[0]); 791 + } else if (g2h_fence->response_buffer) { 792 + g2h_fence->response_len = response_len; 793 + memcpy(g2h_fence->response_buffer, msg + GUC_CTB_MSG_MIN_LEN, 794 + response_len * sizeof(u32)); 795 + } 796 + 797 + g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN); 798 + 799 + g2h_fence->done = true; 800 + smp_mb(); 801 + 802 + wake_up(&g2h_fence->wq); 803 + 804 + return 0; 805 + } 806 + 807 + static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) 808 + { 809 + struct xe_device *xe = ct_to_xe(ct); 810 + u32 header, hxg, origin, type; 811 + int ret; 812 + 813 + lockdep_assert_held(&ct->lock); 814 + 815 + header = msg[0]; 816 + hxg = msg[1]; 817 + 818 + origin = FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg); 819 + if (unlikely(origin != GUC_HXG_ORIGIN_GUC)) { 820 + drm_err(&xe->drm, 821 + "G2H channel broken on read, origin=%d, reset required\n", 822 + origin); 823 + ct->ctbs.g2h.broken = true; 824 + 825 + return -EPROTO; 826 + } 827 + 828 + type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg); 829 + switch (type) { 830 + case GUC_HXG_TYPE_EVENT: 831 + ret = parse_g2h_event(ct, msg, len); 832 + break; 833 + case GUC_HXG_TYPE_RESPONSE_SUCCESS: 834 + case GUC_HXG_TYPE_RESPONSE_FAILURE: 835 + case GUC_HXG_TYPE_NO_RESPONSE_RETRY: 836 + ret = parse_g2h_response(ct, msg, len); 837 + break; 838 + default: 839 + drm_err(&xe->drm, 840 + "G2H channel broken on read, type=%d, reset required\n", 841 + type); 842 + ct->ctbs.g2h.broken = true; 843 + 844 + ret = -EOPNOTSUPP; 845 + } 846 + 847 + return ret; 848 + } 849 + 850 + static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) 851 + { 852 + struct xe_device *xe = ct_to_xe(ct); 853 + struct xe_guc *guc = ct_to_guc(ct); 854 + u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]); 855 + u32 *payload = msg + GUC_CTB_HXG_MSG_MIN_LEN; 856 + u32 adj_len = len - GUC_CTB_HXG_MSG_MIN_LEN; 857 + int ret = 0; 858 + 859 + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]) 
!= GUC_HXG_TYPE_EVENT) 860 + return 0; 861 + 862 + switch (action) { 863 + case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE: 864 + ret = xe_guc_sched_done_handler(guc, payload, adj_len); 865 + break; 866 + case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE: 867 + ret = xe_guc_deregister_done_handler(guc, payload, adj_len); 868 + break; 869 + case XE_GUC_ACTION_CONTEXT_RESET_NOTIFICATION: 870 + ret = xe_guc_engine_reset_handler(guc, payload, adj_len); 871 + break; 872 + case XE_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION: 873 + ret = xe_guc_engine_reset_failure_handler(guc, payload, 874 + adj_len); 875 + break; 876 + case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE: 877 + /* Selftest only at the moment */ 878 + break; 879 + case XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION: 880 + case XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE: 881 + /* FIXME: Handle this */ 882 + break; 883 + case XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR: 884 + ret = xe_guc_engine_memory_cat_error_handler(guc, payload, 885 + adj_len); 886 + break; 887 + case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC: 888 + ret = xe_guc_pagefault_handler(guc, payload, adj_len); 889 + break; 890 + case XE_GUC_ACTION_TLB_INVALIDATION_DONE: 891 + ret = xe_guc_tlb_invalidation_done_handler(guc, payload, 892 + adj_len); 893 + break; 894 + case XE_GUC_ACTION_ACCESS_COUNTER_NOTIFY: 895 + ret = xe_guc_access_counter_notify_handler(guc, payload, 896 + adj_len); 897 + break; 898 + default: 899 + drm_err(&xe->drm, "unexpected action 0x%04x\n", action); 900 + } 901 + 902 + if (ret) 903 + drm_err(&xe->drm, "action 0x%04x failed processing, ret=%d\n", 904 + action, ret); 905 + 906 + return 0; 907 + } 908 + 909 + static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) 910 + { 911 + struct xe_device *xe = ct_to_xe(ct); 912 + struct guc_ctb *g2h = &ct->ctbs.g2h; 913 + u32 tail, head, len; 914 + s32 avail; 915 + 916 + lockdep_assert_held(&ct->fast_lock); 917 + 918 + if (!ct->enabled) 919 + return -ENODEV; 920 + 921 + if (g2h->broken) 922 + return 
-EPIPE; 923 + 924 + /* Calculate DW available to read */ 925 + tail = desc_read(xe, g2h, tail); 926 + avail = tail - g2h->head; 927 + if (unlikely(avail == 0)) 928 + return 0; 929 + 930 + if (avail < 0) 931 + avail += g2h->size; 932 + 933 + /* Read header */ 934 + xe_map_memcpy_from(xe, msg, &g2h->cmds, sizeof(u32) * g2h->head, sizeof(u32)); 935 + len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, msg[0]) + GUC_CTB_MSG_MIN_LEN; 936 + if (len > avail) { 937 + drm_err(&xe->drm, 938 + "G2H channel broken on read, avail=%d, len=%d, reset required\n", 939 + avail, len); 940 + g2h->broken = true; 941 + 942 + return -EPROTO; 943 + } 944 + 945 + head = (g2h->head + 1) % g2h->size; 946 + avail = len - 1; 947 + 948 + /* Read G2H message */ 949 + if (avail + head > g2h->size) { 950 + u32 avail_til_wrap = g2h->size - head; 951 + 952 + xe_map_memcpy_from(xe, msg + 1, 953 + &g2h->cmds, sizeof(u32) * head, 954 + avail_til_wrap * sizeof(u32)); 955 + xe_map_memcpy_from(xe, msg + 1 + avail_til_wrap, 956 + &g2h->cmds, 0, 957 + (avail - avail_til_wrap) * sizeof(u32)); 958 + } else { 959 + xe_map_memcpy_from(xe, msg + 1, 960 + &g2h->cmds, sizeof(u32) * head, 961 + avail * sizeof(u32)); 962 + } 963 + 964 + if (fast_path) { 965 + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]) != GUC_HXG_TYPE_EVENT) 966 + return 0; 967 + 968 + switch (FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1])) { 969 + case XE_GUC_ACTION_TLB_INVALIDATION_DONE: 970 + case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC: 971 + break; /* Process these in fast-path */ 972 + default: 973 + return 0; 974 + } 975 + } 976 + 977 + /* Update local / descriptor header */ 978 + g2h->head = (head + avail) % g2h->size; 979 + desc_write(xe, g2h, head, g2h->head); 980 + 981 + return len; 982 + } 983 + 984 + static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len) 985 + { 986 + struct xe_device *xe = ct_to_xe(ct); 987 + struct xe_guc *guc = ct_to_guc(ct); 988 + u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]); 989 + u32 *payload = msg + 
GUC_CTB_HXG_MSG_MIN_LEN; 990 + u32 adj_len = len - GUC_CTB_HXG_MSG_MIN_LEN; 991 + int ret = 0; 992 + 993 + switch (action) { 994 + case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC: 995 + ret = xe_guc_pagefault_handler(guc, payload, adj_len); 996 + break; 997 + case XE_GUC_ACTION_TLB_INVALIDATION_DONE: 998 + __g2h_release_space(ct, len); 999 + ret = xe_guc_tlb_invalidation_done_handler(guc, payload, 1000 + adj_len); 1001 + break; 1002 + default: 1003 + XE_WARN_ON("NOT_POSSIBLE"); 1004 + } 1005 + 1006 + if (ret) 1007 + drm_err(&xe->drm, "action 0x%04x failed processing, ret=%d\n", 1008 + action, ret); 1009 + } 1010 + 1011 + /** 1012 + * xe_guc_ct_fast_path - process critical G2H in the IRQ handler 1013 + * @ct: GuC CT object 1014 + * 1015 + * Anything related to page faults is critical for performance, process these 1016 + * critical G2H in the IRQ. This is safe as these handlers either just wake up 1017 + * waiters or queue another worker. 1018 + */ 1019 + void xe_guc_ct_fast_path(struct xe_guc_ct *ct) 1020 + { 1021 + struct xe_device *xe = ct_to_xe(ct); 1022 + int len; 1023 + 1024 + if (!xe_device_in_fault_mode(xe) || !xe_device_mem_access_ongoing(xe)) 1025 + return; 1026 + 1027 + spin_lock(&ct->fast_lock); 1028 + do { 1029 + len = g2h_read(ct, ct->fast_msg, true); 1030 + if (len > 0) 1031 + g2h_fast_path(ct, ct->fast_msg, len); 1032 + } while (len > 0); 1033 + spin_unlock(&ct->fast_lock); 1034 + } 1035 + 1036 + /* Returns less than zero on error, 0 on done, 1 on more available */ 1037 + static int dequeue_one_g2h(struct xe_guc_ct *ct) 1038 + { 1039 + int len; 1040 + int ret; 1041 + 1042 + lockdep_assert_held(&ct->lock); 1043 + 1044 + spin_lock_irq(&ct->fast_lock); 1045 + len = g2h_read(ct, ct->msg, false); 1046 + spin_unlock_irq(&ct->fast_lock); 1047 + if (len <= 0) 1048 + return len; 1049 + 1050 + ret = parse_g2h_msg(ct, ct->msg, len); 1051 + if (unlikely(ret < 0)) 1052 + return ret; 1053 + 1054 + ret = process_g2h_msg(ct, ct->msg, len); 1055 + if (unlikely(ret < 
0)) 1056 + return ret; 1057 + 1058 + return 1; 1059 + } 1060 + 1061 + static void g2h_worker_func(struct work_struct *w) 1062 + { 1063 + struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, g2h_worker); 1064 + int ret; 1065 + 1066 + xe_device_mem_access_get(ct_to_xe(ct)); 1067 + do { 1068 + mutex_lock(&ct->lock); 1069 + ret = dequeue_one_g2h(ct); 1070 + mutex_unlock(&ct->lock); 1071 + 1072 + if (unlikely(ret == -EPROTO || ret == -EOPNOTSUPP)) { 1073 + struct drm_device *drm = &ct_to_xe(ct)->drm; 1074 + struct drm_printer p = drm_info_printer(drm->dev); 1075 + 1076 + xe_guc_ct_print(ct, &p); 1077 + kick_reset(ct); 1078 + } 1079 + } while (ret == 1); 1080 + xe_device_mem_access_put(ct_to_xe(ct)); 1081 + } 1082 + 1083 + static void guc_ct_ctb_print(struct xe_device *xe, struct guc_ctb *ctb, 1084 + struct drm_printer *p) 1085 + { 1086 + u32 head, tail; 1087 + 1088 + drm_printf(p, "\tsize: %d\n", ctb->size); 1089 + drm_printf(p, "\tresv_space: %d\n", ctb->resv_space); 1090 + drm_printf(p, "\thead: %d\n", ctb->head); 1091 + drm_printf(p, "\ttail: %d\n", ctb->tail); 1092 + drm_printf(p, "\tspace: %d\n", ctb->space); 1093 + drm_printf(p, "\tbroken: %d\n", ctb->broken); 1094 + 1095 + head = desc_read(xe, ctb, head); 1096 + tail = desc_read(xe, ctb, tail); 1097 + drm_printf(p, "\thead (memory): %d\n", head); 1098 + drm_printf(p, "\ttail (memory): %d\n", tail); 1099 + drm_printf(p, "\tstatus (memory): 0x%x\n", desc_read(xe, ctb, status)); 1100 + 1101 + if (head != tail) { 1102 + struct iosys_map map = 1103 + IOSYS_MAP_INIT_OFFSET(&ctb->cmds, head * sizeof(u32)); 1104 + 1105 + while (head != tail) { 1106 + drm_printf(p, "\tcmd[%d]: 0x%08x\n", head, 1107 + xe_map_rd(xe, &map, 0, u32)); 1108 + ++head; 1109 + if (head == ctb->size) { 1110 + head = 0; 1111 + map = ctb->cmds; 1112 + } else { 1113 + iosys_map_incr(&map, sizeof(u32)); 1114 + } 1115 + } 1116 + } 1117 + } 1118 + 1119 + void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p) 1120 + { 1121 + if 
(ct->enabled) { 1122 + drm_puts(p, "\nH2G CTB (all sizes in DW):\n"); 1123 + guc_ct_ctb_print(ct_to_xe(ct), &ct->ctbs.h2g, p); 1124 + 1125 + drm_puts(p, "\nG2H CTB (all sizes in DW):\n"); 1126 + guc_ct_ctb_print(ct_to_xe(ct), &ct->ctbs.g2h, p); 1127 + drm_printf(p, "\tg2h outstanding: %d\n", ct->g2h_outstanding); 1128 + } else { 1129 + drm_puts(p, "\nCT disabled\n"); 1130 + } 1131 + } 1132 + 1133 + #ifdef XE_GUC_CT_SELFTEST 1134 + /* 1135 + * Disable G2H processing in IRQ handler to force xe_guc_ct_send to enter flow 1136 + * control if enough sent, 8k sends is enough. Verify forward process, verify 1137 + * credits expected values on exit. 1138 + */ 1139 + void xe_guc_ct_selftest(struct xe_guc_ct *ct, struct drm_printer *p) 1140 + { 1141 + struct guc_ctb *g2h = &ct->ctbs.g2h; 1142 + u32 action[] = { XE_GUC_ACTION_SCHED_ENGINE_MODE_SET, 0, 0, 1, }; 1143 + u32 bad_action[] = { XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 0, 0, }; 1144 + int ret; 1145 + int i; 1146 + 1147 + ct->suppress_irq_handler = true; 1148 + drm_puts(p, "Starting GuC CT selftest\n"); 1149 + 1150 + for (i = 0; i < 8192; ++i) { 1151 + ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 4, 1); 1152 + if (ret) { 1153 + drm_printf(p, "Aborted pass %d, ret %d\n", i, ret); 1154 + xe_guc_ct_print(ct, p); 1155 + break; 1156 + } 1157 + } 1158 + 1159 + ct->suppress_irq_handler = false; 1160 + if (!ret) { 1161 + xe_guc_ct_irq_handler(ct); 1162 + msleep(200); 1163 + if (g2h->space != 1164 + CIRC_SPACE(0, 0, g2h->size) - g2h->resv_space) { 1165 + drm_printf(p, "Mismatch on space %d, %d\n", 1166 + g2h->space, 1167 + CIRC_SPACE(0, 0, g2h->size) - 1168 + g2h->resv_space); 1169 + ret = -EIO; 1170 + } 1171 + if (ct->g2h_outstanding) { 1172 + drm_printf(p, "Outstanding G2H, %d\n", 1173 + ct->g2h_outstanding); 1174 + ret = -EIO; 1175 + } 1176 + } 1177 + 1178 + /* Check failure path for blocking CTs too */ 1179 + xe_guc_ct_send_block(ct, bad_action, ARRAY_SIZE(bad_action)); 1180 + if (g2h->space != 1181 + CIRC_SPACE(0, 0, 
g2h->size) - g2h->resv_space) { 1182 + drm_printf(p, "Mismatch on space %d, %d\n", 1183 + g2h->space, 1184 + CIRC_SPACE(0, 0, g2h->size) - 1185 + g2h->resv_space); 1186 + ret = -EIO; 1187 + } 1188 + if (ct->g2h_outstanding) { 1189 + drm_printf(p, "Outstanding G2H, %d\n", 1190 + ct->g2h_outstanding); 1191 + ret = -EIO; 1192 + } 1193 + 1194 + drm_printf(p, "GuC CT selftest done - %s\n", ret ? "FAIL" : "PASS"); 1195 + } 1196 + #endif
+62
drivers/gpu/drm/xe/xe_guc_ct.h
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

#ifndef _XE_GUC_CT_H_
#define _XE_GUC_CT_H_

#include "xe_guc_ct_types.h"

struct drm_printer;

int xe_guc_ct_init(struct xe_guc_ct *ct);
int xe_guc_ct_enable(struct xe_guc_ct *ct);
void xe_guc_ct_disable(struct xe_guc_ct *ct);
void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p);
void xe_guc_ct_fast_path(struct xe_guc_ct *ct);

static inline void xe_guc_ct_irq_handler(struct xe_guc_ct *ct)
{
	wake_up_all(&ct->wq);
#ifdef XE_GUC_CT_SELFTEST
	if (!ct->suppress_irq_handler && ct->enabled)
		queue_work(system_unbound_wq, &ct->g2h_worker);
#else
	if (ct->enabled)
		queue_work(system_unbound_wq, &ct->g2h_worker);
#endif
	xe_guc_ct_fast_path(ct);
}

/* Basic CT send / receives */
int xe_guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
		   u32 g2h_len, u32 num_g2h);
int xe_guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
			  u32 g2h_len, u32 num_g2h);
int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
			u32 *response_buffer);
static inline int
xe_guc_ct_send_block(struct xe_guc_ct *ct, const u32 *action, u32 len)
{
	return xe_guc_ct_send_recv(ct, action, len, NULL);
}

/* This is the only version of CT send that can be called from a G2H handler */
int xe_guc_ct_send_g2h_handler(struct xe_guc_ct *ct, const u32 *action,
			       u32 len);

/* Can't fail because a GT reset is in progress */
int xe_guc_ct_send_recv_no_fail(struct xe_guc_ct *ct, const u32 *action,
				u32 len, u32 *response_buffer);
static inline int
xe_guc_ct_send_block_no_fail(struct xe_guc_ct *ct, const u32 *action, u32 len)
{
	return xe_guc_ct_send_recv_no_fail(ct, action, len, NULL);
}

#ifdef XE_GUC_CT_SELFTEST
void
xe_guc_ct_selftest(struct xe_guc_ct *ct, struct drm_printer *p); 60 + #endif 61 + 62 + #endif
+87
drivers/gpu/drm/xe/xe_guc_ct_types.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_GUC_CT_TYPES_H_ 7 + #define _XE_GUC_CT_TYPES_H_ 8 + 9 + #include <linux/iosys-map.h> 10 + #include <linux/interrupt.h> 11 + #include <linux/spinlock_types.h> 12 + #include <linux/wait.h> 13 + #include <linux/xarray.h> 14 + 15 + #include "abi/guc_communication_ctb_abi.h" 16 + 17 + #define XE_GUC_CT_SELFTEST 18 + 19 + struct xe_bo; 20 + 21 + /** 22 + * struct guc_ctb - GuC command transport buffer (CTB) 23 + */ 24 + struct guc_ctb { 25 + /** @desc: dma buffer map for CTB descriptor */ 26 + struct iosys_map desc; 27 + /** @cmds: dma buffer map for CTB commands */ 28 + struct iosys_map cmds; 29 + /** @size: size of CTB commands (DW) */ 30 + u32 size; 31 + /** @resv_space: reserved space of CTB commands (DW) */ 32 + u32 resv_space; 33 + /** @head: head of CTB commands (DW) */ 34 + u32 head; 35 + /** @tail: tail of CTB commands (DW) */ 36 + u32 tail; 37 + /** @space: space in CTB commands (DW) */ 38 + u32 space; 39 + /** @broken: channel broken */ 40 + bool broken; 41 + }; 42 + 43 + /** 44 + * struct xe_guc_ct - GuC command transport (CT) layer 45 + * 46 + * Includes a pair of CT buffers for bi-directional communication and tracking 47 + * for the H2G and G2H requests sent and received through the buffers. 
 */
struct xe_guc_ct {
	/** @bo: XE BO for CT */
	struct xe_bo *bo;
	/** @lock: protects everything in CT layer */
	struct mutex lock;
	/** @fast_lock: protects G2H channel and credits */
	spinlock_t fast_lock;
	/** @ctbs: buffers for sending and receiving commands */
	struct {
		/** @h2g: Host to GuC (H2G, send) channel */
		struct guc_ctb h2g;
		/** @g2h: GuC to Host (G2H, receive) channel */
		struct guc_ctb g2h;
	} ctbs;
	/** @g2h_outstanding: number of outstanding G2H */
	u32 g2h_outstanding;
	/** @g2h_worker: worker to process G2H messages */
	struct work_struct g2h_worker;
	/** @enabled: CT enabled */
	bool enabled;
	/** @fence_seqno: G2H fence seqno - 16 bits used by CT */
	u32 fence_seqno;
	/** @fence_context: context for G2H fence */
	u64 fence_context;
	/** @fence_lookup: G2H fence lookup */
	struct xarray fence_lookup;
	/** @wq: wait queue used for reliable CT sends and freeing G2H credits */
	wait_queue_head_t wq;
#ifdef XE_GUC_CT_SELFTEST
	/** @suppress_irq_handler: force flow control to sender */
	bool suppress_irq_handler;
#endif
	/** @msg: Message buffer */
	u32 msg[GUC_CTB_MSG_MAX_LEN];
	/** @fast_msg: Message buffer */
	u32 fast_msg[GUC_CTB_MSG_MAX_LEN];
};

#endif
+105
drivers/gpu/drm/xe/xe_guc_debugfs.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #include <drm/drm_debugfs.h> 7 + #include <drm/drm_managed.h> 8 + 9 + #include "xe_device.h" 10 + #include "xe_gt.h" 11 + #include "xe_guc.h" 12 + #include "xe_guc_ct.h" 13 + #include "xe_guc_debugfs.h" 14 + #include "xe_guc_log.h" 15 + #include "xe_macros.h" 16 + 17 + static struct xe_gt * 18 + guc_to_gt(struct xe_guc *guc) 19 + { 20 + return container_of(guc, struct xe_gt, uc.guc); 21 + } 22 + 23 + static struct xe_device * 24 + guc_to_xe(struct xe_guc *guc) 25 + { 26 + return gt_to_xe(guc_to_gt(guc)); 27 + } 28 + 29 + static struct xe_guc *node_to_guc(struct drm_info_node *node) 30 + { 31 + return node->info_ent->data; 32 + } 33 + 34 + static int guc_info(struct seq_file *m, void *data) 35 + { 36 + struct xe_guc *guc = node_to_guc(m->private); 37 + struct xe_device *xe = guc_to_xe(guc); 38 + struct drm_printer p = drm_seq_file_printer(m); 39 + 40 + xe_device_mem_access_get(xe); 41 + xe_guc_print_info(guc, &p); 42 + xe_device_mem_access_put(xe); 43 + 44 + return 0; 45 + } 46 + 47 + static int guc_log(struct seq_file *m, void *data) 48 + { 49 + struct xe_guc *guc = node_to_guc(m->private); 50 + struct xe_device *xe = guc_to_xe(guc); 51 + struct drm_printer p = drm_seq_file_printer(m); 52 + 53 + xe_device_mem_access_get(xe); 54 + xe_guc_log_print(&guc->log, &p); 55 + xe_device_mem_access_put(xe); 56 + 57 + return 0; 58 + } 59 + 60 + #ifdef XE_GUC_CT_SELFTEST 61 + static int guc_ct_selftest(struct seq_file *m, void *data) 62 + { 63 + struct xe_guc *guc = node_to_guc(m->private); 64 + struct xe_device *xe = guc_to_xe(guc); 65 + struct drm_printer p = drm_seq_file_printer(m); 66 + 67 + xe_device_mem_access_get(xe); 68 + xe_guc_ct_selftest(&guc->ct, &p); 69 + xe_device_mem_access_put(xe); 70 + 71 + return 0; 72 + } 73 + #endif 74 + 75 + static const struct drm_info_list debugfs_list[] = { 76 + {"guc_info", guc_info, 0}, 77 + {"guc_log", guc_log, 0}, 78 + #ifdef 
XE_GUC_CT_SELFTEST 79 + {"guc_ct_selftest", guc_ct_selftest, 0}, 80 + #endif 81 + }; 82 + 83 + void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent) 84 + { 85 + struct drm_minor *minor = guc_to_xe(guc)->drm.primary; 86 + struct drm_info_list *local; 87 + int i; 88 + 89 + #define DEBUGFS_SIZE ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list) 90 + local = drmm_kmalloc(&guc_to_xe(guc)->drm, DEBUGFS_SIZE, GFP_KERNEL); 91 + if (!local) { 92 + XE_WARN_ON("Couldn't allocate memory"); 93 + return; 94 + } 95 + 96 + memcpy(local, debugfs_list, DEBUGFS_SIZE); 97 + #undef DEBUGFS_SIZE 98 + 99 + for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i) 100 + local[i].data = guc; 101 + 102 + drm_debugfs_create_files(local, 103 + ARRAY_SIZE(debugfs_list), 104 + parent, minor); 105 + }
+14
drivers/gpu/drm/xe/xe_guc_debugfs.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_GUC_DEBUGFS_H_ 7 + #define _XE_GUC_DEBUGFS_H_ 8 + 9 + struct dentry; 10 + struct xe_guc; 11 + 12 + void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent); 13 + 14 + #endif
+52
drivers/gpu/drm/xe/xe_guc_engine_types.h
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

#ifndef _XE_GUC_ENGINE_TYPES_H_
#define _XE_GUC_ENGINE_TYPES_H_

#include <linux/spinlock.h>
#include <linux/workqueue.h>

#include "xe_gpu_scheduler_types.h"

struct dma_fence;
struct xe_engine;

/**
 * struct xe_guc_engine - GuC specific state for an xe_engine
 */
struct xe_guc_engine {
	/** @engine: Backpointer to parent xe_engine */
	struct xe_engine *engine;
	/** @sched: GPU scheduler for this xe_engine */
	struct xe_gpu_scheduler sched;
	/** @entity: Scheduler entity for this xe_engine */
	struct xe_sched_entity entity;
	/**
	 * @static_msgs: Static messages for this xe_engine, used when a message
	 * needs to be sent through the GPU scheduler but memory allocations are
	 * not allowed.
	 */
#define MAX_STATIC_MSG_TYPE	3
	struct xe_sched_msg static_msgs[MAX_STATIC_MSG_TYPE];
	/** @fini_async: do final fini async from this worker */
	struct work_struct fini_async;
	/** @resume_time: time of last resume */
	u64 resume_time;
	/** @state: GuC specific state for this xe_engine */
	atomic_t state;
	/** @wqi_head: work queue item head */
	u32 wqi_head;
	/** @wqi_tail: work queue item tail */
	u32 wqi_tail;
	/** @id: GuC id for this xe_engine */
	u16 id;
	/** @suspend_wait: wait queue used to wait on pending suspends */
	wait_queue_head_t suspend_wait;
	/** @suspend_pending: a suspend of the engine is pending */
	bool suspend_pending;
};

#endif
+392
drivers/gpu/drm/xe/xe_guc_fwif.h
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

#ifndef _XE_GUC_FWIF_H
#define _XE_GUC_FWIF_H

#include <linux/bits.h>

#include "abi/guc_actions_abi.h"
#include "abi/guc_actions_slpc_abi.h"
#include "abi/guc_errors_abi.h"
#include "abi/guc_communication_mmio_abi.h"
#include "abi/guc_communication_ctb_abi.h"
#include "abi/guc_klvs_abi.h"
#include "abi/guc_messages_abi.h"

#define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET	4
#define G2H_LEN_DW_DEREGISTER_CONTEXT		3
#define G2H_LEN_DW_TLB_INVALIDATE		3

#define GUC_CONTEXT_DISABLE		0
#define GUC_CONTEXT_ENABLE		1

#define GUC_CLIENT_PRIORITY_KMD_HIGH	0
#define GUC_CLIENT_PRIORITY_HIGH	1
#define GUC_CLIENT_PRIORITY_KMD_NORMAL	2
#define GUC_CLIENT_PRIORITY_NORMAL	3
#define GUC_CLIENT_PRIORITY_NUM		4

#define GUC_RENDER_ENGINE		0
#define GUC_VIDEO_ENGINE		1
#define GUC_BLITTER_ENGINE		2
#define GUC_VIDEOENHANCE_ENGINE		3
#define GUC_VIDEO_ENGINE2		4
#define GUC_MAX_ENGINES_NUM		(GUC_VIDEO_ENGINE2 + 1)

#define GUC_RENDER_CLASS		0
#define GUC_VIDEO_CLASS			1
#define GUC_VIDEOENHANCE_CLASS		2
#define GUC_BLITTER_CLASS		3
#define GUC_COMPUTE_CLASS		4
#define GUC_GSC_OTHER_CLASS		5
#define GUC_LAST_ENGINE_CLASS		GUC_GSC_OTHER_CLASS
#define GUC_MAX_ENGINE_CLASSES		16
#define GUC_MAX_INSTANCES_PER_CLASS	32

/* Work item for submitting workloads into the work queue of GuC */
#define WQ_STATUS_ACTIVE		1
#define WQ_STATUS_SUSPENDED		2
#define WQ_STATUS_CMD_ERROR		3
#define WQ_STATUS_ENGINE_ID_NOT_USED	4
#define WQ_STATUS_SUSPENDED_FROM_RESET	5
#define WQ_TYPE_NOOP			0x4
#define WQ_TYPE_MULTI_LRC		0x5
#define WQ_TYPE_MASK			GENMASK(7, 0)
#define WQ_LEN_MASK			GENMASK(26, 16)

#define WQ_GUC_ID_MASK			GENMASK(15, 0)
#define WQ_RING_TAIL_MASK		GENMASK(28, 18)

struct guc_wq_item {
	u32 header;
	u32 context_desc;
	u32 submit_element_info;
	u32 fence_id;
} __packed;

struct guc_sched_wq_desc {
	u32 head;
	u32 tail;
	u32 error_offset;
	u32 wq_status;
	u32 reserved[28];
} __packed;

/* Helper for context registration H2G */
struct guc_ctxt_registration_info {
	u32 flags;
	u32 context_idx;
	u32 engine_class;
	u32 engine_submit_mask;
	u32 wq_desc_lo;
	u32 wq_desc_hi;
	u32 wq_base_lo;
	u32 wq_base_hi;
	u32 wq_size;
	u32 hwlrca_lo;
	u32 hwlrca_hi;
};
#define CONTEXT_REGISTRATION_FLAG_KMD	BIT(0)

/* 32-bit KLV structure as used by policy updates and others */
struct guc_klv_generic_dw_t {
	u32 kl;
	u32 value;
} __packed;

/* Format of the UPDATE_CONTEXT_POLICIES H2G data packet */
struct guc_update_engine_policy_header {
	u32 action;
	u32 guc_id;
} __packed;

struct guc_update_engine_policy {
	struct guc_update_engine_policy_header header;
	struct guc_klv_generic_dw_t klv[GUC_CONTEXT_POLICIES_KLV_NUM_IDS];
} __packed;

/* GUC_CTL_* - Parameters for loading the GuC */
#define GUC_CTL_LOG_PARAMS		0
#define GUC_LOG_VALID			BIT(0)
#define GUC_LOG_NOTIFY_ON_HALF_FULL	BIT(1)
#define GUC_LOG_CAPTURE_ALLOC_UNITS	BIT(2)
#define GUC_LOG_LOG_ALLOC_UNITS		BIT(3)
#define GUC_LOG_CRASH_SHIFT		4
#define GUC_LOG_CRASH_MASK		(0x3 << GUC_LOG_CRASH_SHIFT)
#define GUC_LOG_DEBUG_SHIFT		6
#define GUC_LOG_DEBUG_MASK		(0xF << GUC_LOG_DEBUG_SHIFT)
#define GUC_LOG_CAPTURE_SHIFT		10
#define GUC_LOG_CAPTURE_MASK		(0x3 << GUC_LOG_CAPTURE_SHIFT)
#define GUC_LOG_BUF_ADDR_SHIFT		12

#define GUC_CTL_WA			1
#define GUC_WA_GAM_CREDITS		BIT(10)
#define GUC_WA_DUAL_QUEUE		BIT(11)
#define GUC_WA_RCS_RESET_BEFORE_RC6	BIT(13)
#define GUC_WA_CONTEXT_ISOLATION	BIT(15)
#define GUC_WA_PRE_PARSER		BIT(14)
#define GUC_WA_HOLD_CCS_SWITCHOUT	BIT(17)
#define GUC_WA_POLLCS			BIT(18)
#define GUC_WA_RENDER_RST_RC6_EXIT	BIT(19)
#define GUC_WA_RCS_REGS_IN_CCS_REGS_LIST	BIT(21)

#define GUC_CTL_FEATURE			2
#define GUC_CTL_ENABLE_SLPC		BIT(2)
#define GUC_CTL_DISABLE_SCHEDULER	BIT(14)

#define GUC_CTL_DEBUG			3
#define GUC_LOG_VERBOSITY_SHIFT		0
#define GUC_LOG_VERBOSITY_LOW		(0 << GUC_LOG_VERBOSITY_SHIFT)
#define GUC_LOG_VERBOSITY_MED		(1 << GUC_LOG_VERBOSITY_SHIFT)
#define GUC_LOG_VERBOSITY_HIGH		(2 << GUC_LOG_VERBOSITY_SHIFT)
#define GUC_LOG_VERBOSITY_ULTRA		(3 << GUC_LOG_VERBOSITY_SHIFT)
#define GUC_LOG_VERBOSITY_MIN		0
#define GUC_LOG_VERBOSITY_MAX		3
#define GUC_LOG_VERBOSITY_MASK		0x0000000f
#define GUC_LOG_DESTINATION_MASK	(3 << 4)
#define GUC_LOG_DISABLED		(1 << 6)
#define GUC_PROFILE_ENABLED		(1 << 7)

#define GUC_CTL_ADS			4
#define GUC_ADS_ADDR_SHIFT		1
#define GUC_ADS_ADDR_MASK		(0xFFFFF << GUC_ADS_ADDR_SHIFT)

#define GUC_CTL_DEVID			5

#define GUC_CTL_MAX_DWORDS		14

/* Scheduling policy settings */

#define GLOBAL_POLICY_MAX_NUM_WI	15

/* Don't reset an engine upon preemption failure */
#define GLOBAL_POLICY_DISABLE_ENGINE_RESET	BIT(0)

#define GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US	500000

struct guc_policies {
	u32 submission_queue_depth[GUC_MAX_ENGINE_CLASSES];

	/*
	 * In microseconds. How much time to allow before DPC processing is
	 * called back via interrupt (to prevent DPC queue drain starving).
	 * Typically 1000s of microseconds (example only, not granularity).
	 */
	u32 dpc_promote_time;

	/* Must be set to take these new values. */
	u32 is_valid;

	/*
	 * Max number of WIs to process per call. A large value may keep the CS
	 * idle.
	 */
	u32 max_num_work_items;

	u32 global_flags;
	u32 reserved[4];
} __packed;

/* GuC MMIO reg state struct */
struct guc_mmio_reg {
	u32 offset;
	u32 value;
	u32 flags;
	u32 mask;
#define GUC_REGSET_MASKED		BIT(0)
#define GUC_REGSET_MASKED_WITH_VALUE	BIT(2)
#define GUC_REGSET_RESTORE_ONLY		BIT(3)
} __packed;

/* GuC register sets */
struct guc_mmio_reg_set {
	u32 address;
	u16 count;
	u16 reserved;
} __packed;

/* Generic GT SysInfo data types */
#define GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED		0
#define GUC_GENERIC_GT_SYSINFO_VDBOX_SFC_SUPPORT_MASK	1
#define GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI	2
#define GUC_GENERIC_GT_SYSINFO_MAX			16

/* HW info */
struct guc_gt_system_info {
	u8 mapping_table[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
	u32 engine_enabled_masks[GUC_MAX_ENGINE_CLASSES];
	u32 generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_MAX];
} __packed;

enum {
	GUC_CAPTURE_LIST_INDEX_PF = 0,
	GUC_CAPTURE_LIST_INDEX_VF = 1,
	GUC_CAPTURE_LIST_INDEX_MAX = 2,
};

/* GuC Additional Data Struct */
struct guc_ads {
	struct guc_mmio_reg_set reg_state_list[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
	u32 reserved0;
	u32 scheduler_policies;
	u32 gt_system_info;
	u32 reserved1;
	u32 control_data;
	u32 golden_context_lrca[GUC_MAX_ENGINE_CLASSES];
	u32 eng_state_size[GUC_MAX_ENGINE_CLASSES];
	u32 private_data;
	u32 um_init_data;
	u32 capture_instance[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES];
	u32 capture_class[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES];
	u32 capture_global[GUC_CAPTURE_LIST_INDEX_MAX];
	u32 reserved[14];
} __packed;

/* Engine usage stats */
struct guc_engine_usage_record {
	u32 current_context_index;
	u32 last_switch_in_stamp;
	u32 reserved0;
	u32 total_runtime;
	u32 reserved1[4];
} __packed;

struct guc_engine_usage {
	struct guc_engine_usage_record engines[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
} __packed;

/* This action will be programmed in C1BC - SOFT_SCRATCH_15_REG */
enum xe_guc_recv_message {
	XE_GUC_RECV_MSG_CRASH_DUMP_POSTED = BIT(1),
	XE_GUC_RECV_MSG_EXCEPTION = BIT(30),
};

/* Page fault structures */
struct access_counter_desc {
	u32 dw0;
#define ACCESS_COUNTER_TYPE	BIT(0)
#define ACCESS_COUNTER_SUBG_LO	GENMASK(31, 1)

	u32 dw1;
#define ACCESS_COUNTER_SUBG_HI		BIT(0)
#define ACCESS_COUNTER_RSVD0		GENMASK(2, 1)
#define ACCESS_COUNTER_ENG_INSTANCE	GENMASK(8, 3)
#define ACCESS_COUNTER_ENG_CLASS	GENMASK(11, 9)
#define ACCESS_COUNTER_ASID		GENMASK(31, 12)

	u32 dw2;
#define ACCESS_COUNTER_VFID		GENMASK(5, 0)
#define ACCESS_COUNTER_RSVD1		GENMASK(7, 6)
#define ACCESS_COUNTER_GRANULARITY	GENMASK(10, 8)
#define ACCESS_COUNTER_RSVD2		GENMASK(16, 11)
#define ACCESS_COUNTER_VIRTUAL_ADDR_RANGE_LO	GENMASK(31, 17)

	u32 dw3;
#define ACCESS_COUNTER_VIRTUAL_ADDR_RANGE_HI	GENMASK(31, 0)
} __packed;

enum guc_um_queue_type {
	GUC_UM_HW_QUEUE_PAGE_FAULT = 0,
	GUC_UM_HW_QUEUE_PAGE_FAULT_RESPONSE,
	GUC_UM_HW_QUEUE_ACCESS_COUNTER,
	GUC_UM_HW_QUEUE_MAX
};

struct guc_um_queue_params {
	u64 base_dpa;
	u32 base_ggtt_address;
	u32 size_in_bytes;
	u32 rsvd[4];
} __packed;

struct guc_um_init_params {
	u64 page_response_timeout_in_us;
	u32 rsvd[6];
	struct guc_um_queue_params queue_params[GUC_UM_HW_QUEUE_MAX];
} __packed;

enum xe_guc_fault_reply_type {
	PFR_ACCESS = 0,
	PFR_ENGINE,
	PFR_VFID,
	PFR_ALL,
	PFR_INVALID
};

enum xe_guc_response_desc_type {
	TLB_INVALIDATION_DESC = 0,
	FAULT_RESPONSE_DESC
};

struct xe_guc_pagefault_desc {
	u32 dw0;
#define PFD_FAULT_LEVEL		GENMASK(2, 0)
#define PFD_SRC_ID		GENMASK(10, 3)
#define PFD_RSVD_0		GENMASK(17, 11)
#define XE2_PFD_TRVA_FAULT	BIT(18)
#define PFD_ENG_INSTANCE	GENMASK(24, 19)
#define PFD_ENG_CLASS		GENMASK(27, 25)
#define PFD_PDATA_LO		GENMASK(31, 28)

	u32 dw1;
#define PFD_PDATA_HI		GENMASK(11, 0)
#define PFD_PDATA_HI_SHIFT	4
#define PFD_ASID		GENMASK(31, 12)

	u32 dw2;
#define PFD_ACCESS_TYPE		GENMASK(1, 0)
#define PFD_FAULT_TYPE		GENMASK(3, 2)
#define PFD_VFID		GENMASK(9, 4)
#define PFD_RSVD_1		GENMASK(11, 10)
#define PFD_VIRTUAL_ADDR_LO	GENMASK(31, 12)
#define PFD_VIRTUAL_ADDR_LO_SHIFT	12

	u32 dw3;
#define PFD_VIRTUAL_ADDR_HI	GENMASK(31, 0)
#define PFD_VIRTUAL_ADDR_HI_SHIFT	32
} __packed;

struct xe_guc_pagefault_reply {
	u32 dw0;
#define PFR_VALID	BIT(0)
#define PFR_SUCCESS	BIT(1)
#define PFR_REPLY	GENMASK(4, 2)
#define PFR_RSVD_0	GENMASK(9, 5)
#define PFR_DESC_TYPE	GENMASK(11, 10)
#define PFR_ASID	GENMASK(31, 12)

	u32 dw1;
#define PFR_VFID	GENMASK(5, 0)
#define PFR_RSVD_1	BIT(6)
#define PFR_ENG_INSTANCE	GENMASK(12, 7)
#define PFR_ENG_CLASS	GENMASK(15, 13)
#define PFR_PDATA	GENMASK(31, 16)

	u32 dw2;
#define PFR_RSVD_2	GENMASK(31, 0)
} __packed;

struct xe_guc_acc_desc {
	u32 dw0;
#define ACC_TYPE	BIT(0)
#define ACC_TRIGGER	0
#define ACC_NOTIFY	1
#define ACC_SUBG_LO	GENMASK(31, 1)

	u32 dw1;
#define ACC_SUBG_HI	BIT(0)
#define ACC_RSVD0	GENMASK(2, 1)
#define ACC_ENG_INSTANCE	GENMASK(8, 3)
#define ACC_ENG_CLASS	GENMASK(11, 9)
#define ACC_ASID	GENMASK(31, 12)

	u32 dw2;
#define ACC_VFID	GENMASK(5, 0)
#define ACC_RSVD1	GENMASK(7, 6)
#define ACC_GRANULARITY	GENMASK(10, 8)
#define ACC_RSVD2	GENMASK(16, 11)
#define ACC_VIRTUAL_ADDR_RANGE_LO	GENMASK(31, 17)

	u32 dw3;
#define ACC_VIRTUAL_ADDR_RANGE_HI	GENMASK(31, 0)
} __packed;

#endif
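The `WQ_TYPE_MASK`/`WQ_LEN_MASK` definitions above describe how the first dword of a `guc_wq_item` is encoded: the item type lives in bits 7:0 and the payload length (in dwords) in bits 26:16. A minimal userspace sketch of that packing follows; `GENMASK`, `FIELD_PREP`, and `FIELD_GET` are re-created here since they are kernel helpers, and `wq_item_header()` is a hypothetical illustration, not a function from the patch.

```c
#include <assert.h>
#include <stdint.h>

/* Userspace re-creations of the kernel bitfield helpers (32-bit only) */
#define GENMASK(h, l)		((~0u << (l)) & (~0u >> (31 - (h))))
#define FIELD_PREP(mask, val)	(((val) << __builtin_ctz(mask)) & (mask))
#define FIELD_GET(mask, reg)	(((reg) & (mask)) >> __builtin_ctz(mask))

/* Mirrored from xe_guc_fwif.h */
#define WQ_TYPE_NOOP		0x4
#define WQ_TYPE_MULTI_LRC	0x5
#define WQ_TYPE_MASK		GENMASK(7, 0)
#define WQ_LEN_MASK		GENMASK(26, 16)

/* Hypothetical helper: pack type (bits 7:0) and dword length (bits 26:16)
 * into the header dword of a guc_wq_item. */
static uint32_t wq_item_header(uint32_t type, uint32_t len_dw)
{
	return FIELD_PREP(WQ_TYPE_MASK, type) |
	       FIELD_PREP(WQ_LEN_MASK, len_dw);
}
```

For example, a MULTI_LRC item with a 3-dword payload encodes to `0x00030005`, and the two fields can be pulled back out with `FIELD_GET`.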
+125
drivers/gpu/drm/xe/xe_guc_hwconfig.c
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include <drm/drm_managed.h>

#include "xe_bo.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_guc.h"
#include "xe_guc_hwconfig.h"
#include "xe_map.h"

static struct xe_gt *
guc_to_gt(struct xe_guc *guc)
{
	return container_of(guc, struct xe_gt, uc.guc);
}

static struct xe_device *
guc_to_xe(struct xe_guc *guc)
{
	return gt_to_xe(guc_to_gt(guc));
}

static int send_get_hwconfig(struct xe_guc *guc, u32 ggtt_addr, u32 size)
{
	u32 action[] = {
		XE_GUC_ACTION_GET_HWCONFIG,
		lower_32_bits(ggtt_addr),
		upper_32_bits(ggtt_addr),
		size,
	};

	return xe_guc_send_mmio(guc, action, ARRAY_SIZE(action));
}

static int guc_hwconfig_size(struct xe_guc *guc, u32 *size)
{
	int ret = send_get_hwconfig(guc, 0, 0);

	if (ret < 0)
		return ret;

	*size = ret;
	return 0;
}

static int guc_hwconfig_copy(struct xe_guc *guc)
{
	int ret = send_get_hwconfig(guc, xe_bo_ggtt_addr(guc->hwconfig.bo),
				    guc->hwconfig.size);

	if (ret < 0)
		return ret;

	return 0;
}

static void guc_hwconfig_fini(struct drm_device *drm, void *arg)
{
	struct xe_guc *guc = arg;

	xe_bo_unpin_map_no_vm(guc->hwconfig.bo);
}

int xe_guc_hwconfig_init(struct xe_guc *guc)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_bo *bo;
	u32 size;
	int err;

	/* Initialization already done */
	if (guc->hwconfig.bo)
		return 0;

	/*
	 * The hwconfig is the same across all GTs, so only GT0 needs to be
	 * configured
	 */
	if (gt->info.id != XE_GT0)
		return 0;

	/* Only ADL_P and DG2+ support the hwconfig table */
	if (GRAPHICS_VERx100(xe) < 1255 && xe->info.platform != XE_ALDERLAKE_P)
		return 0;

	err = guc_hwconfig_size(guc, &size);
	if (err)
		return err;
	if (!size)
		return -EINVAL;

	bo = xe_bo_create_pin_map(xe, gt, NULL, PAGE_ALIGN(size),
				  ttm_bo_type_kernel,
				  XE_BO_CREATE_VRAM_IF_DGFX(gt) |
				  XE_BO_CREATE_GGTT_BIT);
	if (IS_ERR(bo))
		return PTR_ERR(bo);
	guc->hwconfig.bo = bo;
	guc->hwconfig.size = size;

	err = drmm_add_action_or_reset(&xe->drm, guc_hwconfig_fini, guc);
	if (err)
		return err;

	return guc_hwconfig_copy(guc);
}

u32 xe_guc_hwconfig_size(struct xe_guc *guc)
{
	return !guc->hwconfig.bo ? 0 : guc->hwconfig.size;
}

void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst)
{
	struct xe_device *xe = guc_to_xe(guc);

	XE_BUG_ON(!guc->hwconfig.bo);

	xe_map_memcpy_from(xe, dst, &guc->hwconfig.bo->vmap, 0,
			   guc->hwconfig.size);
}
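`send_get_hwconfig()` splits the buffer address into two action dwords with the kernel's `lower_32_bits()`/`upper_32_bits()` helpers. A small userspace sketch of those helpers, for illustration only:

```c
#include <assert.h>
#include <stdint.h>

/* Userspace re-creations of the kernel's lower_32_bits()/upper_32_bits(),
 * which send_get_hwconfig() uses to place an address across two dwords of
 * the H2G action payload. */
static inline uint32_t lower_32_bits(uint64_t n)
{
	return (uint32_t)n;		/* low dword */
}

static inline uint32_t upper_32_bits(uint64_t n)
{
	return (uint32_t)(n >> 32);	/* high dword */
}
```

Note the GuC is first queried with a zero address and size, and the returned value is the table size; the second call then supplies the GGTT address of a buffer large enough to receive the table.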
+17
drivers/gpu/drm/xe/xe_guc_hwconfig.h
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

#ifndef _XE_GUC_HWCONFIG_H_
#define _XE_GUC_HWCONFIG_H_

#include <linux/types.h>

struct xe_guc;

int xe_guc_hwconfig_init(struct xe_guc *guc);
u32 xe_guc_hwconfig_size(struct xe_guc *guc);
void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst);

#endif
+109
drivers/gpu/drm/xe/xe_guc_log.c
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include <drm/drm_managed.h>

#include "xe_bo.h"
#include "xe_gt.h"
#include "xe_guc_log.h"
#include "xe_map.h"
#include "xe_module.h"

static struct xe_gt *
log_to_gt(struct xe_guc_log *log)
{
	return container_of(log, struct xe_gt, uc.guc.log);
}

static struct xe_device *
log_to_xe(struct xe_guc_log *log)
{
	return gt_to_xe(log_to_gt(log));
}

static size_t guc_log_size(void)
{
	/*
	 * GuC Log buffer Layout
	 *
	 * +===============================+ 00B
	 * | Crash dump state header       |
	 * +-------------------------------+ 32B
	 * | Debug state header            |
	 * +-------------------------------+ 64B
	 * | Capture state header          |
	 * +-------------------------------+ 96B
	 * |                               |
	 * +===============================+ PAGE_SIZE (4KB)
	 * | Crash Dump logs               |
	 * +===============================+ + CRASH_SIZE
	 * | Debug logs                    |
	 * +===============================+ + DEBUG_SIZE
	 * | Capture logs                  |
	 * +===============================+ + CAPTURE_SIZE
	 */
	return PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE +
	       CAPTURE_BUFFER_SIZE;
}

void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p)
{
	struct xe_device *xe = log_to_xe(log);
	size_t size;
	int i, j;

	XE_BUG_ON(!log->bo);

	size = log->bo->size;

#define DW_PER_READ	128
	XE_BUG_ON(size % (DW_PER_READ * sizeof(u32)));
	for (i = 0; i < size / sizeof(u32); i += DW_PER_READ) {
		u32 read[DW_PER_READ];

		xe_map_memcpy_from(xe, read, &log->bo->vmap, i * sizeof(u32),
				   DW_PER_READ * sizeof(u32));
#define DW_PER_PRINT	4
		for (j = 0; j < DW_PER_READ / DW_PER_PRINT; ++j) {
			u32 *print = read + j * DW_PER_PRINT;

			drm_printf(p, "0x%08x 0x%08x 0x%08x 0x%08x\n",
				   *(print + 0), *(print + 1),
				   *(print + 2), *(print + 3));
		}
	}
}

static void guc_log_fini(struct drm_device *drm, void *arg)
{
	struct xe_guc_log *log = arg;

	xe_bo_unpin_map_no_vm(log->bo);
}

int xe_guc_log_init(struct xe_guc_log *log)
{
	struct xe_device *xe = log_to_xe(log);
	struct xe_gt *gt = log_to_gt(log);
	struct xe_bo *bo;
	int err;

	bo = xe_bo_create_pin_map(xe, gt, NULL, guc_log_size(),
				  ttm_bo_type_kernel,
				  XE_BO_CREATE_VRAM_IF_DGFX(gt) |
				  XE_BO_CREATE_GGTT_BIT);
	if (IS_ERR(bo))
		return PTR_ERR(bo);

	xe_map_memset(xe, &bo->vmap, 0, 0, guc_log_size());
	log->bo = bo;
	log->level = xe_guc_log_level;

	err = drmm_add_action_or_reset(&xe->drm, guc_log_fini, log);
	if (err)
		return err;

	return 0;
}
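The layout comment in `guc_log_size()` translates directly into an arithmetic sum: one page of state headers followed by the three log regions. A standalone sketch of that computation, using the non-`CONFIG_DRM_XE_LARGE_GUC_BUFFER` sizes from `xe_guc_log.h` and assuming a 4 KiB page:

```c
#include <assert.h>
#include <stddef.h>

/* Sizes mirrored from xe_guc_log.h (CONFIG_DRM_XE_LARGE_GUC_BUFFER off);
 * PAGE_SIZE is assumed to be 4 KiB here. */
#define PAGE_SIZE		4096u
#define CRASH_BUFFER_SIZE	(8u * 1024)	/* SZ_8K  */
#define DEBUG_BUFFER_SIZE	(64u * 1024)	/* SZ_64K */
#define CAPTURE_BUFFER_SIZE	(16u * 1024)	/* SZ_16K */

/* One page for the crash/debug/capture state headers, then the three
 * log regions back to back, as drawn in the layout diagram. */
static size_t guc_log_size(void)
{
	return PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE +
	       CAPTURE_BUFFER_SIZE;
}
```

With the small-buffer sizes this works out to 4096 + 8192 + 65536 + 16384 = 94208 bytes, which is also a multiple of the 128-dword read granularity that `xe_guc_log_print()` asserts on.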
+48
drivers/gpu/drm/xe/xe_guc_log.h
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

#ifndef _XE_GUC_LOG_H_
#define _XE_GUC_LOG_H_

#include "xe_guc_log_types.h"

struct drm_printer;

#if IS_ENABLED(CONFIG_DRM_XE_LARGE_GUC_BUFFER)
#define CRASH_BUFFER_SIZE	SZ_1M
#define DEBUG_BUFFER_SIZE	SZ_8M
#define CAPTURE_BUFFER_SIZE	SZ_2M
#else
#define CRASH_BUFFER_SIZE	SZ_8K
#define DEBUG_BUFFER_SIZE	SZ_64K
#define CAPTURE_BUFFER_SIZE	SZ_16K
#endif
/*
 * While we're using plain log level in i915, GuC controls are much more...
 * "elaborate"? We have a couple of bits for verbosity, separate bit for actual
 * log enabling, and separate bit for default logging - which "conveniently"
 * ignores the enable bit.
 */
#define GUC_LOG_LEVEL_DISABLED		0
#define GUC_LOG_LEVEL_NON_VERBOSE	1
#define GUC_LOG_LEVEL_IS_ENABLED(x)	((x) > GUC_LOG_LEVEL_DISABLED)
#define GUC_LOG_LEVEL_IS_VERBOSE(x)	((x) > GUC_LOG_LEVEL_NON_VERBOSE)
#define GUC_LOG_LEVEL_TO_VERBOSITY(x) ({ \
	typeof(x) _x = (x); \
	GUC_LOG_LEVEL_IS_VERBOSE(_x) ? _x - 2 : 0; \
})
#define GUC_VERBOSITY_TO_LOG_LEVEL(x)	((x) + 2)
#define GUC_LOG_LEVEL_MAX GUC_VERBOSITY_TO_LOG_LEVEL(GUC_LOG_VERBOSITY_MAX)

int xe_guc_log_init(struct xe_guc_log *log);
void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p);

static inline u32
xe_guc_log_get_level(struct xe_guc_log *log)
{
	return log->level;
}

#endif
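The level-to-verbosity macros above map the driver's single log level (0 = disabled, 1 = non-verbose, 2..5 = verbose) onto the GuC's separate verbosity field (0..3). A minimal standalone sketch of that mapping, mirroring the header's constants (the statement-expression form is simplified to a plain conditional here):

```c
#include <assert.h>

/* Mirrored from xe_guc_log.h: level 0 = disabled, 1 = non-verbose,
 * 2..5 = verbose with GuC verbosity (level - 2). */
#define GUC_LOG_LEVEL_DISABLED		0
#define GUC_LOG_LEVEL_NON_VERBOSE	1
#define GUC_LOG_VERBOSITY_MAX		3

#define GUC_LOG_LEVEL_IS_ENABLED(x)	((x) > GUC_LOG_LEVEL_DISABLED)
#define GUC_LOG_LEVEL_IS_VERBOSE(x)	((x) > GUC_LOG_LEVEL_NON_VERBOSE)
#define GUC_LOG_LEVEL_TO_VERBOSITY(x) \
	(GUC_LOG_LEVEL_IS_VERBOSE(x) ? (x) - 2 : 0)
#define GUC_VERBOSITY_TO_LOG_LEVEL(x)	((x) + 2)
#define GUC_LOG_LEVEL_MAX GUC_VERBOSITY_TO_LOG_LEVEL(GUC_LOG_VERBOSITY_MAX)
```

So levels 0, 1, and 2 all yield verbosity 0 (differing only in the enable/verbose bits), while the maximum level is verbosity 3 + 2 = 5.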
+23
drivers/gpu/drm/xe/xe_guc_log_types.h
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

#ifndef _XE_GUC_LOG_TYPES_H_
#define _XE_GUC_LOG_TYPES_H_

#include <linux/types.h>

struct xe_bo;

/**
 * struct xe_guc_log - GuC log
 */
struct xe_guc_log {
	/** @level: GuC log level */
	u32 level;
	/** @bo: XE BO for GuC log */
	struct xe_bo *bo;
};

#endif
+843
drivers/gpu/drm/xe/xe_guc_pc.c
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include <drm/drm_managed.h>

#include "xe_bo.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_types.h"
#include "xe_gt_sysfs.h"
#include "xe_guc_ct.h"
#include "xe_map.h"
#include "xe_mmio.h"
#include "xe_pcode.h"
#include "i915_reg_defs.h"
#include "i915_reg.h"

#include "intel_mchbar_regs.h"

/* For GEN6_RP_STATE_CAP.reg to be merged when the definition moves to Xe */
#define RP0_MASK	REG_GENMASK(7, 0)
#define RP1_MASK	REG_GENMASK(15, 8)
#define RPN_MASK	REG_GENMASK(23, 16)

#define GEN10_FREQ_INFO_REC	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5ef0)
#define RPE_MASK	REG_GENMASK(15, 8)

#include "gt/intel_gt_regs.h"
/* For GEN6_RPNSWREQ.reg to be merged when the definition moves to Xe */
#define REQ_RATIO_MASK	REG_GENMASK(31, 23)

/* For GEN6_GT_CORE_STATUS.reg to be merged when the definition moves to Xe */
#define RCN_MASK	REG_GENMASK(2, 0)

#define GEN12_RPSTAT1	_MMIO(0x1381b4)
#define GEN12_CAGF_MASK	REG_GENMASK(19, 11)

#define GT_FREQUENCY_MULTIPLIER	50
#define GEN9_FREQ_SCALER	3

/**
 * DOC: GuC Power Conservation (PC)
 *
 * GuC Power Conservation (PC) supports multiple features for the most
 * efficient and performant use of the GT when GuC submission is enabled,
 * including frequency management, Render-C states management, and various
 * algorithms for power balancing.
 *
 * Single Loop Power Conservation (SLPC) is the name given to the suite of
 * connected power conservation features in the GuC firmware. The firmware
 * exposes a programming interface to the host for the control of SLPC.
 *
 * Frequency management:
 * =====================
 *
 * The Xe driver enables SLPC with all of its default features and frequency
 * selection, which varies per platform.
 * Xe's GuC PC provides a sysfs API for frequency management:
 *
 * device/gt#/freq_* *read-only* files:
 * - freq_act: The actual resolved frequency decided by PCODE.
 * - freq_cur: The current one requested by GuC PC to the Hardware.
 * - freq_rpn: The Render Performance (RP) N level, which is the minimal one.
 * - freq_rpe: The Render Performance (RP) E level, which is the efficient one.
 * - freq_rp0: The Render Performance (RP) 0 level, which is the maximum one.
 *
 * device/gt#/freq_* *read-write* files:
 * - freq_min: GuC PC min request.
 * - freq_max: GuC PC max request.
 *   If max <= min, then freq_min becomes a fixed frequency request.
 *
 * Render-C States:
 * ================
 *
 * Render-C states are also a GuC PC feature that is now enabled in Xe for
 * all platforms.
 * Xe's GuC PC provides a sysfs API for Render-C States:
 *
 * device/gt#/rc* *read-only* files:
 * - rc_status: Provide the actual immediate status of Render-C: (rc0 or rc6)
 * - rc6_residency: Provide the rc6_residency counter in units of 1.28 uSec.
 *   Prone to overflows.
 */

static struct xe_guc *
pc_to_guc(struct xe_guc_pc *pc)
{
	return container_of(pc, struct xe_guc, pc);
}

static struct xe_device *
pc_to_xe(struct xe_guc_pc *pc)
{
	struct xe_guc *guc = pc_to_guc(pc);
	struct xe_gt *gt = container_of(guc, struct xe_gt, uc.guc);

	return gt_to_xe(gt);
}

static struct xe_gt *
pc_to_gt(struct xe_guc_pc *pc)
{
	return container_of(pc, struct xe_gt, uc.guc.pc);
}

static struct xe_guc_pc *
dev_to_pc(struct device *dev)
{
	return &kobj_to_gt(&dev->kobj)->uc.guc.pc;
}

static struct iosys_map *
pc_to_maps(struct xe_guc_pc *pc)
{
	return &pc->bo->vmap;
}

#define slpc_shared_data_read(pc_, field_) \
	xe_map_rd_field(pc_to_xe(pc_), pc_to_maps(pc_), 0, \
			struct slpc_shared_data, field_)

#define slpc_shared_data_write(pc_, field_, val_) \
	xe_map_wr_field(pc_to_xe(pc_), pc_to_maps(pc_), 0, \
			struct slpc_shared_data, field_, val_)

#define SLPC_EVENT(id, count) \
	(FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID, id) | \
	 FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC, count))

static bool pc_is_in_state(struct xe_guc_pc *pc, enum slpc_global_state state)
{
	xe_device_assert_mem_access(pc_to_xe(pc));
	return slpc_shared_data_read(pc, header.global_state) == state;
}

static int pc_action_reset(struct xe_guc_pc *pc)
{
	struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
	int ret;
	u32 action[] = {
		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
		SLPC_EVENT(SLPC_EVENT_RESET, 2),
		xe_bo_ggtt_addr(pc->bo),
		0,
	};

	ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
	if (ret)
		drm_err(&pc_to_xe(pc)->drm, "GuC PC reset: %pe", ERR_PTR(ret));

	return ret;
}

static int pc_action_shutdown(struct xe_guc_pc *pc)
{
	struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
	int ret;
	u32 action[] = {
		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
		SLPC_EVENT(SLPC_EVENT_SHUTDOWN, 2),
		xe_bo_ggtt_addr(pc->bo),
		0,
	};

	ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
	if (ret)
		drm_err(&pc_to_xe(pc)->drm, "GuC PC shutdown %pe",
			ERR_PTR(ret));

	return ret;
}

static int pc_action_query_task_state(struct xe_guc_pc *pc)
{
	struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
	int ret;
	u32 action[] = {
		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
		SLPC_EVENT(SLPC_EVENT_QUERY_TASK_STATE, 2),
		xe_bo_ggtt_addr(pc->bo),
		0,
	};

	if (!pc_is_in_state(pc, SLPC_GLOBAL_STATE_RUNNING))
		return -EAGAIN;

	/* Blocking here to ensure the results are ready before reading them */
	ret = xe_guc_ct_send_block(ct, action, ARRAY_SIZE(action));
	if (ret)
		drm_err(&pc_to_xe(pc)->drm,
			"GuC PC query task state failed: %pe", ERR_PTR(ret));

	return ret;
}

static int pc_action_set_param(struct xe_guc_pc *pc, u8 id, u32 value)
{
	struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
	int ret;
	u32 action[] = {
		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
		SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2),
		id,
		value,
	};

	if (!pc_is_in_state(pc, SLPC_GLOBAL_STATE_RUNNING))
		return -EAGAIN;

	ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
	if (ret)
		drm_err(&pc_to_xe(pc)->drm, "GuC PC set param failed: %pe",
			ERR_PTR(ret));

	return ret;
}

static int pc_action_setup_gucrc(struct xe_guc_pc *pc, u32 mode)
{
	struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
	u32 action[] = {
		XE_GUC_ACTION_SETUP_PC_GUCRC,
		mode,
	};
	int ret;

	ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
	if (ret)
		drm_err(&pc_to_xe(pc)->drm, "GuC RC enable failed: %pe",
			ERR_PTR(ret));
	return ret;
}

static u32 decode_freq(u32 raw)
{
	return DIV_ROUND_CLOSEST(raw * GT_FREQUENCY_MULTIPLIER,
				 GEN9_FREQ_SCALER);
}

static u32 pc_get_min_freq(struct xe_guc_pc *pc)
{
	u32 freq;

	freq = FIELD_GET(SLPC_MIN_UNSLICE_FREQ_MASK,
			 slpc_shared_data_read(pc, task_state_data.freq));

	return decode_freq(freq);
}

static int pc_set_min_freq(struct xe_guc_pc *pc, u32 freq)
{
	/*
	 * Let's only check for the rpn-rp0 range. If max < min,
	 * min becomes a fixed request.
	 */
	if (freq < pc->rpn_freq || freq > pc->rp0_freq)
		return -EINVAL;

	/*
	 * GuC policy is to elevate minimum frequency to the efficient levels.
	 * Our goal is to have the admin choices respected.
	 */
	pc_action_set_param(pc, SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY,
			    freq < pc->rpe_freq);

	return pc_action_set_param(pc,
				   SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
				   freq);
}

static int pc_get_max_freq(struct xe_guc_pc *pc)
{
	u32 freq;

	freq = FIELD_GET(SLPC_MAX_UNSLICE_FREQ_MASK,
			 slpc_shared_data_read(pc, task_state_data.freq));

	return decode_freq(freq);
}

static int pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
{
	/*
	 * Let's only check for the rpn-rp0 range. If max < min,
	 * min becomes a fixed request.
	 * Also, overclocking is not supported.
	 */
	if (freq < pc->rpn_freq || freq > pc->rp0_freq)
		return -EINVAL;

	return pc_action_set_param(pc,
				   SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ,
				   freq);
}

static void pc_update_rp_values(struct xe_guc_pc *pc)
{
	struct xe_gt *gt = pc_to_gt(pc);
	struct xe_device *xe = gt_to_xe(gt);
	u32 reg;

	/*
	 * For PVC we still need to use fused RP1 as the approximation for RPe.
	 * For platforms other than PVC we get the resolved RPe directly from
	 * PCODE at a different register.
	 */
	if (xe->info.platform == XE_PVC)
		reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP.reg);
	else
		reg = xe_mmio_read32(gt, GEN10_FREQ_INFO_REC.reg);

	pc->rpe_freq = REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER;

	/*
	 * RPe is decided at runtime by PCODE. In the rare case where that's
	 * smaller than the fused min, we will trust the PCODE and use that
	 * as our minimum one.
	 */
	pc->rpn_freq = min(pc->rpn_freq, pc->rpe_freq);
}

static ssize_t freq_act_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	struct kobject *kobj = &dev->kobj;
	struct xe_gt *gt = kobj_to_gt(kobj);
	u32 freq;
	ssize_t ret;

	/*
	 * When in RC6, actual frequency is 0. Let's block RC6 so we are able
	 * to verify that our freq requests are really happening.
	 */
	ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	if (ret)
		return ret;

	xe_device_mem_access_get(gt_to_xe(gt));
	freq = xe_mmio_read32(gt, GEN12_RPSTAT1.reg);
	xe_device_mem_access_put(gt_to_xe(gt));

	freq = REG_FIELD_GET(GEN12_CAGF_MASK, freq);
	ret = sysfs_emit(buf, "%d\n", decode_freq(freq));

	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
	return ret;
}
static DEVICE_ATTR_RO(freq_act);

static ssize_t freq_cur_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	struct kobject *kobj = &dev->kobj;
	struct xe_gt *gt = kobj_to_gt(kobj);
	u32 freq;
	ssize_t ret;

	/*
	 * GuC SLPC plays with cur freq request when GuCRC is enabled.
	 * Block RC6 for a more reliable read.
	 */
	ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	if (ret)
		return ret;

	xe_device_mem_access_get(gt_to_xe(gt));
	freq = xe_mmio_read32(gt, GEN6_RPNSWREQ.reg);
	xe_device_mem_access_put(gt_to_xe(gt));

	freq = REG_FIELD_GET(REQ_RATIO_MASK, freq);
	ret = sysfs_emit(buf, "%d\n", decode_freq(freq));

	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
	return ret;
}
static DEVICE_ATTR_RO(freq_cur);

static ssize_t freq_rp0_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	struct xe_guc_pc *pc = dev_to_pc(dev);

	return sysfs_emit(buf, "%d\n", pc->rp0_freq);
}
static DEVICE_ATTR_RO(freq_rp0);

static ssize_t freq_rpe_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	struct xe_guc_pc *pc = dev_to_pc(dev);

	pc_update_rp_values(pc);
	return sysfs_emit(buf, "%d\n", pc->rpe_freq);
}
static DEVICE_ATTR_RO(freq_rpe);

static ssize_t freq_rpn_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	struct xe_guc_pc *pc = dev_to_pc(dev);

	return sysfs_emit(buf, "%d\n", pc->rpn_freq);
}
static DEVICE_ATTR_RO(freq_rpn);

static ssize_t freq_min_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	struct xe_guc_pc *pc = dev_to_pc(dev);
	struct xe_gt *gt = pc_to_gt(pc);
	ssize_t ret;

	xe_device_mem_access_get(pc_to_xe(pc));
	mutex_lock(&pc->freq_lock);
	if (!pc->freq_ready) {
		/* Might be in the middle of a gt reset */
		ret = -EAGAIN;
		goto out;
	}

	/*
	 * GuC SLPC plays with min freq request when GuCRC is enabled.
	 * Block RC6 for a more reliable read.
	 */
	ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	if (ret)
		goto out;

	ret = pc_action_query_task_state(pc);
	if (ret)
		goto fw;

	ret = sysfs_emit(buf, "%d\n", pc_get_min_freq(pc));

fw:
	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
out:
	mutex_unlock(&pc->freq_lock);
	xe_device_mem_access_put(pc_to_xe(pc));
	return ret;
}

static ssize_t freq_min_store(struct device *dev, struct device_attribute *attr,
			      const char *buff, size_t count)
{
	struct xe_guc_pc *pc = dev_to_pc(dev);
	u32 freq;
	ssize_t ret;

	ret = kstrtou32(buff, 0, &freq);
	if (ret)
		return ret;

	xe_device_mem_access_get(pc_to_xe(pc));
	mutex_lock(&pc->freq_lock);
	if (!pc->freq_ready) {
		/* Might be in the middle of a gt reset */
		ret = -EAGAIN;
		goto out;
	}

	ret = pc_set_min_freq(pc, freq);
	if (ret)
		goto out;

	pc->user_requested_min = freq;

out:
	mutex_unlock(&pc->freq_lock);
	xe_device_mem_access_put(pc_to_xe(pc));
	return ret ?: count;
}
static DEVICE_ATTR_RW(freq_min);

static ssize_t freq_max_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	struct xe_guc_pc *pc = dev_to_pc(dev);
	ssize_t ret;

	xe_device_mem_access_get(pc_to_xe(pc));
	mutex_lock(&pc->freq_lock);
	if (!pc->freq_ready) {
		/* Might be in the middle of a gt reset */
		ret = -EAGAIN;
		goto out;
	}

	ret = pc_action_query_task_state(pc);
	if (ret)
		goto out;

	ret = sysfs_emit(buf, "%d\n", pc_get_max_freq(pc));

out:
	mutex_unlock(&pc->freq_lock);
	xe_device_mem_access_put(pc_to_xe(pc));
	return ret;
}

static ssize_t freq_max_store(struct device *dev, struct device_attribute *attr,
			      const char *buff, size_t count)
{
	struct xe_guc_pc *pc = dev_to_pc(dev);
	u32 freq;
	ssize_t ret;

	ret = kstrtou32(buff, 0, &freq);
	if (ret)
		return ret;

	xe_device_mem_access_get(pc_to_xe(pc));
	mutex_lock(&pc->freq_lock);
	if (!pc->freq_ready) {
		/* Might be in the middle of a gt reset */
		ret = -EAGAIN;
		goto out;
	}

	ret = pc_set_max_freq(pc, freq);
	if (ret)
		goto out;

	pc->user_requested_max = freq;

out:
	mutex_unlock(&pc->freq_lock);
	xe_device_mem_access_put(pc_to_xe(pc));
	return ret ?: count;
}
static DEVICE_ATTR_RW(freq_max);

static ssize_t rc_status_show(struct device *dev,
			      struct device_attribute *attr, char *buff)
{
	struct xe_guc_pc *pc = dev_to_pc(dev);
	struct xe_gt *gt = pc_to_gt(pc);
	u32 reg;

	xe_device_mem_access_get(gt_to_xe(gt));
	reg = xe_mmio_read32(gt, GEN6_GT_CORE_STATUS.reg);
	xe_device_mem_access_put(gt_to_xe(gt));

	switch (REG_FIELD_GET(RCN_MASK, reg)) {
	case GEN6_RC6:
		return sysfs_emit(buff, "rc6\n");
	case GEN6_RC0:
return sysfs_emit(buff, "rc0\n"); 550 + default: 551 + return -ENOENT; 552 + } 553 + } 554 + static DEVICE_ATTR_RO(rc_status); 555 + 556 + static ssize_t rc6_residency_show(struct device *dev, 557 + struct device_attribute *attr, char *buff) 558 + { 559 + struct xe_guc_pc *pc = dev_to_pc(dev); 560 + struct xe_gt *gt = pc_to_gt(pc); 561 + u32 reg; 562 + ssize_t ret; 563 + 564 + ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 565 + if (ret) 566 + return ret; 567 + 568 + xe_device_mem_access_get(pc_to_xe(pc)); 569 + reg = xe_mmio_read32(gt, GEN6_GT_GFX_RC6.reg); 570 + xe_device_mem_access_put(pc_to_xe(pc)); 571 + 572 + ret = sysfs_emit(buff, "%u\n", reg); 573 + 574 + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); 575 + return ret; 576 + } 577 + static DEVICE_ATTR_RO(rc6_residency); 578 + 579 + static const struct attribute *pc_attrs[] = { 580 + &dev_attr_freq_act.attr, 581 + &dev_attr_freq_cur.attr, 582 + &dev_attr_freq_rp0.attr, 583 + &dev_attr_freq_rpe.attr, 584 + &dev_attr_freq_rpn.attr, 585 + &dev_attr_freq_min.attr, 586 + &dev_attr_freq_max.attr, 587 + &dev_attr_rc_status.attr, 588 + &dev_attr_rc6_residency.attr, 589 + NULL 590 + }; 591 + 592 + static void pc_init_fused_rp_values(struct xe_guc_pc *pc) 593 + { 594 + struct xe_gt *gt = pc_to_gt(pc); 595 + struct xe_device *xe = gt_to_xe(gt); 596 + u32 reg; 597 + 598 + xe_device_assert_mem_access(pc_to_xe(pc)); 599 + 600 + if (xe->info.platform == XE_PVC) 601 + reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP.reg); 602 + else 603 + reg = xe_mmio_read32(gt, GEN6_RP_STATE_CAP.reg); 604 + pc->rp0_freq = REG_FIELD_GET(RP0_MASK, reg) * GT_FREQUENCY_MULTIPLIER; 605 + pc->rpn_freq = REG_FIELD_GET(RPN_MASK, reg) * GT_FREQUENCY_MULTIPLIER; 606 + } 607 + 608 + static int pc_adjust_freq_bounds(struct xe_guc_pc *pc) 609 + { 610 + int ret; 611 + 612 + lockdep_assert_held(&pc->freq_lock); 613 + 614 + ret = pc_action_query_task_state(pc); 615 + if (ret) 616 + return ret; 617 + 618 + /* 619 + * GuC defaults to 
some RPmax that is not actually achievable without 620 + * overclocking. Let's adjust it to the Hardware RP0, which is the 621 + * regular maximum 622 + */ 623 + if (pc_get_max_freq(pc) > pc->rp0_freq) 624 + pc_set_max_freq(pc, pc->rp0_freq); 625 + 626 + /* 627 + * Same thing happens for Server platforms where min is listed as 628 + * RPMax 629 + */ 630 + if (pc_get_min_freq(pc) > pc->rp0_freq) 631 + pc_set_min_freq(pc, pc->rp0_freq); 632 + 633 + return 0; 634 + } 635 + 636 + static int pc_adjust_requested_freq(struct xe_guc_pc *pc) 637 + { 638 + int ret = 0; 639 + 640 + lockdep_assert_held(&pc->freq_lock); 641 + 642 + if (pc->user_requested_min != 0) { 643 + ret = pc_set_min_freq(pc, pc->user_requested_min); 644 + if (ret) 645 + return ret; 646 + } 647 + 648 + if (pc->user_requested_max != 0) { 649 + ret = pc_set_max_freq(pc, pc->user_requested_max); 650 + if (ret) 651 + return ret; 652 + } 653 + 654 + return ret; 655 + } 656 + 657 + static int pc_gucrc_disable(struct xe_guc_pc *pc) 658 + { 659 + struct xe_gt *gt = pc_to_gt(pc); 660 + int ret; 661 + 662 + xe_device_assert_mem_access(pc_to_xe(pc)); 663 + 664 + ret = pc_action_setup_gucrc(pc, XE_GUCRC_HOST_CONTROL); 665 + if (ret) 666 + return ret; 667 + 668 + ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 669 + if (ret) 670 + return ret; 671 + 672 + xe_mmio_write32(gt, GEN9_PG_ENABLE.reg, 0); 673 + xe_mmio_write32(gt, GEN6_RC_CONTROL.reg, 0); 674 + xe_mmio_write32(gt, GEN6_RC_STATE.reg, 0); 675 + 676 + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); 677 + return 0; 678 + } 679 + 680 + static void pc_init_pcode_freq(struct xe_guc_pc *pc) 681 + { 682 + u32 min = DIV_ROUND_CLOSEST(pc->rpn_freq, GT_FREQUENCY_MULTIPLIER); 683 + u32 max = DIV_ROUND_CLOSEST(pc->rp0_freq, GT_FREQUENCY_MULTIPLIER); 684 + 685 + XE_WARN_ON(xe_pcode_init_min_freq_table(pc_to_gt(pc), min, max)); 686 + } 687 + 688 + static int pc_init_freqs(struct xe_guc_pc *pc) 689 + { 690 + int ret; 691 + 692 + 
mutex_lock(&pc->freq_lock); 693 + 694 + ret = pc_adjust_freq_bounds(pc); 695 + if (ret) 696 + goto out; 697 + 698 + ret = pc_adjust_requested_freq(pc); 699 + if (ret) 700 + goto out; 701 + 702 + pc_update_rp_values(pc); 703 + 704 + pc_init_pcode_freq(pc); 705 + 706 + /* 707 + * The frequencies are really ready for use only after the user 708 + * requested ones got restored. 709 + */ 710 + pc->freq_ready = true; 711 + 712 + out: 713 + mutex_unlock(&pc->freq_lock); 714 + return ret; 715 + } 716 + 717 + /** 718 + * xe_guc_pc_start - Start GuC's Power Conservation component 719 + * @pc: Xe_GuC_PC instance 720 + */ 721 + int xe_guc_pc_start(struct xe_guc_pc *pc) 722 + { 723 + struct xe_device *xe = pc_to_xe(pc); 724 + struct xe_gt *gt = pc_to_gt(pc); 725 + u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); 726 + int ret; 727 + 728 + XE_WARN_ON(!xe_device_guc_submission_enabled(xe)); 729 + 730 + xe_device_mem_access_get(pc_to_xe(pc)); 731 + 732 + memset(pc->bo->vmap.vaddr, 0, size); 733 + slpc_shared_data_write(pc, header.size, size); 734 + 735 + ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 736 + if (ret) 737 + return ret; 738 + 739 + ret = pc_action_reset(pc); 740 + if (ret) 741 + goto out; 742 + 743 + if (wait_for(pc_is_in_state(pc, SLPC_GLOBAL_STATE_RUNNING), 5)) { 744 + drm_err(&pc_to_xe(pc)->drm, "GuC PC Start failed\n"); 745 + ret = -EIO; 746 + goto out; 747 + } 748 + 749 + ret = pc_init_freqs(pc); 750 + if (ret) 751 + goto out; 752 + 753 + if (xe->info.platform == XE_PVC) { 754 + pc_gucrc_disable(pc); 755 + ret = 0; 756 + goto out; 757 + } 758 + 759 + ret = pc_action_setup_gucrc(pc, XE_GUCRC_FIRMWARE_CONTROL); 760 + 761 + out: 762 + xe_device_mem_access_put(pc_to_xe(pc)); 763 + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); 764 + return ret; 765 + } 766 + 767 + /** 768 + * xe_guc_pc_stop - Stop GuC's Power Conservation component 769 + * @pc: Xe_GuC_PC instance 770 + */ 771 + int xe_guc_pc_stop(struct xe_guc_pc *pc) 772 + { 773 + 
int ret; 774 + 775 + xe_device_mem_access_get(pc_to_xe(pc)); 776 + 777 + ret = pc_gucrc_disable(pc); 778 + if (ret) 779 + goto out; 780 + 781 + mutex_lock(&pc->freq_lock); 782 + pc->freq_ready = false; 783 + mutex_unlock(&pc->freq_lock); 784 + 785 + ret = pc_action_shutdown(pc); 786 + if (ret) 787 + goto out; 788 + 789 + if (wait_for(pc_is_in_state(pc, SLPC_GLOBAL_STATE_NOT_RUNNING), 5)) { 790 + drm_err(&pc_to_xe(pc)->drm, "GuC PC Shutdown failed\n"); 791 + ret = -EIO; 792 + } 793 + 794 + out: 795 + xe_device_mem_access_put(pc_to_xe(pc)); 796 + return ret; 797 + } 798 + 799 + static void pc_fini(struct drm_device *drm, void *arg) 800 + { 801 + struct xe_guc_pc *pc = arg; 802 + 803 + XE_WARN_ON(xe_guc_pc_stop(pc)); 804 + sysfs_remove_files(pc_to_gt(pc)->sysfs, pc_attrs); 805 + xe_bo_unpin_map_no_vm(pc->bo); 806 + } 807 + 808 + /** 809 + * xe_guc_pc_init - Initialize GuC's Power Conservation component 810 + * @pc: Xe_GuC_PC instance 811 + */ 812 + int xe_guc_pc_init(struct xe_guc_pc *pc) 813 + { 814 + struct xe_gt *gt = pc_to_gt(pc); 815 + struct xe_device *xe = gt_to_xe(gt); 816 + struct xe_bo *bo; 817 + u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); 818 + int err; 819 + 820 + mutex_init(&pc->freq_lock); 821 + 822 + bo = xe_bo_create_pin_map(xe, gt, NULL, size, 823 + ttm_bo_type_kernel, 824 + XE_BO_CREATE_VRAM_IF_DGFX(gt) | 825 + XE_BO_CREATE_GGTT_BIT); 826 + 827 + if (IS_ERR(bo)) 828 + return PTR_ERR(bo); 829 + 830 + pc->bo = bo; 831 + 832 + pc_init_fused_rp_values(pc); 833 + 834 + err = sysfs_create_files(gt->sysfs, pc_attrs); 835 + if (err) 836 + return err; 837 + 838 + err = drmm_add_action_or_reset(&xe->drm, pc_fini, pc); 839 + if (err) 840 + return err; 841 + 842 + return 0; 843 + }
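The start path above restores any user-requested min/max frequency after a GT reset, gated by the `freq_ready` flag so sysfs readers and writers get `-EAGAIN` while a reset is in flight. A minimal userspace C model of that guard-and-restore pattern (the `pc_model` type and helper names are hypothetical illustrations, not part of the driver):

```c
#include <assert.h>
#include <errno.h>
#include <stdbool.h>

/* Hypothetical stand-in for struct xe_guc_pc's frequency state. */
struct pc_model {
	unsigned int cur_min, cur_max;   /* what "hardware" currently holds */
	unsigned int user_requested_min; /* stashed user requests */
	unsigned int user_requested_max;
	bool freq_ready;                 /* false while a reset is in flight */
};

/* Mirrors freq_min_store(): reject writes while a reset is in progress,
 * otherwise apply and stash the request so a later reset can restore it. */
static int pc_store_min(struct pc_model *pc, unsigned int freq)
{
	if (!pc->freq_ready)
		return -EAGAIN;          /* might be mid GT reset */
	pc->cur_min = freq;
	pc->user_requested_min = freq;
	return 0;
}

/* Mirrors pc_adjust_requested_freq() + the tail of pc_init_freqs():
 * replay stashed user requests, then declare the frequencies ready. */
static void pc_restore_user_freqs(struct pc_model *pc)
{
	if (pc->user_requested_min)
		pc->cur_min = pc->user_requested_min;
	if (pc->user_requested_max)
		pc->cur_max = pc->user_requested_max;
	pc->freq_ready = true;
}
```

The key ordering, as in `pc_init_freqs()`, is that `freq_ready` flips to true only after the user requests have been replayed.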
+15
drivers/gpu/drm/xe/xe_guc_pc.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_GUC_PC_H_ 7 + #define _XE_GUC_PC_H_ 8 + 9 + #include "xe_guc_pc_types.h" 10 + 11 + int xe_guc_pc_init(struct xe_guc_pc *pc); 12 + int xe_guc_pc_start(struct xe_guc_pc *pc); 13 + int xe_guc_pc_stop(struct xe_guc_pc *pc); 14 + 15 + #endif /* _XE_GUC_PC_H_ */
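The bound-adjustment logic behind this interface (`pc_adjust_freq_bounds()` and `pc_update_rp_values()` above) reduces to simple clamping arithmetic: GuC-reported min/max that exceed the fused RP0 are pulled down to RP0, and RPn is lowered to RPe when PCODE resolves an efficient frequency below the fused minimum. A standalone sketch of that arithmetic (frequency values in MHz are illustrative only):

```c
#include <assert.h>

/* Hypothetical container for the fused/resolved frequencies. */
struct rp_freqs {
	unsigned int rp0; /* fused maximum */
	unsigned int rpe; /* runtime-resolved efficient frequency */
	unsigned int rpn; /* fused minimum */
};

static unsigned int min_u32(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

/* Mirrors pc_adjust_freq_bounds(): GuC may default either bound to an
 * overclocked RPmax, so pull anything above RP0 back down to RP0. */
static void adjust_bounds(const struct rp_freqs *rp,
			  unsigned int *min_freq, unsigned int *max_freq)
{
	*max_freq = min_u32(*max_freq, rp->rp0);
	*min_freq = min_u32(*min_freq, rp->rp0);
}

/* Mirrors pc_update_rp_values(): trust PCODE when RPe < fused RPn. */
static unsigned int resolve_rpn(const struct rp_freqs *rp)
{
	return min_u32(rp->rpn, rp->rpe);
}
```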
+34
drivers/gpu/drm/xe/xe_guc_pc_types.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_GUC_PC_TYPES_H_ 7 + #define _XE_GUC_PC_TYPES_H_ 8 + 9 + #include <linux/types.h> 10 + #include <linux/mutex.h> 11 + 12 + /** 13 + * struct xe_guc_pc - GuC Power Conservation (PC) 14 + */ 15 + struct xe_guc_pc { 16 + /** @bo: GGTT buffer object that is shared with GuC PC */ 17 + struct xe_bo *bo; 18 + /** @rp0_freq: HW RP0 frequency - The Maximum one */ 19 + u32 rp0_freq; 20 + /** @rpe_freq: HW RPe frequency - The Efficient one */ 21 + u32 rpe_freq; 22 + /** @rpn_freq: HW RPN frequency - The Minimum one */ 23 + u32 rpn_freq; 24 + /** @user_requested_min: Stash the minimum requested freq by user */ 25 + u32 user_requested_min; 26 + /** @user_requested_max: Stash the maximum requested freq by user */ 27 + u32 user_requested_max; 28 + /** @freq_lock: Let's protect the frequencies */ 29 + struct mutex freq_lock; 30 + /** @freq_ready: Only handle freq changes, if they are really ready */ 31 + bool freq_ready; 32 + }; 33 + 34 + #endif /* _XE_GUC_PC_TYPES_H_ */
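The sysfs store paths that update this state (`freq_min_store()`/`freq_max_store()`) follow the common kernel pattern: parse with `kstrtou32()`, then reject values outside `[rpn_freq, rp0_freq]` with `-EINVAL`, as `pc_set_max_freq()` does above. A userspace analogue of that parse-then-validate step (using `strtoul()` in place of `kstrtou32()`; the bounds are illustrative MHz values):

```c
#include <assert.h>
#include <errno.h>
#include <stdlib.h>

/* Illustrative stand-ins for pc->rpn_freq / pc->rp0_freq. */
#define RPN_FREQ 300u
#define RP0_FREQ 1200u

/* Parse a sysfs-style buffer and validate the requested frequency,
 * mirroring the kstrtou32() + range check in the store paths above. */
static int parse_and_validate_freq(const char *buf, unsigned int *out)
{
	char *end;
	unsigned long v = strtoul(buf, &end, 0);

	if (end == buf || (*end && *end != '\n'))
		return -EINVAL; /* not a plain number */
	if (v < RPN_FREQ || v > RP0_FREQ)
		return -EINVAL; /* outside [RPn, RP0] */
	*out = (unsigned int)v;
	return 0;
}
```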
+147
drivers/gpu/drm/xe/xe_guc_reg.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_GUC_REG_H_ 7 + #define _XE_GUC_REG_H_ 8 + 9 + #include <linux/compiler.h> 10 + #include <linux/types.h> 11 + 12 + #include "i915_reg.h" 13 + 14 + /* Definitions of GuC H/W registers, bits, etc */ 15 + 16 + #define GUC_STATUS _MMIO(0xc000) 17 + #define GS_RESET_SHIFT 0 18 + #define GS_MIA_IN_RESET (0x01 << GS_RESET_SHIFT) 19 + #define GS_BOOTROM_SHIFT 1 20 + #define GS_BOOTROM_MASK (0x7F << GS_BOOTROM_SHIFT) 21 + #define GS_BOOTROM_RSA_FAILED (0x50 << GS_BOOTROM_SHIFT) 22 + #define GS_BOOTROM_JUMP_PASSED (0x76 << GS_BOOTROM_SHIFT) 23 + #define GS_UKERNEL_SHIFT 8 24 + #define GS_UKERNEL_MASK (0xFF << GS_UKERNEL_SHIFT) 25 + #define GS_MIA_SHIFT 16 26 + #define GS_MIA_MASK (0x07 << GS_MIA_SHIFT) 27 + #define GS_MIA_CORE_STATE (0x01 << GS_MIA_SHIFT) 28 + #define GS_MIA_HALT_REQUESTED (0x02 << GS_MIA_SHIFT) 29 + #define GS_MIA_ISR_ENTRY (0x04 << GS_MIA_SHIFT) 30 + #define GS_AUTH_STATUS_SHIFT 30 31 + #define GS_AUTH_STATUS_MASK (0x03 << GS_AUTH_STATUS_SHIFT) 32 + #define GS_AUTH_STATUS_BAD (0x01 << GS_AUTH_STATUS_SHIFT) 33 + #define GS_AUTH_STATUS_GOOD (0x02 << GS_AUTH_STATUS_SHIFT) 34 + 35 + #define SOFT_SCRATCH(n) _MMIO(0xc180 + (n) * 4) 36 + #define SOFT_SCRATCH_COUNT 16 37 + 38 + #define GEN11_SOFT_SCRATCH(n) _MMIO(0x190240 + (n) * 4) 39 + #define GEN11_SOFT_SCRATCH_COUNT 4 40 + 41 + #define UOS_RSA_SCRATCH(i) _MMIO(0xc200 + (i) * 4) 42 + #define UOS_RSA_SCRATCH_COUNT 64 43 + 44 + #define DMA_ADDR_0_LOW _MMIO(0xc300) 45 + #define DMA_ADDR_0_HIGH _MMIO(0xc304) 46 + #define DMA_ADDR_1_LOW _MMIO(0xc308) 47 + #define DMA_ADDR_1_HIGH _MMIO(0xc30c) 48 + #define DMA_ADDRESS_SPACE_WOPCM (7 << 16) 49 + #define DMA_ADDRESS_SPACE_GTT (8 << 16) 50 + #define DMA_COPY_SIZE _MMIO(0xc310) 51 + #define DMA_CTRL _MMIO(0xc314) 52 + #define HUC_UKERNEL (1<<9) 53 + #define UOS_MOVE (1<<4) 54 + #define START_DMA (1<<0) 55 + #define DMA_GUC_WOPCM_OFFSET _MMIO(0xc340) 56 + 
#define GUC_WOPCM_OFFSET_VALID (1<<0) 57 + #define HUC_LOADING_AGENT_VCR (0<<1) 58 + #define HUC_LOADING_AGENT_GUC (1<<1) 59 + #define GUC_WOPCM_OFFSET_SHIFT 14 60 + #define GUC_WOPCM_OFFSET_MASK (0x3ffff << GUC_WOPCM_OFFSET_SHIFT) 61 + #define GUC_MAX_IDLE_COUNT _MMIO(0xC3E4) 62 + 63 + #define HUC_STATUS2 _MMIO(0xD3B0) 64 + #define HUC_FW_VERIFIED (1<<7) 65 + 66 + #define GEN11_HUC_KERNEL_LOAD_INFO _MMIO(0xC1DC) 67 + #define HUC_LOAD_SUCCESSFUL (1 << 0) 68 + 69 + #define GUC_WOPCM_SIZE _MMIO(0xc050) 70 + #define GUC_WOPCM_SIZE_LOCKED (1<<0) 71 + #define GUC_WOPCM_SIZE_SHIFT 12 72 + #define GUC_WOPCM_SIZE_MASK (0xfffff << GUC_WOPCM_SIZE_SHIFT) 73 + 74 + #define GEN8_GT_PM_CONFIG _MMIO(0x138140) 75 + #define GEN9LP_GT_PM_CONFIG _MMIO(0x138140) 76 + #define GEN9_GT_PM_CONFIG _MMIO(0x13816c) 77 + #define GT_DOORBELL_ENABLE (1<<0) 78 + 79 + #define GEN8_GTCR _MMIO(0x4274) 80 + #define GEN8_GTCR_INVALIDATE (1<<0) 81 + 82 + #define GEN12_GUC_TLB_INV_CR _MMIO(0xcee8) 83 + #define GEN12_GUC_TLB_INV_CR_INVALIDATE (1 << 0) 84 + 85 + #define GUC_ARAT_C6DIS _MMIO(0xA178) 86 + 87 + #define GUC_SHIM_CONTROL _MMIO(0xc064) 88 + #define GUC_DISABLE_SRAM_INIT_TO_ZEROES (1<<0) 89 + #define GUC_ENABLE_READ_CACHE_LOGIC (1<<1) 90 + #define GUC_ENABLE_MIA_CACHING (1<<2) 91 + #define GUC_GEN10_MSGCH_ENABLE (1<<4) 92 + #define GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA (1<<9) 93 + #define GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA (1<<10) 94 + #define GUC_ENABLE_MIA_CLOCK_GATING (1<<15) 95 + #define GUC_GEN10_SHIM_WC_ENABLE (1<<21) 96 + 97 + #define GUC_SEND_INTERRUPT _MMIO(0xc4c8) 98 + #define GUC_SEND_TRIGGER (1<<0) 99 + #define GEN11_GUC_HOST_INTERRUPT _MMIO(0x1901f0) 100 + 101 + #define GUC_NUM_DOORBELLS 256 102 + 103 + /* format of the HW-monitored doorbell cacheline */ 104 + struct guc_doorbell_info { 105 + u32 db_status; 106 + #define GUC_DOORBELL_DISABLED 0 107 + #define GUC_DOORBELL_ENABLED 1 108 + 109 + u32 cookie; 110 + u32 reserved[14]; 111 + } __packed; 112 + 113 + #define GEN8_DRBREGL(x) 
_MMIO(0x1000 + (x) * 8) 114 + #define GEN8_DRB_VALID (1<<0) 115 + #define GEN8_DRBREGU(x) _MMIO(0x1000 + (x) * 8 + 4) 116 + 117 + #define GEN12_DIST_DBS_POPULATED _MMIO(0xd08) 118 + #define GEN12_DOORBELLS_PER_SQIDI_SHIFT 16 119 + #define GEN12_DOORBELLS_PER_SQIDI (0xff) 120 + #define GEN12_SQIDIS_DOORBELL_EXIST (0xffff) 121 + 122 + #define DE_GUCRMR _MMIO(0x44054) 123 + 124 + #define GUC_BCS_RCS_IER _MMIO(0xC550) 125 + #define GUC_VCS2_VCS1_IER _MMIO(0xC554) 126 + #define GUC_WD_VECS_IER _MMIO(0xC558) 127 + #define GUC_PM_P24C_IER _MMIO(0xC55C) 128 + 129 + /* GuC Interrupt Vector */ 130 + #define GUC_INTR_GUC2HOST BIT(15) 131 + #define GUC_INTR_EXEC_ERROR BIT(14) 132 + #define GUC_INTR_DISPLAY_EVENT BIT(13) 133 + #define GUC_INTR_SEM_SIG BIT(12) 134 + #define GUC_INTR_IOMMU2GUC BIT(11) 135 + #define GUC_INTR_DOORBELL_RANG BIT(10) 136 + #define GUC_INTR_DMA_DONE BIT(9) 137 + #define GUC_INTR_FATAL_ERROR BIT(8) 138 + #define GUC_INTR_NOTIF_ERROR BIT(7) 139 + #define GUC_INTR_SW_INT_6 BIT(6) 140 + #define GUC_INTR_SW_INT_5 BIT(5) 141 + #define GUC_INTR_SW_INT_4 BIT(4) 142 + #define GUC_INTR_SW_INT_3 BIT(3) 143 + #define GUC_INTR_SW_INT_2 BIT(2) 144 + #define GUC_INTR_SW_INT_1 BIT(1) 145 + #define GUC_INTR_SW_INT_0 BIT(0) 146 + 147 + #endif
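The shift/mask pairs in this header decode packed status registers by plain mask-and-shift. As an example, extracting the bootrom, uKernel and authentication fields from a `GUC_STATUS` value can be sketched in standalone C, reusing the field definitions from the header above:

```c
#include <assert.h>
#include <stdint.h>

/* Field definitions copied from xe_guc_reg.h above. */
#define GS_BOOTROM_SHIFT	1
#define GS_BOOTROM_MASK		(0x7F << GS_BOOTROM_SHIFT)
#define GS_BOOTROM_JUMP_PASSED	(0x76 << GS_BOOTROM_SHIFT)
#define GS_UKERNEL_SHIFT	8
#define GS_UKERNEL_MASK		(0xFF << GS_UKERNEL_SHIFT)
#define GS_AUTH_STATUS_SHIFT	30
#define GS_AUTH_STATUS_MASK	(0x03u << GS_AUTH_STATUS_SHIFT)
#define GS_AUTH_STATUS_GOOD	(0x02u << GS_AUTH_STATUS_SHIFT)

/* Mask out the field, then shift it down to its natural value. */
static uint32_t gs_bootrom(uint32_t status)
{
	return (status & GS_BOOTROM_MASK) >> GS_BOOTROM_SHIFT;
}

static uint32_t gs_ukernel(uint32_t status)
{
	return (status & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT;
}

static int gs_auth_good(uint32_t status)
{
	return (status & GS_AUTH_STATUS_MASK) == GS_AUTH_STATUS_GOOD;
}
```

In the driver the same extraction is done generically via `REG_FIELD_GET()`, which derives the shift from the mask; the explicit `_SHIFT` macros here make the layout visible.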
+1695
drivers/gpu/drm/xe/xe_guc_submit.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #include <linux/bitfield.h> 7 + #include <linux/bitmap.h> 8 + #include <linux/circ_buf.h> 9 + #include <linux/delay.h> 10 + #include <linux/dma-fence-array.h> 11 + 12 + #include <drm/drm_managed.h> 13 + 14 + #include "xe_device.h" 15 + #include "xe_engine.h" 16 + #include "xe_guc.h" 17 + #include "xe_guc_ct.h" 18 + #include "xe_guc_engine_types.h" 19 + #include "xe_guc_submit.h" 20 + #include "xe_gt.h" 21 + #include "xe_force_wake.h" 22 + #include "xe_gpu_scheduler.h" 23 + #include "xe_hw_engine.h" 24 + #include "xe_hw_fence.h" 25 + #include "xe_lrc.h" 26 + #include "xe_macros.h" 27 + #include "xe_map.h" 28 + #include "xe_mocs.h" 29 + #include "xe_ring_ops_types.h" 30 + #include "xe_sched_job.h" 31 + #include "xe_trace.h" 32 + #include "xe_vm.h" 33 + 34 + #include "gt/intel_lrc_reg.h" 35 + 36 + static struct xe_gt * 37 + guc_to_gt(struct xe_guc *guc) 38 + { 39 + return container_of(guc, struct xe_gt, uc.guc); 40 + } 41 + 42 + static struct xe_device * 43 + guc_to_xe(struct xe_guc *guc) 44 + { 45 + return gt_to_xe(guc_to_gt(guc)); 46 + } 47 + 48 + static struct xe_guc * 49 + engine_to_guc(struct xe_engine *e) 50 + { 51 + return &e->gt->uc.guc; 52 + } 53 + 54 + /* 55 + * Helpers for engine state, using an atomic as some of the bits can transition 56 + * at the same time (e.g. a suspend can be happening at the same time as a 57 + * schedule engine done being processed). 
58 + */ 59 + #define ENGINE_STATE_REGISTERED (1 << 0) 60 + #define ENGINE_STATE_ENABLED (1 << 1) 61 + #define ENGINE_STATE_PENDING_ENABLE (1 << 2) 62 + #define ENGINE_STATE_PENDING_DISABLE (1 << 3) 63 + #define ENGINE_STATE_DESTROYED (1 << 4) 64 + #define ENGINE_STATE_SUSPENDED (1 << 5) 65 + #define ENGINE_STATE_RESET (1 << 6) 66 + #define ENGINE_STATE_KILLED (1 << 7) 67 + 68 + static bool engine_registered(struct xe_engine *e) 69 + { 70 + return atomic_read(&e->guc->state) & ENGINE_STATE_REGISTERED; 71 + } 72 + 73 + static void set_engine_registered(struct xe_engine *e) 74 + { 75 + atomic_or(ENGINE_STATE_REGISTERED, &e->guc->state); 76 + } 77 + 78 + static void clear_engine_registered(struct xe_engine *e) 79 + { 80 + atomic_and(~ENGINE_STATE_REGISTERED, &e->guc->state); 81 + } 82 + 83 + static bool engine_enabled(struct xe_engine *e) 84 + { 85 + return atomic_read(&e->guc->state) & ENGINE_STATE_ENABLED; 86 + } 87 + 88 + static void set_engine_enabled(struct xe_engine *e) 89 + { 90 + atomic_or(ENGINE_STATE_ENABLED, &e->guc->state); 91 + } 92 + 93 + static void clear_engine_enabled(struct xe_engine *e) 94 + { 95 + atomic_and(~ENGINE_STATE_ENABLED, &e->guc->state); 96 + } 97 + 98 + static bool engine_pending_enable(struct xe_engine *e) 99 + { 100 + return atomic_read(&e->guc->state) & ENGINE_STATE_PENDING_ENABLE; 101 + } 102 + 103 + static void set_engine_pending_enable(struct xe_engine *e) 104 + { 105 + atomic_or(ENGINE_STATE_PENDING_ENABLE, &e->guc->state); 106 + } 107 + 108 + static void clear_engine_pending_enable(struct xe_engine *e) 109 + { 110 + atomic_and(~ENGINE_STATE_PENDING_ENABLE, &e->guc->state); 111 + } 112 + 113 + static bool engine_pending_disable(struct xe_engine *e) 114 + { 115 + return atomic_read(&e->guc->state) & ENGINE_STATE_PENDING_DISABLE; 116 + } 117 + 118 + static void set_engine_pending_disable(struct xe_engine *e) 119 + { 120 + atomic_or(ENGINE_STATE_PENDING_DISABLE, &e->guc->state); 121 + } 122 + 123 + static void 
clear_engine_pending_disable(struct xe_engine *e) 124 + { 125 + atomic_and(~ENGINE_STATE_PENDING_DISABLE, &e->guc->state); 126 + } 127 + 128 + static bool engine_destroyed(struct xe_engine *e) 129 + { 130 + return atomic_read(&e->guc->state) & ENGINE_STATE_DESTROYED; 131 + } 132 + 133 + static void set_engine_destroyed(struct xe_engine *e) 134 + { 135 + atomic_or(ENGINE_STATE_DESTROYED, &e->guc->state); 136 + } 137 + 138 + static bool engine_banned(struct xe_engine *e) 139 + { 140 + return (e->flags & ENGINE_FLAG_BANNED); 141 + } 142 + 143 + static void set_engine_banned(struct xe_engine *e) 144 + { 145 + e->flags |= ENGINE_FLAG_BANNED; 146 + } 147 + 148 + static bool engine_suspended(struct xe_engine *e) 149 + { 150 + return atomic_read(&e->guc->state) & ENGINE_STATE_SUSPENDED; 151 + } 152 + 153 + static void set_engine_suspended(struct xe_engine *e) 154 + { 155 + atomic_or(ENGINE_STATE_SUSPENDED, &e->guc->state); 156 + } 157 + 158 + static void clear_engine_suspended(struct xe_engine *e) 159 + { 160 + atomic_and(~ENGINE_STATE_SUSPENDED, &e->guc->state); 161 + } 162 + 163 + static bool engine_reset(struct xe_engine *e) 164 + { 165 + return atomic_read(&e->guc->state) & ENGINE_STATE_RESET; 166 + } 167 + 168 + static void set_engine_reset(struct xe_engine *e) 169 + { 170 + atomic_or(ENGINE_STATE_RESET, &e->guc->state); 171 + } 172 + 173 + static bool engine_killed(struct xe_engine *e) 174 + { 175 + return atomic_read(&e->guc->state) & ENGINE_STATE_KILLED; 176 + } 177 + 178 + static void set_engine_killed(struct xe_engine *e) 179 + { 180 + atomic_or(ENGINE_STATE_KILLED, &e->guc->state); 181 + } 182 + 183 + static bool engine_killed_or_banned(struct xe_engine *e) 184 + { 185 + return engine_killed(e) || engine_banned(e); 186 + } 187 + 188 + static void guc_submit_fini(struct drm_device *drm, void *arg) 189 + { 190 + struct xe_guc *guc = arg; 191 + 192 + xa_destroy(&guc->submission_state.engine_lookup); 193 + ida_destroy(&guc->submission_state.guc_ids); 194 + 
bitmap_free(guc->submission_state.guc_ids_bitmap); 195 + } 196 + 197 + #define GUC_ID_MAX 65535 198 + #define GUC_ID_NUMBER_MLRC 4096 199 + #define GUC_ID_NUMBER_SLRC (GUC_ID_MAX - GUC_ID_NUMBER_MLRC) 200 + #define GUC_ID_START_MLRC GUC_ID_NUMBER_SLRC 201 + 202 + static const struct xe_engine_ops guc_engine_ops; 203 + 204 + static void primelockdep(struct xe_guc *guc) 205 + { 206 + if (!IS_ENABLED(CONFIG_LOCKDEP)) 207 + return; 208 + 209 + fs_reclaim_acquire(GFP_KERNEL); 210 + 211 + mutex_lock(&guc->submission_state.lock); 212 + might_lock(&guc->submission_state.suspend.lock); 213 + mutex_unlock(&guc->submission_state.lock); 214 + 215 + fs_reclaim_release(GFP_KERNEL); 216 + } 217 + 218 + int xe_guc_submit_init(struct xe_guc *guc) 219 + { 220 + struct xe_device *xe = guc_to_xe(guc); 221 + struct xe_gt *gt = guc_to_gt(guc); 222 + int err; 223 + 224 + guc->submission_state.guc_ids_bitmap = 225 + bitmap_zalloc(GUC_ID_NUMBER_MLRC, GFP_KERNEL); 226 + if (!guc->submission_state.guc_ids_bitmap) 227 + return -ENOMEM; 228 + 229 + gt->engine_ops = &guc_engine_ops; 230 + 231 + mutex_init(&guc->submission_state.lock); 232 + xa_init(&guc->submission_state.engine_lookup); 233 + ida_init(&guc->submission_state.guc_ids); 234 + 235 + spin_lock_init(&guc->submission_state.suspend.lock); 236 + guc->submission_state.suspend.context = dma_fence_context_alloc(1); 237 + 238 + primelockdep(guc); 239 + 240 + err = drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); 241 + if (err) 242 + return err; 243 + 244 + return 0; 245 + } 246 + 247 + static int alloc_guc_id(struct xe_guc *guc, struct xe_engine *e) 248 + { 249 + int ret; 250 + void *ptr; 251 + 252 + /* 253 + * Must use GFP_NOWAIT as this lock is in the dma fence signalling path, 254 + * worst case the user gets -ENOMEM on engine create and has to try again. 255 + * 256 + * FIXME: Have caller pre-alloc or post-alloc /w GFP_KERNEL to prevent 257 + * failure. 
258 + */ 259 + lockdep_assert_held(&guc->submission_state.lock); 260 + 261 + if (xe_engine_is_parallel(e)) { 262 + void *bitmap = guc->submission_state.guc_ids_bitmap; 263 + 264 + ret = bitmap_find_free_region(bitmap, GUC_ID_NUMBER_MLRC, 265 + order_base_2(e->width)); 266 + } else { 267 + ret = ida_simple_get(&guc->submission_state.guc_ids, 0, 268 + GUC_ID_NUMBER_SLRC, GFP_NOWAIT); 269 + } 270 + if (ret < 0) 271 + return ret; 272 + 273 + e->guc->id = ret; 274 + if (xe_engine_is_parallel(e)) 275 + e->guc->id += GUC_ID_START_MLRC; 276 + 277 + ptr = xa_store(&guc->submission_state.engine_lookup, 278 + e->guc->id, e, GFP_NOWAIT); 279 + if (IS_ERR(ptr)) { 280 + ret = PTR_ERR(ptr); 281 + goto err_release; 282 + } 283 + 284 + return 0; 285 + 286 + err_release: 287 + ida_simple_remove(&guc->submission_state.guc_ids, e->guc->id); 288 + return ret; 289 + } 290 + 291 + static void release_guc_id(struct xe_guc *guc, struct xe_engine *e) 292 + { 293 + mutex_lock(&guc->submission_state.lock); 294 + xa_erase(&guc->submission_state.engine_lookup, e->guc->id); 295 + if (xe_engine_is_parallel(e)) 296 + bitmap_release_region(guc->submission_state.guc_ids_bitmap, 297 + e->guc->id - GUC_ID_START_MLRC, 298 + order_base_2(e->width)); 299 + else 300 + ida_simple_remove(&guc->submission_state.guc_ids, e->guc->id); 301 + mutex_unlock(&guc->submission_state.lock); 302 + } 303 + 304 + struct engine_policy { 305 + u32 count; 306 + struct guc_update_engine_policy h2g; 307 + }; 308 + 309 + static u32 __guc_engine_policy_action_size(struct engine_policy *policy) 310 + { 311 + size_t bytes = sizeof(policy->h2g.header) + 312 + (sizeof(policy->h2g.klv[0]) * policy->count); 313 + 314 + return bytes / sizeof(u32); 315 + } 316 + 317 + static void __guc_engine_policy_start_klv(struct engine_policy *policy, 318 + u16 guc_id) 319 + { 320 + policy->h2g.header.action = 321 + XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; 322 + policy->h2g.header.guc_id = guc_id; 323 + policy->count = 0; 324 + } 325 + 326 + 
#define MAKE_ENGINE_POLICY_ADD(func, id) \ 327 + static void __guc_engine_policy_add_##func(struct engine_policy *policy, \ 328 + u32 data) \ 329 + { \ 330 + XE_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ 331 + \ 332 + policy->h2g.klv[policy->count].kl = \ 333 + FIELD_PREP(GUC_KLV_0_KEY, \ 334 + GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ 335 + FIELD_PREP(GUC_KLV_0_LEN, 1); \ 336 + policy->h2g.klv[policy->count].value = data; \ 337 + policy->count++; \ 338 + } 339 + 340 + MAKE_ENGINE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) 341 + MAKE_ENGINE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) 342 + MAKE_ENGINE_POLICY_ADD(priority, SCHEDULING_PRIORITY) 343 + #undef MAKE_ENGINE_POLICY_ADD 344 + 345 + static const int xe_engine_prio_to_guc[] = { 346 + [XE_ENGINE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL, 347 + [XE_ENGINE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL, 348 + [XE_ENGINE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH, 349 + [XE_ENGINE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH, 350 + }; 351 + 352 + static void init_policies(struct xe_guc *guc, struct xe_engine *e) 353 + { 354 + struct engine_policy policy; 355 + enum xe_engine_priority prio = e->priority; 356 + u32 timeslice_us = e->sched_props.timeslice_us; 357 + u32 preempt_timeout_us = e->sched_props.preempt_timeout_us; 358 + 359 + XE_BUG_ON(!engine_registered(e)); 360 + 361 + __guc_engine_policy_start_klv(&policy, e->guc->id); 362 + __guc_engine_policy_add_priority(&policy, xe_engine_prio_to_guc[prio]); 363 + __guc_engine_policy_add_execution_quantum(&policy, timeslice_us); 364 + __guc_engine_policy_add_preemption_timeout(&policy, preempt_timeout_us); 365 + 366 + xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, 367 + __guc_engine_policy_action_size(&policy), 0, 0); 368 + } 369 + 370 + static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_engine *e) 371 + { 372 + struct engine_policy policy; 373 + 374 + __guc_engine_policy_start_klv(&policy, e->guc->id); 375 + 
__guc_engine_policy_add_preemption_timeout(&policy, 1); 376 + 377 + xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, 378 + __guc_engine_policy_action_size(&policy), 0, 0); 379 + } 380 + 381 + #define PARALLEL_SCRATCH_SIZE 2048 382 + #define WQ_SIZE (PARALLEL_SCRATCH_SIZE / 2) 383 + #define WQ_OFFSET (PARALLEL_SCRATCH_SIZE - WQ_SIZE) 384 + #define CACHELINE_BYTES 64 385 + 386 + struct sync_semaphore { 387 + u32 semaphore; 388 + u8 unused[CACHELINE_BYTES - sizeof(u32)]; 389 + }; 390 + 391 + struct parallel_scratch { 392 + struct guc_sched_wq_desc wq_desc; 393 + 394 + struct sync_semaphore go; 395 + struct sync_semaphore join[XE_HW_ENGINE_MAX_INSTANCE]; 396 + 397 + u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) - 398 + sizeof(struct sync_semaphore) * (XE_HW_ENGINE_MAX_INSTANCE + 1)]; 399 + 400 + u32 wq[WQ_SIZE / sizeof(u32)]; 401 + }; 402 + 403 + #define parallel_read(xe_, map_, field_) \ 404 + xe_map_rd_field(xe_, &map_, 0, struct parallel_scratch, field_) 405 + #define parallel_write(xe_, map_, field_, val_) \ 406 + xe_map_wr_field(xe_, &map_, 0, struct parallel_scratch, field_, val_) 407 + 408 + static void __register_mlrc_engine(struct xe_guc *guc, 409 + struct xe_engine *e, 410 + struct guc_ctxt_registration_info *info) 411 + { 412 + #define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2) 413 + u32 action[MAX_MLRC_REG_SIZE]; 414 + int len = 0; 415 + int i; 416 + 417 + XE_BUG_ON(!xe_engine_is_parallel(e)); 418 + 419 + action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; 420 + action[len++] = info->flags; 421 + action[len++] = info->context_idx; 422 + action[len++] = info->engine_class; 423 + action[len++] = info->engine_submit_mask; 424 + action[len++] = info->wq_desc_lo; 425 + action[len++] = info->wq_desc_hi; 426 + action[len++] = info->wq_base_lo; 427 + action[len++] = info->wq_base_hi; 428 + action[len++] = info->wq_size; 429 + action[len++] = e->width; 430 + action[len++] = info->hwlrca_lo; 431 + action[len++] = info->hwlrca_hi; 432 + 433 + 
	for (i = 1; i < e->width; ++i) {
		struct xe_lrc *lrc = e->lrc + i;

		action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
		action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
	}

	XE_BUG_ON(len > MAX_MLRC_REG_SIZE);
#undef MAX_MLRC_REG_SIZE

	xe_guc_ct_send(&guc->ct, action, len, 0, 0);
}

static void __register_engine(struct xe_guc *guc,
			      struct guc_ctxt_registration_info *info)
{
	u32 action[] = {
		XE_GUC_ACTION_REGISTER_CONTEXT,
		info->flags,
		info->context_idx,
		info->engine_class,
		info->engine_submit_mask,
		info->wq_desc_lo,
		info->wq_desc_hi,
		info->wq_base_lo,
		info->wq_base_hi,
		info->wq_size,
		info->hwlrca_lo,
		info->hwlrca_hi,
	};

	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
}

static void register_engine(struct xe_engine *e)
{
	struct xe_guc *guc = engine_to_guc(e);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_lrc *lrc = e->lrc;
	struct guc_ctxt_registration_info info;

	XE_BUG_ON(engine_registered(e));

	memset(&info, 0, sizeof(info));
	info.context_idx = e->guc->id;
	info.engine_class = xe_engine_class_to_guc_class(e->class);
	info.engine_submit_mask = e->logical_mask;
	info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
	info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
	info.flags = CONTEXT_REGISTRATION_FLAG_KMD;

	if (xe_engine_is_parallel(e)) {
		u32 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
		struct iosys_map map = xe_lrc_parallel_map(lrc);

		info.wq_desc_lo = lower_32_bits(ggtt_addr +
			offsetof(struct parallel_scratch, wq_desc));
		info.wq_desc_hi = upper_32_bits(ggtt_addr +
			offsetof(struct parallel_scratch, wq_desc));
		info.wq_base_lo = lower_32_bits(ggtt_addr +
			offsetof(struct parallel_scratch, wq[0]));
		info.wq_base_hi = upper_32_bits(ggtt_addr +
			offsetof(struct parallel_scratch, wq[0]));
		info.wq_size = WQ_SIZE;

		e->guc->wqi_head = 0;
		e->guc->wqi_tail = 0;
		xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
		parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
	}

	set_engine_registered(e);
	trace_xe_engine_register(e);
	if (xe_engine_is_parallel(e))
		__register_mlrc_engine(guc, e, &info);
	else
		__register_engine(guc, &info);
	init_policies(guc, e);
}

static u32 wq_space_until_wrap(struct xe_engine *e)
{
	return (WQ_SIZE - e->guc->wqi_tail);
}

static int wq_wait_for_space(struct xe_engine *e, u32 wqi_size)
{
	struct xe_guc *guc = engine_to_guc(e);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(e->lrc);
	unsigned int sleep_period_ms = 1;

#define AVAILABLE_SPACE \
	CIRC_SPACE(e->guc->wqi_tail, e->guc->wqi_head, WQ_SIZE)
	if (wqi_size > AVAILABLE_SPACE) {
try_again:
		e->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
		if (wqi_size > AVAILABLE_SPACE) {
			if (sleep_period_ms == 1024) {
				xe_gt_reset_async(e->gt);
				return -ENODEV;
			}

			msleep(sleep_period_ms);
			sleep_period_ms <<= 1;
			goto try_again;
		}
	}
#undef AVAILABLE_SPACE

	return 0;
}

static int wq_noop_append(struct xe_engine *e)
{
	struct xe_guc *guc = engine_to_guc(e);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(e->lrc);
	u32 len_dw = wq_space_until_wrap(e) / sizeof(u32) - 1;

	if (wq_wait_for_space(e, wq_space_until_wrap(e)))
		return -ENODEV;

	XE_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));

	parallel_write(xe, map, wq[e->guc->wqi_tail / sizeof(u32)],
		       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
		       FIELD_PREP(WQ_LEN_MASK, len_dw));
	e->guc->wqi_tail = 0;

	return 0;
}

static void wq_item_append(struct xe_engine *e)
{
	struct xe_guc *guc = engine_to_guc(e);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(e->lrc);
	u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + 3];
	u32 wqi_size = (e->width + 3) * sizeof(u32);
	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
	int i = 0, j;

	if (wqi_size > wq_space_until_wrap(e)) {
		if (wq_noop_append(e))
			return;
	}
	if (wq_wait_for_space(e, wqi_size))
		return;

	wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
		   FIELD_PREP(WQ_LEN_MASK, len_dw);
	wqi[i++] = xe_lrc_descriptor(e->lrc);
	wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, e->guc->id) |
		   FIELD_PREP(WQ_RING_TAIL_MASK, e->lrc->ring.tail / sizeof(u64));
	wqi[i++] = 0;
	for (j = 1; j < e->width; ++j) {
		struct xe_lrc *lrc = e->lrc + j;

		wqi[i++] = lrc->ring.tail / sizeof(u64);
	}

	XE_BUG_ON(i != wqi_size / sizeof(u32));

	iosys_map_incr(&map, offsetof(struct parallel_scratch,
				      wq[e->guc->wqi_tail / sizeof(u32)]));
	xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
	e->guc->wqi_tail += wqi_size;
	XE_BUG_ON(e->guc->wqi_tail > WQ_SIZE);

	xe_device_wmb(xe);

	map = xe_lrc_parallel_map(e->lrc);
	parallel_write(xe, map, wq_desc.tail, e->guc->wqi_tail);
}

#define RESUME_PENDING	~0x0ull
static void submit_engine(struct xe_engine *e)
{
	struct xe_guc *guc = engine_to_guc(e);
	struct xe_lrc *lrc = e->lrc;
	u32 action[3];
	u32 g2h_len = 0;
	u32 num_g2h = 0;
	int len = 0;
	bool extra_submit = false;

	XE_BUG_ON(!engine_registered(e));

	if (xe_engine_is_parallel(e))
		wq_item_append(e);
	else
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);

	if (engine_suspended(e) && !xe_engine_is_parallel(e))
		return;

	if (!engine_enabled(e) && !engine_suspended(e)) {
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
		action[len++] = e->guc->id;
		action[len++] = GUC_CONTEXT_ENABLE;
		g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
		num_g2h = 1;
		if (xe_engine_is_parallel(e))
			extra_submit = true;

		e->guc->resume_time = RESUME_PENDING;
		set_engine_pending_enable(e);
		set_engine_enabled(e);
		trace_xe_engine_scheduling_enable(e);
	} else {
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = e->guc->id;
		trace_xe_engine_submit(e);
	}

	xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);

	if (extra_submit) {
		len = 0;
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = e->guc->id;
		trace_xe_engine_submit(e);

		xe_guc_ct_send(&guc->ct, action, len, 0, 0);
	}
}

static struct dma_fence *
guc_engine_run_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_engine *e = job->engine;

	XE_BUG_ON((engine_destroyed(e) || engine_pending_disable(e)) &&
		  !engine_banned(e) && !engine_suspended(e));

	trace_xe_sched_job_run(job);

	if (!engine_killed_or_banned(e) && !xe_sched_job_is_error(job)) {
		if (!engine_registered(e))
			register_engine(e);
		e->ring_ops->emit_job(job);
		submit_engine(e);
	}

	if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags))
		return job->fence;
	else
		return dma_fence_get(job->fence);
}

static void guc_engine_free_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);

	trace_xe_sched_job_free(job);
	xe_sched_job_put(job);
}

static int guc_read_stopped(struct xe_guc *guc)
{
	return atomic_read(&guc->submission_state.stopped);
}

#define MAKE_SCHED_CONTEXT_ACTION(e, enable_disable)			\
	u32 action[] = {						\
		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET,			\
		e->guc->id,						\
		GUC_CONTEXT_##enable_disable,				\
	}

static void disable_scheduling_deregister(struct xe_guc *guc,
					  struct xe_engine *e)
{
	MAKE_SCHED_CONTEXT_ACTION(e, DISABLE);
	int ret;

	set_min_preemption_timeout(guc, e);
	smp_rmb();
	ret = wait_event_timeout(guc->ct.wq, !engine_pending_enable(e) ||
				 guc_read_stopped(guc), HZ * 5);
	if (!ret) {
		struct xe_gpu_scheduler *sched = &e->guc->sched;

		XE_WARN_ON("Pending enable failed to respond");
		xe_sched_submission_start(sched);
		xe_gt_reset_async(e->gt);
		xe_sched_tdr_queue_imm(sched);
		return;
	}

	clear_engine_enabled(e);
	set_engine_pending_disable(e);
	set_engine_destroyed(e);
	trace_xe_engine_scheduling_disable(e);

	/*
	 * Reserve space for both G2H here as the 2nd G2H is sent from a G2H
	 * handler and we are not allowed to reserve G2H space in handlers.
	 */
	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
		       G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
}

static void guc_engine_print(struct xe_engine *e, struct drm_printer *p);

#if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE)
static void simple_error_capture(struct xe_engine *e)
{
	struct xe_guc *guc = engine_to_guc(e);
	struct drm_printer p = drm_err_printer("");
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	u32 adj_logical_mask = e->logical_mask;
	u32 width_mask = (0x1 << e->width) - 1;
	int i;
	bool cookie;

	if (e->vm && !e->vm->error_capture.capture_once) {
		e->vm->error_capture.capture_once = true;
		cookie = dma_fence_begin_signalling();
		for (i = 0; e->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
			if (adj_logical_mask & BIT(i)) {
				adj_logical_mask |= width_mask << i;
				i += e->width;
			} else {
				++i;
			}
		}

		xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
		xe_guc_ct_print(&guc->ct, &p);
		guc_engine_print(e, &p);
		for_each_hw_engine(hwe, guc_to_gt(guc), id) {
			if (hwe->class != e->hwe->class ||
			    !(BIT(hwe->logical_instance) & adj_logical_mask))
				continue;
			xe_hw_engine_print_state(hwe, &p);
		}
		xe_analyze_vm(&p, e->vm, e->gt->info.id);
		xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
		dma_fence_end_signalling(cookie);
	}
}
#else
static void simple_error_capture(struct xe_engine *e)
{
}
#endif

static enum drm_gpu_sched_stat
guc_engine_timedout_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_sched_job *tmp_job;
	struct xe_engine *e = job->engine;
	struct xe_gpu_scheduler *sched = &e->guc->sched;
	struct xe_device *xe = guc_to_xe(engine_to_guc(e));
	int err = -ETIME;
	int i = 0;

	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
		XE_WARN_ON(e->flags & ENGINE_FLAG_KERNEL);
		XE_WARN_ON(e->flags & ENGINE_FLAG_VM && !engine_killed(e));

		drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
			   xe_sched_job_seqno(job), e->guc->id, e->flags);
		simple_error_capture(e);
	} else {
		drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx",
			xe_sched_job_seqno(job), e->guc->id, e->flags);
	}
	trace_xe_sched_job_timedout(job);

	/* Kill the run_job entry point */
	xe_sched_submission_stop(sched);

	/*
	 * Kernel jobs should never fail, nor should VM jobs; if they do,
	 * something has gone wrong and the GT needs a reset.
	 */
	if (e->flags & ENGINE_FLAG_KERNEL ||
	    (e->flags & ENGINE_FLAG_VM && !engine_killed(e))) {
		if (!xe_sched_invalidate_job(job, 2)) {
			xe_sched_add_pending_job(sched, job);
			xe_sched_submission_start(sched);
			xe_gt_reset_async(e->gt);
			goto out;
		}
	}

	/* Engine state now stable, disable scheduling if needed */
	if (engine_enabled(e)) {
		struct xe_guc *guc = engine_to_guc(e);
		int ret;

		if (engine_reset(e))
			err = -EIO;
		set_engine_banned(e);
		xe_engine_get(e);
		disable_scheduling_deregister(engine_to_guc(e), e);

		/*
		 * Must wait for scheduling to be disabled before signalling
		 * any fences; if the GT is broken the GT reset code should
		 * signal us.
		 *
		 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
		 * error) messages which can cause the schedule disable to get
		 * lost. If this occurs, trigger a GT reset to recover.
		 */
		smp_rmb();
		ret = wait_event_timeout(guc->ct.wq,
					 !engine_pending_disable(e) ||
					 guc_read_stopped(guc), HZ * 5);
		if (!ret) {
			XE_WARN_ON("Schedule disable failed to respond");
			xe_sched_add_pending_job(sched, job);
			xe_sched_submission_start(sched);
			xe_gt_reset_async(e->gt);
			xe_sched_tdr_queue_imm(sched);
			goto out;
		}
	}

	/* Stop fence signaling */
	xe_hw_fence_irq_stop(e->fence_irq);

	/*
	 * Fence state now stable, stop / start scheduler which cleans up any
	 * fences that are complete
	 */
	xe_sched_add_pending_job(sched, job);
	xe_sched_submission_start(sched);
	xe_sched_tdr_queue_imm(&e->guc->sched);

	/* Mark all outstanding jobs as bad, thus completing them */
	spin_lock(&sched->base.job_list_lock);
	list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list)
		xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED);
	spin_unlock(&sched->base.job_list_lock);

	/* Start fence signaling */
	xe_hw_fence_irq_start(e->fence_irq);

out:
	return DRM_GPU_SCHED_STAT_NOMINAL;
}

static void __guc_engine_fini_async(struct work_struct *w)
{
	struct xe_guc_engine *ge =
		container_of(w, struct xe_guc_engine, fini_async);
	struct xe_engine *e = ge->engine;
	struct xe_guc *guc = engine_to_guc(e);

	trace_xe_engine_destroy(e);

	if (e->flags & ENGINE_FLAG_PERSISTENT)
		xe_device_remove_persitent_engines(gt_to_xe(e->gt), e);
	release_guc_id(guc, e);
	xe_sched_entity_fini(&ge->entity);
	xe_sched_fini(&ge->sched);

	if (!(e->flags & ENGINE_FLAG_KERNEL)) {
		kfree(ge);
		xe_engine_fini(e);
	}
}

static void guc_engine_fini_async(struct xe_engine *e)
{
	bool kernel = e->flags & ENGINE_FLAG_KERNEL;

	INIT_WORK(&e->guc->fini_async, __guc_engine_fini_async);
	queue_work(system_unbound_wq, &e->guc->fini_async);

	/* We must block on kernel engines so slabs are empty on driver unload */
	if (kernel) {
		struct xe_guc_engine *ge = e->guc;

		flush_work(&ge->fini_async);
		kfree(ge);
		xe_engine_fini(e);
	}
}

static void __guc_engine_fini(struct xe_guc *guc, struct xe_engine *e)
{
	/*
	 * Might be done from within the GPU scheduler, need to do async as we
	 * fini the scheduler when the engine is fini'd, the scheduler can't
	 * complete fini within itself (circular dependency). Async resolves
	 * this, and we don't really care when everything is fini'd, just that
	 * it is.
	 */
	guc_engine_fini_async(e);
}

static void __guc_engine_process_msg_cleanup(struct xe_sched_msg *msg)
{
	struct xe_engine *e = msg->private_data;
	struct xe_guc *guc = engine_to_guc(e);

	XE_BUG_ON(e->flags & ENGINE_FLAG_KERNEL);
	trace_xe_engine_cleanup_entity(e);

	if (engine_registered(e))
		disable_scheduling_deregister(guc, e);
	else
		__guc_engine_fini(guc, e);
}

static bool guc_engine_allowed_to_change_state(struct xe_engine *e)
{
	return !engine_killed_or_banned(e) && engine_registered(e);
}

static void __guc_engine_process_msg_set_sched_props(struct xe_sched_msg *msg)
{
	struct xe_engine *e = msg->private_data;
	struct xe_guc *guc = engine_to_guc(e);

	if (guc_engine_allowed_to_change_state(e))
		init_policies(guc, e);
	kfree(msg);
}

static void suspend_fence_signal(struct xe_engine *e)
{
	struct xe_guc *guc = engine_to_guc(e);

	XE_BUG_ON(!engine_suspended(e) && !engine_killed(e) &&
		  !guc_read_stopped(guc));
	XE_BUG_ON(!e->guc->suspend_pending);

	e->guc->suspend_pending = false;
	smp_wmb();
	wake_up(&e->guc->suspend_wait);
}

static void __guc_engine_process_msg_suspend(struct xe_sched_msg *msg)
{
	struct xe_engine *e = msg->private_data;
	struct xe_guc *guc = engine_to_guc(e);

	if (guc_engine_allowed_to_change_state(e) && !engine_suspended(e) &&
	    engine_enabled(e)) {
		wait_event(guc->ct.wq, e->guc->resume_time != RESUME_PENDING ||
			   guc_read_stopped(guc));

		if (!guc_read_stopped(guc)) {
			MAKE_SCHED_CONTEXT_ACTION(e, DISABLE);
			s64 since_resume_ms =
				ktime_ms_delta(ktime_get(),
					       e->guc->resume_time);
			s64 wait_ms = e->vm->preempt.min_run_period_ms -
				since_resume_ms;

			if (wait_ms > 0 && e->guc->resume_time)
				msleep(wait_ms);

			set_engine_suspended(e);
			clear_engine_enabled(e);
			set_engine_pending_disable(e);
			trace_xe_engine_scheduling_disable(e);

			xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
				       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
		}
	} else if (e->guc->suspend_pending) {
		set_engine_suspended(e);
		suspend_fence_signal(e);
	}
}

static void __guc_engine_process_msg_resume(struct xe_sched_msg *msg)
{
	struct xe_engine *e = msg->private_data;
	struct xe_guc *guc = engine_to_guc(e);

	if (guc_engine_allowed_to_change_state(e)) {
		MAKE_SCHED_CONTEXT_ACTION(e, ENABLE);

		e->guc->resume_time = RESUME_PENDING;
		clear_engine_suspended(e);
		set_engine_pending_enable(e);
		set_engine_enabled(e);
		trace_xe_engine_scheduling_enable(e);

		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
	} else {
		clear_engine_suspended(e);
	}
}

#define CLEANUP		1	/* Non-zero values to catch uninitialized msg */
#define SET_SCHED_PROPS	2
#define SUSPEND		3
#define RESUME		4
1036 + static void guc_engine_process_msg(struct xe_sched_msg *msg) 1037 + { 1038 + trace_xe_sched_msg_recv(msg); 1039 + 1040 + switch (msg->opcode) { 1041 + case CLEANUP: 1042 + __guc_engine_process_msg_cleanup(msg); 1043 + break; 1044 + case SET_SCHED_PROPS: 1045 + __guc_engine_process_msg_set_sched_props(msg); 1046 + break; 1047 + case SUSPEND: 1048 + __guc_engine_process_msg_suspend(msg); 1049 + break; 1050 + case RESUME: 1051 + __guc_engine_process_msg_resume(msg); 1052 + break; 1053 + default: 1054 + XE_BUG_ON("Unknown message type"); 1055 + } 1056 + } 1057 + 1058 + static const struct drm_sched_backend_ops drm_sched_ops = { 1059 + .run_job = guc_engine_run_job, 1060 + .free_job = guc_engine_free_job, 1061 + .timedout_job = guc_engine_timedout_job, 1062 + }; 1063 + 1064 + static const struct xe_sched_backend_ops xe_sched_ops = { 1065 + .process_msg = guc_engine_process_msg, 1066 + }; 1067 + 1068 + static int guc_engine_init(struct xe_engine *e) 1069 + { 1070 + struct xe_gpu_scheduler *sched; 1071 + struct xe_guc *guc = engine_to_guc(e); 1072 + struct xe_guc_engine *ge; 1073 + long timeout; 1074 + int err; 1075 + 1076 + XE_BUG_ON(!xe_device_guc_submission_enabled(guc_to_xe(guc))); 1077 + 1078 + ge = kzalloc(sizeof(*ge), GFP_KERNEL); 1079 + if (!ge) 1080 + return -ENOMEM; 1081 + 1082 + e->guc = ge; 1083 + ge->engine = e; 1084 + init_waitqueue_head(&ge->suspend_wait); 1085 + 1086 + timeout = xe_vm_no_dma_fences(e->vm) ? 
MAX_SCHEDULE_TIMEOUT : HZ * 5; 1087 + err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, NULL, 1088 + e->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, 1089 + 64, timeout, guc_to_gt(guc)->ordered_wq, NULL, 1090 + e->name, gt_to_xe(e->gt)->drm.dev); 1091 + if (err) 1092 + goto err_free; 1093 + 1094 + sched = &ge->sched; 1095 + err = xe_sched_entity_init(&ge->entity, sched); 1096 + if (err) 1097 + goto err_sched; 1098 + e->priority = XE_ENGINE_PRIORITY_NORMAL; 1099 + 1100 + mutex_lock(&guc->submission_state.lock); 1101 + 1102 + err = alloc_guc_id(guc, e); 1103 + if (err) 1104 + goto err_entity; 1105 + 1106 + e->entity = &ge->entity; 1107 + 1108 + if (guc_read_stopped(guc)) 1109 + xe_sched_stop(sched); 1110 + 1111 + mutex_unlock(&guc->submission_state.lock); 1112 + 1113 + switch (e->class) { 1114 + case XE_ENGINE_CLASS_RENDER: 1115 + sprintf(e->name, "rcs%d", e->guc->id); 1116 + break; 1117 + case XE_ENGINE_CLASS_VIDEO_DECODE: 1118 + sprintf(e->name, "vcs%d", e->guc->id); 1119 + break; 1120 + case XE_ENGINE_CLASS_VIDEO_ENHANCE: 1121 + sprintf(e->name, "vecs%d", e->guc->id); 1122 + break; 1123 + case XE_ENGINE_CLASS_COPY: 1124 + sprintf(e->name, "bcs%d", e->guc->id); 1125 + break; 1126 + case XE_ENGINE_CLASS_COMPUTE: 1127 + sprintf(e->name, "ccs%d", e->guc->id); 1128 + break; 1129 + default: 1130 + XE_WARN_ON(e->class); 1131 + } 1132 + 1133 + trace_xe_engine_create(e); 1134 + 1135 + return 0; 1136 + 1137 + err_entity: 1138 + xe_sched_entity_fini(&ge->entity); 1139 + err_sched: 1140 + xe_sched_fini(&ge->sched); 1141 + err_free: 1142 + kfree(ge); 1143 + 1144 + return err; 1145 + } 1146 + 1147 + static void guc_engine_kill(struct xe_engine *e) 1148 + { 1149 + trace_xe_engine_kill(e); 1150 + set_engine_killed(e); 1151 + xe_sched_tdr_queue_imm(&e->guc->sched); 1152 + } 1153 + 1154 + static void guc_engine_add_msg(struct xe_engine *e, struct xe_sched_msg *msg, 1155 + u32 opcode) 1156 + { 1157 + INIT_LIST_HEAD(&msg->link); 1158 + msg->opcode = opcode; 1159 + 
msg->private_data = e; 1160 + 1161 + trace_xe_sched_msg_add(msg); 1162 + xe_sched_add_msg(&e->guc->sched, msg); 1163 + } 1164 + 1165 + #define STATIC_MSG_CLEANUP 0 1166 + #define STATIC_MSG_SUSPEND 1 1167 + #define STATIC_MSG_RESUME 2 1168 + static void guc_engine_fini(struct xe_engine *e) 1169 + { 1170 + struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_CLEANUP; 1171 + 1172 + if (!(e->flags & ENGINE_FLAG_KERNEL)) 1173 + guc_engine_add_msg(e, msg, CLEANUP); 1174 + else 1175 + __guc_engine_fini(engine_to_guc(e), e); 1176 + } 1177 + 1178 + static int guc_engine_set_priority(struct xe_engine *e, 1179 + enum xe_engine_priority priority) 1180 + { 1181 + struct xe_sched_msg *msg; 1182 + 1183 + if (e->priority == priority || engine_killed_or_banned(e)) 1184 + return 0; 1185 + 1186 + msg = kmalloc(sizeof(*msg), GFP_KERNEL); 1187 + if (!msg) 1188 + return -ENOMEM; 1189 + 1190 + guc_engine_add_msg(e, msg, SET_SCHED_PROPS); 1191 + e->priority = priority; 1192 + 1193 + return 0; 1194 + } 1195 + 1196 + static int guc_engine_set_timeslice(struct xe_engine *e, u32 timeslice_us) 1197 + { 1198 + struct xe_sched_msg *msg; 1199 + 1200 + if (e->sched_props.timeslice_us == timeslice_us || 1201 + engine_killed_or_banned(e)) 1202 + return 0; 1203 + 1204 + msg = kmalloc(sizeof(*msg), GFP_KERNEL); 1205 + if (!msg) 1206 + return -ENOMEM; 1207 + 1208 + e->sched_props.timeslice_us = timeslice_us; 1209 + guc_engine_add_msg(e, msg, SET_SCHED_PROPS); 1210 + 1211 + return 0; 1212 + } 1213 + 1214 + static int guc_engine_set_preempt_timeout(struct xe_engine *e, 1215 + u32 preempt_timeout_us) 1216 + { 1217 + struct xe_sched_msg *msg; 1218 + 1219 + if (e->sched_props.preempt_timeout_us == preempt_timeout_us || 1220 + engine_killed_or_banned(e)) 1221 + return 0; 1222 + 1223 + msg = kmalloc(sizeof(*msg), GFP_KERNEL); 1224 + if (!msg) 1225 + return -ENOMEM; 1226 + 1227 + e->sched_props.preempt_timeout_us = preempt_timeout_us; 1228 + guc_engine_add_msg(e, msg, SET_SCHED_PROPS); 1229 + 1230 + 
return 0; 1231 + } 1232 + 1233 + static int guc_engine_set_job_timeout(struct xe_engine *e, u32 job_timeout_ms) 1234 + { 1235 + struct xe_gpu_scheduler *sched = &e->guc->sched; 1236 + 1237 + XE_BUG_ON(engine_registered(e)); 1238 + XE_BUG_ON(engine_banned(e)); 1239 + XE_BUG_ON(engine_killed(e)); 1240 + 1241 + sched->base.timeout = job_timeout_ms; 1242 + 1243 + return 0; 1244 + } 1245 + 1246 + static int guc_engine_suspend(struct xe_engine *e) 1247 + { 1248 + struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_SUSPEND; 1249 + 1250 + if (engine_killed_or_banned(e) || e->guc->suspend_pending) 1251 + return -EINVAL; 1252 + 1253 + e->guc->suspend_pending = true; 1254 + guc_engine_add_msg(e, msg, SUSPEND); 1255 + 1256 + return 0; 1257 + } 1258 + 1259 + static void guc_engine_suspend_wait(struct xe_engine *e) 1260 + { 1261 + struct xe_guc *guc = engine_to_guc(e); 1262 + 1263 + wait_event(e->guc->suspend_wait, !e->guc->suspend_pending || 1264 + guc_read_stopped(guc)); 1265 + } 1266 + 1267 + static void guc_engine_resume(struct xe_engine *e) 1268 + { 1269 + struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_RESUME; 1270 + 1271 + XE_BUG_ON(e->guc->suspend_pending); 1272 + 1273 + xe_mocs_init_engine(e); 1274 + guc_engine_add_msg(e, msg, RESUME); 1275 + } 1276 + 1277 + /* 1278 + * All of these functions are an abstraction layer which other parts of XE can 1279 + * use to trap into the GuC backend. All of these functions, aside from init, 1280 + * really shouldn't do much other than trap into the DRM scheduler which 1281 + * synchronizes these operations. 
1282 + */ 1283 + static const struct xe_engine_ops guc_engine_ops = { 1284 + .init = guc_engine_init, 1285 + .kill = guc_engine_kill, 1286 + .fini = guc_engine_fini, 1287 + .set_priority = guc_engine_set_priority, 1288 + .set_timeslice = guc_engine_set_timeslice, 1289 + .set_preempt_timeout = guc_engine_set_preempt_timeout, 1290 + .set_job_timeout = guc_engine_set_job_timeout, 1291 + .suspend = guc_engine_suspend, 1292 + .suspend_wait = guc_engine_suspend_wait, 1293 + .resume = guc_engine_resume, 1294 + }; 1295 + 1296 + static void guc_engine_stop(struct xe_guc *guc, struct xe_engine *e) 1297 + { 1298 + struct xe_gpu_scheduler *sched = &e->guc->sched; 1299 + 1300 + /* Stop scheduling + flush any DRM scheduler operations */ 1301 + xe_sched_submission_stop(sched); 1302 + 1303 + /* Clean up lost G2H + reset engine state */ 1304 + if (engine_destroyed(e) && engine_registered(e)) { 1305 + if (engine_banned(e)) 1306 + xe_engine_put(e); 1307 + else 1308 + __guc_engine_fini(guc, e); 1309 + } 1310 + if (e->guc->suspend_pending) { 1311 + set_engine_suspended(e); 1312 + suspend_fence_signal(e); 1313 + } 1314 + atomic_and(ENGINE_STATE_DESTROYED | ENGINE_STATE_SUSPENDED, 1315 + &e->guc->state); 1316 + e->guc->resume_time = 0; 1317 + trace_xe_engine_stop(e); 1318 + 1319 + /* 1320 + * Ban any engine (aside from kernel and engines used for VM ops) with a 1321 + * started but not complete job or if a job has gone through a GT reset 1322 + * more than twice. 
1323 + */ 1324 + if (!(e->flags & (ENGINE_FLAG_KERNEL | ENGINE_FLAG_VM))) { 1325 + struct xe_sched_job *job = xe_sched_first_pending_job(sched); 1326 + 1327 + if (job) { 1328 + if ((xe_sched_job_started(job) && 1329 + !xe_sched_job_completed(job)) || 1330 + xe_sched_invalidate_job(job, 2)) { 1331 + trace_xe_sched_job_ban(job); 1332 + xe_sched_tdr_queue_imm(&e->guc->sched); 1333 + set_engine_banned(e); 1334 + } 1335 + } 1336 + } 1337 + } 1338 + 1339 + int xe_guc_submit_reset_prepare(struct xe_guc *guc) 1340 + { 1341 + int ret; 1342 + 1343 + /* 1344 + * Using an atomic here rather than submission_state.lock as this 1345 + * function can be called while holding the CT lock (engine reset 1346 + * failure). submission_state.lock needs the CT lock to resubmit jobs. 1347 + * Atomic is not ideal, but it works to prevent against concurrent reset 1348 + * and releasing any TDRs waiting on guc->submission_state.stopped. 1349 + */ 1350 + ret = atomic_fetch_or(1, &guc->submission_state.stopped); 1351 + smp_wmb(); 1352 + wake_up_all(&guc->ct.wq); 1353 + 1354 + return ret; 1355 + } 1356 + 1357 + void xe_guc_submit_reset_wait(struct xe_guc *guc) 1358 + { 1359 + wait_event(guc->ct.wq, !guc_read_stopped(guc)); 1360 + } 1361 + 1362 + int xe_guc_submit_stop(struct xe_guc *guc) 1363 + { 1364 + struct xe_engine *e; 1365 + unsigned long index; 1366 + 1367 + XE_BUG_ON(guc_read_stopped(guc) != 1); 1368 + 1369 + mutex_lock(&guc->submission_state.lock); 1370 + 1371 + xa_for_each(&guc->submission_state.engine_lookup, index, e) 1372 + guc_engine_stop(guc, e); 1373 + 1374 + mutex_unlock(&guc->submission_state.lock); 1375 + 1376 + /* 1377 + * No one can enter the backend at this point, aside from new engine 1378 + * creation which is protected by guc->submission_state.lock. 
1379 + */ 1380 + 1381 + return 0; 1382 + } 1383 + 1384 + static void guc_engine_start(struct xe_engine *e) 1385 + { 1386 + struct xe_gpu_scheduler *sched = &e->guc->sched; 1387 + 1388 + if (!engine_killed_or_banned(e)) { 1389 + int i; 1390 + 1391 + trace_xe_engine_resubmit(e); 1392 + for (i = 0; i < e->width; ++i) 1393 + xe_lrc_set_ring_head(e->lrc + i, e->lrc[i].ring.tail); 1394 + xe_sched_resubmit_jobs(sched); 1395 + } 1396 + 1397 + xe_sched_submission_start(sched); 1398 + } 1399 + 1400 + int xe_guc_submit_start(struct xe_guc *guc) 1401 + { 1402 + struct xe_engine *e; 1403 + unsigned long index; 1404 + 1405 + XE_BUG_ON(guc_read_stopped(guc) != 1); 1406 + 1407 + mutex_lock(&guc->submission_state.lock); 1408 + atomic_dec(&guc->submission_state.stopped); 1409 + xa_for_each(&guc->submission_state.engine_lookup, index, e) 1410 + guc_engine_start(e); 1411 + mutex_unlock(&guc->submission_state.lock); 1412 + 1413 + wake_up_all(&guc->ct.wq); 1414 + 1415 + return 0; 1416 + } 1417 + 1418 + static struct xe_engine * 1419 + g2h_engine_lookup(struct xe_guc *guc, u32 guc_id) 1420 + { 1421 + struct xe_device *xe = guc_to_xe(guc); 1422 + struct xe_engine *e; 1423 + 1424 + if (unlikely(guc_id >= GUC_ID_MAX)) { 1425 + drm_err(&xe->drm, "Invalid guc_id %u", guc_id); 1426 + return NULL; 1427 + } 1428 + 1429 + e = xa_load(&guc->submission_state.engine_lookup, guc_id); 1430 + if (unlikely(!e)) { 1431 + drm_err(&xe->drm, "Not engine present for guc_id %u", guc_id); 1432 + return NULL; 1433 + } 1434 + 1435 + XE_BUG_ON(e->guc->id != guc_id); 1436 + 1437 + return e; 1438 + } 1439 + 1440 + static void deregister_engine(struct xe_guc *guc, struct xe_engine *e) 1441 + { 1442 + u32 action[] = { 1443 + XE_GUC_ACTION_DEREGISTER_CONTEXT, 1444 + e->guc->id, 1445 + }; 1446 + 1447 + trace_xe_engine_deregister(e); 1448 + 1449 + xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action)); 1450 + } 1451 + 1452 + int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) 1453 + { 1454 + 
struct xe_device *xe = guc_to_xe(guc); 1455 + struct xe_engine *e; 1456 + u32 guc_id = msg[0]; 1457 + 1458 + if (unlikely(len < 2)) { 1459 + drm_err(&xe->drm, "Invalid length %u", len); 1460 + return -EPROTO; 1461 + } 1462 + 1463 + e = g2h_engine_lookup(guc, guc_id); 1464 + if (unlikely(!e)) 1465 + return -EPROTO; 1466 + 1467 + if (unlikely(!engine_pending_enable(e) && 1468 + !engine_pending_disable(e))) { 1469 + drm_err(&xe->drm, "Unexpected engine state 0x%04x", 1470 + atomic_read(&e->guc->state)); 1471 + return -EPROTO; 1472 + } 1473 + 1474 + trace_xe_engine_scheduling_done(e); 1475 + 1476 + if (engine_pending_enable(e)) { 1477 + e->guc->resume_time = ktime_get(); 1478 + clear_engine_pending_enable(e); 1479 + smp_wmb(); 1480 + wake_up_all(&guc->ct.wq); 1481 + } else { 1482 + clear_engine_pending_disable(e); 1483 + if (e->guc->suspend_pending) { 1484 + suspend_fence_signal(e); 1485 + } else { 1486 + if (engine_banned(e)) { 1487 + smp_wmb(); 1488 + wake_up_all(&guc->ct.wq); 1489 + } 1490 + deregister_engine(guc, e); 1491 + } 1492 + } 1493 + 1494 + return 0; 1495 + } 1496 + 1497 + int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) 1498 + { 1499 + struct xe_device *xe = guc_to_xe(guc); 1500 + struct xe_engine *e; 1501 + u32 guc_id = msg[0]; 1502 + 1503 + if (unlikely(len < 1)) { 1504 + drm_err(&xe->drm, "Invalid length %u", len); 1505 + return -EPROTO; 1506 + } 1507 + 1508 + e = g2h_engine_lookup(guc, guc_id); 1509 + if (unlikely(!e)) 1510 + return -EPROTO; 1511 + 1512 + if (!engine_destroyed(e) || engine_pending_disable(e) || 1513 + engine_pending_enable(e) || engine_enabled(e)) { 1514 + drm_err(&xe->drm, "Unexpected engine state 0x%04x", 1515 + atomic_read(&e->guc->state)); 1516 + return -EPROTO; 1517 + } 1518 + 1519 + trace_xe_engine_deregister_done(e); 1520 + 1521 + clear_engine_registered(e); 1522 + if (engine_banned(e)) 1523 + xe_engine_put(e); 1524 + else 1525 + __guc_engine_fini(guc, e); 1526 + 1527 + return 0; 1528 + } 1529 + 1530 + 
int xe_guc_engine_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_engine *e;
	u32 guc_id = msg[0];

	if (unlikely(len < 1)) {
		drm_err(&xe->drm, "Invalid length %u", len);
		return -EPROTO;
	}

	e = g2h_engine_lookup(guc, guc_id);
	if (unlikely(!e))
		return -EPROTO;

	drm_info(&xe->drm, "Engine reset: guc_id=%d", guc_id);

	/* FIXME: Do error capture, most likely async */

	trace_xe_engine_reset(e);

	/*
	 * A banned engine is a NOP at this point (came from
	 * guc_engine_timedout_job). Otherwise, kick the DRM scheduler to
	 * cancel jobs by setting the job's timeout to the minimum value,
	 * which triggers guc_engine_timedout_job.
	 */
	set_engine_reset(e);
	if (!engine_banned(e))
		xe_sched_tdr_queue_imm(&e->guc->sched);

	return 0;
}

int xe_guc_engine_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
					   u32 len)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_engine *e;
	u32 guc_id = msg[0];

	if (unlikely(len < 1)) {
		drm_err(&xe->drm, "Invalid length %u", len);
		return -EPROTO;
	}

	e = g2h_engine_lookup(guc, guc_id);
	if (unlikely(!e))
		return -EPROTO;

	drm_warn(&xe->drm, "Engine memory cat error: guc_id=%d", guc_id);
	trace_xe_engine_memory_cat_error(e);

	/* Treat the same as an engine reset */
	set_engine_reset(e);
	if (!engine_banned(e))
		xe_sched_tdr_queue_imm(&e->guc->sched);

	return 0;
}

int xe_guc_engine_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_device *xe = guc_to_xe(guc);
	u8 guc_class, instance;
	u32 reason;

	if (unlikely(len != 3)) {
		drm_err(&xe->drm, "Invalid length %u", len);
		return -EPROTO;
	}

	guc_class = msg[0];
	instance = msg[1];
	reason = msg[2];

	/* Unexpected failure of a hardware feature, log an actual error */
	drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X",
		guc_class, instance, reason);

	xe_gt_reset_async(guc_to_gt(guc));

	return 0;
}

static void guc_engine_wq_print(struct xe_engine *e, struct drm_printer *p)
{
	struct xe_guc *guc = engine_to_guc(e);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(e->lrc);
	int i;

	drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n",
		   e->guc->wqi_head, parallel_read(xe, map, wq_desc.head));
	drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n",
		   e->guc->wqi_tail, parallel_read(xe, map, wq_desc.tail));
	drm_printf(p, "\tWQ status: %u\n",
		   parallel_read(xe, map, wq_desc.wq_status));
	if (parallel_read(xe, map, wq_desc.head) !=
	    parallel_read(xe, map, wq_desc.tail)) {
		for (i = parallel_read(xe, map, wq_desc.head);
		     i != parallel_read(xe, map, wq_desc.tail);
		     i = (i + sizeof(u32)) % WQ_SIZE)
			drm_printf(p, "\tWQ[%ld]: 0x%08x\n", i / sizeof(u32),
				   parallel_read(xe, map, wq[i / sizeof(u32)]));
	}
}

static void guc_engine_print(struct xe_engine *e, struct drm_printer *p)
{
	struct xe_gpu_scheduler *sched = &e->guc->sched;
	struct xe_sched_job *job;
	int i;

	drm_printf(p, "\nGuC ID: %d\n", e->guc->id);
	drm_printf(p, "\tName: %s\n", e->name);
	drm_printf(p, "\tClass: %d\n", e->class);
	drm_printf(p, "\tLogical mask: 0x%x\n", e->logical_mask);
	drm_printf(p, "\tWidth: %d\n", e->width);
	drm_printf(p, "\tRef: %d\n", kref_read(&e->refcount));
	drm_printf(p, "\tTimeout: %ld (ms)\n", sched->base.timeout);
	drm_printf(p, "\tTimeslice: %u (us)\n", e->sched_props.timeslice_us);
	drm_printf(p, "\tPreempt timeout: %u (us)\n",
		   e->sched_props.preempt_timeout_us);
	for (i = 0; i < e->width; ++i) {
		struct xe_lrc *lrc = e->lrc + i;

		drm_printf(p, "\tHW Context Desc: 0x%08x\n",
			   lower_32_bits(xe_lrc_ggtt_addr(lrc)));
		drm_printf(p, "\tLRC Head: (memory) %u\n",
			   xe_lrc_ring_head(lrc));
		drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
			   lrc->ring.tail,
			   xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL));
		drm_printf(p, "\tStart seqno: (memory) %d\n",
			   xe_lrc_start_seqno(lrc));
		drm_printf(p, "\tSeqno: (memory) %d\n", xe_lrc_seqno(lrc));
	}
	drm_printf(p, "\tSchedule State: 0x%x\n", atomic_read(&e->guc->state));
	drm_printf(p, "\tFlags: 0x%lx\n", e->flags);
	if (xe_engine_is_parallel(e))
		guc_engine_wq_print(e, p);

	spin_lock(&sched->base.job_list_lock);
	list_for_each_entry(job, &sched->base.pending_list, drm.list)
		drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n",
			   xe_sched_job_seqno(job),
			   dma_fence_is_signaled(job->fence) ? 1 : 0,
			   dma_fence_is_signaled(&job->drm.s_fence->finished) ?
			   1 : 0);
	spin_unlock(&sched->base.job_list_lock);
}

void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
{
	struct xe_engine *e;
	unsigned long index;

	if (!xe_device_guc_submission_enabled(guc_to_xe(guc)))
		return;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.engine_lookup, index, e)
		guc_engine_print(e, p);
	mutex_unlock(&guc->submission_state.lock);
}
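`guc_engine_wq_print` walks the work queue from head to tail in `u32` steps, wrapping the byte index at `WQ_SIZE`. A small user-space sketch of that ring-buffer traversal (with an arbitrary `WQ_SIZE`, not the driver's real value):

```c
#include <assert.h>
#include <stdint.h>

#define WQ_SIZE 64	/* bytes; an assumed size for illustration */

/*
 * Walk a byte-indexed ring from head to tail in u32 steps, wrapping at
 * WQ_SIZE, the same way guc_engine_wq_print iterates pending work items.
 */
static int wq_count_pending(uint32_t head, uint32_t tail)
{
	int n = 0;
	uint32_t i;

	for (i = head; i != tail; i = (i + sizeof(uint32_t)) % WQ_SIZE)
		n++;
	return n;
}
```

The `head != tail` loop condition means an empty queue (head equal to tail) iterates zero times, and a tail that has wrapped past the end of the buffer is still reached correctly.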
drivers/gpu/drm/xe/xe_guc_submit.h | 30 +
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

#ifndef _XE_GUC_SUBMIT_H_
#define _XE_GUC_SUBMIT_H_

#include <linux/types.h>

struct drm_printer;
struct xe_engine;
struct xe_guc;

int xe_guc_submit_init(struct xe_guc *guc);
void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p);

int xe_guc_submit_reset_prepare(struct xe_guc *guc);
void xe_guc_submit_reset_wait(struct xe_guc *guc);
int xe_guc_submit_stop(struct xe_guc *guc);
int xe_guc_submit_start(struct xe_guc *guc);

int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
int xe_guc_engine_reset_handler(struct xe_guc *guc, u32 *msg, u32 len);
int xe_guc_engine_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
					   u32 len);
int xe_guc_engine_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len);

#endif
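The header exposes one handler per G2H event class, which the CT layer routes to by action id. A toy dispatch sketch of that pattern, with invented action ids (`ACT_SCHED_DONE`, `ACT_DEREG_DONE`) standing in for the real GuC ABI values:

```c
#include <assert.h>
#include <stdint.h>

/* Hypothetical action ids; the real values live in the GuC ABI headers. */
enum { ACT_SCHED_DONE = 1, ACT_DEREG_DONE = 2 };

static int sched_done(const uint32_t *msg, uint32_t len)
{
	(void)msg;
	return len >= 2 ? 0 : -71;	/* needs guc_id + state dword */
}

static int dereg_done(const uint32_t *msg, uint32_t len)
{
	(void)msg;
	return len >= 1 ? 0 : -71;	/* needs only guc_id */
}

/* Route a decoded G2H event to its handler; -EPROTO on unknown action. */
static int dispatch(uint32_t action, const uint32_t *msg, uint32_t len)
{
	switch (action) {
	case ACT_SCHED_DONE:
		return sched_done(msg, len);
	case ACT_DEREG_DONE:
		return dereg_done(msg, len);
	default:
		return -71;	/* -EPROTO */
	}
}
```

Keeping the dispatch in one place lets each handler assume the message payload starts at `msg[0]` with the action already stripped.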
drivers/gpu/drm/xe/xe_guc_types.h | 71 +
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

#ifndef _XE_GUC_TYPES_H_
#define _XE_GUC_TYPES_H_

#include <linux/idr.h>
#include <linux/xarray.h>

#include "xe_guc_ads_types.h"
#include "xe_guc_ct_types.h"
#include "xe_guc_fwif.h"
#include "xe_guc_log_types.h"
#include "xe_guc_pc_types.h"
#include "xe_uc_fw_types.h"

/**
 * struct xe_guc - Graphic micro controller
 */
struct xe_guc {
	/** @fw: Generic uC firmware management */
	struct xe_uc_fw fw;
	/** @log: GuC log */
	struct xe_guc_log log;
	/** @ads: GuC ads */
	struct xe_guc_ads ads;
	/** @ct: GuC ct */
	struct xe_guc_ct ct;
	/** @pc: GuC Power Conservation */
	struct xe_guc_pc pc;
	/** @submission_state: GuC submission state */
	struct {
		/** @engine_lookup: Lookup an xe_engine from guc_id */
		struct xarray engine_lookup;
		/** @guc_ids: used to allocate new guc_ids, single-lrc */
		struct ida guc_ids;
		/** @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc */
		unsigned long *guc_ids_bitmap;
		/** @stopped: submissions are stopped */
		atomic_t stopped;
		/** @lock: protects submission state */
		struct mutex lock;
		/** @suspend: suspend fence state */
		struct {
			/** @lock: suspend fences lock */
			spinlock_t lock;
			/** @context: suspend fences context */
			u64 context;
			/** @seqno: suspend fences seqno */
			u32 seqno;
		} suspend;
	} submission_state;
	/** @hwconfig: Hardware config state */
	struct {
		/** @bo: buffer object of the hardware config */
		struct xe_bo *bo;
		/** @size: size of the hardware config */
		u32 size;
	} hwconfig;

	/**
	 * @notify_reg: Register which is written to notify GuC of H2G messages
	 */
	u32 notify_reg;
	/** @params: Control params for fw initialization */
	u32 params[GUC_CTL_MAX_DWORDS];
};

#endif
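The `submission_state` struct keeps two guc_id allocators: an IDA for single-LRC engines and a bitmap for multi-LRC engines, which need a contiguous run of ids. A toy bitmap allocator sketching the multi-LRC case (a 32-id space and first-fit search are assumptions for the example, not the driver's real policy):

```c
#include <assert.h>
#include <stdint.h>

#define NUM_IDS 32

static uint32_t id_bitmap;	/* bit set = guc_id in use */

/*
 * First-fit search for a run of 'width' free ids, in the spirit of the
 * guc_ids_bitmap used for multi-LRC engines. Returns the first id of the
 * run, or -1 when the space is exhausted.
 */
static int alloc_guc_ids(int width)
{
	for (int start = 0; start + width <= NUM_IDS; start++) {
		int free = 1;

		for (int i = start; i < start + width; i++)
			if (id_bitmap & (1u << i)) {
				free = 0;
				break;
			}
		if (free) {
			for (int i = start; i < start + width; i++)
				id_bitmap |= 1u << i;	/* claim the run */
			return start;
		}
	}
	return -1;
}
```

A parallel engine of width N then owns ids `start` through `start + N - 1`, so the GuC can address each of its LRCs with consecutive ids.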
drivers/gpu/drm/xe/xe_huc.c | 131 +
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_bo.h"
#include "xe_device.h"
#include "xe_force_wake.h"
#include "xe_gt.h"
#include "xe_guc.h"
#include "xe_guc_reg.h"
#include "xe_huc.h"
#include "xe_mmio.h"
#include "xe_uc_fw.h"

static struct xe_gt *
huc_to_gt(struct xe_huc *huc)
{
	return container_of(huc, struct xe_gt, uc.huc);
}

static struct xe_device *
huc_to_xe(struct xe_huc *huc)
{
	return gt_to_xe(huc_to_gt(huc));
}

static struct xe_guc *
huc_to_guc(struct xe_huc *huc)
{
	return &container_of(huc, struct xe_uc, huc)->guc;
}

int xe_huc_init(struct xe_huc *huc)
{
	struct xe_device *xe = huc_to_xe(huc);
	int ret;

	huc->fw.type = XE_UC_FW_TYPE_HUC;
	ret = xe_uc_fw_init(&huc->fw);
	if (ret)
		goto out;

	xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOADABLE);

	return 0;

out:
	if (xe_uc_fw_is_disabled(&huc->fw)) {
		drm_info(&xe->drm, "HuC disabled\n");
		return 0;
	}
	drm_err(&xe->drm, "HuC init failed with %d", ret);
	return ret;
}

int xe_huc_upload(struct xe_huc *huc)
{
	if (xe_uc_fw_is_disabled(&huc->fw))
		return 0;
	return xe_uc_fw_upload(&huc->fw, 0, HUC_UKERNEL);
}

int xe_huc_auth(struct xe_huc *huc)
{
	struct xe_device *xe = huc_to_xe(huc);
	struct xe_gt *gt = huc_to_gt(huc);
	struct xe_guc *guc = huc_to_guc(huc);
	int ret;

	if (xe_uc_fw_is_disabled(&huc->fw))
		return 0;

	XE_BUG_ON(xe_uc_fw_is_running(&huc->fw));

	if (!xe_uc_fw_is_loaded(&huc->fw))
		return -ENOEXEC;

	ret = xe_guc_auth_huc(guc, xe_bo_ggtt_addr(huc->fw.bo) +
			      xe_uc_fw_rsa_offset(&huc->fw));
	if (ret) {
		drm_err(&xe->drm, "HuC: GuC did not ack Auth request %d\n",
			ret);
		goto fail;
	}

	ret = xe_mmio_wait32(gt, GEN11_HUC_KERNEL_LOAD_INFO.reg,
			     HUC_LOAD_SUCCESSFUL,
			     HUC_LOAD_SUCCESSFUL, 100);
	if (ret) {
		drm_err(&xe->drm, "HuC: Firmware not verified %d\n", ret);
		goto fail;
	}

	xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_RUNNING);
	drm_dbg(&xe->drm, "HuC authenticated\n");

	return 0;

fail:
	drm_err(&xe->drm, "HuC authentication failed %d\n", ret);
	xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOAD_FAIL);

	return ret;
}

void xe_huc_sanitize(struct xe_huc *huc)
{
	if (xe_uc_fw_is_disabled(&huc->fw))
		return;
	xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOADABLE);
}

void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p)
{
	struct xe_gt *gt = huc_to_gt(huc);
	int err;

	xe_uc_fw_print(&huc->fw, p);

	if (xe_uc_fw_is_disabled(&huc->fw))
		return;

	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
	if (err)
		return;

	drm_printf(p, "\nHuC status: 0x%08x\n",
		   xe_mmio_read32(gt, GEN11_HUC_KERNEL_LOAD_INFO.reg));

	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
}
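`xe_huc_auth` confirms the authentication by polling `GEN11_HUC_KERNEL_LOAD_INFO` with `xe_mmio_wait32` until the load-successful bit is set or the timeout expires. A user-space sketch of that poll-until-match loop, with a plain variable standing in for the MMIO register and an assumed bit value:

```c
#include <assert.h>
#include <stdint.h>

#define HUC_LOAD_SUCCESSFUL (1u << 0)	/* status bit; value assumed here */

/* Fake register backing store standing in for MMIO. */
static uint32_t huc_status_reg;

/*
 * Poll-until-match loop in the spirit of xe_mmio_wait32(): re-read the
 * register until (val & mask) == want, or the retry budget runs out.
 */
static int wait_reg(uint32_t mask, uint32_t want, int retries)
{
	while (retries--) {
		if ((huc_status_reg & mask) == want)
			return 0;
		/* real code would sleep or delay between reads */
	}
	return -110;	/* -ETIMEDOUT */
}
```

On timeout the driver treats the firmware as unverified and marks it `XE_UC_FIRMWARE_LOAD_FAIL` rather than leaving the state ambiguous.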
drivers/gpu/drm/xe/xe_huc.h | 19 +
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

#ifndef _XE_HUC_H_
#define _XE_HUC_H_

#include "xe_huc_types.h"

struct drm_printer;

int xe_huc_init(struct xe_huc *huc);
int xe_huc_upload(struct xe_huc *huc);
int xe_huc_auth(struct xe_huc *huc);
void xe_huc_sanitize(struct xe_huc *huc);
void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p);

#endif
drivers/gpu/drm/xe/xe_huc_debugfs.c | 71 +
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include <drm/drm_debugfs.h>
#include <drm/drm_managed.h>

#include "xe_device.h"
#include "xe_gt.h"
#include "xe_huc.h"
#include "xe_huc_debugfs.h"
#include "xe_macros.h"

static struct xe_gt *
huc_to_gt(struct xe_huc *huc)
{
	return container_of(huc, struct xe_gt, uc.huc);
}

static struct xe_device *
huc_to_xe(struct xe_huc *huc)
{
	return gt_to_xe(huc_to_gt(huc));
}

static struct xe_huc *node_to_huc(struct drm_info_node *node)
{
	return node->info_ent->data;
}

static int huc_info(struct seq_file *m, void *data)
{
	struct xe_huc *huc = node_to_huc(m->private);
	struct xe_device *xe = huc_to_xe(huc);
	struct drm_printer p = drm_seq_file_printer(m);

	xe_device_mem_access_get(xe);
	xe_huc_print_info(huc, &p);
	xe_device_mem_access_put(xe);

	return 0;
}

static const struct drm_info_list debugfs_list[] = {
	{"huc_info", huc_info, 0},
};

void xe_huc_debugfs_register(struct xe_huc *huc, struct dentry *parent)
{
	struct drm_minor *minor = huc_to_xe(huc)->drm.primary;
	struct drm_info_list *local;
	int i;

#define DEBUGFS_SIZE	ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list)
	local = drmm_kmalloc(&huc_to_xe(huc)->drm, DEBUGFS_SIZE, GFP_KERNEL);
	if (!local) {
		XE_WARN_ON("Couldn't allocate memory");
		return;
	}

	memcpy(local, debugfs_list, DEBUGFS_SIZE);
#undef DEBUGFS_SIZE

	for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i)
		local[i].data = huc;

	drm_debugfs_create_files(local,
				 ARRAY_SIZE(debugfs_list),
				 parent, minor);
}
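`xe_huc_debugfs_register` clones the const `debugfs_list` template into a per-device copy so each entry's `data` pointer can be aimed at this device's HuC instance. A user-space sketch of that clone-and-bind pattern, with a simplified `struct info_entry` standing in for `drm_info_list`:

```c
#include <assert.h>
#include <stdlib.h>
#include <string.h>

/* Minimal stand-in for drm_info_list: a name plus a per-device pointer. */
struct info_entry {
	const char *name;
	void *data;
};

static const struct info_entry info_template[] = {
	{ "huc_info", NULL },
};

/*
 * Copy the const template and point every entry's ->data at this device's
 * object, mirroring how xe_huc_debugfs_register clones debugfs_list.
 */
static struct info_entry *clone_entries(void *dev, size_t n)
{
	struct info_entry *local = malloc(n * sizeof(*local));

	if (!local)
		return NULL;
	memcpy(local, info_template, n * sizeof(*local));
	for (size_t i = 0; i < n; i++)
		local[i].data = dev;	/* bind entry to this device */
	return local;
}
```

The driver uses `drmm_kmalloc` for the copy so the allocation is freed automatically with the DRM device, which is why no explicit free appears in the registration path.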
drivers/gpu/drm/xe/xe_huc_debugfs.h | 14 +
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

#ifndef _XE_HUC_DEBUGFS_H_
#define _XE_HUC_DEBUGFS_H_

struct dentry;
struct xe_huc;

void xe_huc_debugfs_register(struct xe_huc *huc, struct dentry *parent);

#endif
drivers/gpu/drm/xe/xe_huc_types.h | 19 +
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

#ifndef _XE_HUC_TYPES_H_
#define _XE_HUC_TYPES_H_

#include "xe_uc_fw_types.h"

/**
 * struct xe_huc - HuC
 */
struct xe_huc {
	/** @fw: Generic uC firmware management */
	struct xe_uc_fw fw;
};

#endif
drivers/gpu/drm/xe/xe_hw_engine.c | 658 +
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_hw_engine.h"

#include <drm/drm_managed.h>

#include "xe_bo.h"
#include "xe_device.h"
#include "xe_execlist.h"
#include "xe_force_wake.h"
#include "xe_gt.h"
#include "xe_gt_topology.h"
#include "xe_hw_fence.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_mmio.h"
#include "xe_reg_sr.h"
#include "xe_sched_job.h"
#include "xe_wa.h"

#include "gt/intel_engine_regs.h"
#include "i915_reg.h"
#include "gt/intel_gt_regs.h"

#define MAX_MMIO_BASES 3
struct engine_info {
	const char *name;
	unsigned int class : 8;
	unsigned int instance : 8;
	enum xe_force_wake_domains domain;
	/* mmio bases table *must* be sorted in reverse graphics_ver order */
	struct engine_mmio_base {
		unsigned int graphics_ver : 8;
		unsigned int base : 24;
	} mmio_bases[MAX_MMIO_BASES];
};

static const struct engine_info engine_infos[] = {
	[XE_HW_ENGINE_RCS0] = {
		.name = "rcs0",
		.class = XE_ENGINE_CLASS_RENDER,
		.instance = 0,
		.domain = XE_FW_RENDER,
		.mmio_bases = {
			{ .graphics_ver = 1, .base = RENDER_RING_BASE }
		},
	},
	[XE_HW_ENGINE_BCS0] = {
		.name = "bcs0",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 0,
		.domain = XE_FW_RENDER,
		.mmio_bases = {
			{ .graphics_ver = 6, .base = BLT_RING_BASE }
		},
	},
	[XE_HW_ENGINE_BCS1] = {
		.name = "bcs1",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 1,
		.domain = XE_FW_RENDER,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = XEHPC_BCS1_RING_BASE }
		},
	},
	[XE_HW_ENGINE_BCS2] = {
		.name = "bcs2",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 2,
		.domain = XE_FW_RENDER,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = XEHPC_BCS2_RING_BASE }
		},
	},
	[XE_HW_ENGINE_BCS3] = {
		.name = "bcs3",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 3,
		.domain = XE_FW_RENDER,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = XEHPC_BCS3_RING_BASE }
		},
	},
	[XE_HW_ENGINE_BCS4] = {
		.name = "bcs4",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 4,
		.domain = XE_FW_RENDER,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = XEHPC_BCS4_RING_BASE }
		},
	},
	[XE_HW_ENGINE_BCS5] = {
		.name = "bcs5",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 5,
		.domain = XE_FW_RENDER,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = XEHPC_BCS5_RING_BASE }
		},
	},
	[XE_HW_ENGINE_BCS6] = {
		.name = "bcs6",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 6,
		.domain = XE_FW_RENDER,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = XEHPC_BCS6_RING_BASE }
		},
	},
	[XE_HW_ENGINE_BCS7] = {
		.name = "bcs7",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 7,
		.domain = XE_FW_RENDER,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = XEHPC_BCS7_RING_BASE }
		},
	},
	[XE_HW_ENGINE_BCS8] = {
		.name = "bcs8",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 8,
		.domain = XE_FW_RENDER,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = XEHPC_BCS8_RING_BASE }
		},
	},

	[XE_HW_ENGINE_VCS0] = {
		.name = "vcs0",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 0,
		.domain = XE_FW_MEDIA_VDBOX0,
		.mmio_bases = {
			{ .graphics_ver = 11, .base = GEN11_BSD_RING_BASE },
			{ .graphics_ver = 6, .base = GEN6_BSD_RING_BASE },
			{ .graphics_ver = 4, .base = BSD_RING_BASE }
		},
	},
	[XE_HW_ENGINE_VCS1] = {
		.name = "vcs1",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 1,
		.domain = XE_FW_MEDIA_VDBOX1,
		.mmio_bases = {
			{ .graphics_ver = 11, .base = GEN11_BSD2_RING_BASE },
			{ .graphics_ver = 8, .base = GEN8_BSD2_RING_BASE }
		},
	},
	[XE_HW_ENGINE_VCS2] = {
		.name = "vcs2",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 2,
		.domain = XE_FW_MEDIA_VDBOX2,
		.mmio_bases = {
			{ .graphics_ver = 11, .base = GEN11_BSD3_RING_BASE }
		},
	},
	[XE_HW_ENGINE_VCS3] = {
		.name = "vcs3",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 3,
		.domain = XE_FW_MEDIA_VDBOX3,
		.mmio_bases = {
			{ .graphics_ver = 11, .base = GEN11_BSD4_RING_BASE }
		},
	},
	[XE_HW_ENGINE_VCS4] = {
		.name = "vcs4",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 4,
		.domain = XE_FW_MEDIA_VDBOX4,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = XEHP_BSD5_RING_BASE }
		},
	},
	[XE_HW_ENGINE_VCS5] = {
		.name = "vcs5",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 5,
		.domain = XE_FW_MEDIA_VDBOX5,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = XEHP_BSD6_RING_BASE }
		},
	},
	[XE_HW_ENGINE_VCS6] = {
		.name = "vcs6",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 6,
		.domain = XE_FW_MEDIA_VDBOX6,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = XEHP_BSD7_RING_BASE }
		},
	},
	[XE_HW_ENGINE_VCS7] = {
		.name = "vcs7",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 7,
		.domain = XE_FW_MEDIA_VDBOX7,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = XEHP_BSD8_RING_BASE }
		},
	},
	[XE_HW_ENGINE_VECS0] = {
		.name = "vecs0",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 0,
		.domain = XE_FW_MEDIA_VEBOX0,
		.mmio_bases = {
			{ .graphics_ver = 11, .base = GEN11_VEBOX_RING_BASE },
			{ .graphics_ver = 7, .base = VEBOX_RING_BASE }
		},
	},
	[XE_HW_ENGINE_VECS1] = {
		.name = "vecs1",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 1,
		.domain = XE_FW_MEDIA_VEBOX1,
		.mmio_bases = {
			{ .graphics_ver = 11, .base = GEN11_VEBOX2_RING_BASE }
		},
	},
	[XE_HW_ENGINE_VECS2] = {
		.name = "vecs2",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 2,
		.domain = XE_FW_MEDIA_VEBOX2,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = XEHP_VEBOX3_RING_BASE }
		},
	},
	[XE_HW_ENGINE_VECS3] = {
		.name = "vecs3",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 3,
		.domain = XE_FW_MEDIA_VEBOX3,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = XEHP_VEBOX4_RING_BASE }
		},
	},
	[XE_HW_ENGINE_CCS0] = {
		.name = "ccs0",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 0,
		.domain = XE_FW_RENDER,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = GEN12_COMPUTE0_RING_BASE },
		},
	},
	[XE_HW_ENGINE_CCS1] = {
		.name = "ccs1",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 1,
		.domain = XE_FW_RENDER,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = GEN12_COMPUTE1_RING_BASE },
		},
	},
	[XE_HW_ENGINE_CCS2] = {
		.name = "ccs2",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 2,
		.domain = XE_FW_RENDER,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = GEN12_COMPUTE2_RING_BASE },
		},
	},
	[XE_HW_ENGINE_CCS3] = {
		.name = "ccs3",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 3,
		.domain = XE_FW_RENDER,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = GEN12_COMPUTE3_RING_BASE },
		},
	},
};

static u32 engine_info_mmio_base(const struct engine_info *info,
				 unsigned int graphics_ver)
{
	int i;

	for (i = 0; i < MAX_MMIO_BASES; i++)
		if (graphics_ver >= info->mmio_bases[i].graphics_ver)
			break;

	XE_BUG_ON(i == MAX_MMIO_BASES);
	XE_BUG_ON(!info->mmio_bases[i].base);

	return info->mmio_bases[i].base;
}

static void hw_engine_fini(struct drm_device *drm, void *arg)
{
	struct xe_hw_engine *hwe = arg;

	if (hwe->exl_port)
		xe_execlist_port_destroy(hwe->exl_port);
	xe_lrc_finish(&hwe->kernel_lrc);

	xe_bo_unpin_map_no_vm(hwe->hwsp);

	hwe->gt = NULL;
}

static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, u32 reg, u32 val)
{
	XE_BUG_ON(reg & hwe->mmio_base);
	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);

	xe_mmio_write32(hwe->gt, reg + hwe->mmio_base, val);
}

static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, u32 reg)
{
	XE_BUG_ON(reg & hwe->mmio_base);
	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);

	return xe_mmio_read32(hwe->gt, reg + hwe->mmio_base);
}

void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
{
	u32 ccs_mask =
		xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE);

	if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask & BIT(0))
		xe_mmio_write32(hwe->gt, GEN12_RCU_MODE.reg,
				_MASKED_BIT_ENABLE(GEN12_RCU_MODE_CCS_ENABLE));

	hw_engine_mmio_write32(hwe, RING_HWSTAM(0).reg, ~0x0);
	hw_engine_mmio_write32(hwe, RING_HWS_PGA(0).reg,
			       xe_bo_ggtt_addr(hwe->hwsp));
	hw_engine_mmio_write32(hwe, RING_MODE_GEN7(0).reg,
			       _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
	hw_engine_mmio_write32(hwe, RING_MI_MODE(0).reg,
			       _MASKED_BIT_DISABLE(STOP_RING));
	hw_engine_mmio_read32(hwe, RING_MI_MODE(0).reg);
}

static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
				 enum xe_hw_engine_id id)
{
	struct xe_device *xe = gt_to_xe(gt);
	const struct engine_info *info;

	if (WARN_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name))
		return;

	if (!(gt->info.engine_mask & BIT(id)))
		return;

	info = &engine_infos[id];

	XE_BUG_ON(hwe->gt);

	hwe->gt = gt;
	hwe->class = info->class;
	hwe->instance = info->instance;
	hwe->mmio_base = engine_info_mmio_base(info, GRAPHICS_VER(xe));
	hwe->domain = info->domain;
	hwe->name = info->name;
	hwe->fence_irq = &gt->fence_irq[info->class];
	hwe->engine_id = id;

	xe_reg_sr_init(&hwe->reg_sr, hwe->name, gt_to_xe(gt));
	xe_wa_process_engine(hwe);

	xe_reg_sr_init(&hwe->reg_whitelist, hwe->name, gt_to_xe(gt));
	xe_reg_whitelist_process_engine(hwe);
}

static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
			  enum xe_hw_engine_id id)
{
	struct xe_device *xe = gt_to_xe(gt);
	int err;

	XE_BUG_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name);
	XE_BUG_ON(!(gt->info.engine_mask & BIT(id)));

	xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
	xe_reg_sr_apply_whitelist(&hwe->reg_whitelist, hwe->mmio_base, gt);

	hwe->hwsp = xe_bo_create_locked(xe, gt, NULL, SZ_4K, ttm_bo_type_kernel,
					XE_BO_CREATE_VRAM_IF_DGFX(gt) |
					XE_BO_CREATE_GGTT_BIT);
	if (IS_ERR(hwe->hwsp)) {
		err = PTR_ERR(hwe->hwsp);
		goto err_name;
	}

	err = xe_bo_pin(hwe->hwsp);
	if (err)
		goto err_unlock_put_hwsp;

	err = xe_bo_vmap(hwe->hwsp);
	if (err)
		goto err_unpin_hwsp;

	xe_bo_unlock_no_vm(hwe->hwsp);

	err = xe_lrc_init(&hwe->kernel_lrc, hwe, NULL, NULL, SZ_16K);
	if (err)
		goto err_hwsp;

	if (!xe_device_guc_submission_enabled(xe)) {
		hwe->exl_port = xe_execlist_port_create(xe, hwe);
		if (IS_ERR(hwe->exl_port)) {
			err = PTR_ERR(hwe->exl_port);
			goto err_kernel_lrc;
		}
	}

	if (xe_device_guc_submission_enabled(xe))
		xe_hw_engine_enable_ring(hwe);

	/* We reserve the highest BCS instance for USM */
	if (xe->info.supports_usm && hwe->class == XE_ENGINE_CLASS_COPY)
		gt->usm.reserved_bcs_instance = hwe->instance;

	err = drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe);
	if (err)
		return err;

	return 0;

err_unpin_hwsp:
	xe_bo_unpin(hwe->hwsp);
err_unlock_put_hwsp:
	xe_bo_unlock_no_vm(hwe->hwsp);
	xe_bo_put(hwe->hwsp);
err_kernel_lrc:
	xe_lrc_finish(&hwe->kernel_lrc);
err_hwsp:
	xe_bo_put(hwe->hwsp);
err_name:
	hwe->name = NULL;

	return err;
}

static void hw_engine_setup_logical_mapping(struct xe_gt *gt)
{
	int class;

	/* FIXME: Doing a simple logical mapping that works for most hardware */
	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
		struct xe_hw_engine *hwe;
		enum xe_hw_engine_id id;
		int logical_instance = 0;

		for_each_hw_engine(hwe, gt, id)
			if (hwe->class == class)
				hwe->logical_instance = logical_instance++;
	}
}

static void read_fuses(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 media_fuse;
	u16 vdbox_mask;
	u16 vebox_mask;
	u32 bcs_mask;
	int i, j;

	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);

	/*
	 * FIXME: Hack job, thinking we should have table of vfuncs for each
	 * class which picks the correct vfunc based on IP version.
	 */

	media_fuse = xe_mmio_read32(gt, GEN11_GT_VEBOX_VDBOX_DISABLE.reg);
	if (GRAPHICS_VERx100(xe) < 1250)
		media_fuse = ~media_fuse;

	vdbox_mask = media_fuse & GEN11_GT_VDBOX_DISABLE_MASK;
	vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >>
		     GEN11_GT_VEBOX_DISABLE_SHIFT;

	for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!(BIT(j) & vdbox_mask)) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "vcs%u fused off\n", j);
		}
	}

	for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!(BIT(j) & vebox_mask)) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "vecs%u fused off\n", j);
		}
	}

	bcs_mask = xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg);
	bcs_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK, bcs_mask);

	for (i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!(BIT(j / 2) & bcs_mask)) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "bcs%u fused off\n", j);
		}
	}

	/* TODO: compute engines */
}

int xe_hw_engines_init_early(struct xe_gt *gt)
{
	int i;

	read_fuses(gt);

	for (i = 0; i < ARRAY_SIZE(gt->hw_engines); i++)
		hw_engine_init_early(gt, &gt->hw_engines[i], i);

	return 0;
}

int xe_hw_engines_init(struct xe_gt *gt)
{
	int err;
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;

	for_each_hw_engine(hwe, gt, id) {
		err = hw_engine_init(gt, hwe, id);
		if (err)
			return err;
	}

	hw_engine_setup_logical_mapping(gt);

	return 0;
}

void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
{
	wake_up_all(&gt_to_xe(hwe->gt)->ufence_wq);

	if (hwe->irq_handler)
		hwe->irq_handler(hwe, intr_vec);

	if (intr_vec & GT_RENDER_USER_INTERRUPT)
		xe_hw_fence_irq_run(hwe->fence_irq);
}

void xe_hw_engine_print_state(struct xe_hw_engine *hwe, struct drm_printer *p)
{
	if (!xe_hw_engine_is_valid(hwe))
		return;

	drm_printf(p, "%s (physical), logical instance=%d\n", hwe->name,
		   hwe->logical_instance);
	drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
		   hwe->domain,
		   xe_force_wake_ref(gt_to_fw(hwe->gt), hwe->domain));
	drm_printf(p, "\tMMIO base: 0x%08x\n", hwe->mmio_base);

	drm_printf(p, "\tHWSTAM: 0x%08x\n",
		   hw_engine_mmio_read32(hwe, RING_HWSTAM(0).reg));
	drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n",
		   hw_engine_mmio_read32(hwe, RING_HWS_PGA(0).reg));

	drm_printf(p, "\tRING_EXECLIST_STATUS_LO: 0x%08x\n",
		   hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0).reg));
	drm_printf(p, "\tRING_EXECLIST_STATUS_HI: 0x%08x\n",
		   hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0).reg));
	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_LO: 0x%08x\n",
		   hw_engine_mmio_read32(hwe,
					 RING_EXECLIST_SQ_CONTENTS(0).reg));
	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_HI: 0x%08x\n",
		   hw_engine_mmio_read32(hwe,
					 RING_EXECLIST_SQ_CONTENTS(0).reg + 4));
	drm_printf(p, "\tRING_EXECLIST_CONTROL: 0x%08x\n",
		   hw_engine_mmio_read32(hwe, RING_EXECLIST_CONTROL(0).reg));

	drm_printf(p, "\tRING_START: 0x%08x\n",
		   hw_engine_mmio_read32(hwe, RING_START(0).reg));
	drm_printf(p, "\tRING_HEAD: 0x%08x\n",
		   hw_engine_mmio_read32(hwe, RING_HEAD(0).reg) & HEAD_ADDR);
	drm_printf(p, "\tRING_TAIL: 0x%08x\n",
		   hw_engine_mmio_read32(hwe, RING_TAIL(0).reg) & TAIL_ADDR);
	drm_printf(p, "\tRING_CTL: 0x%08x\n",
		   hw_engine_mmio_read32(hwe, RING_CTL(0).reg));
	drm_printf(p, "\tRING_MODE: 0x%08x\n",
		   hw_engine_mmio_read32(hwe, RING_MI_MODE(0).reg));
	drm_printf(p, "\tRING_MODE_GEN7: 0x%08x\n",
		   hw_engine_mmio_read32(hwe, RING_MODE_GEN7(0).reg));

	drm_printf(p, "\tRING_IMR: 0x%08x\n",
		   hw_engine_mmio_read32(hwe, RING_IMR(0).reg));
	drm_printf(p, "\tRING_ESR: 0x%08x\n",
		   hw_engine_mmio_read32(hwe, RING_ESR(0).reg));
	drm_printf(p, "\tRING_EMR: 0x%08x\n",
		   hw_engine_mmio_read32(hwe, RING_EMR(0).reg));
	drm_printf(p, "\tRING_EIR: 0x%08x\n",
		   hw_engine_mmio_read32(hwe, RING_EIR(0).reg));

	drm_printf(p, "\tACTHD: 0x%08x_%08x\n",
		   hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0).reg),
		   hw_engine_mmio_read32(hwe, RING_ACTHD(0).reg));
	drm_printf(p, "\tBBADDR: 0x%08x_%08x\n",
		   hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0).reg),
		   hw_engine_mmio_read32(hwe, RING_BBADDR(0).reg));
	drm_printf(p, "\tDMA_FADDR: 0x%08x_%08x\n",
		   hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0).reg),
		   hw_engine_mmio_read32(hwe, RING_DMA_FADD(0).reg));

	drm_printf(p, "\tIPEIR: 0x%08x\n",
		   hw_engine_mmio_read32(hwe, IPEIR(0).reg));
	drm_printf(p, "\tIPEHR: 0x%08x\n\n",
		   hw_engine_mmio_read32(hwe, IPEHR(0).reg));

	if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
		drm_printf(p, "\tGEN12_RCU_MODE: 0x%08x\n",
			   xe_mmio_read32(hwe->gt, GEN12_RCU_MODE.reg));
}

u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
				enum xe_engine_class engine_class)
{
	u32 mask = 0;
	enum xe_hw_engine_id id;

	for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
		if (engine_infos[id].class == engine_class &&
		    gt->info.engine_mask & BIT(id))
			mask |= BIT(engine_infos[id].instance);
	}
	return mask;
}

bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);

	return xe->info.supports_usm && hwe->class == XE_ENGINE_CLASS_COPY &&
		hwe->instance == gt->usm.reserved_bcs_instance;
}
drivers/gpu/drm/xe/xe_hw_engine.h (+27)

/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2021 Intel Corporation
 */

#ifndef _XE_HW_ENGINE_H_
#define _XE_HW_ENGINE_H_

#include "xe_hw_engine_types.h"

struct drm_printer;

int xe_hw_engines_init_early(struct xe_gt *gt);
int xe_hw_engines_init(struct xe_gt *gt);
void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec);
void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe);
void xe_hw_engine_print_state(struct xe_hw_engine *hwe, struct drm_printer *p);
u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
                                enum xe_engine_class engine_class);

bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe);
static inline bool xe_hw_engine_is_valid(struct xe_hw_engine *hwe)
{
    return hwe->name;
}

#endif
drivers/gpu/drm/xe/xe_hw_engine_types.h (+107)

/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

#ifndef _XE_HW_ENGINE_TYPES_H_
#define _XE_HW_ENGINE_TYPES_H_

#include "xe_force_wake_types.h"
#include "xe_lrc_types.h"
#include "xe_reg_sr_types.h"

/* See "Engine ID Definition" struct in the Icelake PRM */
enum xe_engine_class {
    XE_ENGINE_CLASS_RENDER = 0,
    XE_ENGINE_CLASS_VIDEO_DECODE = 1,
    XE_ENGINE_CLASS_VIDEO_ENHANCE = 2,
    XE_ENGINE_CLASS_COPY = 3,
    XE_ENGINE_CLASS_OTHER = 4,
    XE_ENGINE_CLASS_COMPUTE = 5,
    XE_ENGINE_CLASS_MAX = 6,
};

enum xe_hw_engine_id {
    XE_HW_ENGINE_RCS0,
    XE_HW_ENGINE_BCS0,
    XE_HW_ENGINE_BCS1,
    XE_HW_ENGINE_BCS2,
    XE_HW_ENGINE_BCS3,
    XE_HW_ENGINE_BCS4,
    XE_HW_ENGINE_BCS5,
    XE_HW_ENGINE_BCS6,
    XE_HW_ENGINE_BCS7,
    XE_HW_ENGINE_BCS8,
    XE_HW_ENGINE_VCS0,
    XE_HW_ENGINE_VCS1,
    XE_HW_ENGINE_VCS2,
    XE_HW_ENGINE_VCS3,
    XE_HW_ENGINE_VCS4,
    XE_HW_ENGINE_VCS5,
    XE_HW_ENGINE_VCS6,
    XE_HW_ENGINE_VCS7,
    XE_HW_ENGINE_VECS0,
    XE_HW_ENGINE_VECS1,
    XE_HW_ENGINE_VECS2,
    XE_HW_ENGINE_VECS3,
    XE_HW_ENGINE_CCS0,
    XE_HW_ENGINE_CCS1,
    XE_HW_ENGINE_CCS2,
    XE_HW_ENGINE_CCS3,
    XE_NUM_HW_ENGINES,
};

/* FIXME: s/XE_HW_ENGINE_MAX_INSTANCE/XE_HW_ENGINE_MAX_COUNT */
#define XE_HW_ENGINE_MAX_INSTANCE 9

struct xe_bo;
struct xe_execlist_port;
struct xe_gt;

/**
 * struct xe_hw_engine - Hardware engine
 *
 * Contains all the hardware engine state for physical instances.
 */
struct xe_hw_engine {
    /** @gt: graphics tile this hw engine belongs to */
    struct xe_gt *gt;
    /** @name: name of this hw engine */
    const char *name;
    /** @class: class of this hw engine */
    enum xe_engine_class class;
    /** @instance: physical instance of this hw engine */
    u16 instance;
    /** @logical_instance: logical instance of this hw engine */
    u16 logical_instance;
    /** @mmio_base: MMIO base address of this hw engine */
    u32 mmio_base;
    /**
     * @reg_sr: table with registers to be restored on GT init/resume/reset
     */
    struct xe_reg_sr reg_sr;
    /**
     * @reg_whitelist: table with registers to be whitelisted
     */
    struct xe_reg_sr reg_whitelist;
    /**
     * @reg_lrc: LRC workaround registers
     */
    struct xe_reg_sr reg_lrc;
    /** @domain: force wake domain of this hw engine */
    enum xe_force_wake_domains domain;
    /** @hwsp: hardware status page buffer object */
    struct xe_bo *hwsp;
    /** @kernel_lrc: Kernel LRC (should be replaced /w an xe_engine) */
    struct xe_lrc kernel_lrc;
    /** @exl_port: execlists port */
    struct xe_execlist_port *exl_port;
    /** @fence_irq: fence IRQ to run when a hw engine IRQ is received */
    struct xe_hw_fence_irq *fence_irq;
    /** @irq_handler: IRQ handler to run when hw engine IRQ is received */
    void (*irq_handler)(struct xe_hw_engine *, u16);
    /** @engine_id: id for this hw engine */
    enum xe_hw_engine_id engine_id;
};

#endif
drivers/gpu/drm/xe/xe_hw_fence.c (+230)

// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_hw_fence.h"

#include <linux/device.h>
#include <linux/slab.h>

#include "xe_bo.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_hw_engine.h"
#include "xe_macros.h"
#include "xe_map.h"
#include "xe_trace.h"

static struct kmem_cache *xe_hw_fence_slab;

int __init xe_hw_fence_module_init(void)
{
    xe_hw_fence_slab = kmem_cache_create("xe_hw_fence",
                                         sizeof(struct xe_hw_fence), 0,
                                         SLAB_HWCACHE_ALIGN, NULL);
    if (!xe_hw_fence_slab)
        return -ENOMEM;

    return 0;
}

void xe_hw_fence_module_exit(void)
{
    rcu_barrier();
    kmem_cache_destroy(xe_hw_fence_slab);
}

static struct xe_hw_fence *fence_alloc(void)
{
    return kmem_cache_zalloc(xe_hw_fence_slab, GFP_KERNEL);
}

static void fence_free(struct rcu_head *rcu)
{
    struct xe_hw_fence *fence =
        container_of(rcu, struct xe_hw_fence, dma.rcu);

    if (!WARN_ON_ONCE(!fence))
        kmem_cache_free(xe_hw_fence_slab, fence);
}

static void hw_fence_irq_run_cb(struct irq_work *work)
{
    struct xe_hw_fence_irq *irq = container_of(work, typeof(*irq), work);
    struct xe_hw_fence *fence, *next;
    bool tmp;

    tmp = dma_fence_begin_signalling();
    spin_lock(&irq->lock);
    if (irq->enabled) {
        list_for_each_entry_safe(fence, next, &irq->pending, irq_link) {
            struct dma_fence *dma_fence = &fence->dma;

            trace_xe_hw_fence_try_signal(fence);
            if (dma_fence_is_signaled_locked(dma_fence)) {
                trace_xe_hw_fence_signal(fence);
                list_del_init(&fence->irq_link);
                dma_fence_put(dma_fence);
            }
        }
    }
    spin_unlock(&irq->lock);
    dma_fence_end_signalling(tmp);
}

void xe_hw_fence_irq_init(struct xe_hw_fence_irq *irq)
{
    spin_lock_init(&irq->lock);
    init_irq_work(&irq->work, hw_fence_irq_run_cb);
    INIT_LIST_HEAD(&irq->pending);
    irq->enabled = true;
}

void xe_hw_fence_irq_finish(struct xe_hw_fence_irq *irq)
{
    struct xe_hw_fence *fence, *next;
    unsigned long flags;
    int err;
    bool tmp;

    if (XE_WARN_ON(!list_empty(&irq->pending))) {
        tmp = dma_fence_begin_signalling();
        spin_lock_irqsave(&irq->lock, flags);
        list_for_each_entry_safe(fence, next, &irq->pending, irq_link) {
            list_del_init(&fence->irq_link);
            err = dma_fence_signal_locked(&fence->dma);
            dma_fence_put(&fence->dma);
            XE_WARN_ON(err);
        }
        spin_unlock_irqrestore(&irq->lock, flags);
        dma_fence_end_signalling(tmp);
    }
}

void xe_hw_fence_irq_run(struct xe_hw_fence_irq *irq)
{
    irq_work_queue(&irq->work);
}

void xe_hw_fence_irq_stop(struct xe_hw_fence_irq *irq)
{
    spin_lock_irq(&irq->lock);
    irq->enabled = false;
    spin_unlock_irq(&irq->lock);
}

void xe_hw_fence_irq_start(struct xe_hw_fence_irq *irq)
{
    spin_lock_irq(&irq->lock);
    irq->enabled = true;
    spin_unlock_irq(&irq->lock);

    irq_work_queue(&irq->work);
}

void xe_hw_fence_ctx_init(struct xe_hw_fence_ctx *ctx, struct xe_gt *gt,
                          struct xe_hw_fence_irq *irq, const char *name)
{
    ctx->gt = gt;
    ctx->irq = irq;
    ctx->dma_fence_ctx = dma_fence_context_alloc(1);
    ctx->next_seqno = 1;
    sprintf(ctx->name, "%s", name);
}

void xe_hw_fence_ctx_finish(struct xe_hw_fence_ctx *ctx)
{
}

static struct xe_hw_fence *to_xe_hw_fence(struct dma_fence *fence);

static struct xe_hw_fence_irq *xe_hw_fence_irq(struct xe_hw_fence *fence)
{
    return container_of(fence->dma.lock, struct xe_hw_fence_irq, lock);
}

static const char *xe_hw_fence_get_driver_name(struct dma_fence *dma_fence)
{
    struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence);

    return dev_name(gt_to_xe(fence->ctx->gt)->drm.dev);
}

static const char *xe_hw_fence_get_timeline_name(struct dma_fence *dma_fence)
{
    struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence);

    return fence->ctx->name;
}

static bool xe_hw_fence_signaled(struct dma_fence *dma_fence)
{
    struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence);
    struct xe_device *xe = gt_to_xe(fence->ctx->gt);
    u32 seqno = xe_map_rd(xe, &fence->seqno_map, 0, u32);

    return dma_fence->error ||
        (s32)fence->dma.seqno <= (s32)seqno;
}

static bool xe_hw_fence_enable_signaling(struct dma_fence *dma_fence)
{
    struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence);
    struct xe_hw_fence_irq *irq = xe_hw_fence_irq(fence);

    dma_fence_get(dma_fence);
    list_add_tail(&fence->irq_link, &irq->pending);

    /* SW completed (no HW IRQ) so kick handler to signal fence */
    if (xe_hw_fence_signaled(dma_fence))
        xe_hw_fence_irq_run(irq);

    return true;
}

static void xe_hw_fence_release(struct dma_fence *dma_fence)
{
    struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence);

    trace_xe_hw_fence_free(fence);
    XE_BUG_ON(!list_empty(&fence->irq_link));
    call_rcu(&dma_fence->rcu, fence_free);
}

static const struct dma_fence_ops xe_hw_fence_ops = {
    .get_driver_name = xe_hw_fence_get_driver_name,
    .get_timeline_name = xe_hw_fence_get_timeline_name,
    .enable_signaling = xe_hw_fence_enable_signaling,
    .signaled = xe_hw_fence_signaled,
    .release = xe_hw_fence_release,
};

static struct xe_hw_fence *to_xe_hw_fence(struct dma_fence *fence)
{
    if (XE_WARN_ON(fence->ops != &xe_hw_fence_ops))
        return NULL;

    return container_of(fence, struct xe_hw_fence, dma);
}

struct xe_hw_fence *xe_hw_fence_create(struct xe_hw_fence_ctx *ctx,
                                       struct iosys_map seqno_map)
{
    struct xe_hw_fence *fence;

    fence = fence_alloc();
    if (!fence)
        return ERR_PTR(-ENOMEM);

    dma_fence_init(&fence->dma, &xe_hw_fence_ops, &ctx->irq->lock,
                   ctx->dma_fence_ctx, ctx->next_seqno++);

    fence->ctx = ctx;
    fence->seqno_map = seqno_map;
    INIT_LIST_HEAD(&fence->irq_link);

    trace_xe_hw_fence_create(fence);

    return fence;
}
drivers/gpu/drm/xe/xe_hw_fence.h (+27)

/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2021 Intel Corporation
 */

#ifndef _XE_HW_FENCE_H_
#define _XE_HW_FENCE_H_

#include "xe_hw_fence_types.h"

int xe_hw_fence_module_init(void);
void xe_hw_fence_module_exit(void);

void xe_hw_fence_irq_init(struct xe_hw_fence_irq *irq);
void xe_hw_fence_irq_finish(struct xe_hw_fence_irq *irq);
void xe_hw_fence_irq_run(struct xe_hw_fence_irq *irq);
void xe_hw_fence_irq_stop(struct xe_hw_fence_irq *irq);
void xe_hw_fence_irq_start(struct xe_hw_fence_irq *irq);

void xe_hw_fence_ctx_init(struct xe_hw_fence_ctx *ctx, struct xe_gt *gt,
                          struct xe_hw_fence_irq *irq, const char *name);
void xe_hw_fence_ctx_finish(struct xe_hw_fence_ctx *ctx);

struct xe_hw_fence *xe_hw_fence_create(struct xe_hw_fence_ctx *ctx,
                                       struct iosys_map seqno_map);

#endif
drivers/gpu/drm/xe/xe_hw_fence_types.h (+72)

/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

#ifndef _XE_HW_FENCE_TYPES_H_
#define _XE_HW_FENCE_TYPES_H_

#include <linux/iosys-map.h>
#include <linux/dma-fence.h>
#include <linux/irq_work.h>
#include <linux/list.h>
#include <linux/spinlock.h>

struct xe_gt;

/**
 * struct xe_hw_fence_irq - hardware fence IRQ handler
 *
 * One per engine class, signals completed xe_hw_fences, triggered via hw engine
 * interrupt. On each trigger, search list of pending fences and signal.
 */
struct xe_hw_fence_irq {
    /** @lock: protects all xe_hw_fences + pending list */
    spinlock_t lock;
    /** @work: IRQ worker run to signal the fences */
    struct irq_work work;
    /** @pending: list of pending xe_hw_fences */
    struct list_head pending;
    /** @enabled: fence signaling enabled */
    bool enabled;
};

#define MAX_FENCE_NAME_LEN 16

/**
 * struct xe_hw_fence_ctx - hardware fence context
 *
 * The context for a hardware fence. 1 to 1 relationship with xe_engine. Points
 * to a xe_hw_fence_irq, maintains serial seqno.
 */
struct xe_hw_fence_ctx {
    /** @gt: graphics tile of hardware fence context */
    struct xe_gt *gt;
    /** @irq: fence irq handler */
    struct xe_hw_fence_irq *irq;
    /** @dma_fence_ctx: dma fence context for hardware fence */
    u64 dma_fence_ctx;
    /** @next_seqno: next seqno for hardware fence */
    u32 next_seqno;
    /** @name: name of hardware fence context */
    char name[MAX_FENCE_NAME_LEN];
};

/**
 * struct xe_hw_fence - hardware fence
 *
 * Used to indicate a xe_sched_job is complete via a seqno written to memory.
 * Signals on error or seqno past.
 */
struct xe_hw_fence {
    /** @dma: base dma fence for hardware fence context */
    struct dma_fence dma;
    /** @ctx: hardware fence context */
    struct xe_hw_fence_ctx *ctx;
    /** @seqno_map: I/O map for seqno */
    struct iosys_map seqno_map;
    /** @irq_link: Link in struct xe_hw_fence_irq.pending */
    struct list_head irq_link;
};

#endif
drivers/gpu/drm/xe/xe_irq.c (+565)

// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include <linux/sched/clock.h>

#include <drm/drm_managed.h>

#include "xe_device.h"
#include "xe_drv.h"
#include "xe_guc.h"
#include "xe_gt.h"
#include "xe_hw_engine.h"
#include "xe_mmio.h"

#include "i915_reg.h"
#include "gt/intel_gt_regs.h"

static void gen3_assert_iir_is_zero(struct xe_gt *gt, i915_reg_t reg)
{
    u32 val = xe_mmio_read32(gt, reg.reg);

    if (val == 0)
        return;

    drm_WARN(&gt_to_xe(gt)->drm, 1,
             "Interrupt register 0x%x is not zero: 0x%08x\n",
             reg.reg, val);
    xe_mmio_write32(gt, reg.reg, 0xffffffff);
    xe_mmio_read32(gt, reg.reg);
    xe_mmio_write32(gt, reg.reg, 0xffffffff);
    xe_mmio_read32(gt, reg.reg);
}

static void gen3_irq_init(struct xe_gt *gt,
                          i915_reg_t imr, u32 imr_val,
                          i915_reg_t ier, u32 ier_val,
                          i915_reg_t iir)
{
    gen3_assert_iir_is_zero(gt, iir);

    xe_mmio_write32(gt, ier.reg, ier_val);
    xe_mmio_write32(gt, imr.reg, imr_val);
    xe_mmio_read32(gt, imr.reg);
}
#define GEN3_IRQ_INIT(gt, type, imr_val, ier_val) \
    gen3_irq_init((gt), \
                  type##IMR, imr_val, \
                  type##IER, ier_val, \
                  type##IIR)

static void gen3_irq_reset(struct xe_gt *gt, i915_reg_t imr, i915_reg_t iir,
                           i915_reg_t ier)
{
    xe_mmio_write32(gt, imr.reg, 0xffffffff);
    xe_mmio_read32(gt, imr.reg);

    xe_mmio_write32(gt, ier.reg, 0);

    /* IIR can theoretically queue up two events. Be paranoid. */
    xe_mmio_write32(gt, iir.reg, 0xffffffff);
    xe_mmio_read32(gt, iir.reg);
    xe_mmio_write32(gt, iir.reg, 0xffffffff);
    xe_mmio_read32(gt, iir.reg);
}
#define GEN3_IRQ_RESET(gt, type) \
    gen3_irq_reset((gt), type##IMR, type##IIR, type##IER)

static u32 gen11_intr_disable(struct xe_gt *gt)
{
    xe_mmio_write32(gt, GEN11_GFX_MSTR_IRQ.reg, 0);

    /*
     * Now with master disabled, get a sample of level indications
     * for this interrupt. Indications will be cleared on related acks.
     * New indications can and will light up during processing,
     * and will generate new interrupt after enabling master.
     */
    return xe_mmio_read32(gt, GEN11_GFX_MSTR_IRQ.reg);
}

static u32
gen11_gu_misc_irq_ack(struct xe_gt *gt, const u32 master_ctl)
{
    u32 iir;

    if (!(master_ctl & GEN11_GU_MISC_IRQ))
        return 0;

    iir = xe_mmio_read32(gt, GEN11_GU_MISC_IIR.reg);
    if (likely(iir))
        xe_mmio_write32(gt, GEN11_GU_MISC_IIR.reg, iir);

    return iir;
}

static inline void gen11_intr_enable(struct xe_gt *gt, bool stall)
{
    xe_mmio_write32(gt, GEN11_GFX_MSTR_IRQ.reg, GEN11_MASTER_IRQ);
    if (stall)
        xe_mmio_read32(gt, GEN11_GFX_MSTR_IRQ.reg);
}

static void gen11_gt_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
{
    u32 irqs, dmask, smask;
    u32 ccs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE);
    u32 bcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY);

    if (xe_device_guc_submission_enabled(xe)) {
        irqs = GT_RENDER_USER_INTERRUPT |
            GT_RENDER_PIPECTL_NOTIFY_INTERRUPT;
    } else {
        irqs = GT_RENDER_USER_INTERRUPT |
               GT_CS_MASTER_ERROR_INTERRUPT |
               GT_CONTEXT_SWITCH_INTERRUPT |
               GT_WAIT_SEMAPHORE_INTERRUPT;
    }

    dmask = irqs << 16 | irqs;
    smask = irqs << 16;

    /* Enable RCS, BCS, VCS and VECS class interrupts. */
    xe_mmio_write32(gt, GEN11_RENDER_COPY_INTR_ENABLE.reg, dmask);
    xe_mmio_write32(gt, GEN11_VCS_VECS_INTR_ENABLE.reg, dmask);
    if (ccs_mask)
        xe_mmio_write32(gt, GEN12_CCS_RSVD_INTR_ENABLE.reg, smask);

    /* Unmask irqs on RCS, BCS, VCS and VECS engines. */
    xe_mmio_write32(gt, GEN11_RCS0_RSVD_INTR_MASK.reg, ~smask);
    xe_mmio_write32(gt, GEN11_BCS_RSVD_INTR_MASK.reg, ~smask);
    if (bcs_mask & (BIT(1)|BIT(2)))
        xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK.reg, ~dmask);
    if (bcs_mask & (BIT(3)|BIT(4)))
        xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK.reg, ~dmask);
    if (bcs_mask & (BIT(5)|BIT(6)))
        xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK.reg, ~dmask);
    if (bcs_mask & (BIT(7)|BIT(8)))
        xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK.reg, ~dmask);
    xe_mmio_write32(gt, GEN11_VCS0_VCS1_INTR_MASK.reg, ~dmask);
    xe_mmio_write32(gt, GEN11_VCS2_VCS3_INTR_MASK.reg, ~dmask);
    //if (HAS_ENGINE(gt, VCS4) || HAS_ENGINE(gt, VCS5))
    //	intel_uncore_write(uncore, GEN12_VCS4_VCS5_INTR_MASK, ~dmask);
    //if (HAS_ENGINE(gt, VCS6) || HAS_ENGINE(gt, VCS7))
    //	intel_uncore_write(uncore, GEN12_VCS6_VCS7_INTR_MASK, ~dmask);
    xe_mmio_write32(gt, GEN11_VECS0_VECS1_INTR_MASK.reg, ~dmask);
    //if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3))
    //	intel_uncore_write(uncore, GEN12_VECS2_VECS3_INTR_MASK, ~dmask);
    if (ccs_mask & (BIT(0)|BIT(1)))
        xe_mmio_write32(gt, GEN12_CCS0_CCS1_INTR_MASK.reg, ~dmask);
    if (ccs_mask & (BIT(2)|BIT(3)))
        xe_mmio_write32(gt, GEN12_CCS2_CCS3_INTR_MASK.reg, ~dmask);

    /*
     * RPS interrupts will get enabled/disabled on demand when RPS itself
     * is enabled/disabled.
     */
    /* TODO: gt->pm_ier, gt->pm_imr */
    xe_mmio_write32(gt, GEN11_GPM_WGBOXPERF_INTR_ENABLE.reg, 0);
    xe_mmio_write32(gt, GEN11_GPM_WGBOXPERF_INTR_MASK.reg, ~0);

    /* Same thing for GuC interrupts */
    xe_mmio_write32(gt, GEN11_GUC_SG_INTR_ENABLE.reg, 0);
    xe_mmio_write32(gt, GEN11_GUC_SG_INTR_MASK.reg, ~0);
}

static void gen11_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
{
    /* TODO: PCH */

    gen11_gt_irq_postinstall(xe, gt);

    GEN3_IRQ_INIT(gt, GEN11_GU_MISC_, ~GEN11_GU_MISC_GSE,
                  GEN11_GU_MISC_GSE);

    gen11_intr_enable(gt, true);
}

static u32
gen11_gt_engine_identity(struct xe_device *xe,
                         struct xe_gt *gt,
                         const unsigned int bank,
                         const unsigned int bit)
{
    u32 timeout_ts;
    u32 ident;

    lockdep_assert_held(&xe->irq.lock);

    xe_mmio_write32(gt, GEN11_IIR_REG_SELECTOR(bank).reg, BIT(bit));

    /*
     * NB: Specs do not specify how long to spin wait,
     * so we do ~100us as an educated guess.
     */
    timeout_ts = (local_clock() >> 10) + 100;
    do {
        ident = xe_mmio_read32(gt, GEN11_INTR_IDENTITY_REG(bank).reg);
    } while (!(ident & GEN11_INTR_DATA_VALID) &&
             !time_after32(local_clock() >> 10, timeout_ts));

    if (unlikely(!(ident & GEN11_INTR_DATA_VALID))) {
        drm_err(&xe->drm, "INTR_IDENTITY_REG%u:%u 0x%08x not valid!\n",
                bank, bit, ident);
        return 0;
    }

    xe_mmio_write32(gt, GEN11_INTR_IDENTITY_REG(bank).reg,
                    GEN11_INTR_DATA_VALID);

    return ident;
}

#define OTHER_MEDIA_GUC_INSTANCE 16

static void
gen11_gt_other_irq_handler(struct xe_gt *gt, const u8 instance, const u16 iir)
{
    if (instance == OTHER_GUC_INSTANCE && !xe_gt_is_media_type(gt))
        return xe_guc_irq_handler(&gt->uc.guc, iir);
    if (instance == OTHER_MEDIA_GUC_INSTANCE && xe_gt_is_media_type(gt))
        return xe_guc_irq_handler(&gt->uc.guc, iir);

    if (instance != OTHER_GUC_INSTANCE &&
        instance != OTHER_MEDIA_GUC_INSTANCE) {
        WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n",
                  instance, iir);
    }
}

static void gen11_gt_irq_handler(struct xe_device *xe, struct xe_gt *gt,
                                 u32 master_ctl, unsigned long *intr_dw,
                                 u32 *identity)
{
    unsigned int bank, bit;
    u16 instance, intr_vec;
    enum xe_engine_class class;
    struct xe_hw_engine *hwe;

    spin_lock(&xe->irq.lock);

    for (bank = 0; bank < 2; bank++) {
        if (!(master_ctl & GEN11_GT_DW_IRQ(bank)))
            continue;

        if (!xe_gt_is_media_type(gt)) {
            intr_dw[bank] =
                xe_mmio_read32(gt, GEN11_GT_INTR_DW(bank).reg);
            for_each_set_bit(bit, intr_dw + bank, 32)
                identity[bit] = gen11_gt_engine_identity(xe, gt,
                                                         bank,
                                                         bit);
            xe_mmio_write32(gt, GEN11_GT_INTR_DW(bank).reg,
                            intr_dw[bank]);
        }

        for_each_set_bit(bit, intr_dw + bank, 32) {
            class = GEN11_INTR_ENGINE_CLASS(identity[bit]);
            instance = GEN11_INTR_ENGINE_INSTANCE(identity[bit]);
            intr_vec = GEN11_INTR_ENGINE_INTR(identity[bit]);

            if (class == XE_ENGINE_CLASS_OTHER) {
                gen11_gt_other_irq_handler(gt, instance,
                                           intr_vec);
                continue;
            }

            hwe = xe_gt_hw_engine(gt, class, instance, false);
            if (!hwe)
                continue;

            xe_hw_engine_handle_irq(hwe, intr_vec);
        }
    }

    spin_unlock(&xe->irq.lock);
}

static irqreturn_t gen11_irq_handler(int irq, void *arg)
{
    struct xe_device *xe = arg;
    struct xe_gt *gt = xe_device_get_gt(xe, 0);	/* Only 1 GT here */
    u32 master_ctl, gu_misc_iir;
    unsigned long intr_dw[2];
    u32 identity[32];

    master_ctl = gen11_intr_disable(gt);
    if (!master_ctl) {
        gen11_intr_enable(gt, false);
        return IRQ_NONE;
    }

    gen11_gt_irq_handler(xe, gt, master_ctl, intr_dw, identity);

    gu_misc_iir = gen11_gu_misc_irq_ack(gt, master_ctl);

    gen11_intr_enable(gt, false);

    return IRQ_HANDLED;
}

static u32 dg1_intr_disable(struct xe_device *xe)
{
    struct xe_gt *gt = xe_device_get_gt(xe, 0);
    u32 val;

    /* First disable interrupts */
    xe_mmio_write32(gt, DG1_MSTR_TILE_INTR.reg, 0);

    /* Get the indication levels and ack the master unit */
    val = xe_mmio_read32(gt, DG1_MSTR_TILE_INTR.reg);
    if (unlikely(!val))
        return 0;

    xe_mmio_write32(gt, DG1_MSTR_TILE_INTR.reg, val);

    return val;
}

static void dg1_intr_enable(struct xe_device *xe, bool stall)
{
    struct xe_gt *gt = xe_device_get_gt(xe, 0);

    xe_mmio_write32(gt, DG1_MSTR_TILE_INTR.reg, DG1_MSTR_IRQ);
    if (stall)
        xe_mmio_read32(gt, DG1_MSTR_TILE_INTR.reg);
}

static void dg1_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
{
    gen11_gt_irq_postinstall(xe, gt);

    GEN3_IRQ_INIT(gt, GEN11_GU_MISC_, ~GEN11_GU_MISC_GSE,
                  GEN11_GU_MISC_GSE);

    if (gt->info.id + 1 == xe->info.tile_count)
        dg1_intr_enable(xe, true);
}

static irqreturn_t dg1_irq_handler(int irq, void *arg)
{
    struct xe_device *xe = arg;
    struct xe_gt *gt;
    u32 master_tile_ctl, master_ctl = 0, gu_misc_iir;
    unsigned long intr_dw[2];
    u32 identity[32];
    u8 id;

    /* TODO: This really shouldn't be copied+pasted */

    master_tile_ctl = dg1_intr_disable(xe);
    if (!master_tile_ctl) {
        dg1_intr_enable(xe, false);
        return IRQ_NONE;
    }

    for_each_gt(gt, xe, id) {
        if ((master_tile_ctl & DG1_MSTR_TILE(gt->info.vram_id)) == 0)
            continue;

        if (!xe_gt_is_media_type(gt))
            master_ctl = xe_mmio_read32(gt, GEN11_GFX_MSTR_IRQ.reg);

        /*
         * We might be in irq handler just when PCIe DPC is initiated
         * and all MMIO reads will be returned with all 1's. Ignore this
         * irq as device is inaccessible.
         */
        if (master_ctl == REG_GENMASK(31, 0)) {
            dev_dbg(gt_to_xe(gt)->drm.dev,
                    "Ignore this IRQ as device might be in DPC containment.\n");
            return IRQ_HANDLED;
        }

        if (!xe_gt_is_media_type(gt))
            xe_mmio_write32(gt, GEN11_GFX_MSTR_IRQ.reg, master_ctl);
        gen11_gt_irq_handler(xe, gt, master_ctl, intr_dw, identity);
    }

    gu_misc_iir = gen11_gu_misc_irq_ack(gt, master_ctl);

    dg1_intr_enable(xe, false);

    return IRQ_HANDLED;
}

static void gen11_gt_irq_reset(struct xe_gt *gt)
{
    u32 ccs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE);
    u32 bcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY);

    /* Disable RCS, BCS, VCS and VECS class engines. */
    xe_mmio_write32(gt, GEN11_RENDER_COPY_INTR_ENABLE.reg, 0);
    xe_mmio_write32(gt, GEN11_VCS_VECS_INTR_ENABLE.reg, 0);
    if (ccs_mask)
        xe_mmio_write32(gt, GEN12_CCS_RSVD_INTR_ENABLE.reg, 0);

    /* Restore masked irqs on RCS, BCS, VCS and VECS engines. */
    xe_mmio_write32(gt, GEN11_RCS0_RSVD_INTR_MASK.reg, ~0);
    xe_mmio_write32(gt, GEN11_BCS_RSVD_INTR_MASK.reg, ~0);
    if (bcs_mask & (BIT(1)|BIT(2)))
        xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK.reg, ~0);
    if (bcs_mask & (BIT(3)|BIT(4)))
        xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK.reg, ~0);
    if (bcs_mask & (BIT(5)|BIT(6)))
        xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK.reg, ~0);
    if (bcs_mask & (BIT(7)|BIT(8)))
        xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK.reg, ~0);
    xe_mmio_write32(gt, GEN11_VCS0_VCS1_INTR_MASK.reg, ~0);
    xe_mmio_write32(gt, GEN11_VCS2_VCS3_INTR_MASK.reg, ~0);
    // if (HAS_ENGINE(gt, VCS4) || HAS_ENGINE(gt, VCS5))
    //	xe_mmio_write32(xe, GEN12_VCS4_VCS5_INTR_MASK.reg, ~0);
    // if (HAS_ENGINE(gt, VCS6) || HAS_ENGINE(gt, VCS7))
    //	xe_mmio_write32(xe, GEN12_VCS6_VCS7_INTR_MASK.reg, ~0);
    xe_mmio_write32(gt, GEN11_VECS0_VECS1_INTR_MASK.reg, ~0);
    // if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3))
    //	xe_mmio_write32(xe, GEN12_VECS2_VECS3_INTR_MASK.reg, ~0);
    if (ccs_mask & (BIT(0)|BIT(1)))
        xe_mmio_write32(gt, GEN12_CCS0_CCS1_INTR_MASK.reg, ~0);
    if (ccs_mask & (BIT(2)|BIT(3)))
        xe_mmio_write32(gt, GEN12_CCS2_CCS3_INTR_MASK.reg, ~0);

    xe_mmio_write32(gt, GEN11_GPM_WGBOXPERF_INTR_ENABLE.reg, 0);
    xe_mmio_write32(gt, GEN11_GPM_WGBOXPERF_INTR_MASK.reg, ~0);
    xe_mmio_write32(gt, GEN11_GUC_SG_INTR_ENABLE.reg, 0);
    xe_mmio_write32(gt, GEN11_GUC_SG_INTR_MASK.reg, ~0);
}

static void gen11_irq_reset(struct xe_gt *gt)
{
    gen11_intr_disable(gt);

    gen11_gt_irq_reset(gt);

    GEN3_IRQ_RESET(gt, GEN11_GU_MISC_);
    GEN3_IRQ_RESET(gt, GEN8_PCU_);
}

static void dg1_irq_reset(struct xe_gt *gt)
{
    if (gt->info.id == 0)
        dg1_intr_disable(gt_to_xe(gt));

    gen11_gt_irq_reset(gt);

    GEN3_IRQ_RESET(gt, GEN11_GU_MISC_);
    GEN3_IRQ_RESET(gt, GEN8_PCU_);
}

void xe_irq_reset(struct xe_device *xe)
{
    struct xe_gt *gt;
    u8 id;

    for_each_gt(gt, xe, id) {
        if (GRAPHICS_VERx100(xe) >= 1210) {
            dg1_irq_reset(gt);
        } else if (GRAPHICS_VER(xe) >= 11) {
            gen11_irq_reset(gt);
        } else {
            drm_err(&xe->drm, "No interrupt reset hook");
        }
    }
}

void xe_gt_irq_postinstall(struct xe_gt *gt)
{
    struct xe_device *xe = gt_to_xe(gt);

    if (GRAPHICS_VERx100(xe) >= 1210)
        dg1_irq_postinstall(xe, gt);
    else if (GRAPHICS_VER(xe) >= 11)
        gen11_irq_postinstall(xe, gt);
    else
        drm_err(&xe->drm, "No interrupt postinstall hook");
}

static void xe_irq_postinstall(struct xe_device *xe)
{
    struct xe_gt *gt;
    u8 id;

    for_each_gt(gt, xe, id)
        xe_gt_irq_postinstall(gt);
}

static irq_handler_t xe_irq_handler(struct xe_device *xe)
{
    if (GRAPHICS_VERx100(xe) >= 1210) {
        return dg1_irq_handler;
    } else if (GRAPHICS_VER(xe) >= 11) {
        return gen11_irq_handler;
    } else {
        return NULL;
    }
}

static void irq_uninstall(struct drm_device *drm, void *arg)
{
    struct xe_device *xe = arg;
    struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
    int irq = pdev->irq;

    if (!xe->irq.enabled)
        return;

    xe->irq.enabled = false;
    xe_irq_reset(xe);
    free_irq(irq, xe);
    if (pdev->msi_enabled)
        pci_disable_msi(pdev);
}

int xe_irq_install(struct xe_device *xe)
{
    int irq = to_pci_dev(xe->drm.dev)->irq;
    static irq_handler_t irq_handler;
    int err;

    irq_handler = xe_irq_handler(xe);
    if (!irq_handler) {
        drm_err(&xe->drm, "No supported interrupt handler");
        return -EINVAL;
    }

    xe->irq.enabled = true;

    xe_irq_reset(xe);

    err = request_irq(irq, irq_handler,
                      IRQF_SHARED, DRIVER_NAME, xe);
    if (err < 0) {
        xe->irq.enabled = false;
        return err;
    }

    err = drmm_add_action_or_reset(&xe->drm, irq_uninstall, xe);
    if (err)
        return err;

    return err;
}

void xe_irq_shutdown(struct xe_device *xe)
{
    irq_uninstall(&xe->drm, xe);
}

void xe_irq_suspend(struct xe_device *xe)
{
    spin_lock_irq(&xe->irq.lock);
    xe->irq.enabled = false;
    xe_irq_reset(xe);
    spin_unlock_irq(&xe->irq.lock);
}

void xe_irq_resume(struct xe_device *xe)
{
    spin_lock_irq(&xe->irq.lock);
    xe->irq.enabled = true;
    xe_irq_reset(xe);
    xe_irq_postinstall(xe);
    spin_unlock_irq(&xe->irq.lock);
}
drivers/gpu/drm/xe/xe_irq.h (+18)

/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

#ifndef _XE_IRQ_H_
#define _XE_IRQ_H_

struct xe_device;
struct xe_gt;

int xe_irq_install(struct xe_device *xe);
void xe_gt_irq_postinstall(struct xe_gt *gt);
void xe_irq_shutdown(struct xe_device *xe);
void xe_irq_suspend(struct xe_device *xe);
void xe_irq_resume(struct xe_device *xe);

#endif
drivers/gpu/drm/xe/xe_lrc.c (+841)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_lrc.h"

#include "xe_bo.h"
#include "xe_device.h"
#include "xe_engine_types.h"
#include "xe_gt.h"
#include "xe_map.h"
#include "xe_hw_fence.h"
#include "xe_vm.h"

#include "i915_reg.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt_regs.h"
#include "gt/intel_lrc_reg.h"
#include "gt/intel_engine_regs.h"

#define GEN8_CTX_VALID				(1 << 0)
#define GEN8_CTX_L3LLC_COHERENT			(1 << 5)
#define GEN8_CTX_PRIVILEGE			(1 << 8)
#define GEN8_CTX_ADDRESSING_MODE_SHIFT		3
#define INTEL_LEGACY_64B_CONTEXT		3

#define GEN11_ENGINE_CLASS_SHIFT		61
#define GEN11_ENGINE_INSTANCE_SHIFT		48

static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
{
	return gt_to_xe(lrc->fence_ctx.gt);
}

size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class)
{
	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
	case XE_ENGINE_CLASS_COMPUTE:
		/* 14 pages since graphics_ver == 11 */
		return 14 * SZ_4K;
	default:
		WARN(1, "Unknown engine class: %d", class);
		fallthrough;
	case XE_ENGINE_CLASS_COPY:
	case XE_ENGINE_CLASS_VIDEO_DECODE:
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
		return 2 * SZ_4K;
	}
}

/*
 * The per-platform tables are u8-encoded in @data. Decode @data and set the
 * addresses' offset and commands in @regs. The following encoding is used
 * for each byte. There are 2 steps: decoding commands and decoding addresses.
 *
 * Commands:
 * [7]: create NOPs - number of NOPs are set in lower bits
 * [6]: when creating MI_LOAD_REGISTER_IMM command, allow to set
 *      MI_LRI_FORCE_POSTED
 * [5:0]: number of NOPs or registers to set values to in case of
 *        MI_LOAD_REGISTER_IMM
 *
 * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
 * number of registers. They are set by using the REG/REG16 macros: the former
 * is used for offsets smaller than 0x200 while the latter is for values bigger
 * than that. Those macros already set all the bits documented below correctly:
 *
 * [7]: when a register offset needs more than 6 bits, additional bytes
 *      follow for the lower bits
 * [6:0]: register offset, without considering the engine base.
 *
 * This function only tweaks the commands and register offsets. Values are not
 * filled out.
 */
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct xe_hw_engine *hwe)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | \
			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
#define END 0
{
	const u32 base = hwe->mmio_base;

	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		XE_BUG_ON(!count);
		do {
			u32 offset = 0;
			u8 v;

			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	*regs = MI_BATCH_BUFFER_END | BIT(0);
}

static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	END
};

static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	END
};

static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	END
};

static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};

#undef END
#undef REG16
#undef REG
#undef LRI
#undef NOP

static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
{
	if (class == XE_ENGINE_CLASS_RENDER) {
		if (GRAPHICS_VERx100(xe) >= 1270)
			return mtl_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1250)
			return xehp_rcs_offsets;
		else
			return gen12_rcs_offsets;
	} else {
		if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_xcs_offsets;
		else
			return gen12_xcs_offsets;
	}
}
static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
{
	regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH) |
				    _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
				    CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;

	/* TODO: Timestamp */
}

static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (GRAPHICS_VERx100(xe) >= 1250)
		return 0x70;
	else
		return 0x60;
}

static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
{
	int x;

	x = lrc_ring_mi_mode(hwe);
	regs[x + 1] &= ~STOP_RING;
	regs[x + 1] |= STOP_RING << 16;
}

static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
{
	return 0;
}

u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
{
	return lrc->ring.size;
}

/* Make the magic macros work */
#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset

#define LRC_SEQNO_PPHWSP_OFFSET		512
#define LRC_START_SEQNO_PPHWSP_OFFSET	(LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET	2048
#define LRC_PPHWSP_SIZE			SZ_4K

static size_t lrc_reg_size(struct xe_device *xe)
{
	if (GRAPHICS_VERx100(xe) >= 1250)
		return 96 * sizeof(u32);
	else
		return 80 * sizeof(u32);
}

size_t xe_lrc_skip_size(struct xe_device *xe)
{
	return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
}

static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
{
	/* The seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
{
	/* The start seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
{
	/* The parallel is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc)
{
	return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
}

#define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \
	struct iosys_map map = lrc->bo->vmap; \
\
	XE_BUG_ON(iosys_map_is_null(&map)); \
	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
	return map; \
} \
static inline u32 __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
{ \
	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
}

DECL_MAP_ADDR_HELPERS(ring)
DECL_MAP_ADDR_HELPERS(pphwsp)
DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(parallel)

#undef DECL_MAP_ADDR_HELPERS

u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_pphwsp_ggtt_addr(lrc);
}

u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

static void *empty_lrc_data(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);
	void *data;
	u32 *regs;

	data = kzalloc(xe_lrc_size(xe, hwe->class), GFP_KERNEL);
	if (!data)
		return NULL;

	/* 1st page: Per-Process of HW status Page */
	regs = data + LRC_PPHWSP_SIZE;
	set_offsets(regs, reg_offsets(xe, hwe->class), hwe);
	set_context_control(regs, hwe);
	reset_stop_ring(regs, hwe);

	return data;
}

static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
	u64 desc = xe_vm_pdp4_descriptor(vm, lrc->full_gt);

	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
}

#define PVC_CTX_ASID		(0x2e + 1)
#define PVC_CTX_ACC_CTR_THOLD	(0x2a + 1)
#define ACC_GRANULARITY_S	20
#define ACC_NOTIFY_S		16

int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
		struct xe_engine *e, struct xe_vm *vm, u32 ring_size)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	struct iosys_map map;
	void *init_data = NULL;
	u32 arb_enable;
	int err;

	lrc->flags = 0;

	lrc->bo = xe_bo_create_locked(xe, hwe->gt, vm,
				      ring_size + xe_lrc_size(xe, hwe->class),
				      ttm_bo_type_kernel,
				      XE_BO_CREATE_VRAM_IF_DGFX(hwe->gt) |
				      XE_BO_CREATE_GGTT_BIT);
	if (IS_ERR(lrc->bo))
		return PTR_ERR(lrc->bo);

	if (xe_gt_is_media_type(hwe->gt))
		lrc->full_gt = xe_find_full_gt(hwe->gt);
	else
		lrc->full_gt = hwe->gt;

	/*
	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
	 * via VM bind calls.
	 */
	err = xe_bo_pin(lrc->bo);
	if (err)
		goto err_unlock_put_bo;
	lrc->flags |= XE_LRC_PINNED;

	err = xe_bo_vmap(lrc->bo);
	if (err)
		goto err_unpin_bo;

	xe_bo_unlock_vm_held(lrc->bo);

	lrc->ring.size = ring_size;
	lrc->ring.tail = 0;

	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
			     hwe->fence_irq, hwe->name);

	if (!gt->default_lrc[hwe->class]) {
		init_data = empty_lrc_data(hwe);
		if (!init_data) {
			xe_lrc_finish(lrc);
			return -ENOMEM;
		}
	}

	/*
	 * Init Per-Process of HW status Page, LRC / context state to known
	 * values
	 */
	map = __xe_lrc_pphwsp_map(lrc);
	if (!init_data) {
		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
				 xe_lrc_size(xe, hwe->class) - LRC_PPHWSP_SIZE);
	} else {
		xe_map_memcpy_to(xe, &map, 0, init_data,
				 xe_lrc_size(xe, hwe->class));
		kfree(init_data);
	}

	if (vm)
		xe_lrc_set_ppgtt(lrc, vm);

	xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
	xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
	xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
			     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	if (xe->info.supports_usm && vm) {
		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID,
				     (e->usm.acc_granularity <<
				      ACC_GRANULARITY_S) | vm->usm.asid);
		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ACC_CTR_THOLD,
				     (e->usm.acc_notify << ACC_NOTIFY_S) |
				     e->usm.acc_trigger);
	}

	lrc->desc = GEN8_CTX_VALID;
	lrc->desc |= INTEL_LEGACY_64B_CONTEXT << GEN8_CTX_ADDRESSING_MODE_SHIFT;
	/* TODO: Priority */

	/*
	 * While this appears to have something about privileged batches or
	 * some such, it really just means PPGTT mode.
	 */
	if (vm)
		lrc->desc |= GEN8_CTX_PRIVILEGE;

	if (GRAPHICS_VERx100(xe) < 1250) {
		lrc->desc |= (u64)hwe->instance << GEN11_ENGINE_INSTANCE_SHIFT;
		lrc->desc |= (u64)hwe->class << GEN11_ENGINE_CLASS_SHIFT;
	}

	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));

	return 0;

err_unpin_bo:
	if (lrc->flags & XE_LRC_PINNED)
		xe_bo_unpin(lrc->bo);
err_unlock_put_bo:
	xe_bo_unlock_vm_held(lrc->bo);
	xe_bo_put(lrc->bo);
	return err;
}

void xe_lrc_finish(struct xe_lrc *lrc)
{
	struct ww_acquire_ctx ww;

	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
	if (lrc->flags & XE_LRC_PINNED) {
		if (lrc->bo->vm)
			xe_vm_lock(lrc->bo->vm, &ww, 0, false);
		else
			xe_bo_lock_no_vm(lrc->bo, NULL);
		xe_bo_unpin(lrc->bo);
		if (lrc->bo->vm)
			xe_vm_unlock(lrc->bo->vm, &ww);
		else
			xe_bo_unlock_no_vm(lrc->bo);
	}
	xe_bo_put(lrc->bo);
}

void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
{
	xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
}

u32 xe_lrc_ring_head(struct xe_lrc *lrc)
{
	return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
}

u32 xe_lrc_ring_space(struct xe_lrc *lrc)
{
	const u32 head = xe_lrc_ring_head(lrc);
	const u32 tail = lrc->ring.tail;
	const u32 size = lrc->ring.size;

	return ((head - tail - 1) & (size - 1)) + 1;
}

static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
				const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);

	iosys_map_incr(&ring, lrc->ring.tail);
	xe_map_memcpy_to(xe, &ring, 0, data, size);
	lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
}

void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
{
	struct iosys_map ring;
	u32 rhs;
	size_t aligned_size;

	XE_BUG_ON(!IS_ALIGNED(size, 4));
	aligned_size = ALIGN(size, 8);

	ring = __xe_lrc_ring_map(lrc);

	XE_BUG_ON(lrc->ring.tail >= lrc->ring.size);
	rhs = lrc->ring.size - lrc->ring.tail;
	if (size > rhs) {
		__xe_lrc_write_ring(lrc, ring, data, rhs);
		__xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
	} else {
		__xe_lrc_write_ring(lrc, ring, data, size);
	}

	if (aligned_size > size) {
		u32 noop = MI_NOOP;

		__xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
	}
}

u64 xe_lrc_descriptor(struct xe_lrc *lrc)
{
	return lrc->desc | xe_lrc_ggtt_addr(lrc);
}

u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_seqno_ggtt_addr(lrc);
}

struct dma_fence *xe_lrc_create_seqno_fence(struct xe_lrc *lrc)
{
	return &xe_hw_fence_create(&lrc->fence_ctx,
				   __xe_lrc_seqno_map(lrc))->dma;
}

s32 xe_lrc_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_start_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_start_seqno_ggtt_addr(lrc);
}

u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_ggtt_addr(lrc);
}

struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_map(lrc);
}
drivers/gpu/drm/xe/xe_lrc.h (+50)

/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2021 Intel Corporation
 */
#ifndef _XE_LRC_H_
#define _XE_LRC_H_

#include "xe_lrc_types.h"

struct xe_device;
struct xe_engine;
enum xe_engine_class;
struct xe_hw_engine;
struct xe_vm;

#define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4)

int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
		struct xe_engine *e, struct xe_vm *vm, u32 ring_size);
void xe_lrc_finish(struct xe_lrc *lrc);

size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class);
u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc);

void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head);
u32 xe_lrc_ring_head(struct xe_lrc *lrc);
u32 xe_lrc_ring_space(struct xe_lrc *lrc);
void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size);

u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc);
u32 *xe_lrc_regs(struct xe_lrc *lrc);

u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr);
void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val);

u64 xe_lrc_descriptor(struct xe_lrc *lrc);

u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc);
struct dma_fence *xe_lrc_create_seqno_fence(struct xe_lrc *lrc);
s32 xe_lrc_seqno(struct xe_lrc *lrc);

u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc);
s32 xe_lrc_start_seqno(struct xe_lrc *lrc);

u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc);
struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc);

size_t xe_lrc_skip_size(struct xe_device *xe);

#endif
drivers/gpu/drm/xe/xe_lrc_types.h (+47)

/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

#ifndef _XE_LRC_TYPES_H_
#define _XE_LRC_TYPES_H_

#include "xe_hw_fence_types.h"

struct xe_bo;

/**
 * struct xe_lrc - Logical ring context (LRC) and submission ring object
 */
struct xe_lrc {
	/**
	 * @bo: buffer object (memory) for logical ring context, per process HW
	 * status page, and submission ring.
	 */
	struct xe_bo *bo;

	/** @full_gt: full GT which this LRC belongs to */
	struct xe_gt *full_gt;

	/** @flags: LRC flags */
	u32 flags;
#define XE_LRC_PINNED BIT(1)

	/** @ring: submission ring state */
	struct {
		/** @size: size of submission ring */
		u32 size;
		/** @tail: tail of submission ring */
		u32 tail;
		/** @old_tail: shadow of tail */
		u32 old_tail;
	} ring;

	/** @desc: LRC descriptor */
	u64 desc;

	/** @fence_ctx: context for hw fence */
	struct xe_hw_fence_ctx fence_ctx;
};

#endif
drivers/gpu/drm/xe/xe_macros.h (+20)

/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2021 Intel Corporation
 */

#ifndef _XE_MACROS_H_
#define _XE_MACROS_H_

#include <linux/bug.h>

#define XE_EXTRA_DEBUG 1
#define XE_WARN_ON WARN_ON
#define XE_BUG_ON BUG_ON

#define XE_IOCTL_ERR(xe, cond) \
	((cond) && (drm_info(&(xe)->drm, \
			    "Ioctl argument check failed at %s:%d: %s", \
			    __FILE__, __LINE__, #cond), 1))

#endif
drivers/gpu/drm/xe/xe_map.h (+93)

/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

#ifndef __XE_MAP_H__
#define __XE_MAP_H__

#include <linux/iosys-map.h>

#include <xe_device.h>

/**
 * DOC: Map layer
 *
 * All access to any memory shared with a device (both sysmem and vram) in the
 * XE driver should go through this layer (xe_map). This layer is built on top
 * of :ref:`driver-api/device-io:Generalizing Access to System and I/O Memory`
 * and with extra hooks into the XE driver that allows adding asserts to memory
 * accesses (e.g. for blocking runtime_pm D3Cold on Discrete Graphics).
 */

static inline void xe_map_memcpy_to(struct xe_device *xe, struct iosys_map *dst,
				    size_t dst_offset, const void *src,
				    size_t len)
{
	xe_device_assert_mem_access(xe);
	iosys_map_memcpy_to(dst, dst_offset, src, len);
}

static inline void xe_map_memcpy_from(struct xe_device *xe, void *dst,
				      const struct iosys_map *src,
				      size_t src_offset, size_t len)
{
	xe_device_assert_mem_access(xe);
	iosys_map_memcpy_from(dst, src, src_offset, len);
}

static inline void xe_map_memset(struct xe_device *xe,
				 struct iosys_map *dst, size_t offset,
				 int value, size_t len)
{
	xe_device_assert_mem_access(xe);
	iosys_map_memset(dst, offset, value, len);
}

/* FIXME: We likely should kill these two functions sooner or later */
static inline u32 xe_map_read32(struct xe_device *xe, struct iosys_map *map)
{
	xe_device_assert_mem_access(xe);

	if (map->is_iomem)
		return readl(map->vaddr_iomem);
	else
		return READ_ONCE(*(u32 *)map->vaddr);
}

static inline void xe_map_write32(struct xe_device *xe, struct iosys_map *map,
				  u32 val)
{
	xe_device_assert_mem_access(xe);

	if (map->is_iomem)
		writel(val, map->vaddr_iomem);
	else
		*(u32 *)map->vaddr = val;
}

#define xe_map_rd(xe__, map__, offset__, type__) ({ \
	struct xe_device *__xe = xe__; \
	xe_device_assert_mem_access(__xe); \
	iosys_map_rd(map__, offset__, type__); \
})

#define xe_map_wr(xe__, map__, offset__, type__, val__) ({ \
	struct xe_device *__xe = xe__; \
	xe_device_assert_mem_access(__xe); \
	iosys_map_wr(map__, offset__, type__, val__); \
})

#define xe_map_rd_field(xe__, map__, struct_offset__, struct_type__, field__) ({ \
	struct xe_device *__xe = xe__; \
	xe_device_assert_mem_access(__xe); \
	iosys_map_rd_field(map__, struct_offset__, struct_type__, field__); \
})

#define xe_map_wr_field(xe__, map__, struct_offset__, struct_type__, field__, val__) ({ \
	struct xe_device *__xe = xe__; \
	xe_device_assert_mem_access(__xe); \
	iosys_map_wr_field(map__, struct_offset__, struct_type__, field__, val__); \
})

#endif
drivers/gpu/drm/xe/xe_migrate.c (+1168)
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2020 Intel Corporation 4 + */ 5 + #include "xe_migrate.h" 6 + 7 + #include "xe_bb.h" 8 + #include "xe_bo.h" 9 + #include "xe_engine.h" 10 + #include "xe_ggtt.h" 11 + #include "xe_gt.h" 12 + #include "xe_hw_engine.h" 13 + #include "xe_lrc.h" 14 + #include "xe_map.h" 15 + #include "xe_mocs.h" 16 + #include "xe_pt.h" 17 + #include "xe_res_cursor.h" 18 + #include "xe_sched_job.h" 19 + #include "xe_sync.h" 20 + #include "xe_trace.h" 21 + #include "xe_vm.h" 22 + 23 + #include <linux/sizes.h> 24 + #include <drm/drm_managed.h> 25 + #include <drm/ttm/ttm_tt.h> 26 + #include <drm/xe_drm.h> 27 + 28 + #include "gt/intel_gpu_commands.h" 29 + 30 + struct xe_migrate { 31 + struct xe_engine *eng; 32 + struct xe_gt *gt; 33 + struct mutex job_mutex; 34 + struct xe_bo *pt_bo; 35 + struct xe_bo *cleared_bo; 36 + u64 batch_base_ofs; 37 + u64 usm_batch_base_ofs; 38 + u64 cleared_vram_ofs; 39 + struct dma_fence *fence; 40 + struct drm_suballoc_manager vm_update_sa; 41 + }; 42 + 43 + #define MAX_PREEMPTDISABLE_TRANSFER SZ_8M /* Around 1ms. 
*/ 44 + #define NUM_KERNEL_PDE 17 45 + #define NUM_PT_SLOTS 32 46 + #define NUM_PT_PER_BLIT (MAX_PREEMPTDISABLE_TRANSFER / SZ_2M) 47 + 48 + struct xe_engine *xe_gt_migrate_engine(struct xe_gt *gt) 49 + { 50 + return gt->migrate->eng; 51 + } 52 + 53 + static void xe_migrate_fini(struct drm_device *dev, void *arg) 54 + { 55 + struct xe_migrate *m = arg; 56 + struct ww_acquire_ctx ww; 57 + 58 + xe_vm_lock(m->eng->vm, &ww, 0, false); 59 + xe_bo_unpin(m->pt_bo); 60 + if (m->cleared_bo) 61 + xe_bo_unpin(m->cleared_bo); 62 + xe_vm_unlock(m->eng->vm, &ww); 63 + 64 + dma_fence_put(m->fence); 65 + if (m->cleared_bo) 66 + xe_bo_put(m->cleared_bo); 67 + xe_bo_put(m->pt_bo); 68 + drm_suballoc_manager_fini(&m->vm_update_sa); 69 + mutex_destroy(&m->job_mutex); 70 + xe_vm_close_and_put(m->eng->vm); 71 + xe_engine_put(m->eng); 72 + } 73 + 74 + static u64 xe_migrate_vm_addr(u64 slot, u32 level) 75 + { 76 + XE_BUG_ON(slot >= NUM_PT_SLOTS); 77 + 78 + /* First slot is reserved for mapping of PT bo and bb, start from 1 */ 79 + return (slot + 1ULL) << xe_pt_shift(level + 1); 80 + } 81 + 82 + static u64 xe_migrate_vram_ofs(u64 addr) 83 + { 84 + return addr + (256ULL << xe_pt_shift(2)); 85 + } 86 + 87 + /* 88 + * For flat CCS clearing we need a cleared chunk of memory to copy from, 89 + * since the CCS clearing mode of XY_FAST_COLOR_BLT appears to be buggy 90 + * (it clears on only 14 bytes in each chunk of 16). 91 + * If clearing the main surface one can use the part of the main surface 92 + * already cleared, but for clearing as part of copying non-compressed 93 + * data out of system memory, we don't readily have a cleared part of 94 + * VRAM to copy from, so create one to use for that case. 
95 + */ 96 + static int xe_migrate_create_cleared_bo(struct xe_migrate *m, struct xe_vm *vm) 97 + { 98 + struct xe_gt *gt = m->gt; 99 + struct xe_device *xe = vm->xe; 100 + size_t cleared_size; 101 + u64 vram_addr; 102 + bool is_vram; 103 + 104 + if (!xe_device_has_flat_ccs(xe)) 105 + return 0; 106 + 107 + cleared_size = xe_device_ccs_bytes(xe, MAX_PREEMPTDISABLE_TRANSFER); 108 + cleared_size = PAGE_ALIGN(cleared_size); 109 + m->cleared_bo = xe_bo_create_pin_map(xe, gt, vm, cleared_size, 110 + ttm_bo_type_kernel, 111 + XE_BO_CREATE_VRAM_IF_DGFX(gt) | 112 + XE_BO_CREATE_PINNED_BIT); 113 + if (IS_ERR(m->cleared_bo)) 114 + return PTR_ERR(m->cleared_bo); 115 + 116 + xe_map_memset(xe, &m->cleared_bo->vmap, 0, 0x00, cleared_size); 117 + vram_addr = xe_bo_addr(m->cleared_bo, 0, GEN8_PAGE_SIZE, &is_vram); 118 + XE_BUG_ON(!is_vram); 119 + m->cleared_vram_ofs = xe_migrate_vram_ofs(vram_addr); 120 + 121 + return 0; 122 + } 123 + 124 + static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m, 125 + struct xe_vm *vm) 126 + { 127 + u8 id = gt->info.id; 128 + u32 num_entries = NUM_PT_SLOTS, num_level = vm->pt_root[id]->level; 129 + u32 map_ofs, level, i; 130 + struct xe_device *xe = gt_to_xe(m->gt); 131 + struct xe_bo *bo, *batch = gt->kernel_bb_pool.bo; 132 + u64 entry; 133 + int ret; 134 + 135 + /* Can't bump NUM_PT_SLOTS too high */ 136 + BUILD_BUG_ON(NUM_PT_SLOTS > SZ_2M/GEN8_PAGE_SIZE); 137 + /* Must be a multiple of 64K to support all platforms */ 138 + BUILD_BUG_ON(NUM_PT_SLOTS * GEN8_PAGE_SIZE % SZ_64K); 139 + /* And one slot reserved for the 4KiB page table updates */ 140 + BUILD_BUG_ON(!(NUM_KERNEL_PDE & 1)); 141 + 142 + /* Need to be sure everything fits in the first PT, or create more */ 143 + XE_BUG_ON(m->batch_base_ofs + batch->size >= SZ_2M); 144 + 145 + bo = xe_bo_create_pin_map(vm->xe, m->gt, vm, 146 + num_entries * GEN8_PAGE_SIZE, 147 + ttm_bo_type_kernel, 148 + XE_BO_CREATE_VRAM_IF_DGFX(m->gt) | 149 + XE_BO_CREATE_PINNED_BIT); 150 + if 
(IS_ERR(bo)) 151 + return PTR_ERR(bo); 152 + 153 + ret = xe_migrate_create_cleared_bo(m, vm); 154 + if (ret) { 155 + xe_bo_put(bo); 156 + return ret; 157 + } 158 + 159 + entry = gen8_pde_encode(bo, bo->size - GEN8_PAGE_SIZE, XE_CACHE_WB); 160 + xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry); 161 + 162 + map_ofs = (num_entries - num_level) * GEN8_PAGE_SIZE; 163 + 164 + /* Map the entire BO in our level 0 pt */ 165 + for (i = 0, level = 0; i < num_entries; level++) { 166 + entry = gen8_pte_encode(NULL, bo, i * GEN8_PAGE_SIZE, 167 + XE_CACHE_WB, 0, 0); 168 + 169 + xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry); 170 + 171 + if (vm->flags & XE_VM_FLAGS_64K) 172 + i += 16; 173 + else 174 + i += 1; 175 + } 176 + 177 + if (!IS_DGFX(xe)) { 178 + XE_BUG_ON(xe->info.supports_usm); 179 + 180 + /* Write out batch too */ 181 + m->batch_base_ofs = NUM_PT_SLOTS * GEN8_PAGE_SIZE; 182 + for (i = 0; i < batch->size; 183 + i += vm->flags & XE_VM_FLAGS_64K ? GEN8_64K_PAGE_SIZE : 184 + GEN8_PAGE_SIZE) { 185 + entry = gen8_pte_encode(NULL, batch, i, 186 + XE_CACHE_WB, 0, 0); 187 + 188 + xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, 189 + entry); 190 + level++; 191 + } 192 + } else { 193 + bool is_lmem; 194 + u64 batch_addr = xe_bo_addr(batch, 0, GEN8_PAGE_SIZE, &is_lmem); 195 + 196 + m->batch_base_ofs = xe_migrate_vram_ofs(batch_addr); 197 + 198 + if (xe->info.supports_usm) { 199 + batch = gt->usm.bb_pool.bo; 200 + batch_addr = xe_bo_addr(batch, 0, GEN8_PAGE_SIZE, 201 + &is_lmem); 202 + m->usm_batch_base_ofs = xe_migrate_vram_ofs(batch_addr); 203 + } 204 + } 205 + 206 + for (level = 1; level < num_level; level++) { 207 + u32 flags = 0; 208 + 209 + if (vm->flags & XE_VM_FLAGS_64K && level == 1) 210 + flags = GEN12_PDE_64K; 211 + 212 + entry = gen8_pde_encode(bo, map_ofs + (level - 1) * 213 + GEN8_PAGE_SIZE, XE_CACHE_WB); 214 + xe_map_wr(xe, &bo->vmap, map_ofs + GEN8_PAGE_SIZE * level, u64, 215 + entry | flags); 216 + } 217 + 218 + /* Write PDE's that point to our 
BO. */
	for (i = 0; i < num_entries - num_level; i++) {
		entry = gen8_pde_encode(bo, i * GEN8_PAGE_SIZE,
					XE_CACHE_WB);

		xe_map_wr(xe, &bo->vmap, map_ofs + GEN8_PAGE_SIZE +
			  (i + 1) * 8, u64, entry);
	}

	/* Identity map the entire vram at 256GiB offset */
	if (IS_DGFX(xe)) {
		u64 pos, ofs, flags;

		level = 2;
		ofs = map_ofs + GEN8_PAGE_SIZE * level + 256 * 8;
		flags = GEN8_PAGE_RW | GEN8_PAGE_PRESENT | PPAT_CACHED |
			GEN12_PPGTT_PTE_LM | GEN8_PDPE_PS_1G;

		/*
		 * Use 1GB pages; it shouldn't matter that the physical
		 * amount of vram is less, as long as we don't access it.
		 */
		for (pos = 0; pos < xe->mem.vram.size; pos += SZ_1G, ofs += 8)
			xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags);
	}

	/*
	 * Example layout created above, with root level = 3:
	 * [PT0...PT7]: kernel PT's for copy/clear; 64 or 4KiB PTE's
	 * [PT8]: Kernel PT for VM_BIND, 4 KiB PTE's
	 * [PT9...PT28]: Userspace PT's for VM_BIND, 4 KiB PTE's
	 * [PT29 = PDE 0] [PT30 = PDE 1] [PT31 = PDE 2]
	 *
	 * This makes the lowest part of the VM point to the pagetables.
	 * Hence the lowest 2M in the vm points to itself. With a few writes
	 * and flushes, other parts of the VM can be used either for copying
	 * or for clearing.
	 *
	 * For performance, the kernel reserves PDE's, so about 20 are left
	 * for async VM updates.
	 *
	 * To make updates easier, each scratch PT is put in slot (1 + PT #)
	 * everywhere; this allows lockless updates to scratch pages by using
	 * the different addresses in the VM.
262 + */ 263 + #define NUM_VMUSA_UNIT_PER_PAGE 32 264 + #define VM_SA_UPDATE_UNIT_SIZE (GEN8_PAGE_SIZE / NUM_VMUSA_UNIT_PER_PAGE) 265 + #define NUM_VMUSA_WRITES_PER_UNIT (VM_SA_UPDATE_UNIT_SIZE / sizeof(u64)) 266 + drm_suballoc_manager_init(&m->vm_update_sa, 267 + (map_ofs / GEN8_PAGE_SIZE - NUM_KERNEL_PDE) * 268 + NUM_VMUSA_UNIT_PER_PAGE, 0); 269 + 270 + m->pt_bo = bo; 271 + return 0; 272 + } 273 + 274 + struct xe_migrate *xe_migrate_init(struct xe_gt *gt) 275 + { 276 + struct xe_device *xe = gt_to_xe(gt); 277 + struct xe_migrate *m; 278 + struct xe_vm *vm; 279 + struct ww_acquire_ctx ww; 280 + int err; 281 + 282 + XE_BUG_ON(xe_gt_is_media_type(gt)); 283 + 284 + m = drmm_kzalloc(&xe->drm, sizeof(*m), GFP_KERNEL); 285 + if (!m) 286 + return ERR_PTR(-ENOMEM); 287 + 288 + m->gt = gt; 289 + 290 + /* Special layout, prepared below.. */ 291 + vm = xe_vm_create(xe, XE_VM_FLAG_MIGRATION | 292 + XE_VM_FLAG_SET_GT_ID(gt)); 293 + if (IS_ERR(vm)) 294 + return ERR_CAST(vm); 295 + 296 + xe_vm_lock(vm, &ww, 0, false); 297 + err = xe_migrate_prepare_vm(gt, m, vm); 298 + xe_vm_unlock(vm, &ww); 299 + if (err) { 300 + xe_vm_close_and_put(vm); 301 + return ERR_PTR(err); 302 + } 303 + 304 + if (xe->info.supports_usm) { 305 + struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, 306 + XE_ENGINE_CLASS_COPY, 307 + gt->usm.reserved_bcs_instance, 308 + false); 309 + if (!hwe) 310 + return ERR_PTR(-EINVAL); 311 + 312 + m->eng = xe_engine_create(xe, vm, 313 + BIT(hwe->logical_instance), 1, 314 + hwe, ENGINE_FLAG_KERNEL); 315 + } else { 316 + m->eng = xe_engine_create_class(xe, gt, vm, 317 + XE_ENGINE_CLASS_COPY, 318 + ENGINE_FLAG_KERNEL); 319 + } 320 + if (IS_ERR(m->eng)) { 321 + xe_vm_close_and_put(vm); 322 + return ERR_CAST(m->eng); 323 + } 324 + 325 + mutex_init(&m->job_mutex); 326 + 327 + err = drmm_add_action_or_reset(&xe->drm, xe_migrate_fini, m); 328 + if (err) 329 + return ERR_PTR(err); 330 + 331 + return m; 332 + } 333 + 334 + static void emit_arb_clear(struct xe_bb *bb) 335 + { 336 + /* 1 
dword */ 337 + bb->cs[bb->len++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; 338 + } 339 + 340 + static u64 xe_migrate_res_sizes(struct xe_res_cursor *cur) 341 + { 342 + /* 343 + * For VRAM we use identity mapped pages so we are limited to current 344 + * cursor size. For system we program the pages ourselves so we have no 345 + * such limitation. 346 + */ 347 + return min_t(u64, MAX_PREEMPTDISABLE_TRANSFER, 348 + mem_type_is_vram(cur->mem_type) ? cur->size : 349 + cur->remaining); 350 + } 351 + 352 + static u32 pte_update_size(struct xe_migrate *m, 353 + bool is_vram, 354 + struct xe_res_cursor *cur, 355 + u64 *L0, u64 *L0_ofs, u32 *L0_pt, 356 + u32 cmd_size, u32 pt_ofs, u32 avail_pts) 357 + { 358 + u32 cmds = 0; 359 + 360 + *L0_pt = pt_ofs; 361 + if (!is_vram) { 362 + /* Clip L0 to available size */ 363 + u64 size = min(*L0, (u64)avail_pts * SZ_2M); 364 + u64 num_4k_pages = DIV_ROUND_UP(size, GEN8_PAGE_SIZE); 365 + 366 + *L0 = size; 367 + *L0_ofs = xe_migrate_vm_addr(pt_ofs, 0); 368 + 369 + /* MI_STORE_DATA_IMM */ 370 + cmds += 3 * DIV_ROUND_UP(num_4k_pages, 0x1ff); 371 + 372 + /* PDE qwords */ 373 + cmds += num_4k_pages * 2; 374 + 375 + /* Each chunk has a single blit command */ 376 + cmds += cmd_size; 377 + } else { 378 + /* Offset into identity map. */ 379 + *L0_ofs = xe_migrate_vram_ofs(cur->start); 380 + cmds += cmd_size; 381 + } 382 + 383 + return cmds; 384 + } 385 + 386 + static void emit_pte(struct xe_migrate *m, 387 + struct xe_bb *bb, u32 at_pt, 388 + bool is_vram, 389 + struct xe_res_cursor *cur, 390 + u32 size, struct xe_bo *bo) 391 + { 392 + u32 ptes; 393 + u64 ofs = at_pt * GEN8_PAGE_SIZE; 394 + u64 cur_ofs; 395 + 396 + /* 397 + * FIXME: Emitting VRAM PTEs to L0 PTs is forbidden. Currently 398 + * we're only emitting VRAM PTEs during sanity tests, so when 399 + * that's moved to a Kunit test, we should condition VRAM PTEs 400 + * on running tests. 
401 + */ 402 + 403 + ptes = DIV_ROUND_UP(size, GEN8_PAGE_SIZE); 404 + 405 + while (ptes) { 406 + u32 chunk = min(0x1ffU, ptes); 407 + 408 + bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(21) | 409 + (chunk * 2 + 1); 410 + bb->cs[bb->len++] = ofs; 411 + bb->cs[bb->len++] = 0; 412 + 413 + cur_ofs = ofs; 414 + ofs += chunk * 8; 415 + ptes -= chunk; 416 + 417 + while (chunk--) { 418 + u64 addr; 419 + 420 + XE_BUG_ON(cur->start & (PAGE_SIZE - 1)); 421 + 422 + if (is_vram) { 423 + addr = cur->start; 424 + 425 + /* Is this a 64K PTE entry? */ 426 + if ((m->eng->vm->flags & XE_VM_FLAGS_64K) && 427 + !(cur_ofs & (16 * 8 - 1))) { 428 + XE_WARN_ON(!IS_ALIGNED(addr, SZ_64K)); 429 + addr |= GEN12_PTE_PS64; 430 + } 431 + 432 + addr |= GEN12_PPGTT_PTE_LM; 433 + } else { 434 + addr = xe_res_dma(cur); 435 + } 436 + addr |= PPAT_CACHED | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; 437 + bb->cs[bb->len++] = lower_32_bits(addr); 438 + bb->cs[bb->len++] = upper_32_bits(addr); 439 + 440 + xe_res_next(cur, PAGE_SIZE); 441 + cur_ofs += 8; 442 + } 443 + } 444 + } 445 + 446 + #define EMIT_COPY_CCS_DW 5 447 + static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb, 448 + u64 dst_ofs, bool dst_is_indirect, 449 + u64 src_ofs, bool src_is_indirect, 450 + u32 size) 451 + { 452 + u32 *cs = bb->cs + bb->len; 453 + u32 num_ccs_blks; 454 + u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index); 455 + 456 + num_ccs_blks = DIV_ROUND_UP(xe_device_ccs_bytes(gt_to_xe(gt), size), 457 + NUM_CCS_BYTES_PER_BLOCK); 458 + XE_BUG_ON(num_ccs_blks > NUM_CCS_BLKS_PER_XFER); 459 + *cs++ = XY_CTRL_SURF_COPY_BLT | 460 + (src_is_indirect ? 0x0 : 0x1) << SRC_ACCESS_TYPE_SHIFT | 461 + (dst_is_indirect ? 
0x0 : 0x1) << DST_ACCESS_TYPE_SHIFT | 462 + ((num_ccs_blks - 1) & CCS_SIZE_MASK) << CCS_SIZE_SHIFT; 463 + *cs++ = lower_32_bits(src_ofs); 464 + *cs++ = upper_32_bits(src_ofs) | 465 + FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs); 466 + *cs++ = lower_32_bits(dst_ofs); 467 + *cs++ = upper_32_bits(dst_ofs) | 468 + FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs); 469 + 470 + bb->len = cs - bb->cs; 471 + } 472 + 473 + #define EMIT_COPY_DW 10 474 + static void emit_copy(struct xe_gt *gt, struct xe_bb *bb, 475 + u64 src_ofs, u64 dst_ofs, unsigned int size, 476 + unsigned pitch) 477 + { 478 + XE_BUG_ON(size / pitch > S16_MAX); 479 + XE_BUG_ON(pitch / 4 > S16_MAX); 480 + XE_BUG_ON(pitch > U16_MAX); 481 + 482 + bb->cs[bb->len++] = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2); 483 + bb->cs[bb->len++] = BLT_DEPTH_32 | pitch; 484 + bb->cs[bb->len++] = 0; 485 + bb->cs[bb->len++] = (size / pitch) << 16 | pitch / 4; 486 + bb->cs[bb->len++] = lower_32_bits(dst_ofs); 487 + bb->cs[bb->len++] = upper_32_bits(dst_ofs); 488 + bb->cs[bb->len++] = 0; 489 + bb->cs[bb->len++] = pitch; 490 + bb->cs[bb->len++] = lower_32_bits(src_ofs); 491 + bb->cs[bb->len++] = upper_32_bits(src_ofs); 492 + } 493 + 494 + static int job_add_deps(struct xe_sched_job *job, struct dma_resv *resv, 495 + enum dma_resv_usage usage) 496 + { 497 + return drm_sched_job_add_resv_dependencies(&job->drm, resv, usage); 498 + } 499 + 500 + static u64 xe_migrate_batch_base(struct xe_migrate *m, bool usm) 501 + { 502 + return usm ? 
m->usm_batch_base_ofs : m->batch_base_ofs; 503 + } 504 + 505 + static u32 xe_migrate_ccs_copy(struct xe_migrate *m, 506 + struct xe_bb *bb, 507 + u64 src_ofs, bool src_is_vram, 508 + u64 dst_ofs, bool dst_is_vram, u32 dst_size, 509 + u64 ccs_ofs, bool copy_ccs) 510 + { 511 + struct xe_gt *gt = m->gt; 512 + u32 flush_flags = 0; 513 + 514 + if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_vram) { 515 + /* 516 + * If the bo doesn't have any CCS metadata attached, we still 517 + * need to clear it for security reasons. 518 + */ 519 + emit_copy_ccs(gt, bb, dst_ofs, true, m->cleared_vram_ofs, false, 520 + dst_size); 521 + flush_flags = MI_FLUSH_DW_CCS; 522 + } else if (copy_ccs) { 523 + if (!src_is_vram) 524 + src_ofs = ccs_ofs; 525 + else if (!dst_is_vram) 526 + dst_ofs = ccs_ofs; 527 + 528 + /* 529 + * At the moment, we don't support copying CCS metadata from 530 + * system to system. 531 + */ 532 + XE_BUG_ON(!src_is_vram && !dst_is_vram); 533 + 534 + emit_copy_ccs(gt, bb, dst_ofs, dst_is_vram, src_ofs, 535 + src_is_vram, dst_size); 536 + if (dst_is_vram) 537 + flush_flags = MI_FLUSH_DW_CCS; 538 + } 539 + 540 + return flush_flags; 541 + } 542 + 543 + struct dma_fence *xe_migrate_copy(struct xe_migrate *m, 544 + struct xe_bo *bo, 545 + struct ttm_resource *src, 546 + struct ttm_resource *dst) 547 + { 548 + struct xe_gt *gt = m->gt; 549 + struct xe_device *xe = gt_to_xe(gt); 550 + struct dma_fence *fence = NULL; 551 + u64 size = bo->size; 552 + struct xe_res_cursor src_it, dst_it, ccs_it; 553 + u64 src_L0_ofs, dst_L0_ofs; 554 + u32 src_L0_pt, dst_L0_pt; 555 + u64 src_L0, dst_L0; 556 + int pass = 0; 557 + int err; 558 + bool src_is_vram = mem_type_is_vram(src->mem_type); 559 + bool dst_is_vram = mem_type_is_vram(dst->mem_type); 560 + bool copy_ccs = xe_device_has_flat_ccs(xe) && xe_bo_needs_ccs_pages(bo); 561 + bool copy_system_ccs = copy_ccs && (!src_is_vram || !dst_is_vram); 562 + 563 + if (!src_is_vram) 564 + xe_res_first_sg(xe_bo_get_sg(bo), 0, 
bo->size, &src_it); 565 + else 566 + xe_res_first(src, 0, bo->size, &src_it); 567 + if (!dst_is_vram) 568 + xe_res_first_sg(xe_bo_get_sg(bo), 0, bo->size, &dst_it); 569 + else 570 + xe_res_first(dst, 0, bo->size, &dst_it); 571 + 572 + if (copy_system_ccs) 573 + xe_res_first_sg(xe_bo_get_sg(bo), xe_bo_ccs_pages_start(bo), 574 + PAGE_ALIGN(xe_device_ccs_bytes(xe, size)), 575 + &ccs_it); 576 + 577 + while (size) { 578 + u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */ 579 + struct xe_sched_job *job; 580 + struct xe_bb *bb; 581 + u32 flush_flags; 582 + u32 update_idx; 583 + u64 ccs_ofs, ccs_size; 584 + u32 ccs_pt; 585 + bool usm = xe->info.supports_usm; 586 + 587 + src_L0 = xe_migrate_res_sizes(&src_it); 588 + dst_L0 = xe_migrate_res_sizes(&dst_it); 589 + 590 + drm_dbg(&xe->drm, "Pass %u, sizes: %llu & %llu\n", 591 + pass++, src_L0, dst_L0); 592 + 593 + src_L0 = min(src_L0, dst_L0); 594 + 595 + batch_size += pte_update_size(m, src_is_vram, &src_it, &src_L0, 596 + &src_L0_ofs, &src_L0_pt, 0, 0, 597 + NUM_PT_PER_BLIT); 598 + 599 + batch_size += pte_update_size(m, dst_is_vram, &dst_it, &src_L0, 600 + &dst_L0_ofs, &dst_L0_pt, 0, 601 + NUM_PT_PER_BLIT, NUM_PT_PER_BLIT); 602 + 603 + if (copy_system_ccs) { 604 + ccs_size = xe_device_ccs_bytes(xe, src_L0); 605 + batch_size += pte_update_size(m, false, &ccs_it, &ccs_size, 606 + &ccs_ofs, &ccs_pt, 0, 607 + 2 * NUM_PT_PER_BLIT, 608 + NUM_PT_PER_BLIT); 609 + } 610 + 611 + /* Add copy commands size here */ 612 + batch_size += EMIT_COPY_DW + 613 + (xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0); 614 + 615 + bb = xe_bb_new(gt, batch_size, usm); 616 + if (IS_ERR(bb)) { 617 + err = PTR_ERR(bb); 618 + goto err_sync; 619 + } 620 + 621 + /* Preemption is enabled again by the ring ops. 
*/ 622 + if (!src_is_vram || !dst_is_vram) 623 + emit_arb_clear(bb); 624 + 625 + if (!src_is_vram) 626 + emit_pte(m, bb, src_L0_pt, src_is_vram, &src_it, src_L0, 627 + bo); 628 + else 629 + xe_res_next(&src_it, src_L0); 630 + 631 + if (!dst_is_vram) 632 + emit_pte(m, bb, dst_L0_pt, dst_is_vram, &dst_it, src_L0, 633 + bo); 634 + else 635 + xe_res_next(&dst_it, src_L0); 636 + 637 + if (copy_system_ccs) 638 + emit_pte(m, bb, ccs_pt, false, &ccs_it, ccs_size, bo); 639 + 640 + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; 641 + update_idx = bb->len; 642 + 643 + emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, GEN8_PAGE_SIZE); 644 + flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_vram, 645 + dst_L0_ofs, dst_is_vram, 646 + src_L0, ccs_ofs, copy_ccs); 647 + 648 + mutex_lock(&m->job_mutex); 649 + job = xe_bb_create_migration_job(m->eng, bb, 650 + xe_migrate_batch_base(m, usm), 651 + update_idx); 652 + if (IS_ERR(job)) { 653 + err = PTR_ERR(job); 654 + goto err; 655 + } 656 + 657 + xe_sched_job_add_migrate_flush(job, flush_flags); 658 + if (!fence) { 659 + err = job_add_deps(job, bo->ttm.base.resv, 660 + DMA_RESV_USAGE_BOOKKEEP); 661 + if (err) 662 + goto err_job; 663 + } 664 + 665 + xe_sched_job_arm(job); 666 + dma_fence_put(fence); 667 + fence = dma_fence_get(&job->drm.s_fence->finished); 668 + xe_sched_job_push(job); 669 + 670 + dma_fence_put(m->fence); 671 + m->fence = dma_fence_get(fence); 672 + 673 + mutex_unlock(&m->job_mutex); 674 + 675 + xe_bb_free(bb, fence); 676 + size -= src_L0; 677 + continue; 678 + 679 + err_job: 680 + xe_sched_job_put(job); 681 + err: 682 + mutex_unlock(&m->job_mutex); 683 + xe_bb_free(bb, NULL); 684 + 685 + err_sync: 686 + /* Sync partial copy if any. 
*/ 687 + if (fence) { 688 + dma_fence_wait(fence, false); 689 + dma_fence_put(fence); 690 + } 691 + 692 + return ERR_PTR(err); 693 + } 694 + 695 + return fence; 696 + } 697 + 698 + static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, 699 + u32 size, u32 pitch, u32 value, bool is_vram) 700 + { 701 + u32 *cs = bb->cs + bb->len; 702 + u32 len = XY_FAST_COLOR_BLT_DW; 703 + u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index); 704 + 705 + if (GRAPHICS_VERx100(gt->xe) < 1250) 706 + len = 11; 707 + 708 + *cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 | 709 + (len - 2); 710 + *cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) | 711 + (pitch - 1); 712 + *cs++ = 0; 713 + *cs++ = (size / pitch) << 16 | pitch / 4; 714 + *cs++ = lower_32_bits(src_ofs); 715 + *cs++ = upper_32_bits(src_ofs); 716 + *cs++ = (is_vram ? 0x0 : 0x1) << XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT; 717 + *cs++ = value; 718 + *cs++ = 0; 719 + *cs++ = 0; 720 + *cs++ = 0; 721 + 722 + if (len > 11) { 723 + *cs++ = 0; 724 + *cs++ = 0; 725 + *cs++ = 0; 726 + *cs++ = 0; 727 + *cs++ = 0; 728 + } 729 + 730 + XE_BUG_ON(cs - bb->cs != len + bb->len); 731 + bb->len += len; 732 + 733 + return 0; 734 + } 735 + 736 + struct dma_fence *xe_migrate_clear(struct xe_migrate *m, 737 + struct xe_bo *bo, 738 + struct ttm_resource *dst, 739 + u32 value) 740 + { 741 + bool clear_vram = mem_type_is_vram(dst->mem_type); 742 + struct xe_gt *gt = m->gt; 743 + struct xe_device *xe = gt_to_xe(gt); 744 + struct dma_fence *fence = NULL; 745 + u64 size = bo->size; 746 + struct xe_res_cursor src_it; 747 + struct ttm_resource *src = dst; 748 + int err; 749 + int pass = 0; 750 + 751 + if (!clear_vram) 752 + xe_res_first_sg(xe_bo_get_sg(bo), 0, bo->size, &src_it); 753 + else 754 + xe_res_first(src, 0, bo->size, &src_it); 755 + 756 + while (size) { 757 + u64 clear_L0_ofs; 758 + u32 clear_L0_pt; 759 + u32 flush_flags = 0; 760 + u64 clear_L0; 761 + struct xe_sched_job *job; 762 + struct xe_bb *bb; 763 + u32 batch_size, 
update_idx; 764 + bool usm = xe->info.supports_usm; 765 + 766 + clear_L0 = xe_migrate_res_sizes(&src_it); 767 + drm_dbg(&xe->drm, "Pass %u, size: %llu\n", pass++, clear_L0); 768 + 769 + /* Calculate final sizes and batch size.. */ 770 + batch_size = 2 + 771 + pte_update_size(m, clear_vram, &src_it, 772 + &clear_L0, &clear_L0_ofs, &clear_L0_pt, 773 + XY_FAST_COLOR_BLT_DW, 0, NUM_PT_PER_BLIT); 774 + if (xe_device_has_flat_ccs(xe) && clear_vram) 775 + batch_size += EMIT_COPY_CCS_DW; 776 + 777 + /* Clear commands */ 778 + 779 + if (WARN_ON_ONCE(!clear_L0)) 780 + break; 781 + 782 + bb = xe_bb_new(gt, batch_size, usm); 783 + if (IS_ERR(bb)) { 784 + err = PTR_ERR(bb); 785 + goto err_sync; 786 + } 787 + 788 + size -= clear_L0; 789 + 790 + /* TODO: Add dependencies here */ 791 + 792 + /* Preemption is enabled again by the ring ops. */ 793 + if (!clear_vram) { 794 + emit_arb_clear(bb); 795 + emit_pte(m, bb, clear_L0_pt, clear_vram, &src_it, clear_L0, 796 + bo); 797 + } else { 798 + xe_res_next(&src_it, clear_L0); 799 + } 800 + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; 801 + update_idx = bb->len; 802 + 803 + emit_clear(gt, bb, clear_L0_ofs, clear_L0, GEN8_PAGE_SIZE, 804 + value, clear_vram); 805 + if (xe_device_has_flat_ccs(xe) && clear_vram) { 806 + emit_copy_ccs(gt, bb, clear_L0_ofs, true, 807 + m->cleared_vram_ofs, false, clear_L0); 808 + flush_flags = MI_FLUSH_DW_CCS; 809 + } 810 + 811 + mutex_lock(&m->job_mutex); 812 + job = xe_bb_create_migration_job(m->eng, bb, 813 + xe_migrate_batch_base(m, usm), 814 + update_idx); 815 + if (IS_ERR(job)) { 816 + err = PTR_ERR(job); 817 + goto err; 818 + } 819 + 820 + xe_sched_job_add_migrate_flush(job, flush_flags); 821 + 822 + xe_sched_job_arm(job); 823 + dma_fence_put(fence); 824 + fence = dma_fence_get(&job->drm.s_fence->finished); 825 + xe_sched_job_push(job); 826 + 827 + dma_fence_put(m->fence); 828 + m->fence = dma_fence_get(fence); 829 + 830 + mutex_unlock(&m->job_mutex); 831 + 832 + xe_bb_free(bb, fence); 833 + continue; 834 + 
err:
		mutex_unlock(&m->job_mutex);
		xe_bb_free(bb, NULL);
err_sync:
		/* Sync partial copies if any. */
		if (fence) {
			dma_fence_wait(fence, false);
			dma_fence_put(fence);
		}

		return ERR_PTR(err);
	}

	return fence;
}

static void write_pgtable(struct xe_gt *gt, struct xe_bb *bb, u64 ppgtt_ofs,
			  const struct xe_vm_pgtable_update *update,
			  struct xe_migrate_pt_update *pt_update)
{
	const struct xe_migrate_pt_update_ops *ops = pt_update->ops;
	u32 chunk;
	u32 ofs = update->ofs, size = update->qwords;

	/*
	 * If we have 512 entries (max), we would populate it ourselves,
	 * and update the PDE above it to the new pointer.
	 * The only time this can happen is if we have to update the top
	 * PDE. This requires a BO that is almost vm->size big.
	 *
	 * This shouldn't be possible in practice.. might change when 16K
	 * pages are used. Hence the BUG_ON.
867 + */ 868 + XE_BUG_ON(update->qwords > 0x1ff); 869 + if (!ppgtt_ofs) { 870 + bool is_lmem; 871 + 872 + ppgtt_ofs = xe_migrate_vram_ofs(xe_bo_addr(update->pt_bo, 0, 873 + GEN8_PAGE_SIZE, 874 + &is_lmem)); 875 + XE_BUG_ON(!is_lmem); 876 + } 877 + 878 + do { 879 + u64 addr = ppgtt_ofs + ofs * 8; 880 + chunk = min(update->qwords, 0x1ffU); 881 + 882 + /* Ensure populatefn can do memset64 by aligning bb->cs */ 883 + if (!(bb->len & 1)) 884 + bb->cs[bb->len++] = MI_NOOP; 885 + 886 + bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(21) | 887 + (chunk * 2 + 1); 888 + bb->cs[bb->len++] = lower_32_bits(addr); 889 + bb->cs[bb->len++] = upper_32_bits(addr); 890 + ops->populate(pt_update, gt, NULL, bb->cs + bb->len, ofs, chunk, 891 + update); 892 + 893 + bb->len += chunk * 2; 894 + ofs += chunk; 895 + size -= chunk; 896 + } while (size); 897 + } 898 + 899 + struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m) 900 + { 901 + return xe_vm_get(m->eng->vm); 902 + } 903 + 904 + static struct dma_fence * 905 + xe_migrate_update_pgtables_cpu(struct xe_migrate *m, 906 + struct xe_vm *vm, struct xe_bo *bo, 907 + const struct xe_vm_pgtable_update *updates, 908 + u32 num_updates, bool wait_vm, 909 + struct xe_migrate_pt_update *pt_update) 910 + { 911 + const struct xe_migrate_pt_update_ops *ops = pt_update->ops; 912 + struct dma_fence *fence; 913 + int err; 914 + u32 i; 915 + 916 + /* Wait on BO moves for 10 ms, then fall back to GPU job */ 917 + if (bo) { 918 + long wait; 919 + 920 + wait = dma_resv_wait_timeout(bo->ttm.base.resv, 921 + DMA_RESV_USAGE_KERNEL, 922 + true, HZ / 100); 923 + if (wait <= 0) 924 + return ERR_PTR(-ETIME); 925 + } 926 + if (wait_vm) { 927 + long wait; 928 + 929 + wait = dma_resv_wait_timeout(&vm->resv, 930 + DMA_RESV_USAGE_BOOKKEEP, 931 + true, HZ / 100); 932 + if (wait <= 0) 933 + return ERR_PTR(-ETIME); 934 + } 935 + 936 + if (ops->pre_commit) { 937 + err = ops->pre_commit(pt_update); 938 + if (err) 939 + return ERR_PTR(err); 940 + } 941 + for (i = 0; i < 
num_updates; i++) { 942 + const struct xe_vm_pgtable_update *update = &updates[i]; 943 + 944 + ops->populate(pt_update, m->gt, &update->pt_bo->vmap, NULL, 945 + update->ofs, update->qwords, update); 946 + } 947 + 948 + trace_xe_vm_cpu_bind(vm); 949 + xe_device_wmb(vm->xe); 950 + 951 + fence = dma_fence_get_stub(); 952 + 953 + return fence; 954 + } 955 + 956 + static bool no_in_syncs(struct xe_sync_entry *syncs, u32 num_syncs) 957 + { 958 + int i; 959 + 960 + for (i = 0; i < num_syncs; i++) { 961 + struct dma_fence *fence = syncs[i].fence; 962 + 963 + if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, 964 + &fence->flags)) 965 + return false; 966 + } 967 + 968 + return true; 969 + } 970 + 971 + static bool engine_is_idle(struct xe_engine *e) 972 + { 973 + return !e || e->lrc[0].fence_ctx.next_seqno == 1 || 974 + xe_lrc_seqno(&e->lrc[0]) == e->lrc[0].fence_ctx.next_seqno; 975 + } 976 + 977 + struct dma_fence * 978 + xe_migrate_update_pgtables(struct xe_migrate *m, 979 + struct xe_vm *vm, 980 + struct xe_bo *bo, 981 + struct xe_engine *eng, 982 + const struct xe_vm_pgtable_update *updates, 983 + u32 num_updates, 984 + struct xe_sync_entry *syncs, u32 num_syncs, 985 + struct xe_migrate_pt_update *pt_update) 986 + { 987 + const struct xe_migrate_pt_update_ops *ops = pt_update->ops; 988 + struct xe_gt *gt = m->gt; 989 + struct xe_device *xe = gt_to_xe(gt); 990 + struct xe_sched_job *job; 991 + struct dma_fence *fence; 992 + struct drm_suballoc *sa_bo = NULL; 993 + struct xe_vma *vma = pt_update->vma; 994 + struct xe_bb *bb; 995 + u32 i, batch_size, ppgtt_ofs, update_idx, page_ofs = 0; 996 + u64 addr; 997 + int err = 0; 998 + bool usm = !eng && xe->info.supports_usm; 999 + bool first_munmap_rebind = vma && vma->first_munmap_rebind; 1000 + 1001 + /* Use the CPU if no in syncs and engine is idle */ 1002 + if (no_in_syncs(syncs, num_syncs) && engine_is_idle(eng)) { 1003 + fence = xe_migrate_update_pgtables_cpu(m, vm, bo, updates, 1004 + num_updates, 1005 + 
first_munmap_rebind, 1006 + pt_update); 1007 + if (!IS_ERR(fence) || fence == ERR_PTR(-EAGAIN)) 1008 + return fence; 1009 + } 1010 + 1011 + /* fixed + PTE entries */ 1012 + if (IS_DGFX(xe)) 1013 + batch_size = 2; 1014 + else 1015 + batch_size = 6 + num_updates * 2; 1016 + 1017 + for (i = 0; i < num_updates; i++) { 1018 + u32 num_cmds = DIV_ROUND_UP(updates[i].qwords, 0x1ff); 1019 + 1020 + /* align noop + MI_STORE_DATA_IMM cmd prefix */ 1021 + batch_size += 4 * num_cmds + updates[i].qwords * 2; 1022 + } 1023 + 1024 + /* 1025 + * XXX: Create temp bo to copy from, if batch_size becomes too big? 1026 + * 1027 + * Worst case: Sum(2 * (each lower level page size) + (top level page size)) 1028 + * Should be reasonably bound.. 1029 + */ 1030 + XE_BUG_ON(batch_size >= SZ_128K); 1031 + 1032 + bb = xe_bb_new(gt, batch_size, !eng && xe->info.supports_usm); 1033 + if (IS_ERR(bb)) 1034 + return ERR_CAST(bb); 1035 + 1036 + /* For sysmem PTE's, need to map them in our hole.. */ 1037 + if (!IS_DGFX(xe)) { 1038 + ppgtt_ofs = NUM_KERNEL_PDE - 1; 1039 + if (eng) { 1040 + XE_BUG_ON(num_updates > NUM_VMUSA_WRITES_PER_UNIT); 1041 + 1042 + sa_bo = drm_suballoc_new(&m->vm_update_sa, 1, 1043 + GFP_KERNEL, true, 0); 1044 + if (IS_ERR(sa_bo)) { 1045 + err = PTR_ERR(sa_bo); 1046 + goto err; 1047 + } 1048 + 1049 + ppgtt_ofs = NUM_KERNEL_PDE + 1050 + (drm_suballoc_soffset(sa_bo) / 1051 + NUM_VMUSA_UNIT_PER_PAGE); 1052 + page_ofs = (drm_suballoc_soffset(sa_bo) % 1053 + NUM_VMUSA_UNIT_PER_PAGE) * 1054 + VM_SA_UPDATE_UNIT_SIZE; 1055 + } 1056 + 1057 + /* Preemption is enabled again by the ring ops. 
*/ 1058 + emit_arb_clear(bb); 1059 + 1060 + /* Map our PT's to gtt */ 1061 + bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(21) | 1062 + (num_updates * 2 + 1); 1063 + bb->cs[bb->len++] = ppgtt_ofs * GEN8_PAGE_SIZE + page_ofs; 1064 + bb->cs[bb->len++] = 0; /* upper_32_bits */ 1065 + 1066 + for (i = 0; i < num_updates; i++) { 1067 + struct xe_bo *pt_bo = updates[i].pt_bo; 1068 + 1069 + BUG_ON(pt_bo->size != SZ_4K); 1070 + 1071 + addr = gen8_pte_encode(NULL, pt_bo, 0, XE_CACHE_WB, 1072 + 0, 0); 1073 + bb->cs[bb->len++] = lower_32_bits(addr); 1074 + bb->cs[bb->len++] = upper_32_bits(addr); 1075 + } 1076 + 1077 + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; 1078 + update_idx = bb->len; 1079 + 1080 + addr = xe_migrate_vm_addr(ppgtt_ofs, 0) + 1081 + (page_ofs / sizeof(u64)) * GEN8_PAGE_SIZE; 1082 + for (i = 0; i < num_updates; i++) 1083 + write_pgtable(m->gt, bb, addr + i * GEN8_PAGE_SIZE, 1084 + &updates[i], pt_update); 1085 + } else { 1086 + /* phys pages, no preamble required */ 1087 + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; 1088 + update_idx = bb->len; 1089 + 1090 + /* Preemption is enabled again by the ring ops. 
*/ 1091 + emit_arb_clear(bb); 1092 + for (i = 0; i < num_updates; i++) 1093 + write_pgtable(m->gt, bb, 0, &updates[i], pt_update); 1094 + } 1095 + 1096 + if (!eng) 1097 + mutex_lock(&m->job_mutex); 1098 + 1099 + job = xe_bb_create_migration_job(eng ?: m->eng, bb, 1100 + xe_migrate_batch_base(m, usm), 1101 + update_idx); 1102 + if (IS_ERR(job)) { 1103 + err = PTR_ERR(job); 1104 + goto err_bb; 1105 + } 1106 + 1107 + /* Wait on BO move */ 1108 + if (bo) { 1109 + err = job_add_deps(job, bo->ttm.base.resv, 1110 + DMA_RESV_USAGE_KERNEL); 1111 + if (err) 1112 + goto err_job; 1113 + } 1114 + 1115 + /* 1116 + * Munmap style VM unbind, need to wait for all jobs to be complete / 1117 + * trigger preempts before moving forward 1118 + */ 1119 + if (first_munmap_rebind) { 1120 + err = job_add_deps(job, &vm->resv, 1121 + DMA_RESV_USAGE_BOOKKEEP); 1122 + if (err) 1123 + goto err_job; 1124 + } 1125 + 1126 + for (i = 0; !err && i < num_syncs; i++) 1127 + err = xe_sync_entry_add_deps(&syncs[i], job); 1128 + 1129 + if (err) 1130 + goto err_job; 1131 + 1132 + if (ops->pre_commit) { 1133 + err = ops->pre_commit(pt_update); 1134 + if (err) 1135 + goto err_job; 1136 + } 1137 + xe_sched_job_arm(job); 1138 + fence = dma_fence_get(&job->drm.s_fence->finished); 1139 + xe_sched_job_push(job); 1140 + 1141 + if (!eng) 1142 + mutex_unlock(&m->job_mutex); 1143 + 1144 + xe_bb_free(bb, fence); 1145 + drm_suballoc_free(sa_bo, fence); 1146 + 1147 + return fence; 1148 + 1149 + err_job: 1150 + xe_sched_job_put(job); 1151 + err_bb: 1152 + if (!eng) 1153 + mutex_unlock(&m->job_mutex); 1154 + xe_bb_free(bb, NULL); 1155 + err: 1156 + drm_suballoc_free(sa_bo, NULL); 1157 + return ERR_PTR(err); 1158 + } 1159 + 1160 + void xe_migrate_wait(struct xe_migrate *m) 1161 + { 1162 + if (m->fence) 1163 + dma_fence_wait(m->fence, false); 1164 + } 1165 + 1166 + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) 1167 + #include "tests/xe_migrate.c" 1168 + #endif
drivers/gpu/drm/xe/xe_migrate.h
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2020 Intel Corporation
 */

#ifndef __XE_MIGRATE__
#define __XE_MIGRATE__

#include <drm/drm_mm.h>

struct dma_fence;
struct iosys_map;
struct ttm_resource;

struct xe_bo;
struct xe_gt;
struct xe_engine;
struct xe_migrate;
struct xe_migrate_pt_update;
struct xe_sync_entry;
struct xe_pt;
struct xe_vm;
struct xe_vm_pgtable_update;
struct xe_vma;

struct xe_migrate_pt_update_ops {
	/**
	 * populate() - Populate a command buffer or page-table with ptes.
	 * @pt_update: Embeddable callback argument.
	 * @gt: The gt for the current operation.
	 * @map: struct iosys_map into the memory to be populated.
	 * @pos: If @map is NULL, kernel pointer into the memory to be
	 * populated.
	 * @ofs: qword offset into @map, unused if @map is NULL.
	 * @num_qwords: Number of qwords to write.
	 * @update: Information about the PTEs to be inserted.
	 *
	 * This interface is intended to be used as a callback into the
	 * page-table system to populate command buffers or shared
	 * page-tables with PTEs.
	 */
	void (*populate)(struct xe_migrate_pt_update *pt_update,
			 struct xe_gt *gt, struct iosys_map *map,
			 void *pos, u32 ofs, u32 num_qwords,
			 const struct xe_vm_pgtable_update *update);

	/**
	 * pre_commit(): Callback to be called just before arming the
	 * sched_job.
	 * @pt_update: Pointer to embeddable callback argument.
	 *
	 * Return: 0 on success, negative error code on error.
	 */
	int (*pre_commit)(struct xe_migrate_pt_update *pt_update);
};

struct xe_migrate_pt_update {
	const struct xe_migrate_pt_update_ops *ops;
	struct xe_vma *vma;
};

struct xe_migrate *xe_migrate_init(struct xe_gt *gt);

struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
				  struct xe_bo *bo,
				  struct ttm_resource *src,
				  struct ttm_resource *dst);

struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
				   struct xe_bo *bo,
				   struct ttm_resource *dst,
				   u32 value);

struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m);

struct dma_fence *
xe_migrate_update_pgtables(struct xe_migrate *m,
			   struct xe_vm *vm,
			   struct xe_bo *bo,
			   struct xe_engine *eng,
			   const struct xe_vm_pgtable_update *updates,
			   u32 num_updates,
			   struct xe_sync_entry *syncs, u32 num_syncs,
			   struct xe_migrate_pt_update *pt_update);

void xe_migrate_wait(struct xe_migrate *m);

struct xe_engine *xe_gt_migrate_engine(struct xe_gt *gt);
#endif
+88
drivers/gpu/drm/xe/xe_migrate_doc.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_MIGRATE_DOC_H_ 7 + #define _XE_MIGRATE_DOC_H_ 8 + 9 + /** 10 + * DOC: Migrate Layer 11 + * 12 + * The XE migrate layer is used to generate jobs which can copy memory (eviction), 13 + * clear memory, or program tables (binds). This layer exists in every GT, has 14 + * a migrate engine, and uses a special VM for all generated jobs. 15 + * 16 + * Special VM details 17 + * ================== 18 + * 19 + * The special VM is configured with a page structure where we can dynamically 20 + * map BOs which need to be copied and cleared, dynamically map other VMs' page 21 + * table BOs for updates, and identity map the entire device's VRAM with 1 GB 22 + * pages. 23 + * 24 + * Currently the page structure consists of 48 physical pages, with 16 being 25 + * reserved for BO mapping during copies and clears, 1 reserved for kernel binds, 26 + * several pages needed to set up the identity mappings (the exact number depends 27 + * on how many bits of address space the device has), and the rest reserved for 28 + * user bind operations. 29 + * 30 + * TODO: Diagram of layout 31 + * 32 + * Bind jobs 33 + * ========= 34 + * 35 + * A bind job consists of two batches and runs either on the migrate engine 36 + * (kernel binds) or the bind engine passed in (user binds). In both cases the 37 + * VM of the engine is the migrate VM. 38 + * 39 + * The first batch is used to update the migration VM page structure to point to 40 + * the bind VM page table BOs which need to be updated. A physical page is 41 + * required for this. If it is a user bind, the page is allocated from the pool of 42 + * pages reserved for user bind operations, with drm_suballoc managing this pool. If 43 + * it is a kernel bind, the page reserved for kernel binds is used. 
44 + * 45 + * The first batch is only required for devices without VRAM, as when the device 46 + * has VRAM the bind VM page table BOs are in VRAM and the identity mapping can 47 + * be used. 48 + * 49 + * The second batch is used to program the page table updates in the bind VM. Why 50 + * not just one batch? Well, the TLBs need to be invalidated between these two 51 + * batches, and that can only be done from the ring. 52 + * 53 + * When the bind job completes, the allocated page is returned to the pool of pages 54 + * reserved for user bind operations if it was a user bind. No need to do this for kernel 55 + * binds, as the reserved kernel page is serially reused by each job. 56 + * 57 + * Copy / clear jobs 58 + * ================= 59 + * 60 + * A copy or clear job consists of two batches and runs on the migrate engine. 61 + * 62 + * Like binds, the first batch is used to update the migration VM page structure. 63 + * In copy jobs, we need to map both the source and destination of the BO into the 64 + * page structure. In clear jobs, we just need to add one mapping of the BO into the 65 + * page structure. We use the 16 reserved pages in the migration VM for mappings; 66 + * this gives us a maximum copy size of 16 MB and a maximum clear size of 32 MB. 67 + * 68 + * The second batch is used to do either the copy or the clear. Again, similar to 69 + * binds, two batches are required as the TLBs need to be invalidated from the 70 + * ring between the batches. 71 + * 72 + * More than one job will be generated if the BO is larger than the maximum copy / 73 + * clear size. 74 + * 75 + * Future work 76 + * =========== 77 + * 78 + * Update the copy and clear code to use identity-mapped VRAM. 79 + * 80 + * Can we rework the use of the pages for async binds to use all the entries in each 81 + * page? 82 + * 83 + * Use large pages for sysmem mappings. 84 + * 85 + * Is it possible to identity map the sysmem? We should explore this. 86 + */ 87 + 88 + #endif
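A quick userspace sanity check of the 16 MB / 32 MB figures above. This assumes 4 KiB page-table pages holding 512 64-bit PTEs, each mapping a 4 KiB page, so one reserved page maps 2 MiB; a copy maps both source and destination while a clear maps only the destination. The helper names are ours, purely illustrative:

```c
#include <assert.h>
#include <stdint.h>

#define SZ_4K 4096ULL
#define SZ_1M (1024ULL * 1024)

/* One 4 KiB page-table page holds 512 64-bit PTEs, each mapping 4 KiB. */
static uint64_t bytes_mapped_per_pt_page(void)
{
	return (SZ_4K / sizeof(uint64_t)) * SZ_4K; /* 2 MiB */
}

/* A copy maps both source and destination, halving the usable payload. */
static uint64_t max_copy_bytes(unsigned int reserved_pt_pages)
{
	return reserved_pt_pages * bytes_mapped_per_pt_page() / 2;
}

/* A clear only maps the destination. */
static uint64_t max_clear_bytes(unsigned int reserved_pt_pages)
{
	return reserved_pt_pages * bytes_mapped_per_pt_page();
}
```

With the 16 reserved pages this reproduces the documented limits, and also shows why a clear can be twice as large as a copy per job.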
+466
drivers/gpu/drm/xe/xe_mmio.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2021 Intel Corporation 4 + */ 5 + 6 + #include "xe_mmio.h" 7 + 8 + #include <drm/drm_managed.h> 9 + #include <drm/xe_drm.h> 10 + 11 + #include "xe_device.h" 12 + #include "xe_gt.h" 13 + #include "xe_gt_mcr.h" 14 + #include "xe_macros.h" 15 + #include "xe_module.h" 16 + 17 + #include "i915_reg.h" 18 + #include "gt/intel_engine_regs.h" 19 + #include "gt/intel_gt_regs.h" 20 + 21 + #define XEHP_MTCFG_ADDR _MMIO(0x101800) 22 + #define TILE_COUNT REG_GENMASK(15, 8) 23 + #define GEN12_LMEM_BAR 2 24 + 25 + static int xe_set_dma_info(struct xe_device *xe) 26 + { 27 + unsigned int mask_size = xe->info.dma_mask_size; 28 + int err; 29 + 30 + /* 31 + * We don't have a max segment size, so set it to the max so sg's 32 + * debugging layer doesn't complain 33 + */ 34 + dma_set_max_seg_size(xe->drm.dev, UINT_MAX); 35 + 36 + err = dma_set_mask(xe->drm.dev, DMA_BIT_MASK(mask_size)); 37 + if (err) 38 + goto mask_err; 39 + 40 + err = dma_set_coherent_mask(xe->drm.dev, DMA_BIT_MASK(mask_size)); 41 + if (err) 42 + goto mask_err; 43 + 44 + return 0; 45 + 46 + mask_err: 47 + drm_err(&xe->drm, "Can't set DMA mask/consistent mask (%d)\n", err); 48 + return err; 49 + } 50 + 51 + #ifdef CONFIG_64BIT 52 + static int 53 + _resize_bar(struct xe_device *xe, int resno, resource_size_t size) 54 + { 55 + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); 56 + int bar_size = pci_rebar_bytes_to_size(size); 57 + int ret; 58 + 59 + if (pci_resource_len(pdev, resno)) 60 + pci_release_resource(pdev, resno); 61 + 62 + ret = pci_resize_resource(pdev, resno, bar_size); 63 + if (ret) { 64 + drm_info(&xe->drm, "Failed to resize BAR%d to %dM (%pe)\n", 65 + resno, 1 << bar_size, ERR_PTR(ret)); 66 + return -1; 67 + } 68 + 69 + drm_info(&xe->drm, "BAR%d resized to %dM\n", resno, 1 << bar_size); 70 + return 1; 71 + } 72 + 73 + static int xe_resize_lmem_bar(struct xe_device *xe, resource_size_t lmem_size) 74 + { 75 + struct pci_dev *pdev = 
to_pci_dev(xe->drm.dev); 76 + struct pci_bus *root = pdev->bus; 77 + struct resource *root_res; 78 + resource_size_t rebar_size; 79 + resource_size_t current_size; 80 + u32 pci_cmd; 81 + int i; 82 + int ret; 83 + u64 force_lmem_bar_size = xe_force_lmem_bar_size; 84 + 85 + current_size = roundup_pow_of_two(pci_resource_len(pdev, GEN12_LMEM_BAR)); 86 + 87 + if (force_lmem_bar_size) { 88 + u32 bar_sizes; 89 + 90 + rebar_size = force_lmem_bar_size * (resource_size_t)SZ_1M; 91 + bar_sizes = pci_rebar_get_possible_sizes(pdev, GEN12_LMEM_BAR); 92 + 93 + if (rebar_size == current_size) 94 + return 0; 95 + 96 + if (!(bar_sizes & BIT(pci_rebar_bytes_to_size(rebar_size))) || 97 + rebar_size >= roundup_pow_of_two(lmem_size)) { 98 + rebar_size = lmem_size; 99 + drm_info(&xe->drm, 100 + "Given bar size is not within supported size, setting it to default: %llu\n", 101 + (u64)lmem_size >> 20); 102 + } 103 + } else { 104 + rebar_size = current_size; 105 + 106 + if (rebar_size != roundup_pow_of_two(lmem_size)) 107 + rebar_size = lmem_size; 108 + else 109 + return 0; 110 + } 111 + 112 + while (root->parent) 113 + root = root->parent; 114 + 115 + pci_bus_for_each_resource(root, root_res, i) { 116 + if (root_res && root_res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) && 117 + root_res->start > 0x100000000ull) 118 + break; 119 + } 120 + 121 + if (!root_res) { 122 + drm_info(&xe->drm, "Can't resize LMEM BAR - platform support is missing\n"); 123 + return -1; 124 + } 125 + 126 + pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd); 127 + pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY); 128 + 129 + ret = _resize_bar(xe, GEN12_LMEM_BAR, rebar_size); 130 + 131 + pci_assign_unassigned_bus_resources(pdev->bus); 132 + pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd); 133 + return ret; 134 + } 135 + #else 136 + static int xe_resize_lmem_bar(struct xe_device *xe, resource_size_t lmem_size) { return 0; } 137 + #endif 138 + 139 + static bool xe_pci_resource_valid(struct 
pci_dev *pdev, int bar) 140 + { 141 + if (!pci_resource_flags(pdev, bar)) 142 + return false; 143 + 144 + if (pci_resource_flags(pdev, bar) & IORESOURCE_UNSET) 145 + return false; 146 + 147 + if (!pci_resource_len(pdev, bar)) 148 + return false; 149 + 150 + return true; 151 + } 152 + 153 + int xe_mmio_probe_vram(struct xe_device *xe) 154 + { 155 + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); 156 + struct xe_gt *gt; 157 + u8 id; 158 + u64 lmem_size; 159 + u64 original_size; 160 + u64 current_size; 161 + u64 flat_ccs_base; 162 + int resize_result; 163 + 164 + if (!IS_DGFX(xe)) { 165 + xe->mem.vram.mapping = 0; 166 + xe->mem.vram.size = 0; 167 + xe->mem.vram.io_start = 0; 168 + 169 + for_each_gt(gt, xe, id) { 170 + gt->mem.vram.mapping = 0; 171 + gt->mem.vram.size = 0; 172 + gt->mem.vram.io_start = 0; 173 + } 174 + return 0; 175 + } 176 + 177 + if (!xe_pci_resource_valid(pdev, GEN12_LMEM_BAR)) { 178 + drm_err(&xe->drm, "pci resource is not valid\n"); 179 + return -ENXIO; 180 + } 181 + 182 + gt = xe_device_get_gt(xe, 0); 183 + lmem_size = xe_mmio_read64(gt, GEN12_GSMBASE.reg); 184 + 185 + original_size = pci_resource_len(pdev, GEN12_LMEM_BAR); 186 + 187 + if (xe->info.has_flat_ccs) { 188 + int err; 189 + u32 reg; 190 + 191 + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 192 + if (err) 193 + return err; 194 + reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE0_ADDR_RANGE); 195 + lmem_size = (u64)REG_FIELD_GET(GENMASK(14, 8), reg) * SZ_1G; 196 + reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR); 197 + flat_ccs_base = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K; 198 + 199 + drm_info(&xe->drm, "lmem_size: 0x%llx flat_ccs_base: 0x%llx\n", 200 + lmem_size, flat_ccs_base); 201 + 202 + err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 203 + if (err) 204 + return err; 205 + } else { 206 + flat_ccs_base = lmem_size; 207 + } 208 + 209 + resize_result = xe_resize_lmem_bar(xe, lmem_size); 210 + current_size = pci_resource_len(pdev, GEN12_LMEM_BAR); 211 + 
xe->mem.vram.io_start = pci_resource_start(pdev, GEN12_LMEM_BAR); 212 + 213 + xe->mem.vram.size = min(current_size, lmem_size); 214 + 215 + if (!xe->mem.vram.size) 216 + return -EIO; 217 + 218 + if (resize_result > 0) 219 + drm_info(&xe->drm, "Successfully resized LMEM from %lluMiB to %lluMiB\n", 220 + (u64)original_size >> 20, 221 + (u64)current_size >> 20); 222 + else if (xe->mem.vram.size < lmem_size && !xe_force_lmem_bar_size) 223 + drm_info(&xe->drm, "Using a reduced BAR size of %lluMiB. Consider enabling 'Resizable BAR' support in your BIOS.\n", 224 + (u64)xe->mem.vram.size >> 20); 225 + if (xe->mem.vram.size < lmem_size) 226 + drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (0x%llx->0x%llx)\n", 227 + lmem_size, xe->mem.vram.size); 228 + 229 + #ifdef CONFIG_64BIT 230 + xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.size); 231 + #endif 232 + 233 + xe->mem.vram.size = min_t(u64, xe->mem.vram.size, flat_ccs_base); 234 + 235 + drm_info(&xe->drm, "TOTAL VRAM: %pa, %pa\n", &xe->mem.vram.io_start, &xe->mem.vram.size); 236 + 237 + /* FIXME: Assuming equally partitioned VRAM, incorrect */ 238 + if (xe->info.tile_count > 1) { 239 + u8 adj_tile_count = xe->info.tile_count; 240 + resource_size_t size, io_start; 241 + 242 + for_each_gt(gt, xe, id) 243 + if (xe_gt_is_media_type(gt)) 244 + --adj_tile_count; 245 + 246 + XE_BUG_ON(!adj_tile_count); 247 + 248 + size = xe->mem.vram.size / adj_tile_count; 249 + io_start = xe->mem.vram.io_start; 250 + 251 + for_each_gt(gt, xe, id) { 252 + if (id && !xe_gt_is_media_type(gt)) 253 + io_start += size; 254 + 255 + gt->mem.vram.size = size; 256 + gt->mem.vram.io_start = io_start; 257 + gt->mem.vram.mapping = xe->mem.vram.mapping + 258 + (io_start - xe->mem.vram.io_start); 259 + 260 + drm_info(&xe->drm, "VRAM[%u, %u]: %pa, %pa\n", 261 + id, gt->info.vram_id, &gt->mem.vram.io_start, 262 + &gt->mem.vram.size); 263 + } 264 + } else { 265 + gt->mem.vram.size = xe->mem.vram.size; 266 + 
gt->mem.vram.io_start = xe->mem.vram.io_start; 267 + gt->mem.vram.mapping = xe->mem.vram.mapping; 268 + 269 + drm_info(&xe->drm, "VRAM: %pa\n", &gt->mem.vram.size); 270 + } 271 + return 0; 272 + } 273 + 274 + static void xe_mmio_probe_tiles(struct xe_device *xe) 275 + { 276 + struct xe_gt *gt = xe_device_get_gt(xe, 0); 277 + u32 mtcfg; 278 + u8 adj_tile_count; 279 + u8 id; 280 + 281 + if (xe->info.tile_count == 1) 282 + return; 283 + 284 + mtcfg = xe_mmio_read64(gt, XEHP_MTCFG_ADDR.reg); 285 + adj_tile_count = xe->info.tile_count = 286 + REG_FIELD_GET(TILE_COUNT, mtcfg) + 1; 287 + if (xe->info.media_ver >= 13) 288 + xe->info.tile_count *= 2; 289 + 290 + drm_info(&xe->drm, "tile_count: %d, adj_tile_count %d\n", 291 + xe->info.tile_count, adj_tile_count); 292 + 293 + if (xe->info.tile_count > 1) { 294 + const int mmio_bar = 0; 295 + size_t size; 296 + void *regs; 297 + 298 + if (adj_tile_count > 1) { 299 + pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs); 300 + xe->mmio.size = SZ_16M * adj_tile_count; 301 + xe->mmio.regs = pci_iomap(to_pci_dev(xe->drm.dev), 302 + mmio_bar, xe->mmio.size); 303 + } 304 + 305 + size = xe->mmio.size / adj_tile_count; 306 + regs = xe->mmio.regs; 307 + 308 + for_each_gt(gt, xe, id) { 309 + if (id && !xe_gt_is_media_type(gt)) 310 + regs += size; 311 + gt->mmio.size = size; 312 + gt->mmio.regs = regs; 313 + } 314 + } 315 + } 316 + 317 + static void mmio_fini(struct drm_device *drm, void *arg) 318 + { 319 + struct xe_device *xe = arg; 320 + 321 + pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs); 322 + if (xe->mem.vram.mapping) 323 + iounmap(xe->mem.vram.mapping); 324 + } 325 + 326 + int xe_mmio_init(struct xe_device *xe) 327 + { 328 + struct xe_gt *gt = xe_device_get_gt(xe, 0); 329 + const int mmio_bar = 0; 330 + int err; 331 + 332 + /* 333 + * Map the entire BAR, which includes registers (0-4MB), reserved space 334 + * (4MB-8MB), and GGTT (8MB-16MB). 
Other parts of the driver (GTs, 335 + * GGTTs) will derive the pointers they need from the mapping in the 336 + * device structure. 337 + */ 338 + xe->mmio.size = SZ_16M; 339 + xe->mmio.regs = pci_iomap(to_pci_dev(xe->drm.dev), mmio_bar, 340 + xe->mmio.size); 341 + if (xe->mmio.regs == NULL) { 342 + drm_err(&xe->drm, "failed to map registers\n"); 343 + return -EIO; 344 + } 345 + 346 + err = drmm_add_action_or_reset(&xe->drm, mmio_fini, xe); 347 + if (err) 348 + return err; 349 + 350 + /* 1 GT for now, 1 to 1 mapping, may change on multi-GT devices */ 351 + gt->mmio.size = xe->mmio.size; 352 + gt->mmio.regs = xe->mmio.regs; 353 + 354 + /* 355 + * The boot firmware initializes local memory and assesses its health. 356 + * If memory training fails, the punit will have been instructed to 357 + * keep the GT powered down; we won't be able to communicate with it 358 + * and we should not continue with driver initialization. 359 + */ 360 + if (IS_DGFX(xe) && !(xe_mmio_read32(gt, GU_CNTL.reg) & LMEM_INIT)) { 361 + drm_err(&xe->drm, "LMEM not initialized by firmware\n"); 362 + return -ENODEV; 363 + } 364 + 365 + err = xe_set_dma_info(xe); 366 + if (err) 367 + return err; 368 + 369 + xe_mmio_probe_tiles(xe); 370 + 371 + return 0; 372 + } 373 + 374 + #define VALID_MMIO_FLAGS (\ 375 + DRM_XE_MMIO_BITS_MASK |\ 376 + DRM_XE_MMIO_READ |\ 377 + DRM_XE_MMIO_WRITE) 378 + 379 + static const i915_reg_t mmio_read_whitelist[] = { 380 + RING_TIMESTAMP(RENDER_RING_BASE), 381 + }; 382 + 383 + int xe_mmio_ioctl(struct drm_device *dev, void *data, 384 + struct drm_file *file) 385 + { 386 + struct xe_device *xe = to_xe_device(dev); 387 + struct drm_xe_mmio *args = data; 388 + unsigned int bits_flag, bytes; 389 + bool allowed; 390 + int ret = 0; 391 + 392 + if (XE_IOCTL_ERR(xe, args->extensions)) 393 + return -EINVAL; 394 + 395 + if (XE_IOCTL_ERR(xe, args->flags & ~VALID_MMIO_FLAGS)) 396 + return -EINVAL; 397 + 398 + if (XE_IOCTL_ERR(xe, !(args->flags & DRM_XE_MMIO_WRITE) && args->value)) 399 
+ return -EINVAL; 400 + 401 + allowed = capable(CAP_SYS_ADMIN); 402 + if (!allowed && ((args->flags & ~DRM_XE_MMIO_BITS_MASK) == DRM_XE_MMIO_READ)) { 403 + unsigned int i; 404 + 405 + for (i = 0; i < ARRAY_SIZE(mmio_read_whitelist); i++) { 406 + if (mmio_read_whitelist[i].reg == args->addr) { 407 + allowed = true; 408 + break; 409 + } 410 + } 411 + } 412 + 413 + if (XE_IOCTL_ERR(xe, !allowed)) 414 + return -EPERM; 415 + 416 + bits_flag = args->flags & DRM_XE_MMIO_BITS_MASK; 417 + bytes = 1 << bits_flag; 418 + if (XE_IOCTL_ERR(xe, args->addr + bytes > xe->mmio.size)) 419 + return -EINVAL; 420 + 421 + xe_force_wake_get(gt_to_fw(&xe->gt[0]), XE_FORCEWAKE_ALL); 422 + 423 + if (args->flags & DRM_XE_MMIO_WRITE) { 424 + switch (bits_flag) { 425 + case DRM_XE_MMIO_8BIT: 426 + ret = -EINVAL; /* TODO */ goto exit; 427 + case DRM_XE_MMIO_16BIT: 428 + ret = -EINVAL; /* TODO */ goto exit; 429 + case DRM_XE_MMIO_32BIT: 430 + if (XE_IOCTL_ERR(xe, args->value > U32_MAX)) { 431 + ret = -EINVAL; goto exit; } 432 + xe_mmio_write32(to_gt(xe), args->addr, args->value); 433 + break; 434 + case DRM_XE_MMIO_64BIT: 435 + xe_mmio_write64(to_gt(xe), args->addr, args->value); 436 + break; 437 + default: 438 + drm_WARN(&xe->drm, 1, "Invalid MMIO bit size"); 439 + ret = -EINVAL; 440 + goto exit; 441 + } 442 + } 443 + 444 + if (args->flags & DRM_XE_MMIO_READ) { 445 + switch (bits_flag) { 446 + case DRM_XE_MMIO_8BIT: 447 + ret = -EINVAL; /* TODO */ break; 448 + case DRM_XE_MMIO_16BIT: 449 + ret = -EINVAL; /* TODO */ break; 450 + case DRM_XE_MMIO_32BIT: 451 + args->value = xe_mmio_read32(to_gt(xe), args->addr); 452 + break; 453 + case DRM_XE_MMIO_64BIT: 454 + args->value = xe_mmio_read64(to_gt(xe), args->addr); 455 + break; 456 + default: 457 + drm_WARN(&xe->drm, 1, "Invalid MMIO bit size"); 458 + ret = -EINVAL; 459 + } 460 + } 461 + 462 + exit: 463 + xe_force_wake_put(gt_to_fw(&xe->gt[0]), XE_FORCEWAKE_ALL); 464 + 465 + return ret; 466 + }
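The size handling in xe_mmio_ioctl() above reduces to bytes = 1 << bits_flag plus a bounds check against the mapped MMIO window. A small userspace model of just that check (the enum values and function name here are illustrative, not the uAPI):

```c
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Mirrors DRM_XE_MMIO_{8,16,32,64}BIT: flag n selects a 1 << n byte access. */
enum { MMIO_8BIT, MMIO_16BIT, MMIO_32BIT, MMIO_64BIT };

static bool mmio_access_in_bounds(uint32_t addr, unsigned int bits_flag,
				  uint32_t mmio_size)
{
	uint32_t bytes = 1u << bits_flag; /* 1, 2, 4 or 8 bytes */

	/* The whole access must land inside the mapped window. */
	return addr + bytes <= mmio_size;
}
```

With the driver's 16 MiB window, an 8-byte access at the last qword is allowed while the same access 4 bytes later is rejected.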
+110
drivers/gpu/drm/xe/xe_mmio.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2021 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_MMIO_H_ 7 + #define _XE_MMIO_H_ 8 + 9 + #include <linux/delay.h> 10 + 11 + #include "xe_gt_types.h" 12 + 13 + /* 14 + * FIXME: This header has been deemed evil and we need to kill it. Temporarily 15 + * including it so we can use 'wait_for' and unblock initial development. A follow-up 16 + * should replace 'wait_for' with a sane version and drop including this header. 17 + */ 18 + #include "i915_utils.h" 19 + 20 + struct drm_device; 21 + struct drm_file; 22 + struct xe_device; 23 + 24 + int xe_mmio_init(struct xe_device *xe); 25 + 26 + static inline u8 xe_mmio_read8(struct xe_gt *gt, u32 reg) 27 + { 28 + if (reg < gt->mmio.adj_limit) 29 + reg += gt->mmio.adj_offset; 30 + 31 + return readb(gt->mmio.regs + reg); 32 + } 33 + 34 + static inline void xe_mmio_write32(struct xe_gt *gt, 35 + u32 reg, u32 val) 36 + { 37 + if (reg < gt->mmio.adj_limit) 38 + reg += gt->mmio.adj_offset; 39 + 40 + writel(val, gt->mmio.regs + reg); 41 + } 42 + 43 + static inline u32 xe_mmio_read32(struct xe_gt *gt, u32 reg) 44 + { 45 + if (reg < gt->mmio.adj_limit) 46 + reg += gt->mmio.adj_offset; 47 + 48 + return readl(gt->mmio.regs + reg); 49 + } 50 + 51 + static inline u32 xe_mmio_rmw32(struct xe_gt *gt, u32 reg, u32 mask, 52 + u32 val) 53 + { 54 + u32 old, reg_val; 55 + 56 + old = xe_mmio_read32(gt, reg); 57 + reg_val = (old & mask) | val; 58 + xe_mmio_write32(gt, reg, reg_val); 59 + 60 + return old; 61 + } 62 + 63 + static inline void xe_mmio_write64(struct xe_gt *gt, 64 + u32 reg, u64 val) 65 + { 66 + if (reg < gt->mmio.adj_limit) 67 + reg += gt->mmio.adj_offset; 68 + 69 + writeq(val, gt->mmio.regs + reg); 70 + } 71 + 72 + static inline u64 xe_mmio_read64(struct xe_gt *gt, u32 reg) 73 + { 74 + if (reg < gt->mmio.adj_limit) 75 + reg += gt->mmio.adj_offset; 76 + 77 + return readq(gt->mmio.regs + reg); 78 + } 79 + 80 + static inline int xe_mmio_write32_and_verify(struct xe_gt *gt, 
81 + u32 reg, u32 val, 82 + u32 mask, u32 eval) 83 + { 84 + u32 reg_val; 85 + 86 + xe_mmio_write32(gt, reg, val); 87 + reg_val = xe_mmio_read32(gt, reg); 88 + 89 + return (reg_val & mask) != eval ? -EINVAL : 0; 90 + } 91 + 92 + static inline int xe_mmio_wait32(struct xe_gt *gt, 93 + u32 reg, u32 val, 94 + u32 mask, u32 timeout_ms) 95 + { 96 + return wait_for((xe_mmio_read32(gt, reg) & mask) == val, 97 + timeout_ms); 98 + } 99 + 100 + int xe_mmio_ioctl(struct drm_device *dev, void *data, 101 + struct drm_file *file); 102 + 103 + static inline bool xe_mmio_in_range(const struct xe_mmio_range *range, u32 reg) 104 + { 105 + return range && reg >= range->start && reg <= range->end; 106 + } 107 + 108 + int xe_mmio_probe_vram(struct xe_device *xe); 109 + 110 + #endif
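One thing worth calling out in the header above: xe_mmio_rmw32() uses @mask to select the bits to keep from the old register value (rather than the bits to clear, as many read-modify-write helpers do), ORs @val on top, and returns the old value. A userspace model of that semantic:

```c
#include <assert.h>
#include <stdint.h>

/*
 * Model of xe_mmio_rmw32(): new = (old & mask) | val, i.e. mask selects
 * the bits preserved from the old value; the pre-update value is returned.
 */
static uint32_t rmw32_model(uint32_t *reg, uint32_t mask, uint32_t val)
{
	uint32_t old = *reg;

	*reg = (old & mask) | val;
	return old;
}
```

Callers therefore pass the complement of the field they intend to replace as @mask, unlike helpers where the mask names the field itself.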
+557
drivers/gpu/drm/xe/xe_mocs.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #include "xe_bo.h" 7 + #include "xe_device.h" 8 + #include "xe_engine.h" 9 + #include "xe_gt.h" 10 + #include "xe_platform_types.h" 11 + #include "xe_mmio.h" 12 + #include "xe_mocs.h" 13 + #include "xe_step_types.h" 14 + 15 + #include "gt/intel_gt_regs.h" 16 + 17 + #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) 18 + #define mocs_dbg drm_dbg 19 + #else 20 + __printf(2, 3) 21 + static inline void mocs_dbg(const struct drm_device *dev, 22 + const char *format, ...) 23 + { /* noop */ } 24 + #endif 25 + 26 + /* 27 + * MOCS indexes used for GPU surfaces, defining the cacheability of the 28 + * surface data and the coherency for this data wrt. CPU vs. GPU accesses. 29 + */ 30 + enum xe_mocs_info_index { 31 + /* 32 + * Not cached anywhere, coherency between CPU and GPU accesses is 33 + * guaranteed. 34 + */ 35 + XE_MOCS_UNCACHED, 36 + /* 37 + * Cacheability and coherency controlled by the kernel automatically 38 + * based on the xxxx IOCTL setting and the current 39 + * usage of the surface (used for display scanout or not). 40 + */ 41 + XE_MOCS_PTE, 42 + /* 43 + * Cached in all GPU caches available on the platform. 44 + * Coherency between CPU and GPU accesses to the surface is not 45 + * guaranteed without extra synchronization. 
46 + */ 47 + XE_MOCS_CACHED, 48 + }; 49 + 50 + enum { 51 + HAS_GLOBAL_MOCS = BIT(0), 52 + HAS_RENDER_L3CC = BIT(1), 53 + }; 54 + 55 + struct xe_mocs_entry { 56 + u32 control_value; 57 + u16 l3cc_value; 58 + u16 used; 59 + }; 60 + 61 + struct xe_mocs_info { 62 + unsigned int size; 63 + unsigned int n_entries; 64 + const struct xe_mocs_entry *table; 65 + u8 uc_index; 66 + u8 wb_index; 67 + u8 unused_entries_index; 68 + }; 69 + 70 + /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */ 71 + #define _LE_CACHEABILITY(value) ((value) << 0) 72 + #define _LE_TGT_CACHE(value) ((value) << 2) 73 + #define LE_LRUM(value) ((value) << 4) 74 + #define LE_AOM(value) ((value) << 6) 75 + #define LE_RSC(value) ((value) << 7) 76 + #define LE_SCC(value) ((value) << 8) 77 + #define LE_PFM(value) ((value) << 11) 78 + #define LE_SCF(value) ((value) << 14) 79 + #define LE_COS(value) ((value) << 15) 80 + #define LE_SSE(value) ((value) << 17) 81 + 82 + /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */ 83 + #define L3_ESC(value) ((value) << 0) 84 + #define L3_SCC(value) ((value) << 1) 85 + #define _L3_CACHEABILITY(value) ((value) << 4) 86 + #define L3_GLBGO(value) ((value) << 6) 87 + #define L3_LKUP(value) ((value) << 7) 88 + 89 + /* Helper defines */ 90 + #define GEN9_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. 
*/ 91 + #define PVC_NUM_MOCS_ENTRIES 3 92 + #define MTL_NUM_MOCS_ENTRIES 16 93 + 94 + /* (e)LLC caching options */ 95 + /* 96 + * Note: LE_0_PAGETABLE works only up to Gen11; for newer gens it means 97 + * the same as LE_UC 98 + */ 99 + #define LE_0_PAGETABLE _LE_CACHEABILITY(0) 100 + #define LE_1_UC _LE_CACHEABILITY(1) 101 + #define LE_2_WT _LE_CACHEABILITY(2) 102 + #define LE_3_WB _LE_CACHEABILITY(3) 103 + 104 + /* Target cache */ 105 + #define LE_TC_0_PAGETABLE _LE_TGT_CACHE(0) 106 + #define LE_TC_1_LLC _LE_TGT_CACHE(1) 107 + #define LE_TC_2_LLC_ELLC _LE_TGT_CACHE(2) 108 + #define LE_TC_3_LLC_ELLC_ALT _LE_TGT_CACHE(3) 109 + 110 + /* L3 caching options */ 111 + #define L3_0_DIRECT _L3_CACHEABILITY(0) 112 + #define L3_1_UC _L3_CACHEABILITY(1) 113 + #define L3_2_RESERVED _L3_CACHEABILITY(2) 114 + #define L3_3_WB _L3_CACHEABILITY(3) 115 + 116 + #define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \ 117 + [__idx] = { \ 118 + .control_value = __control_value, \ 119 + .l3cc_value = __l3cc_value, \ 120 + .used = 1, \ 121 + } 122 + 123 + /* 124 + * MOCS tables 125 + * 126 + * These are the MOCS tables that are programmed across all the rings. 127 + * The control value is programmed to all the rings that support the 128 + * MOCS registers, while the l3cc_values are only programmed to the 129 + * LNCFCMOCS0 - LNCFCMOCS32 registers. 130 + * 131 + * These tables are intended to be kept reasonably consistent across 132 + * HW platforms, and for ICL+, be identical across OSes. To achieve 133 + * that, for Icelake and above, the list of entries is published as part 134 + * of bspec. 135 + * 136 + * Entries not part of the following tables are undefined as far as 137 + * userspace is concerned and shouldn't be relied upon. For Gen < 12 138 + * they will be initialized to PTE. Gen >= 12 doesn't have a setting for 139 + * PTE, and those platforms, except TGL/RKL, will be initialized to L3 WB to 140 + * catch accidental use of reserved and unused mocs indexes. 
141 + * 142 + * The last few entries are reserved by the hardware. For ICL+ they 143 + * should be initialized according to bspec and never used, for older 144 + * platforms they should never be written to. 145 + * 146 + * NOTE1: These tables are part of bspec and defined as part of hardware 147 + * interface for ICL+. For older platforms, they are part of kernel 148 + * ABI. It is expected that, for specific hardware platform, existing 149 + * entries will remain constant and the table will only be updated by 150 + * adding new entries, filling unused positions. 151 + * 152 + * NOTE2: For GEN >= 12 except TGL and RKL, reserved and unspecified MOCS 153 + * indices have been set to L3 WB. These reserved entries should never 154 + * be used, they may be changed to low performant variants with better 155 + * coherency in the future if more entries are needed. 156 + * For TGL/RKL, all the unspecified MOCS indexes are mapped to L3 UC. 157 + */ 158 + 159 + #define GEN11_MOCS_ENTRIES \ 160 + /* Entries 0 and 1 are defined per-platform */ \ 161 + /* Base - L3 + LLC */ \ 162 + MOCS_ENTRY(2, \ 163 + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ 164 + L3_3_WB), \ 165 + /* Base - Uncached */ \ 166 + MOCS_ENTRY(3, \ 167 + LE_1_UC | LE_TC_1_LLC, \ 168 + L3_1_UC), \ 169 + /* Base - L3 */ \ 170 + MOCS_ENTRY(4, \ 171 + LE_1_UC | LE_TC_1_LLC, \ 172 + L3_3_WB), \ 173 + /* Base - LLC */ \ 174 + MOCS_ENTRY(5, \ 175 + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ 176 + L3_1_UC), \ 177 + /* Age 0 - LLC */ \ 178 + MOCS_ENTRY(6, \ 179 + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \ 180 + L3_1_UC), \ 181 + /* Age 0 - L3 + LLC */ \ 182 + MOCS_ENTRY(7, \ 183 + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \ 184 + L3_3_WB), \ 185 + /* Age: Don't Chg. - LLC */ \ 186 + MOCS_ENTRY(8, \ 187 + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \ 188 + L3_1_UC), \ 189 + /* Age: Don't Chg. 
- L3 + LLC */ \ 190 + MOCS_ENTRY(9, \ 191 + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \ 192 + L3_3_WB), \ 193 + /* No AOM - LLC */ \ 194 + MOCS_ENTRY(10, \ 195 + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \ 196 + L3_1_UC), \ 197 + /* No AOM - L3 + LLC */ \ 198 + MOCS_ENTRY(11, \ 199 + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \ 200 + L3_3_WB), \ 201 + /* No AOM; Age 0 - LLC */ \ 202 + MOCS_ENTRY(12, \ 203 + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \ 204 + L3_1_UC), \ 205 + /* No AOM; Age 0 - L3 + LLC */ \ 206 + MOCS_ENTRY(13, \ 207 + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \ 208 + L3_3_WB), \ 209 + /* No AOM; Age:DC - LLC */ \ 210 + MOCS_ENTRY(14, \ 211 + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \ 212 + L3_1_UC), \ 213 + /* No AOM; Age:DC - L3 + LLC */ \ 214 + MOCS_ENTRY(15, \ 215 + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \ 216 + L3_3_WB), \ 217 + /* Self-Snoop - L3 + LLC */ \ 218 + MOCS_ENTRY(18, \ 219 + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \ 220 + L3_3_WB), \ 221 + /* Skip Caching - L3 + LLC(12.5%) */ \ 222 + MOCS_ENTRY(19, \ 223 + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7), \ 224 + L3_3_WB), \ 225 + /* Skip Caching - L3 + LLC(25%) */ \ 226 + MOCS_ENTRY(20, \ 227 + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3), \ 228 + L3_3_WB), \ 229 + /* Skip Caching - L3 + LLC(50%) */ \ 230 + MOCS_ENTRY(21, \ 231 + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1), \ 232 + L3_3_WB), \ 233 + /* Skip Caching - L3 + LLC(75%) */ \ 234 + MOCS_ENTRY(22, \ 235 + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3), \ 236 + L3_3_WB), \ 237 + /* Skip Caching - L3 + LLC(87.5%) */ \ 238 + MOCS_ENTRY(23, \ 239 + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7), \ 240 + L3_3_WB), \ 241 + /* HW Reserved - SW program but never use */ \ 242 + MOCS_ENTRY(62, \ 243 + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ 244 + L3_1_UC), \ 245 + /* HW Reserved - SW program but never use */ \ 246 + MOCS_ENTRY(63, \ 247 + LE_3_WB | LE_TC_1_LLC | 
LE_LRUM(3), \ 248 + L3_1_UC) 249 + 250 + static const struct xe_mocs_entry tgl_mocs_desc[] = { 251 + /* 252 + * NOTE: 253 + * Reserved and unspecified MOCS indices have been set to (L3 + LLC). 254 + * These reserved entries should never be used; they may be changed 255 + * to low performant variants with better coherency in the future if 256 + * more entries are needed. We are programming index XE_MOCS_PTE(1) 257 + * only, __init_mocs_table() takes care to program unused indexes with 258 + * this entry. 259 + */ 260 + MOCS_ENTRY(XE_MOCS_PTE, 261 + LE_0_PAGETABLE | LE_TC_0_PAGETABLE, 262 + L3_1_UC), 263 + GEN11_MOCS_ENTRIES, 264 + 265 + /* Implicitly enable L1 - HDC:L1 + L3 + LLC */ 266 + MOCS_ENTRY(48, 267 + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), 268 + L3_3_WB), 269 + /* Implicitly enable L1 - HDC:L1 + L3 */ 270 + MOCS_ENTRY(49, 271 + LE_1_UC | LE_TC_1_LLC, 272 + L3_3_WB), 273 + /* Implicitly enable L1 - HDC:L1 + LLC */ 274 + MOCS_ENTRY(50, 275 + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), 276 + L3_1_UC), 277 + /* Implicitly enable L1 - HDC:L1 */ 278 + MOCS_ENTRY(51, 279 + LE_1_UC | LE_TC_1_LLC, 280 + L3_1_UC), 281 + /* HW Special Case (CCS) */ 282 + MOCS_ENTRY(60, 283 + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), 284 + L3_1_UC), 285 + /* HW Special Case (Displayable) */ 286 + MOCS_ENTRY(61, 287 + LE_1_UC | LE_TC_1_LLC, 288 + L3_3_WB), 289 + }; 290 + 291 + static const struct xe_mocs_entry dg1_mocs_desc[] = { 292 + /* UC */ 293 + MOCS_ENTRY(1, 0, L3_1_UC), 294 + /* WB - L3 */ 295 + MOCS_ENTRY(5, 0, L3_3_WB), 296 + /* WB - L3 50% */ 297 + MOCS_ENTRY(6, 0, L3_ESC(1) | L3_SCC(1) | L3_3_WB), 298 + /* WB - L3 25% */ 299 + MOCS_ENTRY(7, 0, L3_ESC(1) | L3_SCC(3) | L3_3_WB), 300 + /* WB - L3 12.5% */ 301 + MOCS_ENTRY(8, 0, L3_ESC(1) | L3_SCC(7) | L3_3_WB), 302 + 303 + /* HDC:L1 + L3 */ 304 + MOCS_ENTRY(48, 0, L3_3_WB), 305 + /* HDC:L1 */ 306 + MOCS_ENTRY(49, 0, L3_1_UC), 307 + 308 + /* HW Reserved */ 309 + MOCS_ENTRY(60, 0, L3_1_UC), 310 + MOCS_ENTRY(61, 0, L3_1_UC), 311 + MOCS_ENTRY(62, 0, 
L3_1_UC), 312 + MOCS_ENTRY(63, 0, L3_1_UC), 313 + }; 314 + 315 + static const struct xe_mocs_entry gen12_mocs_desc[] = { 316 + GEN11_MOCS_ENTRIES, 317 + /* Implicitly enable L1 - HDC:L1 + L3 + LLC */ 318 + MOCS_ENTRY(48, 319 + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), 320 + L3_3_WB), 321 + /* Implicitly enable L1 - HDC:L1 + L3 */ 322 + MOCS_ENTRY(49, 323 + LE_1_UC | LE_TC_1_LLC, 324 + L3_3_WB), 325 + /* Implicitly enable L1 - HDC:L1 + LLC */ 326 + MOCS_ENTRY(50, 327 + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), 328 + L3_1_UC), 329 + /* Implicitly enable L1 - HDC:L1 */ 330 + MOCS_ENTRY(51, 331 + LE_1_UC | LE_TC_1_LLC, 332 + L3_1_UC), 333 + /* HW Special Case (CCS) */ 334 + MOCS_ENTRY(60, 335 + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), 336 + L3_1_UC), 337 + /* HW Special Case (Displayable) */ 338 + MOCS_ENTRY(61, 339 + LE_1_UC | LE_TC_1_LLC, 340 + L3_3_WB), 341 + }; 342 + 343 + static const struct xe_mocs_entry dg2_mocs_desc[] = { 344 + /* UC - Coherent; GO:L3 */ 345 + MOCS_ENTRY(0, 0, L3_1_UC | L3_LKUP(1)), 346 + /* UC - Coherent; GO:Memory */ 347 + MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), 348 + /* UC - Non-Coherent; GO:Memory */ 349 + MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)), 350 + 351 + /* WB - LC */ 352 + MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), 353 + }; 354 + 355 + static const struct xe_mocs_entry dg2_mocs_desc_g10_ax[] = { 356 + /* Wa_14011441408: Set Go to Memory for MOCS#0 */ 357 + MOCS_ENTRY(0, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), 358 + /* UC - Coherent; GO:Memory */ 359 + MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), 360 + /* UC - Non-Coherent; GO:Memory */ 361 + MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)), 362 + 363 + /* WB - LC */ 364 + MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), 365 + }; 366 + 367 + static const struct xe_mocs_entry pvc_mocs_desc[] = { 368 + /* Error */ 369 + MOCS_ENTRY(0, 0, L3_3_WB), 370 + 371 + /* UC */ 372 + MOCS_ENTRY(1, 0, L3_1_UC), 373 + 374 + /* WB */ 375 + MOCS_ENTRY(2, 0, L3_3_WB), 376 + }; 377 + 378 + static unsigned int 
get_mocs_settings(struct xe_device *xe, 379 + struct xe_mocs_info *info) 380 + { 381 + unsigned int flags; 382 + 383 + memset(info, 0, sizeof(struct xe_mocs_info)); 384 + 385 + info->unused_entries_index = XE_MOCS_PTE; 386 + switch (xe->info.platform) { 387 + case XE_PVC: 388 + info->size = ARRAY_SIZE(pvc_mocs_desc); 389 + info->table = pvc_mocs_desc; 390 + info->n_entries = PVC_NUM_MOCS_ENTRIES; 391 + info->uc_index = 1; 392 + info->wb_index = 2; 393 + info->unused_entries_index = 2; 394 + break; 395 + case XE_METEORLAKE: 396 + info->size = ARRAY_SIZE(dg2_mocs_desc); 397 + info->table = dg2_mocs_desc; 398 + info->n_entries = MTL_NUM_MOCS_ENTRIES; 399 + info->uc_index = 1; 400 + info->unused_entries_index = 3; 401 + break; 402 + case XE_DG2: 403 + if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G10 && 404 + xe->info.step.graphics >= STEP_A0 && 405 + xe->info.step.graphics <= STEP_B0) { 406 + info->size = ARRAY_SIZE(dg2_mocs_desc_g10_ax); 407 + info->table = dg2_mocs_desc_g10_ax; 408 + } else { 409 + info->size = ARRAY_SIZE(dg2_mocs_desc); 410 + info->table = dg2_mocs_desc; 411 + } 412 + info->uc_index = 1; 413 + info->n_entries = GEN9_NUM_MOCS_ENTRIES; 414 + info->unused_entries_index = 3; 415 + break; 416 + case XE_DG1: 417 + info->size = ARRAY_SIZE(dg1_mocs_desc); 418 + info->table = dg1_mocs_desc; 419 + info->uc_index = 1; 420 + info->n_entries = GEN9_NUM_MOCS_ENTRIES; 422 + info->unused_entries_index = 5; 423 + break; 424 + case XE_TIGERLAKE: 425 + info->size = ARRAY_SIZE(tgl_mocs_desc); 426 + info->table = tgl_mocs_desc; 427 + info->n_entries = GEN9_NUM_MOCS_ENTRIES; 428 + info->uc_index = 3; 429 + break; 430 + case XE_ALDERLAKE_S: 431 + case XE_ALDERLAKE_P: 432 + info->size = ARRAY_SIZE(gen12_mocs_desc); 433 + info->table = gen12_mocs_desc; 434 + info->n_entries = GEN9_NUM_MOCS_ENTRIES; 435 + info->uc_index = 3; 436 + info->unused_entries_index = 2; 437 + break; 438 + default: 439 + drm_err(&xe->drm, "Platform that should have a MOCS
table does not.\n"); 440 + return 0; 441 + } 442 + 443 + if (XE_WARN_ON(info->size > info->n_entries)) 444 + return 0; 445 + 446 + flags = HAS_RENDER_L3CC; 447 + if (!IS_DGFX(xe)) 448 + flags |= HAS_GLOBAL_MOCS; 449 + 450 + return flags; 451 + } 452 + 453 + /* 454 + * Get the control_value from the MOCS entry. If the entry is not used, 455 + * the control_value of the entry at unused_entries_index is returned 456 + * instead (XE_MOCS_PTE by default). 457 + */ 458 + static u32 get_entry_control(const struct xe_mocs_info *info, 459 + unsigned int index) 460 + { 461 + if (index < info->size && info->table[index].used) 462 + return info->table[index].control_value; 463 + return info->table[info->unused_entries_index].control_value; 464 + } 465 + 466 + static void __init_mocs_table(struct xe_gt *gt, 467 + const struct xe_mocs_info *info, 468 + u32 addr) 469 + { 470 + struct xe_device *xe = gt_to_xe(gt); 471 + 472 + unsigned int i; 473 + u32 mocs; 474 + 475 + mocs_dbg(&gt->xe->drm, "entries:%d\n", info->n_entries); 476 + drm_WARN_ONCE(&xe->drm, !info->unused_entries_index, 477 + "Unused entries index should have been defined\n"); 478 + for (i = 0; 479 + i < info->n_entries ? (mocs = get_entry_control(info, i)), 1 : 0; 480 + i++) { 481 + mocs_dbg(&gt->xe->drm, "%d 0x%x 0x%x\n", i, _MMIO(addr + i * 4).reg, mocs); 482 + xe_mmio_write32(gt, _MMIO(addr + i * 4).reg, mocs); 483 + } 484 + } 485 + 486 + /* 487 + * Get the l3cc_value from the MOCS entry. If the entry is not used, 488 + * the l3cc_value of the entry at unused_entries_index is returned 489 + * instead (XE_MOCS_PTE by default).
490 + */ 491 + static u16 get_entry_l3cc(const struct xe_mocs_info *info, 492 + unsigned int index) 493 + { 494 + if (index < info->size && info->table[index].used) 495 + return info->table[index].l3cc_value; 496 + return info->table[info->unused_entries_index].l3cc_value; 497 + } 498 + 499 + static u32 l3cc_combine(u16 low, u16 high) 500 + { 501 + return low | (u32)high << 16; 502 + } 503 + 504 + static void init_l3cc_table(struct xe_gt *gt, 505 + const struct xe_mocs_info *info) 506 + { 507 + unsigned int i; 508 + u32 l3cc; 509 + 510 + mocs_dbg(&gt->xe->drm, "entries:%d\n", info->n_entries); 511 + for (i = 0; 512 + i < (info->n_entries + 1) / 2 ? 513 + (l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i), 514 + get_entry_l3cc(info, 2 * i + 1))), 1 : 0; 515 + i++) { 516 + mocs_dbg(&gt->xe->drm, "%d 0x%x 0x%x\n", i, GEN9_LNCFCMOCS(i).reg, l3cc); 517 + xe_mmio_write32(gt, GEN9_LNCFCMOCS(i).reg, l3cc); 518 + } 519 + } 520 + 521 + void xe_mocs_init_engine(const struct xe_engine *engine) 522 + { 523 + struct xe_mocs_info table; 524 + unsigned int flags; 525 + 526 + flags = get_mocs_settings(engine->gt->xe, &table); 527 + if (!flags) 528 + return; 529 + 530 + if (flags & HAS_RENDER_L3CC && engine->class == XE_ENGINE_CLASS_RENDER) 531 + init_l3cc_table(engine->gt, &table); 532 + } 533 + 534 + void xe_mocs_init(struct xe_gt *gt) 535 + { 536 + struct xe_mocs_info table; 537 + unsigned int flags; 538 + 539 + /* 540 + * LLC and eDRAM control values are not applicable to dgfx 541 + */ 542 + flags = get_mocs_settings(gt->xe, &table); 543 + mocs_dbg(&gt->xe->drm, "flag:0x%x\n", flags); 544 + gt->mocs.uc_index = table.uc_index; 545 + gt->mocs.wb_index = table.wb_index; 546 + 547 + if (flags & HAS_GLOBAL_MOCS) 548 + __init_mocs_table(gt, &table, GEN12_GLOBAL_MOCS(0).reg); 549 + 550 + /* 551 + * Initialize the L3CC table as part of MOCS initialization to make 552 + * sure the LNCFCMOCSx registers are programmed for the subsequent 553 + * memory transactions, including GuC transactions
554 + */ 555 + if (flags & HAS_RENDER_L3CC) 556 + init_l3cc_table(gt, &table); 557 + }
drivers/gpu/drm/xe/xe_mocs.h (+29 lines)
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_MOCS_H_ 7 + #define _XE_MOCS_H_ 8 + 9 + #include <linux/types.h> 10 + 11 + struct xe_engine; 12 + struct xe_gt; 13 + 14 + void xe_mocs_init_engine(const struct xe_engine *engine); 15 + void xe_mocs_init(struct xe_gt *gt); 16 + 17 + /** 18 + * xe_mocs_index_to_value - Translate a MOCS index to the MOCS value expected by 19 + * most blitter commands. 20 + * @mocs_index: index into the mocs tables 21 + * 22 + * Return: The corresponding mocs value to be programmed. 23 + */ 24 + static inline u32 xe_mocs_index_to_value(u32 mocs_index) 25 + { 26 + return mocs_index << 1; 27 + } 28 + 29 + #endif
drivers/gpu/drm/xe/xe_module.c (+76 lines)
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2021 Intel Corporation 4 + */ 5 + 6 + #include <linux/init.h> 7 + #include <linux/module.h> 8 + 9 + #include "xe_drv.h" 10 + #include "xe_hw_fence.h" 11 + #include "xe_module.h" 12 + #include "xe_pci.h" 13 + #include "xe_sched_job.h" 14 + 15 + bool enable_guc = true; 16 + module_param_named_unsafe(enable_guc, enable_guc, bool, 0444); 17 + MODULE_PARM_DESC(enable_guc, "Enable GuC submission"); 18 + 19 + u32 xe_force_lmem_bar_size; 20 + module_param_named(lmem_bar_size, xe_force_lmem_bar_size, uint, 0600); 21 + MODULE_PARM_DESC(lmem_bar_size, "Set the lmem bar size (in MiB)"); 22 + 23 + int xe_guc_log_level = 5; 24 + module_param_named(guc_log_level, xe_guc_log_level, int, 0600); 25 + MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (0=disable, 1..5=enable with verbosity min..max)"); 26 + 27 + char *xe_param_force_probe = CONFIG_DRM_XE_FORCE_PROBE; 28 + module_param_named_unsafe(force_probe, xe_param_force_probe, charp, 0400); 29 + MODULE_PARM_DESC(force_probe, 30 + "Force probe options for specified devices.
See CONFIG_DRM_XE_FORCE_PROBE for details."); 31 + 32 + struct init_funcs { 33 + int (*init)(void); 34 + void (*exit)(void); 35 + }; 36 + #define MAKE_INIT_EXIT_FUNCS(name) \ 37 + { .init = xe_##name##_module_init, \ 38 + .exit = xe_##name##_module_exit, } 39 + static const struct init_funcs init_funcs[] = { 40 + MAKE_INIT_EXIT_FUNCS(hw_fence), 41 + MAKE_INIT_EXIT_FUNCS(sched_job), 42 + }; 43 + 44 + static int __init xe_init(void) 45 + { 46 + int err, i; 47 + 48 + for (i = 0; i < ARRAY_SIZE(init_funcs); i++) { 49 + err = init_funcs[i].init(); 50 + if (err) { 51 + while (i--) 52 + init_funcs[i].exit(); 53 + return err; 54 + } 55 + } 56 + 57 + return xe_register_pci_driver(); 58 + } 59 + 60 + static void __exit xe_exit(void) 61 + { 62 + int i; 63 + 64 + xe_unregister_pci_driver(); 65 + 66 + for (i = ARRAY_SIZE(init_funcs) - 1; i >= 0; i--) 67 + init_funcs[i].exit(); 68 + } 69 + 70 + module_init(xe_init); 71 + module_exit(xe_exit); 72 + 73 + MODULE_AUTHOR("Intel Corporation"); 74 + 75 + MODULE_DESCRIPTION(DRIVER_DESC); 76 + MODULE_LICENSE("GPL and additional rights");
drivers/gpu/drm/xe/xe_module.h (+13 lines)
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2023 Intel Corporation 4 + */ 5 + 6 + #include <linux/init.h> 7 + 8 + /* Module parameters */ 9 + extern bool enable_guc; 10 + extern bool enable_display; 11 + extern u32 xe_force_lmem_bar_size; 12 + extern int xe_guc_log_level; 13 + extern char *xe_param_force_probe;
drivers/gpu/drm/xe/xe_pci.c (+651 lines)
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2021 Intel Corporation 4 + */ 5 + 6 + #include "xe_pci.h" 7 + 8 + #include <linux/device/driver.h> 9 + #include <linux/module.h> 10 + #include <linux/pci.h> 11 + #include <linux/pm_runtime.h> 12 + 13 + #include <drm/drm_drv.h> 14 + #include <drm/drm_color_mgmt.h> 15 + #include <drm/xe_pciids.h> 16 + 17 + #include "xe_drv.h" 18 + #include "xe_device.h" 19 + #include "xe_macros.h" 20 + #include "xe_module.h" 21 + #include "xe_pm.h" 22 + #include "xe_step.h" 23 + 24 + #include "i915_reg.h" 25 + 26 + #define DEV_INFO_FOR_EACH_FLAG(func) \ 27 + func(require_force_probe); \ 28 + func(is_dgfx); \ 29 + /* Keep has_* in alphabetical order */ \ 30 + 31 + struct xe_subplatform_desc { 32 + enum xe_subplatform subplatform; 33 + const char *name; 34 + const u16 *pciidlist; 35 + }; 36 + 37 + struct xe_gt_desc { 38 + enum xe_gt_type type; 39 + u8 vram_id; 40 + u64 engine_mask; 41 + u32 mmio_adj_limit; 42 + u32 mmio_adj_offset; 43 + }; 44 + 45 + struct xe_device_desc { 46 + u8 graphics_ver; 47 + u8 graphics_rel; 48 + u8 media_ver; 49 + u8 media_rel; 50 + 51 + u64 platform_engine_mask; /* Engines supported by the HW */ 52 + 53 + enum xe_platform platform; 54 + const char *platform_name; 55 + const struct xe_subplatform_desc *subplatforms; 56 + const struct xe_gt_desc *extra_gts; 57 + 58 + u8 dma_mask_size; /* available DMA address bits */ 59 + 60 + u8 gt; /* GT number, 0 if undefined */ 61 + 62 + #define DEFINE_FLAG(name) u8 name:1 63 + DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG); 64 + #undef DEFINE_FLAG 65 + 66 + u8 vram_flags; 67 + u8 max_tiles; 68 + u8 vm_max_level; 69 + 70 + bool supports_usm; 71 + bool has_flat_ccs; 72 + bool has_4tile; 73 + }; 74 + 75 + #define PLATFORM(x) \ 76 + .platform = (x), \ 77 + .platform_name = #x 78 + 79 + #define NOP(x) x 80 + 81 + /* Keep in gen based order, and chronological order within a gen */ 82 + #define GEN12_FEATURES \ 83 + .require_force_probe = true, \ 84 + .graphics_ver = 12, \ 85 
+ .media_ver = 12, \ 86 + .dma_mask_size = 39, \ 87 + .max_tiles = 1, \ 88 + .vm_max_level = 3, \ 89 + .vram_flags = 0 90 + 91 + static const struct xe_device_desc tgl_desc = { 92 + GEN12_FEATURES, 93 + PLATFORM(XE_TIGERLAKE), 94 + .platform_engine_mask = 95 + BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | 96 + BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) | 97 + BIT(XE_HW_ENGINE_VCS2), 98 + }; 99 + 100 + static const struct xe_device_desc adl_s_desc = { 101 + GEN12_FEATURES, 102 + PLATFORM(XE_ALDERLAKE_S), 103 + .platform_engine_mask = 104 + BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | 105 + BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) | 106 + BIT(XE_HW_ENGINE_VCS2), 107 + }; 108 + 109 + static const u16 adlp_rplu_ids[] = { XE_RPLU_IDS(NOP), 0 }; 110 + 111 + static const struct xe_device_desc adl_p_desc = { 112 + GEN12_FEATURES, 113 + PLATFORM(XE_ALDERLAKE_P), 114 + .platform_engine_mask = 115 + BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | 116 + BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) | 117 + BIT(XE_HW_ENGINE_VCS2), 118 + .subplatforms = (const struct xe_subplatform_desc[]) { 119 + { XE_SUBPLATFORM_ADLP_RPLU, "RPLU", adlp_rplu_ids }, 120 + {}, 121 + }, 122 + }; 123 + 124 + #define DGFX_FEATURES \ 125 + .is_dgfx = 1 126 + 127 + static const struct xe_device_desc dg1_desc = { 128 + GEN12_FEATURES, 129 + DGFX_FEATURES, 130 + .graphics_rel = 10, 131 + PLATFORM(XE_DG1), 132 + .platform_engine_mask = 133 + BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | 134 + BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) | 135 + BIT(XE_HW_ENGINE_VCS2), 136 + }; 137 + 138 + #define XE_HP_FEATURES \ 139 + .require_force_probe = true, \ 140 + .graphics_ver = 12, \ 141 + .graphics_rel = 50, \ 142 + .has_flat_ccs = true, \ 143 + .dma_mask_size = 46, \ 144 + .max_tiles = 1, \ 145 + .vm_max_level = 3 146 + 147 + #define XE_HPM_FEATURES \ 148 + .media_ver = 12, \ 149 + .media_rel = 50 150 + 151 + static const u16 dg2_g10_ids[] = { XE_DG2_G10_IDS(NOP), 
XE_ATS_M150_IDS(NOP), 0 }; 152 + static const u16 dg2_g11_ids[] = { XE_DG2_G11_IDS(NOP), XE_ATS_M75_IDS(NOP), 0 }; 153 + static const u16 dg2_g12_ids[] = { XE_DG2_G12_IDS(NOP), 0 }; 154 + 155 + #define DG2_FEATURES \ 156 + DGFX_FEATURES, \ 157 + .graphics_rel = 55, \ 158 + .media_rel = 55, \ 159 + PLATFORM(XE_DG2), \ 160 + .subplatforms = (const struct xe_subplatform_desc[]) { \ 161 + { XE_SUBPLATFORM_DG2_G10, "G10", dg2_g10_ids }, \ 162 + { XE_SUBPLATFORM_DG2_G11, "G11", dg2_g11_ids }, \ 163 + { XE_SUBPLATFORM_DG2_G12, "G12", dg2_g12_ids }, \ 164 + { } \ 165 + }, \ 166 + .platform_engine_mask = \ 167 + BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | \ 168 + BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VECS1) | \ 169 + BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | \ 170 + BIT(XE_HW_ENGINE_CCS0) | BIT(XE_HW_ENGINE_CCS1) | \ 171 + BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3), \ 172 + .require_force_probe = true, \ 173 + .vram_flags = XE_VRAM_FLAGS_NEED64K, \ 174 + .has_4tile = 1 175 + 176 + static const struct xe_device_desc ats_m_desc = { 177 + XE_HP_FEATURES, 178 + XE_HPM_FEATURES, 179 + 180 + DG2_FEATURES, 181 + }; 182 + 183 + static const struct xe_device_desc dg2_desc = { 184 + XE_HP_FEATURES, 185 + XE_HPM_FEATURES, 186 + 187 + DG2_FEATURES, 188 + }; 189 + 190 + #define PVC_ENGINES \ 191 + BIT(XE_HW_ENGINE_BCS0) | BIT(XE_HW_ENGINE_BCS1) | \ 192 + BIT(XE_HW_ENGINE_BCS2) | BIT(XE_HW_ENGINE_BCS3) | \ 193 + BIT(XE_HW_ENGINE_BCS4) | BIT(XE_HW_ENGINE_BCS5) | \ 194 + BIT(XE_HW_ENGINE_BCS6) | BIT(XE_HW_ENGINE_BCS7) | \ 195 + BIT(XE_HW_ENGINE_BCS8) | \ 196 + BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS1) | \ 197 + BIT(XE_HW_ENGINE_VCS2) | \ 198 + BIT(XE_HW_ENGINE_CCS0) | BIT(XE_HW_ENGINE_CCS1) | \ 199 + BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3) 200 + 201 + static const struct xe_gt_desc pvc_gts[] = { 202 + { 203 + .type = XE_GT_TYPE_REMOTE, 204 + .vram_id = 1, 205 + .engine_mask = PVC_ENGINES, 206 + .mmio_adj_limit = 0, 207 + .mmio_adj_offset = 0, 208 + }, 
209 + }; 210 + 211 + static const __maybe_unused struct xe_device_desc pvc_desc = { 212 + XE_HP_FEATURES, 213 + XE_HPM_FEATURES, 214 + DGFX_FEATURES, 215 + PLATFORM(XE_PVC), 216 + .extra_gts = pvc_gts, 217 + .graphics_rel = 60, 218 + .has_flat_ccs = 0, 219 + .media_rel = 60, 220 + .platform_engine_mask = PVC_ENGINES, 221 + .vram_flags = XE_VRAM_FLAGS_NEED64K, 222 + .dma_mask_size = 52, 223 + .max_tiles = 2, 224 + .vm_max_level = 4, 225 + .supports_usm = true, 226 + }; 227 + 228 + #define MTL_MEDIA_ENGINES \ 229 + BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | \ 230 + BIT(XE_HW_ENGINE_VECS0) /* TODO: GSC0 */ 231 + 232 + static const struct xe_gt_desc xelpmp_gts[] = { 233 + { 234 + .type = XE_GT_TYPE_MEDIA, 235 + .vram_id = 0, 236 + .engine_mask = MTL_MEDIA_ENGINES, 237 + .mmio_adj_limit = 0x40000, 238 + .mmio_adj_offset = 0x380000, 239 + }, 240 + }; 241 + 242 + #define MTL_MAIN_ENGINES \ 243 + BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | \ 244 + BIT(XE_HW_ENGINE_CCS0) 245 + 246 + static const struct xe_device_desc mtl_desc = { 247 + /* 248 + * Real graphics IP version will be obtained from hardware GMD_ID 249 + * register. Value provided here is just for sanity checking. 250 + */ 251 + .require_force_probe = true, 252 + .graphics_ver = 12, 253 + .graphics_rel = 70, 254 + .dma_mask_size = 46, 255 + .max_tiles = 2, 256 + .vm_max_level = 3, 257 + .media_ver = 13, 258 + PLATFORM(XE_METEORLAKE), 259 + .extra_gts = xelpmp_gts, 260 + .platform_engine_mask = MTL_MAIN_ENGINES, 261 + }; 262 + 263 + #undef PLATFORM 264 + 265 + #define INTEL_VGA_DEVICE(id, info) { \ 266 + PCI_DEVICE(PCI_VENDOR_ID_INTEL, id), \ 267 + PCI_BASE_CLASS_DISPLAY << 16, 0xff << 16, \ 268 + (unsigned long) info } 269 + 270 + /* 271 + * Make sure any device matches here are from most specific to most 272 + * general. 
For example, since the Quanta match is based on the subsystem 273 + * and subvendor IDs, we need it to come before the more general IVB 274 + * PCI ID matches, otherwise we'll use the wrong info struct above. 275 + */ 276 + static const struct pci_device_id pciidlist[] = { 277 + XE_TGL_GT2_IDS(INTEL_VGA_DEVICE, &tgl_desc), 278 + XE_DG1_IDS(INTEL_VGA_DEVICE, &dg1_desc), 279 + XE_ATS_M_IDS(INTEL_VGA_DEVICE, &ats_m_desc), 280 + XE_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc), 281 + XE_ADLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc), 282 + XE_ADLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc), 283 + XE_MTL_IDS(INTEL_VGA_DEVICE, &mtl_desc), 284 + { } 285 + }; 286 + MODULE_DEVICE_TABLE(pci, pciidlist); 287 + 288 + #undef INTEL_VGA_DEVICE 289 + 290 + /* is device_id present in comma separated list of ids */ 291 + static bool device_id_in_list(u16 device_id, const char *devices, bool negative) 292 + { 293 + char *s, *p, *tok; 294 + bool ret; 295 + 296 + if (!devices || !*devices) 297 + return false; 298 + 299 + /* match everything */ 300 + if (negative && strcmp(devices, "!*") == 0) 301 + return true; 302 + if (!negative && strcmp(devices, "*") == 0) 303 + return true; 304 + 305 + s = kstrdup(devices, GFP_KERNEL); 306 + if (!s) 307 + return false; 308 + 309 + for (p = s, ret = false; (tok = strsep(&p, ",")) != NULL; ) { 310 + u16 val; 311 + 312 + if (negative && tok[0] == '!') 313 + tok++; 314 + else if ((negative && tok[0] != '!') || 315 + (!negative && tok[0] == '!')) 316 + continue; 317 + 318 + if (kstrtou16(tok, 16, &val) == 0 && val == device_id) { 319 + ret = true; 320 + break; 321 + } 322 + } 323 + 324 + kfree(s); 325 + 326 + return ret; 327 + } 328 + 329 + static bool id_forced(u16 device_id) 330 + { 331 + return device_id_in_list(device_id, xe_param_force_probe, false); 332 + } 333 + 334 + static bool id_blocked(u16 device_id) 335 + { 336 + return device_id_in_list(device_id, xe_param_force_probe, true); 337 + } 338 + 339 + static const struct xe_subplatform_desc * 340 + 
subplatform_get(const struct xe_device *xe, const struct xe_device_desc *desc) 341 + { 342 + const struct xe_subplatform_desc *sp; 343 + const u16 *id; 344 + 345 + for (sp = desc->subplatforms; sp && sp->subplatform; sp++) 346 + for (id = sp->pciidlist; *id; id++) 347 + if (*id == xe->info.devid) 348 + return sp; 349 + 350 + return NULL; 351 + } 352 + 353 + static void xe_pci_remove(struct pci_dev *pdev) 354 + { 355 + struct xe_device *xe; 356 + 357 + xe = pci_get_drvdata(pdev); 358 + if (!xe) /* driver load aborted, nothing to cleanup */ 359 + return; 360 + 361 + xe_device_remove(xe); 362 + pci_set_drvdata(pdev, NULL); 363 + } 364 + 365 + static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) 366 + { 367 + const struct xe_device_desc *desc = (void *)ent->driver_data; 368 + const struct xe_subplatform_desc *spd; 369 + struct xe_device *xe; 370 + struct xe_gt *gt; 371 + u8 id; 372 + int err; 373 + 374 + if (desc->require_force_probe && !id_forced(pdev->device)) { 375 + dev_info(&pdev->dev, 376 + "Your graphics device %04x is not officially supported\n" 377 + "by xe driver in this kernel version. 
To force Xe probe,\n" 378 + "use xe.force_probe='%04x' and i915.force_probe='!%04x'\n" 379 + "module parameters or CONFIG_DRM_XE_FORCE_PROBE='%04x' and\n" 380 + "CONFIG_DRM_I915_FORCE_PROBE='!%04x' configuration options.\n", 381 + pdev->device, pdev->device, pdev->device, 382 + pdev->device, pdev->device); 383 + return -ENODEV; 384 + } 385 + 386 + if (id_blocked(pdev->device)) { 387 + dev_info(&pdev->dev, "Probe blocked for device [%04x:%04x].\n", 388 + pdev->vendor, pdev->device); 389 + return -ENODEV; 390 + } 391 + 392 + xe = xe_device_create(pdev, ent); 393 + if (IS_ERR(xe)) 394 + return PTR_ERR(xe); 395 + 396 + xe->info.graphics_verx100 = desc->graphics_ver * 100 + 397 + desc->graphics_rel; 398 + xe->info.media_verx100 = desc->media_ver * 100 + 399 + desc->media_rel; 400 + xe->info.is_dgfx = desc->is_dgfx; 401 + xe->info.platform = desc->platform; 402 + xe->info.dma_mask_size = desc->dma_mask_size; 403 + xe->info.vram_flags = desc->vram_flags; 404 + xe->info.tile_count = desc->max_tiles; 405 + xe->info.vm_max_level = desc->vm_max_level; 406 + xe->info.media_ver = desc->media_ver; 407 + xe->info.supports_usm = desc->supports_usm; 408 + xe->info.has_flat_ccs = desc->has_flat_ccs; 409 + xe->info.has_4tile = desc->has_4tile; 410 + 411 + spd = subplatform_get(xe, desc); 412 + xe->info.subplatform = spd ? 
spd->subplatform : XE_SUBPLATFORM_NONE; 413 + xe->info.step = xe_step_get(xe); 414 + 415 + for (id = 0; id < xe->info.tile_count; ++id) { 416 + gt = xe->gt + id; 417 + gt->info.id = id; 418 + gt->xe = xe; 419 + 420 + if (id == 0) { 421 + gt->info.type = XE_GT_TYPE_MAIN; 422 + gt->info.vram_id = id; 423 + gt->info.engine_mask = desc->platform_engine_mask; 424 + gt->mmio.adj_limit = 0; 425 + gt->mmio.adj_offset = 0; 426 + } else { 427 + gt->info.type = desc->extra_gts[id - 1].type; 428 + gt->info.vram_id = desc->extra_gts[id - 1].vram_id; 429 + gt->info.engine_mask = 430 + desc->extra_gts[id - 1].engine_mask; 431 + gt->mmio.adj_limit = 432 + desc->extra_gts[id - 1].mmio_adj_limit; 433 + gt->mmio.adj_offset = 434 + desc->extra_gts[id - 1].mmio_adj_offset; 435 + } 436 + } 437 + 438 + drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx100:%d media100:%d dma_m_s:%d tc:%d", 439 + desc->platform_name, spd ? spd->name : "", 440 + xe->info.devid, xe->info.revid, 441 + xe->info.is_dgfx, xe->info.graphics_verx100, 442 + xe->info.media_verx100, 443 + xe->info.dma_mask_size, xe->info.tile_count); 444 + 445 + drm_dbg(&xe->drm, "Stepping = (G:%s, M:%s, D:%s, B:%s)\n", 446 + xe_step_name(xe->info.step.graphics), 447 + xe_step_name(xe->info.step.media), 448 + xe_step_name(xe->info.step.display), 449 + xe_step_name(xe->info.step.basedie)); 450 + 451 + pci_set_drvdata(pdev, xe); 452 + err = pci_enable_device(pdev); 453 + if (err) { 454 + drm_dev_put(&xe->drm); 455 + return err; 456 + } 457 + 458 + pci_set_master(pdev); 459 + 460 + if (pci_enable_msi(pdev) < 0) 461 + drm_dbg(&xe->drm, "can't enable MSI"); 462 + 463 + err = xe_device_probe(xe); 464 + if (err) { 465 + pci_disable_device(pdev); 466 + return err; 467 + } 468 + 469 + xe_pm_runtime_init(xe); 470 + 471 + return 0; 472 + } 473 + 474 + static void xe_pci_shutdown(struct pci_dev *pdev) 475 + { 476 + xe_device_shutdown(pdev_to_xe_device(pdev)); 477 + } 478 + 479 + #ifdef CONFIG_PM_SLEEP 480 + static int xe_pci_suspend(struct device 
*dev) 481 + { 482 + struct pci_dev *pdev = to_pci_dev(dev); 483 + int err; 484 + 485 + err = xe_pm_suspend(pdev_to_xe_device(pdev)); 486 + if (err) 487 + return err; 488 + 489 + pci_save_state(pdev); 490 + pci_disable_device(pdev); 491 + 492 + err = pci_set_power_state(pdev, PCI_D3hot); 493 + if (err) 494 + return err; 495 + 496 + return 0; 497 + } 498 + 499 + static int xe_pci_resume(struct device *dev) 500 + { 501 + struct pci_dev *pdev = to_pci_dev(dev); 502 + int err; 503 + 504 + err = pci_set_power_state(pdev, PCI_D0); 505 + if (err) 506 + return err; 507 + 508 + pci_restore_state(pdev); 509 + 510 + err = pci_enable_device(pdev); 511 + if (err) 512 + return err; 513 + 514 + pci_set_master(pdev); 515 + 516 + err = xe_pm_resume(pdev_to_xe_device(pdev)); 517 + if (err) 518 + return err; 519 + 520 + return 0; 521 + } 522 + #endif 523 + 524 + static int xe_pci_runtime_suspend(struct device *dev) 525 + { 526 + struct pci_dev *pdev = to_pci_dev(dev); 527 + struct xe_device *xe = pdev_to_xe_device(pdev); 528 + int err; 529 + 530 + err = xe_pm_runtime_suspend(xe); 531 + if (err) 532 + return err; 533 + 534 + pci_save_state(pdev); 535 + 536 + if (xe->d3cold_allowed) { 537 + pci_disable_device(pdev); 538 + pci_ignore_hotplug(pdev); 539 + pci_set_power_state(pdev, PCI_D3cold); 540 + } else { 541 + pci_set_power_state(pdev, PCI_D3hot); 542 + } 543 + 544 + return 0; 545 + } 546 + 547 + static int xe_pci_runtime_resume(struct device *dev) 548 + { 549 + struct pci_dev *pdev = to_pci_dev(dev); 550 + struct xe_device *xe = pdev_to_xe_device(pdev); 551 + int err; 552 + 553 + err = pci_set_power_state(pdev, PCI_D0); 554 + if (err) 555 + return err; 556 + 557 + pci_restore_state(pdev); 558 + 559 + if (xe->d3cold_allowed) { 560 + err = pci_enable_device(pdev); 561 + if (err) 562 + return err; 563 + 564 + pci_set_master(pdev); 565 + } 566 + 567 + return xe_pm_runtime_resume(xe); 568 + } 569 + 570 + static int xe_pci_runtime_idle(struct device *dev) 571 + { 572 + struct pci_dev *pdev 
= to_pci_dev(dev); 573 + struct xe_device *xe = pdev_to_xe_device(pdev); 574 + 575 + /* 576 + * FIXME: d3cold should be allowed (true) if 577 + * (IS_DGFX(xe) && !xe_device_mem_access_ongoing(xe)) 578 + * however the change to the buddy allocator broke the 579 + * xe_bo_restore_kernel when the pci device is disabled 580 + */ 581 + xe->d3cold_allowed = false; 582 + 583 + return 0; 584 + } 585 + 586 + static const struct dev_pm_ops xe_pm_ops = { 587 + .suspend = xe_pci_suspend, 588 + .resume = xe_pci_resume, 589 + .freeze = xe_pci_suspend, 590 + .thaw = xe_pci_resume, 591 + .poweroff = xe_pci_suspend, 592 + .restore = xe_pci_resume, 593 + .runtime_suspend = xe_pci_runtime_suspend, 594 + .runtime_resume = xe_pci_runtime_resume, 595 + .runtime_idle = xe_pci_runtime_idle, 596 + }; 597 + 598 + static struct pci_driver xe_pci_driver = { 599 + .name = DRIVER_NAME, 600 + .id_table = pciidlist, 601 + .probe = xe_pci_probe, 602 + .remove = xe_pci_remove, 603 + .shutdown = xe_pci_shutdown, 604 + .driver.pm = &xe_pm_ops, 605 + }; 606 + 607 + int xe_register_pci_driver(void) 608 + { 609 + return pci_register_driver(&xe_pci_driver); 610 + } 611 + 612 + void xe_unregister_pci_driver(void) 613 + { 614 + pci_unregister_driver(&xe_pci_driver); 615 + } 616 + 617 + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) 618 + static int dev_to_xe_device_fn(struct device *dev, void *data) 620 + { 621 + struct drm_device *drm = dev_get_drvdata(dev); 622 + int (*xe_fn)(struct xe_device *xe) = data; 623 + int ret = 0; 624 + int idx; 625 + 626 + if (drm_dev_enter(drm, &idx)) { 627 + ret = xe_fn(to_xe_device(dev_get_drvdata(dev))); 628 + drm_dev_exit(idx); 629 + } 630 + return ret; 631 + } 632 + 633 + /** 634 + * xe_call_for_each_device - Iterate over all devices this driver binds to 635 + * @xe_fn: Function to call for each device. 636 + * 637 + * This function iterates over all devices this driver binds to, and calls 638 + * @xe_fn for each one of them. 
If the called function returns anything else 639 + * than 0, iteration is stopped and the return value is returned by this 640 + * function. Across each function call, drm_dev_enter() / drm_dev_exit() is 641 + * called for the corresponding drm device. 642 + * 643 + * Return: Zero or the error code of a call to @xe_fn returning an error 644 + * code. 645 + */ 646 + int xe_call_for_each_device(xe_device_fn xe_fn) 647 + { 648 + return driver_for_each_device(&xe_pci_driver.driver, NULL, 649 + xe_fn, dev_to_xe_device_fn); 650 + } 651 + #endif
drivers/gpu/drm/xe/xe_pci.h (+21 lines)
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2021 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_PCI_H_ 7 + #define _XE_PCI_H_ 8 + 9 + #include "tests/xe_test.h" 10 + 11 + int xe_register_pci_driver(void); 12 + void xe_unregister_pci_driver(void); 13 + 14 + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) 15 + struct xe_device; 16 + 17 + typedef int (*xe_device_fn)(struct xe_device *); 18 + 19 + int xe_call_for_each_device(xe_device_fn xe_fn); 20 + #endif 21 + #endif
drivers/gpu/drm/xe/xe_pcode.c (+296 lines)
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #include "xe_pcode_api.h" 7 + #include "xe_pcode.h" 8 + 9 + #include "xe_gt.h" 10 + #include "xe_mmio.h" 11 + 12 + #include <linux/errno.h> 13 + 14 + /** 15 + * DOC: PCODE 16 + * 17 + * Xe PCODE is the component responsible for interfacing with the PCODE 18 + * firmware. 19 + * It shall provide a very simple ABI to other Xe components, but be the 20 + * single and consolidated place that will communicate with PCODE. All read 21 + * and write operations to PCODE will be internal and private to this component. 22 + * 23 + * What's next: 24 + * - PCODE hw metrics 25 + * - PCODE for display operations 26 + */ 27 + 28 + static int pcode_mailbox_status(struct xe_gt *gt) 29 + { 30 + u32 err; 31 + static const struct pcode_err_decode err_decode[] = { 32 + [PCODE_ILLEGAL_CMD] = {-ENXIO, "Illegal Command"}, 33 + [PCODE_TIMEOUT] = {-ETIMEDOUT, "Timed out"}, 34 + [PCODE_ILLEGAL_DATA] = {-EINVAL, "Illegal Data"}, 35 + [PCODE_ILLEGAL_SUBCOMMAND] = {-ENXIO, "Illegal Subcommand"}, 36 + [PCODE_LOCKED] = {-EBUSY, "PCODE Locked"}, 37 + [PCODE_GT_RATIO_OUT_OF_RANGE] = {-EOVERFLOW, 38 + "GT ratio out of range"}, 39 + [PCODE_REJECTED] = {-EACCES, "PCODE Rejected"}, 40 + [PCODE_ERROR_MASK] = {-EPROTO, "Unknown"}, 41 + }; 42 + 43 + lockdep_assert_held(&gt->pcode.lock); 44 + 45 + err = xe_mmio_read32(gt, PCODE_MAILBOX.reg) & PCODE_ERROR_MASK; 46 + if (err) { 47 + drm_err(&gt_to_xe(gt)->drm, "PCODE Mailbox failed: %d %s", err, 48 + err_decode[err].str ?: "Unknown"); 49 + return err_decode[err].errno ?: -EPROTO; 50 + } 51 + 52 + return 0; 53 + } 54 + 55 + static bool pcode_mailbox_done(struct xe_gt *gt) 56 + { 57 + lockdep_assert_held(&gt->pcode.lock); 58 + return (xe_mmio_read32(gt, PCODE_MAILBOX.reg) & PCODE_READY) == 0; 59 + } 60 + 61 + static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1, 62 + unsigned int timeout, bool return_data, bool atomic) 63 + { 64 + 
lockdep_assert_held(&gt->pcode.lock); 65 + 66 + if (!pcode_mailbox_done(gt)) 67 + return -EAGAIN; 68 + 69 + xe_mmio_write32(gt, PCODE_DATA0.reg, *data0); 70 + xe_mmio_write32(gt, PCODE_DATA1.reg, data1 ? *data1 : 0); 71 + xe_mmio_write32(gt, PCODE_MAILBOX.reg, PCODE_READY | mbox); 72 + 73 + if (atomic) 74 + _wait_for_atomic(pcode_mailbox_done(gt), timeout * 1000, 1); 75 + else 76 + wait_for(pcode_mailbox_done(gt), timeout); 77 + 78 + if (return_data) { 79 + *data0 = xe_mmio_read32(gt, PCODE_DATA0.reg); 80 + if (data1) 81 + *data1 = xe_mmio_read32(gt, PCODE_DATA1.reg); 82 + } 83 + 84 + return pcode_mailbox_status(gt); 85 + } 86 + 87 + int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 data, int timeout) 88 + { 89 + int err; 90 + 91 + mutex_lock(&gt->pcode.lock); 92 + err = pcode_mailbox_rw(gt, mbox, &data, NULL, timeout, false, false); 93 + mutex_unlock(&gt->pcode.lock); 94 + 95 + return err; 96 + } 97 + 98 + int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1) 99 + { 100 + int err; 101 + 102 + mutex_lock(&gt->pcode.lock); 103 + err = pcode_mailbox_rw(gt, mbox, val, val1, 1, true, false); 104 + mutex_unlock(&gt->pcode.lock); 105 + 106 + return err; 107 + } 108 + 109 + static bool xe_pcode_try_request(struct xe_gt *gt, u32 mbox, 110 + u32 request, u32 reply_mask, u32 reply, 111 + u32 *status, bool atomic) 112 + { 113 + *status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true, atomic); 114 + 115 + return (*status == 0) && ((request & reply_mask) == reply); 116 + } 117 + 118 + /** 119 + * xe_pcode_request - send PCODE request until acknowledgment 120 + * @gt: gt 121 + * @mbox: PCODE mailbox ID the request is targeted for 122 + * @request: request ID 123 + * @reply_mask: mask used to check for request acknowledgment 124 + * @reply: value used to check for request acknowledgment 125 + * @timeout_base_ms: timeout for polling with preemption enabled 126 + * 127 + * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE 128 + * 
reports an error or an overall timeout of @timeout_base_ms+50 ms expires. 129 + * The request is acknowledged once the PCODE reply dword equals @reply after 130 + * applying @reply_mask. Polling is first attempted with preemption enabled 131 + * for @timeout_base_ms and if this times out for another 50 ms with 132 + * preemption disabled. 133 + * 134 + * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some 135 + * other error as reported by PCODE. 136 + */ 137 + int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request, 138 + u32 reply_mask, u32 reply, int timeout_base_ms) 139 + { 140 + u32 status; 141 + int ret; 142 + bool atomic = false; 143 + 144 + mutex_lock(&gt->pcode.lock); 145 + 146 + #define COND \ 147 + xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status, atomic) 148 + 149 + /* 150 + * Prime the PCODE by doing a request first. Normally it guarantees 151 + * that a subsequent request, at most @timeout_base_ms later, succeeds. 152 + * _wait_for() doesn't guarantee when its passed condition is evaluated 153 + * first, so send the first request explicitly. 154 + */ 155 + if (COND) { 156 + ret = 0; 157 + goto out; 158 + } 159 + ret = _wait_for(COND, timeout_base_ms * 1000, 10, 10); 160 + if (!ret) 161 + goto out; 162 + 163 + /* 164 + * The above can time out if the number of requests was low (2 in the 165 + * worst case) _and_ PCODE was busy for some reason even after a 166 + * (queued) request and @timeout_base_ms delay. As a workaround retry 167 + * the poll with preemption disabled to maximize the number of 168 + * requests. Increase the timeout from @timeout_base_ms to 50ms to 169 + * account for interrupts that could reduce the number of these 170 + * requests, and for any quirks of the PCODE firmware that delays 171 + * the request completion. 
172 + */ 173 + drm_err(&gt_to_xe(gt)->drm, 174 + "PCODE timeout, retrying with preemption disabled\n"); 175 + drm_WARN_ON_ONCE(&gt_to_xe(gt)->drm, timeout_base_ms > 1); 176 + preempt_disable(); 177 + atomic = true; 178 + ret = wait_for_atomic(COND, 50); 179 + atomic = false; 180 + preempt_enable(); 181 + 182 + out: 183 + mutex_unlock(&gt->pcode.lock); 184 + return status ? status : ret; 185 + #undef COND 186 + } 187 + /** 188 + * xe_pcode_init_min_freq_table - Initialize PCODE's QOS frequency table 189 + * @gt: gt instance 190 + * @min_gt_freq: Minimal (RPn) GT frequency in units of 50MHz. 191 + * @max_gt_freq: Maximal (RP0) GT frequency in units of 50MHz. 192 + * 193 + * This function initializes PCODE's QOS frequency table for a proper minimal 194 + * frequency/power steering decision, depending on the current requested GT 195 + * frequency. For older platforms this was a more complete table including 196 + * the IA freq. However for the latest platforms this table becomes a simple 197 + * 1-1 Ring vs GT frequency. Even so, without setting it, PCODE might 198 + * not take the right decisions for some memory frequencies, affecting latency.
199 + * 200 + * It returns 0 on success, and a negative error number on failure: -EINVAL if 201 + * the max frequency is not higher than the min frequency, and other errors 202 + * directly translated from the PCODE error returns: 203 + * - -ENXIO: "Illegal Command" 204 + * - -ETIMEDOUT: "Timed out" 205 + * - -EINVAL: "Illegal Data" 206 + * - -ENXIO: "Illegal Subcommand" 207 + * - -EBUSY: "PCODE Locked" 208 + * - -EOVERFLOW: "GT ratio out of range" 209 + * - -EACCES: "PCODE Rejected" 210 + * - -EPROTO: "Unknown" 211 + */ 212 + int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq, 213 + u32 max_gt_freq) 214 + { 215 + int ret; 216 + u32 freq; 217 + 218 + if (IS_DGFX(gt_to_xe(gt))) 219 + return 0; 220 + 221 + if (max_gt_freq <= min_gt_freq) 222 + return -EINVAL; 223 + 224 + mutex_lock(&gt->pcode.lock); 225 + for (freq = min_gt_freq; freq <= max_gt_freq; freq++) { 226 + u32 data = freq << PCODE_FREQ_RING_RATIO_SHIFT | freq; 227 + 228 + ret = pcode_mailbox_rw(gt, PCODE_WRITE_MIN_FREQ_TABLE, 229 + &data, NULL, 1, false, false); 230 + if (ret) 231 + goto unlock; 232 + } 233 + 234 + unlock: 235 + mutex_unlock(&gt->pcode.lock); 236 + return ret; 237 + } 238 + 239 + static bool pcode_dgfx_status_complete(struct xe_gt *gt) 240 + { 241 + u32 data = DGFX_GET_INIT_STATUS; 242 + int status = pcode_mailbox_rw(gt, DGFX_PCODE_STATUS, 243 + &data, NULL, 1, true, false); 244 + 245 + return status == 0 && 246 + (data & DGFX_INIT_STATUS_COMPLETE) == DGFX_INIT_STATUS_COMPLETE; 247 + } 248 + 249 + /** 250 + * xe_pcode_init - Ensure PCODE is initialized 251 + * @gt: gt instance 252 + * 253 + * This function ensures that PCODE is properly initialized. To be called during 254 + * probe and resume paths. 255 + * 256 + * It returns 0 on success, and -error number on failure.
257 + */ 258 + int xe_pcode_init(struct xe_gt *gt) 259 + { 260 + int timeout = 180000; /* 3 min */ 261 + int ret; 262 + 263 + if (!IS_DGFX(gt_to_xe(gt))) 264 + return 0; 265 + 266 + mutex_lock(&gt->pcode.lock); 267 + ret = wait_for(pcode_dgfx_status_complete(gt), timeout); 268 + mutex_unlock(&gt->pcode.lock); 269 + 270 + if (ret) 271 + drm_err(&gt_to_xe(gt)->drm, 272 + "PCODE initialization timed out after: %d min\n", 273 + timeout / 60000); 274 + 275 + return ret; 276 + } 277 + 278 + /** 279 + * xe_pcode_probe - Prepare xe_pcode and also ensure PCODE is initialized. 280 + * @gt: gt instance 281 + * 282 + * This function initializes the xe_pcode component, and when needed, it ensures 283 + * that PCODE has properly performed its initialization and it is really ready 284 + * to go. To be called once only during probe. 285 + * 286 + * It returns 0 on success, and -error number on failure. 287 + */ 288 + int xe_pcode_probe(struct xe_gt *gt) 289 + { 290 + mutex_init(&gt->pcode.lock); 291 + 292 + if (!IS_DGFX(gt_to_xe(gt))) 293 + return 0; 294 + 295 + return xe_pcode_init(gt); 296 + }
+25
drivers/gpu/drm/xe/xe_pcode.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_PCODE_H_ 7 + #define _XE_PCODE_H_ 8 + 9 + #include <linux/types.h> 10 + struct xe_gt; 11 + 12 + int xe_pcode_probe(struct xe_gt *gt); 13 + int xe_pcode_init(struct xe_gt *gt); 14 + int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq, 15 + u32 max_gt_freq); 16 + int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1); 17 + int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 val, 18 + int timeout_ms); 19 + #define xe_pcode_write(gt, mbox, val) \ 20 + xe_pcode_write_timeout(gt, mbox, val, 1) 21 + 22 + int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request, 23 + u32 reply_mask, u32 reply, int timeout_ms); 24 + 25 + #endif
+40
drivers/gpu/drm/xe/xe_pcode_api.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + /* Internal to xe_pcode */ 7 + 8 + #define PCODE_MAILBOX _MMIO(0x138124) 9 + #define PCODE_READY REG_BIT(31) 10 + #define PCODE_MB_PARAM2 REG_GENMASK(23, 16) 11 + #define PCODE_MB_PARAM1 REG_GENMASK(15, 8) 12 + #define PCODE_MB_COMMAND REG_GENMASK(7, 0) 13 + #define PCODE_ERROR_MASK 0xFF 14 + #define PCODE_SUCCESS 0x0 15 + #define PCODE_ILLEGAL_CMD 0x1 16 + #define PCODE_TIMEOUT 0x2 17 + #define PCODE_ILLEGAL_DATA 0x3 18 + #define PCODE_ILLEGAL_SUBCOMMAND 0x4 19 + #define PCODE_LOCKED 0x6 20 + #define PCODE_GT_RATIO_OUT_OF_RANGE 0x10 21 + #define PCODE_REJECTED 0x11 22 + 23 + #define PCODE_DATA0 _MMIO(0x138128) 24 + #define PCODE_DATA1 _MMIO(0x13812C) 25 + 26 + /* Min Freq QOS Table */ 27 + #define PCODE_WRITE_MIN_FREQ_TABLE 0x8 28 + #define PCODE_READ_MIN_FREQ_TABLE 0x9 29 + #define PCODE_FREQ_RING_RATIO_SHIFT 16 30 + 31 + /* PCODE Init */ 32 + #define DGFX_PCODE_STATUS 0x7E 33 + #define DGFX_GET_INIT_STATUS 0x0 34 + #define DGFX_INIT_STATUS_COMPLETE 0x1 35 + 36 + struct pcode_err_decode { 37 + int errno; 38 + const char *str; 39 + }; 40 +
+32
drivers/gpu/drm/xe/xe_platform_types.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_PLATFORM_INFO_TYPES_H_ 7 + #define _XE_PLATFORM_INFO_TYPES_H_ 8 + 9 + /* Keep in gen based order, and chronological order within a gen */ 10 + enum xe_platform { 11 + XE_PLATFORM_UNINITIALIZED = 0, 12 + /* gen12 */ 13 + XE_TIGERLAKE, 14 + XE_ROCKETLAKE, 15 + XE_DG1, 16 + XE_DG2, 17 + XE_PVC, 18 + XE_ALDERLAKE_S, 19 + XE_ALDERLAKE_P, 20 + XE_METEORLAKE, 21 + }; 22 + 23 + enum xe_subplatform { 24 + XE_SUBPLATFORM_UNINITIALIZED = 0, 25 + XE_SUBPLATFORM_NONE, 26 + XE_SUBPLATFORM_DG2_G10, 27 + XE_SUBPLATFORM_DG2_G11, 28 + XE_SUBPLATFORM_DG2_G12, 29 + XE_SUBPLATFORM_ADLP_RPLU, 30 + }; 31 + 32 + #endif
+207
drivers/gpu/drm/xe/xe_pm.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #include <linux/pm_runtime.h> 7 + 8 + #include <drm/ttm/ttm_placement.h> 9 + 10 + #include "xe_bo.h" 11 + #include "xe_bo_evict.h" 12 + #include "xe_device.h" 13 + #include "xe_pm.h" 14 + #include "xe_gt.h" 15 + #include "xe_ggtt.h" 16 + #include "xe_irq.h" 17 + #include "xe_pcode.h" 18 + 19 + /** 20 + * DOC: Xe Power Management 21 + * 22 + * Xe PM shall be guided by the simplicity. 23 + * Use the simplest hook options whenever possible. 24 + * Let's not reinvent the runtime_pm references and hooks. 25 + * Shall have a clear separation of display and gt underneath this component. 26 + * 27 + * What's next: 28 + * 29 + * For now s2idle and s3 are only working in integrated devices. The next step 30 + * is to iterate through all VRAM's BO backing them up into the system memory 31 + * before allowing the system suspend. 32 + * 33 + * Also runtime_pm needs to be here from the beginning. 34 + * 35 + * RC6/RPS are also critical PM features. Let's start with GuCRC and GuC SLPC 36 + * and no wait boost. Frequency optimizations should come on a next stage. 37 + */ 38 + 39 + /** 40 + * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle 41 + * @xe: xe device instance 42 + * 43 + * Return: 0 on success 44 + */ 45 + int xe_pm_suspend(struct xe_device *xe) 46 + { 47 + struct xe_gt *gt; 48 + u8 id; 49 + int err; 50 + 51 + for_each_gt(gt, xe, id) 52 + xe_gt_suspend_prepare(gt); 53 + 54 + /* FIXME: Super racey... 
*/ 55 + err = xe_bo_evict_all(xe); 56 + if (err) 57 + return err; 58 + 59 + for_each_gt(gt, xe, id) { 60 + err = xe_gt_suspend(gt); 61 + if (err) 62 + return err; 63 + } 64 + 65 + xe_irq_suspend(xe); 66 + 67 + return 0; 68 + } 69 + 70 + /** 71 + * xe_pm_resume - Helper for System resume S3->S0 / S2idle->S0 72 + * @xe: xe device instance 73 + * 74 + * Return: 0 on success 75 + */ 76 + int xe_pm_resume(struct xe_device *xe) 77 + { 78 + struct xe_gt *gt; 79 + u8 id; 80 + int err; 81 + 82 + for_each_gt(gt, xe, id) { 83 + err = xe_pcode_init(gt); 84 + if (err) 85 + return err; 86 + } 87 + 88 + /* 89 + * This only restores pinned memory which is the memory required for the 90 + * GT(s) to resume. 91 + */ 92 + err = xe_bo_restore_kernel(xe); 93 + if (err) 94 + return err; 95 + 96 + xe_irq_resume(xe); 97 + 98 + for_each_gt(gt, xe, id) 99 + xe_gt_resume(gt); 100 + 101 + err = xe_bo_restore_user(xe); 102 + if (err) 103 + return err; 104 + 105 + return 0; 106 + } 107 + 108 + void xe_pm_runtime_init(struct xe_device *xe) 109 + { 110 + struct device *dev = xe->drm.dev; 111 + 112 + pm_runtime_use_autosuspend(dev); 113 + pm_runtime_set_autosuspend_delay(dev, 1000); 114 + pm_runtime_set_active(dev); 115 + pm_runtime_allow(dev); 116 + pm_runtime_mark_last_busy(dev); 117 + pm_runtime_put_autosuspend(dev); 118 + } 119 + 120 + int xe_pm_runtime_suspend(struct xe_device *xe) 121 + { 122 + struct xe_gt *gt; 123 + u8 id; 124 + int err; 125 + 126 + if (xe->d3cold_allowed) { 127 + if (xe_device_mem_access_ongoing(xe)) 128 + return -EBUSY; 129 + 130 + err = xe_bo_evict_all(xe); 131 + if (err) 132 + return err; 133 + } 134 + 135 + for_each_gt(gt, xe, id) { 136 + err = xe_gt_suspend(gt); 137 + if (err) 138 + return err; 139 + } 140 + 141 + xe_irq_suspend(xe); 142 + 143 + return 0; 144 + } 145 + 146 + int xe_pm_runtime_resume(struct xe_device *xe) 147 + { 148 + struct xe_gt *gt; 149 + u8 id; 150 + int err; 151 + 152 + if (xe->d3cold_allowed) { 153 + for_each_gt(gt, xe, id) { 154 + err = 
xe_pcode_init(gt); 155 + if (err) 156 + return err; 157 + } 158 + 159 + /* 160 + * This only restores pinned memory which is the memory 161 + * required for the GT(s) to resume. 162 + */ 163 + err = xe_bo_restore_kernel(xe); 164 + if (err) 165 + return err; 166 + } 167 + 168 + xe_irq_resume(xe); 169 + 170 + for_each_gt(gt, xe, id) 171 + xe_gt_resume(gt); 172 + 173 + if (xe->d3cold_allowed) { 174 + err = xe_bo_restore_user(xe); 175 + if (err) 176 + return err; 177 + } 178 + 179 + return 0; 180 + } 181 + 182 + int xe_pm_runtime_get(struct xe_device *xe) 183 + { 184 + return pm_runtime_get_sync(xe->drm.dev); 185 + } 186 + 187 + int xe_pm_runtime_put(struct xe_device *xe) 188 + { 189 + pm_runtime_mark_last_busy(xe->drm.dev); 190 + return pm_runtime_put_autosuspend(xe->drm.dev); 191 + } 192 + 193 + /* Return true if resume operation happened and usage count was increased */ 194 + bool xe_pm_runtime_resume_if_suspended(struct xe_device *xe) 195 + { 196 + /* In case we are suspended we need to immediately wake up */ 197 + if (pm_runtime_suspended(xe->drm.dev)) 198 + return !pm_runtime_resume_and_get(xe->drm.dev); 199 + 200 + return false; 201 + } 202 + 203 + int xe_pm_runtime_get_if_active(struct xe_device *xe) 204 + { 205 + WARN_ON(pm_runtime_suspended(xe->drm.dev)); 206 + return pm_runtime_get_if_active(xe->drm.dev, true); 207 + }
+24
drivers/gpu/drm/xe/xe_pm.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_PM_H_ 7 + #define _XE_PM_H_ 8 + 9 + #include <linux/pm_runtime.h> 10 + 11 + struct xe_device; 12 + 13 + int xe_pm_suspend(struct xe_device *xe); 14 + int xe_pm_resume(struct xe_device *xe); 15 + 16 + void xe_pm_runtime_init(struct xe_device *xe); 17 + int xe_pm_runtime_suspend(struct xe_device *xe); 18 + int xe_pm_runtime_resume(struct xe_device *xe); 19 + int xe_pm_runtime_get(struct xe_device *xe); 20 + int xe_pm_runtime_put(struct xe_device *xe); 21 + bool xe_pm_runtime_resume_if_suspended(struct xe_device *xe); 22 + int xe_pm_runtime_get_if_active(struct xe_device *xe); 23 + 24 + #endif
+157
drivers/gpu/drm/xe/xe_preempt_fence.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #include <linux/slab.h> 7 + 8 + #include "xe_engine.h" 9 + #include "xe_preempt_fence.h" 10 + #include "xe_vm.h" 11 + 12 + static void preempt_fence_work_func(struct work_struct *w) 13 + { 14 + bool cookie = dma_fence_begin_signalling(); 15 + struct xe_preempt_fence *pfence = 16 + container_of(w, typeof(*pfence), preempt_work); 17 + struct xe_engine *e = pfence->engine; 18 + 19 + if (pfence->error) 20 + dma_fence_set_error(&pfence->base, pfence->error); 21 + else 22 + e->ops->suspend_wait(e); 23 + 24 + dma_fence_signal(&pfence->base); 25 + dma_fence_end_signalling(cookie); 26 + 27 + queue_work(system_unbound_wq, &e->vm->preempt.rebind_work); 28 + 29 + xe_engine_put(e); 30 + } 31 + 32 + static const char * 33 + preempt_fence_get_driver_name(struct dma_fence *fence) 34 + { 35 + return "xe"; 36 + } 37 + 38 + static const char * 39 + preempt_fence_get_timeline_name(struct dma_fence *fence) 40 + { 41 + return "preempt"; 42 + } 43 + 44 + static bool preempt_fence_enable_signaling(struct dma_fence *fence) 45 + { 46 + struct xe_preempt_fence *pfence = 47 + container_of(fence, typeof(*pfence), base); 48 + struct xe_engine *e = pfence->engine; 49 + 50 + pfence->error = e->ops->suspend(e); 51 + queue_work(system_unbound_wq, &pfence->preempt_work); 52 + return true; 53 + } 54 + 55 + static const struct dma_fence_ops preempt_fence_ops = { 56 + .get_driver_name = preempt_fence_get_driver_name, 57 + .get_timeline_name = preempt_fence_get_timeline_name, 58 + .enable_signaling = preempt_fence_enable_signaling, 59 + }; 60 + 61 + /** 62 + * xe_preempt_fence_alloc() - Allocate a preempt fence with minimal 63 + * initialization 64 + * 65 + * Allocate a preempt fence, and initialize its list head. 66 + * If the preempt_fence allocated has been armed with 67 + * xe_preempt_fence_arm(), it must be freed using dma_fence_put(). If not, 68 + * it must be freed using xe_preempt_fence_free(). 
69 + * 70 + * Return: A struct xe_preempt_fence pointer used for calling into 71 + * xe_preempt_fence_arm() or xe_preempt_fence_free(). 72 + * An error pointer on error. 73 + */ 74 + struct xe_preempt_fence *xe_preempt_fence_alloc(void) 75 + { 76 + struct xe_preempt_fence *pfence; 77 + 78 + pfence = kmalloc(sizeof(*pfence), GFP_KERNEL); 79 + if (!pfence) 80 + return ERR_PTR(-ENOMEM); 81 + 82 + INIT_LIST_HEAD(&pfence->link); 83 + INIT_WORK(&pfence->preempt_work, preempt_fence_work_func); 84 + 85 + return pfence; 86 + } 87 + 88 + /** 89 + * xe_preempt_fence_free() - Free a preempt fence allocated using 90 + * xe_preempt_fence_alloc(). 91 + * @pfence: pointer obtained from xe_preempt_fence_alloc(); 92 + * 93 + * Free a preempt fence that has not yet been armed. 94 + */ 95 + void xe_preempt_fence_free(struct xe_preempt_fence *pfence) 96 + { 97 + list_del(&pfence->link); 98 + kfree(pfence); 99 + } 100 + 101 + /** 102 + * xe_preempt_fence_arm() - Arm a preempt fence allocated using 103 + * xe_preempt_fence_alloc(). 104 + * @pfence: The struct xe_preempt_fence pointer returned from 105 + * xe_preempt_fence_alloc(). 106 + * @e: The struct xe_engine used for arming. 107 + * @context: The dma-fence context used for arming. 108 + * @seqno: The dma-fence seqno used for arming. 109 + * 110 + * Inserts the preempt fence into @context's timeline, takes @link off any 111 + * list, and registers the struct xe_engine as the xe_engine to be preempted. 112 + * 113 + * Return: A pointer to a struct dma_fence embedded into the preempt fence. 114 + * This function doesn't error. 
115 + */ 116 + struct dma_fence * 117 + xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_engine *e, 118 + u64 context, u32 seqno) 119 + { 120 + list_del_init(&pfence->link); 121 + pfence->engine = xe_engine_get(e); 122 + dma_fence_init(&pfence->base, &preempt_fence_ops, 123 + &e->compute.lock, context, seqno); 124 + 125 + return &pfence->base; 126 + } 127 + 128 + /** 129 + * xe_preempt_fence_create() - Helper to create and arm a preempt fence. 130 + * @e: The struct xe_engine used for arming. 131 + * @context: The dma-fence context used for arming. 132 + * @seqno: The dma-fence seqno used for arming. 133 + * 134 + * Allocates and inserts the preempt fence into @context's timeline, 135 + * and registers @e as the struct xe_engine to be preempted. 136 + * 137 + * Return: A pointer to the resulting struct dma_fence on success. An error 138 + * pointer on error. In particular if allocation fails it returns 139 + * ERR_PTR(-ENOMEM); 140 + */ 141 + struct dma_fence * 142 + xe_preempt_fence_create(struct xe_engine *e, 143 + u64 context, u32 seqno) 144 + { 145 + struct xe_preempt_fence *pfence; 146 + 147 + pfence = xe_preempt_fence_alloc(); 148 + if (IS_ERR(pfence)) 149 + return ERR_CAST(pfence); 150 + 151 + return xe_preempt_fence_arm(pfence, e, context, seqno); 152 + } 153 + 154 + bool xe_fence_is_xe_preempt(const struct dma_fence *fence) 155 + { 156 + return fence->ops == &preempt_fence_ops; 157 + }
+61
drivers/gpu/drm/xe/xe_preempt_fence.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_PREEMPT_FENCE_H_ 7 + #define _XE_PREEMPT_FENCE_H_ 8 + 9 + #include "xe_preempt_fence_types.h" 10 + 11 + struct list_head; 12 + 13 + struct dma_fence * 14 + xe_preempt_fence_create(struct xe_engine *e, 15 + u64 context, u32 seqno); 16 + 17 + struct xe_preempt_fence *xe_preempt_fence_alloc(void); 18 + 19 + void xe_preempt_fence_free(struct xe_preempt_fence *pfence); 20 + 21 + struct dma_fence * 22 + xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_engine *e, 23 + u64 context, u32 seqno); 24 + 25 + static inline struct xe_preempt_fence * 26 + to_preempt_fence(struct dma_fence *fence) 27 + { 28 + return container_of(fence, struct xe_preempt_fence, base); 29 + } 30 + 31 + /** 32 + * xe_preempt_fence_link() - Return a link used to keep unarmed preempt 33 + * fences on a list. 34 + * @pfence: Pointer to the preempt fence. 35 + * 36 + * The link is embedded in the struct xe_preempt_fence. Use 37 + * link_to_preempt_fence() to convert back to the preempt fence. 38 + * 39 + * Return: A pointer to an embedded struct list_head. 40 + */ 41 + static inline struct list_head * 42 + xe_preempt_fence_link(struct xe_preempt_fence *pfence) 43 + { 44 + return &pfence->link; 45 + } 46 + 47 + /** 48 + * to_preempt_fence_from_link() - Convert back to a preempt fence pointer 49 + * from a link obtained with xe_preempt_fence_link(). 50 + * @link: The struct list_head obtained from xe_preempt_fence_link(). 51 + * 52 + * Return: A pointer to the embedding struct xe_preempt_fence. 53 + */ 54 + static inline struct xe_preempt_fence * 55 + to_preempt_fence_from_link(struct list_head *link) 56 + { 57 + return container_of(link, struct xe_preempt_fence, link); 58 + } 59 + 60 + bool xe_fence_is_xe_preempt(const struct dma_fence *fence); 61 + #endif
+33
drivers/gpu/drm/xe/xe_preempt_fence_types.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_PREEMPT_FENCE_TYPES_H_ 7 + #define _XE_PREEMPT_FENCE_TYPES_H_ 8 + 9 + #include <linux/dma-fence.h> 10 + #include <linux/workqueue.h> 11 + 12 + struct xe_engine; 13 + 14 + /** 15 + * struct xe_preempt_fence - XE preempt fence 16 + * 17 + * A preemption fence which suspends the execution of an xe_engine on the 18 + * hardware and triggers a callback once the xe_engine is complete. 19 + */ 20 + struct xe_preempt_fence { 21 + /** @base: dma fence base */ 22 + struct dma_fence base; 23 + /** @link: link into list of pending preempt fences */ 24 + struct list_head link; 25 + /** @engine: xe engine for this preempt fence */ 26 + struct xe_engine *engine; 27 + /** @preempt_work: work struct which issues preemption */ 28 + struct work_struct preempt_work; 29 + /** @error: preempt fence is in error state */ 30 + int error; 31 + }; 32 + 33 + #endif
+1542
drivers/gpu/drm/xe/xe_pt.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #include "xe_bo.h" 7 + #include "xe_device.h" 8 + #include "xe_gt.h" 9 + #include "xe_migrate.h" 10 + #include "xe_pt.h" 11 + #include "xe_pt_types.h" 12 + #include "xe_pt_walk.h" 13 + #include "xe_vm.h" 14 + #include "xe_res_cursor.h" 15 + 16 + struct xe_pt_dir { 17 + struct xe_pt pt; 18 + /** @dir: Directory structure for the xe_pt_walk functionality */ 19 + struct xe_ptw_dir dir; 20 + }; 21 + 22 + #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) 23 + #define xe_pt_set_addr(__xe_pt, __addr) ((__xe_pt)->addr = (__addr)) 24 + #define xe_pt_addr(__xe_pt) ((__xe_pt)->addr) 25 + #else 26 + #define xe_pt_set_addr(__xe_pt, __addr) 27 + #define xe_pt_addr(__xe_pt) 0ull 28 + #endif 29 + 30 + static const u64 xe_normal_pt_shifts[] = {12, 21, 30, 39, 48}; 31 + static const u64 xe_compact_pt_shifts[] = {16, 21, 30, 39, 48}; 32 + 33 + #define XE_PT_HIGHEST_LEVEL (ARRAY_SIZE(xe_normal_pt_shifts) - 1) 34 + 35 + static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt) 36 + { 37 + return container_of(pt, struct xe_pt_dir, pt); 38 + } 39 + 40 + static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index) 41 + { 42 + return container_of(pt_dir->dir.entries[index], struct xe_pt, base); 43 + } 44 + 45 + /** 46 + * gen8_pde_encode() - Encode a page-table directory entry pointing to 47 + * another page-table. 48 + * @bo: The page-table bo of the page-table to point to. 49 + * @bo_offset: Offset in the page-table bo to point to. 50 + * @level: The cache level indicating the caching of @bo. 51 + * 52 + * TODO: Rename. 53 + * 54 + * Return: An encoded page directory entry. No errors. 
55 + */ 56 + u64 gen8_pde_encode(struct xe_bo *bo, u64 bo_offset, 57 + const enum xe_cache_level level) 58 + { 59 + u64 pde; 60 + bool is_lmem; 61 + 62 + pde = xe_bo_addr(bo, bo_offset, GEN8_PAGE_SIZE, &is_lmem); 63 + pde |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW; 64 + 65 + XE_WARN_ON(IS_DGFX(xe_bo_device(bo)) && !is_lmem); 66 + 67 + /* FIXME: I don't think the PPAT handling is correct for MTL */ 68 + 69 + if (level != XE_CACHE_NONE) 70 + pde |= PPAT_CACHED_PDE; 71 + else 72 + pde |= PPAT_UNCACHED; 73 + 74 + return pde; 75 + } 76 + 77 + static dma_addr_t vma_addr(struct xe_vma *vma, u64 offset, 78 + size_t page_size, bool *is_lmem) 79 + { 80 + if (xe_vma_is_userptr(vma)) { 81 + struct xe_res_cursor cur; 82 + u64 page; 83 + 84 + *is_lmem = false; 85 + page = offset >> PAGE_SHIFT; 86 + offset &= (PAGE_SIZE - 1); 87 + 88 + xe_res_first_sg(vma->userptr.sg, page << PAGE_SHIFT, page_size, 89 + &cur); 90 + return xe_res_dma(&cur) + offset; 91 + } else { 92 + return xe_bo_addr(vma->bo, offset, page_size, is_lmem); 93 + } 94 + } 95 + 96 + static u64 __gen8_pte_encode(u64 pte, enum xe_cache_level cache, u32 flags, 97 + u32 pt_level) 98 + { 99 + pte |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW; 100 + 101 + if (unlikely(flags & PTE_READ_ONLY)) 102 + pte &= ~GEN8_PAGE_RW; 103 + 104 + /* FIXME: I don't think the PPAT handling is correct for MTL */ 105 + 106 + switch (cache) { 107 + case XE_CACHE_NONE: 108 + pte |= PPAT_UNCACHED; 109 + break; 110 + case XE_CACHE_WT: 111 + pte |= PPAT_DISPLAY_ELLC; 112 + break; 113 + default: 114 + pte |= PPAT_CACHED; 115 + break; 116 + } 117 + 118 + if (pt_level == 1) 119 + pte |= GEN8_PDE_PS_2M; 120 + else if (pt_level == 2) 121 + pte |= GEN8_PDPE_PS_1G; 122 + 123 + /* XXX: Does hw support 1 GiB pages? */ 124 + XE_BUG_ON(pt_level > 2); 125 + 126 + return pte; 127 + } 128 + 129 + /** 130 + * gen8_pte_encode() - Encode a page-table entry pointing to memory. 131 + * @vma: The vma representing the memory to point to. 
132 + * @bo: If @vma is NULL, representing the memory to point to. 133 + * @offset: The offset into @vma or @bo. 134 + * @cache: The cache level indicating the caching of the pointed-to memory. 135 + * @flags: Currently only supports PTE_READ_ONLY for read-only access. 136 + * @pt_level: The page-table level of the page-table into which the entry 137 + * is to be inserted. 138 + * 139 + * TODO: Rename. 140 + * 141 + * Return: An encoded page-table entry. No errors. 142 + */ 143 + u64 gen8_pte_encode(struct xe_vma *vma, struct xe_bo *bo, 144 + u64 offset, enum xe_cache_level cache, 145 + u32 flags, u32 pt_level) 146 + { 147 + u64 pte; 148 + bool is_vram; 149 + 150 + if (vma) 151 + pte = vma_addr(vma, offset, GEN8_PAGE_SIZE, &is_vram); 152 + else 153 + pte = xe_bo_addr(bo, offset, GEN8_PAGE_SIZE, &is_vram); 154 + 155 + if (is_vram) { 156 + pte |= GEN12_PPGTT_PTE_LM; 157 + if (vma && vma->use_atomic_access_pte_bit) 158 + pte |= GEN12_USM_PPGTT_PTE_AE; 159 + } 160 + 161 + return __gen8_pte_encode(pte, cache, flags, pt_level); 162 + } 163 + 164 + static u64 __xe_pt_empty_pte(struct xe_gt *gt, struct xe_vm *vm, 165 + unsigned int level) 166 + { 167 + u8 id = gt->info.id; 168 + 169 + XE_BUG_ON(xe_gt_is_media_type(gt)); 170 + 171 + if (!vm->scratch_bo[id]) 172 + return 0; 173 + 174 + if (level == 0) { 175 + u64 empty = gen8_pte_encode(NULL, vm->scratch_bo[id], 0, 176 + XE_CACHE_WB, 0, 0); 177 + if (vm->flags & XE_VM_FLAGS_64K) 178 + empty |= GEN12_PTE_PS64; 179 + 180 + return empty; 181 + } else { 182 + return gen8_pde_encode(vm->scratch_pt[id][level - 1]->bo, 0, 183 + XE_CACHE_WB); 184 + } 185 + }
If the 195 + * level is greater than zero, then it's assumed to be a directory page- 196 + * table and the directory structure is also allocated and initialized to 197 + * NULL pointers. 198 + * 199 + * Return: A valid struct xe_pt pointer on success, Pointer error code on 200 + * error. 201 + */ 202 + struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_gt *gt, 203 + unsigned int level) 204 + { 205 + struct xe_pt *pt; 206 + struct xe_bo *bo; 207 + size_t size; 208 + int err; 209 + 210 + size = !level ? sizeof(struct xe_pt) : sizeof(struct xe_pt_dir) + 211 + GEN8_PDES * sizeof(struct xe_ptw *); 212 + pt = kzalloc(size, GFP_KERNEL); 213 + if (!pt) 214 + return ERR_PTR(-ENOMEM); 215 + 216 + bo = xe_bo_create_pin_map(vm->xe, gt, vm, SZ_4K, 217 + ttm_bo_type_kernel, 218 + XE_BO_CREATE_VRAM_IF_DGFX(gt) | 219 + XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT | 220 + XE_BO_CREATE_PINNED_BIT); 221 + if (IS_ERR(bo)) { 222 + err = PTR_ERR(bo); 223 + goto err_kfree; 224 + } 225 + pt->bo = bo; 226 + pt->level = level; 227 + pt->base.dir = level ? &as_xe_pt_dir(pt)->dir : NULL; 228 + 229 + XE_BUG_ON(level > XE_VM_MAX_LEVEL); 230 + 231 + return pt; 232 + 233 + err_kfree: 234 + kfree(pt); 235 + return ERR_PTR(err); 236 + } 237 + 238 + /** 239 + * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero 240 + * entries. 241 + * @gt: The gt the scratch pagetable of which to use. 242 + * @vm: The vm we populate for. 243 + * @pt: The pagetable the bo of which to initialize. 244 + * 245 + * Populate the page-table bo of @pt with entries pointing into the gt's 246 + * scratch page-table tree if any. Otherwise populate with zeros. 247 + */ 248 + void xe_pt_populate_empty(struct xe_gt *gt, struct xe_vm *vm, 249 + struct xe_pt *pt) 250 + { 251 + struct iosys_map *map = &pt->bo->vmap; 252 + u64 empty; 253 + int i; 254 + 255 + XE_BUG_ON(xe_gt_is_media_type(gt)); 256 + 257 + if (!vm->scratch_bo[gt->info.id]) { 258 + /* 259 + * FIXME: Some memory is already allocated to zero?
260 + * Find out which memory that is and avoid this memset... 261 + */ 262 + xe_map_memset(vm->xe, map, 0, 0, SZ_4K); 263 + } else { 264 + empty = __xe_pt_empty_pte(gt, vm, pt->level); 265 + for (i = 0; i < GEN8_PDES; i++) 266 + xe_pt_write(vm->xe, map, i, empty); 267 + } 268 + } 269 + 270 + /** 271 + * xe_pt_shift() - Return the ilog2 value of the size of the address range of 272 + * a page-table at a certain level. 273 + * @level: The level. 274 + * 275 + * Return: The ilog2 value of the size of the address range of a page-table 276 + * at level @level. 277 + */ 278 + unsigned int xe_pt_shift(unsigned int level) 279 + { 280 + return GEN8_PTE_SHIFT + GEN8_PDE_SHIFT * level; 281 + } 282 + 283 + /** 284 + * xe_pt_destroy() - Destroy a page-table tree. 285 + * @pt: The root of the page-table tree to destroy. 286 + * @flags: vm flags. Currently unused. 287 + * @deferred: List head of lockless list for deferred putting. NULL for 288 + * immediate putting. 289 + * 290 + * Puts the page-table bo, recursively calls xe_pt_destroy on all children 291 + * and finally frees @pt. TODO: Can we remove the @flags argument? 292 + */ 293 + void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) 294 + { 295 + int i; 296 + 297 + if (!pt) 298 + return; 299 + 300 + XE_BUG_ON(!list_empty(&pt->bo->vmas)); 301 + xe_bo_unpin(pt->bo); 302 + xe_bo_put_deferred(pt->bo, deferred); 303 + 304 + if (pt->level > 0 && pt->num_live) { 305 + struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); 306 + 307 + for (i = 0; i < GEN8_PDES; i++) { 308 + if (xe_pt_entry(pt_dir, i)) 309 + xe_pt_destroy(xe_pt_entry(pt_dir, i), flags, 310 + deferred); 311 + } 312 + } 313 + kfree(pt); 314 + } 315 + 316 + /** 317 + * xe_pt_create_scratch() - Setup a scratch memory pagetable tree for the 318 + * given gt and vm. 319 + * @xe: xe device. 320 + * @gt: gt to set up for. 321 + * @vm: vm to set up for. 322 + * 323 + * Sets up a pagetable tree with one page-table per level and a single 324 + * leaf bo. 
All pagetable entries point to the single page-table or, 325 + * for L0, the single bo one level below. 326 + * 327 + * Return: 0 on success, negative error code on error. 328 + */ 329 + int xe_pt_create_scratch(struct xe_device *xe, struct xe_gt *gt, 330 + struct xe_vm *vm) 331 + { 332 + u8 id = gt->info.id; 333 + int i; 334 + 335 + vm->scratch_bo[id] = xe_bo_create(xe, gt, vm, SZ_4K, 336 + ttm_bo_type_kernel, 337 + XE_BO_CREATE_VRAM_IF_DGFX(gt) | 338 + XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT | 339 + XE_BO_CREATE_PINNED_BIT); 340 + if (IS_ERR(vm->scratch_bo[id])) 341 + return PTR_ERR(vm->scratch_bo[id]); 342 + xe_bo_pin(vm->scratch_bo[id]); 343 + 344 + for (i = 0; i < vm->pt_root[id]->level; i++) { 345 + vm->scratch_pt[id][i] = xe_pt_create(vm, gt, i); 346 + if (IS_ERR(vm->scratch_pt[id][i])) 347 + return PTR_ERR(vm->scratch_pt[id][i]); 348 + 349 + xe_pt_populate_empty(gt, vm, vm->scratch_pt[id][i]); 350 + } 351 + 352 + return 0; 353 + } 354 + 355 + /** 356 + * DOC: Pagetable building 357 + * 358 + * Below we use the term "page-table" for both page-directories, containing 359 + * pointers to lower level page-directories or page-tables, and level 0 360 + * page-tables that contain only page-table-entries pointing to memory pages. 361 + * 362 + * When inserting an address range in an already existing page-table tree 363 + * there will typically be a set of page-tables that are shared with other 364 + * address ranges, and a set that are private to this address range. 365 + * The set of shared page-tables can be at most two per level, 366 + * and those can't be updated immediately because the entries of those 367 + * page-tables may still be in use by the gpu for other mappings. Therefore 368 + * when inserting entries into those, we instead stage those insertions by 369 + * adding insertion data into struct xe_vm_pgtable_update structures. This 370 + * data, (subtrees for the cpu and page-table-entries for the gpu) is then 371 + * added in a separate commit step. 
CPU-data is committed while still under the 372 + * vm lock, the object lock and, for userptr, the notifier lock in read mode. 373 + * The GPU async data is committed either by the GPU or CPU after fulfilling 374 + * relevant dependencies. 375 + * For non-shared page-tables (and, in fact, for shared ones that aren't 376 + * existing at the time of staging), we add the data in-place without the 377 + * special update structures. This private part of the page-table tree will 378 + * remain disconnected from the vm page-table tree until data is committed to 379 + * the shared page tables of the vm tree in the commit phase. 380 + */ 381 + 382 + struct xe_pt_update { 383 + /** @update: The update structure we're building for this parent. */ 384 + struct xe_vm_pgtable_update *update; 385 + /** @parent: The parent. Used to detect a parent change. */ 386 + struct xe_pt *parent; 387 + /** @preexisting: Whether the parent was pre-existing or allocated */ 388 + bool preexisting; 389 + }; 390 + 391 + struct xe_pt_stage_bind_walk { 392 + /** @base: The base class. */ 393 + struct xe_pt_walk base; 394 + 395 + /* Input parameters for the walk */ 396 + /** @vm: The vm we're building for. */ 397 + struct xe_vm *vm; 398 + /** @gt: The gt we're building for. */ 399 + struct xe_gt *gt; 400 + /** @cache: Desired cache level for the ptes */ 401 + enum xe_cache_level cache; 402 + /** @default_pte: PTE flag only template. No address is associated */ 403 + u64 default_pte; 404 + /** @dma_offset: DMA offset to add to the PTE. */ 405 + u64 dma_offset; 406 + /** 407 + * @needs_64K: This address range enforces 64K alignment and 408 + * granularity. 409 + */ 410 + bool needs_64K; 411 + /** 412 + * @pte_flags: Flags determining PTE setup. These are not flags 413 + * encoded directly in the PTE. See @default_pte for those. 414 + */ 415 + u32 pte_flags; 416 + 417 + /* Also input, but is updated during the walk */ 418 + /** @curs: The DMA address cursor.
*/ 419 + struct xe_res_cursor *curs; 420 + /** @va_curs_start: The virtual address corresponding to @curs->start */ 421 + u64 va_curs_start; 422 + 423 + /* Output */ 424 + struct xe_walk_update { 425 + /** @wupd.entries: Caller provided storage. */ 426 + struct xe_vm_pgtable_update *entries; 427 + /** @wupd.num_used_entries: Number of update @entries used. */ 428 + unsigned int num_used_entries; 429 + /** @wupd.updates: Tracks the update entry at a given level */ 430 + struct xe_pt_update updates[XE_VM_MAX_LEVEL + 1]; 431 + } wupd; 432 + 433 + /* Walk state */ 434 + /** 435 + * @l0_end_addr: The end address of the current l0 leaf. Used for 436 + * 64K granularity detection. 437 + */ 438 + u64 l0_end_addr; 439 + /** @addr_64K: The start address of the current 64K chunk. */ 440 + u64 addr_64K; 441 + /** @found_64K: Whether @addr_64K actually points to a 64K chunk. */ 442 + bool found_64K; 443 + }; 444 + 445 + static int 446 + xe_pt_new_shared(struct xe_walk_update *wupd, struct xe_pt *parent, 447 + pgoff_t offset, bool alloc_entries) 448 + { 449 + struct xe_pt_update *upd = &wupd->updates[parent->level]; 450 + struct xe_vm_pgtable_update *entry; 451 + 452 + /* 453 + * For *each level*, we can only have one active 454 + * struct xe_pt_update at any one time.
Once we move on to a 455 + * new parent and page-directory, the old one is complete, and 456 + * updates are either already stored in the build tree or in 457 + * @wupd->entries 458 + */ 459 + if (likely(upd->parent == parent)) 460 + return 0; 461 + 462 + upd->parent = parent; 463 + upd->preexisting = true; 464 + 465 + if (wupd->num_used_entries == XE_VM_MAX_LEVEL * 2 + 1) 466 + return -EINVAL; 467 + 468 + entry = wupd->entries + wupd->num_used_entries++; 469 + upd->update = entry; 470 + entry->ofs = offset; 471 + entry->pt_bo = parent->bo; 472 + entry->pt = parent; 473 + entry->flags = 0; 474 + entry->qwords = 0; 475 + 476 + if (alloc_entries) { 477 + entry->pt_entries = kmalloc_array(GEN8_PDES, 478 + sizeof(*entry->pt_entries), 479 + GFP_KERNEL); 480 + if (!entry->pt_entries) 481 + return -ENOMEM; 482 + } 483 + 484 + return 0; 485 + } 486 + 487 + /* 488 + * NOTE: This is a very frequently called function so we allow ourselves 489 + * to annotate (using branch prediction hints) the fastpath of updating a 490 + * non-pre-existing pagetable with leaf ptes. 491 + */ 492 + static int 493 + xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent, 494 + pgoff_t offset, struct xe_pt *xe_child, u64 pte) 495 + { 496 + struct xe_pt_update *upd = &xe_walk->wupd.updates[parent->level]; 497 + struct xe_pt_update *child_upd = xe_child ? 498 + &xe_walk->wupd.updates[xe_child->level] : NULL; 499 + int ret; 500 + 501 + ret = xe_pt_new_shared(&xe_walk->wupd, parent, offset, true); 502 + if (unlikely(ret)) 503 + return ret; 504 + 505 + /* 506 + * Register this new pagetable so that it won't be recognized as 507 + * a shared pagetable by a subsequent insertion. 508 + */ 509 + if (unlikely(child_upd)) { 510 + child_upd->update = NULL; 511 + child_upd->parent = xe_child; 512 + child_upd->preexisting = false; 513 + } 514 + 515 + if (likely(!upd->preexisting)) { 516 + /* Continue building a non-connected subtree. 
*/ 517 + struct iosys_map *map = &parent->bo->vmap; 518 + 519 + if (unlikely(xe_child)) 520 + parent->base.dir->entries[offset] = &xe_child->base; 521 + 522 + xe_pt_write(xe_walk->vm->xe, map, offset, pte); 523 + parent->num_live++; 524 + } else { 525 + /* Shared pt. Stage update. */ 526 + unsigned int idx; 527 + struct xe_vm_pgtable_update *entry = upd->update; 528 + 529 + idx = offset - entry->ofs; 530 + entry->pt_entries[idx].pt = xe_child; 531 + entry->pt_entries[idx].pte = pte; 532 + entry->qwords++; 533 + } 534 + 535 + return 0; 536 + } 537 + 538 + static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level, 539 + struct xe_pt_stage_bind_walk *xe_walk) 540 + { 541 + u64 size, dma; 542 + 543 + /* Does the virtual range requested cover a huge pte? */ 544 + if (!xe_pt_covers(addr, next, level, &xe_walk->base)) 545 + return false; 546 + 547 + /* Does the DMA segment cover the whole pte? */ 548 + if (next - xe_walk->va_curs_start > xe_walk->curs->size) 549 + return false; 550 + 551 + /* Is the DMA address huge PTE size aligned? */ 552 + size = next - addr; 553 + dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs); 554 + 555 + return IS_ALIGNED(dma, size); 556 + } 557 + 558 + /* 559 + * Scan the requested mapping to check whether it can be done entirely 560 + * with 64K PTEs. 
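The 64K scan described above demands that both the virtual range and its backing DMA addresses stay 64K-aligned for every step. A simplified standalone sketch of that test for a contiguous DMA block (demo_* names are hypothetical; the real code walks a struct xe_res_cursor over possibly fragmented segments):

```c
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define DEMO_SZ_64K 0x10000ull

/* Can [addr, next) be expressed purely as 64K PTEs given a contiguous
 * DMA mapping starting at @dma? Both sides must be 64K-aligned and the
 * range must be a whole number of 64K chunks. */
static bool demo_can_use_ps64(uint64_t addr, uint64_t next, uint64_t dma)
{
	if (addr % DEMO_SZ_64K || dma % DEMO_SZ_64K)
		return false;

	return (next - addr) % DEMO_SZ_64K == 0;
}
```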
561 + */ 562 + static bool 563 + xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) 564 + { 565 + struct xe_res_cursor curs = *xe_walk->curs; 566 + 567 + if (!IS_ALIGNED(addr, SZ_64K)) 568 + return false; 569 + 570 + if (next > xe_walk->l0_end_addr) 571 + return false; 572 + 573 + xe_res_next(&curs, addr - xe_walk->va_curs_start); 574 + for (; addr < next; addr += SZ_64K) { 575 + if (!IS_ALIGNED(xe_res_dma(&curs), SZ_64K) || curs.size < SZ_64K) 576 + return false; 577 + 578 + xe_res_next(&curs, SZ_64K); 579 + } 580 + 581 + return addr == next; 582 + } 583 + 584 + /* 585 + * For non-compact "normal" 4K level-0 pagetables, we want to try to group 586 + * addresses together in 64K-contiguous regions to add a 64K TLB hint for the 587 + * device to the PTE. 588 + * This function determines whether the address is part of such a 589 + * segment. For VRAM in normal pagetables, this is strictly necessary on 590 + * some devices. 591 + */ 592 + static bool 593 + xe_pt_is_pte_ps64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) 594 + { 595 + /* Address is within an already found 64K region */ 596 + if (xe_walk->found_64K && addr - xe_walk->addr_64K < SZ_64K) 597 + return true; 598 + 599 + xe_walk->found_64K = xe_pt_scan_64K(addr, addr + SZ_64K, xe_walk); 600 + xe_walk->addr_64K = addr; 601 + 602 + return xe_walk->found_64K; 603 + } 604 + 605 + static int 606 + xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, 607 + unsigned int level, u64 addr, u64 next, 608 + struct xe_ptw **child, 609 + enum page_walk_action *action, 610 + struct xe_pt_walk *walk) 611 + { 612 + struct xe_pt_stage_bind_walk *xe_walk = 613 + container_of(walk, typeof(*xe_walk), base); 614 + struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base); 615 + struct xe_pt *xe_child; 616 + bool covers; 617 + int ret = 0; 618 + u64 pte; 619 + 620 + /* Is this a leaf entry? */ 621 + if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) { 622
+ struct xe_res_cursor *curs = xe_walk->curs; 623 + 624 + XE_WARN_ON(xe_walk->va_curs_start != addr); 625 + 626 + pte = __gen8_pte_encode(xe_res_dma(curs) + xe_walk->dma_offset, 627 + xe_walk->cache, xe_walk->pte_flags, 628 + level); 629 + pte |= xe_walk->default_pte; 630 + 631 + /* 632 + * Set the GEN12_PTE_PS64 hint if possible, otherwise if 633 + * this device *requires* 64K PTE size for VRAM, fail. 634 + */ 635 + if (level == 0 && !xe_parent->is_compact) { 636 + if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) 637 + pte |= GEN12_PTE_PS64; 638 + else if (XE_WARN_ON(xe_walk->needs_64K)) 639 + return -EINVAL; 640 + } 641 + 642 + ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, NULL, pte); 643 + if (unlikely(ret)) 644 + return ret; 645 + 646 + xe_res_next(curs, next - addr); 647 + xe_walk->va_curs_start = next; 648 + *action = ACTION_CONTINUE; 649 + 650 + return ret; 651 + } 652 + 653 + /* 654 + * Descending to lower level. Determine if we need to allocate a 655 + * new page table or -directory, which we do if there is no 656 + * previous one or there is one we can completely replace. 657 + */ 658 + if (level == 1) { 659 + walk->shifts = xe_normal_pt_shifts; 660 + xe_walk->l0_end_addr = next; 661 + } 662 + 663 + covers = xe_pt_covers(addr, next, level, &xe_walk->base); 664 + if (covers || !*child) { 665 + u64 flags = 0; 666 + 667 + xe_child = xe_pt_create(xe_walk->vm, xe_walk->gt, level - 1); 668 + if (IS_ERR(xe_child)) 669 + return PTR_ERR(xe_child); 670 + 671 + xe_pt_set_addr(xe_child, 672 + round_down(addr, 1ull << walk->shifts[level])); 673 + 674 + if (!covers) 675 + xe_pt_populate_empty(xe_walk->gt, xe_walk->vm, xe_child); 676 + 677 + *child = &xe_child->base; 678 + 679 + /* 680 + * Prefer the compact pagetable layout for L0 if possible. 681 + * TODO: Suballocate the pt bo to avoid wasting a lot of 682 + * memory. 
683 + */ 684 + if (GRAPHICS_VERx100(xe_walk->gt->xe) >= 1250 && level == 1 && 685 + covers && xe_pt_scan_64K(addr, next, xe_walk)) { 686 + walk->shifts = xe_compact_pt_shifts; 687 + flags |= GEN12_PDE_64K; 688 + xe_child->is_compact = true; 689 + } 690 + 691 + pte = gen8_pde_encode(xe_child->bo, 0, xe_walk->cache) | flags; 692 + ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child, 693 + pte); 694 + } 695 + 696 + *action = ACTION_SUBTREE; 697 + return ret; 698 + } 699 + 700 + static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = { 701 + .pt_entry = xe_pt_stage_bind_entry, 702 + }; 703 + 704 + /** 705 + * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address 706 + * range. 707 + * @gt: The gt we're building for. 708 + * @vma: The vma indicating the address range. 709 + * @entries: Storage for the update entries used for connecting the tree to 710 + * the main tree at commit time. 711 + * @num_entries: On output contains the number of @entries used. 712 + * 713 + * This function builds a disconnected page-table tree for a given address 714 + * range. The tree is connected to the main vm tree for the gpu using 715 + * xe_migrate_update_pgtables() and for the cpu using xe_pt_commit_bind(). 716 + * The function builds xe_vm_pgtable_update structures for already existing 717 + * shared page-tables; non-existing shared and non-shared page-tables 718 + * are built and populated directly. 719 + * 720 + * Return: 0 on success, negative error code on error.
721 + */ 722 + static int 723 + xe_pt_stage_bind(struct xe_gt *gt, struct xe_vma *vma, 724 + struct xe_vm_pgtable_update *entries, u32 *num_entries) 725 + { 726 + struct xe_bo *bo = vma->bo; 727 + bool is_vram = !xe_vma_is_userptr(vma) && bo && xe_bo_is_vram(bo); 728 + struct xe_res_cursor curs; 729 + struct xe_pt_stage_bind_walk xe_walk = { 730 + .base = { 731 + .ops = &xe_pt_stage_bind_ops, 732 + .shifts = xe_normal_pt_shifts, 733 + .max_level = XE_PT_HIGHEST_LEVEL, 734 + }, 735 + .vm = vma->vm, 736 + .gt = gt, 737 + .curs = &curs, 738 + .va_curs_start = vma->start, 739 + .pte_flags = vma->pte_flags, 740 + .wupd.entries = entries, 741 + .needs_64K = (vma->vm->flags & XE_VM_FLAGS_64K) && is_vram, 742 + }; 743 + struct xe_pt *pt = vma->vm->pt_root[gt->info.id]; 744 + int ret; 745 + 746 + if (is_vram) { 747 + xe_walk.default_pte = GEN12_PPGTT_PTE_LM; 748 + if (vma && vma->use_atomic_access_pte_bit) 749 + xe_walk.default_pte |= GEN12_USM_PPGTT_PTE_AE; 750 + xe_walk.dma_offset = gt->mem.vram.io_start - 751 + gt_to_xe(gt)->mem.vram.io_start; 752 + xe_walk.cache = XE_CACHE_WB; 753 + } else { 754 + if (!xe_vma_is_userptr(vma) && bo->flags & XE_BO_SCANOUT_BIT) 755 + xe_walk.cache = XE_CACHE_WT; 756 + else 757 + xe_walk.cache = XE_CACHE_WB; 758 + } 759 + 760 + xe_bo_assert_held(bo); 761 + if (xe_vma_is_userptr(vma)) 762 + xe_res_first_sg(vma->userptr.sg, 0, vma->end - vma->start + 1, 763 + &curs); 764 + else if (xe_bo_is_vram(bo)) 765 + xe_res_first(bo->ttm.resource, vma->bo_offset, 766 + vma->end - vma->start + 1, &curs); 767 + else 768 + xe_res_first_sg(xe_bo_get_sg(bo), vma->bo_offset, 769 + vma->end - vma->start + 1, &curs); 770 + 771 + ret = xe_pt_walk_range(&pt->base, pt->level, vma->start, vma->end + 1, 772 + &xe_walk.base); 773 + 774 + *num_entries = xe_walk.wupd.num_used_entries; 775 + return ret; 776 + } 777 + 778 + /** 779 + * xe_pt_nonshared_offsets() - Determine the non-shared entry offsets of a 780 + * shared pagetable. 
781 + * @addr: The start address within the non-shared pagetable. 782 + * @end: The end address within the non-shared pagetable. 783 + * @level: The level of the non-shared pagetable. 784 + * @walk: Walk info. The function adjusts the walk action. 785 + * @action: next action to perform (see enum page_walk_action) 786 + * @offset: Ignored on input; first non-shared entry on output. 787 + * @end_offset: Ignored on input; last non-shared entry + 1 on output. 788 + * 789 + * A non-shared page-table has some entries that belong to the address range 790 + * and others that don't. This function determines the entries that belong 791 + * fully to the address range. Depending on level, some entries may 792 + * partially belong to the address range (that can't happen at level 0). 793 + * The function detects that and adjusts the offsets to not include those 794 + * partial entries. Iff it does detect partial entries, we know that there must 795 + * be shared page tables also at lower levels, so it adjusts the walk action 796 + * accordingly. 797 + * 798 + * Return: true if there were non-shared entries, false otherwise.
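The offset clamping described above can be sketched standalone: the first and last entries are dropped from the range when they are only partially covered, since their remaining contents are shared with other mappings. This demo assumes 512-entry tables and a caller-supplied per-entry shift (e.g. 21 for a level-1 table with 2M entries); demo_* names are hypothetical:

```c
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Compute the range of table entries fully private to [addr, end) at a
 * level whose entries each span (1 << shift) bytes. Partially covered
 * first/last entries are excluded, mirroring xe_pt_nonshared_offsets(). */
static bool demo_nonshared(uint64_t addr, uint64_t end, unsigned int shift,
			   uint64_t *offset, uint64_t *end_offset)
{
	uint64_t size = 1ull << shift;
	uint64_t first = addr >> shift;
	uint64_t last = (end - 1) >> shift;

	*offset = first & 0x1ff;		/* 512 entries per table */
	*end_offset = *offset + (last - first + 1);

	if (addr & (size - 1))
		(*offset)++;			/* first entry partially shared */
	if (end & (size - 1))
		(*end_offset)--;		/* last entry partially shared */

	return *end_offset > *offset;
}
```

For example, [2M, 6M) at shift 21 yields entries [1, 3), while a range that only partially covers a single 2M entry yields no fully private entries at all.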
799 + */ 800 + static bool xe_pt_nonshared_offsets(u64 addr, u64 end, unsigned int level, 801 + struct xe_pt_walk *walk, 802 + enum page_walk_action *action, 803 + pgoff_t *offset, pgoff_t *end_offset) 804 + { 805 + u64 size = 1ull << walk->shifts[level]; 806 + 807 + *offset = xe_pt_offset(addr, level, walk); 808 + *end_offset = xe_pt_num_entries(addr, end, level, walk) + *offset; 809 + 810 + if (!level) 811 + return true; 812 + 813 + /* 814 + * If addr or next are not size aligned, there are shared pts at lower 815 + * level, so in that case traverse down the subtree. 816 + */ 817 + *action = ACTION_CONTINUE; 818 + if (!IS_ALIGNED(addr, size)) { 819 + *action = ACTION_SUBTREE; 820 + (*offset)++; 821 + } 822 + 823 + if (!IS_ALIGNED(end, size)) { 824 + *action = ACTION_SUBTREE; 825 + (*end_offset)--; 826 + } 827 + 828 + return *end_offset > *offset; 829 + } 830 + 831 + struct xe_pt_zap_ptes_walk { 832 + /** @base: The walk base-class */ 833 + struct xe_pt_walk base; 834 + 835 + /* Input parameters for the walk */ 836 + /** @gt: The gt we're building for */ 837 + struct xe_gt *gt; 838 + 839 + /* Output */ 840 + /** @needs_invalidate: Whether we need to invalidate the TLB */ 841 + bool needs_invalidate; 842 + }; 843 + 844 + static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset, 845 + unsigned int level, u64 addr, u64 next, 846 + struct xe_ptw **child, 847 + enum page_walk_action *action, 848 + struct xe_pt_walk *walk) 849 + { 850 + struct xe_pt_zap_ptes_walk *xe_walk = 851 + container_of(walk, typeof(*xe_walk), base); 852 + struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); 853 + pgoff_t end_offset; 854 + 855 + XE_BUG_ON(!*child); 856 + XE_BUG_ON(!level && xe_child->is_compact); 857 + 858 + /* 859 + * Note that we're called from an entry callback, and we're dealing 860 + * with the child of that entry rather than the parent, so we need to 861 + * adjust the level down.
862 + */ 863 + if (xe_pt_nonshared_offsets(addr, next, --level, walk, action, &offset, 864 + &end_offset)) { 865 + xe_map_memset(gt_to_xe(xe_walk->gt), &xe_child->bo->vmap, 866 + offset * sizeof(u64), 0, 867 + (end_offset - offset) * sizeof(u64)); 868 + xe_walk->needs_invalidate = true; 869 + } 870 + 871 + return 0; 872 + } 873 + 874 + static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = { 875 + .pt_entry = xe_pt_zap_ptes_entry, 876 + }; 877 + 878 + /** 879 + * xe_pt_zap_ptes() - Zap (zero) gpu ptes of an address range 880 + * @gt: The gt we're zapping for. 881 + * @vma: GPU VMA detailing address range. 882 + * 883 + * Eviction and userptr invalidation need to be able to zap the 884 + * gpu ptes of a given address range in pagefaulting mode. 885 + * In order to be able to do that, this function needs access to the shared 886 + * page-table entries so it can either clear the leaf PTEs or 887 + * clear the pointers to lower-level page-tables. The caller is required 888 + * to hold the necessary locks to ensure neither the page-table connectivity 889 + * nor the page-table entries of the range are updated from under us. 890 + * 891 + * Return: Whether ptes were actually updated and a TLB invalidation is 892 + * required.
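The zap itself reduces to clearing the qwords of the non-shared entry range computed above; the GPU then faults (or hits scratch) on the zapped range until a TLB invalidation and rebind. A minimal standalone sketch, with a plain uint64_t array standing in for the page-table BO's CPU mapping (demo_* names hypothetical):

```c
#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Zero the page-table qwords in [offset, end_offset), analogous to the
 * xe_map_memset() call in xe_pt_zap_ptes_entry(). */
static void demo_zap(uint64_t *pt, unsigned int offset,
		     unsigned int end_offset)
{
	memset(&pt[offset], 0, (end_offset - offset) * sizeof(uint64_t));
}
```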
893 + */ 894 + bool xe_pt_zap_ptes(struct xe_gt *gt, struct xe_vma *vma) 895 + { 896 + struct xe_pt_zap_ptes_walk xe_walk = { 897 + .base = { 898 + .ops = &xe_pt_zap_ptes_ops, 899 + .shifts = xe_normal_pt_shifts, 900 + .max_level = XE_PT_HIGHEST_LEVEL, 901 + }, 902 + .gt = gt, 903 + }; 904 + struct xe_pt *pt = vma->vm->pt_root[gt->info.id]; 905 + 906 + if (!(vma->gt_present & BIT(gt->info.id))) 907 + return false; 908 + 909 + (void)xe_pt_walk_shared(&pt->base, pt->level, vma->start, vma->end + 1, 910 + &xe_walk.base); 911 + 912 + return xe_walk.needs_invalidate; 913 + } 914 + 915 + static void 916 + xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_gt *gt, 917 + struct iosys_map *map, void *data, 918 + u32 qword_ofs, u32 num_qwords, 919 + const struct xe_vm_pgtable_update *update) 920 + { 921 + struct xe_pt_entry *ptes = update->pt_entries; 922 + u64 *ptr = data; 923 + u32 i; 924 + 925 + XE_BUG_ON(xe_gt_is_media_type(gt)); 926 + 927 + for (i = 0; i < num_qwords; i++) { 928 + if (map) 929 + xe_map_wr(gt_to_xe(gt), map, (qword_ofs + i) * 930 + sizeof(u64), u64, ptes[i].pte); 931 + else 932 + ptr[i] = ptes[i].pte; 933 + } 934 + } 935 + 936 + static void xe_pt_abort_bind(struct xe_vma *vma, 937 + struct xe_vm_pgtable_update *entries, 938 + u32 num_entries) 939 + { 940 + u32 i, j; 941 + 942 + for (i = 0; i < num_entries; i++) { 943 + if (!entries[i].pt_entries) 944 + continue; 945 + 946 + for (j = 0; j < entries[i].qwords; j++) 947 + xe_pt_destroy(entries[i].pt_entries[j].pt, vma->vm->flags, NULL); 948 + kfree(entries[i].pt_entries); 949 + } 950 + } 951 + 952 + static void xe_pt_commit_locks_assert(struct xe_vma *vma) 953 + { 954 + struct xe_vm *vm = vma->vm; 955 + 956 + lockdep_assert_held(&vm->lock); 957 + 958 + if (xe_vma_is_userptr(vma)) 959 + lockdep_assert_held_read(&vm->userptr.notifier_lock); 960 + else 961 + dma_resv_assert_held(vma->bo->ttm.base.resv); 962 + 963 + dma_resv_assert_held(&vm->resv); 964 + } 965 + 966 + static void 
xe_pt_commit_bind(struct xe_vma *vma, 967 + struct xe_vm_pgtable_update *entries, 968 + u32 num_entries, bool rebind, 969 + struct llist_head *deferred) 970 + { 971 + u32 i, j; 972 + 973 + xe_pt_commit_locks_assert(vma); 974 + 975 + for (i = 0; i < num_entries; i++) { 976 + struct xe_pt *pt = entries[i].pt; 977 + struct xe_pt_dir *pt_dir; 978 + 979 + if (!rebind) 980 + pt->num_live += entries[i].qwords; 981 + 982 + if (!pt->level) { 983 + kfree(entries[i].pt_entries); 984 + continue; 985 + } 986 + 987 + pt_dir = as_xe_pt_dir(pt); 988 + for (j = 0; j < entries[i].qwords; j++) { 989 + u32 j_ = j + entries[i].ofs; 990 + struct xe_pt *newpte = entries[i].pt_entries[j].pt; 991 + 992 + if (xe_pt_entry(pt_dir, j_)) 993 + xe_pt_destroy(xe_pt_entry(pt_dir, j_), 994 + vma->vm->flags, deferred); 995 + 996 + pt_dir->dir.entries[j_] = &newpte->base; 997 + } 998 + kfree(entries[i].pt_entries); 999 + } 1000 + } 1001 + 1002 + static int 1003 + xe_pt_prepare_bind(struct xe_gt *gt, struct xe_vma *vma, 1004 + struct xe_vm_pgtable_update *entries, u32 *num_entries, 1005 + bool rebind) 1006 + { 1007 + int err; 1008 + 1009 + *num_entries = 0; 1010 + err = xe_pt_stage_bind(gt, vma, entries, num_entries); 1011 + if (!err) 1012 + BUG_ON(!*num_entries); 1013 + else /* abort! 
*/ 1014 + xe_pt_abort_bind(vma, entries, *num_entries); 1015 + 1016 + return err; 1017 + } 1018 + 1019 + static void xe_vm_dbg_print_entries(struct xe_device *xe, 1020 + const struct xe_vm_pgtable_update *entries, 1021 + unsigned int num_entries) 1022 + #if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) 1023 + { 1024 + unsigned int i; 1025 + 1026 + vm_dbg(&xe->drm, "%u entries to update\n", num_entries); 1027 + for (i = 0; i < num_entries; i++) { 1028 + const struct xe_vm_pgtable_update *entry = &entries[i]; 1029 + struct xe_pt *xe_pt = entry->pt; 1030 + u64 page_size = 1ull << xe_pt_shift(xe_pt->level); 1031 + u64 end; 1032 + u64 start; 1033 + 1034 + XE_BUG_ON(entry->pt->is_compact); 1035 + start = entry->ofs * page_size; 1036 + end = start + page_size * entry->qwords; 1037 + vm_dbg(&xe->drm, 1038 + "\t%u: Update level %u at (%u + %u) [%llx...%llx) f:%x\n", 1039 + i, xe_pt->level, entry->ofs, entry->qwords, 1040 + xe_pt_addr(xe_pt) + start, xe_pt_addr(xe_pt) + end, 0); 1041 + } 1042 + } 1043 + #else 1044 + {} 1045 + #endif 1046 + 1047 + #ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT 1048 + 1049 + static bool xe_pt_userptr_inject_eagain(struct xe_vma *vma) 1050 + { 1051 + u32 divisor = vma->userptr.divisor ? vma->userptr.divisor : 2; 1052 + static u32 count; 1053 + 1054 + if (count++ % divisor == divisor - 1) { 1055 + struct xe_vm *vm = vma->vm; 1056 + 1057 + vma->userptr.divisor = divisor << 1; 1058 + spin_lock(&vm->userptr.invalidated_lock); 1059 + list_move_tail(&vma->userptr.invalidate_link, 1060 + &vm->userptr.invalidated); 1061 + spin_unlock(&vm->userptr.invalidated_lock); 1062 + return true; 1063 + } 1064 + 1065 + return false; 1066 + } 1067 + 1068 + #else 1069 + 1070 + static bool xe_pt_userptr_inject_eagain(struct xe_vma *vma) 1071 + { 1072 + return false; 1073 + } 1074 + 1075 + #endif 1076 + 1077 + /** 1078 + * struct xe_pt_migrate_pt_update - Callback argument for pre-commit callbacks 1079 + * @base: Base we derive from.
1080 + * @bind: Whether this is a bind or an unbind operation. A bind operation 1081 + * makes the pre-commit callback error with -EAGAIN if it detects a 1082 + * pending invalidation. 1083 + * @locked: Whether the pre-commit callback locked the userptr notifier lock 1084 + * and it needs unlocking. 1085 + */ 1086 + struct xe_pt_migrate_pt_update { 1087 + struct xe_migrate_pt_update base; 1088 + bool bind; 1089 + bool locked; 1090 + }; 1091 + 1092 + static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) 1093 + { 1094 + struct xe_pt_migrate_pt_update *userptr_update = 1095 + container_of(pt_update, typeof(*userptr_update), base); 1096 + struct xe_vma *vma = pt_update->vma; 1097 + unsigned long notifier_seq = vma->userptr.notifier_seq; 1098 + struct xe_vm *vm = vma->vm; 1099 + 1100 + userptr_update->locked = false; 1101 + 1102 + /* 1103 + * Wait until nobody is running the invalidation notifier, and 1104 + * since we're exiting the loop holding the notifier lock, 1105 + * nobody can proceed invalidating either. 1106 + * 1107 + * Note that we don't update the vma->userptr.notifier_seq since 1108 + * we don't update the userptr pages. 
1109 + */ 1110 + do { 1111 + down_read(&vm->userptr.notifier_lock); 1112 + if (!mmu_interval_read_retry(&vma->userptr.notifier, 1113 + notifier_seq)) 1114 + break; 1115 + 1116 + up_read(&vm->userptr.notifier_lock); 1117 + 1118 + if (userptr_update->bind) 1119 + return -EAGAIN; 1120 + 1121 + notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier); 1122 + } while (true); 1123 + 1124 + /* Inject errors to test whether they are handled correctly */ 1125 + if (userptr_update->bind && xe_pt_userptr_inject_eagain(vma)) { 1126 + up_read(&vm->userptr.notifier_lock); 1127 + return -EAGAIN; 1128 + } 1129 + 1130 + userptr_update->locked = true; 1131 + 1132 + return 0; 1133 + } 1134 + 1135 + static const struct xe_migrate_pt_update_ops bind_ops = { 1136 + .populate = xe_vm_populate_pgtable, 1137 + }; 1138 + 1139 + static const struct xe_migrate_pt_update_ops userptr_bind_ops = { 1140 + .populate = xe_vm_populate_pgtable, 1141 + .pre_commit = xe_pt_userptr_pre_commit, 1142 + }; 1143 + 1144 + /** 1145 + * __xe_pt_bind_vma() - Build and connect a page-table tree for the vma 1146 + * address range. 1147 + * @gt: The gt to bind for. 1148 + * @vma: The vma to bind. 1149 + * @e: The engine with which to do pipelined page-table updates. 1150 + * @syncs: Entries to sync on before binding the built tree to the live vm tree. 1151 + * @num_syncs: Number of @sync entries. 1152 + * @rebind: Whether we're rebinding this vma to the same address range without 1153 + * an unbind in-between. 1154 + * 1155 + * This function builds a page-table tree (see xe_pt_stage_bind() for more 1156 + * information on page-table building), and the xe_vm_pgtable_update entries 1157 + * abstracting the operations needed to attach it to the main vm tree.
It 1158 + * then takes the relevant locks and updates the metadata side of the main 1159 + * vm tree and submits the operations for pipelined attachment of the 1160 + * gpu page-table to the vm main tree, (which can be done either by the 1161 + * cpu and the GPU). 1162 + * 1163 + * Return: A valid dma-fence representing the pipelined attachment operation 1164 + * on success, an error pointer on error. 1165 + */ 1166 + struct dma_fence * 1167 + __xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e, 1168 + struct xe_sync_entry *syncs, u32 num_syncs, 1169 + bool rebind) 1170 + { 1171 + struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1]; 1172 + struct xe_pt_migrate_pt_update bind_pt_update = { 1173 + .base = { 1174 + .ops = xe_vma_is_userptr(vma) ? &userptr_bind_ops : &bind_ops, 1175 + .vma = vma, 1176 + }, 1177 + .bind = true, 1178 + }; 1179 + struct xe_vm *vm = vma->vm; 1180 + u32 num_entries; 1181 + struct dma_fence *fence; 1182 + int err; 1183 + 1184 + bind_pt_update.locked = false; 1185 + xe_bo_assert_held(vma->bo); 1186 + xe_vm_assert_held(vm); 1187 + XE_BUG_ON(xe_gt_is_media_type(gt)); 1188 + 1189 + vm_dbg(&vma->vm->xe->drm, 1190 + "Preparing bind, with range [%llx...%llx) engine %p.\n", 1191 + vma->start, vma->end, e); 1192 + 1193 + err = xe_pt_prepare_bind(gt, vma, entries, &num_entries, rebind); 1194 + if (err) 1195 + goto err; 1196 + XE_BUG_ON(num_entries > ARRAY_SIZE(entries)); 1197 + 1198 + xe_vm_dbg_print_entries(gt_to_xe(gt), entries, num_entries); 1199 + 1200 + fence = xe_migrate_update_pgtables(gt->migrate, 1201 + vm, vma->bo, 1202 + e ? e : vm->eng[gt->info.id], 1203 + entries, num_entries, 1204 + syncs, num_syncs, 1205 + &bind_pt_update.base); 1206 + if (!IS_ERR(fence)) { 1207 + LLIST_HEAD(deferred); 1208 + 1209 + /* add shared fence now for pagetable delayed destroy */ 1210 + dma_resv_add_fence(&vm->resv, fence, !rebind && 1211 + vma->last_munmap_rebind ? 
1212 + DMA_RESV_USAGE_KERNEL : 1213 + DMA_RESV_USAGE_BOOKKEEP); 1214 + 1215 + if (!xe_vma_is_userptr(vma) && !vma->bo->vm) 1216 + dma_resv_add_fence(vma->bo->ttm.base.resv, fence, 1217 + DMA_RESV_USAGE_BOOKKEEP); 1218 + xe_pt_commit_bind(vma, entries, num_entries, rebind, 1219 + bind_pt_update.locked ? &deferred : NULL); 1220 + 1221 + /* This vma is live (again?) now */ 1222 + vma->gt_present |= BIT(gt->info.id); 1223 + 1224 + if (bind_pt_update.locked) { 1225 + vma->userptr.initial_bind = true; 1226 + up_read(&vm->userptr.notifier_lock); 1227 + xe_bo_put_commit(&deferred); 1228 + } 1229 + if (!rebind && vma->last_munmap_rebind && 1230 + xe_vm_in_compute_mode(vm)) 1231 + queue_work(vm->xe->ordered_wq, 1232 + &vm->preempt.rebind_work); 1233 + } else { 1234 + if (bind_pt_update.locked) 1235 + up_read(&vm->userptr.notifier_lock); 1236 + xe_pt_abort_bind(vma, entries, num_entries); 1237 + } 1238 + 1239 + return fence; 1240 + 1241 + err: 1242 + return ERR_PTR(err); 1243 + } 1244 + 1245 + struct xe_pt_stage_unbind_walk { 1246 + /** @base: The pagewalk base-class. */ 1247 + struct xe_pt_walk base; 1248 + 1249 + /* Input parameters for the walk */ 1250 + /** @gt: The gt we're unbinding from. */ 1251 + struct xe_gt *gt; 1252 + 1253 + /** 1254 + * @modified_start: Walk range start, modified to include any 1255 + * shared pagetables that we're the only user of and can thus 1256 + * treat as private. 1257 + */ 1258 + u64 modified_start; 1259 + /** @modified_end: Walk range end, modified like @modified_start. */ 1260 + u64 modified_end; 1261 + 1262 + /* Output */ 1263 + /** @wupd: Structure to track the page-table updates we're building */ 1264 + struct xe_walk_update wupd; 1265 + }; 1266 + 1267 + /* 1268 + * Check whether this range is the only one populating this pagetable, 1269 + * and in that case, update the walk range checks so that higher levels don't 1270 + * view us as a shared pagetable.
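The "only user" test described above can be sketched standalone: a child page-table may be killed wholesale when the unbound range is aligned to the child's span and the number of entries it removes accounts for every live entry in the child. Assuming a caller-supplied per-entry shift, with hypothetical demo_* names:

```c
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Mirror of the xe_pt_check_kill() predicate: [addr, next) must start
 * and end on entry boundaries of (1 << shift) bytes, and the entry count
 * it spans must equal the table's num_live entries. */
static bool demo_check_kill(uint64_t addr, uint64_t next, unsigned int shift,
			    unsigned int num_live)
{
	uint64_t size = 1ull << shift;

	return !(addr & (size - 1)) && !(next & (size - 1)) &&
	       ((next - addr) >> shift) == num_live;
}
```

If any other mapping still holds a live entry, the count check fails and the table must instead be trimmed via staged updates.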
 */
static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level,
                             const struct xe_pt *child,
                             enum page_walk_action *action,
                             struct xe_pt_walk *walk)
{
        struct xe_pt_stage_unbind_walk *xe_walk =
                container_of(walk, typeof(*xe_walk), base);
        unsigned int shift = walk->shifts[level];
        u64 size = 1ull << shift;

        if (IS_ALIGNED(addr, size) && IS_ALIGNED(next, size) &&
            ((next - addr) >> shift) == child->num_live) {
                u64 size = 1ull << walk->shifts[level + 1];

                *action = ACTION_CONTINUE;

                if (xe_walk->modified_start >= addr)
                        xe_walk->modified_start = round_down(addr, size);
                if (xe_walk->modified_end <= next)
                        xe_walk->modified_end = round_up(next, size);

                return true;
        }

        return false;
}

static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset,
                                    unsigned int level, u64 addr, u64 next,
                                    struct xe_ptw **child,
                                    enum page_walk_action *action,
                                    struct xe_pt_walk *walk)
{
        struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);

        XE_BUG_ON(!*child);
        XE_BUG_ON(!level && xe_child->is_compact);

        xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk);

        return 0;
}

static int
xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset,
                                unsigned int level, u64 addr, u64 next,
                                struct xe_ptw **child,
                                enum page_walk_action *action,
                                struct xe_pt_walk *walk)
{
        struct xe_pt_stage_unbind_walk *xe_walk =
                container_of(walk, typeof(*xe_walk), base);
        struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
        pgoff_t end_offset;
        u64 size = 1ull << walk->shifts[--level];

        if (!IS_ALIGNED(addr, size))
                addr = xe_walk->modified_start;
        if (!IS_ALIGNED(next, size))
                next = xe_walk->modified_end;

        /* Parent == *child is the root pt. Don't kill it. */
        if (parent != *child &&
            xe_pt_check_kill(addr, next, level, xe_child, action, walk))
                return 0;

        if (!xe_pt_nonshared_offsets(addr, next, level, walk, action, &offset,
                                     &end_offset))
                return 0;

        (void)xe_pt_new_shared(&xe_walk->wupd, xe_child, offset, false);
        xe_walk->wupd.updates[level].update->qwords = end_offset - offset;

        return 0;
}

static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = {
        .pt_entry = xe_pt_stage_unbind_entry,
        .pt_post_descend = xe_pt_stage_unbind_post_descend,
};

/**
 * xe_pt_stage_unbind() - Build page-table update structures for an unbind
 * operation
 * @gt: The gt we're unbinding for.
 * @vma: The vma we're unbinding.
 * @entries: Caller-provided storage for the update structures.
 *
 * Builds page-table update structures for an unbind operation. The function
 * will attempt to remove all page-tables that we're the only user
 * of, and for that to work, the unbind operation must be committed in the
 * same critical section that blocks racing binds to the same page-table tree.
 *
 * Return: The number of entries used.
 */
static unsigned int xe_pt_stage_unbind(struct xe_gt *gt, struct xe_vma *vma,
                                       struct xe_vm_pgtable_update *entries)
{
        struct xe_pt_stage_unbind_walk xe_walk = {
                .base = {
                        .ops = &xe_pt_stage_unbind_ops,
                        .shifts = xe_normal_pt_shifts,
                        .max_level = XE_PT_HIGHEST_LEVEL,
                },
                .gt = gt,
                .modified_start = vma->start,
                .modified_end = vma->end + 1,
                .wupd.entries = entries,
        };
        struct xe_pt *pt = vma->vm->pt_root[gt->info.id];

        (void)xe_pt_walk_shared(&pt->base, pt->level, vma->start, vma->end + 1,
                                &xe_walk.base);

        return xe_walk.wupd.num_used_entries;
}

static void
xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update,
                                  struct xe_gt *gt, struct iosys_map *map,
                                  void *ptr, u32 qword_ofs, u32 num_qwords,
                                  const struct xe_vm_pgtable_update *update)
{
        struct xe_vma *vma = pt_update->vma;
        u64 empty = __xe_pt_empty_pte(gt, vma->vm, update->pt->level);
        int i;

        XE_BUG_ON(xe_gt_is_media_type(gt));

        if (map && map->is_iomem)
                for (i = 0; i < num_qwords; ++i)
                        xe_map_wr(gt_to_xe(gt), map, (qword_ofs + i) *
                                  sizeof(u64), u64, empty);
        else if (map)
                memset64(map->vaddr + qword_ofs * sizeof(u64), empty,
                         num_qwords);
        else
                memset64(ptr, empty, num_qwords);
}

static void
xe_pt_commit_unbind(struct xe_vma *vma,
                    struct xe_vm_pgtable_update *entries, u32 num_entries,
                    struct llist_head *deferred)
{
        u32 j;

        xe_pt_commit_locks_assert(vma);

        for (j = 0; j < num_entries; ++j) {
                struct xe_vm_pgtable_update *entry = &entries[j];
                struct xe_pt *pt = entry->pt;

                pt->num_live -= entry->qwords;
                if (pt->level) {
                        struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt);
                        u32 i;

                        for (i = entry->ofs; i < entry->ofs + entry->qwords;
                             i++) {
                                if (xe_pt_entry(pt_dir, i))
                                        xe_pt_destroy(xe_pt_entry(pt_dir, i),
                                                      vma->vm->flags, deferred);

                                pt_dir->dir.entries[i] = NULL;
                        }
                }
        }
}

static const struct xe_migrate_pt_update_ops unbind_ops = {
        .populate = xe_migrate_clear_pgtable_callback,
};

static const struct xe_migrate_pt_update_ops userptr_unbind_ops = {
        .populate = xe_migrate_clear_pgtable_callback,
        .pre_commit = xe_pt_userptr_pre_commit,
};

/**
 * __xe_pt_unbind_vma() - Disconnect and free a page-table tree for the vma
 * address range.
 * @gt: The gt to unbind for.
 * @vma: The vma to unbind.
 * @e: The engine with which to do pipelined page-table updates.
 * @syncs: Entries to sync on before disconnecting the tree to be destroyed.
 * @num_syncs: Number of @syncs entries.
 *
 * This function builds the xe_vm_pgtable_update entries abstracting the
 * operations needed to detach the page-table tree to be destroyed from the
 * main vm tree.
 * It then takes the relevant locks and submits the operations for
 * pipelined detachment of the gpu page-table from the vm main tree
 * (which can be done either by the CPU or the GPU). Finally it frees the
 * detached page-table tree.
 *
 * Return: A valid dma-fence representing the pipelined detachment operation
 * on success, an error pointer on error.
 */
struct dma_fence *
__xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
                   struct xe_sync_entry *syncs, u32 num_syncs)
{
        struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1];
        struct xe_pt_migrate_pt_update unbind_pt_update = {
                .base = {
                        .ops = xe_vma_is_userptr(vma) ? &userptr_unbind_ops :
                               &unbind_ops,
                        .vma = vma,
                },
        };
        struct xe_vm *vm = vma->vm;
        u32 num_entries;
        struct dma_fence *fence = NULL;
        LLIST_HEAD(deferred);

        xe_bo_assert_held(vma->bo);
        xe_vm_assert_held(vm);
        XE_BUG_ON(xe_gt_is_media_type(gt));

        vm_dbg(&vma->vm->xe->drm,
               "Preparing unbind, with range [%llx...%llx) engine %p.\n",
               vma->start, vma->end, e);

        num_entries = xe_pt_stage_unbind(gt, vma, entries);
        XE_BUG_ON(num_entries > ARRAY_SIZE(entries));

        xe_vm_dbg_print_entries(gt_to_xe(gt), entries, num_entries);

        /*
         * Even if we were already evicted and unbind to destroy, we need to
         * clear again here. The eviction may have updated pagetables at a
         * lower level, because it needs to be more conservative.
         */
        fence = xe_migrate_update_pgtables(gt->migrate,
                                           vm, NULL, e ? e :
                                           vm->eng[gt->info.id],
                                           entries, num_entries,
                                           syncs, num_syncs,
                                           &unbind_pt_update.base);
        if (!IS_ERR(fence)) {
                /* add shared fence now for pagetable delayed destroy */
                dma_resv_add_fence(&vm->resv, fence,
                                   DMA_RESV_USAGE_BOOKKEEP);

                /* This fence will be installed by caller when doing eviction */
                if (!xe_vma_is_userptr(vma) && !vma->bo->vm)
                        dma_resv_add_fence(vma->bo->ttm.base.resv, fence,
                                           DMA_RESV_USAGE_BOOKKEEP);
                xe_pt_commit_unbind(vma, entries, num_entries,
                                    unbind_pt_update.locked ? &deferred : NULL);
                vma->gt_present &= ~BIT(gt->info.id);
        }

        if (!vma->gt_present)
                list_del_init(&vma->rebind_link);

        if (unbind_pt_update.locked) {
                XE_WARN_ON(!xe_vma_is_userptr(vma));

                if (!vma->gt_present) {
                        spin_lock(&vm->userptr.invalidated_lock);
                        list_del_init(&vma->userptr.invalidate_link);
                        spin_unlock(&vm->userptr.invalidated_lock);
                }
                up_read(&vm->userptr.notifier_lock);
                xe_bo_put_commit(&deferred);
        }

        return fence;
}
drivers/gpu/drm/xe/xe_pt.h (+54)
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */
#ifndef _XE_PT_H_
#define _XE_PT_H_

#include <linux/types.h>

#include "xe_pt_types.h"

struct dma_fence;
struct xe_bo;
struct xe_device;
struct xe_engine;
struct xe_gt;
struct xe_sync_entry;
struct xe_vm;
struct xe_vma;

#define xe_pt_write(xe, map, idx, data) \
        xe_map_wr(xe, map, (idx) * sizeof(u64), u64, data)

unsigned int xe_pt_shift(unsigned int level);

struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_gt *gt,
                           unsigned int level);

int xe_pt_create_scratch(struct xe_device *xe, struct xe_gt *gt,
                         struct xe_vm *vm);

void xe_pt_populate_empty(struct xe_gt *gt, struct xe_vm *vm,
                          struct xe_pt *pt);

void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred);

struct dma_fence *
__xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
                 struct xe_sync_entry *syncs, u32 num_syncs,
                 bool rebind);

struct dma_fence *
__xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
                   struct xe_sync_entry *syncs, u32 num_syncs);

bool xe_pt_zap_ptes(struct xe_gt *gt, struct xe_vma *vma);

u64 gen8_pde_encode(struct xe_bo *bo, u64 bo_offset,
                    const enum xe_cache_level level);

u64 gen8_pte_encode(struct xe_vma *vma, struct xe_bo *bo,
                    u64 offset, enum xe_cache_level cache,
                    u32 flags, u32 pt_level);
#endif
drivers/gpu/drm/xe/xe_pt_types.h (+57)
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

#ifndef _XE_PT_TYPES_H_
#define _XE_PT_TYPES_H_

#include "xe_pt_walk.h"

enum xe_cache_level {
        XE_CACHE_NONE,
        XE_CACHE_WT,
        XE_CACHE_WB,
};

#define XE_VM_MAX_LEVEL 4

struct xe_pt {
        struct xe_ptw base;
        struct xe_bo *bo;
        unsigned int level;
        unsigned int num_live;
        bool rebind;
        bool is_compact;
#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
        /** @addr: Virtual address start of the PT. */
        u64 addr;
#endif
};

struct xe_pt_entry {
        struct xe_pt *pt;
        u64 pte;
};

struct xe_vm_pgtable_update {
        /** @pt_bo: page table bo to write to */
        struct xe_bo *pt_bo;

        /** @ofs: offset inside this PT at which to begin writing (in qwords) */
        u32 ofs;

        /** @qwords: number of PTEs to write */
        u32 qwords;

        /** @pt: opaque pointer useful for the caller of xe_migrate_update_pgtables */
        struct xe_pt *pt;

        /** @pt_entries: Newly added pagetable entries */
        struct xe_pt_entry *pt_entries;

        /** @flags: Target flags */
        u32 flags;
};

#endif
drivers/gpu/drm/xe/xe_pt_walk.c (+160)
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright © 2022 Intel Corporation
 */
#include "xe_pt_walk.h"

/**
 * DOC: GPU page-table tree walking.
 * The utilities in this file are similar to the CPU page-table walk
 * utilities in mm/pagewalk.c. The main difference is that we distinguish
 * the various levels of a page-table tree with an unsigned integer rather
 * than by name. 0 is the lowest level, and page-tables with level 0 can
 * not be directories pointing to lower levels, whereas all other levels
 * can. The user of the utilities determines the highest level.
 *
 * Nomenclature:
 * Each struct xe_ptw, regardless of level, is referred to as a page table, and
 * multiple page tables typically form a page table tree with page tables at
 * intermediate levels being page directories pointing at page tables at lower
 * levels. A shared page table for a given address range is a page table which
 * is neither fully within nor fully outside the address range and that can
 * thus be shared by two or more address ranges.
 *
 * Please keep this code generic so that it can be used as a drm-wide page-
 * table walker should other drivers find use for it.
 */
static u64 xe_pt_addr_end(u64 addr, u64 end, unsigned int level,
                          const struct xe_pt_walk *walk)
{
        u64 size = 1ull << walk->shifts[level];
        u64 tmp = round_up(addr + 1, size);

        return min_t(u64, tmp, end);
}

static bool xe_pt_next(pgoff_t *offset, u64 *addr, u64 next, u64 end,
                       unsigned int level, const struct xe_pt_walk *walk)
{
        pgoff_t step = 1;

        /* Shared pt walk skips to the last pagetable */
        if (unlikely(walk->shared_pt_mode)) {
                unsigned int shift = walk->shifts[level];
                u64 skip_to = round_down(end, 1ull << shift);

                if (skip_to > next) {
                        step += (skip_to - next) >> shift;
                        next = skip_to;
                }
        }

        *addr = next;
        *offset += step;

        return next != end;
}

/**
 * xe_pt_walk_range() - Walk a range of a gpu page table tree with callbacks
 * for each page-table entry in all levels.
 * @parent: The root page table for walk start.
 * @level: The root page table level.
 * @addr: Virtual address start.
 * @end: Virtual address end + 1.
 * @walk: Walk info.
 *
 * Similar to the CPU page-table walker, this is a helper to walk
 * a gpu page table and call a provided callback function for each entry.
 *
 * Return: 0 on success, negative error code on error. The error is
 * propagated from the callback and on error the walk is terminated.
 */
int xe_pt_walk_range(struct xe_ptw *parent, unsigned int level,
                     u64 addr, u64 end, struct xe_pt_walk *walk)
{
        pgoff_t offset = xe_pt_offset(addr, level, walk);
        struct xe_ptw **entries = parent->dir ? parent->dir->entries : NULL;
        const struct xe_pt_walk_ops *ops = walk->ops;
        enum page_walk_action action;
        struct xe_ptw *child;
        int err = 0;
        u64 next;

        do {
                next = xe_pt_addr_end(addr, end, level, walk);
                if (walk->shared_pt_mode && xe_pt_covers(addr, next, level,
                                                         walk))
                        continue;
again:
                action = ACTION_SUBTREE;
                child = entries ? entries[offset] : NULL;
                err = ops->pt_entry(parent, offset, level, addr, next,
                                    &child, &action, walk);
                if (err)
                        break;

                /* Probably not needed yet for gpu pagetable walk. */
                if (unlikely(action == ACTION_AGAIN))
                        goto again;

                if (likely(!level || !child || action == ACTION_CONTINUE))
                        continue;

                err = xe_pt_walk_range(child, level - 1, addr, next, walk);

                if (!err && ops->pt_post_descend)
                        err = ops->pt_post_descend(parent, offset, level, addr,
                                                   next, &child, &action, walk);
                if (err)
                        break;

        } while (xe_pt_next(&offset, &addr, next, end, level, walk));

        return err;
}

/**
 * xe_pt_walk_shared() - Walk shared page tables of a page-table tree.
 * @parent: Root page table directory.
 * @level: Level of the root.
 * @addr: Start address.
 * @end: Last address + 1.
 * @walk: Walk info.
 *
 * This function is similar to xe_pt_walk_range() but it skips page tables
 * that are private to the range. Since the root (or @parent) page table is
 * typically also a shared page table, this function is different in that it
 * calls the pt_entry callback and the post_descend callback also for the
 * root. The root can be detected in the callbacks by checking whether
 * parent == *child.
 * Walking only the shared page tables is common for unbind-type operations
 * where the page-table entries for an address range are cleared or detached
 * from the main page-table tree.
 *
 * Return: 0 on success, negative error code on error: If a callback
 * returns an error, the walk will be terminated and the error returned by
 * this function.
 */
int xe_pt_walk_shared(struct xe_ptw *parent, unsigned int level,
                      u64 addr, u64 end, struct xe_pt_walk *walk)
{
        const struct xe_pt_walk_ops *ops = walk->ops;
        enum page_walk_action action = ACTION_SUBTREE;
        struct xe_ptw *child = parent;
        int err;

        walk->shared_pt_mode = true;
        err = walk->ops->pt_entry(parent, 0, level + 1, addr, end,
                                  &child, &action, walk);

        if (err || action != ACTION_SUBTREE)
                return err;

        err = xe_pt_walk_range(parent, level, addr, end, walk);
        if (!err && ops->pt_post_descend) {
                err = ops->pt_post_descend(parent, 0, level + 1, addr, end,
                                           &child, &action, walk);
        }
        return err;
}
drivers/gpu/drm/xe/xe_pt_walk.h (+161)
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright © 2022 Intel Corporation
 */
#ifndef __XE_PT_WALK__
#define __XE_PT_WALK__

#include <linux/pagewalk.h>
#include <linux/types.h>

struct xe_ptw_dir;

/**
 * struct xe_ptw - base class for driver pagetable subclassing.
 * @dir: Pointer to an array of children if any.
 *
 * Drivers could subclass this, and if it's a page-directory, typically
 * embed the xe_ptw_dir::entries array in the same allocation.
 */
struct xe_ptw {
        struct xe_ptw_dir *dir;
};

/**
 * struct xe_ptw_dir - page directory structure
 * @entries: Array holding page directory children.
 *
 * It is the responsibility of the user to ensure @entries is
 * correctly sized.
 */
struct xe_ptw_dir {
        struct xe_ptw *entries[0];
};

/**
 * struct xe_pt_walk - Embeddable struct for walk parameters
 */
struct xe_pt_walk {
        /** @ops: The walk ops used for the pagewalk */
        const struct xe_pt_walk_ops *ops;
        /**
         * @shifts: Array of page-table entry shifts used for the
         * different levels, starting out with the leaf level 0
         * page-shift as the first entry. It's legal for this pointer to be
         * changed during the walk.
         */
        const u64 *shifts;
        /** @max_level: Highest populated level in @shifts */
        unsigned int max_level;
        /**
         * @shared_pt_mode: Whether to skip all entries that are private
         * to the address range and call the callbacks only for entries
         * that are shared with other address ranges. Such entries are
         * referred to as shared pagetables.
         */
        bool shared_pt_mode;
};

/**
 * typedef xe_pt_entry_fn - gpu page-table-walk callback function
 * @parent: The parent page table.
 * @offset: The offset (number of entries) into the page table.
 * @level: The level of @parent.
 * @addr: The virtual address.
 * @next: The virtual address for the next call, or end address.
 * @child: Pointer to pointer to child page-table at this @offset. The
 * function may modify the value pointed to if, for example, allocating a
 * child page table.
 * @action: The walk action to take upon return. See <linux/pagewalk.h>.
 * @walk: The walk parameters.
 */
typedef int (*xe_pt_entry_fn)(struct xe_ptw *parent, pgoff_t offset,
                              unsigned int level, u64 addr, u64 next,
                              struct xe_ptw **child,
                              enum page_walk_action *action,
                              struct xe_pt_walk *walk);

/**
 * struct xe_pt_walk_ops - Walk callbacks.
 */
struct xe_pt_walk_ops {
        /**
         * @pt_entry: Callback to be called for each page table entry prior
         * to descending to the next level. The returned value of the action
         * function parameter is honored.
         */
        xe_pt_entry_fn pt_entry;
        /**
         * @pt_post_descend: Callback to be called for each page table entry
         * after return from descending to the next level. The returned value
         * of the action function parameter is ignored.
         */
        xe_pt_entry_fn pt_post_descend;
};

int xe_pt_walk_range(struct xe_ptw *parent, unsigned int level,
                     u64 addr, u64 end, struct xe_pt_walk *walk);

int xe_pt_walk_shared(struct xe_ptw *parent, unsigned int level,
                      u64 addr, u64 end, struct xe_pt_walk *walk);

/**
 * xe_pt_covers - Whether the address range covers an entire entry in @level
 * @addr: Start of the range.
 * @end: End of range + 1.
 * @level: Page table level.
 * @walk: Page table walk info.
 *
 * This function is a helper to aid in determining whether a leaf page table
 * entry can be inserted at this @level.
 *
 * Return: Whether the range provided covers exactly an entry at this level.
 */
static inline bool xe_pt_covers(u64 addr, u64 end, unsigned int level,
                                const struct xe_pt_walk *walk)
{
        u64 pt_size = 1ull << walk->shifts[level];

        return end - addr == pt_size && IS_ALIGNED(addr, pt_size);
}

/**
 * xe_pt_num_entries: Number of page-table entries of a given range at this
 * level
 * @addr: Start address.
 * @end: End address.
 * @level: Page table level.
 * @walk: Walk info.
 *
 * Return: The number of page table entries at this level between @addr and
 * @end.
 */
static inline pgoff_t
xe_pt_num_entries(u64 addr, u64 end, unsigned int level,
                  const struct xe_pt_walk *walk)
{
        u64 pt_size = 1ull << walk->shifts[level];

        return (round_up(end, pt_size) - round_down(addr, pt_size)) >>
                walk->shifts[level];
}

/**
 * xe_pt_offset: Offset of the page-table entry for a given address.
 * @addr: The address.
 * @level: Page table level.
 * @walk: Walk info.
 *
 * Return: The page table entry offset for the given address in a
 * page table with size indicated by @level.
 */
static inline pgoff_t
xe_pt_offset(u64 addr, unsigned int level, const struct xe_pt_walk *walk)
{
        if (level < walk->max_level)
                addr &= ((1ull << walk->shifts[level + 1]) - 1);

        return addr >> walk->shifts[level];
}

#endif
drivers/gpu/drm/xe/xe_query.c (+387)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include <drm/xe_drm.h>
#include <drm/ttm/ttm_placement.h>
#include <linux/nospec.h>

#include "xe_bo.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_macros.h"
#include "xe_query.h"
#include "xe_ggtt.h"
#include "xe_guc_hwconfig.h"

static const enum xe_engine_class xe_to_user_engine_class[] = {
        [XE_ENGINE_CLASS_RENDER] = DRM_XE_ENGINE_CLASS_RENDER,
        [XE_ENGINE_CLASS_COPY] = DRM_XE_ENGINE_CLASS_COPY,
        [XE_ENGINE_CLASS_VIDEO_DECODE] = DRM_XE_ENGINE_CLASS_VIDEO_DECODE,
        [XE_ENGINE_CLASS_VIDEO_ENHANCE] = DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE,
        [XE_ENGINE_CLASS_COMPUTE] = DRM_XE_ENGINE_CLASS_COMPUTE,
};

static size_t calc_hw_engine_info_size(struct xe_device *xe)
{
        struct xe_hw_engine *hwe;
        enum xe_hw_engine_id id;
        struct xe_gt *gt;
        u8 gt_id;
        int i = 0;

        for_each_gt(gt, xe, gt_id)
                for_each_hw_engine(hwe, gt, id) {
                        if (xe_hw_engine_is_reserved(hwe))
                                continue;
                        i++;
                }

        return i * sizeof(struct drm_xe_engine_class_instance);
}

static int query_engines(struct xe_device *xe,
                         struct drm_xe_device_query *query)
{
        size_t size = calc_hw_engine_info_size(xe);
        struct drm_xe_engine_class_instance __user *query_ptr =
                u64_to_user_ptr(query->data);
        struct drm_xe_engine_class_instance *hw_engine_info;
        struct xe_hw_engine *hwe;
        enum xe_hw_engine_id id;
        struct xe_gt *gt;
        u8 gt_id;
        int i = 0;

        if (query->size == 0) {
                query->size = size;
                return 0;
        } else if (XE_IOCTL_ERR(xe, query->size != size)) {
                return -EINVAL;
        }

        hw_engine_info = kmalloc(size, GFP_KERNEL);
        if (XE_IOCTL_ERR(xe, !hw_engine_info))
                return -ENOMEM;

        for_each_gt(gt, xe, gt_id)
                for_each_hw_engine(hwe, gt, id) {
                        if (xe_hw_engine_is_reserved(hwe))
                                continue;

                        hw_engine_info[i].engine_class =
                                xe_to_user_engine_class[hwe->class];
                        hw_engine_info[i].engine_instance =
                                hwe->logical_instance;
                        hw_engine_info[i++].gt_id = gt->info.id;
                }

        if (copy_to_user(query_ptr, hw_engine_info, size)) {
                kfree(hw_engine_info);
                return -EFAULT;
        }
        kfree(hw_engine_info);

        return 0;
}

static size_t calc_memory_usage_size(struct xe_device *xe)
{
        u32 num_managers = 1;
        int i;

        for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i)
                if (ttm_manager_type(&xe->ttm, i))
                        num_managers++;

        return offsetof(struct drm_xe_query_mem_usage, regions[num_managers]);
}

static int query_memory_usage(struct xe_device *xe,
                              struct drm_xe_device_query *query)
{
        size_t size = calc_memory_usage_size(xe);
        struct drm_xe_query_mem_usage *usage;
        struct drm_xe_query_mem_usage __user *query_ptr =
                u64_to_user_ptr(query->data);
        struct ttm_resource_manager *man;
        int ret, i;

        if (query->size == 0) {
                query->size = size;
                return 0;
        } else if (XE_IOCTL_ERR(xe, query->size != size)) {
                return -EINVAL;
        }

        usage = kmalloc(size, GFP_KERNEL);
        if (XE_IOCTL_ERR(xe, !usage))
                return -ENOMEM;

        usage->pad = 0;

        man = ttm_manager_type(&xe->ttm, XE_PL_TT);
        usage->regions[0].mem_class = XE_MEM_REGION_CLASS_SYSMEM;
        usage->regions[0].instance = 0;
        usage->regions[0].pad = 0;
        usage->regions[0].min_page_size = PAGE_SIZE;
        usage->regions[0].max_page_size = PAGE_SIZE;
        usage->regions[0].total_size = man->size << PAGE_SHIFT;
        usage->regions[0].used = ttm_resource_manager_usage(man);
        usage->num_regions = 1;

        for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
                man = ttm_manager_type(&xe->ttm, i);
                if (man) {
                        usage->regions[usage->num_regions].mem_class =
                                XE_MEM_REGION_CLASS_VRAM;
                        usage->regions[usage->num_regions].instance =
                                usage->num_regions;
                        usage->regions[usage->num_regions].pad = 0;
                        usage->regions[usage->num_regions].min_page_size =
                                xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ?
                                SZ_64K : PAGE_SIZE;
                        usage->regions[usage->num_regions].max_page_size =
                                SZ_1G;
                        usage->regions[usage->num_regions].total_size =
                                man->size;
                        usage->regions[usage->num_regions++].used =
                                ttm_resource_manager_usage(man);
                }
        }

        if (!copy_to_user(query_ptr, usage, size))
                ret = 0;
        else
                ret = -EFAULT;

        kfree(usage);
        return ret;
}

static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
{
        u32 num_params = XE_QUERY_CONFIG_NUM_PARAM;
        size_t size =
                sizeof(struct drm_xe_query_config) + num_params * sizeof(u64);
        struct drm_xe_query_config __user *query_ptr =
                u64_to_user_ptr(query->data);
        struct drm_xe_query_config *config;

        if (query->size == 0) {
                query->size = size;
                return 0;
        } else if (XE_IOCTL_ERR(xe, query->size != size)) {
                return -EINVAL;
        }

        config = kzalloc(size, GFP_KERNEL);
        if (XE_IOCTL_ERR(xe, !config))
                return -ENOMEM;

        config->num_params = num_params;
        config->info[XE_QUERY_CONFIG_REV_AND_DEVICE_ID] =
                xe->info.devid | (xe->info.revid << 16);
        if (to_gt(xe)->mem.vram.size)
                config->info[XE_QUERY_CONFIG_FLAGS] =
                        XE_QUERY_CONFIG_FLAGS_HAS_VRAM;
        if (xe->info.enable_guc)
                config->info[XE_QUERY_CONFIG_FLAGS] |=
                        XE_QUERY_CONFIG_FLAGS_USE_GUC;
        config->info[XE_QUERY_CONFIG_MIN_ALIGNEMENT] =
                xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
        config->info[XE_QUERY_CONFIG_VA_BITS] = 12 +
                (9 * (xe->info.vm_max_level + 1));
        config->info[XE_QUERY_CONFIG_GT_COUNT] = xe->info.tile_count;
        config->info[XE_QUERY_CONFIG_MEM_REGION_COUNT] =
                hweight_long(xe->info.mem_region_mask);

        if (copy_to_user(query_ptr, config, size)) {
                kfree(config);
                return -EFAULT;
        }
        kfree(config);

        return 0;
}

static int query_gts(struct xe_device *xe, struct drm_xe_device_query *query)
{
        struct xe_gt *gt;
        size_t size = sizeof(struct drm_xe_query_gts) +
                xe->info.tile_count * sizeof(struct drm_xe_query_gt);
        struct drm_xe_query_gts __user *query_ptr =
                u64_to_user_ptr(query->data);
        struct drm_xe_query_gts *gts;
        u8 id;

        if (query->size == 0) {
                query->size = size;
                return 0;
        } else if (XE_IOCTL_ERR(xe, query->size != size)) {
                return -EINVAL;
        }

        gts = kzalloc(size, GFP_KERNEL);
        if (XE_IOCTL_ERR(xe, !gts))
                return -ENOMEM;

        gts->num_gt = xe->info.tile_count;
        for_each_gt(gt, xe, id) {
                if (id == 0)
                        gts->gts[id].type = XE_QUERY_GT_TYPE_MAIN;
                else if (xe_gt_is_media_type(gt))
                        gts->gts[id].type = XE_QUERY_GT_TYPE_MEDIA;
                else
                        gts->gts[id].type = XE_QUERY_GT_TYPE_REMOTE;
                gts->gts[id].instance = id;
                gts->gts[id].clock_freq = gt->info.clock_freq;
                if (!IS_DGFX(xe))
                        gts->gts[id].native_mem_regions = 0x1;
                else
                        gts->gts[id].native_mem_regions =
                                BIT(gt->info.vram_id) << 1;
                gts->gts[id].slow_mem_regions = xe->info.mem_region_mask ^
                        gts->gts[id].native_mem_regions;
        }

        if (copy_to_user(query_ptr, gts, size)) {
                kfree(gts);
                return -EFAULT;
        }
        kfree(gts);

        return 0;
}

static int query_hwconfig(struct xe_device *xe,
                          struct drm_xe_device_query *query)
{
        struct xe_gt *gt = xe_device_get_gt(xe, 0);
        size_t size = xe_guc_hwconfig_size(&gt->uc.guc);
        void __user *query_ptr = u64_to_user_ptr(query->data);
        void *hwconfig;

        if (query->size == 0) {
                query->size = size;
                return 0;
        } else if (XE_IOCTL_ERR(xe, query->size != size)) {
                return -EINVAL;
        }

        hwconfig = kzalloc(size, GFP_KERNEL);
        if (XE_IOCTL_ERR(xe, !hwconfig))
                return -ENOMEM;

        xe_device_mem_access_get(xe);
        xe_guc_hwconfig_copy(&gt->uc.guc, hwconfig);
        xe_device_mem_access_put(xe);

        if (copy_to_user(query_ptr, hwconfig, size)) {
                kfree(hwconfig);
                return -EFAULT;
        }
        kfree(hwconfig);

        return 0;
}

static size_t calc_topo_query_size(struct xe_device *xe)
{
        return xe->info.tile_count *
                (3 * sizeof(struct drm_xe_query_topology_mask) +
                 sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) +
                 sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) +
                 sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss));
}

static void __user *copy_mask(void __user *ptr,
                              struct drm_xe_query_topology_mask *topo,
                              void *mask, size_t mask_size)
{
        topo->num_bytes = mask_size;

        if (copy_to_user(ptr, topo, sizeof(*topo)))
                return ERR_PTR(-EFAULT);
        ptr += sizeof(*topo);

        if (copy_to_user(ptr, mask, mask_size))
                return ERR_PTR(-EFAULT);
        ptr += mask_size;

        return ptr;
}

static int query_gt_topology(struct xe_device *xe,
                             struct drm_xe_device_query *query)
{
        void __user *query_ptr = u64_to_user_ptr(query->data);
        size_t size = calc_topo_query_size(xe);
        struct drm_xe_query_topology_mask topo;
        struct xe_gt *gt;
        int id;

        if (query->size == 0) {
                query->size = size;
                return 0;
        } else if (XE_IOCTL_ERR(xe, query->size != size)) {
                return -EINVAL;
        }

        for_each_gt(gt, xe, id) {
                topo.gt_id = id;

                topo.type = XE_TOPO_DSS_GEOMETRY;
                query_ptr = copy_mask(query_ptr, &topo,
                                      gt->fuse_topo.g_dss_mask,
                                      sizeof(gt->fuse_topo.g_dss_mask));
                if (IS_ERR(query_ptr))
                        return PTR_ERR(query_ptr);

                topo.type = XE_TOPO_DSS_COMPUTE;
                query_ptr = copy_mask(query_ptr, &topo,
                                      gt->fuse_topo.c_dss_mask,
                                      sizeof(gt->fuse_topo.c_dss_mask));
                if (IS_ERR(query_ptr))
                        return PTR_ERR(query_ptr);

                topo.type = XE_TOPO_EU_PER_DSS;
                query_ptr = copy_mask(query_ptr, &topo,
                                      gt->fuse_topo.eu_mask_per_dss,
                                      sizeof(gt->fuse_topo.eu_mask_per_dss));
                if (IS_ERR(query_ptr))
                        return PTR_ERR(query_ptr);
        }

        return 0;
}

static int (* const xe_query_funcs[])(struct xe_device *xe,
                                      struct drm_xe_device_query *query) = {
        query_engines,
        query_memory_usage,
        query_config,
        query_gts,
        query_hwconfig,
        query_gt_topology,
};

int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
        struct xe_device *xe = to_xe_device(dev);
        struct drm_xe_device_query *query = data;
        u32 idx;

        if (XE_IOCTL_ERR(xe, query->extensions != 0))
                return -EINVAL;

        if (XE_IOCTL_ERR(xe, query->query >= ARRAY_SIZE(xe_query_funcs)))
                return -EINVAL;

        idx = array_index_nospec(query->query, ARRAY_SIZE(xe_query_funcs));
        if (XE_IOCTL_ERR(xe, !xe_query_funcs[idx]))
                return -EINVAL;

        return xe_query_funcs[idx](xe, query);
}
+14
drivers/gpu/drm/xe/xe_query.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_QUERY_H_ 7 + #define _XE_QUERY_H_ 8 + 9 + struct drm_device; 10 + struct drm_file; 11 + 12 + int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file); 13 + 14 + #endif
+248
drivers/gpu/drm/xe/xe_reg_sr.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #include "xe_reg_sr.h" 7 + 8 + #include <linux/align.h> 9 + #include <linux/string_helpers.h> 10 + #include <linux/xarray.h> 11 + 12 + #include <drm/drm_print.h> 13 + #include <drm/drm_managed.h> 14 + 15 + #include "xe_rtp_types.h" 16 + #include "xe_device_types.h" 17 + #include "xe_force_wake.h" 18 + #include "xe_gt.h" 19 + #include "xe_gt_mcr.h" 20 + #include "xe_macros.h" 21 + #include "xe_mmio.h" 22 + 23 + #include "gt/intel_engine_regs.h" 24 + #include "gt/intel_gt_regs.h" 25 + 26 + #define XE_REG_SR_GROW_STEP_DEFAULT 16 27 + 28 + static void reg_sr_fini(struct drm_device *drm, void *arg) 29 + { 30 + struct xe_reg_sr *sr = arg; 31 + 32 + xa_destroy(&sr->xa); 33 + kfree(sr->pool.arr); 34 + memset(&sr->pool, 0, sizeof(sr->pool)); 35 + } 36 + 37 + int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe) 38 + { 39 + xa_init(&sr->xa); 40 + memset(&sr->pool, 0, sizeof(sr->pool)); 41 + sr->pool.grow_step = XE_REG_SR_GROW_STEP_DEFAULT; 42 + sr->name = name; 43 + 44 + return drmm_add_action_or_reset(&xe->drm, reg_sr_fini, sr); 45 + } 46 + 47 + int xe_reg_sr_dump_kv(struct xe_reg_sr *sr, 48 + struct xe_reg_sr_kv **dst) 49 + { 50 + struct xe_reg_sr_kv *iter; 51 + struct xe_reg_sr_entry *entry; 52 + unsigned long idx; 53 + 54 + if (xa_empty(&sr->xa)) { 55 + *dst = NULL; 56 + return 0; 57 + } 58 + 59 + *dst = kmalloc_array(sr->pool.used, sizeof(**dst), GFP_KERNEL); 60 + if (!*dst) 61 + return -ENOMEM; 62 + 63 + iter = *dst; 64 + xa_for_each(&sr->xa, idx, entry) { 65 + iter->k = idx; 66 + iter->v = *entry; 67 + iter++; 68 + } 69 + 70 + return 0; 71 + } 72 + 73 + static struct xe_reg_sr_entry *alloc_entry(struct xe_reg_sr *sr) 74 + { 75 + if (sr->pool.used == sr->pool.allocated) { 76 + struct xe_reg_sr_entry *arr; 77 + 78 + arr = krealloc_array(sr->pool.arr, 79 + ALIGN(sr->pool.allocated + 1, sr->pool.grow_step), 80 + sizeof(*arr), GFP_KERNEL); 81 + 
if (!arr) 82 + return NULL; 83 + 84 + sr->pool.arr = arr; 85 + sr->pool.allocated += sr->pool.grow_step; 86 + } 87 + 88 + return &sr->pool.arr[sr->pool.used++]; 89 + } 90 + 91 + static bool compatible_entries(const struct xe_reg_sr_entry *e1, 92 + const struct xe_reg_sr_entry *e2) 93 + { 94 + /* 95 + * Don't allow overwriting values: clr_bits/set_bits should be disjoint 96 + * when operating in the same register 97 + */ 98 + if (e1->clr_bits & e2->clr_bits || e1->set_bits & e2->set_bits || 99 + e1->clr_bits & e2->set_bits || e1->set_bits & e2->clr_bits) 100 + return false; 101 + 102 + if (e1->masked_reg != e2->masked_reg) 103 + return false; 104 + 105 + if (e1->reg_type != e2->reg_type) 106 + return false; 107 + 108 + return true; 109 + } 110 + 111 + int xe_reg_sr_add(struct xe_reg_sr *sr, u32 reg, 112 + const struct xe_reg_sr_entry *e) 113 + { 114 + unsigned long idx = reg; 115 + struct xe_reg_sr_entry *pentry = xa_load(&sr->xa, idx); 116 + int ret; 117 + 118 + if (pentry) { 119 + if (!compatible_entries(pentry, e)) { 120 + ret = -EINVAL; 121 + goto fail; 122 + } 123 + 124 + pentry->clr_bits |= e->clr_bits; 125 + pentry->set_bits |= e->set_bits; 126 + pentry->read_mask |= e->read_mask; 127 + 128 + return 0; 129 + } 130 + 131 + pentry = alloc_entry(sr); 132 + if (!pentry) { 133 + ret = -ENOMEM; 134 + goto fail; 135 + } 136 + 137 + *pentry = *e; 138 + ret = xa_err(xa_store(&sr->xa, idx, pentry, GFP_KERNEL)); 139 + if (ret) 140 + goto fail; 141 + 142 + return 0; 143 + 144 + fail: 145 + DRM_ERROR("Discarding save-restore reg %04lx (clear: %08x, set: %08x, masked: %s): ret=%d\n", 146 + idx, e->clr_bits, e->set_bits, 147 + str_yes_no(e->masked_reg), ret); 148 + 149 + return ret; 150 + } 151 + 152 + static void apply_one_mmio(struct xe_gt *gt, u32 reg, 153 + struct xe_reg_sr_entry *entry) 154 + { 155 + struct xe_device *xe = gt_to_xe(gt); 156 + u32 val; 157 + 158 + /* 159 + * If this is a masked register, need to figure what goes on the upper 160 + * 16 bits: it's either 
the clr_bits (when using FIELD_SET and WR) or 161 + * the set_bits, when using SET. 162 + * 163 + * When it's not masked, we have to read it from hardware, unless we are 164 + * supposed to set all bits. 165 + */ 166 + if (entry->masked_reg) 167 + val = (entry->clr_bits ?: entry->set_bits) << 16; 168 + else if (entry->clr_bits + 1) 169 + val = (entry->reg_type == XE_RTP_REG_MCR ? 170 + xe_gt_mcr_unicast_read_any(gt, MCR_REG(reg)) : 171 + xe_mmio_read32(gt, reg)) & (~entry->clr_bits); 172 + else 173 + val = 0; 174 + 175 + /* 176 + * TODO: add selftest to validate all tables, regardless of platform: 177 + * - Masked registers can't have set_bits with upper bits set 178 + * - set_bits must be contained in clr_bits 179 + */ 180 + val |= entry->set_bits; 181 + 182 + drm_dbg(&xe->drm, "REG[0x%x] = 0x%08x", reg, val); 183 + 184 + if (entry->reg_type == XE_RTP_REG_MCR) 185 + xe_gt_mcr_multicast_write(gt, MCR_REG(reg), val); 186 + else 187 + xe_mmio_write32(gt, reg, val); 188 + } 189 + 190 + void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt) 191 + { 192 + struct xe_device *xe = gt_to_xe(gt); 193 + struct xe_reg_sr_entry *entry; 194 + unsigned long reg; 195 + int err; 196 + 197 + drm_dbg(&xe->drm, "Applying %s save-restore MMIOs\n", sr->name); 198 + 199 + err = xe_force_wake_get(&gt->mmio.fw, XE_FORCEWAKE_ALL); 200 + if (err) 201 + goto err_force_wake; 202 + 203 + xa_for_each(&sr->xa, reg, entry) 204 + apply_one_mmio(gt, reg, entry); 205 + 206 + err = xe_force_wake_put(&gt->mmio.fw, XE_FORCEWAKE_ALL); 207 + XE_WARN_ON(err); 208 + 209 + return; 210 + 211 + err_force_wake: 212 + drm_err(&xe->drm, "Failed to apply, err=%d\n", err); 213 + } 214 + 215 + void xe_reg_sr_apply_whitelist(struct xe_reg_sr *sr, u32 mmio_base, 216 + struct xe_gt *gt) 217 + { 218 + struct xe_device *xe = gt_to_xe(gt); 219 + struct xe_reg_sr_entry *entry; 220 + unsigned long reg; 221 + unsigned int slot = 0; 222 + int err; 223 + 224 + drm_dbg(&xe->drm, "Whitelisting %s registers\n",
sr->name); 225 + 226 + err = xe_force_wake_get(&gt->mmio.fw, XE_FORCEWAKE_ALL); 227 + if (err) 228 + goto err_force_wake; 229 + 230 + xa_for_each(&sr->xa, reg, entry) { 231 + xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot).reg, 232 + reg | entry->set_bits); 233 + slot++; 234 + } 235 + 236 + /* And clear the rest just in case of garbage */ 237 + for (; slot < RING_MAX_NONPRIV_SLOTS; slot++) 238 + xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot).reg, 239 + RING_NOPID(mmio_base).reg); 240 + 241 + err = xe_force_wake_put(&gt->mmio.fw, XE_FORCEWAKE_ALL); 242 + XE_WARN_ON(err); 243 + 244 + return; 245 + 246 + err_force_wake: 247 + drm_err(&xe->drm, "Failed to apply, err=%d\n", err); 248 + }
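xe_reg_sr_add() above coalesces repeated additions for the same register, but only when the bit masks involved are fully disjoint (compatible_entries()). The merge rule can be demonstrated in isolation, with the entry trimmed down to its two masks (a sketch; the `demo_*` names are invented):

```c
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Trimmed-down version of struct xe_reg_sr_entry: just the bit masks. */
struct demo_sr_entry {
	uint32_t clr_bits;
	uint32_t set_bits;
};

/* Same disjointness test as compatible_entries() in xe_reg_sr.c. */
static bool demo_compatible(const struct demo_sr_entry *e1,
			    const struct demo_sr_entry *e2)
{
	return !(e1->clr_bits & e2->clr_bits) &&
	       !(e1->set_bits & e2->set_bits) &&
	       !(e1->clr_bits & e2->set_bits) &&
	       !(e1->set_bits & e2->clr_bits);
}

/* Merge e2 into e1, refusing to overwrite bits already claimed by e1. */
static bool demo_merge(struct demo_sr_entry *e1,
		       const struct demo_sr_entry *e2)
{
	if (!demo_compatible(e1, e2))
		return false;
	e1->clr_bits |= e2->clr_bits;
	e1->set_bits |= e2->set_bits;
	return true;
}
```

Two workarounds touching different fields of one register collapse into a single save-restore entry; a conflicting one is rejected (and, in the driver, logged and discarded).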
+28
drivers/gpu/drm/xe/xe_reg_sr.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_REG_SR_ 7 + #define _XE_REG_SR_ 8 + 9 + #include "xe_reg_sr_types.h" 10 + 11 + /* 12 + * Reg save/restore bookkeeping 13 + */ 14 + 15 + struct xe_device; 16 + struct xe_gt; 17 + 18 + int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe); 19 + int xe_reg_sr_dump_kv(struct xe_reg_sr *sr, 20 + struct xe_reg_sr_kv **dst); 21 + 22 + int xe_reg_sr_add(struct xe_reg_sr *sr, u32 reg, 23 + const struct xe_reg_sr_entry *e); 24 + void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt); 25 + void xe_reg_sr_apply_whitelist(struct xe_reg_sr *sr, u32 mmio_base, 26 + struct xe_gt *gt); 27 + 28 + #endif
+44
drivers/gpu/drm/xe/xe_reg_sr_types.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_REG_SR_TYPES_ 7 + #define _XE_REG_SR_TYPES_ 8 + 9 + #include <linux/xarray.h> 10 + #include <linux/types.h> 11 + 12 + #include "i915_reg_defs.h" 13 + 14 + struct xe_reg_sr_entry { 15 + u32 clr_bits; 16 + u32 set_bits; 17 + /* Mask for bits to consider when reading value back */ 18 + u32 read_mask; 19 + /* 20 + * "Masked registers" are marked in spec as register with the upper 16 21 + * bits as a mask for the bits that is being updated on the lower 16 22 + * bits when writing to it. 23 + */ 24 + u8 masked_reg; 25 + u8 reg_type; 26 + }; 27 + 28 + struct xe_reg_sr_kv { 29 + u32 k; 30 + struct xe_reg_sr_entry v; 31 + }; 32 + 33 + struct xe_reg_sr { 34 + struct { 35 + struct xe_reg_sr_entry *arr; 36 + unsigned int used; 37 + unsigned int allocated; 38 + unsigned int grow_step; 39 + } pool; 40 + struct xarray xa; 41 + const char *name; 42 + }; 43 + 44 + #endif
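The `masked_reg` field above refers to registers whose upper 16 bits act as a write-enable mask for the lower 16 bits, so a single write updates only selected bits with no read-modify-write. A small software model of that convention (helper names are illustrative, not driver API):

```c
#include <assert.h>
#include <stdint.h>

/*
 * For a masked register, writing (mask << 16) | value updates only the
 * bits present in mask; everything else is preserved by the hardware.
 */
static uint32_t masked_write_value(uint16_t mask, uint16_t value)
{
	return ((uint32_t)mask << 16) | value;
}

/* Model of how the hardware applies a masked write to the register. */
static uint16_t apply_masked_write(uint16_t old, uint32_t wr)
{
	uint16_t mask = wr >> 16;
	uint16_t value = wr & 0xffff;

	return (old & ~mask) | (value & mask);
}
```

This is why apply_one_mmio() can compute the value for a masked register without reading it back first, while unmasked registers need the MMIO read when clr_bits is not all-ones.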
+73
drivers/gpu/drm/xe/xe_reg_whitelist.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2023 Intel Corporation 4 + */ 5 + 6 + #include "xe_reg_whitelist.h" 7 + 8 + #include "xe_platform_types.h" 9 + #include "xe_gt_types.h" 10 + #include "xe_rtp.h" 11 + 12 + #include "../i915/gt/intel_engine_regs.h" 13 + #include "../i915/gt/intel_gt_regs.h" 14 + 15 + #undef _MMIO 16 + #undef MCR_REG 17 + #define _MMIO(x) _XE_RTP_REG(x) 18 + #define MCR_REG(x) _XE_RTP_MCR_REG(x) 19 + 20 + static bool match_not_render(const struct xe_gt *gt, 21 + const struct xe_hw_engine *hwe) 22 + { 23 + return hwe->class != XE_ENGINE_CLASS_RENDER; 24 + } 25 + 26 + static const struct xe_rtp_entry register_whitelist[] = { 27 + { XE_RTP_NAME("WaAllowPMDepthAndInvocationCountAccessFromUMD, 1408556865"), 28 + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), 29 + XE_WHITELIST_REGISTER(PS_INVOCATION_COUNT, 30 + RING_FORCE_TO_NONPRIV_ACCESS_RD | 31 + RING_FORCE_TO_NONPRIV_RANGE_4) 32 + }, 33 + { XE_RTP_NAME("1508744258, 14012131227, 1808121037"), 34 + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), 35 + XE_WHITELIST_REGISTER(GEN7_COMMON_SLICE_CHICKEN1, 0) 36 + }, 37 + { XE_RTP_NAME("1806527549"), 38 + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), 39 + XE_WHITELIST_REGISTER(HIZ_CHICKEN, 0) 40 + }, 41 + { XE_RTP_NAME("allow_read_ctx_timestamp"), 42 + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1260), FUNC(match_not_render)), 43 + XE_WHITELIST_REGISTER(RING_CTX_TIMESTAMP(0), 44 + RING_FORCE_TO_NONPRIV_ACCESS_RD, 45 + XE_RTP_FLAG(ENGINE_BASE)) 46 + }, 47 + { XE_RTP_NAME("16014440446_part_1"), 48 + XE_RTP_RULES(PLATFORM(PVC)), 49 + XE_WHITELIST_REGISTER(_MMIO(0x4400), 50 + RING_FORCE_TO_NONPRIV_DENY | 51 + RING_FORCE_TO_NONPRIV_RANGE_64) 52 + }, 53 + { XE_RTP_NAME("16014440446_part_2"), 54 + XE_RTP_RULES(PLATFORM(PVC)), 55 + XE_WHITELIST_REGISTER(_MMIO(0x4500), 56 + RING_FORCE_TO_NONPRIV_DENY | 57 + RING_FORCE_TO_NONPRIV_RANGE_64) 58 + }, 59 + {} 60 + }; 61 + 
62 + /** 63 + * xe_reg_whitelist_process_engine - process table of registers to whitelist 64 + * @hwe: engine instance to process whitelist for 65 + * 66 + * Process the whitelist table for this platform, saving in @hwe all the 67 + * registers that need to be whitelisted by the hardware so they can be accessed 68 + * by userspace. 69 + */ 70 + void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe) 71 + { 72 + xe_rtp_process(register_whitelist, &hwe->reg_whitelist, hwe->gt, hwe); 73 + }
+13
drivers/gpu/drm/xe/xe_reg_whitelist.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2023 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_REG_WHITELIST_ 7 + #define _XE_REG_WHITELIST_ 8 + 9 + struct xe_hw_engine; 10 + 11 + void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe); 12 + 13 + #endif
+226
drivers/gpu/drm/xe/xe_res_cursor.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR MIT */ 2 + /* 3 + * Copyright 2020 Advanced Micro Devices, Inc. 4 + * 5 + * Permission is hereby granted, free of charge, to any person obtaining a 6 + * copy of this software and associated documentation files (the "Software"), 7 + * to deal in the Software without restriction, including without limitation 8 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 + * and/or sell copies of the Software, and to permit persons to whom the 10 + * Software is furnished to do so, subject to the following conditions: 11 + * 12 + * The above copyright notice and this permission notice shall be included in 13 + * all copies or substantial portions of the Software. 14 + * 15 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 + * OTHER DEALINGS IN THE SOFTWARE. 
22 + */ 23 + 24 + #ifndef __XE_RES_CURSOR_H__ 25 + #define __XE_RES_CURSOR_H__ 26 + 27 + #include <linux/scatterlist.h> 28 + 29 + #include <drm/drm_mm.h> 30 + #include <drm/ttm/ttm_placement.h> 31 + #include <drm/ttm/ttm_range_manager.h> 32 + #include <drm/ttm/ttm_resource.h> 33 + #include <drm/ttm/ttm_tt.h> 34 + 35 + #include "xe_bo.h" 36 + #include "xe_macros.h" 37 + #include "xe_ttm_vram_mgr.h" 38 + 39 + /* state back for walking over vram_mgr and gtt_mgr allocations */ 40 + struct xe_res_cursor { 41 + u64 start; 42 + u64 size; 43 + u64 remaining; 44 + void *node; 45 + u32 mem_type; 46 + struct scatterlist *sgl; 47 + }; 48 + 49 + /** 50 + * xe_res_first - initialize a xe_res_cursor 51 + * 52 + * @res: TTM resource object to walk 53 + * @start: Start of the range 54 + * @size: Size of the range 55 + * @cur: cursor object to initialize 56 + * 57 + * Start walking over the range of allocations between @start and @size. 58 + */ 59 + static inline void xe_res_first(struct ttm_resource *res, 60 + u64 start, u64 size, 61 + struct xe_res_cursor *cur) 62 + { 63 + struct drm_buddy_block *block; 64 + struct list_head *head, *next; 65 + 66 + cur->sgl = NULL; 67 + if (!res) 68 + goto fallback; 69 + 70 + XE_BUG_ON(start + size > res->size); 71 + 72 + cur->mem_type = res->mem_type; 73 + 74 + switch (cur->mem_type) { 75 + case XE_PL_VRAM0: 76 + case XE_PL_VRAM1: 77 + head = &to_xe_ttm_vram_mgr_resource(res)->blocks; 78 + 79 + block = list_first_entry_or_null(head, 80 + struct drm_buddy_block, 81 + link); 82 + if (!block) 83 + goto fallback; 84 + 85 + while (start >= xe_ttm_vram_mgr_block_size(block)) { 86 + start -= xe_ttm_vram_mgr_block_size(block); 87 + 88 + next = block->link.next; 89 + if (next != head) 90 + block = list_entry(next, struct drm_buddy_block, 91 + link); 92 + } 93 + 94 + cur->start = xe_ttm_vram_mgr_block_start(block) + start; 95 + cur->size = min(xe_ttm_vram_mgr_block_size(block) - start, 96 + size); 97 + cur->remaining = size; 98 + cur->node = block; 99 + 
break; 100 + default: 101 + goto fallback; 102 + } 103 + 104 + return; 105 + 106 + fallback: 107 + cur->start = start; 108 + cur->size = size; 109 + cur->remaining = size; 110 + cur->node = NULL; 111 + cur->mem_type = XE_PL_TT; 112 + XE_WARN_ON(res && start + size > res->size); 113 + return; 114 + } 115 + 116 + static inline void __xe_res_sg_next(struct xe_res_cursor *cur) 117 + { 118 + struct scatterlist *sgl = cur->sgl; 119 + u64 start = cur->start; 120 + 121 + while (start >= sg_dma_len(sgl)) { 122 + start -= sg_dma_len(sgl); 123 + sgl = sg_next(sgl); 124 + XE_BUG_ON(!sgl); 125 + } 126 + 127 + cur->start = start; 128 + cur->size = sg_dma_len(sgl) - start; 129 + cur->sgl = sgl; 130 + } 131 + 132 + /** 133 + * xe_res_first_sg - initialize a xe_res_cursor with a scatter gather table 134 + * 135 + * @sg: scatter gather table to walk 136 + * @start: Start of the range 137 + * @size: Size of the range 138 + * @cur: cursor object to initialize 139 + * 140 + * Start walking over the range of allocations between @start and @size. 141 + */ 142 + static inline void xe_res_first_sg(const struct sg_table *sg, 143 + u64 start, u64 size, 144 + struct xe_res_cursor *cur) 145 + { 146 + XE_BUG_ON(!sg); 147 + XE_BUG_ON(!IS_ALIGNED(start, PAGE_SIZE) || 148 + !IS_ALIGNED(size, PAGE_SIZE)); 149 + cur->node = NULL; 150 + cur->start = start; 151 + cur->remaining = size; 152 + cur->size = 0; 153 + cur->sgl = sg->sgl; 154 + cur->mem_type = XE_PL_TT; 155 + __xe_res_sg_next(cur); 156 + } 157 + 158 + /** 159 + * xe_res_next - advance the cursor 160 + * 161 + * @cur: the cursor to advance 162 + * @size: number of bytes to move forward 163 + * 164 + * Move the cursor @size bytes forward, walking to the next node if necessary. 
165 + */ 166 + static inline void xe_res_next(struct xe_res_cursor *cur, u64 size) 167 + { 168 + struct drm_buddy_block *block; 169 + struct list_head *next; 170 + u64 start; 171 + 172 + XE_BUG_ON(size > cur->remaining); 173 + 174 + cur->remaining -= size; 175 + if (!cur->remaining) 176 + return; 177 + 178 + if (cur->size > size) { 179 + cur->size -= size; 180 + cur->start += size; 181 + return; 182 + } 183 + 184 + if (cur->sgl) { 185 + cur->start += size; 186 + __xe_res_sg_next(cur); 187 + return; 188 + } 189 + 190 + switch (cur->mem_type) { 191 + case XE_PL_VRAM0: 192 + case XE_PL_VRAM1: 193 + start = size - cur->size; 194 + block = cur->node; 195 + 196 + next = block->link.next; 197 + block = list_entry(next, struct drm_buddy_block, link); 198 + 199 + 200 + while (start >= xe_ttm_vram_mgr_block_size(block)) { 201 + start -= xe_ttm_vram_mgr_block_size(block); 202 + 203 + next = block->link.next; 204 + block = list_entry(next, struct drm_buddy_block, link); 205 + } 206 + 207 + cur->start = xe_ttm_vram_mgr_block_start(block) + start; 208 + cur->size = min(xe_ttm_vram_mgr_block_size(block) - start, 209 + cur->remaining); 210 + cur->node = block; 211 + break; 212 + default: 213 + return; 214 + } 215 + } 216 + 217 + /** 218 + * xe_res_dma - return dma address of cursor at current position 219 + * 220 + * @cur: the cursor to return the dma address from 221 + */ 222 + static inline u64 xe_res_dma(const struct xe_res_cursor *cur) 223 + { 224 + return cur->sgl ? sg_dma_address(cur->sgl) + cur->start : cur->start; 225 + } 226 + #endif
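The cursor above keeps three pieces of state — offset into the current chunk, bytes left in that chunk, and bytes left in the overall walk — and hops to the next buddy block or scatterlist entry when a chunk is exhausted. The same walk can be modeled over a plain array of chunk sizes (a simplified sketch; the `demo_*` names are invented):

```c
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Simplified cursor over an array of contiguous chunk sizes. */
struct demo_cursor {
	const uint64_t *sizes;	/* chunk sizes, in bytes */
	size_t idx;		/* current chunk */
	uint64_t start;		/* offset within current chunk */
	uint64_t remaining;	/* bytes left in the walk */
};

static void demo_cursor_init(struct demo_cursor *cur, const uint64_t *sizes,
			     uint64_t start, uint64_t size)
{
	cur->sizes = sizes;
	cur->idx = 0;
	cur->remaining = size;
	/* Skip whole chunks until start falls inside one, like xe_res_first(). */
	while (start >= sizes[cur->idx]) {
		start -= sizes[cur->idx];
		cur->idx++;
	}
	cur->start = start;
}

/* Advance by size bytes, hopping chunks as needed, like xe_res_next(). */
static void demo_cursor_next(struct demo_cursor *cur, uint64_t size)
{
	cur->remaining -= size;
	cur->start += size;
	while (cur->remaining && cur->start >= cur->sizes[cur->idx]) {
		cur->start -= cur->sizes[cur->idx];
		cur->idx++;
	}
}
```

The real header adds the fallback path for system memory and tracks the DMA address per position via xe_res_dma(), but the chunk-hopping arithmetic is the same.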
+373
drivers/gpu/drm/xe/xe_ring_ops.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #include "xe_engine_types.h" 7 + #include "xe_gt.h" 8 + #include "xe_lrc.h" 9 + #include "xe_macros.h" 10 + #include "xe_ring_ops.h" 11 + #include "xe_sched_job.h" 12 + #include "xe_vm_types.h" 13 + 14 + #include "i915_reg.h" 15 + #include "gt/intel_gpu_commands.h" 16 + #include "gt/intel_gt_regs.h" 17 + #include "gt/intel_lrc_reg.h" 18 + 19 + static u32 preparser_disable(bool state) 20 + { 21 + return MI_ARB_CHECK | BIT(8) | state; 22 + } 23 + 24 + static int emit_aux_table_inv(struct xe_gt *gt, u32 addr, u32 *dw, int i) 25 + { 26 + dw[i++] = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN; 27 + dw[i++] = addr + gt->mmio.adj_offset; 28 + dw[i++] = AUX_INV; 29 + dw[i++] = MI_NOOP; 30 + 31 + return i; 32 + } 33 + 34 + static int emit_user_interrupt(u32 *dw, int i) 35 + { 36 + dw[i++] = MI_USER_INTERRUPT; 37 + dw[i++] = MI_ARB_ON_OFF | MI_ARB_ENABLE; 38 + dw[i++] = MI_ARB_CHECK; 39 + 40 + return i; 41 + } 42 + 43 + static int emit_store_imm_ggtt(u32 addr, u32 value, u32 *dw, int i) 44 + { 45 + dw[i++] = MI_STORE_DATA_IMM | BIT(22) /* GGTT */ | 2; 46 + dw[i++] = addr; 47 + dw[i++] = 0; 48 + dw[i++] = value; 49 + 50 + return i; 51 + } 52 + 53 + static int emit_flush_imm_ggtt(u32 addr, u32 value, u32 *dw, int i) 54 + { 55 + dw[i++] = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; 56 + dw[i++] = addr | MI_FLUSH_DW_USE_GTT; 57 + dw[i++] = 0; 58 + dw[i++] = value; 59 + 60 + return i; 61 + } 62 + 63 + static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i) 64 + { 65 + dw[i++] = MI_BATCH_BUFFER_START_GEN8 | ppgtt_flag; 66 + dw[i++] = lower_32_bits(batch_addr); 67 + dw[i++] = upper_32_bits(batch_addr); 68 + 69 + return i; 70 + } 71 + 72 + static int emit_flush_invalidate(u32 flag, u32 *dw, int i) 73 + { 74 + dw[i] = MI_FLUSH_DW + 1; 75 + dw[i] |= flag; 76 + dw[i++] |= MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW | 77 + MI_FLUSH_DW_STORE_INDEX; 78 + 79 + dw[i++] = 
LRC_PPHWSP_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; 80 + dw[i++] = 0; 81 + dw[i++] = ~0U; 82 + 83 + return i; 84 + } 85 + 86 + static int emit_pipe_invalidate(u32 mask_flags, u32 *dw, int i) 87 + { 88 + u32 flags = PIPE_CONTROL_CS_STALL | 89 + PIPE_CONTROL_COMMAND_CACHE_INVALIDATE | 90 + PIPE_CONTROL_TLB_INVALIDATE | 91 + PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE | 92 + PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | 93 + PIPE_CONTROL_VF_CACHE_INVALIDATE | 94 + PIPE_CONTROL_CONST_CACHE_INVALIDATE | 95 + PIPE_CONTROL_STATE_CACHE_INVALIDATE | 96 + PIPE_CONTROL_QW_WRITE | 97 + PIPE_CONTROL_STORE_DATA_INDEX; 98 + 99 + flags &= ~mask_flags; 100 + 101 + dw[i++] = GFX_OP_PIPE_CONTROL(6); 102 + dw[i++] = flags; 103 + dw[i++] = LRC_PPHWSP_SCRATCH_ADDR; 104 + dw[i++] = 0; 105 + dw[i++] = 0; 106 + dw[i++] = 0; 107 + 108 + return i; 109 + } 110 + 111 + #define MI_STORE_QWORD_IMM_GEN8_POSTED (MI_INSTR(0x20, 3) | (1 << 21)) 112 + 113 + static int emit_store_imm_ppgtt_posted(u64 addr, u64 value, 114 + u32 *dw, int i) 115 + { 116 + dw[i++] = MI_STORE_QWORD_IMM_GEN8_POSTED; 117 + dw[i++] = lower_32_bits(addr); 118 + dw[i++] = upper_32_bits(addr); 119 + dw[i++] = lower_32_bits(value); 120 + dw[i++] = upper_32_bits(value); 121 + 122 + return i; 123 + } 124 + 125 + static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw, 126 + int i) 127 + { 128 + dw[i++] = GFX_OP_PIPE_CONTROL(6); 129 + dw[i++] = (stall_only ? PIPE_CONTROL_CS_STALL : 130 + PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL) | 131 + PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE; 132 + dw[i++] = addr; 133 + dw[i++] = 0; 134 + dw[i++] = value; 135 + dw[i++] = 0; /* We're thrashing one extra dword. */ 136 + 137 + return i; 138 + } 139 + 140 + static u32 get_ppgtt_flag(struct xe_sched_job *job) 141 + { 142 + return !(job->engine->flags & ENGINE_FLAG_WA) ? 
BIT(8) : 0; 143 + } 144 + 145 + static void __emit_job_gen12_copy(struct xe_sched_job *job, struct xe_lrc *lrc, 146 + u64 batch_addr, u32 seqno) 147 + { 148 + u32 dw[MAX_JOB_SIZE_DW], i = 0; 149 + u32 ppgtt_flag = get_ppgtt_flag(job); 150 + 151 + /* XXX: Conditional flushing possible */ 152 + dw[i++] = preparser_disable(true); 153 + i = emit_flush_invalidate(0, dw, i); 154 + dw[i++] = preparser_disable(false); 155 + 156 + i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), 157 + seqno, dw, i); 158 + 159 + i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); 160 + 161 + if (job->user_fence.used) 162 + i = emit_store_imm_ppgtt_posted(job->user_fence.addr, 163 + job->user_fence.value, 164 + dw, i); 165 + 166 + i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, dw, i); 167 + 168 + i = emit_user_interrupt(dw, i); 169 + 170 + XE_BUG_ON(i > MAX_JOB_SIZE_DW); 171 + 172 + xe_lrc_write_ring(lrc, dw, i * sizeof(*dw)); 173 + } 174 + 175 + static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, 176 + u64 batch_addr, u32 seqno) 177 + { 178 + u32 dw[MAX_JOB_SIZE_DW], i = 0; 179 + u32 ppgtt_flag = get_ppgtt_flag(job); 180 + struct xe_gt *gt = job->engine->gt; 181 + struct xe_device *xe = gt_to_xe(gt); 182 + bool decode = job->engine->class == XE_ENGINE_CLASS_VIDEO_DECODE; 183 + 184 + /* XXX: Conditional flushing possible */ 185 + dw[i++] = preparser_disable(true); 186 + i = emit_flush_invalidate(decode ? 
MI_INVALIDATE_BSD : 0, dw, i); 187 + /* Wa_1809175790 */ 188 + if (!xe->info.has_flat_ccs) { 189 + if (decode) 190 + i = emit_aux_table_inv(gt, GEN12_VD0_AUX_INV.reg, dw, i); 191 + else 192 + i = emit_aux_table_inv(gt, GEN12_VE0_AUX_INV.reg, dw, i); 193 + } 194 + dw[i++] = preparser_disable(false); 195 + 196 + i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), 197 + seqno, dw, i); 198 + 199 + i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); 200 + 201 + if (job->user_fence.used) 202 + i = emit_store_imm_ppgtt_posted(job->user_fence.addr, 203 + job->user_fence.value, 204 + dw, i); 205 + 206 + i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, dw, i); 207 + 208 + i = emit_user_interrupt(dw, i); 209 + 210 + XE_BUG_ON(i > MAX_JOB_SIZE_DW); 211 + 212 + xe_lrc_write_ring(lrc, dw, i * sizeof(*dw)); 213 + } 214 + 215 + /* 216 + * 3D-related flags that can't be set on _engines_ that lack access to the 3D 217 + * pipeline (i.e., CCS engines). 218 + */ 219 + #define PIPE_CONTROL_3D_ENGINE_FLAGS (\ 220 + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | \ 221 + PIPE_CONTROL_DEPTH_CACHE_FLUSH | \ 222 + PIPE_CONTROL_TILE_CACHE_FLUSH | \ 223 + PIPE_CONTROL_DEPTH_STALL | \ 224 + PIPE_CONTROL_STALL_AT_SCOREBOARD | \ 225 + PIPE_CONTROL_PSD_SYNC | \ 226 + PIPE_CONTROL_AMFS_FLUSH | \ 227 + PIPE_CONTROL_VF_CACHE_INVALIDATE | \ 228 + PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET) 229 + 230 + /* 3D-related flags that can't be set on _platforms_ that lack a 3D pipeline */ 231 + #define PIPE_CONTROL_3D_ARCH_FLAGS ( \ 232 + PIPE_CONTROL_3D_ENGINE_FLAGS | \ 233 + PIPE_CONTROL_INDIRECT_STATE_DISABLE | \ 234 + PIPE_CONTROL_FLUSH_ENABLE | \ 235 + PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \ 236 + PIPE_CONTROL_DC_FLUSH_ENABLE) 237 + 238 + static void __emit_job_gen12_render_compute(struct xe_sched_job *job, 239 + struct xe_lrc *lrc, 240 + u64 batch_addr, u32 seqno) 241 + { 242 + u32 dw[MAX_JOB_SIZE_DW], i = 0; 243 + u32 ppgtt_flag = get_ppgtt_flag(job); 244 + struct xe_gt *gt = job->engine->gt; 245 + 
struct xe_device *xe = gt_to_xe(gt); 246 + bool pvc = xe->info.platform == XE_PVC; 247 + u32 mask_flags = 0; 248 + 249 + /* XXX: Conditional flushing possible */ 250 + dw[i++] = preparser_disable(true); 251 + if (pvc) 252 + mask_flags = PIPE_CONTROL_3D_ARCH_FLAGS; 253 + else if (job->engine->class == XE_ENGINE_CLASS_COMPUTE) 254 + mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS; 255 + i = emit_pipe_invalidate(mask_flags, dw, i); 256 + /* Wa_1809175790 */ 257 + if (!xe->info.has_flat_ccs) 258 + i = emit_aux_table_inv(gt, GEN12_CCS_AUX_INV.reg, dw, i); 259 + dw[i++] = preparser_disable(false); 260 + 261 + i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), 262 + seqno, dw, i); 263 + 264 + i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); 265 + 266 + if (job->user_fence.used) 267 + i = emit_store_imm_ppgtt_posted(job->user_fence.addr, 268 + job->user_fence.value, 269 + dw, i); 270 + 271 + i = emit_pipe_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, pvc, dw, i); 272 + 273 + i = emit_user_interrupt(dw, i); 274 + 275 + XE_BUG_ON(i > MAX_JOB_SIZE_DW); 276 + 277 + xe_lrc_write_ring(lrc, dw, i * sizeof(*dw)); 278 + } 279 + 280 + static void emit_migration_job_gen12(struct xe_sched_job *job, 281 + struct xe_lrc *lrc, u32 seqno) 282 + { 283 + u32 dw[MAX_JOB_SIZE_DW], i = 0; 284 + 285 + i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), 286 + seqno, dw, i); 287 + 288 + i = emit_bb_start(job->batch_addr[0], BIT(8), dw, i); 289 + 290 + dw[i++] = preparser_disable(true); 291 + i = emit_flush_invalidate(0, dw, i); 292 + dw[i++] = preparser_disable(false); 293 + 294 + i = emit_bb_start(job->batch_addr[1], BIT(8), dw, i); 295 + 296 + dw[i++] = (MI_FLUSH_DW | MI_INVALIDATE_TLB | job->migrate_flush_flags | 297 + MI_FLUSH_DW_OP_STOREDW) + 1; 298 + dw[i++] = xe_lrc_seqno_ggtt_addr(lrc) | MI_FLUSH_DW_USE_GTT; 299 + dw[i++] = 0; 300 + dw[i++] = seqno; /* value */ 301 + 302 + i = emit_user_interrupt(dw, i); 303 + 304 + XE_BUG_ON(i > MAX_JOB_SIZE_DW); 305 + 306 + 
xe_lrc_write_ring(lrc, dw, i * sizeof(*dw)); 307 + } 308 + 309 + static void emit_job_gen12_copy(struct xe_sched_job *job) 310 + { 311 + int i; 312 + 313 + if (xe_sched_job_is_migration(job->engine)) { 314 + emit_migration_job_gen12(job, job->engine->lrc, 315 + xe_sched_job_seqno(job)); 316 + return; 317 + } 318 + 319 + for (i = 0; i < job->engine->width; ++i) 320 + __emit_job_gen12_copy(job, job->engine->lrc + i, 321 + job->batch_addr[i], 322 + xe_sched_job_seqno(job)); 323 + } 324 + 325 + static void emit_job_gen12_video(struct xe_sched_job *job) 326 + { 327 + int i; 328 + 329 + /* FIXME: Not doing parallel handshake for now */ 330 + for (i = 0; i < job->engine->width; ++i) 331 + __emit_job_gen12_video(job, job->engine->lrc + i, 332 + job->batch_addr[i], 333 + xe_sched_job_seqno(job)); 334 + } 335 + 336 + static void emit_job_gen12_render_compute(struct xe_sched_job *job) 337 + { 338 + int i; 339 + 340 + for (i = 0; i < job->engine->width; ++i) 341 + __emit_job_gen12_render_compute(job, job->engine->lrc + i, 342 + job->batch_addr[i], 343 + xe_sched_job_seqno(job)); 344 + } 345 + 346 + static const struct xe_ring_ops ring_ops_gen12_copy = { 347 + .emit_job = emit_job_gen12_copy, 348 + }; 349 + 350 + static const struct xe_ring_ops ring_ops_gen12_video = { 351 + .emit_job = emit_job_gen12_video, 352 + }; 353 + 354 + static const struct xe_ring_ops ring_ops_gen12_render_compute = { 355 + .emit_job = emit_job_gen12_render_compute, 356 + }; 357 + 358 + const struct xe_ring_ops * 359 + xe_ring_ops_get(struct xe_gt *gt, enum xe_engine_class class) 360 + { 361 + switch (class) { 362 + case XE_ENGINE_CLASS_COPY: 363 + return &ring_ops_gen12_copy; 364 + case XE_ENGINE_CLASS_VIDEO_DECODE: 365 + case XE_ENGINE_CLASS_VIDEO_ENHANCE: 366 + return &ring_ops_gen12_video; 367 + case XE_ENGINE_CLASS_RENDER: 368 + case XE_ENGINE_CLASS_COMPUTE: 369 + return &ring_ops_gen12_render_compute; 370 + default: 371 + return NULL; 372 + } 373 + }
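Every emit helper in this file follows one shape: append a fixed number of dwords to an on-stack buffer, return the updated write index, and bounds-check the total against MAX_JOB_SIZE_DW before writing the ring. The convention in isolation (the opcode here is made up for illustration, not a real GPU command encoding):

```c
#include <assert.h>
#include <stdint.h>

#define DEMO_MAX_JOB_SIZE_DW 48

/* Illustrative opcode; not a real command. */
#define DEMO_MI_STORE_IMM 0x20000000u

/* Append a 4-dword "store immediate" packet, returning the new index. */
static int demo_emit_store_imm(uint32_t addr, uint32_t value,
			       uint32_t *dw, int i)
{
	dw[i++] = DEMO_MI_STORE_IMM | 2;	/* opcode + dword length */
	dw[i++] = addr;
	dw[i++] = 0;				/* upper address bits */
	dw[i++] = value;

	return i;
}

/* Build a tiny "job": two stores, then check the buffer bound. */
static int demo_emit_job(uint32_t *dw)
{
	int i = 0;

	i = demo_emit_store_imm(0x1000, 0xdeadbeefu, dw, i);
	i = demo_emit_store_imm(0x2000, 1, dw, i);

	return i <= DEMO_MAX_JOB_SIZE_DW ? i : -1;
}
```

Threading the index through the helpers keeps each packet's length visible at its emit site, which is what makes the final `XE_BUG_ON(i > MAX_JOB_SIZE_DW)` a meaningful whole-job check.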
+17
drivers/gpu/drm/xe/xe_ring_ops.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_RING_OPS_H_ 7 + #define _XE_RING_OPS_H_ 8 + 9 + #include "xe_hw_engine_types.h" 10 + #include "xe_ring_ops_types.h" 11 + 12 + struct xe_gt; 13 + 14 + const struct xe_ring_ops * 15 + xe_ring_ops_get(struct xe_gt *gt, enum xe_engine_class class); 16 + 17 + #endif
+22
drivers/gpu/drm/xe/xe_ring_ops_types.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_RING_OPS_TYPES_H_ 7 + #define _XE_RING_OPS_TYPES_H_ 8 + 9 + struct xe_sched_job; 10 + 11 + #define MAX_JOB_SIZE_DW 48 12 + #define MAX_JOB_SIZE_BYTES (MAX_JOB_SIZE_DW * 4) 13 + 14 + /** 15 + * struct xe_ring_ops - Ring operations 16 + */ 17 + struct xe_ring_ops { 18 + /** @emit_job: Write job to ring */ 19 + void (*emit_job)(struct xe_sched_job *job); 20 + }; 21 + 22 + #endif
+144
drivers/gpu/drm/xe/xe_rtp.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #include "xe_rtp.h" 7 + 8 + #include <drm/xe_drm.h> 9 + 10 + #include "xe_gt.h" 11 + #include "xe_macros.h" 12 + #include "xe_reg_sr.h" 13 + 14 + /** 15 + * DOC: Register Table Processing 16 + * 17 + * Internal infrastructure to define how registers should be updated based on 18 + * rules and actions. This can be used to define tables with multiple entries 19 + * (one per register) that will be walked over at some point in time to apply 20 + * the values to the registers that have matching rules. 21 + */ 22 + 23 + static bool rule_matches(struct xe_gt *gt, 24 + struct xe_hw_engine *hwe, 25 + const struct xe_rtp_entry *entry) 26 + { 27 + const struct xe_device *xe = gt_to_xe(gt); 28 + const struct xe_rtp_rule *r; 29 + unsigned int i; 30 + bool match; 31 + 32 + for (r = entry->rules, i = 0; i < entry->n_rules; 33 + r = &entry->rules[++i]) { 34 + switch (r->match_type) { 35 + case XE_RTP_MATCH_PLATFORM: 36 + match = xe->info.platform == r->platform; 37 + break; 38 + case XE_RTP_MATCH_SUBPLATFORM: 39 + match = xe->info.platform == r->platform && 40 + xe->info.subplatform == r->subplatform; 41 + break; 42 + case XE_RTP_MATCH_GRAPHICS_VERSION: 43 + /* TODO: match display */ 44 + match = xe->info.graphics_verx100 == r->ver_start; 45 + break; 46 + case XE_RTP_MATCH_GRAPHICS_VERSION_RANGE: 47 + match = xe->info.graphics_verx100 >= r->ver_start && 48 + xe->info.graphics_verx100 <= r->ver_end; 49 + break; 50 + case XE_RTP_MATCH_MEDIA_VERSION: 51 + match = xe->info.media_verx100 == r->ver_start; 52 + break; 53 + case XE_RTP_MATCH_MEDIA_VERSION_RANGE: 54 + match = xe->info.media_verx100 >= r->ver_start && 55 + xe->info.media_verx100 <= r->ver_end; 56 + break; 57 + case XE_RTP_MATCH_STEP: 58 + /* TODO: match media/display */ 59 + match = xe->info.step.graphics >= r->step_start && 60 + xe->info.step.graphics < r->step_end; 61 + break; 62 + case XE_RTP_MATCH_ENGINE_CLASS: 63 + 
match = hwe->class == r->engine_class; 64 + break; 65 + case XE_RTP_MATCH_NOT_ENGINE_CLASS: 66 + match = hwe->class != r->engine_class; 67 + break; 68 + case XE_RTP_MATCH_FUNC: 69 + match = r->match_func(gt, hwe); 70 + break; 71 + case XE_RTP_MATCH_INTEGRATED: 72 + match = !xe->info.is_dgfx; 73 + break; 74 + case XE_RTP_MATCH_DISCRETE: 75 + match = xe->info.is_dgfx; 76 + break; 77 + 78 + default: 79 + XE_WARN_ON(r->match_type); 80 + } 81 + 82 + if (!match) 83 + return false; 84 + } 85 + 86 + return true; 87 + } 88 + 89 + static void rtp_add_sr_entry(const struct xe_rtp_entry *entry, 90 + struct xe_gt *gt, 91 + u32 mmio_base, 92 + struct xe_reg_sr *sr) 93 + { 94 + u32 reg = entry->regval.reg + mmio_base; 95 + struct xe_reg_sr_entry sr_entry = { 96 + .clr_bits = entry->regval.clr_bits, 97 + .set_bits = entry->regval.set_bits, 98 + .read_mask = entry->regval.read_mask, 99 + .masked_reg = entry->regval.flags & XE_RTP_FLAG_MASKED_REG, 100 + .reg_type = entry->regval.reg_type, 101 + }; 102 + 103 + xe_reg_sr_add(sr, reg, &sr_entry); 104 + } 105 + 106 + /** 107 + * xe_rtp_process - Process all rtp @entries, adding the matching ones to @sr 108 + * @entries: Table with RTP definitions 109 + * @sr: Where to add an entry to with the values for matching. This can be 110 + * viewed as the "coalesced view" of the multiple tables. The bits for each 111 + * register set are expected not to collide with previously added entries 112 + * @gt: The GT to be used for matching rules 113 + * @hwe: Engine instance to use for matching rules and as mmio base 114 + * 115 + * Walk the table pointed by @entries (with an empty sentinel) and add all 116 + * entries with matching rules to @sr. 
If @hwe is not NULL, its mmio_base is 117 + * used to calculate the right register offset 118 + */ 119 + void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr, 120 + struct xe_gt *gt, struct xe_hw_engine *hwe) 121 + { 122 + const struct xe_rtp_entry *entry; 123 + 124 + for (entry = entries; entry && entry->name; entry++) { 125 + u32 mmio_base = 0; 126 + 127 + if (entry->regval.flags & XE_RTP_FLAG_FOREACH_ENGINE) { 128 + struct xe_hw_engine *each_hwe; 129 + enum xe_hw_engine_id id; 130 + 131 + for_each_hw_engine(each_hwe, gt, id) { 132 + mmio_base = each_hwe->mmio_base; 133 + 134 + if (rule_matches(gt, each_hwe, entry)) 135 + rtp_add_sr_entry(entry, gt, mmio_base, sr); 136 + } 137 + } else if (rule_matches(gt, hwe, entry)) { 138 + if (entry->regval.flags & XE_RTP_FLAG_ENGINE_BASE) 139 + mmio_base = hwe->mmio_base; 140 + 141 + rtp_add_sr_entry(entry, gt, mmio_base, sr); 142 + } 143 + } 144 + }
+340
drivers/gpu/drm/xe/xe_rtp.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_RTP_ 7 + #define _XE_RTP_ 8 + 9 + #include <linux/xarray.h> 10 + #include <linux/types.h> 11 + 12 + #include "xe_rtp_types.h" 13 + 14 + #include "i915_reg_defs.h" 15 + 16 + /* 17 + * Register table poke infrastructure 18 + */ 19 + 20 + struct xe_hw_engine; 21 + struct xe_gt; 22 + struct xe_reg_sr; 23 + 24 + /* 25 + * Helper macros - not to be used outside this header. 26 + */ 27 + /* This counts to 12. Any more, it will return 13th argument. */ 28 + #define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n 29 + #define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) 30 + 31 + #define __CONCAT(a, b) a ## b 32 + #define CONCATENATE(a, b) __CONCAT(a, b) 33 + 34 + #define __CALL_FOR_EACH_1(MACRO_, x, ...) MACRO_(x) 35 + #define __CALL_FOR_EACH_2(MACRO_, x, ...) \ 36 + MACRO_(x) __CALL_FOR_EACH_1(MACRO_, ##__VA_ARGS__) 37 + #define __CALL_FOR_EACH_3(MACRO_, x, ...) \ 38 + MACRO_(x) __CALL_FOR_EACH_2(MACRO_, ##__VA_ARGS__) 39 + #define __CALL_FOR_EACH_4(MACRO_, x, ...) \ 40 + MACRO_(x) __CALL_FOR_EACH_3(MACRO_, ##__VA_ARGS__) 41 + 42 + #define _CALL_FOR_EACH(NARGS_, MACRO_, x, ...) \ 43 + CONCATENATE(__CALL_FOR_EACH_, NARGS_)(MACRO_, x, ##__VA_ARGS__) 44 + #define CALL_FOR_EACH(MACRO_, x, ...) \ 45 + _CALL_FOR_EACH(COUNT_ARGS(x, ##__VA_ARGS__), MACRO_, x, ##__VA_ARGS__) 46 + 47 + #define _XE_RTP_REG(x_) (x_), \ 48 + .reg_type = XE_RTP_REG_REGULAR 49 + #define _XE_RTP_MCR_REG(x_) (x_), \ 50 + .reg_type = XE_RTP_REG_MCR 51 + 52 + /* 53 + * Helper macros for concatenating prefix - do not use them directly outside 54 + * this header 55 + */ 56 + #define __ADD_XE_RTP_FLAG_PREFIX(x) CONCATENATE(XE_RTP_FLAG_, x) | 57 + #define __ADD_XE_RTP_RULE_PREFIX(x) CONCATENATE(XE_RTP_RULE_, x) , 58 + 59 + /* 60 + * Macros to encode rules to match against platform, IP version, stepping, etc. 
61 + * Shouldn't be used directly - see XE_RTP_RULES() 62 + */ 63 + 64 + #define _XE_RTP_RULE_PLATFORM(plat__) \ 65 + { .match_type = XE_RTP_MATCH_PLATFORM, .platform = plat__ } 66 + 67 + #define _XE_RTP_RULE_SUBPLATFORM(plat__, sub__) \ 68 + { .match_type = XE_RTP_MATCH_SUBPLATFORM, \ 69 + .platform = plat__, .subplatform = sub__ } 70 + 71 + #define _XE_RTP_RULE_STEP(start__, end__) \ 72 + { .match_type = XE_RTP_MATCH_STEP, \ 73 + .step_start = start__, .step_end = end__ } 74 + 75 + #define _XE_RTP_RULE_ENGINE_CLASS(cls__) \ 76 + { .match_type = XE_RTP_MATCH_ENGINE_CLASS, \ 77 + .engine_class = (cls__) } 78 + 79 + /** 80 + * XE_RTP_RULE_PLATFORM - Create rule matching platform 81 + * @plat_: platform to match 82 + * 83 + * Refer to XE_RTP_RULES() for expected usage. 84 + */ 85 + #define XE_RTP_RULE_PLATFORM(plat_) \ 86 + _XE_RTP_RULE_PLATFORM(XE_##plat_) 87 + 88 + /** 89 + * XE_RTP_RULE_SUBPLATFORM - Create rule matching platform and sub-platform 90 + * @plat_: platform to match 91 + * @sub_: sub-platform to match 92 + * 93 + * Refer to XE_RTP_RULES() for expected usage. 94 + */ 95 + #define XE_RTP_RULE_SUBPLATFORM(plat_, sub_) \ 96 + _XE_RTP_RULE_SUBPLATFORM(XE_##plat_, XE_SUBPLATFORM_##plat_##_##sub_) 97 + 98 + /** 99 + * XE_RTP_RULE_STEP - Create rule matching platform stepping 100 + * @start_: First stepping matching the rule 101 + * @end_: First stepping that does not match the rule 102 + * 103 + * Note that the range matching this rule is [ @start_, @end_ ), i.e. inclusive on 104 + * the left, exclusive on the right. 105 + * 106 + * Refer to XE_RTP_RULES() for expected usage. 107 + */ 108 + #define XE_RTP_RULE_STEP(start_, end_) \ 109 + _XE_RTP_RULE_STEP(STEP_##start_, STEP_##end_) 110 + 111 + /** 112 + * XE_RTP_RULE_ENGINE_CLASS - Create rule matching an engine class 113 + * @cls_: Engine class to match 114 + * 115 + * Refer to XE_RTP_RULES() for expected usage. 
116 + */ 117 + #define XE_RTP_RULE_ENGINE_CLASS(cls_) \ 118 + _XE_RTP_RULE_ENGINE_CLASS(XE_ENGINE_CLASS_##cls_) 119 + 120 + /** 121 + * XE_RTP_RULE_FUNC - Create rule using callback function for match 122 + * @func__: Function to call to decide if rule matches 123 + * 124 + * This allows more complex checks to be performed. The ``XE_RTP`` 125 + * infrastructure will simply call the function @func__ passed to decide if this 126 + * rule matches the device. 127 + * 128 + * Refer to XE_RTP_RULES() for expected usage. 129 + */ 130 + #define XE_RTP_RULE_FUNC(func__) \ 131 + { .match_type = XE_RTP_MATCH_FUNC, \ 132 + .match_func = (func__) } 133 + 134 + /** 135 + * XE_RTP_RULE_GRAPHICS_VERSION - Create rule matching graphics version 136 + * @ver__: Graphics IP version to match 137 + * 138 + * Refer to XE_RTP_RULES() for expected usage. 139 + */ 140 + #define XE_RTP_RULE_GRAPHICS_VERSION(ver__) \ 141 + { .match_type = XE_RTP_MATCH_GRAPHICS_VERSION, \ 142 + .ver_start = ver__, } 143 + 144 + /** 145 + * XE_RTP_RULE_GRAPHICS_VERSION_RANGE - Create rule matching a range of graphics version 146 + * @ver_start__: First graphics IP version to match 147 + * @ver_end__: Last graphics IP version to match 148 + * 149 + * Note that the range matching this rule is [ @ver_start__, @ver_end__ ], i.e. 150 + * inclusive on both sides 151 + * 152 + * Refer to XE_RTP_RULES() for expected usage. 153 + */ 154 + #define XE_RTP_RULE_GRAPHICS_VERSION_RANGE(ver_start__, ver_end__) \ 155 + { .match_type = XE_RTP_MATCH_GRAPHICS_VERSION_RANGE, \ 156 + .ver_start = ver_start__, .ver_end = ver_end__, } 157 + 158 + /** 159 + * XE_RTP_RULE_MEDIA_VERSION - Create rule matching media version 160 + * @ver__: Media IP version to match 161 + * 162 + * Refer to XE_RTP_RULES() for expected usage. 
163 + */ 164 + #define XE_RTP_RULE_MEDIA_VERSION(ver__) \ 165 + { .match_type = XE_RTP_MATCH_MEDIA_VERSION, \ 166 + .ver_start = ver__, } 167 + 168 + /** 169 + * XE_RTP_RULE_MEDIA_VERSION_RANGE - Create rule matching a range of media version 170 + * @ver_start__: First media IP version to match 171 + * @ver_end__: Last media IP version to match 172 + * 173 + * Note that the range matching this rule is [ @ver_start__, @ver_end__ ], i.e. 174 + * inclusive on both sides 175 + * 176 + * Refer to XE_RTP_RULES() for expected usage. 177 + */ 178 + #define XE_RTP_RULE_MEDIA_VERSION_RANGE(ver_start__, ver_end__) \ 179 + { .match_type = XE_RTP_MATCH_MEDIA_VERSION_RANGE, \ 180 + .ver_start = ver_start__, .ver_end = ver_end__, } 181 + 182 + /** 183 + * XE_RTP_RULE_IS_INTEGRATED - Create a rule matching integrated graphics devices 184 + * 185 + * Refer to XE_RTP_RULES() for expected usage. 186 + */ 187 + #define XE_RTP_RULE_IS_INTEGRATED \ 188 + { .match_type = XE_RTP_MATCH_INTEGRATED } 189 + 190 + /** 191 + * XE_RTP_RULE_IS_DISCRETE - Create a rule matching discrete graphics devices 192 + * 193 + * Refer to XE_RTP_RULES() for expected usage. 194 + */ 195 + #define XE_RTP_RULE_IS_DISCRETE \ 196 + { .match_type = XE_RTP_MATCH_DISCRETE } 197 + 198 + /** 199 + * XE_RTP_WR - Helper to write a value to the register, overriding all the bits 200 + * @reg_: Register 201 + * @val_: Value to set 202 + * @...: Additional fields to override in the struct xe_rtp_regval entry 203 + * 204 + * The corresponding notation in bspec is: 205 + * 206 + * REGNAME = VALUE 207 + */ 208 + #define XE_RTP_WR(reg_, val_, ...) \ 209 + .regval = { .reg = reg_, .clr_bits = ~0u, .set_bits = (val_), \ 210 + .read_mask = (~0u), ##__VA_ARGS__ } 211 + 212 + /** 213 + * XE_RTP_SET - Set bits from @val_ in the register. 
214 + * @reg_: Register 215 + * @val_: Bits to set in the register 216 + * @...: Additional fields to override in the struct xe_rtp_regval entry 217 + * 218 + * For masked registers this translates to a single write, while for other 219 + * registers it's an RMW. The corresponding bspec notation is (example for bits 2 220 + * and 5, but could be any): 221 + * 222 + * REGNAME[2] = 1 223 + * REGNAME[5] = 1 224 + */ 225 + #define XE_RTP_SET(reg_, val_, ...) \ 226 + .regval = { .reg = reg_, .clr_bits = (val_), .set_bits = (val_), \ 227 + .read_mask = (val_), ##__VA_ARGS__ } 228 + 229 + /** 230 + * XE_RTP_CLR: Clear bits from @val_ in the register. 231 + * @reg_: Register 232 + * @val_: Bits to clear in the register 233 + * @...: Additional fields to override in the struct xe_rtp_regval entry 234 + * 235 + * For masked registers this translates to a single write, while for other 236 + * registers it's an RMW. The corresponding bspec notation is (example for bits 2 237 + * and 5, but could be any): 238 + * 239 + * REGNAME[2] = 0 240 + * REGNAME[5] = 0 241 + */ 242 + #define XE_RTP_CLR(reg_, val_, ...) \ 243 + .regval = { .reg = reg_, .clr_bits = (val_), .set_bits = 0, \ 244 + .read_mask = (val_), ##__VA_ARGS__ } 245 + 246 + /** 247 + * XE_RTP_FIELD_SET: Set a bit range, defined by @mask_bits_, to the value in @val_ 248 + * @reg_: Register 249 + * @mask_bits_: Mask of bits to be changed in the register, forming a field 250 + * @val_: Value to set in the field denoted by @mask_bits_ 251 + * @...: Additional fields to override in the struct xe_rtp_regval entry 252 + * 253 + * For masked registers this translates to a single write, while for other 254 + * registers it's an RMW. The corresponding bspec notation is: 255 + * 256 + * REGNAME[<end>:<start>] = VALUE 257 + */ 258 + #define XE_RTP_FIELD_SET(reg_, mask_bits_, val_, ...) 
\ 259 + .regval = { .reg = reg_, .clr_bits = (mask_bits_), .set_bits = (val_),\ 260 + .read_mask = (mask_bits_), ##__VA_ARGS__ } 261 + 262 + #define XE_RTP_FIELD_SET_NO_READ_MASK(reg_, mask_bits_, val_, ...) \ 263 + .regval = { .reg = reg_, .clr_bits = (mask_bits_), .set_bits = (val_),\ 264 + .read_mask = 0, ##__VA_ARGS__ } 265 + 266 + /** 267 + * XE_WHITELIST_REGISTER - Add register to userspace whitelist 268 + * @reg_: Register 269 + * @flags_: Whitelist-specific flags to set 270 + * @...: Additional fields to override in the struct xe_rtp_regval entry 271 + * 272 + * Add a register to the whitelist, allowing userspace to modify the register with 273 + * regular user privileges. 274 + */ 275 + #define XE_WHITELIST_REGISTER(reg_, flags_, ...) \ 276 + /* TODO fail build if ((flags) & ~(RING_FORCE_TO_NONPRIV_MASK_VALID)) */\ 277 + .regval = { .reg = reg_, .set_bits = (flags_), \ 278 + .clr_bits = RING_FORCE_TO_NONPRIV_MASK_VALID, \ 279 + ##__VA_ARGS__ } 280 + 281 + /** 282 + * XE_RTP_NAME - Helper to set the name in xe_rtp_entry 283 + * @s_: Name describing this rule, often a HW-specific number 284 + * 285 + * TODO: maybe move this behind a debug config? 286 + */ 287 + #define XE_RTP_NAME(s_) .name = (s_) 288 + 289 + /** 290 + * XE_RTP_FLAG - Helper to add multiple flags to a struct xe_rtp_regval entry 291 + * @f1_: Last part of a ``XE_RTP_FLAG_*`` 292 + * @...: Additional flags, defined like @f1_ 293 + * 294 + * Helper to automatically add a ``XE_RTP_FLAG_`` prefix to @f1_ so it can be 295 + * easily used to define struct xe_rtp_regval entries. Example: 296 + * 297 + * .. code-block:: c 298 + * 299 + * const struct xe_rtp_entry wa_entries[] = { 300 + * ... 301 + * { XE_RTP_NAME("test-entry"), 302 + * XE_RTP_FLAG(FOREACH_ENGINE, MASKED_REG), 303 + * ... 304 + * }, 305 + * ... 306 + * }; 307 + */ 308 + #define XE_RTP_FLAG(f1_, ...) 
\ 309 + .flags = (CALL_FOR_EACH(__ADD_XE_RTP_FLAG_PREFIX, f1_, ##__VA_ARGS__) 0) 310 + 311 + /** 312 + * XE_RTP_RULES - Helper to set multiple rules to a struct xe_rtp_entry entry 313 + * @r1: Last part of XE_RTP_MATCH_* 314 + * @...: Additional rules, defined like @r1 315 + * 316 + * At least one rule is needed and up to 4 are supported. Multiple rules are 317 + * AND'ed together, i.e. all the rules must evaluate to true for the entry to 318 + * be processed. See XE_RTP_MATCH_* for the possible match rules. Example: 319 + * 320 + * .. code-block:: c 321 + * 322 + * const struct xe_rtp_entry wa_entries[] = { 323 + * ... 324 + * { XE_RTP_NAME("test-entry"), 325 + * XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), 326 + * ... 327 + * }, 328 + * ... 329 + * }; 330 + */ 331 + #define XE_RTP_RULES(r1, ...) \ 332 + .n_rules = COUNT_ARGS(r1, ##__VA_ARGS__), \ 333 + .rules = (struct xe_rtp_rule[]) { \ 334 + CALL_FOR_EACH(__ADD_XE_RTP_RULE_PREFIX, r1, ##__VA_ARGS__) \ 335 + } 336 + 337 + void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr, 338 + struct xe_gt *gt, struct xe_hw_engine *hwe); 339 + 340 + #endif
+105
drivers/gpu/drm/xe/xe_rtp_types.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_RTP_TYPES_ 7 + #define _XE_RTP_TYPES_ 8 + 9 + #include <linux/types.h> 10 + 11 + #include "i915_reg_defs.h" 12 + 13 + struct xe_hw_engine; 14 + struct xe_gt; 15 + 16 + enum { 17 + XE_RTP_REG_REGULAR, 18 + XE_RTP_REG_MCR, 19 + }; 20 + 21 + /** 22 + * struct xe_rtp_regval - register and value for rtp table 23 + */ 24 + struct xe_rtp_regval { 25 + /** @reg: Register */ 26 + u32 reg; 27 + /* 28 + * TODO: maybe we need a union here with a func pointer for cases 29 + * that are too specific to be generalized 30 + */ 31 + /** @clr_bits: bits to clear when updating register */ 32 + u32 clr_bits; 33 + /** @set_bits: bits to set when updating register */ 34 + u32 set_bits; 35 + #define XE_RTP_NOCHECK .read_mask = 0 36 + /** @read_mask: mask for bits to consider when reading value back */ 37 + u32 read_mask; 38 + #define XE_RTP_FLAG_FOREACH_ENGINE BIT(0) 39 + #define XE_RTP_FLAG_MASKED_REG BIT(1) 40 + #define XE_RTP_FLAG_ENGINE_BASE BIT(2) 41 + /** @flags: flags to apply on rule evaluation or action */ 42 + u8 flags; 43 + /** @reg_type: register type, see ``XE_RTP_REG_*`` */ 44 + u8 reg_type; 45 + }; 46 + 47 + enum { 48 + XE_RTP_MATCH_PLATFORM, 49 + XE_RTP_MATCH_SUBPLATFORM, 50 + XE_RTP_MATCH_GRAPHICS_VERSION, 51 + XE_RTP_MATCH_GRAPHICS_VERSION_RANGE, 52 + XE_RTP_MATCH_MEDIA_VERSION, 53 + XE_RTP_MATCH_MEDIA_VERSION_RANGE, 54 + XE_RTP_MATCH_INTEGRATED, 55 + XE_RTP_MATCH_DISCRETE, 56 + XE_RTP_MATCH_STEP, 57 + XE_RTP_MATCH_ENGINE_CLASS, 58 + XE_RTP_MATCH_NOT_ENGINE_CLASS, 59 + XE_RTP_MATCH_FUNC, 60 + }; 61 + 62 + /** struct xe_rtp_rule - match rule for processing entry */ 63 + struct xe_rtp_rule { 64 + u8 match_type; 65 + 66 + /* match filters */ 67 + union { 68 + /* MATCH_PLATFORM / MATCH_SUBPLATFORM */ 69 + struct { 70 + u8 platform; 71 + u8 subplatform; 72 + }; 73 + /* 74 + * MATCH_GRAPHICS_VERSION / XE_RTP_MATCH_GRAPHICS_VERSION_RANGE / 75 + * 
MATCH_MEDIA_VERSION / XE_RTP_MATCH_MEDIA_VERSION_RANGE 76 + */ 77 + struct { 78 + u32 ver_start; 79 + #define XE_RTP_END_VERSION_UNDEFINED U32_MAX 80 + u32 ver_end; 81 + }; 82 + /* MATCH_STEP */ 83 + struct { 84 + u8 step_start; 85 + u8 step_end; 86 + }; 87 + /* MATCH_ENGINE_CLASS / MATCH_NOT_ENGINE_CLASS */ 88 + struct { 89 + u8 engine_class; 90 + }; 91 + /* MATCH_FUNC */ 92 + bool (*match_func)(const struct xe_gt *gt, 93 + const struct xe_hw_engine *hwe); 94 + }; 95 + }; 96 + 97 + /** struct xe_rtp_entry - Entry in an rtp table */ 98 + struct xe_rtp_entry { 99 + const char *name; 100 + const struct xe_rtp_regval regval; 101 + const struct xe_rtp_rule *rules; 102 + unsigned int n_rules; 103 + }; 104 + 105 + #endif
+96
drivers/gpu/drm/xe/xe_sa.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #include <linux/kernel.h> 7 + #include <drm/drm_managed.h> 8 + 9 + #include "xe_bo.h" 10 + #include "xe_device.h" 11 + #include "xe_gt.h" 12 + #include "xe_map.h" 13 + #include "xe_sa.h" 14 + 15 + static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg) 16 + { 17 + struct xe_sa_manager *sa_manager = arg; 18 + struct xe_bo *bo = sa_manager->bo; 19 + 20 + if (!bo) { 21 + drm_err(drm, "no bo for sa manager\n"); 22 + return; 23 + } 24 + 25 + drm_suballoc_manager_fini(&sa_manager->base); 26 + 27 + if (bo->vmap.is_iomem) 28 + kvfree(sa_manager->cpu_ptr); 29 + 30 + xe_bo_unpin_map_no_vm(bo); 31 + sa_manager->bo = NULL; 32 + } 33 + 34 + int xe_sa_bo_manager_init(struct xe_gt *gt, 35 + struct xe_sa_manager *sa_manager, 36 + u32 size, u32 align) 37 + { 38 + struct xe_device *xe = gt_to_xe(gt); 39 + u32 managed_size = size - SZ_4K; 40 + struct xe_bo *bo; 41 + 42 + sa_manager->bo = NULL; 43 + 44 + bo = xe_bo_create_pin_map(xe, gt, NULL, size, ttm_bo_type_kernel, 45 + XE_BO_CREATE_VRAM_IF_DGFX(gt) | 46 + XE_BO_CREATE_GGTT_BIT); 47 + if (IS_ERR(bo)) { 48 + drm_err(&xe->drm, "failed to allocate bo for sa manager: %ld\n", 49 + PTR_ERR(bo)); 50 + return PTR_ERR(bo); 51 + } 52 + sa_manager->bo = bo; 53 + 54 + drm_suballoc_manager_init(&sa_manager->base, managed_size, align); 55 + sa_manager->gpu_addr = xe_bo_ggtt_addr(bo); 56 + 57 + if (bo->vmap.is_iomem) { 58 + sa_manager->cpu_ptr = kvzalloc(managed_size, GFP_KERNEL); 59 + if (!sa_manager->cpu_ptr) { 60 + xe_bo_unpin_map_no_vm(sa_manager->bo); 61 + sa_manager->bo = NULL; 62 + return -ENOMEM; 63 + } 64 + } else { 65 + sa_manager->cpu_ptr = bo->vmap.vaddr; 66 + memset(sa_manager->cpu_ptr, 0, bo->ttm.base.size); 67 + } 68 + 69 + return drmm_add_action_or_reset(&xe->drm, xe_sa_bo_manager_fini, 70 + sa_manager); 71 + } 72 + 73 + struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager, 74 + unsigned size) 75 + { 
76 + return drm_suballoc_new(&sa_manager->base, size, GFP_KERNEL, true, 0); 77 + } 78 + 79 + void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo) 80 + { 81 + struct xe_sa_manager *sa_manager = to_xe_sa_manager(sa_bo->manager); 82 + struct xe_device *xe = gt_to_xe(sa_manager->bo->gt); 83 + 84 + if (!sa_manager->bo->vmap.is_iomem) 85 + return; 86 + 87 + xe_map_memcpy_to(xe, &sa_manager->bo->vmap, drm_suballoc_soffset(sa_bo), 88 + xe_sa_bo_cpu_addr(sa_bo), 89 + drm_suballoc_size(sa_bo)); 90 + } 91 + 92 + void xe_sa_bo_free(struct drm_suballoc *sa_bo, 93 + struct dma_fence *fence) 94 + { 95 + drm_suballoc_free(sa_bo, fence); 96 + }
+42
drivers/gpu/drm/xe/xe_sa.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + #ifndef _XE_SA_H_ 6 + #define _XE_SA_H_ 7 + 8 + #include "xe_sa_types.h" 9 + 10 + struct dma_fence; 11 + struct xe_bo; 12 + struct xe_gt; 13 + 14 + int xe_sa_bo_manager_init(struct xe_gt *gt, 15 + struct xe_sa_manager *sa_manager, 16 + u32 size, u32 align); 17 + 18 + struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager, 19 + u32 size); 20 + void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo); 21 + void xe_sa_bo_free(struct drm_suballoc *sa_bo, 22 + struct dma_fence *fence); 23 + 24 + static inline struct xe_sa_manager * 25 + to_xe_sa_manager(struct drm_suballoc_manager *mng) 26 + { 27 + return container_of(mng, struct xe_sa_manager, base); 28 + } 29 + 30 + static inline u64 xe_sa_bo_gpu_addr(struct drm_suballoc *sa) 31 + { 32 + return to_xe_sa_manager(sa->manager)->gpu_addr + 33 + drm_suballoc_soffset(sa); 34 + } 35 + 36 + static inline void *xe_sa_bo_cpu_addr(struct drm_suballoc *sa) 37 + { 38 + return to_xe_sa_manager(sa->manager)->cpu_ptr + 39 + drm_suballoc_soffset(sa); 40 + } 41 + 42 + #endif
+19
drivers/gpu/drm/xe/xe_sa_types.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + #ifndef _XE_SA_TYPES_H_ 6 + #define _XE_SA_TYPES_H_ 7 + 8 + #include <drm/drm_suballoc.h> 9 + 10 + struct xe_bo; 11 + 12 + struct xe_sa_manager { 13 + struct drm_suballoc_manager base; 14 + struct xe_bo *bo; 15 + u64 gpu_addr; 16 + void *cpu_ptr; 17 + }; 18 + 19 + #endif
+246
drivers/gpu/drm/xe/xe_sched_job.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2021 Intel Corporation 4 + */ 5 + 6 + #include "xe_sched_job.h" 7 + 8 + #include <linux/dma-fence-array.h> 9 + #include <linux/slab.h> 10 + 11 + #include "xe_device_types.h" 12 + #include "xe_engine.h" 13 + #include "xe_gt.h" 14 + #include "xe_hw_engine_types.h" 15 + #include "xe_hw_fence.h" 16 + #include "xe_lrc.h" 17 + #include "xe_macros.h" 18 + #include "xe_trace.h" 19 + #include "xe_vm.h" 20 + 21 + static struct kmem_cache *xe_sched_job_slab; 22 + static struct kmem_cache *xe_sched_job_parallel_slab; 23 + 24 + int __init xe_sched_job_module_init(void) 25 + { 26 + xe_sched_job_slab = 27 + kmem_cache_create("xe_sched_job", 28 + sizeof(struct xe_sched_job) + 29 + sizeof(u64), 0, 30 + SLAB_HWCACHE_ALIGN, NULL); 31 + if (!xe_sched_job_slab) 32 + return -ENOMEM; 33 + 34 + xe_sched_job_parallel_slab = 35 + kmem_cache_create("xe_sched_job_parallel", 36 + sizeof(struct xe_sched_job) + 37 + sizeof(u64) * 38 + XE_HW_ENGINE_MAX_INSTANCE , 0, 39 + SLAB_HWCACHE_ALIGN, NULL); 40 + if (!xe_sched_job_parallel_slab) { 41 + kmem_cache_destroy(xe_sched_job_slab); 42 + return -ENOMEM; 43 + } 44 + 45 + return 0; 46 + } 47 + 48 + void xe_sched_job_module_exit(void) 49 + { 50 + kmem_cache_destroy(xe_sched_job_slab); 51 + kmem_cache_destroy(xe_sched_job_parallel_slab); 52 + } 53 + 54 + static struct xe_sched_job *job_alloc(bool parallel) 55 + { 56 + return kmem_cache_zalloc(parallel ? xe_sched_job_parallel_slab : 57 + xe_sched_job_slab, GFP_KERNEL); 58 + } 59 + 60 + bool xe_sched_job_is_migration(struct xe_engine *e) 61 + { 62 + return e->vm && (e->vm->flags & XE_VM_FLAG_MIGRATION) && 63 + !(e->flags & ENGINE_FLAG_WA); 64 + } 65 + 66 + static void job_free(struct xe_sched_job *job) 67 + { 68 + struct xe_engine *e = job->engine; 69 + bool is_migration = xe_sched_job_is_migration(e); 70 + 71 + kmem_cache_free(xe_engine_is_parallel(job->engine) || is_migration ? 
72 + xe_sched_job_parallel_slab : xe_sched_job_slab, job); 73 + } 74 + 75 + struct xe_sched_job *xe_sched_job_create(struct xe_engine *e, 76 + u64 *batch_addr) 77 + { 78 + struct xe_sched_job *job; 79 + struct dma_fence **fences; 80 + bool is_migration = xe_sched_job_is_migration(e); 81 + int err; 82 + int i, j; 83 + u32 width; 84 + 85 + /* Migration and kernel engines have their own locking */ 86 + if (!(e->flags & (ENGINE_FLAG_KERNEL | ENGINE_FLAG_VM | 87 + ENGINE_FLAG_WA))) { 88 + lockdep_assert_held(&e->vm->lock); 89 + if (!xe_vm_no_dma_fences(e->vm)) 90 + xe_vm_assert_held(e->vm); 91 + } 92 + 93 + job = job_alloc(xe_engine_is_parallel(e) || is_migration); 94 + if (!job) 95 + return ERR_PTR(-ENOMEM); 96 + 97 + job->engine = e; 98 + kref_init(&job->refcount); 99 + xe_engine_get(job->engine); 100 + 101 + err = drm_sched_job_init(&job->drm, e->entity, 1, NULL); 102 + if (err) 103 + goto err_free; 104 + 105 + if (!xe_engine_is_parallel(e)) { 106 + job->fence = xe_lrc_create_seqno_fence(e->lrc); 107 + if (IS_ERR(job->fence)) { 108 + err = PTR_ERR(job->fence); 109 + goto err_sched_job; 110 + } 111 + } else { 112 + struct dma_fence_array *cf; 113 + 114 + fences = kmalloc_array(e->width, sizeof(*fences), GFP_KERNEL); 115 + if (!fences) { 116 + err = -ENOMEM; 117 + goto err_sched_job; 118 + } 119 + 120 + for (j = 0; j < e->width; ++j) { 121 + fences[j] = xe_lrc_create_seqno_fence(e->lrc + j); 122 + if (IS_ERR(fences[j])) { 123 + err = PTR_ERR(fences[j]); 124 + goto err_fences; 125 + } 126 + } 127 + 128 + cf = dma_fence_array_create(e->width, fences, 129 + e->parallel.composite_fence_ctx, 130 + e->parallel.composite_fence_seqno++, 131 + false); 132 + if (!cf) { 133 + --e->parallel.composite_fence_seqno; 134 + err = -ENOMEM; 135 + goto err_fences; 136 + } 137 + 138 + /* Sanity check */ 139 + for (j = 0; j < e->width; ++j) 140 + XE_BUG_ON(cf->base.seqno != fences[j]->seqno); 141 + 142 + job->fence = &cf->base; 143 + } 144 + 145 + width = e->width; 146 + if (is_migration) 
147 + width = 2; 148 + 149 + for (i = 0; i < width; ++i) 150 + job->batch_addr[i] = batch_addr[i]; 151 + 152 + trace_xe_sched_job_create(job); 153 + return job; 154 + 155 + err_fences: 156 + for (j = j - 1; j >= 0; --j) { 157 + --e->lrc[j].fence_ctx.next_seqno; 158 + dma_fence_put(fences[j]); 159 + } 160 + kfree(fences); 161 + err_sched_job: 162 + drm_sched_job_cleanup(&job->drm); 163 + err_free: 164 + xe_engine_put(e); 165 + job_free(job); 166 + return ERR_PTR(err); 167 + } 168 + 169 + /** 170 + * xe_sched_job_destroy - Destroy XE schedule job 171 + * @ref: reference to XE schedule job 172 + * 173 + * Called when ref == 0, drop a reference to job's xe_engine + fence, cleanup 174 + * base DRM schedule job, and free memory for XE schedule job. 175 + */ 176 + void xe_sched_job_destroy(struct kref *ref) 177 + { 178 + struct xe_sched_job *job = 179 + container_of(ref, struct xe_sched_job, refcount); 180 + 181 + xe_engine_put(job->engine); 182 + dma_fence_put(job->fence); 183 + drm_sched_job_cleanup(&job->drm); 184 + job_free(job); 185 + } 186 + 187 + void xe_sched_job_set_error(struct xe_sched_job *job, int error) 188 + { 189 + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) 190 + return; 191 + 192 + dma_fence_set_error(job->fence, error); 193 + 194 + if (dma_fence_is_array(job->fence)) { 195 + struct dma_fence_array *array = 196 + to_dma_fence_array(job->fence); 197 + struct dma_fence **child = array->fences; 198 + unsigned int nchild = array->num_fences; 199 + 200 + do { 201 + struct dma_fence *current_fence = *child++; 202 + 203 + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, 204 + &current_fence->flags)) 205 + continue; 206 + dma_fence_set_error(current_fence, error); 207 + } while (--nchild); 208 + } 209 + 210 + trace_xe_sched_job_set_error(job); 211 + 212 + dma_fence_enable_sw_signaling(job->fence); 213 + xe_hw_fence_irq_run(job->engine->fence_irq); 214 + } 215 + 216 + bool xe_sched_job_started(struct xe_sched_job *job) 217 + { 218 + struct xe_lrc *lrc 
= job->engine->lrc; 219 + 220 + return xe_lrc_start_seqno(lrc) >= xe_sched_job_seqno(job); 221 + } 222 + 223 + bool xe_sched_job_completed(struct xe_sched_job *job) 224 + { 225 + struct xe_lrc *lrc = job->engine->lrc; 226 + 227 + /* 228 + * Can safely check just LRC[0] seqno as that is last seqno written when 229 + * parallel handshake is done. 230 + */ 231 + 232 + return xe_lrc_seqno(lrc) >= xe_sched_job_seqno(job); 233 + } 234 + 235 + void xe_sched_job_arm(struct xe_sched_job *job) 236 + { 237 + drm_sched_job_arm(&job->drm); 238 + } 239 + 240 + void xe_sched_job_push(struct xe_sched_job *job) 241 + { 242 + xe_sched_job_get(job); 243 + trace_xe_sched_job_exec(job); 244 + drm_sched_entity_push_job(&job->drm); 245 + xe_sched_job_put(job); 246 + }
+76
drivers/gpu/drm/xe/xe_sched_job.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2021 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_SCHED_JOB_H_ 7 + #define _XE_SCHED_JOB_H_ 8 + 9 + #include "xe_sched_job_types.h" 10 + 11 + #define XE_SCHED_HANG_LIMIT 1 12 + #define XE_SCHED_JOB_TIMEOUT LONG_MAX 13 + 14 + int xe_sched_job_module_init(void); 15 + void xe_sched_job_module_exit(void); 16 + 17 + struct xe_sched_job *xe_sched_job_create(struct xe_engine *e, 18 + u64 *batch_addr); 19 + void xe_sched_job_destroy(struct kref *ref); 20 + 21 + /** 22 + * xe_sched_job_get - get reference to XE schedule job 23 + * @job: XE schedule job object 24 + * 25 + * Increment XE schedule job's reference count 26 + */ 27 + static inline struct xe_sched_job *xe_sched_job_get(struct xe_sched_job *job) 28 + { 29 + kref_get(&job->refcount); 30 + return job; 31 + } 32 + 33 + /** 34 + * xe_sched_job_put - put reference to XE schedule job 35 + * @job: XE schedule job object 36 + * 37 + * Decrement XE schedule job's reference count, call xe_sched_job_destroy when 38 + * reference count == 0. 
39 + */ 40 + static inline void xe_sched_job_put(struct xe_sched_job *job) 41 + { 42 + kref_put(&job->refcount, xe_sched_job_destroy); 43 + } 44 + 45 + void xe_sched_job_set_error(struct xe_sched_job *job, int error); 46 + static inline bool xe_sched_job_is_error(struct xe_sched_job *job) 47 + { 48 + return job->fence->error < 0; 49 + } 50 + 51 + bool xe_sched_job_started(struct xe_sched_job *job); 52 + bool xe_sched_job_completed(struct xe_sched_job *job); 53 + 54 + void xe_sched_job_arm(struct xe_sched_job *job); 55 + void xe_sched_job_push(struct xe_sched_job *job); 56 + 57 + static inline struct xe_sched_job * 58 + to_xe_sched_job(struct drm_sched_job *drm) 59 + { 60 + return container_of(drm, struct xe_sched_job, drm); 61 + } 62 + 63 + static inline u32 xe_sched_job_seqno(struct xe_sched_job *job) 64 + { 65 + return job->fence->seqno; 66 + } 67 + 68 + static inline void 69 + xe_sched_job_add_migrate_flush(struct xe_sched_job *job, u32 flags) 70 + { 71 + job->migrate_flush_flags = flags; 72 + } 73 + 74 + bool xe_sched_job_is_migration(struct xe_engine *e); 75 + 76 + #endif
+46
drivers/gpu/drm/xe/xe_sched_job_types.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_SCHED_JOB_TYPES_H_ 7 + #define _XE_SCHED_JOB_TYPES_H_ 8 + 9 + #include <linux/kref.h> 10 + 11 + #include <drm/gpu_scheduler.h> 12 + 13 + struct xe_engine; 14 + 15 + /** 16 + * struct xe_sched_job - XE schedule job (batch buffer tracking) 17 + */ 18 + struct xe_sched_job { 19 + /** @drm: base DRM scheduler job */ 20 + struct drm_sched_job drm; 21 + /** @engine: XE submission engine */ 22 + struct xe_engine *engine; 23 + /** @refcount: ref count of this job */ 24 + struct kref refcount; 25 + /** 26 + * @fence: dma fence to indicate completion. 1 way relationship - job 27 + * can safely reference fence, fence cannot safely reference job. 28 + */ 29 + #define JOB_FLAG_SUBMIT DMA_FENCE_FLAG_USER_BITS 30 + struct dma_fence *fence; 31 + /** @user_fence: write back value when BB is complete */ 32 + struct { 33 + /** @used: user fence is used */ 34 + bool used; 35 + /** @addr: address to write to */ 36 + u64 addr; 37 + /** @value: write back value */ 38 + u64 value; 39 + } user_fence; 40 + /** @migrate_flush_flags: Additional flush flags for migration jobs */ 41 + u32 migrate_flush_flags; 42 + /** @batch_addr: batch buffer address of job */ 43 + u64 batch_addr[0]; 44 + }; 45 + 46 + #endif
+189
drivers/gpu/drm/xe/xe_step.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #include "xe_step.h" 7 + 8 + #include "xe_device.h" 9 + #include "xe_platform_types.h" 10 + 11 + /* 12 + * Provide a mapping from the PCI revision ID to the individual GMD 13 + * (Graphics/Media/Display) stepping values, which can be compared numerically. 14 + * 15 + * Some platforms may have unusual ways of mapping PCI revision ID to GMD 16 + * steppings. E.g., in some cases a higher PCI revision may translate to a 17 + * lower stepping of the GT and/or display IP. 18 + * 19 + * Also note that some revisions/steppings may have been set aside as 20 + * placeholders but never materialized in real hardware; in those cases there 21 + * may be jumps in the revision IDs or stepping values in the tables below. 22 + */ 23 + 24 + /* 25 + * Some platforms always have the same stepping value for GT and display; 26 + * use a macro to define these to make it easier to identify the platforms 27 + * where the two steppings can deviate.
28 + */ 29 + #define COMMON_GT_MEDIA_STEP(x_) \ 30 + .graphics = STEP_##x_, \ 31 + .media = STEP_##x_ 32 + 33 + #define COMMON_STEP(x_) \ 34 + COMMON_GT_MEDIA_STEP(x_), \ 35 + .graphics = STEP_##x_, \ 36 + .media = STEP_##x_, \ 37 + .display = STEP_##x_ 38 + 39 + __diag_push(); 40 + __diag_ignore_all("-Woverride-init", "Allow field overrides in table"); 41 + 42 + /* Same GT stepping between tgl_uy_revids and tgl_revids don't mean the same HW */ 43 + static const struct xe_step_info tgl_revids[] = { 44 + [0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_B0 }, 45 + [1] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_D0 }, 46 + }; 47 + 48 + static const struct xe_step_info dg1_revids[] = { 49 + [0] = { COMMON_STEP(A0) }, 50 + [1] = { COMMON_STEP(B0) }, 51 + }; 52 + 53 + static const struct xe_step_info adls_revids[] = { 54 + [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 }, 55 + [0x1] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A2 }, 56 + [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 }, 57 + [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_B0 }, 58 + [0xC] = { COMMON_GT_MEDIA_STEP(D0), .display = STEP_C0 }, 59 + }; 60 + 61 + static const struct xe_step_info dg2_g10_revid_step_tbl[] = { 62 + [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 }, 63 + [0x1] = { COMMON_GT_MEDIA_STEP(A1), .display = STEP_A0 }, 64 + [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 }, 65 + [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_C0 }, 66 + }; 67 + 68 + static const struct xe_step_info dg2_g11_revid_step_tbl[] = { 69 + [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_B0 }, 70 + [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_C0 }, 71 + [0x5] = { COMMON_GT_MEDIA_STEP(B1), .display = STEP_C0 }, 72 + }; 73 + 74 + static const struct xe_step_info dg2_g12_revid_step_tbl[] = { 75 + [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_C0 }, 76 + [0x1] = { COMMON_GT_MEDIA_STEP(A1), .display = STEP_C0 }, 77 + }; 78 + 79 + static const struct xe_step_info 
pvc_revid_step_tbl[] = { 80 + [0x3] = { .graphics = STEP_A0 }, 81 + [0x5] = { .graphics = STEP_B0 }, 82 + [0x6] = { .graphics = STEP_B1 }, 83 + [0x7] = { .graphics = STEP_C0 }, 84 + }; 85 + 86 + static const int pvc_basedie_subids[] = { 87 + [0x0] = STEP_A0, 88 + [0x3] = STEP_B0, 89 + [0x4] = STEP_B1, 90 + [0x5] = STEP_B3, 91 + }; 92 + 93 + __diag_pop(); 94 + 95 + struct xe_step_info xe_step_get(struct xe_device *xe) 96 + { 97 + const struct xe_step_info *revids = NULL; 98 + struct xe_step_info step = {}; 99 + u16 revid = xe->info.revid; 100 + int size = 0; 101 + const int *basedie_info = NULL; 102 + int basedie_size = 0; 103 + int baseid = 0; 104 + 105 + if (xe->info.platform == XE_PVC) { 106 + baseid = FIELD_GET(GENMASK(5, 3), xe->info.revid); 107 + revid = FIELD_GET(GENMASK(2, 0), xe->info.revid); 108 + revids = pvc_revid_step_tbl; 109 + size = ARRAY_SIZE(pvc_revid_step_tbl); 110 + basedie_info = pvc_basedie_subids; 111 + basedie_size = ARRAY_SIZE(pvc_basedie_subids); 112 + } else if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G10) { 113 + revids = dg2_g10_revid_step_tbl; 114 + size = ARRAY_SIZE(dg2_g10_revid_step_tbl); 115 + } else if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G11) { 116 + revids = dg2_g11_revid_step_tbl; 117 + size = ARRAY_SIZE(dg2_g11_revid_step_tbl); 118 + } else if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G12) { 119 + revids = dg2_g12_revid_step_tbl; 120 + size = ARRAY_SIZE(dg2_g12_revid_step_tbl); 121 + } else if (xe->info.platform == XE_ALDERLAKE_S) { 122 + revids = adls_revids; 123 + size = ARRAY_SIZE(adls_revids); 124 + } else if (xe->info.platform == XE_DG1) { 125 + revids = dg1_revids; 126 + size = ARRAY_SIZE(dg1_revids); 127 + } else if (xe->info.platform == XE_TIGERLAKE) { 128 + revids = tgl_revids; 129 + size = ARRAY_SIZE(tgl_revids); 130 + } 131 + 132 + /* Not using the stepping scheme for the platform yet. 
*/ 133 + if (!revids) 134 + return step; 135 + 136 + if (revid < size && revids[revid].graphics != STEP_NONE) { 137 + step = revids[revid]; 138 + } else { 139 + drm_warn(&xe->drm, "Unknown revid 0x%02x\n", revid); 140 + 141 + /* 142 + * If we hit a gap in the revid array, use the information for 143 + * the next revid. 144 + * 145 + * This may be wrong in all sorts of ways, especially if the 146 + * steppings in the array are not monotonically increasing, but 147 + * it's better than defaulting to 0. 148 + */ 149 + while (revid < size && revids[revid].graphics == STEP_NONE) 150 + revid++; 151 + 152 + if (revid < size) { 153 + drm_dbg(&xe->drm, "Using steppings for revid 0x%02x\n", 154 + revid); 155 + step = revids[revid]; 156 + } else { 157 + drm_dbg(&xe->drm, "Using future steppings\n"); 158 + step.graphics = STEP_FUTURE; 159 + step.display = STEP_FUTURE; 160 + } 161 + } 162 + 163 + drm_WARN_ON(&xe->drm, step.graphics == STEP_NONE); 164 + 165 + if (basedie_info && basedie_size) { 166 + if (baseid < basedie_size && basedie_info[baseid] != STEP_NONE) { 167 + step.basedie = basedie_info[baseid]; 168 + } else { 169 + drm_warn(&xe->drm, "Unknown baseid 0x%02x\n", baseid); 170 + step.basedie = STEP_FUTURE; 171 + } 172 + } 173 + 174 + return step; 175 + } 176 + 177 + #define STEP_NAME_CASE(name) \ 178 + case STEP_##name: \ 179 + return #name; 180 + 181 + const char *xe_step_name(enum xe_step step) 182 + { 183 + switch (step) { 184 + STEP_NAME_LIST(STEP_NAME_CASE); 185 + 186 + default: 187 + return "**"; 188 + } 189 + }
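For PVC, xe_step_get() above splits the PCI revision ID into a base-die index (bits 5:3) and a GT stepping-table index (bits 2:0) via FIELD_GET/GENMASK. A userspace sketch of the same decoding with the kernel macros open-coded (the function names are illustrative):

```c
#include <stdint.h>

/* Sketch of the PVC revid split in xe_step_get(): bits 5:3 select the
 * base-die stepping table entry, bits 2:0 the GT stepping table entry. */
static unsigned int pvc_baseid(uint16_t revid)
{
	return (revid >> 3) & 0x7;	/* FIELD_GET(GENMASK(5, 3), revid) */
}

static unsigned int pvc_gt_revid(uint16_t revid)
{
	return revid & 0x7;		/* FIELD_GET(GENMASK(2, 0), revid) */
}
```

For example, a raw revid of 0x2b (binary 101011) decodes to base-die index 5 and GT index 3, which the tables above map to independent steppings.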
+18
drivers/gpu/drm/xe/xe_step.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_STEP_H_ 7 + #define _XE_STEP_H_ 8 + 9 + #include <linux/types.h> 10 + 11 + #include "xe_step_types.h" 12 + 13 + struct xe_device; 14 + 15 + struct xe_step_info xe_step_get(struct xe_device *xe); 16 + const char *xe_step_name(enum xe_step step); 17 + 18 + #endif
+51
drivers/gpu/drm/xe/xe_step_types.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_STEP_TYPES_H_ 7 + #define _XE_STEP_TYPES_H_ 8 + 9 + #include <linux/types.h> 10 + 11 + struct xe_step_info { 12 + u8 graphics; 13 + u8 media; 14 + u8 display; 15 + u8 basedie; 16 + }; 17 + 18 + #define STEP_ENUM_VAL(name) STEP_##name, 19 + 20 + #define STEP_NAME_LIST(func) \ 21 + func(A0) \ 22 + func(A1) \ 23 + func(A2) \ 24 + func(B0) \ 25 + func(B1) \ 26 + func(B2) \ 27 + func(B3) \ 28 + func(C0) \ 29 + func(C1) \ 30 + func(D0) \ 31 + func(D1) \ 32 + func(E0) \ 33 + func(F0) \ 34 + func(G0) \ 35 + func(H0) \ 36 + func(I0) \ 37 + func(I1) \ 38 + func(J0) 39 + 40 + /* 41 + * Symbolic steppings that do not match the hardware. These are valid both as gt 42 + * and display steppings as symbolic names. 43 + */ 44 + enum xe_step { 45 + STEP_NONE = 0, 46 + STEP_NAME_LIST(STEP_ENUM_VAL) 47 + STEP_FUTURE, 48 + STEP_FOREVER, 49 + }; 50 + 51 + #endif
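STEP_NAME_LIST above is an X-macro: the same list expands once to generate the enum values (via STEP_ENUM_VAL) and again in xe_step_name() to generate the name strings, so the two can never drift apart. A trimmed-down userspace sketch of the technique (all DEMO_* names are illustrative):

```c
/* One list drives both the enum and the name lookup. */
#define DEMO_STEP_LIST(func) \
	func(A0) \
	func(B0) \
	func(C0)

#define DEMO_ENUM_VAL(name) DEMO_STEP_##name,
enum demo_step {
	DEMO_STEP_NONE = 0,
	DEMO_STEP_LIST(DEMO_ENUM_VAL)	/* DEMO_STEP_A0, DEMO_STEP_B0, ... */
};

#define DEMO_NAME_CASE(name) \
	case DEMO_STEP_##name: \
		return #name;

static const char *demo_step_name(enum demo_step step)
{
	switch (step) {
	DEMO_STEP_LIST(DEMO_NAME_CASE)	/* expands to one case per name */
	default:
		return "**";
	}
}
```

Adding a new stepping then means touching exactly one line of the list, exactly as in xe_step_types.h.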
+276
drivers/gpu/drm/xe/xe_sync.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2021 Intel Corporation 4 + */ 5 + 6 + #include "xe_sync.h" 7 + 8 + #include <linux/kthread.h> 9 + #include <linux/sched/mm.h> 10 + #include <linux/uaccess.h> 11 + #include <drm/xe_drm.h> 12 + #include <drm/drm_print.h> 13 + #include <drm/drm_syncobj.h> 14 + 15 + #include "xe_device_types.h" 16 + #include "xe_sched_job_types.h" 17 + #include "xe_macros.h" 18 + 19 + #define SYNC_FLAGS_TYPE_MASK 0x3 20 + #define SYNC_FLAGS_FENCE_INSTALLED 0x10000 21 + 22 + struct user_fence { 23 + struct xe_device *xe; 24 + struct kref refcount; 25 + struct dma_fence_cb cb; 26 + struct work_struct worker; 27 + struct mm_struct *mm; 28 + u64 __user *addr; 29 + u64 value; 30 + }; 31 + 32 + static void user_fence_destroy(struct kref *kref) 33 + { 34 + struct user_fence *ufence = container_of(kref, struct user_fence, 35 + refcount); 36 + 37 + mmdrop(ufence->mm); 38 + kfree(ufence); 39 + } 40 + 41 + static void user_fence_get(struct user_fence *ufence) 42 + { 43 + kref_get(&ufence->refcount); 44 + } 45 + 46 + static void user_fence_put(struct user_fence *ufence) 47 + { 48 + kref_put(&ufence->refcount, user_fence_destroy); 49 + } 50 + 51 + static struct user_fence *user_fence_create(struct xe_device *xe, u64 addr, 52 + u64 value) 53 + { 54 + struct user_fence *ufence; 55 + 56 + ufence = kmalloc(sizeof(*ufence), GFP_KERNEL); 57 + if (!ufence) 58 + return NULL; 59 + 60 + ufence->xe = xe; 61 + kref_init(&ufence->refcount); 62 + ufence->addr = u64_to_user_ptr(addr); 63 + ufence->value = value; 64 + ufence->mm = current->mm; 65 + mmgrab(ufence->mm); 66 + 67 + return ufence; 68 + } 69 + 70 + static void user_fence_worker(struct work_struct *w) 71 + { 72 + struct user_fence *ufence = container_of(w, struct user_fence, worker); 73 + 74 + if (mmget_not_zero(ufence->mm)) { 75 + kthread_use_mm(ufence->mm); 76 + if (copy_to_user(ufence->addr, &ufence->value, sizeof(ufence->value))) 77 + XE_WARN_ON("Copy to user failed"); 78 + 
kthread_unuse_mm(ufence->mm); 79 + mmput(ufence->mm); 80 + } 81 + 82 + wake_up_all(&ufence->xe->ufence_wq); 83 + user_fence_put(ufence); 84 + } 85 + 86 + static void kick_ufence(struct user_fence *ufence, struct dma_fence *fence) 87 + { 88 + INIT_WORK(&ufence->worker, user_fence_worker); 89 + queue_work(ufence->xe->ordered_wq, &ufence->worker); 90 + dma_fence_put(fence); 91 + } 92 + 93 + static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) 94 + { 95 + struct user_fence *ufence = container_of(cb, struct user_fence, cb); 96 + 97 + kick_ufence(ufence, fence); 98 + } 99 + 100 + int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, 101 + struct xe_sync_entry *sync, 102 + struct drm_xe_sync __user *sync_user, 103 + bool exec, bool no_dma_fences) 104 + { 105 + struct drm_xe_sync sync_in; 106 + int err; 107 + 108 + if (copy_from_user(&sync_in, sync_user, sizeof(*sync_user))) 109 + return -EFAULT; 110 + 111 + if (XE_IOCTL_ERR(xe, sync_in.flags & 112 + ~(SYNC_FLAGS_TYPE_MASK | DRM_XE_SYNC_SIGNAL))) 113 + return -EINVAL; 114 + 115 + switch (sync_in.flags & SYNC_FLAGS_TYPE_MASK) { 116 + case DRM_XE_SYNC_SYNCOBJ: 117 + if (XE_IOCTL_ERR(xe, no_dma_fences)) 118 + return -ENOTSUPP; 119 + 120 + if (XE_IOCTL_ERR(xe, upper_32_bits(sync_in.addr))) 121 + return -EINVAL; 122 + 123 + sync->syncobj = drm_syncobj_find(xef->drm, sync_in.handle); 124 + if (XE_IOCTL_ERR(xe, !sync->syncobj)) 125 + return -ENOENT; 126 + 127 + if (!(sync_in.flags & DRM_XE_SYNC_SIGNAL)) { 128 + sync->fence = drm_syncobj_fence_get(sync->syncobj); 129 + if (XE_IOCTL_ERR(xe, !sync->fence)) 130 + return -EINVAL; 131 + } 132 + break; 133 + 134 + case DRM_XE_SYNC_TIMELINE_SYNCOBJ: 135 + if (XE_IOCTL_ERR(xe, no_dma_fences)) 136 + return -ENOTSUPP; 137 + 138 + if (XE_IOCTL_ERR(xe, upper_32_bits(sync_in.addr))) 139 + return -EINVAL; 140 + 141 + if (XE_IOCTL_ERR(xe, sync_in.timeline_value == 0)) 142 + return -EINVAL; 143 + 144 + sync->syncobj = drm_syncobj_find(xef->drm, sync_in.handle); 
145 + if (XE_IOCTL_ERR(xe, !sync->syncobj)) 146 + return -ENOENT; 147 + 148 + if (sync_in.flags & DRM_XE_SYNC_SIGNAL) { 149 + sync->chain_fence = dma_fence_chain_alloc(); 150 + if (!sync->chain_fence) 151 + return -ENOMEM; 152 + } else { 153 + sync->fence = drm_syncobj_fence_get(sync->syncobj); 154 + if (XE_IOCTL_ERR(xe, !sync->fence)) 155 + return -EINVAL; 156 + 157 + err = dma_fence_chain_find_seqno(&sync->fence, 158 + sync_in.timeline_value); 159 + if (err) 160 + return err; 161 + } 162 + break; 163 + 164 + case DRM_XE_SYNC_DMA_BUF: 165 + if (XE_IOCTL_ERR(xe, "TODO")) 166 + return -EINVAL; 167 + break; 168 + 169 + case DRM_XE_SYNC_USER_FENCE: 170 + if (XE_IOCTL_ERR(xe, !(sync_in.flags & DRM_XE_SYNC_SIGNAL))) 171 + return -ENOTSUPP; 172 + 173 + if (XE_IOCTL_ERR(xe, sync_in.addr & 0x7)) 174 + return -EINVAL; 175 + 176 + if (exec) { 177 + sync->addr = sync_in.addr; 178 + } else { 179 + sync->ufence = user_fence_create(xe, sync_in.addr, 180 + sync_in.timeline_value); 181 + if (XE_IOCTL_ERR(xe, !sync->ufence)) 182 + return -ENOMEM; 183 + } 184 + 185 + break; 186 + 187 + default: 188 + return -EINVAL; 189 + } 190 + 191 + sync->flags = sync_in.flags; 192 + sync->timeline_value = sync_in.timeline_value; 193 + 194 + return 0; 195 + } 196 + 197 + int xe_sync_entry_wait(struct xe_sync_entry *sync) 198 + { 199 + if (sync->fence) 200 + dma_fence_wait(sync->fence, true); 201 + 202 + return 0; 203 + } 204 + 205 + int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job) 206 + { 207 + int err; 208 + 209 + if (sync->fence) { 210 + err = drm_sched_job_add_dependency(&job->drm, 211 + dma_fence_get(sync->fence)); 212 + if (err) { 213 + dma_fence_put(sync->fence); 214 + return err; 215 + } 216 + } 217 + 218 + return 0; 219 + } 220 + 221 + bool xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job, 222 + struct dma_fence *fence) 223 + { 224 + if (!(sync->flags & DRM_XE_SYNC_SIGNAL) || 225 + sync->flags & SYNC_FLAGS_FENCE_INSTALLED) 226 + 
return false; 227 + 228 + if (sync->chain_fence) { 229 + drm_syncobj_add_point(sync->syncobj, sync->chain_fence, 230 + fence, sync->timeline_value); 231 + /* 232 + * The chain's ownership is transferred to the 233 + * timeline. 234 + */ 235 + sync->chain_fence = NULL; 236 + } else if (sync->syncobj) { 237 + drm_syncobj_replace_fence(sync->syncobj, fence); 238 + } else if (sync->ufence) { 239 + int err; 240 + 241 + dma_fence_get(fence); 242 + user_fence_get(sync->ufence); 243 + err = dma_fence_add_callback(fence, &sync->ufence->cb, 244 + user_fence_cb); 245 + if (err == -ENOENT) { 246 + kick_ufence(sync->ufence, fence); 247 + } else if (err) { 248 + XE_WARN_ON("failed to add user fence"); 249 + user_fence_put(sync->ufence); 250 + dma_fence_put(fence); 251 + } 252 + } else if ((sync->flags & SYNC_FLAGS_TYPE_MASK) == 253 + DRM_XE_SYNC_USER_FENCE) { 254 + job->user_fence.used = true; 255 + job->user_fence.addr = sync->addr; 256 + job->user_fence.value = sync->timeline_value; 257 + } 258 + 259 + /* TODO: external BO? */ 260 + 261 + sync->flags |= SYNC_FLAGS_FENCE_INSTALLED; 262 + 263 + return true; 264 + } 265 + 266 + void xe_sync_entry_cleanup(struct xe_sync_entry *sync) 267 + { 268 + if (sync->syncobj) 269 + drm_syncobj_put(sync->syncobj); 270 + if (sync->fence) 271 + dma_fence_put(sync->fence); 272 + if (sync->chain_fence) 273 + dma_fence_put(&sync->chain_fence->base); 274 + if (sync->ufence) 275 + user_fence_put(sync->ufence); 276 + }
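xe_sync_entry_parse() above accepts only the two low type bits plus DRM_XE_SYNC_SIGNAL and rejects anything else with -EINVAL before switching on the type. A userspace sketch of that flag validation (the constants are illustrative stand-ins, not the real uAPI values):

```c
#include <stdbool.h>
#include <stdint.h>

/* Stand-ins mirroring SYNC_FLAGS_TYPE_MASK / DRM_XE_SYNC_SIGNAL. */
#define DEMO_SYNC_TYPE_MASK	0x3
#define DEMO_SYNC_SIGNAL	0x10

/* Returns true and stores the sync type when no unknown bits are set;
 * a false return mirrors the -EINVAL path in xe_sync_entry_parse(). */
static bool demo_parse_sync_flags(uint32_t flags, uint32_t *type)
{
	if (flags & ~(DEMO_SYNC_TYPE_MASK | DEMO_SYNC_SIGNAL))
		return false;
	*type = flags & DEMO_SYNC_TYPE_MASK;
	return true;
}
```

Masking the type out first keeps the later switch exhaustive even when new modifier bits are added to the accepted set.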
+27
drivers/gpu/drm/xe/xe_sync.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2021 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_SYNC_H_ 7 + #define _XE_SYNC_H_ 8 + 9 + #include "xe_sync_types.h" 10 + 11 + struct xe_device; 12 + struct xe_file; 13 + struct xe_sched_job; 14 + 15 + int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, 16 + struct xe_sync_entry *sync, 17 + struct drm_xe_sync __user *sync_user, 18 + bool exec, bool compute_mode); 19 + int xe_sync_entry_wait(struct xe_sync_entry *sync); 20 + int xe_sync_entry_add_deps(struct xe_sync_entry *sync, 21 + struct xe_sched_job *job); 22 + bool xe_sync_entry_signal(struct xe_sync_entry *sync, 23 + struct xe_sched_job *job, 24 + struct dma_fence *fence); 25 + void xe_sync_entry_cleanup(struct xe_sync_entry *sync); 26 + 27 + #endif
+27
drivers/gpu/drm/xe/xe_sync_types.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_SYNC_TYPES_H_ 7 + #define _XE_SYNC_TYPES_H_ 8 + 9 + #include <linux/types.h> 10 + 11 + struct drm_syncobj; 12 + struct dma_fence; 13 + struct dma_fence_chain; 14 + struct drm_xe_sync; 15 + struct user_fence; 16 + 17 + struct xe_sync_entry { 18 + struct drm_syncobj *syncobj; 19 + struct dma_fence *fence; 20 + struct dma_fence_chain *chain_fence; 21 + struct user_fence *ufence; 22 + u64 addr; 23 + u64 timeline_value; 24 + u32 flags; 25 + }; 26 + 27 + #endif
+9
drivers/gpu/drm/xe/xe_trace.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef __CHECKER__ 7 + #define CREATE_TRACE_POINTS 8 + #include "xe_trace.h" 9 + #endif
+513
drivers/gpu/drm/xe/xe_trace.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #undef TRACE_SYSTEM 7 + #define TRACE_SYSTEM xe 8 + 9 + #if !defined(_XE_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) 10 + #define _XE_TRACE_H_ 11 + 12 + #include <linux/types.h> 13 + #include <linux/tracepoint.h> 14 + 15 + #include "xe_bo_types.h" 16 + #include "xe_engine_types.h" 17 + #include "xe_gpu_scheduler_types.h" 18 + #include "xe_gt_types.h" 19 + #include "xe_guc_engine_types.h" 20 + #include "xe_sched_job.h" 21 + #include "xe_vm_types.h" 22 + 23 + DECLARE_EVENT_CLASS(xe_bo, 24 + TP_PROTO(struct xe_bo *bo), 25 + TP_ARGS(bo), 26 + 27 + TP_STRUCT__entry( 28 + __field(size_t, size) 29 + __field(u32, flags) 30 + __field(u64, vm) 31 + ), 32 + 33 + TP_fast_assign( 34 + __entry->size = bo->size; 35 + __entry->flags = bo->flags; 36 + __entry->vm = (u64)bo->vm; 37 + ), 38 + 39 + TP_printk("size=%ld, flags=0x%02x, vm=0x%016llx", 40 + __entry->size, __entry->flags, __entry->vm) 41 + ); 42 + 43 + DEFINE_EVENT(xe_bo, xe_bo_cpu_fault, 44 + TP_PROTO(struct xe_bo *bo), 45 + TP_ARGS(bo) 46 + ); 47 + 48 + DEFINE_EVENT(xe_bo, xe_bo_move, 49 + TP_PROTO(struct xe_bo *bo), 50 + TP_ARGS(bo) 51 + ); 52 + 53 + DECLARE_EVENT_CLASS(xe_engine, 54 + TP_PROTO(struct xe_engine *e), 55 + TP_ARGS(e), 56 + 57 + TP_STRUCT__entry( 58 + __field(enum xe_engine_class, class) 59 + __field(u32, logical_mask) 60 + __field(u8, gt_id) 61 + __field(u16, width) 62 + __field(u16, guc_id) 63 + __field(u32, guc_state) 64 + __field(u32, flags) 65 + ), 66 + 67 + TP_fast_assign( 68 + __entry->class = e->class; 69 + __entry->logical_mask = e->logical_mask; 70 + __entry->gt_id = e->gt->info.id; 71 + __entry->width = e->width; 72 + __entry->guc_id = e->guc->id; 73 + __entry->guc_state = atomic_read(&e->guc->state); 74 + __entry->flags = e->flags; 75 + ), 76 + 77 + TP_printk("%d:0x%x, gt=%d, width=%d, guc_id=%d, guc_state=0x%x, flags=0x%x", 78 + __entry->class, __entry->logical_mask, 79 + 
__entry->gt_id, __entry->width, __entry->guc_id, 80 + __entry->guc_state, __entry->flags) 81 + ); 82 + 83 + DEFINE_EVENT(xe_engine, xe_engine_create, 84 + TP_PROTO(struct xe_engine *e), 85 + TP_ARGS(e) 86 + ); 87 + 88 + DEFINE_EVENT(xe_engine, xe_engine_supress_resume, 89 + TP_PROTO(struct xe_engine *e), 90 + TP_ARGS(e) 91 + ); 92 + 93 + DEFINE_EVENT(xe_engine, xe_engine_submit, 94 + TP_PROTO(struct xe_engine *e), 95 + TP_ARGS(e) 96 + ); 97 + 98 + DEFINE_EVENT(xe_engine, xe_engine_scheduling_enable, 99 + TP_PROTO(struct xe_engine *e), 100 + TP_ARGS(e) 101 + ); 102 + 103 + DEFINE_EVENT(xe_engine, xe_engine_scheduling_disable, 104 + TP_PROTO(struct xe_engine *e), 105 + TP_ARGS(e) 106 + ); 107 + 108 + DEFINE_EVENT(xe_engine, xe_engine_scheduling_done, 109 + TP_PROTO(struct xe_engine *e), 110 + TP_ARGS(e) 111 + ); 112 + 113 + DEFINE_EVENT(xe_engine, xe_engine_register, 114 + TP_PROTO(struct xe_engine *e), 115 + TP_ARGS(e) 116 + ); 117 + 118 + DEFINE_EVENT(xe_engine, xe_engine_deregister, 119 + TP_PROTO(struct xe_engine *e), 120 + TP_ARGS(e) 121 + ); 122 + 123 + DEFINE_EVENT(xe_engine, xe_engine_deregister_done, 124 + TP_PROTO(struct xe_engine *e), 125 + TP_ARGS(e) 126 + ); 127 + 128 + DEFINE_EVENT(xe_engine, xe_engine_close, 129 + TP_PROTO(struct xe_engine *e), 130 + TP_ARGS(e) 131 + ); 132 + 133 + DEFINE_EVENT(xe_engine, xe_engine_kill, 134 + TP_PROTO(struct xe_engine *e), 135 + TP_ARGS(e) 136 + ); 137 + 138 + DEFINE_EVENT(xe_engine, xe_engine_cleanup_entity, 139 + TP_PROTO(struct xe_engine *e), 140 + TP_ARGS(e) 141 + ); 142 + 143 + DEFINE_EVENT(xe_engine, xe_engine_destroy, 144 + TP_PROTO(struct xe_engine *e), 145 + TP_ARGS(e) 146 + ); 147 + 148 + DEFINE_EVENT(xe_engine, xe_engine_reset, 149 + TP_PROTO(struct xe_engine *e), 150 + TP_ARGS(e) 151 + ); 152 + 153 + DEFINE_EVENT(xe_engine, xe_engine_memory_cat_error, 154 + TP_PROTO(struct xe_engine *e), 155 + TP_ARGS(e) 156 + ); 157 + 158 + DEFINE_EVENT(xe_engine, xe_engine_stop, 159 + TP_PROTO(struct xe_engine *e), 160 + 
TP_ARGS(e) 161 + ); 162 + 163 + DEFINE_EVENT(xe_engine, xe_engine_resubmit, 164 + TP_PROTO(struct xe_engine *e), 165 + TP_ARGS(e) 166 + ); 167 + 168 + DECLARE_EVENT_CLASS(xe_sched_job, 169 + TP_PROTO(struct xe_sched_job *job), 170 + TP_ARGS(job), 171 + 172 + TP_STRUCT__entry( 173 + __field(u32, seqno) 174 + __field(u16, guc_id) 175 + __field(u32, guc_state) 176 + __field(u32, flags) 177 + __field(int, error) 178 + __field(u64, fence) 179 + __field(u64, batch_addr) 180 + ), 181 + 182 + TP_fast_assign( 183 + __entry->seqno = xe_sched_job_seqno(job); 184 + __entry->guc_id = job->engine->guc->id; 185 + __entry->guc_state = 186 + atomic_read(&job->engine->guc->state); 187 + __entry->flags = job->engine->flags; 188 + __entry->error = job->fence->error; 189 + __entry->fence = (u64)job->fence; 190 + __entry->batch_addr = (u64)job->batch_addr[0]; 191 + ), 192 + 193 + TP_printk("fence=0x%016llx, seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d", 194 + __entry->fence, __entry->seqno, __entry->guc_id, 195 + __entry->batch_addr, __entry->guc_state, 196 + __entry->flags, __entry->error) 197 + ); 198 + 199 + DEFINE_EVENT(xe_sched_job, xe_sched_job_create, 200 + TP_PROTO(struct xe_sched_job *job), 201 + TP_ARGS(job) 202 + ); 203 + 204 + DEFINE_EVENT(xe_sched_job, xe_sched_job_exec, 205 + TP_PROTO(struct xe_sched_job *job), 206 + TP_ARGS(job) 207 + ); 208 + 209 + DEFINE_EVENT(xe_sched_job, xe_sched_job_run, 210 + TP_PROTO(struct xe_sched_job *job), 211 + TP_ARGS(job) 212 + ); 213 + 214 + DEFINE_EVENT(xe_sched_job, xe_sched_job_free, 215 + TP_PROTO(struct xe_sched_job *job), 216 + TP_ARGS(job) 217 + ); 218 + 219 + DEFINE_EVENT(xe_sched_job, xe_sched_job_timedout, 220 + TP_PROTO(struct xe_sched_job *job), 221 + TP_ARGS(job) 222 + ); 223 + 224 + DEFINE_EVENT(xe_sched_job, xe_sched_job_set_error, 225 + TP_PROTO(struct xe_sched_job *job), 226 + TP_ARGS(job) 227 + ); 228 + 229 + DEFINE_EVENT(xe_sched_job, xe_sched_job_ban, 230 + TP_PROTO(struct xe_sched_job 
*job), 231 + TP_ARGS(job) 232 + ); 233 + 234 + DECLARE_EVENT_CLASS(xe_sched_msg, 235 + TP_PROTO(struct xe_sched_msg *msg), 236 + TP_ARGS(msg), 237 + 238 + TP_STRUCT__entry( 239 + __field(u32, opcode) 240 + __field(u16, guc_id) 241 + ), 242 + 243 + TP_fast_assign( 244 + __entry->opcode = msg->opcode; 245 + __entry->guc_id = 246 + ((struct xe_engine *)msg->private_data)->guc->id; 247 + ), 248 + 249 + TP_printk("guc_id=%d, opcode=%u", __entry->guc_id, 250 + __entry->opcode) 251 + ); 252 + 253 + DEFINE_EVENT(xe_sched_msg, xe_sched_msg_add, 254 + TP_PROTO(struct xe_sched_msg *msg), 255 + TP_ARGS(msg) 256 + ); 257 + 258 + DEFINE_EVENT(xe_sched_msg, xe_sched_msg_recv, 259 + TP_PROTO(struct xe_sched_msg *msg), 260 + TP_ARGS(msg) 261 + ); 262 + 263 + DECLARE_EVENT_CLASS(xe_hw_fence, 264 + TP_PROTO(struct xe_hw_fence *fence), 265 + TP_ARGS(fence), 266 + 267 + TP_STRUCT__entry( 268 + __field(u64, ctx) 269 + __field(u32, seqno) 270 + __field(u64, fence) 271 + ), 272 + 273 + TP_fast_assign( 274 + __entry->ctx = fence->dma.context; 275 + __entry->seqno = fence->dma.seqno; 276 + __entry->fence = (u64)fence; 277 + ), 278 + 279 + TP_printk("ctx=0x%016llx, fence=0x%016llx, seqno=%u", 280 + __entry->ctx, __entry->fence, __entry->seqno) 281 + ); 282 + 283 + DEFINE_EVENT(xe_hw_fence, xe_hw_fence_create, 284 + TP_PROTO(struct xe_hw_fence *fence), 285 + TP_ARGS(fence) 286 + ); 287 + 288 + DEFINE_EVENT(xe_hw_fence, xe_hw_fence_signal, 289 + TP_PROTO(struct xe_hw_fence *fence), 290 + TP_ARGS(fence) 291 + ); 292 + 293 + DEFINE_EVENT(xe_hw_fence, xe_hw_fence_try_signal, 294 + TP_PROTO(struct xe_hw_fence *fence), 295 + TP_ARGS(fence) 296 + ); 297 + 298 + DEFINE_EVENT(xe_hw_fence, xe_hw_fence_free, 299 + TP_PROTO(struct xe_hw_fence *fence), 300 + TP_ARGS(fence) 301 + ); 302 + 303 + DECLARE_EVENT_CLASS(xe_vma, 304 + TP_PROTO(struct xe_vma *vma), 305 + TP_ARGS(vma), 306 + 307 + TP_STRUCT__entry( 308 + __field(u64, vma) 309 + __field(u32, asid) 310 + __field(u64, start) 311 + __field(u64, end) 
312 + __field(u64, ptr) 313 + ), 314 + 315 + TP_fast_assign( 316 + __entry->vma = (u64)vma; 317 + __entry->asid = vma->vm->usm.asid; 318 + __entry->start = vma->start; 319 + __entry->end = vma->end; 320 + __entry->ptr = (u64)vma->userptr.ptr; 321 + ), 322 + 323 + TP_printk("vma=0x%016llx, asid=0x%05x, start=0x%012llx, end=0x%012llx, ptr=0x%012llx", 324 + __entry->vma, __entry->asid, __entry->start, 325 + __entry->end, __entry->ptr) 326 + ); 327 + 328 + DEFINE_EVENT(xe_vma, xe_vma_flush, 329 + TP_PROTO(struct xe_vma *vma), 330 + TP_ARGS(vma) 331 + ); 332 + 333 + DEFINE_EVENT(xe_vma, xe_vma_pagefault, 334 + TP_PROTO(struct xe_vma *vma), 335 + TP_ARGS(vma) 336 + ); 337 + 338 + DEFINE_EVENT(xe_vma, xe_vma_acc, 339 + TP_PROTO(struct xe_vma *vma), 340 + TP_ARGS(vma) 341 + ); 342 + 343 + DEFINE_EVENT(xe_vma, xe_vma_fail, 344 + TP_PROTO(struct xe_vma *vma), 345 + TP_ARGS(vma) 346 + ); 347 + 348 + DEFINE_EVENT(xe_vma, xe_vma_bind, 349 + TP_PROTO(struct xe_vma *vma), 350 + TP_ARGS(vma) 351 + ); 352 + 353 + DEFINE_EVENT(xe_vma, xe_vma_pf_bind, 354 + TP_PROTO(struct xe_vma *vma), 355 + TP_ARGS(vma) 356 + ); 357 + 358 + DEFINE_EVENT(xe_vma, xe_vma_unbind, 359 + TP_PROTO(struct xe_vma *vma), 360 + TP_ARGS(vma) 361 + ); 362 + 363 + DEFINE_EVENT(xe_vma, xe_vma_userptr_rebind_worker, 364 + TP_PROTO(struct xe_vma *vma), 365 + TP_ARGS(vma) 366 + ); 367 + 368 + DEFINE_EVENT(xe_vma, xe_vma_userptr_rebind_exec, 369 + TP_PROTO(struct xe_vma *vma), 370 + TP_ARGS(vma) 371 + ); 372 + 373 + DEFINE_EVENT(xe_vma, xe_vma_rebind_worker, 374 + TP_PROTO(struct xe_vma *vma), 375 + TP_ARGS(vma) 376 + ); 377 + 378 + DEFINE_EVENT(xe_vma, xe_vma_rebind_exec, 379 + TP_PROTO(struct xe_vma *vma), 380 + TP_ARGS(vma) 381 + ); 382 + 383 + DEFINE_EVENT(xe_vma, xe_vma_userptr_invalidate, 384 + TP_PROTO(struct xe_vma *vma), 385 + TP_ARGS(vma) 386 + ); 387 + 388 + DEFINE_EVENT(xe_vma, xe_vma_usm_invalidate, 389 + TP_PROTO(struct xe_vma *vma), 390 + TP_ARGS(vma) 391 + ); 392 + 393 + DEFINE_EVENT(xe_vma, 
xe_vma_evict, 394 + TP_PROTO(struct xe_vma *vma), 395 + TP_ARGS(vma) 396 + ); 397 + 398 + DEFINE_EVENT(xe_vma, xe_vma_userptr_invalidate_complete, 399 + TP_PROTO(struct xe_vma *vma), 400 + TP_ARGS(vma) 401 + ); 402 + 403 + DECLARE_EVENT_CLASS(xe_vm, 404 + TP_PROTO(struct xe_vm *vm), 405 + TP_ARGS(vm), 406 + 407 + TP_STRUCT__entry( 408 + __field(u64, vm) 409 + __field(u32, asid) 410 + ), 411 + 412 + TP_fast_assign( 413 + __entry->vm = (u64)vm; 414 + __entry->asid = vm->usm.asid; 415 + ), 416 + 417 + TP_printk("vm=0x%016llx, asid=0x%05x", __entry->vm, 418 + __entry->asid) 419 + ); 420 + 421 + DEFINE_EVENT(xe_vm, xe_vm_create, 422 + TP_PROTO(struct xe_vm *vm), 423 + TP_ARGS(vm) 424 + ); 425 + 426 + DEFINE_EVENT(xe_vm, xe_vm_free, 427 + TP_PROTO(struct xe_vm *vm), 428 + TP_ARGS(vm) 429 + ); 430 + 431 + DEFINE_EVENT(xe_vm, xe_vm_cpu_bind, 432 + TP_PROTO(struct xe_vm *vm), 433 + TP_ARGS(vm) 434 + ); 435 + 436 + DEFINE_EVENT(xe_vm, xe_vm_restart, 437 + TP_PROTO(struct xe_vm *vm), 438 + TP_ARGS(vm) 439 + ); 440 + 441 + DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_enter, 442 + TP_PROTO(struct xe_vm *vm), 443 + TP_ARGS(vm) 444 + ); 445 + 446 + DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_retry, 447 + TP_PROTO(struct xe_vm *vm), 448 + TP_ARGS(vm) 449 + ); 450 + 451 + DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_exit, 452 + TP_PROTO(struct xe_vm *vm), 453 + TP_ARGS(vm) 454 + ); 455 + 456 + TRACE_EVENT(xe_guc_ct_h2g_flow_control, 457 + TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len), 458 + TP_ARGS(_head, _tail, size, space, len), 459 + 460 + TP_STRUCT__entry( 461 + __field(u32, _head) 462 + __field(u32, _tail) 463 + __field(u32, size) 464 + __field(u32, space) 465 + __field(u32, len) 466 + ), 467 + 468 + TP_fast_assign( 469 + __entry->_head = _head; 470 + __entry->_tail = _tail; 471 + __entry->size = size; 472 + __entry->space = space; 473 + __entry->len = len; 474 + ), 475 + 476 + TP_printk("head=%u, tail=%u, size=%u, space=%u, len=%u", 477 + __entry->_head, __entry->_tail, 
__entry->size, 478 + __entry->space, __entry->len) 479 + ); 480 + 481 + TRACE_EVENT(xe_guc_ct_g2h_flow_control, 482 + TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len), 483 + TP_ARGS(_head, _tail, size, space, len), 484 + 485 + TP_STRUCT__entry( 486 + __field(u32, _head) 487 + __field(u32, _tail) 488 + __field(u32, size) 489 + __field(u32, space) 490 + __field(u32, len) 491 + ), 492 + 493 + TP_fast_assign( 494 + __entry->_head = _head; 495 + __entry->_tail = _tail; 496 + __entry->size = size; 497 + __entry->space = space; 498 + __entry->len = len; 499 + ), 500 + 501 + TP_printk("head=%u, tail=%u, size=%u, space=%u, len=%u", 502 + __entry->_head, __entry->_tail, __entry->size, 503 + __entry->space, __entry->len) 504 + ); 505 + 506 + #endif 507 + 508 + /* This part must be outside protection */ 509 + #undef TRACE_INCLUDE_PATH 510 + #undef TRACE_INCLUDE_FILE 511 + #define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/xe 512 + #define TRACE_INCLUDE_FILE xe_trace 513 + #include <trace/define_trace.h>
+130
drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2021-2022 Intel Corporation 4 + * Copyright (C) 2021-2022 Red Hat 5 + */ 6 + 7 + #include <drm/drm_managed.h> 8 + 9 + #include <drm/ttm/ttm_range_manager.h> 10 + #include <drm/ttm/ttm_placement.h> 11 + #include <drm/ttm/ttm_tt.h> 12 + 13 + #include "xe_bo.h" 14 + #include "xe_gt.h" 15 + #include "xe_ttm_gtt_mgr.h" 16 + 17 + struct xe_ttm_gtt_node { 18 + struct ttm_buffer_object *tbo; 19 + struct ttm_range_mgr_node base; 20 + }; 21 + 22 + static inline struct xe_ttm_gtt_mgr * 23 + to_gtt_mgr(struct ttm_resource_manager *man) 24 + { 25 + return container_of(man, struct xe_ttm_gtt_mgr, manager); 26 + } 27 + 28 + static inline struct xe_ttm_gtt_node * 29 + to_xe_ttm_gtt_node(struct ttm_resource *res) 30 + { 31 + return container_of(res, struct xe_ttm_gtt_node, base.base); 32 + } 33 + 34 + static int xe_ttm_gtt_mgr_new(struct ttm_resource_manager *man, 35 + struct ttm_buffer_object *tbo, 36 + const struct ttm_place *place, 37 + struct ttm_resource **res) 38 + { 39 + struct xe_ttm_gtt_node *node; 40 + int r; 41 + 42 + node = kzalloc(struct_size(node, base.mm_nodes, 1), GFP_KERNEL); 43 + if (!node) 44 + return -ENOMEM; 45 + 46 + node->tbo = tbo; 47 + ttm_resource_init(tbo, place, &node->base.base); 48 + 49 + if (!(place->flags & TTM_PL_FLAG_TEMPORARY) && 50 + ttm_resource_manager_usage(man) > (man->size << PAGE_SHIFT)) { 51 + r = -ENOSPC; 52 + goto err_fini; 53 + } 54 + 55 + node->base.mm_nodes[0].start = 0; 56 + node->base.mm_nodes[0].size = PFN_UP(node->base.base.size); 57 + node->base.base.start = XE_BO_INVALID_OFFSET; 58 + 59 + *res = &node->base.base; 60 + 61 + return 0; 62 + 63 + err_fini: 64 + ttm_resource_fini(man, &node->base.base); 65 + kfree(node); 66 + return r; 67 + } 68 + 69 + static void xe_ttm_gtt_mgr_del(struct ttm_resource_manager *man, 70 + struct ttm_resource *res) 71 + { 72 + struct xe_ttm_gtt_node *node = to_xe_ttm_gtt_node(res); 73 + 74 + ttm_resource_fini(man, res); 75 + kfree(node); 
76 + } 77 + 78 + static void xe_ttm_gtt_mgr_debug(struct ttm_resource_manager *man, 79 + struct drm_printer *printer) 80 + { 81 + 82 + } 83 + 84 + static const struct ttm_resource_manager_func xe_ttm_gtt_mgr_func = { 85 + .alloc = xe_ttm_gtt_mgr_new, 86 + .free = xe_ttm_gtt_mgr_del, 87 + .debug = xe_ttm_gtt_mgr_debug 88 + }; 89 + 90 + static void ttm_gtt_mgr_fini(struct drm_device *drm, void *arg) 91 + { 92 + struct xe_ttm_gtt_mgr *mgr = arg; 93 + struct xe_device *xe = gt_to_xe(mgr->gt); 94 + struct ttm_resource_manager *man = &mgr->manager; 95 + int err; 96 + 97 + ttm_resource_manager_set_used(man, false); 98 + 99 + err = ttm_resource_manager_evict_all(&xe->ttm, man); 100 + if (err) 101 + return; 102 + 103 + ttm_resource_manager_cleanup(man); 104 + ttm_set_driver_manager(&xe->ttm, XE_PL_TT, NULL); 105 + } 106 + 107 + int xe_ttm_gtt_mgr_init(struct xe_gt *gt, struct xe_ttm_gtt_mgr *mgr, 108 + u64 gtt_size) 109 + { 110 + struct xe_device *xe = gt_to_xe(gt); 111 + struct ttm_resource_manager *man = &mgr->manager; 112 + int err; 113 + 114 + XE_BUG_ON(xe_gt_is_media_type(gt)); 115 + 116 + mgr->gt = gt; 117 + man->use_tt = true; 118 + man->func = &xe_ttm_gtt_mgr_func; 119 + 120 + ttm_resource_manager_init(man, &xe->ttm, gtt_size >> PAGE_SHIFT); 121 + 122 + ttm_set_driver_manager(&xe->ttm, XE_PL_TT, &mgr->manager); 123 + ttm_resource_manager_set_used(man, true); 124 + 125 + err = drmm_add_action_or_reset(&xe->drm, ttm_gtt_mgr_fini, mgr); 126 + if (err) 127 + return err; 128 + 129 + return 0; 130 + }
drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h (+16)

/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

#ifndef _XE_TTM_GTT_MGR_H_
#define _XE_TTM_GTT_MGR_H_

#include "xe_ttm_gtt_mgr_types.h"

struct xe_gt;

int xe_ttm_gtt_mgr_init(struct xe_gt *gt, struct xe_ttm_gtt_mgr *mgr,
                        u64 gtt_size);

#endif
drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h (+18)

/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

#ifndef _XE_TTM_GTT_MGR_TYPES_H_
#define _XE_TTM_GTT_MGR_TYPES_H_

#include <drm/ttm/ttm_device.h>

struct xe_gt;

struct xe_ttm_gtt_mgr {
    struct xe_gt *gt;
    struct ttm_resource_manager manager;
};

#endif
drivers/gpu/drm/xe/xe_ttm_vram_mgr.c (+403)

// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021-2022 Intel Corporation
 * Copyright (C) 2021-2022 Red Hat
 */

#include <drm/drm_managed.h>

#include <drm/ttm/ttm_range_manager.h>
#include <drm/ttm/ttm_placement.h>

#include "xe_bo.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_res_cursor.h"
#include "xe_ttm_vram_mgr.h"

static inline struct xe_ttm_vram_mgr *
to_vram_mgr(struct ttm_resource_manager *man)
{
    return container_of(man, struct xe_ttm_vram_mgr, manager);
}

static inline struct xe_gt *
mgr_to_gt(struct xe_ttm_vram_mgr *mgr)
{
    return mgr->gt;
}

static inline struct drm_buddy_block *
xe_ttm_vram_mgr_first_block(struct list_head *list)
{
    return list_first_entry_or_null(list, struct drm_buddy_block, link);
}

static inline bool xe_is_vram_mgr_blocks_contiguous(struct list_head *head)
{
    struct drm_buddy_block *block;
    u64 start, size;

    block = xe_ttm_vram_mgr_first_block(head);
    if (!block)
        return false;

    while (head != block->link.next) {
        start = xe_ttm_vram_mgr_block_start(block);
        size = xe_ttm_vram_mgr_block_size(block);

        block = list_entry(block->link.next, struct drm_buddy_block,
                           link);
        if (start + size != xe_ttm_vram_mgr_block_start(block))
            return false;
    }

    return true;
}

static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
                               struct ttm_buffer_object *tbo,
                               const struct ttm_place *place,
                               struct ttm_resource **res)
{
    u64 max_bytes, cur_size, min_block_size;
    struct xe_ttm_vram_mgr *mgr = to_vram_mgr(man);
    struct xe_ttm_vram_mgr_resource *vres;
    u64 size, remaining_size, lpfn, fpfn;
    struct drm_buddy *mm = &mgr->mm;
    struct drm_buddy_block *block;
    unsigned long pages_per_block;
    int r;

    lpfn = (u64)place->lpfn << PAGE_SHIFT;
    if (!lpfn)
        lpfn = man->size;

    fpfn = (u64)place->fpfn << PAGE_SHIFT;

    max_bytes = mgr->gt->mem.vram.size;
    if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
        pages_per_block = ~0ul;
    } else {
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
        pages_per_block = HPAGE_PMD_NR;
#else
        /* default to 2MB */
        pages_per_block = 2UL << (20UL - PAGE_SHIFT);
#endif

        pages_per_block = max_t(uint32_t, pages_per_block,
                                tbo->page_alignment);
    }

    vres = kzalloc(sizeof(*vres), GFP_KERNEL);
    if (!vres)
        return -ENOMEM;

    ttm_resource_init(tbo, place, &vres->base);
    remaining_size = vres->base.size;

    /* bail out quickly if there's likely not enough VRAM for this BO */
    if (ttm_resource_manager_usage(man) > max_bytes) {
        r = -ENOSPC;
        goto error_fini;
    }

    INIT_LIST_HEAD(&vres->blocks);

    if (place->flags & TTM_PL_FLAG_TOPDOWN)
        vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;

    if (fpfn || lpfn != man->size)
        /* Allocate blocks in desired range */
        vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;

    mutex_lock(&mgr->lock);
    while (remaining_size) {
        if (tbo->page_alignment)
            min_block_size = tbo->page_alignment << PAGE_SHIFT;
        else
            min_block_size = mgr->default_page_size;

        XE_BUG_ON(min_block_size < mm->chunk_size);

        /* Limit maximum size to 2GiB due to SG table limitations */
        size = min(remaining_size, 2ULL << 30);

        if (size >= pages_per_block << PAGE_SHIFT)
            min_block_size = pages_per_block << PAGE_SHIFT;

        cur_size = size;

        if (fpfn + size != place->lpfn << PAGE_SHIFT) {
            /*
             * Except for actual range allocation, modify the size
             * and min_block_size conforming to the contiguous flag
             */
            if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
                size = roundup_pow_of_two(size);
                min_block_size = size;
            /*
             * Modify the size value if size is not
             * aligned with min_block_size
             */
            } else if (!IS_ALIGNED(size, min_block_size)) {
                size = round_up(size, min_block_size);
            }
        }

        r = drm_buddy_alloc_blocks(mm, fpfn,
                                   lpfn,
                                   size,
                                   min_block_size,
                                   &vres->blocks,
                                   vres->flags);
        if (unlikely(r))
            goto error_free_blocks;

        if (size > remaining_size)
            remaining_size = 0;
        else
            remaining_size -= size;
    }
    mutex_unlock(&mgr->lock);

    if (cur_size != size) {
        struct drm_buddy_block *block;
        struct list_head *trim_list;
        u64 original_size;
        LIST_HEAD(temp);

        trim_list = &vres->blocks;
        original_size = vres->base.size;

        /*
         * If the size value was rounded up to min_block_size, trim
         * the last block to the required size
         */
        if (!list_is_singular(&vres->blocks)) {
            block = list_last_entry(&vres->blocks, typeof(*block), link);
            list_move_tail(&block->link, &temp);
            trim_list = &temp;
            /*
             * Compute the original_size value by subtracting
             * (aligned size - original size) from the last
             * block size
             */
            original_size = xe_ttm_vram_mgr_block_size(block) -
                            (size - cur_size);
        }

        mutex_lock(&mgr->lock);
        drm_buddy_block_trim(mm,
                             original_size,
                             trim_list);
        mutex_unlock(&mgr->lock);

        if (!list_empty(&temp))
            list_splice_tail(trim_list, &vres->blocks);
    }

    vres->base.start = 0;
    list_for_each_entry(block, &vres->blocks, link) {
        unsigned long start;

        start = xe_ttm_vram_mgr_block_start(block) +
                xe_ttm_vram_mgr_block_size(block);
        start >>= PAGE_SHIFT;

        if (start > PFN_UP(vres->base.size))
            start -= PFN_UP(vres->base.size);
        else
            start = 0;
        vres->base.start = max(vres->base.start, start);
    }

    if (xe_is_vram_mgr_blocks_contiguous(&vres->blocks))
        vres->base.placement |= TTM_PL_FLAG_CONTIGUOUS;

    *res = &vres->base;
    return 0;

error_free_blocks:
    drm_buddy_free_list(mm, &vres->blocks);
    mutex_unlock(&mgr->lock);
error_fini:
    ttm_resource_fini(man, &vres->base);
    kfree(vres);

    return r;
}

static void xe_ttm_vram_mgr_del(struct ttm_resource_manager *man,
                                struct ttm_resource *res)
{
    struct xe_ttm_vram_mgr_resource *vres =
        to_xe_ttm_vram_mgr_resource(res);
    struct xe_ttm_vram_mgr *mgr = to_vram_mgr(man);
    struct drm_buddy *mm = &mgr->mm;

    mutex_lock(&mgr->lock);
    drm_buddy_free_list(mm, &vres->blocks);
    mutex_unlock(&mgr->lock);

    ttm_resource_fini(man, res);

    kfree(vres);
}

static void xe_ttm_vram_mgr_debug(struct ttm_resource_manager *man,
                                  struct drm_printer *printer)
{
    struct xe_ttm_vram_mgr *mgr = to_vram_mgr(man);
    struct drm_buddy *mm = &mgr->mm;

    mutex_lock(&mgr->lock);
    drm_buddy_print(mm, printer);
    mutex_unlock(&mgr->lock);
    drm_printf(printer, "man size:%llu\n", man->size);
}

static const struct ttm_resource_manager_func xe_ttm_vram_mgr_func = {
    .alloc = xe_ttm_vram_mgr_new,
    .free = xe_ttm_vram_mgr_del,
    .debug = xe_ttm_vram_mgr_debug
};

static void ttm_vram_mgr_fini(struct drm_device *drm, void *arg)
{
    struct xe_ttm_vram_mgr *mgr = arg;
    struct xe_device *xe = gt_to_xe(mgr->gt);
    struct ttm_resource_manager *man = &mgr->manager;
    int err;

    ttm_resource_manager_set_used(man, false);

    err = ttm_resource_manager_evict_all(&xe->ttm, man);
    if (err)
        return;

    drm_buddy_fini(&mgr->mm);

    ttm_resource_manager_cleanup(man);

    ttm_set_driver_manager(&xe->ttm, XE_PL_VRAM0 + mgr->gt->info.vram_id,
                           NULL);
}

int xe_ttm_vram_mgr_init(struct xe_gt *gt, struct xe_ttm_vram_mgr *mgr)
{
    struct xe_device *xe = gt_to_xe(gt);
    struct ttm_resource_manager *man = &mgr->manager;
    int err;

    XE_BUG_ON(xe_gt_is_media_type(gt));

    mgr->gt = gt;
    man->func = &xe_ttm_vram_mgr_func;

    ttm_resource_manager_init(man, &xe->ttm, gt->mem.vram.size);
    err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE);
    if (err)
        return err;

    mutex_init(&mgr->lock);
    mgr->default_page_size = PAGE_SIZE;

    ttm_set_driver_manager(&xe->ttm, XE_PL_VRAM0 + gt->info.vram_id,
                           &mgr->manager);
    ttm_resource_manager_set_used(man, true);

    err = drmm_add_action_or_reset(&xe->drm, ttm_vram_mgr_fini, mgr);
    if (err)
        return err;

    return 0;
}

int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
                              struct ttm_resource *res,
                              u64 offset, u64 length,
                              struct device *dev,
                              enum dma_data_direction dir,
                              struct sg_table **sgt)
{
    struct xe_gt *gt = xe_device_get_gt(xe, res->mem_type - XE_PL_VRAM0);
    struct xe_res_cursor cursor;
    struct scatterlist *sg;
    int num_entries = 0;
    int i, r;

    *sgt = kmalloc(sizeof(**sgt), GFP_KERNEL);
    if (!*sgt)
        return -ENOMEM;

    /* Determine the number of DRM_BUDDY blocks to export */
    xe_res_first(res, offset, length, &cursor);
    while (cursor.remaining) {
        num_entries++;
        xe_res_next(&cursor, cursor.size);
    }

    r = sg_alloc_table(*sgt, num_entries, GFP_KERNEL);
    if (r)
        goto error_free;

    /* Initialize scatterlist nodes of sg_table */
    for_each_sgtable_sg((*sgt), sg, i)
        sg->length = 0;

    /*
     * Walk down DRM_BUDDY blocks to populate scatterlist nodes
     * @note: Use the iterator API to get the first DRM_BUDDY block
     * and the number of bytes from it. Access the following
     * DRM_BUDDY block(s) if more buffer needs to be exported
     */
    xe_res_first(res, offset, length, &cursor);
    for_each_sgtable_sg((*sgt), sg, i) {
        phys_addr_t phys = cursor.start + gt->mem.vram.io_start;
        size_t size = cursor.size;
        dma_addr_t addr;

        addr = dma_map_resource(dev, phys, size, dir,
                                DMA_ATTR_SKIP_CPU_SYNC);
        r = dma_mapping_error(dev, addr);
        if (r)
            goto error_unmap;

        sg_set_page(sg, NULL, size, 0);
        sg_dma_address(sg) = addr;
        sg_dma_len(sg) = size;

        xe_res_next(&cursor, cursor.size);
    }

    return 0;

error_unmap:
    for_each_sgtable_sg((*sgt), sg, i) {
        if (!sg->length)
            continue;

        dma_unmap_resource(dev, sg->dma_address,
                           sg->length, dir,
                           DMA_ATTR_SKIP_CPU_SYNC);
    }
    sg_free_table(*sgt);

error_free:
    kfree(*sgt);
    return r;
}

void xe_ttm_vram_mgr_free_sgt(struct device *dev, enum dma_data_direction dir,
                              struct sg_table *sgt)
{
    struct scatterlist *sg;
    int i;

    for_each_sgtable_sg(sgt, sg, i)
        dma_unmap_resource(dev, sg->dma_address,
                           sg->length, dir,
                           DMA_ATTR_SKIP_CPU_SYNC);
    sg_free_table(sgt);
    kfree(sgt);
}
drivers/gpu/drm/xe/xe_ttm_vram_mgr.h (+41)

/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

#ifndef _XE_TTM_VRAM_MGR_H_
#define _XE_TTM_VRAM_MGR_H_

#include "xe_ttm_vram_mgr_types.h"

enum dma_data_direction;
struct xe_device;
struct xe_gt;

int xe_ttm_vram_mgr_init(struct xe_gt *gt, struct xe_ttm_vram_mgr *mgr);
int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
                              struct ttm_resource *res,
                              u64 offset, u64 length,
                              struct device *dev,
                              enum dma_data_direction dir,
                              struct sg_table **sgt);
void xe_ttm_vram_mgr_free_sgt(struct device *dev, enum dma_data_direction dir,
                              struct sg_table *sgt);

static inline u64 xe_ttm_vram_mgr_block_start(struct drm_buddy_block *block)
{
    return drm_buddy_block_offset(block);
}

static inline u64 xe_ttm_vram_mgr_block_size(struct drm_buddy_block *block)
{
    return PAGE_SIZE << drm_buddy_block_order(block);
}

static inline struct xe_ttm_vram_mgr_resource *
to_xe_ttm_vram_mgr_resource(struct ttm_resource *res)
{
    return container_of(res, struct xe_ttm_vram_mgr_resource, base);
}

#endif
drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h (+44)

/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

#ifndef _XE_TTM_VRAM_MGR_TYPES_H_
#define _XE_TTM_VRAM_MGR_TYPES_H_

#include <drm/drm_buddy.h>
#include <drm/ttm/ttm_device.h>

struct xe_gt;

/**
 * struct xe_ttm_vram_mgr - XE TTM VRAM manager
 *
 * Manages placement of TTM resources in VRAM.
 */
struct xe_ttm_vram_mgr {
    /** @gt: Graphics tile which the VRAM belongs to */
    struct xe_gt *gt;
    /** @manager: Base TTM resource manager */
    struct ttm_resource_manager manager;
    /** @mm: DRM buddy allocator which manages the VRAM */
    struct drm_buddy mm;
    /** @default_page_size: default page size */
    u64 default_page_size;
    /** @lock: protects allocations of VRAM */
    struct mutex lock;
};

/**
 * struct xe_ttm_vram_mgr_resource - XE TTM VRAM resource
 */
struct xe_ttm_vram_mgr_resource {
    /** @base: Base TTM resource */
    struct ttm_resource base;
    /** @blocks: list of DRM buddy blocks */
    struct list_head blocks;
    /** @flags: flags associated with the resource */
    unsigned long flags;
};

#endif
drivers/gpu/drm/xe/xe_tuning.c (+39)

// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_wa.h"

#include "xe_platform_types.h"
#include "xe_gt_types.h"
#include "xe_rtp.h"

#include "gt/intel_gt_regs.h"

#undef _MMIO
#undef MCR_REG
#define _MMIO(x)    _XE_RTP_REG(x)
#define MCR_REG(x)  _XE_RTP_MCR_REG(x)

static const struct xe_rtp_entry gt_tunings[] = {
    { XE_RTP_NAME("Tuning: 32B Access Enable"),
      XE_RTP_RULES(PLATFORM(DG2)),
      XE_RTP_SET(XEHP_SQCM, EN_32B_ACCESS)
    },
    {}
};

static const struct xe_rtp_entry context_tunings[] = {
    { XE_RTP_NAME("1604555607"),
      XE_RTP_RULES(GRAPHICS_VERSION(1200)),
      XE_RTP_FIELD_SET_NO_READ_MASK(XEHP_FF_MODE2, FF_MODE2_TDS_TIMER_MASK,
                                    FF_MODE2_TDS_TIMER_128)
    },
    {}
};

void xe_tuning_process_gt(struct xe_gt *gt)
{
    xe_rtp_process(gt_tunings, &gt->reg_sr, gt, NULL);
}
drivers/gpu/drm/xe/xe_tuning.h (+13)

/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

#ifndef _XE_TUNING_
#define _XE_TUNING_

struct xe_gt;

void xe_tuning_process_gt(struct xe_gt *gt);

#endif
drivers/gpu/drm/xe/xe_uc.c (+226)

// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_device.h"
#include "xe_huc.h"
#include "xe_gt.h"
#include "xe_guc.h"
#include "xe_guc_pc.h"
#include "xe_guc_submit.h"
#include "xe_uc.h"
#include "xe_uc_fw.h"
#include "xe_wopcm.h"

static struct xe_gt *
uc_to_gt(struct xe_uc *uc)
{
    return container_of(uc, struct xe_gt, uc);
}

static struct xe_device *
uc_to_xe(struct xe_uc *uc)
{
    return gt_to_xe(uc_to_gt(uc));
}

/* Should be called once at driver load only */
int xe_uc_init(struct xe_uc *uc)
{
    int ret;

    /* GuC submission not enabled, nothing to do */
    if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
        return 0;

    ret = xe_guc_init(&uc->guc);
    if (ret)
        goto err;

    ret = xe_huc_init(&uc->huc);
    if (ret)
        goto err;

    ret = xe_wopcm_init(&uc->wopcm);
    if (ret)
        goto err;

    ret = xe_guc_submit_init(&uc->guc);
    if (ret)
        goto err;

    return 0;

err:
    /* If any uC firmware is not found, fall back to execlists */
    xe_device_guc_submission_disable(uc_to_xe(uc));

    return ret;
}

/**
 * xe_uc_init_post_hwconfig - init uC post hwconfig load
 * @uc: The UC object
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_uc_init_post_hwconfig(struct xe_uc *uc)
{
    /* GuC submission not enabled, nothing to do */
    if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
        return 0;

    return xe_guc_init_post_hwconfig(&uc->guc);
}

static int uc_reset(struct xe_uc *uc)
{
    struct xe_device *xe = uc_to_xe(uc);
    int ret;

    ret = xe_guc_reset(&uc->guc);
    if (ret) {
        drm_err(&xe->drm, "Failed to reset GuC, ret = %d\n", ret);
        return ret;
    }

    return 0;
}

static int uc_sanitize(struct xe_uc *uc)
{
    xe_huc_sanitize(&uc->huc);
    xe_guc_sanitize(&uc->guc);

    return uc_reset(uc);
}

/**
 * xe_uc_init_hwconfig - minimally init uC, read and parse hwconfig
 * @uc: The UC object
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_uc_init_hwconfig(struct xe_uc *uc)
{
    int ret;

    /* GuC submission not enabled, nothing to do */
    if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
        return 0;

    ret = xe_guc_min_load_for_hwconfig(&uc->guc);
    if (ret)
        return ret;

    return 0;
}

/*
 * Should be called during driver load, after every GT reset, and after every
 * suspend to reload / auth the firmwares.
 */
int xe_uc_init_hw(struct xe_uc *uc)
{
    int ret;

    /* GuC submission not enabled, nothing to do */
    if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
        return 0;

    ret = uc_sanitize(uc);
    if (ret)
        return ret;

    ret = xe_huc_upload(&uc->huc);
    if (ret)
        return ret;

    ret = xe_guc_upload(&uc->guc);
    if (ret)
        return ret;

    ret = xe_guc_enable_communication(&uc->guc);
    if (ret)
        return ret;

    ret = xe_gt_record_default_lrcs(uc_to_gt(uc));
    if (ret)
        return ret;

    ret = xe_guc_post_load_init(&uc->guc);
    if (ret)
        return ret;

    ret = xe_guc_pc_start(&uc->guc.pc);
    if (ret)
        return ret;

    /* We don't fail the driver load if HuC fails to auth, but let's warn */
    ret = xe_huc_auth(&uc->huc);
    XE_WARN_ON(ret);

    return 0;
}

int xe_uc_reset_prepare(struct xe_uc *uc)
{
    /* GuC submission not enabled, nothing to do */
    if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
        return 0;

    return xe_guc_reset_prepare(&uc->guc);
}

void xe_uc_stop_prepare(struct xe_uc *uc)
{
    xe_guc_stop_prepare(&uc->guc);
}

int xe_uc_stop(struct xe_uc *uc)
{
    /* GuC submission not enabled, nothing to do */
    if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
        return 0;

    return xe_guc_stop(&uc->guc);
}

int xe_uc_start(struct xe_uc *uc)
{
    /* GuC submission not enabled, nothing to do */
    if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
        return 0;

    return xe_guc_start(&uc->guc);
}

static void uc_reset_wait(struct xe_uc *uc)
{
    int ret;

again:
    xe_guc_reset_wait(&uc->guc);

    ret = xe_uc_reset_prepare(uc);
    if (ret)
        goto again;
}

int xe_uc_suspend(struct xe_uc *uc)
{
    int ret;

    /* GuC submission not enabled, nothing to do */
    if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
        return 0;

    uc_reset_wait(uc);

    ret = xe_uc_stop(uc);
    if (ret)
        return ret;

    return xe_guc_suspend(&uc->guc);
}
drivers/gpu/drm/xe/xe_uc.h (+21)

/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

#ifndef _XE_UC_H_
#define _XE_UC_H_

#include "xe_uc_types.h"

int xe_uc_init(struct xe_uc *uc);
int xe_uc_init_hwconfig(struct xe_uc *uc);
int xe_uc_init_post_hwconfig(struct xe_uc *uc);
int xe_uc_init_hw(struct xe_uc *uc);
int xe_uc_reset_prepare(struct xe_uc *uc);
void xe_uc_stop_prepare(struct xe_uc *uc);
int xe_uc_stop(struct xe_uc *uc);
int xe_uc_start(struct xe_uc *uc);
int xe_uc_suspend(struct xe_uc *uc);

#endif
drivers/gpu/drm/xe/xe_uc_debugfs.c (+26)

// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include <drm/drm_debugfs.h>

#include "xe_gt.h"
#include "xe_guc_debugfs.h"
#include "xe_huc_debugfs.h"
#include "xe_macros.h"
#include "xe_uc_debugfs.h"

void xe_uc_debugfs_register(struct xe_uc *uc, struct dentry *parent)
{
    struct dentry *root;

    root = debugfs_create_dir("uc", parent);
    if (IS_ERR(root)) {
        XE_WARN_ON("Create UC directory failed");
        return;
    }

    xe_guc_debugfs_register(&uc->guc, root);
    xe_huc_debugfs_register(&uc->huc, root);
}
drivers/gpu/drm/xe/xe_uc_debugfs.h (+14)

/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022 Intel Corporation
 */

#ifndef _XE_UC_DEBUGFS_H_
#define _XE_UC_DEBUGFS_H_

struct dentry;
struct xe_uc;

void xe_uc_debugfs_register(struct xe_uc *uc, struct dentry *parent);

#endif
drivers/gpu/drm/xe/xe_uc_fw.c (+406)

// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include <linux/bitfield.h>
#include <linux/firmware.h>

#include <drm/drm_managed.h>

#include "xe_bo.h"
#include "xe_device_types.h"
#include "xe_force_wake.h"
#include "xe_gt.h"
#include "xe_guc_reg.h"
#include "xe_map.h"
#include "xe_mmio.h"
#include "xe_uc_fw.h"

static struct xe_gt *
__uc_fw_to_gt(struct xe_uc_fw *uc_fw, enum xe_uc_fw_type type)
{
    if (type == XE_UC_FW_TYPE_GUC)
        return container_of(uc_fw, struct xe_gt, uc.guc.fw);

    XE_BUG_ON(type != XE_UC_FW_TYPE_HUC);
    return container_of(uc_fw, struct xe_gt, uc.huc.fw);
}

static struct xe_gt *uc_fw_to_gt(struct xe_uc_fw *uc_fw)
{
    return __uc_fw_to_gt(uc_fw, uc_fw->type);
}

static struct xe_device *uc_fw_to_xe(struct xe_uc_fw *uc_fw)
{
    return gt_to_xe(uc_fw_to_gt(uc_fw));
}

/*
 * List of required GuC and HuC binaries per-platform.
 * Must be ordered based on platform + revid, from newer to older.
 */
#define XE_GUC_FIRMWARE_DEFS(fw_def, guc_def) \
    fw_def(METEORLAKE, 0, guc_def(mtl, 70, 5, 2)) \
    fw_def(ALDERLAKE_P, 0, guc_def(adlp, 70, 5, 2)) \
    fw_def(ALDERLAKE_S, 0, guc_def(tgl, 70, 5, 2)) \
    fw_def(PVC, 0, guc_def(pvc, 70, 5, 2)) \
    fw_def(DG2, 0, guc_def(dg2, 70, 5, 2)) \
    fw_def(DG1, 0, guc_def(dg1, 70, 5, 2)) \
    fw_def(TIGERLAKE, 0, guc_def(tgl, 70, 5, 2))

#define XE_HUC_FIRMWARE_DEFS(fw_def, huc_def) \
    fw_def(DG1, 0, huc_def(dg1, 7, 9, 3)) \
    fw_def(TIGERLAKE, 0, huc_def(tgl, 7, 9, 3))

#define __MAKE_UC_FW_PATH_MAJOR(prefix_, name_, major_) \
    "xe/" \
    __stringify(prefix_) "_" name_ "_" \
    __stringify(major_) ".bin"

#define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \
    "xe/" \
    __stringify(prefix_) name_ \
    __stringify(major_) "." \
    __stringify(minor_) "." \
    __stringify(patch_) ".bin"

#define MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_) \
    __MAKE_UC_FW_PATH_MAJOR(prefix_, "guc", major_)

#define MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_) \
    __MAKE_UC_FW_PATH(prefix_, "_huc_", major_, minor_, bld_num_)

/* All blobs need to be declared via MODULE_FIRMWARE() */
#define XE_UC_MODULE_FW(platform_, revid_, uc_) \
    MODULE_FIRMWARE(uc_);

XE_GUC_FIRMWARE_DEFS(XE_UC_MODULE_FW, MAKE_GUC_FW_PATH)
XE_HUC_FIRMWARE_DEFS(XE_UC_MODULE_FW, MAKE_HUC_FW_PATH)

/* The below structs and macros are used to iterate across the list of blobs */
struct __packed uc_fw_blob {
    u8 major;
    u8 minor;
    const char *path;
};

#define UC_FW_BLOB(major_, minor_, path_) \
    { .major = major_, .minor = minor_, .path = path_ }

#define GUC_FW_BLOB(prefix_, major_, minor_, patch_) \
    UC_FW_BLOB(major_, minor_, \
               MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_))

#define HUC_FW_BLOB(prefix_, major_, minor_, bld_num_) \
    UC_FW_BLOB(major_, minor_, \
               MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_))

struct __packed uc_fw_platform_requirement {
    enum xe_platform p;
    u8 rev; /* first platform rev using this FW */
    const struct uc_fw_blob blob;
};

#define MAKE_FW_LIST(platform_, revid_, uc_) \
{ \
    .p = XE_##platform_, \
    .rev = revid_, \
    .blob = uc_, \
},

struct fw_blobs_by_type {
    const struct uc_fw_platform_requirement *blobs;
    u32 count;
};

static void
uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw)
{
    static const struct uc_fw_platform_requirement blobs_guc[] = {
        XE_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB)
    };
    static const struct uc_fw_platform_requirement blobs_huc[] = {
        XE_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB)
    };
    static const struct fw_blobs_by_type blobs_all[XE_UC_FW_NUM_TYPES] = {
        [XE_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) },
        [XE_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) },
    };
    static const struct uc_fw_platform_requirement *fw_blobs;
    enum xe_platform p = xe->info.platform;
    u32 fw_count;
    u8 rev = xe->info.revid;
    int i;

    XE_BUG_ON(uc_fw->type >= ARRAY_SIZE(blobs_all));
    fw_blobs = blobs_all[uc_fw->type].blobs;
    fw_count = blobs_all[uc_fw->type].count;

    for (i = 0; i < fw_count && p <= fw_blobs[i].p; i++) {
        if (p == fw_blobs[i].p && rev >= fw_blobs[i].rev) {
            const struct uc_fw_blob *blob = &fw_blobs[i].blob;

            uc_fw->path = blob->path;
            uc_fw->major_ver_wanted = blob->major;
            uc_fw->minor_ver_wanted = blob->minor;
            break;
        }
    }
}

/**
 * xe_uc_fw_copy_rsa - copy fw RSA to buffer
 *
 * @uc_fw: uC firmware
 * @dst: dst buffer
 * @max_len: max number of bytes to copy
 *
 * Return: number of copied bytes.
 */
size_t xe_uc_fw_copy_rsa(struct xe_uc_fw *uc_fw, void *dst, u32 max_len)
{
    struct xe_device *xe = uc_fw_to_xe(uc_fw);
    u32 size = min_t(u32, uc_fw->rsa_size, max_len);

    XE_BUG_ON(size % 4);
    XE_BUG_ON(!xe_uc_fw_is_available(uc_fw));

    xe_map_memcpy_from(xe, dst, &uc_fw->bo->vmap,
                       xe_uc_fw_rsa_offset(uc_fw), size);

    return size;
}

static void uc_fw_fini(struct drm_device *drm, void *arg)
{
    struct xe_uc_fw *uc_fw = arg;

    if (!xe_uc_fw_is_available(uc_fw))
        return;

    xe_bo_unpin_map_no_vm(uc_fw->bo);
    xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_SELECTED);
}

int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
{
    struct xe_device *xe = uc_fw_to_xe(uc_fw);
    struct xe_gt *gt = uc_fw_to_gt(uc_fw);
    struct device *dev = xe->drm.dev;
    const struct firmware *fw = NULL;
    struct uc_css_header *css;
    struct xe_bo *obj;
    size_t size;
    int err;

    /*
     * We use FIRMWARE_UNINITIALIZED to detect checks against uc_fw->status
     * before we've looked at the HW caps to see if we have uC support
     */
    BUILD_BUG_ON(XE_UC_FIRMWARE_UNINITIALIZED);
    XE_BUG_ON(uc_fw->status);
    XE_BUG_ON(uc_fw->path);

    uc_fw_auto_select(xe, uc_fw);
    xe_uc_fw_change_status(uc_fw, uc_fw->path ? *uc_fw->path ?
                           XE_UC_FIRMWARE_SELECTED :
                           XE_UC_FIRMWARE_DISABLED :
                           XE_UC_FIRMWARE_NOT_SUPPORTED);

    /* Transform no huc in the list into firmware disabled */
    if (uc_fw->type == XE_UC_FW_TYPE_HUC && !xe_uc_fw_is_supported(uc_fw)) {
        xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_DISABLED);
        err = -ENOPKG;
        return err;
    }
    err = request_firmware(&fw, uc_fw->path, dev);
    if (err)
        goto fail;

    /* Check the size of the blob before examining buffer contents */
    if (unlikely(fw->size < sizeof(struct uc_css_header))) {
        drm_warn(&xe->drm, "%s firmware %s: invalid size: %zu < %zu\n",
                 xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
                 fw->size, sizeof(struct uc_css_header));
        err = -ENODATA;
        goto fail;
    }

    css = (struct uc_css_header *)fw->data;

    /* Check integrity of size values inside CSS header */
    size = (css->header_size_dw - css->key_size_dw - css->modulus_size_dw -
            css->exponent_size_dw) * sizeof(u32);
    if (unlikely(size != sizeof(struct uc_css_header))) {
        drm_warn(&xe->drm,
                 "%s firmware %s: unexpected header size: %zu != %zu\n",
                 xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
                 fw->size, sizeof(struct uc_css_header));
        err = -EPROTO;
        goto fail;
    }

    /* uCode size must be calculated from other sizes */
    uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32);

    /* now RSA */
    uc_fw->rsa_size = css->key_size_dw * sizeof(u32);

    /* At least, it should have header, uCode and RSA: check the size of all three */
    size = sizeof(struct uc_css_header) + uc_fw->ucode_size +
           uc_fw->rsa_size;
    if (unlikely(fw->size < size)) {
        drm_warn(&xe->drm, "%s firmware %s: invalid size: %zu < %zu\n",
                 xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
                 fw->size, size);
        err = -ENOEXEC;
        goto fail;
    }

    /* Get version numbers from the CSS header */
    uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MAJOR,
                                       css->sw_version);
    uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MINOR,
                                       css->sw_version);

    if (uc_fw->major_ver_found != uc_fw->major_ver_wanted ||
        uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) {
        drm_notice(&xe->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n",
                   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
                   uc_fw->major_ver_found, uc_fw->minor_ver_found,
                   uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted);
        if (!xe_uc_fw_is_overridden(uc_fw)) {
            err = -ENOEXEC;
            goto fail;
        }
    }

    if (uc_fw->type == XE_UC_FW_TYPE_GUC)
        uc_fw->private_data_size = css->private_data_size;

    obj = xe_bo_create_from_data(xe, gt, fw->data, fw->size,
                                 ttm_bo_type_kernel,
                                 XE_BO_CREATE_VRAM_IF_DGFX(gt) |
                                 XE_BO_CREATE_GGTT_BIT);
    if (IS_ERR(obj)) {
        drm_notice(&xe->drm, "%s firmware %s: failed to create / populate bo",
                   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path);
        err = PTR_ERR(obj);
        goto fail;
    }

    uc_fw->bo = obj;
    uc_fw->size = fw->size;
    xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_AVAILABLE);

    release_firmware(fw);

    err = drmm_add_action_or_reset(&xe->drm, uc_fw_fini, uc_fw);
    if (err)
        return err;

    return 0;

fail:
    xe_uc_fw_change_status(uc_fw, err == -ENOENT ?
                           XE_UC_FIRMWARE_MISSING :
                           XE_UC_FIRMWARE_ERROR);

    drm_notice(&xe->drm, "%s firmware %s: fetch failed with error %d\n",
               xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, err);
    drm_info(&xe->drm, "%s firmware(s) can be downloaded from %s\n",
             xe_uc_fw_type_repr(uc_fw->type), XE_UC_FIRMWARE_URL);

    release_firmware(fw);   /* OK even if fw is NULL */
    return err;
}

static u32 uc_fw_ggtt_offset(struct xe_uc_fw *uc_fw)
{
    return xe_bo_ggtt_addr(uc_fw->bo);
}

static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags)
{
    struct xe_device *xe = uc_fw_to_xe(uc_fw);
    struct xe_gt *gt = uc_fw_to_gt(uc_fw);
    u32 src_offset;
    int ret;

    xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);

    /* Set the source address for the uCode */
    src_offset = uc_fw_ggtt_offset(uc_fw);
    xe_mmio_write32(gt, DMA_ADDR_0_LOW.reg, lower_32_bits(src_offset));
    xe_mmio_write32(gt, DMA_ADDR_0_HIGH.reg, upper_32_bits(src_offset));

    /* Set the DMA destination */
    xe_mmio_write32(gt, DMA_ADDR_1_LOW.reg, offset);
    xe_mmio_write32(gt, DMA_ADDR_1_HIGH.reg, DMA_ADDRESS_SPACE_WOPCM);

    /*
     * Set the transfer size.
The header plus uCode will be copied to WOPCM 345 + * via DMA, excluding any other components 346 + */ 347 + xe_mmio_write32(gt, DMA_COPY_SIZE.reg, 348 + sizeof(struct uc_css_header) + uc_fw->ucode_size); 349 + 350 + /* Start the DMA */ 351 + xe_mmio_write32(gt, DMA_CTRL.reg, 352 + _MASKED_BIT_ENABLE(dma_flags | START_DMA)); 353 + 354 + /* Wait for DMA to finish */ 355 + ret = xe_mmio_wait32(gt, DMA_CTRL.reg, 0, START_DMA, 100); 356 + if (ret) 357 + drm_err(&xe->drm, "DMA for %s fw failed, DMA_CTRL=%u\n", 358 + xe_uc_fw_type_repr(uc_fw->type), 359 + xe_mmio_read32(gt, DMA_CTRL.reg)); 360 + 361 + /* Disable the bits once DMA is over */ 362 + xe_mmio_write32(gt, DMA_CTRL.reg, _MASKED_BIT_DISABLE(dma_flags)); 363 + 364 + return ret; 365 + } 366 + 367 + int xe_uc_fw_upload(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags) 368 + { 369 + struct xe_device *xe = uc_fw_to_xe(uc_fw); 370 + int err; 371 + 372 + /* make sure the status was cleared the last time we reset the uc */ 373 + XE_BUG_ON(xe_uc_fw_is_loaded(uc_fw)); 374 + 375 + if (!xe_uc_fw_is_loadable(uc_fw)) 376 + return -ENOEXEC; 377 + 378 + /* Call custom loader */ 379 + err = uc_fw_xfer(uc_fw, offset, dma_flags); 380 + if (err) 381 + goto fail; 382 + 383 + xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_TRANSFERRED); 384 + return 0; 385 + 386 + fail: 387 + drm_err(&xe->drm, "Failed to load %s firmware %s (%d)\n", 388 + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, 389 + err); 390 + xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_LOAD_FAIL); 391 + return err; 392 + } 393 + 394 + 395 + void xe_uc_fw_print(struct xe_uc_fw *uc_fw, struct drm_printer *p) 396 + { 397 + drm_printf(p, "%s firmware: %s\n", 398 + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path); 399 + drm_printf(p, "\tstatus: %s\n", 400 + xe_uc_fw_status_repr(uc_fw->status)); 401 + drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n", 402 + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted, 403 + uc_fw->major_ver_found, uc_fw->minor_ver_found); 404 + drm_printf(p, 
"\tuCode: %u bytes\n", uc_fw->ucode_size); 405 + drm_printf(p, "\tRSA: %u bytes\n", uc_fw->rsa_size); 406 + }
+180
drivers/gpu/drm/xe/xe_uc_fw.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_UC_FW_H_ 7 + #define _XE_UC_FW_H_ 8 + 9 + #include <linux/errno.h> 10 + 11 + #include "xe_uc_fw_types.h" 12 + #include "xe_uc_fw_abi.h" 13 + #include "xe_macros.h" 14 + 15 + struct drm_printer; 16 + 17 + int xe_uc_fw_init(struct xe_uc_fw *uc_fw); 18 + size_t xe_uc_fw_copy_rsa(struct xe_uc_fw *uc_fw, void *dst, u32 max_len); 19 + int xe_uc_fw_upload(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags); 20 + void xe_uc_fw_print(struct xe_uc_fw *uc_fw, struct drm_printer *p); 21 + 22 + static inline u32 xe_uc_fw_rsa_offset(struct xe_uc_fw *uc_fw) 23 + { 24 + return sizeof(struct uc_css_header) + uc_fw->ucode_size; 25 + } 26 + 27 + static inline void xe_uc_fw_change_status(struct xe_uc_fw *uc_fw, 28 + enum xe_uc_fw_status status) 29 + { 30 + uc_fw->__status = status; 31 + } 32 + 33 + static inline 34 + const char *xe_uc_fw_status_repr(enum xe_uc_fw_status status) 35 + { 36 + switch (status) { 37 + case XE_UC_FIRMWARE_NOT_SUPPORTED: 38 + return "N/A"; 39 + case XE_UC_FIRMWARE_UNINITIALIZED: 40 + return "UNINITIALIZED"; 41 + case XE_UC_FIRMWARE_DISABLED: 42 + return "DISABLED"; 43 + case XE_UC_FIRMWARE_SELECTED: 44 + return "SELECTED"; 45 + case XE_UC_FIRMWARE_MISSING: 46 + return "MISSING"; 47 + case XE_UC_FIRMWARE_ERROR: 48 + return "ERROR"; 49 + case XE_UC_FIRMWARE_AVAILABLE: 50 + return "AVAILABLE"; 51 + case XE_UC_FIRMWARE_INIT_FAIL: 52 + return "INIT FAIL"; 53 + case XE_UC_FIRMWARE_LOADABLE: 54 + return "LOADABLE"; 55 + case XE_UC_FIRMWARE_LOAD_FAIL: 56 + return "LOAD FAIL"; 57 + case XE_UC_FIRMWARE_TRANSFERRED: 58 + return "TRANSFERRED"; 59 + case XE_UC_FIRMWARE_RUNNING: 60 + return "RUNNING"; 61 + } 62 + return "<invalid>"; 63 + } 64 + 65 + static inline int xe_uc_fw_status_to_error(enum xe_uc_fw_status status) 66 + { 67 + switch (status) { 68 + case XE_UC_FIRMWARE_NOT_SUPPORTED: 69 + return -ENODEV; 70 + case XE_UC_FIRMWARE_UNINITIALIZED: 71 + 
return -EACCES; 72 + case XE_UC_FIRMWARE_DISABLED: 73 + return -EPERM; 74 + case XE_UC_FIRMWARE_MISSING: 75 + return -ENOENT; 76 + case XE_UC_FIRMWARE_ERROR: 77 + return -ENOEXEC; 78 + case XE_UC_FIRMWARE_INIT_FAIL: 79 + case XE_UC_FIRMWARE_LOAD_FAIL: 80 + return -EIO; 81 + case XE_UC_FIRMWARE_SELECTED: 82 + return -ESTALE; 83 + case XE_UC_FIRMWARE_AVAILABLE: 84 + case XE_UC_FIRMWARE_LOADABLE: 85 + case XE_UC_FIRMWARE_TRANSFERRED: 86 + case XE_UC_FIRMWARE_RUNNING: 87 + return 0; 88 + } 89 + return -EINVAL; 90 + } 91 + 92 + static inline const char *xe_uc_fw_type_repr(enum xe_uc_fw_type type) 93 + { 94 + switch (type) { 95 + case XE_UC_FW_TYPE_GUC: 96 + return "GuC"; 97 + case XE_UC_FW_TYPE_HUC: 98 + return "HuC"; 99 + } 100 + return "uC"; 101 + } 102 + 103 + static inline enum xe_uc_fw_status 104 + __xe_uc_fw_status(struct xe_uc_fw *uc_fw) 105 + { 106 + /* shouldn't call this before checking hw/blob availability */ 107 + XE_BUG_ON(uc_fw->status == XE_UC_FIRMWARE_UNINITIALIZED); 108 + return uc_fw->status; 109 + } 110 + 111 + static inline bool xe_uc_fw_is_supported(struct xe_uc_fw *uc_fw) 112 + { 113 + return __xe_uc_fw_status(uc_fw) != XE_UC_FIRMWARE_NOT_SUPPORTED; 114 + } 115 + 116 + static inline bool xe_uc_fw_is_enabled(struct xe_uc_fw *uc_fw) 117 + { 118 + return __xe_uc_fw_status(uc_fw) > XE_UC_FIRMWARE_DISABLED; 119 + } 120 + 121 + static inline bool xe_uc_fw_is_disabled(struct xe_uc_fw *uc_fw) 122 + { 123 + return __xe_uc_fw_status(uc_fw) == XE_UC_FIRMWARE_DISABLED; 124 + } 125 + 126 + static inline bool xe_uc_fw_is_available(struct xe_uc_fw *uc_fw) 127 + { 128 + return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_AVAILABLE; 129 + } 130 + 131 + static inline bool xe_uc_fw_is_loadable(struct xe_uc_fw *uc_fw) 132 + { 133 + return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_LOADABLE; 134 + } 135 + 136 + static inline bool xe_uc_fw_is_loaded(struct xe_uc_fw *uc_fw) 137 + { 138 + return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_TRANSFERRED; 139 + } 140 + 141 + 
static inline bool xe_uc_fw_is_running(struct xe_uc_fw *uc_fw) 142 + { 143 + return __xe_uc_fw_status(uc_fw) == XE_UC_FIRMWARE_RUNNING; 144 + } 145 + 146 + static inline bool xe_uc_fw_is_overridden(const struct xe_uc_fw *uc_fw) 147 + { 148 + return uc_fw->user_overridden; 149 + } 150 + 151 + static inline void xe_uc_fw_sanitize(struct xe_uc_fw *uc_fw) 152 + { 153 + if (xe_uc_fw_is_loaded(uc_fw)) 154 + xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_LOADABLE); 155 + } 156 + 157 + static inline u32 __xe_uc_fw_get_upload_size(struct xe_uc_fw *uc_fw) 158 + { 159 + return sizeof(struct uc_css_header) + uc_fw->ucode_size; 160 + } 161 + 162 + /** 163 + * xe_uc_fw_get_upload_size() - Get size of firmware needed to be uploaded. 164 + * @uc_fw: uC firmware. 165 + * 166 + * Get the size of the firmware and header that will be uploaded to WOPCM. 167 + * 168 + * Return: Upload firmware size, or zero on firmware fetch failure. 169 + */ 170 + static inline u32 xe_uc_fw_get_upload_size(struct xe_uc_fw *uc_fw) 171 + { 172 + if (!xe_uc_fw_is_available(uc_fw)) 173 + return 0; 174 + 175 + return __xe_uc_fw_get_upload_size(uc_fw); 176 + } 177 + 178 + #define XE_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/tree/xe" 179 + 180 + #endif
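A design note on the predicates above: they reduce to single comparisons only because enum xe_uc_fw_status is declared in lifecycle order. A minimal standalone sketch of that pattern (the enum values mirror the driver's, but the helper names here are illustrative, not driver code):

```c
#include <assert.h>

/* Values mirror enum xe_uc_fw_status: declaration order follows the
 * fetch/init/upload lifecycle, so ">="-style predicates cover whole
 * phases with a single comparison. */
enum fw_status {
	FW_NOT_SUPPORTED = -1,
	FW_UNINITIALIZED = 0,
	FW_DISABLED,
	FW_SELECTED,
	FW_MISSING,
	FW_ERROR,
	FW_AVAILABLE,
	FW_INIT_FAIL,
	FW_LOADABLE,
	FW_LOAD_FAIL,
	FW_TRANSFERRED,
	FW_RUNNING,
};

/* Illustrative counterparts of xe_uc_fw_is_available()/_is_loaded():
 * "available" means the fetch phase completed, "loaded" means the DMA
 * transfer has at least finished. */
static int fw_is_available(enum fw_status s)
{
	return s >= FW_AVAILABLE;
}

static int fw_is_loaded(enum fw_status s)
{
	return s >= FW_TRANSFERRED;
}
```

One consequence of this encoding is that reordering the enum silently changes every predicate, so new states have to be inserted with care.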
+81
drivers/gpu/drm/xe/xe_uc_fw_abi.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_UC_FW_ABI_H 7 + #define _XE_UC_FW_ABI_H 8 + 9 + #include <linux/types.h> 10 + #include <linux/build_bug.h> 11 + 12 + /** 13 + * DOC: Firmware Layout 14 + * 15 + * The GuC/HuC firmware layout looks like this:: 16 + * 17 + * +======================================================================+ 18 + * | Firmware blob | 19 + * +===============+===============+============+============+============+ 20 + * | CSS header | uCode | RSA key | modulus | exponent | 21 + * +===============+===============+============+============+============+ 22 + * <-header size-> <---header size continued -----------> 23 + * <--- size -----------------------------------------------------------> 24 + * <-key size-> 25 + * <-mod size-> 26 + * <-exp size-> 27 + * 28 + * The firmware may or may not include the modulus and exponent data. The 29 + * header, uCode and RSA signature are must-have components used by the driver. 30 + * The length of each component, in dwords, can be found in the header. 31 + * When the modulus and exponent are not present in the firmware, a.k.a. a 32 + * truncated image, their length values still appear in the header. 33 + * 34 + * The driver does some basic firmware size validation based on the following rules: 35 + * 36 + * 1. Header, uCode and RSA are must-have components. 37 + * 2. All firmware components, if present, appear in the sequence illustrated 38 + * in the layout table above. 39 + * 3. The length of each component, in dwords, can be found in the header. 40 + * 4. The modulus and exponent are not required by the driver and may be absent 41 + * from the firmware; the driver will load such a truncated image. 42 + */ 43 + 44 + struct uc_css_header { 45 + u32 module_type; 46 + /* 47 + * header_size includes all non-uCode bits, including css_header, rsa 48 + * key, modulus key and exponent data.
49 + */ 50 + u32 header_size_dw; 51 + u32 header_version; 52 + u32 module_id; 53 + u32 module_vendor; 54 + u32 date; 55 + #define CSS_DATE_DAY (0xFF << 0) 56 + #define CSS_DATE_MONTH (0xFF << 8) 57 + #define CSS_DATE_YEAR (0xFFFF << 16) 58 + u32 size_dw; /* uCode plus header_size_dw */ 59 + u32 key_size_dw; 60 + u32 modulus_size_dw; 61 + u32 exponent_size_dw; 62 + u32 time; 63 + #define CSS_TIME_HOUR (0xFF << 0) 64 + #define CSS_TIME_MIN (0xFF << 8) 65 + #define CSS_TIME_SEC (0xFFFF << 16) 66 + char username[8]; 67 + char buildnumber[12]; 68 + u32 sw_version; 69 + #define CSS_SW_VERSION_UC_MAJOR (0xFF << 16) 70 + #define CSS_SW_VERSION_UC_MINOR (0xFF << 8) 71 + #define CSS_SW_VERSION_UC_PATCH (0xFF << 0) 72 + u32 reserved0[13]; 73 + union { 74 + u32 private_data_size; /* only applies to GuC */ 75 + u32 reserved1; 76 + }; 77 + u32 header_info; 78 + } __packed; 79 + static_assert(sizeof(struct uc_css_header) == 128); 80 + 81 + #endif
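The layout rules documented in this header translate directly into the two size checks performed at fetch time: the non-uCode part of header_size_dw minus the key/modulus/exponent dwords must equal the fixed 128-byte CSS header, and the blob must hold at least header + uCode + RSA key (modulus and exponent may be truncated away). A standalone sketch of that arithmetic, with a made-up struct and function name for illustration:

```c
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define CSS_HEADER_BYTES 128u /* static_assert'ed size of struct uc_css_header */

/* Dword counts as read from the CSS header of a firmware blob. */
struct css_sizes {
	uint32_t header_size_dw;   /* everything that is not uCode */
	uint32_t size_dw;          /* uCode plus header_size_dw */
	uint32_t key_size_dw;
	uint32_t modulus_size_dw;
	uint32_t exponent_size_dw;
};

/* Mirrors the two checks in xe_uc_fw_init(): the fixed header must account
 * for header_size_dw minus the key/modulus/exponent dwords, and the blob
 * must hold at least header + uCode + RSA key. */
static int css_sizes_ok(const struct css_sizes *c, size_t blob_size)
{
	uint32_t fixed = (c->header_size_dw - c->key_size_dw -
			  c->modulus_size_dw - c->exponent_size_dw) * 4;
	uint32_t ucode = (c->size_dw - c->header_size_dw) * 4;
	uint32_t rsa = c->key_size_dw * 4;

	if (fixed != CSS_HEADER_BYTES)
		return 0;
	return blob_size >= CSS_HEADER_BYTES + ucode + rsa;
}
```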
+112
drivers/gpu/drm/xe/xe_uc_fw_types.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_UC_FW_TYPES_H_ 7 + #define _XE_UC_FW_TYPES_H_ 8 + 9 + #include <linux/types.h> 10 + 11 + struct xe_bo; 12 + 13 + /* 14 + * +------------+---------------------------------------------------+ 15 + * | PHASE | FIRMWARE STATUS TRANSITIONS | 16 + * +============+===================================================+ 17 + * | | UNINITIALIZED | 18 + * +------------+- / | \ -+ 19 + * | | DISABLED <--/ | \--> NOT_SUPPORTED | 20 + * | init_early | V | 21 + * | | SELECTED | 22 + * +------------+- / | \ -+ 23 + * | | MISSING <--/ | \--> ERROR | 24 + * | fetch | V | 25 + * | | AVAILABLE | 26 + * +------------+- | \ -+ 27 + * | | | \--> INIT FAIL | 28 + * | init | V | 29 + * | | /------> LOADABLE <----<-----------\ | 30 + * +------------+- \ / \ \ \ -+ 31 + * | | LOAD FAIL <--< \--> TRANSFERRED \ | 32 + * | upload | \ / \ / | 33 + * | | \---------/ \--> RUNNING | 34 + * +------------+---------------------------------------------------+ 35 + */ 36 + 37 + /* 38 + * FIXME: Ported from the i915 and this state machine is way too complicated. 39 + * Circle back and simplify this. 
40 + */ 41 + enum xe_uc_fw_status { 42 + XE_UC_FIRMWARE_NOT_SUPPORTED = -1, /* no uc HW */ 43 + XE_UC_FIRMWARE_UNINITIALIZED = 0, /* used to catch checks done too early */ 44 + XE_UC_FIRMWARE_DISABLED, /* disabled */ 45 + XE_UC_FIRMWARE_SELECTED, /* selected the blob we want to load */ 46 + XE_UC_FIRMWARE_MISSING, /* blob not found on the system */ 47 + XE_UC_FIRMWARE_ERROR, /* invalid format or version */ 48 + XE_UC_FIRMWARE_AVAILABLE, /* blob found and copied in mem */ 49 + XE_UC_FIRMWARE_INIT_FAIL, /* failed to prepare fw objects for load */ 50 + XE_UC_FIRMWARE_LOADABLE, /* all fw-required objects are ready */ 51 + XE_UC_FIRMWARE_LOAD_FAIL, /* failed to xfer or init/auth the fw */ 52 + XE_UC_FIRMWARE_TRANSFERRED, /* dma xfer done */ 53 + XE_UC_FIRMWARE_RUNNING /* init/auth done */ 54 + }; 55 + 56 + enum xe_uc_fw_type { 57 + XE_UC_FW_TYPE_GUC = 0, 58 + XE_UC_FW_TYPE_HUC 59 + }; 60 + #define XE_UC_FW_NUM_TYPES 2 61 + 62 + /** 63 + * struct xe_uc_fw - XE micro controller firmware 64 + */ 65 + struct xe_uc_fw { 66 + /** @type: type of uC firmware */ 67 + enum xe_uc_fw_type type; 68 + union { 69 + /** @status: firmware load status */ 70 + const enum xe_uc_fw_status status; 71 + /** 72 + * @__status: private firmware load status - only to be used 73 + * by firmware loading code 74 + */ 75 + enum xe_uc_fw_status __status; 76 + }; 77 + /** @path: path to uC firmware */ 78 + const char *path; 79 + /** @user_overridden: user provided path to uC firmware via modparam */ 80 + bool user_overridden; 81 + /** @size: size of uC firmware including css header */ 82 + size_t size; 83 + 84 + /** @bo: XE BO for uC firmware */ 85 + struct xe_bo *bo; 86 + 87 + /* 88 + * The firmware build process will generate a version header file with 89 + * major and minor version defined. The versions are built into the CSS 90 + * header of the firmware. The xe kernel driver sets the minimum firmware 91 + * version required per platform. 
92 + */ 93 + 94 + /** @major_ver_wanted: major firmware version wanted by platform */ 95 + u16 major_ver_wanted; 96 + /** @minor_ver_wanted: minor firmware version wanted by platform */ 97 + u16 minor_ver_wanted; 98 + /** @major_ver_found: major version found in firmware blob */ 99 + u16 major_ver_found; 100 + /** @minor_ver_found: minor version found in firmware blob */ 101 + u16 minor_ver_found; 102 + 103 + /** @rsa_size: RSA size */ 104 + u32 rsa_size; 105 + /** @ucode_size: micro kernel size */ 106 + u32 ucode_size; 107 + 108 + /** @private_data_size: size of private data found in uC css header */ 109 + u32 private_data_size; 110 + }; 111 + 112 + #endif
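The wanted/found version fields above feed the acceptance check in xe_uc_fw_init(): the found major version must match exactly, while any found minor version at or above the wanted one passes. A standalone sketch of that rule, where UC_MAJOR/UC_MINOR are simplified stand-ins for FIELD_GET() with the CSS_SW_VERSION_UC_* masks:

```c
#include <assert.h>
#include <stdint.h>

/* Simplified stand-ins for FIELD_GET() with CSS_SW_VERSION_UC_MAJOR
 * (bits 23:16) and CSS_SW_VERSION_UC_MINOR (bits 15:8). */
#define UC_MAJOR(v) (((v) >> 16) & 0xFF)
#define UC_MINOR(v) (((v) >> 8) & 0xFF)

/* Mirrors the acceptance rule in xe_uc_fw_init(): exact major match,
 * minor at or above the wanted version. */
static int fw_version_ok(uint32_t sw_version,
			 uint8_t major_wanted, uint8_t minor_wanted)
{
	return UC_MAJOR(sw_version) == major_wanted &&
	       UC_MINOR(sw_version) >= minor_wanted;
}
```

Note that in the driver a mismatch is only fatal when the firmware path was not user-overridden; an overridden blob gets a warning but is still loaded.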
+25
drivers/gpu/drm/xe/xe_uc_types.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_UC_TYPES_H_ 7 + #define _XE_UC_TYPES_H_ 8 + 9 + #include "xe_guc_types.h" 10 + #include "xe_huc_types.h" 11 + #include "xe_wopcm_types.h" 12 + 13 + /** 14 + * struct xe_uc - XE micro controllers 15 + */ 16 + struct xe_uc { 17 + /** @guc: Graphics micro controller */ 18 + struct xe_guc guc; 19 + /** @huc: HuC */ 20 + struct xe_huc huc; 21 + /** @wopcm: WOPCM */ 22 + struct xe_wopcm wopcm; 23 + }; 24 + 25 + #endif
+3407
drivers/gpu/drm/xe/xe_vm.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2021 Intel Corporation 4 + */ 5 + 6 + #include "xe_vm.h" 7 + 8 + #include <linux/dma-fence-array.h> 9 + 10 + #include <drm/ttm/ttm_execbuf_util.h> 11 + #include <drm/ttm/ttm_tt.h> 12 + #include <drm/xe_drm.h> 13 + #include <linux/kthread.h> 14 + #include <linux/mm.h> 15 + #include <linux/swap.h> 16 + 17 + #include "xe_bo.h" 18 + #include "xe_device.h" 19 + #include "xe_engine.h" 20 + #include "xe_gt.h" 21 + #include "xe_gt_pagefault.h" 22 + #include "xe_migrate.h" 23 + #include "xe_pm.h" 24 + #include "xe_preempt_fence.h" 25 + #include "xe_pt.h" 26 + #include "xe_res_cursor.h" 27 + #include "xe_trace.h" 28 + #include "xe_sync.h" 29 + 30 + #define TEST_VM_ASYNC_OPS_ERROR 31 + 32 + /** 33 + * xe_vma_userptr_check_repin() - Advisory check for repin needed 34 + * @vma: The userptr vma 35 + * 36 + * Check if the userptr vma has been invalidated since last successful 37 + * repin. The check is advisory only and the function can be called 38 + * without the vm->userptr.notifier_lock held. There is no guarantee that the 39 + * vma userptr will remain valid after a lockless check, so typically 40 + * the call needs to be followed by a proper check under the notifier_lock. 41 + * 42 + * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended. 43 + */ 44 + int xe_vma_userptr_check_repin(struct xe_vma *vma) 45 + { 46 + return mmu_interval_check_retry(&vma->userptr.notifier, 47 + vma->userptr.notifier_seq) ? 
48 + -EAGAIN : 0; 49 + } 50 + 51 + int xe_vma_userptr_pin_pages(struct xe_vma *vma) 52 + { 53 + struct xe_vm *vm = vma->vm; 54 + struct xe_device *xe = vm->xe; 55 + const unsigned long num_pages = 56 + (vma->end - vma->start + 1) >> PAGE_SHIFT; 57 + struct page **pages; 58 + bool in_kthread = !current->mm; 59 + unsigned long notifier_seq; 60 + int pinned, ret, i; 61 + bool read_only = vma->pte_flags & PTE_READ_ONLY; 62 + 63 + lockdep_assert_held(&vm->lock); 64 + XE_BUG_ON(!xe_vma_is_userptr(vma)); 65 + retry: 66 + if (vma->destroyed) 67 + return 0; 68 + 69 + notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier); 70 + if (notifier_seq == vma->userptr.notifier_seq) 71 + return 0; 72 + 73 + pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL); 74 + if (!pages) 75 + return -ENOMEM; 76 + 77 + if (vma->userptr.sg) { 78 + dma_unmap_sgtable(xe->drm.dev, 79 + vma->userptr.sg, 80 + read_only ? DMA_TO_DEVICE : 81 + DMA_BIDIRECTIONAL, 0); 82 + sg_free_table(vma->userptr.sg); 83 + vma->userptr.sg = NULL; 84 + } 85 + 86 + pinned = ret = 0; 87 + if (in_kthread) { 88 + if (!mmget_not_zero(vma->userptr.notifier.mm)) { 89 + ret = -EFAULT; 90 + goto mm_closed; 91 + } 92 + kthread_use_mm(vma->userptr.notifier.mm); 93 + } 94 + 95 + while (pinned < num_pages) { 96 + ret = get_user_pages_fast(vma->userptr.ptr + pinned * PAGE_SIZE, 97 + num_pages - pinned, 98 + read_only ? 
0 : FOLL_WRITE, 99 + &pages[pinned]); 100 + if (ret < 0) { 101 + if (in_kthread) 102 + ret = 0; 103 + break; 104 + } 105 + 106 + pinned += ret; 107 + ret = 0; 108 + } 109 + 110 + if (in_kthread) { 111 + kthread_unuse_mm(vma->userptr.notifier.mm); 112 + mmput(vma->userptr.notifier.mm); 113 + } 114 + mm_closed: 115 + if (ret) 116 + goto out; 117 + 118 + ret = sg_alloc_table_from_pages(&vma->userptr.sgt, pages, pinned, 119 + 0, (u64)pinned << PAGE_SHIFT, 120 + GFP_KERNEL); 121 + if (ret) { 122 + vma->userptr.sg = NULL; 123 + goto out; 124 + } 125 + vma->userptr.sg = &vma->userptr.sgt; 126 + 127 + ret = dma_map_sgtable(xe->drm.dev, vma->userptr.sg, 128 + read_only ? DMA_TO_DEVICE : 129 + DMA_BIDIRECTIONAL, 130 + DMA_ATTR_SKIP_CPU_SYNC | 131 + DMA_ATTR_NO_KERNEL_MAPPING); 132 + if (ret) { 133 + sg_free_table(vma->userptr.sg); 134 + vma->userptr.sg = NULL; 135 + goto out; 136 + } 137 + 138 + for (i = 0; i < pinned; ++i) { 139 + if (!read_only) { 140 + lock_page(pages[i]); 141 + set_page_dirty(pages[i]); 142 + unlock_page(pages[i]); 143 + } 144 + 145 + mark_page_accessed(pages[i]); 146 + } 147 + 148 + out: 149 + release_pages(pages, pinned); 150 + kvfree(pages); 151 + 152 + if (!(ret < 0)) { 153 + vma->userptr.notifier_seq = notifier_seq; 154 + if (xe_vma_userptr_check_repin(vma) == -EAGAIN) 155 + goto retry; 156 + } 157 + 158 + return ret < 0 ? 
ret : 0; 159 + } 160 + 161 + static bool preempt_fences_waiting(struct xe_vm *vm) 162 + { 163 + struct xe_engine *e; 164 + 165 + lockdep_assert_held(&vm->lock); 166 + xe_vm_assert_held(vm); 167 + 168 + list_for_each_entry(e, &vm->preempt.engines, compute.link) { 169 + if (!e->compute.pfence || (e->compute.pfence && 170 + test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, 171 + &e->compute.pfence->flags))) { 172 + return true; 173 + } 174 + } 175 + 176 + return false; 177 + } 178 + 179 + static void free_preempt_fences(struct list_head *list) 180 + { 181 + struct list_head *link, *next; 182 + 183 + list_for_each_safe(link, next, list) 184 + xe_preempt_fence_free(to_preempt_fence_from_link(link)); 185 + } 186 + 187 + static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list, 188 + unsigned int *count) 189 + { 190 + lockdep_assert_held(&vm->lock); 191 + xe_vm_assert_held(vm); 192 + 193 + if (*count >= vm->preempt.num_engines) 194 + return 0; 195 + 196 + for (; *count < vm->preempt.num_engines; ++(*count)) { 197 + struct xe_preempt_fence *pfence = xe_preempt_fence_alloc(); 198 + 199 + if (IS_ERR(pfence)) 200 + return PTR_ERR(pfence); 201 + 202 + list_move_tail(xe_preempt_fence_link(pfence), list); 203 + } 204 + 205 + return 0; 206 + } 207 + 208 + static int wait_for_existing_preempt_fences(struct xe_vm *vm) 209 + { 210 + struct xe_engine *e; 211 + 212 + xe_vm_assert_held(vm); 213 + 214 + list_for_each_entry(e, &vm->preempt.engines, compute.link) { 215 + if (e->compute.pfence) { 216 + long timeout = dma_fence_wait(e->compute.pfence, false); 217 + 218 + if (timeout < 0) 219 + return -ETIME; 220 + dma_fence_put(e->compute.pfence); 221 + e->compute.pfence = NULL; 222 + } 223 + } 224 + 225 + return 0; 226 + } 227 + 228 + static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list) 229 + { 230 + struct list_head *link; 231 + struct xe_engine *e; 232 + 233 + list_for_each_entry(e, &vm->preempt.engines, compute.link) { 234 + struct dma_fence *fence; 235 + 
236 + link = list->next; 237 + XE_BUG_ON(link == list); 238 + 239 + fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link), 240 + e, e->compute.context, 241 + ++e->compute.seqno); 242 + dma_fence_put(e->compute.pfence); 243 + e->compute.pfence = fence; 244 + } 245 + } 246 + 247 + static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo) 248 + { 249 + struct xe_engine *e; 250 + struct ww_acquire_ctx ww; 251 + int err; 252 + 253 + err = xe_bo_lock(bo, &ww, vm->preempt.num_engines, true); 254 + if (err) 255 + return err; 256 + 257 + list_for_each_entry(e, &vm->preempt.engines, compute.link) 258 + if (e->compute.pfence) { 259 + dma_resv_add_fence(bo->ttm.base.resv, 260 + e->compute.pfence, 261 + DMA_RESV_USAGE_BOOKKEEP); 262 + } 263 + 264 + xe_bo_unlock(bo, &ww); 265 + return 0; 266 + } 267 + 268 + /** 269 + * xe_vm_fence_all_extobjs() - Add a fence to vm's external objects' resv 270 + * @vm: The vm. 271 + * @fence: The fence to add. 272 + * @usage: The resv usage for the fence. 273 + * 274 + * Loops over all of the vm's external object bindings and adds a @fence 275 + * with the given @usage to all of the external object's reservation 276 + * objects. 
277 + */ 278 + void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence, 279 + enum dma_resv_usage usage) 280 + { 281 + struct xe_vma *vma; 282 + 283 + list_for_each_entry(vma, &vm->extobj.list, extobj.link) 284 + dma_resv_add_fence(vma->bo->ttm.base.resv, fence, usage); 285 + } 286 + 287 + static void resume_and_reinstall_preempt_fences(struct xe_vm *vm) 288 + { 289 + struct xe_engine *e; 290 + 291 + lockdep_assert_held(&vm->lock); 292 + xe_vm_assert_held(vm); 293 + 294 + list_for_each_entry(e, &vm->preempt.engines, compute.link) { 295 + e->ops->resume(e); 296 + 297 + dma_resv_add_fence(&vm->resv, e->compute.pfence, 298 + DMA_RESV_USAGE_BOOKKEEP); 299 + xe_vm_fence_all_extobjs(vm, e->compute.pfence, 300 + DMA_RESV_USAGE_BOOKKEEP); 301 + } 302 + } 303 + 304 + int xe_vm_add_compute_engine(struct xe_vm *vm, struct xe_engine *e) 305 + { 306 + struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV]; 307 + struct ttm_validate_buffer *tv; 308 + struct ww_acquire_ctx ww; 309 + struct list_head objs; 310 + struct dma_fence *pfence; 311 + int err; 312 + bool wait; 313 + 314 + XE_BUG_ON(!xe_vm_in_compute_mode(vm)); 315 + 316 + down_write(&vm->lock); 317 + 318 + err = xe_vm_lock_dma_resv(vm, &ww, tv_onstack, &tv, &objs, true, 1); 319 + if (err) 320 + goto out_unlock_outer; 321 + 322 + pfence = xe_preempt_fence_create(e, e->compute.context, 323 + ++e->compute.seqno); 324 + if (!pfence) { 325 + err = -ENOMEM; 326 + goto out_unlock; 327 + } 328 + 329 + list_add(&e->compute.link, &vm->preempt.engines); 330 + ++vm->preempt.num_engines; 331 + e->compute.pfence = pfence; 332 + 333 + down_read(&vm->userptr.notifier_lock); 334 + 335 + dma_resv_add_fence(&vm->resv, pfence, 336 + DMA_RESV_USAGE_BOOKKEEP); 337 + 338 + xe_vm_fence_all_extobjs(vm, pfence, DMA_RESV_USAGE_BOOKKEEP); 339 + 340 + /* 341 + * Check whether a preemption on the VM or a userptr invalidation is 342 + * in flight; if so, trigger this preempt fence to sync state with 343 + * other preempt fences on the VM. 
344 + */ 345 + wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm); 346 + if (wait) 347 + dma_fence_enable_sw_signaling(pfence); 348 + 349 + up_read(&vm->userptr.notifier_lock); 350 + 351 + out_unlock: 352 + xe_vm_unlock_dma_resv(vm, tv_onstack, tv, &ww, &objs); 353 + out_unlock_outer: 354 + up_write(&vm->lock); 355 + 356 + return err; 357 + } 358 + 359 + /** 360 + * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs 361 + * that need repinning. 362 + * @vm: The VM. 363 + * 364 + * This function checks for whether the VM has userptrs that need repinning, 365 + * and provides a release-type barrier on the userptr.notifier_lock after 366 + * checking. 367 + * 368 + * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are. 369 + */ 370 + int __xe_vm_userptr_needs_repin(struct xe_vm *vm) 371 + { 372 + lockdep_assert_held_read(&vm->userptr.notifier_lock); 373 + 374 + return (list_empty(&vm->userptr.repin_list) && 375 + list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN; 376 + } 377 + 378 + /** 379 + * xe_vm_lock_dma_resv() - Lock the vm dma_resv object and the dma_resv 380 + * objects of the vm's external buffer objects. 381 + * @vm: The vm. 382 + * @ww: Pointer to a struct ww_acquire_ctx locking context. 383 + * @tv_onstack: Array size XE_ONSTACK_TV of storage for the struct 384 + * ttm_validate_buffers used for locking. 385 + * @tv: Pointer to a pointer that on output contains the actual storage used. 386 + * @objs: List head for the buffer objects locked. 387 + * @intr: Whether to lock interruptible. 388 + * @num_shared: Number of dma-fence slots to reserve in the locked objects. 389 + * 390 + * Locks the vm dma-resv objects and all the dma-resv objects of the 391 + * buffer objects on the vm external object list. The TTM utilities require 392 + * a list of struct ttm_validate_buffers pointing to the actual buffer 393 + * objects to lock. 
 * Storage for those struct ttm_validate_buffers should
 * be provided in @tv_onstack, and is typically reserved on the stack
 * of the caller. If the size of @tv_onstack isn't sufficient, then
 * storage will be allocated internally using kvmalloc().
 *
 * The function performs deadlock handling internally, and after a
 * successful return the ww locking transaction should be considered
 * sealed.
 *
 * Return: 0 on success, Negative error code on error. In particular if
 * @intr is set to true, -EINTR or -ERESTARTSYS may be returned. In case
 * of error, any locking performed has been reverted.
 */
int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww,
			struct ttm_validate_buffer *tv_onstack,
			struct ttm_validate_buffer **tv,
			struct list_head *objs,
			bool intr,
			unsigned int num_shared)
{
	struct ttm_validate_buffer *tv_vm, *tv_bo;
	struct xe_vma *vma, *next;
	LIST_HEAD(dups);
	int err;

	lockdep_assert_held(&vm->lock);

	if (vm->extobj.entries < XE_ONSTACK_TV) {
		tv_vm = tv_onstack;
	} else {
		tv_vm = kvmalloc_array(vm->extobj.entries + 1, sizeof(*tv_vm),
				       GFP_KERNEL);
		if (!tv_vm)
			return -ENOMEM;
	}
	tv_bo = tv_vm + 1;

	INIT_LIST_HEAD(objs);
	list_for_each_entry(vma, &vm->extobj.list, extobj.link) {
		tv_bo->num_shared = num_shared;
		tv_bo->bo = &vma->bo->ttm;

		list_add_tail(&tv_bo->head, objs);
		tv_bo++;
	}
	tv_vm->num_shared = num_shared;
	tv_vm->bo = xe_vm_ttm_bo(vm);
	list_add_tail(&tv_vm->head, objs);
	err = ttm_eu_reserve_buffers(ww, objs, intr, &dups);
	if (err)
		goto out_err;

	spin_lock(&vm->notifier.list_lock);
	list_for_each_entry_safe(vma, next, &vm->notifier.rebind_list,
				 notifier.rebind_link) {
		xe_bo_assert_held(vma->bo);

		list_del_init(&vma->notifier.rebind_link);
		if (vma->gt_present && !vma->destroyed)
			list_move_tail(&vma->rebind_link, &vm->rebind_list);
	}
	spin_unlock(&vm->notifier.list_lock);

	*tv = tv_vm;
	return 0;

out_err:
	if (tv_vm != tv_onstack)
		kvfree(tv_vm);

	return err;
}

/**
 * xe_vm_unlock_dma_resv() - Unlock reservation objects locked by
 * xe_vm_lock_dma_resv()
 * @vm: The vm.
 * @tv_onstack: The @tv_onstack array given to xe_vm_lock_dma_resv().
 * @tv: The value of *@tv given by xe_vm_lock_dma_resv().
 * @ww: The ww_acquire_context used for locking.
 * @objs: The list returned from xe_vm_lock_dma_resv().
 *
 * Unlocks the reservation objects and frees any memory allocated by
 * xe_vm_lock_dma_resv().
 */
void xe_vm_unlock_dma_resv(struct xe_vm *vm,
			   struct ttm_validate_buffer *tv_onstack,
			   struct ttm_validate_buffer *tv,
			   struct ww_acquire_ctx *ww,
			   struct list_head *objs)
{
	/*
	 * Nothing should've been able to enter the list while we were locked,
	 * since we've held the dma-resvs of all the vm's external objects,
	 * and holding the dma_resv of an object is required for list
	 * addition, and we shouldn't add ourselves.
	 */
	XE_WARN_ON(!list_empty(&vm->notifier.rebind_list));

	ttm_eu_backoff_reservation(ww, objs);
	if (tv && tv != tv_onstack)
		kvfree(tv);
}

static void preempt_rebind_work_func(struct work_struct *w)
{
	struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
	struct xe_vma *vma;
	struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
	struct ttm_validate_buffer *tv;
	struct ww_acquire_ctx ww;
	struct list_head objs;
	struct dma_fence *rebind_fence;
	unsigned int fence_count = 0;
	LIST_HEAD(preempt_fences);
	int err;
	long wait;
	int __maybe_unused tries = 0;

	XE_BUG_ON(!xe_vm_in_compute_mode(vm));
	trace_xe_vm_rebind_worker_enter(vm);

	if (xe_vm_is_closed(vm)) {
		trace_xe_vm_rebind_worker_exit(vm);
		return;
	}

	down_write(&vm->lock);

retry:
	if (vm->async_ops.error)
		goto out_unlock_outer;

	/*
	 * Extreme corner where we exit a VM error state with a munmap style VM
	 * unbind inflight which requires a rebind. In this case the rebind
	 * needs to install some fences into the dma-resv slots. The worker to
	 * do this is queued; let that worker make progress by dropping
	 * vm->lock and trying this again.
	 */
	if (vm->async_ops.munmap_rebind_inflight) {
		up_write(&vm->lock);
		flush_work(&vm->async_ops.work);
		goto retry;
	}

	if (xe_vm_userptr_check_repin(vm)) {
		err = xe_vm_userptr_pin(vm);
		if (err)
			goto out_unlock_outer;
	}

	err = xe_vm_lock_dma_resv(vm, &ww, tv_onstack, &tv, &objs,
				  false, vm->preempt.num_engines);
	if (err)
		goto out_unlock_outer;

	/* Fresh preempt fences already installed. Everything is running. */
	if (!preempt_fences_waiting(vm))
		goto out_unlock;

	/*
	 * This makes sure vm is completely suspended and also balances
	 * xe_engine suspend- and resume; we resume *all* vm engines below.
	 */
	err = wait_for_existing_preempt_fences(vm);
	if (err)
		goto out_unlock;

	err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
	if (err)
		goto out_unlock;

	list_for_each_entry(vma, &vm->rebind_list, rebind_link) {
		if (xe_vma_is_userptr(vma) || vma->destroyed)
			continue;

		err = xe_bo_validate(vma->bo, vm, false);
		if (err)
			goto out_unlock;
	}

	rebind_fence = xe_vm_rebind(vm, true);
	if (IS_ERR(rebind_fence)) {
		err = PTR_ERR(rebind_fence);
		goto out_unlock;
	}

	if (rebind_fence) {
		dma_fence_wait(rebind_fence, false);
		dma_fence_put(rebind_fence);
	}

	/* Wait on munmap style VM unbinds */
	wait = dma_resv_wait_timeout(&vm->resv,
				     DMA_RESV_USAGE_KERNEL,
				     false, MAX_SCHEDULE_TIMEOUT);
	if (wait <= 0) {
		err = -ETIME;
		goto out_unlock;
	}

#define retry_required(__tries, __vm) \
	(IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
	(!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
	__xe_vm_userptr_needs_repin(__vm))

	down_read(&vm->userptr.notifier_lock);
	if (retry_required(tries, vm)) {
		up_read(&vm->userptr.notifier_lock);
		err = -EAGAIN;
		goto out_unlock;
	}

#undef retry_required

	/* Point of no return. */
	arm_preempt_fences(vm, &preempt_fences);
	resume_and_reinstall_preempt_fences(vm);
	up_read(&vm->userptr.notifier_lock);

out_unlock:
	xe_vm_unlock_dma_resv(vm, tv_onstack, tv, &ww, &objs);
out_unlock_outer:
	if (err == -EAGAIN) {
		trace_xe_vm_rebind_worker_retry(vm);
		goto retry;
	}
	up_write(&vm->lock);

	free_preempt_fences(&preempt_fences);

	XE_WARN_ON(err < 0);	/* TODO: Kill VM or put in error state */
	trace_xe_vm_rebind_worker_exit(vm);
}

struct async_op_fence;
static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
			struct xe_engine *e, struct xe_sync_entry *syncs,
			u32 num_syncs, struct async_op_fence *afence);

static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
				   const struct mmu_notifier_range *range,
				   unsigned long cur_seq)
{
	struct xe_vma *vma = container_of(mni, struct xe_vma, userptr.notifier);
	struct xe_vm *vm = vma->vm;
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	long err;

	XE_BUG_ON(!xe_vma_is_userptr(vma));
	trace_xe_vma_userptr_invalidate(vma);

	if (!mmu_notifier_range_blockable(range))
		return false;

	down_write(&vm->userptr.notifier_lock);
	mmu_interval_set_seq(mni, cur_seq);

	/* No need to stop gpu access if the userptr is not yet bound. */
	if (!vma->userptr.initial_bind) {
		up_write(&vm->userptr.notifier_lock);
		return true;
	}

	/*
	 * Tell exec and rebind worker they need to repin and rebind this
	 * userptr.
	 */
	if (!xe_vm_in_fault_mode(vm) && !vma->destroyed && vma->gt_present) {
		spin_lock(&vm->userptr.invalidated_lock);
		list_move_tail(&vma->userptr.invalidate_link,
			       &vm->userptr.invalidated);
		spin_unlock(&vm->userptr.invalidated_lock);
	}

	up_write(&vm->userptr.notifier_lock);

	/*
	 * Preempt fences turn into schedule disables, pipeline these.
	 * Note that even in fault mode, we need to wait for binds and
	 * unbinds to complete, and those are attached as BOOKKEEP fences
	 * to the vm.
	 */
	dma_resv_iter_begin(&cursor, &vm->resv,
			    DMA_RESV_USAGE_BOOKKEEP);
	dma_resv_for_each_fence_unlocked(&cursor, fence)
		dma_fence_enable_sw_signaling(fence);
	dma_resv_iter_end(&cursor);

	err = dma_resv_wait_timeout(&vm->resv,
				    DMA_RESV_USAGE_BOOKKEEP,
				    false, MAX_SCHEDULE_TIMEOUT);
	XE_WARN_ON(err <= 0);

	if (xe_vm_in_fault_mode(vm)) {
		err = xe_vm_invalidate_vma(vma);
		XE_WARN_ON(err);
	}

	trace_xe_vma_userptr_invalidate_complete(vma);

	return true;
}

static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
	.invalidate = vma_userptr_invalidate,
};

int xe_vm_userptr_pin(struct xe_vm *vm)
{
	struct xe_vma *vma, *next;
	int err = 0;
	LIST_HEAD(tmp_evict);

	lockdep_assert_held_write(&vm->lock);

	/* Collect invalidated userptrs */
	spin_lock(&vm->userptr.invalidated_lock);
	list_for_each_entry_safe(vma, next, &vm->userptr.invalidated,
				 userptr.invalidate_link) {
		list_del_init(&vma->userptr.invalidate_link);
		list_move_tail(&vma->userptr_link, &vm->userptr.repin_list);
	}
	spin_unlock(&vm->userptr.invalidated_lock);

	/* Pin and move to temporary list */
	list_for_each_entry_safe(vma, next, &vm->userptr.repin_list, userptr_link) {
		err = xe_vma_userptr_pin_pages(vma);
		if (err < 0)
			goto out_err;

		list_move_tail(&vma->userptr_link, &tmp_evict);
	}

	/* Take lock and move to rebind_list for rebinding. */
	err = dma_resv_lock_interruptible(&vm->resv, NULL);
	if (err)
		goto out_err;

	list_for_each_entry_safe(vma, next, &tmp_evict, userptr_link) {
		list_del_init(&vma->userptr_link);
		list_move_tail(&vma->rebind_link, &vm->rebind_list);
	}

	dma_resv_unlock(&vm->resv);

	return 0;

out_err:
	list_splice_tail(&tmp_evict, &vm->userptr.repin_list);

	return err;
}

/**
 * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
 * that need repinning.
 * @vm: The VM.
 *
 * This function does an advisory check for whether the VM has userptrs that
 * need repinning.
 *
 * Return: 0 if there are no indications of userptrs needing repinning,
 * -EAGAIN if there are.
 */
int xe_vm_userptr_check_repin(struct xe_vm *vm)
{
	return (list_empty_careful(&vm->userptr.repin_list) &&
		list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
}

static struct dma_fence *
xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e,
	       struct xe_sync_entry *syncs, u32 num_syncs);

struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
{
	struct dma_fence *fence = NULL;
	struct xe_vma *vma, *next;

	lockdep_assert_held(&vm->lock);
	if (xe_vm_no_dma_fences(vm) && !rebind_worker)
		return NULL;

	xe_vm_assert_held(vm);
	list_for_each_entry_safe(vma, next, &vm->rebind_list, rebind_link) {
		XE_WARN_ON(!vma->gt_present);

		list_del_init(&vma->rebind_link);
		dma_fence_put(fence);
		if (rebind_worker)
			trace_xe_vma_rebind_worker(vma);
		else
			trace_xe_vma_rebind_exec(vma);
		fence = xe_vm_bind_vma(vma, NULL, NULL, 0);
		if (IS_ERR(fence))
			return fence;
	}

	return fence;
}

static struct xe_vma *xe_vma_create(struct xe_vm *vm,
				    struct xe_bo *bo,
				    u64 bo_offset_or_userptr,
				    u64 start, u64 end,
				    bool read_only,
				    u64 gt_mask)
{
	struct xe_vma *vma;
	struct xe_gt *gt;
	u8 id;

	XE_BUG_ON(start >= end);
	XE_BUG_ON(end >= vm->size);

	vma = kzalloc(sizeof(*vma), GFP_KERNEL);
	if (!vma) {
		vma = ERR_PTR(-ENOMEM);
		return vma;
	}

	INIT_LIST_HEAD(&vma->rebind_link);
	INIT_LIST_HEAD(&vma->unbind_link);
	INIT_LIST_HEAD(&vma->userptr_link);
	INIT_LIST_HEAD(&vma->userptr.invalidate_link);
	INIT_LIST_HEAD(&vma->notifier.rebind_link);
	INIT_LIST_HEAD(&vma->extobj.link);

	vma->vm = vm;
	vma->start = start;
	vma->end = end;
	if (read_only)
		vma->pte_flags = PTE_READ_ONLY;

	if (gt_mask) {
		vma->gt_mask = gt_mask;
	} else {
		for_each_gt(gt, vm->xe, id)
			if (!xe_gt_is_media_type(gt))
				vma->gt_mask |= 0x1 << id;
	}

	if (vm->xe->info.platform == XE_PVC)
		vma->use_atomic_access_pte_bit = true;

	if (bo) {
		xe_bo_assert_held(bo);
		vma->bo_offset = bo_offset_or_userptr;
		vma->bo = xe_bo_get(bo);
		list_add_tail(&vma->bo_link, &bo->vmas);
	} else /* userptr */ {
		u64 size = end - start + 1;
		int err;

		vma->userptr.ptr = bo_offset_or_userptr;

		err = mmu_interval_notifier_insert(&vma->userptr.notifier,
						   current->mm,
						   vma->userptr.ptr, size,
						   &vma_userptr_notifier_ops);
		if (err) {
			kfree(vma);
			vma = ERR_PTR(err);
			return vma;
		}

		vma->userptr.notifier_seq = LONG_MAX;
		xe_vm_get(vm);
	}

	return vma;
}

static bool vm_remove_extobj(struct xe_vma *vma)
{
	if (!list_empty(&vma->extobj.link)) {
		vma->vm->extobj.entries--;
		list_del_init(&vma->extobj.link);
		return true;
	}
	return false;
}

static void xe_vma_destroy_late(struct xe_vma *vma)
{
	struct xe_vm *vm = vma->vm;
	struct xe_device *xe = vm->xe;
	bool read_only = vma->pte_flags & PTE_READ_ONLY;

	if (xe_vma_is_userptr(vma)) {
		if (vma->userptr.sg) {
			dma_unmap_sgtable(xe->drm.dev,
					  vma->userptr.sg,
					  read_only ? DMA_TO_DEVICE :
					  DMA_BIDIRECTIONAL, 0);
			sg_free_table(vma->userptr.sg);
			vma->userptr.sg = NULL;
		}

		/*
		 * Since userptr pages are not pinned, we can't remove
		 * the notifier until we're sure the GPU is not accessing
		 * them anymore
		 */
		mmu_interval_notifier_remove(&vma->userptr.notifier);
		xe_vm_put(vm);
	} else {
		xe_bo_put(vma->bo);
	}

	kfree(vma);
}

static void vma_destroy_work_func(struct work_struct *w)
{
	struct xe_vma *vma =
		container_of(w, struct xe_vma, destroy_work);

	xe_vma_destroy_late(vma);
}

static struct xe_vma *
bo_has_vm_references_locked(struct xe_bo *bo, struct xe_vm *vm,
			    struct xe_vma *ignore)
{
	struct xe_vma *vma;

	list_for_each_entry(vma, &bo->vmas, bo_link) {
		if (vma != ignore && vma->vm == vm && !vma->destroyed)
			return vma;
	}

	return NULL;
}

static bool bo_has_vm_references(struct xe_bo *bo, struct xe_vm *vm,
				 struct xe_vma *ignore)
{
	struct ww_acquire_ctx ww;
	bool ret;

	xe_bo_lock(bo, &ww, 0, false);
	ret = !!bo_has_vm_references_locked(bo, vm, ignore);
	xe_bo_unlock(bo, &ww);

	return ret;
}

static void __vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma)
{
	list_add(&vma->extobj.link, &vm->extobj.list);
	vm->extobj.entries++;
}

static void vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma)
{
	struct xe_bo *bo = vma->bo;

	lockdep_assert_held_write(&vm->lock);

	if (bo_has_vm_references(bo, vm, vma))
		return;

	__vm_insert_extobj(vm, vma);
}

static void vma_destroy_cb(struct dma_fence *fence,
			   struct dma_fence_cb *cb)
{
	struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);

	INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
	queue_work(system_unbound_wq, &vma->destroy_work);
}

static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
{
	struct xe_vm *vm = vma->vm;

	lockdep_assert_held_write(&vm->lock);
	XE_BUG_ON(!list_empty(&vma->unbind_link));

	if (xe_vma_is_userptr(vma)) {
		XE_WARN_ON(!vma->destroyed);
		spin_lock(&vm->userptr.invalidated_lock);
		list_del_init(&vma->userptr.invalidate_link);
		spin_unlock(&vm->userptr.invalidated_lock);
		list_del(&vma->userptr_link);
	} else {
		xe_bo_assert_held(vma->bo);
		list_del(&vma->bo_link);

		spin_lock(&vm->notifier.list_lock);
		list_del(&vma->notifier.rebind_link);
		spin_unlock(&vm->notifier.list_lock);

		if (!vma->bo->vm && vm_remove_extobj(vma)) {
			struct xe_vma *other;

			other = bo_has_vm_references_locked(vma->bo, vm, NULL);

			if (other)
				__vm_insert_extobj(vm, other);
		}
	}

	xe_vm_assert_held(vm);
	if (!list_empty(&vma->rebind_link))
		list_del(&vma->rebind_link);

	if (fence) {
		int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
						 vma_destroy_cb);

		if (ret) {
			XE_WARN_ON(ret != -ENOENT);
			xe_vma_destroy_late(vma);
		}
	} else {
		xe_vma_destroy_late(vma);
	}
}

static void xe_vma_destroy_unlocked(struct xe_vma *vma)
{
	struct ttm_validate_buffer tv[2];
	struct ww_acquire_ctx ww;
	struct xe_bo *bo = vma->bo;
	LIST_HEAD(objs);
	LIST_HEAD(dups);
	int err;

	memset(tv, 0, sizeof(tv));
	tv[0].bo = xe_vm_ttm_bo(vma->vm);
	list_add(&tv[0].head, &objs);

	if (bo) {
		tv[1].bo = &xe_bo_get(bo)->ttm;
		list_add(&tv[1].head, &objs);
	}
	err = ttm_eu_reserve_buffers(&ww, &objs, false, &dups);
	XE_WARN_ON(err);

	xe_vma_destroy(vma, NULL);

	ttm_eu_backoff_reservation(&ww, &objs);
	if (bo)
		xe_bo_put(bo);
}

static struct xe_vma *to_xe_vma(const struct rb_node *node)
{
	BUILD_BUG_ON(offsetof(struct xe_vma, vm_node) != 0);
	return (struct xe_vma *)node;
}

static int xe_vma_cmp(const struct xe_vma *a, const struct xe_vma *b)
{
	if (a->end < b->start) {
		return -1;
	} else if (b->end < a->start) {
		return 1;
	} else {
		return 0;
	}
}

static bool xe_vma_less_cb(struct rb_node *a, const struct rb_node *b)
{
	return xe_vma_cmp(to_xe_vma(a), to_xe_vma(b)) < 0;
}

int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node)
{
	struct xe_vma *cmp = to_xe_vma(node);
	const struct xe_vma *own = key;

	if (own->start > cmp->end)
		return 1;

	if (own->end < cmp->start)
		return -1;

	return 0;
}

struct xe_vma *
xe_vm_find_overlapping_vma(struct xe_vm *vm, const struct xe_vma *vma)
{
	struct rb_node *node;

	if (xe_vm_is_closed(vm))
		return NULL;

	XE_BUG_ON(vma->end >= vm->size);
	lockdep_assert_held(&vm->lock);

	node = rb_find(vma, &vm->vmas, xe_vma_cmp_vma_cb);

	return node ? to_xe_vma(node) : NULL;
}

static void xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
{
	XE_BUG_ON(vma->vm != vm);
	lockdep_assert_held(&vm->lock);

	rb_add(&vma->vm_node, &vm->vmas, xe_vma_less_cb);
}

static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
{
	XE_BUG_ON(vma->vm != vm);
	lockdep_assert_held(&vm->lock);

	rb_erase(&vma->vm_node, &vm->vmas);
	if (vm->usm.last_fault_vma == vma)
		vm->usm.last_fault_vma = NULL;
}

static void async_op_work_func(struct work_struct *w);
static void vm_destroy_work_func(struct work_struct *w);

struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
{
	struct xe_vm *vm;
	int err, i = 0, number_gts = 0;
	struct xe_gt *gt;
	u8 id;

	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
	if (!vm)
		return ERR_PTR(-ENOMEM);

	vm->xe = xe;
	kref_init(&vm->refcount);
	dma_resv_init(&vm->resv);

	vm->size = 1ull << xe_pt_shift(xe->info.vm_max_level + 1);

	vm->vmas = RB_ROOT;
	vm->flags = flags;

	init_rwsem(&vm->lock);

	INIT_LIST_HEAD(&vm->rebind_list);

	INIT_LIST_HEAD(&vm->userptr.repin_list);
	INIT_LIST_HEAD(&vm->userptr.invalidated);
	init_rwsem(&vm->userptr.notifier_lock);
	spin_lock_init(&vm->userptr.invalidated_lock);

	INIT_LIST_HEAD(&vm->notifier.rebind_list);
	spin_lock_init(&vm->notifier.list_lock);

	INIT_LIST_HEAD(&vm->async_ops.pending);
	INIT_WORK(&vm->async_ops.work, async_op_work_func);
	spin_lock_init(&vm->async_ops.lock);

	INIT_WORK(&vm->destroy_work, vm_destroy_work_func);

	INIT_LIST_HEAD(&vm->preempt.engines);
	vm->preempt.min_run_period_ms = 10;	/* FIXME: Wire up to uAPI */

	INIT_LIST_HEAD(&vm->extobj.list);

	if (!(flags & XE_VM_FLAG_MIGRATION)) {
		/* We need to immediately exit from any D3 state */
		xe_pm_runtime_get(xe);
		xe_device_mem_access_get(xe);
	}

	err = dma_resv_lock_interruptible(&vm->resv, NULL);
	if (err)
		goto err_put;

	if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
		vm->flags |= XE_VM_FLAGS_64K;

	for_each_gt(gt, xe, id) {
		if (xe_gt_is_media_type(gt))
			continue;

		if (flags & XE_VM_FLAG_MIGRATION &&
		    gt->info.id != XE_VM_FLAG_GT_ID(flags))
			continue;

		vm->pt_root[id] = xe_pt_create(vm, gt, xe->info.vm_max_level);
		if (IS_ERR(vm->pt_root[id])) {
			err = PTR_ERR(vm->pt_root[id]);
			vm->pt_root[id] = NULL;
			goto err_destroy_root;
		}
	}

	if (flags & XE_VM_FLAG_SCRATCH_PAGE) {
		for_each_gt(gt, xe, id) {
			if (!vm->pt_root[id])
				continue;

			err = xe_pt_create_scratch(xe, gt, vm);
			if (err)
				goto err_scratch_pt;
		}
	}

	if (flags & DRM_XE_VM_CREATE_COMPUTE_MODE) {
		INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
		vm->flags |= XE_VM_FLAG_COMPUTE_MODE;
	}

	if (flags & DRM_XE_VM_CREATE_ASYNC_BIND_OPS) {
		vm->async_ops.fence.context = dma_fence_context_alloc(1);
		vm->flags |= XE_VM_FLAG_ASYNC_BIND_OPS;
	}

	/* Fill pt_root after allocating scratch tables */
	for_each_gt(gt, xe, id) {
		if (!vm->pt_root[id])
			continue;

		xe_pt_populate_empty(gt, vm, vm->pt_root[id]);
	}
	dma_resv_unlock(&vm->resv);

	/* Kernel migration VM shouldn't have a circular loop.. */
	if (!(flags & XE_VM_FLAG_MIGRATION)) {
		for_each_gt(gt, xe, id) {
			struct xe_vm *migrate_vm;
			struct xe_engine *eng;

			if (!vm->pt_root[id])
				continue;

			migrate_vm = xe_migrate_get_vm(gt->migrate);
			eng = xe_engine_create_class(xe, gt, migrate_vm,
						     XE_ENGINE_CLASS_COPY,
						     ENGINE_FLAG_VM);
			xe_vm_put(migrate_vm);
			if (IS_ERR(eng)) {
				xe_vm_close_and_put(vm);
				return ERR_CAST(eng);
			}
			vm->eng[id] = eng;
			number_gts++;
		}
	}

	if (number_gts > 1)
		vm->composite_fence_ctx = dma_fence_context_alloc(1);

	mutex_lock(&xe->usm.lock);
	if (flags & XE_VM_FLAG_FAULT_MODE)
		xe->usm.num_vm_in_fault_mode++;
	else if (!(flags & XE_VM_FLAG_MIGRATION))
		xe->usm.num_vm_in_non_fault_mode++;
	mutex_unlock(&xe->usm.lock);

	trace_xe_vm_create(vm);

	return vm;

err_scratch_pt:
	for_each_gt(gt, xe, id) {
		if (!vm->pt_root[id])
			continue;

		i = vm->pt_root[id]->level;
		while (i)
			if (vm->scratch_pt[id][--i])
				xe_pt_destroy(vm->scratch_pt[id][i],
					      vm->flags, NULL);
		xe_bo_unpin(vm->scratch_bo[id]);
		xe_bo_put(vm->scratch_bo[id]);
	}
err_destroy_root:
	for_each_gt(gt, xe, id) {
		if (vm->pt_root[id])
			xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
	}
	dma_resv_unlock(&vm->resv);
err_put:
	dma_resv_fini(&vm->resv);
	kfree(vm);
	if (!(flags & XE_VM_FLAG_MIGRATION)) {
		xe_device_mem_access_put(xe);
		xe_pm_runtime_put(xe);
	}
	return ERR_PTR(err);
}

static void flush_async_ops(struct xe_vm *vm)
{
	queue_work(system_unbound_wq, &vm->async_ops.work);
	flush_work(&vm->async_ops.work);
}

static void vm_error_capture(struct xe_vm *vm, int err,
			     u32 op, u64 addr, u64 size)
{
	struct drm_xe_vm_bind_op_error_capture capture;
	u64 __user *address =
		u64_to_user_ptr(vm->async_ops.error_capture.addr);
	bool in_kthread = !current->mm;

	capture.error = err;
	capture.op = op;
	capture.addr = addr;
	capture.size = size;

	if (in_kthread) {
		if (!mmget_not_zero(vm->async_ops.error_capture.mm))
			goto mm_closed;
		kthread_use_mm(vm->async_ops.error_capture.mm);
	}

	if (copy_to_user(address, &capture, sizeof(capture)))
		XE_WARN_ON("Copy to user failed");

	if (in_kthread) {
		kthread_unuse_mm(vm->async_ops.error_capture.mm);
		mmput(vm->async_ops.error_capture.mm);
	}

mm_closed:
	wake_up_all(&vm->async_ops.error_capture.wq);
}

void xe_vm_close_and_put(struct xe_vm *vm)
{
	struct rb_root contested = RB_ROOT;
	struct ww_acquire_ctx ww;
	struct xe_device *xe = vm->xe;
	struct xe_gt *gt;
	u8 id;

	XE_BUG_ON(vm->preempt.num_engines);

	vm->size = 0;
	smp_mb();
	flush_async_ops(vm);
	if (xe_vm_in_compute_mode(vm))
		flush_work(&vm->preempt.rebind_work);

	for_each_gt(gt, xe, id) {
		if (vm->eng[id]) {
			xe_engine_kill(vm->eng[id]);
			xe_engine_put(vm->eng[id]);
			vm->eng[id] = NULL;
		}
	}

	down_write(&vm->lock);
	xe_vm_lock(vm, &ww, 0, false);
	while (vm->vmas.rb_node) {
		struct xe_vma *vma = to_xe_vma(vm->vmas.rb_node);

		if (xe_vma_is_userptr(vma)) {
			down_read(&vm->userptr.notifier_lock);
			vma->destroyed = true;
			up_read(&vm->userptr.notifier_lock);
		}

		rb_erase(&vma->vm_node, &vm->vmas);

		/* easy case, remove from VMA? */
		if (xe_vma_is_userptr(vma) || vma->bo->vm) {
			xe_vma_destroy(vma, NULL);
			continue;
		}

		rb_add(&vma->vm_node, &contested, xe_vma_less_cb);
	}

	/*
	 * All vm operations will add shared fences to resv.
	 * The only exception is eviction for a shared object,
	 * but even so, the unbind when evicted would still
	 * install a fence to resv. Hence it's safe to
	 * destroy the pagetables immediately.
	 */
	for_each_gt(gt, xe, id) {
		if (vm->scratch_bo[id]) {
			u32 i;

			xe_bo_unpin(vm->scratch_bo[id]);
			xe_bo_put(vm->scratch_bo[id]);
			for (i = 0; i < vm->pt_root[id]->level; i++)
				xe_pt_destroy(vm->scratch_pt[id][i], vm->flags,
					      NULL);
		}
	}
	xe_vm_unlock(vm, &ww);

	if (contested.rb_node) {
		/*
		 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL.
		 * Since we hold a refcount to the bo, we can remove and free
		 * the members safely without locking.
		 */
		while (contested.rb_node) {
			struct xe_vma *vma = to_xe_vma(contested.rb_node);

			rb_erase(&vma->vm_node, &contested);
			xe_vma_destroy_unlocked(vma);
		}
	}

	if (vm->async_ops.error_capture.addr)
		wake_up_all(&vm->async_ops.error_capture.wq);

	XE_WARN_ON(!list_empty(&vm->extobj.list));
	up_write(&vm->lock);

	xe_vm_put(vm);
}

static void vm_destroy_work_func(struct work_struct *w)
{
	struct xe_vm *vm =
		container_of(w, struct xe_vm, destroy_work);
	struct ww_acquire_ctx ww;
	struct xe_device *xe = vm->xe;
	struct xe_gt *gt;
	u8 id;
	void *lookup;

	/* xe_vm_close_and_put was not called? */
	XE_WARN_ON(vm->size);

	if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
		xe_device_mem_access_put(xe);
		xe_pm_runtime_put(xe);

		mutex_lock(&xe->usm.lock);
		lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
		XE_WARN_ON(lookup != vm);
		mutex_unlock(&xe->usm.lock);
	}

	/*
	 * XXX: We delay destroying the PT root until the VM is freed as PT root
	 * is needed for xe_vm_lock to work. If we remove that dependency this
	 * can be moved to xe_vm_close_and_put.
	 */
	xe_vm_lock(vm, &ww, 0, false);
	for_each_gt(gt, xe, id) {
		if (vm->pt_root[id]) {
			xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
			vm->pt_root[id] = NULL;
		}
	}
	xe_vm_unlock(vm, &ww);

	mutex_lock(&xe->usm.lock);
	if (vm->flags & XE_VM_FLAG_FAULT_MODE)
		xe->usm.num_vm_in_fault_mode--;
	else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
		xe->usm.num_vm_in_non_fault_mode--;
	mutex_unlock(&xe->usm.lock);

	trace_xe_vm_free(vm);
	dma_fence_put(vm->rebind_fence);
	dma_resv_fini(&vm->resv);
	kfree(vm);
}

void xe_vm_free(struct kref *ref)
{
	struct xe_vm *vm = container_of(ref, struct xe_vm, refcount);

	/* To destroy the VM we need to be able to sleep */
	queue_work(system_unbound_wq, &vm->destroy_work);
}

struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
{
	struct xe_vm *vm;

	mutex_lock(&xef->vm.lock);
	vm = xa_load(&xef->vm.xa, id);
	mutex_unlock(&xef->vm.lock);

	if (vm)
		xe_vm_get(vm);

	return vm;
}

u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_gt *full_gt)
{
	XE_BUG_ON(xe_gt_is_media_type(full_gt));

	return gen8_pde_encode(vm->pt_root[full_gt->info.id]->bo, 0,
			       XE_CACHE_WB);
}
static struct dma_fence *
xe_vm_unbind_vma(struct xe_vma *vma, struct xe_engine *e,
		 struct xe_sync_entry *syncs, u32 num_syncs)
{
	struct xe_gt *gt;
	struct dma_fence *fence = NULL;
	struct dma_fence **fences = NULL;
	struct dma_fence_array *cf = NULL;
	struct xe_vm *vm = vma->vm;
	int cur_fence = 0, i;
	int number_gts = hweight_long(vma->gt_present);
	int err;
	u8 id;

	trace_xe_vma_unbind(vma);

	if (number_gts > 1) {
		fences = kmalloc_array(number_gts, sizeof(*fences),
				       GFP_KERNEL);
		if (!fences)
			return ERR_PTR(-ENOMEM);
	}

	for_each_gt(gt, vm->xe, id) {
		if (!(vma->gt_present & BIT(id)))
			goto next;

		XE_BUG_ON(xe_gt_is_media_type(gt));

		fence = __xe_pt_unbind_vma(gt, vma, e, syncs, num_syncs);
		if (IS_ERR(fence)) {
			err = PTR_ERR(fence);
			goto err_fences;
		}

		if (fences)
			fences[cur_fence++] = fence;

next:
		if (e && vm->pt_root[id] && !list_empty(&e->multi_gt_list))
			e = list_next_entry(e, multi_gt_list);
	}

	if (fences) {
		cf = dma_fence_array_create(number_gts, fences,
					    vm->composite_fence_ctx,
					    vm->composite_fence_seqno++,
					    false);
		if (!cf) {
			--vm->composite_fence_seqno;
			err = -ENOMEM;
			goto err_fences;
		}
	}

	for (i = 0; i < num_syncs; i++)
		xe_sync_entry_signal(&syncs[i], NULL, cf ? &cf->base : fence);

	return cf ? &cf->base : !fence ? dma_fence_get_stub() : fence;

err_fences:
	if (fences) {
		while (cur_fence) {
			/* FIXME: Rewind the previous binds? */
			dma_fence_put(fences[--cur_fence]);
		}
		kfree(fences);
	}

	return ERR_PTR(err);
}

static struct dma_fence *
xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e,
	       struct xe_sync_entry *syncs, u32 num_syncs)
{
	struct xe_gt *gt;
	struct dma_fence *fence;
	struct dma_fence **fences = NULL;
	struct dma_fence_array *cf = NULL;
	struct xe_vm *vm = vma->vm;
	int cur_fence = 0, i;
	int number_gts = hweight_long(vma->gt_mask);
	int err;
	u8 id;

	trace_xe_vma_bind(vma);

	if (number_gts > 1) {
		fences = kmalloc_array(number_gts, sizeof(*fences),
				       GFP_KERNEL);
		if (!fences)
			return ERR_PTR(-ENOMEM);
	}

	for_each_gt(gt, vm->xe, id) {
		if (!(vma->gt_mask & BIT(id)))
			goto next;

		XE_BUG_ON(xe_gt_is_media_type(gt));
		fence = __xe_pt_bind_vma(gt, vma, e, syncs, num_syncs,
					 vma->gt_present & BIT(id));
		if (IS_ERR(fence)) {
			err = PTR_ERR(fence);
			goto err_fences;
		}

		if (fences)
			fences[cur_fence++] = fence;

next:
		if (e && vm->pt_root[id] && !list_empty(&e->multi_gt_list))
			e = list_next_entry(e, multi_gt_list);
	}

	if (fences) {
		cf = dma_fence_array_create(number_gts, fences,
					    vm->composite_fence_ctx,
					    vm->composite_fence_seqno++,
					    false);
		if (!cf) {
			--vm->composite_fence_seqno;
			err = -ENOMEM;
			goto err_fences;
		}
	}

	for (i = 0; i < num_syncs; i++)
		xe_sync_entry_signal(&syncs[i], NULL, cf ? &cf->base : fence);

	return cf ? &cf->base : fence;

err_fences:
	if (fences) {
		while (cur_fence) {
			/* FIXME: Rewind the previous binds? */
			dma_fence_put(fences[--cur_fence]);
		}
		kfree(fences);
	}

	return ERR_PTR(err);
}

struct async_op_fence {
	struct dma_fence fence;
	struct dma_fence_cb cb;
	struct xe_vm *vm;
	wait_queue_head_t wq;
	bool started;
};

static const char *async_op_fence_get_driver_name(struct dma_fence *dma_fence)
{
	return "xe";
}

static const char *
async_op_fence_get_timeline_name(struct dma_fence *dma_fence)
{
	return "async_op_fence";
}

static const struct dma_fence_ops async_op_fence_ops = {
	.get_driver_name = async_op_fence_get_driver_name,
	.get_timeline_name = async_op_fence_get_timeline_name,
};

static void async_op_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	struct async_op_fence *afence =
		container_of(cb, struct async_op_fence, cb);

	dma_fence_signal(&afence->fence);
	xe_vm_put(afence->vm);
	dma_fence_put(&afence->fence);
}

static void add_async_op_fence_cb(struct xe_vm *vm,
				  struct dma_fence *fence,
				  struct async_op_fence *afence)
{
	int ret;

	if (!xe_vm_no_dma_fences(vm)) {
		afence->started = true;
		smp_wmb();
		wake_up_all(&afence->wq);
	}

	afence->vm = xe_vm_get(vm);
	dma_fence_get(&afence->fence);
	ret = dma_fence_add_callback(fence, &afence->cb, async_op_fence_cb);
	if (ret == -ENOENT)
		dma_fence_signal(&afence->fence);
	if (ret) {
		xe_vm_put(vm);
		dma_fence_put(&afence->fence);
	}
	XE_WARN_ON(ret && ret != -ENOENT);
}

int xe_vm_async_fence_wait_start(struct dma_fence *fence)
{
	if (fence->ops == &async_op_fence_ops) {
		struct async_op_fence *afence =
			container_of(fence, struct async_op_fence, fence);

		XE_BUG_ON(xe_vm_no_dma_fences(afence->vm));

		smp_rmb();
		return wait_event_interruptible(afence->wq, afence->started);
	}

	return 0;
}

static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
			struct xe_engine *e, struct xe_sync_entry *syncs,
			u32 num_syncs, struct async_op_fence *afence)
{
	struct dma_fence *fence;

	xe_vm_assert_held(vm);

	fence = xe_vm_bind_vma(vma, e, syncs, num_syncs);
	if (IS_ERR(fence))
		return PTR_ERR(fence);
	if (afence)
		add_async_op_fence_cb(vm, fence, afence);

	dma_fence_put(fence);
	return 0;
}

static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_engine *e,
		      struct xe_bo *bo, struct xe_sync_entry *syncs,
		      u32 num_syncs, struct async_op_fence *afence)
{
	int err;

	xe_vm_assert_held(vm);
	xe_bo_assert_held(bo);

	if (bo) {
		err = xe_bo_validate(bo, vm, true);
		if (err)
			return err;
	}

	return __xe_vm_bind(vm, vma, e, syncs, num_syncs, afence);
}

static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
			struct xe_engine *e, struct xe_sync_entry *syncs,
			u32 num_syncs, struct async_op_fence *afence)
{
	struct dma_fence *fence;

	xe_vm_assert_held(vm);
	xe_bo_assert_held(vma->bo);

	fence = xe_vm_unbind_vma(vma, e, syncs, num_syncs);
	if (IS_ERR(fence))
		return PTR_ERR(fence);
	if (afence)
		add_async_op_fence_cb(vm, fence, afence);

	xe_vma_destroy(vma, fence);
	dma_fence_put(fence);

	return 0;
}

static int vm_set_error_capture_address(struct xe_device *xe, struct xe_vm *vm,
					u64 value)
{
	if (XE_IOCTL_ERR(xe, !value))
		return -EINVAL;

	if
(XE_IOCTL_ERR(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) 1776 + return -ENOTSUPP; 1777 + 1778 + if (XE_IOCTL_ERR(xe, vm->async_ops.error_capture.addr)) 1779 + return -ENOTSUPP; 1780 + 1781 + vm->async_ops.error_capture.mm = current->mm; 1782 + vm->async_ops.error_capture.addr = value; 1783 + init_waitqueue_head(&vm->async_ops.error_capture.wq); 1784 + 1785 + return 0; 1786 + } 1787 + 1788 + typedef int (*xe_vm_set_property_fn)(struct xe_device *xe, struct xe_vm *vm, 1789 + u64 value); 1790 + 1791 + static const xe_vm_set_property_fn vm_set_property_funcs[] = { 1792 + [XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS] = 1793 + vm_set_error_capture_address, 1794 + }; 1795 + 1796 + static int vm_user_ext_set_property(struct xe_device *xe, struct xe_vm *vm, 1797 + u64 extension) 1798 + { 1799 + u64 __user *address = u64_to_user_ptr(extension); 1800 + struct drm_xe_ext_vm_set_property ext; 1801 + int err; 1802 + 1803 + err = __copy_from_user(&ext, address, sizeof(ext)); 1804 + if (XE_IOCTL_ERR(xe, err)) 1805 + return -EFAULT; 1806 + 1807 + if (XE_IOCTL_ERR(xe, ext.property >= 1808 + ARRAY_SIZE(vm_set_property_funcs))) 1809 + return -EINVAL; 1810 + 1811 + return vm_set_property_funcs[ext.property](xe, vm, ext.value); 1812 + } 1813 + 1814 + typedef int (*xe_vm_user_extension_fn)(struct xe_device *xe, struct xe_vm *vm, 1815 + u64 extension); 1816 + 1817 + static const xe_vm_set_property_fn vm_user_extension_funcs[] = { 1818 + [XE_VM_EXTENSION_SET_PROPERTY] = vm_user_ext_set_property, 1819 + }; 1820 + 1821 + #define MAX_USER_EXTENSIONS 16 1822 + static int vm_user_extensions(struct xe_device *xe, struct xe_vm *vm, 1823 + u64 extensions, int ext_number) 1824 + { 1825 + u64 __user *address = u64_to_user_ptr(extensions); 1826 + struct xe_user_extension ext; 1827 + int err; 1828 + 1829 + if (XE_IOCTL_ERR(xe, ext_number >= MAX_USER_EXTENSIONS)) 1830 + return -E2BIG; 1831 + 1832 + err = __copy_from_user(&ext, address, sizeof(ext)); 1833 + if (XE_IOCTL_ERR(xe, err)) 1834 + return 
-EFAULT; 1835 + 1836 + if (XE_IOCTL_ERR(xe, ext.name >= 1837 + ARRAY_SIZE(vm_user_extension_funcs))) 1838 + return -EINVAL; 1839 + 1840 + err = vm_user_extension_funcs[ext.name](xe, vm, extensions); 1841 + if (XE_IOCTL_ERR(xe, err)) 1842 + return err; 1843 + 1844 + if (ext.next_extension) 1845 + return vm_user_extensions(xe, vm, ext.next_extension, 1846 + ++ext_number); 1847 + 1848 + return 0; 1849 + } 1850 + 1851 + #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_SCRATCH_PAGE | \ 1852 + DRM_XE_VM_CREATE_COMPUTE_MODE | \ 1853 + DRM_XE_VM_CREATE_ASYNC_BIND_OPS | \ 1854 + DRM_XE_VM_CREATE_FAULT_MODE) 1855 + 1856 + int xe_vm_create_ioctl(struct drm_device *dev, void *data, 1857 + struct drm_file *file) 1858 + { 1859 + struct xe_device *xe = to_xe_device(dev); 1860 + struct xe_file *xef = to_xe_file(file); 1861 + struct drm_xe_vm_create *args = data; 1862 + struct xe_vm *vm; 1863 + u32 id, asid; 1864 + int err; 1865 + u32 flags = 0; 1866 + 1867 + if (XE_IOCTL_ERR(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS)) 1868 + return -EINVAL; 1869 + 1870 + if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE && 1871 + args->flags & DRM_XE_VM_CREATE_FAULT_MODE)) 1872 + return -EINVAL; 1873 + 1874 + if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE && 1875 + args->flags & DRM_XE_VM_CREATE_FAULT_MODE)) 1876 + return -EINVAL; 1877 + 1878 + if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE && 1879 + xe_device_in_non_fault_mode(xe))) 1880 + return -EINVAL; 1881 + 1882 + if (XE_IOCTL_ERR(xe, !(args->flags & DRM_XE_VM_CREATE_FAULT_MODE) && 1883 + xe_device_in_fault_mode(xe))) 1884 + return -EINVAL; 1885 + 1886 + if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE && 1887 + !xe->info.supports_usm)) 1888 + return -EINVAL; 1889 + 1890 + if (args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE) 1891 + flags |= XE_VM_FLAG_SCRATCH_PAGE; 1892 + if (args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE) 1893 + flags |= XE_VM_FLAG_COMPUTE_MODE; 1894 + if 
(args->flags & DRM_XE_VM_CREATE_ASYNC_BIND_OPS) 1895 + flags |= XE_VM_FLAG_ASYNC_BIND_OPS; 1896 + if (args->flags & DRM_XE_VM_CREATE_FAULT_MODE) 1897 + flags |= XE_VM_FLAG_FAULT_MODE; 1898 + 1899 + vm = xe_vm_create(xe, flags); 1900 + if (IS_ERR(vm)) 1901 + return PTR_ERR(vm); 1902 + 1903 + if (args->extensions) { 1904 + err = vm_user_extensions(xe, vm, args->extensions, 0); 1905 + if (XE_IOCTL_ERR(xe, err)) { 1906 + xe_vm_close_and_put(vm); 1907 + return err; 1908 + } 1909 + } 1910 + 1911 + mutex_lock(&xef->vm.lock); 1912 + err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); 1913 + mutex_unlock(&xef->vm.lock); 1914 + if (err) { 1915 + xe_vm_close_and_put(vm); 1916 + return err; 1917 + } 1918 + 1919 + mutex_lock(&xe->usm.lock); 1920 + err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, 1921 + XA_LIMIT(0, XE_MAX_ASID - 1), 1922 + &xe->usm.next_asid, GFP_KERNEL); 1923 + mutex_unlock(&xe->usm.lock); 1924 + if (err) { 1925 + xe_vm_close_and_put(vm); 1926 + return err; 1927 + } 1928 + vm->usm.asid = asid; 1929 + 1930 + args->vm_id = id; 1931 + 1932 + #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) 1933 + /* Warning: Security issue - never enable by default */ 1934 + args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, GEN8_PAGE_SIZE); 1935 + #endif 1936 + 1937 + return 0; 1938 + } 1939 + 1940 + int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, 1941 + struct drm_file *file) 1942 + { 1943 + struct xe_device *xe = to_xe_device(dev); 1944 + struct xe_file *xef = to_xe_file(file); 1945 + struct drm_xe_vm_destroy *args = data; 1946 + struct xe_vm *vm; 1947 + 1948 + if (XE_IOCTL_ERR(xe, args->pad)) 1949 + return -EINVAL; 1950 + 1951 + vm = xe_vm_lookup(xef, args->vm_id); 1952 + if (XE_IOCTL_ERR(xe, !vm)) 1953 + return -ENOENT; 1954 + xe_vm_put(vm); 1955 + 1956 + /* FIXME: Extend this check to non-compute mode VMs */ 1957 + if (XE_IOCTL_ERR(xe, vm->preempt.num_engines)) 1958 + return -EBUSY; 1959 + 1960 + mutex_lock(&xef->vm.lock); 1961 + 
xa_erase(&xef->vm.xa, args->vm_id); 1962 + mutex_unlock(&xef->vm.lock); 1963 + 1964 + xe_vm_close_and_put(vm); 1965 + 1966 + return 0; 1967 + } 1968 + 1969 + static const u32 region_to_mem_type[] = { 1970 + XE_PL_TT, 1971 + XE_PL_VRAM0, 1972 + XE_PL_VRAM1, 1973 + }; 1974 + 1975 + static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, 1976 + struct xe_engine *e, u32 region, 1977 + struct xe_sync_entry *syncs, u32 num_syncs, 1978 + struct async_op_fence *afence) 1979 + { 1980 + int err; 1981 + 1982 + XE_BUG_ON(region > ARRAY_SIZE(region_to_mem_type)); 1983 + 1984 + if (!xe_vma_is_userptr(vma)) { 1985 + err = xe_bo_migrate(vma->bo, region_to_mem_type[region]); 1986 + if (err) 1987 + return err; 1988 + } 1989 + 1990 + if (vma->gt_mask != (vma->gt_present & ~vma->usm.gt_invalidated)) { 1991 + return xe_vm_bind(vm, vma, e, vma->bo, syncs, num_syncs, 1992 + afence); 1993 + } else { 1994 + int i; 1995 + 1996 + /* Nothing to do, signal fences now */ 1997 + for (i = 0; i < num_syncs; i++) 1998 + xe_sync_entry_signal(&syncs[i], NULL, 1999 + dma_fence_get_stub()); 2000 + if (afence) 2001 + dma_fence_signal(&afence->fence); 2002 + return 0; 2003 + } 2004 + } 2005 + 2006 + #define VM_BIND_OP(op) (op & 0xffff) 2007 + 2008 + static int __vm_bind_ioctl(struct xe_vm *vm, struct xe_vma *vma, 2009 + struct xe_engine *e, struct xe_bo *bo, u32 op, 2010 + u32 region, struct xe_sync_entry *syncs, 2011 + u32 num_syncs, struct async_op_fence *afence) 2012 + { 2013 + switch (VM_BIND_OP(op)) { 2014 + case XE_VM_BIND_OP_MAP: 2015 + return xe_vm_bind(vm, vma, e, bo, syncs, num_syncs, afence); 2016 + case XE_VM_BIND_OP_UNMAP: 2017 + case XE_VM_BIND_OP_UNMAP_ALL: 2018 + return xe_vm_unbind(vm, vma, e, syncs, num_syncs, afence); 2019 + case XE_VM_BIND_OP_MAP_USERPTR: 2020 + return xe_vm_bind(vm, vma, e, NULL, syncs, num_syncs, afence); 2021 + case XE_VM_BIND_OP_PREFETCH: 2022 + return xe_vm_prefetch(vm, vma, e, region, syncs, num_syncs, 2023 + afence); 2024 + break; 2025 + default: 2026 + 
XE_BUG_ON("NOT POSSIBLE"); 2027 + return -EINVAL; 2028 + } 2029 + } 2030 + 2031 + struct ttm_buffer_object *xe_vm_ttm_bo(struct xe_vm *vm) 2032 + { 2033 + int idx = vm->flags & XE_VM_FLAG_MIGRATION ? 2034 + XE_VM_FLAG_GT_ID(vm->flags) : 0; 2035 + 2036 + /* Safe to use index 0 as all BO in the VM share a single dma-resv lock */ 2037 + return &vm->pt_root[idx]->bo->ttm; 2038 + } 2039 + 2040 + static void xe_vm_tv_populate(struct xe_vm *vm, struct ttm_validate_buffer *tv) 2041 + { 2042 + tv->num_shared = 1; 2043 + tv->bo = xe_vm_ttm_bo(vm); 2044 + } 2045 + 2046 + static bool is_map_op(u32 op) 2047 + { 2048 + return VM_BIND_OP(op) == XE_VM_BIND_OP_MAP || 2049 + VM_BIND_OP(op) == XE_VM_BIND_OP_MAP_USERPTR; 2050 + } 2051 + 2052 + static bool is_unmap_op(u32 op) 2053 + { 2054 + return VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP || 2055 + VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL; 2056 + } 2057 + 2058 + static int vm_bind_ioctl(struct xe_vm *vm, struct xe_vma *vma, 2059 + struct xe_engine *e, struct xe_bo *bo, 2060 + struct drm_xe_vm_bind_op *bind_op, 2061 + struct xe_sync_entry *syncs, u32 num_syncs, 2062 + struct async_op_fence *afence) 2063 + { 2064 + LIST_HEAD(objs); 2065 + LIST_HEAD(dups); 2066 + struct ttm_validate_buffer tv_bo, tv_vm; 2067 + struct ww_acquire_ctx ww; 2068 + struct xe_bo *vbo; 2069 + int err, i; 2070 + 2071 + lockdep_assert_held(&vm->lock); 2072 + XE_BUG_ON(!list_empty(&vma->unbind_link)); 2073 + 2074 + /* Binds deferred to faults, signal fences now */ 2075 + if (xe_vm_in_fault_mode(vm) && is_map_op(bind_op->op) && 2076 + !(bind_op->op & XE_VM_BIND_FLAG_IMMEDIATE)) { 2077 + for (i = 0; i < num_syncs; i++) 2078 + xe_sync_entry_signal(&syncs[i], NULL, 2079 + dma_fence_get_stub()); 2080 + if (afence) 2081 + dma_fence_signal(&afence->fence); 2082 + return 0; 2083 + } 2084 + 2085 + xe_vm_tv_populate(vm, &tv_vm); 2086 + list_add_tail(&tv_vm.head, &objs); 2087 + vbo = vma->bo; 2088 + if (vbo) { 2089 + /* 2090 + * An unbind can drop the last reference to the BO 
and 2091 + * the BO is needed for ttm_eu_backoff_reservation so 2092 + * take a reference here. 2093 + */ 2094 + xe_bo_get(vbo); 2095 + 2096 + tv_bo.bo = &vbo->ttm; 2097 + tv_bo.num_shared = 1; 2098 + list_add(&tv_bo.head, &objs); 2099 + } 2100 + 2101 + again: 2102 + err = ttm_eu_reserve_buffers(&ww, &objs, true, &dups); 2103 + if (!err) { 2104 + err = __vm_bind_ioctl(vm, vma, e, bo, 2105 + bind_op->op, bind_op->region, syncs, 2106 + num_syncs, afence); 2107 + ttm_eu_backoff_reservation(&ww, &objs); 2108 + if (err == -EAGAIN && xe_vma_is_userptr(vma)) { 2109 + lockdep_assert_held_write(&vm->lock); 2110 + err = xe_vma_userptr_pin_pages(vma); 2111 + if (!err) 2112 + goto again; 2113 + } 2114 + } 2115 + xe_bo_put(vbo); 2116 + 2117 + return err; 2118 + } 2119 + 2120 + struct async_op { 2121 + struct xe_vma *vma; 2122 + struct xe_engine *engine; 2123 + struct xe_bo *bo; 2124 + struct drm_xe_vm_bind_op bind_op; 2125 + struct xe_sync_entry *syncs; 2126 + u32 num_syncs; 2127 + struct list_head link; 2128 + struct async_op_fence *fence; 2129 + }; 2130 + 2131 + static void async_op_cleanup(struct xe_vm *vm, struct async_op *op) 2132 + { 2133 + while (op->num_syncs--) 2134 + xe_sync_entry_cleanup(&op->syncs[op->num_syncs]); 2135 + kfree(op->syncs); 2136 + xe_bo_put(op->bo); 2137 + if (op->engine) 2138 + xe_engine_put(op->engine); 2139 + xe_vm_put(vm); 2140 + if (op->fence) 2141 + dma_fence_put(&op->fence->fence); 2142 + kfree(op); 2143 + } 2144 + 2145 + static struct async_op *next_async_op(struct xe_vm *vm) 2146 + { 2147 + return list_first_entry_or_null(&vm->async_ops.pending, 2148 + struct async_op, link); 2149 + } 2150 + 2151 + static void vm_set_async_error(struct xe_vm *vm, int err) 2152 + { 2153 + lockdep_assert_held(&vm->lock); 2154 + vm->async_ops.error = err; 2155 + } 2156 + 2157 + static void async_op_work_func(struct work_struct *w) 2158 + { 2159 + struct xe_vm *vm = container_of(w, struct xe_vm, async_ops.work); 2160 + 2161 + for (;;) { 2162 + struct async_op 
*op; 2163 + int err; 2164 + 2165 + if (vm->async_ops.error && !xe_vm_is_closed(vm)) 2166 + break; 2167 + 2168 + spin_lock_irq(&vm->async_ops.lock); 2169 + op = next_async_op(vm); 2170 + if (op) 2171 + list_del_init(&op->link); 2172 + spin_unlock_irq(&vm->async_ops.lock); 2173 + 2174 + if (!op) 2175 + break; 2176 + 2177 + if (!xe_vm_is_closed(vm)) { 2178 + bool first, last; 2179 + 2180 + down_write(&vm->lock); 2181 + again: 2182 + first = op->vma->first_munmap_rebind; 2183 + last = op->vma->last_munmap_rebind; 2184 + #ifdef TEST_VM_ASYNC_OPS_ERROR 2185 + #define FORCE_ASYNC_OP_ERROR BIT(31) 2186 + if (!(op->bind_op.op & FORCE_ASYNC_OP_ERROR)) { 2187 + err = vm_bind_ioctl(vm, op->vma, op->engine, 2188 + op->bo, &op->bind_op, 2189 + op->syncs, op->num_syncs, 2190 + op->fence); 2191 + } else { 2192 + err = -ENOMEM; 2193 + op->bind_op.op &= ~FORCE_ASYNC_OP_ERROR; 2194 + } 2195 + #else 2196 + err = vm_bind_ioctl(vm, op->vma, op->engine, op->bo, 2197 + &op->bind_op, op->syncs, 2198 + op->num_syncs, op->fence); 2199 + #endif 2200 + /* 2201 + * In order for the fencing to work (stall behind 2202 + * existing jobs / prevent new jobs from running) all 2203 + * the dma-resv slots need to be programmed in a batch 2204 + * relative to execs / the rebind worker. The vm->lock 2205 + * ensure this. 
2206 + */ 2207 + if (!err && ((first && VM_BIND_OP(op->bind_op.op) == 2208 + XE_VM_BIND_OP_UNMAP) || 2209 + vm->async_ops.munmap_rebind_inflight)) { 2210 + if (last) { 2211 + op->vma->last_munmap_rebind = false; 2212 + vm->async_ops.munmap_rebind_inflight = 2213 + false; 2214 + } else { 2215 + vm->async_ops.munmap_rebind_inflight = 2216 + true; 2217 + 2218 + async_op_cleanup(vm, op); 2219 + 2220 + spin_lock_irq(&vm->async_ops.lock); 2221 + op = next_async_op(vm); 2222 + XE_BUG_ON(!op); 2223 + list_del_init(&op->link); 2224 + spin_unlock_irq(&vm->async_ops.lock); 2225 + 2226 + goto again; 2227 + } 2228 + } 2229 + if (err) { 2230 + trace_xe_vma_fail(op->vma); 2231 + drm_warn(&vm->xe->drm, "Async VM op(%d) failed with %d", 2232 + VM_BIND_OP(op->bind_op.op), 2233 + err); 2234 + 2235 + spin_lock_irq(&vm->async_ops.lock); 2236 + list_add(&op->link, &vm->async_ops.pending); 2237 + spin_unlock_irq(&vm->async_ops.lock); 2238 + 2239 + vm_set_async_error(vm, err); 2240 + up_write(&vm->lock); 2241 + 2242 + if (vm->async_ops.error_capture.addr) 2243 + vm_error_capture(vm, err, 2244 + op->bind_op.op, 2245 + op->bind_op.addr, 2246 + op->bind_op.range); 2247 + break; 2248 + } 2249 + up_write(&vm->lock); 2250 + } else { 2251 + trace_xe_vma_flush(op->vma); 2252 + 2253 + if (is_unmap_op(op->bind_op.op)) { 2254 + down_write(&vm->lock); 2255 + xe_vma_destroy_unlocked(op->vma); 2256 + up_write(&vm->lock); 2257 + } 2258 + 2259 + if (op->fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, 2260 + &op->fence->fence.flags)) { 2261 + if (!xe_vm_no_dma_fences(vm)) { 2262 + op->fence->started = true; 2263 + smp_wmb(); 2264 + wake_up_all(&op->fence->wq); 2265 + } 2266 + dma_fence_signal(&op->fence->fence); 2267 + } 2268 + } 2269 + 2270 + async_op_cleanup(vm, op); 2271 + } 2272 + } 2273 + 2274 + static int __vm_bind_ioctl_async(struct xe_vm *vm, struct xe_vma *vma, 2275 + struct xe_engine *e, struct xe_bo *bo, 2276 + struct drm_xe_vm_bind_op *bind_op, 2277 + struct xe_sync_entry *syncs, u32 
num_syncs) 2278 + { 2279 + struct async_op *op; 2280 + bool installed = false; 2281 + u64 seqno; 2282 + int i; 2283 + 2284 + lockdep_assert_held(&vm->lock); 2285 + 2286 + op = kmalloc(sizeof(*op), GFP_KERNEL); 2287 + if (!op) { 2288 + return -ENOMEM; 2289 + } 2290 + 2291 + if (num_syncs) { 2292 + op->fence = kmalloc(sizeof(*op->fence), GFP_KERNEL); 2293 + if (!op->fence) { 2294 + kfree(op); 2295 + return -ENOMEM; 2296 + } 2297 + 2298 + seqno = e ? ++e->bind.fence_seqno : ++vm->async_ops.fence.seqno; 2299 + dma_fence_init(&op->fence->fence, &async_op_fence_ops, 2300 + &vm->async_ops.lock, e ? e->bind.fence_ctx : 2301 + vm->async_ops.fence.context, seqno); 2302 + 2303 + if (!xe_vm_no_dma_fences(vm)) { 2304 + op->fence->vm = vm; 2305 + op->fence->started = false; 2306 + init_waitqueue_head(&op->fence->wq); 2307 + } 2308 + } else { 2309 + op->fence = NULL; 2310 + } 2311 + op->vma = vma; 2312 + op->engine = e; 2313 + op->bo = bo; 2314 + op->bind_op = *bind_op; 2315 + op->syncs = syncs; 2316 + op->num_syncs = num_syncs; 2317 + INIT_LIST_HEAD(&op->link); 2318 + 2319 + for (i = 0; i < num_syncs; i++) 2320 + installed |= xe_sync_entry_signal(&syncs[i], NULL, 2321 + &op->fence->fence); 2322 + 2323 + if (!installed && op->fence) 2324 + dma_fence_signal(&op->fence->fence); 2325 + 2326 + spin_lock_irq(&vm->async_ops.lock); 2327 + list_add_tail(&op->link, &vm->async_ops.pending); 2328 + spin_unlock_irq(&vm->async_ops.lock); 2329 + 2330 + if (!vm->async_ops.error) 2331 + queue_work(system_unbound_wq, &vm->async_ops.work); 2332 + 2333 + return 0; 2334 + } 2335 + 2336 + static int vm_bind_ioctl_async(struct xe_vm *vm, struct xe_vma *vma, 2337 + struct xe_engine *e, struct xe_bo *bo, 2338 + struct drm_xe_vm_bind_op *bind_op, 2339 + struct xe_sync_entry *syncs, u32 num_syncs) 2340 + { 2341 + struct xe_vma *__vma, *next; 2342 + struct list_head rebind_list; 2343 + struct xe_sync_entry *in_syncs = NULL, *out_syncs = NULL; 2344 + u32 num_in_syncs = 0, num_out_syncs = 0; 2345 + bool 
first = true, last; 2346 + int err; 2347 + int i; 2348 + 2349 + lockdep_assert_held(&vm->lock); 2350 + 2351 + /* Not a linked list of unbinds + rebinds, easy */ 2352 + if (list_empty(&vma->unbind_link)) 2353 + return __vm_bind_ioctl_async(vm, vma, e, bo, bind_op, 2354 + syncs, num_syncs); 2355 + 2356 + /* 2357 + * Linked list of unbinds + rebinds, decompose syncs into 'in / out' 2358 + * passing the 'in' to the first operation and 'out' to the last. Also 2359 + * the reference counting is a little tricky, increment the VM / bind 2360 + * engine ref count on all but the last operation and increment the BOs 2361 + * ref count on each rebind. 2362 + */ 2363 + 2364 + XE_BUG_ON(VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_UNMAP && 2365 + VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_UNMAP_ALL && 2366 + VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_PREFETCH); 2367 + 2368 + /* Decompose syncs */ 2369 + if (num_syncs) { 2370 + in_syncs = kmalloc(sizeof(*in_syncs) * num_syncs, GFP_KERNEL); 2371 + out_syncs = kmalloc(sizeof(*out_syncs) * num_syncs, GFP_KERNEL); 2372 + if (!in_syncs || !out_syncs) { 2373 + err = -ENOMEM; 2374 + goto out_error; 2375 + } 2376 + 2377 + for (i = 0; i < num_syncs; ++i) { 2378 + bool signal = syncs[i].flags & DRM_XE_SYNC_SIGNAL; 2379 + 2380 + if (signal) 2381 + out_syncs[num_out_syncs++] = syncs[i]; 2382 + else 2383 + in_syncs[num_in_syncs++] = syncs[i]; 2384 + } 2385 + } 2386 + 2387 + /* Do unbinds + move rebinds to new list */ 2388 + INIT_LIST_HEAD(&rebind_list); 2389 + list_for_each_entry_safe(__vma, next, &vma->unbind_link, unbind_link) { 2390 + if (__vma->destroyed || 2391 + VM_BIND_OP(bind_op->op) == XE_VM_BIND_OP_PREFETCH) { 2392 + list_del_init(&__vma->unbind_link); 2393 + xe_bo_get(bo); 2394 + err = __vm_bind_ioctl_async(xe_vm_get(vm), __vma, 2395 + e ? xe_engine_get(e) : NULL, 2396 + bo, bind_op, first ? 2397 + in_syncs : NULL, 2398 + first ? 
num_in_syncs : 0); 2399 + if (err) { 2400 + xe_bo_put(bo); 2401 + xe_vm_put(vm); 2402 + if (e) 2403 + xe_engine_put(e); 2404 + goto out_error; 2405 + } 2406 + in_syncs = NULL; 2407 + first = false; 2408 + } else { 2409 + list_move_tail(&__vma->unbind_link, &rebind_list); 2410 + } 2411 + } 2412 + last = list_empty(&rebind_list); 2413 + if (!last) { 2414 + xe_vm_get(vm); 2415 + if (e) 2416 + xe_engine_get(e); 2417 + } 2418 + err = __vm_bind_ioctl_async(vm, vma, e, 2419 + bo, bind_op, 2420 + first ? in_syncs : 2421 + last ? out_syncs : NULL, 2422 + first ? num_in_syncs : 2423 + last ? num_out_syncs : 0); 2424 + if (err) { 2425 + if (!last) { 2426 + xe_vm_put(vm); 2427 + if (e) 2428 + xe_engine_put(e); 2429 + } 2430 + goto out_error; 2431 + } 2432 + in_syncs = NULL; 2433 + 2434 + /* Do rebinds */ 2435 + list_for_each_entry_safe(__vma, next, &rebind_list, unbind_link) { 2436 + list_del_init(&__vma->unbind_link); 2437 + last = list_empty(&rebind_list); 2438 + 2439 + if (xe_vma_is_userptr(__vma)) { 2440 + bind_op->op = XE_VM_BIND_FLAG_ASYNC | 2441 + XE_VM_BIND_OP_MAP_USERPTR; 2442 + } else { 2443 + bind_op->op = XE_VM_BIND_FLAG_ASYNC | 2444 + XE_VM_BIND_OP_MAP; 2445 + xe_bo_get(__vma->bo); 2446 + } 2447 + 2448 + if (!last) { 2449 + xe_vm_get(vm); 2450 + if (e) 2451 + xe_engine_get(e); 2452 + } 2453 + 2454 + err = __vm_bind_ioctl_async(vm, __vma, e, 2455 + __vma->bo, bind_op, last ? 2456 + out_syncs : NULL, 2457 + last ? 
num_out_syncs : 0); 2458 + if (err) { 2459 + if (!last) { 2460 + xe_vm_put(vm); 2461 + if (e) 2462 + xe_engine_put(e); 2463 + } 2464 + goto out_error; 2465 + } 2466 + } 2467 + 2468 + kfree(syncs); 2469 + return 0; 2470 + 2471 + out_error: 2472 + kfree(in_syncs); 2473 + kfree(out_syncs); 2474 + kfree(syncs); 2475 + 2476 + return err; 2477 + } 2478 + 2479 + static int __vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo, 2480 + u64 addr, u64 range, u32 op) 2481 + { 2482 + struct xe_device *xe = vm->xe; 2483 + struct xe_vma *vma, lookup; 2484 + bool async = !!(op & XE_VM_BIND_FLAG_ASYNC); 2485 + 2486 + lockdep_assert_held(&vm->lock); 2487 + 2488 + lookup.start = addr; 2489 + lookup.end = addr + range - 1; 2490 + 2491 + switch (VM_BIND_OP(op)) { 2492 + case XE_VM_BIND_OP_MAP: 2493 + case XE_VM_BIND_OP_MAP_USERPTR: 2494 + vma = xe_vm_find_overlapping_vma(vm, &lookup); 2495 + if (XE_IOCTL_ERR(xe, vma)) 2496 + return -EBUSY; 2497 + break; 2498 + case XE_VM_BIND_OP_UNMAP: 2499 + case XE_VM_BIND_OP_PREFETCH: 2500 + vma = xe_vm_find_overlapping_vma(vm, &lookup); 2501 + if (XE_IOCTL_ERR(xe, !vma) || 2502 + XE_IOCTL_ERR(xe, (vma->start != addr || 2503 + vma->end != addr + range - 1) && !async)) 2504 + return -EINVAL; 2505 + break; 2506 + case XE_VM_BIND_OP_UNMAP_ALL: 2507 + break; 2508 + default: 2509 + XE_BUG_ON("NOT POSSIBLE"); 2510 + return -EINVAL; 2511 + } 2512 + 2513 + return 0; 2514 + } 2515 + 2516 + static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma) 2517 + { 2518 + down_read(&vm->userptr.notifier_lock); 2519 + vma->destroyed = true; 2520 + up_read(&vm->userptr.notifier_lock); 2521 + xe_vm_remove_vma(vm, vma); 2522 + } 2523 + 2524 + static int prep_replacement_vma(struct xe_vm *vm, struct xe_vma *vma) 2525 + { 2526 + int err; 2527 + 2528 + if (vma->bo && !vma->bo->vm) { 2529 + vm_insert_extobj(vm, vma); 2530 + err = add_preempt_fences(vm, vma->bo); 2531 + if (err) 2532 + return err; 2533 + } 2534 + 2535 + return 0; 2536 + } 2537 + 2538 + /* 2539 
+ * Find all overlapping VMAs in lookup range and add to a list in the returned 2540 + * VMA, all of VMAs found will be unbound. Also possibly add 2 new VMAs that 2541 + * need to be bound if first / last VMAs are not fully unbound. This is akin to 2542 + * how munmap works. 2543 + */ 2544 + static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm, 2545 + struct xe_vma *lookup) 2546 + { 2547 + struct xe_vma *vma = xe_vm_find_overlapping_vma(vm, lookup); 2548 + struct rb_node *node; 2549 + struct xe_vma *first = vma, *last = vma, *new_first = NULL, 2550 + *new_last = NULL, *__vma, *next; 2551 + int err = 0; 2552 + bool first_munmap_rebind = false; 2553 + 2554 + lockdep_assert_held(&vm->lock); 2555 + XE_BUG_ON(!vma); 2556 + 2557 + node = &vma->vm_node; 2558 + while ((node = rb_next(node))) { 2559 + if (!xe_vma_cmp_vma_cb(lookup, node)) { 2560 + __vma = to_xe_vma(node); 2561 + list_add_tail(&__vma->unbind_link, &vma->unbind_link); 2562 + last = __vma; 2563 + } else { 2564 + break; 2565 + } 2566 + } 2567 + 2568 + node = &vma->vm_node; 2569 + while ((node = rb_prev(node))) { 2570 + if (!xe_vma_cmp_vma_cb(lookup, node)) { 2571 + __vma = to_xe_vma(node); 2572 + list_add(&__vma->unbind_link, &vma->unbind_link); 2573 + first = __vma; 2574 + } else { 2575 + break; 2576 + } 2577 + } 2578 + 2579 + if (first->start != lookup->start) { 2580 + struct ww_acquire_ctx ww; 2581 + 2582 + if (first->bo) 2583 + err = xe_bo_lock(first->bo, &ww, 0, true); 2584 + if (err) 2585 + goto unwind; 2586 + new_first = xe_vma_create(first->vm, first->bo, 2587 + first->bo ? 
first->bo_offset : 2588 + first->userptr.ptr, 2589 + first->start, 2590 + lookup->start - 1, 2591 + (first->pte_flags & PTE_READ_ONLY), 2592 + first->gt_mask); 2593 + if (first->bo) 2594 + xe_bo_unlock(first->bo, &ww); 2595 + if (!new_first) { 2596 + err = -ENOMEM; 2597 + goto unwind; 2598 + } 2599 + if (!first->bo) { 2600 + err = xe_vma_userptr_pin_pages(new_first); 2601 + if (err) 2602 + goto unwind; 2603 + } 2604 + err = prep_replacement_vma(vm, new_first); 2605 + if (err) 2606 + goto unwind; 2607 + } 2608 + 2609 + if (last->end != lookup->end) { 2610 + struct ww_acquire_ctx ww; 2611 + u64 chunk = lookup->end + 1 - last->start; 2612 + 2613 + if (last->bo) 2614 + err = xe_bo_lock(last->bo, &ww, 0, true); 2615 + if (err) 2616 + goto unwind; 2617 + new_last = xe_vma_create(last->vm, last->bo, 2618 + last->bo ? last->bo_offset + chunk : 2619 + last->userptr.ptr + chunk, 2620 + last->start + chunk, 2621 + last->end, 2622 + (last->pte_flags & PTE_READ_ONLY), 2623 + last->gt_mask); 2624 + if (last->bo) 2625 + xe_bo_unlock(last->bo, &ww); 2626 + if (!new_last) { 2627 + err = -ENOMEM; 2628 + goto unwind; 2629 + } 2630 + if (!last->bo) { 2631 + err = xe_vma_userptr_pin_pages(new_last); 2632 + if (err) 2633 + goto unwind; 2634 + } 2635 + err = prep_replacement_vma(vm, new_last); 2636 + if (err) 2637 + goto unwind; 2638 + } 2639 + 2640 + prep_vma_destroy(vm, vma); 2641 + if (list_empty(&vma->unbind_link) && (new_first || new_last)) 2642 + vma->first_munmap_rebind = true; 2643 + list_for_each_entry(__vma, &vma->unbind_link, unbind_link) { 2644 + if ((new_first || new_last) && !first_munmap_rebind) { 2645 + __vma->first_munmap_rebind = true; 2646 + first_munmap_rebind = true; 2647 + } 2648 + prep_vma_destroy(vm, __vma); 2649 + } 2650 + if (new_first) { 2651 + xe_vm_insert_vma(vm, new_first); 2652 + list_add_tail(&new_first->unbind_link, &vma->unbind_link); 2653 + if (!new_last) 2654 + new_first->last_munmap_rebind = true; 2655 + } 2656 + if (new_last) { 2657 + 
xe_vm_insert_vma(vm, new_last); 2658 + list_add_tail(&new_last->unbind_link, &vma->unbind_link); 2659 + new_last->last_munmap_rebind = true; 2660 + } 2661 + 2662 + return vma; 2663 + 2664 + unwind: 2665 + list_for_each_entry_safe(__vma, next, &vma->unbind_link, unbind_link) 2666 + list_del_init(&__vma->unbind_link); 2667 + if (new_last) { 2668 + prep_vma_destroy(vm, new_last); 2669 + xe_vma_destroy_unlocked(new_last); 2670 + } 2671 + if (new_first) { 2672 + prep_vma_destroy(vm, new_first); 2673 + xe_vma_destroy_unlocked(new_first); 2674 + } 2675 + 2676 + return ERR_PTR(err); 2677 + } 2678 + 2679 + /* 2680 + * Similar to vm_unbind_lookup_vmas, find all VMAs in lookup range to prefetch 2681 + */ 2682 + static struct xe_vma *vm_prefetch_lookup_vmas(struct xe_vm *vm, 2683 + struct xe_vma *lookup, 2684 + u32 region) 2685 + { 2686 + struct xe_vma *vma = xe_vm_find_overlapping_vma(vm, lookup), *__vma, 2687 + *next; 2688 + struct rb_node *node; 2689 + 2690 + if (!xe_vma_is_userptr(vma)) { 2691 + if (!xe_bo_can_migrate(vma->bo, region_to_mem_type[region])) 2692 + return ERR_PTR(-EINVAL); 2693 + } 2694 + 2695 + node = &vma->vm_node; 2696 + while ((node = rb_next(node))) { 2697 + if (!xe_vma_cmp_vma_cb(lookup, node)) { 2698 + __vma = to_xe_vma(node); 2699 + if (!xe_vma_is_userptr(__vma)) { 2700 + if (!xe_bo_can_migrate(__vma->bo, region_to_mem_type[region])) 2701 + goto flush_list; 2702 + } 2703 + list_add_tail(&__vma->unbind_link, &vma->unbind_link); 2704 + } else { 2705 + break; 2706 + } 2707 + } 2708 + 2709 + node = &vma->vm_node; 2710 + while ((node = rb_prev(node))) { 2711 + if (!xe_vma_cmp_vma_cb(lookup, node)) { 2712 + __vma = to_xe_vma(node); 2713 + if (!xe_vma_is_userptr(__vma)) { 2714 + if (!xe_bo_can_migrate(__vma->bo, region_to_mem_type[region])) 2715 + goto flush_list; 2716 + } 2717 + list_add(&__vma->unbind_link, &vma->unbind_link); 2718 + } else { 2719 + break; 2720 + } 2721 + } 2722 + 2723 + return vma; 2724 + 2725 + flush_list: 2726 + 
list_for_each_entry_safe(__vma, next, &vma->unbind_link, 2727 + unbind_link) 2728 + list_del_init(&__vma->unbind_link); 2729 + 2730 + return ERR_PTR(-EINVAL); 2731 + } 2732 + 2733 + static struct xe_vma *vm_unbind_all_lookup_vmas(struct xe_vm *vm, 2734 + struct xe_bo *bo) 2735 + { 2736 + struct xe_vma *first = NULL, *vma; 2737 + 2738 + lockdep_assert_held(&vm->lock); 2739 + xe_bo_assert_held(bo); 2740 + 2741 + list_for_each_entry(vma, &bo->vmas, bo_link) { 2742 + if (vma->vm != vm) 2743 + continue; 2744 + 2745 + prep_vma_destroy(vm, vma); 2746 + if (!first) 2747 + first = vma; 2748 + else 2749 + list_add_tail(&vma->unbind_link, &first->unbind_link); 2750 + } 2751 + 2752 + return first; 2753 + } 2754 + 2755 + static struct xe_vma *vm_bind_ioctl_lookup_vma(struct xe_vm *vm, 2756 + struct xe_bo *bo, 2757 + u64 bo_offset_or_userptr, 2758 + u64 addr, u64 range, u32 op, 2759 + u64 gt_mask, u32 region) 2760 + { 2761 + struct ww_acquire_ctx ww; 2762 + struct xe_vma *vma, lookup; 2763 + int err; 2764 + 2765 + lockdep_assert_held(&vm->lock); 2766 + 2767 + lookup.start = addr; 2768 + lookup.end = addr + range - 1; 2769 + 2770 + switch (VM_BIND_OP(op)) { 2771 + case XE_VM_BIND_OP_MAP: 2772 + XE_BUG_ON(!bo); 2773 + 2774 + err = xe_bo_lock(bo, &ww, 0, true); 2775 + if (err) 2776 + return ERR_PTR(err); 2777 + vma = xe_vma_create(vm, bo, bo_offset_or_userptr, addr, 2778 + addr + range - 1, 2779 + op & XE_VM_BIND_FLAG_READONLY, 2780 + gt_mask); 2781 + xe_bo_unlock(bo, &ww); 2782 + if (!vma) 2783 + return ERR_PTR(-ENOMEM); 2784 + 2785 + xe_vm_insert_vma(vm, vma); 2786 + if (!bo->vm) { 2787 + vm_insert_extobj(vm, vma); 2788 + err = add_preempt_fences(vm, bo); 2789 + if (err) { 2790 + prep_vma_destroy(vm, vma); 2791 + xe_vma_destroy_unlocked(vma); 2792 + 2793 + return ERR_PTR(err); 2794 + } 2795 + } 2796 + break; 2797 + case XE_VM_BIND_OP_UNMAP: 2798 + vma = vm_unbind_lookup_vmas(vm, &lookup); 2799 + break; 2800 + case XE_VM_BIND_OP_PREFETCH: 2801 + vma = vm_prefetch_lookup_vmas(vm, 
&lookup, region); 2802 + break; 2803 + case XE_VM_BIND_OP_UNMAP_ALL: 2804 + XE_BUG_ON(!bo); 2805 + 2806 + err = xe_bo_lock(bo, &ww, 0, true); 2807 + if (err) 2808 + return ERR_PTR(err); 2809 + vma = vm_unbind_all_lookup_vmas(vm, bo); 2810 + if (!vma) 2811 + vma = ERR_PTR(-EINVAL); 2812 + xe_bo_unlock(bo, &ww); 2813 + break; 2814 + case XE_VM_BIND_OP_MAP_USERPTR: 2815 + XE_BUG_ON(bo); 2816 + 2817 + vma = xe_vma_create(vm, NULL, bo_offset_or_userptr, addr, 2818 + addr + range - 1, 2819 + op & XE_VM_BIND_FLAG_READONLY, 2820 + gt_mask); 2821 + if (!vma) 2822 + return ERR_PTR(-ENOMEM); 2823 + 2824 + err = xe_vma_userptr_pin_pages(vma); 2825 + if (err) { 2826 + xe_vma_destroy(vma, NULL); 2827 + 2828 + return ERR_PTR(err); 2829 + } else { 2830 + xe_vm_insert_vma(vm, vma); 2831 + } 2832 + break; 2833 + default: 2834 + XE_BUG_ON("NOT POSSIBLE"); 2835 + vma = ERR_PTR(-EINVAL); 2836 + } 2837 + 2838 + return vma; 2839 + } 2840 + 2841 + #ifdef TEST_VM_ASYNC_OPS_ERROR 2842 + #define SUPPORTED_FLAGS \ 2843 + (FORCE_ASYNC_OP_ERROR | XE_VM_BIND_FLAG_ASYNC | \ 2844 + XE_VM_BIND_FLAG_READONLY | XE_VM_BIND_FLAG_IMMEDIATE | 0xffff) 2845 + #else 2846 + #define SUPPORTED_FLAGS \ 2847 + (XE_VM_BIND_FLAG_ASYNC | XE_VM_BIND_FLAG_READONLY | \ 2848 + XE_VM_BIND_FLAG_IMMEDIATE | 0xffff) 2849 + #endif 2850 + #define XE_64K_PAGE_MASK 0xffffull 2851 + 2852 + #define MAX_BINDS 512 /* FIXME: Picking random upper limit */ 2853 + 2854 + static int vm_bind_ioctl_check_args(struct xe_device *xe, 2855 + struct drm_xe_vm_bind *args, 2856 + struct drm_xe_vm_bind_op **bind_ops, 2857 + bool *async) 2858 + { 2859 + int err; 2860 + int i; 2861 + 2862 + if (XE_IOCTL_ERR(xe, args->extensions) || 2863 + XE_IOCTL_ERR(xe, !args->num_binds) || 2864 + XE_IOCTL_ERR(xe, args->num_binds > MAX_BINDS)) 2865 + return -EINVAL; 2866 + 2867 + if (args->num_binds > 1) { 2868 + u64 __user *bind_user = 2869 + u64_to_user_ptr(args->vector_of_binds); 2870 + 2871 + *bind_ops = kmalloc(sizeof(struct drm_xe_vm_bind_op) * 2872 + 
args->num_binds, GFP_KERNEL); 2873 + if (!*bind_ops) 2874 + return -ENOMEM; 2875 + 2876 + err = __copy_from_user(*bind_ops, bind_user, 2877 + sizeof(struct drm_xe_vm_bind_op) * 2878 + args->num_binds); 2879 + if (XE_IOCTL_ERR(xe, err)) { 2880 + err = -EFAULT; 2881 + goto free_bind_ops; 2882 + } 2883 + } else { 2884 + *bind_ops = &args->bind; 2885 + } 2886 + 2887 + for (i = 0; i < args->num_binds; ++i) { 2888 + u64 range = (*bind_ops)[i].range; 2889 + u64 addr = (*bind_ops)[i].addr; 2890 + u32 op = (*bind_ops)[i].op; 2891 + u32 obj = (*bind_ops)[i].obj; 2892 + u64 obj_offset = (*bind_ops)[i].obj_offset; 2893 + u32 region = (*bind_ops)[i].region; 2894 + 2895 + if (i == 0) { 2896 + *async = !!(op & XE_VM_BIND_FLAG_ASYNC); 2897 + } else if (XE_IOCTL_ERR(xe, !*async) || 2898 + XE_IOCTL_ERR(xe, !(op & XE_VM_BIND_FLAG_ASYNC)) || 2899 + XE_IOCTL_ERR(xe, VM_BIND_OP(op) == 2900 + XE_VM_BIND_OP_RESTART)) { 2901 + err = -EINVAL; 2902 + goto free_bind_ops; 2903 + } 2904 + 2905 + if (XE_IOCTL_ERR(xe, !*async && 2906 + VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL)) { 2907 + err = -EINVAL; 2908 + goto free_bind_ops; 2909 + } 2910 + 2911 + if (XE_IOCTL_ERR(xe, !*async && 2912 + VM_BIND_OP(op) == XE_VM_BIND_OP_PREFETCH)) { 2913 + err = -EINVAL; 2914 + goto free_bind_ops; 2915 + } 2916 + 2917 + if (XE_IOCTL_ERR(xe, VM_BIND_OP(op) > 2918 + XE_VM_BIND_OP_PREFETCH) || 2919 + XE_IOCTL_ERR(xe, op & ~SUPPORTED_FLAGS) || 2920 + XE_IOCTL_ERR(xe, !obj && 2921 + VM_BIND_OP(op) == XE_VM_BIND_OP_MAP) || 2922 + XE_IOCTL_ERR(xe, !obj && 2923 + VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) || 2924 + XE_IOCTL_ERR(xe, addr && 2925 + VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) || 2926 + XE_IOCTL_ERR(xe, range && 2927 + VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) || 2928 + XE_IOCTL_ERR(xe, obj && 2929 + VM_BIND_OP(op) == XE_VM_BIND_OP_MAP_USERPTR) || 2930 + XE_IOCTL_ERR(xe, obj && 2931 + VM_BIND_OP(op) == XE_VM_BIND_OP_PREFETCH) || 2932 + XE_IOCTL_ERR(xe, region && 2933 + VM_BIND_OP(op) != 
XE_VM_BIND_OP_PREFETCH) || 2934 + XE_IOCTL_ERR(xe, !(BIT(region) & 2935 + xe->info.mem_region_mask)) || 2936 + XE_IOCTL_ERR(xe, obj && 2937 + VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP)) { 2938 + err = -EINVAL; 2939 + goto free_bind_ops; 2940 + } 2941 + 2942 + if (XE_IOCTL_ERR(xe, obj_offset & ~PAGE_MASK) || 2943 + XE_IOCTL_ERR(xe, addr & ~PAGE_MASK) || 2944 + XE_IOCTL_ERR(xe, range & ~PAGE_MASK) || 2945 + XE_IOCTL_ERR(xe, !range && VM_BIND_OP(op) != 2946 + XE_VM_BIND_OP_RESTART && 2947 + VM_BIND_OP(op) != XE_VM_BIND_OP_UNMAP_ALL)) { 2948 + err = -EINVAL; 2949 + goto free_bind_ops; 2950 + } 2951 + } 2952 + 2953 + return 0; 2954 + 2955 + free_bind_ops: 2956 + if (args->num_binds > 1) 2957 + kfree(*bind_ops); 2958 + return err; 2959 + } 2960 + 2961 + int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 2962 + { 2963 + struct xe_device *xe = to_xe_device(dev); 2964 + struct xe_file *xef = to_xe_file(file); 2965 + struct drm_xe_vm_bind *args = data; 2966 + struct drm_xe_sync __user *syncs_user; 2967 + struct xe_bo **bos = NULL; 2968 + struct xe_vma **vmas = NULL; 2969 + struct xe_vm *vm; 2970 + struct xe_engine *e = NULL; 2971 + u32 num_syncs; 2972 + struct xe_sync_entry *syncs = NULL; 2973 + struct drm_xe_vm_bind_op *bind_ops; 2974 + bool async; 2975 + int err; 2976 + int i, j = 0; 2977 + 2978 + err = vm_bind_ioctl_check_args(xe, args, &bind_ops, &async); 2979 + if (err) 2980 + return err; 2981 + 2982 + vm = xe_vm_lookup(xef, args->vm_id); 2983 + if (XE_IOCTL_ERR(xe, !vm)) { 2984 + err = -EINVAL; 2985 + goto free_objs; 2986 + } 2987 + 2988 + if (XE_IOCTL_ERR(xe, xe_vm_is_closed(vm))) { 2989 + DRM_ERROR("VM closed while we began looking up?\n"); 2990 + err = -ENOENT; 2991 + goto put_vm; 2992 + } 2993 + 2994 + if (args->engine_id) { 2995 + e = xe_engine_lookup(xef, args->engine_id); 2996 + if (XE_IOCTL_ERR(xe, !e)) { 2997 + err = -ENOENT; 2998 + goto put_vm; 2999 + } 3000 + if (XE_IOCTL_ERR(xe, !(e->flags & ENGINE_FLAG_VM))) { 3001 + err = -EINVAL; 
3002 + goto put_engine; 3003 + } 3004 + } 3005 + 3006 + if (VM_BIND_OP(bind_ops[0].op) == XE_VM_BIND_OP_RESTART) { 3007 + if (XE_IOCTL_ERR(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) 3008 + err = -ENOTSUPP; 3009 + if (XE_IOCTL_ERR(xe, !err && args->num_syncs)) 3010 + err = -EINVAL; 3011 + if (XE_IOCTL_ERR(xe, !err && !vm->async_ops.error)) 3012 + err = -EPROTO; 3013 + 3014 + if (!err) { 3015 + down_write(&vm->lock); 3016 + trace_xe_vm_restart(vm); 3017 + vm_set_async_error(vm, 0); 3018 + up_write(&vm->lock); 3019 + 3020 + queue_work(system_unbound_wq, &vm->async_ops.work); 3021 + 3022 + /* Rebinds may have been blocked, give worker a kick */ 3023 + if (xe_vm_in_compute_mode(vm)) 3024 + queue_work(vm->xe->ordered_wq, 3025 + &vm->preempt.rebind_work); 3026 + } 3027 + 3028 + goto put_engine; 3029 + } 3030 + 3031 + if (XE_IOCTL_ERR(xe, !vm->async_ops.error && 3032 + async != !!(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) { 3033 + err = -ENOTSUPP; 3034 + goto put_engine; 3035 + } 3036 + 3037 + for (i = 0; i < args->num_binds; ++i) { 3038 + u64 range = bind_ops[i].range; 3039 + u64 addr = bind_ops[i].addr; 3040 + 3041 + if (XE_IOCTL_ERR(xe, range > vm->size) || 3042 + XE_IOCTL_ERR(xe, addr > vm->size - range)) { 3043 + err = -EINVAL; 3044 + goto put_engine; 3045 + } 3046 + 3047 + if (bind_ops[i].gt_mask) { 3048 + u64 valid_gts = BIT(xe->info.tile_count) - 1; 3049 + 3050 + if (XE_IOCTL_ERR(xe, bind_ops[i].gt_mask & 3051 + ~valid_gts)) { 3052 + err = -EINVAL; 3053 + goto put_engine; 3054 + } 3055 + } 3056 + } 3057 + 3058 + bos = kzalloc(sizeof(*bos) * args->num_binds, GFP_KERNEL); 3059 + if (!bos) { 3060 + err = -ENOMEM; 3061 + goto put_engine; 3062 + } 3063 + 3064 + vmas = kzalloc(sizeof(*vmas) * args->num_binds, GFP_KERNEL); 3065 + if (!vmas) { 3066 + err = -ENOMEM; 3067 + goto put_engine; 3068 + } 3069 + 3070 + for (i = 0; i < args->num_binds; ++i) { 3071 + struct drm_gem_object *gem_obj; 3072 + u64 range = bind_ops[i].range; 3073 + u64 addr = bind_ops[i].addr; 3074 + 
u32 obj = bind_ops[i].obj; 3075 + u64 obj_offset = bind_ops[i].obj_offset; 3076 + 3077 + if (!obj) 3078 + continue; 3079 + 3080 + gem_obj = drm_gem_object_lookup(file, obj); 3081 + if (XE_IOCTL_ERR(xe, !gem_obj)) { 3082 + err = -ENOENT; 3083 + goto put_obj; 3084 + } 3085 + bos[i] = gem_to_xe_bo(gem_obj); 3086 + 3087 + if (XE_IOCTL_ERR(xe, range > bos[i]->size) || 3088 + XE_IOCTL_ERR(xe, obj_offset > 3089 + bos[i]->size - range)) { 3090 + err = -EINVAL; 3091 + goto put_obj; 3092 + } 3093 + 3094 + if (bos[i]->flags & XE_BO_INTERNAL_64K) { 3095 + if (XE_IOCTL_ERR(xe, obj_offset & 3096 + XE_64K_PAGE_MASK) || 3097 + XE_IOCTL_ERR(xe, addr & XE_64K_PAGE_MASK) || 3098 + XE_IOCTL_ERR(xe, range & XE_64K_PAGE_MASK)) { 3099 + err = -EINVAL; 3100 + goto put_obj; 3101 + } 3102 + } 3103 + } 3104 + 3105 + if (args->num_syncs) { 3106 + syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); 3107 + if (!syncs) { 3108 + err = -ENOMEM; 3109 + goto put_obj; 3110 + } 3111 + } 3112 + 3113 + syncs_user = u64_to_user_ptr(args->syncs); 3114 + for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { 3115 + err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], 3116 + &syncs_user[num_syncs], false, 3117 + xe_vm_no_dma_fences(vm)); 3118 + if (err) 3119 + goto free_syncs; 3120 + } 3121 + 3122 + err = down_write_killable(&vm->lock); 3123 + if (err) 3124 + goto free_syncs; 3125 + 3126 + /* Do some error checking first to make the unwind easier */ 3127 + for (i = 0; i < args->num_binds; ++i) { 3128 + u64 range = bind_ops[i].range; 3129 + u64 addr = bind_ops[i].addr; 3130 + u32 op = bind_ops[i].op; 3131 + 3132 + err = __vm_bind_ioctl_lookup_vma(vm, bos[i], addr, range, op); 3133 + if (err) 3134 + goto release_vm_lock; 3135 + } 3136 + 3137 + for (i = 0; i < args->num_binds; ++i) { 3138 + u64 range = bind_ops[i].range; 3139 + u64 addr = bind_ops[i].addr; 3140 + u32 op = bind_ops[i].op; 3141 + u64 obj_offset = bind_ops[i].obj_offset; 3142 + u64 gt_mask = bind_ops[i].gt_mask; 3143 + u32 
region = bind_ops[i].region; 3144 + 3145 + vmas[i] = vm_bind_ioctl_lookup_vma(vm, bos[i], obj_offset, 3146 + addr, range, op, gt_mask, 3147 + region); 3148 + if (IS_ERR(vmas[i])) { 3149 + err = PTR_ERR(vmas[i]); 3150 + vmas[i] = NULL; 3151 + goto destroy_vmas; 3152 + } 3153 + } 3154 + 3155 + for (j = 0; j < args->num_binds; ++j) { 3156 + struct xe_sync_entry *__syncs; 3157 + u32 __num_syncs = 0; 3158 + bool first_or_last = j == 0 || j == args->num_binds - 1; 3159 + 3160 + if (args->num_binds == 1) { 3161 + __num_syncs = num_syncs; 3162 + __syncs = syncs; 3163 + } else if (first_or_last && num_syncs) { 3164 + bool first = j == 0; 3165 + 3166 + __syncs = kmalloc(sizeof(*__syncs) * num_syncs, 3167 + GFP_KERNEL); 3168 + if (!__syncs) { 3169 + err = -ENOMEM; 3170 + break; 3171 + } 3172 + 3173 + /* in-syncs on first bind, out-syncs on last bind */ 3174 + for (i = 0; i < num_syncs; ++i) { 3175 + bool signal = syncs[i].flags & 3176 + DRM_XE_SYNC_SIGNAL; 3177 + 3178 + if ((first && !signal) || (!first && signal)) 3179 + __syncs[__num_syncs++] = syncs[i]; 3180 + } 3181 + } else { 3182 + __num_syncs = 0; 3183 + __syncs = NULL; 3184 + } 3185 + 3186 + if (async) { 3187 + bool last = j == args->num_binds - 1; 3188 + 3189 + /* 3190 + * Each pass of async worker drops the ref, take a ref 3191 + * here, 1 set of refs taken above 3192 + */ 3193 + if (!last) { 3194 + if (e) 3195 + xe_engine_get(e); 3196 + xe_vm_get(vm); 3197 + } 3198 + 3199 + err = vm_bind_ioctl_async(vm, vmas[j], e, bos[j], 3200 + bind_ops + j, __syncs, 3201 + __num_syncs); 3202 + if (err && !last) { 3203 + if (e) 3204 + xe_engine_put(e); 3205 + xe_vm_put(vm); 3206 + } 3207 + if (err) 3208 + break; 3209 + } else { 3210 + XE_BUG_ON(j != 0); /* Not supported */ 3211 + err = vm_bind_ioctl(vm, vmas[j], e, bos[j], 3212 + bind_ops + j, __syncs, 3213 + __num_syncs, NULL); 3214 + break; /* Needed so cleanup loops work */ 3215 + } 3216 + } 3217 + 3218 + /* Most of cleanup owned by the async bind worker */ 3219 + if (async && 
!err) { 3220 + up_write(&vm->lock); 3221 + if (args->num_binds > 1) 3222 + kfree(syncs); 3223 + goto free_objs; 3224 + } 3225 + 3226 + destroy_vmas: 3227 + for (i = j; err && i < args->num_binds; ++i) { 3228 + u32 op = bind_ops[i].op; 3229 + struct xe_vma *vma, *next; 3230 + 3231 + if (!vmas[i]) 3232 + break; 3233 + 3234 + list_for_each_entry_safe(vma, next, &vmas[i]->unbind_link, 3235 + unbind_link) { 3236 + list_del_init(&vma->unbind_link); 3237 + if (!vma->destroyed) { 3238 + prep_vma_destroy(vm, vma); 3239 + xe_vma_destroy_unlocked(vma); 3240 + } 3241 + } 3242 + 3243 + switch (VM_BIND_OP(op)) { 3244 + case XE_VM_BIND_OP_MAP: 3245 + prep_vma_destroy(vm, vmas[i]); 3246 + xe_vma_destroy_unlocked(vmas[i]); 3247 + break; 3248 + case XE_VM_BIND_OP_MAP_USERPTR: 3249 + prep_vma_destroy(vm, vmas[i]); 3250 + xe_vma_destroy_unlocked(vmas[i]); 3251 + break; 3252 + } 3253 + } 3254 + release_vm_lock: 3255 + up_write(&vm->lock); 3256 + free_syncs: 3257 + while (num_syncs--) { 3258 + if (async && j && 3259 + !(syncs[num_syncs].flags & DRM_XE_SYNC_SIGNAL)) 3260 + continue; /* Still in async worker */ 3261 + xe_sync_entry_cleanup(&syncs[num_syncs]); 3262 + } 3263 + 3264 + kfree(syncs); 3265 + put_obj: 3266 + for (i = j; i < args->num_binds; ++i) 3267 + xe_bo_put(bos[i]); 3268 + put_engine: 3269 + if (e) 3270 + xe_engine_put(e); 3271 + put_vm: 3272 + xe_vm_put(vm); 3273 + free_objs: 3274 + kfree(bos); 3275 + kfree(vmas); 3276 + if (args->num_binds > 1) 3277 + kfree(bind_ops); 3278 + return err; 3279 + } 3280 + 3281 + /* 3282 + * XXX: Using the TTM wrappers for now, likely can call into dma-resv code 3283 + * directly to optimize. Also this likely should be an inline function. 
3284 + */ 3285 + int xe_vm_lock(struct xe_vm *vm, struct ww_acquire_ctx *ww, 3286 + int num_resv, bool intr) 3287 + { 3288 + struct ttm_validate_buffer tv_vm; 3289 + LIST_HEAD(objs); 3290 + LIST_HEAD(dups); 3291 + 3292 + XE_BUG_ON(!ww); 3293 + 3294 + tv_vm.num_shared = num_resv; 3295 + tv_vm.bo = xe_vm_ttm_bo(vm); 3296 + list_add_tail(&tv_vm.head, &objs); 3297 + 3298 + return ttm_eu_reserve_buffers(ww, &objs, intr, &dups); 3299 + } 3300 + 3301 + void xe_vm_unlock(struct xe_vm *vm, struct ww_acquire_ctx *ww) 3302 + { 3303 + dma_resv_unlock(&vm->resv); 3304 + ww_acquire_fini(ww); 3305 + } 3306 + 3307 + /** 3308 + * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock 3309 + * @vma: VMA to invalidate 3310 + * 3311 + * Walks the list of page table leaves, zeroing the entries owned by this 3312 + * VMA, invalidates the TLBs, and blocks until the TLB invalidation is 3313 + * complete. 3314 + * 3315 + * Returns 0 for success, negative error code otherwise. 3316 + */ 3317 + int xe_vm_invalidate_vma(struct xe_vma *vma) 3318 + { 3319 + struct xe_device *xe = vma->vm->xe; 3320 + struct xe_gt *gt; 3321 + u32 gt_needs_invalidate = 0; 3322 + int seqno[XE_MAX_GT]; 3323 + u8 id; 3324 + int ret; 3325 + 3326 + XE_BUG_ON(!xe_vm_in_fault_mode(vma->vm)); 3327 + trace_xe_vma_usm_invalidate(vma); 3328 + 3329 + /* Check that we don't race with page-table updates */ 3330 + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { 3331 + if (xe_vma_is_userptr(vma)) { 3332 + WARN_ON_ONCE(!mmu_interval_check_retry 3333 + (&vma->userptr.notifier, 3334 + vma->userptr.notifier_seq)); 3335 + WARN_ON_ONCE(!dma_resv_test_signaled(&vma->vm->resv, 3336 + DMA_RESV_USAGE_BOOKKEEP)); 3337 + 3338 + } else { 3339 + xe_bo_assert_held(vma->bo); 3340 + } 3341 + } 3342 + 3343 + for_each_gt(gt, xe, id) { 3344 + if (xe_pt_zap_ptes(gt, vma)) { 3345 + gt_needs_invalidate |= BIT(id); 3346 + xe_device_wmb(xe); 3347 + seqno[id] = xe_gt_tlb_invalidation(gt); 3348 + if (seqno[id] < 0) 3349 + return 
seqno[id]; 3350 + } 3351 + } 3352 + 3353 + for_each_gt(gt, xe, id) { 3354 + if (gt_needs_invalidate & BIT(id)) { 3355 + ret = xe_gt_tlb_invalidation_wait(gt, seqno[id]); 3356 + if (ret < 0) 3357 + return ret; 3358 + } 3359 + } 3360 + 3361 + vma->usm.gt_invalidated = vma->gt_mask; 3362 + 3363 + return 0; 3364 + } 3365 + 3366 + #if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE) 3367 + int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id) 3368 + { 3369 + struct rb_node *node; 3370 + bool is_lmem; 3371 + uint64_t addr; 3372 + 3373 + if (!down_read_trylock(&vm->lock)) { 3374 + drm_printf(p, " Failed to acquire VM lock to dump capture"); 3375 + return 0; 3376 + } 3377 + if (vm->pt_root[gt_id]) { 3378 + addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, GEN8_PAGE_SIZE, &is_lmem); 3379 + drm_printf(p, " VM root: A:0x%llx %s\n", addr, is_lmem ? "LMEM" : "SYS"); 3380 + } 3381 + 3382 + for (node = rb_first(&vm->vmas); node; node = rb_next(node)) { 3383 + struct xe_vma *vma = to_xe_vma(node); 3384 + bool is_userptr = xe_vma_is_userptr(vma); 3385 + 3386 + if (is_userptr) { 3387 + struct xe_res_cursor cur; 3388 + 3389 + xe_res_first_sg(vma->userptr.sg, 0, GEN8_PAGE_SIZE, &cur); 3390 + addr = xe_res_dma(&cur); 3391 + } else { 3392 + addr = xe_bo_addr(vma->bo, 0, GEN8_PAGE_SIZE, &is_lmem); 3393 + } 3394 + drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n", 3395 + vma->start, vma->end, vma->end - vma->start + 1ull, 3396 + addr, is_userptr ? "USR" : is_lmem ? "VRAM" : "SYS"); 3397 + } 3398 + up_read(&vm->lock); 3399 + 3400 + return 0; 3401 + } 3402 + #else 3403 + int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id) 3404 + { 3405 + return 0; 3406 + } 3407 + #endif
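The in-sync / out-sync splitting that xe_vm_bind_ioctl() performs for an array of binds (in-fences gate the first operation, DRM_XE_SYNC_SIGNAL out-fences are attached to the last) can be sketched as follows. This is an illustrative Python model, not driver code; the flag value and dict representation are assumptions:

```python
# Hypothetical model of how a user-supplied sync array is split across an
# array of bind operations: in-syncs wait on the first bind, out-syncs
# (those carrying the SIGNAL flag) signal on the last bind.
DRM_XE_SYNC_SIGNAL = 1 << 0  # illustrative flag value, not the real uAPI bit

def split_syncs(syncs, num_binds):
    """Return one sync list per bind for an array of num_binds operations."""
    if num_binds == 1:
        return [list(syncs)]  # single bind gets everything
    per_bind = [[] for _ in range(num_binds)]
    for s in syncs:
        if s["flags"] & DRM_XE_SYNC_SIGNAL:
            per_bind[-1].append(s)   # out-sync: attached to the last bind
        else:
            per_bind[0].append(s)    # in-sync: gates the first bind
    return per_bind
```

The ordered nature of a bind engine is what makes this legal: intermediate operations need no syncs of their own because they cannot pass the first or be passed by the last.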
drivers/gpu/drm/xe/xe_vm.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2021 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_VM_H_ 7 + #define _XE_VM_H_ 8 + 9 + #include "xe_macros.h" 10 + #include "xe_map.h" 11 + #include "xe_vm_types.h" 12 + 13 + struct drm_device; 14 + struct drm_printer; 15 + struct drm_file; 16 + 17 + struct ttm_buffer_object; 18 + struct ttm_validate_buffer; 19 + 20 + struct xe_engine; 21 + struct xe_file; 22 + struct xe_sync_entry; 23 + 24 + struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags); 25 + void xe_vm_free(struct kref *ref); 26 + 27 + struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id); 28 + int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node); 29 + 30 + static inline struct xe_vm *xe_vm_get(struct xe_vm *vm) 31 + { 32 + kref_get(&vm->refcount); 33 + return vm; 34 + } 35 + 36 + static inline void xe_vm_put(struct xe_vm *vm) 37 + { 38 + kref_put(&vm->refcount, xe_vm_free); 39 + } 40 + 41 + int xe_vm_lock(struct xe_vm *vm, struct ww_acquire_ctx *ww, 42 + int num_resv, bool intr); 43 + 44 + void xe_vm_unlock(struct xe_vm *vm, struct ww_acquire_ctx *ww); 45 + 46 + static inline bool xe_vm_is_closed(struct xe_vm *vm) 47 + { 48 + /* Only guaranteed not to change when vm->resv is held */ 49 + return !vm->size; 50 + } 51 + 52 + struct xe_vma * 53 + xe_vm_find_overlapping_vma(struct xe_vm *vm, const struct xe_vma *vma); 54 + 55 + #define xe_vm_assert_held(vm) dma_resv_assert_held(&(vm)->resv) 56 + 57 + u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_gt *full_gt); 58 + 59 + int xe_vm_create_ioctl(struct drm_device *dev, void *data, 60 + struct drm_file *file); 61 + int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, 62 + struct drm_file *file); 63 + int xe_vm_bind_ioctl(struct drm_device *dev, void *data, 64 + struct drm_file *file); 65 + 66 + void xe_vm_close_and_put(struct xe_vm *vm); 67 + 68 + static inline bool xe_vm_in_compute_mode(struct xe_vm *vm) 69 + { 70 + return vm->flags & XE_VM_FLAG_COMPUTE_MODE; 
71 + } 72 + 73 + static inline bool xe_vm_in_fault_mode(struct xe_vm *vm) 74 + { 75 + return vm->flags & XE_VM_FLAG_FAULT_MODE; 76 + } 77 + 78 + static inline bool xe_vm_no_dma_fences(struct xe_vm *vm) 79 + { 80 + return xe_vm_in_compute_mode(vm) || xe_vm_in_fault_mode(vm); 81 + } 82 + 83 + int xe_vm_add_compute_engine(struct xe_vm *vm, struct xe_engine *e); 84 + 85 + int xe_vm_userptr_pin(struct xe_vm *vm); 86 + 87 + int __xe_vm_userptr_needs_repin(struct xe_vm *vm); 88 + 89 + int xe_vm_userptr_check_repin(struct xe_vm *vm); 90 + 91 + struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker); 92 + 93 + int xe_vm_invalidate_vma(struct xe_vma *vma); 94 + 95 + int xe_vm_async_fence_wait_start(struct dma_fence *fence); 96 + 97 + extern struct ttm_device_funcs xe_ttm_funcs; 98 + 99 + struct ttm_buffer_object *xe_vm_ttm_bo(struct xe_vm *vm); 100 + 101 + static inline bool xe_vma_is_userptr(struct xe_vma *vma) 102 + { 103 + return !vma->bo; 104 + } 105 + 106 + int xe_vma_userptr_pin_pages(struct xe_vma *vma); 107 + 108 + int xe_vma_userptr_check_repin(struct xe_vma *vma); 109 + 110 + /* 111 + * XE_ONSTACK_TV is used to size the tv_onstack array that is input 112 + * to xe_vm_lock_dma_resv() and xe_vm_unlock_dma_resv(). 
113 + */ 114 + #define XE_ONSTACK_TV 20 115 + int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww, 116 + struct ttm_validate_buffer *tv_onstack, 117 + struct ttm_validate_buffer **tv, 118 + struct list_head *objs, 119 + bool intr, 120 + unsigned int num_shared); 121 + 122 + void xe_vm_unlock_dma_resv(struct xe_vm *vm, 123 + struct ttm_validate_buffer *tv_onstack, 124 + struct ttm_validate_buffer *tv, 125 + struct ww_acquire_ctx *ww, 126 + struct list_head *objs); 127 + 128 + void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence, 129 + enum dma_resv_usage usage); 130 + 131 + int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id); 132 + 133 + #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) 134 + #define vm_dbg drm_dbg 135 + #else 136 + __printf(2, 3) 137 + static inline void vm_dbg(const struct drm_device *dev, 138 + const char *format, ...) 139 + { /* noop */ } 140 + #endif 141 + #endif
drivers/gpu/drm/xe/xe_vm_doc.h
1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_VM_DOC_H_ 7 + #define _XE_VM_DOC_H_ 8 + 9 + /** 10 + * DOC: XE VM (user address space) 11 + * 12 + * VM creation 13 + * =========== 14 + * 15 + * Allocate a physical page for root of the page table structure, create default 16 + * bind engine, and return a handle to the user. 17 + * 18 + * Scratch page 19 + * ------------ 20 + * 21 + * If the VM is created with the flag, DRM_XE_VM_CREATE_SCRATCH_PAGE, the entire 22 + * page table structure defaults to pointing at a blank page allocated by the 23 + * VM. Invalid memory accesses then read / write this blank page rather than faulting. 24 + * 25 + * VM bind (create GPU mapping for a BO or userptr) 26 + * ================================================ 27 + * 28 + * Creates GPU mappings for a BO or userptr within a VM. VM binds use the same 29 + * in / out fence interface (struct drm_xe_sync) as execs which allows users to 30 + * think of binds and execs as more or less the same operation. 31 + * 32 + * Operations 33 + * ---------- 34 + * 35 + * XE_VM_BIND_OP_MAP - Create mapping for a BO 36 + * XE_VM_BIND_OP_UNMAP - Destroy mapping for a BO / userptr 37 + * XE_VM_BIND_OP_MAP_USERPTR - Create mapping for userptr 38 + * 39 + * Implementation details 40 + * ~~~~~~~~~~~~~~~~~~~~~~ 41 + * 42 + * All bind operations are implemented via a hybrid approach of using the CPU 43 + * and GPU to modify page tables. If a new physical page is allocated in the 44 + * page table structure we populate that page via the CPU and insert that new 45 + * page into the existing page table structure via a GPU job. Any existing 46 + * pages in the page table structure that need to be modified are also updated 47 + * via the GPU job. As the root physical page is preallocated on VM creation our 48 + * GPU job will always have at least 1 update. 
The in / out fences are passed to 49 + * this job so again this is conceptually the same as an exec. 50 + * 51 + * Very simple example of a few binds on an empty VM with 48 bits of address space 52 + * and the resulting operations: 53 + * 54 + * .. code-block:: 55 + * 56 + * bind BO0 0x0-0x1000 57 + * alloc page level 3a, program PTE[0] to BO0 phys address (CPU) 58 + * alloc page level 2, program PDE[0] page level 3a phys address (CPU) 59 + * alloc page level 1, program PDE[0] page level 2 phys address (CPU) 60 + * update root PDE[0] to page level 1 phys address (GPU) 61 + * 62 + * bind BO1 0x201000-0x202000 63 + * alloc page level 3b, program PTE[1] to BO1 phys address (CPU) 64 + * update page level 2 PDE[1] to page level 3b phys address (GPU) 65 + * 66 + * bind BO2 0x1ff000-0x201000 67 + * update page level 3a PTE[511] to BO2 phys address (GPU) 68 + * update page level 3b PTE[0] to BO2 phys address + 0x1000 (GPU) 69 + * 70 + * GPU bypass 71 + * ~~~~~~~~~~ 72 + * 73 + * In the above example the steps using the GPU can be converted to CPU if the 74 + * bind can be done immediately (all in-fences satisfied, VM dma-resv kernel 75 + * slot is idle). 76 + * 77 + * Address space 78 + * ------------- 79 + * 80 + * Depending on platform either 48 or 57 bits of address space is supported. 81 + * 82 + * Page sizes 83 + * ---------- 84 + * 85 + * The minimum page size is either 4k or 64k depending on platform and memory 86 + * placement (sysmem vs. VRAM). We enforce that binds must be aligned to the 87 + * minimum page size. 88 + * 89 + * Larger pages (2M or 1GB) can be used for BOs in VRAM, the BO physical address 90 + * is aligned to the larger page size, and VA is aligned to the larger page 91 + * size. Larger pages for userptrs / BOs in sysmem should be possible but is not 92 + * yet implemented. 93 + * 94 + * Sync error handling mode 95 + * ------------------------ 96 + * 97 + * In both modes during the bind IOCTL the user input is validated. 
In sync 98 + * error handling mode the newly bound BO is validated (potentially moved back 99 + * to a region of memory where it can be used), page tables are updated by the 100 + * CPU and the job to do the GPU binds is created in the IOCTL itself. This step 101 + * can fail due to memory pressure. The user can recover by freeing memory and 102 + * trying this operation again. 103 + * 104 + * Async error handling mode 105 + * ------------------------- 106 + * 107 + * In async error handling the step of validating the BO, updating page tables, 108 + * and generating a job are deferred to an async worker. As this step can now 109 + * fail after the IOCTL has reported success we need an error handling flow from 110 + * which the user can recover. 111 + * 112 + * The solution is for a user to register a user address with the VM, to which the 113 + * VM reports errors. The ufence wait interface can be used to wait on 114 + * a VM going into an error state. Once an error is reported the VM's async 115 + * worker is paused. While the VM's async worker is paused, sync 116 + * XE_VM_BIND_OP_UNMAP operations are allowed (this can free memory). Once the 117 + * user believes the error state is fixed, the async worker can be resumed via the 118 + * XE_VM_BIND_OP_RESTART operation. When VM async bind work is restarted, the 119 + * first operation processed is the operation that caused the original error. 120 + * 121 + * Bind queues / engines 122 + * --------------------- 123 + * 124 + * Think of the case where we have two bind operations A + B which are submitted 125 + * in that order. A has in fences while B has none. If using a single bind 126 + * queue, B is now blocked on A's in fences even though it is ready to run. This 127 + * example is a real use case for VK sparse binding. We work around this 128 + * limitation by implementing bind engines. 
129 + * 130 + * In the bind IOCTL the user can optionally pass in an engine ID which must map 131 + * to an engine which is of the special class DRM_XE_ENGINE_CLASS_VM_BIND. 132 + * Underneath this is really a virtual engine that can run on any of the copy 133 + * hardware engines. The job(s) created by each IOCTL are inserted into this 134 + * engine's ring. In the example above if A and B have different bind engines B 135 + * is free to pass A. If the engine ID field is omitted, the default bind queue 136 + * for the VM is used. 137 + * 138 + * TODO: Explain race in issue 41 and how we solve it 139 + * 140 + * Array of bind operations 141 + * ------------------------ 142 + * 143 + * The uAPI allows multiple bind operations to be passed in via a user array, 144 + * of struct drm_xe_vm_bind_op, in a single VM bind IOCTL. This interface 145 + * matches the VK sparse binding API. The implementation is rather simple, parse 146 + * the array into a list of operations, pass the in fences to the first operation, 147 + * and pass the out fences to the last operation. The ordered nature of a bind 148 + * engine makes this possible. 149 + * 150 + * Munmap semantics for unbinds 151 + * ---------------------------- 152 + * 153 + * Munmap allows things like: 154 + * 155 + * .. code-block:: 156 + * 157 + * 0x0000-0x2000 and 0x3000-0x5000 have mappings 158 + * Munmap 0x1000-0x4000, results in mappings 0x0000-0x1000 and 0x4000-0x5000 159 + * 160 + * To support this semantic in the above example we decompose the above example 161 + * into 4 operations: 162 + * 163 + * .. code-block:: 164 + * 165 + * unbind 0x0000-0x2000 166 + * unbind 0x3000-0x5000 167 + * rebind 0x0000-0x1000 168 + * rebind 0x4000-0x5000 169 + * 170 + * Why not just do a partial unbind of 0x1000-0x2000 and 0x3000-0x4000? This 171 + * falls apart when using large pages at the edges and the unbind forces us to 172 + * use a smaller page size. 
For simplicity we always issue a set of unbinds 173 + * unmapping anything in the range and at most 2 rebinds on the edges. 174 + * 175 + * Similar to an array of binds, in fences are passed to the first operation and 176 + * out fences are signaled on the last operation. 177 + * 178 + * In this example there is a window of time where 0x0000-0x1000 and 179 + * 0x4000-0x5000 are invalid but the user didn't ask for these addresses to be 180 + * removed from the mapping. To work around this we treat any munmap style 181 + * unbinds which require a rebind as kernel operations (BO eviction or userptr 182 + * invalidation). The first operation waits on the VM's 183 + * DMA_RESV_USAGE_PREEMPT_FENCE slots (waits for all pending jobs on VM to 184 + * complete / triggers preempt fences) and the last operation is installed in 185 + * the VM's DMA_RESV_USAGE_KERNEL slot (blocks future jobs / resume compute mode 186 + * VM). The caveat is all dma-resv slots must be updated atomically with respect 187 + * to execs and compute mode rebind worker. To accomplish this, hold the 188 + * vm->lock in write mode from the first operation until the last. 189 + * 190 + * Deferred binds in fault mode 191 + * ---------------------------- 192 + * 193 + * If a VM is in fault mode (TODO: link to fault mode), new bind operations that 194 + * create mappings are by default deferred to the page fault handler (first 195 + * use). This behavior can be overridden by setting the flag 196 + * XE_VM_BIND_FLAG_IMMEDIATE which indicates the mapping should be created 197 + * immediately. 198 + * 199 + * User pointer 200 + * ============ 201 + * 202 + * User pointers are user allocated memory (malloc'd, mmap'd, etc.) for which the 203 + * user wants to create a GPU mapping. Typically in other DRM drivers a dummy BO 204 + * was created and then a binding was created. We bypass creating a dummy BO in 205 + * XE and simply create a binding directly from the userptr. 
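The munmap-style decomposition described above — unbind every overlapping mapping, then rebind at most two surviving edges — can be modeled as below. A toy sketch, assuming half-open [start, end) ranges for brevity (the driver itself uses inclusive end addresses), with illustrative names:

```python
# Toy model of munmap-style unbind decomposition: every mapping that
# overlaps the munmap range is fully unbound, and up to two rebinds
# recreate the surviving left / right edges.
def munmap_ops(mappings, start, end):
    """mappings: list of (m_start, m_end) half-open ranges.
    Returns the list of ("unbind"/"rebind", lo, hi) operations."""
    ops = []
    for (m_start, m_end) in mappings:
        if m_end <= start or m_start >= end:
            continue  # no overlap, mapping untouched
        ops.append(("unbind", m_start, m_end))
        if m_start < start:
            ops.append(("rebind", m_start, start))  # left edge survives
        if m_end > end:
            ops.append(("rebind", end, m_end))      # right edge survives
    return ops
```

Running this over the doc's example (mappings at 0x0000-0x2000 and 0x3000-0x5000, munmap of 0x1000-0x4000) yields the same four operations as the code-block above, though this sketch interleaves each mapping's unbind with its edge rebinds rather than grouping all unbinds first.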
206 + * 207 + * Invalidation 208 + * ------------ 209 + * 210 + * Since this is core kernel managed memory the kernel can move this memory 211 + * whenever it wants. We register an invalidation MMU notifier to alert XE when 212 + * a user pointer is about to move. The invalidation notifier needs to block 213 + * until all pending users (jobs or compute mode engines) of the userptr are 214 + * idle to ensure no faults. This is done by waiting on all of the VM's dma-resv slots. 215 + * 216 + * Rebinds 217 + * ------- 218 + * 219 + * Either the next exec (non-compute) or rebind worker (compute mode) will 220 + * rebind the userptr. The invalidation MMU notifier kicks the rebind worker 221 + * after the VM dma-resv wait if the VM is in compute mode. 222 + * 223 + * Compute mode 224 + * ============ 225 + * 226 + * A VM in compute mode enables long running workloads and ultra low latency 227 + * submission (ULLS). ULLS is implemented via a continuously running batch + 228 + * semaphores. This enables the user to insert jump-to-new-batch commands 229 + * into the continuously running batch. In both cases these batches exceed the 230 + * time a dma fence is allowed to exist for before signaling; as such, dma fences 231 + * are not used when a VM is in compute mode. User fences (TODO: link user fence 232 + * doc) are used instead to signal an operation's completion. 233 + * 234 + * Preempt fences 235 + * -------------- 236 + * 237 + * If the kernel decides to move memory around (either userptr invalidate, BO 238 + * eviction, or munmap style unbind which results in a rebind) and a batch is 239 + * running on an engine, that batch can fault or cause memory corruption as the 240 + * page tables for the moved memory are no longer valid. To work around this we 241 + * introduce the concept of preempt fences.
When sw signaling is enabled on a 242 + * preempt fence it tells the submission backend to kick that engine off the 243 + * hardware and the preempt fence signals when the engine is off the hardware. 244 + * Once all preempt fences are signaled for a VM the kernel can safely move the 245 + * memory and kick the rebind worker which resumes all the engines' execution. 246 + * 247 + * A preempt fence, for every engine using the VM, is installed in the VM's 248 + * dma-resv DMA_RESV_USAGE_PREEMPT_FENCE slot. The same preempt fence, for every 249 + * engine using the VM, is also installed into the same dma-resv slot of every 250 + * external BO mapped in the VM. 251 + * 252 + * Rebind worker 253 + * ------------- 254 + * 255 + * The rebind worker is very similar to an exec. It is responsible for rebinding 256 + * evicted BOs or userptrs, waiting on those operations, installing new preempt 257 + * fences, and finally resuming execution of engines in the VM. 258 + * 259 + * Flow 260 + * ~~~~ 261 + * 262 + * ..
code-block:: 263 + * 264 + * <----------------------------------------------------------------------| 265 + * Check if VM is closed, if so bail out | 266 + * Lock VM global lock in read mode | 267 + * Pin userptrs (also finds userptrs invalidated since last rebind worker) | 268 + * Lock VM dma-resv and external BOs dma-resv | 269 + * Validate BOs that have been evicted | 270 + * Wait on and allocate new preempt fences for every engine using the VM | 271 + * Rebind invalidated userptrs + evicted BOs | 272 + * Wait on last rebind fence | 273 + * Wait VM's DMA_RESV_USAGE_KERNEL dma-resv slot | 274 + * Install preempt fences and issue resume for every engine using the VM | 275 + * Check if any userptrs invalidated since pin | 276 + * Squash resume for all engines | 277 + * Unlock all | 278 + * Wait all VM's dma-resv slots | 279 + * Retry ---------------------------------------------------------- 280 + * Release all engines waiting to resume 281 + * Unlock all 282 + * 283 + * Timeslicing 284 + * ----------- 285 + * 286 + * In order to prevent an engine from continuously being kicked off the hardware 287 + * and making no forward progress, an engine has a period of time it is allowed to 288 + * run after resume before it can be kicked off again. This effectively gives 289 + * each engine a timeslice. 290 + * 291 + * Handling multiple GTs 292 + * ===================== 293 + * 294 + * If a GT has slower access to some regions and the page table structure is in 295 + * the slow region, the performance on that GT could be adversely affected. To 296 + * work around this we allow a VM's page tables to be shadowed in multiple GTs. 297 + * When a VM is created, a default bind engine and PT table structure are created 298 + * on each GT. 299 + * 300 + * Binds can optionally pass in a mask of GTs where a mapping should be created; 301 + * if this mask is zero then we default to all the GTs where the VM has page 302 + * tables.
303 + * 304 + * The implementation for this breaks down into a bunch of for_each_gt loops in 305 + * various places plus exporting a composite fence for multi-GT binds to the 306 + * user. 307 + * 308 + * Fault mode (unified shared memory) 309 + * ================================== 310 + * 311 + * A VM in fault mode can be enabled on devices that support page faults. If 312 + * page faults are enabled, using dma fences can potentially induce a deadlock: 313 + * A pending page fault can hold up the GPU work which holds up the dma fence 314 + * signaling, and memory allocation is usually required to resolve a page 315 + * fault, but memory allocation is not allowed to gate dma fence signaling. As 316 + * such, dma fences are not allowed when a VM is in fault mode. Because dma-fences 317 + * are not allowed, long running workloads and ULLS are enabled on a faulting 318 + * VM. 319 + * 320 + * Deferred VM binds 321 + * ----------------- 322 + * 323 + * By default, on a faulting VM binds just allocate the VMA and the actual 324 + * updating of the page tables is deferred to the page fault handler. This 325 + * behavior can be overridden by setting the flag XE_VM_BIND_FLAG_IMMEDIATE in 326 + * the VM bind which will then do the bind immediately. 327 + * 328 + * Page fault handler 329 + * ------------------ 330 + * 331 + * Page faults are received in the G2H worker under the CT lock which is in the 332 + * path of dma fences (no memory allocations are allowed, faults require memory 333 + * allocations) thus we cannot process faults under the CT lock. Another issue 334 + * is faults issue TLB invalidations which require G2H credits and we cannot 335 + * allocate G2H credits in the G2H handlers without deadlocking. Lastly, we do 336 + * not want the CT lock to be an outer lock of the VM global lock (the VM global 337 + * lock is required for fault processing).
338 + * 339 + * To work around the above issues with processing faults in the G2H worker, we 340 + * sink faults to a buffer which is large enough to sink all possible faults on 341 + * the GT (1 per hardware engine) and kick a worker to process the faults. Since 342 + * the page fault G2Hs are already received in a worker, kicking another worker 343 + * adds more latency to a critical performance path. We add a fast path in the 344 + * G2H irq handler which looks at the first G2H and if it is a page fault we sink 345 + * the fault to the buffer and kick the worker to process the fault. TLB 346 + * invalidation responses are also in the critical path so these can also be 347 + * processed in this fast path. 348 + * 349 + * Multiple buffers and workers are used and hashed over based on the ASID so 350 + * faults from different VMs can be processed in parallel. 351 + * 352 + * The page fault handler itself is rather simple; the flow is below. 353 + * 354 + * .. code-block:: 355 + * 356 + * Lookup VM from ASID in page fault G2H 357 + * Lock VM global lock in read mode 358 + * Lookup VMA from address in page fault G2H 359 + * Check if VMA is valid, if not bail 360 + * Check if VMA's BO has backing store, if not allocate 361 + * <----------------------------------------------------------------------| 362 + * If userptr, pin pages | 363 + * Lock VM & BO dma-resv locks | 364 + * If atomic fault, migrate to VRAM, else validate BO location | 365 + * Issue rebind | 366 + * Wait on rebind to complete | 367 + * Check if userptr invalidated since pin | 368 + * Drop VM & BO dma-resv locks | 369 + * Retry ---------------------------------------------------------- 370 + * Unlock all 371 + * Issue blocking TLB invalidation 372 + * Send page fault response to GuC 373 + * 374 + * Access counters 375 + * --------------- 376 + * 377 + * Access counters can be configured to trigger a G2H indicating the device is 378 + * accessing VMAs in system memory frequently as a hint to migrate those VMAs
to 379 + * VRAM. 380 + * 381 + * Same as the page fault handler, access counter G2Hs cannot be processed in the 382 + * G2H worker under the CT lock. Again we use a buffer to sink access counter 383 + * G2Hs. Unlike page faults there is no upper bound, so if the buffer is full we 384 + * simply drop the G2H. Access counters are a best case optimization and it is 385 + * safe to drop these, unlike page faults. 386 + * 387 + * The access counter handler itself is rather simple; the flow is below. 388 + * 389 + * .. code-block:: 390 + * 391 + * Lookup VM from ASID in access counter G2H 392 + * Lock VM global lock in read mode 393 + * Lookup VMA from address in access counter G2H 394 + * If userptr, bail, nothing to do 395 + * Lock VM & BO dma-resv locks 396 + * Issue migration to VRAM 397 + * Unlock all 398 + * 399 + * Notice no rebind is issued in the access counter handler as the rebind will 400 + * be issued on the next page fault. 401 + * 402 + * Caveats with eviction / user pointer invalidation 403 + * ------------------------------------------------- 404 + * 405 + * In the case of eviction and user pointer invalidation on a faulting VM, there 406 + * is no need to issue a rebind; rather, we just need to blow away the page tables 407 + * for the VMAs and the page fault handler will rebind the VMAs when they fault. 408 + * The caveat is that to update / read the page table structure the VM global lock is 409 + * needed. In both the case of eviction and user pointer invalidation, locks are 410 + * held which make acquiring the VM global lock impossible. To work around this 411 + * every VMA maintains a list of leaf page table entries which should be written 412 + * to zero to blow away the VMA's page tables. After writing zero to these 413 + * entries a blocking TLB invalidate is issued. At this point it is safe for the 414 + * kernel to move the VMA's memory around.
This is necessarily a lockless 415 + * algorithm and is safe as leaf entries cannot be changed while either an eviction or 416 + * userptr invalidation is occurring. 417 + * 418 + * Locking 419 + * ======= 420 + * 421 + * VM locking protects all of the core data paths (bind operations, execs, 422 + * evictions, and the compute mode rebind worker) in XE. 423 + * 424 + * Locks 425 + * ----- 426 + * 427 + * VM global lock (vm->lock) - rw semaphore lock. Outermost lock which protects 428 + * the list of userptrs mapped in the VM, the list of engines using this VM, and 429 + * the array of external BOs mapped in the VM. Adding or removing any of the 430 + * aforementioned state from the VM requires acquiring this lock in write mode. The VM 431 + * bind path also acquires this lock in write mode, while the exec / compute 432 + * mode rebind worker acquires this lock in read mode. 433 + * 434 + * VM dma-resv lock (vm->ttm.base.resv->lock) - WW lock. Protects VM dma-resv 435 + * slots which are shared with any private BO in the VM. Expected to be acquired 436 + * during VM binds, execs, and the compute mode rebind worker. This lock is also 437 + * held when private BOs are being evicted. 438 + * 439 + * external BO dma-resv lock (bo->ttm.base.resv->lock) - WW lock. Protects 440 + * external BO dma-resv slots. Expected to be acquired during VM binds (in 441 + * addition to the VM dma-resv lock). All external BO dma-resv locks within a VM are 442 + * expected to be acquired (in addition to the VM dma-resv lock) during execs 443 + * and the compute mode rebind worker. This lock is also held when an external 444 + * BO is being evicted. 445 + * 446 + * Putting it all together 447 + * ----------------------- 448 + * 449 + * 1. An exec and bind operation with the same VM can't be executing at the same 450 + * time (vm->lock). 451 + * 452 + * 2. A compute mode rebind worker and bind operation with the same VM can't be 453 + * executing at the same time (vm->lock). 454 + * 455 + * 3.
We can't add / remove userptrs or external BOs to a VM while an exec with 456 + * the same VM is executing (vm->lock). 457 + * 458 + * 4. We can't add / remove userptrs, external BOs, or engines to a VM while a 459 + * compute mode rebind worker with the same VM is executing (vm->lock). 460 + * 461 + * 5. Evictions within a VM can't happen while an exec with the same VM is 462 + * executing (dma-resv locks). 463 + * 464 + * 6. Evictions within a VM can't happen while a compute mode rebind worker 465 + * with the same VM is executing (dma-resv locks). 466 + * 467 + * dma-resv usage 468 + * ============== 469 + * 470 + * As previously stated, to enforce the ordering of kernel ops (eviction, userptr 471 + * invalidation, munmap style unbinds which result in a rebind), rebinds during 472 + * execs, execs, and resumes in the rebind worker, we use both the VM's and 473 + * external BOs' dma-resv slots. Let's try to make this as clear as possible. 474 + * 475 + * Slot installation 476 + * ----------------- 477 + * 478 + * 1. Jobs from kernel ops install themselves into the DMA_RESV_USAGE_KERNEL 479 + * slot of either an external BO or VM (depends on if the kernel op is operating on 480 + * an external or private BO) 481 + * 482 + * 2. In non-compute mode, jobs from execs install themselves into the 483 + * DMA_RESV_USAGE_BOOKKEEP slot of the VM 484 + * 485 + * 3. In non-compute mode, jobs from execs install themselves into the 486 + * DMA_RESV_USAGE_WRITE slot of all external BOs in the VM 487 + * 488 + * 4. Jobs from binds install themselves into the DMA_RESV_USAGE_BOOKKEEP slot 489 + * of the VM 490 + * 491 + * 5. Jobs from binds install themselves into the DMA_RESV_USAGE_BOOKKEEP slot 492 + * of the external BO (if the bind is to an external BO, this is in addition to #4) 493 + * 494 + * 6. Every engine using a compute mode VM has a preempt fence installed into 495 + * the DMA_RESV_USAGE_PREEMPT_FENCE slot of the VM 496 + * 497 + * 7.
Every engine using a compute mode VM has a preempt fence installed into 498 + * the DMA_RESV_USAGE_PREEMPT_FENCE slot of all the external BOs in the VM 499 + * 500 + * Slot waiting 501 + * ------------ 502 + * 503 + * 1. The execution of all jobs from kernel ops shall wait on all slots 504 + * (DMA_RESV_USAGE_PREEMPT_FENCE) of either an external BO or VM (depends on if the 505 + * kernel op is operating on an external or private BO) 506 + * 507 + * 2. In non-compute mode, the execution of all jobs from rebinds in execs shall 508 + * wait on the DMA_RESV_USAGE_KERNEL slot of either an external BO or VM 509 + * (depends on if the rebind is operating on an external or private BO) 510 + * 511 + * 3. In non-compute mode, the execution of all jobs from execs shall wait on the 512 + * last rebind job 513 + * 514 + * 4. In compute mode, the execution of all jobs from rebinds in the rebind 515 + * worker shall wait on the DMA_RESV_USAGE_KERNEL slot of either an external BO 516 + * or VM (depends on if the rebind is operating on an external or private BO) 517 + * 518 + * 5. In compute mode, resumes in the rebind worker shall wait on the last rebind fence 519 + * 520 + * 6. In compute mode, resumes in the rebind worker shall wait on the 521 + * DMA_RESV_USAGE_KERNEL slot of the VM 522 + * 523 + * Putting it all together 524 + * ----------------------- 525 + * 526 + * 1. New jobs from kernel ops are blocked behind any existing jobs from 527 + * non-compute mode execs 528 + * 529 + * 2. New jobs from non-compute mode execs are blocked behind any existing jobs 530 + * from kernel ops and rebinds 531 + * 532 + * 3. New jobs from kernel ops are blocked behind all preempt fences signaling in 533 + * compute mode 534 + * 535 + * 4. Compute mode engine resumes are blocked behind any existing jobs from 536 + * kernel ops and rebinds 537 + * 538 + * Future work 539 + * =========== 540 + * 541 + * Support large pages for sysmem and userptr.
542 + * 543 + * Update page faults to handle BOs at page level granularity (e.g. part of a BO 544 + * could be in system memory while another part could be in VRAM). 545 + * 546 + * The page fault handler will likely be optimized a bit more (e.g. rebinds always 547 + * wait on the dma-resv kernel slots of the VM or BO, but technically we only have to 548 + * wait on the BO moving. If using a job to do the rebind, we could avoid blocking in 549 + * the page fault handler and rather attach a callback to the fence of the rebind job to 550 + * signal page fault completion. Our handling of short-circuiting atomic faults 551 + * for bound VMAs could be better. etc...). We can tune all of this once we have 552 + * benchmarks / performance numbers from workloads up and running. 553 + */ 554 + 555 + #endif
+347
drivers/gpu/drm/xe/xe_vm_madvise.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2021 Intel Corporation 4 + */ 5 + 6 + #include <drm/xe_drm.h> 7 + #include <drm/ttm/ttm_tt.h> 8 + #include <linux/nospec.h> 9 + 10 + #include "xe_bo.h" 11 + #include "xe_vm.h" 12 + #include "xe_vm_madvise.h" 13 + 14 + static int madvise_preferred_mem_class(struct xe_device *xe, struct xe_vm *vm, 15 + struct xe_vma **vmas, int num_vmas, 16 + u64 value) 17 + { 18 + int i, err; 19 + 20 + if (XE_IOCTL_ERR(xe, value > XE_MEM_REGION_CLASS_VRAM)) 21 + return -EINVAL; 22 + 23 + if (XE_IOCTL_ERR(xe, value == XE_MEM_REGION_CLASS_VRAM && 24 + !xe->info.is_dgfx)) 25 + return -EINVAL; 26 + 27 + for (i = 0; i < num_vmas; ++i) { 28 + struct xe_bo *bo; 29 + struct ww_acquire_ctx ww; 30 + 31 + bo = vmas[i]->bo; 32 + 33 + err = xe_bo_lock(bo, &ww, 0, true); 34 + if (err) 35 + return err; 36 + bo->props.preferred_mem_class = value; 37 + xe_bo_placement_for_flags(xe, bo, bo->flags); 38 + xe_bo_unlock(bo, &ww); 39 + } 40 + 41 + return 0; 42 + } 43 + 44 + static int madvise_preferred_gt(struct xe_device *xe, struct xe_vm *vm, 45 + struct xe_vma **vmas, int num_vmas, u64 value) 46 + { 47 + int i, err; 48 + 49 + if (XE_IOCTL_ERR(xe, value > xe->info.tile_count)) 50 + return -EINVAL; 51 + 52 + for (i = 0; i < num_vmas; ++i) { 53 + struct xe_bo *bo; 54 + struct ww_acquire_ctx ww; 55 + 56 + bo = vmas[i]->bo; 57 + 58 + err = xe_bo_lock(bo, &ww, 0, true); 59 + if (err) 60 + return err; 61 + bo->props.preferred_gt = value; 62 + xe_bo_placement_for_flags(xe, bo, bo->flags); 63 + xe_bo_unlock(bo, &ww); 64 + } 65 + 66 + return 0; 67 + } 68 + 69 + static int madvise_preferred_mem_class_gt(struct xe_device *xe, 70 + struct xe_vm *vm, 71 + struct xe_vma **vmas, int num_vmas, 72 + u64 value) 73 + { 74 + int i, err; 75 + u32 gt_id = upper_32_bits(value); 76 + u32 mem_class = lower_32_bits(value); 77 + 78 + if (XE_IOCTL_ERR(xe, mem_class > XE_MEM_REGION_CLASS_VRAM)) 79 + return -EINVAL; 80 + 81 + if (XE_IOCTL_ERR(xe, mem_class == 
XE_MEM_REGION_CLASS_VRAM && 82 + !xe->info.is_dgfx)) 83 + return -EINVAL; 84 + 85 + if (XE_IOCTL_ERR(xe, gt_id > xe->info.tile_count)) 86 + return -EINVAL; 87 + 88 + for (i = 0; i < num_vmas; ++i) { 89 + struct xe_bo *bo; 90 + struct ww_acquire_ctx ww; 91 + 92 + bo = vmas[i]->bo; 93 + 94 + err = xe_bo_lock(bo, &ww, 0, true); 95 + if (err) 96 + return err; 97 + bo->props.preferred_mem_class = mem_class; 98 + bo->props.preferred_gt = gt_id; 99 + xe_bo_placement_for_flags(xe, bo, bo->flags); 100 + xe_bo_unlock(bo, &ww); 101 + } 102 + 103 + return 0; 104 + } 105 + 106 + static int madvise_cpu_atomic(struct xe_device *xe, struct xe_vm *vm, 107 + struct xe_vma **vmas, int num_vmas, u64 value) 108 + { 109 + int i, err; 110 + 111 + for (i = 0; i < num_vmas; ++i) { 112 + struct xe_bo *bo; 113 + struct ww_acquire_ctx ww; 114 + 115 + bo = vmas[i]->bo; 116 + if (XE_IOCTL_ERR(xe, !(bo->flags & XE_BO_CREATE_SYSTEM_BIT))) 117 + return -EINVAL; 118 + 119 + err = xe_bo_lock(bo, &ww, 0, true); 120 + if (err) 121 + return err; 122 + bo->props.cpu_atomic = !!value; 123 + 124 + /* 125 + * All future CPU accesses must be from system memory only, we 126 + * just invalidate the CPU page tables which will trigger a 127 + * migration on next access. 
128 + */ 129 + if (bo->props.cpu_atomic) 130 + ttm_bo_unmap_virtual(&bo->ttm); 131 + xe_bo_unlock(bo, &ww); 132 + } 133 + 134 + return 0; 135 + } 136 + 137 + static int madvise_device_atomic(struct xe_device *xe, struct xe_vm *vm, 138 + struct xe_vma **vmas, int num_vmas, u64 value) 139 + { 140 + int i, err; 141 + 142 + for (i = 0; i < num_vmas; ++i) { 143 + struct xe_bo *bo; 144 + struct ww_acquire_ctx ww; 145 + 146 + bo = vmas[i]->bo; 147 + if (XE_IOCTL_ERR(xe, !(bo->flags & XE_BO_CREATE_VRAM0_BIT) && 148 + !(bo->flags & XE_BO_CREATE_VRAM1_BIT))) 149 + return -EINVAL; 150 + 151 + err = xe_bo_lock(bo, &ww, 0, true); 152 + if (err) 153 + return err; 154 + bo->props.device_atomic = !!value; 155 + xe_bo_unlock(bo, &ww); 156 + } 157 + 158 + return 0; 159 + } 160 + 161 + static int madvise_priority(struct xe_device *xe, struct xe_vm *vm, 162 + struct xe_vma **vmas, int num_vmas, u64 value) 163 + { 164 + int i, err; 165 + 166 + if (XE_IOCTL_ERR(xe, value > DRM_XE_VMA_PRIORITY_HIGH)) 167 + return -EINVAL; 168 + 169 + if (XE_IOCTL_ERR(xe, value == DRM_XE_VMA_PRIORITY_HIGH && 170 + !capable(CAP_SYS_NICE))) 171 + return -EPERM; 172 + 173 + for (i = 0; i < num_vmas; ++i) { 174 + struct xe_bo *bo; 175 + struct ww_acquire_ctx ww; 176 + 177 + bo = vmas[i]->bo; 178 + 179 + err = xe_bo_lock(bo, &ww, 0, true); 180 + if (err) 181 + return err; 182 + bo->ttm.priority = value; 183 + ttm_bo_move_to_lru_tail(&bo->ttm); 184 + xe_bo_unlock(bo, &ww); 185 + } 186 + 187 + return 0; 188 + } 189 + 190 + static int madvise_pin(struct xe_device *xe, struct xe_vm *vm, 191 + struct xe_vma **vmas, int num_vmas, u64 value) 192 + { 193 + XE_WARN_ON("NIY"); 194 + return 0; 195 + } 196 + 197 + typedef int (*madvise_func)(struct xe_device *xe, struct xe_vm *vm, 198 + struct xe_vma **vmas, int num_vmas, u64 value); 199 + 200 + static const madvise_func madvise_funcs[] = { 201 + [DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS] = madvise_preferred_mem_class, 202 + [DRM_XE_VM_MADVISE_PREFERRED_GT] = 
madvise_preferred_gt, 203 + [DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS_GT] = 204 + madvise_preferred_mem_class_gt, 205 + [DRM_XE_VM_MADVISE_CPU_ATOMIC] = madvise_cpu_atomic, 206 + [DRM_XE_VM_MADVISE_DEVICE_ATOMIC] = madvise_device_atomic, 207 + [DRM_XE_VM_MADVISE_PRIORITY] = madvise_priority, 208 + [DRM_XE_VM_MADVISE_PIN] = madvise_pin, 209 + }; 210 + 211 + static struct xe_vma *node_to_vma(const struct rb_node *node) 212 + { 213 + BUILD_BUG_ON(offsetof(struct xe_vma, vm_node) != 0); 214 + return (struct xe_vma *)node; 215 + } 216 + 217 + static struct xe_vma ** 218 + get_vmas(struct xe_vm *vm, int *num_vmas, u64 addr, u64 range) 219 + { 220 + struct xe_vma **vmas; 221 + struct xe_vma *vma, *__vma, lookup; 222 + int max_vmas = 8; 223 + struct rb_node *node; 224 + 225 + lockdep_assert_held(&vm->lock); 226 + 227 + vmas = kmalloc(max_vmas * sizeof(*vmas), GFP_KERNEL); 228 + if (!vmas) 229 + return NULL; 230 + 231 + lookup.start = addr; 232 + lookup.end = addr + range - 1; 233 + 234 + vma = xe_vm_find_overlapping_vma(vm, &lookup); 235 + if (!vma) 236 + return vmas; 237 + 238 + if (!xe_vma_is_userptr(vma)) { 239 + vmas[*num_vmas] = vma; 240 + *num_vmas += 1; 241 + } 242 + 243 + node = &vma->vm_node; 244 + while ((node = rb_next(node))) { 245 + if (!xe_vma_cmp_vma_cb(&lookup, node)) { 246 + __vma = node_to_vma(node); 247 + if (xe_vma_is_userptr(__vma)) 248 + continue; 249 + 250 + if (*num_vmas == max_vmas) { 251 + struct xe_vma **__vmas = 252 + krealloc(vmas, 2 * max_vmas * sizeof(*vmas), 253 + GFP_KERNEL); 254 + 255 + if (!__vmas) { 256 + kfree(vmas); 257 + return NULL; 258 + } 259 + vmas = __vmas; 260 + max_vmas <<= 1; 261 + } 262 + vmas[*num_vmas] = __vma; 263 + *num_vmas += 1; 264 + } else { 265 + break; 266 + } 267 + } 268 + 269 + node = &vma->vm_node; 270 + while ((node = rb_prev(node))) { 271 + if (!xe_vma_cmp_vma_cb(&lookup, node)) { 272 + __vma = node_to_vma(node); 273 + if (xe_vma_is_userptr(__vma)) 274 + continue; 275 + 276 + if (*num_vmas == max_vmas) { 277 + struct xe_vma **__vmas = 278 + krealloc(vmas, max_vmas * 
sizeof(*vmas) * 2, 279 + GFP_KERNEL); 280 + 281 + if (!__vmas) { 282 + kfree(vmas); 283 + return NULL; 284 + } 285 + vmas = __vmas; 286 + max_vmas <<= 1; 287 + } 288 + vmas[*num_vmas] = __vma; 289 + *num_vmas += 1; 290 + } else { 291 + break; 292 + } 293 + } 294 + 295 + return vmas; 296 + } 297 + 298 + int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, 299 + struct drm_file *file) 300 + { 301 + struct xe_device *xe = to_xe_device(dev); 302 + struct xe_file *xef = to_xe_file(file); 303 + struct drm_xe_vm_madvise *args = data; 304 + struct xe_vm *vm; 305 + struct xe_vma **vmas = NULL; 306 + int num_vmas = 0, err = 0, idx; 307 + 308 + if (XE_IOCTL_ERR(xe, args->extensions)) 309 + return -EINVAL; 310 + 311 + if (XE_IOCTL_ERR(xe, args->property >= ARRAY_SIZE(madvise_funcs))) 312 + return -EINVAL; 313 + 314 + vm = xe_vm_lookup(xef, args->vm_id); 315 + if (XE_IOCTL_ERR(xe, !vm)) 316 + return -EINVAL; 317 + 318 + if (XE_IOCTL_ERR(xe, xe_vm_is_closed(vm))) { 319 + err = -ENOENT; 320 + goto put_vm; 321 + } 322 + 323 + if (XE_IOCTL_ERR(xe, !xe_vm_in_fault_mode(vm))) { 324 + err = -EINVAL; 325 + goto put_vm; 326 + } 327 + 328 + down_read(&vm->lock); 329 + 330 + vmas = get_vmas(vm, &num_vmas, args->addr, args->range); 331 + if (XE_IOCTL_ERR(xe, !vmas)) { 332 + err = -ENOMEM; 333 + goto unlock_vm; 334 + } 335 + 336 + if (XE_IOCTL_ERR(xe, !num_vmas)) { 337 + err = -EINVAL; 338 + goto unlock_vm; 339 + } 340 + 341 + idx = array_index_nospec(args->property, ARRAY_SIZE(madvise_funcs)); 342 + err = madvise_funcs[idx](xe, vm, vmas, num_vmas, args->value); 343 + 344 + unlock_vm: 345 + up_read(&vm->lock); 346 + put_vm: 347 + xe_vm_put(vm); 348 + kfree(vmas); 349 + return err; 350 + }
+15
drivers/gpu/drm/xe/xe_vm_madvise.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2021 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_VM_MADVISE_H_ 7 + #define _XE_VM_MADVISE_H_ 8 + 9 + struct drm_device; 10 + struct drm_file; 11 + 12 + int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, 13 + struct drm_file *file); 14 + 15 + #endif
+337
drivers/gpu/drm/xe/xe_vm_types.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_VM_TYPES_H_ 7 + #define _XE_VM_TYPES_H_ 8 + 9 + #include <linux/dma-resv.h> 10 + #include <linux/kref.h> 11 + #include <linux/mmu_notifier.h> 12 + #include <linux/scatterlist.h> 13 + 14 + #include "xe_device_types.h" 15 + #include "xe_pt_types.h" 16 + 17 + struct xe_bo; 18 + struct xe_vm; 19 + 20 + struct xe_vma { 21 + struct rb_node vm_node; 22 + /** @vm: VM which this VMA belongs to */ 23 + struct xe_vm *vm; 24 + 25 + /** 26 + * @start: start address of this VMA within its address domain, end - 27 + * start + 1 == VMA size 28 + */ 29 + u64 start; 30 + /** @end: end address of this VMA within its address domain */ 31 + u64 end; 32 + /** @pte_flags: pte flags for this VMA */ 33 + u32 pte_flags; 34 + 35 + /** @bo: BO if not a userptr, must be NULL if userptr */ 36 + struct xe_bo *bo; 37 + /** @bo_offset: offset into BO if not a userptr, unused for userptr */ 38 + u64 bo_offset; 39 + 40 + /** @gt_mask: GT mask of where to create binding for this VMA */ 41 + u64 gt_mask; 42 + 43 + /** 44 + * @gt_present: GT mask of bindings present for this VMA. 45 + * Protected by vm->lock, vm->resv and for userptrs, 46 + * vm->userptr.notifier_lock for writing. Needs either for reading, 47 + * but if reading is done under the vm->lock only, it needs to be held 48 + * in write mode. 49 + */ 50 + u64 gt_present; 51 + 52 + /** 53 + * @destroyed: VMA is destroyed, in the sense that it shouldn't be 54 + * subject to rebind anymore. This field must be written under 55 + * the vm lock in write mode and the userptr.notifier_lock in 56 + * either mode. Read under the vm lock or the userptr.notifier_lock in 57 + * write mode. 58 + */ 59 + bool destroyed; 60 + 61 + /** 62 + * @first_munmap_rebind: VMA is first in a sequence of ops that triggers 63 + * a rebind (munmap style VM unbinds).
This indicates the operation 64 + * using this VMA must wait on all dma-resv slots (wait for pending jobs 65 + * / trigger preempt fences). 66 + */ 67 + bool first_munmap_rebind; 68 + 69 + /** 70 + * @last_munmap_rebind: VMA is last in a sequence of ops that triggers 71 + * a rebind (munmap style VM unbinds). This indicates the operation 72 + * using this VMA must install itself into the kernel dma-resv slot (blocks 73 + * future jobs) and kick the rebind work in compute mode. 74 + */ 75 + bool last_munmap_rebind; 76 + 77 + /** @use_atomic_access_pte_bit: Set atomic access bit in PTE */ 78 + bool use_atomic_access_pte_bit; 79 + 80 + union { 81 + /** @bo_link: link into BO if not a userptr */ 82 + struct list_head bo_link; 83 + /** @userptr_link: link into VM repin list if userptr */ 84 + struct list_head userptr_link; 85 + }; 86 + 87 + /** 88 + * @rebind_link: link into VM if this VMA needs rebinding, and 89 + * if it's a bo (not userptr) needs validation after a possible 90 + * eviction. Protected by the vm's resv lock. 91 + */ 92 + struct list_head rebind_link; 93 + 94 + /** 95 + * @unbind_link: link or list head if an unbind of multiple VMAs, in 96 + * a single unbind op, is being done.
97 + */ 98 + struct list_head unbind_link; 99 + 100 + /** @destroy_cb: callback to destroy VMA when unbind job is done */ 101 + struct dma_fence_cb destroy_cb; 102 + 103 + /** @destroy_work: worker to destroy this VMA */ 104 + struct work_struct destroy_work; 105 + 106 + /** @userptr: user pointer state */ 107 + struct { 108 + /** @ptr: user pointer */ 109 + uintptr_t ptr; 110 + /** @invalidate_link: Link for the vm::userptr.invalidated list */ 111 + struct list_head invalidate_link; 112 + /** 113 + * @notifier: MMU notifier for user pointer (invalidation call back) 114 + */ 115 + struct mmu_interval_notifier notifier; 116 + /** @sgt: storage for a scatter gather table */ 117 + struct sg_table sgt; 118 + /** @sg: allocated scatter gather table */ 119 + struct sg_table *sg; 120 + /** @notifier_seq: notifier sequence number */ 121 + unsigned long notifier_seq; 122 + /** 123 + * @initial_bind: user pointer has been bound at least once. 124 + * write: vm->userptr.notifier_lock in read mode and vm->resv held. 125 + * read: vm->userptr.notifier_lock in write mode or vm->resv held. 126 + */ 127 + bool initial_bind; 128 + #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) 129 + u32 divisor; 130 + #endif 131 + } userptr; 132 + 133 + /** @usm: unified shared memory state */ 134 + struct { 135 + /** @gt_invalidated: VMA has been invalidated */ 136 + u64 gt_invalidated; 137 + } usm; 138 + 139 + struct { 140 + struct list_head rebind_link; 141 + } notifier; 142 + 143 + struct { 144 + /** 145 + * @extobj.link: Link into vm's external object list. 146 + * Protected by the vm lock.
147 + */ 148 + struct list_head link; 149 + } extobj; 150 + }; 151 + 152 + struct xe_device; 153 + 154 + #define xe_vm_assert_held(vm) dma_resv_assert_held(&(vm)->resv) 155 + 156 + struct xe_vm { 157 + struct xe_device *xe; 158 + 159 + struct kref refcount; 160 + 161 + /* engine used for (un)binding vma's */ 162 + struct xe_engine *eng[XE_MAX_GT]; 163 + 164 + /** Protects @rebind_list and the page-table structures */ 165 + struct dma_resv resv; 166 + 167 + u64 size; 168 + struct rb_root vmas; 169 + 170 + struct xe_pt *pt_root[XE_MAX_GT]; 171 + struct xe_bo *scratch_bo[XE_MAX_GT]; 172 + struct xe_pt *scratch_pt[XE_MAX_GT][XE_VM_MAX_LEVEL]; 173 + 174 + /** @flags: flags for this VM, statically set up at creation time */ 175 + #define XE_VM_FLAGS_64K BIT(0) 176 + #define XE_VM_FLAG_COMPUTE_MODE BIT(1) 177 + #define XE_VM_FLAG_ASYNC_BIND_OPS BIT(2) 178 + #define XE_VM_FLAG_MIGRATION BIT(3) 179 + #define XE_VM_FLAG_SCRATCH_PAGE BIT(4) 180 + #define XE_VM_FLAG_FAULT_MODE BIT(5) 181 + #define XE_VM_FLAG_GT_ID(flags) (((flags) >> 6) & 0x3) 182 + #define XE_VM_FLAG_SET_GT_ID(gt) ((gt)->info.id << 6) 183 + unsigned long flags; 184 + 185 + /** @composite_fence_ctx: context for composite fence */ 186 + u64 composite_fence_ctx; 187 + /** @composite_fence_seqno: seqno for composite fence */ 188 + u32 composite_fence_seqno; 189 + 190 + /** 191 + * @lock: outermost lock, protects objects of anything attached to this 192 + * VM 193 + */ 194 + struct rw_semaphore lock; 195 + 196 + /** 197 + * @rebind_list: list of VMAs that need rebinding, and if they are 198 + * bos (not userptr), need validation after a possible eviction. The 199 + * list is protected by @resv. 200 + */ 201 + struct list_head rebind_list; 202 + 203 + /** @rebind_fence: rebind fence from execbuf */ 204 + struct dma_fence *rebind_fence; 205 + 206 + /** 207 + * @destroy_work: worker to destroy VM, needed as the last put can come 208 + * from dma_fence signaling in an irq context and the destroy needs to be able 209 + * to sleep.
210 + */ 211 + struct work_struct destroy_work; 212 + 213 + /** @extobj: bookkeeping for external objects. Protected by the vm lock */ 214 + struct { 215 + /** @entries: number of external BOs attached to this VM */ 216 + u32 entries; 217 + /** @list: list of vmas with external bos attached */ 218 + struct list_head list; 219 + } extobj; 220 + 221 + /** @async_ops: async VM operations (bind / unbinds) */ 222 + struct { 223 + /** @list: list of pending async VM ops */ 224 + struct list_head pending; 225 + /** @work: worker to execute async VM ops */ 226 + struct work_struct work; 227 + /** @lock: protects list of pending async VM ops and fences */ 228 + spinlock_t lock; 229 + /** @error_capture: error capture state */ 230 + struct { 231 + /** @mm: user MM */ 232 + struct mm_struct *mm; 233 + /** 234 + * @addr: user pointer to copy error capture state to 235 + */ 236 + u64 addr; 237 + /** @wq: user fence wait queue for VM errors */ 238 + wait_queue_head_t wq; 239 + } error_capture; 240 + /** @fence: fence state */ 241 + struct { 242 + /** @context: context of async fence */ 243 + u64 context; 244 + /** @seqno: seqno of async fence */ 245 + u32 seqno; 246 + } fence; 247 + /** @error: error state for async VM ops */ 248 + int error; 249 + /** 250 + * @munmap_rebind_inflight: an munmap style VM bind is in the 251 + * middle of a set of ops which requires a rebind at the end. 252 + */ 253 + bool munmap_rebind_inflight; 254 + } async_ops; 255 + 256 + /** @userptr: user pointer state */ 257 + struct { 258 + /** 259 + * @userptr.repin_list: list of VMAs which are user pointers, 260 + * and need repinning. Protected by @lock. 261 + */ 262 + struct list_head repin_list; 263 + /** 264 + * @notifier_lock: protects notifier in write mode and 265 + * submission in read mode. 266 + */ 267 + struct rw_semaphore notifier_lock; 268 + /** 269 + * @userptr.invalidated_lock: Protects the 270 + * @userptr.invalidated list. 
271 + */ 272 + spinlock_t invalidated_lock; 273 + /** 274 + * @userptr.invalidated: List of invalidated userptrs, not yet 275 + * picked 276 + * up for revalidation. Protected from access with the 277 + * @invalidated_lock. Removing items from the list 278 + * additionally requires @lock in write mode, and adding 279 + * items to the list requires the @userptr.notifier_lock in 280 + * write mode. 281 + */ 282 + struct list_head invalidated; 283 + } userptr; 284 + 285 + /** @preempt: preempt state */ 286 + struct { 287 + /** 288 + * @min_run_period_ms: The minimum run period before preempting 289 + * an engine again 290 + */ 291 + s64 min_run_period_ms; 292 + /** @engines: list of engines attached to this VM */ 293 + struct list_head engines; 294 + /** @num_engines: number of user engines attached to this VM */ 295 + int num_engines; 296 + /** 297 + * @rebind_work: worker to rebind invalidated userptrs / evicted 298 + * BOs 299 + */ 300 + struct work_struct rebind_work; 301 + } preempt; 302 + 303 + /** @usm: unified shared memory state */ 304 + struct { 305 + /** @asid: address space ID, unique to each VM */ 306 + u32 asid; 307 + /** 308 + * @last_fault_vma: Last fault VMA, used for fast lookup when we 309 + * get a flood of faults to the same VMA 310 + */ 311 + struct xe_vma *last_fault_vma; 312 + } usm; 313 + 314 + /** 315 + * @notifier: Lists and locks for temporary usage within notifiers where 316 + * we either can't grab the vm lock or the vm resv. 317 + */ 318 + struct { 319 + /** @notifier.list_lock: lock protecting @rebind_list */ 320 + spinlock_t list_lock; 321 + /** 322 + * @notifier.rebind_list: list of vmas that we want to put on the 323 + * main @rebind_list. This list is protected for writing by both 324 + * notifier.list_lock, and the resv of the bo the vma points to, 325 + * and for reading by the notifier.list_lock only. 
326 + */ 327 + struct list_head rebind_list; 328 + } notifier; 329 + 330 + /** @error_capture: allows tracking of errors */ 331 + struct { 332 + /** @capture_once: capture only one error per VM */ 333 + bool capture_once; 334 + } error_capture; 335 + }; 336 + 337 + #endif
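The GT id packed into bits 6-7 of @flags above can be exercised in isolation. Below is a minimal userspace sketch of the XE_VM_FLAG_* bit layout: BIT() is a local stand-in for the kernel macro, and XE_VM_FLAG_SET_GT_ID() is simplified to take a bare integer id rather than the xe_gt pointer used in the header.

```c
#include <assert.h>

/* Local stand-in for the kernel's BIT() macro */
#define BIT(n) (1UL << (n))

#define XE_VM_FLAGS_64K           BIT(0)
#define XE_VM_FLAG_COMPUTE_MODE   BIT(1)
#define XE_VM_FLAG_ASYNC_BIND_OPS BIT(2)
#define XE_VM_FLAG_MIGRATION      BIT(3)
#define XE_VM_FLAG_SCRATCH_PAGE   BIT(4)
#define XE_VM_FLAG_FAULT_MODE     BIT(5)
/* Bits 6-7 carry the GT id; the header derives the id from an xe_gt
 * pointer, here it is a plain integer for illustration. */
#define XE_VM_FLAG_GT_ID(flags)   (((flags) >> 6) & 0x3)
#define XE_VM_FLAG_SET_GT_ID(id)  ((unsigned long)(id) << 6)
```

A migration VM for GT 2 would carry XE_VM_FLAG_MIGRATION | XE_VM_FLAG_SET_GT_ID(2), and XE_VM_FLAG_GT_ID() recovers the id without disturbing the low six mode bits.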
+326
drivers/gpu/drm/xe/xe_wa.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #include "xe_wa.h" 7 + 8 + #include <linux/compiler_types.h> 9 + 10 + #include "xe_device_types.h" 11 + #include "xe_force_wake.h" 12 + #include "xe_gt.h" 13 + #include "xe_hw_engine_types.h" 14 + #include "xe_mmio.h" 15 + #include "xe_platform_types.h" 16 + #include "xe_rtp.h" 17 + #include "xe_step.h" 18 + 19 + #include "gt/intel_engine_regs.h" 20 + #include "gt/intel_gt_regs.h" 21 + #include "i915_reg.h" 22 + 23 + /** 24 + * DOC: Hardware workarounds 25 + * 26 + * Hardware workarounds are register programming documented to be executed in 27 + * the driver that fall outside of the normal programming sequences for a 28 + * platform. There are some basic categories of workarounds, depending on 29 + * how/when they are applied: 30 + * 31 + * - LRC workarounds: workarounds that touch registers that are 32 + * saved/restored to/from the HW context image. The list is emitted (via Load 33 + * Register Immediate commands) once when initializing the device and saved in 34 + * the default context. That default context is then used on every context 35 + * creation to have a "primed golden context", i.e. a context image that 36 + * already contains the changes needed to all the registers. 37 + * 38 + * TODO: Although these workarounds are maintained here, they are not 39 + * currently being applied. 40 + * 41 + * - Engine workarounds: the list of these WAs is applied whenever the specific 42 + * engine is reset. It's also possible that a set of engine classes share a 43 + * common power domain and they are reset together. This happens on some 44 + * platforms with render and compute engines. In this case (at least) one of 45 + * them needs to keep the workaround programming: the approach taken in the 46 + * driver is to tie those workarounds to the first compute/render engine that 47 + * is registered. 
When executing with GuC submission, engine resets are 48 + * outside of kernel driver control, hence the list of registers involved is 49 + * written once, on engine initialization, and then passed to GuC, which 50 + * saves/restores their values before/after the reset takes place. See 51 + * ``drivers/gpu/drm/xe/xe_guc_ads.c`` for reference. 52 + * 53 + * - GT workarounds: the list of these WAs is applied whenever these registers 54 + * revert to their default values: on GPU reset, suspend/resume [1]_, etc. 55 + * 56 + * - Register whitelist: some workarounds need to be implemented in userspace, 57 + * but need to touch privileged registers. The whitelist in the kernel 58 + * instructs the hardware to allow the access to happen. From the kernel side, 59 + * this is just a special case of a MMIO workaround (as we write the list of 60 + * these to-be-whitelisted registers to some special HW registers). 61 + * 62 + * - Workaround batchbuffers: buffers that get executed automatically by the 63 + * hardware on every HW context restore. These buffers are created and 64 + * programmed in the default context so the hardware always goes through those 65 + * programming sequences when switching contexts. The support for workaround 66 + * batchbuffers is enabled by these hardware mechanisms: 67 + * 68 + * #. INDIRECT_CTX: A batchbuffer and an offset are provided in the default 69 + * context, pointing the hardware to jump to that location when that offset 70 + * is reached in the context restore. Workaround batchbuffer in the driver 71 + * currently uses this mechanism for all platforms. 72 + * 73 + * #. BB_PER_CTX_PTR: A batchbuffer is provided in the default context, 74 + * pointing the hardware to a buffer to continue executing after the 75 + * engine registers are restored in a context restore sequence. This is 76 + * currently not used in the driver. 77 + * 78 + * - Other: There are WAs that, due to their nature, cannot be applied from a 79 + * central place. 
Those are peppered around the rest of the code, as needed. 80 + * Workarounds related to the display IP are the main example. 81 + * 82 + * .. [1] Technically, some registers are powercontext saved & restored, so they 83 + * survive a suspend/resume. In practice, writing them again is not too 84 + * costly and simplifies things, so it's the approach taken in the driver. 85 + * 86 + * .. note:: 87 + * Hardware workarounds in xe work the same way as in i915, with the 88 + * difference of how they are maintained in the code. In xe it uses the 89 + * xe_rtp infrastructure so the workarounds can be kept in tables, following 90 + * a more declarative approach rather than procedural. 91 + */ 92 + 93 + #undef _MMIO 94 + #undef MCR_REG 95 + #define _MMIO(x) _XE_RTP_REG(x) 96 + #define MCR_REG(x) _XE_RTP_MCR_REG(x) 97 + 98 + static bool match_14011060649(const struct xe_gt *gt, 99 + const struct xe_hw_engine *hwe) 100 + { 101 + return hwe->instance % 2 == 0; 102 + } 103 + 104 + static const struct xe_rtp_entry gt_was[] = { 105 + { XE_RTP_NAME("14011060649"), 106 + XE_RTP_RULES(MEDIA_VERSION_RANGE(1200, 1255), 107 + ENGINE_CLASS(VIDEO_DECODE), 108 + FUNC(match_14011060649)), 109 + XE_RTP_SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS, 110 + XE_RTP_FLAG(FOREACH_ENGINE)) 111 + }, 112 + { XE_RTP_NAME("16010515920"), 113 + XE_RTP_RULES(SUBPLATFORM(DG2, G10), 114 + STEP(A0, B0), 115 + ENGINE_CLASS(VIDEO_DECODE)), 116 + XE_RTP_SET(VDBOX_CGCTL3F18(0), ALNUNIT_CLKGATE_DIS, 117 + XE_RTP_FLAG(FOREACH_ENGINE)) 118 + }, 119 + { XE_RTP_NAME("22010523718"), 120 + XE_RTP_RULES(SUBPLATFORM(DG2, G10)), 121 + XE_RTP_SET(UNSLICE_UNIT_LEVEL_CLKGATE, CG3DDISCFEG_CLKGATE_DIS) 122 + }, 123 + { XE_RTP_NAME("14011006942"), 124 + XE_RTP_RULES(SUBPLATFORM(DG2, G10)), 125 + XE_RTP_SET(GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE, DSS_ROUTER_CLKGATE_DIS) 126 + }, 127 + { XE_RTP_NAME("14010948348"), 128 + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), 129 + XE_RTP_SET(UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS) 130 + }, 
131 + { XE_RTP_NAME("14011037102"), 132 + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), 133 + XE_RTP_SET(UNSLCGCTL9444, LTCDD_CLKGATE_DIS) 134 + }, 135 + { XE_RTP_NAME("14011371254"), 136 + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), 137 + XE_RTP_SET(GEN11_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS) 138 + }, 139 + { XE_RTP_NAME("14011431319/0"), 140 + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), 141 + XE_RTP_SET(UNSLCGCTL9440, 142 + GAMTLBOACS_CLKGATE_DIS | 143 + GAMTLBVDBOX7_CLKGATE_DIS | GAMTLBVDBOX6_CLKGATE_DIS | 144 + GAMTLBVDBOX5_CLKGATE_DIS | GAMTLBVDBOX4_CLKGATE_DIS | 145 + GAMTLBVDBOX3_CLKGATE_DIS | GAMTLBVDBOX2_CLKGATE_DIS | 146 + GAMTLBVDBOX1_CLKGATE_DIS | GAMTLBVDBOX0_CLKGATE_DIS | 147 + GAMTLBKCR_CLKGATE_DIS | GAMTLBGUC_CLKGATE_DIS | 148 + GAMTLBBLT_CLKGATE_DIS) 149 + }, 150 + { XE_RTP_NAME("14011431319/1"), 151 + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), 152 + XE_RTP_SET(UNSLCGCTL9444, 153 + GAMTLBGFXA0_CLKGATE_DIS | GAMTLBGFXA1_CLKGATE_DIS | 154 + GAMTLBCOMPA0_CLKGATE_DIS | GAMTLBCOMPA1_CLKGATE_DIS | 155 + GAMTLBCOMPB0_CLKGATE_DIS | GAMTLBCOMPB1_CLKGATE_DIS | 156 + GAMTLBCOMPC0_CLKGATE_DIS | GAMTLBCOMPC1_CLKGATE_DIS | 157 + GAMTLBCOMPD0_CLKGATE_DIS | GAMTLBCOMPD1_CLKGATE_DIS | 158 + GAMTLBMERT_CLKGATE_DIS | 159 + GAMTLBVEBOX3_CLKGATE_DIS | GAMTLBVEBOX2_CLKGATE_DIS | 160 + GAMTLBVEBOX1_CLKGATE_DIS | GAMTLBVEBOX0_CLKGATE_DIS) 161 + }, 162 + { XE_RTP_NAME("14010569222"), 163 + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), 164 + XE_RTP_SET(UNSLICE_UNIT_LEVEL_CLKGATE, GAMEDIA_CLKGATE_DIS) 165 + }, 166 + { XE_RTP_NAME("14011028019"), 167 + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), 168 + XE_RTP_SET(SSMCGCTL9530, RTFUNIT_CLKGATE_DIS) 169 + }, 170 + { XE_RTP_NAME("14014830051"), 171 + XE_RTP_RULES(PLATFORM(DG2)), 172 + XE_RTP_CLR(SARB_CHICKEN1, COMP_CKN_IN) 173 + }, 174 + { XE_RTP_NAME("14015795083"), 175 + XE_RTP_RULES(PLATFORM(DG2)), 176 + XE_RTP_CLR(GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE) 177 + }, 178 
+ { XE_RTP_NAME("14011059788"), 179 + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), 180 + XE_RTP_SET(GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE) 181 + }, 182 + { XE_RTP_NAME("1409420604"), 183 + XE_RTP_RULES(PLATFORM(DG1)), 184 + XE_RTP_SET(SUBSLICE_UNIT_LEVEL_CLKGATE2, CPSSUNIT_CLKGATE_DIS) 185 + }, 186 + { XE_RTP_NAME("1408615072"), 187 + XE_RTP_RULES(PLATFORM(DG1)), 188 + XE_RTP_SET(UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE_DIS_TGL) 189 + }, 190 + {} 191 + }; 192 + 193 + static const struct xe_rtp_entry engine_was[] = { 194 + { XE_RTP_NAME("14015227452"), 195 + XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), 196 + XE_RTP_SET(GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE, 197 + XE_RTP_FLAG(MASKED_REG)) 198 + }, 199 + { XE_RTP_NAME("1606931601"), 200 + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), 201 + XE_RTP_SET(GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ, 202 + XE_RTP_FLAG(MASKED_REG)) 203 + }, 204 + { XE_RTP_NAME("22010931296, 18011464164, 14010919138"), 205 + XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER)), 206 + XE_RTP_SET(GEN7_FF_THREAD_MODE, GEN12_FF_TESSELATION_DOP_GATE_DISABLE) 207 + }, 208 + { XE_RTP_NAME("14010826681, 1606700617, 22010271021"), 209 + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), 210 + XE_RTP_SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE, 211 + XE_RTP_FLAG(MASKED_REG)) 212 + }, 213 + { XE_RTP_NAME("18019627453"), 214 + XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), 215 + XE_RTP_SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE, 216 + XE_RTP_FLAG(MASKED_REG)) 217 + }, 218 + { XE_RTP_NAME("1409804808"), 219 + XE_RTP_RULES(GRAPHICS_VERSION(1200), 220 + ENGINE_CLASS(RENDER), 221 + IS_INTEGRATED), 222 + XE_RTP_SET(GEN7_ROW_CHICKEN2, GEN12_PUSH_CONST_DEREF_HOLD_DIS, 223 + XE_RTP_FLAG(MASKED_REG)) 224 + }, 225 + { XE_RTP_NAME("14010229206, 1409085225"), 226 + XE_RTP_RULES(GRAPHICS_VERSION(1200), 227 + ENGINE_CLASS(RENDER), 228 + IS_INTEGRATED), 229 + 
XE_RTP_SET(GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH, 230 + XE_RTP_FLAG(MASKED_REG)) 231 + }, 232 + { XE_RTP_NAME("1607297627, 1607030317, 1607186500"), 233 + XE_RTP_RULES(PLATFORM(TIGERLAKE), ENGINE_CLASS(RENDER)), 234 + XE_RTP_SET(RING_PSMI_CTL(RENDER_RING_BASE), 235 + GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | 236 + GEN8_RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_FLAG(MASKED_REG)) 237 + }, 238 + { XE_RTP_NAME("1607297627, 1607030317, 1607186500"), 239 + XE_RTP_RULES(PLATFORM(ROCKETLAKE), ENGINE_CLASS(RENDER)), 240 + XE_RTP_SET(RING_PSMI_CTL(RENDER_RING_BASE), 241 + GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | 242 + GEN8_RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_FLAG(MASKED_REG)) 243 + }, 244 + { XE_RTP_NAME("1406941453"), 245 + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), 246 + XE_RTP_SET(GEN10_SAMPLER_MODE, ENABLE_SMALLPL, XE_RTP_FLAG(MASKED_REG)) 247 + }, 248 + { XE_RTP_NAME("FtrPerCtxtPreemptionGranularityControl"), 249 + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1250), ENGINE_CLASS(RENDER)), 250 + XE_RTP_SET(GEN7_FF_SLICE_CS_CHICKEN1, GEN9_FFSC_PERCTX_PREEMPT_CTRL, 251 + XE_RTP_FLAG(MASKED_REG)) 252 + }, 253 + {} 254 + }; 255 + 256 + static const struct xe_rtp_entry lrc_was[] = { 257 + { XE_RTP_NAME("1409342910, 14010698770, 14010443199, 1408979724, 1409178076, 1409207793, 1409217633, 1409252684, 1409347922, 1409142259"), 258 + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), 259 + XE_RTP_SET(GEN11_COMMON_SLICE_CHICKEN3, 260 + GEN12_DISABLE_CPS_AWARE_COLOR_PIPE, 261 + XE_RTP_FLAG(MASKED_REG)) 262 + }, 263 + { XE_RTP_NAME("WaDisableGPGPUMidThreadPreemption"), 264 + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), 265 + XE_RTP_FIELD_SET(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK, 266 + GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL, 267 + XE_RTP_FLAG(MASKED_REG)) 268 + }, 269 + { XE_RTP_NAME("16011163337"), 270 + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), 271 + /* read verification is ignored due to 1608008084. 
*/ 272 + XE_RTP_FIELD_SET_NO_READ_MASK(GEN12_FF_MODE2, FF_MODE2_GS_TIMER_MASK, 273 + FF_MODE2_GS_TIMER_224) 274 + }, 275 + { XE_RTP_NAME("1409044764"), 276 + XE_RTP_RULES(PLATFORM(DG1)), 277 + XE_RTP_CLR(GEN11_COMMON_SLICE_CHICKEN3, 278 + DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN, 279 + XE_RTP_FLAG(MASKED_REG)) 280 + }, 281 + { XE_RTP_NAME("22010493298"), 282 + XE_RTP_RULES(PLATFORM(DG1)), 283 + XE_RTP_SET(HIZ_CHICKEN, 284 + DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE, 285 + XE_RTP_FLAG(MASKED_REG)) 286 + }, 287 + {} 288 + }; 289 + 290 + /** 291 + * xe_wa_process_gt - process GT workaround table 292 + * @gt: GT instance to process workarounds for 293 + * 294 + * Process GT workaround table for this platform, saving in @gt all the 295 + * workarounds that need to be applied at the GT level. 296 + */ 297 + void xe_wa_process_gt(struct xe_gt *gt) 298 + { 299 + xe_rtp_process(gt_was, &gt->reg_sr, gt, NULL); 300 + } 301 + 302 + /** 303 + * xe_wa_process_engine - process engine workaround table 304 + * @hwe: engine instance to process workarounds for 305 + * 306 + * Process engine workaround table for this platform, saving in @hwe all the 307 + * workarounds that need to be applied at the engine level that match this 308 + * engine. 309 + */ 310 + void xe_wa_process_engine(struct xe_hw_engine *hwe) 311 + { 312 + xe_rtp_process(engine_was, &hwe->reg_sr, hwe->gt, hwe); 313 + } 314 + 315 + /** 316 + * xe_wa_process_lrc - process context workaround table 317 + * @hwe: engine instance to process workarounds for 318 + * 319 + * Process context workaround table for this platform, saving in @hwe all the 320 + * workarounds that need to be applied on context restore. These are workarounds 321 + * touching registers that are part of the HW context image. 322 + */ 323 + void xe_wa_process_lrc(struct xe_hw_engine *hwe) 324 + { 325 + xe_rtp_process(lrc_was, &hwe->reg_lrc, hwe->gt, hwe); 326 + }
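The declarative shape of the tables above — a NULL-terminated array of entries, each carrying rules plus an optional match function such as match_14011060649() — can be sketched outside the kernel. Everything below (struct names, demo_rules, count_matches) is illustrative scaffolding, not the xe_rtp API:

```c
#include <stdbool.h>
#include <stddef.h>

/* Simplified stand-ins for the kernel's engine and rule-entry types */
struct hw_engine { int class_id; int instance; };

struct wa_rule {
	const char *name;
	/* NULL means the rule applies unconditionally */
	bool (*match)(const struct hw_engine *hwe);
};

/* Mirrors the spirit of match_14011060649(): even engine instances only */
static bool match_even_vcs(const struct hw_engine *hwe)
{
	return hwe->instance % 2 == 0;
}

static const struct wa_rule demo_rules[] = {
	{ .name = "14011060649", .match = match_even_vcs },
	{ .name = "22010523718", .match = NULL },	/* unconditional */
	{ NULL, NULL },					/* table terminator */
};

/* Walk a NULL-terminated table and count entries that apply to @hwe */
static int count_matches(const struct wa_rule *rules,
			 const struct hw_engine *hwe)
{
	int n = 0;

	for (; rules->name; rules++)
		if (!rules->match || rules->match(hwe))
			n++;
	return n;
}

static int demo_matches(int instance)
{
	struct hw_engine hwe = { .class_id = 0, .instance = instance };

	return count_matches(demo_rules, &hwe);
}
```

The real xe_rtp_process() additionally evaluates platform/stepping rules and emits register save-restore entries; the table walk with per-entry predicates is the part shown here.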
+18
drivers/gpu/drm/xe/xe_wa.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_WA_ 7 + #define _XE_WA_ 8 + 9 + struct xe_gt; 10 + struct xe_hw_engine; 11 + 12 + void xe_wa_process_gt(struct xe_gt *gt); 13 + void xe_wa_process_engine(struct xe_hw_engine *hwe); 14 + void xe_wa_process_lrc(struct xe_hw_engine *hwe); 15 + 16 + void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe); 17 + 18 + #endif
+202
drivers/gpu/drm/xe/xe_wait_user_fence.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #include <drm/drm_device.h> 7 + #include <drm/drm_file.h> 8 + #include <drm/xe_drm.h> 9 + 10 + #include "xe_device.h" 11 + #include "xe_gt.h" 12 + #include "xe_macros.h" 13 + #include "xe_vm.h" 14 + 15 + static int do_compare(u64 addr, u64 value, u64 mask, u16 op) 16 + { 17 + u64 rvalue; 18 + int err; 19 + bool passed; 20 + 21 + err = copy_from_user(&rvalue, u64_to_user_ptr(addr), sizeof(rvalue)); 22 + if (err) 23 + return -EFAULT; 24 + 25 + switch (op) { 26 + case DRM_XE_UFENCE_WAIT_EQ: 27 + passed = (rvalue & mask) == (value & mask); 28 + break; 29 + case DRM_XE_UFENCE_WAIT_NEQ: 30 + passed = (rvalue & mask) != (value & mask); 31 + break; 32 + case DRM_XE_UFENCE_WAIT_GT: 33 + passed = (rvalue & mask) > (value & mask); 34 + break; 35 + case DRM_XE_UFENCE_WAIT_GTE: 36 + passed = (rvalue & mask) >= (value & mask); 37 + break; 38 + case DRM_XE_UFENCE_WAIT_LT: 39 + passed = (rvalue & mask) < (value & mask); 40 + break; 41 + case DRM_XE_UFENCE_WAIT_LTE: 42 + passed = (rvalue & mask) <= (value & mask); 43 + break; 44 + default: 45 + XE_BUG_ON("Not possible"); 46 + } 47 + 48 + return passed ? 
0 : 1; 49 + } 50 + 51 + static const enum xe_engine_class user_to_xe_engine_class[] = { 52 + [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER, 53 + [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY, 54 + [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE, 55 + [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE, 56 + [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE, 57 + }; 58 + 59 + int check_hw_engines(struct xe_device *xe, 60 + struct drm_xe_engine_class_instance *eci, 61 + int num_engines) 62 + { 63 + int i; 64 + 65 + for (i = 0; i < num_engines; ++i) { 66 + enum xe_engine_class user_class = 67 + user_to_xe_engine_class[eci[i].engine_class]; 68 + 69 + if (eci[i].gt_id >= xe->info.tile_count) 70 + return -EINVAL; 71 + 72 + if (!xe_gt_hw_engine(xe_device_get_gt(xe, eci[i].gt_id), 73 + user_class, eci[i].engine_instance, true)) 74 + return -EINVAL; 75 + } 76 + 77 + return 0; 78 + } 79 + 80 + #define VALID_FLAGS (DRM_XE_UFENCE_WAIT_SOFT_OP | \ 81 + DRM_XE_UFENCE_WAIT_ABSTIME | \ 82 + DRM_XE_UFENCE_WAIT_VM_ERROR) 83 + #define MAX_OP DRM_XE_UFENCE_WAIT_LTE 84 + 85 + int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, 86 + struct drm_file *file) 87 + { 88 + struct xe_device *xe = to_xe_device(dev); 89 + DEFINE_WAIT_FUNC(w_wait, woken_wake_function); 90 + struct drm_xe_wait_user_fence *args = data; 91 + struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE]; 92 + struct drm_xe_engine_class_instance __user *user_eci = 93 + u64_to_user_ptr(args->instances); 94 + struct xe_vm *vm = NULL; 95 + u64 addr = args->addr; 96 + int err; 97 + bool no_engines = args->flags & DRM_XE_UFENCE_WAIT_SOFT_OP || 98 + args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR; 99 + unsigned long timeout = args->timeout; 100 + 101 + if (XE_IOCTL_ERR(xe, args->extensions)) 102 + return -EINVAL; 103 + 104 + if (XE_IOCTL_ERR(xe, args->flags & ~VALID_FLAGS)) 105 + return -EINVAL; 106 + 107 + if (XE_IOCTL_ERR(xe, args->op > MAX_OP)) 108 + return 
-EINVAL; 109 + 110 + if (XE_IOCTL_ERR(xe, no_engines && 111 + (args->num_engines || args->instances))) 112 + return -EINVAL; 113 + 114 + if (XE_IOCTL_ERR(xe, !no_engines && !args->num_engines)) 115 + return -EINVAL; 116 + 117 + if (XE_IOCTL_ERR(xe, !(args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR) && 118 + addr & 0x7)) 119 + return -EINVAL; 120 + 121 + if (!no_engines) { 122 + err = copy_from_user(eci, user_eci, 123 + sizeof(struct drm_xe_engine_class_instance) * 124 + args->num_engines); 125 + if (XE_IOCTL_ERR(xe, err)) 126 + return -EFAULT; 127 + 128 + if (XE_IOCTL_ERR(xe, check_hw_engines(xe, eci, 129 + args->num_engines))) 130 + return -EINVAL; 131 + } 132 + 133 + if (args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR) { 134 + if (XE_IOCTL_ERR(xe, args->vm_id >> 32)) 135 + return -EINVAL; 136 + 137 + vm = xe_vm_lookup(to_xe_file(file), args->vm_id); 138 + if (XE_IOCTL_ERR(xe, !vm)) 139 + return -ENOENT; 140 + 141 + if (XE_IOCTL_ERR(xe, !vm->async_ops.error_capture.addr)) { 142 + xe_vm_put(vm); 143 + return -ENOTSUPP; 144 + } 145 + 146 + addr = vm->async_ops.error_capture.addr; 147 + } 148 + 149 + if (XE_IOCTL_ERR(xe, timeout > MAX_SCHEDULE_TIMEOUT)) 150 + return -EINVAL; 151 + 152 + /* 153 + * FIXME: Very simple implementation at the moment, single wait queue 154 + * for everything. Could be optimized to have a wait queue for every 155 + * hardware engine. Open coding as 'do_compare' can sleep which doesn't 156 + * work with the wait_event_* macros. 
157 + */ 158 + if (vm) 159 + add_wait_queue(&vm->async_ops.error_capture.wq, &w_wait); 160 + else 161 + add_wait_queue(&xe->ufence_wq, &w_wait); 162 + for (;;) { 163 + if (vm && xe_vm_is_closed(vm)) { 164 + err = -ENODEV; 165 + break; 166 + } 167 + err = do_compare(addr, args->value, args->mask, args->op); 168 + if (err <= 0) 169 + break; 170 + 171 + if (signal_pending(current)) { 172 + err = -ERESTARTSYS; 173 + break; 174 + } 175 + 176 + if (!timeout) { 177 + err = -ETIME; 178 + break; 179 + } 180 + 181 + timeout = wait_woken(&w_wait, TASK_INTERRUPTIBLE, timeout); 182 + } 183 + if (vm) { 184 + remove_wait_queue(&vm->async_ops.error_capture.wq, &w_wait); 185 + xe_vm_put(vm); 186 + } else { 187 + remove_wait_queue(&xe->ufence_wq, &w_wait); 188 + } 189 + if (XE_IOCTL_ERR(xe, err < 0)) 190 + return err; 191 + else if (XE_IOCTL_ERR(xe, !timeout)) 192 + return -ETIME; 193 + 194 + /* 195 + * Again very simple, return the time in jiffies that has passed; may 196 + * need more precision 197 + */ 198 + if (args->flags & DRM_XE_UFENCE_WAIT_ABSTIME) 199 + args->timeout = args->timeout - timeout; 200 + 201 + return 0; 202 + }
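The comparison at the heart of do_compare() masks both operands before applying the requested operator. Below is a userspace sketch of just that step, minus the copy_from_user() read and the 0/1 wait-loop return convention; the enum values are illustrative stand-ins for the DRM_XE_UFENCE_WAIT_* ops:

```c
#include <stdbool.h>
#include <stdint.h>

/* Stand-ins for DRM_XE_UFENCE_WAIT_EQ..DRM_XE_UFENCE_WAIT_LTE */
enum ufence_op { OP_EQ, OP_NEQ, OP_GT, OP_GTE, OP_LT, OP_LTE };

static bool ufence_compare(uint64_t rvalue, uint64_t value, uint64_t mask,
			   enum ufence_op op)
{
	/* Both sides are masked before comparing, as in do_compare() */
	rvalue &= mask;
	value &= mask;

	switch (op) {
	case OP_EQ:	return rvalue == value;
	case OP_NEQ:	return rvalue != value;
	case OP_GT:	return rvalue > value;
	case OP_GTE:	return rvalue >= value;
	case OP_LT:	return rvalue < value;
	case OP_LTE:	return rvalue <= value;
	}
	return false;
}
```

So a wait on the low byte, e.g. mask 0xff, compares only those bits: 0x1234 and 0x0034 are "equal" under that mask even though the full values differ.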
+15
drivers/gpu/drm/xe/xe_wait_user_fence.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_WAIT_USER_FENCE_H_ 7 + #define _XE_WAIT_USER_FENCE_H_ 8 + 9 + struct drm_device; 10 + struct drm_file; 11 + 12 + int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, 13 + struct drm_file *file); 14 + 15 + #endif
+263
drivers/gpu/drm/xe/xe_wopcm.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #include "xe_device.h" 7 + #include "xe_force_wake.h" 8 + #include "xe_gt.h" 9 + #include "xe_guc_reg.h" 10 + #include "xe_mmio.h" 11 + #include "xe_uc_fw.h" 12 + #include "xe_wopcm.h" 13 + 14 + #include "i915_utils.h" 15 + 16 + /** 17 + * DOC: Write Once Protected Content Memory (WOPCM) Layout 18 + * 19 + * The layout of the WOPCM will be fixed after writing to GuC WOPCM size and 20 + * offset registers whose values are calculated and determined by HuC/GuC 21 + * firmware size and set of hardware requirements/restrictions as shown below: 22 + * 23 + * :: 24 + * 25 + * +=========> +====================+ <== WOPCM Top 26 + * ^ | HW contexts RSVD | 27 + * | +===> +====================+ <== GuC WOPCM Top 28 + * | ^ | | 29 + * | | | | 30 + * | | | | 31 + * | GuC | | 32 + * | WOPCM | | 33 + * | Size +--------------------+ 34 + * WOPCM | | GuC FW RSVD | 35 + * | | +--------------------+ 36 + * | | | GuC Stack RSVD | 37 + * | | +--------------------+ 38 + * | v | GuC WOPCM RSVD | 39 + * | +===> +====================+ <== GuC WOPCM base 40 + * | | WOPCM RSVD | 41 + * | +--------------------+ <== HuC Firmware Top 42 + * v | HuC FW | 43 + * +=========> +====================+ <== WOPCM Base 44 + * 45 + * GuC accessible WOPCM starts at GuC WOPCM base and ends at GuC WOPCM top. 46 + * The top part of the WOPCM is reserved for hardware contexts (e.g. RC6 47 + * context). 48 + */ 49 + 50 + /* Default WOPCM size is 2MB from Gen11, 1MB on previous platforms */ 51 + #define DGFX_WOPCM_SIZE SZ_4M /* FIXME: Larger size required 52 + for 2 tile PVC, do a proper 53 + probe sooner or later */ 54 + #define MTL_WOPCM_SIZE SZ_4M /* FIXME: Larger size required 55 + for MTL, do a proper probe 56 + sooner or later */ 57 + #define GEN11_WOPCM_SIZE SZ_2M 58 + /* 16KB WOPCM (RSVD WOPCM) is reserved from HuC firmware top. 
*/ 59 + #define WOPCM_RESERVED_SIZE SZ_16K 60 + 61 + /* 16KB reserved at the beginning of GuC WOPCM. */ 62 + #define GUC_WOPCM_RESERVED SZ_16K 63 + /* 8KB from GUC_WOPCM_RESERVED is reserved for GuC stack. */ 64 + #define GUC_WOPCM_STACK_RESERVED SZ_8K 65 + 66 + /* GuC WOPCM Offset value needs to be aligned to 16KB. */ 67 + #define GUC_WOPCM_OFFSET_ALIGNMENT (1UL << GUC_WOPCM_OFFSET_SHIFT) 68 + 69 + /* 36KB WOPCM reserved at the end of WOPCM on GEN11. */ 70 + #define GEN11_WOPCM_HW_CTX_RESERVED (SZ_32K + SZ_4K) 71 + 72 + static inline struct xe_gt *wopcm_to_gt(struct xe_wopcm *wopcm) 73 + { 74 + return container_of(wopcm, struct xe_gt, uc.wopcm); 75 + } 76 + 77 + static inline struct xe_device *wopcm_to_xe(struct xe_wopcm *wopcm) 78 + { 79 + return gt_to_xe(wopcm_to_gt(wopcm)); 80 + } 81 + 82 + static u32 context_reserved_size(void) 83 + { 84 + return GEN11_WOPCM_HW_CTX_RESERVED; 85 + } 86 + 87 + static bool __check_layout(struct xe_device *xe, u32 wopcm_size, 88 + u32 guc_wopcm_base, u32 guc_wopcm_size, 89 + u32 guc_fw_size, u32 huc_fw_size) 90 + { 91 + const u32 ctx_rsvd = context_reserved_size(); 92 + u32 size; 93 + 94 + size = wopcm_size - ctx_rsvd; 95 + if (unlikely(range_overflows(guc_wopcm_base, guc_wopcm_size, size))) { 96 + drm_err(&xe->drm, 97 + "WOPCM: invalid GuC region layout: %uK + %uK > %uK\n", 98 + guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K, 99 + size / SZ_1K); 100 + return false; 101 + } 102 + 103 + size = guc_fw_size + GUC_WOPCM_RESERVED + GUC_WOPCM_STACK_RESERVED; 104 + if (unlikely(guc_wopcm_size < size)) { 105 + drm_err(&xe->drm, "WOPCM: no space for %s: %uK < %uK\n", 106 + xe_uc_fw_type_repr(XE_UC_FW_TYPE_GUC), 107 + guc_wopcm_size / SZ_1K, size / SZ_1K); 108 + return false; 109 + } 110 + 111 + size = huc_fw_size + WOPCM_RESERVED_SIZE; 112 + if (unlikely(guc_wopcm_base < size)) { 113 + drm_err(&xe->drm, "WOPCM: no space for %s: %uK < %uK\n", 114 + xe_uc_fw_type_repr(XE_UC_FW_TYPE_HUC), 115 + guc_wopcm_base / SZ_1K, size / SZ_1K); 116 + 
return false; 117 + } 118 + 119 + return true; 120 + } 121 + 122 + static bool __wopcm_regs_locked(struct xe_gt *gt, 123 + u32 *guc_wopcm_base, u32 *guc_wopcm_size) 124 + { 125 + u32 reg_base = xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET.reg); 126 + u32 reg_size = xe_mmio_read32(gt, GUC_WOPCM_SIZE.reg); 127 + 128 + if (!(reg_size & GUC_WOPCM_SIZE_LOCKED) || 129 + !(reg_base & GUC_WOPCM_OFFSET_VALID)) 130 + return false; 131 + 132 + *guc_wopcm_base = reg_base & GUC_WOPCM_OFFSET_MASK; 133 + *guc_wopcm_size = reg_size & GUC_WOPCM_SIZE_MASK; 134 + return true; 135 + } 136 + 137 + static int __wopcm_init_regs(struct xe_device *xe, struct xe_gt *gt, 138 + struct xe_wopcm *wopcm) 139 + { 140 + u32 base = wopcm->guc.base; 141 + u32 size = wopcm->guc.size; 142 + u32 huc_agent = xe_uc_fw_is_disabled(&gt->uc.huc.fw) ? 0 : 143 + HUC_LOADING_AGENT_GUC; 144 + u32 mask; 145 + int err; 146 + 147 + XE_BUG_ON(!(base & GUC_WOPCM_OFFSET_MASK)); 148 + XE_BUG_ON(base & ~GUC_WOPCM_OFFSET_MASK); 149 + XE_BUG_ON(!(size & GUC_WOPCM_SIZE_MASK)); 150 + XE_BUG_ON(size & ~GUC_WOPCM_SIZE_MASK); 151 + 152 + mask = GUC_WOPCM_SIZE_MASK | GUC_WOPCM_SIZE_LOCKED; 153 + err = xe_mmio_write32_and_verify(gt, GUC_WOPCM_SIZE.reg, size, mask, 154 + size | GUC_WOPCM_SIZE_LOCKED); 155 + if (err) 156 + goto err_out; 157 + 158 + mask = GUC_WOPCM_OFFSET_MASK | GUC_WOPCM_OFFSET_VALID | huc_agent; 159 + err = xe_mmio_write32_and_verify(gt, DMA_GUC_WOPCM_OFFSET.reg, 160 + base | huc_agent, mask, 161 + base | huc_agent | 162 + GUC_WOPCM_OFFSET_VALID); 163 + if (err) 164 + goto err_out; 165 + 166 + return 0; 167 + 168 + err_out: 169 + drm_notice(&xe->drm, "Failed to init uC WOPCM registers!\n"); 170 + drm_notice(&xe->drm, "%s(%#x)=%#x\n", "DMA_GUC_WOPCM_OFFSET", 171 + DMA_GUC_WOPCM_OFFSET.reg, 172 + xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET.reg)); 173 + drm_notice(&xe->drm, "%s(%#x)=%#x\n", "GUC_WOPCM_SIZE", 174 + GUC_WOPCM_SIZE.reg, 175 + xe_mmio_read32(gt, GUC_WOPCM_SIZE.reg)); 176 + 177 + return err; 178 + } 179 + 180 + 
u32 xe_wopcm_size(struct xe_device *xe) 181 + { 182 + return IS_DGFX(xe) ? DGFX_WOPCM_SIZE : 183 + xe->info.platform == XE_METEORLAKE ? MTL_WOPCM_SIZE : 184 + GEN11_WOPCM_SIZE; 185 + } 186 + 187 + /** 188 + * xe_wopcm_init() - Initialize the WOPCM structure. 189 + * @wopcm: pointer to xe_wopcm. 190 + * 191 + * This function will partition WOPCM space based on GuC and HuC firmware sizes 192 + * and will allocate the maximum remaining space for use by GuC. It also 193 + * enforces platform dependent hardware restrictions on GuC WOPCM offset and 194 + * size. It will fail the WOPCM init if any of these checks fail, so that the 195 + * subsequent WOPCM register setup and GuC firmware upload are aborted. 196 + */ 197 + int xe_wopcm_init(struct xe_wopcm *wopcm) 198 + { 199 + struct xe_device *xe = wopcm_to_xe(wopcm); 200 + struct xe_gt *gt = wopcm_to_gt(wopcm); 201 + u32 guc_fw_size = xe_uc_fw_get_upload_size(&gt->uc.guc.fw); 202 + u32 huc_fw_size = xe_uc_fw_get_upload_size(&gt->uc.huc.fw); 203 + u32 ctx_rsvd = context_reserved_size(); 204 + u32 guc_wopcm_base; 205 + u32 guc_wopcm_size; 206 + bool locked; 207 + int ret = 0; 208 + 209 + if (!guc_fw_size) 210 + return -EINVAL; 211 + 212 + wopcm->size = xe_wopcm_size(xe); 213 + drm_dbg(&xe->drm, "WOPCM: %uK\n", wopcm->size / SZ_1K); 214 + 215 + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); 216 + XE_BUG_ON(guc_fw_size >= wopcm->size); 217 + XE_BUG_ON(huc_fw_size >= wopcm->size); 218 + XE_BUG_ON(ctx_rsvd + WOPCM_RESERVED_SIZE >= wopcm->size); 219 + 220 + locked = __wopcm_regs_locked(gt, &guc_wopcm_base, &guc_wopcm_size); 221 + if (locked) { 222 + drm_dbg(&xe->drm, "GuC WOPCM is already locked [%uK, %uK)\n", 223 + guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K); 224 + goto check; 225 + } 226 + 227 + /* 228 + * Aligned value of guc_wopcm_base will determine available WOPCM space 229 + * for HuC firmware and mandatory reserved area. 
230 + */ 231 + guc_wopcm_base = huc_fw_size + WOPCM_RESERVED_SIZE; 232 + guc_wopcm_base = ALIGN(guc_wopcm_base, GUC_WOPCM_OFFSET_ALIGNMENT); 233 + 234 + /* 235 + * Clamp guc_wopcm_base now to make sure the following math is 236 + * correct. A formal check of the whole WOPCM layout is done below. 237 + */ 238 + guc_wopcm_base = min(guc_wopcm_base, wopcm->size - ctx_rsvd); 239 + 240 + /* The aligned remainder of the usable WOPCM space can be assigned to GuC. */ 241 + guc_wopcm_size = wopcm->size - ctx_rsvd - guc_wopcm_base; 242 + guc_wopcm_size &= GUC_WOPCM_SIZE_MASK; 243 + 244 + drm_dbg(&xe->drm, "Calculated GuC WOPCM [%uK, %uK)\n", 245 + guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K); 246 + 247 + check: 248 + if (__check_layout(xe, wopcm->size, guc_wopcm_base, guc_wopcm_size, 249 + guc_fw_size, huc_fw_size)) { 250 + wopcm->guc.base = guc_wopcm_base; 251 + wopcm->guc.size = guc_wopcm_size; 252 + XE_BUG_ON(!wopcm->guc.base); 253 + XE_BUG_ON(!wopcm->guc.size); 254 + } else { 255 + drm_notice(&xe->drm, "Unsuccessful WOPCM partitioning\n"); 256 + return -E2BIG; 257 + } 258 + 259 + if (!locked) 260 + ret = __wopcm_init_regs(xe, gt, wopcm); 261 + 262 + return ret; 263 + }
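The partitioning logic above boils down to a few lines of arithmetic: align the HuC footprint up, clamp it, and give GuC the remainder. Below is a standalone sketch of that math with illustrative sizes (WOPCM_SIZE, CTX_RSVD, RESERVED_SIZE and OFFSET_ALIGNMENT are example values, not the platform constants from this patch, and the GUC_WOPCM_SIZE_MASK step is omitted):

```c
#include <assert.h>
#include <stdint.h>

/* Illustrative values only; the real platform constants differ. */
#define WOPCM_SIZE       (2u << 20)  /* 2 MiB total WOPCM (example) */
#define CTX_RSVD         (64u << 10) /* example context reservation */
#define RESERVED_SIZE    (16u << 10) /* example mandatory reserved area */
#define OFFSET_ALIGNMENT (16u << 10) /* example base alignment */

#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))
#define MIN(a, b)      ((a) < (b) ? (a) : (b))

/* Mirrors the layout math in xe_wopcm_init(): HuC + reserved area at the
 * bottom, context reservation at the top, GuC gets what is left. */
static void partition(uint32_t huc_fw_size,
		      uint32_t *guc_base, uint32_t *guc_size)
{
	uint32_t base = ALIGN_UP(huc_fw_size + RESERVED_SIZE, OFFSET_ALIGNMENT);

	/* Clamp so the subtraction below cannot underflow. */
	base = MIN(base, WOPCM_SIZE - CTX_RSVD);

	*guc_base = base;
	*guc_size = WOPCM_SIZE - CTX_RSVD - base;
}
```

With a 128 KiB HuC blob, the base lands at 144 KiB and GuC receives the rest up to the context reservation, so base + size + CTX_RSVD always adds back up to the total WOPCM size.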
+16
drivers/gpu/drm/xe/xe_wopcm.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_WOPCM_H_ 7 + #define _XE_WOPCM_H_ 8 + 9 + #include "xe_wopcm_types.h" 10 + 11 + struct xe_device; 12 + 13 + int xe_wopcm_init(struct xe_wopcm *wopcm); 14 + u32 xe_wopcm_size(struct xe_device *xe); 15 + 16 + #endif
+26
drivers/gpu/drm/xe/xe_wopcm_types.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_WOPCM_TYPES_H_ 7 + #define _XE_WOPCM_TYPES_H_ 8 + 9 + #include <linux/types.h> 10 + 11 + /** 12 + * struct xe_wopcm - Overall WOPCM info and WOPCM regions. 13 + */ 14 + struct xe_wopcm { 15 + /** @size: Size of overall WOPCM */ 16 + u32 size; 17 + /** @guc: GuC WOPCM Region info */ 18 + struct { 19 + /** @base: GuC WOPCM base which is offset from WOPCM base */ 20 + u32 base; 21 + /** @size: Size of the GuC WOPCM region */ 22 + u32 size; 23 + } guc; 24 + }; 25 + 26 + #endif
+195
include/drm/xe_pciids.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_PCIIDS_H_ 7 + #define _XE_PCIIDS_H_ 8 + 9 + /* 10 + * Lists below can be turned into initializers for a struct pci_device_id 11 + * by defining INTEL_VGA_DEVICE: 12 + * 13 + * #define INTEL_VGA_DEVICE(id, info) { \ 14 + * 0x8086, id, \ 15 + * ~0, ~0, \ 16 + * 0x030000, 0xff0000, \ 17 + * (unsigned long) info } 18 + * 19 + * And then calling like: 20 + * 21 + * XE_TGL_GT1_IDS(INTEL_VGA_DEVICE, ## __VA_ARGS__) 22 + * 23 + * To turn them into something else, just provide a different macro passed as 24 + * first argument. 25 + */ 26 + 27 + /* TGL */ 28 + #define XE_TGL_GT1_IDS(MACRO__, ...) \ 29 + MACRO__(0x9A60, ## __VA_ARGS__), \ 30 + MACRO__(0x9A68, ## __VA_ARGS__), \ 31 + MACRO__(0x9A70, ## __VA_ARGS__) 32 + 33 + #define XE_TGL_GT2_IDS(MACRO__, ...) \ 34 + MACRO__(0x9A40, ## __VA_ARGS__), \ 35 + MACRO__(0x9A49, ## __VA_ARGS__), \ 36 + MACRO__(0x9A59, ## __VA_ARGS__), \ 37 + MACRO__(0x9A78, ## __VA_ARGS__), \ 38 + MACRO__(0x9AC0, ## __VA_ARGS__), \ 39 + MACRO__(0x9AC9, ## __VA_ARGS__), \ 40 + MACRO__(0x9AD9, ## __VA_ARGS__), \ 41 + MACRO__(0x9AF8, ## __VA_ARGS__) 42 + 43 + #define XE_TGL_IDS(MACRO__, ...) \ 44 + XE_TGL_GT1_IDS(MACRO__, ## __VA_ARGS__), \ 45 + XE_TGL_GT2_IDS(MACRO__, ## __VA_ARGS__) 46 + 47 + /* RKL */ 48 + #define XE_RKL_IDS(MACRO__, ...) \ 49 + MACRO__(0x4C80, ## __VA_ARGS__), \ 50 + MACRO__(0x4C8A, ## __VA_ARGS__), \ 51 + MACRO__(0x4C8B, ## __VA_ARGS__), \ 52 + MACRO__(0x4C8C, ## __VA_ARGS__), \ 53 + MACRO__(0x4C90, ## __VA_ARGS__), \ 54 + MACRO__(0x4C9A, ## __VA_ARGS__) 55 + 56 + /* DG1 */ 57 + #define XE_DG1_IDS(MACRO__, ...) \ 58 + MACRO__(0x4905, ## __VA_ARGS__), \ 59 + MACRO__(0x4906, ## __VA_ARGS__), \ 60 + MACRO__(0x4907, ## __VA_ARGS__), \ 61 + MACRO__(0x4908, ## __VA_ARGS__), \ 62 + MACRO__(0x4909, ## __VA_ARGS__) 63 + 64 + /* ADL-S */ 65 + #define XE_ADLS_IDS(MACRO__, ...) 
\ 66 + MACRO__(0x4680, ## __VA_ARGS__), \ 67 + MACRO__(0x4682, ## __VA_ARGS__), \ 68 + MACRO__(0x4688, ## __VA_ARGS__), \ 69 + MACRO__(0x468A, ## __VA_ARGS__), \ 70 + MACRO__(0x4690, ## __VA_ARGS__), \ 71 + MACRO__(0x4692, ## __VA_ARGS__), \ 72 + MACRO__(0x4693, ## __VA_ARGS__) 73 + 74 + /* ADL-P */ 75 + #define XE_ADLP_IDS(MACRO__, ...) \ 76 + MACRO__(0x46A0, ## __VA_ARGS__), \ 77 + MACRO__(0x46A1, ## __VA_ARGS__), \ 78 + MACRO__(0x46A2, ## __VA_ARGS__), \ 79 + MACRO__(0x46A3, ## __VA_ARGS__), \ 80 + MACRO__(0x46A6, ## __VA_ARGS__), \ 81 + MACRO__(0x46A8, ## __VA_ARGS__), \ 82 + MACRO__(0x46AA, ## __VA_ARGS__), \ 83 + MACRO__(0x462A, ## __VA_ARGS__), \ 84 + MACRO__(0x4626, ## __VA_ARGS__), \ 85 + MACRO__(0x4628, ## __VA_ARGS__), \ 86 + MACRO__(0x46B0, ## __VA_ARGS__), \ 87 + MACRO__(0x46B1, ## __VA_ARGS__), \ 88 + MACRO__(0x46B2, ## __VA_ARGS__), \ 89 + MACRO__(0x46B3, ## __VA_ARGS__), \ 90 + MACRO__(0x46C0, ## __VA_ARGS__), \ 91 + MACRO__(0x46C1, ## __VA_ARGS__), \ 92 + MACRO__(0x46C2, ## __VA_ARGS__), \ 93 + MACRO__(0x46C3, ## __VA_ARGS__) 94 + 95 + /* ADL-N */ 96 + #define XE_ADLN_IDS(MACRO__, ...) \ 97 + MACRO__(0x46D0, ## __VA_ARGS__), \ 98 + MACRO__(0x46D1, ## __VA_ARGS__), \ 99 + MACRO__(0x46D2, ## __VA_ARGS__) 100 + 101 + /* RPL-S */ 102 + #define XE_RPLS_IDS(MACRO__, ...) \ 103 + MACRO__(0xA780, ## __VA_ARGS__), \ 104 + MACRO__(0xA781, ## __VA_ARGS__), \ 105 + MACRO__(0xA782, ## __VA_ARGS__), \ 106 + MACRO__(0xA783, ## __VA_ARGS__), \ 107 + MACRO__(0xA788, ## __VA_ARGS__), \ 108 + MACRO__(0xA789, ## __VA_ARGS__), \ 109 + MACRO__(0xA78A, ## __VA_ARGS__), \ 110 + MACRO__(0xA78B, ## __VA_ARGS__) 111 + 112 + /* RPL-U */ 113 + #define XE_RPLU_IDS(MACRO__, ...) \ 114 + MACRO__(0xA721, ## __VA_ARGS__), \ 115 + MACRO__(0xA7A1, ## __VA_ARGS__), \ 116 + MACRO__(0xA7A9, ## __VA_ARGS__) 117 + 118 + /* RPL-P */ 119 + #define XE_RPLP_IDS(MACRO__, ...) 
\ 120 + MACRO__(0xA720, ## __VA_ARGS__), \ 121 + MACRO__(0xA7A0, ## __VA_ARGS__), \ 122 + MACRO__(0xA7A8, ## __VA_ARGS__) 123 + 124 + /* DG2 */ 125 + #define XE_DG2_G10_IDS(MACRO__, ...) \ 126 + MACRO__(0x5690, ## __VA_ARGS__), \ 127 + MACRO__(0x5691, ## __VA_ARGS__), \ 128 + MACRO__(0x5692, ## __VA_ARGS__), \ 129 + MACRO__(0x56A0, ## __VA_ARGS__), \ 130 + MACRO__(0x56A1, ## __VA_ARGS__), \ 131 + MACRO__(0x56A2, ## __VA_ARGS__) 132 + 133 + #define XE_DG2_G11_IDS(MACRO__, ...) \ 134 + MACRO__(0x5693, ## __VA_ARGS__), \ 135 + MACRO__(0x5694, ## __VA_ARGS__), \ 136 + MACRO__(0x5695, ## __VA_ARGS__), \ 137 + MACRO__(0x5698, ## __VA_ARGS__), \ 138 + MACRO__(0x56A5, ## __VA_ARGS__), \ 139 + MACRO__(0x56A6, ## __VA_ARGS__), \ 140 + MACRO__(0x56B0, ## __VA_ARGS__), \ 141 + MACRO__(0x56B1, ## __VA_ARGS__) 142 + 143 + #define XE_DG2_G12_IDS(MACRO__, ...) \ 144 + MACRO__(0x5696, ## __VA_ARGS__), \ 145 + MACRO__(0x5697, ## __VA_ARGS__), \ 146 + MACRO__(0x56A3, ## __VA_ARGS__), \ 147 + MACRO__(0x56A4, ## __VA_ARGS__), \ 148 + MACRO__(0x56B2, ## __VA_ARGS__), \ 149 + MACRO__(0x56B3, ## __VA_ARGS__) 150 + 151 + #define XE_DG2_IDS(MACRO__, ...) \ 152 + XE_DG2_G10_IDS(MACRO__, ## __VA_ARGS__),\ 153 + XE_DG2_G11_IDS(MACRO__, ## __VA_ARGS__),\ 154 + XE_DG2_G12_IDS(MACRO__, ## __VA_ARGS__) 155 + 156 + #define XE_ATS_M150_IDS(MACRO__, ...) \ 157 + MACRO__(0x56C0, ## __VA_ARGS__) 158 + 159 + #define XE_ATS_M75_IDS(MACRO__, ...) \ 160 + MACRO__(0x56C1, ## __VA_ARGS__) 161 + 162 + #define XE_ATS_M_IDS(MACRO__, ...) \ 163 + XE_ATS_M150_IDS(MACRO__, ## __VA_ARGS__),\ 164 + XE_ATS_M75_IDS(MACRO__, ## __VA_ARGS__) 165 + 166 + /* MTL */ 167 + #define XE_MTL_M_IDS(MACRO__, ...) \ 168 + MACRO__(0x7D40, ## __VA_ARGS__), \ 169 + MACRO__(0x7D43, ## __VA_ARGS__), \ 170 + MACRO__(0x7DC0, ## __VA_ARGS__) 171 + 172 + #define XE_MTL_P_IDS(MACRO__, ...) 
\ 173 + MACRO__(0x7D45, ## __VA_ARGS__), \ 174 + MACRO__(0x7D47, ## __VA_ARGS__), \ 175 + MACRO__(0x7D50, ## __VA_ARGS__), \ 176 + MACRO__(0x7D55, ## __VA_ARGS__), \ 177 + MACRO__(0x7DC5, ## __VA_ARGS__), \ 178 + MACRO__(0x7DD0, ## __VA_ARGS__), \ 179 + MACRO__(0x7DD5, ## __VA_ARGS__) 180 + 181 + #define XE_MTL_S_IDS(MACRO__, ...) \ 182 + MACRO__(0x7D60, ## __VA_ARGS__), \ 183 + MACRO__(0x7DE0, ## __VA_ARGS__) 184 + 185 + #define XE_ARL_IDS(MACRO__, ...) \ 186 + MACRO__(0x7D66, ## __VA_ARGS__), \ 187 + MACRO__(0x7D76, ## __VA_ARGS__) 188 + 189 + #define XE_MTL_IDS(MACRO__, ...) \ 190 + XE_MTL_M_IDS(MACRO__, ## __VA_ARGS__), \ 191 + XE_MTL_P_IDS(MACRO__, ## __VA_ARGS__), \ 192 + XE_MTL_S_IDS(MACRO__, ## __VA_ARGS__), \ 193 + XE_ARL_IDS(MACRO__, ## __VA_ARGS__) 194 + 195 + #endif
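The INTEL_VGA_DEVICE pattern documented at the top of this header can be exercised on its own. The sketch below uses a local two-entry copy of the DG1 list and a simplified stand-in for struct pci_device_id (struct xe_id and the trimmed list are illustrative, not the real kernel types):

```c
#include <stddef.h>

/* Local copy of the xe_pciids.h pattern, trimmed to two DG1 IDs. */
#define XE_DG1_IDS(MACRO__, ...) \
	MACRO__(0x4905, ## __VA_ARGS__), \
	MACRO__(0x4906, ## __VA_ARGS__)

/* Simplified stand-in for struct pci_device_id. */
struct xe_id {
	unsigned short vendor;
	unsigned short device;
	const void *info;
};

/* Turn each list entry into one table initializer. */
#define INTEL_VGA_DEVICE(id, info) { 0x8086, id, info }

static const struct xe_id xe_ids[] = {
	XE_DG1_IDS(INTEL_VGA_DEVICE, NULL),
};
```

Each ID in the list expands to one initializer, so the table length tracks the list automatically; passing a different macro as the first argument reuses the same ID lists for other purposes. Note that `, ## __VA_ARGS__` is a GNU C extension (it swallows the comma when no extra arguments are given), which is fine for kernel code.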
+787
include/uapi/drm/xe_drm.h
··· 1 + /* 2 + * Copyright 2021 Intel Corporation. All Rights Reserved. 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the 6 + * "Software"), to deal in the Software without restriction, including 7 + * without limitation the rights to use, copy, modify, merge, publish, 8 + * distribute, sub license, and/or sell copies of the Software, and to 9 + * permit persons to whom the Software is furnished to do so, subject to 10 + * the following conditions: 11 + * 12 + * The above copyright notice and this permission notice (including the 13 + * next paragraph) shall be included in all copies or substantial portions 14 + * of the Software. 15 + * 16 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 17 + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 19 + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 20 + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 + * 24 + */ 25 + 26 + #ifndef _UAPI_XE_DRM_H_ 27 + #define _UAPI_XE_DRM_H_ 28 + 29 + #include "drm.h" 30 + 31 + #if defined(__cplusplus) 32 + extern "C" { 33 + #endif 34 + 35 + /* Please note that modifications to all structs defined here are 36 + * subject to backwards-compatibility constraints. 37 + */ 38 + 39 + /** 40 + * struct xe_user_extension - Base class for defining a chain of extensions 41 + * 42 + * Many interfaces need to grow over time. In most cases we can simply 43 + * extend the struct and have userspace pass in more data. Another option, 44 + * as demonstrated by Vulkan's approach to providing extensions for forward 45 + * and backward compatibility, is to use a list of optional structs to 46 + * provide those extra details. 47 + * 48 + * The key advantage to using an extension chain is that it allows us to 49 + * redefine the interface more easily than an ever growing struct of 50 + * increasing complexity, and for large parts of that interface to be 51 + * entirely optional. The downside is more pointer chasing; chasing across 52 + * the __user boundary with pointers encapsulated inside u64. 53 + * 54 + * Example chaining: 55 + * 56 + * .. code-block:: C 57 + * 58 + * struct xe_user_extension ext3 { 59 + * .next_extension = 0, // end 60 + * .name = ..., 61 + * }; 62 + * struct xe_user_extension ext2 { 63 + * .next_extension = (uintptr_t)&ext3, 64 + * .name = ..., 65 + * }; 66 + * struct xe_user_extension ext1 { 67 + * .next_extension = (uintptr_t)&ext2, 68 + * .name = ..., 69 + * }; 70 + * 71 + * Typically the struct xe_user_extension would be embedded in some uAPI 72 + * struct, and in this case we would feed it the head of the chain (i.e. ext1), 73 + * which would then apply all of the above extensions. 74 + * 75 + */ 76 + struct xe_user_extension { 77 + /** 78 + * @next_extension: 79 + * 80 + * Pointer to the next struct xe_user_extension, or zero if the end. 81 + */ 82 + __u64 next_extension; 83 + /** 84 + * @name: Name of the extension. 85 + * 86 + * Note that the name here is just some integer. 87 + * 88 + * Also note that the name space for this is not global for the whole 89 + * driver, but rather its scope/meaning is limited to the specific piece 90 + * of uAPI which has embedded the struct xe_user_extension. 91 + */ 92 + __u32 name; 93 + /** 94 + * @pad: MBZ 95 + * 96 + * All undefined bits must be zero. 97 + */ 98 + __u32 pad; 99 + }; 100 + 101 + /* 102 + * Xe specific ioctls. 
103 + * 104 + * The device specific ioctl range is [DRM_COMMAND_BASE, DRM_COMMAND_END) ie 105 + * [0x40, 0xa0) (a0 is excluded). The numbers below are defined as offset 106 + * against DRM_COMMAND_BASE and should be between [0x0, 0x60). 107 + */ 108 + #define DRM_XE_DEVICE_QUERY 0x00 109 + #define DRM_XE_GEM_CREATE 0x01 110 + #define DRM_XE_GEM_MMAP_OFFSET 0x02 111 + #define DRM_XE_VM_CREATE 0x03 112 + #define DRM_XE_VM_DESTROY 0x04 113 + #define DRM_XE_VM_BIND 0x05 114 + #define DRM_XE_ENGINE_CREATE 0x06 115 + #define DRM_XE_ENGINE_DESTROY 0x07 116 + #define DRM_XE_EXEC 0x08 117 + #define DRM_XE_MMIO 0x09 118 + #define DRM_XE_ENGINE_SET_PROPERTY 0x0a 119 + #define DRM_XE_WAIT_USER_FENCE 0x0b 120 + #define DRM_XE_VM_MADVISE 0x0c 121 + 122 + /* Must be kept compact -- no holes */ 123 + #define DRM_IOCTL_XE_DEVICE_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query) 124 + #define DRM_IOCTL_XE_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_GEM_CREATE, struct drm_xe_gem_create) 125 + #define DRM_IOCTL_XE_GEM_MMAP_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_GEM_MMAP_OFFSET, struct drm_xe_gem_mmap_offset) 126 + #define DRM_IOCTL_XE_VM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_CREATE, struct drm_xe_vm_create) 127 + #define DRM_IOCTL_XE_VM_DESTROY DRM_IOW( DRM_COMMAND_BASE + DRM_XE_VM_DESTROY, struct drm_xe_vm_destroy) 128 + #define DRM_IOCTL_XE_VM_BIND DRM_IOW( DRM_COMMAND_BASE + DRM_XE_VM_BIND, struct drm_xe_vm_bind) 129 + #define DRM_IOCTL_XE_ENGINE_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_ENGINE_CREATE, struct drm_xe_engine_create) 130 + #define DRM_IOCTL_XE_ENGINE_DESTROY DRM_IOW( DRM_COMMAND_BASE + DRM_XE_ENGINE_DESTROY, struct drm_xe_engine_destroy) 131 + #define DRM_IOCTL_XE_EXEC DRM_IOW( DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec) 132 + #define DRM_IOCTL_XE_MMIO DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_MMIO, struct drm_xe_mmio) 133 + #define DRM_IOCTL_XE_ENGINE_SET_PROPERTY DRM_IOW( DRM_COMMAND_BASE + DRM_XE_ENGINE_SET_PROPERTY, 
struct drm_xe_engine_set_property) 134 + #define DRM_IOCTL_XE_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence) 135 + #define DRM_IOCTL_XE_VM_MADVISE DRM_IOW( DRM_COMMAND_BASE + DRM_XE_VM_MADVISE, struct drm_xe_vm_madvise) 136 + 137 + struct drm_xe_engine_class_instance { 138 + __u16 engine_class; 139 + 140 + #define DRM_XE_ENGINE_CLASS_RENDER 0 141 + #define DRM_XE_ENGINE_CLASS_COPY 1 142 + #define DRM_XE_ENGINE_CLASS_VIDEO_DECODE 2 143 + #define DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE 3 144 + #define DRM_XE_ENGINE_CLASS_COMPUTE 4 145 + /* 146 + * Kernel only class (not actual hardware engine class). Used for 147 + * creating ordered queues of VM bind operations. 148 + */ 149 + #define DRM_XE_ENGINE_CLASS_VM_BIND 5 150 + 151 + __u16 engine_instance; 152 + __u16 gt_id; 153 + }; 154 + 155 + #define XE_MEM_REGION_CLASS_SYSMEM 0 156 + #define XE_MEM_REGION_CLASS_VRAM 1 157 + 158 + struct drm_xe_query_mem_usage { 159 + __u32 num_regions; 160 + __u32 pad; 161 + 162 + struct drm_xe_query_mem_region { 163 + __u16 mem_class; 164 + __u16 instance; /* unique ID even among different classes */ 165 + __u32 pad; 166 + __u32 min_page_size; 167 + __u32 max_page_size; 168 + __u64 total_size; 169 + __u64 used; 170 + __u64 reserved[8]; 171 + } regions[]; 172 + }; 173 + 174 + struct drm_xe_query_config { 175 + __u32 num_params; 176 + __u32 pad; 177 + #define XE_QUERY_CONFIG_REV_AND_DEVICE_ID 0 178 + #define XE_QUERY_CONFIG_FLAGS 1 179 + #define XE_QUERY_CONFIG_FLAGS_HAS_VRAM (0x1 << 0) 180 + #define XE_QUERY_CONFIG_FLAGS_USE_GUC (0x1 << 1) 181 + #define XE_QUERY_CONFIG_MIN_ALIGNMENT 2 182 + #define XE_QUERY_CONFIG_VA_BITS 3 183 + #define XE_QUERY_CONFIG_GT_COUNT 4 184 + #define XE_QUERY_CONFIG_MEM_REGION_COUNT 5 185 + #define XE_QUERY_CONFIG_NUM_PARAM (XE_QUERY_CONFIG_MEM_REGION_COUNT + 1) 186 + __u64 info[]; 187 + }; 188 + 189 + struct drm_xe_query_gts { 190 + __u32 num_gt; 191 + __u32 pad; 192 + 193 + /* 194 + * TODO: Perhaps info about 
every mem region relative to this GT? e.g. 195 + * bandwidth between this GT and remote region? 196 + */ 197 + 198 + struct drm_xe_query_gt { 199 + #define XE_QUERY_GT_TYPE_MAIN 0 200 + #define XE_QUERY_GT_TYPE_REMOTE 1 201 + #define XE_QUERY_GT_TYPE_MEDIA 2 202 + __u16 type; 203 + __u16 instance; 204 + __u32 clock_freq; 205 + __u64 features; 206 + __u64 native_mem_regions; /* bit mask of instances from drm_xe_query_mem_usage */ 207 + __u64 slow_mem_regions; /* bit mask of instances from drm_xe_query_mem_usage */ 208 + __u64 inaccessible_mem_regions; /* bit mask of instances from drm_xe_query_mem_usage */ 209 + __u64 reserved[8]; 210 + } gts[]; 211 + }; 212 + 213 + struct drm_xe_query_topology_mask { 214 + /** @gt_id: GT ID the mask is associated with */ 215 + __u16 gt_id; 216 + 217 + /** @type: type of mask */ 218 + __u16 type; 219 + #define XE_TOPO_DSS_GEOMETRY (1 << 0) 220 + #define XE_TOPO_DSS_COMPUTE (1 << 1) 221 + #define XE_TOPO_EU_PER_DSS (1 << 2) 222 + 223 + /** @num_bytes: number of bytes in requested mask */ 224 + __u32 num_bytes; 225 + 226 + /** @mask: little-endian mask of @num_bytes */ 227 + __u8 mask[]; 228 + }; 229 + 230 + struct drm_xe_device_query { 231 + /** @extensions: Pointer to the first extension struct, if any */ 232 + __u64 extensions; 233 + 234 + /** @query: The type of data to query */ 235 + __u32 query; 236 + 237 + #define DRM_XE_DEVICE_QUERY_ENGINES 0 238 + #define DRM_XE_DEVICE_QUERY_MEM_USAGE 1 239 + #define DRM_XE_DEVICE_QUERY_CONFIG 2 240 + #define DRM_XE_DEVICE_QUERY_GTS 3 241 + #define DRM_XE_DEVICE_QUERY_HWCONFIG 4 242 + #define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY 5 243 + 244 + /** @size: Size of the queried data */ 245 + __u32 size; 246 + 247 + /** @data: Queried data is placed here */ 248 + __u64 data; 249 + 250 + /** @reserved: Reserved */ 251 + __u64 reserved[2]; 252 + }; 253 + 254 + struct drm_xe_gem_create { 255 + /** @extensions: Pointer to the first extension struct, if any */ 256 + __u64 extensions; 257 + 258 + /** 259 + * 
@size: Requested size for the object 260 + * 261 + * The (page-aligned) allocated size for the object will be returned. 262 + */ 263 + __u64 size; 264 + 265 + /** 266 + * @flags: Flags, currently a mask of memory instances where the BO can 267 + * be placed 268 + */ 269 + #define XE_GEM_CREATE_FLAG_DEFER_BACKING (0x1 << 24) 270 + #define XE_GEM_CREATE_FLAG_SCANOUT (0x1 << 25) 271 + __u32 flags; 272 + 273 + /** 274 + * @vm_id: Attached VM, if any 275 + * 276 + * If a VM is specified, this BO must: 277 + * 278 + * 1. Only ever be bound to that VM. 279 + * 280 + * 2. Never be exported as a PRIME fd. 281 + */ 282 + __u32 vm_id; 283 + 284 + /** 285 + * @handle: Returned handle for the object. 286 + * 287 + * Object handles are nonzero. 288 + */ 289 + __u32 handle; 290 + 291 + /** @reserved: Reserved */ 292 + __u64 reserved[2]; 293 + }; 294 + 295 + struct drm_xe_gem_mmap_offset { 296 + /** @extensions: Pointer to the first extension struct, if any */ 297 + __u64 extensions; 298 + 299 + /** @handle: Handle for the object being mapped. 
*/ 300 + __u32 handle; 301 + 302 + /** @flags: Must be zero */ 303 + __u32 flags; 304 + 305 + /** @offset: The fake offset to use for subsequent mmap call */ 306 + __u64 offset; 307 + 308 + /** @reserved: Reserved */ 309 + __u64 reserved[2]; 310 + }; 311 + 312 + /** 313 + * struct drm_xe_vm_bind_op_error_capture - format of VM bind op error capture 314 + */ 315 + struct drm_xe_vm_bind_op_error_capture { 316 + /** @error: errno that occurred */ 317 + __s32 error; 318 + /** @op: operation that encountered an error */ 319 + __u32 op; 320 + /** @addr: address of bind op */ 321 + __u64 addr; 322 + /** @size: size of bind */ 323 + __u64 size; 324 + }; 325 + 326 + /** struct drm_xe_ext_vm_set_property - VM set property extension */ 327 + struct drm_xe_ext_vm_set_property { 328 + /** @base: base user extension */ 329 + struct xe_user_extension base; 330 + 331 + /** @property: property to set */ 332 + #define XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS 0 333 + __u32 property; 334 + 335 + /** @value: property value */ 336 + __u64 value; 337 + 338 + /** @reserved: Reserved */ 339 + __u64 reserved[2]; 340 + }; 341 + 342 + struct drm_xe_vm_create { 343 + /** @extensions: Pointer to the first extension struct, if any */ 344 + #define XE_VM_EXTENSION_SET_PROPERTY 0 345 + __u64 extensions; 346 + 347 + /** @flags: Flags */ 348 + __u32 flags; 349 + 350 + #define DRM_XE_VM_CREATE_SCRATCH_PAGE (0x1 << 0) 351 + #define DRM_XE_VM_CREATE_COMPUTE_MODE (0x1 << 1) 352 + #define DRM_XE_VM_CREATE_ASYNC_BIND_OPS (0x1 << 2) 353 + #define DRM_XE_VM_CREATE_FAULT_MODE (0x1 << 3) 354 + 355 + /** @vm_id: Returned VM ID */ 356 + __u32 vm_id; 357 + 358 + /** @reserved: Reserved */ 359 + __u64 reserved[2]; 360 + }; 361 + 362 + struct drm_xe_vm_destroy { 363 + /** @vm_id: VM ID */ 364 + __u32 vm_id; 365 + 366 + /** @pad: MBZ */ 367 + __u32 pad; 368 + 369 + /** @reserved: Reserved */ 370 + __u64 reserved[2]; 371 + }; 372 + 373 + struct drm_xe_vm_bind_op { 374 + /** 375 + * @obj: GEM object to operate on, 
MBZ for MAP_USERPTR, MBZ for UNMAP 376 + */ 377 + __u32 obj; 378 + 379 + union { 380 + /** 381 + * @obj_offset: Offset into the object, MBZ for CLEAR_RANGE, 382 + * ignored for unbind 383 + */ 384 + __u64 obj_offset; 385 + /** @userptr: user pointer to bind on */ 386 + __u64 userptr; 387 + }; 388 + 389 + /** 390 + * @range: Number of bytes from the object to bind to addr, MBZ for UNMAP_ALL 391 + */ 392 + __u64 range; 393 + 394 + /** @addr: Address to operate on, MBZ for UNMAP_ALL */ 395 + __u64 addr; 396 + 397 + /** 398 + * @gt_mask: Mask for which GTs to create binds for, 0 == All GTs, 399 + * only applies to creating new VMAs 400 + */ 401 + __u64 gt_mask; 402 + 403 + /** @op: Operation to perform (lower 16 bits) and flags (upper 16 bits) */ 404 + __u32 op; 405 + 406 + /** @region: Memory region to prefetch VMA to, instance not a mask */ 407 + __u32 region; 408 + 409 + #define XE_VM_BIND_OP_MAP 0x0 410 + #define XE_VM_BIND_OP_UNMAP 0x1 411 + #define XE_VM_BIND_OP_MAP_USERPTR 0x2 412 + #define XE_VM_BIND_OP_RESTART 0x3 413 + #define XE_VM_BIND_OP_UNMAP_ALL 0x4 414 + #define XE_VM_BIND_OP_PREFETCH 0x5 415 + 416 + #define XE_VM_BIND_FLAG_READONLY (0x1 << 16) 417 + /* 418 + * Bind op completions are always async, hence the support for out 419 + * syncs. This flag indicates the allocation of the memory for new page 420 + * tables and the job to program the page tables is asynchronous 421 + * relative to the IOCTL. That part of a bind operation can fail under 422 + * memory pressure, the job in practice can't fail unless the system is 423 + * totally shot. 424 + * 425 + * If this flag is clear and the IOCTL doesn't return an error, in 426 + * practice the bind op is good and will complete. 427 + * 428 + * If this flag is set and the IOCTL doesn't return an error, the bind op 429 + * can still fail and recovery is needed. If configured, the bind op that 430 + * caused the error will be captured in drm_xe_vm_bind_op_error_capture. 
431 + * Once the user sees the error (via a ufence + 432 + * XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS), it should free memory 433 + * via non-async unbinds, and then restart all queued async bind ops via 434 + * XE_VM_BIND_OP_RESTART. Or alternatively the user should destroy the 435 + * VM. 436 + * 437 + * This flag is only allowed when DRM_XE_VM_CREATE_ASYNC_BIND_OPS is 438 + * configured in the VM and must be set if the VM is configured with 439 + * DRM_XE_VM_CREATE_ASYNC_BIND_OPS and not in an error state. 440 + */ 441 + #define XE_VM_BIND_FLAG_ASYNC (0x1 << 17) 442 + /* 443 + * Valid on a faulting VM only, do the MAP operation immediately rather 444 + * than deferring the MAP to the page fault handler. 445 + */ 446 + #define XE_VM_BIND_FLAG_IMMEDIATE (0x1 << 18) 447 + 448 + /** @reserved: Reserved */ 449 + __u64 reserved[2]; 450 + }; 451 + 452 + struct drm_xe_vm_bind { 453 + /** @extensions: Pointer to the first extension struct, if any */ 454 + __u64 extensions; 455 + 456 + /** @vm_id: The ID of the VM to bind to */ 457 + __u32 vm_id; 458 + 459 + /** 460 + * @engine_id: engine_id, must be of class DRM_XE_ENGINE_CLASS_VM_BIND 461 + * and the engine must have the same vm_id. If zero, the default VM bind engine 462 + * is used. 
463 + */ 464 + __u32 engine_id; 465 + 466 + /** @num_binds: number of binds in this IOCTL */ 467 + __u32 num_binds; 468 + 469 + union { 470 + /** @bind: used if num_binds == 1 */ 471 + struct drm_xe_vm_bind_op bind; 472 + /** 473 + * @vector_of_binds: userptr to array of struct 474 + * drm_xe_vm_bind_op if num_binds > 1 475 + */ 476 + __u64 vector_of_binds; 477 + }; 478 + 479 + /** @num_syncs: number of syncs to wait on */ 480 + __u32 num_syncs; 481 + 482 + /** @syncs: pointer to struct drm_xe_sync array */ 483 + __u64 syncs; 484 + 485 + /** @reserved: Reserved */ 486 + __u64 reserved[2]; 487 + }; 488 + 489 + /** struct drm_xe_ext_engine_set_property - engine set property extension */ 490 + struct drm_xe_ext_engine_set_property { 491 + /** @base: base user extension */ 492 + struct xe_user_extension base; 493 + 494 + /** @property: property to set */ 495 + __u32 property; 496 + 497 + /** @value: property value */ 498 + __u64 value; 499 + }; 500 + 501 + /** 502 + * struct drm_xe_engine_set_property - engine set property 503 + * 504 + * Same namespace for extensions as drm_xe_engine_create 505 + */ 506 + struct drm_xe_engine_set_property { 507 + /** @extensions: Pointer to the first extension struct, if any */ 508 + __u64 extensions; 509 + 510 + /** @engine_id: Engine ID */ 511 + __u32 engine_id; 512 + 513 + /** @property: property to set */ 514 + #define XE_ENGINE_PROPERTY_PRIORITY 0 515 + #define XE_ENGINE_PROPERTY_TIMESLICE 1 516 + #define XE_ENGINE_PROPERTY_PREEMPTION_TIMEOUT 2 517 + /* 518 + * Long running or ULLS engine mode. DMA fences not allowed in this 519 + * mode. Must match the value of DRM_XE_VM_CREATE_COMPUTE_MODE, serves 520 + * as a sanity check that the UMD knows what it is doing. Can only be set at 521 + * engine create time. 
522 + */ 523 + #define XE_ENGINE_PROPERTY_COMPUTE_MODE 3 524 + #define XE_ENGINE_PROPERTY_PERSISTENCE 4 525 + #define XE_ENGINE_PROPERTY_JOB_TIMEOUT 5 526 + #define XE_ENGINE_PROPERTY_ACC_TRIGGER 6 527 + #define XE_ENGINE_PROPERTY_ACC_NOTIFY 7 528 + #define XE_ENGINE_PROPERTY_ACC_GRANULARITY 8 529 + __u32 property; 530 + 531 + /** @value: property value */ 532 + __u64 value; 533 + 534 + /** @reserved: Reserved */ 535 + __u64 reserved[2]; 536 + }; 537 + 538 + struct drm_xe_engine_create { 539 + /** @extensions: Pointer to the first extension struct, if any */ 540 + #define XE_ENGINE_EXTENSION_SET_PROPERTY 0 541 + __u64 extensions; 542 + 543 + /** @width: submission width (number of BBs per exec) for this engine */ 544 + __u16 width; 545 + 546 + /** @num_placements: number of valid placements for this engine */ 547 + __u16 num_placements; 548 + 549 + /** @vm_id: VM to use for this engine */ 550 + __u32 vm_id; 551 + 552 + /** @flags: MBZ */ 553 + __u32 flags; 554 + 555 + /** @engine_id: Returned engine ID */ 556 + __u32 engine_id; 557 + 558 + /** 559 + * @instances: user pointer to a 2-d array of struct 560 + * drm_xe_engine_class_instance 561 + * 562 + * length = width (i) * num_placements (j) 563 + * index = j + i * width 564 + */ 565 + __u64 instances; 566 + 567 + /** @reserved: Reserved */ 568 + __u64 reserved[2]; 569 + }; 570 + 571 + struct drm_xe_engine_destroy { 572 + /** @engine_id: Engine ID */ 573 + __u32 engine_id; 574 + 575 + /** @pad: MBZ */ 576 + __u32 pad; 577 + 578 + /** @reserved: Reserved */ 579 + __u64 reserved[2]; 580 + }; 581 + 582 + struct drm_xe_sync { 583 + /** @extensions: Pointer to the first extension struct, if any */ 584 + __u64 extensions; 585 + 586 + __u32 flags; 587 + 588 + #define DRM_XE_SYNC_SYNCOBJ 0x0 589 + #define DRM_XE_SYNC_TIMELINE_SYNCOBJ 0x1 590 + #define DRM_XE_SYNC_DMA_BUF 0x2 591 + #define DRM_XE_SYNC_USER_FENCE 0x3 592 + #define DRM_XE_SYNC_SIGNAL 0x10 593 + 594 + union { 595 + __u32 handle; 596 + /** 597 + * @addr: Address of user 
fence. When the sync is passed in via the exec 598 + * IOCTL this is a GPU address in the VM. When the sync is passed in via 599 + * the VM bind IOCTL this is a user pointer. In either case, it is 600 + * the user's responsibility that this address is present and 601 + * mapped when the user fence is signalled. Must be qword 602 + * aligned. 603 + */ 604 + __u64 addr; 605 + }; 606 + 607 + __u64 timeline_value; 608 + 609 + /** @reserved: Reserved */ 610 + __u64 reserved[2]; 611 + }; 612 + 613 + struct drm_xe_exec { 614 + /** @extensions: Pointer to the first extension struct, if any */ 615 + __u64 extensions; 616 + 617 + /** @engine_id: Engine ID for the batch buffer */ 618 + __u32 engine_id; 619 + 620 + /** @num_syncs: Number of struct drm_xe_sync in array. */ 621 + __u32 num_syncs; 622 + 623 + /** @syncs: Pointer to struct drm_xe_sync array. */ 624 + __u64 syncs; 625 + 626 + /** 627 + * @address: address of batch buffer if num_batch_buffer == 1 or an 628 + * array of batch buffer addresses 629 + */ 630 + __u64 address; 631 + 632 + /** 633 + * @num_batch_buffer: number of batch buffers in this exec, must match 634 + * the width of the engine 635 + */ 636 + __u16 num_batch_buffer; 637 + 638 + /** @reserved: Reserved */ 639 + __u64 reserved[2]; 640 + }; 641 + 642 + struct drm_xe_mmio { 643 + /** @extensions: Pointer to the first extension struct, if any */ 644 + __u64 extensions; 645 + 646 + __u32 addr; 647 + 648 + __u32 flags; 649 + 650 + #define DRM_XE_MMIO_8BIT 0x0 651 + #define DRM_XE_MMIO_16BIT 0x1 652 + #define DRM_XE_MMIO_32BIT 0x2 653 + #define DRM_XE_MMIO_64BIT 0x3 654 + #define DRM_XE_MMIO_BITS_MASK 0x3 655 + #define DRM_XE_MMIO_READ 0x4 656 + #define DRM_XE_MMIO_WRITE 0x8 657 + 658 + __u64 value; 659 + 660 + /** @reserved: Reserved */ 661 + __u64 reserved[2]; 662 + }; 663 + 664 + /** 665 + * struct drm_xe_wait_user_fence - wait user fence 666 + * 667 + * Wait on user fence, XE will wake up on every HW engine interrupt in the 668 + * instances list and check if user fence is complete: 
669 + * (*addr & MASK) OP (VALUE & MASK) 670 + * 671 + * Returns to user on user fence completion or timeout. 672 + */ 673 + struct drm_xe_wait_user_fence { 674 + /** @extensions: Pointer to the first extension struct, if any */ 675 + __u64 extensions; 676 + union { 677 + /** 678 + * @addr: user pointer address to wait on, must be qword aligned 679 + */ 680 + __u64 addr; 681 + /** 682 + * @vm_id: The ID of the VM which encountered an error, used with 683 + * DRM_XE_UFENCE_WAIT_VM_ERROR. Upper 32 bits must be clear. 684 + */ 685 + __u64 vm_id; 686 + }; 687 + /** @op: wait operation (type of comparison) */ 688 + #define DRM_XE_UFENCE_WAIT_EQ 0 689 + #define DRM_XE_UFENCE_WAIT_NEQ 1 690 + #define DRM_XE_UFENCE_WAIT_GT 2 691 + #define DRM_XE_UFENCE_WAIT_GTE 3 692 + #define DRM_XE_UFENCE_WAIT_LT 4 693 + #define DRM_XE_UFENCE_WAIT_LTE 5 694 + __u16 op; 695 + /** @flags: wait flags */ 696 + #define DRM_XE_UFENCE_WAIT_SOFT_OP (1 << 0) /* e.g. Wait on VM bind */ 697 + #define DRM_XE_UFENCE_WAIT_ABSTIME (1 << 1) 698 + #define DRM_XE_UFENCE_WAIT_VM_ERROR (1 << 2) 699 + __u16 flags; 700 + /** @value: compare value */ 701 + __u64 value; 702 + /** @mask: comparison mask */ 703 + #define DRM_XE_UFENCE_WAIT_U8 0xffu 704 + #define DRM_XE_UFENCE_WAIT_U16 0xffffu 705 + #define DRM_XE_UFENCE_WAIT_U32 0xffffffffu 706 + #define DRM_XE_UFENCE_WAIT_U64 0xffffffffffffffffu 707 + __u64 mask; 708 + /** @timeout: how long to wait before bailing, value in jiffies */ 709 + __s64 timeout; 710 + /** 711 + * @num_engines: number of engine instances to wait on, must be zero 712 + * when DRM_XE_UFENCE_WAIT_SOFT_OP set 713 + */ 714 + __u64 num_engines; 715 + /** 716 + * @instances: user pointer to array of drm_xe_engine_class_instance to 717 + * wait on, must be NULL when DRM_XE_UFENCE_WAIT_SOFT_OP set 718 + */ 719 + __u64 instances; 720 + 721 + /** @reserved: Reserved */ 722 + __u64 reserved[2]; 723 + }; 724 + 725 + struct drm_xe_vm_madvise { 726 + /** @extensions: Pointer to the first extension struct, 
if any */ 727 + __u64 extensions; 728 + 729 + /** @vm_id: The ID of the VM in which the VMA exists */ 730 + __u32 vm_id; 731 + 732 + /** @range: Number of bytes in the VMA */ 733 + __u64 range; 734 + 735 + /** @addr: Address of the VMA to operate on */ 736 + __u64 addr; 737 + 738 + /* 739 + * Setting the preferred location will trigger a migrate of the VMA 740 + * backing store to the new location if the backing store is already 741 + * allocated. 742 + */ 743 + #define DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS 0 744 + #define DRM_XE_VM_MADVISE_PREFERRED_GT 1 745 + /* 746 + * In this case the lower 32 bits are the mem class, the upper 32 are the GT. 747 + * The combination provides a single IOCTL that also migrates the VMA to the 748 + * preferred location. 749 + */ 750 + #define DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS_GT 2 751 + /* 752 + * The CPU will do atomic memory operations to this VMA. Must be set on 753 + * some devices for atomics to behave correctly. 754 + */ 755 + #define DRM_XE_VM_MADVISE_CPU_ATOMIC 3 756 + /* 757 + * The device will do atomic memory operations to this VMA. Must be set 758 + * on some devices for atomics to behave correctly. 759 + */ 760 + #define DRM_XE_VM_MADVISE_DEVICE_ATOMIC 4 761 + /* 762 + * Priority WRT eviction (moving from preferred memory location due 763 + * to memory pressure). The lower the priority, the more likely to be 764 + * evicted. 765 + */ 766 + #define DRM_XE_VM_MADVISE_PRIORITY 5 767 + #define DRM_XE_VMA_PRIORITY_LOW 0 768 + #define DRM_XE_VMA_PRIORITY_NORMAL 1 /* Default */ 769 + #define DRM_XE_VMA_PRIORITY_HIGH 2 /* Must be elevated user */ 770 + /* Pin the VMA in memory, must be elevated user */ 771 + #define DRM_XE_VM_MADVISE_PIN 6 772 + 773 + /** @property: property to set */ 774 + __u32 property; 775 + 776 + /** @value: property value */ 777 + __u64 value; 778 + 779 + /** @reserved: Reserved */ 780 + __u64 reserved[2]; 781 + }; 782 + 783 + #if defined(__cplusplus) 784 + } 785 + #endif 786 + 787 + #endif /* _UAPI_XE_DRM_H_ */
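The completion check documented for drm_xe_wait_user_fence, (*addr & MASK) OP (VALUE & MASK), can be modeled in userspace. The helper below is an illustrative sketch of that comparison (the helper itself is not part of the uAPI; only the op values mirror the defines above):

```c
#include <stdbool.h>
#include <stdint.h>

/* Op values mirroring drm_xe_wait_user_fence above. */
#define DRM_XE_UFENCE_WAIT_EQ  0
#define DRM_XE_UFENCE_WAIT_NEQ 1
#define DRM_XE_UFENCE_WAIT_GT  2
#define DRM_XE_UFENCE_WAIT_GTE 3
#define DRM_XE_UFENCE_WAIT_LT  4
#define DRM_XE_UFENCE_WAIT_LTE 5

/* Evaluate (*addr & mask) OP (value & mask), the check performed on each
 * HW engine interrupt while waiting. */
static bool ufence_done(const uint64_t *addr, uint16_t op,
			uint64_t value, uint64_t mask)
{
	uint64_t lhs = *addr & mask;
	uint64_t rhs = value & mask;

	switch (op) {
	case DRM_XE_UFENCE_WAIT_EQ:  return lhs == rhs;
	case DRM_XE_UFENCE_WAIT_NEQ: return lhs != rhs;
	case DRM_XE_UFENCE_WAIT_GT:  return lhs > rhs;
	case DRM_XE_UFENCE_WAIT_GTE: return lhs >= rhs;
	case DRM_XE_UFENCE_WAIT_LT:  return lhs < rhs;
	case DRM_XE_UFENCE_WAIT_LTE: return lhs <= rhs;
	}
	return false;	/* unknown op: never completes */
}
```

The mask selects the fence width (DRM_XE_UFENCE_WAIT_U8 through _U64), so a 32-bit seqno stored in a 64-bit slot compares correctly regardless of what sits in the upper bits.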