Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'drm-xe-next-2025-11-14' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next

Driver Changes:

Avoid TOCTOU when monitoring throttle reasons (Lucas)
Add/extend workaround (Nitin)
SRIOV migration work / plumbing (Michal Wajdeczko, Michal Winiarski, Lukasz)
Drop debug flag requirement for VF resource fixup
Fix MTL vm_max_level (Rodrigo)
Changes around TILE_ADDR_RANGE for platform compatibility
(Fei, Lucas)
Add runtime registers for GFX ver >= 35 (Piotr)
Kerneldoc fix (Kriish)
Rework pcode error mapping (Lucas)
Allow lockdown of the PF (Michal)
Eliminate GUC code caching of some frequency values (Sk)
Improvements around forcewake referencing (Matt Roper)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Thomas Hellstrom <thomas.hellstrom@linux.intel.com>
Link: https://patch.msgid.link/aRcJOrisG2qPbucE@fedora

+4038 -490
+2
drivers/gpu/drm/xe/Makefile
··· 174 174 xe_lmtt_2l.o \ 175 175 xe_lmtt_ml.o \ 176 176 xe_pci_sriov.o \ 177 + xe_sriov_packet.o \ 177 178 xe_sriov_pf.o \ 178 179 xe_sriov_pf_control.o \ 179 180 xe_sriov_pf_debugfs.o \ 181 + xe_sriov_pf_migration.o \ 180 182 xe_sriov_pf_provision.o \ 181 183 xe_sriov_pf_service.o \ 182 184 xe_sriov_pf_sysfs.o \
-1
drivers/gpu/drm/xe/regs/xe_gt_regs.h
··· 101 101 102 102 #define XE2_LMEM_CFG XE_REG(0x48b0) 103 103 104 - #define XEHP_TILE_ADDR_RANGE(_idx) XE_REG_MCR(0x4900 + (_idx) * 4) 105 104 #define XEHP_FLAT_CCS_BASE_ADDR XE_REG_MCR(0x4910) 106 105 #define XEHP_FLAT_CCS_PTR REG_GENMASK(31, 8) 107 106
+2
drivers/gpu/drm/xe/regs/xe_regs.h
··· 40 40 #define STOLEN_RESERVED XE_REG(0x1082c0) 41 41 #define WOPCM_SIZE_MASK REG_GENMASK64(9, 7) 42 42 43 + #define SG_TILE_ADDR_RANGE(_idx) XE_REG(0x1083a0 + (_idx) * 4) 44 + 43 45 #define MTL_RP_STATE_CAP XE_REG(0x138000) 44 46 45 47 #define MTL_GT_RPA_FREQUENCY XE_REG(0x138008)
+208
drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_config_kunit.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 AND MIT 2 + /* 3 + * Copyright © 2025 Intel Corporation 4 + */ 5 + 6 + #include <kunit/static_stub.h> 7 + #include <kunit/test.h> 8 + #include <kunit/test-bug.h> 9 + 10 + #include "xe_kunit_helpers.h" 11 + #include "xe_pci_test.h" 12 + 13 + #define TEST_MAX_VFS 63 14 + 15 + static void pf_set_admin_mode(struct xe_device *xe, bool enable) 16 + { 17 + /* should match logic of xe_sriov_pf_admin_only() */ 18 + xe->info.probe_display = !enable; 19 + KUNIT_EXPECT_EQ(kunit_get_current_test(), enable, xe_sriov_pf_admin_only(xe)); 20 + } 21 + 22 + static const void *num_vfs_gen_param(struct kunit *test, const void *prev, char *desc) 23 + { 24 + unsigned long next = 1 + (unsigned long)prev; 25 + 26 + if (next > TEST_MAX_VFS) 27 + return NULL; 28 + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%lu VF%s", 29 + next, str_plural(next)); 30 + return (void *)next; 31 + } 32 + 33 + static int pf_gt_config_test_init(struct kunit *test) 34 + { 35 + struct xe_pci_fake_data fake = { 36 + .sriov_mode = XE_SRIOV_MODE_PF, 37 + .platform = XE_TIGERLAKE, /* any random platform with SR-IOV */ 38 + .subplatform = XE_SUBPLATFORM_NONE, 39 + }; 40 + struct xe_device *xe; 41 + struct xe_gt *gt; 42 + 43 + test->priv = &fake; 44 + xe_kunit_helper_xe_device_test_init(test); 45 + 46 + xe = test->priv; 47 + KUNIT_ASSERT_TRUE(test, IS_SRIOV_PF(xe)); 48 + 49 + gt = xe_root_mmio_gt(xe); 50 + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gt); 51 + test->priv = gt; 52 + 53 + /* pretend it can support up to 63 VFs */ 54 + xe->sriov.pf.device_total_vfs = TEST_MAX_VFS; 55 + xe->sriov.pf.driver_max_vfs = TEST_MAX_VFS; 56 + KUNIT_ASSERT_EQ(test, xe_sriov_pf_get_totalvfs(xe), 63); 57 + 58 + pf_set_admin_mode(xe, false); 59 + KUNIT_ASSERT_EQ(test, xe_sriov_init(xe), 0); 60 + 61 + /* more sanity checks */ 62 + KUNIT_EXPECT_EQ(test, GUC_ID_MAX + 1, SZ_64K); 63 + KUNIT_EXPECT_EQ(test, GUC_NUM_DOORBELLS, SZ_256); 64 + 65 + return 0; 66 + } 67 + 68 + static void fair_contexts_1vf(struct kunit 
*test) 69 + { 70 + struct xe_gt *gt = test->priv; 71 + struct xe_device *xe = gt_to_xe(gt); 72 + 73 + pf_set_admin_mode(xe, false); 74 + KUNIT_ASSERT_FALSE(test, xe_sriov_pf_admin_only(xe)); 75 + KUNIT_EXPECT_EQ(test, SZ_32K, pf_profile_fair_ctxs(gt, 1)); 76 + 77 + pf_set_admin_mode(xe, true); 78 + KUNIT_ASSERT_TRUE(test, xe_sriov_pf_admin_only(xe)); 79 + KUNIT_EXPECT_EQ(test, SZ_64K - SZ_1K, pf_profile_fair_ctxs(gt, 1)); 80 + } 81 + 82 + static void fair_contexts(struct kunit *test) 83 + { 84 + unsigned int num_vfs = (unsigned long)test->param_value; 85 + struct xe_gt *gt = test->priv; 86 + struct xe_device *xe = gt_to_xe(gt); 87 + 88 + pf_set_admin_mode(xe, false); 89 + KUNIT_ASSERT_FALSE(test, xe_sriov_pf_admin_only(xe)); 90 + 91 + KUNIT_EXPECT_TRUE(test, is_power_of_2(pf_profile_fair_ctxs(gt, num_vfs))); 92 + KUNIT_EXPECT_GT(test, GUC_ID_MAX, num_vfs * pf_profile_fair_ctxs(gt, num_vfs)); 93 + 94 + if (num_vfs > 31) 95 + KUNIT_ASSERT_EQ(test, SZ_1K, pf_profile_fair_ctxs(gt, num_vfs)); 96 + else if (num_vfs > 15) 97 + KUNIT_ASSERT_EQ(test, SZ_2K, pf_profile_fair_ctxs(gt, num_vfs)); 98 + else if (num_vfs > 7) 99 + KUNIT_ASSERT_EQ(test, SZ_4K, pf_profile_fair_ctxs(gt, num_vfs)); 100 + else if (num_vfs > 3) 101 + KUNIT_ASSERT_EQ(test, SZ_8K, pf_profile_fair_ctxs(gt, num_vfs)); 102 + else if (num_vfs > 1) 103 + KUNIT_ASSERT_EQ(test, SZ_16K, pf_profile_fair_ctxs(gt, num_vfs)); 104 + else 105 + KUNIT_ASSERT_EQ(test, SZ_32K, pf_profile_fair_ctxs(gt, num_vfs)); 106 + } 107 + 108 + static void fair_doorbells_1vf(struct kunit *test) 109 + { 110 + struct xe_gt *gt = test->priv; 111 + struct xe_device *xe = gt_to_xe(gt); 112 + 113 + pf_set_admin_mode(xe, false); 114 + KUNIT_ASSERT_FALSE(test, xe_sriov_pf_admin_only(xe)); 115 + KUNIT_EXPECT_EQ(test, 128, pf_profile_fair_dbs(gt, 1)); 116 + 117 + pf_set_admin_mode(xe, true); 118 + KUNIT_ASSERT_TRUE(test, xe_sriov_pf_admin_only(xe)); 119 + KUNIT_EXPECT_EQ(test, 240, pf_profile_fair_dbs(gt, 1)); 120 + } 121 + 122 + static void 
fair_doorbells(struct kunit *test) 123 + { 124 + unsigned int num_vfs = (unsigned long)test->param_value; 125 + struct xe_gt *gt = test->priv; 126 + struct xe_device *xe = gt_to_xe(gt); 127 + 128 + pf_set_admin_mode(xe, false); 129 + KUNIT_ASSERT_FALSE(test, xe_sriov_pf_admin_only(xe)); 130 + 131 + KUNIT_EXPECT_TRUE(test, is_power_of_2(pf_profile_fair_dbs(gt, num_vfs))); 132 + KUNIT_EXPECT_GE(test, GUC_NUM_DOORBELLS, (num_vfs + 1) * pf_profile_fair_dbs(gt, num_vfs)); 133 + 134 + if (num_vfs > 31) 135 + KUNIT_ASSERT_EQ(test, SZ_4, pf_profile_fair_dbs(gt, num_vfs)); 136 + else if (num_vfs > 15) 137 + KUNIT_ASSERT_EQ(test, SZ_8, pf_profile_fair_dbs(gt, num_vfs)); 138 + else if (num_vfs > 7) 139 + KUNIT_ASSERT_EQ(test, SZ_16, pf_profile_fair_dbs(gt, num_vfs)); 140 + else if (num_vfs > 3) 141 + KUNIT_ASSERT_EQ(test, SZ_32, pf_profile_fair_dbs(gt, num_vfs)); 142 + else if (num_vfs > 1) 143 + KUNIT_ASSERT_EQ(test, SZ_64, pf_profile_fair_dbs(gt, num_vfs)); 144 + else 145 + KUNIT_ASSERT_EQ(test, SZ_128, pf_profile_fair_dbs(gt, num_vfs)); 146 + } 147 + 148 + static void fair_ggtt_1vf(struct kunit *test) 149 + { 150 + struct xe_gt *gt = test->priv; 151 + struct xe_device *xe = gt_to_xe(gt); 152 + 153 + pf_set_admin_mode(xe, false); 154 + KUNIT_ASSERT_FALSE(test, xe_sriov_pf_admin_only(xe)); 155 + KUNIT_EXPECT_EQ(test, SZ_2G, pf_profile_fair_ggtt(gt, 1)); 156 + 157 + pf_set_admin_mode(xe, true); 158 + KUNIT_ASSERT_TRUE(test, xe_sriov_pf_admin_only(xe)); 159 + KUNIT_EXPECT_EQ(test, SZ_2G + SZ_1G + SZ_512M, pf_profile_fair_ggtt(gt, 1)); 160 + } 161 + 162 + static void fair_ggtt(struct kunit *test) 163 + { 164 + unsigned int num_vfs = (unsigned long)test->param_value; 165 + struct xe_gt *gt = test->priv; 166 + struct xe_device *xe = gt_to_xe(gt); 167 + u64 alignment = pf_get_ggtt_alignment(gt); 168 + u64 shareable = SZ_2G + SZ_1G + SZ_512M; 169 + 170 + pf_set_admin_mode(xe, false); 171 + KUNIT_ASSERT_FALSE(test, xe_sriov_pf_admin_only(xe)); 172 + 173 + KUNIT_EXPECT_TRUE(test, 
IS_ALIGNED(pf_profile_fair_ggtt(gt, num_vfs), alignment)); 174 + KUNIT_EXPECT_GE(test, shareable, num_vfs * pf_profile_fair_ggtt(gt, num_vfs)); 175 + 176 + if (num_vfs > 56) 177 + KUNIT_ASSERT_EQ(test, SZ_64M - SZ_8M, pf_profile_fair_ggtt(gt, num_vfs)); 178 + else if (num_vfs > 28) 179 + KUNIT_ASSERT_EQ(test, SZ_64M, pf_profile_fair_ggtt(gt, num_vfs)); 180 + else if (num_vfs > 14) 181 + KUNIT_ASSERT_EQ(test, SZ_128M, pf_profile_fair_ggtt(gt, num_vfs)); 182 + else if (num_vfs > 7) 183 + KUNIT_ASSERT_EQ(test, SZ_256M, pf_profile_fair_ggtt(gt, num_vfs)); 184 + else if (num_vfs > 3) 185 + KUNIT_ASSERT_EQ(test, SZ_512M, pf_profile_fair_ggtt(gt, num_vfs)); 186 + else if (num_vfs > 1) 187 + KUNIT_ASSERT_EQ(test, SZ_1G, pf_profile_fair_ggtt(gt, num_vfs)); 188 + else 189 + KUNIT_ASSERT_EQ(test, SZ_2G, pf_profile_fair_ggtt(gt, num_vfs)); 190 + } 191 + 192 + static struct kunit_case pf_gt_config_test_cases[] = { 193 + KUNIT_CASE(fair_contexts_1vf), 194 + KUNIT_CASE(fair_doorbells_1vf), 195 + KUNIT_CASE(fair_ggtt_1vf), 196 + KUNIT_CASE_PARAM(fair_contexts, num_vfs_gen_param), 197 + KUNIT_CASE_PARAM(fair_doorbells, num_vfs_gen_param), 198 + KUNIT_CASE_PARAM(fair_ggtt, num_vfs_gen_param), 199 + {} 200 + }; 201 + 202 + static struct kunit_suite pf_gt_config_suite = { 203 + .name = "pf_gt_config", 204 + .test_cases = pf_gt_config_test_cases, 205 + .init = pf_gt_config_test_init, 206 + }; 207 + 208 + kunit_test_suite(pf_gt_config_suite);
+4 -4
drivers/gpu/drm/xe/xe_eu_stall.c
··· 49 49 wait_queue_head_t poll_wq; 50 50 size_t data_record_size; 51 51 size_t per_xecore_buf_size; 52 + unsigned int fw_ref; 52 53 53 54 struct xe_gt *gt; 54 55 struct xe_bo *bo; ··· 661 660 struct per_xecore_buf *xecore_buf; 662 661 struct xe_gt *gt = stream->gt; 663 662 u16 group, instance; 664 - unsigned int fw_ref; 665 663 int xecore; 666 664 667 665 /* Take runtime pm ref and forcewake to disable RC6 */ 668 666 xe_pm_runtime_get(gt_to_xe(gt)); 669 - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_RENDER); 670 - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_RENDER)) { 667 + stream->fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_RENDER); 668 + if (!xe_force_wake_ref_has_domain(stream->fw_ref, XE_FW_RENDER)) { 671 669 xe_gt_err(gt, "Failed to get RENDER forcewake\n"); 672 670 xe_pm_runtime_put(gt_to_xe(gt)); 673 671 return -ETIMEDOUT; ··· 832 832 xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2, 833 833 _MASKED_BIT_DISABLE(DISABLE_DOP_GATING)); 834 834 835 - xe_force_wake_put(gt_to_fw(gt), XE_FW_RENDER); 835 + xe_force_wake_put(gt_to_fw(gt), stream->fw_ref); 836 836 xe_pm_runtime_put(gt_to_xe(gt)); 837 837 838 838 return 0;
+24 -2
drivers/gpu/drm/xe/xe_force_wake_types.h
··· 52 52 }; 53 53 54 54 /** 55 - * struct xe_force_wake_domain - Xe force wake domains 55 + * struct xe_force_wake_domain - Xe force wake power domain 56 + * 57 + * Represents an individual device-internal power domain. The driver must 58 + * ensure the power domain is awake before accessing registers or other 59 + * hardware functionality that is part of the power domain. Since different 60 + * driver threads may access hardware units simultaneously, a reference count 61 + * is used to ensure that the domain remains awake as long as any software 62 + * is using the part of the hardware covered by the power domain. 63 + * 64 + * Hardware provides a register interface to allow the driver to request 65 + * wake/sleep of power domains, although in most cases the actual action of 66 + * powering the hardware up/down is handled by firmware (and may be subject to 67 + * requirements and constraints outside of the driver's visibility) so the 68 + * driver needs to wait for an acknowledgment that a wake request has been 69 + * acted upon before accessing the parts of the hardware that reside within the 70 + * power domain. 56 71 */ 57 72 struct xe_force_wake_domain { 58 73 /** @id: domain force wake id */ ··· 85 70 }; 86 71 87 72 /** 88 - * struct xe_force_wake - Xe force wake 73 + * struct xe_force_wake - Xe force wake collection 74 + * 75 + * Represents a collection of related power domains (struct 76 + * xe_force_wake_domain) associated with a subunit of the device. 77 + * 78 + * Currently only used for GT power domains (where the term "forcewake" is used 79 + * in the hardware documentation), although the interface could be extended to 80 + * power wells in other parts of the hardware in the future. 89 81 */ 90 82 struct xe_force_wake { 91 83 /** @gt: back pointers to GT */
+104
drivers/gpu/drm/xe/xe_ggtt.c
··· 151 151 ggtt_update_access_counter(ggtt); 152 152 } 153 153 154 + static u64 xe_ggtt_get_pte(struct xe_ggtt *ggtt, u64 addr) 155 + { 156 + xe_tile_assert(ggtt->tile, !(addr & XE_PTE_MASK)); 157 + xe_tile_assert(ggtt->tile, addr < ggtt->size); 158 + 159 + return readq(&ggtt->gsm[addr >> XE_PTE_SHIFT]); 160 + } 161 + 154 162 static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size) 155 163 { 156 164 u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB]; ··· 241 233 static const struct xe_ggtt_pt_ops xelp_pt_ops = { 242 234 .pte_encode_flags = xelp_ggtt_pte_flags, 243 235 .ggtt_set_pte = xe_ggtt_set_pte, 236 + .ggtt_get_pte = xe_ggtt_get_pte, 244 237 }; 245 238 246 239 static const struct xe_ggtt_pt_ops xelpg_pt_ops = { 247 240 .pte_encode_flags = xelpg_ggtt_pte_flags, 248 241 .ggtt_set_pte = xe_ggtt_set_pte, 242 + .ggtt_get_pte = xe_ggtt_get_pte, 249 243 }; 250 244 251 245 static const struct xe_ggtt_pt_ops xelpg_pt_wa_ops = { 252 246 .pte_encode_flags = xelpg_ggtt_pte_flags, 253 247 .ggtt_set_pte = xe_ggtt_set_pte_and_flush, 248 + .ggtt_get_pte = xe_ggtt_get_pte, 254 249 }; 255 250 256 251 static void __xe_ggtt_init_early(struct xe_ggtt *ggtt, u32 reserved) ··· 709 698 } 710 699 711 700 /** 701 + * xe_ggtt_node_pt_size() - Get the size of page table entries needed to map a GGTT node. 702 + * @node: the &xe_ggtt_node 703 + * 704 + * Return: GGTT node page table entries size in bytes. 705 + */ 706 + size_t xe_ggtt_node_pt_size(const struct xe_ggtt_node *node) 707 + { 708 + if (!node) 709 + return 0; 710 + 711 + return node->base.size / XE_PAGE_SIZE * sizeof(u64); 712 + } 713 + 714 + /** 712 715 * xe_ggtt_map_bo - Map the BO into GGTT 713 716 * @ggtt: the &xe_ggtt where node will be mapped 714 717 * @node: the &xe_ggtt_node where this BO is mapped ··· 955 930 xe_ggtt_assign_locked(node->ggtt, &node->base, vfid); 956 931 mutex_unlock(&node->ggtt->lock); 957 932 } 933 + 934 + /** 935 + * xe_ggtt_node_save() - Save a &xe_ggtt_node to a buffer. 
936 + * @node: the &xe_ggtt_node to be saved 937 + * @dst: destination buffer 938 + * @size: destination buffer size in bytes 939 + * @vfid: VF identifier 940 + * 941 + * Return: 0 on success or a negative error code on failure. 942 + */ 943 + int xe_ggtt_node_save(struct xe_ggtt_node *node, void *dst, size_t size, u16 vfid) 944 + { 945 + struct xe_ggtt *ggtt; 946 + u64 start, end; 947 + u64 *buf = dst; 948 + u64 pte; 949 + 950 + if (!node) 951 + return -ENOENT; 952 + 953 + guard(mutex)(&node->ggtt->lock); 954 + 955 + if (xe_ggtt_node_pt_size(node) != size) 956 + return -EINVAL; 957 + 958 + ggtt = node->ggtt; 959 + start = node->base.start; 960 + end = start + node->base.size - 1; 961 + 962 + while (start < end) { 963 + pte = ggtt->pt_ops->ggtt_get_pte(ggtt, start); 964 + if (vfid != u64_get_bits(pte, GGTT_PTE_VFID)) 965 + return -EPERM; 966 + 967 + *buf++ = u64_replace_bits(pte, 0, GGTT_PTE_VFID); 968 + start += XE_PAGE_SIZE; 969 + } 970 + 971 + return 0; 972 + } 973 + 974 + /** 975 + * xe_ggtt_node_load() - Load a &xe_ggtt_node from a buffer. 976 + * @node: the &xe_ggtt_node to be loaded 977 + * @src: source buffer 978 + * @size: source buffer size in bytes 979 + * @vfid: VF identifier 980 + * 981 + * Return: 0 on success or a negative error code on failure. 
982 + */ 983 + int xe_ggtt_node_load(struct xe_ggtt_node *node, const void *src, size_t size, u16 vfid) 984 + { 985 + u64 vfid_pte = xe_encode_vfid_pte(vfid); 986 + const u64 *buf = src; 987 + struct xe_ggtt *ggtt; 988 + u64 start, end; 989 + 990 + if (!node) 991 + return -ENOENT; 992 + 993 + guard(mutex)(&node->ggtt->lock); 994 + 995 + if (xe_ggtt_node_pt_size(node) != size) 996 + return -EINVAL; 997 + 998 + ggtt = node->ggtt; 999 + start = node->base.start; 1000 + end = start + node->base.size - 1; 1001 + 1002 + while (start < end) { 1003 + vfid_pte = u64_replace_bits(*buf++, vfid, GGTT_PTE_VFID); 1004 + ggtt->pt_ops->ggtt_set_pte(ggtt, start, vfid_pte); 1005 + start += XE_PAGE_SIZE; 1006 + } 1007 + xe_ggtt_invalidate(ggtt); 1008 + 1009 + return 0; 1010 + } 1011 + 958 1012 #endif 959 1013 960 1014 /**
+3
drivers/gpu/drm/xe/xe_ggtt.h
··· 29 29 u32 size, u32 align, u32 mm_flags); 30 30 void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate); 31 31 bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node); 32 + size_t xe_ggtt_node_pt_size(const struct xe_ggtt_node *node); 32 33 void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, 33 34 struct xe_bo *bo, u16 pat_index); 34 35 void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo); ··· 44 43 45 44 #ifdef CONFIG_PCI_IOV 46 45 void xe_ggtt_assign(const struct xe_ggtt_node *node, u16 vfid); 46 + int xe_ggtt_node_save(struct xe_ggtt_node *node, void *dst, size_t size, u16 vfid); 47 + int xe_ggtt_node_load(struct xe_ggtt_node *node, const void *src, size_t size, u16 vfid); 47 48 #endif 48 49 49 50 #ifndef CONFIG_LOCKDEP
+2
drivers/gpu/drm/xe/xe_ggtt_types.h
··· 78 78 u64 (*pte_encode_flags)(struct xe_bo *bo, u16 pat_index); 79 79 /** @ggtt_set_pte: Directly write into GGTT's PTE */ 80 80 void (*ggtt_set_pte)(struct xe_ggtt *ggtt, u64 addr, u64 pte); 81 + /** @ggtt_get_pte: Directly read from GGTT's PTE */ 82 + u64 (*ggtt_get_pte)(struct xe_ggtt *ggtt, u64 addr); 81 83 }; 82 84 83 85 #endif
+143
drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
··· 9 9 #include "abi/guc_actions_sriov_abi.h" 10 10 #include "abi/guc_klvs_abi.h" 11 11 12 + #include "regs/xe_gtt_defs.h" 12 13 #include "regs/xe_guc_regs.h" 13 14 14 15 #include "xe_bo.h" ··· 698 697 return fair; 699 698 } 700 699 700 + static u64 pf_profile_fair_ggtt(struct xe_gt *gt, unsigned int num_vfs) 701 + { 702 + bool admin_only_pf = xe_sriov_pf_admin_only(gt_to_xe(gt)); 703 + u64 shareable = ALIGN_DOWN(GUC_GGTT_TOP, SZ_512M); 704 + u64 alignment = pf_get_ggtt_alignment(gt); 705 + 706 + if (admin_only_pf && num_vfs == 1) 707 + return ALIGN_DOWN(shareable, alignment); 708 + 709 + /* need to hardcode due to ~512M of GGTT being reserved */ 710 + if (num_vfs > 56) 711 + return SZ_64M - SZ_8M; 712 + 713 + return rounddown_pow_of_two(shareable / num_vfs); 714 + } 715 + 701 716 /** 702 717 * xe_gt_sriov_pf_config_set_fair_ggtt - Provision many VFs with fair GGTT. 703 718 * @gt: the &xe_gt (can't be media) ··· 727 710 int xe_gt_sriov_pf_config_set_fair_ggtt(struct xe_gt *gt, unsigned int vfid, 728 711 unsigned int num_vfs) 729 712 { 713 + u64 profile = pf_profile_fair_ggtt(gt, num_vfs); 730 714 u64 fair; 731 715 732 716 xe_gt_assert(gt, vfid); ··· 741 723 if (!fair) 742 724 return -ENOSPC; 743 725 726 + fair = min(fair, profile); 727 + if (fair < profile) 728 + xe_gt_sriov_info(gt, "Using non-profile provisioning (%s %llu vs %llu)\n", 729 + "GGTT", fair, profile); 730 + 744 731 return xe_gt_sriov_pf_config_bulk_set_ggtt(gt, vfid, num_vfs, fair); 732 + } 733 + 734 + /** 735 + * xe_gt_sriov_pf_config_ggtt_save() - Save a VF provisioned GGTT data into a buffer. 736 + * @gt: the &xe_gt 737 + * @vfid: VF identifier (can't be 0) 738 + * @buf: the GGTT data destination buffer (or NULL to query the buf size) 739 + * @size: the size of the buffer (or 0 to query the buf size) 740 + * 741 + * This function can only be called on PF. 
742 + * 743 + * Return: size of the buffer needed to save GGTT data if querying, 744 + * 0 on successful save or a negative error code on failure. 745 + */ 746 + ssize_t xe_gt_sriov_pf_config_ggtt_save(struct xe_gt *gt, unsigned int vfid, 747 + void *buf, size_t size) 748 + { 749 + struct xe_ggtt_node *node; 750 + 751 + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); 752 + xe_gt_assert(gt, vfid); 753 + xe_gt_assert(gt, !(!buf ^ !size)); 754 + 755 + guard(mutex)(xe_gt_sriov_pf_master_mutex(gt)); 756 + 757 + node = pf_pick_vf_config(gt, vfid)->ggtt_region; 758 + 759 + if (!buf) 760 + return xe_ggtt_node_pt_size(node); 761 + 762 + return xe_ggtt_node_save(node, buf, size, vfid); 763 + } 764 + 765 + /** 766 + * xe_gt_sriov_pf_config_ggtt_restore() - Restore a VF provisioned GGTT data from a buffer. 767 + * @gt: the &xe_gt 768 + * @vfid: VF identifier (can't be 0) 769 + * @buf: the GGTT data source buffer 770 + * @size: the size of the buffer 771 + * 772 + * This function can only be called on PF. 773 + * 774 + * Return: 0 on success or a negative error code on failure. 
775 + */ 776 + int xe_gt_sriov_pf_config_ggtt_restore(struct xe_gt *gt, unsigned int vfid, 777 + const void *buf, size_t size) 778 + { 779 + struct xe_ggtt_node *node; 780 + 781 + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); 782 + xe_gt_assert(gt, vfid); 783 + 784 + guard(mutex)(xe_gt_sriov_pf_master_mutex(gt)); 785 + 786 + node = pf_pick_vf_config(gt, vfid)->ggtt_region; 787 + 788 + return xe_ggtt_node_load(node, buf, size, vfid); 745 789 } 746 790 747 791 static u32 pf_get_min_spare_ctxs(struct xe_gt *gt) ··· 1065 985 "GuC context IDs", no_unit, n, err); 1066 986 } 1067 987 988 + static u32 pf_profile_fair_ctxs(struct xe_gt *gt, unsigned int num_vfs) 989 + { 990 + bool admin_only_pf = xe_sriov_pf_admin_only(gt_to_xe(gt)); 991 + 992 + if (admin_only_pf && num_vfs == 1) 993 + return ALIGN_DOWN(GUC_ID_MAX, SZ_1K); 994 + 995 + return rounddown_pow_of_two(GUC_ID_MAX / num_vfs); 996 + } 997 + 1068 998 static u32 pf_estimate_fair_ctxs(struct xe_gt *gt, unsigned int num_vfs) 1069 999 { 1070 1000 struct xe_guc_id_mgr *idm = &gt->uc.guc.submission_state.idm; ··· 1107 1017 int xe_gt_sriov_pf_config_set_fair_ctxs(struct xe_gt *gt, unsigned int vfid, 1108 1018 unsigned int num_vfs) 1109 1019 { 1020 + u32 profile = pf_profile_fair_ctxs(gt, num_vfs); 1110 1021 u32 fair; 1111 1022 1112 1023 xe_gt_assert(gt, vfid); ··· 1119 1028 1120 1029 if (!fair) 1121 1030 return -ENOSPC; 1031 + 1032 + fair = min(fair, profile); 1033 + if (fair < profile) 1034 + xe_gt_sriov_info(gt, "Using non-profile provisioning (%s %u vs %u)\n", 1035 + "GuC context IDs", fair, profile); 1122 1036 1123 1037 return xe_gt_sriov_pf_config_bulk_set_ctxs(gt, vfid, num_vfs, fair); 1124 1038 } ··· 1329 1233 "GuC doorbell IDs", no_unit, n, err); 1330 1234 } 1331 1235 1236 + static u32 pf_profile_fair_dbs(struct xe_gt *gt, unsigned int num_vfs) 1237 + { 1238 + bool admin_only_pf = xe_sriov_pf_admin_only(gt_to_xe(gt)); 1239 + 1240 + /* XXX: preliminary */ 1241 + if (admin_only_pf && num_vfs == 1) 1242 + return 
GUC_NUM_DOORBELLS - SZ_16; 1243 + 1244 + return rounddown_pow_of_two(GUC_NUM_DOORBELLS / (num_vfs + 1)); 1245 + } 1246 + 1332 1247 static u32 pf_estimate_fair_dbs(struct xe_gt *gt, unsigned int num_vfs) 1333 1248 { 1334 1249 struct xe_guc_db_mgr *dbm = &gt->uc.guc.dbm; ··· 1372 1265 int xe_gt_sriov_pf_config_set_fair_dbs(struct xe_gt *gt, unsigned int vfid, 1373 1266 unsigned int num_vfs) 1374 1267 { 1268 + u32 profile = pf_profile_fair_dbs(gt, num_vfs); 1375 1269 u32 fair; 1376 1270 1377 1271 xe_gt_assert(gt, vfid); ··· 1384 1276 1385 1277 if (!fair) 1386 1278 return -ENOSPC; 1279 + 1280 + fair = min(fair, profile); 1281 + if (fair < profile) 1282 + xe_gt_sriov_info(gt, "Using non-profile provisioning (%s %u vs %u)\n", 1283 + "GuC doorbell IDs", fair, profile); 1387 1284 1388 1285 return xe_gt_sriov_pf_config_bulk_set_dbs(gt, vfid, num_vfs, fair); 1389 1286 } ··· 1713 1600 return pf_config_bulk_set_u64_done(gt, vfid, num_vfs, size, 1714 1601 xe_gt_sriov_pf_config_get_lmem, 1715 1602 "LMEM", n, err); 1603 + } 1604 + 1605 + static struct xe_bo *pf_get_vf_config_lmem_obj(struct xe_gt *gt, unsigned int vfid) 1606 + { 1607 + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); 1608 + 1609 + return config->lmem_obj; 1610 + } 1611 + 1612 + /** 1613 + * xe_gt_sriov_pf_config_get_lmem_obj() - Take a reference to the struct &xe_bo backing VF LMEM. 1614 + * @gt: the &xe_gt 1615 + * @vfid: the VF identifier (can't be 0) 1616 + * 1617 + * This function can only be called on PF. 1618 + * The caller is responsible for calling xe_bo_put() on the returned object. 1619 + * 1620 + * Return: pointer to struct &xe_bo backing VF LMEM (if any). 
1621 + */ 1622 + struct xe_bo *xe_gt_sriov_pf_config_get_lmem_obj(struct xe_gt *gt, unsigned int vfid) 1623 + { 1624 + xe_gt_assert(gt, vfid); 1625 + 1626 + guard(mutex)(xe_gt_sriov_pf_master_mutex(gt)); 1627 + 1628 + return xe_bo_get(pf_get_vf_config_lmem_obj(gt, vfid)); 1716 1629 } 1717 1630 1718 1631 static u64 pf_query_free_lmem(struct xe_gt *gt) ··· 2932 2793 2933 2794 return 0; 2934 2795 } 2796 + 2797 + #if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST) 2798 + #include "tests/xe_gt_sriov_pf_config_kunit.c" 2799 + #endif
+6
drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
··· 36 36 int xe_gt_sriov_pf_config_set_fair_lmem(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs); 37 37 int xe_gt_sriov_pf_config_bulk_set_lmem(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs, 38 38 u64 size); 39 + struct xe_bo *xe_gt_sriov_pf_config_get_lmem_obj(struct xe_gt *gt, unsigned int vfid); 39 40 40 41 u32 xe_gt_sriov_pf_config_get_exec_quantum(struct xe_gt *gt, unsigned int vfid); 41 42 int xe_gt_sriov_pf_config_set_exec_quantum(struct xe_gt *gt, unsigned int vfid, u32 exec_quantum); ··· 71 70 ssize_t xe_gt_sriov_pf_config_save(struct xe_gt *gt, unsigned int vfid, void *buf, size_t size); 72 71 int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid, 73 72 const void *buf, size_t size); 73 + 74 + ssize_t xe_gt_sriov_pf_config_ggtt_save(struct xe_gt *gt, unsigned int vfid, 75 + void *buf, size_t size); 76 + int xe_gt_sriov_pf_config_ggtt_restore(struct xe_gt *gt, unsigned int vfid, 77 + const void *buf, size_t size); 74 78 75 79 bool xe_gt_sriov_pf_config_is_empty(struct xe_gt *gt, unsigned int vfid); 76 80
+614 -36
drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
··· 18 18 #include "xe_gt_sriov_printk.h" 19 19 #include "xe_guc_ct.h" 20 20 #include "xe_sriov.h" 21 + #include "xe_sriov_packet.h" 22 + #include "xe_sriov_packet_types.h" 21 23 #include "xe_sriov_pf_control.h" 24 + #include "xe_sriov_pf_migration.h" 22 25 #include "xe_sriov_pf_service.h" 23 26 #include "xe_tile.h" 24 27 ··· 184 181 CASE2STR(PAUSE_SEND_PAUSE); 185 182 CASE2STR(PAUSE_WAIT_GUC); 186 183 CASE2STR(PAUSE_GUC_DONE); 187 - CASE2STR(PAUSE_SAVE_GUC); 188 184 CASE2STR(PAUSE_FAILED); 189 185 CASE2STR(PAUSED); 186 + CASE2STR(SAVE_WIP); 187 + CASE2STR(SAVE_PROCESS_DATA); 188 + CASE2STR(SAVE_WAIT_DATA); 189 + CASE2STR(SAVE_DATA_DONE); 190 + CASE2STR(SAVE_FAILED); 191 + CASE2STR(SAVED); 192 + CASE2STR(RESTORE_WIP); 193 + CASE2STR(RESTORE_PROCESS_DATA); 194 + CASE2STR(RESTORE_WAIT_DATA); 195 + CASE2STR(RESTORE_DATA_DONE); 196 + CASE2STR(RESTORE_FAILED); 197 + CASE2STR(RESTORED); 190 198 CASE2STR(RESUME_WIP); 191 199 CASE2STR(RESUME_SEND_RESUME); 192 200 CASE2STR(RESUME_FAILED); ··· 222 208 case XE_GT_SRIOV_STATE_FLR_WIP: 223 209 case XE_GT_SRIOV_STATE_FLR_RESET_CONFIG: 224 210 return 5 * HZ; 211 + case XE_GT_SRIOV_STATE_RESTORE_WIP: 212 + return 20 * HZ; 225 213 default: 226 214 return HZ; 227 215 } ··· 241 225 { 242 226 struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid); 243 227 244 - return &cs->state; 228 + return cs->state; 245 229 } 246 230 247 231 static bool pf_check_vf_state(struct xe_gt *gt, unsigned int vfid, ··· 345 329 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED); 346 330 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED); 347 331 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED); 332 + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_FAILED); 333 + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED); 348 334 } 349 335 350 336 #define pf_enter_vf_state_machine_bug(gt, vfid) ({ \ ··· 377 359 378 360 static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid); 379 361 static void 
pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid); 362 + static void pf_exit_vf_save_wip(struct xe_gt *gt, unsigned int vfid); 363 + static void pf_exit_vf_restore_wip(struct xe_gt *gt, unsigned int vfid); 380 364 static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid); 381 365 static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid); 382 366 ··· 400 380 401 381 pf_exit_vf_flr_wip(gt, vfid); 402 382 pf_exit_vf_stop_wip(gt, vfid); 383 + pf_exit_vf_save_wip(gt, vfid); 384 + pf_exit_vf_restore_wip(gt, vfid); 403 385 pf_exit_vf_pause_wip(gt, vfid); 404 386 pf_exit_vf_resume_wip(gt, vfid); 405 387 ··· 421 399 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); 422 400 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED); 423 401 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED); 402 + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED); 403 + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED); 424 404 pf_exit_vf_mismatch(gt, vfid); 425 405 pf_exit_vf_wip(gt, vfid); 426 406 } ··· 453 429 * : PAUSE_GUC_DONE o-----restart 454 430 * : | : 455 431 * : | o---<--busy : 456 - * : v / / : 457 - * : PAUSE_SAVE_GUC : 432 + * : / : 458 433 * : / : 459 434 * : / : 460 435 * :....o..............o...............o...........: ··· 473 450 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE); 474 451 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC); 475 452 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE); 476 - pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC); 477 453 } 478 454 } 479 455 ··· 503 481 pf_enter_vf_pause_failed(gt, vfid); 504 482 } 505 483 506 - static void pf_enter_vf_pause_save_guc(struct xe_gt *gt, unsigned int vfid) 507 - { 508 - if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC)) 509 - pf_enter_vf_state_machine_bug(gt, vfid); 510 - } 511 - 512 - static bool pf_exit_vf_pause_save_guc(struct xe_gt *gt, unsigned int vfid) 513 - { 514 - int err; 515 - 516 - 
if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC)) 517 - return false; 518 - 519 - err = xe_gt_sriov_pf_migration_save_guc_state(gt, vfid); 520 - if (err) { 521 - /* retry if busy */ 522 - if (err == -EBUSY) { 523 - pf_enter_vf_pause_save_guc(gt, vfid); 524 - return true; 525 - } 526 - /* give up on error */ 527 - if (err == -EIO) 528 - pf_enter_vf_mismatch(gt, vfid); 529 - } 530 - 531 - pf_enter_vf_pause_completed(gt, vfid); 532 - return true; 533 - } 534 - 535 484 static bool pf_exit_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid) 536 485 { 537 486 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE)) 538 487 return false; 539 488 540 - pf_enter_vf_pause_save_guc(gt, vfid); 489 + pf_enter_vf_pause_completed(gt, vfid); 541 490 return true; 542 491 } 543 492 ··· 668 675 { 669 676 pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED); 670 677 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); 678 + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED); 679 + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED); 671 680 pf_exit_vf_mismatch(gt, vfid); 672 681 pf_exit_vf_wip(gt, vfid); 673 682 } ··· 748 753 return -EPERM; 749 754 } 750 755 756 + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) { 757 + xe_gt_sriov_dbg(gt, "VF%u save is in progress!\n", vfid); 758 + return -EBUSY; 759 + } 760 + 761 + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) { 762 + xe_gt_sriov_dbg(gt, "VF%u restore is in progress!\n", vfid); 763 + return -EBUSY; 764 + } 765 + 751 766 if (!pf_enter_vf_resume_wip(gt, vfid)) { 752 767 xe_gt_sriov_dbg(gt, "VF%u resume already in progress!\n", vfid); 753 768 return -EALREADY; ··· 779 774 780 775 xe_gt_sriov_dbg(gt, "VF%u resume was canceled!\n", vfid); 781 776 return -ECANCELED; 777 + } 778 + 779 + /** 780 + * DOC: The VF SAVE state machine 781 + * 782 + * SAVE extends the PAUSED state. 
783 + * 784 + * The VF SAVE state machine looks like:: 785 + * 786 + * ....PAUSED.................................................... 787 + * : : 788 + * : (O)<---------o : 789 + * : | \ : 790 + * : save (SAVED) (SAVE_FAILED) : 791 + * : | ^ ^ : 792 + * : | | | : 793 + * : ....V...............o...........o......SAVE_WIP......... : 794 + * : : | | | : : 795 + * : : | empty | : : 796 + * : : | | | : : 797 + * : : | | | : : 798 + * : : | DATA_DONE | : : 799 + * : : | ^ | : : 800 + * : : | | error : : 801 + * : : | no_data / : : 802 + * : : | / / : : 803 + * : : | / / : : 804 + * : : | / / : : 805 + * : : o---------->PROCESS_DATA<----consume : : 806 + * : : \ \ : : 807 + * : : \ \ : : 808 + * : : \ \ : : 809 + * : : ring_full----->WAIT_DATA : : 810 + * : : : : 811 + * : :......................................................: : 812 + * :............................................................: 813 + * 814 + * For the full state machine view, see `The VF state machine`_. 815 + */ 816 + 817 + static void pf_exit_vf_save_wip(struct xe_gt *gt, unsigned int vfid) 818 + { 819 + if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) { 820 + xe_gt_sriov_pf_migration_ring_free(gt, vfid); 821 + 822 + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA); 823 + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WAIT_DATA); 824 + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE); 825 + } 826 + } 827 + 828 + static void pf_enter_vf_saved(struct xe_gt *gt, unsigned int vfid) 829 + { 830 + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED)) 831 + pf_enter_vf_state_machine_bug(gt, vfid); 832 + 833 + xe_gt_sriov_dbg(gt, "VF%u saved!\n", vfid); 834 + 835 + pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); 836 + pf_exit_vf_mismatch(gt, vfid); 837 + pf_exit_vf_wip(gt, vfid); 838 + } 839 + 840 + static void pf_enter_vf_save_failed(struct xe_gt *gt, unsigned int vfid) 841 + { 842 + if (!pf_enter_vf_state(gt, vfid, 
XE_GT_SRIOV_STATE_SAVE_FAILED)) 843 + pf_enter_vf_state_machine_bug(gt, vfid); 844 + 845 + wake_up_all(xe_sriov_pf_migration_waitqueue(gt_to_xe(gt), vfid)); 846 + 847 + pf_exit_vf_wip(gt, vfid); 848 + } 849 + 850 + static int pf_handle_vf_save_data(struct xe_gt *gt, unsigned int vfid) 851 + { 852 + int ret; 853 + 854 + if (xe_gt_sriov_pf_migration_save_data_pending(gt, vfid, 855 + XE_SRIOV_PACKET_TYPE_GUC)) { 856 + ret = xe_gt_sriov_pf_migration_guc_save(gt, vfid); 857 + if (ret) 858 + return ret; 859 + 860 + xe_gt_sriov_pf_migration_save_data_complete(gt, vfid, 861 + XE_SRIOV_PACKET_TYPE_GUC); 862 + 863 + return -EAGAIN; 864 + } 865 + 866 + if (xe_gt_sriov_pf_migration_save_data_pending(gt, vfid, 867 + XE_SRIOV_PACKET_TYPE_GGTT)) { 868 + ret = xe_gt_sriov_pf_migration_ggtt_save(gt, vfid); 869 + if (ret) 870 + return ret; 871 + 872 + xe_gt_sriov_pf_migration_save_data_complete(gt, vfid, 873 + XE_SRIOV_PACKET_TYPE_GGTT); 874 + 875 + return -EAGAIN; 876 + } 877 + 878 + if (xe_gt_sriov_pf_migration_save_data_pending(gt, vfid, 879 + XE_SRIOV_PACKET_TYPE_MMIO)) { 880 + ret = xe_gt_sriov_pf_migration_mmio_save(gt, vfid); 881 + if (ret) 882 + return ret; 883 + 884 + xe_gt_sriov_pf_migration_save_data_complete(gt, vfid, 885 + XE_SRIOV_PACKET_TYPE_MMIO); 886 + 887 + return -EAGAIN; 888 + } 889 + 890 + if (xe_gt_sriov_pf_migration_save_data_pending(gt, vfid, 891 + XE_SRIOV_PACKET_TYPE_VRAM)) { 892 + ret = xe_gt_sriov_pf_migration_vram_save(gt, vfid); 893 + if (ret == -EAGAIN) 894 + return -EAGAIN; 895 + else if (ret) 896 + return ret; 897 + 898 + xe_gt_sriov_pf_migration_save_data_complete(gt, vfid, 899 + XE_SRIOV_PACKET_TYPE_VRAM); 900 + 901 + return -EAGAIN; 902 + } 903 + 904 + return 0; 905 + } 906 + 907 + static bool pf_handle_vf_save(struct xe_gt *gt, unsigned int vfid) 908 + { 909 + int ret; 910 + 911 + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA)) 912 + return false; 913 + 914 + if (xe_gt_sriov_pf_migration_ring_full(gt, vfid)) { 915 + 
pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WAIT_DATA); 916 + return true; 917 + } 918 + 919 + ret = pf_handle_vf_save_data(gt, vfid); 920 + if (ret == -EAGAIN) 921 + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA); 922 + else if (ret) 923 + pf_enter_vf_save_failed(gt, vfid); 924 + else 925 + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE); 926 + 927 + return true; 928 + } 929 + 930 + static void pf_exit_vf_save_wait_data(struct xe_gt *gt, unsigned int vfid) 931 + { 932 + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WAIT_DATA)) 933 + return; 934 + 935 + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA); 936 + pf_queue_vf(gt, vfid); 937 + } 938 + 939 + static bool pf_enter_vf_save_wip(struct xe_gt *gt, unsigned int vfid) 940 + { 941 + if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) { 942 + xe_gt_sriov_pf_migration_save_init(gt, vfid); 943 + pf_enter_vf_wip(gt, vfid); 944 + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA); 945 + pf_queue_vf(gt, vfid); 946 + return true; 947 + } 948 + 949 + return false; 950 + } 951 + 952 + /** 953 + * xe_gt_sriov_pf_control_check_save_data_done() - Check if all save migration data was produced. 954 + * @gt: the &xe_gt 955 + * @vfid: the VF identifier 956 + * 957 + * This function is for PF only. 958 + * 959 + * Return: true if all migration data was produced, false otherwise. 960 + */ 961 + bool xe_gt_sriov_pf_control_check_save_data_done(struct xe_gt *gt, unsigned int vfid) 962 + { 963 + return pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE); 964 + } 965 + 966 + /** 967 + * xe_gt_sriov_pf_control_check_save_failed() - Check if save processing has failed. 968 + * @gt: the &xe_gt 969 + * @vfid: the VF identifier 970 + * 971 + * This function is for PF only. 972 + * 973 + * Return: true if save processing failed, false otherwise. 
974 + */ 975 + bool xe_gt_sriov_pf_control_check_save_failed(struct xe_gt *gt, unsigned int vfid) 976 + { 977 + return pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_FAILED); 978 + } 979 + 980 + /** 981 + * xe_gt_sriov_pf_control_process_save_data() - Queue VF save migration data processing. 982 + * @gt: the &xe_gt 983 + * @vfid: the VF identifier 984 + * 985 + * This function is for PF only. 986 + * 987 + * Return: 0 on success or a negative error code on failure. 988 + */ 989 + int xe_gt_sriov_pf_control_process_save_data(struct xe_gt *gt, unsigned int vfid) 990 + { 991 + if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_FAILED)) 992 + return -EIO; 993 + 994 + pf_exit_vf_save_wait_data(gt, vfid); 995 + 996 + return 0; 997 + } 998 + 999 + /** 1000 + * xe_gt_sriov_pf_control_trigger_save_vf() - Start an SR-IOV VF migration data save sequence. 1001 + * @gt: the &xe_gt 1002 + * @vfid: the VF identifier 1003 + * 1004 + * This function is for PF only. 1005 + * 1006 + * Return: 0 on success or a negative error code on failure. 1007 + */ 1008 + int xe_gt_sriov_pf_control_trigger_save_vf(struct xe_gt *gt, unsigned int vfid) 1009 + { 1010 + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) { 1011 + xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid); 1012 + return -EPERM; 1013 + } 1014 + 1015 + if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) { 1016 + xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid); 1017 + return -EPERM; 1018 + } 1019 + 1020 + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) { 1021 + xe_gt_sriov_dbg(gt, "VF%u restore is in progress!\n", vfid); 1022 + return -EBUSY; 1023 + } 1024 + 1025 + if (!pf_enter_vf_save_wip(gt, vfid)) { 1026 + xe_gt_sriov_dbg(gt, "VF%u save already in progress!\n", vfid); 1027 + return -EALREADY; 1028 + } 1029 + 1030 + return 0; 1031 + } 1032 + 1033 + /** 1034 + * xe_gt_sriov_pf_control_finish_save_vf() - Complete a VF migration data save sequence. 
1035 + * @gt: the &xe_gt 1036 + * @vfid: the VF identifier 1037 + * 1038 + * This function is for PF only. 1039 + * 1040 + * Return: 0 on success or a negative error code on failure. 1041 + */ 1042 + int xe_gt_sriov_pf_control_finish_save_vf(struct xe_gt *gt, unsigned int vfid) 1043 + { 1044 + if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE)) { 1045 + xe_gt_sriov_err(gt, "VF%u save is still in progress!\n", vfid); 1046 + return -EIO; 1047 + } 1048 + 1049 + pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); 1050 + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE); 1051 + pf_enter_vf_saved(gt, vfid); 1052 + 1053 + return 0; 1054 + } 1055 + 1056 + /** 1057 + * DOC: The VF RESTORE state machine 1058 + * 1059 + * RESTORE extends the PAUSED state. 1060 + * 1061 + * The VF RESTORE state machine looks like:: 1062 + * 1063 + * ....PAUSED.................................................... 1064 + * : : 1065 + * : (O)<---------o : 1066 + * : | \ : 1067 + * : restore (RESTORED) (RESTORE_FAILED) : 1068 + * : | ^ ^ : 1069 + * : | | | : 1070 + * : ....V...............o...........o......RESTORE_WIP...... : 1071 + * : : | | | : : 1072 + * : : | empty | : : 1073 + * : : | | | : : 1074 + * : : | | | : : 1075 + * : : | DATA_DONE | : : 1076 + * : : | ^ | : : 1077 + * : : | | error : : 1078 + * : : | trailer / : : 1079 + * : : | / / : : 1080 + * : : | / / : : 1081 + * : : | / / : : 1082 + * : : o---------->PROCESS_DATA<----produce : : 1083 + * : : \ \ : : 1084 + * : : \ \ : : 1085 + * : : \ \ : : 1086 + * : : ring_empty---->WAIT_DATA : : 1087 + * : : : : 1088 + * : :......................................................: : 1089 + * :............................................................: 1090 + * 1091 + * For the full state machine view, see `The VF state machine`_. 
1092 + */ 1093 + 1094 + static void pf_exit_vf_restore_wip(struct xe_gt *gt, unsigned int vfid) 1095 + { 1096 + if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) { 1097 + xe_gt_sriov_pf_migration_ring_free(gt, vfid); 1098 + 1099 + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA); 1100 + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA); 1101 + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_DATA_DONE); 1102 + } 1103 + } 1104 + 1105 + static void pf_enter_vf_restored(struct xe_gt *gt, unsigned int vfid) 1106 + { 1107 + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED)) 1108 + pf_enter_vf_state_machine_bug(gt, vfid); 1109 + 1110 + xe_gt_sriov_dbg(gt, "VF%u restored!\n", vfid); 1111 + 1112 + pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); 1113 + pf_exit_vf_mismatch(gt, vfid); 1114 + pf_exit_vf_wip(gt, vfid); 1115 + } 1116 + 1117 + static void pf_enter_vf_restore_failed(struct xe_gt *gt, unsigned int vfid) 1118 + { 1119 + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED)) 1120 + pf_enter_vf_state_machine_bug(gt, vfid); 1121 + 1122 + wake_up_all(xe_sriov_pf_migration_waitqueue(gt_to_xe(gt), vfid)); 1123 + 1124 + pf_exit_vf_wip(gt, vfid); 1125 + } 1126 + 1127 + static int pf_handle_vf_restore_data(struct xe_gt *gt, unsigned int vfid) 1128 + { 1129 + struct xe_sriov_packet *data = xe_gt_sriov_pf_migration_restore_consume(gt, vfid); 1130 + int ret = 0; 1131 + 1132 + switch (data->hdr.type) { 1133 + case XE_SRIOV_PACKET_TYPE_GGTT: 1134 + ret = xe_gt_sriov_pf_migration_ggtt_restore(gt, vfid, data); 1135 + break; 1136 + case XE_SRIOV_PACKET_TYPE_MMIO: 1137 + ret = xe_gt_sriov_pf_migration_mmio_restore(gt, vfid, data); 1138 + break; 1139 + case XE_SRIOV_PACKET_TYPE_GUC: 1140 + ret = xe_gt_sriov_pf_migration_guc_restore(gt, vfid, data); 1141 + break; 1142 + case XE_SRIOV_PACKET_TYPE_VRAM: 1143 + ret = xe_gt_sriov_pf_migration_vram_restore(gt, vfid, data); 1144 + break; 1145 + default: 
1146 + xe_gt_sriov_notice(gt, "Skipping VF%u unknown data type: %d\n", 1147 + vfid, data->hdr.type); 1148 + break; 1149 + } 1150 + 1151 + xe_sriov_packet_free(data); 1152 + 1153 + return ret; 1154 + } 1155 + 1156 + static bool pf_handle_vf_restore(struct xe_gt *gt, unsigned int vfid) 1157 + { 1158 + int ret; 1159 + 1160 + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA)) 1161 + return false; 1162 + 1163 + if (xe_gt_sriov_pf_migration_ring_empty(gt, vfid)) { 1164 + if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_DATA_DONE)) 1165 + pf_enter_vf_restored(gt, vfid); 1166 + else 1167 + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA); 1168 + 1169 + return true; 1170 + } 1171 + 1172 + ret = pf_handle_vf_restore_data(gt, vfid); 1173 + if (ret) 1174 + pf_enter_vf_restore_failed(gt, vfid); 1175 + else 1176 + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA); 1177 + 1178 + return true; 1179 + } 1180 + 1181 + static void pf_exit_vf_restore_wait_data(struct xe_gt *gt, unsigned int vfid) 1182 + { 1183 + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA)) 1184 + return; 1185 + 1186 + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA); 1187 + pf_queue_vf(gt, vfid); 1188 + } 1189 + 1190 + static bool pf_enter_vf_restore_wip(struct xe_gt *gt, unsigned int vfid) 1191 + { 1192 + if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) { 1193 + pf_enter_vf_wip(gt, vfid); 1194 + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA); 1195 + pf_queue_vf(gt, vfid); 1196 + return true; 1197 + } 1198 + 1199 + return false; 1200 + } 1201 + 1202 + /** 1203 + * xe_gt_sriov_pf_control_check_restore_failed() - Check if restore processing has failed. 1204 + * @gt: the &xe_gt 1205 + * @vfid: the VF identifier 1206 + * 1207 + * This function is for PF only. 1208 + * 1209 + * Return: true if restore processing failed, false otherwise. 
1210 + */ 1211 + bool xe_gt_sriov_pf_control_check_restore_failed(struct xe_gt *gt, unsigned int vfid) 1212 + { 1213 + return pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED); 1214 + } 1215 + 1216 + /** 1217 + * xe_gt_sriov_pf_control_restore_data_done() - Indicate the end of VF migration data stream. 1218 + * @gt: the &xe_gt 1219 + * @vfid: the VF identifier 1220 + * 1221 + * This function is for PF only. 1222 + * 1223 + * Return: 0 on success or a negative error code on failure. 1224 + */ 1225 + int xe_gt_sriov_pf_control_restore_data_done(struct xe_gt *gt, unsigned int vfid) 1226 + { 1227 + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_DATA_DONE)) { 1228 + pf_enter_vf_state_machine_bug(gt, vfid); 1229 + return -EIO; 1230 + } 1231 + 1232 + return xe_gt_sriov_pf_control_process_restore_data(gt, vfid); 1233 + } 1234 + 1235 + /** 1236 + * xe_gt_sriov_pf_control_process_restore_data() - Queue VF restore migration data processing. 1237 + * @gt: the &xe_gt 1238 + * @vfid: the VF identifier 1239 + * 1240 + * This function is for PF only. 1241 + * 1242 + * Return: 0 on success or a negative error code on failure. 1243 + */ 1244 + int xe_gt_sriov_pf_control_process_restore_data(struct xe_gt *gt, unsigned int vfid) 1245 + { 1246 + if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED)) { 1247 + xe_gt_sriov_pf_migration_ring_free(gt, vfid); 1248 + return -EIO; 1249 + } 1250 + 1251 + pf_exit_vf_restore_wait_data(gt, vfid); 1252 + 1253 + return 0; 1254 + } 1255 + 1256 + /** 1257 + * xe_gt_sriov_pf_control_trigger restore_vf() - Start an SR-IOV VF migration data restore sequence. 1258 + * @gt: the &xe_gt 1259 + * @vfid: the VF identifier 1260 + * 1261 + * This function is for PF only. 1262 + * 1263 + * Return: 0 on success or a negative error code on failure. 
1264 + */ 1265 + int xe_gt_sriov_pf_control_trigger_restore_vf(struct xe_gt *gt, unsigned int vfid) 1266 + { 1267 + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) { 1268 + xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid); 1269 + return -EPERM; 1270 + } 1271 + 1272 + if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) { 1273 + xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid); 1274 + return -EPERM; 1275 + } 1276 + 1277 + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) { 1278 + xe_gt_sriov_dbg(gt, "VF%u save is in progress!\n", vfid); 1279 + return -EBUSY; 1280 + } 1281 + 1282 + if (!pf_enter_vf_restore_wip(gt, vfid)) { 1283 + xe_gt_sriov_dbg(gt, "VF%u restore already in progress!\n", vfid); 1284 + return -EALREADY; 1285 + } 1286 + 1287 + return 0; 1288 + } 1289 + 1290 + static int pf_wait_vf_restore_done(struct xe_gt *gt, unsigned int vfid) 1291 + { 1292 + unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_RESTORE_WIP); 1293 + int err; 1294 + 1295 + err = pf_wait_vf_wip_done(gt, vfid, timeout); 1296 + if (err) { 1297 + xe_gt_sriov_notice(gt, "VF%u RESTORE didn't finish in %u ms (%pe)\n", 1298 + vfid, jiffies_to_msecs(timeout), ERR_PTR(err)); 1299 + return err; 1300 + } 1301 + 1302 + if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED)) 1303 + return -EIO; 1304 + 1305 + return 0; 1306 + } 1307 + 1308 + /** 1309 + * xe_gt_sriov_pf_control_finish_restore_vf() - Complete a VF migration data restore sequence. 1310 + * @gt: the &xe_gt 1311 + * @vfid: the VF identifier 1312 + * 1313 + * This function is for PF only. 1314 + * 1315 + * Return: 0 on success or a negative error code on failure. 
1316 + */ 1317 + int xe_gt_sriov_pf_control_finish_restore_vf(struct xe_gt *gt, unsigned int vfid) 1318 + { 1319 + int ret; 1320 + 1321 + ret = pf_wait_vf_restore_done(gt, vfid); 1322 + if (ret) 1323 + return ret; 1324 + 1325 + if (!pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED)) { 1326 + pf_enter_vf_mismatch(gt, vfid); 1327 + return -EIO; 1328 + } 1329 + 1330 + pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); 1331 + 1332 + return 0; 782 1333 } 783 1334 784 1335 /** ··· 1378 817 1379 818 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED); 1380 819 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); 820 + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED); 821 + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED); 1381 822 pf_exit_vf_mismatch(gt, vfid); 1382 823 pf_exit_vf_wip(gt, vfid); 1383 824 } ··· 2023 1460 if (pf_exit_vf_pause_guc_done(gt, vfid)) 2024 1461 return true; 2025 1462 2026 - if (pf_exit_vf_pause_save_guc(gt, vfid)) 1463 + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WAIT_DATA)) { 1464 + xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid, 1465 + control_bit_to_string(XE_GT_SRIOV_STATE_SAVE_WAIT_DATA)); 1466 + return false; 1467 + } 1468 + 1469 + if (pf_handle_vf_save(gt, vfid)) 1470 + return true; 1471 + 1472 + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA)) { 1473 + xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid, 1474 + control_bit_to_string(XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA)); 1475 + return false; 1476 + } 1477 + 1478 + if (pf_handle_vf_restore(gt, vfid)) 2027 1479 return true; 2028 1480 2029 1481 if (pf_exit_vf_resume_send_resume(gt, vfid))
+10
drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h
··· 16 16 17 17 int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid); 18 18 int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid); 19 + bool xe_gt_sriov_pf_control_check_save_data_done(struct xe_gt *gt, unsigned int vfid); 20 + bool xe_gt_sriov_pf_control_check_save_failed(struct xe_gt *gt, unsigned int vfid); 21 + int xe_gt_sriov_pf_control_process_save_data(struct xe_gt *gt, unsigned int vfid); 22 + int xe_gt_sriov_pf_control_trigger_save_vf(struct xe_gt *gt, unsigned int vfid); 23 + int xe_gt_sriov_pf_control_finish_save_vf(struct xe_gt *gt, unsigned int vfid); 24 + int xe_gt_sriov_pf_control_restore_data_done(struct xe_gt *gt, unsigned int vfid); 25 + bool xe_gt_sriov_pf_control_check_restore_failed(struct xe_gt *gt, unsigned int vfid); 26 + int xe_gt_sriov_pf_control_process_restore_data(struct xe_gt *gt, unsigned int vfid); 27 + int xe_gt_sriov_pf_control_trigger_restore_vf(struct xe_gt *gt, unsigned int vfid); 28 + int xe_gt_sriov_pf_control_finish_restore_vf(struct xe_gt *gt, unsigned int vfid); 19 29 int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid); 20 30 int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid); 21 31 int xe_gt_sriov_pf_control_sync_flr(struct xe_gt *gt, unsigned int vfid, bool sync);
+30 -4
drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h
··· 28 28 * @XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE: indicates that the PF is about to send a PAUSE command. 29 29 * @XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC: indicates that the PF awaits for a response from the GuC. 30 30 * @XE_GT_SRIOV_STATE_PAUSE_GUC_DONE: indicates that the PF has received a response from the GuC. 31 - * @XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC: indicates that the PF needs to save the VF GuC state. 32 31 * @XE_GT_SRIOV_STATE_PAUSE_FAILED: indicates that a VF pause operation has failed. 33 32 * @XE_GT_SRIOV_STATE_PAUSED: indicates that the VF is paused. 33 + * @XE_GT_SRIOV_STATE_SAVE_WIP: indicates that VF save operation is in progress. 34 + * @XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA: indicates that VF migration data is being produced. 35 + * @XE_GT_SRIOV_STATE_SAVE_WAIT_DATA: indicates that PF awaits for space in migration data ring. 36 + * @XE_GT_SRIOV_STATE_SAVE_DATA_DONE: indicates that all migration data was produced by Xe. 37 + * @XE_GT_SRIOV_STATE_SAVE_FAILED: indicates that VF save operation has failed. 38 + * @XE_GT_SRIOV_STATE_SAVED: indicates that VF data is saved. 39 + * @XE_GT_SRIOV_STATE_RESTORE_WIP: indicates that VF restore operation is in progress. 40 + * @XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA: indicates that VF migration data is being consumed. 41 + * @XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA: indicates that PF awaits for data in migration data ring. 42 + * @XE_GT_SRIOV_STATE_RESTORE_DATA_DONE: indicates that all migration data was produced by the user. 43 + * @XE_GT_SRIOV_STATE_RESTORE_FAILED: indicates that VF restore operation has failed. 44 + * @XE_GT_SRIOV_STATE_RESTORED: indicates that VF data is restored. 34 45 * @XE_GT_SRIOV_STATE_RESUME_WIP: indicates the a VF resume operation is in progress. 35 46 * @XE_GT_SRIOV_STATE_RESUME_SEND_RESUME: indicates that the PF is about to send RESUME command. 36 47 * @XE_GT_SRIOV_STATE_RESUME_FAILED: indicates that a VF resume operation has failed. 
··· 70 59 XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE, 71 60 XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC, 72 61 XE_GT_SRIOV_STATE_PAUSE_GUC_DONE, 73 - XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC, 74 62 XE_GT_SRIOV_STATE_PAUSE_FAILED, 75 63 XE_GT_SRIOV_STATE_PAUSED, 64 + 65 + XE_GT_SRIOV_STATE_SAVE_WIP, 66 + XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA, 67 + XE_GT_SRIOV_STATE_SAVE_WAIT_DATA, 68 + XE_GT_SRIOV_STATE_SAVE_DATA_DONE, 69 + XE_GT_SRIOV_STATE_SAVE_FAILED, 70 + XE_GT_SRIOV_STATE_SAVED, 71 + 72 + XE_GT_SRIOV_STATE_RESTORE_WIP, 73 + XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA, 74 + XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA, 75 + XE_GT_SRIOV_STATE_RESTORE_DATA_DONE, 76 + XE_GT_SRIOV_STATE_RESTORE_FAILED, 77 + XE_GT_SRIOV_STATE_RESTORED, 76 78 77 79 XE_GT_SRIOV_STATE_RESUME_WIP, 78 80 XE_GT_SRIOV_STATE_RESUME_SEND_RESUME, ··· 97 73 XE_GT_SRIOV_STATE_STOP_FAILED, 98 74 XE_GT_SRIOV_STATE_STOPPED, 99 75 100 - XE_GT_SRIOV_STATE_MISMATCH = BITS_PER_LONG - 1, 76 + XE_GT_SRIOV_STATE_MISMATCH, /* always keep as last */ 101 77 }; 78 + 79 + #define XE_GT_SRIOV_NUM_STATES (XE_GT_SRIOV_STATE_MISMATCH + 1) 102 80 103 81 /** 104 82 * struct xe_gt_sriov_control_state - GT-level per-VF control state. ··· 109 83 */ 110 84 struct xe_gt_sriov_control_state { 111 85 /** @state: VF state bits */ 112 - unsigned long state; 86 + DECLARE_BITMAP(state, XE_GT_SRIOV_NUM_STATES); 113 87 114 88 /** @done: completion of async operations */ 115 89 struct completion done;
-47
drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
··· 327 327 { "stop", xe_gt_sriov_pf_control_stop_vf }, 328 328 { "pause", xe_gt_sriov_pf_control_pause_vf }, 329 329 { "resume", xe_gt_sriov_pf_control_resume_vf }, 330 - #ifdef CONFIG_DRM_XE_DEBUG_SRIOV 331 - { "restore!", xe_gt_sriov_pf_migration_restore_guc_state }, 332 - #endif 333 330 }; 334 331 335 332 static ssize_t control_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) ··· 387 390 .open = simple_open, 388 391 .write = control_write, 389 392 .read = control_read, 390 - .llseek = default_llseek, 391 - }; 392 - 393 - /* 394 - * /sys/kernel/debug/dri/BDF/ 395 - * ├── sriov 396 - * : ├── vf1 397 - * : ├── tile0 398 - * : ├── gt0 399 - * : ├── guc_state 400 - */ 401 - 402 - static ssize_t guc_state_read(struct file *file, char __user *buf, 403 - size_t count, loff_t *pos) 404 - { 405 - struct dentry *dent = file_dentry(file); 406 - struct dentry *parent = dent->d_parent; 407 - struct xe_gt *gt = extract_gt(parent); 408 - unsigned int vfid = extract_vfid(parent); 409 - 410 - return xe_gt_sriov_pf_migration_read_guc_state(gt, vfid, buf, count, pos); 411 - } 412 - 413 - static ssize_t guc_state_write(struct file *file, const char __user *buf, 414 - size_t count, loff_t *pos) 415 - { 416 - struct dentry *dent = file_dentry(file); 417 - struct dentry *parent = dent->d_parent; 418 - struct xe_gt *gt = extract_gt(parent); 419 - unsigned int vfid = extract_vfid(parent); 420 - 421 - if (*pos) 422 - return -EINVAL; 423 - 424 - return xe_gt_sriov_pf_migration_write_guc_state(gt, vfid, buf, count); 425 - } 426 - 427 - static const struct file_operations guc_state_ops = { 428 - .owner = THIS_MODULE, 429 - .read = guc_state_read, 430 - .write = guc_state_write, 431 393 .llseek = default_llseek, 432 394 }; 433 395 ··· 524 568 525 569 /* for testing/debugging purposes only! */ 526 570 if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) { 527 - debugfs_create_file("guc_state", 528 - IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ? 
0600 : 0400, 529 - dent, NULL, &guc_state_ops); 530 571 debugfs_create_file("config_blob", 531 572 IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ? 0600 : 0400, 532 573 dent, NULL, &config_blob_ops);
+871 -241
drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
··· 5 5 6 6 #include <drm/drm_managed.h> 7 7 8 + #include "regs/xe_guc_regs.h" 9 + 8 10 #include "abi/guc_actions_sriov_abi.h" 9 11 #include "xe_bo.h" 12 + #include "xe_ggtt.h" 13 + #include "xe_gt.h" 14 + #include "xe_gt_sriov_pf.h" 15 + #include "xe_gt_sriov_pf_config.h" 16 + #include "xe_gt_sriov_pf_control.h" 10 17 #include "xe_gt_sriov_pf_helpers.h" 11 18 #include "xe_gt_sriov_pf_migration.h" 12 19 #include "xe_gt_sriov_printk.h" 13 - #include "xe_guc.h" 20 + #include "xe_guc_buf.h" 14 21 #include "xe_guc_ct.h" 22 + #include "xe_migrate.h" 23 + #include "xe_mmio.h" 15 24 #include "xe_sriov.h" 25 + #include "xe_sriov_packet.h" 26 + #include "xe_sriov_packet_types.h" 27 + #include "xe_sriov_pf_migration.h" 28 + 29 + #define XE_GT_SRIOV_PF_MIGRATION_RING_SIZE 5 30 + 31 + static struct xe_gt_sriov_migration_data *pf_pick_gt_migration(struct xe_gt *gt, unsigned int vfid) 32 + { 33 + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); 34 + xe_gt_assert(gt, vfid != PFID); 35 + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); 36 + 37 + return &gt->sriov.pf.vfs[vfid].migration; 38 + } 39 + 40 + static void pf_dump_mig_data(struct xe_gt *gt, unsigned int vfid, 41 + struct xe_sriov_packet *data, 42 + const char *what) 43 + { 44 + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { 45 + struct drm_printer p = xe_gt_dbg_printer(gt); 46 + 47 + drm_printf(&p, "VF%u %s (%llu bytes)\n", vfid, what, data->hdr.size); 48 + drm_print_hex_dump(&p, "mig_hdr: ", (void *)&data->hdr, sizeof(data->hdr)); 49 + drm_print_hex_dump(&p, "mig_data: ", data->vaddr, min(SZ_64, data->hdr.size)); 50 + } 51 + } 52 + 53 + static ssize_t pf_migration_ggtt_size(struct xe_gt *gt, unsigned int vfid) 54 + { 55 + if (!xe_gt_is_main_type(gt)) 56 + return 0; 57 + 58 + return xe_gt_sriov_pf_config_ggtt_save(gt, vfid, NULL, 0); 59 + } 60 + 61 + static int pf_save_vf_ggtt_mig_data(struct xe_gt *gt, unsigned int vfid) 62 + { 63 + struct xe_sriov_packet *data; 64 + size_t size; 65 + int ret; 66 + 67 + size = 
pf_migration_ggtt_size(gt, vfid); 68 + xe_gt_assert(gt, size); 69 + 70 + data = xe_sriov_packet_alloc(gt_to_xe(gt)); 71 + if (!data) 72 + return -ENOMEM; 73 + 74 + ret = xe_sriov_packet_init(data, gt->tile->id, gt->info.id, 75 + XE_SRIOV_PACKET_TYPE_GGTT, 0, size); 76 + if (ret) 77 + goto fail; 78 + 79 + ret = xe_gt_sriov_pf_config_ggtt_save(gt, vfid, data->vaddr, size); 80 + if (ret) 81 + goto fail; 82 + 83 + pf_dump_mig_data(gt, vfid, data, "GGTT data save"); 84 + 85 + ret = xe_gt_sriov_pf_migration_save_produce(gt, vfid, data); 86 + if (ret) 87 + goto fail; 88 + 89 + return 0; 90 + 91 + fail: 92 + xe_sriov_packet_free(data); 93 + xe_gt_sriov_err(gt, "Failed to save VF%u GGTT data (%pe)\n", vfid, ERR_PTR(ret)); 94 + return ret; 95 + } 96 + 97 + static int pf_restore_vf_ggtt_mig_data(struct xe_gt *gt, unsigned int vfid, 98 + struct xe_sriov_packet *data) 99 + { 100 + int ret; 101 + 102 + pf_dump_mig_data(gt, vfid, data, "GGTT data restore"); 103 + 104 + ret = xe_gt_sriov_pf_config_ggtt_restore(gt, vfid, data->vaddr, data->hdr.size); 105 + if (ret) { 106 + xe_gt_sriov_err(gt, "Failed to restore VF%u GGTT data (%pe)\n", 107 + vfid, ERR_PTR(ret)); 108 + return ret; 109 + } 110 + 111 + return 0; 112 + } 113 + 114 + /** 115 + * xe_gt_sriov_pf_migration_ggtt_save() - Save VF GGTT migration data. 116 + * @gt: the &xe_gt 117 + * @vfid: the VF identifier (can't be 0) 118 + * 119 + * This function is for PF only. 120 + * 121 + * Return: 0 on success or a negative error code on failure. 122 + */ 123 + int xe_gt_sriov_pf_migration_ggtt_save(struct xe_gt *gt, unsigned int vfid) 124 + { 125 + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); 126 + xe_gt_assert(gt, vfid != PFID); 127 + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); 128 + 129 + return pf_save_vf_ggtt_mig_data(gt, vfid); 130 + } 131 + 132 + /** 133 + * xe_gt_sriov_pf_migration_ggtt_restore() - Restore VF GGTT migration data. 
134 + * @gt: the &xe_gt 135 + * @vfid: the VF identifier (can't be 0) 136 + * @data: the &xe_sriov_packet containing migration data 137 + * 138 + * This function is for PF only. 139 + * 140 + * Return: 0 on success or a negative error code on failure. 141 + */ 142 + int xe_gt_sriov_pf_migration_ggtt_restore(struct xe_gt *gt, unsigned int vfid, 143 + struct xe_sriov_packet *data) 144 + { 145 + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); 146 + xe_gt_assert(gt, vfid != PFID); 147 + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); 148 + 149 + return pf_restore_vf_ggtt_mig_data(gt, vfid, data); 150 + } 16 151 17 152 /* Return: number of dwords saved/restored/required or a negative error code on failure */ 18 153 static int guc_action_vf_save_restore(struct xe_guc *guc, u32 vfid, u32 opcode, ··· 168 33 } 169 34 170 35 /* Return: size of the state in dwords or a negative error code on failure */ 171 - static int pf_send_guc_query_vf_state_size(struct xe_gt *gt, unsigned int vfid) 36 + static int pf_send_guc_query_vf_mig_data_size(struct xe_gt *gt, unsigned int vfid) 172 37 { 173 38 int ret; 174 39 ··· 177 42 } 178 43 179 44 /* Return: number of state dwords saved or a negative error code on failure */ 180 - static int pf_send_guc_save_vf_state(struct xe_gt *gt, unsigned int vfid, 181 - void *buff, size_t size) 45 + static int pf_send_guc_save_vf_mig_data(struct xe_gt *gt, unsigned int vfid, 46 + void *dst, size_t size) 182 47 { 183 48 const int ndwords = size / sizeof(u32); 184 - struct xe_tile *tile = gt_to_tile(gt); 185 - struct xe_device *xe = tile_to_xe(tile); 186 49 struct xe_guc *guc = &gt->uc.guc; 187 - struct xe_bo *bo; 50 + CLASS(xe_guc_buf, buf)(&guc->buf, ndwords); 188 51 int ret; 189 52 190 53 xe_gt_assert(gt, size % sizeof(u32) == 0); 191 54 xe_gt_assert(gt, size == ndwords * sizeof(u32)); 192 55 193 - bo = xe_bo_create_pin_map_novm(xe, tile, 194 - ALIGN(size, PAGE_SIZE), 195 - ttm_bo_type_kernel, 196 - XE_BO_FLAG_SYSTEM | 197 - 
XE_BO_FLAG_GGTT | 198 - XE_BO_FLAG_GGTT_INVALIDATE, false); 199 - if (IS_ERR(bo)) 200 - return PTR_ERR(bo); 56 + if (!xe_guc_buf_is_valid(buf)) 57 + return -ENOBUFS; 58 + 59 + /* FW expects this buffer to be zero-initialized */ 60 + memset(xe_guc_buf_cpu_ptr(buf), 0, size); 201 61 202 62 ret = guc_action_vf_save_restore(guc, vfid, GUC_PF_OPCODE_VF_SAVE, 203 - xe_bo_ggtt_addr(bo), ndwords); 63 + xe_guc_buf_flush(buf), ndwords); 204 64 if (!ret) 205 65 ret = -ENODATA; 206 66 else if (ret > ndwords) 207 67 ret = -EPROTO; 208 68 else if (ret > 0) 209 - xe_map_memcpy_from(xe, buff, &bo->vmap, 0, ret * sizeof(u32)); 69 + memcpy(dst, xe_guc_buf_sync_read(buf), ret * sizeof(u32)); 210 70 211 - xe_bo_unpin_map_no_vm(bo); 212 71 return ret; 213 72 } 214 73 215 74 /* Return: number of state dwords restored or a negative error code on failure */ 216 - static int pf_send_guc_restore_vf_state(struct xe_gt *gt, unsigned int vfid, 217 - const void *buff, size_t size) 75 + static int pf_send_guc_restore_vf_mig_data(struct xe_gt *gt, unsigned int vfid, 76 + const void *src, size_t size) 218 77 { 219 78 const int ndwords = size / sizeof(u32); 220 - struct xe_tile *tile = gt_to_tile(gt); 221 - struct xe_device *xe = tile_to_xe(tile); 222 79 struct xe_guc *guc = &gt->uc.guc; 223 - struct xe_bo *bo; 80 + CLASS(xe_guc_buf_from_data, buf)(&guc->buf, src, size); 224 81 int ret; 225 82 226 83 xe_gt_assert(gt, size % sizeof(u32) == 0); 227 84 xe_gt_assert(gt, size == ndwords * sizeof(u32)); 228 85 229 - bo = xe_bo_create_pin_map_novm(xe, tile, 230 - ALIGN(size, PAGE_SIZE), 231 - ttm_bo_type_kernel, 232 - XE_BO_FLAG_SYSTEM | 233 - XE_BO_FLAG_GGTT | 234 - XE_BO_FLAG_GGTT_INVALIDATE, false); 235 - if (IS_ERR(bo)) 236 - return PTR_ERR(bo); 237 - 238 - xe_map_memcpy_to(xe, &bo->vmap, 0, buff, size); 86 + if (!xe_guc_buf_is_valid(buf)) 87 + return -ENOBUFS; 239 88 240 89 ret = guc_action_vf_save_restore(guc, vfid, GUC_PF_OPCODE_VF_RESTORE, 241 - xe_bo_ggtt_addr(bo), ndwords); 90 + 
xe_guc_buf_flush(buf), ndwords); 242 91 if (!ret) 243 92 ret = -ENODATA; 244 93 else if (ret > ndwords) 245 94 ret = -EPROTO; 246 95 247 - xe_bo_unpin_map_no_vm(bo); 248 96 return ret; 249 97 } 250 98 251 99 static bool pf_migration_supported(struct xe_gt *gt) 252 100 { 253 - xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); 254 - return gt->sriov.pf.migration.supported; 101 + return xe_sriov_pf_migration_supported(gt_to_xe(gt)); 255 102 } 256 103 257 - static struct mutex *pf_migration_mutex(struct xe_gt *gt) 104 + static int pf_save_vf_guc_mig_data(struct xe_gt *gt, unsigned int vfid) 258 105 { 259 - xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); 260 - return &gt->sriov.pf.migration.snapshot_lock; 261 - } 262 - 263 - static struct xe_gt_sriov_state_snapshot *pf_pick_vf_snapshot(struct xe_gt *gt, 264 - unsigned int vfid) 265 - { 266 - xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); 267 - xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); 268 - lockdep_assert_held(pf_migration_mutex(gt)); 269 - 270 - return &gt->sriov.pf.vfs[vfid].snapshot; 271 - } 272 - 273 - static unsigned int pf_snapshot_index(struct xe_gt *gt, struct xe_gt_sriov_state_snapshot *snapshot) 274 - { 275 - return container_of(snapshot, struct xe_gt_sriov_metadata, snapshot) - gt->sriov.pf.vfs; 276 - } 277 - 278 - static void pf_free_guc_state(struct xe_gt *gt, struct xe_gt_sriov_state_snapshot *snapshot) 279 - { 280 - struct xe_device *xe = gt_to_xe(gt); 281 - 282 - drmm_kfree(&xe->drm, snapshot->guc.buff); 283 - snapshot->guc.buff = NULL; 284 - snapshot->guc.size = 0; 285 - } 286 - 287 - static int pf_alloc_guc_state(struct xe_gt *gt, 288 - struct xe_gt_sriov_state_snapshot *snapshot, 289 - size_t size) 290 - { 291 - struct xe_device *xe = gt_to_xe(gt); 292 - void *p; 293 - 294 - pf_free_guc_state(gt, snapshot); 295 - 296 - if (!size) 297 - return -ENODATA; 298 - 299 - if (size % sizeof(u32)) 300 - return -EINVAL; 301 - 302 - if (size > SZ_2M) 303 - return -EFBIG; 304 - 305 - p = 
drmm_kzalloc(&xe->drm, size, GFP_KERNEL); 306 - if (!p) 307 - return -ENOMEM; 308 - 309 - snapshot->guc.buff = p; 310 - snapshot->guc.size = size; 311 - return 0; 312 - } 313 - 314 - static void pf_dump_guc_state(struct xe_gt *gt, struct xe_gt_sriov_state_snapshot *snapshot) 315 - { 316 - if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { 317 - unsigned int vfid __maybe_unused = pf_snapshot_index(gt, snapshot); 318 - 319 - xe_gt_sriov_dbg_verbose(gt, "VF%u GuC state is %zu dwords:\n", 320 - vfid, snapshot->guc.size / sizeof(u32)); 321 - print_hex_dump_bytes("state: ", DUMP_PREFIX_OFFSET, 322 - snapshot->guc.buff, min(SZ_64, snapshot->guc.size)); 323 - } 324 - } 325 - 326 - static int pf_save_vf_guc_state(struct xe_gt *gt, unsigned int vfid) 327 - { 328 - struct xe_gt_sriov_state_snapshot *snapshot = pf_pick_vf_snapshot(gt, vfid); 106 + struct xe_sriov_packet *data; 329 107 size_t size; 330 108 int ret; 331 109 332 - ret = pf_send_guc_query_vf_state_size(gt, vfid); 110 + ret = pf_send_guc_query_vf_mig_data_size(gt, vfid); 333 111 if (ret < 0) 334 112 goto fail; 113 + 335 114 size = ret * sizeof(u32); 336 - xe_gt_sriov_dbg_verbose(gt, "VF%u state size is %d dwords (%zu bytes)\n", vfid, ret, size); 337 115 338 - ret = pf_alloc_guc_state(gt, snapshot, size); 339 - if (ret < 0) 116 + data = xe_sriov_packet_alloc(gt_to_xe(gt)); 117 + if (!data) { 118 + ret = -ENOMEM; 340 119 goto fail; 120 + } 341 121 342 - ret = pf_send_guc_save_vf_state(gt, vfid, snapshot->guc.buff, size); 122 + ret = xe_sriov_packet_init(data, gt->tile->id, gt->info.id, 123 + XE_SRIOV_PACKET_TYPE_GUC, 0, size); 124 + if (ret) 125 + goto fail_free; 126 + 127 + ret = pf_send_guc_save_vf_mig_data(gt, vfid, data->vaddr, size); 343 128 if (ret < 0) 344 - goto fail; 129 + goto fail_free; 345 130 size = ret * sizeof(u32); 346 131 xe_gt_assert(gt, size); 347 - xe_gt_assert(gt, size <= snapshot->guc.size); 348 - snapshot->guc.size = size; 132 + xe_gt_assert(gt, size <= data->hdr.size); 133 + data->hdr.size = size; 
134 + data->remaining = size; 349 135 350 - pf_dump_guc_state(gt, snapshot); 136 + pf_dump_mig_data(gt, vfid, data, "GuC data save"); 137 + 138 + ret = xe_gt_sriov_pf_migration_save_produce(gt, vfid, data); 139 + if (ret) 140 + goto fail_free; 141 + 351 142 return 0; 352 143 144 + fail_free: 145 + xe_sriov_packet_free(data); 353 146 fail: 354 - xe_gt_sriov_dbg(gt, "Unable to save VF%u state (%pe)\n", vfid, ERR_PTR(ret)); 355 - pf_free_guc_state(gt, snapshot); 147 + xe_gt_sriov_err(gt, "Failed to save VF%u GuC data (%pe)\n", 148 + vfid, ERR_PTR(ret)); 356 149 return ret; 357 150 } 358 151 152 + static ssize_t pf_migration_guc_size(struct xe_gt *gt, unsigned int vfid) 153 + { 154 + ssize_t size; 155 + 156 + if (!pf_migration_supported(gt)) 157 + return -ENOPKG; 158 + 159 + size = pf_send_guc_query_vf_mig_data_size(gt, vfid); 160 + if (size >= 0) 161 + size *= sizeof(u32); 162 + 163 + return size; 164 + } 165 + 359 166 /** 360 - * xe_gt_sriov_pf_migration_save_guc_state() - Take a GuC VF state snapshot. 167 + * xe_gt_sriov_pf_migration_guc_save() - Save VF GuC migration data. 361 168 * @gt: the &xe_gt 362 169 * @vfid: the VF identifier 363 170 * ··· 307 230 * 308 231 * Return: 0 on success or a negative error code on failure. 
309 232 */ 310 - int xe_gt_sriov_pf_migration_save_guc_state(struct xe_gt *gt, unsigned int vfid) 233 + int xe_gt_sriov_pf_migration_guc_save(struct xe_gt *gt, unsigned int vfid) 311 234 { 312 - int err; 313 - 314 235 xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); 315 236 xe_gt_assert(gt, vfid != PFID); 316 237 xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); ··· 316 241 if (!pf_migration_supported(gt)) 317 242 return -ENOPKG; 318 243 319 - mutex_lock(pf_migration_mutex(gt)); 320 - err = pf_save_vf_guc_state(gt, vfid); 321 - mutex_unlock(pf_migration_mutex(gt)); 322 - 323 - return err; 244 + return pf_save_vf_guc_mig_data(gt, vfid); 324 245 } 325 246 326 - static int pf_restore_vf_guc_state(struct xe_gt *gt, unsigned int vfid) 247 + static int pf_restore_vf_guc_state(struct xe_gt *gt, unsigned int vfid, 248 + struct xe_sriov_packet *data) 327 249 { 328 - struct xe_gt_sriov_state_snapshot *snapshot = pf_pick_vf_snapshot(gt, vfid); 329 250 int ret; 330 251 331 - if (!snapshot->guc.size) 332 - return -ENODATA; 252 + xe_gt_assert(gt, data->hdr.size); 333 253 334 - xe_gt_sriov_dbg_verbose(gt, "restoring %zu dwords of VF%u GuC state\n", 335 - snapshot->guc.size / sizeof(u32), vfid); 336 - ret = pf_send_guc_restore_vf_state(gt, vfid, snapshot->guc.buff, snapshot->guc.size); 254 + pf_dump_mig_data(gt, vfid, data, "GuC data restore"); 255 + 256 + ret = pf_send_guc_restore_vf_mig_data(gt, vfid, data->vaddr, data->hdr.size); 337 257 if (ret < 0) 338 258 goto fail; 339 259 340 - xe_gt_sriov_dbg_verbose(gt, "restored %d dwords of VF%u GuC state\n", ret, vfid); 341 260 return 0; 342 261 343 262 fail: 344 - xe_gt_sriov_dbg(gt, "Failed to restore VF%u GuC state (%pe)\n", vfid, ERR_PTR(ret)); 263 + xe_gt_sriov_err(gt, "Failed to restore VF%u GuC data (%pe)\n", 264 + vfid, ERR_PTR(ret)); 345 265 return ret; 346 266 } 347 267 348 268 /** 349 - * xe_gt_sriov_pf_migration_restore_guc_state() - Restore a GuC VF state. 
269 + * xe_gt_sriov_pf_migration_guc_restore() - Restore VF GuC migration data. 350 270 * @gt: the &xe_gt 351 271 * @vfid: the VF identifier 272 + * @data: the &xe_sriov_packet containing migration data 352 273 * 353 274 * This function is for PF only. 354 275 * 355 276 * Return: 0 on success or a negative error code on failure. 356 277 */ 357 - int xe_gt_sriov_pf_migration_restore_guc_state(struct xe_gt *gt, unsigned int vfid) 278 + int xe_gt_sriov_pf_migration_guc_restore(struct xe_gt *gt, unsigned int vfid, 279 + struct xe_sriov_packet *data) 280 + { 281 + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); 282 + xe_gt_assert(gt, vfid != PFID); 283 + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); 284 + 285 + if (!pf_migration_supported(gt)) 286 + return -ENOPKG; 287 + 288 + return pf_restore_vf_guc_state(gt, vfid, data); 289 + } 290 + 291 + static ssize_t pf_migration_mmio_size(struct xe_gt *gt, unsigned int vfid) 292 + { 293 + if (xe_gt_is_media_type(gt)) 294 + return MED_VF_SW_FLAG_COUNT * sizeof(u32); 295 + else 296 + return VF_SW_FLAG_COUNT * sizeof(u32); 297 + } 298 + 299 + static int pf_migration_mmio_save(struct xe_gt *gt, unsigned int vfid, void *buf, size_t size) 300 + { 301 + struct xe_mmio mmio; 302 + u32 *regs = buf; 303 + int n; 304 + 305 + if (size != pf_migration_mmio_size(gt, vfid)) 306 + return -EINVAL; 307 + 308 + xe_mmio_init_vf_view(&mmio, &gt->mmio, vfid); 309 + 310 + if (xe_gt_is_media_type(gt)) 311 + for (n = 0; n < MED_VF_SW_FLAG_COUNT; n++) 312 + regs[n] = xe_mmio_read32(&gt->mmio, MED_VF_SW_FLAG(n)); 313 + else 314 + for (n = 0; n < VF_SW_FLAG_COUNT; n++) 315 + regs[n] = xe_mmio_read32(&gt->mmio, VF_SW_FLAG(n)); 316 + 317 + return 0; 318 + } 319 + 320 + static int pf_migration_mmio_restore(struct xe_gt *gt, unsigned int vfid, 321 + const void *buf, size_t size) 322 + { 323 + const u32 *regs = buf; 324 + struct xe_mmio mmio; 325 + int n; 326 + 327 + if (size != pf_migration_mmio_size(gt, vfid)) 328 + return -EINVAL; 329 + 330 
+ xe_mmio_init_vf_view(&mmio, &gt->mmio, vfid); 331 + 332 + if (xe_gt_is_media_type(gt)) 333 + for (n = 0; n < MED_VF_SW_FLAG_COUNT; n++) 334 + xe_mmio_write32(&gt->mmio, MED_VF_SW_FLAG(n), regs[n]); 335 + else 336 + for (n = 0; n < VF_SW_FLAG_COUNT; n++) 337 + xe_mmio_write32(&gt->mmio, VF_SW_FLAG(n), regs[n]); 338 + 339 + return 0; 340 + } 341 + 342 + static int pf_save_vf_mmio_mig_data(struct xe_gt *gt, unsigned int vfid) 343 + { 344 + struct xe_sriov_packet *data; 345 + size_t size; 346 + int ret; 347 + 348 + size = pf_migration_mmio_size(gt, vfid); 349 + xe_gt_assert(gt, size); 350 + 351 + data = xe_sriov_packet_alloc(gt_to_xe(gt)); 352 + if (!data) 353 + return -ENOMEM; 354 + 355 + ret = xe_sriov_packet_init(data, gt->tile->id, gt->info.id, 356 + XE_SRIOV_PACKET_TYPE_MMIO, 0, size); 357 + if (ret) 358 + goto fail; 359 + 360 + ret = pf_migration_mmio_save(gt, vfid, data->vaddr, size); 361 + if (ret) 362 + goto fail; 363 + 364 + pf_dump_mig_data(gt, vfid, data, "MMIO data save"); 365 + 366 + ret = xe_gt_sriov_pf_migration_save_produce(gt, vfid, data); 367 + if (ret) 368 + goto fail; 369 + 370 + return 0; 371 + 372 + fail: 373 + xe_sriov_packet_free(data); 374 + xe_gt_sriov_err(gt, "Failed to save VF%u MMIO data (%pe)\n", vfid, ERR_PTR(ret)); 375 + return ret; 376 + } 377 + 378 + static int pf_restore_vf_mmio_mig_data(struct xe_gt *gt, unsigned int vfid, 379 + struct xe_sriov_packet *data) 358 380 { 359 381 int ret; 360 382 361 - xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); 362 - xe_gt_assert(gt, vfid != PFID); 363 - xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); 383 + pf_dump_mig_data(gt, vfid, data, "MMIO data restore"); 364 384 365 - if (!pf_migration_supported(gt)) 366 - return -ENOPKG; 385 + ret = pf_migration_mmio_restore(gt, vfid, data->vaddr, data->hdr.size); 386 + if (ret) { 387 + xe_gt_sriov_err(gt, "Failed to restore VF%u MMIO data (%pe)\n", 388 + vfid, ERR_PTR(ret)); 367 389 368 - mutex_lock(pf_migration_mutex(gt)); 369 - ret = 
pf_restore_vf_guc_state(gt, vfid); 370 - mutex_unlock(pf_migration_mutex(gt)); 371 - 372 - return ret; 373 - } 374 - 375 - #ifdef CONFIG_DEBUG_FS 376 - /** 377 - * xe_gt_sriov_pf_migration_read_guc_state() - Read a GuC VF state. 378 - * @gt: the &xe_gt 379 - * @vfid: the VF identifier 380 - * @buf: the user space buffer to read to 381 - * @count: the maximum number of bytes to read 382 - * @pos: the current position in the buffer 383 - * 384 - * This function is for PF only. 385 - * 386 - * This function reads up to @count bytes from the saved VF GuC state buffer 387 - * at offset @pos into the user space address starting at @buf. 388 - * 389 - * Return: the number of bytes read or a negative error code on failure. 390 - */ 391 - ssize_t xe_gt_sriov_pf_migration_read_guc_state(struct xe_gt *gt, unsigned int vfid, 392 - char __user *buf, size_t count, loff_t *pos) 393 - { 394 - struct xe_gt_sriov_state_snapshot *snapshot; 395 - ssize_t ret; 396 - 397 - xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); 398 - xe_gt_assert(gt, vfid != PFID); 399 - xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); 400 - 401 - if (!pf_migration_supported(gt)) 402 - return -ENOPKG; 403 - 404 - mutex_lock(pf_migration_mutex(gt)); 405 - snapshot = pf_pick_vf_snapshot(gt, vfid); 406 - if (snapshot->guc.size) 407 - ret = simple_read_from_buffer(buf, count, pos, snapshot->guc.buff, 408 - snapshot->guc.size); 409 - else 410 - ret = -ENODATA; 411 - mutex_unlock(pf_migration_mutex(gt)); 412 - 413 - return ret; 414 - } 415 - 416 - /** 417 - * xe_gt_sriov_pf_migration_write_guc_state() - Write a GuC VF state. 418 - * @gt: the &xe_gt 419 - * @vfid: the VF identifier 420 - * @buf: the user space buffer with GuC VF state 421 - * @size: the size of GuC VF state (in bytes) 422 - * 423 - * This function is for PF only. 424 - * 425 - * This function reads @size bytes of the VF GuC state stored at user space 426 - * address @buf and writes it into a internal VF state buffer. 
427 - * 428 - * Return: the number of bytes used or a negative error code on failure. 429 - */ 430 - ssize_t xe_gt_sriov_pf_migration_write_guc_state(struct xe_gt *gt, unsigned int vfid, 431 - const char __user *buf, size_t size) 432 - { 433 - struct xe_gt_sriov_state_snapshot *snapshot; 434 - loff_t pos = 0; 435 - ssize_t ret; 436 - 437 - xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); 438 - xe_gt_assert(gt, vfid != PFID); 439 - xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); 440 - 441 - if (!pf_migration_supported(gt)) 442 - return -ENOPKG; 443 - 444 - mutex_lock(pf_migration_mutex(gt)); 445 - snapshot = pf_pick_vf_snapshot(gt, vfid); 446 - ret = pf_alloc_guc_state(gt, snapshot, size); 447 - if (!ret) { 448 - ret = simple_write_to_buffer(snapshot->guc.buff, size, &pos, buf, size); 449 - if (ret < 0) 450 - pf_free_guc_state(gt, snapshot); 451 - else 452 - pf_dump_guc_state(gt, snapshot); 390 + return ret; 453 391 } 454 - mutex_unlock(pf_migration_mutex(gt)); 392 + 393 + return 0; 394 + } 395 + 396 + /** 397 + * xe_gt_sriov_pf_migration_mmio_save() - Save VF MMIO migration data. 398 + * @gt: the &xe_gt 399 + * @vfid: the VF identifier (can't be 0) 400 + * 401 + * This function is for PF only. 402 + * 403 + * Return: 0 on success or a negative error code on failure. 404 + */ 405 + int xe_gt_sriov_pf_migration_mmio_save(struct xe_gt *gt, unsigned int vfid) 406 + { 407 + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); 408 + xe_gt_assert(gt, vfid != PFID); 409 + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); 410 + 411 + return pf_save_vf_mmio_mig_data(gt, vfid); 412 + } 413 + 414 + /** 415 + * xe_gt_sriov_pf_migration_mmio_restore() - Restore VF MMIO migration data. 416 + * @gt: the &xe_gt 417 + * @vfid: the VF identifier (can't be 0) 418 + * @data: the &xe_sriov_packet containing migration data 419 + * 420 + * This function is for PF only. 421 + * 422 + * Return: 0 on success or a negative error code on failure. 
423 + */ 424 + int xe_gt_sriov_pf_migration_mmio_restore(struct xe_gt *gt, unsigned int vfid, 425 + struct xe_sriov_packet *data) 426 + { 427 + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); 428 + xe_gt_assert(gt, vfid != PFID); 429 + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); 430 + 431 + return pf_restore_vf_mmio_mig_data(gt, vfid, data); 432 + } 433 + 434 + static ssize_t pf_migration_vram_size(struct xe_gt *gt, unsigned int vfid) 435 + { 436 + if (!xe_gt_is_main_type(gt)) 437 + return 0; 438 + 439 + return xe_gt_sriov_pf_config_get_lmem(gt, vfid); 440 + } 441 + 442 + static struct dma_fence *__pf_save_restore_vram(struct xe_gt *gt, unsigned int vfid, 443 + struct xe_bo *vram, u64 vram_offset, 444 + struct xe_bo *sysmem, u64 sysmem_offset, 445 + size_t size, bool save) 446 + { 447 + struct dma_fence *ret = NULL; 448 + struct drm_exec exec; 449 + int err; 450 + 451 + drm_exec_init(&exec, 0, 0); 452 + drm_exec_until_all_locked(&exec) { 453 + err = drm_exec_lock_obj(&exec, &vram->ttm.base); 454 + drm_exec_retry_on_contention(&exec); 455 + if (err) { 456 + ret = ERR_PTR(err); 457 + goto err; 458 + } 459 + 460 + err = drm_exec_lock_obj(&exec, &sysmem->ttm.base); 461 + drm_exec_retry_on_contention(&exec); 462 + if (err) { 463 + ret = ERR_PTR(err); 464 + goto err; 465 + } 466 + } 467 + 468 + ret = xe_migrate_vram_copy_chunk(vram, vram_offset, sysmem, sysmem_offset, size, 469 + save ? 
XE_MIGRATE_COPY_TO_SRAM : XE_MIGRATE_COPY_TO_VRAM); 470 + 471 + err: 472 + drm_exec_fini(&exec); 455 473 456 474 return ret; 457 475 } 458 - #endif /* CONFIG_DEBUG_FS */ 459 476 460 - static bool pf_check_migration_support(struct xe_gt *gt) 477 + #define PF_VRAM_SAVE_RESTORE_TIMEOUT (5 * HZ) 478 + static int pf_save_vram_chunk(struct xe_gt *gt, unsigned int vfid, 479 + struct xe_bo *src_vram, u64 src_vram_offset, 480 + size_t size) 461 481 { 462 - /* GuC 70.25 with save/restore v2 is required */ 463 - xe_gt_assert(gt, GUC_FIRMWARE_VER(&gt->uc.guc) >= MAKE_GUC_VER(70, 25, 0)); 482 + struct xe_sriov_packet *data; 483 + struct dma_fence *fence; 484 + int ret; 464 485 465 - /* XXX: for now this is for feature enabling only */ 466 - return IS_ENABLED(CONFIG_DRM_XE_DEBUG); 486 + data = xe_sriov_packet_alloc(gt_to_xe(gt)); 487 + if (!data) 488 + return -ENOMEM; 489 + 490 + ret = xe_sriov_packet_init(data, gt->tile->id, gt->info.id, 491 + XE_SRIOV_PACKET_TYPE_VRAM, src_vram_offset, 492 + size); 493 + if (ret) 494 + goto fail; 495 + 496 + fence = __pf_save_restore_vram(gt, vfid, 497 + src_vram, src_vram_offset, 498 + data->bo, 0, size, true); 499 + 500 + ret = dma_fence_wait_timeout(fence, false, PF_VRAM_SAVE_RESTORE_TIMEOUT); 501 + dma_fence_put(fence); 502 + if (!ret) { 503 + ret = -ETIME; 504 + goto fail; 505 + } 506 + 507 + pf_dump_mig_data(gt, vfid, data, "VRAM data save"); 508 + 509 + ret = xe_gt_sriov_pf_migration_save_produce(gt, vfid, data); 510 + if (ret) 511 + goto fail; 512 + 513 + return 0; 514 + 515 + fail: 516 + xe_sriov_packet_free(data); 517 + return ret; 518 + } 519 + 520 + #define VF_VRAM_STATE_CHUNK_MAX_SIZE SZ_512M 521 + static int pf_save_vf_vram_mig_data(struct xe_gt *gt, unsigned int vfid) 522 + { 523 + struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, vfid); 524 + loff_t *offset = &migration->save.vram_offset; 525 + struct xe_bo *vram; 526 + size_t vram_size, chunk_size; 527 + int ret; 528 + 529 + vram = 
xe_gt_sriov_pf_config_get_lmem_obj(gt, vfid); 530 + if (!vram) 531 + return -ENXIO; 532 + 533 + vram_size = xe_bo_size(vram); 534 + 535 + xe_gt_assert(gt, *offset < vram_size); 536 + 537 + chunk_size = min(vram_size - *offset, VF_VRAM_STATE_CHUNK_MAX_SIZE); 538 + 539 + ret = pf_save_vram_chunk(gt, vfid, vram, *offset, chunk_size); 540 + if (ret) 541 + goto fail; 542 + 543 + *offset += chunk_size; 544 + 545 + xe_bo_put(vram); 546 + 547 + if (*offset < vram_size) 548 + return -EAGAIN; 549 + 550 + return 0; 551 + 552 + fail: 553 + xe_bo_put(vram); 554 + xe_gt_sriov_err(gt, "Failed to save VF%u VRAM data (%pe)\n", vfid, ERR_PTR(ret)); 555 + return ret; 556 + } 557 + 558 + static int pf_restore_vf_vram_mig_data(struct xe_gt *gt, unsigned int vfid, 559 + struct xe_sriov_packet *data) 560 + { 561 + u64 end = data->hdr.offset + data->hdr.size; 562 + struct dma_fence *fence; 563 + struct xe_bo *vram; 564 + size_t size; 565 + int ret = 0; 566 + 567 + vram = xe_gt_sriov_pf_config_get_lmem_obj(gt, vfid); 568 + if (!vram) 569 + return -ENXIO; 570 + 571 + size = xe_bo_size(vram); 572 + 573 + if (end > size || end < data->hdr.size) { 574 + ret = -EINVAL; 575 + goto err; 576 + } 577 + 578 + pf_dump_mig_data(gt, vfid, data, "VRAM data restore"); 579 + 580 + fence = __pf_save_restore_vram(gt, vfid, vram, data->hdr.offset, 581 + data->bo, 0, data->hdr.size, false); 582 + ret = dma_fence_wait_timeout(fence, false, PF_VRAM_SAVE_RESTORE_TIMEOUT); 583 + dma_fence_put(fence); 584 + if (!ret) { 585 + ret = -ETIME; 586 + goto err; 587 + } 588 + 589 + return 0; 590 + err: 591 + xe_bo_put(vram); 592 + xe_gt_sriov_err(gt, "Failed to restore VF%u VRAM data (%pe)\n", vfid, ERR_PTR(ret)); 593 + return ret; 594 + } 595 + 596 + /** 597 + * xe_gt_sriov_pf_migration_vram_save() - Save VF VRAM migration data. 598 + * @gt: the &xe_gt 599 + * @vfid: the VF identifier (can't be 0) 600 + * 601 + * This function is for PF only. 602 + * 603 + * Return: 0 on success or a negative error code on failure. 
604 + */ 605 + int xe_gt_sriov_pf_migration_vram_save(struct xe_gt *gt, unsigned int vfid) 606 + { 607 + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); 608 + xe_gt_assert(gt, vfid != PFID); 609 + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); 610 + 611 + return pf_save_vf_vram_mig_data(gt, vfid); 612 + } 613 + 614 + /** 615 + * xe_gt_sriov_pf_migration_vram_restore() - Restore VF VRAM migration data. 616 + * @gt: the &xe_gt 617 + * @vfid: the VF identifier (can't be 0) 618 + * @data: the &xe_sriov_packet containing migration data 619 + * 620 + * This function is for PF only. 621 + * 622 + * Return: 0 on success or a negative error code on failure. 623 + */ 624 + int xe_gt_sriov_pf_migration_vram_restore(struct xe_gt *gt, unsigned int vfid, 625 + struct xe_sriov_packet *data) 626 + { 627 + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); 628 + xe_gt_assert(gt, vfid != PFID); 629 + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); 630 + 631 + return pf_restore_vf_vram_mig_data(gt, vfid, data); 632 + } 633 + 634 + /** 635 + * xe_gt_sriov_pf_migration_size() - Total size of migration data from all components within a GT. 636 + * @gt: the &xe_gt 637 + * @vfid: the VF identifier (can't be 0) 638 + * 639 + * This function is for PF only. 640 + * 641 + * Return: total migration data size in bytes or a negative error code on failure. 
642 + */ 643 + ssize_t xe_gt_sriov_pf_migration_size(struct xe_gt *gt, unsigned int vfid) 644 + { 645 + ssize_t total = 0; 646 + ssize_t size; 647 + 648 + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); 649 + xe_gt_assert(gt, vfid != PFID); 650 + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); 651 + 652 + size = pf_migration_guc_size(gt, vfid); 653 + if (size < 0) 654 + return size; 655 + if (size > 0) 656 + size += sizeof(struct xe_sriov_packet_hdr); 657 + total += size; 658 + 659 + size = pf_migration_ggtt_size(gt, vfid); 660 + if (size < 0) 661 + return size; 662 + if (size > 0) 663 + size += sizeof(struct xe_sriov_packet_hdr); 664 + total += size; 665 + 666 + size = pf_migration_mmio_size(gt, vfid); 667 + if (size < 0) 668 + return size; 669 + if (size > 0) 670 + size += sizeof(struct xe_sriov_packet_hdr); 671 + total += size; 672 + 673 + size = pf_migration_vram_size(gt, vfid); 674 + if (size < 0) 675 + return size; 676 + if (size > 0) 677 + size += sizeof(struct xe_sriov_packet_hdr); 678 + total += size; 679 + 680 + return total; 681 + } 682 + 683 + /** 684 + * xe_gt_sriov_pf_migration_ring_empty() - Check if a migration ring is empty. 685 + * @gt: the &xe_gt 686 + * @vfid: the VF identifier 687 + * 688 + * Return: true if the ring is empty, otherwise false. 689 + */ 690 + bool xe_gt_sriov_pf_migration_ring_empty(struct xe_gt *gt, unsigned int vfid) 691 + { 692 + return ptr_ring_empty(&pf_pick_gt_migration(gt, vfid)->ring); 693 + } 694 + 695 + /** 696 + * xe_gt_sriov_pf_migration_ring_full() - Check if a migration ring is full. 697 + * @gt: the &xe_gt 698 + * @vfid: the VF identifier 699 + * 700 + * Return: true if the ring is full, otherwise false. 
701 + */ 702 + bool xe_gt_sriov_pf_migration_ring_full(struct xe_gt *gt, unsigned int vfid) 703 + { 704 + return ptr_ring_full(&pf_pick_gt_migration(gt, vfid)->ring); 705 + } 706 + 707 + /** 708 + * xe_gt_sriov_pf_migration_ring_free() - Consume and free all data in migration ring 709 + * @gt: the &xe_gt 710 + * @vfid: the VF identifier 711 + */ 712 + void xe_gt_sriov_pf_migration_ring_free(struct xe_gt *gt, unsigned int vfid) 713 + { 714 + struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, vfid); 715 + struct xe_sriov_packet *data; 716 + 717 + if (ptr_ring_empty(&migration->ring)) 718 + return; 719 + 720 + xe_gt_sriov_notice(gt, "VF%u unprocessed migration data left in the ring!\n", vfid); 721 + 722 + while ((data = ptr_ring_consume(&migration->ring))) 723 + xe_sriov_packet_free(data); 724 + } 725 + 726 + static void pf_migration_save_data_todo(struct xe_gt *gt, unsigned int vfid, 727 + enum xe_sriov_packet_type type) 728 + { 729 + set_bit(type, &pf_pick_gt_migration(gt, vfid)->save.data_remaining); 730 + } 731 + 732 + /** 733 + * xe_gt_sriov_pf_migration_save_init() - Initialize per-GT migration related data. 
734 + * @gt: the &xe_gt 735 + * @vfid: the VF identifier (can't be 0) 736 + */ 737 + void xe_gt_sriov_pf_migration_save_init(struct xe_gt *gt, unsigned int vfid) 738 + { 739 + struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, vfid); 740 + 741 + migration->save.data_remaining = 0; 742 + migration->save.vram_offset = 0; 743 + 744 + xe_gt_assert(gt, pf_migration_guc_size(gt, vfid) > 0); 745 + pf_migration_save_data_todo(gt, vfid, XE_SRIOV_PACKET_TYPE_GUC); 746 + 747 + if (pf_migration_ggtt_size(gt, vfid) > 0) 748 + pf_migration_save_data_todo(gt, vfid, XE_SRIOV_PACKET_TYPE_GGTT); 749 + 750 + xe_gt_assert(gt, pf_migration_mmio_size(gt, vfid) > 0); 751 + pf_migration_save_data_todo(gt, vfid, XE_SRIOV_PACKET_TYPE_MMIO); 752 + 753 + if (pf_migration_vram_size(gt, vfid) > 0) 754 + pf_migration_save_data_todo(gt, vfid, XE_SRIOV_PACKET_TYPE_VRAM); 755 + } 756 + 757 + /** 758 + * xe_gt_sriov_pf_migration_save_data_pending() - Check if migration data type needs to be saved. 759 + * @gt: the &xe_gt 760 + * @vfid: the VF identifier (can't be 0) 761 + * @type: the &xe_sriov_packet_type of data to be checked 762 + * 763 + * Return: true if the data needs saving, otherwise false. 764 + */ 765 + bool xe_gt_sriov_pf_migration_save_data_pending(struct xe_gt *gt, unsigned int vfid, 766 + enum xe_sriov_packet_type type) 767 + { 768 + return test_bit(type, &pf_pick_gt_migration(gt, vfid)->save.data_remaining); 769 + } 770 + 771 + /** 772 + * xe_gt_sriov_pf_migration_save_data_complete() - Complete migration data type save. 773 + * @gt: the &xe_gt 774 + * @vfid: the VF identifier (can't be 0) 775 + * @type: the &xe_sriov_packet_type to be marked as completed. 
776 + */ 777 + void xe_gt_sriov_pf_migration_save_data_complete(struct xe_gt *gt, unsigned int vfid, 778 + enum xe_sriov_packet_type type) 779 + { 780 + clear_bit(type, &pf_pick_gt_migration(gt, vfid)->save.data_remaining); 781 + } 782 + 783 + /** 784 + * xe_gt_sriov_pf_migration_save_produce() - Add VF save data packet to migration ring. 785 + * @gt: the &xe_gt 786 + * @vfid: the VF identifier 787 + * @data: the &xe_sriov_packet 788 + * 789 + * Called by the save migration data producer (PF SR-IOV Control worker) when 790 + * processing migration data. 791 + * Wakes up the save migration data consumer (userspace), that is potentially 792 + * waiting for data when the ring was empty. 793 + * 794 + * Return: 0 on success or a negative error code on failure. 795 + */ 796 + int xe_gt_sriov_pf_migration_save_produce(struct xe_gt *gt, unsigned int vfid, 797 + struct xe_sriov_packet *data) 798 + { 799 + int ret; 800 + 801 + ret = ptr_ring_produce(&pf_pick_gt_migration(gt, vfid)->ring, data); 802 + if (ret) 803 + return ret; 804 + 805 + wake_up_all(xe_sriov_pf_migration_waitqueue(gt_to_xe(gt), vfid)); 806 + 807 + return 0; 808 + } 809 + 810 + /** 811 + * xe_gt_sriov_pf_migration_restore_consume() - Get VF restore data packet from migration ring. 812 + * @gt: the &xe_gt 813 + * @vfid: the VF identifier 814 + * 815 + * Called by the restore migration data consumer (PF SR-IOV Control worker) when 816 + * processing migration data. 817 + * Wakes up the restore migration data producer (userspace), that is 818 + * potentially waiting to add more data when the ring is full. 819 + * 820 + * Return: Pointer to &xe_sriov_packet on success, 821 + * NULL if ring is empty. 
822 + */ 823 + struct xe_sriov_packet * 824 + xe_gt_sriov_pf_migration_restore_consume(struct xe_gt *gt, unsigned int vfid) 825 + { 826 + struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, vfid); 827 + struct wait_queue_head *wq = xe_sriov_pf_migration_waitqueue(gt_to_xe(gt), vfid); 828 + struct xe_sriov_packet *data; 829 + 830 + data = ptr_ring_consume(&migration->ring); 831 + if (data) 832 + wake_up_all(wq); 833 + 834 + return data; 835 + } 836 + 837 + static bool pf_restore_data_ready(struct xe_gt *gt, unsigned int vfid) 838 + { 839 + if (xe_gt_sriov_pf_control_check_restore_failed(gt, vfid) || 840 + !ptr_ring_full(&pf_pick_gt_migration(gt, vfid)->ring)) 841 + return true; 842 + 843 + return false; 844 + } 845 + 846 + /** 847 + * xe_gt_sriov_pf_migration_restore_produce() - Add VF restore data packet to migration ring. 848 + * @gt: the &xe_gt 849 + * @vfid: the VF identifier 850 + * @data: the &xe_sriov_packet 851 + * 852 + * Called by the restore migration data producer (userspace) when processing 853 + * migration data. 854 + * If the ring is full, waits until there is space. 855 + * Queues the restore migration data consumer (PF SR-IOV Control worker), that 856 + * is potentially waiting for data when the ring was empty. 857 + * 858 + * Return: 0 on success or a negative error code on failure. 
859 + */ 860 + int xe_gt_sriov_pf_migration_restore_produce(struct xe_gt *gt, unsigned int vfid, 861 + struct xe_sriov_packet *data) 862 + { 863 + int ret; 864 + 865 + xe_gt_assert(gt, data->hdr.tile_id == gt->tile->id); 866 + xe_gt_assert(gt, data->hdr.gt_id == gt->info.id); 867 + 868 + for (;;) { 869 + if (xe_gt_sriov_pf_control_check_restore_failed(gt, vfid)) 870 + return -EIO; 871 + 872 + ret = ptr_ring_produce(&pf_pick_gt_migration(gt, vfid)->ring, data); 873 + if (!ret) 874 + break; 875 + 876 + ret = wait_event_interruptible(*xe_sriov_pf_migration_waitqueue(gt_to_xe(gt), vfid), 877 + pf_restore_data_ready(gt, vfid)); 878 + if (ret) 879 + return ret; 880 + } 881 + 882 + return xe_gt_sriov_pf_control_process_restore_data(gt, vfid); 883 + } 884 + 885 + /** 886 + * xe_gt_sriov_pf_migration_save_consume() - Get VF save data packet from migration ring. 887 + * @gt: the &xe_gt 888 + * @vfid: the VF identifier 889 + * 890 + * Called by the save migration data consumer (userspace) when 891 + * processing migration data. 892 + * Queues the save migration data producer (PF SR-IOV Control worker), that is 893 + * potentially waiting to add more data when the ring is full. 894 + * 895 + * Return: Pointer to &xe_sriov_packet on success, 896 + * NULL if ring is empty and there's no more data available, 897 + * ERR_PTR(-EAGAIN) if the ring is empty, but data is still produced. 
898 + */ 899 + struct xe_sriov_packet * 900 + xe_gt_sriov_pf_migration_save_consume(struct xe_gt *gt, unsigned int vfid) 901 + { 902 + struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, vfid); 903 + struct xe_sriov_packet *data; 904 + int ret; 905 + 906 + data = ptr_ring_consume(&migration->ring); 907 + if (data) { 908 + ret = xe_gt_sriov_pf_control_process_save_data(gt, vfid); 909 + if (ret) { 910 + xe_sriov_packet_free(data); 911 + return ERR_PTR(ret); 912 + } 913 + 914 + return data; 915 + } 916 + 917 + if (xe_gt_sriov_pf_control_check_save_data_done(gt, vfid)) 918 + return NULL; 919 + 920 + if (xe_gt_sriov_pf_control_check_save_failed(gt, vfid)) 921 + return ERR_PTR(-EIO); 922 + 923 + return ERR_PTR(-EAGAIN); 924 + } 925 + 926 + static void destroy_pf_packet(void *ptr) 927 + { 928 + struct xe_sriov_packet *data = ptr; 929 + 930 + xe_sriov_packet_free(data); 931 + } 932 + 933 + static void action_ring_cleanup(void *arg) 934 + { 935 + struct ptr_ring *r = arg; 936 + 937 + ptr_ring_cleanup(r, destroy_pf_packet); 467 938 } 468 939 469 940 /** ··· 1023 402 int xe_gt_sriov_pf_migration_init(struct xe_gt *gt) 1024 403 { 1025 404 struct xe_device *xe = gt_to_xe(gt); 405 + unsigned int n, totalvfs; 1026 406 int err; 1027 407 1028 408 xe_gt_assert(gt, IS_SRIOV_PF(xe)); 1029 409 1030 - gt->sriov.pf.migration.supported = pf_check_migration_support(gt); 1031 - 1032 410 if (!pf_migration_supported(gt)) 1033 411 return 0; 1034 412 1035 - err = drmm_mutex_init(&xe->drm, &gt->sriov.pf.migration.snapshot_lock); 1036 - if (err) 1037 - return err; 413 + totalvfs = xe_sriov_pf_get_totalvfs(xe); 414 + for (n = 1; n <= totalvfs; n++) { 415 + struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, n); 416 + 417 + err = ptr_ring_init(&migration->ring, 418 + XE_GT_SRIOV_PF_MIGRATION_RING_SIZE, GFP_KERNEL); 419 + if (err) 420 + return err; 421 + 422 + err = devm_add_action_or_reset(xe->drm.dev, action_ring_cleanup, &migration->ring); 423 + if (err) 
424 + return err; 425 + } 1038 426 1039 427 return 0; 1040 428 }
+38 -8
drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
··· 9 9 #include <linux/types.h> 10 10 11 11 struct xe_gt; 12 + struct xe_sriov_packet; 13 + enum xe_sriov_packet_type; 14 + 15 + /* TODO: get this information by querying GuC in the future */ 16 + #define XE_GT_SRIOV_PF_MIGRATION_GUC_DATA_MAX_SIZE SZ_8M 12 17 13 18 int xe_gt_sriov_pf_migration_init(struct xe_gt *gt); 14 - int xe_gt_sriov_pf_migration_save_guc_state(struct xe_gt *gt, unsigned int vfid); 15 - int xe_gt_sriov_pf_migration_restore_guc_state(struct xe_gt *gt, unsigned int vfid); 19 + int xe_gt_sriov_pf_migration_guc_save(struct xe_gt *gt, unsigned int vfid); 20 + int xe_gt_sriov_pf_migration_guc_restore(struct xe_gt *gt, unsigned int vfid, 21 + struct xe_sriov_packet *data); 22 + int xe_gt_sriov_pf_migration_ggtt_save(struct xe_gt *gt, unsigned int vfid); 23 + int xe_gt_sriov_pf_migration_ggtt_restore(struct xe_gt *gt, unsigned int vfid, 24 + struct xe_sriov_packet *data); 25 + int xe_gt_sriov_pf_migration_mmio_save(struct xe_gt *gt, unsigned int vfid); 26 + int xe_gt_sriov_pf_migration_mmio_restore(struct xe_gt *gt, unsigned int vfid, 27 + struct xe_sriov_packet *data); 28 + int xe_gt_sriov_pf_migration_vram_save(struct xe_gt *gt, unsigned int vfid); 29 + int xe_gt_sriov_pf_migration_vram_restore(struct xe_gt *gt, unsigned int vfid, 30 + struct xe_sriov_packet *data); 16 31 17 - #ifdef CONFIG_DEBUG_FS 18 - ssize_t xe_gt_sriov_pf_migration_read_guc_state(struct xe_gt *gt, unsigned int vfid, 19 - char __user *buf, size_t count, loff_t *pos); 20 - ssize_t xe_gt_sriov_pf_migration_write_guc_state(struct xe_gt *gt, unsigned int vfid, 21 - const char __user *buf, size_t count); 22 - #endif 32 + ssize_t xe_gt_sriov_pf_migration_size(struct xe_gt *gt, unsigned int vfid); 33 + 34 + bool xe_gt_sriov_pf_migration_ring_empty(struct xe_gt *gt, unsigned int vfid); 35 + bool xe_gt_sriov_pf_migration_ring_full(struct xe_gt *gt, unsigned int vfid); 36 + void xe_gt_sriov_pf_migration_ring_free(struct xe_gt *gt, unsigned int vfid); 37 + 38 + void 
xe_gt_sriov_pf_migration_save_init(struct xe_gt *gt, unsigned int vfid); 39 + bool xe_gt_sriov_pf_migration_save_data_pending(struct xe_gt *gt, unsigned int vfid, 40 + enum xe_sriov_packet_type type); 41 + void xe_gt_sriov_pf_migration_save_data_complete(struct xe_gt *gt, unsigned int vfid, 42 + enum xe_sriov_packet_type type); 43 + 44 + int xe_gt_sriov_pf_migration_save_produce(struct xe_gt *gt, unsigned int vfid, 45 + struct xe_sriov_packet *data); 46 + struct xe_sriov_packet * 47 + xe_gt_sriov_pf_migration_restore_consume(struct xe_gt *gt, unsigned int vfid); 48 + 49 + int xe_gt_sriov_pf_migration_restore_produce(struct xe_gt *gt, unsigned int vfid, 50 + struct xe_sriov_packet *data); 51 + struct xe_sriov_packet * 52 + xe_gt_sriov_pf_migration_save_consume(struct xe_gt *gt, unsigned int vfid); 23 53 24 54 #endif
+11 -23
drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h
··· 6 6 #ifndef _XE_GT_SRIOV_PF_MIGRATION_TYPES_H_ 7 7 #define _XE_GT_SRIOV_PF_MIGRATION_TYPES_H_ 8 8 9 - #include <linux/mutex.h> 10 - #include <linux/types.h> 9 + #include <linux/ptr_ring.h> 11 10 12 11 /** 13 - * struct xe_gt_sriov_state_snapshot - GT-level per-VF state snapshot data. 12 + * struct xe_gt_sriov_migration_data - GT-level per-VF migration data. 14 13 * 15 14 * Used by the PF driver to maintain per-VF migration data. 16 15 */ 17 - struct xe_gt_sriov_state_snapshot { 18 - /** @guc: GuC VF state snapshot */ 16 + struct xe_gt_sriov_migration_data { 17 + /** @ring: queue containing VF save / restore migration data */ 18 + struct ptr_ring ring; 19 + /** @save: structure for currently processed save migration data */ 19 20 struct { 20 - /** @guc.buff: buffer with the VF state */ 21 - u32 *buff; 22 - /** @guc.size: size of the buffer (must be dwords aligned) */ 23 - u32 size; 24 - } guc; 25 - }; 26 - 27 - /** 28 - * struct xe_gt_sriov_pf_migration - GT-level data. 29 - * 30 - * Used by the PF driver to maintain non-VF specific per-GT data. 31 - */ 32 - struct xe_gt_sriov_pf_migration { 33 - /** @supported: indicates whether the feature is supported */ 34 - bool supported; 35 - 36 - /** @snapshot_lock: protects all VFs snapshots */ 37 - struct mutex snapshot_lock; 21 + /** @save.data_remaining: bitmap of migration types that need to be saved */ 22 + unsigned long data_remaining; 23 + /** @save.vram_offset: last saved offset within VRAM, used for chunked VRAM save */ 24 + loff_t vram_offset; 25 + } save; 38 26 }; 39 27 40 28 #endif
+20 -1
drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c
··· 99 99 HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ 100 100 }; 101 101 102 + static const struct xe_reg ver_35_runtime_regs[] = { 103 + RPM_CONFIG0, /* _MMIO(0x0d00) */ 104 + XEHP_FUSE4, /* _MMIO(0x9114) */ 105 + MIRROR_FUSE3, /* _MMIO(0x9118) */ 106 + MIRROR_L3BANK_ENABLE, /* _MMIO(0x9130) */ 107 + XELP_EU_ENABLE, /* _MMIO(0x9134) */ 108 + XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */ 109 + GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */ 110 + XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */ 111 + XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,/* _MMIO(0x9148) */ 112 + XE2_GT_COMPUTE_DSS_2, /* _MMIO(0x914c) */ 113 + XE2_GT_GEOMETRY_DSS_1, /* _MMIO(0x9150) */ 114 + XE2_GT_GEOMETRY_DSS_2, /* _MMIO(0x9154) */ 115 + SERVICE_COPY_ENABLE, /* _MMIO(0x9170) */ 116 + }; 117 + 102 118 static const struct xe_reg *pick_runtime_regs(struct xe_device *xe, unsigned int *count) 103 119 { 104 120 const struct xe_reg *regs; 105 121 106 - if (GRAPHICS_VERx100(xe) >= 3000) { 122 + if (GRAPHICS_VER(xe) >= 35) { 123 + *count = ARRAY_SIZE(ver_35_runtime_regs); 124 + regs = ver_35_runtime_regs; 125 + } else if (GRAPHICS_VERx100(xe) >= 3000) { 107 126 *count = ARRAY_SIZE(ver_3000_runtime_regs); 108 127 regs = ver_3000_runtime_regs; 109 128 } else if (GRAPHICS_VERx100(xe) >= 2000) {
+2 -3
drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
··· 31 31 /** @version: negotiated VF/PF ABI version */ 32 32 struct xe_gt_sriov_pf_service_version version; 33 33 34 - /** @snapshot: snapshot of the VF state data */ 35 - struct xe_gt_sriov_state_snapshot snapshot; 34 + /** @migration: per-VF migration data. */ 35 + struct xe_gt_sriov_migration_data migration; 36 36 }; 37 37 38 38 /** ··· 58 58 struct xe_gt_sriov_pf_service service; 59 59 struct xe_gt_sriov_pf_control control; 60 60 struct xe_gt_sriov_pf_policy policy; 61 - struct xe_gt_sriov_pf_migration migration; 62 61 struct xe_gt_sriov_spare_config spare; 63 62 struct xe_gt_sriov_metadata *vfs; 64 63 };
+56 -2
drivers/gpu/drm/xe/xe_gt_throttle.c
··· 22 22 * Their availability depend on the platform and some may not be visible if that 23 23 * reason is not available. 24 24 * 25 + * The ``reasons`` attribute can be used by sysadmin to monitor all possible 26 + * reasons for throttling and report them. It's preferred over monitoring 27 + * ``status`` and then reading the reason from individual attributes since that 28 + * is racy. If there's no throttling happening, "none" is returned. 29 + * 25 30 * The following attributes are available on Crescent Island platform: 26 31 * 27 - * - ``status``: Overall throttle status 32 + * - ``status``: Overall throttle status (0: no throttling, 1: throttling) 33 + * - ``reasons``: Array of reasons causing throttling separated by space 28 34 * - ``reason_pl1``: package PL1 29 35 * - ``reason_pl2``: package PL2 30 36 * - ``reason_pl4``: package PL4 ··· 49 43 * 50 44 * Other platforms support the following reasons: 51 45 * 52 - * - ``status``: Overall status 46 + * - ``status``: Overall throttle status (0: no throttling, 1: throttling) 47 + * - ``reasons``: Array of reasons causing throttling separated by space 53 48 * - ``reason_pl1``: package PL1 54 49 * - ``reason_pl2``: package PL2 55 50 * - ``reason_pl4``: package PL4, Iccmax etc. 
··· 118 111 return sysfs_emit(buff, "%u\n", is_throttled_by(gt, ta->mask)); 119 112 } 120 113 114 + static const struct attribute_group *get_platform_throttle_group(struct xe_device *xe); 115 + 116 + static ssize_t reasons_show(struct kobject *kobj, 117 + struct kobj_attribute *attr, char *buff) 118 + { 119 + struct xe_gt *gt = throttle_to_gt(kobj); 120 + struct xe_device *xe = gt_to_xe(gt); 121 + const struct attribute_group *group; 122 + struct attribute **pother; 123 + ssize_t ret = 0; 124 + u32 reasons; 125 + 126 + reasons = xe_gt_throttle_get_limit_reasons(gt); 127 + if (!reasons) 128 + goto ret_none; 129 + 130 + group = get_platform_throttle_group(xe); 131 + for (pother = group->attrs; *pother; pother++) { 132 + struct kobj_attribute *kattr = container_of(*pother, struct kobj_attribute, attr); 133 + struct throttle_attribute *other_ta = kobj_attribute_to_throttle(kattr); 134 + 135 + if (other_ta->mask != U32_MAX && reasons & other_ta->mask) 136 + ret += sysfs_emit_at(buff, ret, "%s ", (*pother)->name); 137 + } 138 + 139 + if (drm_WARN_ONCE(&xe->drm, !ret, "Unknown reason: %#x\n", reasons)) 140 + goto ret_none; 141 + 142 + /* Drop extra space from last iteration above */ 143 + ret--; 144 + ret += sysfs_emit_at(buff, ret, "\n"); 145 + 146 + return ret; 147 + 148 + ret_none: 149 + return sysfs_emit(buff, "none\n"); 150 + } 151 + 121 152 #define THROTTLE_ATTR_RO(name, _mask) \ 122 153 struct throttle_attribute attr_##name = { \ 123 154 .attr = __ATTR(name, 0444, reason_show, NULL), \ 124 155 .mask = _mask, \ 125 156 } 126 157 158 + #define THROTTLE_ATTR_RO_FUNC(name, _mask, _show) \ 159 + struct throttle_attribute attr_##name = { \ 160 + .attr = __ATTR(name, 0444, _show, NULL), \ 161 + .mask = _mask, \ 162 + } 163 + 164 + static THROTTLE_ATTR_RO_FUNC(reasons, 0, reasons_show); 127 165 static THROTTLE_ATTR_RO(status, U32_MAX); 128 166 static THROTTLE_ATTR_RO(reason_pl1, POWER_LIMIT_1_MASK); 129 167 static THROTTLE_ATTR_RO(reason_pl2, POWER_LIMIT_2_MASK); ··· 180 
128 static THROTTLE_ATTR_RO(reason_vr_tdc, VR_TDC_MASK); 181 129 182 130 static struct attribute *throttle_attrs[] = { 131 + &attr_reasons.attr.attr, 183 132 &attr_status.attr.attr, 184 133 &attr_reason_pl1.attr.attr, 185 134 &attr_reason_pl2.attr.attr, ··· 206 153 207 154 static struct attribute *cri_throttle_attrs[] = { 208 155 /* Common */ 156 + &attr_reasons.attr.attr, 209 157 &attr_status.attr.attr, 210 158 &attr_reason_pl1.attr.attr, 211 159 &attr_reason_pl2.attr.attr,
+119
drivers/gpu/drm/xe/xe_guard.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2025 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_GUARD_H_ 7 + #define _XE_GUARD_H_ 8 + 9 + #include <linux/spinlock.h> 10 + 11 + /** 12 + * struct xe_guard - Simple logic to protect a feature. 13 + * 14 + * Implements simple semaphore-like logic that can be used to lockdown the 15 + * feature unless it is already in use. Allows enabling of the otherwise 16 + * incompatible features, where we can't follow the strict owner semantics 17 + * required by the &rw_semaphore. 18 + * 19 + * NOTE! It shouldn't be used to protect data, use &rw_semaphore instead. 20 + */ 21 + struct xe_guard { 22 + /** 23 + * @counter: implements simple exclusive/lockdown logic: 24 + * if == 0 then guard/feature is idle/not in use, 25 + * if < 0 then feature is active and can't be locked-down, 26 + * if > 0 then feature is locked-down and can't be activated. 27 + */ 28 + int counter; 29 + 30 + /** @name: the name of the guard (useful for debug) */ 31 + const char *name; 32 + 33 + /** @owner: the info about the last owner of the guard (for debug) */ 34 + void *owner; 35 + 36 + /** @lock: protects guard's data */ 37 + spinlock_t lock; 38 + }; 39 + 40 + /** 41 + * xe_guard_init() - Initialize the guard. 42 + * @guard: the &xe_guard to init 43 + * @name: name of the guard 44 + */ 45 + static inline void xe_guard_init(struct xe_guard *guard, const char *name) 46 + { 47 + spin_lock_init(&guard->lock); 48 + guard->counter = 0; 49 + guard->name = name; 50 + } 51 + 52 + /** 53 + * xe_guard_arm() - Arm the guard for the exclusive/lockdown mode. 54 + * @guard: the &xe_guard to arm 55 + * @lockdown: arm for lockdown(true) or exclusive(false) mode 56 + * @who: optional owner info (for debug only) 57 + * 58 + * Multiple lockdown requests are allowed. 59 + * Only a single exclusive access can be granted. 60 + * Will fail if the guard is already in exclusive mode. 61 + * On success, must call the xe_guard_disarm() to release. 
62 + * 63 + * Return: 0 on success or a negative error code on failure. 64 + */ 65 + static inline int xe_guard_arm(struct xe_guard *guard, bool lockdown, void *who) 66 + { 67 + guard(spinlock)(&guard->lock); 68 + 69 + if (lockdown) { 70 + if (guard->counter < 0) 71 + return -EBUSY; 72 + guard->counter++; 73 + } else { 74 + if (guard->counter > 0) 75 + return -EPERM; 76 + if (guard->counter < 0) 77 + return -EUSERS; 78 + guard->counter--; 79 + } 80 + 81 + guard->owner = who; 82 + return 0; 83 + } 84 + 85 + /** 86 + * xe_guard_disarm() - Disarm the guard from exclusive/lockdown mode. 87 + * @guard: the &xe_guard to disarm 88 + * @lockdown: disarm from lockdown(true) or exclusive(false) mode 89 + * 90 + * Return: true if successfully disarmed or false in case of mismatch. 91 + */ 92 + static inline bool xe_guard_disarm(struct xe_guard *guard, bool lockdown) 93 + { 94 + guard(spinlock)(&guard->lock); 95 + 96 + if (lockdown) { 97 + if (guard->counter <= 0) 98 + return false; 99 + guard->counter--; 100 + } else { 101 + if (guard->counter != -1) 102 + return false; 103 + guard->counter++; 104 + } 105 + return true; 106 + } 107 + 108 + /** 109 + * xe_guard_mode_str() - Convert guard mode into a string. 110 + * @lockdown: flag used to select lockdown or exclusive mode 111 + * 112 + * Return: "lockdown" or "exclusive" string. 113 + */ 114 + static inline const char *xe_guard_mode_str(bool lockdown) 115 + { 116 + return lockdown ? "lockdown" : "exclusive"; 117 + } 118 + 119 + #endif
+12 -1
drivers/gpu/drm/xe/xe_guc.c
··· 24 24 #include "xe_gt_printk.h" 25 25 #include "xe_gt_sriov_vf.h" 26 26 #include "xe_gt_throttle.h" 27 + #include "xe_gt_sriov_pf_migration.h" 27 28 #include "xe_guc_ads.h" 28 29 #include "xe_guc_buf.h" 29 30 #include "xe_guc_capture.h" ··· 41 40 #include "xe_mmio.h" 42 41 #include "xe_platform_types.h" 43 42 #include "xe_sriov.h" 43 + #include "xe_sriov_pf_migration.h" 44 44 #include "xe_uc.h" 45 45 #include "xe_uc_fw.h" 46 46 #include "xe_wa.h" ··· 823 821 return 0; 824 822 } 825 823 824 + static u32 guc_additional_cache_size(struct xe_device *xe) 825 + { 826 + if (IS_SRIOV_PF(xe) && xe_sriov_pf_migration_supported(xe)) 827 + return XE_GT_SRIOV_PF_MIGRATION_GUC_DATA_MAX_SIZE; 828 + else 829 + return 0; /* Fallback to default size */ 830 + } 831 + 826 832 /** 827 833 * xe_guc_init_post_hwconfig - initialize GuC post hwconfig load 828 834 * @guc: The GuC object ··· 870 860 if (ret) 871 861 return ret; 872 862 873 - ret = xe_guc_buf_cache_init(&guc->buf); 863 + ret = xe_guc_buf_cache_init_with_size(&guc->buf, 864 + guc_additional_cache_size(guc_to_xe(guc))); 874 865 if (ret) 875 866 return ret; 876 867
+46 -11
drivers/gpu/drm/xe/xe_guc_buf.c
··· 13 13 #include "xe_guc_buf.h" 14 14 #include "xe_sa.h" 15 + 16 + #define XE_GUC_BUF_CACHE_DEFAULT_SIZE SZ_8K 17 + 16 18 static struct xe_guc *cache_to_guc(struct xe_guc_buf_cache *cache) 17 19 { 18 20 return container_of(cache, struct xe_guc, buf); ··· 25 23 return guc_to_gt(cache_to_guc(cache)); 26 24 } 27 25 28 - /** 29 - * xe_guc_buf_cache_init() - Initialize the GuC Buffer Cache. 30 - * @cache: the &xe_guc_buf_cache to initialize 31 - * 32 - * The Buffer Cache allows to obtain a reusable buffer that can be used to pass 33 - * indirect H2G data to GuC without a need to create a ad-hoc allocation. 34 - * 35 - * Return: 0 on success or a negative error code on failure. 36 - */ 37 - int xe_guc_buf_cache_init(struct xe_guc_buf_cache *cache) 26 + static int guc_buf_cache_init(struct xe_guc_buf_cache *cache, u32 size) 38 27 { 39 28 struct xe_gt *gt = cache_to_gt(cache); 40 29 struct xe_sa_manager *sam; 41 30 42 - sam = __xe_sa_bo_manager_init(gt_to_tile(gt), SZ_8K, 0, sizeof(u32)); 31 + sam = __xe_sa_bo_manager_init(gt_to_tile(gt), size, 0, sizeof(u32)); 43 32 if (IS_ERR(sam)) 44 33 return PTR_ERR(sam); 45 34 cache->sam = sam; ··· 39 46 xe_guc_buf_cache_dwords(cache), xe_bo_ggtt_addr(sam->bo), 40 47 __builtin_return_address(0)); 41 48 return 0; 49 + } 50 + 51 + /** 52 + * xe_guc_buf_cache_init() - Initialize the GuC Buffer Cache. 53 + * @cache: the &xe_guc_buf_cache to initialize 54 + * 55 + * The Buffer Cache allows to obtain a reusable buffer that can be used to pass 56 + * data to GuC or read data from GuC without a need to create an ad-hoc allocation. 57 + * 58 + * Return: 0 on success or a negative error code on failure. 59 + */ 60 + int xe_guc_buf_cache_init(struct xe_guc_buf_cache *cache) 61 + { 62 + return guc_buf_cache_init(cache, XE_GUC_BUF_CACHE_DEFAULT_SIZE); 63 + } 64 + 65 + /** 66 + * xe_guc_buf_cache_init_with_size() - Initialize the GuC Buffer Cache. 
67 + * @cache: the &xe_guc_buf_cache to initialize 68 + * @size: size in bytes 69 + * 70 + * Like xe_guc_buf_cache_init(), except it allows the caller to make the cache 71 + * buffer larger, allowing it to accommodate larger objects. 72 + * 73 + * Return: 0 on success or a negative error code on failure. 74 + */ 75 + int xe_guc_buf_cache_init_with_size(struct xe_guc_buf_cache *cache, u32 size) 76 + { 77 + return guc_buf_cache_init(cache, max(XE_GUC_BUF_CACHE_DEFAULT_SIZE, size)); 42 78 } 43 79 44 80 /** ··· 135 113 { 136 114 if (xe_guc_buf_is_valid(buf)) 137 115 xe_sa_bo_free(buf.sa, NULL); 116 + } 117 + 118 + /** 119 + * xe_guc_buf_sync_read() - Copy the data from the GPU memory to the sub-allocation. 120 + * @buf: the &xe_guc_buf to sync 121 + * 122 + * Return: a CPU pointer of the sub-allocation. 123 + */ 124 + void *xe_guc_buf_sync_read(const struct xe_guc_buf buf) 125 + { 126 + xe_sa_bo_sync_read(buf.sa); 127 + 128 + return xe_sa_bo_cpu_addr(buf.sa); 138 129 } 139 130 140 131 /**
+2
drivers/gpu/drm/xe/xe_guc_buf.h
··· 12 12 #include "xe_guc_buf_types.h" 13 13 14 14 int xe_guc_buf_cache_init(struct xe_guc_buf_cache *cache); 15 + int xe_guc_buf_cache_init_with_size(struct xe_guc_buf_cache *cache, u32 size); 15 16 u32 xe_guc_buf_cache_dwords(struct xe_guc_buf_cache *cache); 16 17 struct xe_guc_buf xe_guc_buf_reserve(struct xe_guc_buf_cache *cache, u32 dwords); 17 18 struct xe_guc_buf xe_guc_buf_from_data(struct xe_guc_buf_cache *cache, ··· 31 30 } 32 31 33 32 void *xe_guc_buf_cpu_ptr(const struct xe_guc_buf buf); 33 + void *xe_guc_buf_sync_read(const struct xe_guc_buf buf); 34 34 u64 xe_guc_buf_flush(const struct xe_guc_buf buf); 35 35 u64 xe_guc_buf_gpu_addr(const struct xe_guc_buf buf); 36 36 u64 xe_guc_cache_gpu_addr_from_ptr(struct xe_guc_buf_cache *cache, const void *ptr, u32 size);
+50 -50
drivers/gpu/drm/xe/xe_guc_pc.c
··· 331 331 * Our goal is to have the admin choices respected. 332 332 */ 333 333 pc_action_set_param(pc, SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY, 334 - freq < pc->rpe_freq); 334 + freq < xe_guc_pc_get_rpe_freq(pc)); 335 335 336 336 return pc_action_set_param(pc, 337 337 SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, ··· 363 363 freq); 364 364 } 365 365 366 - static void mtl_update_rpa_value(struct xe_guc_pc *pc) 366 + static u32 mtl_get_rpa_freq(struct xe_guc_pc *pc) 367 367 { 368 368 struct xe_gt *gt = pc_to_gt(pc); 369 369 u32 reg; ··· 373 373 else 374 374 reg = xe_mmio_read32(&gt->mmio, MTL_GT_RPA_FREQUENCY); 375 375 376 - pc->rpa_freq = decode_freq(REG_FIELD_GET(MTL_RPA_MASK, reg)); 376 + return decode_freq(REG_FIELD_GET(MTL_RPA_MASK, reg)); 377 377 } 378 378 379 - static void mtl_update_rpe_value(struct xe_guc_pc *pc) 379 + static u32 mtl_get_rpe_freq(struct xe_guc_pc *pc) 380 380 { 381 381 struct xe_gt *gt = pc_to_gt(pc); 382 382 u32 reg; ··· 386 386 else 387 387 reg = xe_mmio_read32(&gt->mmio, MTL_GT_RPE_FREQUENCY); 388 388 389 - pc->rpe_freq = decode_freq(REG_FIELD_GET(MTL_RPE_MASK, reg)); 389 + return decode_freq(REG_FIELD_GET(MTL_RPE_MASK, reg)); 390 390 } 391 391 392 - static void tgl_update_rpa_value(struct xe_guc_pc *pc) 392 + static u32 pvc_get_rpa_freq(struct xe_guc_pc *pc) 393 393 { 394 - struct xe_gt *gt = pc_to_gt(pc); 395 - struct xe_device *xe = gt_to_xe(gt); 396 - u32 reg; 397 - 398 394 /* 399 395 * For PVC we still need to use fused RP0 as the approximation for RPa 400 396 * For other platforms than PVC we get the resolved RPa directly from 401 397 * PCODE at a different register 402 398 */ 403 - if (xe->info.platform == XE_PVC) { 404 - reg = xe_mmio_read32(&gt->mmio, PVC_RP_STATE_CAP); 405 - pc->rpa_freq = REG_FIELD_GET(RP0_MASK, reg) * GT_FREQUENCY_MULTIPLIER; 406 - } else { 407 - reg = xe_mmio_read32(&gt->mmio, FREQ_INFO_REC); 408 - pc->rpa_freq = REG_FIELD_GET(RPA_MASK, reg) * GT_FREQUENCY_MULTIPLIER; 409 - } 399 + 400 + struct xe_gt *gt = 
pc_to_gt(pc); 401 + u32 reg; 402 + 403 + reg = xe_mmio_read32(&gt->mmio, PVC_RP_STATE_CAP); 404 + return REG_FIELD_GET(RP0_MASK, reg) * GT_FREQUENCY_MULTIPLIER; 410 405 } 411 406 412 - static void tgl_update_rpe_value(struct xe_guc_pc *pc) 407 + static u32 tgl_get_rpa_freq(struct xe_guc_pc *pc) 413 408 { 414 409 struct xe_gt *gt = pc_to_gt(pc); 415 - struct xe_device *xe = gt_to_xe(gt); 410 + u32 reg; 411 + 412 + reg = xe_mmio_read32(&gt->mmio, FREQ_INFO_REC); 413 + return REG_FIELD_GET(RPA_MASK, reg) * GT_FREQUENCY_MULTIPLIER; 414 + } 415 + 416 + static u32 pvc_get_rpe_freq(struct xe_guc_pc *pc) 417 + { 418 + struct xe_gt *gt = pc_to_gt(pc); 416 419 u32 reg; 417 420 418 421 /* 419 422 * For PVC we still need to use fused RP1 as the approximation for RPe 420 - * For other platforms than PVC we get the resolved RPe directly from 421 - * PCODE at a different register 422 423 */ 423 - if (xe->info.platform == XE_PVC) { 424 - reg = xe_mmio_read32(&gt->mmio, PVC_RP_STATE_CAP); 425 - pc->rpe_freq = REG_FIELD_GET(RP1_MASK, reg) * GT_FREQUENCY_MULTIPLIER; 426 - } else { 427 - reg = xe_mmio_read32(&gt->mmio, FREQ_INFO_REC); 428 - pc->rpe_freq = REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER; 429 - } 424 + reg = xe_mmio_read32(&gt->mmio, PVC_RP_STATE_CAP); 425 + return REG_FIELD_GET(RP1_MASK, reg) * GT_FREQUENCY_MULTIPLIER; 430 426 } 431 427 432 - static void pc_update_rp_values(struct xe_guc_pc *pc) 428 + static u32 tgl_get_rpe_freq(struct xe_guc_pc *pc) 433 429 { 434 430 struct xe_gt *gt = pc_to_gt(pc); 435 - struct xe_device *xe = gt_to_xe(gt); 436 - 437 - if (GRAPHICS_VERx100(xe) >= 1270) { 438 - mtl_update_rpa_value(pc); 439 - mtl_update_rpe_value(pc); 440 - } else { 441 - tgl_update_rpa_value(pc); 442 - tgl_update_rpe_value(pc); 443 - } 431 + u32 reg; 444 432 445 433 /* 446 - * RPe is decided at runtime by PCODE. In the rare case where that's 447 - * smaller than the fused min, we will trust the PCODE and use that 448 - * as our minimum one. 
434 + * For other platforms than PVC, we get the resolved RPe directly from 435 + * PCODE at a different register 449 436 */ 450 - pc->rpn_freq = min(pc->rpn_freq, pc->rpe_freq); 437 + reg = xe_mmio_read32(&gt->mmio, FREQ_INFO_REC); 438 + return REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER; 451 439 } 452 440 453 441 /** ··· 536 548 */ 537 549 u32 xe_guc_pc_get_rpa_freq(struct xe_guc_pc *pc) 538 550 { 539 - pc_update_rp_values(pc); 551 + struct xe_gt *gt = pc_to_gt(pc); 552 + struct xe_device *xe = gt_to_xe(gt); 540 553 541 - return pc->rpa_freq; 554 + if (GRAPHICS_VERx100(xe) == 1260) 555 + return pvc_get_rpa_freq(pc); 556 + else if (GRAPHICS_VERx100(xe) >= 1270) 557 + return mtl_get_rpa_freq(pc); 558 + else 559 + return tgl_get_rpa_freq(pc); 542 560 } 543 561 544 562 /** ··· 555 561 */ 556 562 u32 xe_guc_pc_get_rpe_freq(struct xe_guc_pc *pc) 557 563 { 558 - pc_update_rp_values(pc); 564 + struct xe_device *xe = pc_to_xe(pc); 565 + u32 freq; 559 566 560 - return pc->rpe_freq; 567 + if (GRAPHICS_VERx100(xe) == 1260) 568 + freq = pvc_get_rpe_freq(pc); 569 + else if (GRAPHICS_VERx100(xe) >= 1270) 570 + freq = mtl_get_rpe_freq(pc); 571 + else 572 + freq = tgl_get_rpe_freq(pc); 573 + 574 + return freq; 561 575 } 562 576 563 577 /** ··· 1024 1022 /* 1025 1023 * Ensure min and max are bound by MERT_FREQ_CAP until driver loads. 
1026 1024 */ 1027 - ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc))); 1025 + ret = pc_set_min_freq(pc, min(xe_guc_pc_get_rpe_freq(pc), pc_max_freq_cap(pc))); 1028 1026 if (!ret) 1029 1027 ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc))); 1030 1028 ··· 1134 1132 ret = pc_adjust_requested_freq(pc); 1135 1133 if (ret) 1136 1134 goto out; 1137 - 1138 - pc_update_rp_values(pc); 1139 1135 1140 1136 pc_init_pcode_freq(pc); 1141 1137 ··· 1340 1340 XE_WARN_ON(xe_guc_pc_stop(pc)); 1341 1341 1342 1342 /* Bind requested freq to mert_freq_cap before unload */ 1343 - pc_set_cur_freq(pc, min(pc_max_freq_cap(pc), pc->rpe_freq)); 1343 + pc_set_cur_freq(pc, min(pc_max_freq_cap(pc), xe_guc_pc_get_rpe_freq(pc))); 1344 1344 1345 1345 xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), fw_ref); 1346 1346 }
-4
drivers/gpu/drm/xe/xe_guc_pc_types.h
··· 19 19 atomic_t flush_freq_limit; 20 20 /** @rp0_freq: HW RP0 frequency - The Maximum one */ 21 21 u32 rp0_freq; 22 - /** @rpa_freq: HW RPa frequency - The Achievable one */ 23 - u32 rpa_freq; 24 - /** @rpe_freq: HW RPe frequency - The Efficient one */ 25 - u32 rpe_freq; 26 22 /** @rpn_freq: HW RPN frequency - The Minimum one */ 27 23 u32 rpn_freq; 28 24 /** @user_requested_min: Stash the minimum requested freq by user */
+123 -5
drivers/gpu/drm/xe/xe_migrate.c
··· 29 29 #include "xe_lrc.h" 30 30 #include "xe_map.h" 31 31 #include "xe_mocs.h" 32 + #include "xe_printk.h" 32 33 #include "xe_pt.h" 33 34 #include "xe_res_cursor.h" 34 35 #include "xe_sa.h" ··· 1211 1210 return migrate->q; 1212 1211 } 1213 1212 1213 + /** 1214 + * xe_migrate_vram_copy_chunk() - Copy a chunk of a VRAM buffer object. 1215 + * @vram_bo: The VRAM buffer object. 1216 + * @vram_offset: The VRAM offset. 1217 + * @sysmem_bo: The sysmem buffer object. 1218 + * @sysmem_offset: The sysmem offset. 1219 + * @size: The size of VRAM chunk to copy. 1220 + * @dir: The direction of the copy operation. 1221 + * 1222 + * Copies a portion of a buffer object between VRAM and system memory. 1223 + * On Xe2 platforms that support flat CCS, VRAM data is decompressed when 1224 + * copying to system memory. 1225 + * 1226 + * Return: Pointer to a dma_fence representing the last copy batch, or 1227 + * an error pointer on failure. If there is a failure, any copy operation 1228 + * started by the function call has been synced. 
1229 + */ 1230 + struct dma_fence *xe_migrate_vram_copy_chunk(struct xe_bo *vram_bo, u64 vram_offset, 1231 + struct xe_bo *sysmem_bo, u64 sysmem_offset, 1232 + u64 size, enum xe_migrate_copy_dir dir) 1233 + { 1234 + struct xe_device *xe = xe_bo_device(vram_bo); 1235 + struct xe_tile *tile = vram_bo->tile; 1236 + struct xe_gt *gt = tile->primary_gt; 1237 + struct xe_migrate *m = tile->migrate; 1238 + struct dma_fence *fence = NULL; 1239 + struct ttm_resource *vram = vram_bo->ttm.resource; 1240 + struct ttm_resource *sysmem = sysmem_bo->ttm.resource; 1241 + struct xe_res_cursor vram_it, sysmem_it; 1242 + u64 vram_L0_ofs, sysmem_L0_ofs; 1243 + u32 vram_L0_pt, sysmem_L0_pt; 1244 + u64 vram_L0, sysmem_L0; 1245 + bool to_sysmem = (dir == XE_MIGRATE_COPY_TO_SRAM); 1246 + bool use_comp_pat = to_sysmem && 1247 + GRAPHICS_VER(xe) >= 20 && xe_device_has_flat_ccs(xe); 1248 + int pass = 0; 1249 + int err; 1250 + 1251 + xe_assert(xe, IS_ALIGNED(vram_offset | sysmem_offset | size, PAGE_SIZE)); 1252 + xe_assert(xe, xe_bo_is_vram(vram_bo)); 1253 + xe_assert(xe, !xe_bo_is_vram(sysmem_bo)); 1254 + xe_assert(xe, !range_overflows(vram_offset, size, (u64)vram_bo->ttm.base.size)); 1255 + xe_assert(xe, !range_overflows(sysmem_offset, size, (u64)sysmem_bo->ttm.base.size)); 1256 + 1257 + xe_res_first(vram, vram_offset, size, &vram_it); 1258 + xe_res_first_sg(xe_bo_sg(sysmem_bo), sysmem_offset, size, &sysmem_it); 1259 + 1260 + while (size) { 1261 + u32 pte_flags = PTE_UPDATE_FLAG_IS_VRAM; 1262 + u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */ 1263 + struct xe_sched_job *job; 1264 + struct xe_bb *bb; 1265 + u32 update_idx; 1266 + bool usm = xe->info.has_usm; 1267 + u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE; 1268 + 1269 + sysmem_L0 = xe_migrate_res_sizes(m, &sysmem_it); 1270 + vram_L0 = min(xe_migrate_res_sizes(m, &vram_it), sysmem_L0); 1271 + 1272 + xe_dbg(xe, "Pass %u, size: %llu\n", pass++, vram_L0); 1273 + 1274 + pte_flags |= use_comp_pat ? 
PTE_UPDATE_FLAG_IS_COMP_PTE : 0; 1275 + batch_size += pte_update_size(m, pte_flags, vram, &vram_it, &vram_L0, 1276 + &vram_L0_ofs, &vram_L0_pt, 0, 0, avail_pts); 1277 + 1278 + batch_size += pte_update_size(m, 0, sysmem, &sysmem_it, &vram_L0, &sysmem_L0_ofs, 1279 + &sysmem_L0_pt, 0, avail_pts, avail_pts); 1280 + batch_size += EMIT_COPY_DW; 1281 + 1282 + bb = xe_bb_new(gt, batch_size, usm); 1283 + if (IS_ERR(bb)) { 1284 + err = PTR_ERR(bb); 1285 + return ERR_PTR(err); 1286 + } 1287 + 1288 + if (xe_migrate_allow_identity(vram_L0, &vram_it)) 1289 + xe_res_next(&vram_it, vram_L0); 1290 + else 1291 + emit_pte(m, bb, vram_L0_pt, true, use_comp_pat, &vram_it, vram_L0, vram); 1292 + 1293 + emit_pte(m, bb, sysmem_L0_pt, false, false, &sysmem_it, vram_L0, sysmem); 1294 + 1295 + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; 1296 + update_idx = bb->len; 1297 + 1298 + if (to_sysmem) 1299 + emit_copy(gt, bb, vram_L0_ofs, sysmem_L0_ofs, vram_L0, XE_PAGE_SIZE); 1300 + else 1301 + emit_copy(gt, bb, sysmem_L0_ofs, vram_L0_ofs, vram_L0, XE_PAGE_SIZE); 1302 + 1303 + job = xe_bb_create_migration_job(m->q, bb, xe_migrate_batch_base(m, usm), 1304 + update_idx); 1305 + if (IS_ERR(job)) { 1306 + xe_bb_free(bb, NULL); 1307 + err = PTR_ERR(job); 1308 + return ERR_PTR(err); 1309 + } 1310 + 1311 + xe_sched_job_add_migrate_flush(job, MI_INVALIDATE_TLB); 1312 + 1313 + xe_assert(xe, dma_resv_test_signaled(vram_bo->ttm.base.resv, 1314 + DMA_RESV_USAGE_BOOKKEEP)); 1315 + xe_assert(xe, dma_resv_test_signaled(sysmem_bo->ttm.base.resv, 1316 + DMA_RESV_USAGE_BOOKKEEP)); 1317 + 1318 + scoped_guard(mutex, &m->job_mutex) { 1319 + xe_sched_job_arm(job); 1320 + dma_fence_put(fence); 1321 + fence = dma_fence_get(&job->drm.s_fence->finished); 1322 + xe_sched_job_push(job); 1323 + 1324 + dma_fence_put(m->fence); 1325 + m->fence = dma_fence_get(fence); 1326 + } 1327 + 1328 + xe_bb_free(bb, fence); 1329 + size -= vram_L0; 1330 + } 1331 + 1332 + return fence; 1333 + } 1334 + 1214 1335 static void 
emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, 1215 1336 u32 size, u32 pitch) 1216 1337 { ··· 2034 1911 2035 1912 return true; 2036 1913 } 2037 - 2038 - enum xe_migrate_copy_dir { 2039 - XE_MIGRATE_COPY_TO_VRAM, 2040 - XE_MIGRATE_COPY_TO_SRAM, 2041 - }; 2042 1914 2043 1915 #define XE_CACHELINE_BYTES 64ull 2044 1916 #define XE_CACHELINE_MASK (XE_CACHELINE_BYTES - 1)
+8
drivers/gpu/drm/xe/xe_migrate.h
··· 28 28 29 29 enum xe_sriov_vf_ccs_rw_ctxs; 30 30 31 + enum xe_migrate_copy_dir { 32 + XE_MIGRATE_COPY_TO_VRAM, 33 + XE_MIGRATE_COPY_TO_SRAM, 34 + }; 35 + 31 36 /** 32 37 * struct xe_migrate_pt_update_ops - Callbacks for the 33 38 * xe_migrate_update_pgtables() function. ··· 136 131 137 132 struct xe_lrc *xe_migrate_lrc(struct xe_migrate *migrate); 138 133 struct xe_exec_queue *xe_migrate_exec_queue(struct xe_migrate *migrate); 134 + struct dma_fence *xe_migrate_vram_copy_chunk(struct xe_bo *vram_bo, u64 vram_offset, 135 + struct xe_bo *sysmem_bo, u64 sysmem_offset, 136 + u64 size, enum xe_migrate_copy_dir dir); 139 137 int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, 140 138 unsigned long offset, void *buf, int len, 141 139 int write);
+4 -5
drivers/gpu/drm/xe/xe_oa.c
··· 870 870 871 871 xe_oa_free_oa_buffer(stream); 872 872 873 - xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); 873 + xe_force_wake_put(gt_to_fw(gt), stream->fw_ref); 874 874 xe_pm_runtime_put(stream->oa->xe); 875 875 876 876 /* Wa_1509372804:pvc: Unset the override of GUCRC mode to enable rc6 */ ··· 1717 1717 struct xe_oa_open_param *param) 1718 1718 { 1719 1719 struct xe_gt *gt = param->hwe->gt; 1720 - unsigned int fw_ref; 1721 1720 int ret; 1722 1721 1723 1722 stream->exec_q = param->exec_q; ··· 1771 1772 1772 1773 /* Take runtime pm ref and forcewake to disable RC6 */ 1773 1774 xe_pm_runtime_get(stream->oa->xe); 1774 - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 1775 - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { 1775 + stream->fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 1776 + if (!xe_force_wake_ref_has_domain(stream->fw_ref, XE_FORCEWAKE_ALL)) { 1776 1777 ret = -ETIMEDOUT; 1777 1778 goto err_fw_put; 1778 1779 } ··· 1817 1818 err_free_oa_buf: 1818 1819 xe_oa_free_oa_buffer(stream); 1819 1820 err_fw_put: 1820 - xe_force_wake_put(gt_to_fw(gt), fw_ref); 1821 + xe_force_wake_put(gt_to_fw(gt), stream->fw_ref); 1821 1822 xe_pm_runtime_put(stream->oa->xe); 1822 1823 if (stream->override_gucrc) 1823 1824 xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(&gt->uc.guc.pc));
+3
drivers/gpu/drm/xe/xe_oa_types.h
··· 264 264 265 265 /** @syncs: syncs to wait on and to signal */ 266 266 struct xe_sync_entry *syncs; 267 + 268 + /** @fw_ref: Forcewake reference */ 269 + unsigned int fw_ref; 267 270 }; 268 271 #endif
+1 -1
drivers/gpu/drm/xe/xe_pci.c
··· 333 333 .has_pxp = true, 334 334 .max_gt_per_tile = 2, 335 335 .va_bits = 48, 336 - .vm_max_level = 4, 336 + .vm_max_level = 3, 337 337 }; 338 338 339 339 static const struct xe_device_desc lnl_desc = {
+21
drivers/gpu/drm/xe/xe_pci_sriov.c
··· 94 94 return pci_iov_vf_bar_set_size(pdev, VF_LMEM_BAR, __fls(sizes)); 95 95 } 96 96 97 + static int pf_prepare_vfs_enabling(struct xe_device *xe) 98 + { 99 + xe_assert(xe, IS_SRIOV_PF(xe)); 100 + /* make sure we are not locked-down by other components */ 101 + return xe_sriov_pf_arm_guard(xe, &xe->sriov.pf.guard_vfs_enabling, false, NULL); 102 + } 103 + 104 + static void pf_finish_vfs_enabling(struct xe_device *xe) 105 + { 106 + xe_assert(xe, IS_SRIOV_PF(xe)); 107 + /* allow other components to lockdown VFs enabling */ 108 + xe_sriov_pf_disarm_guard(xe, &xe->sriov.pf.guard_vfs_enabling, false, NULL); 109 + } 110 + 97 111 static int pf_enable_vfs(struct xe_device *xe, int num_vfs) 98 112 { 99 113 struct pci_dev *pdev = to_pci_dev(xe->drm.dev); ··· 120 106 xe_sriov_dbg(xe, "enabling %u VF%s\n", num_vfs, str_plural(num_vfs)); 121 107 122 108 err = xe_sriov_pf_wait_ready(xe); 109 + if (err) 110 + goto out; 111 + 112 + err = pf_prepare_vfs_enabling(xe); 123 113 if (err) 124 114 goto out; 125 115 ··· 166 148 failed: 167 149 xe_sriov_pf_unprovision_vfs(xe, num_vfs); 168 150 xe_pm_runtime_put(xe); 151 + pf_finish_vfs_enabling(xe); 169 152 out: 170 153 xe_sriov_notice(xe, "Failed to enable %u VF%s (%pe)\n", 171 154 num_vfs, str_plural(num_vfs), ERR_PTR(err)); ··· 197 178 198 179 /* not needed anymore - see pf_enable_vfs() */ 199 180 xe_pm_runtime_put(xe); 181 + 182 + pf_finish_vfs_enabling(xe); 200 183 201 184 xe_sriov_info(xe, "Disabled %u VF%s\n", num_vfs, str_plural(num_vfs)); 202 185 return 0;
+26 -14
drivers/gpu/drm/xe/xe_pcode.c
··· 32 32 33 33 static int pcode_mailbox_status(struct xe_tile *tile) 34 34 { 35 + const char *err_str; 36 + int err_decode; 35 37 u32 err; 36 - static const struct pcode_err_decode err_decode[] = { 37 - [PCODE_ILLEGAL_CMD] = {-ENXIO, "Illegal Command"}, 38 - [PCODE_TIMEOUT] = {-ETIMEDOUT, "Timed out"}, 39 - [PCODE_ILLEGAL_DATA] = {-EINVAL, "Illegal Data"}, 40 - [PCODE_ILLEGAL_SUBCOMMAND] = {-ENXIO, "Illegal Subcommand"}, 41 - [PCODE_LOCKED] = {-EBUSY, "PCODE Locked"}, 42 - [PCODE_GT_RATIO_OUT_OF_RANGE] = {-EOVERFLOW, 43 - "GT ratio out of range"}, 44 - [PCODE_REJECTED] = {-EACCES, "PCODE Rejected"}, 45 - [PCODE_ERROR_MASK] = {-EPROTO, "Unknown"}, 46 - }; 38 + 39 + #define CASE_ERR(_err, _err_decode, _err_str) \ 40 + case _err: \ 41 + err_decode = _err_decode; \ 42 + err_str = _err_str; \ 43 + break 47 44 48 45 err = xe_mmio_read32(&tile->mmio, PCODE_MAILBOX) & PCODE_ERROR_MASK; 46 + switch (err) { 47 + CASE_ERR(PCODE_ILLEGAL_CMD, -ENXIO, "Illegal Command"); 48 + CASE_ERR(PCODE_TIMEOUT, -ETIMEDOUT, "Timed out"); 49 + CASE_ERR(PCODE_ILLEGAL_DATA, -EINVAL, "Illegal Data"); 50 + CASE_ERR(PCODE_ILLEGAL_SUBCOMMAND, -ENXIO, "Illegal Subcommand"); 51 + CASE_ERR(PCODE_LOCKED, -EBUSY, "PCODE Locked"); 52 + CASE_ERR(PCODE_GT_RATIO_OUT_OF_RANGE, -EOVERFLOW, "GT ratio out of range"); 53 + CASE_ERR(PCODE_REJECTED, -EACCES, "PCODE Rejected"); 54 + default: 55 + err_decode = -EPROTO; 56 + err_str = "Unknown"; 57 + } 58 + 49 59 if (err) { 50 - drm_err(&tile_to_xe(tile)->drm, "PCODE Mailbox failed: %d %s", err, 51 - err_decode[err].str ?: "Unknown"); 52 - return err_decode[err].errno ?: -EPROTO; 60 + drm_err(&tile_to_xe(tile)->drm, "PCODE Mailbox failed: %d %s", 61 + err_decode, err_str); 62 + 63 + return err_decode; 53 64 } 54 65 55 66 return 0; 67 + #undef CASE_ERR 56 68 } 57 69 58 70 static int __pcode_mailbox_rw(struct xe_tile *tile, u32 mbox, u32 *data0, u32 *data1,
-6
drivers/gpu/drm/xe/xe_pcode_api.h
··· 92 92 #define BMG_PCIE_CAP XE_REG(0x138340) 93 93 #define LINK_DOWNGRADE REG_GENMASK(1, 0) 94 94 #define DOWNGRADE_CAPABLE 2 95 - 96 - struct pcode_err_decode { 97 - int errno; 98 - const char *str; 99 - }; 100 -
+1 -1
drivers/gpu/drm/xe/xe_pm.c
··· 112 112 } 113 113 114 114 /** 115 - * xe_pm_might_block_on_suspend() - Block pending suspend. 115 + * xe_pm_block_on_suspend() - Block pending suspend. 116 116 * @xe: The xe device about to be suspended. 117 117 * 118 118 * Block if the pm notifier has start evicting bos, to avoid
+21
drivers/gpu/drm/xe/xe_sa.c
··· 110 110 return drm_suballoc_new(&sa_manager->base, size, gfp, true, 0); 111 111 } 112 112 113 + /** 114 + * xe_sa_bo_flush_write() - Copy the data from the sub-allocation to the GPU memory. 115 + * @sa_bo: the &drm_suballoc to flush 116 + */ 113 117 void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo) 114 118 { 115 119 struct xe_sa_manager *sa_manager = to_xe_sa_manager(sa_bo->manager); ··· 125 121 xe_map_memcpy_to(xe, &sa_manager->bo->vmap, drm_suballoc_soffset(sa_bo), 126 122 xe_sa_bo_cpu_addr(sa_bo), 127 123 drm_suballoc_size(sa_bo)); 124 + } 125 + 126 + /** 127 + * xe_sa_bo_sync_read() - Copy the data from GPU memory to the sub-allocation. 128 + * @sa_bo: the &drm_suballoc to sync 129 + */ 130 + void xe_sa_bo_sync_read(struct drm_suballoc *sa_bo) 131 + { 132 + struct xe_sa_manager *sa_manager = to_xe_sa_manager(sa_bo->manager); 133 + struct xe_device *xe = tile_to_xe(sa_manager->bo->tile); 134 + 135 + if (!sa_manager->bo->vmap.is_iomem) 136 + return; 137 + 138 + xe_map_memcpy_from(xe, xe_sa_bo_cpu_addr(sa_bo), &sa_manager->bo->vmap, 139 + drm_suballoc_soffset(sa_bo), 140 + drm_suballoc_size(sa_bo)); 128 141 } 129 142 130 143 void xe_sa_bo_free(struct drm_suballoc *sa_bo,
+1
drivers/gpu/drm/xe/xe_sa.h
··· 37 37 } 38 38 39 39 void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo); 40 + void xe_sa_bo_sync_read(struct drm_suballoc *sa_bo); 40 41 void xe_sa_bo_free(struct drm_suballoc *sa_bo, struct dma_fence *fence); 41 42 42 43 static inline struct xe_sa_manager *
+520
drivers/gpu/drm/xe/xe_sriov_packet.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2025 Intel Corporation 4 + */ 5 + 6 + #include "xe_bo.h" 7 + #include "xe_device.h" 8 + #include "xe_guc_klv_helpers.h" 9 + #include "xe_printk.h" 10 + #include "xe_sriov_packet.h" 11 + #include "xe_sriov_packet_types.h" 12 + #include "xe_sriov_pf_helpers.h" 13 + #include "xe_sriov_pf_migration.h" 14 + #include "xe_sriov_printk.h" 15 + 16 + static struct mutex *pf_migration_mutex(struct xe_device *xe, unsigned int vfid) 17 + { 18 + xe_assert(xe, IS_SRIOV_PF(xe)); 19 + xe_assert(xe, vfid <= xe_sriov_pf_get_totalvfs(xe)); 20 + 21 + return &xe->sriov.pf.vfs[vfid].migration.lock; 22 + } 23 + 24 + static struct xe_sriov_packet **pf_pick_pending(struct xe_device *xe, unsigned int vfid) 25 + { 26 + xe_assert(xe, IS_SRIOV_PF(xe)); 27 + xe_assert(xe, vfid <= xe_sriov_pf_get_totalvfs(xe)); 28 + lockdep_assert_held(pf_migration_mutex(xe, vfid)); 29 + 30 + return &xe->sriov.pf.vfs[vfid].migration.pending; 31 + } 32 + 33 + static struct xe_sriov_packet ** 34 + pf_pick_descriptor(struct xe_device *xe, unsigned int vfid) 35 + { 36 + xe_assert(xe, IS_SRIOV_PF(xe)); 37 + xe_assert(xe, vfid <= xe_sriov_pf_get_totalvfs(xe)); 38 + lockdep_assert_held(pf_migration_mutex(xe, vfid)); 39 + 40 + return &xe->sriov.pf.vfs[vfid].migration.descriptor; 41 + } 42 + 43 + static struct xe_sriov_packet **pf_pick_trailer(struct xe_device *xe, unsigned int vfid) 44 + { 45 + xe_assert(xe, IS_SRIOV_PF(xe)); 46 + xe_assert(xe, vfid <= xe_sriov_pf_get_totalvfs(xe)); 47 + lockdep_assert_held(pf_migration_mutex(xe, vfid)); 48 + 49 + return &xe->sriov.pf.vfs[vfid].migration.trailer; 50 + } 51 + 52 + static struct xe_sriov_packet **pf_pick_read_packet(struct xe_device *xe, 53 + unsigned int vfid) 54 + { 55 + struct xe_sriov_packet **data; 56 + 57 + data = pf_pick_descriptor(xe, vfid); 58 + if (*data) 59 + return data; 60 + 61 + data = pf_pick_pending(xe, vfid); 62 + if (!*data) 63 + *data = xe_sriov_pf_migration_save_consume(xe, vfid); 64 + if 
(*data) 65 + return data; 66 + 67 + data = pf_pick_trailer(xe, vfid); 68 + if (*data) 69 + return data; 70 + 71 + return NULL; 72 + } 73 + 74 + static bool pkt_needs_bo(struct xe_sriov_packet *data) 75 + { 76 + return data->hdr.type == XE_SRIOV_PACKET_TYPE_VRAM; 77 + } 78 + 79 + /** 80 + * xe_sriov_packet_alloc() - Allocate migration data packet 81 + * @xe: the &xe_device 82 + * 83 + * Only allocates the "outer" structure, without initializing the migration 84 + * data backing storage. 85 + * 86 + * Return: Pointer to &xe_sriov_packet on success, 87 + * NULL in case of error. 88 + */ 89 + struct xe_sriov_packet *xe_sriov_packet_alloc(struct xe_device *xe) 90 + { 91 + struct xe_sriov_packet *data; 92 + 93 + data = kzalloc(sizeof(*data), GFP_KERNEL); 94 + if (!data) 95 + return NULL; 96 + 97 + data->xe = xe; 98 + data->hdr_remaining = sizeof(data->hdr); 99 + 100 + return data; 101 + } 102 + 103 + /** 104 + * xe_sriov_packet_free() - Free migration data packet. 105 + * @data: the &xe_sriov_packet 106 + */ 107 + void xe_sriov_packet_free(struct xe_sriov_packet *data) 108 + { 109 + if (IS_ERR_OR_NULL(data)) 110 + return; 111 + 112 + if (pkt_needs_bo(data)) 113 + xe_bo_unpin_map_no_vm(data->bo); 114 + else 115 + kvfree(data->buff); 116 + 117 + kfree(data); 118 + } 119 + 120 + static int pkt_init(struct xe_sriov_packet *data) 121 + { 122 + struct xe_gt *gt = xe_device_get_gt(data->xe, data->hdr.gt_id); 123 + 124 + if (!gt) 125 + return -EINVAL; 126 + 127 + if (data->hdr.size == 0) 128 + return 0; 129 + 130 + if (pkt_needs_bo(data)) { 131 + struct xe_bo *bo; 132 + 133 + bo = xe_bo_create_pin_map_novm(data->xe, gt->tile, PAGE_ALIGN(data->hdr.size), 134 + ttm_bo_type_kernel, 135 + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED, false); 136 + if (IS_ERR(bo)) 137 + return PTR_ERR(bo); 138 + 139 + data->bo = bo; 140 + data->vaddr = bo->vmap.vaddr; 141 + } else { 142 + void *buff = kvzalloc(data->hdr.size, GFP_KERNEL); 143 + 144 + if (!buff) 145 + return -ENOMEM; 146 + 147 + data->buff = 
buff; 148 + data->vaddr = buff; 149 + } 150 + 151 + return 0; 152 + } 153 + 154 + #define XE_SRIOV_PACKET_SUPPORTED_VERSION 1 155 + 156 + /** 157 + * xe_sriov_packet_init() - Initialize migration packet header and backing storage. 158 + * @data: the &xe_sriov_packet 159 + * @tile_id: tile identifier 160 + * @gt_id: GT identifier 161 + * @type: &xe_sriov_packet_type 162 + * @offset: offset of data packet payload (within wider resource) 163 + * @size: size of data packet payload 164 + * 165 + * Return: 0 on success or a negative error code on failure. 166 + */ 167 + int xe_sriov_packet_init(struct xe_sriov_packet *data, u8 tile_id, u8 gt_id, 168 + enum xe_sriov_packet_type type, loff_t offset, size_t size) 169 + { 170 + data->hdr.version = XE_SRIOV_PACKET_SUPPORTED_VERSION; 171 + data->hdr.type = type; 172 + data->hdr.tile_id = tile_id; 173 + data->hdr.gt_id = gt_id; 174 + data->hdr.offset = offset; 175 + data->hdr.size = size; 176 + data->remaining = size; 177 + 178 + return pkt_init(data); 179 + } 180 + 181 + /** 182 + * xe_sriov_packet_init_from_hdr() - Initialize migration packet backing storage based on header. 183 + * @data: the &xe_sriov_packet 184 + * 185 + * Header data is expected to be filled prior to calling this function. 186 + * 187 + * Return: 0 on success or a negative error code on failure. 
188 + */ 189 + int xe_sriov_packet_init_from_hdr(struct xe_sriov_packet *data) 190 + { 191 + xe_assert(data->xe, !data->hdr_remaining); 192 + 193 + if (data->hdr.version != XE_SRIOV_PACKET_SUPPORTED_VERSION) 194 + return -EINVAL; 195 + 196 + data->remaining = data->hdr.size; 197 + 198 + return pkt_init(data); 199 + } 200 + 201 + static ssize_t pkt_hdr_read(struct xe_sriov_packet *data, 202 + char __user *buf, size_t len) 203 + { 204 + loff_t offset = sizeof(data->hdr) - data->hdr_remaining; 205 + 206 + if (!data->hdr_remaining) 207 + return -EINVAL; 208 + 209 + if (len > data->hdr_remaining) 210 + len = data->hdr_remaining; 211 + 212 + if (copy_to_user(buf, (void *)&data->hdr + offset, len)) 213 + return -EFAULT; 214 + 215 + data->hdr_remaining -= len; 216 + 217 + return len; 218 + } 219 + 220 + static ssize_t pkt_data_read(struct xe_sriov_packet *data, 221 + char __user *buf, size_t len) 222 + { 223 + if (len > data->remaining) 224 + len = data->remaining; 225 + 226 + if (copy_to_user(buf, data->vaddr + (data->hdr.size - data->remaining), len)) 227 + return -EFAULT; 228 + 229 + data->remaining -= len; 230 + 231 + return len; 232 + } 233 + 234 + static ssize_t pkt_read_single(struct xe_sriov_packet **data, 235 + unsigned int vfid, char __user *buf, size_t len) 236 + { 237 + ssize_t copied = 0; 238 + 239 + if ((*data)->hdr_remaining) 240 + copied = pkt_hdr_read(*data, buf, len); 241 + else 242 + copied = pkt_data_read(*data, buf, len); 243 + 244 + if ((*data)->remaining == 0 && (*data)->hdr_remaining == 0) { 245 + xe_sriov_packet_free(*data); 246 + *data = NULL; 247 + } 248 + 249 + return copied; 250 + } 251 + 252 + /** 253 + * xe_sriov_packet_read_single() - Read migration data from a single packet. 
254 + * @xe: the &xe_device 255 + * @vfid: the VF identifier 256 + * @buf: start address of userspace buffer 257 + * @len: requested read size from userspace 258 + * 259 + * Return: number of bytes that has been successfully read, 260 + * 0 if no more migration data is available, 261 + * -errno on failure. 262 + */ 263 + ssize_t xe_sriov_packet_read_single(struct xe_device *xe, unsigned int vfid, 264 + char __user *buf, size_t len) 265 + { 266 + struct xe_sriov_packet **data = pf_pick_read_packet(xe, vfid); 267 + 268 + if (!data) 269 + return -ENODATA; 270 + if (IS_ERR(*data)) 271 + return PTR_ERR(*data); 272 + 273 + return pkt_read_single(data, vfid, buf, len); 274 + } 275 + 276 + static ssize_t pkt_hdr_write(struct xe_sriov_packet *data, 277 + const char __user *buf, size_t len) 278 + { 279 + loff_t offset = sizeof(data->hdr) - data->hdr_remaining; 280 + int ret; 281 + 282 + if (len > data->hdr_remaining) 283 + len = data->hdr_remaining; 284 + 285 + if (copy_from_user((void *)&data->hdr + offset, buf, len)) 286 + return -EFAULT; 287 + 288 + data->hdr_remaining -= len; 289 + 290 + if (!data->hdr_remaining) { 291 + ret = xe_sriov_packet_init_from_hdr(data); 292 + if (ret) 293 + return ret; 294 + } 295 + 296 + return len; 297 + } 298 + 299 + static ssize_t pkt_data_write(struct xe_sriov_packet *data, 300 + const char __user *buf, size_t len) 301 + { 302 + if (len > data->remaining) 303 + len = data->remaining; 304 + 305 + if (copy_from_user(data->vaddr + (data->hdr.size - data->remaining), buf, len)) 306 + return -EFAULT; 307 + 308 + data->remaining -= len; 309 + 310 + return len; 311 + } 312 + 313 + /** 314 + * xe_sriov_packet_write_single() - Write migration data to a single packet. 315 + * @xe: the &xe_device 316 + * @vfid: the VF identifier 317 + * @buf: start address of userspace buffer 318 + * @len: requested write size from userspace 319 + * 320 + * Return: number of bytes that has been successfully written, 321 + * -errno on failure. 
322 + */ 323 + ssize_t xe_sriov_packet_write_single(struct xe_device *xe, unsigned int vfid, 324 + const char __user *buf, size_t len) 325 + { 326 + struct xe_sriov_packet **data = pf_pick_pending(xe, vfid); 327 + int ret; 328 + ssize_t copied; 329 + 330 + if (IS_ERR_OR_NULL(*data)) { 331 + *data = xe_sriov_packet_alloc(xe); 332 + if (!*data) 333 + return -ENOMEM; 334 + } 335 + 336 + if ((*data)->hdr_remaining) 337 + copied = pkt_hdr_write(*data, buf, len); 338 + else 339 + copied = pkt_data_write(*data, buf, len); 340 + 341 + if ((*data)->hdr_remaining == 0 && (*data)->remaining == 0) { 342 + ret = xe_sriov_pf_migration_restore_produce(xe, vfid, *data); 343 + if (ret) { 344 + xe_sriov_packet_free(*data); 345 + return ret; 346 + } 347 + 348 + *data = NULL; 349 + } 350 + 351 + return copied; 352 + } 353 + 354 + #define MIGRATION_KLV_DEVICE_DEVID_KEY 0xf001u 355 + #define MIGRATION_KLV_DEVICE_DEVID_LEN 1u 356 + #define MIGRATION_KLV_DEVICE_REVID_KEY 0xf002u 357 + #define MIGRATION_KLV_DEVICE_REVID_LEN 1u 358 + 359 + #define MIGRATION_DESCRIPTOR_DWORDS (GUC_KLV_LEN_MIN + MIGRATION_KLV_DEVICE_DEVID_LEN + \ 360 + GUC_KLV_LEN_MIN + MIGRATION_KLV_DEVICE_REVID_LEN) 361 + static size_t pf_descriptor_init(struct xe_device *xe, unsigned int vfid) 362 + { 363 + struct xe_sriov_packet **desc = pf_pick_descriptor(xe, vfid); 364 + struct xe_sriov_packet *data; 365 + unsigned int len = 0; 366 + u32 *klvs; 367 + int ret; 368 + 369 + data = xe_sriov_packet_alloc(xe); 370 + if (!data) 371 + return -ENOMEM; 372 + 373 + ret = xe_sriov_packet_init(data, 0, 0, XE_SRIOV_PACKET_TYPE_DESCRIPTOR, 374 + 0, MIGRATION_DESCRIPTOR_DWORDS * sizeof(u32)); 375 + if (ret) { 376 + xe_sriov_packet_free(data); 377 + return ret; 378 + } 379 + 380 + klvs = data->vaddr; 381 + klvs[len++] = PREP_GUC_KLV_CONST(MIGRATION_KLV_DEVICE_DEVID_KEY, 382 + MIGRATION_KLV_DEVICE_DEVID_LEN); 383 + klvs[len++] = xe->info.devid; 384 + klvs[len++] = PREP_GUC_KLV_CONST(MIGRATION_KLV_DEVICE_REVID_KEY, 385 + 
MIGRATION_KLV_DEVICE_REVID_LEN); 386 + klvs[len++] = xe->info.revid; 387 + 388 + xe_assert(xe, len == MIGRATION_DESCRIPTOR_DWORDS); 389 + 390 + *desc = data; 391 + 392 + return 0; 393 + } 394 + 395 + /** 396 + * xe_sriov_packet_process_descriptor() - Process migration data descriptor packet. 397 + * @xe: the &xe_device 398 + * @vfid: the VF identifier 399 + * @data: the &xe_sriov_packet containing the descriptor 400 + * 401 + * The descriptor uses the same KLV format as GuC, and contains metadata used for 402 + * checking migration data compatibility. 403 + * 404 + * Return: 0 on success, -errno on failure. 405 + */ 406 + int xe_sriov_packet_process_descriptor(struct xe_device *xe, unsigned int vfid, 407 + struct xe_sriov_packet *data) 408 + { 409 + u32 num_dwords = data->hdr.size / sizeof(u32); 410 + u32 *klvs = data->vaddr; 411 + 412 + xe_assert(xe, data->hdr.type == XE_SRIOV_PACKET_TYPE_DESCRIPTOR); 413 + 414 + if (data->hdr.size % sizeof(u32)) { 415 + xe_sriov_warn(xe, "Aborting migration, descriptor not in KLV format (size=%llu)\n", 416 + data->hdr.size); 417 + return -EINVAL; 418 + } 419 + 420 + while (num_dwords >= GUC_KLV_LEN_MIN) { 421 + u32 key = FIELD_GET(GUC_KLV_0_KEY, klvs[0]); 422 + u32 len = FIELD_GET(GUC_KLV_0_LEN, klvs[0]); 423 + 424 + klvs += GUC_KLV_LEN_MIN; 425 + num_dwords -= GUC_KLV_LEN_MIN; 426 + 427 + if (len > num_dwords) { 428 + xe_sriov_warn(xe, "Aborting migration, truncated KLV %#x, len %u\n", 429 + key, len); 430 + return -EINVAL; 431 + } 432 + 433 + switch (key) { 434 + case MIGRATION_KLV_DEVICE_DEVID_KEY: 435 + if (*klvs != xe->info.devid) { 436 + xe_sriov_warn(xe, 437 + "Aborting migration, devid mismatch %#06x!=%#06x\n", 438 + *klvs, xe->info.devid); 439 + return -ENODEV; 440 + } 441 + break; 442 + case MIGRATION_KLV_DEVICE_REVID_KEY: 443 + if (*klvs != xe->info.revid) { 444 + xe_sriov_warn(xe, 445 + "Aborting migration, revid mismatch %#06x!=%#06x\n", 446 + *klvs, xe->info.revid); 447 + return -ENODEV; 448 + } 449 + break; 450 + 
default: 451 + xe_sriov_dbg(xe, 452 + "Skipping unknown migration KLV %#x, len=%u\n", 453 + key, len); 454 + print_hex_dump_bytes("desc: ", DUMP_PREFIX_OFFSET, klvs, 455 + min(SZ_64, len * sizeof(u32))); 456 + break; 457 + } 458 + 459 + klvs += len; 460 + num_dwords -= len; 461 + } 462 + 463 + return 0; 464 + } 465 + 466 + static void pf_pending_init(struct xe_device *xe, unsigned int vfid) 467 + { 468 + struct xe_sriov_packet **data = pf_pick_pending(xe, vfid); 469 + 470 + *data = NULL; 471 + } 472 + 473 + #define MIGRATION_TRAILER_SIZE 0 474 + static int pf_trailer_init(struct xe_device *xe, unsigned int vfid) 475 + { 476 + struct xe_sriov_packet **trailer = pf_pick_trailer(xe, vfid); 477 + struct xe_sriov_packet *data; 478 + int ret; 479 + 480 + data = xe_sriov_packet_alloc(xe); 481 + if (!data) 482 + return -ENOMEM; 483 + 484 + ret = xe_sriov_packet_init(data, 0, 0, XE_SRIOV_PACKET_TYPE_TRAILER, 485 + 0, MIGRATION_TRAILER_SIZE); 486 + if (ret) { 487 + xe_sriov_packet_free(data); 488 + return ret; 489 + } 490 + 491 + *trailer = data; 492 + 493 + return 0; 494 + } 495 + 496 + /** 497 + * xe_sriov_packet_save_init() - Initialize the pending save migration packets. 498 + * @xe: the &xe_device 499 + * @vfid: the VF identifier 500 + * 501 + * Return: 0 on success, -errno on failure. 502 + */ 503 + int xe_sriov_packet_save_init(struct xe_device *xe, unsigned int vfid) 504 + { 505 + int ret; 506 + 507 + scoped_cond_guard(mutex_intr, return -EINTR, pf_migration_mutex(xe, vfid)) { 508 + ret = pf_descriptor_init(xe, vfid); 509 + if (ret) 510 + return ret; 511 + 512 + ret = pf_trailer_init(xe, vfid); 513 + if (ret) 514 + return ret; 515 + 516 + pf_pending_init(xe, vfid); 517 + } 518 + 519 + return 0; 520 + }
+30
drivers/gpu/drm/xe/xe_sriov_packet.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2025 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_SRIOV_PACKET_H_ 7 + #define _XE_SRIOV_PACKET_H_ 8 + 9 + #include <linux/types.h> 10 + 11 + struct xe_device; 12 + struct xe_sriov_packet; 13 + enum xe_sriov_packet_type; 14 + 15 + struct xe_sriov_packet *xe_sriov_packet_alloc(struct xe_device *xe); 16 + void xe_sriov_packet_free(struct xe_sriov_packet *data); 17 + 18 + int xe_sriov_packet_init(struct xe_sriov_packet *data, u8 tile_id, u8 gt_id, 19 + enum xe_sriov_packet_type, loff_t offset, size_t size); 20 + int xe_sriov_packet_init_from_hdr(struct xe_sriov_packet *data); 21 + 22 + ssize_t xe_sriov_packet_read_single(struct xe_device *xe, unsigned int vfid, 23 + char __user *buf, size_t len); 24 + ssize_t xe_sriov_packet_write_single(struct xe_device *xe, unsigned int vfid, 25 + const char __user *buf, size_t len); 26 + int xe_sriov_packet_save_init(struct xe_device *xe, unsigned int vfid); 27 + int xe_sriov_packet_process_descriptor(struct xe_device *xe, unsigned int vfid, 28 + struct xe_sriov_packet *data); 29 + 30 + #endif
+75
drivers/gpu/drm/xe/xe_sriov_packet_types.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2025 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_SRIOV_PACKET_TYPES_H_ 7 + #define _XE_SRIOV_PACKET_TYPES_H_ 8 + 9 + #include <linux/types.h> 10 + 11 + /** 12 + * enum xe_sriov_packet_type - Xe SR-IOV VF migration data packet type 13 + * @XE_SRIOV_PACKET_TYPE_DESCRIPTOR: Descriptor with VF device metadata 14 + * @XE_SRIOV_PACKET_TYPE_TRAILER: Trailer indicating end-of-stream 15 + * @XE_SRIOV_PACKET_TYPE_GGTT: Global GTT migration data 16 + * @XE_SRIOV_PACKET_TYPE_MMIO: MMIO registers migration data 17 + * @XE_SRIOV_PACKET_TYPE_GUC: GuC firmware migration data 18 + * @XE_SRIOV_PACKET_TYPE_VRAM: VRAM migration data 19 + */ 20 + enum xe_sriov_packet_type { 21 + /* Skipping 0 to catch uninitialized data */ 22 + XE_SRIOV_PACKET_TYPE_DESCRIPTOR = 1, 23 + XE_SRIOV_PACKET_TYPE_TRAILER, 24 + XE_SRIOV_PACKET_TYPE_GGTT, 25 + XE_SRIOV_PACKET_TYPE_MMIO, 26 + XE_SRIOV_PACKET_TYPE_GUC, 27 + XE_SRIOV_PACKET_TYPE_VRAM, 28 + }; 29 + 30 + /** 31 + * struct xe_sriov_packet_hdr - Xe SR-IOV VF migration data packet header 32 + */ 33 + struct xe_sriov_packet_hdr { 34 + /** @version: migration data protocol version */ 35 + u8 version; 36 + /** @type: migration data type */ 37 + u8 type; 38 + /** @tile_id: migration data tile id */ 39 + u8 tile_id; 40 + /** @gt_id: migration data gt id */ 41 + u8 gt_id; 42 + /** @flags: migration data flags */ 43 + u32 flags; 44 + /** 45 + * @offset: offset into the resource; 46 + * used when multiple packets of given type are used for migration 47 + */ 48 + u64 offset; 49 + /** @size: migration data size */ 50 + u64 size; 51 + } __packed; 52 + 53 + /** 54 + * struct xe_sriov_packet - Xe SR-IOV VF migration data packet 55 + */ 56 + struct xe_sriov_packet { 57 + /** @xe: the PF &xe_device this data packet belongs to */ 58 + struct xe_device *xe; 59 + /** @vaddr: CPU pointer to payload data */ 60 + void *vaddr; 61 + /** @remaining: payload data remaining */ 62 + size_t remaining; 63 + /** 
@hdr_remaining: header data remaining */ 64 + size_t hdr_remaining; 65 + union { 66 + /** @bo: Buffer object with migration data */ 67 + struct xe_bo *bo; 68 + /** @buff: Buffer with migration data */ 69 + void *buff; 70 + }; 71 + /** @hdr: data packet header */ 72 + struct xe_sriov_packet_hdr hdr; 73 + }; 74 + 75 + #endif
+102
drivers/gpu/drm/xe/xe_sriov_pf.c
··· 15 15 #include "xe_sriov.h" 16 16 #include "xe_sriov_pf.h" 17 17 #include "xe_sriov_pf_helpers.h" 18 + #include "xe_sriov_pf_migration.h" 18 19 #include "xe_sriov_pf_service.h" 19 20 #include "xe_sriov_pf_sysfs.h" 20 21 #include "xe_sriov_printk.h" ··· 103 102 if (err) 104 103 return err; 105 104 105 + err = xe_sriov_pf_migration_init(xe); 106 + if (err) 107 + return err; 108 + 109 + xe_guard_init(&xe->sriov.pf.guard_vfs_enabling, "vfs_enabling"); 110 + 106 111 xe_sriov_pf_service_init(xe); 107 112 108 113 return 0; ··· 167 160 } 168 161 169 162 return 0; 163 + } 164 + 165 + /** 166 + * xe_sriov_pf_arm_guard() - Arm the guard for exclusive/lockdown mode. 167 + * @xe: the PF &xe_device 168 + * @guard: the &xe_guard to arm 169 + * @lockdown: arm for lockdown(true) or exclusive(false) mode 170 + * @who: the address of the new owner, or NULL if it's a caller 171 + * 172 + * This function can only be called on PF. 173 + * 174 + * It is a simple wrapper for xe_guard_arm() with additional debug 175 + * messages. 176 + * 177 + * Return: 0 on success or a negative error code on failure. 178 + */ 179 + int xe_sriov_pf_arm_guard(struct xe_device *xe, struct xe_guard *guard, 180 + bool lockdown, void *who) 181 + { 182 + void *new_owner = who ?: __builtin_return_address(0); 183 + int err; 184 + 185 + err = xe_guard_arm(guard, lockdown, new_owner); 186 + if (err) { 187 + xe_sriov_dbg(xe, "%s/%s mode denied (%pe) last owner %ps\n", 188 + guard->name, xe_guard_mode_str(lockdown), 189 + ERR_PTR(err), guard->owner); 190 + return err; 191 + } 192 + 193 + xe_sriov_dbg_verbose(xe, "%s/%s by %ps\n", 194 + guard->name, xe_guard_mode_str(lockdown), 195 + new_owner); 196 + return 0; 197 + } 198 + 199 + /** 200 + * xe_sriov_pf_disarm_guard() - Disarm the guard. 
201 + * @xe: the PF &xe_device 202 + * @guard: the &xe_guard to disarm 203 + * @lockdown: disarm from lockdown(true) or exclusive(false) mode 204 + * @who: the address of the indirect owner, or NULL if it's a caller 205 + * 206 + * This function can only be called on PF. 207 + * 208 + * It is a simple wrapper for xe_guard_disarm() with additional debug 209 + * messages and xe_assert() to easily catch any illegal calls. 210 + */ 211 + void xe_sriov_pf_disarm_guard(struct xe_device *xe, struct xe_guard *guard, 212 + bool lockdown, void *who) 213 + { 214 + bool disarmed; 215 + 216 + xe_sriov_dbg_verbose(xe, "%s/%s by %ps\n", 217 + guard->name, xe_guard_mode_str(lockdown), 218 + who ?: __builtin_return_address(0)); 219 + 220 + disarmed = xe_guard_disarm(guard, lockdown); 221 + xe_assert_msg(xe, disarmed, "%s/%s not armed? last owner %ps", 222 + guard->name, xe_guard_mode_str(lockdown), guard->owner); 223 + } 224 + 225 + /** 226 + * xe_sriov_pf_lockdown() - Lockdown the PF to prevent VFs enabling. 227 + * @xe: the PF &xe_device 228 + * 229 + * This function can only be called on PF. 230 + * 231 + * Once the PF is locked down, it will not enable VFs. 232 + * If VFs are already enabled, the -EBUSY will be returned. 233 + * To allow the PF enable VFs again call xe_sriov_pf_end_lockdown(). 234 + * 235 + * Return: 0 on success or a negative error code on failure. 236 + */ 237 + int xe_sriov_pf_lockdown(struct xe_device *xe) 238 + { 239 + xe_assert(xe, IS_SRIOV_PF(xe)); 240 + 241 + return xe_sriov_pf_arm_guard(xe, &xe->sriov.pf.guard_vfs_enabling, true, 242 + __builtin_return_address(0)); 243 + } 244 + 245 + /** 246 + * xe_sriov_pf_end_lockdown() - Allow the PF to enable VFs again. 247 + * @xe: the PF &xe_device 248 + * 249 + * This function can only be called on PF. 250 + * See xe_sriov_pf_lockdown() for details. 
251 + */ 252 + void xe_sriov_pf_end_lockdown(struct xe_device *xe) 253 + { 254 + xe_assert(xe, IS_SRIOV_PF(xe)); 255 + 256 + xe_sriov_pf_disarm_guard(xe, &xe->sriov.pf.guard_vfs_enabling, true, 257 + __builtin_return_address(0)); 170 258 } 171 259 172 260 /**
+4
drivers/gpu/drm/xe/xe_sriov_pf.h
··· 17 17 int xe_sriov_pf_init_early(struct xe_device *xe); 18 18 int xe_sriov_pf_init_late(struct xe_device *xe); 19 19 int xe_sriov_pf_wait_ready(struct xe_device *xe); 20 + int xe_sriov_pf_lockdown(struct xe_device *xe); 21 + void xe_sriov_pf_end_lockdown(struct xe_device *xe); 20 22 void xe_sriov_pf_print_vfs_summary(struct xe_device *xe, struct drm_printer *p); 21 23 #else 22 24 static inline bool xe_sriov_pf_readiness(struct xe_device *xe) { return false; } 23 25 static inline int xe_sriov_pf_init_early(struct xe_device *xe) { return 0; } 24 26 static inline int xe_sriov_pf_init_late(struct xe_device *xe) { return 0; } 27 + static inline int xe_sriov_pf_lockdown(struct xe_device *xe) { return 0; } 28 + static inline void xe_sriov_pf_end_lockdown(struct xe_device *xe) { } 25 29 #endif 26 30 27 31 #endif
+128
drivers/gpu/drm/xe/xe_sriov_pf_control.c
··· 5 5 6 6 #include "xe_device.h" 7 7 #include "xe_gt_sriov_pf_control.h" 8 + #include "xe_gt_sriov_pf_migration.h" 9 + #include "xe_sriov_packet.h" 8 10 #include "xe_sriov_pf_control.h" 9 11 #include "xe_sriov_printk.h" 10 12 ··· 124 122 } 125 123 126 124 /** 125 + * xe_sriov_pf_control_wait_flr() - Wait for a VF reset (FLR) to complete. 126 + * @xe: the &xe_device 127 + * @vfid: the VF identifier 128 + * 129 + * This function is for PF only. 130 + * 131 + * Return: 0 on success or a negative error code on failure. 132 + */ 133 + int xe_sriov_pf_control_wait_flr(struct xe_device *xe, unsigned int vfid) 134 + { 135 + struct xe_gt *gt; 136 + unsigned int id; 137 + int result = 0; 138 + int err; 139 + 140 + for_each_gt(gt, xe, id) { 141 + err = xe_gt_sriov_pf_control_wait_flr(gt, vfid); 142 + result = result ? -EUCLEAN : err; 143 + } 144 + 145 + return result; 146 + } 147 + 148 + /** 127 149 * xe_sriov_pf_control_sync_flr() - Synchronize a VF FLR between all GTs. 128 150 * @xe: the &xe_device 129 151 * @vfid: the VF identifier ··· 174 148 } 175 149 176 150 return 0; 151 + } 152 + 153 + /** 154 + * xe_sriov_pf_control_trigger_save_vf() - Start VF migration data SAVE sequence on all GTs. 155 + * @xe: the &xe_device 156 + * @vfid: the VF identifier 157 + * 158 + * This function is for PF only. 159 + * 160 + * Return: 0 on success or a negative error code on failure. 161 + */ 162 + int xe_sriov_pf_control_trigger_save_vf(struct xe_device *xe, unsigned int vfid) 163 + { 164 + struct xe_gt *gt; 165 + unsigned int id; 166 + int ret; 167 + 168 + ret = xe_sriov_packet_save_init(xe, vfid); 169 + if (ret) 170 + return ret; 171 + 172 + for_each_gt(gt, xe, id) { 173 + xe_gt_sriov_pf_migration_save_init(gt, vfid); 174 + 175 + ret = xe_gt_sriov_pf_control_trigger_save_vf(gt, vfid); 176 + if (ret) 177 + return ret; 178 + } 179 + 180 + return 0; 181 + } 182 + 183 + /** 184 + * xe_sriov_pf_control_finish_save_vf() - Complete VF migration data SAVE sequence on all GTs. 
185 + * @xe: the &xe_device 186 + * @vfid: the VF identifier 187 + * 188 + * This function is for PF only. 189 + * 190 + * Return: 0 on success or a negative error code on failure. 191 + */ 192 + int xe_sriov_pf_control_finish_save_vf(struct xe_device *xe, unsigned int vfid) 193 + { 194 + struct xe_gt *gt; 195 + unsigned int id; 196 + int ret; 197 + 198 + for_each_gt(gt, xe, id) { 199 + ret = xe_gt_sriov_pf_control_finish_save_vf(gt, vfid); 200 + if (ret) 201 + break; 202 + } 203 + 204 + return ret; 205 + } 206 + 207 + /** 208 + * xe_sriov_pf_control_trigger_restore_vf() - Start VF migration data RESTORE sequence on all GTs. 209 + * @xe: the &xe_device 210 + * @vfid: the VF identifier 211 + * 212 + * This function is for PF only. 213 + * 214 + * Return: 0 on success or a negative error code on failure. 215 + */ 216 + int xe_sriov_pf_control_trigger_restore_vf(struct xe_device *xe, unsigned int vfid) 217 + { 218 + struct xe_gt *gt; 219 + unsigned int id; 220 + int ret; 221 + 222 + for_each_gt(gt, xe, id) { 223 + ret = xe_gt_sriov_pf_control_trigger_restore_vf(gt, vfid); 224 + if (ret) 225 + return ret; 226 + } 227 + 228 + return ret; 229 + } 230 + 231 + /** 232 + * xe_sriov_pf_control_finish_restore_vf() - Complete VF migration data RESTORE sequence on all GTs. 233 + * @xe: the &xe_device 234 + * @vfid: the VF identifier 235 + * 236 + * This function is for PF only. 237 + * 238 + * Return: 0 on success or a negative error code on failure. 239 + */ 240 + int xe_sriov_pf_control_finish_restore_vf(struct xe_device *xe, unsigned int vfid) 241 + { 242 + struct xe_gt *gt; 243 + unsigned int id; 244 + int ret; 245 + 246 + for_each_gt(gt, xe, id) { 247 + ret = xe_gt_sriov_pf_control_finish_restore_vf(gt, vfid); 248 + if (ret) 249 + break; 250 + } 251 + 252 + return ret; 177 253 }
+5
drivers/gpu/drm/xe/xe_sriov_pf_control.h
··· 12 12 int xe_sriov_pf_control_resume_vf(struct xe_device *xe, unsigned int vfid); 13 13 int xe_sriov_pf_control_stop_vf(struct xe_device *xe, unsigned int vfid); 14 14 int xe_sriov_pf_control_reset_vf(struct xe_device *xe, unsigned int vfid); 15 + int xe_sriov_pf_control_wait_flr(struct xe_device *xe, unsigned int vfid); 15 16 int xe_sriov_pf_control_sync_flr(struct xe_device *xe, unsigned int vfid); 17 + int xe_sriov_pf_control_trigger_save_vf(struct xe_device *xe, unsigned int vfid); 18 + int xe_sriov_pf_control_finish_save_vf(struct xe_device *xe, unsigned int vfid); 19 + int xe_sriov_pf_control_trigger_restore_vf(struct xe_device *xe, unsigned int vfid); 20 + int xe_sriov_pf_control_finish_restore_vf(struct xe_device *xe, unsigned int vfid); 16 21 17 22 #endif
+131
drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c
··· 13 13 #include "xe_sriov_pf_control.h" 14 14 #include "xe_sriov_pf_debugfs.h" 15 15 #include "xe_sriov_pf_helpers.h" 16 + #include "xe_sriov_pf_migration.h" 16 17 #include "xe_sriov_pf_provision.h" 17 18 #include "xe_sriov_pf_service.h" 18 19 #include "xe_sriov_printk.h" ··· 99 98 100 99 DEFINE_SRIOV_ATTRIBUTE(restore_auto_provisioning); 101 100 101 + static int lockdown_vfs_enabling_open(struct inode *inode, struct file *file) 102 + { 103 + struct dentry *dent = file_dentry(file); 104 + struct xe_device *xe = extract_xe(dent); 105 + ssize_t ret; 106 + 107 + ret = xe_sriov_pf_lockdown(xe); 108 + if (ret < 0) 109 + return ret; 110 + 111 + file->private_data = xe; 112 + return nonseekable_open(inode, file); 113 + } 114 + 115 + static int lockdown_vfs_enabling_release(struct inode *inode, struct file *file) 116 + { 117 + struct xe_device *xe = file->private_data; 118 + 119 + xe_sriov_pf_end_lockdown(xe); 120 + return 0; 121 + } 122 + 123 + static const struct file_operations lockdown_vfs_enabling_fops = { 124 + .owner = THIS_MODULE, 125 + .open = lockdown_vfs_enabling_open, 126 + .release = lockdown_vfs_enabling_release, 127 + }; 128 + 102 129 static void pf_populate_root(struct xe_device *xe, struct dentry *dent) 103 130 { 104 131 debugfs_create_file("restore_auto_provisioning", 0200, dent, xe, 105 132 &restore_auto_provisioning_fops); 133 + debugfs_create_file("lockdown_vfs_enabling", 0400, dent, xe, 134 + &lockdown_vfs_enabling_fops); 106 135 } 107 136 108 137 static int simple_show(struct seq_file *m, void *data) ··· 163 132 * /sys/kernel/debug/dri/BDF/ 164 133 * ├── sriov 165 134 * │ ├── vf1 135 + * │ │ ├── migration_data 166 136 * │ │ ├── pause 167 137 * │ │ ├── reset 168 138 * │ │ ├── resume 169 139 * │ │ ├── stop 140 + * │ │ ├── save 141 + * │ │ ├── restore 170 142 * │ │ : 171 143 * │ ├── vf2 172 144 * │ │ ├── ... 
173 145 */ 146 + 147 + static int from_file_read_to_vf_call(struct seq_file *s, 148 + int (*call)(struct xe_device *, unsigned int)) 149 + { 150 + struct dentry *dent = file_dentry(s->file)->d_parent; 151 + struct xe_device *xe = extract_xe(dent); 152 + unsigned int vfid = extract_vfid(dent); 153 + int ret; 154 + 155 + xe_pm_runtime_get(xe); 156 + ret = call(xe, vfid); 157 + xe_pm_runtime_put(xe); 158 + 159 + if (ret < 0) 160 + return ret; 161 + 162 + return 0; 163 + } 174 164 175 165 static ssize_t from_file_write_to_vf_call(struct file *file, const char __user *userbuf, 176 166 size_t count, loff_t *ppos, ··· 231 179 } \ 232 180 DEFINE_SHOW_STORE_ATTRIBUTE(OP) 233 181 182 + #define DEFINE_VF_CONTROL_ATTRIBUTE_RW(OP) \ 183 + static int OP##_show(struct seq_file *s, void *unused) \ 184 + { \ 185 + return from_file_read_to_vf_call(s, \ 186 + xe_sriov_pf_control_finish_##OP); \ 187 + } \ 188 + static ssize_t OP##_write(struct file *file, const char __user *userbuf, \ 189 + size_t count, loff_t *ppos) \ 190 + { \ 191 + return from_file_write_to_vf_call(file, userbuf, count, ppos, \ 192 + xe_sriov_pf_control_trigger_##OP); \ 193 + } \ 194 + DEFINE_SHOW_STORE_ATTRIBUTE(OP) 195 + 234 196 DEFINE_VF_CONTROL_ATTRIBUTE(pause_vf); 235 197 DEFINE_VF_CONTROL_ATTRIBUTE(resume_vf); 236 198 DEFINE_VF_CONTROL_ATTRIBUTE(stop_vf); 237 199 DEFINE_VF_CONTROL_ATTRIBUTE(reset_vf); 200 + DEFINE_VF_CONTROL_ATTRIBUTE_RW(save_vf); 201 + DEFINE_VF_CONTROL_ATTRIBUTE_RW(restore_vf); 202 + 203 + static ssize_t data_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) 204 + { 205 + struct dentry *dent = file_dentry(file)->d_parent; 206 + struct xe_device *xe = extract_xe(dent); 207 + unsigned int vfid = extract_vfid(dent); 208 + 209 + if (*pos) 210 + return -ESPIPE; 211 + 212 + return xe_sriov_pf_migration_write(xe, vfid, buf, count); 213 + } 214 + 215 + static ssize_t data_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) 216 + { 217 + struct dentry 
*dent = file_dentry(file)->d_parent; 218 + struct xe_device *xe = extract_xe(dent); 219 + unsigned int vfid = extract_vfid(dent); 220 + 221 + if (*ppos) 222 + return -ESPIPE; 223 + 224 + return xe_sriov_pf_migration_read(xe, vfid, buf, count); 225 + } 226 + 227 + static const struct file_operations data_vf_fops = { 228 + .owner = THIS_MODULE, 229 + .open = simple_open, 230 + .write = data_write, 231 + .read = data_read, 232 + .llseek = default_llseek, 233 + }; 234 + 235 + static ssize_t size_read(struct file *file, char __user *ubuf, size_t count, loff_t *ppos) 236 + { 237 + struct dentry *dent = file_dentry(file)->d_parent; 238 + struct xe_device *xe = extract_xe(dent); 239 + unsigned int vfid = extract_vfid(dent); 240 + char buf[21]; 241 + ssize_t ret; 242 + int len; 243 + 244 + xe_pm_runtime_get(xe); 245 + ret = xe_sriov_pf_migration_size(xe, vfid); 246 + xe_pm_runtime_put(xe); 247 + if (ret < 0) 248 + return ret; 249 + 250 + len = scnprintf(buf, sizeof(buf), "%zd\n", ret); 251 + 252 + return simple_read_from_buffer(ubuf, count, ppos, buf, len); 253 + } 254 + 255 + static const struct file_operations size_vf_fops = { 256 + .owner = THIS_MODULE, 257 + .open = simple_open, 258 + .read = size_read, 259 + .llseek = default_llseek, 260 + }; 238 261 239 262 static void pf_populate_vf(struct xe_device *xe, struct dentry *vfdent) 240 263 { ··· 317 190 debugfs_create_file("resume", 0200, vfdent, xe, &resume_vf_fops); 318 191 debugfs_create_file("stop", 0200, vfdent, xe, &stop_vf_fops); 319 192 debugfs_create_file("reset", 0200, vfdent, xe, &reset_vf_fops); 193 + debugfs_create_file("save", 0600, vfdent, xe, &save_vf_fops); 194 + debugfs_create_file("restore", 0600, vfdent, xe, &restore_vf_fops); 195 + debugfs_create_file("migration_data", 0600, vfdent, xe, &data_vf_fops); 196 + debugfs_create_file("migration_size", 0400, vfdent, xe, &size_vf_fops); 320 197 } 321 198 322 199 static void pf_populate_with_tiles(struct xe_device *xe, struct dentry *dent, unsigned int vfid)
+16
drivers/gpu/drm/xe/xe_sriov_pf_helpers.h
··· 48 48 return pci_num_vf(to_pci_dev(xe->drm.dev)); 49 49 } 50 50 51 + /** 52 + * xe_sriov_pf_admin_only() - Check if PF is mainly used for VFs administration. 53 + * @xe: the PF &xe_device 54 + * 55 + * Return: True if PF is mainly used for VFs administration. 56 + */ 57 + static inline bool xe_sriov_pf_admin_only(const struct xe_device *xe) 58 + { 59 + return !xe->info.probe_display; 60 + } 61 + 51 62 static inline struct mutex *xe_sriov_pf_master_mutex(struct xe_device *xe) 52 63 { 53 64 xe_assert(xe, IS_SRIOV_PF(xe)); 54 65 return &xe->sriov.pf.master_lock; 55 66 } 67 + 68 + int xe_sriov_pf_arm_guard(struct xe_device *xe, struct xe_guard *guard, 69 + bool write, void *who); 70 + void xe_sriov_pf_disarm_guard(struct xe_device *xe, struct xe_guard *guard, 71 + bool write, void *who); 56 72 57 73 #endif
+342
drivers/gpu/drm/xe/xe_sriov_pf_migration.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2025 Intel Corporation 4 + */ 5 + 6 + #include <drm/drm_managed.h> 7 + 8 + #include "xe_device.h" 9 + #include "xe_gt_sriov_pf_control.h" 10 + #include "xe_gt_sriov_pf_migration.h" 11 + #include "xe_pm.h" 12 + #include "xe_sriov.h" 13 + #include "xe_sriov_packet.h" 14 + #include "xe_sriov_packet_types.h" 15 + #include "xe_sriov_pf_helpers.h" 16 + #include "xe_sriov_pf_migration.h" 17 + #include "xe_sriov_printk.h" 18 + 19 + static struct xe_sriov_migration_state *pf_pick_migration(struct xe_device *xe, unsigned int vfid) 20 + { 21 + xe_assert(xe, IS_SRIOV_PF(xe)); 22 + xe_assert(xe, vfid <= xe_sriov_pf_get_totalvfs(xe)); 23 + 24 + return &xe->sriov.pf.vfs[vfid].migration; 25 + } 26 + 27 + /** 28 + * xe_sriov_pf_migration_waitqueue() - Get waitqueue for migration. 29 + * @xe: the &xe_device 30 + * @vfid: the VF identifier 31 + * 32 + * Return: pointer to the migration waitqueue. 33 + */ 34 + wait_queue_head_t *xe_sriov_pf_migration_waitqueue(struct xe_device *xe, unsigned int vfid) 35 + { 36 + return &pf_pick_migration(xe, vfid)->wq; 37 + } 38 + 39 + /** 40 + * xe_sriov_pf_migration_supported() - Check if SR-IOV VF migration is supported by the device 41 + * @xe: the &xe_device 42 + * 43 + * Return: true if migration is supported, false otherwise 44 + */ 45 + bool xe_sriov_pf_migration_supported(struct xe_device *xe) 46 + { 47 + xe_assert(xe, IS_SRIOV_PF(xe)); 48 + 49 + return xe->sriov.pf.migration.supported; 50 + } 51 + 52 + static bool pf_check_migration_support(struct xe_device *xe) 53 + { 54 + /* XXX: for now this is for feature enabling only */ 55 + return IS_ENABLED(CONFIG_DRM_XE_DEBUG); 56 + } 57 + 58 + static void pf_migration_cleanup(void *arg) 59 + { 60 + struct xe_sriov_migration_state *migration = arg; 61 + 62 + xe_sriov_packet_free(migration->pending); 63 + xe_sriov_packet_free(migration->trailer); 64 + xe_sriov_packet_free(migration->descriptor); 65 + } 66 + 67 + /** 68 + * 
xe_sriov_pf_migration_init() - Initialize support for SR-IOV VF migration. 69 + * @xe: the &xe_device 70 + * 71 + * Return: 0 on success or a negative error code on failure. 72 + */ 73 + int xe_sriov_pf_migration_init(struct xe_device *xe) 74 + { 75 + unsigned int n, totalvfs; 76 + int err; 77 + 78 + xe_assert(xe, IS_SRIOV_PF(xe)); 79 + 80 + xe->sriov.pf.migration.supported = pf_check_migration_support(xe); 81 + if (!xe_sriov_pf_migration_supported(xe)) 82 + return 0; 83 + 84 + totalvfs = xe_sriov_pf_get_totalvfs(xe); 85 + for (n = 1; n <= totalvfs; n++) { 86 + struct xe_sriov_migration_state *migration = pf_pick_migration(xe, n); 87 + 88 + err = drmm_mutex_init(&xe->drm, &migration->lock); 89 + if (err) 90 + return err; 91 + 92 + init_waitqueue_head(&migration->wq); 93 + 94 + err = devm_add_action_or_reset(xe->drm.dev, pf_migration_cleanup, migration); 95 + if (err) 96 + return err; 97 + } 98 + 99 + return 0; 100 + } 101 + 102 + static bool pf_migration_data_ready(struct xe_device *xe, unsigned int vfid) 103 + { 104 + struct xe_gt *gt; 105 + u8 gt_id; 106 + 107 + for_each_gt(gt, xe, gt_id) { 108 + if (xe_gt_sriov_pf_control_check_save_failed(gt, vfid) || 109 + xe_gt_sriov_pf_control_check_save_data_done(gt, vfid) || 110 + !xe_gt_sriov_pf_migration_ring_empty(gt, vfid)) 111 + return true; 112 + } 113 + 114 + return false; 115 + } 116 + 117 + static struct xe_sriov_packet * 118 + pf_migration_consume(struct xe_device *xe, unsigned int vfid) 119 + { 120 + struct xe_sriov_packet *data; 121 + bool more_data = false; 122 + struct xe_gt *gt; 123 + u8 gt_id; 124 + 125 + for_each_gt(gt, xe, gt_id) { 126 + data = xe_gt_sriov_pf_migration_save_consume(gt, vfid); 127 + if (data && PTR_ERR(data) != -EAGAIN) 128 + return data; 129 + if (PTR_ERR(data) == -EAGAIN) 130 + more_data = true; 131 + } 132 + 133 + if (!more_data) 134 + return NULL; 135 + 136 + return ERR_PTR(-EAGAIN); 137 + } 138 + 139 + /** 140 + * xe_sriov_pf_migration_save_consume() - Consume a VF migration data
 packet from the device. 141 + * @xe: the &xe_device 142 + * @vfid: the VF identifier 143 + * 144 + * Called by the save migration data consumer (userspace) when 145 + * processing migration data. 146 + * If there is no migration data to process, wait until more data is available. 147 + * 148 + * Return: Pointer to &xe_sriov_packet on success, 149 + * NULL if ring is empty and no more migration data is expected, 150 + * ERR_PTR value in case of error. 151 + * 152 + * This function is for PF only. 153 + */ 154 + struct xe_sriov_packet * 155 + xe_sriov_pf_migration_save_consume(struct xe_device *xe, unsigned int vfid) 156 + { 157 + struct xe_sriov_migration_state *migration = pf_pick_migration(xe, vfid); 158 + struct xe_sriov_packet *data; 159 + int ret; 160 + 161 + xe_assert(xe, IS_SRIOV_PF(xe)); 162 + 163 + for (;;) { 164 + data = pf_migration_consume(xe, vfid); 165 + if (PTR_ERR(data) != -EAGAIN) 166 + break; 167 + 168 + ret = wait_event_interruptible(migration->wq, 169 + pf_migration_data_ready(xe, vfid)); 170 + if (ret) 171 + return ERR_PTR(ret); 172 + } 173 + 174 + return data; 175 + } 176 + 177 + static int pf_handle_descriptor(struct xe_device *xe, unsigned int vfid, 178 + struct xe_sriov_packet *data) 179 + { 180 + int ret; 181 + 182 + if (data->hdr.tile_id != 0 || data->hdr.gt_id != 0) 183 + return -EINVAL; 184 + 185 + ret = xe_sriov_packet_process_descriptor(xe, vfid, data); 186 + if (ret) 187 + return ret; 188 + 189 + xe_sriov_packet_free(data); 190 + 191 + return 0; 192 + } 193 + 194 + static int pf_handle_trailer(struct xe_device *xe, unsigned int vfid, 195 + struct xe_sriov_packet *data) 196 + { 197 + struct xe_gt *gt; 198 + u8 gt_id; 199 + 200 + if (data->hdr.tile_id != 0 || data->hdr.gt_id != 0) 201 + return -EINVAL; 202 + if (data->hdr.offset != 0 || data->hdr.size != 0 || data->buff || data->bo) 203 + return -EINVAL; 204 + 205 + xe_sriov_packet_free(data); 206 + 207 + for_each_gt(gt, xe, gt_id) 208 + 
xe_gt_sriov_pf_control_restore_data_done(gt, vfid); 209 + 210 + return 0; 211 + } 212 + 213 + /** 214 + * xe_sriov_pf_migration_restore_produce() - Produce a VF migration data packet to the device. 215 + * @xe: the &xe_device 216 + * @vfid: the VF identifier 217 + * @data: Pointer to &xe_sriov_packet 218 + * 219 + * Called by the restore migration data producer (userspace) when processing 220 + * migration data. 221 + * If the underlying data structure is full, wait until there is space. 222 + * 223 + * Return: 0 on success or a negative error code on failure. 224 + */ 225 + int xe_sriov_pf_migration_restore_produce(struct xe_device *xe, unsigned int vfid, 226 + struct xe_sriov_packet *data) 227 + { 228 + struct xe_gt *gt; 229 + 230 + xe_assert(xe, IS_SRIOV_PF(xe)); 231 + 232 + if (data->hdr.type == XE_SRIOV_PACKET_TYPE_DESCRIPTOR) 233 + return pf_handle_descriptor(xe, vfid, data); 234 + if (data->hdr.type == XE_SRIOV_PACKET_TYPE_TRAILER) 235 + return pf_handle_trailer(xe, vfid, data); 236 + 237 + gt = xe_device_get_gt(xe, data->hdr.gt_id); 238 + if (!gt || data->hdr.tile_id != gt->tile->id || data->hdr.type == 0) { 239 + xe_sriov_err_ratelimited(xe, "Received invalid restore packet for VF%u (type:%u, tile:%u, GT:%u)\n", 240 + vfid, data->hdr.type, data->hdr.tile_id, data->hdr.gt_id); 241 + return -EINVAL; 242 + } 243 + 244 + return xe_gt_sriov_pf_migration_restore_produce(gt, vfid, data); 245 + } 246 + 247 + /** 248 + * xe_sriov_pf_migration_read() - Read migration data from the device. 249 + * @xe: the &xe_device 250 + * @vfid: the VF identifier 251 + * @buf: start address of userspace buffer 252 + * @len: requested read size from userspace 253 + * 254 + * Return: number of bytes that has been successfully read, 255 + * 0 if no more migration data is available, 256 + * -errno on failure. 
257 + */ 258 + ssize_t xe_sriov_pf_migration_read(struct xe_device *xe, unsigned int vfid, 259 + char __user *buf, size_t len) 260 + { 261 + struct xe_sriov_migration_state *migration = pf_pick_migration(xe, vfid); 262 + ssize_t ret, consumed = 0; 263 + 264 + xe_assert(xe, IS_SRIOV_PF(xe)); 265 + 266 + scoped_cond_guard(mutex_intr, return -EINTR, &migration->lock) { 267 + while (consumed < len) { 268 + ret = xe_sriov_packet_read_single(xe, vfid, buf, len - consumed); 269 + if (ret == -ENODATA) 270 + break; 271 + if (ret < 0) 272 + return ret; 273 + 274 + consumed += ret; 275 + buf += ret; 276 + } 277 + } 278 + 279 + return consumed; 280 + } 281 + 282 + /** 283 + * xe_sriov_pf_migration_write() - Write migration data to the device. 284 + * @xe: the &xe_device 285 + * @vfid: the VF identifier 286 + * @buf: start address of userspace buffer 287 + * @len: requested write size from userspace 288 + * 289 + * Return: number of bytes that has been successfully written, 290 + * -errno on failure. 291 + */ 292 + ssize_t xe_sriov_pf_migration_write(struct xe_device *xe, unsigned int vfid, 293 + const char __user *buf, size_t len) 294 + { 295 + struct xe_sriov_migration_state *migration = pf_pick_migration(xe, vfid); 296 + ssize_t ret, produced = 0; 297 + 298 + xe_assert(xe, IS_SRIOV_PF(xe)); 299 + 300 + scoped_cond_guard(mutex_intr, return -EINTR, &migration->lock) { 301 + while (produced < len) { 302 + ret = xe_sriov_packet_write_single(xe, vfid, buf, len - produced); 303 + if (ret < 0) 304 + return ret; 305 + 306 + produced += ret; 307 + buf += ret; 308 + } 309 + } 310 + 311 + return produced; 312 + } 313 + 314 + /** 315 + * xe_sriov_pf_migration_size() - Total size of migration data from all components within a device 316 + * @xe: the &xe_device 317 + * @vfid: the VF identifier (can't be 0) 318 + * 319 + * This function is for PF only. 320 + * 321 + * Return: total migration data size in bytes or a negative error code on failure. 
322 + */ 323 + ssize_t xe_sriov_pf_migration_size(struct xe_device *xe, unsigned int vfid) 324 + { 325 + size_t size = 0; 326 + struct xe_gt *gt; 327 + ssize_t ret; 328 + u8 gt_id; 329 + 330 + xe_assert(xe, IS_SRIOV_PF(xe)); 331 + xe_assert(xe, vfid); 332 + 333 + for_each_gt(gt, xe, gt_id) { 334 + ret = xe_gt_sriov_pf_migration_size(gt, vfid); 335 + if (ret < 0) 336 + return ret; 337 + 338 + size += ret; 339 + } 340 + 341 + return size; 342 + }
+29
drivers/gpu/drm/xe/xe_sriov_pf_migration.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2025 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_SRIOV_PF_MIGRATION_H_ 7 + #define _XE_SRIOV_PF_MIGRATION_H_ 8 + 9 + #include <linux/types.h> 10 + #include <linux/wait.h> 11 + 12 + struct xe_device; 13 + struct xe_sriov_packet; 14 + 15 + int xe_sriov_pf_migration_init(struct xe_device *xe); 16 + bool xe_sriov_pf_migration_supported(struct xe_device *xe); 17 + int xe_sriov_pf_migration_restore_produce(struct xe_device *xe, unsigned int vfid, 18 + struct xe_sriov_packet *data); 19 + struct xe_sriov_packet * 20 + xe_sriov_pf_migration_save_consume(struct xe_device *xe, unsigned int vfid); 21 + ssize_t xe_sriov_pf_migration_size(struct xe_device *xe, unsigned int vfid); 22 + wait_queue_head_t *xe_sriov_pf_migration_waitqueue(struct xe_device *xe, unsigned int vfid); 23 + 24 + ssize_t xe_sriov_pf_migration_read(struct xe_device *xe, unsigned int vfid, 25 + char __user *buf, size_t len); 26 + ssize_t xe_sriov_pf_migration_write(struct xe_device *xe, unsigned int vfid, 27 + const char __user *buf, size_t len); 28 + 29 + #endif
+37
drivers/gpu/drm/xe/xe_sriov_pf_migration_types.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2025 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_SRIOV_PF_MIGRATION_TYPES_H_ 7 + #define _XE_SRIOV_PF_MIGRATION_TYPES_H_ 8 + 9 + #include <linux/types.h> 10 + #include <linux/mutex_types.h> 11 + #include <linux/wait.h> 12 + 13 + /** 14 + * struct xe_sriov_pf_migration - Xe device level VF migration data 15 + */ 16 + struct xe_sriov_pf_migration { 17 + /** @supported: indicates whether VF migration feature is supported */ 18 + bool supported; 19 + }; 20 + 21 + /** 22 + * struct xe_sriov_migration_state - Per VF device-level migration related data 23 + */ 24 + struct xe_sriov_migration_state { 25 + /** @wq: waitqueue used to avoid busy-waiting for snapshot production/consumption */ 26 + wait_queue_head_t wq; 27 + /** @lock: Mutex protecting the migration data */ 28 + struct mutex lock; 29 + /** @pending: currently processed data packet of VF resource */ 30 + struct xe_sriov_packet *pending; 31 + /** @trailer: data packet used to indicate the end of stream */ 32 + struct xe_sriov_packet *trailer; 33 + /** @descriptor: data packet containing the metadata describing the device */ 34 + struct xe_sriov_packet *descriptor; 35 + }; 36 + 37 + #endif
+10
drivers/gpu/drm/xe/xe_sriov_pf_types.h
··· 9 9 #include <linux/mutex.h> 10 10 #include <linux/types.h> 11 11 12 + #include "xe_guard.h" 13 + #include "xe_sriov_pf_migration_types.h" 12 14 #include "xe_sriov_pf_provision_types.h" 13 15 #include "xe_sriov_pf_service_types.h" 14 16 ··· 25 23 26 24 /** @version: negotiated VF/PF ABI version */ 27 25 struct xe_sriov_pf_service_version version; 26 + /** @migration: migration state */ 27 + struct xe_sriov_migration_state migration; 28 28 }; 29 29 30 30 /** ··· 42 38 /** @driver_max_vfs: Maximum number of VFs supported by the driver. */ 43 39 u16 driver_max_vfs; 44 40 41 + /** @guard_vfs_enabling: guards VFs enabling */ 42 + struct xe_guard guard_vfs_enabling; 43 + 45 44 /** @master_lock: protects all VFs configurations across GTs */ 46 45 struct mutex master_lock; 47 46 48 47 /** @provision: device level provisioning data. */ 49 48 struct xe_sriov_pf_provision provision; 49 + 50 + /** @migration: device level migration data. */ 51 + struct xe_sriov_pf_migration migration; 50 52 51 53 /** @service: device level service data. */ 52 54 struct xe_sriov_pf_service service;
-8
drivers/gpu/drm/xe/xe_sriov_vf.c
··· 156 156 157 157 static void vf_migration_init_early(struct xe_device *xe) 158 158 { 159 - /* 160 - * TODO: Add conditions to allow specific platforms, when they're 161 - * supported at production quality. 162 - */ 163 - if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG)) 164 - return xe_sriov_vf_migration_disable(xe, 165 - "experimental feature not available on production builds"); 166 - 167 159 if (!xe_device_has_memirq(xe)) 168 160 return xe_sriov_vf_migration_disable(xe, "requires memory-based IRQ support"); 169 161
+15 -11
drivers/gpu/drm/xe/xe_vram.c
··· 183 183 return 0; 184 184 } 185 185 186 - static inline u64 get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size) 186 + static int get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size, u64 *poffset) 187 187 { 188 188 struct xe_device *xe = gt_to_xe(gt); 189 + unsigned int fw_ref; 189 190 u64 offset; 190 191 u32 reg; 192 + 193 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 194 + if (!fw_ref) 195 + return -ETIMEDOUT; 191 196 192 197 if (GRAPHICS_VER(xe) >= 20) { 193 198 u64 ccs_size = tile_size / 512; ··· 223 218 offset = (u64)REG_FIELD_GET(XEHP_FLAT_CCS_PTR, reg) * SZ_64K; 224 219 } 225 220 226 - return offset; 221 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 222 + *poffset = offset; 223 + 224 + return 0; 227 225 } 228 226 229 227 /* ··· 253 245 { 254 246 struct xe_device *xe = tile_to_xe(tile); 255 247 struct xe_gt *gt = tile->primary_gt; 256 - unsigned int fw_ref; 257 248 u64 offset; 258 249 u32 reg; 259 250 ··· 272 265 return 0; 273 266 } 274 267 275 - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 276 - if (!fw_ref) 277 - return -ETIMEDOUT; 278 - 279 268 /* actual size */ 280 269 if (unlikely(xe->info.platform == XE_DG1)) { 281 270 *tile_size = pci_resource_len(to_pci_dev(xe->drm.dev), LMEM_BAR); 282 271 *tile_offset = 0; 283 272 } else { 284 - reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE_ADDR_RANGE(gt->info.id)); 273 + reg = xe_mmio_read32(&tile->mmio, SG_TILE_ADDR_RANGE(tile->id)); 285 274 *tile_size = (u64)REG_FIELD_GET(GENMASK(14, 8), reg) * SZ_1G; 286 275 *tile_offset = (u64)REG_FIELD_GET(GENMASK(7, 1), reg) * SZ_1G; 287 276 } 288 277 289 278 /* minus device usage */ 290 279 if (xe->info.has_flat_ccs) { 291 - offset = get_flat_ccs_offset(gt, *tile_size); 280 + int ret = get_flat_ccs_offset(gt, *tile_size, &offset); 281 + 282 + if (ret) 283 + return ret; 292 284 } else { 293 285 offset = xe_mmio_read64_2x32(&tile->mmio, GSMBASE); 294 286 } 295 287 296 288 /* remove the tile offset so we have just the available size */ 297 289 *vram_size = 
offset - *tile_offset; 298 - 299 - xe_force_wake_put(gt_to_fw(gt), fw_ref); 300 290 301 291 return 0; 302 292 }
+5
drivers/gpu/drm/xe/xe_wa.c
··· 922 922 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), ENGINE_CLASS(RENDER)), 923 923 XE_RTP_ACTIONS(SET(XEHP_SLICE_COMMON_ECO_CHICKEN1, FAST_CLEAR_VALIGN_FIX)) 924 924 }, 925 + { XE_RTP_NAME("15016589081"), 926 + XE_RTP_RULES(GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0), 927 + ENGINE_CLASS(RENDER)), 928 + XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) 929 + }, 925 930 }; 926 931 927 932 static __maybe_unused const struct xe_rtp_entry oob_was[] = {
+1 -1
include/drm/intel/pciids.h
··· 861 861 MACRO__(0xE216, ## __VA_ARGS__) 862 862 863 863 #define INTEL_BMG_IDS(MACRO__, ...) \ 864 - INTEL_BMG_G21_IDS(MACRO__, __VA_ARGS__), \ 864 + INTEL_BMG_G21_IDS(MACRO__, ## __VA_ARGS__), \ 865 865 MACRO__(0xE220, ## __VA_ARGS__), \ 866 866 MACRO__(0xE221, ## __VA_ARGS__), \ 867 867 MACRO__(0xE222, ## __VA_ARGS__), \