Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/xe/vf: Enable CCS save/restore only on supported GuC versions

CCS save/restore is supported starting with GuC 70.48.0 (compatibility
version 1.23.0). Gate the feature on the GuC firmware version and keep it
disabled on older or unsupported versions.

Fixes: f3009272ff2e ("drm/xe/vf: Create contexts for CCS read write")
Signed-off-by: Satyanarayana K V P <satyanarayana.k.v.p@intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Andi Shyti <andi.shyti@kernel.org>
Reviewed-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Link: https://lore.kernel.org/r/20250902103256.21658-2-satyanarayana.k.v.p@intel.com

Authored by Satyanarayana K V P and committed by Michal Wajdeczko
be5590c3 ee4b3222

+95 -24
+1 -1
drivers/gpu/drm/xe/xe_device.c
··· 950 950 951 951 xe_vsec_init(xe); 952 952 953 - err = xe_sriov_late_init(xe); 953 + err = xe_sriov_init_late(xe); 954 954 if (err) 955 955 goto err_unregister_display; 956 956
+5 -9
drivers/gpu/drm/xe/xe_sriov.c
··· 160 160 } 161 161 162 162 /** 163 - * xe_sriov_late_init() - SR-IOV late initialization functions. 163 + * xe_sriov_init_late() - SR-IOV late initialization functions. 164 164 * @xe: the &xe_device to initialize 165 - * 166 - * On VF this function will initialize code for CCS migration. 167 165 * 168 166 * Return: 0 on success or a negative error code on failure. 169 167 */ 170 - int xe_sriov_late_init(struct xe_device *xe) 168 + int xe_sriov_init_late(struct xe_device *xe) 171 169 { 172 - int err = 0; 170 + if (IS_SRIOV_VF(xe)) 171 + return xe_sriov_vf_init_late(xe); 173 172 174 - if (IS_VF_CCS_INIT_NEEDED(xe)) 175 - err = xe_sriov_vf_ccs_init(xe); 176 - 177 - return err; 173 + return 0; 178 174 }
+1 -1
drivers/gpu/drm/xe/xe_sriov.h
··· 18 18 void xe_sriov_probe_early(struct xe_device *xe); 19 19 void xe_sriov_print_info(struct xe_device *xe, struct drm_printer *p); 20 20 int xe_sriov_init(struct xe_device *xe); 21 - int xe_sriov_late_init(struct xe_device *xe); 21 + int xe_sriov_init_late(struct xe_device *xe); 22 22 23 23 static inline enum xe_sriov_mode xe_device_sriov_mode(const struct xe_device *xe) 24 24 {
+77 -10
drivers/gpu/drm/xe/xe_sriov_vf.c
··· 10 10 #include "xe_gt.h" 11 11 #include "xe_gt_sriov_printk.h" 12 12 #include "xe_gt_sriov_vf.h" 13 + #include "xe_guc.h" 13 14 #include "xe_guc_ct.h" 14 15 #include "xe_guc_submit.h" 15 16 #include "xe_irq.h" ··· 19 18 #include "xe_sriov.h" 20 19 #include "xe_sriov_printk.h" 21 20 #include "xe_sriov_vf.h" 21 + #include "xe_sriov_vf_ccs.h" 22 22 #include "xe_tile_sriov_vf.h" 23 23 24 24 /** ··· 129 127 * | | | 130 128 */ 131 129 132 - static bool vf_migration_supported(struct xe_device *xe) 130 + /** 131 + * xe_sriov_vf_migration_supported - Report whether SR-IOV VF migration is 132 + * supported or not. 133 + * @xe: the &xe_device to check 134 + * 135 + * Returns: true if VF migration is supported, false otherwise. 136 + */ 137 + bool xe_sriov_vf_migration_supported(struct xe_device *xe) 138 + { 139 + xe_assert(xe, IS_SRIOV_VF(xe)); 140 + return xe->sriov.vf.migration.enabled; 141 + } 142 + 143 + static void vf_disable_migration(struct xe_device *xe, const char *fmt, ...) 144 + { 145 + struct va_format vaf; 146 + va_list va_args; 147 + 148 + xe_assert(xe, IS_SRIOV_VF(xe)); 149 + 150 + va_start(va_args, fmt); 151 + vaf.fmt = fmt; 152 + vaf.va = &va_args; 153 + xe_sriov_notice(xe, "migration disabled: %pV\n", &vaf); 154 + va_end(va_args); 155 + 156 + xe->sriov.vf.migration.enabled = false; 157 + } 158 + 159 + static void migration_worker_func(struct work_struct *w); 160 + 161 + static void vf_migration_init_early(struct xe_device *xe) 133 162 { 134 163 /* 135 164 * TODO: Add conditions to allow specific platforms, when they're 136 165 * supported at production quality. 
137 166 */ 138 - return IS_ENABLED(CONFIG_DRM_XE_DEBUG); 139 - } 167 + if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG)) 168 + return vf_disable_migration(xe, 169 + "experimental feature not available on production builds"); 140 170 141 - static void migration_worker_func(struct work_struct *w); 171 + if (GRAPHICS_VER(xe) < 20) 172 + return vf_disable_migration(xe, "requires gfx version >= 20, but only %u found", 173 + GRAPHICS_VER(xe)); 174 + 175 + if (!IS_DGFX(xe)) { 176 + struct xe_uc_fw_version guc_version; 177 + 178 + xe_gt_sriov_vf_guc_versions(xe_device_get_gt(xe, 0), NULL, &guc_version); 179 + if (MAKE_GUC_VER_STRUCT(guc_version) < MAKE_GUC_VER(1, 23, 0)) 180 + return vf_disable_migration(xe, 181 + "CCS migration requires GuC ABI >= 1.23 but only %u.%u found", 182 + guc_version.major, guc_version.minor); 183 + } 184 + 185 + INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func); 186 + 187 + xe->sriov.vf.migration.enabled = true; 188 + xe_sriov_dbg(xe, "migration support enabled\n"); 189 + } 142 190 143 191 /** 144 192 * xe_sriov_vf_init_early - Initialize SR-IOV VF specific data. ··· 196 144 */ 197 145 void xe_sriov_vf_init_early(struct xe_device *xe) 198 146 { 199 - INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func); 200 - 201 - if (!vf_migration_supported(xe)) 202 - xe_sriov_info(xe, "migration not supported by this module version\n"); 147 + vf_migration_init_early(xe); 203 148 } 204 149 205 150 /** ··· 351 302 xe_pm_runtime_get(xe); 352 303 vf_post_migration_shutdown(xe); 353 304 354 - if (!vf_migration_supported(xe)) { 355 - xe_sriov_err(xe, "migration not supported by this module version\n"); 305 + if (!xe_sriov_vf_migration_supported(xe)) { 306 + xe_sriov_err(xe, "migration is not supported\n"); 356 307 err = -ENOTRECOVERABLE; 357 308 goto fail; 358 309 } ··· 426 377 started = queue_work(xe->sriov.wq, &xe->sriov.vf.migration.worker); 427 378 drm_info(&xe->drm, "VF migration recovery %s\n", started ? 
428 379 "scheduled" : "already in progress"); 380 + } 381 + 382 + /** 383 + * xe_sriov_vf_init_late() - SR-IOV VF late initialization functions. 384 + * @xe: the &xe_device to initialize 385 + * 386 + * This function initializes code for CCS migration. 387 + * 388 + * Return: 0 on success or a negative error code on failure. 389 + */ 390 + int xe_sriov_vf_init_late(struct xe_device *xe) 391 + { 392 + int err = 0; 393 + 394 + if (xe_sriov_vf_migration_supported(xe)) 395 + err = xe_sriov_vf_ccs_init(xe); 396 + 397 + return err; 429 398 }
+4
drivers/gpu/drm/xe/xe_sriov_vf.h
··· 6 6 #ifndef _XE_SRIOV_VF_H_ 7 7 #define _XE_SRIOV_VF_H_ 8 8 9 + #include <linux/types.h> 10 + 9 11 struct xe_device; 10 12 11 13 void xe_sriov_vf_init_early(struct xe_device *xe); 14 + int xe_sriov_vf_init_late(struct xe_device *xe); 12 15 void xe_sriov_vf_start_migration_recovery(struct xe_device *xe); 16 + bool xe_sriov_vf_migration_supported(struct xe_device *xe); 13 17 14 18 #endif
+2 -3
drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
··· 264 264 u32 flags; 265 265 int err; 266 266 267 - xe_assert(xe, IS_SRIOV_VF(xe)); 268 - xe_assert(xe, !IS_DGFX(xe)); 269 - xe_assert(xe, xe_device_has_flat_ccs(xe)); 267 + if (!IS_VF_CCS_INIT_NEEDED(xe)) 268 + return 0; 270 269 271 270 for_each_ccs_rw_ctx(ctx_id) { 272 271 ctx = &tile->sriov.vf.ccs[ctx_id];
+5
drivers/gpu/drm/xe/xe_sriov_vf_types.h
··· 35 35 struct work_struct worker; 36 36 /** @migration.gt_flags: Per-GT request flags for VF migration recovery */ 37 37 unsigned long gt_flags; 38 + /** 39 + * @migration.enabled: flag indicating if migration support 40 + * was enabled or not due to missing prerequisites 41 + */ 42 + bool enabled; 38 43 } migration; 39 44 40 45 /** @ccs: VF CCS state data */