Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/xe/vf: Send RESFIX_DONE message at end of VF restore

After restore, GuC will not respond to any messages from the VF KMD until
fixups are applied. Once that is done, the VF KMD sends a RESFIX_DONE
message to GuC, at which point GuC resumes normal operation.

This patch implements sending the RESFIX_DONE message at the end of
post-migration recovery.

v2: keep pm ref during the whole recovery, style fixes (Michal)
v3: move assert removal to a separate patch, emit a debug message per GuC
instead of a single one, comment changes (Michal)
v4: improve one debug message (Michal)

Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
Reviewed-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241104213449.1455694-4-tomasz.lis@intel.com
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>

authored by

Tomasz Lis and committed by
Michal Wajdeczko
1255954d 360a1f3e

+95
+38
drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h
··· 502 502 #define VF2GUC_VF_RESET_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 503 503 504 504 /** 505 + * DOC: VF2GUC_NOTIFY_RESFIX_DONE 506 + * 507 + * This action is used by VF to notify the GuC that the VF KMD has completed 508 + * post-migration recovery steps. 509 + * 510 + * This message must be sent as `MMIO HXG Message`_. 511 + * 512 + * +---+-------+--------------------------------------------------------------+ 513 + * | | Bits | Description | 514 + * +===+=======+==============================================================+ 515 + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | 516 + * | +-------+--------------------------------------------------------------+ 517 + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | 518 + * | +-------+--------------------------------------------------------------+ 519 + * | | 27:16 | DATA0 = MBZ | 520 + * | +-------+--------------------------------------------------------------+ 521 + * | | 15:0 | ACTION = _`GUC_ACTION_VF2GUC_NOTIFY_RESFIX_DONE` = 0x5508 | 522 + * +---+-------+--------------------------------------------------------------+ 523 + * 524 + * +---+-------+--------------------------------------------------------------+ 525 + * | | Bits | Description | 526 + * +===+=======+==============================================================+ 527 + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | 528 + * | +-------+--------------------------------------------------------------+ 529 + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | 530 + * | +-------+--------------------------------------------------------------+ 531 + * | | 27:0 | DATA0 = MBZ | 532 + * +---+-------+--------------------------------------------------------------+ 533 + */ 534 + #define GUC_ACTION_VF2GUC_NOTIFY_RESFIX_DONE 0x5508u 535 + 536 + #define VF2GUC_NOTIFY_RESFIX_DONE_REQUEST_MSG_LEN GUC_HXG_REQUEST_MSG_MIN_LEN 537 + #define VF2GUC_NOTIFY_RESFIX_DONE_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 538 + 539 + #define 
VF2GUC_NOTIFY_RESFIX_DONE_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN 540 + #define VF2GUC_NOTIFY_RESFIX_DONE_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 541 + 542 + /** 505 543 * DOC: VF2GUC_QUERY_SINGLE_KLV 506 544 * 507 545 * This action is used by VF to query value of the single KLV data.
+38
drivers/gpu/drm/xe/xe_gt_sriov_vf.c
··· 224 224 return 0; 225 225 } 226 226 227 + static int guc_action_vf_notify_resfix_done(struct xe_guc *guc) 228 + { 229 + u32 request[GUC_HXG_REQUEST_MSG_MIN_LEN] = { 230 + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | 231 + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | 232 + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_VF2GUC_NOTIFY_RESFIX_DONE), 233 + }; 234 + int ret; 235 + 236 + ret = xe_guc_mmio_send(guc, request, ARRAY_SIZE(request)); 237 + 238 + return ret > 0 ? -EPROTO : ret; 239 + } 240 + 241 + /** 242 + * xe_gt_sriov_vf_notify_resfix_done() - Notify GuC that resource fixups have been applied. 243 + * @gt: the &xe_gt struct instance linked to the target GuC 244 + * 245 + * Returns: 0 if the operation completed successfully, or a negative error 246 + * code otherwise. 247 + */ 248 + int xe_gt_sriov_vf_notify_resfix_done(struct xe_gt *gt) 249 + { 250 + struct xe_guc *guc = &gt->uc.guc; 251 + int err; 252 + 253 + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); 254 + 255 + err = guc_action_vf_notify_resfix_done(guc); 256 + if (unlikely(err)) 257 + xe_gt_sriov_err(gt, "Failed to notify GuC about resource fixup done (%pe)\n", 258 + ERR_PTR(err)); 259 + else 260 + xe_gt_sriov_dbg_verbose(gt, "sent GuC resource fixup done\n"); 261 + 262 + return err; 263 + } 264 + 227 265 static int guc_action_query_single_klv(struct xe_guc *guc, u32 key, 228 266 u32 *value, u32 value_len) 229 267 {
+1
drivers/gpu/drm/xe/xe_gt_sriov_vf.h
··· 17 17 int xe_gt_sriov_vf_connect(struct xe_gt *gt); 18 18 int xe_gt_sriov_vf_query_runtime(struct xe_gt *gt); 19 19 int xe_gt_sriov_vf_prepare_ggtt(struct xe_gt *gt); 20 + int xe_gt_sriov_vf_notify_resfix_done(struct xe_gt *gt); 20 21 void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt); 21 22 22 23 u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt);
+18
drivers/gpu/drm/xe/xe_sriov_vf.c
··· 8 8 #include "xe_assert.h" 9 9 #include "xe_device.h" 10 10 #include "xe_gt_sriov_printk.h" 11 + #include "xe_gt_sriov_vf.h" 12 + #include "xe_pm.h" 11 13 #include "xe_sriov.h" 12 14 #include "xe_sriov_printk.h" 13 15 #include "xe_sriov_vf.h" ··· 132 130 INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func); 133 131 } 134 132 133 + /* 134 + * Notify the GuC of every GT that resource fixups have been applied. The per-GT return value is not checked here. 135 + */ 136 + static void vf_post_migration_notify_resfix_done(struct xe_device *xe) 137 + { 138 + struct xe_gt *gt; 139 + unsigned int id; 140 + 141 + for_each_gt(gt, xe, id) { 142 + xe_gt_sriov_vf_notify_resfix_done(gt); 143 + } 144 + } 145 + 135 146 static void vf_post_migration_recovery(struct xe_device *xe) 136 147 { 137 148 drm_dbg(&xe->drm, "migration recovery in progress\n"); 149 + xe_pm_runtime_get(xe); 138 150 /* FIXME: add the recovery steps */ 151 + vf_post_migration_notify_resfix_done(xe); 152 + xe_pm_runtime_put(xe); 139 153 drm_notice(&xe->drm, "migration recovery ended\n"); 140 154 } 141 155