Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/xe/pf: Synchronize VF FLR between all GTs

The PF part of the VF FLR processing shall be done after all GuCs
confirm that they finished their part of the VF FLR processing, otherwise
the PF may start clearing the VF's GGTT that another GuC may still be accessing.

Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Reviewed-by: Michał Winiarski <michal.winiarski@intel.com>
Link: https://lore.kernel.org/r/20250930233525.201263-7-michal.wajdeczko@intel.com

+88 -1
+55 -1
drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
··· 18 18 #include "xe_gt_sriov_printk.h" 19 19 #include "xe_guc_ct.h" 20 20 #include "xe_sriov.h" 21 + #include "xe_sriov_pf_control.h" 21 22 #include "xe_sriov_pf_service.h" 22 23 #include "xe_tile.h" 23 24 ··· 171 170 CASE2STR(FLR_SEND_START); 172 171 CASE2STR(FLR_WAIT_GUC); 173 172 CASE2STR(FLR_GUC_DONE); 173 + CASE2STR(FLR_SYNC); 174 174 CASE2STR(FLR_RESET_CONFIG); 175 175 CASE2STR(FLR_RESET_DATA); 176 176 CASE2STR(FLR_RESET_MMIO); ··· 942 940 * : v : | | 943 941 * : FLR_GUC_DONE : | | 944 942 * : | : | | 943 + * : | o--<--sync : | | 944 + * : |/ / : | | 945 + * : FLR_SYNC--o : | | 946 + * : | : | | 945 947 * : FLR_RESET_CONFIG---failed--->-----------o--------+-----------o 946 948 * : | : | | 947 949 * : FLR_RESET_DATA : | | ··· 1153 1147 return true; 1154 1148 } 1155 1149 1150 + static bool pf_exit_vf_flr_sync(struct xe_gt *gt, unsigned int vfid) 1151 + { 1152 + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC)) 1153 + return false; 1154 + 1155 + pf_enter_vf_flr_reset_config(gt, vfid); 1156 + return true; 1157 + } 1158 + 1159 + static void pf_enter_vf_flr_sync(struct xe_gt *gt, unsigned int vfid) 1160 + { 1161 + int ret; 1162 + 1163 + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC)) 1164 + pf_enter_vf_state_machine_bug(gt, vfid); 1165 + 1166 + ret = xe_sriov_pf_control_sync_flr(gt_to_xe(gt), vfid); 1167 + if (ret < 0) { 1168 + xe_gt_sriov_dbg_verbose(gt, "FLR checkpoint %pe\n", ERR_PTR(ret)); 1169 + pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC); 1170 + } else { 1171 + xe_gt_sriov_dbg_verbose(gt, "FLR checkpoint pass\n"); 1172 + pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC); 1173 + } 1174 + } 1175 + 1156 1176 static bool pf_exit_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid) 1157 1177 { 1158 1178 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE)) 1159 1179 return false; 1160 1180 1161 - pf_enter_vf_flr_reset_config(gt, vfid); 1181 + pf_enter_vf_flr_sync(gt, vfid); 1162 1182 return true; 1163 
1183 } 1164 1184 ··· 1207 1175 { 1208 1176 pf_enter_vf_flr_wip(gt, vfid); 1209 1177 1178 + return 0; 1179 + } 1180 + 1181 + /** 1182 + * xe_gt_sriov_pf_control_sync_flr() - Synchronize on the VF FLR checkpoint. 1183 + * @gt: the &xe_gt 1184 + * @vfid: the VF identifier 1185 + * @sync: if true it will allow to exit the checkpoint 1186 + * 1187 + * Return: non-zero if FLR checkpoint has been reached, zero if there is no FLR 1188 + * in progress, or a negative error code if the FLR is busy or has failed. 1189 + */ 1190 + int xe_gt_sriov_pf_control_sync_flr(struct xe_gt *gt, unsigned int vfid, bool sync) 1191 + { 1192 + if (sync && pf_exit_vf_flr_sync(gt, vfid)) 1193 + return 1; 1194 + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC)) 1195 + return 1; 1196 + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) 1197 + return -EBUSY; 1198 + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED)) 1199 + return -EIO; 1210 1200 return 0; 1211 1201 } 1212 1202
+1
drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h
··· 18 18 int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid); 19 19 int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid); 20 20 int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid); 21 + int xe_gt_sriov_pf_control_sync_flr(struct xe_gt *gt, unsigned int vfid, bool sync); 21 22 int xe_gt_sriov_pf_control_wait_flr(struct xe_gt *gt, unsigned int vfid); 22 23 23 24 #ifdef CONFIG_PCI_IOV
+2
drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h
··· 18 18 * @XE_GT_SRIOV_STATE_FLR_SEND_START: indicates that the PF wants to send a FLR START command. 19 19 * @XE_GT_SRIOV_STATE_FLR_WAIT_GUC: indicates that the PF awaits for a response from the GuC. 20 20 * @XE_GT_SRIOV_STATE_FLR_GUC_DONE: indicates that the PF has received a response from the GuC. 21 + * @XE_GT_SRIOV_STATE_FLR_SYNC: indicates that the PF awaits to synchronize with other GuCs. 21 22 * @XE_GT_SRIOV_STATE_FLR_RESET_CONFIG: indicates that the PF needs to clear VF's resources. 22 23 * @XE_GT_SRIOV_STATE_FLR_RESET_DATA: indicates that the PF needs to clear VF's data. 23 24 * @XE_GT_SRIOV_STATE_FLR_RESET_MMIO: indicates that the PF needs to reset VF's registers. ··· 48 47 XE_GT_SRIOV_STATE_FLR_SEND_START, 49 48 XE_GT_SRIOV_STATE_FLR_WAIT_GUC, 50 49 XE_GT_SRIOV_STATE_FLR_GUC_DONE, 50 + XE_GT_SRIOV_STATE_FLR_SYNC, 51 51 XE_GT_SRIOV_STATE_FLR_RESET_CONFIG, 52 52 XE_GT_SRIOV_STATE_FLR_RESET_DATA, 53 53 XE_GT_SRIOV_STATE_FLR_RESET_MMIO,
+29
drivers/gpu/drm/xe/xe_sriov_pf_control.c
··· 120 120 121 121 return result; 122 122 } 123 + 124 + /** 125 + * xe_sriov_pf_control_sync_flr() - Synchronize a VF FLR between all GTs. 126 + * @xe: the &xe_device 127 + * @vfid: the VF identifier 128 + * 129 + * This function is for PF only. 130 + * 131 + * Return: 0 on success or a negative error code on failure. 132 + */ 133 + int xe_sriov_pf_control_sync_flr(struct xe_device *xe, unsigned int vfid) 134 + { 135 + struct xe_gt *gt; 136 + unsigned int id; 137 + int ret; 138 + 139 + for_each_gt(gt, xe, id) { 140 + ret = xe_gt_sriov_pf_control_sync_flr(gt, vfid, false); 141 + if (ret < 0) 142 + return ret; 143 + } 144 + for_each_gt(gt, xe, id) { 145 + ret = xe_gt_sriov_pf_control_sync_flr(gt, vfid, true); 146 + if (ret < 0) 147 + return ret; 148 + } 149 + 150 + return 0; 151 + }
+1
drivers/gpu/drm/xe/xe_sriov_pf_control.h
··· 12 12 int xe_sriov_pf_control_resume_vf(struct xe_device *xe, unsigned int vfid); 13 13 int xe_sriov_pf_control_stop_vf(struct xe_device *xe, unsigned int vfid); 14 14 int xe_sriov_pf_control_reset_vf(struct xe_device *xe, unsigned int vfid); 15 + int xe_sriov_pf_control_sync_flr(struct xe_device *xe, unsigned int vfid); 15 16 16 17 #endif