Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/xe/pf: Improve VF control

Our initial VF control implementation was focused on providing
very minimal support for the VF_STATE_NOTIFY events, just to
meet GuC requirements, without tracking a VF state or doing any
expected actions (like cleanup in case of the FLR notification).

Try to improve this by defining a set of VF state machines, each
responsible for processing one activity (PAUSE, RESUME, STOP or
FLR). All required steps defined by the VF state machine are then
executed by the PF worker from the dedicated workqueue.

Any external requests or notifications simply try to transition
between the states to trigger a work and then wait for that work
to finish. Some predefined default timeouts are used to avoid
changing existing API calls, but it should be easy to extend the
control API to also accept specific timeout values.

Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Piotr Piórkowski <piotr.piorkowski@intel.com>
Reviewed-by: Piotr Piórkowski <piotr.piorkowski@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240828210809.1528-5-michal.wajdeczko@intel.com

+1305 -15
+6
drivers/gpu/drm/xe/xe_gt_sriov_pf.c
··· 9 9 10 10 #include "xe_gt_sriov_pf.h" 11 11 #include "xe_gt_sriov_pf_config.h" 12 + #include "xe_gt_sriov_pf_control.h" 12 13 #include "xe_gt_sriov_pf_helpers.h" 13 14 #include "xe_gt_sriov_pf_service.h" 14 15 #include "xe_mmio.h" ··· 58 57 if (err) 59 58 return err; 60 59 60 + err = xe_gt_sriov_pf_control_init(gt); 61 + if (err) 62 + return err; 63 + 61 64 return 0; 62 65 } 63 66 ··· 98 93 void xe_gt_sriov_pf_restart(struct xe_gt *gt) 99 94 { 100 95 xe_gt_sriov_pf_config_restart(gt); 96 + xe_gt_sriov_pf_control_restart(gt); 101 97 }
+1183 -15
drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
··· 3 3 * Copyright © 2023-2024 Intel Corporation 4 4 */ 5 5 6 + #include <drm/drm_managed.h> 7 + 6 8 #include "abi/guc_actions_sriov_abi.h" 7 9 8 10 #include "xe_device.h" 9 11 #include "xe_gt.h" 12 + #include "xe_gt_sriov_pf_config.h" 10 13 #include "xe_gt_sriov_pf_control.h" 11 14 #include "xe_gt_sriov_pf_helpers.h" 15 + #include "xe_gt_sriov_pf_monitor.h" 16 + #include "xe_gt_sriov_pf_service.h" 12 17 #include "xe_gt_sriov_printk.h" 13 18 #include "xe_guc_ct.h" 14 19 #include "xe_sriov.h" ··· 47 42 }; 48 43 int ret; 49 44 50 - /* XXX those two commands are now sent from the G2H handler */ 51 - if (cmd == GUC_PF_TRIGGER_VF_FLR_START || cmd == GUC_PF_TRIGGER_VF_FLR_FINISH) 52 - return xe_guc_ct_send_g2h_handler(&guc->ct, request, ARRAY_SIZE(request)); 53 - 54 45 ret = xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request)); 55 46 return ret > 0 ? -EPROTO : ret; 56 47 } ··· 56 55 int err; 57 56 58 57 xe_gt_assert(gt, vfid != PFID); 58 + xe_gt_sriov_dbg_verbose(gt, "sending VF%u control command %s\n", 59 + vfid, control_cmd_to_string(cmd)); 59 60 60 61 err = guc_action_vf_control_cmd(&gt->uc.guc, vfid, cmd); 61 62 if (unlikely(err)) ··· 92 89 } 93 90 94 91 /** 92 + * DOC: The VF state machine 93 + * 94 + * The simplified VF state machine could be presented as:: 95 + * 96 + * pause--------------------------o 97 + * / | 98 + * / v 99 + * (READY)<------------------resume-----(PAUSED) 100 + * ^ \ / / 101 + * | \ / / 102 + * | stop---->(STOPPED)<----stop / 103 + * | / / 104 + * | / / 105 + * o--------<-----flr / 106 + * \ / 107 + * o------<--------------------flr 108 + * 109 + * Where: 110 + * 111 + * * READY - represents a state in which VF is fully operable 112 + * * PAUSED - represents a state in which VF activity is temporarily suspended 113 + * * STOPPED - represents a state in which VF activity is definitely halted 114 + * * pause - represents a request to temporarily suspend VF activity 115 + * * resume - represents a request to resume VF activity 116 + * * 
stop - represents a request to definitely halt VF activity 117 + * * flr - represents a request to perform VF FLR to restore VF activity 118 + * 119 + * However, each state transition requires additional steps that involves 120 + * communication with GuC that might fail or be interrupted by other requests:: 121 + * 122 + * .................................WIP.... 123 + * : : 124 + * pause--------------------->PAUSE_WIP----------------------------o 125 + * / : / \ : | 126 + * / : o----<---stop flr--o : | 127 + * / : | \ / | : V 128 + * (READY,RESUMED)<--------+------------RESUME_WIP<----+--<-----resume--(PAUSED) 129 + * ^ \ \ : | | : / / 130 + * | \ \ : | | : / / 131 + * | \ \ : | | : / / 132 + * | \ \ : o----<----------------------+--<-------stop / 133 + * | \ \ : | | : / 134 + * | \ \ : V | : / 135 + * | \ stop----->STOP_WIP---------flr--->-----o : / 136 + * | \ : | | : / 137 + * | \ : | V : / 138 + * | flr--------+----->----------------->FLR_WIP<-----flr 139 + * | : | / ^ : 140 + * | : | / | : 141 + * o--------<-------:----+-----<----------------o | : 142 + * : | | : 143 + * :....|...........................|.....: 144 + * | | 145 + * V | 146 + * (STOPPED)--------------------flr 147 + * 148 + * For details about each internal WIP state machine see: 149 + * 150 + * * `The VF PAUSE state machine`_ 151 + * * `The VF RESUME state machine`_ 152 + * * `The VF STOP state machine`_ 153 + * * `The VF FLR state machine`_ 154 + */ 155 + 156 + #ifdef CONFIG_DRM_XE_DEBUG_SRIOV 157 + static const char *control_bit_to_string(enum xe_gt_sriov_control_bits bit) 158 + { 159 + switch (bit) { 160 + #define CASE2STR(_X) \ 161 + case XE_GT_SRIOV_STATE_##_X: return #_X 162 + CASE2STR(WIP); 163 + CASE2STR(FLR_WIP); 164 + CASE2STR(FLR_SEND_START); 165 + CASE2STR(FLR_WAIT_GUC); 166 + CASE2STR(FLR_GUC_DONE); 167 + CASE2STR(FLR_RESET_CONFIG); 168 + CASE2STR(FLR_RESET_DATA); 169 + CASE2STR(FLR_RESET_MMIO); 170 + CASE2STR(FLR_SEND_FINISH); 171 + CASE2STR(FLR_FAILED); 172 + 
CASE2STR(PAUSE_WIP); 173 + CASE2STR(PAUSE_SEND_PAUSE); 174 + CASE2STR(PAUSE_WAIT_GUC); 175 + CASE2STR(PAUSE_GUC_DONE); 176 + CASE2STR(PAUSE_FAILED); 177 + CASE2STR(PAUSED); 178 + CASE2STR(RESUME_WIP); 179 + CASE2STR(RESUME_SEND_RESUME); 180 + CASE2STR(RESUME_FAILED); 181 + CASE2STR(RESUMED); 182 + CASE2STR(STOP_WIP); 183 + CASE2STR(STOP_SEND_STOP); 184 + CASE2STR(STOP_FAILED); 185 + CASE2STR(STOPPED); 186 + CASE2STR(MISMATCH); 187 + #undef CASE2STR 188 + default: return "?"; 189 + } 190 + } 191 + #endif 192 + 193 + static unsigned long pf_get_default_timeout(enum xe_gt_sriov_control_bits bit) 194 + { 195 + switch (bit) { 196 + case XE_GT_SRIOV_STATE_FLR_WAIT_GUC: 197 + case XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC: 198 + return HZ / 2; 199 + case XE_GT_SRIOV_STATE_FLR_WIP: 200 + case XE_GT_SRIOV_STATE_FLR_RESET_CONFIG: 201 + return 5 * HZ; 202 + default: 203 + return HZ; 204 + } 205 + } 206 + 207 + static struct xe_gt_sriov_control_state *pf_pick_vf_control(struct xe_gt *gt, unsigned int vfid) 208 + { 209 + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); 210 + xe_gt_assert(gt, vfid <= xe_gt_sriov_pf_get_totalvfs(gt)); 211 + 212 + return &gt->sriov.pf.vfs[vfid].control; 213 + } 214 + 215 + static unsigned long *pf_peek_vf_state(struct xe_gt *gt, unsigned int vfid) 216 + { 217 + struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid); 218 + 219 + return &cs->state; 220 + } 221 + 222 + static bool pf_check_vf_state(struct xe_gt *gt, unsigned int vfid, 223 + enum xe_gt_sriov_control_bits bit) 224 + { 225 + return test_bit(bit, pf_peek_vf_state(gt, vfid)); 226 + } 227 + 228 + static void pf_dump_vf_state(struct xe_gt *gt, unsigned int vfid) 229 + { 230 + unsigned long state = *pf_peek_vf_state(gt, vfid); 231 + enum xe_gt_sriov_control_bits bit; 232 + 233 + if (state) { 234 + xe_gt_sriov_dbg_verbose(gt, "VF%u state %#lx%s%*pbl\n", 235 + vfid, state, state ? 
" bits " : "", 236 + (int)BITS_PER_LONG, &state); 237 + for_each_set_bit(bit, &state, BITS_PER_LONG) 238 + xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d)\n", 239 + vfid, control_bit_to_string(bit), bit); 240 + } else { 241 + xe_gt_sriov_dbg_verbose(gt, "VF%u state READY\n", vfid); 242 + } 243 + } 244 + 245 + static bool pf_expect_vf_state(struct xe_gt *gt, unsigned int vfid, 246 + enum xe_gt_sriov_control_bits bit) 247 + { 248 + bool result = pf_check_vf_state(gt, vfid, bit); 249 + 250 + if (unlikely(!result)) 251 + pf_dump_vf_state(gt, vfid); 252 + 253 + return result; 254 + } 255 + 256 + static bool pf_expect_vf_not_state(struct xe_gt *gt, unsigned int vfid, 257 + enum xe_gt_sriov_control_bits bit) 258 + { 259 + bool result = !pf_check_vf_state(gt, vfid, bit); 260 + 261 + if (unlikely(!result)) 262 + pf_dump_vf_state(gt, vfid); 263 + 264 + return result; 265 + } 266 + 267 + static bool pf_enter_vf_state(struct xe_gt *gt, unsigned int vfid, 268 + enum xe_gt_sriov_control_bits bit) 269 + { 270 + if (!test_and_set_bit(bit, pf_peek_vf_state(gt, vfid))) { 271 + xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) enter\n", 272 + vfid, control_bit_to_string(bit), bit); 273 + return true; 274 + } 275 + return false; 276 + } 277 + 278 + static bool pf_exit_vf_state(struct xe_gt *gt, unsigned int vfid, 279 + enum xe_gt_sriov_control_bits bit) 280 + { 281 + if (test_and_clear_bit(bit, pf_peek_vf_state(gt, vfid))) { 282 + xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) exit\n", 283 + vfid, control_bit_to_string(bit), bit); 284 + return true; 285 + } 286 + return false; 287 + } 288 + 289 + static void pf_escape_vf_state(struct xe_gt *gt, unsigned int vfid, 290 + enum xe_gt_sriov_control_bits bit) 291 + { 292 + if (pf_exit_vf_state(gt, vfid, bit)) 293 + xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) escaped by %ps\n", 294 + vfid, control_bit_to_string(bit), bit, 295 + __builtin_return_address(0)); 296 + } 297 + 298 + static void pf_enter_vf_mismatch(struct xe_gt *gt, unsigned int 
vfid) 299 + { 300 + if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_MISMATCH)) { 301 + xe_gt_sriov_dbg(gt, "VF%u state mismatch detected by %ps\n", 302 + vfid, __builtin_return_address(0)); 303 + pf_dump_vf_state(gt, vfid); 304 + } 305 + } 306 + 307 + static void pf_exit_vf_mismatch(struct xe_gt *gt, unsigned int vfid) 308 + { 309 + if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_MISMATCH)) 310 + xe_gt_sriov_dbg(gt, "VF%u state mismatch cleared by %ps\n", 311 + vfid, __builtin_return_address(0)); 312 + 313 + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED); 314 + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED); 315 + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED); 316 + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED); 317 + } 318 + 319 + #define pf_enter_vf_state_machine_bug(gt, vfid) ({ \ 320 + pf_enter_vf_mismatch((gt), (vfid)); \ 321 + }) 322 + 323 + static void pf_queue_control_worker(struct xe_gt *gt) 324 + { 325 + struct xe_device *xe = gt_to_xe(gt); 326 + 327 + xe_gt_assert(gt, IS_SRIOV_PF(xe)); 328 + 329 + queue_work(xe->sriov.wq, &gt->sriov.pf.control.worker); 330 + } 331 + 332 + static void pf_queue_vf(struct xe_gt *gt, unsigned int vfid) 333 + { 334 + struct xe_gt_sriov_pf_control *pfc = &gt->sriov.pf.control; 335 + 336 + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); 337 + 338 + spin_lock(&pfc->lock); 339 + list_move_tail(&gt->sriov.pf.vfs[vfid].control.link, &pfc->list); 340 + spin_unlock(&pfc->lock); 341 + 342 + pf_queue_control_worker(gt); 343 + } 344 + 345 + static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid); 346 + static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid); 347 + static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid); 348 + static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid); 349 + 350 + static bool pf_enter_vf_wip(struct xe_gt *gt, unsigned int vfid) 351 + { 352 + if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_WIP)) { 353 + 
struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid); 354 + 355 + reinit_completion(&cs->done); 356 + return true; 357 + } 358 + return false; 359 + } 360 + 361 + static void pf_exit_vf_wip(struct xe_gt *gt, unsigned int vfid) 362 + { 363 + if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_WIP)) { 364 + struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid); 365 + 366 + pf_exit_vf_flr_wip(gt, vfid); 367 + pf_exit_vf_stop_wip(gt, vfid); 368 + pf_exit_vf_pause_wip(gt, vfid); 369 + pf_exit_vf_resume_wip(gt, vfid); 370 + 371 + complete_all(&cs->done); 372 + } 373 + } 374 + 375 + static int pf_wait_vf_wip_done(struct xe_gt *gt, unsigned int vfid, unsigned long timeout) 376 + { 377 + struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid); 378 + 379 + return wait_for_completion_timeout(&cs->done, timeout) ? 0 : -ETIMEDOUT; 380 + } 381 + 382 + static void pf_enter_vf_ready(struct xe_gt *gt, unsigned int vfid) 383 + { 384 + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); 385 + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED); 386 + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED); 387 + pf_exit_vf_mismatch(gt, vfid); 388 + pf_exit_vf_wip(gt, vfid); 389 + } 390 + 391 + /** 392 + * DOC: The VF PAUSE state machine 393 + * 394 + * The VF PAUSE state machine looks like:: 395 + * 396 + * (READY,RESUMED)<-------------<---------------------o---------o 397 + * | \ \ 398 + * pause \ \ 399 + * | \ \ 400 + * ....V...........................PAUSE_WIP........ 
\ \ 401 + * : \ : o \ 402 + * : \ o------<-----busy : | \ 403 + * : \ / / : | | 404 + * : PAUSE_SEND_PAUSE ---failed--->----------o--->(PAUSE_FAILED) | 405 + * : | \ : | | 406 + * : acked rejected---->----------o--->(MISMATCH) / 407 + * : | : / 408 + * : v : / 409 + * : PAUSE_WAIT_GUC : / 410 + * : | : / 411 + * : done : / 412 + * : | : / 413 + * : v : / 414 + * : PAUSE_GUC_DONE o-----restart 415 + * : / : 416 + * : / : 417 + * :....o..............o...............o...........: 418 + * | | | 419 + * completed flr stop 420 + * | | | 421 + * V .....V..... ......V..... 422 + * (PAUSED) : FLR_WIP : : STOP_WIP : 423 + * :.........: :..........: 424 + * 425 + * For the full state machine view, see `The VF state machine`_. 426 + */ 427 + 428 + static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid) 429 + { 430 + if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WIP)) { 431 + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE); 432 + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC); 433 + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE); 434 + } 435 + } 436 + 437 + static void pf_enter_vf_paused(struct xe_gt *gt, unsigned int vfid) 438 + { 439 + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) 440 + pf_enter_vf_state_machine_bug(gt, vfid); 441 + 442 + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED); 443 + pf_exit_vf_mismatch(gt, vfid); 444 + pf_exit_vf_wip(gt, vfid); 445 + } 446 + 447 + static void pf_enter_vf_pause_completed(struct xe_gt *gt, unsigned int vfid) 448 + { 449 + pf_enter_vf_paused(gt, vfid); 450 + } 451 + 452 + static void pf_enter_vf_pause_failed(struct xe_gt *gt, unsigned int vfid) 453 + { 454 + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED); 455 + pf_exit_vf_wip(gt, vfid); 456 + } 457 + 458 + static void pf_enter_vf_pause_rejected(struct xe_gt *gt, unsigned int vfid) 459 + { 460 + pf_enter_vf_mismatch(gt, vfid); 461 + pf_enter_vf_pause_failed(gt, vfid); 462 + } 463 + 
464 + static bool pf_exit_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid) 465 + { 466 + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE)) 467 + return false; 468 + 469 + pf_enter_vf_pause_completed(gt, vfid); 470 + return true; 471 + } 472 + 473 + static void pf_enter_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid) 474 + { 475 + if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE)) 476 + pf_queue_vf(gt, vfid); 477 + } 478 + 479 + static void pf_enter_pause_wait_guc(struct xe_gt *gt, unsigned int vfid) 480 + { 481 + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC)) 482 + pf_enter_vf_state_machine_bug(gt, vfid); 483 + } 484 + 485 + static bool pf_exit_pause_wait_guc(struct xe_gt *gt, unsigned int vfid) 486 + { 487 + return pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC); 488 + } 489 + 490 + static void pf_enter_vf_pause_send_pause(struct xe_gt *gt, unsigned int vfid) 491 + { 492 + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE)) 493 + pf_enter_vf_state_machine_bug(gt, vfid); 494 + 495 + pf_queue_vf(gt, vfid); 496 + } 497 + 498 + static bool pf_exit_vf_pause_send_pause(struct xe_gt *gt, unsigned int vfid) 499 + { 500 + int err; 501 + 502 + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE)) 503 + return false; 504 + 505 + /* GuC may actually send a PAUSE_DONE before we get a RESPONSE */ 506 + pf_enter_pause_wait_guc(gt, vfid); 507 + 508 + err = pf_send_vf_pause(gt, vfid); 509 + if (err) { 510 + /* send failed, so we shouldn't expect PAUSE_DONE from GuC */ 511 + pf_exit_pause_wait_guc(gt, vfid); 512 + 513 + if (err == -EBUSY) 514 + pf_enter_vf_pause_send_pause(gt, vfid); 515 + else if (err == -EIO) 516 + pf_enter_vf_pause_rejected(gt, vfid); 517 + else 518 + pf_enter_vf_pause_failed(gt, vfid); 519 + } else { 520 + /* 521 + * we have already moved to WAIT_GUC, maybe even to GUC_DONE 522 + * but since GuC didn't complain, we may clear MISMATCH 523 + */ 524 + 
pf_exit_vf_mismatch(gt, vfid); 525 + } 526 + 527 + return true; 528 + } 529 + 530 + static bool pf_enter_vf_pause_wip(struct xe_gt *gt, unsigned int vfid) 531 + { 532 + if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WIP)) { 533 + pf_enter_vf_wip(gt, vfid); 534 + pf_enter_vf_pause_send_pause(gt, vfid); 535 + return true; 536 + } 537 + 538 + return false; 539 + } 540 + 541 + /** 95 542 * xe_gt_sriov_pf_control_pause_vf - Pause a VF. 96 543 * @gt: the &xe_gt 97 544 * @vfid: the VF identifier ··· 552 99 */ 553 100 int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid) 554 101 { 555 - return pf_send_vf_pause(gt, vfid); 102 + unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_PAUSE_WIP); 103 + int err; 104 + 105 + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) { 106 + xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid); 107 + return -EPERM; 108 + } 109 + 110 + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) { 111 + xe_gt_sriov_dbg(gt, "VF%u was already paused!\n", vfid); 112 + return -ESTALE; 113 + } 114 + 115 + if (!pf_enter_vf_pause_wip(gt, vfid)) { 116 + xe_gt_sriov_dbg(gt, "VF%u pause already in progress!\n", vfid); 117 + return -EALREADY; 118 + } 119 + 120 + err = pf_wait_vf_wip_done(gt, vfid, timeout); 121 + if (err) { 122 + xe_gt_sriov_dbg(gt, "VF%u pause didn't finish in %u ms (%pe)\n", 123 + vfid, jiffies_to_msecs(timeout), ERR_PTR(err)); 124 + return err; 125 + } 126 + 127 + if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) { 128 + xe_gt_sriov_info(gt, "VF%u paused!\n", vfid); 129 + return 0; 130 + } 131 + 132 + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED)) { 133 + xe_gt_sriov_dbg(gt, "VF%u pause failed!\n", vfid); 134 + return -EIO; 135 + } 136 + 137 + xe_gt_sriov_dbg(gt, "VF%u pause was canceled!\n", vfid); 138 + return -ECANCELED; 139 + } 140 + 141 + /** 142 + * DOC: The VF RESUME state machine 143 + * 144 + * The VF RESUME state machine looks like:: 145 + * 146 + * 
(PAUSED)<-----------------<------------------------o 147 + * | \ 148 + * resume \ 149 + * | \ 150 + * ....V............................RESUME_WIP...... \ 151 + * : \ : o 152 + * : \ o-------<-----busy : | 153 + * : \ / / : | 154 + * : RESUME_SEND_RESUME ---failed--->--------o--->(RESUME_FAILED) 155 + * : / \ : | 156 + * : acked rejected---->---------o--->(MISMATCH) 157 + * : / : 158 + * :....o..............o...............o.....o.....: 159 + * | | | \ 160 + * completed flr stop restart-->(READY) 161 + * | | | 162 + * V .....V..... ......V..... 163 + * (RESUMED) : FLR_WIP : : STOP_WIP : 164 + * :.........: :..........: 165 + * 166 + * For the full state machine view, see `The VF state machine`_. 167 + */ 168 + 169 + static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid) 170 + { 171 + if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_WIP)) 172 + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME); 173 + } 174 + 175 + static void pf_enter_vf_resumed(struct xe_gt *gt, unsigned int vfid) 176 + { 177 + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED); 178 + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); 179 + pf_exit_vf_mismatch(gt, vfid); 180 + pf_exit_vf_wip(gt, vfid); 181 + } 182 + 183 + static void pf_enter_vf_resume_completed(struct xe_gt *gt, unsigned int vfid) 184 + { 185 + pf_enter_vf_resumed(gt, vfid); 186 + } 187 + 188 + static void pf_enter_vf_resume_failed(struct xe_gt *gt, unsigned int vfid) 189 + { 190 + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED); 191 + pf_exit_vf_wip(gt, vfid); 192 + } 193 + 194 + static void pf_enter_vf_resume_rejected(struct xe_gt *gt, unsigned int vfid) 195 + { 196 + pf_enter_vf_mismatch(gt, vfid); 197 + pf_enter_vf_resume_failed(gt, vfid); 198 + } 199 + 200 + static void pf_enter_vf_resume_send_resume(struct xe_gt *gt, unsigned int vfid) 201 + { 202 + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME)) 203 + pf_enter_vf_state_machine_bug(gt, vfid); 
204 + 205 + pf_queue_vf(gt, vfid); 206 + } 207 + 208 + static bool pf_exit_vf_resume_send_resume(struct xe_gt *gt, unsigned int vfid) 209 + { 210 + int err; 211 + 212 + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME)) 213 + return false; 214 + 215 + err = pf_send_vf_resume(gt, vfid); 216 + if (err == -EBUSY) 217 + pf_enter_vf_resume_send_resume(gt, vfid); 218 + else if (err == -EIO) 219 + pf_enter_vf_resume_rejected(gt, vfid); 220 + else if (err) 221 + pf_enter_vf_resume_failed(gt, vfid); 222 + else 223 + pf_enter_vf_resume_completed(gt, vfid); 224 + return true; 225 + } 226 + 227 + static bool pf_enter_vf_resume_wip(struct xe_gt *gt, unsigned int vfid) 228 + { 229 + if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_WIP)) { 230 + pf_enter_vf_wip(gt, vfid); 231 + pf_enter_vf_resume_send_resume(gt, vfid); 232 + return true; 233 + } 234 + 235 + return false; 556 236 } 557 237 558 238 /** ··· 699 113 */ 700 114 int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid) 701 115 { 702 - return pf_send_vf_resume(gt, vfid); 116 + unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_RESUME_WIP); 117 + int err; 118 + 119 + if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) { 120 + xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid); 121 + return -EPERM; 122 + } 123 + 124 + if (!pf_enter_vf_resume_wip(gt, vfid)) { 125 + xe_gt_sriov_dbg(gt, "VF%u resume already in progress!\n", vfid); 126 + return -EALREADY; 127 + } 128 + 129 + err = pf_wait_vf_wip_done(gt, vfid, timeout); 130 + if (err) 131 + return err; 132 + 133 + if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED)) { 134 + xe_gt_sriov_info(gt, "VF%u resumed!\n", vfid); 135 + return 0; 136 + } 137 + 138 + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED)) { 139 + xe_gt_sriov_dbg(gt, "VF%u resume failed!\n", vfid); 140 + return -EIO; 141 + } 142 + 143 + xe_gt_sriov_dbg(gt, "VF%u resume was canceled!\n", vfid); 144 + return -ECANCELED; 145 
+ } 146 + 147 + /** 148 + * DOC: The VF STOP state machine 149 + * 150 + * The VF STOP state machine looks like:: 151 + * 152 + * (READY,PAUSED,RESUMED)<-------<--------------------o 153 + * | \ 154 + * stop \ 155 + * | \ 156 + * ....V..............................STOP_WIP...... \ 157 + * : \ : o 158 + * : \ o----<----busy : | 159 + * : \ / / : | 160 + * : STOP_SEND_STOP--------failed--->--------o--->(STOP_FAILED) 161 + * : / \ : | 162 + * : acked rejected-------->--------o--->(MISMATCH) 163 + * : / : 164 + * :....o..............o...............o...........: 165 + * | | | 166 + * completed flr restart 167 + * | | | 168 + * V .....V..... V 169 + * (STOPPED) : FLR_WIP : (READY) 170 + * :.........: 171 + * 172 + * For the full state machine view, see `The VF state machine`_. 173 + */ 174 + 175 + static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid) 176 + { 177 + if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_WIP)) 178 + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP); 179 + } 180 + 181 + static void pf_enter_vf_stopped(struct xe_gt *gt, unsigned int vfid) 182 + { 183 + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) 184 + pf_enter_vf_state_machine_bug(gt, vfid); 185 + 186 + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED); 187 + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); 188 + pf_exit_vf_mismatch(gt, vfid); 189 + pf_exit_vf_wip(gt, vfid); 190 + } 191 + 192 + static void pf_enter_vf_stop_completed(struct xe_gt *gt, unsigned int vfid) 193 + { 194 + pf_enter_vf_stopped(gt, vfid); 195 + } 196 + 197 + static void pf_enter_vf_stop_failed(struct xe_gt *gt, unsigned int vfid) 198 + { 199 + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED); 200 + pf_exit_vf_wip(gt, vfid); 201 + } 202 + 203 + static void pf_enter_vf_stop_rejected(struct xe_gt *gt, unsigned int vfid) 204 + { 205 + pf_enter_vf_mismatch(gt, vfid); 206 + pf_enter_vf_stop_failed(gt, vfid); 207 + } 208 + 209 + static void 
pf_enter_vf_stop_send_stop(struct xe_gt *gt, unsigned int vfid) 210 + { 211 + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP)) 212 + pf_enter_vf_state_machine_bug(gt, vfid); 213 + 214 + pf_queue_vf(gt, vfid); 215 + } 216 + 217 + static bool pf_exit_vf_stop_send_stop(struct xe_gt *gt, unsigned int vfid) 218 + { 219 + int err; 220 + 221 + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP)) 222 + return false; 223 + 224 + err = pf_send_vf_stop(gt, vfid); 225 + if (err == -EBUSY) 226 + pf_enter_vf_stop_send_stop(gt, vfid); 227 + else if (err == -EIO) 228 + pf_enter_vf_stop_rejected(gt, vfid); 229 + else if (err) 230 + pf_enter_vf_stop_failed(gt, vfid); 231 + else 232 + pf_enter_vf_stop_completed(gt, vfid); 233 + return true; 234 + } 235 + 236 + static bool pf_enter_vf_stop_wip(struct xe_gt *gt, unsigned int vfid) 237 + { 238 + if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_WIP)) { 239 + pf_enter_vf_wip(gt, vfid); 240 + pf_enter_vf_stop_send_stop(gt, vfid); 241 + return true; 242 + } 243 + return false; 703 244 } 704 245 705 246 /** ··· 840 127 */ 841 128 int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid) 842 129 { 843 - return pf_send_vf_stop(gt, vfid); 130 + unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_STOP_WIP); 131 + int err; 132 + 133 + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) { 134 + xe_gt_sriov_dbg(gt, "VF%u was already stopped!\n", vfid); 135 + return -ESTALE; 136 + } 137 + 138 + if (!pf_enter_vf_stop_wip(gt, vfid)) { 139 + xe_gt_sriov_dbg(gt, "VF%u stop already in progress!\n", vfid); 140 + return -EALREADY; 141 + } 142 + 143 + err = pf_wait_vf_wip_done(gt, vfid, timeout); 144 + if (err) 145 + return err; 146 + 147 + if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) { 148 + xe_gt_sriov_info(gt, "VF%u stopped!\n", vfid); 149 + return 0; 150 + } 151 + 152 + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED)) { 153 + xe_gt_sriov_dbg(gt, 
"VF%u stop failed!\n", vfid); 154 + return -EIO; 155 + } 156 + 157 + xe_gt_sriov_dbg(gt, "VF%u stop was canceled!\n", vfid); 158 + return -ECANCELED; 159 + } 160 + 161 + /** 162 + * DOC: The VF FLR state machine 163 + * 164 + * The VF FLR state machine looks like:: 165 + * 166 + * (READY,PAUSED,STOPPED)<------------<--------------o 167 + * | \ 168 + * flr \ 169 + * | \ 170 + * ....V..........................FLR_WIP........... \ 171 + * : \ : \ 172 + * : \ o----<----busy : | 173 + * : \ / / : | 174 + * : FLR_SEND_START---failed----->-----------o--->(FLR_FAILED)<---o 175 + * : | \ : | | 176 + * : acked rejected----->-----------o--->(MISMATCH) | 177 + * : | : ^ | 178 + * : v : | | 179 + * : FLR_WAIT_GUC : | | 180 + * : | : | | 181 + * : done : | | 182 + * : | : | | 183 + * : v : | | 184 + * : FLR_GUC_DONE : | | 185 + * : | : | | 186 + * : FLR_RESET_CONFIG---failed--->-----------o--------+-----------o 187 + * : | : | | 188 + * : FLR_RESET_DATA : | | 189 + * : | : | | 190 + * : FLR_RESET_MMIO : | | 191 + * : | : | | 192 + * : | o----<----busy : | | 193 + * : |/ / : | | 194 + * : FLR_SEND_FINISH----failed--->-----------o--------+-----------o 195 + * : / \ : | 196 + * : acked rejected----->-----------o--------o 197 + * : / : 198 + * :....o..............................o...........: 199 + * | | 200 + * completed restart 201 + * | / 202 + * V / 203 + * (READY)<----------<------------o 204 + * 205 + * For the full state machine view, see `The VF state machine`_. 
206 + */ 207 + 208 + static void pf_enter_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid) 209 + { 210 + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START)) 211 + pf_enter_vf_state_machine_bug(gt, vfid); 212 + 213 + pf_queue_vf(gt, vfid); 214 + } 215 + 216 + static void pf_enter_vf_flr_wip(struct xe_gt *gt, unsigned int vfid) 217 + { 218 + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) { 219 + xe_gt_sriov_dbg(gt, "VF%u FLR is already in progress\n", vfid); 220 + return; 221 + } 222 + 223 + pf_enter_vf_wip(gt, vfid); 224 + pf_enter_vf_flr_send_start(gt, vfid); 225 + } 226 + 227 + static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid) 228 + { 229 + if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) { 230 + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH); 231 + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO); 232 + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA); 233 + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG); 234 + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE); 235 + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC); 236 + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START); 237 + } 238 + } 239 + 240 + static void pf_enter_vf_flr_completed(struct xe_gt *gt, unsigned int vfid) 241 + { 242 + pf_enter_vf_ready(gt, vfid); 243 + } 244 + 245 + static void pf_enter_vf_flr_failed(struct xe_gt *gt, unsigned int vfid) 246 + { 247 + if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED)) 248 + xe_gt_sriov_notice(gt, "VF%u FLR failed!\n", vfid); 249 + pf_exit_vf_wip(gt, vfid); 250 + } 251 + 252 + static void pf_enter_vf_flr_rejected(struct xe_gt *gt, unsigned int vfid) 253 + { 254 + pf_enter_vf_mismatch(gt, vfid); 255 + pf_enter_vf_flr_failed(gt, vfid); 256 + } 257 + 258 + static void pf_enter_vf_flr_send_finish(struct xe_gt *gt, unsigned int vfid) 259 + { 260 + if (!pf_enter_vf_state(gt, vfid, 
XE_GT_SRIOV_STATE_FLR_SEND_FINISH)) 261 + pf_enter_vf_state_machine_bug(gt, vfid); 262 + 263 + pf_queue_vf(gt, vfid); 264 + } 265 + 266 + static bool pf_exit_vf_flr_send_finish(struct xe_gt *gt, unsigned int vfid) 267 + { 268 + int err; 269 + 270 + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH)) 271 + return false; 272 + 273 + err = pf_send_vf_flr_finish(gt, vfid); 274 + if (err == -EBUSY) 275 + pf_enter_vf_flr_send_finish(gt, vfid); 276 + else if (err == -EIO) 277 + pf_enter_vf_flr_rejected(gt, vfid); 278 + else if (err) 279 + pf_enter_vf_flr_failed(gt, vfid); 280 + else 281 + pf_enter_vf_flr_completed(gt, vfid); 282 + return true; 283 + } 284 + 285 + static void pf_enter_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid) 286 + { 287 + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO)) 288 + pf_enter_vf_state_machine_bug(gt, vfid); 289 + 290 + pf_queue_vf(gt, vfid); 291 + } 292 + 293 + static bool pf_exit_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid) 294 + { 295 + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO)) 296 + return false; 297 + 298 + /* XXX: placeholder */ 299 + 300 + pf_enter_vf_flr_send_finish(gt, vfid); 301 + return true; 302 + } 303 + 304 + static void pf_enter_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid) 305 + { 306 + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA)) 307 + pf_enter_vf_state_machine_bug(gt, vfid); 308 + 309 + pf_queue_vf(gt, vfid); 310 + } 311 + 312 + static bool pf_exit_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid) 313 + { 314 + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA)) 315 + return false; 316 + 317 + xe_gt_sriov_pf_service_reset(gt, vfid); 318 + xe_gt_sriov_pf_monitor_flr(gt, vfid); 319 + 320 + pf_enter_vf_flr_reset_mmio(gt, vfid); 321 + return true; 322 + } 323 + 324 + static void pf_enter_vf_flr_reset_config(struct xe_gt *gt, unsigned int vfid) 325 + { 326 + if (!pf_enter_vf_state(gt, vfid, 
XE_GT_SRIOV_STATE_FLR_RESET_CONFIG)) 327 + pf_enter_vf_state_machine_bug(gt, vfid); 328 + 329 + pf_queue_vf(gt, vfid); 330 + } 331 + 332 + static bool pf_exit_vf_flr_reset_config(struct xe_gt *gt, unsigned int vfid) 333 + { 334 + unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_RESET_CONFIG); 335 + int err; 336 + 337 + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG)) 338 + return false; 339 + 340 + err = xe_gt_sriov_pf_config_sanitize(gt, vfid, timeout); 341 + if (err) 342 + pf_enter_vf_flr_failed(gt, vfid); 343 + else 344 + pf_enter_vf_flr_reset_data(gt, vfid); 345 + return true; 346 + } 347 + 348 + static void pf_enter_vf_flr_wait_guc(struct xe_gt *gt, unsigned int vfid) 349 + { 350 + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC)) 351 + pf_enter_vf_state_machine_bug(gt, vfid); 352 + } 353 + 354 + static bool pf_exit_vf_flr_wait_guc(struct xe_gt *gt, unsigned int vfid) 355 + { 356 + return pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC); 357 + } 358 + 359 + static bool pf_exit_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid) 360 + { 361 + int err; 362 + 363 + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START)) 364 + return false; 365 + 366 + /* GuC may actually send a FLR_DONE before we get a RESPONSE */ 367 + pf_enter_vf_flr_wait_guc(gt, vfid); 368 + 369 + err = pf_send_vf_flr_start(gt, vfid); 370 + if (err) { 371 + /* send failed, so we shouldn't expect FLR_DONE from GuC */ 372 + pf_exit_vf_flr_wait_guc(gt, vfid); 373 + 374 + if (err == -EBUSY) 375 + pf_enter_vf_flr_send_start(gt, vfid); 376 + else if (err == -EIO) 377 + pf_enter_vf_flr_rejected(gt, vfid); 378 + else 379 + pf_enter_vf_flr_failed(gt, vfid); 380 + } else { 381 + /* 382 + * we have already moved to WAIT_GUC, maybe even to GUC_DONE 383 + * but since GuC didn't complain, we may clear MISMATCH 384 + */ 385 + pf_exit_vf_mismatch(gt, vfid); 386 + } 387 + 388 + return true; 389 + } 390 + 391 + static bool 
pf_exit_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid) 392 + { 393 + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE)) 394 + return false; 395 + 396 + pf_enter_vf_flr_reset_config(gt, vfid); 397 + return true; 398 + } 399 + 400 + static void pf_enter_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid) 401 + { 402 + if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE)) 403 + pf_queue_vf(gt, vfid); 844 404 } 845 405 846 406 /** ··· 1127 141 */ 1128 142 int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid) 1129 143 { 144 + unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_WIP); 1130 145 int err; 1131 146 1132 - /* XXX pf_send_vf_flr_start() expects ct->lock */ 1133 - mutex_lock(&gt->uc.guc.ct.lock); 1134 - err = pf_send_vf_flr_start(gt, vfid); 1135 - mutex_unlock(&gt->uc.guc.ct.lock); 147 + pf_enter_vf_flr_wip(gt, vfid); 1136 148 1137 - return err; 149 + err = pf_wait_vf_wip_done(gt, vfid, timeout); 150 + if (err) { 151 + xe_gt_sriov_notice(gt, "VF%u FLR didn't finish in %u ms (%pe)\n", 152 + vfid, jiffies_to_msecs(timeout), ERR_PTR(err)); 153 + return err; 154 + } 155 + 156 + if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED)) 157 + return -EIO; 158 + 159 + return 0; 1138 160 } 1139 161 1140 162 /** ··· 1194 200 1195 201 if (needs_dispatch_flr(xe)) { 1196 202 for_each_gt(gtit, xe, gtid) 1197 - pf_send_vf_flr_start(gtit, vfid); 203 + pf_enter_vf_flr_wip(gtit, vfid); 1198 204 } else { 1199 - pf_send_vf_flr_start(gt, vfid); 205 + pf_enter_vf_flr_wip(gt, vfid); 1200 206 } 1201 207 } 1202 208 1203 209 static void pf_handle_vf_flr_done(struct xe_gt *gt, u32 vfid) 1204 210 { 1205 - pf_send_vf_flr_finish(gt, vfid); 211 + if (!pf_exit_vf_flr_wait_guc(gt, vfid)) { 212 + xe_gt_sriov_dbg(gt, "Received out of order 'VF%u FLR done'\n", vfid); 213 + pf_enter_vf_mismatch(gt, vfid); 214 + return; 215 + } 216 + 217 + pf_enter_vf_flr_guc_done(gt, vfid); 218 + } 219 + 220 + static void 
pf_handle_vf_pause_done(struct xe_gt *gt, u32 vfid) 221 + { 222 + if (!pf_exit_pause_wait_guc(gt, vfid)) { 223 + xe_gt_sriov_dbg(gt, "Received out of order 'VF%u PAUSE done'\n", vfid); 224 + pf_enter_vf_mismatch(gt, vfid); 225 + return; 226 + } 227 + 228 + pf_enter_vf_pause_guc_done(gt, vfid); 1206 229 } 1207 230 1208 231 static int pf_handle_vf_event(struct xe_gt *gt, u32 vfid, u32 eventid) ··· 1237 226 pf_handle_vf_flr_done(gt, vfid); 1238 227 break; 1239 228 case GUC_PF_NOTIFY_VF_PAUSE_DONE: 229 + pf_handle_vf_pause_done(gt, vfid); 1240 230 break; 1241 231 case GUC_PF_NOTIFY_VF_FIXUP_DONE: 1242 232 break; ··· 1295 283 eventid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_2_EVENT, msg[2]); 1296 284 1297 285 return vfid ? pf_handle_vf_event(gt, vfid, eventid) : pf_handle_pf_event(gt, eventid); 286 + } 287 + 288 + static bool pf_process_vf_state_machine(struct xe_gt *gt, unsigned int vfid) 289 + { 290 + if (pf_exit_vf_flr_send_start(gt, vfid)) 291 + return true; 292 + 293 + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC)) { 294 + xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid, 295 + control_bit_to_string(XE_GT_SRIOV_STATE_FLR_WAIT_GUC)); 296 + return false; 297 + } 298 + 299 + if (pf_exit_vf_flr_guc_done(gt, vfid)) 300 + return true; 301 + 302 + if (pf_exit_vf_flr_reset_config(gt, vfid)) 303 + return true; 304 + 305 + if (pf_exit_vf_flr_reset_data(gt, vfid)) 306 + return true; 307 + 308 + if (pf_exit_vf_flr_reset_mmio(gt, vfid)) 309 + return true; 310 + 311 + if (pf_exit_vf_flr_send_finish(gt, vfid)) 312 + return true; 313 + 314 + if (pf_exit_vf_stop_send_stop(gt, vfid)) 315 + return true; 316 + 317 + if (pf_exit_vf_pause_send_pause(gt, vfid)) 318 + return true; 319 + 320 + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC)) { 321 + xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid, 322 + control_bit_to_string(XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC)); 323 + return true; 324 + } 325 + 326 + if (pf_exit_vf_pause_guc_done(gt, vfid)) 327 + 
return true; 328 + 329 + if (pf_exit_vf_resume_send_resume(gt, vfid)) 330 + return true; 331 + 332 + return false; 333 + } 334 + 335 + static unsigned int pf_control_state_index(struct xe_gt *gt, 336 + struct xe_gt_sriov_control_state *cs) 337 + { 338 + return container_of(cs, struct xe_gt_sriov_metadata, control) - gt->sriov.pf.vfs; 339 + } 340 + 341 + static void pf_worker_find_work(struct xe_gt *gt) 342 + { 343 + struct xe_gt_sriov_pf_control *pfc = &gt->sriov.pf.control; 344 + struct xe_gt_sriov_control_state *cs; 345 + unsigned int vfid; 346 + bool empty; 347 + bool more; 348 + 349 + spin_lock(&pfc->lock); 350 + cs = list_first_entry_or_null(&pfc->list, struct xe_gt_sriov_control_state, link); 351 + if (cs) 352 + list_del_init(&cs->link); 353 + empty = list_empty(&pfc->list); 354 + spin_unlock(&pfc->lock); 355 + 356 + if (!cs) 357 + return; 358 + 359 + /* VF metadata structures are indexed by the VFID */ 360 + vfid = pf_control_state_index(gt, cs); 361 + xe_gt_assert(gt, vfid <= xe_gt_sriov_pf_get_totalvfs(gt)); 362 + 363 + more = pf_process_vf_state_machine(gt, vfid); 364 + if (more) 365 + pf_queue_vf(gt, vfid); 366 + else if (!empty) 367 + pf_queue_control_worker(gt); 368 + } 369 + 370 + static void control_worker_func(struct work_struct *w) 371 + { 372 + struct xe_gt *gt = container_of(w, struct xe_gt, sriov.pf.control.worker); 373 + 374 + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); 375 + pf_worker_find_work(gt); 376 + } 377 + 378 + static void pf_stop_worker(struct xe_gt *gt) 379 + { 380 + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); 381 + cancel_work_sync(&gt->sriov.pf.control.worker); 382 + } 383 + 384 + static void control_fini_action(struct drm_device *dev, void *data) 385 + { 386 + struct xe_gt *gt = data; 387 + 388 + pf_stop_worker(gt); 389 + } 390 + 391 + /** 392 + * xe_gt_sriov_pf_control_init() - Initialize PF's control data. 393 + * @gt: the &xe_gt 394 + * 395 + * This function is for PF only. 
396 + * 397 + * Return: 0 on success or a negative error code on failure. 398 + */ 399 + int xe_gt_sriov_pf_control_init(struct xe_gt *gt) 400 + { 401 + struct xe_device *xe = gt_to_xe(gt); 402 + unsigned int n, totalvfs; 403 + 404 + xe_gt_assert(gt, IS_SRIOV_PF(xe)); 405 + 406 + totalvfs = xe_sriov_pf_get_totalvfs(xe); 407 + for (n = 0; n <= totalvfs; n++) { 408 + struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, n); 409 + 410 + init_completion(&cs->done); 411 + INIT_LIST_HEAD(&cs->link); 412 + } 413 + 414 + spin_lock_init(&gt->sriov.pf.control.lock); 415 + INIT_LIST_HEAD(&gt->sriov.pf.control.list); 416 + INIT_WORK(&gt->sriov.pf.control.worker, control_worker_func); 417 + 418 + return drmm_add_action_or_reset(&xe->drm, control_fini_action, gt); 419 + } 420 + 421 + /** 422 + * xe_gt_sriov_pf_control_restart() - Restart SR-IOV control data after a GT reset. 423 + * @gt: the &xe_gt 424 + * 425 + * Any per-VF status maintained by the PF or any ongoing VF control activity 426 + * performed by the PF must be reset or cancelled when the GT is reset. 427 + * 428 + * This function is for PF only. 429 + */ 430 + void xe_gt_sriov_pf_control_restart(struct xe_gt *gt) 431 + { 432 + struct xe_device *xe = gt_to_xe(gt); 433 + unsigned int n, totalvfs; 434 + 435 + xe_gt_assert(gt, IS_SRIOV_PF(xe)); 436 + 437 + pf_stop_worker(gt); 438 + 439 + totalvfs = xe_sriov_pf_get_totalvfs(xe); 440 + for (n = 1; n <= totalvfs; n++) 441 + pf_enter_vf_ready(gt, n); 1298 442 }
+3
drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h
··· 11 11 12 12 struct xe_gt; 13 13 14 + int xe_gt_sriov_pf_control_init(struct xe_gt *gt); 15 + void xe_gt_sriov_pf_control_restart(struct xe_gt *gt); 16 + 14 17 int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid); 15 18 int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid); 16 19 int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid);
+107
drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2024 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_GT_SRIOV_PF_CONTROL_TYPES_H_ 7 + #define _XE_GT_SRIOV_PF_CONTROL_TYPES_H_ 8 + 9 + #include <linux/completion.h> 10 + #include <linux/spinlock.h> 11 + #include <linux/workqueue_types.h> 12 + 13 + /** 14 + * enum xe_gt_sriov_control_bits - Various bits used by the PF to represent a VF state 15 + * 16 + * @XE_GT_SRIOV_STATE_WIP: indicates that some operations are in progress. 17 + * @XE_GT_SRIOV_STATE_FLR_WIP: indicates that a VF FLR is in progress. 18 + * @XE_GT_SRIOV_STATE_FLR_SEND_START: indicates that the PF wants to send a FLR START command. 19 + * @XE_GT_SRIOV_STATE_FLR_WAIT_GUC: indicates that the PF awaits a response from the GuC. 20 + * @XE_GT_SRIOV_STATE_FLR_GUC_DONE: indicates that the PF has received a response from the GuC. 21 + * @XE_GT_SRIOV_STATE_FLR_RESET_CONFIG: indicates that the PF needs to clear VF's resources. 22 + * @XE_GT_SRIOV_STATE_FLR_RESET_DATA: indicates that the PF needs to clear VF's data. 23 + * @XE_GT_SRIOV_STATE_FLR_RESET_MMIO: indicates that the PF needs to reset VF's registers. 24 + * @XE_GT_SRIOV_STATE_FLR_SEND_FINISH: indicates that the PF wants to send a FLR FINISH message. 25 + * @XE_GT_SRIOV_STATE_FLR_FAILED: indicates that VF FLR sequence failed. 26 + * @XE_GT_SRIOV_STATE_PAUSE_WIP: indicates that a VF pause operation is in progress. 27 + * @XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE: indicates that the PF is about to send a PAUSE command. 28 + * @XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC: indicates that the PF awaits a response from the GuC. 29 + * @XE_GT_SRIOV_STATE_PAUSE_GUC_DONE: indicates that the PF has received a response from the GuC. 30 + * @XE_GT_SRIOV_STATE_PAUSE_FAILED: indicates that a VF pause operation has failed. 31 + * @XE_GT_SRIOV_STATE_PAUSED: indicates that the VF is paused. 32 + * @XE_GT_SRIOV_STATE_RESUME_WIP: indicates that a VF resume operation is in progress. 
33 + * @XE_GT_SRIOV_STATE_RESUME_SEND_RESUME: indicates that the PF is about to send a RESUME command. 34 + * @XE_GT_SRIOV_STATE_RESUME_FAILED: indicates that a VF resume operation has failed. 35 + * @XE_GT_SRIOV_STATE_RESUMED: indicates that the VF was resumed. 36 + * @XE_GT_SRIOV_STATE_STOP_WIP: indicates that a VF stop operation is in progress. 37 + * @XE_GT_SRIOV_STATE_STOP_SEND_STOP: indicates that the PF wants to send a STOP command. 38 + * @XE_GT_SRIOV_STATE_STOP_FAILED: indicates that the VF stop operation has failed. 39 + * @XE_GT_SRIOV_STATE_STOPPED: indicates that the VF was stopped. 40 + * @XE_GT_SRIOV_STATE_MISMATCH: indicates that the PF has detected a VF state mismatch. 41 + */ 42 + enum xe_gt_sriov_control_bits { 43 + XE_GT_SRIOV_STATE_WIP = 1, 44 + 45 + XE_GT_SRIOV_STATE_FLR_WIP, 46 + XE_GT_SRIOV_STATE_FLR_SEND_START, 47 + XE_GT_SRIOV_STATE_FLR_WAIT_GUC, 48 + XE_GT_SRIOV_STATE_FLR_GUC_DONE, 49 + XE_GT_SRIOV_STATE_FLR_RESET_CONFIG, 50 + XE_GT_SRIOV_STATE_FLR_RESET_DATA, 51 + XE_GT_SRIOV_STATE_FLR_RESET_MMIO, 52 + XE_GT_SRIOV_STATE_FLR_SEND_FINISH, 53 + XE_GT_SRIOV_STATE_FLR_FAILED, 54 + 55 + XE_GT_SRIOV_STATE_PAUSE_WIP, 56 + XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE, 57 + XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC, 58 + XE_GT_SRIOV_STATE_PAUSE_GUC_DONE, 59 + XE_GT_SRIOV_STATE_PAUSE_FAILED, 60 + XE_GT_SRIOV_STATE_PAUSED, 61 + 62 + XE_GT_SRIOV_STATE_RESUME_WIP, 63 + XE_GT_SRIOV_STATE_RESUME_SEND_RESUME, 64 + XE_GT_SRIOV_STATE_RESUME_FAILED, 65 + XE_GT_SRIOV_STATE_RESUMED, 66 + 67 + XE_GT_SRIOV_STATE_STOP_WIP, 68 + XE_GT_SRIOV_STATE_STOP_SEND_STOP, 69 + XE_GT_SRIOV_STATE_STOP_FAILED, 70 + XE_GT_SRIOV_STATE_STOPPED, 71 + 72 + XE_GT_SRIOV_STATE_MISMATCH = BITS_PER_LONG - 1, 73 + }; 74 + 75 + /** 76 + * struct xe_gt_sriov_control_state - GT-level per-VF control state. 77 + * 78 + * Used by the PF driver to maintain per-VF control data. 
79 + */ 80 + struct xe_gt_sriov_control_state { 81 + /** @state: VF state bits */ 82 + unsigned long state; 83 + 84 + /** @done: completion of async operations */ 85 + struct completion done; 86 + 87 + /** @link: link into worker list */ 88 + struct list_head link; 89 + }; 90 + 91 + /** 92 + * struct xe_gt_sriov_pf_control - GT-level control data. 93 + * 94 + * Used by the PF driver to maintain its data. 95 + */ 96 + struct xe_gt_sriov_pf_control { 97 + /** @worker: worker that executes VF operations */ 98 + struct work_struct worker; 99 + 100 + /** @list: list of VF entries that have pending work */ 101 + struct list_head list; 102 + 103 + /** @lock: protects VF pending list */ 104 + spinlock_t lock; 105 + }; 106 + 107 + #endif
+6
drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
··· 9 9 #include <linux/types.h> 10 10 11 11 #include "xe_gt_sriov_pf_config_types.h" 12 + #include "xe_gt_sriov_pf_control_types.h" 12 13 #include "xe_gt_sriov_pf_monitor_types.h" 13 14 #include "xe_gt_sriov_pf_policy_types.h" 14 15 #include "xe_gt_sriov_pf_service_types.h" ··· 24 23 /** @monitor: per-VF monitoring data. */ 25 24 struct xe_gt_sriov_monitor monitor; 26 25 26 + /** @control: per-VF control data. */ 27 + struct xe_gt_sriov_control_state control; 28 + 27 29 /** @version: negotiated VF/PF ABI version */ 28 30 struct xe_gt_sriov_pf_service_version version; 29 31 }; ··· 34 30 /** 35 31 * struct xe_gt_sriov_pf - GT level PF virtualization data. 36 32 * @service: service data. 33 + * @control: control data. 37 34 * @policy: policy data. 38 35 * @spare: PF-only provisioning configuration. 39 36 * @vfs: metadata for all VFs. 40 37 */ 41 38 struct xe_gt_sriov_pf { 42 39 struct xe_gt_sriov_pf_service service; 40 + struct xe_gt_sriov_pf_control control; 43 41 struct xe_gt_sriov_pf_policy policy; 44 42 struct xe_gt_sriov_spare_config spare; 45 43 struct xe_gt_sriov_metadata *vfs;