Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/xe/pf: Handle VRAM migration data as part of PF control

Connect the helpers to allow save and restore of VRAM migration data in
stop_copy / resume device state.

Co-developed-by: Lukasz Laguna <lukasz.laguna@intel.com>
Signed-off-by: Lukasz Laguna <lukasz.laguna@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20251112132220.516975-23-michal.winiarski@intel.com
Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>

+237
+17
drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
··· 892 892 return -EAGAIN; 893 893 } 894 894 895 + if (xe_gt_sriov_pf_migration_save_data_pending(gt, vfid, 896 + XE_SRIOV_PACKET_TYPE_VRAM)) { 897 + ret = xe_gt_sriov_pf_migration_vram_save(gt, vfid); 898 + if (ret == -EAGAIN) 899 + return -EAGAIN; 900 + else if (ret) 901 + return ret; 902 + 903 + xe_gt_sriov_pf_migration_save_data_complete(gt, vfid, 904 + XE_SRIOV_PACKET_TYPE_VRAM); 905 + 906 + return -EAGAIN; 907 + } 908 + 895 909 return 0; 896 910 } 897 911 ··· 1143 1129 break; 1144 1130 case XE_SRIOV_PACKET_TYPE_GUC: 1145 1131 ret = xe_gt_sriov_pf_migration_guc_restore(gt, vfid, data); 1132 + break; 1133 + case XE_SRIOV_PACKET_TYPE_VRAM: 1134 + ret = xe_gt_sriov_pf_migration_vram_restore(gt, vfid, data); 1146 1135 break; 1147 1136 default: 1148 1137 xe_gt_sriov_notice(gt, "Skipping VF%u unknown data type: %d\n",
+212
drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
··· 19 19 #include "xe_gt_sriov_printk.h" 20 20 #include "xe_guc_buf.h" 21 21 #include "xe_guc_ct.h" 22 + #include "xe_migrate.h" 22 23 #include "xe_mmio.h" 23 24 #include "xe_sriov.h" 24 25 #include "xe_sriov_packet.h" ··· 506 505 return pf_restore_vf_mmio_mig_data(gt, vfid, data); 507 506 } 508 507 508 + static ssize_t pf_migration_vram_size(struct xe_gt *gt, unsigned int vfid) 509 + { 510 + if (!xe_gt_is_main_type(gt)) 511 + return 0; 512 + 513 + return xe_gt_sriov_pf_config_get_lmem(gt, vfid); 514 + } 515 + 516 + static struct dma_fence *__pf_save_restore_vram(struct xe_gt *gt, unsigned int vfid, 517 + struct xe_bo *vram, u64 vram_offset, 518 + struct xe_bo *sysmem, u64 sysmem_offset, 519 + size_t size, bool save) 520 + { 521 + struct dma_fence *ret = NULL; 522 + struct drm_exec exec; 523 + int err; 524 + 525 + drm_exec_init(&exec, 0, 0); 526 + drm_exec_until_all_locked(&exec) { 527 + err = drm_exec_lock_obj(&exec, &vram->ttm.base); 528 + drm_exec_retry_on_contention(&exec); 529 + if (err) { 530 + ret = ERR_PTR(err); 531 + goto err; 532 + } 533 + 534 + err = drm_exec_lock_obj(&exec, &sysmem->ttm.base); 535 + drm_exec_retry_on_contention(&exec); 536 + if (err) { 537 + ret = ERR_PTR(err); 538 + goto err; 539 + } 540 + } 541 + 542 + ret = xe_migrate_vram_copy_chunk(vram, vram_offset, sysmem, sysmem_offset, size, 543 + save ? XE_MIGRATE_COPY_TO_SRAM : XE_MIGRATE_COPY_TO_VRAM);
544 + 545 + err: 546 + drm_exec_fini(&exec); 547 + 548 + return ret; 549 + } 550 + 551 + #define PF_VRAM_SAVE_RESTORE_TIMEOUT (5 * HZ) 552 + static int pf_save_vram_chunk(struct xe_gt *gt, unsigned int vfid, 553 + struct xe_bo *src_vram, u64 src_vram_offset, 554 + size_t size) 555 + { 556 + struct xe_sriov_packet *data; 557 + struct dma_fence *fence; 558 + int ret; 559 + 560 + data = xe_sriov_packet_alloc(gt_to_xe(gt)); 561 + if (!data) 562 + return -ENOMEM; 563 + 564 + ret = xe_sriov_packet_init(data, gt->tile->id, gt->info.id, 565 + XE_SRIOV_PACKET_TYPE_VRAM, src_vram_offset, 566 + size); 567 + if (ret) 568 + goto fail; 569 + 570 + fence = __pf_save_restore_vram(gt, vfid, 571 + src_vram, src_vram_offset, 572 + data->bo, 0, size, true); 573 + 574 + ret = dma_fence_wait_timeout(fence, false, PF_VRAM_SAVE_RESTORE_TIMEOUT); 575 + dma_fence_put(fence); 576 + if (!ret) { 577 + ret = -ETIME; 578 + goto fail; 579 + } 580 + 581 + pf_dump_mig_data(gt, vfid, data, "VRAM data save"); 582 + 583 + ret = xe_gt_sriov_pf_migration_save_produce(gt, vfid, data); 584 + if (ret) 585 + goto fail; 586 + 587 + return 0; 588 + 589 + fail: 590 + xe_sriov_packet_free(data); 591 + return ret; 592 + } 593 + 594 + #define VF_VRAM_STATE_CHUNK_MAX_SIZE SZ_512M 595 + static int pf_save_vf_vram_mig_data(struct xe_gt *gt, unsigned int vfid) 596 + { 597 + struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, vfid); 598 + loff_t *offset = &migration->save.vram_offset; 599 + struct xe_bo *vram; 600 + size_t vram_size, chunk_size; 601 + int ret; 602 + 603 + vram = xe_gt_sriov_pf_config_get_lmem_obj(gt, vfid); 604 + if (!vram) 605 + return -ENXIO; 606 + 607 + vram_size = xe_bo_size(vram); 608 + 609 + xe_gt_assert(gt, *offset < vram_size); 610 + 611 + chunk_size = min(vram_size - *offset, VF_VRAM_STATE_CHUNK_MAX_SIZE); 612 + 613 + ret = pf_save_vram_chunk(gt, vfid, vram, *offset, chunk_size); 614 + if (ret) 615 + goto fail; 616 +
617 + *offset += chunk_size; 618 + 619 + xe_bo_put(vram); 620 + 621 + if (*offset < vram_size) 622 + return -EAGAIN; 623 + 624 + return 0; 625 + 626 + fail: 627 + xe_bo_put(vram); 628 + xe_gt_sriov_err(gt, "Failed to save VF%u VRAM data (%pe)\n", vfid, ERR_PTR(ret)); 629 + return ret; 630 + } 631 + 632 + static int pf_restore_vf_vram_mig_data(struct xe_gt *gt, unsigned int vfid, 633 + struct xe_sriov_packet *data) 634 + { 635 + u64 end = data->hdr.offset + data->hdr.size; 636 + struct dma_fence *fence; 637 + struct xe_bo *vram; 638 + size_t size; 639 + int ret = 0; 640 + 641 + vram = xe_gt_sriov_pf_config_get_lmem_obj(gt, vfid); 642 + if (!vram) 643 + return -ENXIO; 644 + 645 + size = xe_bo_size(vram); 646 + 647 + if (end > size || end < data->hdr.size) { 648 + ret = -EINVAL; 649 + goto err; 650 + } 651 + 652 + pf_dump_mig_data(gt, vfid, data, "VRAM data restore"); 653 + 654 + fence = __pf_save_restore_vram(gt, vfid, vram, data->hdr.offset, 655 + data->bo, 0, data->hdr.size, false); 656 + ret = dma_fence_wait_timeout(fence, false, PF_VRAM_SAVE_RESTORE_TIMEOUT); 657 + dma_fence_put(fence); 658 + if (!ret) { 659 + ret = -ETIME; 660 + goto err; 661 + } 662 + 663 + return 0; 664 + err: 665 + xe_bo_put(vram); 666 + xe_gt_sriov_err(gt, "Failed to restore VF%u VRAM data (%pe)\n", vfid, ERR_PTR(ret)); 667 + return ret; 668 + } 669 + 670 + /** 671 + * xe_gt_sriov_pf_migration_vram_save() - Save VF VRAM migration data. 672 + * @gt: the &xe_gt 673 + * @vfid: the VF identifier (can't be 0) 674 + * 675 + * This function is for PF only. 676 + * 677 + * Return: 0 on success or a negative error code on failure.
678 + */ 679 + int xe_gt_sriov_pf_migration_vram_save(struct xe_gt *gt, unsigned int vfid) 680 + { 681 + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); 682 + xe_gt_assert(gt, vfid != PFID); 683 + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); 684 + 685 + return pf_save_vf_vram_mig_data(gt, vfid); 686 + } 687 + 688 + /** 689 + * xe_gt_sriov_pf_migration_vram_restore() - Restore VF VRAM migration data. 690 + * @gt: the &xe_gt 691 + * @vfid: the VF identifier (can't be 0) 692 + * @data: the &xe_sriov_packet containing migration data 693 + * 694 + * This function is for PF only. 695 + * 696 + * Return: 0 on success or a negative error code on failure. 697 + */ 698 + int xe_gt_sriov_pf_migration_vram_restore(struct xe_gt *gt, unsigned int vfid, 699 + struct xe_sriov_packet *data) 700 + { 701 + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); 702 + xe_gt_assert(gt, vfid != PFID); 703 + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); 704 + 705 + return pf_restore_vf_vram_mig_data(gt, vfid, data); 706 + } 707 + 509 708 /** 510 709 * xe_gt_sriov_pf_migration_size() - Total size of migration data from all components within a GT. 
511 710 * @gt: the &xe_gt ··· 739 538 total += size; 740 539 741 540 size = pf_migration_mmio_size(gt, vfid); 541 + if (size < 0) 542 + return size; 543 + if (size > 0) 544 + size += sizeof(struct xe_sriov_packet_hdr); 545 + total += size; 546 + 547 + size = pf_migration_vram_size(gt, vfid); 742 548 if (size < 0) 743 549 return size; 744 550 if (size > 0) ··· 814 606 struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, vfid); 815 607 816 608 migration->save.data_remaining = 0; 609 + migration->save.vram_offset = 0; 817 610 818 611 xe_gt_assert(gt, pf_migration_guc_size(gt, vfid) > 0); 819 612 pf_migration_save_data_todo(gt, vfid, XE_SRIOV_PACKET_TYPE_GUC); ··· 824 615 825 616 xe_gt_assert(gt, pf_migration_mmio_size(gt, vfid) > 0); 826 617 pf_migration_save_data_todo(gt, vfid, XE_SRIOV_PACKET_TYPE_MMIO); 618 + 619 + if (pf_migration_vram_size(gt, vfid) > 0) 620 + pf_migration_save_data_todo(gt, vfid, XE_SRIOV_PACKET_TYPE_VRAM); 827 621 } 828 622 829 623 /**
+3
drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
··· 25 25 int xe_gt_sriov_pf_migration_mmio_save(struct xe_gt *gt, unsigned int vfid); 26 26 int xe_gt_sriov_pf_migration_mmio_restore(struct xe_gt *gt, unsigned int vfid, 27 27 struct xe_sriov_packet *data); 28 + int xe_gt_sriov_pf_migration_vram_save(struct xe_gt *gt, unsigned int vfid); 29 + int xe_gt_sriov_pf_migration_vram_restore(struct xe_gt *gt, unsigned int vfid, 30 + struct xe_sriov_packet *data); 28 31 29 32 ssize_t xe_gt_sriov_pf_migration_size(struct xe_gt *gt, unsigned int vfid); 30 33
+2
drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h
··· 20 20 struct { 21 21 /** @save.data_remaining: bitmap of migration types that need to be saved */ 22 22 unsigned long data_remaining; 23 + /** @save.vram_offset: last saved offset within VRAM, used for chunked VRAM save */ 24 + loff_t vram_offset; 23 25 } save; 24 26 }; 25 27
+3
drivers/gpu/drm/xe/xe_sriov_pf_control.c
··· 5 5 6 6 #include "xe_device.h" 7 7 #include "xe_gt_sriov_pf_control.h" 8 + #include "xe_gt_sriov_pf_migration.h" 8 9 #include "xe_sriov_packet.h" 9 10 #include "xe_sriov_pf_control.h" 10 11 #include "xe_sriov_printk.h" ··· 172 171 return ret; 173 172 174 173 for_each_gt(gt, xe, id) { 174 + xe_gt_sriov_pf_migration_save_init(gt, vfid); 175 + 175 176 ret = xe_gt_sriov_pf_control_trigger_save_vf(gt, vfid); 176 177 if (ret) 177 178 return ret;