Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/xe/pf: Add data structures and handlers for migration rings

Migration data is queued in a per-GT ptr_ring to decouple the worker
responsible for handling the data transfer from the .read() and .write()
syscalls.
Add the data structures and handlers that will be used in future
commits.

Reviewed-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Link: https://patch.msgid.link/20251112132220.516975-6-michal.winiarski@intel.com
Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>

+762 -13
+298 -13
drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
··· 18 18 #include "xe_gt_sriov_printk.h" 19 19 #include "xe_guc_ct.h" 20 20 #include "xe_sriov.h" 21 + #include "xe_sriov_packet_types.h" 21 22 #include "xe_sriov_pf_control.h" 23 + #include "xe_sriov_pf_migration.h" 22 24 #include "xe_sriov_pf_service.h" 23 25 #include "xe_tile.h" 24 26 ··· 187 185 CASE2STR(PAUSE_FAILED); 188 186 CASE2STR(PAUSED); 189 187 CASE2STR(SAVE_WIP); 188 + CASE2STR(SAVE_PROCESS_DATA); 189 + CASE2STR(SAVE_WAIT_DATA); 190 + CASE2STR(SAVE_DATA_DONE); 190 191 CASE2STR(SAVE_FAILED); 191 192 CASE2STR(SAVED); 192 193 CASE2STR(RESTORE_WIP); 194 + CASE2STR(RESTORE_PROCESS_DATA); 195 + CASE2STR(RESTORE_WAIT_DATA); 196 + CASE2STR(RESTORE_DATA_DONE); 193 197 CASE2STR(RESTORE_FAILED); 194 198 CASE2STR(RESTORED); 195 199 CASE2STR(RESUME_WIP); ··· 812 804 return -ECANCELED; 813 805 } 814 806 807 + /** 808 + * DOC: The VF SAVE state machine 809 + * 810 + * SAVE extends the PAUSED state. 811 + * 812 + * The VF SAVE state machine looks like:: 813 + * 814 + * ....PAUSED.................................................... 815 + * : : 816 + * : (O)<---------o : 817 + * : | \ : 818 + * : save (SAVED) (SAVE_FAILED) : 819 + * : | ^ ^ : 820 + * : | | | : 821 + * : ....V...............o...........o......SAVE_WIP......... : 822 + * : : | | | : : 823 + * : : | empty | : : 824 + * : : | | | : : 825 + * : : | | | : : 826 + * : : | DATA_DONE | : : 827 + * : : | ^ | : : 828 + * : : | | error : : 829 + * : : | no_data / : : 830 + * : : | / / : : 831 + * : : | / / : : 832 + * : : | / / : : 833 + * : : o---------->PROCESS_DATA<----consume : : 834 + * : : \ \ : : 835 + * : : \ \ : : 836 + * : : \ \ : : 837 + * : : ring_full----->WAIT_DATA : : 838 + * : : : : 839 + * : :......................................................: : 840 + * :............................................................: 841 + * 842 + * For the full state machine view, see `The VF state machine`_. 
843 + */ 844 + 815 845 static void pf_exit_vf_save_wip(struct xe_gt *gt, unsigned int vfid) 816 846 { 817 - pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP); 847 + if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) { 848 + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA); 849 + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WAIT_DATA); 850 + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE); 851 + } 818 852 } 819 853 820 854 static void pf_enter_vf_saved(struct xe_gt *gt, unsigned int vfid) ··· 871 821 pf_exit_vf_wip(gt, vfid); 872 822 } 873 823 824 + static void pf_enter_vf_save_failed(struct xe_gt *gt, unsigned int vfid) 825 + { 826 + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_FAILED)) 827 + pf_enter_vf_state_machine_bug(gt, vfid); 828 + 829 + wake_up_all(xe_sriov_pf_migration_waitqueue(gt_to_xe(gt), vfid)); 830 + 831 + pf_exit_vf_wip(gt, vfid); 832 + } 833 + 834 + static int pf_handle_vf_save_data(struct xe_gt *gt, unsigned int vfid) 835 + { 836 + return 0; 837 + } 838 + 874 839 static bool pf_handle_vf_save(struct xe_gt *gt, unsigned int vfid) 875 840 { 876 - if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) 841 + int ret; 842 + 843 + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA)) 877 844 return false; 878 845 879 - pf_enter_vf_saved(gt, vfid); 846 + if (xe_gt_sriov_pf_migration_ring_full(gt, vfid)) { 847 + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WAIT_DATA); 848 + return true; 849 + } 850 + 851 + ret = pf_handle_vf_save_data(gt, vfid); 852 + if (ret == -EAGAIN) 853 + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA); 854 + else if (ret) 855 + pf_enter_vf_save_failed(gt, vfid); 856 + else 857 + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE); 880 858 881 859 return true; 860 + } 861 + 862 + static void pf_exit_vf_save_wait_data(struct xe_gt *gt, unsigned int vfid) 863 + { 864 + if (!pf_exit_vf_state(gt, vfid, 
XE_GT_SRIOV_STATE_SAVE_WAIT_DATA)) 865 + return; 866 + 867 + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA); 868 + pf_queue_vf(gt, vfid); 882 869 } 883 870 884 871 static bool pf_enter_vf_save_wip(struct xe_gt *gt, unsigned int vfid) 885 872 { 886 873 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) { 887 874 pf_enter_vf_wip(gt, vfid); 875 + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA); 888 876 pf_queue_vf(gt, vfid); 889 877 return true; 890 878 } 891 879 892 880 return false; 881 + } 882 + 883 + /** 884 + * xe_gt_sriov_pf_control_check_save_data_done() - Check if all save migration data was produced. 885 + * @gt: the &xe_gt 886 + * @vfid: the VF identifier 887 + * 888 + * This function is for PF only. 889 + * 890 + * Return: true if all migration data was produced, false otherwise. 891 + */ 892 + bool xe_gt_sriov_pf_control_check_save_data_done(struct xe_gt *gt, unsigned int vfid) 893 + { 894 + return pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE); 895 + } 896 + 897 + /** 898 + * xe_gt_sriov_pf_control_check_save_failed() - Check if save processing has failed. 899 + * @gt: the &xe_gt 900 + * @vfid: the VF identifier 901 + * 902 + * This function is for PF only. 903 + * 904 + * Return: true if save processing failed, false otherwise. 905 + */ 906 + bool xe_gt_sriov_pf_control_check_save_failed(struct xe_gt *gt, unsigned int vfid) 907 + { 908 + return pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_FAILED); 909 + } 910 + 911 + /** 912 + * xe_gt_sriov_pf_control_process_save_data() - Queue VF save migration data processing. 913 + * @gt: the &xe_gt 914 + * @vfid: the VF identifier 915 + * 916 + * This function is for PF only. 917 + * 918 + * Return: 0 on success or a negative error code on failure. 
919 + */ 920 + int xe_gt_sriov_pf_control_process_save_data(struct xe_gt *gt, unsigned int vfid) 921 + { 922 + if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_FAILED)) 923 + return -EIO; 924 + 925 + pf_exit_vf_save_wait_data(gt, vfid); 926 + 927 + return 0; 893 928 } 894 929 895 930 /** ··· 1022 887 */ 1023 888 int xe_gt_sriov_pf_control_finish_save_vf(struct xe_gt *gt, unsigned int vfid) 1024 889 { 1025 - if (!pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED)) { 1026 - pf_enter_vf_mismatch(gt, vfid); 890 + if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE)) { 891 + xe_gt_sriov_err(gt, "VF%u save is still in progress!\n", vfid); 1027 892 return -EIO; 1028 893 } 1029 894 1030 895 pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); 896 + pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE); 897 + pf_enter_vf_saved(gt, vfid); 1031 898 1032 899 return 0; 1033 900 } 1034 901 902 + /** 903 + * DOC: The VF RESTORE state machine 904 + * 905 + * RESTORE extends the PAUSED state. 906 + * 907 + * The VF RESTORE state machine looks like:: 908 + * 909 + * ....PAUSED.................................................... 910 + * : : 911 + * : (O)<---------o : 912 + * : | \ : 913 + * : restore (RESTORED) (RESTORE_FAILED) : 914 + * : | ^ ^ : 915 + * : | | | : 916 + * : ....V...............o...........o......RESTORE_WIP...... 
: 917 + * : : | | | : : 918 + * : : | empty | : : 919 + * : : | | | : : 920 + * : : | | | : : 921 + * : : | DATA_DONE | : : 922 + * : : | ^ | : : 923 + * : : | | error : : 924 + * : : | trailer / : : 925 + * : : | / / : : 926 + * : : | / / : : 927 + * : : | / / : : 928 + * : : o---------->PROCESS_DATA<----produce : : 929 + * : : \ \ : : 930 + * : : \ \ : : 931 + * : : \ \ : : 932 + * : : ring_empty---->WAIT_DATA : : 933 + * : : : : 934 + * : :......................................................: : 935 + * :............................................................: 936 + * 937 + * For the full state machine view, see `The VF state machine`_. 938 + */ 939 + 1035 940 static void pf_exit_vf_restore_wip(struct xe_gt *gt, unsigned int vfid) 1036 941 { 1037 - pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP); 942 + if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) { 943 + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA); 944 + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA); 945 + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_DATA_DONE); 946 + } 1038 947 } 1039 948 1040 949 static void pf_enter_vf_restored(struct xe_gt *gt, unsigned int vfid) ··· 1093 914 pf_exit_vf_wip(gt, vfid); 1094 915 } 1095 916 917 + static void pf_enter_vf_restore_failed(struct xe_gt *gt, unsigned int vfid) 918 + { 919 + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED)) 920 + pf_enter_vf_state_machine_bug(gt, vfid); 921 + 922 + wake_up_all(xe_sriov_pf_migration_waitqueue(gt_to_xe(gt), vfid)); 923 + 924 + pf_exit_vf_wip(gt, vfid); 925 + } 926 + 927 + static int pf_handle_vf_restore_data(struct xe_gt *gt, unsigned int vfid) 928 + { 929 + struct xe_sriov_packet *data = xe_gt_sriov_pf_migration_restore_consume(gt, vfid); 930 + 931 + xe_gt_sriov_notice(gt, "Skipping VF%u unknown data type: %d\n", vfid, data->hdr.type); 932 + 933 + return 0; 934 + } 935 + 1096 936 static bool pf_handle_vf_restore(struct xe_gt *gt, 
unsigned int vfid) 1097 937 { 1098 - if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) 938 + int ret; 939 + 940 + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA)) 1099 941 return false; 1100 942 1101 - pf_enter_vf_restored(gt, vfid); 943 + if (xe_gt_sriov_pf_migration_ring_empty(gt, vfid)) { 944 + if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_DATA_DONE)) 945 + pf_enter_vf_restored(gt, vfid); 946 + else 947 + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA); 948 + 949 + return true; 950 + } 951 + 952 + ret = pf_handle_vf_restore_data(gt, vfid); 953 + if (ret) 954 + pf_enter_vf_restore_failed(gt, vfid); 955 + else 956 + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA); 1102 957 1103 958 return true; 959 + } 960 + 961 + static void pf_exit_vf_restore_wait_data(struct xe_gt *gt, unsigned int vfid) 962 + { 963 + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA)) 964 + return; 965 + 966 + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA); 967 + pf_queue_vf(gt, vfid); 1104 968 } 1105 969 1106 970 static bool pf_enter_vf_restore_wip(struct xe_gt *gt, unsigned int vfid) 1107 971 { 1108 972 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) { 1109 973 pf_enter_vf_wip(gt, vfid); 974 + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA); 1110 975 pf_queue_vf(gt, vfid); 1111 976 return true; 1112 977 } 1113 978 1114 979 return false; 980 + } 981 + 982 + /** 983 + * xe_gt_sriov_pf_control_check_restore_failed() - Check if restore processing has failed. 984 + * @gt: the &xe_gt 985 + * @vfid: the VF identifier 986 + * 987 + * This function is for PF only. 988 + * 989 + * Return: true if restore processing failed, false otherwise. 
990 + */ 991 + bool xe_gt_sriov_pf_control_check_restore_failed(struct xe_gt *gt, unsigned int vfid) 992 + { 993 + return pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED); 994 + } 995 + 996 + /** 997 + * xe_gt_sriov_pf_control_restore_data_done() - Indicate the end of VF migration data stream. 998 + * @gt: the &xe_gt 999 + * @vfid: the VF identifier 1000 + * 1001 + * This function is for PF only. 1002 + * 1003 + * Return: 0 on success or a negative error code on failure. 1004 + */ 1005 + int xe_gt_sriov_pf_control_restore_data_done(struct xe_gt *gt, unsigned int vfid) 1006 + { 1007 + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_DATA_DONE)) { 1008 + pf_enter_vf_state_machine_bug(gt, vfid); 1009 + return -EIO; 1010 + } 1011 + 1012 + return xe_gt_sriov_pf_control_process_restore_data(gt, vfid); 1013 + } 1014 + 1015 + /** 1016 + * xe_gt_sriov_pf_control_process_restore_data() - Queue VF restore migration data processing. 1017 + * @gt: the &xe_gt 1018 + * @vfid: the VF identifier 1019 + * 1020 + * This function is for PF only. 1021 + * 1022 + * Return: 0 on success or a negative error code on failure. 
1023 + */ 1024 + int xe_gt_sriov_pf_control_process_restore_data(struct xe_gt *gt, unsigned int vfid) 1025 + { 1026 + if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED)) 1027 + return -EIO; 1028 + 1029 + pf_exit_vf_restore_wait_data(gt, vfid); 1030 + 1031 + return 0; 1115 1032 } 1116 1033 1117 1034 /** ··· 1275 1000 { 1276 1001 int ret; 1277 1002 1278 - if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) { 1279 - ret = pf_wait_vf_restore_done(gt, vfid); 1280 - if (ret) 1281 - return ret; 1282 - } 1003 + ret = pf_wait_vf_restore_done(gt, vfid); 1004 + if (ret) 1005 + return ret; 1283 1006 1284 1007 if (!pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED)) { 1285 1008 pf_enter_vf_mismatch(gt, vfid); ··· 1978 1705 if (pf_exit_vf_pause_save_guc(gt, vfid)) 1979 1706 return true; 1980 1707 1708 + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WAIT_DATA)) { 1709 + xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid, 1710 + control_bit_to_string(XE_GT_SRIOV_STATE_SAVE_WAIT_DATA)); 1711 + return false; 1712 + } 1713 + 1981 1714 if (pf_handle_vf_save(gt, vfid)) 1982 1715 return true; 1716 + 1717 + if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA)) { 1718 + xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid, 1719 + control_bit_to_string(XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA)); 1720 + return false; 1721 + } 1983 1722 1984 1723 if (pf_handle_vf_restore(gt, vfid)) 1985 1724 return true;
+6
drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h
··· 16 16 17 17 int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid); 18 18 int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid); 19 + bool xe_gt_sriov_pf_control_check_save_data_done(struct xe_gt *gt, unsigned int vfid); 20 + bool xe_gt_sriov_pf_control_check_save_failed(struct xe_gt *gt, unsigned int vfid); 21 + int xe_gt_sriov_pf_control_process_save_data(struct xe_gt *gt, unsigned int vfid); 19 22 int xe_gt_sriov_pf_control_trigger_save_vf(struct xe_gt *gt, unsigned int vfid); 20 23 int xe_gt_sriov_pf_control_finish_save_vf(struct xe_gt *gt, unsigned int vfid); 24 + int xe_gt_sriov_pf_control_restore_data_done(struct xe_gt *gt, unsigned int vfid); 25 + bool xe_gt_sriov_pf_control_check_restore_failed(struct xe_gt *gt, unsigned int vfid); 26 + int xe_gt_sriov_pf_control_process_restore_data(struct xe_gt *gt, unsigned int vfid); 21 27 int xe_gt_sriov_pf_control_trigger_restore_vf(struct xe_gt *gt, unsigned int vfid); 22 28 int xe_gt_sriov_pf_control_finish_restore_vf(struct xe_gt *gt, unsigned int vfid); 23 29 int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid);
+12
drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h
··· 32 32 * @XE_GT_SRIOV_STATE_PAUSE_FAILED: indicates that a VF pause operation has failed. 33 33 * @XE_GT_SRIOV_STATE_PAUSED: indicates that the VF is paused. 34 34 * @XE_GT_SRIOV_STATE_SAVE_WIP: indicates that VF save operation is in progress. 35 + * @XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA: indicates that VF migration data is being produced. 36 + * @XE_GT_SRIOV_STATE_SAVE_WAIT_DATA: indicates that PF awaits for space in migration data ring. 37 + * @XE_GT_SRIOV_STATE_SAVE_DATA_DONE: indicates that all migration data was produced by Xe. 35 38 * @XE_GT_SRIOV_STATE_SAVE_FAILED: indicates that VF save operation has failed. 36 39 * @XE_GT_SRIOV_STATE_SAVED: indicates that VF data is saved. 37 40 * @XE_GT_SRIOV_STATE_RESTORE_WIP: indicates that VF restore operation is in progress. 41 + * @XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA: indicates that VF migration data is being consumed. 42 + * @XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA: indicates that PF awaits for data in migration data ring. 43 + * @XE_GT_SRIOV_STATE_RESTORE_DATA_DONE: indicates that all migration data was produced by the user. 38 44 * @XE_GT_SRIOV_STATE_RESTORE_FAILED: indicates that VF restore operation has failed. 39 45 * @XE_GT_SRIOV_STATE_RESTORED: indicates that VF data is restored. 40 46 * @XE_GT_SRIOV_STATE_RESUME_WIP: indicates that a VF resume operation is in progress. ··· 76 70 XE_GT_SRIOV_STATE_PAUSED, 77 71 78 72 XE_GT_SRIOV_STATE_SAVE_WIP, 73 + XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA, 74 + XE_GT_SRIOV_STATE_SAVE_WAIT_DATA, 75 + XE_GT_SRIOV_STATE_SAVE_DATA_DONE, 79 76 XE_GT_SRIOV_STATE_SAVE_FAILED, 80 77 XE_GT_SRIOV_STATE_SAVED, 81 78 82 79 XE_GT_SRIOV_STATE_RESTORE_WIP, 80 + XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA, 81 + XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA, 82 + XE_GT_SRIOV_STATE_RESTORE_DATA_DONE, 83 83 XE_GT_SRIOV_STATE_RESTORE_FAILED, 84 84 XE_GT_SRIOV_STATE_RESTORED, 85 85
+200
drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
··· 7 7 8 8 #include "abi/guc_actions_sriov_abi.h" 9 9 #include "xe_bo.h" 10 + #include "xe_gt_sriov_pf_control.h" 10 11 #include "xe_gt_sriov_pf_helpers.h" 11 12 #include "xe_gt_sriov_pf_migration.h" 12 13 #include "xe_gt_sriov_printk.h" 13 14 #include "xe_guc.h" 14 15 #include "xe_guc_ct.h" 15 16 #include "xe_sriov.h" 17 + #include "xe_sriov_packet_types.h" 16 18 #include "xe_sriov_pf_migration.h" 19 + 20 + #define XE_GT_SRIOV_PF_MIGRATION_RING_SIZE 5 21 + 22 + static struct xe_gt_sriov_migration_data *pf_pick_gt_migration(struct xe_gt *gt, unsigned int vfid) 23 + { 24 + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); 25 + xe_gt_assert(gt, vfid != PFID); 26 + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); 27 + 28 + return &gt->sriov.pf.vfs[vfid].migration; 29 + } 17 30 18 31 /* Return: number of dwords saved/restored/required or a negative error code on failure */ 19 32 static int guc_action_vf_save_restore(struct xe_guc *guc, u32 vfid, u32 opcode, ··· 396 383 #endif /* CONFIG_DEBUG_FS */ 397 384 398 385 /** 386 + * xe_gt_sriov_pf_migration_ring_empty() - Check if a migration ring is empty. 387 + * @gt: the &xe_gt 388 + * @vfid: the VF identifier 389 + * 390 + * Return: true if the ring is empty, otherwise false. 391 + */ 392 + bool xe_gt_sriov_pf_migration_ring_empty(struct xe_gt *gt, unsigned int vfid) 393 + { 394 + return ptr_ring_empty(&pf_pick_gt_migration(gt, vfid)->ring); 395 + } 396 + 397 + /** 398 + * xe_gt_sriov_pf_migration_ring_full() - Check if a migration ring is full. 399 + * @gt: the &xe_gt 400 + * @vfid: the VF identifier 401 + * 402 + * Return: true if the ring is full, otherwise false. 403 + */ 404 + bool xe_gt_sriov_pf_migration_ring_full(struct xe_gt *gt, unsigned int vfid) 405 + { 406 + return ptr_ring_full(&pf_pick_gt_migration(gt, vfid)->ring); 407 + } 408 + 409 + /** 410 + * xe_gt_sriov_pf_migration_save_produce() - Add VF save data packet to migration ring. 
411 + * @gt: the &xe_gt 412 + * @vfid: the VF identifier 413 + * @data: the &xe_sriov_packet 414 + * 415 + * Called by the save migration data producer (PF SR-IOV Control worker) when 416 + * processing migration data. 417 + * Wakes up the save migration data consumer (userspace), that is potentially 418 + * waiting for data when the ring was empty. 419 + * 420 + * Return: 0 on success or a negative error code on failure. 421 + */ 422 + int xe_gt_sriov_pf_migration_save_produce(struct xe_gt *gt, unsigned int vfid, 423 + struct xe_sriov_packet *data) 424 + { 425 + int ret; 426 + 427 + ret = ptr_ring_produce(&pf_pick_gt_migration(gt, vfid)->ring, data); 428 + if (ret) 429 + return ret; 430 + 431 + wake_up_all(xe_sriov_pf_migration_waitqueue(gt_to_xe(gt), vfid)); 432 + 433 + return 0; 434 + } 435 + 436 + /** 437 + * xe_gt_sriov_pf_migration_restore_consume() - Get VF restore data packet from migration ring. 438 + * @gt: the &xe_gt 439 + * @vfid: the VF identifier 440 + * 441 + * Called by the restore migration data consumer (PF SR-IOV Control worker) when 442 + * processing migration data. 443 + * Wakes up the restore migration data producer (userspace), that is 444 + * potentially waiting to add more data when the ring is full. 445 + * 446 + * Return: Pointer to &xe_sriov_packet on success, 447 + * NULL if ring is empty. 
448 + */ 449 + struct xe_sriov_packet * 450 + xe_gt_sriov_pf_migration_restore_consume(struct xe_gt *gt, unsigned int vfid) 451 + { 452 + struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, vfid); 453 + struct wait_queue_head *wq = xe_sriov_pf_migration_waitqueue(gt_to_xe(gt), vfid); 454 + struct xe_sriov_packet *data; 455 + 456 + data = ptr_ring_consume(&migration->ring); 457 + if (data) 458 + wake_up_all(wq); 459 + 460 + return data; 461 + } 462 + 463 + static bool pf_restore_data_ready(struct xe_gt *gt, unsigned int vfid) 464 + { 465 + if (xe_gt_sriov_pf_control_check_restore_failed(gt, vfid) || 466 + !ptr_ring_full(&pf_pick_gt_migration(gt, vfid)->ring)) 467 + return true; 468 + 469 + return false; 470 + } 471 + 472 + /** 473 + * xe_gt_sriov_pf_migration_restore_produce() - Add VF restore data packet to migration ring. 474 + * @gt: the &xe_gt 475 + * @vfid: the VF identifier 476 + * @data: the &xe_sriov_packet 477 + * 478 + * Called by the restore migration data producer (userspace) when processing 479 + * migration data. 480 + * If the ring is full, waits until there is space. 481 + * Queues the restore migration data consumer (PF SR-IOV Control worker), that 482 + * is potentially waiting for data when the ring was empty. 483 + * 484 + * Return: 0 on success or a negative error code on failure. 
485 + */ 486 + int xe_gt_sriov_pf_migration_restore_produce(struct xe_gt *gt, unsigned int vfid, 487 + struct xe_sriov_packet *data) 488 + { 489 + int ret; 490 + 491 + xe_gt_assert(gt, data->hdr.tile_id == gt->tile->id); 492 + xe_gt_assert(gt, data->hdr.gt_id == gt->info.id); 493 + 494 + for (;;) { 495 + if (xe_gt_sriov_pf_control_check_restore_failed(gt, vfid)) 496 + return -EIO; 497 + 498 + ret = ptr_ring_produce(&pf_pick_gt_migration(gt, vfid)->ring, data); 499 + if (!ret) 500 + break; 501 + 502 + ret = wait_event_interruptible(*xe_sriov_pf_migration_waitqueue(gt_to_xe(gt), vfid), 503 + pf_restore_data_ready(gt, vfid)); 504 + if (ret) 505 + return ret; 506 + } 507 + 508 + return xe_gt_sriov_pf_control_process_restore_data(gt, vfid); 509 + } 510 + 511 + /** 512 + * xe_gt_sriov_pf_migration_save_consume() - Get VF save data packet from migration ring. 513 + * @gt: the &xe_gt 514 + * @vfid: the VF identifier 515 + * 516 + * Called by the save migration data consumer (userspace) when 517 + * processing migration data. 518 + * Queues the save migration data producer (PF SR-IOV Control worker), that is 519 + * potentially waiting to add more data when the ring is full. 520 + * 521 + * Return: Pointer to &xe_sriov_packet on success, 522 + * NULL if ring is empty and there's no more data available, 523 + * ERR_PTR(-EAGAIN) if the ring is empty, but data is still produced. 
524 + */ 525 + struct xe_sriov_packet * 526 + xe_gt_sriov_pf_migration_save_consume(struct xe_gt *gt, unsigned int vfid) 527 + { 528 + struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, vfid); 529 + struct xe_sriov_packet *data; 530 + int ret; 531 + 532 + data = ptr_ring_consume(&migration->ring); 533 + if (data) { 534 + ret = xe_gt_sriov_pf_control_process_save_data(gt, vfid); 535 + if (ret) 536 + return ERR_PTR(ret); 537 + 538 + return data; 539 + } 540 + 541 + if (xe_gt_sriov_pf_control_check_save_data_done(gt, vfid)) 542 + return NULL; 543 + 544 + if (xe_gt_sriov_pf_control_check_save_failed(gt, vfid)) 545 + return ERR_PTR(-EIO); 546 + 547 + return ERR_PTR(-EAGAIN); 548 + } 549 + 550 + static void action_ring_cleanup(void *arg) 551 + { 552 + struct ptr_ring *r = arg; 553 + 554 + ptr_ring_cleanup(r, NULL); 555 + } 556 + 557 + /** 399 558 * xe_gt_sriov_pf_migration_init() - Initialize support for VF migration. 400 559 * @gt: the &xe_gt 401 560 * ··· 578 393 int xe_gt_sriov_pf_migration_init(struct xe_gt *gt) 579 394 { 580 395 struct xe_device *xe = gt_to_xe(gt); 396 + unsigned int n, totalvfs; 581 397 int err; 582 398 583 399 xe_gt_assert(gt, IS_SRIOV_PF(xe)); ··· 589 403 err = drmm_mutex_init(&xe->drm, &gt->sriov.pf.migration.snapshot_lock); 590 404 if (err) 591 405 return err; 406 + 407 + totalvfs = xe_sriov_pf_get_totalvfs(xe); 408 + for (n = 1; n <= totalvfs; n++) { 409 + struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, n); 410 + 411 + err = ptr_ring_init(&migration->ring, 412 + XE_GT_SRIOV_PF_MIGRATION_RING_SIZE, GFP_KERNEL); 413 + if (err) 414 + return err; 415 + 416 + err = devm_add_action_or_reset(xe->drm.dev, action_ring_cleanup, &migration->ring); 417 + if (err) 418 + return err; 419 + } 592 420 593 421 return 0; 594 422 }
+14
drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
··· 9 9 #include <linux/types.h> 10 10 11 11 struct xe_gt; 12 + struct xe_sriov_packet; 12 13 13 14 int xe_gt_sriov_pf_migration_init(struct xe_gt *gt); 14 15 int xe_gt_sriov_pf_migration_save_guc_state(struct xe_gt *gt, unsigned int vfid); 15 16 int xe_gt_sriov_pf_migration_restore_guc_state(struct xe_gt *gt, unsigned int vfid); 17 + 18 + bool xe_gt_sriov_pf_migration_ring_empty(struct xe_gt *gt, unsigned int vfid); 19 + bool xe_gt_sriov_pf_migration_ring_full(struct xe_gt *gt, unsigned int vfid); 20 + 21 + int xe_gt_sriov_pf_migration_save_produce(struct xe_gt *gt, unsigned int vfid, 22 + struct xe_sriov_packet *data); 23 + struct xe_sriov_packet * 24 + xe_gt_sriov_pf_migration_restore_consume(struct xe_gt *gt, unsigned int vfid); 25 + 26 + int xe_gt_sriov_pf_migration_restore_produce(struct xe_gt *gt, unsigned int vfid, 27 + struct xe_sriov_packet *data); 28 + struct xe_sriov_packet * 29 + xe_gt_sriov_pf_migration_save_consume(struct xe_gt *gt, unsigned int vfid); 16 30 17 31 #ifdef CONFIG_DEBUG_FS 18 32 ssize_t xe_gt_sriov_pf_migration_read_guc_state(struct xe_gt *gt, unsigned int vfid,
+11
drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h
··· 7 7 #define _XE_GT_SRIOV_PF_MIGRATION_TYPES_H_ 8 8 9 9 #include <linux/mutex.h> 10 + #include <linux/ptr_ring.h> 10 11 #include <linux/types.h> 11 12 12 13 /** ··· 23 22 /** @guc.size: size of the buffer (must be dwords aligned) */ 24 23 u32 size; 25 24 } guc; 25 + }; 26 + 27 + /** 28 + * struct xe_gt_sriov_migration_data - GT-level per-VF migration data. 29 + * 30 + * Used by the PF driver to maintain per-VF migration data. 31 + */ 32 + struct xe_gt_sriov_migration_data { 33 + /** @ring: queue containing VF save / restore migration data */ 34 + struct ptr_ring ring; 26 35 }; 27 36 28 37 /**
+3
drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
··· 33 33 34 34 /** @snapshot: snapshot of the VF state data */ 35 35 struct xe_gt_sriov_state_snapshot snapshot; 36 + 37 + /** @migration: per-VF migration data. */ 38 + struct xe_gt_sriov_migration_data migration; 36 39 }; 37 40 38 41 /**
+56
drivers/gpu/drm/xe/xe_sriov_packet_types.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2025 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_SRIOV_PACKET_TYPES_H_ 7 + #define _XE_SRIOV_PACKET_TYPES_H_ 8 + 9 + #include <linux/types.h> 10 + 11 + /** 12 + * struct xe_sriov_packet_hdr - Xe SR-IOV VF migration data packet header 13 + */ 14 + struct xe_sriov_packet_hdr { 15 + /** @version: migration data protocol version */ 16 + u8 version; 17 + /** @type: migration data type */ 18 + u8 type; 19 + /** @tile_id: migration data tile id */ 20 + u8 tile_id; 21 + /** @gt_id: migration data gt id */ 22 + u8 gt_id; 23 + /** @flags: migration data flags */ 24 + u32 flags; 25 + /** 26 + * @offset: offset into the resource; 27 + * used when multiple packets of given type are used for migration 28 + */ 29 + u64 offset; 30 + /** @size: migration data size */ 31 + u64 size; 32 + } __packed; 33 + 34 + /** 35 + * struct xe_sriov_packet - Xe SR-IOV VF migration data packet 36 + */ 37 + struct xe_sriov_packet { 38 + /** @xe: the PF &xe_device this data packet belongs to */ 39 + struct xe_device *xe; 40 + /** @vaddr: CPU pointer to payload data */ 41 + void *vaddr; 42 + /** @remaining: payload data remaining */ 43 + size_t remaining; 44 + /** @hdr_remaining: header data remaining */ 45 + size_t hdr_remaining; 46 + union { 47 + /** @bo: Buffer object with migration data */ 48 + struct xe_bo *bo; 49 + /** @buff: Buffer with migration data */ 50 + void *buff; 51 + }; 52 + /** @hdr: data packet header */ 53 + struct xe_sriov_packet_hdr hdr; 54 + }; 55 + 56 + #endif
+144
drivers/gpu/drm/xe/xe_sriov_pf_migration.c
··· 3 3 * Copyright © 2025 Intel Corporation 4 4 */ 5 5 6 + #include <drm/drm_managed.h> 7 + 8 + #include "xe_device.h" 9 + #include "xe_gt_sriov_pf_control.h" 10 + #include "xe_gt_sriov_pf_migration.h" 11 + #include "xe_pm.h" 6 12 #include "xe_sriov.h" 13 + #include "xe_sriov_packet_types.h" 14 + #include "xe_sriov_pf_helpers.h" 7 15 #include "xe_sriov_pf_migration.h" 16 + #include "xe_sriov_printk.h" 17 + 18 + static struct xe_sriov_migration_state *pf_pick_migration(struct xe_device *xe, unsigned int vfid) 19 + { 20 + xe_assert(xe, IS_SRIOV_PF(xe)); 21 + xe_assert(xe, vfid <= xe_sriov_pf_get_totalvfs(xe)); 22 + 23 + return &xe->sriov.pf.vfs[vfid].migration; 24 + } 25 + 26 + /** 27 + * xe_sriov_pf_migration_waitqueue() - Get waitqueue for migration. 28 + * @xe: the &xe_device 29 + * @vfid: the VF identifier 30 + * 31 + * Return: pointer to the migration waitqueue. 32 + */ 33 + wait_queue_head_t *xe_sriov_pf_migration_waitqueue(struct xe_device *xe, unsigned int vfid) 34 + { 35 + return &pf_pick_migration(xe, vfid)->wq; 36 + } 8 37 9 38 /** 10 39 * xe_sriov_pf_migration_supported() - Check if SR-IOV VF migration is supported by the device ··· 62 33 */ 63 34 int xe_sriov_pf_migration_init(struct xe_device *xe) 64 35 { 36 + unsigned int n, totalvfs; 37 + 65 38 xe_assert(xe, IS_SRIOV_PF(xe)); 66 39 67 40 xe->sriov.pf.migration.supported = pf_check_migration_support(xe); 41 + if (!xe_sriov_pf_migration_supported(xe)) 42 + return 0; 43 + 44 + totalvfs = xe_sriov_pf_get_totalvfs(xe); 45 + for (n = 1; n <= totalvfs; n++) { 46 + struct xe_sriov_migration_state *migration = pf_pick_migration(xe, n); 47 + 48 + init_waitqueue_head(&migration->wq); 49 + } 68 50 69 51 return 0; 52 + } 53 + 54 + static bool pf_migration_data_ready(struct xe_device *xe, unsigned int vfid) 55 + { 56 + struct xe_gt *gt; 57 + u8 gt_id; 58 + 59 + for_each_gt(gt, xe, gt_id) { 60 + if (xe_gt_sriov_pf_control_check_save_failed(gt, vfid) || 61 + xe_gt_sriov_pf_control_check_save_data_done(gt, vfid) || 
62 + !xe_gt_sriov_pf_migration_ring_empty(gt, vfid)) 63 + return true; 64 + } 65 + 66 + return false; 67 + } 68 + 69 + static struct xe_sriov_packet * 70 + pf_migration_consume(struct xe_device *xe, unsigned int vfid) 71 + { 72 + struct xe_sriov_packet *data; 73 + bool more_data = false; 74 + struct xe_gt *gt; 75 + u8 gt_id; 76 + 77 + for_each_gt(gt, xe, gt_id) { 78 + data = xe_gt_sriov_pf_migration_save_consume(gt, vfid); 79 + if (data && PTR_ERR(data) != -EAGAIN) 80 + return data; 81 + if (PTR_ERR(data) == -EAGAIN) 82 + more_data = true; 83 + } 84 + 85 + if (!more_data) 86 + return NULL; 87 + 88 + return ERR_PTR(-EAGAIN); 89 + } 90 + 91 + /** 92 + * xe_sriov_pf_migration_save_consume() - Consume a VF migration data packet from the device. 93 + * @xe: the &xe_device 94 + * @vfid: the VF identifier 95 + * 96 + * Called by the save migration data consumer (userspace) when 97 + * processing migration data. 98 + * If there is no migration data to process, wait until more data is available. 99 + * 100 + * Return: Pointer to &xe_sriov_packet on success, 101 + * NULL if ring is empty and no more migration data is expected, 102 + * ERR_PTR value in case of error. 105 + */ 106 + struct xe_sriov_packet * 107 + xe_sriov_pf_migration_save_consume(struct xe_device *xe, unsigned int vfid) 108 + { 109 + struct xe_sriov_migration_state *migration = pf_pick_migration(xe, vfid); 110 + struct xe_sriov_packet *data; 111 + int ret; 112 + 113 + xe_assert(xe, IS_SRIOV_PF(xe)); 114 + 115 + for (;;) { 116 + data = pf_migration_consume(xe, vfid); 117 + if (PTR_ERR(data) != -EAGAIN) 118 + break; 119 + 120 + ret = wait_event_interruptible(migration->wq, 121 + pf_migration_data_ready(xe, vfid)); 122 + if (ret) 123 + return ERR_PTR(ret); 124 + } 125 + 126 + return data; 127 + } 128 + 129 + /** 130 + * xe_sriov_pf_migration_restore_produce() - Produce a VF migration data packet to the device. 
131 + * @xe: the &xe_device 132 + * @vfid: the VF identifier 133 + * @data: Pointer to &xe_sriov_packet 134 + * 135 + * Called by the restore migration data producer (userspace) when processing 136 + * migration data. 137 + * If the underlying data structure is full, wait until there is space. 138 + * 139 + * Return: 0 on success or a negative error code on failure. 140 + */ 141 + int xe_sriov_pf_migration_restore_produce(struct xe_device *xe, unsigned int vfid, 142 + struct xe_sriov_packet *data) 143 + { 144 + struct xe_gt *gt; 145 + 146 + xe_assert(xe, IS_SRIOV_PF(xe)); 147 + 148 + gt = xe_device_get_gt(xe, data->hdr.gt_id); 149 + if (!gt || data->hdr.tile_id != gt->tile->id || data->hdr.type == 0) { 150 + xe_sriov_err_ratelimited(xe, "Received invalid restore packet for VF%u (type:%u, tile:%u, GT:%u)\n", 151 + vfid, data->hdr.type, data->hdr.tile_id, data->hdr.gt_id); 152 + return -EINVAL; 153 + } 154 + 155 + return xe_gt_sriov_pf_migration_restore_produce(gt, vfid, data); 70 156 }
+7
drivers/gpu/drm/xe/xe_sriov_pf_migration.h
··· 7 7 #define _XE_SRIOV_PF_MIGRATION_H_ 8 8 9 9 #include <linux/types.h> 10 + #include <linux/wait.h> 10 11 11 12 struct xe_device; 13 + struct xe_sriov_packet; 12 14 13 15 int xe_sriov_pf_migration_init(struct xe_device *xe); 14 16 bool xe_sriov_pf_migration_supported(struct xe_device *xe); 17 + int xe_sriov_pf_migration_restore_produce(struct xe_device *xe, unsigned int vfid, 18 + struct xe_sriov_packet *data); 19 + struct xe_sriov_packet * 20 + xe_sriov_pf_migration_save_consume(struct xe_device *xe, unsigned int vfid); 21 + wait_queue_head_t *xe_sriov_pf_migration_waitqueue(struct xe_device *xe, unsigned int vfid); 15 22 16 23 #endif
+9
drivers/gpu/drm/xe/xe_sriov_pf_migration_types.h
··· 7 7 #define _XE_SRIOV_PF_MIGRATION_TYPES_H_ 8 8 9 9 #include <linux/types.h> 10 + #include <linux/wait.h> 10 11 11 12 /** 12 13 * struct xe_sriov_pf_migration - Xe device level VF migration data ··· 15 14 struct xe_sriov_pf_migration { 16 15 /** @supported: indicates whether VF migration feature is supported */ 17 16 bool supported; 17 + }; 18 + 19 + /** 20 + * struct xe_sriov_migration_state - Per VF device-level migration related data 21 + */ 22 + struct xe_sriov_migration_state { 23 + /** @wq: waitqueue used to avoid busy-waiting for snapshot production/consumption */ 24 + wait_queue_head_t wq; 18 25 }; 19 26 20 27 #endif
+2
drivers/gpu/drm/xe/xe_sriov_pf_types.h
··· 25 25 26 26 /** @version: negotiated VF/PF ABI version */ 27 27 struct xe_sriov_pf_service_version version; 28 + /** @migration: migration state */ 29 + struct xe_sriov_migration_state migration; 28 30 }; 29 31 30 32 /**