Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: Introduce SRIOV critical regions v2 during VF init

1. Introduce amdgpu_virt_init_critical_region(), called during VF init.
- VFs use the init_data_header offset and size_kb received via the PF2VF
mailbox to locate the init data header in VRAM, read the critical
regions' offsets and sizes from it, and save them to
adev->virt.crit_regn and adev->virt.crit_regn_tbl.

Signed-off-by: Ellen Pan <yunru.pan@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Ellen Pan and committed by
Alex Deucher
07009df6 6d2191d2

+220
+4
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
··· 2782 2782 r = amdgpu_virt_request_full_gpu(adev, true); 2783 2783 if (r) 2784 2784 return r; 2785 + 2786 + r = amdgpu_virt_init_critical_region(adev); 2787 + if (r) 2788 + return r; 2785 2789 } 2786 2790 2787 2791 switch (adev->asic_type) {
+174
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
··· 44 44 vf2pf_info->ucode_info[ucode].version = ver; \ 45 45 } while (0) 46 46 47 + #define mmRCC_CONFIG_MEMSIZE 0xde3 48 + 49 + const char *amdgpu_virt_dynamic_crit_table_name[] = { 50 + "IP DISCOVERY", 51 + "VBIOS IMG", 52 + "RAS TELEMETRY", 53 + "DATA EXCHANGE", 54 + "BAD PAGE INFO", 55 + "INIT HEADER", 56 + "LAST", 57 + }; 58 + 47 59 bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev) 48 60 { 49 61 /* By now all MMIO pages except mailbox are blocked */ ··· 853 841 mutex_init(&adev->virt.ras.ras_telemetry_mutex); 854 842 855 843 adev->virt.ras.cper_rptr = 0; 844 + } 845 + 846 + static uint8_t amdgpu_virt_crit_region_calc_checksum(uint8_t *buf_start, uint8_t *buf_end) 847 + { 848 + uint32_t sum = 0; 849 + 850 + if (buf_start >= buf_end) 851 + return 0; 852 + 853 + for (; buf_start < buf_end; buf_start++) 854 + sum += buf_start[0]; 855 + 856 + return 0xffffffff - sum; 857 + } 858 + 859 + int amdgpu_virt_init_critical_region(struct amdgpu_device *adev) 860 + { 861 + struct amd_sriov_msg_init_data_header *init_data_hdr = NULL; 862 + uint32_t init_hdr_offset = adev->virt.init_data_header.offset; 863 + uint32_t init_hdr_size = adev->virt.init_data_header.size_kb << 10; 864 + uint64_t vram_size; 865 + int r = 0; 866 + uint8_t checksum = 0; 867 + 868 + /* Skip below init if critical region version != v2 */ 869 + if (adev->virt.req_init_data_ver != GPU_CRIT_REGION_V2) 870 + return 0; 871 + 872 + if (init_hdr_offset < 0) { 873 + dev_err(adev->dev, "Invalid init header offset\n"); 874 + return -EINVAL; 875 + } 876 + 877 + vram_size = RREG32(mmRCC_CONFIG_MEMSIZE); 878 + if (!vram_size || vram_size == U32_MAX) 879 + return -EINVAL; 880 + vram_size <<= 20; 881 + 882 + if ((init_hdr_offset + init_hdr_size) > vram_size) { 883 + dev_err(adev->dev, "init_data_header exceeds VRAM size, exiting\n"); 884 + return -EINVAL; 885 + } 886 + 887 + /* Allocate for init_data_hdr */ 888 + init_data_hdr = kzalloc(sizeof(struct amd_sriov_msg_init_data_header), GFP_KERNEL); 889 + if 
(!init_data_hdr) 890 + return -ENOMEM; 891 + 892 + amdgpu_device_vram_access(adev, (uint64_t)init_hdr_offset, (uint32_t *)init_data_hdr, 893 + sizeof(struct amd_sriov_msg_init_data_header), false); 894 + 895 + /* Table validation */ 896 + if (strncmp(init_data_hdr->signature, 897 + AMDGPU_SRIOV_CRIT_DATA_SIGNATURE, 898 + AMDGPU_SRIOV_CRIT_DATA_SIG_LEN) != 0) { 899 + dev_err(adev->dev, "Invalid init data signature: %.4s\n", 900 + init_data_hdr->signature); 901 + r = -EINVAL; 902 + goto out; 903 + } 904 + 905 + checksum = amdgpu_virt_crit_region_calc_checksum( 906 + (uint8_t *)&init_data_hdr->initdata_offset, 907 + (uint8_t *)init_data_hdr + 908 + sizeof(struct amd_sriov_msg_init_data_header)); 909 + if (checksum != init_data_hdr->checksum) { 910 + dev_err(adev->dev, "Found unmatching checksum from calculation 0x%x and init_data 0x%x\n", 911 + checksum, init_data_hdr->checksum); 912 + r = -EINVAL; 913 + goto out; 914 + } 915 + 916 + memset(&adev->virt.crit_regn, 0, sizeof(adev->virt.crit_regn)); 917 + memset(adev->virt.crit_regn_tbl, 0, sizeof(adev->virt.crit_regn_tbl)); 918 + 919 + adev->virt.crit_regn.offset = init_data_hdr->initdata_offset; 920 + adev->virt.crit_regn.size_kb = init_data_hdr->initdata_size_in_kb; 921 + 922 + /* Validation and initialization for each table entry */ 923 + if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_IPD_TABLE_ID)) { 924 + if (!init_data_hdr->ip_discovery_size_in_kb || 925 + init_data_hdr->ip_discovery_size_in_kb > DISCOVERY_TMR_SIZE) { 926 + dev_err(adev->dev, "Invalid %s size: 0x%x\n", 927 + amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_IPD_TABLE_ID], 928 + init_data_hdr->ip_discovery_size_in_kb); 929 + r = -EINVAL; 930 + goto out; 931 + } 932 + 933 + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].offset = 934 + init_data_hdr->ip_discovery_offset; 935 + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb = 936 + init_data_hdr->ip_discovery_size_in_kb; 937 + } 938 + 939 + if 
(IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID)) { 940 + if (!init_data_hdr->vbios_img_size_in_kb) { 941 + dev_err(adev->dev, "Invalid %s size: 0x%x\n", 942 + amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID], 943 + init_data_hdr->vbios_img_size_in_kb); 944 + r = -EINVAL; 945 + goto out; 946 + } 947 + 948 + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID].offset = 949 + init_data_hdr->vbios_img_offset; 950 + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID].size_kb = 951 + init_data_hdr->vbios_img_size_in_kb; 952 + } 953 + 954 + if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID)) { 955 + if (!init_data_hdr->ras_tele_info_size_in_kb) { 956 + dev_err(adev->dev, "Invalid %s size: 0x%x\n", 957 + amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID], 958 + init_data_hdr->ras_tele_info_size_in_kb); 959 + r = -EINVAL; 960 + goto out; 961 + } 962 + 963 + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].offset = 964 + init_data_hdr->ras_tele_info_offset; 965 + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].size_kb = 966 + init_data_hdr->ras_tele_info_size_in_kb; 967 + } 968 + 969 + if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID)) { 970 + if (!init_data_hdr->dataexchange_size_in_kb) { 971 + dev_err(adev->dev, "Invalid %s size: 0x%x\n", 972 + amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID], 973 + init_data_hdr->dataexchange_size_in_kb); 974 + r = -EINVAL; 975 + goto out; 976 + } 977 + 978 + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset = 979 + init_data_hdr->dataexchange_offset; 980 + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb = 981 + init_data_hdr->dataexchange_size_in_kb; 982 + } 983 + 984 + if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID)) { 985 + if 
(!init_data_hdr->bad_page_size_in_kb) { 986 + dev_err(adev->dev, "Invalid %s size: 0x%x\n", 987 + amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID], 988 + init_data_hdr->bad_page_size_in_kb); 989 + r = -EINVAL; 990 + goto out; 991 + } 992 + 993 + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].offset = 994 + init_data_hdr->bad_page_info_offset; 995 + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].size_kb = 996 + init_data_hdr->bad_page_size_in_kb; 997 + } 998 + 999 + adev->virt.is_dynamic_crit_regn_enabled = true; 1000 + 1001 + out: 1002 + kfree(init_data_hdr); 1003 + init_data_hdr = NULL; 1004 + 1005 + return r; 856 1006 } 857 1007 858 1008 void amdgpu_virt_init(struct amdgpu_device *adev)
+11
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
··· 54 54 55 55 #define AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT 2 56 56 57 + /* Signature used to validate the SR-IOV dynamic critical region init data header ("INDA") */ 58 + #define AMDGPU_SRIOV_CRIT_DATA_SIGNATURE "INDA" 59 + #define AMDGPU_SRIOV_CRIT_DATA_SIG_LEN 4 60 + /* Evaluates to nonzero when bit (id) is set in (hdr)->valid_tables, i.e. the header marks that table as valid */ 61 + #define IS_SRIOV_CRIT_REGN_ENTRY_VALID(hdr, id) ((hdr)->valid_tables & (1 << (id))) 62 + 57 63 enum amdgpu_sriov_vf_mode { 58 64 SRIOV_VF_MODE_BARE_METAL = 0, 59 65 SRIOV_VF_MODE_ONE_VF, ··· 302 296 303 297 /* dynamic(v2) critical regions */ 304 298 struct amdgpu_virt_region init_data_header; 299 + struct amdgpu_virt_region crit_regn; /* offset and size_kb taken from the header's initdata_offset / initdata_size_in_kb */ 300 + struct amdgpu_virt_region crit_regn_tbl[AMD_SRIOV_MSG_MAX_TABLE_ID]; /* per-table offset and size_kb, indexed by table ID */ 301 + bool is_dynamic_crit_regn_enabled; /* set to true by amdgpu_virt_init_critical_region() once the header was parsed without error */ 305 302 306 303 /* vf2pf message */ 307 304 struct delayed_work vf2pf_work; ··· 440 431 void amdgpu_virt_exchange_data(struct amdgpu_device *adev); 441 432 void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev); 442 433 void amdgpu_virt_init(struct amdgpu_device *adev); 434 + /* Parses the v2 init data header; returns 0 on success (or when v2 is not requested) and a negative errno otherwise */ 435 + int amdgpu_virt_init_critical_region(struct amdgpu_device *adev); 443 436 444 437 bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev); 445 438 int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
+31
drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
··· 71 71 GPU_CRIT_REGION_V2 = 2, 72 72 }; 73 73 74 + /* v2 layout offset enum (in order of allocation); each value is used both as an index into adev->virt.crit_regn_tbl[] and as a bit position in amd_sriov_msg_init_data_header.valid_tables */ 75 + enum amd_sriov_msg_table_id_enum { 76 + AMD_SRIOV_MSG_IPD_TABLE_ID = 0, 77 + AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID, 78 + AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID, 79 + AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID, 80 + AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID, 81 + AMD_SRIOV_MSG_INITD_H_TABLE_ID, 82 + AMD_SRIOV_MSG_MAX_TABLE_ID, /* number of table IDs; sizes adev->virt.crit_regn_tbl[] */ 83 + }; 84 + /* Init data header that amdgpu_virt_init_critical_region() reads from VRAM and validates (signature first, then checksum) */ 85 + struct amd_sriov_msg_init_data_header { 86 + char signature[4]; /* "INDA"; compared against AMDGPU_SRIOV_CRIT_DATA_SIGNATURE */ 87 + uint32_t version; 88 + uint32_t checksum; /* checked against a value derived from the bytes from initdata_offset through the end of this struct */ 89 + uint32_t initdata_offset; /* 0 */ 90 + uint32_t initdata_size_in_kb; /* 5MB */ 91 + uint32_t valid_tables; /* bitmask: bit N set means table ID N is valid (tested via IS_SRIOV_CRIT_REGN_ENTRY_VALID) */ 92 + uint32_t vbios_img_offset; 93 + uint32_t vbios_img_size_in_kb; 94 + uint32_t dataexchange_offset; 95 + uint32_t dataexchange_size_in_kb; 96 + uint32_t ras_tele_info_offset; 97 + uint32_t ras_tele_info_size_in_kb; 98 + uint32_t ip_discovery_offset; 99 + uint32_t ip_discovery_size_in_kb; 100 + uint32_t bad_page_info_offset; 101 + uint32_t bad_page_size_in_kb; 102 + uint32_t reserved[8]; 103 + }; 104 + 74 105 /* 75 106 * PF2VF history log: 76 107 * v1 defined in amdgim