at v6.18-rc5 459 lines 17 kB view raw
1/* 2 * Copyright 2016 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Author: Monk.liu@amd.com 23 */ 24#ifndef AMDGPU_VIRT_H 25#define AMDGPU_VIRT_H 26 27#include "amdgv_sriovmsg.h" 28 29#define AMDGPU_SRIOV_CAPS_SRIOV_VBIOS (1 << 0) /* vBIOS is sr-iov ready */ 30#define AMDGPU_SRIOV_CAPS_ENABLE_IOV (1 << 1) /* sr-iov is enabled on this GPU */ 31#define AMDGPU_SRIOV_CAPS_IS_VF (1 << 2) /* this GPU is a virtual function */ 32#define AMDGPU_PASSTHROUGH_MODE (1 << 3) /* thw whole GPU is pass through for VM */ 33#define AMDGPU_SRIOV_CAPS_RUNTIME (1 << 4) /* is out of full access mode */ 34#define AMDGPU_VF_MMIO_ACCESS_PROTECT (1 << 5) /* MMIO write access is not allowed in sriov runtime */ 35 36/* flags for indirect register access path supported by rlcg for sriov */ 37#define AMDGPU_RLCG_GC_WRITE_LEGACY (0x8 << 28) 38#define AMDGPU_RLCG_GC_WRITE (0x0 << 28) 39#define AMDGPU_RLCG_GC_READ (0x1 << 28) 40#define AMDGPU_RLCG_MMHUB_WRITE (0x2 << 28) 41 42/* error code for indirect register access path supported by rlcg for sriov */ 43#define AMDGPU_RLCG_VFGATE_DISABLED 0x4000000 44#define AMDGPU_RLCG_WRONG_OPERATION_TYPE 0x2000000 45#define AMDGPU_RLCG_REG_NOT_IN_RANGE 0x1000000 46 47#define AMDGPU_RLCG_SCRATCH1_ADDRESS_MASK 0xFFFFF 48#define AMDGPU_RLCG_SCRATCH1_ERROR_MASK 0xF000000 49 50/* all asic after AI use this offset */ 51#define mmRCC_IOV_FUNC_IDENTIFIER 0xDE5 52/* tonga/fiji use this offset */ 53#define mmBIF_IOV_FUNC_IDENTIFIER 0x1503 54 55#define AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT 2 56 57enum amdgpu_sriov_vf_mode { 58 SRIOV_VF_MODE_BARE_METAL = 0, 59 SRIOV_VF_MODE_ONE_VF, 60 SRIOV_VF_MODE_MULTI_VF, 61}; 62 63struct amdgpu_mm_table { 64 struct amdgpu_bo *bo; 65 uint32_t *cpu_addr; 66 uint64_t gpu_addr; 67}; 68 69#define AMDGPU_VF_ERROR_ENTRY_SIZE 16 70 71/* struct error_entry - amdgpu VF error information. */ 72struct amdgpu_vf_error_buffer { 73 struct mutex lock; 74 int read_count; 75 int write_count; 76 uint16_t code[AMDGPU_VF_ERROR_ENTRY_SIZE]; 77 uint16_t flags[AMDGPU_VF_ERROR_ENTRY_SIZE]; 78 uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE]; 79}; 80 81enum idh_request; 82 83/** 84 * struct amdgpu_virt_ops - amdgpu device virt operations 85 */ 86struct amdgpu_virt_ops { 87 int (*req_full_gpu)(struct amdgpu_device *adev, bool init); 88 int (*rel_full_gpu)(struct amdgpu_device *adev, bool init); 89 int (*req_init_data)(struct amdgpu_device *adev); 90 int (*reset_gpu)(struct amdgpu_device *adev); 91 void (*ready_to_reset)(struct amdgpu_device *adev); 92 int (*wait_reset)(struct amdgpu_device *adev); 93 void (*trans_msg)(struct amdgpu_device *adev, enum idh_request req, 94 u32 data1, u32 data2, u32 data3); 95 void (*ras_poison_handler)(struct amdgpu_device *adev, 96 enum amdgpu_ras_block block); 97 bool (*rcvd_ras_intr)(struct amdgpu_device *adev); 98 int (*req_ras_err_count)(struct amdgpu_device *adev); 99 int (*req_ras_cper_dump)(struct amdgpu_device *adev, u64 vf_rptr); 100 int (*req_bad_pages)(struct amdgpu_device *adev); 101 int (*req_ras_chk_criti)(struct amdgpu_device *adev, u64 addr); 102}; 103 104/* 105 * Firmware Reserve Frame buffer 106 */ 107struct amdgpu_virt_fw_reserve { 108 struct amd_sriov_msg_pf2vf_info_header *p_pf2vf; 109 struct amd_sriov_msg_vf2pf_info_header *p_vf2pf; 110 void *ras_telemetry; 111 unsigned int checksum_key; 112}; 113 114/* 115 * Legacy GIM header 116 * 117 * Defination between PF and VF 118 * Structures forcibly aligned to 4 to keep the same style as PF. 119 */ 120#define AMDGIM_DATAEXCHANGE_OFFSET (64 * 1024) 121 122#define AMDGIM_GET_STRUCTURE_RESERVED_SIZE(total, u8, u16, u32, u64) \ 123 (total - (((u8)+3) / 4 + ((u16)+1) / 2 + (u32) + (u64)*2)) 124 125enum AMDGIM_FEATURE_FLAG { 126 /* GIM supports feature of Error log collecting */ 127 AMDGIM_FEATURE_ERROR_LOG_COLLECT = 0x1, 128 /* GIM supports feature of loading uCodes */ 129 AMDGIM_FEATURE_GIM_LOAD_UCODES = 0x2, 130 /* VRAM LOST by GIM */ 131 AMDGIM_FEATURE_GIM_FLR_VRAMLOST = 0x4, 132 /* MM bandwidth */ 133 AMDGIM_FEATURE_GIM_MM_BW_MGR = 0x8, 134 /* PP ONE VF MODE in GIM */ 135 AMDGIM_FEATURE_PP_ONE_VF = (1 << 4), 136 /* Indirect Reg Access enabled */ 137 AMDGIM_FEATURE_INDIRECT_REG_ACCESS = (1 << 5), 138 /* AV1 Support MODE*/ 139 AMDGIM_FEATURE_AV1_SUPPORT = (1 << 6), 140 /* VCN RB decouple */ 141 AMDGIM_FEATURE_VCN_RB_DECOUPLE = (1 << 7), 142 /* MES info */ 143 AMDGIM_FEATURE_MES_INFO_ENABLE = (1 << 8), 144 AMDGIM_FEATURE_RAS_CAPS = (1 << 9), 145 AMDGIM_FEATURE_RAS_TELEMETRY = (1 << 10), 146 AMDGIM_FEATURE_RAS_CPER = (1 << 11), 147}; 148 149enum AMDGIM_REG_ACCESS_FLAG { 150 /* Use PSP to program IH_RB_CNTL */ 151 AMDGIM_FEATURE_IH_REG_PSP_EN = (1 << 0), 152 /* Use RLC to program MMHUB regs */ 153 AMDGIM_FEATURE_MMHUB_REG_RLC_EN = (1 << 1), 154 /* Use RLC to program GC regs */ 155 AMDGIM_FEATURE_GC_REG_RLC_EN = (1 << 2), 156 /* Use PSP to program L1_TLB_CNTL */ 157 AMDGIM_FEATURE_L1_TLB_CNTL_PSP_EN = (1 << 3), 158 /* Use RLCG to program SQ_CONFIG1 */ 159 AMDGIM_FEATURE_REG_ACCESS_SQ_CONFIG = (1 << 4), 160}; 161 162struct amdgim_pf2vf_info_v1 { 163 /* header contains size and version */ 164 struct amd_sriov_msg_pf2vf_info_header header; 165 /* max_width * max_height */ 166 unsigned int uvd_enc_max_pixels_count; 167 /* 16x16 pixels/sec, codec independent */ 168 unsigned int uvd_enc_max_bandwidth; 169 /* max_width * max_height */ 170 unsigned int vce_enc_max_pixels_count; 171 /* 16x16 pixels/sec, codec independent */ 172 unsigned int vce_enc_max_bandwidth; 173 /* MEC FW position in kb from the start of visible frame buffer */ 174 unsigned int mecfw_kboffset; 175 /* The features flags of the GIM driver supports. */ 176 unsigned int feature_flags; 177 /* use private key from mailbox 2 to create chueksum */ 178 unsigned int checksum; 179} __aligned(4); 180 181struct amdgim_vf2pf_info_v1 { 182 /* header contains size and version */ 183 struct amd_sriov_msg_vf2pf_info_header header; 184 /* driver version */ 185 char driver_version[64]; 186 /* driver certification, 1=WHQL, 0=None */ 187 unsigned int driver_cert; 188 /* guest OS type and version: need a define */ 189 unsigned int os_info; 190 /* in the unit of 1M */ 191 unsigned int fb_usage; 192 /* guest gfx engine usage percentage */ 193 unsigned int gfx_usage; 194 /* guest gfx engine health percentage */ 195 unsigned int gfx_health; 196 /* guest compute engine usage percentage */ 197 unsigned int compute_usage; 198 /* guest compute engine health percentage */ 199 unsigned int compute_health; 200 /* guest vce engine usage percentage. 0xffff means N/A. */ 201 unsigned int vce_enc_usage; 202 /* guest vce engine health percentage. 0xffff means N/A. */ 203 unsigned int vce_enc_health; 204 /* guest uvd engine usage percentage. 0xffff means N/A. */ 205 unsigned int uvd_enc_usage; 206 /* guest uvd engine usage percentage. 0xffff means N/A. */ 207 unsigned int uvd_enc_health; 208 unsigned int checksum; 209} __aligned(4); 210 211struct amdgim_vf2pf_info_v2 { 212 /* header contains size and version */ 213 struct amd_sriov_msg_vf2pf_info_header header; 214 uint32_t checksum; 215 /* driver version */ 216 uint8_t driver_version[64]; 217 /* driver certification, 1=WHQL, 0=None */ 218 uint32_t driver_cert; 219 /* guest OS type and version: need a define */ 220 uint32_t os_info; 221 /* in the unit of 1M */ 222 uint32_t fb_usage; 223 /* guest gfx engine usage percentage */ 224 uint32_t gfx_usage; 225 /* guest gfx engine health percentage */ 226 uint32_t gfx_health; 227 /* guest compute engine usage percentage */ 228 uint32_t compute_usage; 229 /* guest compute engine health percentage */ 230 uint32_t compute_health; 231 /* guest vce engine usage percentage. 0xffff means N/A. */ 232 uint32_t vce_enc_usage; 233 /* guest vce engine health percentage. 0xffff means N/A. */ 234 uint32_t vce_enc_health; 235 /* guest uvd engine usage percentage. 0xffff means N/A. */ 236 uint32_t uvd_enc_usage; 237 /* guest uvd engine usage percentage. 0xffff means N/A. */ 238 uint32_t uvd_enc_health; 239 uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 64, 0, (12 + sizeof(struct amd_sriov_msg_vf2pf_info_header)/sizeof(uint32_t)), 0)]; 240} __aligned(4); 241 242struct amdgpu_virt_ras_err_handler_data { 243 /* point to bad page records array */ 244 struct eeprom_table_record *bps; 245 /* point to reserved bo array */ 246 struct amdgpu_bo **bps_bo; 247 /* the count of entries */ 248 int count; 249 /* last reserved entry's index + 1 */ 250 int last_reserved; 251}; 252 253struct amdgpu_virt_ras { 254 struct ratelimit_state ras_error_cnt_rs; 255 struct ratelimit_state ras_cper_dump_rs; 256 struct ratelimit_state ras_chk_criti_rs; 257 struct mutex ras_telemetry_mutex; 258 uint64_t cper_rptr; 259}; 260 261#define AMDGPU_VIRT_CAPS_LIST(X) X(AMDGPU_VIRT_CAP_POWER_LIMIT) 262 263DECLARE_ATTR_CAP_CLASS(amdgpu_virt, AMDGPU_VIRT_CAPS_LIST); 264 265/* GPU virtualization */ 266struct amdgpu_virt { 267 uint32_t caps; 268 struct amdgpu_bo *csa_obj; 269 void *csa_cpu_addr; 270 bool chained_ib_support; 271 uint32_t reg_val_offs; 272 struct amdgpu_irq_src ack_irq; 273 struct amdgpu_irq_src rcv_irq; 274 275 struct work_struct flr_work; 276 struct work_struct req_bad_pages_work; 277 struct work_struct handle_bad_pages_work; 278 279 struct amdgpu_mm_table mm_table; 280 const struct amdgpu_virt_ops *ops; 281 struct amdgpu_vf_error_buffer vf_errors; 282 struct amdgpu_virt_fw_reserve fw_reserve; 283 struct amdgpu_virt_caps virt_caps; 284 uint32_t gim_feature; 285 uint32_t reg_access_mode; 286 int req_init_data_ver; 287 bool tdr_debug; 288 struct amdgpu_virt_ras_err_handler_data *virt_eh_data; 289 bool ras_init_done; 290 uint32_t reg_access; 291 292 /* vf2pf message */ 293 struct delayed_work vf2pf_work; 294 uint32_t vf2pf_update_interval_ms; 295 int vf2pf_update_retry_cnt; 296 297 /* multimedia bandwidth config */ 298 bool is_mm_bw_enabled; 299 uint32_t decode_max_dimension_pixels; 300 uint32_t decode_max_frame_pixels; 301 uint32_t encode_max_dimension_pixels; 302 uint32_t encode_max_frame_pixels; 303 304 /* the ucode id to signal the autoload */ 305 uint32_t autoload_ucode_id; 306 307 /* Spinlock to protect access to the RLCG register interface */ 308 spinlock_t rlcg_reg_lock; 309 310 union amd_sriov_ras_caps ras_en_caps; 311 union amd_sriov_ras_caps ras_telemetry_en_caps; 312 struct amdgpu_virt_ras ras; 313 struct amd_sriov_ras_telemetry_error_count count_cache; 314 315 /* hibernate and resume with different VF feature for xgmi enabled system */ 316 bool is_xgmi_node_migrate_enabled; 317}; 318 319struct amdgpu_video_codec_info; 320 321#define amdgpu_sriov_enabled(adev) \ 322((adev)->virt.caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV) 323 324#define amdgpu_sriov_vf(adev) \ 325((adev)->virt.caps & AMDGPU_SRIOV_CAPS_IS_VF) 326 327#define amdgpu_sriov_bios(adev) \ 328((adev)->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS) 329 330#define amdgpu_sriov_runtime(adev) \ 331((adev)->virt.caps & AMDGPU_SRIOV_CAPS_RUNTIME) 332 333#define amdgpu_sriov_fullaccess(adev) \ 334(amdgpu_sriov_vf((adev)) && !amdgpu_sriov_runtime((adev))) 335 336#define amdgpu_sriov_reg_indirect_en(adev) \ 337(amdgpu_sriov_vf((adev)) && \ 338 ((adev)->virt.gim_feature & (AMDGIM_FEATURE_INDIRECT_REG_ACCESS))) 339 340#define amdgpu_sriov_reg_indirect_ih(adev) \ 341(amdgpu_sriov_vf((adev)) && \ 342 ((adev)->virt.reg_access & (AMDGIM_FEATURE_IH_REG_PSP_EN))) 343 344#define amdgpu_sriov_reg_indirect_mmhub(adev) \ 345(amdgpu_sriov_vf((adev)) && \ 346 ((adev)->virt.reg_access & (AMDGIM_FEATURE_MMHUB_REG_RLC_EN))) 347 348#define amdgpu_sriov_reg_indirect_gc(adev) \ 349(amdgpu_sriov_vf((adev)) && \ 350 ((adev)->virt.reg_access & (AMDGIM_FEATURE_GC_REG_RLC_EN))) 351 352#define amdgpu_sriov_reg_indirect_l1_tlb_cntl(adev) \ 353(amdgpu_sriov_vf((adev)) && \ 354 ((adev)->virt.reg_access & (AMDGIM_FEATURE_L1_TLB_CNTL_PSP_EN))) 355 356#define amdgpu_sriov_rlcg_error_report_enabled(adev) \ 357 (amdgpu_sriov_reg_indirect_mmhub(adev) || amdgpu_sriov_reg_indirect_gc(adev)) 358 359#define amdgpu_sriov_reg_access_sq_config(adev) \ 360(amdgpu_sriov_vf((adev)) && \ 361 ((adev)->virt.reg_access & (AMDGIM_FEATURE_REG_ACCESS_SQ_CONFIG))) 362 363#define amdgpu_passthrough(adev) \ 364((adev)->virt.caps & AMDGPU_PASSTHROUGH_MODE) 365 366#define amdgpu_sriov_vf_mmio_access_protection(adev) \ 367((adev)->virt.caps & AMDGPU_VF_MMIO_ACCESS_PROTECT) 368 369#define amdgpu_sriov_ras_caps_en(adev) \ 370((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_CAPS) 371 372#define amdgpu_sriov_ras_telemetry_en(adev) \ 373(((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_TELEMETRY) && (adev)->virt.fw_reserve.ras_telemetry) 374 375#define amdgpu_sriov_ras_telemetry_block_en(adev, sriov_blk) \ 376(amdgpu_sriov_ras_telemetry_en((adev)) && (adev)->virt.ras_telemetry_en_caps.all & BIT(sriov_blk)) 377 378#define amdgpu_sriov_ras_cper_en(adev) \ 379((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_CPER) 380 381static inline bool is_virtual_machine(void) 382{ 383#if defined(CONFIG_X86) 384 return boot_cpu_has(X86_FEATURE_HYPERVISOR); 385#elif defined(CONFIG_ARM64) 386 return !is_kernel_in_hyp_mode(); 387#else 388 return false; 389#endif 390} 391 392#define amdgpu_sriov_is_pp_one_vf(adev) \ 393 ((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF) 394#define amdgpu_sriov_multi_vf_mode(adev) \ 395 (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) 396#define amdgpu_sriov_is_debug(adev) \ 397 ((!amdgpu_in_reset(adev)) && adev->virt.tdr_debug) 398#define amdgpu_sriov_is_normal(adev) \ 399 ((!amdgpu_in_reset(adev)) && (!adev->virt.tdr_debug)) 400#define amdgpu_sriov_is_av1_support(adev) \ 401 ((adev)->virt.gim_feature & AMDGIM_FEATURE_AV1_SUPPORT) 402#define amdgpu_sriov_is_vcn_rb_decouple(adev) \ 403 ((adev)->virt.gim_feature & AMDGIM_FEATURE_VCN_RB_DECOUPLE) 404#define amdgpu_sriov_is_mes_info_enable(adev) \ 405 ((adev)->virt.gim_feature & AMDGIM_FEATURE_MES_INFO_ENABLE) 406 407#define amdgpu_virt_xgmi_migrate_enabled(adev) \ 408 ((adev)->virt.is_xgmi_node_migrate_enabled && (adev)->gmc.xgmi.node_segment_size != 0) 409 410bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); 411void amdgpu_virt_init_setting(struct amdgpu_device *adev); 412int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init); 413int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init); 414int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); 415void amdgpu_virt_request_init_data(struct amdgpu_device *adev); 416void amdgpu_virt_ready_to_reset(struct amdgpu_device *adev); 417int amdgpu_virt_wait_reset(struct amdgpu_device *adev); 418int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev); 419void amdgpu_virt_free_mm_table(struct amdgpu_device *adev); 420bool amdgpu_virt_rcvd_ras_interrupt(struct amdgpu_device *adev); 421void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev); 422void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev); 423void amdgpu_virt_exchange_data(struct amdgpu_device *adev); 424void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev); 425void amdgpu_virt_init(struct amdgpu_device *adev); 426 427bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev); 428int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev); 429void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev); 430 431enum amdgpu_sriov_vf_mode amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *adev); 432 433void amdgpu_virt_update_sriov_video_codec(struct amdgpu_device *adev, 434 struct amdgpu_video_codec_info *encode, uint32_t encode_array_size, 435 struct amdgpu_video_codec_info *decode, uint32_t decode_array_size); 436void amdgpu_sriov_wreg(struct amdgpu_device *adev, 437 u32 offset, u32 value, 438 u32 acc_flags, u32 hwip, u32 xcc_id); 439u32 amdgpu_sriov_rreg(struct amdgpu_device *adev, 440 u32 offset, u32 acc_flags, u32 hwip, u32 xcc_id); 441bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, 442 uint32_t ucode_id); 443void amdgpu_virt_pre_reset(struct amdgpu_device *adev); 444void amdgpu_virt_post_reset(struct amdgpu_device *adev); 445bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev); 446bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev, 447 u32 acc_flags, u32 hwip, 448 bool write, u32 *rlcg_flag); 449u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id); 450bool amdgpu_virt_get_ras_capability(struct amdgpu_device *adev); 451int amdgpu_virt_req_ras_err_count(struct amdgpu_device *adev, enum amdgpu_ras_block block, 452 struct ras_err_data *err_data); 453int amdgpu_virt_req_ras_cper_dump(struct amdgpu_device *adev, bool force_update); 454int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev); 455bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev, 456 enum amdgpu_ras_block block); 457void amdgpu_virt_request_bad_pages(struct amdgpu_device *adev); 458int amdgpu_virt_check_vf_critical_region(struct amdgpu_device *adev, u64 addr, bool *hit); 459#endif