Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Author: Monk.liu@amd.com
 */
24#ifndef AMDGPU_VIRT_H
25#define AMDGPU_VIRT_H
26
27#include "amdgv_sriovmsg.h"
28
/*
 * Virtualization capability bits kept in amdgpu_virt.caps. The
 * amdgpu_sriov_*() and amdgpu_passthrough() helper macros in this header
 * test these bits.
 */
#define AMDGPU_SRIOV_CAPS_SRIOV_VBIOS  (1 << 0) /* vBIOS is sr-iov ready */
#define AMDGPU_SRIOV_CAPS_ENABLE_IOV   (1 << 1) /* sr-iov is enabled on this GPU */
#define AMDGPU_SRIOV_CAPS_IS_VF        (1 << 2) /* this GPU is a virtual function */
#define AMDGPU_PASSTHROUGH_MODE        (1 << 3) /* the whole GPU is pass through for VM */
#define AMDGPU_SRIOV_CAPS_RUNTIME      (1 << 4) /* is out of full access mode */
#define AMDGPU_VF_MMIO_ACCESS_PROTECT  (1 << 5) /* MMIO write access is not allowed in sriov runtime */
35
/*
 * Flags for the indirect register access path supported by rlcg for sriov.
 *
 * The operation selector lives in bits 31:28. The constants are unsigned on
 * purpose: 0x8 << 28 does not fit in a signed int, so shifting a plain int
 * literal would be undefined behaviour and yield a negative value.
 */
#define AMDGPU_RLCG_GC_WRITE_LEGACY    (0x8U << 28)
#define AMDGPU_RLCG_GC_WRITE           (0x0U << 28)
#define AMDGPU_RLCG_GC_READ            (0x1U << 28)
#define AMDGPU_RLCG_MMHUB_WRITE        (0x2U << 28)
41
/*
 * Error codes for the indirect register access path supported by rlcg for
 * sriov. Each code is a single bit inside AMDGPU_RLCG_SCRATCH1_ERROR_MASK.
 */
#define AMDGPU_RLCG_VFGATE_DISABLED		0x4000000
#define AMDGPU_RLCG_WRONG_OPERATION_TYPE	0x2000000
#define AMDGPU_RLCG_REG_NOT_IN_RANGE		0x1000000

/* Low 20 bits: address field; bits 27:24: error field. */
#define AMDGPU_RLCG_SCRATCH1_ADDRESS_MASK	0xFFFFF
#define AMDGPU_RLCG_SCRATCH1_ERROR_MASK		0xF000000
49
/* all asic after AI use this offset */
#define mmRCC_IOV_FUNC_IDENTIFIER 0xDE5
/* tonga/fiji use this offset */
#define mmBIF_IOV_FUNC_IDENTIFIER 0x1503

/*
 * NOTE(review): presumably the cap for amdgpu_virt.vf2pf_update_retry_cnt;
 * confirm against the users in amdgpu_virt.c.
 */
#define AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT 2
56
/*
 * SR-IOV operating mode of the device; this is the return type of
 * amdgpu_virt_get_sriov_vf_mode().
 */
enum amdgpu_sriov_vf_mode {
	SRIOV_VF_MODE_BARE_METAL = 0,
	SRIOV_VF_MODE_ONE_VF,
	SRIOV_VF_MODE_MULTI_VF,
};
62
/*
 * Bookkeeping for the "mm table" embedded in struct amdgpu_virt.
 * NOTE(review): cpu_addr and gpu_addr are presumably the CPU mapping and
 * the GPU address of @bo; confirm in amdgpu_virt_alloc_mm_table().
 */
struct amdgpu_mm_table {
	struct amdgpu_bo	*bo;
	uint32_t		*cpu_addr;
	uint64_t		gpu_addr;
};
68
69#define AMDGPU_VF_ERROR_ENTRY_SIZE 16
70
71/* struct error_entry - amdgpu VF error information. */
72struct amdgpu_vf_error_buffer {
73 struct mutex lock;
74 int read_count;
75 int write_count;
76 uint16_t code[AMDGPU_VF_ERROR_ENTRY_SIZE];
77 uint16_t flags[AMDGPU_VF_ERROR_ENTRY_SIZE];
78 uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
79};
80
81enum idh_request;
82
83/**
84 * struct amdgpu_virt_ops - amdgpu device virt operations
85 */
86struct amdgpu_virt_ops {
87 int (*req_full_gpu)(struct amdgpu_device *adev, bool init);
88 int (*rel_full_gpu)(struct amdgpu_device *adev, bool init);
89 int (*req_init_data)(struct amdgpu_device *adev);
90 int (*reset_gpu)(struct amdgpu_device *adev);
91 void (*ready_to_reset)(struct amdgpu_device *adev);
92 int (*wait_reset)(struct amdgpu_device *adev);
93 void (*trans_msg)(struct amdgpu_device *adev, enum idh_request req,
94 u32 data1, u32 data2, u32 data3);
95 void (*ras_poison_handler)(struct amdgpu_device *adev,
96 enum amdgpu_ras_block block);
97 bool (*rcvd_ras_intr)(struct amdgpu_device *adev);
98 int (*req_ras_err_count)(struct amdgpu_device *adev);
99 int (*req_ras_cper_dump)(struct amdgpu_device *adev, u64 vf_rptr);
100 int (*req_bad_pages)(struct amdgpu_device *adev);
101 int (*req_ras_chk_criti)(struct amdgpu_device *adev, u64 addr);
102};
103
/*
 * Firmware Reserve Frame buffer
 *
 * Pointers to the PF->VF and VF->PF message headers plus the RAS telemetry
 * area. amdgpu_sriov_ras_telemetry_en() treats a NULL @ras_telemetry as
 * "telemetry not available".
 */
struct amdgpu_virt_fw_reserve {
	struct amd_sriov_msg_pf2vf_info_header *p_pf2vf;
	struct amd_sriov_msg_vf2pf_info_header *p_vf2pf;
	void *ras_telemetry;
	unsigned int checksum_key;
};
113
/*
 * Legacy GIM header
 *
 * Definition between PF and VF
 * Structures forcibly aligned to 4 to keep the same style as PF.
 */
#define AMDGIM_DATAEXCHANGE_OFFSET		(64 * 1024)

/*
 * Number of 32-bit words left over in a structure of @total 32-bit words
 * after accounting for its members: @u8 one-byte members (rounded up to
 * whole words), @u16 two-byte members (rounded up), @u32 four-byte members
 * and @u64 eight-byte members. Every argument is parenthesised so that
 * expressions can be passed safely.
 */
#define AMDGIM_GET_STRUCTURE_RESERVED_SIZE(total, u8, u16, u32, u64) \
	((total) - (((u8)+3) / 4 + ((u16)+1) / 2 + (u32) + (u64)*2))
124
/*
 * Feature bits advertised by GIM. The amdgpu_sriov_*() helper macros in
 * this header test them against amdgpu_virt.gim_feature.
 */
enum AMDGIM_FEATURE_FLAG {
	/* GIM supports feature of Error log collecting */
	AMDGIM_FEATURE_ERROR_LOG_COLLECT = (1 << 0),
	/* GIM supports feature of loading uCodes */
	AMDGIM_FEATURE_GIM_LOAD_UCODES   = (1 << 1),
	/* VRAM LOST by GIM */
	AMDGIM_FEATURE_GIM_FLR_VRAMLOST = (1 << 2),
	/* MM bandwidth */
	AMDGIM_FEATURE_GIM_MM_BW_MGR = (1 << 3),
	/* PP ONE VF MODE in GIM */
	AMDGIM_FEATURE_PP_ONE_VF = (1 << 4),
	/* Indirect Reg Access enabled */
	AMDGIM_FEATURE_INDIRECT_REG_ACCESS = (1 << 5),
	/* AV1 Support MODE */
	AMDGIM_FEATURE_AV1_SUPPORT = (1 << 6),
	/* VCN RB decouple */
	AMDGIM_FEATURE_VCN_RB_DECOUPLE = (1 << 7),
	/* MES info */
	AMDGIM_FEATURE_MES_INFO_ENABLE = (1 << 8),
	/* RAS flags; tested by the amdgpu_sriov_ras_*_en() macros */
	AMDGIM_FEATURE_RAS_CAPS = (1 << 9),
	AMDGIM_FEATURE_RAS_TELEMETRY = (1 << 10),
	AMDGIM_FEATURE_RAS_CPER = (1 << 11),
};
148
/*
 * Register access method bits. The amdgpu_sriov_reg_indirect_*() and
 * amdgpu_sriov_reg_access_sq_config() macros test them against
 * amdgpu_virt.reg_access.
 */
enum AMDGIM_REG_ACCESS_FLAG {
	/* Use PSP to program IH_RB_CNTL */
	AMDGIM_FEATURE_IH_REG_PSP_EN     = (1 << 0),
	/* Use RLC to program MMHUB regs */
	AMDGIM_FEATURE_MMHUB_REG_RLC_EN  = (1 << 1),
	/* Use RLC to program GC regs */
	AMDGIM_FEATURE_GC_REG_RLC_EN     = (1 << 2),
	/* Use PSP to program L1_TLB_CNTL */
	AMDGIM_FEATURE_L1_TLB_CNTL_PSP_EN = (1 << 3),
	/* Use RLCG to program SQ_CONFIG1 */
	AMDGIM_FEATURE_REG_ACCESS_SQ_CONFIG = (1 << 4),
};
161
162struct amdgim_pf2vf_info_v1 {
163 /* header contains size and version */
164 struct amd_sriov_msg_pf2vf_info_header header;
165 /* max_width * max_height */
166 unsigned int uvd_enc_max_pixels_count;
167 /* 16x16 pixels/sec, codec independent */
168 unsigned int uvd_enc_max_bandwidth;
169 /* max_width * max_height */
170 unsigned int vce_enc_max_pixels_count;
171 /* 16x16 pixels/sec, codec independent */
172 unsigned int vce_enc_max_bandwidth;
173 /* MEC FW position in kb from the start of visible frame buffer */
174 unsigned int mecfw_kboffset;
175 /* The features flags of the GIM driver supports. */
176 unsigned int feature_flags;
177 /* use private key from mailbox 2 to create chueksum */
178 unsigned int checksum;
179} __aligned(4);
180
181struct amdgim_vf2pf_info_v1 {
182 /* header contains size and version */
183 struct amd_sriov_msg_vf2pf_info_header header;
184 /* driver version */
185 char driver_version[64];
186 /* driver certification, 1=WHQL, 0=None */
187 unsigned int driver_cert;
188 /* guest OS type and version: need a define */
189 unsigned int os_info;
190 /* in the unit of 1M */
191 unsigned int fb_usage;
192 /* guest gfx engine usage percentage */
193 unsigned int gfx_usage;
194 /* guest gfx engine health percentage */
195 unsigned int gfx_health;
196 /* guest compute engine usage percentage */
197 unsigned int compute_usage;
198 /* guest compute engine health percentage */
199 unsigned int compute_health;
200 /* guest vce engine usage percentage. 0xffff means N/A. */
201 unsigned int vce_enc_usage;
202 /* guest vce engine health percentage. 0xffff means N/A. */
203 unsigned int vce_enc_health;
204 /* guest uvd engine usage percentage. 0xffff means N/A. */
205 unsigned int uvd_enc_usage;
206 /* guest uvd engine usage percentage. 0xffff means N/A. */
207 unsigned int uvd_enc_health;
208 unsigned int checksum;
209} __aligned(4);
210
211struct amdgim_vf2pf_info_v2 {
212 /* header contains size and version */
213 struct amd_sriov_msg_vf2pf_info_header header;
214 uint32_t checksum;
215 /* driver version */
216 uint8_t driver_version[64];
217 /* driver certification, 1=WHQL, 0=None */
218 uint32_t driver_cert;
219 /* guest OS type and version: need a define */
220 uint32_t os_info;
221 /* in the unit of 1M */
222 uint32_t fb_usage;
223 /* guest gfx engine usage percentage */
224 uint32_t gfx_usage;
225 /* guest gfx engine health percentage */
226 uint32_t gfx_health;
227 /* guest compute engine usage percentage */
228 uint32_t compute_usage;
229 /* guest compute engine health percentage */
230 uint32_t compute_health;
231 /* guest vce engine usage percentage. 0xffff means N/A. */
232 uint32_t vce_enc_usage;
233 /* guest vce engine health percentage. 0xffff means N/A. */
234 uint32_t vce_enc_health;
235 /* guest uvd engine usage percentage. 0xffff means N/A. */
236 uint32_t uvd_enc_usage;
237 /* guest uvd engine usage percentage. 0xffff means N/A. */
238 uint32_t uvd_enc_health;
239 uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 64, 0, (12 + sizeof(struct amd_sriov_msg_vf2pf_info_header)/sizeof(uint32_t)), 0)];
240} __aligned(4);
241
/*
 * RAS bad-page bookkeeping for a virtual function; struct amdgpu_virt
 * points to one instance through its virt_eh_data member.
 */
struct amdgpu_virt_ras_err_handler_data {
	/* point to bad page records array */
	struct eeprom_table_record *bps;
	/* point to reserved bo array */
	struct amdgpu_bo **bps_bo;
	/* the count of entries */
	int count;
	/* last reserved entry's index + 1 */
	int last_reserved;
};
252
253struct amdgpu_virt_ras {
254 struct ratelimit_state ras_error_cnt_rs;
255 struct ratelimit_state ras_cper_dump_rs;
256 struct ratelimit_state ras_chk_criti_rs;
257 struct mutex ras_telemetry_mutex;
258 uint64_t cper_rptr;
259};
260
/*
 * X-macro list of virt capabilities: X() is applied once per capability.
 * NOTE(review): DECLARE_ATTR_CAP_CLASS() is defined outside this header; it
 * presumably generates struct amdgpu_virt_caps, which struct amdgpu_virt
 * embeds as virt_caps. Confirm at its definition.
 */
#define AMDGPU_VIRT_CAPS_LIST(X) X(AMDGPU_VIRT_CAP_POWER_LIMIT)

DECLARE_ATTR_CAP_CLASS(amdgpu_virt, AMDGPU_VIRT_CAPS_LIST);
264
265/* GPU virtualization */
266struct amdgpu_virt {
267 uint32_t caps;
268 struct amdgpu_bo *csa_obj;
269 void *csa_cpu_addr;
270 bool chained_ib_support;
271 uint32_t reg_val_offs;
272 struct amdgpu_irq_src ack_irq;
273 struct amdgpu_irq_src rcv_irq;
274
275 struct work_struct flr_work;
276 struct work_struct req_bad_pages_work;
277 struct work_struct handle_bad_pages_work;
278
279 struct amdgpu_mm_table mm_table;
280 const struct amdgpu_virt_ops *ops;
281 struct amdgpu_vf_error_buffer vf_errors;
282 struct amdgpu_virt_fw_reserve fw_reserve;
283 struct amdgpu_virt_caps virt_caps;
284 uint32_t gim_feature;
285 uint32_t reg_access_mode;
286 int req_init_data_ver;
287 bool tdr_debug;
288 struct amdgpu_virt_ras_err_handler_data *virt_eh_data;
289 bool ras_init_done;
290 uint32_t reg_access;
291
292 /* vf2pf message */
293 struct delayed_work vf2pf_work;
294 uint32_t vf2pf_update_interval_ms;
295 int vf2pf_update_retry_cnt;
296
297 /* multimedia bandwidth config */
298 bool is_mm_bw_enabled;
299 uint32_t decode_max_dimension_pixels;
300 uint32_t decode_max_frame_pixels;
301 uint32_t encode_max_dimension_pixels;
302 uint32_t encode_max_frame_pixels;
303
304 /* the ucode id to signal the autoload */
305 uint32_t autoload_ucode_id;
306
307 /* Spinlock to protect access to the RLCG register interface */
308 spinlock_t rlcg_reg_lock;
309
310 union amd_sriov_ras_caps ras_en_caps;
311 union amd_sriov_ras_caps ras_telemetry_en_caps;
312 struct amdgpu_virt_ras ras;
313 struct amd_sriov_ras_telemetry_error_count count_cache;
314
315 /* hibernate and resume with different VF feature for xgmi enabled system */
316 bool is_xgmi_node_migrate_enabled;
317};
318
/* Forward declaration; used by amdgpu_virt_update_sriov_video_codec(). */
struct amdgpu_video_codec_info;
320
/*
 * Capability tests on (adev)->virt.caps. Each macro yields the masked bit
 * (non-zero when set), not a normalised 0/1 value.
 */
#define amdgpu_sriov_enabled(adev) \
	((adev)->virt.caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV)

#define amdgpu_sriov_vf(adev) \
	((adev)->virt.caps & AMDGPU_SRIOV_CAPS_IS_VF)

#define amdgpu_sriov_bios(adev) \
	((adev)->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS)

#define amdgpu_sriov_runtime(adev) \
	((adev)->virt.caps & AMDGPU_SRIOV_CAPS_RUNTIME)

/* True for a VF that does not have the RUNTIME bit set. */
#define amdgpu_sriov_fullaccess(adev) \
	(amdgpu_sriov_vf((adev)) && !amdgpu_sriov_runtime((adev)))
335
/*
 * Register access method tests. All of them are false unless the device is
 * a VF; the first checks a gim_feature bit, the others check reg_access
 * bits (enum AMDGIM_REG_ACCESS_FLAG).
 */
#define amdgpu_sriov_reg_indirect_en(adev) \
	(amdgpu_sriov_vf((adev)) && \
	 ((adev)->virt.gim_feature & (AMDGIM_FEATURE_INDIRECT_REG_ACCESS)))

#define amdgpu_sriov_reg_indirect_ih(adev) \
	(amdgpu_sriov_vf((adev)) && \
	 ((adev)->virt.reg_access & (AMDGIM_FEATURE_IH_REG_PSP_EN)))

#define amdgpu_sriov_reg_indirect_mmhub(adev) \
	(amdgpu_sriov_vf((adev)) && \
	 ((adev)->virt.reg_access & (AMDGIM_FEATURE_MMHUB_REG_RLC_EN)))

#define amdgpu_sriov_reg_indirect_gc(adev) \
	(amdgpu_sriov_vf((adev)) && \
	 ((adev)->virt.reg_access & (AMDGIM_FEATURE_GC_REG_RLC_EN)))

#define amdgpu_sriov_reg_indirect_l1_tlb_cntl(adev) \
	(amdgpu_sriov_vf((adev)) && \
	 ((adev)->virt.reg_access & (AMDGIM_FEATURE_L1_TLB_CNTL_PSP_EN)))

/* True when either the MMHUB or the GC indirect path is in use. */
#define amdgpu_sriov_rlcg_error_report_enabled(adev) \
	(amdgpu_sriov_reg_indirect_mmhub(adev) || amdgpu_sriov_reg_indirect_gc(adev))

#define amdgpu_sriov_reg_access_sq_config(adev) \
	(amdgpu_sriov_vf((adev)) && \
	 ((adev)->virt.reg_access & (AMDGIM_FEATURE_REG_ACCESS_SQ_CONFIG)))
362
#define amdgpu_passthrough(adev) \
	((adev)->virt.caps & AMDGPU_PASSTHROUGH_MODE)

#define amdgpu_sriov_vf_mmio_access_protection(adev) \
	((adev)->virt.caps & AMDGPU_VF_MMIO_ACCESS_PROTECT)

#define amdgpu_sriov_ras_caps_en(adev) \
	((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_CAPS)

/* Telemetry needs both the feature bit and a non-NULL telemetry pointer. */
#define amdgpu_sriov_ras_telemetry_en(adev) \
	(((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_TELEMETRY) && \
	 (adev)->virt.fw_reserve.ras_telemetry)

/*
 * Telemetry enabled and the bit for @sriov_blk set in
 * ras_telemetry_en_caps.all. The bit test is parenthesised explicitly so
 * the '&' inside '&&' cannot be misread.
 */
#define amdgpu_sriov_ras_telemetry_block_en(adev, sriov_blk) \
	(amdgpu_sriov_ras_telemetry_en((adev)) && \
	 ((adev)->virt.ras_telemetry_en_caps.all & BIT(sriov_blk)))

#define amdgpu_sriov_ras_cper_en(adev) \
	((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_CPER)
380
/*
 * is_virtual_machine - report whether the kernel appears to run in a VM
 *
 * x86:   true when the CPU advertises X86_FEATURE_HYPERVISOR.
 * arm64: true when the kernel is not running in hyp mode.
 * other: always false.
 */
static inline bool is_virtual_machine(void)
{
#if defined(CONFIG_X86)
	return boot_cpu_has(X86_FEATURE_HYPERVISOR);
#elif defined(CONFIG_ARM64)
	return !is_kernel_in_hyp_mode();
#else
	return false;
#endif
}
391
/*
 * Miscellaneous feature tests. The macro argument is parenthesised at every
 * use so that expressions such as "&dev" or "devs + 1" expand correctly.
 */
#define amdgpu_sriov_is_pp_one_vf(adev) \
	((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF)
/* A VF for which the PP_ONE_VF feature bit is clear. */
#define amdgpu_sriov_multi_vf_mode(adev) \
	(amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
/* Not in reset and tdr_debug set. */
#define amdgpu_sriov_is_debug(adev) \
	((!amdgpu_in_reset(adev)) && (adev)->virt.tdr_debug)
/* Not in reset and tdr_debug clear. */
#define amdgpu_sriov_is_normal(adev) \
	((!amdgpu_in_reset(adev)) && (!(adev)->virt.tdr_debug))
#define amdgpu_sriov_is_av1_support(adev) \
	((adev)->virt.gim_feature & AMDGIM_FEATURE_AV1_SUPPORT)
#define amdgpu_sriov_is_vcn_rb_decouple(adev) \
	((adev)->virt.gim_feature & AMDGIM_FEATURE_VCN_RB_DECOUPLE)
#define amdgpu_sriov_is_mes_info_enable(adev) \
	((adev)->virt.gim_feature & AMDGIM_FEATURE_MES_INFO_ENABLE)

/* Migration flag set and a non-zero xgmi node segment size. */
#define amdgpu_virt_xgmi_migrate_enabled(adev) \
	((adev)->virt.is_xgmi_node_migrate_enabled && (adev)->gmc.xgmi.node_segment_size != 0)
409
/* Virtualization entry points; see the definitions for their contracts. */
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
void amdgpu_virt_init_setting(struct amdgpu_device *adev);
int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
void amdgpu_virt_request_init_data(struct amdgpu_device *adev);
void amdgpu_virt_ready_to_reset(struct amdgpu_device *adev);
int amdgpu_virt_wait_reset(struct amdgpu_device *adev);
int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev);
void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);
bool amdgpu_virt_rcvd_ras_interrupt(struct amdgpu_device *adev);
void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev);
void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
void amdgpu_virt_exchange_data(struct amdgpu_device *adev);
void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev);
void amdgpu_virt_init(struct amdgpu_device *adev);

/* debugfs access helpers */
bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev);

enum amdgpu_sriov_vf_mode amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *adev);
432
433void amdgpu_virt_update_sriov_video_codec(struct amdgpu_device *adev,
434 struct amdgpu_video_codec_info *encode, uint32_t encode_array_size,
435 struct amdgpu_video_codec_info *decode, uint32_t decode_array_size);
436void amdgpu_sriov_wreg(struct amdgpu_device *adev,
437 u32 offset, u32 value,
438 u32 acc_flags, u32 hwip, u32 xcc_id);
439u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
440 u32 offset, u32 acc_flags, u32 hwip, u32 xcc_id);
441bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev,
442 uint32_t ucode_id);
443void amdgpu_virt_pre_reset(struct amdgpu_device *adev);
444void amdgpu_virt_post_reset(struct amdgpu_device *adev);
445bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev);
446bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev,
447 u32 acc_flags, u32 hwip,
448 bool write, u32 *rlcg_flag);
449u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id);
450bool amdgpu_virt_get_ras_capability(struct amdgpu_device *adev);
451int amdgpu_virt_req_ras_err_count(struct amdgpu_device *adev, enum amdgpu_ras_block block,
452 struct ras_err_data *err_data);
453int amdgpu_virt_req_ras_cper_dump(struct amdgpu_device *adev, bool force_update);
454int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev);
455bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev,
456 enum amdgpu_ras_block block);
457void amdgpu_virt_request_bad_pages(struct amdgpu_device *adev);
458int amdgpu_virt_check_vf_critical_region(struct amdgpu_device *adev, u64 addr, bool *hit);
459#endif