Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1/*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19 * USE OR OTHER DEALINGS IN THE SOFTWARE.
20 *
21 * The above copyright notice and this permission notice (including the
22 * next paragraph) shall be included in all copies or substantial portions
23 * of the Software.
24 *
25 */
26
27#include <linux/firmware.h>
28#include <drm/drmP.h>
29#include "amdgpu.h"
30#include "amdgpu_vce.h"
31#include "soc15d.h"
32#include "soc15_common.h"
33#include "mmsch_v1_0.h"
34
35#include "vega10/soc15ip.h"
36#include "vega10/VCE/vce_4_0_offset.h"
37#include "vega10/VCE/vce_4_0_default.h"
38#include "vega10/VCE/vce_4_0_sh_mask.h"
39#include "vega10/MMHUB/mmhub_1_0_offset.h"
40#include "vega10/MMHUB/mmhub_1_0_sh_mask.h"
41
42#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02
43
44#define VCE_V4_0_FW_SIZE (384 * 1024)
45#define VCE_V4_0_STACK_SIZE (64 * 1024)
46#define VCE_V4_0_DATA_SIZE ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
47
48static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
49static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
50static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
51
52/**
53 * vce_v4_0_ring_get_rptr - get read pointer
54 *
55 * @ring: amdgpu_ring pointer
56 *
57 * Returns the current hardware read pointer
58 */
59static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
60{
61 struct amdgpu_device *adev = ring->adev;
62
63 if (ring == &adev->vce.ring[0])
64 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
65 else if (ring == &adev->vce.ring[1])
66 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
67 else
68 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
69}
70
71/**
72 * vce_v4_0_ring_get_wptr - get write pointer
73 *
74 * @ring: amdgpu_ring pointer
75 *
76 * Returns the current hardware write pointer
77 */
78static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
79{
80 struct amdgpu_device *adev = ring->adev;
81
82 if (ring->use_doorbell)
83 return adev->wb.wb[ring->wptr_offs];
84
85 if (ring == &adev->vce.ring[0])
86 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
87 else if (ring == &adev->vce.ring[1])
88 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
89 else
90 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
91}
92
93/**
94 * vce_v4_0_ring_set_wptr - set write pointer
95 *
96 * @ring: amdgpu_ring pointer
97 *
98 * Commits the write pointer to the hardware
99 */
100static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
101{
102 struct amdgpu_device *adev = ring->adev;
103
104 if (ring->use_doorbell) {
105 /* XXX check if swapping is necessary on BE */
106 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
107 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
108 return;
109 }
110
111 if (ring == &adev->vce.ring[0])
112 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
113 lower_32_bits(ring->wptr));
114 else if (ring == &adev->vce.ring[1])
115 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
116 lower_32_bits(ring->wptr));
117 else
118 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
119 lower_32_bits(ring->wptr));
120}
121
/**
 * vce_v4_0_firmware_loaded - wait for the VCE firmware to come up
 *
 * @adev: amdgpu_device pointer
 *
 * Polls VCE_STATUS for the FW_LOADED bit; if it does not appear,
 * pulses the ECPU soft reset and retries, up to 10 times.
 *
 * Returns 0 once the firmware reports loaded, -ETIMEDOUT otherwise.
 */
static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	/* up to 10 recovery attempts, each with ~1s (100 * 10ms) of polling */
	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status =
				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		/* assert then deassert the ECPU soft reset, with settle delays */
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);

	}

	return -ETIMEDOUT;
}
149
/**
 * vce_v4_0_mmsch_start - hand an init descriptor table to the MMSCH
 *
 * @adev: amdgpu_device pointer
 * @table: MM table holding the init descriptors (GPU + CPU addresses)
 *
 * SR-IOV path: points the MM scheduler (MMSCH) at the descriptor table,
 * kicks off initialization via the mailbox, and waits for completion.
 *
 * Returns 0 on success, -EBUSY if the MMSCH does not respond in time.
 */
static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
				struct amdgpu_mm_table *table)
{
	uint32_t data = 0, loop;
	uint64_t addr = table->gpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
	uint32_t size;

	/* total descriptor size in dwords: header plus both engine tables */
	size = header->header_size + header->vce_table_size + header->uvd_table_size;

	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

	/* 2, update vmid of descriptor */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

	/* 3, notify mmsch about the size of this descriptor */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

	/* 4, set resp to zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

	/* poll the response register, ~10ms budget (1000 * 10us) */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
	loop = 1000;
	while ((data & 0x10000002) != 0x10000002) {
		udelay(10);
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
		loop--;
		if (!loop)
			break;
	}

	if (!loop) {
		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
		return -EBUSY;
	}

	return 0;
}
196
/**
 * vce_v4_0_sriov_start - build the MMSCH init table and start VCE (SR-IOV)
 *
 * @adev: amdgpu_device pointer
 *
 * Instead of programming registers directly, the VF appends a table of
 * register write/rmw/poll commands to the shared MM table; the MMSCH
 * replays them on our behalf (see vce_v4_0_mmsch_start()).
 *
 * Returns the result of vce_v4_0_mmsch_start(), or -EINVAL if the VCE
 * portion of the table was already populated.
 */
static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size;
	uint32_t table_size = 0;
	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
	struct mmsch_v1_0_cmd_end end = { { 0 } };
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	/* only fill in the VCE table if it has not been populated yet */
	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
		header->version = MMSCH_VERSION;
		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

		/* VCE table goes right after the header, or after the UVD table */
		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
			header->vce_table_offset = header->header_size;
		else
			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

		init_table += header->vce_table_offset;

		/* ring 0 buffer location and size */
		ring = &adev->vce.ring[0];
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
					    lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
					    upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
					    ring->ring_size / 4);

		/* BEGIN OF MC_RESUME (mirrors vce_v4_0_mc_resume()) */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

		/* firmware base: PSP-loaded ucode vs driver-owned BO */
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						    adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
						    adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
						    adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
		} else {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						    adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
						    adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
						    adev->vce.gpu_addr >> 8);
		}

		/* cache window 0: firmware image */
		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
		size = VCE_V4_0_FW_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
					    offset & 0x7FFFFFFF);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

		/* cache window 1: stack */
		offset += size;
		size = VCE_V4_0_STACK_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
					    offset & 0x7FFFFFFF);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

		/* cache window 2: data */
		offset += size;
		size = VCE_V4_0_DATA_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
					    offset & 0x7FFFFFFF);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
						   0xffffffff, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

		/* end of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

		/* have the MMSCH poll until the firmware reports loaded */
		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

		/* clear BUSY flag */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   ~VCE_STATUS__JOB_BUSY_MASK, 0);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		header->vce_table_size = table_size;

		return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
	}

	return -EINVAL; /* already initializaed ? */
}
304
305/**
306 * vce_v4_0_start - start VCE block
307 *
308 * @adev: amdgpu_device pointer
309 *
310 * Setup and start the VCE block
311 */
312static int vce_v4_0_start(struct amdgpu_device *adev)
313{
314 struct amdgpu_ring *ring;
315 int r;
316
317 ring = &adev->vce.ring[0];
318
319 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
320 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
321 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
322 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
323 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
324
325 ring = &adev->vce.ring[1];
326
327 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
328 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
329 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
330 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
331 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);
332
333 ring = &adev->vce.ring[2];
334
335 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
336 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
337 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
338 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
339 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);
340
341 vce_v4_0_mc_resume(adev);
342 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
343 ~VCE_STATUS__JOB_BUSY_MASK);
344
345 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);
346
347 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
348 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
349 mdelay(100);
350
351 r = vce_v4_0_firmware_loaded(adev);
352
353 /* clear BUSY flag */
354 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
355
356 if (r) {
357 DRM_ERROR("VCE not responding, giving up!!!\n");
358 return r;
359 }
360
361 return 0;
362}
363
/**
 * vce_v4_0_stop - stop the VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disables the VCPU clock, holds the ECPU in soft reset and clears
 * the BUSY flag. Always returns 0.
 */
static int vce_v4_0_stop(struct amdgpu_device *adev)
{

	/* disable the VCPU clock */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

	/* hold on ECPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
		VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
		~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	/* Set Clock-Gating off */
	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
		vce_v4_0_set_vce_sw_clock_gating(adev, false);
	*/

	return 0;
}
384
385static int vce_v4_0_early_init(void *handle)
386{
387 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
388
389 if (amdgpu_sriov_vf(adev)) /* currently only VCN0 support SRIOV */
390 adev->vce.num_rings = 1;
391 else
392 adev->vce.num_rings = 3;
393
394 vce_v4_0_set_ring_funcs(adev);
395 vce_v4_0_set_irq_funcs(adev);
396
397 return 0;
398}
399
/**
 * vce_v4_0_sw_init - software init callback
 *
 * @handle: amdgpu_device pointer (as void *)
 *
 * Registers the VCE interrupt source, allocates the firmware/stack/data
 * BO, sets up PSP firmware bookkeeping when applicable, initializes the
 * rings (doorbell-based under SR-IOV), and allocates the MM table.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int vce_v4_0_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	unsigned size;
	int r, i;

	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
	if (r)
		return r;

	/* stack + data for each of two instances; the FW image itself is
	 * only carried in the BO when PSP is not doing the loading
	 */
	size = (VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE) * 2;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		size += VCE_V4_0_FW_SIZE;

	r = amdgpu_vce_sw_init(adev, size);
	if (r)
		return r;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		/* register the VCE ucode with the PSP loader */
		const struct common_firmware_header *hdr;
		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
		DRM_INFO("PSP loading VCE firmware\n");
	}

	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	for (i = 0; i < adev->vce.num_rings; i++) {
		ring = &adev->vce.ring[i];
		sprintf(ring->name, "vce%d", i);
		if (amdgpu_sriov_vf(adev)) {
			/* DOORBELL only works under SRIOV */
			ring->use_doorbell = true;
			if (i == 0)
				ring->doorbell_index = AMDGPU_DOORBELL64_RING0_1 * 2;
			else if (i == 1)
				ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2;
			else
				ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2 + 1;
		}
		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
		if (r)
			return r;
	}

	/* shared table used by the MMSCH under SR-IOV */
	r = amdgpu_virt_alloc_mm_table(adev);
	if (r)
		return r;

	return r;
}
459
/**
 * vce_v4_0_sw_fini - software teardown callback
 *
 * @handle: amdgpu_device pointer (as void *)
 *
 * Frees the MM table, suspends VCE and releases the software state.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int vce_v4_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* free MM table */
	amdgpu_virt_free_mm_table(adev);

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return amdgpu_vce_sw_fini(adev);
}
474
475static int vce_v4_0_hw_init(void *handle)
476{
477 int r, i;
478 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
479
480 if (amdgpu_sriov_vf(adev))
481 r = vce_v4_0_sriov_start(adev);
482 else
483 r = vce_v4_0_start(adev);
484 if (r)
485 return r;
486
487 for (i = 0; i < adev->vce.num_rings; i++)
488 adev->vce.ring[i].ready = false;
489
490 for (i = 0; i < adev->vce.num_rings; i++) {
491 r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
492 if (r)
493 return r;
494 else
495 adev->vce.ring[i].ready = true;
496 }
497
498 DRM_INFO("VCE initialized successfully.\n");
499
500 return 0;
501}
502
/**
 * vce_v4_0_hw_fini - hardware teardown callback
 *
 * @handle: amdgpu_device pointer (as void *)
 *
 * Stops the block and marks all rings not ready. Always returns 0.
 */
static int vce_v4_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	/* vce_v4_0_wait_for_idle(handle); */
	vce_v4_0_stop(adev);
	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].ready = false;

	return 0;
}
515
/**
 * vce_v4_0_suspend - suspend callback
 *
 * @handle: amdgpu_device pointer (as void *)
 *
 * Tears down the hardware state, then suspends the common VCE code.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int vce_v4_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r = vce_v4_0_hw_fini(adev);

	return r ? r : amdgpu_vce_suspend(adev);
}
527
/**
 * vce_v4_0_resume - resume callback
 *
 * @handle: amdgpu_device pointer (as void *)
 *
 * Resumes the common VCE code, then re-initializes the hardware.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int vce_v4_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r = amdgpu_vce_resume(adev);

	return r ? r : vce_v4_0_hw_init(adev);
}
539
/**
 * vce_v4_0_mc_resume - program VCE memory controller state
 *
 * @adev: amdgpu_device pointer
 *
 * Restores clock gating defaults, LMI setup, the firmware base address
 * (PSP-loaded ucode or the driver-owned BO) and the three VCPU cache
 * windows (firmware / stack / data), then enables the system interrupt.
 */
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
	uint32_t offset, size;

	/* clock gating setup — magic values taken from the HW programming
	 * sequence; meanings not derivable from this file
	 */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

	/* LMI (local memory interface) setup; disable swapping */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

	/* firmware base address: PSP ucode location or the VCE BO */
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
	} else {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->vce.gpu_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->vce.gpu_addr >> 40) & 0xff);
	}

	/* cache window 0: firmware image */
	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
	size = VCE_V4_0_FW_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

	/* cache window 1: stack
	 * NOTE(review): BAR1/BAR2 always use adev->vce.gpu_addr, even on
	 * the PSP path above — presumably intentional since stack/data
	 * live in the driver BO; confirm against HW docs.
	 */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
	size = VCE_V4_0_STACK_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

	/* cache window 2: data */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
	offset += size;
	size = VCE_V4_0_DATA_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

	/* enable the system interrupt */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}
591
/**
 * vce_v4_0_set_clockgating_state - clockgating callback (stub)
 *
 * @handle: amdgpu_device pointer (as void *)
 * @state: requested clockgating state (ignored)
 *
 * Intentionally does nothing; the callback only exists so the IP
 * framework has a valid hook during driver unload.
 */
static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	/* needed for driver unload*/
	return 0;
}
598
599#if 0
/* (compiled out by the surrounding #if 0)
 * vce_v4_0_is_idle - check whether the non-harvested VCE instances are idle
 *
 * @handle: amdgpu_device pointer (as void *)
 *
 * Builds a busy mask from the instances that survived harvesting and
 * tests it against SRBM_STATUS2. Returns true when neither is busy.
 */
static bool vce_v4_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	/* only consider instances that are actually present */
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}
610
/* (compiled out by the surrounding #if 0)
 * vce_v4_0_wait_for_idle - busy-wait until VCE goes idle
 *
 * @handle: amdgpu_device pointer (as void *)
 *
 * Returns 0 once idle, -ETIMEDOUT after adev->usec_timeout iterations.
 */
static int vce_v4_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v4_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}
622
623#define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK 0x00000008L /* AUTO_BUSY */
624#define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK 0x00000010L /* RB0_BUSY */
625#define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK 0x00000020L /* RB1_BUSY */
626#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
627 VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
628
629static bool vce_v4_0_check_soft_reset(void *handle)
630{
631 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
632 u32 srbm_soft_reset = 0;
633
634 /* According to VCE team , we should use VCE_STATUS instead
635 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
636 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
637 * instance's registers are accessed
638 * (0 for 1st instance, 10 for 2nd instance).
639 *
640 *VCE_STATUS
641 *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 | |FW_LOADED|JOB |
642 *|----+----+-----------+----+----+----+----------+---------+----|
643 *|bit8|bit7| bit6 |bit5|bit4|bit3| bit2 | bit1 |bit0|
644 *
645 * VCE team suggest use bit 3--bit 6 for busy status check
646 */
647 mutex_lock(&adev->grbm_idx_mutex);
648 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
649 if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
650 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
651 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
652 }
653 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
654 if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
655 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
656 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
657 }
658 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
659 mutex_unlock(&adev->grbm_idx_mutex);
660
661 if (srbm_soft_reset) {
662 adev->vce.srbm_soft_reset = srbm_soft_reset;
663 return true;
664 } else {
665 adev->vce.srbm_soft_reset = 0;
666 return false;
667 }
668}
669
/* (compiled out by the surrounding #if 0)
 * vce_v4_0_soft_reset - perform the SRBM soft reset recorded earlier
 *
 * @handle: amdgpu_device pointer (as void *)
 *
 * Asserts the bits stored in adev->vce.srbm_soft_reset, waits, then
 * deasserts them. No-op when no reset was requested. Always returns 0.
 */
static int vce_v4_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		/* assert the reset bits; read back to post the write */
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		/* deassert */
		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}
700
/* (compiled out by the surrounding #if 0)
 * vce_v4_0_pre_soft_reset - quiesce VCE before a soft reset
 *
 * @handle: amdgpu_device pointer (as void *)
 *
 * Suspends the block when a soft reset is pending; no-op otherwise.
 */
static int vce_v4_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_suspend(adev);
}
712
713
/* (compiled out by the surrounding #if 0)
 * vce_v4_0_post_soft_reset - bring VCE back up after a soft reset
 *
 * @handle: amdgpu_device pointer (as void *)
 *
 * Resumes the block when a soft reset was pending; no-op otherwise.
 */
static int vce_v4_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_resume(adev);
}
725
726static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
727{
728 u32 tmp, data;
729
730 tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
731 if (override)
732 data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
733 else
734 data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
735
736 if (tmp != data)
737 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
738}
739
/* (compiled out by the surrounding #if 0)
 * vce_v4_0_set_vce_sw_clock_gating - program software MGCG register state
 *
 * @adev: amdgpu_device pointer
 * @gated: true to allow the firmware to gate clocks, false to force them on
 *
 * Mask values are taken from the HW programming sequence; their bit
 * meanings are not derivable from this file.
 */
static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
		bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v4_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	   With the clocks in the gated state the core is still
	   accessible but the firmware will throttle the clocks on the
	   fly as necessary.
	*/
	if (gated) {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data |= 0x37f;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		/* keep the DMA clocks forced on even while gated */
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			0x8;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	} else {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x10000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			  0x8);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	}
	vce_v4_0_override_vce_clock_gating(adev, false);
}
806
/* (compiled out by the surrounding #if 0)
 * vce_v4_0_set_bypass_mode - toggle the ECLK DFS bypass bit
 *
 * @adev: amdgpu_device pointer
 * @enable: true to bypass the ECLK DFS, false to use it
 */
static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

	if (enable)
		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
	else
		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}
818
819static int vce_v4_0_set_clockgating_state(void *handle,
820 enum amd_clockgating_state state)
821{
822 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
823 bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
824 int i;
825
826 if ((adev->asic_type == CHIP_POLARIS10) ||
827 (adev->asic_type == CHIP_TONGA) ||
828 (adev->asic_type == CHIP_FIJI))
829 vce_v4_0_set_bypass_mode(adev, enable);
830
831 if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
832 return 0;
833
834 mutex_lock(&adev->grbm_idx_mutex);
835 for (i = 0; i < 2; i++) {
836 /* Program VCE Instance 0 or 1 if not harvested */
837 if (adev->vce.harvest_config & (1 << i))
838 continue;
839
840 WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
841
842 if (enable) {
843 /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
844 uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A);
845 data &= ~(0xf | 0xff0);
846 data |= ((0x0 << 0) | (0x04 << 4));
847 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A, data);
848
849 /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
850 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING);
851 data &= ~(0xf | 0xff0);
852 data |= ((0x0 << 0) | (0x04 << 4));
853 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING, data);
854 }
855
856 vce_v4_0_set_vce_sw_clock_gating(adev, enable);
857 }
858
859 WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
860 mutex_unlock(&adev->grbm_idx_mutex);
861
862 return 0;
863}
864
/* (compiled out by the surrounding #if 0)
 * vce_v4_0_set_powergating_state - powergating callback
 *
 * @handle: amdgpu_device pointer (as void *)
 * @state: AMD_PG_STATE_GATE to gate, otherwise ungate (restarts the block)
 */
static int vce_v4_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	/* This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC.  This
	 * just re-inits the block as necessary.  The actual
	 * gating still happens in the dpm code.  We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks
	 */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
		return 0;

	if (state == AMD_PG_STATE_GATE)
		/* XXX do we need a vce_v4_0_stop()? */
		return 0;
	else
		return vce_v4_0_start(adev);
}
886#endif
887
/**
 * vce_v4_0_ring_emit_ib - emit an indirect buffer to the ring
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to execute
 * @vm_id: VM id to run the IB under
 * @ctx_switch: unused here
 *
 * Writes a VCE_CMD_IB_VM packet: vmid, 64-bit IB address, length in dwords.
 */
static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
		struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
{
	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vm_id);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}
897
/**
 * vce_v4_0_ring_emit_fence - emit a fence followed by a trap
 *
 * @ring: amdgpu_ring pointer
 * @addr: 64-bit address to write the fence value to
 * @seq: fence sequence number
 * @flags: fence flags; 64-bit fences are not supported here
 *
 * Writes a VCE_CMD_FENCE packet (address + seq) and a VCE_CMD_TRAP to
 * raise the completion interrupt.
 */
static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
			u64 seq, unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
}
909
/**
 * vce_v4_0_ring_insert_end - emit an end-of-stream marker
 *
 * @ring: amdgpu_ring pointer
 */
static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, VCE_CMD_END);
}
914
/**
 * vce_v4_0_emit_vm_flush - emit a VM page table update and TLB flush
 *
 * @ring: amdgpu_ring pointer
 * @vm_id: VM id whose page directory is being switched
 * @pd_addr: physical base address of the page directory
 *
 * Writes the new PD address into the hub's per-VM context registers,
 * waits for it to land, then requests and waits for a TLB invalidation
 * on this ring's invalidation engine.
 */
static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
			 unsigned int vm_id, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
	unsigned eng = ring->vm_inv_eng;

	pd_addr = pd_addr | 0x1; /* valid bit */
	/* now only use physical base address of PDE and valid */
	BUG_ON(pd_addr & 0xFFFF00000000003EULL);

	/* program the new PD base, high dword then low dword */
	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
	amdgpu_ring_write(ring, upper_32_bits(pd_addr));

	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
	amdgpu_ring_write(ring, lower_32_bits(pd_addr));

	/* wait until the low dword reads back the value we just wrote */
	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, lower_32_bits(pd_addr));

	/* flush TLB */
	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring,	(hub->vm_inv_eng0_req + eng) << 2);
	amdgpu_ring_write(ring, req);

	/* wait for flush */
	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
	amdgpu_ring_write(ring, 1 << vm_id);
	amdgpu_ring_write(ring, 1 << vm_id);
}
950
951static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
952 struct amdgpu_irq_src *source,
953 unsigned type,
954 enum amdgpu_interrupt_state state)
955{
956 uint32_t val = 0;
957
958 if (state == AMDGPU_IRQ_STATE_ENABLE)
959 val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
960
961 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
962 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
963 return 0;
964}
965
966static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
967 struct amdgpu_irq_src *source,
968 struct amdgpu_iv_entry *entry)
969{
970 DRM_DEBUG("IH: VCE\n");
971
972 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_STATUS),
973 VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK,
974 ~VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK);
975
976 switch (entry->src_data[0]) {
977 case 0:
978 case 1:
979 case 2:
980 amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
981 break;
982 default:
983 DRM_ERROR("Unhandled interrupt: %d %d\n",
984 entry->src_id, entry->src_data[0]);
985 break;
986 }
987
988 return 0;
989}
990
/* IP-block level callbacks for VCE 4.0.  Idle/soft-reset handling is not
 * implemented yet; the NULL entries name their intended implementations.
 * Power gating is also wired to NULL here — a vce_v4_0_set_powergating_state
 * exists elsewhere in this file but is currently compiled out.
 */
const struct amd_ip_funcs vce_v4_0_ip_funcs = {
	.name = "vce_v4_0",
	.early_init = vce_v4_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v4_0_sw_init,
	.sw_fini = vce_v4_0_sw_fini,
	.hw_init = vce_v4_0_hw_init,
	.hw_fini = vce_v4_0_hw_fini,
	.suspend = vce_v4_0_suspend,
	.resume = vce_v4_0_resume,
	.is_idle = NULL /* vce_v4_0_is_idle */,
	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
	.soft_reset = NULL /* vce_v4_0_soft_reset */,
	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
	.set_clockgating_state = vce_v4_0_set_clockgating_state,
	.set_powergating_state = NULL /* vce_v4_0_set_powergating_state */,
};
1010
/* Ring callbacks for VCE 4.0 in VM mode.  The emit_*_size entries are
 * dword counts and must track the corresponding emit functions in this
 * file: emit_vm_flush writes 17 dwords, emit_fence writes 5 (reserved
 * twice per frame), insert_end writes 1, and emit_ib writes 5.
 */
static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0x3f,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.vmhub = AMDGPU_MMHUB,
	.get_rptr = vce_v4_0_ring_get_rptr,
	.get_wptr = vce_v4_0_ring_get_wptr,
	.set_wptr = vce_v4_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		17 + /* vce_v4_0_emit_vm_flush */
		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
		1, /* vce_v4_0_ring_insert_end */
	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
	.emit_ib = vce_v4_0_ring_emit_ib,
	.emit_vm_flush = vce_v4_0_emit_vm_flush,
	.emit_fence = vce_v4_0_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vce_v4_0_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
};
1037
1038static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1039{
1040 int i;
1041
1042 for (i = 0; i < adev->vce.num_rings; i++)
1043 adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1044 DRM_INFO("VCE enabled in VM mode\n");
1045}
1046
/* Interrupt-source callbacks: enable/disable state and IH processing. */
static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
	.set = vce_v4_0_set_interrupt_state,
	.process = vce_v4_0_process_interrupt,
};
1051
1052static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1053{
1054 adev->vce.irq.num_types = 1;
1055 adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
1056};
1057
/* Exported IP-block descriptor registered with the amdgpu core for the
 * VCE 4.0 hardware block.
 */
const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 4,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v4_0_ip_funcs,
};