Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1/*
2 * Copyright 2023 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
24#include <linux/firmware.h>
25#include <linux/module.h>
26#include "amdgpu.h"
27#include "gfx_v12_0.h"
28#include "soc15_common.h"
29#include "soc21.h"
30#include "gc/gc_12_0_0_offset.h"
31#include "gc/gc_12_0_0_sh_mask.h"
32#include "gc/gc_11_0_0_default.h"
33#include "v12_structs.h"
34#include "mes_v12_api_def.h"
35
36MODULE_FIRMWARE("amdgpu/gc_12_0_0_mes.bin");
37MODULE_FIRMWARE("amdgpu/gc_12_0_0_mes1.bin");
38MODULE_FIRMWARE("amdgpu/gc_12_0_0_uni_mes.bin");
39MODULE_FIRMWARE("amdgpu/gc_12_0_1_mes.bin");
40MODULE_FIRMWARE("amdgpu/gc_12_0_1_mes1.bin");
41MODULE_FIRMWARE("amdgpu/gc_12_0_1_uni_mes.bin");
42
43static int mes_v12_0_hw_init(struct amdgpu_ip_block *ip_block);
44static int mes_v12_0_hw_fini(struct amdgpu_ip_block *ip_block);
45static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev);
46static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev);
47
48#define MES_EOP_SIZE 2048
49
50#define MES12_HUNG_DB_OFFSET_ARRAY_SIZE 4
51
52static void mes_v12_0_ring_set_wptr(struct amdgpu_ring *ring)
53{
54 struct amdgpu_device *adev = ring->adev;
55
56 if (ring->use_doorbell) {
57 atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
58 ring->wptr);
59 WDOORBELL64(ring->doorbell_index, ring->wptr);
60 } else {
61 BUG();
62 }
63}
64
65static u64 mes_v12_0_ring_get_rptr(struct amdgpu_ring *ring)
66{
67 return *ring->rptr_cpu_addr;
68}
69
70static u64 mes_v12_0_ring_get_wptr(struct amdgpu_ring *ring)
71{
72 u64 wptr;
73
74 if (ring->use_doorbell)
75 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
76 else
77 BUG();
78 return wptr;
79}
80
/* Ring callbacks for the MES rings; rptr/wptr live in CPU-visible slots
 * and the wptr is propagated to hardware via a 64-bit doorbell write.
 */
static const struct amdgpu_ring_funcs mes_v12_0_ring_funcs = {
	.type = AMDGPU_RING_TYPE_MES,
	.align_mask = 1,
	.nop = 0,
	.support_64bit_ptrs = true,
	.get_rptr = mes_v12_0_ring_get_rptr,
	.get_wptr = mes_v12_0_ring_get_wptr,
	.set_wptr = mes_v12_0_ring_set_wptr,
	.insert_nop = amdgpu_ring_insert_nop,
};
91
/*
 * Printable names for scheduler API opcodes, indexed by MES_SCH_API_*.
 * Entry order must track the MES_SCH_API enum in mes_v12_api_def.h.
 * Fix: entry 10 read "CHANGE_GANG_PRORITY" (typo) — the opcode it names
 * is MES_SCH_API_CHANGE_GANG_PRIORITY; spell the debug string correctly.
 */
static const char *mes_v12_0_opcodes[] = {
	"SET_HW_RSRC",
	"SET_SCHEDULING_CONFIG",
	"ADD_QUEUE",
	"REMOVE_QUEUE",
	"PERFORM_YIELD",
	"SET_GANG_PRIORITY_LEVEL",
	"SUSPEND",
	"RESUME",
	"RESET",
	"SET_LOG_BUFFER",
	"CHANGE_GANG_PRIORITY",
	"QUERY_SCHEDULER_STATUS",
	"unused",
	"SET_DEBUG_VMID",
	"MISC",
	"UPDATE_ROOT_PAGE_TABLE",
	"AMD_LOG",
	"SET_SE_MODE",
	"SET_GANG_SUBMIT",
	"SET_HW_RSRC_1",
	"INVALIDATE_TLBS",
};
115
/*
 * Printable names for MISC sub-opcodes, indexed by MESAPI_MISC__*.
 * Entry order must track the MESAPI_MISC opcode enum in mes_v12_api_def.h.
 */
static const char *mes_v12_0_misc_opcodes[] = {
	"WRITE_REG",
	"INV_GART",
	"QUERY_STATUS",
	"READ_REG",
	"WAIT_REG_MEM",
	"SET_SHADER_DEBUGGER",
	"NOTIFY_WORK_ON_UNMAPPED_QUEUE",
	"NOTIFY_TO_UNMAP_PROCESSES",
};
126
127static const char *mes_v12_0_get_op_string(union MESAPI__MISC *x_pkt)
128{
129 const char *op_str = NULL;
130
131 if (x_pkt->header.opcode < ARRAY_SIZE(mes_v12_0_opcodes))
132 op_str = mes_v12_0_opcodes[x_pkt->header.opcode];
133
134 return op_str;
135}
136
137static const char *mes_v12_0_get_misc_op_string(union MESAPI__MISC *x_pkt)
138{
139 const char *op_str = NULL;
140
141 if ((x_pkt->header.opcode == MES_SCH_API_MISC) &&
142 (x_pkt->opcode < ARRAY_SIZE(mes_v12_0_misc_opcodes)))
143 op_str = mes_v12_0_misc_opcodes[x_pkt->opcode];
144
145 return op_str;
146}
147
/*
 * Submit a MES API packet on the given pipe's ring and poll until the
 * firmware acknowledges it.
 *
 * The caller's packet gets a completion fence pointing at a freshly
 * allocated writeback slot; a QUERY_SCHEDULER_STATUS packet is emitted
 * right behind it whose fence carries the ring's sequence number, so a
 * single fence poll covers both packets.
 *
 * @pipe: which MES pipe/ring to submit on
 * @pkt: fully built API packet (starts with a union MESAPI header)
 * @size: packet size in bytes
 * @api_status_off: byte offset of the MES_API_STATUS field inside @pkt
 *
 * Returns 0 on success, -EINVAL for an out-of-range opcode, -ETIMEDOUT
 * if the firmware does not respond, or a negative error from ring
 * allocation / writeback-slot allocation.
 */
static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
						    int pipe, void *pkt, int size,
						    int api_status_off)
{
	union MESAPI__QUERY_MES_STATUS mes_status_pkt;
	signed long timeout = 2100000; /* 2100 ms */
	struct amdgpu_device *adev = mes->adev;
	struct amdgpu_ring *ring = &mes->ring[pipe];
	spinlock_t *ring_lock = &mes->ring_lock[pipe];
	struct MES_API_STATUS *api_status;
	union MESAPI__MISC *x_pkt = pkt;
	const char *op_str, *misc_op_str;
	unsigned long flags;
	u64 status_gpu_addr;
	u32 seq, status_offset;
	u64 *status_ptr;
	signed long r;
	int ret;

	if (x_pkt->header.opcode >= MES_SCH_API_MAX)
		return -EINVAL;

	/* Scale the poll timeout for slow environments. */
	if (amdgpu_emu_mode) {
		timeout *= 100;
	} else if (amdgpu_sriov_vf(adev)) {
		/* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */
		timeout = 15 * 600 * 1000;
	}

	/* Writeback slot the firmware writes the API completion fence to. */
	ret = amdgpu_device_wb_get(adev, &status_offset);
	if (ret)
		return ret;

	status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4);
	status_ptr = (u64 *)&adev->wb.wb[status_offset];
	*status_ptr = 0;

	spin_lock_irqsave(ring_lock, flags);
	/* Reserve space for the caller's packet plus the status packet. */
	r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4);
	if (r)
		goto error_unlock_free;

	/* Wait for old submissions to drain so the ring frame is free. */
	seq = ++ring->fence_drv.sync_seq;
	r = amdgpu_fence_wait_polling(ring,
				      seq - ring->fence_drv.num_fences_mask,
				      timeout);
	if (r < 1)
		goto error_undo;

	/* Point the caller's packet at our writeback slot (value 1 = done). */
	api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
	api_status->api_completion_fence_addr = status_gpu_addr;
	api_status->api_completion_fence_value = 1;

	amdgpu_ring_write_multiple(ring, pkt, size / 4);

	/* Chase it with a status query whose fence carries @seq. */
	memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
	mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
	mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
	mes_status_pkt.api_status.api_completion_fence_addr =
		ring->fence_drv.gpu_addr;
	mes_status_pkt.api_status.api_completion_fence_value = seq;

	amdgpu_ring_write_multiple(ring, &mes_status_pkt,
				   sizeof(mes_status_pkt) / 4);

	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(ring_lock, flags);

	op_str = mes_v12_0_get_op_string(x_pkt);
	misc_op_str = mes_v12_0_get_misc_op_string(x_pkt);

	if (misc_op_str)
		dev_dbg(adev->dev, "MES(%d) msg=%s (%s) was emitted\n",
			pipe, op_str, misc_op_str);
	else if (op_str)
		dev_dbg(adev->dev, "MES(%d) msg=%s was emitted\n",
			pipe, op_str);
	else
		dev_dbg(adev->dev, "MES(%d) msg=%d was emitted\n",
			pipe, x_pkt->header.opcode);

	/* Success requires both the seq fence AND the API status fence. */
	r = amdgpu_fence_wait_polling(ring, seq, timeout);
	if (r < 1 || !*status_ptr) {

		if (misc_op_str)
			dev_err(adev->dev, "MES(%d) failed to respond to msg=%s (%s)\n",
				pipe, op_str, misc_op_str);
		else if (op_str)
			dev_err(adev->dev, "MES(%d) failed to respond to msg=%s\n",
				pipe, op_str);
		else
			dev_err(adev->dev, "MES(%d) failed to respond to msg=%d\n",
				pipe, x_pkt->header.opcode);

		/* Debug aid: spin here forever so the hang state is preserved. */
		while (halt_if_hws_hang)
			schedule();

		r = -ETIMEDOUT;
		goto error_wb_free;
	}

	amdgpu_device_wb_free(adev, status_offset);
	return 0;

error_undo:
	dev_err(adev->dev, "MES ring buffer is full.\n");
	amdgpu_ring_undo(ring);

error_unlock_free:
	spin_unlock_irqrestore(ring_lock, flags);

error_wb_free:
	amdgpu_device_wb_free(adev, status_offset);
	return r;
}
264
265static int convert_to_mes_queue_type(int queue_type)
266{
267 if (queue_type == AMDGPU_RING_TYPE_GFX)
268 return MES_QUEUE_TYPE_GFX;
269 else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
270 return MES_QUEUE_TYPE_COMPUTE;
271 else if (queue_type == AMDGPU_RING_TYPE_SDMA)
272 return MES_QUEUE_TYPE_SDMA;
273 else if (queue_type == AMDGPU_RING_TYPE_MES)
274 return MES_QUEUE_TYPE_SCHQ;
275 else
276 BUG();
277 return -1;
278}
279
280static int convert_to_mes_priority_level(int priority_level)
281{
282 switch (priority_level) {
283 case AMDGPU_MES_PRIORITY_LEVEL_LOW:
284 return AMD_PRIORITY_LEVEL_LOW;
285 case AMDGPU_MES_PRIORITY_LEVEL_NORMAL:
286 default:
287 return AMD_PRIORITY_LEVEL_NORMAL;
288 case AMDGPU_MES_PRIORITY_LEVEL_MEDIUM:
289 return AMD_PRIORITY_LEVEL_MEDIUM;
290 case AMDGPU_MES_PRIORITY_LEVEL_HIGH:
291 return AMD_PRIORITY_LEVEL_HIGH;
292 case AMDGPU_MES_PRIORITY_LEVEL_REALTIME:
293 return AMD_PRIORITY_LEVEL_REALTIME;
294 }
295}
296
297static int mes_v12_0_add_hw_queue(struct amdgpu_mes *mes,
298 struct mes_add_queue_input *input)
299{
300 struct amdgpu_device *adev = mes->adev;
301 union MESAPI__ADD_QUEUE mes_add_queue_pkt;
302 struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
303 uint32_t vm_cntx_cntl = hub->vm_cntx_cntl;
304
305 memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
306
307 mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
308 mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
309 mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
310
311 mes_add_queue_pkt.process_id = input->process_id;
312 mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr;
313 mes_add_queue_pkt.process_va_start = input->process_va_start;
314 mes_add_queue_pkt.process_va_end = input->process_va_end;
315 mes_add_queue_pkt.process_quantum = input->process_quantum;
316 mes_add_queue_pkt.process_context_addr = input->process_context_addr;
317 mes_add_queue_pkt.gang_quantum = input->gang_quantum;
318 mes_add_queue_pkt.gang_context_addr = input->gang_context_addr;
319 mes_add_queue_pkt.inprocess_gang_priority =
320 convert_to_mes_priority_level(input->inprocess_gang_priority);
321 mes_add_queue_pkt.gang_global_priority_level =
322 convert_to_mes_priority_level(input->gang_global_priority_level);
323 mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
324 mes_add_queue_pkt.mqd_addr = input->mqd_addr;
325
326 mes_add_queue_pkt.wptr_addr = input->wptr_mc_addr;
327
328 mes_add_queue_pkt.queue_type =
329 convert_to_mes_queue_type(input->queue_type);
330 mes_add_queue_pkt.paging = input->paging;
331 mes_add_queue_pkt.vm_context_cntl = vm_cntx_cntl;
332 mes_add_queue_pkt.gws_base = input->gws_base;
333 mes_add_queue_pkt.gws_size = input->gws_size;
334 mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
335 mes_add_queue_pkt.tma_addr = input->tma_addr;
336 mes_add_queue_pkt.trap_en = input->trap_en;
337 mes_add_queue_pkt.skip_process_ctx_clear = input->skip_process_ctx_clear;
338 mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;
339
340 /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */
341 mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
342 mes_add_queue_pkt.gds_size = input->queue_size;
343
344 /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */
345 mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
346 mes_add_queue_pkt.gds_size = input->queue_size;
347
348 return mes_v12_0_submit_pkt_and_poll_completion(mes,
349 AMDGPU_MES_SCHED_PIPE,
350 &mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
351 offsetof(union MESAPI__ADD_QUEUE, api_status));
352}
353
354static int mes_v12_0_remove_hw_queue(struct amdgpu_mes *mes,
355 struct mes_remove_queue_input *input)
356{
357 union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
358
359 memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
360
361 mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
362 mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
363 mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
364
365 mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
366 mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;
367
368 return mes_v12_0_submit_pkt_and_poll_completion(mes,
369 AMDGPU_MES_SCHED_PIPE,
370 &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
371 offsetof(union MESAPI__REMOVE_QUEUE, api_status));
372}
373
/*
 * gfx_v12_0_request_gfx_index_mutex - acquire/release the hardware
 * GRBM_GFX_INDEX mutex via CP_GFX_INDEX_MUTEX.
 *
 * @req: true to request the mutex, false to release it.
 *
 * The driver writes its request with CLIENTID 4 and reads the register
 * back: on acquire, ownership is confirmed when the readback equals what
 * was written; on release, success is when the readback no longer matches
 * a pending request from this client (unlocked, or held by firmware).
 * Polls up to adev->usec_timeout microseconds.
 *
 * Returns 0 on success, -EINVAL on timeout.
 */
int gfx_v12_0_request_gfx_index_mutex(struct amdgpu_device *adev,
				      bool req)
{
	u32 i, tmp, val;

	for (i = 0; i < adev->usec_timeout; i++) {
		/* Request with MeId=2, PipeId=0 */
		tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req);
		tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4);
		WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp);

		val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX);
		if (req) {
			/* Readback matches the request: mutex is ours. */
			if (val == tmp)
				break;
		} else {
			/* Compare against what an acquire by us would look like. */
			tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX,
					    REQUEST, 1);

			/* unlocked or locked by firmware */
			if (val != tmp)
				break;
		}
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		return -EINVAL;

	return 0;
}
405
/*
 * mes_v12_0_reset_queue_mmio - reset a single hardware queue by direct
 * register access, bypassing the MES firmware.
 *
 * Used when the queue cannot be reset through a MES packet.  All paths
 * run inside RLC safe mode.  Per queue type:
 *  - GFX: request a VMID-targeted pipe/queue reset via CP_VMID_RESET
 *    (under the hardware GRBM_GFX_INDEX mutex, broadcasting to all SEs),
 *    then poll CP_GFX_HQD_ACTIVE for deactivation.
 *  - COMPUTE: issue a dequeue request plus SPI queue reset for the
 *    selected HQD, then poll CP_HQD_ACTIVE.
 *  - SDMA: set the queue's bit in the engine's QUEUE_RESET_REQ register
 *    and poll for the hardware to clear it.
 *
 * Returns 0 on success, -ETIMEDOUT if the queue does not deactivate.
 */
static int mes_v12_0_reset_queue_mmio(struct amdgpu_mes *mes, uint32_t queue_type,
				      uint32_t me_id, uint32_t pipe_id,
				      uint32_t queue_id, uint32_t vmid)
{
	struct amdgpu_device *adev = mes->adev;
	uint32_t value, reg;
	int i, r = 0;

	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);

	if (queue_type == AMDGPU_RING_TYPE_GFX) {
		dev_info(adev->dev, "reset gfx queue (%d:%d:%d: vmid:%d)\n",
			 me_id, pipe_id, queue_id, vmid);

		mutex_lock(&adev->gfx.reset_sem_mutex);
		gfx_v12_0_request_gfx_index_mutex(adev, true);
		/* all se allow writes */
		WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX,
			     (uint32_t)(0x1 << GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT));
		value = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
		if (pipe_id == 0)
			value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE0_QUEUES, 1 << queue_id);
		else
			value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE1_QUEUES, 1 << queue_id);
		WREG32_SOC15(GC, 0, regCP_VMID_RESET, value);
		gfx_v12_0_request_gfx_index_mutex(adev, false);
		mutex_unlock(&adev->gfx.reset_sem_mutex);

		/* Select the HQD so the ACTIVE poll reads the right queue. */
		mutex_lock(&adev->srbm_mutex);
		soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0);
		/* wait till dequeue take effects */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		if (i >= adev->usec_timeout) {
			dev_err(adev->dev, "failed to wait on gfx hqd deactivate\n");
			r = -ETIMEDOUT;
		}

		soc21_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
		dev_info(adev->dev, "reset compute queue (%d:%d:%d)\n",
			 me_id, pipe_id, queue_id);
		mutex_lock(&adev->srbm_mutex);
		soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0);
		WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
		WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);

		/* wait till dequeue take effects */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		if (i >= adev->usec_timeout) {
			dev_err(adev->dev, "failed to wait on hqd deactivate\n");
			r = -ETIMEDOUT;
		}
		soc21_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else if (queue_type == AMDGPU_RING_TYPE_SDMA) {
		dev_info(adev->dev, "reset sdma queue (%d:%d:%d)\n",
			 me_id, pipe_id, queue_id);
		/* me_id selects the SDMA engine instance. */
		switch (me_id) {
		case 1:
			reg = SOC15_REG_OFFSET(GC, 0, regSDMA1_QUEUE_RESET_REQ);
			break;
		case 0:
		default:
			reg = SOC15_REG_OFFSET(GC, 0, regSDMA0_QUEUE_RESET_REQ);
			break;
		}

		value = 1 << queue_id;
		WREG32(reg, value);
		/* wait for queue reset done */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(reg) & value))
				break;
			udelay(1);
		}
		if (i >= adev->usec_timeout) {
			dev_err(adev->dev, "failed to wait on sdma queue reset done\n");
			r = -ETIMEDOUT;
		}
	}

	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
	return r;
}
499
500static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes,
501 struct mes_map_legacy_queue_input *input)
502{
503 union MESAPI__ADD_QUEUE mes_add_queue_pkt;
504 int pipe;
505
506 memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
507
508 mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
509 mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
510 mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
511
512 mes_add_queue_pkt.pipe_id = input->pipe_id;
513 mes_add_queue_pkt.queue_id = input->queue_id;
514 mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
515 mes_add_queue_pkt.mqd_addr = input->mqd_addr;
516 mes_add_queue_pkt.wptr_addr = input->wptr_addr;
517 mes_add_queue_pkt.queue_type =
518 convert_to_mes_queue_type(input->queue_type);
519 mes_add_queue_pkt.map_legacy_kq = 1;
520
521 if (mes->adev->enable_uni_mes)
522 pipe = AMDGPU_MES_KIQ_PIPE;
523 else
524 pipe = AMDGPU_MES_SCHED_PIPE;
525
526 return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
527 &mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
528 offsetof(union MESAPI__ADD_QUEUE, api_status));
529}
530
531static int mes_v12_0_unmap_legacy_queue(struct amdgpu_mes *mes,
532 struct mes_unmap_legacy_queue_input *input)
533{
534 union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
535 int pipe;
536
537 memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
538
539 mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
540 mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
541 mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
542
543 mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
544 mes_remove_queue_pkt.gang_context_addr = 0;
545
546 mes_remove_queue_pkt.pipe_id = input->pipe_id;
547 mes_remove_queue_pkt.queue_id = input->queue_id;
548
549 if (input->action == PREEMPT_QUEUES_NO_UNMAP) {
550 mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1;
551 mes_remove_queue_pkt.tf_addr = input->trail_fence_addr;
552 mes_remove_queue_pkt.tf_data =
553 lower_32_bits(input->trail_fence_data);
554 } else {
555 mes_remove_queue_pkt.unmap_legacy_queue = 1;
556 mes_remove_queue_pkt.queue_type =
557 convert_to_mes_queue_type(input->queue_type);
558 }
559
560 if (mes->adev->enable_uni_mes)
561 pipe = AMDGPU_MES_KIQ_PIPE;
562 else
563 pipe = AMDGPU_MES_SCHED_PIPE;
564
565 return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
566 &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
567 offsetof(union MESAPI__REMOVE_QUEUE, api_status));
568}
569
570static int mes_v12_0_suspend_gang(struct amdgpu_mes *mes,
571 struct mes_suspend_gang_input *input)
572{
573 union MESAPI__SUSPEND mes_suspend_gang_pkt;
574
575 memset(&mes_suspend_gang_pkt, 0, sizeof(mes_suspend_gang_pkt));
576
577 mes_suspend_gang_pkt.header.type = MES_API_TYPE_SCHEDULER;
578 mes_suspend_gang_pkt.header.opcode = MES_SCH_API_SUSPEND;
579 mes_suspend_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
580
581 mes_suspend_gang_pkt.suspend_all_gangs = input->suspend_all_gangs;
582 mes_suspend_gang_pkt.gang_context_addr = input->gang_context_addr;
583 mes_suspend_gang_pkt.suspend_fence_addr = input->suspend_fence_addr;
584 mes_suspend_gang_pkt.suspend_fence_value = input->suspend_fence_value;
585
586 return mes_v12_0_submit_pkt_and_poll_completion(mes, AMDGPU_MES_SCHED_PIPE,
587 &mes_suspend_gang_pkt, sizeof(mes_suspend_gang_pkt),
588 offsetof(union MESAPI__SUSPEND, api_status));
589}
590
591static int mes_v12_0_resume_gang(struct amdgpu_mes *mes,
592 struct mes_resume_gang_input *input)
593{
594 union MESAPI__RESUME mes_resume_gang_pkt;
595
596 memset(&mes_resume_gang_pkt, 0, sizeof(mes_resume_gang_pkt));
597
598 mes_resume_gang_pkt.header.type = MES_API_TYPE_SCHEDULER;
599 mes_resume_gang_pkt.header.opcode = MES_SCH_API_RESUME;
600 mes_resume_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
601
602 mes_resume_gang_pkt.resume_all_gangs = input->resume_all_gangs;
603 mes_resume_gang_pkt.gang_context_addr = input->gang_context_addr;
604
605 return mes_v12_0_submit_pkt_and_poll_completion(mes, AMDGPU_MES_SCHED_PIPE,
606 &mes_resume_gang_pkt, sizeof(mes_resume_gang_pkt),
607 offsetof(union MESAPI__RESUME, api_status));
608}
609
610static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes, int pipe)
611{
612 union MESAPI__QUERY_MES_STATUS mes_status_pkt;
613
614 memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
615
616 mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
617 mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
618 mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
619
620 return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
621 &mes_status_pkt, sizeof(mes_status_pkt),
622 offsetof(union MESAPI__QUERY_MES_STATUS, api_status));
623}
624
625static int mes_v12_0_misc_op(struct amdgpu_mes *mes,
626 struct mes_misc_op_input *input)
627{
628 union MESAPI__MISC misc_pkt;
629 int pipe;
630
631 if (mes->adev->enable_uni_mes)
632 pipe = AMDGPU_MES_KIQ_PIPE;
633 else
634 pipe = AMDGPU_MES_SCHED_PIPE;
635
636 memset(&misc_pkt, 0, sizeof(misc_pkt));
637
638 misc_pkt.header.type = MES_API_TYPE_SCHEDULER;
639 misc_pkt.header.opcode = MES_SCH_API_MISC;
640 misc_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
641
642 switch (input->op) {
643 case MES_MISC_OP_READ_REG:
644 misc_pkt.opcode = MESAPI_MISC__READ_REG;
645 misc_pkt.read_reg.reg_offset = input->read_reg.reg_offset;
646 misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr;
647 break;
648 case MES_MISC_OP_WRITE_REG:
649 misc_pkt.opcode = MESAPI_MISC__WRITE_REG;
650 misc_pkt.write_reg.reg_offset = input->write_reg.reg_offset;
651 misc_pkt.write_reg.reg_value = input->write_reg.reg_value;
652 break;
653 case MES_MISC_OP_WRM_REG_WAIT:
654 misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
655 misc_pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM;
656 misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
657 misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
658 misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
659 misc_pkt.wait_reg_mem.reg_offset2 = 0;
660 break;
661 case MES_MISC_OP_WRM_REG_WR_WAIT:
662 misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
663 misc_pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG;
664 misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
665 misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
666 misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
667 misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1;
668 break;
669 case MES_MISC_OP_SET_SHADER_DEBUGGER:
670 pipe = AMDGPU_MES_SCHED_PIPE;
671 misc_pkt.opcode = MESAPI_MISC__SET_SHADER_DEBUGGER;
672 misc_pkt.set_shader_debugger.process_context_addr =
673 input->set_shader_debugger.process_context_addr;
674 misc_pkt.set_shader_debugger.flags.u32all =
675 input->set_shader_debugger.flags.u32all;
676 misc_pkt.set_shader_debugger.spi_gdbg_per_vmid_cntl =
677 input->set_shader_debugger.spi_gdbg_per_vmid_cntl;
678 memcpy(misc_pkt.set_shader_debugger.tcp_watch_cntl,
679 input->set_shader_debugger.tcp_watch_cntl,
680 sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl));
681 misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en;
682 break;
683 case MES_MISC_OP_CHANGE_CONFIG:
684 misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG;
685 misc_pkt.change_config.opcode =
686 MESAPI_MISC__CHANGE_CONFIG_OPTION_LIMIT_SINGLE_PROCESS;
687 misc_pkt.change_config.option.bits.limit_single_process =
688 input->change_config.option.limit_single_process;
689 break;
690
691 default:
692 DRM_ERROR("unsupported misc op (%d) \n", input->op);
693 return -EINVAL;
694 }
695
696 return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
697 &misc_pkt, sizeof(misc_pkt),
698 offsetof(union MESAPI__MISC, api_status));
699}
700
701static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes, int pipe)
702{
703 union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_1_pkt;
704
705 memset(&mes_set_hw_res_1_pkt, 0, sizeof(mes_set_hw_res_1_pkt));
706
707 mes_set_hw_res_1_pkt.header.type = MES_API_TYPE_SCHEDULER;
708 mes_set_hw_res_1_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1;
709 mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
710 mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 0xa;
711 mes_set_hw_res_1_pkt.cleaner_shader_fence_mc_addr =
712 mes->resource_1_gpu_addr[pipe];
713
714 return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
715 &mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt),
716 offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status));
717}
718
/*
 * mes_v12_0_set_hw_resources - hand the MES firmware its hardware
 * resource map and feature configuration via a SET_HW_RSRC packet.
 *
 * Scheduler-pipe-only fields: VMID masks, GDS size, HQD masks for
 * compute/gfx/SDMA pipes, and the aggregated doorbells per priority
 * level.  All pipes get the scheduler context address, the query-status
 * fence address, the GC/MMHUB/OSSSYS register bases, and the feature
 * flags below.
 *
 * Returns 0 on success or a negative error from packet submission.
 */
static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe)
{
	int i;
	struct amdgpu_device *adev = mes->adev;
	union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt;

	memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));

	mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC;
	mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	/* Queue/doorbell resources are owned by the scheduler pipe only. */
	if (pipe == AMDGPU_MES_SCHED_PIPE) {
		mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
		mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
		mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
		mes_set_hw_res_pkt.paging_vmid = 0;

		for (i = 0; i < MAX_COMPUTE_PIPES; i++)
			mes_set_hw_res_pkt.compute_hqd_mask[i] =
				mes->compute_hqd_mask[i];

		for (i = 0; i < MAX_GFX_PIPES; i++)
			mes_set_hw_res_pkt.gfx_hqd_mask[i] =
				mes->gfx_hqd_mask[i];

		for (i = 0; i < MAX_SDMA_PIPES; i++)
			mes_set_hw_res_pkt.sdma_hqd_mask[i] =
				mes->sdma_hqd_mask[i];

		for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
			mes_set_hw_res_pkt.aggregated_doorbells[i] =
				mes->aggregated_doorbells[i];
	}

	mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr =
		mes->sch_ctx_gpu_addr[pipe];
	mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
		mes->query_status_fence_gpu_addr[pipe];

	/* Register apertures (5 segments each) for GC, MMHUB and OSSSYS. */
	for (i = 0; i < 5; i++) {
		mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i];
		mes_set_hw_res_pkt.mmhub_base[i] =
			adev->reg_offset[MMHUB_HWIP][0][i];
		mes_set_hw_res_pkt.osssys_base[i] =
			adev->reg_offset[OSSSYS_HWIP][0][i];
	}

	mes_set_hw_res_pkt.disable_reset = 1;
	mes_set_hw_res_pkt.disable_mes_log = 1;
	mes_set_hw_res_pkt.use_different_vmid_compute = 1;
	mes_set_hw_res_pkt.enable_reg_active_poll = 1;
	mes_set_hw_res_pkt.enable_level_process_quantum_check = 1;
	/* LR compute workaround requires MES firmware >= 0x82. */
	if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x82)
		mes_set_hw_res_pkt.enable_lr_compute_wa = 1;
	else
		dev_info_once(adev->dev,
			      "MES FW version must be >= 0x82 to enable LR compute workaround.\n");

	/*
	 * Keep oversubscribe timer for sdma . When we have unmapped doorbell
	 * handling support, other queue will not use the oversubscribe timer.
	 * handling mode - 0: disabled; 1: basic version; 2: basic+ version
	 */
	mes_set_hw_res_pkt.oversubscription_timer = 50;
	mes_set_hw_res_pkt.unmapped_doorbell_handling = 1;

	if (amdgpu_mes_log_enable) {
		mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
		/* Each pipe gets its own slice of the shared event log BO. */
		mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr +
			pipe * (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE);
	}

	if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE)
		mes_set_hw_res_pkt.limit_single_process = 1;

	return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
			&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
			offsetof(union MESAPI_SET_HW_RESOURCES, api_status));
}
799
/*
 * mes_v12_0_init_aggregated_doorbell - program the five aggregated
 * doorbell control registers, one per MES priority level (LOW..REALTIME).
 *
 * For each CP_MES_DOORBELL_CONTROLn: clear the OFFSET/EN/HIT fields,
 * install the level's aggregated doorbell offset, and set the enable bit.
 * Finally enable doorbell-updated messages in CP_HQD_GFX_CONTROL.
 */
static void mes_v12_0_init_aggregated_doorbell(struct amdgpu_mes *mes)
{
	struct amdgpu_device *adev = mes->adev;
	uint32_t data;

	/* CONTROL1 <- LOW priority aggregated doorbell */
	data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1);
	data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK |
		  CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK |
		  CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK);
	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] <<
		CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT;
	data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT;
	WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1, data);

	/* CONTROL2 <- NORMAL priority aggregated doorbell */
	data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2);
	data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK |
		  CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK |
		  CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK);
	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] <<
		CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT;
	data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT;
	WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2, data);

	/* CONTROL3 <- MEDIUM priority aggregated doorbell */
	data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3);
	data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK |
		  CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK |
		  CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK);
	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] <<
		CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT;
	data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT;
	WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3, data);

	/* CONTROL4 <- HIGH priority aggregated doorbell */
	data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4);
	data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK |
		  CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK |
		  CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK);
	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] <<
		CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT;
	data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT;
	WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4, data);

	/* CONTROL5 <- REALTIME priority aggregated doorbell */
	data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5);
	data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK |
		  CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK |
		  CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK);
	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] <<
		CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT;
	data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT;
	WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5, data);

	/* Let the CP raise doorbell-updated messages for gfx HQDs. */
	data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT;
	WREG32_SOC15(GC, 0, regCP_HQD_GFX_CONTROL, data);
}
853
854
855static void mes_v12_0_enable_unmapped_doorbell_handling(
856 struct amdgpu_mes *mes, bool enable)
857{
858 struct amdgpu_device *adev = mes->adev;
859 uint32_t data = RREG32_SOC15(GC, 0, regCP_UNMAPPED_DOORBELL);
860
861 /*
862 * The default PROC_LSB settng is 0xc which means doorbell
863 * addr[16:12] gives the doorbell page number. For kfd, each
864 * process will use 2 pages of doorbell, we need to change the
865 * setting to 0xd
866 */
867 data &= ~CP_UNMAPPED_DOORBELL__PROC_LSB_MASK;
868 data |= 0xd << CP_UNMAPPED_DOORBELL__PROC_LSB__SHIFT;
869
870 data |= (enable ? 1 : 0) << CP_UNMAPPED_DOORBELL__ENABLE__SHIFT;
871
872 WREG32_SOC15(GC, 0, regCP_UNMAPPED_DOORBELL, data);
873}
874
/*
 * Reset a HW queue, either directly via MMIO register access or by
 * submitting a RESET packet to the MES firmware.
 * Returns 0 on success or a negative error code.
 */
static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes,
				    struct mes_reset_queue_input *input)
{
	union MESAPI__RESET mes_reset_queue_pkt;
	int pipe;

	/* MMIO path bypasses the MES firmware entirely */
	if (input->use_mmio)
		return mes_v12_0_reset_queue_mmio(mes, input->queue_type,
						  input->me_id, input->pipe_id,
						  input->queue_id, input->vmid);

	memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt));

	mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET;
	mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	mes_reset_queue_pkt.queue_type =
		convert_to_mes_queue_type(input->queue_type);

	if (input->legacy_gfx) {
		/* legacy gfx queues are identified with the full *_lp field set */
		mes_reset_queue_pkt.reset_legacy_gfx = 1;
		mes_reset_queue_pkt.pipe_id_lp = input->pipe_id;
		mes_reset_queue_pkt.queue_id_lp = input->queue_id;
		mes_reset_queue_pkt.mqd_mc_addr_lp = input->mqd_addr;
		mes_reset_queue_pkt.doorbell_offset_lp = input->doorbell_offset;
		mes_reset_queue_pkt.wptr_addr_lp = input->wptr_addr;
		mes_reset_queue_pkt.vmid_id_lp = input->vmid;
	} else {
		/* other queues are addressed by doorbell offset alone */
		mes_reset_queue_pkt.reset_queue_only = 1;
		mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset;
	}

	/* kernel queues are reset through the KIQ pipe, user queues
	 * through the scheduler pipe */
	if (input->is_kq)
		pipe = AMDGPU_MES_KIQ_PIPE;
	else
		pipe = AMDGPU_MES_SCHED_PIPE;

	return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
			&mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt),
			offsetof(union MESAPI__RESET, api_status));
}
917
/*
 * Ask the MES scheduler firmware to scan for hung queues of the given
 * type.  Results are reported through the pre-allocated
 * hung_queue_db_array buffer (presumably doorbell offsets of the
 * offenders — firmware-defined layout).  With detect_only set the
 * firmware only reports; otherwise it also resets the hung queues.
 */
static int mes_v12_0_detect_and_reset_hung_queues(struct amdgpu_mes *mes,
		struct mes_detect_and_reset_queue_input *input)
{
	union MESAPI__RESET mes_reset_queue_pkt;

	memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt));

	mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET;
	mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	mes_reset_queue_pkt.queue_type =
		convert_to_mes_queue_type(input->queue_type);
	mes_reset_queue_pkt.doorbell_offset_addr =
		mes->hung_queue_db_array_gpu_addr;

	if (input->detect_only)
		mes_reset_queue_pkt.hang_detect_only = 1;
	else
		mes_reset_queue_pkt.hang_detect_then_reset = 1;

	return mes_v12_0_submit_pkt_and_poll_completion(mes, AMDGPU_MES_SCHED_PIPE,
			&mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt),
			offsetof(union MESAPI__RESET, api_status));
}
943
944static int mes_v12_inv_tlb_convert_hub_id(uint8_t id)
945{
946 /*
947 * MES doesn't support invalidate gc_hub on slave xcc individually
948 * master xcc will invalidate all gc_hub for the partition
949 */
950 if (AMDGPU_IS_GFXHUB(id))
951 return 0;
952 else if (AMDGPU_IS_MMHUB0(id))
953 return 1;
954 else
955 return -EINVAL;
956
957}
958
/*
 * Submit an INV_TLBS packet to the MES KIQ pipe to invalidate all TLB
 * entries belonging to the given PASID on the given hub.
 * Returns 0 on success or a negative error code.
 */
static int mes_v12_0_inv_tlbs_pasid(struct amdgpu_mes *mes,
				    struct mes_inv_tlbs_pasid_input *input)
{
	union MESAPI__INV_TLBS mes_inv_tlbs;
	int ret;

	memset(&mes_inv_tlbs, 0, sizeof(mes_inv_tlbs));

	mes_inv_tlbs.header.type = MES_API_TYPE_SCHEDULER;
	mes_inv_tlbs.header.opcode = MES_SCH_API_INV_TLBS;
	mes_inv_tlbs.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	/* inv_sel 0 selects invalidation by PASID; the PASID goes in inv_sel_id */
	mes_inv_tlbs.invalidate_tlbs.inv_sel = 0;
	mes_inv_tlbs.invalidate_tlbs.flush_type = input->flush_type;
	mes_inv_tlbs.invalidate_tlbs.inv_sel_id = input->pasid;

	/* convert amdgpu_mes_hub_id to mes expected hub_id */
	ret = mes_v12_inv_tlb_convert_hub_id(input->hub_id);
	if (ret < 0)
		return -EINVAL;
	mes_inv_tlbs.invalidate_tlbs.hub_id = ret;
	return mes_v12_0_submit_pkt_and_poll_completion(mes, AMDGPU_MES_KIQ_PIPE,
			&mes_inv_tlbs, sizeof(mes_inv_tlbs),
			offsetof(union MESAPI__INV_TLBS, api_status));

}
985
/* Dispatch table wiring the MES v12 packet builders into the generic
 * amdgpu_mes layer. */
static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
	.add_hw_queue = mes_v12_0_add_hw_queue,
	.remove_hw_queue = mes_v12_0_remove_hw_queue,
	.map_legacy_queue = mes_v12_0_map_legacy_queue,
	.unmap_legacy_queue = mes_v12_0_unmap_legacy_queue,
	.suspend_gang = mes_v12_0_suspend_gang,
	.resume_gang = mes_v12_0_resume_gang,
	.misc_op = mes_v12_0_misc_op,
	.reset_hw_queue = mes_v12_0_reset_hw_queue,
	.invalidate_tlbs_pasid = mes_v12_0_inv_tlbs_pasid,
	.detect_and_reset_hung_queues = mes_v12_0_detect_and_reset_hung_queues,
};
998
/*
 * Allocate a VRAM BO for the MES instruction ucode of the given pipe
 * and copy the firmware image into it (backdoor load path).
 * Returns 0 on success or a negative error code.
 */
static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device *adev,
					   enum amdgpu_mes_pipe pipe)
{
	int r;
	const struct mes_firmware_header_v1_0 *mes_hdr;
	const __le32 *fw_data;
	unsigned fw_size;

	mes_hdr = (const struct mes_firmware_header_v1_0 *)
		adev->mes.fw[pipe]->data;

	/* instruction payload location/size come from the firmware header */
	fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
		   le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
	fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);

	r = amdgpu_bo_create_reserved(adev, fw_size,
				      PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->mes.ucode_fw_obj[pipe],
				      &adev->mes.ucode_fw_gpu_addr[pipe],
				      (void **)&adev->mes.ucode_fw_ptr[pipe]);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r);
		return r;
	}

	memcpy(adev->mes.ucode_fw_ptr[pipe], fw_data, fw_size);

	/* drop CPU mapping; GPU address is kept for the IC_BASE registers */
	amdgpu_bo_kunmap(adev->mes.ucode_fw_obj[pipe]);
	amdgpu_bo_unreserve(adev->mes.ucode_fw_obj[pipe]);

	return 0;
}
1032
/*
 * Allocate a VRAM BO for the MES ucode data segment of the given pipe
 * and copy the firmware data into it (backdoor load path).
 * Returns 0 on success or a negative error code.
 */
static int mes_v12_0_allocate_ucode_data_buffer(struct amdgpu_device *adev,
						enum amdgpu_mes_pipe pipe)
{
	int r;
	const struct mes_firmware_header_v1_0 *mes_hdr;
	const __le32 *fw_data;
	unsigned fw_size;

	mes_hdr = (const struct mes_firmware_header_v1_0 *)
		adev->mes.fw[pipe]->data;

	/* data payload location/size come from the firmware header */
	fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
		   le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
	fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);

	/* 64KB alignment, unlike the PAGE_SIZE-aligned instruction buffer */
	r = amdgpu_bo_create_reserved(adev, fw_size,
				      64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->mes.data_fw_obj[pipe],
				      &adev->mes.data_fw_gpu_addr[pipe],
				      (void **)&adev->mes.data_fw_ptr[pipe]);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r);
		return r;
	}

	memcpy(adev->mes.data_fw_ptr[pipe], fw_data, fw_size);

	amdgpu_bo_kunmap(adev->mes.data_fw_obj[pipe]);
	amdgpu_bo_unreserve(adev->mes.data_fw_obj[pipe]);

	return 0;
}
1066
/* Free the backdoor-load instruction and data firmware BOs of a pipe. */
static void mes_v12_0_free_ucode_buffers(struct amdgpu_device *adev,
					 enum amdgpu_mes_pipe pipe)
{
	amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[pipe],
			      &adev->mes.data_fw_gpu_addr[pipe],
			      (void **)&adev->mes.data_fw_ptr[pipe]);

	amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj[pipe],
			      &adev->mes.ucode_fw_gpu_addr[pipe],
			      (void **)&adev->mes.ucode_fw_ptr[pipe]);
}
1078
/*
 * Start or halt the MES microcontroller pipes.
 *
 * Enable: per pipe (me=3), optionally program the MSCRATCH log address,
 * pulse the pipe reset, set the ucode start PC, then release the pipe by
 * writing CP_MES_CNTL with the ACTIVE bit(s).  Disable: halt both pipes,
 * invalidate the icache and hold them in reset.
 */
static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable)
{
	uint64_t ucode_addr;
	uint32_t pipe, data = 0;

	if (enable) {
		mutex_lock(&adev->srbm_mutex);
		for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
			soc21_grbm_select(adev, 3, pipe, 0, 0);
			if (amdgpu_mes_log_enable) {
				u32 log_size = AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE;
				/* In case uni mes is not enabled, only program for pipe 0 */
				if (adev->mes.event_log_size >= (pipe + 1) * log_size) {
					WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO,
						     lower_32_bits(adev->mes.event_log_gpu_addr +
						     pipe * log_size + AMDGPU_MES_LOG_BUFFER_SIZE));
					WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI,
						     upper_32_bits(adev->mes.event_log_gpu_addr +
						     pipe * log_size + AMDGPU_MES_LOG_BUFFER_SIZE));
					dev_info(adev->dev, "Setup CP MES MSCRATCH address : 0x%x. 0x%x\n",
						 RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI),
						 RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO));
				}
			}

			/* put this pipe into reset before reprogramming its PC */
			data = RREG32_SOC15(GC, 0, regCP_MES_CNTL);
			if (pipe == 0)
				data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
			else
				data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1);
			WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);

			/* PC registers take a dword address, hence the >> 2 */
			ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
			WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START,
				     lower_32_bits(ucode_addr));
			WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START_HI,
				     upper_32_bits(ucode_addr));

			/* unhalt MES and activate one pipe each loop */
			data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
			if (pipe)
				data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1);
			dev_info(adev->dev, "program CP_MES_CNTL : 0x%x\n", data);

			WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);

		}
		soc21_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* give the firmware time to boot before it is used */
		if (amdgpu_emu_mode)
			msleep(100);
		else if (adev->enable_uni_mes)
			udelay(500);
		else
			udelay(50);
	} else {
		/* halt both pipes, invalidate icache and hold them in reset */
		data = RREG32_SOC15(GC, 0, regCP_MES_CNTL);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 0);
		data = REG_SET_FIELD(data, CP_MES_CNTL,
				     MES_INVALIDATE_ICACHE, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1);
		WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);
	}
}
1147
/*
 * Halt MES and program each pipe's program counter with its ucode start
 * address.  Used by both the backdoor and RLC-autoload firmware paths.
 */
static void mes_v12_0_set_ucode_start_addr(struct amdgpu_device *adev)
{
	uint64_t ucode_addr;
	int pipe;

	mes_v12_0_enable(adev, false);

	mutex_lock(&adev->srbm_mutex);
	for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
		/* me=3, queue=0 */
		soc21_grbm_select(adev, 3, pipe, 0, 0);

		/* set ucode start address (dword address, hence >> 2) */
		ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
		WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START,
			     lower_32_bits(ucode_addr));
		WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START_HI,
			     upper_32_bits(ucode_addr));

		soc21_grbm_select(adev, 0, 0, 0, 0);
	}
	mutex_unlock(&adev->srbm_mutex);
}
1171
/*
 * This function is for backdoor MES firmware loading: copy the ucode
 * into VRAM BOs and point the MES instruction/data caches at them.
 * Optionally invalidate and prime the instruction cache.
 * Returns 0 on success or a negative error code.
 */
static int mes_v12_0_load_microcode(struct amdgpu_device *adev,
				    enum amdgpu_mes_pipe pipe, bool prime_icache)
{
	int r;
	uint32_t data;

	mes_v12_0_enable(adev, false);

	if (!adev->mes.fw[pipe])
		return -EINVAL;

	r = mes_v12_0_allocate_ucode_buffer(adev, pipe);
	if (r)
		return r;

	r = mes_v12_0_allocate_ucode_data_buffer(adev, pipe);
	if (r) {
		/* undo the instruction-buffer allocation on failure */
		mes_v12_0_free_ucode_buffers(adev, pipe);
		return r;
	}

	mutex_lock(&adev->srbm_mutex);
	/* me=3, pipe=0, queue=0 */
	soc21_grbm_select(adev, 3, pipe, 0, 0);

	WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_CNTL, 0);

	/* set ucode firmware address */
	WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_LO,
		     lower_32_bits(adev->mes.ucode_fw_gpu_addr[pipe]));
	WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_HI,
		     upper_32_bits(adev->mes.ucode_fw_gpu_addr[pipe]));

	/* set ucode instruction cache boundary to 2M-1 */
	WREG32_SOC15(GC, 0, regCP_MES_MIBOUND_LO, 0x1FFFFF);

	/* set ucode data firmware address */
	WREG32_SOC15(GC, 0, regCP_MES_MDBASE_LO,
		     lower_32_bits(adev->mes.data_fw_gpu_addr[pipe]));
	WREG32_SOC15(GC, 0, regCP_MES_MDBASE_HI,
		     upper_32_bits(adev->mes.data_fw_gpu_addr[pipe]));

	/* Set data cache boundary CP_MES_MDBOUND_LO */
	WREG32_SOC15(GC, 0, regCP_MES_MDBOUND_LO, 0x7FFFF);

	if (prime_icache) {
		/* invalidate ICACHE */
		data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL);
		data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
		data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
		WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data);

		/* prime the ICACHE. */
		data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL);
		data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
		WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data);
	}

	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	return 0;
}
1236
/*
 * Allocate and zero the end-of-pipe (EOP) buffer for the given MES pipe.
 * Returns 0 on success or a negative error code.
 */
static int mes_v12_0_allocate_eop_buf(struct amdgpu_device *adev,
				      enum amdgpu_mes_pipe pipe)
{
	int r;
	u32 *eop;

	r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE,
			      AMDGPU_GEM_DOMAIN_GTT,
			      &adev->mes.eop_gpu_obj[pipe],
			      &adev->mes.eop_gpu_addr[pipe],
			      (void **)&eop);
	if (r) {
		dev_warn(adev->dev, "(%d) create EOP bo failed\n", r);
		return r;
	}

	/* clear the whole BO (may be larger than MES_EOP_SIZE after rounding) */
	memset(eop, 0,
	       adev->mes.eop_gpu_obj[pipe]->tbo.base.size);

	amdgpu_bo_kunmap(adev->mes.eop_gpu_obj[pipe]);
	amdgpu_bo_unreserve(adev->mes.eop_gpu_obj[pipe]);

	return 0;
}
1261
/*
 * Initialize the v12 compute MQD (memory queue descriptor) for a MES
 * ring: EOP buffer, MQD/ring base addresses, rptr/wptr report addresses,
 * queue control and doorbell settings.  Always returns 0.
 */
static int mes_v12_0_mqd_init(struct amdgpu_ring *ring)
{
	struct v12_compute_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000007;

	/* EOP base is a 256-byte-aligned address, hence >> 8 */
	eop_base_addr = ring->eop_gpu_addr >> 8;

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(MES_EOP_SIZE / 4) - 1));

	mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_base_addr);
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
	mqd->cp_hqd_eop_control = tmp;

	/* disable the queue if it's active */
	ring->wptr = 0;
	mqd->cp_hqd_pq_rptr = 0;
	mqd->cp_hqd_pq_wptr_lo = 0;
	mqd->cp_hqd_pq_wptr_hi = 0;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = regCP_MQD_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = lower_32_bits(hqd_gpu_addr);
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = ring->rptr_gpu_addr;
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = ring->wptr_gpu_addr;
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			    ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, NO_UPDATE_RPTR, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* enable doorbell */
	tmp = 0;
	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 0);
	}
	mqd->cp_hqd_pq_doorbell_control = tmp;

	mqd->cp_hqd_vmid = 0;
	/* activate the queue */
	mqd->cp_hqd_active = 1;

	tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE,
			    PRELOAD_SIZE, 0x55);
	mqd->cp_hqd_persistent_state = tmp;

	mqd->cp_hqd_ib_control = regCP_HQD_IB_CONTROL_DEFAULT;
	mqd->cp_hqd_iq_timer = regCP_HQD_IQ_TIMER_DEFAULT;
	mqd->cp_hqd_quantum = regCP_HQD_QUANTUM_DEFAULT;

	/*
	 * Set CP_HQD_GFX_CONTROL.DB_UPDATED_MSG_EN[15] to enable unmapped
	 * doorbell handling. This is a reserved CP internal register can
	 * not be accessed by others
	 */
	mqd->reserved_184 = BIT(15);

	return 0;
}
1370
1371static void mes_v12_0_queue_init_register(struct amdgpu_ring *ring)
1372{
1373 struct v12_compute_mqd *mqd = ring->mqd_ptr;
1374 struct amdgpu_device *adev = ring->adev;
1375 uint32_t data = 0;
1376
1377 mutex_lock(&adev->srbm_mutex);
1378 soc21_grbm_select(adev, 3, ring->pipe, 0, 0);
1379
1380 /* set CP_HQD_VMID.VMID = 0. */
1381 data = RREG32_SOC15(GC, 0, regCP_HQD_VMID);
1382 data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0);
1383 WREG32_SOC15(GC, 0, regCP_HQD_VMID, data);
1384
1385 /* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */
1386 data = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
1387 data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
1388 DOORBELL_EN, 0);
1389 WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, data);
1390
1391 /* set CP_MQD_BASE_ADDR/HI with the MQD base address */
1392 WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
1393 WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
1394
1395 /* set CP_MQD_CONTROL.VMID=0 */
1396 data = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
1397 data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0);
1398 WREG32_SOC15(GC, 0, regCP_MQD_CONTROL, 0);
1399
1400 /* set CP_HQD_PQ_BASE/HI with the ring buffer base address */
1401 WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
1402 WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
1403
1404 /* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */
1405 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
1406 mqd->cp_hqd_pq_rptr_report_addr_lo);
1407 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
1408 mqd->cp_hqd_pq_rptr_report_addr_hi);
1409
1410 /* set CP_HQD_PQ_CONTROL */
1411 WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
1412
1413 /* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */
1414 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
1415 mqd->cp_hqd_pq_wptr_poll_addr_lo);
1416 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
1417 mqd->cp_hqd_pq_wptr_poll_addr_hi);
1418
1419 /* set CP_HQD_PQ_DOORBELL_CONTROL */
1420 WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
1421 mqd->cp_hqd_pq_doorbell_control);
1422
1423 /* set CP_HQD_PERSISTENT_STATE.PRELOAD_SIZE=0x53 */
1424 WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
1425
1426 /* set CP_HQD_ACTIVE.ACTIVE=1 */
1427 WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, mqd->cp_hqd_active);
1428
1429 soc21_grbm_select(adev, 0, 0, 0, 0);
1430 mutex_unlock(&adev->srbm_mutex);
1431}
1432
1433static int mes_v12_0_kiq_enable_queue(struct amdgpu_device *adev)
1434{
1435 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
1436 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
1437 int r;
1438
1439 if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
1440 return -EINVAL;
1441
1442 r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
1443 if (r) {
1444 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
1445 return r;
1446 }
1447
1448 kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[0]);
1449
1450 r = amdgpu_ring_test_ring(kiq_ring);
1451 if (r) {
1452 DRM_ERROR("kfq enable failed\n");
1453 kiq_ring->sched.ready = false;
1454 }
1455 return r;
1456}
1457
/*
 * Initialize a MES queue for the given pipe: build its MQD, map it
 * (via MES itself for uni-mes, via KIQ otherwise, or by direct register
 * programming for the KIQ pipe), then read back the firmware version.
 * Returns 0 on success or a negative error code.
 */
static int mes_v12_0_queue_init(struct amdgpu_device *adev,
				enum amdgpu_mes_pipe pipe)
{
	struct amdgpu_ring *ring;
	int r;

	/* without uni-mes the KIQ pipe is backed by the gfx KIQ ring */
	if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE)
		ring = &adev->gfx.kiq[0].ring;
	else
		ring = &adev->mes.ring[pipe];

	/* on reset/resume, restart the ring from a clean state */
	if ((adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) &&
	    (amdgpu_in_reset(adev) || adev->in_suspend)) {
		*(ring->wptr_cpu_addr) = 0;
		*(ring->rptr_cpu_addr) = 0;
		amdgpu_ring_clear_ring(ring);
	}

	r = mes_v12_0_mqd_init(ring);
	if (r)
		return r;

	if (pipe == AMDGPU_MES_SCHED_PIPE) {
		if (adev->enable_uni_mes)
			r = amdgpu_mes_map_legacy_queue(adev, ring);
		else
			r = mes_v12_0_kiq_enable_queue(adev);
		if (r)
			return r;
	} else {
		mes_v12_0_queue_init_register(ring);
	}

	if (((pipe == AMDGPU_MES_SCHED_PIPE) && !adev->mes.sched_version) ||
	    ((pipe == AMDGPU_MES_KIQ_PIPE) && !adev->mes.kiq_version)) {
		/* get MES scheduler/KIQ versions */
		mutex_lock(&adev->srbm_mutex);
		soc21_grbm_select(adev, 3, pipe, 0, 0);

		/* firmware publishes its version in the GP3_LO scratch register */
		if (pipe == AMDGPU_MES_SCHED_PIPE)
			adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
		else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq)
			adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);

		soc21_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	}

	return 0;
}
1508
/*
 * Set up the software state of a MES ring (me=3) for the given pipe
 * and register it with the ring layer.
 * Returns 0 on success or a negative error code.
 */
static int mes_v12_0_ring_init(struct amdgpu_device *adev, int pipe)
{
	struct amdgpu_ring *ring;

	ring = &adev->mes.ring[pipe];

	ring->funcs = &mes_v12_0_ring_funcs;

	ring->me = 3;
	ring->pipe = pipe;
	ring->queue = 0;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->eop_gpu_addr = adev->mes.eop_gpu_addr[pipe];
	ring->no_scheduler = true;
	sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	/* doorbell index is in dword units, hence << 1 from the qword index */
	if (pipe == AMDGPU_MES_SCHED_PIPE)
		ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1;
	else
		ring->doorbell_index = adev->doorbell_index.mes_ring1 << 1;

	return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
				AMDGPU_RING_PRIO_DEFAULT, NULL);
}
1535
/*
 * Set up the software state of the gfx KIQ ring used as the MES KIQ
 * (me=3, pipe=1) and register it with the ring layer.
 * Returns 0 on success or a negative error code.
 */
static int mes_v12_0_kiq_ring_init(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;

	spin_lock_init(&adev->gfx.kiq[0].ring_lock);

	ring = &adev->gfx.kiq[0].ring;

	ring->me = 3;
	ring->pipe = 1;
	ring->queue = 0;

	ring->adev = NULL;
	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	/* doorbell index is in dword units, hence << 1 from the qword index */
	ring->doorbell_index = adev->doorbell_index.mes_ring1 << 1;
	ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_KIQ_PIPE];
	ring->no_scheduler = true;
	sprintf(ring->name, "mes_kiq_%d.%d.%d",
		ring->me, ring->pipe, ring->queue);

	return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
				AMDGPU_RING_PRIO_DEFAULT, NULL);
}
1560
/*
 * Allocate and zero the MQD BO for the ring backing the given pipe, and
 * allocate a CPU-side MQD backup buffer (backup failure is non-fatal).
 * Returns 0 on success or a negative error code.
 */
static int mes_v12_0_mqd_sw_init(struct amdgpu_device *adev,
				 enum amdgpu_mes_pipe pipe)
{
	int r, mqd_size = sizeof(struct v12_compute_mqd);
	struct amdgpu_ring *ring;

	/* without uni-mes the KIQ pipe is backed by the gfx KIQ ring */
	if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE)
		ring = &adev->gfx.kiq[0].ring;
	else
		ring = &adev->mes.ring[pipe];

	/* already allocated (e.g. on resume) */
	if (ring->mqd_obj)
		return 0;

	r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
				    &ring->mqd_gpu_addr, &ring->mqd_ptr);
	if (r) {
		dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
		return r;
	}

	memset(ring->mqd_ptr, 0, mqd_size);

	/* prepare MQD backup */
	adev->mes.mqd_backup[pipe] = kmalloc(mqd_size, GFP_KERNEL);
	if (!adev->mes.mqd_backup[pipe])
		dev_warn(adev->dev,
			 "no memory to create MQD backup for ring %s\n",
			 ring->name);

	return 0;
}
1594
1595static int mes_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
1596{
1597 struct amdgpu_device *adev = ip_block->adev;
1598 int pipe, r;
1599
1600 adev->mes.funcs = &mes_v12_0_funcs;
1601 adev->mes.kiq_hw_init = &mes_v12_0_kiq_hw_init;
1602 adev->mes.kiq_hw_fini = &mes_v12_0_kiq_hw_fini;
1603 adev->mes.enable_legacy_queue_map = true;
1604
1605 adev->mes.event_log_size = adev->enable_uni_mes ?
1606 (AMDGPU_MAX_MES_PIPES * (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE)) :
1607 (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE);
1608 r = amdgpu_mes_init(adev);
1609 if (r)
1610 return r;
1611
1612 for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
1613 r = mes_v12_0_allocate_eop_buf(adev, pipe);
1614 if (r)
1615 return r;
1616
1617 r = mes_v12_0_mqd_sw_init(adev, pipe);
1618 if (r)
1619 return r;
1620
1621 if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) {
1622 r = mes_v12_0_kiq_ring_init(adev);
1623 }
1624 else {
1625 r = mes_v12_0_ring_init(adev, pipe);
1626 if (r)
1627 return r;
1628 r = amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1629 AMDGPU_GEM_DOMAIN_VRAM,
1630 &adev->mes.resource_1[pipe],
1631 &adev->mes.resource_1_gpu_addr[pipe],
1632 &adev->mes.resource_1_addr[pipe]);
1633 if (r) {
1634 dev_err(adev->dev, "(%d) failed to create mes resource_1 bo pipe[%d]\n", r, pipe);
1635 return r;
1636 }
1637 }
1638 }
1639
1640 return 0;
1641}
1642
/*
 * IP-block sw_fini: release everything sw_init allocated — resource_1
 * BOs, MQD backups, EOP buffers, firmware handles, rings, and (for the
 * backdoor load path) the ucode BOs.  Always returns 0.
 */
static int mes_v12_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int pipe;

	for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
		amdgpu_bo_free_kernel(&adev->mes.resource_1[pipe],
				      &adev->mes.resource_1_gpu_addr[pipe],
				      &adev->mes.resource_1_addr[pipe]);

		kfree(adev->mes.mqd_backup[pipe]);

		amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[pipe],
				      &adev->mes.eop_gpu_addr[pipe],
				      NULL);
		amdgpu_ucode_release(&adev->mes.fw[pipe]);

		/* MES rings exist per-pipe with uni-mes, else only for SCHED */
		if (adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) {
			amdgpu_bo_free_kernel(&adev->mes.ring[pipe].mqd_obj,
					      &adev->mes.ring[pipe].mqd_gpu_addr,
					      &adev->mes.ring[pipe].mqd_ptr);
			amdgpu_ring_fini(&adev->mes.ring[pipe]);
		}
	}

	/* without uni-mes the KIQ pipe used the gfx KIQ ring — tear it down */
	if (!adev->enable_uni_mes) {
		amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj,
				      &adev->gfx.kiq[0].ring.mqd_gpu_addr,
				      &adev->gfx.kiq[0].ring.mqd_ptr);
		amdgpu_ring_fini(&adev->gfx.kiq[0].ring);
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		mes_v12_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE);
		mes_v12_0_free_ucode_buffers(adev, AMDGPU_MES_SCHED_PIPE);
	}

	amdgpu_mes_fini(adev);
	return 0;
}
1683
1684static void mes_v12_0_kiq_dequeue_sched(struct amdgpu_device *adev)
1685{
1686 uint32_t data;
1687 int i;
1688
1689 mutex_lock(&adev->srbm_mutex);
1690 soc21_grbm_select(adev, 3, AMDGPU_MES_SCHED_PIPE, 0, 0);
1691
1692 /* disable the queue if it's active */
1693 if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
1694 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
1695 for (i = 0; i < adev->usec_timeout; i++) {
1696 if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
1697 break;
1698 udelay(1);
1699 }
1700 }
1701 data = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
1702 data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
1703 DOORBELL_EN, 0);
1704 data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
1705 DOORBELL_HIT, 1);
1706 WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, data);
1707
1708 WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 0);
1709
1710 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 0);
1711 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 0);
1712 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR, 0);
1713
1714 soc21_grbm_select(adev, 0, 0, 0, 0);
1715 mutex_unlock(&adev->srbm_mutex);
1716
1717 adev->mes.ring[0].sched.ready = false;
1718}
1719
/*
 * Tell the RLC which queue acts as the KIQ by encoding its me/pipe/queue
 * into RLC_CP_SCHEDULERS (low byte: me[7:5], pipe[4:3], queue[2:0]) and
 * setting bit 7 (0x80) to activate the selection.
 */
static void mes_v12_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80);
}
1731
/*
 * Bring up the MES KIQ pipe: select the KIQ queue in the RLC, load the
 * firmware (backdoor path if needed), enable MES, initialize the KIQ
 * queue and, where applicable, chain into the main MES hw_init for
 * legacy queue mapping.  Returns 0 on success or a negative error code.
 */
static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev)
{
	int r = 0;
	struct amdgpu_ip_block *ip_block;

	if (adev->enable_uni_mes)
		mes_v12_0_kiq_setting(&adev->mes.ring[AMDGPU_MES_KIQ_PIPE]);
	else
		mes_v12_0_kiq_setting(&adev->gfx.kiq[0].ring);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {

		/* icache is primed only once, on the second (KIQ) load below */
		r = mes_v12_0_load_microcode(adev, AMDGPU_MES_SCHED_PIPE, false);
		if (r) {
			DRM_ERROR("failed to load MES fw, r=%d\n", r);
			return r;
		}

		r = mes_v12_0_load_microcode(adev, AMDGPU_MES_KIQ_PIPE, true);
		if (r) {
			DRM_ERROR("failed to load MES kiq fw, r=%d\n", r);
			return r;
		}

		mes_v12_0_set_ucode_start_addr(adev);

	} else if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
		mes_v12_0_set_ucode_start_addr(adev);

	mes_v12_0_enable(adev, true);

	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_MES);
	if (unlikely(!ip_block)) {
		dev_err(adev->dev, "Failed to get MES handle\n");
		return -EINVAL;
	}

	r = mes_v12_0_queue_init(adev, AMDGPU_MES_KIQ_PIPE);
	if (r)
		goto failure;

	if (adev->enable_uni_mes) {
		r = mes_v12_0_set_hw_resources(&adev->mes, AMDGPU_MES_KIQ_PIPE);
		if (r)
			goto failure;

		mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_KIQ_PIPE);
	}

	if (adev->mes.enable_legacy_queue_map) {
		r = mes_v12_0_hw_init(ip_block);
		if (r)
			goto failure;
	}

	return r;

failure:
	mes_v12_0_hw_fini(ip_block);
	return r;
}
1793
/*
 * Tear down the MES KIQ pipe: unmap/dequeue the scheduler queue if it is
 * still live, then halt the MES pipes.  Always returns 0.
 */
static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev)
{
	if (adev->mes.ring[0].sched.ready) {
		if (adev->enable_uni_mes)
			amdgpu_mes_unmap_legacy_queue(adev,
				      &adev->mes.ring[AMDGPU_MES_SCHED_PIPE],
				      RESET_QUEUES, 0, 0);
		else
			mes_v12_0_kiq_dequeue_sched(adev);

		adev->mes.ring[0].sched.ready = false;
	}

	mes_v12_0_enable(adev, false);

	return 0;
}
1811
1812static int mes_v12_0_hw_init(struct amdgpu_ip_block *ip_block)
1813{
1814 int r;
1815 struct amdgpu_device *adev = ip_block->adev;
1816
1817 if (adev->mes.ring[0].sched.ready)
1818 goto out;
1819
1820 if (!adev->enable_mes_kiq) {
1821 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
1822 r = mes_v12_0_load_microcode(adev,
1823 AMDGPU_MES_SCHED_PIPE, true);
1824 if (r) {
1825 DRM_ERROR("failed to MES fw, r=%d\n", r);
1826 return r;
1827 }
1828
1829 mes_v12_0_set_ucode_start_addr(adev);
1830
1831 } else if (adev->firmware.load_type ==
1832 AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
1833
1834 mes_v12_0_set_ucode_start_addr(adev);
1835 }
1836
1837 mes_v12_0_enable(adev, true);
1838 }
1839
1840 /* Enable the MES to handle doorbell ring on unmapped queue */
1841 mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true);
1842
1843 r = mes_v12_0_queue_init(adev, AMDGPU_MES_SCHED_PIPE);
1844 if (r)
1845 goto failure;
1846
1847 r = mes_v12_0_set_hw_resources(&adev->mes, AMDGPU_MES_SCHED_PIPE);
1848 if (r)
1849 goto failure;
1850
1851 if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x4b)
1852 mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE);
1853
1854 mes_v12_0_init_aggregated_doorbell(&adev->mes);
1855
1856 r = mes_v12_0_query_sched_status(&adev->mes, AMDGPU_MES_SCHED_PIPE);
1857 if (r) {
1858 DRM_ERROR("MES is busy\n");
1859 goto failure;
1860 }
1861
1862 r = amdgpu_mes_update_enforce_isolation(adev);
1863 if (r)
1864 goto failure;
1865
1866out:
1867 /*
1868 * Disable KIQ ring usage from the driver once MES is enabled.
1869 * MES uses KIQ ring exclusively so driver cannot access KIQ ring
1870 * with MES enabled.
1871 */
1872 adev->gfx.kiq[0].ring.sched.ready = false;
1873 adev->mes.ring[0].sched.ready = true;
1874
1875 return 0;
1876
1877failure:
1878 mes_v12_0_hw_fini(ip_block);
1879 return r;
1880}
1881
/* Intentionally empty: actual teardown happens in mes_v12_0_kiq_hw_fini,
 * which the amdgpu_mes layer invokes via adev->mes.kiq_hw_fini. */
static int mes_v12_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
	return 0;
}
1886
/* Suspend is just hw_fini (currently a no-op for this block). */
static int mes_v12_0_suspend(struct amdgpu_ip_block *ip_block)
{
	return mes_v12_0_hw_fini(ip_block);
}
1891
/* Resume re-runs the full hw_init bring-up sequence. */
static int mes_v12_0_resume(struct amdgpu_ip_block *ip_block)
{
	return mes_v12_0_hw_init(ip_block);
}
1896
/*
 * IP-block early_init: size the hung-queue doorbell array and fetch the
 * MES firmware for every pipe.  Returns 0 on success or a negative
 * error code.
 */
static int mes_v12_0_early_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int pipe, r;

	adev->mes.hung_queue_db_array_size =
		MES12_HUNG_DB_OFFSET_ARRAY_SIZE;
	for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
		r = amdgpu_mes_init_microcode(adev, pipe);
		if (r)
			return r;
	}

	return 0;
}
1912
/* IP-block callback table for the MES v12 block. */
static const struct amd_ip_funcs mes_v12_0_ip_funcs = {
	.name = "mes_v12_0",
	.early_init = mes_v12_0_early_init,
	.late_init = NULL,
	.sw_init = mes_v12_0_sw_init,
	.sw_fini = mes_v12_0_sw_fini,
	.hw_init = mes_v12_0_hw_init,
	.hw_fini = mes_v12_0_hw_fini,
	.suspend = mes_v12_0_suspend,
	.resume = mes_v12_0_resume,
};
1924
/* Version descriptor registered with the amdgpu IP-block framework. */
const struct amdgpu_ip_block_version mes_v12_0_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_MES,
	.major = 12,
	.minor = 0,
	.rev = 0,
	.funcs = &mes_v12_0_ip_funcs,
};