1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
24#include <linux/ratelimit.h>
25#include <linux/printk.h>
26#include <linux/slab.h>
27#include <linux/list.h>
28#include <linux/types.h>
29#include <linux/bitops.h>
30#include <linux/sched.h>
31#include "kfd_priv.h"
32#include "kfd_device_queue_manager.h"
33#include "kfd_mqd_manager.h"
34#include "cik_regs.h"
35#include "kfd_kernel_queue.h"
36#include "amdgpu_amdkfd.h"
37
38/* Size of the per-pipe EOP queue */
39#define CIK_HPD_EOP_BYTES_LOG2 11
40#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
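/* i.e. 1 << 11 = 2048 bytes (2 KiB) of EOP buffer per pipe */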
41
42static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
43 u32 pasid, unsigned int vmid);
44
45static int execute_queues_cpsch(struct device_queue_manager *dqm,
46 enum kfd_unmap_queues_filter filter,
47 uint32_t filter_param);
48static int unmap_queues_cpsch(struct device_queue_manager *dqm,
49 enum kfd_unmap_queues_filter filter,
50 uint32_t filter_param, bool reset);
51
52static int map_queues_cpsch(struct device_queue_manager *dqm);
53
54static void deallocate_sdma_queue(struct device_queue_manager *dqm,
55 struct queue *q);
56
57static inline void deallocate_hqd(struct device_queue_manager *dqm,
58 struct queue *q);
59static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
60static int allocate_sdma_queue(struct device_queue_manager *dqm,
61 struct queue *q);
62static void kfd_process_hw_exception(struct work_struct *work);
63
64static inline
65enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
66{
67 if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)
68 return KFD_MQD_TYPE_SDMA;
69 return KFD_MQD_TYPE_CP;
70}
71
72static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
73{
74 int i;
75 int pipe_offset = (mec * dqm->dev->shared_resources.num_pipe_per_mec
76 + pipe) * dqm->dev->shared_resources.num_queue_per_pipe;
77
78 /* queue is available for KFD usage if bit is 1 */
79 for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i)
80 if (test_bit(pipe_offset + i,
81 dqm->dev->shared_resources.cp_queue_bitmap))
82 return true;
83 return false;
84}
85
86unsigned int get_cp_queues_num(struct device_queue_manager *dqm)
87{
88 return bitmap_weight(dqm->dev->shared_resources.cp_queue_bitmap,
89 KGD_MAX_QUEUES);
90}
91
92unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
93{
94 return dqm->dev->shared_resources.num_queue_per_pipe;
95}
96
97unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
98{
99 return dqm->dev->shared_resources.num_pipe_per_mec;
100}
101
102static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
103{
104 return kfd_get_num_sdma_engines(dqm->dev) +
105 kfd_get_num_xgmi_sdma_engines(dqm->dev);
106}
107
108unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
109{
110 return kfd_get_num_sdma_engines(dqm->dev) *
111 dqm->dev->device_info.num_sdma_queues_per_engine;
112}
113
114unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
115{
116 return kfd_get_num_xgmi_sdma_engines(dqm->dev) *
117 dqm->dev->device_info.num_sdma_queues_per_engine;
118}
119
120void program_sh_mem_settings(struct device_queue_manager *dqm,
121 struct qcm_process_device *qpd)
122{
123 return dqm->dev->kfd2kgd->program_sh_mem_settings(
124 dqm->dev->adev, qpd->vmid,
125 qpd->sh_mem_config,
126 qpd->sh_mem_ape1_base,
127 qpd->sh_mem_ape1_limit,
128 qpd->sh_mem_bases);
129}
130
131static void increment_queue_count(struct device_queue_manager *dqm,
132 enum kfd_queue_type type)
133{
134 dqm->active_queue_count++;
135 if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
136 dqm->active_cp_queue_count++;
137}
138
139static void decrement_queue_count(struct device_queue_manager *dqm,
140 enum kfd_queue_type type)
141{
142 dqm->active_queue_count--;
143 if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
144 dqm->active_cp_queue_count--;
145}
146
147static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
148{
149 struct kfd_dev *dev = qpd->dqm->dev;
150
151 if (!KFD_IS_SOC15(dev)) {
152 /* On pre-SOC15 chips we need to use the queue ID to
153 * preserve the user mode ABI.
154 */
155 q->doorbell_id = q->properties.queue_id;
156 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
157 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
158 /* For SDMA queues on SOC15 with 8-byte doorbell, use static
159 * doorbell assignments based on the engine and queue id.
160 * The doorbell index distance between RLC (2*i) and (2*i+1)
161 * for an SDMA engine is 512.
162 */
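/* Worked example with assumed values (purely illustrative): with
 * idx_offset[engine] == 0 and the 512-entry mirror offset noted above,
 * sdma_queue_id 0, 1, 2, 3 map to doorbell ids 0, 512, 1, 513: even
 * RLC queues fill the low block, odd ones the block 512 entries higher.
 */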
163 uint32_t *idx_offset =
164 dev->shared_resources.sdma_doorbell_idx;
165
166 q->doorbell_id = idx_offset[q->properties.sdma_engine_id]
167 + (q->properties.sdma_queue_id & 1)
168 * KFD_QUEUE_DOORBELL_MIRROR_OFFSET
169 + (q->properties.sdma_queue_id >> 1);
170 } else {
171 /* For CP queues on SOC15 reserve a free doorbell ID */
172 unsigned int found;
173
174 found = find_first_zero_bit(qpd->doorbell_bitmap,
175 KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
176 if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
177 pr_debug("No doorbells available");
178 return -EBUSY;
179 }
180 set_bit(found, qpd->doorbell_bitmap);
181 q->doorbell_id = found;
182 }
183
184 q->properties.doorbell_off =
185 kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd),
186 q->doorbell_id);
187 return 0;
188}
189
190static void deallocate_doorbell(struct qcm_process_device *qpd,
191 struct queue *q)
192{
193 unsigned int old;
194 struct kfd_dev *dev = qpd->dqm->dev;
195
196 if (!KFD_IS_SOC15(dev) ||
197 q->properties.type == KFD_QUEUE_TYPE_SDMA ||
198 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
199 return;
200
201 old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
202 WARN_ON(!old);
203}
204
205static void program_trap_handler_settings(struct device_queue_manager *dqm,
206 struct qcm_process_device *qpd)
207{
208 if (dqm->dev->kfd2kgd->program_trap_handler_settings)
209 dqm->dev->kfd2kgd->program_trap_handler_settings(
210 dqm->dev->adev, qpd->vmid,
211 qpd->tba_addr, qpd->tma_addr);
212}
213
214static int allocate_vmid(struct device_queue_manager *dqm,
215 struct qcm_process_device *qpd,
216 struct queue *q)
217{
218 int allocated_vmid = -1, i;
219
220 for (i = dqm->dev->vm_info.first_vmid_kfd;
221 i <= dqm->dev->vm_info.last_vmid_kfd; i++) {
222 if (!dqm->vmid_pasid[i]) {
223 allocated_vmid = i;
224 break;
225 }
226 }
227
228 if (allocated_vmid < 0) {
229 pr_err("no more vmid to allocate\n");
230 return -ENOSPC;
231 }
232
233 pr_debug("vmid allocated: %d\n", allocated_vmid);
234
235 dqm->vmid_pasid[allocated_vmid] = q->process->pasid;
236
237 set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);
238
239 qpd->vmid = allocated_vmid;
240 q->properties.vmid = allocated_vmid;
241
242 program_sh_mem_settings(dqm, qpd);
243
244 if (KFD_IS_SOC15(dqm->dev) && dqm->dev->cwsr_enabled)
245 program_trap_handler_settings(dqm, qpd);
246
247 /* qpd->page_table_base is set earlier when register_process()
248 * is called, i.e. when the first queue is created.
249 */
250 dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev,
251 qpd->vmid,
252 qpd->page_table_base);
253 /* invalidate the VM context after pasid and vmid mapping is set up */
254 kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
255
256 if (dqm->dev->kfd2kgd->set_scratch_backing_va)
257 dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev,
258 qpd->sh_hidden_private_base, qpd->vmid);
259
260 return 0;
261}
262
263static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
264 struct qcm_process_device *qpd)
265{
266 const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf;
267 int ret;
268
269 if (!qpd->ib_kaddr)
270 return -ENOMEM;
271
272 ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
273 if (ret)
274 return ret;
275
276 return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid,
277 qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
278 pmf->release_mem_size / sizeof(uint32_t));
279}
280
281static void deallocate_vmid(struct device_queue_manager *dqm,
282 struct qcm_process_device *qpd,
283 struct queue *q)
284{
285 /* On GFX v7, CP doesn't flush TC at dequeue */
286 if (q->device->adev->asic_type == CHIP_HAWAII)
287 if (flush_texture_cache_nocpsch(q->device, qpd))
288 pr_err("Failed to flush TC\n");
289
290 kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
291
292 /* Release the vmid mapping */
293 set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
294 dqm->vmid_pasid[qpd->vmid] = 0;
295
296 qpd->vmid = 0;
297 q->properties.vmid = 0;
298}
299
300static int create_queue_nocpsch(struct device_queue_manager *dqm,
301 struct queue *q,
302 struct qcm_process_device *qpd)
303{
304 struct mqd_manager *mqd_mgr;
305 int retval;
306
307 dqm_lock(dqm);
308
309 if (dqm->total_queue_count >= max_num_of_queues_per_device) {
310 pr_warn("Can't create new usermode queue because %d queues were already created\n",
311 dqm->total_queue_count);
312 retval = -EPERM;
313 goto out_unlock;
314 }
315
316 if (list_empty(&qpd->queues_list)) {
317 retval = allocate_vmid(dqm, qpd, q);
318 if (retval)
319 goto out_unlock;
320 }
321 q->properties.vmid = qpd->vmid;
322 /*
323 * Eviction state logic: mark all queues as evicted, even ones
324 * not currently active. Restoring inactive queues later only
325 * updates the is_evicted flag but is a no-op otherwise.
326 */
327 q->properties.is_evicted = !!qpd->evicted;
328
329 q->properties.tba_addr = qpd->tba_addr;
330 q->properties.tma_addr = qpd->tma_addr;
331
332 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
333 q->properties.type)];
334 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
335 retval = allocate_hqd(dqm, q);
336 if (retval)
337 goto deallocate_vmid;
338 pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
339 q->pipe, q->queue);
340 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
341 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
342 retval = allocate_sdma_queue(dqm, q);
343 if (retval)
344 goto deallocate_vmid;
345 dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
346 }
347
348 retval = allocate_doorbell(qpd, q);
349 if (retval)
350 goto out_deallocate_hqd;
351
352 /* Temporarily release dqm lock to avoid a circular lock dependency */
353 dqm_unlock(dqm);
354 q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
355 dqm_lock(dqm);
356
357 if (!q->mqd_mem_obj) {
358 retval = -ENOMEM;
359 goto out_deallocate_doorbell;
360 }
361 mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
362 &q->gart_mqd_addr, &q->properties);
363 if (q->properties.is_active) {
364 if (!dqm->sched_running) {
365 WARN_ONCE(1, "Load non-HWS mqd while stopped\n");
366 goto add_queue_to_list;
367 }
368
369 if (WARN(q->process->mm != current->mm,
370 "should only run in user thread"))
371 retval = -EFAULT;
372 else
373 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
374 q->queue, &q->properties, current->mm);
375 if (retval)
376 goto out_free_mqd;
377 }
378
379add_queue_to_list:
380 list_add(&q->list, &qpd->queues_list);
381 qpd->queue_count++;
382 if (q->properties.is_active)
383 increment_queue_count(dqm, q->properties.type);
384
385 /*
386 * Unconditionally increment this counter, regardless of the queue's
387 * type or whether the queue is active.
388 */
389 dqm->total_queue_count++;
390 pr_debug("Total of %d queues are accountable so far\n",
391 dqm->total_queue_count);
392 goto out_unlock;
393
394out_free_mqd:
395 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
396out_deallocate_doorbell:
397 deallocate_doorbell(qpd, q);
398out_deallocate_hqd:
399 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
400 deallocate_hqd(dqm, q);
401 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
402 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
403 deallocate_sdma_queue(dqm, q);
404deallocate_vmid:
405 if (list_empty(&qpd->queues_list))
406 deallocate_vmid(dqm, qpd, q);
407out_unlock:
408 dqm_unlock(dqm);
409 return retval;
410}
411
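/* Illustrative walk-through of allocate_hqd() below, with assumed
 * numbers: given 4 pipes per MEC and next_pipe_to_allocate == 2, pipes
 * are probed in the order 2, 3, 0, 1; in the first enabled pipe with a
 * non-zero mask, ffs() hands out the lowest-numbered free queue slot,
 * and next_pipe_to_allocate then advances so consecutive allocations
 * spread across pipes.
 */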
412static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
413{
414 bool set;
415 int pipe, bit, i;
416
417 set = false;
418
419 for (pipe = dqm->next_pipe_to_allocate, i = 0;
420 i < get_pipes_per_mec(dqm);
421 pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {
422
423 if (!is_pipe_enabled(dqm, 0, pipe))
424 continue;
425
426 if (dqm->allocated_queues[pipe] != 0) {
427 bit = ffs(dqm->allocated_queues[pipe]) - 1;
428 dqm->allocated_queues[pipe] &= ~(1 << bit);
429 q->pipe = pipe;
430 q->queue = bit;
431 set = true;
432 break;
433 }
434 }
435
436 if (!set)
437 return -EBUSY;
438
439 pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
440 /* horizontal hqd allocation */
441 dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);
442
443 return 0;
444}
445
446static inline void deallocate_hqd(struct device_queue_manager *dqm,
447 struct queue *q)
448{
449 dqm->allocated_queues[q->pipe] |= (1 << q->queue);
450}
451
452/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
453 * to avoid unsynchronized access
454 */
455static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
456 struct qcm_process_device *qpd,
457 struct queue *q)
458{
459 int retval;
460 struct mqd_manager *mqd_mgr;
461
462 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
463 q->properties.type)];
464
465 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
466 deallocate_hqd(dqm, q);
467 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
468 deallocate_sdma_queue(dqm, q);
469 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
470 deallocate_sdma_queue(dqm, q);
471 else {
472 pr_debug("q->properties.type %d is invalid\n",
473 q->properties.type);
474 return -EINVAL;
475 }
476 dqm->total_queue_count--;
477
478 deallocate_doorbell(qpd, q);
479
480 if (!dqm->sched_running) {
481 WARN_ONCE(1, "Destroy non-HWS queue while stopped\n");
482 return 0;
483 }
484
485 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
486 KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
487 KFD_UNMAP_LATENCY_MS,
488 q->pipe, q->queue);
489 if (retval == -ETIME)
490 qpd->reset_wavefronts = true;
491
492 list_del(&q->list);
493 if (list_empty(&qpd->queues_list)) {
494 if (qpd->reset_wavefronts) {
495 pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
496 dqm->dev);
497 /* dbgdev_wave_reset_wavefronts has to be called before
498 * deallocate_vmid(), i.e. when vmid is still in use.
499 */
500 dbgdev_wave_reset_wavefronts(dqm->dev,
501 qpd->pqm->process);
502 qpd->reset_wavefronts = false;
503 }
504
505 deallocate_vmid(dqm, qpd, q);
506 }
507 qpd->queue_count--;
508 if (q->properties.is_active) {
509 decrement_queue_count(dqm, q->properties.type);
510 if (q->properties.is_gws) {
511 dqm->gws_queue_count--;
512 qpd->mapped_gws_queue = false;
513 }
514 }
515
516 return retval;
517}
518
519static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
520 struct qcm_process_device *qpd,
521 struct queue *q)
522{
523 int retval;
524 uint64_t sdma_val = 0;
525 struct kfd_process_device *pdd = qpd_to_pdd(qpd);
526 struct mqd_manager *mqd_mgr =
527 dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)];
528
529 /* Get the SDMA queue stats */
530 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
531 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
532 retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
533 &sdma_val);
534 if (retval)
535 pr_err("Failed to read SDMA queue counter for queue: %d\n",
536 q->properties.queue_id);
537 }
538
539 dqm_lock(dqm);
540 retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
541 if (!retval)
542 pdd->sdma_past_activity_counter += sdma_val;
543 dqm_unlock(dqm);
544
545 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
546
547 return retval;
548}
549
550static int update_queue(struct device_queue_manager *dqm, struct queue *q,
551 struct mqd_update_info *minfo)
552{
553 int retval = 0;
554 struct mqd_manager *mqd_mgr;
555 struct kfd_process_device *pdd;
556 bool prev_active = false;
557
558 dqm_lock(dqm);
559 pdd = kfd_get_process_device_data(q->device, q->process);
560 if (!pdd) {
561 retval = -ENODEV;
562 goto out_unlock;
563 }
564 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
565 q->properties.type)];
566
567 /* Save previous activity state for counters */
568 prev_active = q->properties.is_active;
569
570 /* Make sure the queue is unmapped before updating the MQD */
571 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
572 retval = unmap_queues_cpsch(dqm,
573 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false);
574 if (retval) {
575 pr_err("unmap queue failed\n");
576 goto out_unlock;
577 }
578 } else if (prev_active &&
579 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
580 q->properties.type == KFD_QUEUE_TYPE_SDMA ||
581 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
582
583 if (!dqm->sched_running) {
584 WARN_ONCE(1, "Update non-HWS queue while stopped\n");
585 goto out_unlock;
586 }
587
588 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
589 (dqm->dev->cwsr_enabled?
590 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
591 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
592 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
593 if (retval) {
594 pr_err("destroy mqd failed\n");
595 goto out_unlock;
596 }
597 }
598
599 mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties, minfo);
600
601 /*
602 * check active state vs. the previous state and modify
603 * counter accordingly. map_queues_cpsch uses the
604 * dqm->active_queue_count to determine whether a new runlist must be
605 * uploaded.
606 */
607 if (q->properties.is_active && !prev_active)
608 increment_queue_count(dqm, q->properties.type);
609 else if (!q->properties.is_active && prev_active)
610 decrement_queue_count(dqm, q->properties.type);
611
612 if (q->gws && !q->properties.is_gws) {
613 if (q->properties.is_active) {
614 dqm->gws_queue_count++;
615 pdd->qpd.mapped_gws_queue = true;
616 }
617 q->properties.is_gws = true;
618 } else if (!q->gws && q->properties.is_gws) {
619 if (q->properties.is_active) {
620 dqm->gws_queue_count--;
621 pdd->qpd.mapped_gws_queue = false;
622 }
623 q->properties.is_gws = false;
624 }
625
626 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
627 retval = map_queues_cpsch(dqm);
628 else if (q->properties.is_active &&
629 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
630 q->properties.type == KFD_QUEUE_TYPE_SDMA ||
631 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
632 if (WARN(q->process->mm != current->mm,
633 "should only run in user thread"))
634 retval = -EFAULT;
635 else
636 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
637 q->pipe, q->queue,
638 &q->properties, current->mm);
639 }
640
641out_unlock:
642 dqm_unlock(dqm);
643 return retval;
644}
645
646static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
647 struct qcm_process_device *qpd)
648{
649 struct queue *q;
650 struct mqd_manager *mqd_mgr;
651 struct kfd_process_device *pdd;
652 int retval, ret = 0;
653
654 dqm_lock(dqm);
655 if (qpd->evicted++ > 0) /* already evicted, do nothing */
656 goto out;
657
658 pdd = qpd_to_pdd(qpd);
659 pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
660 pdd->process->pasid);
661
662 pdd->last_evict_timestamp = get_jiffies_64();
663 /* Mark all queues as evicted. Deactivate all active queues on
664 * the qpd.
665 */
666 list_for_each_entry(q, &qpd->queues_list, list) {
667 q->properties.is_evicted = true;
668 if (!q->properties.is_active)
669 continue;
670
671 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
672 q->properties.type)];
673 q->properties.is_active = false;
674 decrement_queue_count(dqm, q->properties.type);
675 if (q->properties.is_gws) {
676 dqm->gws_queue_count--;
677 qpd->mapped_gws_queue = false;
678 }
679
680 if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
681 continue;
682
683 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
684 (dqm->dev->cwsr_enabled?
685 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
686 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
687 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
688 if (retval && !ret)
689 /* Return the first error, but keep going to
690 * maintain a consistent eviction state
691 */
692 ret = retval;
693 }
694
695out:
696 dqm_unlock(dqm);
697 return ret;
698}
699
700static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
701 struct qcm_process_device *qpd)
702{
703 struct queue *q;
704 struct kfd_process_device *pdd;
705 int retval = 0;
706
707 dqm_lock(dqm);
708 if (qpd->evicted++ > 0) /* already evicted, do nothing */
709 goto out;
710
711 pdd = qpd_to_pdd(qpd);
712 pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
713 pdd->process->pasid);
714
715 /* Mark all queues as evicted. Deactivate all active queues on
716 * the qpd.
717 */
718 list_for_each_entry(q, &qpd->queues_list, list) {
719 q->properties.is_evicted = true;
720 if (!q->properties.is_active)
721 continue;
722
723 q->properties.is_active = false;
724 decrement_queue_count(dqm, q->properties.type);
725 }
726 pdd->last_evict_timestamp = get_jiffies_64();
727 retval = execute_queues_cpsch(dqm,
728 qpd->is_debug ?
729 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
730 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
731
732out:
733 dqm_unlock(dqm);
734 return retval;
735}
736
737static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
738 struct qcm_process_device *qpd)
739{
740 struct mm_struct *mm = NULL;
741 struct queue *q;
742 struct mqd_manager *mqd_mgr;
743 struct kfd_process_device *pdd;
744 uint64_t pd_base;
745 uint64_t eviction_duration;
746 int retval, ret = 0;
747
748 pdd = qpd_to_pdd(qpd);
749 /* Retrieve PD base */
750 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
751
752 dqm_lock(dqm);
753 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
754 goto out;
755 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
756 qpd->evicted--;
757 goto out;
758 }
759
760 pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
761 pdd->process->pasid);
762
763 /* Update PD Base in QPD */
764 qpd->page_table_base = pd_base;
765 pr_debug("Updated PD address to 0x%llx\n", pd_base);
766
767 if (!list_empty(&qpd->queues_list)) {
768 dqm->dev->kfd2kgd->set_vm_context_page_table_base(
769 dqm->dev->adev,
770 qpd->vmid,
771 qpd->page_table_base);
772 kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
773 }
774
775 /* Take a safe reference to the mm_struct, which may otherwise
776 * disappear even while the kfd_process is still referenced.
777 */
778 mm = get_task_mm(pdd->process->lead_thread);
779 if (!mm) {
780 ret = -EFAULT;
781 goto out;
782 }
783
784 /* Remove the eviction flags. Activate queues that are not
785 * inactive for other reasons.
786 */
787 list_for_each_entry(q, &qpd->queues_list, list) {
788 q->properties.is_evicted = false;
789 if (!QUEUE_IS_ACTIVE(q->properties))
790 continue;
791
792 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
793 q->properties.type)];
794 q->properties.is_active = true;
795 increment_queue_count(dqm, q->properties.type);
796 if (q->properties.is_gws) {
797 dqm->gws_queue_count++;
798 qpd->mapped_gws_queue = true;
799 }
800
801 if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
802 continue;
803
804 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
805 q->queue, &q->properties, mm);
806 if (retval && !ret)
807 /* Return the first error, but keep going to
808 * maintain a consistent eviction state
809 */
810 ret = retval;
811 }
812 qpd->evicted = 0;
813 eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
814 atomic64_add(eviction_duration, &pdd->evict_duration_counter);
815out:
816 if (mm)
817 mmput(mm);
818 dqm_unlock(dqm);
819 return ret;
820}
821
822static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
823 struct qcm_process_device *qpd)
824{
825 struct queue *q;
826 struct kfd_process_device *pdd;
827 uint64_t pd_base;
828 uint64_t eviction_duration;
829 int retval = 0;
830
831 pdd = qpd_to_pdd(qpd);
832 /* Retrieve PD base */
833 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
834
835 dqm_lock(dqm);
836 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
837 goto out;
838 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
839 qpd->evicted--;
840 goto out;
841 }
842
843 pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
844 pdd->process->pasid);
845
846 /* Update PD Base in QPD */
847 qpd->page_table_base = pd_base;
848 pr_debug("Updated PD address to 0x%llx\n", pd_base);
849
850 /* activate all active queues on the qpd */
851 list_for_each_entry(q, &qpd->queues_list, list) {
852 q->properties.is_evicted = false;
853 if (!QUEUE_IS_ACTIVE(q->properties))
854 continue;
855
856 q->properties.is_active = true;
857 increment_queue_count(dqm, q->properties.type);
858 }
859 retval = execute_queues_cpsch(dqm,
860 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
861 qpd->evicted = 0;
862 eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
863 atomic64_add(eviction_duration, &pdd->evict_duration_counter);
864out:
865 dqm_unlock(dqm);
866 return retval;
867}
868
869static int register_process(struct device_queue_manager *dqm,
870 struct qcm_process_device *qpd)
871{
872 struct device_process_node *n;
873 struct kfd_process_device *pdd;
874 uint64_t pd_base;
875 int retval;
876
877 n = kzalloc(sizeof(*n), GFP_KERNEL);
878 if (!n)
879 return -ENOMEM;
880
881 n->qpd = qpd;
882
883 pdd = qpd_to_pdd(qpd);
884 /* Retrieve PD base */
885 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
886
887 dqm_lock(dqm);
888 list_add(&n->list, &dqm->queues);
889
890 /* Update PD Base in QPD */
891 qpd->page_table_base = pd_base;
892 pr_debug("Updated PD address to 0x%llx\n", pd_base);
893
894 retval = dqm->asic_ops.update_qpd(dqm, qpd);
895
896 dqm->processes_count++;
897
898 dqm_unlock(dqm);
899
900 /* Outside the DQM lock because under the DQM lock we can't do
901 * reclaim or take other locks that others hold while reclaiming.
902 */
903 kfd_inc_compute_active(dqm->dev);
904
905 return retval;
906}
907
908static int unregister_process(struct device_queue_manager *dqm,
909 struct qcm_process_device *qpd)
910{
911 int retval;
912 struct device_process_node *cur, *next;
913
914 pr_debug("qpd->queues_list is %s\n",
915 list_empty(&qpd->queues_list) ? "empty" : "not empty");
916
917 retval = 0;
918 dqm_lock(dqm);
919
920 list_for_each_entry_safe(cur, next, &dqm->queues, list) {
921 if (qpd == cur->qpd) {
922 list_del(&cur->list);
923 kfree(cur);
924 dqm->processes_count--;
925 goto out;
926 }
927 }
928 /* qpd not found in dqm list */
929 retval = 1;
930out:
931 dqm_unlock(dqm);
932
933 /* Outside the DQM lock because under the DQM lock we can't do
934 * reclaim or take other locks that others hold while reclaiming.
935 */
936 if (!retval)
937 kfd_dec_compute_active(dqm->dev);
938
939 return retval;
940}
941
942static int
943set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid,
944 unsigned int vmid)
945{
946 return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
947 dqm->dev->adev, pasid, vmid);
948}
949
950static void init_interrupts(struct device_queue_manager *dqm)
951{
952 unsigned int i;
953
954 for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
955 if (is_pipe_enabled(dqm, 0, i))
956 dqm->dev->kfd2kgd->init_interrupts(dqm->dev->adev, i);
957}
958
959static int initialize_nocpsch(struct device_queue_manager *dqm)
960{
961 int pipe, queue;
962
963 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
964
965 dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
966 sizeof(unsigned int), GFP_KERNEL);
967 if (!dqm->allocated_queues)
968 return -ENOMEM;
969
970 mutex_init(&dqm->lock_hidden);
971 INIT_LIST_HEAD(&dqm->queues);
972 dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
973 dqm->active_cp_queue_count = 0;
974 dqm->gws_queue_count = 0;
975
976 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
977 int pipe_offset = pipe * get_queues_per_pipe(dqm);
978
979 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
980 if (test_bit(pipe_offset + queue,
981 dqm->dev->shared_resources.cp_queue_bitmap))
982 dqm->allocated_queues[pipe] |= 1 << queue;
983 }
984
985 memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid));
986
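/* e.g. with 8 SDMA queues the expression below yields a bitmap of 0xFF:
 * one set bit per queue that is still free (allocate_sdma_queue() clears
 * a bit on use, deallocate_sdma_queue() sets it again).
 */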
987 dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
988 dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
989
990 return 0;
991}
992
993static void uninitialize(struct device_queue_manager *dqm)
994{
995 int i;
996
997 WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0);
998
999 kfree(dqm->allocated_queues);
1000 for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
1001 kfree(dqm->mqd_mgrs[i]);
1002 mutex_destroy(&dqm->lock_hidden);
1003}
1004
1005static int start_nocpsch(struct device_queue_manager *dqm)
1006{
1007 int r = 0;
1008
1009 pr_info("SW scheduler is used");
1010 init_interrupts(dqm);
1011
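/* On Hawaii the packet manager is needed even without HWS, apparently so
 * that flush_texture_cache_nocpsch() can build release_mem packets when
 * deallocate_vmid() flushes TC on queue destruction.
 */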
1012 if (dqm->dev->adev->asic_type == CHIP_HAWAII)
1013 r = pm_init(&dqm->packet_mgr, dqm);
1014 if (!r)
1015 dqm->sched_running = true;
1016
1017 return r;
1018}
1019
1020static int stop_nocpsch(struct device_queue_manager *dqm)
1021{
1022 if (dqm->dev->adev->asic_type == CHIP_HAWAII)
1023 pm_uninit(&dqm->packet_mgr, false);
1024 dqm->sched_running = false;
1025
1026 return 0;
1027}
1028
1029static void pre_reset(struct device_queue_manager *dqm)
1030{
1031 dqm_lock(dqm);
1032 dqm->is_resetting = true;
1033 dqm_unlock(dqm);
1034}
1035
1036static int allocate_sdma_queue(struct device_queue_manager *dqm,
1037 struct queue *q)
1038{
1039 int bit;
1040
1041 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1042 if (dqm->sdma_bitmap == 0) {
1043 pr_err("No more SDMA queue to allocate\n");
1044 return -ENOMEM;
1045 }
1046
1047 bit = __ffs64(dqm->sdma_bitmap);
1048 dqm->sdma_bitmap &= ~(1ULL << bit);
1049 q->sdma_id = bit;
1050 q->properties.sdma_engine_id = q->sdma_id %
1051 kfd_get_num_sdma_engines(dqm->dev);
1052 q->properties.sdma_queue_id = q->sdma_id /
1053 kfd_get_num_sdma_engines(dqm->dev);
1054 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1055 if (dqm->xgmi_sdma_bitmap == 0) {
1056 pr_err("No more XGMI SDMA queue to allocate\n");
1057 return -ENOMEM;
1058 }
1059 bit = __ffs64(dqm->xgmi_sdma_bitmap);
1060 dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
1061 q->sdma_id = bit;
1062 /* sdma_engine_id is an SDMA id that counts
1063 * both PCIe-optimized SDMAs and XGMI-
1064 * optimized SDMAs. The calculation below
1065 * assumes the first N engines are always
1066 * the PCIe-optimized ones.
1067 */
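/* Worked example with assumed engine counts (2 PCIe-optimized and
 * 6 XGMI-optimized, purely illustrative): q->sdma_id == 5 gives
 * sdma_engine_id = 2 + (5 % 6) = 7 and sdma_queue_id = 5 / 6 = 0.
 */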
1068 q->properties.sdma_engine_id =
1069 kfd_get_num_sdma_engines(dqm->dev) +
1070 q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev);
1071 q->properties.sdma_queue_id = q->sdma_id /
1072 kfd_get_num_xgmi_sdma_engines(dqm->dev);
1073 }
1074
1075 pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
1076 pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
1077
1078 return 0;
1079}
1080
1081static void deallocate_sdma_queue(struct device_queue_manager *dqm,
1082 struct queue *q)
1083{
1084 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1085 if (q->sdma_id >= get_num_sdma_queues(dqm))
1086 return;
1087 dqm->sdma_bitmap |= (1ULL << q->sdma_id);
1088 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1089 if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
1090 return;
1091 dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id);
1092 }
1093}
1094
1095/*
1096 * Device Queue Manager implementation for cp scheduler
1097 */
1098
1099static int set_sched_resources(struct device_queue_manager *dqm)
1100{
1101 int i, mec;
1102 struct scheduling_resources res;
1103
1104 res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap;
1105
1106 res.queue_mask = 0;
1107 for (i = 0; i < KGD_MAX_QUEUES; ++i) {
1108 mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
1109 / dqm->dev->shared_resources.num_pipe_per_mec;
1110
1111 if (!test_bit(i, dqm->dev->shared_resources.cp_queue_bitmap))
1112 continue;
1113
1114 /* only acquire queues from the first MEC */
1115 if (mec > 0)
1116 continue;
1117
1118 /* This situation may be hit in the future if a new HW
1119 * generation exposes more than 64 queues. If so, the
1120 * definition of res.queue_mask needs updating
1121 */
1122 if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
1123 pr_err("Invalid queue enabled by amdgpu: %d\n", i);
1124 break;
1125 }
1126
1127 res.queue_mask |= 1ull
1128 << amdgpu_queue_mask_bit_to_set_resource_bit(
1129 dqm->dev->adev, i);
1130 }
1131 res.gws_mask = ~0ull;
1132 res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0;
1133
1134 pr_debug("Scheduling resources:\n"
1135 "vmid mask: 0x%8X\n"
1136 "queue mask: 0x%8llX\n",
1137 res.vmid_mask, res.queue_mask);
1138
1139 return pm_send_set_resources(&dqm->packet_mgr, &res);
1140}
1141
1142static int initialize_cpsch(struct device_queue_manager *dqm)
1143{
1144 uint64_t num_sdma_queues;
1145 uint64_t num_xgmi_sdma_queues;
1146
1147 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
1148
1149 mutex_init(&dqm->lock_hidden);
1150 INIT_LIST_HEAD(&dqm->queues);
1151 dqm->active_queue_count = dqm->processes_count = 0;
1152 dqm->active_cp_queue_count = 0;
1153 dqm->gws_queue_count = 0;
1154 dqm->active_runlist = false;
1155
1156 num_sdma_queues = get_num_sdma_queues(dqm);
1157 if (num_sdma_queues >= BITS_PER_TYPE(dqm->sdma_bitmap))
1158 dqm->sdma_bitmap = ULLONG_MAX;
1159 else
1160 dqm->sdma_bitmap = (BIT_ULL(num_sdma_queues) - 1);
1161
1162 num_xgmi_sdma_queues = get_num_xgmi_sdma_queues(dqm);
1163 if (num_xgmi_sdma_queues >= BITS_PER_TYPE(dqm->xgmi_sdma_bitmap))
1164 dqm->xgmi_sdma_bitmap = ULLONG_MAX;
1165 else
1166 dqm->xgmi_sdma_bitmap = (BIT_ULL(num_xgmi_sdma_queues) - 1);
1167
1168 INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
1169
1170 return 0;
1171}
1172
1173static int start_cpsch(struct device_queue_manager *dqm)
1174{
1175 int retval;
1176
1177 retval = 0;
1178
1179 dqm_lock(dqm);
1180 retval = pm_init(&dqm->packet_mgr, dqm);
1181 if (retval)
1182 goto fail_packet_manager_init;
1183
1184 retval = set_sched_resources(dqm);
1185 if (retval)
1186 goto fail_set_sched_resources;
1187
1188 pr_debug("Allocating fence memory\n");
1189
1190 /* allocate fence memory on the gart */
1191 retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
1192 &dqm->fence_mem);
1193
1194 if (retval)
1195 goto fail_allocate_vidmem;
1196
1197 dqm->fence_addr = (uint64_t *)dqm->fence_mem->cpu_ptr;
1198 dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;
1199
1200 init_interrupts(dqm);
1201
1202 /* clear hang status when the driver tries to start the HW scheduler */
1203 dqm->is_hws_hang = false;
1204 dqm->is_resetting = false;
1205 dqm->sched_running = true;
1206 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1207 dqm_unlock(dqm);
1208
1209 return 0;
1210fail_allocate_vidmem:
1211fail_set_sched_resources:
1212 pm_uninit(&dqm->packet_mgr, false);
1213fail_packet_manager_init:
1214 dqm_unlock(dqm);
1215 return retval;
1216}
1217
1218static int stop_cpsch(struct device_queue_manager *dqm)
1219{
1220 bool hanging;
1221
1222 dqm_lock(dqm);
1223 if (!dqm->sched_running) {
1224 dqm_unlock(dqm);
1225 return 0;
1226 }
1227
1228 if (!dqm->is_hws_hang)
1229 unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false);
1230 hanging = dqm->is_hws_hang || dqm->is_resetting;
1231 dqm->sched_running = false;
1232
1233 pm_release_ib(&dqm->packet_mgr);
1234
1235 kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
1236 pm_uninit(&dqm->packet_mgr, hanging);
1237 dqm_unlock(dqm);
1238
1239 return 0;
1240}
1241
1242static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
1243 struct kernel_queue *kq,
1244 struct qcm_process_device *qpd)
1245{
1246 dqm_lock(dqm);
1247 if (dqm->total_queue_count >= max_num_of_queues_per_device) {
1248 pr_warn("Can't create new kernel queue because %d queues were already created\n",
1249 dqm->total_queue_count);
1250 dqm_unlock(dqm);
1251 return -EPERM;
1252 }
1253
1254 /*
1255 * Unconditionally increment this counter, regardless of the queue's
1256 * type or whether the queue is active.
1257 */
1258 dqm->total_queue_count++;
1259 pr_debug("Total of %d queues are accountable so far\n",
1260 dqm->total_queue_count);
1261
1262 list_add(&kq->list, &qpd->priv_queue_list);
1263 increment_queue_count(dqm, kq->queue->properties.type);
1264 qpd->is_debug = true;
1265 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1266 dqm_unlock(dqm);
1267
1268 return 0;
1269}
1270
1271static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
1272 struct kernel_queue *kq,
1273 struct qcm_process_device *qpd)
1274{
1275 dqm_lock(dqm);
1276 list_del(&kq->list);
1277 decrement_queue_count(dqm, kq->queue->properties.type);
1278 qpd->is_debug = false;
1279 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
1280 /*
1281 * Unconditionally decrement this counter, regardless of the queue's
1282 * type.
1283 */
1284 dqm->total_queue_count--;
1285 pr_debug("Total of %d queues are accountable so far\n",
1286 dqm->total_queue_count);
1287 dqm_unlock(dqm);
1288}
1289
1290static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
1291 struct qcm_process_device *qpd)
1292{
1293 int retval;
1294 struct mqd_manager *mqd_mgr;
1295
1296 if (dqm->total_queue_count >= max_num_of_queues_per_device) {
1297 pr_warn("Can't create new usermode queue because %d queues were already created\n",
1298 dqm->total_queue_count);
1299 retval = -EPERM;
1300 goto out;
1301 }
1302
1303 if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1304 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1305 dqm_lock(dqm);
1306 retval = allocate_sdma_queue(dqm, q);
1307 dqm_unlock(dqm);
1308 if (retval)
1309 goto out;
1310 }
1311
1312 retval = allocate_doorbell(qpd, q);
1313 if (retval)
1314 goto out_deallocate_sdma_queue;
1315
1316 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1317 q->properties.type)];
1318
1319 if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1320 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
1321 dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
1322 q->properties.tba_addr = qpd->tba_addr;
1323 q->properties.tma_addr = qpd->tma_addr;
1324 q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
1325 if (!q->mqd_mem_obj) {
1326 retval = -ENOMEM;
1327 goto out_deallocate_doorbell;
1328 }
1329
1330 dqm_lock(dqm);
1331 /*
1332 * Eviction state logic: mark all queues as evicted, even ones
1333 * not currently active. Restoring inactive queues later only
1334 * updates the is_evicted flag but is a no-op otherwise.
1335 */
1336 q->properties.is_evicted = !!qpd->evicted;
1337 mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
1338 &q->gart_mqd_addr, &q->properties);
1339
1340 list_add(&q->list, &qpd->queues_list);
1341 qpd->queue_count++;
1342
1343 if (q->properties.is_active) {
1344 increment_queue_count(dqm, q->properties.type);
1345
1346 execute_queues_cpsch(dqm,
1347 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1348 }
1349
1350 /*
1351 * Unconditionally increment this counter, regardless of the queue's
1352 * type or whether the queue is active.
1353 */
1354 dqm->total_queue_count++;
1355
1356 pr_debug("Total of %d queues are accountable so far\n",
1357 dqm->total_queue_count);
1358
1359 dqm_unlock(dqm);
1360 return retval;
1361
1362out_deallocate_doorbell:
1363 deallocate_doorbell(qpd, q);
1364out_deallocate_sdma_queue:
1365 if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1366 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1367 dqm_lock(dqm);
1368 deallocate_sdma_queue(dqm, q);
1369 dqm_unlock(dqm);
1370 }
1371out:
1372 return retval;
1373}
1374
1375int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
1376 uint64_t fence_value,
1377 unsigned int timeout_ms)
1378{
1379 unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;
1380
1381 while (*fence_addr != fence_value) {
1382 if (time_after(jiffies, end_jiffies)) {
1383 pr_err("qcm fence wait loop timeout expired\n");
1384 /* In the HWS case, this is used to halt the driver thread
1385 * in order not to mess up CP states before doing
1386 * scandumps for FW debugging.
1387 */
1388 while (halt_if_hws_hang)
1389 schedule();
1390
1391 return -ETIME;
1392 }
1393 schedule();
1394 }
1395
1396 return 0;
1397}
1398
1399/* dqm->lock mutex has to be locked before calling this function */
1400static int map_queues_cpsch(struct device_queue_manager *dqm)
1401{
1402 int retval;
1403
1404 if (!dqm->sched_running)
1405 return 0;
1406 if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0)
1407 return 0;
1408 if (dqm->active_runlist)
1409 return 0;
1410
1411 retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues);
1412 pr_debug("%s sent runlist\n", __func__);
1413 if (retval) {
1414 pr_err("failed to execute runlist\n");
1415 return retval;
1416 }
1417 dqm->active_runlist = true;
1418
1419 return retval;
1420}
1421
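/* Rough shape of the preemption handshake in unmap_queues_cpsch():
 * send an UNMAP_QUEUES packet, arm the GART fence with KFD_FENCE_INIT,
 * then send a QUERY_STATUS packet that is expected to write
 * KFD_FENCE_COMPLETED once the unmap has been processed, and finally
 * poll for that value in amdkfd_fence_wait_timeout() with
 * queue_preemption_timeout_ms as the limit.
 */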
1422/* dqm->lock mutex has to be locked before calling this function */
1423static int unmap_queues_cpsch(struct device_queue_manager *dqm,
1424 enum kfd_unmap_queues_filter filter,
1425 uint32_t filter_param, bool reset)
1426{
1427 int retval = 0;
1428 struct mqd_manager *mqd_mgr;
1429
1430 if (!dqm->sched_running)
1431 return 0;
1432 if (dqm->is_hws_hang || dqm->is_resetting)
1433 return -EIO;
1434 if (!dqm->active_runlist)
1435 return retval;
1436
1437 retval = pm_send_unmap_queue(&dqm->packet_mgr, KFD_QUEUE_TYPE_COMPUTE,
1438 filter, filter_param, reset, 0);
1439 if (retval)
1440 return retval;
1441
1442 *dqm->fence_addr = KFD_FENCE_INIT;
1443 pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr,
1444 KFD_FENCE_COMPLETED);
1445 /* time-bounded wait: a timeout here indicates the preemption did not complete */
1446 retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
1447 queue_preemption_timeout_ms);
1448 if (retval) {
1449 pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
1450 dqm->is_hws_hang = true;
1451 /* It's possible we're detecting a HWS hang in the
1452 * middle of a GPU reset. No need to schedule another
1453 * reset in this case.
1454 */
1455 if (!dqm->is_resetting)
1456 schedule_work(&dqm->hw_exception_work);
1457 return retval;
1458 }
1459
1460 /* In the current MEC firmware implementation, if a compute queue
1461 * doesn't respond to the preemption request in time, the HIQ will
1462 * abandon the unmap request without returning any timeout error
1463 * to the driver. Instead, MEC firmware will log the doorbell of the
1464 * unresponsive compute queue in the HIQ.MQD.queue_doorbell_id fields.
1465 * To make sure the queue unmap was successful, the driver needs to
1466 * check those fields.
1467 */
1468 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
1469 if (mqd_mgr->read_doorbell_id(dqm->packet_mgr.priv_queue->queue->mqd)) {
1470 pr_err("HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n");
1471 while (halt_if_hws_hang)
1472 schedule();
1473 return -ETIME;
1474 }
1475
1476 pm_release_ib(&dqm->packet_mgr);
1477 dqm->active_runlist = false;
1478
1479 return retval;
1480}
1481
1482/* only for compute queue */
1483static int reset_queues_cpsch(struct device_queue_manager *dqm,
1484 uint16_t pasid)
1485{
1486 int retval;
1487
1488 dqm_lock(dqm);
1489
1490 retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID,
1491 pasid, true);
1492
1493 dqm_unlock(dqm);
1494 return retval;
1495}
1496
1497/* dqm->lock mutex has to be locked before calling this function */
1498static int execute_queues_cpsch(struct device_queue_manager *dqm,
1499 enum kfd_unmap_queues_filter filter,
1500 uint32_t filter_param)
1501{
1502 int retval;
1503
1504 if (dqm->is_hws_hang)
1505 return -EIO;
1506 retval = unmap_queues_cpsch(dqm, filter, filter_param, false);
1507 if (retval)
1508 return retval;
1509
1510 return map_queues_cpsch(dqm);
1511}
1512
1513static int destroy_queue_cpsch(struct device_queue_manager *dqm,
1514 struct qcm_process_device *qpd,
1515 struct queue *q)
1516{
1517 int retval;
1518 struct mqd_manager *mqd_mgr;
1519 uint64_t sdma_val = 0;
1520 struct kfd_process_device *pdd = qpd_to_pdd(qpd);
1521
1522 /* Get the SDMA queue stats */
1523 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
1524 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
1525 retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
1526 &sdma_val);
1527 if (retval)
1528 pr_err("Failed to read SDMA queue counter for queue: %d\n",
1529 q->properties.queue_id);
1530 }
1531
1532 retval = 0;
1533
1534 /* remove queue from list to prevent rescheduling after preemption */
1535 dqm_lock(dqm);
1536
1537 if (qpd->is_debug) {
1538 /*
1539 * Error: we currently do not allow destroying a queue
1540 * of a process that is being debugged
1541 */
1542 retval = -EBUSY;
1543 goto failed_try_destroy_debugged_queue;
1544
1545 }
1546
1547 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1548 q->properties.type)];
1549
1550 deallocate_doorbell(qpd, q);
1551
1552 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
1553 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
1554 deallocate_sdma_queue(dqm, q);
1555 pdd->sdma_past_activity_counter += sdma_val;
1556 }
1557
1558 list_del(&q->list);
1559 qpd->queue_count--;
1560 if (q->properties.is_active) {
1561 decrement_queue_count(dqm, q->properties.type);
1562 retval = execute_queues_cpsch(dqm,
1563 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1564 if (retval == -ETIME)
1565 qpd->reset_wavefronts = true;
1566 if (q->properties.is_gws) {
1567 dqm->gws_queue_count--;
1568 qpd->mapped_gws_queue = false;
1569 }
1570 }
1571
1572 /*
1573 * Unconditionally decrement this counter, regardless of the queue's
1574 * type
1575 */
1576 dqm->total_queue_count--;
1577 pr_debug("Total of %d queues are accountable so far\n",
1578 dqm->total_queue_count);
1579
1580 dqm_unlock(dqm);
1581
1582 /* Do free_mqd after dqm_unlock(dqm) to avoid circular locking */
1583 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1584
1585 return retval;
1586
1587failed_try_destroy_debugged_queue:
1588
1589 dqm_unlock(dqm);
1590 return retval;
1591}
1592
1593/*
1594 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
1595 * stay in user mode.
1596 */
1597#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
1598/* APE1 limit is inclusive and 64K aligned. */
1599#define APE1_LIMIT_ALIGNMENT 0xFFFF
1600
1601static bool set_cache_memory_policy(struct device_queue_manager *dqm,
1602 struct qcm_process_device *qpd,
1603 enum cache_policy default_policy,
1604 enum cache_policy alternate_policy,
1605 void __user *alternate_aperture_base,
1606 uint64_t alternate_aperture_size)
1607{
1608 bool retval = true;
1609
1610 if (!dqm->asic_ops.set_cache_memory_policy)
1611 return retval;
1612
1613 dqm_lock(dqm);
1614
1615 if (alternate_aperture_size == 0) {
1616 /* base > limit disables APE1 */
1617 qpd->sh_mem_ape1_base = 1;
1618 qpd->sh_mem_ape1_limit = 0;
1619 } else {
1620 /*
1621 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
1622 * SH_MEM_APE1_BASE[31:0], 0x0000 }
1623 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
1624 * SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
1625 * Verify that the base and size parameters can be
1626 * represented in this format and convert them.
1627 * Additionally restrict APE1 to user-mode addresses.
1628 */
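/* Illustrative check with assumed values: base = 0x1000000000 and
 * size = 0x10000 give limit = 0x100000FFFF; both satisfy the fixed-bit
 * masks above, and the stored fields become
 * sh_mem_ape1_base = sh_mem_ape1_limit = 0x100000 (base >> 16, limit >> 16).
 */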
1629
1630 uint64_t base = (uintptr_t)alternate_aperture_base;
1631 uint64_t limit = base + alternate_aperture_size - 1;
1632
1633 if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
1634 (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
1635 retval = false;
1636 goto out;
1637 }
1638
1639 qpd->sh_mem_ape1_base = base >> 16;
1640 qpd->sh_mem_ape1_limit = limit >> 16;
1641 }
1642
1643 retval = dqm->asic_ops.set_cache_memory_policy(
1644 dqm,
1645 qpd,
1646 default_policy,
1647 alternate_policy,
1648 alternate_aperture_base,
1649 alternate_aperture_size);
1650
1651 if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
1652 program_sh_mem_settings(dqm, qpd);
1653
1654 pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
1655 qpd->sh_mem_config, qpd->sh_mem_ape1_base,
1656 qpd->sh_mem_ape1_limit);
1657
1658out:
1659 dqm_unlock(dqm);
1660 return retval;
1661}
1662
1663static int process_termination_nocpsch(struct device_queue_manager *dqm,
1664 struct qcm_process_device *qpd)
1665{
1666 struct queue *q;
1667 struct device_process_node *cur, *next_dpn;
1668 int retval = 0;
1669 bool found = false;
1670
1671 dqm_lock(dqm);
1672
1673 /* Clear all user mode queues */
1674 while (!list_empty(&qpd->queues_list)) {
1675 struct mqd_manager *mqd_mgr;
1676 int ret;
1677
1678 q = list_first_entry(&qpd->queues_list, struct queue, list);
1679 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1680 q->properties.type)];
1681 ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
1682 if (ret)
1683 retval = ret;
1684 dqm_unlock(dqm);
1685 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1686 dqm_lock(dqm);
1687 }
1688
1689 /* Unregister process */
1690 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
1691 if (qpd == cur->qpd) {
1692 list_del(&cur->list);
1693 kfree(cur);
1694 dqm->processes_count--;
1695 found = true;
1696 break;
1697 }
1698 }
1699
1700 dqm_unlock(dqm);
1701
1702 /* Outside the DQM lock because under the DQM lock we can't do
1703 * reclaim or take other locks that others hold while reclaiming.
1704 */
1705 if (found)
1706 kfd_dec_compute_active(dqm->dev);
1707
1708 return retval;
1709}
1710
1711static int get_wave_state(struct device_queue_manager *dqm,
1712 struct queue *q,
1713 void __user *ctl_stack,
1714 u32 *ctl_stack_used_size,
1715 u32 *save_area_used_size)
1716{
1717 struct mqd_manager *mqd_mgr;
1718
1719 dqm_lock(dqm);
1720
1721 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
1722
1723 if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
1724 q->properties.is_active || !q->device->cwsr_enabled ||
1725 !mqd_mgr->get_wave_state) {
1726 dqm_unlock(dqm);
1727 return -EINVAL;
1728 }
1729
1730 dqm_unlock(dqm);
1731
1732 /*
1733 * get_wave_state is outside the dqm lock to prevent circular locking
1734 * and the queue should be protected against destruction by the process
1735 * lock.
1736 */
1737 return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack,
1738 ctl_stack_used_size, save_area_used_size);
1739}
1740
1741static int process_termination_cpsch(struct device_queue_manager *dqm,
1742 struct qcm_process_device *qpd)
1743{
1744 int retval;
1745 struct queue *q;
1746 struct kernel_queue *kq, *kq_next;
1747 struct mqd_manager *mqd_mgr;
1748 struct device_process_node *cur, *next_dpn;
1749 enum kfd_unmap_queues_filter filter =
1750 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
1751 bool found = false;
1752
1753 retval = 0;
1754
1755 dqm_lock(dqm);
1756
1757 /* Clean all kernel queues */
1758 list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
1759 list_del(&kq->list);
1760 decrement_queue_count(dqm, kq->queue->properties.type);
1761 qpd->is_debug = false;
1762 dqm->total_queue_count--;
1763 filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
1764 }
1765
1766 /* Clear all user mode queues */
1767 list_for_each_entry(q, &qpd->queues_list, list) {
1768 if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
1769 deallocate_sdma_queue(dqm, q);
1770 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
1771 deallocate_sdma_queue(dqm, q);
1772
1773 if (q->properties.is_active) {
1774 decrement_queue_count(dqm, q->properties.type);
1775 if (q->properties.is_gws) {
1776 dqm->gws_queue_count--;
1777 qpd->mapped_gws_queue = false;
1778 }
1779 }
1780
1781 dqm->total_queue_count--;
1782 }
1783
1784 /* Unregister process */
1785 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
1786 if (qpd == cur->qpd) {
1787 list_del(&cur->list);
1788 kfree(cur);
1789 dqm->processes_count--;
1790 found = true;
1791 break;
1792 }
1793 }
1794
1795 retval = execute_queues_cpsch(dqm, filter, 0);
1796 if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
1797 pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
1798 dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
1799 qpd->reset_wavefronts = false;
1800 }
1801
1802 /* Lastly, free mqd resources.
1803 * Do free_mqd() after dqm_unlock to avoid circular locking.
1804 */
1805 while (!list_empty(&qpd->queues_list)) {
1806 q = list_first_entry(&qpd->queues_list, struct queue, list);
1807 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1808 q->properties.type)];
1809 list_del(&q->list);
1810 qpd->queue_count--;
1811 dqm_unlock(dqm);
1812 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1813 dqm_lock(dqm);
1814 }
1815 dqm_unlock(dqm);
1816
1817 /* Outside the DQM lock because under the DQM lock we can't do
1818 * reclaim or take other locks that others hold while reclaiming.
1819 */
1820 if (found)
1821 kfd_dec_compute_active(dqm->dev);
1822
1823 return retval;
1824}
1825
1826static int init_mqd_managers(struct device_queue_manager *dqm)
1827{
1828 int i, j;
1829 struct mqd_manager *mqd_mgr;
1830
1831 for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
1832 mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
1833 if (!mqd_mgr) {
1834 pr_err("mqd manager [%d] initialization failed\n", i);
1835 goto out_free;
1836 }
1837 dqm->mqd_mgrs[i] = mqd_mgr;
1838 }
1839
1840 return 0;
1841
1842out_free:
1843 for (j = 0; j < i; j++) {
1844 kfree(dqm->mqd_mgrs[j]);
1845 dqm->mqd_mgrs[j] = NULL;
1846 }
1847
1848 return -ENOMEM;
1849}
1850
1851 /* Allocate one HIQ MQD (HWS) and all SDMA MQDs in one contiguous chunk */
1852static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
1853{
1854 int retval;
1855 struct kfd_dev *dev = dqm->dev;
1856 struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
1857 uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
1858 get_num_all_sdma_engines(dqm) *
1859 dev->device_info.num_sdma_queues_per_engine +
1860 dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
1861
1862 retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
1863 &(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
1864 (void *)&(mem_obj->cpu_ptr), false);
1865
1866 return retval;
1867}
1868
1869struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
1870{
1871 struct device_queue_manager *dqm;
1872
1873 pr_debug("Loading device queue manager\n");
1874
1875 dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
1876 if (!dqm)
1877 return NULL;
1878
1879 switch (dev->adev->asic_type) {
1880 /* HWS is not available on Hawaii. */
1881 case CHIP_HAWAII:
1882 /* HWS depends on CWSR for timely dequeue. CWSR is not
1883 * available on Tonga.
1884 *
1885 * FIXME: This argument also applies to Kaveri.
1886 */
1887 case CHIP_TONGA:
1888 dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
1889 break;
1890 default:
1891 dqm->sched_policy = sched_policy;
1892 break;
1893 }
1894
1895 dqm->dev = dev;
1896 switch (dqm->sched_policy) {
1897 case KFD_SCHED_POLICY_HWS:
1898 case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
1899 /* initialize dqm for cp scheduling */
1900 dqm->ops.create_queue = create_queue_cpsch;
1901 dqm->ops.initialize = initialize_cpsch;
1902 dqm->ops.start = start_cpsch;
1903 dqm->ops.stop = stop_cpsch;
1904 dqm->ops.pre_reset = pre_reset;
1905 dqm->ops.destroy_queue = destroy_queue_cpsch;
1906 dqm->ops.update_queue = update_queue;
1907 dqm->ops.register_process = register_process;
1908 dqm->ops.unregister_process = unregister_process;
1909 dqm->ops.uninitialize = uninitialize;
1910 dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
1911 dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
1912 dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
1913 dqm->ops.process_termination = process_termination_cpsch;
1914 dqm->ops.evict_process_queues = evict_process_queues_cpsch;
1915 dqm->ops.restore_process_queues = restore_process_queues_cpsch;
1916 dqm->ops.get_wave_state = get_wave_state;
1917 dqm->ops.reset_queues = reset_queues_cpsch;
1918 break;
1919 case KFD_SCHED_POLICY_NO_HWS:
1920 /* initialize dqm for no cp scheduling */
1921 dqm->ops.start = start_nocpsch;
1922 dqm->ops.stop = stop_nocpsch;
1923 dqm->ops.pre_reset = pre_reset;
1924 dqm->ops.create_queue = create_queue_nocpsch;
1925 dqm->ops.destroy_queue = destroy_queue_nocpsch;
1926 dqm->ops.update_queue = update_queue;
1927 dqm->ops.register_process = register_process;
1928 dqm->ops.unregister_process = unregister_process;
1929 dqm->ops.initialize = initialize_nocpsch;
1930 dqm->ops.uninitialize = uninitialize;
1931 dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
1932 dqm->ops.process_termination = process_termination_nocpsch;
1933 dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
1934 dqm->ops.restore_process_queues =
1935 restore_process_queues_nocpsch;
1936 dqm->ops.get_wave_state = get_wave_state;
1937 break;
1938 default:
1939 pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
1940 goto out_free;
1941 }
1942
1943 switch (dev->adev->asic_type) {
1944 case CHIP_CARRIZO:
1945 device_queue_manager_init_vi(&dqm->asic_ops);
1946 break;
1947
1948 case CHIP_KAVERI:
1949 device_queue_manager_init_cik(&dqm->asic_ops);
1950 break;
1951
1952 case CHIP_HAWAII:
1953 device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
1954 break;
1955
1956 case CHIP_TONGA:
1957 case CHIP_FIJI:
1958 case CHIP_POLARIS10:
1959 case CHIP_POLARIS11:
1960 case CHIP_POLARIS12:
1961 case CHIP_VEGAM:
1962 device_queue_manager_init_vi_tonga(&dqm->asic_ops);
1963 break;
1964
1965 default:
1966 if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
1967 device_queue_manager_init_v10_navi10(&dqm->asic_ops);
1968 else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1))
1969 device_queue_manager_init_v9(&dqm->asic_ops);
1970 else {
1971 WARN(1, "Unexpected ASIC family %u",
1972 dev->adev->asic_type);
1973 goto out_free;
1974 }
1975 }
1976
1977 if (init_mqd_managers(dqm))
1978 goto out_free;
1979
1980 if (allocate_hiq_sdma_mqd(dqm)) {
1981 pr_err("Failed to allocate hiq sdma mqd trunk buffer\n");
1982 goto out_free;
1983 }
1984
1985 if (!dqm->ops.initialize(dqm))
1986 return dqm;
1987
1988out_free:
1989 kfree(dqm);
1990 return NULL;
1991}
1992
1993static void deallocate_hiq_sdma_mqd(struct kfd_dev *dev,
1994 struct kfd_mem_obj *mqd)
1995{
1996 WARN(!mqd, "No hiq sdma mqd trunk to free");
1997
1998 amdgpu_amdkfd_free_gtt_mem(dev->adev, mqd->gtt_mem);
1999}
2000
2001void device_queue_manager_uninit(struct device_queue_manager *dqm)
2002{
2003 dqm->ops.uninitialize(dqm);
2004 deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
2005 kfree(dqm);
2006}
2007
2008int kfd_process_vm_fault(struct device_queue_manager *dqm, u32 pasid)
2009{
2010 struct kfd_process_device *pdd;
2011 struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
2012 int ret = 0;
2013
2014 if (!p)
2015 return -EINVAL;
2016 WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
2017 pdd = kfd_get_process_device_data(dqm->dev, p);
2018 if (pdd)
2019 ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
2020 kfd_unref_process(p);
2021
2022 return ret;
2023}
2024
2025static void kfd_process_hw_exception(struct work_struct *work)
2026{
2027 struct device_queue_manager *dqm = container_of(work,
2028 struct device_queue_manager, hw_exception_work);
2029 amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
2030}
2031
2032#if defined(CONFIG_DEBUG_FS)
2033
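/* seq_reg_dump() prints the (offset, value) pairs eight per row: a new
 * row is started whenever the offsets stop being contiguous (note the
 * sizeof(uint32_t) stride check) or once a row already holds eight values.
 */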
2034static void seq_reg_dump(struct seq_file *m,
2035 uint32_t (*dump)[2], uint32_t n_regs)
2036{
2037 uint32_t i, count;
2038
2039 for (i = 0, count = 0; i < n_regs; i++) {
2040 if (count == 0 ||
2041 dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
2042 seq_printf(m, "%s %08x: %08x",
2043 i ? "\n" : "",
2044 dump[i][0], dump[i][1]);
2045 count = 7;
2046 } else {
2047 seq_printf(m, " %08x", dump[i][1]);
2048 count--;
2049 }
2050 }
2051
2052 seq_puts(m, "\n");
2053}
2054
2055int dqm_debugfs_hqds(struct seq_file *m, void *data)
2056{
2057 struct device_queue_manager *dqm = data;
2058 uint32_t (*dump)[2], n_regs;
2059 int pipe, queue;
2060 int r = 0;
2061
2062 if (!dqm->sched_running) {
2063 seq_printf(m, " Device is stopped\n");
2064
2065 return 0;
2066 }
2067
2068 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev,
2069 KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE,
2070 &dump, &n_regs);
2071 if (!r) {
2072 seq_printf(m, " HIQ on MEC %d Pipe %d Queue %d\n",
2073 KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
2074 KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
2075 KFD_CIK_HIQ_QUEUE);
2076 seq_reg_dump(m, dump, n_regs);
2077
2078 kfree(dump);
2079 }
2080
2081 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
2082 int pipe_offset = pipe * get_queues_per_pipe(dqm);
2083
2084 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
2085 if (!test_bit(pipe_offset + queue,
2086 dqm->dev->shared_resources.cp_queue_bitmap))
2087 continue;
2088
2089 r = dqm->dev->kfd2kgd->hqd_dump(
2090 dqm->dev->adev, pipe, queue, &dump, &n_regs);
2091 if (r)
2092 break;
2093
2094 seq_printf(m, " CP Pipe %d, Queue %d\n",
2095 pipe, queue);
2096 seq_reg_dump(m, dump, n_regs);
2097
2098 kfree(dump);
2099 }
2100 }
2101
2102 for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) {
2103 for (queue = 0;
2104 queue < dqm->dev->device_info.num_sdma_queues_per_engine;
2105 queue++) {
2106 r = dqm->dev->kfd2kgd->hqd_sdma_dump(
2107 dqm->dev->adev, pipe, queue, &dump, &n_regs);
2108 if (r)
2109 break;
2110
2111 seq_printf(m, " SDMA Engine %d, RLC %d\n",
2112 pipe, queue);
2113 seq_reg_dump(m, dump, n_regs);
2114
2115 kfree(dump);
2116 }
2117 }
2118
2119 return r;
2120}
2121
2122int dqm_debugfs_hang_hws(struct device_queue_manager *dqm)
2123{
2124 int r = 0;
2125
2126 dqm_lock(dqm);
2127 r = pm_debugfs_hang_hws(&dqm->packet_mgr);
2128 if (r) {
2129 dqm_unlock(dqm);
2130 return r;
2131 }
2132 dqm->active_runlist = true;
2133 r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
2134 dqm_unlock(dqm);
2135
2136 return r;
2137}
2138
2139#endif