/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/mutex.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>
#include <linux/notifier.h>
#include <linux/compat.h>
#include <linux/mman.h>
#include <linux/file.h>
#include <linux/pm_runtime.h>
#include "amdgpu_amdkfd.h"
#include "amdgpu.h"

struct mm_struct;

#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"
#include "kfd_iommu.h"

/*
 * List of struct kfd_process (field kfd_process).
 * Unique/indexed by mm_struct*
 */
DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
static DEFINE_MUTEX(kfd_processes_mutex);

DEFINE_SRCU(kfd_processes_srcu);

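/*
 * Readers of kfd_processes_table use SRCU rather than the mutex. A
 * minimal sketch of the read-side pattern, as used by find_process()
 * further below:
 *
 *	int idx = srcu_read_lock(&kfd_processes_srcu);
 *	p = find_process_by_mm(mm);	// RCU-safe hash walk
 *	srcu_read_unlock(&kfd_processes_srcu, idx);
 *
 * Writers take kfd_processes_mutex and, on removal, wait for a grace
 * period with synchronize_srcu() before the entry may be freed (see
 * kfd_process_notifier_release()).
 */
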
/* For process termination handling */
static struct workqueue_struct *kfd_process_wq;

/* Ordered, single-threaded workqueue for restoring evicted
 * processes. Restoring multiple processes concurrently under memory
 * pressure can lead to processes blocking each other from validating
 * their BOs and result in a live-lock situation where processes
 * remain evicted indefinitely.
 */
static struct workqueue_struct *kfd_restore_wq;

static struct kfd_process *find_process(const struct task_struct *thread);
static void kfd_process_ref_release(struct kref *ref);
static struct kfd_process *create_process(const struct task_struct *thread);
static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep);

static void evict_process_worker(struct work_struct *work);
static void restore_process_worker(struct work_struct *work);

struct kfd_procfs_tree {
        struct kobject *kobj;
};

static struct kfd_procfs_tree procfs;

static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
                               char *buffer)
{
        if (strcmp(attr->name, "pasid") == 0) {
                struct kfd_process *p = container_of(attr, struct kfd_process,
                                                     attr_pasid);

                return snprintf(buffer, PAGE_SIZE, "%d\n", p->pasid);
        } else if (strncmp(attr->name, "vram_", 5) == 0) {
                struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
                                                              attr_vram);
                if (pdd)
                        return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
        } else {
                pr_err("Invalid attribute");
                return -EINVAL;
        }

        return 0;
}

static void kfd_procfs_kobj_release(struct kobject *kobj)
{
        kfree(kobj);
}

static const struct sysfs_ops kfd_procfs_ops = {
        .show = kfd_procfs_show,
};

static struct kobj_type procfs_type = {
        .release = kfd_procfs_kobj_release,
        .sysfs_ops = &kfd_procfs_ops,
};

void kfd_procfs_init(void)
{
        int ret = 0;

        procfs.kobj = kfd_alloc_struct(procfs.kobj);
        if (!procfs.kobj)
                return;

        ret = kobject_init_and_add(procfs.kobj, &procfs_type,
                                   &kfd_device->kobj, "proc");
        if (ret) {
                pr_warn("Could not create procfs proc folder");
                /* If we fail to create the procfs, clean up */
                kfd_procfs_shutdown();
        }
}

void kfd_procfs_shutdown(void)
{
        if (procfs.kobj) {
                kobject_del(procfs.kobj);
                kobject_put(procfs.kobj);
                procfs.kobj = NULL;
        }
}

static ssize_t kfd_procfs_queue_show(struct kobject *kobj,
                                     struct attribute *attr, char *buffer)
{
        struct queue *q = container_of(kobj, struct queue, kobj);

        if (!strcmp(attr->name, "size"))
                return snprintf(buffer, PAGE_SIZE, "%llu",
                                q->properties.queue_size);
        else if (!strcmp(attr->name, "type"))
                return snprintf(buffer, PAGE_SIZE, "%d", q->properties.type);
        else if (!strcmp(attr->name, "gpuid"))
                return snprintf(buffer, PAGE_SIZE, "%u", q->device->id);
        else
                pr_err("Invalid attribute");

        return 0;
}

static struct attribute attr_queue_size = {
        .name = "size",
        .mode = KFD_SYSFS_FILE_MODE
};

static struct attribute attr_queue_type = {
        .name = "type",
        .mode = KFD_SYSFS_FILE_MODE
};

static struct attribute attr_queue_gpuid = {
        .name = "gpuid",
        .mode = KFD_SYSFS_FILE_MODE
};

static struct attribute *procfs_queue_attrs[] = {
        &attr_queue_size,
        &attr_queue_type,
        &attr_queue_gpuid,
        NULL
};

static const struct sysfs_ops procfs_queue_ops = {
        .show = kfd_procfs_queue_show,
};

static struct kobj_type procfs_queue_type = {
        .sysfs_ops = &procfs_queue_ops,
        .default_attrs = procfs_queue_attrs,
};

int kfd_procfs_add_queue(struct queue *q)
{
        struct kfd_process *proc;
        int ret;

        if (!q || !q->process)
                return -EINVAL;
        proc = q->process;

        /* Create proc/<pid>/queues/<queue id> folder */
        if (!proc->kobj_queues)
                return -EFAULT;
        ret = kobject_init_and_add(&q->kobj, &procfs_queue_type,
                                   proc->kobj_queues, "%u", q->properties.queue_id);
        if (ret < 0) {
                pr_warn("Creating proc/<pid>/queues/%u failed",
                        q->properties.queue_id);
                kobject_put(&q->kobj);
                return ret;
        }

        return 0;
}
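
/*
 * With the helpers above, each process gets a small sysfs tree under
 * the kfd device kobject (typically /sys/class/kfd/kfd, though the
 * exact mount point depends on how kfd_device is registered). A
 * sketch for a process with pid 1234 and one queue with id 0:
 *
 *	proc/1234/pasid
 *	proc/1234/vram_<gpuid>
 *	proc/1234/queues/0/{size,type,gpuid}
 */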

int kfd_procfs_add_vram_usage(struct kfd_process *p)
{
        int ret = 0;
        struct kfd_process_device *pdd;

        if (!p)
                return -EINVAL;

        if (!p->kobj)
                return -EFAULT;

        /* Create proc/<pid>/vram_<gpuid> file for each GPU */
        list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
                snprintf(pdd->vram_filename, MAX_VRAM_FILENAME_LEN, "vram_%u",
                         pdd->dev->id);
                pdd->attr_vram.name = pdd->vram_filename;
                pdd->attr_vram.mode = KFD_SYSFS_FILE_MODE;
                sysfs_attr_init(&pdd->attr_vram);
                ret = sysfs_create_file(p->kobj, &pdd->attr_vram);
                if (ret)
                        pr_warn("Creating vram usage for gpu id %d failed",
                                (int)pdd->dev->id);
        }

        return ret;
}


void kfd_procfs_del_queue(struct queue *q)
{
        if (!q)
                return;

        kobject_del(&q->kobj);
        kobject_put(&q->kobj);
}

int kfd_process_create_wq(void)
{
        if (!kfd_process_wq)
                kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
        if (!kfd_restore_wq)
                kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0);

        if (!kfd_process_wq || !kfd_restore_wq) {
                kfd_process_destroy_wq();
                return -ENOMEM;
        }

        return 0;
}

void kfd_process_destroy_wq(void)
{
        if (kfd_process_wq) {
                destroy_workqueue(kfd_process_wq);
                kfd_process_wq = NULL;
        }
        if (kfd_restore_wq) {
                destroy_workqueue(kfd_restore_wq);
                kfd_restore_wq = NULL;
        }
}

static void kfd_process_free_gpuvm(struct kgd_mem *mem,
                                   struct kfd_process_device *pdd)
{
        struct kfd_dev *dev = pdd->dev;

        amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->vm);
        amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, NULL);
}

/* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
 * This function should only be called right after the process
 * is created and while kfd_processes_mutex is still being held,
 * to avoid concurrency. Because of that exclusiveness, we do
 * not need to take p->mutex.
 */
static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
                                   uint64_t gpu_va, uint32_t size,
                                   uint32_t flags, void **kptr)
{
        struct kfd_dev *kdev = pdd->dev;
        struct kgd_mem *mem = NULL;
        int handle;
        int err;

        err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
                                                      pdd->vm, &mem, NULL, flags);
        if (err)
                goto err_alloc_mem;

        err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->vm);
        if (err)
                goto err_map_mem;

        err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, mem, true);
        if (err) {
                pr_debug("Sync memory failed, wait interrupted by user signal\n");
                goto sync_memory_failed;
        }

        /* Create an obj handle so kfd_process_device_remove_obj_handle
         * will take care of the bo removal when the process finishes.
         * We do not need to take p->mutex, because the process is just
         * created and the ioctls have not had the chance to run.
         */
        handle = kfd_process_device_create_obj_handle(pdd, mem);

        if (handle < 0) {
                err = handle;
                goto free_gpuvm;
        }

        if (kptr) {
                err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->kgd,
                                (struct kgd_mem *)mem, kptr, NULL);
                if (err) {
                        pr_debug("Map GTT BO to kernel failed\n");
                        goto free_obj_handle;
                }
        }

        return err;

free_obj_handle:
        kfd_process_device_remove_obj_handle(pdd, handle);
free_gpuvm:
sync_memory_failed:
        kfd_process_free_gpuvm(mem, pdd);
        return err;

err_map_mem:
        amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, NULL);
err_alloc_mem:
        *kptr = NULL;
        return err;
}

/* kfd_process_device_reserve_ib_mem - Reserve memory inside the
 * process for IB usage. The memory reserved is for KFD to submit
 * IBs to AMDGPU from kernel space. If the memory is reserved
 * successfully, ib_kaddr will have the CPU/kernel
 * address. Check ib_kaddr before accessing the memory.
 */
static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
{
        struct qcm_process_device *qpd = &pdd->qpd;
        uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT |
                         KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
                         KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE |
                         KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
        void *kaddr;
        int ret;

        if (qpd->ib_kaddr || !qpd->ib_base)
                return 0;

        /* ib_base is only set for dGPU */
        ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
                                      &kaddr);
        if (ret)
                return ret;

        qpd->ib_kaddr = kaddr;

        return 0;
}
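
/*
 * A hypothetical in-kernel IB submission would use the reserved page
 * roughly like this (sketch only, assuming a caller that has already
 * built an IB packet of ib_size bytes):
 *
 *	if (pdd->qpd.ib_kaddr)
 *		memcpy(pdd->qpd.ib_kaddr, ib_packet, ib_size);
 *	// the GPU then fetches the IB through its mapping at qpd->ib_base
 */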

struct kfd_process *kfd_create_process(struct file *filep)
{
        struct kfd_process *process;
        struct task_struct *thread = current;
        int ret;

        if (!thread->mm)
                return ERR_PTR(-EINVAL);

        /* Only the pthreads threading model is supported. */
        if (thread->group_leader->mm != thread->mm)
                return ERR_PTR(-EINVAL);

        /*
         * Take the KFD processes mutex before starting process creation
         * so there won't be a case where two threads of the same process
         * create two kfd_process structures.
         */
        mutex_lock(&kfd_processes_mutex);

        /* A prior open of /dev/kfd could have already created the process. */
        process = find_process(thread);
        if (process) {
                pr_debug("Process already found\n");
        } else {
                process = create_process(thread);
                if (IS_ERR(process))
                        goto out;

                ret = kfd_process_init_cwsr_apu(process, filep);
                if (ret) {
                        process = ERR_PTR(ret);
                        goto out;
                }

                if (!procfs.kobj)
                        goto out;

                process->kobj = kfd_alloc_struct(process->kobj);
                if (!process->kobj) {
                        pr_warn("Creating procfs kobject failed");
                        goto out;
                }
                ret = kobject_init_and_add(process->kobj, &procfs_type,
                                           procfs.kobj, "%d",
                                           (int)process->lead_thread->pid);
                if (ret) {
                        pr_warn("Creating procfs pid directory failed");
                        goto out;
                }

                process->attr_pasid.name = "pasid";
                process->attr_pasid.mode = KFD_SYSFS_FILE_MODE;
                sysfs_attr_init(&process->attr_pasid);
                ret = sysfs_create_file(process->kobj, &process->attr_pasid);
                if (ret)
                        pr_warn("Creating pasid for pid %d failed",
                                (int)process->lead_thread->pid);

                process->kobj_queues = kobject_create_and_add("queues",
                                                              process->kobj);
                if (!process->kobj_queues)
                        pr_warn("Creating KFD proc/queues folder failed");

                ret = kfd_procfs_add_vram_usage(process);
                if (ret)
                        pr_warn("Creating vram usage file for pid %d failed",
                                (int)process->lead_thread->pid);
        }
out:
        if (!IS_ERR(process))
                kref_get(&process->ref);
        mutex_unlock(&kfd_processes_mutex);

        return process;
}
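
/*
 * Sketch of the expected call pattern (the real caller lives in the
 * KFD character-device code, e.g. the open() handler):
 *
 *	struct kfd_process *p = kfd_create_process(filep);
 *
 *	if (IS_ERR(p))
 *		return PTR_ERR(p);
 *	// ... use p; drop the reference taken above when done:
 *	kfd_unref_process(p);
 */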

struct kfd_process *kfd_get_process(const struct task_struct *thread)
{
        struct kfd_process *process;

        if (!thread->mm)
                return ERR_PTR(-EINVAL);

        /* Only the pthreads threading model is supported. */
        if (thread->group_leader->mm != thread->mm)
                return ERR_PTR(-EINVAL);

        process = find_process(thread);
        if (!process)
                return ERR_PTR(-EINVAL);

        return process;
}

static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
{
        struct kfd_process *process;

        hash_for_each_possible_rcu(kfd_processes_table, process,
                                   kfd_processes, (uintptr_t)mm)
                if (process->mm == mm)
                        return process;

        return NULL;
}

static struct kfd_process *find_process(const struct task_struct *thread)
{
        struct kfd_process *p;
        int idx;

        idx = srcu_read_lock(&kfd_processes_srcu);
        p = find_process_by_mm(thread->mm);
        srcu_read_unlock(&kfd_processes_srcu, idx);

        return p;
}

void kfd_unref_process(struct kfd_process *p)
{
        kref_put(&p->ref, kfd_process_ref_release);
}
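
/*
 * Reference-counting convention: every function documented below as
 * incrementing process->ref, e.g. kfd_lookup_process_by_pasid() or
 * kfd_lookup_process_by_mm(), pairs with kfd_unref_process(). Sketch:
 *
 *	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
 *
 *	if (p) {
 *		// ... p is guaranteed to stay alive here ...
 *		kfd_unref_process(p);
 *	}
 */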

static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
{
        struct kfd_process *p = pdd->process;
        void *mem;
        int id;

        /*
         * Remove all handles from idr and release appropriate
         * local memory object
         */
        idr_for_each_entry(&pdd->alloc_idr, mem, id) {
                struct kfd_process_device *peer_pdd;

                list_for_each_entry(peer_pdd, &p->per_device_data,
                                    per_device_list) {
                        if (!peer_pdd->vm)
                                continue;
                        amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
                                peer_pdd->dev->kgd, mem, peer_pdd->vm);
                }

                amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem, NULL);
                kfd_process_device_remove_obj_handle(pdd, id);
        }
}

static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
{
        struct kfd_process_device *pdd;

        list_for_each_entry(pdd, &p->per_device_data, per_device_list)
                kfd_process_device_free_bos(pdd);
}

static void kfd_process_destroy_pdds(struct kfd_process *p)
{
        struct kfd_process_device *pdd, *temp;

        list_for_each_entry_safe(pdd, temp, &p->per_device_data,
                                 per_device_list) {
                pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n",
                         pdd->dev->id, p->pasid);

                if (pdd->drm_file) {
                        amdgpu_amdkfd_gpuvm_release_process_vm(
                                pdd->dev->kgd, pdd->vm);
                        fput(pdd->drm_file);
                } else if (pdd->vm)
                        amdgpu_amdkfd_gpuvm_destroy_process_vm(
                                pdd->dev->kgd, pdd->vm);

                list_del(&pdd->per_device_list);

                if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base)
                        free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
                                   get_order(KFD_CWSR_TBA_TMA_SIZE));

                kfree(pdd->qpd.doorbell_bitmap);
                idr_destroy(&pdd->alloc_idr);

                /*
                 * Before destroying the pdd, make sure to report
                 * availability for auto suspend.
                 */
                if (pdd->runtime_inuse) {
                        pm_runtime_mark_last_busy(pdd->dev->ddev->dev);
                        pm_runtime_put_autosuspend(pdd->dev->ddev->dev);
                        pdd->runtime_inuse = false;
                }

                kfree(pdd);
        }
}

/* No process locking is needed in this function, because the process
 * is not findable any more. We must assume that no other thread is
 * using it any more, otherwise we couldn't safely free the process
 * structure in the end.
 */
static void kfd_process_wq_release(struct work_struct *work)
{
        struct kfd_process *p = container_of(work, struct kfd_process,
                                             release_work);
        struct kfd_process_device *pdd;

        /* Remove the procfs files */
        if (p->kobj) {
                sysfs_remove_file(p->kobj, &p->attr_pasid);
                kobject_del(p->kobj_queues);
                kobject_put(p->kobj_queues);
                p->kobj_queues = NULL;

                list_for_each_entry(pdd, &p->per_device_data, per_device_list)
                        sysfs_remove_file(p->kobj, &pdd->attr_vram);

                kobject_del(p->kobj);
                kobject_put(p->kobj);
                p->kobj = NULL;
        }

        kfd_iommu_unbind_process(p);

        kfd_process_free_outstanding_kfd_bos(p);

        kfd_process_destroy_pdds(p);
        dma_fence_put(p->ef);

        kfd_event_free_process(p);

        kfd_pasid_free(p->pasid);
        kfd_free_process_doorbells(p);

        mutex_destroy(&p->mutex);

        put_task_struct(p->lead_thread);

        kfree(p);
}

static void kfd_process_ref_release(struct kref *ref)
{
        struct kfd_process *p = container_of(ref, struct kfd_process, ref);

        INIT_WORK(&p->release_work, kfd_process_wq_release);
        queue_work(kfd_process_wq, &p->release_work);
}

static void kfd_process_free_notifier(struct mmu_notifier *mn)
{
        kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier));
}

static void kfd_process_notifier_release(struct mmu_notifier *mn,
                                         struct mm_struct *mm)
{
        struct kfd_process *p;
        struct kfd_process_device *pdd = NULL;

        /*
         * The kfd_process structure cannot be freed because the
         * mmu_notifier srcu is read locked.
         */
        p = container_of(mn, struct kfd_process, mmu_notifier);
        if (WARN_ON(p->mm != mm))
                return;

        mutex_lock(&kfd_processes_mutex);
        hash_del_rcu(&p->kfd_processes);
        mutex_unlock(&kfd_processes_mutex);
        synchronize_srcu(&kfd_processes_srcu);

        cancel_delayed_work_sync(&p->eviction_work);
        cancel_delayed_work_sync(&p->restore_work);

        mutex_lock(&p->mutex);

        /* Iterate over all process device data structures; if a pdd
         * is in debug mode, we must force unregistration first, so
         * that the queues can then be destroyed.
         */
        list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
                struct kfd_dev *dev = pdd->dev;

                mutex_lock(kfd_get_dbgmgr_mutex());
                if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
                        if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
                                kfd_dbgmgr_destroy(dev->dbgmgr);
                                dev->dbgmgr = NULL;
                        }
                }
                mutex_unlock(kfd_get_dbgmgr_mutex());
        }

        kfd_process_dequeue_from_all_devices(p);
        pqm_uninit(&p->pqm);

        /* Indicate to other users that MM is no longer valid */
        p->mm = NULL;
        /* Signal the eviction fence after user mode queues are
         * destroyed. This allows any BOs to be freed without
         * triggering pointless evictions or waiting for fences.
         */
        dma_fence_signal(p->ef);

        mutex_unlock(&p->mutex);

        mmu_notifier_put(&p->mmu_notifier);
}

static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
        .release = kfd_process_notifier_release,
        .free_notifier = kfd_process_free_notifier,
};

static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
{
        unsigned long offset;
        struct kfd_process_device *pdd;

        list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
                struct kfd_dev *dev = pdd->dev;
                struct qcm_process_device *qpd = &pdd->qpd;

                if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
                        continue;

                offset = KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id);
                qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
                        KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
                        MAP_SHARED, offset);

                if (IS_ERR_VALUE(qpd->tba_addr)) {
                        int err = qpd->tba_addr;

                        pr_err("Failure to set tba address. error %d.\n", err);
                        qpd->tba_addr = 0;
                        qpd->cwsr_kaddr = NULL;
                        return err;
                }

                memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);

                qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
                pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
                         qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
        }

        return 0;
}

static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
{
        struct kfd_dev *dev = pdd->dev;
        struct qcm_process_device *qpd = &pdd->qpd;
        uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT
                | KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE
                | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
        void *kaddr;
        int ret;

        if (!dev->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base)
                return 0;

        /* cwsr_base is only set for dGPU */
        ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base,
                                      KFD_CWSR_TBA_TMA_SIZE, flags, &kaddr);
        if (ret)
                return ret;

        qpd->cwsr_kaddr = kaddr;
        qpd->tba_addr = qpd->cwsr_base;

        memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);

        qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
        pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
                 qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);

        return 0;
}

/*
 * On return the kfd_process is fully operational and will be freed when the
 * mm is released
 */
static struct kfd_process *create_process(const struct task_struct *thread)
{
        struct kfd_process *process;
        int err = -ENOMEM;

        process = kzalloc(sizeof(*process), GFP_KERNEL);
        if (!process)
                goto err_alloc_process;

        kref_init(&process->ref);
        mutex_init(&process->mutex);
        process->mm = thread->mm;
        process->lead_thread = thread->group_leader;
        INIT_LIST_HEAD(&process->per_device_data);
        INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
        INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
        process->last_restore_timestamp = get_jiffies_64();
        kfd_event_init_process(process);
        process->is_32bit_user_mode = in_compat_syscall();

        process->pasid = kfd_pasid_alloc();
        if (process->pasid == 0)
                goto err_alloc_pasid;

        if (kfd_alloc_process_doorbells(process) < 0)
                goto err_alloc_doorbells;

        err = pqm_init(&process->pqm, process);
        if (err != 0)
                goto err_process_pqm_init;

        /* init process apertures */
        err = kfd_init_apertures(process);
        if (err != 0)
                goto err_init_apertures;

        /* Must be last, have to use release destruction after this */
        process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
        err = mmu_notifier_register(&process->mmu_notifier, process->mm);
        if (err)
                goto err_register_notifier;

        get_task_struct(process->lead_thread);
        hash_add_rcu(kfd_processes_table, &process->kfd_processes,
                     (uintptr_t)process->mm);

        return process;

err_register_notifier:
        kfd_process_free_outstanding_kfd_bos(process);
        kfd_process_destroy_pdds(process);
err_init_apertures:
        pqm_uninit(&process->pqm);
err_process_pqm_init:
        kfd_free_process_doorbells(process);
err_alloc_doorbells:
        kfd_pasid_free(process->pasid);
err_alloc_pasid:
        mutex_destroy(&process->mutex);
        kfree(process);
err_alloc_process:
        return ERR_PTR(err);
}

static int init_doorbell_bitmap(struct qcm_process_device *qpd,
                                struct kfd_dev *dev)
{
        unsigned int i;
        int range_start = dev->shared_resources.non_cp_doorbells_start;
        int range_end = dev->shared_resources.non_cp_doorbells_end;

        if (!KFD_IS_SOC15(dev->device_info->asic_family))
                return 0;

        qpd->doorbell_bitmap =
                kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
                                     BITS_PER_BYTE), GFP_KERNEL);
        if (!qpd->doorbell_bitmap)
                return -ENOMEM;

        /* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
        pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);
        pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
                 range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
                 range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);

        for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
                if (i >= range_start && i <= range_end) {
                        set_bit(i, qpd->doorbell_bitmap);
                        set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
                                qpd->doorbell_bitmap);
                }
        }

        return 0;
}
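
/*
 * Sizing sketch: assuming KFD_MAX_NUM_OF_QUEUES_PER_PROCESS is 1024
 * (its usual value in kfd_priv.h), the bitmap above occupies
 * DIV_ROUND_UP(1024, 8) = 128 bytes, i.e. one bit per possible
 * process doorbell. Reserved ranges are marked both in the lower half
 * and in the mirrored copy at KFD_QUEUE_DOORBELL_MIRROR_OFFSET.
 */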

struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
                                                       struct kfd_process *p)
{
        struct kfd_process_device *pdd = NULL;

        list_for_each_entry(pdd, &p->per_device_data, per_device_list)
                if (pdd->dev == dev)
                        return pdd;

        return NULL;
}

struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
                                                          struct kfd_process *p)
{
        struct kfd_process_device *pdd = NULL;

        pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
        if (!pdd)
                return NULL;

        if (init_doorbell_bitmap(&pdd->qpd, dev)) {
                pr_err("Failed to init doorbell for process\n");
                kfree(pdd);
                return NULL;
        }

        pdd->dev = dev;
        INIT_LIST_HEAD(&pdd->qpd.queues_list);
        INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
        pdd->qpd.dqm = dev->dqm;
        pdd->qpd.pqm = &p->pqm;
        pdd->qpd.evicted = 0;
        pdd->qpd.mapped_gws_queue = false;
        pdd->process = p;
        pdd->bound = PDD_UNBOUND;
        pdd->already_dequeued = false;
        pdd->runtime_inuse = false;
        pdd->vram_usage = 0;
        list_add(&pdd->per_device_list, &p->per_device_data);

        /* Init idr used for memory handle translation */
        idr_init(&pdd->alloc_idr);

        return pdd;
}

/**
 * kfd_process_device_init_vm - Initialize a VM for a process-device
 *
 * @pdd: The process-device
 * @drm_file: Optional pointer to a DRM file descriptor
 *
 * If @drm_file is specified, it will be used to acquire the VM from
 * that file descriptor. If successful, the @pdd takes ownership of
 * the file descriptor.
 *
 * If @drm_file is NULL, a new VM is created.
 *
 * Returns 0 on success, -errno on failure.
 */
int kfd_process_device_init_vm(struct kfd_process_device *pdd,
                               struct file *drm_file)
{
        struct kfd_process *p;
        struct kfd_dev *dev;
        int ret;

        if (pdd->vm)
                return drm_file ? -EBUSY : 0;

        p = pdd->process;
        dev = pdd->dev;

        if (drm_file)
                ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(
                        dev->kgd, drm_file, p->pasid,
                        &pdd->vm, &p->kgd_process_info, &p->ef);
        else
                ret = amdgpu_amdkfd_gpuvm_create_process_vm(dev->kgd, p->pasid,
                        &pdd->vm, &p->kgd_process_info, &p->ef);
        if (ret) {
                pr_err("Failed to create process VM object\n");
                return ret;
        }

        amdgpu_vm_set_task_info(pdd->vm);

        ret = kfd_process_device_reserve_ib_mem(pdd);
        if (ret)
                goto err_reserve_ib_mem;
        ret = kfd_process_device_init_cwsr_dgpu(pdd);
        if (ret)
                goto err_init_cwsr;

        pdd->drm_file = drm_file;

        return 0;

err_init_cwsr:
err_reserve_ib_mem:
        kfd_process_device_free_bos(pdd);
        if (!drm_file)
                amdgpu_amdkfd_gpuvm_destroy_process_vm(dev->kgd, pdd->vm);
        pdd->vm = NULL;

        return ret;
}

/*
 * Direct the IOMMU to bind the process (specifically the pasid->mm)
 * to the device.
 * Unbinding occurs when the process dies or the device is removed.
 *
 * Assumes that the process lock is held.
 */
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
                                                      struct kfd_process *p)
{
        struct kfd_process_device *pdd;
        int err;

        pdd = kfd_get_process_device_data(dev, p);
        if (!pdd) {
                pr_err("Process device data doesn't exist\n");
                return ERR_PTR(-ENOMEM);
        }

        /*
         * signal runtime-pm system to auto resume and prevent
         * further runtime suspend once device pdd is created until
         * pdd is destroyed.
         */
        if (!pdd->runtime_inuse) {
                err = pm_runtime_get_sync(dev->ddev->dev);
                if (err < 0)
                        return ERR_PTR(err);
        }

        err = kfd_iommu_bind_process_to_device(pdd);
        if (err)
                goto out;

        err = kfd_process_device_init_vm(pdd, NULL);
        if (err)
                goto out;

        /*
         * make sure that runtime_usage counter is incremented just once
         * per pdd
         */
        pdd->runtime_inuse = true;

        return pdd;

out:
        /* balance runpm reference count and exit with error */
        if (!pdd->runtime_inuse) {
                pm_runtime_mark_last_busy(dev->ddev->dev);
                pm_runtime_put_autosuspend(dev->ddev->dev);
        }

        return ERR_PTR(err);
}

struct kfd_process_device *kfd_get_first_process_device_data(
                                                struct kfd_process *p)
{
        return list_first_entry(&p->per_device_data,
                                struct kfd_process_device,
                                per_device_list);
}

struct kfd_process_device *kfd_get_next_process_device_data(
                                                struct kfd_process *p,
                                                struct kfd_process_device *pdd)
{
        if (list_is_last(&pdd->per_device_list, &p->per_device_data))
                return NULL;
        return list_next_entry(pdd, per_device_list);
}

bool kfd_has_process_device_data(struct kfd_process *p)
{
        return !(list_empty(&p->per_device_data));
}

/* Create specific handle mapped to mem from process local memory idr
 * Assumes that the process lock is held.
 */
int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
                                         void *mem)
{
        return idr_alloc(&pdd->alloc_idr, mem, 0, 0, GFP_KERNEL);
}

/* Translate specific handle from process local memory idr
 * Assumes that the process lock is held.
 */
void *kfd_process_device_translate_handle(struct kfd_process_device *pdd,
                                          int handle)
{
        if (handle < 0)
                return NULL;

        return idr_find(&pdd->alloc_idr, handle);
}

/* Remove specific handle from process local memory idr
 * Assumes that the process lock is held.
 */
void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
                                          int handle)
{
        if (handle >= 0)
                idr_remove(&pdd->alloc_idr, handle);
}
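
/*
 * Handle life cycle sketch (with the process lock held, per the
 * comments above): allocate, translate, then remove when the BO
 * goes away.
 *
 *	int handle = kfd_process_device_create_obj_handle(pdd, mem);
 *
 *	if (handle >= 0) {
 *		void *m = kfd_process_device_translate_handle(pdd, handle);
 *		// m == mem here
 *		kfd_process_device_remove_obj_handle(pdd, handle);
 *	}
 */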

/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
{
        struct kfd_process *p, *ret_p = NULL;
        unsigned int temp;

        int idx = srcu_read_lock(&kfd_processes_srcu);

        hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
                if (p->pasid == pasid) {
                        kref_get(&p->ref);
                        ret_p = p;
                        break;
                }
        }

        srcu_read_unlock(&kfd_processes_srcu, idx);

        return ret_p;
}

/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
{
        struct kfd_process *p;

        int idx = srcu_read_lock(&kfd_processes_srcu);

        p = find_process_by_mm(mm);
        if (p)
                kref_get(&p->ref);

        srcu_read_unlock(&kfd_processes_srcu, idx);

        return p;
}

/* kfd_process_evict_queues - Evict all user queues of a process
 *
 * Eviction is reference-counted per process-device. This means multiple
 * evictions from different sources can be nested safely.
 */
int kfd_process_evict_queues(struct kfd_process *p)
{
        struct kfd_process_device *pdd;
        int r = 0;
        unsigned int n_evicted = 0;

        list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
                r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
                                                            &pdd->qpd);
                if (r) {
                        pr_err("Failed to evict process queues\n");
                        goto fail;
                }
                n_evicted++;
        }

        return r;

fail:
        /* To keep state consistent, roll back partial eviction by
         * restoring queues
         */
        list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
                if (n_evicted == 0)
                        break;
                if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
                                                              &pdd->qpd))
                        pr_err("Failed to restore queues\n");

                n_evicted--;
        }

        return r;
}

/* kfd_process_restore_queues - Restore all user queues of a process */
int kfd_process_restore_queues(struct kfd_process *p)
{
        struct kfd_process_device *pdd;
        int r, ret = 0;

        list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
                r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
                                                              &pdd->qpd);
                if (r) {
                        pr_err("Failed to restore process queues\n");
                        if (!ret)
                                ret = r;
                }
        }

        return ret;
}
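
/*
 * Because eviction is reference-counted in the device queue manager
 * (via qpd->evicted), nested users compose; a sketch assuming two
 * independent eviction sources A and B:
 *
 *	kfd_process_evict_queues(p);	// A: queues go off the HW
 *	kfd_process_evict_queues(p);	// B: counter bumps, no HW change
 *	kfd_process_restore_queues(p);	// A done: queues stay evicted
 *	kfd_process_restore_queues(p);	// B done: queues actually restored
 */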

static void evict_process_worker(struct work_struct *work)
{
        int ret;
        struct kfd_process *p;
        struct delayed_work *dwork;

        dwork = to_delayed_work(work);

        /* Process termination destroys this worker thread. So during the
         * lifetime of this thread, kfd_process p will be valid
         */
        p = container_of(dwork, struct kfd_process, eviction_work);
        WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
                  "Eviction fence mismatch\n");

        /* A narrow window of overlap between the restore and evict work
         * items is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
         * unreserves KFD BOs, the process can be evicted again, but
         * restore still has a few more steps to finish. So let's wait
         * for any previous restore work to complete.
         */
        flush_delayed_work(&p->restore_work);

        pr_debug("Started evicting pasid 0x%x\n", p->pasid);
        ret = kfd_process_evict_queues(p);
        if (!ret) {
                dma_fence_signal(p->ef);
                dma_fence_put(p->ef);
                p->ef = NULL;
                queue_delayed_work(kfd_restore_wq, &p->restore_work,
                                   msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));

                pr_debug("Finished evicting pasid 0x%x\n", p->pasid);
        } else
                pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid);
}

static void restore_process_worker(struct work_struct *work)
{
        struct delayed_work *dwork;
        struct kfd_process *p;
        int ret = 0;

        dwork = to_delayed_work(work);

        /* Process termination destroys this worker thread. So during the
         * lifetime of this thread, kfd_process p will be valid
         */
        p = container_of(dwork, struct kfd_process, restore_work);
        pr_debug("Started restoring pasid 0x%x\n", p->pasid);

        /* Set last_restore_timestamp before successful restoration.
         * Otherwise this would have to be set by KGD (restore_process_bos)
         * before KFD BOs are unreserved. If not, the process can be evicted
         * again before the timestamp is set.
         * If restore fails, the timestamp will be set again in the next
         * attempt. This would mean that the minimum GPU quantum would be
         * PROCESS_ACTIVE_TIME_MS - (time to execute the following two
         * functions)
         */

        p->last_restore_timestamp = get_jiffies_64();
        ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info,
                                                      &p->ef);
        if (ret) {
                pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",
                         p->pasid, PROCESS_BACK_OFF_TIME_MS);
                ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
                                         msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
                WARN(!ret, "reschedule restore work failed\n");
                return;
        }

        ret = kfd_process_restore_queues(p);
        if (!ret)
                pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
        else
                pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
}

void kfd_suspend_all_processes(void)
{
        struct kfd_process *p;
        unsigned int temp;
        int idx = srcu_read_lock(&kfd_processes_srcu);

        hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
                cancel_delayed_work_sync(&p->eviction_work);
                cancel_delayed_work_sync(&p->restore_work);

                if (kfd_process_evict_queues(p))
                        pr_err("Failed to suspend process 0x%x\n", p->pasid);
                dma_fence_signal(p->ef);
                dma_fence_put(p->ef);
                p->ef = NULL;
        }
        srcu_read_unlock(&kfd_processes_srcu, idx);
}

int kfd_resume_all_processes(void)
{
        struct kfd_process *p;
        unsigned int temp;
        int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);

        hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
                if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) {
                        pr_err("Restore process %d failed during resume\n",
                               p->pasid);
                        ret = -EFAULT;
                }
        }
        srcu_read_unlock(&kfd_processes_srcu, idx);
        return ret;
}

int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
                          struct vm_area_struct *vma)
{
        struct kfd_process_device *pdd;
        struct qcm_process_device *qpd;

        if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
                pr_err("Incorrect CWSR mapping size.\n");
                return -EINVAL;
        }

        pdd = kfd_get_process_device_data(dev, process);
        if (!pdd)
                return -EINVAL;
        qpd = &pdd->qpd;

        qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
                                                   get_order(KFD_CWSR_TBA_TMA_SIZE));
        if (!qpd->cwsr_kaddr) {
                pr_err("Error allocating per process CWSR buffer.\n");
                return -ENOMEM;
        }

        vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND
                | VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP;
        /* Mapping pages to user process */
        return remap_pfn_range(vma, vma->vm_start,
                               PFN_DOWN(__pa(qpd->cwsr_kaddr)),
                               KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
}

void kfd_flush_tlb(struct kfd_process_device *pdd)
{
        struct kfd_dev *dev = pdd->dev;

        if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
                /* Nothing to flush until a VMID is assigned, which
                 * only happens when the first queue is created.
                 */
                if (pdd->qpd.vmid)
                        amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
                                                         pdd->qpd.vmid);
        } else {
                amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
                                                  pdd->process->pasid);
        }
}

#if defined(CONFIG_DEBUG_FS)

int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
{
        struct kfd_process *p;
        unsigned int temp;
        int r = 0;

        int idx = srcu_read_lock(&kfd_processes_srcu);

        hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
                seq_printf(m, "Process %d PASID 0x%x:\n",
                           p->lead_thread->tgid, p->pasid);

                mutex_lock(&p->mutex);
                r = pqm_debugfs_mqds(m, &p->pqm);
                mutex_unlock(&p->mutex);

                if (r)
                        break;
        }

        srcu_read_unlock(&kfd_processes_srcu, idx);

        return r;
}

#endif