/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/mutex.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>
#include <linux/notifier.h>
#include <linux/compat.h>
#include <linux/mman.h>
#include <linux/file.h>
#include <linux/pm_runtime.h>
#include "amdgpu_amdkfd.h"
#include "amdgpu.h"

struct mm_struct;

#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"
#include "kfd_iommu.h"

/*
 * List of struct kfd_process (field kfd_process).
 * Unique/indexed by mm_struct*
 */
DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
static DEFINE_MUTEX(kfd_processes_mutex);

DEFINE_SRCU(kfd_processes_srcu);

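/*
 * Readers of kfd_processes_table use SRCU rather than the mutex. A
 * minimal sketch of the read-side pattern, as used by find_process()
 * further below:
 *
 *	int idx = srcu_read_lock(&kfd_processes_srcu);
 *	p = find_process_by_mm(mm);	// RCU-safe hash walk
 *	srcu_read_unlock(&kfd_processes_srcu, idx);
 *
 * Writers take kfd_processes_mutex and, on removal, wait for a grace
 * period with synchronize_srcu() before the entry may be freed (see
 * kfd_process_notifier_release()).
 */
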
/* For process termination handling */
static struct workqueue_struct *kfd_process_wq;

/* Ordered, single-threaded workqueue for restoring evicted
 * processes. Restoring multiple processes concurrently under memory
 * pressure can lead to processes blocking each other from validating
 * their BOs and result in a live-lock situation where processes
 * remain evicted indefinitely.
 */
static struct workqueue_struct *kfd_restore_wq;

static struct kfd_process *find_process(const struct task_struct *thread);
static void kfd_process_ref_release(struct kref *ref);
static struct kfd_process *create_process(const struct task_struct *thread);
static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep);

static void evict_process_worker(struct work_struct *work);
static void restore_process_worker(struct work_struct *work);

struct kfd_procfs_tree {
        struct kobject *kobj;
};

static struct kfd_procfs_tree procfs;

static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
                               char *buffer)
{
        if (strcmp(attr->name, "pasid") == 0) {
                struct kfd_process *p = container_of(attr, struct kfd_process,
                                                     attr_pasid);

                return snprintf(buffer, PAGE_SIZE, "%d\n", p->pasid);
        } else if (strncmp(attr->name, "vram_", 5) == 0) {
                struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
                                                              attr_vram);
                if (pdd)
                        return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
        } else {
                pr_err("Invalid attribute");
                return -EINVAL;
        }

        return 0;
}

static void kfd_procfs_kobj_release(struct kobject *kobj)
{
        kfree(kobj);
}

static const struct sysfs_ops kfd_procfs_ops = {
        .show = kfd_procfs_show,
};

static struct kobj_type procfs_type = {
        .release = kfd_procfs_kobj_release,
        .sysfs_ops = &kfd_procfs_ops,
};

void kfd_procfs_init(void)
{
        int ret = 0;

        procfs.kobj = kfd_alloc_struct(procfs.kobj);
        if (!procfs.kobj)
                return;

        ret = kobject_init_and_add(procfs.kobj, &procfs_type,
                                   &kfd_device->kobj, "proc");
        if (ret) {
                pr_warn("Could not create procfs proc folder");
                /* If we fail to create the procfs, clean up */
                kfd_procfs_shutdown();
        }
}

void kfd_procfs_shutdown(void)
{
        if (procfs.kobj) {
                kobject_del(procfs.kobj);
                kobject_put(procfs.kobj);
                procfs.kobj = NULL;
        }
}

static ssize_t kfd_procfs_queue_show(struct kobject *kobj,
                                     struct attribute *attr, char *buffer)
{
        struct queue *q = container_of(kobj, struct queue, kobj);

        if (!strcmp(attr->name, "size"))
                return snprintf(buffer, PAGE_SIZE, "%llu",
                                q->properties.queue_size);
        else if (!strcmp(attr->name, "type"))
                return snprintf(buffer, PAGE_SIZE, "%d", q->properties.type);
        else if (!strcmp(attr->name, "gpuid"))
                return snprintf(buffer, PAGE_SIZE, "%u", q->device->id);
        else
                pr_err("Invalid attribute");

        return 0;
}

static struct attribute attr_queue_size = {
        .name = "size",
        .mode = KFD_SYSFS_FILE_MODE
};

static struct attribute attr_queue_type = {
        .name = "type",
        .mode = KFD_SYSFS_FILE_MODE
};

static struct attribute attr_queue_gpuid = {
        .name = "gpuid",
        .mode = KFD_SYSFS_FILE_MODE
};

static struct attribute *procfs_queue_attrs[] = {
        &attr_queue_size,
        &attr_queue_type,
        &attr_queue_gpuid,
        NULL
};

static const struct sysfs_ops procfs_queue_ops = {
        .show = kfd_procfs_queue_show,
};

static struct kobj_type procfs_queue_type = {
        .sysfs_ops = &procfs_queue_ops,
        .default_attrs = procfs_queue_attrs,
};

int kfd_procfs_add_queue(struct queue *q)
{
        struct kfd_process *proc;
        int ret;

        if (!q || !q->process)
                return -EINVAL;
        proc = q->process;

        /* Create proc/<pid>/queues/<queue id> folder */
        if (!proc->kobj_queues)
                return -EFAULT;
        ret = kobject_init_and_add(&q->kobj, &procfs_queue_type,
                                   proc->kobj_queues, "%u", q->properties.queue_id);
        if (ret < 0) {
                pr_warn("Creating proc/<pid>/queues/%u failed",
                        q->properties.queue_id);
                kobject_put(&q->kobj);
                return ret;
        }

        return 0;
}
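
/*
 * With the helpers above, each process gets a small sysfs tree under
 * the kfd device kobject (typically /sys/class/kfd/kfd, though the
 * exact mount point depends on how kfd_device is registered). A
 * sketch for a process with pid 1234 and one queue with id 0:
 *
 *	proc/1234/pasid
 *	proc/1234/vram_<gpuid>
 *	proc/1234/queues/0/{size,type,gpuid}
 */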

int kfd_procfs_add_vram_usage(struct kfd_process *p)
{
        int ret = 0;
        struct kfd_process_device *pdd;

        if (!p)
                return -EINVAL;

        if (!p->kobj)
                return -EFAULT;

        /* Create proc/<pid>/vram_<gpuid> file for each GPU */
        list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
                snprintf(pdd->vram_filename, MAX_VRAM_FILENAME_LEN, "vram_%u",
                         pdd->dev->id);
                pdd->attr_vram.name = pdd->vram_filename;
                pdd->attr_vram.mode = KFD_SYSFS_FILE_MODE;
                sysfs_attr_init(&pdd->attr_vram);
                ret = sysfs_create_file(p->kobj, &pdd->attr_vram);
                if (ret)
                        pr_warn("Creating vram usage for gpu id %d failed",
                                (int)pdd->dev->id);
        }

        return ret;
}


void kfd_procfs_del_queue(struct queue *q)
{
        if (!q)
                return;

        kobject_del(&q->kobj);
        kobject_put(&q->kobj);
}

int kfd_process_create_wq(void)
{
        if (!kfd_process_wq)
                kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
        if (!kfd_restore_wq)
                kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0);

        if (!kfd_process_wq || !kfd_restore_wq) {
                kfd_process_destroy_wq();
                return -ENOMEM;
        }

        return 0;
}

void kfd_process_destroy_wq(void)
{
        if (kfd_process_wq) {
                destroy_workqueue(kfd_process_wq);
                kfd_process_wq = NULL;
        }
        if (kfd_restore_wq) {
                destroy_workqueue(kfd_restore_wq);
                kfd_restore_wq = NULL;
        }
}

static void kfd_process_free_gpuvm(struct kgd_mem *mem,
                                   struct kfd_process_device *pdd)
{
        struct kfd_dev *dev = pdd->dev;

        amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->vm);
        amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, NULL);
}

/* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
 * This function should only be called right after the process
 * is created and while kfd_processes_mutex is still being held,
 * to avoid concurrency. Because of that exclusiveness, we do
 * not need to take p->mutex.
 */
static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
                                   uint64_t gpu_va, uint32_t size,
                                   uint32_t flags, void **kptr)
{
        struct kfd_dev *kdev = pdd->dev;
        struct kgd_mem *mem = NULL;
        int handle;
        int err;

        err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
                                                      pdd->vm, &mem, NULL, flags);
        if (err)
                goto err_alloc_mem;

        err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->vm);
        if (err)
                goto err_map_mem;

        err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, mem, true);
        if (err) {
                pr_debug("Sync memory failed, wait interrupted by user signal\n");
                goto sync_memory_failed;
        }

        /* Create an obj handle so kfd_process_device_remove_obj_handle
         * will take care of the bo removal when the process finishes.
         * We do not need to take p->mutex, because the process is just
         * created and the ioctls have not had the chance to run.
         */
        handle = kfd_process_device_create_obj_handle(pdd, mem);

        if (handle < 0) {
                err = handle;
                goto free_gpuvm;
        }

        if (kptr) {
                err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->kgd,
                                (struct kgd_mem *)mem, kptr, NULL);
                if (err) {
                        pr_debug("Map GTT BO to kernel failed\n");
                        goto free_obj_handle;
                }
        }

        return err;

free_obj_handle:
        kfd_process_device_remove_obj_handle(pdd, handle);
free_gpuvm:
sync_memory_failed:
        kfd_process_free_gpuvm(mem, pdd);
        return err;

err_map_mem:
        amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, NULL);
err_alloc_mem:
        *kptr = NULL;
        return err;
}

/* kfd_process_device_reserve_ib_mem - Reserve memory inside the
 * process for IB usage. The memory reserved is for KFD to submit
 * IBs to AMDGPU from kernel space. If the memory is reserved
 * successfully, ib_kaddr will have the CPU/kernel
 * address. Check ib_kaddr before accessing the memory.
 */
static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
{
        struct qcm_process_device *qpd = &pdd->qpd;
        uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT |
                         KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
                         KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE |
                         KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
        void *kaddr;
        int ret;

        if (qpd->ib_kaddr || !qpd->ib_base)
                return 0;

        /* ib_base is only set for dGPU */
        ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
                                      &kaddr);
        if (ret)
                return ret;

        qpd->ib_kaddr = kaddr;

        return 0;
}
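
/*
 * A hypothetical in-kernel IB submission would use the reserved page
 * roughly like this (sketch only, assuming a caller that has already
 * built an IB packet of ib_size bytes):
 *
 *	if (pdd->qpd.ib_kaddr)
 *		memcpy(pdd->qpd.ib_kaddr, ib_packet, ib_size);
 *	// the GPU then fetches the IB through its mapping at qpd->ib_base
 */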

struct kfd_process *kfd_create_process(struct file *filep)
{
        struct kfd_process *process;
        struct task_struct *thread = current;
        int ret;

        if (!thread->mm)
                return ERR_PTR(-EINVAL);

        /* Only the pthreads threading model is supported. */
        if (thread->group_leader->mm != thread->mm)
                return ERR_PTR(-EINVAL);

        /*
         * Take the KFD processes mutex before starting process creation
         * so there won't be a case where two threads of the same process
         * create two kfd_process structures.
         */
        mutex_lock(&kfd_processes_mutex);

        /* A prior open of /dev/kfd could have already created the process. */
        process = find_process(thread);
        if (process) {
                pr_debug("Process already found\n");
        } else {
                process = create_process(thread);
                if (IS_ERR(process))
                        goto out;

                ret = kfd_process_init_cwsr_apu(process, filep);
                if (ret) {
                        process = ERR_PTR(ret);
                        goto out;
                }

                if (!procfs.kobj)
                        goto out;

                process->kobj = kfd_alloc_struct(process->kobj);
                if (!process->kobj) {
                        pr_warn("Creating procfs kobject failed");
                        goto out;
                }
                ret = kobject_init_and_add(process->kobj, &procfs_type,
                                           procfs.kobj, "%d",
                                           (int)process->lead_thread->pid);
                if (ret) {
                        pr_warn("Creating procfs pid directory failed");
                        goto out;
                }

                process->attr_pasid.name = "pasid";
                process->attr_pasid.mode = KFD_SYSFS_FILE_MODE;
                sysfs_attr_init(&process->attr_pasid);
                ret = sysfs_create_file(process->kobj, &process->attr_pasid);
                if (ret)
                        pr_warn("Creating pasid for pid %d failed",
                                (int)process->lead_thread->pid);

                process->kobj_queues = kobject_create_and_add("queues",
                                                              process->kobj);
                if (!process->kobj_queues)
                        pr_warn("Creating KFD proc/queues folder failed");

                ret = kfd_procfs_add_vram_usage(process);
                if (ret)
                        pr_warn("Creating vram usage file for pid %d failed",
                                (int)process->lead_thread->pid);
        }
out:
        if (!IS_ERR(process))
                kref_get(&process->ref);
        mutex_unlock(&kfd_processes_mutex);

        return process;
}
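
/*
 * Sketch of the expected call pattern (the real caller lives in the
 * KFD character-device code, e.g. the open() handler):
 *
 *	struct kfd_process *p = kfd_create_process(filep);
 *
 *	if (IS_ERR(p))
 *		return PTR_ERR(p);
 *	// ... use p; drop the reference taken above when done:
 *	kfd_unref_process(p);
 */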

struct kfd_process *kfd_get_process(const struct task_struct *thread)
{
        struct kfd_process *process;

        if (!thread->mm)
                return ERR_PTR(-EINVAL);

        /* Only the pthreads threading model is supported. */
        if (thread->group_leader->mm != thread->mm)
                return ERR_PTR(-EINVAL);

        process = find_process(thread);
        if (!process)
                return ERR_PTR(-EINVAL);

        return process;
}

static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
{
        struct kfd_process *process;

        hash_for_each_possible_rcu(kfd_processes_table, process,
                                   kfd_processes, (uintptr_t)mm)
                if (process->mm == mm)
                        return process;

        return NULL;
}

static struct kfd_process *find_process(const struct task_struct *thread)
{
        struct kfd_process *p;
        int idx;

        idx = srcu_read_lock(&kfd_processes_srcu);
        p = find_process_by_mm(thread->mm);
        srcu_read_unlock(&kfd_processes_srcu, idx);

        return p;
}

void kfd_unref_process(struct kfd_process *p)
{
        kref_put(&p->ref, kfd_process_ref_release);
}
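
/*
 * Reference-counting convention: every function documented below as
 * incrementing process->ref, e.g. kfd_lookup_process_by_pasid() or
 * kfd_lookup_process_by_mm(), pairs with kfd_unref_process(). Sketch:
 *
 *	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
 *
 *	if (p) {
 *		// ... p is guaranteed to stay alive here ...
 *		kfd_unref_process(p);
 *	}
 */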

static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
{
        struct kfd_process *p = pdd->process;
        void *mem;
        int id;

        /*
         * Remove all handles from idr and release appropriate
         * local memory object
         */
        idr_for_each_entry(&pdd->alloc_idr, mem, id) {
                struct kfd_process_device *peer_pdd;

                list_for_each_entry(peer_pdd, &p->per_device_data,
                                    per_device_list) {
                        if (!peer_pdd->vm)
                                continue;
                        amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
                                peer_pdd->dev->kgd, mem, peer_pdd->vm);
                }

                amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem, NULL);
                kfd_process_device_remove_obj_handle(pdd, id);
        }
}

static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
{
        struct kfd_process_device *pdd;

        list_for_each_entry(pdd, &p->per_device_data, per_device_list)
                kfd_process_device_free_bos(pdd);
}

static void kfd_process_destroy_pdds(struct kfd_process *p)
{
        struct kfd_process_device *pdd, *temp;

        list_for_each_entry_safe(pdd, temp, &p->per_device_data,
                                 per_device_list) {
                pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n",
                         pdd->dev->id, p->pasid);

                if (pdd->drm_file) {
                        amdgpu_amdkfd_gpuvm_release_process_vm(
                                pdd->dev->kgd, pdd->vm);
                        fput(pdd->drm_file);
                } else if (pdd->vm)
                        amdgpu_amdkfd_gpuvm_destroy_process_vm(
                                pdd->dev->kgd, pdd->vm);

                list_del(&pdd->per_device_list);

                if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base)
                        free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
                                   get_order(KFD_CWSR_TBA_TMA_SIZE));

                kfree(pdd->qpd.doorbell_bitmap);
                idr_destroy(&pdd->alloc_idr);

                /*
                 * Before destroying the pdd, make sure to report
                 * availability for auto suspend.
                 */
                if (pdd->runtime_inuse) {
                        pm_runtime_mark_last_busy(pdd->dev->ddev->dev);
                        pm_runtime_put_autosuspend(pdd->dev->ddev->dev);
                        pdd->runtime_inuse = false;
                }

                kfree(pdd);
        }
}

/* No process locking is needed in this function, because the process
 * is not findable any more. We must assume that no other thread is
 * using it any more, otherwise we couldn't safely free the process
 * structure in the end.
 */
static void kfd_process_wq_release(struct work_struct *work)
{
        struct kfd_process *p = container_of(work, struct kfd_process,
                                             release_work);
        struct kfd_process_device *pdd;

        /* Remove the procfs files */
        if (p->kobj) {
                sysfs_remove_file(p->kobj, &p->attr_pasid);
                kobject_del(p->kobj_queues);
                kobject_put(p->kobj_queues);
                p->kobj_queues = NULL;

                list_for_each_entry(pdd, &p->per_device_data, per_device_list)
                        sysfs_remove_file(p->kobj, &pdd->attr_vram);

                kobject_del(p->kobj);
                kobject_put(p->kobj);
                p->kobj = NULL;
        }

        kfd_iommu_unbind_process(p);

        kfd_process_free_outstanding_kfd_bos(p);

        kfd_process_destroy_pdds(p);
        dma_fence_put(p->ef);

        kfd_event_free_process(p);

        kfd_pasid_free(p->pasid);
        kfd_free_process_doorbells(p);

        mutex_destroy(&p->mutex);

        put_task_struct(p->lead_thread);

        kfree(p);
}

static void kfd_process_ref_release(struct kref *ref)
{
        struct kfd_process *p = container_of(ref, struct kfd_process, ref);

        INIT_WORK(&p->release_work, kfd_process_wq_release);
        queue_work(kfd_process_wq, &p->release_work);
}

static void kfd_process_free_notifier(struct mmu_notifier *mn)
{
        kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier));
}

static void kfd_process_notifier_release(struct mmu_notifier *mn,
                                         struct mm_struct *mm)
{
        struct kfd_process *p;
        struct kfd_process_device *pdd = NULL;

        /*
         * The kfd_process structure cannot be freed because the
         * mmu_notifier srcu is read locked.
         */
        p = container_of(mn, struct kfd_process, mmu_notifier);
        if (WARN_ON(p->mm != mm))
                return;

        mutex_lock(&kfd_processes_mutex);
        hash_del_rcu(&p->kfd_processes);
        mutex_unlock(&kfd_processes_mutex);
        synchronize_srcu(&kfd_processes_srcu);

        cancel_delayed_work_sync(&p->eviction_work);
        cancel_delayed_work_sync(&p->restore_work);

        mutex_lock(&p->mutex);

        /* Iterate over all process device data structures; if a pdd
         * is in debug mode, we must force unregistration first, so
         * that the queues can then be destroyed.
         */
        list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
                struct kfd_dev *dev = pdd->dev;

                mutex_lock(kfd_get_dbgmgr_mutex());
                if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
                        if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
                                kfd_dbgmgr_destroy(dev->dbgmgr);
                                dev->dbgmgr = NULL;
                        }
                }
                mutex_unlock(kfd_get_dbgmgr_mutex());
        }

        kfd_process_dequeue_from_all_devices(p);
        pqm_uninit(&p->pqm);

        /* Indicate to other users that MM is no longer valid */
        p->mm = NULL;
        /* Signal the eviction fence after user mode queues are
         * destroyed. This allows any BOs to be freed without
         * triggering pointless evictions or waiting for fences.
         */
        dma_fence_signal(p->ef);

        mutex_unlock(&p->mutex);

        mmu_notifier_put(&p->mmu_notifier);
}

static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
        .release = kfd_process_notifier_release,
        .free_notifier = kfd_process_free_notifier,
};

static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
{
        unsigned long offset;
        struct kfd_process_device *pdd;

        list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
                struct kfd_dev *dev = pdd->dev;
                struct qcm_process_device *qpd = &pdd->qpd;

                if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
                        continue;

                offset = KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id);
                qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
                        KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
                        MAP_SHARED, offset);

                if (IS_ERR_VALUE(qpd->tba_addr)) {
                        int err = qpd->tba_addr;

                        pr_err("Failure to set tba address. error %d.\n", err);
                        qpd->tba_addr = 0;
                        qpd->cwsr_kaddr = NULL;
                        return err;
                }

                memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);

                qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
                pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
                         qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
        }

        return 0;
}

static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
{
        struct kfd_dev *dev = pdd->dev;
        struct qcm_process_device *qpd = &pdd->qpd;
        uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT
                | KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE
                | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
        void *kaddr;
        int ret;

        if (!dev->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base)
                return 0;

        /* cwsr_base is only set for dGPU */
        ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base,
                                      KFD_CWSR_TBA_TMA_SIZE, flags, &kaddr);
        if (ret)
                return ret;

        qpd->cwsr_kaddr = kaddr;
        qpd->tba_addr = qpd->cwsr_base;

        memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);

        qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
        pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
                 qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);

        return 0;
}

/*
 * On return the kfd_process is fully operational and will be freed when the
 * mm is released
 */
static struct kfd_process *create_process(const struct task_struct *thread)
{
        struct kfd_process *process;
        int err = -ENOMEM;

        process = kzalloc(sizeof(*process), GFP_KERNEL);
        if (!process)
                goto err_alloc_process;

        kref_init(&process->ref);
        mutex_init(&process->mutex);
        process->mm = thread->mm;
        process->lead_thread = thread->group_leader;
        INIT_LIST_HEAD(&process->per_device_data);
        INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
        INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
        process->last_restore_timestamp = get_jiffies_64();
        kfd_event_init_process(process);
        process->is_32bit_user_mode = in_compat_syscall();

        process->pasid = kfd_pasid_alloc();
        if (process->pasid == 0)
                goto err_alloc_pasid;

        if (kfd_alloc_process_doorbells(process) < 0)
                goto err_alloc_doorbells;

        err = pqm_init(&process->pqm, process);
        if (err != 0)
                goto err_process_pqm_init;

        /* init process apertures */
        err = kfd_init_apertures(process);
        if (err != 0)
                goto err_init_apertures;

        /* Must be last, have to use release destruction after this */
        process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
        err = mmu_notifier_register(&process->mmu_notifier, process->mm);
        if (err)
                goto err_register_notifier;

        get_task_struct(process->lead_thread);
        hash_add_rcu(kfd_processes_table, &process->kfd_processes,
                     (uintptr_t)process->mm);

        return process;

err_register_notifier:
        kfd_process_free_outstanding_kfd_bos(process);
        kfd_process_destroy_pdds(process);
err_init_apertures:
        pqm_uninit(&process->pqm);
err_process_pqm_init:
        kfd_free_process_doorbells(process);
err_alloc_doorbells:
        kfd_pasid_free(process->pasid);
err_alloc_pasid:
        mutex_destroy(&process->mutex);
        kfree(process);
err_alloc_process:
        return ERR_PTR(err);
}

static int init_doorbell_bitmap(struct qcm_process_device *qpd,
                                struct kfd_dev *dev)
{
        unsigned int i;
        int range_start = dev->shared_resources.non_cp_doorbells_start;
        int range_end = dev->shared_resources.non_cp_doorbells_end;

        if (!KFD_IS_SOC15(dev->device_info->asic_family))
                return 0;

        qpd->doorbell_bitmap =
                kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
                                     BITS_PER_BYTE), GFP_KERNEL);
        if (!qpd->doorbell_bitmap)
                return -ENOMEM;

        /* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
        pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);
        pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
                 range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
                 range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);

        for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
                if (i >= range_start && i <= range_end) {
                        set_bit(i, qpd->doorbell_bitmap);
                        set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
                                qpd->doorbell_bitmap);
                }
        }

        return 0;
}
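
/*
 * Sizing sketch: assuming KFD_MAX_NUM_OF_QUEUES_PER_PROCESS is 1024
 * (its usual value in kfd_priv.h), the bitmap above occupies
 * DIV_ROUND_UP(1024, 8) = 128 bytes, i.e. one bit per possible
 * process doorbell. Reserved ranges are marked both in the lower half
 * and in the mirrored copy at KFD_QUEUE_DOORBELL_MIRROR_OFFSET.
 */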

struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
                                                       struct kfd_process *p)
{
        struct kfd_process_device *pdd = NULL;

        list_for_each_entry(pdd, &p->per_device_data, per_device_list)
                if (pdd->dev == dev)
                        return pdd;

        return NULL;
}

struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
                                                          struct kfd_process *p)
{
        struct kfd_process_device *pdd = NULL;

        pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
        if (!pdd)
                return NULL;

        if (init_doorbell_bitmap(&pdd->qpd, dev)) {
                pr_err("Failed to init doorbell for process\n");
                kfree(pdd);
                return NULL;
        }

        pdd->dev = dev;
        INIT_LIST_HEAD(&pdd->qpd.queues_list);
        INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
        pdd->qpd.dqm = dev->dqm;
        pdd->qpd.pqm = &p->pqm;
        pdd->qpd.evicted = 0;
        pdd->qpd.mapped_gws_queue = false;
        pdd->process = p;
        pdd->bound = PDD_UNBOUND;
        pdd->already_dequeued = false;
        pdd->runtime_inuse = false;
        pdd->vram_usage = 0;
        list_add(&pdd->per_device_list, &p->per_device_data);

        /* Init idr used for memory handle translation */
        idr_init(&pdd->alloc_idr);

        return pdd;
}

/**
 * kfd_process_device_init_vm - Initialize a VM for a process-device
 *
 * @pdd: The process-device
 * @drm_file: Optional pointer to a DRM file descriptor
 *
 * If @drm_file is specified, it will be used to acquire the VM from
 * that file descriptor. If successful, the @pdd takes ownership of
 * the file descriptor.
 *
 * If @drm_file is NULL, a new VM is created.
 *
 * Returns 0 on success, -errno on failure.
 */
int kfd_process_device_init_vm(struct kfd_process_device *pdd,
                               struct file *drm_file)
{
        struct kfd_process *p;
        struct kfd_dev *dev;
        int ret;

        if (pdd->vm)
                return drm_file ? -EBUSY : 0;

        p = pdd->process;
        dev = pdd->dev;

        if (drm_file)
                ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(
                        dev->kgd, drm_file, p->pasid,
                        &pdd->vm, &p->kgd_process_info, &p->ef);
        else
                ret = amdgpu_amdkfd_gpuvm_create_process_vm(dev->kgd, p->pasid,
                        &pdd->vm, &p->kgd_process_info, &p->ef);
        if (ret) {
                pr_err("Failed to create process VM object\n");
                return ret;
        }

        amdgpu_vm_set_task_info(pdd->vm);

        ret = kfd_process_device_reserve_ib_mem(pdd);
        if (ret)
                goto err_reserve_ib_mem;
        ret = kfd_process_device_init_cwsr_dgpu(pdd);
        if (ret)
                goto err_init_cwsr;

        pdd->drm_file = drm_file;

        return 0;

err_init_cwsr:
err_reserve_ib_mem:
        kfd_process_device_free_bos(pdd);
        if (!drm_file)
                amdgpu_amdkfd_gpuvm_destroy_process_vm(dev->kgd, pdd->vm);
        pdd->vm = NULL;

        return ret;
}

/*
 * Direct the IOMMU to bind the process (specifically the pasid->mm)
 * to the device.
 * Unbinding occurs when the process dies or the device is removed.
 *
 * Assumes that the process lock is held.
 */
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
                                                      struct kfd_process *p)
{
        struct kfd_process_device *pdd;
        int err;

        pdd = kfd_get_process_device_data(dev, p);
        if (!pdd) {
                pr_err("Process device data doesn't exist\n");
                return ERR_PTR(-ENOMEM);
        }

        /*
         * signal runtime-pm system to auto resume and prevent
         * further runtime suspend once device pdd is created until
         * pdd is destroyed.
         */
        if (!pdd->runtime_inuse) {
                err = pm_runtime_get_sync(dev->ddev->dev);
                if (err < 0)
                        return ERR_PTR(err);
        }

        err = kfd_iommu_bind_process_to_device(pdd);
        if (err)
                goto out;

        err = kfd_process_device_init_vm(pdd, NULL);
        if (err)
                goto out;

        /*
         * make sure that runtime_usage counter is incremented just once
         * per pdd
         */
        pdd->runtime_inuse = true;

        return pdd;

out:
        /* balance runpm reference count and exit with error */
        if (!pdd->runtime_inuse) {
                pm_runtime_mark_last_busy(dev->ddev->dev);
                pm_runtime_put_autosuspend(dev->ddev->dev);
        }

        return ERR_PTR(err);
}

struct kfd_process_device *kfd_get_first_process_device_data(
                                                struct kfd_process *p)
{
        return list_first_entry(&p->per_device_data,
                                struct kfd_process_device,
                                per_device_list);
}

struct kfd_process_device *kfd_get_next_process_device_data(
                                                struct kfd_process *p,
                                                struct kfd_process_device *pdd)
{
        if (list_is_last(&pdd->per_device_list, &p->per_device_data))
                return NULL;
        return list_next_entry(pdd, per_device_list);
}

bool kfd_has_process_device_data(struct kfd_process *p)
{
        return !(list_empty(&p->per_device_data));
}

/* Create specific handle mapped to mem from process local memory idr
 * Assumes that the process lock is held.
 */
int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
                                         void *mem)
{
        return idr_alloc(&pdd->alloc_idr, mem, 0, 0, GFP_KERNEL);
}

/* Translate specific handle from process local memory idr
 * Assumes that the process lock is held.
 */
void *kfd_process_device_translate_handle(struct kfd_process_device *pdd,
                                          int handle)
{
        if (handle < 0)
                return NULL;

        return idr_find(&pdd->alloc_idr, handle);
}

/* Remove specific handle from process local memory idr
 * Assumes that the process lock is held.
 */
void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
                                          int handle)
{
        if (handle >= 0)
                idr_remove(&pdd->alloc_idr, handle);
}
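
/*
 * Handle life cycle sketch (with the process lock held, per the
 * comments above): allocate, translate, then remove when the BO
 * goes away.
 *
 *	int handle = kfd_process_device_create_obj_handle(pdd, mem);
 *
 *	if (handle >= 0) {
 *		void *m = kfd_process_device_translate_handle(pdd, handle);
 *		// m == mem here
 *		kfd_process_device_remove_obj_handle(pdd, handle);
 *	}
 */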

/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
{
        struct kfd_process *p, *ret_p = NULL;
        unsigned int temp;

        int idx = srcu_read_lock(&kfd_processes_srcu);

        hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
                if (p->pasid == pasid) {
                        kref_get(&p->ref);
                        ret_p = p;
                        break;
                }
        }

        srcu_read_unlock(&kfd_processes_srcu, idx);

        return ret_p;
}

/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
{
        struct kfd_process *p;

        int idx = srcu_read_lock(&kfd_processes_srcu);

        p = find_process_by_mm(mm);
        if (p)
                kref_get(&p->ref);

        srcu_read_unlock(&kfd_processes_srcu, idx);

        return p;
}

/* kfd_process_evict_queues - Evict all user queues of a process
 *
 * Eviction is reference-counted per process-device. This means multiple
 * evictions from different sources can be nested safely.
 */
int kfd_process_evict_queues(struct kfd_process *p)
{
        struct kfd_process_device *pdd;
        int r = 0;
        unsigned int n_evicted = 0;

        list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
                r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
                                                            &pdd->qpd);
                if (r) {
                        pr_err("Failed to evict process queues\n");
                        goto fail;
                }
                n_evicted++;
        }

        return r;

fail:
        /* To keep state consistent, roll back partial eviction by
         * restoring queues
         */
        list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
                if (n_evicted == 0)
                        break;
                if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
                                                              &pdd->qpd))
                        pr_err("Failed to restore queues\n");

                n_evicted--;
        }

        return r;
}

/* kfd_process_restore_queues - Restore all user queues of a process */
int kfd_process_restore_queues(struct kfd_process *p)
{
        struct kfd_process_device *pdd;
        int r, ret = 0;

        list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
                r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
                                                              &pdd->qpd);
                if (r) {
                        pr_err("Failed to restore process queues\n");
                        if (!ret)
                                ret = r;
                }
        }

        return ret;
}
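
/*
 * Because eviction is reference-counted in the device queue manager
 * (via qpd->evicted), nested users compose; a sketch assuming two
 * independent eviction sources A and B:
 *
 *	kfd_process_evict_queues(p);	// A: queues go off the HW
 *	kfd_process_evict_queues(p);	// B: counter bumps, no HW change
 *	kfd_process_restore_queues(p);	// A done: queues stay evicted
 *	kfd_process_restore_queues(p);	// B done: queues actually restored
 */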

static void evict_process_worker(struct work_struct *work)
{
        int ret;
        struct kfd_process *p;
        struct delayed_work *dwork;

        dwork = to_delayed_work(work);

        /* Process termination destroys this worker thread. So during the
         * lifetime of this thread, kfd_process p will be valid
         */
        p = container_of(dwork, struct kfd_process, eviction_work);
        WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
                  "Eviction fence mismatch\n");

        /* A narrow window of overlap between the restore and evict work
         * items is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
         * unreserves KFD BOs, the process can be evicted again, but
         * restore still has a few more steps to finish. So let's wait
         * for any previous restore work to complete.
         */
        flush_delayed_work(&p->restore_work);

        pr_debug("Started evicting pasid 0x%x\n", p->pasid);
        ret = kfd_process_evict_queues(p);
        if (!ret) {
                dma_fence_signal(p->ef);
                dma_fence_put(p->ef);
                p->ef = NULL;
                queue_delayed_work(kfd_restore_wq, &p->restore_work,
                                   msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));

                pr_debug("Finished evicting pasid 0x%x\n", p->pasid);
        } else
                pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid);
}

static void restore_process_worker(struct work_struct *work)
{
        struct delayed_work *dwork;
        struct kfd_process *p;
        int ret = 0;

        dwork = to_delayed_work(work);

        /* Process termination destroys this worker thread. So during the
         * lifetime of this thread, kfd_process p will be valid
         */
        p = container_of(dwork, struct kfd_process, restore_work);
        pr_debug("Started restoring pasid 0x%x\n", p->pasid);

        /* Set last_restore_timestamp before successful restoration.
         * Otherwise this would have to be set by KGD (restore_process_bos)
         * before KFD BOs are unreserved. If not, the process can be evicted
         * again before the timestamp is set.
         * If restore fails, the timestamp will be set again in the next
         * attempt. This would mean that the minimum GPU quantum would be
         * PROCESS_ACTIVE_TIME_MS - (time to execute the following two
         * functions)
         */

        p->last_restore_timestamp = get_jiffies_64();
        ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info,
                                                      &p->ef);
        if (ret) {
                pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",
                         p->pasid, PROCESS_BACK_OFF_TIME_MS);
                ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
                                         msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
                WARN(!ret, "reschedule restore work failed\n");
                return;
        }

        ret = kfd_process_restore_queues(p);
        if (!ret)
                pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
        else
                pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
}

void kfd_suspend_all_processes(void)
{
        struct kfd_process *p;
        unsigned int temp;
        int idx = srcu_read_lock(&kfd_processes_srcu);

        hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
                cancel_delayed_work_sync(&p->eviction_work);
                cancel_delayed_work_sync(&p->restore_work);

                if (kfd_process_evict_queues(p))
                        pr_err("Failed to suspend process 0x%x\n", p->pasid);
                dma_fence_signal(p->ef);
                dma_fence_put(p->ef);
                p->ef = NULL;
        }
        srcu_read_unlock(&kfd_processes_srcu, idx);
}

int kfd_resume_all_processes(void)
{
        struct kfd_process *p;
        unsigned int temp;
        int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);

        hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
                if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) {
                        pr_err("Restore process %d failed during resume\n",
                               p->pasid);
                        ret = -EFAULT;
                }
        }
        srcu_read_unlock(&kfd_processes_srcu, idx);
        return ret;
}

int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
                          struct vm_area_struct *vma)
{
        struct kfd_process_device *pdd;
        struct qcm_process_device *qpd;

        if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
                pr_err("Incorrect CWSR mapping size.\n");
                return -EINVAL;
        }

        pdd = kfd_get_process_device_data(dev, process);
        if (!pdd)
                return -EINVAL;
        qpd = &pdd->qpd;

        qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
                                                   get_order(KFD_CWSR_TBA_TMA_SIZE));
        if (!qpd->cwsr_kaddr) {
                pr_err("Error allocating per process CWSR buffer.\n");
                return -ENOMEM;
        }

        vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND
                | VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP;
        /* Mapping pages to user process */
        return remap_pfn_range(vma, vma->vm_start,
                               PFN_DOWN(__pa(qpd->cwsr_kaddr)),
                               KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
}

void kfd_flush_tlb(struct kfd_process_device *pdd)
{
        struct kfd_dev *dev = pdd->dev;

        if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
                /* Nothing to flush until a VMID is assigned, which
                 * only happens when the first queue is created.
                 */
                if (pdd->qpd.vmid)
                        amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
                                                         pdd->qpd.vmid);
        } else {
                amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
                                                  pdd->process->pasid);
        }
}

#if defined(CONFIG_DEBUG_FS)

int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
{
        struct kfd_process *p;
        unsigned int temp;
        int r = 0;

        int idx = srcu_read_lock(&kfd_processes_srcu);

        hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
                seq_printf(m, "Process %d PASID 0x%x:\n",
                           p->lead_thread->tgid, p->pasid);

                mutex_lock(&p->mutex);
                r = pqm_debugfs_mqds(m, &p->pqm);
                mutex_unlock(&p->mutex);

                if (r)
                        break;
        }

        srcu_read_unlock(&kfd_processes_srcu, idx);

        return r;
}

#endif