drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c at v4.15-rc6 · tjh.dev/kernel

tjh.dev / kernel
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
kernel / drivers / gpu / drm / amd / amdgpu / amdgpu_cs.c
at v4.15-rc6 1600 lines 40 kB view raw
   1/*
   2 * Copyright 2008 Jerome Glisse.
   3 * All Rights Reserved.
   4 *
   5 * Permission is hereby granted, free of charge, to any person obtaining a
   6 * copy of this software and associated documentation files (the "Software"),
   7 * to deal in the Software without restriction, including without limitation
   8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   9 * and/or sell copies of the Software, and to permit persons to whom the
  10 * Software is furnished to do so, subject to the following conditions:
  11 *
  12 * The above copyright notice and this permission notice (including the next
  13 * paragraph) shall be included in all copies or substantial portions of the
  14 * Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  22 * DEALINGS IN THE SOFTWARE.
  23 *
  24 * Authors:
  25 *    Jerome Glisse <glisse@freedesktop.org>
  26 */
  27#include <linux/pagemap.h>
  28#include <linux/sync_file.h>
  29#include <drm/drmP.h>
  30#include <drm/amdgpu_drm.h>
  31#include <drm/drm_syncobj.h>
  32#include "amdgpu.h"
  33#include "amdgpu_trace.h"
  34
  35static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
  36				      struct drm_amdgpu_cs_chunk_fence *data,
  37				      uint32_t *offset)
  38{
  39	struct drm_gem_object *gobj;
  40	unsigned long size;
  41
  42	gobj = drm_gem_object_lookup(p->filp, data->handle);
  43	if (gobj == NULL)
  44		return -EINVAL;
  45
  46	p->uf_entry.robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
  47	p->uf_entry.priority = 0;
  48	p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
  49	p->uf_entry.tv.shared = true;
  50	p->uf_entry.user_pages = NULL;
  51
  52	size = amdgpu_bo_size(p->uf_entry.robj);
  53	if (size != PAGE_SIZE || (data->offset + 8) > size)
  54		return -EINVAL;
  55
  56	*offset = data->offset;
  57
  58	drm_gem_object_put_unlocked(gobj);
  59
  60	if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) {
  61		amdgpu_bo_unref(&p->uf_entry.robj);
  62		return -EINVAL;
  63	}
  64
  65	return 0;
  66}
  67
  68static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
  69{
  70	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
  71	struct amdgpu_vm *vm = &fpriv->vm;
  72	union drm_amdgpu_cs *cs = data;
  73	uint64_t *chunk_array_user;
  74	uint64_t *chunk_array;
  75	unsigned size, num_ibs = 0;
  76	uint32_t uf_offset = 0;
  77	int i;
  78	int ret;
  79
  80	if (cs->in.num_chunks == 0)
  81		return 0;
  82
  83	chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
  84	if (!chunk_array)
  85		return -ENOMEM;
  86
  87	p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
  88	if (!p->ctx) {
  89		ret = -EINVAL;
  90		goto free_chunk;
  91	}
  92
  93	mutex_lock(&p->ctx->lock);
  94
  95	/* get chunks */
  96	chunk_array_user = u64_to_user_ptr(cs->in.chunks);
  97	if (copy_from_user(chunk_array, chunk_array_user,
  98			   sizeof(uint64_t)*cs->in.num_chunks)) {
  99		ret = -EFAULT;
 100		goto free_chunk;
 101	}
 102
 103	p->nchunks = cs->in.num_chunks;
 104	p->chunks = kmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
 105			    GFP_KERNEL);
 106	if (!p->chunks) {
 107		ret = -ENOMEM;
 108		goto free_chunk;
 109	}
 110
 111	for (i = 0; i < p->nchunks; i++) {
 112		struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL;
 113		struct drm_amdgpu_cs_chunk user_chunk;
 114		uint32_t __user *cdata;
 115
 116		chunk_ptr = u64_to_user_ptr(chunk_array[i]);
 117		if (copy_from_user(&user_chunk, chunk_ptr,
 118				       sizeof(struct drm_amdgpu_cs_chunk))) {
 119			ret = -EFAULT;
 120			i--;
 121			goto free_partial_kdata;
 122		}
 123		p->chunks[i].chunk_id = user_chunk.chunk_id;
 124		p->chunks[i].length_dw = user_chunk.length_dw;
 125
 126		size = p->chunks[i].length_dw;
 127		cdata = u64_to_user_ptr(user_chunk.chunk_data);
 128
 129		p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
 130		if (p->chunks[i].kdata == NULL) {
 131			ret = -ENOMEM;
 132			i--;
 133			goto free_partial_kdata;
 134		}
 135		size *= sizeof(uint32_t);
 136		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
 137			ret = -EFAULT;
 138			goto free_partial_kdata;
 139		}
 140
 141		switch (p->chunks[i].chunk_id) {
 142		case AMDGPU_CHUNK_ID_IB:
 143			++num_ibs;
 144			break;
 145
 146		case AMDGPU_CHUNK_ID_FENCE:
 147			size = sizeof(struct drm_amdgpu_cs_chunk_fence);
 148			if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
 149				ret = -EINVAL;
 150				goto free_partial_kdata;
 151			}
 152
 153			ret = amdgpu_cs_user_fence_chunk(p, p->chunks[i].kdata,
 154							 &uf_offset);
 155			if (ret)
 156				goto free_partial_kdata;
 157
 158			break;
 159
 160		case AMDGPU_CHUNK_ID_DEPENDENCIES:
 161		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
 162		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
 163			break;
 164
 165		default:
 166			ret = -EINVAL;
 167			goto free_partial_kdata;
 168		}
 169	}
 170
 171	ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm);
 172	if (ret)
 173		goto free_all_kdata;
 174
 175	if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) {
 176		ret = -ECANCELED;
 177		goto free_all_kdata;
 178	}
 179
 180	if (p->uf_entry.robj)
 181		p->job->uf_addr = uf_offset;
 182	kfree(chunk_array);
 183	return 0;
 184
 185free_all_kdata:
 186	i = p->nchunks - 1;
 187free_partial_kdata:
 188	for (; i >= 0; i--)
 189		kvfree(p->chunks[i].kdata);
 190	kfree(p->chunks);
 191	p->chunks = NULL;
 192	p->nchunks = 0;
 193free_chunk:
 194	kfree(chunk_array);
 195
 196	return ret;
 197}
 198
 199/* Convert microseconds to bytes. */
 200static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
 201{
 202	if (us <= 0 || !adev->mm_stats.log2_max_MBps)
 203		return 0;
 204
 205	/* Since accum_us is incremented by a million per second, just
 206	 * multiply it by the number of MB/s to get the number of bytes.
 207	 */
 208	return us << adev->mm_stats.log2_max_MBps;
 209}
 210
 211static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
 212{
 213	if (!adev->mm_stats.log2_max_MBps)
 214		return 0;
 215
 216	return bytes >> adev->mm_stats.log2_max_MBps;
 217}
 218
/* Returns how many bytes TTM can move right now. If no bytes can be moved,
 * it returns 0. If it returns non-zero, it's OK to move at least one buffer,
 * which means it can go over the threshold once. If that happens, the driver
 * will be in debt and no other buffer migrations can be done until that debt
 * is repaid.
 *
 * This approach allows moving a buffer of any size (it's important to allow
 * that).
 *
 * The currency is simply time in microseconds and it increases as the clock
 * ticks. The accumulated microseconds (us) are converted to bytes and
 * returned.
 *
 * The function itself is void; the results are handed back through the two
 * output parameters. @max_bytes receives the overall budget and
 * @max_vis_bytes the budget for CPU-visible VRAM. Both are set to 0 when
 * log2_max_MBps is 0, and @max_vis_bytes is also 0 when visible_vram_size
 * is not smaller than real_vram_size.
 */
static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
					      u64 *max_bytes,
					      u64 *max_vis_bytes)
{
	s64 time_us, increment_us;
	u64 free_vram, total_vram, used_vram;

	/* Allow a maximum of 200 accumulated ms. This is basically per-IB
	 * throttling.
	 *
	 * It means that in order to get full max MBps, at least 5 IBs per
	 * second must be submitted and not more than 200ms apart from each
	 * other.
	 */
	const s64 us_upper_bound = 200000;

	if (!adev->mm_stats.log2_max_MBps) {
		*max_bytes = 0;
		*max_vis_bytes = 0;
		return;
	}

	/* Pinned VRAM is excluded from the total; clamp free_vram at 0 in
	 * case the reported usage exceeds that total.
	 */
	total_vram = adev->mc.real_vram_size - adev->vram_pin_size;
	used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
	free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;

	/* Everything below reads and updates mm_stats under its lock. */
	spin_lock(&adev->mm_stats.lock);

	/* Increase the amount of accumulated us. */
	time_us = ktime_to_us(ktime_get());
	increment_us = time_us - adev->mm_stats.last_update_us;
	adev->mm_stats.last_update_us = time_us;
	adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
                                      us_upper_bound);

	/* This prevents the short period of low performance when the VRAM
	 * usage is low and the driver is in debt or doesn't have enough
	 * accumulated us to fill VRAM quickly.
	 *
	 * The situation can occur in these cases:
	 * - a lot of VRAM is freed by userspace
	 * - the presence of a big buffer causes a lot of evictions
	 *   (solution: split buffers into smaller ones)
	 *
	 * If 128 MB or 1/8th of VRAM is free, start filling it now by setting
	 * accum_us to a positive number.
	 */
	if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
		s64 min_us;

		/* Be more aggressive on dGPUs. Try to fill a portion of free
		 * VRAM now.
		 */
		if (!(adev->flags & AMD_IS_APU))
			min_us = bytes_to_us(adev, free_vram / 4);
		else
			min_us = 0; /* Reset accum_us on APUs. */

		adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
	}

	/* This is set to 0 if the driver is in debt to disallow (optional)
	 * buffer moves.
	 */
	*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);

	/* Do the same for visible VRAM if half of it is free */
	if (adev->mc.visible_vram_size < adev->mc.real_vram_size) {
		u64 total_vis_vram = adev->mc.visible_vram_size;
		u64 used_vis_vram =
			amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);

		/* accum_us_vis only advances while some visible VRAM is free. */
		if (used_vis_vram < total_vis_vram) {
			u64 free_vis_vram = total_vis_vram - used_vis_vram;
			adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
							  increment_us, us_upper_bound);

			if (free_vis_vram >= total_vis_vram / 2)
				adev->mm_stats.accum_us_vis =
					max(bytes_to_us(adev, free_vis_vram / 2),
					    adev->mm_stats.accum_us_vis);
		}

		*max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
	} else {
		*max_vis_bytes = 0;
	}

	spin_unlock(&adev->mm_stats.lock);
}
 322
 323/* Report how many bytes have really been moved for the last command
 324 * submission. This can result in a debt that can stop buffer migrations
 325 * temporarily.
 326 */
 327void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
 328				  u64 num_vis_bytes)
 329{
 330	spin_lock(&adev->mm_stats.lock);
 331	adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
 332	adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
 333	spin_unlock(&adev->mm_stats.lock);
 334}
 335
 336static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 337				 struct amdgpu_bo *bo)
 338{
 339	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 340	u64 initial_bytes_moved, bytes_moved;
 341	uint32_t domain;
 342	int r;
 343
 344	if (bo->pin_count)
 345		return 0;
 346
 347	/* Don't move this buffer if we have depleted our allowance
 348	 * to move it. Don't move anything if the threshold is zero.
 349	 */
 350	if (p->bytes_moved < p->bytes_moved_threshold) {
 351		if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
 352		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
 353			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
 354			 * visible VRAM if we've depleted our allowance to do
 355			 * that.
 356			 */
 357			if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
 358				domain = bo->preferred_domains;
 359			else
 360				domain = bo->allowed_domains;
 361		} else {
 362			domain = bo->preferred_domains;
 363		}
 364	} else {
 365		domain = bo->allowed_domains;
 366	}
 367
 368retry:
 369	amdgpu_ttm_placement_from_domain(bo, domain);
 370	initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
 371	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
 372	bytes_moved = atomic64_read(&adev->num_bytes_moved) -
 373		      initial_bytes_moved;
 374	p->bytes_moved += bytes_moved;
 375	if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
 376	    bo->tbo.mem.mem_type == TTM_PL_VRAM &&
 377	    bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT)
 378		p->bytes_moved_vis += bytes_moved;
 379
 380	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
 381		domain = bo->allowed_domains;
 382		goto retry;
 383	}
 384
 385	return r;
 386}
 387
/* Last resort, try to evict something from the current working set
 *
 * Walks p->validated backwards, starting at the p->evictable cursor, looking
 * for a BO that currently sits in one of the domains @validated may use and
 * that is allowed to live somewhere else. The first such BO is validated
 * into its other allowed domains; the bytes this moves are added to the
 * parser's counters.
 *
 * Returns true if one BO was moved away successfully, so the caller can
 * retry validating @validated. Returns false if there is no cursor, the walk
 * reached @validated or the list head, or moving a candidate failed.
 */
static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
				struct amdgpu_bo *validated)
{
	uint32_t domain = validated->allowed_domains;
	int r;

	if (!p->evictable)
		return false;

	/* The cursor itself is the loop variable, so progress made here is
	 * remembered for the next call.
	 */
	for (;&p->evictable->tv.head != &p->validated;
	     p->evictable = list_prev_entry(p->evictable, tv.head)) {

		struct amdgpu_bo_list_entry *candidate = p->evictable;
		struct amdgpu_bo *bo = candidate->robj;
		struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
		u64 initial_bytes_moved, bytes_moved;
		bool update_bytes_moved_vis;
		uint32_t other;

		/* If we reached our current BO we can forget it */
		if (candidate->robj == validated)
			break;

		/* We can't move pinned BOs here */
		if (bo->pin_count)
			continue;

		other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);

		/* Check if this BO is in one of the domains we need space for */
		if (!(other & domain))
			continue;

		/* Check if we can move this BO somewhere else */
		other = bo->allowed_domains & ~domain;
		if (!other)
			continue;

		/* Good we can try to move this BO somewhere else */
		amdgpu_ttm_placement_from_domain(bo, other);
		/* Sampled before the move: was the BO in visible VRAM? */
		update_bytes_moved_vis =
			adev->mc.visible_vram_size < adev->mc.real_vram_size &&
			bo->tbo.mem.mem_type == TTM_PL_VRAM &&
			bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT;
		initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
		r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
		bytes_moved = atomic64_read(&adev->num_bytes_moved) -
			initial_bytes_moved;
		p->bytes_moved += bytes_moved;
		if (update_bytes_moved_vis)
			p->bytes_moved_vis += bytes_moved;

		if (unlikely(r))
			break;

		/* Step the cursor past the candidate before it is relinked
		 * at the front of the validated list.
		 */
		p->evictable = list_prev_entry(p->evictable, tv.head);
		list_move(&candidate->tv.head, &p->validated);

		return true;
	}

	return false;
}
 452
 453static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
 454{
 455	struct amdgpu_cs_parser *p = param;
 456	int r;
 457
 458	do {
 459		r = amdgpu_cs_bo_validate(p, bo);
 460	} while (r == -ENOMEM && amdgpu_cs_try_evict(p, bo));
 461	if (r)
 462		return r;
 463
 464	if (bo->shadow)
 465		r = amdgpu_cs_bo_validate(p, bo->shadow);
 466
 467	return r;
 468}
 469
 470static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 471			    struct list_head *validated)
 472{
 473	struct amdgpu_bo_list_entry *lobj;
 474	int r;
 475
 476	list_for_each_entry(lobj, validated, tv.head) {
 477		struct amdgpu_bo *bo = lobj->robj;
 478		bool binding_userptr = false;
 479		struct mm_struct *usermm;
 480
 481		usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
 482		if (usermm && usermm != current->mm)
 483			return -EPERM;
 484
 485		/* Check if we have user pages and nobody bound the BO already */
 486		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
 487		    lobj->user_pages) {
 488			amdgpu_ttm_placement_from_domain(bo,
 489							 AMDGPU_GEM_DOMAIN_CPU);
 490			r = ttm_bo_validate(&bo->tbo, &bo->placement, true,
 491					    false);
 492			if (r)
 493				return r;
 494			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
 495						     lobj->user_pages);
 496			binding_userptr = true;
 497		}
 498
 499		if (p->evictable == lobj)
 500			p->evictable = NULL;
 501
 502		r = amdgpu_cs_validate(p, bo);
 503		if (r)
 504			return r;
 505
 506		if (binding_userptr) {
 507			kvfree(lobj->user_pages);
 508			lobj->user_pages = NULL;
 509		}
 510	}
 511	return 0;
 512}
 513
/* Collect, reserve and validate all buffers used by a command submission.
 *
 * Builds p->validated from the BO list named in @cs (if any), the VM page
 * directory and the optional user fence BO, then reserves the buffers. For
 * userptr BOs that need pages, the reservation is dropped, the pages are
 * acquired and the reservation is retried (giving up with -EDEADLK when the
 * retry counter, which starts at 10, runs out). Afterwards the page tables,
 * the duplicates and the validated list are validated, the moved bytes are
 * reported, and the GDS/GWS/OA ranges plus the user fence GPU address are
 * stored in p->job.
 *
 * On success the buffers stay reserved. If validation fails the reservation
 * is backed off here. User page arrays still attached to BO list entries
 * are released before returning, on success as well as on failure.
 *
 * Returns 0 on success or a negative error code.
 */
static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
				union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_bo_list_entry *e;
	struct list_head duplicates;
	unsigned i, tries = 10;
	int r;

	INIT_LIST_HEAD(&p->validated);

	p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
	if (p->bo_list) {
		amdgpu_bo_list_get_list(p->bo_list, &p->validated);
		/* userptr entries occupy [first_userptr, num_entries) */
		if (p->bo_list->first_userptr != p->bo_list->num_entries)
			p->mn = amdgpu_mn_get(p->adev);
	}

	INIT_LIST_HEAD(&duplicates);
	amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);

	if (p->uf_entry.robj)
		list_add(&p->uf_entry.tv.head, &p->validated);

	/* Reserve everything; repeat while userptr BOs still lack pages. */
	while (1) {
		struct list_head need_pages;
		unsigned i;

		r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
					   &duplicates);
		if (unlikely(r != 0)) {
			if (r != -ERESTARTSYS)
				DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
			goto error_free_pages;
		}

		/* Without a BO list we don't have userptr BOs */
		if (!p->bo_list)
			break;

		INIT_LIST_HEAD(&need_pages);
		for (i = p->bo_list->first_userptr;
		     i < p->bo_list->num_entries; ++i) {
			struct amdgpu_bo *bo;

			e = &p->bo_list->array[i];
			bo = e->robj;

			if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
				 &e->user_invalidated) && e->user_pages) {

				/* We acquired a page array, but somebody
				 * invalidated it. Free it and try again
				 */
				release_pages(e->user_pages,
					      bo->tbo.ttm->num_pages);
				kvfree(e->user_pages);
				e->user_pages = NULL;
			}

			if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
			    !e->user_pages) {
				/* Park the entry on need_pages and drop its
				 * reservation until the pages are there.
				 */
				list_del(&e->tv.head);
				list_add(&e->tv.head, &need_pages);

				amdgpu_bo_unreserve(e->robj);
			}
		}

		if (list_empty(&need_pages))
			break;

		/* Unreserve everything again. */
		ttm_eu_backoff_reservation(&p->ticket, &p->validated);

		/* We tried too many times, just abort */
		if (!--tries) {
			r = -EDEADLK;
			DRM_ERROR("deadlock in %s\n", __func__);
			goto error_free_pages;
		}

		/* Fill the page arrays for all userptrs. */
		list_for_each_entry(e, &need_pages, tv.head) {
			struct ttm_tt *ttm = e->robj->tbo.ttm;

			e->user_pages = kvmalloc_array(ttm->num_pages,
							 sizeof(struct page*),
							 GFP_KERNEL | __GFP_ZERO);
			if (!e->user_pages) {
				r = -ENOMEM;
				DRM_ERROR("calloc failure in %s\n", __func__);
				goto error_free_pages;
			}

			r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
			if (r) {
				DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n");
				/* Only the array is freed here, without a
				 * release_pages() call for this entry.
				 */
				kvfree(e->user_pages);
				e->user_pages = NULL;
				goto error_free_pages;
			}
		}

		/* And try again. */
		list_splice(&need_pages, &p->validated);
	}

	/* Set up the move budget and the eviction cursor for validation. */
	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
					  &p->bytes_moved_vis_threshold);
	p->bytes_moved = 0;
	p->bytes_moved_vis = 0;
	p->evictable = list_last_entry(&p->validated,
				       struct amdgpu_bo_list_entry,
				       tv.head);

	r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
				      amdgpu_cs_validate, p);
	if (r) {
		DRM_ERROR("amdgpu_vm_validate_pt_bos() failed.\n");
		goto error_validate;
	}

	r = amdgpu_cs_list_validate(p, &duplicates);
	if (r) {
		DRM_ERROR("amdgpu_cs_list_validate(duplicates) failed.\n");
		goto error_validate;
	}

	r = amdgpu_cs_list_validate(p, &p->validated);
	if (r) {
		DRM_ERROR("amdgpu_cs_list_validate(validated) failed.\n");
		goto error_validate;
	}

	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
				     p->bytes_moved_vis);
	if (p->bo_list) {
		struct amdgpu_bo *gds = p->bo_list->gds_obj;
		struct amdgpu_bo *gws = p->bo_list->gws_obj;
		struct amdgpu_bo *oa = p->bo_list->oa_obj;
		struct amdgpu_vm *vm = &fpriv->vm;
		unsigned i;

		/* Cache each BO's mapping in this VM on its list entry. */
		for (i = 0; i < p->bo_list->num_entries; i++) {
			struct amdgpu_bo *bo = p->bo_list->array[i].robj;

			p->bo_list->array[i].bo_va = amdgpu_vm_bo_find(vm, bo);
		}

		if (gds) {
			p->job->gds_base = amdgpu_bo_gpu_offset(gds);
			p->job->gds_size = amdgpu_bo_size(gds);
		}
		if (gws) {
			p->job->gws_base = amdgpu_bo_gpu_offset(gws);
			p->job->gws_size = amdgpu_bo_size(gws);
		}
		if (oa) {
			p->job->oa_base = amdgpu_bo_gpu_offset(oa);
			p->job->oa_size = amdgpu_bo_size(oa);
		}
	}

	if (!r && p->uf_entry.robj) {
		struct amdgpu_bo *uf = p->uf_entry.robj;

		/* uf_addr holds the offset inside the BO so far; add the
		 * BO's GPU address to it.
		 * NOTE(review): the offset is added even if the bind failed;
		 * the error is still returned through r.
		 */
		r = amdgpu_ttm_bind(&uf->tbo, &uf->tbo.mem);
		p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
	}

error_validate:
	/* Also reached on success; back off only if something failed. */
	if (r)
		ttm_eu_backoff_reservation(&p->ticket, &p->validated);

error_free_pages:

	/* Also reached on success: release page arrays still attached. */
	if (p->bo_list) {
		for (i = p->bo_list->first_userptr;
		     i < p->bo_list->num_entries; ++i) {
			e = &p->bo_list->array[i];

			if (!e->user_pages)
				continue;

			release_pages(e->user_pages,
				      e->robj->tbo.ttm->num_pages);
			kvfree(e->user_pages);
		}
	}

	return r;
}
 707
 708static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 709{
 710	struct amdgpu_bo_list_entry *e;
 711	int r;
 712
 713	list_for_each_entry(e, &p->validated, tv.head) {
 714		struct reservation_object *resv = e->robj->tbo.resv;
 715		r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp,
 716				     amdgpu_bo_explicit_sync(e->robj));
 717
 718		if (r)
 719			return r;
 720	}
 721	return 0;
 722}
 723
/**
 * amdgpu_cs_parser_fini() - clean parser states
 * @parser:	parser structure holding parsing context.
 * @error:	error number
 * @backoff:	back off the buffer reservation if @error is set
 *
 * If @error and @backoff are both set, the reservation of the validated
 * buffers is backed off. In every case the memory and references held by
 * the parsing context are released: post-dependency syncobjs, the fence,
 * the context (which is unlocked first), the BO list, the chunk data, the
 * job if one is still attached, and the user fence BO.
 **/
static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
				  bool backoff)
{
	unsigned i;

	if (error && backoff)
		ttm_eu_backoff_reservation(&parser->ticket,
					   &parser->validated);

	for (i = 0; i < parser->num_post_dep_syncobjs; i++)
		drm_syncobj_put(parser->post_dep_syncobjs[i]);
	kfree(parser->post_dep_syncobjs);

	dma_fence_put(parser->fence);

	/* Release the context lock before giving up the context reference. */
	if (parser->ctx) {
		mutex_unlock(&parser->ctx->lock);
		amdgpu_ctx_put(parser->ctx);
	}
	if (parser->bo_list)
		amdgpu_bo_list_put(parser->bo_list);

	for (i = 0; i < parser->nchunks; i++)
		kvfree(parser->chunks[i].kdata);
	kfree(parser->chunks);
	if (parser->job)
		amdgpu_job_free(parser->job);
	amdgpu_bo_unref(&parser->uf_entry.robj);
}
 761
/* Bring the page tables of the submitting process' VM up to date and make
 * the job wait for those updates.
 *
 * Updates the page directories, clears freed mappings, and updates the PRT
 * mapping, the CSA mapping (SR-IOV virtual functions only) and the mapping
 * of every BO in the BO list. The last page table update fence of each is
 * added to the job's sync object. Finally moved BOs are handled and the
 * VM's last update fence is added as well.
 *
 * Returns 0 on success or the first error encountered.
 */
static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
{
	struct amdgpu_device *adev = p->adev;
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_va *bo_va;
	struct amdgpu_bo *bo;
	int i, r;

	r = amdgpu_vm_update_directories(adev, vm);
	if (r)
		return r;

	r = amdgpu_vm_clear_freed(adev, vm, NULL);
	if (r)
		return r;

	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
	if (r)
		return r;

	r = amdgpu_sync_fence(adev, &p->job->sync,
			      fpriv->prt_va->last_pt_update);
	if (r)
		return r;

	if (amdgpu_sriov_vf(adev)) {
		struct dma_fence *f;

		/* A virtual function is expected to have a CSA mapping. */
		bo_va = fpriv->csa_va;
		BUG_ON(!bo_va);
		r = amdgpu_vm_bo_update(adev, bo_va, false);
		if (r)
			return r;

		f = bo_va->last_pt_update;
		r = amdgpu_sync_fence(adev, &p->job->sync, f);
		if (r)
			return r;
	}

	if (p->bo_list) {
		for (i = 0; i < p->bo_list->num_entries; i++) {
			struct dma_fence *f;

			/* ignore duplicates */
			bo = p->bo_list->array[i].robj;
			if (!bo)
				continue;

			/* entries without a mapping in this VM are skipped */
			bo_va = p->bo_list->array[i].bo_va;
			if (bo_va == NULL)
				continue;

			r = amdgpu_vm_bo_update(adev, bo_va, false);
			if (r)
				return r;

			f = bo_va->last_pt_update;
			r = amdgpu_sync_fence(adev, &p->job->sync, f);
			if (r)
				return r;
		}

	}

	r = amdgpu_vm_handle_moved(adev, vm);
	if (r)
		return r;

	r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update);
	if (r)
		return r;

	if (amdgpu_vm_debug && p->bo_list) {
		/* Invalidate all BOs to test for userspace bugs */
		for (i = 0; i < p->bo_list->num_entries; i++) {
			/* ignore duplicates */
			bo = p->bo_list->array[i].robj;
			if (!bo)
				continue;

			amdgpu_vm_bo_invalidate(adev, bo, false);
		}
	}

	/* r is 0 here, every failure returned early */
	return r;
}
 850
/* Prepare the IBs of a submission for execution.
 *
 * For rings that provide a parse_cs callback, every IB chunk is looked up
 * through its GPU virtual address, bounds checked against the mapping it
 * lies in, copied out of the BO into the job's IB and handed to
 * amdgpu_ring_parse_cs(). If the job uses a VM, the page directory address
 * is recorded and the page tables are updated. Finally the job is synced to
 * the reservation objects of all validated BOs.
 *
 * @adev is not used by this function.
 * Returns 0 on success or a negative error code.
 */
static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
				 struct amdgpu_cs_parser *p)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_ring *ring = p->job->ring;
	int r;

	/* Only for UVD/VCE VM emulation */
	if (p->job->ring->funcs->parse_cs) {
		unsigned i, j;

		/* i walks all chunks, j counts the IB chunks seen so far */
		for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
			struct drm_amdgpu_cs_chunk_ib *chunk_ib;
			struct amdgpu_bo_va_mapping *m;
			struct amdgpu_bo *aobj = NULL;
			struct amdgpu_cs_chunk *chunk;
			struct amdgpu_ib *ib;
			uint64_t offset;
			uint8_t *kptr;

			chunk = &p->chunks[i];
			ib = &p->job->ibs[j];
			chunk_ib = chunk->kdata;

			if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
				continue;

			r = amdgpu_cs_find_mapping(p, chunk_ib->va_start,
						   &aobj, &m);
			if (r) {
				DRM_ERROR("IB va_start is invalid\n");
				return r;
			}

			/* The IB must end inside the mapping it starts in;
			 * m->last is compared in units of GPU pages.
			 */
			if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
			    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
				return -EINVAL;
			}

			/* the IB should be reserved at this point */
			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
			if (r) {
				return r;
			}

			/* advance kptr by the IB's offset from the start of
			 * the mapping
			 */
			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
			kptr += chunk_ib->va_start - offset;

			memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
			amdgpu_bo_kunmap(aobj);

			r = amdgpu_ring_parse_cs(ring, p, j);
			if (r)
				return r;

			j++;
		}
	}

	if (p->job->vm) {
		p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.base.bo);

		r = amdgpu_bo_vm_update_pte(p);
		if (r)
			return r;
	}

	return amdgpu_cs_sync_rings(p);
}
 922
/* Set up the job's IBs from the IB chunks of the submission.
 *
 * Every IB chunk is mapped to a ring through the context's queue manager;
 * all IBs of one submission have to end up on the same ring, which is stored
 * in the job. An IB is obtained for each chunk and filled with the chunk's
 * GPU address, length and flags. Preamble flags are tracked on the job and
 * the context. On SR-IOV virtual functions at most one preemptible CE IB and
 * one preemptible DE IB are accepted per GFX submission. User fences are
 * rejected on UVD and VCE rings.
 *
 * Returns the result of waiting for the previous fence of the context on
 * the selected ring, or a negative error code.
 *
 * NOTE(review): job->ring is dereferenced after the loop but only assigned
 * inside it, so this relies on at least one IB chunk having been processed.
 * Confirm that the job allocation rejects a submission without IBs.
 */
static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
			     struct amdgpu_cs_parser *parser)
{
	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	int i, j;
	int r, ce_preempt = 0, de_preempt = 0;

	/* i walks all chunks, j counts the IB chunks seen so far */
	for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
		struct amdgpu_cs_chunk *chunk;
		struct amdgpu_ib *ib;
		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
		struct amdgpu_ring *ring;

		chunk = &parser->chunks[i];
		ib = &parser->job->ibs[j];
		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;

		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
			continue;

		if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && amdgpu_sriov_vf(adev)) {
			if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
				if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
					ce_preempt++;
				else
					de_preempt++;
			}

			/* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */
			if (ce_preempt > 1 || de_preempt > 1)
				return -EINVAL;
		}

		r = amdgpu_queue_mgr_map(adev, &parser->ctx->queue_mgr, chunk_ib->ip_type,
					 chunk_ib->ip_instance, chunk_ib->ring, &ring);
		if (r)
			return r;

		if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE) {
			parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;
			/* flag the first preamble IB seen on this context */
			if (!parser->ctx->preamble_presented) {
				parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
				parser->ctx->preamble_presented = true;
			}
		}

		/* all IBs of a submission must target the same ring */
		if (parser->job->ring && parser->job->ring != ring)
			return -EINVAL;

		parser->job->ring = ring;

		/* IB memory is only requested for rings with a parse_cs
		 * callback; for all other rings a size of 0 is passed.
		 */
		r =  amdgpu_ib_get(adev, vm,
					ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
					ib);
		if (r) {
			DRM_ERROR("Failed to get ib !\n");
			return r;
		}

		ib->gpu_addr = chunk_ib->va_start;
		ib->length_dw = chunk_ib->ib_bytes / 4;
		ib->flags = chunk_ib->flags;

		j++;
	}

	/* UVD & VCE fw doesn't support user fences */
	if (parser->job->uf_addr && (
	    parser->job->ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
	    parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
		return -EINVAL;

	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->job->ring->idx);
}
 998
 999static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
1000				       struct amdgpu_cs_chunk *chunk)
1001{
1002	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
1003	unsigned num_deps;
1004	int i, r;
1005	struct drm_amdgpu_cs_chunk_dep *deps;
1006
1007	deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
1008	num_deps = chunk->length_dw * 4 /
1009		sizeof(struct drm_amdgpu_cs_chunk_dep);
1010
1011	for (i = 0; i < num_deps; ++i) {
1012		struct amdgpu_ring *ring;
1013		struct amdgpu_ctx *ctx;
1014		struct dma_fence *fence;
1015
1016		ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
1017		if (ctx == NULL)
1018			return -EINVAL;
1019
1020		r = amdgpu_queue_mgr_map(p->adev, &ctx->queue_mgr,
1021					 deps[i].ip_type,
1022					 deps[i].ip_instance,
1023					 deps[i].ring, &ring);
1024		if (r) {
1025			amdgpu_ctx_put(ctx);
1026			return r;
1027		}
1028
1029		fence = amdgpu_ctx_get_fence(ctx, ring,
1030					     deps[i].handle);
1031		if (IS_ERR(fence)) {
1032			r = PTR_ERR(fence);
1033			amdgpu_ctx_put(ctx);
1034			return r;
1035		} else if (fence) {
1036			r = amdgpu_sync_fence(p->adev, &p->job->sync,
1037					      fence);
1038			dma_fence_put(fence);
1039			amdgpu_ctx_put(ctx);
1040			if (r)
1041				return r;
1042		}
1043	}
1044	return 0;
1045}
1046
1047static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
1048						 uint32_t handle)
1049{
1050	int r;
1051	struct dma_fence *fence;
1052	r = drm_syncobj_find_fence(p->filp, handle, &fence);
1053	if (r)
1054		return r;
1055
1056	r = amdgpu_sync_fence(p->adev, &p->job->sync, fence);
1057	dma_fence_put(fence);
1058
1059	return r;
1060}
1061
1062static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
1063					    struct amdgpu_cs_chunk *chunk)
1064{
1065	unsigned num_deps;
1066	int i, r;
1067	struct drm_amdgpu_cs_chunk_sem *deps;
1068
1069	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
1070	num_deps = chunk->length_dw * 4 /
1071		sizeof(struct drm_amdgpu_cs_chunk_sem);
1072
1073	for (i = 0; i < num_deps; ++i) {
1074		r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle);
1075		if (r)
1076			return r;
1077	}
1078	return 0;
1079}
1080
1081static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
1082					     struct amdgpu_cs_chunk *chunk)
1083{
1084	unsigned num_deps;
1085	int i;
1086	struct drm_amdgpu_cs_chunk_sem *deps;
1087	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
1088	num_deps = chunk->length_dw * 4 /
1089		sizeof(struct drm_amdgpu_cs_chunk_sem);
1090
1091	p->post_dep_syncobjs = kmalloc_array(num_deps,
1092					     sizeof(struct drm_syncobj *),
1093					     GFP_KERNEL);
1094	p->num_post_dep_syncobjs = 0;
1095
1096	if (!p->post_dep_syncobjs)
1097		return -ENOMEM;
1098
1099	for (i = 0; i < num_deps; ++i) {
1100		p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle);
1101		if (!p->post_dep_syncobjs[i])
1102			return -EINVAL;
1103		p->num_post_dep_syncobjs++;
1104	}
1105	return 0;
1106}
1107
1108static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
1109				  struct amdgpu_cs_parser *p)
1110{
1111	int i, r;
1112
1113	for (i = 0; i < p->nchunks; ++i) {
1114		struct amdgpu_cs_chunk *chunk;
1115
1116		chunk = &p->chunks[i];
1117
1118		if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) {
1119			r = amdgpu_cs_process_fence_dep(p, chunk);
1120			if (r)
1121				return r;
1122		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) {
1123			r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
1124			if (r)
1125				return r;
1126		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) {
1127			r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
1128			if (r)
1129				return r;
1130		}
1131	}
1132
1133	return 0;
1134}
1135
1136static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
1137{
1138	int i;
1139
1140	for (i = 0; i < p->num_post_dep_syncobjs; ++i)
1141		drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence);
1142}
1143
1144static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
1145			    union drm_amdgpu_cs *cs)
1146{
1147	struct amdgpu_ring *ring = p->job->ring;
1148	struct amd_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
1149	struct amdgpu_job *job;
1150	unsigned i;
1151	uint64_t seq;
1152
1153	int r;
1154
1155	amdgpu_mn_lock(p->mn);
1156	if (p->bo_list) {
1157		for (i = p->bo_list->first_userptr;
1158		     i < p->bo_list->num_entries; ++i) {
1159			struct amdgpu_bo *bo = p->bo_list->array[i].robj;
1160
1161			if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
1162				amdgpu_mn_unlock(p->mn);
1163				return -ERESTARTSYS;
1164			}
1165		}
1166	}
1167
1168	job = p->job;
1169	p->job = NULL;
1170
1171	r = amd_sched_job_init(&job->base, &ring->sched, entity, p->filp);
1172	if (r) {
1173		amdgpu_job_free(job);
1174		amdgpu_mn_unlock(p->mn);
1175		return r;
1176	}
1177
1178	job->owner = p->filp;
1179	job->fence_ctx = entity->fence_context;
1180	p->fence = dma_fence_get(&job->base.s_fence->finished);
1181
1182	r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq);
1183	if (r) {
1184		dma_fence_put(p->fence);
1185		dma_fence_put(&job->base.s_fence->finished);
1186		amdgpu_job_free(job);
1187		amdgpu_mn_unlock(p->mn);
1188		return r;
1189	}
1190
1191	amdgpu_cs_post_dependencies(p);
1192
1193	cs->out.handle = seq;
1194	job->uf_sequence = seq;
1195
1196	amdgpu_job_free_resources(job);
1197	amdgpu_ring_priority_get(job->ring,
1198				 amd_sched_get_job_priority(&job->base));
1199
1200	trace_amdgpu_cs_ioctl(job);
1201	amd_sched_entity_push_job(&job->base);
1202
1203	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
1204	amdgpu_mn_unlock(p->mn);
1205
1206	return 0;
1207}
1208
1209int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
1210{
1211	struct amdgpu_device *adev = dev->dev_private;
1212	union drm_amdgpu_cs *cs = data;
1213	struct amdgpu_cs_parser parser = {};
1214	bool reserved_buffers = false;
1215	int i, r;
1216
1217	if (!adev->accel_working)
1218		return -EBUSY;
1219
1220	parser.adev = adev;
1221	parser.filp = filp;
1222
1223	r = amdgpu_cs_parser_init(&parser, data);
1224	if (r) {
1225		DRM_ERROR("Failed to initialize parser !\n");
1226		goto out;
1227	}
1228
1229	r = amdgpu_cs_ib_fill(adev, &parser);
1230	if (r)
1231		goto out;
1232
1233	r = amdgpu_cs_parser_bos(&parser, data);
1234	if (r) {
1235		if (r == -ENOMEM)
1236			DRM_ERROR("Not enough memory for command submission!\n");
1237		else if (r != -ERESTARTSYS)
1238			DRM_ERROR("Failed to process the buffer list %d!\n", r);
1239		goto out;
1240	}
1241
1242	reserved_buffers = true;
1243
1244	r = amdgpu_cs_dependencies(adev, &parser);
1245	if (r) {
1246		DRM_ERROR("Failed in the dependencies handling %d!\n", r);
1247		goto out;
1248	}
1249
1250	for (i = 0; i < parser.job->num_ibs; i++)
1251		trace_amdgpu_cs(&parser, i);
1252
1253	r = amdgpu_cs_ib_vm_chunk(adev, &parser);
1254	if (r)
1255		goto out;
1256
1257	r = amdgpu_cs_submit(&parser, cs);
1258
1259out:
1260	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
1261	return r;
1262}
1263
1264/**
1265 * amdgpu_cs_wait_ioctl - wait for a command submission to finish
1266 *
1267 * @dev: drm device
1268 * @data: data from userspace
1269 * @filp: file private
1270 *
1271 * Wait for the command submission identified by handle to finish.
1272 */
1273int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
1274			 struct drm_file *filp)
1275{
1276	union drm_amdgpu_wait_cs *wait = data;
1277	struct amdgpu_device *adev = dev->dev_private;
1278	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
1279	struct amdgpu_ring *ring = NULL;
1280	struct amdgpu_ctx *ctx;
1281	struct dma_fence *fence;
1282	long r;
1283
1284	ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
1285	if (ctx == NULL)
1286		return -EINVAL;
1287
1288	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr,
1289				 wait->in.ip_type, wait->in.ip_instance,
1290				 wait->in.ring, &ring);
1291	if (r) {
1292		amdgpu_ctx_put(ctx);
1293		return r;
1294	}
1295
1296	fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
1297	if (IS_ERR(fence))
1298		r = PTR_ERR(fence);
1299	else if (fence) {
1300		r = dma_fence_wait_timeout(fence, true, timeout);
1301		if (r > 0 && fence->error)
1302			r = fence->error;
1303		dma_fence_put(fence);
1304	} else
1305		r = 1;
1306
1307	amdgpu_ctx_put(ctx);
1308	if (r < 0)
1309		return r;
1310
1311	memset(wait, 0, sizeof(*wait));
1312	wait->out.status = (r == 0);
1313
1314	return 0;
1315}
1316
1317/**
1318 * amdgpu_cs_get_fence - helper to get fence from drm_amdgpu_fence
1319 *
1320 * @adev: amdgpu device
1321 * @filp: file private
1322 * @user: drm_amdgpu_fence copied from user space
1323 */
1324static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
1325					     struct drm_file *filp,
1326					     struct drm_amdgpu_fence *user)
1327{
1328	struct amdgpu_ring *ring;
1329	struct amdgpu_ctx *ctx;
1330	struct dma_fence *fence;
1331	int r;
1332
1333	ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id);
1334	if (ctx == NULL)
1335		return ERR_PTR(-EINVAL);
1336
1337	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, user->ip_type,
1338				 user->ip_instance, user->ring, &ring);
1339	if (r) {
1340		amdgpu_ctx_put(ctx);
1341		return ERR_PTR(r);
1342	}
1343
1344	fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no);
1345	amdgpu_ctx_put(ctx);
1346
1347	return fence;
1348}
1349
1350int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
1351				    struct drm_file *filp)
1352{
1353	struct amdgpu_device *adev = dev->dev_private;
1354	union drm_amdgpu_fence_to_handle *info = data;
1355	struct dma_fence *fence;
1356	struct drm_syncobj *syncobj;
1357	struct sync_file *sync_file;
1358	int fd, r;
1359
1360	fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
1361	if (IS_ERR(fence))
1362		return PTR_ERR(fence);
1363
1364	switch (info->in.what) {
1365	case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ:
1366		r = drm_syncobj_create(&syncobj, 0, fence);
1367		dma_fence_put(fence);
1368		if (r)
1369			return r;
1370		r = drm_syncobj_get_handle(filp, syncobj, &info->out.handle);
1371		drm_syncobj_put(syncobj);
1372		return r;
1373
1374	case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD:
1375		r = drm_syncobj_create(&syncobj, 0, fence);
1376		dma_fence_put(fence);
1377		if (r)
1378			return r;
1379		r = drm_syncobj_get_fd(syncobj, (int*)&info->out.handle);
1380		drm_syncobj_put(syncobj);
1381		return r;
1382
1383	case AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD:
1384		fd = get_unused_fd_flags(O_CLOEXEC);
1385		if (fd < 0) {
1386			dma_fence_put(fence);
1387			return fd;
1388		}
1389
1390		sync_file = sync_file_create(fence);
1391		dma_fence_put(fence);
1392		if (!sync_file) {
1393			put_unused_fd(fd);
1394			return -ENOMEM;
1395		}
1396
1397		fd_install(fd, sync_file->file);
1398		info->out.handle = fd;
1399		return 0;
1400
1401	default:
1402		return -EINVAL;
1403	}
1404}
1405
1406/**
1407 * amdgpu_cs_wait_all_fence - wait on all fences to signal
1408 *
1409 * @adev: amdgpu device
1410 * @filp: file private
1411 * @wait: wait parameters
1412 * @fences: array of drm_amdgpu_fence
1413 */
1414static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
1415				     struct drm_file *filp,
1416				     union drm_amdgpu_wait_fences *wait,
1417				     struct drm_amdgpu_fence *fences)
1418{
1419	uint32_t fence_count = wait->in.fence_count;
1420	unsigned int i;
1421	long r = 1;
1422
1423	for (i = 0; i < fence_count; i++) {
1424		struct dma_fence *fence;
1425		unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
1426
1427		fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
1428		if (IS_ERR(fence))
1429			return PTR_ERR(fence);
1430		else if (!fence)
1431			continue;
1432
1433		r = dma_fence_wait_timeout(fence, true, timeout);
1434		dma_fence_put(fence);
1435		if (r < 0)
1436			return r;
1437
1438		if (r == 0)
1439			break;
1440
1441		if (fence->error)
1442			return fence->error;
1443	}
1444
1445	memset(wait, 0, sizeof(*wait));
1446	wait->out.status = (r > 0);
1447
1448	return 0;
1449}
1450
1451/**
1452 * amdgpu_cs_wait_any_fence - wait on any fence to signal
1453 *
1454 * @adev: amdgpu device
1455 * @filp: file private
1456 * @wait: wait parameters
1457 * @fences: array of drm_amdgpu_fence
1458 */
1459static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
1460				    struct drm_file *filp,
1461				    union drm_amdgpu_wait_fences *wait,
1462				    struct drm_amdgpu_fence *fences)
1463{
1464	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
1465	uint32_t fence_count = wait->in.fence_count;
1466	uint32_t first = ~0;
1467	struct dma_fence **array;
1468	unsigned int i;
1469	long r;
1470
1471	/* Prepare the fence array */
1472	array = kcalloc(fence_count, sizeof(struct dma_fence *), GFP_KERNEL);
1473
1474	if (array == NULL)
1475		return -ENOMEM;
1476
1477	for (i = 0; i < fence_count; i++) {
1478		struct dma_fence *fence;
1479
1480		fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
1481		if (IS_ERR(fence)) {
1482			r = PTR_ERR(fence);
1483			goto err_free_fence_array;
1484		} else if (fence) {
1485			array[i] = fence;
1486		} else { /* NULL, the fence has been already signaled */
1487			r = 1;
1488			first = i;
1489			goto out;
1490		}
1491	}
1492
1493	r = dma_fence_wait_any_timeout(array, fence_count, true, timeout,
1494				       &first);
1495	if (r < 0)
1496		goto err_free_fence_array;
1497
1498out:
1499	memset(wait, 0, sizeof(*wait));
1500	wait->out.status = (r > 0);
1501	wait->out.first_signaled = first;
1502
1503	if (first < fence_count && array[first])
1504		r = array[first]->error;
1505	else
1506		r = 0;
1507
1508err_free_fence_array:
1509	for (i = 0; i < fence_count; i++)
1510		dma_fence_put(array[i]);
1511	kfree(array);
1512
1513	return r;
1514}
1515
1516/**
1517 * amdgpu_cs_wait_fences_ioctl - wait for multiple command submissions to finish
1518 *
1519 * @dev: drm device
1520 * @data: data from userspace
1521 * @filp: file private
1522 */
1523int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
1524				struct drm_file *filp)
1525{
1526	struct amdgpu_device *adev = dev->dev_private;
1527	union drm_amdgpu_wait_fences *wait = data;
1528	uint32_t fence_count = wait->in.fence_count;
1529	struct drm_amdgpu_fence *fences_user;
1530	struct drm_amdgpu_fence *fences;
1531	int r;
1532
1533	/* Get the fences from userspace */
1534	fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
1535			GFP_KERNEL);
1536	if (fences == NULL)
1537		return -ENOMEM;
1538
1539	fences_user = u64_to_user_ptr(wait->in.fences);
1540	if (copy_from_user(fences, fences_user,
1541		sizeof(struct drm_amdgpu_fence) * fence_count)) {
1542		r = -EFAULT;
1543		goto err_free_fences;
1544	}
1545
1546	if (wait->in.wait_all)
1547		r = amdgpu_cs_wait_all_fences(adev, filp, wait, fences);
1548	else
1549		r = amdgpu_cs_wait_any_fence(adev, filp, wait, fences);
1550
1551err_free_fences:
1552	kfree(fences);
1553
1554	return r;
1555}
1556
1557/**
1558 * amdgpu_cs_find_bo_va - find bo_va for VM address
1559 *
1560 * @parser: command submission parser context
1561 * @addr: VM address
1562 * @bo: resulting BO of the mapping found
1563 *
1564 * Search the buffer objects in the command submission context for a certain
1565 * virtual memory address. Returns allocation structure when found, NULL
1566 * otherwise.
1567 */
1568int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
1569			   uint64_t addr, struct amdgpu_bo **bo,
1570			   struct amdgpu_bo_va_mapping **map)
1571{
1572	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
1573	struct amdgpu_vm *vm = &fpriv->vm;
1574	struct amdgpu_bo_va_mapping *mapping;
1575	int r;
1576
1577	addr /= AMDGPU_GPU_PAGE_SIZE;
1578
1579	mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
1580	if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
1581		return -EINVAL;
1582
1583	*bo = mapping->bo_va->base.bo;
1584	*map = mapping;
1585
1586	/* Double check that the BO is reserved by this CS */
1587	if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket)
1588		return -EINVAL;
1589
1590	if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
1591		(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
1592		amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains);
1593		r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, false,
1594				    false);
1595		if (r)
1596			return r;
1597	}
1598
1599	return amdgpu_ttm_bind(&(*bo)->tbo, &(*bo)->tbo.mem);
1600}