drivers/gpu/drm/amd/amdgpu/amdgpu_object.c at v4.11-rc7

tjh.dev / kernel
fork atom
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
fork atom
kernel / drivers / gpu / drm / amd / amdgpu / amdgpu_object.c
at v4.11-rc7 1017 lines 26 kB view raw
wrap content
   1/*
   2 * Copyright 2009 Jerome Glisse.
   3 * All Rights Reserved.
   4 *
   5 * Permission is hereby granted, free of charge, to any person obtaining a
   6 * copy of this software and associated documentation files (the
   7 * "Software"), to deal in the Software without restriction, including
   8 * without limitation the rights to use, copy, modify, merge, publish,
   9 * distribute, sub license, and/or sell copies of the Software, and to
  10 * permit persons to whom the Software is furnished to do so, subject to
  11 * the following conditions:
  12 *
  13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
  17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  19 * USE OR OTHER DEALINGS IN THE SOFTWARE.
  20 *
  21 * The above copyright notice and this permission notice (including the
  22 * next paragraph) shall be included in all copies or substantial portions
  23 * of the Software.
  24 *
  25 */
  26/*
  27 * Authors:
  28 *    Jerome Glisse <glisse@freedesktop.org>
  29 *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
  30 *    Dave Airlie
  31 */
  32#include <linux/list.h>
  33#include <linux/slab.h>
  34#include <drm/drmP.h>
  35#include <drm/amdgpu_drm.h>
  36#include <drm/drm_cache.h>
  37#include "amdgpu.h"
  38#include "amdgpu_trace.h"
  39
  40
  41
  42static u64 amdgpu_get_vis_part_size(struct amdgpu_device *adev,
  43						struct ttm_mem_reg *mem)
  44{
  45	if (mem->start << PAGE_SHIFT >= adev->mc.visible_vram_size)
  46		return 0;
  47
  48	return ((mem->start << PAGE_SHIFT) + mem->size) >
  49		adev->mc.visible_vram_size ?
  50		adev->mc.visible_vram_size - (mem->start << PAGE_SHIFT) :
  51		mem->size;
  52}
  53
  54static void amdgpu_update_memory_usage(struct amdgpu_device *adev,
  55		       struct ttm_mem_reg *old_mem,
  56		       struct ttm_mem_reg *new_mem)
  57{
  58	u64 vis_size;
  59	if (!adev)
  60		return;
  61
  62	if (new_mem) {
  63		switch (new_mem->mem_type) {
  64		case TTM_PL_TT:
  65			atomic64_add(new_mem->size, &adev->gtt_usage);
  66			break;
  67		case TTM_PL_VRAM:
  68			atomic64_add(new_mem->size, &adev->vram_usage);
  69			vis_size = amdgpu_get_vis_part_size(adev, new_mem);
  70			atomic64_add(vis_size, &adev->vram_vis_usage);
  71			break;
  72		}
  73	}
  74
  75	if (old_mem) {
  76		switch (old_mem->mem_type) {
  77		case TTM_PL_TT:
  78			atomic64_sub(old_mem->size, &adev->gtt_usage);
  79			break;
  80		case TTM_PL_VRAM:
  81			atomic64_sub(old_mem->size, &adev->vram_usage);
  82			vis_size = amdgpu_get_vis_part_size(adev, old_mem);
  83			atomic64_sub(vis_size, &adev->vram_vis_usage);
  84			break;
  85		}
  86	}
  87}
  88
  89static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
  90{
  91	struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
  92	struct amdgpu_bo *bo;
  93
  94	bo = container_of(tbo, struct amdgpu_bo, tbo);
  95
  96	amdgpu_update_memory_usage(adev, &bo->tbo.mem, NULL);
  97
  98	drm_gem_object_release(&bo->gem_base);
  99	amdgpu_bo_unref(&bo->parent);
 100	if (!list_empty(&bo->shadow_list)) {
 101		mutex_lock(&adev->shadow_list_lock);
 102		list_del_init(&bo->shadow_list);
 103		mutex_unlock(&adev->shadow_list_lock);
 104	}
 105	kfree(bo->metadata);
 106	kfree(bo);
 107}
 108
 109bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo)
 110{
 111	if (bo->destroy == &amdgpu_ttm_bo_destroy)
 112		return true;
 113	return false;
 114}
 115
 116static void amdgpu_ttm_placement_init(struct amdgpu_device *adev,
 117				      struct ttm_placement *placement,
 118				      struct ttm_place *places,
 119				      u32 domain, u64 flags)
 120{
 121	u32 c = 0;
 122
 123	if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
 124		unsigned visible_pfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
 125		unsigned lpfn = 0;
 126
 127		/* This forces a reallocation if the flag wasn't set before */
 128		if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
 129			lpfn = adev->mc.real_vram_size >> PAGE_SHIFT;
 130
 131		places[c].fpfn = 0;
 132		places[c].lpfn = lpfn;
 133		places[c].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
 134			TTM_PL_FLAG_VRAM;
 135		if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
 136			places[c].lpfn = visible_pfn;
 137		else
 138			places[c].flags |= TTM_PL_FLAG_TOPDOWN;
 139		c++;
 140	}
 141
 142	if (domain & AMDGPU_GEM_DOMAIN_GTT) {
 143		places[c].fpfn = 0;
 144		places[c].lpfn = 0;
 145		places[c].flags = TTM_PL_FLAG_TT;
 146		if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
 147			places[c].flags |= TTM_PL_FLAG_WC |
 148				TTM_PL_FLAG_UNCACHED;
 149		else
 150			places[c].flags |= TTM_PL_FLAG_CACHED;
 151		c++;
 152	}
 153
 154	if (domain & AMDGPU_GEM_DOMAIN_CPU) {
 155		places[c].fpfn = 0;
 156		places[c].lpfn = 0;
 157		places[c].flags = TTM_PL_FLAG_SYSTEM;
 158		if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
 159			places[c].flags |= TTM_PL_FLAG_WC |
 160				TTM_PL_FLAG_UNCACHED;
 161		else
 162			places[c].flags |= TTM_PL_FLAG_CACHED;
 163		c++;
 164	}
 165
 166	if (domain & AMDGPU_GEM_DOMAIN_GDS) {
 167		places[c].fpfn = 0;
 168		places[c].lpfn = 0;
 169		places[c].flags = TTM_PL_FLAG_UNCACHED | AMDGPU_PL_FLAG_GDS;
 170		c++;
 171	}
 172
 173	if (domain & AMDGPU_GEM_DOMAIN_GWS) {
 174		places[c].fpfn = 0;
 175		places[c].lpfn = 0;
 176		places[c].flags = TTM_PL_FLAG_UNCACHED | AMDGPU_PL_FLAG_GWS;
 177		c++;
 178	}
 179
 180	if (domain & AMDGPU_GEM_DOMAIN_OA) {
 181		places[c].fpfn = 0;
 182		places[c].lpfn = 0;
 183		places[c].flags = TTM_PL_FLAG_UNCACHED | AMDGPU_PL_FLAG_OA;
 184		c++;
 185	}
 186
 187	if (!c) {
 188		places[c].fpfn = 0;
 189		places[c].lpfn = 0;
 190		places[c].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
 191		c++;
 192	}
 193
 194	placement->num_placement = c;
 195	placement->placement = places;
 196
 197	placement->num_busy_placement = c;
 198	placement->busy_placement = places;
 199}
 200
 201void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
 202{
 203	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
 204
 205	amdgpu_ttm_placement_init(adev, &abo->placement, abo->placements,
 206				  domain, abo->flags);
 207}
 208
 209static void amdgpu_fill_placement_to_bo(struct amdgpu_bo *bo,
 210					struct ttm_placement *placement)
 211{
 212	BUG_ON(placement->num_placement > (AMDGPU_GEM_DOMAIN_MAX + 1));
 213
 214	memcpy(bo->placements, placement->placement,
 215	       placement->num_placement * sizeof(struct ttm_place));
 216	bo->placement.num_placement = placement->num_placement;
 217	bo->placement.num_busy_placement = placement->num_busy_placement;
 218	bo->placement.placement = bo->placements;
 219	bo->placement.busy_placement = bo->placements;
 220}
 221
 222/**
 223 * amdgpu_bo_create_kernel - create BO for kernel use
 224 *
 225 * @adev: amdgpu device object
 226 * @size: size for the new BO
 227 * @align: alignment for the new BO
 228 * @domain: where to place it
 229 * @bo_ptr: resulting BO
 230 * @gpu_addr: GPU addr of the pinned BO
 231 * @cpu_addr: optional CPU address mapping
 232 *
 233 * Allocates and pins a BO for kernel internal use.
 234 *
 235 * Returns 0 on success, negative error code otherwise.
 236 */
 237int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
 238			    unsigned long size, int align,
 239			    u32 domain, struct amdgpu_bo **bo_ptr,
 240			    u64 *gpu_addr, void **cpu_addr)
 241{
 242	int r;
 243
 244	r = amdgpu_bo_create(adev, size, align, true, domain,
 245			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
 246			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
 247			     NULL, NULL, bo_ptr);
 248	if (r) {
 249		dev_err(adev->dev, "(%d) failed to allocate kernel bo\n", r);
 250		return r;
 251	}
 252
 253	r = amdgpu_bo_reserve(*bo_ptr, false);
 254	if (r) {
 255		dev_err(adev->dev, "(%d) failed to reserve kernel bo\n", r);
 256		goto error_free;
 257	}
 258
 259	r = amdgpu_bo_pin(*bo_ptr, domain, gpu_addr);
 260	if (r) {
 261		dev_err(adev->dev, "(%d) kernel bo pin failed\n", r);
 262		goto error_unreserve;
 263	}
 264
 265	if (cpu_addr) {
 266		r = amdgpu_bo_kmap(*bo_ptr, cpu_addr);
 267		if (r) {
 268			dev_err(adev->dev, "(%d) kernel bo map failed\n", r);
 269			goto error_unreserve;
 270		}
 271	}
 272
 273	amdgpu_bo_unreserve(*bo_ptr);
 274
 275	return 0;
 276
 277error_unreserve:
 278	amdgpu_bo_unreserve(*bo_ptr);
 279
 280error_free:
 281	amdgpu_bo_unref(bo_ptr);
 282
 283	return r;
 284}
 285
 286/**
 287 * amdgpu_bo_free_kernel - free BO for kernel use
 288 *
 289 * @bo: amdgpu BO to free
 290 *
 291 * unmaps and unpin a BO for kernel internal use.
 292 */
 293void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
 294			   void **cpu_addr)
 295{
 296	if (*bo == NULL)
 297		return;
 298
 299	if (likely(amdgpu_bo_reserve(*bo, false) == 0)) {
 300		if (cpu_addr)
 301			amdgpu_bo_kunmap(*bo);
 302
 303		amdgpu_bo_unpin(*bo);
 304		amdgpu_bo_unreserve(*bo);
 305	}
 306	amdgpu_bo_unref(bo);
 307
 308	if (gpu_addr)
 309		*gpu_addr = 0;
 310
 311	if (cpu_addr)
 312		*cpu_addr = NULL;
 313}
 314
 315int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
 316				unsigned long size, int byte_align,
 317				bool kernel, u32 domain, u64 flags,
 318				struct sg_table *sg,
 319				struct ttm_placement *placement,
 320				struct reservation_object *resv,
 321				struct amdgpu_bo **bo_ptr)
 322{
 323	struct amdgpu_bo *bo;
 324	enum ttm_bo_type type;
 325	unsigned long page_align;
 326	u64 initial_bytes_moved;
 327	size_t acc_size;
 328	int r;
 329
 330	page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;
 331	size = ALIGN(size, PAGE_SIZE);
 332
 333	if (kernel) {
 334		type = ttm_bo_type_kernel;
 335	} else if (sg) {
 336		type = ttm_bo_type_sg;
 337	} else {
 338		type = ttm_bo_type_device;
 339	}
 340	*bo_ptr = NULL;
 341
 342	acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
 343				       sizeof(struct amdgpu_bo));
 344
 345	bo = kzalloc(sizeof(struct amdgpu_bo), GFP_KERNEL);
 346	if (bo == NULL)
 347		return -ENOMEM;
 348	r = drm_gem_object_init(adev->ddev, &bo->gem_base, size);
 349	if (unlikely(r)) {
 350		kfree(bo);
 351		return r;
 352	}
 353	INIT_LIST_HEAD(&bo->shadow_list);
 354	INIT_LIST_HEAD(&bo->va);
 355	bo->prefered_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM |
 356					 AMDGPU_GEM_DOMAIN_GTT |
 357					 AMDGPU_GEM_DOMAIN_CPU |
 358					 AMDGPU_GEM_DOMAIN_GDS |
 359					 AMDGPU_GEM_DOMAIN_GWS |
 360					 AMDGPU_GEM_DOMAIN_OA);
 361	bo->allowed_domains = bo->prefered_domains;
 362	if (!kernel && bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
 363		bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
 364
 365	bo->flags = flags;
 366
 367#ifdef CONFIG_X86_32
 368	/* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
 369	 * See https://bugs.freedesktop.org/show_bug.cgi?id=84627
 370	 */
 371	bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
 372#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT)
 373	/* Don't try to enable write-combining when it can't work, or things
 374	 * may be slow
 375	 * See https://bugs.freedesktop.org/show_bug.cgi?id=88758
 376	 */
 377
 378#ifndef CONFIG_COMPILE_TEST
 379#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \
 380	 thanks to write-combining
 381#endif
 382
 383	if (bo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
 384		DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
 385			      "better performance thanks to write-combining\n");
 386	bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
 387#else
 388	/* For architectures that don't support WC memory,
 389	 * mask out the WC flag from the BO
 390	 */
 391	if (!drm_arch_can_wc_memory())
 392		bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
 393#endif
 394
 395	amdgpu_fill_placement_to_bo(bo, placement);
 396	/* Kernel allocation are uninterruptible */
 397
 398	if (!resv) {
 399		bool locked;
 400
 401		reservation_object_init(&bo->tbo.ttm_resv);
 402		locked = ww_mutex_trylock(&bo->tbo.ttm_resv.lock);
 403		WARN_ON(!locked);
 404	}
 405
 406	initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
 407	r = ttm_bo_init(&adev->mman.bdev, &bo->tbo, size, type,
 408			&bo->placement, page_align, !kernel, NULL,
 409			acc_size, sg, resv ? resv : &bo->tbo.ttm_resv,
 410			&amdgpu_ttm_bo_destroy);
 411	amdgpu_cs_report_moved_bytes(adev,
 412		atomic64_read(&adev->num_bytes_moved) - initial_bytes_moved);
 413
 414	if (unlikely(r != 0)) {
 415		if (!resv)
 416			ww_mutex_unlock(&bo->tbo.resv->lock);
 417		return r;
 418	}
 419
 420	bo->tbo.priority = ilog2(bo->tbo.num_pages);
 421	if (kernel)
 422		bo->tbo.priority *= 2;
 423	bo->tbo.priority = min(bo->tbo.priority, (unsigned)(TTM_MAX_BO_PRIORITY - 1));
 424
 425	if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
 426	    bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) {
 427		struct dma_fence *fence;
 428
 429		r = amdgpu_fill_buffer(bo, 0, bo->tbo.resv, &fence);
 430		if (unlikely(r))
 431			goto fail_unreserve;
 432
 433		amdgpu_bo_fence(bo, fence, false);
 434		dma_fence_put(bo->tbo.moving);
 435		bo->tbo.moving = dma_fence_get(fence);
 436		dma_fence_put(fence);
 437	}
 438	if (!resv)
 439		ww_mutex_unlock(&bo->tbo.resv->lock);
 440	*bo_ptr = bo;
 441
 442	trace_amdgpu_bo_create(bo);
 443
 444	return 0;
 445
 446fail_unreserve:
 447	if (!resv)
 448		ww_mutex_unlock(&bo->tbo.resv->lock);
 449	amdgpu_bo_unref(&bo);
 450	return r;
 451}
 452
 453static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
 454				   unsigned long size, int byte_align,
 455				   struct amdgpu_bo *bo)
 456{
 457	struct ttm_placement placement = {0};
 458	struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1];
 459	int r;
 460
 461	if (bo->shadow)
 462		return 0;
 463
 464	bo->flags |= AMDGPU_GEM_CREATE_SHADOW;
 465	memset(&placements, 0,
 466	       (AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place));
 467
 468	amdgpu_ttm_placement_init(adev, &placement,
 469				  placements, AMDGPU_GEM_DOMAIN_GTT,
 470				  AMDGPU_GEM_CREATE_CPU_GTT_USWC);
 471
 472	r = amdgpu_bo_create_restricted(adev, size, byte_align, true,
 473					AMDGPU_GEM_DOMAIN_GTT,
 474					AMDGPU_GEM_CREATE_CPU_GTT_USWC,
 475					NULL, &placement,
 476					bo->tbo.resv,
 477					&bo->shadow);
 478	if (!r) {
 479		bo->shadow->parent = amdgpu_bo_ref(bo);
 480		mutex_lock(&adev->shadow_list_lock);
 481		list_add_tail(&bo->shadow_list, &adev->shadow_list);
 482		mutex_unlock(&adev->shadow_list_lock);
 483	}
 484
 485	return r;
 486}
 487
 488int amdgpu_bo_create(struct amdgpu_device *adev,
 489		     unsigned long size, int byte_align,
 490		     bool kernel, u32 domain, u64 flags,
 491		     struct sg_table *sg,
 492		     struct reservation_object *resv,
 493		     struct amdgpu_bo **bo_ptr)
 494{
 495	struct ttm_placement placement = {0};
 496	struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1];
 497	int r;
 498
 499	memset(&placements, 0,
 500	       (AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place));
 501
 502	amdgpu_ttm_placement_init(adev, &placement,
 503				  placements, domain, flags);
 504
 505	r = amdgpu_bo_create_restricted(adev, size, byte_align, kernel,
 506					domain, flags, sg, &placement,
 507					resv, bo_ptr);
 508	if (r)
 509		return r;
 510
 511	if (amdgpu_need_backup(adev) && (flags & AMDGPU_GEM_CREATE_SHADOW)) {
 512		if (!resv) {
 513			r = ww_mutex_lock(&(*bo_ptr)->tbo.resv->lock, NULL);
 514			WARN_ON(r != 0);
 515		}
 516
 517		r = amdgpu_bo_create_shadow(adev, size, byte_align, (*bo_ptr));
 518
 519		if (!resv)
 520			ww_mutex_unlock(&(*bo_ptr)->tbo.resv->lock);
 521
 522		if (r)
 523			amdgpu_bo_unref(bo_ptr);
 524	}
 525
 526	return r;
 527}
 528
 529int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
 530			       struct amdgpu_ring *ring,
 531			       struct amdgpu_bo *bo,
 532			       struct reservation_object *resv,
 533			       struct dma_fence **fence,
 534			       bool direct)
 535
 536{
 537	struct amdgpu_bo *shadow = bo->shadow;
 538	uint64_t bo_addr, shadow_addr;
 539	int r;
 540
 541	if (!shadow)
 542		return -EINVAL;
 543
 544	bo_addr = amdgpu_bo_gpu_offset(bo);
 545	shadow_addr = amdgpu_bo_gpu_offset(bo->shadow);
 546
 547	r = reservation_object_reserve_shared(bo->tbo.resv);
 548	if (r)
 549		goto err;
 550
 551	r = amdgpu_copy_buffer(ring, bo_addr, shadow_addr,
 552			       amdgpu_bo_size(bo), resv, fence,
 553			       direct);
 554	if (!r)
 555		amdgpu_bo_fence(bo, *fence, true);
 556
 557err:
 558	return r;
 559}
 560
 561int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
 562				  struct amdgpu_ring *ring,
 563				  struct amdgpu_bo *bo,
 564				  struct reservation_object *resv,
 565				  struct dma_fence **fence,
 566				  bool direct)
 567
 568{
 569	struct amdgpu_bo *shadow = bo->shadow;
 570	uint64_t bo_addr, shadow_addr;
 571	int r;
 572
 573	if (!shadow)
 574		return -EINVAL;
 575
 576	bo_addr = amdgpu_bo_gpu_offset(bo);
 577	shadow_addr = amdgpu_bo_gpu_offset(bo->shadow);
 578
 579	r = reservation_object_reserve_shared(bo->tbo.resv);
 580	if (r)
 581		goto err;
 582
 583	r = amdgpu_copy_buffer(ring, shadow_addr, bo_addr,
 584			       amdgpu_bo_size(bo), resv, fence,
 585			       direct);
 586	if (!r)
 587		amdgpu_bo_fence(bo, *fence, true);
 588
 589err:
 590	return r;
 591}
 592
 593int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
 594{
 595	bool is_iomem;
 596	long r;
 597
 598	if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
 599		return -EPERM;
 600
 601	if (bo->kptr) {
 602		if (ptr) {
 603			*ptr = bo->kptr;
 604		}
 605		return 0;
 606	}
 607
 608	r = reservation_object_wait_timeout_rcu(bo->tbo.resv, false, false,
 609						MAX_SCHEDULE_TIMEOUT);
 610	if (r < 0)
 611		return r;
 612
 613	r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.num_pages, &bo->kmap);
 614	if (r)
 615		return r;
 616
 617	bo->kptr = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
 618	if (ptr)
 619		*ptr = bo->kptr;
 620
 621	return 0;
 622}
 623
 624void amdgpu_bo_kunmap(struct amdgpu_bo *bo)
 625{
 626	if (bo->kptr == NULL)
 627		return;
 628	bo->kptr = NULL;
 629	ttm_bo_kunmap(&bo->kmap);
 630}
 631
 632struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo)
 633{
 634	if (bo == NULL)
 635		return NULL;
 636
 637	ttm_bo_reference(&bo->tbo);
 638	return bo;
 639}
 640
 641void amdgpu_bo_unref(struct amdgpu_bo **bo)
 642{
 643	struct ttm_buffer_object *tbo;
 644
 645	if ((*bo) == NULL)
 646		return;
 647
 648	tbo = &((*bo)->tbo);
 649	ttm_bo_unref(&tbo);
 650	if (tbo == NULL)
 651		*bo = NULL;
 652}
 653
 654int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
 655			     u64 min_offset, u64 max_offset,
 656			     u64 *gpu_addr)
 657{
 658	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 659	int r, i;
 660	unsigned fpfn, lpfn;
 661
 662	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
 663		return -EPERM;
 664
 665	if (WARN_ON_ONCE(min_offset > max_offset))
 666		return -EINVAL;
 667
 668	if (bo->pin_count) {
 669		uint32_t mem_type = bo->tbo.mem.mem_type;
 670
 671		if (domain != amdgpu_mem_type_to_domain(mem_type))
 672			return -EINVAL;
 673
 674		bo->pin_count++;
 675		if (gpu_addr)
 676			*gpu_addr = amdgpu_bo_gpu_offset(bo);
 677
 678		if (max_offset != 0) {
 679			u64 domain_start = bo->tbo.bdev->man[mem_type].gpu_offset;
 680			WARN_ON_ONCE(max_offset <
 681				     (amdgpu_bo_gpu_offset(bo) - domain_start));
 682		}
 683
 684		return 0;
 685	}
 686
 687	bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
 688	amdgpu_ttm_placement_from_domain(bo, domain);
 689	for (i = 0; i < bo->placement.num_placement; i++) {
 690		/* force to pin into visible video ram */
 691		if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
 692		    !(bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) &&
 693		    (!max_offset || max_offset >
 694		     adev->mc.visible_vram_size)) {
 695			if (WARN_ON_ONCE(min_offset >
 696					 adev->mc.visible_vram_size))
 697				return -EINVAL;
 698			fpfn = min_offset >> PAGE_SHIFT;
 699			lpfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
 700		} else {
 701			fpfn = min_offset >> PAGE_SHIFT;
 702			lpfn = max_offset >> PAGE_SHIFT;
 703		}
 704		if (fpfn > bo->placements[i].fpfn)
 705			bo->placements[i].fpfn = fpfn;
 706		if (!bo->placements[i].lpfn ||
 707		    (lpfn && lpfn < bo->placements[i].lpfn))
 708			bo->placements[i].lpfn = lpfn;
 709		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
 710	}
 711
 712	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
 713	if (unlikely(r)) {
 714		dev_err(adev->dev, "%p pin failed\n", bo);
 715		goto error;
 716	}
 717	r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem);
 718	if (unlikely(r)) {
 719		dev_err(adev->dev, "%p bind failed\n", bo);
 720		goto error;
 721	}
 722
 723	bo->pin_count = 1;
 724	if (gpu_addr != NULL)
 725		*gpu_addr = amdgpu_bo_gpu_offset(bo);
 726	if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
 727		adev->vram_pin_size += amdgpu_bo_size(bo);
 728		if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
 729			adev->invisible_pin_size += amdgpu_bo_size(bo);
 730	} else if (domain == AMDGPU_GEM_DOMAIN_GTT) {
 731		adev->gart_pin_size += amdgpu_bo_size(bo);
 732	}
 733
 734error:
 735	return r;
 736}
 737
 738int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain, u64 *gpu_addr)
 739{
 740	return amdgpu_bo_pin_restricted(bo, domain, 0, 0, gpu_addr);
 741}
 742
 743int amdgpu_bo_unpin(struct amdgpu_bo *bo)
 744{
 745	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 746	int r, i;
 747
 748	if (!bo->pin_count) {
 749		dev_warn(adev->dev, "%p unpin not necessary\n", bo);
 750		return 0;
 751	}
 752	bo->pin_count--;
 753	if (bo->pin_count)
 754		return 0;
 755	for (i = 0; i < bo->placement.num_placement; i++) {
 756		bo->placements[i].lpfn = 0;
 757		bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
 758	}
 759	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
 760	if (unlikely(r)) {
 761		dev_err(adev->dev, "%p validate failed for unpin\n", bo);
 762		goto error;
 763	}
 764
 765	if (bo->tbo.mem.mem_type == TTM_PL_VRAM) {
 766		adev->vram_pin_size -= amdgpu_bo_size(bo);
 767		if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
 768			adev->invisible_pin_size -= amdgpu_bo_size(bo);
 769	} else if (bo->tbo.mem.mem_type == TTM_PL_TT) {
 770		adev->gart_pin_size -= amdgpu_bo_size(bo);
 771	}
 772
 773error:
 774	return r;
 775}
 776
 777int amdgpu_bo_evict_vram(struct amdgpu_device *adev)
 778{
 779	/* late 2.6.33 fix IGP hibernate - we need pm ops to do this correct */
 780	if (0 && (adev->flags & AMD_IS_APU)) {
 781		/* Useless to evict on IGP chips */
 782		return 0;
 783	}
 784	return ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_VRAM);
 785}
 786
 787static const char *amdgpu_vram_names[] = {
 788	"UNKNOWN",
 789	"GDDR1",
 790	"DDR2",
 791	"GDDR3",
 792	"GDDR4",
 793	"GDDR5",
 794	"HBM",
 795	"DDR3"
 796};
 797
 798int amdgpu_bo_init(struct amdgpu_device *adev)
 799{
 800	/* reserve PAT memory space to WC for VRAM */
 801	arch_io_reserve_memtype_wc(adev->mc.aper_base,
 802				   adev->mc.aper_size);
 803
 804	/* Add an MTRR for the VRAM */
 805	adev->mc.vram_mtrr = arch_phys_wc_add(adev->mc.aper_base,
 806					      adev->mc.aper_size);
 807	DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
 808		adev->mc.mc_vram_size >> 20,
 809		(unsigned long long)adev->mc.aper_size >> 20);
 810	DRM_INFO("RAM width %dbits %s\n",
 811		 adev->mc.vram_width, amdgpu_vram_names[adev->mc.vram_type]);
 812	return amdgpu_ttm_init(adev);
 813}
 814
 815void amdgpu_bo_fini(struct amdgpu_device *adev)
 816{
 817	amdgpu_ttm_fini(adev);
 818	arch_phys_wc_del(adev->mc.vram_mtrr);
 819	arch_io_free_memtype_wc(adev->mc.aper_base, adev->mc.aper_size);
 820}
 821
 822int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
 823			     struct vm_area_struct *vma)
 824{
 825	return ttm_fbdev_mmap(vma, &bo->tbo);
 826}
 827
 828int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags)
 829{
 830	if (AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT) > 6)
 831		return -EINVAL;
 832
 833	bo->tiling_flags = tiling_flags;
 834	return 0;
 835}
 836
 837void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags)
 838{
 839	lockdep_assert_held(&bo->tbo.resv->lock.base);
 840
 841	if (tiling_flags)
 842		*tiling_flags = bo->tiling_flags;
 843}
 844
 845int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata,
 846			    uint32_t metadata_size, uint64_t flags)
 847{
 848	void *buffer;
 849
 850	if (!metadata_size) {
 851		if (bo->metadata_size) {
 852			kfree(bo->metadata);
 853			bo->metadata = NULL;
 854			bo->metadata_size = 0;
 855		}
 856		return 0;
 857	}
 858
 859	if (metadata == NULL)
 860		return -EINVAL;
 861
 862	buffer = kmemdup(metadata, metadata_size, GFP_KERNEL);
 863	if (buffer == NULL)
 864		return -ENOMEM;
 865
 866	kfree(bo->metadata);
 867	bo->metadata_flags = flags;
 868	bo->metadata = buffer;
 869	bo->metadata_size = metadata_size;
 870
 871	return 0;
 872}
 873
 874int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
 875			   size_t buffer_size, uint32_t *metadata_size,
 876			   uint64_t *flags)
 877{
 878	if (!buffer && !metadata_size)
 879		return -EINVAL;
 880
 881	if (buffer) {
 882		if (buffer_size < bo->metadata_size)
 883			return -EINVAL;
 884
 885		if (bo->metadata_size)
 886			memcpy(buffer, bo->metadata, bo->metadata_size);
 887	}
 888
 889	if (metadata_size)
 890		*metadata_size = bo->metadata_size;
 891	if (flags)
 892		*flags = bo->metadata_flags;
 893
 894	return 0;
 895}
 896
 897void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
 898			   bool evict,
 899			   struct ttm_mem_reg *new_mem)
 900{
 901	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
 902	struct amdgpu_bo *abo;
 903	struct ttm_mem_reg *old_mem = &bo->mem;
 904
 905	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo))
 906		return;
 907
 908	abo = container_of(bo, struct amdgpu_bo, tbo);
 909	amdgpu_vm_bo_invalidate(adev, abo);
 910
 911	/* remember the eviction */
 912	if (evict)
 913		atomic64_inc(&adev->num_evictions);
 914
 915	/* update statistics */
 916	if (!new_mem)
 917		return;
 918
 919	/* move_notify is called before move happens */
 920	amdgpu_update_memory_usage(adev, &bo->mem, new_mem);
 921
 922	trace_amdgpu_ttm_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
 923}
 924
 925int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 926{
 927	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
 928	struct amdgpu_bo *abo;
 929	unsigned long offset, size, lpfn;
 930	int i, r;
 931
 932	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo))
 933		return 0;
 934
 935	abo = container_of(bo, struct amdgpu_bo, tbo);
 936	if (bo->mem.mem_type != TTM_PL_VRAM)
 937		return 0;
 938
 939	size = bo->mem.num_pages << PAGE_SHIFT;
 940	offset = bo->mem.start << PAGE_SHIFT;
 941	/* TODO: figure out how to map scattered VRAM to the CPU */
 942	if ((offset + size) <= adev->mc.visible_vram_size &&
 943	    (abo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS))
 944		return 0;
 945
 946	/* Can't move a pinned BO to visible VRAM */
 947	if (abo->pin_count > 0)
 948		return -EINVAL;
 949
 950	/* hurrah the memory is not visible ! */
 951	abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
 952	amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM);
 953	lpfn =	adev->mc.visible_vram_size >> PAGE_SHIFT;
 954	for (i = 0; i < abo->placement.num_placement; i++) {
 955		/* Force into visible VRAM */
 956		if ((abo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
 957		    (!abo->placements[i].lpfn ||
 958		     abo->placements[i].lpfn > lpfn))
 959			abo->placements[i].lpfn = lpfn;
 960	}
 961	r = ttm_bo_validate(bo, &abo->placement, false, false);
 962	if (unlikely(r == -ENOMEM)) {
 963		amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
 964		return ttm_bo_validate(bo, &abo->placement, false, false);
 965	} else if (unlikely(r != 0)) {
 966		return r;
 967	}
 968
 969	offset = bo->mem.start << PAGE_SHIFT;
 970	/* this should never happen */
 971	if ((offset + size) > adev->mc.visible_vram_size)
 972		return -EINVAL;
 973
 974	return 0;
 975}
 976
 977/**
 978 * amdgpu_bo_fence - add fence to buffer object
 979 *
 980 * @bo: buffer object in question
 981 * @fence: fence to add
 982 * @shared: true if fence should be added shared
 983 *
 984 */
 985void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
 986		     bool shared)
 987{
 988	struct reservation_object *resv = bo->tbo.resv;
 989
 990	if (shared)
 991		reservation_object_add_shared_fence(resv, fence);
 992	else
 993		reservation_object_add_excl_fence(resv, fence);
 994}
 995
 996/**
 997 * amdgpu_bo_gpu_offset - return GPU offset of bo
 998 * @bo:	amdgpu object for which we query the offset
 999 *
1000 * Returns current GPU offset of the object.
1001 *
1002 * Note: object should either be pinned or reserved when calling this
1003 * function, it might be useful to add check for this for debugging.
1004 */
1005u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
1006{
1007	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);
1008	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_TT &&
1009		     !amdgpu_ttm_is_bound(bo->tbo.ttm));
1010	WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
1011		     !bo->pin_count);
1012	WARN_ON_ONCE(bo->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET);
1013	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_VRAM &&
1014		     !(bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS));
1015
1016	return bo->tbo.offset;
1017}