/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/sched/mm.h>
#include <linux/dma-fence-array.h>

#include <drm/drm_gem.h>
#include <drm/drm_print.h>
#include <drm/intel/display_parent_interface.h>

#include "display/intel_fb.h"
#include "display/intel_frontbuffer.h"
#include "gem/i915_gem_lmem.h"
#include "gem/i915_gem_object_frontbuffer.h"
#include "gem/i915_gem_tiling.h"
#include "gt/intel_engine.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_gt_requests.h"
#include "gt/intel_tlb.h"

#include "i915_drv.h"
#include "i915_gem_evict.h"
#include "i915_sw_fence_work.h"
#include "i915_trace.h"
#include "i915_vma.h"
#include "i915_vma_resource.h"

static inline void assert_vma_held_evict(const struct i915_vma *vma)
{
	/*
	 * We may be forced to unbind when the vm is dead, to clean it up.
	 * This is the only exception to the requirement of the object lock
	 * being held.
	 */
	if (kref_read(&vma->vm->ref))
		assert_object_held_shared(vma->obj);
}

static struct kmem_cache *slab_vmas;

static struct i915_vma *i915_vma_alloc(void)
{
	return kmem_cache_zalloc(slab_vmas, GFP_KERNEL);
}

static void i915_vma_free(struct i915_vma *vma)
{
	return kmem_cache_free(slab_vmas, vma);
}

#if IS_ENABLED(CONFIG_DRM_I915_ERRLOG_GEM) && IS_ENABLED(CONFIG_DRM_DEBUG_MM)

#include <linux/stackdepot.h>

static void vma_print_allocator(struct i915_vma *vma, const char *reason)
{
	char buf[512];

	if (!vma->node.stack) {
		drm_dbg(vma->obj->base.dev,
			"vma.node [%08llx + %08llx] %s: unknown owner\n",
			vma->node.start, vma->node.size, reason);
		return;
	}

	stack_depot_snprint(vma->node.stack, buf, sizeof(buf), 0);
	drm_dbg(vma->obj->base.dev,
		"vma.node [%08llx + %08llx] %s: inserted at %s\n",
		vma->node.start, vma->node.size, reason, buf);
}

#else

static void vma_print_allocator(struct i915_vma *vma, const char *reason)
{
}

#endif

static inline struct i915_vma *active_to_vma(struct i915_active *ref)
{
	return container_of(ref, typeof(struct i915_vma), active);
}

static int __i915_vma_active(struct i915_active *ref)
{
	struct i915_vma *vma = active_to_vma(ref);

	if (!i915_vma_tryget(vma))
		return -ENOENT;

	/*
	 * Exclude global GTT VMA from holding a GT wakeref
	 * while active, otherwise GPU never goes idle.
	 */
	if (!i915_vma_is_ggtt(vma)) {
		/*
		 * Since we and our _retire() counterpart can be
		 * called asynchronously, storing a wakeref tracking
		 * handle inside struct i915_vma is not safe, and
		 * there is no other good place for that. Hence,
		 * use untracked variants of intel_gt_pm_get/put().
		 */
		intel_gt_pm_get_untracked(vma->vm->gt);
	}

	return 0;
}

static void __i915_vma_retire(struct i915_active *ref)
{
	struct i915_vma *vma = active_to_vma(ref);

	if (!i915_vma_is_ggtt(vma)) {
		/*
		 * Since we can be called from atomic contexts,
		 * use an async variant of intel_gt_pm_put().
		 */
		intel_gt_pm_put_async_untracked(vma->vm->gt);
	}

	i915_vma_put(vma);
}

static struct i915_vma *
vma_create(struct drm_i915_gem_object *obj,
	   struct i915_address_space *vm,
	   const struct i915_gtt_view *view)
{
	struct i915_vma *pos = ERR_PTR(-E2BIG);
	struct i915_vma *vma;
	struct rb_node *rb, **p;
	int err;

	/* The aliasing_ppgtt should never be used directly! */
	GEM_BUG_ON(vm == &vm->gt->ggtt->alias->vm);

	vma = i915_vma_alloc();
	if (vma == NULL)
		return ERR_PTR(-ENOMEM);

	vma->ops = &vm->vma_ops;
	vma->obj = obj;
	vma->size = obj->base.size;
	vma->display_alignment = I915_GTT_MIN_ALIGNMENT;

	i915_active_init(&vma->active, __i915_vma_active, __i915_vma_retire, 0);

	/* Declare ourselves safe for use inside shrinkers */
	if (IS_ENABLED(CONFIG_LOCKDEP)) {
		fs_reclaim_acquire(GFP_KERNEL);
		might_lock(&vma->active.mutex);
		fs_reclaim_release(GFP_KERNEL);
	}

	INIT_LIST_HEAD(&vma->closed_link);
	INIT_LIST_HEAD(&vma->obj_link);
	RB_CLEAR_NODE(&vma->obj_node);

	if (view && view->type != I915_GTT_VIEW_NORMAL) {
		vma->gtt_view = *view;
		if (view->type == I915_GTT_VIEW_PARTIAL) {
			GEM_BUG_ON(range_overflows_t(u64,
						     view->partial.offset,
						     view->partial.size,
						     obj->base.size >> PAGE_SHIFT));
			vma->size = view->partial.size;
			vma->size <<= PAGE_SHIFT;
			GEM_BUG_ON(vma->size > obj->base.size);
		} else if (view->type == I915_GTT_VIEW_ROTATED) {
			vma->size = intel_rotation_info_size(&view->rotated);
			vma->size <<= PAGE_SHIFT;
		} else if (view->type == I915_GTT_VIEW_REMAPPED) {
			vma->size = intel_remapped_info_size(&view->remapped);
			vma->size <<= PAGE_SHIFT;
		}
	}

	if (unlikely(vma->size > vm->total))
		goto err_vma;

	GEM_BUG_ON(!IS_ALIGNED(vma->size, I915_GTT_PAGE_SIZE));

	err = mutex_lock_interruptible(&vm->mutex);
	if (err) {
		pos = ERR_PTR(err);
		goto err_vma;
	}

	vma->vm = vm;
	list_add_tail(&vma->vm_link, &vm->unbound_list);

	spin_lock(&obj->vma.lock);
	if (i915_is_ggtt(vm)) {
		if (unlikely(overflows_type(vma->size, u32)))
			goto err_unlock;

		vma->fence_size = i915_gem_fence_size(vm->i915, vma->size,
						      i915_gem_object_get_tiling(obj),
						      i915_gem_object_get_stride(obj));
		if (unlikely(vma->fence_size < vma->size || /* overflow */
			     vma->fence_size > vm->total))
			goto err_unlock;

		GEM_BUG_ON(!IS_ALIGNED(vma->fence_size, I915_GTT_MIN_ALIGNMENT));

		vma->fence_alignment = i915_gem_fence_alignment(vm->i915, vma->size,
								i915_gem_object_get_tiling(obj),
								i915_gem_object_get_stride(obj));
		GEM_BUG_ON(!is_power_of_2(vma->fence_alignment));

		__set_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(vma));
	}

	rb = NULL;
	p = &obj->vma.tree.rb_node;
	while (*p) {
		long cmp;

		rb = *p;
		pos = rb_entry(rb, struct i915_vma, obj_node);

		/*
		 * If the view already exists in the tree, another thread
		 * already created a matching vma, so return the older instance
		 * and dispose of ours.
		 */
		cmp = i915_vma_compare(pos, vm, view);
		if (cmp < 0)
			p = &rb->rb_right;
		else if (cmp > 0)
			p = &rb->rb_left;
		else
			goto err_unlock;
	}
	rb_link_node(&vma->obj_node, rb, p);
	rb_insert_color(&vma->obj_node, &obj->vma.tree);

	if (i915_vma_is_ggtt(vma))
		/*
		 * We put the GGTT vma at the start of the vma-list, followed
		 * by the ppGTT vma. This allows us to break early when
		 * iterating over only the GGTT vma for an object, see
		 * for_each_ggtt_vma()
		 */
		list_add(&vma->obj_link, &obj->vma.list);
	else
		list_add_tail(&vma->obj_link, &obj->vma.list);

	spin_unlock(&obj->vma.lock);
	mutex_unlock(&vm->mutex);

	return vma;

err_unlock:
	spin_unlock(&obj->vma.lock);
	list_del_init(&vma->vm_link);
	mutex_unlock(&vm->mutex);
err_vma:
	i915_vma_free(vma);
	return pos;
}

static struct i915_vma *
i915_vma_lookup(struct drm_i915_gem_object *obj,
		struct i915_address_space *vm,
		const struct i915_gtt_view *view)
{
	struct rb_node *rb;

	rb = obj->vma.tree.rb_node;
	while (rb) {
		struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node);
		long cmp;

		cmp = i915_vma_compare(vma, vm, view);
		if (cmp == 0)
			return vma;

		if (cmp < 0)
			rb = rb->rb_right;
		else
			rb = rb->rb_left;
	}

	return NULL;
}

/**
 * i915_vma_instance - return the singleton instance of the VMA
 * @obj: parent &struct drm_i915_gem_object to be mapped
 * @vm: address space in which the mapping is located
 * @view: additional mapping requirements
 *
 * i915_vma_instance() looks up an existing VMA of the @obj in the @vm with
 * the same @view characteristics. If a match is not found, one is created.
 * Once created, the VMA is kept until either the object is freed, or the
 * address space is closed.
 *
 * Returns the vma, or an error pointer.
 */
struct i915_vma *
i915_vma_instance(struct drm_i915_gem_object *obj,
		  struct i915_address_space *vm,
		  const struct i915_gtt_view *view)
{
	struct i915_vma *vma;

	GEM_BUG_ON(view && !i915_is_ggtt_or_dpt(vm));
	GEM_BUG_ON(!kref_read(&vm->ref));

	spin_lock(&obj->vma.lock);
	vma = i915_vma_lookup(obj, vm, view);
	spin_unlock(&obj->vma.lock);

	/* vma_create() will resolve the race if another creates the vma */
	if (unlikely(!vma))
		vma = vma_create(obj, vm, view);

	GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view));
	return vma;
}
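
/*
 * Typical use of i915_vma_instance() (illustrative sketch only, not taken
 * from this file; error handling elided):
 *
 *	vma = i915_vma_instance(obj, &ggtt->vm, NULL);
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);
 *	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
 *
 * Repeated calls with the same (obj, vm, view) triple return the same
 * singleton, so callers need not cache the pointer themselves.
 */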

struct i915_vma_work {
	struct dma_fence_work base;
	struct i915_address_space *vm;
	struct i915_vm_pt_stash stash;
	struct i915_vma_resource *vma_res;
	struct drm_i915_gem_object *obj;
	struct i915_sw_dma_fence_cb cb;
	unsigned int pat_index;
	unsigned int flags;
};

static void __vma_bind(struct dma_fence_work *work)
{
	struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
	struct i915_vma_resource *vma_res = vw->vma_res;

	/*
	 * We are about to bind the object, which must mean we have already
	 * signaled the work to potentially clear/move the pages underneath. If
	 * something went wrong at that stage then the object should have
	 * unknown_state set, in which case we need to skip the bind.
	 */
	if (i915_gem_object_has_unknown_state(vw->obj))
		return;

	vma_res->ops->bind_vma(vma_res->vm, &vw->stash,
			       vma_res, vw->pat_index, vw->flags);
}

static void __vma_release(struct dma_fence_work *work)
{
	struct i915_vma_work *vw = container_of(work, typeof(*vw), base);

	if (vw->obj)
		i915_gem_object_put(vw->obj);

	i915_vm_free_pt_stash(vw->vm, &vw->stash);
	if (vw->vma_res)
		i915_vma_resource_put(vw->vma_res);
}

static const struct dma_fence_work_ops bind_ops = {
	.name = "bind",
	.work = __vma_bind,
	.release = __vma_release,
};

struct i915_vma_work *i915_vma_work(void)
{
	struct i915_vma_work *vw;

	vw = kzalloc_obj(*vw);
	if (!vw)
		return NULL;

	dma_fence_work_init(&vw->base, &bind_ops);
	vw->base.dma.error = -EAGAIN; /* disable the worker by default */

	return vw;
}

int i915_vma_wait_for_bind(struct i915_vma *vma)
{
	int err = 0;

	if (rcu_access_pointer(vma->active.excl.fence)) {
		struct dma_fence *fence;

		rcu_read_lock();
		fence = dma_fence_get_rcu_safe(&vma->active.excl.fence);
		rcu_read_unlock();
		if (fence) {
			err = dma_fence_wait(fence, true);
			dma_fence_put(fence);
		}
	}

	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
static int i915_vma_verify_bind_complete(struct i915_vma *vma)
{
	struct dma_fence *fence = i915_active_fence_get(&vma->active.excl);
	int err;

	if (!fence)
		return 0;

	if (dma_fence_is_signaled(fence))
		err = fence->error;
	else
		err = -EBUSY;

	dma_fence_put(fence);

	return err;
}
#else
#define i915_vma_verify_bind_complete(_vma) 0
#endif

I915_SELFTEST_EXPORT void
i915_vma_resource_init_from_vma(struct i915_vma_resource *vma_res,
				struct i915_vma *vma)
{
	struct drm_i915_gem_object *obj = vma->obj;

	i915_vma_resource_init(vma_res, vma->vm, vma->pages, &vma->page_sizes,
			       obj->mm.rsgt, i915_gem_object_is_readonly(obj),
			       i915_gem_object_is_lmem(obj), obj->mm.region,
			       vma->ops, vma->private, __i915_vma_offset(vma),
			       __i915_vma_size(vma), vma->size, vma->guard);
}

/**
 * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
 * @vma: VMA to map
 * @pat_index: PAT index to set in PTE
 * @flags: flags like global or local mapping
 * @work: preallocated worker for allocating and binding the PTE
 * @vma_res: pointer to a preallocated vma resource. The resource is either
 * consumed or freed.
 *
 * DMA addresses are taken from the scatter-gather table of this object (or of
 * this VMA in case of non-default GGTT views) and PTE entries set up.
 * Note that DMA addresses are also the only part of the SG table we care about.
 */
int i915_vma_bind(struct i915_vma *vma,
		  unsigned int pat_index,
		  u32 flags,
		  struct i915_vma_work *work,
		  struct i915_vma_resource *vma_res)
{
	u32 bind_flags;
	u32 vma_flags;
	int ret;

	lockdep_assert_held(&vma->vm->mutex);
	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
	GEM_BUG_ON(vma->size > i915_vma_size(vma));

	if (GEM_DEBUG_WARN_ON(range_overflows(vma->node.start,
					      vma->node.size,
					      vma->vm->total))) {
		i915_vma_resource_free(vma_res);
		return -ENODEV;
	}

	if (GEM_DEBUG_WARN_ON(!flags)) {
		i915_vma_resource_free(vma_res);
		return -EINVAL;
	}

	bind_flags = flags;
	bind_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;

	vma_flags = atomic_read(&vma->flags);
	vma_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;

	bind_flags &= ~vma_flags;
	if (bind_flags == 0) {
		i915_vma_resource_free(vma_res);
		return 0;
	}

	GEM_BUG_ON(!atomic_read(&vma->pages_count));

	/* Wait for or await async unbinds touching our range */
	if (work && bind_flags & vma->vm->bind_async_flags)
		ret = i915_vma_resource_bind_dep_await(vma->vm,
						       &work->base.chain,
						       vma->node.start,
						       vma->node.size,
						       true,
						       GFP_NOWAIT |
						       __GFP_RETRY_MAYFAIL |
						       __GFP_NOWARN);
	else
		ret = i915_vma_resource_bind_dep_sync(vma->vm, vma->node.start,
						      vma->node.size, true);
	if (ret) {
		i915_vma_resource_free(vma_res);
		return ret;
	}

	if (vma->resource || !vma_res) {
		/* Rebinding with an additional I915_VMA_*_BIND */
		GEM_WARN_ON(!vma_flags);
		i915_vma_resource_free(vma_res);
	} else {
		i915_vma_resource_init_from_vma(vma_res, vma);
		vma->resource = vma_res;
	}
	trace_i915_vma_bind(vma, bind_flags);
	if (work && bind_flags & vma->vm->bind_async_flags) {
		struct dma_fence *prev;

		work->vma_res = i915_vma_resource_get(vma->resource);
		work->pat_index = pat_index;
		work->flags = bind_flags;

		/*
		 * Note we only want to chain up to the migration fence on
		 * the pages (not the object itself). As we don't track that,
		 * yet, we have to use the exclusive fence instead.
		 *
		 * Also note that we do not want to track the async vma as
		 * part of the obj->resv->excl_fence as it only affects
		 * execution and not content or object's backing store lifetime.
		 */
		prev = i915_active_set_exclusive(&vma->active, &work->base.dma);
		if (prev) {
			__i915_sw_fence_await_dma_fence(&work->base.chain,
							prev,
							&work->cb);
			dma_fence_put(prev);
		}

		work->base.dma.error = 0; /* enable the queue_work() */
		work->obj = i915_gem_object_get(vma->obj);
	} else {
		ret = i915_gem_object_wait_moving_fence(vma->obj, true);
		if (ret) {
			i915_vma_resource_free(vma->resource);
			vma->resource = NULL;

			return ret;
		}
		vma->ops->bind_vma(vma->vm, NULL, vma->resource, pat_index,
				   bind_flags);
	}

	atomic_or(bind_flags, &vma->flags);
	return 0;
}

void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
{
	void __iomem *ptr;
	int err;

	if (WARN_ON_ONCE(vma->obj->flags & I915_BO_ALLOC_GPU_ONLY))
		return IOMEM_ERR_PTR(-EINVAL);

	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
	GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND));
	GEM_BUG_ON(i915_vma_verify_bind_complete(vma));

	ptr = READ_ONCE(vma->iomap);
	if (ptr == NULL) {
		/*
		 * TODO: consider just using i915_gem_object_pin_map() for lmem
		 * instead, which already supports mapping non-contiguous chunks
		 * of pages, that way we can also drop the
		 * I915_BO_ALLOC_CONTIGUOUS when allocating the object.
		 */
		if (i915_gem_object_is_lmem(vma->obj)) {
			ptr = i915_gem_object_lmem_io_map(vma->obj, 0,
							  vma->obj->base.size);
		} else if (i915_vma_is_map_and_fenceable(vma)) {
			ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->iomap,
						i915_vma_offset(vma),
						i915_vma_size(vma));
		} else {
			ptr = (void __iomem *)
				i915_gem_object_pin_map(vma->obj, I915_MAP_WC);
			if (IS_ERR(ptr)) {
				err = PTR_ERR(ptr);
				goto err;
			}
			ptr = page_pack_bits(ptr, 1);
		}

		if (ptr == NULL) {
			err = -ENOMEM;
			goto err;
		}

		if (unlikely(cmpxchg(&vma->iomap, NULL, ptr))) {
			if (page_unmask_bits(ptr))
				__i915_gem_object_release_map(vma->obj);
			else
				io_mapping_unmap(ptr);
			ptr = vma->iomap;
		}
	}

	__i915_vma_pin(vma);

	err = i915_vma_pin_fence(vma);
	if (err)
		goto err_unpin;

	i915_vma_set_ggtt_write(vma);

	/* NB Access through the GTT requires the device to be awake. */
	return page_mask_bits(ptr);

err_unpin:
	__i915_vma_unpin(vma);
err:
	return IOMEM_ERR_PTR(err);
}
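
/*
 * Pairing note (illustrative sketch, error handling elided): every
 * successful i915_vma_pin_iomap() must be balanced by a matching
 * i915_vma_unpin_iomap(), e.g.
 *
 *	map = i915_vma_pin_iomap(vma);
 *	if (IS_ERR(map))
 *		return PTR_ERR(map);
 *	writel(value, map + offset);
 *	i915_vma_unpin_iomap(vma);
 *
 * The iomap itself is cached on the vma and only torn down on unbind.
 */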

void i915_vma_flush_writes(struct i915_vma *vma)
{
	if (i915_vma_unset_ggtt_write(vma))
		intel_gt_flush_ggtt_writes(vma->vm->gt);
}

void i915_vma_unpin_iomap(struct i915_vma *vma)
{
	GEM_BUG_ON(vma->iomap == NULL);

	/* XXX We keep the mapping until __i915_vma_unbind()/evict() */

	i915_vma_flush_writes(vma);

	i915_vma_unpin_fence(vma);
	i915_vma_unpin(vma);
}

void i915_vma_unpin_and_release(struct i915_vma **p_vma, unsigned int flags)
{
	struct i915_vma *vma;
	struct drm_i915_gem_object *obj;

	vma = fetch_and_zero(p_vma);
	if (!vma)
		return;

	obj = vma->obj;
	GEM_BUG_ON(!obj);

	i915_vma_unpin(vma);

	if (flags & I915_VMA_RELEASE_MAP)
		i915_gem_object_unpin_map(obj);

	i915_gem_object_put(obj);
}

bool i915_vma_misplaced(const struct i915_vma *vma,
			u64 size, u64 alignment, u64 flags)
{
	if (!drm_mm_node_allocated(&vma->node))
		return false;

	if (test_bit(I915_VMA_ERROR_BIT, __i915_vma_flags(vma)))
		return true;

	if (i915_vma_size(vma) < size)
		return true;

	GEM_BUG_ON(alignment && !is_power_of_2(alignment));
	if (alignment && !IS_ALIGNED(i915_vma_offset(vma), alignment))
		return true;

	if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma))
		return true;

	if (flags & PIN_OFFSET_BIAS &&
	    i915_vma_offset(vma) < (flags & PIN_OFFSET_MASK))
		return true;

	if (flags & PIN_OFFSET_FIXED &&
	    i915_vma_offset(vma) != (flags & PIN_OFFSET_MASK))
		return true;

	if (flags & PIN_OFFSET_GUARD &&
	    vma->guard < (flags & PIN_OFFSET_MASK))
		return true;

	return false;
}
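
/*
 * Note on the PIN_OFFSET_* checks above: PIN_OFFSET_BIAS, PIN_OFFSET_FIXED
 * and PIN_OFFSET_GUARD are mutually exclusive (see the hweight64 assert in
 * i915_vma_insert()), and all three encode their operand in the same
 * PIN_OFFSET_MASK bits of @flags. For example (illustrative): pinning with
 * PIN_OFFSET_FIXED | 0x10000 is only satisfied by a vma whose offset is
 * exactly 0x10000.
 */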

void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
{
	bool mappable, fenceable;

	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
	GEM_BUG_ON(!vma->fence_size);

	fenceable = (i915_vma_size(vma) >= vma->fence_size &&
		     IS_ALIGNED(i915_vma_offset(vma), vma->fence_alignment));

	mappable = i915_ggtt_offset(vma) + vma->fence_size <=
		   i915_vm_to_ggtt(vma->vm)->mappable_end;

	if (mappable && fenceable)
		set_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma));
	else
		clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma));
}
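
/*
 * Worked example for the check above (illustrative numbers): with
 * fence_size = 1 MiB, fence_alignment = 1 MiB and mappable_end = 256 MiB,
 * a vma at offset 3 MiB spanning 1 MiB is both fenceable (size and
 * alignment are satisfied) and mappable (3 MiB + 1 MiB <= 256 MiB), so
 * I915_VMA_CAN_FENCE_BIT gets set.
 */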

bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color)
{
	struct drm_mm_node *node = &vma->node;
	struct drm_mm_node *other;

	/*
	 * On some machines we have to be careful when putting differing types
	 * of snoopable memory together to avoid the prefetcher crossing memory
	 * domains and dying. During vm initialisation, we decide whether or not
	 * these constraints apply and set the drm_mm.color_adjust
	 * appropriately.
	 */
	if (!i915_vm_has_cache_coloring(vma->vm))
		return true;

	/* Only valid to be called on an already inserted vma */
	GEM_BUG_ON(!drm_mm_node_allocated(node));
	GEM_BUG_ON(list_empty(&node->node_list));

	other = list_prev_entry(node, node_list);
	if (i915_node_color_differs(other, color) &&
	    !drm_mm_hole_follows(other))
		return false;

	other = list_next_entry(node, node_list);
	if (i915_node_color_differs(other, color) &&
	    !drm_mm_hole_follows(node))
		return false;

	return true;
}
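
/*
 * In other words (illustrative): two neighbouring nodes of differing cache
 * color are only valid when separated by a hole, e.g.
 *
 *	[ node A, color 0 ][ hole ][ node B, color 1 ]	-> valid
 *	[ node A, color 0 ][ node B, color 1 ]		-> invalid
 *
 * so the prefetcher can never walk straight from one color into another.
 */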

/**
 * i915_vma_insert - finds a slot for the vma in its address space
 * @vma: the vma
 * @ww: An optional struct i915_gem_ww_ctx
 * @size: requested size in bytes (can be larger than the VMA)
 * @alignment: required alignment
 * @flags: mask of PIN_* flags to use
 *
 * First we try to allocate some free space that meets the requirements for
 * the VMA. Failing that, if the flags permit, it will evict an old VMA,
 * preferably the oldest idle entry to make room for the new VMA.
 *
 * Returns:
 * 0 on success, negative error code otherwise.
 */
static int
i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
		u64 size, u64 alignment, u64 flags)
{
	unsigned long color, guard;
	u64 start, end;
	int ret;

	GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
	GEM_BUG_ON(hweight64(flags & (PIN_OFFSET_GUARD | PIN_OFFSET_FIXED | PIN_OFFSET_BIAS)) > 1);

	size = max(size, vma->size);
	alignment = max_t(typeof(alignment), alignment, vma->display_alignment);
	if (flags & PIN_MAPPABLE) {
		size = max_t(typeof(size), size, vma->fence_size);
		alignment = max_t(typeof(alignment),
				  alignment, vma->fence_alignment);
	}

	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
	GEM_BUG_ON(!IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
	GEM_BUG_ON(!is_power_of_2(alignment));

	guard = vma->guard; /* retain guard across rebinds */
	if (flags & PIN_OFFSET_GUARD) {
		GEM_BUG_ON(overflows_type(flags & PIN_OFFSET_MASK, u32));
		guard = max_t(u32, guard, flags & PIN_OFFSET_MASK);
	}
	/*
	 * As we align the node upon insertion, but the hardware gets
	 * node.start + guard, the easiest way to make that work is
	 * to make the guard a multiple of the alignment size.
	 */
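	/*
	 * For example (illustrative numbers): alignment = 64K with a
	 * requested guard of 4K rounds the guard up to 64K, so that
	 * node.start + guard, the address the hardware actually sees,
	 * stays 64K-aligned.
	 */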
	guard = ALIGN(guard, alignment);

	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
	GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE));

	end = vma->vm->total;
	if (flags & PIN_MAPPABLE)
		end = min_t(u64, end, i915_vm_to_ggtt(vma->vm)->mappable_end);
	if (flags & PIN_ZONE_4G)
		end = min_t(u64, end, (1ULL << 32) - I915_GTT_PAGE_SIZE);
	GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE));

	alignment = max(alignment, i915_vm_obj_min_alignment(vma->vm, vma->obj));

	/*
	 * If binding the object/GGTT view requires more space than the entire
	 * aperture has, reject it early before evicting everything in a vain
	 * attempt to find space.
	 */
	if (size > end - 2 * guard) {
		drm_dbg(vma->obj->base.dev,
			"Attempting to bind an object larger than the aperture: request=%llu > %s aperture=%llu\n",
			size, flags & PIN_MAPPABLE ? "mappable" : "total", end);
		return -ENOSPC;
	}

	color = 0;

	if (i915_vm_has_cache_coloring(vma->vm))
		color = vma->obj->pat_index;

	if (flags & PIN_OFFSET_FIXED) {
		u64 offset = flags & PIN_OFFSET_MASK;
		if (!IS_ALIGNED(offset, alignment) ||
		    range_overflows(offset, size, end))
			return -EINVAL;
		/*
		 * The caller knows nothing of the guard pages added by
		 * others and requests that the offset of the start of its
		 * buffer be fixed, which may not be the same as the
		 * position of the vma->node due to the guard pages.
		 */
		if (offset < guard || offset + size > end - guard)
			return -ENOSPC;

		ret = i915_gem_gtt_reserve(vma->vm, ww, &vma->node,
					   size + 2 * guard,
					   offset - guard,
					   color, flags);
		if (ret)
			return ret;
	} else {
		size += 2 * guard;
		/*
		 * We only support huge gtt pages through the 48b PPGTT,
		 * however we also don't want to force any alignment for
		 * objects which need to be tightly packed into the low 32bits.
		 *
		 * Note that we assume that GGTT are limited to 4GiB for the
		 * foreseeable future. See also i915_ggtt_offset().
		 */
		if (upper_32_bits(end - 1) &&
		    vma->page_sizes.sg > I915_GTT_PAGE_SIZE &&
		    !HAS_64K_PAGES(vma->vm->i915)) {
			/*
			 * We can't mix 64K and 4K PTEs in the same page-table
			 * (2M block), and so to avoid the ugliness and
			 * complexity of coloring we opt for just aligning 64K
			 * objects to 2M.
			 */
			u64 page_alignment =
				rounddown_pow_of_two(vma->page_sizes.sg |
						     I915_GTT_PAGE_SIZE_2M);

			/*
			 * Check we don't expand for the limited Global GTT
			 * (mappable aperture is even more precious!). This
			 * also checks that we exclude the aliasing-ppgtt.
			 */
			GEM_BUG_ON(i915_vma_is_ggtt(vma));

			alignment = max(alignment, page_alignment);

			if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
				size = round_up(size, I915_GTT_PAGE_SIZE_2M);
		}

		ret = i915_gem_gtt_insert(vma->vm, ww, &vma->node,
					  size, alignment, color,
					  start, end, flags);
		if (ret)
			return ret;

		GEM_BUG_ON(vma->node.start < start);
		GEM_BUG_ON(vma->node.start + vma->node.size > end);
	}
	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color));

	list_move_tail(&vma->vm_link, &vma->vm->bound_list);
	vma->guard = guard;

	return 0;
}

static void
i915_vma_detach(struct i915_vma *vma)
{
	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
	GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));

	/*
	 * And finally now the object is completely decoupled from this
	 * vma, we can drop its hold on the backing storage and allow
	 * it to be reaped by the shrinker.
	 */
	list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
}

static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
{
	unsigned int bound;

	bound = atomic_read(&vma->flags);

	if (flags & PIN_VALIDATE) {
		flags &= I915_VMA_BIND_MASK;

		return (flags & bound) == flags;
	}

	/* with the lock mandatory for unbind, we don't race here */
	flags &= I915_VMA_BIND_MASK;
	do {
		if (unlikely(flags & ~bound))
			return false;

		if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR)))
			return false;

		GEM_BUG_ON(((bound + 1) & I915_VMA_PIN_MASK) == 0);
	} while (!atomic_try_cmpxchg(&vma->flags, &bound, bound + 1));

	return true;
}
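
/*
 * A "qad" (quick-and-dirty) pin only succeeds when the vma is already
 * bound with every bind flag the caller asks for: the cmpxchg loop above
 * bumps the pin count in vma->flags without taking vm->mutex. For example
 * (illustrative), try_qad_pin(vma, PIN_GLOBAL) succeeds only if the vma
 * already holds a GGTT binding and is neither in error nor about to
 * overflow its pin count; otherwise the caller falls back to the full,
 * locked pin path in i915_vma_pin_ww().
 */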

static struct scatterlist *
rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset,
	     unsigned int width, unsigned int height,
	     unsigned int src_stride, unsigned int dst_stride,
	     struct sg_table *st, struct scatterlist *sg)
{
	unsigned int column, row;
	pgoff_t src_idx;

	for (column = 0; column < width; column++) {
		unsigned int left;

		src_idx = src_stride * (height - 1) + column + offset;
		for (row = 0; row < height; row++) {
			st->nents++;
			/*
			 * We don't need the pages, but need to initialize
			 * the entries so the sg list can be happily traversed.
			 * The only thing we need are DMA addresses.
			 */
			sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0);
			sg_dma_address(sg) =
				i915_gem_object_get_dma_address(obj, src_idx);
			sg_dma_len(sg) = I915_GTT_PAGE_SIZE;
			sg = sg_next(sg);
			src_idx -= src_stride;
		}

		left = (dst_stride - height) * I915_GTT_PAGE_SIZE;

		if (!left)
			continue;

		st->nents++;

		/*
		 * The DE ignores the PTEs for the padding tiles, the sg entry
		 * here is just a convenience to indicate how many padding PTEs
		 * to insert at this spot.
		 */
		sg_set_page(sg, NULL, left, 0);
		sg_dma_address(sg) = 0;
		sg_dma_len(sg) = left;
		sg = sg_next(sg);
	}

	return sg;
}
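
/*
 * Worked example for rotate_pages() (illustrative): for a 2x2 tile grid
 * with src_stride = 2 and offset = 0, the source page indices
 *
 *	0 1
 *	2 3
 *
 * are emitted column by column, bottom row first: 2, 0, 3, 1. Walking the
 * resulting sg list linearly therefore yields the 90-degree-rotated view
 * that the display engine scans out.
 */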

static noinline struct sg_table *
intel_rotate_pages(struct intel_rotation_info *rot_info,
		   struct drm_i915_gem_object *obj)
{
	unsigned int size = intel_rotation_info_size(rot_info);
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct sg_table *st;
	struct scatterlist *sg;
	int ret = -ENOMEM;
	int i;

	/* Allocate target SG list. */
	st = kmalloc_obj(*st);
	if (!st)
		goto err_st_alloc;

	ret = sg_alloc_table(st, size, GFP_KERNEL);
	if (ret)
		goto err_sg_alloc;

	st->nents = 0;
	sg = st->sgl;

	for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++)
		sg = rotate_pages(obj, rot_info->plane[i].offset,
				  rot_info->plane[i].width, rot_info->plane[i].height,
				  rot_info->plane[i].src_stride,
				  rot_info->plane[i].dst_stride,
				  st, sg);

	return st;

err_sg_alloc:
	kfree(st);
err_st_alloc:

	drm_dbg(&i915->drm, "Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
		obj->base.size, rot_info->plane[0].width,
		rot_info->plane[0].height, size);

	return ERR_PTR(ret);
}

static struct scatterlist *
add_padding_pages(unsigned int count,
		  struct sg_table *st, struct scatterlist *sg)
{
	st->nents++;

	/*
	 * The DE ignores the PTEs for the padding tiles, the sg entry
	 * here is just a convenience to indicate how many padding PTEs
	 * to insert at this spot.
	 */
	sg_set_page(sg, NULL, count * I915_GTT_PAGE_SIZE, 0);
	sg_dma_address(sg) = 0;
	sg_dma_len(sg) = count * I915_GTT_PAGE_SIZE;
	sg = sg_next(sg);

	return sg;
}

static struct scatterlist *
remap_tiled_color_plane_pages(struct drm_i915_gem_object *obj,
			      unsigned long offset, unsigned int alignment_pad,
			      unsigned int width, unsigned int height,
			      unsigned int src_stride, unsigned int dst_stride,
			      struct sg_table *st, struct scatterlist *sg,
			      unsigned int *gtt_offset)
{
	unsigned int row;

	if (!width || !height)
		return sg;

	if (alignment_pad)
		sg = add_padding_pages(alignment_pad, st, sg);

	for (row = 0; row < height; row++) {
		unsigned int left = width * I915_GTT_PAGE_SIZE;

		while (left) {
			dma_addr_t addr;
			unsigned int length;

			/*
			 * We don't need the pages, but need to initialize
			 * the entries so the sg list can be happily traversed.
			 * The only thing we need are DMA addresses.
			 */

			addr = i915_gem_object_get_dma_address_len(obj, offset, &length);

			length = min(left, length);

			st->nents++;

			sg_set_page(sg, NULL, length, 0);
			sg_dma_address(sg) = addr;
			sg_dma_len(sg) = length;
			sg = sg_next(sg);

			offset += length / I915_GTT_PAGE_SIZE;
			left -= length;
		}

		offset += src_stride - width;

		left = (dst_stride - width) * I915_GTT_PAGE_SIZE;

		if (!left)
			continue;

		sg = add_padding_pages(left >> PAGE_SHIFT, st, sg);
	}

	*gtt_offset += alignment_pad + dst_stride * height;

	return sg;
}

static struct scatterlist *
remap_contiguous_pages(struct drm_i915_gem_object *obj,
		       pgoff_t obj_offset,
		       unsigned int count,
		       struct sg_table *st, struct scatterlist *sg)
{
	struct scatterlist *iter;
	unsigned int offset;

	iter = i915_gem_object_get_sg_dma(obj, obj_offset, &offset);
	GEM_BUG_ON(!iter);

	do {
		unsigned int len;

		len = min(sg_dma_len(iter) - (offset << PAGE_SHIFT),
			  count << PAGE_SHIFT);
		sg_set_page(sg, NULL, len, 0);
		sg_dma_address(sg) =
			sg_dma_address(iter) + (offset << PAGE_SHIFT);
		sg_dma_len(sg) = len;

		st->nents++;
		count -= len >> PAGE_SHIFT;
		if (count == 0)
			return sg;

		sg = __sg_next(sg);
		iter = __sg_next(iter);
		offset = 0;
	} while (1);
}

static struct scatterlist *
remap_linear_color_plane_pages(struct drm_i915_gem_object *obj,
			       pgoff_t obj_offset, unsigned int alignment_pad,
			       unsigned int size,
			       struct sg_table *st, struct scatterlist *sg,
			       unsigned int *gtt_offset)
{
	if (!size)
		return sg;

	if (alignment_pad)
		sg = add_padding_pages(alignment_pad, st, sg);

	sg = remap_contiguous_pages(obj, obj_offset, size, st, sg);
	sg = sg_next(sg);

	*gtt_offset += alignment_pad + size;

	return sg;
}

static struct scatterlist *
remap_color_plane_pages(const struct intel_remapped_info *rem_info,
			struct drm_i915_gem_object *obj,
			int color_plane,
			struct sg_table *st, struct scatterlist *sg,
			unsigned int *gtt_offset)
{
	unsigned int alignment_pad = 0;

	if (rem_info->plane_alignment)
		alignment_pad = ALIGN(*gtt_offset, rem_info->plane_alignment) - *gtt_offset;

	if (rem_info->plane[color_plane].linear)
		sg = remap_linear_color_plane_pages(obj,
						    rem_info->plane[color_plane].offset,
						    alignment_pad,
						    rem_info->plane[color_plane].size,
						    st, sg,
						    gtt_offset);

	else
		sg = remap_tiled_color_plane_pages(obj,
						   rem_info->plane[color_plane].offset,
						   alignment_pad,
						   rem_info->plane[color_plane].width,
						   rem_info->plane[color_plane].height,
						   rem_info->plane[color_plane].src_stride,
						   rem_info->plane[color_plane].dst_stride,
						   st, sg,
						   gtt_offset);

	return sg;
}

static noinline struct sg_table *
intel_remap_pages(struct intel_remapped_info *rem_info,
		  struct drm_i915_gem_object *obj)
{
	unsigned int size = intel_remapped_info_size(rem_info);
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct sg_table *st;
	struct scatterlist *sg;
	unsigned int gtt_offset = 0;
	int ret = -ENOMEM;
	int i;

	/* Allocate target SG list. */
	st = kmalloc_obj(*st);
	if (!st)
		goto err_st_alloc;

	ret = sg_alloc_table(st, size, GFP_KERNEL);
	if (ret)
		goto err_sg_alloc;

	st->nents = 0;
	sg = st->sgl;

	for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++)
		sg = remap_color_plane_pages(rem_info, obj, i, st, sg, &gtt_offset);

	i915_sg_trim(st);

	return st;

err_sg_alloc:
	kfree(st);
err_st_alloc:

	drm_dbg(&i915->drm, "Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n",
		obj->base.size, rem_info->plane[0].width,
		rem_info->plane[0].height, size);

	return ERR_PTR(ret);
}

static noinline struct sg_table *
intel_partial_pages(const struct i915_gtt_view *view,
		    struct drm_i915_gem_object *obj)
{
	struct sg_table *st;
	struct scatterlist *sg;
	unsigned int count = view->partial.size;
	int ret = -ENOMEM;

	st = kmalloc_obj(*st);
	if (!st)
		goto err_st_alloc;

	ret = sg_alloc_table(st, count, GFP_KERNEL);
	if (ret)
		goto err_sg_alloc;

	st->nents = 0;

	sg = remap_contiguous_pages(obj, view->partial.offset, count, st, st->sgl);

	sg_mark_end(sg);
	i915_sg_trim(st);	/* Drop any unused tail entries. */

	return st;

err_sg_alloc:
	kfree(st);
err_st_alloc:
	return ERR_PTR(ret);
}

static int
__i915_vma_get_pages(struct i915_vma *vma)
{
	struct sg_table *pages;

	/*
	 * The vma->pages are only valid within the lifespan of the borrowed
	 * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so
	 * must be the vma->pages. A simple rule is that vma->pages must only
	 * be accessed when the obj->mm.pages are pinned.
	 */
	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));

	switch (vma->gtt_view.type) {
	default:
		GEM_BUG_ON(vma->gtt_view.type);
		fallthrough;
	case I915_GTT_VIEW_NORMAL:
		pages = vma->obj->mm.pages;
		break;

	case I915_GTT_VIEW_ROTATED:
		pages =
			intel_rotate_pages(&vma->gtt_view.rotated, vma->obj);
		break;

	case I915_GTT_VIEW_REMAPPED:
		pages =
			intel_remap_pages(&vma->gtt_view.remapped, vma->obj);
		break;

	case I915_GTT_VIEW_PARTIAL:
		pages = intel_partial_pages(&vma->gtt_view, vma->obj);
		break;
	}

	if (IS_ERR(pages)) {
		drm_err(&vma->vm->i915->drm,
			"Failed to get pages for VMA view type %u (%ld)!\n",
			vma->gtt_view.type, PTR_ERR(pages));
		return PTR_ERR(pages);
	}

	vma->pages = pages;

	return 0;
}

I915_SELFTEST_EXPORT int i915_vma_get_pages(struct i915_vma *vma)
{
	int err;

	if (atomic_add_unless(&vma->pages_count, 1, 0))
		return 0;

	err = i915_gem_object_pin_pages(vma->obj);
	if (err)
		return err;

	err = __i915_vma_get_pages(vma);
	if (err)
		goto err_unpin;

	vma->page_sizes = vma->obj->mm.page_sizes;
	atomic_inc(&vma->pages_count);

	return 0;

err_unpin:
	__i915_gem_object_unpin_pages(vma->obj);

	return err;
}

void vma_invalidate_tlb(struct i915_address_space *vm, u32 *tlb)
{
	struct intel_gt *gt;
	int id;

	if (!tlb)
		return;

	/*
	 * Before we release the pages that were bound by this vma, we
	 * must invalidate all the TLBs that may still have a reference
	 * back to our physical address. It only needs to be done once,
	 * so after updating the PTE to point away from the pages, record
	 * the most recent TLB invalidation seqno, and if we have not yet
	 * flushed the TLBs upon release, perform a full invalidation.
	 */
	for_each_gt(gt, vm->i915, id)
		WRITE_ONCE(tlb[id],
			   intel_gt_next_invalidate_tlb_full(gt));
}

static void __vma_put_pages(struct i915_vma *vma, unsigned int count)
{
	/* We allocate under vma_get_pages, so beware the shrinker */
	GEM_BUG_ON(atomic_read(&vma->pages_count) < count);

	if (atomic_sub_return(count, &vma->pages_count) == 0) {
		if (vma->pages != vma->obj->mm.pages) {
			sg_free_table(vma->pages);
			kfree(vma->pages);
		}
		vma->pages = NULL;

		i915_gem_object_unpin_pages(vma->obj);
	}
}

I915_SELFTEST_EXPORT void i915_vma_put_pages(struct i915_vma *vma)
{
	if (atomic_add_unless(&vma->pages_count, -1, 1))
		return;

	__vma_put_pages(vma, 1);
}

static void vma_unbind_pages(struct i915_vma *vma)
{
	unsigned int count;

	lockdep_assert_held(&vma->vm->mutex);

	/* The upper portion of pages_count is the number of bindings */
	count = atomic_read(&vma->pages_count);
	count >>= I915_VMA_PAGES_BIAS;
	GEM_BUG_ON(!count);

	__vma_put_pages(vma, count | count << I915_VMA_PAGES_BIAS);
}
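
/*
 * Illustration of the pages_count encoding used above (field widths are
 * those implied by I915_VMA_PAGES_BIAS, assumed to be 24 here): a value of
 * 0x01000002 means one binding in the upper bits plus two page references
 * in the lower bits, so vma_unbind_pages() releases the binding's
 * contribution to both halves with a single __vma_put_pages() call.
 */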

int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
		    u64 size, u64 alignment, u64 flags)
{
	struct i915_vma_work *work = NULL;
	struct dma_fence *moving = NULL;
	struct i915_vma_resource *vma_res = NULL;
	intel_wakeref_t wakeref;
	unsigned int bound;
	int err;

	assert_vma_held(vma);
	GEM_BUG_ON(!ww);

	BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND);
	BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND);

	GEM_BUG_ON(!(flags & (PIN_USER | PIN_GLOBAL)));

	/* First try and grab the pin without rebinding the vma */
	if (try_qad_pin(vma, flags))
		return 0;

	err = i915_vma_get_pages(vma);
	if (err)
		return err;

	/*
	 * In case of a global GTT, we must hold a runtime-pm wakeref
	 * while global PTEs are updated. In other cases, we hold
	 * the rpm reference while the VMA is active. Since runtime
	 * resume may require allocations, which are forbidden inside
	 * vm->mutex, get the first rpm wakeref outside of the mutex.
	 */
	wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);

	if (flags & vma->vm->bind_async_flags) {
		/* lock VM */
		err = i915_vm_lock_objects(vma->vm, ww);
		if (err)
			goto err_rpm;

		work = i915_vma_work();
		if (!work) {
			err = -ENOMEM;
			goto err_rpm;
		}

		work->vm = vma->vm;

		err = i915_gem_object_get_moving_fence(vma->obj, &moving);
		if (err)
			goto err_rpm;

		dma_fence_work_chain(&work->base, moving);

		/* Allocate enough page directories to cover the used PTEs */
		if (vma->vm->allocate_va_range) {
			err = i915_vm_alloc_pt_stash(vma->vm,
						     &work->stash,
						     vma->size);
			if (err)
				goto err_fence;

			err = i915_vm_map_pt_stash(vma->vm, &work->stash);
			if (err)
				goto err_fence;
		}
	}

	vma_res = i915_vma_resource_alloc();
	if (IS_ERR(vma_res)) {
		err = PTR_ERR(vma_res);
		goto err_fence;
	}

	/*
	 * Differentiate between user/kernel vma inside the aliasing-ppgtt.
	 *
	 * We conflate the Global GTT with the user's vma when using the
	 * aliasing-ppgtt, but it is still vitally important to try and
	 * keep the use cases distinct. For example, userptr objects are
	 * not allowed inside the Global GTT as that will cause lock
	 * inversions when we have to evict them inside the mmu_notifier
	 * callbacks - but they are allowed to be part of the user ppGTT
	 * which can never be mapped. As such we try to give the distinct
	 * users of the same mutex, distinct lockclasses [equivalent to how
	 * we keep i915_ggtt and i915_ppgtt separate].
	 *
	 * NB this may cause us to mask real lock inversions -- while the
	 * code is safe today, lockdep may not be able to spot future
	 * transgressions.
	 */
	err = mutex_lock_interruptible_nested(&vma->vm->mutex,
					      !(flags & PIN_GLOBAL));
	if (err)
		goto err_vma_res;

	/* No more allocations allowed now we hold vm->mutex */

	if (unlikely(i915_vma_is_closed(vma))) {
		err = -ENOENT;
		goto err_unlock;
	}

	bound = atomic_read(&vma->flags);
	if (unlikely(bound & I915_VMA_ERROR)) {
		err = -ENOMEM;
		goto err_unlock;
	}

	if (unlikely(!((bound + 1) & I915_VMA_PIN_MASK))) {
		err = -EAGAIN; /* pins are meant to be fairly temporary */
		goto err_unlock;
	}

	if (unlikely(!(flags & ~bound & I915_VMA_BIND_MASK))) {
		if (!(flags & PIN_VALIDATE))
			__i915_vma_pin(vma);
		goto err_unlock;
	}

	err = i915_active_acquire(&vma->active);
	if (err)
		goto err_unlock;

	if (!(bound & I915_VMA_BIND_MASK)) {
		err = i915_vma_insert(vma, ww, size, alignment, flags);
		if (err)
			goto err_active;

		if (i915_is_ggtt(vma->vm))
			__i915_vma_set_map_and_fenceable(vma);
	}

	GEM_BUG_ON(!vma->pages);
	err = i915_vma_bind(vma,
			    vma->obj->pat_index,
			    flags, work, vma_res);
	vma_res = NULL;
	if (err)
		goto err_remove;

	/* There should only be at most 2 active bindings (user, global) */
	GEM_BUG_ON(bound + I915_VMA_PAGES_ACTIVE < bound);
	atomic_add(I915_VMA_PAGES_ACTIVE, &vma->pages_count);
	list_move_tail(&vma->vm_link, &vma->vm->bound_list);

	if (!(flags & PIN_VALIDATE)) {
		__i915_vma_pin(vma);
		GEM_BUG_ON(!i915_vma_is_pinned(vma));
	}
	GEM_BUG_ON(!i915_vma_is_bound(vma, flags));
	GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));

err_remove:
	if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK)) {
		i915_vma_detach(vma);
		drm_mm_remove_node(&vma->node);
	}
err_active:
	i915_active_release(&vma->active);
err_unlock:
	mutex_unlock(&vma->vm->mutex);
err_vma_res:
	i915_vma_resource_free(vma_res);
err_fence:
	if (work) {
		/*
		 * When pinning VMA to GGTT on CHV or BXT with VTD enabled,
		 * commit VMA binding asynchronously to avoid risk of lock
		 * inversion among reservation_ww locks held here and
		 * cpu_hotplug_lock acquired from stop_machine(), which we
		 * wrap around GGTT updates when running in those environments.
		 */
		if (i915_vma_is_ggtt(vma) &&
		    intel_vm_no_concurrent_access_wa(vma->vm->i915))
			dma_fence_work_commit(&work->base);
		else
			dma_fence_work_commit_imm(&work->base);
	}
err_rpm:
	intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);

	if (moving)
		dma_fence_put(moving);

	i915_vma_put_pages(vma);
	return err;
}

int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
{
	struct i915_gem_ww_ctx ww;
	int err;

	i915_gem_ww_ctx_init(&ww, true);
retry:
	err = i915_gem_object_lock(vma->obj, &ww);
	if (!err)
		err = i915_vma_pin_ww(vma, &ww, size, alignment, flags);
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);

	return err;
}

static void flush_idle_contexts(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, gt, id)
		intel_engine_flush_barriers(engine);

	intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT);
}

static int __i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
			   u32 align, unsigned int flags)
{
	struct i915_address_space *vm = vma->vm;
	struct intel_gt *gt;
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	int err;

	do {
		err = i915_vma_pin_ww(vma, ww, 0, align, flags | PIN_GLOBAL);

		if (err != -ENOSPC) {
			if (!err) {
				err = i915_vma_wait_for_bind(vma);
				if (err)
					i915_vma_unpin(vma);
			}
			return err;
		}

		/* Unlike i915_vma_pin, we don't take no for an answer! */
		list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
			flush_idle_contexts(gt);
		if (mutex_lock_interruptible(&vm->mutex) == 0) {
			/*
			 * We pass NULL ww here, as we don't want to unbind
			 * locked objects when called from execbuf when pinning
			 * is removed. This would probably regress badly.
			 */
			i915_gem_evict_vm(vm, NULL, NULL);
			mutex_unlock(&vm->mutex);
		}
	} while (1);
}

int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
		  u32 align, unsigned int flags)
{
	struct i915_gem_ww_ctx _ww;
	int err;

	GEM_BUG_ON(!i915_vma_is_ggtt(vma));

	if (ww)
		return __i915_ggtt_pin(vma, ww, align, flags);

	lockdep_assert_not_held(&vma->obj->base.resv->lock.base);

	for_i915_gem_ww(&_ww, err, true) {
		err = i915_gem_object_lock(vma->obj, &_ww);
		if (!err)
			err = __i915_ggtt_pin(vma, &_ww, align, flags);
	}

	return err;
}

/**
 * i915_ggtt_clear_scanout - Clear scanout flag for all objects ggtt vmas
 * @obj: i915 GEM object
 *
 * This function clears the scanout flag for all of the object's GGTT VMAs.
 * The flag is set when an object is pinned for display use, and frontbuffer
 * tracking code is expected to call this function to clear them all when
 * the frontbuffer is about to be released.
 */
void i915_ggtt_clear_scanout(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma;

	spin_lock(&obj->vma.lock);
	for_each_ggtt_vma(vma, obj) {
		i915_vma_clear_scanout(vma);
		vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
	}
	spin_unlock(&obj->vma.lock);
}

static void __vma_close(struct i915_vma *vma, struct intel_gt *gt)
{
	/*
	 * We defer actually closing, unbinding and destroying the VMA until
	 * the next idle point, or if the object is freed in the meantime. By
	 * postponing the unbind, we allow for it to be resurrected by the
	 * client, avoiding the work required to rebind the VMA. This is
	 * advantageous for DRI, where the client/server pass objects
	 * between themselves, temporarily opening a local VMA to the
	 * object, and then closing it again. The same object is then reused
	 * on the next frame (or two, depending on the depth of the swap queue)
	 * causing us to rebind the VMA once more. This ends up being a lot
	 * of wasted work for the steady state.
	 */
	GEM_BUG_ON(i915_vma_is_closed(vma));
	list_add(&vma->closed_link, &gt->closed_vma);
}

void i915_vma_close(struct i915_vma *vma)
{
	struct intel_gt *gt = vma->vm->gt;
	unsigned long flags;

	if (i915_vma_is_ggtt(vma))
		return;

	GEM_BUG_ON(!atomic_read(&vma->open_count));
	if (atomic_dec_and_lock_irqsave(&vma->open_count,
					&gt->closed_lock,
					flags)) {
		__vma_close(vma, gt);
		spin_unlock_irqrestore(&gt->closed_lock, flags);
	}
}

static void __i915_vma_remove_closed(struct i915_vma *vma)
{
	list_del_init(&vma->closed_link);
}

void i915_vma_reopen(struct i915_vma *vma)
{
	struct intel_gt *gt = vma->vm->gt;

	spin_lock_irq(&gt->closed_lock);
	if (i915_vma_is_closed(vma))
		__i915_vma_remove_closed(vma);
	spin_unlock_irq(&gt->closed_lock);
}

static void force_unbind(struct i915_vma *vma)
{
	if (!drm_mm_node_allocated(&vma->node))
		return;

	atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
	WARN_ON(__i915_vma_unbind(vma));
	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
}

static void release_references(struct i915_vma *vma, struct intel_gt *gt,
			       bool vm_ddestroy)
{
	struct drm_i915_gem_object *obj = vma->obj;

	GEM_BUG_ON(i915_vma_is_active(vma));

	spin_lock(&obj->vma.lock);
	list_del(&vma->obj_link);
	if (!RB_EMPTY_NODE(&vma->obj_node))
		rb_erase(&vma->obj_node, &obj->vma.tree);

	spin_unlock(&obj->vma.lock);

	spin_lock_irq(&gt->closed_lock);
	__i915_vma_remove_closed(vma);
	spin_unlock_irq(&gt->closed_lock);

	if (vm_ddestroy)
		i915_vm_resv_put(vma->vm);

	i915_active_fini(&vma->active);
	GEM_WARN_ON(vma->resource);
	i915_vma_free(vma);
}

/*
 * i915_vma_destroy_locked - Remove all weak references to the vma and put
 * the initial reference.
 *
 * This function should be called when it's decided the vma isn't needed
 * anymore. The caller must assure that it doesn't race with another lookup
 * plus destroy, typically by taking an appropriate reference.
 *
 * Current callsites are
 * - __i915_gem_object_pages_fini()
 * - __i915_vm_close() - Blocks the above function by taking a reference on
 * the object.
 * - __i915_vma_parked() - Blocks the above functions by taking a reference
 * on the vm and a reference on the object. Also takes the object lock so
 * destruction from __i915_vma_parked() can be blocked by holding the
 * object lock. Since the object lock is only allowed from within i915 with
 * an object refcount, holding the object lock also implicitly blocks the
 * vma freeing from __i915_gem_object_pages_fini().
 *
 * Because of locks taken during destruction, a vma is also guaranteed to
 * stay alive while the following locks are held if it was looked up while
 * holding one of the locks:
 * - vm->mutex
 * - obj->vma.lock
 * - gt->closed_lock
 */
void i915_vma_destroy_locked(struct i915_vma *vma)
{
	lockdep_assert_held(&vma->vm->mutex);

	force_unbind(vma);
	list_del_init(&vma->vm_link);
	release_references(vma, vma->vm->gt, false);
}

void i915_vma_destroy(struct i915_vma *vma)
{
	struct intel_gt *gt;
	bool vm_ddestroy;

	mutex_lock(&vma->vm->mutex);
	force_unbind(vma);
	list_del_init(&vma->vm_link);
	vm_ddestroy = vma->vm_ddestroy;
	vma->vm_ddestroy = false;

	/* vma->vm may be freed when releasing vma->vm->mutex. */
	gt = vma->vm->gt;
	mutex_unlock(&vma->vm->mutex);
	release_references(vma, gt, vm_ddestroy);
}

void i915_vma_parked(struct intel_gt *gt)
{
	struct i915_vma *vma, *next;
	LIST_HEAD(closed);

	spin_lock_irq(&gt->closed_lock);
	list_for_each_entry_safe(vma, next, &gt->closed_vma, closed_link) {
		struct drm_i915_gem_object *obj = vma->obj;
		struct i915_address_space *vm = vma->vm;

		/* XXX All to avoid keeping a reference on i915_vma itself */

		if (!kref_get_unless_zero(&obj->base.refcount))
			continue;

		if (!i915_vm_tryget(vm)) {
			i915_gem_object_put(obj);
			continue;
		}

		list_move(&vma->closed_link, &closed);
	}
	spin_unlock_irq(&gt->closed_lock);

	/* As the GT is held idle, no vma can be reopened as we destroy them */
	list_for_each_entry_safe(vma, next, &closed, closed_link) {
		struct drm_i915_gem_object *obj = vma->obj;
		struct i915_address_space *vm = vma->vm;

		if (i915_gem_object_trylock(obj, NULL)) {
			INIT_LIST_HEAD(&vma->closed_link);
			i915_vma_destroy(vma);
			i915_gem_object_unlock(obj);
		} else {
			/* back you go.. */
			spin_lock_irq(&gt->closed_lock);
			list_add(&vma->closed_link, &gt->closed_vma);
			spin_unlock_irq(&gt->closed_lock);
		}

		i915_gem_object_put(obj);
		i915_vm_put(vm);
	}
}

static void __i915_vma_iounmap(struct i915_vma *vma)
{
	GEM_BUG_ON(i915_vma_is_pinned(vma));

	if (vma->iomap == NULL)
		return;

	if (page_unmask_bits(vma->iomap))
		__i915_gem_object_release_map(vma->obj);
	else
		io_mapping_unmap(vma->iomap);
	vma->iomap = NULL;
}

void i915_vma_revoke_mmap(struct i915_vma *vma)
{
	struct drm_vma_offset_node *node;
	u64 vma_offset;

	if (!i915_vma_has_userfault(vma))
		return;

	GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
	GEM_BUG_ON(!vma->obj->userfault_count);

	node = &vma->mmo->vma_node;
	vma_offset = vma->gtt_view.partial.offset << PAGE_SHIFT;
	unmap_mapping_range(vma->vm->i915->drm.anon_inode->i_mapping,
			    drm_vma_node_offset_addr(node) + vma_offset,
			    vma->size,
			    1);

	i915_vma_unset_userfault(vma);
	if (!--vma->obj->userfault_count)
		list_del(&vma->obj->userfault_link);
}
1952
1953static int
1954__i915_request_await_bind(struct i915_request *rq, struct i915_vma *vma)
1955{
1956 return __i915_request_await_exclusive(rq, &vma->active);
1957}
1958
1959static int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq)
1960{
1961 int err;
1962
1963 /* Wait for the vma to be bound before we start! */
1964 err = __i915_request_await_bind(rq, vma);
1965 if (err)
1966 return err;
1967
1968 return i915_active_add_request(&vma->active, rq);
1969}
1970
int _i915_vma_move_to_active(struct i915_vma *vma,
			     struct i915_request *rq,
			     struct dma_fence *fence,
			     unsigned int flags)
{
	struct drm_i915_gem_object *obj = vma->obj;
	int err;

	assert_object_held(obj);

	GEM_BUG_ON(!vma->pages);

	if (!(flags & __EXEC_OBJECT_NO_REQUEST_AWAIT)) {
		err = i915_request_await_object(rq, vma->obj, flags & EXEC_OBJECT_WRITE);
		if (unlikely(err))
			return err;
	}
	err = __i915_vma_move_to_active(vma, rq);
	if (unlikely(err))
		return err;

	/*
	 * Reserve fence slots early to prevent an allocation after preparing
	 * the workload and associating fences with dma_resv.
	 */
	if (fence && !(flags & __EXEC_OBJECT_NO_RESERVE)) {
		struct dma_fence *curr;
		int idx;

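		/*
		 * Empty loop body: dma_fence_array_for_each() is used here
		 * only to count the fences (singleton or array), leaving idx
		 * as the number of dma_resv slots to reserve.
		 */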
		dma_fence_array_for_each(curr, idx, fence)
			;
		err = dma_resv_reserve_fences(vma->obj->base.resv, idx);
		if (unlikely(err))
			return err;
	}

	if (flags & EXEC_OBJECT_WRITE) {
		struct i915_frontbuffer *front;

		front = i915_gem_object_frontbuffer_lookup(obj);
		if (unlikely(front)) {
			if (intel_frontbuffer_invalidate(&front->base, ORIGIN_CS))
				i915_active_add_request(&front->write, rq);
			i915_gem_object_frontbuffer_put(front);
		}
	}

	if (fence) {
		struct dma_fence *curr;
		enum dma_resv_usage usage;
		int idx;

		if (flags & EXEC_OBJECT_WRITE) {
			usage = DMA_RESV_USAGE_WRITE;
			obj->write_domain = I915_GEM_DOMAIN_RENDER;
			obj->read_domains = 0;
		} else {
			usage = DMA_RESV_USAGE_READ;
			obj->write_domain = 0;
		}

		dma_fence_array_for_each(curr, idx, fence)
			dma_resv_add_fence(vma->obj->base.resv, curr, usage);
	}

	if (flags & EXEC_OBJECT_NEEDS_FENCE && vma->fence)
		i915_active_add_request(&vma->fence->active, rq);

	obj->read_domains |= I915_GEM_GPU_DOMAINS;
	obj->mm.dirty = true;

	GEM_BUG_ON(!i915_vma_is_active(vma));
	return 0;
}

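/*
 * Tear down the vma's binding: revoke mmaps and the fence register for a
 * map-and-fenceable GGTT vma, unbind the vma resource (optionally
 * asynchronously, returning the unbind fence) and drop the pages. In the
 * synchronous case the TLBs are also invalidated before returning.
 */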
struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async)
{
	struct i915_vma_resource *vma_res = vma->resource;
	struct dma_fence *unbind_fence;

	GEM_BUG_ON(i915_vma_is_pinned(vma));
	assert_vma_held_evict(vma);

	if (i915_vma_is_map_and_fenceable(vma)) {
		/* Force a pagefault for domain tracking on next user access */
		i915_vma_revoke_mmap(vma);

		/*
		 * Check that we have flushed all writes through the GGTT
		 * before the unbind; otherwise, due to the non-strict nature
		 * of those indirect writes, they may end up referencing the
		 * GGTT PTE after the unbind.
		 *
		 * Note that we may be concurrently poking at the GGTT_WRITE
		 * bit from set-domain, as we mark all GGTT vma associated
		 * with an object. We know this is for another vma, as we
		 * are currently unbinding this one -- so if this vma will be
		 * reused, it will be refaulted and have its dirty bit set
		 * before the next write.
		 */
		i915_vma_flush_writes(vma);

		/* release the fence reg _after_ flushing */
		i915_vma_revoke_fence(vma);

		clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma));
	}

	__i915_vma_iounmap(vma);

	GEM_BUG_ON(vma->fence);
	GEM_BUG_ON(i915_vma_has_userfault(vma));

	/* Object backend must be async capable. */
	GEM_WARN_ON(async && !vma->resource->bi.pages_rsgt);

	/* If vm is not open, unbind is a nop. */
	vma_res->needs_wakeref = i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND) &&
		kref_read(&vma->vm->ref);
	vma_res->skip_pte_rewrite = !kref_read(&vma->vm->ref) ||
		vma->vm->skip_pte_rewrite;
	trace_i915_vma_unbind(vma);

	if (async)
		unbind_fence = i915_vma_resource_unbind(vma_res,
							vma->obj->mm.tlb);
	else
		unbind_fence = i915_vma_resource_unbind(vma_res, NULL);

	vma->resource = NULL;

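	/* Mark the vma unbound: clear the bind, error and GGTT-write flags. */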
	atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR | I915_VMA_GGTT_WRITE),
		   &vma->flags);

	i915_vma_detach(vma);

	if (!async) {
		if (unbind_fence) {
			dma_fence_wait(unbind_fence, false);
			dma_fence_put(unbind_fence);
			unbind_fence = NULL;
		}
		vma_invalidate_tlb(vma->vm, vma->obj->mm.tlb);
	}

	/*
	 * Binding itself may not have completed until the unbind fence signals,
	 * so don't drop the pages until that happens, unless the resource is
	 * async_capable.
	 */

	vma_unbind_pages(vma);
	return unbind_fence;
}

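/*
 * Unbind with vma->vm->mutex held: bails with -EAGAIN if the vma is pinned,
 * otherwise waits for any residual activity before evicting synchronously
 * and removing the vma's drm_mm node.
 */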
int __i915_vma_unbind(struct i915_vma *vma)
{
	int ret;

	lockdep_assert_held(&vma->vm->mutex);
	assert_vma_held_evict(vma);

	if (!drm_mm_node_allocated(&vma->node))
		return 0;

	if (i915_vma_is_pinned(vma)) {
		vma_print_allocator(vma, "is pinned");
		return -EAGAIN;
	}

	/*
	 * After confirming that no one else is pinning this vma, wait for
	 * any laggards who may have crept in during the wait (through
	 * a residual pin skipping the vm->mutex) to complete.
	 */
	ret = i915_vma_sync(vma);
	if (ret)
		return ret;

	GEM_BUG_ON(i915_vma_is_active(vma));
	__i915_vma_evict(vma, false);

	drm_mm_remove_node(&vma->node); /* pairs with i915_vma_release() */
	return 0;
}

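/*
 * Async flavour of __i915_vma_unbind(): rather than waiting for the vma to
 * idle, the vma_resource unbind is chained after the vma's outstanding
 * activity, and the unbind fence is returned for the caller to install in
 * the object's dma_resv.
 */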
static struct dma_fence *__i915_vma_unbind_async(struct i915_vma *vma)
{
	struct dma_fence *fence;

	lockdep_assert_held(&vma->vm->mutex);

	if (!drm_mm_node_allocated(&vma->node))
		return NULL;

	if (i915_vma_is_pinned(vma) ||
	    &vma->obj->mm.rsgt->table != vma->resource->bi.pages)
		return ERR_PTR(-EAGAIN);

	/*
	 * We probably need to replace this with awaiting the fences of the
	 * object's dma_resv when the vma active goes away. When doing that
	 * we need to be careful to not add the vma_resource unbind fence
	 * immediately to the object's dma_resv, because then unbinding
	 * the next vma from the object, in case there are many, will
	 * actually await the unbinding of the previous vmas, which is
	 * undesirable.
	 */
	if (i915_sw_fence_await_active(&vma->resource->chain, &vma->active,
				       I915_ACTIVE_AWAIT_EXCL |
				       I915_ACTIVE_AWAIT_ACTIVE) < 0) {
		return ERR_PTR(-EBUSY);
	}

	fence = __i915_vma_evict(vma, true);

	drm_mm_remove_node(&vma->node); /* pairs with i915_vma_release() */

	return fence;
}

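/*
 * Synchronous top-level unbind. Requires at least a shared object lock and,
 * for GGTT bindings, may grab a runtime-pm wakeref before taking vm->mutex,
 * since clearing the PTEs can touch the hardware.
 */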
int i915_vma_unbind(struct i915_vma *vma)
{
	struct i915_address_space *vm = vma->vm;
	intel_wakeref_t wakeref = NULL;
	int err;

	assert_object_held_shared(vma->obj);

	/* Optimistic wait before taking the mutex */
	err = i915_vma_sync(vma);
	if (err)
		return err;

	if (!drm_mm_node_allocated(&vma->node))
		return 0;

	if (i915_vma_is_pinned(vma)) {
		vma_print_allocator(vma, "is pinned");
		return -EAGAIN;
	}

	if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
		/* XXX not always required: nop_clear_range */
		wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);

	err = mutex_lock_interruptible_nested(&vma->vm->mutex, !wakeref);
	if (err)
		goto out_rpm;

	err = __i915_vma_unbind(vma);
	mutex_unlock(&vm->mutex);

out_rpm:
	if (wakeref)
		intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
	return err;
}

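/*
 * Asynchronous top-level unbind. The full object (dma-resv) lock is required
 * since the unbind fence is added to the object's dma_resv; vm->mutex is
 * either trylocked or taken interruptibly, depending on trylock_vm.
 */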
int i915_vma_unbind_async(struct i915_vma *vma, bool trylock_vm)
{
	struct drm_i915_gem_object *obj = vma->obj;
	struct i915_address_space *vm = vma->vm;
	intel_wakeref_t wakeref = NULL;
	struct dma_fence *fence;
	int err;

	/*
	 * We need the dma-resv lock since we add the
	 * unbind fence to the dma-resv object.
	 */
	assert_object_held(obj);

	if (!drm_mm_node_allocated(&vma->node))
		return 0;

	if (i915_vma_is_pinned(vma)) {
		vma_print_allocator(vma, "is pinned");
		return -EAGAIN;
	}

	if (!obj->mm.rsgt)
		return -EBUSY;

	err = dma_resv_reserve_fences(obj->base.resv, 2);
	if (err)
		return -EBUSY;

	/*
	 * It would be great if we could grab this wakeref from the
	 * async unbind work if needed, but we can't because it uses
	 * kmalloc and it's in the dma-fence signalling critical path.
	 */
	if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
		wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);

	if (trylock_vm && !mutex_trylock(&vm->mutex)) {
		err = -EBUSY;
		goto out_rpm;
	} else if (!trylock_vm) {
		err = mutex_lock_interruptible_nested(&vm->mutex, !wakeref);
		if (err)
			goto out_rpm;
	}

	fence = __i915_vma_unbind_async(vma);
	mutex_unlock(&vm->mutex);
	if (IS_ERR_OR_NULL(fence)) {
		err = PTR_ERR_OR_ZERO(fence);
		goto out_rpm;
	}

	dma_resv_add_fence(obj->base.resv, fence, DMA_RESV_USAGE_READ);
	dma_fence_put(fence);

out_rpm:
	if (wakeref)
		intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
	return err;
}

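/* Convenience wrapper that takes the object lock around i915_vma_unbind(). */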
int i915_vma_unbind_unlocked(struct i915_vma *vma)
{
	int err;

	i915_gem_object_lock(vma->obj, NULL);
	err = i915_vma_unbind(vma);
	i915_gem_object_unlock(vma->obj);

	return err;
}

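/* Shrinker hints for the vma simply forward to the backing object. */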
struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma)
{
	i915_gem_object_make_unshrinkable(vma->obj);
	return vma;
}

void i915_vma_make_shrinkable(struct i915_vma *vma)
{
	i915_gem_object_make_shrinkable(vma->obj);
}

void i915_vma_make_purgeable(struct i915_vma *vma)
{
	i915_gem_object_make_purgeable(vma->obj);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_vma.c"
#endif

void i915_vma_module_exit(void)
{
	kmem_cache_destroy(slab_vmas);
}

int __init i915_vma_module_init(void)
{
	slab_vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN);
	if (!slab_vmas)
		return -ENOMEM;

	return 0;
}

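/* Report the vma's fence register id (or -1 if none) to the display code. */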
static int i915_vma_fence_id(const struct i915_vma *vma)
{
	return vma->fence ? vma->fence->id : -1;
}

const struct intel_display_vma_interface i915_display_vma_interface = {
	.fence_id = i915_vma_fence_id,
};