// SPDX-License-Identifier: MIT
/*
 * Copyright © 2025 Intel Corporation
 */

#include "xe_vm_madvise.h"

#include <linux/nospec.h>
#include <drm/xe_drm.h>

#include "xe_bo.h"
#include "xe_pat.h"
#include "xe_pt.h"
#include "xe_svm.h"

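/*
 * struct xe_vmas_in_madvise_range - VMAs collected for one madvise request
 * @addr: start address of the madvise range
 * @range: size of the madvise range in bytes
 * @vmas: array of VMAs overlapping the range, allocated by get_vmas()
 * @num_vmas: number of entries in @vmas
 * @has_bo_vmas: at least one VMA in the range is backed by a BO
 * @has_svm_userptr_vmas: at least one VMA is a CPU address mirror or userptr
 */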
struct xe_vmas_in_madvise_range {
	u64 addr;
	u64 range;
	struct xe_vma **vmas;
	int num_vmas;
	bool has_bo_vmas;
	bool has_svm_userptr_vmas;
};

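/*
 * Collect all VMAs overlapping [addr, addr + range) into a dynamically grown
 * array, doubling its capacity whenever it fills up. Also records whether any
 * BO-backed or SVM/userptr VMAs were found, so the caller knows which locks to
 * take before applying the madvise operation. Requires vm->lock to be held.
 */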
static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_range)
{
	u64 addr = madvise_range->addr;
	u64 range = madvise_range->range;

	struct xe_vma **__vmas;
	struct drm_gpuva *gpuva;
	int max_vmas = 8;

	lockdep_assert_held(&vm->lock);

	madvise_range->num_vmas = 0;
	madvise_range->vmas = kmalloc_array(max_vmas, sizeof(*madvise_range->vmas), GFP_KERNEL);
	if (!madvise_range->vmas)
		return -ENOMEM;

	vm_dbg(&vm->xe->drm, "VMAs in range: start=0x%016llx, end=0x%016llx", addr, addr + range);

	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, addr, addr + range) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);

		if (xe_vma_bo(vma))
			madvise_range->has_bo_vmas = true;
		else if (xe_vma_is_cpu_addr_mirror(vma) || xe_vma_is_userptr(vma))
			madvise_range->has_svm_userptr_vmas = true;

		if (madvise_range->num_vmas == max_vmas) {
			max_vmas <<= 1;
			__vmas = krealloc(madvise_range->vmas,
					  max_vmas * sizeof(*madvise_range->vmas),
					  GFP_KERNEL);
			if (!__vmas) {
				kfree(madvise_range->vmas);
				return -ENOMEM;
			}
			madvise_range->vmas = __vmas;
		}

		madvise_range->vmas[madvise_range->num_vmas] = vma;
		(madvise_range->num_vmas)++;
	}

	if (!madvise_range->num_vmas)
		kfree(madvise_range->vmas);

	vm_dbg(&vm->xe->drm, "madvise_range->num_vmas = %d\n", madvise_range->num_vmas);

	return 0;
}

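/*
 * Apply DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC to each VMA in the range. VMAs
 * that already carry the requested devmem_fd and migration_policy, or that
 * are not CPU address mirrors, are flagged skip_invalidation so their GPU
 * mappings are left untouched.
 */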
static void madvise_preferred_mem_loc(struct xe_device *xe, struct xe_vm *vm,
				      struct xe_vma **vmas, int num_vmas,
				      struct drm_xe_madvise *op)
{
	int i;

	xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC);

	for (i = 0; i < num_vmas; i++) {
		/* TODO: Extend attributes to BO-based VMAs */
		if ((vmas[i]->attr.preferred_loc.devmem_fd == op->preferred_mem_loc.devmem_fd &&
		     vmas[i]->attr.preferred_loc.migration_policy ==
		     op->preferred_mem_loc.migration_policy) ||
		    !xe_vma_is_cpu_addr_mirror(vmas[i])) {
			vmas[i]->skip_invalidation = true;
		} else {
			vmas[i]->skip_invalidation = false;
			vmas[i]->attr.preferred_loc.devmem_fd = op->preferred_mem_loc.devmem_fd;
			/*
			 * Until multi-device support is added, migration_policy
			 * is unused and can be ignored.
			 */
			vmas[i]->attr.preferred_loc.migration_policy =
					op->preferred_mem_loc.migration_policy;
		}
	}
}

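/*
 * Apply DRM_XE_MEM_RANGE_ATTR_ATOMIC to each VMA and, for BO-backed VMAs, to
 * the BO itself. Userptr VMAs are skipped unless device atomics are requested
 * on a platform with device atomics on system memory. When a VRAM BO switches
 * to CPU or GLOBAL atomics, its CPU mappings are unmapped so the next CPU
 * access can migrate it to system memory.
 */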
static void madvise_atomic(struct xe_device *xe, struct xe_vm *vm,
			   struct xe_vma **vmas, int num_vmas,
			   struct drm_xe_madvise *op)
{
	struct xe_bo *bo;
	int i;

	xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC);
	xe_assert(vm->xe, op->atomic.val <= DRM_XE_ATOMIC_CPU);

	for (i = 0; i < num_vmas; i++) {
		if (xe_vma_is_userptr(vmas[i]) &&
		    !(op->atomic.val == DRM_XE_ATOMIC_DEVICE &&
		      xe->info.has_device_atomics_on_smem)) {
			vmas[i]->skip_invalidation = true;
			continue;
		}

		if (vmas[i]->attr.atomic_access == op->atomic.val) {
			vmas[i]->skip_invalidation = true;
		} else {
			vmas[i]->skip_invalidation = false;
			vmas[i]->attr.atomic_access = op->atomic.val;
		}

		bo = xe_vma_bo(vmas[i]);
		if (!bo || bo->attr.atomic_access == op->atomic.val)
			continue;

		vmas[i]->skip_invalidation = false;
		xe_bo_assert_held(bo);
		bo->attr.atomic_access = op->atomic.val;

		/* Invalidate the CPU page table, so the BO can migrate to SMEM on next access */
		if (xe_bo_is_vram(bo) &&
		    (bo->attr.atomic_access == DRM_XE_ATOMIC_CPU ||
		     bo->attr.atomic_access == DRM_XE_ATOMIC_GLOBAL))
			ttm_bo_unmap_virtual(&bo->ttm);
	}
}

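/*
 * Apply DRM_XE_MEM_RANGE_ATTR_PAT: update the PAT index of each VMA and only
 * schedule invalidation for VMAs whose index actually changes.
 */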
static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm,
			      struct xe_vma **vmas, int num_vmas,
			      struct drm_xe_madvise *op)
{
	int i;

	xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PAT);

	for (i = 0; i < num_vmas; i++) {
		if (vmas[i]->attr.pat_index == op->pat_index.val) {
			vmas[i]->skip_invalidation = true;
		} else {
			vmas[i]->skip_invalidation = false;
			vmas[i]->attr.pat_index = op->pat_index.val;
		}
	}
}

typedef void (*madvise_func)(struct xe_device *xe, struct xe_vm *vm,
			     struct xe_vma **vmas, int num_vmas,
			     struct drm_xe_madvise *op);

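/* Dispatch table indexed by the validated DRM_XE_MEM_RANGE_ATTR_* type. */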
static const madvise_func madvise_funcs[] = {
	[DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC] = madvise_preferred_mem_loc,
	[DRM_XE_MEM_RANGE_ATTR_ATOMIC] = madvise_atomic,
	[DRM_XE_MEM_RANGE_ATTR_PAT] = madvise_pat_index,
};

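/*
 * Zap the GPU PTEs of every VMA in [start, end) that is not flagged
 * skip_invalidation, and return a mask of the tiles whose mappings were
 * touched so the caller can issue the matching TLB invalidations.
 */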
static u8 xe_zap_ptes_in_madvise_range(struct xe_vm *vm, u64 start, u64 end)
{
	struct drm_gpuva *gpuva;
	struct xe_tile *tile;
	u8 id, tile_mask = 0;

	lockdep_assert_held_write(&vm->lock);

	/* Wait for pending binds */
	if (dma_resv_wait_timeout(xe_vm_resv(vm), DMA_RESV_USAGE_BOOKKEEP,
				  false, MAX_SCHEDULE_TIMEOUT) <= 0)
		XE_WARN_ON(1);

	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);

		if (vma->skip_invalidation || xe_vma_is_null(vma))
			continue;

		if (xe_vma_is_cpu_addr_mirror(vma)) {
			tile_mask |= xe_svm_ranges_zap_ptes_in_range(vm,
								     xe_vma_start(vma),
								     xe_vma_end(vma));
		} else {
			for_each_tile(tile, vm->xe, id) {
				if (xe_pt_zap_ptes(tile, vma)) {
					tile_mask |= BIT(id);

					/*
					 * WRITE_ONCE pairs with READ_ONCE
					 * in xe_vm_has_valid_gpu_mapping()
					 */
					WRITE_ONCE(vma->tile_invalidated,
						   vma->tile_invalidated | BIT(id));
				}
			}
		}
	}

	return tile_mask;
}

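/*
 * Zap PTEs in the madvise range and, if anything was actually unmapped, emit
 * a write barrier and invalidate the TLBs of the affected tiles.
 */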
static int xe_vm_invalidate_madvise_range(struct xe_vm *vm, u64 start, u64 end)
{
	u8 tile_mask = xe_zap_ptes_in_madvise_range(vm, start, end);

	if (!tile_mask)
		return 0;

	xe_device_wmb(vm->xe);

	return xe_vm_range_tilemask_tlb_inval(vm, start, end, tile_mask);
}

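/*
 * Validate the user-supplied drm_xe_madvise arguments: 4 KiB alignment of
 * start and range, a known attribute type and, per type, in-range values with
 * all pad/reserved fields zeroed.
 */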
static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madvise *args)
{
	if (XE_IOCTL_DBG(xe, !args))
		return false;

	if (XE_IOCTL_DBG(xe, !IS_ALIGNED(args->start, SZ_4K)))
		return false;

	if (XE_IOCTL_DBG(xe, !IS_ALIGNED(args->range, SZ_4K)))
		return false;

	if (XE_IOCTL_DBG(xe, args->range < SZ_4K))
		return false;

	switch (args->type) {
	case DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC:
	{
		s32 fd = (s32)args->preferred_mem_loc.devmem_fd;

		if (XE_IOCTL_DBG(xe, fd < DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM))
			return false;

		if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.migration_policy >
				 DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES))
			return false;

		if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.pad))
			return false;

		if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.reserved))
			return false;
		break;
	}
	case DRM_XE_MEM_RANGE_ATTR_ATOMIC:
		if (XE_IOCTL_DBG(xe, args->atomic.val > DRM_XE_ATOMIC_CPU))
			return false;

		if (XE_IOCTL_DBG(xe, args->atomic.pad))
			return false;

		if (XE_IOCTL_DBG(xe, args->atomic.reserved))
			return false;

		break;
	case DRM_XE_MEM_RANGE_ATTR_PAT:
	{
		u16 coh_mode = xe_pat_index_get_coh_mode(xe, args->pat_index.val);

		if (XE_IOCTL_DBG(xe, !coh_mode))
			return false;

		if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY))
			return false;

		if (XE_IOCTL_DBG(xe, args->pat_index.pad))
			return false;

		if (XE_IOCTL_DBG(xe, args->pat_index.reserved))
			return false;
		break;
	}
	default:
		if (XE_IOCTL_DBG(xe, 1))
			return false;
	}

	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return false;

	return true;
}

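/*
 * For an atomic-access madvise covering BO-backed VMAs, reject placements the
 * request cannot work with, e.g. CPU atomics on a BO that cannot be placed in
 * system memory.
 */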
static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas,
				   int num_vmas, u32 atomic_val)
{
	struct xe_device *xe = vm->xe;
	struct xe_bo *bo;
	int i;

	for (i = 0; i < num_vmas; i++) {
		bo = xe_vma_bo(vmas[i]);
		if (!bo)
			continue;
		/*
		 * NOTE: The following atomic checks are platform-specific. For example,
		 * if a device supports CXL atomics, these may not be necessary or
		 * may behave differently.
		 */
		if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_CPU &&
				 !(bo->flags & XE_BO_FLAG_SYSTEM)))
			return false;

		if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_DEVICE &&
				 !(bo->flags & XE_BO_FLAG_VRAM0) &&
				 !(bo->flags & XE_BO_FLAG_VRAM1) &&
				 !(bo->flags & XE_BO_FLAG_SYSTEM &&
				   xe->info.has_device_atomics_on_smem)))
			return false;

		if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_GLOBAL &&
				 (!(bo->flags & XE_BO_FLAG_SYSTEM) ||
				  (!(bo->flags & XE_BO_FLAG_VRAM0) &&
				   !(bo->flags & XE_BO_FLAG_VRAM1)))))
			return false;
	}
	return true;
}

/**
 * xe_vm_madvise_ioctl - Handle MADVISE ioctl for a VM
 * @dev: DRM device pointer
 * @data: Pointer to ioctl data (drm_xe_madvise*)
 * @file: DRM file pointer
 *
 * Handles the MADVISE ioctl to provide memory advice for the VMAs within
 * the input range.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_madvise *args = data;
	struct xe_vmas_in_madvise_range madvise_range = {.addr = args->start,
							 .range = args->range, };
	struct xe_vm *vm;
	struct drm_exec exec;
	int err, attr_type;

	vm = xe_vm_lookup(xef, args->vm_id);
	if (XE_IOCTL_DBG(xe, !vm))
		return -EINVAL;

	if (!madvise_args_are_sane(vm->xe, args)) {
		err = -EINVAL;
		goto put_vm;
	}

	xe_svm_flush(vm);

	err = down_write_killable(&vm->lock);
	if (err)
		goto put_vm;

	if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
		err = -ENOENT;
		goto unlock_vm;
	}

	err = xe_vm_alloc_madvise_vma(vm, args->start, args->range);
	if (err)
		goto unlock_vm;

	err = get_vmas(vm, &madvise_range);
	if (err || !madvise_range.num_vmas)
		goto unlock_vm;

	if (madvise_range.has_bo_vmas) {
		if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) {
			if (!check_bo_args_are_sane(vm, madvise_range.vmas,
						    madvise_range.num_vmas,
						    args->atomic.val)) {
				err = -EINVAL;
				/* err_fini is skipped on this path, so free the VMA array here */
				kfree(madvise_range.vmas);
				goto unlock_vm;
			}
		}

		drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES | DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
		drm_exec_until_all_locked(&exec) {
			for (int i = 0; i < madvise_range.num_vmas; i++) {
				struct xe_bo *bo = xe_vma_bo(madvise_range.vmas[i]);

				if (!bo)
					continue;
				err = drm_exec_lock_obj(&exec, &bo->ttm.base);
				drm_exec_retry_on_contention(&exec);
				if (err)
					goto err_fini;
			}
		}
	}

	if (madvise_range.has_svm_userptr_vmas) {
		err = xe_svm_notifier_lock_interruptible(vm);
		if (err)
			goto err_fini;
	}

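	/*
	 * args->type was validated in madvise_args_are_sane(); clamp it
	 * against speculation before indexing the dispatch table.
	 */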
	attr_type = array_index_nospec(args->type, ARRAY_SIZE(madvise_funcs));
	madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args);

	err = xe_vm_invalidate_madvise_range(vm, args->start, args->start + args->range);

	if (madvise_range.has_svm_userptr_vmas)
		xe_svm_notifier_unlock(vm);

err_fini:
	if (madvise_range.has_bo_vmas)
		drm_exec_fini(&exec);
	kfree(madvise_range.vmas);
	madvise_range.vmas = NULL;
unlock_vm:
	up_write(&vm->lock);
put_vm:
	xe_vm_put(vm);
	return err;
}