drm/xe: Reset VMA attributes to default in SVM garbage collector

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

Restore the default memory attributes of a VMA during SVM garbage collection
if they were modified by madvise. Reuse the existing VMA when it exactly covers
the collected range; otherwise, allocate a new CPU address mirror VMA for that range.

v2 (Matthew Brost)
- Add a helper for the VMA split
- Add a retry in the page fault handler to look up the updated VMA

v3
- Rebase on gpuvm layer

Suggested-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20250821173104.3030148-19-himal.prasad.ghimiray@intel.com
Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>

Himal Prasad Ghimiray 7 months ago a2eb8aec 58dc430d

+192 -50

3 changed files

expand all

drivers

gpu

drm

xe_svm.c

xe_vm.c

xe_vm.h

+75 -5

drivers/gpu/drm/xe/xe_svm.c

··· 253 253 return 0; 254 254 } 255 255 256 + static int xe_svm_range_set_default_attr(struct xe_vm *vm, u64 range_start, u64 range_end) 257 + { 258 + struct xe_vma *vma; 259 + struct xe_vma_mem_attr default_attr = { 260 + .preferred_loc = { 261 + .devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE, 262 + .migration_policy = DRM_XE_MIGRATE_ALL_PAGES, 263 + }, 264 + .atomic_access = DRM_XE_ATOMIC_UNDEFINED, 265 + }; 266 + int err = 0; 267 + 268 + vma = xe_vm_find_vma_by_addr(vm, range_start); 269 + if (!vma) 270 + return -EINVAL; 271 + 272 + if (xe_vma_has_default_mem_attrs(vma)) 273 + return 0; 274 + 275 + vm_dbg(&vm->xe->drm, "Existing VMA start=0x%016llx, vma_end=0x%016llx", 276 + xe_vma_start(vma), xe_vma_end(vma)); 277 + 278 + if (xe_vma_start(vma) == range_start && xe_vma_end(vma) == range_end) { 279 + default_attr.pat_index = vma->attr.default_pat_index; 280 + default_attr.default_pat_index = vma->attr.default_pat_index; 281 + vma->attr = default_attr; 282 + } else { 283 + vm_dbg(&vm->xe->drm, "Split VMA start=0x%016llx, vma_end=0x%016llx", 284 + range_start, range_end); 285 + err = xe_vm_alloc_cpu_addr_mirror_vma(vm, range_start, range_end - range_start); 286 + if (err) { 287 + drm_warn(&vm->xe->drm, "VMA SPLIT failed: %pe\n", ERR_PTR(err)); 288 + xe_vm_kill(vm, true); 289 + return err; 290 + } 291 + } 292 + 293 + /* 294 + * On call from xe_svm_handle_pagefault original VMA might be changed 295 + * signal this to lookup for VMA again. 
296 + */ 297 + return -EAGAIN; 298 + } 299 + 256 300 static int xe_svm_garbage_collector(struct xe_vm *vm) 257 301 { 258 302 struct xe_svm_range *range; 259 - int err; 303 + u64 range_start; 304 + u64 range_end; 305 + int err, ret = 0; 260 306 261 307 lockdep_assert_held_write(&vm->lock); 262 308 ··· 317 271 if (!range) 318 272 break; 319 273 274 + range_start = xe_svm_range_start(range); 275 + range_end = xe_svm_range_end(range); 276 + 320 277 list_del(&range->garbage_collector_link); 321 278 spin_unlock(&vm->svm.garbage_collector.lock); 322 279 ··· 332 283 return err; 333 284 } 334 285 286 + err = xe_svm_range_set_default_attr(vm, range_start, range_end); 287 + if (err) { 288 + if (err == -EAGAIN) 289 + ret = -EAGAIN; 290 + else 291 + return err; 292 + } 293 + 335 294 spin_lock(&vm->svm.garbage_collector.lock); 336 295 } 337 296 spin_unlock(&vm->svm.garbage_collector.lock); 338 297 339 - return 0; 298 + return ret; 340 299 } 341 300 342 301 static void xe_svm_garbage_collector_work_func(struct work_struct *w) ··· 984 927 struct xe_gt *gt, u64 fault_addr, 985 928 bool atomic) 986 929 { 987 - int need_vram; 988 - 930 + int need_vram, ret; 931 + retry: 989 932 need_vram = xe_vma_need_vram_for_atomic(vm->xe, vma, atomic); 990 933 if (need_vram < 0) 991 934 return need_vram; 992 935 993 - return __xe_svm_handle_pagefault(vm, vma, gt, fault_addr, need_vram ? true : false); 936 + ret = __xe_svm_handle_pagefault(vm, vma, gt, fault_addr, 937 + need_vram ? true : false); 938 + if (ret == -EAGAIN) { 939 + /* 940 + * Retry once on -EAGAIN to re-lookup the VMA, as the original VMA 941 + * may have been split by xe_svm_range_set_default_attr. 942 + */ 943 + vma = xe_vm_find_vma_by_addr(vm, fault_addr); 944 + if (!vma) 945 + return -EINVAL; 946 + 947 + goto retry; 948 + } 949 + return ret; 994 950 } 995 951 996 952 /**

+115 -45

drivers/gpu/drm/xe/xe_vm.c

··· 4291 4291 } 4292 4292 } 4293 4293 4294 - /** 4295 - * xe_vm_alloc_madvise_vma - Allocate VMA's with madvise ops 4296 - * @vm: Pointer to the xe_vm structure 4297 - * @start: Starting input address 4298 - * @range: Size of the input range 4299 - * 4300 - * This function splits existing vma to create new vma for user provided input range 4301 - * 4302 - * Return: 0 if success 4303 - */ 4304 - int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range) 4294 + static int xe_vm_alloc_vma(struct xe_vm *vm, 4295 + struct drm_gpuvm_map_req *map_req, 4296 + bool is_madvise) 4305 4297 { 4306 - struct drm_gpuvm_map_req map_req = { 4307 - .map.va.addr = start, 4308 - .map.va.range = range, 4309 - }; 4310 - 4311 4298 struct xe_vma_ops vops; 4312 4299 struct drm_gpuva_ops *ops = NULL; 4313 4300 struct drm_gpuva_op *__op; 4314 4301 bool is_cpu_addr_mirror = false; 4315 4302 bool remap_op = false; 4316 4303 struct xe_vma_mem_attr tmp_attr; 4304 + u16 default_pat; 4317 4305 int err; 4318 4306 4319 4307 lockdep_assert_held_write(&vm->lock); 4320 4308 4321 - vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range); 4322 - ops = drm_gpuvm_madvise_ops_create(&vm->gpuvm, &map_req); 4309 + if (is_madvise) 4310 + ops = drm_gpuvm_madvise_ops_create(&vm->gpuvm, map_req); 4311 + else 4312 + ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, map_req); 4313 + 4323 4314 if (IS_ERR(ops)) 4324 4315 return PTR_ERR(ops); 4325 4316 ··· 4321 4330 4322 4331 drm_gpuva_for_each_op(__op, ops) { 4323 4332 struct xe_vma_op *op = gpuva_op_to_vma_op(__op); 4333 + struct xe_vma *vma = NULL; 4324 4334 4325 - if (__op->op == DRM_GPUVA_OP_REMAP) { 4326 - xe_assert(vm->xe, !remap_op); 4327 - remap_op = true; 4335 + if (!is_madvise) { 4336 + if (__op->op == DRM_GPUVA_OP_UNMAP) { 4337 + vma = gpuva_to_vma(op->base.unmap.va); 4338 + XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma)); 4339 + default_pat = vma->attr.default_pat_index; 4340 + } 4328 4341 4329 - if 
(xe_vma_is_cpu_addr_mirror(gpuva_to_vma(op->base.remap.unmap->va))) 4330 - is_cpu_addr_mirror = true; 4331 - else 4332 - is_cpu_addr_mirror = false; 4342 + if (__op->op == DRM_GPUVA_OP_REMAP) { 4343 + vma = gpuva_to_vma(op->base.remap.unmap->va); 4344 + default_pat = vma->attr.default_pat_index; 4345 + } 4346 + 4347 + if (__op->op == DRM_GPUVA_OP_MAP) { 4348 + op->map.is_cpu_addr_mirror = true; 4349 + op->map.pat_index = default_pat; 4350 + } 4351 + } else { 4352 + if (__op->op == DRM_GPUVA_OP_REMAP) { 4353 + vma = gpuva_to_vma(op->base.remap.unmap->va); 4354 + xe_assert(vm->xe, !remap_op); 4355 + xe_assert(vm->xe, xe_vma_has_no_bo(vma)); 4356 + remap_op = true; 4357 + 4358 + if (xe_vma_is_cpu_addr_mirror(vma)) 4359 + is_cpu_addr_mirror = true; 4360 + else 4361 + is_cpu_addr_mirror = false; 4362 + } 4363 + 4364 + if (__op->op == DRM_GPUVA_OP_MAP) { 4365 + xe_assert(vm->xe, remap_op); 4366 + remap_op = false; 4367 + /* 4368 + * In case of madvise ops DRM_GPUVA_OP_MAP is 4369 + * always after DRM_GPUVA_OP_REMAP, so ensure 4370 + * we assign op->map.is_cpu_addr_mirror true 4371 + * if REMAP is for xe_vma_is_cpu_addr_mirror vma 4372 + */ 4373 + op->map.is_cpu_addr_mirror = is_cpu_addr_mirror; 4374 + } 4333 4375 } 4334 - 4335 - if (__op->op == DRM_GPUVA_OP_MAP) { 4336 - xe_assert(vm->xe, remap_op); 4337 - remap_op = false; 4338 - 4339 - /* In case of madvise ops DRM_GPUVA_OP_MAP is always after 4340 - * DRM_GPUVA_OP_REMAP, so ensure we assign op->map.is_cpu_addr_mirror true 4341 - * if REMAP is for xe_vma_is_cpu_addr_mirror vma 4342 - */ 4343 - op->map.is_cpu_addr_mirror = is_cpu_addr_mirror; 4344 - } 4345 - 4346 4376 print_op(vm->xe, __op); 4347 4377 } 4348 4378 4349 4379 xe_vma_ops_init(&vops, vm, NULL, NULL, 0); 4350 - vops.flags |= XE_VMA_OPS_FLAG_MADVISE; 4380 + 4381 + if (is_madvise) 4382 + vops.flags |= XE_VMA_OPS_FLAG_MADVISE; 4383 + 4351 4384 err = vm_bind_ioctl_ops_parse(vm, ops, &vops); 4352 4385 if (err) 4353 4386 goto unwind_ops; ··· 4383 4368 struct xe_vma 
*vma; 4384 4369 4385 4370 if (__op->op == DRM_GPUVA_OP_UNMAP) { 4386 - /* There should be no unmap */ 4387 - XE_WARN_ON("UNEXPECTED UNMAP"); 4388 - xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), NULL); 4371 + vma = gpuva_to_vma(op->base.unmap.va); 4372 + /* There should be no unmap for madvise */ 4373 + if (is_madvise) 4374 + XE_WARN_ON("UNEXPECTED UNMAP"); 4375 + 4376 + xe_vma_destroy(vma, NULL); 4389 4377 } else if (__op->op == DRM_GPUVA_OP_REMAP) { 4390 4378 vma = gpuva_to_vma(op->base.remap.unmap->va); 4391 - /* Store attributes for REMAP UNMAPPED VMA, so they can be assigned 4392 - * to newly MAP created vma. 4379 + /* In case of madvise ops Store attributes for REMAP UNMAPPED 4380 + * VMA, so they can be assigned to newly MAP created vma. 4393 4381 */ 4394 - tmp_attr = vma->attr; 4382 + if (is_madvise) 4383 + tmp_attr = vma->attr; 4384 + 4395 4385 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL); 4396 4386 } else if (__op->op == DRM_GPUVA_OP_MAP) { 4397 4387 vma = op->map.vma; ··· 4404 4384 * Therefore temp_attr will always have sane values, making it safe to 4405 4385 * copy them to new vma. 
4406 4386 */ 4407 - vma->attr = tmp_attr; 4387 + if (is_madvise) 4388 + vma->attr = tmp_attr; 4408 4389 } 4409 4390 } 4410 4391 ··· 4418 4397 free_ops: 4419 4398 drm_gpuva_ops_free(&vm->gpuvm, ops); 4420 4399 return err; 4400 + } 4401 + 4402 + /** 4403 + * xe_vm_alloc_madvise_vma - Allocate VMA's with madvise ops 4404 + * @vm: Pointer to the xe_vm structure 4405 + * @start: Starting input address 4406 + * @range: Size of the input range 4407 + * 4408 + * This function splits existing vma to create new vma for user provided input range 4409 + * 4410 + * Return: 0 if success 4411 + */ 4412 + int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range) 4413 + { 4414 + struct drm_gpuvm_map_req map_req = { 4415 + .map.va.addr = start, 4416 + .map.va.range = range, 4417 + }; 4418 + 4419 + lockdep_assert_held_write(&vm->lock); 4420 + 4421 + vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range); 4422 + 4423 + return xe_vm_alloc_vma(vm, &map_req, true); 4424 + } 4425 + 4426 + /** 4427 + * xe_vm_alloc_cpu_addr_mirror_vma - Allocate CPU addr mirror vma 4428 + * @vm: Pointer to the xe_vm structure 4429 + * @start: Starting input address 4430 + * @range: Size of the input range 4431 + * 4432 + * This function splits/merges existing vma to create new vma for user provided input range 4433 + * 4434 + * Return: 0 if success 4435 + */ 4436 + int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t range) 4437 + { 4438 + struct drm_gpuvm_map_req map_req = { 4439 + .map.va.addr = start, 4440 + .map.va.range = range, 4441 + }; 4442 + 4443 + lockdep_assert_held_write(&vm->lock); 4444 + 4445 + vm_dbg(&vm->xe->drm, "CPU_ADDR_MIRROR_VMA_OPS_CREATE: addr=0x%016llx, size=0x%016llx", 4446 + start, range); 4447 + 4448 + return xe_vm_alloc_vma(vm, &map_req, false); 4421 4449 }

drivers/gpu/drm/xe/xe_vm.h

··· 177 177 178 178 int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t addr, uint64_t size); 179 179 180 + int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t addr, uint64_t size); 181 + 180 182 /** 181 183 * to_userptr_vma() - Return a pointer to an embedding userptr vma 182 184 * @vma: Pointer to the embedded struct xe_vma