Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: Avoid reclaim fs while eviction lock

[Why]
Avoid filesystem reclaim while the eviction lock is held, because this
lock can also be taken from an MMU notifier running in reclaim-FS context.

[How]
Set the PF_MEMALLOC_NOFS flag while the eviction mutex is locked,
using the memalloc_nofs_save() / memalloc_nofs_restore() API.

Signed-off-by: Alex Sierra <alex.sierra@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Alex Sierra and committed by
Alex Deucher
a269e449 a9ffe2a9

+38 -8
+33 -7
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
··· 83 83 }; 84 84 85 85 /** 86 + * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS 87 + * happens while holding this lock anywhere to prevent deadlocks when 88 + * an MMU notifier runs in reclaim-FS context. 89 + */ 90 + static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm) 91 + { 92 + mutex_lock(&vm->eviction_lock); 93 + vm->saved_flags = memalloc_nofs_save(); 94 + } 95 + 96 + static inline int amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm) 97 + { 98 + if (mutex_trylock(&vm->eviction_lock)) { 99 + vm->saved_flags = memalloc_nofs_save(); 100 + return 1; 101 + } 102 + return 0; 103 + } 104 + 105 + static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm) 106 + { 107 + memalloc_nofs_restore(vm->saved_flags); 108 + mutex_unlock(&vm->eviction_lock); 109 + } 110 + 111 + /** 86 112 * amdgpu_vm_level_shift - return the addr shift for each level 87 113 * 88 114 * @adev: amdgpu_device pointer ··· 704 678 } 705 679 } 706 680 707 - mutex_lock(&vm->eviction_lock); 681 + amdgpu_vm_eviction_lock(vm); 708 682 vm->evicting = false; 709 - mutex_unlock(&vm->eviction_lock); 683 + amdgpu_vm_eviction_unlock(vm); 710 684 711 685 return 0; 712 686 } ··· 1585 1559 if (!(flags & AMDGPU_PTE_VALID)) 1586 1560 owner = AMDGPU_FENCE_OWNER_KFD; 1587 1561 1588 - mutex_lock(&vm->eviction_lock); 1562 + amdgpu_vm_eviction_lock(vm); 1589 1563 if (vm->evicting) { 1590 1564 r = -EBUSY; 1591 1565 goto error_unlock; ··· 1602 1576 r = vm->update_funcs->commit(&params, fence); 1603 1577 1604 1578 error_unlock: 1605 - mutex_unlock(&vm->eviction_lock); 1579 + amdgpu_vm_eviction_unlock(vm); 1606 1580 return r; 1607 1581 } 1608 1582 ··· 2559 2533 return false; 2560 2534 2561 2535 /* Try to block ongoing updates */ 2562 - if (!mutex_trylock(&bo_base->vm->eviction_lock)) 2536 + if (!amdgpu_vm_eviction_trylock(bo_base->vm)) 2563 2537 return false; 2564 2538 2565 2539 /* Don't evict VM page tables while they are updated */ 2566 2540 if 
(!dma_fence_is_signaled(bo_base->vm->last_direct) || 2567 2541 !dma_fence_is_signaled(bo_base->vm->last_delayed)) { 2568 - mutex_unlock(&bo_base->vm->eviction_lock); 2542 + amdgpu_vm_eviction_unlock(bo_base->vm); 2569 2543 return false; 2570 2544 } 2571 2545 2572 2546 bo_base->vm->evicting = true; 2573 - mutex_unlock(&bo_base->vm->eviction_lock); 2547 + amdgpu_vm_eviction_unlock(bo_base->vm); 2574 2548 return true; 2575 2549 } 2576 2550
+5 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
··· 30 30 #include <drm/gpu_scheduler.h> 31 31 #include <drm/drm_file.h> 32 32 #include <drm/ttm/ttm_bo_driver.h> 33 + #include <linux/sched/mm.h> 33 34 34 35 #include "amdgpu_sync.h" 35 36 #include "amdgpu_ring.h" ··· 240 239 /* tree of virtual addresses mapped */ 241 240 struct rb_root_cached va; 242 241 243 - /* Lock to prevent eviction while we are updating page tables */ 242 + /* Lock to prevent eviction while we are updating page tables 243 + * use vm_eviction_lock/unlock(vm) 244 + */ 244 245 struct mutex eviction_lock; 245 246 bool evicting; 247 + unsigned int saved_flags; 246 248 247 249 /* BOs who needs a validation */ 248 250 struct list_head evicted;