Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: explicitly sync to VM updates v2

Allows us to reduce the overhead while syncing to fences a bit.

v2: also drop adev parameter from the functions

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Christian König and committed by
Alex Deucher
e095fc17 6ceeb144

+51 -40
+4 -4
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -358,7 +358,7 @@
 	if (ret)
 		return ret;
 
-	return amdgpu_sync_fence(NULL, sync, vm->last_update, false);
+	return amdgpu_sync_fence(sync, vm->last_update, false);
 }
 
 static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
@@ -751,7 +751,7 @@
 
 	amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
 
-	amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false);
+	amdgpu_sync_fence(sync, bo_va->last_pt_update, false);
 
 	return 0;
 }
@@ -770,7 +770,7 @@
 		return ret;
 	}
 
-	return amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false);
+	return amdgpu_sync_fence(sync, bo_va->last_pt_update, false);
 }
 
 static int map_bo_to_gpuvm(struct amdgpu_device *adev,
@@ -2045,7 +2045,7 @@
 		pr_debug("Memory eviction: Validate BOs failed. Try again\n");
 		goto validate_map_fail;
 	}
-	ret = amdgpu_sync_fence(NULL, &sync_obj, bo->tbo.moving, false);
+	ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving, false);
 	if (ret) {
 		pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
 		goto validate_map_fail;
+6 -13
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -797,29 +797,23 @@
 	if (r)
 		return r;
 
-	r = amdgpu_sync_fence(adev, &p->job->sync,
-			      fpriv->prt_va->last_pt_update, false);
+	r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
 	if (r)
 		return r;
 
 	if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
-		struct dma_fence *f;
-
 		bo_va = fpriv->csa_va;
 		BUG_ON(!bo_va);
 		r = amdgpu_vm_bo_update(adev, bo_va, false);
 		if (r)
 			return r;
 
-		f = bo_va->last_pt_update;
-		r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
+		r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
 		if (r)
 			return r;
 	}
 
 	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
-		struct dma_fence *f;
-
 		/* ignore duplicates */
 		bo = ttm_to_amdgpu_bo(e->tv.bo);
 		if (!bo)
@@ -827,8 +833,7 @@
 		if (r)
 			return r;
 
-		f = bo_va->last_pt_update;
-		r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
+		r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
 		if (r)
 			return r;
 	}
@@ -840,7 +847,7 @@
 	if (r)
 		return r;
 
-	r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update, false);
+	r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update);
 	if (r)
 		return r;
 
@@ -982,7 +989,7 @@
 		dma_fence_put(old);
 	}
 
-	r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true);
+	r = amdgpu_sync_fence(&p->job->sync, fence, true);
 	dma_fence_put(fence);
 	if (r)
 		return r;
@@ -1004,7 +1011,7 @@
 			return r;
 	}
 
-	r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true);
+	r = amdgpu_sync_fence(&p->job->sync, fence, true);
 	dma_fence_put(fence);
 
 	return r;
+6 -7
drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -206,7 +206,7 @@
 	int r;
 
 	if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait))
-		return amdgpu_sync_fence(adev, sync, ring->vmid_wait, false);
+		return amdgpu_sync_fence(sync, ring->vmid_wait, false);
 
 	fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL);
 	if (!fences)
@@ -241,7 +241,7 @@
 			return -ENOMEM;
 		}
 
-		r = amdgpu_sync_fence(adev, sync, &array->base, false);
+		r = amdgpu_sync_fence(sync, &array->base, false);
 		dma_fence_put(ring->vmid_wait);
 		ring->vmid_wait = &array->base;
 		return r;
@@ -294,7 +294,7 @@
 		tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
 		if (tmp) {
 			*id = NULL;
-			r = amdgpu_sync_fence(adev, sync, tmp, false);
+			r = amdgpu_sync_fence(sync, tmp, false);
 			return r;
 		}
 		needs_flush = true;
@@ -303,7 +303,7 @@
 	/* Good we can use this VMID. Remember this submission as
 	 * user of the VMID.
 	 */
-	r = amdgpu_sync_fence(ring->adev, &(*id)->active, fence, false);
+	r = amdgpu_sync_fence(&(*id)->active, fence, false);
 	if (r)
 		return r;
 
@@ -375,7 +375,7 @@
 	/* Good, we can use this VMID. Remember this submission as
 	 * user of the VMID.
 	 */
-	r = amdgpu_sync_fence(ring->adev, &(*id)->active, fence, false);
+	r = amdgpu_sync_fence(&(*id)->active, fence, false);
 	if (r)
 		return r;
 
@@ -435,8 +435,7 @@
 		id = idle;
 
 		/* Remember this submission as user of the VMID */
-		r = amdgpu_sync_fence(ring->adev, &id->active,
-				      fence, false);
+		r = amdgpu_sync_fence(&id->active, fence, false);
 		if (r)
 			goto error;
 
+1 -2
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -193,8 +193,7 @@
 	fence = amdgpu_sync_get_fence(&job->sync, &explicit);
 	if (fence && explicit) {
 		if (drm_sched_dependency_optimized(fence, s_entity)) {
-			r = amdgpu_sync_fence(ring->adev, &job->sched_sync,
-					      fence, false);
+			r = amdgpu_sync_fence(&job->sched_sync, fence, false);
 			if (r)
 				DRM_ERROR("Error adding fence (%d)\n", r);
 		}
+28 -10
drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -129,7 +129,8 @@
  * Tries to add the fence to an existing hash entry. Returns true when an entry
  * was found, false otherwise.
  */
-static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f, bool explicit)
+static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f,
+				  bool explicit)
 {
 	struct amdgpu_sync_entry *e;
 
@@ -152,19 +151,18 @@
  * amdgpu_sync_fence - remember to sync to this fence
  *
  * @sync: sync object to add fence to
- * @fence: fence to sync to
+ * @f: fence to sync to
+ * @explicit: if this is an explicit dependency
  *
+ * Add the fence to the sync object.
  */
-int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
-		      struct dma_fence *f, bool explicit)
+int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
+		      bool explicit)
 {
 	struct amdgpu_sync_entry *e;
 
 	if (!f)
 		return 0;
-	if (amdgpu_sync_same_dev(adev, f) &&
-	    amdgpu_sync_get_owner(f) == AMDGPU_FENCE_OWNER_VM)
-		amdgpu_sync_keep_later(&sync->last_vm_update, f);
 
 	if (amdgpu_sync_add_later(sync, f, explicit))
 		return 0;
@@ -177,6 +177,24 @@
 	hash_add(sync->fences, &e->node, f->context);
 	e->fence = dma_fence_get(f);
 	return 0;
+}
+
+/**
+ * amdgpu_sync_vm_fence - remember to sync to this VM fence
+ *
+ * @adev: amdgpu device
+ * @sync: sync object to add fence to
+ * @fence: the VM fence to add
+ *
+ * Add the fence to the sync object and remember it as VM update.
+ */
+int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
+{
+	if (!fence)
+		return 0;
+
+	amdgpu_sync_keep_later(&sync->last_vm_update, fence);
+	return amdgpu_sync_fence(sync, fence, false);
 }
 
 /**
@@ -222,7 +204,7 @@
 
 	/* always sync to the exclusive fence */
 	f = dma_resv_get_excl(resv);
-	r = amdgpu_sync_fence(adev, sync, f, false);
+	r = amdgpu_sync_fence(sync, f, false);
 
 	flist = dma_resv_get_list(resv);
 	if (!flist || r)
@@ -257,7 +239,7 @@
 				continue;
 			}
 
-			r = amdgpu_sync_fence(adev, sync, f, false);
+			r = amdgpu_sync_fence(sync, f, false);
 			if (r)
 				break;
 		}
@@ -358,7 +340,7 @@
 	hash_for_each_safe(source->fences, i, tmp, e, node) {
 		f = e->fence;
 		if (!dma_fence_is_signaled(f)) {
-			r = amdgpu_sync_fence(NULL, clone, f, e->explicit);
+			r = amdgpu_sync_fence(clone, f, e->explicit);
 			if (r)
 				return r;
 		} else {
+5 -3
drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
@@ -40,8 +40,9 @@
 };
 
 void amdgpu_sync_create(struct amdgpu_sync *sync);
-int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
-		      struct dma_fence *f, bool explicit);
+int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
+		      bool explicit);
+int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence);
 int amdgpu_sync_resv(struct amdgpu_device *adev,
 		     struct amdgpu_sync *sync,
 		     struct dma_resv *resv,
@@ -50,7 +49,8 @@
 		     bool explicit_sync);
 struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
 					 struct amdgpu_ring *ring);
-struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit);
+struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync,
+					bool *explicit);
 int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone);
 int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
 void amdgpu_sync_free(struct amdgpu_sync *sync);
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -71,7 +71,7 @@
 	p->num_dw_left = ndw;
 
 	/* Wait for moves to be completed */
-	r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive, false);
+	r = amdgpu_sync_fence(&p->job->sync, exclusive, false);
 	if (r)
 		return r;
 