Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: remove distinction between explicit and implicit sync (v2)

According to Marek, a pipeline sync should be inserted for implicit syncs as well.

v2: bump the driver version

Signed-off-by: Christian König <christian.koenig@amd.com>
Tested-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Christian König and committed by
Alex Deucher
174b328b 614c5611

+33 -50
+4 -4
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
··· 395 395 if (ret) 396 396 return ret; 397 397 398 - return amdgpu_sync_fence(sync, vm->last_update, false); 398 + return amdgpu_sync_fence(sync, vm->last_update); 399 399 } 400 400 401 401 static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) ··· 785 785 786 786 amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); 787 787 788 - amdgpu_sync_fence(sync, bo_va->last_pt_update, false); 788 + amdgpu_sync_fence(sync, bo_va->last_pt_update); 789 789 790 790 return 0; 791 791 } ··· 804 804 return ret; 805 805 } 806 806 807 - return amdgpu_sync_fence(sync, bo_va->last_pt_update, false); 807 + return amdgpu_sync_fence(sync, bo_va->last_pt_update); 808 808 } 809 809 810 810 static int map_bo_to_gpuvm(struct amdgpu_device *adev, ··· 2102 2102 pr_debug("Memory eviction: Validate BOs failed. Try again\n"); 2103 2103 goto validate_map_fail; 2104 2104 } 2105 - ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving, false); 2105 + ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving); 2106 2106 if (ret) { 2107 2107 pr_debug("Memory eviction: Sync BO fence failed. Try again\n"); 2108 2108 goto validate_map_fail;
+2 -2
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
··· 992 992 dma_fence_put(old); 993 993 } 994 994 995 - r = amdgpu_sync_fence(&p->job->sync, fence, true); 995 + r = amdgpu_sync_fence(&p->job->sync, fence); 996 996 dma_fence_put(fence); 997 997 if (r) 998 998 return r; ··· 1014 1014 return r; 1015 1015 } 1016 1016 1017 - r = amdgpu_sync_fence(&p->job->sync, fence, true); 1017 + r = amdgpu_sync_fence(&p->job->sync, fence); 1018 1018 dma_fence_put(fence); 1019 1019 1020 1020 return r;
+2 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
··· 87 87 * - 3.36.0 - Allow reading more status registers on si/cik 88 88 * - 3.37.0 - L2 is invalidated before SDMA IBs, needed for correctness 89 89 * - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC 90 + * - 3.39.0 - DMABUF implicit sync does a full pipeline sync 90 91 */ 91 92 #define KMS_DRIVER_MAJOR 3 92 - #define KMS_DRIVER_MINOR 38 93 + #define KMS_DRIVER_MINOR 39 93 94 #define KMS_DRIVER_PATCHLEVEL 0 94 95 95 96 int amdgpu_vram_limit = 0;
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
··· 178 178 179 179 need_ctx_switch = ring->current_ctx != fence_ctx; 180 180 if (ring->funcs->emit_pipeline_sync && job && 181 - ((tmp = amdgpu_sync_get_fence(&job->sched_sync, NULL)) || 181 + ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) || 182 182 (amdgpu_sriov_vf(adev) && need_ctx_switch) || 183 183 amdgpu_vm_need_pipeline_sync(ring, job))) { 184 184 need_pipe_sync = true;
+6 -6
drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
··· 206 206 int r; 207 207 208 208 if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait)) 209 - return amdgpu_sync_fence(sync, ring->vmid_wait, false); 209 + return amdgpu_sync_fence(sync, ring->vmid_wait); 210 210 211 211 fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL); 212 212 if (!fences) ··· 241 241 return -ENOMEM; 242 242 } 243 243 244 - r = amdgpu_sync_fence(sync, &array->base, false); 244 + r = amdgpu_sync_fence(sync, &array->base); 245 245 dma_fence_put(ring->vmid_wait); 246 246 ring->vmid_wait = &array->base; 247 247 return r; ··· 294 294 tmp = amdgpu_sync_peek_fence(&(*id)->active, ring); 295 295 if (tmp) { 296 296 *id = NULL; 297 - r = amdgpu_sync_fence(sync, tmp, false); 297 + r = amdgpu_sync_fence(sync, tmp); 298 298 return r; 299 299 } 300 300 needs_flush = true; ··· 303 303 /* Good we can use this VMID. Remember this submission as 304 304 * user of the VMID. 305 305 */ 306 - r = amdgpu_sync_fence(&(*id)->active, fence, false); 306 + r = amdgpu_sync_fence(&(*id)->active, fence); 307 307 if (r) 308 308 return r; 309 309 ··· 375 375 /* Good, we can use this VMID. Remember this submission as 376 376 * user of the VMID. 377 377 */ 378 - r = amdgpu_sync_fence(&(*id)->active, fence, false); 378 + r = amdgpu_sync_fence(&(*id)->active, fence); 379 379 if (r) 380 380 return r; 381 381 ··· 435 435 id = idle; 436 436 437 437 /* Remember this submission as user of the VMID */ 438 - r = amdgpu_sync_fence(&id->active, fence, false); 438 + r = amdgpu_sync_fence(&id->active, fence); 439 439 if (r) 440 440 goto error; 441 441
+6 -9
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
··· 183 183 struct amdgpu_job *job = to_amdgpu_job(sched_job); 184 184 struct amdgpu_vm *vm = job->vm; 185 185 struct dma_fence *fence; 186 - bool explicit = false; 187 186 int r; 188 187 189 - fence = amdgpu_sync_get_fence(&job->sync, &explicit); 190 - if (fence && explicit) { 191 - if (drm_sched_dependency_optimized(fence, s_entity)) { 192 - r = amdgpu_sync_fence(&job->sched_sync, fence, false); 193 - if (r) 194 - DRM_ERROR("Error adding fence (%d)\n", r); 195 - } 188 + fence = amdgpu_sync_get_fence(&job->sync); 189 + if (fence && drm_sched_dependency_optimized(fence, s_entity)) { 190 + r = amdgpu_sync_fence(&job->sched_sync, fence); 191 + if (r) 192 + DRM_ERROR("Error adding fence (%d)\n", r); 196 193 } 197 194 198 195 while (fence == NULL && vm && !job->vmid) { ··· 199 202 if (r) 200 203 DRM_ERROR("Error getting VM ID (%d)\n", r); 201 204 202 - fence = amdgpu_sync_get_fence(&job->sync, NULL); 205 + fence = amdgpu_sync_get_fence(&job->sync); 203 206 } 204 207 205 208 return fence;
+9 -22
drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
··· 35 35 struct amdgpu_sync_entry { 36 36 struct hlist_node node; 37 37 struct dma_fence *fence; 38 - bool explicit; 39 38 }; 40 39 41 40 static struct kmem_cache *amdgpu_sync_slab; ··· 128 129 * Tries to add the fence to an existing hash entry. Returns true when an entry 129 130 * was found, false otherwise. 130 131 */ 131 - static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f, 132 - bool explicit) 132 + static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f) 133 133 { 134 134 struct amdgpu_sync_entry *e; 135 135 ··· 137 139 continue; 138 140 139 141 amdgpu_sync_keep_later(&e->fence, f); 140 - 141 - /* Preserve eplicit flag to not loose pipe line sync */ 142 - e->explicit |= explicit; 143 - 144 142 return true; 145 143 } 146 144 return false; ··· 147 153 * 148 154 * @sync: sync object to add fence to 149 155 * @f: fence to sync to 150 - * @explicit: if this is an explicit dependency 151 156 * 152 157 * Add the fence to the sync object. 
153 158 */ 154 - int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f, 155 - bool explicit) 159 + int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f) 156 160 { 157 161 struct amdgpu_sync_entry *e; 158 162 159 163 if (!f) 160 164 return 0; 161 165 162 - if (amdgpu_sync_add_later(sync, f, explicit)) 166 + if (amdgpu_sync_add_later(sync, f)) 163 167 return 0; 164 168 165 169 e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL); 166 170 if (!e) 167 171 return -ENOMEM; 168 - 169 - e->explicit = explicit; 170 172 171 173 hash_add(sync->fences, &e->node, f->context); 172 174 e->fence = dma_fence_get(f); ··· 184 194 return 0; 185 195 186 196 amdgpu_sync_keep_later(&sync->last_vm_update, fence); 187 - return amdgpu_sync_fence(sync, fence, false); 197 + return amdgpu_sync_fence(sync, fence); 188 198 } 189 199 190 200 /** ··· 211 221 212 222 /* always sync to the exclusive fence */ 213 223 f = dma_resv_get_excl(resv); 214 - r = amdgpu_sync_fence(sync, f, false); 224 + r = amdgpu_sync_fence(sync, f); 215 225 216 226 flist = dma_resv_get_list(resv); 217 227 if (!flist || r) ··· 227 237 228 238 /* Always sync to moves, no matter what */ 229 239 if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED) { 230 - r = amdgpu_sync_fence(sync, f, false); 240 + r = amdgpu_sync_fence(sync, f); 231 241 if (r) 232 242 break; 233 243 } ··· 267 277 268 278 WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD, 269 279 "Adding eviction fence to sync obj"); 270 - r = amdgpu_sync_fence(sync, f, false); 280 + r = amdgpu_sync_fence(sync, f); 271 281 if (r) 272 282 break; 273 283 } ··· 322 332 * amdgpu_sync_get_fence - get the next fence from the sync object 323 333 * 324 334 * @sync: sync object to use 325 - * @explicit: true if the next fence is explicit 326 335 * 327 336 * Get and removes the next fence from the sync object not signaled yet. 
328 337 */ 329 - struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit) 338 + struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync) 330 339 { 331 340 struct amdgpu_sync_entry *e; 332 341 struct hlist_node *tmp; ··· 334 345 hash_for_each_safe(sync->fences, i, tmp, e, node) { 335 346 336 347 f = e->fence; 337 - if (explicit) 338 - *explicit = e->explicit; 339 348 340 349 hash_del(&e->node); 341 350 kmem_cache_free(amdgpu_sync_slab, e); ··· 365 378 hash_for_each_safe(source->fences, i, tmp, e, node) { 366 379 f = e->fence; 367 380 if (!dma_fence_is_signaled(f)) { 368 - r = amdgpu_sync_fence(clone, f, e->explicit); 381 + r = amdgpu_sync_fence(clone, f); 369 382 if (r) 370 383 return r; 371 384 } else {
+2 -4
drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
··· 47 47 }; 48 48 49 49 void amdgpu_sync_create(struct amdgpu_sync *sync); 50 - int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f, 51 - bool explicit); 50 + int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f); 52 51 int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence); 53 52 int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, 54 53 struct dma_resv *resv, enum amdgpu_sync_mode mode, 55 54 void *owner); 56 55 struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, 57 56 struct amdgpu_ring *ring); 58 - struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, 59 - bool *explicit); 57 + struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); 60 58 int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone); 61 59 int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr); 62 60 void amdgpu_sync_free(struct amdgpu_sync *sync);
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
··· 208 208 int r; 209 209 210 210 /* Wait for PD/PT moves to be completed */ 211 - r = amdgpu_sync_fence(&p->job->sync, bo->tbo.moving, false); 211 + r = amdgpu_sync_fence(&p->job->sync, bo->tbo.moving); 212 212 if (r) 213 213 return r; 214 214