Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: fix and cleanup user fence handling v2

We leaked the BO in the error path; in addition, we only had
one user fence for all IBs in a job.

v2: remove white space changes

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Christian König and committed by Alex Deucher
(commits 758ac17f, d88bf583).

+38 -47
+7 -12
drivers/gpu/drm/amd/amdgpu/amdgpu.h
··· 368 368 #define AMDGPU_FENCE_FLAG_64BIT (1 << 0) 369 369 #define AMDGPU_FENCE_FLAG_INT (1 << 1) 370 370 371 - struct amdgpu_user_fence { 372 - /* write-back bo */ 373 - struct amdgpu_bo *bo; 374 - /* write-back address offset to bo start */ 375 - uint32_t offset; 376 - }; 377 - 378 371 int amdgpu_fence_driver_init(struct amdgpu_device *adev); 379 372 void amdgpu_fence_driver_fini(struct amdgpu_device *adev); 380 373 void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev); ··· 734 741 uint32_t length_dw; 735 742 uint64_t gpu_addr; 736 743 uint32_t *ptr; 737 - struct amdgpu_user_fence *user; 738 744 uint32_t flags; 739 - /* resulting sequence number */ 740 - uint64_t sequence; 741 745 }; 742 746 743 747 enum amdgpu_ring_type { ··· 1209 1219 struct amdgpu_cs_chunk { 1210 1220 uint32_t chunk_id; 1211 1221 uint32_t length_dw; 1212 - uint32_t *kdata; 1222 + void *kdata; 1213 1223 }; 1214 1224 1215 1225 struct amdgpu_cs_parser { ··· 1253 1263 uint32_t gds_base, gds_size; 1254 1264 uint32_t gws_base, gws_size; 1255 1265 uint32_t oa_base, oa_size; 1256 - struct amdgpu_user_fence uf; 1266 + 1267 + /* user fence handling */ 1268 + struct amdgpu_bo *uf_bo; 1269 + uint32_t uf_offset; 1270 + uint64_t uf_sequence; 1271 + 1257 1272 }; 1258 1273 #define to_amdgpu_job(sched_job) \ 1259 1274 container_of((sched_job), struct amdgpu_job, base)
+25 -30
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
··· 87 87 } 88 88 89 89 static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, 90 - struct amdgpu_user_fence *uf, 91 - struct drm_amdgpu_cs_chunk_fence *fence_data) 90 + struct drm_amdgpu_cs_chunk_fence *data, 91 + uint32_t *offset) 92 92 { 93 93 struct drm_gem_object *gobj; 94 - uint32_t handle; 95 94 96 - handle = fence_data->handle; 97 95 gobj = drm_gem_object_lookup(p->adev->ddev, p->filp, 98 - fence_data->handle); 96 + data->handle); 99 97 if (gobj == NULL) 100 98 return -EINVAL; 101 99 102 - uf->bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); 103 - uf->offset = fence_data->offset; 104 - 105 - if (amdgpu_ttm_tt_get_usermm(uf->bo->tbo.ttm)) { 106 - drm_gem_object_unreference_unlocked(gobj); 107 - return -EINVAL; 108 - } 109 - 110 - p->uf_entry.robj = amdgpu_bo_ref(uf->bo); 100 + p->uf_entry.robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); 111 101 p->uf_entry.priority = 0; 112 102 p->uf_entry.tv.bo = &p->uf_entry.robj->tbo; 113 103 p->uf_entry.tv.shared = true; 114 104 p->uf_entry.user_pages = NULL; 105 + *offset = data->offset; 115 106 116 107 drm_gem_object_unreference_unlocked(gobj); 108 + 109 + if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) { 110 + amdgpu_bo_unref(&p->uf_entry.robj); 111 + return -EINVAL; 112 + } 113 + 117 114 return 0; 118 115 } 119 116 ··· 121 124 union drm_amdgpu_cs *cs = data; 122 125 uint64_t *chunk_array_user; 123 126 uint64_t *chunk_array; 124 - struct amdgpu_user_fence uf = {}; 125 127 unsigned size, num_ibs = 0; 128 + uint32_t uf_offset = 0; 126 129 int i; 127 130 int ret; 128 131 ··· 197 200 goto free_partial_kdata; 198 201 } 199 202 200 - ret = amdgpu_cs_user_fence_chunk(p, &uf, (void *)p->chunks[i].kdata); 203 + ret = amdgpu_cs_user_fence_chunk(p, p->chunks[i].kdata, 204 + &uf_offset); 201 205 if (ret) 202 206 goto free_partial_kdata; 203 207 ··· 217 219 if (ret) 218 220 goto free_all_kdata; 219 221 220 - p->job->uf = uf; 222 + if (p->uf_entry.robj) { 223 + p->job->uf_bo = amdgpu_bo_ref(p->uf_entry.robj); 224 + 
p->job->uf_offset = uf_offset; 225 + } 221 226 222 227 kfree(chunk_array); 223 228 return 0; ··· 378 377 INIT_LIST_HEAD(&duplicates); 379 378 amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd); 380 379 381 - if (p->job->uf.bo) 380 + if (p->uf_entry.robj) 382 381 list_add(&p->uf_entry.tv.head, &p->validated); 383 382 384 383 if (need_mmap_lock) ··· 761 760 j++; 762 761 } 763 762 764 - /* wrap the last IB with user fence */ 765 - if (parser->job->uf.bo) { 766 - struct amdgpu_ib *ib = &parser->job->ibs[parser->job->num_ibs - 1]; 767 - 768 - /* UVD & VCE fw doesn't support user fences */ 769 - if (parser->job->ring->type == AMDGPU_RING_TYPE_UVD || 770 - parser->job->ring->type == AMDGPU_RING_TYPE_VCE) 771 - return -EINVAL; 772 - 773 - ib->user = &parser->job->uf; 774 - } 763 + /* UVD & VCE fw doesn't support user fences */ 764 + if (parser->job->uf_bo && ( 765 + parser->job->ring->type == AMDGPU_RING_TYPE_UVD || 766 + parser->job->ring->type == AMDGPU_RING_TYPE_VCE)) 767 + return -EINVAL; 775 768 776 769 return 0; 777 770 } ··· 851 856 job->ctx = entity->fence_context; 852 857 p->fence = fence_get(fence); 853 858 cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, fence); 854 - job->ibs[job->num_ibs - 1].sequence = cs->out.handle; 859 + job->uf_sequence = cs->out.handle; 855 860 856 861 trace_amdgpu_cs_ioctl(job); 857 862 amd_sched_entity_push_job(&job->base);
+5 -4
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
··· 203 203 } 204 204 205 205 /* wrap the last IB with fence */ 206 - if (ib->user) { 207 - uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo); 208 - addr += ib->user->offset; 209 - amdgpu_ring_emit_fence(ring, addr, ib->sequence, 206 + if (job && job->uf_bo) { 207 + uint64_t addr = amdgpu_bo_gpu_offset(job->uf_bo); 208 + 209 + addr += job->uf_offset; 210 + amdgpu_ring_emit_fence(ring, addr, job->uf_sequence, 210 211 AMDGPU_FENCE_FLAG_64BIT); 211 212 } 212 213
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
··· 97 97 amdgpu_sa_bo_free(job->adev, &job->ibs[i].sa_bo, f); 98 98 fence_put(job->fence); 99 99 100 - amdgpu_bo_unref(&job->uf.bo); 100 + amdgpu_bo_unref(&job->uf_bo); 101 101 amdgpu_sync_free(&job->sync); 102 102 103 103 if (!job->base.use_sched)