Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: clean up and unify hw fence handling

Decouple the amdgpu fence from the amdgpu_job structure.
This lets us clean up the separate fence ops for the embedded
fence and other fences. This also allows us to allocate the
vm fence up front when we allocate the job.

v2: Additional cleanup suggested by Christian
v3: Additional cleanups suggested by Christian
v4: Additional cleanups suggested by David and
a vm fence fix
v5: cast seqno (David)

Cc: David.Wu3@amd.com
Cc: christian.koenig@amd.com
Tested-by: David (Ming Qiang) Wu <David.Wu3@amd.com>
Reviewed-by: David (Ming Qiang) Wu <David.Wu3@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

+63 -167
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
··· 1902 1902 continue; 1903 1903 } 1904 1904 job = to_amdgpu_job(s_job); 1905 - if (preempted && (&job->hw_fence.base) == fence) 1905 + if (preempted && (&job->hw_fence->base) == fence) 1906 1906 /* mark the job as preempted */ 1907 1907 job->preemption_status |= AMDGPU_IB_PREEMPTED; 1908 1908 }
+1 -6
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
··· 5809 5809 if (!amdgpu_ring_sched_ready(ring)) 5810 5810 continue; 5811 5811 5812 - /* Clear job fence from fence drv to avoid force_completion 5813 - * leave NULL and vm flush fence in fence drv 5814 - */ 5815 - amdgpu_fence_driver_clear_job_fences(ring); 5816 - 5817 5812 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ 5818 5813 amdgpu_fence_driver_force_completion(ring); 5819 5814 } ··· 6537 6542 * 6538 6543 * job->base holds a reference to parent fence 6539 6544 */ 6540 - if (job && dma_fence_is_signaled(&job->hw_fence.base)) { 6545 + if (job && dma_fence_is_signaled(&job->hw_fence->base)) { 6541 6546 job_signaled = true; 6542 6547 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); 6543 6548 goto skip_hw_reset;
+13 -130
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
··· 45 45 * Cast helper 46 46 */ 47 47 static const struct dma_fence_ops amdgpu_fence_ops; 48 - static const struct dma_fence_ops amdgpu_job_fence_ops; 49 48 static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f) 50 49 { 51 50 struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base); 52 51 53 - if (__f->base.ops == &amdgpu_fence_ops || 54 - __f->base.ops == &amdgpu_job_fence_ops) 55 - return __f; 56 - 57 - return NULL; 52 + return __f; 58 53 } 59 54 60 55 /** ··· 93 98 * amdgpu_fence_emit - emit a fence on the requested ring 94 99 * 95 100 * @ring: ring the fence is associated with 96 - * @f: resulting fence object 97 101 * @af: amdgpu fence input 98 102 * @flags: flags to pass into the subordinate .emit_fence() call 99 103 * 100 104 * Emits a fence command on the requested ring (all asics). 101 105 * Returns 0 on success, -ENOMEM on failure. 102 106 */ 103 - int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, 104 - struct amdgpu_fence *af, unsigned int flags) 107 + int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af, 108 + unsigned int flags) 105 109 { 106 110 struct amdgpu_device *adev = ring->adev; 107 111 struct dma_fence *fence; 108 - struct amdgpu_fence *am_fence; 109 112 struct dma_fence __rcu **ptr; 110 113 uint32_t seq; 111 114 int r; 112 115 113 - if (!af) { 114 - /* create a separate hw fence */ 115 - am_fence = kzalloc(sizeof(*am_fence), GFP_KERNEL); 116 - if (!am_fence) 117 - return -ENOMEM; 118 - } else { 119 - am_fence = af; 120 - } 121 - fence = &am_fence->base; 122 - am_fence->ring = ring; 116 + fence = &af->base; 117 + af->ring = ring; 123 118 124 119 seq = ++ring->fence_drv.sync_seq; 125 - am_fence->seq = seq; 126 - if (af) { 127 - dma_fence_init(fence, &amdgpu_job_fence_ops, 128 - &ring->fence_drv.lock, 129 - adev->fence_context + ring->idx, seq); 130 - /* Against remove in amdgpu_job_{free, free_cb} */ 131 - dma_fence_get(fence); 132 - } else { 133 - dma_fence_init(fence, 
&amdgpu_fence_ops, 134 - &ring->fence_drv.lock, 135 - adev->fence_context + ring->idx, seq); 136 - } 120 + dma_fence_init(fence, &amdgpu_fence_ops, 121 + &ring->fence_drv.lock, 122 + adev->fence_context + ring->idx, seq); 137 123 138 124 amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, 139 125 seq, flags | AMDGPU_FENCE_FLAG_INT); 140 - amdgpu_fence_save_wptr(fence); 126 + amdgpu_fence_save_wptr(af); 141 127 pm_runtime_get_noresume(adev_to_drm(adev)->dev); 142 128 ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; 143 129 if (unlikely(rcu_dereference_protected(*ptr, 1))) { ··· 142 166 * emitting the fence would mess up the hardware ring buffer. 143 167 */ 144 168 rcu_assign_pointer(*ptr, dma_fence_get(fence)); 145 - 146 - *f = fence; 147 169 148 170 return 0; 149 171 } ··· 644 670 } 645 671 646 672 /** 647 - * amdgpu_fence_driver_clear_job_fences - clear job embedded fences of ring 648 - * 649 - * @ring: fence of the ring to be cleared 650 - * 651 - */ 652 - void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring) 653 - { 654 - int i; 655 - struct dma_fence *old, **ptr; 656 - 657 - for (i = 0; i <= ring->fence_drv.num_fences_mask; i++) { 658 - ptr = &ring->fence_drv.fences[i]; 659 - old = rcu_dereference_protected(*ptr, 1); 660 - if (old && old->ops == &amdgpu_job_fence_ops) { 661 - struct amdgpu_job *job; 662 - 663 - /* For non-scheduler bad job, i.e. failed ib test, we need to signal 664 - * it right here or we won't be able to track them in fence_drv 665 - * and they will remain unsignaled during sa_bo free. 
666 - */ 667 - job = container_of(old, struct amdgpu_job, hw_fence.base); 668 - if (!job->base.s_fence && !dma_fence_is_signaled(old)) 669 - dma_fence_signal(old); 670 - RCU_INIT_POINTER(*ptr, NULL); 671 - dma_fence_put(old); 672 - } 673 - } 674 - } 675 - 676 - /** 677 673 * amdgpu_fence_driver_set_error - set error code on fences 678 674 * @ring: the ring which contains the fences 679 675 * @error: the error code to set ··· 699 755 /** 700 756 * amdgpu_fence_driver_guilty_force_completion - force signal of specified sequence 701 757 * 702 - * @fence: fence of the ring to signal 758 + * @af: fence of the ring to signal 703 759 * 704 760 */ 705 761 void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af) ··· 736 792 } while (last_seq != seq); 737 793 spin_unlock_irqrestore(&ring->fence_drv.lock, flags); 738 794 /* signal the guilty fence */ 739 - amdgpu_fence_write(ring, af->seq); 795 + amdgpu_fence_write(ring, (u32)af->base.seqno); 740 796 amdgpu_fence_process(ring); 741 797 } 742 798 743 - void amdgpu_fence_save_wptr(struct dma_fence *fence) 799 + void amdgpu_fence_save_wptr(struct amdgpu_fence *af) 744 800 { 745 - struct amdgpu_fence *am_fence = container_of(fence, struct amdgpu_fence, base); 746 - 747 - am_fence->wptr = am_fence->ring->wptr; 801 + af->wptr = af->ring->wptr; 748 802 } 749 803 750 804 static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring *ring, ··· 808 866 return (const char *)to_amdgpu_fence(f)->ring->name; 809 867 } 810 868 811 - static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f) 812 - { 813 - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base); 814 - 815 - return (const char *)to_amdgpu_ring(job->base.sched)->name; 816 - } 817 - 818 869 /** 819 870 * amdgpu_fence_enable_signaling - enable signalling on fence 820 871 * @f: fence ··· 820 885 { 821 886 if (!timer_pending(&to_amdgpu_fence(f)->ring->fence_drv.fallback_timer)) 822 887 
amdgpu_fence_schedule_fallback(to_amdgpu_fence(f)->ring); 823 - 824 - return true; 825 - } 826 - 827 - /** 828 - * amdgpu_job_fence_enable_signaling - enable signalling on job fence 829 - * @f: fence 830 - * 831 - * This is the simliar function with amdgpu_fence_enable_signaling above, it 832 - * only handles the job embedded fence. 833 - */ 834 - static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f) 835 - { 836 - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base); 837 - 838 - if (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer)) 839 - amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched)); 840 888 841 889 return true; 842 890 } ··· 840 922 } 841 923 842 924 /** 843 - * amdgpu_job_fence_free - free up the job with embedded fence 844 - * 845 - * @rcu: RCU callback head 846 - * 847 - * Free up the job with embedded fence after the RCU grace period. 848 - */ 849 - static void amdgpu_job_fence_free(struct rcu_head *rcu) 850 - { 851 - struct dma_fence *f = container_of(rcu, struct dma_fence, rcu); 852 - 853 - /* free job if fence has a parent job */ 854 - kfree(container_of(f, struct amdgpu_job, hw_fence.base)); 855 - } 856 - 857 - /** 858 925 * amdgpu_fence_release - callback that fence can be freed 859 926 * 860 927 * @f: fence ··· 852 949 call_rcu(&f->rcu, amdgpu_fence_free); 853 950 } 854 951 855 - /** 856 - * amdgpu_job_fence_release - callback that job embedded fence can be freed 857 - * 858 - * @f: fence 859 - * 860 - * This is the simliar function with amdgpu_fence_release above, it 861 - * only handles the job embedded fence. 
862 - */ 863 - static void amdgpu_job_fence_release(struct dma_fence *f) 864 - { 865 - call_rcu(&f->rcu, amdgpu_job_fence_free); 866 - } 867 - 868 952 static const struct dma_fence_ops amdgpu_fence_ops = { 869 953 .get_driver_name = amdgpu_fence_get_driver_name, 870 954 .get_timeline_name = amdgpu_fence_get_timeline_name, 871 955 .enable_signaling = amdgpu_fence_enable_signaling, 872 956 .release = amdgpu_fence_release, 873 - }; 874 - 875 - static const struct dma_fence_ops amdgpu_job_fence_ops = { 876 - .get_driver_name = amdgpu_fence_get_driver_name, 877 - .get_timeline_name = amdgpu_job_fence_get_timeline_name, 878 - .enable_signaling = amdgpu_job_fence_enable_signaling, 879 - .release = amdgpu_job_fence_release, 880 957 }; 881 958 882 959 /*
+11 -6
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
··· 149 149 if (job) { 150 150 vm = job->vm; 151 151 fence_ctx = job->base.s_fence ? 152 - job->base.s_fence->scheduled.context : 0; 152 + job->base.s_fence->finished.context : 0; 153 153 shadow_va = job->shadow_va; 154 154 csa_va = job->csa_va; 155 155 gds_va = job->gds_va; 156 156 init_shadow = job->init_shadow; 157 - af = &job->hw_fence; 157 + af = job->hw_fence; 158 158 /* Save the context of the job for reset handling. 159 159 * The driver needs this so it can skip the ring 160 160 * contents for guilty contexts. 161 161 */ 162 - af->context = job->base.s_fence ? job->base.s_fence->finished.context : 0; 162 + af->context = fence_ctx; 163 + /* the vm fence is also part of the job's context */ 164 + job->hw_vm_fence->context = fence_ctx; 163 165 } else { 164 166 vm = NULL; 165 167 fence_ctx = 0; ··· 169 167 csa_va = 0; 170 168 gds_va = 0; 171 169 init_shadow = false; 172 - af = NULL; 170 + af = kzalloc(sizeof(*af), GFP_ATOMIC); 171 + if (!af) 172 + return -ENOMEM; 173 173 } 174 174 175 175 if (!ring->sched.ready) { ··· 293 289 amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr); 294 290 } 295 291 296 - r = amdgpu_fence_emit(ring, f, af, fence_flags); 292 + r = amdgpu_fence_emit(ring, af, fence_flags); 297 293 if (r) { 298 294 dev_err(adev->dev, "failed to emit fence (%d)\n", r); 299 295 if (job && job->vmid) ··· 301 297 amdgpu_ring_undo(ring); 302 298 return r; 303 299 } 300 + *f = &af->base; 304 301 305 302 if (ring->funcs->insert_end) 306 303 ring->funcs->insert_end(ring); ··· 322 317 * fence so we know what rings contents to backup 323 318 * after we reset the queue. 324 319 */ 325 - amdgpu_fence_save_wptr(*f); 320 + amdgpu_fence_save_wptr(af); 326 321 327 322 amdgpu_ring_ib_end(ring); 328 323 amdgpu_ring_commit(ring);
+30 -13
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
··· 137 137 ring->funcs->reset) { 138 138 dev_err(adev->dev, "Starting %s ring reset\n", 139 139 s_job->sched->name); 140 - r = amdgpu_ring_reset(ring, job->vmid, &job->hw_fence); 140 + r = amdgpu_ring_reset(ring, job->vmid, job->hw_fence); 141 141 if (!r) { 142 142 atomic_inc(&ring->adev->gpu_reset_counter); 143 143 dev_err(adev->dev, "Ring %s reset succeeded\n", ··· 186 186 unsigned int num_ibs, struct amdgpu_job **job, 187 187 u64 drm_client_id) 188 188 { 189 + struct amdgpu_fence *af; 190 + int r; 191 + 189 192 if (num_ibs == 0) 190 193 return -EINVAL; 191 194 192 195 *job = kzalloc(struct_size(*job, ibs, num_ibs), GFP_KERNEL); 193 196 if (!*job) 194 197 return -ENOMEM; 198 + 199 + af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL); 200 + if (!af) { 201 + r = -ENOMEM; 202 + goto err_job; 203 + } 204 + (*job)->hw_fence = af; 205 + 206 + af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL); 207 + if (!af) { 208 + r = -ENOMEM; 209 + goto err_fence; 210 + } 211 + (*job)->hw_vm_fence = af; 195 212 196 213 (*job)->vm = vm; 197 214 ··· 221 204 222 205 return drm_sched_job_init(&(*job)->base, entity, 1, owner, 223 206 drm_client_id); 207 + 208 + err_fence: 209 + kfree((*job)->hw_fence); 210 + err_job: 211 + kfree(*job); 212 + 213 + return r; 224 214 } 225 215 226 216 int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, ··· 275 251 struct dma_fence *f; 276 252 unsigned i; 277 253 278 - /* Check if any fences where initialized */ 254 + /* Check if any fences were initialized */ 279 255 if (job->base.s_fence && job->base.s_fence->finished.ops) 280 256 f = &job->base.s_fence->finished; 281 - else if (job->hw_fence.base.ops) 282 - f = &job->hw_fence.base; 257 + else if (job->hw_fence && job->hw_fence->base.ops) 258 + f = &job->hw_fence->base; 283 259 else 284 260 f = NULL; 285 261 ··· 295 271 296 272 amdgpu_sync_free(&job->explicit_sync); 297 273 298 - /* only put the hw fence if has embedded fence */ 299 - if (!job->hw_fence.base.ops) 300 - kfree(job); 301 - else 
302 - dma_fence_put(&job->hw_fence.base); 274 + kfree(job); 303 275 } 304 276 305 277 void amdgpu_job_set_gang_leader(struct amdgpu_job *job, ··· 324 304 if (job->gang_submit != &job->base.s_fence->scheduled) 325 305 dma_fence_put(job->gang_submit); 326 306 327 - if (!job->hw_fence.base.ops) 328 - kfree(job); 329 - else 330 - dma_fence_put(&job->hw_fence.base); 307 + kfree(job); 331 308 } 332 309 333 310 struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job)
+2 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
··· 64 64 struct drm_sched_job base; 65 65 struct amdgpu_vm *vm; 66 66 struct amdgpu_sync explicit_sync; 67 - struct amdgpu_fence hw_fence; 67 + struct amdgpu_fence *hw_fence; 68 + struct amdgpu_fence *hw_vm_fence; 68 69 struct dma_fence *gang_submit; 69 70 uint32_t preamble_status; 70 71 uint32_t preemption_status;
+3 -5
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
··· 147 147 u64 wptr; 148 148 /* fence context for resets */ 149 149 u64 context; 150 - uint32_t seq; 151 150 }; 152 151 153 152 extern const struct drm_sched_backend_ops amdgpu_sched_ops; 154 153 155 - void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring); 156 154 void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error); 157 155 void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring); 158 156 void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af); 159 - void amdgpu_fence_save_wptr(struct dma_fence *fence); 157 + void amdgpu_fence_save_wptr(struct amdgpu_fence *af); 160 158 161 159 int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring); 162 160 int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, ··· 164 166 void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev); 165 167 int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev); 166 168 void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev); 167 - int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, 168 - struct amdgpu_fence *af, unsigned int flags); 169 + int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af, 170 + unsigned int flags); 169 171 int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s, 170 172 uint32_t timeout); 171 173 bool amdgpu_fence_process(struct amdgpu_ring *ring);
+2 -5
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
··· 779 779 bool cleaner_shader_needed = false; 780 780 bool pasid_mapping_needed = false; 781 781 struct dma_fence *fence = NULL; 782 - struct amdgpu_fence *af; 783 782 unsigned int patch; 784 783 int r; 785 784 ··· 841 842 } 842 843 843 844 if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) { 844 - r = amdgpu_fence_emit(ring, &fence, NULL, 0); 845 + r = amdgpu_fence_emit(ring, job->hw_vm_fence, 0); 845 846 if (r) 846 847 return r; 847 - /* this is part of the job's context */ 848 - af = container_of(fence, struct amdgpu_fence, base); 849 - af->context = job->base.s_fence ? job->base.s_fence->finished.context : 0; 848 + fence = &job->hw_vm_fence->base; 850 849 } 851 850 852 851 if (vm_flush_needed) {