Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: clean up and unify hw fence handling

Decouple the amdgpu fence from the amdgpu_job structure.
This lets us clean up the separate fence ops for the embedded
fence and other fences. This also allows us to allocate the
vm fence up front when we allocate the job.

v2: Additional cleanup suggested by Christian
v3: Additional cleanups suggested by Christian
v4: Additional cleanups suggested by David and
a vm fence fix
v5: cast seqno (David)

Cc: David.Wu3@amd.com
Cc: christian.koenig@amd.com
Tested-by: David (Ming Qiang) Wu <David.Wu3@amd.com>
Reviewed-by: David (Ming Qiang) Wu <David.Wu3@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

+63 -167
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
··· 1902 1902 continue; 1903 1903 } 1904 1904 job = to_amdgpu_job(s_job); 1905 - if (preempted && (&job->hw_fence.base) == fence) 1905 + if (preempted && (&job->hw_fence->base) == fence) 1906 1906 /* mark the job as preempted */ 1907 1907 job->preemption_status |= AMDGPU_IB_PREEMPTED; 1908 1908 }
+1 -6
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
··· 5809 5809 if (!amdgpu_ring_sched_ready(ring)) 5810 5810 continue; 5811 5811 5812 - /* Clear job fence from fence drv to avoid force_completion 5813 - * leave NULL and vm flush fence in fence drv 5814 - */ 5815 - amdgpu_fence_driver_clear_job_fences(ring); 5816 - 5817 5812 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ 5818 5813 amdgpu_fence_driver_force_completion(ring); 5819 5814 } ··· 6537 6542 * 6538 6543 * job->base holds a reference to parent fence 6539 6544 */ 6540 - if (job && dma_fence_is_signaled(&job->hw_fence.base)) { 6545 + if (job && dma_fence_is_signaled(&job->hw_fence->base)) { 6541 6546 job_signaled = true; 6542 6547 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); 6543 6548 goto skip_hw_reset;
+13 -130
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
··· 45 45 * Cast helper 46 46 */ 47 47 static const struct dma_fence_ops amdgpu_fence_ops; 48 - static const struct dma_fence_ops amdgpu_job_fence_ops; 49 48 static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f) 50 49 { 51 50 struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base); 52 51 53 - if (__f->base.ops == &amdgpu_fence_ops || 54 - __f->base.ops == &amdgpu_job_fence_ops) 55 - return __f; 56 - 57 - return NULL; 52 + return __f; 58 53 } 59 54 60 55 /** ··· 93 98 * amdgpu_fence_emit - emit a fence on the requested ring 94 99 * 95 100 * @ring: ring the fence is associated with 96 - * @f: resulting fence object 97 101 * @af: amdgpu fence input 98 102 * @flags: flags to pass into the subordinate .emit_fence() call 99 103 * 100 104 * Emits a fence command on the requested ring (all asics). 101 105 * Returns 0 on success, -ENOMEM on failure. 102 106 */ 103 - int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, 104 - struct amdgpu_fence *af, unsigned int flags) 107 + int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af, 108 + unsigned int flags) 105 109 { 106 110 struct amdgpu_device *adev = ring->adev; 107 111 struct dma_fence *fence; 108 - struct amdgpu_fence *am_fence; 109 112 struct dma_fence __rcu **ptr; 110 113 uint32_t seq; 111 114 int r; 112 115 113 - if (!af) { 114 - /* create a separate hw fence */ 115 - am_fence = kzalloc(sizeof(*am_fence), GFP_KERNEL); 116 - if (!am_fence) 117 - return -ENOMEM; 118 - } else { 119 - am_fence = af; 120 - } 121 - fence = &am_fence->base; 122 - am_fence->ring = ring; 116 + fence = &af->base; 117 + af->ring = ring; 123 118 124 119 seq = ++ring->fence_drv.sync_seq; 125 - am_fence->seq = seq; 126 - if (af) { 127 - dma_fence_init(fence, &amdgpu_job_fence_ops, 128 - &ring->fence_drv.lock, 129 - adev->fence_context + ring->idx, seq); 130 - /* Against remove in amdgpu_job_{free, free_cb} */ 131 - dma_fence_get(fence); 132 - } else { 133 - dma_fence_init(fence, 
&amdgpu_fence_ops, 134 - &ring->fence_drv.lock, 135 - adev->fence_context + ring->idx, seq); 136 - } 120 + dma_fence_init(fence, &amdgpu_fence_ops, 121 + &ring->fence_drv.lock, 122 + adev->fence_context + ring->idx, seq); 137 123 138 124 amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, 139 125 seq, flags | AMDGPU_FENCE_FLAG_INT); 140 - amdgpu_fence_save_wptr(fence); 126 + amdgpu_fence_save_wptr(af); 141 127 pm_runtime_get_noresume(adev_to_drm(adev)->dev); 142 128 ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; 143 129 if (unlikely(rcu_dereference_protected(*ptr, 1))) { ··· 142 166 * emitting the fence would mess up the hardware ring buffer. 143 167 */ 144 168 rcu_assign_pointer(*ptr, dma_fence_get(fence)); 145 - 146 - *f = fence; 147 169 148 170 return 0; 149 171 } ··· 644 670 } 645 671 646 672 /** 647 - * amdgpu_fence_driver_clear_job_fences - clear job embedded fences of ring 648 - * 649 - * @ring: fence of the ring to be cleared 650 - * 651 - */ 652 - void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring) 653 - { 654 - int i; 655 - struct dma_fence *old, **ptr; 656 - 657 - for (i = 0; i <= ring->fence_drv.num_fences_mask; i++) { 658 - ptr = &ring->fence_drv.fences[i]; 659 - old = rcu_dereference_protected(*ptr, 1); 660 - if (old && old->ops == &amdgpu_job_fence_ops) { 661 - struct amdgpu_job *job; 662 - 663 - /* For non-scheduler bad job, i.e. failed ib test, we need to signal 664 - * it right here or we won't be able to track them in fence_drv 665 - * and they will remain unsignaled during sa_bo free. 
666 - */ 667 - job = container_of(old, struct amdgpu_job, hw_fence.base); 668 - if (!job->base.s_fence && !dma_fence_is_signaled(old)) 669 - dma_fence_signal(old); 670 - RCU_INIT_POINTER(*ptr, NULL); 671 - dma_fence_put(old); 672 - } 673 - } 674 - } 675 - 676 - /** 677 673 * amdgpu_fence_driver_set_error - set error code on fences 678 674 * @ring: the ring which contains the fences 679 675 * @error: the error code to set ··· 699 755 /** 700 756 * amdgpu_fence_driver_guilty_force_completion - force signal of specified sequence 701 757 * 702 - * @fence: fence of the ring to signal 758 + * @af: fence of the ring to signal 703 759 * 704 760 */ 705 761 void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af) ··· 736 792 } while (last_seq != seq); 737 793 spin_unlock_irqrestore(&ring->fence_drv.lock, flags); 738 794 /* signal the guilty fence */ 739 - amdgpu_fence_write(ring, af->seq); 795 + amdgpu_fence_write(ring, (u32)af->base.seqno); 740 796 amdgpu_fence_process(ring); 741 797 } 742 798 743 - void amdgpu_fence_save_wptr(struct dma_fence *fence) 799 + void amdgpu_fence_save_wptr(struct amdgpu_fence *af) 744 800 { 745 - struct amdgpu_fence *am_fence = container_of(fence, struct amdgpu_fence, base); 746 - 747 - am_fence->wptr = am_fence->ring->wptr; 801 + af->wptr = af->ring->wptr; 748 802 } 749 803 750 804 static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring *ring, ··· 808 866 return (const char *)to_amdgpu_fence(f)->ring->name; 809 867 } 810 868 811 - static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f) 812 - { 813 - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base); 814 - 815 - return (const char *)to_amdgpu_ring(job->base.sched)->name; 816 - } 817 - 818 869 /** 819 870 * amdgpu_fence_enable_signaling - enable signalling on fence 820 871 * @f: fence ··· 820 885 { 821 886 if (!timer_pending(&to_amdgpu_fence(f)->ring->fence_drv.fallback_timer)) 822 887 
amdgpu_fence_schedule_fallback(to_amdgpu_fence(f)->ring); 823 - 824 - return true; 825 - } 826 - 827 - /** 828 - * amdgpu_job_fence_enable_signaling - enable signalling on job fence 829 - * @f: fence 830 - * 831 - * This is the simliar function with amdgpu_fence_enable_signaling above, it 832 - * only handles the job embedded fence. 833 - */ 834 - static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f) 835 - { 836 - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base); 837 - 838 - if (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer)) 839 - amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched)); 840 888 841 889 return true; 842 890 } ··· 840 922 } 841 923 842 924 /** 843 - * amdgpu_job_fence_free - free up the job with embedded fence 844 - * 845 - * @rcu: RCU callback head 846 - * 847 - * Free up the job with embedded fence after the RCU grace period. 848 - */ 849 - static void amdgpu_job_fence_free(struct rcu_head *rcu) 850 - { 851 - struct dma_fence *f = container_of(rcu, struct dma_fence, rcu); 852 - 853 - /* free job if fence has a parent job */ 854 - kfree(container_of(f, struct amdgpu_job, hw_fence.base)); 855 - } 856 - 857 - /** 858 925 * amdgpu_fence_release - callback that fence can be freed 859 926 * 860 927 * @f: fence ··· 852 949 call_rcu(&f->rcu, amdgpu_fence_free); 853 950 } 854 951 855 - /** 856 - * amdgpu_job_fence_release - callback that job embedded fence can be freed 857 - * 858 - * @f: fence 859 - * 860 - * This is the simliar function with amdgpu_fence_release above, it 861 - * only handles the job embedded fence. 
862 - */ 863 - static void amdgpu_job_fence_release(struct dma_fence *f) 864 - { 865 - call_rcu(&f->rcu, amdgpu_job_fence_free); 866 - } 867 - 868 952 static const struct dma_fence_ops amdgpu_fence_ops = { 869 953 .get_driver_name = amdgpu_fence_get_driver_name, 870 954 .get_timeline_name = amdgpu_fence_get_timeline_name, 871 955 .enable_signaling = amdgpu_fence_enable_signaling, 872 956 .release = amdgpu_fence_release, 873 - }; 874 - 875 - static const struct dma_fence_ops amdgpu_job_fence_ops = { 876 - .get_driver_name = amdgpu_fence_get_driver_name, 877 - .get_timeline_name = amdgpu_job_fence_get_timeline_name, 878 - .enable_signaling = amdgpu_job_fence_enable_signaling, 879 - .release = amdgpu_job_fence_release, 880 957 }; 881 958 882 959 /*
+11 -6
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
··· 149 149 if (job) { 150 150 vm = job->vm; 151 151 fence_ctx = job->base.s_fence ? 152 - job->base.s_fence->scheduled.context : 0; 152 + job->base.s_fence->finished.context : 0; 153 153 shadow_va = job->shadow_va; 154 154 csa_va = job->csa_va; 155 155 gds_va = job->gds_va; 156 156 init_shadow = job->init_shadow; 157 - af = &job->hw_fence; 157 + af = job->hw_fence; 158 158 /* Save the context of the job for reset handling. 159 159 * The driver needs this so it can skip the ring 160 160 * contents for guilty contexts. 161 161 */ 162 - af->context = job->base.s_fence ? job->base.s_fence->finished.context : 0; 162 + af->context = fence_ctx; 163 + /* the vm fence is also part of the job's context */ 164 + job->hw_vm_fence->context = fence_ctx; 163 165 } else { 164 166 vm = NULL; 165 167 fence_ctx = 0; ··· 169 167 csa_va = 0; 170 168 gds_va = 0; 171 169 init_shadow = false; 172 - af = NULL; 170 + af = kzalloc(sizeof(*af), GFP_ATOMIC); 171 + if (!af) 172 + return -ENOMEM; 173 173 } 174 174 175 175 if (!ring->sched.ready) { ··· 293 289 amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr); 294 290 } 295 291 296 - r = amdgpu_fence_emit(ring, f, af, fence_flags); 292 + r = amdgpu_fence_emit(ring, af, fence_flags); 297 293 if (r) { 298 294 dev_err(adev->dev, "failed to emit fence (%d)\n", r); 299 295 if (job && job->vmid) ··· 301 297 amdgpu_ring_undo(ring); 302 298 return r; 303 299 } 300 + *f = &af->base; 304 301 305 302 if (ring->funcs->insert_end) 306 303 ring->funcs->insert_end(ring); ··· 322 317 * fence so we know what rings contents to backup 323 318 * after we reset the queue. 324 319 */ 325 - amdgpu_fence_save_wptr(*f); 320 + amdgpu_fence_save_wptr(af); 326 321 327 322 amdgpu_ring_ib_end(ring); 328 323 amdgpu_ring_commit(ring);
+30 -13
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
··· 137 137 ring->funcs->reset) { 138 138 dev_err(adev->dev, "Starting %s ring reset\n", 139 139 s_job->sched->name); 140 - r = amdgpu_ring_reset(ring, job->vmid, &job->hw_fence); 140 + r = amdgpu_ring_reset(ring, job->vmid, job->hw_fence); 141 141 if (!r) { 142 142 atomic_inc(&ring->adev->gpu_reset_counter); 143 143 dev_err(adev->dev, "Ring %s reset succeeded\n", ··· 186 186 unsigned int num_ibs, struct amdgpu_job **job, 187 187 u64 drm_client_id) 188 188 { 189 + struct amdgpu_fence *af; 190 + int r; 191 + 189 192 if (num_ibs == 0) 190 193 return -EINVAL; 191 194 192 195 *job = kzalloc(struct_size(*job, ibs, num_ibs), GFP_KERNEL); 193 196 if (!*job) 194 197 return -ENOMEM; 198 + 199 + af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL); 200 + if (!af) { 201 + r = -ENOMEM; 202 + goto err_job; 203 + } 204 + (*job)->hw_fence = af; 205 + 206 + af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL); 207 + if (!af) { 208 + r = -ENOMEM; 209 + goto err_fence; 210 + } 211 + (*job)->hw_vm_fence = af; 195 212 196 213 (*job)->vm = vm; 197 214 ··· 221 204 222 205 return drm_sched_job_init(&(*job)->base, entity, 1, owner, 223 206 drm_client_id); 207 + 208 + err_fence: 209 + kfree((*job)->hw_fence); 210 + err_job: 211 + kfree(*job); 212 + 213 + return r; 224 214 } 225 215 226 216 int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, ··· 275 251 struct dma_fence *f; 276 252 unsigned i; 277 253 278 - /* Check if any fences where initialized */ 254 + /* Check if any fences were initialized */ 279 255 if (job->base.s_fence && job->base.s_fence->finished.ops) 280 256 f = &job->base.s_fence->finished; 281 - else if (job->hw_fence.base.ops) 282 - f = &job->hw_fence.base; 257 + else if (job->hw_fence && job->hw_fence->base.ops) 258 + f = &job->hw_fence->base; 283 259 else 284 260 f = NULL; 285 261 ··· 295 271 296 272 amdgpu_sync_free(&job->explicit_sync); 297 273 298 - /* only put the hw fence if has embedded fence */ 299 - if (!job->hw_fence.base.ops) 300 - kfree(job); 301 - else 
302 - dma_fence_put(&job->hw_fence.base); 274 + kfree(job); 303 275 } 304 276 305 277 void amdgpu_job_set_gang_leader(struct amdgpu_job *job, ··· 324 304 if (job->gang_submit != &job->base.s_fence->scheduled) 325 305 dma_fence_put(job->gang_submit); 326 306 327 - if (!job->hw_fence.base.ops) 328 - kfree(job); 329 - else 330 - dma_fence_put(&job->hw_fence.base); 307 + kfree(job); 331 308 } 332 309 333 310 struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job)
+2 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
··· 64 64 struct drm_sched_job base; 65 65 struct amdgpu_vm *vm; 66 66 struct amdgpu_sync explicit_sync; 67 - struct amdgpu_fence hw_fence; 67 + struct amdgpu_fence *hw_fence; 68 + struct amdgpu_fence *hw_vm_fence; 68 69 struct dma_fence *gang_submit; 69 70 uint32_t preamble_status; 70 71 uint32_t preemption_status;
+3 -5
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
··· 147 147 u64 wptr; 148 148 /* fence context for resets */ 149 149 u64 context; 150 - uint32_t seq; 151 150 }; 152 151 153 152 extern const struct drm_sched_backend_ops amdgpu_sched_ops; 154 153 155 - void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring); 156 154 void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error); 157 155 void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring); 158 156 void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af); 159 - void amdgpu_fence_save_wptr(struct dma_fence *fence); 157 + void amdgpu_fence_save_wptr(struct amdgpu_fence *af); 160 158 161 159 int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring); 162 160 int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, ··· 164 166 void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev); 165 167 int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev); 166 168 void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev); 167 - int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, 168 - struct amdgpu_fence *af, unsigned int flags); 169 + int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af, 170 + unsigned int flags); 169 171 int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s, 170 172 uint32_t timeout); 171 173 bool amdgpu_fence_process(struct amdgpu_ring *ring);
+2 -5
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
··· 779 779 bool cleaner_shader_needed = false; 780 780 bool pasid_mapping_needed = false; 781 781 struct dma_fence *fence = NULL; 782 - struct amdgpu_fence *af; 783 782 unsigned int patch; 784 783 int r; 785 784 ··· 841 842 } 842 843 843 844 if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) { 844 - r = amdgpu_fence_emit(ring, &fence, NULL, 0); 845 + r = amdgpu_fence_emit(ring, job->hw_vm_fence, 0); 845 846 if (r) 846 847 return r; 847 - /* this is part of the job's context */ 848 - af = container_of(fence, struct amdgpu_fence, base); 849 - af->context = job->base.s_fence ? job->base.s_fence->finished.context : 0; 848 + fence = &job->hw_vm_fence->base; 850 849 } 851 850 852 851 if (vm_flush_needed) {