Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge branch 'drm-next-4.3' of git://people.freedesktop.org/~agd5f/linux into drm-next

- DP fixes for radeon and amdgpu
- IH ring fix for tonga and fiji
- Lots of GPU scheduler fixes
- Misc additional fixes

* 'drm-next-4.3' of git://people.freedesktop.org/~agd5f/linux: (42 commits)
drm/amdgpu: fix wait queue handling in the scheduler
drm/amdgpu: remove extra parameters from scheduler callbacks
drm/amdgpu: wake up scheduler only when necessary
drm/amdgpu: remove entity idle timeout v2
drm/amdgpu: fix postclose order
drm/amdgpu: use IB for copy buffer of eviction
drm/amdgpu: adjust the judgement of removing fence callback
drm/amdgpu: fix no sync_wait in copy_buffer
drm/amdgpu: fix last_vm_update fence is not effective for sched fence
drm/amdgpu: add priv data to sched
drm/amdgpu: add owner for sched fence
drm/amdgpu: remove entity reference from sched fence
drm/amdgpu: fix and cleanup amd_sched_entity_push_job
drm/amdgpu: remove amdgpu_bo_list_clone
drm/amdgpu: remove the context from amdgpu_job
drm/amdgpu: remove unused parameters to amd_sched_create
drm/amdgpu: remove sched_lock
drm/amdgpu: remove prepare_job callback
drm/amdgpu: cleanup a scheduler function name
drm/amdgpu: reorder scheduler functions
...

+683 -739
+30 -24
drivers/gpu/drm/amd/amdgpu/amdgpu.h
··· 183 183 struct amdgpu_ring; 184 184 struct amdgpu_semaphore; 185 185 struct amdgpu_cs_parser; 186 + struct amdgpu_job; 186 187 struct amdgpu_irq_src; 187 188 struct amdgpu_fpriv; 188 189 ··· 247 246 unsigned copy_num_dw; 248 247 249 248 /* used for buffer migration */ 250 - void (*emit_copy_buffer)(struct amdgpu_ring *ring, 249 + void (*emit_copy_buffer)(struct amdgpu_ib *ib, 251 250 /* src addr in bytes */ 252 251 uint64_t src_offset, 253 252 /* dst addr in bytes */ ··· 440 439 int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); 441 440 unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring); 442 441 443 - signed long amdgpu_fence_wait_any(struct amdgpu_device *adev, 444 - struct amdgpu_fence **fences, 445 - bool intr, long t); 442 + signed long amdgpu_fence_wait_multiple(struct amdgpu_device *adev, 443 + struct fence **array, 444 + uint32_t count, 445 + bool wait_all, 446 + bool intr, 447 + signed long t); 446 448 struct amdgpu_fence *amdgpu_fence_ref(struct amdgpu_fence *fence); 447 449 void amdgpu_fence_unref(struct amdgpu_fence **fence); 448 450 ··· 518 514 uint64_t dst_offset, 519 515 uint32_t byte_count, 520 516 struct reservation_object *resv, 521 - struct amdgpu_fence **fence); 517 + struct fence **fence); 522 518 int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma); 523 519 524 520 struct amdgpu_bo_list_entry { ··· 654 650 struct amdgpu_sa_manager *manager; 655 651 unsigned soffset; 656 652 unsigned eoffset; 657 - struct amdgpu_fence *fence; 653 + struct fence *fence; 658 654 }; 659 655 660 656 /* ··· 696 692 struct amdgpu_semaphore *semaphore); 697 693 void amdgpu_semaphore_free(struct amdgpu_device *adev, 698 694 struct amdgpu_semaphore **semaphore, 699 - struct amdgpu_fence *fence); 695 + struct fence *fence); 700 696 701 697 /* 702 698 * Synchronization ··· 704 700 struct amdgpu_sync { 705 701 struct amdgpu_semaphore *semaphores[AMDGPU_NUM_SYNCS]; 706 702 struct amdgpu_fence *sync_to[AMDGPU_MAX_RINGS]; 707 - struct amdgpu_fence *last_vm_update; 703 + DECLARE_HASHTABLE(fences, 4); 704 + struct fence *last_vm_update; 708 705 }; 709 706 710 707 void amdgpu_sync_create(struct amdgpu_sync *sync); ··· 717 712 void *owner); 718 713 int amdgpu_sync_rings(struct amdgpu_sync *sync, 719 714 struct amdgpu_ring *ring); 715 + int amdgpu_sync_wait(struct amdgpu_sync *sync); 720 716 void amdgpu_sync_free(struct amdgpu_device *adev, struct amdgpu_sync *sync, 721 - struct amdgpu_fence *fence); 717 + struct fence *fence); 722 718 723 719 /* 724 720 * GART structures, functions & helpers ··· 877 871 struct amdgpu_ring *ring, 878 872 struct amdgpu_ib *ibs, 879 873 unsigned num_ibs, 880 - int (*free_job)(struct amdgpu_cs_parser *), 874 + int (*free_job)(struct amdgpu_job *), 881 875 void *owner, 882 876 struct fence **fence); 883 877 ··· 963 957 unsigned id; 964 958 uint64_t pd_gpu_addr; 965 959 /* last flushed PD/PT update */ 966 - struct amdgpu_fence *flushed_updates; 960 + struct fence *flushed_updates; 967 961 /* last use of vmid */ 968 962 struct amdgpu_fence *last_id_use; 969 963 }; ··· 1048 1042 int amdgpu_ctx_put(struct amdgpu_ctx *ctx); 1049 1043 1050 1044 uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, 1051 - struct fence *fence, uint64_t queued_seq); 1045 + struct fence *fence); 1052 1046 struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, 1053 1047 struct amdgpu_ring *ring, uint64_t seq); 1054 1048 ··· 1083 1077 struct amdgpu_bo_list_entry *array; 1084 1078 }; 1085 1079 1086 - struct amdgpu_bo_list * 1087 - 
amdgpu_bo_list_clone(struct amdgpu_bo_list *list); 1088 1080 struct amdgpu_bo_list * 1089 1081 amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id); 1090 1082 void amdgpu_bo_list_put(struct amdgpu_bo_list *list); ··· 1259 1255 1260 1256 /* user fence */ 1261 1257 struct amdgpu_user_fence uf; 1258 + }; 1262 1259 1263 - struct amdgpu_ring *ring; 1264 - struct mutex job_lock; 1265 - struct work_struct job_work; 1266 - int (*prepare_job)(struct amdgpu_cs_parser *sched_job); 1267 - int (*run_job)(struct amdgpu_cs_parser *sched_job); 1268 - int (*free_job)(struct amdgpu_cs_parser *sched_job); 1269 - struct amd_sched_fence *s_fence; 1260 + struct amdgpu_job { 1261 + struct amd_sched_job base; 1262 + struct amdgpu_device *adev; 1263 + struct amdgpu_ib *ibs; 1264 + uint32_t num_ibs; 1265 + struct mutex job_lock; 1266 + struct amdgpu_user_fence uf; 1267 + int (*free_job)(struct amdgpu_job *sched_job); 1270 1268 }; 1271 1269 1272 1270 static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, uint32_t ib_idx, int idx) ··· 2247 2241 #define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r)) 2248 2242 #define amdgpu_display_stop_mc_access(adev, s) (adev)->mode_info.funcs->stop_mc_access((adev), (s)) 2249 2243 #define amdgpu_display_resume_mc_access(adev, s) (adev)->mode_info.funcs->resume_mc_access((adev), (s)) 2250 - #define amdgpu_emit_copy_buffer(adev, r, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((r), (s), (d), (b)) 2244 + #define amdgpu_emit_copy_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b)) 2251 2245 #define amdgpu_emit_fill_buffer(adev, r, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((r), (s), (d), (b)) 2252 2246 #define amdgpu_dpm_get_temperature(adev) (adev)->pm.funcs->get_temperature((adev)) 2253 2247 #define amdgpu_dpm_pre_set_power_state(adev) (adev)->pm.funcs->pre_set_power_state((adev)) ··· 2349 2343 struct amdgpu_sync *sync); 2350 2344 void amdgpu_vm_flush(struct amdgpu_ring *ring, 2351 2345 struct amdgpu_vm *vm, 2352 - struct amdgpu_fence *updates); 2346 + struct fence *updates); 2353 2347 void amdgpu_vm_fence(struct amdgpu_device *adev, 2354 2348 struct amdgpu_vm *vm, 2355 2349 struct amdgpu_fence *fence); ··· 2379 2373 uint64_t addr); 2380 2374 void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, 2381 2375 struct amdgpu_bo_va *bo_va); 2382 - 2376 + int amdgpu_vm_free_job(struct amdgpu_job *job); 2383 2377 /* 2384 2378 * functions used by amdgpu_encoder.c 2385 2379 */
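The header rework above replaces the parser-based scheduler job with struct amdgpu_job; the kernel-IB submission helper's free_job callback changes signature accordingly. A minimal caller sketch (my_free_job is a hypothetical callback; the in-tree user is amdgpu_vm_free_job, wired up in amdgpu_ttm.c further down):

/* sketch only: hypothetical callback matching the new signature */
static int my_free_job(struct amdgpu_job *job)
{
	amdgpu_ib_free(job->adev, job->ibs);
	kfree(job->ibs);
	return 0;
}

	struct fence *fence;
	int r;

	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
						 &my_free_job,
						 AMDGPU_FENCE_OWNER_UNDEFINED,
						 &fence);
	if (r)
		return r;
	/* fence is a plain struct fence now; release it with fence_put() */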
+4 -4
drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
··· 33 33 { 34 34 unsigned long start_jiffies; 35 35 unsigned long end_jiffies; 36 - struct amdgpu_fence *fence = NULL; 36 + struct fence *fence = NULL; 37 37 int i, r; 38 38 39 39 start_jiffies = jiffies; ··· 42 42 r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence); 43 43 if (r) 44 44 goto exit_do_move; 45 - r = fence_wait(&fence->base, false); 45 + r = fence_wait(fence, false); 46 46 if (r) 47 47 goto exit_do_move; 48 - amdgpu_fence_unref(&fence); 48 + fence_put(fence); 49 49 } 50 50 end_jiffies = jiffies; 51 51 r = jiffies_to_msecs(end_jiffies - start_jiffies); 52 52 53 53 exit_do_move: 54 54 if (fence) 55 - amdgpu_fence_unref(&fence); 55 + fence_put(fence); 56 56 return r; 57 57 } 58 58
-33
drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
··· 62 62 return 0; 63 63 } 64 64 65 - struct amdgpu_bo_list * 66 - amdgpu_bo_list_clone(struct amdgpu_bo_list *list) 67 - { 68 - struct amdgpu_bo_list *result; 69 - unsigned i; 70 - 71 - result = kmalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL); 72 - if (!result) 73 - return NULL; 74 - 75 - result->array = drm_calloc_large(list->num_entries, 76 - sizeof(struct amdgpu_bo_list_entry)); 77 - if (!result->array) { 78 - kfree(result); 79 - return NULL; 80 - } 81 - 82 - mutex_init(&result->lock); 83 - result->gds_obj = list->gds_obj; 84 - result->gws_obj = list->gws_obj; 85 - result->oa_obj = list->oa_obj; 86 - result->has_userptr = list->has_userptr; 87 - result->num_entries = list->num_entries; 88 - 89 - memcpy(result->array, list->array, list->num_entries * 90 - sizeof(struct amdgpu_bo_list_entry)); 91 - 92 - for (i = 0; i < result->num_entries; ++i) 93 - amdgpu_bo_ref(result->array[i].robj); 94 - 95 - return result; 96 - } 97 - 98 65 static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id) 99 66 { 100 67 struct amdgpu_bo_list *list;
+5
drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
··· 75 75 if (!amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) { 76 76 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); 77 77 } else if (amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) { 78 + /* Don't try to start link training before we 79 + * have the dpcd */ 80 + if (!amdgpu_atombios_dp_get_dpcd(amdgpu_connector)) 81 + return; 82 + 78 83 /* set it to OFF so that drm_helper_connector_dpms() 79 84 * won't return immediately since the current state 80 85 * is ON at this point.
+93 -147
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
··· 126 126 return 0; 127 127 } 128 128 129 - static void amdgpu_job_work_func(struct work_struct *work) 130 - { 131 - struct amdgpu_cs_parser *sched_job = 132 - container_of(work, struct amdgpu_cs_parser, 133 - job_work); 134 - mutex_lock(&sched_job->job_lock); 135 - if (sched_job->free_job) 136 - sched_job->free_job(sched_job); 137 - mutex_unlock(&sched_job->job_lock); 138 - /* after processing job, free memory */ 139 - fence_put(&sched_job->s_fence->base); 140 - kfree(sched_job); 141 - } 142 129 struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev, 143 130 struct drm_file *filp, 144 131 struct amdgpu_ctx *ctx, ··· 144 157 parser->ctx = ctx; 145 158 parser->ibs = ibs; 146 159 parser->num_ibs = num_ibs; 147 - if (amdgpu_enable_scheduler) { 148 - mutex_init(&parser->job_lock); 149 - INIT_WORK(&parser->job_work, amdgpu_job_work_func); 150 - } 151 160 for (i = 0; i < num_ibs; i++) 152 161 ibs[i].ctx = ctx; 153 162 ··· 156 173 uint64_t *chunk_array_user; 157 174 uint64_t *chunk_array = NULL; 158 175 struct amdgpu_fpriv *fpriv = p->filp->driver_priv; 159 - struct amdgpu_bo_list *bo_list = NULL; 160 176 unsigned size, i; 161 177 int r = 0; 162 178 ··· 167 185 r = -EINVAL; 168 186 goto out; 169 187 } 170 - bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); 171 - if (!amdgpu_enable_scheduler) 172 - p->bo_list = bo_list; 173 - else { 174 - if (bo_list && !bo_list->has_userptr) { 175 - p->bo_list = amdgpu_bo_list_clone(bo_list); 176 - amdgpu_bo_list_put(bo_list); 177 - if (!p->bo_list) 178 - return -ENOMEM; 179 - } else if (bo_list && bo_list->has_userptr) 180 - p->bo_list = bo_list; 181 - else 182 - p->bo_list = NULL; 183 - } 188 + p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); 184 189 185 190 /* get chunks */ 186 191 INIT_LIST_HEAD(&p->validated); ··· 260 291 } 261 292 262 293 263 - p->ibs = kmalloc_array(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL); 294 + p->ibs = kcalloc(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL); 264 295 if (!p->ibs) 265 296 r = -ENOMEM; 266 297 ··· 467 498 unsigned i; 468 499 if (parser->ctx) 469 500 amdgpu_ctx_put(parser->ctx); 470 - if (parser->bo_list) { 471 - if (amdgpu_enable_scheduler && !parser->bo_list->has_userptr) 472 - amdgpu_bo_list_free(parser->bo_list); 473 - else 474 - amdgpu_bo_list_put(parser->bo_list); 475 - } 501 + if (parser->bo_list) 502 + amdgpu_bo_list_put(parser->bo_list); 503 + 476 504 drm_free_large(parser->vm_bos); 477 505 for (i = 0; i < parser->nchunks; i++) 478 506 drm_free_large(parser->chunks[i].kdata); 479 507 kfree(parser->chunks); 480 - if (parser->ibs) 481 - for (i = 0; i < parser->num_ibs; i++) 482 - amdgpu_ib_free(parser->adev, &parser->ibs[i]); 483 - kfree(parser->ibs); 484 - if (parser->uf.bo) 485 - drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base); 486 - 487 508 if (!amdgpu_enable_scheduler) 488 - kfree(parser); 509 + { 510 + if (parser->ibs) 511 + for (i = 0; i < parser->num_ibs; i++) 512 + amdgpu_ib_free(parser->adev, &parser->ibs[i]); 513 + kfree(parser->ibs); 514 + if (parser->uf.bo) 515 + drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base); 516 + } 517 + 518 + kfree(parser); 489 519 } 490 520 491 521 /** ··· 499 531 { 500 532 amdgpu_cs_parser_fini_early(parser, error, backoff); 501 533 amdgpu_cs_parser_fini_late(parser); 502 - } 503 - 504 - static int amdgpu_cs_parser_free_job(struct amdgpu_cs_parser *sched_job) 505 - { 506 - amdgpu_cs_parser_fini_late(sched_job); 507 - return 0; 508 534 } 509 535 510 536 static int amdgpu_bo_vm_update_pte(struct 
amdgpu_cs_parser *p, ··· 772 810 return 0; 773 811 } 774 812 775 - static int amdgpu_cs_parser_prepare_job(struct amdgpu_cs_parser *sched_job) 813 + static int amdgpu_cs_free_job(struct amdgpu_job *sched_job) 776 814 { 777 - int r, i; 778 - struct amdgpu_cs_parser *parser = sched_job; 779 - struct amdgpu_device *adev = sched_job->adev; 780 - bool reserved_buffers = false; 781 - 782 - r = amdgpu_cs_parser_relocs(parser); 783 - if (r) { 784 - if (r != -ERESTARTSYS) { 785 - if (r == -ENOMEM) 786 - DRM_ERROR("Not enough memory for command submission!\n"); 787 - else 788 - DRM_ERROR("Failed to process the buffer list %d!\n", r); 789 - } 790 - } 791 - 792 - if (!r) { 793 - reserved_buffers = true; 794 - r = amdgpu_cs_ib_fill(adev, parser); 795 - } 796 - if (!r) { 797 - r = amdgpu_cs_dependencies(adev, parser); 798 - if (r) 799 - DRM_ERROR("Failed in the dependencies handling %d!\n", r); 800 - } 801 - if (r) { 802 - amdgpu_cs_parser_fini(parser, r, reserved_buffers); 803 - return r; 804 - } 805 - 806 - for (i = 0; i < parser->num_ibs; i++) 807 - trace_amdgpu_cs(parser, i); 808 - 809 - r = amdgpu_cs_ib_vm_chunk(adev, parser); 810 - return r; 811 - } 812 - 813 - static struct amdgpu_ring *amdgpu_cs_parser_get_ring( 814 - struct amdgpu_device *adev, 815 - struct amdgpu_cs_parser *parser) 816 - { 817 - int i, r; 818 - 819 - struct amdgpu_cs_chunk *chunk; 820 - struct drm_amdgpu_cs_chunk_ib *chunk_ib; 821 - struct amdgpu_ring *ring; 822 - for (i = 0; i < parser->nchunks; i++) { 823 - chunk = &parser->chunks[i]; 824 - chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata; 825 - 826 - if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) 827 - continue; 828 - 829 - r = amdgpu_cs_get_ring(adev, chunk_ib->ip_type, 830 - chunk_ib->ip_instance, chunk_ib->ring, 831 - &ring); 832 - if (r) 833 - return NULL; 834 - break; 835 - } 836 - return ring; 815 + int i; 816 + if (sched_job->ibs) 817 + for (i = 0; i < sched_job->num_ibs; i++) 818 + amdgpu_ib_free(sched_job->adev, &sched_job->ibs[i]); 819 + kfree(sched_job->ibs); 820 + if (sched_job->uf.bo) 821 + drm_gem_object_unreference_unlocked(&sched_job->uf.bo->gem_base); 822 + return 0; 837 823 } 838 824 839 825 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ··· 789 879 struct amdgpu_device *adev = dev->dev_private; 790 880 union drm_amdgpu_cs *cs = data; 791 881 struct amdgpu_cs_parser *parser; 792 - int r; 882 + bool reserved_buffers = false; 883 + int i, r; 793 884 794 885 down_read(&adev->exclusive_lock); 795 886 if (!adev->accel_working) { ··· 810 899 return r; 811 900 } 812 901 813 - if (amdgpu_enable_scheduler && parser->num_ibs) { 814 - struct amdgpu_ring * ring = 815 - amdgpu_cs_parser_get_ring(adev, parser); 816 - r = amdgpu_cs_parser_prepare_job(parser); 817 - if (r) 818 - goto out; 819 - parser->ring = ring; 820 - parser->free_job = amdgpu_cs_parser_free_job; 821 - mutex_lock(&parser->job_lock); 822 - r = amd_sched_push_job(ring->scheduler, 823 - &parser->ctx->rings[ring->idx].entity, 824 - parser, 825 - &parser->s_fence); 826 - if (r) { 827 - mutex_unlock(&parser->job_lock); 828 - goto out; 829 - } 830 - parser->ibs[parser->num_ibs - 1].sequence = 831 - amdgpu_ctx_add_fence(parser->ctx, ring, 832 - &parser->s_fence->base, 833 - parser->s_fence->v_seq); 834 - cs->out.handle = parser->s_fence->v_seq; 835 - list_sort(NULL, &parser->validated, cmp_size_smaller_first); 836 - ttm_eu_fence_buffer_objects(&parser->ticket, 837 - &parser->validated, 838 - &parser->s_fence->base); 839 - 840 - mutex_unlock(&parser->job_lock); 841 - 
up_read(&adev->exclusive_lock); 842 - return 0; 902 + r = amdgpu_cs_parser_relocs(parser); 903 + if (r == -ENOMEM) 904 + DRM_ERROR("Not enough memory for command submission!\n"); 905 + else if (r && r != -ERESTARTSYS) 906 + DRM_ERROR("Failed to process the buffer list %d!\n", r); 907 + else if (!r) { 908 + reserved_buffers = true; 909 + r = amdgpu_cs_ib_fill(adev, parser); 843 910 } 844 - r = amdgpu_cs_parser_prepare_job(parser); 911 + 912 + if (!r) { 913 + r = amdgpu_cs_dependencies(adev, parser); 914 + if (r) 915 + DRM_ERROR("Failed in the dependencies handling %d!\n", r); 916 + } 917 + 845 918 if (r) 846 919 goto out; 847 920 921 + for (i = 0; i < parser->num_ibs; i++) 922 + trace_amdgpu_cs(parser, i); 923 + 924 + r = amdgpu_cs_ib_vm_chunk(adev, parser); 925 + if (r) 926 + goto out; 927 + 928 + if (amdgpu_enable_scheduler && parser->num_ibs) { 929 + struct amdgpu_job *job; 930 + struct amdgpu_ring * ring = parser->ibs->ring; 931 + job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL); 932 + if (!job) 933 + return -ENOMEM; 934 + job->base.sched = ring->scheduler; 935 + job->base.s_entity = &parser->ctx->rings[ring->idx].entity; 936 + job->adev = parser->adev; 937 + job->ibs = parser->ibs; 938 + job->num_ibs = parser->num_ibs; 939 + job->base.owner = parser->filp; 940 + mutex_init(&job->job_lock); 941 + if (job->ibs[job->num_ibs - 1].user) { 942 + memcpy(&job->uf, &parser->uf, 943 + sizeof(struct amdgpu_user_fence)); 944 + job->ibs[job->num_ibs - 1].user = &job->uf; 945 + } 946 + 947 + job->free_job = amdgpu_cs_free_job; 948 + mutex_lock(&job->job_lock); 949 + r = amd_sched_entity_push_job((struct amd_sched_job *)job); 950 + if (r) { 951 + mutex_unlock(&job->job_lock); 952 + amdgpu_cs_free_job(job); 953 + kfree(job); 954 + goto out; 955 + } 956 + cs->out.handle = 957 + amdgpu_ctx_add_fence(parser->ctx, ring, 958 + &job->base.s_fence->base); 959 + parser->ibs[parser->num_ibs - 1].sequence = cs->out.handle; 960 + 961 + list_sort(NULL, &parser->validated, cmp_size_smaller_first); 962 + ttm_eu_fence_buffer_objects(&parser->ticket, 963 + &parser->validated, 964 + &job->base.s_fence->base); 965 + 966 + mutex_unlock(&job->job_lock); 967 + amdgpu_cs_parser_fini_late(parser); 968 + up_read(&adev->exclusive_lock); 969 + return 0; 970 + } 971 + 848 972 cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence; 849 973 out: 850 - amdgpu_cs_parser_fini(parser, r, true); 974 + amdgpu_cs_parser_fini(parser, r, reserved_buffers); 851 975 up_read(&adev->exclusive_lock); 852 976 r = amdgpu_cs_handle_lockup(adev, r); 853 977 return r;
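Note on ownership in the reworked CS ioctl above: the job is handed over to the scheduler only on a successful push. Condensed sketch of that contract, using the names from the diff (comments are interpretation):

	r = amd_sched_entity_push_job((struct amd_sched_job *)job);
	if (r) {
		/* push failed: the ioctl still owns the job and must free it */
		mutex_unlock(&job->job_lock);
		amdgpu_cs_free_job(job);
		kfree(job);
		goto out;
	}
	/* push succeeded: the scheduler owns the job; it is released in
	 * amdgpu_sched_process_job() once the scheduler fence is processed */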
+5 -15
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
··· 229 229 } 230 230 231 231 uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, 232 - struct fence *fence, uint64_t queued_seq) 232 + struct fence *fence) 233 233 { 234 234 struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; 235 - uint64_t seq = 0; 235 + uint64_t seq = cring->sequence; 236 236 unsigned idx = 0; 237 237 struct fence *other = NULL; 238 238 239 - if (amdgpu_enable_scheduler) 240 - seq = queued_seq; 241 - else 242 - seq = cring->sequence; 243 239 idx = seq % AMDGPU_CTX_MAX_CS_PENDING; 244 240 other = cring->fences[idx]; 245 241 if (other) { ··· 249 253 250 254 spin_lock(&ctx->ring_lock); 251 255 cring->fences[idx] = fence; 252 - if (!amdgpu_enable_scheduler) 253 - cring->sequence++; 256 + cring->sequence++; 254 257 spin_unlock(&ctx->ring_lock); 255 258 256 259 fence_put(other); ··· 262 267 { 263 268 struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; 264 269 struct fence *fence; 265 - uint64_t queued_seq; 266 270 267 271 spin_lock(&ctx->ring_lock); 268 - if (amdgpu_enable_scheduler) 269 - queued_seq = amd_sched_next_queued_seq(&cring->entity); 270 - else 271 - queued_seq = cring->sequence; 272 272 273 - if (seq >= queued_seq) { 273 + if (seq >= cring->sequence) { 274 274 spin_unlock(&ctx->ring_lock); 275 275 return ERR_PTR(-EINVAL); 276 276 } 277 277 278 278 279 - if (seq + AMDGPU_CTX_MAX_CS_PENDING < queued_seq) { 279 + if (seq + AMDGPU_CTX_MAX_CS_PENDING < cring->sequence) { 280 280 spin_unlock(&ctx->ring_lock); 281 281 return NULL; 282 282 }
+2 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
··· 49 49 /* 50 50 * KMS wrapper. 51 51 * - 3.0.0 - initial driver 52 + * - 3.1.0 - allow reading more status registers (GRBM, SRBM, SDMA, CP) 52 53 */ 53 54 #define KMS_DRIVER_MAJOR 3 54 - #define KMS_DRIVER_MINOR 0 55 + #define KMS_DRIVER_MINOR 1 55 56 #define KMS_DRIVER_PATCHLEVEL 0 56 57 57 58 int amdgpu_vram_limit = 0;
+71 -28
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
··· 626 626 ring->fence_drv.ring = ring; 627 627 628 628 if (amdgpu_enable_scheduler) { 629 - ring->scheduler = amd_sched_create((void *)ring->adev, 630 - &amdgpu_sched_ops, 631 - ring->idx, 5, 0, 632 - amdgpu_sched_hw_submission); 629 + ring->scheduler = amd_sched_create(&amdgpu_sched_ops, 630 + ring->idx, 631 + amdgpu_sched_hw_submission, 632 + (void *)ring->adev); 633 633 if (!ring->scheduler) 634 634 DRM_ERROR("Failed to create scheduler on ring %d.\n", 635 635 ring->idx); ··· 836 836 return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags); 837 837 } 838 838 839 - static inline bool amdgpu_test_signaled_any(struct amdgpu_fence **fences) 839 + static bool amdgpu_test_signaled_any(struct fence **fences, uint32_t count) 840 840 { 841 841 int idx; 842 - struct amdgpu_fence *fence; 842 + struct fence *fence; 843 843 844 - idx = 0; 845 - for (idx = 0; idx < AMDGPU_MAX_RINGS; ++idx) { 844 + for (idx = 0; idx < count; ++idx) { 846 845 fence = fences[idx]; 847 846 if (fence) { 848 - if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags)) 847 + if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) 849 848 return true; 850 849 } 851 850 } 852 851 return false; 852 + } 853 + 854 + static bool amdgpu_test_signaled_all(struct fence **fences, uint32_t count) 855 + { 856 + int idx; 857 + struct fence *fence; 858 + 859 + for (idx = 0; idx < count; ++idx) { 860 + fence = fences[idx]; 861 + if (fence) { 862 + if (!test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) 863 + return false; 864 + } 865 + } 866 + 867 + return true; 853 868 } 854 869 855 870 struct amdgpu_wait_cb { ··· 882 867 static signed long amdgpu_fence_default_wait(struct fence *f, bool intr, 883 868 signed long t) 884 869 { 885 - struct amdgpu_fence *array[AMDGPU_MAX_RINGS]; 886 870 struct amdgpu_fence *fence = to_amdgpu_fence(f); 887 871 struct amdgpu_device *adev = fence->ring->adev; 888 872 889 - memset(&array[0], 0, sizeof(array)); 890 - array[0] = fence; 891 - 892 - return amdgpu_fence_wait_any(adev, array, intr, t); 873 + return amdgpu_fence_wait_multiple(adev, &f, 1, false, intr, t); 893 874 } 894 875 895 - /* wait until any fence in array signaled */ 896 - signed long amdgpu_fence_wait_any(struct amdgpu_device *adev, 897 - struct amdgpu_fence **array, bool intr, signed long t) 876 + /** 877 + * Wait the fence array with timeout 878 + * 879 + * @adev: amdgpu device 880 + * @array: the fence array with amdgpu fence pointer 881 + * @count: the number of the fence array 882 + * @wait_all: the flag of wait all(true) or wait any(false) 883 + * @intr: when sleep, set the current task interruptable or not 884 + * @t: timeout to wait 885 + * 886 + * If wait_all is true, it will return when all fences are signaled or timeout. 887 + * If wait_all is false, it will return when any fence is signaled or timeout. 
888 + */ 889 + signed long amdgpu_fence_wait_multiple(struct amdgpu_device *adev, 890 + struct fence **array, 891 + uint32_t count, 892 + bool wait_all, 893 + bool intr, 894 + signed long t) 898 895 { 899 896 long idx = 0; 900 - struct amdgpu_wait_cb cb[AMDGPU_MAX_RINGS]; 901 - struct amdgpu_fence *fence; 897 + struct amdgpu_wait_cb *cb; 898 + struct fence *fence; 902 899 903 900 BUG_ON(!array); 904 901 905 - for (idx = 0; idx < AMDGPU_MAX_RINGS; ++idx) { 902 + cb = kcalloc(count, sizeof(struct amdgpu_wait_cb), GFP_KERNEL); 903 + if (cb == NULL) { 904 + t = -ENOMEM; 905 + goto err_free_cb; 906 + } 907 + 908 + for (idx = 0; idx < count; ++idx) { 906 909 fence = array[idx]; 907 910 if (fence) { 908 911 cb[idx].task = current; 909 - if (fence_add_callback(&fence->base, 910 - &cb[idx].base, amdgpu_fence_wait_cb)) 911 - return t; /* return if fence is already signaled */ 912 + if (fence_add_callback(fence, 913 + &cb[idx].base, amdgpu_fence_wait_cb)) { 914 + /* The fence is already signaled */ 915 + if (wait_all) 916 + continue; 917 + else 918 + goto fence_rm_cb; 919 + } 912 920 } 913 921 } 914 922 ··· 945 907 * amdgpu_test_signaled_any must be called after 946 908 * set_current_state to prevent a race with wake_up_process 947 909 */ 948 - if (amdgpu_test_signaled_any(array)) 910 + if (!wait_all && amdgpu_test_signaled_any(array, count)) 911 + break; 912 + if (wait_all && amdgpu_test_signaled_all(array, count)) 949 913 break; 950 914 951 915 if (adev->needs_reset) { ··· 963 923 964 924 __set_current_state(TASK_RUNNING); 965 925 966 - idx = 0; 967 - for (idx = 0; idx < AMDGPU_MAX_RINGS; ++idx) { 926 + fence_rm_cb: 927 + for (idx = 0; idx < count; ++idx) { 968 928 fence = array[idx]; 969 - if (fence) 970 - fence_remove_callback(&fence->base, &cb[idx].base); 929 + if (fence && cb[idx].base.func) 930 + fence_remove_callback(fence, &cb[idx].base); 971 931 } 932 + 933 + err_free_cb: 934 + kfree(cb); 972 935 973 936 return t; 974 937 }
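amdgpu_fence_wait_any() becomes amdgpu_fence_wait_multiple(), taking a plain struct fence array, an explicit count and a wait_all flag. A minimal caller sketch (fence_a/fence_b are placeholders); the return convention follows the SA manager's use, where t > 0 means success:

	struct fence *fences[2] = { fence_a, fence_b };	/* hypothetical fences */
	signed long t;
	int r;

	/* wait_all = true: block until both signal (or timeout/interrupt) */
	t = amdgpu_fence_wait_multiple(adev, fences, 2, true, false,
				       MAX_SCHEDULE_TIMEOUT);
	r = (t > 0) ? 0 : t;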
+8 -25
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
··· 73 73 74 74 if (!vm) 75 75 ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); 76 - else 77 - ib->gpu_addr = 0; 78 - 79 - } else { 80 - ib->sa_bo = NULL; 81 - ib->ptr = NULL; 82 - ib->gpu_addr = 0; 83 76 } 84 77 85 78 amdgpu_sync_create(&ib->sync); 86 79 87 80 ib->ring = ring; 88 - ib->fence = NULL; 89 - ib->user = NULL; 90 81 ib->vm = vm; 91 - ib->ctx = NULL; 92 - ib->gds_base = 0; 93 - ib->gds_size = 0; 94 - ib->gws_base = 0; 95 - ib->gws_size = 0; 96 - ib->oa_base = 0; 97 - ib->oa_size = 0; 98 - ib->flags = 0; 99 82 100 83 return 0; 101 84 } ··· 93 110 */ 94 111 void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib) 95 112 { 96 - amdgpu_sync_free(adev, &ib->sync, ib->fence); 97 - amdgpu_sa_bo_free(adev, &ib->sa_bo, ib->fence); 113 + amdgpu_sync_free(adev, &ib->sync, &ib->fence->base); 114 + amdgpu_sa_bo_free(adev, &ib->sa_bo, &ib->fence->base); 98 115 amdgpu_fence_unref(&ib->fence); 99 116 } 100 117 ··· 126 143 struct amdgpu_ring *ring; 127 144 struct amdgpu_ctx *ctx, *old_ctx; 128 145 struct amdgpu_vm *vm; 129 - uint64_t sequence; 130 146 unsigned i; 131 147 int r = 0; 132 148 ··· 140 158 dev_err(adev->dev, "couldn't schedule ib\n"); 141 159 return -EINVAL; 142 160 } 143 - 161 + r = amdgpu_sync_wait(&ibs->sync); 162 + if (r) { 163 + dev_err(adev->dev, "IB sync failed (%d).\n", r); 164 + return r; 165 + } 144 166 r = amdgpu_ring_lock(ring, (256 + AMDGPU_NUM_SYNCS * 8) * num_ibs); 145 167 if (r) { 146 168 dev_err(adev->dev, "scheduling IB failed (%d).\n", r); ··· 202 216 return r; 203 217 } 204 218 205 - sequence = amdgpu_enable_scheduler ? ib->sequence : 0; 206 - 207 219 if (!amdgpu_enable_scheduler && ib->ctx) 208 220 ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring, 209 - &ib->fence->base, 210 - sequence); 221 + &ib->fence->base); 211 222 212 223 /* wrap the last IB with fence */ 213 224 if (ib->user) {
+7 -13
drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
··· 98 98 /* add 8 bytes for the rptr/wptr shadows and 99 99 * add them to the end of the ring allocation. 100 100 */ 101 - adev->irq.ih.ring = kzalloc(adev->irq.ih.ring_size + 8, GFP_KERNEL); 101 + adev->irq.ih.ring = pci_alloc_consistent(adev->pdev, 102 + adev->irq.ih.ring_size + 8, 103 + &adev->irq.ih.rb_dma_addr); 102 104 if (adev->irq.ih.ring == NULL) 103 105 return -ENOMEM; 104 - adev->irq.ih.rb_dma_addr = pci_map_single(adev->pdev, 105 - (void *)adev->irq.ih.ring, 106 - adev->irq.ih.ring_size, 107 - PCI_DMA_BIDIRECTIONAL); 108 - if (pci_dma_mapping_error(adev->pdev, adev->irq.ih.rb_dma_addr)) { 109 - dev_err(&adev->pdev->dev, "Failed to DMA MAP the IH RB page\n"); 110 - kfree((void *)adev->irq.ih.ring); 111 - return -ENOMEM; 112 - } 106 + memset((void *)adev->irq.ih.ring, 0, adev->irq.ih.ring_size + 8); 113 107 adev->irq.ih.wptr_offs = (adev->irq.ih.ring_size / 4) + 0; 114 108 adev->irq.ih.rptr_offs = (adev->irq.ih.ring_size / 4) + 1; 115 109 } ··· 143 149 /* add 8 bytes for the rptr/wptr shadows and 144 150 * add them to the end of the ring allocation. 145 151 */ 146 - pci_unmap_single(adev->pdev, adev->irq.ih.rb_dma_addr, 147 - adev->irq.ih.ring_size + 8, PCI_DMA_BIDIRECTIONAL); 148 - kfree((void *)adev->irq.ih.ring); 152 + pci_free_consistent(adev->pdev, adev->irq.ih.ring_size + 8, 153 + (void *)adev->irq.ih.ring, 154 + adev->irq.ih.rb_dma_addr); 149 155 adev->irq.ih.ring = NULL; 150 156 } 151 157 } else {
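The IH fix switches the ring from kzalloc() plus a streaming pci_map_single() mapping to a coherent allocation, presumably so hardware writes to the ring and the rptr/wptr shadows are visible to the CPU without explicit DMA syncs. The resulting pairing, as a sketch of the code above:

	/* coherent allocation: returns a CPU pointer and fills in the
	 * bus address the IH block writes to */
	adev->irq.ih.ring = pci_alloc_consistent(adev->pdev,
						 adev->irq.ih.ring_size + 8,
						 &adev->irq.ih.rb_dma_addr);
	...
	pci_free_consistent(adev->pdev, adev->irq.ih.ring_size + 8,
			    (void *)adev->irq.ih.ring,
			    adev->irq.ih.rb_dma_addr);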
+2 -2
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
··· 560 560 if (!fpriv) 561 561 return; 562 562 563 + amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); 564 + 563 565 amdgpu_vm_fini(adev, &fpriv->vm); 564 566 565 567 idr_for_each_entry(&fpriv->bo_list_handles, list, handle) ··· 569 567 570 568 idr_destroy(&fpriv->bo_list_handles); 571 569 mutex_destroy(&fpriv->bo_list_lock); 572 - 573 - amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); 574 570 575 571 kfree(fpriv); 576 572 file_priv->driver_priv = NULL;
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
··· 193 193 unsigned size, unsigned align); 194 194 void amdgpu_sa_bo_free(struct amdgpu_device *adev, 195 195 struct amdgpu_sa_bo **sa_bo, 196 - struct amdgpu_fence *fence); 196 + struct fence *fence); 197 197 #if defined(CONFIG_DEBUG_FS) 198 198 void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager, 199 199 struct seq_file *m);
+39 -14
drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
··· 139 139 return r; 140 140 } 141 141 142 + static uint32_t amdgpu_sa_get_ring_from_fence(struct fence *f) 143 + { 144 + struct amdgpu_fence *a_fence; 145 + struct amd_sched_fence *s_fence; 146 + 147 + s_fence = to_amd_sched_fence(f); 148 + if (s_fence) 149 + return s_fence->scheduler->ring_id; 150 + a_fence = to_amdgpu_fence(f); 151 + if (a_fence) 152 + return a_fence->ring->idx; 153 + return 0; 154 + } 155 + 142 156 static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo) 143 157 { 144 158 struct amdgpu_sa_manager *sa_manager = sa_bo->manager; ··· 161 147 } 162 148 list_del_init(&sa_bo->olist); 163 149 list_del_init(&sa_bo->flist); 164 - amdgpu_fence_unref(&sa_bo->fence); 150 + fence_put(sa_bo->fence); 165 151 kfree(sa_bo); 166 152 } 167 153 ··· 175 161 sa_bo = list_entry(sa_manager->hole->next, struct amdgpu_sa_bo, olist); 176 162 list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) { 177 163 if (sa_bo->fence == NULL || 178 - !fence_is_signaled(&sa_bo->fence->base)) { 164 + !fence_is_signaled(sa_bo->fence)) { 179 165 return; 180 166 } 181 167 amdgpu_sa_bo_remove_locked(sa_bo); ··· 260 246 } 261 247 262 248 static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager, 263 - struct amdgpu_fence **fences, 249 + struct fence **fences, 264 250 unsigned *tries) 265 251 { 266 252 struct amdgpu_sa_bo *best_bo = NULL; ··· 289 275 sa_bo = list_first_entry(&sa_manager->flist[i], 290 276 struct amdgpu_sa_bo, flist); 291 277 292 - if (!fence_is_signaled(&sa_bo->fence->base)) { 278 + if (!fence_is_signaled(sa_bo->fence)) { 293 279 fences[i] = sa_bo->fence; 294 280 continue; 295 281 } ··· 313 299 } 314 300 315 301 if (best_bo) { 316 - ++tries[best_bo->fence->ring->idx]; 302 + uint32_t idx = amdgpu_sa_get_ring_from_fence(best_bo->fence); 303 + ++tries[idx]; 317 304 sa_manager->hole = best_bo->olist.prev; 318 305 319 306 /* we knew that this one is signaled, ··· 330 315 struct amdgpu_sa_bo **sa_bo, 331 316 unsigned size, unsigned align) 332 317 { 333 - struct amdgpu_fence *fences[AMDGPU_MAX_RINGS]; 318 + struct fence *fences[AMDGPU_MAX_RINGS]; 334 319 unsigned tries[AMDGPU_MAX_RINGS]; 335 320 int i, r; 336 321 signed long t; ··· 367 352 } while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries)); 368 353 369 354 spin_unlock(&sa_manager->wq.lock); 370 - t = amdgpu_fence_wait_any(adev, fences, false, MAX_SCHEDULE_TIMEOUT); 355 + t = amdgpu_fence_wait_multiple(adev, fences, AMDGPU_MAX_RINGS, false, false, 356 + MAX_SCHEDULE_TIMEOUT); 371 357 r = (t > 0) ? 
0 : t; 372 358 spin_lock(&sa_manager->wq.lock); 373 359 /* if we have nothing to wait for block */ ··· 388 372 } 389 373 390 374 void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo, 391 - struct amdgpu_fence *fence) 375 + struct fence *fence) 392 376 { 393 377 struct amdgpu_sa_manager *sa_manager; 394 378 ··· 398 382 399 383 sa_manager = (*sa_bo)->manager; 400 384 spin_lock(&sa_manager->wq.lock); 401 - if (fence && !fence_is_signaled(&fence->base)) { 402 - (*sa_bo)->fence = amdgpu_fence_ref(fence); 403 - list_add_tail(&(*sa_bo)->flist, 404 - &sa_manager->flist[fence->ring->idx]); 385 + if (fence && !fence_is_signaled(fence)) { 386 + uint32_t idx; 387 + (*sa_bo)->fence = fence_get(fence); 388 + idx = amdgpu_sa_get_ring_from_fence(fence); 389 + list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]); 405 390 } else { 406 391 amdgpu_sa_bo_remove_locked(*sa_bo); 407 392 } ··· 429 412 seq_printf(m, "[0x%010llx 0x%010llx] size %8lld", 430 413 soffset, eoffset, eoffset - soffset); 431 414 if (i->fence) { 432 - seq_printf(m, " protected by 0x%016llx on ring %d", 433 - i->fence->seq, i->fence->ring->idx); 415 + struct amdgpu_fence *a_fence = to_amdgpu_fence(i->fence); 416 + struct amd_sched_fence *s_fence = to_amd_sched_fence(i->fence); 417 + if (a_fence) 418 + seq_printf(m, " protected by 0x%016llx on ring %d", 419 + a_fence->seq, a_fence->ring->idx); 420 + if (s_fence) 421 + seq_printf(m, " protected by 0x%016x on ring %d", 422 + s_fence->base.seqno, 423 + s_fence->scheduler->ring_id); 424 + 434 425 } 435 426 seq_printf(m, "\n"); 436 427 }
+35 -59
drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
··· 27 27 #include <drm/drmP.h> 28 28 #include "amdgpu.h" 29 29 30 - static int amdgpu_sched_prepare_job(struct amd_gpu_scheduler *sched, 31 - struct amd_sched_entity *entity, 32 - struct amd_sched_job *job) 30 + static struct fence *amdgpu_sched_run_job(struct amd_sched_job *job) 33 31 { 34 - int r = 0; 35 - struct amdgpu_cs_parser *sched_job; 36 - if (!job || !job->data) { 37 - DRM_ERROR("job is null\n"); 38 - return -EINVAL; 39 - } 40 - 41 - sched_job = (struct amdgpu_cs_parser *)job->data; 42 - if (sched_job->prepare_job) { 43 - r = sched_job->prepare_job(sched_job); 44 - if (r) { 45 - DRM_ERROR("Prepare job error\n"); 46 - schedule_work(&sched_job->job_work); 47 - } 48 - } 49 - return r; 50 - } 51 - 52 - static struct fence *amdgpu_sched_run_job(struct amd_gpu_scheduler *sched, 53 - struct amd_sched_entity *entity, 54 - struct amd_sched_job *job) 55 - { 56 - int r = 0; 57 - struct amdgpu_cs_parser *sched_job; 32 + struct amdgpu_job *sched_job; 58 33 struct amdgpu_fence *fence; 34 + int r; 59 35 60 - if (!job || !job->data) { 36 + if (!job) { 61 37 DRM_ERROR("job is null\n"); 62 38 return NULL; 63 39 } 64 - sched_job = (struct amdgpu_cs_parser *)job->data; 40 + sched_job = (struct amdgpu_job *)job; 65 41 mutex_lock(&sched_job->job_lock); 66 42 r = amdgpu_ib_schedule(sched_job->adev, 67 43 sched_job->num_ibs, 68 44 sched_job->ibs, 69 - sched_job->filp); 45 + sched_job->base.owner); 70 46 if (r) 71 47 goto err; 72 48 fence = amdgpu_fence_ref(sched_job->ibs[sched_job->num_ibs - 1].fence); 73 49 74 - if (sched_job->run_job) { 75 - r = sched_job->run_job(sched_job); 76 - if (r) 77 - goto err; 78 - } 50 + if (sched_job->free_job) 51 + sched_job->free_job(sched_job); 79 52 80 53 mutex_unlock(&sched_job->job_lock); 81 54 return &fence->base; ··· 56 83 err: 57 84 DRM_ERROR("Run job error\n"); 58 85 mutex_unlock(&sched_job->job_lock); 59 - schedule_work(&sched_job->job_work); 86 + job->sched->ops->process_job(job); 60 87 return NULL; 61 88 } 62 89 63 - static void amdgpu_sched_process_job(struct amd_gpu_scheduler *sched, 64 - struct amd_sched_job *job) 90 + static void amdgpu_sched_process_job(struct amd_sched_job *job) 65 91 { 66 - struct amdgpu_cs_parser *sched_job; 92 + struct amdgpu_job *sched_job; 67 93 68 - if (!job || !job->data) { 94 + if (!job) { 69 95 DRM_ERROR("job is null\n"); 70 96 return; 71 97 } 72 - sched_job = (struct amdgpu_cs_parser *)job->data; 73 - schedule_work(&sched_job->job_work); 98 + sched_job = (struct amdgpu_job *)job; 99 + /* after processing job, free memory */ 100 + fence_put(&sched_job->base.s_fence->base); 101 + kfree(sched_job); 74 102 } 75 103 76 104 struct amd_sched_backend_ops amdgpu_sched_ops = { 77 - .prepare_job = amdgpu_sched_prepare_job, 78 105 .run_job = amdgpu_sched_run_job, 79 106 .process_job = amdgpu_sched_process_job 80 107 }; ··· 83 110 struct amdgpu_ring *ring, 84 111 struct amdgpu_ib *ibs, 85 112 unsigned num_ibs, 86 - int (*free_job)(struct amdgpu_cs_parser *), 113 + int (*free_job)(struct amdgpu_job *), 87 114 void *owner, 88 115 struct fence **f) 89 116 { 90 117 int r = 0; 91 118 if (amdgpu_enable_scheduler) { 92 - struct amdgpu_cs_parser *sched_job = 93 - amdgpu_cs_parser_create(adev, owner, &adev->kernel_ctx, 94 - ibs, num_ibs); 95 - if(!sched_job) { 119 + struct amdgpu_job *job = 120 + kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL); 121 + if (!job) 96 122 return -ENOMEM; 97 - } 98 - sched_job->free_job = free_job; 99 - mutex_lock(&sched_job->job_lock); 100 - r = amd_sched_push_job(ring->scheduler, 101 - 
&adev->kernel_ctx.rings[ring->idx].entity, 102 - sched_job, &sched_job->s_fence); 123 + job->base.sched = ring->scheduler; 124 + job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity; 125 + job->adev = adev; 126 + job->ibs = ibs; 127 + job->num_ibs = num_ibs; 128 + job->base.owner = owner; 129 + mutex_init(&job->job_lock); 130 + job->free_job = free_job; 131 + mutex_lock(&job->job_lock); 132 + r = amd_sched_entity_push_job((struct amd_sched_job *)job); 103 133 if (r) { 104 - mutex_unlock(&sched_job->job_lock); 105 - kfree(sched_job); 134 + mutex_unlock(&job->job_lock); 135 + kfree(job); 106 136 return r; 107 137 } 108 - ibs[num_ibs - 1].sequence = sched_job->s_fence->v_seq; 109 - *f = fence_get(&sched_job->s_fence->base); 110 - mutex_unlock(&sched_job->job_lock); 138 + *f = fence_get(&job->base.s_fence->base); 139 + mutex_unlock(&job->job_lock); 111 140 } else { 112 141 r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner); 113 142 if (r) 114 143 return r; 115 144 *f = fence_get(&ibs[num_ibs - 1].fence->base); 116 145 } 146 + 117 147 return 0; 118 148 }
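With prepare_job dropped, the scheduler backend is reduced to two callbacks. The resulting ops table, with the contract summarized in comments (interpretation of the code above):

struct amd_sched_backend_ops amdgpu_sched_ops = {
	/* called from the scheduler thread: emits the IBs on the ring and
	 * returns the hardware fence of the last IB (or NULL on error) */
	.run_job	= amdgpu_sched_run_job,
	/* called once the job is processed: drops the scheduler-fence
	 * reference and frees the amdgpu_job */
	.process_job	= amdgpu_sched_process_job
};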
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
··· 87 87 88 88 void amdgpu_semaphore_free(struct amdgpu_device *adev, 89 89 struct amdgpu_semaphore **semaphore, 90 - struct amdgpu_fence *fence) 90 + struct fence *fence) 91 91 { 92 92 if (semaphore == NULL || *semaphore == NULL) { 93 93 return;
+92 -11
drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
··· 32 32 #include "amdgpu.h" 33 33 #include "amdgpu_trace.h" 34 34 35 + struct amdgpu_sync_entry { 36 + struct hlist_node node; 37 + struct fence *fence; 38 + }; 39 + 35 40 /** 36 41 * amdgpu_sync_create - zero init sync object 37 42 * ··· 54 49 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) 55 50 sync->sync_to[i] = NULL; 56 51 52 + hash_init(sync->fences); 57 53 sync->last_vm_update = NULL; 54 + } 55 + 56 + static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct fence *f) 57 + { 58 + struct amdgpu_fence *a_fence = to_amdgpu_fence(f); 59 + struct amd_sched_fence *s_fence = to_amd_sched_fence(f); 60 + 61 + if (a_fence) 62 + return a_fence->ring->adev == adev; 63 + if (s_fence) 64 + return (struct amdgpu_device *)s_fence->scheduler->priv == adev; 65 + return false; 66 + } 67 + 68 + static bool amdgpu_sync_test_owner(struct fence *f, void *owner) 69 + { 70 + struct amdgpu_fence *a_fence = to_amdgpu_fence(f); 71 + struct amd_sched_fence *s_fence = to_amd_sched_fence(f); 72 + if (s_fence) 73 + return s_fence->owner == owner; 74 + if (a_fence) 75 + return a_fence->owner == owner; 76 + return false; 58 77 } 59 78 60 79 /** ··· 91 62 int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, 92 63 struct fence *f) 93 64 { 65 + struct amdgpu_sync_entry *e; 94 66 struct amdgpu_fence *fence; 95 67 struct amdgpu_fence *other; 68 + struct fence *tmp, *later; 96 69 97 70 if (!f) 98 71 return 0; 99 72 73 + if (amdgpu_sync_same_dev(adev, f) && 74 + amdgpu_sync_test_owner(f, AMDGPU_FENCE_OWNER_VM)) { 75 + if (sync->last_vm_update) { 76 + tmp = sync->last_vm_update; 77 + BUG_ON(f->context != tmp->context); 78 + later = (f->seqno - tmp->seqno <= INT_MAX) ? f : tmp; 79 + sync->last_vm_update = fence_get(later); 80 + fence_put(tmp); 81 + } else 82 + sync->last_vm_update = fence_get(f); 83 + } 84 + 100 85 fence = to_amdgpu_fence(f); 101 - if (!fence || fence->ring->adev != adev) 102 - return fence_wait(f, true); 86 + if (!fence || fence->ring->adev != adev) { 87 + hash_for_each_possible(sync->fences, e, node, f->context) { 88 + struct fence *new; 89 + if (unlikely(e->fence->context != f->context)) 90 + continue; 91 + new = fence_get(fence_later(e->fence, f)); 92 + if (new) { 93 + fence_put(e->fence); 94 + e->fence = new; 95 + } 96 + return 0; 97 + } 98 + 99 + e = kmalloc(sizeof(struct amdgpu_sync_entry), GFP_KERNEL); 100 + if (!e) 101 + return -ENOMEM; 102 + 103 + hash_add(sync->fences, &e->node, f->context); 104 + e->fence = fence_get(f); 105 + return 0; 106 + } 103 107 104 108 other = sync->sync_to[fence->ring->idx]; 105 109 sync->sync_to[fence->ring->idx] = amdgpu_fence_ref( 106 110 amdgpu_fence_later(fence, other)); 107 111 amdgpu_fence_unref(&other); 108 - 109 - if (fence->owner == AMDGPU_FENCE_OWNER_VM) { 110 - other = sync->last_vm_update; 111 - sync->last_vm_update = amdgpu_fence_ref( 112 - amdgpu_fence_later(fence, other)); 113 - amdgpu_fence_unref(&other); 114 - } 115 112 116 113 return 0; 117 114 } ··· 200 145 break; 201 146 } 202 147 return r; 148 + } 149 + 150 + int amdgpu_sync_wait(struct amdgpu_sync *sync) 151 + { 152 + struct amdgpu_sync_entry *e; 153 + struct hlist_node *tmp; 154 + int i, r; 155 + 156 + hash_for_each_safe(sync->fences, i, tmp, e, node) { 157 + r = fence_wait(e->fence, false); 158 + if (r) 159 + return r; 160 + 161 + hash_del(&e->node); 162 + fence_put(e->fence); 163 + kfree(e); 164 + } 165 + return 0; 203 166 } 204 167 205 168 /** ··· 307 234 */ 308 235 void amdgpu_sync_free(struct amdgpu_device *adev, 309 236 struct amdgpu_sync *sync, 310 - struct 
amdgpu_fence *fence) 237 + struct fence *fence) 311 238 { 239 + struct amdgpu_sync_entry *e; 240 + struct hlist_node *tmp; 312 241 unsigned i; 242 + 243 + hash_for_each_safe(sync->fences, i, tmp, e, node) { 244 + hash_del(&e->node); 245 + fence_put(e->fence); 246 + kfree(e); 247 + } 313 248 314 249 for (i = 0; i < AMDGPU_NUM_SYNCS; ++i) 315 250 amdgpu_semaphore_free(adev, &sync->semaphores[i], fence); ··· 325 244 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) 326 245 amdgpu_fence_unref(&sync->sync_to[i]); 327 246 328 - amdgpu_fence_unref(&sync->last_vm_update); 247 + fence_put(sync->last_vm_update); 329 248 }
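amdgpu_sync now tracks fences it cannot order on a ring (scheduler fences and fences from other devices) in a hash table keyed by fence context, and amdgpu_sync_wait() lets the CPU wait on them before submission. Minimal usage sketch, assuming resv is the buffer's reservation object:

	struct amdgpu_sync sync;
	int r;

	amdgpu_sync_create(&sync);
	r = amdgpu_sync_resv(adev, &sync, resv, AMDGPU_FENCE_OWNER_UNDEFINED);
	if (!r)
		r = amdgpu_sync_wait(&sync);	/* CPU-wait on the hashed fences */
	amdgpu_sync_free(adev, &sync, NULL);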
+6 -6
drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
··· 77 77 void *gtt_map, *vram_map; 78 78 void **gtt_start, **gtt_end; 79 79 void **vram_start, **vram_end; 80 - struct amdgpu_fence *fence = NULL; 80 + struct fence *fence = NULL; 81 81 82 82 r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, 83 83 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, gtt_obj + i); ··· 116 116 goto out_lclean_unpin; 117 117 } 118 118 119 - r = fence_wait(&fence->base, false); 119 + r = fence_wait(fence, false); 120 120 if (r) { 121 121 DRM_ERROR("Failed to wait for GTT->VRAM fence %d\n", i); 122 122 goto out_lclean_unpin; 123 123 } 124 124 125 - amdgpu_fence_unref(&fence); 125 + fence_put(fence); 126 126 127 127 r = amdgpu_bo_kmap(vram_obj, &vram_map); 128 128 if (r) { ··· 161 161 goto out_lclean_unpin; 162 162 } 163 163 164 - r = fence_wait(&fence->base, false); 164 + r = fence_wait(fence, false); 165 165 if (r) { 166 166 DRM_ERROR("Failed to wait for VRAM->GTT fence %d\n", i); 167 167 goto out_lclean_unpin; 168 168 } 169 169 170 - amdgpu_fence_unref(&fence); 170 + fence_put(fence); 171 171 172 172 r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map); 173 173 if (r) { ··· 214 214 amdgpu_bo_unref(&gtt_obj[i]); 215 215 } 216 216 if (fence) 217 - amdgpu_fence_unref(&fence); 217 + fence_put(fence); 218 218 break; 219 219 } 220 220
+41 -32
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
··· 228 228 struct amdgpu_device *adev; 229 229 struct amdgpu_ring *ring; 230 230 uint64_t old_start, new_start; 231 - struct amdgpu_fence *fence; 231 + struct fence *fence; 232 232 int r; 233 233 234 234 adev = amdgpu_get_adev(bo->bdev); ··· 269 269 new_mem->num_pages * PAGE_SIZE, /* bytes */ 270 270 bo->resv, &fence); 271 271 /* FIXME: handle copy error */ 272 - r = ttm_bo_move_accel_cleanup(bo, &fence->base, 272 + r = ttm_bo_move_accel_cleanup(bo, fence, 273 273 evict, no_wait_gpu, new_mem); 274 - amdgpu_fence_unref(&fence); 274 + fence_put(fence); 275 275 return r; 276 276 } 277 277 ··· 987 987 uint64_t dst_offset, 988 988 uint32_t byte_count, 989 989 struct reservation_object *resv, 990 - struct amdgpu_fence **fence) 990 + struct fence **fence) 991 991 { 992 992 struct amdgpu_device *adev = ring->adev; 993 - struct amdgpu_sync sync; 994 993 uint32_t max_bytes; 995 994 unsigned num_loops, num_dw; 995 + struct amdgpu_ib *ib; 996 996 unsigned i; 997 997 int r; 998 - 999 - /* sync other rings */ 1000 - amdgpu_sync_create(&sync); 1001 - if (resv) { 1002 - r = amdgpu_sync_resv(adev, &sync, resv, false); 1003 - if (r) { 1004 - DRM_ERROR("sync failed (%d).\n", r); 1005 - amdgpu_sync_free(adev, &sync, NULL); 1006 - return r; 1007 - } 1008 - } 1009 998 1010 999 max_bytes = adev->mman.buffer_funcs->copy_max_bytes; 1011 1000 num_loops = DIV_ROUND_UP(byte_count, max_bytes); 1012 1001 num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw; 1013 1002 1014 - /* for fence and sync */ 1015 - num_dw += 64 + AMDGPU_NUM_SYNCS * 8; 1003 + /* for IB padding */ 1004 + while (num_dw & 0x7) 1005 + num_dw++; 1016 1006 1017 - r = amdgpu_ring_lock(ring, num_dw); 1007 + ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL); 1008 + if (!ib) 1009 + return -ENOMEM; 1010 + 1011 + r = amdgpu_ib_get(ring, NULL, num_dw * 4, ib); 1018 1012 if (r) { 1019 - DRM_ERROR("ring lock failed (%d).\n", r); 1020 - amdgpu_sync_free(adev, &sync, NULL); 1013 + kfree(ib); 1021 1014 return r; 1022 1015 } 1023 1016 1024 - amdgpu_sync_rings(&sync, ring); 1017 + ib->length_dw = 0; 1018 + 1019 + if (resv) { 1020 + r = amdgpu_sync_resv(adev, &ib->sync, resv, 1021 + AMDGPU_FENCE_OWNER_UNDEFINED); 1022 + if (r) { 1023 + DRM_ERROR("sync failed (%d).\n", r); 1024 + goto error_free; 1025 + } 1026 + } 1025 1027 1026 1028 for (i = 0; i < num_loops; i++) { 1027 1029 uint32_t cur_size_in_bytes = min(byte_count, max_bytes); 1028 1030 1029 - amdgpu_emit_copy_buffer(adev, ring, src_offset, dst_offset, 1031 + amdgpu_emit_copy_buffer(adev, ib, src_offset, dst_offset, 1030 1032 cur_size_in_bytes); 1031 1033 1032 1034 src_offset += cur_size_in_bytes; ··· 1036 1034 byte_count -= cur_size_in_bytes; 1037 1035 } 1038 1036 1039 - r = amdgpu_fence_emit(ring, AMDGPU_FENCE_OWNER_MOVE, fence); 1040 - if (r) { 1041 - amdgpu_ring_unlock_undo(ring); 1042 - amdgpu_sync_free(adev, &sync, NULL); 1043 - return r; 1037 + amdgpu_vm_pad_ib(adev, ib); 1038 + WARN_ON(ib->length_dw > num_dw); 1039 + r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1, 1040 + &amdgpu_vm_free_job, 1041 + AMDGPU_FENCE_OWNER_MOVE, 1042 + fence); 1043 + if (r) 1044 + goto error_free; 1045 + 1046 + if (!amdgpu_enable_scheduler) { 1047 + amdgpu_ib_free(adev, ib); 1048 + kfree(ib); 1044 1049 } 1045 - 1046 - amdgpu_ring_unlock_commit(ring); 1047 - amdgpu_sync_free(adev, &sync, *fence); 1048 - 1049 1050 return 0; 1051 + error_free: 1052 + amdgpu_ib_free(adev, ib); 1053 + kfree(ib); 1054 + return r; 1050 1055 } 1051 1056 1052 1057 #if defined(CONFIG_DEBUG_FS)
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
··· 807 807 } 808 808 809 809 static int amdgpu_uvd_free_job( 810 - struct amdgpu_cs_parser *sched_job) 810 + struct amdgpu_job *sched_job) 811 811 { 812 812 amdgpu_ib_free(sched_job->adev, sched_job->ibs); 813 813 kfree(sched_job->ibs);
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
··· 340 340 } 341 341 342 342 static int amdgpu_vce_free_job( 343 - struct amdgpu_cs_parser *sched_job) 343 + struct amdgpu_job *sched_job) 344 344 { 345 345 amdgpu_ib_free(sched_job->adev, sched_job->ibs); 346 346 kfree(sched_job->ibs);
+18 -9
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
··· 200 200 */ 201 201 void amdgpu_vm_flush(struct amdgpu_ring *ring, 202 202 struct amdgpu_vm *vm, 203 - struct amdgpu_fence *updates) 203 + struct fence *updates) 204 204 { 205 205 uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); 206 206 struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx]; 207 - struct amdgpu_fence *flushed_updates = vm_id->flushed_updates; 207 + struct fence *flushed_updates = vm_id->flushed_updates; 208 + bool is_earlier = false; 209 + 210 + if (flushed_updates && updates) { 211 + BUG_ON(flushed_updates->context != updates->context); 212 + is_earlier = (updates->seqno - flushed_updates->seqno <= 213 + INT_MAX) ? true : false; 214 + } 208 215 209 216 if (pd_addr != vm_id->pd_gpu_addr || !flushed_updates || 210 - (updates && amdgpu_fence_is_earlier(flushed_updates, updates))) { 217 + is_earlier) { 211 218 212 219 trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id); 213 - vm_id->flushed_updates = amdgpu_fence_ref( 214 - amdgpu_fence_later(flushed_updates, updates)); 215 - amdgpu_fence_unref(&flushed_updates); 220 + if (is_earlier) { 221 + vm_id->flushed_updates = fence_get(updates); 222 + fence_put(flushed_updates); 223 + } 224 + if (!flushed_updates) 225 + vm_id->flushed_updates = fence_get(updates); 216 226 vm_id->pd_gpu_addr = pd_addr; 217 227 amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr); 218 228 } ··· 316 306 } 317 307 } 318 308 319 - static int amdgpu_vm_free_job( 320 - struct amdgpu_cs_parser *sched_job) 309 + int amdgpu_vm_free_job(struct amdgpu_job *sched_job) 321 310 { 322 311 int i; 323 312 for (i = 0; i < sched_job->num_ibs; i++) ··· 1356 1347 fence_put(vm->page_directory_fence); 1357 1348 1358 1349 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 1359 - amdgpu_fence_unref(&vm->ids[i].flushed_updates); 1350 + fence_put(vm->ids[i].flushed_updates); 1360 1351 amdgpu_fence_unref(&vm->ids[i].last_id_use); 1361 1352 } 1362 1353
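The VM flush now orders updates by raw fence seqno instead of amdgpu_fence_is_earlier(); because both fences share a fence context, the unsigned subtraction is a wrap-safe "is later" test. Spelled out as a tiny helper (hypothetical name, equivalent to the inline check above):

/* true if a was emitted after b; both must share a fence context */
static bool seqno_is_later(struct fence *a, struct fence *b)
{
	return (a->seqno - b->seqno) <= INT_MAX;
}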
+9 -8
drivers/gpu/drm/amd/amdgpu/cik_sdma.c
··· 630 630 gpu_addr = adev->wb.gpu_addr + (index * 4); 631 631 tmp = 0xCAFEDEAD; 632 632 adev->wb.wb[index] = cpu_to_le32(tmp); 633 + memset(&ib, 0, sizeof(ib)); 633 634 r = amdgpu_ib_get(ring, NULL, 256, &ib); 634 635 if (r) { 635 636 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); ··· 1339 1338 * Used by the amdgpu ttm implementation to move pages if 1340 1339 * registered as the asic copy callback. 1341 1340 */ 1342 - static void cik_sdma_emit_copy_buffer(struct amdgpu_ring *ring, 1341 + static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib, 1343 1342 uint64_t src_offset, 1344 1343 uint64_t dst_offset, 1345 1344 uint32_t byte_count) 1346 1345 { 1347 - amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0)); 1348 - amdgpu_ring_write(ring, byte_count); 1349 - amdgpu_ring_write(ring, 0); /* src/dst endian swap */ 1350 - amdgpu_ring_write(ring, lower_32_bits(src_offset)); 1351 - amdgpu_ring_write(ring, upper_32_bits(src_offset)); 1352 - amdgpu_ring_write(ring, lower_32_bits(dst_offset)); 1353 - amdgpu_ring_write(ring, upper_32_bits(dst_offset)); 1346 + ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0); 1347 + ib->ptr[ib->length_dw++] = byte_count; 1348 + ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ 1349 + ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); 1350 + ib->ptr[ib->length_dw++] = upper_32_bits(src_offset); 1351 + ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); 1352 + ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); 1354 1353 } 1355 1354 1356 1355 /**
+1
drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
··· 2660 2660 return r; 2661 2661 } 2662 2662 WREG32(scratch, 0xCAFEDEAD); 2663 + memset(&ib, 0, sizeof(ib)); 2663 2664 r = amdgpu_ib_get(ring, NULL, 256, &ib); 2664 2665 if (r) { 2665 2666 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
+1
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
··· 622 622 return r; 623 623 } 624 624 WREG32(scratch, 0xCAFEDEAD); 625 + memset(&ib, 0, sizeof(ib)); 625 626 r = amdgpu_ib_get(ring, NULL, 256, &ib); 626 627 if (r) { 627 628 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
+10 -9
drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
··· 689 689 gpu_addr = adev->wb.gpu_addr + (index * 4); 690 690 tmp = 0xCAFEDEAD; 691 691 adev->wb.wb[index] = cpu_to_le32(tmp); 692 + memset(&ib, 0, sizeof(ib)); 692 693 r = amdgpu_ib_get(ring, NULL, 256, &ib); 693 694 if (r) { 694 695 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); ··· 1350 1349 * Used by the amdgpu ttm implementation to move pages if 1351 1350 * registered as the asic copy callback. 1352 1351 */ 1353 - static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ring *ring, 1352 + static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib, 1354 1353 uint64_t src_offset, 1355 1354 uint64_t dst_offset, 1356 1355 uint32_t byte_count) 1357 1356 { 1358 - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | 1359 - SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR)); 1360 - amdgpu_ring_write(ring, byte_count); 1361 - amdgpu_ring_write(ring, 0); /* src/dst endian swap */ 1362 - amdgpu_ring_write(ring, lower_32_bits(src_offset)); 1363 - amdgpu_ring_write(ring, upper_32_bits(src_offset)); 1364 - amdgpu_ring_write(ring, lower_32_bits(dst_offset)); 1365 - amdgpu_ring_write(ring, upper_32_bits(dst_offset)); 1357 + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | 1358 + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); 1359 + ib->ptr[ib->length_dw++] = byte_count; 1360 + ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ 1361 + ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); 1362 + ib->ptr[ib->length_dw++] = upper_32_bits(src_offset); 1363 + ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); 1364 + ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); 1366 1365 } 1367 1366 1368 1367 /**
+10 -9
drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
··· 810 810 gpu_addr = adev->wb.gpu_addr + (index * 4); 811 811 tmp = 0xCAFEDEAD; 812 812 adev->wb.wb[index] = cpu_to_le32(tmp); 813 + memset(&ib, 0, sizeof(ib)); 813 814 r = amdgpu_ib_get(ring, NULL, 256, &ib); 814 815 if (r) { 815 816 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); ··· 1474 1473 * Used by the amdgpu ttm implementation to move pages if 1475 1474 * registered as the asic copy callback. 1476 1475 */ 1477 - static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ring *ring, 1476 + static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib, 1478 1477 uint64_t src_offset, 1479 1478 uint64_t dst_offset, 1480 1479 uint32_t byte_count) 1481 1480 { 1482 - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | 1483 - SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR)); 1484 - amdgpu_ring_write(ring, byte_count); 1485 - amdgpu_ring_write(ring, 0); /* src/dst endian swap */ 1486 - amdgpu_ring_write(ring, lower_32_bits(src_offset)); 1487 - amdgpu_ring_write(ring, upper_32_bits(src_offset)); 1488 - amdgpu_ring_write(ring, lower_32_bits(dst_offset)); 1489 - amdgpu_ring_write(ring, upper_32_bits(dst_offset)); 1481 + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | 1482 + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); 1483 + ib->ptr[ib->length_dw++] = byte_count; 1484 + ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ 1485 + ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); 1486 + ib->ptr[ib->length_dw++] = upper_32_bits(src_offset); 1487 + ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); 1488 + ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); 1490 1489 } 1491 1490 1492 1491 /**
+155 -228
drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
··· 27 27 #include <drm/drmP.h> 28 28 #include "gpu_scheduler.h" 29 29 30 + static void amd_sched_wakeup(struct amd_gpu_scheduler *sched); 31 + 30 32 /* Initialize a given run queue struct */ 31 33 static void amd_sched_rq_init(struct amd_sched_rq *rq) 32 34 { 35 + spin_lock_init(&rq->lock); 33 36 INIT_LIST_HEAD(&rq->entities); 34 - mutex_init(&rq->lock); 35 37 rq->current_entity = NULL; 36 38 } 37 39 38 40 static void amd_sched_rq_add_entity(struct amd_sched_rq *rq, 39 41 struct amd_sched_entity *entity) 40 42 { 41 - mutex_lock(&rq->lock); 43 + spin_lock(&rq->lock); 42 44 list_add_tail(&entity->list, &rq->entities); 43 - mutex_unlock(&rq->lock); 45 + spin_unlock(&rq->lock); 44 46 } 45 47 46 48 static void amd_sched_rq_remove_entity(struct amd_sched_rq *rq, 47 49 struct amd_sched_entity *entity) 48 50 { 49 - mutex_lock(&rq->lock); 51 + spin_lock(&rq->lock); 50 52 list_del_init(&entity->list); 51 53 if (rq->current_entity == entity) 52 54 rq->current_entity = NULL; 53 - mutex_unlock(&rq->lock); 55 + spin_unlock(&rq->lock); 54 56 } 55 57 56 58 /** ··· 63 61 static struct amd_sched_entity * 64 62 amd_sched_rq_select_entity(struct amd_sched_rq *rq) 65 63 { 66 - struct amd_sched_entity *entity = rq->current_entity; 64 + struct amd_sched_entity *entity; 67 65 66 + spin_lock(&rq->lock); 67 + 68 + entity = rq->current_entity; 68 69 if (entity) { 69 70 list_for_each_entry_continue(entity, &rq->entities, list) { 70 71 if (!kfifo_is_empty(&entity->job_queue)) { 71 72 rq->current_entity = entity; 73 + spin_unlock(&rq->lock); 72 74 return rq->current_entity; 73 75 } 74 76 } ··· 82 76 83 77 if (!kfifo_is_empty(&entity->job_queue)) { 84 78 rq->current_entity = entity; 79 + spin_unlock(&rq->lock); 85 80 return rq->current_entity; 86 81 } 87 82 ··· 90 83 break; 91 84 } 92 85 86 + spin_unlock(&rq->lock); 87 + 93 88 return NULL; 94 - } 95 - 96 - /** 97 - * Note: This function should only been called inside scheduler main 98 - * function for thread safety, there is no other protection here. 99 - * return ture if scheduler has something ready to run. 100 - * 101 - * For active_hw_rq, there is only one producer(scheduler thread) and 102 - * one consumer(ISR). It should be safe to use this function in scheduler 103 - * main thread to decide whether to continue emit more IBs. 104 - */ 105 - static bool is_scheduler_ready(struct amd_gpu_scheduler *sched) 106 - { 107 - unsigned long flags; 108 - bool full; 109 - 110 - spin_lock_irqsave(&sched->queue_lock, flags); 111 - full = atomic64_read(&sched->hw_rq_count) < 112 - sched->hw_submission_limit ? true : false; 113 - spin_unlock_irqrestore(&sched->queue_lock, flags); 114 - 115 - return full; 116 - } 117 - 118 - /** 119 - * Select next entity from the kernel run queue, if not available, 120 - * return null. 
121 - */ 122 - static struct amd_sched_entity * 123 - kernel_rq_select_context(struct amd_gpu_scheduler *sched) 124 - { 125 - struct amd_sched_entity *sched_entity; 126 - struct amd_sched_rq *rq = &sched->kernel_rq; 127 - 128 - mutex_lock(&rq->lock); 129 - sched_entity = amd_sched_rq_select_entity(rq); 130 - mutex_unlock(&rq->lock); 131 - return sched_entity; 132 - } 133 - 134 - /** 135 - * Select next entity containing real IB submissions 136 - */ 137 - static struct amd_sched_entity * 138 - select_context(struct amd_gpu_scheduler *sched) 139 - { 140 - struct amd_sched_entity *wake_entity = NULL; 141 - struct amd_sched_entity *tmp; 142 - struct amd_sched_rq *rq; 143 - 144 - if (!is_scheduler_ready(sched)) 145 - return NULL; 146 - 147 - /* Kernel run queue has higher priority than normal run queue*/ 148 - tmp = kernel_rq_select_context(sched); 149 - if (tmp != NULL) 150 - goto exit; 151 - 152 - rq = &sched->sched_rq; 153 - mutex_lock(&rq->lock); 154 - tmp = amd_sched_rq_select_entity(rq); 155 - mutex_unlock(&rq->lock); 156 - exit: 157 - if (sched->current_entity && (sched->current_entity != tmp)) 158 - wake_entity = sched->current_entity; 159 - sched->current_entity = tmp; 160 - if (wake_entity && wake_entity->need_wakeup) 161 - wake_up(&wake_entity->wait_queue); 162 - return tmp; 163 89 } 164 90 165 91 /** ··· 111 171 struct amd_sched_rq *rq, 112 172 uint32_t jobs) 113 173 { 114 - uint64_t seq_ring = 0; 115 - char name[20]; 116 - 117 174 if (!(sched && entity && rq)) 118 175 return -EINVAL; 119 176 120 177 memset(entity, 0, sizeof(struct amd_sched_entity)); 121 - seq_ring = ((uint64_t)sched->ring_id) << 60; 122 - spin_lock_init(&entity->lock); 123 178 entity->belongto_rq = rq; 124 179 entity->scheduler = sched; 125 - init_waitqueue_head(&entity->wait_queue); 126 - init_waitqueue_head(&entity->wait_emit); 127 180 entity->fence_context = fence_context_alloc(1); 128 - snprintf(name, sizeof(name), "c_entity[%llu]", entity->fence_context); 129 - memcpy(entity->name, name, 20); 130 - entity->need_wakeup = false; 131 181 if(kfifo_alloc(&entity->job_queue, 132 182 jobs * sizeof(void *), 133 183 GFP_KERNEL)) 134 184 return -EINVAL; 135 185 136 186 spin_lock_init(&entity->queue_lock); 137 - atomic64_set(&entity->last_queued_v_seq, seq_ring); 138 - atomic64_set(&entity->last_signaled_v_seq, seq_ring); 187 + atomic_set(&entity->fence_seq, 0); 139 188 140 189 /* Add the entity to the run queue */ 141 190 amd_sched_rq_add_entity(rq, entity); ··· 139 210 * 140 211 * return true if entity is initialized, false otherwise 141 212 */ 142 - static bool is_context_entity_initialized(struct amd_gpu_scheduler *sched, 143 - struct amd_sched_entity *entity) 213 + static bool amd_sched_entity_is_initialized(struct amd_gpu_scheduler *sched, 214 + struct amd_sched_entity *entity) 144 215 { 145 216 return entity->scheduler == sched && 146 217 entity->belongto_rq != NULL; 147 218 } 148 219 149 - static bool is_context_entity_idle(struct amd_gpu_scheduler *sched, 150 - struct amd_sched_entity *entity) 220 + /** 221 + * Check if entity is idle 222 + * 223 + * @entity The pointer to a valid scheduler entity 224 + * 225 + * Return true if entity don't has any unscheduled jobs. 226 + */ 227 + static bool amd_sched_entity_is_idle(struct amd_sched_entity *entity) 151 228 { 152 - /** 153 - * Idle means no pending IBs, and the entity is not 154 - * currently being used. 
155 - */ 156 - barrier(); 157 - if ((sched->current_entity != entity) && 158 - kfifo_is_empty(&entity->job_queue)) 229 + rmb(); 230 + if (kfifo_is_empty(&entity->job_queue)) 159 231 return true; 160 232 161 233 return false; ··· 168 238 * @sched Pointer to scheduler instance 169 239 * @entity The pointer to a valid scheduler entity 170 240 * 171 - * return 0 if succeed. negative error code on failure 241 + * Cleanup and free the allocated resources. 172 242 */ 173 - int amd_sched_entity_fini(struct amd_gpu_scheduler *sched, 174 - struct amd_sched_entity *entity) 243 + void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, 244 + struct amd_sched_entity *entity) 175 245 { 176 - int r = 0; 177 246 struct amd_sched_rq *rq = entity->belongto_rq; 178 247 179 - if (!is_context_entity_initialized(sched, entity)) 180 - return 0; 181 - entity->need_wakeup = true; 248 + if (!amd_sched_entity_is_initialized(sched, entity)) 249 + return; 250 + 182 251 /** 183 252 * The client will not queue more IBs during this fini, consume existing 184 253 * queued IBs 185 254 */ 186 - r = wait_event_timeout( 187 - entity->wait_queue, 188 - is_context_entity_idle(sched, entity), 189 - msecs_to_jiffies(AMD_GPU_WAIT_IDLE_TIMEOUT_IN_MS) 190 - ) ? 0 : -1; 191 - 192 - if (r) { 193 - if (entity->is_pending) 194 - DRM_INFO("Entity %p is in waiting state during fini,\ 195 - all pending ibs will be canceled.\n", 196 - entity); 197 - } 255 + wait_event(sched->job_scheduled, amd_sched_entity_is_idle(entity)); 198 256 199 257 amd_sched_rq_remove_entity(rq, entity); 200 258 kfifo_free(&entity->job_queue); 259 + } 260 + 261 + /** 262 + * Helper to submit a job to the job queue 263 + * 264 + * @job The pointer to job required to submit 265 + * 266 + * Returns true if we could submit the job. 267 + */ 268 + static bool amd_sched_entity_in(struct amd_sched_job *job) 269 + { 270 + struct amd_sched_entity *entity = job->s_entity; 271 + bool added, first = false; 272 + 273 + spin_lock(&entity->queue_lock); 274 + added = kfifo_in(&entity->job_queue, &job, sizeof(job)) == sizeof(job); 275 + 276 + if (added && kfifo_len(&entity->job_queue) == sizeof(job)) 277 + first = true; 278 + 279 + spin_unlock(&entity->queue_lock); 280 + 281 + /* first job wakes up scheduler */ 282 + if (first) 283 + amd_sched_wakeup(job->sched); 284 + 285 + return added; 286 + } 287 + 288 + /** 289 + * Submit a job to the job queue 290 + * 291 + * @job The pointer to job required to submit 292 + * 293 + * Returns 0 for success, negative error code otherwise. 294 + */ 295 + int amd_sched_entity_push_job(struct amd_sched_job *sched_job) 296 + { 297 + struct amd_sched_entity *entity = sched_job->s_entity; 298 + struct amd_sched_fence *fence = amd_sched_fence_create( 299 + entity, sched_job->owner); 300 + int r; 301 + 302 + if (!fence) 303 + return -ENOMEM; 304 + 305 + fence_get(&fence->base); 306 + sched_job->s_fence = fence; 307 + 308 + r = wait_event_interruptible(entity->scheduler->job_scheduled, 309 + amd_sched_entity_in(sched_job)); 310 + 201 311 return r; 202 312 } 203 313 204 314 /** 205 - * Submit a normal job to the job queue 206 - * 207 - * @sched The pointer to the scheduler 208 - * @c_entity The pointer to amd_sched_entity 209 - * @job The pointer to job required to submit 210 - * return 0 if succeed. -1 if failed. 211 - * -2 indicate queue is full for this client, client should wait untill 212 - * scheduler consum some queued command. 213 - * -1 other fail. 
214 - */ 215 - int amd_sched_push_job(struct amd_gpu_scheduler *sched, 216 - struct amd_sched_entity *c_entity, 217 - void *data, 218 - struct amd_sched_fence **fence) 315 + * Return ture if we can push more jobs to the hw. 316 + */ 317 + static bool amd_sched_ready(struct amd_gpu_scheduler *sched) 219 318 { 220 - struct amd_sched_job *job; 319 + return atomic_read(&sched->hw_rq_count) < 320 + sched->hw_submission_limit; 321 + } 221 322 222 - if (!fence) 223 - return -EINVAL; 224 - job = kzalloc(sizeof(struct amd_sched_job), GFP_KERNEL); 225 - if (!job) 226 - return -ENOMEM; 227 - job->sched = sched; 228 - job->s_entity = c_entity; 229 - job->data = data; 230 - *fence = amd_sched_fence_create(c_entity); 231 - if ((*fence) == NULL) { 232 - kfree(job); 233 - return -EINVAL; 234 - } 235 - fence_get(&(*fence)->base); 236 - job->s_fence = *fence; 237 - while (kfifo_in_spinlocked(&c_entity->job_queue, &job, sizeof(void *), 238 - &c_entity->queue_lock) != sizeof(void *)) { 239 - /** 240 - * Current context used up all its IB slots 241 - * wait here, or need to check whether GPU is hung 242 - */ 243 - schedule(); 244 - } 245 - /* first job wake up scheduler */ 246 - if ((kfifo_len(&c_entity->job_queue) / sizeof(void *)) == 1) 247 - wake_up_interruptible(&sched->wait_queue); 248 - return 0; 323 + /** 324 + * Wake up the scheduler when it is ready 325 + */ 326 + static void amd_sched_wakeup(struct amd_gpu_scheduler *sched) 327 + { 328 + if (amd_sched_ready(sched)) 329 + wake_up_interruptible(&sched->wake_up_worker); 330 + } 331 + 332 + /** 333 + * Select next entity containing real IB submissions 334 + */ 335 + static struct amd_sched_entity * 336 + amd_sched_select_context(struct amd_gpu_scheduler *sched) 337 + { 338 + struct amd_sched_entity *tmp; 339 + 340 + if (!amd_sched_ready(sched)) 341 + return NULL; 342 + 343 + /* Kernel run queue has higher priority than normal run queue*/ 344 + tmp = amd_sched_rq_select_entity(&sched->kernel_rq); 345 + if (tmp == NULL) 346 + tmp = amd_sched_rq_select_entity(&sched->sched_rq); 347 + 348 + return tmp; 249 349 } 250 350 251 351 static void amd_sched_process_job(struct fence *f, struct fence_cb *cb) ··· 283 323 struct amd_sched_job *sched_job = 284 324 container_of(cb, struct amd_sched_job, cb); 285 325 struct amd_gpu_scheduler *sched; 286 - unsigned long flags; 287 326 288 327 sched = sched_job->sched; 289 - atomic64_set(&sched_job->s_entity->last_signaled_v_seq, 290 - sched_job->s_fence->v_seq); 291 328 amd_sched_fence_signal(sched_job->s_fence); 292 - spin_lock_irqsave(&sched->queue_lock, flags); 293 - list_del(&sched_job->list); 294 - atomic64_dec(&sched->hw_rq_count); 295 - spin_unlock_irqrestore(&sched->queue_lock, flags); 296 - 297 - sched->ops->process_job(sched, sched_job); 329 + atomic_dec(&sched->hw_rq_count); 298 330 fence_put(&sched_job->s_fence->base); 299 - kfree(sched_job); 300 - wake_up_interruptible(&sched->wait_queue); 331 + sched->ops->process_job(sched_job); 332 + wake_up_interruptible(&sched->wake_up_worker); 301 333 } 302 334 303 335 static int amd_sched_main(void *param) 304 336 { 305 - int r; 306 - struct amd_sched_job *job; 307 337 struct sched_param sparam = {.sched_priority = 1}; 308 - struct amd_sched_entity *c_entity = NULL; 309 338 struct amd_gpu_scheduler *sched = (struct amd_gpu_scheduler *)param; 339 + int r; 310 340 311 341 sched_setscheduler(current, SCHED_FIFO, &sparam); 312 342 313 343 while (!kthread_should_stop()) { 344 + struct amd_sched_entity *c_entity = NULL; 345 + struct amd_sched_job *job; 314 346 struct fence 
*fence; 315 347 316 - wait_event_interruptible(sched->wait_queue, 317 - is_scheduler_ready(sched) && 318 - (c_entity = select_context(sched))); 348 + wait_event_interruptible(sched->wake_up_worker, 349 + kthread_should_stop() || 350 + (c_entity = amd_sched_select_context(sched))); 351 + 352 + if (!c_entity) 353 + continue; 354 + 319 355 r = kfifo_out(&c_entity->job_queue, &job, sizeof(void *)); 320 356 if (r != sizeof(void *)) 321 357 continue; 322 - r = sched->ops->prepare_job(sched, c_entity, job); 323 - if (!r) { 324 - unsigned long flags; 325 - spin_lock_irqsave(&sched->queue_lock, flags); 326 - list_add_tail(&job->list, &sched->active_hw_rq); 327 - atomic64_inc(&sched->hw_rq_count); 328 - spin_unlock_irqrestore(&sched->queue_lock, flags); 329 - } 330 - mutex_lock(&sched->sched_lock); 331 - fence = sched->ops->run_job(sched, c_entity, job); 358 + atomic_inc(&sched->hw_rq_count); 359 + 360 + fence = sched->ops->run_job(job); 332 361 if (fence) { 333 362 r = fence_add_callback(fence, &job->cb, 334 363 amd_sched_process_job); ··· 327 378 DRM_ERROR("fence add callback failed (%d)\n", r); 328 379 fence_put(fence); 329 380 } 330 - mutex_unlock(&sched->sched_lock); 381 + 382 + wake_up(&sched->job_scheduled); 331 383 } 332 384 return 0; 333 385 } ··· 336 386 /** 337 387 * Create a gpu scheduler 338 388 * 339 - * @device The device context for this scheduler 340 - * @ops The backend operations for this scheduler. 341 - * @id The scheduler is per ring, here is ring id. 342 - * @granularity The minumum ms unit the scheduler will scheduled. 343 - * @preemption Indicate whether this ring support preemption, 0 is no. 389 + * @ops The backend operations for this scheduler. 390 + * @ring The the ring id for the scheduler. 391 + * @hw_submissions Number of hw submissions to do. 
344 392 * 345 - * return the pointer to scheduler for success, otherwise return NULL 393 + * Return the pointer to scheduler for success, otherwise return NULL 346 394 */ 347 - struct amd_gpu_scheduler *amd_sched_create(void *device, 348 - struct amd_sched_backend_ops *ops, 349 - unsigned ring, 350 - unsigned granularity, 351 - unsigned preemption, 352 - unsigned hw_submission) 395 + struct amd_gpu_scheduler *amd_sched_create(struct amd_sched_backend_ops *ops, 396 + unsigned ring, unsigned hw_submission, 397 + void *priv) 353 398 { 354 399 struct amd_gpu_scheduler *sched; 355 - char name[20]; 356 400 357 401 sched = kzalloc(sizeof(struct amd_gpu_scheduler), GFP_KERNEL); 358 402 if (!sched) 359 403 return NULL; 360 404 361 - sched->device = device; 362 405 sched->ops = ops; 363 - sched->granularity = granularity; 364 406 sched->ring_id = ring; 365 - sched->preemption = preemption; 366 407 sched->hw_submission_limit = hw_submission; 367 - snprintf(name, sizeof(name), "gpu_sched[%d]", ring); 368 - mutex_init(&sched->sched_lock); 369 - spin_lock_init(&sched->queue_lock); 408 + sched->priv = priv; 409 + snprintf(sched->name, sizeof(sched->name), "amdgpu[%d]", ring); 370 410 amd_sched_rq_init(&sched->sched_rq); 371 411 amd_sched_rq_init(&sched->kernel_rq); 372 412 373 - init_waitqueue_head(&sched->wait_queue); 374 - INIT_LIST_HEAD(&sched->active_hw_rq); 375 - atomic64_set(&sched->hw_rq_count, 0); 413 + init_waitqueue_head(&sched->wake_up_worker); 414 + init_waitqueue_head(&sched->job_scheduled); 415 + atomic_set(&sched->hw_rq_count, 0); 376 416 /* Each scheduler will run on a seperate kernel thread */ 377 - sched->thread = kthread_create(amd_sched_main, sched, name); 378 - if (sched->thread) { 379 - wake_up_process(sched->thread); 380 - return sched; 417 + sched->thread = kthread_run(amd_sched_main, sched, sched->name); 418 + if (IS_ERR(sched->thread)) { 419 + DRM_ERROR("Failed to create scheduler for id %d.\n", ring); 420 + kfree(sched); 421 + return NULL; 381 422 } 382 423 383 - DRM_ERROR("Failed to create scheduler for id %d.\n", ring); 384 - kfree(sched); 385 - return NULL; 424 + return sched; 386 425 } 387 426 388 427 /** ··· 386 447 kthread_stop(sched->thread); 387 448 kfree(sched); 388 449 return 0; 389 - } 390 - 391 - /** 392 - * Get next queued sequence number 393 - * 394 - * @entity The context entity 395 - * 396 - * return the next queued sequence number 397 - */ 398 - uint64_t amd_sched_next_queued_seq(struct amd_sched_entity *c_entity) 399 - { 400 - return atomic64_read(&c_entity->last_queued_v_seq) + 1; 401 450 }
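Editor's note: the gpu_scheduler.c rework above drops the per-entity wait queues, the active_hw_rq list and the sched_lock in favour of two scheduler-wide wait queues: wake_up_worker, kicked by the first job pushed into an empty entity queue (amd_sched_entity_in -> amd_sched_wakeup) and again whenever a hardware slot frees up in amd_sched_process_job, and job_scheduled, kicked after each submission so blocked pushers and amd_sched_entity_fini can make progress. The run queues now carry their own spinlock and do the round-robin entity selection internally. A rough userspace model of the "only the first job in an empty queue needs to wake the worker" idea, using pthreads; everything here is illustrative, not the kernel implementation:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define QUEUE_DEPTH 4

static int queue[QUEUE_DEPTH];
static unsigned head, tail, count;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wake_up_worker = PTHREAD_COND_INITIALIZER;

static bool push_job(int job)
{
    bool added = false, first = false;

    pthread_mutex_lock(&lock);
    if (count < QUEUE_DEPTH) {
        queue[tail] = job;
        tail = (tail + 1) % QUEUE_DEPTH;
        first = (++count == 1);      /* queue was empty before this push */
        added = true;
    }
    pthread_mutex_unlock(&lock);

    if (first)                       /* later pushes find the worker already awake */
        pthread_cond_signal(&wake_up_worker);
    return added;
}

static void *worker(void *arg)
{
    int done;

    (void)arg;
    for (done = 0; done < 3; done++) {
        int job;

        pthread_mutex_lock(&lock);
        while (count == 0)           /* predicate recheck makes the single signal safe */
            pthread_cond_wait(&wake_up_worker, &lock);
        job = queue[head];
        head = (head + 1) % QUEUE_DEPTH;
        count--;
        pthread_mutex_unlock(&lock);

        printf("ran job %d\n", job);
    }
    return NULL;
}

int main(void)
{
    pthread_t t;
    int i;

    pthread_create(&t, NULL, worker, NULL);
    for (i = 1; i <= 3; i++)
        push_job(i);
    pthread_join(t, NULL);
    return 0;
}

The kernel side additionally blocks the pusher on job_scheduled when the kfifo is full and retries via wait_event_interruptible, which the model above leaves out.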
+19 -50
drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
··· 27 27 #include <linux/kfifo.h> 28 28 #include <linux/fence.h> 29 29 30 - #define AMD_GPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 31 - 32 30 struct amd_gpu_scheduler; 33 31 struct amd_sched_rq; 34 32 ··· 39 41 struct amd_sched_entity { 40 42 struct list_head list; 41 43 struct amd_sched_rq *belongto_rq; 42 - spinlock_t lock; 43 - /* the virtual_seq is unique per context per ring */ 44 - atomic64_t last_queued_v_seq; 45 - atomic64_t last_signaled_v_seq; 44 + atomic_t fence_seq; 46 45 /* the job_queue maintains the jobs submitted by clients */ 47 46 struct kfifo job_queue; 48 47 spinlock_t queue_lock; 49 48 struct amd_gpu_scheduler *scheduler; 50 - wait_queue_head_t wait_queue; 51 - wait_queue_head_t wait_emit; 52 - bool is_pending; 53 49 uint64_t fence_context; 54 - char name[20]; 55 - bool need_wakeup; 56 50 }; 57 51 58 52 /** ··· 53 63 * the next entity to emit commands from. 54 64 */ 55 65 struct amd_sched_rq { 56 - struct mutex lock; 66 + spinlock_t lock; 57 67 struct list_head entities; 58 68 struct amd_sched_entity *current_entity; 59 69 }; 60 70 61 71 struct amd_sched_fence { 62 72 struct fence base; 63 - struct fence_cb cb; 64 - struct amd_sched_entity *entity; 65 - uint64_t v_seq; 73 + struct amd_gpu_scheduler *scheduler; 66 74 spinlock_t lock; 75 + void *owner; 67 76 }; 68 77 69 78 struct amd_sched_job { 70 - struct list_head list; 71 79 struct fence_cb cb; 72 80 struct amd_gpu_scheduler *sched; 73 81 struct amd_sched_entity *s_entity; 74 - void *data; 75 82 struct amd_sched_fence *s_fence; 83 + void *owner; 76 84 }; 77 85 78 86 extern const struct fence_ops amd_sched_fence_ops; ··· 89 101 * these functions should be implemented in driver side 90 102 */ 91 103 struct amd_sched_backend_ops { 92 - int (*prepare_job)(struct amd_gpu_scheduler *sched, 93 - struct amd_sched_entity *c_entity, 94 - struct amd_sched_job *job); 95 - struct fence *(*run_job)(struct amd_gpu_scheduler *sched, 96 - struct amd_sched_entity *c_entity, 97 - struct amd_sched_job *job); 98 - void (*process_job)(struct amd_gpu_scheduler *sched, 99 - struct amd_sched_job *job); 104 + struct fence *(*run_job)(struct amd_sched_job *job); 105 + void (*process_job)(struct amd_sched_job *job); 100 106 }; 101 107 102 108 /** 103 109 * One scheduler is implemented for each hardware ring 104 110 */ 105 111 struct amd_gpu_scheduler { 106 - void *device; 107 112 struct task_struct *thread; 108 113 struct amd_sched_rq sched_rq; 109 114 struct amd_sched_rq kernel_rq; 110 - struct list_head active_hw_rq; 111 - atomic64_t hw_rq_count; 115 + atomic_t hw_rq_count; 112 116 struct amd_sched_backend_ops *ops; 113 117 uint32_t ring_id; 114 - uint32_t granularity; /* in ms unit */ 115 - uint32_t preemption; 116 - wait_queue_head_t wait_queue; 117 - struct amd_sched_entity *current_entity; 118 - struct mutex sched_lock; 119 - spinlock_t queue_lock; 118 + wait_queue_head_t wake_up_worker; 119 + wait_queue_head_t job_scheduled; 120 120 uint32_t hw_submission_limit; 121 + char name[20]; 122 + void *priv; 121 123 }; 122 124 123 - struct amd_gpu_scheduler *amd_sched_create(void *device, 124 - struct amd_sched_backend_ops *ops, 125 - uint32_t ring, 126 - uint32_t granularity, 127 - uint32_t preemption, 128 - uint32_t hw_submission); 125 + struct amd_gpu_scheduler * 126 + amd_sched_create(struct amd_sched_backend_ops *ops, 127 + uint32_t ring, uint32_t hw_submission, void *priv); 129 128 int amd_sched_destroy(struct amd_gpu_scheduler *sched); 130 - 131 - int amd_sched_push_job(struct amd_gpu_scheduler *sched, 132 - struct amd_sched_entity *c_entity, 133 
- void *data, 134 - struct amd_sched_fence **fence); 135 129 136 130 int amd_sched_entity_init(struct amd_gpu_scheduler *sched, 137 131 struct amd_sched_entity *entity, 138 132 struct amd_sched_rq *rq, 139 133 uint32_t jobs); 140 - int amd_sched_entity_fini(struct amd_gpu_scheduler *sched, 141 - struct amd_sched_entity *entity); 142 - 143 - uint64_t amd_sched_next_queued_seq(struct amd_sched_entity *c_entity); 134 + void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, 135 + struct amd_sched_entity *entity); 136 + int amd_sched_entity_push_job(struct amd_sched_job *sched_job); 144 137 145 138 struct amd_sched_fence *amd_sched_fence_create( 146 - struct amd_sched_entity *s_entity); 139 + struct amd_sched_entity *s_entity, void *owner); 147 140 void amd_sched_fence_signal(struct amd_sched_fence *fence); 148 141 149 142
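Editor's note: after these header changes the backend contract is down to two callbacks, run_job() handing the job to the hardware and returning its fence, and process_job() cleaning up once that fence has signalled (prepare_job is gone, per the commit list above). A sketch of what a driver-side ops table looks like under that contract; the struct layout mirrors the header, while the bodies and the fence type are stand-ins:

#include <stdio.h>

struct sketch_job { int id; };
struct sketch_fence;                 /* stands in for struct fence */

/* mirrors the two-callback amd_sched_backend_ops contract */
struct sketch_backend_ops {
    struct sketch_fence *(*run_job)(struct sketch_job *job);
    void (*process_job)(struct sketch_job *job);
};

static struct sketch_fence *my_run_job(struct sketch_job *job)
{
    printf("submit job %d to the ring\n", job->id);
    return NULL;                     /* a real backend returns the fence it emitted */
}

static void my_process_job(struct sketch_job *job)
{
    printf("job %d completed, clean up\n", job->id);
}

static const struct sketch_backend_ops my_ops = {
    .run_job     = my_run_job,
    .process_job = my_process_job,
};

int main(void)
{
    struct sketch_job job = { .id = 1 };

    if (!my_ops.run_job(&job))       /* the scheduler normally waits for the fence first */
        my_ops.process_job(&job);
    return 0;
}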
+11 -8
drivers/gpu/drm/amd/scheduler/sched_fence.c
··· 27 27 #include <drm/drmP.h> 28 28 #include "gpu_scheduler.h" 29 29 30 - struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity) 30 + struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity, void *owner) 31 31 { 32 32 struct amd_sched_fence *fence = NULL; 33 + unsigned seq; 34 + 33 35 fence = kzalloc(sizeof(struct amd_sched_fence), GFP_KERNEL); 34 36 if (fence == NULL) 35 37 return NULL; 36 - fence->v_seq = atomic64_inc_return(&s_entity->last_queued_v_seq); 37 - fence->entity = s_entity; 38 + fence->owner = owner; 39 + fence->scheduler = s_entity->scheduler; 38 40 spin_lock_init(&fence->lock); 39 - fence_init(&fence->base, &amd_sched_fence_ops, 40 - &fence->lock, 41 - s_entity->fence_context, 42 - fence->v_seq); 41 + 42 + seq = atomic_inc_return(&s_entity->fence_seq); 43 + fence_init(&fence->base, &amd_sched_fence_ops, &fence->lock, 44 + s_entity->fence_context, seq); 45 + 43 46 return fence; 44 47 } 45 48 ··· 63 60 static const char *amd_sched_fence_get_timeline_name(struct fence *f) 64 61 { 65 62 struct amd_sched_fence *fence = to_amd_sched_fence(f); 66 - return (const char *)fence->entity->name; 63 + return (const char *)fence->scheduler->name; 67 64 } 68 65 69 66 static bool amd_sched_fence_enable_signaling(struct fence *f)
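Editor's note: sched_fence.c stops reusing the entity's last_queued_v_seq; each fence now takes its sequence number from a plain per-entity atomic counter (fence_seq) and its timeline name from the scheduler, since the entity's 20-byte name field is gone. The kernel's atomic_inc_return() yields the incremented value; the C11 model below reproduces that pairing of a per-entity fence context with a monotonically increasing seqno. Everything outside the counter arithmetic is invented for the example:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct entity {
    uint64_t fence_context;      /* unique per entity */
    atomic_uint fence_seq;       /* per-entity sequence counter */
};

struct sched_fence {
    uint64_t context;
    unsigned int seqno;
};

static struct sched_fence fence_create(struct entity *e)
{
    struct sched_fence f;

    f.context = e->fence_context;
    /* atomic_fetch_add returns the old value, so +1 matches atomic_inc_return */
    f.seqno = atomic_fetch_add(&e->fence_seq, 1) + 1;
    return f;
}

int main(void)
{
    struct entity e = { .fence_context = 42, .fence_seq = 0 };
    int i;

    for (i = 0; i < 3; i++) {
        struct sched_fence f = fence_create(&e);
        printf("fence %llu:%u\n", (unsigned long long)f.context, f.seqno);
    }
    return 0;
}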
+5
drivers/gpu/drm/radeon/radeon_connectors.c
··· 95 95 if (!radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) { 96 96 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); 97 97 } else if (radeon_dp_needs_link_train(radeon_connector)) { 98 + /* Don't try to start link training before we 99 + * have the dpcd */ 100 + if (!radeon_dp_getdpcd(radeon_connector)) 101 + return; 102 + 98 103 /* set it to OFF so that drm_helper_connector_dpms() 99 104 * won't return immediately since the current state 100 105 * is ON at this point.