···
 extern int amdgpu_modeset;
 extern int amdgpu_vram_limit;
 extern int amdgpu_vis_vram_limit;
-extern unsigned amdgpu_gart_size;
+extern int amdgpu_gart_size;
 extern int amdgpu_gtt_size;
 extern int amdgpu_moverate;
 extern int amdgpu_benchmarking;
···
 					     GFP_KERNEL);
 	p->num_post_dep_syncobjs = 0;
 
+	if (!p->post_dep_syncobjs)
+		return -ENOMEM;
+
 	for (i = 0; i < num_deps; ++i) {
 		p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle);
 		if (!p->post_dep_syncobjs[i])
···
 	cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
 	job->uf_sequence = cs->out.handle;
 	amdgpu_job_free_resources(job);
-	amdgpu_cs_parser_fini(p, 0, true);
 
 	trace_amdgpu_cs_ioctl(job);
 	amd_sched_entity_push_job(&job->base);
···
 		goto out;
 
 	r = amdgpu_cs_submit(&parser, cs);
-	if (r)
-		goto out;
 
-	return 0;
 out:
 	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
 	return r;
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c (+2/-8)
···
 		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
 	}
 
-	if (amdgpu_gart_size < 32) {
+	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
 		/* gart size must be greater or equal to 32M */
 		dev_warn(adev->dev, "gart size (%d) too small\n",
 			 amdgpu_gart_size);
-		amdgpu_gart_size = 32;
+		amdgpu_gart_size = -1;
 	}
 
 	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
···
 	r = amdgpu_bo_validate(bo->shadow);
 	if (r) {
 		DRM_ERROR("bo validate failed!\n");
-		goto err;
-	}
-
-	r = amdgpu_ttm_bind(&bo->shadow->tbo, &bo->shadow->tbo.mem);
-	if (r) {
-		DRM_ERROR("%p bind failed\n", bo->shadow);
 		goto err;
 	}
 
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c (+2/-2)
···
 
 int amdgpu_vram_limit = 0;
 int amdgpu_vis_vram_limit = 0;
-unsigned amdgpu_gart_size = 256;
+int amdgpu_gart_size = -1; /* auto */
 int amdgpu_gtt_size = -1; /* auto */
 int amdgpu_moverate = -1; /* auto */
 int amdgpu_benchmarking = 0;
···
 MODULE_PARM_DESC(vis_vramlimit, "Restrict visible VRAM for testing, in megabytes");
 module_param_named(vis_vramlimit, amdgpu_vis_vram_limit, int, 0444);
 
-MODULE_PARM_DESC(gartsize, "Size of PCIE/IGP gart to setup in megabytes (32, 64, etc.)");
+MODULE_PARM_DESC(gartsize, "Size of GART to setup in megabytes (32, 64, etc., -1=auto)");
 module_param_named(gartsize, amdgpu_gart_size, uint, 0600);
 
 MODULE_PARM_DESC(gttsize, "Size of the GTT domain in megabytes (-1 = auto)");
drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c (-12)
···
  */
 
 /**
- * amdgpu_gart_set_defaults - set the default gart_size
- *
- * @adev: amdgpu_device pointer
- *
- * Set the default gart_size based on parameters and available VRAM.
- */
-void amdgpu_gart_set_defaults(struct amdgpu_device *adev)
-{
-	adev->mc.gart_size = (uint64_t)amdgpu_gart_size << 20;
-}
-
-/**
  * amdgpu_gart_table_ram_alloc - allocate system ram for gart page table
  *
  * @adev: amdgpu_device pointer
···
 		     unsigned irq_type)
 {
 	int r;
+	int sched_hw_submission = amdgpu_sched_hw_submission;
+
+	/* Set the hw submission limit higher for KIQ because
+	 * it's used for a number of gfx/compute tasks by both
+	 * KFD and KGD which may have outstanding fences and
+	 * it doesn't really use the gpu scheduler anyway;
+	 * KIQ tasks get submitted directly to the ring.
+	 */
+	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+		sched_hw_submission = max(sched_hw_submission, 256);
 
 	if (ring->adev == NULL) {
 		if (adev->num_rings >= AMDGPU_MAX_RINGS)
···
 		ring->adev = adev;
 		ring->idx = adev->num_rings++;
 		adev->rings[ring->idx] = ring;
+		r = amdgpu_fence_driver_init_ring(ring, sched_hw_submission);
 		if (r)
 			return r;
 	}
···
 		return r;
 	}
 
+	ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission);
 
 	ring->buf_mask = (ring->ring_size / 4) - 1;
 	ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c (+44/-34)
···
 	sg_free_table(ttm->sg);
 }
 
-static int amdgpu_ttm_do_bind(struct ttm_tt *ttm, struct ttm_mem_reg *mem)
-{
-	struct amdgpu_ttm_tt *gtt = (void *)ttm;
-	uint64_t flags;
-	int r;
-
-	spin_lock(&gtt->adev->gtt_list_lock);
-	flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, mem);
-	gtt->offset = (u64)mem->start << PAGE_SHIFT;
-	r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
-		ttm->pages, gtt->ttm.dma_address, flags);
-
-	if (r) {
-		DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
-			  ttm->num_pages, gtt->offset);
-		goto error_gart_bind;
-	}
-
-	list_add_tail(&gtt->list, &gtt->adev->gtt_list);
-error_gart_bind:
-	spin_unlock(&gtt->adev->gtt_list_lock);
-	return r;
-
-}
-
 static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
 				   struct ttm_mem_reg *bo_mem)
 {
 	struct amdgpu_ttm_tt *gtt = (void*)ttm;
 	int r = 0;
 
 	if (gtt->userptr) {
···
 	    bo_mem->mem_type == AMDGPU_PL_OA)
 		return -EINVAL;
 
-	if (amdgpu_gtt_mgr_is_allocated(bo_mem))
-		r = amdgpu_ttm_do_bind(ttm, bo_mem);
 
 	return r;
 }
···
 
 int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem)
 {
 	struct ttm_tt *ttm = bo->ttm;
 	int r;
 
 	if (!ttm || amdgpu_ttm_is_bound(ttm))
 		return 0;
 
-	r = amdgpu_gtt_mgr_alloc(&bo->bdev->man[TTM_PL_TT], bo,
-				 NULL, bo_mem);
-	if (r) {
-		DRM_ERROR("Failed to allocate GTT address space (%d)\n", r);
-		return r;
-	}
 
-	return amdgpu_ttm_do_bind(ttm, bo_mem);
 }
 
 int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
···
 	adev->mc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
 	adev->mc.visible_vram_size = adev->mc.aper_size;
 
+	/* set the gart size */
+	if (amdgpu_gart_size == -1) {
+		switch (adev->asic_type) {
+		case CHIP_HAINAN:   /* no MM engines */
+		default:
+			adev->mc.gart_size = 256ULL << 20;
+			break;
+		case CHIP_VERDE:    /* UVD, VCE do not support GPUVM */
+		case CHIP_TAHITI:   /* UVD, VCE do not support GPUVM */
+		case CHIP_PITCAIRN: /* UVD, VCE do not support GPUVM */
+		case CHIP_OLAND:    /* UVD, VCE do not support GPUVM */
+			adev->mc.gart_size = 1024ULL << 20;
+			break;
+		}
+	} else {
+		adev->mc.gart_size = (u64)amdgpu_gart_size << 20;
+	}
+
 	gmc_v6_0_vram_gtt_location(adev, &adev->mc);
 
 	return 0;
···
 	if (adev->mc.visible_vram_size > adev->mc.real_vram_size)
 		adev->mc.visible_vram_size = adev->mc.real_vram_size;
 
+	/* set the gart size */
+	if (amdgpu_gart_size == -1) {
+		switch (adev->asic_type) {
+		case CHIP_TOPAZ:   /* no MM engines */
+		default:
+			adev->mc.gart_size = 256ULL << 20;
+			break;
+#ifdef CONFIG_DRM_AMDGPU_CIK
+		case CHIP_BONAIRE: /* UVD, VCE do not support GPUVM */
+		case CHIP_HAWAII:  /* UVD, VCE do not support GPUVM */
+		case CHIP_KAVERI:  /* UVD, VCE do not support GPUVM */
+		case CHIP_KABINI:  /* UVD, VCE do not support GPUVM */
+		case CHIP_MULLINS: /* UVD, VCE do not support GPUVM */
+			adev->mc.gart_size = 1024ULL << 20;
+			break;
+#endif
+		}
+	} else {
+		adev->mc.gart_size = (u64)amdgpu_gart_size << 20;
+	}
+
 	gmc_v7_0_vram_gtt_location(adev, &adev->mc);
 
 	return 0;
···
 	if (adev->mc.visible_vram_size > adev->mc.real_vram_size)
 		adev->mc.visible_vram_size = adev->mc.real_vram_size;
 
+	/* set the gart size */
+	if (amdgpu_gart_size == -1) {
+		switch (adev->asic_type) {
+		case CHIP_POLARIS11: /* all engines support GPUVM */
+		case CHIP_POLARIS10: /* all engines support GPUVM */
+		case CHIP_POLARIS12: /* all engines support GPUVM */
+		default:
+			adev->mc.gart_size = 256ULL << 20;
+			break;
+		case CHIP_TONGA:   /* UVD, VCE do not support GPUVM */
+		case CHIP_FIJI:    /* UVD, VCE do not support GPUVM */
+		case CHIP_CARRIZO: /* UVD, VCE do not support GPUVM, DCE SG support */
+		case CHIP_STONEY:  /* UVD does not support GPUVM, DCE SG support */
+			adev->mc.gart_size = 1024ULL << 20;
+			break;
+		}
+	} else {
+		adev->mc.gart_size = (u64)amdgpu_gart_size << 20;
+	}
+
 	gmc_v8_0_vram_gtt_location(adev, &adev->mc);
 
 	return 0;
···
 			   struct amd_sched_entity *entity)
 {
 	struct amd_sched_rq *rq = entity->rq;
+	int r;
 
 	if (!amd_sched_entity_is_initialized(sched, entity))
 		return;
-
 	/**
 	 * The client will not queue more IBs during this fini, consume existing
-	 * queued IBs
+	 * queued IBs or discard them on SIGKILL
 	 */
-	wait_event(sched->job_scheduled, amd_sched_entity_is_idle(entity));
-
+	if ((current->flags & PF_SIGNALED) && current->exit_code == SIGKILL)
+		r = -ERESTARTSYS;
+	else
+		r = wait_event_killable(sched->job_scheduled,
+					amd_sched_entity_is_idle(entity));
 	amd_sched_rq_remove_entity(rq, entity);
+	if (r) {
+		struct amd_sched_job *job;
+
+		/* Park the kernel thread for a moment to make sure it isn't
+		 * processing our entity.
+		 */
+		kthread_park(sched->thread);
+		kthread_unpark(sched->thread);
+		while (kfifo_out(&entity->job_queue, &job, sizeof(job)))
+			sched->ops->free_job(job);
+
+	}
 	kfifo_free(&entity->job_queue);
 }
 
···
 	 * TODO: Explicit member copy would probably be better here.
 	 */
 
+	atomic_inc(&bo->glob->bo_count);
 	INIT_LIST_HEAD(&fbo->ddestroy);
 	INIT_LIST_HEAD(&fbo->lru);
 	INIT_LIST_HEAD(&fbo->swap);