Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: allocate entities on demand

Currently we pre-allocate entities and fences for all the HW IPs on
context creation, some of which might never be used.

This patch resolves the entity/fence wastage by creating an entity
only when it is needed.

v2: allocate memory for entity and fences together

Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Nirmoy Das and committed by
Alex Deucher
977f7e10 18c6b74e

+124 -117
+120 -115
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
··· 42 42 [AMDGPU_HW_IP_VCN_JPEG] = 1, 43 43 }; 44 44 45 - static int amdgpu_ctx_total_num_entities(void) 46 - { 47 - unsigned i, num_entities = 0; 48 - 49 - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) 50 - num_entities += amdgpu_ctx_num_entities[i]; 51 - 52 - return num_entities; 53 - } 54 - 55 45 static int amdgpu_ctx_priority_permit(struct drm_file *filp, 56 46 enum drm_sched_priority priority) 57 47 { 48 + if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX) 49 + return -EINVAL; 50 + 58 51 /* NORMAL and below are accessible by everyone */ 59 52 if (priority <= DRM_SCHED_PRIORITY_NORMAL) 60 53 return 0; ··· 61 68 return -EACCES; 62 69 } 63 70 64 - static int amdgpu_ctx_init(struct amdgpu_device *adev, 65 - enum drm_sched_priority priority, 66 - struct drm_file *filp, 67 - struct amdgpu_ctx *ctx) 71 + static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const u32 ring) 68 72 { 69 - unsigned num_entities = amdgpu_ctx_total_num_entities(); 70 - unsigned i, j; 73 + struct amdgpu_device *adev = ctx->adev; 74 + struct amdgpu_ctx_entity *entity; 75 + struct drm_gpu_scheduler **scheds = NULL, *sched = NULL; 76 + unsigned num_scheds = 0; 77 + enum drm_sched_priority priority; 71 78 int r; 72 79 73 - if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX) 74 - return -EINVAL; 80 + entity = kcalloc(1, offsetof(typeof(*entity), fences[amdgpu_sched_jobs]), 81 + GFP_KERNEL); 82 + if (!entity) 83 + return -ENOMEM; 75 84 76 - r = amdgpu_ctx_priority_permit(filp, priority); 77 - if (r) 78 - return r; 79 - 80 - memset(ctx, 0, sizeof(*ctx)); 81 - ctx->adev = adev; 82 - 83 - 84 - ctx->entities[0] = kcalloc(num_entities, 85 - sizeof(struct amdgpu_ctx_entity), 86 - GFP_KERNEL); 87 - if (!ctx->entities[0]) 88 - return -ENOMEM; 89 - 90 - 91 - for (i = 0; i < num_entities; ++i) { 92 - struct amdgpu_ctx_entity *entity = &ctx->entities[0][i]; 93 - 94 - entity->sequence = 1; 95 - entity->fences = kcalloc(amdgpu_sched_jobs, 96 - sizeof(struct dma_fence*), GFP_KERNEL); 97 
- if (!entity->fences) { 98 - r = -ENOMEM; 99 - goto error_cleanup_memory; 100 - } 101 - } 102 - for (i = 1; i < AMDGPU_HW_IP_NUM; ++i) 103 - ctx->entities[i] = ctx->entities[i - 1] + 104 - amdgpu_ctx_num_entities[i - 1]; 105 - 106 - kref_init(&ctx->refcount); 107 - spin_lock_init(&ctx->ring_lock); 108 - mutex_init(&ctx->lock); 109 - 110 - ctx->reset_counter = atomic_read(&adev->gpu_reset_counter); 111 - ctx->reset_counter_query = ctx->reset_counter; 112 - ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter); 113 - ctx->init_priority = priority; 114 - ctx->override_priority = DRM_SCHED_PRIORITY_UNSET; 115 - 116 - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { 117 - struct drm_gpu_scheduler **scheds; 118 - struct drm_gpu_scheduler *sched; 119 - unsigned num_scheds = 0; 120 - 121 - switch (i) { 85 + entity->sequence = 1; 86 + priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ? 87 + ctx->init_priority : ctx->override_priority; 88 + switch (hw_ip) { 122 89 case AMDGPU_HW_IP_GFX: 123 90 sched = &adev->gfx.gfx_ring[0].sched; 124 91 scheds = &sched; ··· 119 166 scheds = adev->jpeg.jpeg_sched; 120 167 num_scheds = adev->jpeg.num_jpeg_sched; 121 168 break; 122 - } 123 - 124 - for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) 125 - r = drm_sched_entity_init(&ctx->entities[i][j].entity, 126 - priority, scheds, 127 - num_scheds, &ctx->guilty); 128 - if (r) 129 - goto error_cleanup_entities; 130 169 } 170 + 171 + r = drm_sched_entity_init(&entity->entity, priority, scheds, num_scheds, 172 + &ctx->guilty); 173 + if (r) 174 + goto error_free_entity; 175 + 176 + ctx->entities[hw_ip][ring] = entity; 177 + return 0; 178 + 179 + error_free_entity: 180 + kfree(entity); 181 + 182 + return r; 183 + } 184 + 185 + static int amdgpu_ctx_init(struct amdgpu_device *adev, 186 + enum drm_sched_priority priority, 187 + struct drm_file *filp, 188 + struct amdgpu_ctx *ctx) 189 + { 190 + int r; 191 + 192 + r = amdgpu_ctx_priority_permit(filp, priority); 193 + if (r) 194 + return 
r; 195 + 196 + memset(ctx, 0, sizeof(*ctx)); 197 + 198 + ctx->adev = adev; 199 + 200 + kref_init(&ctx->refcount); 201 + spin_lock_init(&ctx->ring_lock); 202 + mutex_init(&ctx->lock); 203 + 204 + ctx->reset_counter = atomic_read(&adev->gpu_reset_counter); 205 + ctx->reset_counter_query = ctx->reset_counter; 206 + ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter); 207 + ctx->init_priority = priority; 208 + ctx->override_priority = DRM_SCHED_PRIORITY_UNSET; 131 209 132 210 return 0; 133 211 134 - error_cleanup_entities: 135 - for (i = 0; i < num_entities; ++i) 136 - drm_sched_entity_destroy(&ctx->entities[0][i].entity); 212 + } 137 213 138 - error_cleanup_memory: 139 - for (i = 0; i < num_entities; ++i) { 140 - struct amdgpu_ctx_entity *entity = &ctx->entities[0][i]; 214 + static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity) 215 + { 141 216 142 - kfree(entity->fences); 143 - entity->fences = NULL; 144 - } 217 + int i; 145 218 146 - kfree(ctx->entities[0]); 147 - ctx->entities[0] = NULL; 148 - return r; 219 + if (!entity) 220 + return; 221 + 222 + for (i = 0; i < amdgpu_sched_jobs; ++i) 223 + dma_fence_put(entity->fences[i]); 224 + 225 + kfree(entity); 149 226 } 150 227 151 228 static void amdgpu_ctx_fini(struct kref *ref) 152 229 { 153 230 struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount); 154 - unsigned num_entities = amdgpu_ctx_total_num_entities(); 155 231 struct amdgpu_device *adev = ctx->adev; 156 232 unsigned i, j; 157 233 158 234 if (!adev) 159 235 return; 160 236 161 - for (i = 0; i < num_entities; ++i) { 162 - struct amdgpu_ctx_entity *entity = &ctx->entities[0][i]; 163 - 164 - for (j = 0; j < amdgpu_sched_jobs; ++j) 165 - dma_fence_put(entity->fences[j]); 166 - 167 - kfree(entity->fences); 237 + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { 238 + for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) { 239 + amdgpu_ctx_fini_entity(ctx->entities[i][j]); 240 + ctx->entities[i][j] = NULL; 241 + } 168 242 } 169 243 170 - 
kfree(ctx->entities[0]); 171 244 mutex_destroy(&ctx->lock); 172 - 173 245 kfree(ctx); 174 246 } 175 247 176 248 int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance, 177 249 u32 ring, struct drm_sched_entity **entity) 178 250 { 251 + int r; 252 + 179 253 if (hw_ip >= AMDGPU_HW_IP_NUM) { 180 254 DRM_ERROR("unknown HW IP type: %d\n", hw_ip); 181 255 return -EINVAL; ··· 219 239 return -EINVAL; 220 240 } 221 241 222 - *entity = &ctx->entities[hw_ip][ring].entity; 242 + if (ctx->entities[hw_ip][ring] == NULL) { 243 + r = amdgpu_ctx_init_entity(ctx, hw_ip, ring); 244 + if (r) 245 + return r; 246 + } 247 + 248 + *entity = &ctx->entities[hw_ip][ring]->entity; 223 249 return 0; 224 250 } 225 251 ··· 265 279 static void amdgpu_ctx_do_release(struct kref *ref) 266 280 { 267 281 struct amdgpu_ctx *ctx; 268 - unsigned num_entities; 269 - u32 i; 282 + u32 i, j; 270 283 271 284 ctx = container_of(ref, struct amdgpu_ctx, refcount); 285 + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { 286 + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) { 287 + if (!ctx->entities[i][j]) 288 + continue; 272 289 273 - num_entities = amdgpu_ctx_total_num_entities(); 274 - for (i = 0; i < num_entities; i++) 275 - drm_sched_entity_destroy(&ctx->entities[0][i].entity); 290 + drm_sched_entity_destroy(&ctx->entities[i][j]->entity); 291 + } 292 + } 276 293 277 294 amdgpu_ctx_fini(ref); 278 295 } ··· 505 516 void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, 506 517 enum drm_sched_priority priority) 507 518 { 508 - unsigned num_entities = amdgpu_ctx_total_num_entities(); 509 519 enum drm_sched_priority ctx_prio; 510 - unsigned i; 520 + unsigned i, j; 511 521 512 522 ctx->override_priority = priority; 513 523 514 524 ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ? 
515 525 ctx->init_priority : ctx->override_priority; 526 + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { 527 + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) { 528 + struct drm_sched_entity *entity; 516 529 517 - for (i = 0; i < num_entities; i++) { 518 - struct drm_sched_entity *entity = &ctx->entities[0][i].entity; 530 + if (!ctx->entities[i][j]) 531 + continue; 519 532 520 - drm_sched_entity_set_priority(entity, ctx_prio); 533 + entity = &ctx->entities[i][j]->entity; 534 + drm_sched_entity_set_priority(entity, ctx_prio); 535 + } 521 536 } 522 537 } 523 538 ··· 557 564 558 565 long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout) 559 566 { 560 - unsigned num_entities = amdgpu_ctx_total_num_entities(); 561 567 struct amdgpu_ctx *ctx; 562 568 struct idr *idp; 563 - uint32_t id, i; 569 + uint32_t id, i, j; 564 570 565 571 idp = &mgr->ctx_handles; 566 572 567 573 mutex_lock(&mgr->lock); 568 574 idr_for_each_entry(idp, ctx, id) { 569 - for (i = 0; i < num_entities; i++) { 570 - struct drm_sched_entity *entity; 575 + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { 576 + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) { 577 + struct drm_sched_entity *entity; 571 578 572 - entity = &ctx->entities[0][i].entity; 573 - timeout = drm_sched_entity_flush(entity, timeout); 579 + if (!ctx->entities[i][j]) 580 + continue; 581 + 582 + entity = &ctx->entities[i][j]->entity; 583 + timeout = drm_sched_entity_flush(entity, timeout); 584 + } 574 585 } 575 586 } 576 587 mutex_unlock(&mgr->lock); ··· 583 586 584 587 void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) 585 588 { 586 - unsigned num_entities = amdgpu_ctx_total_num_entities(); 587 589 struct amdgpu_ctx *ctx; 588 590 struct idr *idp; 589 - uint32_t id, i; 591 + uint32_t id, i, j; 590 592 591 593 idp = &mgr->ctx_handles; 592 594 ··· 595 599 continue; 596 600 } 597 601 598 - for (i = 0; i < num_entities; i++) 599 - drm_sched_entity_fini(&ctx->entities[0][i].entity); 602 + for (i = 0; i < AMDGPU_HW_IP_NUM; 
++i) { 603 + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) { 604 + struct drm_sched_entity *entity; 605 + 606 + if (!ctx->entities[i][j]) 607 + continue; 608 + 609 + entity = &ctx->entities[i][j]->entity; 610 + drm_sched_entity_fini(entity); 611 + } 612 + } 600 613 } 601 614 } 602 615
+4 -2
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
··· 29 29 struct drm_file; 30 30 struct amdgpu_fpriv; 31 31 32 + #define AMDGPU_MAX_ENTITY_NUM 4 33 + 32 34 struct amdgpu_ctx_entity { 33 35 uint64_t sequence; 34 - struct dma_fence **fences; 35 36 struct drm_sched_entity entity; 37 + struct dma_fence *fences[]; 36 38 }; 37 39 38 40 struct amdgpu_ctx { ··· 44 42 unsigned reset_counter_query; 45 43 uint32_t vram_lost_counter; 46 44 spinlock_t ring_lock; 47 - struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM]; 45 + struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM]; 48 46 bool preamble_presented; 49 47 enum drm_sched_priority init_priority; 50 48 enum drm_sched_priority override_priority;