Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

accel/amdxdna: Add command execution

Add interfaces for user application to submit command and wait for its
completion.

Co-developed-by: Min Ma <min.ma@amd.com>
Signed-off-by: Min Ma <min.ma@amd.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Signed-off-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241118172942.2014541-8-lizhi.hou@amd.com

Authored by Lizhi Hou and committed by Jeffrey Hugo
aac24309 ac49797c

+1531 -9
+604 -3
drivers/accel/amdxdna/aie2_ctx.c
··· 8 8 #include <drm/drm_gem.h> 9 9 #include <drm/drm_gem_shmem_helper.h> 10 10 #include <drm/drm_print.h> 11 + #include <drm/drm_syncobj.h> 12 + #include <linux/hmm.h> 11 13 #include <linux/types.h> 14 + #include <trace/events/amdxdna.h> 12 15 16 + #include "aie2_msg_priv.h" 13 17 #include "aie2_pci.h" 14 18 #include "aie2_solver.h" 15 19 #include "amdxdna_ctx.h" 16 20 #include "amdxdna_gem.h" 17 21 #include "amdxdna_mailbox.h" 18 22 #include "amdxdna_pci_drv.h" 23 + 24 + bool force_cmdlist; 25 + module_param(force_cmdlist, bool, 0600); 26 + MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default false)"); 27 + 28 + #define HWCTX_MAX_TIMEOUT 60000 /* milliseconds */ 29 + 30 + static void aie2_job_release(struct kref *ref) 31 + { 32 + struct amdxdna_sched_job *job; 33 + 34 + job = container_of(ref, struct amdxdna_sched_job, refcnt); 35 + amdxdna_sched_job_cleanup(job); 36 + if (job->out_fence) 37 + dma_fence_put(job->out_fence); 38 + kfree(job); 39 + } 40 + 41 + static void aie2_job_put(struct amdxdna_sched_job *job) 42 + { 43 + kref_put(&job->refcnt, aie2_job_release); 44 + } 45 + 46 + /* The bad_job is used in aie2_sched_job_timedout, otherwise, set it to NULL */ 47 + static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx, 48 + struct drm_sched_job *bad_job) 49 + { 50 + drm_sched_stop(&hwctx->priv->sched, bad_job); 51 + aie2_destroy_context(xdna->dev_handle, hwctx); 52 + } 53 + 54 + static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx) 55 + { 56 + struct amdxdna_gem_obj *heap = hwctx->priv->heap; 57 + int ret; 58 + 59 + ret = aie2_create_context(xdna->dev_handle, hwctx); 60 + if (ret) { 61 + XDNA_ERR(xdna, "Create hwctx failed, ret %d", ret); 62 + goto out; 63 + } 64 + 65 + ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id, 66 + heap->mem.userptr, heap->mem.size); 67 + if (ret) { 68 + XDNA_ERR(xdna, "Map host buf failed, ret %d", ret); 69 + goto out; 70 + } 71 + 72 + if (hwctx->status != 
HWCTX_STAT_READY) { 73 + XDNA_DBG(xdna, "hwctx is not ready, status %d", hwctx->status); 74 + goto out; 75 + } 76 + 77 + ret = aie2_config_cu(hwctx); 78 + if (ret) { 79 + XDNA_ERR(xdna, "Config cu failed, ret %d", ret); 80 + goto out; 81 + } 82 + 83 + out: 84 + drm_sched_start(&hwctx->priv->sched, 0); 85 + XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret); 86 + return ret; 87 + } 88 + 89 + void aie2_restart_ctx(struct amdxdna_client *client) 90 + { 91 + struct amdxdna_dev *xdna = client->xdna; 92 + struct amdxdna_hwctx *hwctx; 93 + int next = 0; 94 + 95 + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); 96 + mutex_lock(&client->hwctx_lock); 97 + idr_for_each_entry_continue(&client->hwctx_idr, hwctx, next) { 98 + if (hwctx->status != HWCTX_STAT_STOP) 99 + continue; 100 + 101 + hwctx->status = hwctx->old_status; 102 + XDNA_DBG(xdna, "Resetting %s", hwctx->name); 103 + aie2_hwctx_restart(xdna, hwctx); 104 + } 105 + mutex_unlock(&client->hwctx_lock); 106 + } 107 + 108 + static struct dma_fence *aie2_cmd_get_out_fence(struct amdxdna_hwctx *hwctx, u64 seq) 109 + { 110 + struct dma_fence *fence, *out_fence = NULL; 111 + int ret; 112 + 113 + fence = drm_syncobj_fence_get(hwctx->priv->syncobj); 114 + if (!fence) 115 + return NULL; 116 + 117 + ret = dma_fence_chain_find_seqno(&fence, seq); 118 + if (ret) 119 + goto out; 120 + 121 + out_fence = dma_fence_get(dma_fence_chain_contained(fence)); 122 + 123 + out: 124 + dma_fence_put(fence); 125 + return out_fence; 126 + } 127 + 128 + static void aie2_hwctx_wait_for_idle(struct amdxdna_hwctx *hwctx) 129 + { 130 + struct dma_fence *fence; 131 + 132 + fence = aie2_cmd_get_out_fence(hwctx, hwctx->priv->seq - 1); 133 + if (!fence) 134 + return; 135 + 136 + dma_fence_wait(fence, false); 137 + dma_fence_put(fence); 138 + } 139 + 140 + static void 141 + aie2_sched_notify(struct amdxdna_sched_job *job) 142 + { 143 + struct dma_fence *fence = job->fence; 144 + 145 + trace_xdna_job(&job->base, job->hwctx->name, "signaled 
fence", job->seq); 146 + job->hwctx->priv->completed++; 147 + dma_fence_signal(fence); 148 + 149 + up(&job->hwctx->priv->job_sem); 150 + job->job_done = true; 151 + dma_fence_put(fence); 152 + mmput(job->mm); 153 + aie2_job_put(job); 154 + } 155 + 156 + static int 157 + aie2_sched_resp_handler(void *handle, const u32 *data, size_t size) 158 + { 159 + struct amdxdna_sched_job *job = handle; 160 + struct amdxdna_gem_obj *cmd_abo; 161 + u32 ret = 0; 162 + u32 status; 163 + 164 + cmd_abo = job->cmd_bo; 165 + 166 + if (unlikely(!data)) 167 + goto out; 168 + 169 + if (unlikely(size != sizeof(u32))) { 170 + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT); 171 + ret = -EINVAL; 172 + goto out; 173 + } 174 + 175 + status = *data; 176 + XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status); 177 + if (status == AIE2_STATUS_SUCCESS) 178 + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED); 179 + else 180 + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR); 181 + 182 + out: 183 + aie2_sched_notify(job); 184 + return ret; 185 + } 186 + 187 + static int 188 + aie2_sched_nocmd_resp_handler(void *handle, const u32 *data, size_t size) 189 + { 190 + struct amdxdna_sched_job *job = handle; 191 + u32 ret = 0; 192 + u32 status; 193 + 194 + if (unlikely(!data)) 195 + goto out; 196 + 197 + if (unlikely(size != sizeof(u32))) { 198 + ret = -EINVAL; 199 + goto out; 200 + } 201 + 202 + status = *data; 203 + XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status); 204 + 205 + out: 206 + aie2_sched_notify(job); 207 + return ret; 208 + } 209 + 210 + static int 211 + aie2_sched_cmdlist_resp_handler(void *handle, const u32 *data, size_t size) 212 + { 213 + struct amdxdna_sched_job *job = handle; 214 + struct amdxdna_gem_obj *cmd_abo; 215 + struct cmd_chain_resp *resp; 216 + struct amdxdna_dev *xdna; 217 + u32 fail_cmd_status; 218 + u32 fail_cmd_idx; 219 + u32 ret = 0; 220 + 221 + cmd_abo = job->cmd_bo; 222 + if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) { 223 
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT); 224 + ret = -EINVAL; 225 + goto out; 226 + } 227 + 228 + resp = (struct cmd_chain_resp *)data; 229 + xdna = job->hwctx->client->xdna; 230 + XDNA_DBG(xdna, "Status 0x%x", resp->status); 231 + if (resp->status == AIE2_STATUS_SUCCESS) { 232 + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED); 233 + goto out; 234 + } 235 + 236 + /* Slow path to handle error, read from ringbuf on BAR */ 237 + fail_cmd_idx = resp->fail_cmd_idx; 238 + fail_cmd_status = resp->fail_cmd_status; 239 + XDNA_DBG(xdna, "Failed cmd idx %d, status 0x%x", 240 + fail_cmd_idx, fail_cmd_status); 241 + 242 + if (fail_cmd_status == AIE2_STATUS_SUCCESS) { 243 + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT); 244 + ret = -EINVAL; 245 + goto out; 246 + } 247 + amdxdna_cmd_set_state(cmd_abo, fail_cmd_status); 248 + 249 + if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) { 250 + struct amdxdna_cmd_chain *cc = amdxdna_cmd_get_payload(cmd_abo, NULL); 251 + 252 + cc->error_index = fail_cmd_idx; 253 + if (cc->error_index >= cc->command_count) 254 + cc->error_index = 0; 255 + } 256 + out: 257 + aie2_sched_notify(job); 258 + return ret; 259 + } 260 + 261 + static struct dma_fence * 262 + aie2_sched_job_run(struct drm_sched_job *sched_job) 263 + { 264 + struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job); 265 + struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; 266 + struct amdxdna_hwctx *hwctx = job->hwctx; 267 + struct dma_fence *fence; 268 + int ret; 269 + 270 + if (!mmget_not_zero(job->mm)) 271 + return ERR_PTR(-ESRCH); 272 + 273 + kref_get(&job->refcnt); 274 + fence = dma_fence_get(job->fence); 275 + 276 + if (unlikely(!cmd_abo)) { 277 + ret = aie2_sync_bo(hwctx, job, aie2_sched_nocmd_resp_handler); 278 + goto out; 279 + } 280 + 281 + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_NEW); 282 + 283 + if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) 284 + ret = aie2_cmdlist_multi_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler); 285 + 
else if (force_cmdlist) 286 + ret = aie2_cmdlist_single_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler); 287 + else 288 + ret = aie2_execbuf(hwctx, job, aie2_sched_resp_handler); 289 + 290 + out: 291 + if (ret) { 292 + dma_fence_put(job->fence); 293 + aie2_job_put(job); 294 + mmput(job->mm); 295 + fence = ERR_PTR(ret); 296 + } 297 + trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq); 298 + 299 + return fence; 300 + } 301 + 302 + static void aie2_sched_job_free(struct drm_sched_job *sched_job) 303 + { 304 + struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job); 305 + struct amdxdna_hwctx *hwctx = job->hwctx; 306 + 307 + trace_xdna_job(sched_job, hwctx->name, "job free", job->seq); 308 + if (!job->job_done) 309 + up(&hwctx->priv->job_sem); 310 + 311 + drm_sched_job_cleanup(sched_job); 312 + aie2_job_put(job); 313 + } 314 + 315 + static enum drm_gpu_sched_stat 316 + aie2_sched_job_timedout(struct drm_sched_job *sched_job) 317 + { 318 + struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job); 319 + struct amdxdna_hwctx *hwctx = job->hwctx; 320 + struct amdxdna_dev *xdna; 321 + 322 + xdna = hwctx->client->xdna; 323 + trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq); 324 + mutex_lock(&xdna->dev_lock); 325 + aie2_hwctx_stop(xdna, hwctx, sched_job); 326 + 327 + aie2_hwctx_restart(xdna, hwctx); 328 + mutex_unlock(&xdna->dev_lock); 329 + 330 + return DRM_GPU_SCHED_STAT_NOMINAL; 331 + } 332 + 333 + const struct drm_sched_backend_ops sched_ops = { 334 + .run_job = aie2_sched_job_run, 335 + .free_job = aie2_sched_job_free, 336 + .timedout_job = aie2_sched_job_timedout, 337 + }; 19 338 20 339 static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx) 21 340 { ··· 445 126 XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret); 446 127 } 447 128 129 + static int aie2_ctx_syncobj_create(struct amdxdna_hwctx *hwctx) 130 + { 131 + struct amdxdna_dev *xdna = hwctx->client->xdna; 132 + struct drm_file *filp = 
hwctx->client->filp; 133 + struct drm_syncobj *syncobj; 134 + u32 hdl; 135 + int ret; 136 + 137 + hwctx->syncobj_hdl = AMDXDNA_INVALID_FENCE_HANDLE; 138 + 139 + ret = drm_syncobj_create(&syncobj, 0, NULL); 140 + if (ret) { 141 + XDNA_ERR(xdna, "Create ctx syncobj failed, ret %d", ret); 142 + return ret; 143 + } 144 + ret = drm_syncobj_get_handle(filp, syncobj, &hdl); 145 + if (ret) { 146 + drm_syncobj_put(syncobj); 147 + XDNA_ERR(xdna, "Create ctx syncobj handle failed, ret %d", ret); 148 + return ret; 149 + } 150 + hwctx->priv->syncobj = syncobj; 151 + hwctx->syncobj_hdl = hdl; 152 + 153 + return 0; 154 + } 155 + 156 + static void aie2_ctx_syncobj_destroy(struct amdxdna_hwctx *hwctx) 157 + { 158 + /* 159 + * The syncobj_hdl is owned by user space and will be cleaned up 160 + * separately. 161 + */ 162 + drm_syncobj_put(hwctx->priv->syncobj); 163 + } 164 + 448 165 int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) 449 166 { 450 167 struct amdxdna_client *client = hwctx->client; 451 168 struct amdxdna_dev *xdna = client->xdna; 169 + struct drm_gpu_scheduler *sched; 452 170 struct amdxdna_hwctx_priv *priv; 453 171 struct amdxdna_gem_obj *heap; 454 - int ret; 172 + int i, ret; 455 173 456 174 priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL); 457 175 if (!priv) ··· 506 150 drm_gem_object_get(to_gobj(heap)); 507 151 mutex_unlock(&client->mm_lock); 508 152 priv->heap = heap; 153 + sema_init(&priv->job_sem, HWCTX_MAX_CMDS); 509 154 510 155 ret = amdxdna_gem_pin(heap); 511 156 if (ret) { ··· 514 157 goto put_heap; 515 158 } 516 159 160 + for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) { 161 + struct amdxdna_gem_obj *abo; 162 + struct amdxdna_drm_create_bo args = { 163 + .flags = 0, 164 + .type = AMDXDNA_BO_DEV, 165 + .vaddr = 0, 166 + .size = MAX_CHAIN_CMDBUF_SIZE, 167 + }; 168 + 169 + abo = amdxdna_drm_alloc_dev_bo(&xdna->ddev, &args, client->filp, true); 170 + if (IS_ERR(abo)) { 171 + ret = PTR_ERR(abo); 172 + goto free_cmd_bufs; 173 + } 174 + 175 + XDNA_DBG(xdna, "Command 
buf %d addr 0x%llx size 0x%lx", 176 + i, abo->mem.dev_addr, abo->mem.size); 177 + priv->cmd_buf[i] = abo; 178 + } 179 + 180 + sched = &priv->sched; 181 + mutex_init(&priv->io_lock); 182 + 183 + fs_reclaim_acquire(GFP_KERNEL); 184 + might_lock(&priv->io_lock); 185 + fs_reclaim_release(GFP_KERNEL); 186 + 187 + ret = drm_sched_init(sched, &sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT, 188 + HWCTX_MAX_CMDS, 0, msecs_to_jiffies(HWCTX_MAX_TIMEOUT), 189 + NULL, NULL, hwctx->name, xdna->ddev.dev); 190 + if (ret) { 191 + XDNA_ERR(xdna, "Failed to init DRM scheduler. ret %d", ret); 192 + goto free_cmd_bufs; 193 + } 194 + 195 + ret = drm_sched_entity_init(&priv->entity, DRM_SCHED_PRIORITY_NORMAL, 196 + &sched, 1, NULL); 197 + if (ret) { 198 + XDNA_ERR(xdna, "Failed to initial sched entiry. ret %d", ret); 199 + goto free_sched; 200 + } 201 + 517 202 ret = aie2_hwctx_col_list(hwctx); 518 203 if (ret) { 519 204 XDNA_ERR(xdna, "Create col list failed, ret %d", ret); 520 - goto unpin; 205 + goto free_entity; 521 206 } 522 207 523 208 ret = aie2_alloc_resource(hwctx); ··· 574 175 XDNA_ERR(xdna, "Map host buffer failed, ret %d", ret); 575 176 goto release_resource; 576 177 } 178 + 179 + ret = aie2_ctx_syncobj_create(hwctx); 180 + if (ret) { 181 + XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret); 182 + goto release_resource; 183 + } 184 + 577 185 hwctx->status = HWCTX_STAT_INIT; 578 186 579 187 XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name); ··· 591 185 aie2_release_resource(hwctx); 592 186 free_col_list: 593 187 kfree(hwctx->col_list); 594 - unpin: 188 + free_entity: 189 + drm_sched_entity_destroy(&priv->entity); 190 + free_sched: 191 + drm_sched_fini(&priv->sched); 192 + free_cmd_bufs: 193 + for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) { 194 + if (!priv->cmd_buf[i]) 195 + continue; 196 + drm_gem_object_put(to_gobj(priv->cmd_buf[i])); 197 + } 595 198 amdxdna_gem_unpin(heap); 596 199 put_heap: 597 200 drm_gem_object_put(to_gobj(heap)); ··· 611 196 612 197 void 
aie2_hwctx_fini(struct amdxdna_hwctx *hwctx) 613 198 { 199 + struct amdxdna_dev *xdna; 200 + int idx; 201 + 202 + xdna = hwctx->client->xdna; 203 + drm_sched_wqueue_stop(&hwctx->priv->sched); 204 + 205 + /* Now, scheduler will not send command to device. */ 614 206 aie2_release_resource(hwctx); 615 207 208 + /* 209 + * All submitted commands are aborted. 210 + * Restart scheduler queues to cleanup jobs. The amdxdna_sched_job_run() 211 + * will return NODEV if it is called. 212 + */ 213 + drm_sched_wqueue_start(&hwctx->priv->sched); 214 + 215 + aie2_hwctx_wait_for_idle(hwctx); 216 + drm_sched_entity_destroy(&hwctx->priv->entity); 217 + drm_sched_fini(&hwctx->priv->sched); 218 + aie2_ctx_syncobj_destroy(hwctx); 219 + 220 + XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq); 221 + 222 + for (idx = 0; idx < ARRAY_SIZE(hwctx->priv->cmd_buf); idx++) 223 + drm_gem_object_put(to_gobj(hwctx->priv->cmd_buf[idx])); 616 224 amdxdna_gem_unpin(hwctx->priv->heap); 617 225 drm_gem_object_put(to_gobj(hwctx->priv->heap)); 618 226 227 + mutex_destroy(&hwctx->priv->io_lock); 619 228 kfree(hwctx->col_list); 620 229 kfree(hwctx->priv); 621 230 kfree(hwctx->cus); ··· 705 266 XDNA_DBG(xdna, "Not supported type %d", type); 706 267 return -EOPNOTSUPP; 707 268 } 269 + } 270 + 271 + static int aie2_populate_range(struct amdxdna_gem_obj *abo) 272 + { 273 + struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); 274 + struct mm_struct *mm = abo->mem.notifier.mm; 275 + struct hmm_range range = { 0 }; 276 + unsigned long timeout; 277 + int ret; 278 + 279 + XDNA_INFO_ONCE(xdna, "populate memory range %llx size %lx", 280 + abo->mem.userptr, abo->mem.size); 281 + range.notifier = &abo->mem.notifier; 282 + range.start = abo->mem.userptr; 283 + range.end = abo->mem.userptr + abo->mem.size; 284 + range.hmm_pfns = abo->mem.pfns; 285 + range.default_flags = HMM_PFN_REQ_FAULT; 286 + 287 + if (!mmget_not_zero(mm)) 288 + return -EFAULT; 289 + 290 + timeout = jiffies + 
msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); 291 + again: 292 + range.notifier_seq = mmu_interval_read_begin(&abo->mem.notifier); 293 + mmap_read_lock(mm); 294 + ret = hmm_range_fault(&range); 295 + mmap_read_unlock(mm); 296 + if (ret) { 297 + if (time_after(jiffies, timeout)) { 298 + ret = -ETIME; 299 + goto put_mm; 300 + } 301 + 302 + if (ret == -EBUSY) 303 + goto again; 304 + 305 + goto put_mm; 306 + } 307 + 308 + down_read(&xdna->notifier_lock); 309 + if (mmu_interval_read_retry(&abo->mem.notifier, range.notifier_seq)) { 310 + up_read(&xdna->notifier_lock); 311 + goto again; 312 + } 313 + abo->mem.map_invalid = false; 314 + up_read(&xdna->notifier_lock); 315 + 316 + put_mm: 317 + mmput(mm); 318 + return ret; 319 + } 320 + 321 + int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq) 322 + { 323 + struct amdxdna_dev *xdna = hwctx->client->xdna; 324 + struct ww_acquire_ctx acquire_ctx; 325 + struct dma_fence_chain *chain; 326 + struct amdxdna_gem_obj *abo; 327 + unsigned long timeout = 0; 328 + int ret, i; 329 + 330 + ret = down_interruptible(&hwctx->priv->job_sem); 331 + if (ret) { 332 + XDNA_ERR(xdna, "Grab job sem failed, ret %d", ret); 333 + return ret; 334 + } 335 + 336 + chain = dma_fence_chain_alloc(); 337 + if (!chain) { 338 + XDNA_ERR(xdna, "Alloc fence chain failed"); 339 + ret = -ENOMEM; 340 + goto up_sem; 341 + } 342 + 343 + ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx); 344 + if (ret) { 345 + XDNA_ERR(xdna, "DRM job init failed, ret %d", ret); 346 + goto free_chain; 347 + } 348 + 349 + retry: 350 + ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &acquire_ctx); 351 + if (ret) { 352 + XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret); 353 + goto cleanup_job; 354 + } 355 + 356 + for (i = 0; i < job->bo_cnt; i++) { 357 + ret = dma_resv_reserve_fences(job->bos[i]->resv, 1); 358 + if (ret) { 359 + XDNA_WARN(xdna, "Failed to reserve fences %d", ret); 360 + drm_gem_unlock_reservations(job->bos, 
job->bo_cnt, &acquire_ctx); 361 + goto cleanup_job; 362 + } 363 + } 364 + 365 + down_read(&xdna->notifier_lock); 366 + for (i = 0; i < job->bo_cnt; i++) { 367 + abo = to_xdna_obj(job->bos[i]); 368 + if (abo->mem.map_invalid) { 369 + up_read(&xdna->notifier_lock); 370 + drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx); 371 + if (!timeout) { 372 + timeout = jiffies + 373 + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); 374 + } else if (time_after(jiffies, timeout)) { 375 + ret = -ETIME; 376 + goto cleanup_job; 377 + } 378 + 379 + ret = aie2_populate_range(abo); 380 + if (ret) 381 + goto cleanup_job; 382 + goto retry; 383 + } 384 + } 385 + 386 + mutex_lock(&hwctx->priv->io_lock); 387 + drm_sched_job_arm(&job->base); 388 + job->out_fence = dma_fence_get(&job->base.s_fence->finished); 389 + for (i = 0; i < job->bo_cnt; i++) 390 + dma_resv_add_fence(job->bos[i]->resv, job->out_fence, DMA_RESV_USAGE_WRITE); 391 + job->seq = hwctx->priv->seq++; 392 + kref_get(&job->refcnt); 393 + drm_sched_entity_push_job(&job->base); 394 + 395 + *seq = job->seq; 396 + drm_syncobj_add_point(hwctx->priv->syncobj, chain, job->out_fence, *seq); 397 + mutex_unlock(&hwctx->priv->io_lock); 398 + 399 + up_read(&xdna->notifier_lock); 400 + drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx); 401 + 402 + aie2_job_put(job); 403 + 404 + return 0; 405 + 406 + cleanup_job: 407 + drm_sched_job_cleanup(&job->base); 408 + free_chain: 409 + dma_fence_chain_free(chain); 410 + up_sem: 411 + up(&hwctx->priv->job_sem); 412 + job->job_done = true; 413 + return ret; 414 + } 415 + 416 + void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, 417 + unsigned long cur_seq) 418 + { 419 + struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); 420 + struct drm_gem_object *gobj = to_gobj(abo); 421 + long ret; 422 + 423 + down_write(&xdna->notifier_lock); 424 + abo->mem.map_invalid = true; 425 + mmu_interval_set_seq(&abo->mem.notifier, cur_seq); 426 + up_write(&xdna->notifier_lock); 427 + ret 
= dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP, 428 + true, MAX_SCHEDULE_TIMEOUT); 429 + if (!ret || ret == -ERESTARTSYS) 430 + XDNA_ERR(xdna, "Failed to wait for bo, ret %ld", ret); 708 431 }
+343
drivers/accel/amdxdna/aie2_message.c
··· 4 4 */ 5 5 6 6 #include <drm/amdxdna_accel.h> 7 + #include <drm/drm_cache.h> 7 8 #include <drm/drm_device.h> 8 9 #include <drm/drm_gem.h> 9 10 #include <drm/drm_gem_shmem_helper.h> 10 11 #include <drm/drm_print.h> 12 + #include <drm/gpu_scheduler.h> 11 13 #include <linux/bitfield.h> 12 14 #include <linux/errno.h> 13 15 #include <linux/pci.h> ··· 363 361 XDNA_ERR(xdna, "Command opcode 0x%x failed, status 0x%x ret %d", 364 362 msg.opcode, resp.status, ret); 365 363 return ret; 364 + } 365 + 366 + int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, 367 + int (*notify_cb)(void *, const u32 *, size_t)) 368 + { 369 + struct mailbox_channel *chann = hwctx->priv->mbox_chann; 370 + struct amdxdna_dev *xdna = hwctx->client->xdna; 371 + struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; 372 + union { 373 + struct execute_buffer_req ebuf; 374 + struct exec_dpu_req dpu; 375 + } req; 376 + struct xdna_mailbox_msg msg; 377 + u32 payload_len; 378 + void *payload; 379 + int cu_idx; 380 + int ret; 381 + u32 op; 382 + 383 + if (!chann) 384 + return -ENODEV; 385 + 386 + payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len); 387 + if (!payload) { 388 + XDNA_ERR(xdna, "Invalid command, cannot get payload"); 389 + return -EINVAL; 390 + } 391 + 392 + cu_idx = amdxdna_cmd_get_cu_idx(cmd_abo); 393 + if (cu_idx < 0) { 394 + XDNA_DBG(xdna, "Invalid cu idx"); 395 + return -EINVAL; 396 + } 397 + 398 + op = amdxdna_cmd_get_op(cmd_abo); 399 + switch (op) { 400 + case ERT_START_CU: 401 + if (unlikely(payload_len > sizeof(req.ebuf.payload))) 402 + XDNA_DBG(xdna, "Invalid ebuf payload len: %d", payload_len); 403 + req.ebuf.cu_idx = cu_idx; 404 + memcpy(req.ebuf.payload, payload, sizeof(req.ebuf.payload)); 405 + msg.send_size = sizeof(req.ebuf); 406 + msg.opcode = MSG_OP_EXECUTE_BUFFER_CF; 407 + break; 408 + case ERT_START_NPU: { 409 + struct amdxdna_cmd_start_npu *sn = payload; 410 + 411 + if (unlikely(payload_len - sizeof(*sn) > sizeof(req.dpu.payload))) 412 + 
XDNA_DBG(xdna, "Invalid dpu payload len: %d", payload_len); 413 + req.dpu.inst_buf_addr = sn->buffer; 414 + req.dpu.inst_size = sn->buffer_size; 415 + req.dpu.inst_prop_cnt = sn->prop_count; 416 + req.dpu.cu_idx = cu_idx; 417 + memcpy(req.dpu.payload, sn->prop_args, sizeof(req.dpu.payload)); 418 + msg.send_size = sizeof(req.dpu); 419 + msg.opcode = MSG_OP_EXEC_DPU; 420 + break; 421 + } 422 + default: 423 + XDNA_DBG(xdna, "Invalid ERT cmd op code: %d", op); 424 + return -EINVAL; 425 + } 426 + msg.handle = job; 427 + msg.notify_cb = notify_cb; 428 + msg.send_data = (u8 *)&req; 429 + print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req, 430 + 0x40, false); 431 + 432 + ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); 433 + if (ret) { 434 + XDNA_ERR(xdna, "Send message failed"); 435 + return ret; 436 + } 437 + 438 + return 0; 439 + } 440 + 441 + static int 442 + aie2_cmdlist_fill_one_slot_cf(void *cmd_buf, u32 offset, 443 + struct amdxdna_gem_obj *abo, u32 *size) 444 + { 445 + struct cmd_chain_slot_execbuf_cf *buf = cmd_buf + offset; 446 + int cu_idx = amdxdna_cmd_get_cu_idx(abo); 447 + u32 payload_len; 448 + void *payload; 449 + 450 + if (cu_idx < 0) 451 + return -EINVAL; 452 + 453 + payload = amdxdna_cmd_get_payload(abo, &payload_len); 454 + if (!payload) 455 + return -EINVAL; 456 + 457 + if (!slot_cf_has_space(offset, payload_len)) 458 + return -ENOSPC; 459 + 460 + buf->cu_idx = cu_idx; 461 + buf->arg_cnt = payload_len / sizeof(u32); 462 + memcpy(buf->args, payload, payload_len); 463 + /* Accurate buf size to hint firmware to do necessary copy */ 464 + *size = sizeof(*buf) + payload_len; 465 + return 0; 466 + } 467 + 468 + static int 469 + aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset, 470 + struct amdxdna_gem_obj *abo, u32 *size) 471 + { 472 + struct cmd_chain_slot_dpu *buf = cmd_buf + offset; 473 + int cu_idx = amdxdna_cmd_get_cu_idx(abo); 474 + struct amdxdna_cmd_start_npu *sn; 475 + u32 payload_len; 476 + void *payload; 477 + u32 arg_sz; 
478 + 479 + if (cu_idx < 0) 480 + return -EINVAL; 481 + 482 + payload = amdxdna_cmd_get_payload(abo, &payload_len); 483 + if (!payload) 484 + return -EINVAL; 485 + sn = payload; 486 + arg_sz = payload_len - sizeof(*sn); 487 + if (payload_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE) 488 + return -EINVAL; 489 + 490 + if (!slot_dpu_has_space(offset, arg_sz)) 491 + return -ENOSPC; 492 + 493 + buf->inst_buf_addr = sn->buffer; 494 + buf->inst_size = sn->buffer_size; 495 + buf->inst_prop_cnt = sn->prop_count; 496 + buf->cu_idx = cu_idx; 497 + buf->arg_cnt = arg_sz / sizeof(u32); 498 + memcpy(buf->args, sn->prop_args, arg_sz); 499 + 500 + /* Accurate buf size to hint firmware to do necessary copy */ 501 + *size += sizeof(*buf) + arg_sz; 502 + return 0; 503 + } 504 + 505 + static int 506 + aie2_cmdlist_fill_one_slot(u32 op, struct amdxdna_gem_obj *cmdbuf_abo, u32 offset, 507 + struct amdxdna_gem_obj *abo, u32 *size) 508 + { 509 + u32 this_op = amdxdna_cmd_get_op(abo); 510 + void *cmd_buf = cmdbuf_abo->mem.kva; 511 + int ret; 512 + 513 + if (this_op != op) { 514 + ret = -EINVAL; 515 + goto done; 516 + } 517 + 518 + switch (op) { 519 + case ERT_START_CU: 520 + ret = aie2_cmdlist_fill_one_slot_cf(cmd_buf, offset, abo, size); 521 + break; 522 + case ERT_START_NPU: 523 + ret = aie2_cmdlist_fill_one_slot_dpu(cmd_buf, offset, abo, size); 524 + break; 525 + default: 526 + ret = -EOPNOTSUPP; 527 + } 528 + 529 + done: 530 + if (ret) { 531 + XDNA_ERR(abo->client->xdna, "Can't fill slot for cmd op %d ret %d", 532 + op, ret); 533 + } 534 + return ret; 535 + } 536 + 537 + static inline struct amdxdna_gem_obj * 538 + aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job) 539 + { 540 + int idx = get_job_idx(job->seq); 541 + 542 + return job->hwctx->priv->cmd_buf[idx]; 543 + } 544 + 545 + static void 546 + aie2_cmdlist_prepare_request(struct cmd_chain_req *req, 547 + struct amdxdna_gem_obj *cmdbuf_abo, u32 size, u32 cnt) 548 + { 549 + req->buf_addr = cmdbuf_abo->mem.dev_addr; 550 + 
req->buf_size = size; 551 + req->count = cnt; 552 + drm_clflush_virt_range(cmdbuf_abo->mem.kva, size); 553 + XDNA_DBG(cmdbuf_abo->client->xdna, "Command buf addr 0x%llx size 0x%x count %d", 554 + req->buf_addr, size, cnt); 555 + } 556 + 557 + static inline u32 558 + aie2_cmd_op_to_msg_op(u32 op) 559 + { 560 + switch (op) { 561 + case ERT_START_CU: 562 + return MSG_OP_CHAIN_EXEC_BUFFER_CF; 563 + case ERT_START_NPU: 564 + return MSG_OP_CHAIN_EXEC_DPU; 565 + default: 566 + return MSG_OP_MAX_OPCODE; 567 + } 568 + } 569 + 570 + int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx, 571 + struct amdxdna_sched_job *job, 572 + int (*notify_cb)(void *, const u32 *, size_t)) 573 + { 574 + struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job); 575 + struct mailbox_channel *chann = hwctx->priv->mbox_chann; 576 + struct amdxdna_client *client = hwctx->client; 577 + struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; 578 + struct amdxdna_cmd_chain *payload; 579 + struct xdna_mailbox_msg msg; 580 + struct cmd_chain_req req; 581 + u32 payload_len; 582 + u32 offset = 0; 583 + u32 size; 584 + int ret; 585 + u32 op; 586 + u32 i; 587 + 588 + op = amdxdna_cmd_get_op(cmd_abo); 589 + payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len); 590 + if (op != ERT_CMD_CHAIN || !payload || 591 + payload_len < struct_size(payload, data, payload->command_count)) 592 + return -EINVAL; 593 + 594 + for (i = 0; i < payload->command_count; i++) { 595 + u32 boh = (u32)(payload->data[i]); 596 + struct amdxdna_gem_obj *abo; 597 + 598 + abo = amdxdna_gem_get_obj(client, boh, AMDXDNA_BO_CMD); 599 + if (!abo) { 600 + XDNA_ERR(client->xdna, "Failed to find cmd BO %d", boh); 601 + return -ENOENT; 602 + } 603 + 604 + /* All sub-cmd should have same op, use the first one. 
*/ 605 + if (i == 0) 606 + op = amdxdna_cmd_get_op(abo); 607 + 608 + ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, offset, abo, &size); 609 + amdxdna_gem_put_obj(abo); 610 + if (ret) 611 + return -EINVAL; 612 + 613 + offset += size; 614 + } 615 + 616 + /* The offset is the accumulated total size of the cmd buffer */ 617 + aie2_cmdlist_prepare_request(&req, cmdbuf_abo, offset, payload->command_count); 618 + 619 + msg.opcode = aie2_cmd_op_to_msg_op(op); 620 + if (msg.opcode == MSG_OP_MAX_OPCODE) 621 + return -EOPNOTSUPP; 622 + msg.handle = job; 623 + msg.notify_cb = notify_cb; 624 + msg.send_data = (u8 *)&req; 625 + msg.send_size = sizeof(req); 626 + ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); 627 + if (ret) { 628 + XDNA_ERR(hwctx->client->xdna, "Send message failed"); 629 + return ret; 630 + } 631 + 632 + return 0; 633 + } 634 + 635 + int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx, 636 + struct amdxdna_sched_job *job, 637 + int (*notify_cb)(void *, const u32 *, size_t)) 638 + { 639 + struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job); 640 + struct mailbox_channel *chann = hwctx->priv->mbox_chann; 641 + struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; 642 + struct xdna_mailbox_msg msg; 643 + struct cmd_chain_req req; 644 + u32 size; 645 + int ret; 646 + u32 op; 647 + 648 + op = amdxdna_cmd_get_op(cmd_abo); 649 + ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, 0, cmd_abo, &size); 650 + if (ret) 651 + return ret; 652 + 653 + aie2_cmdlist_prepare_request(&req, cmdbuf_abo, size, 1); 654 + 655 + msg.opcode = aie2_cmd_op_to_msg_op(op); 656 + if (msg.opcode == MSG_OP_MAX_OPCODE) 657 + return -EOPNOTSUPP; 658 + msg.handle = job; 659 + msg.notify_cb = notify_cb; 660 + msg.send_data = (u8 *)&req; 661 + msg.send_size = sizeof(req); 662 + ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); 663 + if (ret) { 664 + XDNA_ERR(hwctx->client->xdna, "Send message failed"); 665 + return ret; 666 + } 667 + 668 + return 0; 669 + } 670 + 671 + int 
aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, 672 + int (*notify_cb)(void *, const u32 *, size_t)) 673 + { 674 + struct mailbox_channel *chann = hwctx->priv->mbox_chann; 675 + struct amdxdna_gem_obj *abo = to_xdna_obj(job->bos[0]); 676 + struct amdxdna_dev *xdna = hwctx->client->xdna; 677 + struct xdna_mailbox_msg msg; 678 + struct sync_bo_req req; 679 + int ret = 0; 680 + 681 + req.src_addr = 0; 682 + req.dst_addr = abo->mem.dev_addr - hwctx->client->dev_heap->mem.dev_addr; 683 + req.size = abo->mem.size; 684 + 685 + /* Device to Host */ 686 + req.type = FIELD_PREP(AIE2_MSG_SYNC_BO_SRC_TYPE, SYNC_BO_DEV_MEM) | 687 + FIELD_PREP(AIE2_MSG_SYNC_BO_DST_TYPE, SYNC_BO_HOST_MEM); 688 + 689 + XDNA_DBG(xdna, "sync %d bytes src(0x%llx) to dst(0x%llx) completed", 690 + req.size, req.src_addr, req.dst_addr); 691 + 692 + msg.handle = job; 693 + msg.notify_cb = notify_cb; 694 + msg.send_data = (u8 *)&req; 695 + msg.send_size = sizeof(req); 696 + msg.opcode = MSG_OP_SYNC_BO; 697 + 698 + ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); 699 + if (ret) { 700 + XDNA_ERR(xdna, "Send message failed"); 701 + return ret; 702 + } 703 + 704 + return 0; 366 705 }
+5
drivers/accel/amdxdna/aie2_pci.c
··· 5 5 6 6 #include <drm/amdxdna_accel.h> 7 7 #include <drm/drm_device.h> 8 + #include <drm/drm_gem_shmem_helper.h> 8 9 #include <drm/drm_managed.h> 9 10 #include <drm/drm_print.h> 11 + #include <drm/gpu_scheduler.h> 10 12 #include <linux/errno.h> 11 13 #include <linux/firmware.h> 12 14 #include <linux/iommu.h> ··· 19 17 #include "aie2_pci.h" 20 18 #include "aie2_solver.h" 21 19 #include "amdxdna_ctx.h" 20 + #include "amdxdna_gem.h" 22 21 #include "amdxdna_mailbox.h" 23 22 #include "amdxdna_pci_drv.h" 24 23 ··· 499 496 .hwctx_init = aie2_hwctx_init, 500 497 .hwctx_fini = aie2_hwctx_fini, 501 498 .hwctx_config = aie2_hwctx_config, 499 + .cmd_submit = aie2_cmd_submit, 500 + .hmm_invalidate = aie2_hmm_invalidate, 502 501 };
+39
drivers/accel/amdxdna/aie2_pci.h
··· 6 6 #ifndef _AIE2_PCI_H_ 7 7 #define _AIE2_PCI_H_ 8 8 9 + #include <linux/semaphore.h> 10 + 9 11 #include "amdxdna_mailbox.h" 10 12 11 13 #define AIE2_INTERVAL 20000 /* us */ ··· 78 76 PSP_MAX_REGS /* Keep this at the end */ 79 77 }; 80 78 79 + struct amdxdna_client; 81 80 struct amdxdna_fw_ver; 82 81 struct amdxdna_hwctx; 82 + struct amdxdna_sched_job; 83 83 84 84 struct psp_config { 85 85 const void *fw_buf; ··· 122 118 u32 value; 123 119 }; 124 120 121 + /* 122 + * Define the maximum number of pending commands in a hardware context. 123 + * Must be power of 2! 124 + */ 125 + #define HWCTX_MAX_CMDS 4 126 + #define get_job_idx(seq) ((seq) & (HWCTX_MAX_CMDS - 1)) 125 127 struct amdxdna_hwctx_priv { 126 128 struct amdxdna_gem_obj *heap; 127 129 void *mbox_chann; 130 + 131 + struct drm_gpu_scheduler sched; 132 + struct drm_sched_entity entity; 133 + 134 + struct mutex io_lock; /* protect seq and cmd order */ 135 + struct wait_queue_head job_free_wq; 136 + u32 num_pending; 137 + u64 seq; 138 + struct semaphore job_sem; 139 + bool job_done; 140 + 141 + /* Completed job counter */ 142 + u64 completed; 143 + 144 + struct amdxdna_gem_obj *cmd_buf[HWCTX_MAX_CMDS]; 145 + struct drm_syncobj *syncobj; 128 146 }; 129 147 130 148 struct amdxdna_dev_hdl { ··· 225 199 int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx); 226 200 int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size); 227 201 int aie2_config_cu(struct amdxdna_hwctx *hwctx); 202 + int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, 203 + int (*notify_cb)(void *, const u32 *, size_t)); 204 + int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx, 205 + struct amdxdna_sched_job *job, 206 + int (*notify_cb)(void *, const u32 *, size_t)); 207 + int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx, 208 + struct amdxdna_sched_job *job, 209 + int (*notify_cb)(void *, const u32 *, size_t)); 210 + int aie2_sync_bo(struct 
amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, 211 + int (*notify_cb)(void *, const u32 *, size_t)); 228 212 229 213 /* aie2_hwctx.c */ 230 214 int aie2_hwctx_init(struct amdxdna_hwctx *hwctx); 231 215 void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx); 232 216 int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size); 217 + int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq); 218 + void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, unsigned long cur_seq); 219 + void aie2_restart_ctx(struct amdxdna_client *client); 233 220 234 221 #endif /* _AIE2_PCI_H_ */
+2
drivers/accel/amdxdna/aie2_psp.c
··· 4 4 */ 5 5 6 6 #include <drm/drm_device.h> 7 + #include <drm/drm_gem_shmem_helper.h> 7 8 #include <drm/drm_managed.h> 8 9 #include <drm/drm_print.h> 10 + #include <drm/gpu_scheduler.h> 9 11 #include <linux/bitfield.h> 10 12 #include <linux/iopoll.h> 11 13
+2
drivers/accel/amdxdna/aie2_smu.c
··· 4 4 */ 5 5 6 6 #include <drm/drm_device.h> 7 + #include <drm/drm_gem_shmem_helper.h> 7 8 #include <drm/drm_print.h> 9 + #include <drm/gpu_scheduler.h> 8 10 #include <linux/iopoll.h> 9 11 10 12 #include "aie2_pci.h"
+314 -6
drivers/accel/amdxdna/amdxdna_ctx.c
··· 7 7 #include <drm/drm_device.h> 8 8 #include <drm/drm_drv.h> 9 9 #include <drm/drm_file.h> 10 + #include <drm/drm_gem.h> 11 + #include <drm/drm_gem_shmem_helper.h> 10 12 #include <drm/drm_print.h> 13 + #include <drm/gpu_scheduler.h> 14 + #include <trace/events/amdxdna.h> 11 15 12 16 #include "amdxdna_ctx.h" 17 + #include "amdxdna_gem.h" 13 18 #include "amdxdna_pci_drv.h" 14 19 15 20 #define MAX_HWCTX_ID 255 21 + #define MAX_ARG_COUNT 4095 16 22 17 - static void amdxdna_hwctx_destroy(struct amdxdna_hwctx *hwctx) 23 + struct amdxdna_fence { 24 + struct dma_fence base; 25 + spinlock_t lock; /* for base */ 26 + struct amdxdna_hwctx *hwctx; 27 + }; 28 + 29 + static const char *amdxdna_fence_get_driver_name(struct dma_fence *fence) 30 + { 31 + return KBUILD_MODNAME; 32 + } 33 + 34 + static const char *amdxdna_fence_get_timeline_name(struct dma_fence *fence) 35 + { 36 + struct amdxdna_fence *xdna_fence; 37 + 38 + xdna_fence = container_of(fence, struct amdxdna_fence, base); 39 + 40 + return xdna_fence->hwctx->name; 41 + } 42 + 43 + static const struct dma_fence_ops fence_ops = { 44 + .get_driver_name = amdxdna_fence_get_driver_name, 45 + .get_timeline_name = amdxdna_fence_get_timeline_name, 46 + }; 47 + 48 + static struct dma_fence *amdxdna_fence_create(struct amdxdna_hwctx *hwctx) 49 + { 50 + struct amdxdna_fence *fence; 51 + 52 + fence = kzalloc(sizeof(*fence), GFP_KERNEL); 53 + if (!fence) 54 + return NULL; 55 + 56 + fence->hwctx = hwctx; 57 + spin_lock_init(&fence->lock); 58 + dma_fence_init(&fence->base, &fence_ops, &fence->lock, hwctx->id, 0); 59 + return &fence->base; 60 + } 61 + 62 + static void amdxdna_hwctx_destroy_rcu(struct amdxdna_hwctx *hwctx, 63 + struct srcu_struct *ss) 18 64 { 19 65 struct amdxdna_dev *xdna = hwctx->client->xdna; 66 + 67 + synchronize_srcu(ss); 20 68 21 69 /* At this point, user is not able to submit new commands */ 22 70 mutex_lock(&xdna->dev_lock); ··· 73 25 74 26 kfree(hwctx->name); 75 27 kfree(hwctx); 28 + } 29 + 30 + void 
*amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size) 31 + { 32 + struct amdxdna_cmd *cmd = abo->mem.kva; 33 + u32 num_masks, count; 34 + 35 + if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN) 36 + num_masks = 0; 37 + else 38 + num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header); 39 + 40 + if (size) { 41 + count = FIELD_GET(AMDXDNA_CMD_COUNT, cmd->header); 42 + if (unlikely(count <= num_masks)) { 43 + *size = 0; 44 + return NULL; 45 + } 46 + *size = (count - num_masks) * sizeof(u32); 47 + } 48 + return &cmd->data[num_masks]; 49 + } 50 + 51 + int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo) 52 + { 53 + struct amdxdna_cmd *cmd = abo->mem.kva; 54 + u32 num_masks, i; 55 + u32 *cu_mask; 56 + 57 + if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN) 58 + return -1; 59 + 60 + num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header); 61 + cu_mask = cmd->data; 62 + for (i = 0; i < num_masks; i++) { 63 + if (cu_mask[i]) 64 + return ffs(cu_mask[i]) - 1; 65 + } 66 + 67 + return -1; 76 68 } 77 69 78 70 /* ··· 131 43 client->pid, hwctx->id); 132 44 idr_remove(&client->hwctx_idr, hwctx->id); 133 45 mutex_unlock(&client->hwctx_lock); 134 - amdxdna_hwctx_destroy(hwctx); 46 + amdxdna_hwctx_destroy_rcu(hwctx, &client->hwctx_srcu); 135 47 mutex_lock(&client->hwctx_lock); 136 48 } 137 49 mutex_unlock(&client->hwctx_lock); ··· 223 135 if (!drm_dev_enter(dev, &idx)) 224 136 return -ENODEV; 225 137 138 + /* 139 + * Use hwctx_lock to achieve exclusion with other hwctx writers, 140 + * SRCU to synchronize with exec/wait command ioctls. 141 + * 142 + * The pushed jobs are handled by DRM scheduler during destroy. 
143 + */ 226 144 mutex_lock(&client->hwctx_lock); 227 145 hwctx = idr_find(&client->hwctx_idr, args->handle); 228 146 if (!hwctx) { ··· 241 147 idr_remove(&client->hwctx_idr, hwctx->id); 242 148 mutex_unlock(&client->hwctx_lock); 243 149 244 - amdxdna_hwctx_destroy(hwctx); 150 + amdxdna_hwctx_destroy_rcu(hwctx, &client->hwctx_srcu); 245 151 246 152 XDNA_DBG(xdna, "PID %d destroyed HW context %d", client->pid, args->handle); 247 153 out: ··· 255 161 struct amdxdna_drm_config_hwctx *args = data; 256 162 struct amdxdna_dev *xdna = to_xdna_dev(dev); 257 163 struct amdxdna_hwctx *hwctx; 164 + int ret, idx; 258 165 u32 buf_size; 259 166 void *buf; 260 167 u64 val; 261 - int ret; 262 168 263 169 if (!xdna->dev_info->ops->hwctx_config) 264 170 return -EOPNOTSUPP; ··· 297 203 } 298 204 299 205 mutex_lock(&xdna->dev_lock); 206 + idx = srcu_read_lock(&client->hwctx_srcu); 300 207 hwctx = idr_find(&client->hwctx_idr, args->handle); 301 208 if (!hwctx) { 302 209 XDNA_DBG(xdna, "PID %d failed to get hwctx %d", client->pid, args->handle); 303 210 ret = -EINVAL; 304 - goto unlock; 211 + goto unlock_srcu; 305 212 } 306 213 307 214 ret = xdna->dev_info->ops->hwctx_config(hwctx, args->param_type, val, buf, buf_size); 308 215 309 - unlock: 216 + unlock_srcu: 217 + srcu_read_unlock(&client->hwctx_srcu, idx); 310 218 mutex_unlock(&xdna->dev_lock); 311 219 kfree(buf); 312 220 return ret; 221 + } 222 + 223 + static void 224 + amdxdna_arg_bos_put(struct amdxdna_sched_job *job) 225 + { 226 + int i; 227 + 228 + for (i = 0; i < job->bo_cnt; i++) { 229 + if (!job->bos[i]) 230 + break; 231 + drm_gem_object_put(job->bos[i]); 232 + } 233 + } 234 + 235 + static int 236 + amdxdna_arg_bos_lookup(struct amdxdna_client *client, 237 + struct amdxdna_sched_job *job, 238 + u32 *bo_hdls, u32 bo_cnt) 239 + { 240 + struct drm_gem_object *gobj; 241 + int i, ret; 242 + 243 + job->bo_cnt = bo_cnt; 244 + for (i = 0; i < job->bo_cnt; i++) { 245 + struct amdxdna_gem_obj *abo; 246 + 247 + gobj = 
drm_gem_object_lookup(client->filp, bo_hdls[i]); 248 + if (!gobj) { 249 + ret = -ENOENT; 250 + goto put_shmem_bo; 251 + } 252 + abo = to_xdna_obj(gobj); 253 + 254 + mutex_lock(&abo->lock); 255 + if (abo->pinned) { 256 + mutex_unlock(&abo->lock); 257 + job->bos[i] = gobj; 258 + continue; 259 + } 260 + 261 + ret = amdxdna_gem_pin_nolock(abo); 262 + if (ret) { 263 + mutex_unlock(&abo->lock); 264 + drm_gem_object_put(gobj); 265 + goto put_shmem_bo; 266 + } 267 + abo->pinned = true; 268 + mutex_unlock(&abo->lock); 269 + 270 + job->bos[i] = gobj; 271 + } 272 + 273 + return 0; 274 + 275 + put_shmem_bo: 276 + amdxdna_arg_bos_put(job); 277 + return ret; 278 + } 279 + 280 + void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job) 281 + { 282 + trace_amdxdna_debug_point(job->hwctx->name, job->seq, "job release"); 283 + amdxdna_arg_bos_put(job); 284 + amdxdna_gem_put_obj(job->cmd_bo); 285 + } 286 + 287 + int amdxdna_cmd_submit(struct amdxdna_client *client, 288 + u32 cmd_bo_hdl, u32 *arg_bo_hdls, u32 arg_bo_cnt, 289 + u32 hwctx_hdl, u64 *seq) 290 + { 291 + struct amdxdna_dev *xdna = client->xdna; 292 + struct amdxdna_sched_job *job; 293 + struct amdxdna_hwctx *hwctx; 294 + int ret, idx; 295 + 296 + XDNA_DBG(xdna, "Command BO hdl %d, Arg BO count %d", cmd_bo_hdl, arg_bo_cnt); 297 + job = kzalloc(struct_size(job, bos, arg_bo_cnt), GFP_KERNEL); 298 + if (!job) 299 + return -ENOMEM; 300 + 301 + if (cmd_bo_hdl != AMDXDNA_INVALID_BO_HANDLE) { 302 + job->cmd_bo = amdxdna_gem_get_obj(client, cmd_bo_hdl, AMDXDNA_BO_CMD); 303 + if (!job->cmd_bo) { 304 + XDNA_ERR(xdna, "Failed to get cmd bo from %d", cmd_bo_hdl); 305 + ret = -EINVAL; 306 + goto free_job; 307 + } 308 + } else { 309 + job->cmd_bo = NULL; 310 + } 311 + 312 + ret = amdxdna_arg_bos_lookup(client, job, arg_bo_hdls, arg_bo_cnt); 313 + if (ret) { 314 + XDNA_ERR(xdna, "Argument BOs lookup failed, ret %d", ret); 315 + goto cmd_put; 316 + } 317 + 318 + idx = srcu_read_lock(&client->hwctx_srcu); 319 + hwctx = 
idr_find(&client->hwctx_idr, hwctx_hdl); 320 + if (!hwctx) { 321 + XDNA_DBG(xdna, "PID %d failed to get hwctx %d", 322 + client->pid, hwctx_hdl); 323 + ret = -EINVAL; 324 + goto unlock_srcu; 325 + } 326 + 327 + if (hwctx->status != HWCTX_STAT_READY) { 328 + XDNA_ERR(xdna, "HW Context is not ready"); 329 + ret = -EINVAL; 330 + goto unlock_srcu; 331 + } 332 + 333 + job->hwctx = hwctx; 334 + job->mm = current->mm; 335 + 336 + job->fence = amdxdna_fence_create(hwctx); 337 + if (!job->fence) { 338 + XDNA_ERR(xdna, "Failed to create fence"); 339 + ret = -ENOMEM; 340 + goto unlock_srcu; 341 + } 342 + kref_init(&job->refcnt); 343 + 344 + ret = xdna->dev_info->ops->cmd_submit(hwctx, job, seq); 345 + if (ret) 346 + goto put_fence; 347 + 348 + /* 349 + * The amdxdna_hwctx_destroy_rcu() will release hwctx and associated 350 + * resource after synchronize_srcu(). The submitted jobs should be 351 + * handled by the queue, for example DRM scheduler, in device layer. 352 + * For here we can unlock SRCU. 353 + */ 354 + srcu_read_unlock(&client->hwctx_srcu, idx); 355 + trace_amdxdna_debug_point(hwctx->name, *seq, "job pushed"); 356 + 357 + return 0; 358 + 359 + put_fence: 360 + dma_fence_put(job->fence); 361 + unlock_srcu: 362 + srcu_read_unlock(&client->hwctx_srcu, idx); 363 + amdxdna_arg_bos_put(job); 364 + cmd_put: 365 + amdxdna_gem_put_obj(job->cmd_bo); 366 + free_job: 367 + kfree(job); 368 + return ret; 369 + } 370 + 371 + /* 372 + * The submit command ioctl submits a command to firmware. One firmware command 373 + * may contain multiple command BOs for processing as a whole. 374 + * The command sequence number is returned which can be used for wait command ioctl. 
375 + */ 376 + static int amdxdna_drm_submit_execbuf(struct amdxdna_client *client, 377 + struct amdxdna_drm_exec_cmd *args) 378 + { 379 + struct amdxdna_dev *xdna = client->xdna; 380 + u32 *arg_bo_hdls; 381 + u32 cmd_bo_hdl; 382 + int ret; 383 + 384 + if (!args->arg_count || args->arg_count > MAX_ARG_COUNT) { 385 + XDNA_ERR(xdna, "Invalid arg bo count %d", args->arg_count); 386 + return -EINVAL; 387 + } 388 + 389 + /* Only support single command for now. */ 390 + if (args->cmd_count != 1) { 391 + XDNA_ERR(xdna, "Invalid cmd bo count %d", args->cmd_count); 392 + return -EINVAL; 393 + } 394 + 395 + cmd_bo_hdl = (u32)args->cmd_handles; 396 + arg_bo_hdls = kcalloc(args->arg_count, sizeof(u32), GFP_KERNEL); 397 + if (!arg_bo_hdls) 398 + return -ENOMEM; 399 + ret = copy_from_user(arg_bo_hdls, u64_to_user_ptr(args->args), 400 + args->arg_count * sizeof(u32)); 401 + if (ret) { 402 + ret = -EFAULT; 403 + goto free_cmd_bo_hdls; 404 + } 405 + 406 + ret = amdxdna_cmd_submit(client, cmd_bo_hdl, arg_bo_hdls, 407 + args->arg_count, args->hwctx, &args->seq); 408 + if (ret) 409 + XDNA_DBG(xdna, "Submit cmds failed, ret %d", ret); 410 + 411 + free_cmd_bo_hdls: 412 + kfree(arg_bo_hdls); 413 + if (!ret) 414 + XDNA_DBG(xdna, "Pushed cmd %lld to scheduler", args->seq); 415 + return ret; 416 + } 417 + 418 + int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) 419 + { 420 + struct amdxdna_client *client = filp->driver_priv; 421 + struct amdxdna_drm_exec_cmd *args = data; 422 + 423 + if (args->ext || args->ext_flags) 424 + return -EINVAL; 425 + 426 + switch (args->type) { 427 + case AMDXDNA_CMD_SUBMIT_EXEC_BUF: 428 + return amdxdna_drm_submit_execbuf(client, args); 429 + } 430 + 431 + XDNA_ERR(client->xdna, "Invalid command type %d", args->type); 432 + return -EINVAL; 313 433 }
+111
drivers/accel/amdxdna/amdxdna_ctx.h
··· 6 6 #ifndef _AMDXDNA_CTX_H_ 7 7 #define _AMDXDNA_CTX_H_ 8 8 9 + #include <linux/bitfield.h> 10 + 11 + #include "amdxdna_gem.h" 12 + 13 + struct amdxdna_hwctx_priv; 14 + 15 + enum ert_cmd_opcode { 16 + ERT_START_CU = 0, 17 + ERT_CMD_CHAIN = 19, 18 + ERT_START_NPU = 20, 19 + }; 20 + 21 + enum ert_cmd_state { 22 + ERT_CMD_STATE_INVALID, 23 + ERT_CMD_STATE_NEW, 24 + ERT_CMD_STATE_QUEUED, 25 + ERT_CMD_STATE_RUNNING, 26 + ERT_CMD_STATE_COMPLETED, 27 + ERT_CMD_STATE_ERROR, 28 + ERT_CMD_STATE_ABORT, 29 + ERT_CMD_STATE_SUBMITTED, 30 + ERT_CMD_STATE_TIMEOUT, 31 + ERT_CMD_STATE_NORESPONSE, 32 + }; 33 + 34 + /* 35 + * Interpretation of the beginning of data payload for ERT_START_NPU in 36 + * amdxdna_cmd. The rest of the payload in amdxdna_cmd is regular kernel args. 37 + */ 38 + struct amdxdna_cmd_start_npu { 39 + u64 buffer; /* instruction buffer address */ 40 + u32 buffer_size; /* size of buffer in bytes */ 41 + u32 prop_count; /* properties count */ 42 + u32 prop_args[]; /* properties and regular kernel arguments */ 43 + }; 44 + 45 + /* 46 + * Interpretation of the beginning of data payload for ERT_CMD_CHAIN in 47 + * amdxdna_cmd. The rest of the payload in amdxdna_cmd is cmd BO handles. 
48 + */ 49 + struct amdxdna_cmd_chain { 50 + u32 command_count; 51 + u32 submit_index; 52 + u32 error_index; 53 + u32 reserved[3]; 54 + u64 data[] __counted_by(command_count); 55 + }; 56 + 9 57 /* Exec buffer command header format */ 10 58 #define AMDXDNA_CMD_STATE GENMASK(3, 0) 11 59 #define AMDXDNA_CMD_EXTRA_CU_MASK GENMASK(11, 10) ··· 89 41 u32 syncobj_hdl; 90 42 }; 91 43 44 + #define drm_job_to_xdna_job(j) \ 45 + container_of(j, struct amdxdna_sched_job, base) 46 + 47 + struct amdxdna_sched_job { 48 + struct drm_sched_job base; 49 + struct kref refcnt; 50 + struct amdxdna_hwctx *hwctx; 51 + struct mm_struct *mm; 52 + /* The fence to notice DRM scheduler that job is done by hardware */ 53 + struct dma_fence *fence; 54 + /* user can wait on this fence */ 55 + struct dma_fence *out_fence; 56 + bool job_done; 57 + u64 seq; 58 + struct amdxdna_gem_obj *cmd_bo; 59 + size_t bo_cnt; 60 + struct drm_gem_object *bos[] __counted_by(bo_cnt); 61 + }; 62 + 63 + static inline u32 64 + amdxdna_cmd_get_op(struct amdxdna_gem_obj *abo) 65 + { 66 + struct amdxdna_cmd *cmd = abo->mem.kva; 67 + 68 + return FIELD_GET(AMDXDNA_CMD_OPCODE, cmd->header); 69 + } 70 + 71 + static inline void 72 + amdxdna_cmd_set_state(struct amdxdna_gem_obj *abo, enum ert_cmd_state s) 73 + { 74 + struct amdxdna_cmd *cmd = abo->mem.kva; 75 + 76 + cmd->header &= ~AMDXDNA_CMD_STATE; 77 + cmd->header |= FIELD_PREP(AMDXDNA_CMD_STATE, s); 78 + } 79 + 80 + static inline enum ert_cmd_state 81 + amdxdna_cmd_get_state(struct amdxdna_gem_obj *abo) 82 + { 83 + struct amdxdna_cmd *cmd = abo->mem.kva; 84 + 85 + return FIELD_GET(AMDXDNA_CMD_STATE, cmd->header); 86 + } 87 + 88 + void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size); 89 + int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo); 90 + 91 + static inline u32 amdxdna_hwctx_col_map(struct amdxdna_hwctx *hwctx) 92 + { 93 + return GENMASK(hwctx->start_col + hwctx->num_col - 1, 94 + hwctx->start_col); 95 + } 96 + 97 + void 
amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job); 92 98 void amdxdna_hwctx_remove_all(struct amdxdna_client *client); 99 + 100 + int amdxdna_cmd_submit(struct amdxdna_client *client, 101 + u32 cmd_bo_hdls, u32 *arg_bo_hdls, u32 arg_bo_cnt, 102 + u32 hwctx_hdl, u64 *seq); 103 + 104 + int amdxdna_cmd_wait(struct amdxdna_client *client, u32 hwctx_hdl, 105 + u64 seq, u32 timeout); 106 + 93 107 int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); 94 108 int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); 95 109 int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); 110 + int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); 96 111 97 112 #endif /* _AMDXDNA_CTX_H_ */
+1
drivers/accel/amdxdna/amdxdna_gem.c
··· 8 8 #include <drm/drm_device.h> 9 9 #include <drm/drm_gem.h> 10 10 #include <drm/drm_gem_shmem_helper.h> 11 + #include <drm/gpu_scheduler.h> 11 12 #include <linux/iosys-map.h> 12 13 #include <linux/vmalloc.h> 13 14
+5
drivers/accel/amdxdna/amdxdna_mailbox_helper.c
··· 3 3 * Copyright (C) 2024, Advanced Micro Devices, Inc. 4 4 */ 5 5 6 + #include <drm/amdxdna_accel.h> 6 7 #include <drm/drm_device.h> 7 8 #include <drm/drm_print.h> 9 + #include <drm/drm_gem.h> 10 + #include <drm/drm_gem_shmem_helper.h> 11 + #include <drm/gpu_scheduler.h> 8 12 #include <linux/completion.h> 9 13 14 + #include "amdxdna_gem.h" 10 15 #include "amdxdna_mailbox.h" 11 16 #include "amdxdna_mailbox_helper.h" 12 17 #include "amdxdna_pci_drv.h"
+12
drivers/accel/amdxdna/amdxdna_pci_drv.c
··· 10 10 #include <drm/drm_gem_shmem_helper.h> 11 11 #include <drm/drm_ioctl.h> 12 12 #include <drm/drm_managed.h> 13 + #include <drm/gpu_scheduler.h> 13 14 #include <linux/iommu.h> 14 15 #include <linux/pci.h> 15 16 ··· 65 64 goto unbind_sva; 66 65 } 67 66 mutex_init(&client->hwctx_lock); 67 + init_srcu_struct(&client->hwctx_srcu); 68 68 idr_init_base(&client->hwctx_idr, AMDXDNA_INVALID_CTX_HANDLE + 1); 69 69 mutex_init(&client->mm_lock); 70 70 ··· 95 93 XDNA_DBG(xdna, "closing pid %d", client->pid); 96 94 97 95 idr_destroy(&client->hwctx_idr); 96 + cleanup_srcu_struct(&client->hwctx_srcu); 98 97 mutex_destroy(&client->hwctx_lock); 99 98 mutex_destroy(&client->mm_lock); 100 99 if (client->dev_heap) ··· 136 133 DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_BO, amdxdna_drm_create_bo_ioctl, 0), 137 134 DRM_IOCTL_DEF_DRV(AMDXDNA_GET_BO_INFO, amdxdna_drm_get_bo_info_ioctl, 0), 138 135 DRM_IOCTL_DEF_DRV(AMDXDNA_SYNC_BO, amdxdna_drm_sync_bo_ioctl, 0), 136 + /* Execution */ 137 + DRM_IOCTL_DEF_DRV(AMDXDNA_EXEC_CMD, amdxdna_drm_submit_cmd_ioctl, 0), 139 138 }; 140 139 141 140 static const struct file_operations amdxdna_fops = { ··· 195 190 return -ENODEV; 196 191 197 192 drmm_mutex_init(&xdna->ddev, &xdna->dev_lock); 193 + init_rwsem(&xdna->notifier_lock); 198 194 INIT_LIST_HEAD(&xdna->client_list); 199 195 pci_set_drvdata(pdev, xdna); 196 + 197 + if (IS_ENABLED(CONFIG_LOCKDEP)) { 198 + fs_reclaim_acquire(GFP_KERNEL); 199 + might_lock(&xdna->notifier_lock); 200 + fs_reclaim_release(GFP_KERNEL); 201 + } 200 202 201 203 mutex_lock(&xdna->dev_lock); 202 204 ret = xdna->dev_info->ops->init(xdna);
+5
drivers/accel/amdxdna/amdxdna_pci_drv.h
··· 20 20 struct amdxdna_dev; 21 21 struct amdxdna_gem_obj; 22 22 struct amdxdna_hwctx; 23 + struct amdxdna_sched_job; 23 24 24 25 /* 25 26 * struct amdxdna_dev_ops - Device hardware operation callbacks ··· 32 31 void (*hwctx_fini)(struct amdxdna_hwctx *hwctx); 33 32 int (*hwctx_config)(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size); 34 33 void (*hmm_invalidate)(struct amdxdna_gem_obj *abo, unsigned long cur_seq); 34 + int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq); 35 35 }; 36 36 37 37 /* ··· 71 69 struct mutex dev_lock; /* per device lock */ 72 70 struct list_head client_list; 73 71 struct amdxdna_fw_ver fw_ver; 72 + struct rw_semaphore notifier_lock; /* for mmu notifier*/ 74 73 }; 75 74 76 75 /* ··· 91 88 struct list_head node; 92 89 pid_t pid; 93 90 struct mutex hwctx_lock; /* protect hwctx */ 91 + /* do NOT wait this srcu when hwctx_lock is held */ 92 + struct srcu_struct hwctx_srcu; 94 93 struct idr hwctx_idr; 95 94 struct amdxdna_dev *xdna; 96 95 struct drm_file *filp;
+5
drivers/accel/amdxdna/amdxdna_sysfs.c
··· 3 3 * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. 4 4 */ 5 5 6 + #include <drm/amdxdna_accel.h> 6 7 #include <drm/drm_device.h> 8 + #include <drm/drm_gem_shmem_helper.h> 7 9 #include <drm/drm_print.h> 10 + #include <drm/gpu_scheduler.h> 11 + #include <linux/types.h> 8 12 13 + #include "amdxdna_gem.h" 9 14 #include "amdxdna_pci_drv.h" 10 15 11 16 static ssize_t vbnv_show(struct device *dev, struct device_attribute *attr, char *buf)
+1
drivers/accel/amdxdna/npu1_regs.c
··· 5 5 6 6 #include <drm/amdxdna_accel.h> 7 7 #include <drm/drm_device.h> 8 + #include <drm/gpu_scheduler.h> 8 9 #include <linux/sizes.h> 9 10 10 11 #include "aie2_pci.h"
+1
drivers/accel/amdxdna/npu2_regs.c
··· 5 5 6 6 #include <drm/amdxdna_accel.h> 7 7 #include <drm/drm_device.h> 8 + #include <drm/gpu_scheduler.h> 8 9 #include <linux/sizes.h> 9 10 10 11 #include "aie2_pci.h"
+1
drivers/accel/amdxdna/npu4_regs.c
··· 5 5 6 6 #include <drm/amdxdna_accel.h> 7 7 #include <drm/drm_device.h> 8 + #include <drm/gpu_scheduler.h> 8 9 #include <linux/sizes.h> 9 10 10 11 #include "aie2_pci.h"
+1
drivers/accel/amdxdna/npu5_regs.c
··· 5 5 6 6 #include <drm/amdxdna_accel.h> 7 7 #include <drm/drm_device.h> 8 + #include <drm/gpu_scheduler.h> 8 9 #include <linux/sizes.h> 9 10 10 11 #include "aie2_pci.h"
+41
include/trace/events/amdxdna.h
··· 9 9 #if !defined(_TRACE_AMDXDNA_H) || defined(TRACE_HEADER_MULTI_READ) 10 10 #define _TRACE_AMDXDNA_H 11 11 12 + #include <drm/gpu_scheduler.h> 12 13 #include <linux/tracepoint.h> 14 + 15 + TRACE_EVENT(amdxdna_debug_point, 16 + TP_PROTO(const char *name, u64 number, const char *str), 17 + 18 + TP_ARGS(name, number, str), 19 + 20 + TP_STRUCT__entry(__string(name, name) 21 + __field(u64, number) 22 + __string(str, str)), 23 + 24 + TP_fast_assign(__assign_str(name); 25 + __entry->number = number; 26 + __assign_str(str);), 27 + 28 + TP_printk("%s:%llu %s", __get_str(name), __entry->number, 29 + __get_str(str)) 30 + ); 31 + 32 + TRACE_EVENT(xdna_job, 33 + TP_PROTO(struct drm_sched_job *sched_job, const char *name, const char *str, u64 seq), 34 + 35 + TP_ARGS(sched_job, name, str, seq), 36 + 37 + TP_STRUCT__entry(__string(name, name) 38 + __string(str, str) 39 + __field(u64, fence_context) 40 + __field(u64, fence_seqno) 41 + __field(u64, seq)), 42 + 43 + TP_fast_assign(__assign_str(name); 44 + __assign_str(str); 45 + __entry->fence_context = sched_job->s_fence->finished.context; 46 + __entry->fence_seqno = sched_job->s_fence->finished.seqno; 47 + __entry->seq = seq;), 48 + 49 + TP_printk("fence=(context:%llu, seqno:%lld), %s seq#:%lld %s", 50 + __entry->fence_context, __entry->fence_seqno, 51 + __get_str(name), __entry->seq, 52 + __get_str(str)) 53 + ); 13 54 14 55 DECLARE_EVENT_CLASS(xdna_mbox_msg, 15 56 TP_PROTO(char *name, u8 chann_id, u32 opcode, u32 msg_id),
+38
include/uapi/drm/amdxdna_accel.h
··· 13 13 extern "C" { 14 14 #endif 15 15 16 + #define AMDXDNA_INVALID_CMD_HANDLE (~0UL) 16 17 #define AMDXDNA_INVALID_ADDR (~0UL) 17 18 #define AMDXDNA_INVALID_CTX_HANDLE 0 18 19 #define AMDXDNA_INVALID_BO_HANDLE 0 20 + #define AMDXDNA_INVALID_FENCE_HANDLE 0 19 21 20 22 enum amdxdna_device_type { 21 23 AMDXDNA_DEV_TYPE_UNKNOWN = -1, ··· 31 29 DRM_AMDXDNA_CREATE_BO, 32 30 DRM_AMDXDNA_GET_BO_INFO, 33 31 DRM_AMDXDNA_SYNC_BO, 32 + DRM_AMDXDNA_EXEC_CMD, 34 33 }; 35 34 36 35 /** ··· 204 201 __u64 size; 205 202 }; 206 203 204 + enum amdxdna_cmd_type { 205 + AMDXDNA_CMD_SUBMIT_EXEC_BUF = 0, 206 + AMDXDNA_CMD_SUBMIT_DEPENDENCY, 207 + AMDXDNA_CMD_SUBMIT_SIGNAL, 208 + }; 209 + 210 + /** 211 + * struct amdxdna_drm_exec_cmd - Execute command. 212 + * @ext: MBZ. 213 + * @ext_flags: MBZ. 214 + * @hwctx: Hardware context handle. 215 + * @type: One of command type in enum amdxdna_cmd_type. 216 + * @cmd_handles: Array of command handles or the command handle itself 217 + * in case of just one. 218 + * @args: Array of arguments for all command handles. 219 + * @cmd_count: Number of command handles in the cmd_handles array. 220 + * @arg_count: Number of arguments in the args array. 221 + * @seq: Returned sequence number for this command. 222 + */ 223 + struct amdxdna_drm_exec_cmd { 224 + __u64 ext; 225 + __u64 ext_flags; 226 + __u32 hwctx; 227 + __u32 type; 228 + __u64 cmd_handles; 229 + __u64 args; 230 + __u32 cmd_count; 231 + __u32 arg_count; 232 + __u64 seq; 233 + }; 234 + 207 235 #define DRM_IOCTL_AMDXDNA_CREATE_HWCTX \ 208 236 DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_CREATE_HWCTX, \ 209 237 struct amdxdna_drm_create_hwctx) ··· 258 224 #define DRM_IOCTL_AMDXDNA_SYNC_BO \ 259 225 DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_SYNC_BO, \ 260 226 struct amdxdna_drm_sync_bo) 227 + 228 + #define DRM_IOCTL_AMDXDNA_EXEC_CMD \ 229 + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_EXEC_CMD, \ 230 + struct amdxdna_drm_exec_cmd) 261 231 262 232 #if defined(__cplusplus) 263 233 } /* extern c end */