Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

accel/ivpu: Add command buffer submission logic

Each of the user contexts has two command queues, one for compute engine
and one for the copy engine. Command queues are allocated and registered
in the device when the first job (command buffer) is submitted from
the user space to the VPU device. The userspace provides a list of
GEM buffer object handles to submit to the VPU, the driver resolves
buffer handles, pins physical memory if needed, increments ref count
for each buffer and stores pointers to buffer objects in
the ivpu_job objects that track jobs submitted to the device.
The VPU signals job completion with an asynchronous message that
contains the job id passed to firmware when the job was submitted.

Currently, the driver supports simple scheduling logic
where jobs submitted from user space are immediately pushed
to the VPU device command queues. In the future, it will be
extended to use hardware-based scheduling and/or drm_sched.

Co-developed-by: Andrzej Kacprowski <andrzej.kacprowski@linux.intel.com>
Signed-off-by: Andrzej Kacprowski <andrzej.kacprowski@linux.intel.com>
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20230117092723.60441-7-jacek.lawrynowicz@linux.intel.com

authored by

Jacek Lawrynowicz and committed by
Daniel Vetter
cd727221 02d5b0aa

+824 -4
+1
drivers/accel/ivpu/Makefile
··· 7 7 ivpu_gem.o \ 8 8 ivpu_hw_mtl.o \ 9 9 ivpu_ipc.o \ 10 + ivpu_job.o \ 10 11 ivpu_jsm_msg.o \ 11 12 ivpu_mmu.o \ 12 13 ivpu_mmu_context.o
+30 -4
drivers/accel/ivpu/ivpu_drv.c
··· 20 20 #include "ivpu_gem.h" 21 21 #include "ivpu_hw.h" 22 22 #include "ivpu_ipc.h" 23 + #include "ivpu_job.h" 23 24 #include "ivpu_jsm_msg.h" 24 25 #include "ivpu_mmu.h" 25 26 #include "ivpu_mmu_context.h" ··· 31 30 #endif 32 31 33 32 static const struct drm_driver driver; 33 + 34 + static struct lock_class_key submitted_jobs_xa_lock_class_key; 34 35 35 36 int ivpu_dbg_mask; 36 37 module_param_named(dbg_mask, ivpu_dbg_mask, int, 0644); ··· 87 84 88 85 ivpu_dbg(vdev, FILE, "file_priv release: ctx %u\n", file_priv->ctx.id); 89 86 87 + ivpu_cmdq_release_all(file_priv); 88 + ivpu_bo_remove_all_bos_from_context(&file_priv->ctx); 90 89 ivpu_mmu_user_context_fini(vdev, &file_priv->ctx); 91 90 drm_WARN_ON(&vdev->drm, xa_erase_irq(&vdev->context_xa, file_priv->ctx.id) != file_priv); 91 + mutex_destroy(&file_priv->lock); 92 92 kfree(file_priv); 93 93 } 94 94 ··· 215 209 file_priv->vdev = vdev; 216 210 file_priv->priority = DRM_IVPU_CONTEXT_PRIORITY_NORMAL; 217 211 kref_init(&file_priv->ref); 212 + mutex_init(&file_priv->lock); 218 213 219 214 ret = ivpu_mmu_user_context_init(vdev, &file_priv->ctx, ctx_id); 220 215 if (ret) 221 - goto err_free_file_priv; 216 + goto err_mutex_destroy; 222 217 223 218 old = xa_store_irq(&vdev->context_xa, ctx_id, file_priv, GFP_KERNEL); 224 219 if (xa_is_err(old)) { ··· 236 229 237 230 err_ctx_fini: 238 231 ivpu_mmu_user_context_fini(vdev, &file_priv->ctx); 239 - err_free_file_priv: 232 + err_mutex_destroy: 233 + mutex_destroy(&file_priv->lock); 240 234 kfree(file_priv); 241 235 err_xa_erase: 242 236 xa_erase_irq(&vdev->context_xa, ctx_id); ··· 260 252 DRM_IOCTL_DEF_DRV(IVPU_SET_PARAM, ivpu_set_param_ioctl, 0), 261 253 DRM_IOCTL_DEF_DRV(IVPU_BO_CREATE, ivpu_bo_create_ioctl, 0), 262 254 DRM_IOCTL_DEF_DRV(IVPU_BO_INFO, ivpu_bo_info_ioctl, 0), 255 + DRM_IOCTL_DEF_DRV(IVPU_SUBMIT, ivpu_submit_ioctl, 0), 256 + DRM_IOCTL_DEF_DRV(IVPU_BO_WAIT, ivpu_bo_wait_ioctl, 0), 263 257 }; 264 258 265 259 static int ivpu_wait_for_ready(struct ivpu_device 
*vdev) ··· 468 458 vdev->context_xa_limit.max = IVPU_CONTEXT_LIMIT; 469 459 atomic64_set(&vdev->unique_id_counter, 0); 470 460 xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC); 461 + xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1); 462 + lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key); 471 463 472 464 ret = ivpu_pci_init(vdev); 473 465 if (ret) { ··· 521 509 goto err_fw_fini; 522 510 } 523 511 512 + ret = ivpu_job_done_thread_init(vdev); 513 + if (ret) { 514 + ivpu_err(vdev, "Failed to initialize job done thread: %d\n", ret); 515 + goto err_ipc_fini; 516 + } 517 + 524 518 ret = ivpu_fw_load(vdev); 525 519 if (ret) { 526 520 ivpu_err(vdev, "Failed to load firmware: %d\n", ret); 527 - goto err_fw_fini; 521 + goto err_job_done_thread_fini; 528 522 } 529 523 530 524 ret = ivpu_boot(vdev); 531 525 if (ret) { 532 526 ivpu_err(vdev, "Failed to boot: %d\n", ret); 533 - goto err_fw_fini; 527 + goto err_job_done_thread_fini; 534 528 } 535 529 536 530 return 0; 537 531 532 + err_job_done_thread_fini: 533 + ivpu_job_done_thread_fini(vdev); 534 + err_ipc_fini: 535 + ivpu_ipc_fini(vdev); 538 536 err_fw_fini: 539 537 ivpu_fw_fini(vdev); 540 538 err_mmu_gctx_fini: ··· 552 530 err_power_down: 553 531 ivpu_hw_power_down(vdev); 554 532 err_xa_destroy: 533 + xa_destroy(&vdev->submitted_jobs_xa); 555 534 xa_destroy(&vdev->context_xa); 556 535 return ret; 557 536 } ··· 560 537 static void ivpu_dev_fini(struct ivpu_device *vdev) 561 538 { 562 539 ivpu_shutdown(vdev); 540 + ivpu_job_done_thread_fini(vdev); 563 541 ivpu_ipc_fini(vdev); 564 542 ivpu_fw_fini(vdev); 565 543 ivpu_mmu_global_context_fini(vdev); 566 544 545 + drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa)); 546 + xa_destroy(&vdev->submitted_jobs_xa); 567 547 drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->context_xa)); 568 548 xa_destroy(&vdev->context_xa); 569 549 }
+5
drivers/accel/ivpu/ivpu_drv.h
··· 94 94 struct xarray context_xa; 95 95 struct xa_limit context_xa_limit; 96 96 97 + struct xarray submitted_jobs_xa; 98 + struct task_struct *job_done_thread; 99 + 97 100 atomic64_t unique_id_counter; 98 101 99 102 struct { ··· 114 111 struct ivpu_file_priv { 115 112 struct kref ref; 116 113 struct ivpu_device *vdev; 114 + struct mutex lock; /* Protects cmdq */ 115 + struct ivpu_cmdq *cmdq[IVPU_NUM_ENGINES]; 117 116 struct ivpu_mmu_context ctx; 118 117 u32 priority; 119 118 bool has_mmu_faults;
+26
drivers/accel/ivpu/ivpu_gem.c
··· 678 678 return ret; 679 679 } 680 680 681 + int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 682 + { 683 + struct drm_ivpu_bo_wait *args = data; 684 + struct drm_gem_object *obj; 685 + unsigned long timeout; 686 + long ret; 687 + 688 + timeout = drm_timeout_abs_to_jiffies(args->timeout_ns); 689 + 690 + obj = drm_gem_object_lookup(file, args->handle); 691 + if (!obj) 692 + return -EINVAL; 693 + 694 + ret = dma_resv_wait_timeout(obj->resv, DMA_RESV_USAGE_READ, true, timeout); 695 + if (ret == 0) { 696 + ret = -ETIMEDOUT; 697 + } else if (ret > 0) { 698 + ret = 0; 699 + args->job_status = to_ivpu_bo(obj)->job_status; 700 + } 701 + 702 + drm_gem_object_put(obj); 703 + 704 + return ret; 705 + } 706 + 681 707 static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p) 682 708 { 683 709 unsigned long dma_refcount = 0;
+1
drivers/accel/ivpu/ivpu_gem.h
··· 30 30 u32 handle; 31 31 u32 flags; 32 32 uintptr_t user_ptr; 33 + u32 job_status; 33 34 }; 34 35 35 36 enum ivpu_bo_type {
+602
drivers/accel/ivpu/ivpu_job.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Copyright (C) 2020-2023 Intel Corporation 4 + */ 5 + 6 + #include <drm/drm_file.h> 7 + 8 + #include <linux/bitfield.h> 9 + #include <linux/highmem.h> 10 + #include <linux/kthread.h> 11 + #include <linux/pci.h> 12 + #include <linux/module.h> 13 + #include <uapi/drm/ivpu_accel.h> 14 + 15 + #include "ivpu_drv.h" 16 + #include "ivpu_hw.h" 17 + #include "ivpu_ipc.h" 18 + #include "ivpu_job.h" 19 + #include "ivpu_jsm_msg.h" 20 + 21 + #define CMD_BUF_IDX 0 22 + #define JOB_ID_JOB_MASK GENMASK(7, 0) 23 + #define JOB_ID_CONTEXT_MASK GENMASK(31, 8) 24 + #define JOB_MAX_BUFFER_COUNT 65535 25 + 26 + static unsigned int ivpu_tdr_timeout_ms; 27 + module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, uint, 0644); 28 + MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default"); 29 + 30 + static void ivpu_cmdq_ring_db(struct ivpu_device *vdev, struct ivpu_cmdq *cmdq) 31 + { 32 + ivpu_hw_reg_db_set(vdev, cmdq->db_id); 33 + } 34 + 35 + static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv, u16 engine) 36 + { 37 + struct ivpu_device *vdev = file_priv->vdev; 38 + struct vpu_job_queue_header *jobq_header; 39 + struct ivpu_cmdq *cmdq; 40 + 41 + cmdq = kzalloc(sizeof(*cmdq), GFP_KERNEL); 42 + if (!cmdq) 43 + return NULL; 44 + 45 + cmdq->mem = ivpu_bo_alloc_internal(vdev, 0, SZ_4K, DRM_IVPU_BO_WC); 46 + if (!cmdq->mem) 47 + goto cmdq_free; 48 + 49 + cmdq->db_id = file_priv->ctx.id + engine * ivpu_get_context_count(vdev); 50 + cmdq->entry_count = (u32)((cmdq->mem->base.size - sizeof(struct vpu_job_queue_header)) / 51 + sizeof(struct vpu_job_queue_entry)); 52 + 53 + cmdq->jobq = (struct vpu_job_queue *)cmdq->mem->kvaddr; 54 + jobq_header = &cmdq->jobq->header; 55 + jobq_header->engine_idx = engine; 56 + jobq_header->head = 0; 57 + jobq_header->tail = 0; 58 + wmb(); /* Flush WC buffer for jobq->header */ 59 + 60 + return cmdq; 61 + 62 + cmdq_free: 63 + kfree(cmdq); 64 + 
return NULL; 65 + } 66 + 67 + static void ivpu_cmdq_free(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq) 68 + { 69 + if (!cmdq) 70 + return; 71 + 72 + ivpu_bo_free_internal(cmdq->mem); 73 + kfree(cmdq); 74 + } 75 + 76 + static struct ivpu_cmdq *ivpu_cmdq_acquire(struct ivpu_file_priv *file_priv, u16 engine) 77 + { 78 + struct ivpu_device *vdev = file_priv->vdev; 79 + struct ivpu_cmdq *cmdq = file_priv->cmdq[engine]; 80 + int ret; 81 + 82 + lockdep_assert_held(&file_priv->lock); 83 + 84 + if (!cmdq) { 85 + cmdq = ivpu_cmdq_alloc(file_priv, engine); 86 + if (!cmdq) 87 + return NULL; 88 + file_priv->cmdq[engine] = cmdq; 89 + } 90 + 91 + if (cmdq->db_registered) 92 + return cmdq; 93 + 94 + ret = ivpu_jsm_register_db(vdev, file_priv->ctx.id, cmdq->db_id, 95 + cmdq->mem->vpu_addr, cmdq->mem->base.size); 96 + if (ret) 97 + return NULL; 98 + 99 + cmdq->db_registered = true; 100 + 101 + return cmdq; 102 + } 103 + 104 + static void ivpu_cmdq_release_locked(struct ivpu_file_priv *file_priv, u16 engine) 105 + { 106 + struct ivpu_cmdq *cmdq = file_priv->cmdq[engine]; 107 + 108 + lockdep_assert_held(&file_priv->lock); 109 + 110 + if (cmdq) { 111 + file_priv->cmdq[engine] = NULL; 112 + if (cmdq->db_registered) 113 + ivpu_jsm_unregister_db(file_priv->vdev, cmdq->db_id); 114 + 115 + ivpu_cmdq_free(file_priv, cmdq); 116 + } 117 + } 118 + 119 + void ivpu_cmdq_release_all(struct ivpu_file_priv *file_priv) 120 + { 121 + int i; 122 + 123 + mutex_lock(&file_priv->lock); 124 + 125 + for (i = 0; i < IVPU_NUM_ENGINES; i++) 126 + ivpu_cmdq_release_locked(file_priv, i); 127 + 128 + mutex_unlock(&file_priv->lock); 129 + } 130 + 131 + /* 132 + * Mark the doorbell as unregistered and reset job queue pointers. 133 + * This function needs to be called when the VPU hardware is restarted 134 + * and FW looses job queue state. The next time job queue is used it 135 + * will be registered again. 
136 + */ 137 + static void ivpu_cmdq_reset_locked(struct ivpu_file_priv *file_priv, u16 engine) 138 + { 139 + struct ivpu_cmdq *cmdq = file_priv->cmdq[engine]; 140 + 141 + lockdep_assert_held(&file_priv->lock); 142 + 143 + if (cmdq) { 144 + cmdq->db_registered = false; 145 + cmdq->jobq->header.head = 0; 146 + cmdq->jobq->header.tail = 0; 147 + wmb(); /* Flush WC buffer for jobq header */ 148 + } 149 + } 150 + 151 + static void ivpu_cmdq_reset_all(struct ivpu_file_priv *file_priv) 152 + { 153 + int i; 154 + 155 + mutex_lock(&file_priv->lock); 156 + 157 + for (i = 0; i < IVPU_NUM_ENGINES; i++) 158 + ivpu_cmdq_reset_locked(file_priv, i); 159 + 160 + mutex_unlock(&file_priv->lock); 161 + } 162 + 163 + void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev) 164 + { 165 + struct ivpu_file_priv *file_priv; 166 + unsigned long ctx_id; 167 + 168 + xa_for_each(&vdev->context_xa, ctx_id, file_priv) { 169 + file_priv = ivpu_file_priv_get_by_ctx_id(vdev, ctx_id); 170 + if (!file_priv) 171 + continue; 172 + 173 + ivpu_cmdq_reset_all(file_priv); 174 + 175 + ivpu_file_priv_put(&file_priv); 176 + } 177 + } 178 + 179 + static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job) 180 + { 181 + struct ivpu_device *vdev = job->vdev; 182 + struct vpu_job_queue_header *header = &cmdq->jobq->header; 183 + struct vpu_job_queue_entry *entry; 184 + u32 tail = READ_ONCE(header->tail); 185 + u32 next_entry = (tail + 1) % cmdq->entry_count; 186 + 187 + /* Check if there is space left in job queue */ 188 + if (next_entry == header->head) { 189 + ivpu_dbg(vdev, JOB, "Job queue full: ctx %d engine %d db %d head %d tail %d\n", 190 + job->file_priv->ctx.id, job->engine_idx, cmdq->db_id, header->head, tail); 191 + return -EBUSY; 192 + } 193 + 194 + entry = &cmdq->jobq->job[tail]; 195 + entry->batch_buf_addr = job->cmd_buf_vpu_addr; 196 + entry->job_id = job->job_id; 197 + entry->flags = 0; 198 + wmb(); /* Ensure that tail is updated after filling entry */ 199 + header->tail = 
next_entry; 200 + wmb(); /* Flush WC buffer for jobq header */ 201 + 202 + return 0; 203 + } 204 + 205 + struct ivpu_fence { 206 + struct dma_fence base; 207 + spinlock_t lock; /* protects base */ 208 + struct ivpu_device *vdev; 209 + }; 210 + 211 + static inline struct ivpu_fence *to_vpu_fence(struct dma_fence *fence) 212 + { 213 + return container_of(fence, struct ivpu_fence, base); 214 + } 215 + 216 + static const char *ivpu_fence_get_driver_name(struct dma_fence *fence) 217 + { 218 + return DRIVER_NAME; 219 + } 220 + 221 + static const char *ivpu_fence_get_timeline_name(struct dma_fence *fence) 222 + { 223 + struct ivpu_fence *ivpu_fence = to_vpu_fence(fence); 224 + 225 + return dev_name(ivpu_fence->vdev->drm.dev); 226 + } 227 + 228 + static const struct dma_fence_ops ivpu_fence_ops = { 229 + .get_driver_name = ivpu_fence_get_driver_name, 230 + .get_timeline_name = ivpu_fence_get_timeline_name, 231 + }; 232 + 233 + static struct dma_fence *ivpu_fence_create(struct ivpu_device *vdev) 234 + { 235 + struct ivpu_fence *fence; 236 + 237 + fence = kzalloc(sizeof(*fence), GFP_KERNEL); 238 + if (!fence) 239 + return NULL; 240 + 241 + fence->vdev = vdev; 242 + spin_lock_init(&fence->lock); 243 + dma_fence_init(&fence->base, &ivpu_fence_ops, &fence->lock, dma_fence_context_alloc(1), 1); 244 + 245 + return &fence->base; 246 + } 247 + 248 + static void job_get(struct ivpu_job *job, struct ivpu_job **link) 249 + { 250 + struct ivpu_device *vdev = job->vdev; 251 + 252 + kref_get(&job->ref); 253 + *link = job; 254 + 255 + ivpu_dbg(vdev, KREF, "Job get: id %u refcount %u\n", job->job_id, kref_read(&job->ref)); 256 + } 257 + 258 + static void job_release(struct kref *ref) 259 + { 260 + struct ivpu_job *job = container_of(ref, struct ivpu_job, ref); 261 + struct ivpu_device *vdev = job->vdev; 262 + u32 i; 263 + 264 + for (i = 0; i < job->bo_count; i++) 265 + if (job->bos[i]) 266 + drm_gem_object_put(&job->bos[i]->base); 267 + 268 + dma_fence_put(job->done_fence); 269 + 
ivpu_file_priv_put(&job->file_priv); 270 + 271 + ivpu_dbg(vdev, KREF, "Job released: id %u\n", job->job_id); 272 + kfree(job); 273 + } 274 + 275 + static void job_put(struct ivpu_job *job) 276 + { 277 + struct ivpu_device *vdev = job->vdev; 278 + 279 + ivpu_dbg(vdev, KREF, "Job put: id %u refcount %u\n", job->job_id, kref_read(&job->ref)); 280 + kref_put(&job->ref, job_release); 281 + } 282 + 283 + static struct ivpu_job * 284 + ivpu_create_job(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count) 285 + { 286 + struct ivpu_device *vdev = file_priv->vdev; 287 + struct ivpu_job *job; 288 + size_t buf_size; 289 + 290 + buf_size = sizeof(*job) + bo_count * sizeof(struct ivpu_bo *); 291 + job = kzalloc(buf_size, GFP_KERNEL); 292 + if (!job) 293 + return NULL; 294 + 295 + kref_init(&job->ref); 296 + 297 + job->vdev = vdev; 298 + job->engine_idx = engine_idx; 299 + job->bo_count = bo_count; 300 + job->done_fence = ivpu_fence_create(vdev); 301 + if (!job->done_fence) { 302 + ivpu_warn_ratelimited(vdev, "Failed to create a fence\n"); 303 + goto err_free_job; 304 + } 305 + 306 + job->file_priv = ivpu_file_priv_get(file_priv); 307 + 308 + ivpu_dbg(vdev, JOB, "Job created: ctx %2d engine %d", file_priv->ctx.id, job->engine_idx); 309 + 310 + return job; 311 + 312 + err_free_job: 313 + kfree(job); 314 + return NULL; 315 + } 316 + 317 + static int ivpu_job_done(struct ivpu_device *vdev, u32 job_id, u32 job_status) 318 + { 319 + struct ivpu_job *job; 320 + 321 + job = xa_erase(&vdev->submitted_jobs_xa, job_id); 322 + if (!job) 323 + return -ENOENT; 324 + 325 + if (job->file_priv->has_mmu_faults) 326 + job_status = VPU_JSM_STATUS_ABORTED; 327 + 328 + job->bos[CMD_BUF_IDX]->job_status = job_status; 329 + dma_fence_signal(job->done_fence); 330 + 331 + ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d engine %d status 0x%x\n", 332 + job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status); 333 + 334 + job_put(job); 335 + return 0; 336 + } 337 + 338 + static void 
ivpu_job_done_message(struct ivpu_device *vdev, void *msg) 339 + { 340 + struct vpu_ipc_msg_payload_job_done *payload; 341 + struct vpu_jsm_msg *job_ret_msg = msg; 342 + int ret; 343 + 344 + payload = (struct vpu_ipc_msg_payload_job_done *)&job_ret_msg->payload; 345 + 346 + ret = ivpu_job_done(vdev, payload->job_id, payload->job_status); 347 + if (ret) 348 + ivpu_err(vdev, "Failed to finish job %d: %d\n", payload->job_id, ret); 349 + } 350 + 351 + void ivpu_jobs_abort_all(struct ivpu_device *vdev) 352 + { 353 + struct ivpu_job *job; 354 + unsigned long id; 355 + 356 + xa_for_each(&vdev->submitted_jobs_xa, id, job) 357 + ivpu_job_done(vdev, id, VPU_JSM_STATUS_ABORTED); 358 + } 359 + 360 + static int ivpu_direct_job_submission(struct ivpu_job *job) 361 + { 362 + struct ivpu_file_priv *file_priv = job->file_priv; 363 + struct ivpu_device *vdev = job->vdev; 364 + struct xa_limit job_id_range; 365 + struct ivpu_cmdq *cmdq; 366 + int ret; 367 + 368 + mutex_lock(&file_priv->lock); 369 + 370 + cmdq = ivpu_cmdq_acquire(job->file_priv, job->engine_idx); 371 + if (!cmdq) { 372 + ivpu_warn(vdev, "Failed get job queue, ctx %d engine %d\n", 373 + file_priv->ctx.id, job->engine_idx); 374 + ret = -EINVAL; 375 + goto err_unlock; 376 + } 377 + 378 + job_id_range.min = FIELD_PREP(JOB_ID_CONTEXT_MASK, (file_priv->ctx.id - 1)); 379 + job_id_range.max = job_id_range.min | JOB_ID_JOB_MASK; 380 + 381 + job_get(job, &job); 382 + ret = xa_alloc(&vdev->submitted_jobs_xa, &job->job_id, job, job_id_range, GFP_KERNEL); 383 + if (ret) { 384 + ivpu_warn_ratelimited(vdev, "Failed to allocate job id: %d\n", ret); 385 + goto err_job_put; 386 + } 387 + 388 + ret = ivpu_cmdq_push_job(cmdq, job); 389 + if (ret) 390 + goto err_xa_erase; 391 + 392 + ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d engine %d next %d\n", 393 + job->job_id, file_priv->ctx.id, job->engine_idx, cmdq->jobq->header.tail); 394 + 395 + if (ivpu_test_mode == IVPU_TEST_MODE_NULL_HW) { 396 + ivpu_job_done(vdev, job->job_id, 
VPU_JSM_STATUS_SUCCESS); 397 + cmdq->jobq->header.head = cmdq->jobq->header.tail; 398 + wmb(); /* Flush WC buffer for jobq header */ 399 + } else { 400 + ivpu_cmdq_ring_db(vdev, cmdq); 401 + } 402 + 403 + mutex_unlock(&file_priv->lock); 404 + return 0; 405 + 406 + err_xa_erase: 407 + xa_erase(&vdev->submitted_jobs_xa, job->job_id); 408 + err_job_put: 409 + job_put(job); 410 + err_unlock: 411 + mutex_unlock(&file_priv->lock); 412 + return ret; 413 + } 414 + 415 + static int 416 + ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32 *buf_handles, 417 + u32 buf_count, u32 commands_offset) 418 + { 419 + struct ivpu_file_priv *file_priv = file->driver_priv; 420 + struct ivpu_device *vdev = file_priv->vdev; 421 + struct ww_acquire_ctx acquire_ctx; 422 + struct ivpu_bo *bo; 423 + int ret; 424 + u32 i; 425 + 426 + for (i = 0; i < buf_count; i++) { 427 + struct drm_gem_object *obj = drm_gem_object_lookup(file, buf_handles[i]); 428 + 429 + if (!obj) 430 + return -ENOENT; 431 + 432 + job->bos[i] = to_ivpu_bo(obj); 433 + 434 + ret = ivpu_bo_pin(job->bos[i]); 435 + if (ret) 436 + return ret; 437 + } 438 + 439 + bo = job->bos[CMD_BUF_IDX]; 440 + if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ)) { 441 + ivpu_warn(vdev, "Buffer is already in use\n"); 442 + return -EBUSY; 443 + } 444 + 445 + if (commands_offset >= bo->base.size) { 446 + ivpu_warn(vdev, "Invalid command buffer offset %u\n", commands_offset); 447 + return -EINVAL; 448 + } 449 + 450 + job->cmd_buf_vpu_addr = bo->vpu_addr + commands_offset; 451 + 452 + ret = drm_gem_lock_reservations((struct drm_gem_object **)job->bos, buf_count, 453 + &acquire_ctx); 454 + if (ret) { 455 + ivpu_warn(vdev, "Failed to lock reservations: %d\n", ret); 456 + return ret; 457 + } 458 + 459 + for (i = 0; i < buf_count; i++) { 460 + ret = dma_resv_reserve_fences(job->bos[i]->base.resv, 1); 461 + if (ret) { 462 + ivpu_warn(vdev, "Failed to reserve fences: %d\n", ret); 463 + goto unlock_reservations; 464 
+ } 465 + } 466 + 467 + for (i = 0; i < buf_count; i++) 468 + dma_resv_add_fence(job->bos[i]->base.resv, job->done_fence, DMA_RESV_USAGE_WRITE); 469 + 470 + unlock_reservations: 471 + drm_gem_unlock_reservations((struct drm_gem_object **)job->bos, buf_count, &acquire_ctx); 472 + 473 + wmb(); /* Flush write combining buffers */ 474 + 475 + return ret; 476 + } 477 + 478 + int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 479 + { 480 + int ret = 0; 481 + struct ivpu_file_priv *file_priv = file->driver_priv; 482 + struct ivpu_device *vdev = file_priv->vdev; 483 + struct drm_ivpu_submit *params = data; 484 + struct ivpu_job *job; 485 + u32 *buf_handles; 486 + 487 + if (params->engine > DRM_IVPU_ENGINE_COPY) 488 + return -EINVAL; 489 + 490 + if (params->buffer_count == 0 || params->buffer_count > JOB_MAX_BUFFER_COUNT) 491 + return -EINVAL; 492 + 493 + if (!IS_ALIGNED(params->commands_offset, 8)) 494 + return -EINVAL; 495 + 496 + if (!file_priv->ctx.id) 497 + return -EINVAL; 498 + 499 + if (file_priv->has_mmu_faults) 500 + return -EBADFD; 501 + 502 + buf_handles = kcalloc(params->buffer_count, sizeof(u32), GFP_KERNEL); 503 + if (!buf_handles) 504 + return -ENOMEM; 505 + 506 + ret = copy_from_user(buf_handles, 507 + (void __user *)params->buffers_ptr, 508 + params->buffer_count * sizeof(u32)); 509 + if (ret) { 510 + ret = -EFAULT; 511 + goto free_handles; 512 + } 513 + 514 + ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u buf_count %u\n", 515 + file_priv->ctx.id, params->buffer_count); 516 + 517 + job = ivpu_create_job(file_priv, params->engine, params->buffer_count); 518 + if (!job) { 519 + ivpu_err(vdev, "Failed to create job\n"); 520 + ret = -ENOMEM; 521 + goto free_handles; 522 + } 523 + 524 + ret = ivpu_job_prepare_bos_for_submit(file, job, buf_handles, params->buffer_count, 525 + params->commands_offset); 526 + if (ret) { 527 + ivpu_err(vdev, "Failed to prepare job, ret %d\n", ret); 528 + goto job_put; 529 + } 530 + 531 + ret = 
ivpu_direct_job_submission(job); 532 + if (ret) { 533 + dma_fence_signal(job->done_fence); 534 + ivpu_err(vdev, "Failed to submit job to the HW, ret %d\n", ret); 535 + } 536 + 537 + job_put: 538 + job_put(job); 539 + free_handles: 540 + kfree(buf_handles); 541 + 542 + return ret; 543 + } 544 + 545 + static int ivpu_job_done_thread(void *arg) 546 + { 547 + struct ivpu_device *vdev = (struct ivpu_device *)arg; 548 + struct ivpu_ipc_consumer cons; 549 + struct vpu_jsm_msg jsm_msg; 550 + bool jobs_submitted; 551 + unsigned int timeout; 552 + int ret; 553 + 554 + ivpu_dbg(vdev, JOB, "Started %s\n", __func__); 555 + 556 + ivpu_ipc_consumer_add(vdev, &cons, VPU_IPC_CHAN_JOB_RET); 557 + 558 + while (!kthread_should_stop()) { 559 + timeout = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr; 560 + jobs_submitted = !xa_empty(&vdev->submitted_jobs_xa); 561 + ret = ivpu_ipc_receive(vdev, &cons, NULL, &jsm_msg, timeout); 562 + if (!ret) { 563 + ivpu_job_done_message(vdev, &jsm_msg); 564 + } else if (ret == -ETIMEDOUT) { 565 + if (jobs_submitted && !xa_empty(&vdev->submitted_jobs_xa)) { 566 + ivpu_err(vdev, "TDR detected, timeout %d ms", timeout); 567 + ivpu_hw_diagnose_failure(vdev); 568 + } 569 + } 570 + } 571 + 572 + ivpu_ipc_consumer_del(vdev, &cons); 573 + 574 + ivpu_jobs_abort_all(vdev); 575 + 576 + ivpu_dbg(vdev, JOB, "Stopped %s\n", __func__); 577 + return 0; 578 + } 579 + 580 + int ivpu_job_done_thread_init(struct ivpu_device *vdev) 581 + { 582 + struct task_struct *thread; 583 + 584 + thread = kthread_run(&ivpu_job_done_thread, (void *)vdev, "ivpu_job_done_thread"); 585 + if (IS_ERR(thread)) { 586 + ivpu_err(vdev, "Failed to start job completion thread\n"); 587 + return -EIO; 588 + } 589 + 590 + get_task_struct(thread); 591 + wake_up_process(thread); 592 + 593 + vdev->job_done_thread = thread; 594 + 595 + return 0; 596 + } 597 + 598 + void ivpu_job_done_thread_fini(struct ivpu_device *vdev) 599 + { 600 + kthread_stop(vdev->job_done_thread); 601 + 
put_task_struct(vdev->job_done_thread); 602 + }
+67
drivers/accel/ivpu/ivpu_job.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + /* 3 + * Copyright (C) 2020-2023 Intel Corporation 4 + */ 5 + 6 + #ifndef __IVPU_JOB_H__ 7 + #define __IVPU_JOB_H__ 8 + 9 + #include <linux/kref.h> 10 + #include <linux/idr.h> 11 + 12 + #include "ivpu_gem.h" 13 + 14 + struct ivpu_device; 15 + struct ivpu_file_priv; 16 + 17 + /** 18 + * struct ivpu_cmdq - Object representing device queue used to send jobs. 19 + * @jobq: Pointer to job queue memory shared with the device 20 + * @mem: Memory allocated for the job queue, shared with device 21 + * @entry_count Number of job entries in the queue 22 + * @db_id: Doorbell assigned to this job queue 23 + * @db_registered: True if doorbell is registered in device 24 + */ 25 + struct ivpu_cmdq { 26 + struct vpu_job_queue *jobq; 27 + struct ivpu_bo *mem; 28 + u32 entry_count; 29 + u32 db_id; 30 + bool db_registered; 31 + }; 32 + 33 + /** 34 + * struct ivpu_job - KMD object that represents batchbuffer / DMA buffer. 35 + * Each batch / DMA buffer is a job to be submitted and executed by the VPU FW. 36 + * This is a unit of execution, and be tracked by the job_id for 37 + * any status reporting from VPU FW through IPC JOB RET/DONE message. 
38 + * @file_priv: The client that submitted this job 39 + * @job_id: Job ID for KMD tracking and job status reporting from VPU FW 40 + * @status: Status of the Job from IPC JOB RET/DONE message 41 + * @batch_buffer: CPU vaddr points to the batch buffer memory allocated for the job 42 + * @submit_status_offset: Offset within batch buffer where job completion handler 43 + will update the job status 44 + */ 45 + struct ivpu_job { 46 + struct kref ref; 47 + struct ivpu_device *vdev; 48 + struct ivpu_file_priv *file_priv; 49 + struct dma_fence *done_fence; 50 + u64 cmd_buf_vpu_addr; 51 + u32 job_id; 52 + u32 engine_idx; 53 + size_t bo_count; 54 + struct ivpu_bo *bos[]; 55 + }; 56 + 57 + int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file); 58 + 59 + void ivpu_cmdq_release_all(struct ivpu_file_priv *file_priv); 60 + void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev); 61 + 62 + int ivpu_job_done_thread_init(struct ivpu_device *vdev); 63 + void ivpu_job_done_thread_fini(struct ivpu_device *vdev); 64 + 65 + void ivpu_jobs_abort_all(struct ivpu_device *vdev); 66 + 67 + #endif /* __IVPU_JOB_H__ */
+92
include/uapi/drm/ivpu_accel.h
··· 19 19 #define DRM_IVPU_SET_PARAM 0x01 20 20 #define DRM_IVPU_BO_CREATE 0x02 21 21 #define DRM_IVPU_BO_INFO 0x03 22 + #define DRM_IVPU_SUBMIT 0x05 23 + #define DRM_IVPU_BO_WAIT 0x06 22 24 23 25 #define DRM_IOCTL_IVPU_GET_PARAM \ 24 26 DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_GET_PARAM, struct drm_ivpu_param) ··· 33 31 34 32 #define DRM_IOCTL_IVPU_BO_INFO \ 35 33 DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_BO_INFO, struct drm_ivpu_bo_info) 34 + 35 + #define DRM_IOCTL_IVPU_SUBMIT \ 36 + DRM_IOW(DRM_COMMAND_BASE + DRM_IVPU_SUBMIT, struct drm_ivpu_submit) 37 + 38 + #define DRM_IOCTL_IVPU_BO_WAIT \ 39 + DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_BO_WAIT, struct drm_ivpu_bo_wait) 36 40 37 41 /** 38 42 * DOC: contexts ··· 213 205 214 206 /** @size: Returned GEM object size, aligned to PAGE_SIZE */ 215 207 __u64 size; 208 + }; 209 + 210 + /* drm_ivpu_submit engines */ 211 + #define DRM_IVPU_ENGINE_COMPUTE 0 212 + #define DRM_IVPU_ENGINE_COPY 1 213 + 214 + /** 215 + * struct drm_ivpu_submit - Submit commands to the VPU 216 + * 217 + * Execute a single command buffer on a given VPU engine. 218 + * Handles to all referenced buffer objects have to be provided in @buffers_ptr. 219 + * 220 + * User space may wait on job completion using %DRM_IVPU_BO_WAIT ioctl. 221 + */ 222 + struct drm_ivpu_submit { 223 + /** 224 + * @buffers_ptr: 225 + * 226 + * A pointer to an u32 array of GEM handles of the BOs required for this job. 227 + * The number of elements in the array must be equal to the value given by @buffer_count. 228 + * 229 + * The first BO is the command buffer. The rest of array has to contain all 230 + * BOs referenced from the command buffer. 
231 + */ 232 + __u64 buffers_ptr; 233 + 234 + /** @buffer_count: Number of elements in the @buffers_ptr */ 235 + __u32 buffer_count; 236 + 237 + /** 238 + * @engine: Select the engine this job should be executed on 239 + * 240 + * %DRM_IVPU_ENGINE_COMPUTE: 241 + * 242 + * Performs Deep Learning Neural Compute Inference Operations 243 + * 244 + * %DRM_IVPU_ENGINE_COPY: 245 + * 246 + * Performs memory copy operations to/from system memory allocated for VPU 247 + */ 248 + __u32 engine; 249 + 250 + /** @flags: Reserved for future use - must be zero */ 251 + __u32 flags; 252 + 253 + /** 254 + * @commands_offset: 255 + * 256 + * Offset inside the first buffer in @buffers_ptr containing commands 257 + * to be executed. The offset has to be 8-byte aligned. 258 + */ 259 + __u32 commands_offset; 260 + }; 261 + 262 + /* drm_ivpu_bo_wait job status codes */ 263 + #define DRM_IVPU_JOB_STATUS_SUCCESS 0 264 + 265 + /** 266 + * struct drm_ivpu_bo_wait - Wait for BO to become inactive 267 + * 268 + * Blocks until a given buffer object becomes inactive. 269 + * With @timeout_ms set to 0 returns immediately. 270 + */ 271 + struct drm_ivpu_bo_wait { 272 + /** @handle: Handle to the buffer object to be waited on */ 273 + __u32 handle; 274 + 275 + /** @flags: Reserved for future use - must be zero */ 276 + __u32 flags; 277 + 278 + /** @timeout_ns: Absolute timeout in nanoseconds (may be zero) */ 279 + __s64 timeout_ns; 280 + 281 + /** 282 + * @job_status: 283 + * 284 + * Job status code which is updated after the job is completed. 285 + * &DRM_IVPU_JOB_STATUS_SUCCESS or device specific error otherwise. 286 + * Valid only if @handle points to a command buffer. 287 + */ 288 + __u32 job_status; 289 + 290 + /** @pad: Padding - must be zero */ 291 + __u32 pad; 216 292 }; 217 293 218 294 #if defined(__cplusplus)