Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

accel/ivpu: Add command buffer submission logic

Each of the user contexts has two command queues, one for compute engine
and one for the copy engine. Command queues are allocated and registered
in the device when the first job (command buffer) is submitted from
the user space to the VPU device. The userspace provides a list of
GEM buffer object handles to submit to the VPU, the driver resolves
buffer handles, pins physical memory if needed, increments ref count
for each buffer and stores pointers to buffer objects in
the ivpu_job objects that track jobs submitted to the device.
The VPU signals job completion with an asynchronous message that
contains the job id passed to firmware when the job was submitted.

Currently, the driver supports simple scheduling logic
where jobs submitted from user space are immediately pushed
to the VPU device command queues. In the future, it will be
extended to use hardware-based scheduling and/or drm_sched.

Co-developed-by: Andrzej Kacprowski <andrzej.kacprowski@linux.intel.com>
Signed-off-by: Andrzej Kacprowski <andrzej.kacprowski@linux.intel.com>
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20230117092723.60441-7-jacek.lawrynowicz@linux.intel.com

authored by

Jacek Lawrynowicz and committed by
Daniel Vetter
cd727221 02d5b0aa

+824 -4
+1
drivers/accel/ivpu/Makefile
··· 7 7 ivpu_gem.o \ 8 8 ivpu_hw_mtl.o \ 9 9 ivpu_ipc.o \ 10 + ivpu_job.o \ 10 11 ivpu_jsm_msg.o \ 11 12 ivpu_mmu.o \ 12 13 ivpu_mmu_context.o
+30 -4
drivers/accel/ivpu/ivpu_drv.c
··· 20 20 #include "ivpu_gem.h" 21 21 #include "ivpu_hw.h" 22 22 #include "ivpu_ipc.h" 23 + #include "ivpu_job.h" 23 24 #include "ivpu_jsm_msg.h" 24 25 #include "ivpu_mmu.h" 25 26 #include "ivpu_mmu_context.h" ··· 31 30 #endif 32 31 33 32 static const struct drm_driver driver; 33 + 34 + static struct lock_class_key submitted_jobs_xa_lock_class_key; 34 35 35 36 int ivpu_dbg_mask; 36 37 module_param_named(dbg_mask, ivpu_dbg_mask, int, 0644); ··· 87 84 88 85 ivpu_dbg(vdev, FILE, "file_priv release: ctx %u\n", file_priv->ctx.id); 89 86 87 + ivpu_cmdq_release_all(file_priv); 88 + ivpu_bo_remove_all_bos_from_context(&file_priv->ctx); 90 89 ivpu_mmu_user_context_fini(vdev, &file_priv->ctx); 91 90 drm_WARN_ON(&vdev->drm, xa_erase_irq(&vdev->context_xa, file_priv->ctx.id) != file_priv); 91 + mutex_destroy(&file_priv->lock); 92 92 kfree(file_priv); 93 93 } 94 94 ··· 215 209 file_priv->vdev = vdev; 216 210 file_priv->priority = DRM_IVPU_CONTEXT_PRIORITY_NORMAL; 217 211 kref_init(&file_priv->ref); 212 + mutex_init(&file_priv->lock); 218 213 219 214 ret = ivpu_mmu_user_context_init(vdev, &file_priv->ctx, ctx_id); 220 215 if (ret) 221 - goto err_free_file_priv; 216 + goto err_mutex_destroy; 222 217 223 218 old = xa_store_irq(&vdev->context_xa, ctx_id, file_priv, GFP_KERNEL); 224 219 if (xa_is_err(old)) { ··· 236 229 237 230 err_ctx_fini: 238 231 ivpu_mmu_user_context_fini(vdev, &file_priv->ctx); 239 - err_free_file_priv: 232 + err_mutex_destroy: 233 + mutex_destroy(&file_priv->lock); 240 234 kfree(file_priv); 241 235 err_xa_erase: 242 236 xa_erase_irq(&vdev->context_xa, ctx_id); ··· 260 252 DRM_IOCTL_DEF_DRV(IVPU_SET_PARAM, ivpu_set_param_ioctl, 0), 261 253 DRM_IOCTL_DEF_DRV(IVPU_BO_CREATE, ivpu_bo_create_ioctl, 0), 262 254 DRM_IOCTL_DEF_DRV(IVPU_BO_INFO, ivpu_bo_info_ioctl, 0), 255 + DRM_IOCTL_DEF_DRV(IVPU_SUBMIT, ivpu_submit_ioctl, 0), 256 + DRM_IOCTL_DEF_DRV(IVPU_BO_WAIT, ivpu_bo_wait_ioctl, 0), 263 257 }; 264 258 265 259 static int ivpu_wait_for_ready(struct ivpu_device 
*vdev) ··· 468 458 vdev->context_xa_limit.max = IVPU_CONTEXT_LIMIT; 469 459 atomic64_set(&vdev->unique_id_counter, 0); 470 460 xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC); 461 + xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1); 462 + lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key); 471 463 472 464 ret = ivpu_pci_init(vdev); 473 465 if (ret) { ··· 521 509 goto err_fw_fini; 522 510 } 523 511 512 + ret = ivpu_job_done_thread_init(vdev); 513 + if (ret) { 514 + ivpu_err(vdev, "Failed to initialize job done thread: %d\n", ret); 515 + goto err_ipc_fini; 516 + } 517 + 524 518 ret = ivpu_fw_load(vdev); 525 519 if (ret) { 526 520 ivpu_err(vdev, "Failed to load firmware: %d\n", ret); 527 - goto err_fw_fini; 521 + goto err_job_done_thread_fini; 528 522 } 529 523 530 524 ret = ivpu_boot(vdev); 531 525 if (ret) { 532 526 ivpu_err(vdev, "Failed to boot: %d\n", ret); 533 - goto err_fw_fini; 527 + goto err_job_done_thread_fini; 534 528 } 535 529 536 530 return 0; 537 531 532 + err_job_done_thread_fini: 533 + ivpu_job_done_thread_fini(vdev); 534 + err_ipc_fini: 535 + ivpu_ipc_fini(vdev); 538 536 err_fw_fini: 539 537 ivpu_fw_fini(vdev); 540 538 err_mmu_gctx_fini: ··· 552 530 err_power_down: 553 531 ivpu_hw_power_down(vdev); 554 532 err_xa_destroy: 533 + xa_destroy(&vdev->submitted_jobs_xa); 555 534 xa_destroy(&vdev->context_xa); 556 535 return ret; 557 536 } ··· 560 537 static void ivpu_dev_fini(struct ivpu_device *vdev) 561 538 { 562 539 ivpu_shutdown(vdev); 540 + ivpu_job_done_thread_fini(vdev); 563 541 ivpu_ipc_fini(vdev); 564 542 ivpu_fw_fini(vdev); 565 543 ivpu_mmu_global_context_fini(vdev); 566 544 545 + drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa)); 546 + xa_destroy(&vdev->submitted_jobs_xa); 567 547 drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->context_xa)); 568 548 xa_destroy(&vdev->context_xa); 569 549 }
+5
drivers/accel/ivpu/ivpu_drv.h
··· 94 94 struct xarray context_xa; 95 95 struct xa_limit context_xa_limit; 96 96 97 + struct xarray submitted_jobs_xa; 98 + struct task_struct *job_done_thread; 99 + 97 100 atomic64_t unique_id_counter; 98 101 99 102 struct { ··· 114 111 struct ivpu_file_priv { 115 112 struct kref ref; 116 113 struct ivpu_device *vdev; 114 + struct mutex lock; /* Protects cmdq */ 115 + struct ivpu_cmdq *cmdq[IVPU_NUM_ENGINES]; 117 116 struct ivpu_mmu_context ctx; 118 117 u32 priority; 119 118 bool has_mmu_faults;
+26
drivers/accel/ivpu/ivpu_gem.c
··· 678 678 return ret; 679 679 } 680 680 681 + int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 682 + { 683 + struct drm_ivpu_bo_wait *args = data; 684 + struct drm_gem_object *obj; 685 + unsigned long timeout; 686 + long ret; 687 + 688 + timeout = drm_timeout_abs_to_jiffies(args->timeout_ns); 689 + 690 + obj = drm_gem_object_lookup(file, args->handle); 691 + if (!obj) 692 + return -EINVAL; 693 + 694 + ret = dma_resv_wait_timeout(obj->resv, DMA_RESV_USAGE_READ, true, timeout); 695 + if (ret == 0) { 696 + ret = -ETIMEDOUT; 697 + } else if (ret > 0) { 698 + ret = 0; 699 + args->job_status = to_ivpu_bo(obj)->job_status; 700 + } 701 + 702 + drm_gem_object_put(obj); 703 + 704 + return ret; 705 + } 706 + 681 707 static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p) 682 708 { 683 709 unsigned long dma_refcount = 0;
+1
drivers/accel/ivpu/ivpu_gem.h
··· 30 30 u32 handle; 31 31 u32 flags; 32 32 uintptr_t user_ptr; 33 + u32 job_status; 33 34 }; 34 35 35 36 enum ivpu_bo_type {
+602
drivers/accel/ivpu/ivpu_job.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Copyright (C) 2020-2023 Intel Corporation 4 + */ 5 + 6 + #include <drm/drm_file.h> 7 + 8 + #include <linux/bitfield.h> 9 + #include <linux/highmem.h> 10 + #include <linux/kthread.h> 11 + #include <linux/pci.h> 12 + #include <linux/module.h> 13 + #include <uapi/drm/ivpu_accel.h> 14 + 15 + #include "ivpu_drv.h" 16 + #include "ivpu_hw.h" 17 + #include "ivpu_ipc.h" 18 + #include "ivpu_job.h" 19 + #include "ivpu_jsm_msg.h" 20 + 21 + #define CMD_BUF_IDX 0 22 + #define JOB_ID_JOB_MASK GENMASK(7, 0) 23 + #define JOB_ID_CONTEXT_MASK GENMASK(31, 8) 24 + #define JOB_MAX_BUFFER_COUNT 65535 25 + 26 + static unsigned int ivpu_tdr_timeout_ms; 27 + module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, uint, 0644); 28 + MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default"); 29 + 30 + static void ivpu_cmdq_ring_db(struct ivpu_device *vdev, struct ivpu_cmdq *cmdq) 31 + { 32 + ivpu_hw_reg_db_set(vdev, cmdq->db_id); 33 + } 34 + 35 + static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv, u16 engine) 36 + { 37 + struct ivpu_device *vdev = file_priv->vdev; 38 + struct vpu_job_queue_header *jobq_header; 39 + struct ivpu_cmdq *cmdq; 40 + 41 + cmdq = kzalloc(sizeof(*cmdq), GFP_KERNEL); 42 + if (!cmdq) 43 + return NULL; 44 + 45 + cmdq->mem = ivpu_bo_alloc_internal(vdev, 0, SZ_4K, DRM_IVPU_BO_WC); 46 + if (!cmdq->mem) 47 + goto cmdq_free; 48 + 49 + cmdq->db_id = file_priv->ctx.id + engine * ivpu_get_context_count(vdev); 50 + cmdq->entry_count = (u32)((cmdq->mem->base.size - sizeof(struct vpu_job_queue_header)) / 51 + sizeof(struct vpu_job_queue_entry)); 52 + 53 + cmdq->jobq = (struct vpu_job_queue *)cmdq->mem->kvaddr; 54 + jobq_header = &cmdq->jobq->header; 55 + jobq_header->engine_idx = engine; 56 + jobq_header->head = 0; 57 + jobq_header->tail = 0; 58 + wmb(); /* Flush WC buffer for jobq->header */ 59 + 60 + return cmdq; 61 + 62 + cmdq_free: 63 + kfree(cmdq); 64 + 
return NULL; 65 + } 66 + 67 + static void ivpu_cmdq_free(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq) 68 + { 69 + if (!cmdq) 70 + return; 71 + 72 + ivpu_bo_free_internal(cmdq->mem); 73 + kfree(cmdq); 74 + } 75 + 76 + static struct ivpu_cmdq *ivpu_cmdq_acquire(struct ivpu_file_priv *file_priv, u16 engine) 77 + { 78 + struct ivpu_device *vdev = file_priv->vdev; 79 + struct ivpu_cmdq *cmdq = file_priv->cmdq[engine]; 80 + int ret; 81 + 82 + lockdep_assert_held(&file_priv->lock); 83 + 84 + if (!cmdq) { 85 + cmdq = ivpu_cmdq_alloc(file_priv, engine); 86 + if (!cmdq) 87 + return NULL; 88 + file_priv->cmdq[engine] = cmdq; 89 + } 90 + 91 + if (cmdq->db_registered) 92 + return cmdq; 93 + 94 + ret = ivpu_jsm_register_db(vdev, file_priv->ctx.id, cmdq->db_id, 95 + cmdq->mem->vpu_addr, cmdq->mem->base.size); 96 + if (ret) 97 + return NULL; 98 + 99 + cmdq->db_registered = true; 100 + 101 + return cmdq; 102 + } 103 + 104 + static void ivpu_cmdq_release_locked(struct ivpu_file_priv *file_priv, u16 engine) 105 + { 106 + struct ivpu_cmdq *cmdq = file_priv->cmdq[engine]; 107 + 108 + lockdep_assert_held(&file_priv->lock); 109 + 110 + if (cmdq) { 111 + file_priv->cmdq[engine] = NULL; 112 + if (cmdq->db_registered) 113 + ivpu_jsm_unregister_db(file_priv->vdev, cmdq->db_id); 114 + 115 + ivpu_cmdq_free(file_priv, cmdq); 116 + } 117 + } 118 + 119 + void ivpu_cmdq_release_all(struct ivpu_file_priv *file_priv) 120 + { 121 + int i; 122 + 123 + mutex_lock(&file_priv->lock); 124 + 125 + for (i = 0; i < IVPU_NUM_ENGINES; i++) 126 + ivpu_cmdq_release_locked(file_priv, i); 127 + 128 + mutex_unlock(&file_priv->lock); 129 + } 130 + 131 + /* 132 + * Mark the doorbell as unregistered and reset job queue pointers. 133 + * This function needs to be called when the VPU hardware is restarted 134 + * and FW looses job queue state. The next time job queue is used it 135 + * will be registered again. 
136 + */ 137 + static void ivpu_cmdq_reset_locked(struct ivpu_file_priv *file_priv, u16 engine) 138 + { 139 + struct ivpu_cmdq *cmdq = file_priv->cmdq[engine]; 140 + 141 + lockdep_assert_held(&file_priv->lock); 142 + 143 + if (cmdq) { 144 + cmdq->db_registered = false; 145 + cmdq->jobq->header.head = 0; 146 + cmdq->jobq->header.tail = 0; 147 + wmb(); /* Flush WC buffer for jobq header */ 148 + } 149 + } 150 + 151 + static void ivpu_cmdq_reset_all(struct ivpu_file_priv *file_priv) 152 + { 153 + int i; 154 + 155 + mutex_lock(&file_priv->lock); 156 + 157 + for (i = 0; i < IVPU_NUM_ENGINES; i++) 158 + ivpu_cmdq_reset_locked(file_priv, i); 159 + 160 + mutex_unlock(&file_priv->lock); 161 + } 162 + 163 + void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev) 164 + { 165 + struct ivpu_file_priv *file_priv; 166 + unsigned long ctx_id; 167 + 168 + xa_for_each(&vdev->context_xa, ctx_id, file_priv) { 169 + file_priv = ivpu_file_priv_get_by_ctx_id(vdev, ctx_id); 170 + if (!file_priv) 171 + continue; 172 + 173 + ivpu_cmdq_reset_all(file_priv); 174 + 175 + ivpu_file_priv_put(&file_priv); 176 + } 177 + } 178 + 179 + static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job) 180 + { 181 + struct ivpu_device *vdev = job->vdev; 182 + struct vpu_job_queue_header *header = &cmdq->jobq->header; 183 + struct vpu_job_queue_entry *entry; 184 + u32 tail = READ_ONCE(header->tail); 185 + u32 next_entry = (tail + 1) % cmdq->entry_count; 186 + 187 + /* Check if there is space left in job queue */ 188 + if (next_entry == header->head) { 189 + ivpu_dbg(vdev, JOB, "Job queue full: ctx %d engine %d db %d head %d tail %d\n", 190 + job->file_priv->ctx.id, job->engine_idx, cmdq->db_id, header->head, tail); 191 + return -EBUSY; 192 + } 193 + 194 + entry = &cmdq->jobq->job[tail]; 195 + entry->batch_buf_addr = job->cmd_buf_vpu_addr; 196 + entry->job_id = job->job_id; 197 + entry->flags = 0; 198 + wmb(); /* Ensure that tail is updated after filling entry */ 199 + header->tail = 
next_entry; 200 + wmb(); /* Flush WC buffer for jobq header */ 201 + 202 + return 0; 203 + } 204 + 205 + struct ivpu_fence { 206 + struct dma_fence base; 207 + spinlock_t lock; /* protects base */ 208 + struct ivpu_device *vdev; 209 + }; 210 + 211 + static inline struct ivpu_fence *to_vpu_fence(struct dma_fence *fence) 212 + { 213 + return container_of(fence, struct ivpu_fence, base); 214 + } 215 + 216 + static const char *ivpu_fence_get_driver_name(struct dma_fence *fence) 217 + { 218 + return DRIVER_NAME; 219 + } 220 + 221 + static const char *ivpu_fence_get_timeline_name(struct dma_fence *fence) 222 + { 223 + struct ivpu_fence *ivpu_fence = to_vpu_fence(fence); 224 + 225 + return dev_name(ivpu_fence->vdev->drm.dev); 226 + } 227 + 228 + static const struct dma_fence_ops ivpu_fence_ops = { 229 + .get_driver_name = ivpu_fence_get_driver_name, 230 + .get_timeline_name = ivpu_fence_get_timeline_name, 231 + }; 232 + 233 + static struct dma_fence *ivpu_fence_create(struct ivpu_device *vdev) 234 + { 235 + struct ivpu_fence *fence; 236 + 237 + fence = kzalloc(sizeof(*fence), GFP_KERNEL); 238 + if (!fence) 239 + return NULL; 240 + 241 + fence->vdev = vdev; 242 + spin_lock_init(&fence->lock); 243 + dma_fence_init(&fence->base, &ivpu_fence_ops, &fence->lock, dma_fence_context_alloc(1), 1); 244 + 245 + return &fence->base; 246 + } 247 + 248 + static void job_get(struct ivpu_job *job, struct ivpu_job **link) 249 + { 250 + struct ivpu_device *vdev = job->vdev; 251 + 252 + kref_get(&job->ref); 253 + *link = job; 254 + 255 + ivpu_dbg(vdev, KREF, "Job get: id %u refcount %u\n", job->job_id, kref_read(&job->ref)); 256 + } 257 + 258 + static void job_release(struct kref *ref) 259 + { 260 + struct ivpu_job *job = container_of(ref, struct ivpu_job, ref); 261 + struct ivpu_device *vdev = job->vdev; 262 + u32 i; 263 + 264 + for (i = 0; i < job->bo_count; i++) 265 + if (job->bos[i]) 266 + drm_gem_object_put(&job->bos[i]->base); 267 + 268 + dma_fence_put(job->done_fence); 269 + 
ivpu_file_priv_put(&job->file_priv); 270 + 271 + ivpu_dbg(vdev, KREF, "Job released: id %u\n", job->job_id); 272 + kfree(job); 273 + } 274 + 275 + static void job_put(struct ivpu_job *job) 276 + { 277 + struct ivpu_device *vdev = job->vdev; 278 + 279 + ivpu_dbg(vdev, KREF, "Job put: id %u refcount %u\n", job->job_id, kref_read(&job->ref)); 280 + kref_put(&job->ref, job_release); 281 + } 282 + 283 + static struct ivpu_job * 284 + ivpu_create_job(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count) 285 + { 286 + struct ivpu_device *vdev = file_priv->vdev; 287 + struct ivpu_job *job; 288 + size_t buf_size; 289 + 290 + buf_size = sizeof(*job) + bo_count * sizeof(struct ivpu_bo *); 291 + job = kzalloc(buf_size, GFP_KERNEL); 292 + if (!job) 293 + return NULL; 294 + 295 + kref_init(&job->ref); 296 + 297 + job->vdev = vdev; 298 + job->engine_idx = engine_idx; 299 + job->bo_count = bo_count; 300 + job->done_fence = ivpu_fence_create(vdev); 301 + if (!job->done_fence) { 302 + ivpu_warn_ratelimited(vdev, "Failed to create a fence\n"); 303 + goto err_free_job; 304 + } 305 + 306 + job->file_priv = ivpu_file_priv_get(file_priv); 307 + 308 + ivpu_dbg(vdev, JOB, "Job created: ctx %2d engine %d", file_priv->ctx.id, job->engine_idx); 309 + 310 + return job; 311 + 312 + err_free_job: 313 + kfree(job); 314 + return NULL; 315 + } 316 + 317 + static int ivpu_job_done(struct ivpu_device *vdev, u32 job_id, u32 job_status) 318 + { 319 + struct ivpu_job *job; 320 + 321 + job = xa_erase(&vdev->submitted_jobs_xa, job_id); 322 + if (!job) 323 + return -ENOENT; 324 + 325 + if (job->file_priv->has_mmu_faults) 326 + job_status = VPU_JSM_STATUS_ABORTED; 327 + 328 + job->bos[CMD_BUF_IDX]->job_status = job_status; 329 + dma_fence_signal(job->done_fence); 330 + 331 + ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d engine %d status 0x%x\n", 332 + job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status); 333 + 334 + job_put(job); 335 + return 0; 336 + } 337 + 338 + static void 
ivpu_job_done_message(struct ivpu_device *vdev, void *msg) 339 + { 340 + struct vpu_ipc_msg_payload_job_done *payload; 341 + struct vpu_jsm_msg *job_ret_msg = msg; 342 + int ret; 343 + 344 + payload = (struct vpu_ipc_msg_payload_job_done *)&job_ret_msg->payload; 345 + 346 + ret = ivpu_job_done(vdev, payload->job_id, payload->job_status); 347 + if (ret) 348 + ivpu_err(vdev, "Failed to finish job %d: %d\n", payload->job_id, ret); 349 + } 350 + 351 + void ivpu_jobs_abort_all(struct ivpu_device *vdev) 352 + { 353 + struct ivpu_job *job; 354 + unsigned long id; 355 + 356 + xa_for_each(&vdev->submitted_jobs_xa, id, job) 357 + ivpu_job_done(vdev, id, VPU_JSM_STATUS_ABORTED); 358 + } 359 + 360 + static int ivpu_direct_job_submission(struct ivpu_job *job) 361 + { 362 + struct ivpu_file_priv *file_priv = job->file_priv; 363 + struct ivpu_device *vdev = job->vdev; 364 + struct xa_limit job_id_range; 365 + struct ivpu_cmdq *cmdq; 366 + int ret; 367 + 368 + mutex_lock(&file_priv->lock); 369 + 370 + cmdq = ivpu_cmdq_acquire(job->file_priv, job->engine_idx); 371 + if (!cmdq) { 372 + ivpu_warn(vdev, "Failed get job queue, ctx %d engine %d\n", 373 + file_priv->ctx.id, job->engine_idx); 374 + ret = -EINVAL; 375 + goto err_unlock; 376 + } 377 + 378 + job_id_range.min = FIELD_PREP(JOB_ID_CONTEXT_MASK, (file_priv->ctx.id - 1)); 379 + job_id_range.max = job_id_range.min | JOB_ID_JOB_MASK; 380 + 381 + job_get(job, &job); 382 + ret = xa_alloc(&vdev->submitted_jobs_xa, &job->job_id, job, job_id_range, GFP_KERNEL); 383 + if (ret) { 384 + ivpu_warn_ratelimited(vdev, "Failed to allocate job id: %d\n", ret); 385 + goto err_job_put; 386 + } 387 + 388 + ret = ivpu_cmdq_push_job(cmdq, job); 389 + if (ret) 390 + goto err_xa_erase; 391 + 392 + ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d engine %d next %d\n", 393 + job->job_id, file_priv->ctx.id, job->engine_idx, cmdq->jobq->header.tail); 394 + 395 + if (ivpu_test_mode == IVPU_TEST_MODE_NULL_HW) { 396 + ivpu_job_done(vdev, job->job_id, 
VPU_JSM_STATUS_SUCCESS); 397 + cmdq->jobq->header.head = cmdq->jobq->header.tail; 398 + wmb(); /* Flush WC buffer for jobq header */ 399 + } else { 400 + ivpu_cmdq_ring_db(vdev, cmdq); 401 + } 402 + 403 + mutex_unlock(&file_priv->lock); 404 + return 0; 405 + 406 + err_xa_erase: 407 + xa_erase(&vdev->submitted_jobs_xa, job->job_id); 408 + err_job_put: 409 + job_put(job); 410 + err_unlock: 411 + mutex_unlock(&file_priv->lock); 412 + return ret; 413 + } 414 + 415 + static int 416 + ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32 *buf_handles, 417 + u32 buf_count, u32 commands_offset) 418 + { 419 + struct ivpu_file_priv *file_priv = file->driver_priv; 420 + struct ivpu_device *vdev = file_priv->vdev; 421 + struct ww_acquire_ctx acquire_ctx; 422 + struct ivpu_bo *bo; 423 + int ret; 424 + u32 i; 425 + 426 + for (i = 0; i < buf_count; i++) { 427 + struct drm_gem_object *obj = drm_gem_object_lookup(file, buf_handles[i]); 428 + 429 + if (!obj) 430 + return -ENOENT; 431 + 432 + job->bos[i] = to_ivpu_bo(obj); 433 + 434 + ret = ivpu_bo_pin(job->bos[i]); 435 + if (ret) 436 + return ret; 437 + } 438 + 439 + bo = job->bos[CMD_BUF_IDX]; 440 + if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ)) { 441 + ivpu_warn(vdev, "Buffer is already in use\n"); 442 + return -EBUSY; 443 + } 444 + 445 + if (commands_offset >= bo->base.size) { 446 + ivpu_warn(vdev, "Invalid command buffer offset %u\n", commands_offset); 447 + return -EINVAL; 448 + } 449 + 450 + job->cmd_buf_vpu_addr = bo->vpu_addr + commands_offset; 451 + 452 + ret = drm_gem_lock_reservations((struct drm_gem_object **)job->bos, buf_count, 453 + &acquire_ctx); 454 + if (ret) { 455 + ivpu_warn(vdev, "Failed to lock reservations: %d\n", ret); 456 + return ret; 457 + } 458 + 459 + for (i = 0; i < buf_count; i++) { 460 + ret = dma_resv_reserve_fences(job->bos[i]->base.resv, 1); 461 + if (ret) { 462 + ivpu_warn(vdev, "Failed to reserve fences: %d\n", ret); 463 + goto unlock_reservations; 464 
+ } 465 + } 466 + 467 + for (i = 0; i < buf_count; i++) 468 + dma_resv_add_fence(job->bos[i]->base.resv, job->done_fence, DMA_RESV_USAGE_WRITE); 469 + 470 + unlock_reservations: 471 + drm_gem_unlock_reservations((struct drm_gem_object **)job->bos, buf_count, &acquire_ctx); 472 + 473 + wmb(); /* Flush write combining buffers */ 474 + 475 + return ret; 476 + } 477 + 478 + int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 479 + { 480 + int ret = 0; 481 + struct ivpu_file_priv *file_priv = file->driver_priv; 482 + struct ivpu_device *vdev = file_priv->vdev; 483 + struct drm_ivpu_submit *params = data; 484 + struct ivpu_job *job; 485 + u32 *buf_handles; 486 + 487 + if (params->engine > DRM_IVPU_ENGINE_COPY) 488 + return -EINVAL; 489 + 490 + if (params->buffer_count == 0 || params->buffer_count > JOB_MAX_BUFFER_COUNT) 491 + return -EINVAL; 492 + 493 + if (!IS_ALIGNED(params->commands_offset, 8)) 494 + return -EINVAL; 495 + 496 + if (!file_priv->ctx.id) 497 + return -EINVAL; 498 + 499 + if (file_priv->has_mmu_faults) 500 + return -EBADFD; 501 + 502 + buf_handles = kcalloc(params->buffer_count, sizeof(u32), GFP_KERNEL); 503 + if (!buf_handles) 504 + return -ENOMEM; 505 + 506 + ret = copy_from_user(buf_handles, 507 + (void __user *)params->buffers_ptr, 508 + params->buffer_count * sizeof(u32)); 509 + if (ret) { 510 + ret = -EFAULT; 511 + goto free_handles; 512 + } 513 + 514 + ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u buf_count %u\n", 515 + file_priv->ctx.id, params->buffer_count); 516 + 517 + job = ivpu_create_job(file_priv, params->engine, params->buffer_count); 518 + if (!job) { 519 + ivpu_err(vdev, "Failed to create job\n"); 520 + ret = -ENOMEM; 521 + goto free_handles; 522 + } 523 + 524 + ret = ivpu_job_prepare_bos_for_submit(file, job, buf_handles, params->buffer_count, 525 + params->commands_offset); 526 + if (ret) { 527 + ivpu_err(vdev, "Failed to prepare job, ret %d\n", ret); 528 + goto job_put; 529 + } 530 + 531 + ret = 
ivpu_direct_job_submission(job); 532 + if (ret) { 533 + dma_fence_signal(job->done_fence); 534 + ivpu_err(vdev, "Failed to submit job to the HW, ret %d\n", ret); 535 + } 536 + 537 + job_put: 538 + job_put(job); 539 + free_handles: 540 + kfree(buf_handles); 541 + 542 + return ret; 543 + } 544 + 545 + static int ivpu_job_done_thread(void *arg) 546 + { 547 + struct ivpu_device *vdev = (struct ivpu_device *)arg; 548 + struct ivpu_ipc_consumer cons; 549 + struct vpu_jsm_msg jsm_msg; 550 + bool jobs_submitted; 551 + unsigned int timeout; 552 + int ret; 553 + 554 + ivpu_dbg(vdev, JOB, "Started %s\n", __func__); 555 + 556 + ivpu_ipc_consumer_add(vdev, &cons, VPU_IPC_CHAN_JOB_RET); 557 + 558 + while (!kthread_should_stop()) { 559 + timeout = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr; 560 + jobs_submitted = !xa_empty(&vdev->submitted_jobs_xa); 561 + ret = ivpu_ipc_receive(vdev, &cons, NULL, &jsm_msg, timeout); 562 + if (!ret) { 563 + ivpu_job_done_message(vdev, &jsm_msg); 564 + } else if (ret == -ETIMEDOUT) { 565 + if (jobs_submitted && !xa_empty(&vdev->submitted_jobs_xa)) { 566 + ivpu_err(vdev, "TDR detected, timeout %d ms", timeout); 567 + ivpu_hw_diagnose_failure(vdev); 568 + } 569 + } 570 + } 571 + 572 + ivpu_ipc_consumer_del(vdev, &cons); 573 + 574 + ivpu_jobs_abort_all(vdev); 575 + 576 + ivpu_dbg(vdev, JOB, "Stopped %s\n", __func__); 577 + return 0; 578 + } 579 + 580 + int ivpu_job_done_thread_init(struct ivpu_device *vdev) 581 + { 582 + struct task_struct *thread; 583 + 584 + thread = kthread_run(&ivpu_job_done_thread, (void *)vdev, "ivpu_job_done_thread"); 585 + if (IS_ERR(thread)) { 586 + ivpu_err(vdev, "Failed to start job completion thread\n"); 587 + return -EIO; 588 + } 589 + 590 + get_task_struct(thread); 591 + wake_up_process(thread); 592 + 593 + vdev->job_done_thread = thread; 594 + 595 + return 0; 596 + } 597 + 598 + void ivpu_job_done_thread_fini(struct ivpu_device *vdev) 599 + { 600 + kthread_stop(vdev->job_done_thread); 601 + 
put_task_struct(vdev->job_done_thread); 602 + }
+67
drivers/accel/ivpu/ivpu_job.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + /* 3 + * Copyright (C) 2020-2023 Intel Corporation 4 + */ 5 + 6 + #ifndef __IVPU_JOB_H__ 7 + #define __IVPU_JOB_H__ 8 + 9 + #include <linux/kref.h> 10 + #include <linux/idr.h> 11 + 12 + #include "ivpu_gem.h" 13 + 14 + struct ivpu_device; 15 + struct ivpu_file_priv; 16 + 17 + /** 18 + * struct ivpu_cmdq - Object representing device queue used to send jobs. 19 + * @jobq: Pointer to job queue memory shared with the device 20 + * @mem: Memory allocated for the job queue, shared with device 21 + * @entry_count Number of job entries in the queue 22 + * @db_id: Doorbell assigned to this job queue 23 + * @db_registered: True if doorbell is registered in device 24 + */ 25 + struct ivpu_cmdq { 26 + struct vpu_job_queue *jobq; 27 + struct ivpu_bo *mem; 28 + u32 entry_count; 29 + u32 db_id; 30 + bool db_registered; 31 + }; 32 + 33 + /** 34 + * struct ivpu_job - KMD object that represents batchbuffer / DMA buffer. 35 + * Each batch / DMA buffer is a job to be submitted and executed by the VPU FW. 36 + * This is a unit of execution, and be tracked by the job_id for 37 + * any status reporting from VPU FW through IPC JOB RET/DONE message. 
38 + * @file_priv: The client that submitted this job 39 + * @job_id: Job ID for KMD tracking and job status reporting from VPU FW 40 + * @status: Status of the Job from IPC JOB RET/DONE message 41 + * @batch_buffer: CPU vaddr points to the batch buffer memory allocated for the job 42 + * @submit_status_offset: Offset within batch buffer where job completion handler 43 + will update the job status 44 + */ 45 + struct ivpu_job { 46 + struct kref ref; 47 + struct ivpu_device *vdev; 48 + struct ivpu_file_priv *file_priv; 49 + struct dma_fence *done_fence; 50 + u64 cmd_buf_vpu_addr; 51 + u32 job_id; 52 + u32 engine_idx; 53 + size_t bo_count; 54 + struct ivpu_bo *bos[]; 55 + }; 56 + 57 + int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file); 58 + 59 + void ivpu_cmdq_release_all(struct ivpu_file_priv *file_priv); 60 + void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev); 61 + 62 + int ivpu_job_done_thread_init(struct ivpu_device *vdev); 63 + void ivpu_job_done_thread_fini(struct ivpu_device *vdev); 64 + 65 + void ivpu_jobs_abort_all(struct ivpu_device *vdev); 66 + 67 + #endif /* __IVPU_JOB_H__ */
+92
include/uapi/drm/ivpu_accel.h
··· 19 19 #define DRM_IVPU_SET_PARAM 0x01 20 20 #define DRM_IVPU_BO_CREATE 0x02 21 21 #define DRM_IVPU_BO_INFO 0x03 22 + #define DRM_IVPU_SUBMIT 0x05 23 + #define DRM_IVPU_BO_WAIT 0x06 22 24 23 25 #define DRM_IOCTL_IVPU_GET_PARAM \ 24 26 DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_GET_PARAM, struct drm_ivpu_param) ··· 33 31 34 32 #define DRM_IOCTL_IVPU_BO_INFO \ 35 33 DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_BO_INFO, struct drm_ivpu_bo_info) 34 + 35 + #define DRM_IOCTL_IVPU_SUBMIT \ 36 + DRM_IOW(DRM_COMMAND_BASE + DRM_IVPU_SUBMIT, struct drm_ivpu_submit) 37 + 38 + #define DRM_IOCTL_IVPU_BO_WAIT \ 39 + DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_BO_WAIT, struct drm_ivpu_bo_wait) 36 40 37 41 /** 38 42 * DOC: contexts ··· 213 205 214 206 /** @size: Returned GEM object size, aligned to PAGE_SIZE */ 215 207 __u64 size; 208 + }; 209 + 210 + /* drm_ivpu_submit engines */ 211 + #define DRM_IVPU_ENGINE_COMPUTE 0 212 + #define DRM_IVPU_ENGINE_COPY 1 213 + 214 + /** 215 + * struct drm_ivpu_submit - Submit commands to the VPU 216 + * 217 + * Execute a single command buffer on a given VPU engine. 218 + * Handles to all referenced buffer objects have to be provided in @buffers_ptr. 219 + * 220 + * User space may wait on job completion using %DRM_IVPU_BO_WAIT ioctl. 221 + */ 222 + struct drm_ivpu_submit { 223 + /** 224 + * @buffers_ptr: 225 + * 226 + * A pointer to an u32 array of GEM handles of the BOs required for this job. 227 + * The number of elements in the array must be equal to the value given by @buffer_count. 228 + * 229 + * The first BO is the command buffer. The rest of array has to contain all 230 + * BOs referenced from the command buffer. 
231 + */ 232 + __u64 buffers_ptr; 233 + 234 + /** @buffer_count: Number of elements in the @buffers_ptr */ 235 + __u32 buffer_count; 236 + 237 + /** 238 + * @engine: Select the engine this job should be executed on 239 + * 240 + * %DRM_IVPU_ENGINE_COMPUTE: 241 + * 242 + * Performs Deep Learning Neural Compute Inference Operations 243 + * 244 + * %DRM_IVPU_ENGINE_COPY: 245 + * 246 + * Performs memory copy operations to/from system memory allocated for VPU 247 + */ 248 + __u32 engine; 249 + 250 + /** @flags: Reserved for future use - must be zero */ 251 + __u32 flags; 252 + 253 + /** 254 + * @commands_offset: 255 + * 256 + * Offset inside the first buffer in @buffers_ptr containing commands 257 + * to be executed. The offset has to be 8-byte aligned. 258 + */ 259 + __u32 commands_offset; 260 + }; 261 + 262 + /* drm_ivpu_bo_wait job status codes */ 263 + #define DRM_IVPU_JOB_STATUS_SUCCESS 0 264 + 265 + /** 266 + * struct drm_ivpu_bo_wait - Wait for BO to become inactive 267 + * 268 + * Blocks until a given buffer object becomes inactive. 269 + * With @timeout_ms set to 0 returns immediately. 270 + */ 271 + struct drm_ivpu_bo_wait { 272 + /** @handle: Handle to the buffer object to be waited on */ 273 + __u32 handle; 274 + 275 + /** @flags: Reserved for future use - must be zero */ 276 + __u32 flags; 277 + 278 + /** @timeout_ns: Absolute timeout in nanoseconds (may be zero) */ 279 + __s64 timeout_ns; 280 + 281 + /** 282 + * @job_status: 283 + * 284 + * Job status code which is updated after the job is completed. 285 + * &DRM_IVPU_JOB_STATUS_SUCCESS or device specific error otherwise. 286 + * Valid only if @handle points to a command buffer. 287 + */ 288 + __u32 job_status; 289 + 290 + /** @pad: Padding - must be zero */ 291 + __u32 pad; 216 292 }; 217 293 218 294 #if defined(__cplusplus)