Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/v3d: Create a CPU job extension for the copy performance query job

A CPU job is a type of job that performs operations that require CPU
intervention. A copy performance query job is a job that copies the complete
or partial result of a query to a buffer. In order to copy the result of
a performance query to a buffer, we need to get the values from the
performance monitors.

So, create a user extension for the CPU job that enables the creation
of a copy performance query job. This user extension will allow the creation
of a CPU job that copies the results of a performance query to a BO with the
possibility to indicate the availability with an availability bit.

Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231130164420.932823-19-mcanal@igalia.com

+198
+1
drivers/gpu/drm/v3d/v3d_drv.h
··· 322 322 V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY, 323 323 V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY, 324 324 V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY, 325 + V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY, 325 326 }; 326 327 327 328 struct v3d_timestamp_query {
+65
drivers/gpu/drm/v3d/v3d_sched.c
··· 452 452 } 453 453 } 454 454 455 + static void 456 + v3d_write_performance_query_result(struct v3d_cpu_job *job, void *data, u32 query) 457 + { 458 + struct v3d_performance_query_info *performance_query = &job->performance_query; 459 + struct v3d_copy_query_results_info *copy = &job->copy; 460 + struct v3d_file_priv *v3d_priv = job->base.file->driver_priv; 461 + struct v3d_dev *v3d = job->base.v3d; 462 + struct v3d_perfmon *perfmon; 463 + u64 counter_values[V3D_PERFCNT_NUM]; 464 + 465 + for (int i = 0; i < performance_query->nperfmons; i++) { 466 + perfmon = v3d_perfmon_find(v3d_priv, 467 + performance_query->queries[query].kperfmon_ids[i]); 468 + if (!perfmon) { 469 + DRM_DEBUG("Failed to find perfmon."); 470 + continue; 471 + } 472 + 473 + v3d_perfmon_stop(v3d, perfmon, true); 474 + 475 + memcpy(&counter_values[i * DRM_V3D_MAX_PERF_COUNTERS], perfmon->values, 476 + perfmon->ncounters * sizeof(u64)); 477 + 478 + v3d_perfmon_put(perfmon); 479 + } 480 + 481 + for (int i = 0; i < performance_query->ncounters; i++) 482 + write_to_buffer(data, i, copy->do_64bit, counter_values[i]); 483 + } 484 + 485 + static void 486 + v3d_copy_performance_query(struct v3d_cpu_job *job) 487 + { 488 + struct v3d_performance_query_info *performance_query = &job->performance_query; 489 + struct v3d_copy_query_results_info *copy = &job->copy; 490 + struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]); 491 + struct dma_fence *fence; 492 + bool available, write_result; 493 + u8 *data; 494 + 495 + v3d_get_bo_vaddr(bo); 496 + 497 + data = ((u8 *)bo->vaddr) + copy->offset; 498 + 499 + for (int i = 0; i < performance_query->count; i++) { 500 + fence = drm_syncobj_fence_get(performance_query->queries[i].syncobj); 501 + available = fence ? 
dma_fence_is_signaled(fence) : false; 502 + 503 + write_result = available || copy->do_partial; 504 + if (write_result) 505 + v3d_write_performance_query_result(job, data, i); 506 + 507 + if (copy->availability_bit) 508 + write_to_buffer(data, performance_query->ncounters, 509 + copy->do_64bit, available ? 1u : 0u); 510 + 511 + data += copy->stride; 512 + 513 + dma_fence_put(fence); 514 + } 515 + 516 + v3d_put_bo_vaddr(bo); 517 + } 518 + 455 519 static const v3d_cpu_job_fn cpu_job_function[] = { 456 520 [V3D_CPU_JOB_TYPE_INDIRECT_CSD] = v3d_rewrite_csd_job_wg_counts_from_indirect, 457 521 [V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY] = v3d_timestamp_query, 458 522 [V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = v3d_reset_timestamp_queries, 459 523 [V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = v3d_copy_query_results, 460 524 [V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY] = v3d_reset_performance_queries, 525 + [V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY] = v3d_copy_performance_query, 461 526 }; 462 527 463 528 static struct dma_fence *
+82
drivers/gpu/drm/v3d/v3d_submit.c
··· 672 672 return 0; 673 673 } 674 674 675 + static int 676 + v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv, 677 + struct drm_v3d_extension __user *ext, 678 + struct v3d_cpu_job *job) 679 + { 680 + u32 __user *syncs; 681 + u64 __user *kperfmon_ids; 682 + struct drm_v3d_copy_performance_query copy; 683 + 684 + if (!job) { 685 + DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); 686 + return -EINVAL; 687 + } 688 + 689 + if (job->job_type) { 690 + DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n"); 691 + return -EINVAL; 692 + } 693 + 694 + if (copy_from_user(&copy, ext, sizeof(copy))) 695 + return -EFAULT; 696 + 697 + if (copy.pad) 698 + return -EINVAL; 699 + 700 + job->job_type = V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY; 701 + 702 + job->performance_query.queries = kvmalloc_array(copy.count, 703 + sizeof(struct v3d_performance_query), 704 + GFP_KERNEL); 705 + if (!job->performance_query.queries) 706 + return -ENOMEM; 707 + 708 + syncs = u64_to_user_ptr(copy.syncs); 709 + kperfmon_ids = u64_to_user_ptr(copy.kperfmon_ids); 710 + 711 + for (int i = 0; i < copy.count; i++) { 712 + u32 sync; 713 + u64 ids; 714 + u32 __user *ids_pointer; 715 + u32 id; 716 + 717 + if (copy_from_user(&sync, syncs++, sizeof(sync))) { 718 + kvfree(job->performance_query.queries); 719 + return -EFAULT; 720 + } 721 + 722 + job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); 723 + 724 + if (copy_from_user(&ids, kperfmon_ids++, sizeof(ids))) { 725 + kvfree(job->performance_query.queries); 726 + return -EFAULT; 727 + } 728 + 729 + ids_pointer = u64_to_user_ptr(ids); 730 + 731 + for (int j = 0; j < copy.nperfmons; j++) { 732 + if (copy_from_user(&id, ids_pointer++, sizeof(id))) { 733 + kvfree(job->performance_query.queries); 734 + return -EFAULT; 735 + } 736 + 737 + job->performance_query.queries[i].kperfmon_ids[j] = id; 738 + } 739 + } 740 + job->performance_query.count = copy.count; 741 + 
job->performance_query.nperfmons = copy.nperfmons; 742 + job->performance_query.ncounters = copy.ncounters; 743 + 744 + job->copy.do_64bit = copy.do_64bit; 745 + job->copy.do_partial = copy.do_partial; 746 + job->copy.availability_bit = copy.availability_bit; 747 + job->copy.offset = copy.offset; 748 + job->copy.stride = copy.stride; 749 + 750 + return 0; 751 + } 752 + 675 753 /* Whenever userspace sets ioctl extensions, v3d_get_extensions parses data 676 754 * according to the extension id (name). 677 755 */ ··· 789 711 break; 790 712 case DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY: 791 713 ret = v3d_get_cpu_reset_performance_params(file_priv, user_ext, job); 714 + break; 715 + case DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY: 716 + ret = v3d_get_cpu_copy_performance_query_params(file_priv, user_ext, job); 792 717 break; 793 718 default: 794 719 DRM_DEBUG_DRIVER("Unknown extension id: %d\n", ext.id); ··· 1173 1092 [V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = 1, 1174 1093 [V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = 2, 1175 1094 [V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY] = 0, 1095 + [V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY] = 1, 1176 1096 }; 1177 1097 1178 1098 /**
+50
include/uapi/drm/v3d_drm.h
··· 77 77 #define DRM_V3D_EXT_ID_CPU_RESET_TIMESTAMP_QUERY 0x04 78 78 #define DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY 0x05 79 79 #define DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY 0x06 80 + #define DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY 0x07 80 81 __u32 flags; /* mbz */ 81 82 }; 82 83 ··· 520 519 __u64 kperfmon_ids; 521 520 }; 522 521 522 + /** 523 + * struct drm_v3d_copy_performance_query - ioctl extension for the CPU job to copy 524 + * performance query results to a buffer 525 + * 526 + * When an extension DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY is defined, it 527 + * points to this extension to define a copy performance query submission. This 528 + * CPU job will copy the performance queries results to a BO with the offset 529 + * and stride defined in the extension. 530 + */ 531 + struct drm_v3d_copy_performance_query { 532 + struct drm_v3d_extension base; 533 + 534 + /* Define if should write to buffer using 64 or 32 bits */ 535 + __u8 do_64bit; 536 + 537 + /* Define if it can write to buffer even if the query is not available */ 538 + __u8 do_partial; 539 + 540 + /* Define if it should write availability bit to buffer */ 541 + __u8 availability_bit; 542 + 543 + /* mbz */ 544 + __u8 pad; 545 + 546 + /* Offset of the buffer in the BO */ 547 + __u32 offset; 548 + 549 + /* Stride of the buffer in the BO */ 550 + __u32 stride; 551 + 552 + /* Number of performance monitors */ 553 + __u32 nperfmons; 554 + 555 + /* Number of performance counters related to this query pool */ 556 + __u32 ncounters; 557 + 558 + /* Number of queries */ 559 + __u32 count; 560 + 561 + /* Array of performance queries's syncobjs to indicate its availability */ 562 + __u64 syncs; 563 + 564 + /* Array of u64 user-pointers that point to an array of kperfmon_ids */ 565 + __u64 kperfmon_ids; 566 + }; 567 + 523 568 struct drm_v3d_submit_cpu { 524 569 /* Pointer to a u32 array of the BOs that are referenced by the job. 
525 570 * ··· 584 537 * 585 538 * For DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY, it must contain no 586 539 * BOs. 540 + * 541 + * For DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY, it must contain one 542 + * BO, where the performance queries will be written. 587 543 */ 588 544 __u64 bo_handles; 589 545