Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/v3d: Create a CPU job extension for the reset performance query job

A CPU job is a type of job that performs operations that require CPU
intervention. A reset performance query job is a job that resets
performance queries by resetting the values of their perfmons. Moreover,
it also resets the syncobjs that track the availability of each query.

So, create a user extension for the CPU job that enables the creation
of a reset performance query job. This user extension will allow the creation
of a CPU job that resets the perfmon values and resets the availability syncobjs.

Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231130164420.932823-18-mcanal@igalia.com

+167
+28
drivers/gpu/drm/v3d/v3d_drv.h
··· 321 321 V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY, 322 322 V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY, 323 323 V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY, 324 + V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY, 324 325 }; 325 326 326 327 struct v3d_timestamp_query { ··· 329 328 u32 offset; 330 329 331 330 /* Syncobj that indicates the timestamp availability */ 331 + struct drm_syncobj *syncobj; 332 + }; 333 + 334 + /* Number of perfmons required to handle all supported performance counters */ 335 + #define V3D_MAX_PERFMONS DIV_ROUND_UP(V3D_PERFCNT_NUM, \ 336 + DRM_V3D_MAX_PERF_COUNTERS) 337 + 338 + struct v3d_performance_query { 339 + /* Performance monitor IDs for this query */ 340 + u32 kperfmon_ids[V3D_MAX_PERFMONS]; 341 + 342 + /* Syncobj that indicates the query availability */ 332 343 struct drm_syncobj *syncobj; 333 344 }; 334 345 ··· 375 362 u32 count; 376 363 }; 377 364 365 + struct v3d_performance_query_info { 366 + struct v3d_performance_query *queries; 367 + 368 + /* Number of performance queries */ 369 + u32 count; 370 + 371 + /* Number of performance monitors related to that query pool */ 372 + u32 nperfmons; 373 + 374 + /* Number of performance counters related to that query pool */ 375 + u32 ncounters; 376 + }; 377 + 378 378 struct v3d_copy_query_results_info { 379 379 /* Define if should write to buffer using 64 or 32 bits */ 380 380 bool do_64bit; ··· 415 389 struct v3d_timestamp_query_info timestamp_query; 416 390 417 391 struct v3d_copy_query_results_info copy; 392 + 393 + struct v3d_performance_query_info performance_query; 418 394 }; 419 395 420 396 typedef void (*v3d_cpu_job_fn)(struct v3d_cpu_job *);
+36
drivers/gpu/drm/v3d/v3d_sched.c
··· 78 78 { 79 79 struct v3d_cpu_job *job = to_cpu_job(sched_job); 80 80 struct v3d_timestamp_query_info *timestamp_query = &job->timestamp_query; 81 + struct v3d_performance_query_info *performance_query = &job->performance_query; 81 82 82 83 if (timestamp_query->queries) { 83 84 for (int i = 0; i < timestamp_query->count; i++) 84 85 drm_syncobj_put(timestamp_query->queries[i].syncobj); 85 86 kvfree(timestamp_query->queries); 87 + } 88 + 89 + if (performance_query->queries) { 90 + for (int i = 0; i < performance_query->count; i++) 91 + drm_syncobj_put(performance_query->queries[i].syncobj); 92 + kvfree(performance_query->queries); 86 93 } 87 94 88 95 v3d_job_cleanup(&job->base); ··· 424 417 v3d_put_bo_vaddr(bo); 425 418 } 426 419 420 + static void 421 + v3d_reset_performance_queries(struct v3d_cpu_job *job) 422 + { 423 + struct v3d_performance_query_info *performance_query = &job->performance_query; 424 + struct v3d_file_priv *v3d_priv = job->base.file->driver_priv; 425 + struct v3d_dev *v3d = job->base.v3d; 426 + struct v3d_perfmon *perfmon; 427 + 428 + for (int i = 0; i < performance_query->count; i++) { 429 + for (int j = 0; j < performance_query->nperfmons; j++) { 430 + perfmon = v3d_perfmon_find(v3d_priv, 431 + performance_query->queries[i].kperfmon_ids[j]); 432 + if (!perfmon) { 433 + DRM_DEBUG("Failed to find perfmon."); 434 + continue; 435 + } 436 + 437 + v3d_perfmon_stop(v3d, perfmon, false); 438 + 439 + memset(perfmon->values, 0, perfmon->ncounters * sizeof(u64)); 440 + 441 + v3d_perfmon_put(perfmon); 442 + } 443 + 444 + drm_syncobj_replace_fence(performance_query->queries[i].syncobj, NULL); 445 + } 446 + } 447 + 427 448 static const v3d_cpu_job_fn cpu_job_function[] = { 428 449 [V3D_CPU_JOB_TYPE_INDIRECT_CSD] = v3d_rewrite_csd_job_wg_counts_from_indirect, 429 450 [V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY] = v3d_timestamp_query, 430 451 [V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = v3d_reset_timestamp_queries, 431 452 [V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = 
v3d_copy_query_results, 453 + [V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY] = v3d_reset_performance_queries, 432 454 }; 433 455 434 456 static struct dma_fence *
+73
drivers/gpu/drm/v3d/v3d_submit.c
··· 604 604 return 0; 605 605 } 606 606 607 + static int 608 + v3d_get_cpu_reset_performance_params(struct drm_file *file_priv, 609 + struct drm_v3d_extension __user *ext, 610 + struct v3d_cpu_job *job) 611 + { 612 + u32 __user *syncs; 613 + u64 __user *kperfmon_ids; 614 + struct drm_v3d_reset_performance_query reset; 615 + 616 + if (!job) { 617 + DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); 618 + return -EINVAL; 619 + } 620 + 621 + if (job->job_type) { 622 + DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n"); 623 + return -EINVAL; 624 + } 625 + 626 + if (copy_from_user(&reset, ext, sizeof(reset))) 627 + return -EFAULT; 628 + 629 + job->job_type = V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY; 630 + 631 + job->performance_query.queries = kvmalloc_array(reset.count, 632 + sizeof(struct v3d_performance_query), 633 + GFP_KERNEL); 634 + if (!job->performance_query.queries) 635 + return -ENOMEM; 636 + 637 + syncs = u64_to_user_ptr(reset.syncs); 638 + kperfmon_ids = u64_to_user_ptr(reset.kperfmon_ids); 639 + 640 + for (int i = 0; i < reset.count; i++) { 641 + u32 sync; 642 + u64 ids; 643 + u32 __user *ids_pointer; 644 + u32 id; 645 + 646 + if (copy_from_user(&sync, syncs++, sizeof(sync))) { 647 + kvfree(job->performance_query.queries); 648 + return -EFAULT; 649 + } 650 + 651 + job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); 652 + 653 + if (copy_from_user(&ids, kperfmon_ids++, sizeof(ids))) { 654 + kvfree(job->performance_query.queries); 655 + return -EFAULT; 656 + } 657 + 658 + ids_pointer = u64_to_user_ptr(ids); 659 + 660 + for (int j = 0; j < reset.nperfmons; j++) { 661 + if (copy_from_user(&id, ids_pointer++, sizeof(id))) { 662 + kvfree(job->performance_query.queries); 663 + return -EFAULT; 664 + } 665 + 666 + job->performance_query.queries[i].kperfmon_ids[j] = id; 667 + } 668 + } 669 + job->performance_query.count = reset.count; 670 + job->performance_query.nperfmons = reset.nperfmons; 671 + 672 + return 0; 
673 + } 674 + 607 675 /* Whenever userspace sets ioctl extensions, v3d_get_extensions parses data 608 676 * according to the extension id (name). 609 677 */ ··· 708 640 break; 709 641 case DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY: 710 642 ret = v3d_get_cpu_copy_query_results_params(file_priv, user_ext, job); 643 + break; 644 + case DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY: 645 + ret = v3d_get_cpu_reset_performance_params(file_priv, user_ext, job); 711 646 break; 712 647 default: 713 648 DRM_DEBUG_DRIVER("Unknown extension id: %d\n", ext.id); ··· 1091 1020 [V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY] = 1, 1092 1021 [V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = 1, 1093 1022 [V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = 2, 1023 + [V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY] = 0, 1094 1024 }; 1095 1025 1096 1026 /** ··· 1230 1158 v3d_job_cleanup(clean_job); 1231 1159 v3d_put_multisync_post_deps(&se); 1232 1160 kvfree(cpu_job->timestamp_query.queries); 1161 + kvfree(cpu_job->performance_query.queries); 1233 1162 1234 1163 return ret; 1235 1164 }
+30
include/uapi/drm/v3d_drm.h
··· 76 76 #define DRM_V3D_EXT_ID_CPU_TIMESTAMP_QUERY 0x03 77 77 #define DRM_V3D_EXT_ID_CPU_RESET_TIMESTAMP_QUERY 0x04 78 78 #define DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY 0x05 79 + #define DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY 0x06 79 80 __u32 flags; /* mbz */ 80 81 }; 81 82 ··· 493 492 __u64 syncs; 494 493 }; 495 494 495 + /** 496 + * struct drm_v3d_reset_performance_query - ioctl extension for the CPU job to 497 + * reset performance queries 498 + * 499 + * When an extension DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY is defined, it 500 + * points to this extension to define a reset performance submission. This CPU 501 + * job will reset the performance queries by resetting the values of the 502 + * performance monitors. Moreover, it will reset the syncobj to reset query 503 + * availability. 504 + */ 505 + struct drm_v3d_reset_performance_query { 506 + struct drm_v3d_extension base; 507 + 508 + /* Array of performance queries's syncobjs to indicate its availability */ 509 + __u64 syncs; 510 + 511 + /* Number of queries */ 512 + __u32 count; 513 + 514 + /* Number of performance monitors */ 515 + __u32 nperfmons; 516 + 517 + /* Array of u64 user-pointers that point to an array of kperfmon_ids */ 518 + __u64 kperfmon_ids; 519 + }; 520 + 496 521 struct drm_v3d_submit_cpu { 497 522 /* Pointer to a u32 array of the BOs that are referenced by the job. 498 523 * ··· 534 507 * For DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY, it must contain two 535 508 * BOs. The first is the BO where the timestamp queries will be written 536 509 * to. The second is the BO that contains the timestamp. 510 + * 511 + * For DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY, it must contain no 512 + * BOs. 537 513 */ 538 514 __u64 bo_handles; 539 515