commit 9bca5bcdeb0d157084a3de6ab1b17424eb875d10 · tjh.dev/kernel

+5 -5

drivers/accel/ivpu/ivpu_drv.c

··· 1 1 // SPDX-License-Identifier: GPL-2.0-only 2 2 /* 3 - * Copyright (C) 2020-2024 Intel Corporation 3 + * Copyright (C) 2020-2025 Intel Corporation 4 4 */ 5 5 6 6 #include <linux/firmware.h> ··· 164 164 args->value = vdev->platform; 165 165 break; 166 166 case DRM_IVPU_PARAM_CORE_CLOCK_RATE: 167 - args->value = ivpu_hw_ratio_to_freq(vdev, vdev->hw->pll.max_ratio); 167 + args->value = ivpu_hw_dpu_max_freq_get(vdev); 168 168 break; 169 169 case DRM_IVPU_PARAM_NUM_CONTEXTS: 170 170 args->value = ivpu_get_context_count(vdev); ··· 421 421 { 422 422 ivpu_hw_irq_disable(vdev); 423 423 disable_irq(vdev->irq); 424 - cancel_work_sync(&vdev->irq_ipc_work); 425 - cancel_work_sync(&vdev->irq_dct_work); 426 - cancel_work_sync(&vdev->context_abort_work); 424 + flush_work(&vdev->irq_ipc_work); 425 + flush_work(&vdev->irq_dct_work); 426 + flush_work(&vdev->context_abort_work); 427 427 ivpu_ipc_disable(vdev); 428 428 ivpu_mmu_disable(vdev); 429 429 }

+13 -4

drivers/accel/ivpu/ivpu_fw.c

··· 1 1 // SPDX-License-Identifier: GPL-2.0-only 2 2 /* 3 - * Copyright (C) 2020-2024 Intel Corporation 3 + * Copyright (C) 2020-2025 Intel Corporation 4 4 */ 5 5 6 6 #include <linux/firmware.h> ··· 233 233 fw->dvfs_mode = 0; 234 234 235 235 fw->sched_mode = ivpu_fw_sched_mode_select(vdev, fw_hdr); 236 - fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size; 237 - fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size; 238 236 ivpu_info(vdev, "Scheduler mode: %s\n", fw->sched_mode ? "HW" : "OS"); 237 + 238 + if (fw_hdr->preemption_buffer_1_max_size) 239 + fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_max_size; 240 + else 241 + fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size; 242 + 243 + if (fw_hdr->preemption_buffer_2_max_size) 244 + fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_max_size; 245 + else 246 + fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size; 247 + ivpu_dbg(vdev, FW_BOOT, "Preemption buffer sizes: primary %u, secondary %u\n", 248 + fw->primary_preempt_buf_size, fw->secondary_preempt_buf_size); 239 249 240 250 if (fw_hdr->ro_section_start_address && !is_within_range(fw_hdr->ro_section_start_address, 241 251 fw_hdr->ro_section_size, ··· 576 566 577 567 boot_params->magic = VPU_BOOT_PARAMS_MAGIC; 578 568 boot_params->vpu_id = to_pci_dev(vdev->drm.dev)->bus->number; 579 - boot_params->frequency = ivpu_hw_pll_freq_get(vdev); 580 569 581 570 /* 582 571 * This param is a debug firmware feature. It switches default clock

+8 -8

drivers/accel/ivpu/ivpu_hw.h

··· 1 1 /* SPDX-License-Identifier: GPL-2.0-only */ 2 2 /* 3 - * Copyright (C) 2020-2024 Intel Corporation 3 + * Copyright (C) 2020-2025 Intel Corporation 4 4 */ 5 5 6 6 #ifndef __IVPU_HW_H__ ··· 82 82 return range->end - range->start; 83 83 } 84 84 85 - static inline u32 ivpu_hw_ratio_to_freq(struct ivpu_device *vdev, u32 ratio) 85 + static inline u32 ivpu_hw_dpu_max_freq_get(struct ivpu_device *vdev) 86 86 { 87 - return ivpu_hw_btrs_ratio_to_freq(vdev, ratio); 87 + return ivpu_hw_btrs_dpu_max_freq_get(vdev); 88 + } 89 + 90 + static inline u32 ivpu_hw_dpu_freq_get(struct ivpu_device *vdev) 91 + { 92 + return ivpu_hw_btrs_dpu_freq_get(vdev); 88 93 } 89 94 90 95 static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev) 91 96 { 92 97 ivpu_hw_ip_irq_clear(vdev); 93 - } 94 - 95 - static inline u32 ivpu_hw_pll_freq_get(struct ivpu_device *vdev) 96 - { 97 - return ivpu_hw_btrs_pll_freq_get(vdev); 98 98 } 99 99 100 100 static inline u32 ivpu_hw_profiling_freq_get(struct ivpu_device *vdev)

+64 -70

drivers/accel/ivpu/ivpu_hw_btrs.c

··· 1 1 // SPDX-License-Identifier: GPL-2.0-only 2 2 /* 3 - * Copyright (C) 2020-2024 Intel Corporation 3 + * Copyright (C) 2020-2025 Intel Corporation 4 4 */ 5 + 6 + #include <linux/units.h> 5 7 6 8 #include "ivpu_drv.h" 7 9 #include "ivpu_hw.h" ··· 30 28 31 29 #define BTRS_LNL_ALL_IRQ_MASK ((u32)-1) 32 30 33 - #define BTRS_MTL_WP_CONFIG_1_TILE_5_3_RATIO WP_CONFIG(MTL_CONFIG_1_TILE, MTL_PLL_RATIO_5_3) 34 - #define BTRS_MTL_WP_CONFIG_1_TILE_4_3_RATIO WP_CONFIG(MTL_CONFIG_1_TILE, MTL_PLL_RATIO_4_3) 35 - #define BTRS_MTL_WP_CONFIG_2_TILE_5_3_RATIO WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_5_3) 36 - #define BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_4_3) 37 - #define BTRS_MTL_WP_CONFIG_0_TILE_PLL_OFF WP_CONFIG(0, 0) 38 31 39 32 #define PLL_CDYN_DEFAULT 0x80 40 33 #define PLL_EPP_DEFAULT 0x80 41 34 #define PLL_CONFIG_DEFAULT 0x0 42 - #define PLL_SIMULATION_FREQ 10000000 43 - #define PLL_REF_CLK_FREQ 50000000 35 + #define PLL_REF_CLK_FREQ 50000000ull 36 + #define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ) 37 + 44 38 #define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC) 45 39 #define IDLE_TIMEOUT_US (5 * USEC_PER_MSEC) 46 40 #define TIMEOUT_US (150 * USEC_PER_MSEC) ··· 59 61 #define DCT_REQ 0x2 60 62 #define DCT_ENABLE 0x1 61 63 #define DCT_DISABLE 0x0 64 + 65 + static u32 pll_ratio_to_dpu_freq(struct ivpu_device *vdev, u32 ratio); 62 66 63 67 int ivpu_hw_btrs_irqs_clear_with_0_mtl(struct ivpu_device *vdev) 64 68 { ··· 156 156 157 157 hw->tile_fuse = BTRS_MTL_TILE_FUSE_ENABLE_BOTH; 158 158 hw->sku = BTRS_MTL_TILE_SKU_BOTH; 159 - hw->config = BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO; 159 + hw->config = WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_4_3); 160 160 161 161 return 0; 162 162 } ··· 334 334 335 335 prepare_wp_request(vdev, &wp, enable); 336 336 337 - ivpu_dbg(vdev, PM, "PLL workpoint request: %u Hz, config: 0x%x, epp: 0x%x, cdyn: 0x%x\n", 338 - PLL_RATIO_TO_FREQ(wp.target), wp.cfg, wp.epp, wp.cdyn); 337 + ivpu_dbg(vdev, PM, "PLL workpoint request: %lu MHz, config: 0x%x, epp: 0x%x, cdyn: 0x%x\n", 338 + pll_ratio_to_dpu_freq(vdev, wp.target) / HZ_PER_MHZ, wp.cfg, wp.epp, wp.cdyn); 339 339 340 340 ret = wp_request_send(vdev, &wp); 341 341 if (ret) { ··· 573 573 return REGB_POLL_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US); 574 574 } 575 575 576 + static u32 pll_config_get_mtl(struct ivpu_device *vdev) 577 + { 578 + return REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL); 579 + } 580 + 581 + static u32 pll_config_get_lnl(struct ivpu_device *vdev) 582 + { 583 + return REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ); 584 + } 585 + 586 + static u32 pll_ratio_to_dpu_freq_mtl(u16 ratio) 587 + { 588 + return (PLL_RATIO_TO_FREQ(ratio) * 2) / 3; 589 + } 590 + 591 + static u32 pll_ratio_to_dpu_freq_lnl(u16 ratio) 592 + { 593 + return PLL_RATIO_TO_FREQ(ratio) / 2; 594 + } 595 + 596 + static u32 pll_ratio_to_dpu_freq(struct ivpu_device *vdev, u32 ratio) 597 + { 598 + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) 599 + return pll_ratio_to_dpu_freq_mtl(ratio); 600 + else 601 + return pll_ratio_to_dpu_freq_lnl(ratio); 602 + } 603 + 604 + u32 ivpu_hw_btrs_dpu_max_freq_get(struct ivpu_device *vdev) 605 + { 606 + return pll_ratio_to_dpu_freq(vdev, vdev->hw->pll.max_ratio); 607 + } 608 + 609 + u32 ivpu_hw_btrs_dpu_freq_get(struct ivpu_device *vdev) 610 + { 611 + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) 612 + return pll_ratio_to_dpu_freq_mtl(pll_config_get_mtl(vdev)); 613 + else 614 + return pll_ratio_to_dpu_freq_lnl(pll_config_get_lnl(vdev)); 615 + } 616 + 576 617 /* Handler for IRQs from Buttress core (irqB) */ 577 618 bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq) 578 619 { ··· 623 582 if (!status) 624 583 return false; 625 584 626 - if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, FREQ_CHANGE, status)) 627 - ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x", 628 - REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL)); 585 + if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, FREQ_CHANGE, status)) { 586 + u32 pll = pll_config_get_mtl(vdev); 587 + 588 + ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq, wp %08x, %lu MHz", 589 + pll, pll_ratio_to_dpu_freq_mtl(pll) / HZ_PER_MHZ); 590 + } 629 591 630 592 if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, ATS_ERR, status)) { 631 593 ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(VPU_HW_BTRS_MTL_ATS_ERR_LOG_0)); ··· 677 633 queue_work(system_wq, &vdev->irq_dct_work); 678 634 } 679 635 680 - if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, FREQ_CHANGE, status)) 681 - ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x", REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ)); 636 + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, FREQ_CHANGE, status)) { 637 + u32 pll = pll_config_get_lnl(vdev); 638 + 639 + ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq, wp %08x, %lu MHz", 640 + pll, pll_ratio_to_dpu_freq_lnl(pll) / HZ_PER_MHZ); 641 + } 682 642 683 643 if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, ATS_ERR, status)) { 684 644 ivpu_err(vdev, "ATS_ERR LOG1 0x%08x ATS_ERR_LOG2 0x%08x\n", ··· 763 715 val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, PARAM2, active_percent, val); 764 716 765 717 REGB_WR32(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, val); 766 - } 767 - 768 - static u32 pll_ratio_to_freq_mtl(u32 ratio, u32 config) 769 - { 770 - u32 pll_clock = PLL_REF_CLK_FREQ * ratio; 771 - u32 cpu_clock; 772 - 773 - if ((config & 0xff) == MTL_PLL_RATIO_4_3) 774 - cpu_clock = pll_clock * 2 / 4; 775 - else 776 - cpu_clock = pll_clock * 2 / 5; 777 - 778 - return cpu_clock; 779 - } 780 - 781 - u32 ivpu_hw_btrs_ratio_to_freq(struct ivpu_device *vdev, u32 ratio) 782 - { 783 - struct ivpu_hw_info *hw = vdev->hw; 784 - 785 - if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) 786 - return pll_ratio_to_freq_mtl(ratio, hw->config); 787 - else 788 - return PLL_RATIO_TO_FREQ(ratio); 789 - } 790 - 791 - static u32 pll_freq_get_mtl(struct ivpu_device *vdev) 792 - { 793 - u32 pll_curr_ratio; 794 - 795 - pll_curr_ratio = REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL); 796 - pll_curr_ratio &= VPU_HW_BTRS_MTL_CURRENT_PLL_RATIO_MASK; 797 - 798 - if (!ivpu_is_silicon(vdev)) 799 - return PLL_SIMULATION_FREQ; 800 - 801 - return pll_ratio_to_freq_mtl(pll_curr_ratio, vdev->hw->config); 802 - } 803 - 804 - static u32 pll_freq_get_lnl(struct ivpu_device *vdev) 805 - { 806 - u32 pll_curr_ratio; 807 - 808 - pll_curr_ratio = REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ); 809 - pll_curr_ratio &= VPU_HW_BTRS_LNL_PLL_FREQ_RATIO_MASK; 810 - 811 - return PLL_RATIO_TO_FREQ(pll_curr_ratio); 812 - } 813 - 814 - u32 ivpu_hw_btrs_pll_freq_get(struct ivpu_device *vdev) 815 - { 816 - if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) 817 - return pll_freq_get_mtl(vdev); 818 - else 819 - return pll_freq_get_lnl(vdev); 820 718 } 821 719 822 720 u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev)

+3 -4

drivers/accel/ivpu/ivpu_hw_btrs.h

··· 1 1 /* SPDX-License-Identifier: GPL-2.0-only */ 2 2 /* 3 - * Copyright (C) 2020-2024 Intel Corporation 3 + * Copyright (C) 2020-2025 Intel Corporation 4 4 */ 5 5 6 6 #ifndef __IVPU_HW_BTRS_H__ ··· 13 13 14 14 #define PLL_PROFILING_FREQ_DEFAULT 38400000 15 15 #define PLL_PROFILING_FREQ_HIGH 400000000 16 - #define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ) 17 16 18 17 #define DCT_DEFAULT_ACTIVE_PERCENT 15u 19 18 #define DCT_PERIOD_US 35300u ··· 31 32 void ivpu_hw_btrs_profiling_freq_reg_set_lnl(struct ivpu_device *vdev); 32 33 void ivpu_hw_btrs_ats_print_lnl(struct ivpu_device *vdev); 33 34 void ivpu_hw_btrs_clock_relinquish_disable_lnl(struct ivpu_device *vdev); 35 + u32 ivpu_hw_btrs_dpu_max_freq_get(struct ivpu_device *vdev); 36 + u32 ivpu_hw_btrs_dpu_freq_get(struct ivpu_device *vdev); 34 37 bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq); 35 38 bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq); 36 39 int ivpu_hw_btrs_dct_get_request(struct ivpu_device *vdev, bool *enable); 37 40 void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u32 dct_percent); 38 - u32 ivpu_hw_btrs_pll_freq_get(struct ivpu_device *vdev); 39 - u32 ivpu_hw_btrs_ratio_to_freq(struct ivpu_device *vdev, u32 ratio); 40 41 u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev); 41 42 u32 ivpu_hw_btrs_telemetry_size_get(struct ivpu_device *vdev); 42 43 u32 ivpu_hw_btrs_telemetry_enable_get(struct ivpu_device *vdev);

+8 -7

drivers/accel/ivpu/ivpu_job.c

··· 470 470 struct ivpu_device *vdev = job->vdev; 471 471 u32 i; 472 472 473 - ivpu_dbg(vdev, JOB, "Job destroyed: id %3u ctx %2d engine %d", 474 - job->job_id, job->file_priv->ctx.id, job->engine_idx); 473 + ivpu_dbg(vdev, JOB, "Job destroyed: id %3u ctx %2d cmdq_id %u engine %d", 474 + job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx); 475 475 476 476 for (i = 0; i < job->bo_count; i++) 477 477 if (job->bos[i]) ··· 564 564 dma_fence_signal(job->done_fence); 565 565 566 566 trace_job("done", job); 567 - ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d engine %d status 0x%x\n", 568 - job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status); 567 + ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d cmdq_id %u engine %d status 0x%x\n", 568 + job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx, job_status); 569 569 570 570 ivpu_job_destroy(job); 571 571 ivpu_stop_job_timeout_detection(vdev); ··· 664 664 } 665 665 666 666 trace_job("submit", job); 667 - ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d engine %d prio %d addr 0x%llx next %d\n", 668 - job->job_id, file_priv->ctx.id, job->engine_idx, cmdq->priority, 667 + ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d cmdq_id %u engine %d prio %d addr 0x%llx next %d\n", 668 + job->job_id, file_priv->ctx.id, cmdq->id, job->engine_idx, cmdq->priority, 669 669 job->cmd_buf_vpu_addr, cmdq->jobq->header.tail); 670 670 671 671 mutex_unlock(&file_priv->lock); ··· 777 777 goto err_free_handles; 778 778 } 779 779 780 - ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u buf_count %u\n", file_priv->ctx.id, buffer_count); 780 + ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u cmdq_id %u buf_count %u\n", 781 + file_priv->ctx.id, cmdq_id, buffer_count); 781 782 782 783 job = ivpu_job_create(file_priv, engine, buffer_count); 783 784 if (!job) {

+48 -1

drivers/accel/ivpu/ivpu_sysfs.c

··· 1 1 // SPDX-License-Identifier: GPL-2.0-only 2 2 /* 3 - * Copyright (C) 2024 Intel Corporation 3 + * Copyright (C) 2024-2025 Intel Corporation 4 4 */ 5 5 6 6 #include <linux/device.h> 7 7 #include <linux/err.h> 8 + #include <linux/pm_runtime.h> 9 + #include <linux/units.h> 8 10 9 11 #include "ivpu_drv.h" 10 12 #include "ivpu_gem.h" ··· 92 90 93 91 static DEVICE_ATTR_RO(sched_mode); 94 92 93 + /** 94 + * DOC: npu_max_frequency 95 + * 96 + * The npu_max_frequency shows maximum frequency in MHz of the NPU's data 97 + * processing unit 98 + */ 99 + static ssize_t 100 + npu_max_frequency_mhz_show(struct device *dev, struct device_attribute *attr, char *buf) 101 + { 102 + struct drm_device *drm = dev_get_drvdata(dev); 103 + struct ivpu_device *vdev = to_ivpu_device(drm); 104 + u32 freq = ivpu_hw_dpu_max_freq_get(vdev); 105 + 106 + return sysfs_emit(buf, "%lu\n", freq / HZ_PER_MHZ); 107 + } 108 + 109 + static DEVICE_ATTR_RO(npu_max_frequency_mhz); 110 + 111 + /** 112 + * DOC: npu_current_frequency_mhz 113 + * 114 + * The npu_current_frequency_mhz shows current frequency in MHz of the NPU's 115 + * data processing unit 116 + */ 117 + static ssize_t 118 + npu_current_frequency_mhz_show(struct device *dev, struct device_attribute *attr, char *buf) 119 + { 120 + struct drm_device *drm = dev_get_drvdata(dev); 121 + struct ivpu_device *vdev = to_ivpu_device(drm); 122 + u32 freq = 0; 123 + 124 + /* Read frequency only if device is active, otherwise frequency is 0 */ 125 + if (pm_runtime_get_if_active(vdev->drm.dev) > 0) { 126 + freq = ivpu_hw_dpu_freq_get(vdev); 127 + 128 + pm_runtime_put_autosuspend(vdev->drm.dev); 129 + } 130 + 131 + return sysfs_emit(buf, "%lu\n", freq / HZ_PER_MHZ); 132 + } 133 + 134 + static DEVICE_ATTR_RO(npu_current_frequency_mhz); 135 + 95 136 static struct attribute *ivpu_dev_attrs[] = { 96 137 &dev_attr_npu_busy_time_us.attr, 97 138 &dev_attr_npu_memory_utilization.attr, 98 139 &dev_attr_sched_mode.attr, 140 + &dev_attr_npu_max_frequency_mhz.attr, 141 + &dev_attr_npu_current_frequency_mhz.attr, 99 142 NULL, 100 143 }; 101 144

+10 -3

drivers/accel/ivpu/vpu_boot_api.h

··· 26 26 * Minor version changes when API backward compatibility is preserved. 27 27 * Resets to 0 if Major version is incremented. 28 28 */ 29 - #define VPU_BOOT_API_VER_MINOR 26 29 + #define VPU_BOOT_API_VER_MINOR 28 30 30 31 31 /* 32 32 * API header changed (field names, documentation, formatting) but API itself has not been changed ··· 76 76 * submission queue size and device capabilities. 77 77 */ 78 78 u32 preemption_buffer_2_size; 79 + /* 80 + * Maximum preemption buffer size that the FW can use: no need for the host 81 + * driver to allocate more space than that specified by these fields. 82 + * A value of 0 means no declared limit. 83 + */ 84 + u32 preemption_buffer_1_max_size; 85 + u32 preemption_buffer_2_max_size; 79 86 /* Space reserved for future preemption-related fields. */ 80 - u32 preemption_reserved[6]; 87 + u32 preemption_reserved[4]; 81 88 /* FW image read only section start address, 4KB aligned */ 82 89 u64 ro_section_start_address; 83 90 /* FW image read only section size, 4KB aligned */ ··· 141 134 /* 142 135 * Processor bit shifts (for loggable HW components). 143 136 */ 144 - #define VPU_TRACE_PROC_BIT_ARM 0 137 + #define VPU_TRACE_PROC_BIT_RESERVED 0 145 138 #define VPU_TRACE_PROC_BIT_LRT 1 146 139 #define VPU_TRACE_PROC_BIT_LNN 2 147 140 #define VPU_TRACE_PROC_BIT_SHV_0 3

+36 -17

drivers/accel/ivpu/vpu_jsm_api.h

··· 22 22 /* 23 23 * Minor version changes when API backward compatibility is preserved. 24 24 */ 25 - #define VPU_JSM_API_VER_MINOR 25 25 + #define VPU_JSM_API_VER_MINOR 29 26 26 27 27 /* 28 28 * API header changed (field names, documentation, formatting) but API itself has not been changed ··· 53 53 * Engine indexes. 54 54 */ 55 55 #define VPU_ENGINE_COMPUTE 0 56 - #define VPU_ENGINE_COPY 1 57 - #define VPU_ENGINE_NB 2 56 + #define VPU_ENGINE_NB 1 58 57 59 58 /* 60 59 * VPU status values. ··· 125 126 * When set, indicates that job queue uses native fences (as inline commands 126 127 * in job queue). Such queues may also use legacy fences (as commands in batch buffers). 127 128 * When cleared, indicates the job queue only uses legacy fences. 128 - * NOTE: For queues using native fences, VPU expects that all jobs in the queue 129 - * are immediately followed by an inline command object. This object is expected 130 - * to be a fence signal command in most cases, but can also be a NOP in case the host 131 - * does not need per-job fence signalling. Other inline commands objects can be 132 - * inserted between "job and inline command" pairs. 129 + * NOTES: 130 + * 1. For queues using native fences, VPU expects that all jobs in the queue 131 + * are immediately followed by an inline command object. This object is expected 132 + * to be a fence signal command in most cases, but can also be a NOP in case the host 133 + * does not need per-job fence signalling. Other inline commands objects can be 134 + * inserted between "job and inline command" pairs. 135 + * 2. Native fence queues are only supported on VPU 40xx onwards. 133 136 */ 134 137 VPU_JOB_QUEUE_FLAGS_USE_NATIVE_FENCE_MASK = (1 << 1U), 135 138 ··· 276 275 u64 value; 277 276 /* User VA of the log buffer in which to add log entry on completion. */ 278 277 u64 log_buffer_va; 278 + /* NPU private data. */ 279 + u64 npu_private_data; 279 280 } fence; 280 281 /* Other commands do not have a payload. */ 281 282 /* Payload definition for future inline commands can be inserted here. */ ··· 794 791 /** Metric group mask that identifies metric streamer instance. */ 795 792 u64 metric_group_mask; 796 793 /** 797 - * Address and size of the buffer where the VPU will write metric data. If 798 - * the buffer address is 0 or same as the currently used buffer the VPU will 799 - * continue writing metric data to the current buffer. In this case the 800 - * buffer size is ignored and the size of the current buffer is unchanged. 801 - * If the address is non-zero and differs from the current buffer address the 802 - * VPU will immediately switch data collection to the new buffer. 794 + * Address and size of the buffer where the VPU will write metric data. 795 + * This member dictates how the update operation should perform: 796 + * 1. client needs information about the number of collected samples and the 797 + * amount of data written to the current buffer 798 + * 2. client wants to switch to a new buffer 799 + * 800 + * Case 1. is identified by the buffer address being 0 or the same as the 801 + * currently used buffer address. In this case the buffer size is ignored and 802 + * the size of the current buffer is unchanged. The VPU will return an update 803 + * in the vpu_jsm_metric_streamer_done structure. The internal writing position 804 + * into the buffer is not changed. 805 + * 806 + * Case 2. is identified by the address being non-zero and differs from the 807 + * current buffer address. The VPU will immediately switch data collection to 808 + * the new buffer. Then the VPU will return an update in the 809 + * vpu_jsm_metric_streamer_done structure. 803 810 */ 804 811 u64 buffer_addr; 805 812 u64 buffer_size; ··· 947 934 /* 948 935 * Default quantum in 100ns units for scheduling across processes 949 936 * within a priority band 937 + * Minimum value supported by NPU is 1ms (10000 in 100ns units). 950 938 */ 951 939 u32 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS]; 952 940 /* ··· 960 946 * in situations when it's starved by the focus band. 961 947 */ 962 948 u32 normal_band_percentage; 963 - /* Reserved */ 964 - u32 reserved_0; 949 + /* 950 + * TDR timeout value in milliseconds. Default value of 0 meaning no timeout. 951 + */ 952 + u32 tdr_timeout; 965 953 }; 966 954 967 955 /* ··· 1040 1024 s32 in_process_priority; 1041 1025 /* Zero padding / Reserved */ 1042 1026 u32 reserved_1; 1043 - /* Context quantum relative to other contexts of same priority in the same process */ 1027 + /* 1028 + * Context quantum relative to other contexts of same priority in the same process 1029 + * Minimum value supported by NPU is 1ms (10000 in 100ns units). 1030 + */ 1044 1031 u64 context_quantum; 1045 1032 /* Grace period when preempting context of the same priority within the same process */ 1046 1033 u64 grace_period_same_priority;

+14 -5

drivers/dma-buf/sw_sync.c

··· 438 438 return -EINVAL; 439 439 440 440 pt = dma_fence_to_sync_pt(fence); 441 - if (!pt) 442 - return -EINVAL; 441 + if (!pt) { 442 + ret = -EINVAL; 443 + goto put_fence; 444 + } 443 445 444 446 spin_lock_irqsave(fence->lock, flags); 445 - if (test_bit(SW_SYNC_HAS_DEADLINE_BIT, &fence->flags)) { 446 - data.deadline_ns = ktime_to_ns(pt->deadline); 447 - } else { 447 + if (!test_bit(SW_SYNC_HAS_DEADLINE_BIT, &fence->flags)) { 448 448 ret = -ENOENT; 449 + goto unlock; 449 450 } 451 + data.deadline_ns = ktime_to_ns(pt->deadline); 450 452 spin_unlock_irqrestore(fence->lock, flags); 451 453 452 454 dma_fence_put(fence); ··· 460 458 return -EFAULT; 461 459 462 460 return 0; 461 + 462 + unlock: 463 + spin_unlock_irqrestore(fence->lock, flags); 464 + put_fence: 465 + dma_fence_put(fence); 466 + 467 + return ret; 463 468 } 464 469 465 470 static long sw_sync_ioctl(struct file *file, unsigned int cmd,

+1 -1

drivers/gpu/drm/mgag200/mgag200_mode.c

··· 223 223 vsyncstr = mode->crtc_vsync_start - 1; 224 224 vsyncend = mode->crtc_vsync_end - 1; 225 225 vtotal = mode->crtc_vtotal - 2; 226 - vblkstr = mode->crtc_vblank_start; 226 + vblkstr = mode->crtc_vblank_start - 1; 227 227 vblkend = vtotal + 1; 228 228 229 229 linecomp = vdispend;

+13 -3

drivers/gpu/drm/v3d/v3d_sched.c

··· 428 428 struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]); 429 429 struct v3d_bo *indirect = to_v3d_bo(indirect_csd->indirect); 430 430 struct drm_v3d_submit_csd *args = &indirect_csd->job->args; 431 - u32 *wg_counts; 431 + struct v3d_dev *v3d = job->base.v3d; 432 + u32 num_batches, *wg_counts; 432 433 433 434 v3d_get_bo_vaddr(bo); 434 435 v3d_get_bo_vaddr(indirect); ··· 442 441 args->cfg[0] = wg_counts[0] << V3D_CSD_CFG012_WG_COUNT_SHIFT; 443 442 args->cfg[1] = wg_counts[1] << V3D_CSD_CFG012_WG_COUNT_SHIFT; 444 443 args->cfg[2] = wg_counts[2] << V3D_CSD_CFG012_WG_COUNT_SHIFT; 445 - args->cfg[4] = DIV_ROUND_UP(indirect_csd->wg_size, 16) * 446 - (wg_counts[0] * wg_counts[1] * wg_counts[2]) - 1; 444 + 445 + num_batches = DIV_ROUND_UP(indirect_csd->wg_size, 16) * 446 + (wg_counts[0] * wg_counts[1] * wg_counts[2]); 447 + 448 + /* V3D 7.1.6 and later don't subtract 1 from the number of batches */ 449 + if (v3d->ver < 71 || (v3d->ver == 71 && v3d->rev < 6)) 450 + args->cfg[4] = num_batches - 1; 451 + else 452 + args->cfg[4] = num_batches; 453 + 454 + WARN_ON(args->cfg[4] == ~0); 447 455 448 456 for (int i = 0; i < 3; i++) { 449 457 /* 0xffffffff indicates that the uniform rewrite is not needed */

+1 -2

include/drm/drm_gem.h

··· 585 585 */ 586 586 static inline bool drm_gem_is_imported(const struct drm_gem_object *obj) 587 587 { 588 - /* The dma-buf's priv field points to the original GEM object. */ 589 - return obj->dma_buf && (obj->dma_buf->priv != obj); 588 + return !!obj->import_attach; 590 589 } 591 590 592 591 #ifdef CONFIG_LOCKDEP

+2 -2

include/uapi/drm/ivpu_accel.h

··· 1 1 /* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ 2 2 /* 3 - * Copyright (C) 2020-2024 Intel Corporation 3 + * Copyright (C) 2020-2025 Intel Corporation 4 4 */ 5 5 6 6 #ifndef __UAPI_IVPU_DRM_H__ ··· 147 147 * platform type when executing on a simulator or emulator (read-only) 148 148 * 149 149 * %DRM_IVPU_PARAM_CORE_CLOCK_RATE: 150 - * Current PLL frequency (read-only) 150 + * Maximum frequency of the NPU data processing unit clock (read-only) 151 151 * 152 152 * %DRM_IVPU_PARAM_NUM_CONTEXTS: 153 153 * Maximum number of simultaneously existing contexts (read-only)