Merge tag 'drm-fixes-2025-04-19' of https://gitlab.freedesktop.org/drm/kernel

Pull drm fixes from Dave Airlie:
"Easter rc3 pull request, fixes in all the usuals, amdgpu, xe, msm,
with some i915/ivpu/mgag200/v3d fixes, then a couple of bits in
dma-buf/gem.

Hopefully has no easter eggs in it.

dma-buf:
- Correctly decrement refcounter on errors

gem:
- Fix test for imported buffers

amdgpu:
- Cleaner shader sysfs fix
- Suspend fix
- Fix doorbell free ordering
- Video caps fix
- DML2 memory allocation optimization
- HDP fix

i915:
- Fix DP DSC configurations that require 3 DSC engines per pipe

xe:
- Fix LRC address being written too late for GuC
- Fix notifier vs folio deadlock
- Fix race between dma_buf unmap and vram eviction
- Fix debugfs handling PXP terminations unconditionally

msm:
- Display:
- Fix to call dpu_plane_atomic_check_pipe() for both SSPPs in
case of multi-rect
- Fix to validate plane_state pointer before using it in
dpu_plane_virtual_atomic_check()
- Fix to make sure dereferencing dpu_encoder_phys happens after
making sure it is valid in _dpu_encoder_trigger_start()
- Remove the remaining intr_tear_rd_ptr = -1 assignments, since NO_IRQ
  indices now start from 0
- GPU:
- Fix IB_SIZE overflow

ivpu:
- Debug logging fixes (add cmdq_id to job-related logs)
- NPU frequency reporting fixes
- Support firmware API 3.28.3
- Flush jobs upon reset

mgag200:
- Set vblank start to correct values

v3d:
- Fix Indirect Dispatch"

* tag 'drm-fixes-2025-04-19' of https://gitlab.freedesktop.org/drm/kernel: (26 commits)
drm/msm/a6xx+: Don't let IB_SIZE overflow
drm/xe/pxp: do not queue unneeded terminations from debugfs
drm/xe/dma_buf: stop relying on placement in unmap
drm/xe/userptr: fix notifier vs folio deadlock
drm/xe: Set LRC addresses before guc load
drm/mgag200: Fix value in <VBLKSTR> register
drm/gem: Internally test import_attach for imported objects
drm/amdgpu: Use the right function for hdp flush
drm/amd/display/dml2: use vzalloc rather than kzalloc
drm/amdgpu: Add back JPEG to video caps for carrizo and newer
drm/amdgpu: fix warning of drm_mm_clean
drm/amd: Forbid suspending into non-default suspend states
drm/amdgpu: use a dummy owner for sysfs triggered cleaner shaders v4
drm/i915/dp: Check for HAS_DSC_3ENGINES while configuring DSC slices
drm/i915/display: Add macro for checking 3 DSC engines
dma-buf/sw_sync: Decrement refcount on error in sw_sync_ioctl_get_deadline()
accel/ivpu: Add cmdq_id to job related logs
accel/ivpu: Show NPU frequency in sysfs
accel/ivpu: Fix the NPU's DPU frequency calculation
accel/ivpu: Update FW Boot API to version 3.28.3
...

+407 -285
+5 -5
drivers/accel/ivpu/ivpu_drv.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0-only 2 2 /* 3 - * Copyright (C) 2020-2024 Intel Corporation 3 + * Copyright (C) 2020-2025 Intel Corporation 4 4 */ 5 5 6 6 #include <linux/firmware.h> ··· 164 164 args->value = vdev->platform; 165 165 break; 166 166 case DRM_IVPU_PARAM_CORE_CLOCK_RATE: 167 - args->value = ivpu_hw_ratio_to_freq(vdev, vdev->hw->pll.max_ratio); 167 + args->value = ivpu_hw_dpu_max_freq_get(vdev); 168 168 break; 169 169 case DRM_IVPU_PARAM_NUM_CONTEXTS: 170 170 args->value = ivpu_get_context_count(vdev); ··· 421 421 { 422 422 ivpu_hw_irq_disable(vdev); 423 423 disable_irq(vdev->irq); 424 - cancel_work_sync(&vdev->irq_ipc_work); 425 - cancel_work_sync(&vdev->irq_dct_work); 426 - cancel_work_sync(&vdev->context_abort_work); 424 + flush_work(&vdev->irq_ipc_work); 425 + flush_work(&vdev->irq_dct_work); 426 + flush_work(&vdev->context_abort_work); 427 427 ivpu_ipc_disable(vdev); 428 428 ivpu_mmu_disable(vdev); 429 429 }
+13 -4
drivers/accel/ivpu/ivpu_fw.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0-only 2 2 /* 3 - * Copyright (C) 2020-2024 Intel Corporation 3 + * Copyright (C) 2020-2025 Intel Corporation 4 4 */ 5 5 6 6 #include <linux/firmware.h> ··· 233 233 fw->dvfs_mode = 0; 234 234 235 235 fw->sched_mode = ivpu_fw_sched_mode_select(vdev, fw_hdr); 236 - fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size; 237 - fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size; 238 236 ivpu_info(vdev, "Scheduler mode: %s\n", fw->sched_mode ? "HW" : "OS"); 237 + 238 + if (fw_hdr->preemption_buffer_1_max_size) 239 + fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_max_size; 240 + else 241 + fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size; 242 + 243 + if (fw_hdr->preemption_buffer_2_max_size) 244 + fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_max_size; 245 + else 246 + fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size; 247 + ivpu_dbg(vdev, FW_BOOT, "Preemption buffer sizes: primary %u, secondary %u\n", 248 + fw->primary_preempt_buf_size, fw->secondary_preempt_buf_size); 239 249 240 250 if (fw_hdr->ro_section_start_address && !is_within_range(fw_hdr->ro_section_start_address, 241 251 fw_hdr->ro_section_size, ··· 576 566 577 567 boot_params->magic = VPU_BOOT_PARAMS_MAGIC; 578 568 boot_params->vpu_id = to_pci_dev(vdev->drm.dev)->bus->number; 579 - boot_params->frequency = ivpu_hw_pll_freq_get(vdev); 580 569 581 570 /* 582 571 * This param is a debug firmware feature. It switches default clock
+8 -8
drivers/accel/ivpu/ivpu_hw.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0-only */ 2 2 /* 3 - * Copyright (C) 2020-2024 Intel Corporation 3 + * Copyright (C) 2020-2025 Intel Corporation 4 4 */ 5 5 6 6 #ifndef __IVPU_HW_H__ ··· 82 82 return range->end - range->start; 83 83 } 84 84 85 - static inline u32 ivpu_hw_ratio_to_freq(struct ivpu_device *vdev, u32 ratio) 85 + static inline u32 ivpu_hw_dpu_max_freq_get(struct ivpu_device *vdev) 86 86 { 87 - return ivpu_hw_btrs_ratio_to_freq(vdev, ratio); 87 + return ivpu_hw_btrs_dpu_max_freq_get(vdev); 88 + } 89 + 90 + static inline u32 ivpu_hw_dpu_freq_get(struct ivpu_device *vdev) 91 + { 92 + return ivpu_hw_btrs_dpu_freq_get(vdev); 88 93 } 89 94 90 95 static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev) 91 96 { 92 97 ivpu_hw_ip_irq_clear(vdev); 93 - } 94 - 95 - static inline u32 ivpu_hw_pll_freq_get(struct ivpu_device *vdev) 96 - { 97 - return ivpu_hw_btrs_pll_freq_get(vdev); 98 98 } 99 99 100 100 static inline u32 ivpu_hw_profiling_freq_get(struct ivpu_device *vdev)
+64 -70
drivers/accel/ivpu/ivpu_hw_btrs.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0-only 2 2 /* 3 - * Copyright (C) 2020-2024 Intel Corporation 3 + * Copyright (C) 2020-2025 Intel Corporation 4 4 */ 5 + 6 + #include <linux/units.h> 5 7 6 8 #include "ivpu_drv.h" 7 9 #include "ivpu_hw.h" ··· 30 28 31 29 #define BTRS_LNL_ALL_IRQ_MASK ((u32)-1) 32 30 33 - #define BTRS_MTL_WP_CONFIG_1_TILE_5_3_RATIO WP_CONFIG(MTL_CONFIG_1_TILE, MTL_PLL_RATIO_5_3) 34 - #define BTRS_MTL_WP_CONFIG_1_TILE_4_3_RATIO WP_CONFIG(MTL_CONFIG_1_TILE, MTL_PLL_RATIO_4_3) 35 - #define BTRS_MTL_WP_CONFIG_2_TILE_5_3_RATIO WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_5_3) 36 - #define BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_4_3) 37 - #define BTRS_MTL_WP_CONFIG_0_TILE_PLL_OFF WP_CONFIG(0, 0) 38 31 39 32 #define PLL_CDYN_DEFAULT 0x80 40 33 #define PLL_EPP_DEFAULT 0x80 41 34 #define PLL_CONFIG_DEFAULT 0x0 42 - #define PLL_SIMULATION_FREQ 10000000 43 - #define PLL_REF_CLK_FREQ 50000000 35 + #define PLL_REF_CLK_FREQ 50000000ull 36 + #define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ) 37 + 44 38 #define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC) 45 39 #define IDLE_TIMEOUT_US (5 * USEC_PER_MSEC) 46 40 #define TIMEOUT_US (150 * USEC_PER_MSEC) ··· 59 61 #define DCT_REQ 0x2 60 62 #define DCT_ENABLE 0x1 61 63 #define DCT_DISABLE 0x0 64 + 65 + static u32 pll_ratio_to_dpu_freq(struct ivpu_device *vdev, u32 ratio); 62 66 63 67 int ivpu_hw_btrs_irqs_clear_with_0_mtl(struct ivpu_device *vdev) 64 68 { ··· 156 156 157 157 hw->tile_fuse = BTRS_MTL_TILE_FUSE_ENABLE_BOTH; 158 158 hw->sku = BTRS_MTL_TILE_SKU_BOTH; 159 - hw->config = BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO; 159 + hw->config = WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_4_3); 160 160 161 161 return 0; 162 162 } ··· 334 334 335 335 prepare_wp_request(vdev, &wp, enable); 336 336 337 - ivpu_dbg(vdev, PM, "PLL workpoint request: %u Hz, config: 0x%x, epp: 0x%x, cdyn: 0x%x\n", 338 - PLL_RATIO_TO_FREQ(wp.target), wp.cfg, wp.epp, wp.cdyn); 337 + ivpu_dbg(vdev, PM, "PLL workpoint request: %lu MHz, config: 0x%x, epp: 0x%x, cdyn: 0x%x\n", 338 + pll_ratio_to_dpu_freq(vdev, wp.target) / HZ_PER_MHZ, wp.cfg, wp.epp, wp.cdyn); 339 339 340 340 ret = wp_request_send(vdev, &wp); 341 341 if (ret) { ··· 573 573 return REGB_POLL_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US); 574 574 } 575 575 576 + static u32 pll_config_get_mtl(struct ivpu_device *vdev) 577 + { 578 + return REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL); 579 + } 580 + 581 + static u32 pll_config_get_lnl(struct ivpu_device *vdev) 582 + { 583 + return REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ); 584 + } 585 + 586 + static u32 pll_ratio_to_dpu_freq_mtl(u16 ratio) 587 + { 588 + return (PLL_RATIO_TO_FREQ(ratio) * 2) / 3; 589 + } 590 + 591 + static u32 pll_ratio_to_dpu_freq_lnl(u16 ratio) 592 + { 593 + return PLL_RATIO_TO_FREQ(ratio) / 2; 594 + } 595 + 596 + static u32 pll_ratio_to_dpu_freq(struct ivpu_device *vdev, u32 ratio) 597 + { 598 + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) 599 + return pll_ratio_to_dpu_freq_mtl(ratio); 600 + else 601 + return pll_ratio_to_dpu_freq_lnl(ratio); 602 + } 603 + 604 + u32 ivpu_hw_btrs_dpu_max_freq_get(struct ivpu_device *vdev) 605 + { 606 + return pll_ratio_to_dpu_freq(vdev, vdev->hw->pll.max_ratio); 607 + } 608 + 609 + u32 ivpu_hw_btrs_dpu_freq_get(struct ivpu_device *vdev) 610 + { 611 + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) 612 + return pll_ratio_to_dpu_freq_mtl(pll_config_get_mtl(vdev)); 613 + else 614 + return pll_ratio_to_dpu_freq_lnl(pll_config_get_lnl(vdev)); 615 + } 616 + 576 617 /* Handler for 
IRQs from Buttress core (irqB) */ 577 618 bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq) 578 619 { ··· 623 582 if (!status) 624 583 return false; 625 584 626 - if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, FREQ_CHANGE, status)) 627 - ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x", 628 - REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL)); 585 + if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, FREQ_CHANGE, status)) { 586 + u32 pll = pll_config_get_mtl(vdev); 587 + 588 + ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq, wp %08x, %lu MHz", 589 + pll, pll_ratio_to_dpu_freq_mtl(pll) / HZ_PER_MHZ); 590 + } 629 591 630 592 if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, ATS_ERR, status)) { 631 593 ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(VPU_HW_BTRS_MTL_ATS_ERR_LOG_0)); ··· 677 633 queue_work(system_wq, &vdev->irq_dct_work); 678 634 } 679 635 680 - if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, FREQ_CHANGE, status)) 681 - ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x", REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ)); 636 + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, FREQ_CHANGE, status)) { 637 + u32 pll = pll_config_get_lnl(vdev); 638 + 639 + ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq, wp %08x, %lu MHz", 640 + pll, pll_ratio_to_dpu_freq_lnl(pll) / HZ_PER_MHZ); 641 + } 682 642 683 643 if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, ATS_ERR, status)) { 684 644 ivpu_err(vdev, "ATS_ERR LOG1 0x%08x ATS_ERR_LOG2 0x%08x\n", ··· 763 715 val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, PARAM2, active_percent, val); 764 716 765 717 REGB_WR32(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, val); 766 - } 767 - 768 - static u32 pll_ratio_to_freq_mtl(u32 ratio, u32 config) 769 - { 770 - u32 pll_clock = PLL_REF_CLK_FREQ * ratio; 771 - u32 cpu_clock; 772 - 773 - if ((config & 0xff) == MTL_PLL_RATIO_4_3) 774 - cpu_clock = pll_clock * 2 / 4; 775 - else 776 - cpu_clock = pll_clock * 2 / 5; 777 - 778 - return cpu_clock; 779 - } 780 - 781 - u32 ivpu_hw_btrs_ratio_to_freq(struct ivpu_device *vdev, u32 ratio) 782 - { 783 - struct ivpu_hw_info *hw = vdev->hw; 784 - 785 - if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) 786 - return pll_ratio_to_freq_mtl(ratio, hw->config); 787 - else 788 - return PLL_RATIO_TO_FREQ(ratio); 789 - } 790 - 791 - static u32 pll_freq_get_mtl(struct ivpu_device *vdev) 792 - { 793 - u32 pll_curr_ratio; 794 - 795 - pll_curr_ratio = REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL); 796 - pll_curr_ratio &= VPU_HW_BTRS_MTL_CURRENT_PLL_RATIO_MASK; 797 - 798 - if (!ivpu_is_silicon(vdev)) 799 - return PLL_SIMULATION_FREQ; 800 - 801 - return pll_ratio_to_freq_mtl(pll_curr_ratio, vdev->hw->config); 802 - } 803 - 804 - static u32 pll_freq_get_lnl(struct ivpu_device *vdev) 805 - { 806 - u32 pll_curr_ratio; 807 - 808 - pll_curr_ratio = REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ); 809 - pll_curr_ratio &= VPU_HW_BTRS_LNL_PLL_FREQ_RATIO_MASK; 810 - 811 - return PLL_RATIO_TO_FREQ(pll_curr_ratio); 812 - } 813 - 814 - u32 ivpu_hw_btrs_pll_freq_get(struct ivpu_device *vdev) 815 - { 816 - if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) 817 - return pll_freq_get_mtl(vdev); 818 - else 819 - return pll_freq_get_lnl(vdev); 820 718 } 821 719 822 720 u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev)
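
The rework above replaces the old PLL-frequency plumbing with direct ratio-to-DPU-frequency helpers: the PLL runs at ratio x 50 MHz, and the DPU clock is 2/3 of that on MTL and 1/2 on LNL. A minimal userspace sketch of that arithmetic, with the constants and scaling factors taken from the hunk above (the example ratio is purely illustrative):

  #include <stdio.h>
  #include <stdint.h>

  #define PLL_REF_CLK_FREQ 50000000ull            /* 50 MHz reference clock */
  #define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ)
  #define HZ_PER_MHZ 1000000ull

  /* MTL: the DPU clock is 2/3 of the PLL output */
  static uint64_t dpu_freq_mtl(uint16_t ratio)
  {
      return (PLL_RATIO_TO_FREQ(ratio) * 2) / 3;
  }

  /* LNL: the DPU clock is 1/2 of the PLL output */
  static uint64_t dpu_freq_lnl(uint16_t ratio)
  {
      return PLL_RATIO_TO_FREQ(ratio) / 2;
  }

  int main(void)
  {
      uint16_t ratio = 32;    /* example workpoint ratio, purely illustrative */

      printf("MTL: ratio %u -> %llu MHz\n", ratio,
             (unsigned long long)(dpu_freq_mtl(ratio) / HZ_PER_MHZ));
      printf("LNL: ratio %u -> %llu MHz\n", ratio,
             (unsigned long long)(dpu_freq_lnl(ratio) / HZ_PER_MHZ));
      return 0;
  }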
+3 -4
drivers/accel/ivpu/ivpu_hw_btrs.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0-only */ 2 2 /* 3 - * Copyright (C) 2020-2024 Intel Corporation 3 + * Copyright (C) 2020-2025 Intel Corporation 4 4 */ 5 5 6 6 #ifndef __IVPU_HW_BTRS_H__ ··· 13 13 14 14 #define PLL_PROFILING_FREQ_DEFAULT 38400000 15 15 #define PLL_PROFILING_FREQ_HIGH 400000000 16 - #define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ) 17 16 18 17 #define DCT_DEFAULT_ACTIVE_PERCENT 15u 19 18 #define DCT_PERIOD_US 35300u ··· 31 32 void ivpu_hw_btrs_profiling_freq_reg_set_lnl(struct ivpu_device *vdev); 32 33 void ivpu_hw_btrs_ats_print_lnl(struct ivpu_device *vdev); 33 34 void ivpu_hw_btrs_clock_relinquish_disable_lnl(struct ivpu_device *vdev); 35 + u32 ivpu_hw_btrs_dpu_max_freq_get(struct ivpu_device *vdev); 36 + u32 ivpu_hw_btrs_dpu_freq_get(struct ivpu_device *vdev); 34 37 bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq); 35 38 bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq); 36 39 int ivpu_hw_btrs_dct_get_request(struct ivpu_device *vdev, bool *enable); 37 40 void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u32 dct_percent); 38 - u32 ivpu_hw_btrs_pll_freq_get(struct ivpu_device *vdev); 39 - u32 ivpu_hw_btrs_ratio_to_freq(struct ivpu_device *vdev, u32 ratio); 40 41 u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev); 41 42 u32 ivpu_hw_btrs_telemetry_size_get(struct ivpu_device *vdev); 42 43 u32 ivpu_hw_btrs_telemetry_enable_get(struct ivpu_device *vdev);
+8 -7
drivers/accel/ivpu/ivpu_job.c
··· 470 470 struct ivpu_device *vdev = job->vdev; 471 471 u32 i; 472 472 473 - ivpu_dbg(vdev, JOB, "Job destroyed: id %3u ctx %2d engine %d", 474 - job->job_id, job->file_priv->ctx.id, job->engine_idx); 473 + ivpu_dbg(vdev, JOB, "Job destroyed: id %3u ctx %2d cmdq_id %u engine %d", 474 + job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx); 475 475 476 476 for (i = 0; i < job->bo_count; i++) 477 477 if (job->bos[i]) ··· 564 564 dma_fence_signal(job->done_fence); 565 565 566 566 trace_job("done", job); 567 - ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d engine %d status 0x%x\n", 568 - job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status); 567 + ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d cmdq_id %u engine %d status 0x%x\n", 568 + job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx, job_status); 569 569 570 570 ivpu_job_destroy(job); 571 571 ivpu_stop_job_timeout_detection(vdev); ··· 664 664 } 665 665 666 666 trace_job("submit", job); 667 - ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d engine %d prio %d addr 0x%llx next %d\n", 668 - job->job_id, file_priv->ctx.id, job->engine_idx, cmdq->priority, 667 + ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d cmdq_id %u engine %d prio %d addr 0x%llx next %d\n", 668 + job->job_id, file_priv->ctx.id, cmdq->id, job->engine_idx, cmdq->priority, 669 669 job->cmd_buf_vpu_addr, cmdq->jobq->header.tail); 670 670 671 671 mutex_unlock(&file_priv->lock); ··· 777 777 goto err_free_handles; 778 778 } 779 779 780 - ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u buf_count %u\n", file_priv->ctx.id, buffer_count); 780 + ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u cmdq_id %u buf_count %u\n", 781 + file_priv->ctx.id, cmdq_id, buffer_count); 781 782 782 783 job = ivpu_job_create(file_priv, engine, buffer_count); 783 784 if (!job) {
+48 -1
drivers/accel/ivpu/ivpu_sysfs.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0-only 2 2 /* 3 - * Copyright (C) 2024 Intel Corporation 3 + * Copyright (C) 2024-2025 Intel Corporation 4 4 */ 5 5 6 6 #include <linux/device.h> 7 7 #include <linux/err.h> 8 + #include <linux/pm_runtime.h> 9 + #include <linux/units.h> 8 10 9 11 #include "ivpu_drv.h" 10 12 #include "ivpu_gem.h" ··· 92 90 93 91 static DEVICE_ATTR_RO(sched_mode); 94 92 93 + /** 94 + * DOC: npu_max_frequency 95 + * 96 + * The npu_max_frequency shows maximum frequency in MHz of the NPU's data 97 + * processing unit 98 + */ 99 + static ssize_t 100 + npu_max_frequency_mhz_show(struct device *dev, struct device_attribute *attr, char *buf) 101 + { 102 + struct drm_device *drm = dev_get_drvdata(dev); 103 + struct ivpu_device *vdev = to_ivpu_device(drm); 104 + u32 freq = ivpu_hw_dpu_max_freq_get(vdev); 105 + 106 + return sysfs_emit(buf, "%lu\n", freq / HZ_PER_MHZ); 107 + } 108 + 109 + static DEVICE_ATTR_RO(npu_max_frequency_mhz); 110 + 111 + /** 112 + * DOC: npu_current_frequency_mhz 113 + * 114 + * The npu_current_frequency_mhz shows current frequency in MHz of the NPU's 115 + * data processing unit 116 + */ 117 + static ssize_t 118 + npu_current_frequency_mhz_show(struct device *dev, struct device_attribute *attr, char *buf) 119 + { 120 + struct drm_device *drm = dev_get_drvdata(dev); 121 + struct ivpu_device *vdev = to_ivpu_device(drm); 122 + u32 freq = 0; 123 + 124 + /* Read frequency only if device is active, otherwise frequency is 0 */ 125 + if (pm_runtime_get_if_active(vdev->drm.dev) > 0) { 126 + freq = ivpu_hw_dpu_freq_get(vdev); 127 + 128 + pm_runtime_put_autosuspend(vdev->drm.dev); 129 + } 130 + 131 + return sysfs_emit(buf, "%lu\n", freq / HZ_PER_MHZ); 132 + } 133 + 134 + static DEVICE_ATTR_RO(npu_current_frequency_mhz); 135 + 95 136 static struct attribute *ivpu_dev_attrs[] = { 96 137 &dev_attr_npu_busy_time_us.attr, 97 138 &dev_attr_npu_memory_utilization.attr, 98 139 &dev_attr_sched_mode.attr, 140 + &dev_attr_npu_max_frequency_mhz.attr, 141 + &dev_attr_npu_current_frequency_mhz.attr, 99 142 NULL, 100 143 }; 101 144
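
The two new read-only attributes report the NPU data processing unit frequency in MHz, and npu_current_frequency_mhz intentionally reports 0 instead of waking a runtime-suspended device. A small userspace sketch for reading such an attribute; the sysfs path is taken from the command line rather than hard-coded, since the exact location depends on the accel device node:

  #include <stdio.h>

  /* Usage: pass the full path of npu_current_frequency_mhz (or the max variant) */
  int main(int argc, char **argv)
  {
      unsigned long mhz;
      FILE *f;

      if (argc != 2) {
          fprintf(stderr, "usage: %s <sysfs attribute path>\n", argv[0]);
          return 1;
      }

      f = fopen(argv[1], "r");
      if (!f) {
          perror("fopen");
          return 1;
      }
      if (fscanf(f, "%lu", &mhz) != 1) {
          fprintf(stderr, "unexpected attribute contents\n");
          fclose(f);
          return 1;
      }
      fclose(f);

      /* 0 means the NPU is runtime-suspended; the driver does not wake it to read */
      printf("%lu MHz%s\n", mhz, mhz ? "" : " (device suspended)");
      return 0;
  }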
+10 -3
drivers/accel/ivpu/vpu_boot_api.h
··· 26 26 * Minor version changes when API backward compatibility is preserved. 27 27 * Resets to 0 if Major version is incremented. 28 28 */ 29 - #define VPU_BOOT_API_VER_MINOR 26 29 + #define VPU_BOOT_API_VER_MINOR 28 30 30 31 31 /* 32 32 * API header changed (field names, documentation, formatting) but API itself has not been changed ··· 76 76 * submission queue size and device capabilities. 77 77 */ 78 78 u32 preemption_buffer_2_size; 79 + /* 80 + * Maximum preemption buffer size that the FW can use: no need for the host 81 + * driver to allocate more space than that specified by these fields. 82 + * A value of 0 means no declared limit. 83 + */ 84 + u32 preemption_buffer_1_max_size; 85 + u32 preemption_buffer_2_max_size; 79 86 /* Space reserved for future preemption-related fields. */ 80 - u32 preemption_reserved[6]; 87 + u32 preemption_reserved[4]; 81 88 /* FW image read only section start address, 4KB aligned */ 82 89 u64 ro_section_start_address; 83 90 /* FW image read only section size, 4KB aligned */ ··· 141 134 /* 142 135 * Processor bit shifts (for loggable HW components). 143 136 */ 144 - #define VPU_TRACE_PROC_BIT_ARM 0 137 + #define VPU_TRACE_PROC_BIT_RESERVED 0 145 138 #define VPU_TRACE_PROC_BIT_LRT 1 146 139 #define VPU_TRACE_PROC_BIT_LNN 2 147 140 #define VPU_TRACE_PROC_BIT_SHV_0 3
+36 -17
drivers/accel/ivpu/vpu_jsm_api.h
··· 22 22 /* 23 23 * Minor version changes when API backward compatibility is preserved. 24 24 */ 25 - #define VPU_JSM_API_VER_MINOR 25 25 + #define VPU_JSM_API_VER_MINOR 29 26 26 27 27 /* 28 28 * API header changed (field names, documentation, formatting) but API itself has not been changed ··· 53 53 * Engine indexes. 54 54 */ 55 55 #define VPU_ENGINE_COMPUTE 0 56 - #define VPU_ENGINE_COPY 1 57 - #define VPU_ENGINE_NB 2 56 + #define VPU_ENGINE_NB 1 58 57 59 58 /* 60 59 * VPU status values. ··· 125 126 * When set, indicates that job queue uses native fences (as inline commands 126 127 * in job queue). Such queues may also use legacy fences (as commands in batch buffers). 127 128 * When cleared, indicates the job queue only uses legacy fences. 128 - * NOTE: For queues using native fences, VPU expects that all jobs in the queue 129 - * are immediately followed by an inline command object. This object is expected 130 - * to be a fence signal command in most cases, but can also be a NOP in case the host 131 - * does not need per-job fence signalling. Other inline commands objects can be 132 - * inserted between "job and inline command" pairs. 129 + * NOTES: 130 + * 1. For queues using native fences, VPU expects that all jobs in the queue 131 + * are immediately followed by an inline command object. This object is expected 132 + * to be a fence signal command in most cases, but can also be a NOP in case the host 133 + * does not need per-job fence signalling. Other inline commands objects can be 134 + * inserted between "job and inline command" pairs. 135 + * 2. Native fence queues are only supported on VPU 40xx onwards. 133 136 */ 134 137 VPU_JOB_QUEUE_FLAGS_USE_NATIVE_FENCE_MASK = (1 << 1U), 135 138 ··· 276 275 u64 value; 277 276 /* User VA of the log buffer in which to add log entry on completion. */ 278 277 u64 log_buffer_va; 278 + /* NPU private data. */ 279 + u64 npu_private_data; 279 280 } fence; 280 281 /* Other commands do not have a payload. */ 281 282 /* Payload definition for future inline commands can be inserted here. */ ··· 794 791 /** Metric group mask that identifies metric streamer instance. */ 795 792 u64 metric_group_mask; 796 793 /** 797 - * Address and size of the buffer where the VPU will write metric data. If 798 - * the buffer address is 0 or same as the currently used buffer the VPU will 799 - * continue writing metric data to the current buffer. In this case the 800 - * buffer size is ignored and the size of the current buffer is unchanged. 801 - * If the address is non-zero and differs from the current buffer address the 802 - * VPU will immediately switch data collection to the new buffer. 794 + * Address and size of the buffer where the VPU will write metric data. 795 + * This member dictates how the update operation should perform: 796 + * 1. client needs information about the number of collected samples and the 797 + * amount of data written to the current buffer 798 + * 2. client wants to switch to a new buffer 799 + * 800 + * Case 1. is identified by the buffer address being 0 or the same as the 801 + * currently used buffer address. In this case the buffer size is ignored and 802 + * the size of the current buffer is unchanged. The VPU will return an update 803 + * in the vpu_jsm_metric_streamer_done structure. The internal writing position 804 + * into the buffer is not changed. 805 + * 806 + * Case 2. is identified by the address being non-zero and differs from the 807 + * current buffer address. 
The VPU will immediately switch data collection to 808 + * the new buffer. Then the VPU will return an update in the 809 + * vpu_jsm_metric_streamer_done structure. 803 810 */ 804 811 u64 buffer_addr; 805 812 u64 buffer_size; ··· 947 934 /* 948 935 * Default quantum in 100ns units for scheduling across processes 949 936 * within a priority band 937 + * Minimum value supported by NPU is 1ms (10000 in 100ns units). 950 938 */ 951 939 u32 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS]; 952 940 /* ··· 960 946 * in situations when it's starved by the focus band. 961 947 */ 962 948 u32 normal_band_percentage; 963 - /* Reserved */ 964 - u32 reserved_0; 949 + /* 950 + * TDR timeout value in milliseconds. Default value of 0 meaning no timeout. 951 + */ 952 + u32 tdr_timeout; 965 953 }; 966 954 967 955 /* ··· 1040 1024 s32 in_process_priority; 1041 1025 /* Zero padding / Reserved */ 1042 1026 u32 reserved_1; 1043 - /* Context quantum relative to other contexts of same priority in the same process */ 1027 + /* 1028 + * Context quantum relative to other contexts of same priority in the same process 1029 + * Minimum value supported by NPU is 1ms (10000 in 100ns units). 1030 + */ 1044 1031 u64 context_quantum; 1045 1032 /* Grace period when preempting context of the same priority within the same process */ 1046 1033 u64 grace_period_same_priority;
+14 -5
drivers/dma-buf/sw_sync.c
··· 438 438 return -EINVAL; 439 439 440 440 pt = dma_fence_to_sync_pt(fence); 441 - if (!pt) 442 - return -EINVAL; 441 + if (!pt) { 442 + ret = -EINVAL; 443 + goto put_fence; 444 + } 443 445 444 446 spin_lock_irqsave(fence->lock, flags); 445 - if (test_bit(SW_SYNC_HAS_DEADLINE_BIT, &fence->flags)) { 446 - data.deadline_ns = ktime_to_ns(pt->deadline); 447 - } else { 447 + if (!test_bit(SW_SYNC_HAS_DEADLINE_BIT, &fence->flags)) { 448 448 ret = -ENOENT; 449 + goto unlock; 449 450 } 451 + data.deadline_ns = ktime_to_ns(pt->deadline); 450 452 spin_unlock_irqrestore(fence->lock, flags); 451 453 452 454 dma_fence_put(fence); ··· 460 458 return -EFAULT; 461 459 462 460 return 0; 461 + 462 + unlock: 463 + spin_unlock_irqrestore(fence->lock, flags); 464 + put_fence: 465 + dma_fence_put(fence); 466 + 467 + return ret; 463 468 } 464 469 465 470 static long sw_sync_ioctl(struct file *file, unsigned int cmd,
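
The deadline-ioctl fix converts the early returns into a single unwind path so the dma_fence reference taken at the top of the function is dropped on every error, with the fence lock released before the put on the -ENOENT path. The general shape of that pattern, as a self-contained sketch with placeholder names (not the sync_file API):

  #include <stdio.h>

  struct obj { int refs; };

  static void obj_get(struct obj *o) { o->refs++; }
  static void obj_put(struct obj *o) { o->refs--; }

  /* Every exit taken after obj_get() funnels through the same put. */
  static int deadline_ioctl(struct obj *fence, int lookup_ok, int has_deadline)
  {
      int ret = 0;

      obj_get(fence);              /* reference taken up front */

      if (!lookup_ok) {
          ret = -22;               /* -EINVAL */
          goto put;
      }
      if (!has_deadline) {
          ret = -2;                /* -ENOENT (the real fix drops the lock first) */
          goto put;
      }

      /* ... copy the deadline out to userspace ... */
  put:
      obj_put(fence);              /* balanced on success and on every error */
      return ret;
  }

  int main(void)
  {
      struct obj fence = { 0 };

      deadline_ioctl(&fence, 0, 0);
      printf("refs after failed ioctl: %d\n", fence.refs);   /* expect 0 */
      return 0;
  }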
+1
drivers/gpu/drm/amd/amdgpu/amdgpu.h
··· 1123 1123 bool in_s3; 1124 1124 bool in_s4; 1125 1125 bool in_s0ix; 1126 + suspend_state_t last_suspend_state; 1126 1127 1127 1128 enum pp_mp1_state mp1_state; 1128 1129 struct amdgpu_doorbell_index doorbell_index;
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
··· 3510 3510 amdgpu_device_mem_scratch_fini(adev); 3511 3511 amdgpu_ib_pool_fini(adev); 3512 3512 amdgpu_seq64_fini(adev); 3513 + amdgpu_doorbell_fini(adev); 3513 3514 } 3514 3515 if (adev->ip_blocks[i].version->funcs->sw_fini) { 3515 3516 r = adev->ip_blocks[i].version->funcs->sw_fini(&adev->ip_blocks[i]); ··· 4859 4858 4860 4859 iounmap(adev->rmmio); 4861 4860 adev->rmmio = NULL; 4862 - amdgpu_doorbell_fini(adev); 4863 4861 drm_dev_exit(idx); 4864 4862 } 4865 4863
+13 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
··· 2548 2548 adev->in_s0ix = true; 2549 2549 else if (amdgpu_acpi_is_s3_active(adev)) 2550 2550 adev->in_s3 = true; 2551 - if (!adev->in_s0ix && !adev->in_s3) 2551 + if (!adev->in_s0ix && !adev->in_s3) { 2552 + /* don't allow going deep first time followed by s2idle the next time */ 2553 + if (adev->last_suspend_state != PM_SUSPEND_ON && 2554 + adev->last_suspend_state != pm_suspend_target_state) { 2555 + drm_err_once(drm_dev, "Unsupported suspend state %d\n", 2556 + pm_suspend_target_state); 2557 + return -EINVAL; 2558 + } 2552 2559 return 0; 2560 + } 2561 + 2562 + /* cache the state last used for suspend */ 2563 + adev->last_suspend_state = pm_suspend_target_state; 2564 + 2553 2565 return amdgpu_device_suspend(drm_dev, true); 2554 2566 } 2555 2567
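
The suspend fix caches the platform suspend state used for the previous real suspend and refuses a different non-default state next time, which is what the "deep first, then s2idle" comment is guarding against. A reduced sketch of that decision; the enum values are stand-ins for the kernel's suspend_state_t, and the scenario in main() is only illustrative:

  #include <stdio.h>

  /* Stand-ins for the kernel's suspend_state_t values, illustration only */
  enum suspend_state { PM_SUSPEND_ON = 0, PM_SUSPEND_TO_IDLE, PM_SUSPEND_MEM };

  struct dev_state {
      int in_s0ix, in_s3;
      enum suspend_state last_suspend_state;
  };

  /* Returns 1 to do a full suspend, 0 to skip, negative to refuse. */
  static int prepare_suspend(struct dev_state *d, enum suspend_state target)
  {
      if (!d->in_s0ix && !d->in_s3) {
          /* don't allow going deep first time followed by s2idle the next time */
          if (d->last_suspend_state != PM_SUSPEND_ON &&
              d->last_suspend_state != target)
              return -22;          /* -EINVAL */
          return 0;
      }

      d->last_suspend_state = target;  /* cache the state last used for suspend */
      return 1;
  }

  int main(void)
  {
      struct dev_state d = { .in_s3 = 1, .last_suspend_state = PM_SUSPEND_ON };

      prepare_suspend(&d, PM_SUSPEND_MEM);   /* deep suspend: target gets cached */
      d.in_s3 = 0;                           /* next cycle: neither S3 nor s0ix */
      printf("%d\n", prepare_suspend(&d, PM_SUSPEND_TO_IDLE));  /* -22: refused */
      return 0;
  }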
+11 -3
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
··· 1438 1438 struct amdgpu_device *adev = ring->adev; 1439 1439 struct drm_gpu_scheduler *sched = &ring->sched; 1440 1440 struct drm_sched_entity entity; 1441 + static atomic_t counter; 1441 1442 struct dma_fence *f; 1442 1443 struct amdgpu_job *job; 1443 1444 struct amdgpu_ib *ib; 1445 + void *owner; 1444 1446 int i, r; 1445 1447 1446 1448 /* Initialize the scheduler entity */ ··· 1453 1451 goto err; 1454 1452 } 1455 1453 1456 - r = amdgpu_job_alloc_with_ib(ring->adev, &entity, NULL, 1457 - 64, 0, 1458 - &job); 1454 + /* 1455 + * Use some unique dummy value as the owner to make sure we execute 1456 + * the cleaner shader on each submission. The value just need to change 1457 + * for each submission and is otherwise meaningless. 1458 + */ 1459 + owner = (void *)(unsigned long)atomic_inc_return(&counter); 1460 + 1461 + r = amdgpu_job_alloc_with_ib(ring->adev, &entity, owner, 1462 + 64, 0, &job); 1459 1463 if (r) 1460 1464 goto err; 1461 1465
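
The cleaner-shader change makes every sysfs-triggered submission use a fresh dummy owner so the scheduler never treats two runs as coming from the same client and skips one; the owner is just an atomic counter cast to a pointer and is never dereferenced. A tiny standalone sketch of that idea, with C11 atomics standing in for the kernel's atomic_t:

  #include <stdatomic.h>
  #include <stdint.h>
  #include <stdio.h>

  static atomic_uint counter;

  /*
   * Produce a unique, otherwise meaningless "owner" cookie per submission;
   * only inequality between successive values matters, it is never dereferenced.
   */
  static void *dummy_owner(void)
  {
      return (void *)(uintptr_t)(atomic_fetch_add(&counter, 1) + 1);
  }

  int main(void)
  {
      void *a = dummy_owner();
      void *b = dummy_owner();

      printf("distinct owners: %s\n", a != b ? "yes" : "no");
      return 0;
  }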
+4 -4
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
··· 6114 6114 } 6115 6115 6116 6116 if (amdgpu_emu_mode == 1) 6117 - adev->hdp.funcs->flush_hdp(adev, NULL); 6117 + amdgpu_device_flush_hdp(adev, NULL); 6118 6118 6119 6119 tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL); 6120 6120 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); ··· 6192 6192 } 6193 6193 6194 6194 if (amdgpu_emu_mode == 1) 6195 - adev->hdp.funcs->flush_hdp(adev, NULL); 6195 + amdgpu_device_flush_hdp(adev, NULL); 6196 6196 6197 6197 tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL); 6198 6198 tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0); ··· 6269 6269 } 6270 6270 6271 6271 if (amdgpu_emu_mode == 1) 6272 - adev->hdp.funcs->flush_hdp(adev, NULL); 6272 + amdgpu_device_flush_hdp(adev, NULL); 6273 6273 6274 6274 tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL); 6275 6275 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); ··· 6644 6644 } 6645 6645 6646 6646 if (amdgpu_emu_mode == 1) 6647 - adev->hdp.funcs->flush_hdp(adev, NULL); 6647 + amdgpu_device_flush_hdp(adev, NULL); 6648 6648 6649 6649 tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL); 6650 6650 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
+6 -6
drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
··· 2428 2428 } 2429 2429 2430 2430 if (amdgpu_emu_mode == 1) 2431 - adev->hdp.funcs->flush_hdp(adev, NULL); 2431 + amdgpu_device_flush_hdp(adev, NULL); 2432 2432 2433 2433 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 2434 2434 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); ··· 2472 2472 } 2473 2473 2474 2474 if (amdgpu_emu_mode == 1) 2475 - adev->hdp.funcs->flush_hdp(adev, NULL); 2475 + amdgpu_device_flush_hdp(adev, NULL); 2476 2476 2477 2477 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); 2478 2478 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); ··· 2517 2517 } 2518 2518 2519 2519 if (amdgpu_emu_mode == 1) 2520 - adev->hdp.funcs->flush_hdp(adev, NULL); 2520 + amdgpu_device_flush_hdp(adev, NULL); 2521 2521 2522 2522 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); 2523 2523 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); ··· 3153 3153 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); 3154 3154 3155 3155 if (amdgpu_emu_mode == 1) 3156 - adev->hdp.funcs->flush_hdp(adev, NULL); 3156 + amdgpu_device_flush_hdp(adev, NULL); 3157 3157 3158 3158 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 3159 3159 lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); ··· 3371 3371 amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj); 3372 3372 3373 3373 if (amdgpu_emu_mode == 1) 3374 - adev->hdp.funcs->flush_hdp(adev, NULL); 3374 + amdgpu_device_flush_hdp(adev, NULL); 3375 3375 3376 3376 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 3377 3377 lower_32_bits(adev->gfx.me.me_fw_gpu_addr)); ··· 4541 4541 if (r) 4542 4542 return r; 4543 4543 4544 - adev->hdp.funcs->flush_hdp(adev, NULL); 4544 + amdgpu_device_flush_hdp(adev, NULL); 4545 4545 4546 4546 value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 4547 4547 false : true;
+3 -3
drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
··· 2324 2324 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); 2325 2325 2326 2326 if (amdgpu_emu_mode == 1) 2327 - adev->hdp.funcs->flush_hdp(adev, NULL); 2327 + amdgpu_device_flush_hdp(adev, NULL); 2328 2328 2329 2329 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 2330 2330 lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); ··· 2468 2468 amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj); 2469 2469 2470 2470 if (amdgpu_emu_mode == 1) 2471 - adev->hdp.funcs->flush_hdp(adev, NULL); 2471 + amdgpu_device_flush_hdp(adev, NULL); 2472 2472 2473 2473 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 2474 2474 lower_32_bits(adev->gfx.me.me_fw_gpu_addr)); ··· 3426 3426 if (r) 3427 3427 return r; 3428 3428 3429 - adev->hdp.funcs->flush_hdp(adev, NULL); 3429 + amdgpu_device_flush_hdp(adev, NULL); 3430 3430 3431 3431 value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 3432 3432 false : true;
+2 -2
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
··· 268 268 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; 269 269 270 270 /* flush hdp cache */ 271 - adev->hdp.funcs->flush_hdp(adev, NULL); 271 + amdgpu_device_flush_hdp(adev, NULL); 272 272 273 273 /* This is necessary for SRIOV as well as for GFXOFF to function 274 274 * properly under bare metal ··· 969 969 adev->hdp.funcs->init_registers(adev); 970 970 971 971 /* Flush HDP after it is initialized */ 972 - adev->hdp.funcs->flush_hdp(adev, NULL); 972 + amdgpu_device_flush_hdp(adev, NULL); 973 973 974 974 value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 975 975 false : true;
+2 -2
drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
··· 229 229 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; 230 230 231 231 /* flush hdp cache */ 232 - adev->hdp.funcs->flush_hdp(adev, NULL); 232 + amdgpu_device_flush_hdp(adev, NULL); 233 233 234 234 /* This is necessary for SRIOV as well as for GFXOFF to function 235 235 * properly under bare metal ··· 899 899 return r; 900 900 901 901 /* Flush HDP after it is initialized */ 902 - adev->hdp.funcs->flush_hdp(adev, NULL); 902 + amdgpu_device_flush_hdp(adev, NULL); 903 903 904 904 value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 905 905 false : true;
+2 -2
drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
··· 297 297 return; 298 298 299 299 /* flush hdp cache */ 300 - adev->hdp.funcs->flush_hdp(adev, NULL); 300 + amdgpu_device_flush_hdp(adev, NULL); 301 301 302 302 /* This is necessary for SRIOV as well as for GFXOFF to function 303 303 * properly under bare metal ··· 881 881 return r; 882 882 883 883 /* Flush HDP after it is initialized */ 884 - adev->hdp.funcs->flush_hdp(adev, NULL); 884 + amdgpu_device_flush_hdp(adev, NULL); 885 885 886 886 value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 887 887 false : true;
+1 -1
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
··· 2435 2435 adev->hdp.funcs->init_registers(adev); 2436 2436 2437 2437 /* After HDP is initialized, flush HDP.*/ 2438 - adev->hdp.funcs->flush_hdp(adev, NULL); 2438 + amdgpu_device_flush_hdp(adev, NULL); 2439 2439 2440 2440 if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) 2441 2441 value = false;
+1 -1
drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
··· 533 533 } 534 534 535 535 memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); 536 - adev->hdp.funcs->flush_hdp(adev, NULL); 536 + amdgpu_device_flush_hdp(adev, NULL); 537 537 vfree(buf); 538 538 drm_dev_exit(idx); 539 539 } else {
+1 -1
drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
··· 610 610 } 611 611 612 612 memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); 613 - adev->hdp.funcs->flush_hdp(adev, NULL); 613 + amdgpu_device_flush_hdp(adev, NULL); 614 614 vfree(buf); 615 615 drm_dev_exit(idx); 616 616 } else {
+1 -1
drivers/gpu/drm/amd/amdgpu/psp_v14_0.c
··· 498 498 } 499 499 500 500 memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); 501 - adev->hdp.funcs->flush_hdp(adev, NULL); 501 + amdgpu_device_flush_hdp(adev, NULL); 502 502 vfree(buf); 503 503 drm_dev_exit(idx); 504 504 } else {
+7
drivers/gpu/drm/amd/amdgpu/vi.c
··· 239 239 .max_pixels_per_frame = 4096 * 4096, 240 240 .max_level = 186, 241 241 }, 242 + { 243 + .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 244 + .max_width = 4096, 245 + .max_height = 4096, 246 + .max_pixels_per_frame = 4096 * 4096, 247 + .max_level = 0, 248 + }, 242 249 }; 243 250 244 251 static const struct amdgpu_video_codecs cz_video_codecs_decode =
+6 -5
drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c
··· 2 2 // 3 3 // Copyright 2024 Advanced Micro Devices, Inc. 4 4 5 + #include <linux/vmalloc.h> 5 6 6 7 #include "dml2_internal_types.h" 7 8 #include "dml_top.h" ··· 14 13 15 14 static bool dml21_allocate_memory(struct dml2_context **dml_ctx) 16 15 { 17 - *dml_ctx = kzalloc(sizeof(struct dml2_context), GFP_KERNEL); 16 + *dml_ctx = vzalloc(sizeof(struct dml2_context)); 18 17 if (!(*dml_ctx)) 19 18 return false; 20 19 21 - (*dml_ctx)->v21.dml_init.dml2_instance = kzalloc(sizeof(struct dml2_instance), GFP_KERNEL); 20 + (*dml_ctx)->v21.dml_init.dml2_instance = vzalloc(sizeof(struct dml2_instance)); 22 21 if (!((*dml_ctx)->v21.dml_init.dml2_instance)) 23 22 return false; 24 23 ··· 28 27 (*dml_ctx)->v21.mode_support.display_config = &(*dml_ctx)->v21.display_config; 29 28 (*dml_ctx)->v21.mode_programming.display_config = (*dml_ctx)->v21.mode_support.display_config; 30 29 31 - (*dml_ctx)->v21.mode_programming.programming = kzalloc(sizeof(struct dml2_display_cfg_programming), GFP_KERNEL); 30 + (*dml_ctx)->v21.mode_programming.programming = vzalloc(sizeof(struct dml2_display_cfg_programming)); 32 31 if (!((*dml_ctx)->v21.mode_programming.programming)) 33 32 return false; 34 33 ··· 116 115 117 116 void dml21_destroy(struct dml2_context *dml2) 118 117 { 119 - kfree(dml2->v21.dml_init.dml2_instance); 120 - kfree(dml2->v21.mode_programming.programming); 118 + vfree(dml2->v21.dml_init.dml2_instance); 119 + vfree(dml2->v21.mode_programming.programming); 121 120 } 122 121 123 122 static void dml21_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *context, struct resource_context *out_new_hw_state,
+4 -2
drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
··· 24 24 * 25 25 */ 26 26 27 + #include <linux/vmalloc.h> 28 + 27 29 #include "display_mode_core.h" 28 30 #include "dml2_internal_types.h" 29 31 #include "dml2_utils.h" ··· 749 747 750 748 static inline struct dml2_context *dml2_allocate_memory(void) 751 749 { 752 - return (struct dml2_context *) kzalloc(sizeof(struct dml2_context), GFP_KERNEL); 750 + return (struct dml2_context *) vzalloc(sizeof(struct dml2_context)); 753 751 } 754 752 755 753 static void dml2_init(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2) ··· 823 821 824 822 if (dml2->architecture == dml2_architecture_21) 825 823 dml21_destroy(dml2); 826 - kfree(dml2); 824 + vfree(dml2); 827 825 } 828 826 829 827 void dml2_extract_dram_and_fclk_change_support(struct dml2_context *dml2,
+1
drivers/gpu/drm/i915/display/intel_display_device.h
··· 161 161 #define HAS_DPT(__display) (DISPLAY_VER(__display) >= 13) 162 162 #define HAS_DSB(__display) (DISPLAY_INFO(__display)->has_dsb) 163 163 #define HAS_DSC(__display) (DISPLAY_RUNTIME_INFO(__display)->has_dsc) 164 + #define HAS_DSC_3ENGINES(__display) (DISPLAY_VERx100(__display) == 1401 && HAS_DSC(__display)) 164 165 #define HAS_DSC_MST(__display) (DISPLAY_VER(__display) >= 12 && HAS_DSC(__display)) 165 166 #define HAS_FBC(__display) (DISPLAY_RUNTIME_INFO(__display)->fbc_mask != 0) 166 167 #define HAS_FBC_DIRTY_RECT(__display) (DISPLAY_VER(__display) >= 30)
+4 -3
drivers/gpu/drm/i915/display/intel_dp.c
··· 1050 1050 u8 test_slice_count = valid_dsc_slicecount[i] * num_joined_pipes; 1051 1051 1052 1052 /* 1053 - * 3 DSC Slices per pipe need 3 DSC engines, 1054 - * which is supported only with Ultrajoiner. 1053 + * 3 DSC Slices per pipe need 3 DSC engines, which is supported only 1054 + * with Ultrajoiner only for some platforms. 1055 1055 */ 1056 - if (valid_dsc_slicecount[i] == 3 && num_joined_pipes != 4) 1056 + if (valid_dsc_slicecount[i] == 3 && 1057 + (!HAS_DSC_3ENGINES(display) || num_joined_pipes != 4)) 1057 1058 continue; 1058 1059 1059 1060 if (test_slice_count >
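
With HAS_DSC_3ENGINES in place, 3 DSC slices per pipe are only accepted when the platform really has three DSC engines (display version 14.01, per the macro added above) and the 4-pipe ultrajoiner is in use. The gate reduces to a small predicate; an illustrative sketch with the version check spelled out as a plain integer comparison:

  #include <stdbool.h>
  #include <stdio.h>

  /* 1401 mirrors the DISPLAY_VERx100(__display) == 1401 check in the new macro */
  static bool has_dsc_3engines(int display_verx100, bool has_dsc)
  {
      return display_verx100 == 1401 && has_dsc;
  }

  static bool slice_count_usable(int slices_per_pipe, int num_joined_pipes,
                                 int display_verx100, bool has_dsc)
  {
      /* 3 slices per pipe need 3 DSC engines: only some platforms have them,
       * and only the 4-pipe ultrajoiner configuration may use them */
      if (slices_per_pipe == 3 &&
          (!has_dsc_3engines(display_verx100, has_dsc) || num_joined_pipes != 4))
          return false;
      return true;
  }

  int main(void)
  {
      printf("%d\n", slice_count_usable(3, 4, 1401, true));  /* 1: allowed */
      printf("%d\n", slice_count_usable(3, 4, 1400, true));  /* 0: no 3rd engine */
      printf("%d\n", slice_count_usable(3, 2, 1401, true));  /* 0: not ultrajoiner */
      return 0;
  }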
+1 -1
drivers/gpu/drm/mgag200/mgag200_mode.c
··· 223 223 vsyncstr = mode->crtc_vsync_start - 1; 224 224 vsyncend = mode->crtc_vsync_end - 1; 225 225 vtotal = mode->crtc_vtotal - 2; 226 - vblkstr = mode->crtc_vblank_start; 226 + vblkstr = mode->crtc_vblank_start - 1; 227 227 vblkend = vtotal + 1; 228 228 229 229 linecomp = vdispend;
+4 -4
drivers/gpu/drm/msm/adreno/a6xx_gpu.c
··· 242 242 break; 243 243 fallthrough; 244 244 case MSM_SUBMIT_CMD_BUF: 245 - OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3); 245 + OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3); 246 246 OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); 247 247 OUT_RING(ring, upper_32_bits(submit->cmd[i].iova)); 248 - OUT_RING(ring, submit->cmd[i].size); 248 + OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size)); 249 249 ibs++; 250 250 break; 251 251 } ··· 377 377 break; 378 378 fallthrough; 379 379 case MSM_SUBMIT_CMD_BUF: 380 - OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3); 380 + OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3); 381 381 OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); 382 382 OUT_RING(ring, upper_32_bits(submit->cmd[i].iova)); 383 - OUT_RING(ring, submit->cmd[i].size); 383 + OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size)); 384 384 ibs++; 385 385 break; 386 386 }
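
The overflow fix routes the command-stream size through the IB_SIZE field accessor instead of writing the raw value, so a size wider than the field cannot spill into neighbouring bits of the CP_INDIRECT_BUFFER packet. Per the adreno_pm4.xml hunk added later in this series, IB_SIZE occupies bits 0..19; a sketch of that packing (the mask constant is derived from the XML, the kernel's accessor itself is generated):

  #include <stdint.h>
  #include <stdio.h>

  /* IB_SIZE is bits 0..19 of CP_INDIRECT_BUFFER dword 2 (per the XML hunk) */
  #define IB_SIZE_MASK 0x000fffffu

  static uint32_t pack_ib_size(uint32_t size_dwords)
  {
      return size_dwords & IB_SIZE_MASK;   /* confine the value to the field */
  }

  int main(void)
  {
      uint32_t too_big = 0x00200000;       /* wider than the 20-bit field */

      printf("raw 0x%08x -> packed 0x%08x\n", too_big, pack_ib_size(too_big));
      return 0;
  }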
-2
drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_14_msm8937.h
··· 132 132 .prog_fetch_lines_worst_case = 14, 133 133 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), 134 134 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), 135 - .intr_tear_rd_ptr = -1, 136 135 }, { 137 136 .name = "intf_2", .id = INTF_2, 138 137 .base = 0x6b000, .len = 0x268, ··· 140 141 .prog_fetch_lines_worst_case = 14, 141 142 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 28), 142 143 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 29), 143 - .intr_tear_rd_ptr = -1, 144 144 }, 145 145 }; 146 146
-1
drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_15_msm8917.h
··· 118 118 .prog_fetch_lines_worst_case = 14, 119 119 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), 120 120 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), 121 - .intr_tear_rd_ptr = -1, 122 121 }, 123 122 }; 124 123
-3
drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_16_msm8953.h
··· 131 131 .prog_fetch_lines_worst_case = 14, 132 132 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 24), 133 133 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 25), 134 - .intr_tear_rd_ptr = -1, 135 134 }, { 136 135 .name = "intf_1", .id = INTF_1, 137 136 .base = 0x6a800, .len = 0x268, ··· 139 140 .prog_fetch_lines_worst_case = 14, 140 141 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), 141 142 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), 142 - .intr_tear_rd_ptr = -1, 143 143 }, { 144 144 .name = "intf_2", .id = INTF_2, 145 145 .base = 0x6b000, .len = 0x268, ··· 147 149 .prog_fetch_lines_worst_case = 14, 148 150 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 28), 149 151 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 29), 150 - .intr_tear_rd_ptr = -1, 151 152 }, 152 153 }; 153 154
-4
drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_7_msm8996.h
··· 241 241 .prog_fetch_lines_worst_case = 25, 242 242 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 24), 243 243 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 25), 244 - .intr_tear_rd_ptr = -1, 245 244 }, { 246 245 .name = "intf_1", .id = INTF_1, 247 246 .base = 0x6a800, .len = 0x268, ··· 249 250 .prog_fetch_lines_worst_case = 25, 250 251 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), 251 252 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), 252 - .intr_tear_rd_ptr = -1, 253 253 }, { 254 254 .name = "intf_2", .id = INTF_2, 255 255 .base = 0x6b000, .len = 0x268, ··· 257 259 .prog_fetch_lines_worst_case = 25, 258 260 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 28), 259 261 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 29), 260 - .intr_tear_rd_ptr = -1, 261 262 }, { 262 263 .name = "intf_3", .id = INTF_3, 263 264 .base = 0x6b800, .len = 0x268, ··· 264 267 .prog_fetch_lines_worst_case = 25, 265 268 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 30), 266 269 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 31), 267 - .intr_tear_rd_ptr = -1, 268 270 }, 269 271 }; 270 272
-3
drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_2_sdm660.h
··· 202 202 .prog_fetch_lines_worst_case = 21, 203 203 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 24), 204 204 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 25), 205 - .intr_tear_rd_ptr = -1, 206 205 }, { 207 206 .name = "intf_1", .id = INTF_1, 208 207 .base = 0x6a800, .len = 0x280, ··· 210 211 .prog_fetch_lines_worst_case = 21, 211 212 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), 212 213 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), 213 - .intr_tear_rd_ptr = -1, 214 214 }, { 215 215 .name = "intf_2", .id = INTF_2, 216 216 .base = 0x6b000, .len = 0x280, ··· 218 220 .prog_fetch_lines_worst_case = 21, 219 221 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 28), 220 222 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 29), 221 - .intr_tear_rd_ptr = -1, 222 223 }, 223 224 }; 224 225
-2
drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_3_sdm630.h
··· 147 147 .prog_fetch_lines_worst_case = 21, 148 148 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 24), 149 149 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 25), 150 - .intr_tear_rd_ptr = -1, 151 150 }, { 152 151 .name = "intf_1", .id = INTF_1, 153 152 .base = 0x6a800, .len = 0x280, ··· 155 156 .prog_fetch_lines_worst_case = 21, 156 157 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), 157 158 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), 158 - .intr_tear_rd_ptr = -1, 159 159 }, 160 160 }; 161 161
+3 -1
drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
··· 1666 1666 */ 1667 1667 static void _dpu_encoder_trigger_start(struct dpu_encoder_phys *phys) 1668 1668 { 1669 - struct dpu_encoder_virt *dpu_enc = to_dpu_encoder_virt(phys->parent); 1669 + struct dpu_encoder_virt *dpu_enc; 1670 1670 1671 1671 if (!phys) { 1672 1672 DPU_ERROR("invalid argument(s)\n"); ··· 1677 1677 DPU_ERROR("invalid pingpong hw\n"); 1678 1678 return; 1679 1679 } 1680 + 1681 + dpu_enc = to_dpu_encoder_virt(phys->parent); 1680 1682 1681 1683 if (phys->parent->encoder_type == DRM_MODE_ENCODER_VIRTUAL && 1682 1684 dpu_enc->cwb_mask) {
+39 -35
drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
··· 729 729 static int dpu_plane_atomic_check_pipe(struct dpu_plane *pdpu, 730 730 struct dpu_sw_pipe *pipe, 731 731 struct dpu_sw_pipe_cfg *pipe_cfg, 732 - const struct msm_format *fmt, 733 - const struct drm_display_mode *mode) 732 + const struct drm_display_mode *mode, 733 + struct drm_plane_state *new_plane_state) 734 734 { 735 735 uint32_t min_src_size; 736 736 struct dpu_kms *kms = _dpu_plane_get_kms(&pdpu->base); 737 737 int ret; 738 + const struct msm_format *fmt; 739 + uint32_t supported_rotations; 740 + const struct dpu_sspp_cfg *pipe_hw_caps; 741 + const struct dpu_sspp_sub_blks *sblk; 742 + 743 + pipe_hw_caps = pipe->sspp->cap; 744 + sblk = pipe->sspp->cap->sblk; 745 + 746 + /* 747 + * We already have verified scaling against platform limitations. 748 + * Now check if the SSPP supports scaling at all. 749 + */ 750 + if (!sblk->scaler_blk.len && 751 + ((drm_rect_width(&new_plane_state->src) >> 16 != 752 + drm_rect_width(&new_plane_state->dst)) || 753 + (drm_rect_height(&new_plane_state->src) >> 16 != 754 + drm_rect_height(&new_plane_state->dst)))) 755 + return -ERANGE; 756 + 757 + fmt = msm_framebuffer_format(new_plane_state->fb); 758 + 759 + supported_rotations = DRM_MODE_REFLECT_MASK | DRM_MODE_ROTATE_0; 760 + 761 + if (pipe_hw_caps->features & BIT(DPU_SSPP_INLINE_ROTATION)) 762 + supported_rotations |= DRM_MODE_ROTATE_90; 763 + 764 + pipe_cfg->rotation = drm_rotation_simplify(new_plane_state->rotation, 765 + supported_rotations); 738 766 739 767 min_src_size = MSM_FORMAT_IS_YUV(fmt) ? 2 : 1; 740 768 ··· 951 923 struct dpu_plane_state *pstate = to_dpu_plane_state(new_plane_state); 952 924 struct dpu_sw_pipe *pipe = &pstate->pipe; 953 925 struct dpu_sw_pipe *r_pipe = &pstate->r_pipe; 954 - const struct msm_format *fmt; 955 926 struct dpu_sw_pipe_cfg *pipe_cfg = &pstate->pipe_cfg; 956 927 struct dpu_sw_pipe_cfg *r_pipe_cfg = &pstate->r_pipe_cfg; 957 - uint32_t supported_rotations; 958 - const struct dpu_sspp_cfg *pipe_hw_caps; 959 - const struct dpu_sspp_sub_blks *sblk; 960 928 int ret = 0; 961 929 962 - pipe_hw_caps = pipe->sspp->cap; 963 - sblk = pipe->sspp->cap->sblk; 964 - 965 - /* 966 - * We already have verified scaling against platform limitations. 967 - * Now check if the SSPP supports scaling at all. 
968 - */ 969 - if (!sblk->scaler_blk.len && 970 - ((drm_rect_width(&new_plane_state->src) >> 16 != 971 - drm_rect_width(&new_plane_state->dst)) || 972 - (drm_rect_height(&new_plane_state->src) >> 16 != 973 - drm_rect_height(&new_plane_state->dst)))) 974 - return -ERANGE; 975 - 976 - fmt = msm_framebuffer_format(new_plane_state->fb); 977 - 978 - supported_rotations = DRM_MODE_REFLECT_MASK | DRM_MODE_ROTATE_0; 979 - 980 - if (pipe_hw_caps->features & BIT(DPU_SSPP_INLINE_ROTATION)) 981 - supported_rotations |= DRM_MODE_ROTATE_90; 982 - 983 - pipe_cfg->rotation = drm_rotation_simplify(new_plane_state->rotation, 984 - supported_rotations); 985 - r_pipe_cfg->rotation = pipe_cfg->rotation; 986 - 987 - ret = dpu_plane_atomic_check_pipe(pdpu, pipe, pipe_cfg, fmt, 988 - &crtc_state->adjusted_mode); 930 + ret = dpu_plane_atomic_check_pipe(pdpu, pipe, pipe_cfg, 931 + &crtc_state->adjusted_mode, 932 + new_plane_state); 989 933 if (ret) 990 934 return ret; 991 935 992 936 if (drm_rect_width(&r_pipe_cfg->src_rect) != 0) { 993 - ret = dpu_plane_atomic_check_pipe(pdpu, r_pipe, r_pipe_cfg, fmt, 994 - &crtc_state->adjusted_mode); 937 + ret = dpu_plane_atomic_check_pipe(pdpu, r_pipe, r_pipe_cfg, 938 + &crtc_state->adjusted_mode, 939 + new_plane_state); 995 940 if (ret) 996 941 return ret; 997 942 } ··· 1059 1058 struct dpu_plane_state *pstate = to_dpu_plane_state(plane_state); 1060 1059 struct drm_crtc_state *crtc_state; 1061 1060 int ret; 1061 + 1062 + if (IS_ERR(plane_state)) 1063 + return PTR_ERR(plane_state); 1062 1064 1063 1065 if (plane_state->crtc) 1064 1066 crtc_state = drm_atomic_get_new_crtc_state(state,
+7
drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml
··· 2259 2259 </reg32> 2260 2260 </domain> 2261 2261 2262 + <domain name="CP_INDIRECT_BUFFER" width="32" varset="chip" prefix="chip" variants="A5XX-"> 2263 + <reg64 offset="0" name="IB_BASE" type="address"/> 2264 + <reg32 offset="2" name="2"> 2265 + <bitfield name="IB_SIZE" low="0" high="19"/> 2266 + </reg32> 2267 + </domain> 2268 + 2262 2269 </database> 2263 2270
+13 -3
drivers/gpu/drm/v3d/v3d_sched.c
··· 428 428 struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]); 429 429 struct v3d_bo *indirect = to_v3d_bo(indirect_csd->indirect); 430 430 struct drm_v3d_submit_csd *args = &indirect_csd->job->args; 431 - u32 *wg_counts; 431 + struct v3d_dev *v3d = job->base.v3d; 432 + u32 num_batches, *wg_counts; 432 433 433 434 v3d_get_bo_vaddr(bo); 434 435 v3d_get_bo_vaddr(indirect); ··· 442 441 args->cfg[0] = wg_counts[0] << V3D_CSD_CFG012_WG_COUNT_SHIFT; 443 442 args->cfg[1] = wg_counts[1] << V3D_CSD_CFG012_WG_COUNT_SHIFT; 444 443 args->cfg[2] = wg_counts[2] << V3D_CSD_CFG012_WG_COUNT_SHIFT; 445 - args->cfg[4] = DIV_ROUND_UP(indirect_csd->wg_size, 16) * 446 - (wg_counts[0] * wg_counts[1] * wg_counts[2]) - 1; 444 + 445 + num_batches = DIV_ROUND_UP(indirect_csd->wg_size, 16) * 446 + (wg_counts[0] * wg_counts[1] * wg_counts[2]); 447 + 448 + /* V3D 7.1.6 and later don't subtract 1 from the number of batches */ 449 + if (v3d->ver < 71 || (v3d->ver == 71 && v3d->rev < 6)) 450 + args->cfg[4] = num_batches - 1; 451 + else 452 + args->cfg[4] = num_batches; 453 + 454 + WARN_ON(args->cfg[4] == ~0); 447 455 448 456 for (int i = 0; i < 3; i++) { 449 457 /* 0xffffffff indicates that the uniform rewrite is not needed */
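
The indirect-dispatch fix derives the batch count from the workgroup counts read back from the indirect buffer and only applies the historical minus-one on hardware older than V3D 7.1.6. The arithmetic as a standalone sketch, with DIV_ROUND_UP spelled out and an illustrative dispatch size:

  #include <stdint.h>
  #include <stdio.h>

  #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

  static uint32_t csd_cfg4(uint32_t wg_size, const uint32_t wg_counts[3],
                           int ver, int rev)
  {
      uint32_t num_batches = DIV_ROUND_UP(wg_size, 16) *
                             (wg_counts[0] * wg_counts[1] * wg_counts[2]);

      /* V3D 7.1.6 and later don't subtract 1 from the number of batches */
      if (ver < 71 || (ver == 71 && rev < 6))
          return num_batches - 1;
      return num_batches;
  }

  int main(void)
  {
      uint32_t counts[3] = { 4, 4, 1 };    /* illustrative dispatch */

      printf("cfg[4] on 4.2:   %u\n", csd_cfg4(64, counts, 42, 0));   /* 63 */
      printf("cfg[4] on 7.1.6: %u\n", csd_cfg4(64, counts, 71, 6));   /* 64 */
      return 0;
  }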
+1 -4
drivers/gpu/drm/xe/xe_dma_buf.c
··· 145 145 struct sg_table *sgt, 146 146 enum dma_data_direction dir) 147 147 { 148 - struct dma_buf *dma_buf = attach->dmabuf; 149 - struct xe_bo *bo = gem_to_xe_bo(dma_buf->priv); 150 - 151 - if (!xe_bo_is_vram(bo)) { 148 + if (sg_page(sgt->sgl)) { 152 149 dma_unmap_sgtable(attach->dev, sgt, dir, 0); 153 150 sg_free_table(sgt); 154 151 kfree(sgt);
+45 -30
drivers/gpu/drm/xe/xe_guc_ads.c
··· 490 490 engine_enable_mask(gt, XE_ENGINE_CLASS_OTHER)); 491 491 } 492 492 493 - static void guc_prep_golden_lrc_null(struct xe_guc_ads *ads) 493 + /* 494 + * Write the offsets corresponding to the golden LRCs. The actual data is 495 + * populated later by guc_golden_lrc_populate() 496 + */ 497 + static void guc_golden_lrc_init(struct xe_guc_ads *ads) 494 498 { 495 499 struct xe_device *xe = ads_to_xe(ads); 500 + struct xe_gt *gt = ads_to_gt(ads); 496 501 struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), 497 502 offsetof(struct __guc_ads_blob, system_info)); 498 - u8 guc_class; 503 + size_t alloc_size, real_size; 504 + u32 addr_ggtt, offset; 505 + int class; 499 506 500 - for (guc_class = 0; guc_class <= GUC_MAX_ENGINE_CLASSES; ++guc_class) { 507 + offset = guc_ads_golden_lrc_offset(ads); 508 + addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset; 509 + 510 + for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { 511 + u8 guc_class; 512 + 513 + guc_class = xe_engine_class_to_guc_class(class); 514 + 501 515 if (!info_map_read(xe, &info_map, 502 516 engine_enabled_masks[guc_class])) 503 517 continue; 504 518 519 + real_size = xe_gt_lrc_size(gt, class); 520 + alloc_size = PAGE_ALIGN(real_size); 521 + 522 + /* 523 + * This interface is slightly confusing. We need to pass the 524 + * base address of the full golden context and the size of just 525 + * the engine state, which is the section of the context image 526 + * that starts after the execlists LRC registers. This is 527 + * required to allow the GuC to restore just the engine state 528 + * when a watchdog reset occurs. 529 + * We calculate the engine state size by removing the size of 530 + * what comes before it in the context image (which is identical 531 + * on all engines). 532 + */ 505 533 ads_blob_write(ads, ads.eng_state_size[guc_class], 506 - guc_ads_golden_lrc_size(ads) - 507 - xe_lrc_skip_size(xe)); 534 + real_size - xe_lrc_skip_size(xe)); 508 535 ads_blob_write(ads, ads.golden_context_lrca[guc_class], 509 - xe_bo_ggtt_addr(ads->bo) + 510 - guc_ads_golden_lrc_offset(ads)); 536 + addr_ggtt); 537 + 538 + addr_ggtt += alloc_size; 511 539 } 512 540 } 513 541 ··· 885 857 886 858 xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); 887 859 guc_policies_init(ads); 888 - guc_prep_golden_lrc_null(ads); 860 + guc_golden_lrc_init(ads); 889 861 guc_mapping_table_init_invalid(gt, &info_map); 890 862 guc_doorbell_init(ads); 891 863 ··· 911 883 guc_policies_init(ads); 912 884 fill_engine_enable_masks(gt, &info_map); 913 885 guc_mmio_reg_state_init(ads); 914 - guc_prep_golden_lrc_null(ads); 886 + guc_golden_lrc_init(ads); 915 887 guc_mapping_table_init(gt, &info_map); 916 888 guc_capture_prep_lists(ads); 917 889 guc_doorbell_init(ads); ··· 931 903 guc_ads_private_data_offset(ads)); 932 904 } 933 905 934 - static void guc_populate_golden_lrc(struct xe_guc_ads *ads) 906 + /* 907 + * After the golden LRC's are recorded for each engine class by the first 908 + * submission, copy them to the ADS, as initialized earlier by 909 + * guc_golden_lrc_init(). 
910 + */ 911 + static void guc_golden_lrc_populate(struct xe_guc_ads *ads) 935 912 { 936 913 struct xe_device *xe = ads_to_xe(ads); 937 914 struct xe_gt *gt = ads_to_gt(ads); 938 915 struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), 939 916 offsetof(struct __guc_ads_blob, system_info)); 940 917 size_t total_size = 0, alloc_size, real_size; 941 - u32 addr_ggtt, offset; 918 + u32 offset; 942 919 int class; 943 920 944 921 offset = guc_ads_golden_lrc_offset(ads); 945 - addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset; 946 922 947 923 for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { 948 924 u8 guc_class; ··· 963 931 alloc_size = PAGE_ALIGN(real_size); 964 932 total_size += alloc_size; 965 933 966 - /* 967 - * This interface is slightly confusing. We need to pass the 968 - * base address of the full golden context and the size of just 969 - * the engine state, which is the section of the context image 970 - * that starts after the execlists LRC registers. This is 971 - * required to allow the GuC to restore just the engine state 972 - * when a watchdog reset occurs. 973 - * We calculate the engine state size by removing the size of 974 - * what comes before it in the context image (which is identical 975 - * on all engines). 976 - */ 977 - ads_blob_write(ads, ads.eng_state_size[guc_class], 978 - real_size - xe_lrc_skip_size(xe)); 979 - ads_blob_write(ads, ads.golden_context_lrca[guc_class], 980 - addr_ggtt); 981 - 982 934 xe_map_memcpy_to(xe, ads_to_map(ads), offset, 983 935 gt->default_lrc[class], real_size); 984 936 985 - addr_ggtt += alloc_size; 986 937 offset += alloc_size; 987 938 } 988 939 ··· 974 959 975 960 void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads) 976 961 { 977 - guc_populate_golden_lrc(ads); 962 + guc_golden_lrc_populate(ads); 978 963 } 979 964 980 965 static int guc_ads_action_update_policies(struct xe_guc_ads *ads, u32 policy_offset)
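
Splitting the golden-LRC handling into guc_golden_lrc_init() and guc_golden_lrc_populate() means the per-class GGTT addresses and engine-state sizes are written into the ADS before the GuC loads, with each class's slot advancing by the page-aligned LRC size, while the context images are only copied in after the first submission records them. A compact sketch of that address layout; the base address and per-class sizes are hypothetical:

  #include <stdint.h>
  #include <stdio.h>

  #define PAGE_SIZE     4096u
  #define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

  int main(void)
  {
      /* Hypothetical per-class golden LRC sizes, just to show the layout */
      const uint32_t lrc_size[] = { 86016, 39000, 45060 };
      uint32_t addr_ggtt = 0x100000;   /* ads BO address + golden LRC offset */

      for (unsigned int class = 0; class < 3; class++) {
          /* the kernel additionally records lrc_size minus the LRC skip
           * size as the per-class engine state size */
          printf("class %u: golden LRC at GGTT 0x%08x (%u bytes)\n",
                 class, addr_ggtt, lrc_size[class]);
          addr_ggtt += PAGE_ALIGN(lrc_size[class]);
      }
      return 0;
  }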
-24
drivers/gpu/drm/xe/xe_hmm.c
··· 19 19 return (end - start) >> PAGE_SHIFT; 20 20 } 21 21 22 - /** 23 - * xe_mark_range_accessed() - mark a range is accessed, so core mm 24 - * have such information for memory eviction or write back to 25 - * hard disk 26 - * @range: the range to mark 27 - * @write: if write to this range, we mark pages in this range 28 - * as dirty 29 - */ 30 - static void xe_mark_range_accessed(struct hmm_range *range, bool write) 31 - { 32 - struct page *page; 33 - u64 i, npages; 34 - 35 - npages = xe_npages_in_range(range->start, range->end); 36 - for (i = 0; i < npages; i++) { 37 - page = hmm_pfn_to_page(range->hmm_pfns[i]); 38 - if (write) 39 - set_page_dirty_lock(page); 40 - 41 - mark_page_accessed(page); 42 - } 43 - } 44 - 45 22 static int xe_alloc_sg(struct xe_device *xe, struct sg_table *st, 46 23 struct hmm_range *range, struct rw_semaphore *notifier_sem) 47 24 { ··· 308 331 if (ret) 309 332 goto out_unlock; 310 333 311 - xe_mark_range_accessed(&hmm_range, write); 312 334 userptr->sg = &userptr->sgt; 313 335 xe_hmm_userptr_set_mapped(uvma); 314 336 userptr->notifier_seq = hmm_range.notifier_seq;
+11 -2
drivers/gpu/drm/xe/xe_pxp_debugfs.c
··· 66 66 { 67 67 struct xe_pxp *pxp = node_to_pxp(m->private); 68 68 struct drm_printer p = drm_seq_file_printer(m); 69 + int ready = xe_pxp_get_readiness_status(pxp); 69 70 70 - if (!xe_pxp_is_enabled(pxp)) 71 - return -ENODEV; 71 + if (ready < 0) 72 + return ready; /* disabled or error occurred */ 73 + else if (!ready) 74 + return -EBUSY; /* init still in progress */ 75 + 76 + /* no need for a termination if PXP is not active */ 77 + if (pxp->status != XE_PXP_ACTIVE) { 78 + drm_printf(&p, "PXP not active\n"); 79 + return 0; 80 + } 72 81 73 82 /* simulate a termination interrupt */ 74 83 spin_lock_irq(&pxp->xe->irq.lock);
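
Rather than only checking whether PXP is enabled, the debugfs path now interprets the readiness status: a negative value is returned as-is (disabled or error), zero becomes -EBUSY while initialization is still running, and a termination is only queued when PXP is actually active. A sketch of that tri-state handling with placeholder status values:

  #include <stdio.h>

  /* readiness: <0 error/disabled, 0 still initializing, >0 ready */
  static int pxp_terminate(int readiness, int pxp_active)
  {
      if (readiness < 0)
          return readiness;        /* disabled or error occurred */
      else if (!readiness)
          return -16;              /* -EBUSY: init still in progress */

      /* no need for a termination if PXP is not active */
      if (!pxp_active)
          return 0;

      /* ... this is where the termination interrupt would be simulated ... */
      return 0;
  }

  int main(void)
  {
      printf("%d\n", pxp_terminate(-19, 0));   /* -19 (-ENODEV): disabled */
      printf("%d\n", pxp_terminate(0, 0));     /* -16 (-EBUSY): still initializing */
      printf("%d\n", pxp_terminate(1, 1));     /* 0: termination queued */
      return 0;
  }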
+1 -2
include/drm/drm_gem.h
··· 585 585 */ 586 586 static inline bool drm_gem_is_imported(const struct drm_gem_object *obj) 587 587 { 588 - /* The dma-buf's priv field points to the original GEM object. */ 589 - return obj->dma_buf && (obj->dma_buf->priv != obj); 588 + return !!obj->import_attach; 590 589 } 591 590 592 591 #ifdef CONFIG_LOCKDEP
+2 -2
include/uapi/drm/ivpu_accel.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ 2 2 /* 3 - * Copyright (C) 2020-2024 Intel Corporation 3 + * Copyright (C) 2020-2025 Intel Corporation 4 4 */ 5 5 6 6 #ifndef __UAPI_IVPU_DRM_H__ ··· 147 147 * platform type when executing on a simulator or emulator (read-only) 148 148 * 149 149 * %DRM_IVPU_PARAM_CORE_CLOCK_RATE: 150 - * Current PLL frequency (read-only) 150 + * Maximum frequency of the NPU data processing unit clock (read-only) 151 151 * 152 152 * %DRM_IVPU_PARAM_NUM_CONTEXTS: 153 153 * Maximum number of simultaneously existing contexts (read-only)