Merge tag 'drm-fixes-2025-04-19' of https://gitlab.freedesktop.org/drm/kernel

Pull drm fixes from Dave Airlie:
"Easter rc3 pull request, fixes in all the usuals, amdgpu, xe, msm,
with some i915/ivpu/mgag200/v3d fixes, then a couple of bits in
dma-buf/gem.

Hopefully has no easter eggs in it.

dma-buf:
- Correctly decrement refcounter on errors

gem:
- Fix test for imported buffers

amdgpu:
- Cleaner shader sysfs fix
- Suspend fix
- Fix doorbell free ordering
- Video caps fix
- DML2 memory allocation optimization
- HDP fix

i915:
- Fix DP DSC configurations that require 3 DSC engines per pipe

xe:
- Fix LRC address being written too late for GuC
- Fix notifier vs folio deadlock
- Fix race between dma_buf unmap and vram eviction
- Fix debugfs unconditionally queueing PXP terminations

msm:
- Display:
- Fix to call dpu_plane_atomic_check_pipe() for both SSPPs in
case of multi-rect
- Fix to validate plane_state pointer before using it in
dpu_plane_virtual_atomic_check()
- Fix to dereference dpu_encoder_phys only after validating it in
_dpu_encoder_trigger_start()
- Remove the remaining intr_tear_rd_ptr entries that were initialized
to -1, since NO_IRQ indices start from 0 now
- GPU:
- Fix IB_SIZE overflow

ivpu:
- Improve debug logging (add cmdq_id to job logs)
- Fix NPU frequency calculation and reporting
- Support firmware API 3.28.3
- Flush jobs upon reset

mgag200:
- Set vblank start to the correct value

v3d:
- Fix Indirect Dispatch"

* tag 'drm-fixes-2025-04-19' of https://gitlab.freedesktop.org/drm/kernel: (26 commits)
drm/msm/a6xx+: Don't let IB_SIZE overflow
drm/xe/pxp: do not queue unneeded terminations from debugfs
drm/xe/dma_buf: stop relying on placement in unmap
drm/xe/userptr: fix notifier vs folio deadlock
drm/xe: Set LRC addresses before guc load
drm/mgag200: Fix value in <VBLKSTR> register
drm/gem: Internally test import_attach for imported objects
drm/amdgpu: Use the right function for hdp flush
drm/amd/display/dml2: use vzalloc rather than kzalloc
drm/amdgpu: Add back JPEG to video caps for carrizo and newer
drm/amdgpu: fix warning of drm_mm_clean
drm/amd: Forbid suspending into non-default suspend states
drm/amdgpu: use a dummy owner for sysfs triggered cleaner shaders v4
drm/i915/dp: Check for HAS_DSC_3ENGINES while configuring DSC slices
drm/i915/display: Add macro for checking 3 DSC engines
dma-buf/sw_sync: Decrement refcount on error in sw_sync_ioctl_get_deadline()
accel/ivpu: Add cmdq_id to job related logs
accel/ivpu: Show NPU frequency in sysfs
accel/ivpu: Fix the NPU's DPU frequency calculation
accel/ivpu: Update FW Boot API to version 3.28.3
...

+407 -285
+5 -5
drivers/accel/ivpu/ivpu_drv.c
··· 1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 - * Copyright (C) 2020-2024 Intel Corporation 4 */ 5 6 #include <linux/firmware.h> ··· 164 args->value = vdev->platform; 165 break; 166 case DRM_IVPU_PARAM_CORE_CLOCK_RATE: 167 - args->value = ivpu_hw_ratio_to_freq(vdev, vdev->hw->pll.max_ratio); 168 break; 169 case DRM_IVPU_PARAM_NUM_CONTEXTS: 170 args->value = ivpu_get_context_count(vdev); ··· 421 { 422 ivpu_hw_irq_disable(vdev); 423 disable_irq(vdev->irq); 424 - cancel_work_sync(&vdev->irq_ipc_work); 425 - cancel_work_sync(&vdev->irq_dct_work); 426 - cancel_work_sync(&vdev->context_abort_work); 427 ivpu_ipc_disable(vdev); 428 ivpu_mmu_disable(vdev); 429 }
··· 1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 + * Copyright (C) 2020-2025 Intel Corporation 4 */ 5 6 #include <linux/firmware.h> ··· 164 args->value = vdev->platform; 165 break; 166 case DRM_IVPU_PARAM_CORE_CLOCK_RATE: 167 + args->value = ivpu_hw_dpu_max_freq_get(vdev); 168 break; 169 case DRM_IVPU_PARAM_NUM_CONTEXTS: 170 args->value = ivpu_get_context_count(vdev); ··· 421 { 422 ivpu_hw_irq_disable(vdev); 423 disable_irq(vdev->irq); 424 + flush_work(&vdev->irq_ipc_work); 425 + flush_work(&vdev->irq_dct_work); 426 + flush_work(&vdev->context_abort_work); 427 ivpu_ipc_disable(vdev); 428 ivpu_mmu_disable(vdev); 429 }
+13 -4
drivers/accel/ivpu/ivpu_fw.c
··· 1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 - * Copyright (C) 2020-2024 Intel Corporation 4 */ 5 6 #include <linux/firmware.h> ··· 233 fw->dvfs_mode = 0; 234 235 fw->sched_mode = ivpu_fw_sched_mode_select(vdev, fw_hdr); 236 - fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size; 237 - fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size; 238 ivpu_info(vdev, "Scheduler mode: %s\n", fw->sched_mode ? "HW" : "OS"); 239 240 if (fw_hdr->ro_section_start_address && !is_within_range(fw_hdr->ro_section_start_address, 241 fw_hdr->ro_section_size, ··· 576 577 boot_params->magic = VPU_BOOT_PARAMS_MAGIC; 578 boot_params->vpu_id = to_pci_dev(vdev->drm.dev)->bus->number; 579 - boot_params->frequency = ivpu_hw_pll_freq_get(vdev); 580 581 /* 582 * This param is a debug firmware feature. It switches default clock
··· 1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 + * Copyright (C) 2020-2025 Intel Corporation 4 */ 5 6 #include <linux/firmware.h> ··· 233 fw->dvfs_mode = 0; 234 235 fw->sched_mode = ivpu_fw_sched_mode_select(vdev, fw_hdr); 236 ivpu_info(vdev, "Scheduler mode: %s\n", fw->sched_mode ? "HW" : "OS"); 237 + 238 + if (fw_hdr->preemption_buffer_1_max_size) 239 + fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_max_size; 240 + else 241 + fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size; 242 + 243 + if (fw_hdr->preemption_buffer_2_max_size) 244 + fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_max_size; 245 + else 246 + fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size; 247 + ivpu_dbg(vdev, FW_BOOT, "Preemption buffer sizes: primary %u, secondary %u\n", 248 + fw->primary_preempt_buf_size, fw->secondary_preempt_buf_size); 249 250 if (fw_hdr->ro_section_start_address && !is_within_range(fw_hdr->ro_section_start_address, 251 fw_hdr->ro_section_size, ··· 566 567 boot_params->magic = VPU_BOOT_PARAMS_MAGIC; 568 boot_params->vpu_id = to_pci_dev(vdev->drm.dev)->bus->number; 569 570 /* 571 * This param is a debug firmware feature. It switches default clock
+8 -8
drivers/accel/ivpu/ivpu_hw.h
··· 1 /* SPDX-License-Identifier: GPL-2.0-only */ 2 /* 3 - * Copyright (C) 2020-2024 Intel Corporation 4 */ 5 6 #ifndef __IVPU_HW_H__ ··· 82 return range->end - range->start; 83 } 84 85 - static inline u32 ivpu_hw_ratio_to_freq(struct ivpu_device *vdev, u32 ratio) 86 { 87 - return ivpu_hw_btrs_ratio_to_freq(vdev, ratio); 88 } 89 90 static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev) 91 { 92 ivpu_hw_ip_irq_clear(vdev); 93 - } 94 - 95 - static inline u32 ivpu_hw_pll_freq_get(struct ivpu_device *vdev) 96 - { 97 - return ivpu_hw_btrs_pll_freq_get(vdev); 98 } 99 100 static inline u32 ivpu_hw_profiling_freq_get(struct ivpu_device *vdev)
··· 1 /* SPDX-License-Identifier: GPL-2.0-only */ 2 /* 3 + * Copyright (C) 2020-2025 Intel Corporation 4 */ 5 6 #ifndef __IVPU_HW_H__ ··· 82 return range->end - range->start; 83 } 84 85 + static inline u32 ivpu_hw_dpu_max_freq_get(struct ivpu_device *vdev) 86 { 87 + return ivpu_hw_btrs_dpu_max_freq_get(vdev); 88 + } 89 + 90 + static inline u32 ivpu_hw_dpu_freq_get(struct ivpu_device *vdev) 91 + { 92 + return ivpu_hw_btrs_dpu_freq_get(vdev); 93 } 94 95 static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev) 96 { 97 ivpu_hw_ip_irq_clear(vdev); 98 } 99 100 static inline u32 ivpu_hw_profiling_freq_get(struct ivpu_device *vdev)
+64 -70
drivers/accel/ivpu/ivpu_hw_btrs.c
··· 1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 - * Copyright (C) 2020-2024 Intel Corporation 4 */ 5 6 #include "ivpu_drv.h" 7 #include "ivpu_hw.h" ··· 30 31 #define BTRS_LNL_ALL_IRQ_MASK ((u32)-1) 32 33 - #define BTRS_MTL_WP_CONFIG_1_TILE_5_3_RATIO WP_CONFIG(MTL_CONFIG_1_TILE, MTL_PLL_RATIO_5_3) 34 - #define BTRS_MTL_WP_CONFIG_1_TILE_4_3_RATIO WP_CONFIG(MTL_CONFIG_1_TILE, MTL_PLL_RATIO_4_3) 35 - #define BTRS_MTL_WP_CONFIG_2_TILE_5_3_RATIO WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_5_3) 36 - #define BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_4_3) 37 - #define BTRS_MTL_WP_CONFIG_0_TILE_PLL_OFF WP_CONFIG(0, 0) 38 39 #define PLL_CDYN_DEFAULT 0x80 40 #define PLL_EPP_DEFAULT 0x80 41 #define PLL_CONFIG_DEFAULT 0x0 42 - #define PLL_SIMULATION_FREQ 10000000 43 - #define PLL_REF_CLK_FREQ 50000000 44 #define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC) 45 #define IDLE_TIMEOUT_US (5 * USEC_PER_MSEC) 46 #define TIMEOUT_US (150 * USEC_PER_MSEC) ··· 59 #define DCT_REQ 0x2 60 #define DCT_ENABLE 0x1 61 #define DCT_DISABLE 0x0 62 63 int ivpu_hw_btrs_irqs_clear_with_0_mtl(struct ivpu_device *vdev) 64 { ··· 156 157 hw->tile_fuse = BTRS_MTL_TILE_FUSE_ENABLE_BOTH; 158 hw->sku = BTRS_MTL_TILE_SKU_BOTH; 159 - hw->config = BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO; 160 161 return 0; 162 } ··· 334 335 prepare_wp_request(vdev, &wp, enable); 336 337 - ivpu_dbg(vdev, PM, "PLL workpoint request: %u Hz, config: 0x%x, epp: 0x%x, cdyn: 0x%x\n", 338 - PLL_RATIO_TO_FREQ(wp.target), wp.cfg, wp.epp, wp.cdyn); 339 340 ret = wp_request_send(vdev, &wp); 341 if (ret) { ··· 573 return REGB_POLL_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US); 574 } 575 576 /* Handler for IRQs from Buttress core (irqB) */ 577 bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq) 578 { ··· 623 if (!status) 624 return false; 625 626 - if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, FREQ_CHANGE, status)) 627 - ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x", 628 - REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL)); 629 630 if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, ATS_ERR, status)) { 631 ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(VPU_HW_BTRS_MTL_ATS_ERR_LOG_0)); ··· 677 queue_work(system_wq, &vdev->irq_dct_work); 678 } 679 680 - if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, FREQ_CHANGE, status)) 681 - ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x", REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ)); 682 683 if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, ATS_ERR, status)) { 684 ivpu_err(vdev, "ATS_ERR LOG1 0x%08x ATS_ERR_LOG2 0x%08x\n", ··· 763 val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, PARAM2, active_percent, val); 764 765 REGB_WR32(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, val); 766 - } 767 - 768 - static u32 pll_ratio_to_freq_mtl(u32 ratio, u32 config) 769 - { 770 - u32 pll_clock = PLL_REF_CLK_FREQ * ratio; 771 - u32 cpu_clock; 772 - 773 - if ((config & 0xff) == MTL_PLL_RATIO_4_3) 774 - cpu_clock = pll_clock * 2 / 4; 775 - else 776 - cpu_clock = pll_clock * 2 / 5; 777 - 778 - return cpu_clock; 779 - } 780 - 781 - u32 ivpu_hw_btrs_ratio_to_freq(struct ivpu_device *vdev, u32 ratio) 782 - { 783 - struct ivpu_hw_info *hw = vdev->hw; 784 - 785 - if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) 786 - return pll_ratio_to_freq_mtl(ratio, hw->config); 787 - else 788 - return PLL_RATIO_TO_FREQ(ratio); 789 - } 790 - 791 - static u32 pll_freq_get_mtl(struct ivpu_device *vdev) 792 - { 793 - u32 pll_curr_ratio; 794 - 795 - pll_curr_ratio = REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL); 796 - pll_curr_ratio &= 
VPU_HW_BTRS_MTL_CURRENT_PLL_RATIO_MASK; 797 - 798 - if (!ivpu_is_silicon(vdev)) 799 - return PLL_SIMULATION_FREQ; 800 - 801 - return pll_ratio_to_freq_mtl(pll_curr_ratio, vdev->hw->config); 802 - } 803 - 804 - static u32 pll_freq_get_lnl(struct ivpu_device *vdev) 805 - { 806 - u32 pll_curr_ratio; 807 - 808 - pll_curr_ratio = REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ); 809 - pll_curr_ratio &= VPU_HW_BTRS_LNL_PLL_FREQ_RATIO_MASK; 810 - 811 - return PLL_RATIO_TO_FREQ(pll_curr_ratio); 812 - } 813 - 814 - u32 ivpu_hw_btrs_pll_freq_get(struct ivpu_device *vdev) 815 - { 816 - if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) 817 - return pll_freq_get_mtl(vdev); 818 - else 819 - return pll_freq_get_lnl(vdev); 820 } 821 822 u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev)
··· 1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 + * Copyright (C) 2020-2025 Intel Corporation 4 */ 5 + 6 + #include <linux/units.h> 7 8 #include "ivpu_drv.h" 9 #include "ivpu_hw.h" ··· 28 29 #define BTRS_LNL_ALL_IRQ_MASK ((u32)-1) 30 31 32 #define PLL_CDYN_DEFAULT 0x80 33 #define PLL_EPP_DEFAULT 0x80 34 #define PLL_CONFIG_DEFAULT 0x0 35 + #define PLL_REF_CLK_FREQ 50000000ull 36 + #define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ) 37 + 38 #define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC) 39 #define IDLE_TIMEOUT_US (5 * USEC_PER_MSEC) 40 #define TIMEOUT_US (150 * USEC_PER_MSEC) ··· 61 #define DCT_REQ 0x2 62 #define DCT_ENABLE 0x1 63 #define DCT_DISABLE 0x0 64 + 65 + static u32 pll_ratio_to_dpu_freq(struct ivpu_device *vdev, u32 ratio); 66 67 int ivpu_hw_btrs_irqs_clear_with_0_mtl(struct ivpu_device *vdev) 68 { ··· 156 157 hw->tile_fuse = BTRS_MTL_TILE_FUSE_ENABLE_BOTH; 158 hw->sku = BTRS_MTL_TILE_SKU_BOTH; 159 + hw->config = WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_4_3); 160 161 return 0; 162 } ··· 334 335 prepare_wp_request(vdev, &wp, enable); 336 337 + ivpu_dbg(vdev, PM, "PLL workpoint request: %lu MHz, config: 0x%x, epp: 0x%x, cdyn: 0x%x\n", 338 + pll_ratio_to_dpu_freq(vdev, wp.target) / HZ_PER_MHZ, wp.cfg, wp.epp, wp.cdyn); 339 340 ret = wp_request_send(vdev, &wp); 341 if (ret) { ··· 573 return REGB_POLL_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US); 574 } 575 576 + static u32 pll_config_get_mtl(struct ivpu_device *vdev) 577 + { 578 + return REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL); 579 + } 580 + 581 + static u32 pll_config_get_lnl(struct ivpu_device *vdev) 582 + { 583 + return REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ); 584 + } 585 + 586 + static u32 pll_ratio_to_dpu_freq_mtl(u16 ratio) 587 + { 588 + return (PLL_RATIO_TO_FREQ(ratio) * 2) / 3; 589 + } 590 + 591 + static u32 pll_ratio_to_dpu_freq_lnl(u16 ratio) 592 + { 593 + return PLL_RATIO_TO_FREQ(ratio) / 2; 594 + } 595 + 596 + static u32 pll_ratio_to_dpu_freq(struct ivpu_device *vdev, u32 ratio) 597 + { 598 + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) 599 + return pll_ratio_to_dpu_freq_mtl(ratio); 600 + else 601 + return pll_ratio_to_dpu_freq_lnl(ratio); 602 + } 603 + 604 + u32 ivpu_hw_btrs_dpu_max_freq_get(struct ivpu_device *vdev) 605 + { 606 + return pll_ratio_to_dpu_freq(vdev, vdev->hw->pll.max_ratio); 607 + } 608 + 609 + u32 ivpu_hw_btrs_dpu_freq_get(struct ivpu_device *vdev) 610 + { 611 + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) 612 + return pll_ratio_to_dpu_freq_mtl(pll_config_get_mtl(vdev)); 613 + else 614 + return pll_ratio_to_dpu_freq_lnl(pll_config_get_lnl(vdev)); 615 + } 616 + 617 /* Handler for IRQs from Buttress core (irqB) */ 618 bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq) 619 { ··· 582 if (!status) 583 return false; 584 585 + if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, FREQ_CHANGE, status)) { 586 + u32 pll = pll_config_get_mtl(vdev); 587 + 588 + ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq, wp %08x, %lu MHz", 589 + pll, pll_ratio_to_dpu_freq_mtl(pll) / HZ_PER_MHZ); 590 + } 591 592 if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, ATS_ERR, status)) { 593 ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(VPU_HW_BTRS_MTL_ATS_ERR_LOG_0)); ··· 633 queue_work(system_wq, &vdev->irq_dct_work); 634 } 635 636 + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, FREQ_CHANGE, status)) { 637 + u32 pll = pll_config_get_lnl(vdev); 638 + 639 + ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq, wp %08x, %lu MHz", 640 + pll, pll_ratio_to_dpu_freq_lnl(pll) / HZ_PER_MHZ); 641 + } 642 643 if 
(REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, ATS_ERR, status)) { 644 ivpu_err(vdev, "ATS_ERR LOG1 0x%08x ATS_ERR_LOG2 0x%08x\n", ··· 715 val = REG_SET_FLD_NUM(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, PARAM2, active_percent, val); 716 717 REGB_WR32(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, val); 718 } 719 720 u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev)
+3 -4
drivers/accel/ivpu/ivpu_hw_btrs.h
··· 1 /* SPDX-License-Identifier: GPL-2.0-only */ 2 /* 3 - * Copyright (C) 2020-2024 Intel Corporation 4 */ 5 6 #ifndef __IVPU_HW_BTRS_H__ ··· 13 14 #define PLL_PROFILING_FREQ_DEFAULT 38400000 15 #define PLL_PROFILING_FREQ_HIGH 400000000 16 - #define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ) 17 18 #define DCT_DEFAULT_ACTIVE_PERCENT 15u 19 #define DCT_PERIOD_US 35300u ··· 31 void ivpu_hw_btrs_profiling_freq_reg_set_lnl(struct ivpu_device *vdev); 32 void ivpu_hw_btrs_ats_print_lnl(struct ivpu_device *vdev); 33 void ivpu_hw_btrs_clock_relinquish_disable_lnl(struct ivpu_device *vdev); 34 bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq); 35 bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq); 36 int ivpu_hw_btrs_dct_get_request(struct ivpu_device *vdev, bool *enable); 37 void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u32 dct_percent); 38 - u32 ivpu_hw_btrs_pll_freq_get(struct ivpu_device *vdev); 39 - u32 ivpu_hw_btrs_ratio_to_freq(struct ivpu_device *vdev, u32 ratio); 40 u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev); 41 u32 ivpu_hw_btrs_telemetry_size_get(struct ivpu_device *vdev); 42 u32 ivpu_hw_btrs_telemetry_enable_get(struct ivpu_device *vdev);
··· 1 /* SPDX-License-Identifier: GPL-2.0-only */ 2 /* 3 + * Copyright (C) 2020-2025 Intel Corporation 4 */ 5 6 #ifndef __IVPU_HW_BTRS_H__ ··· 13 14 #define PLL_PROFILING_FREQ_DEFAULT 38400000 15 #define PLL_PROFILING_FREQ_HIGH 400000000 16 17 #define DCT_DEFAULT_ACTIVE_PERCENT 15u 18 #define DCT_PERIOD_US 35300u ··· 32 void ivpu_hw_btrs_profiling_freq_reg_set_lnl(struct ivpu_device *vdev); 33 void ivpu_hw_btrs_ats_print_lnl(struct ivpu_device *vdev); 34 void ivpu_hw_btrs_clock_relinquish_disable_lnl(struct ivpu_device *vdev); 35 + u32 ivpu_hw_btrs_dpu_max_freq_get(struct ivpu_device *vdev); 36 + u32 ivpu_hw_btrs_dpu_freq_get(struct ivpu_device *vdev); 37 bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq); 38 bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq); 39 int ivpu_hw_btrs_dct_get_request(struct ivpu_device *vdev, bool *enable); 40 void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u32 dct_percent); 41 u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev); 42 u32 ivpu_hw_btrs_telemetry_size_get(struct ivpu_device *vdev); 43 u32 ivpu_hw_btrs_telemetry_enable_get(struct ivpu_device *vdev);
+8 -7
drivers/accel/ivpu/ivpu_job.c
··· 470 struct ivpu_device *vdev = job->vdev; 471 u32 i; 472 473 - ivpu_dbg(vdev, JOB, "Job destroyed: id %3u ctx %2d engine %d", 474 - job->job_id, job->file_priv->ctx.id, job->engine_idx); 475 476 for (i = 0; i < job->bo_count; i++) 477 if (job->bos[i]) ··· 564 dma_fence_signal(job->done_fence); 565 566 trace_job("done", job); 567 - ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d engine %d status 0x%x\n", 568 - job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status); 569 570 ivpu_job_destroy(job); 571 ivpu_stop_job_timeout_detection(vdev); ··· 664 } 665 666 trace_job("submit", job); 667 - ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d engine %d prio %d addr 0x%llx next %d\n", 668 - job->job_id, file_priv->ctx.id, job->engine_idx, cmdq->priority, 669 job->cmd_buf_vpu_addr, cmdq->jobq->header.tail); 670 671 mutex_unlock(&file_priv->lock); ··· 777 goto err_free_handles; 778 } 779 780 - ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u buf_count %u\n", file_priv->ctx.id, buffer_count); 781 782 job = ivpu_job_create(file_priv, engine, buffer_count); 783 if (!job) {
··· 470 struct ivpu_device *vdev = job->vdev; 471 u32 i; 472 473 + ivpu_dbg(vdev, JOB, "Job destroyed: id %3u ctx %2d cmdq_id %u engine %d", 474 + job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx); 475 476 for (i = 0; i < job->bo_count; i++) 477 if (job->bos[i]) ··· 564 dma_fence_signal(job->done_fence); 565 566 trace_job("done", job); 567 + ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d cmdq_id %u engine %d status 0x%x\n", 568 + job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx, job_status); 569 570 ivpu_job_destroy(job); 571 ivpu_stop_job_timeout_detection(vdev); ··· 664 } 665 666 trace_job("submit", job); 667 + ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d cmdq_id %u engine %d prio %d addr 0x%llx next %d\n", 668 + job->job_id, file_priv->ctx.id, cmdq->id, job->engine_idx, cmdq->priority, 669 job->cmd_buf_vpu_addr, cmdq->jobq->header.tail); 670 671 mutex_unlock(&file_priv->lock); ··· 777 goto err_free_handles; 778 } 779 780 + ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u cmdq_id %u buf_count %u\n", 781 + file_priv->ctx.id, cmdq_id, buffer_count); 782 783 job = ivpu_job_create(file_priv, engine, buffer_count); 784 if (!job) {
+48 -1
drivers/accel/ivpu/ivpu_sysfs.c
··· 1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 - * Copyright (C) 2024 Intel Corporation 4 */ 5 6 #include <linux/device.h> 7 #include <linux/err.h> 8 9 #include "ivpu_drv.h" 10 #include "ivpu_gem.h" ··· 92 93 static DEVICE_ATTR_RO(sched_mode); 94 95 static struct attribute *ivpu_dev_attrs[] = { 96 &dev_attr_npu_busy_time_us.attr, 97 &dev_attr_npu_memory_utilization.attr, 98 &dev_attr_sched_mode.attr, 99 NULL, 100 }; 101
··· 1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 + * Copyright (C) 2024-2025 Intel Corporation 4 */ 5 6 #include <linux/device.h> 7 #include <linux/err.h> 8 + #include <linux/pm_runtime.h> 9 + #include <linux/units.h> 10 11 #include "ivpu_drv.h" 12 #include "ivpu_gem.h" ··· 90 91 static DEVICE_ATTR_RO(sched_mode); 92 93 + /** 94 + * DOC: npu_max_frequency 95 + * 96 + * The npu_max_frequency shows maximum frequency in MHz of the NPU's data 97 + * processing unit 98 + */ 99 + static ssize_t 100 + npu_max_frequency_mhz_show(struct device *dev, struct device_attribute *attr, char *buf) 101 + { 102 + struct drm_device *drm = dev_get_drvdata(dev); 103 + struct ivpu_device *vdev = to_ivpu_device(drm); 104 + u32 freq = ivpu_hw_dpu_max_freq_get(vdev); 105 + 106 + return sysfs_emit(buf, "%lu\n", freq / HZ_PER_MHZ); 107 + } 108 + 109 + static DEVICE_ATTR_RO(npu_max_frequency_mhz); 110 + 111 + /** 112 + * DOC: npu_current_frequency_mhz 113 + * 114 + * The npu_current_frequency_mhz shows current frequency in MHz of the NPU's 115 + * data processing unit 116 + */ 117 + static ssize_t 118 + npu_current_frequency_mhz_show(struct device *dev, struct device_attribute *attr, char *buf) 119 + { 120 + struct drm_device *drm = dev_get_drvdata(dev); 121 + struct ivpu_device *vdev = to_ivpu_device(drm); 122 + u32 freq = 0; 123 + 124 + /* Read frequency only if device is active, otherwise frequency is 0 */ 125 + if (pm_runtime_get_if_active(vdev->drm.dev) > 0) { 126 + freq = ivpu_hw_dpu_freq_get(vdev); 127 + 128 + pm_runtime_put_autosuspend(vdev->drm.dev); 129 + } 130 + 131 + return sysfs_emit(buf, "%lu\n", freq / HZ_PER_MHZ); 132 + } 133 + 134 + static DEVICE_ATTR_RO(npu_current_frequency_mhz); 135 + 136 static struct attribute *ivpu_dev_attrs[] = { 137 &dev_attr_npu_busy_time_us.attr, 138 &dev_attr_npu_memory_utilization.attr, 139 &dev_attr_sched_mode.attr, 140 + &dev_attr_npu_max_frequency_mhz.attr, 141 + &dev_attr_npu_current_frequency_mhz.attr, 142 NULL, 143 }; 144
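The two attributes added above are plain read-only sysfs files, so any file API can consume them. A minimal userspace sketch follows; the device path is an assumption for illustration (the real location is the NPU's sysfs device directory), not something defined by this series.

/*
 * Hypothetical reader for the new attributes. The sysfs path below is an
 * assumption; substitute the NPU's actual device directory on the target
 * system.
 */
#include <stdio.h>

int main(void)
{
	const char *path =
		"/sys/bus/pci/devices/0000:00:0b.0/npu_current_frequency_mhz";
	char buf[32];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("npu_current_frequency_mhz: %s", buf);
	fclose(f);
	return 0;
}

Note that, per the pm_runtime_get_if_active() check in the diff above, the current-frequency attribute reads back 0 while the device is runtime-suspended.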
+10 -3
drivers/accel/ivpu/vpu_boot_api.h
··· 26 * Minor version changes when API backward compatibility is preserved. 27 * Resets to 0 if Major version is incremented. 28 */ 29 - #define VPU_BOOT_API_VER_MINOR 26 30 31 /* 32 * API header changed (field names, documentation, formatting) but API itself has not been changed ··· 76 * submission queue size and device capabilities. 77 */ 78 u32 preemption_buffer_2_size; 79 /* Space reserved for future preemption-related fields. */ 80 - u32 preemption_reserved[6]; 81 /* FW image read only section start address, 4KB aligned */ 82 u64 ro_section_start_address; 83 /* FW image read only section size, 4KB aligned */ ··· 141 /* 142 * Processor bit shifts (for loggable HW components). 143 */ 144 - #define VPU_TRACE_PROC_BIT_ARM 0 145 #define VPU_TRACE_PROC_BIT_LRT 1 146 #define VPU_TRACE_PROC_BIT_LNN 2 147 #define VPU_TRACE_PROC_BIT_SHV_0 3
··· 26 * Minor version changes when API backward compatibility is preserved. 27 * Resets to 0 if Major version is incremented. 28 */ 29 + #define VPU_BOOT_API_VER_MINOR 28 30 31 /* 32 * API header changed (field names, documentation, formatting) but API itself has not been changed ··· 76 * submission queue size and device capabilities. 77 */ 78 u32 preemption_buffer_2_size; 79 + /* 80 + * Maximum preemption buffer size that the FW can use: no need for the host 81 + * driver to allocate more space than that specified by these fields. 82 + * A value of 0 means no declared limit. 83 + */ 84 + u32 preemption_buffer_1_max_size; 85 + u32 preemption_buffer_2_max_size; 86 /* Space reserved for future preemption-related fields. */ 87 + u32 preemption_reserved[4]; 88 /* FW image read only section start address, 4KB aligned */ 89 u64 ro_section_start_address; 90 /* FW image read only section size, 4KB aligned */ ··· 134 /* 135 * Processor bit shifts (for loggable HW components). 136 */ 137 + #define VPU_TRACE_PROC_BIT_RESERVED 0 138 #define VPU_TRACE_PROC_BIT_LRT 1 139 #define VPU_TRACE_PROC_BIT_LNN 2 140 #define VPU_TRACE_PROC_BIT_SHV_0 3
+36 -17
drivers/accel/ivpu/vpu_jsm_api.h
··· 22 /* 23 * Minor version changes when API backward compatibility is preserved. 24 */ 25 - #define VPU_JSM_API_VER_MINOR 25 26 27 /* 28 * API header changed (field names, documentation, formatting) but API itself has not been changed ··· 53 * Engine indexes. 54 */ 55 #define VPU_ENGINE_COMPUTE 0 56 - #define VPU_ENGINE_COPY 1 57 - #define VPU_ENGINE_NB 2 58 59 /* 60 * VPU status values. ··· 125 * When set, indicates that job queue uses native fences (as inline commands 126 * in job queue). Such queues may also use legacy fences (as commands in batch buffers). 127 * When cleared, indicates the job queue only uses legacy fences. 128 - * NOTE: For queues using native fences, VPU expects that all jobs in the queue 129 - * are immediately followed by an inline command object. This object is expected 130 - * to be a fence signal command in most cases, but can also be a NOP in case the host 131 - * does not need per-job fence signalling. Other inline commands objects can be 132 - * inserted between "job and inline command" pairs. 133 */ 134 VPU_JOB_QUEUE_FLAGS_USE_NATIVE_FENCE_MASK = (1 << 1U), 135 ··· 276 u64 value; 277 /* User VA of the log buffer in which to add log entry on completion. */ 278 u64 log_buffer_va; 279 } fence; 280 /* Other commands do not have a payload. */ 281 /* Payload definition for future inline commands can be inserted here. */ ··· 794 /** Metric group mask that identifies metric streamer instance. */ 795 u64 metric_group_mask; 796 /** 797 - * Address and size of the buffer where the VPU will write metric data. If 798 - * the buffer address is 0 or same as the currently used buffer the VPU will 799 - * continue writing metric data to the current buffer. In this case the 800 - * buffer size is ignored and the size of the current buffer is unchanged. 801 - * If the address is non-zero and differs from the current buffer address the 802 - * VPU will immediately switch data collection to the new buffer. 803 */ 804 u64 buffer_addr; 805 u64 buffer_size; ··· 947 /* 948 * Default quantum in 100ns units for scheduling across processes 949 * within a priority band 950 */ 951 u32 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS]; 952 /* ··· 960 * in situations when it's starved by the focus band. 961 */ 962 u32 normal_band_percentage; 963 - /* Reserved */ 964 - u32 reserved_0; 965 }; 966 967 /* ··· 1040 s32 in_process_priority; 1041 /* Zero padding / Reserved */ 1042 u32 reserved_1; 1043 - /* Context quantum relative to other contexts of same priority in the same process */ 1044 u64 context_quantum; 1045 /* Grace period when preempting context of the same priority within the same process */ 1046 u64 grace_period_same_priority;
··· 22 /* 23 * Minor version changes when API backward compatibility is preserved. 24 */ 25 + #define VPU_JSM_API_VER_MINOR 29 26 27 /* 28 * API header changed (field names, documentation, formatting) but API itself has not been changed ··· 53 * Engine indexes. 54 */ 55 #define VPU_ENGINE_COMPUTE 0 56 + #define VPU_ENGINE_NB 1 57 58 /* 59 * VPU status values. ··· 126 * When set, indicates that job queue uses native fences (as inline commands 127 * in job queue). Such queues may also use legacy fences (as commands in batch buffers). 128 * When cleared, indicates the job queue only uses legacy fences. 129 + * NOTES: 130 + * 1. For queues using native fences, VPU expects that all jobs in the queue 131 + * are immediately followed by an inline command object. This object is expected 132 + * to be a fence signal command in most cases, but can also be a NOP in case the host 133 + * does not need per-job fence signalling. Other inline commands objects can be 134 + * inserted between "job and inline command" pairs. 135 + * 2. Native fence queues are only supported on VPU 40xx onwards. 136 */ 137 VPU_JOB_QUEUE_FLAGS_USE_NATIVE_FENCE_MASK = (1 << 1U), 138 ··· 275 u64 value; 276 /* User VA of the log buffer in which to add log entry on completion. */ 277 u64 log_buffer_va; 278 + /* NPU private data. */ 279 + u64 npu_private_data; 280 } fence; 281 /* Other commands do not have a payload. */ 282 /* Payload definition for future inline commands can be inserted here. */ ··· 791 /** Metric group mask that identifies metric streamer instance. */ 792 u64 metric_group_mask; 793 /** 794 + * Address and size of the buffer where the VPU will write metric data. 795 + * This member dictates how the update operation should perform: 796 + * 1. client needs information about the number of collected samples and the 797 + * amount of data written to the current buffer 798 + * 2. client wants to switch to a new buffer 799 + * 800 + * Case 1. is identified by the buffer address being 0 or the same as the 801 + * currently used buffer address. In this case the buffer size is ignored and 802 + * the size of the current buffer is unchanged. The VPU will return an update 803 + * in the vpu_jsm_metric_streamer_done structure. The internal writing position 804 + * into the buffer is not changed. 805 + * 806 + * Case 2. is identified by the address being non-zero and differs from the 807 + * current buffer address. The VPU will immediately switch data collection to 808 + * the new buffer. Then the VPU will return an update in the 809 + * vpu_jsm_metric_streamer_done structure. 810 */ 811 u64 buffer_addr; 812 u64 buffer_size; ··· 934 /* 935 * Default quantum in 100ns units for scheduling across processes 936 * within a priority band 937 + * Minimum value supported by NPU is 1ms (10000 in 100ns units). 938 */ 939 u32 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS]; 940 /* ··· 946 * in situations when it's starved by the focus band. 947 */ 948 u32 normal_band_percentage; 949 + /* 950 + * TDR timeout value in milliseconds. Default value of 0 meaning no timeout. 951 + */ 952 + u32 tdr_timeout; 953 }; 954 955 /* ··· 1024 s32 in_process_priority; 1025 /* Zero padding / Reserved */ 1026 u32 reserved_1; 1027 + /* 1028 + * Context quantum relative to other contexts of same priority in the same process 1029 + * Minimum value supported by NPU is 1ms (10000 in 100ns units). 1030 + */ 1031 u64 context_quantum; 1032 /* Grace period when preempting context of the same priority within the same process */ 1033 u64 grace_period_same_priority;
+14 -5
drivers/dma-buf/sw_sync.c
··· 438 return -EINVAL; 439 440 pt = dma_fence_to_sync_pt(fence); 441 - if (!pt) 442 - return -EINVAL; 443 444 spin_lock_irqsave(fence->lock, flags); 445 - if (test_bit(SW_SYNC_HAS_DEADLINE_BIT, &fence->flags)) { 446 - data.deadline_ns = ktime_to_ns(pt->deadline); 447 - } else { 448 ret = -ENOENT; 449 } 450 spin_unlock_irqrestore(fence->lock, flags); 451 452 dma_fence_put(fence); ··· 460 return -EFAULT; 461 462 return 0; 463 } 464 465 static long sw_sync_ioctl(struct file *file, unsigned int cmd,
··· 438 return -EINVAL; 439 440 pt = dma_fence_to_sync_pt(fence); 441 + if (!pt) { 442 + ret = -EINVAL; 443 + goto put_fence; 444 + } 445 446 spin_lock_irqsave(fence->lock, flags); 447 + if (!test_bit(SW_SYNC_HAS_DEADLINE_BIT, &fence->flags)) { 448 ret = -ENOENT; 449 + goto unlock; 450 } 451 + data.deadline_ns = ktime_to_ns(pt->deadline); 452 spin_unlock_irqrestore(fence->lock, flags); 453 454 dma_fence_put(fence); ··· 458 return -EFAULT; 459 460 return 0; 461 + 462 + unlock: 463 + spin_unlock_irqrestore(fence->lock, flags); 464 + put_fence: 465 + dma_fence_put(fence); 466 + 467 + return ret; 468 } 469 470 static long sw_sync_ioctl(struct file *file, unsigned int cmd,
+1
drivers/gpu/drm/amd/amdgpu/amdgpu.h
··· 1123 bool in_s3; 1124 bool in_s4; 1125 bool in_s0ix; 1126 1127 enum pp_mp1_state mp1_state; 1128 struct amdgpu_doorbell_index doorbell_index;
··· 1123 bool in_s3; 1124 bool in_s4; 1125 bool in_s0ix; 1126 + suspend_state_t last_suspend_state; 1127 1128 enum pp_mp1_state mp1_state; 1129 struct amdgpu_doorbell_index doorbell_index;
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
··· 3510 amdgpu_device_mem_scratch_fini(adev); 3511 amdgpu_ib_pool_fini(adev); 3512 amdgpu_seq64_fini(adev); 3513 } 3514 if (adev->ip_blocks[i].version->funcs->sw_fini) { 3515 r = adev->ip_blocks[i].version->funcs->sw_fini(&adev->ip_blocks[i]); ··· 4859 4860 iounmap(adev->rmmio); 4861 adev->rmmio = NULL; 4862 - amdgpu_doorbell_fini(adev); 4863 drm_dev_exit(idx); 4864 } 4865
··· 3510 amdgpu_device_mem_scratch_fini(adev); 3511 amdgpu_ib_pool_fini(adev); 3512 amdgpu_seq64_fini(adev); 3513 + amdgpu_doorbell_fini(adev); 3514 } 3515 if (adev->ip_blocks[i].version->funcs->sw_fini) { 3516 r = adev->ip_blocks[i].version->funcs->sw_fini(&adev->ip_blocks[i]); ··· 4858 4859 iounmap(adev->rmmio); 4860 adev->rmmio = NULL; 4861 drm_dev_exit(idx); 4862 } 4863
+13 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
··· 2548 adev->in_s0ix = true; 2549 else if (amdgpu_acpi_is_s3_active(adev)) 2550 adev->in_s3 = true; 2551 - if (!adev->in_s0ix && !adev->in_s3) 2552 return 0; 2553 return amdgpu_device_suspend(drm_dev, true); 2554 } 2555
··· 2548 adev->in_s0ix = true; 2549 else if (amdgpu_acpi_is_s3_active(adev)) 2550 adev->in_s3 = true; 2551 + if (!adev->in_s0ix && !adev->in_s3) { 2552 + /* don't allow going deep first time followed by s2idle the next time */ 2553 + if (adev->last_suspend_state != PM_SUSPEND_ON && 2554 + adev->last_suspend_state != pm_suspend_target_state) { 2555 + drm_err_once(drm_dev, "Unsupported suspend state %d\n", 2556 + pm_suspend_target_state); 2557 + return -EINVAL; 2558 + } 2559 return 0; 2560 + } 2561 + 2562 + /* cache the state last used for suspend */ 2563 + adev->last_suspend_state = pm_suspend_target_state; 2564 + 2565 return amdgpu_device_suspend(drm_dev, true); 2566 } 2567
+11 -3
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
··· 1438 struct amdgpu_device *adev = ring->adev; 1439 struct drm_gpu_scheduler *sched = &ring->sched; 1440 struct drm_sched_entity entity; 1441 struct dma_fence *f; 1442 struct amdgpu_job *job; 1443 struct amdgpu_ib *ib; 1444 int i, r; 1445 1446 /* Initialize the scheduler entity */ ··· 1453 goto err; 1454 } 1455 1456 - r = amdgpu_job_alloc_with_ib(ring->adev, &entity, NULL, 1457 - 64, 0, 1458 - &job); 1459 if (r) 1460 goto err; 1461
··· 1438 struct amdgpu_device *adev = ring->adev; 1439 struct drm_gpu_scheduler *sched = &ring->sched; 1440 struct drm_sched_entity entity; 1441 + static atomic_t counter; 1442 struct dma_fence *f; 1443 struct amdgpu_job *job; 1444 struct amdgpu_ib *ib; 1445 + void *owner; 1446 int i, r; 1447 1448 /* Initialize the scheduler entity */ ··· 1451 goto err; 1452 } 1453 1454 + /* 1455 + * Use some unique dummy value as the owner to make sure we execute 1456 + * the cleaner shader on each submission. The value just need to change 1457 + * for each submission and is otherwise meaningless. 1458 + */ 1459 + owner = (void *)(unsigned long)atomic_inc_return(&counter); 1460 + 1461 + r = amdgpu_job_alloc_with_ib(ring->adev, &entity, owner, 1462 + 64, 0, &job); 1463 if (r) 1464 goto err; 1465
+4 -4
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
··· 6114 } 6115 6116 if (amdgpu_emu_mode == 1) 6117 - adev->hdp.funcs->flush_hdp(adev, NULL); 6118 6119 tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL); 6120 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); ··· 6192 } 6193 6194 if (amdgpu_emu_mode == 1) 6195 - adev->hdp.funcs->flush_hdp(adev, NULL); 6196 6197 tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL); 6198 tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0); ··· 6269 } 6270 6271 if (amdgpu_emu_mode == 1) 6272 - adev->hdp.funcs->flush_hdp(adev, NULL); 6273 6274 tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL); 6275 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); ··· 6644 } 6645 6646 if (amdgpu_emu_mode == 1) 6647 - adev->hdp.funcs->flush_hdp(adev, NULL); 6648 6649 tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL); 6650 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
··· 6114 } 6115 6116 if (amdgpu_emu_mode == 1) 6117 + amdgpu_device_flush_hdp(adev, NULL); 6118 6119 tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL); 6120 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); ··· 6192 } 6193 6194 if (amdgpu_emu_mode == 1) 6195 + amdgpu_device_flush_hdp(adev, NULL); 6196 6197 tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL); 6198 tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0); ··· 6269 } 6270 6271 if (amdgpu_emu_mode == 1) 6272 + amdgpu_device_flush_hdp(adev, NULL); 6273 6274 tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL); 6275 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); ··· 6644 } 6645 6646 if (amdgpu_emu_mode == 1) 6647 + amdgpu_device_flush_hdp(adev, NULL); 6648 6649 tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL); 6650 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
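The same one-line substitution repeats in the gfx, gmc and psp files below: direct adev->hdp.funcs->flush_hdp() calls are replaced with the amdgpu_device_flush_hdp() helper. A rough sketch of why a central wrapper is preferable is shown here; the body is illustrative only and does not claim to match the real helper in amdgpu_device.c.

/*
 * Illustrative sketch only, not the actual amdgpu implementation: routing
 * every caller through one wrapper lets device-specific checks (for
 * example, skipping the flush on parts without an HDP cache) live in a
 * single place instead of being repeated at each call site.
 */
#include "amdgpu.h"	/* kernel-internal header, for struct amdgpu_device */

static void example_flush_hdp(struct amdgpu_device *adev,
			      struct amdgpu_ring *ring)
{
	/* assumed check for the sketch; the real wrapper may differ */
	if (adev->flags & AMD_IS_APU)
		return;

	if (adev->hdp.funcs && adev->hdp.funcs->flush_hdp)
		adev->hdp.funcs->flush_hdp(adev, ring);
}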
+6 -6
drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
··· 2428 } 2429 2430 if (amdgpu_emu_mode == 1) 2431 - adev->hdp.funcs->flush_hdp(adev, NULL); 2432 2433 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 2434 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); ··· 2472 } 2473 2474 if (amdgpu_emu_mode == 1) 2475 - adev->hdp.funcs->flush_hdp(adev, NULL); 2476 2477 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); 2478 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); ··· 2517 } 2518 2519 if (amdgpu_emu_mode == 1) 2520 - adev->hdp.funcs->flush_hdp(adev, NULL); 2521 2522 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); 2523 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); ··· 3153 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); 3154 3155 if (amdgpu_emu_mode == 1) 3156 - adev->hdp.funcs->flush_hdp(adev, NULL); 3157 3158 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 3159 lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); ··· 3371 amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj); 3372 3373 if (amdgpu_emu_mode == 1) 3374 - adev->hdp.funcs->flush_hdp(adev, NULL); 3375 3376 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 3377 lower_32_bits(adev->gfx.me.me_fw_gpu_addr)); ··· 4541 if (r) 4542 return r; 4543 4544 - adev->hdp.funcs->flush_hdp(adev, NULL); 4545 4546 value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 4547 false : true;
··· 2428 } 2429 2430 if (amdgpu_emu_mode == 1) 2431 + amdgpu_device_flush_hdp(adev, NULL); 2432 2433 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 2434 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); ··· 2472 } 2473 2474 if (amdgpu_emu_mode == 1) 2475 + amdgpu_device_flush_hdp(adev, NULL); 2476 2477 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); 2478 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); ··· 2517 } 2518 2519 if (amdgpu_emu_mode == 1) 2520 + amdgpu_device_flush_hdp(adev, NULL); 2521 2522 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); 2523 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); ··· 3153 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); 3154 3155 if (amdgpu_emu_mode == 1) 3156 + amdgpu_device_flush_hdp(adev, NULL); 3157 3158 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 3159 lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); ··· 3371 amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj); 3372 3373 if (amdgpu_emu_mode == 1) 3374 + amdgpu_device_flush_hdp(adev, NULL); 3375 3376 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 3377 lower_32_bits(adev->gfx.me.me_fw_gpu_addr)); ··· 4541 if (r) 4542 return r; 4543 4544 + amdgpu_device_flush_hdp(adev, NULL); 4545 4546 value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 4547 false : true;
+3 -3
drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
··· 2324 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); 2325 2326 if (amdgpu_emu_mode == 1) 2327 - adev->hdp.funcs->flush_hdp(adev, NULL); 2328 2329 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 2330 lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); ··· 2468 amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj); 2469 2470 if (amdgpu_emu_mode == 1) 2471 - adev->hdp.funcs->flush_hdp(adev, NULL); 2472 2473 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 2474 lower_32_bits(adev->gfx.me.me_fw_gpu_addr)); ··· 3426 if (r) 3427 return r; 3428 3429 - adev->hdp.funcs->flush_hdp(adev, NULL); 3430 3431 value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 3432 false : true;
··· 2324 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); 2325 2326 if (amdgpu_emu_mode == 1) 2327 + amdgpu_device_flush_hdp(adev, NULL); 2328 2329 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 2330 lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); ··· 2468 amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj); 2469 2470 if (amdgpu_emu_mode == 1) 2471 + amdgpu_device_flush_hdp(adev, NULL); 2472 2473 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 2474 lower_32_bits(adev->gfx.me.me_fw_gpu_addr)); ··· 3426 if (r) 3427 return r; 3428 3429 + amdgpu_device_flush_hdp(adev, NULL); 3430 3431 value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 3432 false : true;
+2 -2
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
··· 268 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; 269 270 /* flush hdp cache */ 271 - adev->hdp.funcs->flush_hdp(adev, NULL); 272 273 /* This is necessary for SRIOV as well as for GFXOFF to function 274 * properly under bare metal ··· 969 adev->hdp.funcs->init_registers(adev); 970 971 /* Flush HDP after it is initialized */ 972 - adev->hdp.funcs->flush_hdp(adev, NULL); 973 974 value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 975 false : true;
··· 268 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; 269 270 /* flush hdp cache */ 271 + amdgpu_device_flush_hdp(adev, NULL); 272 273 /* This is necessary for SRIOV as well as for GFXOFF to function 274 * properly under bare metal ··· 969 adev->hdp.funcs->init_registers(adev); 970 971 /* Flush HDP after it is initialized */ 972 + amdgpu_device_flush_hdp(adev, NULL); 973 974 value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 975 false : true;
+2 -2
drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
··· 229 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; 230 231 /* flush hdp cache */ 232 - adev->hdp.funcs->flush_hdp(adev, NULL); 233 234 /* This is necessary for SRIOV as well as for GFXOFF to function 235 * properly under bare metal ··· 899 return r; 900 901 /* Flush HDP after it is initialized */ 902 - adev->hdp.funcs->flush_hdp(adev, NULL); 903 904 value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 905 false : true;
··· 229 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; 230 231 /* flush hdp cache */ 232 + amdgpu_device_flush_hdp(adev, NULL); 233 234 /* This is necessary for SRIOV as well as for GFXOFF to function 235 * properly under bare metal ··· 899 return r; 900 901 /* Flush HDP after it is initialized */ 902 + amdgpu_device_flush_hdp(adev, NULL); 903 904 value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 905 false : true;
+2 -2
drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
··· 297 return; 298 299 /* flush hdp cache */ 300 - adev->hdp.funcs->flush_hdp(adev, NULL); 301 302 /* This is necessary for SRIOV as well as for GFXOFF to function 303 * properly under bare metal ··· 881 return r; 882 883 /* Flush HDP after it is initialized */ 884 - adev->hdp.funcs->flush_hdp(adev, NULL); 885 886 value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 887 false : true;
··· 297 return; 298 299 /* flush hdp cache */ 300 + amdgpu_device_flush_hdp(adev, NULL); 301 302 /* This is necessary for SRIOV as well as for GFXOFF to function 303 * properly under bare metal ··· 881 return r; 882 883 /* Flush HDP after it is initialized */ 884 + amdgpu_device_flush_hdp(adev, NULL); 885 886 value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 887 false : true;
+1 -1
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
··· 2435 adev->hdp.funcs->init_registers(adev); 2436 2437 /* After HDP is initialized, flush HDP.*/ 2438 - adev->hdp.funcs->flush_hdp(adev, NULL); 2439 2440 if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) 2441 value = false;
··· 2435 adev->hdp.funcs->init_registers(adev); 2436 2437 /* After HDP is initialized, flush HDP.*/ 2438 + amdgpu_device_flush_hdp(adev, NULL); 2439 2440 if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) 2441 value = false;
+1 -1
drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
··· 533 } 534 535 memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); 536 - adev->hdp.funcs->flush_hdp(adev, NULL); 537 vfree(buf); 538 drm_dev_exit(idx); 539 } else {
··· 533 } 534 535 memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); 536 + amdgpu_device_flush_hdp(adev, NULL); 537 vfree(buf); 538 drm_dev_exit(idx); 539 } else {
+1 -1
drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
··· 610 } 611 612 memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); 613 - adev->hdp.funcs->flush_hdp(adev, NULL); 614 vfree(buf); 615 drm_dev_exit(idx); 616 } else {
··· 610 } 611 612 memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); 613 + amdgpu_device_flush_hdp(adev, NULL); 614 vfree(buf); 615 drm_dev_exit(idx); 616 } else {
+1 -1
drivers/gpu/drm/amd/amdgpu/psp_v14_0.c
··· 498 } 499 500 memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); 501 - adev->hdp.funcs->flush_hdp(adev, NULL); 502 vfree(buf); 503 drm_dev_exit(idx); 504 } else {
··· 498 } 499 500 memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); 501 + amdgpu_device_flush_hdp(adev, NULL); 502 vfree(buf); 503 drm_dev_exit(idx); 504 } else {
+7
drivers/gpu/drm/amd/amdgpu/vi.c
··· 239 .max_pixels_per_frame = 4096 * 4096, 240 .max_level = 186, 241 }, 242 }; 243 244 static const struct amdgpu_video_codecs cz_video_codecs_decode =
··· 239 .max_pixels_per_frame = 4096 * 4096, 240 .max_level = 186, 241 }, 242 + { 243 + .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 244 + .max_width = 4096, 245 + .max_height = 4096, 246 + .max_pixels_per_frame = 4096 * 4096, 247 + .max_level = 0, 248 + }, 249 }; 250 251 static const struct amdgpu_video_codecs cz_video_codecs_decode =
+6 -5
drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c
··· 2 // 3 // Copyright 2024 Advanced Micro Devices, Inc. 4 5 6 #include "dml2_internal_types.h" 7 #include "dml_top.h" ··· 14 15 static bool dml21_allocate_memory(struct dml2_context **dml_ctx) 16 { 17 - *dml_ctx = kzalloc(sizeof(struct dml2_context), GFP_KERNEL); 18 if (!(*dml_ctx)) 19 return false; 20 21 - (*dml_ctx)->v21.dml_init.dml2_instance = kzalloc(sizeof(struct dml2_instance), GFP_KERNEL); 22 if (!((*dml_ctx)->v21.dml_init.dml2_instance)) 23 return false; 24 ··· 28 (*dml_ctx)->v21.mode_support.display_config = &(*dml_ctx)->v21.display_config; 29 (*dml_ctx)->v21.mode_programming.display_config = (*dml_ctx)->v21.mode_support.display_config; 30 31 - (*dml_ctx)->v21.mode_programming.programming = kzalloc(sizeof(struct dml2_display_cfg_programming), GFP_KERNEL); 32 if (!((*dml_ctx)->v21.mode_programming.programming)) 33 return false; 34 ··· 116 117 void dml21_destroy(struct dml2_context *dml2) 118 { 119 - kfree(dml2->v21.dml_init.dml2_instance); 120 - kfree(dml2->v21.mode_programming.programming); 121 } 122 123 static void dml21_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *context, struct resource_context *out_new_hw_state,
··· 2 // 3 // Copyright 2024 Advanced Micro Devices, Inc. 4 5 + #include <linux/vmalloc.h> 6 7 #include "dml2_internal_types.h" 8 #include "dml_top.h" ··· 13 14 static bool dml21_allocate_memory(struct dml2_context **dml_ctx) 15 { 16 + *dml_ctx = vzalloc(sizeof(struct dml2_context)); 17 if (!(*dml_ctx)) 18 return false; 19 20 + (*dml_ctx)->v21.dml_init.dml2_instance = vzalloc(sizeof(struct dml2_instance)); 21 if (!((*dml_ctx)->v21.dml_init.dml2_instance)) 22 return false; 23 ··· 27 (*dml_ctx)->v21.mode_support.display_config = &(*dml_ctx)->v21.display_config; 28 (*dml_ctx)->v21.mode_programming.display_config = (*dml_ctx)->v21.mode_support.display_config; 29 30 + (*dml_ctx)->v21.mode_programming.programming = vzalloc(sizeof(struct dml2_display_cfg_programming)); 31 if (!((*dml_ctx)->v21.mode_programming.programming)) 32 return false; 33 ··· 115 116 void dml21_destroy(struct dml2_context *dml2) 117 { 118 + vfree(dml2->v21.dml_init.dml2_instance); 119 + vfree(dml2->v21.mode_programming.programming); 120 } 121 122 static void dml21_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *context, struct resource_context *out_new_hw_state,
+4 -2
drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
··· 24 * 25 */ 26 27 #include "display_mode_core.h" 28 #include "dml2_internal_types.h" 29 #include "dml2_utils.h" ··· 749 750 static inline struct dml2_context *dml2_allocate_memory(void) 751 { 752 - return (struct dml2_context *) kzalloc(sizeof(struct dml2_context), GFP_KERNEL); 753 } 754 755 static void dml2_init(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2) ··· 823 824 if (dml2->architecture == dml2_architecture_21) 825 dml21_destroy(dml2); 826 - kfree(dml2); 827 } 828 829 void dml2_extract_dram_and_fclk_change_support(struct dml2_context *dml2,
··· 24 * 25 */ 26 27 + #include <linux/vmalloc.h> 28 + 29 #include "display_mode_core.h" 30 #include "dml2_internal_types.h" 31 #include "dml2_utils.h" ··· 747 748 static inline struct dml2_context *dml2_allocate_memory(void) 749 { 750 + return (struct dml2_context *) vzalloc(sizeof(struct dml2_context)); 751 } 752 753 static void dml2_init(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2) ··· 821 822 if (dml2->architecture == dml2_architecture_21) 823 dml21_destroy(dml2); 824 + vfree(dml2); 825 } 826 827 void dml2_extract_dram_and_fclk_change_support(struct dml2_context *dml2,
+1
drivers/gpu/drm/i915/display/intel_display_device.h
··· 161 #define HAS_DPT(__display) (DISPLAY_VER(__display) >= 13) 162 #define HAS_DSB(__display) (DISPLAY_INFO(__display)->has_dsb) 163 #define HAS_DSC(__display) (DISPLAY_RUNTIME_INFO(__display)->has_dsc) 164 #define HAS_DSC_MST(__display) (DISPLAY_VER(__display) >= 12 && HAS_DSC(__display)) 165 #define HAS_FBC(__display) (DISPLAY_RUNTIME_INFO(__display)->fbc_mask != 0) 166 #define HAS_FBC_DIRTY_RECT(__display) (DISPLAY_VER(__display) >= 30)
··· 161 #define HAS_DPT(__display) (DISPLAY_VER(__display) >= 13) 162 #define HAS_DSB(__display) (DISPLAY_INFO(__display)->has_dsb) 163 #define HAS_DSC(__display) (DISPLAY_RUNTIME_INFO(__display)->has_dsc) 164 + #define HAS_DSC_3ENGINES(__display) (DISPLAY_VERx100(__display) == 1401 && HAS_DSC(__display)) 165 #define HAS_DSC_MST(__display) (DISPLAY_VER(__display) >= 12 && HAS_DSC(__display)) 166 #define HAS_FBC(__display) (DISPLAY_RUNTIME_INFO(__display)->fbc_mask != 0) 167 #define HAS_FBC_DIRTY_RECT(__display) (DISPLAY_VER(__display) >= 30)
+4 -3
drivers/gpu/drm/i915/display/intel_dp.c
··· 1050 u8 test_slice_count = valid_dsc_slicecount[i] * num_joined_pipes; 1051 1052 /* 1053 - * 3 DSC Slices per pipe need 3 DSC engines, 1054 - * which is supported only with Ultrajoiner. 1055 */ 1056 - if (valid_dsc_slicecount[i] == 3 && num_joined_pipes != 4) 1057 continue; 1058 1059 if (test_slice_count >
··· 1050 u8 test_slice_count = valid_dsc_slicecount[i] * num_joined_pipes; 1051 1052 /* 1053 + * 3 DSC Slices per pipe need 3 DSC engines, which is supported only 1054 + * with Ultrajoiner only for some platforms. 1055 */ 1056 + if (valid_dsc_slicecount[i] == 3 && 1057 + (!HAS_DSC_3ENGINES(display) || num_joined_pipes != 4)) 1058 continue; 1059 1060 if (test_slice_count >
+1 -1
drivers/gpu/drm/mgag200/mgag200_mode.c
··· 223 vsyncstr = mode->crtc_vsync_start - 1; 224 vsyncend = mode->crtc_vsync_end - 1; 225 vtotal = mode->crtc_vtotal - 2; 226 - vblkstr = mode->crtc_vblank_start; 227 vblkend = vtotal + 1; 228 229 linecomp = vdispend;
··· 223 vsyncstr = mode->crtc_vsync_start - 1; 224 vsyncend = mode->crtc_vsync_end - 1; 225 vtotal = mode->crtc_vtotal - 2; 226 + vblkstr = mode->crtc_vblank_start - 1; 227 vblkend = vtotal + 1; 228 229 linecomp = vdispend;
+4 -4
drivers/gpu/drm/msm/adreno/a6xx_gpu.c
··· 242 break; 243 fallthrough; 244 case MSM_SUBMIT_CMD_BUF: 245 - OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3); 246 OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); 247 OUT_RING(ring, upper_32_bits(submit->cmd[i].iova)); 248 - OUT_RING(ring, submit->cmd[i].size); 249 ibs++; 250 break; 251 } ··· 377 break; 378 fallthrough; 379 case MSM_SUBMIT_CMD_BUF: 380 - OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3); 381 OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); 382 OUT_RING(ring, upper_32_bits(submit->cmd[i].iova)); 383 - OUT_RING(ring, submit->cmd[i].size); 384 ibs++; 385 break; 386 }
··· 242 break; 243 fallthrough; 244 case MSM_SUBMIT_CMD_BUF: 245 + OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3); 246 OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); 247 OUT_RING(ring, upper_32_bits(submit->cmd[i].iova)); 248 + OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size)); 249 ibs++; 250 break; 251 } ··· 377 break; 378 fallthrough; 379 case MSM_SUBMIT_CMD_BUF: 380 + OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3); 381 OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); 382 OUT_RING(ring, upper_32_bits(submit->cmd[i].iova)); 383 + OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size)); 384 ibs++; 385 break; 386 }
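For context on the change above: the packet's size dword encodes the IB length in a fixed-width bitfield, so the raw submit->cmd[i].size must be packed through the field macro rather than written as-is. A hedged sketch of the masking idea, with an assumed field width that is not taken from the real register layout:

#include <linux/bits.h>
#include <linux/types.h>

/* assumed width for illustration; the real IB_SIZE field may differ */
#define EXAMPLE_IB_SIZE_MASK	GENMASK(19, 0)

static inline u32 example_pack_ib_size(u32 size_dw)
{
	/* drop bits that would spill into neighbouring packet fields */
	return size_dw & EXAMPLE_IB_SIZE_MASK;
}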
-2
drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_14_msm8937.h
··· 132 .prog_fetch_lines_worst_case = 14, 133 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), 134 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), 135 - .intr_tear_rd_ptr = -1, 136 }, { 137 .name = "intf_2", .id = INTF_2, 138 .base = 0x6b000, .len = 0x268, ··· 140 .prog_fetch_lines_worst_case = 14, 141 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 28), 142 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 29), 143 - .intr_tear_rd_ptr = -1, 144 }, 145 }; 146
··· 132 .prog_fetch_lines_worst_case = 14, 133 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), 134 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), 135 }, { 136 .name = "intf_2", .id = INTF_2, 137 .base = 0x6b000, .len = 0x268, ··· 141 .prog_fetch_lines_worst_case = 14, 142 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 28), 143 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 29), 144 }, 145 }; 146
-1
drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_15_msm8917.h
··· 118 .prog_fetch_lines_worst_case = 14, 119 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), 120 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), 121 - .intr_tear_rd_ptr = -1, 122 }, 123 }; 124
··· 118 .prog_fetch_lines_worst_case = 14, 119 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), 120 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), 121 }, 122 }; 123
-3
drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_16_msm8953.h
··· 131 .prog_fetch_lines_worst_case = 14, 132 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 24), 133 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 25), 134 - .intr_tear_rd_ptr = -1, 135 }, { 136 .name = "intf_1", .id = INTF_1, 137 .base = 0x6a800, .len = 0x268, ··· 139 .prog_fetch_lines_worst_case = 14, 140 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), 141 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), 142 - .intr_tear_rd_ptr = -1, 143 }, { 144 .name = "intf_2", .id = INTF_2, 145 .base = 0x6b000, .len = 0x268, ··· 147 .prog_fetch_lines_worst_case = 14, 148 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 28), 149 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 29), 150 - .intr_tear_rd_ptr = -1, 151 }, 152 }; 153
··· 131 .prog_fetch_lines_worst_case = 14, 132 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 24), 133 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 25), 134 }, { 135 .name = "intf_1", .id = INTF_1, 136 .base = 0x6a800, .len = 0x268, ··· 140 .prog_fetch_lines_worst_case = 14, 141 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), 142 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), 143 }, { 144 .name = "intf_2", .id = INTF_2, 145 .base = 0x6b000, .len = 0x268, ··· 149 .prog_fetch_lines_worst_case = 14, 150 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 28), 151 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 29), 152 }, 153 }; 154
-4
drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_7_msm8996.h
··· 241 .prog_fetch_lines_worst_case = 25, 242 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 24), 243 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 25), 244 - .intr_tear_rd_ptr = -1, 245 }, { 246 .name = "intf_1", .id = INTF_1, 247 .base = 0x6a800, .len = 0x268, ··· 249 .prog_fetch_lines_worst_case = 25, 250 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), 251 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), 252 - .intr_tear_rd_ptr = -1, 253 }, { 254 .name = "intf_2", .id = INTF_2, 255 .base = 0x6b000, .len = 0x268, ··· 257 .prog_fetch_lines_worst_case = 25, 258 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 28), 259 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 29), 260 - .intr_tear_rd_ptr = -1, 261 }, { 262 .name = "intf_3", .id = INTF_3, 263 .base = 0x6b800, .len = 0x268, ··· 264 .prog_fetch_lines_worst_case = 25, 265 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 30), 266 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 31), 267 - .intr_tear_rd_ptr = -1, 268 }, 269 }; 270
··· 241 .prog_fetch_lines_worst_case = 25, 242 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 24), 243 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 25), 244 }, { 245 .name = "intf_1", .id = INTF_1, 246 .base = 0x6a800, .len = 0x268, ··· 250 .prog_fetch_lines_worst_case = 25, 251 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), 252 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), 253 }, { 254 .name = "intf_2", .id = INTF_2, 255 .base = 0x6b000, .len = 0x268, ··· 259 .prog_fetch_lines_worst_case = 25, 260 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 28), 261 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 29), 262 }, { 263 .name = "intf_3", .id = INTF_3, 264 .base = 0x6b800, .len = 0x268, ··· 267 .prog_fetch_lines_worst_case = 25, 268 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 30), 269 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 31), 270 }, 271 }; 272
-3
drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_2_sdm660.h
··· 202 .prog_fetch_lines_worst_case = 21, 203 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 24), 204 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 25), 205 - .intr_tear_rd_ptr = -1, 206 }, { 207 .name = "intf_1", .id = INTF_1, 208 .base = 0x6a800, .len = 0x280, ··· 210 .prog_fetch_lines_worst_case = 21, 211 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), 212 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), 213 - .intr_tear_rd_ptr = -1, 214 }, { 215 .name = "intf_2", .id = INTF_2, 216 .base = 0x6b000, .len = 0x280, ··· 218 .prog_fetch_lines_worst_case = 21, 219 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 28), 220 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 29), 221 - .intr_tear_rd_ptr = -1, 222 }, 223 }; 224
··· 202 .prog_fetch_lines_worst_case = 21, 203 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 24), 204 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 25), 205 }, { 206 .name = "intf_1", .id = INTF_1, 207 .base = 0x6a800, .len = 0x280, ··· 211 .prog_fetch_lines_worst_case = 21, 212 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), 213 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), 214 }, { 215 .name = "intf_2", .id = INTF_2, 216 .base = 0x6b000, .len = 0x280, ··· 220 .prog_fetch_lines_worst_case = 21, 221 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 28), 222 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 29), 223 }, 224 }; 225
-2
drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_3_sdm630.h
··· 147 .prog_fetch_lines_worst_case = 21, 148 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 24), 149 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 25), 150 - .intr_tear_rd_ptr = -1, 151 }, { 152 .name = "intf_1", .id = INTF_1, 153 .base = 0x6a800, .len = 0x280, ··· 155 .prog_fetch_lines_worst_case = 21, 156 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), 157 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), 158 - .intr_tear_rd_ptr = -1, 159 }, 160 }; 161
··· 147 .prog_fetch_lines_worst_case = 21, 148 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 24), 149 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 25), 150 }, { 151 .name = "intf_1", .id = INTF_1, 152 .base = 0x6a800, .len = 0x280, ··· 156 .prog_fetch_lines_worst_case = 21, 157 .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), 158 .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), 159 }, 160 }; 161
+3 -1
drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
··· 1666 */ 1667 static void _dpu_encoder_trigger_start(struct dpu_encoder_phys *phys) 1668 { 1669 - struct dpu_encoder_virt *dpu_enc = to_dpu_encoder_virt(phys->parent); 1670 1671 if (!phys) { 1672 DPU_ERROR("invalid argument(s)\n"); ··· 1677 DPU_ERROR("invalid pingpong hw\n"); 1678 return; 1679 } 1680 1681 if (phys->parent->encoder_type == DRM_MODE_ENCODER_VIRTUAL && 1682 dpu_enc->cwb_mask) {
··· 1666 */ 1667 static void _dpu_encoder_trigger_start(struct dpu_encoder_phys *phys) 1668 { 1669 + struct dpu_encoder_virt *dpu_enc; 1670 1671 if (!phys) { 1672 DPU_ERROR("invalid argument(s)\n"); ··· 1677 DPU_ERROR("invalid pingpong hw\n"); 1678 return; 1679 } 1680 + 1681 + dpu_enc = to_dpu_encoder_virt(phys->parent); 1682 1683 if (phys->parent->encoder_type == DRM_MODE_ENCODER_VIRTUAL && 1684 dpu_enc->cwb_mask) {
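The dpu_encoder.c hunk above reorders a dereference: the old initializer read phys->parent before the !phys guard could run, so the check never protected anything. Below is a standalone sketch of the same ordering hazard, using hypothetical stand-in types rather than the DPU structures.

/* build: cc -Wall -o null_order null_order.c */
#include <stdio.h>

struct parent   { int encoder_type; };
struct phys_enc { struct parent *parent; };

static void trigger_start(struct phys_enc *phys)
{
	struct parent *p;

	/* Deriving p in the declaration, above this check, would already
	 * have dereferenced phys; a NULL caller then crashes before the
	 * guard is reached. */
	if (!phys) {
		fprintf(stderr, "invalid argument(s)\n");
		return;
	}

	p = phys->parent;	/* safe: phys validated first */
	printf("encoder_type=%d\n", p->encoder_type);
}

int main(void)
{
	struct parent par = { .encoder_type = 2 };
	struct phys_enc phys = { .parent = &par };

	trigger_start(&phys);
	trigger_start(NULL);	/* hits the guard instead of crashing */
	return 0;
}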
+39 -35
drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
··· 729 static int dpu_plane_atomic_check_pipe(struct dpu_plane *pdpu, 730 struct dpu_sw_pipe *pipe, 731 struct dpu_sw_pipe_cfg *pipe_cfg, 732 - const struct msm_format *fmt, 733 - const struct drm_display_mode *mode) 734 { 735 uint32_t min_src_size; 736 struct dpu_kms *kms = _dpu_plane_get_kms(&pdpu->base); 737 int ret; 738 739 min_src_size = MSM_FORMAT_IS_YUV(fmt) ? 2 : 1; 740 ··· 951 struct dpu_plane_state *pstate = to_dpu_plane_state(new_plane_state); 952 struct dpu_sw_pipe *pipe = &pstate->pipe; 953 struct dpu_sw_pipe *r_pipe = &pstate->r_pipe; 954 - const struct msm_format *fmt; 955 struct dpu_sw_pipe_cfg *pipe_cfg = &pstate->pipe_cfg; 956 struct dpu_sw_pipe_cfg *r_pipe_cfg = &pstate->r_pipe_cfg; 957 - uint32_t supported_rotations; 958 - const struct dpu_sspp_cfg *pipe_hw_caps; 959 - const struct dpu_sspp_sub_blks *sblk; 960 int ret = 0; 961 962 - pipe_hw_caps = pipe->sspp->cap; 963 - sblk = pipe->sspp->cap->sblk; 964 - 965 - /* 966 - * We already have verified scaling against platform limitations. 967 - * Now check if the SSPP supports scaling at all. 968 - */ 969 - if (!sblk->scaler_blk.len && 970 - ((drm_rect_width(&new_plane_state->src) >> 16 != 971 - drm_rect_width(&new_plane_state->dst)) || 972 - (drm_rect_height(&new_plane_state->src) >> 16 != 973 - drm_rect_height(&new_plane_state->dst)))) 974 - return -ERANGE; 975 - 976 - fmt = msm_framebuffer_format(new_plane_state->fb); 977 - 978 - supported_rotations = DRM_MODE_REFLECT_MASK | DRM_MODE_ROTATE_0; 979 - 980 - if (pipe_hw_caps->features & BIT(DPU_SSPP_INLINE_ROTATION)) 981 - supported_rotations |= DRM_MODE_ROTATE_90; 982 - 983 - pipe_cfg->rotation = drm_rotation_simplify(new_plane_state->rotation, 984 - supported_rotations); 985 - r_pipe_cfg->rotation = pipe_cfg->rotation; 986 - 987 - ret = dpu_plane_atomic_check_pipe(pdpu, pipe, pipe_cfg, fmt, 988 - &crtc_state->adjusted_mode); 989 if (ret) 990 return ret; 991 992 if (drm_rect_width(&r_pipe_cfg->src_rect) != 0) { 993 - ret = dpu_plane_atomic_check_pipe(pdpu, r_pipe, r_pipe_cfg, fmt, 994 - &crtc_state->adjusted_mode); 995 if (ret) 996 return ret; 997 } ··· 1059 struct dpu_plane_state *pstate = to_dpu_plane_state(plane_state); 1060 struct drm_crtc_state *crtc_state; 1061 int ret; 1062 1063 if (plane_state->crtc) 1064 crtc_state = drm_atomic_get_new_crtc_state(state,
··· 729 static int dpu_plane_atomic_check_pipe(struct dpu_plane *pdpu, 730 struct dpu_sw_pipe *pipe, 731 struct dpu_sw_pipe_cfg *pipe_cfg, 732 + const struct drm_display_mode *mode, 733 + struct drm_plane_state *new_plane_state) 734 { 735 uint32_t min_src_size; 736 struct dpu_kms *kms = _dpu_plane_get_kms(&pdpu->base); 737 int ret; 738 + const struct msm_format *fmt; 739 + uint32_t supported_rotations; 740 + const struct dpu_sspp_cfg *pipe_hw_caps; 741 + const struct dpu_sspp_sub_blks *sblk; 742 + 743 + pipe_hw_caps = pipe->sspp->cap; 744 + sblk = pipe->sspp->cap->sblk; 745 + 746 + /* 747 + * We already have verified scaling against platform limitations. 748 + * Now check if the SSPP supports scaling at all. 749 + */ 750 + if (!sblk->scaler_blk.len && 751 + ((drm_rect_width(&new_plane_state->src) >> 16 != 752 + drm_rect_width(&new_plane_state->dst)) || 753 + (drm_rect_height(&new_plane_state->src) >> 16 != 754 + drm_rect_height(&new_plane_state->dst)))) 755 + return -ERANGE; 756 + 757 + fmt = msm_framebuffer_format(new_plane_state->fb); 758 + 759 + supported_rotations = DRM_MODE_REFLECT_MASK | DRM_MODE_ROTATE_0; 760 + 761 + if (pipe_hw_caps->features & BIT(DPU_SSPP_INLINE_ROTATION)) 762 + supported_rotations |= DRM_MODE_ROTATE_90; 763 + 764 + pipe_cfg->rotation = drm_rotation_simplify(new_plane_state->rotation, 765 + supported_rotations); 766 767 min_src_size = MSM_FORMAT_IS_YUV(fmt) ? 2 : 1; 768 ··· 923 struct dpu_plane_state *pstate = to_dpu_plane_state(new_plane_state); 924 struct dpu_sw_pipe *pipe = &pstate->pipe; 925 struct dpu_sw_pipe *r_pipe = &pstate->r_pipe; 926 struct dpu_sw_pipe_cfg *pipe_cfg = &pstate->pipe_cfg; 927 struct dpu_sw_pipe_cfg *r_pipe_cfg = &pstate->r_pipe_cfg; 928 int ret = 0; 929 930 + ret = dpu_plane_atomic_check_pipe(pdpu, pipe, pipe_cfg, 931 + &crtc_state->adjusted_mode, 932 + new_plane_state); 933 if (ret) 934 return ret; 935 936 if (drm_rect_width(&r_pipe_cfg->src_rect) != 0) { 937 + ret = dpu_plane_atomic_check_pipe(pdpu, r_pipe, r_pipe_cfg, 938 + &crtc_state->adjusted_mode, 939 + new_plane_state); 940 if (ret) 941 return ret; 942 } ··· 1058 struct dpu_plane_state *pstate = to_dpu_plane_state(plane_state); 1059 struct drm_crtc_state *crtc_state; 1060 int ret; 1061 + 1062 + if (IS_ERR(plane_state)) 1063 + return PTR_ERR(plane_state); 1064 1065 if (plane_state->crtc) 1066 crtc_state = drm_atomic_get_new_crtc_state(state,
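The second part of the dpu_plane.c hunk guards dpu_plane_virtual_atomic_check() with IS_ERR()/PTR_ERR(): atomic helpers in this path can hand back an error pointer (for example -EDEADLK) rather than NULL, so a plain NULL test would sail past it. The following is a self-contained illustration of the error-pointer convention, with the kernel helpers re-created locally so the example builds in userspace; it is not the kernel's <linux/err.h>.

/* build: cc -Wall -o errptr errptr.c */
#include <stdio.h>
#include <errno.h>

#define MAX_ERRNO 4095

/* Userspace re-creations of the kernel helpers, for illustration only. */
static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static void *get_state(int fail)
{
	static int state = 42;

	return fail ? ERR_PTR(-EDEADLK) : (void *)&state;  /* never NULL on error */
}

static int check(int fail)
{
	void *state = get_state(fail);

	if (IS_ERR(state))	/* a NULL test would miss this */
		return PTR_ERR(state);

	printf("state ok: %d\n", *(int *)state);
	return 0;
}

int main(void)
{
	printf("ok path:   %d\n", check(0));
	printf("fail path: %d\n", check(1));
	return 0;
}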
+7
drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml
··· 2259 </reg32> 2260 </domain> 2261 2262 </database> 2263
··· 2259 </reg32> 2260 </domain> 2261 2262 + <domain name="CP_INDIRECT_BUFFER" width="32" varset="chip" prefix="chip" variants="A5XX-"> 2263 + <reg64 offset="0" name="IB_BASE" type="address"/> 2264 + <reg32 offset="2" name="2"> 2265 + <bitfield name="IB_SIZE" low="0" high="19"/> 2266 + </reg32> 2267 + </domain> 2268 + 2269 </database> 2270
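The new CP_INDIRECT_BUFFER domain records that IB_SIZE occupies bits 0..19 of the packet, i.e. a 20-bit dword count; a larger value would be truncated if packed naively into the field. A generic pack-with-check sketch follows (illustrative only, not the driver's submit path).

/* build: cc -Wall -o ib_size ib_size.c */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define IB_SIZE_BITS	20
#define IB_SIZE_MASK	((1u << IB_SIZE_BITS) - 1)	/* bits 0..19 */

/* Pack a dword count into the IB_SIZE field, reporting truncation. */
static bool pack_ib_size(uint32_t ndwords, uint32_t *out)
{
	if (ndwords & ~IB_SIZE_MASK)
		return false;		/* would overflow the 20-bit field */

	*out = ndwords & IB_SIZE_MASK;
	return true;
}

int main(void)
{
	uint32_t field;

	printf("0x000fffff fits: %d\n", pack_ib_size(0x000fffff, &field));
	printf("0x00100000 fits: %d\n", pack_ib_size(0x00100000, &field));
	return 0;
}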
+13 -3
drivers/gpu/drm/v3d/v3d_sched.c
··· 428 struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]); 429 struct v3d_bo *indirect = to_v3d_bo(indirect_csd->indirect); 430 struct drm_v3d_submit_csd *args = &indirect_csd->job->args; 431 - u32 *wg_counts; 432 433 v3d_get_bo_vaddr(bo); 434 v3d_get_bo_vaddr(indirect); ··· 442 args->cfg[0] = wg_counts[0] << V3D_CSD_CFG012_WG_COUNT_SHIFT; 443 args->cfg[1] = wg_counts[1] << V3D_CSD_CFG012_WG_COUNT_SHIFT; 444 args->cfg[2] = wg_counts[2] << V3D_CSD_CFG012_WG_COUNT_SHIFT; 445 - args->cfg[4] = DIV_ROUND_UP(indirect_csd->wg_size, 16) * 446 - (wg_counts[0] * wg_counts[1] * wg_counts[2]) - 1; 447 448 for (int i = 0; i < 3; i++) { 449 /* 0xffffffff indicates that the uniform rewrite is not needed */
··· 428 struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]); 429 struct v3d_bo *indirect = to_v3d_bo(indirect_csd->indirect); 430 struct drm_v3d_submit_csd *args = &indirect_csd->job->args; 431 + struct v3d_dev *v3d = job->base.v3d; 432 + u32 num_batches, *wg_counts; 433 434 v3d_get_bo_vaddr(bo); 435 v3d_get_bo_vaddr(indirect); ··· 441 args->cfg[0] = wg_counts[0] << V3D_CSD_CFG012_WG_COUNT_SHIFT; 442 args->cfg[1] = wg_counts[1] << V3D_CSD_CFG012_WG_COUNT_SHIFT; 443 args->cfg[2] = wg_counts[2] << V3D_CSD_CFG012_WG_COUNT_SHIFT; 444 + 445 + num_batches = DIV_ROUND_UP(indirect_csd->wg_size, 16) * 446 + (wg_counts[0] * wg_counts[1] * wg_counts[2]); 447 + 448 + /* V3D 7.1.6 and later don't subtract 1 from the number of batches */ 449 + if (v3d->ver < 71 || (v3d->ver == 71 && v3d->rev < 6)) 450 + args->cfg[4] = num_batches - 1; 451 + else 452 + args->cfg[4] = num_batches; 453 + 454 + WARN_ON(args->cfg[4] == ~0); 455 456 for (int i = 0; i < 3; i++) { 457 /* 0xffffffff indicates that the uniform rewrite is not needed */
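The v3d_sched.c hunk above splits the batch-count arithmetic out so the historical "- 1" bias is applied only before V3D 7.1.6: per the hunk, the dispatch runs DIV_ROUND_UP(wg_size, 16) batches per workgroup, multiplied across the three workgroup-count dimensions. A worked example of that arithmetic, with made-up values:

/* build: cc -Wall -o batches batches.c */
#include <stdio.h>
#include <stdint.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

static uint32_t num_batches(uint32_t wg_size, const uint32_t wg_counts[3])
{
	return DIV_ROUND_UP(wg_size, 16) *
	       (wg_counts[0] * wg_counts[1] * wg_counts[2]);
}

int main(void)
{
	/* e.g. 64 invocations per workgroup, 8x4x2 workgroups */
	const uint32_t wg_counts[3] = { 8, 4, 2 };
	uint32_t n = num_batches(64, wg_counts);

	printf("batches             = %u\n", n);	/* 4 * 64 = 256 */
	printf("cfg[4], pre-7.1.6   = %u\n", n - 1);
	printf("cfg[4], 7.1.6+      = %u\n", n);
	return 0;
}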
+1 -4
drivers/gpu/drm/xe/xe_dma_buf.c
··· 145 struct sg_table *sgt, 146 enum dma_data_direction dir) 147 { 148 - struct dma_buf *dma_buf = attach->dmabuf; 149 - struct xe_bo *bo = gem_to_xe_bo(dma_buf->priv); 150 - 151 - if (!xe_bo_is_vram(bo)) { 152 dma_unmap_sgtable(attach->dev, sgt, dir, 0); 153 sg_free_table(sgt); 154 kfree(sgt);
··· 145 struct sg_table *sgt, 146 enum dma_data_direction dir) 147 { 148 + if (sg_page(sgt->sgl)) { 149 dma_unmap_sgtable(attach->dev, sgt, dir, 0); 150 sg_free_table(sgt); 151 kfree(sgt);
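The xe_dma_buf.c hunk stops consulting the BO's placement at unmap time and keys off the sg table itself, presumably because the placement can change between map and unmap while the table records how it was actually built: a page-backed table (sg_page() non-NULL) came from the system-memory map path, an address-only one from the VRAM path. Below is a toy model of that decision, with hypothetical types standing in for the scatterlist and BO.

/* build: cc -Wall -o unmap_model unmap_model.c */
#include <stdio.h>
#include <stdbool.h>

/* Toy model: the mapping records whether it is page-backed; the object's
 * current placement can change underneath it. */
struct mapping { void *page; };		/* NULL when only bus addresses exist */
struct object  { bool in_vram; };

static void unmap(const struct object *obj, const struct mapping *map)
{
	(void)obj;	/* current placement deliberately ignored: it may
			 * have changed since the mapping was created */

	if (map->page)
		printf("dma_unmap + free page-backed table\n");
	else
		printf("free address-only table\n");
}

int main(void)
{
	int dummy;
	struct mapping sysmem_map = { .page = &dummy };
	struct mapping vram_map   = { .page = NULL };
	struct object  obj        = { .in_vram = false };

	unmap(&obj, &sysmem_map);
	unmap(&obj, &vram_map);
	return 0;
}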
+45 -30
drivers/gpu/drm/xe/xe_guc_ads.c
··· 490 engine_enable_mask(gt, XE_ENGINE_CLASS_OTHER)); 491 } 492 493 - static void guc_prep_golden_lrc_null(struct xe_guc_ads *ads) 494 { 495 struct xe_device *xe = ads_to_xe(ads); 496 struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), 497 offsetof(struct __guc_ads_blob, system_info)); 498 - u8 guc_class; 499 500 - for (guc_class = 0; guc_class <= GUC_MAX_ENGINE_CLASSES; ++guc_class) { 501 if (!info_map_read(xe, &info_map, 502 engine_enabled_masks[guc_class])) 503 continue; 504 505 ads_blob_write(ads, ads.eng_state_size[guc_class], 506 - guc_ads_golden_lrc_size(ads) - 507 - xe_lrc_skip_size(xe)); 508 ads_blob_write(ads, ads.golden_context_lrca[guc_class], 509 - xe_bo_ggtt_addr(ads->bo) + 510 - guc_ads_golden_lrc_offset(ads)); 511 } 512 } 513 ··· 885 886 xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); 887 guc_policies_init(ads); 888 - guc_prep_golden_lrc_null(ads); 889 guc_mapping_table_init_invalid(gt, &info_map); 890 guc_doorbell_init(ads); 891 ··· 911 guc_policies_init(ads); 912 fill_engine_enable_masks(gt, &info_map); 913 guc_mmio_reg_state_init(ads); 914 - guc_prep_golden_lrc_null(ads); 915 guc_mapping_table_init(gt, &info_map); 916 guc_capture_prep_lists(ads); 917 guc_doorbell_init(ads); ··· 931 guc_ads_private_data_offset(ads)); 932 } 933 934 - static void guc_populate_golden_lrc(struct xe_guc_ads *ads) 935 { 936 struct xe_device *xe = ads_to_xe(ads); 937 struct xe_gt *gt = ads_to_gt(ads); 938 struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), 939 offsetof(struct __guc_ads_blob, system_info)); 940 size_t total_size = 0, alloc_size, real_size; 941 - u32 addr_ggtt, offset; 942 int class; 943 944 offset = guc_ads_golden_lrc_offset(ads); 945 - addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset; 946 947 for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { 948 u8 guc_class; ··· 963 alloc_size = PAGE_ALIGN(real_size); 964 total_size += alloc_size; 965 966 - /* 967 - * This interface is slightly confusing. We need to pass the 968 - * base address of the full golden context and the size of just 969 - * the engine state, which is the section of the context image 970 - * that starts after the execlists LRC registers. This is 971 - * required to allow the GuC to restore just the engine state 972 - * when a watchdog reset occurs. 973 - * We calculate the engine state size by removing the size of 974 - * what comes before it in the context image (which is identical 975 - * on all engines). 976 - */ 977 - ads_blob_write(ads, ads.eng_state_size[guc_class], 978 - real_size - xe_lrc_skip_size(xe)); 979 - ads_blob_write(ads, ads.golden_context_lrca[guc_class], 980 - addr_ggtt); 981 - 982 xe_map_memcpy_to(xe, ads_to_map(ads), offset, 983 gt->default_lrc[class], real_size); 984 985 - addr_ggtt += alloc_size; 986 offset += alloc_size; 987 } 988 ··· 974 975 void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads) 976 { 977 - guc_populate_golden_lrc(ads); 978 } 979 980 static int guc_ads_action_update_policies(struct xe_guc_ads *ads, u32 policy_offset)
··· 490 engine_enable_mask(gt, XE_ENGINE_CLASS_OTHER)); 491 } 492 493 + /* 494 + * Write the offsets corresponding to the golden LRCs. The actual data is 495 + * populated later by guc_golden_lrc_populate() 496 + */ 497 + static void guc_golden_lrc_init(struct xe_guc_ads *ads) 498 { 499 struct xe_device *xe = ads_to_xe(ads); 500 + struct xe_gt *gt = ads_to_gt(ads); 501 struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), 502 offsetof(struct __guc_ads_blob, system_info)); 503 + size_t alloc_size, real_size; 504 + u32 addr_ggtt, offset; 505 + int class; 506 507 + offset = guc_ads_golden_lrc_offset(ads); 508 + addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset; 509 + 510 + for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { 511 + u8 guc_class; 512 + 513 + guc_class = xe_engine_class_to_guc_class(class); 514 + 515 if (!info_map_read(xe, &info_map, 516 engine_enabled_masks[guc_class])) 517 continue; 518 519 + real_size = xe_gt_lrc_size(gt, class); 520 + alloc_size = PAGE_ALIGN(real_size); 521 + 522 + /* 523 + * This interface is slightly confusing. We need to pass the 524 + * base address of the full golden context and the size of just 525 + * the engine state, which is the section of the context image 526 + * that starts after the execlists LRC registers. This is 527 + * required to allow the GuC to restore just the engine state 528 + * when a watchdog reset occurs. 529 + * We calculate the engine state size by removing the size of 530 + * what comes before it in the context image (which is identical 531 + * on all engines). 532 + */ 533 ads_blob_write(ads, ads.eng_state_size[guc_class], 534 + real_size - xe_lrc_skip_size(xe)); 535 ads_blob_write(ads, ads.golden_context_lrca[guc_class], 536 + addr_ggtt); 537 + 538 + addr_ggtt += alloc_size; 539 } 540 } 541 ··· 857 858 xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); 859 guc_policies_init(ads); 860 + guc_golden_lrc_init(ads); 861 guc_mapping_table_init_invalid(gt, &info_map); 862 guc_doorbell_init(ads); 863 ··· 883 guc_policies_init(ads); 884 fill_engine_enable_masks(gt, &info_map); 885 guc_mmio_reg_state_init(ads); 886 + guc_golden_lrc_init(ads); 887 guc_mapping_table_init(gt, &info_map); 888 guc_capture_prep_lists(ads); 889 guc_doorbell_init(ads); ··· 903 guc_ads_private_data_offset(ads)); 904 } 905 906 + /* 907 + * After the golden LRC's are recorded for each engine class by the first 908 + * submission, copy them to the ADS, as initialized earlier by 909 + * guc_golden_lrc_init(). 910 + */ 911 + static void guc_golden_lrc_populate(struct xe_guc_ads *ads) 912 { 913 struct xe_device *xe = ads_to_xe(ads); 914 struct xe_gt *gt = ads_to_gt(ads); 915 struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), 916 offsetof(struct __guc_ads_blob, system_info)); 917 size_t total_size = 0, alloc_size, real_size; 918 + u32 offset; 919 int class; 920 921 offset = guc_ads_golden_lrc_offset(ads); 922 923 for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { 924 u8 guc_class; ··· 931 alloc_size = PAGE_ALIGN(real_size); 932 total_size += alloc_size; 933 934 xe_map_memcpy_to(xe, ads_to_map(ads), offset, 935 gt->default_lrc[class], real_size); 936 937 offset += alloc_size; 938 } 939 ··· 959 960 void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads) 961 { 962 + guc_golden_lrc_populate(ads); 963 } 964 965 static int guc_ads_action_update_policies(struct xe_guc_ads *ads, u32 policy_offset)
-24
drivers/gpu/drm/xe/xe_hmm.c
··· 19 return (end - start) >> PAGE_SHIFT; 20 } 21 22 - /** 23 - * xe_mark_range_accessed() - mark a range is accessed, so core mm 24 - * have such information for memory eviction or write back to 25 - * hard disk 26 - * @range: the range to mark 27 - * @write: if write to this range, we mark pages in this range 28 - * as dirty 29 - */ 30 - static void xe_mark_range_accessed(struct hmm_range *range, bool write) 31 - { 32 - struct page *page; 33 - u64 i, npages; 34 - 35 - npages = xe_npages_in_range(range->start, range->end); 36 - for (i = 0; i < npages; i++) { 37 - page = hmm_pfn_to_page(range->hmm_pfns[i]); 38 - if (write) 39 - set_page_dirty_lock(page); 40 - 41 - mark_page_accessed(page); 42 - } 43 - } 44 - 45 static int xe_alloc_sg(struct xe_device *xe, struct sg_table *st, 46 struct hmm_range *range, struct rw_semaphore *notifier_sem) 47 { ··· 308 if (ret) 309 goto out_unlock; 310 311 - xe_mark_range_accessed(&hmm_range, write); 312 userptr->sg = &userptr->sgt; 313 xe_hmm_userptr_set_mapped(uvma); 314 userptr->notifier_seq = hmm_range.notifier_seq;
··· 19 return (end - start) >> PAGE_SHIFT; 20 } 21 22 static int xe_alloc_sg(struct xe_device *xe, struct sg_table *st, 23 struct hmm_range *range, struct rw_semaphore *notifier_sem) 24 { ··· 331 if (ret) 332 goto out_unlock; 333 334 userptr->sg = &userptr->sgt; 335 xe_hmm_userptr_set_mapped(uvma); 336 userptr->notifier_seq = hmm_range.notifier_seq;
+11 -2
drivers/gpu/drm/xe/xe_pxp_debugfs.c
··· 66 { 67 struct xe_pxp *pxp = node_to_pxp(m->private); 68 struct drm_printer p = drm_seq_file_printer(m); 69 70 - if (!xe_pxp_is_enabled(pxp)) 71 - return -ENODEV; 72 73 /* simulate a termination interrupt */ 74 spin_lock_irq(&pxp->xe->irq.lock);
··· 66 { 67 struct xe_pxp *pxp = node_to_pxp(m->private); 68 struct drm_printer p = drm_seq_file_printer(m); 69 + int ready = xe_pxp_get_readiness_status(pxp); 70 71 + if (ready < 0) 72 + return ready; /* disabled or error occurred */ 73 + else if (!ready) 74 + return -EBUSY; /* init still in progress */ 75 + 76 + /* no need for a termination if PXP is not active */ 77 + if (pxp->status != XE_PXP_ACTIVE) { 78 + drm_printf(&p, "PXP not active\n"); 79 + return 0; 80 + } 81 82 /* simulate a termination interrupt */ 83 spin_lock_irq(&pxp->xe->irq.lock);
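The reworked debugfs hunk above follows a tri-state readiness convention, per its own comments: a negative value (disabled or a hard error) is returned as-is, zero (initialisation still in progress) becomes -EBUSY, and only a positive value lets the forced termination proceed. A minimal mapping of that convention, independent of the driver:

/* build: cc -Wall -o readiness readiness.c */
#include <stdio.h>
#include <errno.h>

/* Map a tri-state readiness value to an errno-style return:
 * <0 passthrough, 0 -> -EBUSY, >0 -> proceed (0). */
static int check_ready(int ready)
{
	if (ready < 0)
		return ready;		/* disabled or error */
	if (!ready)
		return -EBUSY;		/* still initialising */
	return 0;			/* ready: safe to proceed */
}

int main(void)
{
	printf("error path:  %d\n", check_ready(-ENODEV));
	printf("in progress: %d\n", check_ready(0));
	printf("ready:       %d\n", check_ready(1));
	return 0;
}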
+1 -2
include/drm/drm_gem.h
··· 585 */ 586 static inline bool drm_gem_is_imported(const struct drm_gem_object *obj) 587 { 588 - /* The dma-buf's priv field points to the original GEM object. */ 589 - return obj->dma_buf && (obj->dma_buf->priv != obj); 590 } 591 592 #ifdef CONFIG_LOCKDEP
··· 585 */ 586 static inline bool drm_gem_is_imported(const struct drm_gem_object *obj) 587 { 588 + return !!obj->import_attach; 589 } 590 591 #ifdef CONFIG_LOCKDEP
+2 -2
include/uapi/drm/ivpu_accel.h
··· 1 /* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ 2 /* 3 - * Copyright (C) 2020-2024 Intel Corporation 4 */ 5 6 #ifndef __UAPI_IVPU_DRM_H__ ··· 147 * platform type when executing on a simulator or emulator (read-only) 148 * 149 * %DRM_IVPU_PARAM_CORE_CLOCK_RATE: 150 - * Current PLL frequency (read-only) 151 * 152 * %DRM_IVPU_PARAM_NUM_CONTEXTS: 153 * Maximum number of simultaneously existing contexts (read-only)
··· 1 /* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ 2 /* 3 + * Copyright (C) 2020-2025 Intel Corporation 4 */ 5 6 #ifndef __UAPI_IVPU_DRM_H__ ··· 147 * platform type when executing on a simulator or emulator (read-only) 148 * 149 * %DRM_IVPU_PARAM_CORE_CLOCK_RATE: 150 + * Maximum frequency of the NPU data processing unit clock (read-only) 151 * 152 * %DRM_IVPU_PARAM_NUM_CONTEXTS: 153 * Maximum number of simultaneously existing contexts (read-only)
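With the reworded documentation, DRM_IVPU_PARAM_CORE_CLOCK_RATE reports the maximum NPU data-processing-unit clock rather than a current PLL-derived value. Below is a userspace query sketch, assuming the DRM_IOCTL_IVPU_GET_PARAM ioctl and struct drm_ivpu_param declared in this header; the /dev/accel/accel0 path, the include paths, and the reported units are environment-dependent assumptions.

/* build: cc -Wall -I/usr/include/libdrm -o npu_freq npu_freq.c
 * (needs the kernel/libdrm uapi headers on the include path) */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <drm/ivpu_accel.h>

int main(void)
{
	/* Accel node path is an assumption; adjust for your system. */
	int fd = open("/dev/accel/accel0", O_RDWR);
	struct drm_ivpu_param param;

	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&param, 0, sizeof(param));
	param.param = DRM_IVPU_PARAM_CORE_CLOCK_RATE;

	if (ioctl(fd, DRM_IOCTL_IVPU_GET_PARAM, &param))
		perror("DRM_IOCTL_IVPU_GET_PARAM");
	else
		printf("max DPU clock (as reported by the driver): %llu\n",
		       (unsigned long long)param.value);

	close(fd);
	return 0;
}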