Merge tag 'amd-drm-next-6.2-2022-12-02' of https://gitlab.freedesktop.org/agd5f/linux into drm-next

+23

Documentation/gpu/amdgpu/amdgpu-glossary.rst

··· 30 30 EOP 31 31 End Of Pipe/Pipeline 32 32 33 + GART 34 + Graphics Address Remapping Table. This is the name we use for the GPUVM 35 + page table used by the GPU kernel driver. It remaps system resources 36 + (memory or MMIO space) into the GPU's address space so the GPU can access 37 + them. The name GART harkens back to the days of AGP when the platform 38 + provided an MMU that the GPU could use to get a contiguous view of 39 + scattered pages for DMA. The MMU has since moved on to the GPU, but the 40 + name stuck. 41 + 33 42 GC 34 43 Graphics and Compute 35 44 36 45 GMC 37 46 Graphics Memory Controller 47 + 48 + GPUVM 49 + GPU Virtual Memory. This is the GPU's MMU. The GPU supports multiple 50 + virtual address spaces that can be in flight at any given time. These 51 + allow the GPU to remap VRAM and system resources into GPU virtual address 52 + spaces for use by the GPU kernel driver and applications using the GPU. 53 + These provide memory protection for different applications using the GPU. 54 + 55 + GTT 56 + Graphics Translation Tables. This is a memory pool managed through TTM 57 + which provides access to system resources (memory or MMIO space) for 58 + use by the GPU. These addresses can be mapped into the "GART" GPUVM page 59 + table for use by the kernel driver or into per-process GPUVM page tables 60 + for application usage. 38 61 39 62 IH 40 63 Interrupt Handler

+2 -2

Documentation/gpu/amdgpu/driver-core.rst

··· 148 148 MMU Notifier 149 149 ============ 150 150 151 - .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c 151 + .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c 152 152 :doc: MMU Notifier 153 153 154 - .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c 154 + .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c 155 155 :internal: 156 156 157 157 AMDGPU Virtual Memory

+1 -1

Documentation/gpu/amdgpu/index.rst

··· 3 3 ========================== 4 4 5 5 The drm/amdgpu driver supports all AMD Radeon GPUs based on the Graphics Core 6 - Next (GCN) architecture. 6 + Next (GCN), Radeon DNA (RDNA), and Compute DNA (CDNA) architectures. 7 7 8 8 .. toctree:: 9 9

+2 -1

drivers/gpu/drm/amd/amdgpu/Makefile

··· 58 58 amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \ 59 59 amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \ 60 60 amdgpu_fw_attestation.o amdgpu_securedisplay.o \ 61 - amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o 61 + amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \ 62 + amdgpu_ring_mux.o 62 63 63 64 amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o 64 65

+1 -1

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

··· 2204 2204 2205 2205 ret = drm_vma_node_allow(&obj->vma_node, drm_priv); 2206 2206 if (ret) { 2207 - kfree(mem); 2207 + kfree(*mem); 2208 2208 return ret; 2209 2209 } 2210 2210

+11 -7

drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c

··· 104 104 static int amdgpu_atomfirmware_allocate_fb_v2_1(struct amdgpu_device *adev, 105 105 struct vram_usagebyfirmware_v2_1 *fw_usage, int *usage_bytes) 106 106 { 107 - uint32_t start_addr, fw_size, drv_size; 107 + u32 start_addr, fw_size, drv_size; 108 108 109 109 start_addr = le32_to_cpu(fw_usage->start_address_in_kb); 110 110 fw_size = le16_to_cpu(fw_usage->used_by_firmware_in_kb); ··· 116 116 drv_size); 117 117 118 118 if ((start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) == 119 - (uint32_t)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION << 119 + (u32)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION << 120 120 ATOM_VRAM_OPERATION_FLAGS_SHIFT)) { 121 121 /* Firmware request VRAM reservation for SR-IOV */ 122 122 adev->mman.fw_vram_usage_start_offset = (start_addr & ··· 133 133 static int amdgpu_atomfirmware_allocate_fb_v2_2(struct amdgpu_device *adev, 134 134 struct vram_usagebyfirmware_v2_2 *fw_usage, int *usage_bytes) 135 135 { 136 - uint32_t fw_start_addr, fw_size, drv_start_addr, drv_size; 136 + u32 fw_start_addr, fw_size, drv_start_addr, drv_size; 137 137 138 138 fw_start_addr = le32_to_cpu(fw_usage->fw_region_start_address_in_kb); 139 139 fw_size = le16_to_cpu(fw_usage->used_by_firmware_in_kb); ··· 147 147 drv_start_addr, 148 148 drv_size); 149 149 150 - if ((fw_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION << 30)) == 0) { 150 + if (amdgpu_sriov_vf(adev) && 151 + ((fw_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION << 152 + ATOM_VRAM_OPERATION_FLAGS_SHIFT)) == 0)) { 151 153 /* Firmware request VRAM reservation for SR-IOV */ 152 154 adev->mman.fw_vram_usage_start_offset = (fw_start_addr & 153 155 (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10; 154 156 adev->mman.fw_vram_usage_size = fw_size << 10; 155 157 } 156 158 157 - if ((drv_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION << 30)) == 0) { 159 + if (amdgpu_sriov_vf(adev) && 160 + ((drv_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION << 161 + ATOM_VRAM_OPERATION_FLAGS_SHIFT)) == 0)) { 158 162 /* driver 
request VRAM reservation for SR-IOV */ 159 163 adev->mman.drv_vram_usage_start_offset = (drv_start_addr & 160 164 (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10; ··· 176 172 vram_usagebyfirmware); 177 173 struct vram_usagebyfirmware_v2_1 *fw_usage_v2_1; 178 174 struct vram_usagebyfirmware_v2_2 *fw_usage_v2_2; 179 - uint16_t data_offset; 180 - uint8_t frev, crev; 175 + u16 data_offset; 176 + u8 frev, crev; 181 177 int usage_bytes = 0; 182 178 183 179 if (amdgpu_atom_parse_data_header(ctx, index, NULL, &frev, &crev, &data_offset)) {

+1

drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c

··· 317 317 318 318 if (!found) 319 319 return false; 320 + pci_dev_put(pdev); 320 321 321 322 adev->bios = kmalloc(size, GFP_KERNEL); 322 323 if (!adev->bios) {

+5

drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

··· 2473 2473 if (!amdgpu_sriov_vf(adev)) { 2474 2474 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); 2475 2475 2476 + if (WARN_ON(!hive)) { 2477 + r = -ENOENT; 2478 + goto init_failed; 2479 + } 2480 + 2476 2481 if (!hive->reset_domain || 2477 2482 !amdgpu_reset_get_reset_domain(hive->reset_domain)) { 2478 2483 r = -ENOENT;

+9

drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c

··· 1512 1512 case IP_VERSION(11, 0, 1): 1513 1513 case IP_VERSION(11, 0, 2): 1514 1514 case IP_VERSION(11, 0, 3): 1515 + case IP_VERSION(11, 0, 4): 1515 1516 amdgpu_device_ip_block_add(adev, &soc21_common_ip_block); 1516 1517 break; 1517 1518 default: ··· 1557 1556 case IP_VERSION(11, 0, 1): 1558 1557 case IP_VERSION(11, 0, 2): 1559 1558 case IP_VERSION(11, 0, 3): 1559 + case IP_VERSION(11, 0, 4): 1560 1560 amdgpu_device_ip_block_add(adev, &gmc_v11_0_ip_block); 1561 1561 break; 1562 1562 default: ··· 1643 1641 case IP_VERSION(13, 0, 7): 1644 1642 case IP_VERSION(13, 0, 8): 1645 1643 case IP_VERSION(13, 0, 10): 1644 + case IP_VERSION(13, 0, 11): 1646 1645 amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block); 1647 1646 break; 1648 1647 case IP_VERSION(13, 0, 4): ··· 1694 1691 case IP_VERSION(13, 0, 7): 1695 1692 case IP_VERSION(13, 0, 8): 1696 1693 case IP_VERSION(13, 0, 10): 1694 + case IP_VERSION(13, 0, 11): 1697 1695 amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block); 1698 1696 break; 1699 1697 default: ··· 1808 1804 case IP_VERSION(11, 0, 1): 1809 1805 case IP_VERSION(11, 0, 2): 1810 1806 case IP_VERSION(11, 0, 3): 1807 + case IP_VERSION(11, 0, 4): 1811 1808 amdgpu_device_ip_block_add(adev, &gfx_v11_0_ip_block); 1812 1809 break; 1813 1810 default: ··· 1972 1967 case IP_VERSION(11, 0, 1): 1973 1968 case IP_VERSION(11, 0, 2): 1974 1969 case IP_VERSION(11, 0, 3): 1970 + case IP_VERSION(11, 0, 4): 1975 1971 amdgpu_device_ip_block_add(adev, &mes_v11_0_ip_block); 1976 1972 adev->enable_mes = true; 1977 1973 adev->enable_mes_kiq = true; ··· 2203 2197 adev->family = AMDGPU_FAMILY_GC_11_0_0; 2204 2198 break; 2205 2199 case IP_VERSION(11, 0, 1): 2200 + case IP_VERSION(11, 0, 4): 2206 2201 adev->family = AMDGPU_FAMILY_GC_11_0_1; 2207 2202 break; 2208 2203 default: ··· 2221 2214 case IP_VERSION(10, 3, 6): 2222 2215 case IP_VERSION(10, 3, 7): 2223 2216 case IP_VERSION(11, 0, 1): 2217 + case IP_VERSION(11, 0, 4): 2224 2218 adev->flags |= AMD_IS_APU; 2225 2219 break; 2226 
2220 default: ··· 2278 2270 adev->nbio.hdp_flush_reg = &nbio_v4_3_hdp_flush_reg; 2279 2271 break; 2280 2272 case IP_VERSION(7, 7, 0): 2273 + case IP_VERSION(7, 7, 1): 2281 2274 adev->nbio.funcs = &nbio_v7_7_funcs; 2282 2275 adev->nbio.hdp_flush_reg = &nbio_v7_7_hdp_flush_reg; 2283 2276 break;

+8 -4

drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

··· 231 231 232 232 /** 233 233 * DOC: gartsize (uint) 234 - * Restrict the size of GART in Mib (32, 64, etc.) for testing. The default is -1 (The size depends on asic). 234 + * Restrict the size of GART (for kernel use) in MiB (32, 64, etc.) for testing. 235 + * The default is -1 (The size depends on asic). 235 236 */ 236 - MODULE_PARM_DESC(gartsize, "Size of GART to setup in megabytes (32, 64, etc., -1=auto)"); 237 + MODULE_PARM_DESC(gartsize, "Size of kernel GART to setup in megabytes (32, 64, etc., -1=auto)"); 237 238 module_param_named(gartsize, amdgpu_gart_size, uint, 0600); 238 239 239 240 /** 240 241 * DOC: gttsize (int) 241 - * Restrict the size of GTT domain in MiB for testing. The default is -1 (Use 1/2 RAM, minimum value is 3GB). 242 + * Restrict the size of GTT domain (for userspace use) in MiB for testing. 243 + * The default is -1 (Use 1/2 RAM, minimum value is 3GB). 242 244 */ 243 - MODULE_PARM_DESC(gttsize, "Size of the GTT domain in megabytes (-1 = auto)"); 245 + MODULE_PARM_DESC(gttsize, "Size of the GTT userspace domain in megabytes (-1 = auto)"); 244 246 module_param_named(gttsize, amdgpu_gtt_size, int, 0600); 245 247 246 248 /** ··· 2570 2568 } else if (amdgpu_device_supports_baco(drm_dev)) { 2571 2569 amdgpu_device_baco_enter(drm_dev); 2572 2570 } 2571 + 2572 + dev_dbg(&pdev->dev, "asic/device is runtime suspended\n"); 2573 2573 2574 2574 return 0; 2575 2575 }

+54

drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c

··· 55 55 56 56 /* RB, DMA, etc. */ 57 57 struct amdgpu_ring *ring; 58 + ktime_t start_timestamp; 58 59 }; 59 60 60 61 static struct kmem_cache *amdgpu_fence_slab; ··· 199 198 return r; 200 199 } 201 200 } 201 + 202 + to_amdgpu_fence(fence)->start_timestamp = ktime_get(); 202 203 203 204 /* This function can't be called concurrently anyway, otherwise 204 205 * emitting the fence would mess up the hardware ring buffer. ··· 407 404 emitted -= atomic_read(&ring->fence_drv.last_seq); 408 405 emitted += READ_ONCE(ring->fence_drv.sync_seq); 409 406 return lower_32_bits(emitted); 407 + } 408 + 409 + /** 410 + * amdgpu_fence_last_unsignaled_time_us - the time fence emitted until now 411 + * @ring: ring the fence is associated with 412 + * 413 + * Find the earliest fence unsignaled until now, calculate the time delta 414 + * between the time fence emitted and now. 415 + */ 416 + u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring) 417 + { 418 + struct amdgpu_fence_driver *drv = &ring->fence_drv; 419 + struct dma_fence *fence; 420 + uint32_t last_seq, sync_seq; 421 + 422 + last_seq = atomic_read(&ring->fence_drv.last_seq); 423 + sync_seq = READ_ONCE(ring->fence_drv.sync_seq); 424 + if (last_seq == sync_seq) 425 + return 0; 426 + 427 + ++last_seq; 428 + last_seq &= drv->num_fences_mask; 429 + fence = drv->fences[last_seq]; 430 + if (!fence) 431 + return 0; 432 + 433 + return ktime_us_delta(ktime_get(), 434 + to_amdgpu_fence(fence)->start_timestamp); 435 + } 436 + 437 + /** 438 + * amdgpu_fence_update_start_timestamp - update the timestamp of the fence 439 + * @ring: ring the fence is associated with 440 + * @seq: the fence seq number to update. 441 + * @timestamp: the start timestamp to update. 442 + * 443 + * The function called at the time the fence and related ib is about to 444 + * resubmit to gpu in MCBP scenario. Thus we do not consider race condition 445 + * with amdgpu_fence_process to modify the same fence. 
446 + */ 447 + void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq, ktime_t timestamp) 448 + { 449 + struct amdgpu_fence_driver *drv = &ring->fence_drv; 450 + struct dma_fence *fence; 451 + 452 + seq &= drv->num_fences_mask; 453 + fence = drv->fences[seq]; 454 + if (!fence) 455 + return; 456 + 457 + to_amdgpu_fence(fence)->start_timestamp = timestamp; 410 458 } 411 459 412 460 /**

+4

drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h

··· 33 33 #include "amdgpu_imu.h" 34 34 #include "soc15.h" 35 35 #include "amdgpu_ras.h" 36 + #include "amdgpu_ring_mux.h" 36 37 37 38 /* GFX current status */ 38 39 #define AMDGPU_GFX_NORMAL_MODE 0x00000000L ··· 353 352 struct amdgpu_gfx_ras *ras; 354 353 355 354 bool is_poweron; 355 + 356 + struct amdgpu_ring sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS]; 357 + struct amdgpu_ring_mux muxer; 356 358 }; 357 359 358 360 #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))

+35 -15

drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c

··· 51 51 #include "amdgpu_amdkfd.h" 52 52 #include "amdgpu_hmm.h" 53 53 54 + #define MAX_WALK_BYTE (2UL << 30) 55 + 54 56 /** 55 57 * amdgpu_hmm_invalidate_gfx - callback to notify about mm change 56 58 * ··· 165 163 struct hmm_range **phmm_range) 166 164 { 167 165 struct hmm_range *hmm_range; 166 + unsigned long end; 168 167 unsigned long timeout; 169 168 unsigned long i; 170 169 unsigned long *pfns; ··· 187 184 hmm_range->default_flags |= HMM_PFN_REQ_WRITE; 188 185 hmm_range->hmm_pfns = pfns; 189 186 hmm_range->start = start; 190 - hmm_range->end = start + npages * PAGE_SIZE; 187 + end = start + npages * PAGE_SIZE; 191 188 hmm_range->dev_private_owner = owner; 192 189 193 - /* Assuming 512MB takes maxmium 1 second to fault page address */ 194 - timeout = max(npages >> 17, 1ULL) * HMM_RANGE_DEFAULT_TIMEOUT; 195 - timeout = jiffies + msecs_to_jiffies(timeout); 190 + do { 191 + hmm_range->end = min(hmm_range->start + MAX_WALK_BYTE, end); 192 + 193 + pr_debug("hmm range: start = 0x%lx, end = 0x%lx", 194 + hmm_range->start, hmm_range->end); 195 + 196 + /* Assuming 512MB takes maxmium 1 second to fault page address */ 197 + timeout = max((hmm_range->end - hmm_range->start) >> 29, 1UL); 198 + timeout *= HMM_RANGE_DEFAULT_TIMEOUT; 199 + timeout = jiffies + msecs_to_jiffies(timeout); 196 200 197 201 retry: 198 - hmm_range->notifier_seq = mmu_interval_read_begin(notifier); 199 - r = hmm_range_fault(hmm_range); 200 - if (unlikely(r)) { 201 - /* 202 - * FIXME: This timeout should encompass the retry from 203 - * mmu_interval_read_retry() as well. 204 - */ 205 - if (r == -EBUSY && !time_after(jiffies, timeout)) 206 - goto retry; 207 - goto out_free_pfns; 208 - } 202 + hmm_range->notifier_seq = mmu_interval_read_begin(notifier); 203 + r = hmm_range_fault(hmm_range); 204 + if (unlikely(r)) { 205 + /* 206 + * FIXME: This timeout should encompass the retry from 207 + * mmu_interval_read_retry() as well. 
208 + */ 209 + if (r == -EBUSY && !time_after(jiffies, timeout)) 210 + goto retry; 211 + goto out_free_pfns; 212 + } 213 + 214 + if (hmm_range->end == end) 215 + break; 216 + hmm_range->hmm_pfns += MAX_WALK_BYTE >> PAGE_SHIFT; 217 + hmm_range->start = hmm_range->end; 218 + schedule(); 219 + } while (hmm_range->end < end); 220 + 221 + hmm_range->start = start; 222 + hmm_range->hmm_pfns = pfns; 209 223 210 224 /* 211 225 * Due to default_flags, all pages are HMM_PFN_VALID or

+2

drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c

··· 211 211 } 212 212 } 213 213 214 + amdgpu_ring_ib_begin(ring); 214 215 if (job && ring->funcs->init_cond_exec) 215 216 patch_offset = amdgpu_ring_init_cond_exec(ring); 216 217 ··· 286 285 ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH) 287 286 ring->funcs->emit_wave_limit(ring, false); 288 287 288 + amdgpu_ring_ib_end(ring); 289 289 amdgpu_ring_commit(ring); 290 290 return 0; 291 291 }

+2 -2

drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

··· 431 431 case AMDGPU_HW_IP_VCN_DEC: 432 432 type = AMD_IP_BLOCK_TYPE_VCN; 433 433 for (i = 0; i < adev->vcn.num_vcn_inst; i++) { 434 - if (adev->uvd.harvest_config & (1 << i)) 434 + if (adev->vcn.harvest_config & (1 << i)) 435 435 continue; 436 436 437 437 if (adev->vcn.inst[i].ring_dec.sched.ready) ··· 443 443 case AMDGPU_HW_IP_VCN_ENC: 444 444 type = AMD_IP_BLOCK_TYPE_VCN; 445 445 for (i = 0; i < adev->vcn.num_vcn_inst; i++) { 446 - if (adev->uvd.harvest_config & (1 << i)) 446 + if (adev->vcn.harvest_config & (1 << i)) 447 447 continue; 448 448 449 449 for (j = 0; j < adev->vcn.num_enc_rings; j++)

+18 -21

drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c

··· 165 165 case IP_VERSION(13, 0, 5): 166 166 case IP_VERSION(13, 0, 8): 167 167 case IP_VERSION(13, 0, 10): 168 + case IP_VERSION(13, 0, 11): 168 169 psp_v13_0_set_psp_funcs(psp); 169 170 psp->autoload_supported = true; 170 171 break; ··· 513 512 struct psp_gfx_cmd_resp *cmd = psp->cmd; 514 513 515 514 psp_memory_training_fini(psp); 516 - if (psp->sos_fw) { 517 - release_firmware(psp->sos_fw); 518 - psp->sos_fw = NULL; 519 - } 520 - if (psp->asd_fw) { 521 - release_firmware(psp->asd_fw); 522 - psp->asd_fw = NULL; 523 - } 524 - if (psp->ta_fw) { 525 - release_firmware(psp->ta_fw); 526 - psp->ta_fw = NULL; 527 - } 528 - if (psp->cap_fw) { 529 - release_firmware(psp->cap_fw); 530 - psp->cap_fw = NULL; 531 - } 532 - if (psp->toc_fw) { 533 - release_firmware(psp->toc_fw); 534 - psp->toc_fw = NULL; 535 - } 515 + 516 + release_firmware(psp->sos_fw); 517 + psp->sos_fw = NULL; 518 + 519 + release_firmware(psp->asd_fw); 520 + psp->asd_fw = NULL; 521 + 522 + release_firmware(psp->ta_fw); 523 + psp->ta_fw = NULL; 524 + 525 + release_firmware(psp->cap_fw); 526 + psp->cap_fw = NULL; 527 + 528 + release_firmware(psp->toc_fw); 529 + psp->toc_fw = NULL; 530 + 536 531 if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 0) || 537 532 adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 7)) 538 533 psp_sysfs_fini(adev); ··· 858 861 struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); 859 862 860 863 psp_prep_tmr_unload_cmd_buf(psp, cmd); 861 - dev_info(psp->adev->dev, "free PSP TMR buffer\n"); 864 + dev_dbg(psp->adev->dev, "free PSP TMR buffer\n"); 862 865 863 866 ret = psp_cmd_submit_buf(psp, NULL, cmd, 864 867 psp->fence_buf_mc_addr);

+1 -1

drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c

··· 1267 1267 struct amdgpu_ras *con = 1268 1268 container_of(attr, struct amdgpu_ras, features_attr); 1269 1269 1270 - return scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features); 1270 + return sysfs_emit(buf, "feature mask: 0x%x\n", con->features); 1271 1271 } 1272 1272 1273 1273 static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev)

+12

drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c

··· 569 569 570 570 return mqd_mgr->init_mqd(adev, ring->mqd_ptr, &prop); 571 571 } 572 + 573 + void amdgpu_ring_ib_begin(struct amdgpu_ring *ring) 574 + { 575 + if (ring->is_sw_ring) 576 + amdgpu_sw_ring_ib_begin(ring); 577 + } 578 + 579 + void amdgpu_ring_ib_end(struct amdgpu_ring *ring) 580 + { 581 + if (ring->is_sw_ring) 582 + amdgpu_sw_ring_ib_end(ring); 583 + }

+14

drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h

··· 39 39 #define AMDGPU_MAX_RINGS 28 40 40 #define AMDGPU_MAX_HWIP_RINGS 8 41 41 #define AMDGPU_MAX_GFX_RINGS 2 42 + #define AMDGPU_MAX_SW_GFX_RINGS 2 42 43 #define AMDGPU_MAX_COMPUTE_RINGS 8 43 44 #define AMDGPU_MAX_VCE_RINGS 3 44 45 #define AMDGPU_MAX_UVD_ENC_RINGS 2 ··· 60 59 #define AMDGPU_FENCE_FLAG_64BIT (1 << 0) 61 60 #define AMDGPU_FENCE_FLAG_INT (1 << 1) 62 61 #define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2) 62 + #define AMDGPU_FENCE_FLAG_EXEC (1 << 3) 63 63 64 64 #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched) 65 65 ··· 145 143 uint32_t wait_seq, 146 144 signed long timeout); 147 145 unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring); 146 + 148 147 void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop); 148 + 149 + u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring); 150 + void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq, 151 + ktime_t timestamp); 149 152 150 153 /* 151 154 * Rings. ··· 286 279 bool is_mes_queue; 287 280 uint32_t hw_queue_id; 288 281 struct amdgpu_mes_ctx_data *mes_ctx; 282 + 283 + bool is_sw_ring; 284 + unsigned int entry_index; 285 + 289 286 }; 290 287 291 288 #define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib))) ··· 318 307 #define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r) 319 308 320 309 int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw); 310 + void amdgpu_ring_ib_begin(struct amdgpu_ring *ring); 311 + void amdgpu_ring_ib_end(struct amdgpu_ring *ring); 312 + 321 313 void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); 322 314 void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); 323 315 void amdgpu_ring_commit(struct amdgpu_ring *ring);

+516

drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c

··· 1 + /* 2 + * Copyright 2022 Advanced Micro Devices, Inc. 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice shall be included in 12 + * all copies or substantial portions of the Software. 13 + * 14 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 + * OTHER DEALINGS IN THE SOFTWARE. 21 + * 22 + */ 23 + #include <linux/slab.h> 24 + #include <drm/drm_print.h> 25 + 26 + #include "amdgpu_ring_mux.h" 27 + #include "amdgpu_ring.h" 28 + #include "amdgpu.h" 29 + 30 + #define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ / 2) 31 + #define AMDGPU_MAX_LAST_UNSIGNALED_THRESHOLD_US 10000 32 + 33 + static const struct ring_info { 34 + unsigned int hw_pio; 35 + const char *ring_name; 36 + } sw_ring_info[] = { 37 + { AMDGPU_RING_PRIO_DEFAULT, "gfx_low"}, 38 + { AMDGPU_RING_PRIO_2, "gfx_high"}, 39 + }; 40 + 41 + static struct kmem_cache *amdgpu_mux_chunk_slab; 42 + 43 + static inline struct amdgpu_mux_entry *amdgpu_ring_mux_sw_entry(struct amdgpu_ring_mux *mux, 44 + struct amdgpu_ring *ring) 45 + { 46 + return ring->entry_index < mux->ring_entry_size ? 
47 + &mux->ring_entry[ring->entry_index] : NULL; 48 + } 49 + 50 + /* copy packages on sw ring range[begin, end) */ 51 + static void amdgpu_ring_mux_copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, 52 + struct amdgpu_ring *ring, 53 + u64 s_start, u64 s_end) 54 + { 55 + u64 start, end; 56 + struct amdgpu_ring *real_ring = mux->real_ring; 57 + 58 + start = s_start & ring->buf_mask; 59 + end = s_end & ring->buf_mask; 60 + 61 + if (start == end) { 62 + DRM_ERROR("no more data copied from sw ring\n"); 63 + return; 64 + } 65 + if (start > end) { 66 + amdgpu_ring_alloc(real_ring, (ring->ring_size >> 2) + end - start); 67 + amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[start], 68 + (ring->ring_size >> 2) - start); 69 + amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[0], end); 70 + } else { 71 + amdgpu_ring_alloc(real_ring, end - start); 72 + amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[start], end - start); 73 + } 74 + } 75 + 76 + static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux) 77 + { 78 + struct amdgpu_mux_entry *e = NULL; 79 + struct amdgpu_mux_chunk *chunk; 80 + uint32_t seq, last_seq; 81 + int i; 82 + 83 + /*find low priority entries:*/ 84 + if (!mux->s_resubmit) 85 + return; 86 + 87 + for (i = 0; i < mux->num_ring_entries; i++) { 88 + if (mux->ring_entry[i].ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) { 89 + e = &mux->ring_entry[i]; 90 + break; 91 + } 92 + } 93 + 94 + if (!e) { 95 + DRM_ERROR("%s no low priority ring found\n", __func__); 96 + return; 97 + } 98 + 99 + last_seq = atomic_read(&e->ring->fence_drv.last_seq); 100 + seq = mux->seqno_to_resubmit; 101 + if (last_seq < seq) { 102 + /*resubmit all the fences between (last_seq, seq]*/ 103 + list_for_each_entry(chunk, &e->list, entry) { 104 + if (chunk->sync_seq > last_seq && chunk->sync_seq <= seq) { 105 + amdgpu_fence_update_start_timestamp(e->ring, 106 + chunk->sync_seq, 107 + ktime_get()); 108 + amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, e->ring, 109 + 
chunk->start, 110 + chunk->end); 111 + mux->wptr_resubmit = chunk->end; 112 + amdgpu_ring_commit(mux->real_ring); 113 + } 114 + } 115 + } 116 + 117 + del_timer(&mux->resubmit_timer); 118 + mux->s_resubmit = false; 119 + } 120 + 121 + static void amdgpu_ring_mux_schedule_resubmit(struct amdgpu_ring_mux *mux) 122 + { 123 + mod_timer(&mux->resubmit_timer, jiffies + AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT); 124 + } 125 + 126 + static void amdgpu_mux_resubmit_fallback(struct timer_list *t) 127 + { 128 + struct amdgpu_ring_mux *mux = from_timer(mux, t, resubmit_timer); 129 + 130 + if (!spin_trylock(&mux->lock)) { 131 + amdgpu_ring_mux_schedule_resubmit(mux); 132 + DRM_ERROR("reschedule resubmit\n"); 133 + return; 134 + } 135 + amdgpu_mux_resubmit_chunks(mux); 136 + spin_unlock(&mux->lock); 137 + } 138 + 139 + int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, 140 + unsigned int entry_size) 141 + { 142 + mux->real_ring = ring; 143 + mux->num_ring_entries = 0; 144 + 145 + mux->ring_entry = kcalloc(entry_size, sizeof(struct amdgpu_mux_entry), GFP_KERNEL); 146 + if (!mux->ring_entry) 147 + return -ENOMEM; 148 + 149 + mux->ring_entry_size = entry_size; 150 + mux->s_resubmit = false; 151 + 152 + amdgpu_mux_chunk_slab = kmem_cache_create("amdgpu_mux_chunk", 153 + sizeof(struct amdgpu_mux_chunk), 0, 154 + SLAB_HWCACHE_ALIGN, NULL); 155 + if (!amdgpu_mux_chunk_slab) { 156 + DRM_ERROR("create amdgpu_mux_chunk cache failed\n"); 157 + return -ENOMEM; 158 + } 159 + 160 + spin_lock_init(&mux->lock); 161 + timer_setup(&mux->resubmit_timer, amdgpu_mux_resubmit_fallback, 0); 162 + 163 + return 0; 164 + } 165 + 166 + void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux) 167 + { 168 + struct amdgpu_mux_entry *e; 169 + struct amdgpu_mux_chunk *chunk, *chunk2; 170 + int i; 171 + 172 + for (i = 0; i < mux->num_ring_entries; i++) { 173 + e = &mux->ring_entry[i]; 174 + list_for_each_entry_safe(chunk, chunk2, &e->list, entry) { 175 + list_del(&chunk->entry); 176 + 
kmem_cache_free(amdgpu_mux_chunk_slab, chunk); 177 + } 178 + } 179 + kmem_cache_destroy(amdgpu_mux_chunk_slab); 180 + kfree(mux->ring_entry); 181 + mux->ring_entry = NULL; 182 + mux->num_ring_entries = 0; 183 + mux->ring_entry_size = 0; 184 + } 185 + 186 + int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) 187 + { 188 + struct amdgpu_mux_entry *e; 189 + 190 + if (mux->num_ring_entries >= mux->ring_entry_size) { 191 + DRM_ERROR("add sw ring exceeding max entry size\n"); 192 + return -ENOENT; 193 + } 194 + 195 + e = &mux->ring_entry[mux->num_ring_entries]; 196 + ring->entry_index = mux->num_ring_entries; 197 + e->ring = ring; 198 + 199 + INIT_LIST_HEAD(&e->list); 200 + mux->num_ring_entries += 1; 201 + return 0; 202 + } 203 + 204 + void amdgpu_ring_mux_set_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr) 205 + { 206 + struct amdgpu_mux_entry *e; 207 + 208 + spin_lock(&mux->lock); 209 + 210 + if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) 211 + amdgpu_mux_resubmit_chunks(mux); 212 + 213 + e = amdgpu_ring_mux_sw_entry(mux, ring); 214 + if (!e) { 215 + DRM_ERROR("cannot find entry for sw ring\n"); 216 + spin_unlock(&mux->lock); 217 + return; 218 + } 219 + 220 + /* We could skip this set wptr as preemption in process. 
*/ 221 + if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && mux->pending_trailing_fence_signaled) { 222 + spin_unlock(&mux->lock); 223 + return; 224 + } 225 + 226 + e->sw_cptr = e->sw_wptr; 227 + /* Update cptr if the package already copied in resubmit functions */ 228 + if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && e->sw_cptr < mux->wptr_resubmit) 229 + e->sw_cptr = mux->wptr_resubmit; 230 + e->sw_wptr = wptr; 231 + e->start_ptr_in_hw_ring = mux->real_ring->wptr; 232 + 233 + /* Skip copying for the packages already resubmitted.*/ 234 + if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT || mux->wptr_resubmit < wptr) { 235 + amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, ring, e->sw_cptr, wptr); 236 + e->end_ptr_in_hw_ring = mux->real_ring->wptr; 237 + amdgpu_ring_commit(mux->real_ring); 238 + } else { 239 + e->end_ptr_in_hw_ring = mux->real_ring->wptr; 240 + } 241 + spin_unlock(&mux->lock); 242 + } 243 + 244 + u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) 245 + { 246 + struct amdgpu_mux_entry *e; 247 + 248 + e = amdgpu_ring_mux_sw_entry(mux, ring); 249 + if (!e) { 250 + DRM_ERROR("cannot find entry for sw ring\n"); 251 + return 0; 252 + } 253 + 254 + return e->sw_wptr; 255 + } 256 + 257 + /** 258 + * amdgpu_ring_mux_get_rptr - get the readptr of the software ring 259 + * @mux: the multiplexer the software rings attach to 260 + * @ring: the software ring of which we calculate the readptr 261 + * 262 + * The return value of the readptr is not precise while the other rings could 263 + * write data onto the real ring buffer. After overwriting on the real ring, we 264 + * cannot decide if our packages have been executed or not read yet. However, 265 + * this function is only called by the tools such as umr to collect the latest 266 + * packages for the hang analysis. We assume the hang happens near our latest 267 + * submit. 
Thus we could use the following logic to give the clue: 268 + * If the readptr is between start and end, then we return the copy pointer 269 + * plus the distance from start to readptr. If the readptr is before start, we 270 + * return the copy pointer. Lastly, if the readptr is past end, we return the 271 + * write pointer. 272 + */ 273 + u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) 274 + { 275 + struct amdgpu_mux_entry *e; 276 + u64 readp, offset, start, end; 277 + 278 + e = amdgpu_ring_mux_sw_entry(mux, ring); 279 + if (!e) { 280 + DRM_ERROR("no sw entry found!\n"); 281 + return 0; 282 + } 283 + 284 + readp = amdgpu_ring_get_rptr(mux->real_ring); 285 + 286 + start = e->start_ptr_in_hw_ring & mux->real_ring->buf_mask; 287 + end = e->end_ptr_in_hw_ring & mux->real_ring->buf_mask; 288 + if (start > end) { 289 + if (readp <= end) 290 + readp += mux->real_ring->ring_size >> 2; 291 + end += mux->real_ring->ring_size >> 2; 292 + } 293 + 294 + if (start <= readp && readp <= end) { 295 + offset = readp - start; 296 + e->sw_rptr = (e->sw_cptr + offset) & ring->buf_mask; 297 + } else if (readp < start) { 298 + e->sw_rptr = e->sw_cptr; 299 + } else { 300 + /* end < readptr */ 301 + e->sw_rptr = e->sw_wptr; 302 + } 303 + 304 + return e->sw_rptr; 305 + } 306 + 307 + u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring) 308 + { 309 + struct amdgpu_device *adev = ring->adev; 310 + struct amdgpu_ring_mux *mux = &adev->gfx.muxer; 311 + 312 + WARN_ON(!ring->is_sw_ring); 313 + return amdgpu_ring_mux_get_rptr(mux, ring); 314 + } 315 + 316 + u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring) 317 + { 318 + struct amdgpu_device *adev = ring->adev; 319 + struct amdgpu_ring_mux *mux = &adev->gfx.muxer; 320 + 321 + WARN_ON(!ring->is_sw_ring); 322 + return amdgpu_ring_mux_get_wptr(mux, ring); 323 + } 324 + 325 + void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring) 326 + { 327 + struct amdgpu_device *adev = ring->adev; 328 + struct 
amdgpu_ring_mux *mux = &adev->gfx.muxer; 329 + 330 + WARN_ON(!ring->is_sw_ring); 331 + amdgpu_ring_mux_set_wptr(mux, ring, ring->wptr); 332 + } 333 + 334 + /* Override insert_nop to prevent emitting nops to the software rings */ 335 + void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) 336 + { 337 + WARN_ON(!ring->is_sw_ring); 338 + } 339 + 340 + const char *amdgpu_sw_ring_name(int idx) 341 + { 342 + return idx < ARRAY_SIZE(sw_ring_info) ? 343 + sw_ring_info[idx].ring_name : NULL; 344 + } 345 + 346 + unsigned int amdgpu_sw_ring_priority(int idx) 347 + { 348 + return idx < ARRAY_SIZE(sw_ring_info) ? 349 + sw_ring_info[idx].hw_pio : AMDGPU_RING_PRIO_DEFAULT; 350 + } 351 + 352 + /*Scan on low prio rings to have unsignaled fence and high ring has no fence.*/ 353 + int amdgpu_mcbp_scan(struct amdgpu_ring_mux *mux) 354 + { 355 + struct amdgpu_ring *ring; 356 + int i, need_preempt; 357 + 358 + need_preempt = 0; 359 + for (i = 0; i < mux->num_ring_entries; i++) { 360 + ring = mux->ring_entry[i].ring; 361 + if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT && 362 + amdgpu_fence_count_emitted(ring) > 0) 363 + return 0; 364 + if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && 365 + amdgpu_fence_last_unsignaled_time_us(ring) > 366 + AMDGPU_MAX_LAST_UNSIGNALED_THRESHOLD_US) 367 + need_preempt = 1; 368 + } 369 + return need_preempt && !mux->s_resubmit; 370 + } 371 + 372 + /* Trigger Mid-Command Buffer Preemption (MCBP) and find if we need to resubmit. 
*/ 373 + int amdgpu_mcbp_trigger_preempt(struct amdgpu_ring_mux *mux) 374 + { 375 + int r; 376 + 377 + spin_lock(&mux->lock); 378 + mux->pending_trailing_fence_signaled = true; 379 + r = amdgpu_ring_preempt_ib(mux->real_ring); 380 + spin_unlock(&mux->lock); 381 + return r; 382 + } 383 + 384 + void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring) 385 + { 386 + struct amdgpu_device *adev = ring->adev; 387 + struct amdgpu_ring_mux *mux = &adev->gfx.muxer; 388 + 389 + WARN_ON(!ring->is_sw_ring); 390 + if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) { 391 + if (amdgpu_mcbp_scan(mux) > 0) 392 + amdgpu_mcbp_trigger_preempt(mux); 393 + return; 394 + } 395 + 396 + amdgpu_ring_mux_start_ib(mux, ring); 397 + } 398 + 399 + void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring) 400 + { 401 + struct amdgpu_device *adev = ring->adev; 402 + struct amdgpu_ring_mux *mux = &adev->gfx.muxer; 403 + 404 + WARN_ON(!ring->is_sw_ring); 405 + if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) 406 + return; 407 + amdgpu_ring_mux_end_ib(mux, ring); 408 + } 409 + 410 + void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) 411 + { 412 + struct amdgpu_mux_entry *e; 413 + struct amdgpu_mux_chunk *chunk; 414 + 415 + spin_lock(&mux->lock); 416 + amdgpu_mux_resubmit_chunks(mux); 417 + spin_unlock(&mux->lock); 418 + 419 + e = amdgpu_ring_mux_sw_entry(mux, ring); 420 + if (!e) { 421 + DRM_ERROR("cannot find entry!\n"); 422 + return; 423 + } 424 + 425 + chunk = kmem_cache_alloc(amdgpu_mux_chunk_slab, GFP_KERNEL); 426 + if (!chunk) { 427 + DRM_ERROR("alloc amdgpu_mux_chunk_slab failed\n"); 428 + return; 429 + } 430 + 431 + chunk->start = ring->wptr; 432 + list_add_tail(&chunk->entry, &e->list); 433 + } 434 + 435 + static void scan_and_remove_signaled_chunk(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) 436 + { 437 + uint32_t last_seq, size = 0; 438 + struct amdgpu_mux_entry *e; 439 + struct amdgpu_mux_chunk *chunk, *tmp; 440 + 441 + e = amdgpu_ring_mux_sw_entry(mux, ring); 
442 + if (!e) { 443 + DRM_ERROR("cannot find entry!\n"); 444 + return; 445 + } 446 + 447 + last_seq = atomic_read(&ring->fence_drv.last_seq); 448 + 449 + list_for_each_entry_safe(chunk, tmp, &e->list, entry) { 450 + if (chunk->sync_seq <= last_seq) { 451 + list_del(&chunk->entry); 452 + kmem_cache_free(amdgpu_mux_chunk_slab, chunk); 453 + } else { 454 + size++; 455 + } 456 + } 457 + } 458 + 459 + void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) 460 + { 461 + struct amdgpu_mux_entry *e; 462 + struct amdgpu_mux_chunk *chunk; 463 + 464 + e = amdgpu_ring_mux_sw_entry(mux, ring); 465 + if (!e) { 466 + DRM_ERROR("cannot find entry!\n"); 467 + return; 468 + } 469 + 470 + chunk = list_last_entry(&e->list, struct amdgpu_mux_chunk, entry); 471 + if (!chunk) { 472 + DRM_ERROR("cannot find chunk!\n"); 473 + return; 474 + } 475 + 476 + chunk->end = ring->wptr; 477 + chunk->sync_seq = READ_ONCE(ring->fence_drv.sync_seq); 478 + 479 + scan_and_remove_signaled_chunk(mux, ring); 480 + } 481 + 482 + bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux) 483 + { 484 + struct amdgpu_mux_entry *e; 485 + struct amdgpu_ring *ring = NULL; 486 + int i; 487 + 488 + if (!mux->pending_trailing_fence_signaled) 489 + return false; 490 + 491 + if (mux->real_ring->trail_seq != le32_to_cpu(*mux->real_ring->trail_fence_cpu_addr)) 492 + return false; 493 + 494 + for (i = 0; i < mux->num_ring_entries; i++) { 495 + e = &mux->ring_entry[i]; 496 + if (e->ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) { 497 + ring = e->ring; 498 + break; 499 + } 500 + } 501 + 502 + if (!ring) { 503 + DRM_ERROR("cannot find low priority ring\n"); 504 + return false; 505 + } 506 + 507 + amdgpu_fence_process(ring); 508 + if (amdgpu_fence_count_emitted(ring) > 0) { 509 + mux->s_resubmit = true; 510 + mux->seqno_to_resubmit = ring->fence_drv.sync_seq; 511 + amdgpu_ring_mux_schedule_resubmit(mux); 512 + } 513 + 514 + mux->pending_trailing_fence_signaled = false; 515 + return true; 
516 + }

+103

drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h

··· 1 + /* 2 + * Copyright 2022 Advanced Micro Devices, Inc. 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice shall be included in 12 + * all copies or substantial portions of the Software. 13 + * 14 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 + * OTHER DEALINGS IN THE SOFTWARE. 21 + * 22 + */ 23 + 24 + #ifndef __AMDGPU_RING_MUX__ 25 + #define __AMDGPU_RING_MUX__ 26 + 27 + #include <linux/timer.h> 28 + #include <linux/spinlock.h> 29 + #include "amdgpu_ring.h" 30 + 31 + struct amdgpu_ring; 32 + 33 + /** 34 + * struct amdgpu_mux_entry - the entry recording software rings copying information. 35 + * @ring: the pointer to the software ring. 36 + * @start_ptr_in_hw_ring: last start location copied to in the hardware ring. 37 + * @end_ptr_in_hw_ring: last end location copied to in the hardware ring. 38 + * @sw_cptr: the position of the copy pointer in the sw ring. 39 + * @sw_rptr: the read pointer in software ring. 40 + * @sw_wptr: the write pointer in software ring. 
41 + * @list: list head for amdgpu_mux_chunk 42 + */ 43 + struct amdgpu_mux_entry { 44 + struct amdgpu_ring *ring; 45 + u64 start_ptr_in_hw_ring; 46 + u64 end_ptr_in_hw_ring; 47 + u64 sw_cptr; 48 + u64 sw_rptr; 49 + u64 sw_wptr; 50 + struct list_head list; 51 + }; 52 + /** struct amdgpu_ring_mux - multiplexer copying packets from several software rings into one real ring. @real_ring: the ring the software rings' packets are copied into and committed on. @ring_entry: array of per-software-ring entries. @num_ring_entries: number of entries of @ring_entry in use. @ring_entry_size: presumably the capacity of @ring_entry as passed to amdgpu_ring_mux_init() - TODO confirm. @lock: see the member comment. @s_resubmit: set when packets of a low priority ring are waiting to be resubmitted after a preemption; no new preemption is requested while it is set. @seqno_to_resubmit: fence sync_seq of the low priority ring recorded when the resubmit was scheduled. @wptr_resubmit: software ring write pointer below which packets count as already resubmitted and are not copied again. @resubmit_timer: presumably drives the deferred resubmission - TODO confirm. @pending_trailing_fence_signaled: set when a preemption has been triggered, cleared once its trailing fence interrupt has been handled. */ 53 + struct amdgpu_ring_mux { 54 + struct amdgpu_ring *real_ring; 55 + 56 + struct amdgpu_mux_entry *ring_entry; 57 + unsigned int num_ring_entries; 58 + unsigned int ring_entry_size; 59 + /* lock held while copying data from the different software rings */ 60 + spinlock_t lock; 61 + bool s_resubmit; 62 + uint32_t seqno_to_resubmit; 63 + u64 wptr_resubmit; 64 + struct timer_list resubmit_timer; 65 + 66 + bool pending_trailing_fence_signaled; 67 + }; 68 + 69 + /** 70 + * struct amdgpu_mux_chunk - save the location of an indirect buffer's package on the software rings. 71 + * @entry: the list entry. 72 + * @sync_seq: the fence seqno related with the saved IB. 73 + * @start: start location on the software ring. 74 + * @end: end location on the software ring. 
75 + */ 76 + struct amdgpu_mux_chunk { 77 + struct list_head entry; 78 + uint32_t sync_seq; 79 + u64 start; 80 + u64 end; 81 + }; 82 + 83 + int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, 84 + unsigned int entry_size); 85 + void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux); 86 + int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); 87 + void amdgpu_ring_mux_set_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr); 88 + u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); 89 + u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); 90 + void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); 91 + void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); 92 + bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux); 93 + 94 + u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring); 95 + u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring); 96 + void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring); 97 + void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); 98 + void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring); 99 + void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring); 100 + const char *amdgpu_sw_ring_name(int idx); 101 + unsigned int amdgpu_sw_ring_priority(int idx); 102 + 103 + #endif

+4 -3

drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

··· 1545 1545 { 1546 1546 amdgpu_bo_free_kernel(&adev->mman.drv_vram_usage_reserved_bo, 1547 1547 NULL, 1548 - NULL); 1548 + &adev->mman.drv_vram_usage_va); 1549 1549 } 1550 1550 1551 1551 /** ··· 1583 1583 */ 1584 1584 static int amdgpu_ttm_drv_reserve_vram_init(struct amdgpu_device *adev) 1585 1585 { 1586 - uint64_t vram_size = adev->gmc.visible_vram_size; 1586 + u64 vram_size = adev->gmc.visible_vram_size; 1587 1587 1588 + adev->mman.drv_vram_usage_va = NULL; 1588 1589 adev->mman.drv_vram_usage_reserved_bo = NULL; 1589 1590 1590 1591 if (adev->mman.drv_vram_usage_size == 0 || ··· 1597 1596 adev->mman.drv_vram_usage_size, 1598 1597 AMDGPU_GEM_DOMAIN_VRAM, 1599 1598 &adev->mman.drv_vram_usage_reserved_bo, 1600 - NULL); 1599 + &adev->mman.drv_vram_usage_va); 1601 1600 } 1602 1601 1603 1602 /*

+1

drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h

··· 90 90 u64 drv_vram_usage_start_offset; 91 91 u64 drv_vram_usage_size; 92 92 struct amdgpu_bo *drv_vram_usage_reserved_bo; 93 + void *drv_vram_usage_va; 93 94 94 95 /* PAGE_SIZE'd BO for process memory r/w over SDMA. */ 95 96 struct amdgpu_bo *sdma_access_bo;

+3

drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c

··· 156 156 break; 157 157 case IP_VERSION(3, 0, 2): 158 158 fw_name = FIRMWARE_VANGOGH; 159 + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && 160 + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) 161 + adev->vcn.indirect_sram = true; 159 162 break; 160 163 case IP_VERSION(3, 0, 16): 161 164 fw_name = FIRMWARE_DIMGREY_CAVEFISH;

+35 -19

drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c

··· 428 428 struct eeprom_table_record bp; 429 429 uint64_t retired_page; 430 430 uint32_t bp_idx, bp_cnt; 431 + void *vram_usage_va = NULL; 432 + 433 + if (adev->mman.fw_vram_usage_va) 434 + vram_usage_va = adev->mman.fw_vram_usage_va; 435 + else 436 + vram_usage_va = adev->mman.drv_vram_usage_va; 431 437 432 438 if (bp_block_size) { 433 439 bp_cnt = bp_block_size / sizeof(uint64_t); 434 440 for (bp_idx = 0; bp_idx < bp_cnt; bp_idx++) { 435 - retired_page = *(uint64_t *)(adev->mman.fw_vram_usage_va + 441 + retired_page = *(uint64_t *)(vram_usage_va + 436 442 bp_block_offset + bp_idx * sizeof(uint64_t)); 437 443 bp.retired_page = retired_page; 438 444 ··· 649 643 adev->virt.fw_reserve.p_vf2pf = NULL; 650 644 adev->virt.vf2pf_update_interval_ms = 0; 651 645 652 - if (adev->mman.fw_vram_usage_va != NULL) { 646 + if (adev->mman.fw_vram_usage_va && adev->mman.drv_vram_usage_va) { 647 + DRM_WARN("Currently fw_vram and drv_vram should not have values at the same time!"); 648 + } else if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) { 653 649 /* go through this logic in ip_init and reset to init workqueue*/ 654 650 amdgpu_virt_exchange_data(adev); 655 651 ··· 674 666 uint32_t bp_block_size = 0; 675 667 struct amd_sriov_msg_pf2vf_info *pf2vf_v2 = NULL; 676 668 677 - if (adev->mman.fw_vram_usage_va != NULL) { 678 - 679 - adev->virt.fw_reserve.p_pf2vf = 680 - (struct amd_sriov_msg_pf2vf_info_header *) 681 - (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10)); 682 - adev->virt.fw_reserve.p_vf2pf = 683 - (struct amd_sriov_msg_vf2pf_info_header *) 684 - (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10)); 669 + if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) { 670 + if (adev->mman.fw_vram_usage_va) { 671 + adev->virt.fw_reserve.p_pf2vf = 672 + (struct amd_sriov_msg_pf2vf_info_header *) 673 + (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10)); 674 + adev->virt.fw_reserve.p_vf2pf = 675 + 
(struct amd_sriov_msg_vf2pf_info_header *) 676 + (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10)); 677 + } else if (adev->mman.drv_vram_usage_va) { 678 + adev->virt.fw_reserve.p_pf2vf = 679 + (struct amd_sriov_msg_pf2vf_info_header *) 680 + (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10)); 681 + adev->virt.fw_reserve.p_vf2pf = 682 + (struct amd_sriov_msg_vf2pf_info_header *) 683 + (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10)); 684 + } 685 685 686 686 amdgpu_virt_read_pf2vf_data(adev); 687 687 amdgpu_virt_write_vf2pf_data(adev); 688 688 689 689 /* bad page handling for version 2 */ 690 690 if (adev->virt.fw_reserve.p_pf2vf->version == 2) { 691 - pf2vf_v2 = (struct amd_sriov_msg_pf2vf_info *)adev->virt.fw_reserve.p_pf2vf; 691 + pf2vf_v2 = (struct amd_sriov_msg_pf2vf_info *)adev->virt.fw_reserve.p_pf2vf; 692 692 693 - bp_block_offset = ((uint64_t)pf2vf_v2->bp_block_offset_low & 0xFFFFFFFF) | 694 - ((((uint64_t)pf2vf_v2->bp_block_offset_high) << 32) & 0xFFFFFFFF00000000); 695 - bp_block_size = pf2vf_v2->bp_block_size; 693 + bp_block_offset = ((uint64_t)pf2vf_v2->bp_block_offset_low & 0xFFFFFFFF) | 694 + ((((uint64_t)pf2vf_v2->bp_block_offset_high) << 32) & 0xFFFFFFFF00000000); 695 + bp_block_size = pf2vf_v2->bp_block_size; 696 696 697 - if (bp_block_size && !adev->virt.ras_init_done) 698 - amdgpu_virt_init_ras_err_handler_data(adev); 697 + if (bp_block_size && !adev->virt.ras_init_done) 698 + amdgpu_virt_init_ras_err_handler_data(adev); 699 699 700 - if (adev->virt.ras_init_done) 701 - amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size); 702 - } 700 + if (adev->virt.ras_init_done) 701 + amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size); 702 + } 703 703 } 704 704 } 705 705

+33 -10

drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

··· 45 45 /** 46 46 * DOC: GPUVM 47 47 * 48 - * GPUVM is similar to the legacy gart on older asics, however 49 - * rather than there being a single global gart table 50 - * for the entire GPU, there are multiple VM page tables active 51 - * at any given time. The VM page tables can contain a mix 52 - * vram pages and system memory pages and system memory pages 48 + * GPUVM is the MMU functionality provided on the GPU. 49 + * GPUVM is similar to the legacy GART on older asics, however 50 + * rather than there being a single global GART table 51 + * for the entire GPU, there can be multiple GPUVM page tables active 52 + * at any given time. The GPUVM page tables can contain a mix of 53 + * VRAM pages and system pages (both memory and MMIO) and system pages 53 54 * can be mapped as snooped (cached system pages) or unsnooped 54 55 * (uncached system pages). 55 - * Each VM has an ID associated with it and there is a page table 56 - * associated with each VMID. When executing a command buffer, 57 - * the kernel tells the ring what VMID to use for that command 56 + * 57 + * Each active GPUVM has an ID associated with it and there is a page table 58 + * linked with each VMID. When executing a command buffer, 59 + * the kernel tells the engine what VMID to use for that command 58 60 * buffer. VMIDs are allocated dynamically as commands are submitted. 59 61 * The userspace drivers maintain their own address space and the kernel 60 62 * sets up their page tables accordingly when they submit their 61 63 * command buffers and a VMID is assigned. 62 - * Cayman/Trinity support up to 8 active VMs at any given time; 63 - * SI supports 16. 64 + * The hardware supports up to 16 active GPUVMs at any given time. 65 + * 66 + * Each GPUVM is represented by a 1-2 or 1-5 level page table, depending 67 + * on the ASIC family. GPUVM supports RWX attributes on each page as well 68 + * as other features such as encryption and caching attributes. 69 + * 70 + * VMID 0 is special. 
It is the GPUVM used for the kernel driver. In 71 + * addition to an aperture managed by a page table, VMID 0 also has 72 + * several other apertures. There is an aperture for direct access to VRAM 73 + * and there is a legacy AGP aperture which just forwards accesses directly 74 + * to the matching system physical addresses (or IOVAs when an IOMMU is 75 + * present). These apertures provide direct access to these memories without 76 + * incurring the overhead of a page table. VMID 0 is used by the kernel 77 + * driver for tasks like memory management. 78 + * 79 + * GPU clients (i.e., engines on the GPU) use GPUVM VMIDs to access memory. 80 + * For user applications, each application can have their own unique GPUVM 81 + * address space. The application manages the address space and the kernel 82 + * driver manages the GPUVM page tables for each process. If an GPU client 83 + * accesses an invalid page, it will generate a GPU page fault, similar to 84 + * accessing an invalid page on a CPU. 64 85 */ 65 86 66 87 #define START(node) ((node)->start) ··· 562 541 if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) 563 542 return 0; 564 543 544 + amdgpu_ring_ib_begin(ring); 565 545 if (ring->funcs->init_cond_exec) 566 546 patch_offset = amdgpu_ring_init_cond_exec(ring); 567 547 ··· 623 601 amdgpu_ring_emit_switch_buffer(ring); 624 602 amdgpu_ring_emit_switch_buffer(ring); 625 603 } 604 + amdgpu_ring_ib_end(ring); 626 605 return 0; 627 606 } 628 607

-2

drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c

··· 386 386 if (ret) { 387 387 dev_err(adev->dev, "XGMI: failed initializing kobject for xgmi hive\n"); 388 388 kobject_put(&hive->kobj); 389 - kfree(hive); 390 389 hive = NULL; 391 390 goto pro_end; 392 391 } ··· 409 410 dev_err(adev->dev, "XGMI: failed initializing reset domain for xgmi hive\n"); 410 411 ret = -ENOMEM; 411 412 kobject_put(&hive->kobj); 412 - kfree(hive); 413 413 hive = NULL; 414 414 goto pro_end; 415 415 }

+9 -1

drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c

··· 77 77 MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin"); 78 78 MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin"); 79 79 MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin"); 80 + MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin"); 81 + MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin"); 82 + MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin"); 83 + MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin"); 80 84 81 85 static const struct soc15_reg_golden golden_settings_gc_11_0_1[] = 82 86 { ··· 266 262 { 267 263 switch (adev->ip_versions[GC_HWIP][0]) { 268 264 case IP_VERSION(11, 0, 1): 265 + case IP_VERSION(11, 0, 4): 269 266 soc15_program_register_sequence(adev, 270 267 golden_settings_gc_11_0_1, 271 268 (const u32)ARRAY_SIZE(golden_settings_gc_11_0_1)); ··· 860 855 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 861 856 break; 862 857 case IP_VERSION(11, 0, 1): 858 + case IP_VERSION(11, 0, 4): 863 859 adev->gfx.config.max_hw_contexts = 8; 864 860 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 865 861 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; ··· 1290 1284 case IP_VERSION(11, 0, 1): 1291 1285 case IP_VERSION(11, 0, 2): 1292 1286 case IP_VERSION(11, 0, 3): 1287 + case IP_VERSION(11, 0, 4): 1293 1288 adev->gfx.me.num_me = 1; 1294 1289 adev->gfx.me.num_pipe_per_me = 1; 1295 1290 adev->gfx.me.num_queue_per_pipe = 1; ··· 2493 2486 for (i = 0; i < adev->usec_timeout; i++) { 2494 2487 cp_status = RREG32_SOC15(GC, 0, regCP_STAT); 2495 2488 2496 - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1)) 2489 + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1) || 2490 + adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 4)) 2497 2491 bootload_status = RREG32_SOC15(GC, 0, 2498 2492 regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1); 2499 2493 else

+266 -30

drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c

··· 47 47 48 48 #include "amdgpu_ras.h" 49 49 50 + #include "amdgpu_ring_mux.h" 50 51 #include "gfx_v9_4.h" 51 52 #include "gfx_v9_0.h" 52 53 #include "gfx_v9_4_2.h" ··· 57 56 #include "asic_reg/gc/gc_9_0_default.h" 58 57 59 58 #define GFX9_NUM_GFX_RINGS 1 59 + #define GFX9_NUM_SW_GFX_RINGS 2 60 60 #define GFX9_MEC_HPD_SIZE 4096 61 61 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L 62 62 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L ··· 755 753 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, 756 754 struct amdgpu_cu_info *cu_info); 757 755 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); 758 - static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring); 756 + static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume); 759 757 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring); 760 758 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, 761 759 void *ras_error_status); ··· 828 826 PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 829 827 830 828 if (action == PREEMPT_QUEUES_NO_UNMAP) { 831 - amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr)); 832 - amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr)); 833 - amdgpu_ring_write(kiq_ring, seq); 829 + amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask)); 830 + amdgpu_ring_write(kiq_ring, 0); 831 + amdgpu_ring_write(kiq_ring, 0); 832 + 834 833 } else { 835 834 amdgpu_ring_write(kiq_ring, 0); 836 835 amdgpu_ring_write(kiq_ring, 0); ··· 2106 2103 struct amdgpu_ring *ring; 2107 2104 struct amdgpu_kiq *kiq; 2108 2105 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2106 + unsigned int hw_prio; 2109 2107 2110 2108 switch (adev->ip_versions[GC_HWIP][0]) { 2111 2109 case IP_VERSION(9, 0, 1): ··· 2190 2186 sprintf(ring->name, "gfx_%d", i); 2191 2187 ring->use_doorbell = true; 2192 2188 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 2189 + 2190 + /* disable 
scheduler on the real ring */ 2191 + ring->no_scheduler = true; 2193 2192 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2194 2193 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, 2195 2194 AMDGPU_RING_PRIO_DEFAULT, NULL); 2196 2195 if (r) 2197 2196 return r; 2197 + } 2198 + 2199 + /* set up the software rings */ 2200 + if (adev->gfx.num_gfx_rings) { 2201 + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) { 2202 + ring = &adev->gfx.sw_gfx_ring[i]; 2203 + ring->ring_obj = NULL; 2204 + sprintf(ring->name, amdgpu_sw_ring_name(i)); 2205 + ring->use_doorbell = true; 2206 + ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 2207 + ring->is_sw_ring = true; 2208 + hw_prio = amdgpu_sw_ring_priority(i); 2209 + r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2210 + AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio, 2211 + NULL); 2212 + if (r) 2213 + return r; 2214 + ring->wptr = 0; 2215 + } 2216 + 2217 + /* init the muxer and add software rings */ 2218 + r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0], 2219 + GFX9_NUM_SW_GFX_RINGS); 2220 + if (r) { 2221 + DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r); 2222 + return r; 2223 + } 2224 + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) { 2225 + r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer, 2226 + &adev->gfx.sw_gfx_ring[i]); 2227 + if (r) { 2228 + DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r); 2229 + return r; 2230 + } 2231 + } 2198 2232 } 2199 2233 2200 2234 /* set up the compute queues - allocate horizontally across pipes */ ··· 2284 2242 { 2285 2243 int i; 2286 2244 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2245 + 2246 + if (adev->gfx.num_gfx_rings) { 2247 + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) 2248 + amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]); 2249 + amdgpu_ring_mux_fini(&adev->gfx.muxer); 2250 + } 2287 2251 2288 2252 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2289 2253 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); ··· 5205 5157 5206 5158 control |= ib->length_dw | 
(vmid << 24); 5207 5159 5208 - if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 5160 + if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) { 5209 5161 control |= INDIRECT_BUFFER_PRE_ENB(1); 5210 5162 5163 + if (flags & AMDGPU_IB_PREEMPTED) 5164 + control |= INDIRECT_BUFFER_PRE_RESUME(1); 5165 + 5211 5166 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid) 5212 - gfx_v9_0_ring_emit_de_meta(ring); 5167 + gfx_v9_0_ring_emit_de_meta(ring, 5168 + (!amdgpu_sriov_vf(ring->adev) && 5169 + flags & AMDGPU_IB_PREEMPTED) ? 5170 + true : false); 5213 5171 } 5214 5172 5215 5173 amdgpu_ring_write(ring, header); ··· 5270 5216 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 5271 5217 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 5272 5218 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; 5219 + bool exec = flags & AMDGPU_FENCE_FLAG_EXEC; 5220 + uint32_t dw2 = 0; 5273 5221 5274 5222 /* RELEASE_MEM - flush caches, send int */ 5275 5223 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 5276 - amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN | 5277 - EOP_TC_NC_ACTION_EN) : 5278 - (EOP_TCL1_ACTION_EN | 5279 - EOP_TC_ACTION_EN | 5280 - EOP_TC_WB_ACTION_EN | 5281 - EOP_TC_MD_ACTION_EN)) | 5282 - EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 5283 - EVENT_INDEX(5))); 5224 + 5225 + if (writeback) { 5226 + dw2 = EOP_TC_NC_ACTION_EN; 5227 + } else { 5228 + dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN | 5229 + EOP_TC_MD_ACTION_EN; 5230 + } 5231 + dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 5232 + EVENT_INDEX(5); 5233 + if (exec) 5234 + dw2 |= EOP_EXEC; 5235 + 5236 + amdgpu_ring_write(ring, dw2); 5284 5237 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 
2 : 0)); 5285 5238 5286 5239 /* ··· 5392 5331 amdgpu_ring_write(ring, 0); 5393 5332 } 5394 5333 5395 - static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring) 5334 + static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume) 5396 5335 { 5336 + struct amdgpu_device *adev = ring->adev; 5397 5337 struct v9_ce_ib_state ce_payload = {0}; 5398 - uint64_t csa_addr; 5338 + uint64_t offset, ce_payload_gpu_addr; 5339 + void *ce_payload_cpu_addr; 5399 5340 int cnt; 5400 5341 5401 5342 cnt = (sizeof(ce_payload) >> 2) + 4 - 2; 5402 - csa_addr = amdgpu_csa_vaddr(ring->adev); 5343 + 5344 + if (ring->is_mes_queue) { 5345 + offset = offsetof(struct amdgpu_mes_ctx_meta_data, 5346 + gfx[0].gfx_meta_data) + 5347 + offsetof(struct v9_gfx_meta_data, ce_payload); 5348 + ce_payload_gpu_addr = 5349 + amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 5350 + ce_payload_cpu_addr = 5351 + amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); 5352 + } else { 5353 + offset = offsetof(struct v9_gfx_meta_data, ce_payload); 5354 + ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; 5355 + ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; 5356 + } 5403 5357 5404 5358 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5405 5359 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 5406 5360 WRITE_DATA_DST_SEL(8) | 5407 5361 WR_CONFIRM) | 5408 5362 WRITE_DATA_CACHE_POLICY(0)); 5409 - amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 5410 - amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 5411 - amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2); 5363 + amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr)); 5364 + amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr)); 5365 + 5366 + if (resume) 5367 + amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr, 5368 + sizeof(ce_payload) >> 2); 5369 + else 5370 + 
amdgpu_ring_write_multiple(ring, (void *)&ce_payload, 5371 + sizeof(ce_payload) >> 2); 5412 5372 } 5413 5373 5414 - static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) 5374 + static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring) 5415 5375 { 5376 + int i, r = 0; 5377 + struct amdgpu_device *adev = ring->adev; 5378 + struct amdgpu_kiq *kiq = &adev->gfx.kiq; 5379 + struct amdgpu_ring *kiq_ring = &kiq->ring; 5380 + unsigned long flags; 5381 + 5382 + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 5383 + return -EINVAL; 5384 + 5385 + spin_lock_irqsave(&kiq->ring_lock, flags); 5386 + 5387 + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { 5388 + spin_unlock_irqrestore(&kiq->ring_lock, flags); 5389 + return -ENOMEM; 5390 + } 5391 + 5392 + /* assert preemption condition */ 5393 + amdgpu_ring_set_preempt_cond_exec(ring, false); 5394 + 5395 + ring->trail_seq += 1; 5396 + amdgpu_ring_alloc(ring, 13); 5397 + gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr, 5398 + ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT); 5399 + /*reset the CP_VMID_PREEMPT after trailing fence*/ 5400 + amdgpu_ring_emit_wreg(ring, 5401 + SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT), 5402 + 0x0); 5403 + 5404 + /* assert IB preemption, emit the trailing fence */ 5405 + kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP, 5406 + ring->trail_fence_gpu_addr, 5407 + ring->trail_seq); 5408 + 5409 + amdgpu_ring_commit(kiq_ring); 5410 + spin_unlock_irqrestore(&kiq->ring_lock, flags); 5411 + 5412 + /* poll the trailing fence */ 5413 + for (i = 0; i < adev->usec_timeout; i++) { 5414 + if (ring->trail_seq == 5415 + le32_to_cpu(*ring->trail_fence_cpu_addr)) 5416 + break; 5417 + udelay(1); 5418 + } 5419 + 5420 + if (i >= adev->usec_timeout) { 5421 + r = -EINVAL; 5422 + DRM_WARN("ring %d timeout to preempt ib\n", ring->idx); 5423 + } 5424 + 5425 + amdgpu_ring_commit(ring); 5426 + 5427 + /* deassert preemption condition */ 5428 + 
amdgpu_ring_set_preempt_cond_exec(ring, true); 5429 + return r; 5430 + } 5431 + 5432 + static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) 5433 + { 5434 + struct amdgpu_device *adev = ring->adev; 5416 5435 struct v9_de_ib_state de_payload = {0}; 5417 - uint64_t csa_addr, gds_addr; 5436 + uint64_t offset, gds_addr, de_payload_gpu_addr; 5437 + void *de_payload_cpu_addr; 5418 5438 int cnt; 5419 5439 5420 - csa_addr = amdgpu_csa_vaddr(ring->adev); 5421 - gds_addr = csa_addr + 4096; 5440 + if (ring->is_mes_queue) { 5441 + offset = offsetof(struct amdgpu_mes_ctx_meta_data, 5442 + gfx[0].gfx_meta_data) + 5443 + offsetof(struct v9_gfx_meta_data, de_payload); 5444 + de_payload_gpu_addr = 5445 + amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 5446 + de_payload_cpu_addr = 5447 + amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); 5448 + 5449 + offset = offsetof(struct amdgpu_mes_ctx_meta_data, 5450 + gfx[0].gds_backup) + 5451 + offsetof(struct v9_gfx_meta_data, de_payload); 5452 + gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 5453 + } else { 5454 + offset = offsetof(struct v9_gfx_meta_data, de_payload); 5455 + de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; 5456 + de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; 5457 + 5458 + gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + 5459 + AMDGPU_CSA_SIZE - adev->gds.gds_size, 5460 + PAGE_SIZE); 5461 + } 5462 + 5422 5463 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); 5423 5464 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 5424 5465 ··· 5530 5367 WRITE_DATA_DST_SEL(8) | 5531 5368 WR_CONFIRM) | 5532 5369 WRITE_DATA_CACHE_POLICY(0)); 5533 - amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 5534 - amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 5535 - amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); 5370 + amdgpu_ring_write(ring, 
lower_32_bits(de_payload_gpu_addr)); 5371 + amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr)); 5372 + 5373 + if (resume) 5374 + amdgpu_ring_write_multiple(ring, de_payload_cpu_addr, 5375 + sizeof(de_payload) >> 2); 5376 + else 5377 + amdgpu_ring_write_multiple(ring, (void *)&de_payload, 5378 + sizeof(de_payload) >> 2); 5536 5379 } 5537 5380 5538 5381 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, ··· 5554 5385 { 5555 5386 uint32_t dw2 = 0; 5556 5387 5557 - if (amdgpu_sriov_vf(ring->adev)) 5558 - gfx_v9_0_ring_emit_ce_meta(ring); 5388 + gfx_v9_0_ring_emit_ce_meta(ring, 5389 + (!amdgpu_sriov_vf(ring->adev) && 5390 + flags & AMDGPU_IB_PREEMPTED) ? true : false); 5559 5391 5560 5392 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 5561 5393 if (flags & AMDGPU_HAVE_CTX_SWITCH) { ··· 5882 5712 5883 5713 switch (me_id) { 5884 5714 case 0: 5885 - amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 5715 + if (adev->gfx.num_gfx_rings && 5716 + !amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) { 5717 + /* Fence signals are handled on the software rings*/ 5718 + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) 5719 + amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]); 5720 + } 5886 5721 break; 5887 5722 case 1: 5888 5723 case 2: ··· 6884 6709 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, 6885 6710 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, 6886 6711 .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, 6712 + .preempt_ib = gfx_v9_0_ring_preempt_ib, 6713 + .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, 6714 + .emit_wreg = gfx_v9_0_ring_emit_wreg, 6715 + .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 6716 + .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 6717 + .soft_recovery = gfx_v9_0_ring_soft_recovery, 6718 + .emit_mem_sync = gfx_v9_0_emit_mem_sync, 6719 + }; 6720 + 6721 + static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { 6722 + .type = AMDGPU_RING_TYPE_GFX, 6723 
+ .align_mask = 0xff, 6724 + .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6725 + .support_64bit_ptrs = true, 6726 + .secure_submission_supported = true, 6727 + .vmhub = AMDGPU_GFXHUB_0, 6728 + .get_rptr = amdgpu_sw_ring_get_rptr_gfx, 6729 + .get_wptr = amdgpu_sw_ring_get_wptr_gfx, 6730 + .set_wptr = amdgpu_sw_ring_set_wptr_gfx, 6731 + .emit_frame_size = /* totally 242 maximum if 16 IBs */ 6732 + 5 + /* COND_EXEC */ 6733 + 7 + /* PIPELINE_SYNC */ 6734 + SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6735 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6736 + 2 + /* VM_FLUSH */ 6737 + 8 + /* FENCE for VM_FLUSH */ 6738 + 20 + /* GDS switch */ 6739 + 4 + /* double SWITCH_BUFFER, 6740 + * the first COND_EXEC jump to the place just 6741 + * prior to this double SWITCH_BUFFER 6742 + */ 6743 + 5 + /* COND_EXEC */ 6744 + 7 + /* HDP_flush */ 6745 + 4 + /* VGT_flush */ 6746 + 14 + /* CE_META */ 6747 + 31 + /* DE_META */ 6748 + 3 + /* CNTX_CTRL */ 6749 + 5 + /* HDP_INVL */ 6750 + 8 + 8 + /* FENCE x2 */ 6751 + 2 + /* SWITCH_BUFFER */ 6752 + 7, /* gfx_v9_0_emit_mem_sync */ 6753 + .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ 6754 + .emit_ib = gfx_v9_0_ring_emit_ib_gfx, 6755 + .emit_fence = gfx_v9_0_ring_emit_fence, 6756 + .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 6757 + .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 6758 + .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 6759 + .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 6760 + .test_ring = gfx_v9_0_ring_test_ring, 6761 + .test_ib = gfx_v9_0_ring_test_ib, 6762 + .insert_nop = amdgpu_sw_ring_insert_nop, 6763 + .pad_ib = amdgpu_ring_generic_pad_ib, 6764 + .emit_switch_buffer = gfx_v9_ring_emit_sb, 6765 + .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, 6766 + .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, 6767 + .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, 6887 6768 .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, 6888 6769 .emit_wreg = gfx_v9_0_ring_emit_wreg, 6889 6770 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, ··· 
7024 6793 7025 6794 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 7026 6795 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx; 6796 + 6797 + if (adev->gfx.num_gfx_rings) { 6798 + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) 6799 + adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx; 6800 + } 7027 6801 7028 6802 for (i = 0; i < adev->gfx.num_compute_rings; i++) 7029 6803 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;

+1

drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c

··· 759 759 case IP_VERSION(11, 0, 1): 760 760 case IP_VERSION(11, 0, 2): 761 761 case IP_VERSION(11, 0, 3): 762 + case IP_VERSION(11, 0, 4): 762 763 adev->num_vmhubs = 2; 763 764 /* 764 765 * To fulfill 4-level page support,

+1

drivers/gpu/drm/amd/amdgpu/mes_v11_0.c

··· 388 388 mes_set_hw_res_pkt.disable_reset = 1; 389 389 mes_set_hw_res_pkt.disable_mes_log = 1; 390 390 mes_set_hw_res_pkt.use_different_vmid_compute = 1; 391 + mes_set_hw_res_pkt.enable_reg_active_poll = 1; 391 392 mes_set_hw_res_pkt.oversubscription_timer = 50; 392 393 393 394 return mes_v11_0_submit_pkt_and_poll_completion(mes,

+14 -14

drivers/gpu/drm/amd/amdgpu/nv.c

··· 82 82 /* Navi1x */ 83 83 static const struct amdgpu_video_codec_info nv_video_codecs_decode_array[] = 84 84 { 85 - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, 86 - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, 87 - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, 88 - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, 85 + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, 86 + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, 87 + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, 88 + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, 89 89 {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, 90 90 {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, 91 91 {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, ··· 100 100 /* Sienna Cichlid */ 101 101 static const struct amdgpu_video_codec_info sc_video_codecs_decode_array[] = 102 102 { 103 - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, 104 - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, 105 - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, 106 - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, 103 + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, 104 + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, 105 + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, 106 + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, 107 107 {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, 108 108 {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, 109 109 
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, ··· 125 125 126 126 static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array[] = 127 127 { 128 - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, 129 - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, 130 - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, 131 - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, 128 + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, 129 + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, 130 + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, 131 + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, 132 132 {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, 133 133 {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, 134 134 {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, ··· 149 149 150 150 /* Beige Goby*/ 151 151 static const struct amdgpu_video_codec_info bg_video_codecs_decode_array[] = { 152 - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, 152 + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, 153 153 {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, 154 154 {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, 155 155 }; ··· 166 166 167 167 /* Yellow Carp*/ 168 168 static const struct amdgpu_video_codec_info yc_video_codecs_decode_array[] = { 169 - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, 169 + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, 170 170 {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, 171 171 {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 
8192, 4352, 0)}, 172 172 {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},

+3

drivers/gpu/drm/amd/amdgpu/psp_v13_0.c

··· 46 46 MODULE_FIRMWARE("amdgpu/psp_13_0_7_ta.bin"); 47 47 MODULE_FIRMWARE("amdgpu/psp_13_0_10_sos.bin"); 48 48 MODULE_FIRMWARE("amdgpu/psp_13_0_10_ta.bin"); 49 + MODULE_FIRMWARE("amdgpu/psp_13_0_11_toc.bin"); 50 + MODULE_FIRMWARE("amdgpu/psp_13_0_11_ta.bin"); 49 51 50 52 /* For large FW files the time to complete can be very long */ 51 53 #define USBC_PD_POLLING_LIMIT_S 240 ··· 104 102 case IP_VERSION(13, 0, 3): 105 103 case IP_VERSION(13, 0, 5): 106 104 case IP_VERSION(13, 0, 8): 105 + case IP_VERSION(13, 0, 11): 107 106 err = psp_init_toc_microcode(psp, chip_name); 108 107 if (err) 109 108 return err;

+15 -9

drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c

··· 907 907 908 908 909 909 /** 910 - * sdma_v4_0_gfx_stop - stop the gfx async dma engines 910 + * sdma_v4_0_gfx_enable - enable the gfx async dma engines 911 911 * 912 912 * @adev: amdgpu_device pointer 913 - * 914 - * Stop the gfx async dma ring buffers (VEGA10). 913 + * @enable: enable SDMA RB/IB 914 + * control the gfx async dma ring buffers (VEGA10). 915 915 */ 916 - static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev) 916 + static void sdma_v4_0_gfx_enable(struct amdgpu_device *adev, bool enable) 917 917 { 918 918 u32 rb_cntl, ib_cntl; 919 919 int i; ··· 922 922 923 923 for (i = 0; i < adev->sdma.num_instances; i++) { 924 924 rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL); 925 - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); 925 + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, enable ? 1 : 0); 926 926 WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl); 927 927 ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL); 928 - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); 928 + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, enable ? 
1 : 0); 929 929 WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl); 930 930 } 931 931 } ··· 1044 1044 int i; 1045 1045 1046 1046 if (!enable) { 1047 - sdma_v4_0_gfx_stop(adev); 1047 + sdma_v4_0_gfx_enable(adev, enable); 1048 1048 sdma_v4_0_rlc_stop(adev); 1049 1049 if (adev->sdma.has_page_queue) 1050 1050 sdma_v4_0_page_stop(adev); ··· 1960 1960 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1961 1961 1962 1962 /* SMU saves SDMA state for us */ 1963 - if (adev->in_s0ix) 1963 + if (adev->in_s0ix) { 1964 + sdma_v4_0_gfx_enable(adev, false); 1964 1965 return 0; 1966 + } 1965 1967 1966 1968 return sdma_v4_0_hw_fini(adev); 1967 1969 } ··· 1973 1971 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1974 1972 1975 1973 /* SMU restores SDMA state for us */ 1976 - if (adev->in_s0ix) 1974 + if (adev->in_s0ix) { 1975 + sdma_v4_0_enable(adev, true); 1976 + sdma_v4_0_gfx_enable(adev, true); 1977 + amdgpu_ttm_set_buffer_funcs_status(adev, true); 1977 1978 return 0; 1979 + } 1978 1980 1979 1981 return sdma_v4_0_hw_init(adev); 1980 1982 }

+12 -12

drivers/gpu/drm/amd/amdgpu/soc15.c

··· 103 103 /* Vega */ 104 104 static const struct amdgpu_video_codec_info vega_video_codecs_decode_array[] = 105 105 { 106 - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, 107 - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, 108 - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, 109 - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, 106 + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, 107 + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, 108 + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, 109 + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, 110 110 {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)}, 111 111 {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, 112 112 }; ··· 120 120 /* Raven */ 121 121 static const struct amdgpu_video_codec_info rv_video_codecs_decode_array[] = 122 122 { 123 - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, 124 - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, 125 - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, 126 - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, 123 + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, 124 + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, 125 + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, 126 + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, 127 127 {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)}, 128 128 {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, 129 129 {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 4096, 4096, 0)}, ··· 138 138 /* Renoir, 
Arcturus */ 139 139 static const struct amdgpu_video_codec_info rn_video_codecs_decode_array[] = 140 140 { 141 - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, 142 - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, 143 - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, 144 - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, 141 + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, 142 + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, 143 + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, 144 + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, 145 145 {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, 146 146 {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, 147 147 {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},

+2

drivers/gpu/drm/amd/amdgpu/soc15d.h

··· 162 162 * 2 - Bypass 163 163 */ 164 164 #define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21) 165 + #define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30) 165 166 #define PACKET3_COPY_DATA 0x40 166 167 #define PACKET3_PFP_SYNC_ME 0x42 167 168 #define PACKET3_COND_WRITE 0x45 ··· 185 184 #define EOP_TC_ACTION_EN (1 << 17) /* L2 */ 186 185 #define EOP_TC_NC_ACTION_EN (1 << 19) 187 186 #define EOP_TC_MD_ACTION_EN (1 << 21) /* L2 metadata */ 187 + #define EOP_EXEC (1 << 28) /* For Trailing Fence */ 188 188 189 189 #define DATA_SEL(x) ((x) << 29) 190 190 /* 0 - discard

+28 -2

drivers/gpu/drm/amd/amdgpu/soc21.c

··· 43 43 #include "soc15.h" 44 44 #include "soc15_common.h" 45 45 #include "soc21.h" 46 + #include "mxgpu_nv.h" 46 47 47 48 static const struct amd_ip_funcs soc21_common_ip_funcs; 48 49 ··· 62 61 63 62 static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array[] = 64 63 { 65 - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, 64 + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, 66 65 {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, 67 66 {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, 68 67 {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, ··· 326 325 case IP_VERSION(13, 0, 10): 327 326 return AMD_RESET_METHOD_MODE1; 328 327 case IP_VERSION(13, 0, 4): 328 + case IP_VERSION(13, 0, 11): 329 329 return AMD_RESET_METHOD_MODE2; 330 330 default: 331 331 if (amdgpu_dpm_is_baco_supported(adev)) ··· 646 644 AMD_PG_SUPPORT_JPEG; 647 645 adev->external_rev_id = adev->rev_id + 0x20; 648 646 break; 647 + case IP_VERSION(11, 0, 4): 648 + adev->cg_flags = AMD_CG_SUPPORT_VCN_MGCG | 649 + AMD_CG_SUPPORT_JPEG_MGCG; 650 + adev->pg_flags = AMD_PG_SUPPORT_VCN | 651 + AMD_PG_SUPPORT_GFX_PG | 652 + AMD_PG_SUPPORT_JPEG; 653 + adev->external_rev_id = adev->rev_id + 0x1; 654 + break; 655 + 649 656 default: 650 657 /* FIXME: not supported yet */ 651 658 return -EINVAL; 652 659 } 653 660 654 - if (amdgpu_sriov_vf(adev)) 661 + if (amdgpu_sriov_vf(adev)) { 655 662 amdgpu_virt_init_setting(adev); 663 + xgpu_nv_mailbox_set_irq_funcs(adev); 664 + } 656 665 657 666 return 0; 658 667 } 659 668 660 669 static int soc21_common_late_init(void *handle) 661 670 { 671 + struct amdgpu_device *adev = (struct amdgpu_device *)handle; 672 + 673 + if (amdgpu_sriov_vf(adev)) 674 + xgpu_nv_mailbox_get_irq(adev); 675 + 662 676 return 0; 663 677 } 664 678 665 679 static int soc21_common_sw_init(void *handle) 666 680 { 681 + struct amdgpu_device *adev = 
(struct amdgpu_device *)handle; 682 + 683 + if (amdgpu_sriov_vf(adev)) 684 + xgpu_nv_mailbox_add_irq_id(adev); 685 + 667 686 return 0; 668 687 } 669 688 ··· 721 698 722 699 /* disable the doorbell aperture */ 723 700 soc21_enable_doorbell_aperture(adev, false); 701 + 702 + if (amdgpu_sriov_vf(adev)) 703 + xgpu_nv_mailbox_put_irq(adev); 724 704 725 705 return 0; 726 706 }

+24

drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c

··· 862 862 return; 863 863 } 864 864 865 + static void vcn_v4_0_enable_ras(struct amdgpu_device *adev, int inst_idx, 866 + bool indirect) 867 + { 868 + uint32_t tmp; 869 + 870 + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) 871 + return; 872 + 873 + tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK | 874 + VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK | 875 + VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK | 876 + VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK; 877 + WREG32_SOC15_DPG_MODE(inst_idx, 878 + SOC15_DPG_MODE_OFFSET(VCN, 0, regVCN_RAS_CNTL), 879 + tmp, 0, indirect); 880 + 881 + tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK; 882 + WREG32_SOC15_DPG_MODE(inst_idx, 883 + SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_SYS_INT_EN), 884 + tmp, 0, indirect); 885 + } 886 + 865 887 /** 866 888 * vcn_v4_0_start_dpg_mode - VCN start with dpg mode 867 889 * ··· 971 949 tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT; 972 950 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( 973 951 VCN, inst_idx, regUVD_LMI_CTRL2), tmp, 0, indirect); 952 + 953 + vcn_v4_0_enable_ras(adev, inst_idx, indirect); 974 954 975 955 /* enable master interrupt */ 976 956 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(

+1

drivers/gpu/drm/amd/amdkfd/kfd_crat.c

··· 1521 1521 case IP_VERSION(11, 0, 1): 1522 1522 case IP_VERSION(11, 0, 2): 1523 1523 case IP_VERSION(11, 0, 3): 1524 + case IP_VERSION(11, 0, 4): 1524 1525 num_of_cache_types = 1525 1526 kfd_fill_gpu_cache_info_from_gfx_config(kdev, *pcache_info); 1526 1527 break;

+2

drivers/gpu/drm/amd/amdkfd/kfd_device.c

··· 153 153 case IP_VERSION(11, 0, 1): 154 154 case IP_VERSION(11, 0, 2): 155 155 case IP_VERSION(11, 0, 3): 156 + case IP_VERSION(11, 0, 4): 156 157 kfd->device_info.event_interrupt_class = &event_interrupt_class_v11; 157 158 break; 158 159 default: ··· 395 394 f2g = &gfx_v11_kfd2kgd; 396 395 break; 397 396 case IP_VERSION(11, 0, 1): 397 + case IP_VERSION(11, 0, 4): 398 398 gfx_target_version = 110003; 399 399 f2g = &gfx_v11_kfd2kgd; 400 400 break;

+2 -3

drivers/gpu/drm/amd/amdkfd/kfd_topology.c

··· 2012 2012 2013 2013 kfd_debug_print_topology(); 2014 2014 2015 - if (!res) 2016 - kfd_notify_gpu_change(gpu_id, 1); 2015 + kfd_notify_gpu_change(gpu_id, 1); 2017 2016 2018 - return res; 2017 + return 0; 2019 2018 } 2020 2019 2021 2020 /**

+158 -125

drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c

··· 146 146 /* Number of bytes in PSP footer for firmware. */ 147 147 #define PSP_FOOTER_BYTES 0x100 148 148 149 - /* 150 - * DMUB Async to Sync Mechanism Status 151 - */ 152 - #define DMUB_ASYNC_TO_SYNC_ACCESS_FAIL 1 153 - #define DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT 2 154 - #define DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS 3 155 - #define DMUB_ASYNC_TO_SYNC_ACCESS_INVALID 4 156 - 157 149 /** 158 150 * DOC: overview 159 151 * ··· 1096 1104 /* Initialize hardware. */ 1097 1105 memset(&hw_params, 0, sizeof(hw_params)); 1098 1106 hw_params.fb_base = adev->gmc.fb_start; 1099 - hw_params.fb_offset = adev->gmc.aper_base; 1107 + hw_params.fb_offset = adev->vm_manager.vram_base_offset; 1100 1108 1101 1109 /* backdoor load firmware and trigger dmub running */ 1102 1110 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ··· 1218 1226 pa_config->system_aperture.agp_top = (uint64_t)agp_top << 24; 1219 1227 1220 1228 pa_config->system_aperture.fb_base = adev->gmc.fb_start; 1221 - pa_config->system_aperture.fb_offset = adev->gmc.aper_base; 1229 + pa_config->system_aperture.fb_offset = adev->vm_manager.vram_base_offset; 1222 1230 pa_config->system_aperture.fb_top = adev->gmc.fb_end; 1223 1231 1224 1232 pa_config->gart_config.page_table_start_addr = page_table_start.quad_part << 12; ··· 1433 1441 memset(&init_params, 0, sizeof(init_params)); 1434 1442 #endif 1435 1443 1444 + mutex_init(&adev->dm.dpia_aux_lock); 1436 1445 mutex_init(&adev->dm.dc_lock); 1437 1446 mutex_init(&adev->dm.audio_lock); 1438 1447 ··· 1798 1805 1799 1806 mutex_destroy(&adev->dm.audio_lock); 1800 1807 mutex_destroy(&adev->dm.dc_lock); 1808 + mutex_destroy(&adev->dm.dpia_aux_lock); 1801 1809 1802 1810 return; 1803 1811 } ··· 4869 4875 return 0; 4870 4876 } 4871 4877 4878 + static inline void fill_dc_dirty_rect(struct drm_plane *plane, 4879 + struct rect *dirty_rect, int32_t x, 4880 + int32_t y, int32_t width, int32_t height, 4881 + int *i, bool ffu) 4882 + { 4883 + if (*i > DC_MAX_DIRTY_RECTS) 4884 + return; 4885 + 
4886 + if (*i == DC_MAX_DIRTY_RECTS) 4887 + goto out; 4888 + 4889 + dirty_rect->x = x; 4890 + dirty_rect->y = y; 4891 + dirty_rect->width = width; 4892 + dirty_rect->height = height; 4893 + 4894 + if (ffu) 4895 + drm_dbg(plane->dev, 4896 + "[PLANE:%d] PSR FFU dirty rect size (%d, %d)\n", 4897 + plane->base.id, width, height); 4898 + else 4899 + drm_dbg(plane->dev, 4900 + "[PLANE:%d] PSR SU dirty rect at (%d, %d) size (%d, %d)", 4901 + plane->base.id, x, y, width, height); 4902 + 4903 + out: 4904 + (*i)++; 4905 + } 4906 + 4872 4907 /** 4873 4908 * fill_dc_dirty_rects() - Fill DC dirty regions for PSR selective updates 4874 4909 * ··· 4918 4895 * addition, certain use cases - such as cursor and multi-plane overlay (MPO) - 4919 4896 * implicitly provide damage clips without any client support via the plane 4920 4897 * bounds. 4921 - * 4922 - * Today, amdgpu_dm only supports the MPO and cursor usecase. 4923 - * 4924 - * TODO: Also enable for FB_DAMAGE_CLIPS 4925 4898 */ 4926 4899 static void fill_dc_dirty_rects(struct drm_plane *plane, 4927 4900 struct drm_plane_state *old_plane_state, ··· 4928 4909 struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state); 4929 4910 struct rect *dirty_rects = flip_addrs->dirty_rects; 4930 4911 uint32_t num_clips; 4912 + struct drm_mode_rect *clips; 4931 4913 bool bb_changed; 4932 4914 bool fb_changed; 4933 4915 uint32_t i = 0; 4934 - 4935 - flip_addrs->dirty_rect_count = 0; 4936 4916 4937 4917 /* 4938 4918 * Cursor plane has it's own dirty rect update interface. See ··· 4940 4922 if (plane->type == DRM_PLANE_TYPE_CURSOR) 4941 4923 return; 4942 4924 4943 - /* 4944 - * Today, we only consider MPO use-case for PSR SU. If MPO not 4945 - * requested, and there is a plane update, do FFU. 
4946 - */ 4925 + num_clips = drm_plane_get_damage_clips_count(new_plane_state); 4926 + clips = drm_plane_get_damage_clips(new_plane_state); 4927 + 4947 4928 if (!dm_crtc_state->mpo_requested) { 4948 - dirty_rects[0].x = 0; 4949 - dirty_rects[0].y = 0; 4950 - dirty_rects[0].width = dm_crtc_state->base.mode.crtc_hdisplay; 4951 - dirty_rects[0].height = dm_crtc_state->base.mode.crtc_vdisplay; 4952 - flip_addrs->dirty_rect_count = 1; 4953 - DRM_DEBUG_DRIVER("[PLANE:%d] PSR FFU dirty rect size (%d, %d)\n", 4954 - new_plane_state->plane->base.id, 4955 - dm_crtc_state->base.mode.crtc_hdisplay, 4956 - dm_crtc_state->base.mode.crtc_vdisplay); 4929 + if (!num_clips || num_clips > DC_MAX_DIRTY_RECTS) 4930 + goto ffu; 4931 + 4932 + for (; flip_addrs->dirty_rect_count < num_clips; clips++) 4933 + fill_dc_dirty_rect(new_plane_state->plane, 4934 + &dirty_rects[i], clips->x1, 4935 + clips->y1, clips->x2 - clips->x1, 4936 + clips->y2 - clips->y1, 4937 + &flip_addrs->dirty_rect_count, 4938 + false); 4957 4939 return; 4958 4940 } 4959 4941 ··· 4964 4946 * If plane is moved or resized, also add old bounding box to dirty 4965 4947 * rects. 
4966 4948 */ 4967 - num_clips = drm_plane_get_damage_clips_count(new_plane_state); 4968 4949 fb_changed = old_plane_state->fb->base.id != 4969 4950 new_plane_state->fb->base.id; 4970 4951 bb_changed = (old_plane_state->crtc_x != new_plane_state->crtc_x || ··· 4971 4954 old_plane_state->crtc_w != new_plane_state->crtc_w || 4972 4955 old_plane_state->crtc_h != new_plane_state->crtc_h); 4973 4956 4974 - DRM_DEBUG_DRIVER("[PLANE:%d] PSR bb_changed:%d fb_changed:%d num_clips:%d\n", 4975 - new_plane_state->plane->base.id, 4976 - bb_changed, fb_changed, num_clips); 4957 + drm_dbg(plane->dev, 4958 + "[PLANE:%d] PSR bb_changed:%d fb_changed:%d num_clips:%d\n", 4959 + new_plane_state->plane->base.id, 4960 + bb_changed, fb_changed, num_clips); 4977 4961 4978 - if (num_clips || fb_changed || bb_changed) { 4979 - dirty_rects[i].x = new_plane_state->crtc_x; 4980 - dirty_rects[i].y = new_plane_state->crtc_y; 4981 - dirty_rects[i].width = new_plane_state->crtc_w; 4982 - dirty_rects[i].height = new_plane_state->crtc_h; 4983 - DRM_DEBUG_DRIVER("[PLANE:%d] PSR SU dirty rect at (%d, %d) size (%d, %d)\n", 4984 - new_plane_state->plane->base.id, 4985 - dirty_rects[i].x, dirty_rects[i].y, 4986 - dirty_rects[i].width, dirty_rects[i].height); 4987 - i += 1; 4988 - } 4989 - 4990 - /* Add old plane bounding-box if plane is moved or resized */ 4991 4962 if (bb_changed) { 4992 - dirty_rects[i].x = old_plane_state->crtc_x; 4993 - dirty_rects[i].y = old_plane_state->crtc_y; 4994 - dirty_rects[i].width = old_plane_state->crtc_w; 4995 - dirty_rects[i].height = old_plane_state->crtc_h; 4996 - DRM_DEBUG_DRIVER("[PLANE:%d] PSR SU dirty rect at (%d, %d) size (%d, %d)\n", 4997 - old_plane_state->plane->base.id, 4998 - dirty_rects[i].x, dirty_rects[i].y, 4999 - dirty_rects[i].width, dirty_rects[i].height); 5000 - i += 1; 4963 + fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[i], 4964 + new_plane_state->crtc_x, 4965 + new_plane_state->crtc_y, 4966 + new_plane_state->crtc_w, 4967 + 
new_plane_state->crtc_h, &i, false); 4968 + 4969 + /* Add old plane bounding-box if plane is moved or resized */ 4970 + fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[i], 4971 + old_plane_state->crtc_x, 4972 + old_plane_state->crtc_y, 4973 + old_plane_state->crtc_w, 4974 + old_plane_state->crtc_h, &i, false); 5001 4975 } 4976 + 4977 + if (num_clips) { 4978 + for (; i < num_clips; clips++) 4979 + fill_dc_dirty_rect(new_plane_state->plane, 4980 + &dirty_rects[i], clips->x1, 4981 + clips->y1, clips->x2 - clips->x1, 4982 + clips->y2 - clips->y1, &i, false); 4983 + } else if (fb_changed && !bb_changed) { 4984 + fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[i], 4985 + new_plane_state->crtc_x, 4986 + new_plane_state->crtc_y, 4987 + new_plane_state->crtc_w, 4988 + new_plane_state->crtc_h, &i, false); 4989 + } 4990 + 4991 + if (i > DC_MAX_DIRTY_RECTS) 4992 + goto ffu; 5002 4993 5003 4994 flip_addrs->dirty_rect_count = i; 4995 + return; 4996 + 4997 + ffu: 4998 + fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[0], 0, 0, 4999 + dm_crtc_state->base.mode.crtc_hdisplay, 5000 + dm_crtc_state->base.mode.crtc_vdisplay, 5001 + &flip_addrs->dirty_rect_count, true); 5004 5002 } 5005 5003 5006 5004 static void update_stream_scaling_settings(const struct drm_display_mode *mode, ··· 10236 10204 return value; 10237 10205 } 10238 10206 10239 - static int amdgpu_dm_set_dmub_async_sync_status(bool is_cmd_aux, 10240 - struct dc_context *ctx, 10241 - uint8_t status_type, 10242 - uint32_t *operation_result) 10207 + int amdgpu_dm_process_dmub_aux_transfer_sync( 10208 + struct dc_context *ctx, 10209 + unsigned int link_index, 10210 + struct aux_payload *payload, 10211 + enum aux_return_code_type *operation_result) 10243 10212 { 10244 10213 struct amdgpu_device *adev = ctx->driver_context; 10245 - int return_status = -1; 10246 10214 struct dmub_notification *p_notify = adev->dm.dmub_notify; 10215 + int ret = -1; 10247 10216 10248 - if (is_cmd_aux) { 10249 - if 
(status_type == DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS) { 10250 - return_status = p_notify->aux_reply.length; 10251 - *operation_result = p_notify->result; 10252 - } else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT) { 10253 - *operation_result = AUX_RET_ERROR_TIMEOUT; 10254 - } else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_FAIL) { 10255 - *operation_result = AUX_RET_ERROR_ENGINE_ACQUIRE; 10256 - } else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_INVALID) { 10257 - *operation_result = AUX_RET_ERROR_INVALID_REPLY; 10258 - } else { 10259 - *operation_result = AUX_RET_ERROR_UNKNOWN; 10260 - } 10261 - } else { 10262 - if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS) { 10263 - return_status = 0; 10264 - *operation_result = p_notify->sc_status; 10265 - } else { 10266 - *operation_result = SET_CONFIG_UNKNOWN_ERROR; 10267 - } 10217 + mutex_lock(&adev->dm.dpia_aux_lock); 10218 + if (!dc_process_dmub_aux_transfer_async(ctx->dc, link_index, payload)) { 10219 + *operation_result = AUX_RET_ERROR_ENGINE_ACQUIRE; 10220 + goto out; 10221 + } 10222 + 10223 + if (!wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ)) { 10224 + DRM_ERROR("wait_for_completion_timeout timeout!"); 10225 + *operation_result = AUX_RET_ERROR_TIMEOUT; 10226 + goto out; 10268 10227 } 10269 10228 10270 - return return_status; 10229 + if (p_notify->result != AUX_RET_SUCCESS) { 10230 + /* 10231 + * Transient states before tunneling is enabled could 10232 + * lead to this error. We can ignore this for now. 
10233 + */ 10234 + if (p_notify->result != AUX_RET_ERROR_PROTOCOL_ERROR) { 10235 + DRM_WARN("DPIA AUX failed on 0x%x(%d), error %d\n", 10236 + payload->address, payload->length, 10237 + p_notify->result); 10238 + } 10239 + *operation_result = AUX_RET_ERROR_INVALID_REPLY; 10240 + goto out; 10241 + } 10242 + 10243 + 10244 + payload->reply[0] = adev->dm.dmub_notify->aux_reply.command; 10245 + if (!payload->write && p_notify->aux_reply.length && 10246 + (payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) { 10247 + 10248 + if (payload->length != p_notify->aux_reply.length) { 10249 + DRM_WARN("invalid read length %d from DPIA AUX 0x%x(%d)!\n", 10250 + p_notify->aux_reply.length, 10251 + payload->address, payload->length); 10252 + *operation_result = AUX_RET_ERROR_INVALID_REPLY; 10253 + goto out; 10254 + } 10255 + 10256 + memcpy(payload->data, p_notify->aux_reply.data, 10257 + p_notify->aux_reply.length); 10258 + } 10259 + 10260 + /* success */ 10261 + ret = p_notify->aux_reply.length; 10262 + *operation_result = p_notify->result; 10263 + out: 10264 + mutex_unlock(&adev->dm.dpia_aux_lock); 10265 + return ret; 10271 10266 } 10272 10267 10273 - int amdgpu_dm_process_dmub_aux_transfer_sync(bool is_cmd_aux, struct dc_context *ctx, 10274 - unsigned int link_index, void *cmd_payload, void *operation_result) 10268 + int amdgpu_dm_process_dmub_set_config_sync( 10269 + struct dc_context *ctx, 10270 + unsigned int link_index, 10271 + struct set_config_cmd_payload *payload, 10272 + enum set_config_status *operation_result) 10275 10273 { 10276 10274 struct amdgpu_device *adev = ctx->driver_context; 10277 - int ret = 0; 10275 + bool is_cmd_complete; 10276 + int ret; 10278 10277 10279 - if (is_cmd_aux) { 10280 - dc_process_dmub_aux_transfer_async(ctx->dc, 10281 - link_index, (struct aux_payload *)cmd_payload); 10282 - } else if (dc_process_dmub_set_config_async(ctx->dc, link_index, 10283 - (struct set_config_cmd_payload *)cmd_payload, 10284 - adev->dm.dmub_notify)) { 10285 - return 
amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux, 10286 - ctx, DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS, 10287 - (uint32_t *)operation_result); 10288 - } 10278 + mutex_lock(&adev->dm.dpia_aux_lock); 10279 + is_cmd_complete = dc_process_dmub_set_config_async(ctx->dc, 10280 + link_index, payload, adev->dm.dmub_notify); 10289 10281 10290 - ret = wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ); 10291 - if (ret == 0) { 10282 + if (is_cmd_complete || wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ)) { 10283 + ret = 0; 10284 + *operation_result = adev->dm.dmub_notify->sc_status; 10285 + } else { 10292 10286 DRM_ERROR("wait_for_completion_timeout timeout!"); 10293 - return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux, 10294 - ctx, DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT, 10295 - (uint32_t *)operation_result); 10287 + ret = -1; 10288 + *operation_result = SET_CONFIG_UNKNOWN_ERROR; 10296 10289 } 10297 10290 10298 - if (is_cmd_aux) { 10299 - if (adev->dm.dmub_notify->result == AUX_RET_SUCCESS) { 10300 - struct aux_payload *payload = (struct aux_payload *)cmd_payload; 10301 - 10302 - payload->reply[0] = adev->dm.dmub_notify->aux_reply.command; 10303 - if (!payload->write && adev->dm.dmub_notify->aux_reply.length && 10304 - payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK) { 10305 - 10306 - if (payload->length != adev->dm.dmub_notify->aux_reply.length) { 10307 - DRM_WARN("invalid read from DPIA AUX %x(%d) got length %d!\n", 10308 - payload->address, payload->length, 10309 - adev->dm.dmub_notify->aux_reply.length); 10310 - return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux, ctx, 10311 - DMUB_ASYNC_TO_SYNC_ACCESS_INVALID, 10312 - (uint32_t *)operation_result); 10313 - } 10314 - 10315 - memcpy(payload->data, adev->dm.dmub_notify->aux_reply.data, 10316 - adev->dm.dmub_notify->aux_reply.length); 10317 - } 10318 - } 10319 - } 10320 - 10321 - return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux, 10322 - ctx, DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS, 
10323 - (uint32_t *)operation_result); 10291 + mutex_unlock(&adev->dm.dpia_aux_lock); 10292 + return ret; 10324 10293 } 10325 10294 10326 10295 /*

+14 -3

drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h

··· 59 59 #include "signal_types.h" 60 60 #include "amdgpu_dm_crc.h" 61 61 struct aux_payload; 62 + struct set_config_cmd_payload; 62 63 enum aux_return_code_type; 64 + enum set_config_status; 63 65 64 66 /* Forward declarations */ 65 67 struct amdgpu_device; ··· 544 542 * occurred on certain intel platform 545 543 */ 546 544 bool aux_hpd_discon_quirk; 545 + 546 + /** 547 + * @dpia_aux_lock: 548 + * 549 + * Guards access to DPIA AUX 550 + */ 551 + struct mutex dpia_aux_lock; 547 552 }; 548 553 549 554 enum dsc_clock_force_state { ··· 794 785 795 786 extern const struct drm_encoder_helper_funcs amdgpu_dm_encoder_helper_funcs; 796 787 797 - int amdgpu_dm_process_dmub_aux_transfer_sync(bool is_cmd_aux, 798 - struct dc_context *ctx, unsigned int link_index, 799 - void *payload, void *operation_result); 788 + int amdgpu_dm_process_dmub_aux_transfer_sync(struct dc_context *ctx, unsigned int link_index, 789 + struct aux_payload *payload, enum aux_return_code_type *operation_result); 790 + 791 + int amdgpu_dm_process_dmub_set_config_sync(struct dc_context *ctx, unsigned int link_index, 792 + struct set_config_cmd_payload *payload, enum set_config_status *operation_result); 800 793 801 794 bool check_seamless_boot_capability(struct amdgpu_device *adev); 802 795

+22 -1

drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c

··· 2639 2639 return 0; 2640 2640 } 2641 2641 2642 + /* 2643 + * Reports whether the connected display is a USB4 DPIA tunneled display 2644 + * Example usage: cat /sys/kernel/debug/dri/0/DP-8/is_dpia_link 2645 + */ 2646 + static int is_dpia_link_show(struct seq_file *m, void *data) 2647 + { 2648 + struct drm_connector *connector = m->private; 2649 + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); 2650 + struct dc_link *link = aconnector->dc_link; 2651 + 2652 + if (connector->status != connector_status_connected) 2653 + return -ENODEV; 2654 + 2655 + seq_printf(m, "%s\n", (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) ? "yes" : 2656 + (link->ep_type == DISPLAY_ENDPOINT_PHY) ? "no" : "unknown"); 2657 + 2658 + return 0; 2659 + } 2660 + 2642 2661 DEFINE_SHOW_ATTRIBUTE(dp_dsc_fec_support); 2643 2662 DEFINE_SHOW_ATTRIBUTE(dmub_fw_state); 2644 2663 DEFINE_SHOW_ATTRIBUTE(dmub_tracebuffer); ··· 2669 2650 DEFINE_SHOW_ATTRIBUTE(psr_capability); 2670 2651 DEFINE_SHOW_ATTRIBUTE(dp_is_mst_connector); 2671 2652 DEFINE_SHOW_ATTRIBUTE(dp_mst_progress_status); 2653 + DEFINE_SHOW_ATTRIBUTE(is_dpia_link); 2672 2654 2673 2655 static const struct file_operations dp_dsc_clock_en_debugfs_fops = { 2674 2656 .owner = THIS_MODULE, ··· 2814 2794 {"max_bpc", &dp_max_bpc_debugfs_fops}, 2815 2795 {"dsc_disable_passthrough", &dp_dsc_disable_passthrough_debugfs_fops}, 2816 2796 {"is_mst_connector", &dp_is_mst_connector_fops}, 2817 - {"mst_progress_status", &dp_mst_progress_status_fops} 2797 + {"mst_progress_status", &dp_mst_progress_status_fops}, 2798 + {"is_dpia_link", &is_dpia_link_fops} 2818 2799 }; 2819 2800 2820 2801 #ifdef CONFIG_DRM_AMD_DC_HDCP

+4 -6

drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c

··· 817 817 struct aux_payload *payload, 818 818 enum aux_return_code_type *operation_result) 819 819 { 820 - return amdgpu_dm_process_dmub_aux_transfer_sync(true, ctx, 821 - link->link_index, (void *)payload, 822 - (void *)operation_result); 820 + return amdgpu_dm_process_dmub_aux_transfer_sync(ctx, link->link_index, payload, 821 + operation_result); 823 822 } 824 823 825 824 int dm_helpers_dmub_set_config_sync(struct dc_context *ctx, ··· 826 827 struct set_config_cmd_payload *payload, 827 828 enum set_config_status *operation_result) 828 829 { 829 - return amdgpu_dm_process_dmub_aux_transfer_sync(false, ctx, 830 - link->link_index, (void *)payload, 831 - (void *)operation_result); 830 + return amdgpu_dm_process_dmub_set_config_sync(ctx, link->link_index, payload, 831 + operation_result); 832 832 } 833 833 834 834 void dm_set_dcn_clocks(struct dc_context *ctx, struct dc_clocks *clks)

+4

drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c

··· 1600 1600 drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0, 1601 1601 supported_rotations); 1602 1602 1603 + if (dm->adev->ip_versions[DCE_HWIP][0] > IP_VERSION(3, 0, 1) && 1604 + plane->type != DRM_PLANE_TYPE_CURSOR) 1605 + drm_plane_enable_fb_damage_clips(plane); 1606 + 1603 1607 drm_plane_helper_add(plane, &dm_plane_helper_funcs); 1604 1608 1605 1609 #ifdef CONFIG_DRM_AMD_DC_HDR

+1 -1

drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c

··· 438 438 } 439 439 440 440 if (!new_clocks->dtbclk_en) { 441 - new_clocks->ref_dtbclk_khz = 0; 441 + new_clocks->ref_dtbclk_khz = clk_mgr_base->bw_params->clk_table.entries[0].dtbclk_mhz * 1000; 442 442 } 443 443 444 444 /* clock limits are received with MHz precision, divide by 1000 to prevent setting clocks at every call */

+20 -1

drivers/gpu/drm/amd/display/dc/core/dc.c

··· 3061 3061 * Ensures that we have enough pipes for newly added MPO planes 3062 3062 */ 3063 3063 if (dc->res_pool->funcs->remove_phantom_pipes) 3064 - dc->res_pool->funcs->remove_phantom_pipes(dc, context); 3064 + dc->res_pool->funcs->remove_phantom_pipes(dc, context, false); 3065 3065 3066 3066 /*remove old surfaces from context */ 3067 3067 if (!dc_rem_all_planes_for_stream(dc, stream, context)) { ··· 3954 3954 struct dc_state *context; 3955 3955 enum surface_update_type update_type; 3956 3956 int i; 3957 + struct mall_temp_config mall_temp_config; 3957 3958 3958 3959 /* In cases where MPO and split or ODM are used transitions can 3959 3960 * cause underflow. Apply stream configuration with minimal pipe ··· 3986 3985 3987 3986 /* on plane removal, minimal state is the new one */ 3988 3987 if (force_minimal_pipe_splitting && !is_plane_addition) { 3988 + /* Since all phantom pipes are removed in full validation, 3989 + * we have to save and restore the subvp/mall config when 3990 + * we do a minimal transition since the flags marking the 3991 + * pipe as subvp/phantom will be cleared (dc copy constructor 3992 + * creates a shallow copy). 3993 + */ 3994 + if (dc->res_pool->funcs->save_mall_state) 3995 + dc->res_pool->funcs->save_mall_state(dc, context, &mall_temp_config); 3989 3996 if (!commit_minimal_transition_state(dc, context)) { 3990 3997 dc_release_state(context); 3991 3998 return false; 3992 3999 } 4000 + if (dc->res_pool->funcs->restore_mall_state) 4001 + dc->res_pool->funcs->restore_mall_state(dc, context, &mall_temp_config); 3993 4002 4003 + /* If we do a minimal transition with plane removal and the context 4004 + * has subvp we also have to retain back the phantom stream / planes 4005 + * since the refcount is decremented as part of the min transition 4006 + * (we commit a state with no subvp, so the phantom streams / planes 4007 + * had to be removed). 
4008 + */ 4009 + if (dc->res_pool->funcs->retain_phantom_pipes) 4010 + dc->res_pool->funcs->retain_phantom_pipes(dc, context); 3994 4011 update_type = UPDATE_TYPE_FULL; 3995 4012 } 3996 4013

+1 -1

drivers/gpu/drm/amd/display/dc/dc.h

··· 47 47 struct set_config_cmd_payload; 48 48 struct dmub_notification; 49 49 50 - #define DC_VER "3.2.213" 50 + #define DC_VER "3.2.214" 51 51 52 52 #define MAX_SURFACES 3 53 53 #define MAX_PLANES 6

+11

drivers/gpu/drm/amd/display/dc/dc_stream.h

··· 160 160 struct dc_stream_state *paired_stream; // master / slave stream 161 161 }; 162 162 163 + /* Temp struct used to save and restore MALL config 164 + * during validation. 165 + * 166 + * TODO: Move MALL config into dc_state instead of stream struct 167 + * to avoid needing to save/restore. 168 + */ 169 + struct mall_temp_config { 170 + struct mall_stream_config mall_stream_config[MAX_PIPES]; 171 + bool is_phantom_plane[MAX_PIPES]; 172 + }; 173 + 163 174 struct dc_stream_state { 164 175 // sink is deprecated, new code should not reference 165 176 // this pointer

+28 -2

drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c

··· 869 869 tg->funcs->clear_optc_underflow(tg); 870 870 } 871 871 872 + static int calculate_vready_offset_for_group(struct pipe_ctx *pipe) 873 + { 874 + struct pipe_ctx *other_pipe; 875 + int vready_offset = pipe->pipe_dlg_param.vready_offset; 876 + 877 + /* Always use the largest vready_offset of all connected pipes */ 878 + for (other_pipe = pipe->bottom_pipe; other_pipe != NULL; other_pipe = other_pipe->bottom_pipe) { 879 + if (other_pipe->pipe_dlg_param.vready_offset > vready_offset) 880 + vready_offset = other_pipe->pipe_dlg_param.vready_offset; 881 + } 882 + for (other_pipe = pipe->top_pipe; other_pipe != NULL; other_pipe = other_pipe->top_pipe) { 883 + if (other_pipe->pipe_dlg_param.vready_offset > vready_offset) 884 + vready_offset = other_pipe->pipe_dlg_param.vready_offset; 885 + } 886 + for (other_pipe = pipe->next_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->next_odm_pipe) { 887 + if (other_pipe->pipe_dlg_param.vready_offset > vready_offset) 888 + vready_offset = other_pipe->pipe_dlg_param.vready_offset; 889 + } 890 + for (other_pipe = pipe->prev_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->prev_odm_pipe) { 891 + if (other_pipe->pipe_dlg_param.vready_offset > vready_offset) 892 + vready_offset = other_pipe->pipe_dlg_param.vready_offset; 893 + } 894 + 895 + return vready_offset; 896 + } 897 + 872 898 enum dc_status dcn10_enable_stream_timing( 873 899 struct pipe_ctx *pipe_ctx, 874 900 struct dc_state *context, ··· 938 912 pipe_ctx->stream_res.tg->funcs->program_timing( 939 913 pipe_ctx->stream_res.tg, 940 914 &stream->timing, 941 - pipe_ctx->pipe_dlg_param.vready_offset, 915 + calculate_vready_offset_for_group(pipe_ctx), 942 916 pipe_ctx->pipe_dlg_param.vstartup_start, 943 917 pipe_ctx->pipe_dlg_param.vupdate_offset, 944 918 pipe_ctx->pipe_dlg_param.vupdate_width, ··· 2934 2908 2935 2909 pipe_ctx->stream_res.tg->funcs->program_global_sync( 2936 2910 pipe_ctx->stream_res.tg, 2937 - pipe_ctx->pipe_dlg_param.vready_offset, 2911 + 
calculate_vready_offset_for_group(pipe_ctx), 2938 2912 pipe_ctx->pipe_dlg_param.vstartup_start, 2939 2913 pipe_ctx->pipe_dlg_param.vupdate_offset, 2940 2914 pipe_ctx->pipe_dlg_param.vupdate_width);

+31 -3

drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c

··· 1652 1652 hubp->funcs->phantom_hubp_post_enable(hubp); 1653 1653 } 1654 1654 1655 + static int calculate_vready_offset_for_group(struct pipe_ctx *pipe) 1656 + { 1657 + struct pipe_ctx *other_pipe; 1658 + int vready_offset = pipe->pipe_dlg_param.vready_offset; 1659 + 1660 + /* Always use the largest vready_offset of all connected pipes */ 1661 + for (other_pipe = pipe->bottom_pipe; other_pipe != NULL; other_pipe = other_pipe->bottom_pipe) { 1662 + if (other_pipe->pipe_dlg_param.vready_offset > vready_offset) 1663 + vready_offset = other_pipe->pipe_dlg_param.vready_offset; 1664 + } 1665 + for (other_pipe = pipe->top_pipe; other_pipe != NULL; other_pipe = other_pipe->top_pipe) { 1666 + if (other_pipe->pipe_dlg_param.vready_offset > vready_offset) 1667 + vready_offset = other_pipe->pipe_dlg_param.vready_offset; 1668 + } 1669 + for (other_pipe = pipe->next_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->next_odm_pipe) { 1670 + if (other_pipe->pipe_dlg_param.vready_offset > vready_offset) 1671 + vready_offset = other_pipe->pipe_dlg_param.vready_offset; 1672 + } 1673 + for (other_pipe = pipe->prev_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->prev_odm_pipe) { 1674 + if (other_pipe->pipe_dlg_param.vready_offset > vready_offset) 1675 + vready_offset = other_pipe->pipe_dlg_param.vready_offset; 1676 + } 1677 + 1678 + return vready_offset; 1679 + } 1655 1680 1656 1681 static void dcn20_program_pipe( 1657 1682 struct dc *dc, ··· 1695 1670 && !pipe_ctx->prev_odm_pipe) { 1696 1671 pipe_ctx->stream_res.tg->funcs->program_global_sync( 1697 1672 pipe_ctx->stream_res.tg, 1698 - pipe_ctx->pipe_dlg_param.vready_offset, 1673 + calculate_vready_offset_for_group(pipe_ctx), 1699 1674 pipe_ctx->pipe_dlg_param.vstartup_start, 1700 1675 pipe_ctx->pipe_dlg_param.vupdate_offset, 1701 1676 pipe_ctx->pipe_dlg_param.vupdate_width); ··· 1741 1716 * only do gamma programming for powering on, internal memcmp to avoid 1742 1717 * updating on slave planes 1743 1718 */ 1744 - if 
(pipe_ctx->update_flags.bits.enable || pipe_ctx->stream->update_flags.bits.out_tf) 1719 + if (pipe_ctx->update_flags.bits.enable || 1720 + pipe_ctx->update_flags.bits.plane_changed || 1721 + pipe_ctx->stream->update_flags.bits.out_tf || 1722 + pipe_ctx->plane_state->update_flags.bits.output_tf_change) 1745 1723 hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream); 1746 1724 1747 1725 /* If the pipe has been enabled or has a different opp, we ··· 2095 2067 2096 2068 pipe_ctx->stream_res.tg->funcs->program_global_sync( 2097 2069 pipe_ctx->stream_res.tg, 2098 - pipe_ctx->pipe_dlg_param.vready_offset, 2070 + calculate_vready_offset_for_group(pipe_ctx), 2099 2071 pipe_ctx->pipe_dlg_param.vstartup_start, 2100 2072 pipe_ctx->pipe_dlg_param.vupdate_offset, 2101 2073 pipe_ctx->pipe_dlg_param.vupdate_width);

+1 -5

drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c

··· 225 225 } else { 226 226 REG_UPDATE_2(OTG_PIXEL_RATE_CNTL[params->otg_inst], 227 227 DTBCLK_DTO_ENABLE[params->otg_inst], 0, 228 - PIPE_DTO_SRC_SEL[params->otg_inst], 1); 229 - if (params->is_hdmi) 230 - REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst], 231 - PIPE_DTO_SRC_SEL[params->otg_inst], 0); 232 - 228 + PIPE_DTO_SRC_SEL[params->otg_inst], params->is_hdmi ? 0 : 1); 233 229 REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], 0); 234 230 REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], 0); 235 231 }

+4 -4

drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c

··· 262 262 num_mblks = ((mall_alloc_width_blk_aligned + mblk_width - 1) / mblk_width) * 263 263 ((mall_alloc_height_blk_aligned + mblk_height - 1) / mblk_height); 264 264 265 - /* For DCC: 266 - * meta_num_mblk = CEILING(full_mblk_width_ub_l*full_mblk_height_ub_l*Bpe/256/mblk_bytes, 1) 265 + /*For DCC: 266 + * meta_num_mblk = CEILING(meta_pitch*full_vp_height*Bpe/256/mblk_bytes, 1) 267 267 */ 268 268 if (pipe->plane_state->dcc.enable) 269 - num_mblks += (mall_alloc_width_blk_aligned * mall_alloc_width_blk_aligned * bytes_per_pixel + 269 + num_mblks += (pipe->plane_state->dcc.meta_pitch * pipe->plane_res.scl_data.viewport.height * bytes_per_pixel + 270 270 (256 * DCN3_2_MALL_MBLK_SIZE_BYTES) - 1) / (256 * DCN3_2_MALL_MBLK_SIZE_BYTES); 271 271 272 272 bytes_in_mall = num_mblks * DCN3_2_MALL_MBLK_SIZE_BYTES; ··· 316 316 cache_lines_used += (((cursor_size + DCN3_2_MALL_MBLK_SIZE_BYTES - 1) / 317 317 DCN3_2_MALL_MBLK_SIZE_BYTES) * DCN3_2_MALL_MBLK_SIZE_BYTES) / 318 318 dc->caps.cache_line_size + 2; 319 + break; 319 320 } 320 - break; 321 321 } 322 322 } 323 323

+41 -25

drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c

··· 1743 1743 } 1744 1744 1745 1745 // return true if removed piped from ctx, false otherwise 1746 - bool dcn32_remove_phantom_pipes(struct dc *dc, struct dc_state *context) 1746 + bool dcn32_remove_phantom_pipes(struct dc *dc, struct dc_state *context, bool fast_update) 1747 1747 { 1748 1748 int i; 1749 1749 bool removed_pipe = false; ··· 1770 1770 removed_pipe = true; 1771 1771 } 1772 1772 1773 - // Clear all phantom stream info 1774 - if (pipe->stream) { 1775 - pipe->stream->mall_stream_config.type = SUBVP_NONE; 1776 - pipe->stream->mall_stream_config.paired_stream = NULL; 1777 - } 1773 + /* For non-full updates, a shallow copy of the current state 1774 + * is created. In this case we don't want to erase the current 1775 + * state (there can be 2 HIRQL threads, one in flip, and one in 1776 + * checkMPO) that can cause a race condition. 1777 + * 1778 + * This is just a workaround, needs a proper fix. 1779 + */ 1780 + if (!fast_update) { 1781 + // Clear all phantom stream info 1782 + if (pipe->stream) { 1783 + pipe->stream->mall_stream_config.type = SUBVP_NONE; 1784 + pipe->stream->mall_stream_config.paired_stream = NULL; 1785 + } 1778 1786 1779 - if (pipe->plane_state) { 1780 - pipe->plane_state->is_phantom = false; 1787 + if (pipe->plane_state) { 1788 + pipe->plane_state->is_phantom = false; 1789 + } 1781 1790 } 1782 1791 } 1783 1792 return removed_pipe; ··· 1959 1950 pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; 1960 1951 pipes[pipe_cnt].pipe.scale_ratio_depth.lb_depth = dm_lb_19; 1961 1952 1962 - switch (pipe->stream->mall_stream_config.type) { 1963 - case SUBVP_MAIN: 1964 - pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_sub_viewport; 1965 - subvp_in_use = true; 1966 - break; 1967 - case SUBVP_PHANTOM: 1968 - pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_phantom_pipe; 1969 - pipes[pipe_cnt].pipe.src.use_mall_for_static_screen = dm_use_mall_static_screen_disable; 1970 - // Disallow 
unbounded req for SubVP according to DCHUB programming guide 1971 - pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; 1972 - break; 1973 - case SUBVP_NONE: 1974 - pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_disable; 1975 - pipes[pipe_cnt].pipe.src.use_mall_for_static_screen = dm_use_mall_static_screen_disable; 1976 - break; 1977 - default: 1978 - break; 1953 + /* Only populate DML input with subvp info for full updates. 1954 + * This is just a workaround -- needs a proper fix. 1955 + */ 1956 + if (!fast_validate) { 1957 + switch (pipe->stream->mall_stream_config.type) { 1958 + case SUBVP_MAIN: 1959 + pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_sub_viewport; 1960 + subvp_in_use = true; 1961 + break; 1962 + case SUBVP_PHANTOM: 1963 + pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_phantom_pipe; 1964 + pipes[pipe_cnt].pipe.src.use_mall_for_static_screen = dm_use_mall_static_screen_disable; 1965 + // Disallow unbounded req for SubVP according to DCHUB programming guide 1966 + pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; 1967 + break; 1968 + case SUBVP_NONE: 1969 + pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_disable; 1970 + pipes[pipe_cnt].pipe.src.use_mall_for_static_screen = dm_use_mall_static_screen_disable; 1971 + break; 1972 + default: 1973 + break; 1974 + } 1979 1975 } 1980 1976 1981 1977 pipes[pipe_cnt].dout.dsc_input_bpc = 0; ··· 2069 2055 .add_phantom_pipes = dcn32_add_phantom_pipes, 2070 2056 .remove_phantom_pipes = dcn32_remove_phantom_pipes, 2071 2057 .retain_phantom_pipes = dcn32_retain_phantom_pipes, 2058 + .save_mall_state = dcn32_save_mall_state, 2059 + .restore_mall_state = dcn32_restore_mall_state, 2072 2060 }; 2073 2061 2074 2062

+1 -12

drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h

··· 45 45 extern struct _vcs_dpi_ip_params_st dcn3_2_ip; 46 46 extern struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc; 47 47 48 - /* Temp struct used to save and restore MALL config 49 - * during validation. 50 - * 51 - * TODO: Move MALL config into dc_state instead of stream struct 52 - * to avoid needing to save/restore. 53 - */ 54 - struct mall_temp_config { 55 - struct mall_stream_config mall_stream_config[MAX_PIPES]; 56 - bool is_phantom_plane[MAX_PIPES]; 57 - }; 58 - 59 48 struct dcn32_resource_pool { 60 49 struct resource_pool base; 61 50 }; ··· 70 81 struct dc_transfer_func **shaper); 71 82 72 83 bool dcn32_remove_phantom_pipes(struct dc *dc, 73 - struct dc_state *context); 84 + struct dc_state *context, bool fast_update); 74 85 75 86 void dcn32_retain_phantom_pipes(struct dc *dc, 76 87 struct dc_state *context);

+10 -5

drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c

··· 97 97 * FLOOR(vp_x_start, blk_width) 98 98 */ 99 99 full_vp_width_blk_aligned = ((pipe->plane_res.scl_data.viewport.x + 100 - pipe->plane_res.scl_data.viewport.width + mblk_width - 1) / mblk_width * mblk_width) + 100 + pipe->plane_res.scl_data.viewport.width + mblk_width - 1) / mblk_width * mblk_width) - 101 101 (pipe->plane_res.scl_data.viewport.x / mblk_width * mblk_width); 102 102 103 103 /* full_vp_height_blk_aligned = FLOOR(vp_y_start + full_vp_height + blk_height - 1, blk_height) - 104 104 * FLOOR(vp_y_start, blk_height) 105 105 */ 106 106 full_vp_height_blk_aligned = ((pipe->plane_res.scl_data.viewport.y + 107 - full_vp_height + mblk_height - 1) / mblk_height * mblk_height) + 107 + full_vp_height + mblk_height - 1) / mblk_height * mblk_height) - 108 108 (pipe->plane_res.scl_data.viewport.y / mblk_height * mblk_height); 109 109 110 110 /* mall_alloc_width_blk_aligned_l/c = full_vp_width_blk_aligned_l/c */ ··· 121 121 */ 122 122 num_mblks = ((mall_alloc_width_blk_aligned + mblk_width - 1) / mblk_width) * 123 123 ((mall_alloc_height_blk_aligned + mblk_height - 1) / mblk_height); 124 + 125 + /*For DCC: 126 + * meta_num_mblk = CEILING(meta_pitch*full_vp_height*Bpe/256/mblk_bytes, 1) 127 + */ 128 + if (pipe->plane_state->dcc.enable) 129 + num_mblks += (pipe->plane_state->dcc.meta_pitch * pipe->plane_res.scl_data.viewport.height * bytes_per_pixel + 130 + (256 * DCN3_2_MALL_MBLK_SIZE_BYTES) - 1) / (256 * DCN3_2_MALL_MBLK_SIZE_BYTES); 131 + 124 132 bytes_in_mall = num_mblks * DCN3_2_MALL_MBLK_SIZE_BYTES; 125 133 // cache lines used is total bytes / cache_line size. Add +2 for worst case alignment 126 134 // (MALL is 64-byte aligned) 127 135 cache_lines_per_plane = bytes_in_mall / dc->caps.cache_line_size + 2; 128 136 129 - /* For DCC divide by 256 */ 130 - if (pipe->plane_state->dcc.enable) 131 - cache_lines_per_plane = cache_lines_per_plane + (cache_lines_per_plane / 256) + 1; 132 137 cache_lines_used += cache_lines_per_plane; 133 138 } 134 139 }

+2

drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c

··· 1622 1622 .add_phantom_pipes = dcn32_add_phantom_pipes, 1623 1623 .remove_phantom_pipes = dcn32_remove_phantom_pipes, 1624 1624 .retain_phantom_pipes = dcn32_retain_phantom_pipes, 1625 + .save_mall_state = dcn32_save_mall_state, 1626 + .restore_mall_state = dcn32_restore_mall_state, 1625 1627 }; 1626 1628 1627 1629

+3

drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c

··· 559 559 context->bw_ctx.bw.dcn.clk.dramclk_khz = 0; 560 560 context->bw_ctx.bw.dcn.clk.fclk_khz = 0; 561 561 context->bw_ctx.bw.dcn.clk.p_state_change_support = true; 562 + for (i = 0; i < dc->res_pool->pipe_count; i++) 563 + if (context->res_ctx.pipe_ctx[i].stream) 564 + context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = 0; 562 565 } 563 566 } 564 567

+6 -3

drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c

··· 1203 1203 // If SubVP pipe config is unsupported (or cannot be used for UCLK switching) 1204 1204 // remove phantom pipes and repopulate dml pipes 1205 1205 if (!found_supported_config) { 1206 - dc->res_pool->funcs->remove_phantom_pipes(dc, context); 1206 + dc->res_pool->funcs->remove_phantom_pipes(dc, context, false); 1207 1207 vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] = dm_dram_clock_change_unsupported; 1208 1208 *pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false); 1209 1209 ··· 1320 1320 1321 1321 if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) 1322 1322 context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; 1323 - context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; 1323 + if (context->res_ctx.pipe_ctx[i].plane_state) 1324 + context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; 1325 + else 1326 + context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = 0; 1324 1327 context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest; 1325 1328 pipe_idx++; 1326 1329 } ··· 1518 1515 return false; 1519 1516 1520 1517 // For each full update, remove all existing phantom pipes first 1521 - dc->res_pool->funcs->remove_phantom_pipes(dc, context); 1518 + dc->res_pool->funcs->remove_phantom_pipes(dc, context, fast_validate); 1522 1519 1523 1520 dc->res_pool->funcs->update_soc_for_wm_a(dc, context); 1524 1521

+3 -1

drivers/gpu/drm/amd/display/dc/inc/core_types.h

··· 240 240 unsigned int pipe_cnt, 241 241 unsigned int index); 242 242 243 - bool (*remove_phantom_pipes)(struct dc *dc, struct dc_state *context); 243 + bool (*remove_phantom_pipes)(struct dc *dc, struct dc_state *context, bool fast_update); 244 244 void (*retain_phantom_pipes)(struct dc *dc, struct dc_state *context); 245 245 void (*get_panel_config_defaults)(struct dc_panel_config *panel_config); 246 + void (*save_mall_state)(struct dc *dc, struct dc_state *context, struct mall_temp_config *temp_config); 247 + void (*restore_mall_state)(struct dc *dc, struct dc_state *context, struct mall_temp_config *temp_config); 246 248 }; 247 249 248 250 struct audio_support{

+5 -1

drivers/gpu/drm/amd/include/mes_v11_api_def.h

··· 222 222 uint32_t apply_grbm_remote_register_dummy_read_wa : 1; 223 223 uint32_t second_gfx_pipe_enabled : 1; 224 224 uint32_t enable_level_process_quantum_check : 1; 225 - uint32_t reserved : 25; 225 + uint32_t legacy_sch_mode : 1; 226 + uint32_t disable_add_queue_wptr_mc_addr : 1; 227 + uint32_t enable_mes_event_int_logging : 1; 228 + uint32_t enable_reg_active_poll : 1; 229 + uint32_t reserved : 21; 226 230 }; 227 231 uint32_t uint32_t_all; 228 232 };

+2 -1

drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c

··· 161 161 162 162 int smu_set_gfx_power_up_by_imu(struct smu_context *smu) 163 163 { 164 - if (!smu->ppt_funcs && !smu->ppt_funcs->set_gfx_power_up_by_imu) 164 + if (!smu->ppt_funcs || !smu->ppt_funcs->set_gfx_power_up_by_imu) 165 165 return -EOPNOTSUPP; 166 166 167 167 return smu->ppt_funcs->set_gfx_power_up_by_imu(smu); ··· 585 585 yellow_carp_set_ppt_funcs(smu); 586 586 break; 587 587 case IP_VERSION(13, 0, 4): 588 + case IP_VERSION(13, 0, 11): 588 589 smu_v13_0_4_set_ppt_funcs(smu); 589 590 break; 590 591 case IP_VERSION(13, 0, 5):

+27 -1

drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c

··· 79 79 #define mmTHM_BACO_CNTL_ARCT 0xA7 80 80 #define mmTHM_BACO_CNTL_ARCT_BASE_IDX 0 81 81 82 + static void smu_v11_0_poll_baco_exit(struct smu_context *smu) 83 + { 84 + struct amdgpu_device *adev = smu->adev; 85 + uint32_t data, loop = 0; 86 + 87 + do { 88 + usleep_range(1000, 1100); 89 + data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL); 90 + } while ((data & 0x100) && (++loop < 100)); 91 + } 92 + 82 93 int smu_v11_0_init_microcode(struct smu_context *smu) 83 94 { 84 95 struct amdgpu_device *adev = smu->adev; ··· 1599 1588 if (amdgpu_sriov_vf(smu->adev) || !smu_baco->platform_support) 1600 1589 return false; 1601 1590 1591 + /* return true if ASIC is in BACO state already */ 1592 + if (smu_v11_0_baco_get_state(smu) == SMU_BACO_STATE_ENTER) 1593 + return true; 1594 + 1602 1595 /* Arcturus does not support this bit mask */ 1603 1596 if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_BACO_BIT) && 1604 1597 !smu_cmn_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT)) ··· 1700 1685 1701 1686 int smu_v11_0_baco_exit(struct smu_context *smu) 1702 1687 { 1703 - return smu_v11_0_baco_set_state(smu, SMU_BACO_STATE_EXIT); 1688 + int ret; 1689 + 1690 + ret = smu_v11_0_baco_set_state(smu, SMU_BACO_STATE_EXIT); 1691 + if (!ret) { 1692 + /* 1693 + * Poll BACO exit status to ensure FW has completed 1694 + * BACO exit process to avoid timing issues. 1695 + */ 1696 + smu_v11_0_poll_baco_exit(smu); 1697 + } 1698 + 1699 + return ret; 1704 1700 } 1705 1701 1706 1702 int smu_v11_0_mode1_reset(struct smu_context *smu)

+3

drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c

··· 250 250 251 251 switch (adev->ip_versions[MP1_HWIP][0]) { 252 252 case IP_VERSION(13, 0, 4): 253 + case IP_VERSION(13, 0, 11): 253 254 mp1_fw_flags = RREG32_PCIE(MP1_Public | 254 255 (smnMP1_V13_0_4_FIRMWARE_FLAGS & 0xffffffff)); 255 256 break; ··· 302 301 smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_YELLOW_CARP; 303 302 break; 304 303 case IP_VERSION(13, 0, 4): 304 + case IP_VERSION(13, 0, 11): 305 305 smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_SMU_V13_0_4; 306 306 break; 307 307 case IP_VERSION(13, 0, 5): ··· 843 841 case IP_VERSION(13, 0, 7): 844 842 case IP_VERSION(13, 0, 8): 845 843 case IP_VERSION(13, 0, 10): 844 + case IP_VERSION(13, 0, 11): 846 845 if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) 847 846 return 0; 848 847 if (enable)

+14 -3

drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c

··· 1026 1026 .set_gfx_power_up_by_imu = smu_v13_0_set_gfx_power_up_by_imu, 1027 1027 }; 1028 1028 1029 + static void smu_v13_0_4_set_smu_mailbox_registers(struct smu_context *smu) 1030 + { 1031 + struct amdgpu_device *adev = smu->adev; 1032 + 1033 + smu->param_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_82); 1034 + smu->msg_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_66); 1035 + smu->resp_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90); 1036 + } 1037 + 1029 1038 void smu_v13_0_4_set_ppt_funcs(struct smu_context *smu) 1030 1039 { 1031 1040 struct amdgpu_device *adev = smu->adev; ··· 1044 1035 smu->feature_map = smu_v13_0_4_feature_mask_map; 1045 1036 smu->table_map = smu_v13_0_4_table_map; 1046 1037 smu->is_apu = true; 1047 - smu->param_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_82); 1048 - smu->msg_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_66); 1049 - smu->resp_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90); 1038 + 1039 + if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 4)) 1040 + smu_v13_0_4_set_smu_mailbox_registers(smu); 1041 + else 1042 + smu_v13_0_set_smu_mailbox_registers(smu); 1050 1043 }

+1

drivers/gpu/drm/radeon/radeon_bios.c

··· 227 227 228 228 if (!found) 229 229 return false; 230 + pci_dev_put(pdev); 230 231 231 232 rdev->bios = kmalloc(size, GFP_KERNEL); 232 233 if (!rdev->bios) {