Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'amd-drm-fixes-6.0-2022-08-17' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes

amd-drm-fixes-6.0-2022-08-17:

amdgpu:
- Revert some DML stack changes
- Rounding fixes in KFD allocations
- atombios vram info table parsing fix
- DCN 3.1.4 fixes
- Clockgating fixes for various new IPs
- SMU 13.0.4 fixes
- DCN 3.1.4 FP fixes
- TMDS fixes for YCbCr420 4k modes
- DCN 3.2.x fixes
- USB 4 fixes
- SMU 13.0 fixes
- SMU driver unload memory leak fixes
- Display orientation fix
- Regression fix for generic fbdev conversion
- SDMA 6.x fixes
- SR-IOV fixes
- IH 6.x fixes
- Use after free fix in bo list handling
- Revert pipe1 support
- XGMI hive reset fix

amdkfd:
- Fix potential crash in kfd_create_indirect_link_prop()

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220818025206.6463-1-alexander.deucher@amd.com

+1758 -1184
+14 -31
drivers/gpu/drm/amd/amdgpu/aldebaran.c
··· 148 148 struct amdgpu_reset_context *reset_context) 149 149 { 150 150 struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle; 151 + struct list_head *reset_device_list = reset_context->reset_device_list; 151 152 struct amdgpu_device *tmp_adev = NULL; 152 - struct list_head reset_device_list; 153 153 int r = 0; 154 154 155 155 dev_dbg(adev->dev, "aldebaran perform hw reset\n"); 156 + 157 + if (reset_device_list == NULL) 158 + return -EINVAL; 159 + 156 160 if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2) && 157 161 reset_context->hive == NULL) { 158 162 /* Wrong context, return error */ 159 163 return -EINVAL; 160 164 } 161 165 162 - INIT_LIST_HEAD(&reset_device_list); 163 - if (reset_context->hive) { 164 - list_for_each_entry (tmp_adev, 165 - &reset_context->hive->device_list, 166 - gmc.xgmi.head) 167 - list_add_tail(&tmp_adev->reset_list, 168 - &reset_device_list); 169 - } else { 170 - list_add_tail(&reset_context->reset_req_dev->reset_list, 171 - &reset_device_list); 172 - } 173 - 174 - list_for_each_entry (tmp_adev, &reset_device_list, reset_list) { 166 + list_for_each_entry(tmp_adev, reset_device_list, reset_list) { 175 167 mutex_lock(&tmp_adev->reset_cntl->reset_lock); 176 168 tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_MODE2; 177 169 } ··· 171 179 * Mode2 reset doesn't need any sync between nodes in XGMI hive, instead launch 172 180 * them together so that they can be completed asynchronously on multiple nodes 173 181 */ 174 - list_for_each_entry (tmp_adev, &reset_device_list, reset_list) { 182 + list_for_each_entry(tmp_adev, reset_device_list, reset_list) { 175 183 /* For XGMI run all resets in parallel to speed up the process */ 176 184 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { 177 185 if (!queue_work(system_unbound_wq, ··· 189 197 190 198 /* For XGMI wait for all resets to complete before proceed */ 191 199 if (!r) { 192 - list_for_each_entry (tmp_adev, &reset_device_list, reset_list) { 200 + list_for_each_entry(tmp_adev, reset_device_list, reset_list) { 193 201 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { 194 202 flush_work(&tmp_adev->reset_cntl->reset_work); 195 203 r = tmp_adev->asic_reset_res; ··· 199 207 } 200 208 } 201 209 202 - list_for_each_entry (tmp_adev, &reset_device_list, reset_list) { 210 + list_for_each_entry(tmp_adev, reset_device_list, reset_list) { 203 211 mutex_unlock(&tmp_adev->reset_cntl->reset_lock); 204 212 tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_NONE; 205 213 } ··· 331 339 aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, 332 340 struct amdgpu_reset_context *reset_context) 333 341 { 342 + struct list_head *reset_device_list = reset_context->reset_device_list; 334 343 struct amdgpu_device *tmp_adev = NULL; 335 - struct list_head reset_device_list; 336 344 int r; 345 + 346 + if (reset_device_list == NULL) 347 + return -EINVAL; 337 348 338 349 if (reset_context->reset_req_dev->ip_versions[MP1_HWIP][0] == 339 350 IP_VERSION(13, 0, 2) && ··· 345 350 return -EINVAL; 346 351 } 347 352 348 - INIT_LIST_HEAD(&reset_device_list); 349 - if (reset_context->hive) { 350 - list_for_each_entry (tmp_adev, 351 - &reset_context->hive->device_list, 352 - gmc.xgmi.head) 353 - list_add_tail(&tmp_adev->reset_list, 354 - &reset_device_list); 355 - } else { 356 - list_add_tail(&reset_context->reset_req_dev->reset_list, 357 - &reset_device_list); 358 - } 359 - 360 - list_for_each_entry (tmp_adev, &reset_device_list, reset_list) { 353 + list_for_each_entry(tmp_adev, reset_device_list, reset_list) { 
361 354 dev_info(tmp_adev->dev, 362 355 "GPU reset succeeded, trying to resume\n"); 363 356 r = aldebaran_mode2_restore_ip(tmp_adev);
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu.h
··· 317 317 AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0, 318 318 AMDGPU_CP_KIQ_IRQ_LAST 319 319 }; 320 - 320 + #define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */ 321 321 #define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ 322 322 #define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ 323 323 #define MAX_KIQ_REG_TRY 1000
+1
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
··· 96 96 struct amdgpu_kfd_dev { 97 97 struct kfd_dev *dev; 98 98 uint64_t vram_used; 99 + uint64_t vram_used_aligned; 99 100 bool init_complete; 100 101 struct work_struct reset_work; 101 102 };
+12 -9
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
··· 40 40 #define AMDGPU_USERPTR_RESTORE_DELAY_MS 1 41 41 42 42 /* 43 - * Align VRAM allocations to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB 43 + * Align VRAM availability to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB 44 44 * BO chunk 45 45 */ 46 - #define VRAM_ALLOCATION_ALIGN (1 << 21) 46 + #define VRAM_AVAILABLITY_ALIGN (1 << 21) 47 47 48 48 /* Impose limit on how much memory KFD can use */ 49 49 static struct { ··· 149 149 * to avoid fragmentation caused by 4K allocations in the tail 150 150 * 2M BO chunk. 151 151 */ 152 - vram_needed = ALIGN(size, VRAM_ALLOCATION_ALIGN); 152 + vram_needed = size; 153 153 } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { 154 154 system_mem_needed = size; 155 155 } else if (!(alloc_flag & ··· 182 182 */ 183 183 WARN_ONCE(vram_needed && !adev, 184 184 "adev reference can't be null when vram is used"); 185 - if (adev) 185 + if (adev) { 186 186 adev->kfd.vram_used += vram_needed; 187 + adev->kfd.vram_used_aligned += ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN); 188 + } 187 189 kfd_mem_limit.system_mem_used += system_mem_needed; 188 190 kfd_mem_limit.ttm_mem_used += ttm_mem_needed; 189 191 ··· 205 203 } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { 206 204 WARN_ONCE(!adev, 207 205 "adev reference can't be null when alloc mem flags vram is set"); 208 - if (adev) 209 - adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN); 206 + if (adev) { 207 + adev->kfd.vram_used -= size; 208 + adev->kfd.vram_used_aligned -= ALIGN(size, VRAM_AVAILABLITY_ALIGN); 209 + } 210 210 } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { 211 211 kfd_mem_limit.system_mem_used -= size; 212 212 } else if (!(alloc_flag & ··· 1612 1608 uint64_t reserved_for_pt = 1613 1609 ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); 1614 1610 size_t available; 1615 - 1616 1611 spin_lock(&kfd_mem_limit.mem_limit_lock); 1617 1612 available = adev->gmc.real_vram_size 1618 - - adev->kfd.vram_used 1613 + - adev->kfd.vram_used_aligned 1619 1614 - atomic64_read(&adev->vram_pin_size) 1620 1615 - reserved_for_pt; 1621 1616 spin_unlock(&kfd_mem_limit.mem_limit_lock); 1622 1617 1623 - return ALIGN_DOWN(available, VRAM_ALLOCATION_ALIGN); 1618 + return ALIGN_DOWN(available, VRAM_AVAILABLITY_ALIGN); 1624 1619 } 1625 1620 1626 1621 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
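The amdgpu_amdkfd_gpuvm.c hunk above splits KFD's VRAM accounting in two: vram_used keeps the exact byte count, while vram_used_aligned rounds every charge up to the 2 MiB chunk size and is what the availability query subtracts, so frees give back exactly what allocation charged. Below is a minimal standalone sketch of that double accounting; the macro names, example sizes and main() are illustrative only, not the kernel code.

#include <stdint.h>
#include <stdio.h>

#define VRAM_AVAILABILITY_ALIGN (1u << 21)                       /* 2 MiB chunk */
#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

static uint64_t vram_used;         /* exact bytes charged */
static uint64_t vram_used_aligned; /* 2 MiB-rounded, used for availability */

static void charge(uint64_t size)
{
        vram_used += size;
        vram_used_aligned += ALIGN_UP(size, VRAM_AVAILABILITY_ALIGN);
}

static void release(uint64_t size)
{
        vram_used -= size;
        vram_used_aligned -= ALIGN_UP(size, VRAM_AVAILABILITY_ALIGN);
}

int main(void)
{
        charge(4096);   /* a 4 KiB allocation still consumes a 2 MiB chunk */
        printf("used=%llu aligned=%llu\n",
               (unsigned long long)vram_used,
               (unsigned long long)vram_used_aligned); /* 4096 vs 2097152 */
        release(4096);  /* released with the same rounding, so both return to 0 */
        return 0;
}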
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
··· 314 314 mem_channel_number = vram_info->v30.channel_num; 315 315 mem_channel_width = vram_info->v30.channel_width; 316 316 if (vram_width) 317 - *vram_width = mem_channel_number * mem_channel_width; 317 + *vram_width = mem_channel_number * (1 << mem_channel_width); 318 318 break; 319 319 default: 320 320 return -EINVAL;
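The atomfirmware change above implies that the v3.0 VRAM info table stores the per-channel width as a power-of-two exponent rather than directly in bits, so the driver now scales it with a shift. A tiny standalone sketch of the arithmetic with made-up example values (8 channels, exponent 4, i.e. 16 bits per channel):

#include <stdio.h>

int main(void)
{
        unsigned int mem_channel_number = 8;  /* hypothetical channel count */
        unsigned int mem_channel_width  = 4;  /* exponent: 1 << 4 = 16 bits */
        unsigned int vram_width = mem_channel_number * (1u << mem_channel_width);

        printf("%u-bit memory bus\n", vram_width); /* 128-bit bus */
        return 0;
}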
+2 -6
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
··· 837 837 continue; 838 838 839 839 r = amdgpu_vm_bo_update(adev, bo_va, false); 840 - if (r) { 841 - mutex_unlock(&p->bo_list->bo_list_mutex); 840 + if (r) 842 841 return r; 843 - } 844 842 845 843 r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update); 846 - if (r) { 847 - mutex_unlock(&p->bo_list->bo_list_mutex); 844 + if (r) 848 845 return r; 849 - } 850 846 } 851 847 852 848 r = amdgpu_vm_handle_moved(adev, vm);
+3 -2
drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
··· 1705 1705 { 1706 1706 struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; 1707 1707 char reg_offset[11]; 1708 - uint32_t *new, *tmp = NULL; 1708 + uint32_t *new = NULL, *tmp = NULL; 1709 1709 int ret, i = 0, len = 0; 1710 1710 1711 1711 do { ··· 1747 1747 ret = size; 1748 1748 1749 1749 error_free: 1750 - kfree(tmp); 1750 + if (tmp != new) 1751 + kfree(tmp); 1751 1752 kfree(new); 1752 1753 return ret; 1753 1754 }
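The debugfs fix above guards the error path against a double free: after a successful reallocation the old and new pointers alias, so the old buffer may only be freed when it is genuinely distinct. A minimal user-space sketch of the hazard and the guard, using realloc() in place of the kernel allocator; the sizes are hypothetical.

#include <stdlib.h>

int main(void)
{
        int *tmp = NULL, *new = NULL;

        new = realloc(tmp, 16 * sizeof(*new)); /* grow the buffer */
        if (new)
                tmp = new;                     /* old and new now alias */

        if (tmp != new)                        /* the guard from the fix */
                free(tmp);
        free(new);                             /* only one free of the block */
        return 0;
}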
+2
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
··· 4742 4742 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, 4743 4743 reset_list); 4744 4744 amdgpu_reset_reg_dumps(tmp_adev); 4745 + 4746 + reset_context->reset_device_list = device_list_handle; 4745 4747 r = amdgpu_reset_perform_reset(tmp_adev, reset_context); 4746 4748 /* If reset handler not implemented, continue; otherwise return */ 4747 4749 if (r == -ENOSYS)
-4
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
··· 272 272 /* Signal all jobs not yet scheduled */ 273 273 for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { 274 274 struct drm_sched_rq *rq = &sched->sched_rq[i]; 275 - 276 - if (!rq) 277 - continue; 278 - 279 275 spin_lock(&rq->lock); 280 276 list_for_each_entry(s_entity, &rq->entities, list) { 281 277 while ((s_job = to_drm_sched_job(spsc_queue_pop(&s_entity->job_queue)))) {
+1
drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
··· 37 37 struct amdgpu_device *reset_req_dev; 38 38 struct amdgpu_job *job; 39 39 struct amdgpu_hive_info *hive; 40 + struct list_head *reset_device_list; 40 41 unsigned long flags; 41 42 }; 42 43
+18 -16
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
··· 637 637 #endif 638 638 }; 639 639 640 + #define ttm_to_amdgpu_ttm_tt(ptr) container_of(ptr, struct amdgpu_ttm_tt, ttm) 641 + 640 642 #ifdef CONFIG_DRM_AMDGPU_USERPTR 641 643 /* 642 644 * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user ··· 650 648 int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) 651 649 { 652 650 struct ttm_tt *ttm = bo->tbo.ttm; 653 - struct amdgpu_ttm_tt *gtt = (void *)ttm; 651 + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); 654 652 unsigned long start = gtt->userptr; 655 653 struct vm_area_struct *vma; 656 654 struct mm_struct *mm; ··· 704 702 */ 705 703 bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) 706 704 { 707 - struct amdgpu_ttm_tt *gtt = (void *)ttm; 705 + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); 708 706 bool r = false; 709 707 710 708 if (!gtt || !gtt->userptr) ··· 753 751 struct ttm_tt *ttm) 754 752 { 755 753 struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); 756 - struct amdgpu_ttm_tt *gtt = (void *)ttm; 754 + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); 757 755 int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); 758 756 enum dma_data_direction direction = write ? 759 757 DMA_BIDIRECTIONAL : DMA_TO_DEVICE; ··· 790 788 struct ttm_tt *ttm) 791 789 { 792 790 struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); 793 - struct amdgpu_ttm_tt *gtt = (void *)ttm; 791 + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); 794 792 int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); 795 793 enum dma_data_direction direction = write ? 796 794 DMA_BIDIRECTIONAL : DMA_TO_DEVICE; ··· 824 822 { 825 823 struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo); 826 824 struct ttm_tt *ttm = tbo->ttm; 827 - struct amdgpu_ttm_tt *gtt = (void *)ttm; 825 + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); 828 826 829 827 if (amdgpu_bo_encrypted(abo)) 830 828 flags |= AMDGPU_PTE_TMZ; ··· 862 860 struct ttm_resource *bo_mem) 863 861 { 864 862 struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); 865 - struct amdgpu_ttm_tt *gtt = (void*)ttm; 863 + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); 866 864 uint64_t flags; 867 865 int r; 868 866 ··· 929 927 { 930 928 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); 931 929 struct ttm_operation_ctx ctx = { false, false }; 932 - struct amdgpu_ttm_tt *gtt = (void *)bo->ttm; 930 + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(bo->ttm); 933 931 struct ttm_placement placement; 934 932 struct ttm_place placements; 935 933 struct ttm_resource *tmp; ··· 1000 998 struct ttm_tt *ttm) 1001 999 { 1002 1000 struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); 1003 - struct amdgpu_ttm_tt *gtt = (void *)ttm; 1001 + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); 1004 1002 1005 1003 /* if the pages have userptr pinning then clear that first */ 1006 1004 if (gtt->userptr) { ··· 1027 1025 static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev, 1028 1026 struct ttm_tt *ttm) 1029 1027 { 1030 - struct amdgpu_ttm_tt *gtt = (void *)ttm; 1028 + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); 1031 1029 1032 1030 if (gtt->usertask) 1033 1031 put_task_struct(gtt->usertask); ··· 1081 1079 struct ttm_operation_ctx *ctx) 1082 1080 { 1083 1081 struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); 1084 - struct amdgpu_ttm_tt *gtt = (void *)ttm; 1082 + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); 1085 1083 pgoff_t i; 1086 1084 int ret; 1087 1085 ··· 1115 1113 static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev, 
1116 1114 struct ttm_tt *ttm) 1117 1115 { 1118 - struct amdgpu_ttm_tt *gtt = (void *)ttm; 1116 + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); 1119 1117 struct amdgpu_device *adev; 1120 1118 pgoff_t i; 1121 1119 ··· 1184 1182 /* Set TTM_TT_FLAG_EXTERNAL before populate but after create. */ 1185 1183 bo->ttm->page_flags |= TTM_TT_FLAG_EXTERNAL; 1186 1184 1187 - gtt = (void *)bo->ttm; 1185 + gtt = ttm_to_amdgpu_ttm_tt(bo->ttm); 1188 1186 gtt->userptr = addr; 1189 1187 gtt->userflags = flags; 1190 1188 ··· 1201 1199 */ 1202 1200 struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) 1203 1201 { 1204 - struct amdgpu_ttm_tt *gtt = (void *)ttm; 1202 + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); 1205 1203 1206 1204 if (gtt == NULL) 1207 1205 return NULL; ··· 1220 1218 bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, 1221 1219 unsigned long end, unsigned long *userptr) 1222 1220 { 1223 - struct amdgpu_ttm_tt *gtt = (void *)ttm; 1221 + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); 1224 1222 unsigned long size; 1225 1223 1226 1224 if (gtt == NULL || !gtt->userptr) ··· 1243 1241 */ 1244 1242 bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm) 1245 1243 { 1246 - struct amdgpu_ttm_tt *gtt = (void *)ttm; 1244 + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); 1247 1245 1248 1246 if (gtt == NULL || !gtt->userptr) 1249 1247 return false; ··· 1256 1254 */ 1257 1255 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) 1258 1256 { 1259 - struct amdgpu_ttm_tt *gtt = (void *)ttm; 1257 + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); 1260 1258 1261 1259 if (gtt == NULL) 1262 1260 return false;
+1 -2
drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
··· 496 496 adev_to_drm(adev)->mode_config.max_height = YRES_MAX; 497 497 498 498 adev_to_drm(adev)->mode_config.preferred_depth = 24; 499 - /* disable prefer shadow for now due to hibernation issues */ 500 - adev_to_drm(adev)->mode_config.prefer_shadow = 0; 499 + adev_to_drm(adev)->mode_config.prefer_shadow = 1; 501 500 502 501 adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; 503 502
+37 -5
drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
··· 28 28 #include "navi10_enum.h" 29 29 #include "soc15_common.h" 30 30 31 + #define regATHUB_MISC_CNTL_V3_0_1 0x00d7 32 + #define regATHUB_MISC_CNTL_V3_0_1_BASE_IDX 0 33 + 34 + 35 + static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev) 36 + { 37 + uint32_t data; 38 + 39 + switch (adev->ip_versions[ATHUB_HWIP][0]) { 40 + case IP_VERSION(3, 0, 1): 41 + data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1); 42 + break; 43 + default: 44 + data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL); 45 + break; 46 + } 47 + return data; 48 + } 49 + 50 + static void athub_v3_0_set_cg_cntl(struct amdgpu_device *adev, uint32_t data) 51 + { 52 + switch (adev->ip_versions[ATHUB_HWIP][0]) { 53 + case IP_VERSION(3, 0, 1): 54 + WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1, data); 55 + break; 56 + default: 57 + WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data); 58 + break; 59 + } 60 + } 61 + 31 62 static void 32 63 athub_v3_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 33 64 bool enable) 34 65 { 35 66 uint32_t def, data; 36 67 37 - def = data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL); 68 + def = data = athub_v3_0_get_cg_cntl(adev); 38 69 39 70 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_MGCG)) 40 71 data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; ··· 73 42 data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; 74 43 75 44 if (def != data) 76 - WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data); 45 + athub_v3_0_set_cg_cntl(adev, data); 77 46 } 78 47 79 48 static void ··· 82 51 { 83 52 uint32_t def, data; 84 53 85 - def = data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL); 54 + def = data = athub_v3_0_get_cg_cntl(adev); 86 55 87 56 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_LS)) 88 57 data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; ··· 90 59 data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; 91 60 92 61 if (def != data) 93 - WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data); 62 + athub_v3_0_set_cg_cntl(adev, data); 94 63 } 95 64 96 65 int athub_v3_0_set_clockgating(struct amdgpu_device *adev, ··· 101 70 102 71 switch (adev->ip_versions[ATHUB_HWIP][0]) { 103 72 case IP_VERSION(3, 0, 0): 73 + case IP_VERSION(3, 0, 1): 104 74 case IP_VERSION(3, 0, 2): 105 75 athub_v3_0_update_medium_grain_clock_gating(adev, 106 76 state == AMD_CG_STATE_GATE); ··· 120 88 int data; 121 89 122 90 /* AMD_CG_SUPPORT_ATHUB_MGCG */ 123 - data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL); 91 + data = athub_v3_0_get_cg_cntl(adev); 124 92 if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK) 125 93 *flags |= AMD_CG_SUPPORT_ATHUB_MGCG; 126 94
+1 -2
drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
··· 2796 2796 adev_to_drm(adev)->mode_config.max_height = 16384; 2797 2797 2798 2798 adev_to_drm(adev)->mode_config.preferred_depth = 24; 2799 - /* disable prefer shadow for now due to hibernation issues */ 2800 - adev_to_drm(adev)->mode_config.prefer_shadow = 0; 2799 + adev_to_drm(adev)->mode_config.prefer_shadow = 1; 2801 2800 2802 2801 adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; 2803 2802
+1 -2
drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
··· 2914 2914 adev_to_drm(adev)->mode_config.max_height = 16384; 2915 2915 2916 2916 adev_to_drm(adev)->mode_config.preferred_depth = 24; 2917 - /* disable prefer shadow for now due to hibernation issues */ 2918 - adev_to_drm(adev)->mode_config.prefer_shadow = 0; 2917 + adev_to_drm(adev)->mode_config.prefer_shadow = 1; 2919 2918 2920 2919 adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; 2921 2920
+1 -2
drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
··· 2673 2673 adev_to_drm(adev)->mode_config.max_width = 16384; 2674 2674 adev_to_drm(adev)->mode_config.max_height = 16384; 2675 2675 adev_to_drm(adev)->mode_config.preferred_depth = 24; 2676 - /* disable prefer shadow for now due to hibernation issues */ 2677 - adev_to_drm(adev)->mode_config.prefer_shadow = 0; 2676 + adev_to_drm(adev)->mode_config.prefer_shadow = 1; 2678 2677 adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; 2679 2678 adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; 2680 2679
+5 -2
drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
··· 2693 2693 adev_to_drm(adev)->mode_config.max_height = 16384; 2694 2694 2695 2695 adev_to_drm(adev)->mode_config.preferred_depth = 24; 2696 - /* disable prefer shadow for now due to hibernation issues */ 2697 - adev_to_drm(adev)->mode_config.prefer_shadow = 0; 2696 + if (adev->asic_type == CHIP_HAWAII) 2697 + /* disable prefer shadow for now due to hibernation issues */ 2698 + adev_to_drm(adev)->mode_config.prefer_shadow = 0; 2699 + else 2700 + adev_to_drm(adev)->mode_config.prefer_shadow = 1; 2698 2701 2699 2702 adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; 2700 2703
+1 -1
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
··· 4846 4846 case IP_VERSION(10, 3, 3): 4847 4847 case IP_VERSION(10, 3, 7): 4848 4848 adev->gfx.me.num_me = 1; 4849 - adev->gfx.me.num_pipe_per_me = 2; 4849 + adev->gfx.me.num_pipe_per_me = 1; 4850 4850 adev->gfx.me.num_queue_per_pipe = 1; 4851 4851 adev->gfx.mec.num_mec = 2; 4852 4852 adev->gfx.mec.num_pipe_per_mec = 4;
+39
drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
··· 53 53 #define GFX11_MEC_HPD_SIZE 2048 54 54 55 55 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L 56 + #define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1 0x1388 56 57 57 58 #define regCGTT_WD_CLK_CTRL 0x5086 58 59 #define regCGTT_WD_CLK_CTRL_BASE_IDX 1 ··· 5280 5279 .update_spm_vmid = gfx_v11_0_update_spm_vmid, 5281 5280 }; 5282 5281 5282 + static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) 5283 + { 5284 + u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL); 5285 + 5286 + if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) 5287 + data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; 5288 + else 5289 + data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; 5290 + 5291 + WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data); 5292 + 5293 + // Program RLC_PG_DELAY3 for CGPG hysteresis 5294 + if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) { 5295 + switch (adev->ip_versions[GC_HWIP][0]) { 5296 + case IP_VERSION(11, 0, 1): 5297 + WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1); 5298 + break; 5299 + default: 5300 + break; 5301 + } 5302 + } 5303 + } 5304 + 5305 + static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable) 5306 + { 5307 + amdgpu_gfx_rlc_enter_safe_mode(adev); 5308 + 5309 + gfx_v11_cntl_power_gating(adev, enable); 5310 + 5311 + amdgpu_gfx_rlc_exit_safe_mode(adev); 5312 + } 5313 + 5283 5314 static int gfx_v11_0_set_powergating_state(void *handle, 5284 5315 enum amd_powergating_state state) 5285 5316 { ··· 5325 5292 case IP_VERSION(11, 0, 0): 5326 5293 case IP_VERSION(11, 0, 2): 5327 5294 amdgpu_gfx_off_ctrl(adev, enable); 5295 + break; 5296 + case IP_VERSION(11, 0, 1): 5297 + gfx_v11_cntl_pg(adev, enable); 5298 + /* TODO: Enable this when GFXOFF is ready */ 5299 + // amdgpu_gfx_off_ctrl(adev, enable); 5328 5300 break; 5329 5301 default: 5330 5302 break; ··· 5348 5310 5349 5311 switch (adev->ip_versions[GC_HWIP][0]) { 5350 5312 case IP_VERSION(11, 0, 0): 5313 + case IP_VERSION(11, 0, 1): 5351 5314 case IP_VERSION(11, 0, 2): 5352 5315 gfx_v11_0_update_gfx_clock_gating(adev, 5353 5316 state == AMD_CG_STATE_GATE);
+2 -1
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
··· 419 419 uint32_t seq; 420 420 uint16_t queried_pasid; 421 421 bool ret; 422 + u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout; 422 423 struct amdgpu_ring *ring = &adev->gfx.kiq.ring; 423 424 struct amdgpu_kiq *kiq = &adev->gfx.kiq; 424 425 ··· 438 437 439 438 amdgpu_ring_commit(ring); 440 439 spin_unlock(&adev->gfx.kiq.ring_lock); 441 - r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); 440 + r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); 442 441 if (r < 1) { 443 442 dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); 444 443 return -ETIME;
+5 -1
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
··· 896 896 uint32_t seq; 897 897 uint16_t queried_pasid; 898 898 bool ret; 899 + u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout; 899 900 struct amdgpu_ring *ring = &adev->gfx.kiq.ring; 900 901 struct amdgpu_kiq *kiq = &adev->gfx.kiq; 901 902 ··· 936 935 937 936 amdgpu_ring_commit(ring); 938 937 spin_unlock(&adev->gfx.kiq.ring_lock); 939 - r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); 938 + r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); 940 939 if (r < 1) { 941 940 dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); 942 941 up_read(&adev->reset_domain->sem); ··· 1625 1624 amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47); 1626 1625 else 1627 1626 amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); 1627 + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) 1628 + adev->gmc.translate_further = adev->vm_manager.num_level > 1; 1628 1629 break; 1629 1630 case IP_VERSION(9, 4, 1): 1630 1631 adev->num_vmhubs = 3; 1631 1632 1632 1633 /* Keep the vm size same with Vega20 */ 1633 1634 amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); 1635 + adev->gmc.translate_further = adev->vm_manager.num_level > 1; 1634 1636 break; 1635 1637 default: 1636 1638 break;
+150
drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c
··· 40 40 0); 41 41 } 42 42 43 + static void hdp_v5_2_update_mem_power_gating(struct amdgpu_device *adev, 44 + bool enable) 45 + { 46 + uint32_t hdp_clk_cntl; 47 + uint32_t hdp_mem_pwr_cntl; 48 + 49 + if (!(adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | 50 + AMD_CG_SUPPORT_HDP_DS | 51 + AMD_CG_SUPPORT_HDP_SD))) 52 + return; 53 + 54 + hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL); 55 + hdp_mem_pwr_cntl = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL); 56 + 57 + /* Before doing clock/power mode switch, forced on MEM clock */ 58 + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, 59 + ATOMIC_MEM_CLK_SOFT_OVERRIDE, 1); 60 + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, 61 + RC_MEM_CLK_SOFT_OVERRIDE, 1); 62 + WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl); 63 + 64 + /* disable clock and power gating before any changing */ 65 + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, 66 + ATOMIC_MEM_POWER_CTRL_EN, 0); 67 + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, 68 + ATOMIC_MEM_POWER_LS_EN, 0); 69 + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, 70 + ATOMIC_MEM_POWER_DS_EN, 0); 71 + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, 72 + ATOMIC_MEM_POWER_SD_EN, 0); 73 + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, 74 + RC_MEM_POWER_CTRL_EN, 0); 75 + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, 76 + RC_MEM_POWER_LS_EN, 0); 77 + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, 78 + RC_MEM_POWER_DS_EN, 0); 79 + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, 80 + RC_MEM_POWER_SD_EN, 0); 81 + WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); 82 + 83 + /* Already disabled above. 
The actions below are for "enabled" only */ 84 + if (enable) { 85 + /* only one clock gating mode (LS/DS/SD) can be enabled */ 86 + if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) { 87 + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, 88 + HDP_MEM_POWER_CTRL, 89 + ATOMIC_MEM_POWER_SD_EN, 1); 90 + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, 91 + HDP_MEM_POWER_CTRL, 92 + RC_MEM_POWER_SD_EN, 1); 93 + } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) { 94 + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, 95 + HDP_MEM_POWER_CTRL, 96 + ATOMIC_MEM_POWER_LS_EN, 1); 97 + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, 98 + HDP_MEM_POWER_CTRL, 99 + RC_MEM_POWER_LS_EN, 1); 100 + } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) { 101 + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, 102 + HDP_MEM_POWER_CTRL, 103 + ATOMIC_MEM_POWER_DS_EN, 1); 104 + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, 105 + HDP_MEM_POWER_CTRL, 106 + RC_MEM_POWER_DS_EN, 1); 107 + } 108 + 109 + /* confirmed that ATOMIC/RC_MEM_POWER_CTRL_EN have to be set for SRAM LS/DS/SD */ 110 + if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS | 111 + AMD_CG_SUPPORT_HDP_SD)) { 112 + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, 113 + ATOMIC_MEM_POWER_CTRL_EN, 1); 114 + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, 115 + RC_MEM_POWER_CTRL_EN, 1); 116 + WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); 117 + } 118 + } 119 + 120 + /* disable MEM clock override after clock/power mode changing */ 121 + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, 122 + ATOMIC_MEM_CLK_SOFT_OVERRIDE, 0); 123 + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, 124 + RC_MEM_CLK_SOFT_OVERRIDE, 0); 125 + WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl); 126 + } 127 + 128 + static void hdp_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *adev, 129 + bool enable) 130 + { 131 + uint32_t hdp_clk_cntl; 132 + 133 + if (!(adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG)) 134 + return; 135 + 136 + hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL); 137 + 138 + if (enable) { 139 + hdp_clk_cntl &= 140 + ~(uint32_t) 141 + (HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK | 142 + HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK | 143 + HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK | 144 + HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK | 145 + HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK | 146 + HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK); 147 + } else { 148 + hdp_clk_cntl |= HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK | 149 + HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK | 150 + HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK | 151 + HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK | 152 + HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK | 153 + HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK; 154 + } 155 + 156 + WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl); 157 + } 158 + 159 + static void hdp_v5_2_get_clockgating_state(struct amdgpu_device *adev, 160 + u64 *flags) 161 + { 162 + uint32_t tmp; 163 + 164 + /* AMD_CG_SUPPORT_HDP_MGCG */ 165 + tmp = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL); 166 + if (!(tmp & (HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK | 167 + HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK | 168 + HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK | 169 + HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK | 170 + HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK | 171 + HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK))) 172 + *flags |= AMD_CG_SUPPORT_HDP_MGCG; 173 + 174 + /* 
AMD_CG_SUPPORT_HDP_LS/DS/SD */ 175 + tmp = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL); 176 + if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_LS_EN_MASK) 177 + *flags |= AMD_CG_SUPPORT_HDP_LS; 178 + else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_DS_EN_MASK) 179 + *flags |= AMD_CG_SUPPORT_HDP_DS; 180 + else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_SD_EN_MASK) 181 + *flags |= AMD_CG_SUPPORT_HDP_SD; 182 + } 183 + 184 + static void hdp_v5_2_update_clock_gating(struct amdgpu_device *adev, 185 + bool enable) 186 + { 187 + hdp_v5_2_update_mem_power_gating(adev, enable); 188 + hdp_v5_2_update_medium_grain_clock_gating(adev, enable); 189 + } 190 + 43 191 const struct amdgpu_hdp_funcs hdp_v5_2_funcs = { 44 192 .flush_hdp = hdp_v5_2_flush_hdp, 193 + .update_clock_gating = hdp_v5_2_update_clock_gating, 194 + .get_clock_gating_state = hdp_v5_2_get_clockgating_state, 45 195 };
+1
drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
··· 727 727 static const struct amdgpu_ih_funcs ih_v6_0_funcs = { 728 728 .get_wptr = ih_v6_0_get_wptr, 729 729 .decode_iv = amdgpu_ih_decode_iv_helper, 730 + .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper, 730 731 .set_rptr = ih_v6_0_set_rptr 731 732 }; 732 733
+39 -3
drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
··· 518 518 static void mmhub_v3_0_1_update_medium_grain_clock_gating(struct amdgpu_device *adev, 519 519 bool enable) 520 520 { 521 - //TODO 521 + uint32_t def, data; 522 + 523 + def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG); 524 + 525 + if (enable) 526 + data |= MM_ATC_L2_MISC_CG__ENABLE_MASK; 527 + else 528 + data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK; 529 + 530 + if (def != data) 531 + WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data); 522 532 } 523 533 524 534 static void mmhub_v3_0_1_update_medium_grain_light_sleep(struct amdgpu_device *adev, 525 535 bool enable) 526 536 { 527 - //TODO 537 + uint32_t def, data; 538 + 539 + def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG); 540 + 541 + if (enable) 542 + data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; 543 + else 544 + data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; 545 + 546 + if (def != data) 547 + WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data); 528 548 } 529 549 530 550 static int mmhub_v3_0_1_set_clockgating(struct amdgpu_device *adev, 531 551 enum amd_clockgating_state state) 532 552 { 553 + if (amdgpu_sriov_vf(adev)) 554 + return 0; 555 + 533 556 mmhub_v3_0_1_update_medium_grain_clock_gating(adev, 534 557 state == AMD_CG_STATE_GATE); 535 558 mmhub_v3_0_1_update_medium_grain_light_sleep(adev, ··· 562 539 563 540 static void mmhub_v3_0_1_get_clockgating(struct amdgpu_device *adev, u64 *flags) 564 541 { 565 - //TODO 542 + int data; 543 + 544 + if (amdgpu_sriov_vf(adev)) 545 + *flags = 0; 546 + 547 + data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG); 548 + 549 + /* AMD_CG_SUPPORT_MC_MGCG */ 550 + if (data & MM_ATC_L2_MISC_CG__ENABLE_MASK) 551 + *flags |= AMD_CG_SUPPORT_MC_MGCG; 552 + 553 + /* AMD_CG_SUPPORT_MC_LS */ 554 + if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK) 555 + *flags |= AMD_CG_SUPPORT_MC_LS; 566 556 } 567 557 568 558 const struct amdgpu_mmhub_funcs mmhub_v3_0_1_funcs = {
+6 -1
drivers/gpu/drm/amd/amdgpu/navi10_ih.c
··· 409 409 u32 wptr, tmp; 410 410 struct amdgpu_ih_regs *ih_regs; 411 411 412 - if (ih == &adev->irq.ih) { 412 + if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) { 413 413 /* Only ring0 supports writeback. On other rings fall back 414 414 * to register-based code with overflow checking below. 415 + * ih_soft ring doesn't have any backing hardware registers, 416 + * update wptr and return. 415 417 */ 416 418 wptr = le32_to_cpu(*ih->wptr_cpu); 417 419 ··· 484 482 struct amdgpu_ih_ring *ih) 485 483 { 486 484 struct amdgpu_ih_regs *ih_regs; 485 + 486 + if (ih == &adev->irq.ih_soft) 487 + return; 487 488 488 489 if (ih->use_doorbell) { 489 490 /* XXX check if swapping is necessary on BE */
+10
drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
··· 101 101 adev->psp.dtm_context.context.bin_desc.start_addr = 102 102 (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr + 103 103 le32_to_cpu(ta_hdr->dtm.offset_bytes); 104 + 105 + if (adev->apu_flags & AMD_APU_IS_RENOIR) { 106 + adev->psp.securedisplay_context.context.bin_desc.fw_version = 107 + le32_to_cpu(ta_hdr->securedisplay.fw_version); 108 + adev->psp.securedisplay_context.context.bin_desc.size_bytes = 109 + le32_to_cpu(ta_hdr->securedisplay.size_bytes); 110 + adev->psp.securedisplay_context.context.bin_desc.start_addr = 111 + (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr + 112 + le32_to_cpu(ta_hdr->securedisplay.offset_bytes); 113 + } 104 114 } 105 115 106 116 return 0;
-1
drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
··· 20 20 * OTHER DEALINGS IN THE SOFTWARE. 21 21 * 22 22 */ 23 - #include <linux/dev_printk.h> 24 23 #include <drm/drm_drv.h> 25 24 #include <linux/vmalloc.h> 26 25 #include "amdgpu.h"
+24 -1
drivers/gpu/drm/amd/amdgpu/soc21.c
··· 546 546 case IP_VERSION(11, 0, 0): 547 547 adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG | 548 548 AMD_CG_SUPPORT_GFX_CGLS | 549 + #if 0 549 550 AMD_CG_SUPPORT_GFX_3D_CGCG | 550 551 AMD_CG_SUPPORT_GFX_3D_CGLS | 552 + #endif 551 553 AMD_CG_SUPPORT_GFX_MGCG | 552 554 AMD_CG_SUPPORT_REPEATER_FGCG | 553 555 AMD_CG_SUPPORT_GFX_FGCG | ··· 577 575 AMD_CG_SUPPORT_VCN_MGCG | 578 576 AMD_CG_SUPPORT_JPEG_MGCG | 579 577 AMD_CG_SUPPORT_ATHUB_MGCG | 580 - AMD_CG_SUPPORT_ATHUB_LS; 578 + AMD_CG_SUPPORT_ATHUB_LS | 579 + AMD_CG_SUPPORT_IH_CG | 580 + AMD_CG_SUPPORT_HDP_SD; 581 581 adev->pg_flags = 582 582 AMD_PG_SUPPORT_VCN | 583 583 AMD_PG_SUPPORT_VCN_DPG | ··· 590 586 break; 591 587 case IP_VERSION(11, 0, 1): 592 588 adev->cg_flags = 589 + AMD_CG_SUPPORT_GFX_CGCG | 590 + AMD_CG_SUPPORT_GFX_CGLS | 591 + AMD_CG_SUPPORT_GFX_MGCG | 592 + AMD_CG_SUPPORT_GFX_FGCG | 593 + AMD_CG_SUPPORT_REPEATER_FGCG | 594 + AMD_CG_SUPPORT_GFX_PERF_CLK | 595 + AMD_CG_SUPPORT_MC_MGCG | 596 + AMD_CG_SUPPORT_MC_LS | 597 + AMD_CG_SUPPORT_HDP_MGCG | 598 + AMD_CG_SUPPORT_HDP_LS | 599 + AMD_CG_SUPPORT_ATHUB_MGCG | 600 + AMD_CG_SUPPORT_ATHUB_LS | 601 + AMD_CG_SUPPORT_IH_CG | 593 602 AMD_CG_SUPPORT_VCN_MGCG | 594 603 AMD_CG_SUPPORT_JPEG_MGCG; 595 604 adev->pg_flags = 605 + AMD_PG_SUPPORT_GFX_PG | 596 606 AMD_PG_SUPPORT_JPEG; 597 607 adev->external_rev_id = adev->rev_id + 0x1; 598 608 break; ··· 701 683 702 684 switch (adev->ip_versions[NBIO_HWIP][0]) { 703 685 case IP_VERSION(4, 3, 0): 686 + case IP_VERSION(4, 3, 1): 704 687 adev->nbio.funcs->update_medium_grain_clock_gating(adev, 705 688 state == AMD_CG_STATE_GATE); 706 689 adev->nbio.funcs->update_medium_grain_light_sleep(adev, 707 690 state == AMD_CG_STATE_GATE); 691 + adev->hdp.funcs->update_clock_gating(adev, 692 + state == AMD_CG_STATE_GATE); 693 + break; 694 + case IP_VERSION(7, 7, 0): 708 695 adev->hdp.funcs->update_clock_gating(adev, 709 696 state == AMD_CG_STATE_GATE); 710 697 break;
+2 -3
drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
··· 1115 1115 * 1116 1116 * Stop VCN block with dpg mode 1117 1117 */ 1118 - static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) 1118 + static void vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) 1119 1119 { 1120 1120 uint32_t tmp; 1121 1121 ··· 1133 1133 /* disable dynamic power gating mode */ 1134 1134 WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 0, 1135 1135 ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); 1136 - return 0; 1137 1136 } 1138 1137 1139 1138 /** ··· 1153 1154 fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; 1154 1155 1155 1156 if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { 1156 - r = vcn_v4_0_stop_dpg_mode(adev, i); 1157 + vcn_v4_0_stop_dpg_mode(adev, i); 1157 1158 continue; 1158 1159 } 1159 1160
+6 -1
drivers/gpu/drm/amd/amdgpu/vega10_ih.c
··· 334 334 u32 wptr, tmp; 335 335 struct amdgpu_ih_regs *ih_regs; 336 336 337 - if (ih == &adev->irq.ih) { 337 + if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) { 338 338 /* Only ring0 supports writeback. On other rings fall back 339 339 * to register-based code with overflow checking below. 340 + * ih_soft ring doesn't have any backing hardware registers, 341 + * update wptr and return. 340 342 */ 341 343 wptr = le32_to_cpu(*ih->wptr_cpu); 342 344 ··· 410 408 struct amdgpu_ih_ring *ih) 411 409 { 412 410 struct amdgpu_ih_regs *ih_regs; 411 + 412 + if (ih == &adev->irq.ih_soft) 413 + return; 413 414 414 415 if (ih->use_doorbell) { 415 416 /* XXX check if swapping is necessary on BE */
+6 -1
drivers/gpu/drm/amd/amdgpu/vega20_ih.c
··· 385 385 u32 wptr, tmp; 386 386 struct amdgpu_ih_regs *ih_regs; 387 387 388 - if (ih == &adev->irq.ih) { 388 + if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) { 389 389 /* Only ring0 supports writeback. On other rings fall back 390 390 * to register-based code with overflow checking below. 391 + * ih_soft ring doesn't have any backing hardware registers, 392 + * update wptr and return. 391 393 */ 392 394 wptr = le32_to_cpu(*ih->wptr_cpu); 393 395 ··· 462 460 struct amdgpu_ih_ring *ih) 463 461 { 464 462 struct amdgpu_ih_regs *ih_regs; 463 + 464 + if (ih == &adev->irq.ih_soft) 465 + return; 465 466 466 467 if (ih->use_doorbell) { 467 468 /* XXX check if swapping is necessary on BE */
+1 -1
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
··· 874 874 err = kfd_wait_on_events(p, args->num_events, 875 875 (void __user *)args->events_ptr, 876 876 (args->wait_for_all != 0), 877 - args->timeout, &args->wait_result); 877 + &args->timeout, &args->wait_result); 878 878 879 879 return err; 880 880 }
+6 -1
drivers/gpu/drm/amd/amdkfd/kfd_device.c
··· 102 102 103 103 switch (sdma_version) { 104 104 case IP_VERSION(6, 0, 0): 105 - case IP_VERSION(6, 0, 1): 106 105 case IP_VERSION(6, 0, 2): 107 106 /* Reserve 1 for paging and 1 for gfx */ 108 107 kfd->device_info.num_reserved_sdma_queues_per_engine = 2; 109 108 /* BIT(0)=engine-0 queue-0; BIT(1)=engine-1 queue-0; BIT(2)=engine-0 queue-1; ... */ 110 109 kfd->device_info.reserved_sdma_queues_bitmap = 0xFULL; 110 + break; 111 + case IP_VERSION(6, 0, 1): 112 + /* Reserve 1 for paging and 1 for gfx */ 113 + kfd->device_info.num_reserved_sdma_queues_per_engine = 2; 114 + /* BIT(0)=engine-0 queue-0; BIT(1)=engine-0 queue-1; ... */ 115 + kfd->device_info.reserved_sdma_queues_bitmap = 0x3ULL; 111 116 break; 112 117 default: 113 118 break;
+12 -12
drivers/gpu/drm/amd/amdkfd/kfd_events.c
··· 894 894 return msecs_to_jiffies(user_timeout_ms) + 1; 895 895 } 896 896 897 - static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters) 897 + static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters, 898 + bool undo_auto_reset) 898 899 { 899 900 uint32_t i; 900 901 ··· 904 903 spin_lock(&waiters[i].event->lock); 905 904 remove_wait_queue(&waiters[i].event->wq, 906 905 &waiters[i].wait); 906 + if (undo_auto_reset && waiters[i].activated && 907 + waiters[i].event && waiters[i].event->auto_reset) 908 + set_event(waiters[i].event); 907 909 spin_unlock(&waiters[i].event->lock); 908 910 } 909 911 ··· 915 911 916 912 int kfd_wait_on_events(struct kfd_process *p, 917 913 uint32_t num_events, void __user *data, 918 - bool all, uint32_t user_timeout_ms, 914 + bool all, uint32_t *user_timeout_ms, 919 915 uint32_t *wait_result) 920 916 { 921 917 struct kfd_event_data __user *events = ··· 924 920 int ret = 0; 925 921 926 922 struct kfd_event_waiter *event_waiters = NULL; 927 - long timeout = user_timeout_to_jiffies(user_timeout_ms); 923 + long timeout = user_timeout_to_jiffies(*user_timeout_ms); 928 924 929 925 event_waiters = alloc_event_waiters(num_events); 930 926 if (!event_waiters) { ··· 974 970 } 975 971 976 972 if (signal_pending(current)) { 977 - /* 978 - * This is wrong when a nonzero, non-infinite timeout 979 - * is specified. We need to use 980 - * ERESTARTSYS_RESTARTBLOCK, but struct restart_block 981 - * contains a union with data for each user and it's 982 - * in generic kernel code that I don't want to 983 - * touch yet. 984 - */ 985 973 ret = -ERESTARTSYS; 974 + if (*user_timeout_ms != KFD_EVENT_TIMEOUT_IMMEDIATE && 975 + *user_timeout_ms != KFD_EVENT_TIMEOUT_INFINITE) 976 + *user_timeout_ms = jiffies_to_msecs( 977 + max(0l, timeout-1)); 986 978 break; 987 979 } 988 980 ··· 1019 1019 event_waiters, events); 1020 1020 1021 1021 out_unlock: 1022 - free_waiters(num_events, event_waiters); 1022 + free_waiters(num_events, event_waiters, ret == -ERESTARTSYS); 1023 1023 mutex_unlock(&p->event_mutex); 1024 1024 out: 1025 1025 if (ret)
+1 -1
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
··· 1317 1317 int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma); 1318 1318 int kfd_wait_on_events(struct kfd_process *p, 1319 1319 uint32_t num_events, void __user *data, 1320 - bool all, uint32_t user_timeout_ms, 1320 + bool all, uint32_t *user_timeout_ms, 1321 1321 uint32_t *wait_result); 1322 1322 void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id, 1323 1323 uint32_t valid_id_bits);
+7 -10
drivers/gpu/drm/amd/amdkfd/kfd_svm.c
··· 541 541 kfree(svm_bo); 542 542 return -ESRCH; 543 543 } 544 - svm_bo->svms = prange->svms; 545 544 svm_bo->eviction_fence = 546 545 amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), 547 546 mm, ··· 3272 3273 static void svm_range_evict_svm_bo_worker(struct work_struct *work) 3273 3274 { 3274 3275 struct svm_range_bo *svm_bo; 3275 - struct kfd_process *p; 3276 3276 struct mm_struct *mm; 3277 3277 int r = 0; 3278 3278 ··· 3279 3281 if (!svm_bo_ref_unless_zero(svm_bo)) 3280 3282 return; /* svm_bo was freed while eviction was pending */ 3281 3283 3282 - /* svm_range_bo_release destroys this worker thread. So during 3283 - * the lifetime of this thread, kfd_process and mm will be valid. 3284 - */ 3285 - p = container_of(svm_bo->svms, struct kfd_process, svms); 3286 - mm = p->mm; 3287 - if (!mm) 3284 + if (mmget_not_zero(svm_bo->eviction_fence->mm)) { 3285 + mm = svm_bo->eviction_fence->mm; 3286 + } else { 3287 + svm_range_bo_unref(svm_bo); 3288 3288 return; 3289 + } 3289 3290 3290 3291 mmap_read_lock(mm); 3291 3292 spin_lock(&svm_bo->list_lock); ··· 3302 3305 3303 3306 mutex_lock(&prange->migrate_mutex); 3304 3307 do { 3305 - r = svm_migrate_vram_to_ram(prange, 3306 - svm_bo->eviction_fence->mm, 3308 + r = svm_migrate_vram_to_ram(prange, mm, 3307 3309 KFD_MIGRATE_TRIGGER_TTM_EVICTION); 3308 3310 } while (!r && prange->actual_loc && --retries); 3309 3311 ··· 3320 3324 } 3321 3325 spin_unlock(&svm_bo->list_lock); 3322 3326 mmap_read_unlock(mm); 3327 + mmput(mm); 3323 3328 3324 3329 dma_fence_signal(&svm_bo->eviction_fence->base); 3325 3330
-1
drivers/gpu/drm/amd/amdkfd/kfd_svm.h
··· 46 46 spinlock_t list_lock; 47 47 struct amdgpu_amdkfd_fence *eviction_fence; 48 48 struct work_struct eviction_work; 49 - struct svm_range_list *svms; 50 49 uint32_t evicting; 51 50 struct work_struct release_work; 52 51 };
+7 -4
drivers/gpu/drm/amd/amdkfd/kfd_topology.c
··· 1392 1392 1393 1393 static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int gpu_node) 1394 1394 { 1395 + struct kfd_iolink_properties *gpu_link, *tmp_link, *cpu_link; 1395 1396 struct kfd_iolink_properties *props = NULL, *props2 = NULL; 1396 - struct kfd_iolink_properties *gpu_link, *cpu_link; 1397 1397 struct kfd_topology_device *cpu_dev; 1398 1398 int ret = 0; 1399 1399 int i, num_cpu; ··· 1416 1416 continue; 1417 1417 1418 1418 /* find CPU <--> CPU links */ 1419 + cpu_link = NULL; 1419 1420 cpu_dev = kfd_topology_device_by_proximity_domain(i); 1420 1421 if (cpu_dev) { 1421 - list_for_each_entry(cpu_link, 1422 + list_for_each_entry(tmp_link, 1422 1423 &cpu_dev->io_link_props, list) { 1423 - if (cpu_link->node_to == gpu_link->node_to) 1424 + if (tmp_link->node_to == gpu_link->node_to) { 1425 + cpu_link = tmp_link; 1424 1426 break; 1427 + } 1425 1428 } 1426 1429 } 1427 1430 1428 - if (cpu_link->node_to != gpu_link->node_to) 1431 + if (!cpu_link) 1429 1432 return -ENOMEM; 1430 1433 1431 1434 /* CPU <--> CPU <--> GPU, GPU node*/
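The kfd_topology.c fix above stops dereferencing the list cursor after the walk and instead records the match in a pointer that starts as NULL, which is what removes the potential crash when no matching CPU link exists. A minimal standalone sketch of that lookup pattern; the struct and values are hypothetical, not the KFD types.

#include <stddef.h>
#include <stdio.h>

struct link {
        int node_to;
        struct link *next;
};

static struct link *find_link(struct link *head, int node_to)
{
        struct link *found = NULL;   /* stays NULL when nothing matches */

        for (struct link *l = head; l; l = l->next) {
                if (l->node_to == node_to) {
                        found = l;
                        break;
                }
        }
        return found;                /* caller must check for NULL */
}

int main(void)
{
        struct link b = { 2, NULL }, a = { 1, &b };

        if (!find_link(&a, 3))
                printf("no link to node 3, bail out instead of crashing\n");
        return 0;
}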
+11 -4
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
··· 3825 3825 adev_to_drm(adev)->mode_config.max_height = 16384; 3826 3826 3827 3827 adev_to_drm(adev)->mode_config.preferred_depth = 24; 3828 - /* disable prefer shadow for now due to hibernation issues */ 3829 - adev_to_drm(adev)->mode_config.prefer_shadow = 0; 3828 + if (adev->asic_type == CHIP_HAWAII) 3829 + /* disable prefer shadow for now due to hibernation issues */ 3830 + adev_to_drm(adev)->mode_config.prefer_shadow = 0; 3831 + else 3832 + adev_to_drm(adev)->mode_config.prefer_shadow = 1; 3830 3833 /* indicates support for immediate flip */ 3831 3834 adev_to_drm(adev)->mode_config.async_page_flip = true; 3832 3835 ··· 4138 4135 } 4139 4136 } 4140 4137 4138 + static void amdgpu_set_panel_orientation(struct drm_connector *connector); 4141 4139 4142 4140 /* 4143 4141 * In this architecture, the association ··· 4330 4326 adev_to_drm(adev)->vblank_disable_immediate = false; 4331 4327 } 4332 4328 } 4329 + amdgpu_set_panel_orientation(&aconnector->base); 4333 4330 } 4334 4331 4335 4332 /* Software is initialized. Now we can register interrupt handlers. */ ··· 6689 6684 connector->connector_type != DRM_MODE_CONNECTOR_LVDS) 6690 6685 return; 6691 6686 6687 + mutex_lock(&connector->dev->mode_config.mutex); 6688 + amdgpu_dm_connector_get_modes(connector); 6689 + mutex_unlock(&connector->dev->mode_config.mutex); 6690 + 6692 6691 encoder = amdgpu_dm_connector_to_encoder(connector); 6693 6692 if (!encoder) 6694 6693 return; ··· 6737 6728 * restored here. 6738 6729 */ 6739 6730 amdgpu_dm_update_freesync_caps(connector, edid); 6740 - 6741 - amdgpu_set_panel_orientation(connector); 6742 6731 } else { 6743 6732 amdgpu_dm_connector->num_modes = 0; 6744 6733 }
+2 -2
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
··· 660 660 add_gfx10_1_modifiers(adev, mods, &size, &capacity); 661 661 break; 662 662 case AMDGPU_FAMILY_GC_11_0_0: 663 - case AMDGPU_FAMILY_GC_11_0_2: 663 + case AMDGPU_FAMILY_GC_11_0_1: 664 664 add_gfx11_modifiers(adev, mods, &size, &capacity); 665 665 break; 666 666 } ··· 1412 1412 } 1413 1413 break; 1414 1414 case AMDGPU_FAMILY_GC_11_0_0: 1415 - case AMDGPU_FAMILY_GC_11_0_2: 1415 + case AMDGPU_FAMILY_GC_11_0_1: 1416 1416 switch (AMD_FMT_MOD_GET(TILE, modifier)) { 1417 1417 case AMD_FMT_MOD_TILE_GFX11_256K_R_X: 1418 1418 case AMD_FMT_MOD_TILE_GFX9_64K_R_X:
+21
drivers/gpu/drm/amd/display/dc/basics/conversion.c
··· 100 100 matrix[i] = (uint16_t)reg_value; 101 101 } 102 102 } 103 + 104 + static uint32_t find_gcd(uint32_t a, uint32_t b) 105 + { 106 + uint32_t remainder = 0; 107 + while (b != 0) { 108 + remainder = a % b; 109 + a = b; 110 + b = remainder; 111 + } 112 + return a; 113 + } 114 + 115 + void reduce_fraction(uint32_t num, uint32_t den, 116 + uint32_t *out_num, uint32_t *out_den) 117 + { 118 + uint32_t gcd = 0; 119 + 120 + gcd = find_gcd(num, den); 121 + *out_num = num / gcd; 122 + *out_den = den / gcd; 123 + }
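The conversion.c hunk above adds a Euclidean GCD helper and reduce_fraction() built on it. A small standalone sketch of the same reduction with example values (1920/1080 reduces to 16/9):

#include <stdint.h>
#include <stdio.h>

static uint32_t find_gcd(uint32_t a, uint32_t b)
{
        while (b != 0) {
                uint32_t r = a % b;
                a = b;
                b = r;
        }
        return a;
}

int main(void)
{
        uint32_t num = 1920, den = 1080;
        uint32_t gcd = find_gcd(num, den);

        printf("%u/%u -> %u/%u\n", num, den, num / gcd, den / gcd); /* 16/9 */
        return 0;
}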
+3
drivers/gpu/drm/amd/display/dc/basics/conversion.h
··· 38 38 struct fixed31_32 *flt, 39 39 uint32_t buffer_size); 40 40 41 + void reduce_fraction(uint32_t num, uint32_t den, 42 + uint32_t *out_num, uint32_t *out_den); 43 + 41 44 static inline unsigned int log_2(unsigned int num) 42 45 { 43 46 return ilog2(num);
+2 -2
drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
··· 337 337 break; 338 338 } 339 339 340 - case AMDGPU_FAMILY_GC_11_0_2: { 340 + case AMDGPU_FAMILY_GC_11_0_1: { 341 341 struct clk_mgr_dcn314 *clk_mgr = kzalloc(sizeof(*clk_mgr), GFP_KERNEL); 342 342 343 343 if (clk_mgr == NULL) { ··· 397 397 dcn32_clk_mgr_destroy(clk_mgr); 398 398 break; 399 399 400 - case AMDGPU_FAMILY_GC_11_0_2: 400 + case AMDGPU_FAMILY_GC_11_0_1: 401 401 dcn314_clk_mgr_destroy(clk_mgr); 402 402 break; 403 403
+1 -2
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
··· 24 24 */ 25 25 26 26 #include "dccg.h" 27 - #include "clk_mgr_internal.h" 27 + #include "rn_clk_mgr.h" 28 28 29 29 #include "dcn20/dcn20_clk_mgr.h" 30 - #include "rn_clk_mgr.h" 31 30 #include "dml/dcn20/dcn20_fpu.h" 32 31 33 32 #include "dce100/dce_clk_mgr.h"
+1
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h
··· 28 28 29 29 #include "clk_mgr.h" 30 30 #include "dm_pp_smu.h" 31 + #include "clk_mgr_internal.h" 31 32 32 33 extern struct wm_table ddr4_wm_table_gs; 33 34 extern struct wm_table lpddr4_wm_table_gs;
+125 -77
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
··· 307 307 dcn314_smu_enable_pme_wa(clk_mgr); 308 308 } 309 309 310 - void dcn314_init_clocks(struct clk_mgr *clk_mgr) 311 - { 312 - memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks)); 313 - // Assumption is that boot state always supports pstate 314 - clk_mgr->clks.p_state_change_support = true; 315 - clk_mgr->clks.prev_p_state_change_support = true; 316 - clk_mgr->clks.pwr_state = DCN_PWR_STATE_UNKNOWN; 317 - clk_mgr->clks.zstate_support = DCN_ZSTATE_SUPPORT_UNKNOWN; 318 - } 319 - 320 310 bool dcn314_are_clock_states_equal(struct dc_clocks *a, 321 311 struct dc_clocks *b) 322 312 { ··· 415 425 } 416 426 }; 417 427 418 - static DpmClocks_t dummy_clocks; 428 + static DpmClocks314_t dummy_clocks; 419 429 420 430 static struct dcn314_watermarks dummy_wms = { 0 }; 421 431 ··· 500 510 static void dcn314_get_dpm_table_from_smu(struct clk_mgr_internal *clk_mgr, 501 511 struct dcn314_smu_dpm_clks *smu_dpm_clks) 502 512 { 503 - DpmClocks_t *table = smu_dpm_clks->dpm_clks; 513 + DpmClocks314_t *table = smu_dpm_clks->dpm_clks; 504 514 505 515 if (!clk_mgr->smu_ver) 506 516 return; ··· 517 527 dcn314_smu_transfer_dpm_table_smu_2_dram(clk_mgr); 518 528 } 519 529 530 + static inline bool is_valid_clock_value(uint32_t clock_value) 531 + { 532 + return clock_value > 1 && clock_value < 100000; 533 + } 534 + 535 + static unsigned int convert_wck_ratio(uint8_t wck_ratio) 536 + { 537 + switch (wck_ratio) { 538 + case WCK_RATIO_1_2: 539 + return 2; 540 + 541 + case WCK_RATIO_1_4: 542 + return 4; 543 + 544 + default: 545 + break; 546 + } 547 + return 1; 548 + } 549 + 520 550 static uint32_t find_max_clk_value(const uint32_t clocks[], uint32_t num_clocks) 521 551 { 522 552 uint32_t max = 0; ··· 550 540 return max; 551 541 } 552 542 553 - static unsigned int find_clk_for_voltage( 554 - const DpmClocks_t *clock_table, 555 - const uint32_t clocks[], 556 - unsigned int voltage) 557 - { 558 - int i; 559 - int max_voltage = 0; 560 - int clock = 0; 561 - 562 - for (i = 0; i < NUM_SOC_VOLTAGE_LEVELS; i++) { 563 - if (clock_table->SocVoltage[i] == voltage) { 564 - return clocks[i]; 565 - } else if (clock_table->SocVoltage[i] >= max_voltage && 566 - clock_table->SocVoltage[i] < voltage) { 567 - max_voltage = clock_table->SocVoltage[i]; 568 - clock = clocks[i]; 569 - } 570 - } 571 - 572 - ASSERT(clock); 573 - return clock; 574 - } 575 - 576 543 static void dcn314_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk_mgr, 577 544 struct integrated_info *bios_info, 578 - const DpmClocks_t *clock_table) 545 + const DpmClocks314_t *clock_table) 579 546 { 580 - int i, j; 581 547 struct clk_bw_params *bw_params = clk_mgr->base.bw_params; 582 - uint32_t max_dispclk = 0, max_dppclk = 0; 548 + struct clk_limit_table_entry def_max = bw_params->clk_table.entries[bw_params->clk_table.num_entries - 1]; 549 + uint32_t max_pstate = 0, max_fclk = 0, min_pstate = 0, max_dispclk = 0, max_dppclk = 0; 550 + int i; 583 551 584 - j = -1; 585 - 586 - ASSERT(NUM_DF_PSTATE_LEVELS <= MAX_NUM_DPM_LVL); 587 - 588 - /* Find lowest DPM, FCLK is filled in reverse order*/ 589 - 590 - for (i = NUM_DF_PSTATE_LEVELS - 1; i >= 0; i--) { 591 - if (clock_table->DfPstateTable[i].FClk != 0) { 592 - j = i; 593 - break; 552 + /* Find highest valid fclk pstate */ 553 + for (i = 0; i < clock_table->NumDfPstatesEnabled; i++) { 554 + if (is_valid_clock_value(clock_table->DfPstateTable[i].FClk) && 555 + clock_table->DfPstateTable[i].FClk > max_fclk) { 556 + max_fclk = clock_table->DfPstateTable[i].FClk; 557 + max_pstate = i; 594 558 } 595 559 } 596 560 
597 - if (j == -1) { 598 - /* clock table is all 0s, just use our own hardcode */ 599 - ASSERT(0); 600 - return; 601 - } 561 + /* We expect the table to contain at least one valid fclk entry. */ 562 + ASSERT(is_valid_clock_value(max_fclk)); 602 563 603 - bw_params->clk_table.num_entries = j + 1; 604 - 605 - /* dispclk and dppclk can be max at any voltage, same number of levels for both */ 564 + /* Dispclk and dppclk can be max at any voltage, same number of levels for both */ 606 565 if (clock_table->NumDispClkLevelsEnabled <= NUM_DISPCLK_DPM_LEVELS && 607 566 clock_table->NumDispClkLevelsEnabled <= NUM_DPPCLK_DPM_LEVELS) { 608 567 max_dispclk = find_max_clk_value(clock_table->DispClocks, clock_table->NumDispClkLevelsEnabled); 609 568 max_dppclk = find_max_clk_value(clock_table->DppClocks, clock_table->NumDispClkLevelsEnabled); 610 569 } else { 570 + /* Invalid number of entries in the table from PMFW. */ 611 571 ASSERT(0); 612 572 } 613 573 614 - for (i = 0; i < bw_params->clk_table.num_entries; i++, j--) { 615 - bw_params->clk_table.entries[i].fclk_mhz = clock_table->DfPstateTable[j].FClk; 616 - bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[j].MemClk; 617 - bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[j].Voltage; 618 - switch (clock_table->DfPstateTable[j].WckRatio) { 619 - case WCK_RATIO_1_2: 620 - bw_params->clk_table.entries[i].wck_ratio = 2; 621 - break; 622 - case WCK_RATIO_1_4: 623 - bw_params->clk_table.entries[i].wck_ratio = 4; 624 - break; 625 - default: 626 - bw_params->clk_table.entries[i].wck_ratio = 1; 574 + /* Base the clock table on dcfclk, need at least one entry regardless of pmfw table */ 575 + for (i = 0; i < clock_table->NumDcfClkLevelsEnabled; i++) { 576 + uint32_t min_fclk = clock_table->DfPstateTable[0].FClk; 577 + int j; 578 + 579 + for (j = 1; j < clock_table->NumDfPstatesEnabled; j++) { 580 + if (is_valid_clock_value(clock_table->DfPstateTable[j].FClk) && 581 + clock_table->DfPstateTable[j].FClk < min_fclk && 582 + clock_table->DfPstateTable[j].Voltage <= clock_table->SocVoltage[i]) { 583 + min_fclk = clock_table->DfPstateTable[j].FClk; 584 + min_pstate = j; 585 + } 627 586 } 628 - bw_params->clk_table.entries[i].dcfclk_mhz = find_clk_for_voltage(clock_table, clock_table->DcfClocks, clock_table->DfPstateTable[j].Voltage); 629 - bw_params->clk_table.entries[i].socclk_mhz = find_clk_for_voltage(clock_table, clock_table->SocClocks, clock_table->DfPstateTable[j].Voltage); 587 + 588 + /* First search defaults for the clocks we don't read using closest lower or equal default dcfclk */ 589 + for (j = bw_params->clk_table.num_entries - 1; j > 0; j--) 590 + if (bw_params->clk_table.entries[j].dcfclk_mhz <= clock_table->DcfClocks[i]) 591 + break; 592 + 593 + bw_params->clk_table.entries[i].phyclk_mhz = bw_params->clk_table.entries[j].phyclk_mhz; 594 + bw_params->clk_table.entries[i].phyclk_d18_mhz = bw_params->clk_table.entries[j].phyclk_d18_mhz; 595 + bw_params->clk_table.entries[i].dtbclk_mhz = bw_params->clk_table.entries[j].dtbclk_mhz; 596 + 597 + /* Now update clocks we do read */ 598 + bw_params->clk_table.entries[i].fclk_mhz = min_fclk; 599 + bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[min_pstate].MemClk; 600 + bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[min_pstate].Voltage; 601 + bw_params->clk_table.entries[i].dcfclk_mhz = clock_table->DcfClocks[i]; 602 + bw_params->clk_table.entries[i].socclk_mhz = clock_table->SocClocks[i]; 630 603 
bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk; 631 604 bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk; 632 - } 605 + bw_params->clk_table.entries[i].wck_ratio = convert_wck_ratio( 606 + clock_table->DfPstateTable[min_pstate].WckRatio); 607 + }; 633 608 609 + /* Make sure to include at least one entry at highest pstate */ 610 + if (max_pstate != min_pstate || i == 0) { 611 + if (i > MAX_NUM_DPM_LVL - 1) 612 + i = MAX_NUM_DPM_LVL - 1; 613 + 614 + bw_params->clk_table.entries[i].fclk_mhz = max_fclk; 615 + bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[max_pstate].MemClk; 616 + bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[max_pstate].Voltage; 617 + bw_params->clk_table.entries[i].dcfclk_mhz = find_max_clk_value(clock_table->DcfClocks, NUM_DCFCLK_DPM_LEVELS); 618 + bw_params->clk_table.entries[i].socclk_mhz = find_max_clk_value(clock_table->SocClocks, NUM_SOCCLK_DPM_LEVELS); 619 + bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk; 620 + bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk; 621 + bw_params->clk_table.entries[i].wck_ratio = convert_wck_ratio( 622 + clock_table->DfPstateTable[max_pstate].WckRatio); 623 + i++; 624 + } 625 + bw_params->clk_table.num_entries = i--; 626 + 627 + /* Make sure all highest clocks are included*/ 628 + bw_params->clk_table.entries[i].socclk_mhz = find_max_clk_value(clock_table->SocClocks, NUM_SOCCLK_DPM_LEVELS); 629 + bw_params->clk_table.entries[i].dispclk_mhz = find_max_clk_value(clock_table->DispClocks, NUM_DISPCLK_DPM_LEVELS); 630 + bw_params->clk_table.entries[i].dppclk_mhz = find_max_clk_value(clock_table->DppClocks, NUM_DPPCLK_DPM_LEVELS); 631 + ASSERT(clock_table->DcfClocks[i] == find_max_clk_value(clock_table->DcfClocks, NUM_DCFCLK_DPM_LEVELS)); 632 + bw_params->clk_table.entries[i].phyclk_mhz = def_max.phyclk_mhz; 633 + bw_params->clk_table.entries[i].phyclk_d18_mhz = def_max.phyclk_d18_mhz; 634 + bw_params->clk_table.entries[i].dtbclk_mhz = def_max.dtbclk_mhz; 635 + 636 + /* 637 + * Set any 0 clocks to max default setting. 
Not an issue for 638 + * power since we aren't doing switching in such case anyway 639 + */ 640 + for (i = 0; i < bw_params->clk_table.num_entries; i++) { 641 + if (!bw_params->clk_table.entries[i].fclk_mhz) { 642 + bw_params->clk_table.entries[i].fclk_mhz = def_max.fclk_mhz; 643 + bw_params->clk_table.entries[i].memclk_mhz = def_max.memclk_mhz; 644 + bw_params->clk_table.entries[i].voltage = def_max.voltage; 645 + } 646 + if (!bw_params->clk_table.entries[i].dcfclk_mhz) 647 + bw_params->clk_table.entries[i].dcfclk_mhz = def_max.dcfclk_mhz; 648 + if (!bw_params->clk_table.entries[i].socclk_mhz) 649 + bw_params->clk_table.entries[i].socclk_mhz = def_max.socclk_mhz; 650 + if (!bw_params->clk_table.entries[i].dispclk_mhz) 651 + bw_params->clk_table.entries[i].dispclk_mhz = def_max.dispclk_mhz; 652 + if (!bw_params->clk_table.entries[i].dppclk_mhz) 653 + bw_params->clk_table.entries[i].dppclk_mhz = def_max.dppclk_mhz; 654 + if (!bw_params->clk_table.entries[i].phyclk_mhz) 655 + bw_params->clk_table.entries[i].phyclk_mhz = def_max.phyclk_mhz; 656 + if (!bw_params->clk_table.entries[i].phyclk_d18_mhz) 657 + bw_params->clk_table.entries[i].phyclk_d18_mhz = def_max.phyclk_d18_mhz; 658 + if (!bw_params->clk_table.entries[i].dtbclk_mhz) 659 + bw_params->clk_table.entries[i].dtbclk_mhz = def_max.dtbclk_mhz; 660 + } 661 + ASSERT(bw_params->clk_table.entries[i-1].dcfclk_mhz); 634 662 bw_params->vram_type = bios_info->memory_type; 635 - bw_params->num_channels = bios_info->ma_channel_number; 663 + bw_params->num_channels = bios_info->ma_channel_number ? bios_info->ma_channel_number : 4; 636 664 637 665 for (i = 0; i < WM_SET_COUNT; i++) { 638 666 bw_params->wm_table.entries[i].wm_inst = i; ··· 689 641 .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, 690 642 .get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz, 691 643 .update_clocks = dcn314_update_clocks, 692 - .init_clocks = dcn314_init_clocks, 644 + .init_clocks = dcn31_init_clocks, 693 645 .enable_pme_wa = dcn314_enable_pme_wa, 694 646 .are_clock_states_equal = dcn314_are_clock_states_equal, 695 647 .notify_wm_ranges = dcn314_notify_wm_ranges ··· 729 681 } 730 682 ASSERT(clk_mgr->smu_wm_set.wm_set); 731 683 732 - smu_dpm_clks.dpm_clks = (DpmClocks_t *)dm_helpers_allocate_gpu_mem( 684 + smu_dpm_clks.dpm_clks = (DpmClocks314_t *)dm_helpers_allocate_gpu_mem( 733 685 clk_mgr->base.base.ctx, 734 686 DC_MEM_ALLOC_TYPE_FRAME_BUFFER, 735 - sizeof(DpmClocks_t), 687 + sizeof(DpmClocks314_t), 736 688 &smu_dpm_clks.mc_address.quad_part); 737 689 738 690 if (smu_dpm_clks.dpm_clks == NULL) { ··· 777 729 if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) { 778 730 dcn314_get_dpm_table_from_smu(&clk_mgr->base, &smu_dpm_clks); 779 731 780 - if (ctx->dc_bios && ctx->dc_bios->integrated_info) { 732 + if (ctx->dc_bios && ctx->dc_bios->integrated_info && ctx->dc->config.use_default_clock_table == false) { 781 733 dcn314_clk_mgr_helper_populate_bw_params( 782 734 &clk_mgr->base, 783 735 ctx->dc_bios->integrated_info,
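Note on the reworked dcn314_clk_mgr_helper_populate_bw_params() above: it now keys the bandwidth table off the enabled DCFCLK levels and, for each level, pairs it with the lowest valid FCLK pstate whose voltage still fits that level. A minimal standalone sketch of just that pairing rule follows; the struct layout, sample clock values and main() harness are illustrative assumptions, and the real code additionally folds in dispclk/dppclk maxima, a highest-pstate entry and default fallbacks.

#include <stdint.h>
#include <stdio.h>

#define NUM_DF_PSTATE_LEVELS 4
#define NUM_DCFCLK_DPM_LEVELS 4

struct df_pstate {
	uint32_t fclk;     /* MHz */
	uint32_t voltage;  /* mV (2 fractional bits in the real table) */
};

/* Same sanity bounds the driver applies to PMFW clock values. */
static int is_valid_clock_value(uint32_t clk)
{
	return clk > 1 && clk < 100000;
}

int main(void)
{
	/* Hypothetical PMFW data, highest pstate first. */
	struct df_pstate df[NUM_DF_PSTATE_LEVELS] = {
		{ 1600, 900 }, { 1200, 800 }, { 800, 750 }, { 400, 700 },
	};
	uint32_t dcfclk[NUM_DCFCLK_DPM_LEVELS]      = { 300, 450, 600, 750 };
	uint32_t soc_voltage[NUM_DCFCLK_DPM_LEVELS] = { 700, 750, 800, 900 };
	int i, j;

	for (i = 0; i < NUM_DCFCLK_DPM_LEVELS; i++) {
		uint32_t min_fclk = df[0].fclk;
		int min_pstate = 0;

		/* Lowest valid FCLK whose voltage still fits this level. */
		for (j = 1; j < NUM_DF_PSTATE_LEVELS; j++) {
			if (is_valid_clock_value(df[j].fclk) &&
			    df[j].fclk < min_fclk &&
			    df[j].voltage <= soc_voltage[i]) {
				min_fclk = df[j].fclk;
				min_pstate = j;
			}
		}
		printf("level %d: dcfclk %u MHz -> fclk %u MHz (pstate %d)\n",
		       i, dcfclk[i], min_fclk, min_pstate);
	}
	return 0;
}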
+1 -1
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h
··· 42 42 43 43 bool dcn314_are_clock_states_equal(struct dc_clocks *a, 44 44 struct dc_clocks *b); 45 - void dcn314_init_clocks(struct clk_mgr *clk_mgr); 45 + 46 46 void dcn314_update_clocks(struct clk_mgr *clk_mgr_base, 47 47 struct dc_state *context, 48 48 bool safe_to_lower);
+32 -1
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h
··· 36 36 WCK_RATIO_MAX 37 37 } WCK_RATIO_e; 38 38 39 + typedef struct { 40 + uint32_t FClk; 41 + uint32_t MemClk; 42 + uint32_t Voltage; 43 + uint8_t WckRatio; 44 + uint8_t Spare[3]; 45 + } DfPstateTable314_t; 46 + 47 + //Freq in MHz 48 + //Voltage in milli volts with 2 fractional bits 49 + typedef struct { 50 + uint32_t DcfClocks[NUM_DCFCLK_DPM_LEVELS]; 51 + uint32_t DispClocks[NUM_DISPCLK_DPM_LEVELS]; 52 + uint32_t DppClocks[NUM_DPPCLK_DPM_LEVELS]; 53 + uint32_t SocClocks[NUM_SOCCLK_DPM_LEVELS]; 54 + uint32_t VClocks[NUM_VCN_DPM_LEVELS]; 55 + uint32_t DClocks[NUM_VCN_DPM_LEVELS]; 56 + uint32_t SocVoltage[NUM_SOC_VOLTAGE_LEVELS]; 57 + DfPstateTable314_t DfPstateTable[NUM_DF_PSTATE_LEVELS]; 58 + 59 + uint8_t NumDcfClkLevelsEnabled; 60 + uint8_t NumDispClkLevelsEnabled; //Applies to both Dispclk and Dppclk 61 + uint8_t NumSocClkLevelsEnabled; 62 + uint8_t VcnClkLevelsEnabled; //Applies to both Vclk and Dclk 63 + uint8_t NumDfPstatesEnabled; 64 + uint8_t spare[3]; 65 + 66 + uint32_t MinGfxClk; 67 + uint32_t MaxGfxClk; 68 + } DpmClocks314_t; 69 + 39 70 struct dcn314_watermarks { 40 71 // Watermarks 41 72 WatermarkRowGeneric_t WatermarkRow[WM_COUNT][NUM_WM_RANGES]; ··· 74 43 }; 75 44 76 45 struct dcn314_smu_dpm_clks { 77 - DpmClocks_t *dpm_clks; 46 + DpmClocks314_t *dpm_clks; 78 47 union large_integer mc_address; 79 48 }; 80 49
+23 -18
drivers/gpu/drm/amd/display/dc/core/dc.c
··· 1074 1074 struct dc_stream_state *old_stream = 1075 1075 dc->current_state->res_ctx.pipe_ctx[i].stream; 1076 1076 bool should_disable = true; 1077 - bool pipe_split_change = 1078 - context->res_ctx.pipe_ctx[i].top_pipe != dc->current_state->res_ctx.pipe_ctx[i].top_pipe; 1077 + bool pipe_split_change = false; 1078 + 1079 + if ((context->res_ctx.pipe_ctx[i].top_pipe) && 1080 + (dc->current_state->res_ctx.pipe_ctx[i].top_pipe)) 1081 + pipe_split_change = context->res_ctx.pipe_ctx[i].top_pipe->pipe_idx != 1082 + dc->current_state->res_ctx.pipe_ctx[i].top_pipe->pipe_idx; 1083 + else 1084 + pipe_split_change = context->res_ctx.pipe_ctx[i].top_pipe != 1085 + dc->current_state->res_ctx.pipe_ctx[i].top_pipe; 1079 1086 1080 1087 for (j = 0; j < context->stream_count; j++) { 1081 1088 if (old_stream == context->streams[j]) { ··· 3236 3229 odm_pipe->ttu_regs.min_ttu_vblank = MAX_TTU; 3237 3230 } 3238 3231 3239 - if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) { 3232 + if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) 3240 3233 if (top_pipe_to_program && 3241 3234 top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) { 3242 3235 if (should_use_dmub_lock(stream->link)) { ··· 3254 3247 top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable( 3255 3248 top_pipe_to_program->stream_res.tg); 3256 3249 } 3257 - } 3258 3250 3259 3251 if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) { 3260 3252 if (dc->hwss.subvp_pipe_control_lock) ··· 3472 3466 dc->hwss.pipe_control_lock(dc, top_pipe_to_program, false); 3473 3467 } 3474 3468 3475 - if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) { 3469 + if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) 3476 3470 if (top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) { 3477 3471 top_pipe_to_program->stream_res.tg->funcs->wait_for_state( 3478 3472 top_pipe_to_program->stream_res.tg, ··· 3499 3493 top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_disable( 3500 3494 top_pipe_to_program->stream_res.tg); 3501 3495 } 3502 - } 3503 3496 3504 - if (update_type != UPDATE_TYPE_FAST) { 3497 + if (update_type != UPDATE_TYPE_FAST) 3505 3498 dc->hwss.post_unlock_program_front_end(dc, context); 3506 3499 3507 - /* Since phantom pipe programming is moved to post_unlock_program_front_end, 3508 - * move the SubVP lock to after the phantom pipes have been setup 3509 - */ 3510 - if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) { 3511 - if (dc->hwss.subvp_pipe_control_lock) 3512 - dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, NULL, subvp_prev_use); 3513 - } else { 3514 - if (dc->hwss.subvp_pipe_control_lock) 3515 - dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, top_pipe_to_program, subvp_prev_use); 3516 - } 3500 + /* Since phantom pipe programming is moved to post_unlock_program_front_end, 3501 + * move the SubVP lock to after the phantom pipes have been setup 3502 + */ 3503 + if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) { 3504 + if (dc->hwss.subvp_pipe_control_lock) 3505 + dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, NULL, subvp_prev_use); 3506 + } else { 3507 + if (dc->hwss.subvp_pipe_control_lock) 3508 + dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, top_pipe_to_program, subvp_prev_use); 3517 3509 } 3518 3510 3519 3511 // Fire manual trigger 
only when bottom plane is flipped ··· 4296 4292 !dc->debug.dpia_debug.bits.disable_dpia) 4297 4293 return true; 4298 4294 4299 - if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_2 && 4295 + if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_1 && 4300 4296 !dc->debug.dpia_debug.bits.disable_dpia) 4301 4297 return true; 4302 4298 ··· 4344 4340 struct dc_context *dc_ctx = dc->ctx; 4345 4341 4346 4342 dmub_enable_outbox_notification(dc_ctx->dmub_srv); 4343 + DC_LOG_DC("%s: dmub outbox notifications enabled\n", __func__); 4347 4344 } 4348 4345 4349 4346 /**
+1 -1
drivers/gpu/drm/amd/display/dc/core/dc_link.c
··· 3372 3372 switch(link->ctx->asic_id.chip_family) { 3373 3373 case FAMILY_YELLOW_CARP: 3374 3374 case AMDGPU_FAMILY_GC_10_3_6: 3375 - case AMDGPU_FAMILY_GC_11_0_2: 3375 + case AMDGPU_FAMILY_GC_11_0_1: 3376 3376 if(!dc->debug.disable_z10) 3377 3377 psr_context->psr_level.bits.SKIP_CRTC_DISABLE = false; 3378 3378 break;
+1 -1
drivers/gpu/drm/amd/display/dc/core/dc_resource.c
··· 169 169 if (ASICREV_IS_GC_11_0_2(asic_id.hw_internal_rev)) 170 170 dc_version = DCN_VERSION_3_21; 171 171 break; 172 - case AMDGPU_FAMILY_GC_11_0_2: 172 + case AMDGPU_FAMILY_GC_11_0_1: 173 173 dc_version = DCN_VERSION_3_14; 174 174 break; 175 175 default:
+5 -1
drivers/gpu/drm/amd/display/dc/dc.h
··· 47 47 struct set_config_cmd_payload; 48 48 struct dmub_notification; 49 49 50 - #define DC_VER "3.2.196" 50 + #define DC_VER "3.2.198" 51 51 52 52 #define MAX_SURFACES 3 53 53 #define MAX_PLANES 6 ··· 213 213 uint32_t cache_num_ways; 214 214 uint16_t subvp_fw_processing_delay_us; 215 215 uint16_t subvp_prefetch_end_to_mall_start_us; 216 + uint8_t subvp_swath_height_margin_lines; // subvp start line must be aligned to 2 x swath height 216 217 uint16_t subvp_pstate_allow_width_us; 217 218 uint16_t subvp_vertical_int_margin_us; 218 219 bool seamless_odm; ··· 353 352 bool use_pipe_ctx_sync_logic; 354 353 bool ignore_dpref_ss; 355 354 bool enable_mipi_converter_optimization; 355 + bool use_default_clock_table; 356 356 }; 357 357 358 358 enum visual_confirm { ··· 611 609 int percent_of_ideal_drambw; 612 610 int dram_clock_change_latency_ns; 613 611 int dummy_clock_change_latency_ns; 612 + int fclk_clock_change_latency_ns; 614 613 /* This forces a hard min on the DCFCLK we use 615 614 * for DML. Unlike the debug option for forcing 616 615 * DCFCLK, this override affects watermark calculations ··· 754 751 uint32_t mst_start_top_delay; 755 752 uint8_t psr_power_use_phy_fsm; 756 753 enum dml_hostvm_override_opts dml_hostvm_override; 754 + bool dml_disallow_alternate_prefetch_modes; 757 755 bool use_legacy_soc_bb_mechanism; 758 756 bool exit_idle_opt_for_cursor_updates; 759 757 bool enable_single_display_2to1_odm_policy;
+13 -2
drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
··· 29 29 #include "dm_helpers.h" 30 30 #include "dc_hw_types.h" 31 31 #include "core_types.h" 32 + #include "../basics/conversion.h" 32 33 33 34 #define CTX dc_dmub_srv->ctx 34 35 #define DC_LOGGER CTX->logger ··· 276 275 union dmub_rb_cmd cmd = { 0 }; 277 276 278 277 cmd.drr_update.header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; 279 - // TODO: Uncomment once FW headers are promoted 280 - //cmd.drr_update.header.sub_type = DMUB_CMD__FAMS_SET_MANUAL_TRIGGER; 278 + cmd.drr_update.header.sub_type = DMUB_CMD__FAMS_SET_MANUAL_TRIGGER; 281 279 cmd.drr_update.dmub_optc_state_req.tg_inst = tg_inst; 282 280 283 281 cmd.drr_update.header.payload_bytes = sizeof(cmd.drr_update) - sizeof(cmd.drr_update.header); ··· 601 601 &cmd->fw_assisted_mclk_switch_v2.config_data.pipe_data[cmd_pipe_index]; 602 602 struct dc_crtc_timing *main_timing = &subvp_pipe->stream->timing; 603 603 struct dc_crtc_timing *phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing; 604 + uint32_t out_num, out_den; 604 605 605 606 pipe_data->mode = SUBVP; 606 607 pipe_data->pipe_config.subvp_data.pix_clk_100hz = subvp_pipe->stream->timing.pix_clk_100hz; ··· 613 612 main_timing->v_total - main_timing->v_front_porch - main_timing->v_addressable; 614 613 pipe_data->pipe_config.subvp_data.mall_region_lines = phantom_timing->v_addressable; 615 614 pipe_data->pipe_config.subvp_data.main_pipe_index = subvp_pipe->pipe_idx; 615 + pipe_data->pipe_config.subvp_data.is_drr = subvp_pipe->stream->ignore_msa_timing_param; 616 + 617 + /* Calculate the scaling factor from the src and dst height. 618 + * e.g. If 3840x2160 being downscaled to 1920x1080, the scaling factor is 1/2. 619 + * Reduce the fraction 1080/2160 = 1/2 for the "scaling factor" 620 + */ 621 + reduce_fraction(subvp_pipe->stream->src.height, subvp_pipe->stream->dst.height, &out_num, &out_den); 622 + // TODO: Uncomment below lines once DMCUB include headers are promoted 623 + //pipe_data->pipe_config.subvp_data.scale_factor_numerator = out_num; 624 + //pipe_data->pipe_config.subvp_data.scale_factor_denominator = out_den; 616 625 617 626 // Prefetch lines is equal to VACTIVE + BP + VSYNC 618 627 pipe_data->pipe_config.subvp_data.prefetch_lines =
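Note on the SubVP change in dc_dmub_srv.c above: the scaling factor is derived by reducing the src/dst height fraction. The reduce_fraction() used there comes from dc/basics/conversion.h; the standalone sketch below (gcd_u32 and reduce_fraction_sketch are stand-in names) only illustrates the gcd-style reduction, following the 1080/2160 worked example in the comment.

#include <stdint.h>
#include <stdio.h>

static uint32_t gcd_u32(uint32_t a, uint32_t b)
{
	while (b) {
		uint32_t t = a % b;

		a = b;
		b = t;
	}
	return a;
}

/* Reduce num/den to lowest terms, e.g. 1080/2160 -> 1/2. */
static void reduce_fraction_sketch(uint32_t num, uint32_t den,
				   uint32_t *out_num, uint32_t *out_den)
{
	uint32_t g = gcd_u32(num, den);

	*out_num = g ? num / g : num;
	*out_den = g ? den / g : den;
}

int main(void)
{
	uint32_t n, d;

	/* The worked example from the comment: 2160 -> 1080 downscale. */
	reduce_fraction_sketch(1080, 2160, &n, &d);
	printf("scale factor = %u/%u\n", n, d);
	return 0;
}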
+1
drivers/gpu/drm/amd/display/dc/dc_link.h
··· 344 344 DETECT_REASON_HPDRX, 345 345 DETECT_REASON_FALLBACK, 346 346 DETECT_REASON_RETRAIN, 347 + DETECT_REASON_TDR, 347 348 }; 348 349 349 350 bool dc_link_detect(struct dc_link *dc_link, enum dc_detect_reason reason);
+2
drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
··· 543 543 switch (pix_clk_params->color_depth) { 544 544 case COLOR_DEPTH_101010: 545 545 actual_pixel_clock_100hz = (actual_pixel_clock_100hz * 5) >> 2; 546 + actual_pixel_clock_100hz -= actual_pixel_clock_100hz % 10; 546 547 break; 547 548 case COLOR_DEPTH_121212: 548 549 actual_pixel_clock_100hz = (actual_pixel_clock_100hz * 6) >> 2; 550 + actual_pixel_clock_100hz -= actual_pixel_clock_100hz % 10; 549 551 break; 550 552 case COLOR_DEPTH_161616: 551 553 actual_pixel_clock_100hz = actual_pixel_clock_100hz * 2;
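Note on the two added lines in dce_clock_source.c above: the deep-color pixel clock is tracked in 100 Hz units, so trimming it to a multiple of 10 snaps it down to a whole kHz after the 5/4 (10 bpc) or 6/4 (12 bpc) scaling. A small arithmetic sketch of the effect, with made-up clock values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* 594.000 MHz (4k HDMI) expressed in 100 Hz units. */
	uint32_t pix_clk_100hz = 5940000;

	/* COLOR_DEPTH_101010: scale by 5/4, then drop any sub-kHz remainder. */
	uint32_t dc_101010 = (pix_clk_100hz * 5) >> 2;  /* 7425000 = 742.5 MHz */
	dc_101010 -= dc_101010 % 10;                    /* already whole kHz   */

	/* A clock that does not scale to a whole kHz shows the trim: */
	uint32_t odd = (1234567u * 5) >> 2;  /* 1543208 x 100 Hz */
	odd -= odd % 10;                     /* 1543200 x 100 Hz = 154320 kHz */

	printf("%u %u\n", dc_101010, odd);
	return 0;
}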
-2
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
··· 361 361 select = INPUT_CSC_SELECT_ICSC; 362 362 break; 363 363 case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: 364 - pixel_format = 22; 365 - break; 366 364 case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: 367 365 pixel_format = 26; /* ARGB16161616_UNORM */ 368 366 break;
-3
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
··· 278 278 SURFACE_PIXEL_FORMAT, 10); 279 279 break; 280 280 case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: 281 - REG_UPDATE(DCSURF_SURFACE_CONFIG, 282 - SURFACE_PIXEL_FORMAT, 22); 283 - break; 284 281 case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: /*we use crossbar already*/ 285 282 REG_UPDATE(DCSURF_SURFACE_CONFIG, 286 283 SURFACE_PIXEL_FORMAT, 26); /* ARGB16161616_UNORM */
+1
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
··· 110 110 */ 111 111 if (pipe_ctx->top_pipe || 112 112 !pipe_ctx->stream || 113 + !pipe_ctx->plane_state || 113 114 !tg->funcs->is_tg_enabled(tg)) 114 115 continue; 115 116
+6
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
··· 131 131 while (tmp_mpcc != NULL) { 132 132 if (tmp_mpcc->dpp_id == dpp_id) 133 133 return tmp_mpcc; 134 + 135 + /* avoid circular linked list */ 136 + ASSERT(tmp_mpcc != tmp_mpcc->mpcc_bot); 137 + if (tmp_mpcc == tmp_mpcc->mpcc_bot) 138 + break; 139 + 134 140 tmp_mpcc = tmp_mpcc->mpcc_bot; 135 141 } 136 142 return NULL;
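Note on the guard added to dcn10_get_mpcc_for_dpp() above (the dcn20 variant further down gets the same fix): the walk now stops if an mpcc_bot pointer refers back to the node itself, so a corrupted blend tree can no longer hang the loop. A hedged userspace sketch of the same traversal pattern, with a plain warning standing in for the driver's ASSERT():

#include <stddef.h>
#include <stdio.h>

struct mpcc {
	int dpp_id;
	struct mpcc *mpcc_bot;
};

/* Walk the blend tree's bottom links; stop if a node points at itself. */
static struct mpcc *find_mpcc_for_dpp(struct mpcc *tree_bot, int dpp_id)
{
	struct mpcc *cur = tree_bot;

	while (cur != NULL) {
		if (cur->dpp_id == dpp_id)
			return cur;

		/* Mirror of the new guard: warn and bail instead of spinning
		 * forever on a self-referencing mpcc_bot pointer. */
		if (cur == cur->mpcc_bot) {
			fprintf(stderr, "circular mpcc list detected\n");
			break;
		}

		cur = cur->mpcc_bot;
	}
	return NULL;
}

int main(void)
{
	struct mpcc top = { .dpp_id = 0 }, bot = { .dpp_id = 1 };

	top.mpcc_bot = &bot;
	bot.mpcc_bot = &bot;  /* simulate the corruption */

	/* Returns NULL and terminates instead of looping forever. */
	printf("%p\n", (void *)find_mpcc_for_dpp(&top, 2));
	return 0;
}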
+5
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
··· 465 465 OTG_CLOCK_ON, 1, 466 466 1, 1000); 467 467 } else { 468 + 469 + //last chance to clear underflow; otherwise it stays set because the clock is off. 470 + if (optc->funcs->is_optc_underflow_occurred(optc) == true) 471 + optc->funcs->clear_optc_underflow(optc); 472 + 468 473 REG_UPDATE_2(OTG_CLOCK_CONTROL, 469 474 OTG_CLOCK_GATE_DIS, 0, 470 475 OTG_CLOCK_EN, 0);
-2
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c
··· 166 166 select = DCN2_ICSC_SELECT_ICSC_A; 167 167 break; 168 168 case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: 169 - pixel_format = 22; 170 - break; 171 169 case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: 172 170 pixel_format = 26; /* ARGB16161616_UNORM */ 173 171 break;
-3
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
··· 463 463 SURFACE_PIXEL_FORMAT, 10); 464 464 break; 465 465 case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: 466 - REG_UPDATE(DCSURF_SURFACE_CONFIG, 467 - SURFACE_PIXEL_FORMAT, 22); 468 - break; 469 466 case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: /*we use crossbar already*/ 470 467 REG_UPDATE(DCSURF_SURFACE_CONFIG, 471 468 SURFACE_PIXEL_FORMAT, 26); /* ARGB16161616_UNORM */
+6
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c
··· 531 531 while (tmp_mpcc != NULL) { 532 532 if (tmp_mpcc->dpp_id == 0xf || tmp_mpcc->dpp_id == dpp_id) 533 533 return tmp_mpcc; 534 + 535 + /* avoid circular linked list */ 536 + ASSERT(tmp_mpcc != tmp_mpcc->mpcc_bot); 537 + if (tmp_mpcc == tmp_mpcc->mpcc_bot) 538 + break; 539 + 534 540 tmp_mpcc = tmp_mpcc->mpcc_bot; 535 541 } 536 542 return NULL;
+7 -1
drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c
··· 67 67 void dcn21_dchvm_init(struct hubbub *hubbub) 68 68 { 69 69 struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub); 70 - uint32_t riommu_active; 70 + uint32_t riommu_active, prefetch_done; 71 71 int i; 72 72 73 + REG_GET(DCHVM_RIOMMU_STAT0, HOSTVM_PREFETCH_DONE, &prefetch_done); 74 + 75 + if (prefetch_done) { 76 + hubbub->riommu_active = true; 77 + return; 78 + } 73 79 //Init DCHVM block 74 80 REG_UPDATE(DCHVM_CTRL0, HOSTVM_INIT_REQ, 1); 75 81
-2
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c
··· 244 244 select = INPUT_CSC_SELECT_ICSC; 245 245 break; 246 246 case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: 247 - pixel_format = 22; 248 - break; 249 247 case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: 250 248 pixel_format = 26; /* ARGB16161616_UNORM */ 251 249 break;
+1 -1
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c
··· 86 86 VMID, address->vmid); 87 87 88 88 if (address->type == PLN_ADDR_TYPE_GRPH_STEREO) { 89 - REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0x1); 89 + REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0); 90 90 REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_IN_STEREOSYNC, 0x1); 91 91 92 92 } else {
+1 -1
drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
··· 372 372 int afmt_inst; 373 373 374 374 /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */ 375 - if (eng_id <= ENGINE_ID_DIGE) { 375 + if (eng_id <= ENGINE_ID_DIGB) { 376 376 vpg_inst = eng_id; 377 377 afmt_inst = eng_id; 378 378 } else
+2 -1
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.h
··· 162 162 SE_SF(DP_SYM32_ENC0_DP_SYM32_ENC_SDP_AUDIO_CONTROL0, AIP_ENABLE, mask_sh),\ 163 163 SE_SF(DP_SYM32_ENC0_DP_SYM32_ENC_SDP_AUDIO_CONTROL0, ACM_ENABLE, mask_sh),\ 164 164 SE_SF(DP_SYM32_ENC0_DP_SYM32_ENC_VID_CRC_CONTROL, CRC_ENABLE, mask_sh),\ 165 - SE_SF(DP_SYM32_ENC0_DP_SYM32_ENC_VID_CRC_CONTROL, CRC_CONT_MODE_ENABLE, mask_sh) 165 + SE_SF(DP_SYM32_ENC0_DP_SYM32_ENC_VID_CRC_CONTROL, CRC_CONT_MODE_ENABLE, mask_sh),\ 166 + SE_SF(DP_SYM32_ENC0_DP_SYM32_ENC_HBLANK_CONTROL, HBLANK_MINIMUM_SYMBOL_WIDTH, mask_sh) 166 167 167 168 168 169 #define DCN3_1_HPO_DP_STREAM_ENC_REG_FIELD_LIST(type) \
+1 -1
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
··· 2153 2153 pool->base.usb4_dpia_count = 4; 2154 2154 } 2155 2155 2156 - if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_2) 2156 + if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_1) 2157 2157 pool->base.usb4_dpia_count = 4; 2158 2158 2159 2159 /* Audio, Stream Encoders including HPO and virtual, MPC 3D LUTs */
-1
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h
··· 32 32 container_of(pool, struct dcn31_resource_pool, base) 33 33 34 34 extern struct _vcs_dpi_ip_params_st dcn3_1_ip; 35 - extern struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc; 36 35 37 36 struct dcn31_resource_pool { 38 37 struct resource_pool base;
-25
drivers/gpu/drm/amd/display/dc/dcn314/Makefile
··· 13 13 DCN314 = dcn314_resource.o dcn314_hwseq.o dcn314_init.o \ 14 14 dcn314_dio_stream_encoder.o dcn314_dccg.o dcn314_optc.o 15 15 16 - ifdef CONFIG_X86 17 - CFLAGS_$(AMDDALPATH)/dc/dcn314/dcn314_resource.o := -mhard-float -msse 18 - endif 19 - 20 - ifdef CONFIG_PPC64 21 - CFLAGS_$(AMDDALPATH)/dc/dcn314/dcn314_resource.o := -mhard-float -maltivec 22 - endif 23 - 24 - ifdef CONFIG_CC_IS_GCC 25 - ifeq ($(call cc-ifversion, -lt, 0701, y), y) 26 - IS_OLD_GCC = 1 27 - endif 28 - endif 29 - 30 - ifdef CONFIG_X86 31 - ifdef IS_OLD_GCC 32 - # Stack alignment mismatch, proceed with caution. 33 - # GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 34 - # (8B stack alignment). 35 - CFLAGS_$(AMDDALPATH)/dc/dcn314/dcn314_resource.o += -mpreferred-stack-boundary=4 36 - else 37 - CFLAGS_$(AMDDALPATH)/dc/dcn314/dcn314_resource.o += -msse2 38 - endif 39 - endif 40 - 41 16 AMD_DAL_DCN314 = $(addprefix $(AMDDALPATH)/dc/dcn314/,$(DCN314)) 42 17 43 18 AMD_DISPLAY_FILES += $(AMD_DAL_DCN314)
+35 -7
drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c
··· 343 343 { 344 344 struct dc_stream_state *stream = pipe_ctx->stream; 345 345 unsigned int odm_combine_factor = 0; 346 + struct dc *dc = pipe_ctx->stream->ctx->dc; 347 + bool two_pix_per_container = false; 346 348 349 + two_pix_per_container = optc2_is_two_pixels_per_containter(&stream->timing); 347 350 odm_combine_factor = get_odm_config(pipe_ctx, NULL); 348 351 349 352 if (is_dp_128b_132b_signal(pipe_ctx)) { ··· 358 355 else 359 356 *k2_div = PIXEL_RATE_DIV_BY_4; 360 357 } else if (dc_is_dp_signal(pipe_ctx->stream->signal)) { 361 - if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) { 358 + if (two_pix_per_container) { 362 359 *k1_div = PIXEL_RATE_DIV_BY_1; 363 360 *k2_div = PIXEL_RATE_DIV_BY_2; 364 - } else if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) { 365 - *k1_div = PIXEL_RATE_DIV_BY_2; 366 - *k2_div = PIXEL_RATE_DIV_BY_2; 367 361 } else { 368 - if (odm_combine_factor == 1) 369 - *k2_div = PIXEL_RATE_DIV_BY_4; 370 - else if (odm_combine_factor == 2) 362 + *k1_div = PIXEL_RATE_DIV_BY_1; 363 + *k2_div = PIXEL_RATE_DIV_BY_4; 364 + if ((odm_combine_factor == 2) || dc->debug.enable_dp_dig_pixel_rate_div_policy) 371 365 *k2_div = PIXEL_RATE_DIV_BY_2; 372 366 } 373 367 } ··· 373 373 ASSERT(false); 374 374 375 375 return odm_combine_factor; 376 + } 377 + 378 + void dcn314_set_pixels_per_cycle(struct pipe_ctx *pipe_ctx) 379 + { 380 + uint32_t pix_per_cycle = 1; 381 + uint32_t odm_combine_factor = 1; 382 + 383 + if (!pipe_ctx || !pipe_ctx->stream || !pipe_ctx->stream_res.stream_enc) 384 + return; 385 + 386 + odm_combine_factor = get_odm_config(pipe_ctx, NULL); 387 + if (optc2_is_two_pixels_per_containter(&pipe_ctx->stream->timing) || odm_combine_factor > 1 388 + || dcn314_is_dp_dig_pixel_rate_div_policy(pipe_ctx)) 389 + pix_per_cycle = 2; 390 + 391 + if (pipe_ctx->stream_res.stream_enc->funcs->set_input_mode) 392 + pipe_ctx->stream_res.stream_enc->funcs->set_input_mode(pipe_ctx->stream_res.stream_enc, 393 + pix_per_cycle); 394 + } 395 + 396 + bool dcn314_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx) 397 + { 398 + struct dc *dc = pipe_ctx->stream->ctx->dc; 399 + 400 + if (dc_is_dp_signal(pipe_ctx->stream->signal) && !is_dp_128b_132b_signal(pipe_ctx) && 401 + dc->debug.enable_dp_dig_pixel_rate_div_policy) 402 + return true; 403 + return false; 376 404 }
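Note on the dcn314_hwseq.c additions above: they boil down to one decision, namely running the DIG at two pixels per cycle whenever the timing already packs two pixels per container (YCbCr 4:2:0), ODM combine is active, or the DP pixel-rate divider debug policy applies. A condensed sketch of that decision with the pipe/stream plumbing stripped out; the struct and field names below are illustrative, not the driver's.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for the pipe state dcn314_set_pixels_per_cycle()
 * consults; field names here are not the driver's. */
struct pipe_state {
	bool two_pix_per_container;          /* e.g. YCbCr 4:2:0 timing */
	unsigned int odm_combine_factor;     /* 1 = no ODM combine      */
	bool dp_dig_pixel_rate_div_policy;   /* debug policy result     */
};

static unsigned int pixels_per_cycle(const struct pipe_state *p)
{
	if (p->two_pix_per_container ||
	    p->odm_combine_factor > 1 ||
	    p->dp_dig_pixel_rate_div_policy)
		return 2;
	return 1;
}

int main(void)
{
	struct pipe_state ycbcr420_4k = { .two_pix_per_container = true,
					  .odm_combine_factor = 1 };
	struct pipe_state plain_dp = { .odm_combine_factor = 1 };

	printf("%u %u\n", pixels_per_cycle(&ycbcr420_4k),
	       pixels_per_cycle(&plain_dp));  /* prints "2 1" */
	return 0;
}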
+4
drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h
··· 39 39 40 40 unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsigned int *k1_div, unsigned int *k2_div); 41 41 42 + void dcn314_set_pixels_per_cycle(struct pipe_ctx *pipe_ctx); 43 + 44 + bool dcn314_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx); 45 + 42 46 #endif /* __DC_HWSS_DCN314_H__ */
+2
drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c
··· 145 145 .set_shaper_3dlut = dcn20_set_shaper_3dlut, 146 146 .setup_hpo_hw_control = dcn31_setup_hpo_hw_control, 147 147 .calculate_dccg_k1_k2_values = dcn314_calculate_dccg_k1_k2_values, 148 + .set_pixels_per_cycle = dcn314_set_pixels_per_cycle, 149 + .is_dp_dig_pixel_rate_div_policy = dcn314_is_dp_dig_pixel_rate_div_policy, 148 150 }; 149 151 150 152 void dcn314_hw_sequencer_construct(struct dc *dc)
+11 -330
drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c
··· 70 70 #include "dce110/dce110_resource.h" 71 71 #include "dml/display_mode_vba.h" 72 72 #include "dml/dcn31/dcn31_fpu.h" 73 + #include "dml/dcn314/dcn314_fpu.h" 73 74 #include "dcn314/dcn314_dccg.h" 74 75 #include "dcn10/dcn10_resource.h" 75 76 #include "dcn31/dcn31_panel_cntl.h" ··· 132 131 133 132 134 133 #define DC_LOGGER_INIT(logger) 135 - 136 - #define DCN3_14_DEFAULT_DET_SIZE 384 137 - #define DCN3_14_MAX_DET_SIZE 384 138 - #define DCN3_14_MIN_COMPBUF_SIZE_KB 128 139 - #define DCN3_14_CRB_SEGMENT_SIZE_KB 64 140 - struct _vcs_dpi_ip_params_st dcn3_14_ip = { 141 - .VBlankNomDefaultUS = 668, 142 - .gpuvm_enable = 1, 143 - .gpuvm_max_page_table_levels = 1, 144 - .hostvm_enable = 1, 145 - .hostvm_max_page_table_levels = 2, 146 - .rob_buffer_size_kbytes = 64, 147 - .det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE, 148 - .config_return_buffer_size_in_kbytes = 1792, 149 - .compressed_buffer_segment_size_in_kbytes = 64, 150 - .meta_fifo_size_in_kentries = 32, 151 - .zero_size_buffer_entries = 512, 152 - .compbuf_reserved_space_64b = 256, 153 - .compbuf_reserved_space_zs = 64, 154 - .dpp_output_buffer_pixels = 2560, 155 - .opp_output_buffer_lines = 1, 156 - .pixel_chunk_size_kbytes = 8, 157 - .meta_chunk_size_kbytes = 2, 158 - .min_meta_chunk_size_bytes = 256, 159 - .writeback_chunk_size_kbytes = 8, 160 - .ptoi_supported = false, 161 - .num_dsc = 4, 162 - .maximum_dsc_bits_per_component = 10, 163 - .dsc422_native_support = false, 164 - .is_line_buffer_bpp_fixed = true, 165 - .line_buffer_fixed_bpp = 48, 166 - .line_buffer_size_bits = 789504, 167 - .max_line_buffer_lines = 12, 168 - .writeback_interface_buffer_size_kbytes = 90, 169 - .max_num_dpp = 4, 170 - .max_num_otg = 4, 171 - .max_num_hdmi_frl_outputs = 1, 172 - .max_num_wb = 1, 173 - .max_dchub_pscl_bw_pix_per_clk = 4, 174 - .max_pscl_lb_bw_pix_per_clk = 2, 175 - .max_lb_vscl_bw_pix_per_clk = 4, 176 - .max_vscl_hscl_bw_pix_per_clk = 4, 177 - .max_hscl_ratio = 6, 178 - .max_vscl_ratio = 6, 179 - .max_hscl_taps = 8, 180 - .max_vscl_taps = 8, 181 - .dpte_buffer_size_in_pte_reqs_luma = 64, 182 - .dpte_buffer_size_in_pte_reqs_chroma = 34, 183 - .dispclk_ramp_margin_percent = 1, 184 - .max_inter_dcn_tile_repeaters = 8, 185 - .cursor_buffer_size = 16, 186 - .cursor_chunk_size = 2, 187 - .writeback_line_buffer_buffer_size = 0, 188 - .writeback_min_hscl_ratio = 1, 189 - .writeback_min_vscl_ratio = 1, 190 - .writeback_max_hscl_ratio = 1, 191 - .writeback_max_vscl_ratio = 1, 192 - .writeback_max_hscl_taps = 1, 193 - .writeback_max_vscl_taps = 1, 194 - .dppclk_delay_subtotal = 46, 195 - .dppclk_delay_scl = 50, 196 - .dppclk_delay_scl_lb_only = 16, 197 - .dppclk_delay_cnvc_formatter = 27, 198 - .dppclk_delay_cnvc_cursor = 6, 199 - .dispclk_delay_subtotal = 119, 200 - .dynamic_metadata_vm_enabled = false, 201 - .odm_combine_4to1_supported = false, 202 - .dcc_supported = true, 203 - }; 204 - 205 - struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = { 206 - /*TODO: correct dispclk/dppclk voltage level determination*/ 207 - .clock_limits = { 208 - { 209 - .state = 0, 210 - .dispclk_mhz = 1200.0, 211 - .dppclk_mhz = 1200.0, 212 - .phyclk_mhz = 600.0, 213 - .phyclk_d18_mhz = 667.0, 214 - .dscclk_mhz = 186.0, 215 - .dtbclk_mhz = 625.0, 216 - }, 217 - { 218 - .state = 1, 219 - .dispclk_mhz = 1200.0, 220 - .dppclk_mhz = 1200.0, 221 - .phyclk_mhz = 810.0, 222 - .phyclk_d18_mhz = 667.0, 223 - .dscclk_mhz = 209.0, 224 - .dtbclk_mhz = 625.0, 225 - }, 226 - { 227 - .state = 2, 228 - .dispclk_mhz = 1200.0, 229 - .dppclk_mhz = 1200.0, 230 - .phyclk_mhz = 810.0, 
231 - .phyclk_d18_mhz = 667.0, 232 - .dscclk_mhz = 209.0, 233 - .dtbclk_mhz = 625.0, 234 - }, 235 - { 236 - .state = 3, 237 - .dispclk_mhz = 1200.0, 238 - .dppclk_mhz = 1200.0, 239 - .phyclk_mhz = 810.0, 240 - .phyclk_d18_mhz = 667.0, 241 - .dscclk_mhz = 371.0, 242 - .dtbclk_mhz = 625.0, 243 - }, 244 - { 245 - .state = 4, 246 - .dispclk_mhz = 1200.0, 247 - .dppclk_mhz = 1200.0, 248 - .phyclk_mhz = 810.0, 249 - .phyclk_d18_mhz = 667.0, 250 - .dscclk_mhz = 417.0, 251 - .dtbclk_mhz = 625.0, 252 - }, 253 - }, 254 - .num_states = 5, 255 - .sr_exit_time_us = 9.0, 256 - .sr_enter_plus_exit_time_us = 11.0, 257 - .sr_exit_z8_time_us = 442.0, 258 - .sr_enter_plus_exit_z8_time_us = 560.0, 259 - .writeback_latency_us = 12.0, 260 - .dram_channel_width_bytes = 4, 261 - .round_trip_ping_latency_dcfclk_cycles = 106, 262 - .urgent_latency_pixel_data_only_us = 4.0, 263 - .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, 264 - .urgent_latency_vm_data_only_us = 4.0, 265 - .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, 266 - .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, 267 - .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, 268 - .pct_ideal_sdp_bw_after_urgent = 80.0, 269 - .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0, 270 - .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, 271 - .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, 272 - .max_avg_sdp_bw_use_normal_percent = 60.0, 273 - .max_avg_dram_bw_use_normal_percent = 60.0, 274 - .fabric_datapath_to_dcn_data_return_bytes = 32, 275 - .return_bus_width_bytes = 64, 276 - .downspread_percent = 0.38, 277 - .dcn_downspread_percent = 0.5, 278 - .gpuvm_min_page_size_bytes = 4096, 279 - .hostvm_min_page_size_bytes = 4096, 280 - .do_urgent_latency_adjustment = false, 281 - .urgent_latency_adjustment_fabric_clock_component_us = 0, 282 - .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, 283 - }; 284 134 285 135 enum dcn31_clk_src_array_id { 286 136 DCN31_CLK_SRC_PLL0, ··· 1254 1402 int afmt_inst; 1255 1403 1256 1404 /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */ 1257 - if (eng_id <= ENGINE_ID_DIGF) { 1405 + if (eng_id < ENGINE_ID_DIGF) { 1258 1406 vpg_inst = eng_id; 1259 1407 afmt_inst = eng_id; 1260 1408 } else ··· 1299 1447 * VPG[8] -> HPO_DP[2] 1300 1448 * VPG[9] -> HPO_DP[3] 1301 1449 */ 1302 - vpg_inst = hpo_dp_inst + 6; 1450 + //Uses offset index 5-8, but actually maps to vpg_inst 6-9 1451 + vpg_inst = hpo_dp_inst + 5; 1303 1452 1304 1453 /* Mapping of APG register blocks to HPO DP block instance: 1305 1454 * APG[0] -> HPO_DP[0] ··· 1646 1793 return NULL; 1647 1794 } 1648 1795 1649 - static bool is_dual_plane(enum surface_pixel_format format) 1650 - { 1651 - return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA; 1652 - } 1653 - 1654 1796 static int dcn314_populate_dml_pipes_from_context( 1655 1797 struct dc *dc, struct dc_state *context, 1656 1798 display_e2e_pipe_params_st *pipes, 1657 1799 bool fast_validate) 1658 1800 { 1659 - int i, pipe_cnt; 1660 - struct resource_context *res_ctx = &context->res_ctx; 1661 - struct pipe_ctx *pipe; 1662 - bool upscaled = false; 1801 + int pipe_cnt; 1663 1802 1664 - dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); 1665 - 1666 - for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { 1667 - struct dc_crtc_timing *timing; 1668 - 1669 - if (!res_ctx->pipe_ctx[i].stream) 1670 - continue; 1671 - pipe = &res_ctx->pipe_ctx[i]; 1672 - timing = 
&pipe->stream->timing; 1673 - 1674 - if (dc_extended_blank_supported(dc) && pipe->stream->adjust.v_total_max == pipe->stream->adjust.v_total_min 1675 - && pipe->stream->adjust.v_total_min > timing->v_total) 1676 - pipes[pipe_cnt].pipe.dest.vtotal = pipe->stream->adjust.v_total_min; 1677 - 1678 - if (pipe->plane_state && 1679 - (pipe->plane_state->src_rect.height < pipe->plane_state->dst_rect.height || 1680 - pipe->plane_state->src_rect.width < pipe->plane_state->dst_rect.width)) 1681 - upscaled = true; 1682 - 1683 - /* 1684 - * Immediate flip can be set dynamically after enabling the plane. 1685 - * We need to require support for immediate flip or underflow can be 1686 - * intermittently experienced depending on peak b/w requirements. 1687 - */ 1688 - pipes[pipe_cnt].pipe.src.immediate_flip = true; 1689 - 1690 - pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; 1691 - pipes[pipe_cnt].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active; 1692 - pipes[pipe_cnt].pipe.src.gpuvm = true; 1693 - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; 1694 - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; 1695 - pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; 1696 - pipes[pipe_cnt].pipe.src.dcc_rate = 3; 1697 - pipes[pipe_cnt].dout.dsc_input_bpc = 0; 1698 - 1699 - if (pipes[pipe_cnt].dout.dsc_enable) { 1700 - switch (timing->display_color_depth) { 1701 - case COLOR_DEPTH_888: 1702 - pipes[pipe_cnt].dout.dsc_input_bpc = 8; 1703 - break; 1704 - case COLOR_DEPTH_101010: 1705 - pipes[pipe_cnt].dout.dsc_input_bpc = 10; 1706 - break; 1707 - case COLOR_DEPTH_121212: 1708 - pipes[pipe_cnt].dout.dsc_input_bpc = 12; 1709 - break; 1710 - default: 1711 - ASSERT(0); 1712 - break; 1713 - } 1714 - } 1715 - 1716 - pipe_cnt++; 1717 - } 1718 - context->bw_ctx.dml.ip.det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE; 1719 - 1720 - dc->config.enable_4to1MPC = false; 1721 - if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) { 1722 - if (is_dual_plane(pipe->plane_state->format) 1723 - && pipe->plane_state->src_rect.width <= 1920 && pipe->plane_state->src_rect.height <= 1080) { 1724 - dc->config.enable_4to1MPC = true; 1725 - } else if (!is_dual_plane(pipe->plane_state->format) && pipe->plane_state->src_rect.width <= 5120) { 1726 - /* Limit to 5k max to avoid forced pipe split when there is not enough detile for swath */ 1727 - context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192; 1728 - pipes[0].pipe.src.unbounded_req_mode = true; 1729 - } 1730 - } else if (context->stream_count >= dc->debug.crb_alloc_policy_min_disp_count 1731 - && dc->debug.crb_alloc_policy > DET_SIZE_DEFAULT) { 1732 - context->bw_ctx.dml.ip.det_buffer_size_kbytes = dc->debug.crb_alloc_policy * 64; 1733 - } else if (context->stream_count >= 3 && upscaled) { 1734 - context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192; 1735 - } 1736 - 1737 - for (i = 0; i < dc->res_pool->pipe_count; i++) { 1738 - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; 1739 - 1740 - if (!pipe->stream) 1741 - continue; 1742 - 1743 - if (pipe->stream->signal == SIGNAL_TYPE_EDP && dc->debug.seamless_boot_odm_combine && 1744 - pipe->stream->apply_seamless_boot_optimization) { 1745 - 1746 - if (pipe->stream->apply_boot_odm_mode == dm_odm_combine_policy_2to1) { 1747 - context->bw_ctx.dml.vba.ODMCombinePolicy = dm_odm_combine_policy_2to1; 1748 - break; 1749 - } 1750 - } 1751 - } 1803 + DC_FP_START(); 1804 + pipe_cnt = dcn314_populate_dml_pipes_from_context_fpu(dc, context, pipes, fast_validate); 1805 + DC_FP_END(); 1752 1806 1753 
1807 return pipe_cnt; 1754 1808 } ··· 1666 1906 1667 1907 static void dcn314_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) 1668 1908 { 1669 - struct clk_limit_table *clk_table = &bw_params->clk_table; 1670 - struct _vcs_dpi_voltage_scaling_st *clock_tmp = dcn3_14_soc._clock_tmp; 1671 - unsigned int i, closest_clk_lvl; 1672 - int max_dispclk_mhz = 0, max_dppclk_mhz = 0; 1673 - int j; 1674 - 1675 - // Default clock levels are used for diags, which may lead to overclocking. 1676 - if (!IS_DIAG_DC(dc->ctx->dce_environment)) { 1677 - 1678 - dcn3_14_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; 1679 - dcn3_14_ip.max_num_dpp = dc->res_pool->pipe_count; 1680 - 1681 - if (bw_params->num_channels > 0) 1682 - dcn3_14_soc.num_chans = bw_params->num_channels; 1683 - 1684 - ASSERT(dcn3_14_soc.num_chans); 1685 - ASSERT(clk_table->num_entries); 1686 - 1687 - /* Prepass to find max clocks independent of voltage level. */ 1688 - for (i = 0; i < clk_table->num_entries; ++i) { 1689 - if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz) 1690 - max_dispclk_mhz = clk_table->entries[i].dispclk_mhz; 1691 - if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz) 1692 - max_dppclk_mhz = clk_table->entries[i].dppclk_mhz; 1693 - } 1694 - 1695 - for (i = 0; i < clk_table->num_entries; i++) { 1696 - /* loop backwards*/ 1697 - for (closest_clk_lvl = 0, j = dcn3_14_soc.num_states - 1; j >= 0; j--) { 1698 - if ((unsigned int) dcn3_14_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) { 1699 - closest_clk_lvl = j; 1700 - break; 1701 - } 1702 - } 1703 - if (clk_table->num_entries == 1) { 1704 - /*smu gives one DPM level, let's take the highest one*/ 1705 - closest_clk_lvl = dcn3_14_soc.num_states - 1; 1706 - } 1707 - 1708 - clock_tmp[i].state = i; 1709 - 1710 - /* Clocks dependent on voltage level. */ 1711 - clock_tmp[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; 1712 - if (clk_table->num_entries == 1 && 1713 - clock_tmp[i].dcfclk_mhz < dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) { 1714 - /*SMU fix not released yet*/ 1715 - clock_tmp[i].dcfclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz; 1716 - } 1717 - clock_tmp[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; 1718 - clock_tmp[i].socclk_mhz = clk_table->entries[i].socclk_mhz; 1719 - 1720 - if (clk_table->entries[i].memclk_mhz && clk_table->entries[i].wck_ratio) 1721 - clock_tmp[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio; 1722 - 1723 - /* Clocks independent of voltage level. */ 1724 - clock_tmp[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : 1725 - dcn3_14_soc.clock_limits[closest_clk_lvl].dispclk_mhz; 1726 - 1727 - clock_tmp[i].dppclk_mhz = max_dppclk_mhz ? 
max_dppclk_mhz : 1728 - dcn3_14_soc.clock_limits[closest_clk_lvl].dppclk_mhz; 1729 - 1730 - clock_tmp[i].dram_bw_per_chan_gbps = dcn3_14_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; 1731 - clock_tmp[i].dscclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dscclk_mhz; 1732 - clock_tmp[i].dtbclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; 1733 - clock_tmp[i].phyclk_d18_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; 1734 - clock_tmp[i].phyclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_mhz; 1735 - } 1736 - for (i = 0; i < clk_table->num_entries; i++) 1737 - dcn3_14_soc.clock_limits[i] = clock_tmp[i]; 1738 - if (clk_table->num_entries) 1739 - dcn3_14_soc.num_states = clk_table->num_entries; 1740 - } 1741 - 1742 - if (max_dispclk_mhz) { 1743 - dcn3_14_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; 1744 - dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; 1745 - } 1746 - 1747 - if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) 1748 - dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31); 1749 - else 1750 - dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31_FPGA); 1909 + DC_FP_START(); 1910 + dcn314_update_bw_bounding_box_fpu(dc, bw_params); 1911 + DC_FP_END(); 1751 1912 } 1752 1913 1753 1914 static struct resource_funcs dcn314_res_pool_funcs = {
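Note on the dcn314_resource.c rework above: the resource code now only brackets calls into the new dml/dcn314/dcn314_fpu.c helpers with DC_FP_START()/DC_FP_END(), which is also why the per-file hard-float CFLAGS could be dropped from the dcn314 Makefile earlier in this series. A userspace sketch of that contract, using hypothetical fp_start()/fp_end()/assert_fp_enabled() stand-ins for the kernel macros:

#include <assert.h>
#include <stdio.h>

/* Hypothetical stand-ins for DC_FP_START()/DC_FP_END()/dc_assert_fp_enabled():
 * resource code brackets the call, the dml-side helper checks the bracket. */
static _Thread_local int fp_depth;

static void fp_start(void)          { fp_depth++; }  /* kernel: save FPU state    */
static void fp_end(void)            { fp_depth--; }  /* kernel: restore FPU state */
static void assert_fp_enabled(void) { assert(fp_depth > 0); }

/* Lives on the "dml" side, like dcn314_update_bw_bounding_box_fpu(). */
static double vco_from_max_dispclk(double max_dispclk_mhz)
{
	assert_fp_enabled();
	return max_dispclk_mhz * 2.0;
}

int main(void)
{
	fp_start();
	printf("vco = %.1f MHz\n", vco_from_max_dispclk(1200.0));
	fp_end();
	return 0;
}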
+3
drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h
··· 29 29 30 30 #include "core_types.h" 31 31 32 + extern struct _vcs_dpi_ip_params_st dcn3_14_ip; 33 + extern struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc; 34 + 32 35 #define TO_DCN314_RES_POOL(pool)\ 33 36 container_of(pool, struct dcn314_resource_pool, base) 34 37
-1
drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h
··· 32 32 container_of(pool, struct dcn315_resource_pool, base) 33 33 34 34 extern struct _vcs_dpi_ip_params_st dcn3_15_ip; 35 - extern struct _vcs_dpi_ip_params_st dcn3_15_soc; 36 35 37 36 struct dcn315_resource_pool { 38 37 struct resource_pool base;
-1
drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h
··· 32 32 container_of(pool, struct dcn316_resource_pool, base) 33 33 34 34 extern struct _vcs_dpi_ip_params_st dcn3_16_ip; 35 - extern struct _vcs_dpi_ip_params_st dcn3_16_soc; 36 35 37 36 struct dcn316_resource_pool { 38 37 struct resource_pool base;
+36 -4
drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
··· 250 250 uint32_t total_lines = 0; 251 251 uint32_t lines_per_way = 0; 252 252 uint32_t num_ways = 0; 253 + uint32_t prev_addr_low = 0; 253 254 254 255 for (i = 0; i < ctx->stream_count; i++) { 255 256 stream = ctx->streams[i]; ··· 268 267 plane = ctx->stream_status[i].plane_states[j]; 269 268 270 269 // Calculate total surface size 271 - surface_size = plane->plane_size.surface_pitch * 270 + if (prev_addr_low != plane->address.grph.addr.u.low_part) { 271 + /* if plane address are different from prev FB, then userspace allocated separate FBs*/ 272 + surface_size += plane->plane_size.surface_pitch * 272 273 plane->plane_size.surface_size.height * 273 274 (plane->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4); 274 275 276 + prev_addr_low = plane->address.grph.addr.u.low_part; 277 + } else { 278 + /* We have the same fb for all the planes. 279 + * Xorg always creates one giant fb that holds all surfaces, 280 + * so allocating it once is sufficient. 281 + * */ 282 + continue; 283 + } 275 284 // Convert surface size + starting address to number of cache lines required 276 285 // (alignment accounted for) 277 286 cache_lines_used += dcn32_cache_lines_for_surface(dc, surface_size, ··· 331 320 bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable) 332 321 { 333 322 union dmub_rb_cmd cmd; 334 - uint8_t ways; 323 + uint8_t ways, i; 324 + int j; 325 + bool stereo_in_use = false; 326 + struct dc_plane_state *plane = NULL; 335 327 336 328 if (!dc->ctx->dmub_srv) 337 329 return false; ··· 363 349 * and configure HUBP's to fetch from MALL 364 350 */ 365 351 ways = dcn32_calculate_cab_allocation(dc, dc->current_state); 366 - if (ways <= dc->caps.cache_num_ways) { 352 + 353 + /* MALL not supported with Stereo3D. If any plane is using stereo, 354 + * don't try to enter MALL. 355 + */ 356 + for (i = 0; i < dc->current_state->stream_count; i++) { 357 + for (j = 0; j < dc->current_state->stream_status[i].plane_count; j++) { 358 + plane = dc->current_state->stream_status[i].plane_states[j]; 359 + 360 + if (plane->address.type == PLN_ADDR_TYPE_GRPH_STEREO) { 361 + stereo_in_use = true; 362 + break; 363 + } 364 + } 365 + if (stereo_in_use) 366 + break; 367 + } 368 + if (ways <= dc->caps.cache_num_ways && !stereo_in_use) { 367 369 memset(&cmd, 0, sizeof(cmd)); 368 370 cmd.cab.header.type = DMUB_CMD__CAB_FOR_SS; 369 371 cmd.cab.header.sub_type = DMUB_CMD__CAB_DCN_SS_FIT_IN_CAB; ··· 713 683 if (pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { 714 684 hubp->funcs->hubp_update_mall_sel(hubp, 1, false); 715 685 } else { 686 + // MALL not supported with Stereo3D 716 687 hubp->funcs->hubp_update_mall_sel(hubp, 717 688 num_ways <= dc->caps.cache_num_ways && 718 - pipe->stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED ? 2 : 0, 689 + pipe->stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED && 690 + pipe->plane_state->address.type != PLN_ADDR_TYPE_GRPH_STEREO ? 2 : 0, 719 691 cache_cursor); 720 692 } 721 693 }
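Note on the reworked CAB sizing in dcn32_hwseq.c above: it sums surface footprints (skipping planes that share a framebuffer address), converts them to cache lines and then to ways, and declines MALL when stereo is in use or the ways exceed dc->caps.cache_num_ways. The simplified model below only illustrates the bytes-to-ways arithmetic; the cache-line size, way count and alignment handling are assumptions, not the driver's exact constants.

#include <stdint.h>
#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	/* One 4k ARGB8888 surface; constants below are illustrative. */
	uint64_t pitch = 3840, height = 2160, bytes_per_pixel = 4;
	uint64_t cache_line_bytes = 64;
	uint64_t mall_bytes = 64ull * 1024 * 1024;  /* dcn32 max_cab_allocation */
	uint64_t num_ways_total = 16;               /* assumed cache_num_ways   */

	uint64_t surface_bytes = pitch * height * bytes_per_pixel;
	uint64_t lines_used    = DIV_ROUND_UP(surface_bytes, cache_line_bytes);
	uint64_t lines_per_way = (mall_bytes / cache_line_bytes) / num_ways_total;
	uint64_t ways          = DIV_ROUND_UP(lines_used, lines_per_way);

	printf("surface %llu bytes -> %llu of %llu ways\n",
	       (unsigned long long)surface_bytes,
	       (unsigned long long)ways,
	       (unsigned long long)num_ways_total);
	return 0;
}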
+1 -1
drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c
··· 281 281 .lock_doublebuffer_enable = optc3_lock_doublebuffer_enable, 282 282 .lock_doublebuffer_disable = optc3_lock_doublebuffer_disable, 283 283 .enable_optc_clock = optc1_enable_optc_clock, 284 - .set_drr = optc31_set_drr, // TODO: Update to optc32_set_drr once FW headers are promoted 284 + .set_drr = optc32_set_drr, 285 285 .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal, 286 286 .set_vtotal_min_max = optc3_set_vtotal_min_max, 287 287 .set_static_screen_control = optc1_set_static_screen_control,
+2 -1
drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
··· 867 867 } 868 868 }, 869 869 .use_max_lb = true, 870 - .force_disable_subvp = true, 870 + .force_disable_subvp = false, 871 871 .exit_idle_opt_for_cursor_updates = true, 872 872 .enable_single_display_2to1_odm_policy = true, 873 873 .enable_dp_dig_pixel_rate_div_policy = 1, ··· 2051 2051 dc->caps.max_cab_allocation_bytes = 67108864; // 64MB = 1024 * 1024 * 64 2052 2052 dc->caps.subvp_fw_processing_delay_us = 15; 2053 2053 dc->caps.subvp_prefetch_end_to_mall_start_us = 15; 2054 + dc->caps.subvp_swath_height_margin_lines = 16; 2054 2055 dc->caps.subvp_pstate_allow_width_us = 20; 2055 2056 dc->caps.subvp_vertical_int_margin_us = 30; 2056 2057
+1 -1
drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
··· 63 63 if (pipe->stream && pipe->plane_state && !pipe->top_pipe && 64 64 pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { 65 65 bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4; 66 - mall_region_pixels = pipe->stream->timing.h_addressable * pipe->stream->timing.v_addressable; 66 + mall_region_pixels = pipe->plane_state->plane_size.surface_pitch * pipe->stream->timing.v_addressable; 67 67 68 68 // For bytes required in MALL, calculate based on number of MBlks required 69 69 num_mblks = (mall_region_pixels * bytes_per_pixel +
+3 -2
drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
··· 868 868 } 869 869 }, 870 870 .use_max_lb = true, 871 - .force_disable_subvp = true, 871 + .force_disable_subvp = false, 872 872 .exit_idle_opt_for_cursor_updates = true, 873 873 .enable_single_display_2to1_odm_policy = true, 874 874 .enable_dp_dig_pixel_rate_div_policy = 1, ··· 1662 1662 dc->caps.max_cab_allocation_bytes = 33554432; // 32MB = 1024 * 1024 * 32 1663 1663 dc->caps.subvp_fw_processing_delay_us = 15; 1664 1664 dc->caps.subvp_prefetch_end_to_mall_start_us = 15; 1665 + dc->caps.subvp_swath_height_margin_lines = 16; 1665 1666 dc->caps.subvp_pstate_allow_width_us = 20; 1666 - 1667 + dc->caps.subvp_vertical_int_margin_us = 30; 1667 1668 dc->caps.max_slave_planes = 1; 1668 1669 dc->caps.max_slave_yuv_planes = 1; 1669 1670 dc->caps.max_slave_rgb_planes = 1;
+2 -2
drivers/gpu/drm/amd/display/dc/dml/Makefile
··· 61 61 CFLAGS_$(AMDDALPATH)/dc/dml/dcn10/dcn10_fpu.o := $(dml_ccflags) 62 62 CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/dcn20_fpu.o := $(dml_ccflags) 63 63 CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags) 64 - CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags) 65 64 CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_ccflags) 66 65 CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags) 67 66 CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_ccflags) ··· 70 71 CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags) 71 72 CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) $(frame_warn_flag) 72 73 CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_ccflags) 74 + CFLAGS_$(AMDDALPATH)/dc/dml/dcn314/dcn314_fpu.o := $(dml_ccflags) 73 75 CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_ccflags) 74 76 CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/dcn32_fpu.o := $(dml_ccflags) 75 77 CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_ccflags) $(frame_warn_flag) ··· 82 82 CFLAGS_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_ccflags) 83 83 CFLAGS_$(AMDDALPATH)/dc/dml/dcn303/dcn303_fpu.o := $(dml_ccflags) 84 84 CFLAGS_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_ccflags) 85 - CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags) 86 85 CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calcs.o := $(dml_ccflags) 87 86 CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_auto.o := $(dml_ccflags) 88 87 CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_ccflags) -Wno-tautological-compare ··· 130 131 DML += dcn301/dcn301_fpu.o 131 132 DML += dcn302/dcn302_fpu.o 132 133 DML += dcn303/dcn303_fpu.o 134 + DML += dcn314/dcn314_fpu.o 133 135 DML += dsc/rc_calc_fpu.o 134 136 DML += calcs/dcn_calcs.o calcs/dcn_calc_math.o calcs/dcn_calc_auto.o 135 137 endif
+1
drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
··· 30 30 #include "dchubbub.h" 31 31 #include "dcn20/dcn20_resource.h" 32 32 #include "dcn21/dcn21_resource.h" 33 + #include "clk_mgr/dcn21/rn_clk_mgr.h" 33 34 34 35 #include "dcn20_fpu.h" 35 36
+1
drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
··· 26 26 #include "clk_mgr.h" 27 27 #include "dcn20/dcn20_resource.h" 28 28 #include "dcn301/dcn301_resource.h" 29 + #include "clk_mgr/dcn301/vg_clk_mgr.h" 29 30 30 31 #include "dml/dcn20/dcn20_fpu.h" 31 32 #include "dcn301_fpu.h"
+6 -3
drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
··· 25 25 26 26 #include "resource.h" 27 27 #include "clk_mgr.h" 28 + #include "dcn31/dcn31_resource.h" 29 + #include "dcn315/dcn315_resource.h" 30 + #include "dcn316/dcn316_resource.h" 28 31 29 32 #include "dml/dcn20/dcn20_fpu.h" 30 33 #include "dcn31_fpu.h" ··· 117 114 .dcc_supported = true, 118 115 }; 119 116 120 - struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = { 117 + static struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = { 121 118 /*TODO: correct dispclk/dppclk voltage level determination*/ 122 119 .clock_limits = { 123 120 { ··· 262 259 .dcc_supported = true, 263 260 }; 264 261 265 - struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc = { 262 + static struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc = { 266 263 .sr_exit_time_us = 9.0, 267 264 .sr_enter_plus_exit_time_us = 11.0, 268 265 .sr_exit_z8_time_us = 50.0, ··· 358 355 .dcc_supported = true, 359 356 }; 360 357 361 - struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = { 358 + static struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = { 362 359 /*TODO: correct dispclk/dppclk voltage level determination*/ 363 360 .clock_limits = { 364 361 {
+1 -1
drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
··· 26 26 #include "dc.h" 27 27 #include "dc_link.h" 28 28 #include "../display_mode_lib.h" 29 - #include "dml/dcn30/display_mode_vba_30.h" 29 + #include "../dcn30/display_mode_vba_30.h" 30 30 #include "display_mode_vba_31.h" 31 31 #include "../dml_inline_defs.h" 32 32
+1 -1
drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c
··· 27 27 #include "../display_mode_vba.h" 28 28 #include "../dml_inline_defs.h" 29 29 #include "display_rq_dlg_calc_31.h" 30 - #include "dml/dcn30/display_mode_vba_30.h" 30 + #include "../dcn30/display_mode_vba_30.h" 31 31 32 32 static bool is_dual_plane(enum source_format_class source_format) 33 33 {
+376
drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright 2022 Advanced Micro Devices, Inc. 4 + * 5 + * Permission is hereby granted, free of charge, to any person obtaining a 6 + * copy of this software and associated documentation files (the "Software"), 7 + * to deal in the Software without restriction, including without limitation 8 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 + * and/or sell copies of the Software, and to permit persons to whom the 10 + * Software is furnished to do so, subject to the following conditions: 11 + * 12 + * The above copyright notice and this permission notice shall be included in 13 + * all copies or substantial portions of the Software. 14 + * 15 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 + * OTHER DEALINGS IN THE SOFTWARE. 22 + * 23 + * Authors: AMD 24 + * 25 + */ 26 + 27 + #include "clk_mgr.h" 28 + #include "resource.h" 29 + #include "dcn31/dcn31_hubbub.h" 30 + #include "dcn314_fpu.h" 31 + #include "dml/dcn20/dcn20_fpu.h" 32 + #include "dml/display_mode_vba.h" 33 + 34 + struct _vcs_dpi_ip_params_st dcn3_14_ip = { 35 + .VBlankNomDefaultUS = 668, 36 + .gpuvm_enable = 1, 37 + .gpuvm_max_page_table_levels = 1, 38 + .hostvm_enable = 1, 39 + .hostvm_max_page_table_levels = 2, 40 + .rob_buffer_size_kbytes = 64, 41 + .det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE, 42 + .config_return_buffer_size_in_kbytes = 1792, 43 + .compressed_buffer_segment_size_in_kbytes = 64, 44 + .meta_fifo_size_in_kentries = 32, 45 + .zero_size_buffer_entries = 512, 46 + .compbuf_reserved_space_64b = 256, 47 + .compbuf_reserved_space_zs = 64, 48 + .dpp_output_buffer_pixels = 2560, 49 + .opp_output_buffer_lines = 1, 50 + .pixel_chunk_size_kbytes = 8, 51 + .meta_chunk_size_kbytes = 2, 52 + .min_meta_chunk_size_bytes = 256, 53 + .writeback_chunk_size_kbytes = 8, 54 + .ptoi_supported = false, 55 + .num_dsc = 4, 56 + .maximum_dsc_bits_per_component = 10, 57 + .dsc422_native_support = false, 58 + .is_line_buffer_bpp_fixed = true, 59 + .line_buffer_fixed_bpp = 48, 60 + .line_buffer_size_bits = 789504, 61 + .max_line_buffer_lines = 12, 62 + .writeback_interface_buffer_size_kbytes = 90, 63 + .max_num_dpp = 4, 64 + .max_num_otg = 4, 65 + .max_num_hdmi_frl_outputs = 1, 66 + .max_num_wb = 1, 67 + .max_dchub_pscl_bw_pix_per_clk = 4, 68 + .max_pscl_lb_bw_pix_per_clk = 2, 69 + .max_lb_vscl_bw_pix_per_clk = 4, 70 + .max_vscl_hscl_bw_pix_per_clk = 4, 71 + .max_hscl_ratio = 6, 72 + .max_vscl_ratio = 6, 73 + .max_hscl_taps = 8, 74 + .max_vscl_taps = 8, 75 + .dpte_buffer_size_in_pte_reqs_luma = 64, 76 + .dpte_buffer_size_in_pte_reqs_chroma = 34, 77 + .dispclk_ramp_margin_percent = 1, 78 + .max_inter_dcn_tile_repeaters = 8, 79 + .cursor_buffer_size = 16, 80 + .cursor_chunk_size = 2, 81 + .writeback_line_buffer_buffer_size = 0, 82 + .writeback_min_hscl_ratio = 1, 83 + .writeback_min_vscl_ratio = 1, 84 + .writeback_max_hscl_ratio = 1, 85 + .writeback_max_vscl_ratio = 1, 86 + .writeback_max_hscl_taps = 1, 87 + .writeback_max_vscl_taps = 1, 88 + .dppclk_delay_subtotal = 46, 89 + .dppclk_delay_scl = 50, 90 + .dppclk_delay_scl_lb_only = 16, 91 + 
.dppclk_delay_cnvc_formatter = 27, 92 + .dppclk_delay_cnvc_cursor = 6, 93 + .dispclk_delay_subtotal = 119, 94 + .dynamic_metadata_vm_enabled = false, 95 + .odm_combine_4to1_supported = false, 96 + .dcc_supported = true, 97 + }; 98 + 99 + struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = { 100 + /*TODO: correct dispclk/dppclk voltage level determination*/ 101 + .clock_limits = { 102 + { 103 + .state = 0, 104 + .dispclk_mhz = 1200.0, 105 + .dppclk_mhz = 1200.0, 106 + .phyclk_mhz = 600.0, 107 + .phyclk_d18_mhz = 667.0, 108 + .dscclk_mhz = 186.0, 109 + .dtbclk_mhz = 600.0, 110 + }, 111 + { 112 + .state = 1, 113 + .dispclk_mhz = 1200.0, 114 + .dppclk_mhz = 1200.0, 115 + .phyclk_mhz = 810.0, 116 + .phyclk_d18_mhz = 667.0, 117 + .dscclk_mhz = 209.0, 118 + .dtbclk_mhz = 600.0, 119 + }, 120 + { 121 + .state = 2, 122 + .dispclk_mhz = 1200.0, 123 + .dppclk_mhz = 1200.0, 124 + .phyclk_mhz = 810.0, 125 + .phyclk_d18_mhz = 667.0, 126 + .dscclk_mhz = 209.0, 127 + .dtbclk_mhz = 600.0, 128 + }, 129 + { 130 + .state = 3, 131 + .dispclk_mhz = 1200.0, 132 + .dppclk_mhz = 1200.0, 133 + .phyclk_mhz = 810.0, 134 + .phyclk_d18_mhz = 667.0, 135 + .dscclk_mhz = 371.0, 136 + .dtbclk_mhz = 600.0, 137 + }, 138 + { 139 + .state = 4, 140 + .dispclk_mhz = 1200.0, 141 + .dppclk_mhz = 1200.0, 142 + .phyclk_mhz = 810.0, 143 + .phyclk_d18_mhz = 667.0, 144 + .dscclk_mhz = 417.0, 145 + .dtbclk_mhz = 600.0, 146 + }, 147 + }, 148 + .num_states = 5, 149 + .sr_exit_time_us = 9.0, 150 + .sr_enter_plus_exit_time_us = 11.0, 151 + .sr_exit_z8_time_us = 442.0, 152 + .sr_enter_plus_exit_z8_time_us = 560.0, 153 + .writeback_latency_us = 12.0, 154 + .dram_channel_width_bytes = 4, 155 + .round_trip_ping_latency_dcfclk_cycles = 106, 156 + .urgent_latency_pixel_data_only_us = 4.0, 157 + .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, 158 + .urgent_latency_vm_data_only_us = 4.0, 159 + .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, 160 + .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, 161 + .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, 162 + .pct_ideal_sdp_bw_after_urgent = 80.0, 163 + .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0, 164 + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, 165 + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, 166 + .max_avg_sdp_bw_use_normal_percent = 60.0, 167 + .max_avg_dram_bw_use_normal_percent = 60.0, 168 + .fabric_datapath_to_dcn_data_return_bytes = 32, 169 + .return_bus_width_bytes = 64, 170 + .downspread_percent = 0.38, 171 + .dcn_downspread_percent = 0.5, 172 + .gpuvm_min_page_size_bytes = 4096, 173 + .hostvm_min_page_size_bytes = 4096, 174 + .do_urgent_latency_adjustment = false, 175 + .urgent_latency_adjustment_fabric_clock_component_us = 0, 176 + .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, 177 + }; 178 + 179 + 180 + void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params) 181 + { 182 + struct clk_limit_table *clk_table = &bw_params->clk_table; 183 + struct _vcs_dpi_voltage_scaling_st *clock_limits = 184 + dcn3_14_soc.clock_limits; 185 + unsigned int i, closest_clk_lvl; 186 + int max_dispclk_mhz = 0, max_dppclk_mhz = 0; 187 + int j; 188 + 189 + dc_assert_fp_enabled(); 190 + 191 + // Default clock levels are used for diags, which may lead to overclocking. 
192 + if (!IS_DIAG_DC(dc->ctx->dce_environment) && dc->config.use_default_clock_table == false) { 193 + 194 + dcn3_14_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; 195 + dcn3_14_ip.max_num_dpp = dc->res_pool->pipe_count; 196 + 197 + if (bw_params->num_channels > 0) 198 + dcn3_14_soc.num_chans = bw_params->num_channels; 199 + 200 + ASSERT(dcn3_14_soc.num_chans); 201 + ASSERT(clk_table->num_entries); 202 + 203 + /* Prepass to find max clocks independent of voltage level. */ 204 + for (i = 0; i < clk_table->num_entries; ++i) { 205 + if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz) 206 + max_dispclk_mhz = clk_table->entries[i].dispclk_mhz; 207 + if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz) 208 + max_dppclk_mhz = clk_table->entries[i].dppclk_mhz; 209 + } 210 + 211 + for (i = 0; i < clk_table->num_entries; i++) { 212 + /* loop backwards*/ 213 + for (closest_clk_lvl = 0, j = dcn3_14_soc.num_states - 1; j >= 0; j--) { 214 + if ((unsigned int) dcn3_14_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) { 215 + closest_clk_lvl = j; 216 + break; 217 + } 218 + } 219 + if (clk_table->num_entries == 1) { 220 + /*smu gives one DPM level, let's take the highest one*/ 221 + closest_clk_lvl = dcn3_14_soc.num_states - 1; 222 + } 223 + 224 + clock_limits[i].state = i; 225 + 226 + /* Clocks dependent on voltage level. */ 227 + clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; 228 + if (clk_table->num_entries == 1 && 229 + clock_limits[i].dcfclk_mhz < dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) { 230 + /*SMU fix not released yet*/ 231 + clock_limits[i].dcfclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz; 232 + } 233 + clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; 234 + clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz; 235 + 236 + if (clk_table->entries[i].memclk_mhz && clk_table->entries[i].wck_ratio) 237 + clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio; 238 + 239 + /* Clocks independent of voltage level. */ 240 + clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : 241 + dcn3_14_soc.clock_limits[closest_clk_lvl].dispclk_mhz; 242 + 243 + clock_limits[i].dppclk_mhz = max_dppclk_mhz ? 
max_dppclk_mhz : 244 + dcn3_14_soc.clock_limits[closest_clk_lvl].dppclk_mhz; 245 + 246 + clock_limits[i].dram_bw_per_chan_gbps = dcn3_14_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; 247 + clock_limits[i].dscclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dscclk_mhz; 248 + clock_limits[i].dtbclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; 249 + clock_limits[i].phyclk_d18_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; 250 + clock_limits[i].phyclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_mhz; 251 + } 252 + for (i = 0; i < clk_table->num_entries; i++) 253 + dcn3_14_soc.clock_limits[i] = clock_limits[i]; 254 + if (clk_table->num_entries) { 255 + dcn3_14_soc.num_states = clk_table->num_entries; 256 + } 257 + } 258 + 259 + if (max_dispclk_mhz) { 260 + dcn3_14_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; 261 + dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; 262 + } 263 + 264 + if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) 265 + dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31); 266 + else 267 + dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31_FPGA); 268 + } 269 + 270 + static bool is_dual_plane(enum surface_pixel_format format) 271 + { 272 + return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA; 273 + } 274 + 275 + int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *context, 276 + display_e2e_pipe_params_st *pipes, 277 + bool fast_validate) 278 + { 279 + int i, pipe_cnt; 280 + struct resource_context *res_ctx = &context->res_ctx; 281 + struct pipe_ctx *pipe; 282 + bool upscaled = false; 283 + 284 + dc_assert_fp_enabled(); 285 + 286 + dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); 287 + 288 + for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { 289 + struct dc_crtc_timing *timing; 290 + 291 + if (!res_ctx->pipe_ctx[i].stream) 292 + continue; 293 + pipe = &res_ctx->pipe_ctx[i]; 294 + timing = &pipe->stream->timing; 295 + 296 + if (dc_extended_blank_supported(dc) && pipe->stream->adjust.v_total_max == pipe->stream->adjust.v_total_min 297 + && pipe->stream->adjust.v_total_min > timing->v_total) 298 + pipes[pipe_cnt].pipe.dest.vtotal = pipe->stream->adjust.v_total_min; 299 + 300 + if (pipe->plane_state && 301 + (pipe->plane_state->src_rect.height < pipe->plane_state->dst_rect.height || 302 + pipe->plane_state->src_rect.width < pipe->plane_state->dst_rect.width)) 303 + upscaled = true; 304 + 305 + /* 306 + * Immediate flip can be set dynamically after enabling the plane. 307 + * We need to require support for immediate flip or underflow can be 308 + * intermittently experienced depending on peak b/w requirements. 
309 + */ 310 + pipes[pipe_cnt].pipe.src.immediate_flip = true; 311 + 312 + pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; 313 + pipes[pipe_cnt].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active; 314 + pipes[pipe_cnt].pipe.src.gpuvm = true; 315 + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; 316 + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; 317 + pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; 318 + pipes[pipe_cnt].pipe.src.dcc_rate = 3; 319 + pipes[pipe_cnt].dout.dsc_input_bpc = 0; 320 + 321 + if (pipes[pipe_cnt].dout.dsc_enable) { 322 + switch (timing->display_color_depth) { 323 + case COLOR_DEPTH_888: 324 + pipes[pipe_cnt].dout.dsc_input_bpc = 8; 325 + break; 326 + case COLOR_DEPTH_101010: 327 + pipes[pipe_cnt].dout.dsc_input_bpc = 10; 328 + break; 329 + case COLOR_DEPTH_121212: 330 + pipes[pipe_cnt].dout.dsc_input_bpc = 12; 331 + break; 332 + default: 333 + ASSERT(0); 334 + break; 335 + } 336 + } 337 + 338 + pipe_cnt++; 339 + } 340 + context->bw_ctx.dml.ip.det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE; 341 + 342 + dc->config.enable_4to1MPC = false; 343 + if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) { 344 + if (is_dual_plane(pipe->plane_state->format) 345 + && pipe->plane_state->src_rect.width <= 1920 && pipe->plane_state->src_rect.height <= 1080) { 346 + dc->config.enable_4to1MPC = true; 347 + } else if (!is_dual_plane(pipe->plane_state->format) && pipe->plane_state->src_rect.width <= 5120) { 348 + /* Limit to 5k max to avoid forced pipe split when there is not enough detile for swath */ 349 + context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192; 350 + pipes[0].pipe.src.unbounded_req_mode = true; 351 + } 352 + } else if (context->stream_count >= dc->debug.crb_alloc_policy_min_disp_count 353 + && dc->debug.crb_alloc_policy > DET_SIZE_DEFAULT) { 354 + context->bw_ctx.dml.ip.det_buffer_size_kbytes = dc->debug.crb_alloc_policy * 64; 355 + } else if (context->stream_count >= 3 && upscaled) { 356 + context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192; 357 + } 358 + 359 + for (i = 0; i < dc->res_pool->pipe_count; i++) { 360 + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; 361 + 362 + if (!pipe->stream) 363 + continue; 364 + 365 + if (pipe->stream->signal == SIGNAL_TYPE_EDP && dc->debug.seamless_boot_odm_combine && 366 + pipe->stream->apply_seamless_boot_optimization) { 367 + 368 + if (pipe->stream->apply_boot_odm_mode == dm_odm_combine_policy_2to1) { 369 + context->bw_ctx.dml.vba.ODMCombinePolicy = dm_odm_combine_policy_2to1; 370 + break; 371 + } 372 + } 373 + } 374 + 375 + return pipe_cnt; 376 + }
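Both helpers exported from this new file call dc_assert_fp_enabled(), so their resource-layer callers are expected to bracket them with the kernel FPU guards, the DC_FP_START()/DC_FP_END() pair (DC_FP_END() is visible in the dcn32_fpu.c hunks below). A minimal sketch of such a caller, assuming a hypothetical wrapper name that is not part of this patch:

static int dcn314_populate_dml_pipes_wrapper(struct dc *dc,
					     struct dc_state *context,
					     display_e2e_pipe_params_st *pipes,
					     bool fast_validate)
{
	int pipe_cnt;

	/* Enter the kernel FPU region before calling DML floating-point code;
	 * dc_assert_fp_enabled() inside the helper verifies this guard. */
	DC_FP_START();
	pipe_cnt = dcn314_populate_dml_pipes_from_context_fpu(dc, context,
							      pipes, fast_validate);
	DC_FP_END();

	return pipe_cnt;
}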
+40
drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright 2022 Advanced Micro Devices, Inc. 4 + * 5 + * Permission is hereby granted, free of charge, to any person obtaining a 6 + * copy of this software and associated documentation files (the "Software"), 7 + * to deal in the Software without restriction, including without limitation 8 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 + * and/or sell copies of the Software, and to permit persons to whom the 10 + * Software is furnished to do so, subject to the following conditions: 11 + * 12 + * The above copyright notice and this permission notice shall be included in 13 + * all copies or substantial portions of the Software. 14 + * 15 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 + * OTHER DEALINGS IN THE SOFTWARE. 22 + * 23 + * Authors: AMD 24 + * 25 + */ 26 + 27 + #ifndef __DCN314_FPU_H__ 28 + #define __DCN314_FPU_H__ 29 + 30 + #define DCN3_14_DEFAULT_DET_SIZE 384 31 + #define DCN3_14_MAX_DET_SIZE 384 32 + #define DCN3_14_MIN_COMPBUF_SIZE_KB 128 33 + #define DCN3_14_CRB_SEGMENT_SIZE_KB 64 34 + 35 + void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params); 36 + int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *context, 37 + display_e2e_pipe_params_st *pipes, 38 + bool fast_validate); 39 + 40 + #endif
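These constants pair with the DET/CRB sizing done in dcn314_fpu.c above, where dc->debug.crb_alloc_policy is treated as a count of 64 KB CRB segments. A hedged sketch of that arithmetic (the helper name and the clamp to DCN3_14_MAX_DET_SIZE are illustrative assumptions; the driver performs the multiplication inline):

static unsigned int dcn314_det_kbytes_from_policy(int crb_alloc_policy)
{
	unsigned int det_kb;

	if (crb_alloc_policy <= 0)
		return DCN3_14_DEFAULT_DET_SIZE;

	/* The policy is expressed in CRB segments of 64 KB each. */
	det_kb = crb_alloc_policy * DCN3_14_CRB_SEGMENT_SIZE_KB;

	/* Assumed clamp: keep the result within the per-pipe maximum. */
	return det_kb > DCN3_14_MAX_DET_SIZE ? DCN3_14_MAX_DET_SIZE : det_kb;
}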
+36 -11
drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
··· 473 473 474 474 // DML calculation for MALL region doesn't take into account FW delay 475 475 // and required pstate allow width for multi-display cases 476 + /* Add 16 lines margin to the MALL REGION because SUB_VP_START_LINE must be aligned 477 + * to 2 swaths (i.e. 16 lines) 478 + */ 476 479 phantom_vactive = get_subviewport_lines_needed_in_mall(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx) + 477 - pstate_width_fw_delay_lines; 480 + pstate_width_fw_delay_lines + dc->caps.subvp_swath_height_margin_lines; 478 481 479 482 // For backporch of phantom pipe, use vstartup of the main pipe 480 483 phantom_bp = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); ··· 493 490 phantom_stream->timing.v_front_porch + 494 491 phantom_stream->timing.v_sync_width + 495 492 phantom_bp; 493 + phantom_stream->timing.flags.DSC = 0; // Don't need DSC for phantom timing 496 494 } 497 495 498 496 /** ··· 987 983 * DML favors voltage over p-state, but we're more interested in 988 984 * supporting p-state over voltage. We can't support p-state in 989 985 * prefetch mode > 0 so try capping the prefetch mode to start. 986 + * Override present for testing. 990 987 */ 991 - context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = 988 + if (dc->debug.dml_disallow_alternate_prefetch_modes) 989 + context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = 992 990 dm_prefetch_support_uclk_fclk_and_stutter; 991 + else 992 + context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = 993 + dm_prefetch_support_uclk_fclk_and_stutter_if_possible; 994 + 993 995 *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt); 994 996 /* This may adjust vlevel and maxMpcComb */ 995 997 if (*vlevel < context->bw_ctx.dml.soc.num_states) ··· 1024 1014 * will not allow for switch in VBLANK. The DRR display must have it's VBLANK stretched 1025 1015 * enough to support MCLK switching. 1026 1016 */ 1027 - if (*vlevel == context->bw_ctx.dml.soc.num_states) { 1017 + if (*vlevel == context->bw_ctx.dml.soc.num_states && 1018 + context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final == 1019 + dm_prefetch_support_uclk_fclk_and_stutter) { 1028 1020 context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = 1029 1021 dm_prefetch_support_stutter; 1030 1022 /* There are params (such as FabricClock) that need to be recalculated ··· 1356 1344 int split[MAX_PIPES] = { 0 }; 1357 1345 bool merge[MAX_PIPES] = { false }; 1358 1346 bool newly_split[MAX_PIPES] = { false }; 1359 - int pipe_cnt, i, pipe_idx, vlevel; 1347 + int pipe_cnt, i, pipe_idx; 1348 + int vlevel = context->bw_ctx.dml.soc.num_states; 1360 1349 struct vba_vars_st *vba = &context->bw_ctx.dml.vba; 1361 1350 1362 1351 dc_assert_fp_enabled(); ··· 1386 1373 DC_FP_END(); 1387 1374 } 1388 1375 1389 - if (fast_validate || vlevel == context->bw_ctx.dml.soc.num_states || 1390 - vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) { 1376 + if (fast_validate || 1377 + (dc->debug.dml_disallow_alternate_prefetch_modes && 1378 + (vlevel == context->bw_ctx.dml.soc.num_states || 1379 + vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported))) { 1391 1380 /* 1392 - * If mode is unsupported or there's still no p-state support then 1393 - * fall back to favoring voltage. 1381 + * If dml_disallow_alternate_prefetch_modes is false, then we have already 1382 + * tried alternate prefetch modes during full validation. 
1394 1383 * 1395 - * If Prefetch mode 0 failed for this config, or passed with Max UCLK, try if 1396 - * supported with Prefetch mode 1 (dm_prefetch_support_fclk_and_stutter == 2) 1384 + * If mode is unsupported or there is no p-state support, then 1385 + * fall back to favouring voltage. 1386 + * 1387 + * If Prefetch mode 0 failed for this config, or passed with Max UCLK, then try 1388 + * to support with Prefetch mode 1 (dm_prefetch_support_fclk_and_stutter == 2) 1397 1389 */ 1398 1390 context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = 1399 - dm_prefetch_support_fclk_and_stutter; 1391 + dm_prefetch_support_fclk_and_stutter; 1400 1392 1401 1393 vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); 1402 1394 ··· 2114 2096 && dc->bb_overrides.dram_clock_change_latency_ns) { 2115 2097 dcn3_2_soc.dram_clock_change_latency_us = 2116 2098 dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; 2099 + } 2100 + 2101 + if ((int)(dcn3_2_soc.fclk_change_latency_us * 1000) 2102 + != dc->bb_overrides.fclk_clock_change_latency_ns 2103 + && dc->bb_overrides.fclk_clock_change_latency_ns) { 2104 + dcn3_2_soc.fclk_change_latency_us = 2105 + dc->bb_overrides.fclk_clock_change_latency_ns / 1000; 2117 2106 } 2118 2107 2119 2108 if ((int)(dcn3_2_soc.dummy_pstate_latency_us * 1000)
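The dcn32_fpu.c hunks above rework the prefetch-mode policy: DML now starts from the "if possible" mode and is only pinned to the strongest mode (and later walked down the explicit fallback ladder) when the new dc->debug.dml_disallow_alternate_prefetch_modes override is set. A minimal sketch of the initial selection, using placeholder enumerators that stand in for the dm_prefetch_support_* values:

enum prefetch_mode_sketch {
	PREFETCH_UCLK_FCLK_STUTTER_IF_POSSIBLE,	/* DML may degrade on its own */
	PREFETCH_UCLK_FCLK_STUTTER,		/* pinned strongest mode */
	PREFETCH_FCLK_STUTTER,			/* explicit fallback when p-state fails */
};

static enum prefetch_mode_sketch
pick_initial_prefetch_mode(bool dml_disallow_alternate_prefetch_modes)
{
	/* With the debug override set, keep the pre-patch behaviour and force
	 * the strongest mode; otherwise let DML choose a weaker one itself. */
	if (dml_disallow_alternate_prefetch_modes)
		return PREFETCH_UCLK_FCLK_STUTTER;

	return PREFETCH_UCLK_FCLK_STUTTER_IF_POSSIBLE;
}

In the driver the chosen value is stored in context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final before dml_get_voltage_level() is called.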
+1 -11
drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
··· 221 221 // VBA_DELTA 222 222 // Calculate DET size, swath height 223 223 dml32_CalculateSwathAndDETConfiguration( 224 - &v->dummy_vars.dml32_CalculateSwathAndDETConfiguration, 225 224 mode_lib->vba.DETSizeOverride, 226 225 mode_lib->vba.UsesMALLForPStateChange, 227 226 mode_lib->vba.ConfigReturnBufferSizeInKByte, ··· 460 461 { 461 462 462 463 dml32_CalculateVMRowAndSwath( 463 - &v->dummy_vars.dml32_CalculateVMRowAndSwath, 464 464 mode_lib->vba.NumberOfActiveSurfaces, 465 465 v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters, 466 466 v->SurfaceSizeInMALL, ··· 755 757 v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelY = v->BytePerPixelY[k]; 756 758 v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelC = v->BytePerPixelC[k]; 757 759 v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP; 758 - v->ErrorResult[k] = dml32_CalculatePrefetchSchedule( 759 - &v->dummy_vars.dml32_CalculatePrefetchSchedule, 760 - v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor, 760 + v->ErrorResult[k] = dml32_CalculatePrefetchSchedule(v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor, 761 761 &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe, v->DSCDelay[k], 762 762 mode_lib->vba.DPPCLKDelaySubtotal + mode_lib->vba.DPPCLKDelayCNVCFormater, 763 763 mode_lib->vba.DPPCLKDelaySCL, ··· 1163 1167 v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SMNLatency = mode_lib->vba.SMNLatency; 1164 1168 1165 1169 dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( 1166 - &v->dummy_vars.dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport, 1167 1170 mode_lib->vba.USRRetrainingRequiredFinal, 1168 1171 mode_lib->vba.UsesMALLForPStateChange, 1169 1172 mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], ··· 1947 1952 } 1948 1953 1949 1954 dml32_CalculateSwathAndDETConfiguration( 1950 - &v->dummy_vars.dml32_CalculateSwathAndDETConfiguration, 1951 1955 mode_lib->vba.DETSizeOverride, 1952 1956 mode_lib->vba.UsesMALLForPStateChange, 1953 1957 mode_lib->vba.ConfigReturnBufferSizeInKByte, ··· 2543 2549 } 2544 2550 2545 2551 dml32_CalculateSwathAndDETConfiguration( 2546 - &v->dummy_vars.dml32_CalculateSwathAndDETConfiguration, 2547 2552 mode_lib->vba.DETSizeOverride, 2548 2553 mode_lib->vba.UsesMALLForPStateChange, 2549 2554 mode_lib->vba.ConfigReturnBufferSizeInKByte, ··· 2742 2749 2743 2750 { 2744 2751 dml32_CalculateVMRowAndSwath( 2745 - &v->dummy_vars.dml32_CalculateVMRowAndSwath, 2746 2752 mode_lib->vba.NumberOfActiveSurfaces, 2747 2753 v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters, 2748 2754 mode_lib->vba.SurfaceSizeInMALL, ··· 3258 3266 3259 3267 mode_lib->vba.NoTimeForPrefetch[i][j][k] = 3260 3268 dml32_CalculatePrefetchSchedule( 3261 - &v->dummy_vars.dml32_CalculatePrefetchSchedule, 3262 3269 v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor, 3263 3270 &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe, 3264 3271 
mode_lib->vba.DSCDelayPerState[i][k], ··· 3557 3566 3558 3567 { 3559 3568 dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( 3560 - &v->dummy_vars.dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport, 3561 3569 mode_lib->vba.USRRetrainingRequiredFinal, 3562 3570 mode_lib->vba.UsesMALLForPStateChange, 3563 3571 mode_lib->vba.PrefetchModePerState[i][j],
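This file (and display_mode_vba_util_32.c below) drops the scratch-struct argument (&v->dummy_vars.dml32_*) previously threaded into the dml32_* helpers; the temporaries become ordinary locals inside each helper, as the next file's hunks show. An illustrative before/after sketch with placeholder names rather than the real DML types:

struct scratch_sketch {
	unsigned int max_swath_height_y[4];
};

/* Before: the caller supplied preallocated scratch space for temporaries. */
static unsigned int swath_sum_old(struct scratch_sketch *st, unsigned int n)
{
	unsigned int total = 0, k;

	for (k = 0; k < n && k < 4; k++) {
		st->max_swath_height_y[k] = k + 1;
		total += st->max_swath_height_y[k];
	}

	return total;
}

/* After: the temporaries live on the helper's own stack, so the extra
 * parameter disappears and the call sites shrink accordingly. */
static unsigned int swath_sum_new(unsigned int n)
{
	unsigned int max_swath_height_y[4];
	unsigned int total = 0, k;

	for (k = 0; k < n && k < 4; k++) {
		max_swath_height_y[k] = k + 1;
		total += max_swath_height_y[k];
	}

	return total;
}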
+439 -371
drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
··· 391 391 } // CalculateBytePerPixelAndBlockSizes 392 392 393 393 void dml32_CalculateSwathAndDETConfiguration( 394 - struct dml32_CalculateSwathAndDETConfiguration *st_vars, 395 394 unsigned int DETSizeOverride[], 396 395 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 397 396 unsigned int ConfigReturnBufferSizeInKByte, ··· 455 456 bool ViewportSizeSupportPerSurface[], 456 457 bool *ViewportSizeSupport) 457 458 { 459 + unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX]; 460 + unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX]; 461 + unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX]; 462 + unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX]; 463 + unsigned int RoundedUpSwathSizeBytesY; 464 + unsigned int RoundedUpSwathSizeBytesC; 465 + double SwathWidthdoubleDPP[DC__NUM_DPP__MAX]; 466 + double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX]; 458 467 unsigned int k; 459 - 460 - st_vars->TotalActiveDPP = 0; 461 - st_vars->NoChromaSurfaces = true; 468 + unsigned int TotalActiveDPP = 0; 469 + bool NoChromaSurfaces = true; 470 + unsigned int DETBufferSizeInKByteForSwathCalculation; 462 471 463 472 #ifdef __DML_VBA_DEBUG__ 464 473 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); ··· 501 494 DPPPerSurface, 502 495 503 496 /* Output */ 504 - st_vars->SwathWidthdoubleDPP, 505 - st_vars->SwathWidthdoubleDPPChroma, 497 + SwathWidthdoubleDPP, 498 + SwathWidthdoubleDPPChroma, 506 499 SwathWidth, 507 500 SwathWidthChroma, 508 - st_vars->MaximumSwathHeightY, 509 - st_vars->MaximumSwathHeightC, 501 + MaximumSwathHeightY, 502 + MaximumSwathHeightC, 510 503 swath_width_luma_ub, 511 504 swath_width_chroma_ub); 512 505 513 506 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 514 - st_vars->RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * st_vars->MaximumSwathHeightY[k]; 515 - st_vars->RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * st_vars->MaximumSwathHeightC[k]; 507 + RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k]; 508 + RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k]; 516 509 #ifdef __DML_VBA_DEBUG__ 517 510 dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); 518 511 dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]); 519 512 dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]); 520 - dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, st_vars->MaximumSwathHeightY[k]); 513 + dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]); 521 514 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k, 522 - st_vars->RoundedUpMaxSwathSizeBytesY[k]); 515 + RoundedUpMaxSwathSizeBytesY[k]); 523 516 dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]); 524 517 dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]); 525 - dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, st_vars->MaximumSwathHeightC[k]); 518 + dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]); 526 519 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k, 527 - st_vars->RoundedUpMaxSwathSizeBytesC[k]); 520 + RoundedUpMaxSwathSizeBytesC[k]); 528 521 #endif 529 522 530 523 if (SourcePixelFormat[k] == dm_420_10) { 531 - 
st_vars->RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) st_vars->RoundedUpMaxSwathSizeBytesY[k], 256); 532 - st_vars->RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) st_vars->RoundedUpMaxSwathSizeBytesC[k], 256); 524 + RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256); 525 + RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256); 533 526 } 534 527 } 535 528 536 529 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 537 - st_vars->TotalActiveDPP = st_vars->TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]); 530 + TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]); 538 531 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || 539 532 SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) { 540 - st_vars->NoChromaSurfaces = false; 533 + NoChromaSurfaces = false; 541 534 } 542 535 } 543 536 ··· 547 540 // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data 548 541 // - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio] 549 542 // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req 550 - *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (st_vars->RoundedUpMaxSwathSizeBytesY[0]/512); 543 + *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512); 551 544 552 545 if (*CompBufReservedSpaceNeedAdjustment == 1) { 553 - *CompBufReservedSpaceKBytes = ROBSizeKBytes - st_vars->RoundedUpMaxSwathSizeBytesY[0]/512; 546 + *CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512; 554 547 } 555 548 556 549 #ifdef __DML_VBA_DEBUG__ ··· 558 551 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment); 559 552 #endif 560 553 561 - *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, st_vars->TotalActiveDPP, st_vars->NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); 554 + *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); 562 555 563 556 dml32_CalculateDETBufferSize(DETSizeOverride, 564 557 UseMALLForPStateChange, ··· 573 566 SourcePixelFormat, 574 567 ReadBandwidthLuma, 575 568 ReadBandwidthChroma, 576 - st_vars->RoundedUpMaxSwathSizeBytesY, 577 - st_vars->RoundedUpMaxSwathSizeBytesC, 569 + RoundedUpMaxSwathSizeBytesY, 570 + RoundedUpMaxSwathSizeBytesC, 578 571 DPPPerSurface, 579 572 580 573 /* Output */ ··· 582 575 CompressedBufferSizeInkByte); 583 576 584 577 #ifdef __DML_VBA_DEBUG__ 585 - dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, st_vars->TotalActiveDPP); 578 + dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP); 586 579 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); 587 580 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); 588 581 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal); ··· 593 586 *ViewportSizeSupport = 
true; 594 587 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 595 588 596 - st_vars->DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] == 589 + DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] == 597 590 dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]); 598 591 #ifdef __DML_VBA_DEBUG__ 599 592 dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k, 600 - st_vars->DETBufferSizeInKByteForSwathCalculation); 593 + DETBufferSizeInKByteForSwathCalculation); 601 594 #endif 602 595 603 - if (st_vars->RoundedUpMaxSwathSizeBytesY[k] + st_vars->RoundedUpMaxSwathSizeBytesC[k] <= 604 - st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 605 - SwathHeightY[k] = st_vars->MaximumSwathHeightY[k]; 606 - SwathHeightC[k] = st_vars->MaximumSwathHeightC[k]; 607 - st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k]; 608 - st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k]; 609 - } else if (st_vars->RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * st_vars->RoundedUpMaxSwathSizeBytesC[k] && 610 - st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2 + st_vars->RoundedUpMaxSwathSizeBytesC[k] <= 611 - st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 612 - SwathHeightY[k] = st_vars->MaximumSwathHeightY[k] / 2; 613 - SwathHeightC[k] = st_vars->MaximumSwathHeightC[k]; 614 - st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2; 615 - st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k]; 616 - } else if (st_vars->RoundedUpMaxSwathSizeBytesY[k] < 1.5 * st_vars->RoundedUpMaxSwathSizeBytesC[k] && 617 - st_vars->RoundedUpMaxSwathSizeBytesY[k] + st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2 <= 618 - st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 619 - SwathHeightY[k] = st_vars->MaximumSwathHeightY[k]; 620 - SwathHeightC[k] = st_vars->MaximumSwathHeightC[k] / 2; 621 - st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k]; 622 - st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2; 596 + if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <= 597 + DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 598 + SwathHeightY[k] = MaximumSwathHeightY[k]; 599 + SwathHeightC[k] = MaximumSwathHeightC[k]; 600 + RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k]; 601 + RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k]; 602 + } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] && 603 + RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <= 604 + DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 605 + SwathHeightY[k] = MaximumSwathHeightY[k] / 2; 606 + SwathHeightC[k] = MaximumSwathHeightC[k]; 607 + RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2; 608 + RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k]; 609 + } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] && 610 + RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <= 611 + DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 612 + SwathHeightY[k] = MaximumSwathHeightY[k]; 613 + SwathHeightC[k] = MaximumSwathHeightC[k] / 2; 614 + RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k]; 615 + RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2; 623 616 } else { 624 - SwathHeightY[k] = st_vars->MaximumSwathHeightY[k] / 2; 625 - SwathHeightC[k] = 
st_vars->MaximumSwathHeightC[k] / 2; 626 - st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2; 627 - st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2; 617 + SwathHeightY[k] = MaximumSwathHeightY[k] / 2; 618 + SwathHeightC[k] = MaximumSwathHeightC[k] / 2; 619 + RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2; 620 + RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2; 628 621 } 629 622 630 - if ((st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2 + st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2 > 631 - st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) 623 + if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 > 624 + DETBufferSizeInKByteForSwathCalculation * 1024 / 2) 632 625 || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 && 633 626 SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) { 634 627 *ViewportSizeSupport = false; ··· 643 636 #endif 644 637 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024; 645 638 DETBufferSizeC[k] = 0; 646 - } else if (st_vars->RoundedUpSwathSizeBytesY <= 1.5 * st_vars->RoundedUpSwathSizeBytesC) { 639 + } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) { 647 640 #ifdef __DML_VBA_DEBUG__ 648 641 dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k); 649 642 #endif ··· 661 654 dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]); 662 655 dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]); 663 656 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, 664 - k, st_vars->RoundedUpMaxSwathSizeBytesY[k]); 657 + k, RoundedUpMaxSwathSizeBytesY[k]); 665 658 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, 666 - k, st_vars->RoundedUpMaxSwathSizeBytesC[k]); 667 - dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, st_vars->RoundedUpSwathSizeBytesY); 668 - dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, st_vars->RoundedUpSwathSizeBytesC); 659 + k, RoundedUpMaxSwathSizeBytesC[k]); 660 + dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY); 661 + dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC); 669 662 dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); 670 663 dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); 671 664 dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]); ··· 1874 1867 } // CalculateSurfaceSizeInMall 1875 1868 1876 1869 void dml32_CalculateVMRowAndSwath( 1877 - struct dml32_CalculateVMRowAndSwath *st_vars, 1878 1870 unsigned int NumberOfActiveSurfaces, 1879 1871 DmlPipe myPipe[], 1880 1872 unsigned int SurfaceSizeInMALL[], ··· 1939 1933 unsigned int BIGK_FRAGMENT_SIZE[]) 1940 1934 { 1941 1935 unsigned int k; 1936 + unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX]; 1937 + unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX]; 1938 + unsigned int PDEAndMetaPTEBytesFrameY; 1939 + unsigned int PDEAndMetaPTEBytesFrameC; 1940 + unsigned int MetaRowByteY[DC__NUM_DPP__MAX]; 1941 + unsigned int MetaRowByteC[DC__NUM_DPP__MAX]; 1942 + unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX]; 1943 + unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX]; 1944 + unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX]; 1945 + unsigned int 
PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX]; 1946 + unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; 1947 + unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX]; 1948 + unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; 1949 + unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX]; 1950 + bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX]; 1942 1951 1943 1952 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1944 1953 if (HostVMEnable == true) { ··· 1975 1954 myPipe[k].SourcePixelFormat == dm_rgbe_alpha) { 1976 1955 if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) && 1977 1956 !IsVertical(myPipe[k].SourceRotation)) { 1978 - st_vars->PTEBufferSizeInRequestsForLuma[k] = 1957 + PTEBufferSizeInRequestsForLuma[k] = 1979 1958 (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2; 1980 - st_vars->PTEBufferSizeInRequestsForChroma[k] = st_vars->PTEBufferSizeInRequestsForLuma[k]; 1959 + PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k]; 1981 1960 } else { 1982 - st_vars->PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma; 1983 - st_vars->PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma; 1961 + PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma; 1962 + PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma; 1984 1963 } 1985 1964 1986 - st_vars->PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes( 1965 + PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes( 1987 1966 myPipe[k].ViewportStationary, 1988 1967 myPipe[k].DCCEnable, 1989 1968 myPipe[k].DPPPerSurface, ··· 2003 1982 GPUVMMaxPageTableLevels, 2004 1983 GPUVMMinPageSizeKBytes[k], 2005 1984 HostVMMinPageSize, 2006 - st_vars->PTEBufferSizeInRequestsForChroma[k], 1985 + PTEBufferSizeInRequestsForChroma[k], 2007 1986 myPipe[k].PitchC, 2008 1987 myPipe[k].DCCMetaPitchC, 2009 1988 myPipe[k].BlockWidthC, 2010 1989 myPipe[k].BlockHeightC, 2011 1990 2012 1991 /* Output */ 2013 - &st_vars->MetaRowByteC[k], 2014 - &st_vars->PixelPTEBytesPerRowC[k], 1992 + &MetaRowByteC[k], 1993 + &PixelPTEBytesPerRowC[k], 2015 1994 &dpte_row_width_chroma_ub[k], 2016 1995 &dpte_row_height_chroma[k], 2017 1996 &dpte_row_height_linear_chroma[k], 2018 - &st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k], 2019 - &st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k], 2020 - &st_vars->dpte_row_height_chroma_one_row_per_frame[k], 1997 + &PixelPTEBytesPerRowC_one_row_per_frame[k], 1998 + &dpte_row_width_chroma_ub_one_row_per_frame[k], 1999 + &dpte_row_height_chroma_one_row_per_frame[k], 2021 2000 &meta_req_width_chroma[k], 2022 2001 &meta_req_height_chroma[k], 2023 2002 &meta_row_width_chroma[k], ··· 2045 2024 &VInitPreFillC[k], 2046 2025 &MaxNumSwathC[k]); 2047 2026 } else { 2048 - st_vars->PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma; 2049 - st_vars->PTEBufferSizeInRequestsForChroma[k] = 0; 2050 - st_vars->PixelPTEBytesPerRowC[k] = 0; 2051 - st_vars->PDEAndMetaPTEBytesFrameC = 0; 2052 - st_vars->MetaRowByteC[k] = 0; 2027 + PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma; 2028 + PTEBufferSizeInRequestsForChroma[k] = 0; 2029 + PixelPTEBytesPerRowC[k] = 0; 2030 + PDEAndMetaPTEBytesFrameC = 0; 2031 + MetaRowByteC[k] = 0; 2053 2032 MaxNumSwathC[k] = 0; 2054 2033 PrefetchSourceLinesC[k] = 0; 2055 - st_vars->dpte_row_height_chroma_one_row_per_frame[k] = 
0; 2056 - st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0; 2057 - st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0; 2034 + dpte_row_height_chroma_one_row_per_frame[k] = 0; 2035 + dpte_row_width_chroma_ub_one_row_per_frame[k] = 0; 2036 + PixelPTEBytesPerRowC_one_row_per_frame[k] = 0; 2058 2037 } 2059 2038 2060 - st_vars->PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes( 2039 + PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes( 2061 2040 myPipe[k].ViewportStationary, 2062 2041 myPipe[k].DCCEnable, 2063 2042 myPipe[k].DPPPerSurface, ··· 2077 2056 GPUVMMaxPageTableLevels, 2078 2057 GPUVMMinPageSizeKBytes[k], 2079 2058 HostVMMinPageSize, 2080 - st_vars->PTEBufferSizeInRequestsForLuma[k], 2059 + PTEBufferSizeInRequestsForLuma[k], 2081 2060 myPipe[k].PitchY, 2082 2061 myPipe[k].DCCMetaPitchY, 2083 2062 myPipe[k].BlockWidthY, 2084 2063 myPipe[k].BlockHeightY, 2085 2064 2086 2065 /* Output */ 2087 - &st_vars->MetaRowByteY[k], 2088 - &st_vars->PixelPTEBytesPerRowY[k], 2066 + &MetaRowByteY[k], 2067 + &PixelPTEBytesPerRowY[k], 2089 2068 &dpte_row_width_luma_ub[k], 2090 2069 &dpte_row_height_luma[k], 2091 2070 &dpte_row_height_linear_luma[k], 2092 - &st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k], 2093 - &st_vars->dpte_row_width_luma_ub_one_row_per_frame[k], 2094 - &st_vars->dpte_row_height_luma_one_row_per_frame[k], 2071 + &PixelPTEBytesPerRowY_one_row_per_frame[k], 2072 + &dpte_row_width_luma_ub_one_row_per_frame[k], 2073 + &dpte_row_height_luma_one_row_per_frame[k], 2095 2074 &meta_req_width[k], 2096 2075 &meta_req_height[k], 2097 2076 &meta_row_width[k], ··· 2119 2098 &VInitPreFillY[k], 2120 2099 &MaxNumSwathY[k]); 2121 2100 2122 - PDEAndMetaPTEBytesFrame[k] = st_vars->PDEAndMetaPTEBytesFrameY + st_vars->PDEAndMetaPTEBytesFrameC; 2123 - MetaRowByte[k] = st_vars->MetaRowByteY[k] + st_vars->MetaRowByteC[k]; 2101 + PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC; 2102 + MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k]; 2124 2103 2125 - if (st_vars->PixelPTEBytesPerRowY[k] <= 64 * st_vars->PTEBufferSizeInRequestsForLuma[k] && 2126 - st_vars->PixelPTEBytesPerRowC[k] <= 64 * st_vars->PTEBufferSizeInRequestsForChroma[k]) { 2104 + if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] && 2105 + PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) { 2127 2106 PTEBufferSizeNotExceeded[k] = true; 2128 2107 } else { 2129 2108 PTEBufferSizeNotExceeded[k] = false; 2130 2109 } 2131 2110 2132 - st_vars->one_row_per_frame_fits_in_buffer[k] = (st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * 2133 - st_vars->PTEBufferSizeInRequestsForLuma[k] && 2134 - st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * st_vars->PTEBufferSizeInRequestsForChroma[k]); 2111 + one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * 2112 + PTEBufferSizeInRequestsForLuma[k] && 2113 + PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]); 2135 2114 } 2136 2115 2137 2116 dml32_CalculateMALLUseForStaticScreen( ··· 2139 2118 MALLAllocatedForDCN, 2140 2119 UseMALLForStaticScreen, // mode 2141 2120 SurfaceSizeInMALL, 2142 - st_vars->one_row_per_frame_fits_in_buffer, 2121 + one_row_per_frame_fits_in_buffer, 2143 2122 /* Output */ 2144 2123 UsesMALLForStaticScreen); // boolen 2145 2124 ··· 2165 2144 !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame); 2166 2145 2167 2146 if (use_one_row_for_frame[k]) { 2168 - dpte_row_height_luma[k] = 
st_vars->dpte_row_height_luma_one_row_per_frame[k]; 2169 - dpte_row_width_luma_ub[k] = st_vars->dpte_row_width_luma_ub_one_row_per_frame[k]; 2170 - st_vars->PixelPTEBytesPerRowY[k] = st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k]; 2171 - dpte_row_height_chroma[k] = st_vars->dpte_row_height_chroma_one_row_per_frame[k]; 2172 - dpte_row_width_chroma_ub[k] = st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k]; 2173 - st_vars->PixelPTEBytesPerRowC[k] = st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k]; 2174 - PTEBufferSizeNotExceeded[k] = st_vars->one_row_per_frame_fits_in_buffer[k]; 2147 + dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k]; 2148 + dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k]; 2149 + PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k]; 2150 + dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k]; 2151 + dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k]; 2152 + PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k]; 2153 + PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k]; 2175 2154 } 2176 2155 2177 2156 if (MetaRowByte[k] <= DCCMetaBufferSizeBytes) ··· 2179 2158 else 2180 2159 DCCMetaBufferSizeNotExceeded[k] = false; 2181 2160 2182 - PixelPTEBytesPerRow[k] = st_vars->PixelPTEBytesPerRowY[k] + st_vars->PixelPTEBytesPerRowC[k]; 2161 + PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k]; 2183 2162 if (use_one_row_for_frame[k]) 2184 2163 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2; 2185 2164 ··· 2190 2169 myPipe[k].VRatioChroma, 2191 2170 myPipe[k].DCCEnable, 2192 2171 myPipe[k].HTotal / myPipe[k].PixelClock, 2193 - st_vars->MetaRowByteY[k], st_vars->MetaRowByteC[k], 2172 + MetaRowByteY[k], MetaRowByteC[k], 2194 2173 meta_row_height[k], 2195 2174 meta_row_height_chroma[k], 2196 - st_vars->PixelPTEBytesPerRowY[k], 2197 - st_vars->PixelPTEBytesPerRowC[k], 2175 + PixelPTEBytesPerRowY[k], 2176 + PixelPTEBytesPerRowC[k], 2198 2177 dpte_row_height_luma[k], 2199 2178 dpte_row_height_chroma[k], 2200 2179 ··· 2210 2189 dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]); 2211 2190 dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n", 2212 2191 __func__, k, dpte_row_width_luma_ub[k]); 2213 - dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, st_vars->PixelPTEBytesPerRowY[k]); 2192 + dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, PixelPTEBytesPerRowY[k]); 2214 2193 dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n", 2215 2194 __func__, k, dpte_row_height_chroma[k]); 2216 2195 dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n", 2217 2196 __func__, k, dpte_row_width_chroma_ub[k]); 2218 - dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, st_vars->PixelPTEBytesPerRowC[k]); 2197 + dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, PixelPTEBytesPerRowC[k]); 2219 2198 dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]); 2220 2199 dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n", 2221 2200 __func__, k, PTEBufferSizeNotExceeded[k]); ··· 3363 3342 } // CalculateExtraLatency 3364 3343 3365 3344 bool dml32_CalculatePrefetchSchedule( 3366 - struct dml32_CalculatePrefetchSchedule *st_vars, 3367 3345 double HostVMInefficiencyFactor, 3368 3346 DmlPipe *myPipe, 3369 3347 unsigned int DSCDelay, ··· 3426 3406 double *VReadyOffsetPix) 3427 3407 { 3428 3408 
bool MyError = false; 3429 - 3430 - st_vars->TimeForFetchingMetaPTE = 0; 3431 - st_vars->TimeForFetchingRowInVBlank = 0; 3432 - st_vars->LinesToRequestPrefetchPixelData = 0; 3433 - st_vars->max_vratio_pre = __DML_MAX_VRATIO_PRE__; 3434 - st_vars->Tsw_est1 = 0; 3435 - st_vars->Tsw_est3 = 0; 3409 + unsigned int DPPCycles, DISPCLKCycles; 3410 + double DSTTotalPixelsAfterScaler; 3411 + double LineTime; 3412 + double dst_y_prefetch_equ; 3413 + double prefetch_bw_oto; 3414 + double Tvm_oto; 3415 + double Tr0_oto; 3416 + double Tvm_oto_lines; 3417 + double Tr0_oto_lines; 3418 + double dst_y_prefetch_oto; 3419 + double TimeForFetchingMetaPTE = 0; 3420 + double TimeForFetchingRowInVBlank = 0; 3421 + double LinesToRequestPrefetchPixelData = 0; 3422 + unsigned int HostVMDynamicLevelsTrips; 3423 + double trip_to_mem; 3424 + double Tvm_trips; 3425 + double Tr0_trips; 3426 + double Tvm_trips_rounded; 3427 + double Tr0_trips_rounded; 3428 + double Lsw_oto; 3429 + double Tpre_rounded; 3430 + double prefetch_bw_equ; 3431 + double Tvm_equ; 3432 + double Tr0_equ; 3433 + double Tdmbf; 3434 + double Tdmec; 3435 + double Tdmsks; 3436 + double prefetch_sw_bytes; 3437 + double bytes_pp; 3438 + double dep_bytes; 3439 + unsigned int max_vratio_pre = __DML_MAX_VRATIO_PRE__; 3440 + double min_Lsw; 3441 + double Tsw_est1 = 0; 3442 + double Tsw_est3 = 0; 3436 3443 3437 3444 if (GPUVMEnable == true && HostVMEnable == true) 3438 - st_vars->HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; 3445 + HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; 3439 3446 else 3440 - st_vars->HostVMDynamicLevelsTrips = 0; 3447 + HostVMDynamicLevelsTrips = 0; 3441 3448 #ifdef __DML_VBA_DEBUG__ 3442 3449 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); 3443 3450 dml_print("DML::%s: GPUVMPageTableLevels = %d\n", __func__, GPUVMPageTableLevels); ··· 3487 3440 TSetup, 3488 3441 3489 3442 /* output */ 3490 - &st_vars->Tdmbf, 3491 - &st_vars->Tdmec, 3492 - &st_vars->Tdmsks, 3443 + &Tdmbf, 3444 + &Tdmec, 3445 + &Tdmsks, 3493 3446 VUpdateOffsetPix, 3494 3447 VUpdateWidthPix, 3495 3448 VReadyOffsetPix); 3496 3449 3497 - st_vars->LineTime = myPipe->HTotal / myPipe->PixelClock; 3498 - st_vars->trip_to_mem = UrgentLatency; 3499 - st_vars->Tvm_trips = UrgentExtraLatency + st_vars->trip_to_mem * (GPUVMPageTableLevels * (st_vars->HostVMDynamicLevelsTrips + 1) - 1); 3450 + LineTime = myPipe->HTotal / myPipe->PixelClock; 3451 + trip_to_mem = UrgentLatency; 3452 + Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); 3500 3453 3501 3454 if (DynamicMetadataVMEnabled == true) 3502 - *Tdmdl = TWait + st_vars->Tvm_trips + st_vars->trip_to_mem; 3455 + *Tdmdl = TWait + Tvm_trips + trip_to_mem; 3503 3456 else 3504 3457 *Tdmdl = TWait + UrgentExtraLatency; 3505 3458 ··· 3509 3462 #endif 3510 3463 3511 3464 if (DynamicMetadataEnable == true) { 3512 - if (VStartup * st_vars->LineTime < *TSetup + *Tdmdl + st_vars->Tdmbf + st_vars->Tdmec + st_vars->Tdmsks) { 3465 + if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { 3513 3466 *NotEnoughTimeForDynamicMetadata = true; 3514 3467 #ifdef __DML_VBA_DEBUG__ 3515 3468 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); 3516 3469 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", 3517 - __func__, st_vars->Tdmbf); 3518 - dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, st_vars->Tdmec); 3470 + __func__, Tdmbf); 3471 + 
dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); 3519 3472 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", 3520 - __func__, st_vars->Tdmsks); 3473 + __func__, Tdmsks); 3521 3474 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", 3522 3475 __func__, *Tdmdl); 3523 3476 #endif ··· 3529 3482 } 3530 3483 3531 3484 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && 3532 - GPUVMEnable == true ? TWait + st_vars->Tvm_trips : 0); 3485 + GPUVMEnable == true ? TWait + Tvm_trips : 0); 3533 3486 3534 3487 if (myPipe->ScalerEnabled) 3535 - st_vars->DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; 3488 + DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; 3536 3489 else 3537 - st_vars->DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; 3490 + DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; 3538 3491 3539 - st_vars->DPPCycles = st_vars->DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; 3492 + DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; 3540 3493 3541 - st_vars->DISPCLKCycles = DISPCLKDelaySubtotal; 3494 + DISPCLKCycles = DISPCLKDelaySubtotal; 3542 3495 3543 3496 if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0) 3544 3497 return true; 3545 3498 3546 - *DSTXAfterScaler = st_vars->DPPCycles * myPipe->PixelClock / myPipe->Dppclk + st_vars->DISPCLKCycles * 3499 + *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles * 3547 3500 myPipe->PixelClock / myPipe->Dispclk + DSCDelay; 3548 3501 3549 3502 *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0) ··· 3553 3506 + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? 
myPipe->HActive * 3 / 4 : 0); 3554 3507 3555 3508 #ifdef __DML_VBA_DEBUG__ 3556 - dml_print("DML::%s: DPPCycles: %d\n", __func__, st_vars->DPPCycles); 3509 + dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles); 3557 3510 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock); 3558 3511 dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk); 3559 - dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, st_vars->DISPCLKCycles); 3512 + dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles); 3560 3513 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk); 3561 3514 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay); 3562 3515 dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode); ··· 3569 3522 else 3570 3523 *DSTYAfterScaler = 0; 3571 3524 3572 - st_vars->DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; 3573 - *DSTYAfterScaler = dml_floor(st_vars->DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); 3574 - *DSTXAfterScaler = st_vars->DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); 3525 + DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; 3526 + *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); 3527 + *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); 3575 3528 #ifdef __DML_VBA_DEBUG__ 3576 3529 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler); 3577 3530 dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler); ··· 3579 3532 3580 3533 MyError = false; 3581 3534 3582 - st_vars->Tr0_trips = st_vars->trip_to_mem * (st_vars->HostVMDynamicLevelsTrips + 1); 3535 + Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1); 3583 3536 3584 3537 if (GPUVMEnable == true) { 3585 - st_vars->Tvm_trips_rounded = dml_ceil(4.0 * st_vars->Tvm_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime; 3586 - st_vars->Tr0_trips_rounded = dml_ceil(4.0 * st_vars->Tr0_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime; 3538 + Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime; 3539 + Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; 3587 3540 if (GPUVMPageTableLevels >= 3) { 3588 - *Tno_bw = UrgentExtraLatency + st_vars->trip_to_mem * 3589 - (double) ((GPUVMPageTableLevels - 2) * (st_vars->HostVMDynamicLevelsTrips + 1) - 1); 3541 + *Tno_bw = UrgentExtraLatency + trip_to_mem * 3542 + (double) ((GPUVMPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1); 3590 3543 } else if (GPUVMPageTableLevels == 1 && myPipe->DCCEnable != true) { 3591 - st_vars->Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / st_vars->LineTime, 1.0) / 3592 - 4.0 * st_vars->LineTime; // VBA_ERROR 3544 + Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) / 3545 + 4.0 * LineTime; // VBA_ERROR 3593 3546 *Tno_bw = UrgentExtraLatency; 3594 3547 } else { 3595 3548 *Tno_bw = 0; 3596 3549 } 3597 3550 } else if (myPipe->DCCEnable == true) { 3598 - st_vars->Tvm_trips_rounded = st_vars->LineTime / 4.0; 3599 - st_vars->Tr0_trips_rounded = dml_ceil(4.0 * st_vars->Tr0_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime; 3551 + Tvm_trips_rounded = LineTime / 4.0; 3552 + Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; 3600 3553 *Tno_bw = 0; 3601 3554 } else { 3602 - st_vars->Tvm_trips_rounded = st_vars->LineTime / 4.0; 3603 - st_vars->Tr0_trips_rounded = 
st_vars->LineTime / 2.0; 3555 + Tvm_trips_rounded = LineTime / 4.0; 3556 + Tr0_trips_rounded = LineTime / 2.0; 3604 3557 *Tno_bw = 0; 3605 3558 } 3606 - st_vars->Tvm_trips_rounded = dml_max(st_vars->Tvm_trips_rounded, st_vars->LineTime / 4.0); 3607 - st_vars->Tr0_trips_rounded = dml_max(st_vars->Tr0_trips_rounded, st_vars->LineTime / 4.0); 3559 + Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0); 3560 + Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0); 3608 3561 3609 3562 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 3610 3563 || myPipe->SourcePixelFormat == dm_420_12) { 3611 - st_vars->bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; 3564 + bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; 3612 3565 } else { 3613 - st_vars->bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; 3566 + bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; 3614 3567 } 3615 3568 3616 - st_vars->prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY 3569 + prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY 3617 3570 + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC; 3618 - st_vars->prefetch_bw_oto = dml_max(st_vars->bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface, 3619 - st_vars->prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * st_vars->LineTime)); 3571 + prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface, 3572 + prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime)); 3620 3573 3621 - st_vars->min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / st_vars->max_vratio_pre; 3622 - st_vars->min_Lsw = dml_max(st_vars->min_Lsw, 1.0); 3623 - st_vars->Lsw_oto = dml_ceil(4.0 * dml_max(st_vars->prefetch_sw_bytes / st_vars->prefetch_bw_oto / st_vars->LineTime, st_vars->min_Lsw), 1.0) / 4.0; 3574 + min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre; 3575 + min_Lsw = dml_max(min_Lsw, 1.0); 3576 + Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0; 3624 3577 3625 3578 if (GPUVMEnable == true) { 3626 - st_vars->Tvm_oto = dml_max3( 3627 - st_vars->Tvm_trips, 3628 - *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / st_vars->prefetch_bw_oto, 3629 - st_vars->LineTime / 4.0); 3579 + Tvm_oto = dml_max3( 3580 + Tvm_trips, 3581 + *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, 3582 + LineTime / 4.0); 3630 3583 } else 3631 - st_vars->Tvm_oto = st_vars->LineTime / 4.0; 3584 + Tvm_oto = LineTime / 4.0; 3632 3585 3633 3586 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { 3634 - st_vars->Tr0_oto = dml_max4( 3635 - st_vars->Tr0_trips, 3636 - (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / st_vars->prefetch_bw_oto, 3637 - (st_vars->LineTime - st_vars->Tvm_oto)/2.0, 3638 - st_vars->LineTime / 4.0); 3587 + Tr0_oto = dml_max4( 3588 + Tr0_trips, 3589 + (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, 3590 + (LineTime - Tvm_oto)/2.0, 3591 + LineTime / 4.0); 3639 3592 #ifdef __DML_VBA_DEBUG__ 3640 3593 dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__, 3641 - (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / st_vars->prefetch_bw_oto); 3642 - dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, st_vars->Tr0_trips); 3643 - dml_print("DML::%s: Tr0_oto max2 = %f\n", 
__func__, st_vars->LineTime - st_vars->Tvm_oto); 3644 - dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, st_vars->LineTime / 4); 3594 + (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto); 3595 + dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips); 3596 + dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto); 3597 + dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4); 3645 3598 #endif 3646 3599 } else 3647 - st_vars->Tr0_oto = (st_vars->LineTime - st_vars->Tvm_oto) / 2.0; 3600 + Tr0_oto = (LineTime - Tvm_oto) / 2.0; 3648 3601 3649 - st_vars->Tvm_oto_lines = dml_ceil(4.0 * st_vars->Tvm_oto / st_vars->LineTime, 1) / 4.0; 3650 - st_vars->Tr0_oto_lines = dml_ceil(4.0 * st_vars->Tr0_oto / st_vars->LineTime, 1) / 4.0; 3651 - st_vars->dst_y_prefetch_oto = st_vars->Tvm_oto_lines + 2 * st_vars->Tr0_oto_lines + st_vars->Lsw_oto; 3602 + Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0; 3603 + Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; 3604 + dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto; 3652 3605 3653 - st_vars->dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / st_vars->LineTime - 3606 + dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - 3654 3607 (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal); 3655 3608 3656 3609 #ifdef __DML_VBA_DEBUG__ 3657 3610 dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal); 3658 - dml_print("DML::%s: min_Lsw = %f\n", __func__, st_vars->min_Lsw); 3611 + dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw); 3659 3612 dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw); 3660 3613 dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency); 3661 - dml_print("DML::%s: trip_to_mem = %f\n", __func__, st_vars->trip_to_mem); 3614 + dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem); 3662 3615 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); 3663 3616 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 3664 3617 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); 3665 3618 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC); 3666 3619 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); 3667 3620 dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub); 3668 - dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, st_vars->prefetch_sw_bytes); 3669 - dml_print("DML::%s: bytes_pp = %f\n", __func__, st_vars->bytes_pp); 3621 + dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes); 3622 + dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp); 3670 3623 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 3671 3624 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 3672 3625 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 3673 3626 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 3674 - dml_print("DML::%s: Tvm_trips = %f\n", __func__, st_vars->Tvm_trips); 3675 - dml_print("DML::%s: Tr0_trips = %f\n", __func__, st_vars->Tr0_trips); 3676 - dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, st_vars->prefetch_bw_oto); 3677 - dml_print("DML::%s: Tr0_oto = %f\n", __func__, st_vars->Tr0_oto); 3678 - dml_print("DML::%s: 
Tvm_oto = %f\n", __func__, st_vars->Tvm_oto); 3679 - dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, st_vars->Tvm_oto_lines); 3680 - dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, st_vars->Tr0_oto_lines); 3681 - dml_print("DML::%s: Lsw_oto = %f\n", __func__, st_vars->Lsw_oto); 3682 - dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, st_vars->dst_y_prefetch_oto); 3683 - dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, st_vars->dst_y_prefetch_equ); 3627 + dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips); 3628 + dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips); 3629 + dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto); 3630 + dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto); 3631 + dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto); 3632 + dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines); 3633 + dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines); 3634 + dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto); 3635 + dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto); 3636 + dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ); 3684 3637 #endif 3685 3638 3686 - st_vars->dst_y_prefetch_equ = dml_floor(4.0 * (st_vars->dst_y_prefetch_equ + 0.125), 1) / 4.0; 3687 - st_vars->Tpre_rounded = st_vars->dst_y_prefetch_equ * st_vars->LineTime; 3639 + dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; 3640 + Tpre_rounded = dst_y_prefetch_equ * LineTime; 3688 3641 #ifdef __DML_VBA_DEBUG__ 3689 - dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, st_vars->dst_y_prefetch_equ); 3690 - dml_print("DML::%s: LineTime: %f\n", __func__, st_vars->LineTime); 3642 + dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ); 3643 + dml_print("DML::%s: LineTime: %f\n", __func__, LineTime); 3691 3644 dml_print("DML::%s: VStartup: %d\n", __func__, VStartup); 3692 3645 dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", 3693 - __func__, VStartup * st_vars->LineTime); 3646 + __func__, VStartup * LineTime); 3694 3647 dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup); 3695 3648 dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc); 3696 - dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, st_vars->Tdmbf); 3697 - dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, st_vars->Tdmec); 3649 + dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf); 3650 + dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); 3698 3651 dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm); 3699 3652 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl); 3700 3653 dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n", 3701 3654 __func__, *DSTYAfterScaler); 3702 3655 #endif 3703 - st_vars->dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, 3656 + dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, 3704 3657 MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor); 3705 3658 3706 - if (st_vars->prefetch_sw_bytes < st_vars->dep_bytes) 3707 - 
st_vars->prefetch_sw_bytes = 2 * st_vars->dep_bytes; 3659 + if (prefetch_sw_bytes < dep_bytes) 3660 + prefetch_sw_bytes = 2 * dep_bytes; 3708 3661 3709 3662 *PrefetchBandwidth = 0; 3710 3663 *DestinationLinesToRequestVMInVBlank = 0; ··· 3712 3665 *VRatioPrefetchY = 0; 3713 3666 *VRatioPrefetchC = 0; 3714 3667 *RequiredPrefetchPixDataBWLuma = 0; 3715 - if (st_vars->dst_y_prefetch_equ > 1) { 3668 + if (dst_y_prefetch_equ > 1) { 3716 3669 double PrefetchBandwidth1; 3717 3670 double PrefetchBandwidth2; 3718 3671 double PrefetchBandwidth3; 3719 3672 double PrefetchBandwidth4; 3720 3673 3721 - if (st_vars->Tpre_rounded - *Tno_bw > 0) { 3674 + if (Tpre_rounded - *Tno_bw > 0) { 3722 3675 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte 3723 3676 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 3724 - + st_vars->prefetch_sw_bytes) / (st_vars->Tpre_rounded - *Tno_bw); 3725 - st_vars->Tsw_est1 = st_vars->prefetch_sw_bytes / PrefetchBandwidth1; 3677 + + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw); 3678 + Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1; 3726 3679 } else 3727 3680 PrefetchBandwidth1 = 0; 3728 3681 3729 - if (VStartup == MaxVStartup && (st_vars->Tsw_est1 / st_vars->LineTime < st_vars->min_Lsw) 3730 - && st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - *Tno_bw > 0) { 3682 + if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw) 3683 + && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) { 3731 3684 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte 3732 3685 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3733 - / (st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - *Tno_bw); 3686 + / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw); 3734 3687 } 3735 3688 3736 - if (st_vars->Tpre_rounded - *Tno_bw - 2 * st_vars->Tr0_trips_rounded > 0) 3737 - PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + st_vars->prefetch_sw_bytes) / 3738 - (st_vars->Tpre_rounded - *Tno_bw - 2 * st_vars->Tr0_trips_rounded); 3689 + if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0) 3690 + PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / 3691 + (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded); 3739 3692 else 3740 3693 PrefetchBandwidth2 = 0; 3741 3694 3742 - if (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded > 0) { 3695 + if (Tpre_rounded - Tvm_trips_rounded > 0) { 3743 3696 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 3744 - + st_vars->prefetch_sw_bytes) / (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded); 3745 - st_vars->Tsw_est3 = st_vars->prefetch_sw_bytes / PrefetchBandwidth3; 3697 + + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded); 3698 + Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3; 3746 3699 } else 3747 3700 PrefetchBandwidth3 = 0; 3748 3701 3749 3702 3750 3703 if (VStartup == MaxVStartup && 3751 - (st_vars->Tsw_est3 / st_vars->LineTime < st_vars->min_Lsw) && st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * 3752 - st_vars->LineTime - st_vars->Tvm_trips_rounded > 0) { 3704 + (Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 * 3705 + LineTime - Tvm_trips_rounded > 0) { 3753 3706 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3754 - / (st_vars->Tpre_rounded - 
st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - st_vars->Tvm_trips_rounded); 3707 + / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded); 3755 3708 } 3756 3709 3757 - if (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded - 2 * st_vars->Tr0_trips_rounded > 0) { 3758 - PrefetchBandwidth4 = st_vars->prefetch_sw_bytes / 3759 - (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded - 2 * st_vars->Tr0_trips_rounded); 3710 + if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) { 3711 + PrefetchBandwidth4 = prefetch_sw_bytes / 3712 + (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded); 3760 3713 } else { 3761 3714 PrefetchBandwidth4 = 0; 3762 3715 } 3763 3716 3764 3717 #ifdef __DML_VBA_DEBUG__ 3765 - dml_print("DML::%s: Tpre_rounded: %f\n", __func__, st_vars->Tpre_rounded); 3718 + dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded); 3766 3719 dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw); 3767 - dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, st_vars->Tvm_trips_rounded); 3768 - dml_print("DML::%s: Tsw_est1: %f\n", __func__, st_vars->Tsw_est1); 3769 - dml_print("DML::%s: Tsw_est3: %f\n", __func__, st_vars->Tsw_est3); 3720 + dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded); 3721 + dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1); 3722 + dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3); 3770 3723 dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1); 3771 3724 dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2); 3772 3725 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3); ··· 3779 3732 3780 3733 if (PrefetchBandwidth1 > 0) { 3781 3734 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 3782 - >= st_vars->Tvm_trips_rounded 3735 + >= Tvm_trips_rounded 3783 3736 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3784 - / PrefetchBandwidth1 >= st_vars->Tr0_trips_rounded) { 3737 + / PrefetchBandwidth1 >= Tr0_trips_rounded) { 3785 3738 Case1OK = true; 3786 3739 } else { 3787 3740 Case1OK = false; ··· 3792 3745 3793 3746 if (PrefetchBandwidth2 > 0) { 3794 3747 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 3795 - >= st_vars->Tvm_trips_rounded 3748 + >= Tvm_trips_rounded 3796 3749 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3797 - / PrefetchBandwidth2 < st_vars->Tr0_trips_rounded) { 3750 + / PrefetchBandwidth2 < Tr0_trips_rounded) { 3798 3751 Case2OK = true; 3799 3752 } else { 3800 3753 Case2OK = false; ··· 3805 3758 3806 3759 if (PrefetchBandwidth3 > 0) { 3807 3760 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < 3808 - st_vars->Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * 3761 + Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * 3809 3762 HostVMInefficiencyFactor) / PrefetchBandwidth3 >= 3810 - st_vars->Tr0_trips_rounded) { 3763 + Tr0_trips_rounded) { 3811 3764 Case3OK = true; 3812 3765 } else { 3813 3766 Case3OK = false; ··· 3817 3770 } 3818 3771 3819 3772 if (Case1OK) 3820 - st_vars->prefetch_bw_equ = PrefetchBandwidth1; 3773 + prefetch_bw_equ = PrefetchBandwidth1; 3821 3774 else if (Case2OK) 3822 - st_vars->prefetch_bw_equ = PrefetchBandwidth2; 3775 + prefetch_bw_equ = PrefetchBandwidth2; 3823 3776 else if (Case3OK) 3824 - st_vars->prefetch_bw_equ = PrefetchBandwidth3; 3777 + prefetch_bw_equ = PrefetchBandwidth3; 3825 3778 else 3826 - 
st_vars->prefetch_bw_equ = PrefetchBandwidth4; 3779 + prefetch_bw_equ = PrefetchBandwidth4; 3827 3780 3828 3781 #ifdef __DML_VBA_DEBUG__ 3829 3782 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK); 3830 3783 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK); 3831 3784 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK); 3832 - dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, st_vars->prefetch_bw_equ); 3785 + dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ); 3833 3786 #endif 3834 3787 3835 - if (st_vars->prefetch_bw_equ > 0) { 3788 + if (prefetch_bw_equ > 0) { 3836 3789 if (GPUVMEnable == true) { 3837 - st_vars->Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * 3838 - HostVMInefficiencyFactor / st_vars->prefetch_bw_equ, 3839 - st_vars->Tvm_trips, st_vars->LineTime / 4); 3790 + Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * 3791 + HostVMInefficiencyFactor / prefetch_bw_equ, 3792 + Tvm_trips, LineTime / 4); 3840 3793 } else { 3841 - st_vars->Tvm_equ = st_vars->LineTime / 4; 3794 + Tvm_equ = LineTime / 4; 3842 3795 } 3843 3796 3844 3797 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { 3845 - st_vars->Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow * 3846 - HostVMInefficiencyFactor) / st_vars->prefetch_bw_equ, st_vars->Tr0_trips, 3847 - (st_vars->LineTime - st_vars->Tvm_equ) / 2, st_vars->LineTime / 4); 3798 + Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow * 3799 + HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips, 3800 + (LineTime - Tvm_equ) / 2, LineTime / 4); 3848 3801 } else { 3849 - st_vars->Tr0_equ = (st_vars->LineTime - st_vars->Tvm_equ) / 2; 3802 + Tr0_equ = (LineTime - Tvm_equ) / 2; 3850 3803 } 3851 3804 } else { 3852 - st_vars->Tvm_equ = 0; 3853 - st_vars->Tr0_equ = 0; 3805 + Tvm_equ = 0; 3806 + Tr0_equ = 0; 3854 3807 #ifdef __DML_VBA_DEBUG__ 3855 3808 dml_print("DML: prefetch_bw_equ equals 0! 
%s:%d\n", __FILE__, __LINE__); 3856 3809 #endif 3857 3810 } 3858 3811 } 3859 3812 3860 - if (st_vars->dst_y_prefetch_oto < st_vars->dst_y_prefetch_equ) { 3861 - *DestinationLinesForPrefetch = st_vars->dst_y_prefetch_oto; 3862 - st_vars->TimeForFetchingMetaPTE = st_vars->Tvm_oto; 3863 - st_vars->TimeForFetchingRowInVBlank = st_vars->Tr0_oto; 3864 - *PrefetchBandwidth = st_vars->prefetch_bw_oto; 3813 + if (dst_y_prefetch_oto < dst_y_prefetch_equ) { 3814 + *DestinationLinesForPrefetch = dst_y_prefetch_oto; 3815 + TimeForFetchingMetaPTE = Tvm_oto; 3816 + TimeForFetchingRowInVBlank = Tr0_oto; 3817 + *PrefetchBandwidth = prefetch_bw_oto; 3865 3818 } else { 3866 - *DestinationLinesForPrefetch = st_vars->dst_y_prefetch_equ; 3867 - st_vars->TimeForFetchingMetaPTE = st_vars->Tvm_equ; 3868 - st_vars->TimeForFetchingRowInVBlank = st_vars->Tr0_equ; 3869 - *PrefetchBandwidth = st_vars->prefetch_bw_equ; 3819 + *DestinationLinesForPrefetch = dst_y_prefetch_equ; 3820 + TimeForFetchingMetaPTE = Tvm_equ; 3821 + TimeForFetchingRowInVBlank = Tr0_equ; 3822 + *PrefetchBandwidth = prefetch_bw_equ; 3870 3823 } 3871 3824 3872 - *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * st_vars->TimeForFetchingMetaPTE / st_vars->LineTime, 1.0) / 4.0; 3825 + *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; 3873 3826 3874 3827 *DestinationLinesToRequestRowInVBlank = 3875 - dml_ceil(4.0 * st_vars->TimeForFetchingRowInVBlank / st_vars->LineTime, 1.0) / 4.0; 3828 + dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; 3876 3829 3877 - st_vars->LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - 3830 + LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - 3878 3831 *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank; 3879 3832 3880 3833 #ifdef __DML_VBA_DEBUG__ 3881 3834 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch); 3882 3835 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", 3883 3836 __func__, *DestinationLinesToRequestVMInVBlank); 3884 - dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, st_vars->TimeForFetchingRowInVBlank); 3885 - dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime); 3837 + dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank); 3838 + dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 3886 3839 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", 3887 3840 __func__, *DestinationLinesToRequestRowInVBlank); 3888 3841 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 3889 - dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, st_vars->LinesToRequestPrefetchPixelData); 3842 + dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData); 3890 3843 #endif 3891 3844 3892 - if (st_vars->LinesToRequestPrefetchPixelData >= 1 && st_vars->prefetch_bw_equ > 0) { 3893 - *VRatioPrefetchY = (double) PrefetchSourceLinesY / st_vars->LinesToRequestPrefetchPixelData; 3845 + if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) { 3846 + *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData; 3894 3847 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 3895 3848 #ifdef __DML_VBA_DEBUG__ 3896 3849 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); ··· 3898 3851 dml_print("DML::%s: VInitPreFillY = %d\n", 
__func__, VInitPreFillY); 3899 3852 #endif 3900 3853 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { 3901 - if (st_vars->LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { 3854 + if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { 3902 3855 *VRatioPrefetchY = 3903 3856 dml_max((double) PrefetchSourceLinesY / 3904 - st_vars->LinesToRequestPrefetchPixelData, 3857 + LinesToRequestPrefetchPixelData, 3905 3858 (double) MaxNumSwathY * SwathHeightY / 3906 - (st_vars->LinesToRequestPrefetchPixelData - 3859 + (LinesToRequestPrefetchPixelData - 3907 3860 (VInitPreFillY - 3.0) / 2.0)); 3908 3861 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 3909 3862 } else { ··· 3917 3870 #endif 3918 3871 } 3919 3872 3920 - *VRatioPrefetchC = (double) PrefetchSourceLinesC / st_vars->LinesToRequestPrefetchPixelData; 3873 + *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData; 3921 3874 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 3922 3875 3923 3876 #ifdef __DML_VBA_DEBUG__ ··· 3926 3879 dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC); 3927 3880 #endif 3928 3881 if ((SwathHeightC > 4)) { 3929 - if (st_vars->LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { 3882 + if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { 3930 3883 *VRatioPrefetchC = 3931 3884 dml_max(*VRatioPrefetchC, 3932 3885 (double) MaxNumSwathC * SwathHeightC / 3933 - (st_vars->LinesToRequestPrefetchPixelData - 3886 + (LinesToRequestPrefetchPixelData - 3934 3887 (VInitPreFillC - 3.0) / 2.0)); 3935 3888 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 3936 3889 } else { ··· 3945 3898 } 3946 3899 3947 3900 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY 3948 - / st_vars->LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub 3949 - / st_vars->LineTime; 3901 + / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub 3902 + / LineTime; 3950 3903 3951 3904 #ifdef __DML_VBA_DEBUG__ 3952 3905 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); 3953 3906 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); 3954 - dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime); 3907 + dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 3955 3908 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", 3956 3909 __func__, *RequiredPrefetchPixDataBWLuma); 3957 3910 #endif 3958 3911 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / 3959 - st_vars->LinesToRequestPrefetchPixelData 3912 + LinesToRequestPrefetchPixelData 3960 3913 * myPipe->BytePerPixelC 3961 - * swath_width_chroma_ub / st_vars->LineTime; 3914 + * swath_width_chroma_ub / LineTime; 3962 3915 } else { 3963 3916 MyError = true; 3964 3917 #ifdef __DML_VBA_DEBUG__ 3965 3918 dml_print("DML:%s: MyErr set. 
LinesToRequestPrefetchPixelData: %f, should be > 0\n", 3966 - __func__, st_vars->LinesToRequestPrefetchPixelData); 3919 + __func__, LinesToRequestPrefetchPixelData); 3967 3920 #endif 3968 3921 *VRatioPrefetchY = 0; 3969 3922 *VRatioPrefetchC = 0; ··· 3972 3925 } 3973 3926 #ifdef __DML_VBA_DEBUG__ 3974 3927 dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", 3975 - (double)st_vars->LinesToRequestPrefetchPixelData * st_vars->LineTime + 3976 - 2.0*st_vars->TimeForFetchingRowInVBlank + st_vars->TimeForFetchingMetaPTE); 3977 - dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", st_vars->TimeForFetchingMetaPTE); 3928 + (double)LinesToRequestPrefetchPixelData * LineTime + 3929 + 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE); 3930 + dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE); 3978 3931 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", 3979 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * st_vars->LineTime); 3932 + (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime); 3980 3933 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"); 3981 - dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * st_vars->LineTime - 3982 - st_vars->TimeForFetchingMetaPTE - 2*st_vars->TimeForFetchingRowInVBlank - (*DSTYAfterScaler + 3983 - ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * st_vars->LineTime - TWait - TCalc - *TSetup); 3934 + dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - 3935 + TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler + 3936 + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup); 3984 3937 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", 3985 3938 PixelPTEBytesPerRow); 3986 3939 #endif ··· 3988 3941 MyError = true; 3989 3942 #ifdef __DML_VBA_DEBUG__ 3990 3943 dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", 3991 - __func__, st_vars->dst_y_prefetch_equ); 3944 + __func__, dst_y_prefetch_equ); 3992 3945 #endif 3993 3946 } 3994 3947 ··· 4004 3957 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 4005 3958 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", 4006 3959 __func__, *DestinationLinesToRequestVMInVBlank); 4007 - dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime); 3960 + dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 4008 3961 #endif 4009 3962 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / 4010 - (*DestinationLinesToRequestVMInVBlank * st_vars->LineTime); 3963 + (*DestinationLinesToRequestVMInVBlank * LineTime); 4011 3964 #ifdef __DML_VBA_DEBUG__ 4012 3965 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); 4013 3966 #endif ··· 4024 3977 prefetch_row_bw = 0; 4025 3978 } else if (*DestinationLinesToRequestRowInVBlank > 0) { 4026 3979 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / 4027 - (*DestinationLinesToRequestRowInVBlank * st_vars->LineTime); 3980 + (*DestinationLinesToRequestRowInVBlank * LineTime); 4028 3981 4029 3982 #ifdef __DML_VBA_DEBUG__ 4030 3983 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); ··· 4047 4000 4048 4001 if (MyError) { 4049 4002 *PrefetchBandwidth = 0; 
4050 - st_vars->TimeForFetchingMetaPTE = 0; 4051 - st_vars->TimeForFetchingRowInVBlank = 0; 4003 + TimeForFetchingMetaPTE = 0; 4004 + TimeForFetchingRowInVBlank = 0; 4052 4005 *DestinationLinesToRequestVMInVBlank = 0; 4053 4006 *DestinationLinesToRequestRowInVBlank = 0; 4054 4007 *DestinationLinesForPrefetch = 0; 4055 - st_vars->LinesToRequestPrefetchPixelData = 0; 4008 + LinesToRequestPrefetchPixelData = 0; 4056 4009 *VRatioPrefetchY = 0; 4057 4010 *VRatioPrefetchC = 0; 4058 4011 *RequiredPrefetchPixDataBWLuma = 0; ··· 4206 4159 } // CalculateFlipSchedule 4207 4160 4208 4161 void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( 4209 - struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport *st_vars, 4210 4162 bool USRRetrainingRequiredFinal, 4211 4163 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 4212 4164 unsigned int PrefetchMode, ··· 4267 4221 double ActiveDRAMClockChangeLatencyMargin[]) 4268 4222 { 4269 4223 unsigned int i, j, k; 4224 + unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0; 4225 + unsigned int DRAMClockChangeSupportNumber = 0; 4226 + unsigned int LastSurfaceWithoutMargin; 4227 + unsigned int DRAMClockChangeMethod = 0; 4228 + bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false; 4229 + double MinActiveFCLKChangeMargin = 0.; 4230 + double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.; 4231 + double ActiveClockChangeLatencyHidingY; 4232 + double ActiveClockChangeLatencyHidingC; 4233 + double ActiveClockChangeLatencyHiding; 4234 + double EffectiveDETBufferSizeY; 4235 + double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX]; 4236 + double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX]; 4237 + double TotalPixelBW = 0.0; 4238 + bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX]; 4239 + double EffectiveLBLatencyHidingY; 4240 + double EffectiveLBLatencyHidingC; 4241 + double LinesInDETY[DC__NUM_DPP__MAX]; 4242 + double LinesInDETC[DC__NUM_DPP__MAX]; 4243 + unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; 4244 + unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX]; 4245 + double FullDETBufferingTimeY; 4246 + double FullDETBufferingTimeC; 4247 + double WritebackDRAMClockChangeLatencyMargin; 4248 + double WritebackFCLKChangeLatencyMargin; 4249 + double WritebackLatencyHiding; 4250 + bool SameTimingForFCLKChange; 4270 4251 4271 - st_vars->SurfaceWithMinActiveFCLKChangeMargin = 0; 4272 - st_vars->DRAMClockChangeSupportNumber = 0; 4273 - st_vars->DRAMClockChangeMethod = 0; 4274 - st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false; 4275 - st_vars->MinActiveFCLKChangeMargin = 0.; 4276 - st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.; 4277 - st_vars->TotalPixelBW = 0.0; 4278 - st_vars->TotalActiveWriteback = 0; 4252 + unsigned int TotalActiveWriteback = 0; 4253 + unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX]; 4254 + unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX]; 4279 4255 4280 4256 Watermark->UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency; 4281 4257 Watermark->USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency ··· 4329 4261 #endif 4330 4262 4331 4263 4332 - st_vars->TotalActiveWriteback = 0; 4264 + TotalActiveWriteback = 0; 4333 4265 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4334 4266 if (WritebackEnable[k] == true) 4335 - st_vars->TotalActiveWriteback = st_vars->TotalActiveWriteback + 1; 4267 + TotalActiveWriteback = TotalActiveWriteback + 1; 4336 4268 } 4337 4269 4338 - if 
(st_vars->TotalActiveWriteback <= 1) { 4270 + if (TotalActiveWriteback <= 1) { 4339 4271 Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency; 4340 4272 } else { 4341 4273 Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency ··· 4345 4277 Watermark->WritebackUrgentWatermark = Watermark->WritebackUrgentWatermark 4346 4278 + mmSOCParameters.USRRetrainingLatency; 4347 4279 4348 - if (st_vars->TotalActiveWriteback <= 1) { 4280 + if (TotalActiveWriteback <= 1) { 4349 4281 Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency 4350 4282 + mmSOCParameters.WritebackLatency; 4351 4283 Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency ··· 4375 4307 #endif 4376 4308 4377 4309 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4378 - st_vars->TotalPixelBW = st_vars->TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + 4310 + TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + 4379 4311 SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k]) / (HTotal[k] / PixelClock[k]); 4380 4312 } 4381 4313 4382 4314 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4383 4315 4384 - st_vars->LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1); 4385 - st_vars->LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1); 4316 + LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1); 4317 + LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1); 4386 4318 4387 4319 4388 4320 #ifdef __DML_VBA_DEBUG__ ··· 4393 4325 dml_print("DML::%s: k=%d, VTaps = %d\n", __func__, k, VTaps[k]); 4394 4326 #endif 4395 4327 4396 - st_vars->EffectiveLBLatencyHidingY = st_vars->LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]); 4397 - st_vars->EffectiveLBLatencyHidingC = st_vars->LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]); 4398 - st_vars->EffectiveDETBufferSizeY = DETBufferSizeY[k]; 4328 + EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]); 4329 + EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]); 4330 + EffectiveDETBufferSizeY = DETBufferSizeY[k]; 4399 4331 4400 4332 if (UnboundedRequestEnabled) { 4401 - st_vars->EffectiveDETBufferSizeY = st_vars->EffectiveDETBufferSizeY 4333 + EffectiveDETBufferSizeY = EffectiveDETBufferSizeY 4402 4334 + CompressedBufferSizeInkByte * 1024 4403 4335 * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k]) 4404 - / (HTotal[k] / PixelClock[k]) / st_vars->TotalPixelBW; 4336 + / (HTotal[k] / PixelClock[k]) / TotalPixelBW; 4405 4337 } 4406 4338 4407 - st_vars->LinesInDETY[k] = (double) st_vars->EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; 4408 - st_vars->LinesInDETYRoundedDownToSwath[k] = dml_floor(st_vars->LinesInDETY[k], SwathHeightY[k]); 4409 - st_vars->FullDETBufferingTimeY = st_vars->LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / 
VRatio[k]; 4339 + LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; 4340 + LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); 4341 + FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k]; 4410 4342 4411 - st_vars->ActiveClockChangeLatencyHidingY = st_vars->EffectiveLBLatencyHidingY + st_vars->FullDETBufferingTimeY 4343 + ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY 4412 4344 - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k]; 4413 4345 4414 4346 if (NumberOfActiveSurfaces > 1) { 4415 - st_vars->ActiveClockChangeLatencyHidingY = st_vars->ActiveClockChangeLatencyHidingY 4347 + ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY 4416 4348 - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightY[k] * HTotal[k] 4417 4349 / PixelClock[k] / VRatio[k]; 4418 4350 } 4419 4351 4420 4352 if (BytePerPixelDETC[k] > 0) { 4421 - st_vars->LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; 4422 - st_vars->LinesInDETCRoundedDownToSwath[k] = dml_floor(st_vars->LinesInDETC[k], SwathHeightC[k]); 4423 - st_vars->FullDETBufferingTimeC = st_vars->LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) 4353 + LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; 4354 + LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]); 4355 + FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) 4424 4356 / VRatioChroma[k]; 4425 - st_vars->ActiveClockChangeLatencyHidingC = st_vars->EffectiveLBLatencyHidingC + st_vars->FullDETBufferingTimeC 4357 + ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC 4426 4358 - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] 4427 4359 / PixelClock[k]; 4428 4360 if (NumberOfActiveSurfaces > 1) { 4429 - st_vars->ActiveClockChangeLatencyHidingC = st_vars->ActiveClockChangeLatencyHidingC 4361 + ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC 4430 4362 - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightC[k] * HTotal[k] 4431 4363 / PixelClock[k] / VRatioChroma[k]; 4432 4364 } 4433 - st_vars->ActiveClockChangeLatencyHiding = dml_min(st_vars->ActiveClockChangeLatencyHidingY, 4434 - st_vars->ActiveClockChangeLatencyHidingC); 4365 + ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY, 4366 + ActiveClockChangeLatencyHidingC); 4435 4367 } else { 4436 - st_vars->ActiveClockChangeLatencyHiding = st_vars->ActiveClockChangeLatencyHidingY; 4368 + ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY; 4437 4369 } 4438 4370 4439 - ActiveDRAMClockChangeLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark 4371 + ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark 4440 4372 - Watermark->DRAMClockChangeWatermark; 4441 - st_vars->ActiveFCLKChangeLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark 4373 + ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark 4442 4374 - Watermark->FCLKChangeWatermark; 4443 - st_vars->USRRetrainingLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark; 4375 + USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark; 4444 4376 4445 4377 if (WritebackEnable[k]) { 
4446 - st_vars->WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024 4378 + WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024 4447 4379 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] 4448 4380 / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4); 4449 4381 if (WritebackPixelFormat[k] == dm_444_64) 4450 - st_vars->WritebackLatencyHiding = st_vars->WritebackLatencyHiding / 2; 4382 + WritebackLatencyHiding = WritebackLatencyHiding / 2; 4451 4383 4452 - st_vars->WritebackDRAMClockChangeLatencyMargin = st_vars->WritebackLatencyHiding 4384 + WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding 4453 4385 - Watermark->WritebackDRAMClockChangeWatermark; 4454 4386 4455 - st_vars->WritebackFCLKChangeLatencyMargin = st_vars->WritebackLatencyHiding 4387 + WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding 4456 4388 - Watermark->WritebackFCLKChangeWatermark; 4457 4389 4458 4390 ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k], 4459 - st_vars->WritebackFCLKChangeLatencyMargin); 4460 - st_vars->ActiveFCLKChangeLatencyMargin[k] = dml_min(st_vars->ActiveFCLKChangeLatencyMargin[k], 4461 - st_vars->WritebackDRAMClockChangeLatencyMargin); 4391 + WritebackFCLKChangeLatencyMargin); 4392 + ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k], 4393 + WritebackDRAMClockChangeLatencyMargin); 4462 4394 } 4463 4395 MaxActiveDRAMClockChangeLatencySupported[k] = 4464 4396 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ? ··· 4477 4409 HTotal[i] == HTotal[j] && VTotal[i] == VTotal[j] && 4478 4410 VActive[i] == VActive[j]) || (SynchronizeDRRDisplaysForUCLKPStateChangeFinal && 4479 4411 (DRRDisplay[i] || DRRDisplay[j]))) { 4480 - st_vars->SynchronizedSurfaces[i][j] = true; 4412 + SynchronizedSurfaces[i][j] = true; 4481 4413 } else { 4482 - st_vars->SynchronizedSurfaces[i][j] = false; 4414 + SynchronizedSurfaces[i][j] = false; 4483 4415 } 4484 4416 } 4485 4417 } 4486 4418 4487 4419 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4488 4420 if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && 4489 - (!st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin || 4490 - st_vars->ActiveFCLKChangeLatencyMargin[k] < st_vars->MinActiveFCLKChangeMargin)) { 4491 - st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true; 4492 - st_vars->MinActiveFCLKChangeMargin = st_vars->ActiveFCLKChangeLatencyMargin[k]; 4493 - st_vars->SurfaceWithMinActiveFCLKChangeMargin = k; 4421 + (!FoundFirstSurfaceWithMinActiveFCLKChangeMargin || 4422 + ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) { 4423 + FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true; 4424 + MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k]; 4425 + SurfaceWithMinActiveFCLKChangeMargin = k; 4494 4426 } 4495 4427 } 4496 4428 4497 - *MinActiveFCLKChangeLatencySupported = st_vars->MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency; 4429 + *MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency; 4498 4430 4499 - st_vars->SameTimingForFCLKChange = true; 4431 + SameTimingForFCLKChange = true; 4500 4432 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4501 - if (!st_vars->SynchronizedSurfaces[k][st_vars->SurfaceWithMinActiveFCLKChangeMargin]) { 4433 + if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) { 4502 4434 if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && 4503 - 
(st_vars->SameTimingForFCLKChange || 4504 - st_vars->ActiveFCLKChangeLatencyMargin[k] < 4505 - st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) { 4506 - st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = st_vars->ActiveFCLKChangeLatencyMargin[k]; 4435 + (SameTimingForFCLKChange || 4436 + ActiveFCLKChangeLatencyMargin[k] < 4437 + SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) { 4438 + SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k]; 4507 4439 } 4508 - st_vars->SameTimingForFCLKChange = false; 4440 + SameTimingForFCLKChange = false; 4509 4441 } 4510 4442 } 4511 4443 4512 - if (st_vars->MinActiveFCLKChangeMargin > 0) { 4444 + if (MinActiveFCLKChangeMargin > 0) { 4513 4445 *FCLKChangeSupport = dm_fclock_change_vactive; 4514 - } else if ((st_vars->SameTimingForFCLKChange || st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) && 4446 + } else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) && 4515 4447 (PrefetchMode <= 1)) { 4516 4448 *FCLKChangeSupport = dm_fclock_change_vblank; 4517 4449 } else { ··· 4521 4453 *USRRetrainingSupport = true; 4522 4454 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4523 4455 if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && 4524 - (st_vars->USRRetrainingLatencyMargin[k] < 0)) { 4456 + (USRRetrainingLatencyMargin[k] < 0)) { 4525 4457 *USRRetrainingSupport = false; 4526 4458 } 4527 4459 } ··· 4532 4464 UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe && 4533 4465 ActiveDRAMClockChangeLatencyMargin[k] < 0) { 4534 4466 if (PrefetchMode > 0) { 4535 - st_vars->DRAMClockChangeSupportNumber = 2; 4536 - } else if (st_vars->DRAMClockChangeSupportNumber == 0) { 4537 - st_vars->DRAMClockChangeSupportNumber = 1; 4538 - st_vars->LastSurfaceWithoutMargin = k; 4539 - } else if (st_vars->DRAMClockChangeSupportNumber == 1 && 4540 - !st_vars->SynchronizedSurfaces[st_vars->LastSurfaceWithoutMargin][k]) { 4541 - st_vars->DRAMClockChangeSupportNumber = 2; 4467 + DRAMClockChangeSupportNumber = 2; 4468 + } else if (DRAMClockChangeSupportNumber == 0) { 4469 + DRAMClockChangeSupportNumber = 1; 4470 + LastSurfaceWithoutMargin = k; 4471 + } else if (DRAMClockChangeSupportNumber == 1 && 4472 + !SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) { 4473 + DRAMClockChangeSupportNumber = 2; 4542 4474 } 4543 4475 } 4544 4476 } 4545 4477 4546 4478 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4547 4479 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame) 4548 - st_vars->DRAMClockChangeMethod = 1; 4480 + DRAMClockChangeMethod = 1; 4549 4481 else if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) 4550 - st_vars->DRAMClockChangeMethod = 2; 4482 + DRAMClockChangeMethod = 2; 4551 4483 } 4552 4484 4553 - if (st_vars->DRAMClockChangeMethod == 0) { 4554 - if (st_vars->DRAMClockChangeSupportNumber == 0) 4485 + if (DRAMClockChangeMethod == 0) { 4486 + if (DRAMClockChangeSupportNumber == 0) 4555 4487 *DRAMClockChangeSupport = dm_dram_clock_change_vactive; 4556 - else if (st_vars->DRAMClockChangeSupportNumber == 1) 4488 + else if (DRAMClockChangeSupportNumber == 1) 4557 4489 *DRAMClockChangeSupport = dm_dram_clock_change_vblank; 4558 4490 else 4559 4491 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 4560 - } else if (st_vars->DRAMClockChangeMethod == 1) { 4561 - if (st_vars->DRAMClockChangeSupportNumber == 0) 4492 + } else if (DRAMClockChangeMethod == 1) { 4493 + if (DRAMClockChangeSupportNumber == 
0) 4562 4494 *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame; 4563 - else if (st_vars->DRAMClockChangeSupportNumber == 1) 4495 + else if (DRAMClockChangeSupportNumber == 1) 4564 4496 *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame; 4565 4497 else 4566 4498 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 4567 4499 } else { 4568 - if (st_vars->DRAMClockChangeSupportNumber == 0) 4500 + if (DRAMClockChangeSupportNumber == 0) 4569 4501 *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp; 4570 - else if (st_vars->DRAMClockChangeSupportNumber == 1) 4502 + else if (DRAMClockChangeSupportNumber == 1) 4571 4503 *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp; 4572 4504 else 4573 4505 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; ··· 4581 4513 4582 4514 dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (HTotal[k] / PixelClock[k]), 1); 4583 4515 src_y_pstate_l = dml_ceil(dst_y_pstate * VRatio[k], SwathHeightY[k]); 4584 - src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + st_vars->LBLatencyHidingSourceLinesY[k]; 4516 + src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k]; 4585 4517 sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + meta_row_height[k]; 4586 4518 4587 4519 #ifdef __DML_VBA_DEBUG__ ··· 4589 4521 dml_print("DML::%s: k=%d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); 4590 4522 dml_print("DML::%s: k=%d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]); 4591 4523 dml_print("DML::%s: k=%d, SwathHeightY = %d\n", __func__, k, SwathHeightY[k]); 4592 - dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, st_vars->LBLatencyHidingSourceLinesY[k]); 4524 + dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]); 4593 4525 dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate); 4594 4526 dml_print("DML::%s: k=%d, src_y_pstate_l = %d\n", __func__, k, src_y_pstate_l); 4595 4527 dml_print("DML::%s: k=%d, src_y_ahead_l = %d\n", __func__, k, src_y_ahead_l); ··· 4600 4532 4601 4533 if (BytePerPixelDETC[k] > 0) { 4602 4534 src_y_pstate_c = dml_ceil(dst_y_pstate * VRatioChroma[k], SwathHeightC[k]); 4603 - src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + st_vars->LBLatencyHidingSourceLinesC[k]; 4535 + src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k]; 4604 4536 sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + meta_row_height_chroma[k]; 4605 4537 SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c); 4606 4538
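All of the display_mode_vba_util_32.c hunks above are one refactor: the dml32_* helpers patched here (the prefetch-schedule and watermark/DRAM-speed-change routines listed in the header hunk below) stop routing their temporaries through a caller-supplied scratch struct (st_vars) and declare them as plain stack locals instead. A minimal sketch of that conversion, with invented names rather than the real DML signatures:

    /* before: temporaries live in a shared scratch struct passed by the caller */
    struct calc_scratch {
            double LineTime;
            double Tvm_oto;
    };

    static void calc_old(struct calc_scratch *st_vars, double HTotal, double PixelClock)
    {
            st_vars->LineTime = HTotal / PixelClock;
            st_vars->Tvm_oto  = st_vars->LineTime / 4.0;
    }

    /* after: the same values are stack locals, so the extra parameter and the
     * scratch struct definition (removed from display_mode_vba.h further down) go away */
    static void calc_new(double HTotal, double PixelClock)
    {
            double LineTime = HTotal / PixelClock;
            double Tvm_oto  = LineTime / 4.0;

            (void)Tvm_oto;  /* consumed by the later prefetch math in the real code */
    }

The display_mode_vba_util_32.h hunk below drops the st_vars parameter from the prototypes to match.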
-5
drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
··· 30 30 #include "os_types.h" 31 31 #include "../dc_features.h" 32 32 #include "../display_mode_structs.h" 33 - #include "dml/display_mode_vba.h" 34 33 35 34 unsigned int dml32_dscceComputeDelay( 36 35 unsigned int bpc, ··· 81 82 double *DPPCLKUsingSingleDPP); 82 83 83 84 void dml32_CalculateSwathAndDETConfiguration( 84 - struct dml32_CalculateSwathAndDETConfiguration *st_vars, 85 85 unsigned int DETSizeOverride[], 86 86 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 87 87 unsigned int ConfigReturnBufferSizeInKByte, ··· 360 362 bool *ExceededMALLSize); 361 363 362 364 void dml32_CalculateVMRowAndSwath( 363 - struct dml32_CalculateVMRowAndSwath *st_vars, 364 365 unsigned int NumberOfActiveSurfaces, 365 366 DmlPipe myPipe[], 366 367 unsigned int SurfaceSizeInMALL[], ··· 712 715 unsigned int HostVMMaxNonCachedPageTableLevels); 713 716 714 717 bool dml32_CalculatePrefetchSchedule( 715 - struct dml32_CalculatePrefetchSchedule *st_vars, 716 718 double HostVMInefficiencyFactor, 717 719 DmlPipe *myPipe, 718 720 unsigned int DSCDelay, ··· 807 811 bool *ImmediateFlipSupportedForPipe); 808 812 809 813 void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( 810 - struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport *st_vars, 811 814 bool USRRetrainingRequiredFinal, 812 815 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 813 816 unsigned int PrefetchMode,
+7
drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
··· 498 498 dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; 499 499 } 500 500 501 + if ((int)(dcn3_21_soc.fclk_change_latency_us * 1000) 502 + != dc->bb_overrides.fclk_clock_change_latency_ns 503 + && dc->bb_overrides.fclk_clock_change_latency_ns) { 504 + dcn3_21_soc.fclk_change_latency_us = 505 + dc->bb_overrides.fclk_clock_change_latency_ns / 1000; 506 + } 507 + 501 508 if ((int)(dcn3_21_soc.dummy_pstate_latency_us * 1000) 502 509 != dc->bb_overrides.dummy_clock_change_latency_ns 503 510 && dc->bb_overrides.dummy_clock_change_latency_ns) {
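A quick worked example of the new fclk_change_latency override (the 8000 ns figure is hypothetical, not from the patch): with dc->bb_overrides.fclk_clock_change_latency_ns = 8000, the guard compares 8000 against (int)(dcn3_21_soc.fclk_change_latency_us * 1000), and if the table holds anything other than 8.0 us it stores 8000 / 1000 = 8 us, the same ns-to-us override pattern that the surrounding dram_clock_change_latency and dummy_pstate_latency blocks already use.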
-106
drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
··· 182 182 unsigned int *BlockWidth256BytesY, 183 183 unsigned int *BlockWidth256BytesC); 184 184 185 - struct dml32_CalculateSwathAndDETConfiguration { 186 - unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX]; 187 - unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX]; 188 - unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX]; 189 - unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX]; 190 - unsigned int RoundedUpSwathSizeBytesY; 191 - unsigned int RoundedUpSwathSizeBytesC; 192 - double SwathWidthdoubleDPP[DC__NUM_DPP__MAX]; 193 - double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX]; 194 - unsigned int TotalActiveDPP; 195 - bool NoChromaSurfaces; 196 - unsigned int DETBufferSizeInKByteForSwathCalculation; 197 - }; 198 - 199 - struct dml32_CalculateVMRowAndSwath { 200 - unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX]; 201 - unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX]; 202 - unsigned int PDEAndMetaPTEBytesFrameY; 203 - unsigned int PDEAndMetaPTEBytesFrameC; 204 - unsigned int MetaRowByteY[DC__NUM_DPP__MAX]; 205 - unsigned int MetaRowByteC[DC__NUM_DPP__MAX]; 206 - unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX]; 207 - unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX]; 208 - unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX]; 209 - unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX]; 210 - unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; 211 - unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX]; 212 - unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; 213 - unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX]; 214 - bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX]; 215 - }; 216 - 217 - struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport { 218 - unsigned int SurfaceWithMinActiveFCLKChangeMargin; 219 - unsigned int DRAMClockChangeSupportNumber; 220 - unsigned int LastSurfaceWithoutMargin; 221 - unsigned int DRAMClockChangeMethod; 222 - bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin; 223 - double MinActiveFCLKChangeMargin; 224 - double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank; 225 - double ActiveClockChangeLatencyHidingY; 226 - double ActiveClockChangeLatencyHidingC; 227 - double ActiveClockChangeLatencyHiding; 228 - double EffectiveDETBufferSizeY; 229 - double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX]; 230 - double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX]; 231 - double TotalPixelBW; 232 - bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX]; 233 - double EffectiveLBLatencyHidingY; 234 - double EffectiveLBLatencyHidingC; 235 - double LinesInDETY[DC__NUM_DPP__MAX]; 236 - double LinesInDETC[DC__NUM_DPP__MAX]; 237 - unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; 238 - unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX]; 239 - double FullDETBufferingTimeY; 240 - double FullDETBufferingTimeC; 241 - double WritebackDRAMClockChangeLatencyMargin; 242 - double WritebackFCLKChangeLatencyMargin; 243 - double WritebackLatencyHiding; 244 - bool SameTimingForFCLKChange; 245 - unsigned int TotalActiveWriteback; 246 - unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX]; 247 - unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX]; 248 - }; 249 - 250 - struct dml32_CalculatePrefetchSchedule { 251 - unsigned int DPPCycles, DISPCLKCycles; 252 - double DSTTotalPixelsAfterScaler; 253 - double LineTime; 254 - double dst_y_prefetch_equ; 255 - double 
prefetch_bw_oto; 256 - double Tvm_oto; 257 - double Tr0_oto; 258 - double Tvm_oto_lines; 259 - double Tr0_oto_lines; 260 - double dst_y_prefetch_oto; 261 - double TimeForFetchingMetaPTE; 262 - double TimeForFetchingRowInVBlank; 263 - double LinesToRequestPrefetchPixelData; 264 - unsigned int HostVMDynamicLevelsTrips; 265 - double trip_to_mem; 266 - double Tvm_trips; 267 - double Tr0_trips; 268 - double Tvm_trips_rounded; 269 - double Tr0_trips_rounded; 270 - double Lsw_oto; 271 - double Tpre_rounded; 272 - double prefetch_bw_equ; 273 - double Tvm_equ; 274 - double Tr0_equ; 275 - double Tdmbf; 276 - double Tdmec; 277 - double Tdmsks; 278 - double prefetch_sw_bytes; 279 - double bytes_pp; 280 - double dep_bytes; 281 - unsigned int max_vratio_pre; 282 - double min_Lsw; 283 - double Tsw_est1; 284 - double Tsw_est3; 285 - }; 286 - 287 185 struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation { 288 186 unsigned int dummy_integer_array[2][DC__NUM_DPP__MAX]; 289 187 double dummy_single_array[2][DC__NUM_DPP__MAX]; ··· 253 355 struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation 254 356 DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation; 255 357 struct dml32_ModeSupportAndSystemConfigurationFull dml32_ModeSupportAndSystemConfigurationFull; 256 - struct dml32_CalculateSwathAndDETConfiguration dml32_CalculateSwathAndDETConfiguration; 257 - struct dml32_CalculateVMRowAndSwath dml32_CalculateVMRowAndSwath; 258 - struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport; 259 - struct dml32_CalculatePrefetchSchedule dml32_CalculatePrefetchSchedule; 260 358 }; 261 359 262 360 struct vba_vars_st {
+4 -2
drivers/gpu/drm/amd/display/include/dal_asic_id.h
··· 244 244 #define ASICREV_IS_GC_10_3_7(eChipRev) ((eChipRev >= GC_10_3_7_A0) && (eChipRev < GC_10_3_7_UNKNOWN)) 245 245 246 246 #define AMDGPU_FAMILY_GC_11_0_0 145 247 - #define AMDGPU_FAMILY_GC_11_0_2 148 247 + #define AMDGPU_FAMILY_GC_11_0_1 148 248 248 #define GC_11_0_0_A0 0x1 249 249 #define GC_11_0_2_A0 0x10 250 + #define GC_11_0_3_A0 0x20 250 251 #define GC_11_UNKNOWN 0xFF 251 252 252 253 #define ASICREV_IS_GC_11_0_0(eChipRev) (eChipRev < GC_11_0_2_A0) 253 - #define ASICREV_IS_GC_11_0_2(eChipRev) (eChipRev >= GC_11_0_2_A0 && eChipRev < GC_11_UNKNOWN) 254 + #define ASICREV_IS_GC_11_0_2(eChipRev) (eChipRev >= GC_11_0_2_A0 && eChipRev < GC_11_0_3_A0) 255 + #define ASICREV_IS_GC_11_0_3(eChipRev) (eChipRev >= GC_11_0_3_A0 && eChipRev < GC_11_UNKNOWN) 254 256 255 257 /* 256 258 * ASIC chip ID
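Taken together, the revised ASICREV_IS_GC_11_0_* macros above split the GC 11 chip-rev space into non-overlapping ranges; a short worked check (0x20 is just GC_11_0_3_A0 from the hunk):

    /* eChipRev ranges after this change:
     *   [0x00, 0x10)  -> ASICREV_IS_GC_11_0_0
     *   [0x10, 0x20)  -> ASICREV_IS_GC_11_0_2
     *   [0x20, 0xFF)  -> ASICREV_IS_GC_11_0_3
     *
     * e.g. for eChipRev == GC_11_0_3_A0 (0x20):
     *   ASICREV_IS_GC_11_0_2(0x20) -> false (0x20 is not below GC_11_0_3_A0)
     *   ASICREV_IS_GC_11_0_3(0x20) -> true  (0x20 >= GC_11_0_3_A0 and < GC_11_UNKNOWN)
     */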
+3 -1
drivers/gpu/drm/amd/display/include/logger_types.h
··· 119 119 LOG_HDMI_RETIMER_REDRIVER, 120 120 LOG_DSC, 121 121 LOG_SMU_MSG, 122 + LOG_DC2RESERVED4, 123 + LOG_DC2RESERVED5, 122 124 LOG_DWB, 123 125 LOG_GAMMA_DEBUG, 124 126 LOG_MAX_HW_POINTS, 125 127 LOG_ALL_TF_CHANNELS, 126 128 LOG_SAMPLE_1DLUT, 127 129 LOG_DP2, 128 - LOG_SECTION_TOTAL_COUNT 130 + LOG_DC2RESERVED12, 129 131 }; 130 132 131 133 #define DC_MIN_LOG_MASK ((1 << LOG_ERROR) | \
+3 -12
drivers/gpu/drm/amd/display/modules/freesync/freesync.c
··· 613 613 * Note: We should never go above the field rate of the mode timing set. 614 614 */ 615 615 infopacket->sb[8] = (unsigned char)((vrr->max_refresh_in_uhz + 500000) / 1000000); 616 - 617 - /* FreeSync HDR */ 618 - infopacket->sb[9] = 0; 619 - infopacket->sb[10] = 0; 620 616 } 621 617 622 618 static void build_vrr_infopacket_data_v3(const struct mod_vrr_params *vrr, ··· 680 684 681 685 /* PB16 : Reserved bits 7:1, FixedRate bit 0 */ 682 686 infopacket->sb[16] = (vrr->state == VRR_STATE_ACTIVE_FIXED) ? 1 : 0; 683 - 684 - //FreeSync HDR 685 - infopacket->sb[9] = 0; 686 - infopacket->sb[10] = 0; 687 687 } 688 688 689 689 static void build_vrr_infopacket_fs2_data(enum color_transfer_func app_tf, ··· 764 772 /* HB2 = [Bits 7:5 = 0] [Bits 4:0 = Length = 0x09] */ 765 773 infopacket->hb2 = 0x09; 766 774 767 - *payload_size = 0x0A; 768 - 775 + *payload_size = 0x09; 769 776 } else if (dc_is_dp_signal(signal)) { 770 777 771 778 /* HEADER */ ··· 813 822 infopacket->hb1 = version; 814 823 815 824 /* HB2 = [Bits 7:5 = 0] [Bits 4:0 = Length] */ 816 - *payload_size = 0x10; 817 - infopacket->hb2 = *payload_size - 1; //-1 for checksum 825 + infopacket->hb2 = 0x10; 818 826 827 + *payload_size = 0x10; 819 828 } else if (dc_is_dp_signal(signal)) { 820 829 821 830 /* HEADER */
+3 -1
drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h
··· 27 27 // *** IMPORTANT *** 28 28 // SMU TEAM: Always increment the interface version if 29 29 // any structure is changed in this file 30 - #define PMFW_DRIVER_IF_VERSION 4 30 + #define PMFW_DRIVER_IF_VERSION 5 31 31 32 32 typedef struct { 33 33 int32_t value; ··· 197 197 198 198 uint16_t SkinTemp; 199 199 uint16_t DeviceState; 200 + uint16_t CurTemp; //[centi-Celsius] 201 + uint16_t spare2; 200 202 } SmuMetrics_t; 201 203 202 204 typedef struct {
+1 -1
drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
··· 28 28 #define SMU13_DRIVER_IF_VERSION_INV 0xFFFFFFFF 29 29 #define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04 30 30 #define SMU13_DRIVER_IF_VERSION_ALDE 0x08 31 - #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x04 31 + #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x05 32 32 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04 33 33 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x2C 34 34 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2C
+1
drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
··· 4283 4283 .dump_pptable = sienna_cichlid_dump_pptable, 4284 4284 .init_microcode = smu_v11_0_init_microcode, 4285 4285 .load_microcode = smu_v11_0_load_microcode, 4286 + .fini_microcode = smu_v11_0_fini_microcode, 4286 4287 .init_smc_tables = sienna_cichlid_init_smc_tables, 4287 4288 .fini_smc_tables = smu_v11_0_fini_smc_tables, 4288 4289 .init_power = smu_v11_0_init_power,
+11 -10
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
··· 212 212 if (!adev->scpm_enabled) 213 213 return 0; 214 214 215 + if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 7)) 216 + return 0; 217 + 215 218 /* override pptable_id from driver parameter */ 216 219 if (amdgpu_smu_pptable_id >= 0) { 217 220 pptable_id = amdgpu_smu_pptable_id; ··· 222 219 } else { 223 220 pptable_id = smu->smu_table.boot_values.pp_table_id; 224 221 225 - if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 7) && 226 - pptable_id == 3667) 227 - pptable_id = 36671; 228 - 229 - if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 7) && 230 - pptable_id == 3688) 231 - pptable_id = 36881; 232 222 /* 233 223 * Temporary solution for SMU V13.0.0 with SCPM enabled: 234 224 * - use 36831 signed pptable when pp_table_id is 3683 225 + * - use 37151 signed pptable when pp_table_id is 3715 235 226 * - use 36641 signed pptable when pp_table_id is 3664 or 0 236 227 * TODO: drop these when the pptable carried in vbios is ready. 237 228 */ ··· 237 240 break; 238 241 case 3683: 239 242 pptable_id = 36831; 243 + break; 244 + case 3715: 245 + pptable_id = 37151; 240 246 break; 241 247 default: 242 248 dev_err(adev->dev, "Unsupported pptable id %d\n", pptable_id); ··· 478 478 479 479 /* 480 480 * Temporary solution for SMU V13.0.0 with SCPM disabled: 481 - * - use 3664 or 3683 on request 481 + * - use 3664, 3683 or 3715 on request 482 482 * - use 3664 when pptable_id is 0 483 483 * TODO: drop these when the pptable carried in vbios is ready. 484 484 */ ··· 489 489 break; 490 490 case 3664: 491 491 case 3683: 492 + case 3715: 492 493 break; 493 494 default: 494 495 dev_err(adev->dev, "Unsupported pptable id %d\n", pptable_id); ··· 2345 2344 2346 2345 index = smu_cmn_to_asic_specific_index(smu, CMN2ASIC_MAPPING_MSG, 2347 2346 SMU_MSG_EnableGfxImu); 2348 - 2349 - return smu_cmn_send_msg_without_waiting(smu, index, 0); 2347 + /* Param 1 to tell PMFW to enable GFXOFF feature */ 2348 + return smu_cmn_send_msg_without_waiting(smu, index, 1); 2350 2349 } 2351 2350 2352 2351 int smu_v13_0_od_edit_dpm_table(struct smu_context *smu,
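Condensing the SCPM pptable hunks above into one view (ids taken from the comment and switch shown; anything else falls through to the dev_err default):

    /* pp_table_id from vbios  ->  signed pptable id requested instead
     *          0 or 3664      ->  36641
     *               3683      ->  36831
     *               3715      ->  37151   (added here)
     */

The old 3667 -> 36671 and 3688 -> 36881 rewrites are dropped because SMU 13.0.7 now returns early from this SCPM-enabled path before any id substitution.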
+2
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
···
1792 1792 .dump_pptable = smu_v13_0_0_dump_pptable,
1793 1793 .init_microcode = smu_v13_0_init_microcode,
1794 1794 .load_microcode = smu_v13_0_load_microcode,
1795 + .fini_microcode = smu_v13_0_fini_microcode,
1795 1796 .init_smc_tables = smu_v13_0_0_init_smc_tables,
1797 + .fini_smc_tables = smu_v13_0_fini_smc_tables,
1796 1798 .init_power = smu_v13_0_init_power,
1797 1799 .fini_power = smu_v13_0_fini_power,
1798 1800 .check_fw_status = smu_v13_0_check_fw_status,
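As with sienna_cichlid above, smu_v13_0_0 now fills in .fini_microcode and .fini_smc_tables, so the common code that walks the pptable_funcs table at teardown actually has something to call. These ops tables are plain function-pointer structs, and optional hooks are typically guarded with a NULL check before dispatch; below is a generic sketch of that pattern with illustrative names, not the actual amdgpu dispatch code.

/* Generic sketch of optional-hook dispatch from a function-pointer ops table.
 * Struct and call site are illustrative, not the actual amdgpu code. */
struct example_ctx;

struct example_ops {
	int (*init_smc_tables)(struct example_ctx *ctx);
	int (*fini_smc_tables)(struct example_ctx *ctx);	/* optional */
};

static int example_teardown(struct example_ctx *ctx, const struct example_ops *ops)
{
	/* If an ASIC never wires up the fini hook, nothing is called here,
	 * which is exactly how allocations end up leaking on unload. */
	if (ops->fini_smc_tables)
		return ops->fini_smc_tables(ctx);
	return 0;
}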
+3 -14
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
···
71 71 MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1),
72 72 MSG_MAP(GetSmuVersion, PPSMC_MSG_GetPmfwVersion, 1),
73 73 MSG_MAP(GetDriverIfVersion, PPSMC_MSG_GetDriverIfVersion, 1),
74 - MSG_MAP(EnableGfxOff, PPSMC_MSG_EnableGfxOff, 1),
75 74 MSG_MAP(AllowGfxOff, PPSMC_MSG_AllowGfxOff, 1),
76 75 MSG_MAP(DisallowGfxOff, PPSMC_MSG_DisallowGfxOff, 1),
77 76 MSG_MAP(PowerDownVcn, PPSMC_MSG_PowerDownVcn, 1),
···
198 199 kfree(smu_table->watermarks_table);
199 200 smu_table->watermarks_table = NULL;
200 201 
202 + kfree(smu_table->gpu_metrics_table);
203 + smu_table->gpu_metrics_table = NULL;
204 + 
201 205 return 0;
202 206 }
203 207 
···
225 223 if (!en && !adev->in_s0ix)
226 224 ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, NULL);
228 - return ret;
229 - }
230 - 
231 - static int smu_v13_0_4_post_smu_init(struct smu_context *smu)
232 - {
233 - struct amdgpu_device *adev = smu->adev;
234 - int ret = 0;
235 - 
236 - /* allow message will be sent after enable message */
237 - ret = smu_cmn_send_smc_msg(smu, SMU_MSG_EnableGfxOff, NULL);
238 - if (ret)
239 - dev_err(adev->dev, "Failed to Enable GfxOff!\n");
240 226 return ret;
241 227 }
242 228 
···
1016 1026 .get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
1017 1027 .set_driver_table_location = smu_v13_0_set_driver_table_location,
1018 1028 .gfx_off_control = smu_v13_0_gfx_off_control,
1019 - .post_init = smu_v13_0_4_post_smu_init,
1020 1029 .mode2_reset = smu_v13_0_4_mode2_reset,
1021 1030 .get_dpm_ultimate_freq = smu_v13_0_4_get_dpm_ultimate_freq,
1022 1031 .od_edit_dpm_table = smu_v13_0_od_edit_dpm_table,
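For SMU 13.0.4 the explicit EnableGfxOff message mapping and the post_init hook that sent it go away, presumably because the common smu_v13_0.c change above now asks the PMFW to enable GFXOFF via EnableGfxImu's parameter. The fini path additionally frees gpu_metrics_table, which was previously leaked on unload. The free-and-NULL idiom used there is worth calling out; a minimal sketch with illustrative field names follows.

/* Sketch of the free-and-NULL idiom used in the fini_smc_tables hunks above.
 * kfree(NULL) is a no-op, and clearing the pointer keeps a repeated fini or a
 * later stale access from touching freed memory. Names are illustrative. */
#include <linux/slab.h>

struct example_tables {
	void *watermarks_table;
	void *gpu_metrics_table;
};

static void example_fini_tables(struct example_tables *t)
{
	kfree(t->watermarks_table);
	t->watermarks_table = NULL;

	kfree(t->gpu_metrics_table);	/* previously leaked on driver unload */
	t->gpu_metrics_table = NULL;
}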
+3
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c
···
176 176 kfree(smu_table->watermarks_table);
177 177 smu_table->watermarks_table = NULL;
178 178 
179 + kfree(smu_table->gpu_metrics_table);
180 + smu_table->gpu_metrics_table = NULL;
181 + 
179 182 return 0;
180 183 }
181 184 
+14
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
···
1567 1567 return ret;
1568 1568 }
1569 1569 
1570 + static bool smu_v13_0_7_is_mode1_reset_supported(struct smu_context *smu)
1571 + {
1572 + struct amdgpu_device *adev = smu->adev;
1573 + 
1574 + /* SRIOV does not support SMU mode1 reset */
1575 + if (amdgpu_sriov_vf(adev))
1576 + return false;
1577 + 
1578 + return true;
1579 + }
1570 1580 static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
1571 1581 .get_allowed_feature_mask = smu_v13_0_7_get_allowed_feature_mask,
1572 1582 .set_default_dpm_table = smu_v13_0_7_set_default_dpm_table,
···
1584 1574 .dump_pptable = smu_v13_0_7_dump_pptable,
1585 1575 .init_microcode = smu_v13_0_init_microcode,
1586 1576 .load_microcode = smu_v13_0_load_microcode,
1577 + .fini_microcode = smu_v13_0_fini_microcode,
1587 1578 .init_smc_tables = smu_v13_0_7_init_smc_tables,
1579 + .fini_smc_tables = smu_v13_0_fini_smc_tables,
1588 1580 .init_power = smu_v13_0_init_power,
1589 1581 .fini_power = smu_v13_0_fini_power,
1590 1582 .check_fw_status = smu_v13_0_7_check_fw_status,
···
1636 1624 .baco_set_state = smu_v13_0_baco_set_state,
1637 1625 .baco_enter = smu_v13_0_baco_enter,
1638 1626 .baco_exit = smu_v13_0_baco_exit,
1627 + .mode1_reset_is_support = smu_v13_0_7_is_mode1_reset_supported,
1628 + .mode1_reset = smu_v13_0_mode1_reset,
1639 1629 .set_mp1_state = smu_v13_0_7_set_mp1_state,
1640 1630 };
1641 1631 
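SMU 13.0.7 gains mode1 reset support here by wiring .mode1_reset to the common smu_v13_0_mode1_reset handler and advertising it through a predicate that, as the in-diff comment notes, refuses the reset under SR-IOV. Below is a rough sketch of how such a predicate is typically consulted before dispatching the reset; smu_try_mode1_reset() is a hypothetical caller name, and the real decision logic lives in the shared amdgpu/smu reset code, which this diff does not show.

/* Hypothetical caller: check the ASIC's mode1 predicate before dispatching.
 * smu_try_mode1_reset() is an illustrative name, not an amdgpu function. */
#include <linux/errno.h>
#include <linux/types.h>

struct smu_context;

struct example_reset_ops {
	bool (*mode1_reset_is_support)(struct smu_context *smu);
	int (*mode1_reset)(struct smu_context *smu);
};

static int smu_try_mode1_reset(struct smu_context *smu,
			       const struct example_reset_ops *ops)
{
	/* SR-IOV guests report "not supported" and fall back to other methods. */
	if (!ops->mode1_reset_is_support || !ops->mode1_reset_is_support(smu))
		return -EOPNOTSUPP;

	return ops->mode1_reset(smu);
}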