Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'drm-next-5.5-2019-11-08' of git://people.freedesktop.org/~agd5f/linux into drm-next

drm-next-5.5-2019-11-08:

amdgpu:
- Enable VCN dynamic powergating on RV/RV2
- Fixes for Navi14
- Misc Navi fixes
- Fix MSI-X tear down
- Misc Arcturus fixes
- Fix xgmi powerstate handling
- Documentation fixes

scheduler:
- Fix static code checker warning
- Fix possible thread reactivation while thread is stopped
- Avoid cleanup if thread is parked

radeon:
- SI dpm fix ported from amdgpu

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191108212713.5078-1-alexander.deucher@amd.com

+406 -62
+35
Documentation/gpu/amdgpu.rst
··· 82 82 AMDGPU RAS Support 83 83 ================== 84 84 85 + The AMDGPU RAS interfaces are exposed via sysfs (for informational queries) and 86 + debugfs (for error injection). 87 + 85 88 RAS debugfs/sysfs Control and Error Injection Interfaces 86 89 -------------------------------------------------------- 87 90 88 91 .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 89 92 :doc: AMDGPU RAS debugfs control interface 93 + 94 + RAS Reboot Behavior for Unrecoverable Errors 95 + -------------------------------------------------------- 96 + 97 + .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 98 + :doc: AMDGPU RAS Reboot Behavior for Unrecoverable Errors 90 99 91 100 RAS Error Count sysfs Interface 92 101 ------------------------------- ··· 117 108 118 109 .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 119 110 :internal: 111 + 112 + Sample Code 113 + ----------- 114 + Sample code for testing error injection can be found here: 115 + https://cgit.freedesktop.org/mesa/drm/tree/tests/amdgpu/ras_tests.c 116 + 117 + This is part of the libdrm amdgpu unit tests which cover several areas of the GPU. 118 + There are four sets of tests: 119 + 120 + RAS Basic Test 121 + 122 + The test verifies the RAS feature enabled status and makes sure the necessary sysfs and debugfs files 123 + are present. 124 + 125 + RAS Query Test 126 + 127 + This test checks the RAS availability and enablement status for each supported IP block as well as 128 + the error counts. 129 + 130 + RAS Inject Test 131 + 132 + This test injects errors for each IP. 133 + 134 + RAS Disable Test 135 + 136 + This test tests disabling of RAS features for each IP block. 120 137 121 138 122 139 GPU Power/Thermal Controls and Monitoring
+3
drivers/gpu/drm/amd/amdgpu/amdgpu.h
··· 977 977 978 978 uint64_t unique_id; 979 979 uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS]; 980 + 981 + /* device pstate */ 982 + int pstate; 980 983 }; 981 984 982 985 static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
+2 -4
drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
··· 33 33 { 34 34 unsigned long start_jiffies; 35 35 unsigned long end_jiffies; 36 - struct dma_fence *fence = NULL; 36 + struct dma_fence *fence; 37 37 int i, r; 38 38 39 39 start_jiffies = jiffies; ··· 44 44 if (r) 45 45 goto exit_do_move; 46 46 r = dma_fence_wait(fence, false); 47 + dma_fence_put(fence); 47 48 if (r) 48 49 goto exit_do_move; 49 - dma_fence_put(fence); 50 50 } 51 51 end_jiffies = jiffies; 52 52 r = jiffies_to_msecs(end_jiffies - start_jiffies); 53 53 54 54 exit_do_move: 55 - if (fence) 56 - dma_fence_put(fence); 57 55 return r; 58 56 } 59 57
+10
drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
··· 859 859 struct amdgpu_device *adev = dev->dev_private; 860 860 int r = 0, i; 861 861 862 + /* Avoid accidently unparking the sched thread during GPU reset */ 863 + mutex_lock(&adev->lock_reset); 864 + 862 865 /* hold on the scheduler */ 863 866 for (i = 0; i < AMDGPU_MAX_RINGS; i++) { 864 867 struct amdgpu_ring *ring = adev->rings[i]; ··· 886 883 continue; 887 884 kthread_unpark(ring->sched.thread); 888 885 } 886 + 887 + mutex_unlock(&adev->lock_reset); 889 888 890 889 return 0; 891 890 } ··· 1041 1036 if (!fences) 1042 1037 return -ENOMEM; 1043 1038 1039 + /* Avoid accidently unparking the sched thread during GPU reset */ 1040 + mutex_lock(&adev->lock_reset); 1041 + 1044 1042 /* stop the scheduler */ 1045 1043 kthread_park(ring->sched.thread); 1046 1044 ··· 1082 1074 failure: 1083 1075 /* restart the scheduler */ 1084 1076 kthread_unpark(ring->sched.thread); 1077 + 1078 + mutex_unlock(&adev->lock_reset); 1085 1079 1086 1080 ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); 1087 1081
+34 -2
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
··· 2057 2057 */ 2058 2058 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) 2059 2059 { 2060 + struct amdgpu_gpu_instance *gpu_instance; 2060 2061 int i = 0, r; 2061 2062 2062 2063 for (i = 0; i < adev->num_ip_blocks; i++) { ··· 2083 2082 if (r) 2084 2083 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r); 2085 2084 2086 - /* set to low pstate by default */ 2087 - amdgpu_xgmi_set_pstate(adev, 0); 2085 + 2086 + if (adev->gmc.xgmi.num_physical_nodes > 1) { 2087 + mutex_lock(&mgpu_info.mutex); 2088 + 2089 + /* 2090 + * Reset device p-state to low as this was booted with high. 2091 + * 2092 + * This should be performed only after all devices from the same 2093 + * hive get initialized. 2094 + * 2095 + * However, it's unknown how many device in the hive in advance. 2096 + * As this is counted one by one during devices initializations. 2097 + * 2098 + * So, we wait for all XGMI interlinked devices initialized. 2099 + * This may bring some delays as those devices may come from 2100 + * different hives. But that should be OK. 2101 + */ 2102 + if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) { 2103 + for (i = 0; i < mgpu_info.num_gpu; i++) { 2104 + gpu_instance = &(mgpu_info.gpu_ins[i]); 2105 + if (gpu_instance->adev->flags & AMD_IS_APU) 2106 + continue; 2107 + 2108 + r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0); 2109 + if (r) { 2110 + DRM_ERROR("pstate setting failed (%d).\n", r); 2111 + break; 2112 + } 2113 + } 2114 + } 2115 + 2116 + mutex_unlock(&mgpu_info.mutex); 2117 + } 2088 2118 2089 2119 return 0; 2090 2120 }
+30
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
··· 127 127 }; 128 128 129 129 struct amdgpu_gmc { 130 + /* FB's physical address in MMIO space (for CPU to 131 + * map FB). This is different compared to the agp/ 132 + * gart/vram_start/end field as the later is from 133 + * GPU's view and aper_base is from CPU's view. 134 + */ 130 135 resource_size_t aper_size; 131 136 resource_size_t aper_base; 132 137 /* for some chips with <= 32MB we need to lie 133 138 * about vram size near mc fb location */ 134 139 u64 mc_vram_size; 135 140 u64 visible_vram_size; 141 + /* AGP aperture start and end in MC address space 142 + * Driver find a hole in the MC address space 143 + * to place AGP by setting MC_VM_AGP_BOT/TOP registers 144 + * Under VMID0, logical address == MC address. AGP 145 + * aperture maps to physical bus or IOVA addressed. 146 + * AGP aperture is used to simulate FB in ZFB case. 147 + * AGP aperture is also used for page table in system 148 + * memory (mainly for APU). 149 + * 150 + */ 136 151 u64 agp_size; 137 152 u64 agp_start; 138 153 u64 agp_end; 154 + /* GART aperture start and end in MC address space 155 + * Driver find a hole in the MC address space 156 + * to place GART by setting VM_CONTEXT0_PAGE_TABLE_START/END_ADDR 157 + * registers 158 + * Under VMID0, logical address inside GART aperture will 159 + * be translated through gpuvm gart page table to access 160 + * paged system memory 161 + */ 139 162 u64 gart_size; 140 163 u64 gart_start; 141 164 u64 gart_end; 165 + /* Frame buffer aperture of this GPU device. Different from 166 + * fb_start (see below), this only covers the local GPU device. 167 + * Driver get fb_start from MC_VM_FB_LOCATION_BASE (set by vbios) 168 + * and calculate vram_start of this local device by adding an 169 + * offset inside the XGMI hive. 170 + * Under VMID0, logical address == MC address 171 + */ 142 172 u64 vram_start; 143 173 u64 vram_end; 144 174 /* FB region , it's same as local vram region in single GPU, in XGMI
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
··· 311 311 drm_irq_uninstall(adev->ddev); 312 312 adev->irq.installed = false; 313 313 if (adev->irq.msi_enabled) 314 - pci_disable_msi(adev->pdev); 314 + pci_free_irq_vectors(adev->pdev); 315 315 if (!amdgpu_device_has_dc_support(adev)) 316 316 flush_work(&adev->hotplug_work); 317 317 }
+33 -7
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
··· 220 220 * As their names indicate, inject operation will write the 221 221 * value to the address. 222 222 * 223 - * Second member: struct ras_debug_if::op. 223 + * The second member: struct ras_debug_if::op. 224 224 * It has three kinds of operations. 225 225 * 226 226 * - 0: disable RAS on the block. Take ::head as its data. ··· 228 228 * - 2: inject errors on the block. Take ::inject as its data. 229 229 * 230 230 * How to use the interface? 231 - * programs: 232 - * copy the struct ras_debug_if in your codes and initialize it. 233 - * write the struct to the control node. 231 + * 232 + * Programs 233 + * 234 + * Copy the struct ras_debug_if in your codes and initialize it. 235 + * Write the struct to the control node. 236 + * 237 + * Shells 234 238 * 235 239 * .. code-block:: bash 236 240 * 237 241 * echo op block [error [sub_block address value]] > .../ras/ras_ctrl 242 + * 243 + * Parameters: 238 244 * 239 245 * op: disable, enable, inject 240 246 * disable: only block is needed ··· 271 265 * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count 272 266 * 273 267 * .. note:: 274 - * Operation is only allowed on blocks which are supported. 268 + * Operations are only allowed on blocks which are supported. 275 269 * Please check ras mask at /sys/module/amdgpu/parameters/ras_mask 270 + * to see which blocks support RAS on a particular asic. 271 + * 276 272 */ 277 273 static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *buf, 278 274 size_t size, loff_t *pos) ··· 330 322 * DOC: AMDGPU RAS debugfs EEPROM table reset interface 331 323 * 332 324 * Some boards contain an EEPROM which is used to persistently store a list of 333 - * bad pages containing ECC errors detected in vram. This interface provides 325 + * bad pages which experiences ECC errors in vram. This interface provides 334 326 * a way to reset the EEPROM, e.g., after testing error injection. 
335 327 * 336 328 * Usage: ··· 370 362 /** 371 363 * DOC: AMDGPU RAS sysfs Error Count Interface 372 364 * 373 - * It allows user to read the error count for each IP block on the gpu through 365 + * It allows the user to read the error count for each IP block on the gpu through 374 366 * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count 375 367 * 376 368 * It outputs the multiple lines which report the uncorrected (ue) and corrected ··· 1035 1027 } 1036 1028 /* sysfs end */ 1037 1029 1030 + /** 1031 + * DOC: AMDGPU RAS Reboot Behavior for Unrecoverable Errors 1032 + * 1033 + * Normally when there is an uncorrectable error, the driver will reset 1034 + * the GPU to recover. However, in the event of an unrecoverable error, 1035 + * the driver provides an interface to reboot the system automatically 1036 + * in that event. 1037 + * 1038 + * The following file in debugfs provides that interface: 1039 + * /sys/kernel/debug/dri/[0/1/2...]/ras/auto_reboot 1040 + * 1041 + * Usage: 1042 + * 1043 + * .. code-block:: bash 1044 + * 1045 + * echo true > .../ras/auto_reboot 1046 + * 1047 + */ 1038 1048 /* debugfs begin */ 1039 1049 static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev) 1040 1050 {
+2
drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
··· 138 138 } 139 139 140 140 dma_fence_put(fence); 141 + fence = NULL; 141 142 142 143 r = amdgpu_bo_kmap(vram_obj, &vram_map); 143 144 if (r) { ··· 184 183 } 185 184 186 185 dma_fence_put(fence); 186 + fence = NULL; 187 187 188 188 r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map); 189 189 if (r) {
+3 -3
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
··· 1906 1906 void *stolen_vga_buf; 1907 1907 /* return the VGA stolen memory (if any) back to VRAM */ 1908 1908 amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf); 1909 - 1910 - /* return the IP Discovery TMR memory back to VRAM */ 1911 - amdgpu_bo_free_kernel(&adev->discovery_memory, NULL, NULL); 1912 1909 } 1913 1910 1914 1911 /** ··· 1918 1921 1919 1922 amdgpu_ttm_debugfs_fini(adev); 1920 1923 amdgpu_ttm_training_reserve_vram_fini(adev); 1924 + /* return the IP Discovery TMR memory back to VRAM */ 1925 + amdgpu_bo_free_kernel(&adev->discovery_memory, NULL, NULL); 1921 1926 amdgpu_ttm_fw_reserve_vram_fini(adev); 1927 + 1922 1928 if (adev->mman.aper_base_kaddr) 1923 1929 iounmap(adev->mman.aper_base_kaddr); 1924 1930 adev->mman.aper_base_kaddr = NULL;
+9 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
··· 1418 1418 uint64_t incr, entry_end, pe_start; 1419 1419 struct amdgpu_bo *pt; 1420 1420 1421 + /* make sure that the page tables covering the address range are 1422 + * actually allocated 1423 + */ 1421 1424 r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor, 1422 1425 params->direct); 1423 1426 if (r) ··· 1494 1491 } while (frag_start < entry_end); 1495 1492 1496 1493 if (amdgpu_vm_pt_descendant(adev, &cursor)) { 1497 - /* Free all child entries */ 1494 + /* Free all child entries. 1495 + * Update the tables with the flags and addresses and free up subsequent 1496 + * tables in the case of huge pages or freed up areas. 1497 + * This is the maximum you can free, because all other page tables are not 1498 + * completely covered by the range and so potentially still in use. 1499 + */ 1498 1500 while (cursor.pfn < frag_start) { 1499 1501 amdgpu_vm_free_pts(adev, params->vm, &cursor); 1500 1502 amdgpu_vm_pt_next(adev, &cursor);
+39 -3
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
··· 274 274 { 275 275 int ret = 0; 276 276 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0); 277 + struct amdgpu_device *tmp_adev; 278 + bool update_hive_pstate = true; 279 + bool is_high_pstate = pstate && adev->asic_type == CHIP_VEGA20; 277 280 278 281 if (!hive) 279 282 return 0; 280 283 281 - if (hive->pstate == pstate) 282 - return 0; 284 + mutex_lock(&hive->hive_lock); 285 + 286 + if (hive->pstate == pstate) { 287 + adev->pstate = is_high_pstate ? pstate : adev->pstate; 288 + goto out; 289 + } 283 290 284 291 dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate); 285 292 286 293 if (is_support_sw_smu_xgmi(adev)) 287 294 ret = smu_set_xgmi_pstate(&adev->smu, pstate); 288 - if (ret) 295 + else if (adev->powerplay.pp_funcs && 296 + adev->powerplay.pp_funcs->set_xgmi_pstate) 297 + ret = adev->powerplay.pp_funcs->set_xgmi_pstate(adev->powerplay.pp_handle, 298 + pstate); 299 + 300 + if (ret) { 289 301 dev_err(adev->dev, 290 302 "XGMI: Set pstate failure on device %llx, hive %llx, ret %d", 291 303 adev->gmc.xgmi.node_id, 292 304 adev->gmc.xgmi.hive_id, ret); 305 + goto out; 306 + } 307 + 308 + /* Update device pstate */ 309 + adev->pstate = pstate; 310 + 311 + /* 312 + * Update the hive pstate only all devices of the hive 313 + * are in the same pstate 314 + */ 315 + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { 316 + if (tmp_adev->pstate != adev->pstate) { 317 + update_hive_pstate = false; 318 + break; 319 + } 320 + } 321 + if (update_hive_pstate || is_high_pstate) 322 + hive->pstate = pstate; 323 + 324 + out: 325 + mutex_unlock(&hive->hive_lock); 293 326 294 327 return ret; 295 328 } ··· 396 363 adev->gmc.xgmi.node_id, adev->gmc.xgmi.hive_id); 397 364 goto exit; 398 365 } 366 + 367 + /* Set default device pstate */ 368 + adev->pstate = -1; 399 369 400 370 top_info = &adev->psp.xgmi_context.top_info; 401 371
+20 -4
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
··· 2738 2738 * And it's needed by gfxoff feature. 2739 2739 */ 2740 2740 if (adev->gfx.rlc.is_rlc_v2_1) { 2741 - gfx_v9_1_init_rlc_save_restore_list(adev); 2741 + if (adev->asic_type == CHIP_VEGA12 || 2742 + (adev->asic_type == CHIP_RAVEN && 2743 + adev->rev_id >= 8)) 2744 + gfx_v9_1_init_rlc_save_restore_list(adev); 2742 2745 gfx_v9_0_enable_save_restore_machine(adev); 2743 2746 } 2744 2747 ··· 3892 3889 uint64_t clock; 3893 3890 3894 3891 mutex_lock(&adev->gfx.gpu_clock_mutex); 3895 - WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 3896 - clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | 3897 - ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 3892 + if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) { 3893 + uint32_t tmp, lsb, msb, i = 0; 3894 + do { 3895 + if (i != 0) 3896 + udelay(1); 3897 + tmp = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB); 3898 + lsb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_LSB); 3899 + msb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB); 3900 + i++; 3901 + } while (unlikely(tmp != msb) && (i < adev->usec_timeout)); 3902 + clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL); 3903 + } else { 3904 + WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 3905 + clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | 3906 + ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 3907 + } 3898 3908 mutex_unlock(&adev->gfx.gpu_clock_mutex); 3899 3909 return clock; 3900 3910 }
+11
drivers/gpu/drm/amd/amdgpu/nv.c
··· 539 539 return false; 540 540 } 541 541 542 + static uint64_t nv_get_pcie_replay_count(struct amdgpu_device *adev) 543 + { 544 + 545 + /* TODO 546 + * dummy implement for pcie_replay_count sysfs interface 547 + * */ 548 + 549 + return 0; 550 + } 551 + 542 552 static void nv_init_doorbell_index(struct amdgpu_device *adev) 543 553 { 544 554 adev->doorbell_index.kiq = AMDGPU_NAVI10_DOORBELL_KIQ; ··· 596 586 .need_full_reset = &nv_need_full_reset, 597 587 .get_pcie_usage = &nv_get_pcie_usage, 598 588 .need_reset_on_init = &nv_need_reset_on_init, 589 + .get_pcie_replay_count = &nv_get_pcie_replay_count, 599 590 }; 600 591 601 592 static int nv_common_early_init(void *handle)
+6 -2
drivers/gpu/drm/amd/amdgpu/soc15.c
··· 1145 1145 AMD_CG_SUPPORT_SDMA_LS | 1146 1146 AMD_CG_SUPPORT_VCN_MGCG; 1147 1147 1148 - adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN; 1148 + adev->pg_flags = AMD_PG_SUPPORT_SDMA | 1149 + AMD_PG_SUPPORT_VCN | 1150 + AMD_PG_SUPPORT_VCN_DPG; 1149 1151 } else if (adev->pdev->device == 0x15d8) { 1150 1152 adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | 1151 1153 AMD_CG_SUPPORT_GFX_MGLS | ··· 1190 1188 AMD_CG_SUPPORT_SDMA_LS | 1191 1189 AMD_CG_SUPPORT_VCN_MGCG; 1192 1190 1193 - adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN; 1191 + adev->pg_flags = AMD_PG_SUPPORT_SDMA | 1192 + AMD_PG_SUPPORT_VCN | 1193 + AMD_PG_SUPPORT_VCN_DPG; 1194 1194 } 1195 1195 break; 1196 1196 case CHIP_ARCTURUS:
+1 -1
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
··· 4239 4239 result = MODE_OK; 4240 4240 else 4241 4241 DRM_DEBUG_KMS("Mode %dx%d (clk %d) failed DC validation with error %d\n", 4242 - mode->vdisplay, 4243 4242 mode->hdisplay, 4243 + mode->vdisplay, 4244 4244 mode->clock, 4245 4245 dc_result); 4246 4246
-9
drivers/gpu/drm/amd/display/dc/core/dc_link.c
··· 3027 3027 CONTROLLER_DP_TEST_PATTERN_VIDEOMODE, 3028 3028 COLOR_DEPTH_UNDEFINED); 3029 3029 3030 - /* This second call is needed to reconfigure the DIG 3031 - * as a workaround for the incorrect value being applied 3032 - * from transmitter control. 3033 - */ 3034 - if (!dc_is_virtual_signal(pipe_ctx->stream->signal)) 3035 - stream->link->link_enc->funcs->setup( 3036 - stream->link->link_enc, 3037 - pipe_ctx->stream->signal); 3038 - 3039 3030 #ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT 3040 3031 if (pipe_ctx->stream->timing.flags.DSC) { 3041 3032 if (dc_is_dp_signal(pipe_ctx->stream->signal) ||
+4
drivers/gpu/drm/amd/include/kgd_pp_interface.h
··· 220 220 ((group) << PP_GROUP_SHIFT | (block) << PP_BLOCK_SHIFT | \ 221 221 (support) << PP_STATE_SUPPORT_SHIFT | (state) << PP_STATE_SHIFT) 222 222 223 + #define XGMI_MODE_PSTATE_D3 0 224 + #define XGMI_MODE_PSTATE_D0 1 225 + 223 226 struct seq_file; 224 227 enum amd_pp_clock_type; 225 228 struct amd_pp_simple_clock_info; ··· 321 318 int (*set_ppfeature_status)(void *handle, uint64_t ppfeature_masks); 322 319 int (*asic_reset_mode_2)(void *handle); 323 320 int (*set_df_cstate)(void *handle, enum pp_df_cstate state); 321 + int (*set_xgmi_pstate)(void *handle, uint32_t pstate); 324 322 }; 325 323 326 324 #endif
+26
drivers/gpu/drm/amd/powerplay/amd_powerplay.c
··· 969 969 workload = hwmgr->workload_setting[index]; 970 970 } 971 971 972 + if (type == PP_SMC_POWER_PROFILE_COMPUTE && 973 + hwmgr->hwmgr_func->disable_power_features_for_compute_performance) { 974 + if (hwmgr->hwmgr_func->disable_power_features_for_compute_performance(hwmgr, en)) { 975 + mutex_unlock(&hwmgr->smu_lock); 976 + return -EINVAL; 977 + } 978 + } 979 + 972 980 if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) 973 981 hwmgr->hwmgr_func->set_power_profile_mode(hwmgr, &workload, 0); 974 982 mutex_unlock(&hwmgr->smu_lock); ··· 1574 1566 return 0; 1575 1567 } 1576 1568 1569 + static int pp_set_xgmi_pstate(void *handle, uint32_t pstate) 1570 + { 1571 + struct pp_hwmgr *hwmgr = handle; 1572 + 1573 + if (!hwmgr) 1574 + return -EINVAL; 1575 + 1576 + if (!hwmgr->pm_en || !hwmgr->hwmgr_func->set_xgmi_pstate) 1577 + return 0; 1578 + 1579 + mutex_lock(&hwmgr->smu_lock); 1580 + hwmgr->hwmgr_func->set_xgmi_pstate(hwmgr, pstate); 1581 + mutex_unlock(&hwmgr->smu_lock); 1582 + 1583 + return 0; 1584 + } 1585 + 1577 1586 static const struct amd_pm_funcs pp_dpm_funcs = { 1578 1587 .load_firmware = pp_dpm_load_fw, 1579 1588 .wait_for_fw_loading_complete = pp_dpm_fw_loading_complete, ··· 1650 1625 .asic_reset_mode_2 = pp_asic_reset_mode_2, 1651 1626 .smu_i2c_bus_access = pp_smu_i2c_bus_access, 1652 1627 .set_df_cstate = pp_set_df_cstate, 1628 + .set_xgmi_pstate = pp_set_xgmi_pstate, 1653 1629 };
+20 -9
drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
··· 383 383 return true; 384 384 } 385 385 386 - 386 + /** 387 + * smu_dpm_set_power_gate - power gate/ungate the specific IP block 388 + * 389 + * @smu: smu_context pointer 390 + * @block_type: the IP block to power gate/ungate 391 + * @gate: to power gate if true, ungate otherwise 392 + * 393 + * This API uses no smu->mutex lock protection due to: 394 + * 1. It is either called by other IP block(gfx/sdma/vcn/uvd/vce). 395 + * This is guarded to be race condition free by the caller. 396 + * 2. Or get called on user setting request of power_dpm_force_performance_level. 397 + * Under this case, the smu->mutex lock protection is already enforced on 398 + * the parent API smu_force_performance_level of the call path. 399 + */ 387 400 int smu_dpm_set_power_gate(struct smu_context *smu, uint32_t block_type, 388 401 bool gate) 389 402 { 390 403 int ret = 0; 391 - 392 - mutex_lock(&smu->mutex); 393 404 394 405 switch (block_type) { 395 406 case AMD_IP_BLOCK_TYPE_UVD: ··· 418 407 default: 419 408 break; 420 409 } 421 - 422 - mutex_unlock(&smu->mutex); 423 410 424 411 return ret; 425 412 } ··· 535 526 536 527 bool is_support_sw_smu_xgmi(struct amdgpu_device *adev) 537 528 { 538 - if (amdgpu_dpm != 1) 529 + if (!is_support_sw_smu(adev)) 539 530 return false; 540 531 541 532 if (adev->asic_type == CHIP_VEGA20) ··· 714 705 { 715 706 struct smu_context *smu = &adev->smu; 716 707 708 + if (adev->pm.pp_feature & PP_OVERDRIVE_MASK) 709 + smu->od_enabled = true; 710 + 717 711 switch (adev->asic_type) { 718 712 case CHIP_VEGA20: 719 713 vega20_set_ppt_funcs(smu); ··· 728 716 break; 729 717 case CHIP_ARCTURUS: 730 718 arcturus_set_ppt_funcs(smu); 719 + /* OD is not supported on Arcturus */ 720 + smu->od_enabled =false; 731 721 break; 732 722 case CHIP_RENOIR: 733 723 renoir_set_ppt_funcs(smu); ··· 737 723 default: 738 724 return -EINVAL; 739 725 } 740 - 741 - if (adev->pm.pp_feature & PP_OVERDRIVE_MASK) 742 - smu->od_enabled = true; 743 726 744 727 return 0; 745 728 }
+62
drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
··· 3689 3689 PP_ASSERT_WITH_CODE(!result, 3690 3690 "Failed to upload PPtable!", return result); 3691 3691 3692 + /* 3693 + * If a custom pp table is loaded, set DPMTABLE_OD_UPDATE_VDDC flag. 3694 + * That effectively disables AVFS feature. 3695 + */ 3696 + if(hwmgr->hardcode_pp_table != NULL) 3697 + data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_VDDC; 3698 + 3692 3699 vega10_update_avfs(hwmgr); 3693 3700 3694 3701 /* ··· 5270 5263 return 0; 5271 5264 } 5272 5265 5266 + static int vega10_disable_power_features_for_compute_performance(struct pp_hwmgr *hwmgr, bool disable) 5267 + { 5268 + struct vega10_hwmgr *data = hwmgr->backend; 5269 + uint32_t feature_mask = 0; 5270 + 5271 + if (disable) { 5272 + feature_mask |= data->smu_features[GNLD_ULV].enabled ? 5273 + data->smu_features[GNLD_ULV].smu_feature_bitmap : 0; 5274 + feature_mask |= data->smu_features[GNLD_DS_GFXCLK].enabled ? 5275 + data->smu_features[GNLD_DS_GFXCLK].smu_feature_bitmap : 0; 5276 + feature_mask |= data->smu_features[GNLD_DS_SOCCLK].enabled ? 5277 + data->smu_features[GNLD_DS_SOCCLK].smu_feature_bitmap : 0; 5278 + feature_mask |= data->smu_features[GNLD_DS_LCLK].enabled ? 5279 + data->smu_features[GNLD_DS_LCLK].smu_feature_bitmap : 0; 5280 + feature_mask |= data->smu_features[GNLD_DS_DCEFCLK].enabled ? 5281 + data->smu_features[GNLD_DS_DCEFCLK].smu_feature_bitmap : 0; 5282 + } else { 5283 + feature_mask |= (!data->smu_features[GNLD_ULV].enabled) ? 5284 + data->smu_features[GNLD_ULV].smu_feature_bitmap : 0; 5285 + feature_mask |= (!data->smu_features[GNLD_DS_GFXCLK].enabled) ? 5286 + data->smu_features[GNLD_DS_GFXCLK].smu_feature_bitmap : 0; 5287 + feature_mask |= (!data->smu_features[GNLD_DS_SOCCLK].enabled) ? 5288 + data->smu_features[GNLD_DS_SOCCLK].smu_feature_bitmap : 0; 5289 + feature_mask |= (!data->smu_features[GNLD_DS_LCLK].enabled) ? 5290 + data->smu_features[GNLD_DS_LCLK].smu_feature_bitmap : 0; 5291 + feature_mask |= (!data->smu_features[GNLD_DS_DCEFCLK].enabled) ? 
5292 + data->smu_features[GNLD_DS_DCEFCLK].smu_feature_bitmap : 0; 5293 + } 5294 + 5295 + if (feature_mask) 5296 + PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr, 5297 + !disable, feature_mask), 5298 + "enable/disable power features for compute performance Failed!", 5299 + return -EINVAL); 5300 + 5301 + if (disable) { 5302 + data->smu_features[GNLD_ULV].enabled = false; 5303 + data->smu_features[GNLD_DS_GFXCLK].enabled = false; 5304 + data->smu_features[GNLD_DS_SOCCLK].enabled = false; 5305 + data->smu_features[GNLD_DS_LCLK].enabled = false; 5306 + data->smu_features[GNLD_DS_DCEFCLK].enabled = false; 5307 + } else { 5308 + data->smu_features[GNLD_ULV].enabled = true; 5309 + data->smu_features[GNLD_DS_GFXCLK].enabled = true; 5310 + data->smu_features[GNLD_DS_SOCCLK].enabled = true; 5311 + data->smu_features[GNLD_DS_LCLK].enabled = true; 5312 + data->smu_features[GNLD_DS_DCEFCLK].enabled = true; 5313 + } 5314 + 5315 + return 0; 5316 + 5317 + } 5318 + 5273 5319 static const struct pp_hwmgr_func vega10_hwmgr_funcs = { 5274 5320 .backend_init = vega10_hwmgr_backend_init, 5275 5321 .backend_fini = vega10_hwmgr_backend_fini, ··· 5390 5330 .get_ppfeature_status = vega10_get_ppfeature_status, 5391 5331 .set_ppfeature_status = vega10_set_ppfeature_status, 5392 5332 .set_mp1_state = vega10_set_mp1_state, 5333 + .disable_power_features_for_compute_performance = 5334 + vega10_disable_power_features_for_compute_performance, 5393 5335 }; 5394 5336 5395 5337 int vega10_hwmgr_init(struct pp_hwmgr *hwmgr)
+15
drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
··· 4176 4176 return ret; 4177 4177 } 4178 4178 4179 + static int vega20_set_xgmi_pstate(struct pp_hwmgr *hwmgr, 4180 + uint32_t pstate) 4181 + { 4182 + int ret; 4183 + 4184 + ret = smum_send_msg_to_smc_with_parameter(hwmgr, 4185 + PPSMC_MSG_SetXgmiMode, 4186 + pstate ? XGMI_MODE_PSTATE_D0 : XGMI_MODE_PSTATE_D3); 4187 + if (ret) 4188 + pr_err("SetXgmiPstate failed!\n"); 4189 + 4190 + return ret; 4191 + } 4192 + 4179 4193 static const struct pp_hwmgr_func vega20_hwmgr_funcs = { 4180 4194 /* init/fini related */ 4181 4195 .backend_init = vega20_hwmgr_backend_init, ··· 4259 4245 .set_mp1_state = vega20_set_mp1_state, 4260 4246 .smu_i2c_bus_access = vega20_smu_i2c_bus_access, 4261 4247 .set_df_cstate = vega20_set_df_cstate, 4248 + .set_xgmi_pstate = vega20_set_xgmi_pstate, 4262 4249 }; 4263 4250 4264 4251 int vega20_hwmgr_init(struct pp_hwmgr *hwmgr)
+3
drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
··· 356 356 int (*asic_reset)(struct pp_hwmgr *hwmgr, enum SMU_ASIC_RESET_MODE mode); 357 357 int (*smu_i2c_bus_access)(struct pp_hwmgr *hwmgr, bool aquire); 358 358 int (*set_df_cstate)(struct pp_hwmgr *hwmgr, enum pp_df_cstate state); 359 + int (*set_xgmi_pstate)(struct pp_hwmgr *hwmgr, uint32_t pstate); 360 + int (*disable_power_features_for_compute_performance)(struct pp_hwmgr *hwmgr, 361 + bool disable); 359 362 }; 360 363 361 364 struct pp_table_func {
+1 -1
drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h
··· 159 159 //FIXME need updating 160 160 // Debug Overrides Bitmask 161 161 #define DPM_OVERRIDE_DISABLE_UCLK_PID 0x00000001 162 - #define DPM_OVERRIDE_ENABLE_VOLT_LINK_VCN_FCLK 0x00000002 162 + #define DPM_OVERRIDE_DISABLE_VOLT_LINK_VCN_FCLK 0x00000002 163 163 164 164 // I2C Config Bit Defines 165 165 #define I2C_CONTROLLER_ENABLED 1
+1 -1
drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h
··· 27 27 28 28 #define SMU11_DRIVER_IF_VERSION_INV 0xFFFFFFFF 29 29 #define SMU11_DRIVER_IF_VERSION_VG20 0x13 30 - #define SMU11_DRIVER_IF_VERSION_ARCT 0x0F 30 + #define SMU11_DRIVER_IF_VERSION_ARCT 0x10 31 31 #define SMU11_DRIVER_IF_VERSION_NV10 0x33 32 32 #define SMU11_DRIVER_IF_VERSION_NV14 0x34 33 33
+3 -1
drivers/gpu/drm/amd/powerplay/renoir_ppt.c
··· 180 180 int i, size = 0, ret = 0; 181 181 uint32_t cur_value = 0, value = 0, count = 0, min = 0, max = 0; 182 182 DpmClocks_t *clk_table = smu->smu_table.clocks_table; 183 - SmuMetrics_t metrics = {0}; 183 + SmuMetrics_t metrics; 184 184 185 185 if (!clk_table || clk_type >= SMU_CLK_COUNT) 186 186 return -EINVAL; 187 + 188 + memset(&metrics, 0, sizeof(metrics)); 187 189 188 190 ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0, 189 191 (void *)&metrics, false);
+3 -4
drivers/gpu/drm/amd/powerplay/smu_v11_0.c
··· 368 368 version_major = le16_to_cpu(hdr->header.header_version_major); 369 369 version_minor = le16_to_cpu(hdr->header.header_version_minor); 370 370 if (version_major == 2 && smu->smu_table.boot_values.pp_table_id > 0) { 371 + pr_info("use driver provided pptable %d\n", smu->smu_table.boot_values.pp_table_id); 371 372 switch (version_minor) { 372 373 case 0: 373 374 ret = smu_v11_0_set_pptable_v2_0(smu, &table, &size); ··· 385 384 return ret; 386 385 387 386 } else { 387 + pr_info("use vbios provided pptable\n"); 388 388 index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, 389 389 powerplayinfo); 390 390 ··· 1465 1463 return ret; 1466 1464 } 1467 1465 1468 - #define XGMI_STATE_D0 1 1469 - #define XGMI_STATE_D3 0 1470 - 1471 1466 int smu_v11_0_set_xgmi_pstate(struct smu_context *smu, 1472 1467 uint32_t pstate) 1473 1468 { 1474 1469 int ret = 0; 1475 1470 ret = smu_send_smc_msg_with_param(smu, 1476 1471 SMU_MSG_SetXgmiMode, 1477 - pstate ? XGMI_STATE_D0 : XGMI_STATE_D3); 1472 + pstate ? XGMI_MODE_PSTATE_D0 : XGMI_MODE_PSTATE_D3); 1478 1473 return ret; 1479 1474 } 1480 1475
+8 -4
drivers/gpu/drm/scheduler/sched_entity.c
··· 23 23 24 24 #include <linux/kthread.h> 25 25 #include <linux/slab.h> 26 + #include <linux/completion.h> 26 27 27 28 #include <drm/drm_print.h> 28 29 #include <drm/gpu_scheduler.h> ··· 68 67 GFP_KERNEL); 69 68 if (!entity->rq_list) 70 69 return -ENOMEM; 70 + 71 + init_completion(&entity->entity_idle); 71 72 72 73 for (i = 0; i < num_rq_list; ++i) 73 74 entity->rq_list[i] = rq_list[i]; ··· 289 286 */ 290 287 if (spsc_queue_count(&entity->job_queue)) { 291 288 if (sched) { 292 - /* Park the kernel for a moment to make sure it isn't processing 293 - * our enity. 289 + /* 290 + * Wait for thread to idle to make sure it isn't processing 291 + * this entity. 294 292 */ 295 - kthread_park(sched->thread); 296 - kthread_unpark(sched->thread); 293 + wait_for_completion(&entity->entity_idle); 294 + 297 295 } 298 296 if (entity->dependency) { 299 297 dma_fence_remove_callback(entity->dependency,
+18 -5
drivers/gpu/drm/scheduler/sched_main.c
··· 47 47 #include <linux/kthread.h> 48 48 #include <linux/wait.h> 49 49 #include <linux/sched.h> 50 + #include <linux/completion.h> 50 51 #include <uapi/linux/sched/types.h> 51 52 52 53 #include <drm/drm_print.h> ··· 135 134 list_for_each_entry_continue(entity, &rq->entities, list) { 136 135 if (drm_sched_entity_is_ready(entity)) { 137 136 rq->current_entity = entity; 137 + reinit_completion(&entity->entity_idle); 138 138 spin_unlock(&rq->lock); 139 139 return entity; 140 140 } ··· 146 144 147 145 if (drm_sched_entity_is_ready(entity)) { 148 146 rq->current_entity = entity; 147 + reinit_completion(&entity->entity_idle); 149 148 spin_unlock(&rq->lock); 150 149 return entity; 151 150 } ··· 499 496 fence = sched->ops->run_job(s_job); 500 497 501 498 if (IS_ERR_OR_NULL(fence)) { 499 + if (IS_ERR(fence)) 500 + dma_fence_set_error(&s_fence->finished, PTR_ERR(fence)); 501 + 502 502 s_job->s_fence->parent = NULL; 503 - dma_fence_set_error(&s_fence->finished, PTR_ERR(fence)); 504 503 } else { 505 504 s_job->s_fence->parent = fence; 506 505 } ··· 650 645 struct drm_sched_job *job; 651 646 unsigned long flags; 652 647 653 - /* Don't destroy jobs while the timeout worker is running */ 654 - if (sched->timeout != MAX_SCHEDULE_TIMEOUT && 655 - !cancel_delayed_work(&sched->work_tdr)) 648 + /* 649 + * Don't destroy jobs while the timeout worker is running OR thread 650 + * is being parked and hence assumed to not touch ring_mirror_list 651 + */ 652 + if ((sched->timeout != MAX_SCHEDULE_TIMEOUT && 653 + !cancel_delayed_work(&sched->work_tdr)) || 654 + __kthread_should_park(sched->thread)) 656 655 return NULL; 657 656 658 657 spin_lock_irqsave(&sched->job_list_lock, flags); ··· 733 724 continue; 734 725 735 726 sched_job = drm_sched_entity_pop_job(entity); 727 + 728 + complete(&entity->entity_idle); 729 + 736 730 if (!sched_job) 737 731 continue; 738 732 ··· 758 746 r); 759 747 dma_fence_put(fence); 760 748 } else { 749 + if (IS_ERR(fence)) 750 + 
dma_fence_set_error(&s_fence->finished, PTR_ERR(fence)); 761 751 762 - dma_fence_set_error(&s_fence->finished, PTR_ERR(fence)); 763 752 drm_sched_process_job(NULL, &sched_job->cb); 764 753 } 765 754
+3
include/drm/gpu_scheduler.h
··· 26 26 27 27 #include <drm/spsc_queue.h> 28 28 #include <linux/dma-fence.h> 29 + #include <linux/completion.h> 29 30 30 31 #define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000) 31 32 ··· 72 71 * @last_scheduled: points to the finished fence of the last scheduled job. 73 72 * @last_user: last group leader pushing a job into the entity. 74 73 * @stopped: Marks the enity as removed from rq and destined for termination. 74 + * @entity_idle: Signals when enityt is not in use 75 75 * 76 76 * Entities will emit jobs in order to their corresponding hardware 77 77 * ring, and the scheduler will alternate between entities based on ··· 96 94 struct dma_fence *last_scheduled; 97 95 struct task_struct *last_user; 98 96 bool stopped; 97 + struct completion entity_idle; 99 98 }; 100 99 101 100 /**