Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: fix amdgpu_need_full_reset (v2)

IP types are not an index. Each asic may have a different number
and type of IPs. Properly check the type rather than
using the type id as an index.

v2: fix all the IPs to not use IP type as an idx as well.

Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org

+60 -64
+16 -7
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
··· 2075 2075 if (!adev->ip_block_status[i].valid) 2076 2076 continue; 2077 2077 if (adev->ip_blocks[i].funcs->check_soft_reset) 2078 - adev->ip_blocks[i].funcs->check_soft_reset(adev); 2078 + adev->ip_block_status[i].hang = 2079 + adev->ip_blocks[i].funcs->check_soft_reset(adev); 2079 2080 if (adev->ip_block_status[i].hang) { 2080 2081 DRM_INFO("IP block:%d is hang!\n", i); 2081 2082 asic_hang = true; ··· 2105 2104 2106 2105 static bool amdgpu_need_full_reset(struct amdgpu_device *adev) 2107 2106 { 2108 - if (adev->ip_block_status[AMD_IP_BLOCK_TYPE_GMC].hang || 2109 - adev->ip_block_status[AMD_IP_BLOCK_TYPE_SMC].hang || 2110 - adev->ip_block_status[AMD_IP_BLOCK_TYPE_ACP].hang || 2111 - adev->ip_block_status[AMD_IP_BLOCK_TYPE_DCE].hang) { 2112 - DRM_INFO("Some block need full reset!\n"); 2113 - return true; 2107 + int i; 2108 + 2109 + for (i = 0; i < adev->num_ip_blocks; i++) { 2110 + if (!adev->ip_block_status[i].valid) 2111 + continue; 2112 + if ((adev->ip_blocks[i].type == AMD_IP_BLOCK_TYPE_GMC) || 2113 + (adev->ip_blocks[i].type == AMD_IP_BLOCK_TYPE_SMC) || 2114 + (adev->ip_blocks[i].type == AMD_IP_BLOCK_TYPE_ACP) || 2115 + (adev->ip_blocks[i].type == AMD_IP_BLOCK_TYPE_DCE)) { 2116 + if (adev->ip_block_status[i].hang) { 2117 + DRM_INFO("Some block need full reset!\n"); 2118 + return true; 2119 + } 2120 + } 2114 2121 } 2115 2122 return false; 2116 2123 }
+2 -10
drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
··· 3188 3188 return 0; 3189 3189 } 3190 3190 3191 - static int dce_v10_0_check_soft_reset(void *handle) 3191 + static bool dce_v10_0_check_soft_reset(void *handle) 3192 3192 { 3193 3193 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3194 3194 3195 - if (dce_v10_0_is_display_hung(adev)) 3196 - adev->ip_block_status[AMD_IP_BLOCK_TYPE_DCE].hang = true; 3197 - else 3198 - adev->ip_block_status[AMD_IP_BLOCK_TYPE_DCE].hang = false; 3199 - 3200 - return 0; 3195 + return dce_v10_0_is_display_hung(adev); 3201 3196 } 3202 3197 3203 3198 static int dce_v10_0_soft_reset(void *handle) 3204 3199 { 3205 3200 u32 srbm_soft_reset = 0, tmp; 3206 3201 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3207 - 3208 - if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_DCE].hang) 3209 - return 0; 3210 3202 3211 3203 if (dce_v10_0_is_display_hung(adev)) 3212 3204 srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
+9 -8
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
··· 5144 5144 return -ETIMEDOUT; 5145 5145 } 5146 5146 5147 - static int gfx_v8_0_check_soft_reset(void *handle) 5147 + static bool gfx_v8_0_check_soft_reset(void *handle) 5148 5148 { 5149 5149 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5150 5150 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; ··· 5196 5196 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1); 5197 5197 5198 5198 if (grbm_soft_reset || srbm_soft_reset) { 5199 - adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang = true; 5200 5199 adev->gfx.grbm_soft_reset = grbm_soft_reset; 5201 5200 adev->gfx.srbm_soft_reset = srbm_soft_reset; 5201 + return true; 5202 5202 } else { 5203 - adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang = false; 5204 5203 adev->gfx.grbm_soft_reset = 0; 5205 5204 adev->gfx.srbm_soft_reset = 0; 5205 + return false; 5206 5206 } 5207 - 5208 - return 0; 5209 5207 } 5210 5208 5211 5209 static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev, ··· 5231 5233 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5232 5234 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5233 5235 5234 - if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang) 5236 + if ((!adev->gfx.grbm_soft_reset) && 5237 + (!adev->gfx.srbm_soft_reset)) 5235 5238 return 0; 5236 5239 5237 5240 grbm_soft_reset = adev->gfx.grbm_soft_reset; ··· 5270 5271 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5271 5272 u32 tmp; 5272 5273 5273 - if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang) 5274 + if ((!adev->gfx.grbm_soft_reset) && 5275 + (!adev->gfx.srbm_soft_reset)) 5274 5276 return 0; 5275 5277 5276 5278 grbm_soft_reset = adev->gfx.grbm_soft_reset; ··· 5341 5341 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5342 5342 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5343 5343 5344 - if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang) 5344 + if ((!adev->gfx.grbm_soft_reset) && 5345 + (!adev->gfx.srbm_soft_reset)) 5345 5346 return 0; 5346 5347 5347 5348 grbm_soft_reset = adev->gfx.grbm_soft_reset;
+6 -7
drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
··· 1099 1099 1100 1100 } 1101 1101 1102 - static int gmc_v8_0_check_soft_reset(void *handle) 1102 + static bool gmc_v8_0_check_soft_reset(void *handle) 1103 1103 { 1104 1104 u32 srbm_soft_reset = 0; 1105 1105 struct amdgpu_device *adev = (struct amdgpu_device *)handle; ··· 1116 1116 SRBM_SOFT_RESET, SOFT_RESET_MC, 1); 1117 1117 } 1118 1118 if (srbm_soft_reset) { 1119 - adev->ip_block_status[AMD_IP_BLOCK_TYPE_GMC].hang = true; 1120 1119 adev->mc.srbm_soft_reset = srbm_soft_reset; 1120 + return true; 1121 1121 } else { 1122 - adev->ip_block_status[AMD_IP_BLOCK_TYPE_GMC].hang = false; 1123 1122 adev->mc.srbm_soft_reset = 0; 1123 + return false; 1124 1124 } 1125 - return 0; 1126 1125 } 1127 1126 1128 1127 static int gmc_v8_0_pre_soft_reset(void *handle) 1129 1128 { 1130 1129 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1131 1130 1132 - if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_GMC].hang) 1131 + if (!adev->mc.srbm_soft_reset) 1133 1132 return 0; 1134 1133 1135 1134 gmc_v8_0_mc_stop(adev, &adev->mc.save); ··· 1144 1145 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1145 1146 u32 srbm_soft_reset; 1146 1147 1147 - if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_GMC].hang) 1148 + if (!adev->mc.srbm_soft_reset) 1148 1149 return 0; 1149 1150 srbm_soft_reset = adev->mc.srbm_soft_reset; 1150 1151 ··· 1174 1175 { 1175 1176 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1176 1177 1177 - if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_GMC].hang) 1178 + if (!adev->mc.srbm_soft_reset) 1178 1179 return 0; 1179 1180 1180 1181 gmc_v8_0_mc_resume(adev, &adev->mc.save);
+6 -8
drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
··· 1268 1268 return -ETIMEDOUT; 1269 1269 } 1270 1270 1271 - static int sdma_v3_0_check_soft_reset(void *handle) 1271 + static bool sdma_v3_0_check_soft_reset(void *handle) 1272 1272 { 1273 1273 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1274 1274 u32 srbm_soft_reset = 0; ··· 1281 1281 } 1282 1282 1283 1283 if (srbm_soft_reset) { 1284 - adev->ip_block_status[AMD_IP_BLOCK_TYPE_SDMA].hang = true; 1285 1284 adev->sdma.srbm_soft_reset = srbm_soft_reset; 1285 + return true; 1286 1286 } else { 1287 - adev->ip_block_status[AMD_IP_BLOCK_TYPE_SDMA].hang = false; 1288 1287 adev->sdma.srbm_soft_reset = 0; 1288 + return false; 1289 1289 } 1290 - 1291 - return 0; 1292 1290 } 1293 1291 1294 1292 static int sdma_v3_0_pre_soft_reset(void *handle) ··· 1294 1296 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1295 1297 u32 srbm_soft_reset = 0; 1296 1298 1297 - if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_SDMA].hang) 1299 + if (!adev->sdma.srbm_soft_reset) 1298 1300 return 0; 1299 1301 1300 1302 srbm_soft_reset = adev->sdma.srbm_soft_reset; ··· 1313 1315 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1314 1316 u32 srbm_soft_reset = 0; 1315 1317 1316 - if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_SDMA].hang) 1318 + if (!adev->sdma.srbm_soft_reset) 1317 1319 return 0; 1318 1320 1319 1321 srbm_soft_reset = adev->sdma.srbm_soft_reset; ··· 1333 1335 u32 srbm_soft_reset = 0; 1334 1336 u32 tmp; 1335 1337 1336 - if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_SDMA].hang) 1338 + if (!adev->sdma.srbm_soft_reset) 1337 1339 return 0; 1338 1340 1339 1341 srbm_soft_reset = adev->sdma.srbm_soft_reset;
+6 -8
drivers/gpu/drm/amd/amdgpu/tonga_ih.c
··· 373 373 return -ETIMEDOUT; 374 374 } 375 375 376 - static int tonga_ih_check_soft_reset(void *handle) 376 + static bool tonga_ih_check_soft_reset(void *handle) 377 377 { 378 378 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 379 379 u32 srbm_soft_reset = 0; ··· 384 384 SOFT_RESET_IH, 1); 385 385 386 386 if (srbm_soft_reset) { 387 - adev->ip_block_status[AMD_IP_BLOCK_TYPE_IH].hang = true; 388 387 adev->irq.srbm_soft_reset = srbm_soft_reset; 388 + return true; 389 389 } else { 390 - adev->ip_block_status[AMD_IP_BLOCK_TYPE_IH].hang = false; 391 390 adev->irq.srbm_soft_reset = 0; 391 + return false; 392 392 } 393 - 394 - return 0; 395 393 } 396 394 397 395 static int tonga_ih_pre_soft_reset(void *handle) 398 396 { 399 397 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 400 398 401 - if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_IH].hang) 399 + if (!adev->irq.srbm_soft_reset) 402 400 return 0; 403 401 404 402 return tonga_ih_hw_fini(adev); ··· 406 408 { 407 409 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 408 410 409 - if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_IH].hang) 411 + if (!adev->irq.srbm_soft_reset) 410 412 return 0; 411 413 412 414 return tonga_ih_hw_init(adev); ··· 417 419 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 418 420 u32 srbm_soft_reset; 419 421 420 - if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_IH].hang) 422 + if (!adev->irq.srbm_soft_reset) 421 423 return 0; 422 424 srbm_soft_reset = adev->irq.srbm_soft_reset; 423 425
+7 -7
drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
··· 770 770 } 771 771 772 772 #define AMDGPU_UVD_STATUS_BUSY_MASK 0xfd 773 - static int uvd_v6_0_check_soft_reset(void *handle) 773 + static bool uvd_v6_0_check_soft_reset(void *handle) 774 774 { 775 775 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 776 776 u32 srbm_soft_reset = 0; ··· 782 782 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_UVD, 1); 783 783 784 784 if (srbm_soft_reset) { 785 - adev->ip_block_status[AMD_IP_BLOCK_TYPE_UVD].hang = true; 786 785 adev->uvd.srbm_soft_reset = srbm_soft_reset; 786 + return true; 787 787 } else { 788 - adev->ip_block_status[AMD_IP_BLOCK_TYPE_UVD].hang = false; 789 788 adev->uvd.srbm_soft_reset = 0; 789 + return false; 790 790 } 791 - return 0; 792 791 } 792 + 793 793 static int uvd_v6_0_pre_soft_reset(void *handle) 794 794 { 795 795 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 796 796 797 - if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_UVD].hang) 797 + if (!adev->uvd.srbm_soft_reset) 798 798 return 0; 799 799 800 800 uvd_v6_0_stop(adev); ··· 806 806 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 807 807 u32 srbm_soft_reset; 808 808 809 - if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_UVD].hang) 809 + if (!adev->uvd.srbm_soft_reset) 810 810 return 0; 811 811 srbm_soft_reset = adev->uvd.srbm_soft_reset; 812 812 ··· 836 836 { 837 837 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 838 838 839 - if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_UVD].hang) 839 + if (!adev->uvd.srbm_soft_reset) 840 840 return 0; 841 841 842 842 mdelay(5);
+7 -8
drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
··· 561 561 #define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \ 562 562 VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK) 563 563 564 - static int vce_v3_0_check_soft_reset(void *handle) 564 + static bool vce_v3_0_check_soft_reset(void *handle) 565 565 { 566 566 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 567 567 u32 srbm_soft_reset = 0; ··· 591 591 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1); 592 592 } 593 593 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0); 594 + mutex_unlock(&adev->grbm_idx_mutex); 594 595 595 596 if (srbm_soft_reset) { 596 - adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang = true; 597 597 adev->vce.srbm_soft_reset = srbm_soft_reset; 598 + return true; 598 599 } else { 599 - adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang = false; 600 600 adev->vce.srbm_soft_reset = 0; 601 + return false; 601 602 } 602 - mutex_unlock(&adev->grbm_idx_mutex); 603 - return 0; 604 603 } 605 604 606 605 static int vce_v3_0_soft_reset(void *handle) ··· 607 608 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 608 609 u32 srbm_soft_reset; 609 610 610 - if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang) 611 + if (!adev->vce.srbm_soft_reset) 611 612 return 0; 612 613 srbm_soft_reset = adev->vce.srbm_soft_reset; 613 614 ··· 637 638 { 638 639 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 639 640 640 - if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang) 641 + if (!adev->vce.srbm_soft_reset) 641 642 return 0; 642 643 643 644 mdelay(5); ··· 650 651 { 651 652 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 652 653 653 - if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang) 654 + if (!adev->vce.srbm_soft_reset) 654 655 return 0; 655 656 656 657 mdelay(5);
+1 -1
drivers/gpu/drm/amd/include/amd_shared.h
··· 165 165 /* poll for idle */ 166 166 int (*wait_for_idle)(void *handle); 167 167 /* check soft reset the IP block */ 168 - int (*check_soft_reset)(void *handle); 168 + bool (*check_soft_reset)(void *handle); 169 169 /* pre soft reset the IP block */ 170 170 int (*pre_soft_reset)(void *handle); 171 171 /* soft reset the IP block */