Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: Add init level for post reset reinit

When a device needs to be reset before initialization, not all IPs are
required to be initialized before the reset. In such cases, it is necessary
to identify whether an IP/feature is being initialized for the first time
or reinitialized after a reset.

Add a RESET_RECOVERY init level to identify the post-reset reinitialization
phase. This only provides device-level identification; IPs/features may
also choose to track their state independently.

Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Acked-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Lijo Lazar and committed by
Alex Deucher
a86e0c0e 6719ab82

+38 -3
+4
drivers/gpu/drm/amd/amdgpu/aldebaran.c
··· 330 330 } 331 331 332 332 list_for_each_entry(tmp_adev, reset_device_list, reset_list) { 333 + amdgpu_set_init_level(tmp_adev, 334 + AMDGPU_INIT_LEVEL_RESET_RECOVERY); 333 335 dev_info(tmp_adev->dev, 334 336 "GPU reset succeeded, trying to resume\n"); 335 337 r = aldebaran_mode2_restore_ip(tmp_adev); ··· 377 375 tmp_adev); 378 376 379 377 if (!r) { 378 + amdgpu_set_init_level(tmp_adev, 379 + AMDGPU_INIT_LEVEL_DEFAULT); 380 380 amdgpu_irq_gpu_reset_resume_helper(tmp_adev); 381 381 382 382 r = amdgpu_ib_ring_tests(tmp_adev);
+1
drivers/gpu/drm/amd/amdgpu/amdgpu.h
··· 839 839 enum amdgpu_init_lvl_id { 840 840 AMDGPU_INIT_LEVEL_DEFAULT, 841 841 AMDGPU_INIT_LEVEL_MINIMAL_XGMI, 842 + AMDGPU_INIT_LEVEL_RESET_RECOVERY, 842 843 }; 843 844 844 845 struct amdgpu_init_level {
+22 -3
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
··· 156 156 .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL, 157 157 }; 158 158 159 + struct amdgpu_init_level amdgpu_init_recovery = { 160 + .level = AMDGPU_INIT_LEVEL_RESET_RECOVERY, 161 + .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL, 162 + }; 163 + 159 164 /* 160 165 * Minimal blocks needed to be initialized before a XGMI hive can be reset. This 161 166 * is used for cases like reset on initialization where the entire hive needs to ··· 186 181 switch (lvl) { 187 182 case AMDGPU_INIT_LEVEL_MINIMAL_XGMI: 188 183 adev->init_lvl = &amdgpu_init_minimal_xgmi; 184 + break; 185 + case AMDGPU_INIT_LEVEL_RESET_RECOVERY: 186 + adev->init_lvl = &amdgpu_init_recovery; 189 187 break; 190 188 case AMDGPU_INIT_LEVEL_DEFAULT: 191 189 fallthrough; ··· 5427 5419 struct list_head *device_list_handle; 5428 5420 bool full_reset, vram_lost = false; 5429 5421 struct amdgpu_device *tmp_adev; 5430 - int r; 5422 + int r, init_level; 5431 5423 5432 5424 device_list_handle = reset_context->reset_device_list; 5433 5425 ··· 5436 5428 5437 5429 full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); 5438 5430 5431 + /** 5432 + * If it's reset on init, it's default init level, otherwise keep level 5433 + * as recovery level. 
5434 + */ 5435 + if (reset_context->method == AMD_RESET_METHOD_ON_INIT) 5436 + init_level = AMDGPU_INIT_LEVEL_DEFAULT; 5437 + else 5438 + init_level = AMDGPU_INIT_LEVEL_RESET_RECOVERY; 5439 + 5439 5440 r = 0; 5440 5441 list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 5441 - /* After reset, it's default init level */ 5442 - amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT); 5442 + amdgpu_set_init_level(tmp_adev, init_level); 5443 5443 if (full_reset) { 5444 5444 /* post card */ 5445 5445 amdgpu_ras_set_fed(tmp_adev, false); ··· 5534 5518 5535 5519 out: 5536 5520 if (!r) { 5521 + /* IP init is complete now, set level as default */ 5522 + amdgpu_set_init_level(tmp_adev, 5523 + AMDGPU_INIT_LEVEL_DEFAULT); 5537 5524 amdgpu_irq_gpu_reset_resume_helper(tmp_adev); 5538 5525 r = amdgpu_ib_ring_tests(tmp_adev); 5539 5526 if (r) {
+5
drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
··· 342 342 strscpy(buf, "unknown", len); 343 343 } 344 344 } 345 + 346 + bool amdgpu_reset_in_recovery(struct amdgpu_device *adev) 347 + { 348 + return (adev->init_lvl->level == AMDGPU_INIT_LEVEL_RESET_RECOVERY); 349 + }
+2
drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
··· 158 158 int amdgpu_reset_do_xgmi_reset_on_init( 159 159 struct amdgpu_reset_context *reset_context); 160 160 161 + bool amdgpu_reset_in_recovery(struct amdgpu_device *adev); 162 + 161 163 #endif
+2
drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c
··· 220 220 int r; 221 221 struct amdgpu_device *tmp_adev = (struct amdgpu_device *)reset_ctl->handle; 222 222 223 + amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_RESET_RECOVERY); 223 224 dev_info(tmp_adev->dev, 224 225 "GPU reset succeeded, trying to resume\n"); 225 226 r = sienna_cichlid_mode2_restore_ip(tmp_adev); ··· 238 237 239 238 amdgpu_irq_gpu_reset_resume_helper(tmp_adev); 240 239 240 + amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT); 241 241 r = amdgpu_ib_ring_tests(tmp_adev); 242 242 if (r) { 243 243 dev_err(tmp_adev->dev,
+2
drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c
··· 221 221 int r; 222 222 struct amdgpu_device *tmp_adev = (struct amdgpu_device *)reset_ctl->handle; 223 223 224 + amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_RESET_RECOVERY); 224 225 dev_info(tmp_adev->dev, 225 226 "GPU reset succeeded, trying to resume\n"); 226 227 r = smu_v13_0_10_mode2_restore_ip(tmp_adev); ··· 235 234 236 235 amdgpu_irq_gpu_reset_resume_helper(tmp_adev); 237 236 237 + amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT); 238 238 r = amdgpu_ib_ring_tests(tmp_adev); 239 239 if (r) { 240 240 dev_err(tmp_adev->dev,