Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: invalidate mmhub semaphore workaround in gmc9/gmc10

The gpuvm invalidate acknowledge state may be lost across a power-gating off
cycle. To avoid this issue in gmc9/gmc10 invalidation, add a semaphore acquire
before invalidation and a semaphore release after invalidation.

After adding the semaphore acquire before invalidation, the semaphore
register becomes read-only if another process tries to acquire the semaphore.
It will then not be able to release this semaphore, which may cause a
deadlock. If this deadlock happens, a semaphore firmware fix is needed.

Signed-off-by: changzhu <Changfeng.Zhu@amd.com>
Acked-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org

authored by

changzhu and committed by
Alex Deucher
f920d1bb 6c2c8972

+116 -2
+57
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
··· 235 235 const unsigned eng = 17; 236 236 unsigned int i; 237 237 238 + spin_lock(&adev->gmc.invalidate_lock); 239 + /* 240 + * It may lose gpuvm invalidate acknowldege state across power-gating 241 + * off cycle, add semaphore acquire before invalidation and semaphore 242 + * release after invalidation to avoid entering power gated state 243 + * to WA the Issue 244 + */ 245 + 246 + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ 247 + if (vmhub == AMDGPU_MMHUB_0 || 248 + vmhub == AMDGPU_MMHUB_1) { 249 + for (i = 0; i < adev->usec_timeout; i++) { 250 + /* a read return value of 1 means semaphore acuqire */ 251 + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); 252 + if (tmp & 0x1) 253 + break; 254 + udelay(1); 255 + } 256 + 257 + if (i >= adev->usec_timeout) 258 + DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); 259 + } 260 + 238 261 WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); 239 262 240 263 /* ··· 276 253 277 254 udelay(1); 278 255 } 256 + 257 + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ 258 + if (vmhub == AMDGPU_MMHUB_0 || 259 + vmhub == AMDGPU_MMHUB_1) 260 + /* 261 + * add semaphore release after invalidation, 262 + * write with 0 means semaphore release 263 + */ 264 + WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0); 265 + 266 + spin_unlock(&adev->gmc.invalidate_lock); 279 267 280 268 if (i < adev->usec_timeout) 281 269 return; ··· 372 338 uint32_t req = gmc_v10_0_get_invalidate_req(vmid, 0); 373 339 unsigned eng = ring->vm_inv_eng; 374 340 341 + /* 342 + * It may lose gpuvm invalidate acknowldege state across power-gating 343 + * off cycle, add semaphore acquire before invalidation and semaphore 344 + * release after invalidation to avoid entering power gated state 345 + * to WA the Issue 346 + */ 347 + 348 + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. 
*/ 349 + if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || 350 + ring->funcs->vmhub == AMDGPU_MMHUB_1) 351 + /* a read return value of 1 means semaphore acuqire */ 352 + amdgpu_ring_emit_reg_wait(ring, 353 + hub->vm_inv_eng0_sem + eng, 0x1, 0x1); 354 + 375 355 amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), 376 356 lower_32_bits(pd_addr)); 377 357 ··· 395 347 amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng, 396 348 hub->vm_inv_eng0_ack + eng, 397 349 req, 1 << vmid); 350 + 351 + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ 352 + if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || 353 + ring->funcs->vmhub == AMDGPU_MMHUB_1) 354 + /* 355 + * add semaphore release after invalidation, 356 + * write with 0 means semaphore release 357 + */ 358 + amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0); 398 359 399 360 return pd_addr; 400 361 }
+57
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
··· 459 459 } 460 460 461 461 spin_lock(&adev->gmc.invalidate_lock); 462 + 463 + /* 464 + * It may lose gpuvm invalidate acknowldege state across power-gating 465 + * off cycle, add semaphore acquire before invalidation and semaphore 466 + * release after invalidation to avoid entering power gated state 467 + * to WA the Issue 468 + */ 469 + 470 + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ 471 + if (vmhub == AMDGPU_MMHUB_0 || 472 + vmhub == AMDGPU_MMHUB_1) { 473 + for (j = 0; j < adev->usec_timeout; j++) { 474 + /* a read return value of 1 means semaphore acuqire */ 475 + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); 476 + if (tmp & 0x1) 477 + break; 478 + udelay(1); 479 + } 480 + 481 + if (j >= adev->usec_timeout) 482 + DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); 483 + } 484 + 462 485 WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); 463 486 464 487 /* ··· 497 474 break; 498 475 udelay(1); 499 476 } 477 + 478 + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ 479 + if (vmhub == AMDGPU_MMHUB_0 || 480 + vmhub == AMDGPU_MMHUB_1) 481 + /* 482 + * add semaphore release after invalidation, 483 + * write with 0 means semaphore release 484 + */ 485 + WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0); 486 + 500 487 spin_unlock(&adev->gmc.invalidate_lock); 488 + 501 489 if (j < adev->usec_timeout) 502 490 return; 503 491 ··· 523 489 uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0); 524 490 unsigned eng = ring->vm_inv_eng; 525 491 492 + /* 493 + * It may lose gpuvm invalidate acknowldege state across power-gating 494 + * off cycle, add semaphore acquire before invalidation and semaphore 495 + * release after invalidation to avoid entering power gated state 496 + * to WA the Issue 497 + */ 498 + 499 + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. 
*/ 500 + if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || 501 + ring->funcs->vmhub == AMDGPU_MMHUB_1) 502 + /* a read return value of 1 means semaphore acuqire */ 503 + amdgpu_ring_emit_reg_wait(ring, 504 + hub->vm_inv_eng0_sem + eng, 0x1, 0x1); 505 + 526 506 amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), 527 507 lower_32_bits(pd_addr)); 528 508 ··· 546 498 amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng, 547 499 hub->vm_inv_eng0_ack + eng, 548 500 req, 1 << vmid); 501 + 502 + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ 503 + if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || 504 + ring->funcs->vmhub == AMDGPU_MMHUB_1) 505 + /* 506 + * add semaphore release after invalidation, 507 + * write with 0 means semaphore release 508 + */ 509 + amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0); 549 510 550 511 return pd_addr; 551 512 }
+2 -2
drivers/gpu/drm/amd/amdgpu/soc15.h
··· 28 28 #include "nbio_v7_0.h" 29 29 #include "nbio_v7_4.h" 30 30 31 - #define SOC15_FLUSH_GPU_TLB_NUM_WREG 4 32 - #define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT 1 31 + #define SOC15_FLUSH_GPU_TLB_NUM_WREG 6 32 + #define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT 3 33 33 34 34 extern const struct amd_ip_funcs soc15_common_ip_funcs; 35 35