Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdkfd: use kiq to load the mqd of hiq queue for gfx v9 (v6)

There is an issue where the CP requires the HIQ queue to be configured and mapped
via the KIQ ring; otherwise, it is unable to read back the secure buffer while
gfxoff is enabled, even with trusted IP blocks.

v1 -> v2:
- Fix to remove surplus set_resources packets.
- Fill the whole configuration in MQD.
- Change the author as Aaron because he addressed the key point of this issue.
- Add kiq ring lock.

v2 -> v3:
- Free the lock while in error return case.
- Remove the programming that is only needed when the queue is unmapped.

v3 -> v4:
- Remove doorbell programming because it's only used for restarting the queue.
- Remove CP scheduler programming because map_queue packet will handle this.

v4 -> v5:
- Remove cp_hqd_active because the MEC ucode will enable it when map_queues is used.
- Revise goto out_unlock.
- Use the correct doorbell offset in the packet for the HIQ, i.e. the one the kfd
  driver assigned.

v5 -> v6:
- Merge the Arcturus fix into this patch because it would otherwise oops on the
  Arcturus platform.

Reported-by: Lisa Saturday <Lisa.Saturday@amd.com>
Signed-off-by: Aaron Liu <aaron.liu@amd.com>
Signed-off-by: Huang Rui <ray.huang@amd.com>
Reviewed-and-Tested-by: Aaron Liu <aaron.liu@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Aaron Liu and committed by
Alex Deucher
35cd89d5 d175e9ac

+76 -21
+1
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
··· 305 305 .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, 306 306 .init_interrupts = kgd_gfx_v9_init_interrupts, 307 307 .hqd_load = kgd_gfx_v9_hqd_load, 308 + .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load, 308 309 .hqd_sdma_load = kgd_hqd_sdma_load, 309 310 .hqd_dump = kgd_gfx_v9_hqd_dump, 310 311 .hqd_sdma_dump = kgd_hqd_sdma_dump,
+59 -20
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
··· 103 103 lock_srbm(kgd, mec, pipe, queue_id, 0); 104 104 } 105 105 106 - static uint32_t get_queue_mask(struct amdgpu_device *adev, 106 + static uint64_t get_queue_mask(struct amdgpu_device *adev, 107 107 uint32_t pipe_id, uint32_t queue_id) 108 108 { 109 - unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe + 110 - queue_id) & 31; 109 + unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe + 110 + queue_id; 111 111 112 - return ((uint32_t)1) << bit; 112 + return 1ull << bit; 113 113 } 114 114 115 115 static void release_queue(struct kgd_dev *kgd) ··· 258 258 259 259 acquire_queue(kgd, pipe_id, queue_id); 260 260 261 - /* HIQ is set during driver init period with vmid set to 0*/ 262 - if (m->cp_hqd_vmid == 0) { 263 - uint32_t value, mec, pipe; 264 - 265 - mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; 266 - pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); 267 - 268 - pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", 269 - mec, pipe, queue_id); 270 - value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); 271 - value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, 272 - ((mec << 5) | (pipe << 3) | queue_id | 0x80)); 273 - WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); 274 - } 275 - 276 261 /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. 
*/ 277 262 mqd_hqd = &m->cp_mqd_base_addr_lo; 278 263 hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); ··· 308 323 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), 309 324 upper_32_bits((uintptr_t)wptr)); 310 325 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), 311 - get_queue_mask(adev, pipe_id, queue_id)); 326 + (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); 312 327 } 313 328 314 329 /* Start the EOP fetcher */ ··· 322 337 release_queue(kgd); 323 338 324 339 return 0; 340 + } 341 + 342 + int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, 343 + uint32_t pipe_id, uint32_t queue_id, 344 + uint32_t doorbell_off) 345 + { 346 + struct amdgpu_device *adev = get_amdgpu_device(kgd); 347 + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 348 + struct v9_mqd *m; 349 + uint32_t mec, pipe; 350 + int r; 351 + 352 + m = get_mqd(mqd); 353 + 354 + acquire_queue(kgd, pipe_id, queue_id); 355 + 356 + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; 357 + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); 358 + 359 + pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", 360 + mec, pipe, queue_id); 361 + 362 + spin_lock(&adev->gfx.kiq.ring_lock); 363 + r = amdgpu_ring_alloc(kiq_ring, 7); 364 + if (r) { 365 + pr_err("Failed to alloc KIQ (%d).\n", r); 366 + goto out_unlock; 367 + } 368 + 369 + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 370 + amdgpu_ring_write(kiq_ring, 371 + PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ 372 + PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */ 373 + PACKET3_MAP_QUEUES_QUEUE(queue_id) | 374 + PACKET3_MAP_QUEUES_PIPE(pipe) | 375 + PACKET3_MAP_QUEUES_ME((mec - 1)) | 376 + PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ 377 + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ 378 + PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */ 379 + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ 380 + 
amdgpu_ring_write(kiq_ring, 381 + PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off)); 382 + amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo); 383 + amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi); 384 + amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo); 385 + amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi); 386 + amdgpu_ring_commit(kiq_ring); 387 + 388 + out_unlock: 389 + spin_unlock(&adev->gfx.kiq.ring_lock); 390 + release_queue(kgd); 391 + 392 + return r; 325 393 } 326 394 327 395 int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, ··· 722 684 .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, 723 685 .init_interrupts = kgd_gfx_v9_init_interrupts, 724 686 .hqd_load = kgd_gfx_v9_hqd_load, 687 + .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load, 725 688 .hqd_sdma_load = kgd_hqd_sdma_load, 726 689 .hqd_dump = kgd_gfx_v9_hqd_dump, 727 690 .hqd_sdma_dump = kgd_hqd_sdma_dump,
+3
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
··· 33 33 uint32_t queue_id, uint32_t __user *wptr, 34 34 uint32_t wptr_shift, uint32_t wptr_mask, 35 35 struct mm_struct *mm); 36 + int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, 37 + uint32_t pipe_id, uint32_t queue_id, 38 + uint32_t doorbell_off); 36 39 int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, 37 40 uint32_t pipe_id, uint32_t queue_id, 38 41 uint32_t (**dump)[2], uint32_t *n_regs);
+9 -1
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
··· 191 191 wptr_shift, 0, mms); 192 192 } 193 193 194 + static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd, 195 + uint32_t pipe_id, uint32_t queue_id, 196 + struct queue_properties *p, struct mm_struct *mms) 197 + { 198 + return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id, 199 + queue_id, p->doorbell_off); 200 + } 201 + 194 202 static void update_mqd(struct mqd_manager *mm, void *mqd, 195 203 struct queue_properties *q) 196 204 { ··· 457 449 mqd->allocate_mqd = allocate_hiq_mqd; 458 450 mqd->init_mqd = init_mqd_hiq; 459 451 mqd->free_mqd = free_mqd_hiq_sdma; 460 - mqd->load_mqd = load_mqd; 452 + mqd->load_mqd = hiq_load_mqd_kiq; 461 453 mqd->update_mqd = update_mqd; 462 454 mqd->destroy_mqd = destroy_mqd; 463 455 mqd->is_occupied = is_occupied;
+4
drivers/gpu/drm/amd/include/kgd_kfd_interface.h
··· 256 256 uint32_t wptr_shift, uint32_t wptr_mask, 257 257 struct mm_struct *mm); 258 258 259 + int (*hiq_mqd_load)(struct kgd_dev *kgd, void *mqd, 260 + uint32_t pipe_id, uint32_t queue_id, 261 + uint32_t doorbell_off); 262 + 259 263 int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd, 260 264 uint32_t __user *wptr, struct mm_struct *mm); 261 265