Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdkfd: use map_queues for hiq on gfx v10 as well

To align with gfx v9, we use the map_queues packet to load hiq MQD.

Signed-off-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Huang Rui; committed by Alex Deucher
8eee00f6 35cd89d5

+70 -22
+61 -21
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
··· 107 107 lock_srbm(kgd, mec, pipe, queue_id, 0); 108 108 } 109 109 110 - static uint32_t get_queue_mask(struct amdgpu_device *adev, 110 + static uint64_t get_queue_mask(struct amdgpu_device *adev, 111 111 uint32_t pipe_id, uint32_t queue_id) 112 112 { 113 - unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe + 114 - queue_id) & 31; 113 + unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe + 114 + queue_id; 115 115 116 - return ((uint32_t)1) << bit; 116 + return 1ull << bit; 117 117 } 118 118 119 119 static void release_queue(struct kgd_dev *kgd) ··· 268 268 pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id); 269 269 acquire_queue(kgd, pipe_id, queue_id); 270 270 271 - /* HIQ is set during driver init period with vmid set to 0*/ 272 - if (m->cp_hqd_vmid == 0) { 273 - uint32_t value, mec, pipe; 274 - 275 - mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; 276 - pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); 277 - 278 - pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", 279 - mec, pipe, queue_id); 280 - value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); 281 - value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, 282 - ((mec << 5) | (pipe << 3) | queue_id | 0x80)); 283 - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); 284 - } 285 - 286 271 /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. 
*/ 287 272 mqd_hqd = &m->cp_mqd_base_addr_lo; 288 273 hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); ··· 317 332 lower_32_bits((uint64_t)wptr)); 318 333 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), 319 334 upper_32_bits((uint64_t)wptr)); 320 - pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__, get_queue_mask(adev, pipe_id, queue_id)); 335 + pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__, 336 + (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); 321 337 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), 322 - get_queue_mask(adev, pipe_id, queue_id)); 338 + (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); 323 339 } 324 340 325 341 /* Start the EOP fetcher */ ··· 334 348 release_queue(kgd); 335 349 336 350 return 0; 351 + } 352 + 353 + static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, 354 + uint32_t pipe_id, uint32_t queue_id, 355 + uint32_t doorbell_off) 356 + { 357 + struct amdgpu_device *adev = get_amdgpu_device(kgd); 358 + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 359 + struct v10_compute_mqd *m; 360 + uint32_t mec, pipe; 361 + int r; 362 + 363 + m = get_mqd(mqd); 364 + 365 + acquire_queue(kgd, pipe_id, queue_id); 366 + 367 + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; 368 + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); 369 + 370 + pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", 371 + mec, pipe, queue_id); 372 + 373 + spin_lock(&adev->gfx.kiq.ring_lock); 374 + r = amdgpu_ring_alloc(kiq_ring, 7); 375 + if (r) { 376 + pr_err("Failed to alloc KIQ (%d).\n", r); 377 + goto out_unlock; 378 + } 379 + 380 + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 381 + amdgpu_ring_write(kiq_ring, 382 + PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ 383 + PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */ 384 + PACKET3_MAP_QUEUES_QUEUE(queue_id) | 385 + PACKET3_MAP_QUEUES_PIPE(pipe) | 386 + PACKET3_MAP_QUEUES_ME((mec - 1)) | 387 + 
PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ 388 + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ 389 + PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */ 390 + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ 391 + amdgpu_ring_write(kiq_ring, 392 + PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off)); 393 + amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo); 394 + amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi); 395 + amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo); 396 + amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi); 397 + amdgpu_ring_commit(kiq_ring); 398 + 399 + out_unlock: 400 + spin_unlock(&adev->gfx.kiq.ring_lock); 401 + release_queue(kgd); 402 + 403 + return r; 337 404 } 338 405 339 406 static int kgd_hqd_dump(struct kgd_dev *kgd, ··· 791 752 .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, 792 753 .init_interrupts = kgd_init_interrupts, 793 754 .hqd_load = kgd_hqd_load, 755 + .hiq_mqd_load = kgd_hiq_mqd_load, 794 756 .hqd_sdma_load = kgd_hqd_sdma_load, 795 757 .hqd_dump = kgd_hqd_dump, 796 758 .hqd_sdma_dump = kgd_hqd_sdma_dump,
+9 -1
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
··· 153 153 return r; 154 154 } 155 155 156 + static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd, 157 + uint32_t pipe_id, uint32_t queue_id, 158 + struct queue_properties *p, struct mm_struct *mms) 159 + { 160 + return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id, 161 + queue_id, p->doorbell_off); 162 + } 163 + 156 164 static void update_mqd(struct mqd_manager *mm, void *mqd, 157 165 struct queue_properties *q) 158 166 { ··· 417 409 mqd->allocate_mqd = allocate_hiq_mqd; 418 410 mqd->init_mqd = init_mqd_hiq; 419 411 mqd->free_mqd = free_mqd_hiq_sdma; 420 - mqd->load_mqd = load_mqd; 412 + mqd->load_mqd = hiq_load_mqd_kiq; 421 413 mqd->update_mqd = update_mqd; 422 414 mqd->destroy_mqd = destroy_mqd; 423 415 mqd->is_occupied = is_occupied;