Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amd: Update MEC HQD loading code for KFD

Various bug fixes and improvements that accumulated over the last two
years.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>

Authored by Felix Kuehling and committed by Oded Gabbay
70539bd7 c2cade3d

+322 -69
+16
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
··· 26 26 #define AMDGPU_AMDKFD_H_INCLUDED 27 27 28 28 #include <linux/types.h> 29 + #include <linux/mmu_context.h> 29 30 #include <kgd_kfd_interface.h> 30 31 31 32 struct amdgpu_device; ··· 60 59 uint64_t get_gpu_clock_counter(struct kgd_dev *kgd); 61 60 62 61 uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd); 62 + 63 + #define read_user_wptr(mmptr, wptr, dst) \ 64 + ({ \ 65 + bool valid = false; \ 66 + if ((mmptr) && (wptr)) { \ 67 + if ((mmptr) == current->mm) { \ 68 + valid = !get_user((dst), (wptr)); \ 69 + } else if (current->mm == NULL) { \ 70 + use_mm(mmptr); \ 71 + valid = !get_user((dst), (wptr)); \ 72 + unuse_mm(mmptr); \ 73 + } \ 74 + } \ 75 + valid; \ 76 + }) 63 77 64 78 #endif /* AMDGPU_AMDKFD_H_INCLUDED */
+113 -17
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
··· 39 39 #include "gmc/gmc_7_1_sh_mask.h" 40 40 #include "cik_structs.h" 41 41 42 + enum hqd_dequeue_request_type { 43 + NO_ACTION = 0, 44 + DRAIN_PIPE, 45 + RESET_WAVES 46 + }; 47 + 42 48 enum { 43 49 MAX_TRAPID = 8, /* 3 bits in the bitfield. */ 44 50 MAX_WATCH_ADDRESSES = 4 ··· 102 96 uint32_t hpd_size, uint64_t hpd_gpu_addr); 103 97 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); 104 98 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, 105 - uint32_t queue_id, uint32_t __user *wptr); 99 + uint32_t queue_id, uint32_t __user *wptr, 100 + uint32_t wptr_shift, uint32_t wptr_mask, 101 + struct mm_struct *mm); 106 102 static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); 107 103 static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, 108 104 uint32_t pipe_id, uint32_t queue_id); 109 105 110 - static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, 106 + static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, 107 + enum kfd_preempt_type reset_type, 111 108 unsigned int utimeout, uint32_t pipe_id, 112 109 uint32_t queue_id); 113 110 static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); ··· 299 290 } 300 291 301 292 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, 302 - uint32_t queue_id, uint32_t __user *wptr) 293 + uint32_t queue_id, uint32_t __user *wptr, 294 + uint32_t wptr_shift, uint32_t wptr_mask, 295 + struct mm_struct *mm) 303 296 { 304 297 struct amdgpu_device *adev = get_amdgpu_device(kgd); 305 - uint32_t wptr_shadow, is_wptr_shadow_valid; 306 298 struct cik_mqd *m; 299 + uint32_t *mqd_hqd; 300 + uint32_t reg, wptr_val, data; 307 301 308 302 m = get_mqd(mqd); 309 303 310 - is_wptr_shadow_valid = !get_user(wptr_shadow, wptr); 311 - if (is_wptr_shadow_valid) 312 - m->cp_hqd_pq_wptr = wptr_shadow; 313 - 314 304 acquire_queue(kgd, pipe_id, queue_id); 315 - gfx_v7_0_mqd_commit(adev, m); 305 + 306 + /* HQD registers extend from 
CP_MQD_BASE_ADDR to CP_MQD_CONTROL. */ 307 + mqd_hqd = &m->cp_mqd_base_addr_lo; 308 + 309 + for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++) 310 + WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]); 311 + 312 + /* Copy userspace write pointer value to register. 313 + * Activate doorbell logic to monitor subsequent changes. 314 + */ 315 + data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, 316 + CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); 317 + WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data); 318 + 319 + if (read_user_wptr(mm, wptr, wptr_val)) 320 + WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask); 321 + 322 + data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); 323 + WREG32(mmCP_HQD_ACTIVE, data); 324 + 316 325 release_queue(kgd); 317 326 318 327 return 0; ··· 409 382 return false; 410 383 } 411 384 412 - static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, 385 + static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, 386 + enum kfd_preempt_type reset_type, 413 387 unsigned int utimeout, uint32_t pipe_id, 414 388 uint32_t queue_id) 415 389 { 416 390 struct amdgpu_device *adev = get_amdgpu_device(kgd); 417 391 uint32_t temp; 418 - int timeout = utimeout; 392 + enum hqd_dequeue_request_type type; 393 + unsigned long flags, end_jiffies; 394 + int retry; 419 395 420 396 acquire_queue(kgd, pipe_id, queue_id); 421 397 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0); 422 398 423 - WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type); 399 + switch (reset_type) { 400 + case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: 401 + type = DRAIN_PIPE; 402 + break; 403 + case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: 404 + type = RESET_WAVES; 405 + break; 406 + default: 407 + type = DRAIN_PIPE; 408 + break; 409 + } 424 410 411 + /* Workaround: If IQ timer is active and the wait time is close to or 412 + * equal to 0, dequeueing is not safe. Wait until either the wait time 413 + * is larger or timer is cleared. 
Also, ensure that IQ_REQ_PEND is 414 + * cleared before continuing. Also, ensure wait times are set to at 415 + * least 0x3. 416 + */ 417 + local_irq_save(flags); 418 + preempt_disable(); 419 + retry = 5000; /* wait for 500 usecs at maximum */ 420 + while (true) { 421 + temp = RREG32(mmCP_HQD_IQ_TIMER); 422 + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) { 423 + pr_debug("HW is processing IQ\n"); 424 + goto loop; 425 + } 426 + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) { 427 + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE) 428 + == 3) /* SEM-rearm is safe */ 429 + break; 430 + /* Wait time 3 is safe for CP, but our MMIO read/write 431 + * time is close to 1 microsecond, so check for 10 to 432 + * leave more buffer room 433 + */ 434 + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME) 435 + >= 10) 436 + break; 437 + pr_debug("IQ timer is active\n"); 438 + } else 439 + break; 440 + loop: 441 + if (!retry) { 442 + pr_err("CP HQD IQ timer status time out\n"); 443 + break; 444 + } 445 + ndelay(100); 446 + --retry; 447 + } 448 + retry = 1000; 449 + while (true) { 450 + temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST); 451 + if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK)) 452 + break; 453 + pr_debug("Dequeue request is pending\n"); 454 + 455 + if (!retry) { 456 + pr_err("CP HQD dequeue request time out\n"); 457 + break; 458 + } 459 + ndelay(100); 460 + --retry; 461 + } 462 + local_irq_restore(flags); 463 + preempt_enable(); 464 + 465 + WREG32(mmCP_HQD_DEQUEUE_REQUEST, type); 466 + 467 + end_jiffies = (utimeout * HZ / 1000) + jiffies; 425 468 while (true) { 426 469 temp = RREG32(mmCP_HQD_ACTIVE); 427 - if (temp & CP_HQD_ACTIVE__ACTIVE_MASK) 470 + if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) 428 471 break; 429 - if (timeout <= 0) { 430 - pr_err("kfd: cp queue preemption time out.\n"); 472 + if (time_after(jiffies, end_jiffies)) { 473 + pr_err("cp queue preemption time out\n"); 431 474 release_queue(kgd); 432 475 return -ETIME; 433 476 } 434 - 
msleep(20); 435 - timeout -= 20; 477 + usleep_range(500, 1000); 436 478 } 437 479 438 480 release_queue(kgd);
+147 -18
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
··· 39 39 #include "vi_structs.h" 40 40 #include "vid.h" 41 41 42 + enum hqd_dequeue_request_type { 43 + NO_ACTION = 0, 44 + DRAIN_PIPE, 45 + RESET_WAVES 46 + }; 47 + 42 48 struct cik_sdma_rlc_registers; 43 49 44 50 /* ··· 61 55 uint32_t hpd_size, uint64_t hpd_gpu_addr); 62 56 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); 63 57 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, 64 - uint32_t queue_id, uint32_t __user *wptr); 58 + uint32_t queue_id, uint32_t __user *wptr, 59 + uint32_t wptr_shift, uint32_t wptr_mask, 60 + struct mm_struct *mm); 65 61 static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); 66 62 static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, 67 63 uint32_t pipe_id, uint32_t queue_id); 68 64 static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); 69 - static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, 65 + static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, 66 + enum kfd_preempt_type reset_type, 70 67 unsigned int utimeout, uint32_t pipe_id, 71 68 uint32_t queue_id); 72 69 static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, ··· 253 244 } 254 245 255 246 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, 256 - uint32_t queue_id, uint32_t __user *wptr) 247 + uint32_t queue_id, uint32_t __user *wptr, 248 + uint32_t wptr_shift, uint32_t wptr_mask, 249 + struct mm_struct *mm) 257 250 { 258 - struct vi_mqd *m; 259 - uint32_t shadow_wptr, valid_wptr; 260 251 struct amdgpu_device *adev = get_amdgpu_device(kgd); 252 + struct vi_mqd *m; 253 + uint32_t *mqd_hqd; 254 + uint32_t reg, wptr_val, data; 261 255 262 256 m = get_mqd(mqd); 263 257 264 - valid_wptr = copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr)); 265 - if (valid_wptr == 0) 266 - m->cp_hqd_pq_wptr = shadow_wptr; 267 - 268 258 acquire_queue(kgd, pipe_id, queue_id); 269 - gfx_v8_0_mqd_commit(adev, mqd); 259 + 260 + /* HIQ is set during driver 
init period with vmid set to 0*/ 261 + if (m->cp_hqd_vmid == 0) { 262 + uint32_t value, mec, pipe; 263 + 264 + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; 265 + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); 266 + 267 + pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", 268 + mec, pipe, queue_id); 269 + value = RREG32(mmRLC_CP_SCHEDULERS); 270 + value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, 271 + ((mec << 5) | (pipe << 3) | queue_id | 0x80)); 272 + WREG32(mmRLC_CP_SCHEDULERS, value); 273 + } 274 + 275 + /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ 276 + mqd_hqd = &m->cp_mqd_base_addr_lo; 277 + 278 + for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_CONTROL; reg++) 279 + WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]); 280 + 281 + /* Tonga errata: EOP RPTR/WPTR should be left unmodified. 282 + * This is safe since EOP RPTR==WPTR for any inactive HQD 283 + * on ASICs that do not support context-save. 284 + * EOP writes/reads can start anywhere in the ring. 285 + */ 286 + if (get_amdgpu_device(kgd)->asic_type != CHIP_TONGA) { 287 + WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr); 288 + WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr); 289 + WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem); 290 + } 291 + 292 + for (reg = mmCP_HQD_EOP_EVENTS; reg <= mmCP_HQD_ERROR; reg++) 293 + WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]); 294 + 295 + /* Copy userspace write pointer value to register. 296 + * Activate doorbell logic to monitor subsequent changes. 
297 + */ 298 + data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, 299 + CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); 300 + WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data); 301 + 302 + if (read_user_wptr(mm, wptr, wptr_val)) 303 + WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask); 304 + 305 + data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); 306 + WREG32(mmCP_HQD_ACTIVE, data); 307 + 270 308 release_queue(kgd); 271 309 272 310 return 0; ··· 364 308 return false; 365 309 } 366 310 367 - static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, 311 + static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, 312 + enum kfd_preempt_type reset_type, 368 313 unsigned int utimeout, uint32_t pipe_id, 369 314 uint32_t queue_id) 370 315 { 371 316 struct amdgpu_device *adev = get_amdgpu_device(kgd); 372 317 uint32_t temp; 373 - int timeout = utimeout; 318 + enum hqd_dequeue_request_type type; 319 + unsigned long flags, end_jiffies; 320 + int retry; 321 + struct vi_mqd *m = get_mqd(mqd); 374 322 375 323 acquire_queue(kgd, pipe_id, queue_id); 376 324 377 - WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type); 325 + if (m->cp_hqd_vmid == 0) 326 + WREG32_FIELD(RLC_CP_SCHEDULERS, scheduler1, 0); 378 327 328 + switch (reset_type) { 329 + case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: 330 + type = DRAIN_PIPE; 331 + break; 332 + case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: 333 + type = RESET_WAVES; 334 + break; 335 + default: 336 + type = DRAIN_PIPE; 337 + break; 338 + } 339 + 340 + /* Workaround: If IQ timer is active and the wait time is close to or 341 + * equal to 0, dequeueing is not safe. Wait until either the wait time 342 + * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is 343 + * cleared before continuing. Also, ensure wait times are set to at 344 + * least 0x3. 
345 + */ 346 + local_irq_save(flags); 347 + preempt_disable(); 348 + retry = 5000; /* wait for 500 usecs at maximum */ 349 + while (true) { 350 + temp = RREG32(mmCP_HQD_IQ_TIMER); 351 + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) { 352 + pr_debug("HW is processing IQ\n"); 353 + goto loop; 354 + } 355 + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) { 356 + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE) 357 + == 3) /* SEM-rearm is safe */ 358 + break; 359 + /* Wait time 3 is safe for CP, but our MMIO read/write 360 + * time is close to 1 microsecond, so check for 10 to 361 + * leave more buffer room 362 + */ 363 + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME) 364 + >= 10) 365 + break; 366 + pr_debug("IQ timer is active\n"); 367 + } else 368 + break; 369 + loop: 370 + if (!retry) { 371 + pr_err("CP HQD IQ timer status time out\n"); 372 + break; 373 + } 374 + ndelay(100); 375 + --retry; 376 + } 377 + retry = 1000; 378 + while (true) { 379 + temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST); 380 + if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK)) 381 + break; 382 + pr_debug("Dequeue request is pending\n"); 383 + 384 + if (!retry) { 385 + pr_err("CP HQD dequeue request time out\n"); 386 + break; 387 + } 388 + ndelay(100); 389 + --retry; 390 + } 391 + local_irq_restore(flags); 392 + preempt_enable(); 393 + 394 + WREG32(mmCP_HQD_DEQUEUE_REQUEST, type); 395 + 396 + end_jiffies = (utimeout * HZ / 1000) + jiffies; 379 397 while (true) { 380 398 temp = RREG32(mmCP_HQD_ACTIVE); 381 - if (temp & CP_HQD_ACTIVE__ACTIVE_MASK) 399 + if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) 382 400 break; 383 - if (timeout <= 0) { 384 - pr_err("kfd: cp queue preemption time out.\n"); 401 + if (time_after(jiffies, end_jiffies)) { 402 + pr_err("cp queue preemption time out.\n"); 385 403 release_queue(kgd); 386 404 return -ETIME; 387 405 } 388 - msleep(20); 389 - timeout -= 20; 406 + usleep_range(500, 1000); 390 407 } 391 408 392 409 release_queue(kgd);
+3 -4
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
··· 270 270 pr_debug("Loading mqd to hqd on pipe %d, queue %d\n", 271 271 q->pipe, q->queue); 272 272 273 - retval = mqd->load_mqd(mqd, q->mqd, q->pipe, 274 - q->queue, (uint32_t __user *) q->properties.write_ptr); 273 + retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties, 274 + q->process->mm); 275 275 if (retval) 276 276 goto out_uninit_mqd; 277 277 ··· 587 587 if (retval) 588 588 goto out_deallocate_sdma_queue; 589 589 590 - retval = mqd->load_mqd(mqd, q->mqd, 0, 591 - 0, NULL); 590 + retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL); 592 591 if (retval) 593 592 goto out_uninit_mqd; 594 593
+2 -1
drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
··· 143 143 kq->queue->pipe = KFD_CIK_HIQ_PIPE; 144 144 kq->queue->queue = KFD_CIK_HIQ_QUEUE; 145 145 kq->mqd->load_mqd(kq->mqd, kq->queue->mqd, kq->queue->pipe, 146 - kq->queue->queue, NULL); 146 + kq->queue->queue, &kq->queue->properties, 147 + NULL); 147 148 } else { 148 149 /* allocate fence for DIQ */ 149 150
+2 -1
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
··· 67 67 68 68 int (*load_mqd)(struct mqd_manager *mm, void *mqd, 69 69 uint32_t pipe_id, uint32_t queue_id, 70 - uint32_t __user *wptr); 70 + struct queue_properties *p, 71 + struct mm_struct *mms); 71 72 72 73 int (*update_mqd)(struct mqd_manager *mm, void *mqd, 73 74 struct queue_properties *q);
+13 -10
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
··· 144 144 } 145 145 146 146 static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, 147 - uint32_t queue_id, uint32_t __user *wptr) 147 + uint32_t queue_id, struct queue_properties *p, 148 + struct mm_struct *mms) 148 149 { 149 - return mm->dev->kfd2kgd->hqd_load 150 - (mm->dev->kgd, mqd, pipe_id, queue_id, wptr); 150 + /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */ 151 + uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0); 152 + uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1); 153 + 154 + return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, 155 + (uint32_t __user *)p->write_ptr, 156 + wptr_shift, wptr_mask, mms); 151 157 } 152 158 153 159 static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, 154 - uint32_t pipe_id, uint32_t queue_id, 155 - uint32_t __user *wptr) 160 + uint32_t pipe_id, uint32_t queue_id, 161 + struct queue_properties *p, struct mm_struct *mms) 156 162 { 157 163 return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd); 158 164 } ··· 182 176 m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); 183 177 m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); 184 178 m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); 185 - m->cp_hqd_pq_doorbell_control = DOORBELL_EN | 186 - DOORBELL_OFFSET(q->doorbell_off); 179 + m->cp_hqd_pq_doorbell_control = DOORBELL_OFFSET(q->doorbell_off); 187 180 188 181 m->cp_hqd_vmid = q->vmid; 189 182 190 183 if (q->format == KFD_QUEUE_FORMAT_AQL) 191 184 m->cp_hqd_pq_control |= NO_UPDATE_RPTR; 192 185 193 - m->cp_hqd_active = 0; 194 186 q->is_active = false; 195 187 if (q->queue_size > 0 && 196 188 q->queue_address != 0 && 197 189 q->queue_percent > 0) { 198 - m->cp_hqd_active = 1; 199 190 q->is_active = true; 200 191 } 201 192 ··· 242 239 unsigned int timeout, uint32_t pipe_id, 243 240 uint32_t queue_id) 244 241 { 245 - return 
mm->dev->kfd2kgd->hqd_destroy(mm->dev->kgd, type, timeout, 242 + return mm->dev->kfd2kgd->hqd_destroy(mm->dev->kgd, mqd, type, timeout, 246 243 pipe_id, queue_id); 247 244 } 248 245
+9 -7
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
··· 94 94 95 95 static int load_mqd(struct mqd_manager *mm, void *mqd, 96 96 uint32_t pipe_id, uint32_t queue_id, 97 - uint32_t __user *wptr) 97 + struct queue_properties *p, struct mm_struct *mms) 98 98 { 99 - return mm->dev->kfd2kgd->hqd_load 100 - (mm->dev->kgd, mqd, pipe_id, queue_id, wptr); 99 + /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */ 100 + uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0); 101 + uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1); 102 + 103 + return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, 104 + (uint32_t __user *)p->write_ptr, 105 + wptr_shift, wptr_mask, mms); 101 106 } 102 107 103 108 static int __update_mqd(struct mqd_manager *mm, void *mqd, ··· 127 122 m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); 128 123 129 124 m->cp_hqd_pq_doorbell_control = 130 - 1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN__SHIFT | 131 125 q->doorbell_off << 132 126 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT; 133 127 pr_debug("cp_hqd_pq_doorbell_control 0x%x\n", ··· 163 159 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT; 164 160 } 165 161 166 - m->cp_hqd_active = 0; 167 162 q->is_active = false; 168 163 if (q->queue_size > 0 && 169 164 q->queue_address != 0 && 170 165 q->queue_percent > 0) { 171 - m->cp_hqd_active = 1; 172 166 q->is_active = true; 173 167 } 174 168 ··· 186 184 uint32_t queue_id) 187 185 { 188 186 return mm->dev->kfd2kgd->hqd_destroy 189 - (mm->dev->kgd, type, timeout, 187 + (mm->dev->kgd, mqd, type, timeout, 190 188 pipe_id, queue_id); 191 189 } 192 190
-5
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
··· 239 239 KFD_PREEMPT_TYPE_FILTER_BY_PASID 240 240 }; 241 241 242 - enum kfd_preempt_type { 243 - KFD_PREEMPT_TYPE_WAVEFRONT, 244 - KFD_PREEMPT_TYPE_WAVEFRONT_RESET 245 - }; 246 - 247 242 /** 248 243 * enum kfd_queue_type 249 244 *
+9 -2
drivers/gpu/drm/amd/include/kgd_kfd_interface.h
··· 41 41 42 42 struct kgd_mem; 43 43 44 + enum kfd_preempt_type { 45 + KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN = 0, 46 + KFD_PREEMPT_TYPE_WAVEFRONT_RESET, 47 + }; 48 + 44 49 enum kgd_memory_pool { 45 50 KGD_POOL_SYSTEM_CACHEABLE = 1, 46 51 KGD_POOL_SYSTEM_WRITECOMBINE = 2, ··· 158 153 int (*init_interrupts)(struct kgd_dev *kgd, uint32_t pipe_id); 159 154 160 155 int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, 161 - uint32_t queue_id, uint32_t __user *wptr); 156 + uint32_t queue_id, uint32_t __user *wptr, 157 + uint32_t wptr_shift, uint32_t wptr_mask, 158 + struct mm_struct *mm); 162 159 163 160 int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd); 164 161 165 162 bool (*hqd_is_occupied)(struct kgd_dev *kgd, uint64_t queue_address, 166 163 uint32_t pipe_id, uint32_t queue_id); 167 164 168 - int (*hqd_destroy)(struct kgd_dev *kgd, uint32_t reset_type, 165 + int (*hqd_destroy)(struct kgd_dev *kgd, void *mqd, uint32_t reset_type, 169 166 unsigned int timeout, uint32_t pipe_id, 170 167 uint32_t queue_id); 171 168
+8 -4
drivers/gpu/drm/radeon/radeon_kfd.c
··· 75 75 uint32_t hpd_size, uint64_t hpd_gpu_addr); 76 76 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); 77 77 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, 78 - uint32_t queue_id, uint32_t __user *wptr); 78 + uint32_t queue_id, uint32_t __user *wptr, 79 + uint32_t wptr_shift, uint32_t wptr_mask, 80 + struct mm_struct *mm); 79 81 static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); 80 82 static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, 81 83 uint32_t pipe_id, uint32_t queue_id); 82 84 83 - static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, 85 + static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t reset_type, 84 86 unsigned int timeout, uint32_t pipe_id, 85 87 uint32_t queue_id); 86 88 static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); ··· 484 482 } 485 483 486 484 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, 487 - uint32_t queue_id, uint32_t __user *wptr) 485 + uint32_t queue_id, uint32_t __user *wptr, 486 + uint32_t wptr_shift, uint32_t wptr_mask, 487 + struct mm_struct *mm) 488 488 { 489 489 uint32_t wptr_shadow, is_wptr_shadow_valid; 490 490 struct cik_mqd *m; ··· 640 636 return false; 641 637 } 642 638 643 - static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, 639 + static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t reset_type, 644 640 unsigned int timeout, uint32_t pipe_id, 645 641 uint32_t queue_id) 646 642 {