Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

drm/amdkfd: Update MQD management on multi XCC setup

Update MQD management for both HIQ and user-mode compute
queues on a multi XCC setup. MQDs need to be allocated,
initialized, loaded and destroyed for each XCC in the KFD
node.

v2: squash in fix "drm/amdkfd: Fix SDMA+HIQ HQD allocation on GFX9.4.3"

Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Signed-off-by: Amber Lin <Amber.Lin@amd.com>
Tested-by: Amber Lin <Amber.Lin@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Mukul Joshi and committed by Alex Deucher
2f77b9a2 74c5b85d

+380 -57
+36 -15
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
···
 	sg_free_table(ttm->sg);
 }
 
+/*
+ * total_pages is constructed as MQD0+CtrlStack0 + MQD1+CtrlStack1 + ...
+ * MQDn+CtrlStackn where n is the number of XCCs per partition.
+ * pages_per_xcc is the size of one MQD+CtrlStack. The first page is MQD
+ * and uses memory type default, UC. The rest of pages_per_xcc are
+ * Ctrl stack and modify their memory type to NC.
+ */
+static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
+					  struct ttm_tt *ttm, uint64_t flags)
+{
+	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+	uint64_t total_pages = ttm->num_pages;
+	int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
+	uint64_t page_idx, pages_per_xcc = total_pages / num_xcc;
+	int i;
+	uint64_t ctrl_flags = (flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
+			AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
+
+	for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
+		/* MQD page: use default flags */
+		amdgpu_gart_bind(adev,
+				 gtt->offset + (page_idx << PAGE_SHIFT),
+				 1, &gtt->ttm.dma_address[page_idx], flags);
+
+		/*
+		 * Ctrl pages - modify the memory type to NC (ctrl_flags) from
+		 * the second page of the BO onward.
+		 */
+		amdgpu_gart_bind(adev,
+				 gtt->offset + ((page_idx + 1) << PAGE_SHIFT),
+				 pages_per_xcc - 1,
+				 &gtt->ttm.dma_address[page_idx + 1],
+				 ctrl_flags);
+	}
+}
+
 static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
 				 struct ttm_buffer_object *tbo,
 				 uint64_t flags)
···
 		flags |= AMDGPU_PTE_TMZ;
 
 	if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) {
-		uint64_t page_idx = 1;
-
-		amdgpu_gart_bind(adev, gtt->offset, page_idx,
-				 gtt->ttm.dma_address, flags);
-
-		/* The memory type of the first page defaults to UC. Now
-		 * modify the memory type to NC from the second page of
-		 * the BO onward.
-		 */
-		flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
-		flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
-
-		amdgpu_gart_bind(adev, gtt->offset + (page_idx << PAGE_SHIFT),
-				 ttm->num_pages - page_idx,
-				 &(gtt->ttm.dma_address[page_idx]), flags);
+		amdgpu_ttm_gart_bind_gfx9_mqd(adev, ttm, flags);
 	} else {
 		amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
 				 gtt->ttm.dma_address, flags);
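
The layout described in the comment above is easiest to sanity-check with the page math pulled out on its own. A minimal standalone sketch (user-space C, not driver code; PAGE_SHIFT and the example page counts are assumptions for illustration):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12	/* assumed 4 KiB pages */

/* Mirrors the binding loop above: each XCC owns pages_per_xcc pages;
 * page 0 of its chunk is the MQD (UC), the rest are ctrl stack (NC). */
static void dump_mqd_layout(uint64_t total_pages, unsigned int num_xcc)
{
	uint64_t pages_per_xcc = total_pages / num_xcc;
	uint64_t page_idx = 0;

	for (unsigned int i = 0; i < num_xcc; i++, page_idx += pages_per_xcc)
		printf("XCC%u: MQD page %llu (GART offset 0x%llx), %llu ctrl pages from page %llu\n",
		       i, (unsigned long long)page_idx,
		       (unsigned long long)(page_idx << PAGE_SHIFT),
		       (unsigned long long)(pages_per_xcc - 1),
		       (unsigned long long)(page_idx + 1));
}

int main(void)
{
	dump_mqd_layout(8, 4);	/* e.g. 4 XCCs, 2 pages each */
	return 0;
}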
+2 -1
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
···
 	uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
 		get_num_all_sdma_engines(dqm) *
 		dev->kfd->device_info.num_sdma_queues_per_engine +
-		dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
+		(dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size *
+		dqm->dev->num_xcc_per_node);
 
 	retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
 		&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
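
The new size expression reserves one HIQ MQD per XCC on top of all SDMA MQDs, carved from a single GTT buffer. A standalone sketch of the same arithmetic; every value below is an illustrative assumption, not a real structure size:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t sdma_mqd_size = 512;		/* assumed */
	uint32_t hiq_mqd_size = 512;		/* assumed */
	uint32_t num_sdma_engines = 4;		/* assumed */
	uint32_t queues_per_engine = 8;		/* assumed */
	uint32_t num_xcc_per_node = 4;		/* assumed */

	/* Mirrors the expression above: all SDMA MQDs plus one HIQ MQD
	 * per XCC, in one allocation. */
	uint32_t size = sdma_mqd_size * num_sdma_engines * queues_per_engine +
			hiq_mqd_size * num_xcc_per_node;

	printf("HIQ+SDMA GTT allocation: %u bytes\n", size);
	return 0;
}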
+27 -1
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
···
 		q->sdma_queue_id) *
 		dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size;
 
-	offset += dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
+	offset += dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size *
+		  dev->num_xcc_per_node;
 
 	mqd_mem_obj->gtt_mem = (void *)((uint64_t)dev->dqm->hiq_sdma_mqd.gtt_mem
 			+ offset);
···
 		uint32_t queue_id)
 {
 	return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd);
+}
+
+uint64_t kfd_hiq_mqd_stride(struct kfd_node *dev)
+{
+	return dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
+}
+
+void kfd_get_hiq_xcc_mqd(struct kfd_node *dev, struct kfd_mem_obj *mqd_mem_obj,
+			 uint32_t virtual_xcc_id)
+{
+	uint64_t offset;
+
+	offset = kfd_hiq_mqd_stride(dev) * virtual_xcc_id;
+
+	mqd_mem_obj->gtt_mem = (virtual_xcc_id == 0) ?
+			dev->dqm->hiq_sdma_mqd.gtt_mem : NULL;
+	mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr + offset;
+	mqd_mem_obj->cpu_ptr = (uint32_t *)((uintptr_t)
+				dev->dqm->hiq_sdma_mqd.cpu_ptr + offset);
+}
+
+uint64_t kfd_mqd_stride(struct mqd_manager *mm,
+			struct queue_properties *q)
+{
+	return mm->mqd_size;
 }
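
Note the ownership convention in kfd_get_hiq_xcc_mqd(): every XCC gets its own gpu_addr/cpu_ptr view into the shared HIQ buffer, but only the XCC0 view carries the gtt_mem handle, so the backing BO is freed exactly once. A minimal standalone sketch of that convention (struct view and get_view() are hypothetical names for illustration, not driver API):

#include <stdint.h>
#include <stdlib.h>

struct view {
	void *owner;		/* non-NULL only for view 0 */
	uint32_t *cpu_ptr;
};

static void get_view(void *base, size_t stride, unsigned int idx,
		     struct view *v)
{
	v->owner = (idx == 0) ? base : NULL;	/* free via view 0 only */
	v->cpu_ptr = (uint32_t *)((uintptr_t)base + stride * idx);
}

int main(void)
{
	size_t stride = 512;			/* assumed MQD stride */
	void *base = calloc(4, stride);		/* 4 per-XCC views */
	struct view v;

	for (unsigned int i = 0; i < 4; i++)
		get_view(base, stride, i, &v);
	free(base);	/* exactly once, matching the view-0 owner */
	return 0;
}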
+8
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
···
 	int	(*debugfs_show_mqd)(struct seq_file *m, void *data);
 #endif
 	uint32_t (*read_doorbell_id)(void *mqd);
+	uint64_t (*mqd_stride)(struct mqd_manager *mm,
+			       struct queue_properties *p);
 
 	struct mutex	mqd_mutex;
 	struct kfd_node	*dev;
···
 		uint64_t queue_address, uint32_t pipe_id,
 		uint32_t queue_id);
 
+void kfd_get_hiq_xcc_mqd(struct kfd_node *dev,
+		struct kfd_mem_obj *mqd_mem_obj, uint32_t virtual_xcc_id);
+
+uint64_t kfd_hiq_mqd_stride(struct kfd_node *dev);
+uint64_t kfd_mqd_stride(struct mqd_manager *mm,
+			struct queue_properties *q);
 #endif /* KFD_MQD_MANAGER_H_ */
+3
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
···
 		mqd->destroy_mqd = kfd_destroy_mqd_cp;
 		mqd->is_occupied = kfd_is_occupied_cp;
 		mqd->mqd_size = sizeof(struct cik_mqd);
+		mqd->mqd_stride = kfd_mqd_stride;
 #if defined(CONFIG_DEBUG_FS)
 		mqd->debugfs_show_mqd = debugfs_show_mqd;
 #endif
···
 		mqd->destroy_mqd = kfd_destroy_mqd_cp;
 		mqd->is_occupied = kfd_is_occupied_cp;
 		mqd->mqd_size = sizeof(struct cik_mqd);
+		mqd->mqd_stride = kfd_mqd_stride;
 #if defined(CONFIG_DEBUG_FS)
 		mqd->debugfs_show_mqd = debugfs_show_mqd;
 #endif
···
 		mqd->checkpoint_mqd = checkpoint_mqd_sdma;
 		mqd->restore_mqd = restore_mqd_sdma;
 		mqd->mqd_size = sizeof(struct cik_sdma_rlc_registers);
+		mqd->mqd_stride = kfd_mqd_stride;
 #if defined(CONFIG_DEBUG_FS)
 		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
 #endif
+3
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
···
 		mqd->get_wave_state = get_wave_state;
 		mqd->checkpoint_mqd = checkpoint_mqd;
 		mqd->restore_mqd = restore_mqd;
+		mqd->mqd_stride = kfd_mqd_stride;
 #if defined(CONFIG_DEBUG_FS)
 		mqd->debugfs_show_mqd = debugfs_show_mqd;
 #endif
···
 		mqd->destroy_mqd = kfd_destroy_mqd_cp;
 		mqd->is_occupied = kfd_is_occupied_cp;
 		mqd->mqd_size = sizeof(struct v10_compute_mqd);
+		mqd->mqd_stride = kfd_mqd_stride;
 #if defined(CONFIG_DEBUG_FS)
 		mqd->debugfs_show_mqd = debugfs_show_mqd;
 #endif
···
 		mqd->checkpoint_mqd = checkpoint_mqd_sdma;
 		mqd->restore_mqd = restore_mqd_sdma;
 		mqd->mqd_size = sizeof(struct v10_sdma_mqd);
+		mqd->mqd_stride = kfd_mqd_stride;
 #if defined(CONFIG_DEBUG_FS)
 		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
 #endif
+262 -30
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
···
 #include "sdma0/sdma0_4_0_sh_mask.h"
 #include "amdgpu_amdkfd.h"
 
+static void update_mqd(struct mqd_manager *mm, void *mqd,
+		       struct queue_properties *q,
+		       struct mqd_update_info *minfo);
+
+static uint64_t mqd_stride_v9(struct mqd_manager *mm,
+			      struct queue_properties *q)
+{
+	if (mm->dev->kfd->cwsr_enabled &&
+	    q->type == KFD_QUEUE_TYPE_COMPUTE)
+		return ALIGN(q->ctl_stack_size, PAGE_SIZE) +
+		       ALIGN(sizeof(struct v9_mqd), PAGE_SIZE);
+
+	return mm->mqd_size;
+}
+
 static inline struct v9_mqd *get_mqd(void *mqd)
 {
 	return (struct v9_mqd *)mqd;
···
 	if (!mqd_mem_obj)
 		return NULL;
 	retval = amdgpu_amdkfd_alloc_gtt_mem(node->adev,
-			ALIGN(q->ctl_stack_size, PAGE_SIZE) +
-			ALIGN(sizeof(struct v9_mqd), PAGE_SIZE),
+			(ALIGN(q->ctl_stack_size, PAGE_SIZE) +
+			ALIGN(sizeof(struct v9_mqd), PAGE_SIZE)) *
+			node->num_xcc_per_node,
 			&(mqd_mem_obj->gtt_mem),
 			&(mqd_mem_obj->gpu_addr),
 			(void *)&(mqd_mem_obj->cpu_ptr), true);
···
 		1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
 		1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
 
-	if (q->format == KFD_QUEUE_FORMAT_AQL) {
+	if (q->format == KFD_QUEUE_FORMAT_AQL)
 		m->cp_hqd_aql_control =
 			1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
-		if (KFD_GC_VERSION(mm->dev) == IP_VERSION(9, 4, 3)) {
-			/* On GC 9.4.3, DW 41 is re-purposed as
-			 * compute_tg_chunk_size.
-			 * TODO: review this setting when active CUs in the
-			 * partition play a role
-			 */
-			m->compute_static_thread_mgmt_se6 = 1;
-		}
-	} else {
-		/* PM4 queue */
-		if (KFD_GC_VERSION(mm->dev) == IP_VERSION(9, 4, 3)) {
-			m->compute_static_thread_mgmt_se6 = 0;
-			/* TODO: program pm4_target_xcc */
-		}
-	}
 
 	if (q->tba_addr) {
 		m->compute_pgm_rsrc2 |=
···
 	*mqd = m;
 	if (gart_addr)
 		*gart_addr = addr;
-	mm->update_mqd(mm, m, q, NULL);
+	update_mqd(mm, m, q, NULL);
 }
 
 static int load_mqd(struct mqd_manager *mm, void *mqd,
···
 	m->cp_hqd_vmid = q->vmid;
 
 	if (q->format == KFD_QUEUE_FORMAT_AQL) {
-		m->cp_hqd_pq_control |=
+		m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
 			2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT |
 			1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT |
 			1 << CP_HQD_PQ_CONTROL__WPP_CLAMP_EN__SHIFT;
-		if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3))
-			m->cp_hqd_pq_control |=
-				CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK;
 		m->cp_hqd_pq_doorbell_control |= 1 <<
 			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT;
 	}
···
 	qp->is_active = 0;
 }
 
+static void init_mqd_hiq_v9_4_3(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *q)
+{
+	struct v9_mqd *m;
+	int xcc = 0;
+	struct kfd_mem_obj xcc_mqd_mem_obj;
+	uint64_t xcc_gart_addr = 0;
+
+	memset(&xcc_mqd_mem_obj, 0x0, sizeof(struct kfd_mem_obj));
+
+	for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
+		kfd_get_hiq_xcc_mqd(mm->dev, &xcc_mqd_mem_obj, xcc);
+
+		init_mqd(mm, (void **)&m, &xcc_mqd_mem_obj, &xcc_gart_addr, q);
+
+		m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
+			1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
+			1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
+		m->cp_mqd_stride_size = kfd_hiq_mqd_stride(mm->dev);
+		if (xcc == 0) {
+			/* Set no_update_rptr = 0 in Master XCC */
+			m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK;
+
+			/* Set the MQD pointer and gart address to XCC0 MQD */
+			*mqd = m;
+			*gart_addr = xcc_gart_addr;
+		}
+	}
+}
+
+static int hiq_load_mqd_kiq_v9_4_3(struct mqd_manager *mm, void *mqd,
+			uint32_t pipe_id, uint32_t queue_id,
+			struct queue_properties *p, struct mm_struct *mms)
+{
+	int xcc, err;
+	void *xcc_mqd;
+	uint64_t hiq_mqd_size = kfd_hiq_mqd_stride(mm->dev);
+
+	for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
+		xcc_mqd = mqd + hiq_mqd_size * xcc;
+		err = mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, xcc_mqd,
+						     pipe_id, queue_id,
+						     p->doorbell_off);
+		if (err) {
+			pr_debug("Failed to load HIQ MQD for XCC: %d\n", xcc);
+			break;
+		}
+	}
+
+	return err;
+}
+
+static int destroy_hiq_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
+			enum kfd_preempt_type type, unsigned int timeout,
+			uint32_t pipe_id, uint32_t queue_id)
+{
+	int xcc = 0, err;
+	void *xcc_mqd;
+	uint64_t hiq_mqd_size = kfd_hiq_mqd_stride(mm->dev);
+
+	for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
+		xcc_mqd = mqd + hiq_mqd_size * xcc;
+		err = mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, xcc_mqd,
+						    type, timeout, pipe_id,
+						    queue_id);
+		if (err) {
+			pr_debug("Destroy MQD failed for xcc: %d\n", xcc);
+			break;
+		}
+	}
+
+	return err;
+}
+
+static void get_xcc_mqd(struct kfd_mem_obj *mqd_mem_obj,
+			struct kfd_mem_obj *xcc_mqd_mem_obj,
+			uint64_t offset)
+{
+	xcc_mqd_mem_obj->gtt_mem = (offset == 0) ?
+				mqd_mem_obj->gtt_mem : NULL;
+	xcc_mqd_mem_obj->gpu_addr = mqd_mem_obj->gpu_addr + offset;
+	xcc_mqd_mem_obj->cpu_ptr = (uint32_t *)((uintptr_t)mqd_mem_obj->cpu_ptr
+					+ offset);
+}
+
+static void init_mqd_v9_4_3(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *q)
+{
+	struct v9_mqd *m;
+	int xcc = 0;
+	struct kfd_mem_obj xcc_mqd_mem_obj;
+	uint64_t xcc_gart_addr = 0;
+	uint64_t offset = mm->mqd_stride(mm, q);
+
+	memset(&xcc_mqd_mem_obj, 0x0, sizeof(struct kfd_mem_obj));
+	for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
+		get_xcc_mqd(mqd_mem_obj, &xcc_mqd_mem_obj, offset*xcc);
+
+		init_mqd(mm, (void **)&m, &xcc_mqd_mem_obj, &xcc_gart_addr, q);
+
+		m->cp_mqd_stride_size = offset;
+		if (q->format == KFD_QUEUE_FORMAT_AQL) {
+			m->compute_tg_chunk_size = 1;
+
+			switch (xcc) {
+			case 0:
+				/* Master XCC */
+				m->cp_hqd_pq_control &=
+					~CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK;
+				m->compute_current_logic_xcc_id =
+					mm->dev->num_xcc_per_node - 1;
+				break;
+			default:
+				m->compute_current_logic_xcc_id =
+					xcc - 1;
+				break;
+			}
+		} else {
+			/* PM4 Queue */
+			m->compute_current_logic_xcc_id = 0;
+			m->compute_tg_chunk_size = 0;
+		}
+
+		if (xcc == 0) {
+			/* Set the MQD pointer and gart address to XCC0 MQD */
+			*mqd = m;
+			*gart_addr = xcc_gart_addr;
+		}
+	}
+}
+
+static void update_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
+		struct queue_properties *q, struct mqd_update_info *minfo)
+{
+	struct v9_mqd *m;
+	int xcc = 0;
+	uint64_t size = mm->mqd_stride(mm, q);
+
+	for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
+		m = get_mqd(mqd + size * xcc);
+		update_mqd(mm, m, q, minfo);
+
+		if (q->format == KFD_QUEUE_FORMAT_AQL) {
+			switch (xcc) {
+			case 0:
+				/* Master XCC */
+				m->cp_hqd_pq_control &=
+					~CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK;
+				m->compute_current_logic_xcc_id =
+					mm->dev->num_xcc_per_node - 1;
+				break;
+			default:
+				m->compute_current_logic_xcc_id =
+					xcc - 1;
+				break;
+			}
+			m->compute_tg_chunk_size = 1;
+		} else {
+			/* PM4 Queue */
+			m->compute_current_logic_xcc_id = 0;
+			m->compute_tg_chunk_size = 0;
+		}
+	}
+}
+
+static int destroy_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
+		enum kfd_preempt_type type, unsigned int timeout,
+		uint32_t pipe_id, uint32_t queue_id)
+{
+	int xcc = 0, err;
+	void *xcc_mqd;
+	struct v9_mqd *m;
+	uint64_t mqd_offset;
+
+	m = get_mqd(mqd);
+	mqd_offset = m->cp_mqd_stride_size;
+
+	for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
+		xcc_mqd = mqd + mqd_offset * xcc;
+		err = mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, xcc_mqd,
+						    type, timeout, pipe_id,
+						    queue_id);
+		if (err) {
+			pr_debug("Destroy MQD failed for xcc: %d\n", xcc);
+			break;
+		}
+	}
+
+	return err;
+}
+
+static int load_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
+			uint32_t pipe_id, uint32_t queue_id,
+			struct queue_properties *p, struct mm_struct *mms)
+{
+	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
+	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
+	int xcc = 0, err;
+	void *xcc_mqd;
+	uint64_t mqd_stride_size = mm->mqd_stride(mm, p);
+
+	for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
+		xcc_mqd = mqd + mqd_stride_size * xcc;
+		err = mm->dev->kfd2kgd->hqd_load(mm->dev->adev, xcc_mqd,
+						 pipe_id, queue_id,
+						(uint32_t __user *)p->write_ptr,
+						 wptr_shift, 0, mms);
+		if (err) {
+			pr_debug("Load MQD failed for xcc: %d\n", xcc);
+			break;
+		}
+	}
+
+	return err;
+}
+
 #if defined(CONFIG_DEBUG_FS)
 
 static int debugfs_show_mqd(struct seq_file *m, void *data)
···
 	switch (type) {
 	case KFD_MQD_TYPE_CP:
 		mqd->allocate_mqd = allocate_mqd;
-		mqd->init_mqd = init_mqd;
 		mqd->free_mqd = kfd_free_mqd_cp;
-		mqd->load_mqd = load_mqd;
-		mqd->update_mqd = update_mqd;
-		mqd->destroy_mqd = kfd_destroy_mqd_cp;
 		mqd->is_occupied = kfd_is_occupied_cp;
 		mqd->get_wave_state = get_wave_state;
 		mqd->get_checkpoint_info = get_checkpoint_info;
 		mqd->checkpoint_mqd = checkpoint_mqd;
 		mqd->restore_mqd = restore_mqd;
 		mqd->mqd_size = sizeof(struct v9_mqd);
+		mqd->mqd_stride = mqd_stride_v9;
 #if defined(CONFIG_DEBUG_FS)
 		mqd->debugfs_show_mqd = debugfs_show_mqd;
 #endif
+		if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3)) {
+			mqd->init_mqd = init_mqd_v9_4_3;
+			mqd->load_mqd = load_mqd_v9_4_3;
+			mqd->update_mqd = update_mqd_v9_4_3;
+			mqd->destroy_mqd = destroy_mqd_v9_4_3;
+		} else {
+			mqd->init_mqd = init_mqd;
+			mqd->load_mqd = load_mqd;
+			mqd->update_mqd = update_mqd;
+			mqd->destroy_mqd = kfd_destroy_mqd_cp;
+		}
 		break;
 	case KFD_MQD_TYPE_HIQ:
 		mqd->allocate_mqd = allocate_hiq_mqd;
-		mqd->init_mqd = init_mqd_hiq;
 		mqd->free_mqd = free_mqd_hiq_sdma;
-		mqd->load_mqd = kfd_hiq_load_mqd_kiq;
 		mqd->update_mqd = update_mqd;
-		mqd->destroy_mqd = kfd_destroy_mqd_cp;
 		mqd->is_occupied = kfd_is_occupied_cp;
 		mqd->mqd_size = sizeof(struct v9_mqd);
+		mqd->mqd_stride = kfd_mqd_stride;
 #if defined(CONFIG_DEBUG_FS)
 		mqd->debugfs_show_mqd = debugfs_show_mqd;
 #endif
 		mqd->read_doorbell_id = read_doorbell_id;
+		if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3)) {
+			mqd->init_mqd = init_mqd_hiq_v9_4_3;
+			mqd->load_mqd = hiq_load_mqd_kiq_v9_4_3;
+			mqd->destroy_mqd = destroy_hiq_mqd_v9_4_3;
+		} else {
+			mqd->init_mqd = init_mqd_hiq;
+			mqd->load_mqd = kfd_hiq_load_mqd_kiq;
+			mqd->destroy_mqd = kfd_destroy_mqd_cp;
+		}
 		break;
 	case KFD_MQD_TYPE_DIQ:
 		mqd->allocate_mqd = allocate_mqd;
···
 		mqd->checkpoint_mqd = checkpoint_mqd_sdma;
 		mqd->restore_mqd = restore_mqd_sdma;
 		mqd->mqd_size = sizeof(struct v9_sdma_mqd);
+		mqd->mqd_stride = kfd_mqd_stride;
 #if defined(CONFIG_DEBUG_FS)
 		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
 #endif
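
A detail worth calling out in init_mqd_v9_4_3()/update_mqd_v9_4_3(): for AQL queues the XCCs are chained in a ring through compute_current_logic_xcc_id. XCC0 is the master (the only one that updates the read pointer) and points at the last XCC; every other XCC points at its predecessor. A standalone sketch of that assignment (num_xcc is an assumed example value):

#include <stdio.h>

int main(void)
{
	unsigned int num_xcc = 4;	/* assumed XCCs per KFD node */

	for (unsigned int xcc = 0; xcc < num_xcc; xcc++) {
		/* Mirrors the switch above: master XCC0 links to the last
		 * XCC, every other XCC links to xcc - 1. */
		unsigned int logic_id = xcc ? xcc - 1 : num_xcc - 1;

		printf("XCC%u -> compute_current_logic_xcc_id = %u%s\n",
		       xcc, logic_id, xcc ? "" : " (master)");
	}
	return 0;
}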
+3
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
···
 		mqd->destroy_mqd = kfd_destroy_mqd_cp;
 		mqd->is_occupied = kfd_is_occupied_cp;
 		mqd->mqd_size = sizeof(struct vi_mqd);
+		mqd->mqd_stride = kfd_mqd_stride;
 #if defined(CONFIG_DEBUG_FS)
 		mqd->debugfs_show_mqd = debugfs_show_mqd;
 #endif
···
 		mqd->destroy_mqd = kfd_destroy_mqd_cp;
 		mqd->is_occupied = kfd_is_occupied_cp;
 		mqd->mqd_size = sizeof(struct vi_mqd);
+		mqd->mqd_stride = kfd_mqd_stride;
 #if defined(CONFIG_DEBUG_FS)
 		mqd->debugfs_show_mqd = debugfs_show_mqd;
 #endif
···
 		mqd->checkpoint_mqd = checkpoint_mqd_sdma;
 		mqd->restore_mqd = restore_mqd_sdma;
 		mqd->mqd_size = sizeof(struct vi_sdma_mqd);
+		mqd->mqd_stride = kfd_mqd_stride;
 #if defined(CONFIG_DEBUG_FS)
 		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
 #endif
+12 -4
drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
···
 	struct queue *q;
 	enum KFD_MQD_TYPE mqd_type;
 	struct mqd_manager *mqd_mgr;
-	int r = 0;
+	int r = 0, xcc, num_xccs = 1;
+	void *mqd;
+	uint64_t size = 0;
 
 	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
 		if (pqn->q) {
···
 			seq_printf(m, "  Compute queue on device %x\n",
 				   q->device->id);
 			mqd_type = KFD_MQD_TYPE_CP;
+			num_xccs = q->device->num_xcc_per_node;
 			break;
 		default:
 			seq_printf(m,
···
 				continue;
 			}
 			mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type];
+			size = mqd_mgr->mqd_stride(mqd_mgr,
+						   &q->properties);
 		} else if (pqn->kq) {
 			q = pqn->kq->queue;
 			mqd_mgr = pqn->kq->mqd_mgr;
···
 			continue;
 		}
 
-		r = mqd_mgr->debugfs_show_mqd(m, q->mqd);
-		if (r != 0)
-			break;
+		for (xcc = 0; xcc < num_xccs; xcc++) {
+			mqd = q->mqd + size * xcc;
+			r = mqd_mgr->debugfs_show_mqd(m, mqd);
+			if (r != 0)
+				break;
+		}
 	}
 
 	return r;
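
The debugfs loop now steps through the per-XCC MQD copies using the mqd_stride() callback. On GFX 9.4.3 with CWSR enabled, the stride for a compute queue is larger than sizeof(struct v9_mqd) because each per-XCC slot also carries a page-aligned control stack (see mqd_stride_v9() above). A sketch of that rounding with assumed sizes:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096u
#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

int main(void)
{
	uint64_t mqd_size = 2688;	/* assumed sizeof(struct v9_mqd) */
	uint64_t ctl_stack_size = 8192;	/* assumed per-queue ctl stack */

	/* Mirrors mqd_stride_v9(): MQD and ctl stack each rounded up to a
	 * page, giving the distance between consecutive per-XCC slots. */
	uint64_t stride = ALIGN_UP(ctl_stack_size, PAGE_SIZE) +
			  ALIGN_UP(mqd_size, PAGE_SIZE);

	printf("per-XCC MQD stride: %llu bytes\n", (unsigned long long)stride);
	return 0;
}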
+24 -6
drivers/gpu/drm/amd/include/v9_structs.h
···
 	uint32_t compute_wave_restore_addr_lo;
 	uint32_t compute_wave_restore_addr_hi;
 	uint32_t compute_wave_restore_control;
-	uint32_t compute_static_thread_mgmt_se4;
-	uint32_t compute_static_thread_mgmt_se5;
-	uint32_t compute_static_thread_mgmt_se6;
-	uint32_t compute_static_thread_mgmt_se7;
+	union {
+		struct {
+			uint32_t compute_static_thread_mgmt_se4;
+			uint32_t compute_static_thread_mgmt_se5;
+			uint32_t compute_static_thread_mgmt_se6;
+			uint32_t compute_static_thread_mgmt_se7;
+		};
+		struct {
+			uint32_t compute_current_logic_xcc_id;		// offset: 39 (0x27)
+			uint32_t compute_restart_cg_tg_id;		// offset: 40 (0x28)
+			uint32_t compute_tg_chunk_size;			// offset: 41 (0x29)
+			uint32_t compute_restore_tg_chunk_size;		// offset: 42 (0x2A)
+		};
+	};
 	uint32_t reserved_43;
 	uint32_t reserved_44;
 	uint32_t reserved_45;
···
 	uint32_t iqtimer_pkt_dw29;
 	uint32_t iqtimer_pkt_dw30;
 	uint32_t iqtimer_pkt_dw31;
-	uint32_t reserved_225;
-	uint32_t reserved_226;
+	union {
+		struct {
+			uint32_t reserved_225;
+			uint32_t reserved_226;
+		};
+		struct {
+			uint32_t pm4_target_xcc_in_xcp;			// offset: 225 (0xE1)
+			uint32_t cp_mqd_stride_size;			// offset: 226 (0xE2)
+		};
+	};
 	uint32_t reserved_227;
 	uint32_t set_resources_header;
 	uint32_t set_resources_dw1;
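
Because both unions re-purpose dwords that already existed, the overlay must not move or grow anything in the MQD. A standalone compile-time check of that property (struct mqd_overlay is a trimmed stand-in for the relevant slice of struct v9_mqd, not the real definition):

#include <stddef.h>
#include <stdint.h>

struct mqd_overlay {
	union {
		struct {
			uint32_t compute_static_thread_mgmt_se4;
			uint32_t compute_static_thread_mgmt_se5;
			uint32_t compute_static_thread_mgmt_se6;
			uint32_t compute_static_thread_mgmt_se7;
		};
		struct {
			uint32_t compute_current_logic_xcc_id;
			uint32_t compute_restart_cg_tg_id;
			uint32_t compute_tg_chunk_size;
			uint32_t compute_restore_tg_chunk_size;
		};
	};
};

/* The two views must alias dword-for-dword with no padding. */
_Static_assert(sizeof(struct mqd_overlay) == 4 * sizeof(uint32_t),
	       "union overlay must not grow the MQD");
_Static_assert(offsetof(struct mqd_overlay, compute_tg_chunk_size) ==
	       offsetof(struct mqd_overlay, compute_static_thread_mgmt_se6),
	       "compute_tg_chunk_size re-purposes se6 (DW 41)");

int main(void) { return 0; }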