Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdkfd: CWSR with software scheduler

This patch adds support for programming the trap handler settings
when the driver is loaded with the software scheduler (sched_policy=2).

Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Suggested-by: Jay Cornwall <Jay.Cornwall@amd.com>
Reviewed-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Mukul Joshi and committed by Alex Deucher
b53ef0df eff8cbf0

+116 -3
+31
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
··· 560 560 case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: 561 561 type = RESET_WAVES; 562 562 break; 563 + case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE: 564 + type = SAVE_WAVES; 565 + break; 563 566 default: 564 567 type = DRAIN_PIPE; 565 568 break; ··· 757 754 adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base); 758 755 } 759 756 757 + static void program_trap_handler_settings(struct kgd_dev *kgd, 758 + uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) 759 + { 760 + struct amdgpu_device *adev = get_amdgpu_device(kgd); 761 + 762 + lock_srbm(kgd, 0, 0, 0, vmid); 763 + 764 + /* 765 + * Program TBA registers 766 + */ 767 + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO), 768 + lower_32_bits(tba_addr >> 8)); 769 + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI), 770 + upper_32_bits(tba_addr >> 8) | 771 + (1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT)); 772 + 773 + /* 774 + * Program TMA registers 775 + */ 776 + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO), 777 + lower_32_bits(tma_addr >> 8)); 778 + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI), 779 + upper_32_bits(tma_addr >> 8)); 780 + 781 + unlock_srbm(kgd); 782 + } 783 + 760 784 const struct kfd2kgd_calls gfx_v10_kfd2kgd = { 761 785 .program_sh_mem_settings = kgd_program_sh_mem_settings, 762 786 .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, ··· 804 774 .get_atc_vmid_pasid_mapping_info = 805 775 get_atc_vmid_pasid_mapping_info, 806 776 .set_vm_context_page_table_base = set_vm_context_page_table_base, 777 + .program_trap_handler_settings = program_trap_handler_settings, 807 778 };
+31
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
··· 537 537 case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: 538 538 type = RESET_WAVES; 539 539 break; 540 + case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE: 541 + type = SAVE_WAVES; 542 + break; 540 543 default: 541 544 type = DRAIN_PIPE; 542 545 break; ··· 659 656 660 657 /* SDMA is on gfxhub as well for Navi1* series */ 661 658 adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base); 659 + } 660 + 661 + static void program_trap_handler_settings_v10_3(struct kgd_dev *kgd, 662 + uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) 663 + { 664 + struct amdgpu_device *adev = get_amdgpu_device(kgd); 665 + 666 + lock_srbm(kgd, 0, 0, 0, vmid); 667 + 668 + /* 669 + * Program TBA registers 670 + */ 671 + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO), 672 + lower_32_bits(tba_addr >> 8)); 673 + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI), 674 + upper_32_bits(tba_addr >> 8) | 675 + (1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT)); 676 + 677 + /* 678 + * Program TMA registers 679 + */ 680 + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO), 681 + lower_32_bits(tma_addr >> 8)); 682 + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI), 683 + upper_32_bits(tma_addr >> 8)); 684 + 685 + unlock_srbm(kgd); 662 686 } 663 687 664 688 #if 0 ··· 850 820 .address_watch_get_offset = address_watch_get_offset_v10_3, 851 821 .get_atc_vmid_pasid_mapping_info = NULL, 852 822 .set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3, 823 + .program_trap_handler_settings = program_trap_handler_settings_v10_3, 853 824 #if 0 854 825 .enable_debug_trap = enable_debug_trap_v10_3, 855 826 .disable_debug_trap = disable_debug_trap_v10_3,
+32 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
··· 42 42 enum hqd_dequeue_request_type { 43 43 NO_ACTION = 0, 44 44 DRAIN_PIPE, 45 - RESET_WAVES 45 + RESET_WAVES, 46 + SAVE_WAVES 46 47 }; 47 48 48 49 static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) ··· 567 566 case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: 568 567 type = RESET_WAVES; 569 568 break; 569 + case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE: 570 + type = SAVE_WAVES; 571 + break; 570 572 default: 571 573 type = DRAIN_PIPE; 572 574 break; ··· 882 878 adev->gfx.cu_info.max_waves_per_simd; 883 879 } 884 880 881 + static void kgd_gfx_v9_program_trap_handler_settings(struct kgd_dev *kgd, 882 + uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) 883 + { 884 + struct amdgpu_device *adev = get_amdgpu_device(kgd); 885 + 886 + lock_srbm(kgd, 0, 0, 0, vmid); 887 + 888 + /* 889 + * Program TBA registers 890 + */ 891 + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO), 892 + lower_32_bits(tba_addr >> 8)); 893 + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI), 894 + upper_32_bits(tba_addr >> 8)); 895 + 896 + /* 897 + * Program TMA registers 898 + */ 899 + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO), 900 + lower_32_bits(tma_addr >> 8)); 901 + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI), 902 + upper_32_bits(tma_addr >> 8)); 903 + 904 + unlock_srbm(kgd); 905 + } 906 + 885 907 const struct kfd2kgd_calls gfx_v9_kfd2kgd = { 886 908 .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, 887 909 .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, ··· 929 899 kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, 930 900 .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, 931 901 .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, 902 + .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, 932 903 };
+19 -2
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
··· 211 211 WARN_ON(!old); 212 212 } 213 213 214 + static void program_trap_handler_settings(struct device_queue_manager *dqm, 215 + struct qcm_process_device *qpd) 216 + { 217 + if (dqm->dev->kfd2kgd->program_trap_handler_settings) 218 + dqm->dev->kfd2kgd->program_trap_handler_settings( 219 + dqm->dev->kgd, qpd->vmid, 220 + qpd->tba_addr, qpd->tma_addr); 221 + } 222 + 214 223 static int allocate_vmid(struct device_queue_manager *dqm, 215 224 struct qcm_process_device *qpd, 216 225 struct queue *q) ··· 249 240 q->properties.vmid = allocated_vmid; 250 241 251 242 program_sh_mem_settings(dqm, qpd); 243 + 244 + if (dqm->dev->device_info->asic_family >= CHIP_VEGA10 && 245 + dqm->dev->cwsr_enabled) 246 + program_trap_handler_settings(dqm, qpd); 252 247 253 248 /* qpd->page_table_base is set earlier when register_process() 254 249 * is called, i.e. when the first queue is created. ··· 595 582 } 596 583 597 584 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 598 - KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, 585 + (dqm->dev->cwsr_enabled? 586 + KFD_PREEMPT_TYPE_WAVEFRONT_SAVE: 587 + KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), 599 588 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); 600 589 if (retval) { 601 590 pr_err("destroy mqd failed\n"); ··· 690 675 continue; 691 676 692 677 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 693 - KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, 678 + (dqm->dev->cwsr_enabled? 679 + KFD_PREEMPT_TYPE_WAVEFRONT_SAVE: 680 + KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), 694 681 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); 695 682 if (retval && !ret) 696 683 /* Return the first error, but keep going to
+3
drivers/gpu/drm/amd/include/kgd_kfd_interface.h
··· 44 44 enum kfd_preempt_type { 45 45 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN = 0, 46 46 KFD_PREEMPT_TYPE_WAVEFRONT_RESET, 47 + KFD_PREEMPT_TYPE_WAVEFRONT_SAVE 47 48 }; 48 49 49 50 struct kfd_vm_fault_info { ··· 299 298 300 299 void (*get_cu_occupancy)(struct kgd_dev *kgd, int pasid, int *wave_cnt, 301 300 int *max_waves_per_cu); 301 + void (*program_trap_handler_settings)(struct kgd_dev *kgd, 302 + uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr); 302 303 }; 303 304 304 305 #endif /* KGD_KFD_INTERFACE_H_INCLUDED */