Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdkfd: Eliminate get_atc_vmid_pasid_mapping_valid

get_atc_vmid_pasid_mapping_valid() is very similar to
get_atc_vmid_pasid_mapping_pasid(), so they can be merged into a new
function get_atc_vmid_pasid_mapping_info() to reduce the number of
register reads. More importantly, getting the PASID and the valid bit
atomically with a single read fixes some potential race conditions where
the mapping changes between the two reads.

Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Yong Zhao and committed by
Alex Deucher
56fc40ab 3fe023d4

+76 -122
+2 -4
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
··· 278 278 .address_watch_execute = kgd_gfx_v9_address_watch_execute, 279 279 .wave_control_execute = kgd_gfx_v9_wave_control_execute, 280 280 .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, 281 - .get_atc_vmid_pasid_mapping_pasid = 282 - kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid, 283 - .get_atc_vmid_pasid_mapping_valid = 284 - kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid, 281 + .get_atc_vmid_pasid_mapping_info = 282 + kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, 285 283 .get_tile_config = kgd_gfx_v9_get_tile_config, 286 284 .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, 287 285 .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
+19 -30
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
··· 98 98 unsigned int watch_point_id, 99 99 unsigned int reg_offset); 100 100 101 - static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, 102 - uint8_t vmid); 103 - static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, 104 - uint8_t vmid); 101 + static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, 102 + uint8_t vmid, uint16_t *p_pasid); 105 103 static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, 106 104 uint64_t page_table_base); 107 105 static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); ··· 153 155 .address_watch_execute = kgd_address_watch_execute, 154 156 .wave_control_execute = kgd_wave_control_execute, 155 157 .address_watch_get_offset = kgd_address_watch_get_offset, 156 - .get_atc_vmid_pasid_mapping_pasid = 157 - get_atc_vmid_pasid_mapping_pasid, 158 - .get_atc_vmid_pasid_mapping_valid = 159 - get_atc_vmid_pasid_mapping_valid, 158 + .get_atc_vmid_pasid_mapping_info = 159 + get_atc_vmid_pasid_mapping_info, 160 160 .get_tile_config = amdgpu_amdkfd_get_tile_config, 161 161 .set_vm_context_page_table_base = set_vm_context_page_table_base, 162 162 .invalidate_tlbs = invalidate_tlbs, ··· 771 775 return 0; 772 776 } 773 777 774 - static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, 775 - uint8_t vmid) 778 + static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, 779 + uint8_t vmid, uint16_t *p_pasid) 776 780 { 777 - uint32_t reg; 781 + uint32_t value; 778 782 struct amdgpu_device *adev = (struct amdgpu_device *) kgd; 779 783 780 - reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) 784 + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) 781 785 + vmid); 782 - return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; 783 - } 786 + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; 784 787 785 - static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, 786 - uint8_t vmid) 787 - { 788 - uint32_t reg; 789 - struct 
amdgpu_device *adev = (struct amdgpu_device *) kgd; 790 - 791 - reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) 792 - + vmid); 793 - return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; 788 + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); 794 789 } 795 790 796 791 static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) ··· 813 826 { 814 827 struct amdgpu_device *adev = (struct amdgpu_device *) kgd; 815 828 int vmid; 829 + uint16_t queried_pasid; 830 + bool ret; 816 831 struct amdgpu_ring *ring = &adev->gfx.kiq.ring; 817 832 818 833 if (amdgpu_emu_mode == 0 && ring->sched.ready) ··· 823 834 for (vmid = 0; vmid < 16; vmid++) { 824 835 if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) 825 836 continue; 826 - if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { 827 - if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid) 828 - == pasid) { 829 - amdgpu_gmc_flush_gpu_tlb(adev, vmid, 830 - AMDGPU_GFXHUB_0, 0); 831 - break; 832 - } 837 + 838 + ret = get_atc_vmid_pasid_mapping_info(kgd, vmid, 839 + &queried_pasid); 840 + if (ret && queried_pasid == pasid) { 841 + amdgpu_gmc_flush_gpu_tlb(adev, vmid, 842 + AMDGPU_GFXHUB_0, 0); 843 + break; 833 844 } 834 845 } 835 846
+9 -19
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
··· 133 133 unsigned int watch_point_id, 134 134 unsigned int reg_offset); 135 135 136 - static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); 137 - static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, 138 - uint8_t vmid); 136 + static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, 137 + uint8_t vmid, uint16_t *p_pasid); 139 138 140 139 static void set_scratch_backing_va(struct kgd_dev *kgd, 141 140 uint64_t va, uint32_t vmid); ··· 185 186 .address_watch_execute = kgd_address_watch_execute, 186 187 .wave_control_execute = kgd_wave_control_execute, 187 188 .address_watch_get_offset = kgd_address_watch_get_offset, 188 - .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid, 189 - .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, 189 + .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info, 190 190 .set_scratch_backing_va = set_scratch_backing_va, 191 191 .get_tile_config = get_tile_config, 192 192 .set_vm_context_page_table_base = set_vm_context_page_table_base, ··· 751 753 return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset]; 752 754 } 753 755 754 - static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, 755 - uint8_t vmid) 756 + static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, 757 + uint8_t vmid, uint16_t *p_pasid) 756 758 { 757 - uint32_t reg; 759 + uint32_t value; 758 760 struct amdgpu_device *adev = (struct amdgpu_device *) kgd; 759 761 760 - reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); 761 - return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; 762 - } 762 + value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); 763 + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; 763 764 764 - static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, 765 - uint8_t vmid) 766 - { 767 - uint32_t reg; 768 - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; 769 - 770 - reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); 
771 - return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; 765 + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); 772 766 } 773 767 774 768 static void set_scratch_backing_va(struct kgd_dev *kgd,
+10 -22
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
··· 89 89 unsigned int watch_point_id, 90 90 unsigned int reg_offset); 91 91 92 - static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, 93 - uint8_t vmid); 94 - static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, 95 - uint8_t vmid); 92 + static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, 93 + uint8_t vmid, uint16_t *p_pasid); 96 94 static void set_scratch_backing_va(struct kgd_dev *kgd, 97 95 uint64_t va, uint32_t vmid); 98 96 static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, ··· 139 141 .address_watch_execute = kgd_address_watch_execute, 140 142 .wave_control_execute = kgd_wave_control_execute, 141 143 .address_watch_get_offset = kgd_address_watch_get_offset, 142 - .get_atc_vmid_pasid_mapping_pasid = 143 - get_atc_vmid_pasid_mapping_pasid, 144 - .get_atc_vmid_pasid_mapping_valid = 145 - get_atc_vmid_pasid_mapping_valid, 144 + .get_atc_vmid_pasid_mapping_info = 145 + get_atc_vmid_pasid_mapping_info, 146 146 .set_scratch_backing_va = set_scratch_backing_va, 147 147 .get_tile_config = get_tile_config, 148 148 .set_vm_context_page_table_base = set_vm_context_page_table_base, ··· 663 667 return 0; 664 668 } 665 669 666 - static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, 667 - uint8_t vmid) 670 + static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, 671 + uint8_t vmid, uint16_t *p_pasid) 668 672 { 669 - uint32_t reg; 673 + uint32_t value; 670 674 struct amdgpu_device *adev = (struct amdgpu_device *) kgd; 671 675 672 - reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); 673 - return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; 674 - } 676 + value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); 677 + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; 675 678 676 - static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, 677 - uint8_t vmid) 678 - { 679 - uint32_t reg; 680 - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; 681 - 682 - reg = 
RREG32(mmATC_VMID0_PASID_MAPPING + vmid); 683 - return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; 679 + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); 684 680 } 685 681 686 682 static int kgd_address_watch_disable(struct kgd_dev *kgd)
+18 -27
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
··· 612 612 return 0; 613 613 } 614 614 615 - bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, 616 - uint8_t vmid) 615 + bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, 616 + uint8_t vmid, uint16_t *p_pasid) 617 617 { 618 - uint32_t reg; 618 + uint32_t value; 619 619 struct amdgpu_device *adev = (struct amdgpu_device *) kgd; 620 620 621 - reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) 621 + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) 622 622 + vmid); 623 - return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; 624 - } 623 + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; 625 624 626 - uint16_t kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, 627 - uint8_t vmid) 628 - { 629 - uint32_t reg; 630 - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; 631 - 632 - reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) 633 - + vmid); 634 - return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; 625 + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); 635 626 } 636 627 637 628 static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid, ··· 657 666 { 658 667 struct amdgpu_device *adev = (struct amdgpu_device *) kgd; 659 668 int vmid, i; 669 + uint16_t queried_pasid; 670 + bool ret; 660 671 struct amdgpu_ring *ring = &adev->gfx.kiq.ring; 661 672 uint32_t flush_type = 0; 662 673 ··· 674 681 for (vmid = 0; vmid < 16; vmid++) { 675 682 if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) 676 683 continue; 677 - if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { 678 - if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(kgd, vmid) 679 - == pasid) { 680 - for (i = 0; i < adev->num_vmhubs; i++) 681 - amdgpu_gmc_flush_gpu_tlb(adev, vmid, 682 - i, flush_type); 683 - break; 684 - } 684 + 685 + ret = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(kgd, vmid, 686 + &queried_pasid); 687 + if (ret && queried_pasid == pasid) { 688 + for (i = 0; i < 
adev->num_vmhubs; i++) 689 + amdgpu_gmc_flush_gpu_tlb(adev, vmid, 690 + i, flush_type); 691 + break; 685 692 } 686 693 } 687 694 ··· 806 813 .address_watch_execute = kgd_gfx_v9_address_watch_execute, 807 814 .wave_control_execute = kgd_gfx_v9_wave_control_execute, 808 815 .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, 809 - .get_atc_vmid_pasid_mapping_pasid = 810 - kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid, 811 - .get_atc_vmid_pasid_mapping_valid = 812 - kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid, 816 + .get_atc_vmid_pasid_mapping_info = 817 + kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, 813 818 .get_tile_config = kgd_gfx_v9_get_tile_config, 814 819 .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, 815 820 .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
+2 -4
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
··· 55 55 unsigned int watch_point_id, 56 56 unsigned int reg_offset); 57 57 58 - bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, 59 - uint8_t vmid); 60 - uint16_t kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, 61 - uint8_t vmid); 58 + bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, 59 + uint8_t vmid, uint16_t *p_pasid); 62 60 void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, 63 61 uint64_t page_table_base); 64 62 int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
+5 -3
drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
··· 33 33 const struct cik_ih_ring_entry *ihre = 34 34 (const struct cik_ih_ring_entry *)ih_ring_entry; 35 35 const struct kfd2kgd_calls *f2g = dev->kfd2kgd; 36 - unsigned int vmid, pasid; 36 + unsigned int vmid; 37 + uint16_t pasid; 38 + bool ret; 37 39 38 40 /* This workaround is due to HW/FW limitation on Hawaii that 39 41 * VMID and PASID are not written into ih_ring_entry ··· 50 48 *tmp_ihre = *ihre; 51 49 52 50 vmid = f2g->read_vmid_from_vmfault_reg(dev->kgd); 53 - pasid = f2g->get_atc_vmid_pasid_mapping_pasid(dev->kgd, vmid); 51 + ret = f2g->get_atc_vmid_pasid_mapping_info(dev->kgd, vmid, &pasid); 54 52 55 53 tmp_ihre->ring_id &= 0x000000ff; 56 54 tmp_ihre->ring_id |= vmid << 8; 57 55 tmp_ihre->ring_id |= pasid << 16; 58 56 59 - return (pasid != 0) && 57 + return ret && (pasid != 0) && 60 58 vmid >= dev->vm_info.first_vmid_kfd && 61 59 vmid <= dev->vm_info.last_vmid_kfd; 62 60 }
+8 -8
drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
··· 761 761 { 762 762 int status = 0; 763 763 unsigned int vmid; 764 + uint16_t queried_pasid; 764 765 union SQ_CMD_BITS reg_sq_cmd; 765 766 union GRBM_GFX_INDEX_BITS reg_gfx_index; 766 767 struct kfd_process_device *pdd; ··· 783 782 */ 784 783 785 784 for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { 786 - if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid 787 - (dev->kgd, vmid)) { 788 - if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid 789 - (dev->kgd, vmid) == p->pasid) { 790 - pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n", 791 - vmid, p->pasid); 792 - break; 793 - } 785 + status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info 786 + (dev->kgd, vmid, &queried_pasid); 787 + 788 + if (status && queried_pasid == p->pasid) { 789 + pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n", 790 + vmid, p->pasid); 791 + break; 794 792 } 795 793 } 796 794
+3 -5
drivers/gpu/drm/amd/include/kgd_kfd_interface.h
··· 291 291 uint32_t (*address_watch_get_offset)(struct kgd_dev *kgd, 292 292 unsigned int watch_point_id, 293 293 unsigned int reg_offset); 294 - bool (*get_atc_vmid_pasid_mapping_valid)( 294 + bool (*get_atc_vmid_pasid_mapping_info)( 295 295 struct kgd_dev *kgd, 296 - uint8_t vmid); 297 - uint16_t (*get_atc_vmid_pasid_mapping_pasid)( 298 - struct kgd_dev *kgd, 299 - uint8_t vmid); 296 + uint8_t vmid, 297 + uint16_t *p_pasid); 300 298 301 299 /* No longer needed from GFXv9 onward. The scratch base address is 302 300 * passed to the shader by the CP. It's the user mode driver's