Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu/gmc9: Adjust GART and AGP location with xgmi offset (v2)

On hives with xgmi enabled, the fb_location aperture defines the
total framebuffer size of all nodes in the hive. Each GPU in the
hive has the same view via the fb_location
aperture. GPU0 starts at offset (0 * segment size),
GPU1 starts at offset (1 * segment size), etc.

For access to local vram on each GPU, we need to take this offset into
account. This includes setting up the GPUVM page table and the GART table.

v2: squash in "drm/amdgpu: Init correct fb region for none XGMI configuration"

Acked-by: Huang Rui <ray.huang@amd.com>
Acked-by: Slava Abramov <slava.abramov@amd.com>
Signed-off-by: Shaoyun Liu <Shaoyun.Liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Huang Rui <ray.huang@amd.com>

+39 -10
+15 -10
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
··· 121 121 mc->vram_end = mc->vram_start + mc->mc_vram_size - 1; 122 122 if (limit && limit < mc->real_vram_size) 123 123 mc->real_vram_size = limit; 124 + 125 + if (mc->xgmi.num_physical_nodes == 0) { 126 + mc->fb_start = mc->vram_start; 127 + mc->fb_end = mc->vram_end; 128 + } 124 129 dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n", 125 130 mc->mc_vram_size >> 20, mc->vram_start, 126 131 mc->vram_end, mc->real_vram_size >> 20); ··· 152 147 /* VCE doesn't like it when BOs cross a 4GB segment, so align 153 148 * the GART base on a 4GB boundary as well. 154 149 */ 155 - size_bf = mc->vram_start; 156 - size_af = adev->gmc.mc_mask + 1 - ALIGN(mc->vram_end + 1, four_gb); 150 + size_bf = mc->fb_start; 151 + size_af = adev->gmc.mc_mask + 1 - ALIGN(mc->fb_end + 1, four_gb); 157 152 158 153 if (mc->gart_size > max(size_bf, size_af)) { 159 154 dev_warn(adev->dev, "limiting GART\n"); ··· 189 184 const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1); 190 185 u64 size_af, size_bf; 191 186 192 - if (mc->vram_start > mc->gart_start) { 193 - size_bf = (mc->vram_start & sixteen_gb_mask) - 187 + if (mc->fb_start > mc->gart_start) { 188 + size_bf = (mc->fb_start & sixteen_gb_mask) - 194 189 ALIGN(mc->gart_end + 1, sixteen_gb); 195 - size_af = mc->mc_mask + 1 - ALIGN(mc->vram_end + 1, sixteen_gb); 190 + size_af = mc->mc_mask + 1 - ALIGN(mc->fb_end + 1, sixteen_gb); 196 191 } else { 197 - size_bf = mc->vram_start & sixteen_gb_mask; 192 + size_bf = mc->fb_start & sixteen_gb_mask; 198 193 size_af = (mc->gart_start & sixteen_gb_mask) - 199 - ALIGN(mc->vram_end + 1, sixteen_gb); 194 + ALIGN(mc->fb_end + 1, sixteen_gb); 200 195 } 201 196 202 197 if (size_bf > size_af) { 203 - mc->agp_start = mc->vram_start > mc->gart_start ? 198 + mc->agp_start = mc->fb_start > mc->gart_start ? 204 199 mc->gart_end + 1 : 0; 205 200 mc->agp_size = size_bf; 206 201 } else { 207 - mc->agp_start = (mc->vram_start > mc->gart_start ? 
208 - mc->vram_end : mc->gart_end) + 1, 202 + mc->agp_start = (mc->fb_start > mc->gart_start ? 203 + mc->fb_end : mc->gart_end) + 1, 209 204 mc->agp_size = size_af; 210 205 } 211 206
+8
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
··· 114 114 u64 gart_end; 115 115 u64 vram_start; 116 116 u64 vram_end; 117 + /* FB region , it's same as local vram region in single GPU, in XGMI 118 + * configuration, this region covers all GPUs in the same hive , 119 + * each GPU in the hive has the same view of this FB region . 120 + * GPU0's vram starts at offset (0 * segment size) , 121 + * GPU1 starts at offset (1 * segment size), etc. 122 + */ 123 + u64 fb_start; 124 + u64 fb_end; 117 125 unsigned vram_width; 118 126 u64 real_vram_size; 119 127 int vram_mtrr;
+3
drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c
··· 44 44 REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_LFB_REGION); 45 45 if (adev->gmc.xgmi.physical_node_id > 3) 46 46 return -EINVAL; 47 + adev->gmc.xgmi.node_segment_size = REG_GET_FIELD( 48 + RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_SIZE), 49 + MC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24; 47 50 } 48 51 49 52 return 0;
+6
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
··· 771 771 u64 base = 0; 772 772 if (!amdgpu_sriov_vf(adev)) 773 773 base = mmhub_v1_0_get_fb_location(adev); 774 + /* add the xgmi offset of the physical node */ 775 + base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; 774 776 amdgpu_gmc_vram_location(adev, &adev->gmc, base); 775 777 amdgpu_gmc_gart_location(adev, mc); 776 778 if (!amdgpu_sriov_vf(adev)) 777 779 amdgpu_gmc_agp_location(adev, mc); 778 780 /* base offset of vram pages */ 779 781 adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev); 782 + 783 + /* XXX: add the xgmi offset of the physical node? */ 784 + adev->vm_manager.vram_base_offset += 785 + adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; 780 786 } 781 787 782 788 /**
+7
drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
··· 38 38 u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev) 39 39 { 40 40 u64 base = RREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE); 41 + u64 top = RREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_TOP); 41 42 42 43 base &= MC_VM_FB_LOCATION_BASE__FB_BASE_MASK; 43 44 base <<= 24; 45 + 46 + top &= MC_VM_FB_LOCATION_TOP__FB_TOP_MASK; 47 + top <<= 24; 48 + 49 + adev->gmc.fb_start = base; 50 + adev->gmc.fb_end = top; 44 51 45 52 return base; 46 53 }