Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: use TTM_PL_FLAG_CONTIGUOUS v2

Implement AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS using TTM_PL_FLAG_CONTIGUOUS
instead of a placement limit. That allows us to better handle CPU
accessible placements.

v2: prevent virtual BO start address from overflowing

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Christian König; committed by Alex Deucher.
89bb5752 f75e237c

+18 -10
+5 -6
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
```diff
@@ drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@
 	if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
 		unsigned visible_pfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
-		unsigned lpfn = 0;
-
-		/* This forces a reallocation if the flag wasn't set before */
-		if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
-			lpfn = adev->mc.real_vram_size >> PAGE_SHIFT;
 
 		places[c].fpfn = 0;
-		places[c].lpfn = lpfn;
+		places[c].lpfn = 0;
 		places[c].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
 			TTM_PL_FLAG_VRAM;
+
 		if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
 			places[c].lpfn = visible_pfn;
 		else
 			places[c].flags |= TTM_PL_FLAG_TOPDOWN;
+
+		if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
+			places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
 		c++;
 	}
```
+13 -4
drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
```diff
@@ drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@
 			       const struct ttm_place *place,
 			       struct ttm_mem_reg *mem)
 {
-	struct amdgpu_bo *bo = container_of(tbo, struct amdgpu_bo, tbo);
 	struct amdgpu_vram_mgr *mgr = man->priv;
 	struct drm_mm *mm = &mgr->mm;
 	struct drm_mm_node *nodes;
@@
 	if (!lpfn)
 		lpfn = man->size;
 
-	if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS ||
-	    place->lpfn || amdgpu_vram_page_split == -1) {
+	if (place->flags & TTM_PL_FLAG_CONTIGUOUS ||
+	    amdgpu_vram_page_split == -1) {
 		pages_per_node = ~0ul;
 		num_nodes = 1;
 	} else {
@@
 	if (place->flags & TTM_PL_FLAG_TOPDOWN)
 		mode = DRM_MM_INSERT_HIGH;
 
+	mem->start = 0;
 	pages_left = mem->num_pages;
 
 	spin_lock(&mgr->lock);
 	for (i = 0; i < num_nodes; ++i) {
 		unsigned long pages = min(pages_left, pages_per_node);
 		uint32_t alignment = mem->page_alignment;
+		unsigned long start;
 
 		if (pages == pages_per_node)
 			alignment = pages_per_node;
@@
 		if (unlikely(r))
 			goto error;
 
+		/* Calculate a virtual BO start address to easily check if
+		 * everything is CPU accessible.
+		 */
+		start = nodes[i].start + nodes[i].size;
+		if (start > mem->num_pages)
+			start -= mem->num_pages;
+		else
+			start = 0;
+		mem->start = max(mem->start, start);
 		pages_left -= pages;
 	}
 	spin_unlock(&mgr->lock);
 
-	mem->start = num_nodes == 1 ? nodes[0].start : AMDGPU_BO_INVALID_OFFSET;
 	mem->mm_node = nodes;
 
 	return 0;
```