Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/imagination: Use cached memory with dma_coherent

The TI k3-j721s2 platform does not allow us to use uncached memory
(which is what the driver currently does) without disabling cache snooping
on the AXI ACE-Lite interface, which would be too much of a performance
hit.

Given the platform is dma-coherent, we can simply force all
device-accessible memory allocations through the CPU cache. In fact, this
can be done whenever the dma-coherent attribute is present.

Reviewed-by: Frank Binns <frank.binns@imgtec.com>
Link: https://lore.kernel.org/r/20250410-sets-bxs-4-64-patch-v1-v6-15-eda620c5865f@imgtec.com
Signed-off-by: Matt Coster <matt.coster@imgtec.com>

+18 -6
+7 -3
drivers/gpu/drm/imagination/pvr_gem.c
··· 19 19 #include <linux/log2.h> 20 20 #include <linux/mutex.h> 21 21 #include <linux/pagemap.h> 22 + #include <linux/property.h> 22 23 #include <linux/refcount.h> 23 24 #include <linux/scatterlist.h> 24 25 ··· 335 334 struct pvr_gem_object * 336 335 pvr_gem_object_create(struct pvr_device *pvr_dev, size_t size, u64 flags) 337 336 { 337 + struct drm_device *drm_dev = from_pvr_device(pvr_dev); 338 338 struct drm_gem_shmem_object *shmem_obj; 339 339 struct pvr_gem_object *pvr_obj; 340 340 struct sg_table *sgt; ··· 345 343 if (size == 0 || !pvr_gem_object_flags_validate(flags)) 346 344 return ERR_PTR(-EINVAL); 347 345 348 - shmem_obj = drm_gem_shmem_create(from_pvr_device(pvr_dev), size); 346 + if (device_get_dma_attr(drm_dev->dev) == DEV_DMA_COHERENT) 347 + flags |= PVR_BO_CPU_CACHED; 348 + 349 + shmem_obj = drm_gem_shmem_create(drm_dev, size); 349 350 if (IS_ERR(shmem_obj)) 350 351 return ERR_CAST(shmem_obj); 351 352 ··· 363 358 goto err_shmem_object_free; 364 359 } 365 360 366 - dma_sync_sgtable_for_device(shmem_obj->base.dev->dev, sgt, 367 - DMA_BIDIRECTIONAL); 361 + dma_sync_sgtable_for_device(drm_dev->dev, sgt, DMA_BIDIRECTIONAL); 368 362 369 363 /* 370 364 * Do this last because pvr_gem_object_zero() requires a fully
+4 -2
drivers/gpu/drm/imagination/pvr_gem.h
··· 44 44 * Bits not defined anywhere are "undefined". 45 45 * 46 46 * CPU mapping options 47 - * :PVR_BO_CPU_CACHED: By default, all GEM objects are mapped write-combined on the CPU. Set this 48 - * flag to override this behaviour and map the object cached. 47 + * :PVR_BO_CPU_CACHED: By default, all GEM objects are mapped write-combined on the CPU. Set 48 + * this flag to override this behaviour and map the object cached. If the dma_coherent 49 + * property is present in devicetree, all allocations will be mapped as if this flag was set. 50 + * This does not require any additional consideration at allocation time. 49 51 * 50 52 * Firmware options 51 53 * :PVR_BO_FW_NO_CLEAR_ON_RESET: By default, all FW objects are cleared and reinitialised on hard
+7 -1
drivers/gpu/drm/imagination/pvr_mmu.c
··· 17 17 #include <linux/dma-mapping.h> 18 18 #include <linux/kmemleak.h> 19 19 #include <linux/minmax.h> 20 + #include <linux/property.h> 20 21 #include <linux/sizes.h> 21 22 22 23 #define PVR_SHIFT_FROM_SIZE(size_) (__builtin_ctzll(size_)) ··· 260 259 struct device *dev = from_pvr_device(pvr_dev)->dev; 261 260 262 261 struct page *raw_page; 262 + pgprot_t prot; 263 263 int err; 264 264 265 265 dma_addr_t dma_addr; ··· 270 268 if (!raw_page) 271 269 return -ENOMEM; 272 270 273 - host_ptr = vmap(&raw_page, 1, VM_MAP, pgprot_writecombine(PAGE_KERNEL)); 271 + prot = PAGE_KERNEL; 272 + if (device_get_dma_attr(dev) != DEV_DMA_COHERENT) 273 + prot = pgprot_writecombine(prot); 274 + 275 + host_ptr = vmap(&raw_page, 1, VM_MAP, prot); 274 276 if (!host_ptr) { 275 277 err = -ENOMEM; 276 278 goto err_free_page;