Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/msm: Support pgtable preallocation

Introduce a mechanism to count the worst case # of pages required in a
VM_BIND op.

Note that previously we would have had to somehow account for
allocations in unmap, when splitting a block. This behavior was removed
in commit 33729a5fc0ca ("iommu/io-pgtable-arm: Remove split on unmap
behavior")

Signed-off-by: Rob Clark <robdclark@chromium.org>
Signed-off-by: Rob Clark <robin.clark@oss.qualcomm.com>
Tested-by: Antonino Maniscalco <antomani103@gmail.com>
Reviewed-by: Antonino Maniscalco <antomani103@gmail.com>
Patchwork: https://patchwork.freedesktop.org/patch/661515/

authored by

Rob Clark and committed by
Rob Clark
e601ea31 2b93efeb

+225 -1
+1
drivers/gpu/drm/msm/msm_gem.h
··· 7 7 #ifndef __MSM_GEM_H__ 8 8 #define __MSM_GEM_H__ 9 9 10 + #include "msm_mmu.h" 10 11 #include <linux/kref.h> 11 12 #include <linux/dma-resv.h> 12 13 #include "drm/drm_exec.h"
+190 -1
drivers/gpu/drm/msm/msm_iommu.c
··· 6 6 7 7 #include <linux/adreno-smmu-priv.h> 8 8 #include <linux/io-pgtable.h> 9 + #include <linux/kmemleak.h> 9 10 #include "msm_drv.h" 10 11 #include "msm_mmu.h" 11 12 ··· 15 14 struct iommu_domain *domain; 16 15 atomic_t pagetables; 17 16 struct page *prr_page; 17 + 18 + struct kmem_cache *pt_cache; 18 19 }; 19 20 20 21 #define to_msm_iommu(x) container_of(x, struct msm_iommu, base) ··· 30 27 unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */ 31 28 phys_addr_t ttbr; 32 29 u32 asid; 30 + 31 + /** @root_page_table: Stores the root page table pointer. */ 32 + void *root_page_table; 33 33 }; 34 34 static struct msm_iommu_pagetable *to_pagetable(struct msm_mmu *mmu) 35 35 { ··· 288 282 return 0; 289 283 } 290 284 285 + static void 286 + msm_iommu_pagetable_prealloc_count(struct msm_mmu *mmu, struct msm_mmu_prealloc *p, 287 + uint64_t iova, size_t len) 288 + { 289 + u64 pt_count; 290 + 291 + /* 292 + * L1, L2 and L3 page tables. 293 + * 294 + * We could optimize L3 allocation by iterating over the sgt and merging 295 + * 2M contiguous blocks, but it's simpler to over-provision and return 296 + * the pages if they're not used. 297 + * 298 + * The first level descriptor (v8 / v7-lpae page table format) encodes 299 + * 30 bits of address. The second level encodes 29. For the 3rd it is 300 + * 39. 
301 + * 302 + * https://developer.arm.com/documentation/ddi0406/c/System-Level-Architecture/Virtual-Memory-System-Architecture--VMSA-/Long-descriptor-translation-table-format/Long-descriptor-translation-table-format-descriptors?lang=en#BEIHEFFB 303 + */ 304 + pt_count = ((ALIGN(iova + len, 1ull << 39) - ALIGN_DOWN(iova, 1ull << 39)) >> 39) + 305 + ((ALIGN(iova + len, 1ull << 30) - ALIGN_DOWN(iova, 1ull << 30)) >> 30) + 306 + ((ALIGN(iova + len, 1ull << 21) - ALIGN_DOWN(iova, 1ull << 21)) >> 21); 307 + 308 + p->count += pt_count; 309 + } 310 + 311 + static struct kmem_cache * 312 + get_pt_cache(struct msm_mmu *mmu) 313 + { 314 + struct msm_iommu_pagetable *pagetable = to_pagetable(mmu); 315 + return to_msm_iommu(pagetable->parent)->pt_cache; 316 + } 317 + 318 + static int 319 + msm_iommu_pagetable_prealloc_allocate(struct msm_mmu *mmu, struct msm_mmu_prealloc *p) 320 + { 321 + struct kmem_cache *pt_cache = get_pt_cache(mmu); 322 + int ret; 323 + 324 + p->pages = kvmalloc_array(p->count, sizeof(p->pages), GFP_KERNEL); 325 + if (!p->pages) 326 + return -ENOMEM; 327 + 328 + ret = kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, p->count, p->pages); 329 + if (ret != p->count) { 330 + p->count = ret; 331 + return -ENOMEM; 332 + } 333 + 334 + return 0; 335 + } 336 + 337 + static void 338 + msm_iommu_pagetable_prealloc_cleanup(struct msm_mmu *mmu, struct msm_mmu_prealloc *p) 339 + { 340 + struct kmem_cache *pt_cache = get_pt_cache(mmu); 341 + uint32_t remaining_pt_count = p->count - p->ptr; 342 + 343 + kmem_cache_free_bulk(pt_cache, remaining_pt_count, &p->pages[p->ptr]); 344 + kvfree(p->pages); 345 + } 346 + 347 + /** 348 + * alloc_pt() - Custom page table allocator 349 + * @cookie: Cookie passed at page table allocation time. 350 + * @size: Size of the page table. This size should be fixed, 351 + * and determined at creation time based on the granule size. 352 + * @gfp: GFP flags. 
353 + * 354 + * We want a custom allocator so we can use a cache for page table 355 + * allocations and amortize the cost of the over-reservation that's 356 + * done to allow asynchronous VM operations. 357 + * 358 + * Return: non-NULL on success, NULL if the allocation failed for any 359 + * reason. 360 + */ 361 + static void * 362 + msm_iommu_pagetable_alloc_pt(void *cookie, size_t size, gfp_t gfp) 363 + { 364 + struct msm_iommu_pagetable *pagetable = cookie; 365 + struct msm_mmu_prealloc *p = pagetable->base.prealloc; 366 + void *page; 367 + 368 + /* Allocation of the root page table happening during init. */ 369 + if (unlikely(!pagetable->root_page_table)) { 370 + struct page *p; 371 + 372 + p = alloc_pages_node(dev_to_node(pagetable->iommu_dev), 373 + gfp | __GFP_ZERO, get_order(size)); 374 + page = p ? page_address(p) : NULL; 375 + pagetable->root_page_table = page; 376 + return page; 377 + } 378 + 379 + if (WARN_ON(!p) || WARN_ON(p->ptr >= p->count)) 380 + return NULL; 381 + 382 + page = p->pages[p->ptr++]; 383 + memset(page, 0, size); 384 + 385 + /* 386 + * Page table entries don't use virtual addresses, which trips out 387 + * kmemleak. kmemleak_alloc_phys() might work, but physical addresses 388 + * are mixed with other fields, and I fear kmemleak won't detect that 389 + * either. 390 + * 391 + * Let's just ignore memory passed to the page-table driver for now. 392 + */ 393 + kmemleak_ignore(page); 394 + 395 + return page; 396 + } 397 + 398 + 399 + /** 400 + * free_pt() - Custom page table free function 401 + * @cookie: Cookie passed at page table allocation time. 402 + * @data: Page table to free. 403 + * @size: Size of the page table. This size should be fixed, 404 + * and determined at creation time based on the granule size. 
405 + */ 406 + static void 407 + msm_iommu_pagetable_free_pt(void *cookie, void *data, size_t size) 408 + { 409 + struct msm_iommu_pagetable *pagetable = cookie; 410 + 411 + if (unlikely(pagetable->root_page_table == data)) { 412 + free_pages((unsigned long)data, get_order(size)); 413 + pagetable->root_page_table = NULL; 414 + return; 415 + } 416 + 417 + kmem_cache_free(get_pt_cache(&pagetable->base), data); 418 + } 419 + 291 420 static const struct msm_mmu_funcs pagetable_funcs = { 421 + .prealloc_count = msm_iommu_pagetable_prealloc_count, 422 + .prealloc_allocate = msm_iommu_pagetable_prealloc_allocate, 423 + .prealloc_cleanup = msm_iommu_pagetable_prealloc_cleanup, 292 424 .map = msm_iommu_pagetable_map, 293 425 .unmap = msm_iommu_pagetable_unmap, 294 426 .destroy = msm_iommu_pagetable_destroy, ··· 477 333 static int msm_gpu_fault_handler(struct iommu_domain *domain, struct device *dev, 478 334 unsigned long iova, int flags, void *arg); 479 335 336 + static size_t get_tblsz(const struct io_pgtable_cfg *cfg) 337 + { 338 + int pg_shift, bits_per_level; 339 + 340 + pg_shift = __ffs(cfg->pgsize_bitmap); 341 + /* arm_lpae_iopte is u64: */ 342 + bits_per_level = pg_shift - ilog2(sizeof(u64)); 343 + 344 + return sizeof(u64) << bits_per_level; 345 + } 346 + 480 347 struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent, bool kernel_managed) 481 348 { 482 349 struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(parent->dev); ··· 524 369 525 370 if (!kernel_managed) { 526 371 ttbr0_cfg.quirks |= IO_PGTABLE_QUIRK_NO_WARN; 372 + 373 + /* 374 + * With userspace managed VM (aka VM_BIND), we need to pre- 375 + * allocate pages ahead of time for map/unmap operations, 376 + * handing them to io-pgtable via custom alloc/free ops as 377 + * needed: 378 + */ 379 + ttbr0_cfg.alloc = msm_iommu_pagetable_alloc_pt; 380 + ttbr0_cfg.free = msm_iommu_pagetable_free_pt; 381 + 382 + /* 383 + * Restrict to single page granules. 
Otherwise we may run 384 + * into a situation where userspace wants to unmap/remap 385 + * only a part of a larger block mapping, which is not 386 + * possible without unmapping the entire block. Which in 387 + * turn could cause faults if the GPU is accessing other 388 + * parts of the block mapping. 389 + * 390 + * Note that prior to commit 33729a5fc0ca ("iommu/io-pgtable-arm: 391 + * Remove split on unmap behavior)" this was handled in 392 + * io-pgtable-arm. But this apparently does not work 393 + * correctly on SMMUv3. 394 + */ 395 + WARN_ON(!(ttbr0_cfg.pgsize_bitmap & PAGE_SIZE)); 396 + ttbr0_cfg.pgsize_bitmap = PAGE_SIZE; 527 397 } 528 398 399 + pagetable->iommu_dev = ttbr1_cfg->iommu_dev; 529 400 pagetable->pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1, 530 401 &ttbr0_cfg, pagetable); 531 402 ··· 595 414 /* Needed later for TLB flush */ 596 415 pagetable->parent = parent; 597 416 pagetable->tlb = ttbr1_cfg->tlb; 598 - pagetable->iommu_dev = ttbr1_cfg->iommu_dev; 599 417 pagetable->pgsize_bitmap = ttbr0_cfg.pgsize_bitmap; 600 418 pagetable->ttbr = ttbr0_cfg.arm_lpae_s1_cfg.ttbr; 601 419 ··· 690 510 { 691 511 struct msm_iommu *iommu = to_msm_iommu(mmu); 692 512 iommu_domain_free(iommu->domain); 513 + kmem_cache_destroy(iommu->pt_cache); 693 514 kfree(iommu); 694 515 } 695 516 ··· 764 583 return mmu; 765 584 766 585 iommu = to_msm_iommu(mmu); 586 + if (adreno_smmu && adreno_smmu->cookie) { 587 + const struct io_pgtable_cfg *cfg = 588 + adreno_smmu->get_ttbr1_cfg(adreno_smmu->cookie); 589 + size_t tblsz = get_tblsz(cfg); 590 + 591 + iommu->pt_cache = 592 + kmem_cache_create("msm-mmu-pt", tblsz, tblsz, 0, NULL); 593 + } 767 594 iommu_set_fault_handler(iommu->domain, msm_gpu_fault_handler, iommu); 768 595 769 596 /* Enable stall on iommu fault: */
+34
drivers/gpu/drm/msm/msm_mmu.h
··· 9 9 10 10 #include <linux/iommu.h> 11 11 12 + struct msm_mmu_prealloc; 13 + struct msm_mmu; 14 + struct msm_gpu; 15 + 12 16 struct msm_mmu_funcs { 13 17 void (*detach)(struct msm_mmu *mmu); 18 + void (*prealloc_count)(struct msm_mmu *mmu, struct msm_mmu_prealloc *p, 19 + uint64_t iova, size_t len); 20 + int (*prealloc_allocate)(struct msm_mmu *mmu, struct msm_mmu_prealloc *p); 21 + void (*prealloc_cleanup)(struct msm_mmu *mmu, struct msm_mmu_prealloc *p); 14 22 int (*map)(struct msm_mmu *mmu, uint64_t iova, struct sg_table *sgt, 15 23 size_t off, size_t len, int prot); 16 24 int (*unmap)(struct msm_mmu *mmu, uint64_t iova, size_t len); ··· 32 24 MSM_MMU_IOMMU_PAGETABLE, 33 25 }; 34 26 27 + /** 28 + * struct msm_mmu_prealloc - Tracking for pre-allocated pages for MMU updates. 29 + */ 30 + struct msm_mmu_prealloc { 31 + /** @count: Number of pages reserved. */ 32 + uint32_t count; 33 + /** @ptr: Index of first unused page in @pages */ 34 + uint32_t ptr; 35 + /** 36 + * @pages: Array of pages preallocated for MMU table updates. 37 + * 38 + * After a VM operation, there might be free pages remaining in this 39 + * array (since the amount allocated is a worst-case). These are 40 + * returned to the pt_cache at mmu->prealloc_cleanup(). 41 + */ 42 + void **pages; 43 + }; 44 + 35 45 struct msm_mmu { 36 46 const struct msm_mmu_funcs *funcs; 37 47 struct device *dev; 38 48 int (*handler)(void *arg, unsigned long iova, int flags, void *data); 39 49 void *arg; 40 50 enum msm_mmu_type type; 51 + 52 + /** 53 + * @prealloc: pre-allocated pages for pgtable 54 + * 55 + * Set while a VM_BIND job is running, serialized under 56 + * msm_gem_vm::mmu_lock. 57 + */ 58 + struct msm_mmu_prealloc *prealloc; 41 59 }; 42 60 43 61 static inline void msm_mmu_init(struct msm_mmu *mmu, struct device *dev,