Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/panfrost: Add support for AARCH64_4K page table format

Currently, Panfrost only supports MMU configuration in "LEGACY" mode (as
Bifrost calls it), a modified version of LPAE (Large Physical Address
Extension), which in Linux we've called "mali_lpae".

This commit adds support for conditionally enabling AARCH64_4K page
table format. To achieve that, a "GPU optional quirks" field is added
to the per-compatible device data (read as `pfdev->comp->gpu_quirks`),
together with the related flag.

Note that, in order to enable AARCH64_4K mode, the GPU variant must have
the HW_FEATURE_AARCH64_MMU feature flag present.

Signed-off-by: Ariel D'Alessandro <ariel.dalessandro@collabora.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Adrián Larumbe <adrian.larumbe@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Link: https://lore.kernel.org/r/20250324185801.168664-5-ariel.dalessandro@collabora.com

authored by

Ariel D'Alessandro and committed by
Steven Price
f49dfccc db599be9

+183 -7
+16
drivers/gpu/drm/panfrost/panfrost_device.h
··· 42 42 GPU_PM_VREG_OFF, 43 43 }; 44 44 45 + /** 46 + * enum panfrost_gpu_quirks - GPU optional quirks 47 + * @GPU_QUIRK_FORCE_AARCH64_PGTABLE: Use AARCH64_4K page table format 48 + */ 49 + enum panfrost_gpu_quirks { 50 + GPU_QUIRK_FORCE_AARCH64_PGTABLE, 51 + }; 52 + 45 53 struct panfrost_features { 46 54 u16 id; 47 55 u16 revision; ··· 103 95 104 96 /* Allowed PM features */ 105 97 u8 pm_features; 98 + 99 + /* GPU configuration quirks */ 100 + u8 gpu_quirks; 106 101 }; 107 102 108 103 struct panfrost_device { ··· 173 162 int as; 174 163 atomic_t as_count; 175 164 struct list_head list; 165 + struct { 166 + u64 transtab; 167 + u64 memattr; 168 + u64 transcfg; 169 + } cfg; 176 170 }; 177 171 178 172 struct panfrost_engine_usage {
+133 -7
drivers/gpu/drm/panfrost/panfrost_mmu.c
··· 26 26 #define mmu_write(dev, reg, data) writel(data, dev->iomem + reg) 27 27 #define mmu_read(dev, reg) readl(dev->iomem + reg) 28 28 29 + static u64 mair_to_memattr(u64 mair, bool coherent) 30 + { 31 + u64 memattr = 0; 32 + u32 i; 33 + 34 + for (i = 0; i < 8; i++) { 35 + u8 in_attr = mair >> (8 * i), out_attr; 36 + u8 outer = in_attr >> 4, inner = in_attr & 0xf; 37 + 38 + /* For caching to be enabled, inner and outer caching policy 39 + * have to be both write-back, if one of them is write-through 40 + * or non-cacheable, we just choose non-cacheable. Device 41 + * memory is also translated to non-cacheable. 42 + */ 43 + if (!(outer & 3) || !(outer & 4) || !(inner & 4)) { 44 + out_attr = AS_MEMATTR_AARCH64_INNER_OUTER_NC | 45 + AS_MEMATTR_AARCH64_SH_MIDGARD_INNER | 46 + AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(false, false); 47 + } else { 48 + out_attr = AS_MEMATTR_AARCH64_INNER_OUTER_WB | 49 + AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(inner & 1, inner & 2); 50 + /* Use SH_MIDGARD_INNER mode when device isn't coherent, 51 + * so SH_IS, which is used when IOMMU_CACHE is set, maps 52 + * to Mali's internal-shareable mode. As per the Mali 53 + * Spec, inner and outer-shareable modes aren't allowed 54 + * for WB memory when coherency is disabled. 55 + * Use SH_CPU_INNER mode when coherency is enabled, so 56 + * that SH_IS actually maps to the standard definition of 57 + * inner-shareable. 
58 + */ 59 + if (!coherent) 60 + out_attr |= AS_MEMATTR_AARCH64_SH_MIDGARD_INNER; 61 + else 62 + out_attr |= AS_MEMATTR_AARCH64_SH_CPU_INNER; 63 + } 64 + 65 + memattr |= (u64)out_attr << (8 * i); 66 + } 67 + 68 + return memattr; 69 + } 70 + 29 71 static int wait_ready(struct panfrost_device *pfdev, u32 as_nr) 30 72 { 31 73 int ret; ··· 166 124 static void panfrost_mmu_enable(struct panfrost_device *pfdev, struct panfrost_mmu *mmu) 167 125 { 168 126 int as_nr = mmu->as; 169 - struct io_pgtable_cfg *cfg = &mmu->pgtbl_cfg; 170 - u64 transtab = cfg->arm_mali_lpae_cfg.transtab; 171 - u64 memattr = cfg->arm_mali_lpae_cfg.memattr; 127 + u64 transtab = mmu->cfg.transtab; 128 + u64 memattr = mmu->cfg.memattr; 129 + u64 transcfg = mmu->cfg.transcfg; 172 130 173 131 mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM); 174 132 ··· 180 138 */ 181 139 mmu_write(pfdev, AS_MEMATTR_LO(as_nr), lower_32_bits(memattr)); 182 140 mmu_write(pfdev, AS_MEMATTR_HI(as_nr), upper_32_bits(memattr)); 141 + 142 + mmu_write(pfdev, AS_TRANSCFG_LO(as_nr), lower_32_bits(transcfg)); 143 + mmu_write(pfdev, AS_TRANSCFG_HI(as_nr), upper_32_bits(transcfg)); 183 144 184 145 write_cmd(pfdev, as_nr, AS_COMMAND_UPDATE); 185 146 } ··· 197 152 mmu_write(pfdev, AS_MEMATTR_LO(as_nr), 0); 198 153 mmu_write(pfdev, AS_MEMATTR_HI(as_nr), 0); 199 154 155 + mmu_write(pfdev, AS_TRANSCFG_LO(as_nr), AS_TRANSCFG_ADRMODE_UNMAPPED); 156 + mmu_write(pfdev, AS_TRANSCFG_HI(as_nr), 0); 157 + 200 158 write_cmd(pfdev, as_nr, AS_COMMAND_UPDATE); 159 + } 160 + 161 + static int mmu_cfg_init_mali_lpae(struct panfrost_mmu *mmu) 162 + { 163 + struct io_pgtable_cfg *pgtbl_cfg = &mmu->pgtbl_cfg; 164 + 165 + /* TODO: The following fields are duplicated between the MMU and Page 166 + * Table config structs. Ideally, should be kept in one place. 
167 + */ 168 + mmu->cfg.transtab = pgtbl_cfg->arm_mali_lpae_cfg.transtab; 169 + mmu->cfg.memattr = pgtbl_cfg->arm_mali_lpae_cfg.memattr; 170 + mmu->cfg.transcfg = AS_TRANSCFG_ADRMODE_LEGACY; 171 + 172 + return 0; 173 + } 174 + 175 + static int mmu_cfg_init_aarch64_4k(struct panfrost_mmu *mmu) 176 + { 177 + struct io_pgtable_cfg *pgtbl_cfg = &mmu->pgtbl_cfg; 178 + struct panfrost_device *pfdev = mmu->pfdev; 179 + 180 + if (drm_WARN_ON(pfdev->ddev, pgtbl_cfg->arm_lpae_s1_cfg.ttbr & 181 + ~AS_TRANSTAB_AARCH64_4K_ADDR_MASK)) 182 + return -EINVAL; 183 + 184 + mmu->cfg.transtab = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; 185 + 186 + mmu->cfg.memattr = mair_to_memattr(pgtbl_cfg->arm_lpae_s1_cfg.mair, 187 + pgtbl_cfg->coherent_walk); 188 + 189 + mmu->cfg.transcfg = AS_TRANSCFG_PTW_MEMATTR_WB | 190 + AS_TRANSCFG_PTW_RA | 191 + AS_TRANSCFG_ADRMODE_AARCH64_4K | 192 + AS_TRANSCFG_INA_BITS(55 - pgtbl_cfg->ias); 193 + if (pgtbl_cfg->coherent_walk) 194 + mmu->cfg.transcfg |= AS_TRANSCFG_PTW_SH_OS; 195 + 196 + return 0; 197 + } 198 + 199 + static int panfrost_mmu_cfg_init(struct panfrost_mmu *mmu, 200 + enum io_pgtable_fmt fmt) 201 + { 202 + struct panfrost_device *pfdev = mmu->pfdev; 203 + 204 + switch (fmt) { 205 + case ARM_64_LPAE_S1: 206 + return mmu_cfg_init_aarch64_4k(mmu); 207 + case ARM_MALI_LPAE: 208 + return mmu_cfg_init_mali_lpae(mmu); 209 + default: 210 + /* This should never happen */ 211 + drm_WARN(pfdev->ddev, 1, "Invalid pgtable format"); 212 + return -EINVAL; 213 + } 201 214 } 202 215 203 216 u32 panfrost_mmu_as_get(struct panfrost_device *pfdev, struct panfrost_mmu *mmu) ··· 721 618 u32 va_bits = GPU_MMU_FEATURES_VA_BITS(pfdev->features.mmu_features); 722 619 u32 pa_bits = GPU_MMU_FEATURES_PA_BITS(pfdev->features.mmu_features); 723 620 struct panfrost_mmu *mmu; 621 + enum io_pgtable_fmt fmt; 622 + int ret; 623 + 624 + if (pfdev->comp->gpu_quirks & BIT(GPU_QUIRK_FORCE_AARCH64_PGTABLE)) { 625 + if (!panfrost_has_hw_feature(pfdev, HW_FEATURE_AARCH64_MMU)) { 626 + 
dev_err_once(pfdev->dev, 627 + "AARCH64_4K page table not supported\n"); 628 + return ERR_PTR(-EINVAL); 629 + } 630 + fmt = ARM_64_LPAE_S1; 631 + } else { 632 + fmt = ARM_MALI_LPAE; 633 + } 724 634 725 635 mmu = kzalloc(sizeof(*mmu), GFP_KERNEL); 726 636 if (!mmu) ··· 758 642 .iommu_dev = pfdev->dev, 759 643 }; 760 644 761 - mmu->pgtbl_ops = alloc_io_pgtable_ops(ARM_MALI_LPAE, &mmu->pgtbl_cfg, 762 - mmu); 645 + mmu->pgtbl_ops = alloc_io_pgtable_ops(fmt, &mmu->pgtbl_cfg, mmu); 763 646 if (!mmu->pgtbl_ops) { 764 - kfree(mmu); 765 - return ERR_PTR(-EINVAL); 647 + ret = -EINVAL; 648 + goto err_free_mmu; 766 649 } 650 + 651 + ret = panfrost_mmu_cfg_init(mmu, fmt); 652 + if (ret) 653 + goto err_free_io_pgtable; 767 654 768 655 kref_init(&mmu->refcount); 769 656 770 657 return mmu; 658 + 659 + err_free_io_pgtable: 660 + free_io_pgtable_ops(mmu->pgtbl_ops); 661 + 662 + err_free_mmu: 663 + kfree(mmu); 664 + return ERR_PTR(ret); 771 665 } 772 666 773 667 static const char *access_type_name(struct panfrost_device *pfdev,
+34
drivers/gpu/drm/panfrost/panfrost_regs.h
··· 301 301 #define AS_TRANSTAB_HI(as) (MMU_AS(as) + 0x04) /* (RW) Translation Table Base Address for address space n, high word */ 302 302 #define AS_MEMATTR_LO(as) (MMU_AS(as) + 0x08) /* (RW) Memory attributes for address space n, low word. */ 303 303 #define AS_MEMATTR_HI(as) (MMU_AS(as) + 0x0C) /* (RW) Memory attributes for address space n, high word. */ 304 + #define AS_MEMATTR_AARCH64_INNER_ALLOC_IMPL (2 << 2) 305 + #define AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(w, r) ((3 << 2) | \ 306 + ((w) ? BIT(0) : 0) | \ 307 + ((r) ? BIT(1) : 0)) 308 + #define AS_MEMATTR_AARCH64_SH_MIDGARD_INNER (0 << 4) 309 + #define AS_MEMATTR_AARCH64_SH_CPU_INNER (1 << 4) 310 + #define AS_MEMATTR_AARCH64_SH_CPU_INNER_SHADER_COH (2 << 4) 311 + #define AS_MEMATTR_AARCH64_SHARED (0 << 6) 312 + #define AS_MEMATTR_AARCH64_INNER_OUTER_NC (1 << 6) 313 + #define AS_MEMATTR_AARCH64_INNER_OUTER_WB (2 << 6) 314 + #define AS_MEMATTR_AARCH64_FAULT (3 << 6) 304 315 #define AS_LOCKADDR_LO(as) (MMU_AS(as) + 0x10) /* (RW) Lock region address for address space n, low word */ 305 316 #define AS_LOCKADDR_HI(as) (MMU_AS(as) + 0x14) /* (RW) Lock region address for address space n, high word */ 306 317 #define AS_COMMAND(as) (MMU_AS(as) + 0x18) /* (WO) MMU command register for address space n */ ··· 322 311 /* Additional Bifrost AS registers */ 323 312 #define AS_TRANSCFG_LO(as) (MMU_AS(as) + 0x30) /* (RW) Translation table configuration for address space n, low word */ 324 313 #define AS_TRANSCFG_HI(as) (MMU_AS(as) + 0x34) /* (RW) Translation table configuration for address space n, high word */ 314 + #define AS_TRANSCFG_ADRMODE_LEGACY (0 << 0) 315 + #define AS_TRANSCFG_ADRMODE_UNMAPPED (1 << 0) 316 + #define AS_TRANSCFG_ADRMODE_IDENTITY (2 << 0) 317 + #define AS_TRANSCFG_ADRMODE_AARCH64_4K (6 << 0) 318 + #define AS_TRANSCFG_ADRMODE_AARCH64_64K (8 << 0) 319 + #define AS_TRANSCFG_INA_BITS(x) ((x) << 6) 320 + #define AS_TRANSCFG_OUTA_BITS(x) ((x) << 14) 321 + #define AS_TRANSCFG_SL_CONCAT BIT(22) 322 + #define 
AS_TRANSCFG_PTW_MEMATTR_NC (1 << 24) 323 + #define AS_TRANSCFG_PTW_MEMATTR_WB (2 << 24) 324 + #define AS_TRANSCFG_PTW_SH_NS (0 << 28) 325 + #define AS_TRANSCFG_PTW_SH_OS (2 << 28) 326 + #define AS_TRANSCFG_PTW_SH_IS (3 << 28) 327 + #define AS_TRANSCFG_PTW_RA BIT(30) 328 + #define AS_TRANSCFG_DISABLE_HIER_AP BIT(33) 329 + #define AS_TRANSCFG_DISABLE_AF_FAULT BIT(34) 330 + #define AS_TRANSCFG_WXN BIT(35) 331 + #define AS_TRANSCFG_XREADABLE BIT(36) 325 332 #define AS_FAULTEXTRA_LO(as) (MMU_AS(as) + 0x38) /* (RO) Secondary fault address for address space n, low word */ 326 333 #define AS_FAULTEXTRA_HI(as) (MMU_AS(as) + 0x3C) /* (RO) Secondary fault address for address space n, high word */ 327 334 ··· 354 325 #define AS_TRANSTAB_LPAE_ADRMODE_MASK 0x3 355 326 #define AS_TRANSTAB_LPAE_READ_INNER BIT(2) 356 327 #define AS_TRANSTAB_LPAE_SHARE_OUTER BIT(4) 328 + 329 + /* 330 + * Begin AARCH64_4K MMU TRANSTAB register values 331 + */ 332 + #define AS_TRANSTAB_AARCH64_4K_ADDR_MASK 0xfffffffffffffff0 357 333 358 334 #define AS_STATUS_AS_ACTIVE 0x01 359 335