Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

iommu/arm-smmu-v3: Use S2FWB for NESTED domains

Force Write Back (FWB) changes how the S2 IOPTE's MemAttr field
works. When S2FWB is supported and enabled, the IOPTE will force cachable
access to IOMMU_CACHE memory when nesting with an S1 and deny cachable
access when !IOMMU_CACHE.

When using a single stage of translation, a simple S2 domain, it doesn't
change things for PCI devices as it is just a different encoding for the
existing mapping of the IOMMU protection flags to cachability attributes.
For non-PCI it also changes the combining rules when incoming transactions
have inconsistent attributes.

However, when used with a nested S1, FWB has the effect of preventing the
guest from choosing a MemAttr in its S1 that would cause ordinary DMA to
bypass the cache. Consistent with KVM we wish to deny the guest the
ability to become incoherent with cached memory the hypervisor believes is
cachable so we don't have to flush it.

Allow NESTED domains to be created if the SMMU has S2FWB support and use
S2FWB for NESTING_PARENTS. This is an additional option to CANWBS.

Link: https://patch.msgid.link/r/10-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com
Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jerry Snitselaar <jsnitsel@redhat.com>
Reviewed-by: Donald Dutile <ddutile@redhat.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

+38 -9
+5 -2
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
··· 220 220 * Must support some way to prevent the VM from bypassing the cache 221 221 * because VFIO currently does not do any cache maintenance. canwbs 222 222 * indicates the device is fully coherent and no cache maintenance is 223 - * ever required, even for PCI No-Snoop. 223 + * ever required, even for PCI No-Snoop. S2FWB means the S1 can't make 224 + * things non-coherent using the memattr, but No-Snoop behavior is not 225 + * effected. 224 226 */ 225 - if (!arm_smmu_master_canwbs(master)) 227 + if (!arm_smmu_master_canwbs(master) && 228 + !(smmu->features & ARM_SMMU_FEAT_S2FWB)) 226 229 return ERR_PTR(-EOPNOTSUPP); 227 230 228 231 vsmmu = iommufd_viommu_alloc(ictx, struct arm_vsmmu, core,
+7 -1
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
··· 1046 1046 /* S2 translates */ 1047 1047 if (cfg & BIT(1)) { 1048 1048 used_bits[1] |= 1049 - cpu_to_le64(STRTAB_STE_1_EATS | STRTAB_STE_1_SHCFG); 1049 + cpu_to_le64(STRTAB_STE_1_S2FWB | STRTAB_STE_1_EATS | 1050 + STRTAB_STE_1_SHCFG); 1050 1051 used_bits[2] |= 1051 1052 cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR | 1052 1053 STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI | ··· 1655 1654 FIELD_PREP(STRTAB_STE_1_EATS, 1656 1655 ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0)); 1657 1656 1657 + if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_S2FWB) 1658 + target->data[1] |= cpu_to_le64(STRTAB_STE_1_S2FWB); 1658 1659 if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR) 1659 1660 target->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG, 1660 1661 STRTAB_STE_1_SHCFG_INCOMING)); ··· 2475 2472 pgtbl_cfg.oas = smmu->oas; 2476 2473 fmt = ARM_64_LPAE_S2; 2477 2474 finalise_stage_fn = arm_smmu_domain_finalise_s2; 2475 + if ((smmu->features & ARM_SMMU_FEAT_S2FWB) && 2476 + (flags & IOMMU_HWPT_ALLOC_NEST_PARENT)) 2477 + pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_S2FWB; 2478 2478 break; 2479 2479 default: 2480 2480 return -EINVAL;
+3
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
··· 58 58 #define IDR1_SIDSIZE GENMASK(5, 0) 59 59 60 60 #define ARM_SMMU_IDR3 0xc 61 + #define IDR3_FWB (1 << 8) 61 62 #define IDR3_RIL (1 << 10) 62 63 63 64 #define ARM_SMMU_IDR5 0x14 ··· 266 265 #define STRTAB_STE_1_S1COR GENMASK_ULL(5, 4) 267 266 #define STRTAB_STE_1_S1CSH GENMASK_ULL(7, 6) 268 267 268 + #define STRTAB_STE_1_S2FWB (1UL << 25) 269 269 #define STRTAB_STE_1_S1STALLD (1UL << 27) 270 270 271 271 #define STRTAB_STE_1_EATS GENMASK_ULL(29, 28) ··· 742 740 #define ARM_SMMU_FEAT_ATTR_TYPES_OVR (1 << 20) 743 741 #define ARM_SMMU_FEAT_HA (1 << 21) 744 742 #define ARM_SMMU_FEAT_HD (1 << 22) 743 + #define ARM_SMMU_FEAT_S2FWB (1 << 23) 745 744 u32 features; 746 745 747 746 #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
+21 -6
drivers/iommu/io-pgtable-arm.c
··· 106 106 #define ARM_LPAE_PTE_HAP_FAULT (((arm_lpae_iopte)0) << 6) 107 107 #define ARM_LPAE_PTE_HAP_READ (((arm_lpae_iopte)1) << 6) 108 108 #define ARM_LPAE_PTE_HAP_WRITE (((arm_lpae_iopte)2) << 6) 109 + /* 110 + * For !FWB these code to: 111 + * 1111 = Normal outer write back cachable / Inner Write Back Cachable 112 + * Permit S1 to override 113 + * 0101 = Normal Non-cachable / Inner Non-cachable 114 + * 0001 = Device / Device-nGnRE 115 + * For S2FWB these code: 116 + * 0110 Force Normal Write Back 117 + * 0101 Normal* is forced Normal-NC, Device unchanged 118 + * 0001 Force Device-nGnRE 119 + */ 120 + #define ARM_LPAE_PTE_MEMATTR_FWB_WB (((arm_lpae_iopte)0x6) << 2) 109 121 #define ARM_LPAE_PTE_MEMATTR_OIWB (((arm_lpae_iopte)0xf) << 2) 110 122 #define ARM_LPAE_PTE_MEMATTR_NC (((arm_lpae_iopte)0x5) << 2) 111 123 #define ARM_LPAE_PTE_MEMATTR_DEV (((arm_lpae_iopte)0x1) << 2) ··· 470 458 */ 471 459 if (data->iop.fmt == ARM_64_LPAE_S2 || 472 460 data->iop.fmt == ARM_32_LPAE_S2) { 473 - if (prot & IOMMU_MMIO) 461 + if (prot & IOMMU_MMIO) { 474 462 pte |= ARM_LPAE_PTE_MEMATTR_DEV; 475 - else if (prot & IOMMU_CACHE) 476 - pte |= ARM_LPAE_PTE_MEMATTR_OIWB; 477 - else 463 + } else if (prot & IOMMU_CACHE) { 464 + if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_S2FWB) 465 + pte |= ARM_LPAE_PTE_MEMATTR_FWB_WB; 466 + else 467 + pte |= ARM_LPAE_PTE_MEMATTR_OIWB; 468 + } else { 478 469 pte |= ARM_LPAE_PTE_MEMATTR_NC; 470 + } 479 471 } else { 480 472 if (prot & IOMMU_MMIO) 481 473 pte |= (ARM_LPAE_MAIR_ATTR_IDX_DEV ··· 1051 1035 struct arm_lpae_io_pgtable *data; 1052 1036 typeof(&cfg->arm_lpae_s2_cfg.vtcr) vtcr = &cfg->arm_lpae_s2_cfg.vtcr; 1053 1037 1054 - /* The NS quirk doesn't apply at stage 2 */ 1055 - if (cfg->quirks) 1038 + if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_S2FWB)) 1056 1039 return NULL; 1057 1040 1058 1041 data = arm_lpae_alloc_pgtable(cfg);
+2
include/linux/io-pgtable.h
··· 87 87 * attributes set in the TCR for a non-coherent page-table walker. 88 88 * 89 89 * IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking in stage 1 pagetable. 90 + * IO_PGTABLE_QUIRK_ARM_S2FWB: Use the FWB format for the MemAttrs bits 90 91 */ 91 92 #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) 92 93 #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) ··· 96 95 #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) 97 96 #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6) 98 97 #define IO_PGTABLE_QUIRK_ARM_HD BIT(7) 98 + #define IO_PGTABLE_QUIRK_ARM_S2FWB BIT(8) 99 99 unsigned long quirks; 100 100 unsigned long pgsize_bitmap; 101 101 unsigned int ias;