Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'iommu-updates-v5.2' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/joro/iommu

Pull IOMMU updates from Joerg Roedel:

- ATS support for ARM-SMMU-v3.

- AUX domain support in the IOMMU-API and the Intel VT-d driver. This
adds support for multiple DMA address spaces per (PCI-)device. The
use-case is to multiplex devices between host and KVM guests in a
more flexible way than supported by SR-IOV.

- The rest are smaller cleanups and fixes, two of which needed to be
reverted after testing in linux-next.

* tag 'iommu-updates-v5.2' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (45 commits)
Revert "iommu/amd: Flush not present cache in iommu_map_page"
Revert "iommu/amd: Remove the leftover of bypass support"
iommu/vt-d: Fix leak in intel_pasid_alloc_table on error path
iommu/vt-d: Make kernel parameter igfx_off work with vIOMMU
iommu/vt-d: Set intel_iommu_gfx_mapped correctly
iommu/amd: Flush not present cache in iommu_map_page
iommu/vt-d: Cleanup: no spaces at the start of a line
iommu/vt-d: Don't request page request irq under dmar_global_lock
iommu/vt-d: Use struct_size() helper
iommu/mediatek: Fix leaked of_node references
iommu/amd: Remove amd_iommu_pd_list
iommu/arm-smmu: Log CBFRSYNRA register on context fault
iommu/arm-smmu-v3: Don't disable SMMU in kdump kernel
iommu/arm-smmu-v3: Disable tagged pointers
iommu/arm-smmu-v3: Add support for PCI ATS
iommu/arm-smmu-v3: Link domains and devices
iommu/arm-smmu-v3: Add a master->domain pointer
iommu/arm-smmu-v3: Store StreamIDs in master
iommu/arm-smmu-v3: Rename arm_smmu_master_data to arm_smmu_master
ACPI/IORT: Check ATS capability in root complex nodes
...

+1375 -345
+11
drivers/acpi/arm64/iort.c
··· 1031 1031 dev_dbg(dev, "dma_pfn_offset(%#08llx)\n", offset); 1032 1032 } 1033 1033 1034 + static bool iort_pci_rc_supports_ats(struct acpi_iort_node *node) 1035 + { 1036 + struct acpi_iort_root_complex *pci_rc; 1037 + 1038 + pci_rc = (struct acpi_iort_root_complex *)node->node_data; 1039 + return pci_rc->ats_attribute & ACPI_IORT_ATS_SUPPORTED; 1040 + } 1041 + 1034 1042 /** 1035 1043 * iort_iommu_configure - Set-up IOMMU configuration for a device. 1036 1044 * ··· 1074 1066 info.node = node; 1075 1067 err = pci_for_each_dma_alias(to_pci_dev(dev), 1076 1068 iort_pci_iommu_init, &info); 1069 + 1070 + if (!err && iort_pci_rc_supports_ats(node)) 1071 + dev->iommu_fwspec->flags |= IOMMU_FWSPEC_PCI_RC_ATS; 1077 1072 } else { 1078 1073 int i = 0; 1079 1074
+25
drivers/iommu/Kconfig
··· 359 359 Say Y here if your SoC includes an IOMMU device implementing 360 360 the ARM SMMU architecture. 361 361 362 + config ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT 363 + bool "Default to disabling bypass on ARM SMMU v1 and v2" 364 + depends on ARM_SMMU 365 + default y 366 + help 367 + Say Y here to (by default) disable bypass streams such that 368 + incoming transactions from devices that are not attached to 369 + an iommu domain will report an abort back to the device and 370 + will not be allowed to pass through the SMMU. 371 + 372 + Any old kernels that existed before this KConfig was 373 + introduced would default to _allowing_ bypass (AKA the 374 + equivalent of NO for this config). However the default for 375 + this option is YES because the old behavior is insecure. 376 + 377 + There are few reasons to allow unmatched stream bypass, and 378 + even fewer good ones. If saying YES here breaks your board 379 + you should work on fixing your board. This KConfig option 380 + is expected to be removed in the future and we'll simply 381 + hardcode the bypass disable in the code. 382 + 383 + NOTE: the kernel command line parameter 384 + 'arm-smmu.disable_bypass' will continue to override this 385 + config. 386 + 362 387 config ARM_SMMU_V3 363 388 bool "ARM Ltd. System MMU Version 3 (SMMUv3) Support" 364 389 depends on ARM64
+1 -51
drivers/iommu/amd_iommu.c
··· 1723 1723 * 1724 1724 ****************************************************************************/ 1725 1725 1726 - /* 1727 - * This function adds a protection domain to the global protection domain list 1728 - */ 1729 - static void add_domain_to_list(struct protection_domain *domain) 1730 - { 1731 - unsigned long flags; 1732 - 1733 - spin_lock_irqsave(&amd_iommu_pd_lock, flags); 1734 - list_add(&domain->list, &amd_iommu_pd_list); 1735 - spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); 1736 - } 1737 - 1738 - /* 1739 - * This function removes a protection domain to the global 1740 - * protection domain list 1741 - */ 1742 - static void del_domain_from_list(struct protection_domain *domain) 1743 - { 1744 - unsigned long flags; 1745 - 1746 - spin_lock_irqsave(&amd_iommu_pd_lock, flags); 1747 - list_del(&domain->list); 1748 - spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); 1749 - } 1750 - 1751 1726 static u16 domain_id_alloc(void) 1752 1727 { 1753 1728 int id; ··· 1813 1838 if (!dom) 1814 1839 return; 1815 1840 1816 - del_domain_from_list(&dom->domain); 1817 - 1818 1841 put_iova_domain(&dom->iovad); 1819 1842 1820 1843 free_pagetable(&dom->domain); ··· 1852 1879 1853 1880 /* Initialize reserved ranges */ 1854 1881 copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad); 1855 - 1856 - add_domain_to_list(&dma_dom->domain); 1857 1882 1858 1883 return dma_dom; 1859 1884 ··· 2093 2122 return ret; 2094 2123 } 2095 2124 2096 - /* FIXME: Move this to PCI code */ 2097 - #define PCI_PRI_TLP_OFF (1 << 15) 2098 - 2099 - static bool pci_pri_tlp_required(struct pci_dev *pdev) 2100 - { 2101 - u16 status; 2102 - int pos; 2103 - 2104 - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); 2105 - if (!pos) 2106 - return false; 2107 - 2108 - pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status); 2109 - 2110 - return (status & PCI_PRI_TLP_OFF) ? 
true : false; 2111 - } 2112 - 2113 2125 /* 2114 2126 * If a device is not yet associated with a domain, this function makes the 2115 2127 * device visible in the domain ··· 2121 2167 2122 2168 dev_data->ats.enabled = true; 2123 2169 dev_data->ats.qdep = pci_ats_queue_depth(pdev); 2124 - dev_data->pri_tlp = pci_pri_tlp_required(pdev); 2170 + dev_data->pri_tlp = pci_prg_resp_pasid_required(pdev); 2125 2171 } 2126 2172 } else if (amd_iommu_iotlb_sup && 2127 2173 pci_enable_ats(pdev, PAGE_SHIFT) == 0) { ··· 2851 2897 if (!domain) 2852 2898 return; 2853 2899 2854 - del_domain_from_list(domain); 2855 - 2856 2900 if (domain->id) 2857 2901 domain_id_free(domain->id); 2858 2902 ··· 2879 2927 2880 2928 if (protection_domain_init(domain)) 2881 2929 goto out_err; 2882 - 2883 - add_domain_to_list(domain); 2884 2930 2885 2931 return domain; 2886 2932
-8
drivers/iommu/amd_iommu_init.c
··· 189 189 bool amd_iommu_force_isolation __read_mostly; 190 190 191 191 /* 192 - * List of protection domains - used during resume 193 - */ 194 - LIST_HEAD(amd_iommu_pd_list); 195 - spinlock_t amd_iommu_pd_lock; 196 - 197 - /* 198 192 * Pointer to the device table which is shared by all AMD IOMMUs 199 193 * it is indexed by the PCI device id or the HT unit id and contains 200 194 * information about the domain the device belongs to as well as the ··· 2519 2525 * error value placeholder 2520 2526 */ 2521 2527 __set_bit(0, amd_iommu_pd_alloc_bitmap); 2522 - 2523 - spin_lock_init(&amd_iommu_pd_lock); 2524 2528 2525 2529 /* 2526 2530 * now the data structures are allocated and basically initialized
-6
drivers/iommu/amd_iommu_types.h
··· 675 675 extern struct amd_iommu *amd_iommus[MAX_IOMMUS]; 676 676 677 677 /* 678 - * Declarations for the global list of all protection domains 679 - */ 680 - extern spinlock_t amd_iommu_pd_lock; 681 - extern struct list_head amd_iommu_pd_list; 682 - 683 - /* 684 678 * Structure defining one entry in the device table 685 679 */ 686 680 struct dev_table_entry {
+2
drivers/iommu/arm-smmu-regs.h
··· 147 147 #define CBAR_IRPTNDX_SHIFT 24 148 148 #define CBAR_IRPTNDX_MASK 0xff 149 149 150 + #define ARM_SMMU_GR1_CBFRSYNRA(n) (0x400 + ((n) << 2)) 151 + 150 152 #define ARM_SMMU_GR1_CBA2R(n) (0x800 + ((n) << 2)) 151 153 #define CBA2R_RW64_32BIT (0 << 0) 152 154 #define CBA2R_RW64_64BIT (1 << 0)
+279 -76
drivers/iommu/arm-smmu-v3.c
··· 29 29 #include <linux/of_iommu.h> 30 30 #include <linux/of_platform.h> 31 31 #include <linux/pci.h> 32 + #include <linux/pci-ats.h> 32 33 #include <linux/platform_device.h> 33 34 34 35 #include <linux/amba/bus.h> ··· 87 86 #define IDR5_VAX_52_BIT 1 88 87 89 88 #define ARM_SMMU_CR0 0x20 89 + #define CR0_ATSCHK (1 << 4) 90 90 #define CR0_CMDQEN (1 << 3) 91 91 #define CR0_EVTQEN (1 << 2) 92 92 #define CR0_PRIQEN (1 << 1) ··· 296 294 #define CMDQ_ERR_CERROR_NONE_IDX 0 297 295 #define CMDQ_ERR_CERROR_ILL_IDX 1 298 296 #define CMDQ_ERR_CERROR_ABT_IDX 2 297 + #define CMDQ_ERR_CERROR_ATC_INV_IDX 3 299 298 300 299 #define CMDQ_0_OP GENMASK_ULL(7, 0) 301 300 #define CMDQ_0_SSV (1UL << 11) ··· 314 311 #define CMDQ_TLBI_1_LEAF (1UL << 0) 315 312 #define CMDQ_TLBI_1_VA_MASK GENMASK_ULL(63, 12) 316 313 #define CMDQ_TLBI_1_IPA_MASK GENMASK_ULL(51, 12) 314 + 315 + #define CMDQ_ATC_0_SSID GENMASK_ULL(31, 12) 316 + #define CMDQ_ATC_0_SID GENMASK_ULL(63, 32) 317 + #define CMDQ_ATC_0_GLOBAL (1UL << 9) 318 + #define CMDQ_ATC_1_SIZE GENMASK_ULL(5, 0) 319 + #define CMDQ_ATC_1_ADDR_MASK GENMASK_ULL(63, 12) 317 320 318 321 #define CMDQ_PRI_0_SSID GENMASK_ULL(31, 12) 319 322 #define CMDQ_PRI_0_SID GENMASK_ULL(63, 32) ··· 442 433 u64 addr; 443 434 } tlbi; 444 435 436 + #define CMDQ_OP_ATC_INV 0x40 437 + #define ATC_INV_SIZE_ALL 52 438 + struct { 439 + u32 sid; 440 + u32 ssid; 441 + u64 addr; 442 + u8 size; 443 + bool global; 444 + } atc; 445 + 445 446 #define CMDQ_OP_PRI_RESP 0x41 446 447 struct { 447 448 u32 sid; ··· 522 503 u16 vmid; 523 504 u64 vttbr; 524 505 u64 vtcr; 525 - }; 526 - 527 - struct arm_smmu_strtab_ent { 528 - /* 529 - * An STE is "assigned" if the master emitting the corresponding SID 530 - * is attached to a domain. The behaviour of an unassigned STE is 531 - * determined by the disable_bypass parameter, whereas an assigned 532 - * STE behaves according to s1_cfg/s2_cfg, which themselves are 533 - * configured according to the domain type. 
534 - */ 535 - bool assigned; 536 - struct arm_smmu_s1_cfg *s1_cfg; 537 - struct arm_smmu_s2_cfg *s2_cfg; 538 506 }; 539 507 540 508 struct arm_smmu_strtab_cfg { ··· 597 591 }; 598 592 599 593 /* SMMU private data for each master */ 600 - struct arm_smmu_master_data { 594 + struct arm_smmu_master { 601 595 struct arm_smmu_device *smmu; 602 - struct arm_smmu_strtab_ent ste; 596 + struct device *dev; 597 + struct arm_smmu_domain *domain; 598 + struct list_head domain_head; 599 + u32 *sids; 600 + unsigned int num_sids; 601 + bool ats_enabled :1; 603 602 }; 604 603 605 604 /* SMMU private data for an IOMMU domain */ ··· 629 618 }; 630 619 631 620 struct iommu_domain domain; 621 + 622 + struct list_head devices; 623 + spinlock_t devices_lock; 632 624 }; 633 625 634 626 struct arm_smmu_option_prop { ··· 834 820 case CMDQ_OP_TLBI_S12_VMALL: 835 821 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid); 836 822 break; 823 + case CMDQ_OP_ATC_INV: 824 + cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid); 825 + cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global); 826 + cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid); 827 + cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid); 828 + cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size); 829 + cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK; 830 + break; 837 831 case CMDQ_OP_PRI_RESP: 838 832 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid); 839 833 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid); ··· 886 864 [CMDQ_ERR_CERROR_NONE_IDX] = "No error", 887 865 [CMDQ_ERR_CERROR_ILL_IDX] = "Illegal command", 888 866 [CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch", 867 + [CMDQ_ERR_CERROR_ATC_INV_IDX] = "ATC invalidate timeout", 889 868 }; 890 869 891 870 int i; ··· 905 882 case CMDQ_ERR_CERROR_ABT_IDX: 906 883 dev_err(smmu->dev, "retrying command fetch\n"); 907 884 case CMDQ_ERR_CERROR_NONE_IDX: 885 + return; 886 + case CMDQ_ERR_CERROR_ATC_INV_IDX: 887 + /* 888 + * ATC Invalidation Completion timeout. 
CONS is still pointing 889 + * at the CMD_SYNC. Attempt to complete other pending commands 890 + * by repeating the CMD_SYNC, though we might well end up back 891 + * here since the ATC invalidation may still be pending. 892 + */ 908 893 return; 909 894 case CMDQ_ERR_CERROR_ILL_IDX: 910 895 /* Fallthrough */ ··· 1030 999 return ret; 1031 1000 } 1032 1001 1033 - static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu) 1002 + static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu) 1034 1003 { 1035 1004 int ret; 1036 1005 bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) && ··· 1040 1009 : __arm_smmu_cmdq_issue_sync(smmu); 1041 1010 if (ret) 1042 1011 dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n"); 1012 + return ret; 1043 1013 } 1044 1014 1045 1015 /* Context descriptor manipulation functions */ ··· 1057 1025 val |= ARM_SMMU_TCR2CD(tcr, EPD0); 1058 1026 val |= ARM_SMMU_TCR2CD(tcr, EPD1); 1059 1027 val |= ARM_SMMU_TCR2CD(tcr, IPS); 1060 - val |= ARM_SMMU_TCR2CD(tcr, TBI0); 1061 1028 1062 1029 return val; 1063 1030 } ··· 1116 1085 arm_smmu_cmdq_issue_sync(smmu); 1117 1086 } 1118 1087 1119 - static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, 1120 - __le64 *dst, struct arm_smmu_strtab_ent *ste) 1088 + static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, 1089 + __le64 *dst) 1121 1090 { 1122 1091 /* 1123 1092 * This is hideously complicated, but we only really care about ··· 1137 1106 */ 1138 1107 u64 val = le64_to_cpu(dst[0]); 1139 1108 bool ste_live = false; 1109 + struct arm_smmu_device *smmu = NULL; 1110 + struct arm_smmu_s1_cfg *s1_cfg = NULL; 1111 + struct arm_smmu_s2_cfg *s2_cfg = NULL; 1112 + struct arm_smmu_domain *smmu_domain = NULL; 1140 1113 struct arm_smmu_cmdq_ent prefetch_cmd = { 1141 1114 .opcode = CMDQ_OP_PREFETCH_CFG, 1142 1115 .prefetch = { 1143 1116 .sid = sid, 1144 1117 }, 1145 1118 }; 1119 + 1120 + if (master) { 1121 + smmu_domain = master->domain; 1122 + smmu = master->smmu; 
1123 + } 1124 + 1125 + if (smmu_domain) { 1126 + switch (smmu_domain->stage) { 1127 + case ARM_SMMU_DOMAIN_S1: 1128 + s1_cfg = &smmu_domain->s1_cfg; 1129 + break; 1130 + case ARM_SMMU_DOMAIN_S2: 1131 + case ARM_SMMU_DOMAIN_NESTED: 1132 + s2_cfg = &smmu_domain->s2_cfg; 1133 + break; 1134 + default: 1135 + break; 1136 + } 1137 + } 1146 1138 1147 1139 if (val & STRTAB_STE_0_V) { 1148 1140 switch (FIELD_GET(STRTAB_STE_0_CFG, val)) { ··· 1187 1133 val = STRTAB_STE_0_V; 1188 1134 1189 1135 /* Bypass/fault */ 1190 - if (!ste->assigned || !(ste->s1_cfg || ste->s2_cfg)) { 1191 - if (!ste->assigned && disable_bypass) 1136 + if (!smmu_domain || !(s1_cfg || s2_cfg)) { 1137 + if (!smmu_domain && disable_bypass) 1192 1138 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT); 1193 1139 else 1194 1140 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS); ··· 1206 1152 return; 1207 1153 } 1208 1154 1209 - if (ste->s1_cfg) { 1155 + if (s1_cfg) { 1210 1156 BUG_ON(ste_live); 1211 1157 dst[1] = cpu_to_le64( 1212 1158 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) | 1213 1159 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) | 1214 1160 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) | 1215 - #ifdef CONFIG_PCI_ATS 1216 - FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS) | 1217 - #endif 1218 1161 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1)); 1219 1162 1220 1163 if (smmu->features & ARM_SMMU_FEAT_STALLS && 1221 1164 !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE)) 1222 1165 dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); 1223 1166 1224 - val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) | 1167 + val |= (s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) | 1225 1168 FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS); 1226 1169 } 1227 1170 1228 - if (ste->s2_cfg) { 1171 + if (s2_cfg) { 1229 1172 BUG_ON(ste_live); 1230 1173 dst[2] = cpu_to_le64( 1231 - FIELD_PREP(STRTAB_STE_2_S2VMID, ste->s2_cfg->vmid) | 1232 - 
FIELD_PREP(STRTAB_STE_2_VTCR, ste->s2_cfg->vtcr) | 1174 + FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) | 1175 + FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) | 1233 1176 #ifdef __BIG_ENDIAN 1234 1177 STRTAB_STE_2_S2ENDI | 1235 1178 #endif 1236 1179 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 | 1237 1180 STRTAB_STE_2_S2R); 1238 1181 1239 - dst[3] = cpu_to_le64(ste->s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK); 1182 + dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK); 1240 1183 1241 1184 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS); 1242 1185 } 1186 + 1187 + if (master->ats_enabled) 1188 + dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS, 1189 + STRTAB_STE_1_EATS_TRANS)); 1243 1190 1244 1191 arm_smmu_sync_ste_for_sid(smmu, sid); 1245 1192 dst[0] = cpu_to_le64(val); ··· 1254 1199 static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent) 1255 1200 { 1256 1201 unsigned int i; 1257 - struct arm_smmu_strtab_ent ste = { .assigned = false }; 1258 1202 1259 1203 for (i = 0; i < nent; ++i) { 1260 - arm_smmu_write_strtab_ent(NULL, -1, strtab, &ste); 1204 + arm_smmu_write_strtab_ent(NULL, -1, strtab); 1261 1205 strtab += STRTAB_STE_DWORDS; 1262 1206 } 1263 1207 } ··· 1444 1390 return IRQ_WAKE_THREAD; 1445 1391 } 1446 1392 1393 + static void 1394 + arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size, 1395 + struct arm_smmu_cmdq_ent *cmd) 1396 + { 1397 + size_t log2_span; 1398 + size_t span_mask; 1399 + /* ATC invalidates are always on 4096-bytes pages */ 1400 + size_t inval_grain_shift = 12; 1401 + unsigned long page_start, page_end; 1402 + 1403 + *cmd = (struct arm_smmu_cmdq_ent) { 1404 + .opcode = CMDQ_OP_ATC_INV, 1405 + .substream_valid = !!ssid, 1406 + .atc.ssid = ssid, 1407 + }; 1408 + 1409 + if (!size) { 1410 + cmd->atc.size = ATC_INV_SIZE_ALL; 1411 + return; 1412 + } 1413 + 1414 + page_start = iova >> inval_grain_shift; 1415 + page_end = (iova + size - 1) >> inval_grain_shift; 1416 + 1417 + /* 1418 + * In an ATS Invalidate 
Request, the address must be aligned on the 1419 + * range size, which must be a power of two number of page sizes. We 1420 + * thus have to choose between grossly over-invalidating the region, or 1421 + * splitting the invalidation into multiple commands. For simplicity 1422 + * we'll go with the first solution, but should refine it in the future 1423 + * if multiple commands are shown to be more efficient. 1424 + * 1425 + * Find the smallest power of two that covers the range. The most 1426 + * significant differing bit between the start and end addresses, 1427 + * fls(start ^ end), indicates the required span. For example: 1428 + * 1429 + * We want to invalidate pages [8; 11]. This is already the ideal range: 1430 + * x = 0b1000 ^ 0b1011 = 0b11 1431 + * span = 1 << fls(x) = 4 1432 + * 1433 + * To invalidate pages [7; 10], we need to invalidate [0; 15]: 1434 + * x = 0b0111 ^ 0b1010 = 0b1101 1435 + * span = 1 << fls(x) = 16 1436 + */ 1437 + log2_span = fls_long(page_start ^ page_end); 1438 + span_mask = (1ULL << log2_span) - 1; 1439 + 1440 + page_start &= ~span_mask; 1441 + 1442 + cmd->atc.addr = page_start << inval_grain_shift; 1443 + cmd->atc.size = log2_span; 1444 + } 1445 + 1446 + static int arm_smmu_atc_inv_master(struct arm_smmu_master *master, 1447 + struct arm_smmu_cmdq_ent *cmd) 1448 + { 1449 + int i; 1450 + 1451 + if (!master->ats_enabled) 1452 + return 0; 1453 + 1454 + for (i = 0; i < master->num_sids; i++) { 1455 + cmd->atc.sid = master->sids[i]; 1456 + arm_smmu_cmdq_issue_cmd(master->smmu, cmd); 1457 + } 1458 + 1459 + return arm_smmu_cmdq_issue_sync(master->smmu); 1460 + } 1461 + 1462 + static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, 1463 + int ssid, unsigned long iova, size_t size) 1464 + { 1465 + int ret = 0; 1466 + unsigned long flags; 1467 + struct arm_smmu_cmdq_ent cmd; 1468 + struct arm_smmu_master *master; 1469 + 1470 + if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS)) 1471 + return 0; 1472 + 1473 + 
arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd); 1474 + 1475 + spin_lock_irqsave(&smmu_domain->devices_lock, flags); 1476 + list_for_each_entry(master, &smmu_domain->devices, domain_head) 1477 + ret |= arm_smmu_atc_inv_master(master, &cmd); 1478 + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); 1479 + 1480 + return ret ? -ETIMEDOUT : 0; 1481 + } 1482 + 1447 1483 /* IO_PGTABLE API */ 1448 1484 static void arm_smmu_tlb_sync(void *cookie) 1449 1485 { ··· 1637 1493 } 1638 1494 1639 1495 mutex_init(&smmu_domain->init_mutex); 1496 + INIT_LIST_HEAD(&smmu_domain->devices); 1497 + spin_lock_init(&smmu_domain->devices_lock); 1498 + 1640 1499 return &smmu_domain->domain; 1641 1500 } 1642 1501 ··· 1835 1688 return step; 1836 1689 } 1837 1690 1838 - static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec) 1691 + static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master) 1839 1692 { 1840 1693 int i, j; 1841 - struct arm_smmu_master_data *master = fwspec->iommu_priv; 1842 1694 struct arm_smmu_device *smmu = master->smmu; 1843 1695 1844 - for (i = 0; i < fwspec->num_ids; ++i) { 1845 - u32 sid = fwspec->ids[i]; 1696 + for (i = 0; i < master->num_sids; ++i) { 1697 + u32 sid = master->sids[i]; 1846 1698 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid); 1847 1699 1848 1700 /* Bridged PCI devices may end up with duplicated IDs */ 1849 1701 for (j = 0; j < i; j++) 1850 - if (fwspec->ids[j] == sid) 1702 + if (master->sids[j] == sid) 1851 1703 break; 1852 1704 if (j < i) 1853 1705 continue; 1854 1706 1855 - arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste); 1707 + arm_smmu_write_strtab_ent(master, sid, step); 1856 1708 } 1857 1709 } 1858 1710 1859 - static void arm_smmu_detach_dev(struct device *dev) 1711 + static int arm_smmu_enable_ats(struct arm_smmu_master *master) 1860 1712 { 1861 - struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 1862 - struct arm_smmu_master_data *master = fwspec->iommu_priv; 1713 + int ret; 1714 + size_t stu; 
1715 + struct pci_dev *pdev; 1716 + struct arm_smmu_device *smmu = master->smmu; 1717 + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev); 1863 1718 1864 - master->ste.assigned = false; 1865 - arm_smmu_install_ste_for_dev(fwspec); 1719 + if (!(smmu->features & ARM_SMMU_FEAT_ATS) || !dev_is_pci(master->dev) || 1720 + !(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS) || pci_ats_disabled()) 1721 + return -ENXIO; 1722 + 1723 + pdev = to_pci_dev(master->dev); 1724 + if (pdev->untrusted) 1725 + return -EPERM; 1726 + 1727 + /* Smallest Translation Unit: log2 of the smallest supported granule */ 1728 + stu = __ffs(smmu->pgsize_bitmap); 1729 + 1730 + ret = pci_enable_ats(pdev, stu); 1731 + if (ret) 1732 + return ret; 1733 + 1734 + master->ats_enabled = true; 1735 + return 0; 1736 + } 1737 + 1738 + static void arm_smmu_disable_ats(struct arm_smmu_master *master) 1739 + { 1740 + if (!master->ats_enabled || !dev_is_pci(master->dev)) 1741 + return; 1742 + 1743 + pci_disable_ats(to_pci_dev(master->dev)); 1744 + master->ats_enabled = false; 1745 + } 1746 + 1747 + static void arm_smmu_detach_dev(struct arm_smmu_master *master) 1748 + { 1749 + unsigned long flags; 1750 + struct arm_smmu_domain *smmu_domain = master->domain; 1751 + 1752 + if (!smmu_domain) 1753 + return; 1754 + 1755 + spin_lock_irqsave(&smmu_domain->devices_lock, flags); 1756 + list_del(&master->domain_head); 1757 + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); 1758 + 1759 + master->domain = NULL; 1760 + arm_smmu_install_ste_for_dev(master); 1761 + 1762 + /* Disabling ATS invalidates all ATC entries */ 1763 + arm_smmu_disable_ats(master); 1866 1764 } 1867 1765 1868 1766 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) 1869 1767 { 1870 1768 int ret = 0; 1769 + unsigned long flags; 1871 1770 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 1872 1771 struct arm_smmu_device *smmu; 1873 1772 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 1874 - 
struct arm_smmu_master_data *master; 1875 - struct arm_smmu_strtab_ent *ste; 1773 + struct arm_smmu_master *master; 1876 1774 1877 1775 if (!fwspec) 1878 1776 return -ENOENT; 1879 1777 1880 1778 master = fwspec->iommu_priv; 1881 1779 smmu = master->smmu; 1882 - ste = &master->ste; 1883 1780 1884 - /* Already attached to a different domain? */ 1885 - if (ste->assigned) 1886 - arm_smmu_detach_dev(dev); 1781 + arm_smmu_detach_dev(master); 1887 1782 1888 1783 mutex_lock(&smmu_domain->init_mutex); 1889 1784 ··· 1945 1756 goto out_unlock; 1946 1757 } 1947 1758 1948 - ste->assigned = true; 1759 + master->domain = smmu_domain; 1949 1760 1950 - if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS) { 1951 - ste->s1_cfg = NULL; 1952 - ste->s2_cfg = NULL; 1953 - } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { 1954 - ste->s1_cfg = &smmu_domain->s1_cfg; 1955 - ste->s2_cfg = NULL; 1956 - arm_smmu_write_ctx_desc(smmu, ste->s1_cfg); 1957 - } else { 1958 - ste->s1_cfg = NULL; 1959 - ste->s2_cfg = &smmu_domain->s2_cfg; 1960 - } 1761 + spin_lock_irqsave(&smmu_domain->devices_lock, flags); 1762 + list_add(&master->domain_head, &smmu_domain->devices); 1763 + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); 1961 1764 1962 - arm_smmu_install_ste_for_dev(fwspec); 1765 + if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS) 1766 + arm_smmu_enable_ats(master); 1767 + 1768 + if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) 1769 + arm_smmu_write_ctx_desc(smmu, &smmu_domain->s1_cfg); 1770 + 1771 + arm_smmu_install_ste_for_dev(master); 1963 1772 out_unlock: 1964 1773 mutex_unlock(&smmu_domain->init_mutex); 1965 1774 return ret; ··· 1977 1790 static size_t 1978 1791 arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) 1979 1792 { 1980 - struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; 1793 + int ret; 1794 + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 1795 + struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; 1981 1796 1982 1797 if 
(!ops) 1983 1798 return 0; 1984 1799 1985 - return ops->unmap(ops, iova, size); 1800 + ret = ops->unmap(ops, iova, size); 1801 + if (ret && arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size)) 1802 + return 0; 1803 + 1804 + return ret; 1986 1805 } 1987 1806 1988 1807 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain) ··· 2053 1860 { 2054 1861 int i, ret; 2055 1862 struct arm_smmu_device *smmu; 2056 - struct arm_smmu_master_data *master; 1863 + struct arm_smmu_master *master; 2057 1864 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2058 1865 struct iommu_group *group; 2059 1866 ··· 2075 1882 if (!master) 2076 1883 return -ENOMEM; 2077 1884 1885 + master->dev = dev; 2078 1886 master->smmu = smmu; 1887 + master->sids = fwspec->ids; 1888 + master->num_sids = fwspec->num_ids; 2079 1889 fwspec->iommu_priv = master; 2080 1890 } 2081 1891 2082 1892 /* Check the SIDs are in range of the SMMU and our stream table */ 2083 - for (i = 0; i < fwspec->num_ids; i++) { 2084 - u32 sid = fwspec->ids[i]; 1893 + for (i = 0; i < master->num_sids; i++) { 1894 + u32 sid = master->sids[i]; 2085 1895 2086 1896 if (!arm_smmu_sid_in_range(smmu, sid)) 2087 1897 return -ERANGE; ··· 2109 1913 static void arm_smmu_remove_device(struct device *dev) 2110 1914 { 2111 1915 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2112 - struct arm_smmu_master_data *master; 1916 + struct arm_smmu_master *master; 2113 1917 struct arm_smmu_device *smmu; 2114 1918 2115 1919 if (!fwspec || fwspec->ops != &arm_smmu_ops) ··· 2117 1921 2118 1922 master = fwspec->iommu_priv; 2119 1923 smmu = master->smmu; 2120 - if (master && master->ste.assigned) 2121 - arm_smmu_detach_dev(dev); 1924 + arm_smmu_detach_dev(master); 2122 1925 iommu_group_remove_device(dev); 2123 1926 iommu_device_unlink(&smmu->iommu, dev); 2124 1927 kfree(master); ··· 2649 2454 /* Clear CR0 and sync (disables SMMU and queue processing) */ 2650 2455 reg = readl_relaxed(smmu->base + ARM_SMMU_CR0); 2651 2456 if (reg & 
CR0_SMMUEN) { 2652 - if (is_kdump_kernel()) { 2653 - arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0); 2654 - arm_smmu_device_disable(smmu); 2655 - return -EBUSY; 2656 - } 2657 - 2658 2457 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n"); 2458 + WARN_ON(is_kdump_kernel() && !disable_bypass); 2459 + arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0); 2659 2460 } 2660 2461 2661 2462 ret = arm_smmu_device_disable(smmu); ··· 2738 2547 } 2739 2548 } 2740 2549 2550 + if (smmu->features & ARM_SMMU_FEAT_ATS) { 2551 + enables |= CR0_ATSCHK; 2552 + ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, 2553 + ARM_SMMU_CR0ACK); 2554 + if (ret) { 2555 + dev_err(smmu->dev, "failed to enable ATS check\n"); 2556 + return ret; 2557 + } 2558 + } 2559 + 2741 2560 ret = arm_smmu_setup_irqs(smmu); 2742 2561 if (ret) { 2743 2562 dev_err(smmu->dev, "failed to setup irqs\n"); 2744 2563 return ret; 2745 2564 } 2746 2565 2566 + if (is_kdump_kernel()) 2567 + enables &= ~(CR0_EVTQEN | CR0_PRIQEN); 2747 2568 2748 2569 /* Enable the SMMU interface, or ensure bypass */ 2749 2570 if (!bypass || disable_bypass) {
+7 -4
drivers/iommu/arm-smmu.c
··· 110 110 module_param(force_stage, int, S_IRUGO); 111 111 MODULE_PARM_DESC(force_stage, 112 112 "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation."); 113 - static bool disable_bypass; 113 + static bool disable_bypass = 114 + IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT); 114 115 module_param(disable_bypass, bool, S_IRUGO); 115 116 MODULE_PARM_DESC(disable_bypass, 116 117 "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU."); ··· 570 569 571 570 static irqreturn_t arm_smmu_context_fault(int irq, void *dev) 572 571 { 573 - u32 fsr, fsynr; 572 + u32 fsr, fsynr, cbfrsynra; 574 573 unsigned long iova; 575 574 struct iommu_domain *domain = dev; 576 575 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 577 576 struct arm_smmu_cfg *cfg = &smmu_domain->cfg; 578 577 struct arm_smmu_device *smmu = smmu_domain->smmu; 578 + void __iomem *gr1_base = ARM_SMMU_GR1(smmu); 579 579 void __iomem *cb_base; 580 580 581 581 cb_base = ARM_SMMU_CB(smmu, cfg->cbndx); ··· 587 585 588 586 fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0); 589 587 iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR); 588 + cbfrsynra = readl_relaxed(gr1_base + ARM_SMMU_GR1_CBFRSYNRA(cfg->cbndx)); 590 589 591 590 dev_err_ratelimited(smmu->dev, 592 - "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n", 593 - fsr, iova, fsynr, cfg->cbndx); 591 + "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n", 592 + fsr, iova, fsynr, cbfrsynra, cfg->cbndx); 594 593 595 594 writel(fsr, cb_base + ARM_SMMU_CB_FSR); 596 595 return IRQ_HANDLED;
+1 -1
drivers/iommu/dmar.c
··· 145 145 for (tmp = dev; tmp; tmp = tmp->bus->self) 146 146 level++; 147 147 148 - size = sizeof(*info) + level * sizeof(info->path[0]); 148 + size = struct_size(info, path, level); 149 149 if (size <= sizeof(dmar_pci_notify_info_buf)) { 150 150 info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf; 151 151 } else {
+454 -128
drivers/iommu/intel-iommu.c
··· 2341 2341 } 2342 2342 2343 2343 static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, 2344 - struct scatterlist *sg, unsigned long phys_pfn, 2345 - unsigned long nr_pages, int prot) 2344 + struct scatterlist *sg, unsigned long phys_pfn, 2345 + unsigned long nr_pages, int prot) 2346 2346 { 2347 - int ret; 2348 - struct intel_iommu *iommu; 2347 + int ret; 2348 + struct intel_iommu *iommu; 2349 2349 2350 - /* Do the real mapping first */ 2351 - ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot); 2352 - if (ret) 2353 - return ret; 2350 + /* Do the real mapping first */ 2351 + ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot); 2352 + if (ret) 2353 + return ret; 2354 2354 2355 - /* Notify about the new mapping */ 2356 - if (domain_type_is_vm(domain)) { 2357 - /* VM typed domains can have more than one IOMMUs */ 2358 - int iommu_id; 2359 - for_each_domain_iommu(iommu_id, domain) { 2360 - iommu = g_iommus[iommu_id]; 2361 - __mapping_notify_one(iommu, domain, iov_pfn, nr_pages); 2362 - } 2363 - } else { 2364 - /* General domains only have one IOMMU */ 2365 - iommu = domain_get_iommu(domain); 2366 - __mapping_notify_one(iommu, domain, iov_pfn, nr_pages); 2367 - } 2355 + /* Notify about the new mapping */ 2356 + if (domain_type_is_vm(domain)) { 2357 + /* VM typed domains can have more than one IOMMUs */ 2358 + int iommu_id; 2368 2359 2369 - return 0; 2360 + for_each_domain_iommu(iommu_id, domain) { 2361 + iommu = g_iommus[iommu_id]; 2362 + __mapping_notify_one(iommu, domain, iov_pfn, nr_pages); 2363 + } 2364 + } else { 2365 + /* General domains only have one IOMMU */ 2366 + iommu = domain_get_iommu(domain); 2367 + __mapping_notify_one(iommu, domain, iov_pfn, nr_pages); 2368 + } 2369 + 2370 + return 0; 2370 2371 } 2371 2372 2372 2373 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn, ··· 2486 2485 info->domain = domain; 2487 2486 info->iommu = iommu; 2488 2487 
info->pasid_table = NULL; 2488 + info->auxd_enabled = 0; 2489 + INIT_LIST_HEAD(&info->auxiliary_domains); 2489 2490 2490 2491 if (dev && dev_is_pci(dev)) { 2491 2492 struct pci_dev *pdev = to_pci_dev(info->dev); ··· 3415 3412 iommu_identity_mapping |= IDENTMAP_ALL; 3416 3413 3417 3414 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA 3418 - iommu_identity_mapping |= IDENTMAP_GFX; 3415 + dmar_map_gfx = 0; 3419 3416 #endif 3417 + 3418 + if (!dmar_map_gfx) 3419 + iommu_identity_mapping |= IDENTMAP_GFX; 3420 3420 3421 3421 check_tylersburg_isoch(); 3422 3422 ··· 3502 3496 3503 3497 #ifdef CONFIG_INTEL_IOMMU_SVM 3504 3498 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) { 3499 + /* 3500 + * Call dmar_alloc_hwirq() with dmar_global_lock held, 3501 + * could cause possible lock race condition. 3502 + */ 3503 + up_write(&dmar_global_lock); 3505 3504 ret = intel_svm_enable_prq(iommu); 3505 + down_write(&dmar_global_lock); 3506 3506 if (ret) 3507 3507 goto free_iommu; 3508 3508 } ··· 3618 3606 } 3619 3607 3620 3608 /* Check if the dev needs to go through non-identity map and unmap process.*/ 3621 - static int iommu_no_mapping(struct device *dev) 3609 + static bool iommu_need_mapping(struct device *dev) 3622 3610 { 3623 3611 int found; 3624 3612 3625 3613 if (iommu_dummy(dev)) 3626 - return 1; 3614 + return false; 3627 3615 3628 3616 if (!iommu_identity_mapping) 3629 - return 0; 3617 + return true; 3630 3618 3631 3619 found = identity_mapping(dev); 3632 3620 if (found) { 3633 3621 if (iommu_should_identity_map(dev, 0)) 3634 - return 1; 3635 - else { 3636 - /* 3637 - * 32 bit DMA is removed from si_domain and fall back 3638 - * to non-identity mapping. 3639 - */ 3640 - dmar_remove_one_dev_info(dev); 3641 - dev_info(dev, "32bit DMA uses non-identity mapping\n"); 3642 - return 0; 3643 - } 3622 + return false; 3623 + 3624 + /* 3625 + * 32 bit DMA is removed from si_domain and fall back to 3626 + * non-identity mapping. 
3627 + */ 3628 + dmar_remove_one_dev_info(dev); 3629 + dev_info(dev, "32bit DMA uses non-identity mapping\n"); 3644 3630 } else { 3645 3631 /* 3646 3632 * In case of a detached 64 bit DMA device from vm, the device 3647 3633 * is put into si_domain for identity mapping. 3648 3634 */ 3649 - if (iommu_should_identity_map(dev, 0)) { 3650 - int ret; 3651 - ret = domain_add_dev_info(si_domain, dev); 3652 - if (!ret) { 3653 - dev_info(dev, "64bit DMA uses identity mapping\n"); 3654 - return 1; 3655 - } 3635 + if (iommu_should_identity_map(dev, 0) && 3636 + !domain_add_dev_info(si_domain, dev)) { 3637 + dev_info(dev, "64bit DMA uses identity mapping\n"); 3638 + return false; 3656 3639 } 3657 3640 } 3658 3641 3659 - return 0; 3642 + return true; 3660 3643 } 3661 3644 3662 3645 static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, ··· 3666 3659 unsigned long paddr_pfn = paddr >> PAGE_SHIFT; 3667 3660 3668 3661 BUG_ON(dir == DMA_NONE); 3669 - 3670 - if (iommu_no_mapping(dev)) 3671 - return paddr; 3672 3662 3673 3663 domain = get_valid_domain_for_dev(dev); 3674 3664 if (!domain) ··· 3715 3711 enum dma_data_direction dir, 3716 3712 unsigned long attrs) 3717 3713 { 3718 - return __intel_map_single(dev, page_to_phys(page) + offset, size, 3719 - dir, *dev->dma_mask); 3714 + if (iommu_need_mapping(dev)) 3715 + return __intel_map_single(dev, page_to_phys(page) + offset, 3716 + size, dir, *dev->dma_mask); 3717 + return dma_direct_map_page(dev, page, offset, size, dir, attrs); 3720 3718 } 3721 3719 3722 3720 static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr, 3723 3721 size_t size, enum dma_data_direction dir, 3724 3722 unsigned long attrs) 3725 3723 { 3726 - return __intel_map_single(dev, phys_addr, size, dir, *dev->dma_mask); 3724 + if (iommu_need_mapping(dev)) 3725 + return __intel_map_single(dev, phys_addr, size, dir, 3726 + *dev->dma_mask); 3727 + return dma_direct_map_resource(dev, phys_addr, size, dir, attrs); 3727 3728 } 3728 
3729 3729 3730 static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) ··· 3739 3730 unsigned long iova_pfn; 3740 3731 struct intel_iommu *iommu; 3741 3732 struct page *freelist; 3742 - 3743 - if (iommu_no_mapping(dev)) 3744 - return; 3733 + struct pci_dev *pdev = NULL; 3745 3734 3746 3735 domain = find_domain(dev); 3747 3736 BUG_ON(!domain); ··· 3752 3745 start_pfn = mm_to_dma_pfn(iova_pfn); 3753 3746 last_pfn = start_pfn + nrpages - 1; 3754 3747 3748 + if (dev_is_pci(dev)) 3749 + pdev = to_pci_dev(dev); 3750 + 3755 3751 dev_dbg(dev, "Device unmapping: pfn %lx-%lx\n", start_pfn, last_pfn); 3756 3752 3757 3753 freelist = domain_unmap(domain, start_pfn, last_pfn); 3758 3754 3759 - if (intel_iommu_strict) { 3755 + if (intel_iommu_strict || (pdev && pdev->untrusted)) { 3760 3756 iommu_flush_iotlb_psi(iommu, domain, start_pfn, 3761 3757 nrpages, !freelist, 0); 3762 3758 /* free iova */ ··· 3779 3769 size_t size, enum dma_data_direction dir, 3780 3770 unsigned long attrs) 3781 3771 { 3782 - intel_unmap(dev, dev_addr, size); 3772 + if (iommu_need_mapping(dev)) 3773 + intel_unmap(dev, dev_addr, size); 3774 + else 3775 + dma_direct_unmap_page(dev, dev_addr, size, dir, attrs); 3776 + } 3777 + 3778 + static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr, 3779 + size_t size, enum dma_data_direction dir, unsigned long attrs) 3780 + { 3781 + if (iommu_need_mapping(dev)) 3782 + intel_unmap(dev, dev_addr, size); 3783 3783 } 3784 3784 3785 3785 static void *intel_alloc_coherent(struct device *dev, size_t size, ··· 3799 3779 struct page *page = NULL; 3800 3780 int order; 3801 3781 3782 + if (!iommu_need_mapping(dev)) 3783 + return dma_direct_alloc(dev, size, dma_handle, flags, attrs); 3784 + 3802 3785 size = PAGE_ALIGN(size); 3803 3786 order = get_order(size); 3804 - 3805 - if (!iommu_no_mapping(dev)) 3806 - flags &= ~(GFP_DMA | GFP_DMA32); 3807 - else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) { 3808 - if 
(dev->coherent_dma_mask < DMA_BIT_MASK(32)) 3809 - flags |= GFP_DMA; 3810 - else 3811 - flags |= GFP_DMA32; 3812 - } 3813 3787 3814 3788 if (gfpflags_allow_blocking(flags)) { 3815 3789 unsigned int count = size >> PAGE_SHIFT; 3816 3790 3817 3791 page = dma_alloc_from_contiguous(dev, count, order, 3818 3792 flags & __GFP_NOWARN); 3819 - if (page && iommu_no_mapping(dev) && 3820 - page_to_phys(page) + size > dev->coherent_dma_mask) { 3821 - dma_release_from_contiguous(dev, page, count); 3822 - page = NULL; 3823 - } 3824 3793 } 3825 3794 3826 3795 if (!page) ··· 3835 3826 int order; 3836 3827 struct page *page = virt_to_page(vaddr); 3837 3828 3829 + if (!iommu_need_mapping(dev)) 3830 + return dma_direct_free(dev, size, vaddr, dma_handle, attrs); 3831 + 3838 3832 size = PAGE_ALIGN(size); 3839 3833 order = get_order(size); 3840 3834 ··· 3855 3843 struct scatterlist *sg; 3856 3844 int i; 3857 3845 3846 + if (!iommu_need_mapping(dev)) 3847 + return dma_direct_unmap_sg(dev, sglist, nelems, dir, attrs); 3848 + 3858 3849 for_each_sg(sglist, sg, nelems, i) { 3859 3850 nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg)); 3860 3851 } 3861 3852 3862 3853 intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT); 3863 - } 3864 - 3865 - static int intel_nontranslate_map_sg(struct device *hddev, 3866 - struct scatterlist *sglist, int nelems, int dir) 3867 - { 3868 - int i; 3869 - struct scatterlist *sg; 3870 - 3871 - for_each_sg(sglist, sg, nelems, i) { 3872 - BUG_ON(!sg_page(sg)); 3873 - sg->dma_address = sg_phys(sg); 3874 - sg->dma_length = sg->length; 3875 - } 3876 - return nelems; 3877 3854 } 3878 3855 3879 3856 static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, ··· 3879 3878 struct intel_iommu *iommu; 3880 3879 3881 3880 BUG_ON(dir == DMA_NONE); 3882 - if (iommu_no_mapping(dev)) 3883 - return intel_nontranslate_map_sg(dev, sglist, nelems, dir); 3881 + if (!iommu_need_mapping(dev)) 3882 + return dma_direct_map_sg(dev, sglist, nelems, 
dir, attrs); 3884 3883 3885 3884 domain = get_valid_domain_for_dev(dev); 3886 3885 if (!domain) ··· 3930 3929 .map_page = intel_map_page, 3931 3930 .unmap_page = intel_unmap_page, 3932 3931 .map_resource = intel_map_resource, 3933 - .unmap_resource = intel_unmap_page, 3932 + .unmap_resource = intel_unmap_resource, 3934 3933 .dma_supported = dma_direct_supported, 3935 3934 }; 3936 3935 ··· 4056 4055 4057 4056 /* This IOMMU has *only* gfx devices. Either bypass it or 4058 4057 set the gfx_mapped flag, as appropriate */ 4059 - if (dmar_map_gfx) { 4060 - intel_iommu_gfx_mapped = 1; 4061 - } else { 4058 + if (!dmar_map_gfx) { 4062 4059 drhd->ignored = 1; 4063 4060 for_each_active_dev_scope(drhd->devices, 4064 4061 drhd->devices_cnt, i, dev) ··· 4085 4086 iommu_disable_protect_mem_regions(iommu); 4086 4087 continue; 4087 4088 } 4088 - 4089 + 4089 4090 iommu_flush_write_buffer(iommu); 4090 4091 4091 4092 iommu_set_root_entry(iommu); ··· 4895 4896 goto out_free_reserved_range; 4896 4897 } 4897 4898 4899 + if (dmar_map_gfx) 4900 + intel_iommu_gfx_mapped = 1; 4901 + 4898 4902 init_no_remapping_devices(); 4899 4903 4900 4904 ret = init_dmars(); ··· 5067 5065 domain_exit(to_dmar_domain(domain)); 5068 5066 } 5069 5067 5070 - static int intel_iommu_attach_device(struct iommu_domain *domain, 5071 - struct device *dev) 5068 + /* 5069 + * Check whether a @domain could be attached to the @dev through the 5070 + * aux-domain attach/detach APIs. 
5071 + */ 5072 + static inline bool 5073 + is_aux_domain(struct device *dev, struct iommu_domain *domain) 5074 + { 5075 + struct device_domain_info *info = dev->archdata.iommu; 5076 + 5077 + return info && info->auxd_enabled && 5078 + domain->type == IOMMU_DOMAIN_UNMANAGED; 5079 + } 5080 + 5081 + static void auxiliary_link_device(struct dmar_domain *domain, 5082 + struct device *dev) 5083 + { 5084 + struct device_domain_info *info = dev->archdata.iommu; 5085 + 5086 + assert_spin_locked(&device_domain_lock); 5087 + if (WARN_ON(!info)) 5088 + return; 5089 + 5090 + domain->auxd_refcnt++; 5091 + list_add(&domain->auxd, &info->auxiliary_domains); 5092 + } 5093 + 5094 + static void auxiliary_unlink_device(struct dmar_domain *domain, 5095 + struct device *dev) 5096 + { 5097 + struct device_domain_info *info = dev->archdata.iommu; 5098 + 5099 + assert_spin_locked(&device_domain_lock); 5100 + if (WARN_ON(!info)) 5101 + return; 5102 + 5103 + list_del(&domain->auxd); 5104 + domain->auxd_refcnt--; 5105 + 5106 + if (!domain->auxd_refcnt && domain->default_pasid > 0) 5107 + intel_pasid_free_id(domain->default_pasid); 5108 + } 5109 + 5110 + static int aux_domain_add_dev(struct dmar_domain *domain, 5111 + struct device *dev) 5112 + { 5113 + int ret; 5114 + u8 bus, devfn; 5115 + unsigned long flags; 5116 + struct intel_iommu *iommu; 5117 + 5118 + iommu = device_to_iommu(dev, &bus, &devfn); 5119 + if (!iommu) 5120 + return -ENODEV; 5121 + 5122 + if (domain->default_pasid <= 0) { 5123 + int pasid; 5124 + 5125 + pasid = intel_pasid_alloc_id(domain, PASID_MIN, 5126 + pci_max_pasids(to_pci_dev(dev)), 5127 + GFP_KERNEL); 5128 + if (pasid <= 0) { 5129 + pr_err("Can't allocate default pasid\n"); 5130 + return -ENODEV; 5131 + } 5132 + domain->default_pasid = pasid; 5133 + } 5134 + 5135 + spin_lock_irqsave(&device_domain_lock, flags); 5136 + /* 5137 + * iommu->lock must be held to attach domain to iommu and setup the 5138 + * pasid entry for second level translation. 
5139 + */ 5140 + spin_lock(&iommu->lock); 5141 + ret = domain_attach_iommu(domain, iommu); 5142 + if (ret) 5143 + goto attach_failed; 5144 + 5145 + /* Setup the PASID entry for mediated devices: */ 5146 + ret = intel_pasid_setup_second_level(iommu, domain, dev, 5147 + domain->default_pasid); 5148 + if (ret) 5149 + goto table_failed; 5150 + spin_unlock(&iommu->lock); 5151 + 5152 + auxiliary_link_device(domain, dev); 5153 + 5154 + spin_unlock_irqrestore(&device_domain_lock, flags); 5155 + 5156 + return 0; 5157 + 5158 + table_failed: 5159 + domain_detach_iommu(domain, iommu); 5160 + attach_failed: 5161 + spin_unlock(&iommu->lock); 5162 + spin_unlock_irqrestore(&device_domain_lock, flags); 5163 + if (!domain->auxd_refcnt && domain->default_pasid > 0) 5164 + intel_pasid_free_id(domain->default_pasid); 5165 + 5166 + return ret; 5167 + } 5168 + 5169 + static void aux_domain_remove_dev(struct dmar_domain *domain, 5170 + struct device *dev) 5171 + { 5172 + struct device_domain_info *info; 5173 + struct intel_iommu *iommu; 5174 + unsigned long flags; 5175 + 5176 + if (!is_aux_domain(dev, &domain->domain)) 5177 + return; 5178 + 5179 + spin_lock_irqsave(&device_domain_lock, flags); 5180 + info = dev->archdata.iommu; 5181 + iommu = info->iommu; 5182 + 5183 + auxiliary_unlink_device(domain, dev); 5184 + 5185 + spin_lock(&iommu->lock); 5186 + intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid); 5187 + domain_detach_iommu(domain, iommu); 5188 + spin_unlock(&iommu->lock); 5189 + 5190 + spin_unlock_irqrestore(&device_domain_lock, flags); 5191 + } 5192 + 5193 + static int prepare_domain_attach_device(struct iommu_domain *domain, 5194 + struct device *dev) 5072 5195 { 5073 5196 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 5074 5197 struct intel_iommu *iommu; 5075 5198 int addr_width; 5076 5199 u8 bus, devfn; 5077 - 5078 - if (device_is_rmrr_locked(dev)) { 5079 - dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. 
Contact your platform vendor.\n"); 5080 - return -EPERM; 5081 - } 5082 - 5083 - /* normally dev is not mapped */ 5084 - if (unlikely(domain_context_mapped(dev))) { 5085 - struct dmar_domain *old_domain; 5086 - 5087 - old_domain = find_domain(dev); 5088 - if (old_domain) { 5089 - rcu_read_lock(); 5090 - dmar_remove_one_dev_info(dev); 5091 - rcu_read_unlock(); 5092 - 5093 - if (!domain_type_is_vm_or_si(old_domain) && 5094 - list_empty(&old_domain->devices)) 5095 - domain_exit(old_domain); 5096 - } 5097 - } 5098 5200 5099 5201 iommu = device_to_iommu(dev, &bus, &devfn); 5100 5202 if (!iommu) ··· 5232 5126 dmar_domain->agaw--; 5233 5127 } 5234 5128 5235 - return domain_add_dev_info(dmar_domain, dev); 5129 + return 0; 5130 + } 5131 + 5132 + static int intel_iommu_attach_device(struct iommu_domain *domain, 5133 + struct device *dev) 5134 + { 5135 + int ret; 5136 + 5137 + if (device_is_rmrr_locked(dev)) { 5138 + dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. 
Contact your platform vendor.\n"); 5139 + return -EPERM; 5140 + } 5141 + 5142 + if (is_aux_domain(dev, domain)) 5143 + return -EPERM; 5144 + 5145 + /* normally dev is not mapped */ 5146 + if (unlikely(domain_context_mapped(dev))) { 5147 + struct dmar_domain *old_domain; 5148 + 5149 + old_domain = find_domain(dev); 5150 + if (old_domain) { 5151 + rcu_read_lock(); 5152 + dmar_remove_one_dev_info(dev); 5153 + rcu_read_unlock(); 5154 + 5155 + if (!domain_type_is_vm_or_si(old_domain) && 5156 + list_empty(&old_domain->devices)) 5157 + domain_exit(old_domain); 5158 + } 5159 + } 5160 + 5161 + ret = prepare_domain_attach_device(domain, dev); 5162 + if (ret) 5163 + return ret; 5164 + 5165 + return domain_add_dev_info(to_dmar_domain(domain), dev); 5166 + } 5167 + 5168 + static int intel_iommu_aux_attach_device(struct iommu_domain *domain, 5169 + struct device *dev) 5170 + { 5171 + int ret; 5172 + 5173 + if (!is_aux_domain(dev, domain)) 5174 + return -EPERM; 5175 + 5176 + ret = prepare_domain_attach_device(domain, dev); 5177 + if (ret) 5178 + return ret; 5179 + 5180 + return aux_domain_add_dev(to_dmar_domain(domain), dev); 5236 5181 } 5237 5182 5238 5183 static void intel_iommu_detach_device(struct iommu_domain *domain, 5239 5184 struct device *dev) 5240 5185 { 5241 5186 dmar_remove_one_dev_info(dev); 5187 + } 5188 + 5189 + static void intel_iommu_aux_detach_device(struct iommu_domain *domain, 5190 + struct device *dev) 5191 + { 5192 + aux_domain_remove_dev(to_dmar_domain(domain), dev); 5242 5193 } 5243 5194 5244 5195 static int intel_iommu_map(struct iommu_domain *domain, ··· 5386 5223 return phys; 5387 5224 } 5388 5225 5226 + static inline bool scalable_mode_support(void) 5227 + { 5228 + struct dmar_drhd_unit *drhd; 5229 + struct intel_iommu *iommu; 5230 + bool ret = true; 5231 + 5232 + rcu_read_lock(); 5233 + for_each_active_iommu(iommu, drhd) { 5234 + if (!sm_supported(iommu)) { 5235 + ret = false; 5236 + break; 5237 + } 5238 + } 5239 + rcu_read_unlock(); 5240 + 5241 + 
return ret; 5242 + } 5243 + 5244 + static inline bool iommu_pasid_support(void) 5245 + { 5246 + struct dmar_drhd_unit *drhd; 5247 + struct intel_iommu *iommu; 5248 + bool ret = true; 5249 + 5250 + rcu_read_lock(); 5251 + for_each_active_iommu(iommu, drhd) { 5252 + if (!pasid_supported(iommu)) { 5253 + ret = false; 5254 + break; 5255 + } 5256 + } 5257 + rcu_read_unlock(); 5258 + 5259 + return ret; 5260 + } 5261 + 5389 5262 static bool intel_iommu_capable(enum iommu_cap cap) 5390 5263 { 5391 5264 if (cap == IOMMU_CAP_CACHE_COHERENCY) ··· 5506 5307 } 5507 5308 } 5508 5309 5509 - #ifdef CONFIG_INTEL_IOMMU_SVM 5510 - int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev) 5310 + int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) 5511 5311 { 5512 5312 struct device_domain_info *info; 5513 5313 struct context_entry *context; ··· 5515 5317 u64 ctx_lo; 5516 5318 int ret; 5517 5319 5518 - domain = get_valid_domain_for_dev(sdev->dev); 5320 + domain = get_valid_domain_for_dev(dev); 5519 5321 if (!domain) 5520 5322 return -EINVAL; 5521 5323 ··· 5523 5325 spin_lock(&iommu->lock); 5524 5326 5525 5327 ret = -EINVAL; 5526 - info = sdev->dev->archdata.iommu; 5328 + info = dev->archdata.iommu; 5527 5329 if (!info || !info->pasid_supported) 5528 5330 goto out; 5529 5331 ··· 5533 5335 5534 5336 ctx_lo = context[0].lo; 5535 5337 5536 - sdev->did = FLPT_DEFAULT_DID; 5537 - sdev->sid = PCI_DEVID(info->bus, info->devfn); 5538 - 5539 5338 if (!(ctx_lo & CONTEXT_PASIDE)) { 5540 5339 ctx_lo |= CONTEXT_PASIDE; 5541 5340 context[0].lo = ctx_lo; 5542 5341 wmb(); 5543 - iommu->flush.flush_context(iommu, sdev->did, sdev->sid, 5342 + iommu->flush.flush_context(iommu, 5343 + domain->iommu_did[iommu->seq_id], 5344 + PCI_DEVID(info->bus, info->devfn), 5544 5345 DMA_CCMD_MASK_NOBIT, 5545 5346 DMA_CCMD_DEVICE_INVL); 5546 5347 } ··· 5548 5351 if (!info->pasid_enabled) 5549 5352 iommu_enable_dev_iotlb(info); 5550 5353 5551 - if (info->ats_enabled) { 
5552 - sdev->dev_iotlb = 1; 5553 - sdev->qdep = info->ats_qdep; 5554 - if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS) 5555 - sdev->qdep = 0; 5556 - } 5557 5354 ret = 0; 5558 5355 5559 5356 out: ··· 5557 5366 return ret; 5558 5367 } 5559 5368 5369 + #ifdef CONFIG_INTEL_IOMMU_SVM 5560 5370 struct intel_iommu *intel_svm_device_to_iommu(struct device *dev) 5561 5371 { 5562 5372 struct intel_iommu *iommu; ··· 5579 5387 } 5580 5388 #endif /* CONFIG_INTEL_IOMMU_SVM */ 5581 5389 5390 + static int intel_iommu_enable_auxd(struct device *dev) 5391 + { 5392 + struct device_domain_info *info; 5393 + struct intel_iommu *iommu; 5394 + unsigned long flags; 5395 + u8 bus, devfn; 5396 + int ret; 5397 + 5398 + iommu = device_to_iommu(dev, &bus, &devfn); 5399 + if (!iommu || dmar_disabled) 5400 + return -EINVAL; 5401 + 5402 + if (!sm_supported(iommu) || !pasid_supported(iommu)) 5403 + return -EINVAL; 5404 + 5405 + ret = intel_iommu_enable_pasid(iommu, dev); 5406 + if (ret) 5407 + return -ENODEV; 5408 + 5409 + spin_lock_irqsave(&device_domain_lock, flags); 5410 + info = dev->archdata.iommu; 5411 + info->auxd_enabled = 1; 5412 + spin_unlock_irqrestore(&device_domain_lock, flags); 5413 + 5414 + return 0; 5415 + } 5416 + 5417 + static int intel_iommu_disable_auxd(struct device *dev) 5418 + { 5419 + struct device_domain_info *info; 5420 + unsigned long flags; 5421 + 5422 + spin_lock_irqsave(&device_domain_lock, flags); 5423 + info = dev->archdata.iommu; 5424 + if (!WARN_ON(!info)) 5425 + info->auxd_enabled = 0; 5426 + spin_unlock_irqrestore(&device_domain_lock, flags); 5427 + 5428 + return 0; 5429 + } 5430 + 5431 + /* 5432 + * A PCI express designated vendor specific extended capability is defined 5433 + * in the section 3.7 of Intel scalable I/O virtualization technical spec 5434 + * for system software and tools to detect endpoint devices supporting the 5435 + * Intel scalable IO virtualization without host driver dependency. 
5436 + * 5437 + * Returns the address of the matching extended capability structure within 5438 + * the device's PCI configuration space or 0 if the device does not support 5439 + * it. 5440 + */ 5441 + static int siov_find_pci_dvsec(struct pci_dev *pdev) 5442 + { 5443 + int pos; 5444 + u16 vendor, id; 5445 + 5446 + pos = pci_find_next_ext_capability(pdev, 0, 0x23); 5447 + while (pos) { 5448 + pci_read_config_word(pdev, pos + 4, &vendor); 5449 + pci_read_config_word(pdev, pos + 8, &id); 5450 + if (vendor == PCI_VENDOR_ID_INTEL && id == 5) 5451 + return pos; 5452 + 5453 + pos = pci_find_next_ext_capability(pdev, pos, 0x23); 5454 + } 5455 + 5456 + return 0; 5457 + } 5458 + 5459 + static bool 5460 + intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat) 5461 + { 5462 + if (feat == IOMMU_DEV_FEAT_AUX) { 5463 + int ret; 5464 + 5465 + if (!dev_is_pci(dev) || dmar_disabled || 5466 + !scalable_mode_support() || !iommu_pasid_support()) 5467 + return false; 5468 + 5469 + ret = pci_pasid_features(to_pci_dev(dev)); 5470 + if (ret < 0) 5471 + return false; 5472 + 5473 + return !!siov_find_pci_dvsec(to_pci_dev(dev)); 5474 + } 5475 + 5476 + return false; 5477 + } 5478 + 5479 + static int 5480 + intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) 5481 + { 5482 + if (feat == IOMMU_DEV_FEAT_AUX) 5483 + return intel_iommu_enable_auxd(dev); 5484 + 5485 + return -ENODEV; 5486 + } 5487 + 5488 + static int 5489 + intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat) 5490 + { 5491 + if (feat == IOMMU_DEV_FEAT_AUX) 5492 + return intel_iommu_disable_auxd(dev); 5493 + 5494 + return -ENODEV; 5495 + } 5496 + 5497 + static bool 5498 + intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat) 5499 + { 5500 + struct device_domain_info *info = dev->archdata.iommu; 5501 + 5502 + if (feat == IOMMU_DEV_FEAT_AUX) 5503 + return scalable_mode_support() && info && info->auxd_enabled; 5504 + 5505 + return false; 
5506 + } 5507 + 5508 + static int 5509 + intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev) 5510 + { 5511 + struct dmar_domain *dmar_domain = to_dmar_domain(domain); 5512 + 5513 + return dmar_domain->default_pasid > 0 ? 5514 + dmar_domain->default_pasid : -EINVAL; 5515 + } 5516 + 5582 5517 const struct iommu_ops intel_iommu_ops = { 5583 5518 .capable = intel_iommu_capable, 5584 5519 .domain_alloc = intel_iommu_domain_alloc, 5585 5520 .domain_free = intel_iommu_domain_free, 5586 5521 .attach_dev = intel_iommu_attach_device, 5587 5522 .detach_dev = intel_iommu_detach_device, 5523 + .aux_attach_dev = intel_iommu_aux_attach_device, 5524 + .aux_detach_dev = intel_iommu_aux_detach_device, 5525 + .aux_get_pasid = intel_iommu_aux_get_pasid, 5588 5526 .map = intel_iommu_map, 5589 5527 .unmap = intel_iommu_unmap, 5590 5528 .iova_to_phys = intel_iommu_iova_to_phys, ··· 5723 5401 .get_resv_regions = intel_iommu_get_resv_regions, 5724 5402 .put_resv_regions = intel_iommu_put_resv_regions, 5725 5403 .device_group = pci_device_group, 5404 + .dev_has_feat = intel_iommu_dev_has_feat, 5405 + .dev_feat_enabled = intel_iommu_dev_feat_enabled, 5406 + .dev_enable_feat = intel_iommu_dev_enable_feat, 5407 + .dev_disable_feat = intel_iommu_dev_disable_feat, 5726 5408 .pgsize_bitmap = INTEL_IOMMU_PGSIZES, 5727 5409 }; 5728 5410
+3 -1
drivers/iommu/intel-pasid.c
··· 154 154 order = size ? get_order(size) : 0; 155 155 pages = alloc_pages_node(info->iommu->node, 156 156 GFP_KERNEL | __GFP_ZERO, order); 157 - if (!pages) 157 + if (!pages) { 158 + kfree(pasid_table); 158 159 return -ENOMEM; 160 + } 159 161 160 162 pasid_table->table = page_address(pages); 161 163 pasid_table->order = order;
+18 -1
drivers/iommu/intel-svm.c
··· 228 228 int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops) 229 229 { 230 230 struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); 231 + struct device_domain_info *info; 231 232 struct intel_svm_dev *sdev; 232 233 struct intel_svm *svm = NULL; 233 234 struct mm_struct *mm = NULL; ··· 292 291 } 293 292 sdev->dev = dev; 294 293 295 - ret = intel_iommu_enable_pasid(iommu, sdev); 294 + ret = intel_iommu_enable_pasid(iommu, dev); 296 295 if (ret || !pasid) { 297 296 /* If they don't actually want to assign a PASID, this is 298 297 * just an enabling check/preparation. */ 299 298 kfree(sdev); 300 299 goto out; 301 300 } 301 + 302 + info = dev->archdata.iommu; 303 + if (!info || !info->pasid_supported) { 304 + kfree(sdev); 305 + goto out; 306 + } 307 + 308 + sdev->did = FLPT_DEFAULT_DID; 309 + sdev->sid = PCI_DEVID(info->bus, info->devfn); 310 + if (info->ats_enabled) { 311 + sdev->dev_iotlb = 1; 312 + sdev->qdep = info->ats_qdep; 313 + if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS) 314 + sdev->qdep = 0; 315 + } 316 + 302 317 /* Finish the setup now we know we're keeping it */ 303 318 sdev->users = 1; 304 319 sdev->ops = ops;
+3 -4
drivers/iommu/intel_irq_remapping.c
··· 548 548 goto out_free_table; 549 549 } 550 550 551 - bitmap = kcalloc(BITS_TO_LONGS(INTR_REMAP_TABLE_ENTRIES), 552 - sizeof(long), GFP_ATOMIC); 551 + bitmap = bitmap_zalloc(INTR_REMAP_TABLE_ENTRIES, GFP_ATOMIC); 553 552 if (bitmap == NULL) { 554 553 pr_err("IR%d: failed to allocate bitmap\n", iommu->seq_id); 555 554 goto out_free_pages; ··· 615 616 return 0; 616 617 617 618 out_free_bitmap: 618 - kfree(bitmap); 619 + bitmap_free(bitmap); 619 620 out_free_pages: 620 621 __free_pages(pages, INTR_REMAP_PAGE_ORDER); 621 622 out_free_table: ··· 639 640 } 640 641 free_pages((unsigned long)iommu->ir_table->base, 641 642 INTR_REMAP_PAGE_ORDER); 642 - kfree(iommu->ir_table->bitmap); 643 + bitmap_free(iommu->ir_table->bitmap); 643 644 kfree(iommu->ir_table); 644 645 iommu->ir_table = NULL; 645 646 }
+202 -9
drivers/iommu/iommu.c
··· 45 45 #endif 46 46 static bool iommu_dma_strict __read_mostly = true; 47 47 48 - struct iommu_callback_data { 49 - const struct iommu_ops *ops; 50 - }; 51 - 52 48 struct iommu_group { 53 49 struct kobject kobj; 54 50 struct kobject *devices_kobj; ··· 1213 1217 { 1214 1218 int err; 1215 1219 struct notifier_block *nb; 1216 - struct iommu_callback_data cb = { 1217 - .ops = ops, 1218 - }; 1219 1220 1220 1221 nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL); 1221 1222 if (!nb) ··· 1224 1231 if (err) 1225 1232 goto out_free; 1226 1233 1227 - err = bus_for_each_dev(bus, NULL, &cb, add_iommu_group); 1234 + err = bus_for_each_dev(bus, NULL, NULL, add_iommu_group); 1228 1235 if (err) 1229 1236 goto out_err; 1230 1237 ··· 1233 1240 1234 1241 out_err: 1235 1242 /* Clean up */ 1236 - bus_for_each_dev(bus, NULL, &cb, remove_iommu_group); 1243 + bus_for_each_dev(bus, NULL, NULL, remove_iommu_group); 1237 1244 bus_unregister_notifier(bus, nb); 1238 1245 1239 1246 out_free: ··· 2032 2039 return 0; 2033 2040 } 2034 2041 EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids); 2042 + 2043 + /* 2044 + * Per device IOMMU features. 2045 + */ 2046 + bool iommu_dev_has_feature(struct device *dev, enum iommu_dev_features feat) 2047 + { 2048 + const struct iommu_ops *ops = dev->bus->iommu_ops; 2049 + 2050 + if (ops && ops->dev_has_feat) 2051 + return ops->dev_has_feat(dev, feat); 2052 + 2053 + return false; 2054 + } 2055 + EXPORT_SYMBOL_GPL(iommu_dev_has_feature); 2056 + 2057 + int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) 2058 + { 2059 + const struct iommu_ops *ops = dev->bus->iommu_ops; 2060 + 2061 + if (ops && ops->dev_enable_feat) 2062 + return ops->dev_enable_feat(dev, feat); 2063 + 2064 + return -ENODEV; 2065 + } 2066 + EXPORT_SYMBOL_GPL(iommu_dev_enable_feature); 2067 + 2068 + /* 2069 + * The device drivers should do the necessary cleanups before calling this. 
2070 + * For example, before disabling the aux-domain feature, the device driver 2071 + * should detach all aux-domains. Otherwise, this will return -EBUSY. 2072 + */ 2073 + int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) 2074 + { 2075 + const struct iommu_ops *ops = dev->bus->iommu_ops; 2076 + 2077 + if (ops && ops->dev_disable_feat) 2078 + return ops->dev_disable_feat(dev, feat); 2079 + 2080 + return -EBUSY; 2081 + } 2082 + EXPORT_SYMBOL_GPL(iommu_dev_disable_feature); 2083 + 2084 + bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features feat) 2085 + { 2086 + const struct iommu_ops *ops = dev->bus->iommu_ops; 2087 + 2088 + if (ops && ops->dev_feat_enabled) 2089 + return ops->dev_feat_enabled(dev, feat); 2090 + 2091 + return false; 2092 + } 2093 + EXPORT_SYMBOL_GPL(iommu_dev_feature_enabled); 2094 + 2095 + /* 2096 + * Aux-domain specific attach/detach. 2097 + * 2098 + * Only works if iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX) returns 2099 + * true. Also, as long as domains are attached to a device through this 2100 + * interface, any tries to call iommu_attach_device() should fail 2101 + * (iommu_detach_device() can't fail, so we fail when trying to re-attach). 2102 + * This should make us safe against a device being attached to a guest as a 2103 + * whole while there are still pasid users on it (aux and sva). 
2104 + */ 2105 + int iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev) 2106 + { 2107 + int ret = -ENODEV; 2108 + 2109 + if (domain->ops->aux_attach_dev) 2110 + ret = domain->ops->aux_attach_dev(domain, dev); 2111 + 2112 + if (!ret) 2113 + trace_attach_device_to_domain(dev); 2114 + 2115 + return ret; 2116 + } 2117 + EXPORT_SYMBOL_GPL(iommu_aux_attach_device); 2118 + 2119 + void iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev) 2120 + { 2121 + if (domain->ops->aux_detach_dev) { 2122 + domain->ops->aux_detach_dev(domain, dev); 2123 + trace_detach_device_from_domain(dev); 2124 + } 2125 + } 2126 + EXPORT_SYMBOL_GPL(iommu_aux_detach_device); 2127 + 2128 + int iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev) 2129 + { 2130 + int ret = -ENODEV; 2131 + 2132 + if (domain->ops->aux_get_pasid) 2133 + ret = domain->ops->aux_get_pasid(domain, dev); 2134 + 2135 + return ret; 2136 + } 2137 + EXPORT_SYMBOL_GPL(iommu_aux_get_pasid); 2138 + 2139 + /** 2140 + * iommu_sva_bind_device() - Bind a process address space to a device 2141 + * @dev: the device 2142 + * @mm: the mm to bind, caller must hold a reference to it 2143 + * 2144 + * Create a bond between device and address space, allowing the device to access 2145 + * the mm using the returned PASID. If a bond already exists between @device and 2146 + * @mm, it is returned and an additional reference is taken. Caller must call 2147 + * iommu_sva_unbind_device() to release each reference. 2148 + * 2149 + * iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA) must be called first, to 2150 + * initialize the required SVA features. 2151 + * 2152 + * On error, returns an ERR_PTR value. 
2153 + */ 2154 + struct iommu_sva * 2155 + iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata) 2156 + { 2157 + struct iommu_group *group; 2158 + struct iommu_sva *handle = ERR_PTR(-EINVAL); 2159 + const struct iommu_ops *ops = dev->bus->iommu_ops; 2160 + 2161 + if (!ops || !ops->sva_bind) 2162 + return ERR_PTR(-ENODEV); 2163 + 2164 + group = iommu_group_get(dev); 2165 + if (!group) 2166 + return ERR_PTR(-ENODEV); 2167 + 2168 + /* Ensure device count and domain don't change while we're binding */ 2169 + mutex_lock(&group->mutex); 2170 + 2171 + /* 2172 + * To keep things simple, SVA currently doesn't support IOMMU groups 2173 + * with more than one device. Existing SVA-capable systems are not 2174 + * affected by the problems that required IOMMU groups (lack of ACS 2175 + * isolation, device ID aliasing and other hardware issues). 2176 + */ 2177 + if (iommu_group_device_count(group) != 1) 2178 + goto out_unlock; 2179 + 2180 + handle = ops->sva_bind(dev, mm, drvdata); 2181 + 2182 + out_unlock: 2183 + mutex_unlock(&group->mutex); 2184 + iommu_group_put(group); 2185 + 2186 + return handle; 2187 + } 2188 + EXPORT_SYMBOL_GPL(iommu_sva_bind_device); 2189 + 2190 + /** 2191 + * iommu_sva_unbind_device() - Remove a bond created with iommu_sva_bind_device 2192 + * @handle: the handle returned by iommu_sva_bind_device() 2193 + * 2194 + * Put reference to a bond between device and address space. The device should 2195 + * not be issuing any more transaction for this PASID. All outstanding page 2196 + * requests for this PASID must have been flushed to the IOMMU. 
2197 + * 2198 + * Returns 0 on success, or an error value 2199 + */ 2200 + void iommu_sva_unbind_device(struct iommu_sva *handle) 2201 + { 2202 + struct iommu_group *group; 2203 + struct device *dev = handle->dev; 2204 + const struct iommu_ops *ops = dev->bus->iommu_ops; 2205 + 2206 + if (!ops || !ops->sva_unbind) 2207 + return; 2208 + 2209 + group = iommu_group_get(dev); 2210 + if (!group) 2211 + return; 2212 + 2213 + mutex_lock(&group->mutex); 2214 + ops->sva_unbind(handle); 2215 + mutex_unlock(&group->mutex); 2216 + 2217 + iommu_group_put(group); 2218 + } 2219 + EXPORT_SYMBOL_GPL(iommu_sva_unbind_device); 2220 + 2221 + int iommu_sva_set_ops(struct iommu_sva *handle, 2222 + const struct iommu_sva_ops *sva_ops) 2223 + { 2224 + if (handle->ops && handle->ops != sva_ops) 2225 + return -EEXIST; 2226 + 2227 + handle->ops = sva_ops; 2228 + return 0; 2229 + } 2230 + EXPORT_SYMBOL_GPL(iommu_sva_set_ops); 2231 + 2232 + int iommu_sva_get_pasid(struct iommu_sva *handle) 2233 + { 2234 + const struct iommu_ops *ops = handle->dev->bus->iommu_ops; 2235 + 2236 + if (!ops || !ops->sva_get_pasid) 2237 + return IOMMU_PASID_INVALID; 2238 + 2239 + return ops->sva_get_pasid(handle); 2240 + } 2241 + EXPORT_SYMBOL_GPL(iommu_sva_get_pasid);
+6 -2
drivers/iommu/mtk_iommu.c
··· 632 632 if (!larbnode) 633 633 return -EINVAL; 634 634 635 - if (!of_device_is_available(larbnode)) 635 + if (!of_device_is_available(larbnode)) { 636 + of_node_put(larbnode); 636 637 continue; 638 + } 637 639 638 640 ret = of_property_read_u32(larbnode, "mediatek,larb-id", &id); 639 641 if (ret)/* The id is consecutive if there is no this property */ 640 642 id = i; 641 643 642 644 plarbdev = of_find_device_by_node(larbnode); 643 - if (!plarbdev) 645 + if (!plarbdev) { 646 + of_node_put(larbnode); 644 647 return -EPROBE_DEFER; 648 + } 645 649 data->smi_imu.larb_imu[id].dev = &plarbdev->dev; 646 650 647 651 component_match_add_release(dev, &match, release_of,
+31 -10
drivers/iommu/tegra-smmu.c
··· 102 102 #define SMMU_TLB_FLUSH_VA_MATCH_ALL (0 << 0) 103 103 #define SMMU_TLB_FLUSH_VA_MATCH_SECTION (2 << 0) 104 104 #define SMMU_TLB_FLUSH_VA_MATCH_GROUP (3 << 0) 105 - #define SMMU_TLB_FLUSH_ASID(x) (((x) & 0x7f) << 24) 106 105 #define SMMU_TLB_FLUSH_VA_SECTION(addr) ((((addr) & 0xffc00000) >> 12) | \ 107 106 SMMU_TLB_FLUSH_VA_MATCH_SECTION) 108 107 #define SMMU_TLB_FLUSH_VA_GROUP(addr) ((((addr) & 0xffffc000) >> 12) | \ ··· 145 146 146 147 #define SMMU_PDE_ATTR (SMMU_PDE_READABLE | SMMU_PDE_WRITABLE | \ 147 148 SMMU_PDE_NONSECURE) 148 - #define SMMU_PTE_ATTR (SMMU_PTE_READABLE | SMMU_PTE_WRITABLE | \ 149 - SMMU_PTE_NONSECURE) 150 149 151 150 static unsigned int iova_pd_index(unsigned long iova) 152 151 { ··· 202 205 { 203 206 u32 value; 204 207 205 - value = SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_ASID(asid) | 206 - SMMU_TLB_FLUSH_VA_MATCH_ALL; 208 + if (smmu->soc->num_asids == 4) 209 + value = (asid & 0x3) << 29; 210 + else 211 + value = (asid & 0x7f) << 24; 212 + 213 + value |= SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_VA_MATCH_ALL; 207 214 smmu_writel(smmu, value, SMMU_TLB_FLUSH); 208 215 } 209 216 ··· 217 216 { 218 217 u32 value; 219 218 220 - value = SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_ASID(asid) | 221 - SMMU_TLB_FLUSH_VA_SECTION(iova); 219 + if (smmu->soc->num_asids == 4) 220 + value = (asid & 0x3) << 29; 221 + else 222 + value = (asid & 0x7f) << 24; 223 + 224 + value |= SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_VA_SECTION(iova); 222 225 smmu_writel(smmu, value, SMMU_TLB_FLUSH); 223 226 } 224 227 ··· 232 227 { 233 228 u32 value; 234 229 235 - value = SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_ASID(asid) | 236 - SMMU_TLB_FLUSH_VA_GROUP(iova); 230 + if (smmu->soc->num_asids == 4) 231 + value = (asid & 0x3) << 29; 232 + else 233 + value = (asid & 0x7f) << 24; 234 + 235 + value |= SMMU_TLB_FLUSH_ASID_MATCH | SMMU_TLB_FLUSH_VA_GROUP(iova); 237 236 smmu_writel(smmu, value, SMMU_TLB_FLUSH); 238 237 } 239 238 ··· 325 316 326 317 /* TODO: free page 
directory and page tables */ 327 318 319 + WARN_ON_ONCE(as->use_count); 320 + kfree(as->count); 321 + kfree(as->pts); 328 322 kfree(as); 329 323 } 330 324 ··· 657 645 { 658 646 struct tegra_smmu_as *as = to_smmu_as(domain); 659 647 dma_addr_t pte_dma; 648 + u32 pte_attrs; 660 649 u32 *pte; 661 650 662 651 pte = as_get_pte(as, iova, &pte_dma); ··· 668 655 if (*pte == 0) 669 656 tegra_smmu_pte_get_use(as, iova); 670 657 658 + pte_attrs = SMMU_PTE_NONSECURE; 659 + 660 + if (prot & IOMMU_READ) 661 + pte_attrs |= SMMU_PTE_READABLE; 662 + 663 + if (prot & IOMMU_WRITE) 664 + pte_attrs |= SMMU_PTE_WRITABLE; 665 + 671 666 tegra_smmu_set_pte(as, iova, pte, pte_dma, 672 - __phys_to_pfn(paddr) | SMMU_PTE_ATTR); 667 + __phys_to_pfn(paddr) | pte_attrs); 673 668 674 669 return 0; 675 670 }
+18
drivers/vfio/mdev/mdev_core.c
··· 388 388 return 0; 389 389 } 390 390 391 + int mdev_set_iommu_device(struct device *dev, struct device *iommu_device) 392 + { 393 + struct mdev_device *mdev = to_mdev_device(dev); 394 + 395 + mdev->iommu_device = iommu_device; 396 + 397 + return 0; 398 + } 399 + EXPORT_SYMBOL(mdev_set_iommu_device); 400 + 401 + struct device *mdev_get_iommu_device(struct device *dev) 402 + { 403 + struct mdev_device *mdev = to_mdev_device(dev); 404 + 405 + return mdev->iommu_device; 406 + } 407 + EXPORT_SYMBOL(mdev_get_iommu_device); 408 + 391 409 static int __init mdev_init(void) 392 410 { 393 411 return mdev_bus_register();
+1
drivers/vfio/mdev/mdev_private.h
··· 32 32 void *driver_data; 33 33 struct list_head next; 34 34 struct kobject *type_kobj; 35 + struct device *iommu_device; 35 36 bool active; 36 37 }; 37 38
+119 -20
drivers/vfio/vfio_iommu_type1.c
··· 97 97 struct vfio_group { 98 98 struct iommu_group *iommu_group; 99 99 struct list_head next; 100 + bool mdev_group; /* An mdev group */ 100 101 }; 101 102 102 103 /* ··· 565 564 mutex_lock(&iommu->lock); 566 565 567 566 /* Fail if notifier list is empty */ 568 - if ((!iommu->external_domain) || (!iommu->notifier.head)) { 567 + if (!iommu->notifier.head) { 569 568 ret = -EINVAL; 570 569 goto pin_done; 571 570 } ··· 646 645 return -EACCES; 647 646 648 647 mutex_lock(&iommu->lock); 649 - 650 - if (!iommu->external_domain) { 651 - mutex_unlock(&iommu->lock); 652 - return -EINVAL; 653 - } 654 648 655 649 do_accounting = !IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu); 656 650 for (i = 0; i < npage; i++) { ··· 1307 1311 return ret; 1308 1312 } 1309 1313 1314 + static struct device *vfio_mdev_get_iommu_device(struct device *dev) 1315 + { 1316 + struct device *(*fn)(struct device *dev); 1317 + struct device *iommu_device; 1318 + 1319 + fn = symbol_get(mdev_get_iommu_device); 1320 + if (fn) { 1321 + iommu_device = fn(dev); 1322 + symbol_put(mdev_get_iommu_device); 1323 + 1324 + return iommu_device; 1325 + } 1326 + 1327 + return NULL; 1328 + } 1329 + 1330 + static int vfio_mdev_attach_domain(struct device *dev, void *data) 1331 + { 1332 + struct iommu_domain *domain = data; 1333 + struct device *iommu_device; 1334 + 1335 + iommu_device = vfio_mdev_get_iommu_device(dev); 1336 + if (iommu_device) { 1337 + if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX)) 1338 + return iommu_aux_attach_device(domain, iommu_device); 1339 + else 1340 + return iommu_attach_device(domain, iommu_device); 1341 + } 1342 + 1343 + return -EINVAL; 1344 + } 1345 + 1346 + static int vfio_mdev_detach_domain(struct device *dev, void *data) 1347 + { 1348 + struct iommu_domain *domain = data; 1349 + struct device *iommu_device; 1350 + 1351 + iommu_device = vfio_mdev_get_iommu_device(dev); 1352 + if (iommu_device) { 1353 + if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX)) 1354 + 
iommu_aux_detach_device(domain, iommu_device); 1355 + else 1356 + iommu_detach_device(domain, iommu_device); 1357 + } 1358 + 1359 + return 0; 1360 + } 1361 + 1362 + static int vfio_iommu_attach_group(struct vfio_domain *domain, 1363 + struct vfio_group *group) 1364 + { 1365 + if (group->mdev_group) 1366 + return iommu_group_for_each_dev(group->iommu_group, 1367 + domain->domain, 1368 + vfio_mdev_attach_domain); 1369 + else 1370 + return iommu_attach_group(domain->domain, group->iommu_group); 1371 + } 1372 + 1373 + static void vfio_iommu_detach_group(struct vfio_domain *domain, 1374 + struct vfio_group *group) 1375 + { 1376 + if (group->mdev_group) 1377 + iommu_group_for_each_dev(group->iommu_group, domain->domain, 1378 + vfio_mdev_detach_domain); 1379 + else 1380 + iommu_detach_group(domain->domain, group->iommu_group); 1381 + } 1382 + 1383 + static bool vfio_bus_is_mdev(struct bus_type *bus) 1384 + { 1385 + struct bus_type *mdev_bus; 1386 + bool ret = false; 1387 + 1388 + mdev_bus = symbol_get(mdev_bus_type); 1389 + if (mdev_bus) { 1390 + ret = (bus == mdev_bus); 1391 + symbol_put(mdev_bus_type); 1392 + } 1393 + 1394 + return ret; 1395 + } 1396 + 1397 + static int vfio_mdev_iommu_device(struct device *dev, void *data) 1398 + { 1399 + struct device **old = data, *new; 1400 + 1401 + new = vfio_mdev_get_iommu_device(dev); 1402 + if (!new || (*old && *old != new)) 1403 + return -EINVAL; 1404 + 1405 + *old = new; 1406 + 1407 + return 0; 1408 + } 1409 + 1310 1410 static int vfio_iommu_type1_attach_group(void *iommu_data, 1311 1411 struct iommu_group *iommu_group) 1312 1412 { 1313 1413 struct vfio_iommu *iommu = iommu_data; 1314 1414 struct vfio_group *group; 1315 1415 struct vfio_domain *domain, *d; 1316 - struct bus_type *bus = NULL, *mdev_bus; 1416 + struct bus_type *bus = NULL; 1317 1417 int ret; 1318 1418 bool resv_msi, msi_remap; 1319 1419 phys_addr_t resv_msi_base; ··· 1444 1352 if (ret) 1445 1353 goto out_free; 1446 1354 1447 - mdev_bus = 
symbol_get(mdev_bus_type); 1355 + if (vfio_bus_is_mdev(bus)) { 1356 + struct device *iommu_device = NULL; 1448 1357 1449 - if (mdev_bus) { 1450 - if ((bus == mdev_bus) && !iommu_present(bus)) { 1451 - symbol_put(mdev_bus_type); 1358 + group->mdev_group = true; 1359 + 1360 + /* Determine the isolation type */ 1361 + ret = iommu_group_for_each_dev(iommu_group, &iommu_device, 1362 + vfio_mdev_iommu_device); 1363 + if (ret || !iommu_device) { 1452 1364 if (!iommu->external_domain) { 1453 1365 INIT_LIST_HEAD(&domain->group_list); 1454 1366 iommu->external_domain = domain; 1455 - } else 1367 + } else { 1456 1368 kfree(domain); 1369 + } 1457 1370 1458 1371 list_add(&group->next, 1459 1372 &iommu->external_domain->group_list); 1460 1373 mutex_unlock(&iommu->lock); 1374 + 1461 1375 return 0; 1462 1376 } 1463 - symbol_put(mdev_bus_type); 1377 + 1378 + bus = iommu_device->bus; 1464 1379 } 1465 1380 1466 1381 domain->domain = iommu_domain_alloc(bus); ··· 1485 1386 goto out_domain; 1486 1387 } 1487 1388 1488 - ret = iommu_attach_group(domain->domain, iommu_group); 1389 + ret = vfio_iommu_attach_group(domain, group); 1489 1390 if (ret) 1490 1391 goto out_domain; 1491 1392 ··· 1517 1418 list_for_each_entry(d, &iommu->domain_list, next) { 1518 1419 if (d->domain->ops == domain->domain->ops && 1519 1420 d->prot == domain->prot) { 1520 - iommu_detach_group(domain->domain, iommu_group); 1521 - if (!iommu_attach_group(d->domain, iommu_group)) { 1421 + vfio_iommu_detach_group(domain, group); 1422 + if (!vfio_iommu_attach_group(d, group)) { 1522 1423 list_add(&group->next, &d->group_list); 1523 1424 iommu_domain_free(domain->domain); 1524 1425 kfree(domain); ··· 1526 1427 return 0; 1527 1428 } 1528 1429 1529 - ret = iommu_attach_group(domain->domain, iommu_group); 1430 + ret = vfio_iommu_attach_group(domain, group); 1530 1431 if (ret) 1531 1432 goto out_domain; 1532 1433 } ··· 1552 1453 return 0; 1553 1454 1554 1455 out_detach: 1555 - iommu_detach_group(domain->domain, iommu_group); 
1456 + vfio_iommu_detach_group(domain, group); 1556 1457 out_domain: 1557 1458 iommu_domain_free(domain->domain); 1558 1459 out_free: ··· 1643 1544 if (!group) 1644 1545 continue; 1645 1546 1646 - iommu_detach_group(domain->domain, iommu_group); 1547 + vfio_iommu_detach_group(domain, group); 1647 1548 list_del(&group->next); 1648 1549 kfree(group); 1649 1550 /* ··· 1709 1610 list_for_each_entry_safe(group, group_tmp, 1710 1611 &domain->group_list, next) { 1711 1612 if (!external) 1712 - iommu_detach_group(domain->domain, group->iommu_group); 1613 + vfio_iommu_detach_group(domain, group); 1713 1614 list_del(&group->next); 1714 1615 kfree(group); 1715 1616 }
+12 -1
include/linux/intel-iommu.h
··· 489 489 /* Domain ids per IOMMU. Use u16 since 490 490 * domain ids are 16 bit wide according 491 491 * to VT-d spec, section 9.3 */ 492 + unsigned int auxd_refcnt; /* Refcount of auxiliary attaching */ 492 493 493 494 bool has_iotlb_device; 494 495 struct list_head devices; /* all devices' list */ 496 + struct list_head auxd; /* link to device's auxiliary list */ 495 497 struct iova_domain iovad; /* iova's that belong to this domain */ 496 498 497 499 struct dma_pte *pgd; /* virtual address */ ··· 511 509 0 == 4KiB (no superpages), 1 == 2MiB, 512 510 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */ 513 511 u64 max_addr; /* maximum mapped address */ 512 + 513 + int default_pasid; /* 514 + * The default pasid used for non-SVM 515 + * traffic on mediated devices. 516 + */ 514 517 515 518 struct iommu_domain domain; /* generic domain data structure for 516 519 iommu core */ ··· 566 559 struct list_head link; /* link to domain siblings */ 567 560 struct list_head global; /* link to global list */ 568 561 struct list_head table; /* link to pasid table */ 562 + struct list_head auxiliary_domains; /* auxiliary domains 563 + * attached to this device 564 + */ 569 565 u8 bus; /* PCI bus number */ 570 566 u8 devfn; /* PCI devfn number */ 571 567 u16 pfsid; /* SRIOV physical function source ID */ ··· 578 568 u8 pri_enabled:1; 579 569 u8 ats_supported:1; 580 570 u8 ats_enabled:1; 571 + u8 auxd_enabled:1; /* Multiple domains per device */ 581 572 u8 ats_qdep; 582 573 struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ 583 574 struct intel_iommu *iommu; /* IOMMU used by this device */ ··· 661 650 int for_each_device_domain(int (*fn)(struct device_domain_info *info, 662 651 void *data), void *data); 663 652 void iommu_flush_write_buffer(struct intel_iommu *iommu); 653 + int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev); 664 654 665 655 #ifdef CONFIG_INTEL_IOMMU_SVM 666 656 int intel_svm_init(struct intel_iommu *iommu); ··· 691 679 struct list_head list; 
692 680 }; 693 681 694 - extern int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev); 695 682 extern struct intel_iommu *intel_svm_device_to_iommu(struct device *dev); 696 683 #endif 697 684
+144
include/linux/iommu.h
··· 48 48 struct device; 49 49 struct iommu_domain; 50 50 struct notifier_block; 51 + struct iommu_sva; 51 52 52 53 /* iommu fault flags */ 53 54 #define IOMMU_FAULT_READ 0x0 ··· 56 55 57 56 typedef int (*iommu_fault_handler_t)(struct iommu_domain *, 58 57 struct device *, unsigned long, int, void *); 58 + typedef int (*iommu_mm_exit_handler_t)(struct device *dev, struct iommu_sva *, 59 + void *); 59 60 60 61 struct iommu_domain_geometry { 61 62 dma_addr_t aperture_start; /* First address that can be mapped */ ··· 159 156 enum iommu_resv_type type; 160 157 }; 161 158 159 + /* Per device IOMMU features */ 160 + enum iommu_dev_features { 161 + IOMMU_DEV_FEAT_AUX, /* Aux-domain feature */ 162 + IOMMU_DEV_FEAT_SVA, /* Shared Virtual Addresses */ 163 + }; 164 + 165 + #define IOMMU_PASID_INVALID (-1U) 166 + 167 + /** 168 + * struct iommu_sva_ops - device driver callbacks for an SVA context 169 + * 170 + * @mm_exit: called when the mm is about to be torn down by exit_mmap. After 171 + * @mm_exit returns, the device must not issue any more transaction 172 + * with the PASID given as argument. 173 + * 174 + * The @mm_exit handler is allowed to sleep. Be careful about the 175 + * locks taken in @mm_exit, because they might lead to deadlocks if 176 + * they are also held when dropping references to the mm. Consider the 177 + * following call chain: 178 + * mutex_lock(A); mmput(mm) -> exit_mm() -> @mm_exit() -> mutex_lock(A) 179 + * Using mmput_async() prevents this scenario. 180 + * 181 + */ 182 + struct iommu_sva_ops { 183 + iommu_mm_exit_handler_t mm_exit; 184 + }; 185 + 162 186 #ifdef CONFIG_IOMMU_API 163 187 164 188 /** ··· 216 186 * @of_xlate: add OF master IDs to iommu grouping 217 187 * @is_attach_deferred: Check if domain attach should be deferred from iommu 218 188 * driver init to device driver init (default no) 189 + * @dev_has/enable/disable_feat: per device entries to check/enable/disable 190 + * iommu specific features. 
191 + * @dev_feat_enabled: check enabled feature 192 + * @aux_attach/detach_dev: aux-domain specific attach/detach entries. 193 + * @aux_get_pasid: get the pasid given an aux-domain 194 + * @sva_bind: Bind process address space to device 195 + * @sva_unbind: Unbind process address space from device 196 + * @sva_get_pasid: Get PASID associated to a SVA handle 219 197 * @pgsize_bitmap: bitmap of all possible supported page sizes 220 198 */ 221 199 struct iommu_ops { ··· 267 229 268 230 int (*of_xlate)(struct device *dev, struct of_phandle_args *args); 269 231 bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev); 232 + 233 + /* Per device IOMMU features */ 234 + bool (*dev_has_feat)(struct device *dev, enum iommu_dev_features f); 235 + bool (*dev_feat_enabled)(struct device *dev, enum iommu_dev_features f); 236 + int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f); 237 + int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f); 238 + 239 + /* Aux-domain specific attach/detach entries */ 240 + int (*aux_attach_dev)(struct iommu_domain *domain, struct device *dev); 241 + void (*aux_detach_dev)(struct iommu_domain *domain, struct device *dev); 242 + int (*aux_get_pasid)(struct iommu_domain *domain, struct device *dev); 243 + 244 + struct iommu_sva *(*sva_bind)(struct device *dev, struct mm_struct *mm, 245 + void *drvdata); 246 + void (*sva_unbind)(struct iommu_sva *handle); 247 + int (*sva_get_pasid)(struct iommu_sva *handle); 270 248 271 249 unsigned long pgsize_bitmap; 272 250 }; ··· 446 392 const struct iommu_ops *ops; 447 393 struct fwnode_handle *iommu_fwnode; 448 394 void *iommu_priv; 395 + u32 flags; 449 396 unsigned int num_ids; 450 397 u32 ids[1]; 398 + }; 399 + 400 + /* ATS is supported */ 401 + #define IOMMU_FWSPEC_PCI_RC_ATS (1 << 0) 402 + 403 + /** 404 + * struct iommu_sva - handle to a device-mm bond 405 + */ 406 + struct iommu_sva { 407 + struct device *dev; 408 + const struct iommu_sva_ops *ops; 451 
409 }; 452 410 453 411 int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, ··· 481 415 482 416 int iommu_probe_device(struct device *dev); 483 417 void iommu_release_device(struct device *dev); 418 + 419 + bool iommu_dev_has_feature(struct device *dev, enum iommu_dev_features f); 420 + int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f); 421 + int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f); 422 + bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features f); 423 + int iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev); 424 + void iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev); 425 + int iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev); 426 + 427 + struct iommu_sva *iommu_sva_bind_device(struct device *dev, 428 + struct mm_struct *mm, 429 + void *drvdata); 430 + void iommu_sva_unbind_device(struct iommu_sva *handle); 431 + int iommu_sva_set_ops(struct iommu_sva *handle, 432 + const struct iommu_sva_ops *ops); 433 + int iommu_sva_get_pasid(struct iommu_sva *handle); 484 434 485 435 #else /* CONFIG_IOMMU_API */ 486 436 ··· 780 698 const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode) 781 699 { 782 700 return NULL; 701 + } 702 + 703 + static inline bool 704 + iommu_dev_has_feature(struct device *dev, enum iommu_dev_features feat) 705 + { 706 + return false; 707 + } 708 + 709 + static inline bool 710 + iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features feat) 711 + { 712 + return false; 713 + } 714 + 715 + static inline int 716 + iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) 717 + { 718 + return -ENODEV; 719 + } 720 + 721 + static inline int 722 + iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) 723 + { 724 + return -ENODEV; 725 + } 726 + 727 + static inline int 728 + iommu_aux_attach_device(struct iommu_domain 
*domain, struct device *dev) 729 + { 730 + return -ENODEV; 731 + } 732 + 733 + static inline void 734 + iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev) 735 + { 736 + } 737 + 738 + static inline int 739 + iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev) 740 + { 741 + return -ENODEV; 742 + } 743 + 744 + static inline struct iommu_sva * 745 + iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata) 746 + { 747 + return NULL; 748 + } 749 + 750 + static inline void iommu_sva_unbind_device(struct iommu_sva *handle) 751 + { 752 + } 753 + 754 + static inline int iommu_sva_set_ops(struct iommu_sva *handle, 755 + const struct iommu_sva_ops *ops) 756 + { 757 + return -EINVAL; 758 + } 759 + 760 + static inline int iommu_sva_get_pasid(struct iommu_sva *handle) 761 + { 762 + return IOMMU_PASID_INVALID; 783 763 } 784 764 785 765 #endif /* CONFIG_IOMMU_API */
+8 -8
include/linux/iova.h
··· 76 76 unsigned long start_pfn; /* Lower limit for this domain */ 77 77 unsigned long dma_32bit_pfn; 78 78 unsigned long max32_alloc_size; /* Size of last failed allocation */ 79 + struct iova_fq __percpu *fq; /* Flush Queue */ 80 + 81 + atomic64_t fq_flush_start_cnt; /* Number of TLB flushes that 82 + have been started */ 83 + 84 + atomic64_t fq_flush_finish_cnt; /* Number of TLB flushes that 85 + have been finished */ 86 + 79 87 struct iova anchor; /* rbtree lookup anchor */ 80 88 struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE]; /* IOVA range caches */ 81 89 ··· 92 84 93 85 iova_entry_dtor entry_dtor; /* IOMMU driver specific destructor for 94 86 iova entry */ 95 - 96 - struct iova_fq __percpu *fq; /* Flush Queue */ 97 - 98 - atomic64_t fq_flush_start_cnt; /* Number of TLB flushes that 99 - have been started */ 100 - 101 - atomic64_t fq_flush_finish_cnt; /* Number of TLB flushes that 102 - have been finished */ 103 87 104 88 struct timer_list fq_timer; /* Timer to regularily empty the 105 89 flush-queues */
+14
include/linux/mdev.h
··· 15 15 16 16 struct mdev_device; 17 17 18 + /* 19 + * Called by the parent device driver to set the device which represents 20 + * this mdev in iommu protection scope. By default, the iommu device is 21 + * NULL, that indicates using vendor defined isolation. 22 + * 23 + * @dev: the mediated device that iommu will isolate. 24 + * @iommu_device: a pci device which represents the iommu for @dev. 25 + * 26 + * Return 0 for success, otherwise negative error value. 27 + */ 28 + int mdev_set_iommu_device(struct device *dev, struct device *iommu_device); 29 + 30 + struct device *mdev_get_iommu_device(struct device *dev); 31 + 18 32 /** 19 33 * struct mdev_parent_ops - Structure to be registered for each parent device to 20 34 * register the device to mdev module.
+16 -15
include/linux/pci.h
··· 1521 1521 1522 1522 bool pci_ats_disabled(void); 1523 1523 1524 - #ifdef CONFIG_PCI_ATS 1525 - /* Address Translation Service */ 1526 - void pci_ats_init(struct pci_dev *dev); 1527 - int pci_enable_ats(struct pci_dev *dev, int ps); 1528 - void pci_disable_ats(struct pci_dev *dev); 1529 - int pci_ats_queue_depth(struct pci_dev *dev); 1530 - int pci_ats_page_aligned(struct pci_dev *dev); 1531 - #else 1532 - static inline void pci_ats_init(struct pci_dev *d) { } 1533 - static inline int pci_enable_ats(struct pci_dev *d, int ps) { return -ENODEV; } 1534 - static inline void pci_disable_ats(struct pci_dev *d) { } 1535 - static inline int pci_ats_queue_depth(struct pci_dev *d) { return -ENODEV; } 1536 - static inline int pci_ats_page_aligned(struct pci_dev *dev) { return 0; } 1537 - #endif 1538 - 1539 1524 #ifdef CONFIG_PCIE_PTM 1540 1525 int pci_enable_ptm(struct pci_dev *dev, u8 *granularity); 1541 1526 #else ··· 1713 1728 static inline const struct pci_device_id *pci_match_id(const struct pci_device_id *ids, 1714 1729 struct pci_dev *dev) 1715 1730 { return NULL; } 1731 + static inline bool pci_ats_disabled(void) { return true; } 1716 1732 #endif /* CONFIG_PCI */ 1733 + 1734 + #ifdef CONFIG_PCI_ATS 1735 + /* Address Translation Service */ 1736 + void pci_ats_init(struct pci_dev *dev); 1737 + int pci_enable_ats(struct pci_dev *dev, int ps); 1738 + void pci_disable_ats(struct pci_dev *dev); 1739 + int pci_ats_queue_depth(struct pci_dev *dev); 1740 + int pci_ats_page_aligned(struct pci_dev *dev); 1741 + #else 1742 + static inline void pci_ats_init(struct pci_dev *d) { } 1743 + static inline int pci_enable_ats(struct pci_dev *d, int ps) { return -ENODEV; } 1744 + static inline void pci_disable_ats(struct pci_dev *d) { } 1745 + static inline int pci_ats_queue_depth(struct pci_dev *d) { return -ENODEV; } 1746 + static inline int pci_ats_page_aligned(struct pci_dev *dev) { return 0; } 1747 + #endif 1717 1748 1718 1749 /* Include architecture-dependent settings and 
functions */ 1719 1750