Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'iommu-updates-v5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu

Pull iommu updates from Joerg Roedel:

- Identity domain support for virtio-iommu

- Move flush queue code into iommu-dma

- Some fixes for AMD IOMMU suspend/resume support when x2apic is used

- Arm SMMU Updates from Will Deacon:
    - Revert evtq and priq back to their former sizes
    - Return early on short-descriptor page-table allocation failure
    - Fix page fault reporting for Adreno GPU on SMMUv2
    - Make SMMUv3 MMU notifier ops 'const'
    - Numerous new compatible strings for Qualcomm SMMUv2 implementations

- Various smaller fixes and cleanups

* tag 'iommu-updates-v5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (38 commits)
iommu/iova: Temporarily include dma-mapping.h from iova.h
iommu: Move flush queue data into iommu_dma_cookie
iommu/iova: Move flush queue code to iommu-dma
iommu/iova: Consolidate flush queue code
iommu/vt-d: Use put_pages_list
iommu/amd: Use put_pages_list
iommu/amd: Simplify pagetable freeing
iommu/iova: Squash flush_cb abstraction
iommu/iova: Squash entry_dtor abstraction
iommu/iova: Fix race between FQ timeout and teardown
iommu/amd: Fix typo in *glues … together* in comment
iommu/vt-d: Remove unused dma_to_mm_pfn function
iommu/vt-d: Drop duplicate check in dma_pte_free_pagetable()
iommu/vt-d: Use bitmap_zalloc() when applicable
iommu/amd: Remove useless irq affinity notifier
iommu/amd: X2apic mode: mask/unmask interrupts on suspend/resume
iommu/amd: X2apic mode: setup the INTX registers on mask/unmask
iommu/amd: X2apic mode: re-enable after resume
iommu/amd: Restore GA log/tail pointer on host resume
iommu/iova: Move fast alloc size roundup into alloc_iova_fast()
...

+498 -575
+2
Documentation/devicetree/bindings/iommu/arm,smmu.yaml
··· 38 38 - qcom,sc7280-smmu-500 39 39 - qcom,sc8180x-smmu-500 40 40 - qcom,sdm845-smmu-500 41 + - qcom,sdx55-smmu-500 41 42 - qcom,sm6350-smmu-500 42 43 - qcom,sm8150-smmu-500 43 44 - qcom,sm8250-smmu-500 44 45 - qcom,sm8350-smmu-500 46 + - qcom,sm8450-smmu-500 45 47 - const: arm,mmu-500 46 48 - description: Qcom Adreno GPUs implementing "arm,smmu-v2" 47 49 items:
-2
drivers/iommu/amd/amd_iommu_types.h
··· 645 645 /* DebugFS Info */ 646 646 struct dentry *debugfs; 647 647 #endif 648 - /* IRQ notifier for IntCapXT interrupt */ 649 - struct irq_affinity_notify intcapxt_notify; 650 648 }; 651 649 652 650 static inline struct amd_iommu *dev_to_amd_iommu(struct device *dev)
+59 -50
drivers/iommu/amd/init.c
··· 806 806 { 807 807 #ifdef CONFIG_IRQ_REMAP 808 808 u32 status, i; 809 + u64 entry; 809 810 810 811 if (!iommu->ga_log) 811 812 return -EINVAL; 812 813 813 - status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); 814 - 815 814 /* Check if already running */ 816 - if (status & (MMIO_STATUS_GALOG_RUN_MASK)) 815 + status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); 816 + if (WARN_ON(status & (MMIO_STATUS_GALOG_RUN_MASK))) 817 817 return 0; 818 + 819 + entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512; 820 + memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET, 821 + &entry, sizeof(entry)); 822 + entry = (iommu_virt_to_phys(iommu->ga_log_tail) & 823 + (BIT_ULL(52)-1)) & ~7ULL; 824 + memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET, 825 + &entry, sizeof(entry)); 826 + writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET); 827 + writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET); 828 + 818 829 819 830 iommu_feature_enable(iommu, CONTROL_GAINT_EN); 820 831 iommu_feature_enable(iommu, CONTROL_GALOG_EN); ··· 836 825 break; 837 826 } 838 827 839 - if (i >= LOOP_TIMEOUT) 828 + if (WARN_ON(i >= LOOP_TIMEOUT)) 840 829 return -EINVAL; 841 830 #endif /* CONFIG_IRQ_REMAP */ 842 831 return 0; ··· 845 834 static int iommu_init_ga_log(struct amd_iommu *iommu) 846 835 { 847 836 #ifdef CONFIG_IRQ_REMAP 848 - u64 entry; 849 - 850 837 if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) 851 838 return 0; 852 839 ··· 857 848 get_order(8)); 858 849 if (!iommu->ga_log_tail) 859 850 goto err_out; 860 - 861 - entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512; 862 - memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET, 863 - &entry, sizeof(entry)); 864 - entry = (iommu_virt_to_phys(iommu->ga_log_tail) & 865 - (BIT_ULL(52)-1)) & ~7ULL; 866 - memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET, 867 - &entry, sizeof(entry)); 868 - writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET); 869 - writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET); 870 851 871 852 
return 0; 872 853 err_out: ··· 1522 1523 } 1523 1524 1524 1525 /* 1525 - * This function clues the initialization function for one IOMMU 1526 + * This function glues the initialization function for one IOMMU 1526 1527 * together and also allocates the command buffer and programs the 1527 1528 * hardware. It does NOT enable the IOMMU. This is done afterwards. 1528 1529 */ ··· 2015 2016 }; 2016 2017 } __attribute__ ((packed)); 2017 2018 2018 - /* 2019 - * There isn't really any need to mask/unmask at the irqchip level because 2020 - * the 64-bit INTCAPXT registers can be updated atomically without tearing 2021 - * when the affinity is being updated. 2022 - */ 2023 - static void intcapxt_unmask_irq(struct irq_data *data) 2024 - { 2025 - } 2026 - 2027 - static void intcapxt_mask_irq(struct irq_data *data) 2028 - { 2029 - } 2030 2019 2031 2020 static struct irq_chip intcapxt_controller; 2032 2021 2033 2022 static int intcapxt_irqdomain_activate(struct irq_domain *domain, 2034 2023 struct irq_data *irqd, bool reserve) 2035 2024 { 2036 - struct amd_iommu *iommu = irqd->chip_data; 2037 - struct irq_cfg *cfg = irqd_cfg(irqd); 2038 - union intcapxt xt; 2039 - 2040 - xt.capxt = 0ULL; 2041 - xt.dest_mode_logical = apic->dest_mode_logical; 2042 - xt.vector = cfg->vector; 2043 - xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0); 2044 - xt.destid_24_31 = cfg->dest_apicid >> 24; 2045 - 2046 - /** 2047 - * Current IOMMU implemtation uses the same IRQ for all 2048 - * 3 IOMMU interrupts. 
2049 - */ 2050 - writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET); 2051 - writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET); 2052 - writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET); 2053 2025 return 0; 2054 2026 } 2055 2027 2056 2028 static void intcapxt_irqdomain_deactivate(struct irq_domain *domain, 2057 2029 struct irq_data *irqd) 2058 2030 { 2059 - intcapxt_mask_irq(irqd); 2060 2031 } 2061 2032 2062 2033 ··· 2060 2091 irq_domain_free_irqs_top(domain, virq, nr_irqs); 2061 2092 } 2062 2093 2094 + 2095 + static void intcapxt_unmask_irq(struct irq_data *irqd) 2096 + { 2097 + struct amd_iommu *iommu = irqd->chip_data; 2098 + struct irq_cfg *cfg = irqd_cfg(irqd); 2099 + union intcapxt xt; 2100 + 2101 + xt.capxt = 0ULL; 2102 + xt.dest_mode_logical = apic->dest_mode_logical; 2103 + xt.vector = cfg->vector; 2104 + xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0); 2105 + xt.destid_24_31 = cfg->dest_apicid >> 24; 2106 + 2107 + /** 2108 + * Current IOMMU implementation uses the same IRQ for all 2109 + * 3 IOMMU interrupts. 
2110 + */ 2111 + writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET); 2112 + writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET); 2113 + writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET); 2114 + } 2115 + 2116 + static void intcapxt_mask_irq(struct irq_data *irqd) 2117 + { 2118 + struct amd_iommu *iommu = irqd->chip_data; 2119 + 2120 + writeq(0, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET); 2121 + writeq(0, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET); 2122 + writeq(0, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET); 2123 + } 2124 + 2125 + 2063 2126 static int intcapxt_set_affinity(struct irq_data *irqd, 2064 2127 const struct cpumask *mask, bool force) 2065 2128 { ··· 2101 2100 ret = parent->chip->irq_set_affinity(parent, mask, force); 2102 2101 if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) 2103 2102 return ret; 2103 + return 0; 2104 + } 2104 2105 2105 - return intcapxt_irqdomain_activate(irqd->domain, irqd, false); 2106 + static int intcapxt_set_wake(struct irq_data *irqd, unsigned int on) 2107 + { 2108 + return on ? -EOPNOTSUPP : 0; 2106 2109 } 2107 2110 2108 2111 static struct irq_chip intcapxt_controller = { ··· 2116 2111 .irq_ack = irq_chip_ack_parent, 2117 2112 .irq_retrigger = irq_chip_retrigger_hierarchy, 2118 2113 .irq_set_affinity = intcapxt_set_affinity, 2119 - .flags = IRQCHIP_SKIP_SET_WAKE, 2114 + .irq_set_wake = intcapxt_set_wake, 2115 + .flags = IRQCHIP_MASK_ON_SUSPEND, 2120 2116 }; 2121 2117 2122 2118 static const struct irq_domain_ops intcapxt_domain_ops = { ··· 2179 2173 return ret; 2180 2174 } 2181 2175 2182 - iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN); 2183 2176 return 0; 2184 2177 } 2185 2178 ··· 2201 2196 2202 2197 iommu->int_enabled = true; 2203 2198 enable_faults: 2199 + 2200 + if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) 2201 + iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN); 2202 + 2204 2203 iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); 2205 2204 2206 2205 if (iommu->ppr_log != NULL)
+46 -74
drivers/iommu/amd/io_pgtable.c
··· 74 74 * 75 75 ****************************************************************************/ 76 76 77 - static void free_page_list(struct page *freelist) 77 + static void free_pt_page(u64 *pt, struct list_head *freelist) 78 78 { 79 - while (freelist != NULL) { 80 - unsigned long p = (unsigned long)page_address(freelist); 79 + struct page *p = virt_to_page(pt); 81 80 82 - freelist = freelist->freelist; 83 - free_page(p); 81 + list_add_tail(&p->lru, freelist); 82 + } 83 + 84 + static void free_pt_lvl(u64 *pt, struct list_head *freelist, int lvl) 85 + { 86 + u64 *p; 87 + int i; 88 + 89 + for (i = 0; i < 512; ++i) { 90 + /* PTE present? */ 91 + if (!IOMMU_PTE_PRESENT(pt[i])) 92 + continue; 93 + 94 + /* Large PTE? */ 95 + if (PM_PTE_LEVEL(pt[i]) == 0 || 96 + PM_PTE_LEVEL(pt[i]) == 7) 97 + continue; 98 + 99 + /* 100 + * Free the next level. No need to look at l1 tables here since 101 + * they can only contain leaf PTEs; just free them directly. 102 + */ 103 + p = IOMMU_PTE_PAGE(pt[i]); 104 + if (lvl > 2) 105 + free_pt_lvl(p, freelist, lvl - 1); 106 + else 107 + free_pt_page(p, freelist); 84 108 } 109 + 110 + free_pt_page(pt, freelist); 85 111 } 86 112 87 - static struct page *free_pt_page(unsigned long pt, struct page *freelist) 88 - { 89 - struct page *p = virt_to_page((void *)pt); 90 - 91 - p->freelist = freelist; 92 - 93 - return p; 94 - } 95 - 96 - #define DEFINE_FREE_PT_FN(LVL, FN) \ 97 - static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist) \ 98 - { \ 99 - unsigned long p; \ 100 - u64 *pt; \ 101 - int i; \ 102 - \ 103 - pt = (u64 *)__pt; \ 104 - \ 105 - for (i = 0; i < 512; ++i) { \ 106 - /* PTE present? */ \ 107 - if (!IOMMU_PTE_PRESENT(pt[i])) \ 108 - continue; \ 109 - \ 110 - /* Large PTE? 
*/ \ 111 - if (PM_PTE_LEVEL(pt[i]) == 0 || \ 112 - PM_PTE_LEVEL(pt[i]) == 7) \ 113 - continue; \ 114 - \ 115 - p = (unsigned long)IOMMU_PTE_PAGE(pt[i]); \ 116 - freelist = FN(p, freelist); \ 117 - } \ 118 - \ 119 - return free_pt_page((unsigned long)pt, freelist); \ 120 - } 121 - 122 - DEFINE_FREE_PT_FN(l2, free_pt_page) 123 - DEFINE_FREE_PT_FN(l3, free_pt_l2) 124 - DEFINE_FREE_PT_FN(l4, free_pt_l3) 125 - DEFINE_FREE_PT_FN(l5, free_pt_l4) 126 - DEFINE_FREE_PT_FN(l6, free_pt_l5) 127 - 128 - static struct page *free_sub_pt(unsigned long root, int mode, 129 - struct page *freelist) 113 + static void free_sub_pt(u64 *root, int mode, struct list_head *freelist) 130 114 { 131 115 switch (mode) { 132 116 case PAGE_MODE_NONE: 133 117 case PAGE_MODE_7_LEVEL: 134 118 break; 135 119 case PAGE_MODE_1_LEVEL: 136 - freelist = free_pt_page(root, freelist); 120 + free_pt_page(root, freelist); 137 121 break; 138 122 case PAGE_MODE_2_LEVEL: 139 - freelist = free_pt_l2(root, freelist); 140 - break; 141 123 case PAGE_MODE_3_LEVEL: 142 - freelist = free_pt_l3(root, freelist); 143 - break; 144 124 case PAGE_MODE_4_LEVEL: 145 - freelist = free_pt_l4(root, freelist); 146 - break; 147 125 case PAGE_MODE_5_LEVEL: 148 - freelist = free_pt_l5(root, freelist); 149 - break; 150 126 case PAGE_MODE_6_LEVEL: 151 - freelist = free_pt_l6(root, freelist); 127 + free_pt_lvl(root, freelist, mode); 152 128 break; 153 129 default: 154 130 BUG(); 155 131 } 156 - 157 - return freelist; 158 132 } 159 133 160 134 void amd_iommu_domain_set_pgtable(struct protection_domain *domain, ··· 336 362 return pte; 337 363 } 338 364 339 - static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist) 365 + static void free_clear_pte(u64 *pte, u64 pteval, struct list_head *freelist) 340 366 { 341 - unsigned long pt; 367 + u64 *pt; 342 368 int mode; 343 369 344 370 while (cmpxchg64(pte, pteval, 0) != pteval) { ··· 347 373 } 348 374 349 375 if (!IOMMU_PTE_PRESENT(pteval)) 350 - return freelist; 376 + 
return; 351 377 352 - pt = (unsigned long)IOMMU_PTE_PAGE(pteval); 378 + pt = IOMMU_PTE_PAGE(pteval); 353 379 mode = IOMMU_PTE_MODE(pteval); 354 380 355 - return free_sub_pt(pt, mode, freelist); 381 + free_sub_pt(pt, mode, freelist); 356 382 } 357 383 358 384 /* ··· 366 392 phys_addr_t paddr, size_t size, int prot, gfp_t gfp) 367 393 { 368 394 struct protection_domain *dom = io_pgtable_ops_to_domain(ops); 369 - struct page *freelist = NULL; 395 + LIST_HEAD(freelist); 370 396 bool updated = false; 371 397 u64 __pte, *pte; 372 398 int ret, i, count; ··· 386 412 goto out; 387 413 388 414 for (i = 0; i < count; ++i) 389 - freelist = free_clear_pte(&pte[i], pte[i], freelist); 415 + free_clear_pte(&pte[i], pte[i], &freelist); 390 416 391 - if (freelist != NULL) 417 + if (!list_empty(&freelist)) 392 418 updated = true; 393 419 394 420 if (count > 1) { ··· 423 449 } 424 450 425 451 /* Everything flushed out, free pages now */ 426 - free_page_list(freelist); 452 + put_pages_list(&freelist); 427 453 428 454 return ret; 429 455 } ··· 485 511 { 486 512 struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop); 487 513 struct protection_domain *dom; 488 - struct page *freelist = NULL; 489 - unsigned long root; 514 + LIST_HEAD(freelist); 490 515 491 516 if (pgtable->mode == PAGE_MODE_NONE) 492 517 return; ··· 502 529 BUG_ON(pgtable->mode < PAGE_MODE_NONE || 503 530 pgtable->mode > PAGE_MODE_6_LEVEL); 504 531 505 - root = (unsigned long)pgtable->root; 506 - freelist = free_sub_pt(root, pgtable->mode, freelist); 532 + free_sub_pt(pgtable->root, pgtable->mode, &freelist); 507 533 508 - free_page_list(freelist); 534 + put_pages_list(&freelist); 509 535 } 510 536 511 537 static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
+1 -1
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
··· 220 220 kfree(mn_to_smmu(mn)); 221 221 } 222 222 223 - static struct mmu_notifier_ops arm_smmu_mmu_notifier_ops = { 223 + static const struct mmu_notifier_ops arm_smmu_mmu_notifier_ops = { 224 224 .invalidate_range = arm_smmu_mm_invalidate_range, 225 225 .release = arm_smmu_mm_release, 226 226 .free_notifier = arm_smmu_mmu_notifier_free,
+2 -3
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
··· 184 184 #else 185 185 #define Q_MAX_SZ_SHIFT (PAGE_SHIFT + MAX_ORDER - 1) 186 186 #endif 187 - #define Q_MIN_SZ_SHIFT (PAGE_SHIFT) 188 187 189 188 /* 190 189 * Stream table. ··· 373 374 /* Event queue */ 374 375 #define EVTQ_ENT_SZ_SHIFT 5 375 376 #define EVTQ_ENT_DWORDS ((1 << EVTQ_ENT_SZ_SHIFT) >> 3) 376 - #define EVTQ_MAX_SZ_SHIFT (Q_MIN_SZ_SHIFT - EVTQ_ENT_SZ_SHIFT) 377 + #define EVTQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - EVTQ_ENT_SZ_SHIFT) 377 378 378 379 #define EVTQ_0_ID GENMASK_ULL(7, 0) 379 380 ··· 399 400 /* PRI queue */ 400 401 #define PRIQ_ENT_SZ_SHIFT 4 401 402 #define PRIQ_ENT_DWORDS ((1 << PRIQ_ENT_SZ_SHIFT) >> 3) 402 - #define PRIQ_MAX_SZ_SHIFT (Q_MIN_SZ_SHIFT - PRIQ_ENT_SZ_SHIFT) 403 + #define PRIQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - PRIQ_ENT_SZ_SHIFT) 403 404 404 405 #define PRIQ_0_SID GENMASK_ULL(31, 0) 405 406 #define PRIQ_0_SSID GENMASK_ULL(51, 32)
+2 -1
drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
··· 51 51 info->fsynr1 = arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_FSYNR1); 52 52 info->far = arm_smmu_cb_readq(smmu, cfg->cbndx, ARM_SMMU_CB_FAR); 53 53 info->cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(cfg->cbndx)); 54 - info->ttbr0 = arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_TTBR0); 54 + info->ttbr0 = arm_smmu_cb_readq(smmu, cfg->cbndx, ARM_SMMU_CB_TTBR0); 55 55 info->contextidr = arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_CONTEXTIDR); 56 56 } 57 57 ··· 415 415 { .compatible = "qcom,sm8150-smmu-500" }, 416 416 { .compatible = "qcom,sm8250-smmu-500" }, 417 417 { .compatible = "qcom,sm8350-smmu-500" }, 418 + { .compatible = "qcom,sm8450-smmu-500" }, 418 419 { } 419 420 }; 420 421
+217 -57
drivers/iommu/dma-iommu.c
··· 9 9 */ 10 10 11 11 #include <linux/acpi_iort.h> 12 + #include <linux/atomic.h> 13 + #include <linux/crash_dump.h> 12 14 #include <linux/device.h> 13 - #include <linux/dma-map-ops.h> 15 + #include <linux/dma-direct.h> 14 16 #include <linux/dma-iommu.h> 17 + #include <linux/dma-map-ops.h> 15 18 #include <linux/gfp.h> 16 19 #include <linux/huge_mm.h> 17 20 #include <linux/iommu.h> ··· 23 20 #include <linux/mm.h> 24 21 #include <linux/mutex.h> 25 22 #include <linux/pci.h> 26 - #include <linux/swiotlb.h> 27 23 #include <linux/scatterlist.h> 24 + #include <linux/spinlock.h> 25 + #include <linux/swiotlb.h> 28 26 #include <linux/vmalloc.h> 29 - #include <linux/crash_dump.h> 30 - #include <linux/dma-direct.h> 31 27 32 28 struct iommu_dma_msi_page { 33 29 struct list_head list; ··· 43 41 enum iommu_dma_cookie_type type; 44 42 union { 45 43 /* Full allocator for IOMMU_DMA_IOVA_COOKIE */ 46 - struct iova_domain iovad; 44 + struct { 45 + struct iova_domain iovad; 46 + 47 + struct iova_fq __percpu *fq; /* Flush queue */ 48 + /* Number of TLB flushes that have been started */ 49 + atomic64_t fq_flush_start_cnt; 50 + /* Number of TLB flushes that have been finished */ 51 + atomic64_t fq_flush_finish_cnt; 52 + /* Timer to regularily empty the flush queues */ 53 + struct timer_list fq_timer; 54 + /* 1 when timer is active, 0 when not */ 55 + atomic_t fq_timer_on; 56 + }; 47 57 /* Trivial linear page allocator for IOMMU_DMA_MSI_COOKIE */ 48 58 dma_addr_t msi_iova; 49 59 }; ··· 78 64 } 79 65 early_param("iommu.forcedac", iommu_dma_forcedac_setup); 80 66 81 - static void iommu_dma_entry_dtor(unsigned long data) 67 + /* Number of entries per flush queue */ 68 + #define IOVA_FQ_SIZE 256 69 + 70 + /* Timeout (in ms) after which entries are flushed from the queue */ 71 + #define IOVA_FQ_TIMEOUT 10 72 + 73 + /* Flush queue entry for deferred flushing */ 74 + struct iova_fq_entry { 75 + unsigned long iova_pfn; 76 + unsigned long pages; 77 + struct list_head freelist; 78 + u64 counter; /* 
Flush counter when this entry was added */ 79 + }; 80 + 81 + /* Per-CPU flush queue structure */ 82 + struct iova_fq { 83 + struct iova_fq_entry entries[IOVA_FQ_SIZE]; 84 + unsigned int head, tail; 85 + spinlock_t lock; 86 + }; 87 + 88 + #define fq_ring_for_each(i, fq) \ 89 + for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE) 90 + 91 + static inline bool fq_full(struct iova_fq *fq) 82 92 { 83 - struct page *freelist = (struct page *)data; 93 + assert_spin_locked(&fq->lock); 94 + return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head); 95 + } 84 96 85 - while (freelist) { 86 - unsigned long p = (unsigned long)page_address(freelist); 97 + static inline unsigned int fq_ring_add(struct iova_fq *fq) 98 + { 99 + unsigned int idx = fq->tail; 87 100 88 - freelist = freelist->freelist; 89 - free_page(p); 101 + assert_spin_locked(&fq->lock); 102 + 103 + fq->tail = (idx + 1) % IOVA_FQ_SIZE; 104 + 105 + return idx; 106 + } 107 + 108 + static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq) 109 + { 110 + u64 counter = atomic64_read(&cookie->fq_flush_finish_cnt); 111 + unsigned int idx; 112 + 113 + assert_spin_locked(&fq->lock); 114 + 115 + fq_ring_for_each(idx, fq) { 116 + 117 + if (fq->entries[idx].counter >= counter) 118 + break; 119 + 120 + put_pages_list(&fq->entries[idx].freelist); 121 + free_iova_fast(&cookie->iovad, 122 + fq->entries[idx].iova_pfn, 123 + fq->entries[idx].pages); 124 + 125 + fq->head = (fq->head + 1) % IOVA_FQ_SIZE; 90 126 } 127 + } 128 + 129 + static void fq_flush_iotlb(struct iommu_dma_cookie *cookie) 130 + { 131 + atomic64_inc(&cookie->fq_flush_start_cnt); 132 + cookie->fq_domain->ops->flush_iotlb_all(cookie->fq_domain); 133 + atomic64_inc(&cookie->fq_flush_finish_cnt); 134 + } 135 + 136 + static void fq_flush_timeout(struct timer_list *t) 137 + { 138 + struct iommu_dma_cookie *cookie = from_timer(cookie, t, fq_timer); 139 + int cpu; 140 + 141 + atomic_set(&cookie->fq_timer_on, 0); 142 + fq_flush_iotlb(cookie); 
143 + 144 + for_each_possible_cpu(cpu) { 145 + unsigned long flags; 146 + struct iova_fq *fq; 147 + 148 + fq = per_cpu_ptr(cookie->fq, cpu); 149 + spin_lock_irqsave(&fq->lock, flags); 150 + fq_ring_free(cookie, fq); 151 + spin_unlock_irqrestore(&fq->lock, flags); 152 + } 153 + } 154 + 155 + static void queue_iova(struct iommu_dma_cookie *cookie, 156 + unsigned long pfn, unsigned long pages, 157 + struct list_head *freelist) 158 + { 159 + struct iova_fq *fq; 160 + unsigned long flags; 161 + unsigned int idx; 162 + 163 + /* 164 + * Order against the IOMMU driver's pagetable update from unmapping 165 + * @pte, to guarantee that fq_flush_iotlb() observes that if called 166 + * from a different CPU before we release the lock below. Full barrier 167 + * so it also pairs with iommu_dma_init_fq() to avoid seeing partially 168 + * written fq state here. 169 + */ 170 + smp_mb(); 171 + 172 + fq = raw_cpu_ptr(cookie->fq); 173 + spin_lock_irqsave(&fq->lock, flags); 174 + 175 + /* 176 + * First remove all entries from the flush queue that have already been 177 + * flushed out on another CPU. This makes the fq_full() check below less 178 + * likely to be true. 179 + */ 180 + fq_ring_free(cookie, fq); 181 + 182 + if (fq_full(fq)) { 183 + fq_flush_iotlb(cookie); 184 + fq_ring_free(cookie, fq); 185 + } 186 + 187 + idx = fq_ring_add(fq); 188 + 189 + fq->entries[idx].iova_pfn = pfn; 190 + fq->entries[idx].pages = pages; 191 + fq->entries[idx].counter = atomic64_read(&cookie->fq_flush_start_cnt); 192 + list_splice(freelist, &fq->entries[idx].freelist); 193 + 194 + spin_unlock_irqrestore(&fq->lock, flags); 195 + 196 + /* Avoid false sharing as much as possible. 
*/ 197 + if (!atomic_read(&cookie->fq_timer_on) && 198 + !atomic_xchg(&cookie->fq_timer_on, 1)) 199 + mod_timer(&cookie->fq_timer, 200 + jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT)); 201 + } 202 + 203 + static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie) 204 + { 205 + int cpu, idx; 206 + 207 + if (!cookie->fq) 208 + return; 209 + 210 + del_timer_sync(&cookie->fq_timer); 211 + /* The IOVAs will be torn down separately, so just free our queued pages */ 212 + for_each_possible_cpu(cpu) { 213 + struct iova_fq *fq = per_cpu_ptr(cookie->fq, cpu); 214 + 215 + fq_ring_for_each(idx, fq) 216 + put_pages_list(&fq->entries[idx].freelist); 217 + } 218 + 219 + free_percpu(cookie->fq); 220 + } 221 + 222 + /* sysfs updates are serialised by the mutex of the group owning @domain */ 223 + int iommu_dma_init_fq(struct iommu_domain *domain) 224 + { 225 + struct iommu_dma_cookie *cookie = domain->iova_cookie; 226 + struct iova_fq __percpu *queue; 227 + int i, cpu; 228 + 229 + if (cookie->fq_domain) 230 + return 0; 231 + 232 + atomic64_set(&cookie->fq_flush_start_cnt, 0); 233 + atomic64_set(&cookie->fq_flush_finish_cnt, 0); 234 + 235 + queue = alloc_percpu(struct iova_fq); 236 + if (!queue) { 237 + pr_warn("iova flush queue initialization failed\n"); 238 + return -ENOMEM; 239 + } 240 + 241 + for_each_possible_cpu(cpu) { 242 + struct iova_fq *fq = per_cpu_ptr(queue, cpu); 243 + 244 + fq->head = 0; 245 + fq->tail = 0; 246 + 247 + spin_lock_init(&fq->lock); 248 + 249 + for (i = 0; i < IOVA_FQ_SIZE; i++) 250 + INIT_LIST_HEAD(&fq->entries[i].freelist); 251 + } 252 + 253 + cookie->fq = queue; 254 + 255 + timer_setup(&cookie->fq_timer, fq_flush_timeout, 0); 256 + atomic_set(&cookie->fq_timer_on, 0); 257 + /* 258 + * Prevent incomplete fq state being observable. 
Pairs with path from 259 + * __iommu_dma_unmap() through iommu_dma_free_iova() to queue_iova() 260 + */ 261 + smp_wmb(); 262 + WRITE_ONCE(cookie->fq_domain, domain); 263 + return 0; 91 264 } 92 265 93 266 static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie) ··· 357 156 if (!cookie) 358 157 return; 359 158 360 - if (cookie->type == IOMMU_DMA_IOVA_COOKIE && cookie->iovad.granule) 159 + if (cookie->type == IOMMU_DMA_IOVA_COOKIE && cookie->iovad.granule) { 160 + iommu_dma_free_fq(cookie); 361 161 put_iova_domain(&cookie->iovad); 162 + } 362 163 363 164 list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list) { 364 165 list_del(&msi->list); ··· 497 294 return ret; 498 295 } 499 296 500 - static void iommu_dma_flush_iotlb_all(struct iova_domain *iovad) 501 - { 502 - struct iommu_dma_cookie *cookie; 503 - struct iommu_domain *domain; 504 - 505 - cookie = container_of(iovad, struct iommu_dma_cookie, iovad); 506 - domain = cookie->fq_domain; 507 - 508 - domain->ops->flush_iotlb_all(domain); 509 - } 510 - 511 297 static bool dev_is_untrusted(struct device *dev) 512 298 { 513 299 return dev_is_pci(dev) && to_pci_dev(dev)->untrusted; ··· 505 313 static bool dev_use_swiotlb(struct device *dev) 506 314 { 507 315 return IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev); 508 - } 509 - 510 - /* sysfs updates are serialised by the mutex of the group owning @domain */ 511 - int iommu_dma_init_fq(struct iommu_domain *domain) 512 - { 513 - struct iommu_dma_cookie *cookie = domain->iova_cookie; 514 - int ret; 515 - 516 - if (cookie->fq_domain) 517 - return 0; 518 - 519 - ret = init_iova_flush_queue(&cookie->iovad, iommu_dma_flush_iotlb_all, 520 - iommu_dma_entry_dtor); 521 - if (ret) { 522 - pr_warn("iova flush queue initialization failed\n"); 523 - return ret; 524 - } 525 - /* 526 - * Prevent incomplete iovad->fq being observable. 
Pairs with path from 527 - * __iommu_dma_unmap() through iommu_dma_free_iova() to queue_iova() 528 - */ 529 - smp_wmb(); 530 - WRITE_ONCE(cookie->fq_domain, domain); 531 - return 0; 532 316 } 533 317 534 318 /** ··· 610 442 611 443 shift = iova_shift(iovad); 612 444 iova_len = size >> shift; 613 - /* 614 - * Freeing non-power-of-two-sized allocations back into the IOVA caches 615 - * will come back to bite us badly, so we have to waste a bit of space 616 - * rounding up anything cacheable to make sure that can't happen. The 617 - * order of the unadjusted size will still match upon freeing. 618 - */ 619 - if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1))) 620 - iova_len = roundup_pow_of_two(iova_len); 621 445 622 446 dma_limit = min_not_zero(dma_limit, dev->bus_dma_limit); 623 447 ··· 637 477 if (cookie->type == IOMMU_DMA_MSI_COOKIE) 638 478 cookie->msi_iova -= size; 639 479 else if (gather && gather->queued) 640 - queue_iova(iovad, iova_pfn(iovad, iova), 480 + queue_iova(cookie, iova_pfn(iovad, iova), 641 481 size >> iova_shift(iovad), 642 - (unsigned long)gather->freelist); 482 + &gather->freelist); 643 483 else 644 484 free_iova_fast(iovad, iova_pfn(iovad, iova), 645 485 size >> iova_shift(iovad));
+37 -74
drivers/iommu/intel/iommu.c
··· 133 133 134 134 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things 135 135 are never going to work. */ 136 - static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn) 137 - { 138 - return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT); 139 - } 140 - 141 136 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn) 142 137 { 143 138 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT); ··· 1275 1280 unsigned long last_pfn, 1276 1281 int retain_level) 1277 1282 { 1278 - BUG_ON(!domain_pfn_supported(domain, start_pfn)); 1279 - BUG_ON(!domain_pfn_supported(domain, last_pfn)); 1280 - BUG_ON(start_pfn > last_pfn); 1281 - 1282 1283 dma_pte_clear_range(domain, start_pfn, last_pfn); 1283 1284 1284 1285 /* We don't need lock here; nobody else touches the iova range */ ··· 1294 1303 know the hardware page-walk will no longer touch them. 1295 1304 The 'pte' argument is the *parent* PTE, pointing to the page that is to 1296 1305 be freed. */ 1297 - static struct page *dma_pte_list_pagetables(struct dmar_domain *domain, 1298 - int level, struct dma_pte *pte, 1299 - struct page *freelist) 1306 + static void dma_pte_list_pagetables(struct dmar_domain *domain, 1307 + int level, struct dma_pte *pte, 1308 + struct list_head *freelist) 1300 1309 { 1301 1310 struct page *pg; 1302 1311 1303 1312 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT); 1304 - pg->freelist = freelist; 1305 - freelist = pg; 1313 + list_add_tail(&pg->lru, freelist); 1306 1314 1307 1315 if (level == 1) 1308 - return freelist; 1316 + return; 1309 1317 1310 1318 pte = page_address(pg); 1311 1319 do { 1312 1320 if (dma_pte_present(pte) && !dma_pte_superpage(pte)) 1313 - freelist = dma_pte_list_pagetables(domain, level - 1, 1314 - pte, freelist); 1321 + dma_pte_list_pagetables(domain, level - 1, pte, freelist); 1315 1322 pte++; 1316 1323 } while (!first_pte_in_page(pte)); 1317 - 1318 - return freelist; 1319 1324 } 1320 1325 1321 - static struct page *dma_pte_clear_level(struct dmar_domain 
*domain, int level, 1322 - struct dma_pte *pte, unsigned long pfn, 1323 - unsigned long start_pfn, 1324 - unsigned long last_pfn, 1325 - struct page *freelist) 1326 + static void dma_pte_clear_level(struct dmar_domain *domain, int level, 1327 + struct dma_pte *pte, unsigned long pfn, 1328 + unsigned long start_pfn, unsigned long last_pfn, 1329 + struct list_head *freelist) 1326 1330 { 1327 1331 struct dma_pte *first_pte = NULL, *last_pte = NULL; 1328 1332 ··· 1336 1350 /* These suborbinate page tables are going away entirely. Don't 1337 1351 bother to clear them; we're just going to *free* them. */ 1338 1352 if (level > 1 && !dma_pte_superpage(pte)) 1339 - freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist); 1353 + dma_pte_list_pagetables(domain, level - 1, pte, freelist); 1340 1354 1341 1355 dma_clear_pte(pte); 1342 1356 if (!first_pte) ··· 1344 1358 last_pte = pte; 1345 1359 } else if (level > 1) { 1346 1360 /* Recurse down into a level that isn't *entirely* obsolete */ 1347 - freelist = dma_pte_clear_level(domain, level - 1, 1348 - phys_to_virt(dma_pte_addr(pte)), 1349 - level_pfn, start_pfn, last_pfn, 1350 - freelist); 1361 + dma_pte_clear_level(domain, level - 1, 1362 + phys_to_virt(dma_pte_addr(pte)), 1363 + level_pfn, start_pfn, last_pfn, 1364 + freelist); 1351 1365 } 1352 1366 next: 1353 1367 pfn = level_pfn + level_size(level); ··· 1356 1370 if (first_pte) 1357 1371 domain_flush_cache(domain, first_pte, 1358 1372 (void *)++last_pte - (void *)first_pte); 1359 - 1360 - return freelist; 1361 1373 } 1362 1374 1363 1375 /* We can't just free the pages because the IOMMU may still be walking 1364 1376 the page tables, and may have cached the intermediate levels. The 1365 1377 pages can only be freed after the IOTLB flush has been done. 
*/ 1366 - static struct page *domain_unmap(struct dmar_domain *domain, 1367 - unsigned long start_pfn, 1368 - unsigned long last_pfn, 1369 - struct page *freelist) 1378 + static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn, 1379 + unsigned long last_pfn, struct list_head *freelist) 1370 1380 { 1371 1381 BUG_ON(!domain_pfn_supported(domain, start_pfn)); 1372 1382 BUG_ON(!domain_pfn_supported(domain, last_pfn)); 1373 1383 BUG_ON(start_pfn > last_pfn); 1374 1384 1375 1385 /* we don't need lock here; nobody else touches the iova range */ 1376 - freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw), 1377 - domain->pgd, 0, start_pfn, last_pfn, 1378 - freelist); 1386 + dma_pte_clear_level(domain, agaw_to_level(domain->agaw), 1387 + domain->pgd, 0, start_pfn, last_pfn, freelist); 1379 1388 1380 1389 /* free pgd */ 1381 1390 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) { 1382 1391 struct page *pgd_page = virt_to_page(domain->pgd); 1383 - pgd_page->freelist = freelist; 1384 - freelist = pgd_page; 1385 - 1392 + list_add_tail(&pgd_page->lru, freelist); 1386 1393 domain->pgd = NULL; 1387 - } 1388 - 1389 - return freelist; 1390 - } 1391 - 1392 - static void dma_free_pagelist(struct page *freelist) 1393 - { 1394 - struct page *pg; 1395 - 1396 - while ((pg = freelist)) { 1397 - freelist = pg->freelist; 1398 - free_pgtable_page(page_address(pg)); 1399 1394 } 1400 1395 } 1401 1396 ··· 1845 1878 1846 1879 static int iommu_init_domains(struct intel_iommu *iommu) 1847 1880 { 1848 - u32 ndomains, nlongs; 1881 + u32 ndomains; 1849 1882 size_t size; 1850 1883 1851 1884 ndomains = cap_ndoms(iommu->cap); 1852 1885 pr_debug("%s: Number of Domains supported <%d>\n", 1853 1886 iommu->name, ndomains); 1854 - nlongs = BITS_TO_LONGS(ndomains); 1855 1887 1856 1888 spin_lock_init(&iommu->lock); 1857 1889 1858 - iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL); 1890 + iommu->domain_ids = bitmap_zalloc(ndomains, 
GFP_KERNEL); 1859 1891 if (!iommu->domain_ids) 1860 1892 return -ENOMEM; 1861 1893 ··· 1869 1903 if (!iommu->domains || !iommu->domains[0]) { 1870 1904 pr_err("%s: Allocating domain array failed\n", 1871 1905 iommu->name); 1872 - kfree(iommu->domain_ids); 1906 + bitmap_free(iommu->domain_ids); 1873 1907 kfree(iommu->domains); 1874 1908 iommu->domain_ids = NULL; 1875 1909 iommu->domains = NULL; ··· 1930 1964 for (i = 0; i < elems; i++) 1931 1965 kfree(iommu->domains[i]); 1932 1966 kfree(iommu->domains); 1933 - kfree(iommu->domain_ids); 1967 + bitmap_free(iommu->domain_ids); 1934 1968 iommu->domains = NULL; 1935 1969 iommu->domain_ids = NULL; 1936 1970 } ··· 2061 2095 domain_remove_dev_info(domain); 2062 2096 2063 2097 if (domain->pgd) { 2064 - struct page *freelist; 2098 + LIST_HEAD(freelist); 2065 2099 2066 - freelist = domain_unmap(domain, 0, 2067 - DOMAIN_MAX_PFN(domain->gaw), NULL); 2068 - dma_free_pagelist(freelist); 2100 + domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw), &freelist); 2101 + put_pages_list(&freelist); 2069 2102 } 2070 2103 2071 2104 free_domain_mem(domain); ··· 2077 2112 */ 2078 2113 static inline unsigned long context_get_sm_pds(struct pasid_table *table) 2079 2114 { 2080 - int pds, max_pde; 2115 + unsigned long pds, max_pde; 2081 2116 2082 2117 max_pde = table->max_pasid >> PASID_PDE_SHIFT; 2083 - pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS); 2118 + pds = find_first_bit(&max_pde, MAX_NR_PASID_BITS); 2084 2119 if (pds < 7) 2085 2120 return 0; 2086 2121 ··· 4157 4192 { 4158 4193 struct dmar_drhd_unit *drhd; 4159 4194 struct intel_iommu *iommu; 4160 - struct page *freelist; 4195 + LIST_HEAD(freelist); 4161 4196 4162 - freelist = domain_unmap(si_domain, 4163 - start_vpfn, last_vpfn, 4164 - NULL); 4197 + domain_unmap(si_domain, start_vpfn, last_vpfn, &freelist); 4165 4198 4166 4199 rcu_read_lock(); 4167 4200 for_each_active_iommu(iommu, drhd) 4168 4201 iommu_flush_iotlb_psi(iommu, si_domain, 4169 4202 start_vpfn, 
mhp->nr_pages, 4170 - !freelist, 0); 4203 + list_empty(&freelist), 0); 4171 4204 rcu_read_unlock(); 4172 - dma_free_pagelist(freelist); 4205 + put_pages_list(&freelist); 4173 4206 } 4174 4207 break; 4175 4208 } ··· 5174 5211 start_pfn = iova >> VTD_PAGE_SHIFT; 5175 5212 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT; 5176 5213 5177 - gather->freelist = domain_unmap(dmar_domain, start_pfn, 5178 - last_pfn, gather->freelist); 5214 + domain_unmap(dmar_domain, start_pfn, last_pfn, &gather->freelist); 5179 5215 5180 5216 if (dmar_domain->max_addr == iova + size) 5181 5217 dmar_domain->max_addr = iova; ··· 5210 5248 5211 5249 for_each_domain_iommu(iommu_id, dmar_domain) 5212 5250 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain, 5213 - start_pfn, nrpages, !gather->freelist, 0); 5251 + start_pfn, nrpages, 5252 + list_empty(&gather->freelist), 0); 5214 5253 5215 - dma_free_pagelist(gather->freelist); 5254 + put_pages_list(&gather->freelist); 5216 5255 } 5217 5256 5218 5257 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
+5 -1
drivers/iommu/io-pgtable-arm-v7s.c
··· 246 246 __GFP_ZERO | ARM_V7S_TABLE_GFP_DMA, get_order(size)); 247 247 else if (lvl == 2) 248 248 table = kmem_cache_zalloc(data->l2_tables, gfp); 249 + 250 + if (!table) 251 + return NULL; 252 + 249 253 phys = virt_to_phys(table); 250 254 if (phys != (arm_v7s_iopte)phys) { 251 255 /* Doesn't fit in PTE */ 252 256 dev_err(dev, "Page table does not fit in PTE: %pa", &phys); 253 257 goto out_free; 254 258 } 255 - if (table && !cfg->coherent_walk) { 259 + if (!cfg->coherent_walk) { 256 260 dma = dma_map_single(dev, table, size, DMA_TO_DEVICE); 257 261 if (dma_mapping_error(dev, dma)) 258 262 goto out_free;
+5 -4
drivers/iommu/io-pgtable-arm.c
··· 315 315 static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table, 316 316 arm_lpae_iopte *ptep, 317 317 arm_lpae_iopte curr, 318 - struct io_pgtable_cfg *cfg) 318 + struct arm_lpae_io_pgtable *data) 319 319 { 320 320 arm_lpae_iopte old, new; 321 + struct io_pgtable_cfg *cfg = &data->iop.cfg; 321 322 322 - new = __pa(table) | ARM_LPAE_PTE_TYPE_TABLE; 323 + new = paddr_to_iopte(__pa(table), data) | ARM_LPAE_PTE_TYPE_TABLE; 323 324 if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) 324 325 new |= ARM_LPAE_PTE_NSTABLE; 325 326 ··· 381 380 if (!cptep) 382 381 return -ENOMEM; 383 382 384 - pte = arm_lpae_install_table(cptep, ptep, 0, cfg); 383 + pte = arm_lpae_install_table(cptep, ptep, 0, data); 385 384 if (pte) 386 385 __arm_lpae_free_pages(cptep, tblsz, cfg); 387 386 } else if (!cfg->coherent_walk && !(pte & ARM_LPAE_PTE_SW_SYNC)) { ··· 593 592 __arm_lpae_init_pte(data, blk_paddr, pte, lvl, 1, &tablep[i]); 594 593 } 595 594 596 - pte = arm_lpae_install_table(tablep, ptep, blk_pte, cfg); 595 + pte = arm_lpae_install_table(tablep, ptep, blk_pte, data); 597 596 if (pte != blk_pte) { 598 597 __arm_lpae_free_pages(tablep, tablesz, cfg); 599 598 /*
+2 -1
drivers/iommu/iommu.c
··· 288 288 */ 289 289 mutex_lock(&group->mutex); 290 290 iommu_alloc_default_domain(group, dev); 291 - mutex_unlock(&group->mutex); 292 291 293 292 if (group->default_domain) { 294 293 ret = __iommu_attach_device(group->default_domain, dev); 295 294 if (ret) { 295 + mutex_unlock(&group->mutex); 296 296 iommu_group_put(group); 297 297 goto err_release; 298 298 } ··· 300 300 301 301 iommu_create_device_direct_mappings(group, dev); 302 302 303 + mutex_unlock(&group->mutex); 303 304 iommu_group_put(group); 304 305 305 306 if (ops->probe_finalize)
+9 -200
drivers/iommu/iova.c
··· 24 24 static void init_iova_rcaches(struct iova_domain *iovad); 25 25 static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad); 26 26 static void free_iova_rcaches(struct iova_domain *iovad); 27 - static void fq_destroy_all_entries(struct iova_domain *iovad); 28 - static void fq_flush_timeout(struct timer_list *t); 29 27 30 28 static int iova_cpuhp_dead(unsigned int cpu, struct hlist_node *node) 31 29 { ··· 61 63 iovad->start_pfn = start_pfn; 62 64 iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad)); 63 65 iovad->max32_alloc_size = iovad->dma_32bit_pfn; 64 - iovad->flush_cb = NULL; 65 - iovad->fq = NULL; 66 66 iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR; 67 67 rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node); 68 68 rb_insert_color(&iovad->anchor.node, &iovad->rbroot); ··· 68 72 init_iova_rcaches(iovad); 69 73 } 70 74 EXPORT_SYMBOL_GPL(init_iova_domain); 71 - 72 - static bool has_iova_flush_queue(struct iova_domain *iovad) 73 - { 74 - return !!iovad->fq; 75 - } 76 - 77 - static void free_iova_flush_queue(struct iova_domain *iovad) 78 - { 79 - if (!has_iova_flush_queue(iovad)) 80 - return; 81 - 82 - if (timer_pending(&iovad->fq_timer)) 83 - del_timer(&iovad->fq_timer); 84 - 85 - fq_destroy_all_entries(iovad); 86 - 87 - free_percpu(iovad->fq); 88 - 89 - iovad->fq = NULL; 90 - iovad->flush_cb = NULL; 91 - iovad->entry_dtor = NULL; 92 - } 93 - 94 - int init_iova_flush_queue(struct iova_domain *iovad, 95 - iova_flush_cb flush_cb, iova_entry_dtor entry_dtor) 96 - { 97 - struct iova_fq __percpu *queue; 98 - int cpu; 99 - 100 - atomic64_set(&iovad->fq_flush_start_cnt, 0); 101 - atomic64_set(&iovad->fq_flush_finish_cnt, 0); 102 - 103 - queue = alloc_percpu(struct iova_fq); 104 - if (!queue) 105 - return -ENOMEM; 106 - 107 - iovad->flush_cb = flush_cb; 108 - iovad->entry_dtor = entry_dtor; 109 - 110 - for_each_possible_cpu(cpu) { 111 - struct iova_fq *fq; 112 - 113 - fq = per_cpu_ptr(queue, cpu); 114 - fq->head = 0; 
115 - fq->tail = 0; 116 - 117 - spin_lock_init(&fq->lock); 118 - } 119 - 120 - iovad->fq = queue; 121 - 122 - timer_setup(&iovad->fq_timer, fq_flush_timeout, 0); 123 - atomic_set(&iovad->fq_timer_on, 0); 124 - 125 - return 0; 126 - } 127 75 128 76 static struct rb_node * 129 77 __get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn) ··· 437 497 unsigned long iova_pfn; 438 498 struct iova *new_iova; 439 499 500 + /* 501 + * Freeing non-power-of-two-sized allocations back into the IOVA caches 502 + * will come back to bite us badly, so we have to waste a bit of space 503 + * rounding up anything cacheable to make sure that can't happen. The 504 + * order of the unadjusted size will still match upon freeing. 505 + */ 506 + if (size < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1))) 507 + size = roundup_pow_of_two(size); 508 + 440 509 iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1); 441 510 if (iova_pfn) 442 511 return iova_pfn; ··· 488 539 } 489 540 EXPORT_SYMBOL_GPL(free_iova_fast); 490 541 491 - #define fq_ring_for_each(i, fq) \ 492 - for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE) 493 - 494 - static inline bool fq_full(struct iova_fq *fq) 495 - { 496 - assert_spin_locked(&fq->lock); 497 - return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head); 498 - } 499 - 500 - static inline unsigned fq_ring_add(struct iova_fq *fq) 501 - { 502 - unsigned idx = fq->tail; 503 - 504 - assert_spin_locked(&fq->lock); 505 - 506 - fq->tail = (idx + 1) % IOVA_FQ_SIZE; 507 - 508 - return idx; 509 - } 510 - 511 - static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq) 512 - { 513 - u64 counter = atomic64_read(&iovad->fq_flush_finish_cnt); 514 - unsigned idx; 515 - 516 - assert_spin_locked(&fq->lock); 517 - 518 - fq_ring_for_each(idx, fq) { 519 - 520 - if (fq->entries[idx].counter >= counter) 521 - break; 522 - 523 - if (iovad->entry_dtor) 524 - iovad->entry_dtor(fq->entries[idx].data); 525 - 526 - free_iova_fast(iovad, 527 - 
fq->entries[idx].iova_pfn, 528 - fq->entries[idx].pages); 529 - 530 - fq->head = (fq->head + 1) % IOVA_FQ_SIZE; 531 - } 532 - } 533 - 534 - static void iova_domain_flush(struct iova_domain *iovad) 535 - { 536 - atomic64_inc(&iovad->fq_flush_start_cnt); 537 - iovad->flush_cb(iovad); 538 - atomic64_inc(&iovad->fq_flush_finish_cnt); 539 - } 540 - 541 - static void fq_destroy_all_entries(struct iova_domain *iovad) 542 - { 543 - int cpu; 544 - 545 - /* 546 - * This code runs when the iova_domain is being detroyed, so don't 547 - * bother to free iovas, just call the entry_dtor on all remaining 548 - * entries. 549 - */ 550 - if (!iovad->entry_dtor) 551 - return; 552 - 553 - for_each_possible_cpu(cpu) { 554 - struct iova_fq *fq = per_cpu_ptr(iovad->fq, cpu); 555 - int idx; 556 - 557 - fq_ring_for_each(idx, fq) 558 - iovad->entry_dtor(fq->entries[idx].data); 559 - } 560 - } 561 - 562 - static void fq_flush_timeout(struct timer_list *t) 563 - { 564 - struct iova_domain *iovad = from_timer(iovad, t, fq_timer); 565 - int cpu; 566 - 567 - atomic_set(&iovad->fq_timer_on, 0); 568 - iova_domain_flush(iovad); 569 - 570 - for_each_possible_cpu(cpu) { 571 - unsigned long flags; 572 - struct iova_fq *fq; 573 - 574 - fq = per_cpu_ptr(iovad->fq, cpu); 575 - spin_lock_irqsave(&fq->lock, flags); 576 - fq_ring_free(iovad, fq); 577 - spin_unlock_irqrestore(&fq->lock, flags); 578 - } 579 - } 580 - 581 - void queue_iova(struct iova_domain *iovad, 582 - unsigned long pfn, unsigned long pages, 583 - unsigned long data) 584 - { 585 - struct iova_fq *fq; 586 - unsigned long flags; 587 - unsigned idx; 588 - 589 - /* 590 - * Order against the IOMMU driver's pagetable update from unmapping 591 - * @pte, to guarantee that iova_domain_flush() observes that if called 592 - * from a different CPU before we release the lock below. Full barrier 593 - * so it also pairs with iommu_dma_init_fq() to avoid seeing partially 594 - * written fq state here. 
595 - */ 596 - smp_mb(); 597 - 598 - fq = raw_cpu_ptr(iovad->fq); 599 - spin_lock_irqsave(&fq->lock, flags); 600 - 601 - /* 602 - * First remove all entries from the flush queue that have already been 603 - * flushed out on another CPU. This makes the fq_full() check below less 604 - * likely to be true. 605 - */ 606 - fq_ring_free(iovad, fq); 607 - 608 - if (fq_full(fq)) { 609 - iova_domain_flush(iovad); 610 - fq_ring_free(iovad, fq); 611 - } 612 - 613 - idx = fq_ring_add(fq); 614 - 615 - fq->entries[idx].iova_pfn = pfn; 616 - fq->entries[idx].pages = pages; 617 - fq->entries[idx].data = data; 618 - fq->entries[idx].counter = atomic64_read(&iovad->fq_flush_start_cnt); 619 - 620 - spin_unlock_irqrestore(&fq->lock, flags); 621 - 622 - /* Avoid false sharing as much as possible. */ 623 - if (!atomic_read(&iovad->fq_timer_on) && 624 - !atomic_xchg(&iovad->fq_timer_on, 1)) 625 - mod_timer(&iovad->fq_timer, 626 - jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT)); 627 - } 628 - 629 542 /** 630 543 * put_iova_domain - destroys the iova domain 631 544 * @iovad: - iova domain in question. ··· 499 688 500 689 cpuhp_state_remove_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD, 501 690 &iovad->cpuhp_dead); 502 - 503 - free_iova_flush_queue(iovad); 504 691 free_iova_rcaches(iovad); 505 692 rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node) 506 693 free_iova_mem(iova);
+95 -20
drivers/iommu/virtio-iommu.c
··· 71 71 struct rb_root_cached mappings; 72 72 73 73 unsigned long nr_endpoints; 74 + bool bypass; 74 75 }; 75 76 76 77 struct viommu_endpoint { ··· 311 310 * 312 311 * On success, return the new mapping. Otherwise return NULL. 313 312 */ 314 - static int viommu_add_mapping(struct viommu_domain *vdomain, unsigned long iova, 315 - phys_addr_t paddr, size_t size, u32 flags) 313 + static int viommu_add_mapping(struct viommu_domain *vdomain, u64 iova, u64 end, 314 + phys_addr_t paddr, u32 flags) 316 315 { 317 316 unsigned long irqflags; 318 317 struct viommu_mapping *mapping; ··· 323 322 324 323 mapping->paddr = paddr; 325 324 mapping->iova.start = iova; 326 - mapping->iova.last = iova + size - 1; 325 + mapping->iova.last = end; 327 326 mapping->flags = flags; 328 327 329 328 spin_lock_irqsave(&vdomain->mappings_lock, irqflags); ··· 338 337 * 339 338 * @vdomain: the domain 340 339 * @iova: start of the range 341 - * @size: size of the range. A size of 0 corresponds to the entire address 342 - * space. 
340 + * @end: end of the range 343 341 * 344 - * On success, returns the number of unmapped bytes (>= size) 342 + * On success, returns the number of unmapped bytes 345 343 */ 346 344 static size_t viommu_del_mappings(struct viommu_domain *vdomain, 347 - unsigned long iova, size_t size) 345 + u64 iova, u64 end) 348 346 { 349 347 size_t unmapped = 0; 350 348 unsigned long flags; 351 - unsigned long last = iova + size - 1; 352 349 struct viommu_mapping *mapping = NULL; 353 350 struct interval_tree_node *node, *next; 354 351 355 352 spin_lock_irqsave(&vdomain->mappings_lock, flags); 356 - next = interval_tree_iter_first(&vdomain->mappings, iova, last); 353 + next = interval_tree_iter_first(&vdomain->mappings, iova, end); 357 354 while (next) { 358 355 node = next; 359 356 mapping = container_of(node, struct viommu_mapping, iova); 360 - next = interval_tree_iter_next(node, iova, last); 357 + next = interval_tree_iter_next(node, iova, end); 361 358 362 359 /* Trying to split a mapping? */ 363 360 if (mapping->iova.start < iova) ··· 373 374 spin_unlock_irqrestore(&vdomain->mappings_lock, flags); 374 375 375 376 return unmapped; 377 + } 378 + 379 + /* 380 + * Fill the domain with identity mappings, skipping the device's reserved 381 + * regions. 
382 + */ 383 + static int viommu_domain_map_identity(struct viommu_endpoint *vdev, 384 + struct viommu_domain *vdomain) 385 + { 386 + int ret; 387 + struct iommu_resv_region *resv; 388 + u64 iova = vdomain->domain.geometry.aperture_start; 389 + u64 limit = vdomain->domain.geometry.aperture_end; 390 + u32 flags = VIRTIO_IOMMU_MAP_F_READ | VIRTIO_IOMMU_MAP_F_WRITE; 391 + unsigned long granule = 1UL << __ffs(vdomain->domain.pgsize_bitmap); 392 + 393 + iova = ALIGN(iova, granule); 394 + limit = ALIGN_DOWN(limit + 1, granule) - 1; 395 + 396 + list_for_each_entry(resv, &vdev->resv_regions, list) { 397 + u64 resv_start = ALIGN_DOWN(resv->start, granule); 398 + u64 resv_end = ALIGN(resv->start + resv->length, granule) - 1; 399 + 400 + if (resv_end < iova || resv_start > limit) 401 + /* No overlap */ 402 + continue; 403 + 404 + if (resv_start > iova) { 405 + ret = viommu_add_mapping(vdomain, iova, resv_start - 1, 406 + (phys_addr_t)iova, flags); 407 + if (ret) 408 + goto err_unmap; 409 + } 410 + 411 + if (resv_end >= limit) 412 + return 0; 413 + 414 + iova = resv_end + 1; 415 + } 416 + 417 + ret = viommu_add_mapping(vdomain, iova, limit, (phys_addr_t)iova, 418 + flags); 419 + if (ret) 420 + goto err_unmap; 421 + return 0; 422 + 423 + err_unmap: 424 + viommu_del_mappings(vdomain, 0, iova); 425 + return ret; 376 426 } 377 427 378 428 /* ··· 470 422 size_t size; 471 423 u64 start64, end64; 472 424 phys_addr_t start, end; 473 - struct iommu_resv_region *region = NULL; 425 + struct iommu_resv_region *region = NULL, *next; 474 426 unsigned long prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO; 475 427 476 428 start = start64 = le64_to_cpu(mem->start); ··· 501 453 if (!region) 502 454 return -ENOMEM; 503 455 504 - list_add(&region->list, &vdev->resv_regions); 456 + /* Keep the list sorted */ 457 + list_for_each_entry(next, &vdev->resv_regions, list) { 458 + if (next->start > region->start) 459 + break; 460 + } 461 + list_add_tail(&region->list, &next->list); 505 462 return 0; 506 
463 } 507 464 ··· 640 587 { 641 588 struct viommu_domain *vdomain; 642 589 643 - if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA) 590 + if (type != IOMMU_DOMAIN_UNMANAGED && 591 + type != IOMMU_DOMAIN_DMA && 592 + type != IOMMU_DOMAIN_IDENTITY) 644 593 return NULL; 645 594 646 595 vdomain = kzalloc(sizeof(*vdomain), GFP_KERNEL); ··· 685 630 vdomain->map_flags = viommu->map_flags; 686 631 vdomain->viommu = viommu; 687 632 633 + if (domain->type == IOMMU_DOMAIN_IDENTITY) { 634 + if (virtio_has_feature(viommu->vdev, 635 + VIRTIO_IOMMU_F_BYPASS_CONFIG)) { 636 + vdomain->bypass = true; 637 + return 0; 638 + } 639 + 640 + ret = viommu_domain_map_identity(vdev, vdomain); 641 + if (ret) { 642 + ida_free(&viommu->domain_ids, vdomain->id); 643 + vdomain->viommu = NULL; 644 + return -EOPNOTSUPP; 645 + } 646 + } 647 + 688 648 return 0; 689 649 } 690 650 ··· 707 637 { 708 638 struct viommu_domain *vdomain = to_viommu_domain(domain); 709 639 710 - /* Free all remaining mappings (size 2^64) */ 711 - viommu_del_mappings(vdomain, 0, 0); 640 + /* Free all remaining mappings */ 641 + viommu_del_mappings(vdomain, 0, ULLONG_MAX); 712 642 713 643 if (vdomain->viommu) 714 644 ida_free(&vdomain->viommu->domain_ids, vdomain->id); ··· 743 673 744 674 /* 745 675 * In the virtio-iommu device, when attaching the endpoint to a new 746 - * domain, it is detached from the old one and, if as as a result the 676 + * domain, it is detached from the old one and, if as a result the 747 677 * old domain isn't attached to any endpoint, all mappings are removed 748 678 * from the old domain and it is freed. 
749 679 * ··· 760 690 .head.type = VIRTIO_IOMMU_T_ATTACH, 761 691 .domain = cpu_to_le32(vdomain->id), 762 692 }; 693 + 694 + if (vdomain->bypass) 695 + req.flags |= cpu_to_le32(VIRTIO_IOMMU_ATTACH_F_BYPASS); 763 696 764 697 for (i = 0; i < fwspec->num_ids; i++) { 765 698 req.endpoint = cpu_to_le32(fwspec->ids[i]); ··· 793 720 { 794 721 int ret; 795 722 u32 flags; 723 + u64 end = iova + size - 1; 796 724 struct virtio_iommu_req_map map; 797 725 struct viommu_domain *vdomain = to_viommu_domain(domain); 798 726 ··· 804 730 if (flags & ~vdomain->map_flags) 805 731 return -EINVAL; 806 732 807 - ret = viommu_add_mapping(vdomain, iova, paddr, size, flags); 733 + ret = viommu_add_mapping(vdomain, iova, end, paddr, flags); 808 734 if (ret) 809 735 return ret; 810 736 ··· 813 739 .domain = cpu_to_le32(vdomain->id), 814 740 .virt_start = cpu_to_le64(iova), 815 741 .phys_start = cpu_to_le64(paddr), 816 - .virt_end = cpu_to_le64(iova + size - 1), 742 + .virt_end = cpu_to_le64(end), 817 743 .flags = cpu_to_le32(flags), 818 744 }; 819 745 ··· 822 748 823 749 ret = viommu_send_req_sync(vdomain->viommu, &map, sizeof(map)); 824 750 if (ret) 825 - viommu_del_mappings(vdomain, iova, size); 751 + viommu_del_mappings(vdomain, iova, end); 826 752 827 753 return ret; 828 754 } ··· 835 761 struct virtio_iommu_req_unmap unmap; 836 762 struct viommu_domain *vdomain = to_viommu_domain(domain); 837 763 838 - unmapped = viommu_del_mappings(vdomain, iova, size); 764 + unmapped = viommu_del_mappings(vdomain, iova, iova + size - 1); 839 765 if (unmapped < size) 840 766 return 0; 841 767 ··· 1206 1132 VIRTIO_IOMMU_F_DOMAIN_RANGE, 1207 1133 VIRTIO_IOMMU_F_PROBE, 1208 1134 VIRTIO_IOMMU_F_MMIO, 1135 + VIRTIO_IOMMU_F_BYPASS_CONFIG, 1209 1136 }; 1210 1137 1211 1138 static struct virtio_device_id id_table[] = {
-8
drivers/vdpa/vdpa_user/iova_domain.c
··· 292 292 unsigned long iova_len = iova_align(iovad, size) >> shift; 293 293 unsigned long iova_pfn; 294 294 295 - /* 296 - * Freeing non-power-of-two-sized allocations back into the IOVA caches 297 - * will come back to bite us badly, so we have to waste a bit of space 298 - * rounding up anything cacheable to make sure that can't happen. The 299 - * order of the unadjusted size will still match upon freeing. 300 - */ 301 - if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1))) 302 - iova_len = roundup_pow_of_two(iova_len); 303 295 iova_pfn = alloc_iova_fast(iovad, iova_len, limit >> shift, true); 304 296 305 297 return iova_pfn << shift;
-6
include/linux/intel-svm.h
··· 8 8 #ifndef __INTEL_SVM_H__ 9 9 #define __INTEL_SVM_H__ 10 10 11 - /* Values for rxwp in fault_cb callback */ 12 - #define SVM_REQ_READ (1<<3) 13 - #define SVM_REQ_WRITE (1<<2) 14 - #define SVM_REQ_EXEC (1<<1) 15 - #define SVM_REQ_PRIV (1<<0) 16 - 17 11 /* Page Request Queue depth */ 18 12 #define PRQ_ORDER 2 19 13 #define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x20)
+2 -1
include/linux/iommu.h
··· 186 186 unsigned long start; 187 187 unsigned long end; 188 188 size_t pgsize; 189 - struct page *freelist; 189 + struct list_head freelist; 190 190 bool queued; 191 191 }; 192 192 ··· 399 399 { 400 400 *gather = (struct iommu_iotlb_gather) { 401 401 .start = ULONG_MAX, 402 + .freelist = LIST_HEAD_INIT(gather->freelist), 402 403 }; 403 404 } 404 405
+1 -67
include/linux/iova.h
··· 12 12 #include <linux/types.h> 13 13 #include <linux/kernel.h> 14 14 #include <linux/rbtree.h> 15 - #include <linux/atomic.h> 16 15 #include <linux/dma-mapping.h> 17 16 18 17 /* iova structure */ ··· 34 35 struct iova_cpu_rcache __percpu *cpu_rcaches; 35 36 }; 36 37 37 - struct iova_domain; 38 - 39 - /* Call-Back from IOVA code into IOMMU drivers */ 40 - typedef void (* iova_flush_cb)(struct iova_domain *domain); 41 - 42 - /* Destructor for per-entry data */ 43 - typedef void (* iova_entry_dtor)(unsigned long data); 44 - 45 - /* Number of entries per Flush Queue */ 46 - #define IOVA_FQ_SIZE 256 47 - 48 - /* Timeout (in ms) after which entries are flushed from the Flush-Queue */ 49 - #define IOVA_FQ_TIMEOUT 10 50 - 51 - /* Flush Queue entry for defered flushing */ 52 - struct iova_fq_entry { 53 - unsigned long iova_pfn; 54 - unsigned long pages; 55 - unsigned long data; 56 - u64 counter; /* Flush counter when this entrie was added */ 57 - }; 58 - 59 - /* Per-CPU Flush Queue structure */ 60 - struct iova_fq { 61 - struct iova_fq_entry entries[IOVA_FQ_SIZE]; 62 - unsigned head, tail; 63 - spinlock_t lock; 64 - }; 65 - 66 38 /* holds all the iova translations for a domain */ 67 39 struct iova_domain { 68 40 spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ ··· 44 74 unsigned long start_pfn; /* Lower limit for this domain */ 45 75 unsigned long dma_32bit_pfn; 46 76 unsigned long max32_alloc_size; /* Size of last failed allocation */ 47 - struct iova_fq __percpu *fq; /* Flush Queue */ 48 - 49 - atomic64_t fq_flush_start_cnt; /* Number of TLB flushes that 50 - have been started */ 51 - 52 - atomic64_t fq_flush_finish_cnt; /* Number of TLB flushes that 53 - have been finished */ 54 - 55 77 struct iova anchor; /* rbtree lookup anchor */ 78 + 56 79 struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE]; /* IOVA range caches */ 57 - 58 - iova_flush_cb flush_cb; /* Call-Back function to flush IOMMU 59 - TLBs */ 60 - 61 - iova_entry_dtor entry_dtor; /* 
IOMMU driver specific destructor for 62 - iova entry */ 63 - 64 - struct timer_list fq_timer; /* Timer to regularily empty the 65 - flush-queues */ 66 - atomic_t fq_timer_on; /* 1 when timer is active, 0 67 - when not */ 68 80 struct hlist_node cpuhp_dead; 69 81 }; 70 82 ··· 96 144 bool size_aligned); 97 145 void free_iova_fast(struct iova_domain *iovad, unsigned long pfn, 98 146 unsigned long size); 99 - void queue_iova(struct iova_domain *iovad, 100 - unsigned long pfn, unsigned long pages, 101 - unsigned long data); 102 147 unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, 103 148 unsigned long limit_pfn, bool flush_rcache); 104 149 struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, 105 150 unsigned long pfn_hi); 106 151 void init_iova_domain(struct iova_domain *iovad, unsigned long granule, 107 152 unsigned long start_pfn); 108 - int init_iova_flush_queue(struct iova_domain *iovad, 109 - iova_flush_cb flush_cb, iova_entry_dtor entry_dtor); 110 153 struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); 111 154 void put_iova_domain(struct iova_domain *iovad); 112 155 #else ··· 136 189 { 137 190 } 138 191 139 - static inline void queue_iova(struct iova_domain *iovad, 140 - unsigned long pfn, unsigned long pages, 141 - unsigned long data) 142 - { 143 - } 144 - 145 192 static inline unsigned long alloc_iova_fast(struct iova_domain *iovad, 146 193 unsigned long size, 147 194 unsigned long limit_pfn, ··· 155 214 unsigned long granule, 156 215 unsigned long start_pfn) 157 216 { 158 - } 159 - 160 - static inline int init_iova_flush_queue(struct iova_domain *iovad, 161 - iova_flush_cb flush_cb, 162 - iova_entry_dtor entry_dtor) 163 - { 164 - return -ENODEV; 165 217 } 166 218 167 219 static inline struct iova *find_iova(struct iova_domain *iovad,
+6 -4
include/trace/events/iommu.h
··· 101 101 __entry->size = size; 102 102 ), 103 103 104 - TP_printk("IOMMU: iova=0x%016llx paddr=0x%016llx size=%zu", 105 - __entry->iova, __entry->paddr, __entry->size 104 + TP_printk("IOMMU: iova=0x%016llx - 0x%016llx paddr=0x%016llx size=%zu", 105 + __entry->iova, __entry->iova + __entry->size, __entry->paddr, 106 + __entry->size 106 107 ) 107 108 ); 108 109 ··· 125 124 __entry->unmapped_size = unmapped_size; 126 125 ), 127 126 128 - TP_printk("IOMMU: iova=0x%016llx size=%zu unmapped_size=%zu", 129 - __entry->iova, __entry->size, __entry->unmapped_size 127 + TP_printk("IOMMU: iova=0x%016llx - 0x%016llx size=%zu unmapped_size=%zu", 128 + __entry->iova, __entry->iova + __entry->size, 129 + __entry->size, __entry->unmapped_size 130 130 ) 131 131 ); 132 132
+7 -1
include/uapi/linux/virtio_iommu.h
··· 16 16 #define VIRTIO_IOMMU_F_BYPASS 3 17 17 #define VIRTIO_IOMMU_F_PROBE 4 18 18 #define VIRTIO_IOMMU_F_MMIO 5 19 + #define VIRTIO_IOMMU_F_BYPASS_CONFIG 6 19 20 20 21 struct virtio_iommu_range_64 { 21 22 __le64 start; ··· 37 36 struct virtio_iommu_range_32 domain_range; 38 37 /* Probe buffer size */ 39 38 __le32 probe_size; 39 + __u8 bypass; 40 + __u8 reserved[3]; 40 41 }; 41 42 42 43 /* Request types */ ··· 69 66 __u8 reserved[3]; 70 67 }; 71 68 69 + #define VIRTIO_IOMMU_ATTACH_F_BYPASS (1 << 0) 70 + 72 71 struct virtio_iommu_req_attach { 73 72 struct virtio_iommu_req_head head; 74 73 __le32 domain; 75 74 __le32 endpoint; 76 - __u8 reserved[8]; 75 + __le32 flags; 76 + __u8 reserved[4]; 77 77 struct virtio_iommu_req_tail tail; 78 78 }; 79 79