Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'iommu-updates-v4.13' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu

Pull IOMMU updates from Joerg Roedel:
"This update comes with:

- Support for lockless operation in the ARM io-pgtable code.

This is an important step to solve the scalability problems in the
common dma-iommu code for ARM

- Some Errata workarounds for ARM SMMU implementations

- Rewrite of the deferred IO/TLB flush code in the AMD IOMMU driver.

The code suffered from very high flush rates; with the new
implementation the flush rate is down to ~1% of what it was before

- Support for amd_iommu=off when booting with kexec.

The problem here was that the IOMMU driver bailed out early without
disabling the iommu hardware, if it was enabled in the old kernel

- The Rockchip IOMMU driver is now available on ARM64

- Align the return value of the iommu_ops->device_group call-backs to
not miss error values

- Preempt-disable optimizations in the Intel VT-d and common IOVA
code to help Linux-RT

- Various other small cleanups and fixes"

* tag 'iommu-updates-v4.13' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (60 commits)
iommu/vt-d: Constify intel_dma_ops
iommu: Warn once when device_group callback returns NULL
iommu/omap: Return ERR_PTR in device_group call-back
iommu: Return ERR_PTR() values from device_group call-backs
iommu/s390: Use iommu_group_get_for_dev() in s390_iommu_add_device()
iommu/vt-d: Don't disable preemption while accessing deferred_flush()
iommu/iova: Don't disable preempt around this_cpu_ptr()
iommu/arm-smmu-v3: Add workaround for Cavium ThunderX2 erratum #126
iommu/arm-smmu-v3: Enable ACPI based HiSilicon CMD_PREFETCH quirk(erratum 161010701)
iommu/arm-smmu-v3: Add workaround for Cavium ThunderX2 erratum #74
ACPI/IORT: Fixup SMMUv3 resource size for Cavium ThunderX2 SMMUv3 model
iommu/arm-smmu-v3, acpi: Add temporary Cavium SMMU-V3 IORT model number definitions
iommu/io-pgtable-arm: Use dma_wmb() instead of wmb() when publishing table
iommu/io-pgtable: depend on !GENERIC_ATOMIC64 when using COMPILE_TEST with LPAE
iommu/arm-smmu-v3: Remove io-pgtable spinlock
iommu/arm-smmu: Remove io-pgtable spinlock
iommu/io-pgtable-arm-v7s: Support lockless operation
iommu/io-pgtable-arm: Support lockless operation
iommu/io-pgtable: Introduce explicit coherency
iommu/io-pgtable-arm-v7s: Refactor split_blk_unmap
...

+1241 -556
+4 -1
Documentation/arm64/silicon-errata.txt
··· 61 61 | Cavium | ThunderX ITS | #23144 | CAVIUM_ERRATUM_23144 | 62 62 | Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 | 63 63 | Cavium | ThunderX Core | #27456 | CAVIUM_ERRATUM_27456 | 64 - | Cavium | ThunderX SMMUv2 | #27704 | N/A | 65 64 | Cavium | ThunderX Core | #30115 | CAVIUM_ERRATUM_30115 | 65 + | Cavium | ThunderX SMMUv2 | #27704 | N/A | 66 + | Cavium | ThunderX2 SMMUv3| #74 | N/A | 67 + | Cavium | ThunderX2 SMMUv3| #126 | N/A | 66 68 | | | | | 67 69 | Freescale/NXP | LS2080A/LS1043A | A-008585 | FSL_ERRATUM_A008585 | 68 70 | | | | | 69 71 | Hisilicon | Hip0{5,6,7} | #161010101 | HISILICON_ERRATUM_161010101 | 72 + | Hisilicon | Hip0{6,7} | #161010701 | N/A | 70 73 | | | | | 71 74 | Qualcomm Tech. | Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 | 72 75 | Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 |
+12
Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
··· 26 26 * "priq" - PRI Queue not empty 27 27 * "cmdq-sync" - CMD_SYNC complete 28 28 * "gerror" - Global Error activated 29 + * "combined" - The combined interrupt is optional, 30 + and should only be provided if the 31 + hardware supports just a single, 32 + combined interrupt line. 33 + If provided, then the combined interrupt 34 + will be used in preference to any others. 29 35 30 36 - #iommu-cells : See the generic IOMMU binding described in 31 37 devicetree/bindings/pci/pci-iommu.txt ··· 54 48 55 49 - hisilicon,broken-prefetch-cmd 56 50 : Avoid sending CMD_PREFETCH_* commands to the SMMU. 51 + 52 + - cavium,cn9900-broken-page1-regspace 53 + : Replaces all page 1 offsets used for EVTQ_PROD/CONS, 54 + PRIQ_PROD/CONS register access with page 0 offsets. 55 + Set for Cavium ThunderX2 silicon that doesn't support 56 + SMMU page1 register space. 57 57 58 58 ** Example 59 59
+60 -17
drivers/acpi/arm64/iort.c
··· 31 31 #define IORT_IOMMU_TYPE ((1 << ACPI_IORT_NODE_SMMU) | \ 32 32 (1 << ACPI_IORT_NODE_SMMU_V3)) 33 33 34 + /* Until ACPICA headers cover IORT rev. C */ 35 + #ifndef ACPI_IORT_SMMU_V3_CAVIUM_CN99XX 36 + #define ACPI_IORT_SMMU_V3_CAVIUM_CN99XX 0x2 37 + #endif 38 + 34 39 struct iort_its_msi_chip { 35 40 struct list_head list; 36 41 struct fwnode_handle *fw_node; ··· 824 819 return num_res; 825 820 } 826 821 822 + static bool arm_smmu_v3_is_combined_irq(struct acpi_iort_smmu_v3 *smmu) 823 + { 824 + /* 825 + * Cavium ThunderX2 implementation doesn't not support unique 826 + * irq line. Use single irq line for all the SMMUv3 interrupts. 827 + */ 828 + if (smmu->model != ACPI_IORT_SMMU_V3_CAVIUM_CN99XX) 829 + return false; 830 + 831 + /* 832 + * ThunderX2 doesn't support MSIs from the SMMU, so we're checking 833 + * SPI numbers here. 834 + */ 835 + return smmu->event_gsiv == smmu->pri_gsiv && 836 + smmu->event_gsiv == smmu->gerr_gsiv && 837 + smmu->event_gsiv == smmu->sync_gsiv; 838 + } 839 + 840 + static unsigned long arm_smmu_v3_resource_size(struct acpi_iort_smmu_v3 *smmu) 841 + { 842 + /* 843 + * Override the size, for Cavium ThunderX2 implementation 844 + * which doesn't support the page 1 SMMU register space. 
845 + */ 846 + if (smmu->model == ACPI_IORT_SMMU_V3_CAVIUM_CN99XX) 847 + return SZ_64K; 848 + 849 + return SZ_128K; 850 + } 851 + 827 852 static void __init arm_smmu_v3_init_resources(struct resource *res, 828 853 struct acpi_iort_node *node) 829 854 { ··· 864 829 smmu = (struct acpi_iort_smmu_v3 *)node->node_data; 865 830 866 831 res[num_res].start = smmu->base_address; 867 - res[num_res].end = smmu->base_address + SZ_128K - 1; 832 + res[num_res].end = smmu->base_address + 833 + arm_smmu_v3_resource_size(smmu) - 1; 868 834 res[num_res].flags = IORESOURCE_MEM; 869 835 870 836 num_res++; 837 + if (arm_smmu_v3_is_combined_irq(smmu)) { 838 + if (smmu->event_gsiv) 839 + acpi_iort_register_irq(smmu->event_gsiv, "combined", 840 + ACPI_EDGE_SENSITIVE, 841 + &res[num_res++]); 842 + } else { 871 843 872 - if (smmu->event_gsiv) 873 - acpi_iort_register_irq(smmu->event_gsiv, "eventq", 874 - ACPI_EDGE_SENSITIVE, 875 - &res[num_res++]); 844 + if (smmu->event_gsiv) 845 + acpi_iort_register_irq(smmu->event_gsiv, "eventq", 846 + ACPI_EDGE_SENSITIVE, 847 + &res[num_res++]); 876 848 877 - if (smmu->pri_gsiv) 878 - acpi_iort_register_irq(smmu->pri_gsiv, "priq", 879 - ACPI_EDGE_SENSITIVE, 880 - &res[num_res++]); 849 + if (smmu->pri_gsiv) 850 + acpi_iort_register_irq(smmu->pri_gsiv, "priq", 851 + ACPI_EDGE_SENSITIVE, 852 + &res[num_res++]); 881 853 882 - if (smmu->gerr_gsiv) 883 - acpi_iort_register_irq(smmu->gerr_gsiv, "gerror", 884 - ACPI_EDGE_SENSITIVE, 885 - &res[num_res++]); 854 + if (smmu->gerr_gsiv) 855 + acpi_iort_register_irq(smmu->gerr_gsiv, "gerror", 856 + ACPI_EDGE_SENSITIVE, 857 + &res[num_res++]); 886 858 887 - if (smmu->sync_gsiv) 888 - acpi_iort_register_irq(smmu->sync_gsiv, "cmdq-sync", 889 - ACPI_EDGE_SENSITIVE, 890 - &res[num_res++]); 859 + if (smmu->sync_gsiv) 860 + acpi_iort_register_irq(smmu->sync_gsiv, "cmdq-sync", 861 + ACPI_EDGE_SENSITIVE, 862 + &res[num_res++]); 863 + } 891 864 } 892 865 893 866 static bool __init arm_smmu_v3_is_coherent(struct acpi_iort_node 
*node)
+3 -3
drivers/iommu/Kconfig
··· 23 23 config IOMMU_IO_PGTABLE_LPAE 24 24 bool "ARMv7/v8 Long Descriptor Format" 25 25 select IOMMU_IO_PGTABLE 26 - depends on HAS_DMA && (ARM || ARM64 || COMPILE_TEST) 26 + depends on HAS_DMA && (ARM || ARM64 || (COMPILE_TEST && !GENERIC_ATOMIC64)) 27 27 help 28 28 Enable support for the ARM long descriptor pagetable format. 29 29 This allocator supports 4K/2M/1G, 16K/32M and 64K/512M page ··· 219 219 220 220 config ROCKCHIP_IOMMU 221 221 bool "Rockchip IOMMU Support" 222 - depends on ARM 222 + depends on ARM || ARM64 223 223 depends on ARCH_ROCKCHIP || COMPILE_TEST 224 224 select IOMMU_API 225 225 select ARM_DMA_USE_IOMMU ··· 274 274 275 275 config IPMMU_VMSA 276 276 bool "Renesas VMSA-compatible IPMMU" 277 - depends on ARM_LPAE 277 + depends on ARM || IOMMU_DMA 278 278 depends on ARCH_RENESAS || COMPILE_TEST 279 279 select IOMMU_API 280 280 select IOMMU_IO_PGTABLE_LPAE
+292 -166
drivers/iommu/amd_iommu.c
··· 91 91 LIST_HEAD(hpet_map); 92 92 LIST_HEAD(acpihid_map); 93 93 94 - #define FLUSH_QUEUE_SIZE 256 95 - 96 - struct flush_queue_entry { 97 - unsigned long iova_pfn; 98 - unsigned long pages; 99 - struct dma_ops_domain *dma_dom; 100 - }; 101 - 102 - struct flush_queue { 103 - spinlock_t lock; 104 - unsigned next; 105 - struct flush_queue_entry *entries; 106 - }; 107 - 108 - static DEFINE_PER_CPU(struct flush_queue, flush_queue); 109 - 110 - static atomic_t queue_timer_on; 111 - static struct timer_list queue_timer; 112 - 113 94 /* 114 95 * Domain for untranslated devices - only allocated 115 96 * if iommu=pt passed on kernel cmd line. ··· 121 140 PPR completions */ 122 141 u32 errata; /* Bitmap for errata to apply */ 123 142 bool use_vapic; /* Enable device to use vapic mode */ 143 + 144 + struct ratelimit_state rs; /* Ratelimit IOPF messages */ 124 145 }; 125 146 126 147 /* ··· 138 155 static int protection_domain_init(struct protection_domain *domain); 139 156 static void detach_device(struct device *dev); 140 157 158 + #define FLUSH_QUEUE_SIZE 256 159 + 160 + struct flush_queue_entry { 161 + unsigned long iova_pfn; 162 + unsigned long pages; 163 + u64 counter; /* Flush counter when this entry was added to the queue */ 164 + }; 165 + 166 + struct flush_queue { 167 + struct flush_queue_entry *entries; 168 + unsigned head, tail; 169 + spinlock_t lock; 170 + }; 171 + 141 172 /* 142 173 * Data container for a dma_ops specific protection domain 143 174 */ ··· 161 164 162 165 /* IOVA RB-Tree */ 163 166 struct iova_domain iovad; 167 + 168 + struct flush_queue __percpu *flush_queue; 169 + 170 + /* 171 + * We need two counter here to be race-free wrt. IOTLB flushing and 172 + * adding entries to the flush queue. 173 + * 174 + * The flush_start_cnt is incremented _before_ the IOTLB flush starts. 175 + * New entries added to the flush ring-buffer get their 'counter' value 176 + * from here. 
This way we can make sure that entries added to the queue 177 + * (or other per-cpu queues of the same domain) while the TLB is about 178 + * to be flushed are not considered to be flushed already. 179 + */ 180 + atomic64_t flush_start_cnt; 181 + 182 + /* 183 + * The flush_finish_cnt is incremented when an IOTLB flush is complete. 184 + * This value is always smaller than flush_start_cnt. The queue_add 185 + * function frees all IOVAs that have a counter value smaller than 186 + * flush_finish_cnt. This makes sure that we only free IOVAs that are 187 + * flushed out of the IOTLB of the domain. 188 + */ 189 + atomic64_t flush_finish_cnt; 190 + 191 + /* 192 + * Timer to make sure we don't keep IOVAs around unflushed 193 + * for too long 194 + */ 195 + struct timer_list flush_timer; 196 + atomic_t flush_timer_on; 164 197 }; 165 198 166 199 static struct iova_domain reserved_iova_ranges; ··· 281 254 spin_lock_irqsave(&dev_data_list_lock, flags); 282 255 list_add_tail(&dev_data->dev_data_list, &dev_data_list); 283 256 spin_unlock_irqrestore(&dev_data_list_lock, flags); 257 + 258 + ratelimit_default_init(&dev_data->rs); 284 259 285 260 return dev_data; 286 261 } ··· 582 553 pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]); 583 554 } 584 555 556 + static void amd_iommu_report_page_fault(u16 devid, u16 domain_id, 557 + u64 address, int flags) 558 + { 559 + struct iommu_dev_data *dev_data = NULL; 560 + struct pci_dev *pdev; 561 + 562 + pdev = pci_get_bus_and_slot(PCI_BUS_NUM(devid), devid & 0xff); 563 + if (pdev) 564 + dev_data = get_dev_data(&pdev->dev); 565 + 566 + if (dev_data && __ratelimit(&dev_data->rs)) { 567 + dev_err(&pdev->dev, "AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%016llx flags=0x%04x]\n", 568 + domain_id, address, flags); 569 + } else if (printk_ratelimit()) { 570 + pr_err("AMD-Vi: Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%016llx flags=0x%04x]\n", 571 + PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), 
572 + domain_id, address, flags); 573 + } 574 + 575 + if (pdev) 576 + pci_dev_put(pdev); 577 + } 578 + 585 579 static void iommu_print_event(struct amd_iommu *iommu, void *__evt) 586 580 { 587 581 int type, devid, domid, flags; ··· 629 577 goto retry; 630 578 } 631 579 632 - printk(KERN_ERR "AMD-Vi: Event logged ["); 580 + if (type == EVENT_TYPE_IO_FAULT) { 581 + amd_iommu_report_page_fault(devid, domid, address, flags); 582 + return; 583 + } else { 584 + printk(KERN_ERR "AMD-Vi: Event logged ["); 585 + } 633 586 634 587 switch (type) { 635 588 case EVENT_TYPE_ILL_DEV: ··· 643 586 PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), 644 587 address, flags); 645 588 dump_dte_entry(devid); 646 - break; 647 - case EVENT_TYPE_IO_FAULT: 648 - printk("IO_PAGE_FAULT device=%02x:%02x.%x " 649 - "domain=0x%04x address=0x%016llx flags=0x%04x]\n", 650 - PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), 651 - domid, address, flags); 652 589 break; 653 590 case EVENT_TYPE_DEV_TAB_ERR: 654 591 printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x " ··· 901 850 } 902 851 903 852 static void copy_cmd_to_buffer(struct amd_iommu *iommu, 904 - struct iommu_cmd *cmd, 905 - u32 tail) 853 + struct iommu_cmd *cmd) 906 854 { 907 855 u8 *target; 908 856 909 - target = iommu->cmd_buf + tail; 910 - tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE; 857 + target = iommu->cmd_buf + iommu->cmd_buf_tail; 858 + 859 + iommu->cmd_buf_tail += sizeof(*cmd); 860 + iommu->cmd_buf_tail %= CMD_BUFFER_SIZE; 911 861 912 862 /* Copy command to buffer */ 913 863 memcpy(target, cmd, sizeof(*cmd)); 914 864 915 865 /* Tell the IOMMU about it */ 916 - writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); 866 + writel(iommu->cmd_buf_tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); 917 867 } 918 868 919 869 static void build_completion_wait(struct iommu_cmd *cmd, u64 address) ··· 1072 1020 struct iommu_cmd *cmd, 1073 1021 bool sync) 1074 1022 { 1075 - u32 left, tail, head, next_tail; 1023 + unsigned int count = 0; 
1024 + u32 left, next_tail; 1076 1025 1026 + next_tail = (iommu->cmd_buf_tail + sizeof(*cmd)) % CMD_BUFFER_SIZE; 1077 1027 again: 1078 - 1079 - head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); 1080 - tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); 1081 - next_tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE; 1082 - left = (head - next_tail) % CMD_BUFFER_SIZE; 1028 + left = (iommu->cmd_buf_head - next_tail) % CMD_BUFFER_SIZE; 1083 1029 1084 1030 if (left <= 0x20) { 1085 - struct iommu_cmd sync_cmd; 1086 - int ret; 1031 + /* Skip udelay() the first time around */ 1032 + if (count++) { 1033 + if (count == LOOP_TIMEOUT) { 1034 + pr_err("AMD-Vi: Command buffer timeout\n"); 1035 + return -EIO; 1036 + } 1087 1037 1088 - iommu->cmd_sem = 0; 1038 + udelay(1); 1039 + } 1089 1040 1090 - build_completion_wait(&sync_cmd, (u64)&iommu->cmd_sem); 1091 - copy_cmd_to_buffer(iommu, &sync_cmd, tail); 1092 - 1093 - if ((ret = wait_on_sem(&iommu->cmd_sem)) != 0) 1094 - return ret; 1041 + /* Update head and recheck remaining space */ 1042 + iommu->cmd_buf_head = readl(iommu->mmio_base + 1043 + MMIO_CMD_HEAD_OFFSET); 1095 1044 1096 1045 goto again; 1097 1046 } 1098 1047 1099 - copy_cmd_to_buffer(iommu, cmd, tail); 1048 + copy_cmd_to_buffer(iommu, cmd); 1100 1049 1101 - /* We need to sync now to make sure all commands are processed */ 1050 + /* Do we need to make sure all commands are processed? 
*/ 1102 1051 iommu->need_sync = sync; 1103 1052 1104 1053 return 0; ··· 1788 1735 free_page((unsigned long)domain->gcr3_tbl); 1789 1736 } 1790 1737 1738 + static void dma_ops_domain_free_flush_queue(struct dma_ops_domain *dom) 1739 + { 1740 + int cpu; 1741 + 1742 + for_each_possible_cpu(cpu) { 1743 + struct flush_queue *queue; 1744 + 1745 + queue = per_cpu_ptr(dom->flush_queue, cpu); 1746 + kfree(queue->entries); 1747 + } 1748 + 1749 + free_percpu(dom->flush_queue); 1750 + 1751 + dom->flush_queue = NULL; 1752 + } 1753 + 1754 + static int dma_ops_domain_alloc_flush_queue(struct dma_ops_domain *dom) 1755 + { 1756 + int cpu; 1757 + 1758 + atomic64_set(&dom->flush_start_cnt, 0); 1759 + atomic64_set(&dom->flush_finish_cnt, 0); 1760 + 1761 + dom->flush_queue = alloc_percpu(struct flush_queue); 1762 + if (!dom->flush_queue) 1763 + return -ENOMEM; 1764 + 1765 + /* First make sure everything is cleared */ 1766 + for_each_possible_cpu(cpu) { 1767 + struct flush_queue *queue; 1768 + 1769 + queue = per_cpu_ptr(dom->flush_queue, cpu); 1770 + queue->head = 0; 1771 + queue->tail = 0; 1772 + queue->entries = NULL; 1773 + } 1774 + 1775 + /* Now start doing the allocation */ 1776 + for_each_possible_cpu(cpu) { 1777 + struct flush_queue *queue; 1778 + 1779 + queue = per_cpu_ptr(dom->flush_queue, cpu); 1780 + queue->entries = kzalloc(FLUSH_QUEUE_SIZE * sizeof(*queue->entries), 1781 + GFP_KERNEL); 1782 + if (!queue->entries) { 1783 + dma_ops_domain_free_flush_queue(dom); 1784 + return -ENOMEM; 1785 + } 1786 + 1787 + spin_lock_init(&queue->lock); 1788 + } 1789 + 1790 + return 0; 1791 + } 1792 + 1793 + static void dma_ops_domain_flush_tlb(struct dma_ops_domain *dom) 1794 + { 1795 + atomic64_inc(&dom->flush_start_cnt); 1796 + domain_flush_tlb(&dom->domain); 1797 + domain_flush_complete(&dom->domain); 1798 + atomic64_inc(&dom->flush_finish_cnt); 1799 + } 1800 + 1801 + static inline bool queue_ring_full(struct flush_queue *queue) 1802 + { 1803 + assert_spin_locked(&queue->lock); 1804 + 1805 
+ return (((queue->tail + 1) % FLUSH_QUEUE_SIZE) == queue->head); 1806 + } 1807 + 1808 + #define queue_ring_for_each(i, q) \ 1809 + for (i = (q)->head; i != (q)->tail; i = (i + 1) % FLUSH_QUEUE_SIZE) 1810 + 1811 + static inline unsigned queue_ring_add(struct flush_queue *queue) 1812 + { 1813 + unsigned idx = queue->tail; 1814 + 1815 + assert_spin_locked(&queue->lock); 1816 + queue->tail = (idx + 1) % FLUSH_QUEUE_SIZE; 1817 + 1818 + return idx; 1819 + } 1820 + 1821 + static inline void queue_ring_remove_head(struct flush_queue *queue) 1822 + { 1823 + assert_spin_locked(&queue->lock); 1824 + queue->head = (queue->head + 1) % FLUSH_QUEUE_SIZE; 1825 + } 1826 + 1827 + static void queue_ring_free_flushed(struct dma_ops_domain *dom, 1828 + struct flush_queue *queue) 1829 + { 1830 + u64 counter = atomic64_read(&dom->flush_finish_cnt); 1831 + int idx; 1832 + 1833 + queue_ring_for_each(idx, queue) { 1834 + /* 1835 + * This assumes that counter values in the ring-buffer are 1836 + * monotonously rising. 
1837 + */ 1838 + if (queue->entries[idx].counter >= counter) 1839 + break; 1840 + 1841 + free_iova_fast(&dom->iovad, 1842 + queue->entries[idx].iova_pfn, 1843 + queue->entries[idx].pages); 1844 + 1845 + queue_ring_remove_head(queue); 1846 + } 1847 + } 1848 + 1849 + static void queue_add(struct dma_ops_domain *dom, 1850 + unsigned long address, unsigned long pages) 1851 + { 1852 + struct flush_queue *queue; 1853 + unsigned long flags; 1854 + int idx; 1855 + 1856 + pages = __roundup_pow_of_two(pages); 1857 + address >>= PAGE_SHIFT; 1858 + 1859 + queue = get_cpu_ptr(dom->flush_queue); 1860 + spin_lock_irqsave(&queue->lock, flags); 1861 + 1862 + /* 1863 + * First remove the enries from the ring-buffer that are already 1864 + * flushed to make the below queue_ring_full() check less likely 1865 + */ 1866 + queue_ring_free_flushed(dom, queue); 1867 + 1868 + /* 1869 + * When ring-queue is full, flush the entries from the IOTLB so 1870 + * that we can free all entries with queue_ring_free_flushed() 1871 + * below. 
1872 + */ 1873 + if (queue_ring_full(queue)) { 1874 + dma_ops_domain_flush_tlb(dom); 1875 + queue_ring_free_flushed(dom, queue); 1876 + } 1877 + 1878 + idx = queue_ring_add(queue); 1879 + 1880 + queue->entries[idx].iova_pfn = address; 1881 + queue->entries[idx].pages = pages; 1882 + queue->entries[idx].counter = atomic64_read(&dom->flush_start_cnt); 1883 + 1884 + spin_unlock_irqrestore(&queue->lock, flags); 1885 + 1886 + if (atomic_cmpxchg(&dom->flush_timer_on, 0, 1) == 0) 1887 + mod_timer(&dom->flush_timer, jiffies + msecs_to_jiffies(10)); 1888 + 1889 + put_cpu_ptr(dom->flush_queue); 1890 + } 1891 + 1892 + static void queue_flush_timeout(unsigned long data) 1893 + { 1894 + struct dma_ops_domain *dom = (struct dma_ops_domain *)data; 1895 + int cpu; 1896 + 1897 + atomic_set(&dom->flush_timer_on, 0); 1898 + 1899 + dma_ops_domain_flush_tlb(dom); 1900 + 1901 + for_each_possible_cpu(cpu) { 1902 + struct flush_queue *queue; 1903 + unsigned long flags; 1904 + 1905 + queue = per_cpu_ptr(dom->flush_queue, cpu); 1906 + spin_lock_irqsave(&queue->lock, flags); 1907 + queue_ring_free_flushed(dom, queue); 1908 + spin_unlock_irqrestore(&queue->lock, flags); 1909 + } 1910 + } 1911 + 1791 1912 /* 1792 1913 * Free a domain, only used if something went wrong in the 1793 1914 * allocation path and we need to free an already allocated page table ··· 1972 1745 return; 1973 1746 1974 1747 del_domain_from_list(&dom->domain); 1748 + 1749 + if (timer_pending(&dom->flush_timer)) 1750 + del_timer(&dom->flush_timer); 1751 + 1752 + dma_ops_domain_free_flush_queue(dom); 1975 1753 1976 1754 put_iova_domain(&dom->iovad); 1977 1755 ··· 2015 1783 2016 1784 /* Initialize reserved ranges */ 2017 1785 copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad); 1786 + 1787 + if (dma_ops_domain_alloc_flush_queue(dma_dom)) 1788 + goto free_dma_dom; 1789 + 1790 + setup_timer(&dma_dom->flush_timer, queue_flush_timeout, 1791 + (unsigned long)dma_dom); 1792 + 1793 + atomic_set(&dma_dom->flush_timer_on, 0); 
2018 1794 2019 1795 add_domain_to_list(&dma_dom->domain); 2020 1796 ··· 2086 1846 flags |= tmp; 2087 1847 } 2088 1848 2089 - flags &= ~(0xffffUL); 1849 + 1850 + flags &= ~(DTE_FLAG_SA | 0xffffULL); 2090 1851 flags |= domain->id; 2091 1852 2092 1853 amd_iommu_dev_table[devid].data[1] = flags; ··· 2467 2226 * The next functions belong to the dma_ops mapping/unmapping code. 2468 2227 * 2469 2228 *****************************************************************************/ 2470 - 2471 - static void __queue_flush(struct flush_queue *queue) 2472 - { 2473 - struct protection_domain *domain; 2474 - unsigned long flags; 2475 - int idx; 2476 - 2477 - /* First flush TLB of all known domains */ 2478 - spin_lock_irqsave(&amd_iommu_pd_lock, flags); 2479 - list_for_each_entry(domain, &amd_iommu_pd_list, list) 2480 - domain_flush_tlb(domain); 2481 - spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); 2482 - 2483 - /* Wait until flushes have completed */ 2484 - domain_flush_complete(NULL); 2485 - 2486 - for (idx = 0; idx < queue->next; ++idx) { 2487 - struct flush_queue_entry *entry; 2488 - 2489 - entry = queue->entries + idx; 2490 - 2491 - free_iova_fast(&entry->dma_dom->iovad, 2492 - entry->iova_pfn, 2493 - entry->pages); 2494 - 2495 - /* Not really necessary, just to make sure we catch any bugs */ 2496 - entry->dma_dom = NULL; 2497 - } 2498 - 2499 - queue->next = 0; 2500 - } 2501 - 2502 - static void queue_flush_all(void) 2503 - { 2504 - int cpu; 2505 - 2506 - for_each_possible_cpu(cpu) { 2507 - struct flush_queue *queue; 2508 - unsigned long flags; 2509 - 2510 - queue = per_cpu_ptr(&flush_queue, cpu); 2511 - spin_lock_irqsave(&queue->lock, flags); 2512 - if (queue->next > 0) 2513 - __queue_flush(queue); 2514 - spin_unlock_irqrestore(&queue->lock, flags); 2515 - } 2516 - } 2517 - 2518 - static void queue_flush_timeout(unsigned long unsused) 2519 - { 2520 - atomic_set(&queue_timer_on, 0); 2521 - queue_flush_all(); 2522 - } 2523 - 2524 - static void queue_add(struct dma_ops_domain 
*dma_dom, 2525 - unsigned long address, unsigned long pages) 2526 - { 2527 - struct flush_queue_entry *entry; 2528 - struct flush_queue *queue; 2529 - unsigned long flags; 2530 - int idx; 2531 - 2532 - pages = __roundup_pow_of_two(pages); 2533 - address >>= PAGE_SHIFT; 2534 - 2535 - queue = get_cpu_ptr(&flush_queue); 2536 - spin_lock_irqsave(&queue->lock, flags); 2537 - 2538 - if (queue->next == FLUSH_QUEUE_SIZE) 2539 - __queue_flush(queue); 2540 - 2541 - idx = queue->next++; 2542 - entry = queue->entries + idx; 2543 - 2544 - entry->iova_pfn = address; 2545 - entry->pages = pages; 2546 - entry->dma_dom = dma_dom; 2547 - 2548 - spin_unlock_irqrestore(&queue->lock, flags); 2549 - 2550 - if (atomic_cmpxchg(&queue_timer_on, 0, 1) == 0) 2551 - mod_timer(&queue_timer, jiffies + msecs_to_jiffies(10)); 2552 - 2553 - put_cpu_ptr(&flush_queue); 2554 - } 2555 - 2556 2229 2557 2230 /* 2558 2231 * In the dma_ops path we only have the struct device. This function ··· 2962 2807 2963 2808 int __init amd_iommu_init_api(void) 2964 2809 { 2965 - int ret, cpu, err = 0; 2810 + int ret, err = 0; 2966 2811 2967 2812 ret = iova_cache_get(); 2968 2813 if (ret) ··· 2971 2816 ret = init_reserved_iova_ranges(); 2972 2817 if (ret) 2973 2818 return ret; 2974 - 2975 - for_each_possible_cpu(cpu) { 2976 - struct flush_queue *queue = per_cpu_ptr(&flush_queue, cpu); 2977 - 2978 - queue->entries = kzalloc(FLUSH_QUEUE_SIZE * 2979 - sizeof(*queue->entries), 2980 - GFP_KERNEL); 2981 - if (!queue->entries) 2982 - goto out_put_iova; 2983 - 2984 - spin_lock_init(&queue->lock); 2985 - } 2986 2819 2987 2820 err = bus_set_iommu(&pci_bus_type, &amd_iommu_ops); 2988 2821 if (err) ··· 2983 2840 err = bus_set_iommu(&platform_bus_type, &amd_iommu_ops); 2984 2841 if (err) 2985 2842 return err; 2843 + 2986 2844 return 0; 2987 - 2988 - out_put_iova: 2989 - for_each_possible_cpu(cpu) { 2990 - struct flush_queue *queue = per_cpu_ptr(&flush_queue, cpu); 2991 - 2992 - kfree(queue->entries); 2993 - } 2994 - 2995 - return 
-ENOMEM; 2996 2845 } 2997 2846 2998 2847 int __init amd_iommu_init_dma_ops(void) 2999 2848 { 3000 - setup_timer(&queue_timer, queue_flush_timeout, 0); 3001 - atomic_set(&queue_timer_on, 0); 3002 - 3003 2849 swiotlb = iommu_pass_through ? 1 : 0; 3004 2850 iommu_detected = 1; 3005 2851 ··· 3144 3012 3145 3013 switch (dom->type) { 3146 3014 case IOMMU_DOMAIN_DMA: 3147 - /* 3148 - * First make sure the domain is no longer referenced from the 3149 - * flush queue 3150 - */ 3151 - queue_flush_all(); 3152 - 3153 3015 /* Now release the domain */ 3154 3016 dma_dom = to_dma_ops_domain(domain); 3155 3017 dma_ops_domain_free(dma_dom); ··· 4407 4281 irte_info->index); 4408 4282 } 4409 4283 4410 - static struct irq_domain_ops amd_ir_domain_ops = { 4284 + static const struct irq_domain_ops amd_ir_domain_ops = { 4411 4285 .alloc = irq_remapping_alloc, 4412 4286 .free = irq_remapping_free, 4413 4287 .activate = irq_remapping_activate,
+35 -9
drivers/iommu/amd_iommu_init.c
··· 29 29 #include <linux/export.h> 30 30 #include <linux/iommu.h> 31 31 #include <linux/kmemleak.h> 32 + #include <linux/crash_dump.h> 32 33 #include <asm/pci-direct.h> 33 34 #include <asm/iommu.h> 34 35 #include <asm/gart.h> ··· 237 236 IOMMU_INITIALIZED, 238 237 IOMMU_NOT_FOUND, 239 238 IOMMU_INIT_ERROR, 239 + IOMMU_CMDLINE_DISABLED, 240 240 }; 241 241 242 242 /* Early ioapic and hpet maps from kernel command line */ ··· 590 588 591 589 writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); 592 590 writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); 591 + iommu->cmd_buf_head = 0; 592 + iommu->cmd_buf_tail = 0; 593 593 594 594 iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); 595 595 } ··· 1902 1898 for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { 1903 1899 set_dev_entry_bit(devid, DEV_ENTRY_VALID); 1904 1900 set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION); 1901 + /* 1902 + * In kdump kernels in-flight DMA from the old kernel might 1903 + * cause IO_PAGE_FAULTs. There are no reports that a kdump 1904 + * actually failed because of that, so just disable fault 1905 + * reporting in the hardware to get rid of the messages 1906 + */ 1907 + if (is_kdump_kernel()) 1908 + set_dev_entry_bit(devid, DEV_ENTRY_NO_PAGE_FAULT); 1905 1909 } 1906 1910 } 1907 1911 ··· 2109 2097 .resume = amd_iommu_resume, 2110 2098 }; 2111 2099 2112 - static void __init free_on_init_error(void) 2100 + static void __init free_iommu_resources(void) 2113 2101 { 2114 2102 kmemleak_free(irq_lookup_table); 2115 2103 free_pages((unsigned long)irq_lookup_table, 2116 2104 get_order(rlookup_table_size)); 2105 + irq_lookup_table = NULL; 2117 2106 2118 2107 kmem_cache_destroy(amd_iommu_irq_cache); 2119 2108 amd_iommu_irq_cache = NULL; 2120 2109 2121 2110 free_pages((unsigned long)amd_iommu_rlookup_table, 2122 2111 get_order(rlookup_table_size)); 2112 + amd_iommu_rlookup_table = NULL; 2123 2113 2124 2114 free_pages((unsigned long)amd_iommu_alias_table, 2125 2115 get_order(alias_table_size)); 2116 + 
amd_iommu_alias_table = NULL; 2126 2117 2127 2118 free_pages((unsigned long)amd_iommu_dev_table, 2128 2119 get_order(dev_table_size)); 2120 + amd_iommu_dev_table = NULL; 2129 2121 2130 2122 free_iommu_all(); 2131 2123 ··· 2199 2183 { 2200 2184 free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 2201 2185 get_order(MAX_DOMAIN_ID/8)); 2186 + amd_iommu_pd_alloc_bitmap = NULL; 2202 2187 2203 2188 free_unity_maps(); 2204 2189 } ··· 2324 2307 if (ret) 2325 2308 goto out; 2326 2309 2310 + /* Disable any previously enabled IOMMUs */ 2311 + disable_iommus(); 2312 + 2327 2313 if (amd_iommu_irq_remap) 2328 2314 amd_iommu_irq_remap = check_ioapic_information(); 2329 2315 ··· 2430 2410 case IOMMU_IVRS_DETECTED: 2431 2411 ret = early_amd_iommu_init(); 2432 2412 init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED; 2413 + if (init_state == IOMMU_ACPI_FINISHED && amd_iommu_disabled) { 2414 + pr_info("AMD-Vi: AMD IOMMU disabled on kernel command-line\n"); 2415 + free_dma_resources(); 2416 + free_iommu_resources(); 2417 + init_state = IOMMU_CMDLINE_DISABLED; 2418 + ret = -EINVAL; 2419 + } 2433 2420 break; 2434 2421 case IOMMU_ACPI_FINISHED: 2435 2422 early_enable_iommus(); ··· 2465 2438 break; 2466 2439 case IOMMU_NOT_FOUND: 2467 2440 case IOMMU_INIT_ERROR: 2441 + case IOMMU_CMDLINE_DISABLED: 2468 2442 /* Error states => do nothing */ 2469 2443 ret = -EINVAL; 2470 2444 break; ··· 2479 2451 2480 2452 static int __init iommu_go_to_state(enum iommu_init_state state) 2481 2453 { 2482 - int ret = 0; 2454 + int ret = -EINVAL; 2483 2455 2484 2456 while (init_state != state) { 2485 - ret = state_next(); 2486 - if (init_state == IOMMU_NOT_FOUND || 2487 - init_state == IOMMU_INIT_ERROR) 2457 + if (init_state == IOMMU_NOT_FOUND || 2458 + init_state == IOMMU_INIT_ERROR || 2459 + init_state == IOMMU_CMDLINE_DISABLED) 2488 2460 break; 2461 + ret = state_next(); 2489 2462 } 2490 2463 2491 2464 return ret; ··· 2551 2522 free_dma_resources(); 2552 2523 if (!irq_remapping_enabled) { 2553 2524 
disable_iommus(); 2554 - free_on_init_error(); 2525 + free_iommu_resources(); 2555 2526 } else { 2556 2527 struct amd_iommu *iommu; 2557 2528 ··· 2576 2547 int ret; 2577 2548 2578 2549 if (no_iommu || (iommu_detected && !gart_iommu_aperture)) 2579 - return -ENODEV; 2580 - 2581 - if (amd_iommu_disabled) 2582 2550 return -ENODEV; 2583 2551 2584 2552 ret = iommu_go_to_state(IOMMU_IVRS_DETECTED);
+3
drivers/iommu/amd_iommu_types.h
··· 322 322 #define IOMMU_PTE_IW (1ULL << 62) 323 323 324 324 #define DTE_FLAG_IOTLB (1ULL << 32) 325 + #define DTE_FLAG_SA (1ULL << 34) 325 326 #define DTE_FLAG_GV (1ULL << 55) 326 327 #define DTE_FLAG_MASK (0x3ffULL << 32) 327 328 #define DTE_GLX_SHIFT (56) ··· 517 516 518 517 /* command buffer virtual address */ 519 518 u8 *cmd_buf; 519 + u32 cmd_buf_head; 520 + u32 cmd_buf_tail; 520 521 521 522 /* event buffer virtual address */ 522 523 u8 *evt_buf;
+165 -78
drivers/iommu/arm-smmu-v3.c
··· 408 408 409 409 /* High-level queue structures */ 410 410 #define ARM_SMMU_POLL_TIMEOUT_US 100 411 + #define ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US 1000000 /* 1s! */ 411 412 412 413 #define MSI_IOVA_BASE 0x8000000 413 414 #define MSI_IOVA_LENGTH 0x100000 415 + 416 + /* Until ACPICA headers cover IORT rev. C */ 417 + #ifndef ACPI_IORT_SMMU_HISILICON_HI161X 418 + #define ACPI_IORT_SMMU_HISILICON_HI161X 0x1 419 + #endif 420 + 421 + #ifndef ACPI_IORT_SMMU_V3_CAVIUM_CN99XX 422 + #define ACPI_IORT_SMMU_V3_CAVIUM_CN99XX 0x2 423 + #endif 414 424 415 425 static bool disable_bypass; 416 426 module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO); ··· 607 597 u32 features; 608 598 609 599 #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) 600 + #define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1) 610 601 u32 options; 611 602 612 603 struct arm_smmu_cmdq cmdq; ··· 615 604 struct arm_smmu_priq priq; 616 605 617 606 int gerr_irq; 607 + int combined_irq; 618 608 619 609 unsigned long ias; /* IPA */ 620 610 unsigned long oas; /* PA */ ··· 657 645 struct mutex init_mutex; /* Protects smmu pointer */ 658 646 659 647 struct io_pgtable_ops *pgtbl_ops; 660 - spinlock_t pgtbl_lock; 661 648 662 649 enum arm_smmu_domain_stage stage; 663 650 union { ··· 674 663 675 664 static struct arm_smmu_option_prop arm_smmu_options[] = { 676 665 { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" }, 666 + { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"}, 677 667 { 0, NULL}, 678 668 }; 669 + 670 + static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset, 671 + struct arm_smmu_device *smmu) 672 + { 673 + if ((offset > SZ_64K) && 674 + (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)) 675 + offset -= SZ_64K; 676 + 677 + return smmu->base + offset; 678 + } 679 679 680 680 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) 681 681 { ··· 759 737 */ 760 738 static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe) 761 739 { 762 - 
ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US); 740 + ktime_t timeout; 741 + unsigned int delay = 1; 742 + 743 + /* Wait longer if it's queue drain */ 744 + timeout = ktime_add_us(ktime_get(), drain ? 745 + ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US : 746 + ARM_SMMU_POLL_TIMEOUT_US); 763 747 764 748 while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) { 765 749 if (ktime_compare(ktime_get(), timeout) > 0) ··· 775 747 wfe(); 776 748 } else { 777 749 cpu_relax(); 778 - udelay(1); 750 + udelay(delay); 751 + delay *= 2; 779 752 } 780 753 } 781 754 ··· 1331 1302 return IRQ_HANDLED; 1332 1303 } 1333 1304 1305 + static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev) 1306 + { 1307 + struct arm_smmu_device *smmu = dev; 1308 + 1309 + arm_smmu_evtq_thread(irq, dev); 1310 + if (smmu->features & ARM_SMMU_FEAT_PRI) 1311 + arm_smmu_priq_thread(irq, dev); 1312 + 1313 + return IRQ_HANDLED; 1314 + } 1315 + 1316 + static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev) 1317 + { 1318 + arm_smmu_gerror_handler(irq, dev); 1319 + arm_smmu_cmdq_sync_handler(irq, dev); 1320 + return IRQ_WAKE_THREAD; 1321 + } 1322 + 1334 1323 /* IO_PGTABLE API */ 1335 1324 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu) 1336 1325 { ··· 1453 1406 } 1454 1407 1455 1408 mutex_init(&smmu_domain->init_mutex); 1456 - spin_lock_init(&smmu_domain->pgtbl_lock); 1457 1409 return &smmu_domain->domain; 1458 1410 } 1459 1411 ··· 1601 1555 .iommu_dev = smmu->dev, 1602 1556 }; 1603 1557 1558 + if (smmu->features & ARM_SMMU_FEAT_COHERENCY) 1559 + pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA; 1560 + 1604 1561 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); 1605 1562 if (!pgtbl_ops) 1606 1563 return -ENOMEM; ··· 1724 1675 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova, 1725 1676 phys_addr_t paddr, size_t size, int prot) 1726 1677 { 1727 - int ret; 1728 - unsigned long flags; 1729 - struct arm_smmu_domain *smmu_domain = 
to_smmu_domain(domain); 1730 - struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; 1678 + struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; 1731 1679 1732 1680 if (!ops) 1733 1681 return -ENODEV; 1734 1682 1735 - spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags); 1736 - ret = ops->map(ops, iova, paddr, size, prot); 1737 - spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags); 1738 - return ret; 1683 + return ops->map(ops, iova, paddr, size, prot); 1739 1684 } 1740 1685 1741 1686 static size_t 1742 1687 arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) 1743 1688 { 1744 - size_t ret; 1745 - unsigned long flags; 1746 - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 1747 - struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; 1689 + struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; 1748 1690 1749 1691 if (!ops) 1750 1692 return 0; 1751 1693 1752 - spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags); 1753 - ret = ops->unmap(ops, iova, size); 1754 - spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags); 1755 - return ret; 1694 + return ops->unmap(ops, iova, size); 1756 1695 } 1757 1696 1758 1697 static phys_addr_t 1759 1698 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) 1760 1699 { 1761 - phys_addr_t ret; 1762 - unsigned long flags; 1763 - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 1764 - struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; 1700 + struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; 1765 1701 1766 1702 if (domain->type == IOMMU_DOMAIN_IDENTITY) 1767 1703 return iova; ··· 1754 1720 if (!ops) 1755 1721 return 0; 1756 1722 1757 - spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags); 1758 - ret = ops->iova_to_phys(ops, iova); 1759 - spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags); 1760 - 1761 - return ret; 1723 + return ops->iova_to_phys(ops, iova); 1762 1724 } 1763 1725 1764 1726 static struct platform_driver arm_smmu_driver; 
··· 1991 1961 return -ENOMEM; 1992 1962 } 1993 1963 1994 - q->prod_reg = smmu->base + prod_off; 1995 - q->cons_reg = smmu->base + cons_off; 1964 + q->prod_reg = arm_smmu_page1_fixup(prod_off, smmu); 1965 + q->cons_reg = arm_smmu_page1_fixup(cons_off, smmu); 1996 1966 q->ent_dwords = dwords; 1997 1967 1998 1968 q->q_base = Q_BASE_RWA; ··· 2248 2218 devm_add_action(dev, arm_smmu_free_msis, dev); 2249 2219 } 2250 2220 2251 - static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) 2221 + static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu) 2252 2222 { 2253 - int ret, irq; 2254 - u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN; 2255 - 2256 - /* Disable IRQs first */ 2257 - ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL, 2258 - ARM_SMMU_IRQ_CTRLACK); 2259 - if (ret) { 2260 - dev_err(smmu->dev, "failed to disable irqs\n"); 2261 - return ret; 2262 - } 2223 + int irq, ret; 2263 2224 2264 2225 arm_smmu_setup_msis(smmu); 2265 2226 ··· 2293 2272 if (ret < 0) 2294 2273 dev_warn(smmu->dev, 2295 2274 "failed to enable priq irq\n"); 2296 - else 2297 - irqen_flags |= IRQ_CTRL_PRIQ_IRQEN; 2298 2275 } 2299 2276 } 2277 + } 2278 + 2279 + static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) 2280 + { 2281 + int ret, irq; 2282 + u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN; 2283 + 2284 + /* Disable IRQs first */ 2285 + ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL, 2286 + ARM_SMMU_IRQ_CTRLACK); 2287 + if (ret) { 2288 + dev_err(smmu->dev, "failed to disable irqs\n"); 2289 + return ret; 2290 + } 2291 + 2292 + irq = smmu->combined_irq; 2293 + if (irq) { 2294 + /* 2295 + * Cavium ThunderX2 implementation does not support unique 2296 + * irq lines. Use a single irq line for all the SMMUv3 interrupts. 
2297 + */ 2298 + ret = devm_request_threaded_irq(smmu->dev, irq, 2299 + arm_smmu_combined_irq_handler, 2300 + arm_smmu_combined_irq_thread, 2301 + IRQF_ONESHOT, 2302 + "arm-smmu-v3-combined-irq", smmu); 2303 + if (ret < 0) 2304 + dev_warn(smmu->dev, "failed to enable combined irq\n"); 2305 + } else 2306 + arm_smmu_setup_unique_irqs(smmu); 2307 + 2308 + if (smmu->features & ARM_SMMU_FEAT_PRI) 2309 + irqen_flags |= IRQ_CTRL_PRIQ_IRQEN; 2300 2310 2301 2311 /* Enable interrupt generation on the SMMU */ 2302 2312 ret = arm_smmu_write_reg_sync(smmu, irqen_flags, ··· 2415 2363 2416 2364 /* Event queue */ 2417 2365 writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE); 2418 - writel_relaxed(smmu->evtq.q.prod, smmu->base + ARM_SMMU_EVTQ_PROD); 2419 - writel_relaxed(smmu->evtq.q.cons, smmu->base + ARM_SMMU_EVTQ_CONS); 2366 + writel_relaxed(smmu->evtq.q.prod, 2367 + arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu)); 2368 + writel_relaxed(smmu->evtq.q.cons, 2369 + arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu)); 2420 2370 2421 2371 enables |= CR0_EVTQEN; 2422 2372 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, ··· 2433 2379 writeq_relaxed(smmu->priq.q.q_base, 2434 2380 smmu->base + ARM_SMMU_PRIQ_BASE); 2435 2381 writel_relaxed(smmu->priq.q.prod, 2436 - smmu->base + ARM_SMMU_PRIQ_PROD); 2382 + arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu)); 2437 2383 writel_relaxed(smmu->priq.q.cons, 2438 - smmu->base + ARM_SMMU_PRIQ_CONS); 2384 + arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu)); 2439 2385 2440 2386 enables |= CR0_PRIQEN; 2441 2387 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, ··· 2659 2605 } 2660 2606 2661 2607 #ifdef CONFIG_ACPI 2608 + static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu) 2609 + { 2610 + switch (model) { 2611 + case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX: 2612 + smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY; 2613 + break; 2614 + case ACPI_IORT_SMMU_HISILICON_HI161X: 2615 + smmu->options |= 
ARM_SMMU_OPT_SKIP_PREFETCH; 2616 + break; 2617 + } 2618 + 2619 + dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options); 2620 + } 2621 + 2662 2622 static int arm_smmu_device_acpi_probe(struct platform_device *pdev, 2663 2623 struct arm_smmu_device *smmu) 2664 2624 { ··· 2684 2616 2685 2617 /* Retrieve SMMUv3 specific data */ 2686 2618 iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data; 2619 + 2620 + acpi_smmu_get_options(iort_smmu->model, smmu); 2687 2621 2688 2622 if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE) 2689 2623 smmu->features |= ARM_SMMU_FEAT_COHERENCY; ··· 2722 2652 return ret; 2723 2653 } 2724 2654 2655 + static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu) 2656 + { 2657 + if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY) 2658 + return SZ_64K; 2659 + else 2660 + return SZ_128K; 2661 + } 2662 + 2725 2663 static int arm_smmu_device_probe(struct platform_device *pdev) 2726 2664 { 2727 2665 int irq, ret; ··· 2746 2668 } 2747 2669 smmu->dev = dev; 2748 2670 2749 - /* Base address */ 2750 - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); 2751 - if (resource_size(res) + 1 < SZ_128K) { 2752 - dev_err(dev, "MMIO region too small (%pr)\n", res); 2753 - return -EINVAL; 2754 - } 2755 - ioaddr = res->start; 2756 - 2757 - smmu->base = devm_ioremap_resource(dev, res); 2758 - if (IS_ERR(smmu->base)) 2759 - return PTR_ERR(smmu->base); 2760 - 2761 - /* Interrupt lines */ 2762 - irq = platform_get_irq_byname(pdev, "eventq"); 2763 - if (irq > 0) 2764 - smmu->evtq.q.irq = irq; 2765 - 2766 - irq = platform_get_irq_byname(pdev, "priq"); 2767 - if (irq > 0) 2768 - smmu->priq.q.irq = irq; 2769 - 2770 - irq = platform_get_irq_byname(pdev, "cmdq-sync"); 2771 - if (irq > 0) 2772 - smmu->cmdq.q.irq = irq; 2773 - 2774 - irq = platform_get_irq_byname(pdev, "gerror"); 2775 - if (irq > 0) 2776 - smmu->gerr_irq = irq; 2777 - 2778 2671 if (dev->of_node) { 2779 2672 ret = arm_smmu_device_dt_probe(pdev, smmu); 2780 2673 } else { ··· 2757 2708 
/* Set bypass mode according to firmware probing result */ 2758 2709 bypass = !!ret; 2759 2710 2711 + /* Base address */ 2712 + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); 2713 + if (resource_size(res) + 1 < arm_smmu_resource_size(smmu)) { 2714 + dev_err(dev, "MMIO region too small (%pr)\n", res); 2715 + return -EINVAL; 2716 + } 2717 + ioaddr = res->start; 2718 + 2719 + smmu->base = devm_ioremap_resource(dev, res); 2720 + if (IS_ERR(smmu->base)) 2721 + return PTR_ERR(smmu->base); 2722 + 2723 + /* Interrupt lines */ 2724 + 2725 + irq = platform_get_irq_byname(pdev, "combined"); 2726 + if (irq > 0) 2727 + smmu->combined_irq = irq; 2728 + else { 2729 + irq = platform_get_irq_byname(pdev, "eventq"); 2730 + if (irq > 0) 2731 + smmu->evtq.q.irq = irq; 2732 + 2733 + irq = platform_get_irq_byname(pdev, "priq"); 2734 + if (irq > 0) 2735 + smmu->priq.q.irq = irq; 2736 + 2737 + irq = platform_get_irq_byname(pdev, "cmdq-sync"); 2738 + if (irq > 0) 2739 + smmu->cmdq.q.irq = irq; 2740 + 2741 + irq = platform_get_irq_byname(pdev, "gerror"); 2742 + if (irq > 0) 2743 + smmu->gerr_irq = irq; 2744 + } 2760 2745 /* Probe the h/w */ 2761 2746 ret = arm_smmu_device_hw_probe(smmu); 2762 2747 if (ret) ··· 2819 2736 iommu_device_set_fwnode(&smmu->iommu, dev->fwnode); 2820 2737 2821 2738 ret = iommu_device_register(&smmu->iommu); 2739 + if (ret) { 2740 + dev_err(dev, "Failed to register iommu\n"); 2741 + return ret; 2742 + } 2822 2743 2823 2744 #ifdef CONFIG_PCI 2824 2745 if (pci_bus_type.iommu_ops != &arm_smmu_ops) { ··· 2855 2768 return 0; 2856 2769 } 2857 2770 2858 - static struct of_device_id arm_smmu_of_match[] = { 2771 + static const struct of_device_id arm_smmu_of_match[] = { 2859 2772 { .compatible = "arm,smmu-v3", }, 2860 2773 { }, 2861 2774 };
+33 -31
drivers/iommu/arm-smmu.c
··· 312 312 CAVIUM_SMMUV2, 313 313 }; 314 314 315 + /* Until ACPICA headers cover IORT rev. C */ 316 + #ifndef ACPI_IORT_SMMU_CORELINK_MMU401 317 + #define ACPI_IORT_SMMU_CORELINK_MMU401 0x4 318 + #endif 319 + #ifndef ACPI_IORT_SMMU_CAVIUM_THUNDERX 320 + #define ACPI_IORT_SMMU_CAVIUM_THUNDERX 0x5 321 + #endif 322 + 315 323 struct arm_smmu_s2cr { 316 324 struct iommu_group *group; 317 325 int count; ··· 433 425 struct arm_smmu_domain { 434 426 struct arm_smmu_device *smmu; 435 427 struct io_pgtable_ops *pgtbl_ops; 436 - spinlock_t pgtbl_lock; 437 428 struct arm_smmu_cfg cfg; 438 429 enum arm_smmu_domain_stage stage; 439 430 struct mutex init_mutex; /* Protects smmu pointer */ 431 + spinlock_t cb_lock; /* Serialises ATS1* ops */ 440 432 struct iommu_domain domain; 441 433 }; 442 434 ··· 1018 1010 .iommu_dev = smmu->dev, 1019 1011 }; 1020 1012 1013 + if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) 1014 + pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA; 1015 + 1021 1016 smmu_domain->smmu = smmu; 1022 1017 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); 1023 1018 if (!pgtbl_ops) { ··· 1113 1102 } 1114 1103 1115 1104 mutex_init(&smmu_domain->init_mutex); 1116 - spin_lock_init(&smmu_domain->pgtbl_lock); 1105 + spin_lock_init(&smmu_domain->cb_lock); 1117 1106 1118 1107 return &smmu_domain->domain; 1119 1108 } ··· 1391 1380 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova, 1392 1381 phys_addr_t paddr, size_t size, int prot) 1393 1382 { 1394 - int ret; 1395 - unsigned long flags; 1396 - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 1397 - struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops; 1383 + struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; 1398 1384 1399 1385 if (!ops) 1400 1386 return -ENODEV; 1401 1387 1402 - spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags); 1403 - ret = ops->map(ops, iova, paddr, size, prot); 1404 - spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags); 1405 - return ret; 1388 + 
return ops->map(ops, iova, paddr, size, prot); 1406 1389 } 1407 1390 1408 1391 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, 1409 1392 size_t size) 1410 1393 { 1411 - size_t ret; 1412 - unsigned long flags; 1413 - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 1414 - struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops; 1394 + struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; 1415 1395 1416 1396 if (!ops) 1417 1397 return 0; 1418 1398 1419 - spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags); 1420 - ret = ops->unmap(ops, iova, size); 1421 - spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags); 1422 - return ret; 1399 + return ops->unmap(ops, iova, size); 1423 1400 } 1424 1401 1425 1402 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, ··· 1421 1422 void __iomem *cb_base; 1422 1423 u32 tmp; 1423 1424 u64 phys; 1424 - unsigned long va; 1425 + unsigned long va, flags; 1425 1426 1426 1427 cb_base = ARM_SMMU_CB(smmu, cfg->cbndx); 1427 1428 1429 + spin_lock_irqsave(&smmu_domain->cb_lock, flags); 1428 1430 /* ATS1 registers can only be written atomically */ 1429 1431 va = iova & ~0xfffUL; 1430 1432 if (smmu->version == ARM_SMMU_V2) ··· 1435 1435 1436 1436 if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp, 1437 1437 !(tmp & ATSR_ACTIVE), 5, 50)) { 1438 + spin_unlock_irqrestore(&smmu_domain->cb_lock, flags); 1438 1439 dev_err(dev, 1439 1440 "iova to phys timed out on %pad. 
Falling back to software table walk.\n", 1440 1441 &iova); ··· 1443 1442 } 1444 1443 1445 1444 phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR); 1445 + spin_unlock_irqrestore(&smmu_domain->cb_lock, flags); 1446 1446 if (phys & CB_PAR_F) { 1447 1447 dev_err(dev, "translation fault!\n"); 1448 1448 dev_err(dev, "PAR = 0x%llx\n", phys); ··· 1456 1454 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, 1457 1455 dma_addr_t iova) 1458 1456 { 1459 - phys_addr_t ret; 1460 - unsigned long flags; 1461 1457 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 1462 - struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops; 1458 + struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; 1463 1459 1464 1460 if (domain->type == IOMMU_DOMAIN_IDENTITY) 1465 1461 return iova; ··· 1465 1465 if (!ops) 1466 1466 return 0; 1467 1467 1468 - spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags); 1469 1468 if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS && 1470 - smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { 1471 - ret = arm_smmu_iova_to_phys_hard(domain, iova); 1472 - } else { 1473 - ret = ops->iova_to_phys(ops, iova); 1474 - } 1469 + smmu_domain->stage == ARM_SMMU_DOMAIN_S1) 1470 + return arm_smmu_iova_to_phys_hard(domain, iova); 1475 1471 1476 - spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags); 1477 - 1478 - return ret; 1472 + return ops->iova_to_phys(ops, iova); 1479 1473 } 1480 1474 1481 1475 static bool arm_smmu_capable(enum iommu_cap cap) ··· 2067 2073 smmu->version = ARM_SMMU_V1; 2068 2074 smmu->model = GENERIC_SMMU; 2069 2075 break; 2076 + case ACPI_IORT_SMMU_CORELINK_MMU401: 2077 + smmu->version = ARM_SMMU_V1_64K; 2078 + smmu->model = GENERIC_SMMU; 2079 + break; 2070 2080 case ACPI_IORT_SMMU_V2: 2071 2081 smmu->version = ARM_SMMU_V2; 2072 2082 smmu->model = GENERIC_SMMU; ··· 2078 2080 case ACPI_IORT_SMMU_CORELINK_MMU500: 2079 2081 smmu->version = ARM_SMMU_V2; 2080 2082 smmu->model = ARM_MMU500; 2083 + break; 2084 + case ACPI_IORT_SMMU_CAVIUM_THUNDERX: 
2085 + smmu->version = ARM_SMMU_V2; 2086 + smmu->model = CAVIUM_SMMUV2; 2081 2087 break; 2082 2088 default: 2083 2089 ret = -ENODEV;
+1 -1
drivers/iommu/dma-iommu.c
··· 316 316 * If we have devices with different DMA masks, move the free 317 317 * area cache limit down for the benefit of the smaller one. 318 318 */ 319 - iovad->dma_32bit_pfn = min(end_pfn, iovad->dma_32bit_pfn); 319 + iovad->dma_32bit_pfn = min(end_pfn + 1, iovad->dma_32bit_pfn); 320 320 321 321 return 0; 322 322 }
+5 -21
drivers/iommu/intel-iommu.c
··· 481 481 struct deferred_flush_table *tables; 482 482 }; 483 483 484 - DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush); 484 + static DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush); 485 485 486 486 /* bitmap for indexing intel_iommus */ 487 487 static int g_num_of_iommus; ··· 2390 2390 2391 2391 /* No lock here, assumes no domain exit in normal case */ 2392 2392 info = dev->archdata.iommu; 2393 - if (info) 2393 + if (likely(info)) 2394 2394 return info->domain; 2395 2395 return NULL; 2396 2396 } ··· 3478 3478 return iova_pfn; 3479 3479 } 3480 3480 3481 - static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev) 3481 + static struct dmar_domain *get_valid_domain_for_dev(struct device *dev) 3482 3482 { 3483 3483 struct dmar_domain *domain, *tmp; 3484 3484 struct dmar_rmrr_unit *rmrr; ··· 3523 3523 3524 3524 3525 3525 return domain; 3526 - } 3527 - 3528 - static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev) 3529 - { 3530 - struct device_domain_info *info; 3531 - 3532 - /* No lock here, assumes no domain exit in normal case */ 3533 - info = dev->archdata.iommu; 3534 - if (likely(info)) 3535 - return info->domain; 3536 - 3537 - return __get_valid_domain_for_dev(dev); 3538 3526 } 3539 3527 3540 3528 /* Check if the dev needs to go through non-identity map and unmap process.*/ ··· 3713 3725 struct intel_iommu *iommu; 3714 3726 struct deferred_flush_entry *entry; 3715 3727 struct deferred_flush_data *flush_data; 3716 - unsigned int cpuid; 3717 3728 3718 - cpuid = get_cpu(); 3719 - flush_data = per_cpu_ptr(&deferred_flush, cpuid); 3729 + flush_data = raw_cpu_ptr(&deferred_flush); 3720 3730 3721 3731 /* Flush all CPUs' entries to avoid deferring too much. 
If 3722 3732 * this becomes a bottleneck, can just flush us, and rely on ··· 3747 3761 } 3748 3762 flush_data->size++; 3749 3763 spin_unlock_irqrestore(&flush_data->lock, flags); 3750 - 3751 - put_cpu(); 3752 3764 } 3753 3765 3754 3766 static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) ··· 3957 3973 return !dma_addr; 3958 3974 } 3959 3975 3960 - struct dma_map_ops intel_dma_ops = { 3976 + const struct dma_map_ops intel_dma_ops = { 3961 3977 .alloc = intel_alloc_coherent, 3962 3978 .free = intel_free_coherent, 3963 3979 .map_sg = intel_map_sg,
+30
drivers/iommu/intel-svm.c
··· 489 489 } 490 490 EXPORT_SYMBOL_GPL(intel_svm_unbind_mm); 491 491 492 + int intel_svm_is_pasid_valid(struct device *dev, int pasid) 493 + { 494 + struct intel_iommu *iommu; 495 + struct intel_svm *svm; 496 + int ret = -EINVAL; 497 + 498 + mutex_lock(&pasid_mutex); 499 + iommu = intel_svm_device_to_iommu(dev); 500 + if (!iommu || !iommu->pasid_table) 501 + goto out; 502 + 503 + svm = idr_find(&iommu->pasid_idr, pasid); 504 + if (!svm) 505 + goto out; 506 + 507 + /* init_mm is used in this case */ 508 + if (!svm->mm) 509 + ret = 1; 510 + else if (atomic_read(&svm->mm->mm_users) > 0) 511 + ret = 1; 512 + else 513 + ret = 0; 514 + 515 + out: 516 + mutex_unlock(&pasid_mutex); 517 + 518 + return ret; 519 + } 520 + EXPORT_SYMBOL_GPL(intel_svm_is_pasid_valid); 521 + 492 522 /* Page request queue descriptor */ 493 523 struct page_req_dsc { 494 524 u64 srr:1;
+2 -2
drivers/iommu/intel_irq_remapping.c
··· 76 76 * the dmar_global_lock. 77 77 */ 78 78 static DEFINE_RAW_SPINLOCK(irq_2_ir_lock); 79 - static struct irq_domain_ops intel_ir_domain_ops; 79 + static const struct irq_domain_ops intel_ir_domain_ops; 80 80 81 81 static void iommu_disable_irq_remapping(struct intel_iommu *iommu); 82 82 static int __init parse_ioapics_under_ir(void); ··· 1407 1407 modify_irte(&data->irq_2_iommu, &entry); 1408 1408 } 1409 1409 1410 - static struct irq_domain_ops intel_ir_domain_ops = { 1410 + static const struct irq_domain_ops intel_ir_domain_ops = { 1411 1411 .alloc = intel_irq_remapping_alloc, 1412 1412 .free = intel_irq_remapping_free, 1413 1413 .activate = intel_irq_remapping_activate,
+120 -65
drivers/iommu/io-pgtable-arm-v7s.c
··· 32 32 33 33 #define pr_fmt(fmt) "arm-v7s io-pgtable: " fmt 34 34 35 + #include <linux/atomic.h> 35 36 #include <linux/dma-mapping.h> 36 37 #include <linux/gfp.h> 37 38 #include <linux/iommu.h> ··· 40 39 #include <linux/kmemleak.h> 41 40 #include <linux/sizes.h> 42 41 #include <linux/slab.h> 42 + #include <linux/spinlock.h> 43 43 #include <linux/types.h> 44 44 45 45 #include <asm/barrier.h> ··· 94 92 #define ARM_V7S_PTE_TYPE_CONT_PAGE 0x1 95 93 96 94 #define ARM_V7S_PTE_IS_VALID(pte) (((pte) & 0x3) != 0) 97 - #define ARM_V7S_PTE_IS_TABLE(pte, lvl) (lvl == 1 && ((pte) & ARM_V7S_PTE_TYPE_TABLE)) 95 + #define ARM_V7S_PTE_IS_TABLE(pte, lvl) \ 96 + ((lvl) == 1 && (((pte) & 0x3) == ARM_V7S_PTE_TYPE_TABLE)) 98 97 99 98 /* Page table bits */ 100 99 #define ARM_V7S_ATTR_XN(lvl) BIT(4 * (2 - (lvl))) ··· 170 167 171 168 arm_v7s_iopte *pgd; 172 169 struct kmem_cache *l2_tables; 170 + spinlock_t split_lock; 173 171 }; 174 172 175 173 static dma_addr_t __arm_v7s_dma_addr(void *pages) ··· 190 186 static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp, 191 187 struct arm_v7s_io_pgtable *data) 192 188 { 193 - struct device *dev = data->iop.cfg.iommu_dev; 189 + struct io_pgtable_cfg *cfg = &data->iop.cfg; 190 + struct device *dev = cfg->iommu_dev; 194 191 dma_addr_t dma; 195 192 size_t size = ARM_V7S_TABLE_SIZE(lvl); 196 193 void *table = NULL; ··· 200 195 table = (void *)__get_dma_pages(__GFP_ZERO, get_order(size)); 201 196 else if (lvl == 2) 202 197 table = kmem_cache_zalloc(data->l2_tables, gfp | GFP_DMA); 203 - if (table && !selftest_running) { 198 + if (table && !(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) { 204 199 dma = dma_map_single(dev, table, size, DMA_TO_DEVICE); 205 200 if (dma_mapping_error(dev, dma)) 206 201 goto out_free; ··· 229 224 static void __arm_v7s_free_table(void *table, int lvl, 230 225 struct arm_v7s_io_pgtable *data) 231 226 { 232 - struct device *dev = data->iop.cfg.iommu_dev; 227 + struct io_pgtable_cfg *cfg = &data->iop.cfg; 228 + struct device *dev = 
cfg->iommu_dev; 233 229 size_t size = ARM_V7S_TABLE_SIZE(lvl); 234 230 235 - if (!selftest_running) 231 + if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) 236 232 dma_unmap_single(dev, __arm_v7s_dma_addr(table), size, 237 233 DMA_TO_DEVICE); 238 234 if (lvl == 1) ··· 245 239 static void __arm_v7s_pte_sync(arm_v7s_iopte *ptep, int num_entries, 246 240 struct io_pgtable_cfg *cfg) 247 241 { 248 - if (selftest_running) 242 + if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) 249 243 return; 250 244 251 245 dma_sync_single_for_device(cfg->iommu_dev, __arm_v7s_dma_addr(ptep), ··· 285 279 pte |= ARM_V7S_ATTR_B; 286 280 else if (prot & IOMMU_CACHE) 287 281 pte |= ARM_V7S_ATTR_B | ARM_V7S_ATTR_C; 282 + 283 + pte |= ARM_V7S_PTE_TYPE_PAGE; 284 + if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)) 285 + pte |= ARM_V7S_ATTR_NS_SECTION; 286 + 287 + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB) 288 + pte |= ARM_V7S_ATTR_MTK_4GB; 288 289 289 290 return pte; 290 291 } ··· 365 352 int lvl, int num_entries, arm_v7s_iopte *ptep) 366 353 { 367 354 struct io_pgtable_cfg *cfg = &data->iop.cfg; 368 - arm_v7s_iopte pte = arm_v7s_prot_to_pte(prot, lvl, cfg); 355 + arm_v7s_iopte pte; 369 356 int i; 370 357 371 358 for (i = 0; i < num_entries; i++) ··· 387 374 return -EEXIST; 388 375 } 389 376 390 - pte |= ARM_V7S_PTE_TYPE_PAGE; 391 - if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)) 392 - pte |= ARM_V7S_ATTR_NS_SECTION; 393 - 394 - if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB) 395 - pte |= ARM_V7S_ATTR_MTK_4GB; 396 - 377 + pte = arm_v7s_prot_to_pte(prot, lvl, cfg); 397 378 if (num_entries > 1) 398 379 pte = arm_v7s_pte_to_cont(pte, lvl); 399 380 ··· 395 388 396 389 __arm_v7s_set_pte(ptep, pte, num_entries, cfg); 397 390 return 0; 391 + } 392 + 393 + static arm_v7s_iopte arm_v7s_install_table(arm_v7s_iopte *table, 394 + arm_v7s_iopte *ptep, 395 + arm_v7s_iopte curr, 396 + struct io_pgtable_cfg *cfg) 397 + { 398 + arm_v7s_iopte old, new; 399 + 400 + new = virt_to_phys(table) | 
ARM_V7S_PTE_TYPE_TABLE; 401 + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) 402 + new |= ARM_V7S_ATTR_NS_TABLE; 403 + 404 + /* 405 + * Ensure the table itself is visible before its PTE can be. 406 + * Whilst we could get away with cmpxchg64_release below, this 407 + * doesn't have any ordering semantics when !CONFIG_SMP. 408 + */ 409 + dma_wmb(); 410 + 411 + old = cmpxchg_relaxed(ptep, curr, new); 412 + __arm_v7s_pte_sync(ptep, 1, cfg); 413 + 414 + return old; 398 415 } 399 416 400 417 static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova, ··· 442 411 return -EINVAL; 443 412 444 413 /* Grab a pointer to the next level */ 445 - pte = *ptep; 414 + pte = READ_ONCE(*ptep); 446 415 if (!pte) { 447 416 cptep = __arm_v7s_alloc_table(lvl + 1, GFP_ATOMIC, data); 448 417 if (!cptep) 449 418 return -ENOMEM; 450 419 451 - pte = virt_to_phys(cptep) | ARM_V7S_PTE_TYPE_TABLE; 452 - if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) 453 - pte |= ARM_V7S_ATTR_NS_TABLE; 454 - 455 - __arm_v7s_set_pte(ptep, pte, 1, cfg); 456 - } else if (ARM_V7S_PTE_IS_TABLE(pte, lvl)) { 457 - cptep = iopte_deref(pte, lvl); 420 + pte = arm_v7s_install_table(cptep, ptep, 0, cfg); 421 + if (pte) 422 + __arm_v7s_free_table(cptep, lvl + 1, data); 458 423 } else { 424 + /* We've no easy way of knowing if it's synced yet, so... 
*/ 425 + __arm_v7s_pte_sync(ptep, 1, cfg); 426 + } 427 + 428 + if (ARM_V7S_PTE_IS_TABLE(pte, lvl)) { 429 + cptep = iopte_deref(pte, lvl); 430 + } else if (pte) { 459 431 /* We require an unmap first */ 460 432 WARN_ON(!selftest_running); 461 433 return -EEXIST; ··· 511 477 kfree(data); 512 478 } 513 479 514 - static void arm_v7s_split_cont(struct arm_v7s_io_pgtable *data, 515 - unsigned long iova, int idx, int lvl, 516 - arm_v7s_iopte *ptep) 480 + static arm_v7s_iopte arm_v7s_split_cont(struct arm_v7s_io_pgtable *data, 481 + unsigned long iova, int idx, int lvl, 482 + arm_v7s_iopte *ptep) 517 483 { 518 484 struct io_pgtable *iop = &data->iop; 519 485 arm_v7s_iopte pte; 520 486 size_t size = ARM_V7S_BLOCK_SIZE(lvl); 521 487 int i; 522 488 489 + /* Check that we didn't lose a race to get the lock */ 490 + pte = *ptep; 491 + if (!arm_v7s_pte_is_cont(pte, lvl)) 492 + return pte; 493 + 523 494 ptep -= idx & (ARM_V7S_CONT_PAGES - 1); 524 - pte = arm_v7s_cont_to_pte(*ptep, lvl); 525 - for (i = 0; i < ARM_V7S_CONT_PAGES; i++) { 526 - ptep[i] = pte; 527 - pte += size; 528 - } 495 + pte = arm_v7s_cont_to_pte(pte, lvl); 496 + for (i = 0; i < ARM_V7S_CONT_PAGES; i++) 497 + ptep[i] = pte + i * size; 529 498 530 499 __arm_v7s_pte_sync(ptep, ARM_V7S_CONT_PAGES, &iop->cfg); 531 500 532 501 size *= ARM_V7S_CONT_PAGES; 533 502 io_pgtable_tlb_add_flush(iop, iova, size, size, true); 534 503 io_pgtable_tlb_sync(iop); 504 + return pte; 535 505 } 536 506 537 507 static int arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data, 538 508 unsigned long iova, size_t size, 539 - arm_v7s_iopte *ptep) 509 + arm_v7s_iopte blk_pte, arm_v7s_iopte *ptep) 540 510 { 541 - unsigned long blk_start, blk_end, blk_size; 542 - phys_addr_t blk_paddr; 543 - arm_v7s_iopte table = 0; 544 - int prot = arm_v7s_pte_to_prot(*ptep, 1); 511 + struct io_pgtable_cfg *cfg = &data->iop.cfg; 512 + arm_v7s_iopte pte, *tablep; 513 + int i, unmap_idx, num_entries, num_ptes; 545 514 546 - blk_size = ARM_V7S_BLOCK_SIZE(1); 
547 - blk_start = iova & ARM_V7S_LVL_MASK(1); 548 - blk_end = blk_start + ARM_V7S_BLOCK_SIZE(1); 549 - blk_paddr = *ptep & ARM_V7S_LVL_MASK(1); 515 + tablep = __arm_v7s_alloc_table(2, GFP_ATOMIC, data); 516 + if (!tablep) 517 + return 0; /* Bytes unmapped */ 550 518 551 - for (; blk_start < blk_end; blk_start += size, blk_paddr += size) { 552 - arm_v7s_iopte *tablep; 519 + num_ptes = ARM_V7S_PTES_PER_LVL(2); 520 + num_entries = size >> ARM_V7S_LVL_SHIFT(2); 521 + unmap_idx = ARM_V7S_LVL_IDX(iova, 2); 553 522 523 + pte = arm_v7s_prot_to_pte(arm_v7s_pte_to_prot(blk_pte, 1), 2, cfg); 524 + if (num_entries > 1) 525 + pte = arm_v7s_pte_to_cont(pte, 2); 526 + 527 + for (i = 0; i < num_ptes; i += num_entries, pte += size) { 554 528 /* Unmap! */ 555 - if (blk_start == iova) 529 + if (i == unmap_idx) 556 530 continue; 557 531 558 - /* __arm_v7s_map expects a pointer to the start of the table */ 559 - tablep = &table - ARM_V7S_LVL_IDX(blk_start, 1); 560 - if (__arm_v7s_map(data, blk_start, blk_paddr, size, prot, 1, 561 - tablep) < 0) { 562 - if (table) { 563 - /* Free the table we allocated */ 564 - tablep = iopte_deref(table, 1); 565 - __arm_v7s_free_table(tablep, 2, data); 566 - } 567 - return 0; /* Bytes unmapped */ 568 - } 532 + __arm_v7s_set_pte(&tablep[i], pte, num_entries, cfg); 569 533 } 570 534 571 - __arm_v7s_set_pte(ptep, table, 1, &data->iop.cfg); 572 - iova &= ~(blk_size - 1); 573 - io_pgtable_tlb_add_flush(&data->iop, iova, blk_size, blk_size, true); 535 + pte = arm_v7s_install_table(tablep, ptep, blk_pte, cfg); 536 + if (pte != blk_pte) { 537 + __arm_v7s_free_table(tablep, 2, data); 538 + 539 + if (!ARM_V7S_PTE_IS_TABLE(pte, 1)) 540 + return 0; 541 + 542 + tablep = iopte_deref(pte, 1); 543 + return __arm_v7s_unmap(data, iova, size, 2, tablep); 544 + } 545 + 546 + io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true); 574 547 return size; 575 548 } 576 549 ··· 596 555 idx = ARM_V7S_LVL_IDX(iova, lvl); 597 556 ptep += idx; 598 557 do { 599 - if 
(WARN_ON(!ARM_V7S_PTE_IS_VALID(ptep[i]))) 558 + pte[i] = READ_ONCE(ptep[i]); 559 + if (WARN_ON(!ARM_V7S_PTE_IS_VALID(pte[i]))) 600 560 return 0; 601 - pte[i] = ptep[i]; 602 561 } while (++i < num_entries); 603 562 604 563 /* 605 564 * If we've hit a contiguous 'large page' entry at this level, it 606 565 * needs splitting first, unless we're unmapping the whole lot. 566 + * 567 + * For splitting, we can't rewrite 16 PTEs atomically, and since we 568 + * can't necessarily assume TEX remap we don't have a software bit to 569 + * mark live entries being split. In practice (i.e. DMA API code), we 570 + * will never be splitting large pages anyway, so just wrap this edge 571 + * case in a lock for the sake of correctness and be done with it. 607 572 */ 608 - if (num_entries <= 1 && arm_v7s_pte_is_cont(pte[0], lvl)) 609 - arm_v7s_split_cont(data, iova, idx, lvl, ptep); 573 + if (num_entries <= 1 && arm_v7s_pte_is_cont(pte[0], lvl)) { 574 + unsigned long flags; 575 + 576 + spin_lock_irqsave(&data->split_lock, flags); 577 + pte[0] = arm_v7s_split_cont(data, iova, idx, lvl, ptep); 578 + spin_unlock_irqrestore(&data->split_lock, flags); 579 + } 610 580 611 581 /* If the size matches this level, we're in the right place */ 612 582 if (num_entries) { ··· 645 593 * Insert a table at the next level to map the old region, 646 594 * minus the part we want to unmap 647 595 */ 648 - return arm_v7s_split_blk_unmap(data, iova, size, ptep); 596 + return arm_v7s_split_blk_unmap(data, iova, size, pte[0], ptep); 649 597 } 650 598 651 599 /* Keep on walkin' */ ··· 675 623 u32 mask; 676 624 677 625 do { 678 - pte = ptep[ARM_V7S_LVL_IDX(iova, ++lvl)]; 626 + ptep += ARM_V7S_LVL_IDX(iova, ++lvl); 627 + pte = READ_ONCE(*ptep); 679 628 ptep = iopte_deref(pte, lvl); 680 629 } while (ARM_V7S_PTE_IS_TABLE(pte, lvl)); 681 630 ··· 704 651 if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | 705 652 IO_PGTABLE_QUIRK_NO_PERMS | 706 653 IO_PGTABLE_QUIRK_TLBI_ON_MAP | 707 - IO_PGTABLE_QUIRK_ARM_MTK_4GB)) 654 + 
IO_PGTABLE_QUIRK_ARM_MTK_4GB | 655 + IO_PGTABLE_QUIRK_NO_DMA)) 708 656 return NULL; 709 657 710 658 /* If ARM_MTK_4GB is enabled, the NO_PERMS is also expected. */ ··· 717 663 if (!data) 718 664 return NULL; 719 665 666 + spin_lock_init(&data->split_lock); 720 667 data->l2_tables = kmem_cache_create("io-pgtable_armv7s_l2", 721 668 ARM_V7S_TABLE_SIZE(2), 722 669 ARM_V7S_TABLE_SIZE(2), ··· 804 749 WARN_ON(cookie != cfg_cookie); 805 750 } 806 751 807 - static struct iommu_gather_ops dummy_tlb_ops = { 752 + static const struct iommu_gather_ops dummy_tlb_ops = { 808 753 .tlb_flush_all = dummy_tlb_flush_all, 809 754 .tlb_add_flush = dummy_tlb_add_flush, 810 755 .tlb_sync = dummy_tlb_sync, ··· 823 768 .tlb = &dummy_tlb_ops, 824 769 .oas = 32, 825 770 .ias = 32, 826 - .quirks = IO_PGTABLE_QUIRK_ARM_NS, 771 + .quirks = IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA, 827 772 .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M, 828 773 }; 829 774 unsigned int iova, size, iova_start;
+131 -64
drivers/iommu/io-pgtable-arm.c
··· 20 20 21 21 #define pr_fmt(fmt) "arm-lpae io-pgtable: " fmt 22 22 23 + #include <linux/atomic.h> 23 24 #include <linux/iommu.h> 24 25 #include <linux/kernel.h> 25 26 #include <linux/sizes.h> ··· 100 99 #define ARM_LPAE_PTE_ATTR_HI_MASK (((arm_lpae_iopte)6) << 52) 101 100 #define ARM_LPAE_PTE_ATTR_MASK (ARM_LPAE_PTE_ATTR_LO_MASK | \ 102 101 ARM_LPAE_PTE_ATTR_HI_MASK) 102 + /* Software bit for solving coherency races */ 103 + #define ARM_LPAE_PTE_SW_SYNC (((arm_lpae_iopte)1) << 55) 103 104 104 105 /* Stage-1 PTE */ 105 106 #define ARM_LPAE_PTE_AP_UNPRIV (((arm_lpae_iopte)1) << 6) ··· 220 217 if (!pages) 221 218 return NULL; 222 219 223 - if (!selftest_running) { 220 + if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) { 224 221 dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE); 225 222 if (dma_mapping_error(dev, dma)) 226 223 goto out_free; ··· 246 243 static void __arm_lpae_free_pages(void *pages, size_t size, 247 244 struct io_pgtable_cfg *cfg) 248 245 { 249 - if (!selftest_running) 246 + if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) 250 247 dma_unmap_single(cfg->iommu_dev, __arm_lpae_dma_addr(pages), 251 248 size, DMA_TO_DEVICE); 252 249 free_pages_exact(pages, size); 250 + } 251 + 252 + static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, 253 + struct io_pgtable_cfg *cfg) 254 + { 255 + dma_sync_single_for_device(cfg->iommu_dev, __arm_lpae_dma_addr(ptep), 256 + sizeof(*ptep), DMA_TO_DEVICE); 253 257 } 254 258 255 259 static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte, ··· 264 254 { 265 255 *ptep = pte; 266 256 267 - if (!selftest_running) 268 - dma_sync_single_for_device(cfg->iommu_dev, 269 - __arm_lpae_dma_addr(ptep), 270 - sizeof(pte), DMA_TO_DEVICE); 257 + if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) 258 + __arm_lpae_sync_pte(ptep, cfg); 271 259 } 272 260 273 261 static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, 274 262 unsigned long iova, size_t size, int lvl, 275 263 arm_lpae_iopte *ptep); 276 264 265 + static void 
__arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, 266 + phys_addr_t paddr, arm_lpae_iopte prot, 267 + int lvl, arm_lpae_iopte *ptep) 268 + { 269 + arm_lpae_iopte pte = prot; 270 + 271 + if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS) 272 + pte |= ARM_LPAE_PTE_NS; 273 + 274 + if (lvl == ARM_LPAE_MAX_LEVELS - 1) 275 + pte |= ARM_LPAE_PTE_TYPE_PAGE; 276 + else 277 + pte |= ARM_LPAE_PTE_TYPE_BLOCK; 278 + 279 + pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS; 280 + pte |= pfn_to_iopte(paddr >> data->pg_shift, data); 281 + 282 + __arm_lpae_set_pte(ptep, pte, &data->iop.cfg); 283 + } 284 + 277 285 static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, 278 286 unsigned long iova, phys_addr_t paddr, 279 287 arm_lpae_iopte prot, int lvl, 280 288 arm_lpae_iopte *ptep) 281 289 { 282 - arm_lpae_iopte pte = prot; 283 - struct io_pgtable_cfg *cfg = &data->iop.cfg; 290 + arm_lpae_iopte pte = *ptep; 284 291 285 - if (iopte_leaf(*ptep, lvl)) { 292 + if (iopte_leaf(pte, lvl)) { 286 293 /* We require an unmap first */ 287 294 WARN_ON(!selftest_running); 288 295 return -EEXIST; 289 - } else if (iopte_type(*ptep, lvl) == ARM_LPAE_PTE_TYPE_TABLE) { 296 + } else if (iopte_type(pte, lvl) == ARM_LPAE_PTE_TYPE_TABLE) { 290 297 /* 291 298 * We need to unmap and free the old table before 292 299 * overwriting it with a block entry. 
··· 316 289 return -EINVAL; 317 290 } 318 291 319 - if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) 320 - pte |= ARM_LPAE_PTE_NS; 321 - 322 - if (lvl == ARM_LPAE_MAX_LEVELS - 1) 323 - pte |= ARM_LPAE_PTE_TYPE_PAGE; 324 - else 325 - pte |= ARM_LPAE_PTE_TYPE_BLOCK; 326 - 327 - pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS; 328 - pte |= pfn_to_iopte(paddr >> data->pg_shift, data); 329 - 330 - __arm_lpae_set_pte(ptep, pte, cfg); 292 + __arm_lpae_init_pte(data, paddr, prot, lvl, ptep); 331 293 return 0; 294 + } 295 + 296 + static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table, 297 + arm_lpae_iopte *ptep, 298 + arm_lpae_iopte curr, 299 + struct io_pgtable_cfg *cfg) 300 + { 301 + arm_lpae_iopte old, new; 302 + 303 + new = __pa(table) | ARM_LPAE_PTE_TYPE_TABLE; 304 + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) 305 + new |= ARM_LPAE_PTE_NSTABLE; 306 + 307 + /* 308 + * Ensure the table itself is visible before its PTE can be. 309 + * Whilst we could get away with cmpxchg64_release below, this 310 + * doesn't have any ordering semantics when !CONFIG_SMP. 
311 + */ 312 + dma_wmb(); 313 + 314 + old = cmpxchg64_relaxed(ptep, curr, new); 315 + 316 + if ((cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) || 317 + (old & ARM_LPAE_PTE_SW_SYNC)) 318 + return old; 319 + 320 + /* Even if it's not ours, there's no point waiting; just kick it */ 321 + __arm_lpae_sync_pte(ptep, cfg); 322 + if (old == curr) 323 + WRITE_ONCE(*ptep, new | ARM_LPAE_PTE_SW_SYNC); 324 + 325 + return old; 332 326 } 333 327 334 328 static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, ··· 358 310 { 359 311 arm_lpae_iopte *cptep, pte; 360 312 size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data); 313 + size_t tblsz = ARM_LPAE_GRANULE(data); 361 314 struct io_pgtable_cfg *cfg = &data->iop.cfg; 362 315 363 316 /* Find our entry at the current level */ ··· 373 324 return -EINVAL; 374 325 375 326 /* Grab a pointer to the next level */ 376 - pte = *ptep; 327 + pte = READ_ONCE(*ptep); 377 328 if (!pte) { 378 - cptep = __arm_lpae_alloc_pages(ARM_LPAE_GRANULE(data), 379 - GFP_ATOMIC, cfg); 329 + cptep = __arm_lpae_alloc_pages(tblsz, GFP_ATOMIC, cfg); 380 330 if (!cptep) 381 331 return -ENOMEM; 382 332 383 - pte = __pa(cptep) | ARM_LPAE_PTE_TYPE_TABLE; 384 - if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) 385 - pte |= ARM_LPAE_PTE_NSTABLE; 386 - __arm_lpae_set_pte(ptep, pte, cfg); 387 - } else if (!iopte_leaf(pte, lvl)) { 333 + pte = arm_lpae_install_table(cptep, ptep, 0, cfg); 334 + if (pte) 335 + __arm_lpae_free_pages(cptep, tblsz, cfg); 336 + } else if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) && 337 + !(pte & ARM_LPAE_PTE_SW_SYNC)) { 338 + __arm_lpae_sync_pte(ptep, cfg); 339 + } 340 + 341 + if (pte && !iopte_leaf(pte, lvl)) { 388 342 cptep = iopte_deref(pte, data); 389 - } else { 343 + } else if (pte) { 390 344 /* We require an unmap first */ 391 345 WARN_ON(!selftest_running); 392 346 return -EEXIST; ··· 504 452 505 453 static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data, 506 454 unsigned long iova, size_t size, 507 - arm_lpae_iopte 
prot, int lvl, 508 - arm_lpae_iopte *ptep, size_t blk_size) 455 + arm_lpae_iopte blk_pte, int lvl, 456 + arm_lpae_iopte *ptep) 509 457 { 510 - unsigned long blk_start, blk_end; 458 + struct io_pgtable_cfg *cfg = &data->iop.cfg; 459 + arm_lpae_iopte pte, *tablep; 511 460 phys_addr_t blk_paddr; 512 - arm_lpae_iopte table = 0; 461 + size_t tablesz = ARM_LPAE_GRANULE(data); 462 + size_t split_sz = ARM_LPAE_BLOCK_SIZE(lvl, data); 463 + int i, unmap_idx = -1; 513 464 514 - blk_start = iova & ~(blk_size - 1); 515 - blk_end = blk_start + blk_size; 516 - blk_paddr = iopte_to_pfn(*ptep, data) << data->pg_shift; 465 + if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS)) 466 + return 0; 517 467 518 - for (; blk_start < blk_end; blk_start += size, blk_paddr += size) { 519 - arm_lpae_iopte *tablep; 468 + tablep = __arm_lpae_alloc_pages(tablesz, GFP_ATOMIC, cfg); 469 + if (!tablep) 470 + return 0; /* Bytes unmapped */ 520 471 472 + if (size == split_sz) 473 + unmap_idx = ARM_LPAE_LVL_IDX(iova, lvl, data); 474 + 475 + blk_paddr = iopte_to_pfn(blk_pte, data) << data->pg_shift; 476 + pte = iopte_prot(blk_pte); 477 + 478 + for (i = 0; i < tablesz / sizeof(pte); i++, blk_paddr += split_sz) { 521 479 /* Unmap! 
*/ 522 - if (blk_start == iova) 480 + if (i == unmap_idx) 523 481 continue; 524 482 525 - /* __arm_lpae_map expects a pointer to the start of the table */ 526 - tablep = &table - ARM_LPAE_LVL_IDX(blk_start, lvl, data); 527 - if (__arm_lpae_map(data, blk_start, blk_paddr, size, prot, lvl, 528 - tablep) < 0) { 529 - if (table) { 530 - /* Free the table we allocated */ 531 - tablep = iopte_deref(table, data); 532 - __arm_lpae_free_pgtable(data, lvl + 1, tablep); 533 - } 534 - return 0; /* Bytes unmapped */ 535 - } 483 + __arm_lpae_init_pte(data, blk_paddr, pte, lvl, &tablep[i]); 536 484 } 537 485 538 - __arm_lpae_set_pte(ptep, table, &data->iop.cfg); 539 - iova &= ~(blk_size - 1); 540 - io_pgtable_tlb_add_flush(&data->iop, iova, blk_size, blk_size, true); 486 + pte = arm_lpae_install_table(tablep, ptep, blk_pte, cfg); 487 + if (pte != blk_pte) { 488 + __arm_lpae_free_pages(tablep, tablesz, cfg); 489 + /* 490 + * We may race against someone unmapping another part of this 491 + * block, but anything else is invalid. We can't misinterpret 492 + * a page entry here since we're never at the last level. 
493 + */ 494 + if (iopte_type(pte, lvl - 1) != ARM_LPAE_PTE_TYPE_TABLE) 495 + return 0; 496 + 497 + tablep = iopte_deref(pte, data); 498 + } 499 + 500 + if (unmap_idx < 0) 501 + return __arm_lpae_unmap(data, iova, size, lvl, tablep); 502 + 503 + io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true); 541 504 return size; 542 505 } 543 506 ··· 562 495 { 563 496 arm_lpae_iopte pte; 564 497 struct io_pgtable *iop = &data->iop; 565 - size_t blk_size = ARM_LPAE_BLOCK_SIZE(lvl, data); 566 498 567 499 /* Something went horribly wrong and we ran out of page table */ 568 500 if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS)) 569 501 return 0; 570 502 571 503 ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); 572 - pte = *ptep; 504 + pte = READ_ONCE(*ptep); 573 505 if (WARN_ON(!pte)) 574 506 return 0; 575 507 576 508 /* If the size matches this level, we're in the right place */ 577 - if (size == blk_size) { 509 + if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) { 578 510 __arm_lpae_set_pte(ptep, 0, &iop->cfg); 579 511 580 512 if (!iopte_leaf(pte, lvl)) { ··· 593 527 * Insert a table at the next level to map the old region, 594 528 * minus the part we want to unmap 595 529 */ 596 - return arm_lpae_split_blk_unmap(data, iova, size, 597 - iopte_prot(pte), lvl, ptep, 598 - blk_size); 530 + return arm_lpae_split_blk_unmap(data, iova, size, pte, 531 + lvl + 1, ptep); 599 532 } 600 533 601 534 /* Keep on walkin' */ ··· 630 565 return 0; 631 566 632 567 /* Grab the IOPTE we're interested in */ 633 - pte = *(ptep + ARM_LPAE_LVL_IDX(iova, lvl, data)); 568 + ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); 569 + pte = READ_ONCE(*ptep); 634 570 635 571 /* Valid entry? 
*/ 636 572 if (!pte) ··· 739 673 u64 reg; 740 674 struct arm_lpae_io_pgtable *data; 741 675 742 - if (cfg->quirks & ~IO_PGTABLE_QUIRK_ARM_NS) 676 + if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA)) 743 677 return NULL; 744 678 745 679 data = arm_lpae_alloc_pgtable(cfg); ··· 828 762 struct arm_lpae_io_pgtable *data; 829 763 830 764 /* The NS quirk doesn't apply at stage 2 */ 831 - if (cfg->quirks) 765 + if (cfg->quirks & ~IO_PGTABLE_QUIRK_NO_DMA) 832 766 return NULL; 833 767 834 768 data = arm_lpae_alloc_pgtable(cfg); ··· 1132 1066 struct io_pgtable_cfg cfg = { 1133 1067 .tlb = &dummy_tlb_ops, 1134 1068 .oas = 48, 1069 + .quirks = IO_PGTABLE_QUIRK_NO_DMA, 1135 1070 }; 1136 1071 1137 1072 for (i = 0; i < ARRAY_SIZE(pgsize); ++i) {
+6
drivers/iommu/io-pgtable.h
··· 65 65 * PTEs, for Mediatek IOMMUs which treat it as a 33rd address bit 66 66 * when the SoC is in "4GB mode" and they can only access the high 67 67 * remap of DRAM (0x1_00000000 to 0x1_ffffffff). 68 + * 69 + * IO_PGTABLE_QUIRK_NO_DMA: Guarantees that the tables will only ever 70 + * be accessed by a fully cache-coherent IOMMU or CPU (e.g. for a 71 + * software-emulated IOMMU), such that pagetable updates need not 72 + * be treated as explicit DMA data. 68 73 */ 69 74 #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) 70 75 #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) 71 76 #define IO_PGTABLE_QUIRK_TLBI_ON_MAP BIT(2) 72 77 #define IO_PGTABLE_QUIRK_ARM_MTK_4GB BIT(3) 78 + #define IO_PGTABLE_QUIRK_NO_DMA BIT(4) 73 79 unsigned long quirks; 74 80 unsigned long pgsize_bitmap; 75 81 unsigned int ias;
+5 -12
drivers/iommu/iommu.c
··· 915 915 */ 916 916 struct iommu_group *generic_device_group(struct device *dev) 917 917 { 918 - struct iommu_group *group; 919 - 920 - group = iommu_group_alloc(); 921 - if (IS_ERR(group)) 922 - return NULL; 923 - 924 - return group; 918 + return iommu_group_alloc(); 925 919 } 926 920 927 921 /* ··· 982 988 return group; 983 989 984 990 /* No shared group found, allocate new */ 985 - group = iommu_group_alloc(); 986 - if (IS_ERR(group)) 987 - return NULL; 988 - 989 - return group; 991 + return iommu_group_alloc(); 990 992 } 991 993 992 994 /** ··· 1009 1019 1010 1020 if (ops && ops->device_group) 1011 1021 group = ops->device_group(dev); 1022 + 1023 + if (WARN_ON_ONCE(group == NULL)) 1024 + return ERR_PTR(-EINVAL); 1012 1025 1013 1026 if (IS_ERR(group)) 1014 1027 return group;
+12 -18
drivers/iommu/iova.c
··· 22 22 #include <linux/slab.h> 23 23 #include <linux/smp.h> 24 24 #include <linux/bitops.h> 25 + #include <linux/cpu.h> 25 26 26 27 static bool iova_rcache_insert(struct iova_domain *iovad, 27 28 unsigned long pfn, ··· 49 48 iovad->cached32_node = NULL; 50 49 iovad->granule = granule; 51 50 iovad->start_pfn = start_pfn; 52 - iovad->dma_32bit_pfn = pfn_32bit; 51 + iovad->dma_32bit_pfn = pfn_32bit + 1; 53 52 init_iova_rcaches(iovad); 54 53 } 55 54 EXPORT_SYMBOL_GPL(init_iova_domain); ··· 64 63 struct rb_node *prev_node = rb_prev(iovad->cached32_node); 65 64 struct iova *curr_iova = 66 65 rb_entry(iovad->cached32_node, struct iova, node); 67 - *limit_pfn = curr_iova->pfn_lo - 1; 66 + *limit_pfn = curr_iova->pfn_lo; 68 67 return prev_node; 69 68 } 70 69 } ··· 136 135 static unsigned int 137 136 iova_get_pad_size(unsigned int size, unsigned int limit_pfn) 138 137 { 139 - return (limit_pfn + 1 - size) & (__roundup_pow_of_two(size) - 1); 138 + return (limit_pfn - size) & (__roundup_pow_of_two(size) - 1); 140 139 } 141 140 142 141 static int __alloc_and_insert_iova_range(struct iova_domain *iovad, ··· 156 155 while (curr) { 157 156 struct iova *curr_iova = rb_entry(curr, struct iova, node); 158 157 159 - if (limit_pfn < curr_iova->pfn_lo) 158 + if (limit_pfn <= curr_iova->pfn_lo) { 160 159 goto move_left; 161 - else if (limit_pfn < curr_iova->pfn_hi) 162 - goto adjust_limit_pfn; 163 - else { 160 + } else if (limit_pfn > curr_iova->pfn_hi) { 164 161 if (size_aligned) 165 162 pad_size = iova_get_pad_size(size, limit_pfn); 166 - if ((curr_iova->pfn_hi + size + pad_size) <= limit_pfn) 163 + if ((curr_iova->pfn_hi + size + pad_size) < limit_pfn) 167 164 break; /* found a free slot */ 168 165 } 169 - adjust_limit_pfn: 170 - limit_pfn = curr_iova->pfn_lo ? 
(curr_iova->pfn_lo - 1) : 0; 166 + limit_pfn = curr_iova->pfn_lo; 171 167 move_left: 172 168 prev = curr; 173 169 curr = rb_prev(curr); ··· 180 182 } 181 183 182 184 /* pfn_lo will point to size aligned address if size_aligned is set */ 183 - new->pfn_lo = limit_pfn - (size + pad_size) + 1; 185 + new->pfn_lo = limit_pfn - (size + pad_size); 184 186 new->pfn_hi = new->pfn_lo + size - 1; 185 187 186 188 /* If we have 'prev', it's a valid place to start the insertion. */ ··· 267 269 if (!new_iova) 268 270 return NULL; 269 271 270 - ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn, 272 + ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1, 271 273 new_iova, size_aligned); 272 274 273 275 if (ret) { ··· 396 398 397 399 /* Try replenishing IOVAs by flushing rcache. */ 398 400 flushed_rcache = true; 399 - preempt_disable(); 400 401 for_each_online_cpu(cpu) 401 402 free_cpu_cached_iovas(cpu, iovad); 402 - preempt_enable(); 403 403 goto retry; 404 404 } 405 405 ··· 725 729 bool can_insert = false; 726 730 unsigned long flags; 727 731 728 - cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches); 732 + cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches); 729 733 spin_lock_irqsave(&cpu_rcache->lock, flags); 730 734 731 735 if (!iova_magazine_full(cpu_rcache->loaded)) { ··· 755 759 iova_magazine_push(cpu_rcache->loaded, iova_pfn); 756 760 757 761 spin_unlock_irqrestore(&cpu_rcache->lock, flags); 758 - put_cpu_ptr(rcache->cpu_rcaches); 759 762 760 763 if (mag_to_free) { 761 764 iova_magazine_free_pfns(mag_to_free, iovad); ··· 788 793 bool has_pfn = false; 789 794 unsigned long flags; 790 795 791 - cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches); 796 + cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches); 792 797 spin_lock_irqsave(&cpu_rcache->lock, flags); 793 798 794 799 if (!iova_magazine_empty(cpu_rcache->loaded)) { ··· 810 815 iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn); 811 816 812 817 spin_unlock_irqrestore(&cpu_rcache->lock, flags); 813 - 
put_cpu_ptr(rcache->cpu_rcaches); 814 818 815 819 return iova_pfn; 816 820 }
+296 -57
drivers/iommu/ipmmu-vmsa.c
··· 8 8 * the Free Software Foundation; version 2 of the License. 9 9 */ 10 10 11 + #include <linux/bitmap.h> 11 12 #include <linux/delay.h> 13 + #include <linux/dma-iommu.h> 12 14 #include <linux/dma-mapping.h> 13 15 #include <linux/err.h> 14 16 #include <linux/export.h> ··· 23 21 #include <linux/sizes.h> 24 22 #include <linux/slab.h> 25 23 24 + #if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) 26 25 #include <asm/dma-iommu.h> 27 26 #include <asm/pgalloc.h> 27 + #endif 28 28 29 29 #include "io-pgtable.h" 30 + 31 + #define IPMMU_CTX_MAX 1 30 32 31 33 struct ipmmu_vmsa_device { 32 34 struct device *dev; ··· 38 32 struct list_head list; 39 33 40 34 unsigned int num_utlbs; 35 + spinlock_t lock; /* Protects ctx and domains[] */ 36 + DECLARE_BITMAP(ctx, IPMMU_CTX_MAX); 37 + struct ipmmu_vmsa_domain *domains[IPMMU_CTX_MAX]; 41 38 42 39 struct dma_iommu_mapping *mapping; 43 40 }; ··· 56 47 spinlock_t lock; /* Protects mappings */ 57 48 }; 58 49 59 - struct ipmmu_vmsa_archdata { 50 + struct ipmmu_vmsa_iommu_priv { 60 51 struct ipmmu_vmsa_device *mmu; 61 52 unsigned int *utlbs; 62 53 unsigned int num_utlbs; 54 + struct device *dev; 55 + struct list_head list; 63 56 }; 64 57 65 58 static DEFINE_SPINLOCK(ipmmu_devices_lock); ··· 70 59 static struct ipmmu_vmsa_domain *to_vmsa_domain(struct iommu_domain *dom) 71 60 { 72 61 return container_of(dom, struct ipmmu_vmsa_domain, io_domain); 62 + } 63 + 64 + 65 + static struct ipmmu_vmsa_iommu_priv *to_priv(struct device *dev) 66 + { 67 + #if defined(CONFIG_ARM) 68 + return dev->archdata.iommu; 69 + #else 70 + return dev->iommu_fwspec->iommu_priv; 71 + #endif 72 + } 73 + static void set_priv(struct device *dev, struct ipmmu_vmsa_iommu_priv *p) 74 + { 75 + #if defined(CONFIG_ARM) 76 + dev->archdata.iommu = p; 77 + #else 78 + dev->iommu_fwspec->iommu_priv = p; 79 + #endif 73 80 } 74 81 75 82 #define TLB_LOOP_TIMEOUT 100 /* 100us */ ··· 322 293 * Domain/Context Management 323 294 */ 324 295 296 + static int 
ipmmu_domain_allocate_context(struct ipmmu_vmsa_device *mmu, 297 + struct ipmmu_vmsa_domain *domain) 298 + { 299 + unsigned long flags; 300 + int ret; 301 + 302 + spin_lock_irqsave(&mmu->lock, flags); 303 + 304 + ret = find_first_zero_bit(mmu->ctx, IPMMU_CTX_MAX); 305 + if (ret != IPMMU_CTX_MAX) { 306 + mmu->domains[ret] = domain; 307 + set_bit(ret, mmu->ctx); 308 + } 309 + 310 + spin_unlock_irqrestore(&mmu->lock, flags); 311 + 312 + return ret; 313 + } 314 + 325 315 static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) 326 316 { 327 317 u64 ttbr; 318 + int ret; 328 319 329 320 /* 330 321 * Allocate the page table operations. ··· 358 309 * non-secure mode. 359 310 */ 360 311 domain->cfg.quirks = IO_PGTABLE_QUIRK_ARM_NS; 361 - domain->cfg.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, 312 + domain->cfg.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K; 362 313 domain->cfg.ias = 32; 363 314 domain->cfg.oas = 40; 364 315 domain->cfg.tlb = &ipmmu_gather_ops; ··· 376 327 return -EINVAL; 377 328 378 329 /* 379 - * TODO: When adding support for multiple contexts, find an unused 380 - * context. 330 + * Find an unused context. 
381 331 */ 382 - domain->context_id = 0; 332 + ret = ipmmu_domain_allocate_context(domain->mmu, domain); 333 + if (ret == IPMMU_CTX_MAX) { 334 + free_io_pgtable_ops(domain->iop); 335 + return -EBUSY; 336 + } 337 + 338 + domain->context_id = ret; 383 339 384 340 /* TTBR0 */ 385 341 ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr[0]; ··· 426 372 return 0; 427 373 } 428 374 375 + static void ipmmu_domain_free_context(struct ipmmu_vmsa_device *mmu, 376 + unsigned int context_id) 377 + { 378 + unsigned long flags; 379 + 380 + spin_lock_irqsave(&mmu->lock, flags); 381 + 382 + clear_bit(context_id, mmu->ctx); 383 + mmu->domains[context_id] = NULL; 384 + 385 + spin_unlock_irqrestore(&mmu->lock, flags); 386 + } 387 + 429 388 static void ipmmu_domain_destroy_context(struct ipmmu_vmsa_domain *domain) 430 389 { 431 390 /* ··· 449 382 */ 450 383 ipmmu_ctx_write(domain, IMCTR, IMCTR_FLUSH); 451 384 ipmmu_tlb_sync(domain); 385 + ipmmu_domain_free_context(domain->mmu, domain->context_id); 452 386 } 453 387 454 388 /* ----------------------------------------------------------------------------- ··· 507 439 static irqreturn_t ipmmu_irq(int irq, void *dev) 508 440 { 509 441 struct ipmmu_vmsa_device *mmu = dev; 510 - struct iommu_domain *io_domain; 511 - struct ipmmu_vmsa_domain *domain; 442 + irqreturn_t status = IRQ_NONE; 443 + unsigned int i; 444 + unsigned long flags; 512 445 513 - if (!mmu->mapping) 514 - return IRQ_NONE; 446 + spin_lock_irqsave(&mmu->lock, flags); 515 447 516 - io_domain = mmu->mapping->domain; 517 - domain = to_vmsa_domain(io_domain); 448 + /* 449 + * Check interrupts for all active contexts. 
450 + */ 451 + for (i = 0; i < IPMMU_CTX_MAX; i++) { 452 + if (!mmu->domains[i]) 453 + continue; 454 + if (ipmmu_domain_irq(mmu->domains[i]) == IRQ_HANDLED) 455 + status = IRQ_HANDLED; 456 + } 518 457 519 - return ipmmu_domain_irq(domain); 458 + spin_unlock_irqrestore(&mmu->lock, flags); 459 + 460 + return status; 520 461 } 521 462 522 463 /* ----------------------------------------------------------------------------- 523 464 * IOMMU Operations 524 465 */ 525 466 526 - static struct iommu_domain *ipmmu_domain_alloc(unsigned type) 467 + static struct iommu_domain *__ipmmu_domain_alloc(unsigned type) 527 468 { 528 469 struct ipmmu_vmsa_domain *domain; 529 - 530 - if (type != IOMMU_DOMAIN_UNMANAGED) 531 - return NULL; 532 470 533 471 domain = kzalloc(sizeof(*domain), GFP_KERNEL); 534 472 if (!domain) ··· 561 487 static int ipmmu_attach_device(struct iommu_domain *io_domain, 562 488 struct device *dev) 563 489 { 564 - struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu; 565 - struct ipmmu_vmsa_device *mmu = archdata->mmu; 490 + struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); 491 + struct ipmmu_vmsa_device *mmu = priv->mmu; 566 492 struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); 567 493 unsigned long flags; 568 494 unsigned int i; ··· 587 513 dev_err(dev, "Can't attach IPMMU %s to domain on IPMMU %s\n", 588 514 dev_name(mmu->dev), dev_name(domain->mmu->dev)); 589 515 ret = -EINVAL; 590 - } 516 + } else 517 + dev_info(dev, "Reusing IPMMU context %u\n", domain->context_id); 591 518 592 519 spin_unlock_irqrestore(&domain->lock, flags); 593 520 594 521 if (ret < 0) 595 522 return ret; 596 523 597 - for (i = 0; i < archdata->num_utlbs; ++i) 598 - ipmmu_utlb_enable(domain, archdata->utlbs[i]); 524 + for (i = 0; i < priv->num_utlbs; ++i) 525 + ipmmu_utlb_enable(domain, priv->utlbs[i]); 599 526 600 527 return 0; 601 528 } ··· 604 529 static void ipmmu_detach_device(struct iommu_domain *io_domain, 605 530 struct device *dev) 606 531 { 607 - struct 
ipmmu_vmsa_archdata *archdata = dev->archdata.iommu; 532 + struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); 608 533 struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); 609 534 unsigned int i; 610 535 611 - for (i = 0; i < archdata->num_utlbs; ++i) 612 - ipmmu_utlb_disable(domain, archdata->utlbs[i]); 536 + for (i = 0; i < priv->num_utlbs; ++i) 537 + ipmmu_utlb_disable(domain, priv->utlbs[i]); 613 538 614 539 /* 615 540 * TODO: Optimize by disabling the context when no device is attached. ··· 670 595 return 0; 671 596 } 672 597 673 - static int ipmmu_add_device(struct device *dev) 598 + static int ipmmu_init_platform_device(struct device *dev) 674 599 { 675 - struct ipmmu_vmsa_archdata *archdata; 600 + struct ipmmu_vmsa_iommu_priv *priv; 676 601 struct ipmmu_vmsa_device *mmu; 677 - struct iommu_group *group = NULL; 678 602 unsigned int *utlbs; 679 603 unsigned int i; 680 604 int num_utlbs; 681 605 int ret = -ENODEV; 682 - 683 - if (dev->archdata.iommu) { 684 - dev_warn(dev, "IOMMU driver already assigned to device %s\n", 685 - dev_name(dev)); 686 - return -EINVAL; 687 - } 688 606 689 607 /* Find the master corresponding to the device. 
*/ 690 608 ··· 715 647 } 716 648 } 717 649 650 + priv = kzalloc(sizeof(*priv), GFP_KERNEL); 651 + if (!priv) { 652 + ret = -ENOMEM; 653 + goto error; 654 + } 655 + 656 + priv->mmu = mmu; 657 + priv->utlbs = utlbs; 658 + priv->num_utlbs = num_utlbs; 659 + priv->dev = dev; 660 + set_priv(dev, priv); 661 + return 0; 662 + 663 + error: 664 + kfree(utlbs); 665 + return ret; 666 + } 667 + 668 + #if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) 669 + 670 + static struct iommu_domain *ipmmu_domain_alloc(unsigned type) 671 + { 672 + if (type != IOMMU_DOMAIN_UNMANAGED) 673 + return NULL; 674 + 675 + return __ipmmu_domain_alloc(type); 676 + } 677 + 678 + static int ipmmu_add_device(struct device *dev) 679 + { 680 + struct ipmmu_vmsa_device *mmu = NULL; 681 + struct iommu_group *group; 682 + int ret; 683 + 684 + if (to_priv(dev)) { 685 + dev_warn(dev, "IOMMU driver already assigned to device %s\n", 686 + dev_name(dev)); 687 + return -EINVAL; 688 + } 689 + 718 690 /* Create a device group and add the device to it. */ 719 691 group = iommu_group_alloc(); 720 692 if (IS_ERR(group)) { ··· 772 664 goto error; 773 665 } 774 666 775 - archdata = kzalloc(sizeof(*archdata), GFP_KERNEL); 776 - if (!archdata) { 777 - ret = -ENOMEM; 667 + ret = ipmmu_init_platform_device(dev); 668 + if (ret < 0) 778 669 goto error; 779 - } 780 - 781 - archdata->mmu = mmu; 782 - archdata->utlbs = utlbs; 783 - archdata->num_utlbs = num_utlbs; 784 - dev->archdata.iommu = archdata; 785 670 786 671 /* 787 672 * Create the ARM mapping, used by the ARM DMA mapping core to allocate ··· 785 684 * - Make the mapping size configurable ? We currently use a 2GB mapping 786 685 * at a 1GB offset to ensure that NULL VAs will fault. 
787 686 */ 687 + mmu = to_priv(dev)->mmu; 788 688 if (!mmu->mapping) { 789 689 struct dma_iommu_mapping *mapping; 790 690 ··· 810 708 return 0; 811 709 812 710 error: 813 - arm_iommu_release_mapping(mmu->mapping); 814 - 815 - kfree(dev->archdata.iommu); 816 - kfree(utlbs); 817 - 818 - dev->archdata.iommu = NULL; 711 + if (mmu) 712 + arm_iommu_release_mapping(mmu->mapping); 819 713 820 714 if (!IS_ERR_OR_NULL(group)) 821 715 iommu_group_remove_device(dev); 716 + 717 + kfree(to_priv(dev)->utlbs); 718 + kfree(to_priv(dev)); 719 + set_priv(dev, NULL); 822 720 823 721 return ret; 824 722 } 825 723 826 724 static void ipmmu_remove_device(struct device *dev) 827 725 { 828 - struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu; 726 + struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); 829 727 830 728 arm_iommu_detach_device(dev); 831 729 iommu_group_remove_device(dev); 832 730 833 - kfree(archdata->utlbs); 834 - kfree(archdata); 731 + kfree(priv->utlbs); 732 + kfree(priv); 835 733 836 - dev->archdata.iommu = NULL; 734 + set_priv(dev, NULL); 837 735 } 838 736 839 737 static const struct iommu_ops ipmmu_ops = { ··· 849 747 .remove_device = ipmmu_remove_device, 850 748 .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, 851 749 }; 750 + 751 + #endif /* !CONFIG_ARM && CONFIG_IOMMU_DMA */ 752 + 753 + #ifdef CONFIG_IOMMU_DMA 754 + 755 + static DEFINE_SPINLOCK(ipmmu_slave_devices_lock); 756 + static LIST_HEAD(ipmmu_slave_devices); 757 + 758 + static struct iommu_domain *ipmmu_domain_alloc_dma(unsigned type) 759 + { 760 + struct iommu_domain *io_domain = NULL; 761 + 762 + switch (type) { 763 + case IOMMU_DOMAIN_UNMANAGED: 764 + io_domain = __ipmmu_domain_alloc(type); 765 + break; 766 + 767 + case IOMMU_DOMAIN_DMA: 768 + io_domain = __ipmmu_domain_alloc(type); 769 + if (io_domain) 770 + iommu_get_dma_cookie(io_domain); 771 + break; 772 + } 773 + 774 + return io_domain; 775 + } 776 + 777 + static void ipmmu_domain_free_dma(struct iommu_domain *io_domain) 778 + { 779 + switch 
(io_domain->type) { 780 + case IOMMU_DOMAIN_DMA: 781 + iommu_put_dma_cookie(io_domain); 782 + /* fall-through */ 783 + default: 784 + ipmmu_domain_free(io_domain); 785 + break; 786 + } 787 + } 788 + 789 + static int ipmmu_add_device_dma(struct device *dev) 790 + { 791 + struct iommu_fwspec *fwspec = dev->iommu_fwspec; 792 + struct iommu_group *group; 793 + 794 + /* 795 + * Only let through devices that have been verified in xlate() 796 + * We may get called with dev->iommu_fwspec set to NULL. 797 + */ 798 + if (!fwspec || !fwspec->iommu_priv) 799 + return -ENODEV; 800 + 801 + group = iommu_group_get_for_dev(dev); 802 + if (IS_ERR(group)) 803 + return PTR_ERR(group); 804 + 805 + spin_lock(&ipmmu_slave_devices_lock); 806 + list_add(&to_priv(dev)->list, &ipmmu_slave_devices); 807 + spin_unlock(&ipmmu_slave_devices_lock); 808 + return 0; 809 + } 810 + 811 + static void ipmmu_remove_device_dma(struct device *dev) 812 + { 813 + struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); 814 + 815 + spin_lock(&ipmmu_slave_devices_lock); 816 + list_del(&priv->list); 817 + spin_unlock(&ipmmu_slave_devices_lock); 818 + 819 + iommu_group_remove_device(dev); 820 + } 821 + 822 + static struct device *ipmmu_find_sibling_device(struct device *dev) 823 + { 824 + struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); 825 + struct ipmmu_vmsa_iommu_priv *sibling_priv = NULL; 826 + bool found = false; 827 + 828 + spin_lock(&ipmmu_slave_devices_lock); 829 + 830 + list_for_each_entry(sibling_priv, &ipmmu_slave_devices, list) { 831 + if (priv == sibling_priv) 832 + continue; 833 + if (sibling_priv->mmu == priv->mmu) { 834 + found = true; 835 + break; 836 + } 837 + } 838 + 839 + spin_unlock(&ipmmu_slave_devices_lock); 840 + 841 + return found ? 
sibling_priv->dev : NULL; 842 + } 843 + 844 + static struct iommu_group *ipmmu_find_group_dma(struct device *dev) 845 + { 846 + struct iommu_group *group; 847 + struct device *sibling; 848 + 849 + sibling = ipmmu_find_sibling_device(dev); 850 + if (sibling) 851 + group = iommu_group_get(sibling); 852 + if (!sibling || IS_ERR(group)) 853 + group = generic_device_group(dev); 854 + 855 + return group; 856 + } 857 + 858 + static int ipmmu_of_xlate_dma(struct device *dev, 859 + struct of_phandle_args *spec) 860 + { 861 + /* If the IPMMU device is disabled in DT then return error 862 + * to make sure the of_iommu code does not install ops 863 + * even though the iommu device is disabled 864 + */ 865 + if (!of_device_is_available(spec->np)) 866 + return -ENODEV; 867 + 868 + return ipmmu_init_platform_device(dev); 869 + } 870 + 871 + static const struct iommu_ops ipmmu_ops = { 872 + .domain_alloc = ipmmu_domain_alloc_dma, 873 + .domain_free = ipmmu_domain_free_dma, 874 + .attach_dev = ipmmu_attach_device, 875 + .detach_dev = ipmmu_detach_device, 876 + .map = ipmmu_map, 877 + .unmap = ipmmu_unmap, 878 + .map_sg = default_iommu_map_sg, 879 + .iova_to_phys = ipmmu_iova_to_phys, 880 + .add_device = ipmmu_add_device_dma, 881 + .remove_device = ipmmu_remove_device_dma, 882 + .device_group = ipmmu_find_group_dma, 883 + .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, 884 + .of_xlate = ipmmu_of_xlate_dma, 885 + }; 886 + 887 + #endif /* CONFIG_IOMMU_DMA */ 852 888 853 889 /* ----------------------------------------------------------------------------- 854 890 * Probe/remove and init ··· 1008 768 int irq; 1009 769 int ret; 1010 770 1011 - if (!IS_ENABLED(CONFIG_OF) && !pdev->dev.platform_data) { 1012 - dev_err(&pdev->dev, "missing platform data\n"); 1013 - return -EINVAL; 1014 - } 1015 - 1016 771 mmu = devm_kzalloc(&pdev->dev, sizeof(*mmu), GFP_KERNEL); 1017 772 if (!mmu) { 1018 773 dev_err(&pdev->dev, "cannot allocate device data\n"); ··· 1016 781 1017 782 mmu->dev = &pdev->dev; 1018 783 
mmu->num_utlbs = 32; 784 + spin_lock_init(&mmu->lock); 785 + bitmap_zero(mmu->ctx, IPMMU_CTX_MAX); 1019 786 1020 787 /* Map I/O memory and request IRQ. */ 1021 788 res = platform_get_resource(pdev, IORESOURCE_MEM, 0); ··· 1077 840 list_del(&mmu->list); 1078 841 spin_unlock(&ipmmu_devices_lock); 1079 842 843 + #if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) 1080 844 arm_iommu_release_mapping(mmu->mapping); 845 + #endif 1081 846 1082 847 ipmmu_device_reset(mmu); 1083 848
+1 -1
drivers/iommu/omap-iommu.c
··· 1309 1309 static struct iommu_group *omap_iommu_device_group(struct device *dev) 1310 1310 { 1311 1311 struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; 1312 - struct iommu_group *group = NULL; 1312 + struct iommu_group *group = ERR_PTR(-EINVAL); 1313 1313 1314 1314 if (arch_data->iommu_dev) 1315 1315 group = arch_data->iommu_dev->group;
+5 -10
drivers/iommu/s390-iommu.c
··· 165 165 166 166 static int s390_iommu_add_device(struct device *dev) 167 167 { 168 - struct iommu_group *group; 169 - int rc; 168 + struct iommu_group *group = iommu_group_get_for_dev(dev); 170 169 171 - group = iommu_group_get(dev); 172 - if (!group) { 173 - group = iommu_group_alloc(); 174 - if (IS_ERR(group)) 175 - return PTR_ERR(group); 176 - } 170 + if (IS_ERR(group)) 171 + return PTR_ERR(group); 177 172 178 - rc = iommu_group_add_device(group, dev); 179 173 iommu_group_put(group); 180 174 181 - return rc; 175 + return 0; 182 176 } 183 177 184 178 static void s390_iommu_remove_device(struct device *dev) ··· 338 344 .iova_to_phys = s390_iommu_iova_to_phys, 339 345 .add_device = s390_iommu_add_device, 340 346 .remove_device = s390_iommu_remove_device, 347 + .device_group = generic_device_group, 341 348 .pgsize_bitmap = S390_IOMMU_PGSIZES, 342 349 }; 343 350
+20
include/linux/intel-svm.h
··· 102 102 */ 103 103 extern int intel_svm_unbind_mm(struct device *dev, int pasid); 104 104 105 + /** 106 + * intel_svm_is_pasid_valid() - check if pasid is valid 107 + * @dev: Device for which PASID was allocated 108 + * @pasid: PASID value to be checked 109 + * 110 + * This function checks if the specified pasid is still valid. A 111 + * valid pasid means the backing mm is still having a valid user. 112 + * For kernel callers init_mm is always valid. for other mm, if mm->mm_users 113 + * is non-zero, it is valid. 114 + * 115 + * returns -EINVAL if invalid pasid, 0 if pasid ref count is invalid 116 + * 1 if pasid is valid. 117 + */ 118 + extern int intel_svm_is_pasid_valid(struct device *dev, int pasid); 119 + 105 120 #else /* CONFIG_INTEL_IOMMU_SVM */ 106 121 107 122 static inline int intel_svm_bind_mm(struct device *dev, int *pasid, ··· 128 113 static inline int intel_svm_unbind_mm(struct device *dev, int pasid) 129 114 { 130 115 BUG(); 116 + } 117 + 118 + static int intel_svm_is_pasid_valid(struct device *dev, int pasid) 119 + { 120 + return -EINVAL; 131 121 } 132 122 #endif /* CONFIG_INTEL_IOMMU_SVM */ 133 123