Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

s390/pci: Use dma-iommu layer

While s390 already has a standard IOMMU driver and previous changes have
added I/O TLB flushing operations, this driver is currently only used for
user-space PCI access such as vfio-pci. For the DMA API, s390 instead
utilizes its own implementation in arch/s390/pci/pci_dma.c, which drives
the same hardware and shares some code but requires a complex and
fragile handover between DMA API and IOMMU API use of a device and,
despite the code sharing, still leads to significant duplication and
maintenance effort. Let's utilize the common DMA API
implementation from drivers/iommu/dma-iommu.c instead, allowing us to
get rid of arch/s390/pci/pci_dma.c.

Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
Link: https://lore.kernel.org/r/20230928-dma_iommu-v13-3-9e5fc4dacc36@linux.ibm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>

Authored by Niklas Schnelle, committed by Joerg Roedel
c76c067e b6f88870
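
For context, a minimal sketch of the driver-facing side of this change (not part of the patch; the function, buffer, and length names are made up): a PCI driver keeps using the generic DMA API unchanged, and after this series those calls are served by the common drivers/iommu/dma-iommu.c code on top of the s390 IOMMU translation tables rather than by arch/s390/pci/pci_dma.c.

    #include <linux/dma-mapping.h>
    #include <linux/pci.h>

    /* Illustrative only: "buf" and "len" stand for hypothetical driver data. */
    static int example_dma_map(struct pci_dev *pdev, void *buf, size_t len)
    {
    	dma_addr_t handle;

    	/*
    	 * With this series the map is handled by dma-iommu.c, which
    	 * allocates an IOVA and updates the s390 DMA translation tables
    	 * through the s390-iommu.c IOMMU ops.
    	 */
    	handle = dma_map_single(&pdev->dev, buf, len, DMA_TO_DEVICE);
    	if (dma_mapping_error(&pdev->dev, handle))
    		return -ENOMEM;

    	/* ... program "handle" into the device ... */

    	/* In the default lazy mode the IOTLB flush may be deferred. */
    	dma_unmap_single(&pdev->dev, handle, len, DMA_TO_DEVICE);
    	return 0;
    }

The per-device wiring happens in the new s390_iommu_probe_finalize() in the diff below, which calls iommu_setup_dma_ops() instead of installing the former s390_pci_dma_ops.
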

+401 -938
+5 -4
Documentation/admin-guide/kernel-parameters.txt
··· 2220 2220 forcing Dual Address Cycle for PCI cards supporting 2221 2221 greater than 32-bit addressing. 2222 2222 2223 - iommu.strict= [ARM64, X86] Configure TLB invalidation behaviour 2223 + iommu.strict= [ARM64, X86, S390] Configure TLB invalidation behaviour 2224 2224 Format: { "0" | "1" } 2225 2225 0 - Lazy mode. 2226 2226 Request that DMA unmap operations use deferred ··· 5611 5611 s390_iommu= [HW,S390] 5612 5612 Set s390 IOTLB flushing mode 5613 5613 strict 5614 - With strict flushing every unmap operation will result in 5615 - an IOTLB flush. Default is lazy flushing before reuse, 5616 - which is faster. 5614 + With strict flushing every unmap operation will result 5615 + in an IOTLB flush. Default is lazy flushing before 5616 + reuse, which is faster. Deprecated, equivalent to 5617 + iommu.strict=1. 5617 5618 5618 5619 s390_iommu_aperture= [KNL,S390] 5619 5620 Specifies the size of the per device DMA address space
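
Usage note (illustrative, not part of the patch): together with the Kconfig change later in this series, lazy invalidation becomes the default on s390, and the behaviour can be selected with the generic parameter documented above, e.g. on the kernel command line:

    iommu.strict=0    # lazy IOTLB invalidation (the new s390 default)
    iommu.strict=1    # strict invalidation on every unmap

The old s390_iommu=strict option remains as a deprecated alias for iommu.strict=1.
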
-7
arch/s390/include/asm/pci.h
··· 159 159 unsigned long *dma_table; 160 160 int tlb_refresh; 161 161 162 - spinlock_t iommu_bitmap_lock; 163 - unsigned long *iommu_bitmap; 164 - unsigned long *lazy_bitmap; 165 - unsigned long iommu_size; 166 - unsigned long iommu_pages; 167 - unsigned int next_bit; 168 - 169 162 struct iommu_device iommu_dev; /* IOMMU core handle */ 170 163 171 164 char res_name[16];
+3
arch/s390/include/asm/pci_clp.h
··· 50 50 #define CLP_UTIL_STR_LEN 64 51 51 #define CLP_PFIP_NR_SEGMENTS 4 52 52 53 + /* PCI function type numbers */ 54 + #define PCI_FUNC_TYPE_ISM 0x5 /* ISM device */ 55 + 53 56 extern bool zpci_unique_uid; 54 57 55 58 struct clp_rsp_slpc_pci {
+9 -110
arch/s390/include/asm/pci_dma.h
··· 82 82 #define ZPCI_TABLE_VALID_MASK 0x20 83 83 #define ZPCI_TABLE_PROT_MASK 0x200 84 84 85 - static inline unsigned int calc_rtx(dma_addr_t ptr) 86 - { 87 - return ((unsigned long) ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK; 88 - } 85 + struct zpci_iommu_ctrs { 86 + atomic64_t mapped_pages; 87 + atomic64_t unmapped_pages; 88 + atomic64_t global_rpcits; 89 + atomic64_t sync_map_rpcits; 90 + atomic64_t sync_rpcits; 91 + }; 89 92 90 - static inline unsigned int calc_sx(dma_addr_t ptr) 91 - { 92 - return ((unsigned long) ptr >> ZPCI_ST_SHIFT) & ZPCI_INDEX_MASK; 93 - } 93 + struct zpci_dev; 94 94 95 - static inline unsigned int calc_px(dma_addr_t ptr) 96 - { 97 - return ((unsigned long) ptr >> PAGE_SHIFT) & ZPCI_PT_MASK; 98 - } 99 - 100 - static inline void set_pt_pfaa(unsigned long *entry, phys_addr_t pfaa) 101 - { 102 - *entry &= ZPCI_PTE_FLAG_MASK; 103 - *entry |= (pfaa & ZPCI_PTE_ADDR_MASK); 104 - } 105 - 106 - static inline void set_rt_sto(unsigned long *entry, phys_addr_t sto) 107 - { 108 - *entry &= ZPCI_RTE_FLAG_MASK; 109 - *entry |= (sto & ZPCI_RTE_ADDR_MASK); 110 - *entry |= ZPCI_TABLE_TYPE_RTX; 111 - } 112 - 113 - static inline void set_st_pto(unsigned long *entry, phys_addr_t pto) 114 - { 115 - *entry &= ZPCI_STE_FLAG_MASK; 116 - *entry |= (pto & ZPCI_STE_ADDR_MASK); 117 - *entry |= ZPCI_TABLE_TYPE_SX; 118 - } 119 - 120 - static inline void validate_rt_entry(unsigned long *entry) 121 - { 122 - *entry &= ~ZPCI_TABLE_VALID_MASK; 123 - *entry &= ~ZPCI_TABLE_OFFSET_MASK; 124 - *entry |= ZPCI_TABLE_VALID; 125 - *entry |= ZPCI_TABLE_LEN_RTX; 126 - } 127 - 128 - static inline void validate_st_entry(unsigned long *entry) 129 - { 130 - *entry &= ~ZPCI_TABLE_VALID_MASK; 131 - *entry |= ZPCI_TABLE_VALID; 132 - } 133 - 134 - static inline void invalidate_pt_entry(unsigned long *entry) 135 - { 136 - WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID); 137 - *entry &= ~ZPCI_PTE_VALID_MASK; 138 - *entry |= ZPCI_PTE_INVALID; 139 - } 140 - 141 - static inline void validate_pt_entry(unsigned long *entry) 142 - { 143 - WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID); 144 - *entry &= ~ZPCI_PTE_VALID_MASK; 145 - *entry |= ZPCI_PTE_VALID; 146 - } 147 - 148 - static inline void entry_set_protected(unsigned long *entry) 149 - { 150 - *entry &= ~ZPCI_TABLE_PROT_MASK; 151 - *entry |= ZPCI_TABLE_PROTECTED; 152 - } 153 - 154 - static inline void entry_clr_protected(unsigned long *entry) 155 - { 156 - *entry &= ~ZPCI_TABLE_PROT_MASK; 157 - *entry |= ZPCI_TABLE_UNPROTECTED; 158 - } 159 - 160 - static inline int reg_entry_isvalid(unsigned long entry) 161 - { 162 - return (entry & ZPCI_TABLE_VALID_MASK) == ZPCI_TABLE_VALID; 163 - } 164 - 165 - static inline int pt_entry_isvalid(unsigned long entry) 166 - { 167 - return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID; 168 - } 169 - 170 - static inline unsigned long *get_rt_sto(unsigned long entry) 171 - { 172 - if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX) 173 - return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK); 174 - else 175 - return NULL; 176 - 177 - } 178 - 179 - static inline unsigned long *get_st_pto(unsigned long entry) 180 - { 181 - if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX) 182 - return phys_to_virt(entry & ZPCI_STE_ADDR_MASK); 183 - else 184 - return NULL; 185 - } 186 - 187 - /* Prototypes */ 188 - void dma_free_seg_table(unsigned long); 189 - unsigned long *dma_alloc_cpu_table(gfp_t gfp); 190 - void dma_cleanup_tables(unsigned long *); 191 - unsigned long *dma_walk_cpu_trans(unsigned long *rto, 
dma_addr_t dma_addr, 192 - gfp_t gfp); 193 - void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags); 194 - 195 - extern const struct dma_map_ops s390_pci_dma_ops; 196 - 95 + struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev); 197 96 198 97 #endif
+1 -1
arch/s390/pci/Makefile
··· 3 3 # Makefile for the s390 PCI subsystem. 4 4 # 5 5 6 - obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \ 6 + obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_clp.o pci_sysfs.o \ 7 7 pci_event.o pci_debug.o pci_insn.o pci_mmio.o \ 8 8 pci_bus.o pci_kvm_hook.o 9 9 obj-$(CONFIG_PCI_IOV) += pci_iov.o
+5 -17
arch/s390/pci/pci.c
··· 124 124 125 125 WARN_ON_ONCE(iota & 0x3fff); 126 126 fib.pba = base; 127 - fib.pal = limit; 127 + /* Work around off by one in ISM virt device */ 128 + if (zdev->pft == PCI_FUNC_TYPE_ISM && limit > base) 129 + fib.pal = limit + (1 << 12); 130 + else 131 + fib.pal = limit; 128 132 fib.iota = iota | ZPCI_IOTA_RTTO_FLAG; 129 133 fib.gd = zdev->gisa; 130 134 cc = zpci_mod_fc(req, &fib, status); ··· 586 582 pdev->no_vf_scan = 1; 587 583 588 584 pdev->dev.groups = zpci_attr_groups; 589 - pdev->dev.dma_ops = &s390_pci_dma_ops; 590 585 zpci_map_resources(pdev); 591 586 592 587 for (i = 0; i < PCI_STD_NUM_BARS; i++) { ··· 759 756 if (zdev->dma_table) 760 757 rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, 761 758 virt_to_phys(zdev->dma_table), &status); 762 - else 763 - rc = zpci_dma_init_device(zdev); 764 759 if (rc) { 765 760 zpci_disable_device(zdev); 766 761 return rc; ··· 866 865 if (zdev->zbus->bus) 867 866 zpci_bus_remove_device(zdev, false); 868 867 869 - if (zdev->dma_table) { 870 - rc = zpci_dma_exit_device(zdev); 871 - if (rc) 872 - return rc; 873 - } 874 868 if (zdev_enabled(zdev)) { 875 869 rc = zpci_disable_device(zdev); 876 870 if (rc) ··· 914 918 if (zdev->zbus->bus) 915 919 zpci_bus_remove_device(zdev, false); 916 920 917 - if (zdev->dma_table) 918 - zpci_dma_exit_device(zdev); 919 921 if (zdev_enabled(zdev)) 920 922 zpci_disable_device(zdev); 921 923 ··· 1103 1109 if (rc) 1104 1110 goto out_irq; 1105 1111 1106 - rc = zpci_dma_init(); 1107 - if (rc) 1108 - goto out_dma; 1109 - 1110 1112 rc = clp_scan_pci_devices(); 1111 1113 if (rc) 1112 1114 goto out_find; ··· 1112 1122 return 0; 1113 1123 1114 1124 out_find: 1115 - zpci_dma_exit(); 1116 - out_dma: 1117 1125 zpci_irq_exit(); 1118 1126 out_irq: 1119 1127 zpci_mem_exit();
-5
arch/s390/pci/pci_bus.c
··· 47 47 rc = zpci_enable_device(zdev); 48 48 if (rc) 49 49 return rc; 50 - rc = zpci_dma_init_device(zdev); 51 - if (rc) { 52 - zpci_disable_device(zdev); 53 - return rc; 54 - } 55 50 } 56 51 57 52 if (!zdev->has_resources) {
+9 -3
arch/s390/pci/pci_debug.c
··· 53 53 }; 54 54 55 55 static char *pci_sw_names[] = { 56 - "Allocated pages", 57 56 "Mapped pages", 58 57 "Unmapped pages", 58 + "Global RPCITs", 59 + "Sync Map RPCITs", 60 + "Sync RPCITs", 59 61 }; 60 62 61 63 static void pci_fmb_show(struct seq_file *m, char *name[], int length, ··· 71 69 72 70 static void pci_sw_counter_show(struct seq_file *m) 73 71 { 74 - struct zpci_dev *zdev = m->private; 75 - atomic64_t *counter = &zdev->allocated_pages; 72 + struct zpci_iommu_ctrs *ctrs = zpci_get_iommu_ctrs(m->private); 73 + atomic64_t *counter; 76 74 int i; 77 75 76 + if (!ctrs) 77 + return; 78 + 79 + counter = &ctrs->mapped_pages; 78 80 for (i = 0; i < ARRAY_SIZE(pci_sw_names); i++, counter++) 79 81 seq_printf(m, "%26s:\t%llu\n", pci_sw_names[i], 80 82 atomic64_read(counter));
-735
arch/s390/pci/pci_dma.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - /* 3 - * Copyright IBM Corp. 2012 4 - * 5 - * Author(s): 6 - * Jan Glauber <jang@linux.vnet.ibm.com> 7 - */ 8 - 9 - #include <linux/kernel.h> 10 - #include <linux/slab.h> 11 - #include <linux/export.h> 12 - #include <linux/iommu-helper.h> 13 - #include <linux/dma-map-ops.h> 14 - #include <linux/vmalloc.h> 15 - #include <linux/pci.h> 16 - #include <asm/pci_dma.h> 17 - 18 - static struct kmem_cache *dma_region_table_cache; 19 - static struct kmem_cache *dma_page_table_cache; 20 - static int s390_iommu_strict; 21 - static u64 s390_iommu_aperture; 22 - static u32 s390_iommu_aperture_factor = 1; 23 - 24 - static int zpci_refresh_global(struct zpci_dev *zdev) 25 - { 26 - return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma, 27 - zdev->iommu_pages * PAGE_SIZE); 28 - } 29 - 30 - unsigned long *dma_alloc_cpu_table(gfp_t gfp) 31 - { 32 - unsigned long *table, *entry; 33 - 34 - table = kmem_cache_alloc(dma_region_table_cache, gfp); 35 - if (!table) 36 - return NULL; 37 - 38 - for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++) 39 - *entry = ZPCI_TABLE_INVALID; 40 - return table; 41 - } 42 - 43 - static void dma_free_cpu_table(void *table) 44 - { 45 - kmem_cache_free(dma_region_table_cache, table); 46 - } 47 - 48 - static unsigned long *dma_alloc_page_table(gfp_t gfp) 49 - { 50 - unsigned long *table, *entry; 51 - 52 - table = kmem_cache_alloc(dma_page_table_cache, gfp); 53 - if (!table) 54 - return NULL; 55 - 56 - for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++) 57 - *entry = ZPCI_PTE_INVALID; 58 - return table; 59 - } 60 - 61 - static void dma_free_page_table(void *table) 62 - { 63 - kmem_cache_free(dma_page_table_cache, table); 64 - } 65 - 66 - static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp) 67 - { 68 - unsigned long old_rte, rte; 69 - unsigned long *sto; 70 - 71 - rte = READ_ONCE(*rtep); 72 - if (reg_entry_isvalid(rte)) { 73 - sto = get_rt_sto(rte); 74 - } else { 75 - sto = dma_alloc_cpu_table(gfp); 76 - if (!sto) 77 - return NULL; 78 - 79 - set_rt_sto(&rte, virt_to_phys(sto)); 80 - validate_rt_entry(&rte); 81 - entry_clr_protected(&rte); 82 - 83 - old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte); 84 - if (old_rte != ZPCI_TABLE_INVALID) { 85 - /* Somone else was faster, use theirs */ 86 - dma_free_cpu_table(sto); 87 - sto = get_rt_sto(old_rte); 88 - } 89 - } 90 - return sto; 91 - } 92 - 93 - static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp) 94 - { 95 - unsigned long old_ste, ste; 96 - unsigned long *pto; 97 - 98 - ste = READ_ONCE(*step); 99 - if (reg_entry_isvalid(ste)) { 100 - pto = get_st_pto(ste); 101 - } else { 102 - pto = dma_alloc_page_table(gfp); 103 - if (!pto) 104 - return NULL; 105 - set_st_pto(&ste, virt_to_phys(pto)); 106 - validate_st_entry(&ste); 107 - entry_clr_protected(&ste); 108 - 109 - old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste); 110 - if (old_ste != ZPCI_TABLE_INVALID) { 111 - /* Somone else was faster, use theirs */ 112 - dma_free_page_table(pto); 113 - pto = get_st_pto(old_ste); 114 - } 115 - } 116 - return pto; 117 - } 118 - 119 - unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, 120 - gfp_t gfp) 121 - { 122 - unsigned long *sto, *pto; 123 - unsigned int rtx, sx, px; 124 - 125 - rtx = calc_rtx(dma_addr); 126 - sto = dma_get_seg_table_origin(&rto[rtx], gfp); 127 - if (!sto) 128 - return NULL; 129 - 130 - sx = calc_sx(dma_addr); 131 - pto = dma_get_page_table_origin(&sto[sx], gfp); 132 - if (!pto) 133 - 
return NULL; 134 - 135 - px = calc_px(dma_addr); 136 - return &pto[px]; 137 - } 138 - 139 - void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags) 140 - { 141 - unsigned long pte; 142 - 143 - pte = READ_ONCE(*ptep); 144 - if (flags & ZPCI_PTE_INVALID) { 145 - invalidate_pt_entry(&pte); 146 - } else { 147 - set_pt_pfaa(&pte, page_addr); 148 - validate_pt_entry(&pte); 149 - } 150 - 151 - if (flags & ZPCI_TABLE_PROTECTED) 152 - entry_set_protected(&pte); 153 - else 154 - entry_clr_protected(&pte); 155 - 156 - xchg(ptep, pte); 157 - } 158 - 159 - static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa, 160 - dma_addr_t dma_addr, size_t size, int flags) 161 - { 162 - unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; 163 - phys_addr_t page_addr = (pa & PAGE_MASK); 164 - unsigned long *entry; 165 - int i, rc = 0; 166 - 167 - if (!nr_pages) 168 - return -EINVAL; 169 - 170 - if (!zdev->dma_table) 171 - return -EINVAL; 172 - 173 - for (i = 0; i < nr_pages; i++) { 174 - entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr, 175 - GFP_ATOMIC); 176 - if (!entry) { 177 - rc = -ENOMEM; 178 - goto undo_cpu_trans; 179 - } 180 - dma_update_cpu_trans(entry, page_addr, flags); 181 - page_addr += PAGE_SIZE; 182 - dma_addr += PAGE_SIZE; 183 - } 184 - 185 - undo_cpu_trans: 186 - if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) { 187 - flags = ZPCI_PTE_INVALID; 188 - while (i-- > 0) { 189 - page_addr -= PAGE_SIZE; 190 - dma_addr -= PAGE_SIZE; 191 - entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr, 192 - GFP_ATOMIC); 193 - if (!entry) 194 - break; 195 - dma_update_cpu_trans(entry, page_addr, flags); 196 - } 197 - } 198 - return rc; 199 - } 200 - 201 - static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr, 202 - size_t size, int flags) 203 - { 204 - unsigned long irqflags; 205 - int ret; 206 - 207 - /* 208 - * With zdev->tlb_refresh == 0, rpcit is not required to establish new 209 - * translations when previously invalid translation-table entries are 210 - * validated. With lazy unmap, rpcit is skipped for previously valid 211 - * entries, but a global rpcit is then required before any address can 212 - * be re-used, i.e. after each iommu bitmap wrap-around. 
213 - */ 214 - if ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID) { 215 - if (!zdev->tlb_refresh) 216 - return 0; 217 - } else { 218 - if (!s390_iommu_strict) 219 - return 0; 220 - } 221 - 222 - ret = zpci_refresh_trans((u64) zdev->fh << 32, dma_addr, 223 - PAGE_ALIGN(size)); 224 - if (ret == -ENOMEM && !s390_iommu_strict) { 225 - /* enable the hypervisor to free some resources */ 226 - if (zpci_refresh_global(zdev)) 227 - goto out; 228 - 229 - spin_lock_irqsave(&zdev->iommu_bitmap_lock, irqflags); 230 - bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap, 231 - zdev->lazy_bitmap, zdev->iommu_pages); 232 - bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages); 233 - spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, irqflags); 234 - ret = 0; 235 - } 236 - out: 237 - return ret; 238 - } 239 - 240 - static int dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa, 241 - dma_addr_t dma_addr, size_t size, int flags) 242 - { 243 - int rc; 244 - 245 - rc = __dma_update_trans(zdev, pa, dma_addr, size, flags); 246 - if (rc) 247 - return rc; 248 - 249 - rc = __dma_purge_tlb(zdev, dma_addr, size, flags); 250 - if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) 251 - __dma_update_trans(zdev, pa, dma_addr, size, ZPCI_PTE_INVALID); 252 - 253 - return rc; 254 - } 255 - 256 - void dma_free_seg_table(unsigned long entry) 257 - { 258 - unsigned long *sto = get_rt_sto(entry); 259 - int sx; 260 - 261 - for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++) 262 - if (reg_entry_isvalid(sto[sx])) 263 - dma_free_page_table(get_st_pto(sto[sx])); 264 - 265 - dma_free_cpu_table(sto); 266 - } 267 - 268 - void dma_cleanup_tables(unsigned long *table) 269 - { 270 - int rtx; 271 - 272 - if (!table) 273 - return; 274 - 275 - for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++) 276 - if (reg_entry_isvalid(table[rtx])) 277 - dma_free_seg_table(table[rtx]); 278 - 279 - dma_free_cpu_table(table); 280 - } 281 - 282 - static unsigned long __dma_alloc_iommu(struct device *dev, 283 - unsigned long start, int size) 284 - { 285 - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 286 - 287 - return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages, 288 - start, size, zdev->start_dma >> PAGE_SHIFT, 289 - dma_get_seg_boundary_nr_pages(dev, PAGE_SHIFT), 290 - 0); 291 - } 292 - 293 - static dma_addr_t dma_alloc_address(struct device *dev, int size) 294 - { 295 - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 296 - unsigned long offset, flags; 297 - 298 - spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); 299 - offset = __dma_alloc_iommu(dev, zdev->next_bit, size); 300 - if (offset == -1) { 301 - if (!s390_iommu_strict) { 302 - /* global flush before DMA addresses are reused */ 303 - if (zpci_refresh_global(zdev)) 304 - goto out_error; 305 - 306 - bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap, 307 - zdev->lazy_bitmap, zdev->iommu_pages); 308 - bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages); 309 - } 310 - /* wrap-around */ 311 - offset = __dma_alloc_iommu(dev, 0, size); 312 - if (offset == -1) 313 - goto out_error; 314 - } 315 - zdev->next_bit = offset + size; 316 - spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); 317 - 318 - return zdev->start_dma + offset * PAGE_SIZE; 319 - 320 - out_error: 321 - spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); 322 - return DMA_MAPPING_ERROR; 323 - } 324 - 325 - static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size) 326 - { 327 - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 328 - unsigned long flags, offset; 329 - 330 - offset = (dma_addr - 
zdev->start_dma) >> PAGE_SHIFT; 331 - 332 - spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); 333 - if (!zdev->iommu_bitmap) 334 - goto out; 335 - 336 - if (s390_iommu_strict) 337 - bitmap_clear(zdev->iommu_bitmap, offset, size); 338 - else 339 - bitmap_set(zdev->lazy_bitmap, offset, size); 340 - 341 - out: 342 - spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); 343 - } 344 - 345 - static inline void zpci_err_dma(unsigned long rc, unsigned long addr) 346 - { 347 - struct { 348 - unsigned long rc; 349 - unsigned long addr; 350 - } __packed data = {rc, addr}; 351 - 352 - zpci_err_hex(&data, sizeof(data)); 353 - } 354 - 355 - static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, 356 - unsigned long offset, size_t size, 357 - enum dma_data_direction direction, 358 - unsigned long attrs) 359 - { 360 - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 361 - unsigned long pa = page_to_phys(page) + offset; 362 - int flags = ZPCI_PTE_VALID; 363 - unsigned long nr_pages; 364 - dma_addr_t dma_addr; 365 - int ret; 366 - 367 - /* This rounds up number of pages based on size and offset */ 368 - nr_pages = iommu_num_pages(pa, size, PAGE_SIZE); 369 - dma_addr = dma_alloc_address(dev, nr_pages); 370 - if (dma_addr == DMA_MAPPING_ERROR) { 371 - ret = -ENOSPC; 372 - goto out_err; 373 - } 374 - 375 - /* Use rounded up size */ 376 - size = nr_pages * PAGE_SIZE; 377 - 378 - if (direction == DMA_NONE || direction == DMA_TO_DEVICE) 379 - flags |= ZPCI_TABLE_PROTECTED; 380 - 381 - ret = dma_update_trans(zdev, pa, dma_addr, size, flags); 382 - if (ret) 383 - goto out_free; 384 - 385 - atomic64_add(nr_pages, &zdev->mapped_pages); 386 - return dma_addr + (offset & ~PAGE_MASK); 387 - 388 - out_free: 389 - dma_free_address(dev, dma_addr, nr_pages); 390 - out_err: 391 - zpci_err("map error:\n"); 392 - zpci_err_dma(ret, pa); 393 - return DMA_MAPPING_ERROR; 394 - } 395 - 396 - static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr, 397 - size_t size, enum dma_data_direction direction, 398 - unsigned long attrs) 399 - { 400 - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 401 - int npages, ret; 402 - 403 - npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); 404 - dma_addr = dma_addr & PAGE_MASK; 405 - ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE, 406 - ZPCI_PTE_INVALID); 407 - if (ret) { 408 - zpci_err("unmap error:\n"); 409 - zpci_err_dma(ret, dma_addr); 410 - return; 411 - } 412 - 413 - atomic64_add(npages, &zdev->unmapped_pages); 414 - dma_free_address(dev, dma_addr, npages); 415 - } 416 - 417 - static void *s390_dma_alloc(struct device *dev, size_t size, 418 - dma_addr_t *dma_handle, gfp_t flag, 419 - unsigned long attrs) 420 - { 421 - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 422 - struct page *page; 423 - phys_addr_t pa; 424 - dma_addr_t map; 425 - 426 - size = PAGE_ALIGN(size); 427 - page = alloc_pages(flag | __GFP_ZERO, get_order(size)); 428 - if (!page) 429 - return NULL; 430 - 431 - pa = page_to_phys(page); 432 - map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, 0); 433 - if (dma_mapping_error(dev, map)) { 434 - __free_pages(page, get_order(size)); 435 - return NULL; 436 - } 437 - 438 - atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages); 439 - if (dma_handle) 440 - *dma_handle = map; 441 - return phys_to_virt(pa); 442 - } 443 - 444 - static void s390_dma_free(struct device *dev, size_t size, 445 - void *vaddr, dma_addr_t dma_handle, 446 - unsigned long attrs) 447 - { 448 - struct zpci_dev *zdev = 
to_zpci(to_pci_dev(dev)); 449 - 450 - size = PAGE_ALIGN(size); 451 - atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages); 452 - s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, 0); 453 - free_pages((unsigned long)vaddr, get_order(size)); 454 - } 455 - 456 - /* Map a segment into a contiguous dma address area */ 457 - static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg, 458 - size_t size, dma_addr_t *handle, 459 - enum dma_data_direction dir) 460 - { 461 - unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; 462 - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); 463 - dma_addr_t dma_addr_base, dma_addr; 464 - int flags = ZPCI_PTE_VALID; 465 - struct scatterlist *s; 466 - phys_addr_t pa = 0; 467 - int ret; 468 - 469 - dma_addr_base = dma_alloc_address(dev, nr_pages); 470 - if (dma_addr_base == DMA_MAPPING_ERROR) 471 - return -ENOMEM; 472 - 473 - dma_addr = dma_addr_base; 474 - if (dir == DMA_NONE || dir == DMA_TO_DEVICE) 475 - flags |= ZPCI_TABLE_PROTECTED; 476 - 477 - for (s = sg; dma_addr < dma_addr_base + size; s = sg_next(s)) { 478 - pa = page_to_phys(sg_page(s)); 479 - ret = __dma_update_trans(zdev, pa, dma_addr, 480 - s->offset + s->length, flags); 481 - if (ret) 482 - goto unmap; 483 - 484 - dma_addr += s->offset + s->length; 485 - } 486 - ret = __dma_purge_tlb(zdev, dma_addr_base, size, flags); 487 - if (ret) 488 - goto unmap; 489 - 490 - *handle = dma_addr_base; 491 - atomic64_add(nr_pages, &zdev->mapped_pages); 492 - 493 - return ret; 494 - 495 - unmap: 496 - dma_update_trans(zdev, 0, dma_addr_base, dma_addr - dma_addr_base, 497 - ZPCI_PTE_INVALID); 498 - dma_free_address(dev, dma_addr_base, nr_pages); 499 - zpci_err("map error:\n"); 500 - zpci_err_dma(ret, pa); 501 - return ret; 502 - } 503 - 504 - static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg, 505 - int nr_elements, enum dma_data_direction dir, 506 - unsigned long attrs) 507 - { 508 - struct scatterlist *s = sg, *start = sg, *dma = sg; 509 - unsigned int max = dma_get_max_seg_size(dev); 510 - unsigned int size = s->offset + s->length; 511 - unsigned int offset = s->offset; 512 - int count = 0, i, ret; 513 - 514 - for (i = 1; i < nr_elements; i++) { 515 - s = sg_next(s); 516 - 517 - s->dma_length = 0; 518 - 519 - if (s->offset || (size & ~PAGE_MASK) || 520 - size + s->length > max) { 521 - ret = __s390_dma_map_sg(dev, start, size, 522 - &dma->dma_address, dir); 523 - if (ret) 524 - goto unmap; 525 - 526 - dma->dma_address += offset; 527 - dma->dma_length = size - offset; 528 - 529 - size = offset = s->offset; 530 - start = s; 531 - dma = sg_next(dma); 532 - count++; 533 - } 534 - size += s->length; 535 - } 536 - ret = __s390_dma_map_sg(dev, start, size, &dma->dma_address, dir); 537 - if (ret) 538 - goto unmap; 539 - 540 - dma->dma_address += offset; 541 - dma->dma_length = size - offset; 542 - 543 - return count + 1; 544 - unmap: 545 - for_each_sg(sg, s, count, i) 546 - s390_dma_unmap_pages(dev, sg_dma_address(s), sg_dma_len(s), 547 - dir, attrs); 548 - 549 - return ret; 550 - } 551 - 552 - static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg, 553 - int nr_elements, enum dma_data_direction dir, 554 - unsigned long attrs) 555 - { 556 - struct scatterlist *s; 557 - int i; 558 - 559 - for_each_sg(sg, s, nr_elements, i) { 560 - if (s->dma_length) 561 - s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, 562 - dir, attrs); 563 - s->dma_address = 0; 564 - s->dma_length = 0; 565 - } 566 - } 567 - 568 - int zpci_dma_init_device(struct zpci_dev 
*zdev) 569 - { 570 - u8 status; 571 - int rc; 572 - 573 - /* 574 - * At this point, if the device is part of an IOMMU domain, this would 575 - * be a strong hint towards a bug in the IOMMU API (common) code and/or 576 - * simultaneous access via IOMMU and DMA API. So let's issue a warning. 577 - */ 578 - WARN_ON(zdev->s390_domain); 579 - 580 - spin_lock_init(&zdev->iommu_bitmap_lock); 581 - 582 - zdev->dma_table = dma_alloc_cpu_table(GFP_KERNEL); 583 - if (!zdev->dma_table) { 584 - rc = -ENOMEM; 585 - goto out; 586 - } 587 - 588 - /* 589 - * Restrict the iommu bitmap size to the minimum of the following: 590 - * - s390_iommu_aperture which defaults to high_memory 591 - * - 3-level pagetable address limit minus start_dma offset 592 - * - DMA address range allowed by the hardware (clp query pci fn) 593 - * 594 - * Also set zdev->end_dma to the actual end address of the usable 595 - * range, instead of the theoretical maximum as reported by hardware. 596 - * 597 - * This limits the number of concurrently usable DMA mappings since 598 - * for each DMA mapped memory address we need a DMA address including 599 - * extra DMA addresses for multiple mappings of the same memory address. 600 - */ 601 - zdev->start_dma = PAGE_ALIGN(zdev->start_dma); 602 - zdev->iommu_size = min3(s390_iommu_aperture, 603 - ZPCI_TABLE_SIZE_RT - zdev->start_dma, 604 - zdev->end_dma - zdev->start_dma + 1); 605 - zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1; 606 - zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT; 607 - zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8); 608 - if (!zdev->iommu_bitmap) { 609 - rc = -ENOMEM; 610 - goto free_dma_table; 611 - } 612 - if (!s390_iommu_strict) { 613 - zdev->lazy_bitmap = vzalloc(zdev->iommu_pages / 8); 614 - if (!zdev->lazy_bitmap) { 615 - rc = -ENOMEM; 616 - goto free_bitmap; 617 - } 618 - 619 - } 620 - if (zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, 621 - virt_to_phys(zdev->dma_table), &status)) { 622 - rc = -EIO; 623 - goto free_bitmap; 624 - } 625 - 626 - return 0; 627 - free_bitmap: 628 - vfree(zdev->iommu_bitmap); 629 - zdev->iommu_bitmap = NULL; 630 - vfree(zdev->lazy_bitmap); 631 - zdev->lazy_bitmap = NULL; 632 - free_dma_table: 633 - dma_free_cpu_table(zdev->dma_table); 634 - zdev->dma_table = NULL; 635 - out: 636 - return rc; 637 - } 638 - 639 - int zpci_dma_exit_device(struct zpci_dev *zdev) 640 - { 641 - int cc = 0; 642 - 643 - /* 644 - * At this point, if the device is part of an IOMMU domain, this would 645 - * be a strong hint towards a bug in the IOMMU API (common) code and/or 646 - * simultaneous access via IOMMU and DMA API. So let's issue a warning. 647 - */ 648 - WARN_ON(zdev->s390_domain); 649 - if (zdev_enabled(zdev)) 650 - cc = zpci_unregister_ioat(zdev, 0); 651 - /* 652 - * cc == 3 indicates the function is gone already. This can happen 653 - * if the function was deconfigured/disabled suddenly and we have not 654 - * received a new handle yet. 
655 - */ 656 - if (cc && cc != 3) 657 - return -EIO; 658 - 659 - dma_cleanup_tables(zdev->dma_table); 660 - zdev->dma_table = NULL; 661 - vfree(zdev->iommu_bitmap); 662 - zdev->iommu_bitmap = NULL; 663 - vfree(zdev->lazy_bitmap); 664 - zdev->lazy_bitmap = NULL; 665 - zdev->next_bit = 0; 666 - return 0; 667 - } 668 - 669 - static int __init dma_alloc_cpu_table_caches(void) 670 - { 671 - dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables", 672 - ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN, 673 - 0, NULL); 674 - if (!dma_region_table_cache) 675 - return -ENOMEM; 676 - 677 - dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables", 678 - ZPCI_PT_SIZE, ZPCI_PT_ALIGN, 679 - 0, NULL); 680 - if (!dma_page_table_cache) { 681 - kmem_cache_destroy(dma_region_table_cache); 682 - return -ENOMEM; 683 - } 684 - return 0; 685 - } 686 - 687 - int __init zpci_dma_init(void) 688 - { 689 - s390_iommu_aperture = (u64)virt_to_phys(high_memory); 690 - if (!s390_iommu_aperture_factor) 691 - s390_iommu_aperture = ULONG_MAX; 692 - else 693 - s390_iommu_aperture *= s390_iommu_aperture_factor; 694 - 695 - return dma_alloc_cpu_table_caches(); 696 - } 697 - 698 - void zpci_dma_exit(void) 699 - { 700 - kmem_cache_destroy(dma_page_table_cache); 701 - kmem_cache_destroy(dma_region_table_cache); 702 - } 703 - 704 - const struct dma_map_ops s390_pci_dma_ops = { 705 - .alloc = s390_dma_alloc, 706 - .free = s390_dma_free, 707 - .map_sg = s390_dma_map_sg, 708 - .unmap_sg = s390_dma_unmap_sg, 709 - .map_page = s390_dma_map_pages, 710 - .unmap_page = s390_dma_unmap_pages, 711 - .mmap = dma_common_mmap, 712 - .get_sgtable = dma_common_get_sgtable, 713 - .alloc_pages = dma_common_alloc_pages, 714 - .free_pages = dma_common_free_pages, 715 - /* dma_supported is unconditionally true without a callback */ 716 - }; 717 - EXPORT_SYMBOL_GPL(s390_pci_dma_ops); 718 - 719 - static int __init s390_iommu_setup(char *str) 720 - { 721 - if (!strcmp(str, "strict")) 722 - s390_iommu_strict = 1; 723 - return 1; 724 - } 725 - 726 - __setup("s390_iommu=", s390_iommu_setup); 727 - 728 - static int __init s390_iommu_aperture_setup(char *str) 729 - { 730 - if (kstrtou32(str, 10, &s390_iommu_aperture_factor)) 731 - s390_iommu_aperture_factor = 1; 732 - return 1; 733 - } 734 - 735 - __setup("s390_iommu_aperture=", s390_iommu_aperture_setup);
-2
arch/s390/pci/pci_event.c
··· 313 313 /* Even though the device is already gone we still 314 314 * need to free zPCI resources as part of the disable. 315 315 */ 316 - if (zdev->dma_table) 317 - zpci_dma_exit_device(zdev); 318 316 if (zdev_enabled(zdev)) 319 317 zpci_disable_device(zdev); 320 318 zdev->state = ZPCI_FN_STATE_STANDBY;
+8 -11
arch/s390/pci/pci_sysfs.c
··· 56 56 struct pci_dev *pdev = to_pci_dev(dev); 57 57 struct zpci_dev *zdev = to_zpci(pdev); 58 58 int ret = 0; 59 + u8 status; 59 60 60 61 /* Can't use device_remove_self() here as that would lead us to lock 61 62 * the pci_rescan_remove_lock while holding the device' kernfs lock. ··· 83 82 pci_lock_rescan_remove(); 84 83 if (pci_dev_is_added(pdev)) { 85 84 pci_stop_and_remove_bus_device(pdev); 86 - if (zdev->dma_table) { 87 - ret = zpci_dma_exit_device(zdev); 88 - if (ret) 89 - goto out; 90 - } 91 - 92 85 if (zdev_enabled(zdev)) { 93 86 ret = zpci_disable_device(zdev); 94 87 /* ··· 100 105 ret = zpci_enable_device(zdev); 101 106 if (ret) 102 107 goto out; 103 - ret = zpci_dma_init_device(zdev); 104 - if (ret) { 105 - zpci_disable_device(zdev); 106 - goto out; 108 + 109 + if (zdev->dma_table) { 110 + ret = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, 111 + virt_to_phys(zdev->dma_table), &status); 112 + if (ret) 113 + zpci_disable_device(zdev); 107 114 } 108 - pci_rescan_bus(zdev->zbus->bus); 109 115 } 110 116 out: 117 + pci_rescan_bus(zdev->zbus->bus); 111 118 pci_unlock_rescan_remove(); 112 119 if (kn) 113 120 sysfs_unbreak_active_protection(kn);
+2 -2
drivers/iommu/Kconfig
··· 91 91 choice 92 92 prompt "IOMMU default domain type" 93 93 depends on IOMMU_API 94 - default IOMMU_DEFAULT_DMA_LAZY if X86 || IA64 94 + default IOMMU_DEFAULT_DMA_LAZY if X86 || IA64 || S390 95 95 default IOMMU_DEFAULT_DMA_STRICT 96 96 help 97 97 Choose the type of IOMMU domain used to manage DMA API usage by ··· 146 146 147 147 # IOMMU-agnostic DMA-mapping layer 148 148 config IOMMU_DMA 149 - def_bool ARM64 || IA64 || X86 149 + def_bool ARM64 || IA64 || X86 || S390 150 150 select DMA_OPS 151 151 select IOMMU_API 152 152 select IOMMU_IOVA
+359 -41
drivers/iommu/s390-iommu.c
··· 14 14 #include <linux/rcupdate.h> 15 15 #include <asm/pci_dma.h> 16 16 17 + #include "dma-iommu.h" 18 + 17 19 static const struct iommu_ops s390_iommu_ops; 20 + 21 + static struct kmem_cache *dma_region_table_cache; 22 + static struct kmem_cache *dma_page_table_cache; 23 + 24 + static u64 s390_iommu_aperture; 25 + static u32 s390_iommu_aperture_factor = 1; 18 26 19 27 struct s390_domain { 20 28 struct iommu_domain domain; 21 29 struct list_head devices; 30 + struct zpci_iommu_ctrs ctrs; 22 31 unsigned long *dma_table; 23 32 spinlock_t list_lock; 24 33 struct rcu_head rcu; 25 34 }; 35 + 36 + static inline unsigned int calc_rtx(dma_addr_t ptr) 37 + { 38 + return ((unsigned long)ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK; 39 + } 40 + 41 + static inline unsigned int calc_sx(dma_addr_t ptr) 42 + { 43 + return ((unsigned long)ptr >> ZPCI_ST_SHIFT) & ZPCI_INDEX_MASK; 44 + } 45 + 46 + static inline unsigned int calc_px(dma_addr_t ptr) 47 + { 48 + return ((unsigned long)ptr >> PAGE_SHIFT) & ZPCI_PT_MASK; 49 + } 50 + 51 + static inline void set_pt_pfaa(unsigned long *entry, phys_addr_t pfaa) 52 + { 53 + *entry &= ZPCI_PTE_FLAG_MASK; 54 + *entry |= (pfaa & ZPCI_PTE_ADDR_MASK); 55 + } 56 + 57 + static inline void set_rt_sto(unsigned long *entry, phys_addr_t sto) 58 + { 59 + *entry &= ZPCI_RTE_FLAG_MASK; 60 + *entry |= (sto & ZPCI_RTE_ADDR_MASK); 61 + *entry |= ZPCI_TABLE_TYPE_RTX; 62 + } 63 + 64 + static inline void set_st_pto(unsigned long *entry, phys_addr_t pto) 65 + { 66 + *entry &= ZPCI_STE_FLAG_MASK; 67 + *entry |= (pto & ZPCI_STE_ADDR_MASK); 68 + *entry |= ZPCI_TABLE_TYPE_SX; 69 + } 70 + 71 + static inline void validate_rt_entry(unsigned long *entry) 72 + { 73 + *entry &= ~ZPCI_TABLE_VALID_MASK; 74 + *entry &= ~ZPCI_TABLE_OFFSET_MASK; 75 + *entry |= ZPCI_TABLE_VALID; 76 + *entry |= ZPCI_TABLE_LEN_RTX; 77 + } 78 + 79 + static inline void validate_st_entry(unsigned long *entry) 80 + { 81 + *entry &= ~ZPCI_TABLE_VALID_MASK; 82 + *entry |= ZPCI_TABLE_VALID; 83 + } 84 + 85 + static inline void invalidate_pt_entry(unsigned long *entry) 86 + { 87 + WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID); 88 + *entry &= ~ZPCI_PTE_VALID_MASK; 89 + *entry |= ZPCI_PTE_INVALID; 90 + } 91 + 92 + static inline void validate_pt_entry(unsigned long *entry) 93 + { 94 + WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID); 95 + *entry &= ~ZPCI_PTE_VALID_MASK; 96 + *entry |= ZPCI_PTE_VALID; 97 + } 98 + 99 + static inline void entry_set_protected(unsigned long *entry) 100 + { 101 + *entry &= ~ZPCI_TABLE_PROT_MASK; 102 + *entry |= ZPCI_TABLE_PROTECTED; 103 + } 104 + 105 + static inline void entry_clr_protected(unsigned long *entry) 106 + { 107 + *entry &= ~ZPCI_TABLE_PROT_MASK; 108 + *entry |= ZPCI_TABLE_UNPROTECTED; 109 + } 110 + 111 + static inline int reg_entry_isvalid(unsigned long entry) 112 + { 113 + return (entry & ZPCI_TABLE_VALID_MASK) == ZPCI_TABLE_VALID; 114 + } 115 + 116 + static inline int pt_entry_isvalid(unsigned long entry) 117 + { 118 + return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID; 119 + } 120 + 121 + static inline unsigned long *get_rt_sto(unsigned long entry) 122 + { 123 + if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX) 124 + return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK); 125 + else 126 + return NULL; 127 + } 128 + 129 + static inline unsigned long *get_st_pto(unsigned long entry) 130 + { 131 + if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX) 132 + return phys_to_virt(entry & ZPCI_STE_ADDR_MASK); 133 + else 134 + return NULL; 135 + } 136 + 137 + 
static int __init dma_alloc_cpu_table_caches(void) 138 + { 139 + dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables", 140 + ZPCI_TABLE_SIZE, 141 + ZPCI_TABLE_ALIGN, 142 + 0, NULL); 143 + if (!dma_region_table_cache) 144 + return -ENOMEM; 145 + 146 + dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables", 147 + ZPCI_PT_SIZE, 148 + ZPCI_PT_ALIGN, 149 + 0, NULL); 150 + if (!dma_page_table_cache) { 151 + kmem_cache_destroy(dma_region_table_cache); 152 + return -ENOMEM; 153 + } 154 + return 0; 155 + } 156 + 157 + static unsigned long *dma_alloc_cpu_table(gfp_t gfp) 158 + { 159 + unsigned long *table, *entry; 160 + 161 + table = kmem_cache_alloc(dma_region_table_cache, gfp); 162 + if (!table) 163 + return NULL; 164 + 165 + for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++) 166 + *entry = ZPCI_TABLE_INVALID; 167 + return table; 168 + } 169 + 170 + static void dma_free_cpu_table(void *table) 171 + { 172 + kmem_cache_free(dma_region_table_cache, table); 173 + } 174 + 175 + static void dma_free_page_table(void *table) 176 + { 177 + kmem_cache_free(dma_page_table_cache, table); 178 + } 179 + 180 + static void dma_free_seg_table(unsigned long entry) 181 + { 182 + unsigned long *sto = get_rt_sto(entry); 183 + int sx; 184 + 185 + for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++) 186 + if (reg_entry_isvalid(sto[sx])) 187 + dma_free_page_table(get_st_pto(sto[sx])); 188 + 189 + dma_free_cpu_table(sto); 190 + } 191 + 192 + static void dma_cleanup_tables(unsigned long *table) 193 + { 194 + int rtx; 195 + 196 + if (!table) 197 + return; 198 + 199 + for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++) 200 + if (reg_entry_isvalid(table[rtx])) 201 + dma_free_seg_table(table[rtx]); 202 + 203 + dma_free_cpu_table(table); 204 + } 205 + 206 + static unsigned long *dma_alloc_page_table(gfp_t gfp) 207 + { 208 + unsigned long *table, *entry; 209 + 210 + table = kmem_cache_alloc(dma_page_table_cache, gfp); 211 + if (!table) 212 + return NULL; 213 + 214 + for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++) 215 + *entry = ZPCI_PTE_INVALID; 216 + return table; 217 + } 218 + 219 + static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp) 220 + { 221 + unsigned long old_rte, rte; 222 + unsigned long *sto; 223 + 224 + rte = READ_ONCE(*rtep); 225 + if (reg_entry_isvalid(rte)) { 226 + sto = get_rt_sto(rte); 227 + } else { 228 + sto = dma_alloc_cpu_table(gfp); 229 + if (!sto) 230 + return NULL; 231 + 232 + set_rt_sto(&rte, virt_to_phys(sto)); 233 + validate_rt_entry(&rte); 234 + entry_clr_protected(&rte); 235 + 236 + old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte); 237 + if (old_rte != ZPCI_TABLE_INVALID) { 238 + /* Somone else was faster, use theirs */ 239 + dma_free_cpu_table(sto); 240 + sto = get_rt_sto(old_rte); 241 + } 242 + } 243 + return sto; 244 + } 245 + 246 + static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp) 247 + { 248 + unsigned long old_ste, ste; 249 + unsigned long *pto; 250 + 251 + ste = READ_ONCE(*step); 252 + if (reg_entry_isvalid(ste)) { 253 + pto = get_st_pto(ste); 254 + } else { 255 + pto = dma_alloc_page_table(gfp); 256 + if (!pto) 257 + return NULL; 258 + set_st_pto(&ste, virt_to_phys(pto)); 259 + validate_st_entry(&ste); 260 + entry_clr_protected(&ste); 261 + 262 + old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste); 263 + if (old_ste != ZPCI_TABLE_INVALID) { 264 + /* Somone else was faster, use theirs */ 265 + dma_free_page_table(pto); 266 + pto = get_st_pto(old_ste); 267 + } 268 + } 269 + return pto; 270 + } 271 + 272 + 
static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, gfp_t gfp) 273 + { 274 + unsigned long *sto, *pto; 275 + unsigned int rtx, sx, px; 276 + 277 + rtx = calc_rtx(dma_addr); 278 + sto = dma_get_seg_table_origin(&rto[rtx], gfp); 279 + if (!sto) 280 + return NULL; 281 + 282 + sx = calc_sx(dma_addr); 283 + pto = dma_get_page_table_origin(&sto[sx], gfp); 284 + if (!pto) 285 + return NULL; 286 + 287 + px = calc_px(dma_addr); 288 + return &pto[px]; 289 + } 290 + 291 + static void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags) 292 + { 293 + unsigned long pte; 294 + 295 + pte = READ_ONCE(*ptep); 296 + if (flags & ZPCI_PTE_INVALID) { 297 + invalidate_pt_entry(&pte); 298 + } else { 299 + set_pt_pfaa(&pte, page_addr); 300 + validate_pt_entry(&pte); 301 + } 302 + 303 + if (flags & ZPCI_TABLE_PROTECTED) 304 + entry_set_protected(&pte); 305 + else 306 + entry_clr_protected(&pte); 307 + 308 + xchg(ptep, pte); 309 + } 26 310 27 311 static struct s390_domain *to_s390_domain(struct iommu_domain *dom) 28 312 { ··· 317 33 { 318 34 switch (cap) { 319 35 case IOMMU_CAP_CACHE_COHERENCY: 36 + return true; 37 + case IOMMU_CAP_DEFERRED_FLUSH: 320 38 return true; 321 39 default: 322 40 return false; ··· 367 81 call_rcu(&s390_domain->rcu, s390_iommu_rcu_free_domain); 368 82 } 369 83 370 - static void __s390_iommu_detach_device(struct zpci_dev *zdev) 84 + static void s390_iommu_detach_device(struct iommu_domain *domain, 85 + struct device *dev) 371 86 { 372 - struct s390_domain *s390_domain = zdev->s390_domain; 87 + struct s390_domain *s390_domain = to_s390_domain(domain); 88 + struct zpci_dev *zdev = to_zpci_dev(dev); 373 89 unsigned long flags; 374 - 375 - if (!s390_domain) 376 - return; 377 90 378 91 spin_lock_irqsave(&s390_domain->list_lock, flags); 379 92 list_del_rcu(&zdev->iommu_list); ··· 400 115 return -EINVAL; 401 116 402 117 if (zdev->s390_domain) 403 - __s390_iommu_detach_device(zdev); 404 - else if (zdev->dma_table) 405 - zpci_dma_exit_device(zdev); 118 + s390_iommu_detach_device(&zdev->s390_domain->domain, dev); 406 119 407 120 cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, 408 121 virt_to_phys(s390_domain->dma_table), &status); ··· 410 127 */ 411 128 if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL) 412 129 return -EIO; 413 - zdev->dma_table = s390_domain->dma_table; 414 130 415 131 zdev->dma_table = s390_domain->dma_table; 416 132 zdev->s390_domain = s390_domain; ··· 420 138 421 139 return 0; 422 140 } 423 - 424 - /* 425 - * Switch control over the IOMMU to S390's internal dma_api ops 426 - */ 427 - static int s390_iommu_platform_attach(struct iommu_domain *platform_domain, 428 - struct device *dev) 429 - { 430 - struct zpci_dev *zdev = to_zpci_dev(dev); 431 - 432 - if (!zdev->s390_domain) 433 - return 0; 434 - 435 - __s390_iommu_detach_device(zdev); 436 - zpci_dma_init_device(zdev); 437 - return 0; 438 - } 439 - 440 - static struct iommu_domain_ops s390_iommu_platform_ops = { 441 - .attach_dev = s390_iommu_platform_attach, 442 - }; 443 - 444 - static struct iommu_domain s390_iommu_platform_domain = { 445 - .type = IOMMU_DOMAIN_PLATFORM, 446 - .ops = &s390_iommu_platform_ops, 447 - }; 448 141 449 142 static void s390_iommu_get_resv_regions(struct device *dev, 450 143 struct list_head *list) ··· 473 216 * to the device, but keep it attached to other devices in the group. 
474 217 */ 475 218 if (zdev) 476 - __s390_iommu_detach_device(zdev); 219 + s390_iommu_detach_device(&zdev->s390_domain->domain, dev); 477 220 } 478 221 479 222 static int zpci_refresh_all(struct zpci_dev *zdev) ··· 489 232 490 233 rcu_read_lock(); 491 234 list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) { 235 + atomic64_inc(&s390_domain->ctrs.global_rpcits); 492 236 zpci_refresh_all(zdev); 493 237 } 494 238 rcu_read_unlock(); ··· 508 250 509 251 rcu_read_lock(); 510 252 list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) { 253 + atomic64_inc(&s390_domain->ctrs.sync_rpcits); 511 254 zpci_refresh_trans((u64)zdev->fh << 32, gather->start, 512 255 size); 513 256 } ··· 526 267 list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) { 527 268 if (!zdev->tlb_refresh) 528 269 continue; 270 + atomic64_inc(&s390_domain->ctrs.sync_map_rpcits); 529 271 ret = zpci_refresh_trans((u64)zdev->fh << 32, 530 272 iova, size); 531 273 /* ··· 621 361 if (!IS_ALIGNED(iova | paddr, pgsize)) 622 362 return -EINVAL; 623 363 624 - if (!(prot & IOMMU_READ)) 625 - return -EINVAL; 626 - 627 364 if (!(prot & IOMMU_WRITE)) 628 365 flags |= ZPCI_TABLE_PROTECTED; 629 366 630 367 rc = s390_iommu_validate_trans(s390_domain, paddr, iova, 631 - pgcount, flags, gfp); 632 - if (!rc) 368 + pgcount, flags, gfp); 369 + if (!rc) { 633 370 *mapped = size; 371 + atomic64_add(pgcount, &s390_domain->ctrs.mapped_pages); 372 + } 634 373 635 374 return rc; 636 375 } ··· 685 426 return 0; 686 427 687 428 iommu_iotlb_gather_add_range(gather, iova, size); 429 + atomic64_add(pgcount, &s390_domain->ctrs.unmapped_pages); 688 430 689 431 return size; 690 432 } 691 433 434 + static void s390_iommu_probe_finalize(struct device *dev) 435 + { 436 + iommu_setup_dma_ops(dev, 0, U64_MAX); 437 + } 438 + 439 + struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev) 440 + { 441 + if (!zdev || !zdev->s390_domain) 442 + return NULL; 443 + return &zdev->s390_domain->ctrs; 444 + } 445 + 692 446 int zpci_init_iommu(struct zpci_dev *zdev) 693 447 { 448 + u64 aperture_size; 694 449 int rc = 0; 695 450 696 451 rc = iommu_device_sysfs_add(&zdev->iommu_dev, NULL, NULL, ··· 715 442 rc = iommu_device_register(&zdev->iommu_dev, &s390_iommu_ops, NULL); 716 443 if (rc) 717 444 goto out_sysfs; 445 + 446 + zdev->start_dma = PAGE_ALIGN(zdev->start_dma); 447 + aperture_size = min3(s390_iommu_aperture, 448 + ZPCI_TABLE_SIZE_RT - zdev->start_dma, 449 + zdev->end_dma - zdev->start_dma + 1); 450 + zdev->end_dma = zdev->start_dma + aperture_size - 1; 718 451 719 452 return 0; 720 453 ··· 737 458 iommu_device_sysfs_remove(&zdev->iommu_dev); 738 459 } 739 460 461 + static int __init s390_iommu_setup(char *str) 462 + { 463 + if (!strcmp(str, "strict")) { 464 + pr_warn("s390_iommu=strict deprecated; use iommu.strict=1 instead\n"); 465 + iommu_set_dma_strict(); 466 + } 467 + return 1; 468 + } 469 + 470 + __setup("s390_iommu=", s390_iommu_setup); 471 + 472 + static int __init s390_iommu_aperture_setup(char *str) 473 + { 474 + if (kstrtou32(str, 10, &s390_iommu_aperture_factor)) 475 + s390_iommu_aperture_factor = 1; 476 + return 1; 477 + } 478 + 479 + __setup("s390_iommu_aperture=", s390_iommu_aperture_setup); 480 + 481 + static int __init s390_iommu_init(void) 482 + { 483 + int rc; 484 + 485 + iommu_dma_forcedac = true; 486 + s390_iommu_aperture = (u64)virt_to_phys(high_memory); 487 + if (!s390_iommu_aperture_factor) 488 + s390_iommu_aperture = ULONG_MAX; 489 + else 490 + s390_iommu_aperture *= s390_iommu_aperture_factor; 491 + 492 + rc 
= dma_alloc_cpu_table_caches(); 493 + if (rc) 494 + return rc; 495 + 496 + return rc; 497 + } 498 + subsys_initcall(s390_iommu_init); 499 + 740 500 static const struct iommu_ops s390_iommu_ops = { 741 - .default_domain = &s390_iommu_platform_domain, 742 501 .capable = s390_iommu_capable, 743 502 .domain_alloc_paging = s390_domain_alloc_paging, 744 503 .probe_device = s390_iommu_probe_device, 504 + .probe_finalize = s390_iommu_probe_finalize, 745 505 .release_device = s390_iommu_release_device, 746 506 .device_group = generic_device_group, 747 507 .pgsize_bitmap = SZ_4K,