Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'dma-mapping-5.20-2022-08-06' of git://git.infradead.org/users/hch/dma-mapping

Pull dma-mapping updates from Christoph Hellwig:

- convert arm32 to the common dma-direct code (Arnd Bergmann, Robin
  Murphy, Christoph Hellwig)

- restructure the PCIe peer to peer mapping support (Logan Gunthorpe)

- allow the IOMMU code to communicate an optional DMA mapping length
  and use that in scsi and libata (John Garry)

- split the global swiotlb lock (Tianyu Lan)

- various fixes and cleanup (Chao Gao, Dan Carpenter, Dongli Zhang,
  Lukas Bulwahn, Robin Murphy)

* tag 'dma-mapping-5.20-2022-08-06' of git://git.infradead.org/users/hch/dma-mapping: (45 commits)
swiotlb: fix passing local variable to debugfs_create_ulong()
dma-mapping: reformat comment to suppress htmldoc warning
PCI/P2PDMA: Remove pci_p2pdma_[un]map_sg()
RDMA/rw: drop pci_p2pdma_[un]map_sg()
RDMA/core: introduce ib_dma_pci_p2p_dma_supported()
nvme-pci: convert to using dma_map_sgtable()
nvme-pci: check DMA ops when indicating support for PCI P2PDMA
iommu/dma: support PCI P2PDMA pages in dma-iommu map_sg
iommu: Explicitly skip bus address marked segments in __iommu_map_sg()
dma-mapping: add flags to dma_map_ops to indicate PCI P2PDMA support
dma-direct: support PCI P2PDMA pages in dma-direct map_sg
dma-mapping: allow EREMOTEIO return code for P2PDMA transfers
PCI/P2PDMA: Introduce helpers for dma_map_sg implementations
PCI/P2PDMA: Attempt to set map_type if it has not been set
lib/scatterlist: add flag for indicating P2PDMA segments in an SGL
swiotlb: clean up some coding style and minor issues
dma-mapping: update comment after dmabounce removal
scsi: sd: Add a comment about limiting max_sectors to shost optimal limit
ata: libata-scsi: cap ata_device->max_sectors according to shost->max_sectors
scsi: scsi_transport_sas: cap shost opt_sectors according to DMA optimal limit
...

+839 -1700
+4 -1
Documentation/admin-guide/kernel-parameters.txt
··· 5999 5999 it if 0 is given (See Documentation/admin-guide/cgroup-v1/memory.rst) 6000 6000 6001 6001 swiotlb= [ARM,IA-64,PPC,MIPS,X86] 6002 - Format: { <int> | force | noforce } 6002 + Format: { <int> [,<int>] | force | noforce } 6003 6003 <int> -- Number of I/O TLB slabs 6004 + <int> -- Second integer after comma. Number of swiotlb 6005 + areas with their own lock. Will be rounded up 6006 + to a power of 2. 6004 6007 force -- force using of bounce buffers even if they 6005 6008 wouldn't be automatically used by the kernel 6006 6009 noforce -- Never use bounce buffers (for debugging)
+14
Documentation/core-api/dma-api.rst
··· 206 206 207 207 :: 208 208 209 + size_t 210 + dma_opt_mapping_size(struct device *dev); 211 + 212 + Returns the maximum optimal size of a mapping for the device. 213 + 214 + Mapping larger buffers may take much longer in certain scenarios. In 215 + addition, for high-rate short-lived streaming mappings, the upfront time 216 + spent on the mapping may account for an appreciable part of the total 217 + request lifetime. As such, if splitting larger requests incurs no 218 + significant performance penalty, then device drivers are advised to 219 + limit total DMA streaming mappings length to the returned value. 220 + 221 + :: 222 + 209 223 bool 210 224 dma_need_sync(struct device *dev, dma_addr_t dma_addr); 211 225
+5 -3
Documentation/x86/x86_64/boot-options.rst
··· 287 287 iommu options only relevant to the software bounce buffering (SWIOTLB) IOMMU 288 288 implementation: 289 289 290 - swiotlb=<pages>[,force] 291 - <pages> 292 - Prereserve that many 128K pages for the software IO bounce buffering. 290 + swiotlb=<slots>[,force,noforce] 291 + <slots> 292 + Prereserve that many 2K slots for the software IO bounce buffering. 293 293 force 294 294 Force all IO through the software TLB. 295 + noforce 296 + Do not initialize the software TLB. 295 297 296 298 297 299 Miscellaneous
+2 -3
arch/arm/Kconfig
··· 15 15 select ARCH_HAS_MEMBARRIER_SYNC_CORE 16 16 select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE 17 17 select ARCH_HAS_PTE_SPECIAL if ARM_LPAE 18 - select ARCH_HAS_PHYS_TO_DMA 19 18 select ARCH_HAS_SETUP_DMA_OPS 20 19 select ARCH_HAS_SET_MEMORY 21 20 select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL 22 21 select ARCH_HAS_STRICT_MODULE_RWX if MMU 23 - select ARCH_HAS_SYNC_DMA_FOR_DEVICE if SWIOTLB || !MMU 24 - select ARCH_HAS_SYNC_DMA_FOR_CPU if SWIOTLB || !MMU 22 + select ARCH_HAS_SYNC_DMA_FOR_DEVICE 23 + select ARCH_HAS_SYNC_DMA_FOR_CPU 25 24 select ARCH_HAS_TEARDOWN_DMA_OPS if MMU 26 25 select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST 27 26 select ARCH_HAVE_CUSTOM_GPIO_H
+1 -5
arch/arm/common/Kconfig
··· 1 1 # SPDX-License-Identifier: GPL-2.0 2 2 config SA1111 3 3 bool 4 - select DMABOUNCE if !ARCH_PXA 5 - 6 - config DMABOUNCE 7 - bool 8 - select ZONE_DMA 4 + select ZONE_DMA if ARCH_SA1100 9 5 10 6 config KRAIT_L2_ACCESSORS 11 7 bool
-1
arch/arm/common/Makefile
··· 6 6 obj-y += firmware.o 7 7 8 8 obj-$(CONFIG_SA1111) += sa1111.o 9 - obj-$(CONFIG_DMABOUNCE) += dmabounce.o 10 9 obj-$(CONFIG_KRAIT_L2_ACCESSORS) += krait-l2-accessors.o 11 10 obj-$(CONFIG_SHARP_LOCOMO) += locomo.o 12 11 obj-$(CONFIG_SHARP_PARAM) += sharpsl_param.o
-582
arch/arm/common/dmabounce.c
··· 1 - // SPDX-License-Identifier: GPL-2.0-only 2 - /* 3 - * arch/arm/common/dmabounce.c 4 - * 5 - * Special dma_{map/unmap/dma_sync}_* routines for systems that have 6 - * limited DMA windows. These functions utilize bounce buffers to 7 - * copy data to/from buffers located outside the DMA region. This 8 - * only works for systems in which DMA memory is at the bottom of 9 - * RAM, the remainder of memory is at the top and the DMA memory 10 - * can be marked as ZONE_DMA. Anything beyond that such as discontiguous 11 - * DMA windows will require custom implementations that reserve memory 12 - * areas at early bootup. 13 - * 14 - * Original version by Brad Parker (brad@heeltoe.com) 15 - * Re-written by Christopher Hoover <ch@murgatroid.com> 16 - * Made generic by Deepak Saxena <dsaxena@plexity.net> 17 - * 18 - * Copyright (C) 2002 Hewlett Packard Company. 19 - * Copyright (C) 2004 MontaVista Software, Inc. 20 - */ 21 - 22 - #include <linux/module.h> 23 - #include <linux/init.h> 24 - #include <linux/slab.h> 25 - #include <linux/page-flags.h> 26 - #include <linux/device.h> 27 - #include <linux/dma-direct.h> 28 - #include <linux/dma-map-ops.h> 29 - #include <linux/dmapool.h> 30 - #include <linux/list.h> 31 - #include <linux/scatterlist.h> 32 - 33 - #include <asm/cacheflush.h> 34 - #include <asm/dma-iommu.h> 35 - 36 - #undef STATS 37 - 38 - #ifdef STATS 39 - #define DO_STATS(X) do { X ; } while (0) 40 - #else 41 - #define DO_STATS(X) do { } while (0) 42 - #endif 43 - 44 - /* ************************************************** */ 45 - 46 - struct safe_buffer { 47 - struct list_head node; 48 - 49 - /* original request */ 50 - void *ptr; 51 - size_t size; 52 - int direction; 53 - 54 - /* safe buffer info */ 55 - struct dmabounce_pool *pool; 56 - void *safe; 57 - dma_addr_t safe_dma_addr; 58 - }; 59 - 60 - struct dmabounce_pool { 61 - unsigned long size; 62 - struct dma_pool *pool; 63 - #ifdef STATS 64 - unsigned long allocs; 65 - #endif 66 - }; 67 - 68 - struct 
dmabounce_device_info { 69 - struct device *dev; 70 - struct list_head safe_buffers; 71 - #ifdef STATS 72 - unsigned long total_allocs; 73 - unsigned long map_op_count; 74 - unsigned long bounce_count; 75 - int attr_res; 76 - #endif 77 - struct dmabounce_pool small; 78 - struct dmabounce_pool large; 79 - 80 - rwlock_t lock; 81 - 82 - int (*needs_bounce)(struct device *, dma_addr_t, size_t); 83 - }; 84 - 85 - #ifdef STATS 86 - static ssize_t dmabounce_show(struct device *dev, struct device_attribute *attr, 87 - char *buf) 88 - { 89 - struct dmabounce_device_info *device_info = dev->archdata.dmabounce; 90 - return sprintf(buf, "%lu %lu %lu %lu %lu %lu\n", 91 - device_info->small.allocs, 92 - device_info->large.allocs, 93 - device_info->total_allocs - device_info->small.allocs - 94 - device_info->large.allocs, 95 - device_info->total_allocs, 96 - device_info->map_op_count, 97 - device_info->bounce_count); 98 - } 99 - 100 - static DEVICE_ATTR(dmabounce_stats, 0400, dmabounce_show, NULL); 101 - #endif 102 - 103 - 104 - /* allocate a 'safe' buffer and keep track of it */ 105 - static inline struct safe_buffer * 106 - alloc_safe_buffer(struct dmabounce_device_info *device_info, void *ptr, 107 - size_t size, enum dma_data_direction dir) 108 - { 109 - struct safe_buffer *buf; 110 - struct dmabounce_pool *pool; 111 - struct device *dev = device_info->dev; 112 - unsigned long flags; 113 - 114 - dev_dbg(dev, "%s(ptr=%p, size=%d, dir=%d)\n", 115 - __func__, ptr, size, dir); 116 - 117 - if (size <= device_info->small.size) { 118 - pool = &device_info->small; 119 - } else if (size <= device_info->large.size) { 120 - pool = &device_info->large; 121 - } else { 122 - pool = NULL; 123 - } 124 - 125 - buf = kmalloc(sizeof(struct safe_buffer), GFP_ATOMIC); 126 - if (buf == NULL) { 127 - dev_warn(dev, "%s: kmalloc failed\n", __func__); 128 - return NULL; 129 - } 130 - 131 - buf->ptr = ptr; 132 - buf->size = size; 133 - buf->direction = dir; 134 - buf->pool = pool; 135 - 136 - if (pool) 
{ 137 - buf->safe = dma_pool_alloc(pool->pool, GFP_ATOMIC, 138 - &buf->safe_dma_addr); 139 - } else { 140 - buf->safe = dma_alloc_coherent(dev, size, &buf->safe_dma_addr, 141 - GFP_ATOMIC); 142 - } 143 - 144 - if (buf->safe == NULL) { 145 - dev_warn(dev, 146 - "%s: could not alloc dma memory (size=%d)\n", 147 - __func__, size); 148 - kfree(buf); 149 - return NULL; 150 - } 151 - 152 - #ifdef STATS 153 - if (pool) 154 - pool->allocs++; 155 - device_info->total_allocs++; 156 - #endif 157 - 158 - write_lock_irqsave(&device_info->lock, flags); 159 - list_add(&buf->node, &device_info->safe_buffers); 160 - write_unlock_irqrestore(&device_info->lock, flags); 161 - 162 - return buf; 163 - } 164 - 165 - /* determine if a buffer is from our "safe" pool */ 166 - static inline struct safe_buffer * 167 - find_safe_buffer(struct dmabounce_device_info *device_info, dma_addr_t safe_dma_addr) 168 - { 169 - struct safe_buffer *b, *rb = NULL; 170 - unsigned long flags; 171 - 172 - read_lock_irqsave(&device_info->lock, flags); 173 - 174 - list_for_each_entry(b, &device_info->safe_buffers, node) 175 - if (b->safe_dma_addr <= safe_dma_addr && 176 - b->safe_dma_addr + b->size > safe_dma_addr) { 177 - rb = b; 178 - break; 179 - } 180 - 181 - read_unlock_irqrestore(&device_info->lock, flags); 182 - return rb; 183 - } 184 - 185 - static inline void 186 - free_safe_buffer(struct dmabounce_device_info *device_info, struct safe_buffer *buf) 187 - { 188 - unsigned long flags; 189 - 190 - dev_dbg(device_info->dev, "%s(buf=%p)\n", __func__, buf); 191 - 192 - write_lock_irqsave(&device_info->lock, flags); 193 - 194 - list_del(&buf->node); 195 - 196 - write_unlock_irqrestore(&device_info->lock, flags); 197 - 198 - if (buf->pool) 199 - dma_pool_free(buf->pool->pool, buf->safe, buf->safe_dma_addr); 200 - else 201 - dma_free_coherent(device_info->dev, buf->size, buf->safe, 202 - buf->safe_dma_addr); 203 - 204 - kfree(buf); 205 - } 206 - 207 - /* ************************************************** */ 208 
- 209 - static struct safe_buffer *find_safe_buffer_dev(struct device *dev, 210 - dma_addr_t dma_addr, const char *where) 211 - { 212 - if (!dev || !dev->archdata.dmabounce) 213 - return NULL; 214 - if (dma_mapping_error(dev, dma_addr)) { 215 - dev_err(dev, "Trying to %s invalid mapping\n", where); 216 - return NULL; 217 - } 218 - return find_safe_buffer(dev->archdata.dmabounce, dma_addr); 219 - } 220 - 221 - static int needs_bounce(struct device *dev, dma_addr_t dma_addr, size_t size) 222 - { 223 - if (!dev || !dev->archdata.dmabounce) 224 - return 0; 225 - 226 - if (dev->dma_mask) { 227 - unsigned long limit, mask = *dev->dma_mask; 228 - 229 - limit = (mask + 1) & ~mask; 230 - if (limit && size > limit) { 231 - dev_err(dev, "DMA mapping too big (requested %#x " 232 - "mask %#Lx)\n", size, *dev->dma_mask); 233 - return -E2BIG; 234 - } 235 - 236 - /* Figure out if we need to bounce from the DMA mask. */ 237 - if ((dma_addr | (dma_addr + size - 1)) & ~mask) 238 - return 1; 239 - } 240 - 241 - return !!dev->archdata.dmabounce->needs_bounce(dev, dma_addr, size); 242 - } 243 - 244 - static inline dma_addr_t map_single(struct device *dev, void *ptr, size_t size, 245 - enum dma_data_direction dir, 246 - unsigned long attrs) 247 - { 248 - struct dmabounce_device_info *device_info = dev->archdata.dmabounce; 249 - struct safe_buffer *buf; 250 - 251 - if (device_info) 252 - DO_STATS ( device_info->map_op_count++ ); 253 - 254 - buf = alloc_safe_buffer(device_info, ptr, size, dir); 255 - if (buf == NULL) { 256 - dev_err(dev, "%s: unable to map unsafe buffer %p!\n", 257 - __func__, ptr); 258 - return DMA_MAPPING_ERROR; 259 - } 260 - 261 - dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n", 262 - __func__, buf->ptr, virt_to_dma(dev, buf->ptr), 263 - buf->safe, buf->safe_dma_addr); 264 - 265 - if ((dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) && 266 - !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) { 267 - dev_dbg(dev, "%s: copy unsafe %p to safe %p, size %d\n", 268 - 
__func__, ptr, buf->safe, size); 269 - memcpy(buf->safe, ptr, size); 270 - } 271 - 272 - return buf->safe_dma_addr; 273 - } 274 - 275 - static inline void unmap_single(struct device *dev, struct safe_buffer *buf, 276 - size_t size, enum dma_data_direction dir, 277 - unsigned long attrs) 278 - { 279 - BUG_ON(buf->size != size); 280 - BUG_ON(buf->direction != dir); 281 - 282 - dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n", 283 - __func__, buf->ptr, virt_to_dma(dev, buf->ptr), 284 - buf->safe, buf->safe_dma_addr); 285 - 286 - DO_STATS(dev->archdata.dmabounce->bounce_count++); 287 - 288 - if ((dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) && 289 - !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) { 290 - void *ptr = buf->ptr; 291 - 292 - dev_dbg(dev, "%s: copy back safe %p to unsafe %p size %d\n", 293 - __func__, buf->safe, ptr, size); 294 - memcpy(ptr, buf->safe, size); 295 - 296 - /* 297 - * Since we may have written to a page cache page, 298 - * we need to ensure that the data will be coherent 299 - * with user mappings. 300 - */ 301 - __cpuc_flush_dcache_area(ptr, size); 302 - } 303 - free_safe_buffer(dev->archdata.dmabounce, buf); 304 - } 305 - 306 - /* ************************************************** */ 307 - 308 - /* 309 - * see if a buffer address is in an 'unsafe' range. if it is 310 - * allocate a 'safe' buffer and copy the unsafe buffer into it. 311 - * substitute the safe buffer for the unsafe one. 
312 - * (basically move the buffer from an unsafe area to a safe one) 313 - */ 314 - static dma_addr_t dmabounce_map_page(struct device *dev, struct page *page, 315 - unsigned long offset, size_t size, enum dma_data_direction dir, 316 - unsigned long attrs) 317 - { 318 - dma_addr_t dma_addr; 319 - int ret; 320 - 321 - dev_dbg(dev, "%s(page=%p,off=%#lx,size=%zx,dir=%x)\n", 322 - __func__, page, offset, size, dir); 323 - 324 - dma_addr = pfn_to_dma(dev, page_to_pfn(page)) + offset; 325 - 326 - ret = needs_bounce(dev, dma_addr, size); 327 - if (ret < 0) 328 - return DMA_MAPPING_ERROR; 329 - 330 - if (ret == 0) { 331 - arm_dma_ops.sync_single_for_device(dev, dma_addr, size, dir); 332 - return dma_addr; 333 - } 334 - 335 - if (PageHighMem(page)) { 336 - dev_err(dev, "DMA buffer bouncing of HIGHMEM pages is not supported\n"); 337 - return DMA_MAPPING_ERROR; 338 - } 339 - 340 - return map_single(dev, page_address(page) + offset, size, dir, attrs); 341 - } 342 - 343 - /* 344 - * see if a mapped address was really a "safe" buffer and if so, copy 345 - * the data from the safe buffer back to the unsafe buffer and free up 346 - * the safe buffer. 
(basically return things back to the way they 347 - * should be) 348 - */ 349 - static void dmabounce_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, 350 - enum dma_data_direction dir, unsigned long attrs) 351 - { 352 - struct safe_buffer *buf; 353 - 354 - dev_dbg(dev, "%s(dma=%#x,size=%d,dir=%x)\n", 355 - __func__, dma_addr, size, dir); 356 - 357 - buf = find_safe_buffer_dev(dev, dma_addr, __func__); 358 - if (!buf) { 359 - arm_dma_ops.sync_single_for_cpu(dev, dma_addr, size, dir); 360 - return; 361 - } 362 - 363 - unmap_single(dev, buf, size, dir, attrs); 364 - } 365 - 366 - static int __dmabounce_sync_for_cpu(struct device *dev, dma_addr_t addr, 367 - size_t sz, enum dma_data_direction dir) 368 - { 369 - struct safe_buffer *buf; 370 - unsigned long off; 371 - 372 - dev_dbg(dev, "%s(dma=%#x,sz=%zx,dir=%x)\n", 373 - __func__, addr, sz, dir); 374 - 375 - buf = find_safe_buffer_dev(dev, addr, __func__); 376 - if (!buf) 377 - return 1; 378 - 379 - off = addr - buf->safe_dma_addr; 380 - 381 - BUG_ON(buf->direction != dir); 382 - 383 - dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x off=%#lx) mapped to %p (dma=%#x)\n", 384 - __func__, buf->ptr, virt_to_dma(dev, buf->ptr), off, 385 - buf->safe, buf->safe_dma_addr); 386 - 387 - DO_STATS(dev->archdata.dmabounce->bounce_count++); 388 - 389 - if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) { 390 - dev_dbg(dev, "%s: copy back safe %p to unsafe %p size %d\n", 391 - __func__, buf->safe + off, buf->ptr + off, sz); 392 - memcpy(buf->ptr + off, buf->safe + off, sz); 393 - } 394 - return 0; 395 - } 396 - 397 - static void dmabounce_sync_for_cpu(struct device *dev, 398 - dma_addr_t handle, size_t size, enum dma_data_direction dir) 399 - { 400 - if (!__dmabounce_sync_for_cpu(dev, handle, size, dir)) 401 - return; 402 - 403 - arm_dma_ops.sync_single_for_cpu(dev, handle, size, dir); 404 - } 405 - 406 - static int __dmabounce_sync_for_device(struct device *dev, dma_addr_t addr, 407 - size_t sz, enum 
dma_data_direction dir) 408 - { 409 - struct safe_buffer *buf; 410 - unsigned long off; 411 - 412 - dev_dbg(dev, "%s(dma=%#x,sz=%zx,dir=%x)\n", 413 - __func__, addr, sz, dir); 414 - 415 - buf = find_safe_buffer_dev(dev, addr, __func__); 416 - if (!buf) 417 - return 1; 418 - 419 - off = addr - buf->safe_dma_addr; 420 - 421 - BUG_ON(buf->direction != dir); 422 - 423 - dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x off=%#lx) mapped to %p (dma=%#x)\n", 424 - __func__, buf->ptr, virt_to_dma(dev, buf->ptr), off, 425 - buf->safe, buf->safe_dma_addr); 426 - 427 - DO_STATS(dev->archdata.dmabounce->bounce_count++); 428 - 429 - if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) { 430 - dev_dbg(dev, "%s: copy out unsafe %p to safe %p, size %d\n", 431 - __func__,buf->ptr + off, buf->safe + off, sz); 432 - memcpy(buf->safe + off, buf->ptr + off, sz); 433 - } 434 - return 0; 435 - } 436 - 437 - static void dmabounce_sync_for_device(struct device *dev, 438 - dma_addr_t handle, size_t size, enum dma_data_direction dir) 439 - { 440 - if (!__dmabounce_sync_for_device(dev, handle, size, dir)) 441 - return; 442 - 443 - arm_dma_ops.sync_single_for_device(dev, handle, size, dir); 444 - } 445 - 446 - static int dmabounce_dma_supported(struct device *dev, u64 dma_mask) 447 - { 448 - if (dev->archdata.dmabounce) 449 - return 0; 450 - 451 - return arm_dma_ops.dma_supported(dev, dma_mask); 452 - } 453 - 454 - static const struct dma_map_ops dmabounce_ops = { 455 - .alloc = arm_dma_alloc, 456 - .free = arm_dma_free, 457 - .mmap = arm_dma_mmap, 458 - .get_sgtable = arm_dma_get_sgtable, 459 - .map_page = dmabounce_map_page, 460 - .unmap_page = dmabounce_unmap_page, 461 - .sync_single_for_cpu = dmabounce_sync_for_cpu, 462 - .sync_single_for_device = dmabounce_sync_for_device, 463 - .map_sg = arm_dma_map_sg, 464 - .unmap_sg = arm_dma_unmap_sg, 465 - .sync_sg_for_cpu = arm_dma_sync_sg_for_cpu, 466 - .sync_sg_for_device = arm_dma_sync_sg_for_device, 467 - .dma_supported = dmabounce_dma_supported, 
468 - }; 469 - 470 - static int dmabounce_init_pool(struct dmabounce_pool *pool, struct device *dev, 471 - const char *name, unsigned long size) 472 - { 473 - pool->size = size; 474 - DO_STATS(pool->allocs = 0); 475 - pool->pool = dma_pool_create(name, dev, size, 476 - 0 /* byte alignment */, 477 - 0 /* no page-crossing issues */); 478 - 479 - return pool->pool ? 0 : -ENOMEM; 480 - } 481 - 482 - int dmabounce_register_dev(struct device *dev, unsigned long small_buffer_size, 483 - unsigned long large_buffer_size, 484 - int (*needs_bounce_fn)(struct device *, dma_addr_t, size_t)) 485 - { 486 - struct dmabounce_device_info *device_info; 487 - int ret; 488 - 489 - device_info = kmalloc(sizeof(struct dmabounce_device_info), GFP_ATOMIC); 490 - if (!device_info) { 491 - dev_err(dev, 492 - "Could not allocated dmabounce_device_info\n"); 493 - return -ENOMEM; 494 - } 495 - 496 - ret = dmabounce_init_pool(&device_info->small, dev, 497 - "small_dmabounce_pool", small_buffer_size); 498 - if (ret) { 499 - dev_err(dev, 500 - "dmabounce: could not allocate DMA pool for %ld byte objects\n", 501 - small_buffer_size); 502 - goto err_free; 503 - } 504 - 505 - if (large_buffer_size) { 506 - ret = dmabounce_init_pool(&device_info->large, dev, 507 - "large_dmabounce_pool", 508 - large_buffer_size); 509 - if (ret) { 510 - dev_err(dev, 511 - "dmabounce: could not allocate DMA pool for %ld byte objects\n", 512 - large_buffer_size); 513 - goto err_destroy; 514 - } 515 - } 516 - 517 - device_info->dev = dev; 518 - INIT_LIST_HEAD(&device_info->safe_buffers); 519 - rwlock_init(&device_info->lock); 520 - device_info->needs_bounce = needs_bounce_fn; 521 - 522 - #ifdef STATS 523 - device_info->total_allocs = 0; 524 - device_info->map_op_count = 0; 525 - device_info->bounce_count = 0; 526 - device_info->attr_res = device_create_file(dev, &dev_attr_dmabounce_stats); 527 - #endif 528 - 529 - dev->archdata.dmabounce = device_info; 530 - set_dma_ops(dev, &dmabounce_ops); 531 - 532 - dev_info(dev, 
"dmabounce: registered device\n"); 533 - 534 - return 0; 535 - 536 - err_destroy: 537 - dma_pool_destroy(device_info->small.pool); 538 - err_free: 539 - kfree(device_info); 540 - return ret; 541 - } 542 - EXPORT_SYMBOL(dmabounce_register_dev); 543 - 544 - void dmabounce_unregister_dev(struct device *dev) 545 - { 546 - struct dmabounce_device_info *device_info = dev->archdata.dmabounce; 547 - 548 - dev->archdata.dmabounce = NULL; 549 - set_dma_ops(dev, NULL); 550 - 551 - if (!device_info) { 552 - dev_warn(dev, 553 - "Never registered with dmabounce but attempting" 554 - "to unregister!\n"); 555 - return; 556 - } 557 - 558 - if (!list_empty(&device_info->safe_buffers)) { 559 - dev_err(dev, 560 - "Removing from dmabounce with pending buffers!\n"); 561 - BUG(); 562 - } 563 - 564 - if (device_info->small.pool) 565 - dma_pool_destroy(device_info->small.pool); 566 - if (device_info->large.pool) 567 - dma_pool_destroy(device_info->large.pool); 568 - 569 - #ifdef STATS 570 - if (device_info->attr_res == 0) 571 - device_remove_file(dev, &dev_attr_dmabounce_stats); 572 - #endif 573 - 574 - kfree(device_info); 575 - 576 - dev_info(dev, "dmabounce: device unregistered\n"); 577 - } 578 - EXPORT_SYMBOL(dmabounce_unregister_dev); 579 - 580 - MODULE_AUTHOR("Christopher Hoover <ch@hpl.hp.com>, Deepak Saxena <dsaxena@plexity.net>"); 581 - MODULE_DESCRIPTION("Special dma_{map/unmap/dma_sync}_* routines for systems with limited DMA windows"); 582 - MODULE_LICENSE("GPL");
-64
arch/arm/common/sa1111.c
··· 1389 1389 } 1390 1390 EXPORT_SYMBOL(sa1111_driver_unregister); 1391 1391 1392 - #ifdef CONFIG_DMABOUNCE 1393 - /* 1394 - * According to the "Intel StrongARM SA-1111 Microprocessor Companion 1395 - * Chip Specification Update" (June 2000), erratum #7, there is a 1396 - * significant bug in the SA1111 SDRAM shared memory controller. If 1397 - * an access to a region of memory above 1MB relative to the bank base, 1398 - * it is important that address bit 10 _NOT_ be asserted. Depending 1399 - * on the configuration of the RAM, bit 10 may correspond to one 1400 - * of several different (processor-relative) address bits. 1401 - * 1402 - * This routine only identifies whether or not a given DMA address 1403 - * is susceptible to the bug. 1404 - * 1405 - * This should only get called for sa1111_device types due to the 1406 - * way we configure our device dma_masks. 1407 - */ 1408 - static int sa1111_needs_bounce(struct device *dev, dma_addr_t addr, size_t size) 1409 - { 1410 - /* 1411 - * Section 4.6 of the "Intel StrongARM SA-1111 Development Module 1412 - * User's Guide" mentions that jumpers R51 and R52 control the 1413 - * target of SA-1111 DMA (either SDRAM bank 0 on Assabet, or 1414 - * SDRAM bank 1 on Neponset). The default configuration selects 1415 - * Assabet, so any address in bank 1 is necessarily invalid. 
1416 - */ 1417 - return (machine_is_assabet() || machine_is_pfs168()) && 1418 - (addr >= 0xc8000000 || (addr + size) >= 0xc8000000); 1419 - } 1420 - 1421 - static int sa1111_notifier_call(struct notifier_block *n, unsigned long action, 1422 - void *data) 1423 - { 1424 - struct sa1111_dev *dev = to_sa1111_device(data); 1425 - 1426 - switch (action) { 1427 - case BUS_NOTIFY_ADD_DEVICE: 1428 - if (dev->dev.dma_mask && dev->dma_mask < 0xffffffffUL) { 1429 - int ret = dmabounce_register_dev(&dev->dev, 1024, 4096, 1430 - sa1111_needs_bounce); 1431 - if (ret) 1432 - dev_err(&dev->dev, "failed to register with dmabounce: %d\n", ret); 1433 - } 1434 - break; 1435 - 1436 - case BUS_NOTIFY_DEL_DEVICE: 1437 - if (dev->dev.dma_mask && dev->dma_mask < 0xffffffffUL) 1438 - dmabounce_unregister_dev(&dev->dev); 1439 - break; 1440 - } 1441 - return NOTIFY_OK; 1442 - } 1443 - 1444 - static struct notifier_block sa1111_bus_notifier = { 1445 - .notifier_call = sa1111_notifier_call, 1446 - }; 1447 - #endif 1448 - 1449 1392 static int __init sa1111_init(void) 1450 1393 { 1451 1394 int ret = bus_register(&sa1111_bus_type); 1452 - #ifdef CONFIG_DMABOUNCE 1453 - if (ret == 0) 1454 - bus_register_notifier(&sa1111_bus_type, &sa1111_bus_notifier); 1455 - #endif 1456 1395 if (ret == 0) 1457 1396 platform_driver_register(&sa1111_device_driver); 1458 1397 return ret; ··· 1400 1461 static void __exit sa1111_exit(void) 1401 1462 { 1402 1463 platform_driver_unregister(&sa1111_device_driver); 1403 - #ifdef CONFIG_DMABOUNCE 1404 - bus_unregister_notifier(&sa1111_bus_type, &sa1111_bus_notifier); 1405 - #endif 1406 1464 bus_unregister(&sa1111_bus_type); 1407 1465 } 1408 1466
-3
arch/arm/include/asm/device.h
··· 6 6 #define ASMARM_DEVICE_H 7 7 8 8 struct dev_archdata { 9 - #ifdef CONFIG_DMABOUNCE 10 - struct dmabounce_device_info *dmabounce; 11 - #endif 12 9 #ifdef CONFIG_ARM_DMA_USE_IOMMU 13 10 struct dma_iommu_mapping *mapping; 14 11 #endif
+1 -48
arch/arm/include/asm/dma-direct.h
··· 1 - /* SPDX-License-Identifier: GPL-2.0 */ 2 - #ifndef ASM_ARM_DMA_DIRECT_H 3 - #define ASM_ARM_DMA_DIRECT_H 1 4 - 5 - #include <asm/memory.h> 6 - 7 - /* 8 - * dma_to_pfn/pfn_to_dma/virt_to_dma are architecture private 9 - * functions used internally by the DMA-mapping API to provide DMA 10 - * addresses. They must not be used by drivers. 11 - */ 12 - static inline dma_addr_t pfn_to_dma(struct device *dev, unsigned long pfn) 13 - { 14 - if (dev && dev->dma_range_map) 15 - pfn = PFN_DOWN(translate_phys_to_dma(dev, PFN_PHYS(pfn))); 16 - return (dma_addr_t)__pfn_to_bus(pfn); 17 - } 18 - 19 - static inline unsigned long dma_to_pfn(struct device *dev, dma_addr_t addr) 20 - { 21 - unsigned long pfn = __bus_to_pfn(addr); 22 - 23 - if (dev && dev->dma_range_map) 24 - pfn = PFN_DOWN(translate_dma_to_phys(dev, PFN_PHYS(pfn))); 25 - return pfn; 26 - } 27 - 28 - static inline dma_addr_t virt_to_dma(struct device *dev, void *addr) 29 - { 30 - if (dev) 31 - return pfn_to_dma(dev, virt_to_pfn(addr)); 32 - 33 - return (dma_addr_t)__virt_to_bus((unsigned long)(addr)); 34 - } 35 - 36 - static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) 37 - { 38 - unsigned int offset = paddr & ~PAGE_MASK; 39 - return pfn_to_dma(dev, __phys_to_pfn(paddr)) + offset; 40 - } 41 - 42 - static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr) 43 - { 44 - unsigned int offset = dev_addr & ~PAGE_MASK; 45 - return __pfn_to_phys(dma_to_pfn(dev, dev_addr)) + offset; 46 - } 47 - 48 - #endif /* ASM_ARM_DMA_DIRECT_H */ 1 + #include <mach/dma-direct.h>
-128
arch/arm/include/asm/dma-mapping.h
··· 1 - /* SPDX-License-Identifier: GPL-2.0 */ 2 - #ifndef ASMARM_DMA_MAPPING_H 3 - #define ASMARM_DMA_MAPPING_H 4 - 5 - #ifdef __KERNEL__ 6 - 7 - #include <linux/mm_types.h> 8 - #include <linux/scatterlist.h> 9 - 10 - #include <xen/xen.h> 11 - #include <asm/xen/hypervisor.h> 12 - 13 - extern const struct dma_map_ops arm_dma_ops; 14 - extern const struct dma_map_ops arm_coherent_dma_ops; 15 - 16 - static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) 17 - { 18 - if (IS_ENABLED(CONFIG_MMU) && !IS_ENABLED(CONFIG_ARM_LPAE)) 19 - return &arm_dma_ops; 20 - return NULL; 21 - } 22 - 23 - /** 24 - * arm_dma_alloc - allocate consistent memory for DMA 25 - * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices 26 - * @size: required memory size 27 - * @handle: bus-specific DMA address 28 - * @attrs: optinal attributes that specific mapping properties 29 - * 30 - * Allocate some memory for a device for performing DMA. This function 31 - * allocates pages, and will return the CPU-viewed address, and sets @handle 32 - * to be the device-viewed address. 33 - */ 34 - extern void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, 35 - gfp_t gfp, unsigned long attrs); 36 - 37 - /** 38 - * arm_dma_free - free memory allocated by arm_dma_alloc 39 - * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices 40 - * @size: size of memory originally requested in dma_alloc_coherent 41 - * @cpu_addr: CPU-view address returned from dma_alloc_coherent 42 - * @handle: device-view address returned from dma_alloc_coherent 43 - * @attrs: optinal attributes that specific mapping properties 44 - * 45 - * Free (and unmap) a DMA buffer previously allocated by 46 - * arm_dma_alloc(). 47 - * 48 - * References to memory and mappings associated with cpu_addr/handle 49 - * during and after this call executing are illegal. 
50 - */ 51 - extern void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, 52 - dma_addr_t handle, unsigned long attrs); 53 - 54 - /** 55 - * arm_dma_mmap - map a coherent DMA allocation into user space 56 - * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices 57 - * @vma: vm_area_struct describing requested user mapping 58 - * @cpu_addr: kernel CPU-view address returned from dma_alloc_coherent 59 - * @handle: device-view address returned from dma_alloc_coherent 60 - * @size: size of memory originally requested in dma_alloc_coherent 61 - * @attrs: optinal attributes that specific mapping properties 62 - * 63 - * Map a coherent DMA buffer previously allocated by dma_alloc_coherent 64 - * into user space. The coherent DMA buffer must not be freed by the 65 - * driver until the user space mapping has been released. 66 - */ 67 - extern int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, 68 - void *cpu_addr, dma_addr_t dma_addr, size_t size, 69 - unsigned long attrs); 70 - 71 - /* 72 - * For SA-1111, IXP425, and ADI systems the dma-mapping functions are "magic" 73 - * and utilize bounce buffers as needed to work around limited DMA windows. 74 - * 75 - * On the SA-1111, a bug limits DMA to only certain regions of RAM. 76 - * On the IXP425, the PCI inbound window is 64MB (256MB total RAM) 77 - * On some ADI engineering systems, PCI inbound window is 32MB (12MB total RAM) 78 - * 79 - * The following are helper functions used by the dmabounce subystem 80 - * 81 - */ 82 - 83 - /** 84 - * dmabounce_register_dev 85 - * 86 - * @dev: valid struct device pointer 87 - * @small_buf_size: size of buffers to use with small buffer pool 88 - * @large_buf_size: size of buffers to use with large buffer pool (can be 0) 89 - * @needs_bounce_fn: called to determine whether buffer needs bouncing 90 - * 91 - * This function should be called by low-level platform code to register 92 - * a device as requireing DMA buffer bouncing. 
The function will allocate 93 - * appropriate DMA pools for the device. 94 - */ 95 - extern int dmabounce_register_dev(struct device *, unsigned long, 96 - unsigned long, int (*)(struct device *, dma_addr_t, size_t)); 97 - 98 - /** 99 - * dmabounce_unregister_dev 100 - * 101 - * @dev: valid struct device pointer 102 - * 103 - * This function should be called by low-level platform code when device 104 - * that was previously registered with dmabounce_register_dev is removed 105 - * from the system. 106 - * 107 - */ 108 - extern void dmabounce_unregister_dev(struct device *); 109 - 110 - 111 - 112 - /* 113 - * The scatter list versions of the above methods. 114 - */ 115 - extern int arm_dma_map_sg(struct device *, struct scatterlist *, int, 116 - enum dma_data_direction, unsigned long attrs); 117 - extern void arm_dma_unmap_sg(struct device *, struct scatterlist *, int, 118 - enum dma_data_direction, unsigned long attrs); 119 - extern void arm_dma_sync_sg_for_cpu(struct device *, struct scatterlist *, int, 120 - enum dma_data_direction); 121 - extern void arm_dma_sync_sg_for_device(struct device *, struct scatterlist *, int, 122 - enum dma_data_direction); 123 - extern int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt, 124 - void *cpu_addr, dma_addr_t dma_addr, size_t size, 125 - unsigned long attrs); 126 - 127 - #endif /* __KERNEL__ */ 128 - #endif
-2
arch/arm/include/asm/memory.h
··· 378 378 #ifndef __virt_to_bus 379 379 #define __virt_to_bus __virt_to_phys 380 380 #define __bus_to_virt __phys_to_virt 381 - #define __pfn_to_bus(x) __pfn_to_phys(x) 382 - #define __bus_to_pfn(x) __phys_to_pfn(x) 383 381 #endif 384 382 385 383 /*
+1
arch/arm/mach-footbridge/Kconfig
··· 61 61 62 62 # Footbridge support 63 63 config FOOTBRIDGE 64 + select ARCH_HAS_PHYS_TO_DMA 64 65 bool 65 66 66 67 # Footbridge in host mode
+11 -8
arch/arm/mach-footbridge/common.c
··· 12 12 #include <linux/init.h> 13 13 #include <linux/io.h> 14 14 #include <linux/spinlock.h> 15 + #include <linux/dma-direct.h> 15 16 #include <video/vga.h> 16 17 17 18 #include <asm/page.h> ··· 336 335 return res; 337 336 } 338 337 EXPORT_SYMBOL(__bus_to_virt); 339 - 340 - unsigned long __pfn_to_bus(unsigned long pfn) 338 + #else 339 + static inline unsigned long fb_bus_sdram_offset(void) 341 340 { 342 - return __pfn_to_phys(pfn) + (fb_bus_sdram_offset() - PHYS_OFFSET); 341 + return BUS_OFFSET; 343 342 } 344 - EXPORT_SYMBOL(__pfn_to_bus); 343 + #endif /* CONFIG_FOOTBRIDGE_ADDIN */ 345 344 346 - unsigned long __bus_to_pfn(unsigned long bus) 345 + dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) 347 346 { 348 - return __phys_to_pfn(bus - (fb_bus_sdram_offset() - PHYS_OFFSET)); 347 + return paddr + (fb_bus_sdram_offset() - PHYS_OFFSET); 349 348 } 350 - EXPORT_SYMBOL(__bus_to_pfn); 351 349 352 - #endif 350 + phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr) 351 + { 352 + return dev_addr - (fb_bus_sdram_offset() - PHYS_OFFSET); 353 + }
+8
arch/arm/mach-footbridge/include/mach/dma-direct.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef MACH_FOOTBRIDGE_DMA_DIRECT_H 3 + #define MACH_FOOTBRIDGE_DMA_DIRECT_H 1 4 + 5 + dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); 6 + phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr); 7 + 8 + #endif /* MACH_FOOTBRIDGE_DMA_DIRECT_H */
-4
arch/arm/mach-footbridge/include/mach/memory.h
··· 26 26 #ifndef __ASSEMBLY__ 27 27 extern unsigned long __virt_to_bus(unsigned long); 28 28 extern unsigned long __bus_to_virt(unsigned long); 29 - extern unsigned long __pfn_to_bus(unsigned long); 30 - extern unsigned long __bus_to_pfn(unsigned long); 31 29 #endif 32 30 #define __virt_to_bus __virt_to_bus 33 31 #define __bus_to_virt __bus_to_virt ··· 40 42 #define BUS_OFFSET 0xe0000000 41 43 #define __virt_to_bus(x) ((x) + (BUS_OFFSET - PAGE_OFFSET)) 42 44 #define __bus_to_virt(x) ((x) - (BUS_OFFSET - PAGE_OFFSET)) 43 - #define __pfn_to_bus(x) (__pfn_to_phys(x) + (BUS_OFFSET - PHYS_OFFSET)) 44 - #define __bus_to_pfn(x) __phys_to_pfn((x) - (BUS_OFFSET - PHYS_OFFSET)) 45 45 46 46 #else 47 47
+1 -1
arch/arm/mach-highbank/highbank.c
··· 98 98 if (of_property_read_bool(dev->of_node, "dma-coherent")) { 99 99 val = readl(sregs_base + reg); 100 100 writel(val | 0xff01, sregs_base + reg); 101 - set_dma_ops(dev, &arm_coherent_dma_ops); 101 + dev->dma_coherent = true; 102 102 } 103 103 104 104 return NOTIFY_OK;
+1 -1
arch/arm/mach-mvebu/coherency.c
··· 95 95 96 96 if (event != BUS_NOTIFY_ADD_DEVICE) 97 97 return NOTIFY_DONE; 98 - set_dma_ops(dev, &arm_coherent_dma_ops); 98 + dev->dma_coherent = true; 99 99 100 100 return NOTIFY_OK; 101 101 }
+74 -580
arch/arm/mm/dma-mapping.c
··· 103 103 * before transfers and delay cache invalidation until transfer completion. 104 104 * 105 105 */ 106 - static void __dma_page_cpu_to_dev(struct page *, unsigned long, 107 - size_t, enum dma_data_direction); 108 - static void __dma_page_dev_to_cpu(struct page *, unsigned long, 109 - size_t, enum dma_data_direction); 110 - 111 - /** 112 - * arm_dma_map_page - map a portion of a page for streaming DMA 113 - * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices 114 - * @page: page that buffer resides in 115 - * @offset: offset into page for start of buffer 116 - * @size: size of buffer to map 117 - * @dir: DMA transfer direction 118 - * 119 - * Ensure that any data held in the cache is appropriately discarded 120 - * or written back. 121 - * 122 - * The device owns this memory once this call has completed. The CPU 123 - * can regain ownership by calling dma_unmap_page(). 124 - */ 125 - static dma_addr_t arm_dma_map_page(struct device *dev, struct page *page, 126 - unsigned long offset, size_t size, enum dma_data_direction dir, 127 - unsigned long attrs) 128 - { 129 - if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) 130 - __dma_page_cpu_to_dev(page, offset, size, dir); 131 - return pfn_to_dma(dev, page_to_pfn(page)) + offset; 132 - } 133 - 134 - static dma_addr_t arm_coherent_dma_map_page(struct device *dev, struct page *page, 135 - unsigned long offset, size_t size, enum dma_data_direction dir, 136 - unsigned long attrs) 137 - { 138 - return pfn_to_dma(dev, page_to_pfn(page)) + offset; 139 - } 140 - 141 - /** 142 - * arm_dma_unmap_page - unmap a buffer previously mapped through dma_map_page() 143 - * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices 144 - * @handle: DMA address of buffer 145 - * @size: size of buffer (same as passed to dma_map_page) 146 - * @dir: DMA transfer direction (same as passed to dma_map_page) 147 - * 148 - * Unmap a page streaming mode DMA translation. 
The handle and size 149 - * must match what was provided in the previous dma_map_page() call. 150 - * All other usages are undefined. 151 - * 152 - * After this call, reads by the CPU to the buffer are guaranteed to see 153 - * whatever the device wrote there. 154 - */ 155 - static void arm_dma_unmap_page(struct device *dev, dma_addr_t handle, 156 - size_t size, enum dma_data_direction dir, unsigned long attrs) 157 - { 158 - if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) 159 - __dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)), 160 - handle & ~PAGE_MASK, size, dir); 161 - } 162 - 163 - static void arm_dma_sync_single_for_cpu(struct device *dev, 164 - dma_addr_t handle, size_t size, enum dma_data_direction dir) 165 - { 166 - unsigned int offset = handle & (PAGE_SIZE - 1); 167 - struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset)); 168 - __dma_page_dev_to_cpu(page, offset, size, dir); 169 - } 170 - 171 - static void arm_dma_sync_single_for_device(struct device *dev, 172 - dma_addr_t handle, size_t size, enum dma_data_direction dir) 173 - { 174 - unsigned int offset = handle & (PAGE_SIZE - 1); 175 - struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset)); 176 - __dma_page_cpu_to_dev(page, offset, size, dir); 177 - } 178 - 179 - /* 180 - * Return whether the given device DMA address mask can be supported 181 - * properly. For example, if your device can only drive the low 24-bits 182 - * during bus mastering, then you would pass 0x00ffffff as the mask 183 - * to this function. 184 - */ 185 - static int arm_dma_supported(struct device *dev, u64 mask) 186 - { 187 - unsigned long max_dma_pfn = min(max_pfn - 1, arm_dma_pfn_limit); 188 - 189 - /* 190 - * Translate the device's DMA mask to a PFN limit. This 191 - * PFN number includes the page which we can DMA to. 
192 - */ 193 - return dma_to_pfn(dev, mask) >= max_dma_pfn; 194 - } 195 - 196 - const struct dma_map_ops arm_dma_ops = { 197 - .alloc = arm_dma_alloc, 198 - .free = arm_dma_free, 199 - .alloc_pages = dma_direct_alloc_pages, 200 - .free_pages = dma_direct_free_pages, 201 - .mmap = arm_dma_mmap, 202 - .get_sgtable = arm_dma_get_sgtable, 203 - .map_page = arm_dma_map_page, 204 - .unmap_page = arm_dma_unmap_page, 205 - .map_sg = arm_dma_map_sg, 206 - .unmap_sg = arm_dma_unmap_sg, 207 - .map_resource = dma_direct_map_resource, 208 - .sync_single_for_cpu = arm_dma_sync_single_for_cpu, 209 - .sync_single_for_device = arm_dma_sync_single_for_device, 210 - .sync_sg_for_cpu = arm_dma_sync_sg_for_cpu, 211 - .sync_sg_for_device = arm_dma_sync_sg_for_device, 212 - .dma_supported = arm_dma_supported, 213 - .get_required_mask = dma_direct_get_required_mask, 214 - }; 215 - EXPORT_SYMBOL(arm_dma_ops); 216 - 217 - static void *arm_coherent_dma_alloc(struct device *dev, size_t size, 218 - dma_addr_t *handle, gfp_t gfp, unsigned long attrs); 219 - static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr, 220 - dma_addr_t handle, unsigned long attrs); 221 - static int arm_coherent_dma_mmap(struct device *dev, struct vm_area_struct *vma, 222 - void *cpu_addr, dma_addr_t dma_addr, size_t size, 223 - unsigned long attrs); 224 - 225 - const struct dma_map_ops arm_coherent_dma_ops = { 226 - .alloc = arm_coherent_dma_alloc, 227 - .free = arm_coherent_dma_free, 228 - .alloc_pages = dma_direct_alloc_pages, 229 - .free_pages = dma_direct_free_pages, 230 - .mmap = arm_coherent_dma_mmap, 231 - .get_sgtable = arm_dma_get_sgtable, 232 - .map_page = arm_coherent_dma_map_page, 233 - .map_sg = arm_dma_map_sg, 234 - .map_resource = dma_direct_map_resource, 235 - .dma_supported = arm_dma_supported, 236 - .get_required_mask = dma_direct_get_required_mask, 237 - }; 238 - EXPORT_SYMBOL(arm_coherent_dma_ops); 239 106 240 107 static void __dma_clear_buffer(struct page *page, size_t 
size, int coherent_flag) 241 108 { ··· 592 725 if (page) { 593 726 unsigned long flags; 594 727 595 - *handle = pfn_to_dma(dev, page_to_pfn(page)); 728 + *handle = phys_to_dma(dev, page_to_phys(page)); 596 729 buf->virt = args.want_vaddr ? addr : page; 597 730 598 731 spin_lock_irqsave(&arm_dma_bufs_lock, flags); ··· 606 739 } 607 740 608 741 /* 609 - * Allocate DMA-coherent memory space and return both the kernel remapped 610 - * virtual and bus address for that space. 611 - */ 612 - void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, 613 - gfp_t gfp, unsigned long attrs) 614 - { 615 - pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL); 616 - 617 - return __dma_alloc(dev, size, handle, gfp, prot, false, 618 - attrs, __builtin_return_address(0)); 619 - } 620 - 621 - static void *arm_coherent_dma_alloc(struct device *dev, size_t size, 622 - dma_addr_t *handle, gfp_t gfp, unsigned long attrs) 623 - { 624 - return __dma_alloc(dev, size, handle, gfp, PAGE_KERNEL, true, 625 - attrs, __builtin_return_address(0)); 626 - } 627 - 628 - static int __arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, 629 - void *cpu_addr, dma_addr_t dma_addr, size_t size, 630 - unsigned long attrs) 631 - { 632 - int ret = -ENXIO; 633 - unsigned long nr_vma_pages = vma_pages(vma); 634 - unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; 635 - unsigned long pfn = dma_to_pfn(dev, dma_addr); 636 - unsigned long off = vma->vm_pgoff; 637 - 638 - if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) 639 - return ret; 640 - 641 - if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) { 642 - ret = remap_pfn_range(vma, vma->vm_start, 643 - pfn + off, 644 - vma->vm_end - vma->vm_start, 645 - vma->vm_page_prot); 646 - } 647 - 648 - return ret; 649 - } 650 - 651 - /* 652 - * Create userspace mapping for the DMA-coherent memory. 
653 - */ 654 - static int arm_coherent_dma_mmap(struct device *dev, struct vm_area_struct *vma, 655 - void *cpu_addr, dma_addr_t dma_addr, size_t size, 656 - unsigned long attrs) 657 - { 658 - return __arm_dma_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); 659 - } 660 - 661 - int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, 662 - void *cpu_addr, dma_addr_t dma_addr, size_t size, 663 - unsigned long attrs) 664 - { 665 - vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); 666 - return __arm_dma_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); 667 - } 668 - 669 - /* 670 742 * Free a buffer as defined by the above mapping. 671 743 */ 672 744 static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, 673 745 dma_addr_t handle, unsigned long attrs, 674 746 bool is_coherent) 675 747 { 676 - struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); 748 + struct page *page = phys_to_page(dma_to_phys(dev, handle)); 677 749 struct arm_dma_buffer *buf; 678 750 struct arm_dma_free_args args = { 679 751 .dev = dev, ··· 628 822 629 823 buf->allocator->free(&args); 630 824 kfree(buf); 631 - } 632 - 633 - void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, 634 - dma_addr_t handle, unsigned long attrs) 635 - { 636 - __arm_dma_free(dev, size, cpu_addr, handle, attrs, false); 637 - } 638 - 639 - static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr, 640 - dma_addr_t handle, unsigned long attrs) 641 - { 642 - __arm_dma_free(dev, size, cpu_addr, handle, attrs, true); 643 - } 644 - 645 - int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt, 646 - void *cpu_addr, dma_addr_t handle, size_t size, 647 - unsigned long attrs) 648 - { 649 - unsigned long pfn = dma_to_pfn(dev, handle); 650 - struct page *page; 651 - int ret; 652 - 653 - /* If the PFN is not valid, we do not have a struct page */ 654 - if (!pfn_valid(pfn)) 655 - return -ENXIO; 656 - 657 - page = pfn_to_page(pfn); 658 - 659 - ret 
= sg_alloc_table(sgt, 1, GFP_KERNEL); 660 - if (unlikely(ret)) 661 - return ret; 662 - 663 - sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); 664 - return 0; 665 825 } 666 826 667 827 static void dma_cache_maint_page(struct page *page, unsigned long offset, ··· 679 907 680 908 /* 681 909 * Make an area consistent for devices. 682 - * Note: Drivers should NOT use this function directly, as it will break 683 - * platforms with CONFIG_DMABOUNCE. 910 + * Note: Drivers should NOT use this function directly. 684 911 * Use the driver DMA support - see dma-mapping.h (dma_sync_*) 685 912 */ 686 913 static void __dma_page_cpu_to_dev(struct page *page, unsigned long off, ··· 730 959 left -= PAGE_SIZE; 731 960 } 732 961 } 733 - } 734 - 735 - /** 736 - * arm_dma_map_sg - map a set of SG buffers for streaming mode DMA 737 - * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices 738 - * @sg: list of buffers 739 - * @nents: number of buffers to map 740 - * @dir: DMA transfer direction 741 - * 742 - * Map a set of buffers described by scatterlist in streaming mode for DMA. 743 - * This is the scatter-gather version of the dma_map_single interface. 744 - * Here the scatter gather list elements are each tagged with the 745 - * appropriate dma address and length. They are obtained via 746 - * sg_dma_{address,length}. 747 - * 748 - * Device ownership issues as mentioned for dma_map_single are the same 749 - * here. 
750 - */ 751 - int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, 752 - enum dma_data_direction dir, unsigned long attrs) 753 - { 754 - const struct dma_map_ops *ops = get_dma_ops(dev); 755 - struct scatterlist *s; 756 - int i, j, ret; 757 - 758 - for_each_sg(sg, s, nents, i) { 759 - #ifdef CONFIG_NEED_SG_DMA_LENGTH 760 - s->dma_length = s->length; 761 - #endif 762 - s->dma_address = ops->map_page(dev, sg_page(s), s->offset, 763 - s->length, dir, attrs); 764 - if (dma_mapping_error(dev, s->dma_address)) { 765 - ret = -EIO; 766 - goto bad_mapping; 767 - } 768 - } 769 - return nents; 770 - 771 - bad_mapping: 772 - for_each_sg(sg, s, i, j) 773 - ops->unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs); 774 - return ret; 775 - } 776 - 777 - /** 778 - * arm_dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg 779 - * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices 780 - * @sg: list of buffers 781 - * @nents: number of buffers to unmap (same as was passed to dma_map_sg) 782 - * @dir: DMA transfer direction (same as was passed to dma_map_sg) 783 - * 784 - * Unmap a set of streaming mode DMA translations. Again, CPU access 785 - * rules concerning calls here are the same as for dma_unmap_single(). 
786 - */ 787 - void arm_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, 788 - enum dma_data_direction dir, unsigned long attrs) 789 - { 790 - const struct dma_map_ops *ops = get_dma_ops(dev); 791 - struct scatterlist *s; 792 - 793 - int i; 794 - 795 - for_each_sg(sg, s, nents, i) 796 - ops->unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs); 797 - } 798 - 799 - /** 800 - * arm_dma_sync_sg_for_cpu 801 - * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices 802 - * @sg: list of buffers 803 - * @nents: number of buffers to map (returned from dma_map_sg) 804 - * @dir: DMA transfer direction (same as was passed to dma_map_sg) 805 - */ 806 - void arm_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, 807 - int nents, enum dma_data_direction dir) 808 - { 809 - const struct dma_map_ops *ops = get_dma_ops(dev); 810 - struct scatterlist *s; 811 - int i; 812 - 813 - for_each_sg(sg, s, nents, i) 814 - ops->sync_single_for_cpu(dev, sg_dma_address(s), s->length, 815 - dir); 816 - } 817 - 818 - /** 819 - * arm_dma_sync_sg_for_device 820 - * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices 821 - * @sg: list of buffers 822 - * @nents: number of buffers to map (returned from dma_map_sg) 823 - * @dir: DMA transfer direction (same as was passed to dma_map_sg) 824 - */ 825 - void arm_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, 826 - int nents, enum dma_data_direction dir) 827 - { 828 - const struct dma_map_ops *ops = get_dma_ops(dev); 829 - struct scatterlist *s; 830 - int i; 831 - 832 - for_each_sg(sg, s, nents, i) 833 - ops->sync_single_for_device(dev, sg_dma_address(s), s->length, 834 - dir); 835 - } 836 - 837 - static const struct dma_map_ops *arm_get_dma_map_ops(bool coherent) 838 - { 839 - /* 840 - * When CONFIG_ARM_LPAE is set, physical address can extend above 841 - * 32-bits, which then can't be addressed by devices that only support 842 - * 32-bit DMA. 
843 - * Use the generic dma-direct / swiotlb ops code in that case, as that 844 - * handles bounce buffering for us. 845 - */ 846 - if (IS_ENABLED(CONFIG_ARM_LPAE)) 847 - return NULL; 848 - return coherent ? &arm_coherent_dma_ops : &arm_dma_ops; 849 962 } 850 963 851 964 #ifdef CONFIG_ARM_DMA_USE_IOMMU ··· 1078 1423 __free_from_pool(cpu_addr, size); 1079 1424 } 1080 1425 1081 - static void *__arm_iommu_alloc_attrs(struct device *dev, size_t size, 1082 - dma_addr_t *handle, gfp_t gfp, unsigned long attrs, 1083 - int coherent_flag) 1426 + static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, 1427 + dma_addr_t *handle, gfp_t gfp, unsigned long attrs) 1084 1428 { 1085 1429 pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL); 1086 1430 struct page **pages; 1087 1431 void *addr = NULL; 1432 + int coherent_flag = dev->dma_coherent ? COHERENT : NORMAL; 1088 1433 1089 1434 *handle = DMA_MAPPING_ERROR; 1090 1435 size = PAGE_ALIGN(size); ··· 1127 1472 return NULL; 1128 1473 } 1129 1474 1130 - static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, 1131 - dma_addr_t *handle, gfp_t gfp, unsigned long attrs) 1132 - { 1133 - return __arm_iommu_alloc_attrs(dev, size, handle, gfp, attrs, NORMAL); 1134 - } 1135 - 1136 - static void *arm_coherent_iommu_alloc_attrs(struct device *dev, size_t size, 1137 - dma_addr_t *handle, gfp_t gfp, unsigned long attrs) 1138 - { 1139 - return __arm_iommu_alloc_attrs(dev, size, handle, gfp, attrs, COHERENT); 1140 - } 1141 - 1142 - static int __arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, 1475 + static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, 1143 1476 void *cpu_addr, dma_addr_t dma_addr, size_t size, 1144 1477 unsigned long attrs) 1145 1478 { ··· 1141 1498 if (vma->vm_pgoff >= nr_pages) 1142 1499 return -ENXIO; 1143 1500 1501 + if (!dev->dma_coherent) 1502 + vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); 1503 + 1144 1504 err = vm_map_pages(vma, pages, 
nr_pages); 1145 1505 if (err) 1146 1506 pr_err("Remapping memory failed: %d\n", err); 1147 1507 1148 1508 return err; 1149 1509 } 1150 - static int arm_iommu_mmap_attrs(struct device *dev, 1151 - struct vm_area_struct *vma, void *cpu_addr, 1152 - dma_addr_t dma_addr, size_t size, unsigned long attrs) 1153 - { 1154 - vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); 1155 - 1156 - return __arm_iommu_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, attrs); 1157 - } 1158 - 1159 - static int arm_coherent_iommu_mmap_attrs(struct device *dev, 1160 - struct vm_area_struct *vma, void *cpu_addr, 1161 - dma_addr_t dma_addr, size_t size, unsigned long attrs) 1162 - { 1163 - return __arm_iommu_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, attrs); 1164 - } 1165 1510 1166 1511 /* 1167 1512 * free a page as defined by the above mapping. 1168 1513 * Must not be called with IRQs disabled. 1169 1514 */ 1170 - static void __arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, 1171 - dma_addr_t handle, unsigned long attrs, int coherent_flag) 1515 + static void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, 1516 + dma_addr_t handle, unsigned long attrs) 1172 1517 { 1518 + int coherent_flag = dev->dma_coherent ? 
COHERENT : NORMAL; 1173 1519 struct page **pages; 1174 1520 size = PAGE_ALIGN(size); 1175 1521 ··· 1180 1548 __iommu_free_buffer(dev, pages, size, attrs); 1181 1549 } 1182 1550 1183 - static void arm_iommu_free_attrs(struct device *dev, size_t size, 1184 - void *cpu_addr, dma_addr_t handle, 1185 - unsigned long attrs) 1186 - { 1187 - __arm_iommu_free_attrs(dev, size, cpu_addr, handle, attrs, NORMAL); 1188 - } 1189 - 1190 - static void arm_coherent_iommu_free_attrs(struct device *dev, size_t size, 1191 - void *cpu_addr, dma_addr_t handle, unsigned long attrs) 1192 - { 1193 - __arm_iommu_free_attrs(dev, size, cpu_addr, handle, attrs, COHERENT); 1194 - } 1195 - 1196 1551 static int arm_iommu_get_sgtable(struct device *dev, struct sg_table *sgt, 1197 1552 void *cpu_addr, dma_addr_t dma_addr, 1198 1553 size_t size, unsigned long attrs) ··· 1199 1580 */ 1200 1581 static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, 1201 1582 size_t size, dma_addr_t *handle, 1202 - enum dma_data_direction dir, unsigned long attrs, 1203 - bool is_coherent) 1583 + enum dma_data_direction dir, unsigned long attrs) 1204 1584 { 1205 1585 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); 1206 1586 dma_addr_t iova, iova_base; ··· 1219 1601 phys_addr_t phys = page_to_phys(sg_page(s)); 1220 1602 unsigned int len = PAGE_ALIGN(s->offset + s->length); 1221 1603 1222 - if (!is_coherent && (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) 1604 + if (!dev->dma_coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) 1223 1605 __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); 1224 1606 1225 1607 prot = __dma_info_to_prot(dir, attrs); ··· 1239 1621 return ret; 1240 1622 } 1241 1623 1242 - static int __iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, 1243 - enum dma_data_direction dir, unsigned long attrs, 1244 - bool is_coherent) 1624 + /** 1625 + * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA 1626 + * @dev: valid struct device pointer 1627 + * 
@sg: list of buffers 1628 + * @nents: number of buffers to map 1629 + * @dir: DMA transfer direction 1630 + * 1631 + * Map a set of buffers described by scatterlist in streaming mode for DMA. 1632 + * The scatter gather list elements are merged together (if possible) and 1633 + * tagged with the appropriate dma address and length. They are obtained via 1634 + * sg_dma_{address,length}. 1635 + */ 1636 + static int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, 1637 + int nents, enum dma_data_direction dir, unsigned long attrs) 1245 1638 { 1246 1639 struct scatterlist *s = sg, *dma = sg, *start = sg; 1247 1640 int i, count = 0, ret; ··· 1267 1638 1268 1639 if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) { 1269 1640 ret = __map_sg_chunk(dev, start, size, 1270 - &dma->dma_address, dir, attrs, 1271 - is_coherent); 1641 + &dma->dma_address, dir, attrs); 1272 1642 if (ret < 0) 1273 1643 goto bad_mapping; 1274 1644 ··· 1281 1653 } 1282 1654 size += s->length; 1283 1655 } 1284 - ret = __map_sg_chunk(dev, start, size, &dma->dma_address, dir, attrs, 1285 - is_coherent); 1656 + ret = __map_sg_chunk(dev, start, size, &dma->dma_address, dir, attrs); 1286 1657 if (ret < 0) 1287 1658 goto bad_mapping; 1288 1659 ··· 1299 1672 } 1300 1673 1301 1674 /** 1302 - * arm_coherent_iommu_map_sg - map a set of SG buffers for streaming mode DMA 1303 - * @dev: valid struct device pointer 1304 - * @sg: list of buffers 1305 - * @nents: number of buffers to map 1306 - * @dir: DMA transfer direction 1307 - * 1308 - * Map a set of i/o coherent buffers described by scatterlist in streaming 1309 - * mode for DMA. The scatter gather list elements are merged together (if 1310 - * possible) and tagged with the appropriate dma address and length. They are 1311 - * obtained via sg_dma_{address,length}. 
1312 - */ 1313 - static int arm_coherent_iommu_map_sg(struct device *dev, struct scatterlist *sg, 1314 - int nents, enum dma_data_direction dir, unsigned long attrs) 1315 - { 1316 - return __iommu_map_sg(dev, sg, nents, dir, attrs, true); 1317 - } 1318 - 1319 - /** 1320 - * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA 1321 - * @dev: valid struct device pointer 1322 - * @sg: list of buffers 1323 - * @nents: number of buffers to map 1324 - * @dir: DMA transfer direction 1325 - * 1326 - * Map a set of buffers described by scatterlist in streaming mode for DMA. 1327 - * The scatter gather list elements are merged together (if possible) and 1328 - * tagged with the appropriate dma address and length. They are obtained via 1329 - * sg_dma_{address,length}. 1330 - */ 1331 - static int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, 1332 - int nents, enum dma_data_direction dir, unsigned long attrs) 1333 - { 1334 - return __iommu_map_sg(dev, sg, nents, dir, attrs, false); 1335 - } 1336 - 1337 - static void __iommu_unmap_sg(struct device *dev, struct scatterlist *sg, 1338 - int nents, enum dma_data_direction dir, 1339 - unsigned long attrs, bool is_coherent) 1340 - { 1341 - struct scatterlist *s; 1342 - int i; 1343 - 1344 - for_each_sg(sg, s, nents, i) { 1345 - if (sg_dma_len(s)) 1346 - __iommu_remove_mapping(dev, sg_dma_address(s), 1347 - sg_dma_len(s)); 1348 - if (!is_coherent && (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) 1349 - __dma_page_dev_to_cpu(sg_page(s), s->offset, 1350 - s->length, dir); 1351 - } 1352 - } 1353 - 1354 - /** 1355 - * arm_coherent_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg 1356 - * @dev: valid struct device pointer 1357 - * @sg: list of buffers 1358 - * @nents: number of buffers to unmap (same as was passed to dma_map_sg) 1359 - * @dir: DMA transfer direction (same as was passed to dma_map_sg) 1360 - * 1361 - * Unmap a set of streaming mode DMA translations. 
Again, CPU access 1362 - * rules concerning calls here are the same as for dma_unmap_single(). 1363 - */ 1364 - static void arm_coherent_iommu_unmap_sg(struct device *dev, 1365 - struct scatterlist *sg, int nents, enum dma_data_direction dir, 1366 - unsigned long attrs) 1367 - { 1368 - __iommu_unmap_sg(dev, sg, nents, dir, attrs, true); 1369 - } 1370 - 1371 - /** 1372 1675 * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg 1373 1676 * @dev: valid struct device pointer 1374 1677 * @sg: list of buffers ··· 1313 1756 enum dma_data_direction dir, 1314 1757 unsigned long attrs) 1315 1758 { 1316 - __iommu_unmap_sg(dev, sg, nents, dir, attrs, false); 1759 + struct scatterlist *s; 1760 + int i; 1761 + 1762 + for_each_sg(sg, s, nents, i) { 1763 + if (sg_dma_len(s)) 1764 + __iommu_remove_mapping(dev, sg_dma_address(s), 1765 + sg_dma_len(s)); 1766 + if (!dev->dma_coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) 1767 + __dma_page_dev_to_cpu(sg_page(s), s->offset, 1768 + s->length, dir); 1769 + } 1317 1770 } 1318 1771 1319 1772 /** ··· 1339 1772 { 1340 1773 struct scatterlist *s; 1341 1774 int i; 1775 + 1776 + if (dev->dma_coherent) 1777 + return; 1342 1778 1343 1779 for_each_sg(sg, s, nents, i) 1344 1780 __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir); ··· 1362 1792 struct scatterlist *s; 1363 1793 int i; 1364 1794 1795 + if (dev->dma_coherent) 1796 + return; 1797 + 1365 1798 for_each_sg(sg, s, nents, i) 1366 1799 __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); 1367 1800 } 1368 1801 1369 - 1370 1802 /** 1371 - * arm_coherent_iommu_map_page 1803 + * arm_iommu_map_page 1372 1804 * @dev: valid struct device pointer 1373 1805 * @page: page that buffer resides in 1374 1806 * @offset: offset into page for start of buffer 1375 1807 * @size: size of buffer to map 1376 1808 * @dir: DMA transfer direction 1377 1809 * 1378 - * Coherent IOMMU aware version of arm_dma_map_page() 1810 + * IOMMU aware version of arm_dma_map_page() 1379 1811 */ 1380 
- static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *page, 1812 + static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page, 1381 1813 unsigned long offset, size_t size, enum dma_data_direction dir, 1382 1814 unsigned long attrs) 1383 1815 { 1384 1816 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); 1385 1817 dma_addr_t dma_addr; 1386 1818 int ret, prot, len = PAGE_ALIGN(size + offset); 1819 + 1820 + if (!dev->dma_coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) 1821 + __dma_page_cpu_to_dev(page, offset, size, dir); 1387 1822 1388 1823 dma_addr = __alloc_iova(mapping, len); 1389 1824 if (dma_addr == DMA_MAPPING_ERROR) ··· 1407 1832 } 1408 1833 1409 1834 /** 1410 - * arm_iommu_map_page 1411 - * @dev: valid struct device pointer 1412 - * @page: page that buffer resides in 1413 - * @offset: offset into page for start of buffer 1414 - * @size: size of buffer to map 1415 - * @dir: DMA transfer direction 1416 - * 1417 - * IOMMU aware version of arm_dma_map_page() 1418 - */ 1419 - static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page, 1420 - unsigned long offset, size_t size, enum dma_data_direction dir, 1421 - unsigned long attrs) 1422 - { 1423 - if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) 1424 - __dma_page_cpu_to_dev(page, offset, size, dir); 1425 - 1426 - return arm_coherent_iommu_map_page(dev, page, offset, size, dir, attrs); 1427 - } 1428 - 1429 - /** 1430 - * arm_coherent_iommu_unmap_page 1431 - * @dev: valid struct device pointer 1432 - * @handle: DMA address of buffer 1433 - * @size: size of buffer (same as passed to dma_map_page) 1434 - * @dir: DMA transfer direction (same as passed to dma_map_page) 1435 - * 1436 - * Coherent IOMMU aware version of arm_dma_unmap_page() 1437 - */ 1438 - static void arm_coherent_iommu_unmap_page(struct device *dev, dma_addr_t handle, 1439 - size_t size, enum dma_data_direction dir, unsigned long attrs) 1440 - { 1441 - struct dma_iommu_mapping *mapping = 
to_dma_iommu_mapping(dev); 1442 - dma_addr_t iova = handle & PAGE_MASK; 1443 - int offset = handle & ~PAGE_MASK; 1444 - int len = PAGE_ALIGN(size + offset); 1445 - 1446 - if (!iova) 1447 - return; 1448 - 1449 - iommu_unmap(mapping->domain, iova, len); 1450 - __free_iova(mapping, iova, len); 1451 - } 1452 - 1453 - /** 1454 1835 * arm_iommu_unmap_page 1455 1836 * @dev: valid struct device pointer 1456 1837 * @handle: DMA address of buffer ··· 1420 1889 { 1421 1890 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); 1422 1891 dma_addr_t iova = handle & PAGE_MASK; 1423 - struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); 1892 + struct page *page; 1424 1893 int offset = handle & ~PAGE_MASK; 1425 1894 int len = PAGE_ALIGN(size + offset); 1426 1895 1427 1896 if (!iova) 1428 1897 return; 1429 1898 1430 - if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) 1899 + if (!dev->dma_coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) { 1900 + page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); 1431 1901 __dma_page_dev_to_cpu(page, offset, size, dir); 1902 + } 1432 1903 1433 1904 iommu_unmap(mapping->domain, iova, len); 1434 1905 __free_iova(mapping, iova, len); ··· 1498 1965 { 1499 1966 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); 1500 1967 dma_addr_t iova = handle & PAGE_MASK; 1501 - struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); 1968 + struct page *page; 1502 1969 unsigned int offset = handle & ~PAGE_MASK; 1503 1970 1504 - if (!iova) 1971 + if (dev->dma_coherent || !iova) 1505 1972 return; 1506 1973 1974 + page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); 1507 1975 __dma_page_dev_to_cpu(page, offset, size, dir); 1508 1976 } 1509 1977 ··· 1513 1979 { 1514 1980 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); 1515 1981 dma_addr_t iova = handle & PAGE_MASK; 1516 - struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); 1982 + struct page *page; 1517 
1983 unsigned int offset = handle & ~PAGE_MASK; 1518 1984 1519 - if (!iova) 1985 + if (dev->dma_coherent || !iova) 1520 1986 return; 1521 1987 1988 + page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); 1522 1989 __dma_page_cpu_to_dev(page, offset, size, dir); 1523 1990 } 1524 1991 ··· 1541 2006 1542 2007 .map_resource = arm_iommu_map_resource, 1543 2008 .unmap_resource = arm_iommu_unmap_resource, 1544 - 1545 - .dma_supported = arm_dma_supported, 1546 - }; 1547 - 1548 - static const struct dma_map_ops iommu_coherent_ops = { 1549 - .alloc = arm_coherent_iommu_alloc_attrs, 1550 - .free = arm_coherent_iommu_free_attrs, 1551 - .mmap = arm_coherent_iommu_mmap_attrs, 1552 - .get_sgtable = arm_iommu_get_sgtable, 1553 - 1554 - .map_page = arm_coherent_iommu_map_page, 1555 - .unmap_page = arm_coherent_iommu_unmap_page, 1556 - 1557 - .map_sg = arm_coherent_iommu_map_sg, 1558 - .unmap_sg = arm_coherent_iommu_unmap_sg, 1559 - 1560 - .map_resource = arm_iommu_map_resource, 1561 - .unmap_resource = arm_iommu_unmap_resource, 1562 - 1563 - .dma_supported = arm_dma_supported, 1564 2009 }; 1565 2010 1566 2011 /** ··· 1716 2201 iommu_detach_device(mapping->domain, dev); 1717 2202 kref_put(&mapping->kref, release_iommu_mapping); 1718 2203 to_dma_iommu_mapping(dev) = NULL; 1719 - set_dma_ops(dev, arm_get_dma_map_ops(dev->archdata.dma_coherent)); 2204 + set_dma_ops(dev, NULL); 1720 2205 1721 2206 pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev)); 1722 2207 } 1723 2208 EXPORT_SYMBOL_GPL(arm_iommu_detach_device); 1724 2209 1725 - static const struct dma_map_ops *arm_get_iommu_dma_map_ops(bool coherent) 1726 - { 1727 - return coherent ? 
&iommu_coherent_ops : &iommu_ops; 1728 - } 1729 - 1730 - static bool arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size, 1731 - const struct iommu_ops *iommu) 2210 + static void arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size, 2211 + const struct iommu_ops *iommu, bool coherent) 1732 2212 { 1733 2213 struct dma_iommu_mapping *mapping; 1734 - 1735 - if (!iommu) 1736 - return false; 1737 2214 1738 2215 mapping = arm_iommu_create_mapping(dev->bus, dma_base, size); 1739 2216 if (IS_ERR(mapping)) { 1740 2217 pr_warn("Failed to create %llu-byte IOMMU mapping for device %s\n", 1741 2218 size, dev_name(dev)); 1742 - return false; 2219 + return; 1743 2220 } 1744 2221 1745 2222 if (__arm_iommu_attach_device(dev, mapping)) { 1746 2223 pr_warn("Failed to attached device %s to IOMMU_mapping\n", 1747 2224 dev_name(dev)); 1748 2225 arm_iommu_release_mapping(mapping); 1749 - return false; 2226 + return; 1750 2227 } 1751 2228 1752 - return true; 2229 + set_dma_ops(dev, &iommu_ops); 1753 2230 } 1754 2231 1755 2232 static void arm_teardown_iommu_dma_ops(struct device *dev) ··· 1757 2250 1758 2251 #else 1759 2252 1760 - static bool arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size, 1761 - const struct iommu_ops *iommu) 2253 + static void arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size, 2254 + const struct iommu_ops *iommu, bool coherent) 1762 2255 { 1763 - return false; 1764 2256 } 1765 2257 1766 2258 static void arm_teardown_iommu_dma_ops(struct device *dev) { } 1767 - 1768 - #define arm_get_iommu_dma_map_ops arm_get_dma_map_ops 1769 2259 1770 2260 #endif /* CONFIG_ARM_DMA_USE_IOMMU */ 1771 2261 1772 2262 void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, 1773 2263 const struct iommu_ops *iommu, bool coherent) 1774 2264 { 1775 - const struct dma_map_ops *dma_ops; 1776 - 1777 2265 dev->archdata.dma_coherent = coherent; 1778 - #ifdef CONFIG_SWIOTLB 1779 2266 dev->dma_coherent = coherent; 1780 - 
#endif 1781 2267 1782 2268 /* 1783 2269 * Don't override the dma_ops if they have already been set. Ideally ··· 1780 2280 if (dev->dma_ops) 1781 2281 return; 1782 2282 1783 - if (arm_setup_iommu_dma_ops(dev, dma_base, size, iommu)) 1784 - dma_ops = arm_get_iommu_dma_map_ops(coherent); 1785 - else 1786 - dma_ops = arm_get_dma_map_ops(coherent); 1787 - 1788 - set_dma_ops(dev, dma_ops); 2283 + if (iommu) 2284 + arm_setup_iommu_dma_ops(dev, dma_base, size, iommu, coherent); 1789 2285 1790 2286 xen_setup_dma_ops(dev); 1791 2287 dev->archdata.dma_ops_setup = true; ··· 1797 2301 set_dma_ops(dev, NULL); 1798 2302 } 1799 2303 1800 - #ifdef CONFIG_SWIOTLB 1801 2304 void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, 1802 2305 enum dma_data_direction dir) 1803 2306 { ··· 1824 2329 { 1825 2330 __arm_dma_free(dev, size, cpu_addr, dma_handle, attrs, false); 1826 2331 } 1827 - #endif /* CONFIG_SWIOTLB */
+1
drivers/ata/libata-scsi.c
··· 1060 1060 dev->flags |= ATA_DFLAG_NO_UNLOAD; 1061 1061 1062 1062 /* configure max sectors */ 1063 + dev->max_sectors = min(dev->max_sectors, sdev->host->max_sectors); 1063 1064 blk_queue_max_hw_sectors(q, dev->max_sectors); 1064 1065 1065 1066 if (dev->class == ATA_DEV_ATAPI) {
+9 -36
drivers/infiniband/core/rw.c
··· 274 274 return 1; 275 275 } 276 276 277 - static void rdma_rw_unmap_sg(struct ib_device *dev, struct scatterlist *sg, 278 - u32 sg_cnt, enum dma_data_direction dir) 279 - { 280 - if (is_pci_p2pdma_page(sg_page(sg))) 281 - pci_p2pdma_unmap_sg(dev->dma_device, sg, sg_cnt, dir); 282 - else 283 - ib_dma_unmap_sg(dev, sg, sg_cnt, dir); 284 - } 285 - 286 - static int rdma_rw_map_sgtable(struct ib_device *dev, struct sg_table *sgt, 287 - enum dma_data_direction dir) 288 - { 289 - int nents; 290 - 291 - if (is_pci_p2pdma_page(sg_page(sgt->sgl))) { 292 - if (WARN_ON_ONCE(ib_uses_virt_dma(dev))) 293 - return 0; 294 - nents = pci_p2pdma_map_sg(dev->dma_device, sgt->sgl, 295 - sgt->orig_nents, dir); 296 - if (!nents) 297 - return -EIO; 298 - sgt->nents = nents; 299 - return 0; 300 - } 301 - return ib_dma_map_sgtable_attrs(dev, sgt, dir, 0); 302 - } 303 - 304 277 /** 305 278 * rdma_rw_ctx_init - initialize a RDMA READ/WRITE context 306 279 * @ctx: context to initialize ··· 300 327 }; 301 328 int ret; 302 329 303 - ret = rdma_rw_map_sgtable(dev, &sgt, dir); 330 + ret = ib_dma_map_sgtable_attrs(dev, &sgt, dir, 0); 304 331 if (ret) 305 332 return ret; 306 333 sg_cnt = sgt.nents; ··· 339 366 return ret; 340 367 341 368 out_unmap_sg: 342 - rdma_rw_unmap_sg(dev, sgt.sgl, sgt.orig_nents, dir); 369 + ib_dma_unmap_sgtable_attrs(dev, &sgt, dir, 0); 343 370 return ret; 344 371 } 345 372 EXPORT_SYMBOL(rdma_rw_ctx_init); ··· 387 414 return -EINVAL; 388 415 } 389 416 390 - ret = rdma_rw_map_sgtable(dev, &sgt, dir); 417 + ret = ib_dma_map_sgtable_attrs(dev, &sgt, dir, 0); 391 418 if (ret) 392 419 return ret; 393 420 394 421 if (prot_sg_cnt) { 395 - ret = rdma_rw_map_sgtable(dev, &prot_sgt, dir); 422 + ret = ib_dma_map_sgtable_attrs(dev, &prot_sgt, dir, 0); 396 423 if (ret) 397 424 goto out_unmap_sg; 398 425 } ··· 459 486 kfree(ctx->reg); 460 487 out_unmap_prot_sg: 461 488 if (prot_sgt.nents) 462 - rdma_rw_unmap_sg(dev, prot_sgt.sgl, prot_sgt.orig_nents, dir); 489 + 
ib_dma_unmap_sgtable_attrs(dev, &prot_sgt, dir, 0); 463 490 out_unmap_sg: 464 - rdma_rw_unmap_sg(dev, sgt.sgl, sgt.orig_nents, dir); 491 + ib_dma_unmap_sgtable_attrs(dev, &sgt, dir, 0); 465 492 return ret; 466 493 } 467 494 EXPORT_SYMBOL(rdma_rw_ctx_signature_init); ··· 594 621 break; 595 622 } 596 623 597 - rdma_rw_unmap_sg(qp->pd->device, sg, sg_cnt, dir); 624 + ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir); 598 625 } 599 626 EXPORT_SYMBOL(rdma_rw_ctx_destroy); 600 627 ··· 622 649 kfree(ctx->reg); 623 650 624 651 if (prot_sg_cnt) 625 - rdma_rw_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir); 626 - rdma_rw_unmap_sg(qp->pd->device, sg, sg_cnt, dir); 652 + ib_dma_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir); 653 + ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir); 627 654 } 628 655 EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature); 629 656
+91 -14
drivers/iommu/dma-iommu.c
··· 21 21 #include <linux/iova.h> 22 22 #include <linux/irq.h> 23 23 #include <linux/list_sort.h> 24 + #include <linux/memremap.h> 24 25 #include <linux/mm.h> 25 26 #include <linux/mutex.h> 26 27 #include <linux/pci.h> ··· 1063 1062 1064 1063 for_each_sg(sg, s, nents, i) { 1065 1064 /* Restore this segment's original unaligned fields first */ 1065 + dma_addr_t s_dma_addr = sg_dma_address(s); 1066 1066 unsigned int s_iova_off = sg_dma_address(s); 1067 1067 unsigned int s_length = sg_dma_len(s); 1068 1068 unsigned int s_iova_len = s->length; 1069 1069 1070 - s->offset += s_iova_off; 1071 - s->length = s_length; 1072 1070 sg_dma_address(s) = DMA_MAPPING_ERROR; 1073 1071 sg_dma_len(s) = 0; 1072 + 1073 + if (sg_is_dma_bus_address(s)) { 1074 + if (i > 0) 1075 + cur = sg_next(cur); 1076 + 1077 + sg_dma_unmark_bus_address(s); 1078 + sg_dma_address(cur) = s_dma_addr; 1079 + sg_dma_len(cur) = s_length; 1080 + sg_dma_mark_bus_address(cur); 1081 + count++; 1082 + cur_len = 0; 1083 + continue; 1084 + } 1085 + 1086 + s->offset += s_iova_off; 1087 + s->length = s_length; 1074 1088 1075 1089 /* 1076 1090 * Now fill in the real DMA data. If... 
··· 1127 1111 int i; 1128 1112 1129 1113 for_each_sg(sg, s, nents, i) { 1130 - if (sg_dma_address(s) != DMA_MAPPING_ERROR) 1131 - s->offset += sg_dma_address(s); 1132 - if (sg_dma_len(s)) 1133 - s->length = sg_dma_len(s); 1114 + if (sg_is_dma_bus_address(s)) { 1115 + sg_dma_unmark_bus_address(s); 1116 + } else { 1117 + if (sg_dma_address(s) != DMA_MAPPING_ERROR) 1118 + s->offset += sg_dma_address(s); 1119 + if (sg_dma_len(s)) 1120 + s->length = sg_dma_len(s); 1121 + } 1134 1122 sg_dma_address(s) = DMA_MAPPING_ERROR; 1135 1123 sg_dma_len(s) = 0; 1136 1124 } ··· 1187 1167 struct iova_domain *iovad = &cookie->iovad; 1188 1168 struct scatterlist *s, *prev = NULL; 1189 1169 int prot = dma_info_to_prot(dir, dev_is_dma_coherent(dev), attrs); 1170 + struct pci_p2pdma_map_state p2pdma_state = {}; 1171 + enum pci_p2pdma_map_type map; 1190 1172 dma_addr_t iova; 1191 1173 size_t iova_len = 0; 1192 1174 unsigned long mask = dma_get_seg_boundary(dev); ··· 1218 1196 size_t s_length = s->length; 1219 1197 size_t pad_len = (mask - iova_len + 1) & mask; 1220 1198 1199 + if (is_pci_p2pdma_page(sg_page(s))) { 1200 + map = pci_p2pdma_map_segment(&p2pdma_state, dev, s); 1201 + switch (map) { 1202 + case PCI_P2PDMA_MAP_BUS_ADDR: 1203 + /* 1204 + * iommu_map_sg() will skip this segment as 1205 + * it is marked as a bus address, 1206 + * __finalise_sg() will copy the dma address 1207 + * into the output segment. 1208 + */ 1209 + continue; 1210 + case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: 1211 + /* 1212 + * Mapping through host bridge should be 1213 + * mapped with regular IOVAs, thus we 1214 + * do nothing here and continue below. 
1215 + */ 1216 + break; 1217 + default: 1218 + ret = -EREMOTEIO; 1219 + goto out_restore_sg; 1220 + } 1221 + } 1222 + 1221 1223 sg_dma_address(s) = s_iova_off; 1222 1224 sg_dma_len(s) = s_length; 1223 1225 s->offset -= s_iova_off; ··· 1270 1224 prev = s; 1271 1225 } 1272 1226 1227 + if (!iova_len) 1228 + return __finalise_sg(dev, sg, nents, 0); 1229 + 1273 1230 iova = iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev), dev); 1274 1231 if (!iova) { 1275 1232 ret = -ENOMEM; ··· 1294 1245 out_restore_sg: 1295 1246 __invalidate_sg(sg, nents); 1296 1247 out: 1297 - if (ret != -ENOMEM) 1248 + if (ret != -ENOMEM && ret != -EREMOTEIO) 1298 1249 return -EINVAL; 1299 1250 return ret; 1300 1251 } ··· 1302 1253 static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, 1303 1254 int nents, enum dma_data_direction dir, unsigned long attrs) 1304 1255 { 1305 - dma_addr_t start, end; 1256 + dma_addr_t end = 0, start; 1306 1257 struct scatterlist *tmp; 1307 1258 int i; 1308 1259 ··· 1316 1267 1317 1268 /* 1318 1269 * The scatterlist segments are mapped into a single 1319 - * contiguous IOVA allocation, so this is incredibly easy. 1270 + * contiguous IOVA allocation, the start and end points 1271 + * just have to be determined. 
1320 1272 */ 1321 - start = sg_dma_address(sg); 1322 - for_each_sg(sg_next(sg), tmp, nents - 1, i) { 1273 + for_each_sg(sg, tmp, nents, i) { 1274 + if (sg_is_dma_bus_address(tmp)) { 1275 + sg_dma_unmark_bus_address(tmp); 1276 + continue; 1277 + } 1278 + 1323 1279 if (sg_dma_len(tmp) == 0) 1324 1280 break; 1325 - sg = tmp; 1281 + 1282 + start = sg_dma_address(tmp); 1283 + break; 1326 1284 } 1327 - end = sg_dma_address(sg) + sg_dma_len(sg); 1328 - __iommu_dma_unmap(dev, start, end - start); 1285 + 1286 + nents -= i; 1287 + for_each_sg(tmp, tmp, nents, i) { 1288 + if (sg_is_dma_bus_address(tmp)) { 1289 + sg_dma_unmark_bus_address(tmp); 1290 + continue; 1291 + } 1292 + 1293 + if (sg_dma_len(tmp) == 0) 1294 + break; 1295 + 1296 + end = sg_dma_address(tmp) + sg_dma_len(tmp); 1297 + } 1298 + 1299 + if (end) 1300 + __iommu_dma_unmap(dev, start, end - start); 1329 1301 } 1330 1302 1331 1303 static dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, ··· 1538 1468 return (1UL << __ffs(domain->pgsize_bitmap)) - 1; 1539 1469 } 1540 1470 1471 + static size_t iommu_dma_opt_mapping_size(void) 1472 + { 1473 + return iova_rcache_range(); 1474 + } 1475 + 1541 1476 static const struct dma_map_ops iommu_dma_ops = { 1477 + .flags = DMA_F_PCI_P2PDMA_SUPPORTED, 1542 1478 .alloc = iommu_dma_alloc, 1543 1479 .free = iommu_dma_free, 1544 1480 .alloc_pages = dma_common_alloc_pages, ··· 1564 1488 .map_resource = iommu_dma_map_resource, 1565 1489 .unmap_resource = iommu_dma_unmap_resource, 1566 1490 .get_merge_boundary = iommu_dma_get_merge_boundary, 1491 + .opt_mapping_size = iommu_dma_opt_mapping_size, 1567 1492 }; 1568 1493 1569 1494 /*
+4
drivers/iommu/iommu.c
··· 2460 2460 len = 0; 2461 2461 } 2462 2462 2463 + if (sg_is_dma_bus_address(sg)) 2464 + goto next; 2465 + 2463 2466 if (len) { 2464 2467 len += sg->length; 2465 2468 } else { ··· 2470 2467 start = s_phys; 2471 2468 } 2472 2469 2470 + next: 2473 2471 if (++i < nents) 2474 2472 sg = sg_next(sg); 2475 2473 }
+5
drivers/iommu/iova.c
··· 26 26 static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad); 27 27 static void free_iova_rcaches(struct iova_domain *iovad); 28 28 29 + unsigned long iova_rcache_range(void) 30 + { 31 + return PAGE_SIZE << (IOVA_RANGE_CACHE_MAX_SIZE - 1); 32 + } 33 + 29 34 static int iova_cpuhp_dead(unsigned int cpu, struct hlist_node *node) 30 35 { 31 36 struct iova_domain *iovad;
+2 -1
drivers/nvme/host/core.c
··· 4198 4198 blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, ns->queue); 4199 4199 4200 4200 blk_queue_flag_set(QUEUE_FLAG_NONROT, ns->queue); 4201 - if (ctrl->ops->flags & NVME_F_PCI_P2PDMA) 4201 + if (ctrl->ops->supports_pci_p2pdma && 4202 + ctrl->ops->supports_pci_p2pdma(ctrl)) 4202 4203 blk_queue_flag_set(QUEUE_FLAG_PCI_P2PDMA, ns->queue); 4203 4204 4204 4205 ns->ctrl = ctrl;
+1 -1
drivers/nvme/host/nvme.h
··· 504 504 unsigned int flags; 505 505 #define NVME_F_FABRICS (1 << 0) 506 506 #define NVME_F_METADATA_SUPPORTED (1 << 1) 507 - #define NVME_F_PCI_P2PDMA (1 << 2) 508 507 int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val); 509 508 int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val); 510 509 int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val); ··· 513 514 void (*stop_ctrl)(struct nvme_ctrl *ctrl); 514 515 int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size); 515 516 void (*print_device_info)(struct nvme_ctrl *ctrl); 517 + bool (*supports_pci_p2pdma)(struct nvme_ctrl *ctrl); 516 518 }; 517 519 518 520 /*
+38 -43
drivers/nvme/host/pci.c
··· 230 230 bool use_sgl; 231 231 int aborted; 232 232 int npages; /* In the PRP list. 0 means small pool in use */ 233 - int nents; /* Used in scatterlist */ 234 233 dma_addr_t first_dma; 235 234 unsigned int dma_len; /* length of single DMA segment mapping */ 236 235 dma_addr_t meta_dma; 237 - struct scatterlist *sg; 236 + struct sg_table sgt; 238 237 }; 239 238 240 239 static inline unsigned int nvme_dbbuf_size(struct nvme_dev *dev) ··· 523 524 static void **nvme_pci_iod_list(struct request *req) 524 525 { 525 526 struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 526 - return (void **)(iod->sg + blk_rq_nr_phys_segments(req)); 527 + return (void **)(iod->sgt.sgl + blk_rq_nr_phys_segments(req)); 527 528 } 528 529 529 530 static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req) ··· 575 576 } 576 577 } 577 578 578 - static void nvme_unmap_sg(struct nvme_dev *dev, struct request *req) 579 - { 580 - struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 581 - 582 - if (is_pci_p2pdma_page(sg_page(iod->sg))) 583 - pci_p2pdma_unmap_sg(dev->dev, iod->sg, iod->nents, 584 - rq_dma_dir(req)); 585 - else 586 - dma_unmap_sg(dev->dev, iod->sg, iod->nents, rq_dma_dir(req)); 587 - } 588 - 589 579 static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) 590 580 { 591 581 struct nvme_iod *iod = blk_mq_rq_to_pdu(req); ··· 585 597 return; 586 598 } 587 599 588 - WARN_ON_ONCE(!iod->nents); 600 + WARN_ON_ONCE(!iod->sgt.nents); 589 601 590 - nvme_unmap_sg(dev, req); 602 + dma_unmap_sgtable(dev->dev, &iod->sgt, rq_dma_dir(req), 0); 603 + 591 604 if (iod->npages == 0) 592 605 dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0], 593 606 iod->first_dma); ··· 596 607 nvme_free_sgls(dev, req); 597 608 else 598 609 nvme_free_prps(dev, req); 599 - mempool_free(iod->sg, dev->iod_mempool); 610 + mempool_free(iod->sgt.sgl, dev->iod_mempool); 600 611 } 601 612 602 613 static void nvme_print_sgl(struct scatterlist *sgl, int nents) ··· 619 630 struct nvme_iod *iod = 
blk_mq_rq_to_pdu(req); 620 631 struct dma_pool *pool; 621 632 int length = blk_rq_payload_bytes(req); 622 - struct scatterlist *sg = iod->sg; 633 + struct scatterlist *sg = iod->sgt.sgl; 623 634 int dma_len = sg_dma_len(sg); 624 635 u64 dma_addr = sg_dma_address(sg); 625 636 int offset = dma_addr & (NVME_CTRL_PAGE_SIZE - 1); ··· 691 702 dma_len = sg_dma_len(sg); 692 703 } 693 704 done: 694 - cmnd->dptr.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); 705 + cmnd->dptr.prp1 = cpu_to_le64(sg_dma_address(iod->sgt.sgl)); 695 706 cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma); 696 707 return BLK_STS_OK; 697 708 free_prps: 698 709 nvme_free_prps(dev, req); 699 710 return BLK_STS_RESOURCE; 700 711 bad_sgl: 701 - WARN(DO_ONCE(nvme_print_sgl, iod->sg, iod->nents), 712 + WARN(DO_ONCE(nvme_print_sgl, iod->sgt.sgl, iod->sgt.nents), 702 713 "Invalid SGL for payload:%d nents:%d\n", 703 - blk_rq_payload_bytes(req), iod->nents); 714 + blk_rq_payload_bytes(req), iod->sgt.nents); 704 715 return BLK_STS_IOERR; 705 716 } 706 717 ··· 726 737 } 727 738 728 739 static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev, 729 - struct request *req, struct nvme_rw_command *cmd, int entries) 740 + struct request *req, struct nvme_rw_command *cmd) 730 741 { 731 742 struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 732 743 struct dma_pool *pool; 733 744 struct nvme_sgl_desc *sg_list; 734 - struct scatterlist *sg = iod->sg; 745 + struct scatterlist *sg = iod->sgt.sgl; 746 + unsigned int entries = iod->sgt.nents; 735 747 dma_addr_t sgl_dma; 736 748 int i = 0; 737 749 ··· 830 840 { 831 841 struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 832 842 blk_status_t ret = BLK_STS_RESOURCE; 833 - int nr_mapped; 843 + int rc; 834 844 835 845 if (blk_rq_nr_phys_segments(req) == 1) { 836 846 struct bio_vec bv = req_bvec(req); ··· 848 858 } 849 859 850 860 iod->dma_len = 0; 851 - iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC); 852 - if (!iod->sg) 861 + iod->sgt.sgl = mempool_alloc(dev->iod_mempool, GFP_ATOMIC); 
862 + if (!iod->sgt.sgl) 853 863 return BLK_STS_RESOURCE; 854 - sg_init_table(iod->sg, blk_rq_nr_phys_segments(req)); 855 - iod->nents = blk_rq_map_sg(req->q, req, iod->sg); 856 - if (!iod->nents) 864 + sg_init_table(iod->sgt.sgl, blk_rq_nr_phys_segments(req)); 865 + iod->sgt.orig_nents = blk_rq_map_sg(req->q, req, iod->sgt.sgl); 866 + if (!iod->sgt.orig_nents) 857 867 goto out_free_sg; 858 868 859 - if (is_pci_p2pdma_page(sg_page(iod->sg))) 860 - nr_mapped = pci_p2pdma_map_sg_attrs(dev->dev, iod->sg, 861 - iod->nents, rq_dma_dir(req), DMA_ATTR_NO_WARN); 862 - else 863 - nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, 864 - rq_dma_dir(req), DMA_ATTR_NO_WARN); 865 - if (!nr_mapped) 869 + rc = dma_map_sgtable(dev->dev, &iod->sgt, rq_dma_dir(req), 870 + DMA_ATTR_NO_WARN); 871 + if (rc) { 872 + if (rc == -EREMOTEIO) 873 + ret = BLK_STS_TARGET; 866 874 goto out_free_sg; 875 + } 867 876 868 877 iod->use_sgl = nvme_pci_use_sgls(dev, req); 869 878 if (iod->use_sgl) 870 - ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw, nr_mapped); 879 + ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw); 871 880 else 872 881 ret = nvme_pci_setup_prps(dev, req, &cmnd->rw); 873 882 if (ret != BLK_STS_OK) ··· 874 885 return BLK_STS_OK; 875 886 876 887 out_unmap_sg: 877 - nvme_unmap_sg(dev, req); 888 + dma_unmap_sgtable(dev->dev, &iod->sgt, rq_dma_dir(req), 0); 878 889 out_free_sg: 879 - mempool_free(iod->sg, dev->iod_mempool); 890 + mempool_free(iod->sgt.sgl, dev->iod_mempool); 880 891 return ret; 881 892 } 882 893 ··· 900 911 901 912 iod->aborted = 0; 902 913 iod->npages = -1; 903 - iod->nents = 0; 914 + iod->sgt.nents = 0; 904 915 905 916 ret = nvme_setup_cmd(req->q->queuedata, req); 906 917 if (ret) ··· 2981 2992 return snprintf(buf, size, "%s\n", dev_name(&pdev->dev)); 2982 2993 } 2983 2994 2984 - 2985 2995 static void nvme_pci_print_device_info(struct nvme_ctrl *ctrl) 2986 2996 { 2987 2997 struct pci_dev *pdev = to_pci_dev(to_nvme_dev(ctrl)->dev); ··· 2995 3007 
subsys->firmware_rev); 2996 3008 } 2997 3009 3010 + static bool nvme_pci_supports_pci_p2pdma(struct nvme_ctrl *ctrl) 3011 + { 3012 + struct nvme_dev *dev = to_nvme_dev(ctrl); 3013 + 3014 + return dma_pci_p2pdma_supported(dev->dev); 3015 + } 3016 + 2998 3017 static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { 2999 3018 .name = "pcie", 3000 3019 .module = THIS_MODULE, 3001 - .flags = NVME_F_METADATA_SUPPORTED | 3002 - NVME_F_PCI_P2PDMA, 3020 + .flags = NVME_F_METADATA_SUPPORTED, 3003 3021 .reg_read32 = nvme_pci_reg_read32, 3004 3022 .reg_write32 = nvme_pci_reg_write32, 3005 3023 .reg_read64 = nvme_pci_reg_read64, ··· 3013 3019 .submit_async_event = nvme_pci_submit_async_event, 3014 3020 .get_address = nvme_pci_get_address, 3015 3021 .print_device_info = nvme_pci_print_device_info, 3022 + .supports_pci_p2pdma = nvme_pci_supports_pci_p2pdma, 3016 3023 }; 3017 3024 3018 3025 static int nvme_dev_map(struct nvme_dev *dev)
+1 -1
drivers/nvme/target/rdma.c
··· 415 415 if (ib_dma_mapping_error(ndev->device, r->send_sge.addr)) 416 416 goto out_free_rsp; 417 417 418 - if (!ib_uses_virt_dma(ndev->device)) 418 + if (ib_dma_pci_p2p_dma_supported(ndev->device)) 419 419 r->req.p2p_client = &ndev->device->dev; 420 420 r->send_sge.length = sizeof(*r->req.cqe); 421 421 r->send_sge.lkey = ndev->pd->local_dma_lkey;
+5
drivers/pci/Kconfig
··· 164 164 config PCI_P2PDMA 165 165 bool "PCI peer-to-peer transfer support" 166 166 depends on ZONE_DEVICE 167 + # 168 + # The need for the scatterlist DMA bus address flag means PCI P2PDMA 169 + # requires 64bit 170 + # 171 + depends on 64BIT 167 172 select GENERIC_ALLOCATOR 168 173 help 169 174 Enables drivers to do PCI peer-to-peer transactions to and from
+36 -67
drivers/pci/p2pdma.c
··· 10 10 11 11 #define pr_fmt(fmt) "pci-p2pdma: " fmt 12 12 #include <linux/ctype.h> 13 + #include <linux/dma-map-ops.h> 13 14 #include <linux/pci-p2pdma.h> 14 15 #include <linux/module.h> 15 16 #include <linux/slab.h> ··· 20 19 #include <linux/random.h> 21 20 #include <linux/seq_buf.h> 22 21 #include <linux/xarray.h> 23 - 24 - enum pci_p2pdma_map_type { 25 - PCI_P2PDMA_MAP_UNKNOWN = 0, 26 - PCI_P2PDMA_MAP_NOT_SUPPORTED, 27 - PCI_P2PDMA_MAP_BUS_ADDR, 28 - PCI_P2PDMA_MAP_THRU_HOST_BRIDGE, 29 - }; 30 22 31 23 struct pci_p2pdma { 32 24 struct gen_pool *pool; ··· 848 854 struct pci_dev *provider = to_p2p_pgmap(pgmap)->provider; 849 855 struct pci_dev *client; 850 856 struct pci_p2pdma *p2pdma; 857 + int dist; 851 858 852 859 if (!provider->p2pdma) 853 860 return PCI_P2PDMA_MAP_NOT_SUPPORTED; ··· 865 870 type = xa_to_value(xa_load(&p2pdma->map_types, 866 871 map_types_idx(client))); 867 872 rcu_read_unlock(); 873 + 874 + if (type == PCI_P2PDMA_MAP_UNKNOWN) 875 + return calc_map_type_and_dist(provider, client, &dist, true); 876 + 868 877 return type; 869 878 } 870 879 871 - static int __pci_p2pdma_map_sg(struct pci_p2pdma_pagemap *p2p_pgmap, 872 - struct device *dev, struct scatterlist *sg, int nents) 880 + /** 881 + * pci_p2pdma_map_segment - map an sg segment determining the mapping type 882 + * @state: State structure that should be declared outside of the for_each_sg() 883 + * loop and initialized to zero. 884 + * @dev: DMA device that's doing the mapping operation 885 + * @sg: scatterlist segment to map 886 + * 887 + * This is a helper to be used by non-IOMMU dma_map_sg() implementations where 888 + * the sg segment is the same for the page_link and the dma_address. 889 + * 890 + * Attempt to map a single segment in an SGL with the PCI bus address. 891 + * The segment must point to a PCI P2PDMA page and thus must be 892 + * wrapped in a is_pci_p2pdma_page(sg_page(sg)) check. 
893 + * 894 + * Returns the type of mapping used and maps the page if the type is 895 + * PCI_P2PDMA_MAP_BUS_ADDR. 896 + */ 897 + enum pci_p2pdma_map_type 898 + pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev, 899 + struct scatterlist *sg) 873 900 { 874 - struct scatterlist *s; 875 - int i; 876 - 877 - for_each_sg(sg, s, nents, i) { 878 - s->dma_address = sg_phys(s) + p2p_pgmap->bus_offset; 879 - sg_dma_len(s) = s->length; 901 + if (state->pgmap != sg_page(sg)->pgmap) { 902 + state->pgmap = sg_page(sg)->pgmap; 903 + state->map = pci_p2pdma_map_type(state->pgmap, dev); 904 + state->bus_off = to_p2p_pgmap(state->pgmap)->bus_offset; 880 905 } 881 906 882 - return nents; 883 - } 884 - 885 - /** 886 - * pci_p2pdma_map_sg_attrs - map a PCI peer-to-peer scatterlist for DMA 887 - * @dev: device doing the DMA request 888 - * @sg: scatter list to map 889 - * @nents: elements in the scatterlist 890 - * @dir: DMA direction 891 - * @attrs: DMA attributes passed to dma_map_sg() (if called) 892 - * 893 - * Scatterlists mapped with this function should be unmapped using 894 - * pci_p2pdma_unmap_sg_attrs(). 895 - * 896 - * Returns the number of SG entries mapped or 0 on error. 
897 - */ 898 - int pci_p2pdma_map_sg_attrs(struct device *dev, struct scatterlist *sg, 899 - int nents, enum dma_data_direction dir, unsigned long attrs) 900 - { 901 - struct pci_p2pdma_pagemap *p2p_pgmap = 902 - to_p2p_pgmap(sg_page(sg)->pgmap); 903 - 904 - switch (pci_p2pdma_map_type(sg_page(sg)->pgmap, dev)) { 905 - case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: 906 - return dma_map_sg_attrs(dev, sg, nents, dir, attrs); 907 - case PCI_P2PDMA_MAP_BUS_ADDR: 908 - return __pci_p2pdma_map_sg(p2p_pgmap, dev, sg, nents); 909 - default: 910 - WARN_ON_ONCE(1); 911 - return 0; 907 + if (state->map == PCI_P2PDMA_MAP_BUS_ADDR) { 908 + sg->dma_address = sg_phys(sg) + state->bus_off; 909 + sg_dma_len(sg) = sg->length; 910 + sg_dma_mark_bus_address(sg); 912 911 } 912 + 913 + return state->map; 913 914 } 914 - EXPORT_SYMBOL_GPL(pci_p2pdma_map_sg_attrs); 915 - 916 - /** 917 - * pci_p2pdma_unmap_sg_attrs - unmap a PCI peer-to-peer scatterlist that was 918 - * mapped with pci_p2pdma_map_sg() 919 - * @dev: device doing the DMA request 920 - * @sg: scatter list to map 921 - * @nents: number of elements returned by pci_p2pdma_map_sg() 922 - * @dir: DMA direction 923 - * @attrs: DMA attributes passed to dma_unmap_sg() (if called) 924 - */ 925 - void pci_p2pdma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, 926 - int nents, enum dma_data_direction dir, unsigned long attrs) 927 - { 928 - enum pci_p2pdma_map_type map_type; 929 - 930 - map_type = pci_p2pdma_map_type(sg_page(sg)->pgmap, dev); 931 - 932 - if (map_type == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE) 933 - dma_unmap_sg_attrs(dev, sg, nents, dir, attrs); 934 - } 935 - EXPORT_SYMBOL_GPL(pci_p2pdma_unmap_sg_attrs); 936 915 937 916 /** 938 917 * pci_p2pdma_enable_store - parse a configfs/sysfs attribute store
+5
drivers/scsi/hosts.c
··· 236 236 237 237 shost->dma_dev = dma_dev; 238 238 239 + if (dma_dev->dma_mask) { 240 + shost->max_sectors = min_t(unsigned int, shost->max_sectors, 241 + dma_max_mapping_size(dma_dev) >> SECTOR_SHIFT); 242 + } 243 + 239 244 error = scsi_mq_setup_tags(shost); 240 245 if (error) 241 246 goto fail;
-4
drivers/scsi/scsi_lib.c
··· 1876 1876 blk_queue_max_integrity_segments(q, shost->sg_prot_tablesize); 1877 1877 } 1878 1878 1879 - if (dev->dma_mask) { 1880 - shost->max_sectors = min_t(unsigned int, shost->max_sectors, 1881 - dma_max_mapping_size(dev) >> SECTOR_SHIFT); 1882 - } 1883 1879 blk_queue_max_hw_sectors(q, shost->max_sectors); 1884 1880 blk_queue_segment_boundary(q, shost->dma_boundary); 1885 1881 dma_set_seg_boundary(dev, shost->dma_boundary);
+6
drivers/scsi/scsi_transport_sas.c
··· 225 225 { 226 226 struct Scsi_Host *shost = dev_to_shost(dev); 227 227 struct sas_host_attrs *sas_host = to_sas_host_attrs(shost); 228 + struct device *dma_dev = shost->dma_dev; 228 229 229 230 INIT_LIST_HEAD(&sas_host->rphy_list); 230 231 mutex_init(&sas_host->lock); ··· 236 235 if (sas_bsg_initialize(shost, NULL)) 237 236 dev_printk(KERN_ERR, dev, "fail to a bsg device %d\n", 238 237 shost->host_no); 238 + 239 + if (dma_dev->dma_mask) { 240 + shost->opt_sectors = min_t(unsigned int, shost->max_sectors, 241 + dma_opt_mapping_size(dma_dev) >> SECTOR_SHIFT); 242 + } 239 243 240 244 return 0; 241 245 }
+7
drivers/scsi/sd.c
··· 3297 3297 (sector_t)BLK_DEF_MAX_SECTORS); 3298 3298 } 3299 3299 3300 + /* 3301 + * Limit default to SCSI host optimal sector limit if set. There may be 3302 + * an impact on performance when the size of a request exceeds this 3303 + * host limit. 3304 + */ 3305 + rw_max = min_not_zero(rw_max, sdp->host->opt_sectors); 3306 + 3300 3307 /* Do not exceed controller limit */ 3301 3308 rw_max = min(rw_max, queue_max_hw_sectors(q)); 3302 3309
+14 -3
drivers/usb/core/hcd.c
··· 1251 1251 EXPORT_SYMBOL_GPL(usb_hcd_unlink_urb_from_ep); 1252 1252 1253 1253 /* 1254 - * Some usb host controllers can only perform dma using a small SRAM area. 1254 + * Some usb host controllers can only perform dma using a small SRAM area, 1255 + * or have restrictions on addressable DRAM. 1255 1256 * The usb core itself is however optimized for host controllers that can dma 1256 1257 * using regular system memory - like pci devices doing bus mastering. 1257 1258 * ··· 3128 3127 if (IS_ERR(hcd->localmem_pool)) 3129 3128 return PTR_ERR(hcd->localmem_pool); 3130 3129 3131 - local_mem = devm_memremap(hcd->self.sysdev, phys_addr, 3132 - size, MEMREMAP_WC); 3130 + /* 3131 + * if a physical SRAM address was passed, map it, otherwise 3132 + * allocate system memory as a buffer. 3133 + */ 3134 + if (phys_addr) 3135 + local_mem = devm_memremap(hcd->self.sysdev, phys_addr, 3136 + size, MEMREMAP_WC); 3137 + else 3138 + local_mem = dmam_alloc_attrs(hcd->self.sysdev, size, &dma, 3139 + GFP_KERNEL, 3140 + DMA_ATTR_WRITE_COMBINE); 3141 + 3133 3142 if (IS_ERR(local_mem)) 3134 3143 return PTR_ERR(local_mem); 3135 3144
+25
drivers/usb/host/ohci-sa1111.c
··· 203 203 goto err1; 204 204 } 205 205 206 + /* 207 + * According to the "Intel StrongARM SA-1111 Microprocessor Companion 208 + * Chip Specification Update" (June 2000), erratum #7, there is a 209 + * significant bug in the SA1111 SDRAM shared memory controller. If 210 + * an access is made to a region of memory above 1MB relative to the bank base, 211 + * it is important that address bit 10 _NOT_ be asserted. Depending 212 + * on the configuration of the RAM, bit 10 may correspond to one 213 + * of several different (processor-relative) address bits. 214 + * 215 + * Section 4.6 of the "Intel StrongARM SA-1111 Development Module 216 + * User's Guide" mentions that jumpers R51 and R52 control the 217 + * target of SA-1111 DMA (either SDRAM bank 0 on Assabet, or 218 + * SDRAM bank 1 on Neponset). The default configuration selects 219 + * Assabet, so any address in bank 1 is necessarily invalid. 220 + * 221 + * As a workaround, use a bounce buffer in addressable memory 222 + * as local_mem, relying on ZONE_DMA to provide an area that 223 + * fits within the above constraints. 224 + * 225 + * SZ_64K is an estimate for what size this might need. 226 + */ 227 + ret = usb_hcd_setup_local_mem(hcd, 0, 0, SZ_64K); 228 + if (ret) 229 + goto err1; 230 + 206 231 if (!request_mem_region(hcd->rsrc_start, hcd->rsrc_len, hcd_name)) { 207 232 dev_dbg(&dev->dev, "request_mem_region failed\n"); 208 233 ret = -EBUSY;
+64
include/linux/dma-map-ops.h
··· 11 11 12 12 struct cma; 13 13 14 + /* 15 + * Values for struct dma_map_ops.flags: 16 + * 17 + * DMA_F_PCI_P2PDMA_SUPPORTED: Indicates the dma_map_ops implementation can 18 + * handle PCI P2PDMA pages in the map_sg/unmap_sg operation. 19 + */ 20 + #define DMA_F_PCI_P2PDMA_SUPPORTED (1 << 0) 21 + 14 22 struct dma_map_ops { 23 + unsigned int flags; 24 + 15 25 void *(*alloc)(struct device *dev, size_t size, 16 26 dma_addr_t *dma_handle, gfp_t gfp, 17 27 unsigned long attrs); ··· 79 69 int (*dma_supported)(struct device *dev, u64 mask); 80 70 u64 (*get_required_mask)(struct device *dev); 81 71 size_t (*max_mapping_size)(struct device *dev); 72 + size_t (*opt_mapping_size)(void); 82 73 unsigned long (*get_merge_boundary)(struct device *dev); 83 74 }; 84 75 ··· 389 378 #endif /* CONFIG_DMA_API_DEBUG */ 390 379 391 380 extern const struct dma_map_ops dma_dummy_ops; 381 + 382 + enum pci_p2pdma_map_type { 383 + /* 384 + * PCI_P2PDMA_MAP_UNKNOWN: Used internally for indicating the mapping 385 + * type hasn't been calculated yet. Functions that return this enum 386 + * never return this value. 387 + */ 388 + PCI_P2PDMA_MAP_UNKNOWN = 0, 389 + 390 + /* 391 + * PCI_P2PDMA_MAP_NOT_SUPPORTED: Indicates the transaction will 392 + * traverse the host bridge and the host bridge is not in the 393 + * allowlist. DMA Mapping routines should return an error when 394 + * this is returned. 395 + */ 396 + PCI_P2PDMA_MAP_NOT_SUPPORTED, 397 + 398 + /* 399 + * PCI_P2PDMA_BUS_ADDR: Indicates that two devices can talk to 400 + * each other directly through a PCI switch and the transaction will 401 + * not traverse the host bridge. Such a mapping should program 402 + * the DMA engine with PCI bus addresses. 403 + */ 404 + PCI_P2PDMA_MAP_BUS_ADDR, 405 + 406 + /* 407 + * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: Indicates two devices can talk 408 + * to each other, but the transaction traverses a host bridge on the 409 + * allowlist. 
In this case, a normal mapping either with CPU physical 410 + * addresses (in the case of dma-direct) or IOVA addresses (in the 411 + * case of IOMMUs) should be used to program the DMA engine. 412 + */ 413 + PCI_P2PDMA_MAP_THRU_HOST_BRIDGE, 414 + }; 415 + 416 + struct pci_p2pdma_map_state { 417 + struct dev_pagemap *pgmap; 418 + int map; 419 + u64 bus_off; 420 + }; 421 + 422 + #ifdef CONFIG_PCI_P2PDMA 423 + enum pci_p2pdma_map_type 424 + pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev, 425 + struct scatterlist *sg); 426 + #else /* CONFIG_PCI_P2PDMA */ 427 + static inline enum pci_p2pdma_map_type 428 + pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev, 429 + struct scatterlist *sg) 430 + { 431 + return PCI_P2PDMA_MAP_NOT_SUPPORTED; 432 + } 433 + #endif /* CONFIG_PCI_P2PDMA */ 392 434 393 435 #endif /* _LINUX_DMA_MAP_OPS_H */
+10
include/linux/dma-mapping.h
··· 140 140 unsigned long attrs); 141 141 bool dma_can_mmap(struct device *dev); 142 142 int dma_supported(struct device *dev, u64 mask); 143 + bool dma_pci_p2pdma_supported(struct device *dev); 143 144 int dma_set_mask(struct device *dev, u64 mask); 144 145 int dma_set_coherent_mask(struct device *dev, u64 mask); 145 146 u64 dma_get_required_mask(struct device *dev); 146 147 size_t dma_max_mapping_size(struct device *dev); 148 + size_t dma_opt_mapping_size(struct device *dev); 147 149 bool dma_need_sync(struct device *dev, dma_addr_t dma_addr); 148 150 unsigned long dma_get_merge_boundary(struct device *dev); 149 151 struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size, ··· 252 250 { 253 251 return 0; 254 252 } 253 + static inline bool dma_pci_p2pdma_supported(struct device *dev) 254 + { 255 + return false; 256 + } 255 257 static inline int dma_set_mask(struct device *dev, u64 mask) 256 258 { 257 259 return -EIO; ··· 269 263 return 0; 270 264 } 271 265 static inline size_t dma_max_mapping_size(struct device *dev) 266 + { 267 + return 0; 268 + } 269 + static inline size_t dma_opt_mapping_size(struct device *dev) 272 270 { 273 271 return 0; 274 272 }
+2
include/linux/iova.h
··· 79 79 int iova_cache_get(void); 80 80 void iova_cache_put(void); 81 81 82 + unsigned long iova_rcache_range(void); 83 + 82 84 void free_iova(struct iova_domain *iovad, unsigned long pfn); 83 85 void __free_iova(struct iova_domain *iovad, struct iova *iova); 84 86 struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size,
-27
include/linux/pci-p2pdma.h
··· 30 30 unsigned int *nents, u32 length); 31 31 void pci_p2pmem_free_sgl(struct pci_dev *pdev, struct scatterlist *sgl); 32 32 void pci_p2pmem_publish(struct pci_dev *pdev, bool publish); 33 - int pci_p2pdma_map_sg_attrs(struct device *dev, struct scatterlist *sg, 34 - int nents, enum dma_data_direction dir, unsigned long attrs); 35 - void pci_p2pdma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, 36 - int nents, enum dma_data_direction dir, unsigned long attrs); 37 33 int pci_p2pdma_enable_store(const char *page, struct pci_dev **p2p_dev, 38 34 bool *use_p2pdma); 39 35 ssize_t pci_p2pdma_enable_show(char *page, struct pci_dev *p2p_dev, ··· 79 83 static inline void pci_p2pmem_publish(struct pci_dev *pdev, bool publish) 80 84 { 81 85 } 82 - static inline int pci_p2pdma_map_sg_attrs(struct device *dev, 83 - struct scatterlist *sg, int nents, enum dma_data_direction dir, 84 - unsigned long attrs) 85 - { 86 - return 0; 87 - } 88 - static inline void pci_p2pdma_unmap_sg_attrs(struct device *dev, 89 - struct scatterlist *sg, int nents, enum dma_data_direction dir, 90 - unsigned long attrs) 91 - { 92 - } 93 86 static inline int pci_p2pdma_enable_store(const char *page, 94 87 struct pci_dev **p2p_dev, bool *use_p2pdma) 95 88 { ··· 102 117 static inline struct pci_dev *pci_p2pmem_find(struct device *client) 103 118 { 104 119 return pci_p2pmem_find_many(&client, 1); 105 - } 106 - 107 - static inline int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg, 108 - int nents, enum dma_data_direction dir) 109 - { 110 - return pci_p2pdma_map_sg_attrs(dev, sg, nents, dir, 0); 111 - } 112 - 113 - static inline void pci_p2pdma_unmap_sg(struct device *dev, 114 - struct scatterlist *sg, int nents, enum dma_data_direction dir) 115 - { 116 - pci_p2pdma_unmap_sg_attrs(dev, sg, nents, dir, 0); 117 120 } 118 121 119 122 #endif /* _LINUX_PCI_P2P_H */
+69
include/linux/scatterlist.h
··· 16 16 #ifdef CONFIG_NEED_SG_DMA_LENGTH 17 17 unsigned int dma_length; 18 18 #endif 19 + #ifdef CONFIG_PCI_P2PDMA 20 + unsigned int dma_flags; 21 + #endif 19 22 }; 20 23 21 24 /* ··· 247 244 { 248 245 sg->page_link &= ~SG_END; 249 246 } 247 + 248 + /* 249 + * CONFIG_PCI_P2PDMA depends on CONFIG_64BIT which means there are 4 bytes of padding 250 + * in struct scatterlist (assuming also CONFIG_NEED_SG_DMA_LENGTH is set). 251 + * Use this padding for DMA flags bits to indicate when a specific 252 + * dma address is a bus address. 253 + */ 254 + #ifdef CONFIG_PCI_P2PDMA 255 + 256 + #define SG_DMA_BUS_ADDRESS (1 << 0) 257 + 258 + /** 259 + * sg_is_dma_bus_address - Return whether a given segment was marked 260 + * as a bus address 261 + * @sg: SG entry 262 + * 263 + * Description: 264 + * Returns true if sg_dma_mark_bus_address() has been called on 265 + * this segment. 266 + **/ 267 + static inline bool sg_is_dma_bus_address(struct scatterlist *sg) 268 + { 269 + return sg->dma_flags & SG_DMA_BUS_ADDRESS; 270 + } 271 + 272 + /** 273 + * sg_dma_mark_bus_address - Mark the scatterlist entry as a bus address 274 + * @sg: SG entry 275 + * 276 + * Description: 277 + * Marks the passed in sg entry to indicate that the dma_address is 278 + * a bus address and doesn't need to be unmapped. This should only be 279 + * used by dma_map_sg() implementations to mark bus addresses 280 + * so they can be properly cleaned up in dma_unmap_sg(). 281 + **/ 282 + static inline void sg_dma_mark_bus_address(struct scatterlist *sg) 283 + { 284 + sg->dma_flags |= SG_DMA_BUS_ADDRESS; 285 + } 286 + 287 + /** 288 + * sg_dma_unmark_bus_address - Unmark the scatterlist entry as a bus address 289 + * @sg: SG entry 290 + * 291 + * Description: 292 + * Clears the bus address mark. 
293 + **/ 294 + static inline void sg_dma_unmark_bus_address(struct scatterlist *sg) 295 + { 296 + sg->dma_flags &= ~SG_DMA_BUS_ADDRESS; 297 + } 298 + 299 + #else 300 + 301 + static inline bool sg_is_dma_bus_address(struct scatterlist *sg) 302 + { 303 + return false; 304 + } 305 + static inline void sg_dma_mark_bus_address(struct scatterlist *sg) 306 + { 307 + } 308 + static inline void sg_dma_unmark_bus_address(struct scatterlist *sg) 309 + { 310 + } 311 + 312 + #endif 250 313 251 314 /** 252 315 * sg_phys - Return physical address of an sg entry
+6 -11
include/linux/swiotlb.h
··· 60 60 size_t size, enum dma_data_direction dir, unsigned long attrs); 61 61 62 62 #ifdef CONFIG_SWIOTLB 63 - extern enum swiotlb_force swiotlb_force; 64 63 65 64 /** 66 65 * struct io_tlb_mem - IO TLB Memory Pool Descriptor ··· 79 80 * @used: The number of used IO TLB block. 80 81 * @list: The free list describing the number of free entries available 81 82 * from each index. 82 - * @index: The index to start searching in the next round. 83 83 * @orig_addr: The original address corresponding to a mapped entry. 84 84 * @alloc_size: Size of the allocated buffer. 85 - * @lock: The lock to protect the above data structures in the map and 86 - * unmap calls. 87 85 * @debugfs: The dentry to debugfs. 88 86 * @late_alloc: %true if allocated using the page allocator 89 87 * @force_bounce: %true if swiotlb bouncing is forced 90 88 * @for_alloc: %true if the pool is used for memory allocation 89 + * @nareas: The area number in the pool. 90 + * @area_nslabs: The slot number in the area. 91 91 */ 92 92 struct io_tlb_mem { 93 93 phys_addr_t start; ··· 94 96 void *vaddr; 95 97 unsigned long nslabs; 96 98 unsigned long used; 97 - unsigned int index; 98 - spinlock_t lock; 99 99 struct dentry *debugfs; 100 100 bool late_alloc; 101 101 bool force_bounce; 102 102 bool for_alloc; 103 - struct io_tlb_slot { 104 - phys_addr_t orig_addr; 105 - size_t alloc_size; 106 - unsigned int list; 107 - } *slots; 103 + unsigned int nareas; 104 + unsigned int area_nslabs; 105 + struct io_tlb_area *areas; 106 + struct io_tlb_slot *slots; 108 107 }; 109 108 extern struct io_tlb_mem io_tlb_default_mem; 110 109
+11
include/rdma/ib_verbs.h
··· 4013 4013 return IS_ENABLED(CONFIG_INFINIBAND_VIRT_DMA) && !dev->dma_device; 4014 4014 } 4015 4015 4016 + /* 4017 + * Check if a IB device's underlying DMA mapping supports P2PDMA transfers. 4018 + */ 4019 + static inline bool ib_dma_pci_p2p_dma_supported(struct ib_device *dev) 4020 + { 4021 + if (ib_uses_virt_dma(dev)) 4022 + return false; 4023 + 4024 + return dma_pci_p2pdma_supported(dev->dma_device); 4025 + } 4026 + 4016 4027 /** 4017 4028 * ib_dma_mapping_error - check a DMA addr for error 4018 4029 * @dev: The device for which the dma_addr was created
+1
include/scsi/scsi_host.h
··· 607 607 short unsigned int sg_tablesize; 608 608 short unsigned int sg_prot_tablesize; 609 609 unsigned int max_sectors; 610 + unsigned int opt_sectors; 610 611 unsigned int max_segment_size; 611 612 unsigned long dma_boundary; 612 613 unsigned long virt_boundary_mask;
+37 -6
kernel/dma/direct.c
··· 453 453 arch_sync_dma_for_cpu_all(); 454 454 } 455 455 456 + /* 457 + * Unmaps segments, except for ones marked as pci_p2pdma which do not 458 + * require any further action as they contain a bus address. 459 + */ 456 460 void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, 457 461 int nents, enum dma_data_direction dir, unsigned long attrs) 458 462 { 459 463 struct scatterlist *sg; 460 464 int i; 461 465 462 - for_each_sg(sgl, sg, nents, i) 463 - dma_direct_unmap_page(dev, sg->dma_address, sg_dma_len(sg), dir, 464 - attrs); 466 + for_each_sg(sgl, sg, nents, i) { 467 + if (sg_is_dma_bus_address(sg)) 468 + sg_dma_unmark_bus_address(sg); 469 + else 470 + dma_direct_unmap_page(dev, sg->dma_address, 471 + sg_dma_len(sg), dir, attrs); 472 + } 465 473 } 466 474 #endif 467 475 468 476 int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, 469 477 enum dma_data_direction dir, unsigned long attrs) 470 478 { 471 - int i; 479 + struct pci_p2pdma_map_state p2pdma_state = {}; 480 + enum pci_p2pdma_map_type map; 472 481 struct scatterlist *sg; 482 + int i, ret; 473 483 474 484 for_each_sg(sgl, sg, nents, i) { 485 + if (is_pci_p2pdma_page(sg_page(sg))) { 486 + map = pci_p2pdma_map_segment(&p2pdma_state, dev, sg); 487 + switch (map) { 488 + case PCI_P2PDMA_MAP_BUS_ADDR: 489 + continue; 490 + case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: 491 + /* 492 + * Any P2P mapping that traverses the PCI 493 + * host bridge must be mapped with CPU physical 494 + * address and not PCI bus addresses. This is 495 + * done with dma_direct_map_page() below. 
496 + */ 497 + break; 498 + default: 499 + ret = -EREMOTEIO; 500 + goto out_unmap; 501 + } 502 + } 503 + 475 504 sg->dma_address = dma_direct_map_page(dev, sg_page(sg), 476 505 sg->offset, sg->length, dir, attrs); 477 - if (sg->dma_address == DMA_MAPPING_ERROR) 506 + if (sg->dma_address == DMA_MAPPING_ERROR) { 507 + ret = -EIO; 478 508 goto out_unmap; 509 + } 479 510 sg_dma_len(sg) = sg->length; 480 511 } 481 512 ··· 514 483 515 484 out_unmap: 516 485 dma_direct_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); 517 - return -EIO; 486 + return ret; 518 487 } 519 488 520 489 dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr,
+7 -1
kernel/dma/direct.h
··· 8 8 #define _KERNEL_DMA_DIRECT_H 9 9 10 10 #include <linux/dma-direct.h> 11 + #include <linux/memremap.h> 11 12 12 13 int dma_direct_get_sgtable(struct device *dev, struct sg_table *sgt, 13 14 void *cpu_addr, dma_addr_t dma_addr, size_t size, ··· 88 87 phys_addr_t phys = page_to_phys(page) + offset; 89 88 dma_addr_t dma_addr = phys_to_dma(dev, phys); 90 89 91 - if (is_swiotlb_force_bounce(dev)) 90 + if (is_swiotlb_force_bounce(dev)) { 91 + if (is_pci_p2pdma_page(page)) 92 + return DMA_MAPPING_ERROR; 92 93 return swiotlb_map(dev, phys, size, dir, attrs); 94 + } 93 95 94 96 if (unlikely(!dma_capable(dev, dma_addr, size, true))) { 97 + if (is_pci_p2pdma_page(page)) 98 + return DMA_MAPPING_ERROR; 95 99 if (is_swiotlb_active(dev)) 96 100 return swiotlb_map(dev, phys, size, dir, attrs); 97 101
+40 -7
kernel/dma/mapping.c
··· 197 197 if (ents > 0) 198 198 debug_dma_map_sg(dev, sg, nents, ents, dir, attrs); 199 199 else if (WARN_ON_ONCE(ents != -EINVAL && ents != -ENOMEM && 200 - ents != -EIO)) 200 + ents != -EIO && ents != -EREMOTEIO)) 201 201 return -EIO; 202 202 203 203 return ents; ··· 249 249 * Returns 0 on success or a negative error code on error. The following 250 250 * error codes are supported with the given meaning: 251 251 * 252 - * -EINVAL An invalid argument, unaligned access or other error 253 - * in usage. Will not succeed if retried. 254 - * -ENOMEM Insufficient resources (like memory or IOVA space) to 255 - * complete the mapping. Should succeed if retried later. 256 - * -EIO Legacy error code with an unknown meaning. eg. this is 257 - * returned if a lower level call returned DMA_MAPPING_ERROR. 252 + * -EINVAL An invalid argument, unaligned access or other error 253 + * in usage. Will not succeed if retried. 254 + * -ENOMEM Insufficient resources (like memory or IOVA space) to 255 + * complete the mapping. Should succeed if retried later. 256 + * -EIO Legacy error code with an unknown meaning. eg. this is 257 + * returned if a lower level call returned 258 + * DMA_MAPPING_ERROR. 259 + * -EREMOTEIO The DMA device cannot access P2PDMA memory specified 260 + * in the sg_table. This will not succeed if retried. 258 261 */ 259 262 int dma_map_sgtable(struct device *dev, struct sg_table *sgt, 260 263 enum dma_data_direction dir, unsigned long attrs) ··· 723 720 } 724 721 EXPORT_SYMBOL(dma_supported); 725 722 723 + bool dma_pci_p2pdma_supported(struct device *dev) 724 + { 725 + const struct dma_map_ops *ops = get_dma_ops(dev); 726 + 727 + /* if ops is not set, dma direct will be used which supports P2PDMA */ 728 + if (!ops) 729 + return true; 730 + 731 + /* 732 + * Note: dma_ops_bypass is not checked here because P2PDMA should 733 + * not be used with dma mapping ops that do not have support even 734 + * if the specific device is bypassing them. 
735 + */ 736 + 737 + return ops->flags & DMA_F_PCI_P2PDMA_SUPPORTED; 738 + } 739 + EXPORT_SYMBOL_GPL(dma_pci_p2pdma_supported); 740 + 726 741 #ifdef CONFIG_ARCH_HAS_DMA_SET_MASK 727 742 void arch_dma_set_mask(struct device *dev, u64 mask); 728 743 #else ··· 793 772 return size; 794 773 } 795 774 EXPORT_SYMBOL_GPL(dma_max_mapping_size); 775 + 776 + size_t dma_opt_mapping_size(struct device *dev) 777 + { 778 + const struct dma_map_ops *ops = get_dma_ops(dev); 779 + size_t size = SIZE_MAX; 780 + 781 + if (ops && ops->opt_mapping_size) 782 + size = ops->opt_mapping_size(); 783 + 784 + return min(dma_max_mapping_size(dev), size); 785 + } 786 + EXPORT_SYMBOL_GPL(dma_opt_mapping_size); 796 787 797 788 bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) 798 789 {
+219 -44
kernel/dma/swiotlb.c
··· 62 62 63 63 #define INVALID_PHYS_ADDR (~(phys_addr_t)0) 64 64 65 + struct io_tlb_slot { 66 + phys_addr_t orig_addr; 67 + size_t alloc_size; 68 + unsigned int list; 69 + }; 70 + 65 71 static bool swiotlb_force_bounce; 66 72 static bool swiotlb_force_disable; 67 73 ··· 76 70 phys_addr_t swiotlb_unencrypted_base; 77 71 78 72 static unsigned long default_nslabs = IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT; 73 + static unsigned long default_nareas; 74 + 75 + /** 76 + * struct io_tlb_area - IO TLB memory area descriptor 77 + * 78 + * This is a single area with a single lock. 79 + * 80 + * @used: The number of used IO TLB blocks. 81 + * @index: The slot index to start searching in this area for next round. 82 + * @lock: The lock to protect the above data structures in the map and 83 + * unmap calls. 84 + */ 85 + struct io_tlb_area { 86 + unsigned long used; 87 + unsigned int index; 88 + spinlock_t lock; 89 + }; 90 + 91 + /* 92 + * Round up number of slabs to the next power of 2. The last area is going 93 + * to be smaller than the rest if default_nslabs is not a power of two. 94 + * The number of slots in an area should be a multiple of IO_TLB_SEGSIZE, 95 + * otherwise a segment may span two or more areas. It conflicts with free 96 + * contiguous slots tracking: free slots are treated as contiguous no matter 97 + * whether they cross an area boundary. 98 + * 99 + * Return true if default_nslabs is rounded up. 
100 + */ 101 + static bool round_up_default_nslabs(void) 102 + { 103 + if (!default_nareas) 104 + return false; 105 + 106 + if (default_nslabs < IO_TLB_SEGSIZE * default_nareas) 107 + default_nslabs = IO_TLB_SEGSIZE * default_nareas; 108 + else if (is_power_of_2(default_nslabs)) 109 + return false; 110 + default_nslabs = roundup_pow_of_two(default_nslabs); 111 + return true; 112 + } 113 + 114 + static void swiotlb_adjust_nareas(unsigned int nareas) 115 + { 116 + /* use a single area when non is specified */ 117 + if (!nareas) 118 + nareas = 1; 119 + else if (!is_power_of_2(nareas)) 120 + nareas = roundup_pow_of_two(nareas); 121 + 122 + default_nareas = nareas; 123 + 124 + pr_info("area num %d.\n", nareas); 125 + if (round_up_default_nslabs()) 126 + pr_info("SWIOTLB bounce buffer size roundup to %luMB", 127 + (default_nslabs << IO_TLB_SHIFT) >> 20); 128 + } 79 129 80 130 static int __init 81 131 setup_io_tlb_npages(char *str) ··· 141 79 default_nslabs = 142 80 ALIGN(simple_strtoul(str, &str, 0), IO_TLB_SEGSIZE); 143 81 } 82 + if (*str == ',') 83 + ++str; 84 + if (isdigit(*str)) 85 + swiotlb_adjust_nareas(simple_strtoul(str, &str, 0)); 144 86 if (*str == ',') 145 87 ++str; 146 88 if (!strcmp(str, "force")) ··· 178 112 */ 179 113 if (default_nslabs != IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT) 180 114 return; 115 + 181 116 size = ALIGN(size, IO_TLB_SIZE); 182 117 default_nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE); 118 + if (round_up_default_nslabs()) 119 + size = default_nslabs << IO_TLB_SHIFT; 183 120 pr_info("SWIOTLB bounce buffer size adjusted to %luMB", size >> 20); 184 121 } 185 122 ··· 261 192 } 262 193 263 194 static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start, 264 - unsigned long nslabs, unsigned int flags, bool late_alloc) 195 + unsigned long nslabs, unsigned int flags, 196 + bool late_alloc, unsigned int nareas) 265 197 { 266 198 void *vaddr = phys_to_virt(start); 267 199 unsigned long bytes = nslabs << IO_TLB_SHIFT, i; ··· 270 
200 mem->nslabs = nslabs; 271 201 mem->start = start; 272 202 mem->end = mem->start + bytes; 273 - mem->index = 0; 274 203 mem->late_alloc = late_alloc; 204 + mem->nareas = nareas; 205 + mem->area_nslabs = nslabs / mem->nareas; 275 206 276 207 mem->force_bounce = swiotlb_force_bounce || (flags & SWIOTLB_FORCE); 277 208 278 - spin_lock_init(&mem->lock); 209 + for (i = 0; i < mem->nareas; i++) { 210 + spin_lock_init(&mem->areas[i].lock); 211 + mem->areas[i].index = 0; 212 + mem->areas[i].used = 0; 213 + } 214 + 279 215 for (i = 0; i < mem->nslabs; i++) { 280 216 mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i); 281 217 mem->slots[i].orig_addr = INVALID_PHYS_ADDR; ··· 308 232 int (*remap)(void *tlb, unsigned long nslabs)) 309 233 { 310 234 struct io_tlb_mem *mem = &io_tlb_default_mem; 311 - unsigned long nslabs = default_nslabs; 235 + unsigned long nslabs; 312 236 size_t alloc_size; 313 237 size_t bytes; 314 238 void *tlb; ··· 317 241 return; 318 242 if (swiotlb_force_disable) 319 243 return; 244 + 245 + /* 246 + * default_nslabs maybe changed when adjust area number. 247 + * So allocate bounce buffer after adjusting area number. 
248 + */ 249 + if (!default_nareas) 250 + swiotlb_adjust_nareas(num_possible_cpus()); 251 + 252 + nslabs = default_nslabs; 253 + if (nslabs < IO_TLB_MIN_SLABS) 254 + panic("%s: nslabs = %lu too small\n", __func__, nslabs); 320 255 321 256 /* 322 257 * By default allocate the bounce buffer memory from low memory, but ··· 341 254 else 342 255 tlb = memblock_alloc_low(bytes, PAGE_SIZE); 343 256 if (!tlb) { 344 - pr_warn("%s: failed to allocate tlb structure\n", __func__); 257 + pr_warn("%s: Failed to allocate %zu bytes tlb structure\n", 258 + __func__, bytes); 345 259 return; 346 260 } 347 261 ··· 362 274 panic("%s: Failed to allocate %zu bytes align=0x%lx\n", 363 275 __func__, alloc_size, PAGE_SIZE); 364 276 365 - swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, flags, false); 277 + mem->areas = memblock_alloc(array_size(sizeof(struct io_tlb_area), 278 + default_nareas), SMP_CACHE_BYTES); 279 + if (!mem->areas) 280 + panic("%s: Failed to allocate mem->areas.\n", __func__); 281 + 282 + swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, flags, false, 283 + default_nareas); 366 284 367 285 if (flags & SWIOTLB_VERBOSE) 368 286 swiotlb_print_info(); ··· 376 282 377 283 void __init swiotlb_init(bool addressing_limit, unsigned int flags) 378 284 { 379 - return swiotlb_init_remap(addressing_limit, flags, NULL); 285 + swiotlb_init_remap(addressing_limit, flags, NULL); 380 286 } 381 287 382 288 /* ··· 390 296 struct io_tlb_mem *mem = &io_tlb_default_mem; 391 297 unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE); 392 298 unsigned char *vstart = NULL; 393 - unsigned int order; 299 + unsigned int order, area_order; 394 300 bool retried = false; 395 301 int rc = 0; 396 302 ··· 431 337 (PAGE_SIZE << order) >> 20); 432 338 } 433 339 340 + if (!default_nareas) 341 + swiotlb_adjust_nareas(num_possible_cpus()); 342 + 343 + area_order = get_order(array_size(sizeof(*mem->areas), 344 + default_nareas)); 345 + mem->areas = (struct io_tlb_area *) 346 + 
__get_free_pages(GFP_KERNEL | __GFP_ZERO, area_order); 347 + if (!mem->areas) 348 + goto error_area; 349 + 434 350 mem->slots = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 435 351 get_order(array_size(sizeof(*mem->slots), nslabs))); 436 - if (!mem->slots) { 437 - free_pages((unsigned long)vstart, order); 438 - return -ENOMEM; 439 - } 352 + if (!mem->slots) 353 + goto error_slots; 440 354 441 355 set_memory_decrypted((unsigned long)vstart, 442 356 (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT); 443 - swiotlb_init_io_tlb_mem(mem, virt_to_phys(vstart), nslabs, 0, true); 357 + swiotlb_init_io_tlb_mem(mem, virt_to_phys(vstart), nslabs, 0, true, 358 + default_nareas); 444 359 445 360 swiotlb_print_info(); 446 361 return 0; 362 + 363 + error_slots: 364 + free_pages((unsigned long)mem->areas, area_order); 365 + error_area: 366 + free_pages((unsigned long)vstart, order); 367 + return -ENOMEM; 447 368 } 448 369 449 370 void __init swiotlb_exit(void) ··· 466 357 struct io_tlb_mem *mem = &io_tlb_default_mem; 467 358 unsigned long tbl_vaddr; 468 359 size_t tbl_size, slots_size; 360 + unsigned int area_order; 469 361 470 362 if (swiotlb_force_bounce) 471 363 return; ··· 481 371 482 372 set_memory_encrypted(tbl_vaddr, tbl_size >> PAGE_SHIFT); 483 373 if (mem->late_alloc) { 374 + area_order = get_order(array_size(sizeof(*mem->areas), 375 + mem->nareas)); 376 + free_pages((unsigned long)mem->areas, area_order); 484 377 free_pages(tbl_vaddr, get_order(tbl_size)); 485 378 free_pages((unsigned long)mem->slots, get_order(slots_size)); 486 379 } else { 380 + memblock_free_late(__pa(mem->areas), 381 + array_size(sizeof(*mem->areas), mem->nareas)); 487 382 memblock_free_late(mem->start, tbl_size); 488 383 memblock_free_late(__pa(mem->slots), slots_size); 489 384 } ··· 591 476 return nr_slots(boundary_mask + 1); 592 477 } 593 478 594 - static unsigned int wrap_index(struct io_tlb_mem *mem, unsigned int index) 479 + static unsigned int wrap_area_index(struct io_tlb_mem *mem, unsigned int 
index) 595 480 { 596 - if (index >= mem->nslabs) 481 + if (index >= mem->area_nslabs) 597 482 return 0; 598 483 return index; 599 484 } ··· 602 487 * Find a suitable number of IO TLB entries size that will fit this request and 603 488 * allocate a buffer from that IO TLB pool. 604 489 */ 605 - static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, 606 - size_t alloc_size, unsigned int alloc_align_mask) 490 + static int swiotlb_do_find_slots(struct device *dev, int area_index, 491 + phys_addr_t orig_addr, size_t alloc_size, 492 + unsigned int alloc_align_mask) 607 493 { 608 494 struct io_tlb_mem *mem = dev->dma_io_tlb_mem; 495 + struct io_tlb_area *area = mem->areas + area_index; 609 496 unsigned long boundary_mask = dma_get_seg_boundary(dev); 610 497 dma_addr_t tbl_dma_addr = 611 498 phys_to_dma_unencrypted(dev, mem->start) & boundary_mask; ··· 618 501 unsigned int index, wrap, count = 0, i; 619 502 unsigned int offset = swiotlb_align_offset(dev, orig_addr); 620 503 unsigned long flags; 504 + unsigned int slot_base; 505 + unsigned int slot_index; 621 506 622 507 BUG_ON(!nslots); 508 + BUG_ON(area_index >= mem->nareas); 623 509 624 510 /* 625 511 * For mappings with an alignment requirement don't bother looping to ··· 634 514 stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT)); 635 515 stride = max(stride, (alloc_align_mask >> IO_TLB_SHIFT) + 1); 636 516 637 - spin_lock_irqsave(&mem->lock, flags); 638 - if (unlikely(nslots > mem->nslabs - mem->used)) 517 + spin_lock_irqsave(&area->lock, flags); 518 + if (unlikely(nslots > mem->area_nslabs - area->used)) 639 519 goto not_found; 640 520 641 - index = wrap = wrap_index(mem, ALIGN(mem->index, stride)); 521 + slot_base = area_index * mem->area_nslabs; 522 + index = wrap = wrap_area_index(mem, ALIGN(area->index, stride)); 523 + 642 524 do { 525 + slot_index = slot_base + index; 526 + 643 527 if (orig_addr && 644 - (slot_addr(tbl_dma_addr, index) & iotlb_align_mask) != 645 - (orig_addr & 
iotlb_align_mask)) { 646 - index = wrap_index(mem, index + 1); 528 + (slot_addr(tbl_dma_addr, slot_index) & 529 + iotlb_align_mask) != (orig_addr & iotlb_align_mask)) { 530 + index = wrap_area_index(mem, index + 1); 647 531 continue; 648 532 } 649 533 ··· 656 532 * contiguous buffers, we allocate the buffers from that slot 657 533 * and mark the entries as '0' indicating unavailable. 658 534 */ 659 - if (!iommu_is_span_boundary(index, nslots, 535 + if (!iommu_is_span_boundary(slot_index, nslots, 660 536 nr_slots(tbl_dma_addr), 661 537 max_slots)) { 662 - if (mem->slots[index].list >= nslots) 538 + if (mem->slots[slot_index].list >= nslots) 663 539 goto found; 664 540 } 665 - index = wrap_index(mem, index + stride); 541 + index = wrap_area_index(mem, index + stride); 666 542 } while (index != wrap); 667 543 668 544 not_found: 669 - spin_unlock_irqrestore(&mem->lock, flags); 545 + spin_unlock_irqrestore(&area->lock, flags); 670 546 return -1; 671 547 672 548 found: 673 - for (i = index; i < index + nslots; i++) { 549 + for (i = slot_index; i < slot_index + nslots; i++) { 674 550 mem->slots[i].list = 0; 675 - mem->slots[i].alloc_size = 676 - alloc_size - (offset + ((i - index) << IO_TLB_SHIFT)); 551 + mem->slots[i].alloc_size = alloc_size - (offset + 552 + ((i - slot_index) << IO_TLB_SHIFT)); 677 553 } 678 - for (i = index - 1; 554 + for (i = slot_index - 1; 679 555 io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && 680 556 mem->slots[i].list; i--) 681 557 mem->slots[i].list = ++count; ··· 683 559 /* 684 560 * Update the indices to avoid searching in the next round. 
685 561 */ 686 - if (index + nslots < mem->nslabs) 687 - mem->index = index + nslots; 562 + if (index + nslots < mem->area_nslabs) 563 + area->index = index + nslots; 688 564 else 689 - mem->index = 0; 690 - mem->used += nslots; 565 + area->index = 0; 566 + area->used += nslots; 567 + spin_unlock_irqrestore(&area->lock, flags); 568 + return slot_index; 569 + } 691 570 692 - spin_unlock_irqrestore(&mem->lock, flags); 693 - return index; 571 + static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, 572 + size_t alloc_size, unsigned int alloc_align_mask) 573 + { 574 + struct io_tlb_mem *mem = dev->dma_io_tlb_mem; 575 + int start = raw_smp_processor_id() & (mem->nareas - 1); 576 + int i = start, index; 577 + 578 + do { 579 + index = swiotlb_do_find_slots(dev, i, orig_addr, alloc_size, 580 + alloc_align_mask); 581 + if (index >= 0) 582 + return index; 583 + if (++i >= mem->nareas) 584 + i = 0; 585 + } while (i != start); 586 + 587 + return -1; 588 + } 589 + 590 + static unsigned long mem_used(struct io_tlb_mem *mem) 591 + { 592 + int i; 593 + unsigned long used = 0; 594 + 595 + for (i = 0; i < mem->nareas; i++) 596 + used += mem->areas[i].used; 597 + return used; 694 598 } 695 599 696 600 phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, ··· 732 580 int index; 733 581 phys_addr_t tlb_addr; 734 582 735 - if (!mem) 583 + if (!mem || !mem->nslabs) 736 584 panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); 737 585 738 586 if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) ··· 750 598 if (!(attrs & DMA_ATTR_NO_WARN)) 751 599 dev_warn_ratelimited(dev, 752 600 "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n", 753 - alloc_size, mem->nslabs, mem->used); 601 + alloc_size, mem->nslabs, mem_used(mem)); 754 602 return (phys_addr_t)DMA_MAPPING_ERROR; 755 603 } 756 604 ··· 780 628 unsigned int offset = swiotlb_align_offset(dev, tlb_addr); 781 629 int index = (tlb_addr 
- offset - mem->start) >> IO_TLB_SHIFT; 782 630 int nslots = nr_slots(mem->slots[index].alloc_size + offset); 631 + int aindex = index / mem->area_nslabs; 632 + struct io_tlb_area *area = &mem->areas[aindex]; 783 633 int count, i; 784 634 785 635 /* ··· 790 636 * While returning the entries to the free list, we merge the entries 791 637 * with slots below and above the pool being returned. 792 638 */ 793 - spin_lock_irqsave(&mem->lock, flags); 639 + BUG_ON(aindex >= mem->nareas); 640 + 641 + spin_lock_irqsave(&area->lock, flags); 794 642 if (index + nslots < ALIGN(index + 1, IO_TLB_SEGSIZE)) 795 643 count = mem->slots[index + nslots].list; 796 644 else ··· 816 660 io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && mem->slots[i].list; 817 661 i--) 818 662 mem->slots[i].list = ++count; 819 - mem->used -= nslots; 820 - spin_unlock_irqrestore(&mem->lock, flags); 663 + area->used -= nslots; 664 + spin_unlock_irqrestore(&area->lock, flags); 821 665 } 822 666 823 667 /* ··· 912 756 } 913 757 EXPORT_SYMBOL_GPL(is_swiotlb_active); 914 758 759 + static int io_tlb_used_get(void *data, u64 *val) 760 + { 761 + *val = mem_used(&io_tlb_default_mem); 762 + return 0; 763 + } 764 + DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_used, io_tlb_used_get, NULL, "%llu\n"); 765 + 915 766 static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem, 916 767 const char *dirname) 917 768 { ··· 927 764 return; 928 765 929 766 debugfs_create_ulong("io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs); 930 - debugfs_create_ulong("io_tlb_used", 0400, mem->debugfs, &mem->used); 767 + debugfs_create_file("io_tlb_used", 0400, mem->debugfs, NULL, 768 + &fops_io_tlb_used); 931 769 } 932 770 933 771 static int __init __maybe_unused swiotlb_create_default_debugfs(void) ··· 979 815 struct io_tlb_mem *mem = rmem->priv; 980 816 unsigned long nslabs = rmem->size >> IO_TLB_SHIFT; 981 817 818 + /* Set Per-device io tlb area to one */ 819 + unsigned int nareas = 1; 820 + 982 821 /* 983 822 * Since multiple devices can share the 
same pool, the private data, 984 823 * io_tlb_mem struct, will be initialized by the first device attached ··· 998 831 return -ENOMEM; 999 832 } 1000 833 834 + mem->areas = kcalloc(nareas, sizeof(*mem->areas), 835 + GFP_KERNEL); 836 + if (!mem->areas) { 837 + kfree(mem->slots); 838 + kfree(mem); 839 + return -ENOMEM; 840 + } 841 + 1001 842 set_memory_decrypted((unsigned long)phys_to_virt(rmem->base), 1002 843 rmem->size >> PAGE_SHIFT); 1003 844 swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, SWIOTLB_FORCE, 1004 - false); 845 + false, nareas); 1005 846 mem->for_alloc = true; 1006 847 1007 848 rmem->priv = mem;