Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
swiotlb.c at v2.6.26-rc3 (857 lines, 25 kB)

/*
 * Dynamic DMA mapping support.
 *
 * This implementation is a fallback for platforms that do not support
 * I/O TLBs (aka DMA address translation hardware).
 * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
 * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
 * Copyright (C) 2000, 2003 Hewlett-Packard Co
 *      David Mosberger-Tang <davidm@hpl.hp.com>
 *
 * 03/05/07 davidm      Switch from PCI-DMA to generic device DMA API.
 * 00/12/13 davidm      Rename to swiotlb.c and add mark_clean() to avoid
 *                      unnecessary i-cache flushing.
 * 04/07/.. ak          Better overflow handling. Assorted fixes.
 * 05/09/10 linville    Add support for syncing ranges, support syncing for
 *                      DMA_BIDIRECTIONAL mappings, miscellaneous cleanup.
 */

#include <linux/cache.h>
#include <linux/dma-mapping.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ctype.h>

#include <asm/io.h>
#include <asm/dma.h>
#include <asm/scatterlist.h>

#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/iommu-helper.h>

#define OFFSET(val,align) ((unsigned long)      \
        ( (val) & ( (align) - 1)))

#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
#define SG_ENT_PHYS_ADDRESS(sg) virt_to_bus(SG_ENT_VIRT_ADDRESS(sg))

/*
 * Maximum allowable number of contiguous slabs to map,
 * must be a power of 2.  What is the appropriate value ?
 * The complexity of {map,unmap}_single is linearly dependent on this value.
 */
#define IO_TLB_SEGSIZE  128

/*
 * log of the size of each IO TLB slab.  The number of slabs is command line
 * controllable.
 */
#define IO_TLB_SHIFT 11

#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))

/*
 * Minimum IO TLB size to bother booting with.  Systems with mainly
 * 64bit capable cards will only lightly use the swiotlb.  If we can't
 * allocate a contiguous 1MB, we're probably in trouble anyway.
 */
#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
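
/*
 * For concreteness, the constants above imply: each IO TLB slab is
 * 1 << IO_TLB_SHIFT = 2 KB, a segment of IO_TLB_SEGSIZE = 128 slabs is
 * therefore 256 KB (the largest single bounce mapping), the 64 MB default
 * pool used by swiotlb_init() below works out to 32768 slabs, and
 * IO_TLB_MIN_SLABS = 512 slabs is the 1 MB floor mentioned in its comment.
 */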

/*
 * Enumeration for sync targets
 */
enum dma_sync_target {
        SYNC_FOR_CPU = 0,
        SYNC_FOR_DEVICE = 1,
};

int swiotlb_force;

/*
 * Used to do a quick range check in swiotlb_unmap_single and
 * swiotlb_sync_single_*, to see if the memory was in fact allocated by this
 * API.
 */
static char *io_tlb_start, *io_tlb_end;

/*
 * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
 * io_tlb_end.  This is command line adjustable via setup_io_tlb_npages.
 */
static unsigned long io_tlb_nslabs;

/*
 * When the IOMMU overflows we return a fallback buffer. This sets the size.
 */
static unsigned long io_tlb_overflow = 32*1024;

void *io_tlb_overflow_buffer;

/*
 * This is a free list describing the number of free entries available from
 * each index
 */
static unsigned int *io_tlb_list;
static unsigned int io_tlb_index;

/*
 * We need to save away the original address corresponding to a mapped entry
 * for the sync operations.
 */
static unsigned char **io_tlb_orig_addr;

/*
 * Protect the above data structures in the map and unmap calls
 */
static DEFINE_SPINLOCK(io_tlb_lock);

static int __init
setup_io_tlb_npages(char *str)
{
        if (isdigit(*str)) {
                io_tlb_nslabs = simple_strtoul(str, &str, 0);
                /* avoid tail segment of size < IO_TLB_SEGSIZE */
                io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
        }
        if (*str == ',')
                ++str;
        if (!strcmp(str, "force"))
                swiotlb_force = 1;
        return 1;
}
__setup("swiotlb=", setup_io_tlb_npages);
/* make io_tlb_overflow tunable too? */

/*
 * Statically reserve bounce buffer space and initialize bounce buffer data
 * structures for the software IO TLB used to implement the DMA API.
 */
void __init
swiotlb_init_with_default_size(size_t default_size)
{
        unsigned long i, bytes;

        if (!io_tlb_nslabs) {
                io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
                io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
        }

        bytes = io_tlb_nslabs << IO_TLB_SHIFT;

        /*
         * Get IO TLB memory from the low pages
         */
        io_tlb_start = alloc_bootmem_low_pages(bytes);
        if (!io_tlb_start)
                panic("Cannot allocate SWIOTLB buffer");
        io_tlb_end = io_tlb_start + bytes;

        /*
         * Allocate and initialize the free list array.  This array is used
         * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
         * between io_tlb_start and io_tlb_end.
         */
        io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int));
        for (i = 0; i < io_tlb_nslabs; i++)
                io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
        io_tlb_index = 0;
        io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *));

        /*
         * Get the overflow emergency buffer
         */
        io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow);
        if (!io_tlb_overflow_buffer)
                panic("Cannot allocate SWIOTLB overflow buffer!\n");

        printk(KERN_INFO "Placing software IO TLB between 0x%lx - 0x%lx\n",
               virt_to_bus(io_tlb_start), virt_to_bus(io_tlb_end));
}

void __init
swiotlb_init(void)
{
        swiotlb_init_with_default_size(64 * (1<<20));   /* default to 64MB */
}
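
/*
 * The "swiotlb=" boot parameter parsed above takes an optional slab count
 * and/or the word "force".  A few illustrative values (the exact numbers
 * here are only examples; slab sizes follow from IO_TLB_SHIFT):
 *
 *      swiotlb=65536           reserve 65536 slabs (128 MB) at boot
 *      swiotlb=65536,force     same, and bounce every mapping via swiotlb_force
 *      swiotlb=force           keep the default size but force bouncing
 *
 * Counts are rounded up to a multiple of IO_TLB_SEGSIZE by the ALIGN() in
 * setup_io_tlb_npages().
 */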

/*
 * Systems with larger DMA zones (those that don't support ISA) can
 * initialize the swiotlb later using the slab allocator if needed.
 * This should be just like above, but with some error catching.
 */
int
swiotlb_late_init_with_default_size(size_t default_size)
{
        unsigned long i, bytes, req_nslabs = io_tlb_nslabs;
        unsigned int order;

        if (!io_tlb_nslabs) {
                io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
                io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
        }

        /*
         * Get IO TLB memory from the low pages
         */
        order = get_order(io_tlb_nslabs << IO_TLB_SHIFT);
        io_tlb_nslabs = SLABS_PER_PAGE << order;
        bytes = io_tlb_nslabs << IO_TLB_SHIFT;

        while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
                io_tlb_start = (char *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
                                                        order);
                if (io_tlb_start)
                        break;
                order--;
        }

        if (!io_tlb_start)
                goto cleanup1;

        if (order != get_order(bytes)) {
                printk(KERN_WARNING "Warning: only able to allocate %ld MB "
                       "for software IO TLB\n", (PAGE_SIZE << order) >> 20);
                io_tlb_nslabs = SLABS_PER_PAGE << order;
                bytes = io_tlb_nslabs << IO_TLB_SHIFT;
        }
        io_tlb_end = io_tlb_start + bytes;
        memset(io_tlb_start, 0, bytes);

        /*
         * Allocate and initialize the free list array.  This array is used
         * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
         * between io_tlb_start and io_tlb_end.
         */
        io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
                                      get_order(io_tlb_nslabs * sizeof(int)));
        if (!io_tlb_list)
                goto cleanup2;

        for (i = 0; i < io_tlb_nslabs; i++)
                io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
        io_tlb_index = 0;

        io_tlb_orig_addr = (unsigned char **)__get_free_pages(GFP_KERNEL,
                                   get_order(io_tlb_nslabs * sizeof(char *)));
        if (!io_tlb_orig_addr)
                goto cleanup3;

        memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(char *));

        /*
         * Get the overflow emergency buffer
         */
        io_tlb_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
                                                  get_order(io_tlb_overflow));
        if (!io_tlb_overflow_buffer)
                goto cleanup4;

        printk(KERN_INFO "Placing %luMB software IO TLB between 0x%lx - "
               "0x%lx\n", bytes >> 20,
               virt_to_bus(io_tlb_start), virt_to_bus(io_tlb_end));

        return 0;

cleanup4:
        free_pages((unsigned long)io_tlb_orig_addr, get_order(io_tlb_nslabs *
                                                              sizeof(char *)));
        io_tlb_orig_addr = NULL;
cleanup3:
        free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
                                                         sizeof(int)));
        io_tlb_list = NULL;
cleanup2:
        io_tlb_end = NULL;
        free_pages((unsigned long)io_tlb_start, order);
        io_tlb_start = NULL;
cleanup1:
        io_tlb_nslabs = req_nslabs;
        return -ENOMEM;
}

static int
address_needs_mapping(struct device *hwdev, dma_addr_t addr)
{
        dma_addr_t mask = 0xffffffff;
        /* If the device has a mask, use it, otherwise default to 32 bits */
        if (hwdev && hwdev->dma_mask)
                mask = *hwdev->dma_mask;
        return (addr & ~mask) != 0;
}
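
/*
 * Worked example for address_needs_mapping() (illustrative numbers only):
 * for a device whose dma_mask is 0x00ffffff (24 bits), a bus address of
 * 0x00fff000 gives 0x00fff000 & ~0x00ffffff == 0, so it is reachable and no
 * bounce buffer is needed; 0x01200000 gives 0x01000000 != 0, so the buffer
 * must be bounced through the IO TLB.
 */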

/*
 * Allocates bounce buffer and returns its kernel virtual address.
 */
static void *
map_single(struct device *hwdev, char *buffer, size_t size, int dir)
{
        unsigned long flags;
        char *dma_addr;
        unsigned int nslots, stride, index, wrap;
        int i;
        unsigned long start_dma_addr;
        unsigned long mask;
        unsigned long offset_slots;
        unsigned long max_slots;

        mask = dma_get_seg_boundary(hwdev);
        start_dma_addr = virt_to_bus(io_tlb_start) & mask;

        offset_slots = ALIGN(start_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
        max_slots = mask + 1
                    ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT
                    : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);

        /*
         * For mappings greater than a page, we limit the stride (and
         * hence alignment) to a page size.
         */
        nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
        if (size > PAGE_SIZE)
                stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
        else
                stride = 1;

        BUG_ON(!nslots);

        /*
         * Find suitable number of IO TLB entries size that will fit this
         * request and allocate a buffer from that IO TLB pool.
         */
        spin_lock_irqsave(&io_tlb_lock, flags);
        index = ALIGN(io_tlb_index, stride);
        if (index >= io_tlb_nslabs)
                index = 0;
        wrap = index;

        do {
                while (iommu_is_span_boundary(index, nslots, offset_slots,
                                              max_slots)) {
                        index += stride;
                        if (index >= io_tlb_nslabs)
                                index = 0;
                        if (index == wrap)
                                goto not_found;
                }

                /*
                 * If we find a slot that indicates we have 'nslots' number of
                 * contiguous buffers, we allocate the buffers from that slot
                 * and mark the entries as '0' indicating unavailable.
                 */
                if (io_tlb_list[index] >= nslots) {
                        int count = 0;

                        for (i = index; i < (int) (index + nslots); i++)
                                io_tlb_list[i] = 0;
                        for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
                                io_tlb_list[i] = ++count;
                        dma_addr = io_tlb_start + (index << IO_TLB_SHIFT);

                        /*
                         * Update the indices to avoid searching in the next
                         * round.
                         */
                        io_tlb_index = ((index + nslots) < io_tlb_nslabs
                                        ? (index + nslots) : 0);

                        goto found;
                }
                index += stride;
                if (index >= io_tlb_nslabs)
                        index = 0;
        } while (index != wrap);

not_found:
        spin_unlock_irqrestore(&io_tlb_lock, flags);
        return NULL;
found:
        spin_unlock_irqrestore(&io_tlb_lock, flags);

        /*
         * Save away the mapping from the original address to the DMA address.
         * This is needed when we sync the memory.  Then we sync the buffer if
         * needed.
         */
        for (i = 0; i < nslots; i++)
                io_tlb_orig_addr[index+i] = buffer + (i << IO_TLB_SHIFT);
        if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
                memcpy(dma_addr, buffer, size);

        return dma_addr;
}
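
/*
 * Worked example of the io_tlb_list bookkeeping above, using a segment of
 * only 4 slots for illustration (the real segment size is IO_TLB_SEGSIZE):
 * a fresh segment reads {4, 3, 2, 1}, i.e. how many contiguous free slots
 * start at each index.  Allocating nslots = 2 at index 1 zeroes those two
 * entries and renumbers the free slot before them, giving {1, 0, 0, 1}.
 * unmap_single() below reverses this, merging with the free slots on both
 * sides to restore {4, 3, 2, 1}.
 */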

/*
 * dma_addr is the kernel virtual address of the bounce buffer to unmap.
 */
static void
unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
{
        unsigned long flags;
        int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
        int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
        char *buffer = io_tlb_orig_addr[index];

        /*
         * First, sync the memory before unmapping the entry
         */
        if (buffer && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
                /*
                 * bounce... copy the data back into the original buffer and
                 * delete the bounce buffer.
                 */
                memcpy(buffer, dma_addr, size);

        /*
         * Return the buffer to the free list by setting the corresponding
         * entries to indicate the number of contiguous entries available.
         * While returning the entries to the free list, we merge the entries
         * with slots below and above the pool being returned.
         */
        spin_lock_irqsave(&io_tlb_lock, flags);
        {
                count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
                         io_tlb_list[index + nslots] : 0);
                /*
                 * Step 1: return the slots to the free list, merging the
                 * slots with succeeding slots
                 */
                for (i = index + nslots - 1; i >= index; i--)
                        io_tlb_list[i] = ++count;
                /*
                 * Step 2: merge the returned slots with the preceding slots,
                 * if available (non zero)
                 */
                for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
                        io_tlb_list[i] = ++count;
        }
        spin_unlock_irqrestore(&io_tlb_lock, flags);
}

static void
sync_single(struct device *hwdev, char *dma_addr, size_t size,
            int dir, int target)
{
        int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
        char *buffer = io_tlb_orig_addr[index];

        buffer += ((unsigned long)dma_addr & ((1 << IO_TLB_SHIFT) - 1));

        switch (target) {
        case SYNC_FOR_CPU:
                if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
                        memcpy(buffer, dma_addr, size);
                else
                        BUG_ON(dir != DMA_TO_DEVICE);
                break;
        case SYNC_FOR_DEVICE:
                if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
                        memcpy(dma_addr, buffer, size);
                else
                        BUG_ON(dir != DMA_FROM_DEVICE);
                break;
        default:
                BUG();
        }
}

void *
swiotlb_alloc_coherent(struct device *hwdev, size_t size,
                       dma_addr_t *dma_handle, gfp_t flags)
{
        dma_addr_t dev_addr;
        void *ret;
        int order = get_order(size);

        /*
         * XXX fix me: the DMA API should pass us an explicit DMA mask
         * instead, or use ZONE_DMA32 (ia64 overloads ZONE_DMA to be a ~32
         * bit range instead of a 16MB one).
         */
        flags |= GFP_DMA;

        ret = (void *)__get_free_pages(flags, order);
        if (ret && address_needs_mapping(hwdev, virt_to_bus(ret))) {
                /*
                 * The allocated memory isn't reachable by the device.
                 * Fall back on swiotlb_map_single().
                 */
                free_pages((unsigned long) ret, order);
                ret = NULL;
        }
        if (!ret) {
                /*
                 * We are either out of memory or the device can't DMA
                 * to GFP_DMA memory; fall back on
                 * swiotlb_map_single(), which will grab memory from
                 * the lowest available address range.
                 */
                dma_addr_t handle;
                handle = swiotlb_map_single(NULL, NULL, size, DMA_FROM_DEVICE);
                if (swiotlb_dma_mapping_error(handle))
                        return NULL;

                ret = bus_to_virt(handle);
        }

        memset(ret, 0, size);
        dev_addr = virt_to_bus(ret);

        /* Confirm address can be DMA'd by device */
        if (address_needs_mapping(hwdev, dev_addr)) {
                printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
                       (unsigned long long)*hwdev->dma_mask,
                       (unsigned long long)dev_addr);
                panic("swiotlb_alloc_coherent: allocated memory is out of "
                      "range for device");
        }
        *dma_handle = dev_addr;
        return ret;
}

void
swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
                      dma_addr_t dma_handle)
{
        WARN_ON(irqs_disabled());
        if (!(vaddr >= (void *)io_tlb_start
              && vaddr < (void *)io_tlb_end))
                free_pages((unsigned long) vaddr, get_order(size));
        else
                /* DMA_TO_DEVICE to avoid memcpy in unmap_single */
                swiotlb_unmap_single(hwdev, dma_handle, size, DMA_TO_DEVICE);
}
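
/*
 * Illustrative sketch of how the coherent pair above might be used from a
 * driver; the function name, "dev" and the 4 KB size are hypothetical, and
 * real drivers would normally go through dma_alloc_coherent() rather than
 * calling swiotlb directly.
 */
#if 0
static int example_alloc_ring(struct device *dev)
{
        dma_addr_t ring_dma;
        void *ring;

        ring = swiotlb_alloc_coherent(dev, 4096, &ring_dma, GFP_KERNEL);
        if (!ring)
                return -ENOMEM;
        /* ... program ring_dma into the device, touch "ring" from the CPU ... */
        swiotlb_free_coherent(dev, 4096, ring, ring_dma);
        return 0;
}
#endif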

static void
swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
{
        /*
         * Ran out of IOMMU space for this operation. This is very bad.
         * Unfortunately the drivers cannot handle this operation properly
         * unless they check for dma_mapping_error (most don't).
         * When the mapping is small enough return a static buffer to limit
         * the damage, or panic when the transfer is too big.
         */
        printk(KERN_ERR "DMA: Out of SW-IOMMU space for %zu bytes at "
               "device %s\n", size, dev ? dev->bus_id : "?");

        if (size > io_tlb_overflow && do_panic) {
                if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
                        panic("DMA: Memory would be corrupted\n");
                if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
                        panic("DMA: Random memory would be DMAed\n");
        }
}

/*
 * Map a single buffer of the indicated size for DMA in streaming mode.  The
 * physical address to use is returned.
 *
 * Once the device is given the dma address, the device owns this memory until
 * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed.
 */
dma_addr_t
swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
                         int dir, struct dma_attrs *attrs)
{
        dma_addr_t dev_addr = virt_to_bus(ptr);
        void *map;

        BUG_ON(dir == DMA_NONE);
        /*
         * If the pointer passed in happens to be in the device's DMA window,
         * we can safely return the device addr and not worry about bounce
         * buffering it.
         */
        if (!address_needs_mapping(hwdev, dev_addr) && !swiotlb_force)
                return dev_addr;

        /*
         * Oh well, have to allocate and map a bounce buffer.
         */
        map = map_single(hwdev, ptr, size, dir);
        if (!map) {
                swiotlb_full(hwdev, size, dir, 1);
                map = io_tlb_overflow_buffer;
        }

        dev_addr = virt_to_bus(map);

        /*
         * Ensure that the address returned is DMA'ble
         */
        if (address_needs_mapping(hwdev, dev_addr))
                panic("map_single: bounce buffer is not DMA'ble");

        return dev_addr;
}
EXPORT_SYMBOL(swiotlb_map_single_attrs);

dma_addr_t
swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
{
        return swiotlb_map_single_attrs(hwdev, ptr, size, dir, NULL);
}
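
/*
 * Illustrative sketch of the streaming path above, including the
 * dma_mapping_error check that swiotlb_full() complains most drivers skip.
 * The function name, "dev", "buf" and "len" are hypothetical.
 */
#if 0
static int example_send(struct device *dev, void *buf, size_t len)
{
        dma_addr_t dma;

        dma = swiotlb_map_single(dev, buf, len, DMA_TO_DEVICE);
        if (swiotlb_dma_mapping_error(dma))
                return -ENOMEM;         /* fell back to the overflow buffer */
        /* ... hand "dma" to the device and wait for the transfer ... */
        swiotlb_unmap_single(dev, dma, len, DMA_TO_DEVICE);
        return 0;
}
#endif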

/*
 * Unmap a single streaming mode DMA translation.  The dma_addr and size must
 * match what was provided for in a previous swiotlb_map_single call.  All
 * other usages are undefined.
 *
 * After this call, reads by the cpu to the buffer are guaranteed to see
 * whatever the device wrote there.
 */
void
swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr,
                           size_t size, int dir, struct dma_attrs *attrs)
{
        char *dma_addr = bus_to_virt(dev_addr);

        BUG_ON(dir == DMA_NONE);
        if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
                unmap_single(hwdev, dma_addr, size, dir);
        else if (dir == DMA_FROM_DEVICE)
                dma_mark_clean(dma_addr, size);
}
EXPORT_SYMBOL(swiotlb_unmap_single_attrs);

void
swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size,
                     int dir)
{
        return swiotlb_unmap_single_attrs(hwdev, dev_addr, size, dir, NULL);
}

/*
 * Make physical memory consistent for a single streaming mode DMA translation
 * after a transfer.
 *
 * If you perform a swiotlb_map_single() but wish to interrogate the buffer
 * using the cpu, yet do not wish to tear down the dma mapping, you must
 * call this function before doing so.  At the next point you give the dma
 * address back to the card, you must first perform a
 * swiotlb_dma_sync_for_device, and then the device again owns the buffer
 */
static void
swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
                    size_t size, int dir, int target)
{
        char *dma_addr = bus_to_virt(dev_addr);

        BUG_ON(dir == DMA_NONE);
        if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
                sync_single(hwdev, dma_addr, size, dir, target);
        else if (dir == DMA_FROM_DEVICE)
                dma_mark_clean(dma_addr, size);
}

void
swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
                            size_t size, int dir)
{
        swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU);
}

void
swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
                               size_t size, int dir)
{
        swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE);
}

/*
 * Same as above, but for a sub-range of the mapping.
 */
static void
swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
                          unsigned long offset, size_t size,
                          int dir, int target)
{
        char *dma_addr = bus_to_virt(dev_addr) + offset;

        BUG_ON(dir == DMA_NONE);
        if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
                sync_single(hwdev, dma_addr, size, dir, target);
        else if (dir == DMA_FROM_DEVICE)
                dma_mark_clean(dma_addr, size);
}

void
swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
                                  unsigned long offset, size_t size, int dir)
{
        swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
                                  SYNC_FOR_CPU);
}

void
swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr,
                                     unsigned long offset, size_t size, int dir)
{
        swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
                                  SYNC_FOR_DEVICE);
}
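
/*
 * Illustrative sketch of the sync pair above for a long-lived
 * DMA_FROM_DEVICE mapping that is reused instead of being remapped for
 * every transfer; the function name, "dev", "dma", "buf" and "len" are
 * hypothetical.
 */
#if 0
static void example_poll_rx(struct device *dev, dma_addr_t dma,
                            void *buf, size_t len)
{
        /* the device has finished writing; make the data visible to the CPU */
        swiotlb_sync_single_for_cpu(dev, dma, len, DMA_FROM_DEVICE);
        /* ... inspect "buf" ... */
        /* hand the buffer back to the device for the next transfer */
        swiotlb_sync_single_for_device(dev, dma, len, DMA_FROM_DEVICE);
}
#endif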

void swiotlb_unmap_sg_attrs(struct device *, struct scatterlist *, int, int,
                            struct dma_attrs *);
/*
 * Map a set of buffers described by scatterlist in streaming mode for DMA.
 * This is the scatter-gather version of the above swiotlb_map_single
 * interface.  Here the scatter gather list elements are each tagged with the
 * appropriate dma address and length.  They are obtained via
 * sg_dma_{address,length}(SG).
 *
 * NOTE: An implementation may be able to use a smaller number of
 * DMA address/length pairs than there are SG table elements.
 * (for example via virtual mapping capabilities)
 * The routine returns the number of addr/length pairs actually
 * used, at most nents.
 *
 * Device ownership issues as mentioned above for swiotlb_map_single are the
 * same here.
 */
int
swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
                     int dir, struct dma_attrs *attrs)
{
        struct scatterlist *sg;
        void *addr;
        dma_addr_t dev_addr;
        int i;

        BUG_ON(dir == DMA_NONE);

        for_each_sg(sgl, sg, nelems, i) {
                addr = SG_ENT_VIRT_ADDRESS(sg);
                dev_addr = virt_to_bus(addr);
                if (swiotlb_force || address_needs_mapping(hwdev, dev_addr)) {
                        void *map = map_single(hwdev, addr, sg->length, dir);
                        if (!map) {
                                /* Don't panic here, we expect map_sg users
                                   to do proper error handling. */
                                swiotlb_full(hwdev, sg->length, dir, 0);
                                swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
                                                       attrs);
                                sgl[0].dma_length = 0;
                                return 0;
                        }
                        sg->dma_address = virt_to_bus(map);
                } else
                        sg->dma_address = dev_addr;
                sg->dma_length = sg->length;
        }
        return nelems;
}
EXPORT_SYMBOL(swiotlb_map_sg_attrs);

int
swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
               int dir)
{
        return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL);
}

/*
 * Unmap a set of streaming mode DMA translations.  Again, cpu read rules
 * concerning calls here are the same as for swiotlb_unmap_single() above.
 */
void
swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
                       int nelems, int dir, struct dma_attrs *attrs)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(dir == DMA_NONE);

        for_each_sg(sgl, sg, nelems, i) {
                if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
                        unmap_single(hwdev, bus_to_virt(sg->dma_address),
                                     sg->dma_length, dir);
                else if (dir == DMA_FROM_DEVICE)
                        dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length);
        }
}
EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);

void
swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
                 int dir)
{
        return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL);
}

/*
 * Make physical memory consistent for a set of streaming mode DMA translations
 * after a transfer.
 *
 * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
 * and usage.
 */
static void
swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
                int nelems, int dir, int target)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(dir == DMA_NONE);

        for_each_sg(sgl, sg, nelems, i) {
                if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
                        sync_single(hwdev, bus_to_virt(sg->dma_address),
                                    sg->dma_length, dir, target);
                else if (dir == DMA_FROM_DEVICE)
                        dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length);
        }
}

void
swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
                        int nelems, int dir)
{
        swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU);
}

void
swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
                           int nelems, int dir)
{
        swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
}
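
/*
 * Illustrative sketch of the scatter-gather interface above; the function
 * name, "dev", "sgl" and "nents" are hypothetical, and "sgl" is assumed to
 * be an already-initialised scatterlist.
 */
#if 0
static int example_map_request(struct device *dev, struct scatterlist *sgl,
                               int nents)
{
        struct scatterlist *sg;
        int i, mapped;

        mapped = swiotlb_map_sg(dev, sgl, nents, DMA_TO_DEVICE);
        if (!mapped)
                return -ENOMEM;
        for_each_sg(sgl, sg, mapped, i) {
                /* program sg_dma_address(sg) / sg_dma_length(sg) into the device */
        }
        swiotlb_unmap_sg(dev, sgl, mapped, DMA_TO_DEVICE);
        return 0;
}
#endif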

int
swiotlb_dma_mapping_error(dma_addr_t dma_addr)
{
        return (dma_addr == virt_to_bus(io_tlb_overflow_buffer));
}

/*
 * Return whether the given device DMA address mask can be supported
 * properly.  For example, if your device can only drive the low 24-bits
 * during bus mastering, then you would pass 0x00ffffff as the mask to
 * this function.
 */
int
swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
        return virt_to_bus(io_tlb_end - 1) <= mask;
}

EXPORT_SYMBOL(swiotlb_map_single);
EXPORT_SYMBOL(swiotlb_unmap_single);
EXPORT_SYMBOL(swiotlb_map_sg);
EXPORT_SYMBOL(swiotlb_unmap_sg);
EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);
EXPORT_SYMBOL(swiotlb_sync_single_for_device);
EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu);
EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);
EXPORT_SYMBOL(swiotlb_sync_sg_for_device);
EXPORT_SYMBOL(swiotlb_dma_mapping_error);
EXPORT_SYMBOL(swiotlb_alloc_coherent);
EXPORT_SYMBOL(swiotlb_free_coherent);
EXPORT_SYMBOL(swiotlb_dma_supported);
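
/*
 * Illustrative sketch of swiotlb_dma_supported() as described in its comment
 * above: a driver limited to 24-bit addressing could probe whether the bounce
 * pool is reachable before committing to that mask.  The function name and
 * "dev" are hypothetical.
 */
#if 0
static int example_check_mask(struct device *dev)
{
        if (!swiotlb_dma_supported(dev, 0x00ffffffULL))
                return -EIO;    /* the IO TLB ends above what a 24-bit device can reach */
        return 0;
}
#endif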