/*
 * Dynamic DMA mapping support.
 *
 * This implementation is a fallback for platforms that do not support
 * I/O TLBs (aka DMA address translation hardware).
 * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
 * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
 * Copyright (C) 2000, 2003 Hewlett-Packard Co
 *	David Mosberger-Tang <davidm@hpl.hp.com>
 *
 * 03/05/07 davidm	Switch from PCI-DMA to generic device DMA API.
 * 00/12/13 davidm	Rename to swiotlb.c and add mark_clean() to avoid
 *			unnecessary i-cache flushing.
 * 04/07/.. ak		Better overflow handling. Assorted fixes.
 * 05/09/10 linville	Add support for syncing ranges, support syncing for
 *			DMA_BIDIRECTIONAL mappings, miscellaneous cleanup.
 * 08/12/11 beckyb	Add highmem support
 */

#include <linux/cache.h>
#include <linux/dma-mapping.h>
#include <linux/mm.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/swiotlb.h>
#include <linux/pfn.h>
#include <linux/types.h>
#include <linux/ctype.h>
#include <linux/highmem.h>
#include <linux/gfp.h>

#include <asm/io.h>
#include <asm/dma.h>
#include <asm/scatterlist.h>

#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/iommu-helper.h>

#define OFFSET(val,align) ((unsigned long)	\
	( (val) & ( (align) - 1)))

#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))

/*
 * Minimum IO TLB size to bother booting with.  Systems with mainly
 * 64bit capable cards will only lightly use the swiotlb.  If we can't
 * allocate a contiguous 1MB, we're probably in trouble anyway.
 */
#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)

int swiotlb_force;

/*
 * Used to do a quick range check in swiotlb_tbl_unmap_single and
 * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
 * API.
 */
static phys_addr_t io_tlb_start, io_tlb_end;

/*
 * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
 * io_tlb_end.  This is command line adjustable via setup_io_tlb_npages.
 */
static unsigned long io_tlb_nslabs;

/*
 * When the IOMMU overflows we return a fallback buffer. This sets the size.
 */
static unsigned long io_tlb_overflow = 32*1024;

static phys_addr_t io_tlb_overflow_buffer;

/*
 * This is a free list describing the number of free entries available from
 * each index
 */
static unsigned int *io_tlb_list;
static unsigned int io_tlb_index;

/*
 * We need to save away the original address corresponding to a mapped entry
 * for the sync operations.
 */
static phys_addr_t *io_tlb_orig_addr;

/*
 * Protect the above data structures in the map and unmap calls
 */
static DEFINE_SPINLOCK(io_tlb_lock);

static int late_alloc;

static int __init
setup_io_tlb_npages(char *str)
{
	if (isdigit(*str)) {
		io_tlb_nslabs = simple_strtoul(str, &str, 0);
		/* avoid tail segment of size < IO_TLB_SEGSIZE */
		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
	}
	if (*str == ',')
		++str;
	if (!strcmp(str, "force"))
		swiotlb_force = 1;

	return 0;
}
early_param("swiotlb", setup_io_tlb_npages);
/* make io_tlb_overflow tunable too? */
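/*
 * The parser above accepts "swiotlb=<nslabs>[,force]" (or just
 * "swiotlb=force"): <nslabs> is rounded up to a multiple of IO_TLB_SEGSIZE,
 * and "force" makes every streaming mapping go through the bounce buffers
 * even when the device could have reached the memory directly.
 */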

unsigned long swiotlb_nr_tbl(void)
{
	return io_tlb_nslabs;
}
EXPORT_SYMBOL_GPL(swiotlb_nr_tbl);

/* default to 64MB */
#define IO_TLB_DEFAULT_SIZE (64UL<<20)
unsigned long swiotlb_size_or_default(void)
{
	unsigned long size;

	size = io_tlb_nslabs << IO_TLB_SHIFT;

	return size ? size : (IO_TLB_DEFAULT_SIZE);
}

/* Note that this doesn't work with highmem pages */
static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
				      volatile void *address)
{
	return phys_to_dma(hwdev, virt_to_phys(address));
}

static bool no_iotlb_memory;

void swiotlb_print_info(void)
{
	unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
	unsigned char *vstart, *vend;

	if (no_iotlb_memory) {
		pr_warn("software IO TLB: No low mem\n");
		return;
	}

	vstart = phys_to_virt(io_tlb_start);
	vend = phys_to_virt(io_tlb_end);

	printk(KERN_INFO "software IO TLB [mem %#010llx-%#010llx] (%luMB) mapped at [%p-%p]\n",
	       (unsigned long long)io_tlb_start,
	       (unsigned long long)io_tlb_end,
	       bytes >> 20, vstart, vend - 1);
}

int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
{
	void *v_overflow_buffer;
	unsigned long i, bytes;

	bytes = nslabs << IO_TLB_SHIFT;

	io_tlb_nslabs = nslabs;
	io_tlb_start = __pa(tlb);
	io_tlb_end = io_tlb_start + bytes;

	/*
	 * Get the overflow emergency buffer
	 */
	v_overflow_buffer = alloc_bootmem_low_pages_nopanic(
						PAGE_ALIGN(io_tlb_overflow));
	if (!v_overflow_buffer)
		return -ENOMEM;

	io_tlb_overflow_buffer = __pa(v_overflow_buffer);

	/*
	 * Allocate and initialize the free list array.  This array is used
	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
	 * between io_tlb_start and io_tlb_end.
	 */
	io_tlb_list = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
	for (i = 0; i < io_tlb_nslabs; i++)
		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
	io_tlb_index = 0;
	io_tlb_orig_addr = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));

	if (verbose)
		swiotlb_print_info();

	return 0;
}
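/*
 * Free-list encoding used by both init paths: io_tlb_list[i] counts how many
 * contiguous slots are free starting at index i, never counting past the end
 * of the IO_TLB_SEGSIZE-aligned segment the slot belongs to.  A freshly
 * initialized segment therefore reads IO_TLB_SEGSIZE, IO_TLB_SEGSIZE - 1,
 * ..., 1 (128, 127, ..., 1 with the usual segment size of 128).
 * swiotlb_tbl_map_single() only has to inspect one entry to know whether
 * 'nslots' contiguous slots are available at a given index, and
 * swiotlb_tbl_unmap_single() restores the counts by walking backwards over
 * the neighbouring free entries.
 */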
/*
 * Statically reserve bounce buffer space and initialize bounce buffer data
 * structures for the software IO TLB used to implement the DMA API.
 */
void __init
swiotlb_init(int verbose)
{
	size_t default_size = IO_TLB_DEFAULT_SIZE;
	unsigned char *vstart;
	unsigned long bytes;

	if (!io_tlb_nslabs) {
		io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
	}

	bytes = io_tlb_nslabs << IO_TLB_SHIFT;

	/* Get IO TLB memory from the low pages */
	vstart = alloc_bootmem_low_pages_nopanic(PAGE_ALIGN(bytes));
	if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
		return;

	if (io_tlb_start)
		free_bootmem(io_tlb_start,
			     PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
	pr_warn("Cannot allocate SWIOTLB buffer");
	no_iotlb_memory = true;
}

/*
 * Systems with larger DMA zones (those that don't support ISA) can
 * initialize the swiotlb later using the slab allocator if needed.
 * This should be just like above, but with some error catching.
 */
int
swiotlb_late_init_with_default_size(size_t default_size)
{
	unsigned long bytes, req_nslabs = io_tlb_nslabs;
	unsigned char *vstart = NULL;
	unsigned int order;
	int rc = 0;

	if (!io_tlb_nslabs) {
		io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
	}

	/*
	 * Get IO TLB memory from the low pages
	 */
	order = get_order(io_tlb_nslabs << IO_TLB_SHIFT);
	io_tlb_nslabs = SLABS_PER_PAGE << order;
	bytes = io_tlb_nslabs << IO_TLB_SHIFT;

	while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
		vstart = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
						  order);
		if (vstart)
			break;
		order--;
	}

	if (!vstart) {
		io_tlb_nslabs = req_nslabs;
		return -ENOMEM;
	}
	if (order != get_order(bytes)) {
		printk(KERN_WARNING "Warning: only able to allocate %ld MB "
		       "for software IO TLB\n", (PAGE_SIZE << order) >> 20);
		io_tlb_nslabs = SLABS_PER_PAGE << order;
	}
	rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs);
	if (rc)
		free_pages((unsigned long)vstart, order);
	return rc;
}
int
swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
{
	unsigned long i, bytes;
	unsigned char *v_overflow_buffer;

	bytes = nslabs << IO_TLB_SHIFT;

	io_tlb_nslabs = nslabs;
	io_tlb_start = virt_to_phys(tlb);
	io_tlb_end = io_tlb_start + bytes;

	memset(tlb, 0, bytes);

	/*
	 * Get the overflow emergency buffer
	 */
	v_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
						     get_order(io_tlb_overflow));
	if (!v_overflow_buffer)
		goto cleanup2;

	io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer);

	/*
	 * Allocate and initialize the free list array.  This array is used
	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
	 * between io_tlb_start and io_tlb_end.
	 */
	io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
						get_order(io_tlb_nslabs * sizeof(int)));
	if (!io_tlb_list)
		goto cleanup3;

	for (i = 0; i < io_tlb_nslabs; i++)
		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
	io_tlb_index = 0;

	io_tlb_orig_addr = (phys_addr_t *)
		__get_free_pages(GFP_KERNEL,
				 get_order(io_tlb_nslabs *
					   sizeof(phys_addr_t)));
	if (!io_tlb_orig_addr)
		goto cleanup4;

	memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(phys_addr_t));

	swiotlb_print_info();

	late_alloc = 1;

	return 0;

cleanup4:
	free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
							 sizeof(int)));
	io_tlb_list = NULL;
cleanup3:
	free_pages((unsigned long)v_overflow_buffer,
		   get_order(io_tlb_overflow));
	io_tlb_overflow_buffer = 0;
cleanup2:
	io_tlb_end = 0;
	io_tlb_start = 0;
	io_tlb_nslabs = 0;
	return -ENOMEM;
}

void __init swiotlb_free(void)
{
	if (!io_tlb_orig_addr)
		return;

	if (late_alloc) {
		free_pages((unsigned long)phys_to_virt(io_tlb_overflow_buffer),
			   get_order(io_tlb_overflow));
		free_pages((unsigned long)io_tlb_orig_addr,
			   get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
		free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
								 sizeof(int)));
		free_pages((unsigned long)phys_to_virt(io_tlb_start),
			   get_order(io_tlb_nslabs << IO_TLB_SHIFT));
	} else {
		free_bootmem_late(io_tlb_overflow_buffer,
				  PAGE_ALIGN(io_tlb_overflow));
		free_bootmem_late(__pa(io_tlb_orig_addr),
				  PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
		free_bootmem_late(__pa(io_tlb_list),
				  PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
		free_bootmem_late(io_tlb_start,
				  PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
	}
	io_tlb_nslabs = 0;
}

static int is_swiotlb_buffer(phys_addr_t paddr)
{
	return paddr >= io_tlb_start && paddr < io_tlb_end;
}

/*
 * Bounce: copy the swiotlb buffer back to the original dma location
 */
static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
			   size_t size, enum dma_data_direction dir)
{
	unsigned long pfn = PFN_DOWN(orig_addr);
	unsigned char *vaddr = phys_to_virt(tlb_addr);

	if (PageHighMem(pfn_to_page(pfn))) {
		/* The buffer does not have a mapping.  Map it in and copy */
		unsigned int offset = orig_addr & ~PAGE_MASK;
		char *buffer;
		unsigned int sz = 0;
		unsigned long flags;

		while (size) {
			sz = min_t(size_t, PAGE_SIZE - offset, size);

			local_irq_save(flags);
			buffer = kmap_atomic(pfn_to_page(pfn));
			if (dir == DMA_TO_DEVICE)
				memcpy(vaddr, buffer + offset, sz);
			else
				memcpy(buffer + offset, vaddr, sz);
			kunmap_atomic(buffer);
			local_irq_restore(flags);

			size -= sz;
			pfn++;
			vaddr += sz;
			offset = 0;
		}
	} else if (dir == DMA_TO_DEVICE) {
		memcpy(vaddr, phys_to_virt(orig_addr), size);
	} else {
		memcpy(phys_to_virt(orig_addr), vaddr, size);
	}
}
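/*
 * The allocator below scans io_tlb_list starting at io_tlb_index, advancing
 * in units of 'stride' (a whole page worth of slots for requests larger than
 * a page, so big bounce buffers stay page aligned), skipping any candidate
 * span that would straddle the device's segment boundary
 * (iommu_is_span_boundary()), and wrapping around at most once before giving
 * up and returning SWIOTLB_MAP_ERROR.
 */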
phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
				   dma_addr_t tbl_dma_addr,
				   phys_addr_t orig_addr, size_t size,
				   enum dma_data_direction dir)
{
	unsigned long flags;
	phys_addr_t tlb_addr;
	unsigned int nslots, stride, index, wrap;
	int i;
	unsigned long mask;
	unsigned long offset_slots;
	unsigned long max_slots;

	if (no_iotlb_memory)
		panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");

	mask = dma_get_seg_boundary(hwdev);

	tbl_dma_addr &= mask;

	offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;

	/*
	 * Carefully handle integer overflow which can occur when mask == ~0UL.
	 */
	max_slots = mask + 1
		    ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT
		    : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);

	/*
	 * For mappings greater than a page, we limit the stride (and
	 * hence alignment) to a page size.
	 */
	nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
	if (size > PAGE_SIZE)
		stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
	else
		stride = 1;

	BUG_ON(!nslots);

	/*
	 * Find a suitable number of IO TLB entries that will fit this
	 * request and allocate a buffer from that IO TLB pool.
	 */
	spin_lock_irqsave(&io_tlb_lock, flags);
	index = ALIGN(io_tlb_index, stride);
	if (index >= io_tlb_nslabs)
		index = 0;
	wrap = index;

	do {
		while (iommu_is_span_boundary(index, nslots, offset_slots,
					      max_slots)) {
			index += stride;
			if (index >= io_tlb_nslabs)
				index = 0;
			if (index == wrap)
				goto not_found;
		}

		/*
		 * If we find a slot that indicates we have 'nslots' number of
		 * contiguous buffers, we allocate the buffers from that slot
		 * and mark the entries as '0' indicating unavailable.
		 */
		if (io_tlb_list[index] >= nslots) {
			int count = 0;

			for (i = index; i < (int) (index + nslots); i++)
				io_tlb_list[i] = 0;
			for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
				io_tlb_list[i] = ++count;
			tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT);

			/*
			 * Update the indices to avoid searching in the next
			 * round.
			 */
			io_tlb_index = ((index + nslots) < io_tlb_nslabs
					? (index + nslots) : 0);

			goto found;
		}
		index += stride;
		if (index >= io_tlb_nslabs)
			index = 0;
	} while (index != wrap);

not_found:
	spin_unlock_irqrestore(&io_tlb_lock, flags);
	return SWIOTLB_MAP_ERROR;
found:
	spin_unlock_irqrestore(&io_tlb_lock, flags);

	/*
	 * Save away the mapping from the original address to the DMA address.
	 * This is needed when we sync the memory.  Then we sync the buffer if
	 * needed.
	 */
	for (i = 0; i < nslots; i++)
		io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
		swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE);

	return tlb_addr;
}
EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single);
/*
 * Allocates a bounce buffer and returns its physical address.
 */

phys_addr_t map_single(struct device *hwdev, phys_addr_t phys, size_t size,
		       enum dma_data_direction dir)
{
	dma_addr_t start_dma_addr = phys_to_dma(hwdev, io_tlb_start);

	return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir);
}

/*
 * tlb_addr is the physical address of the bounce buffer to unmap.
 */
void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
			      size_t size, enum dma_data_direction dir)
{
	unsigned long flags;
	int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
	int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
	phys_addr_t orig_addr = io_tlb_orig_addr[index];

	/*
	 * First, sync the memory before unmapping the entry
	 */
	if (orig_addr && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
		swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE);

	/*
	 * Return the buffer to the free list by setting the corresponding
	 * entries to indicate the number of contiguous entries available.
	 * While returning the entries to the free list, we merge the entries
	 * with slots below and above the pool being returned.
	 */
	spin_lock_irqsave(&io_tlb_lock, flags);
	{
		count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
			 io_tlb_list[index + nslots] : 0);
		/*
		 * Step 1: return the slots to the free list, merging the
		 * slots with superseding slots
		 */
		for (i = index + nslots - 1; i >= index; i--)
			io_tlb_list[i] = ++count;
		/*
		 * Step 2: merge the returned slots with the preceding slots,
		 * if available (non zero)
		 */
		for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
			io_tlb_list[i] = ++count;
	}
	spin_unlock_irqrestore(&io_tlb_lock, flags);
}
EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single);

void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
			     size_t size, enum dma_data_direction dir,
			     enum dma_sync_target target)
{
	int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
	phys_addr_t orig_addr = io_tlb_orig_addr[index];

	orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1);

	switch (target) {
	case SYNC_FOR_CPU:
		if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
			swiotlb_bounce(orig_addr, tlb_addr,
				       size, DMA_FROM_DEVICE);
		else
			BUG_ON(dir != DMA_TO_DEVICE);
		break;
	case SYNC_FOR_DEVICE:
		if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
			swiotlb_bounce(orig_addr, tlb_addr,
				       size, DMA_TO_DEVICE);
		else
			BUG_ON(dir != DMA_FROM_DEVICE);
		break;
	default:
		BUG();
	}
}
EXPORT_SYMBOL_GPL(swiotlb_tbl_sync_single);
void *
swiotlb_alloc_coherent(struct device *hwdev, size_t size,
		       dma_addr_t *dma_handle, gfp_t flags)
{
	dma_addr_t dev_addr;
	void *ret;
	int order = get_order(size);
	u64 dma_mask = DMA_BIT_MASK(32);

	if (hwdev && hwdev->coherent_dma_mask)
		dma_mask = hwdev->coherent_dma_mask;

	ret = (void *)__get_free_pages(flags, order);
	if (ret) {
		dev_addr = swiotlb_virt_to_bus(hwdev, ret);
		if (dev_addr + size - 1 > dma_mask) {
			/*
			 * The allocated memory isn't reachable by the device.
			 */
			free_pages((unsigned long) ret, order);
			ret = NULL;
		}
	}
	if (!ret) {
		/*
		 * We are either out of memory or the device can't DMA to
		 * GFP_DMA memory; fall back on map_single(), which
		 * will grab memory from the lowest available address range.
		 */
		phys_addr_t paddr = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
		if (paddr == SWIOTLB_MAP_ERROR)
			return NULL;

		ret = phys_to_virt(paddr);
		dev_addr = phys_to_dma(hwdev, paddr);

		/* Confirm address can be DMA'd by device */
		if (dev_addr + size - 1 > dma_mask) {
			printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
			       (unsigned long long)dma_mask,
			       (unsigned long long)dev_addr);

			/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
			swiotlb_tbl_unmap_single(hwdev, paddr,
						 size, DMA_TO_DEVICE);
			return NULL;
		}
	}

	*dma_handle = dev_addr;
	memset(ret, 0, size);

	return ret;
}
EXPORT_SYMBOL(swiotlb_alloc_coherent);

void
swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
		      dma_addr_t dev_addr)
{
	phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);

	WARN_ON(irqs_disabled());
	if (!is_swiotlb_buffer(paddr))
		free_pages((unsigned long)vaddr, get_order(size));
	else
		/* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single */
		swiotlb_tbl_unmap_single(hwdev, paddr, size, DMA_TO_DEVICE);
}
EXPORT_SYMBOL(swiotlb_free_coherent);

static void
swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
	     int do_panic)
{
	/*
	 * Ran out of IOMMU space for this operation. This is very bad.
	 * Unfortunately the drivers cannot handle this operation properly
	 * unless they check for dma_mapping_error() (most don't).
	 * When the mapping is small enough return a static buffer to limit
	 * the damage, or panic when the transfer is too big.
	 */
	printk(KERN_ERR "DMA: Out of SW-IOMMU space for %zu bytes at "
	       "device %s\n", size, dev ? dev_name(dev) : "?");

	if (size <= io_tlb_overflow || !do_panic)
		return;

	if (dir == DMA_BIDIRECTIONAL)
		panic("DMA: Random memory could be DMA accessed\n");
	if (dir == DMA_FROM_DEVICE)
		panic("DMA: Random memory could be DMA written\n");
	if (dir == DMA_TO_DEVICE)
		panic("DMA: Random memory could be DMA read\n");
}
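/*
 * When the pool is exhausted, swiotlb_map_page() below returns the DMA
 * address of the small io_tlb_overflow buffer instead of failing outright;
 * swiotlb_dma_mapping_error() recognizes that address, so a driver that
 * checks for mapping errors sees the failure, while one that doesn't at
 * least scribbles over the overflow buffer rather than over random memory.
 */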
/*
 * Map a single buffer of the indicated size for DMA in streaming mode.  The
 * physical address to use is returned.
 *
 * Once the device is given the dma address, the device owns this memory until
 * either swiotlb_unmap_page or swiotlb_dma_sync_single is performed.
 */
dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
			    unsigned long offset, size_t size,
			    enum dma_data_direction dir,
			    struct dma_attrs *attrs)
{
	phys_addr_t map, phys = page_to_phys(page) + offset;
	dma_addr_t dev_addr = phys_to_dma(dev, phys);

	BUG_ON(dir == DMA_NONE);
	/*
	 * If the address happens to be in the device's DMA window,
	 * we can safely return the device addr and not worry about bounce
	 * buffering it.
	 */
	if (dma_capable(dev, dev_addr, size) && !swiotlb_force)
		return dev_addr;

	/* Oh well, have to allocate and map a bounce buffer. */
	map = map_single(dev, phys, size, dir);
	if (map == SWIOTLB_MAP_ERROR) {
		swiotlb_full(dev, size, dir, 1);
		return phys_to_dma(dev, io_tlb_overflow_buffer);
	}

	dev_addr = phys_to_dma(dev, map);

	/* Ensure that the address returned is DMA'ble */
	if (!dma_capable(dev, dev_addr, size)) {
		swiotlb_tbl_unmap_single(dev, map, size, dir);
		return phys_to_dma(dev, io_tlb_overflow_buffer);
	}

	return dev_addr;
}
EXPORT_SYMBOL_GPL(swiotlb_map_page);

/*
 * Unmap a single streaming mode DMA translation.  The dma_addr and size must
 * match what was provided in a previous swiotlb_map_page call.  All
 * other usages are undefined.
 *
 * After this call, reads by the cpu to the buffer are guaranteed to see
 * whatever the device wrote there.
 */
static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
			 size_t size, enum dma_data_direction dir)
{
	phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);

	BUG_ON(dir == DMA_NONE);

	if (is_swiotlb_buffer(paddr)) {
		swiotlb_tbl_unmap_single(hwdev, paddr, size, dir);
		return;
	}

	if (dir != DMA_FROM_DEVICE)
		return;

	/*
	 * phys_to_virt doesn't work with highmem pages but we could
	 * call dma_mark_clean() with a highmem page here. However, we
	 * are fine since dma_mark_clean() is null on POWERPC. We can
	 * make dma_mark_clean() take a physical address if necessary.
	 */
	dma_mark_clean(phys_to_virt(paddr), size);
}

void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
			size_t size, enum dma_data_direction dir,
			struct dma_attrs *attrs)
{
	unmap_single(hwdev, dev_addr, size, dir);
}
EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
/*
 * Make physical memory consistent for a single streaming mode DMA translation
 * after a transfer.
 *
 * If you perform a swiotlb_map_page() but wish to interrogate the buffer
 * using the cpu, yet do not wish to teardown the dma mapping, you must
 * call this function before doing so.  At the next point you give the dma
 * address back to the card, you must first perform a
 * swiotlb_sync_single_for_device(), and then the device again owns the buffer.
 */
static void
swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
		    size_t size, enum dma_data_direction dir,
		    enum dma_sync_target target)
{
	phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);

	BUG_ON(dir == DMA_NONE);

	if (is_swiotlb_buffer(paddr)) {
		swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target);
		return;
	}

	if (dir != DMA_FROM_DEVICE)
		return;

	dma_mark_clean(phys_to_virt(paddr), size);
}

void
swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
			    size_t size, enum dma_data_direction dir)
{
	swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU);
}
EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);

void
swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
			       size_t size, enum dma_data_direction dir)
{
	swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE);
}
EXPORT_SYMBOL(swiotlb_sync_single_for_device);

/*
 * Map a set of buffers described by scatterlist in streaming mode for DMA.
 * This is the scatter-gather version of the above swiotlb_map_page
 * interface.  Here the scatter gather list elements are each tagged with the
 * appropriate dma address and length.  They are obtained via
 * sg_dma_{address,length}(SG).
 *
 * NOTE: An implementation may be able to use a smaller number of
 *       DMA address/length pairs than there are SG table elements.
 *       (for example via virtual mapping capabilities)
 *       The routine returns the number of addr/length pairs actually
 *       used, at most nents.
 *
 * Device ownership issues as mentioned above for swiotlb_map_page are the
 * same here.
 */
int
swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
		     enum dma_data_direction dir, struct dma_attrs *attrs)
{
	struct scatterlist *sg;
	int i;

	BUG_ON(dir == DMA_NONE);

	for_each_sg(sgl, sg, nelems, i) {
		phys_addr_t paddr = sg_phys(sg);
		dma_addr_t dev_addr = phys_to_dma(hwdev, paddr);

		if (swiotlb_force ||
		    !dma_capable(hwdev, dev_addr, sg->length)) {
			phys_addr_t map = map_single(hwdev, sg_phys(sg),
						     sg->length, dir);
			if (map == SWIOTLB_MAP_ERROR) {
				/* Don't panic here, we expect map_sg users
				   to do proper error handling. */
				swiotlb_full(hwdev, sg->length, dir, 0);
				swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
						       attrs);
				sgl[0].dma_length = 0;
				return 0;
			}
			sg->dma_address = phys_to_dma(hwdev, map);
		} else
			sg->dma_address = dev_addr;
		sg->dma_length = sg->length;
	}
	return nelems;
}
EXPORT_SYMBOL(swiotlb_map_sg_attrs);

int
swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
	       enum dma_data_direction dir)
{
	return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL);
}
EXPORT_SYMBOL(swiotlb_map_sg);

/*
 * Unmap a set of streaming mode DMA translations.  Again, cpu read rules
 * concerning calls here are the same as for swiotlb_unmap_page() above.
 */
void
swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
		       int nelems, enum dma_data_direction dir, struct dma_attrs *attrs)
{
	struct scatterlist *sg;
	int i;

	BUG_ON(dir == DMA_NONE);

	for_each_sg(sgl, sg, nelems, i)
		unmap_single(hwdev, sg->dma_address, sg->dma_length, dir);

}
EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);

void
swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
		 enum dma_data_direction dir)
{
	return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL);
}
EXPORT_SYMBOL(swiotlb_unmap_sg);
/*
 * Make physical memory consistent for a set of streaming mode DMA translations
 * after a transfer.
 *
 * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
 * and usage.
 */
static void
swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
		int nelems, enum dma_data_direction dir,
		enum dma_sync_target target)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sgl, sg, nelems, i)
		swiotlb_sync_single(hwdev, sg->dma_address,
				    sg->dma_length, dir, target);
}

void
swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
			int nelems, enum dma_data_direction dir)
{
	swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU);
}
EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);

void
swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
			   int nelems, enum dma_data_direction dir)
{
	swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
}
EXPORT_SYMBOL(swiotlb_sync_sg_for_device);

int
swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
{
	return (dma_addr == phys_to_dma(hwdev, io_tlb_overflow_buffer));
}
EXPORT_SYMBOL(swiotlb_dma_mapping_error);

/*
 * Return whether the given device DMA address mask can be supported
 * properly.  For example, if your device can only drive the low 24-bits
 * during bus mastering, then you would pass 0x00ffffff as the mask to
 * this function.
 */
int
swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
	return phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
}
EXPORT_SYMBOL(swiotlb_dma_supported);
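/*
 * None of the swiotlb_* entry points above are normally called by drivers
 * directly: architectures that may need bouncing wire them into their
 * struct dma_map_ops (x86, for example, has a swiotlb_dma_ops), so drivers
 * reach this code through the generic dma_map_page()/dma_unmap_page()/
 * dma_sync_*() API.
 */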