Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
swiotlb.c at v2.6.32-rc7 (896 lines, 25 kB)
/*
 * Dynamic DMA mapping support.
 *
 * This implementation is a fallback for platforms that do not support
 * I/O TLBs (aka DMA address translation hardware).
 * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
 * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
 * Copyright (C) 2000, 2003 Hewlett-Packard Co
 *      David Mosberger-Tang <davidm@hpl.hp.com>
 *
 * 03/05/07 davidm      Switch from PCI-DMA to generic device DMA API.
 * 00/12/13 davidm      Rename to swiotlb.c and add mark_clean() to avoid
 *                      unnecessary i-cache flushing.
 * 04/07/.. ak          Better overflow handling. Assorted fixes.
 * 05/09/10 linville    Add support for syncing ranges, support syncing for
 *                      DMA_BIDIRECTIONAL mappings, miscellaneous cleanup.
 * 08/12/11 beckyb      Add highmem support
 */

#include <linux/cache.h>
#include <linux/dma-mapping.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/swiotlb.h>
#include <linux/pfn.h>
#include <linux/types.h>
#include <linux/ctype.h>
#include <linux/highmem.h>

#include <asm/io.h>
#include <asm/dma.h>
#include <asm/scatterlist.h>

#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/iommu-helper.h>

#define OFFSET(val,align) ((unsigned long)      \
        ( (val) & ( (align) - 1)))

#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))

/*
 * Minimum IO TLB size to bother booting with. Systems with mainly
 * 64bit capable cards will only lightly use the swiotlb. If we can't
 * allocate a contiguous 1MB, we're probably in trouble anyway.
 */
#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)

/*
 * Enumeration for sync targets
 */
enum dma_sync_target {
        SYNC_FOR_CPU = 0,
        SYNC_FOR_DEVICE = 1,
};

int swiotlb_force;

/*
 * Used to do a quick range check in unmap_single and
 * sync_single_*, to see if the memory was in fact allocated by this
 * API.
 */
static char *io_tlb_start, *io_tlb_end;

/*
 * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
 * io_tlb_end. This is command line adjustable via setup_io_tlb_npages.
 */
static unsigned long io_tlb_nslabs;

/*
 * When the IOMMU overflows we return a fallback buffer. This sets the size.
 */
static unsigned long io_tlb_overflow = 32*1024;

void *io_tlb_overflow_buffer;

/*
 * This is a free list describing the number of free entries available from
 * each index
 */
static unsigned int *io_tlb_list;
static unsigned int io_tlb_index;

/*
 * We need to save away the original address corresponding to a mapped entry
 * for the sync operations.
 */
static phys_addr_t *io_tlb_orig_addr;

/*
 * Protect the above data structures in the map and unmap calls
 */
static DEFINE_SPINLOCK(io_tlb_lock);

static int __init
setup_io_tlb_npages(char *str)
{
        if (isdigit(*str)) {
                io_tlb_nslabs = simple_strtoul(str, &str, 0);
                /* avoid tail segment of size < IO_TLB_SEGSIZE */
                io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
        }
        if (*str == ',')
                ++str;
        if (!strcmp(str, "force"))
                swiotlb_force = 1;
        return 1;
}
__setup("swiotlb=", setup_io_tlb_npages);
/* make io_tlb_overflow tunable too? */

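/*
 * Boot-time configuration, as parsed by setup_io_tlb_npages() above:
 * "swiotlb=<nslabs>[,force]" sets the number of (1 << IO_TLB_SHIFT)-byte
 * slabs to reserve, rounded up to a multiple of IO_TLB_SEGSIZE, and the
 * optional "force" keyword makes every streaming mapping go through the
 * bounce pool even when the device could reach the memory directly.
 */
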
/* Note that this doesn't work with highmem pages */
static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
                                      volatile void *address)
{
        return phys_to_dma(hwdev, virt_to_phys(address));
}

static void swiotlb_print_info(unsigned long bytes)
{
        phys_addr_t pstart, pend;

        pstart = virt_to_phys(io_tlb_start);
        pend = virt_to_phys(io_tlb_end);

        printk(KERN_INFO "Placing %luMB software IO TLB between %p - %p\n",
               bytes >> 20, io_tlb_start, io_tlb_end);
        printk(KERN_INFO "software IO TLB at phys %#llx - %#llx\n",
               (unsigned long long)pstart,
               (unsigned long long)pend);
}

/*
 * Statically reserve bounce buffer space and initialize bounce buffer data
 * structures for the software IO TLB used to implement the DMA API.
 */
void __init
swiotlb_init_with_default_size(size_t default_size)
{
        unsigned long i, bytes;

        if (!io_tlb_nslabs) {
                io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
                io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
        }

        bytes = io_tlb_nslabs << IO_TLB_SHIFT;

        /*
         * Get IO TLB memory from the low pages
         */
        io_tlb_start = alloc_bootmem_low_pages(bytes);
        if (!io_tlb_start)
                panic("Cannot allocate SWIOTLB buffer");
        io_tlb_end = io_tlb_start + bytes;

        /*
         * Allocate and initialize the free list array. This array is used
         * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
         * between io_tlb_start and io_tlb_end.
         */
        io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int));
        for (i = 0; i < io_tlb_nslabs; i++)
                io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
        io_tlb_index = 0;
        io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(phys_addr_t));

        /*
         * Get the overflow emergency buffer
         */
        io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow);
        if (!io_tlb_overflow_buffer)
                panic("Cannot allocate SWIOTLB overflow buffer!\n");

        swiotlb_print_info(bytes);
}

void __init
swiotlb_init(void)
{
        swiotlb_init_with_default_size(64 * (1<<20));   /* default to 64MB */
}

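/*
 * Sizing note: unless overridden on the command line, the pool holds
 * default_size >> IO_TLB_SHIFT slabs, rounded up to whole IO_TLB_SEGSIZE
 * segments, i.e. 64 MB worth of bounce space for the default
 * swiotlb_init() call above.
 */
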
/*
 * Systems with larger DMA zones (those that don't support ISA) can
 * initialize the swiotlb later using the slab allocator if needed.
 * This should be just like above, but with some error catching.
 */
int
swiotlb_late_init_with_default_size(size_t default_size)
{
        unsigned long i, bytes, req_nslabs = io_tlb_nslabs;
        unsigned int order;

        if (!io_tlb_nslabs) {
                io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
                io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
        }

        /*
         * Get IO TLB memory from the low pages
         */
        order = get_order(io_tlb_nslabs << IO_TLB_SHIFT);
        io_tlb_nslabs = SLABS_PER_PAGE << order;
        bytes = io_tlb_nslabs << IO_TLB_SHIFT;

        while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
                io_tlb_start = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
                                                        order);
                if (io_tlb_start)
                        break;
                order--;
        }

        if (!io_tlb_start)
                goto cleanup1;

        if (order != get_order(bytes)) {
                printk(KERN_WARNING "Warning: only able to allocate %ld MB "
                       "for software IO TLB\n", (PAGE_SIZE << order) >> 20);
                io_tlb_nslabs = SLABS_PER_PAGE << order;
                bytes = io_tlb_nslabs << IO_TLB_SHIFT;
        }
        io_tlb_end = io_tlb_start + bytes;
        memset(io_tlb_start, 0, bytes);

        /*
         * Allocate and initialize the free list array. This array is used
         * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
         * between io_tlb_start and io_tlb_end.
         */
        io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
                                      get_order(io_tlb_nslabs * sizeof(int)));
        if (!io_tlb_list)
                goto cleanup2;

        for (i = 0; i < io_tlb_nslabs; i++)
                io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
        io_tlb_index = 0;

        io_tlb_orig_addr = (phys_addr_t *)
                __get_free_pages(GFP_KERNEL,
                                 get_order(io_tlb_nslabs *
                                           sizeof(phys_addr_t)));
        if (!io_tlb_orig_addr)
                goto cleanup3;

        memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(phys_addr_t));

        /*
         * Get the overflow emergency buffer
         */
        io_tlb_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
                                          get_order(io_tlb_overflow));
        if (!io_tlb_overflow_buffer)
                goto cleanup4;

        swiotlb_print_info(bytes);

        return 0;

cleanup4:
        free_pages((unsigned long)io_tlb_orig_addr,
                   get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
        io_tlb_orig_addr = NULL;
cleanup3:
        free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
                                                         sizeof(int)));
        io_tlb_list = NULL;
cleanup2:
        io_tlb_end = NULL;
        free_pages((unsigned long)io_tlb_start, order);
        io_tlb_start = NULL;
cleanup1:
        io_tlb_nslabs = req_nslabs;
        return -ENOMEM;
}

static int is_swiotlb_buffer(phys_addr_t paddr)
{
        return paddr >= virt_to_phys(io_tlb_start) &&
                paddr < virt_to_phys(io_tlb_end);
}

/*
 * Bounce: copy the swiotlb buffer back to the original dma location
 */
static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
                           enum dma_data_direction dir)
{
        unsigned long pfn = PFN_DOWN(phys);

        if (PageHighMem(pfn_to_page(pfn))) {
                /* The buffer does not have a mapping. Map it in and copy */
                unsigned int offset = phys & ~PAGE_MASK;
                char *buffer;
                unsigned int sz = 0;
                unsigned long flags;

                while (size) {
                        sz = min_t(size_t, PAGE_SIZE - offset, size);

                        local_irq_save(flags);
                        buffer = kmap_atomic(pfn_to_page(pfn),
                                             KM_BOUNCE_READ);
                        if (dir == DMA_TO_DEVICE)
                                memcpy(dma_addr, buffer + offset, sz);
                        else
                                memcpy(buffer + offset, dma_addr, sz);
                        kunmap_atomic(buffer, KM_BOUNCE_READ);
                        local_irq_restore(flags);

                        size -= sz;
                        pfn++;
                        dma_addr += sz;
                        offset = 0;
                }
        } else {
                if (dir == DMA_TO_DEVICE)
                        memcpy(dma_addr, phys_to_virt(phys), size);
                else
                        memcpy(phys_to_virt(phys), dma_addr, size);
        }
}

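/*
 * Direction convention for swiotlb_bounce(): DMA_TO_DEVICE copies from the
 * original buffer into the bounce slot, anything else copies from the bounce
 * slot back to the original buffer. Highmem originals are copied page by
 * page through a temporary atomic kmap with interrupts disabled.
 */
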
/*
 * Allocates bounce buffer and returns its kernel virtual address.
 */
static void *
map_single(struct device *hwdev, phys_addr_t phys, size_t size, int dir)
{
        unsigned long flags;
        char *dma_addr;
        unsigned int nslots, stride, index, wrap;
        int i;
        unsigned long start_dma_addr;
        unsigned long mask;
        unsigned long offset_slots;
        unsigned long max_slots;

        mask = dma_get_seg_boundary(hwdev);
        start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start) & mask;

        offset_slots = ALIGN(start_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;

        /*
         * Carefully handle integer overflow which can occur when mask == ~0UL.
         */
        max_slots = mask + 1
                    ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT
                    : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);

        /*
         * For mappings greater than a page, we limit the stride (and
         * hence alignment) to a page size.
         */
        nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
        if (size > PAGE_SIZE)
                stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
        else
                stride = 1;

        BUG_ON(!nslots);

        /*
         * Find a suitable number of IO TLB entries that will fit this
         * request and allocate a buffer from that IO TLB pool.
         */
        spin_lock_irqsave(&io_tlb_lock, flags);
        index = ALIGN(io_tlb_index, stride);
        if (index >= io_tlb_nslabs)
                index = 0;
        wrap = index;

        do {
                while (iommu_is_span_boundary(index, nslots, offset_slots,
                                              max_slots)) {
                        index += stride;
                        if (index >= io_tlb_nslabs)
                                index = 0;
                        if (index == wrap)
                                goto not_found;
                }

                /*
                 * If we find a slot that indicates we have 'nslots' number of
                 * contiguous buffers, we allocate the buffers from that slot
                 * and mark the entries as '0' indicating unavailable.
                 */
                if (io_tlb_list[index] >= nslots) {
                        int count = 0;

                        for (i = index; i < (int) (index + nslots); i++)
                                io_tlb_list[i] = 0;
                        for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
                                io_tlb_list[i] = ++count;
                        dma_addr = io_tlb_start + (index << IO_TLB_SHIFT);

                        /*
                         * Update the indices to avoid searching in the next
                         * round.
                         */
                        io_tlb_index = ((index + nslots) < io_tlb_nslabs
                                        ? (index + nslots) : 0);

                        goto found;
                }
                index += stride;
                if (index >= io_tlb_nslabs)
                        index = 0;
        } while (index != wrap);

not_found:
        spin_unlock_irqrestore(&io_tlb_lock, flags);
        return NULL;
found:
        spin_unlock_irqrestore(&io_tlb_lock, flags);

        /*
         * Save away the mapping from the original address to the DMA address.
         * This is needed when we sync the memory. Then we sync the buffer if
         * needed.
         */
        for (i = 0; i < nslots; i++)
                io_tlb_orig_addr[index+i] = phys + (i << IO_TLB_SHIFT);
        if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
                swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE);

        return dma_addr;
}

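/*
 * Free-list bookkeeping used above: io_tlb_list[i] holds the number of
 * contiguous free slots starting at index i, counted only up to the next
 * IO_TLB_SEGSIZE boundary. map_single() zeroes the claimed slots and
 * renumbers the free run immediately before them; do_unmap_single() below
 * restores the counts and merges the freed run with its neighbours.
 */
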
/*
 * dma_addr is the kernel virtual address of the bounce buffer to unmap.
 */
static void
do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
{
        unsigned long flags;
        int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
        int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
        phys_addr_t phys = io_tlb_orig_addr[index];

        /*
         * First, sync the memory before unmapping the entry
         */
        if (phys && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
                swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE);

        /*
         * Return the buffer to the free list by setting the corresponding
         * entries to indicate the number of contiguous entries available.
         * While returning the entries to the free list, we merge the entries
         * with slots below and above the pool being returned.
         */
        spin_lock_irqsave(&io_tlb_lock, flags);
        {
                count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
                         io_tlb_list[index + nslots] : 0);
                /*
                 * Step 1: return the slots to the free list, merging the
                 * slots with succeeding slots
                 */
                for (i = index + nslots - 1; i >= index; i--)
                        io_tlb_list[i] = ++count;
                /*
                 * Step 2: merge the returned slots with the preceding slots,
                 * if available (non zero)
                 */
                for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
                        io_tlb_list[i] = ++count;
        }
        spin_unlock_irqrestore(&io_tlb_lock, flags);
}

static void
sync_single(struct device *hwdev, char *dma_addr, size_t size,
            int dir, int target)
{
        int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
        phys_addr_t phys = io_tlb_orig_addr[index];

        phys += ((unsigned long)dma_addr & ((1 << IO_TLB_SHIFT) - 1));

        switch (target) {
        case SYNC_FOR_CPU:
                if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
                        swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE);
                else
                        BUG_ON(dir != DMA_TO_DEVICE);
                break;
        case SYNC_FOR_DEVICE:
                if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
                        swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE);
                else
                        BUG_ON(dir != DMA_FROM_DEVICE);
                break;
        default:
                BUG();
        }
}

void *
swiotlb_alloc_coherent(struct device *hwdev, size_t size,
                       dma_addr_t *dma_handle, gfp_t flags)
{
        dma_addr_t dev_addr;
        void *ret;
        int order = get_order(size);
        u64 dma_mask = DMA_BIT_MASK(32);

        if (hwdev && hwdev->coherent_dma_mask)
                dma_mask = hwdev->coherent_dma_mask;

        ret = (void *)__get_free_pages(flags, order);
        if (ret && swiotlb_virt_to_bus(hwdev, ret) + size > dma_mask) {
                /*
                 * The allocated memory isn't reachable by the device.
                 */
                free_pages((unsigned long) ret, order);
                ret = NULL;
        }
        if (!ret) {
                /*
                 * We are either out of memory or the device can't DMA
                 * to GFP_DMA memory; fall back on map_single(), which
                 * will grab memory from the lowest available address range.
                 */
                ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
                if (!ret)
                        return NULL;
        }

        memset(ret, 0, size);
        dev_addr = swiotlb_virt_to_bus(hwdev, ret);

        /* Confirm address can be DMA'd by device */
        if (dev_addr + size > dma_mask) {
                printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
                       (unsigned long long)dma_mask,
                       (unsigned long long)dev_addr);

                /* DMA_TO_DEVICE to avoid memcpy in unmap_single */
                do_unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
                return NULL;
        }
        *dma_handle = dev_addr;
        return ret;
}
EXPORT_SYMBOL(swiotlb_alloc_coherent);

void
swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
                      dma_addr_t dev_addr)
{
        phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);

        WARN_ON(irqs_disabled());
        if (!is_swiotlb_buffer(paddr))
                free_pages((unsigned long)vaddr, get_order(size));
        else
                /* DMA_TO_DEVICE to avoid memcpy in unmap_single */
                do_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
}
EXPORT_SYMBOL(swiotlb_free_coherent);

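/*
 * Note on the coherent allocations above: swiotlb_alloc_coherent() first
 * tries a regular page allocation and only falls back to a bounce slot
 * (map_single() with a zero original address) when the pages it got are not
 * reachable under the device's coherent_dma_mask; the matching free path
 * uses is_swiotlb_buffer() to decide which of the two to undo.
 */
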
static void
swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
{
        /*
         * Ran out of IOMMU space for this operation. This is very bad.
         * Unfortunately the drivers cannot handle this operation properly
         * unless they check for dma_mapping_error (most don't).
         * When the mapping is small enough return a static buffer to limit
         * the damage, or panic when the transfer is too big.
         */
        printk(KERN_ERR "DMA: Out of SW-IOMMU space for %zu bytes at "
               "device %s\n", size, dev ? dev_name(dev) : "?");

        if (size <= io_tlb_overflow || !do_panic)
                return;

        if (dir == DMA_BIDIRECTIONAL)
                panic("DMA: Random memory could be DMA accessed\n");
        if (dir == DMA_FROM_DEVICE)
                panic("DMA: Random memory could be DMA written\n");
        if (dir == DMA_TO_DEVICE)
                panic("DMA: Random memory could be DMA read\n");
}

/*
 * Map a single buffer of the indicated size for DMA in streaming mode. The
 * physical address to use is returned.
 *
 * Once the device is given the dma address, the device owns this memory until
 * either swiotlb_unmap_page or swiotlb_dma_sync_single is performed.
 */
dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
                            unsigned long offset, size_t size,
                            enum dma_data_direction dir,
                            struct dma_attrs *attrs)
{
        phys_addr_t phys = page_to_phys(page) + offset;
        dma_addr_t dev_addr = phys_to_dma(dev, phys);
        void *map;

        BUG_ON(dir == DMA_NONE);
        /*
         * If the address happens to be in the device's DMA window,
         * we can safely return the device addr and not worry about bounce
         * buffering it.
         */
        if (dma_capable(dev, dev_addr, size) && !swiotlb_force)
                return dev_addr;

        /*
         * Oh well, have to allocate and map a bounce buffer.
         */
        map = map_single(dev, phys, size, dir);
        if (!map) {
                swiotlb_full(dev, size, dir, 1);
                map = io_tlb_overflow_buffer;
        }

        dev_addr = swiotlb_virt_to_bus(dev, map);

        /*
         * Ensure that the address returned is DMA'ble
         */
        if (!dma_capable(dev, dev_addr, size))
                panic("map_single: bounce buffer is not DMA'ble");

        return dev_addr;
}
EXPORT_SYMBOL_GPL(swiotlb_map_page);

/*
 * Unmap a single streaming mode DMA translation. The dma_addr and size must
 * match what was provided for in a previous swiotlb_map_page call. All
 * other usages are undefined.
 *
 * After this call, reads by the cpu to the buffer are guaranteed to see
 * whatever the device wrote there.
 */
static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
                         size_t size, int dir)
{
        phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);

        BUG_ON(dir == DMA_NONE);

        if (is_swiotlb_buffer(paddr)) {
                do_unmap_single(hwdev, phys_to_virt(paddr), size, dir);
                return;
        }

        if (dir != DMA_FROM_DEVICE)
                return;

        /*
         * phys_to_virt doesn't work with highmem pages but we could
         * call dma_mark_clean() with a highmem page here. However, we
         * are fine since dma_mark_clean() is null on POWERPC. We can
         * make dma_mark_clean() take a physical address if necessary.
         */
        dma_mark_clean(phys_to_virt(paddr), size);
}

void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
                        size_t size, enum dma_data_direction dir,
                        struct dma_attrs *attrs)
{
        unmap_single(hwdev, dev_addr, size, dir);
}
EXPORT_SYMBOL_GPL(swiotlb_unmap_page);

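/*
 * Error reporting convention for the streaming API above: when no bounce
 * slot is available, swiotlb_map_page() logs via swiotlb_full() and hands
 * back the address of io_tlb_overflow_buffer instead of failing outright;
 * swiotlb_dma_mapping_error() further below recognises exactly that address,
 * so callers that check it can detect the failure.
 */
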
/*
 * Make physical memory consistent for a single streaming mode DMA translation
 * after a transfer.
 *
 * If you perform a swiotlb_map_page() but wish to interrogate the buffer
 * using the cpu, yet do not wish to tear down the dma mapping, you must
 * call this function before doing so. At the next point you give the dma
 * address back to the card, you must first perform a
 * swiotlb_dma_sync_for_device, and then the device again owns the buffer.
 */
static void
swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
                    size_t size, int dir, int target)
{
        phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);

        BUG_ON(dir == DMA_NONE);

        if (is_swiotlb_buffer(paddr)) {
                sync_single(hwdev, phys_to_virt(paddr), size, dir, target);
                return;
        }

        if (dir != DMA_FROM_DEVICE)
                return;

        dma_mark_clean(phys_to_virt(paddr), size);
}

void
swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
                            size_t size, enum dma_data_direction dir)
{
        swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU);
}
EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);

void
swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
                               size_t size, enum dma_data_direction dir)
{
        swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE);
}
EXPORT_SYMBOL(swiotlb_sync_single_for_device);

/*
 * Same as above, but for a sub-range of the mapping.
 */
static void
swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
                          unsigned long offset, size_t size,
                          int dir, int target)
{
        swiotlb_sync_single(hwdev, dev_addr + offset, size, dir, target);
}

void
swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
                                  unsigned long offset, size_t size,
                                  enum dma_data_direction dir)
{
        swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
                                  SYNC_FOR_CPU);
}
EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu);

void
swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr,
                                     unsigned long offset, size_t size,
                                     enum dma_data_direction dir)
{
        swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
                                  SYNC_FOR_DEVICE);
}
EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);

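/*
 * The sync helpers above only copy between the original buffer and its
 * bounce slot when the address actually lies inside the swiotlb pool:
 * "for_cpu" bounces device data back before the CPU reads it, "for_device"
 * pushes CPU updates out before the buffer is handed back to the device.
 * The _range variants simply sync at dev_addr + offset.
 */
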
/*
 * Map a set of buffers described by scatterlist in streaming mode for DMA.
 * This is the scatter-gather version of the above swiotlb_map_page
 * interface. Here the scatter gather list elements are each tagged with the
 * appropriate dma address and length. They are obtained via
 * sg_dma_{address,length}(SG).
 *
 * NOTE: An implementation may be able to use a smaller number of
 *       DMA address/length pairs than there are SG table elements.
 *       (for example via virtual mapping capabilities)
 *       The routine returns the number of addr/length pairs actually
 *       used, at most nents.
 *
 * Device ownership issues as mentioned above for swiotlb_map_page are the
 * same here.
 */
int
swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
                     enum dma_data_direction dir, struct dma_attrs *attrs)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(dir == DMA_NONE);

        for_each_sg(sgl, sg, nelems, i) {
                phys_addr_t paddr = sg_phys(sg);
                dma_addr_t dev_addr = phys_to_dma(hwdev, paddr);

                if (swiotlb_force ||
                    !dma_capable(hwdev, dev_addr, sg->length)) {
                        void *map = map_single(hwdev, sg_phys(sg),
                                               sg->length, dir);
                        if (!map) {
                                /* Don't panic here, we expect map_sg users
                                   to do proper error handling. */
                                swiotlb_full(hwdev, sg->length, dir, 0);
                                swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
                                                       attrs);
                                sgl[0].dma_length = 0;
                                return 0;
                        }
                        sg->dma_address = swiotlb_virt_to_bus(hwdev, map);
                } else
                        sg->dma_address = dev_addr;
                sg->dma_length = sg->length;
        }
        return nelems;
}
EXPORT_SYMBOL(swiotlb_map_sg_attrs);

int
swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
               int dir)
{
        return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL);
}
EXPORT_SYMBOL(swiotlb_map_sg);

/*
 * Unmap a set of streaming mode DMA translations. Again, cpu read rules
 * concerning calls here are the same as for swiotlb_unmap_page() above.
 */
void
swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
                       int nelems, enum dma_data_direction dir, struct dma_attrs *attrs)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(dir == DMA_NONE);

        for_each_sg(sgl, sg, nelems, i)
                unmap_single(hwdev, sg->dma_address, sg->dma_length, dir);

}
EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);

void
swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
                 int dir)
{
        return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL);
}
EXPORT_SYMBOL(swiotlb_unmap_sg);

/*
 * Make physical memory consistent for a set of streaming mode DMA translations
 * after a transfer.
 *
 * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
 * and usage.
 */
static void
swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
                int nelems, int dir, int target)
{
        struct scatterlist *sg;
        int i;

        for_each_sg(sgl, sg, nelems, i)
                swiotlb_sync_single(hwdev, sg->dma_address,
                                    sg->dma_length, dir, target);
}

void
swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
                        int nelems, enum dma_data_direction dir)
{
        swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU);
}
EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);

void
swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
                           int nelems, enum dma_data_direction dir)
{
        swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
}
EXPORT_SYMBOL(swiotlb_sync_sg_for_device);

int
swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
{
        return (dma_addr == swiotlb_virt_to_bus(hwdev, io_tlb_overflow_buffer));
}
EXPORT_SYMBOL(swiotlb_dma_mapping_error);

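/*
 * The scatter-gather paths above bounce each segment independently. If any
 * segment cannot get a bounce slot, swiotlb_map_sg_attrs() unmaps the
 * segments mapped so far, zeroes the first dma_length and returns 0, so
 * callers must check the return value rather than assume nelems.
 */
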
/*
 * Return whether the given device DMA address mask can be supported
 * properly. For example, if your device can only drive the low 24-bits
 * during bus mastering, then you would pass 0x00ffffff as the mask to
 * this function.
 */
int
swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
        return swiotlb_virt_to_bus(hwdev, io_tlb_end - 1) <= mask;
}
EXPORT_SYMBOL(swiotlb_dma_supported);
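
For orientation, here is a minimal, hypothetical sketch (not part of swiotlb.c) of how a driver-style caller would drive the streaming entry points exported above. The function name, device pointer and buffer are placeholders, and the buffer is assumed to be a physically contiguous lowmem allocation. In a real driver these routines are normally reached indirectly through the generic DMA API (dma_map_page() and friends), with the architecture routing to swiotlb when appropriate.

#include <linux/dma-mapping.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/swiotlb.h>

/* Hypothetical helper: stream 'len' bytes at 'buf' out to a device. */
static int example_dma_to_device(struct device *dev, void *buf, size_t len)
{
        dma_addr_t handle;

        /* May transparently bounce 'buf' if the device cannot reach it. */
        handle = swiotlb_map_page(dev, virt_to_page(buf),
                                  offset_in_page(buf), len,
                                  DMA_TO_DEVICE, NULL);
        if (swiotlb_dma_mapping_error(dev, handle))
                return -ENOMEM; /* bounce pool exhausted */

        /* ... program the hardware with 'handle' and wait for completion ... */

        swiotlb_unmap_page(dev, handle, len, DMA_TO_DEVICE, NULL);
        return 0;
}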