Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
swiotlb.c at v2.6.26-rc3 (857 lines, 25 kB)

/*
 * Dynamic DMA mapping support.
 *
 * This implementation is a fallback for platforms that do not support
 * I/O TLBs (aka DMA address translation hardware).
 * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
 * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
 * Copyright (C) 2000, 2003 Hewlett-Packard Co
 *      David Mosberger-Tang <davidm@hpl.hp.com>
 *
 * 03/05/07 davidm      Switch from PCI-DMA to generic device DMA API.
 * 00/12/13 davidm      Rename to swiotlb.c and add mark_clean() to avoid
 *                      unnecessary i-cache flushing.
 * 04/07/.. ak          Better overflow handling. Assorted fixes.
 * 05/09/10 linville    Add support for syncing ranges, support syncing for
 *                      DMA_BIDIRECTIONAL mappings, miscellaneous cleanup.
 */

#include <linux/cache.h>
#include <linux/dma-mapping.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ctype.h>

#include <asm/io.h>
#include <asm/dma.h>
#include <asm/scatterlist.h>

#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/iommu-helper.h>

#define OFFSET(val,align) ((unsigned long)      \
        ( (val) & ( (align) - 1)))

#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
#define SG_ENT_PHYS_ADDRESS(sg) virt_to_bus(SG_ENT_VIRT_ADDRESS(sg))

/*
 * Maximum allowable number of contiguous slabs to map,
 * must be a power of 2.  What is the appropriate value ?
 * The complexity of {map,unmap}_single is linearly dependent on this value.
 */
#define IO_TLB_SEGSIZE  128

/*
 * log of the size of each IO TLB slab.  The number of slabs is command line
 * controllable.
 */
#define IO_TLB_SHIFT 11

#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))

/*
 * Minimum IO TLB size to bother booting with.  Systems with mainly
 * 64bit capable cards will only lightly use the swiotlb.  If we can't
 * allocate a contiguous 1MB, we're probably in trouble anyway.
 */
#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
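
/*
 * For concreteness, the constants above imply: each IO TLB slab is
 * 1 << IO_TLB_SHIFT = 2 KB, a segment of IO_TLB_SEGSIZE = 128 slabs is
 * therefore 256 KB (the largest single bounce mapping), the 64 MB default
 * pool used by swiotlb_init() below works out to 32768 slabs, and
 * IO_TLB_MIN_SLABS = 512 slabs is the 1 MB floor mentioned in its comment.
 */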

/*
 * Enumeration for sync targets
 */
enum dma_sync_target {
        SYNC_FOR_CPU = 0,
        SYNC_FOR_DEVICE = 1,
};

int swiotlb_force;

/*
 * Used to do a quick range check in swiotlb_unmap_single and
 * swiotlb_sync_single_*, to see if the memory was in fact allocated by this
 * API.
 */
static char *io_tlb_start, *io_tlb_end;

/*
 * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
 * io_tlb_end.  This is command line adjustable via setup_io_tlb_npages.
 */
static unsigned long io_tlb_nslabs;

/*
 * When the IOMMU overflows we return a fallback buffer. This sets the size.
 */
static unsigned long io_tlb_overflow = 32*1024;

void *io_tlb_overflow_buffer;

/*
 * This is a free list describing the number of free entries available from
 * each index
 */
static unsigned int *io_tlb_list;
static unsigned int io_tlb_index;

/*
 * We need to save away the original address corresponding to a mapped entry
 * for the sync operations.
 */
static unsigned char **io_tlb_orig_addr;

/*
 * Protect the above data structures in the map and unmap calls
 */
static DEFINE_SPINLOCK(io_tlb_lock);

static int __init
setup_io_tlb_npages(char *str)
{
        if (isdigit(*str)) {
                io_tlb_nslabs = simple_strtoul(str, &str, 0);
                /* avoid tail segment of size < IO_TLB_SEGSIZE */
                io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
        }
        if (*str == ',')
                ++str;
        if (!strcmp(str, "force"))
                swiotlb_force = 1;
        return 1;
}
__setup("swiotlb=", setup_io_tlb_npages);
/* make io_tlb_overflow tunable too? */

/*
 * Statically reserve bounce buffer space and initialize bounce buffer data
 * structures for the software IO TLB used to implement the DMA API.
 */
void __init
swiotlb_init_with_default_size(size_t default_size)
{
        unsigned long i, bytes;

        if (!io_tlb_nslabs) {
                io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
                io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
        }

        bytes = io_tlb_nslabs << IO_TLB_SHIFT;

        /*
         * Get IO TLB memory from the low pages
         */
        io_tlb_start = alloc_bootmem_low_pages(bytes);
        if (!io_tlb_start)
                panic("Cannot allocate SWIOTLB buffer");
        io_tlb_end = io_tlb_start + bytes;

        /*
         * Allocate and initialize the free list array.  This array is used
         * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
         * between io_tlb_start and io_tlb_end.
         */
        io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int));
        for (i = 0; i < io_tlb_nslabs; i++)
                io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
        io_tlb_index = 0;
        io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *));

        /*
         * Get the overflow emergency buffer
         */
        io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow);
        if (!io_tlb_overflow_buffer)
                panic("Cannot allocate SWIOTLB overflow buffer!\n");

        printk(KERN_INFO "Placing software IO TLB between 0x%lx - 0x%lx\n",
               virt_to_bus(io_tlb_start), virt_to_bus(io_tlb_end));
}

void __init
swiotlb_init(void)
{
        swiotlb_init_with_default_size(64 * (1<<20));   /* default to 64MB */
}
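
/*
 * The "swiotlb=" boot parameter parsed above takes an optional slab count
 * and/or the word "force".  A few illustrative values (the exact numbers
 * here are only examples; slab sizes follow from IO_TLB_SHIFT):
 *
 *      swiotlb=65536           reserve 65536 slabs (128 MB) at boot
 *      swiotlb=65536,force     same, and bounce every mapping via swiotlb_force
 *      swiotlb=force           keep the default size but force bouncing
 *
 * Counts are rounded up to a multiple of IO_TLB_SEGSIZE by the ALIGN() in
 * setup_io_tlb_npages().
 */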

/*
 * Systems with larger DMA zones (those that don't support ISA) can
 * initialize the swiotlb later using the slab allocator if needed.
 * This should be just like above, but with some error catching.
 */
int
swiotlb_late_init_with_default_size(size_t default_size)
{
        unsigned long i, bytes, req_nslabs = io_tlb_nslabs;
        unsigned int order;

        if (!io_tlb_nslabs) {
                io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
                io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
        }

        /*
         * Get IO TLB memory from the low pages
         */
        order = get_order(io_tlb_nslabs << IO_TLB_SHIFT);
        io_tlb_nslabs = SLABS_PER_PAGE << order;
        bytes = io_tlb_nslabs << IO_TLB_SHIFT;

        while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
                io_tlb_start = (char *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
                                                        order);
                if (io_tlb_start)
                        break;
                order--;
        }

        if (!io_tlb_start)
                goto cleanup1;

        if (order != get_order(bytes)) {
                printk(KERN_WARNING "Warning: only able to allocate %ld MB "
                       "for software IO TLB\n", (PAGE_SIZE << order) >> 20);
                io_tlb_nslabs = SLABS_PER_PAGE << order;
                bytes = io_tlb_nslabs << IO_TLB_SHIFT;
        }
        io_tlb_end = io_tlb_start + bytes;
        memset(io_tlb_start, 0, bytes);

        /*
         * Allocate and initialize the free list array.  This array is used
         * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
         * between io_tlb_start and io_tlb_end.
         */
        io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
                                      get_order(io_tlb_nslabs * sizeof(int)));
        if (!io_tlb_list)
                goto cleanup2;

        for (i = 0; i < io_tlb_nslabs; i++)
                io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
        io_tlb_index = 0;

        io_tlb_orig_addr = (unsigned char **)__get_free_pages(GFP_KERNEL,
                                   get_order(io_tlb_nslabs * sizeof(char *)));
        if (!io_tlb_orig_addr)
                goto cleanup3;

        memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(char *));

        /*
         * Get the overflow emergency buffer
         */
        io_tlb_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
                                                  get_order(io_tlb_overflow));
        if (!io_tlb_overflow_buffer)
                goto cleanup4;

        printk(KERN_INFO "Placing %luMB software IO TLB between 0x%lx - "
               "0x%lx\n", bytes >> 20,
               virt_to_bus(io_tlb_start), virt_to_bus(io_tlb_end));

        return 0;

cleanup4:
        free_pages((unsigned long)io_tlb_orig_addr, get_order(io_tlb_nslabs *
                                                              sizeof(char *)));
        io_tlb_orig_addr = NULL;
cleanup3:
        free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
                                                         sizeof(int)));
        io_tlb_list = NULL;
cleanup2:
        io_tlb_end = NULL;
        free_pages((unsigned long)io_tlb_start, order);
        io_tlb_start = NULL;
cleanup1:
        io_tlb_nslabs = req_nslabs;
        return -ENOMEM;
}

static int
address_needs_mapping(struct device *hwdev, dma_addr_t addr)
{
        dma_addr_t mask = 0xffffffff;
        /* If the device has a mask, use it, otherwise default to 32 bits */
        if (hwdev && hwdev->dma_mask)
                mask = *hwdev->dma_mask;
        return (addr & ~mask) != 0;
}
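
/*
 * Worked example for address_needs_mapping() (illustrative numbers only):
 * for a device whose dma_mask is 0x00ffffff (24 bits), a bus address of
 * 0x00fff000 gives 0x00fff000 & ~0x00ffffff == 0, so it is reachable and no
 * bounce buffer is needed; 0x01200000 gives 0x01000000 != 0, so the buffer
 * must be bounced through the IO TLB.
 */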

/*
 * Allocates bounce buffer and returns its kernel virtual address.
 */
static void *
map_single(struct device *hwdev, char *buffer, size_t size, int dir)
{
        unsigned long flags;
        char *dma_addr;
        unsigned int nslots, stride, index, wrap;
        int i;
        unsigned long start_dma_addr;
        unsigned long mask;
        unsigned long offset_slots;
        unsigned long max_slots;

        mask = dma_get_seg_boundary(hwdev);
        start_dma_addr = virt_to_bus(io_tlb_start) & mask;

        offset_slots = ALIGN(start_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
        max_slots = mask + 1
                    ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT
                    : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);

        /*
         * For mappings greater than a page, we limit the stride (and
         * hence alignment) to a page size.
         */
        nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
        if (size > PAGE_SIZE)
                stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
        else
                stride = 1;

        BUG_ON(!nslots);

        /*
         * Find suitable number of IO TLB entries size that will fit this
         * request and allocate a buffer from that IO TLB pool.
         */
        spin_lock_irqsave(&io_tlb_lock, flags);
        index = ALIGN(io_tlb_index, stride);
        if (index >= io_tlb_nslabs)
                index = 0;
        wrap = index;

        do {
                while (iommu_is_span_boundary(index, nslots, offset_slots,
                                              max_slots)) {
                        index += stride;
                        if (index >= io_tlb_nslabs)
                                index = 0;
                        if (index == wrap)
                                goto not_found;
                }

                /*
                 * If we find a slot that indicates we have 'nslots' number of
                 * contiguous buffers, we allocate the buffers from that slot
                 * and mark the entries as '0' indicating unavailable.
                 */
                if (io_tlb_list[index] >= nslots) {
                        int count = 0;

                        for (i = index; i < (int) (index + nslots); i++)
                                io_tlb_list[i] = 0;
                        for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
                                io_tlb_list[i] = ++count;
                        dma_addr = io_tlb_start + (index << IO_TLB_SHIFT);

                        /*
                         * Update the indices to avoid searching in the next
                         * round.
                         */
                        io_tlb_index = ((index + nslots) < io_tlb_nslabs
                                        ? (index + nslots) : 0);

                        goto found;
                }
                index += stride;
                if (index >= io_tlb_nslabs)
                        index = 0;
        } while (index != wrap);

not_found:
        spin_unlock_irqrestore(&io_tlb_lock, flags);
        return NULL;
found:
        spin_unlock_irqrestore(&io_tlb_lock, flags);

        /*
         * Save away the mapping from the original address to the DMA address.
         * This is needed when we sync the memory.  Then we sync the buffer if
         * needed.
         */
        for (i = 0; i < nslots; i++)
                io_tlb_orig_addr[index+i] = buffer + (i << IO_TLB_SHIFT);
        if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
                memcpy(dma_addr, buffer, size);

        return dma_addr;
}
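
/*
 * Worked example of the io_tlb_list bookkeeping above, using a segment of
 * only 4 slots for illustration (the real segment size is IO_TLB_SEGSIZE):
 * a fresh segment reads {4, 3, 2, 1}, i.e. how many contiguous free slots
 * start at each index.  Allocating nslots = 2 at index 1 zeroes those two
 * entries and renumbers the free slot before them, giving {1, 0, 0, 1}.
 * unmap_single() below reverses this, merging with the free slots on both
 * sides to restore {4, 3, 2, 1}.
 */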

/*
 * dma_addr is the kernel virtual address of the bounce buffer to unmap.
 */
static void
unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
{
        unsigned long flags;
        int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
        int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
        char *buffer = io_tlb_orig_addr[index];

        /*
         * First, sync the memory before unmapping the entry
         */
        if (buffer && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
                /*
                 * bounce... copy the data back into the original buffer and
                 * delete the bounce buffer.
                 */
                memcpy(buffer, dma_addr, size);

        /*
         * Return the buffer to the free list by setting the corresponding
         * entries to indicate the number of contiguous entries available.
         * While returning the entries to the free list, we merge the entries
         * with slots below and above the pool being returned.
         */
        spin_lock_irqsave(&io_tlb_lock, flags);
        {
                count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
                         io_tlb_list[index + nslots] : 0);
                /*
                 * Step 1: return the slots to the free list, merging the
                 * slots with succeeding slots
                 */
                for (i = index + nslots - 1; i >= index; i--)
                        io_tlb_list[i] = ++count;
                /*
                 * Step 2: merge the returned slots with the preceding slots,
                 * if available (non zero)
                 */
                for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
                        io_tlb_list[i] = ++count;
        }
        spin_unlock_irqrestore(&io_tlb_lock, flags);
}

static void
sync_single(struct device *hwdev, char *dma_addr, size_t size,
            int dir, int target)
{
        int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
        char *buffer = io_tlb_orig_addr[index];

        buffer += ((unsigned long)dma_addr & ((1 << IO_TLB_SHIFT) - 1));

        switch (target) {
        case SYNC_FOR_CPU:
                if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
                        memcpy(buffer, dma_addr, size);
                else
                        BUG_ON(dir != DMA_TO_DEVICE);
                break;
        case SYNC_FOR_DEVICE:
                if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
                        memcpy(dma_addr, buffer, size);
                else
                        BUG_ON(dir != DMA_FROM_DEVICE);
                break;
        default:
                BUG();
        }
}

void *
swiotlb_alloc_coherent(struct device *hwdev, size_t size,
                       dma_addr_t *dma_handle, gfp_t flags)
{
        dma_addr_t dev_addr;
        void *ret;
        int order = get_order(size);

        /*
         * XXX fix me: the DMA API should pass us an explicit DMA mask
         * instead, or use ZONE_DMA32 (ia64 overloads ZONE_DMA to be a ~32
         * bit range instead of a 16MB one).
         */
        flags |= GFP_DMA;

        ret = (void *)__get_free_pages(flags, order);
        if (ret && address_needs_mapping(hwdev, virt_to_bus(ret))) {
                /*
                 * The allocated memory isn't reachable by the device.
                 * Fall back on swiotlb_map_single().
                 */
                free_pages((unsigned long) ret, order);
                ret = NULL;
        }
        if (!ret) {
                /*
                 * We are either out of memory or the device can't DMA
                 * to GFP_DMA memory; fall back on
                 * swiotlb_map_single(), which will grab memory from
                 * the lowest available address range.
                 */
                dma_addr_t handle;
                handle = swiotlb_map_single(NULL, NULL, size, DMA_FROM_DEVICE);
                if (swiotlb_dma_mapping_error(handle))
                        return NULL;

                ret = bus_to_virt(handle);
        }

        memset(ret, 0, size);
        dev_addr = virt_to_bus(ret);

        /* Confirm address can be DMA'd by device */
        if (address_needs_mapping(hwdev, dev_addr)) {
                printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
                       (unsigned long long)*hwdev->dma_mask,
                       (unsigned long long)dev_addr);
                panic("swiotlb_alloc_coherent: allocated memory is out of "
                      "range for device");
        }
        *dma_handle = dev_addr;
        return ret;
}

void
swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
                      dma_addr_t dma_handle)
{
        WARN_ON(irqs_disabled());
        if (!(vaddr >= (void *)io_tlb_start
              && vaddr < (void *)io_tlb_end))
                free_pages((unsigned long) vaddr, get_order(size));
        else
                /* DMA_TO_DEVICE to avoid memcpy in unmap_single */
                swiotlb_unmap_single(hwdev, dma_handle, size, DMA_TO_DEVICE);
}
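
/*
 * Illustrative sketch of how the coherent pair above might be used from a
 * driver; the function name, "dev" and the 4 KB size are hypothetical, and
 * real drivers would normally go through dma_alloc_coherent() rather than
 * calling swiotlb directly.
 */
#if 0
static int example_alloc_ring(struct device *dev)
{
        dma_addr_t ring_dma;
        void *ring;

        ring = swiotlb_alloc_coherent(dev, 4096, &ring_dma, GFP_KERNEL);
        if (!ring)
                return -ENOMEM;
        /* ... program ring_dma into the device, touch "ring" from the CPU ... */
        swiotlb_free_coherent(dev, 4096, ring, ring_dma);
        return 0;
}
#endif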

static void
swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
{
        /*
         * Ran out of IOMMU space for this operation. This is very bad.
         * Unfortunately the drivers cannot handle this operation properly
         * unless they check for dma_mapping_error (most don't).
         * When the mapping is small enough return a static buffer to limit
         * the damage, or panic when the transfer is too big.
         */
        printk(KERN_ERR "DMA: Out of SW-IOMMU space for %zu bytes at "
               "device %s\n", size, dev ? dev->bus_id : "?");

        if (size > io_tlb_overflow && do_panic) {
                if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
                        panic("DMA: Memory would be corrupted\n");
                if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
                        panic("DMA: Random memory would be DMAed\n");
        }
}

/*
 * Map a single buffer of the indicated size for DMA in streaming mode.  The
 * physical address to use is returned.
 *
 * Once the device is given the dma address, the device owns this memory until
 * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed.
 */
dma_addr_t
swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
                         int dir, struct dma_attrs *attrs)
{
        dma_addr_t dev_addr = virt_to_bus(ptr);
        void *map;

        BUG_ON(dir == DMA_NONE);
        /*
         * If the pointer passed in happens to be in the device's DMA window,
         * we can safely return the device addr and not worry about bounce
         * buffering it.
         */
        if (!address_needs_mapping(hwdev, dev_addr) && !swiotlb_force)
                return dev_addr;

        /*
         * Oh well, have to allocate and map a bounce buffer.
         */
        map = map_single(hwdev, ptr, size, dir);
        if (!map) {
                swiotlb_full(hwdev, size, dir, 1);
                map = io_tlb_overflow_buffer;
        }

        dev_addr = virt_to_bus(map);

        /*
         * Ensure that the address returned is DMA'ble
         */
        if (address_needs_mapping(hwdev, dev_addr))
                panic("map_single: bounce buffer is not DMA'ble");

        return dev_addr;
}
EXPORT_SYMBOL(swiotlb_map_single_attrs);

dma_addr_t
swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
{
        return swiotlb_map_single_attrs(hwdev, ptr, size, dir, NULL);
}
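
/*
 * Illustrative sketch of the streaming path above, including the
 * dma_mapping_error check that swiotlb_full() complains most drivers skip.
 * The function name, "dev", "buf" and "len" are hypothetical.
 */
#if 0
static int example_send(struct device *dev, void *buf, size_t len)
{
        dma_addr_t dma;

        dma = swiotlb_map_single(dev, buf, len, DMA_TO_DEVICE);
        if (swiotlb_dma_mapping_error(dma))
                return -ENOMEM;         /* fell back to the overflow buffer */
        /* ... hand "dma" to the device and wait for the transfer ... */
        swiotlb_unmap_single(dev, dma, len, DMA_TO_DEVICE);
        return 0;
}
#endif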

/*
 * Unmap a single streaming mode DMA translation.  The dma_addr and size must
 * match what was provided for in a previous swiotlb_map_single call.  All
 * other usages are undefined.
 *
 * After this call, reads by the cpu to the buffer are guaranteed to see
 * whatever the device wrote there.
 */
void
swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr,
                           size_t size, int dir, struct dma_attrs *attrs)
{
        char *dma_addr = bus_to_virt(dev_addr);

        BUG_ON(dir == DMA_NONE);
        if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
                unmap_single(hwdev, dma_addr, size, dir);
        else if (dir == DMA_FROM_DEVICE)
                dma_mark_clean(dma_addr, size);
}
EXPORT_SYMBOL(swiotlb_unmap_single_attrs);

void
swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size,
                     int dir)
{
        return swiotlb_unmap_single_attrs(hwdev, dev_addr, size, dir, NULL);
}

/*
 * Make physical memory consistent for a single streaming mode DMA translation
 * after a transfer.
 *
 * If you perform a swiotlb_map_single() but wish to interrogate the buffer
 * using the cpu, yet do not wish to tear down the dma mapping, you must
 * call this function before doing so.  At the next point you give the dma
 * address back to the card, you must first perform a
 * swiotlb_dma_sync_for_device, and then the device again owns the buffer
 */
static void
swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
                    size_t size, int dir, int target)
{
        char *dma_addr = bus_to_virt(dev_addr);

        BUG_ON(dir == DMA_NONE);
        if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
                sync_single(hwdev, dma_addr, size, dir, target);
        else if (dir == DMA_FROM_DEVICE)
                dma_mark_clean(dma_addr, size);
}

void
swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
                            size_t size, int dir)
{
        swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU);
}

void
swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
                               size_t size, int dir)
{
        swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE);
}

/*
 * Same as above, but for a sub-range of the mapping.
 */
static void
swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
                          unsigned long offset, size_t size,
                          int dir, int target)
{
        char *dma_addr = bus_to_virt(dev_addr) + offset;

        BUG_ON(dir == DMA_NONE);
        if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
                sync_single(hwdev, dma_addr, size, dir, target);
        else if (dir == DMA_FROM_DEVICE)
                dma_mark_clean(dma_addr, size);
}

void
swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
                                  unsigned long offset, size_t size, int dir)
{
        swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
                                  SYNC_FOR_CPU);
}

void
swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr,
                                     unsigned long offset, size_t size, int dir)
{
        swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
                                  SYNC_FOR_DEVICE);
}
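
/*
 * Illustrative sketch of the sync pair above for a long-lived
 * DMA_FROM_DEVICE mapping that is reused instead of being remapped for
 * every transfer; the function name, "dev", "dma", "buf" and "len" are
 * hypothetical.
 */
#if 0
static void example_poll_rx(struct device *dev, dma_addr_t dma,
                            void *buf, size_t len)
{
        /* the device has finished writing; make the data visible to the CPU */
        swiotlb_sync_single_for_cpu(dev, dma, len, DMA_FROM_DEVICE);
        /* ... inspect "buf" ... */
        /* hand the buffer back to the device for the next transfer */
        swiotlb_sync_single_for_device(dev, dma, len, DMA_FROM_DEVICE);
}
#endif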

void swiotlb_unmap_sg_attrs(struct device *, struct scatterlist *, int, int,
                            struct dma_attrs *);
/*
 * Map a set of buffers described by scatterlist in streaming mode for DMA.
 * This is the scatter-gather version of the above swiotlb_map_single
 * interface.  Here the scatter gather list elements are each tagged with the
 * appropriate dma address and length.  They are obtained via
 * sg_dma_{address,length}(SG).
 *
 * NOTE: An implementation may be able to use a smaller number of
 * DMA address/length pairs than there are SG table elements.
 * (for example via virtual mapping capabilities)
 * The routine returns the number of addr/length pairs actually
 * used, at most nents.
 *
 * Device ownership issues as mentioned above for swiotlb_map_single are the
 * same here.
 */
int
swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
                     int dir, struct dma_attrs *attrs)
{
        struct scatterlist *sg;
        void *addr;
        dma_addr_t dev_addr;
        int i;

        BUG_ON(dir == DMA_NONE);

        for_each_sg(sgl, sg, nelems, i) {
                addr = SG_ENT_VIRT_ADDRESS(sg);
                dev_addr = virt_to_bus(addr);
                if (swiotlb_force || address_needs_mapping(hwdev, dev_addr)) {
                        void *map = map_single(hwdev, addr, sg->length, dir);
                        if (!map) {
                                /* Don't panic here, we expect map_sg users
                                   to do proper error handling. */
                                swiotlb_full(hwdev, sg->length, dir, 0);
                                swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
                                                       attrs);
                                sgl[0].dma_length = 0;
                                return 0;
                        }
                        sg->dma_address = virt_to_bus(map);
                } else
                        sg->dma_address = dev_addr;
                sg->dma_length = sg->length;
        }
        return nelems;
}
EXPORT_SYMBOL(swiotlb_map_sg_attrs);

int
swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
               int dir)
{
        return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL);
}

/*
 * Unmap a set of streaming mode DMA translations.  Again, cpu read rules
 * concerning calls here are the same as for swiotlb_unmap_single() above.
 */
void
swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
                       int nelems, int dir, struct dma_attrs *attrs)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(dir == DMA_NONE);

        for_each_sg(sgl, sg, nelems, i) {
                if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
                        unmap_single(hwdev, bus_to_virt(sg->dma_address),
                                     sg->dma_length, dir);
                else if (dir == DMA_FROM_DEVICE)
                        dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length);
        }
}
EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);

void
swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
                 int dir)
{
        return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL);
}

/*
 * Make physical memory consistent for a set of streaming mode DMA translations
 * after a transfer.
 *
 * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
 * and usage.
 */
static void
swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
                int nelems, int dir, int target)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(dir == DMA_NONE);

        for_each_sg(sgl, sg, nelems, i) {
                if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
                        sync_single(hwdev, bus_to_virt(sg->dma_address),
                                    sg->dma_length, dir, target);
                else if (dir == DMA_FROM_DEVICE)
                        dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length);
        }
}

void
swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
                        int nelems, int dir)
{
        swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU);
}

void
swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
                           int nelems, int dir)
{
        swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
}
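
/*
 * Illustrative sketch of the scatter-gather interface above; the function
 * name, "dev", "sgl" and "nents" are hypothetical, and "sgl" is assumed to
 * be an already-initialised scatterlist.
 */
#if 0
static int example_map_request(struct device *dev, struct scatterlist *sgl,
                               int nents)
{
        struct scatterlist *sg;
        int i, mapped;

        mapped = swiotlb_map_sg(dev, sgl, nents, DMA_TO_DEVICE);
        if (!mapped)
                return -ENOMEM;
        for_each_sg(sgl, sg, mapped, i) {
                /* program sg_dma_address(sg) / sg_dma_length(sg) into the device */
        }
        swiotlb_unmap_sg(dev, sgl, mapped, DMA_TO_DEVICE);
        return 0;
}
#endif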

int
swiotlb_dma_mapping_error(dma_addr_t dma_addr)
{
        return (dma_addr == virt_to_bus(io_tlb_overflow_buffer));
}

/*
 * Return whether the given device DMA address mask can be supported
 * properly.  For example, if your device can only drive the low 24-bits
 * during bus mastering, then you would pass 0x00ffffff as the mask to
 * this function.
 */
int
swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
        return virt_to_bus(io_tlb_end - 1) <= mask;
}

EXPORT_SYMBOL(swiotlb_map_single);
EXPORT_SYMBOL(swiotlb_unmap_single);
EXPORT_SYMBOL(swiotlb_map_sg);
EXPORT_SYMBOL(swiotlb_unmap_sg);
EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);
EXPORT_SYMBOL(swiotlb_sync_single_for_device);
EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu);
EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);
EXPORT_SYMBOL(swiotlb_sync_sg_for_device);
EXPORT_SYMBOL(swiotlb_dma_mapping_error);
EXPORT_SYMBOL(swiotlb_alloc_coherent);
EXPORT_SYMBOL(swiotlb_free_coherent);
EXPORT_SYMBOL(swiotlb_dma_supported);
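
/*
 * Illustrative sketch of swiotlb_dma_supported() as described in its comment
 * above: a driver limited to 24-bit addressing could probe whether the bounce
 * pool is reachable before committing to that mask.  The function name and
 * "dev" are hypothetical.
 */
#if 0
static int example_check_mask(struct device *dev)
{
        if (!swiotlb_dma_supported(dev, 0x00ffffffULL))
                return -EIO;    /* the IO TLB ends above what a 24-bit device can reach */
        return 0;
}
#endif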