Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

dma-mapping: treat dev->bus_dma_mask as a DMA limit

Using a mask to represent bus DMA constraints has a set of limitations.
The biggest one is that it can only hold a power of two (minus one). The
DMA mapping code is already aware of this and treats dev->bus_dma_mask
as a limit. This quirk is already used by some architectures, although
it is still rare.

With the introduction of the Raspberry Pi 4 we've found a new contender
for the use of bus DMA limits, as its PCIe bus can only address the
lower 3GB of memory (of a total of 4GB). This is impossible to represent
with a mask. To make things worse, the device-tree code rounds
non-power-of-two bus DMA limits up to the next power of two, which is
unacceptable in this case.

In light of this, rename dev->bus_dma_mask to dev->bus_dma_limit all
over the tree and treat it as such. Note that dev->bus_dma_limit should
contain the highest accessible DMA address.

Signed-off-by: Nicolas Saenz Julienne <nsaenzjulienne@suse.de>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>

authored by

Nicolas Saenz Julienne and committed by
Christoph Hellwig
a7ba70f1 d7293f79

+46 -53
+8 -8
arch/mips/pci/fixup-sb1250.c
··· 21 21 22 22 /* 23 23 * The BCM1250, etc. PCI host bridge does not support DAC on its 32-bit 24 - * bus, so we set the bus's DMA mask accordingly. However the HT link 24 + * bus, so we set the bus's DMA limit accordingly. However the HT link 25 25 * down the artificial PCI-HT bridge supports 40-bit addressing and the 26 26 * SP1011 HT-PCI bridge downstream supports both DAC and a 64-bit bus 27 27 * width, so we record the PCI-HT bridge's secondary and subordinate bus 28 - * numbers and do not set the mask for devices present in the inclusive 28 + * numbers and do not set the limit for devices present in the inclusive 29 29 * range of those. 30 30 */ 31 - struct sb1250_bus_dma_mask_exclude { 31 + struct sb1250_bus_dma_limit_exclude { 32 32 bool set; 33 33 unsigned char start; 34 34 unsigned char end; 35 35 }; 36 36 37 - static int sb1250_bus_dma_mask(struct pci_dev *dev, void *data) 37 + static int sb1250_bus_dma_limit(struct pci_dev *dev, void *data) 38 38 { 39 - struct sb1250_bus_dma_mask_exclude *exclude = data; 39 + struct sb1250_bus_dma_limit_exclude *exclude = data; 40 40 bool exclude_this; 41 41 bool ht_bridge; 42 42 ··· 55 55 exclude->start, exclude->end); 56 56 } else { 57 57 dev_dbg(&dev->dev, "disabling DAC for device"); 58 - dev->dev.bus_dma_mask = DMA_BIT_MASK(32); 58 + dev->dev.bus_dma_limit = DMA_BIT_MASK(32); 59 59 } 60 60 61 61 return 0; ··· 63 63 64 64 static void quirk_sb1250_pci_dac(struct pci_dev *dev) 65 65 { 66 - struct sb1250_bus_dma_mask_exclude exclude = { .set = false }; 66 + struct sb1250_bus_dma_limit_exclude exclude = { .set = false }; 67 67 68 - pci_walk_bus(dev->bus, sb1250_bus_dma_mask, &exclude); 68 + pci_walk_bus(dev->bus, sb1250_bus_dma_limit, &exclude); 69 69 } 70 70 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SIBYTE, PCI_DEVICE_ID_BCM1250_PCI, 71 71 quirk_sb1250_pci_dac);
+3 -3
arch/powerpc/sysdev/fsl_pci.c
··· 115 115 { 116 116 struct pci_controller *hose = pci_bus_to_host(pdev->bus); 117 117 118 - pdev->dev.bus_dma_mask = 119 - hose->dma_window_base_cur + hose->dma_window_size; 118 + pdev->dev.bus_dma_limit = 119 + hose->dma_window_base_cur + hose->dma_window_size - 1; 120 120 } 121 121 122 122 static void setup_swiotlb_ops(struct pci_controller *hose) ··· 135 135 * mapping that allows addressing any RAM address from across PCI. 136 136 */ 137 137 if (dev_is_pci(dev) && dma_mask >= pci64_dma_offset * 2 - 1) { 138 - dev->bus_dma_mask = 0; 138 + dev->bus_dma_limit = 0; 139 139 dev->archdata.dma_offset = pci64_dma_offset; 140 140 } 141 141 }
+1 -1
arch/x86/kernel/pci-dma.c
··· 146 146 147 147 static int via_no_dac_cb(struct pci_dev *pdev, void *data) 148 148 { 149 - pdev->dev.bus_dma_mask = DMA_BIT_MASK(32); 149 + pdev->dev.bus_dma_limit = DMA_BIT_MASK(32); 150 150 return 0; 151 151 } 152 152
+1 -1
arch/x86/mm/mem_encrypt.c
··· 367 367 if (sme_active()) { 368 368 u64 dma_enc_mask = DMA_BIT_MASK(__ffs64(sme_me_mask)); 369 369 u64 dma_dev_mask = min_not_zero(dev->coherent_dma_mask, 370 - dev->bus_dma_mask); 370 + dev->bus_dma_limit); 371 371 372 372 if (dma_dev_mask <= dma_enc_mask) 373 373 return true;
+1 -1
arch/x86/pci/sta2x11-fixup.c
··· 143 143 144 144 dev->dma_pfn_offset = PFN_DOWN(-amba_base); 145 145 146 - dev->bus_dma_mask = max_amba_addr; 146 + dev->bus_dma_limit = max_amba_addr; 147 147 pci_set_consistent_dma_mask(pdev, max_amba_addr); 148 148 pci_set_dma_mask(pdev, max_amba_addr); 149 149
+7 -13
drivers/acpi/arm64/iort.c
··· 1057 1057 */ 1058 1058 void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) 1059 1059 { 1060 - u64 mask, dmaaddr = 0, size = 0, offset = 0; 1061 - int ret, msb; 1060 + u64 end, mask, dmaaddr = 0, size = 0, offset = 0; 1061 + int ret; 1062 1062 1063 1063 /* 1064 1064 * If @dev is expected to be DMA-capable then the bus code that created ··· 1085 1085 } 1086 1086 1087 1087 if (!ret) { 1088 - msb = fls64(dmaaddr + size - 1); 1089 1088 /* 1090 - * Round-up to the power-of-two mask or set 1091 - * the mask to the whole 64-bit address space 1092 - * in case the DMA region covers the full 1093 - * memory window. 1089 + * Limit coherent and dma mask based on size retrieved from 1090 + * firmware. 1094 1091 */ 1095 - mask = msb == 64 ? U64_MAX : (1ULL << msb) - 1; 1096 - /* 1097 - * Limit coherent and dma mask based on size 1098 - * retrieved from firmware. 1099 - */ 1100 - dev->bus_dma_mask = mask; 1092 + end = dmaaddr + size - 1; 1093 + mask = DMA_BIT_MASK(ilog2(end) + 1); 1094 + dev->bus_dma_limit = end; 1101 1095 dev->coherent_dma_mask = mask; 1102 1096 *dev->dma_mask = mask; 1103 1097 }
+1 -1
drivers/ata/ahci.c
··· 897 897 * value, don't extend it here. This happens on STA2X11, for example. 898 898 * 899 899 * XXX: manipulating the DMA mask from platform code is completely 900 - * bogus, platform code should use dev->bus_dma_mask instead.. 900 + * bogus, platform code should use dev->bus_dma_limit instead.. 901 901 */ 902 902 if (pdev->dma_mask && pdev->dma_mask < DMA_BIT_MASK(32)) 903 903 return 0;
+1 -2
drivers/iommu/dma-iommu.c
··· 405 405 if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1))) 406 406 iova_len = roundup_pow_of_two(iova_len); 407 407 408 - if (dev->bus_dma_mask) 409 - dma_limit &= dev->bus_dma_mask; 408 + dma_limit = min_not_zero(dma_limit, dev->bus_dma_limit); 410 409 411 410 if (domain->geometry.force_aperture) 412 411 dma_limit = min(dma_limit, domain->geometry.aperture_end);
+5 -4
drivers/of/device.c
··· 93 93 bool coherent; 94 94 unsigned long offset; 95 95 const struct iommu_ops *iommu; 96 - u64 mask; 96 + u64 mask, end; 97 97 98 98 ret = of_dma_get_range(np, &dma_addr, &paddr, &size); 99 99 if (ret < 0) { ··· 148 148 * Limit coherent and dma mask based on size and default mask 149 149 * set by the driver. 150 150 */ 151 - mask = DMA_BIT_MASK(ilog2(dma_addr + size - 1) + 1); 151 + end = dma_addr + size - 1; 152 + mask = DMA_BIT_MASK(ilog2(end) + 1); 152 153 dev->coherent_dma_mask &= mask; 153 154 *dev->dma_mask &= mask; 154 - /* ...but only set bus mask if we found valid dma-ranges earlier */ 155 + /* ...but only set bus limit if we found valid dma-ranges earlier */ 155 156 if (!ret) 156 - dev->bus_dma_mask = mask; 157 + dev->bus_dma_limit = end; 157 158 158 159 coherent = of_dma_is_coherent(np); 159 160 dev_dbg(dev, "device is%sdma coherent\n",
+3 -3
include/linux/device.h
··· 1186 1186 * @coherent_dma_mask: Like dma_mask, but for alloc_coherent mapping as not all 1187 1187 * hardware supports 64-bit addresses for consistent allocations 1188 1188 * such descriptors. 1189 - * @bus_dma_mask: Mask of an upstream bridge or bus which imposes a smaller DMA 1190 - * limit than the device itself supports. 1189 + * @bus_dma_limit: Limit of an upstream bridge or bus which imposes a smaller 1190 + * DMA limit than the device itself supports. 1191 1191 * @dma_pfn_offset: offset of DMA memory range relatively of RAM 1192 1192 * @dma_parms: A low level driver may set these to teach IOMMU code about 1193 1193 * segment limitations. ··· 1270 1270 not all hardware supports 1271 1271 64 bit addresses for consistent 1272 1272 allocations such descriptors. */ 1273 - u64 bus_dma_mask; /* upstream dma_mask constraint */ 1273 + u64 bus_dma_limit; /* upstream dma constraint */ 1274 1274 unsigned long dma_pfn_offset; 1275 1275 1276 1276 struct device_dma_parameters *dma_parms;
+1 -1
include/linux/dma-direct.h
··· 63 63 min(addr, end) < phys_to_dma(dev, PFN_PHYS(min_low_pfn))) 64 64 return false; 65 65 66 - return end <= min_not_zero(*dev->dma_mask, dev->bus_dma_mask); 66 + return end <= min_not_zero(*dev->dma_mask, dev->bus_dma_limit); 67 67 } 68 68 69 69 u64 dma_direct_get_required_mask(struct device *dev);
+1 -1
include/linux/dma-mapping.h
··· 697 697 */ 698 698 static inline bool dma_addressing_limited(struct device *dev) 699 699 { 700 - return min_not_zero(dma_get_mask(dev), dev->bus_dma_mask) < 700 + return min_not_zero(dma_get_mask(dev), dev->bus_dma_limit) < 701 701 dma_get_required_mask(dev); 702 702 } 703 703
+13 -14
kernel/dma/direct.c
··· 27 27 { 28 28 if (!dev->dma_mask) { 29 29 dev_err_once(dev, "DMA map on device without dma_mask\n"); 30 - } else if (*dev->dma_mask >= DMA_BIT_MASK(32) || dev->bus_dma_mask) { 30 + } else if (*dev->dma_mask >= DMA_BIT_MASK(32) || dev->bus_dma_limit) { 31 31 dev_err_once(dev, 32 - "overflow %pad+%zu of DMA mask %llx bus mask %llx\n", 33 - &dma_addr, size, *dev->dma_mask, dev->bus_dma_mask); 32 + "overflow %pad+%zu of DMA mask %llx bus limit %llx\n", 33 + &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit); 34 34 } 35 35 WARN_ON_ONCE(1); 36 36 } ··· 57 57 } 58 58 59 59 static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, 60 - u64 *phys_mask) 60 + u64 *phys_limit) 61 61 { 62 - if (dev->bus_dma_mask && dev->bus_dma_mask < dma_mask) 63 - dma_mask = dev->bus_dma_mask; 62 + u64 dma_limit = min_not_zero(dma_mask, dev->bus_dma_limit); 64 63 65 64 if (force_dma_unencrypted(dev)) 66 - *phys_mask = __dma_to_phys(dev, dma_mask); 65 + *phys_limit = __dma_to_phys(dev, dma_limit); 67 66 else 68 - *phys_mask = dma_to_phys(dev, dma_mask); 67 + *phys_limit = dma_to_phys(dev, dma_limit); 69 68 70 69 /* 71 70 * Optimistically try the zone that the physical address mask falls ··· 74 75 * Note that GFP_DMA32 and GFP_DMA are no ops without the corresponding 75 76 * zones. 
76 77 */ 77 - if (*phys_mask <= DMA_BIT_MASK(zone_dma_bits)) 78 + if (*phys_limit <= DMA_BIT_MASK(zone_dma_bits)) 78 79 return GFP_DMA; 79 - if (*phys_mask <= DMA_BIT_MASK(32)) 80 + if (*phys_limit <= DMA_BIT_MASK(32)) 80 81 return GFP_DMA32; 81 82 return 0; 82 83 } ··· 84 85 static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) 85 86 { 86 87 return phys_to_dma_direct(dev, phys) + size - 1 <= 87 - min_not_zero(dev->coherent_dma_mask, dev->bus_dma_mask); 88 + min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit); 88 89 } 89 90 90 91 struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, ··· 93 94 size_t alloc_size = PAGE_ALIGN(size); 94 95 int node = dev_to_node(dev); 95 96 struct page *page = NULL; 96 - u64 phys_mask; 97 + u64 phys_limit; 97 98 98 99 if (attrs & DMA_ATTR_NO_WARN) 99 100 gfp |= __GFP_NOWARN; ··· 101 102 /* we always manually zero the memory once we are done: */ 102 103 gfp &= ~__GFP_ZERO; 103 104 gfp |= __dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, 104 - &phys_mask); 105 + &phys_limit); 105 106 page = dma_alloc_contiguous(dev, alloc_size, gfp); 106 107 if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { 107 108 dma_free_contiguous(dev, page, alloc_size); ··· 115 116 page = NULL; 116 117 117 118 if (IS_ENABLED(CONFIG_ZONE_DMA32) && 118 - phys_mask < DMA_BIT_MASK(64) && 119 + phys_limit < DMA_BIT_MASK(64) && 119 120 !(gfp & (GFP_DMA32 | GFP_DMA))) { 120 121 gfp |= GFP_DMA32; 121 122 goto again;