Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

iommu/pages: Add support for incoherent IOMMU page table walkers

Some IOMMU HW cannot snoop the CPU cache when it walks the IO page tables.
The CPU is required to flush the cache to make changes visible to the HW.

Provide some helpers from iommu-pages to manage this. The helpers combine
both the ARM and x86 (used in Intel VT-d) versions of the cache flushing
under a single API.

The ARM version uses the DMA API to perform the cache flushing, on the
assumption that the iommu is using a direct mapping and is already marked
incoherent. The helpers will do the DMA API calls to set things up and
keep track of DMA mapped folios using a bit in the ioptdesc so that
unmapping on error paths is cleaner.

The Intel version just calls the arch cache flush call directly and has no
need to clean up prior to destruction.

Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

authored by

Jason Gunthorpe and committed by
Joerg Roedel
36ae67b1 bc5233c0

+183 -2
+134
drivers/iommu/iommu-pages.c
··· 4 4 * Pasha Tatashin <pasha.tatashin@soleen.com> 5 5 */ 6 6 #include "iommu-pages.h" 7 + #include <linux/dma-mapping.h> 7 8 #include <linux/gfp.h> 8 9 #include <linux/mm.h> 9 10 ··· 23 22 #undef IOPTDESC_MATCH 24 23 static_assert(sizeof(struct ioptdesc) <= sizeof(struct page)); 25 24 25 + static inline size_t ioptdesc_mem_size(struct ioptdesc *desc) 26 + { 27 + return 1UL << (folio_order(ioptdesc_folio(desc)) + PAGE_SHIFT); 28 + } 29 + 26 30 /** 27 31 * iommu_alloc_pages_node_sz - Allocate a zeroed page of a given size from 28 32 * specific NUMA node ··· 42 36 */ 43 37 void *iommu_alloc_pages_node_sz(int nid, gfp_t gfp, size_t size) 44 38 { 39 + struct ioptdesc *iopt; 45 40 unsigned long pgcnt; 46 41 struct folio *folio; 47 42 unsigned int order; ··· 67 60 if (unlikely(!folio)) 68 61 return NULL; 69 62 63 + iopt = folio_ioptdesc(folio); 64 + iopt->incoherent = false; 65 + 70 66 /* 71 67 * All page allocations that should be reported to as "iommu-pagetables" 72 68 * to userspace must use one of the functions below. This includes ··· 91 81 { 92 82 struct folio *folio = ioptdesc_folio(iopt); 93 83 const unsigned long pgcnt = folio_nr_pages(folio); 84 + 85 + if (IOMMU_PAGES_USE_DMA_API) 86 + WARN_ON_ONCE(iopt->incoherent); 94 87 95 88 mod_node_page_state(folio_pgdat(folio), NR_IOMMU_PAGES, -pgcnt); 96 89 lruvec_stat_mod_folio(folio, NR_SECONDARY_PAGETABLE, -pgcnt); ··· 130 117 __iommu_free_desc(iopt); 131 118 } 132 119 EXPORT_SYMBOL_GPL(iommu_put_pages_list); 120 + 121 + /** 122 + * iommu_pages_start_incoherent - Setup the page for cache incoherent operation 123 + * @virt: The page to setup 124 + * @dma_dev: The iommu device 125 + * 126 + * For incoherent memory this will use the DMA API to manage the cache flushing 127 + * on some arches. This is a lot of complexity compared to just calling 128 + * arch_sync_dma_for_device(), but it is what the existing ARM iommu drivers 129 + * have been doing. 
The DMA API requires keeping track of the DMA map and 130 + * freeing it when required. This keeps track of the dma map inside the ioptdesc 131 + * so that error paths are simple for the caller. 132 + */ 133 + int iommu_pages_start_incoherent(void *virt, struct device *dma_dev) 134 + { 135 + struct ioptdesc *iopt = virt_to_ioptdesc(virt); 136 + dma_addr_t dma; 137 + 138 + if (WARN_ON(iopt->incoherent)) 139 + return -EINVAL; 140 + 141 + if (!IOMMU_PAGES_USE_DMA_API) { 142 + iommu_pages_flush_incoherent(dma_dev, virt, 0, 143 + ioptdesc_mem_size(iopt)); 144 + } else { 145 + dma = dma_map_single(dma_dev, virt, ioptdesc_mem_size(iopt), 146 + DMA_TO_DEVICE); 147 + if (dma_mapping_error(dma_dev, dma)) 148 + return -EINVAL; 149 + 150 + /* 151 + * The DMA API is not allowed to do anything other than DMA 152 + * direct. It would be nice to also check 153 + * dev_is_dma_coherent(dma_dev)); 154 + */ 155 + if (WARN_ON(dma != virt_to_phys(virt))) { 156 + dma_unmap_single(dma_dev, dma, ioptdesc_mem_size(iopt), 157 + DMA_TO_DEVICE); 158 + return -EOPNOTSUPP; 159 + } 160 + } 161 + 162 + iopt->incoherent = 1; 163 + return 0; 164 + } 165 + EXPORT_SYMBOL_GPL(iommu_pages_start_incoherent); 166 + 167 + /** 168 + * iommu_pages_start_incoherent_list - Make a list of pages incoherent 169 + * @list: The list of pages to setup 170 + * @dma_dev: The iommu device 171 + * 172 + * Perform iommu_pages_start_incoherent() across all of list. 173 + * 174 + * If this fails the caller must call iommu_pages_stop_incoherent_list(). 
175 + */ 176 + int iommu_pages_start_incoherent_list(struct iommu_pages_list *list, 177 + struct device *dma_dev) 178 + { 179 + struct ioptdesc *cur; 180 + int ret; 181 + 182 + list_for_each_entry(cur, &list->pages, iopt_freelist_elm) { 183 + if (WARN_ON(cur->incoherent)) 184 + continue; 185 + 186 + ret = iommu_pages_start_incoherent( 187 + folio_address(ioptdesc_folio(cur)), dma_dev); 188 + if (ret) 189 + return ret; 190 + } 191 + return 0; 192 + } 193 + EXPORT_SYMBOL_GPL(iommu_pages_start_incoherent_list); 194 + 195 + /** 196 + * iommu_pages_stop_incoherent_list - Undo incoherence across a list 197 + * @list: The list of pages to release 198 + * @dma_dev: The iommu device 199 + * 200 + * Revert iommu_pages_start_incoherent() across all of the list. Pages that did 201 + * not call or succeed iommu_pages_start_incoherent() will be ignored. 202 + */ 203 + #if IOMMU_PAGES_USE_DMA_API 204 + void iommu_pages_stop_incoherent_list(struct iommu_pages_list *list, 205 + struct device *dma_dev) 206 + { 207 + struct ioptdesc *cur; 208 + 209 + list_for_each_entry(cur, &list->pages, iopt_freelist_elm) { 210 + struct folio *folio = ioptdesc_folio(cur); 211 + 212 + if (!cur->incoherent) 213 + continue; 214 + dma_unmap_single(dma_dev, virt_to_phys(folio_address(folio)), 215 + ioptdesc_mem_size(cur), DMA_TO_DEVICE); 216 + cur->incoherent = 0; 217 + } 218 + } 219 + EXPORT_SYMBOL_GPL(iommu_pages_stop_incoherent_list); 220 + 221 + /** 222 + * iommu_pages_free_incoherent - Free an incoherent page 223 + * @virt: virtual address of the page to be freed. 224 + * @dma_dev: The iommu device 225 + * 226 + * If the page is incoherent it made coherent again then freed. 
227 + */ 228 + void iommu_pages_free_incoherent(void *virt, struct device *dma_dev) 229 + { 230 + struct ioptdesc *iopt = virt_to_ioptdesc(virt); 231 + 232 + if (iopt->incoherent) { 233 + dma_unmap_single(dma_dev, virt_to_phys(virt), 234 + ioptdesc_mem_size(iopt), DMA_TO_DEVICE); 235 + iopt->incoherent = 0; 236 + } 237 + __iommu_free_desc(iopt); 238 + } 239 + EXPORT_SYMBOL_GPL(iommu_pages_free_incoherent); 240 + #endif
+49 -2
drivers/iommu/iommu-pages.h
··· 21 21 22 22 struct list_head iopt_freelist_elm; 23 23 unsigned long __page_mapping; 24 - pgoff_t __index; 24 + union { 25 + u8 incoherent; 26 + pgoff_t __index; 27 + }; 25 28 void *_private; 26 29 27 30 unsigned int __page_type; ··· 101 98 return iommu_alloc_pages_node_sz(NUMA_NO_NODE, gfp, size); 102 99 } 103 100 104 - #endif /* __IOMMU_PAGES_H */ 101 + int iommu_pages_start_incoherent(void *virt, struct device *dma_dev); 102 + int iommu_pages_start_incoherent_list(struct iommu_pages_list *list, 103 + struct device *dma_dev); 104 + 105 + #ifdef CONFIG_X86 106 + #define IOMMU_PAGES_USE_DMA_API 0 107 + #include <linux/cacheflush.h> 108 + 109 + static inline void iommu_pages_flush_incoherent(struct device *dma_dev, 110 + void *virt, size_t offset, 111 + size_t len) 112 + { 113 + clflush_cache_range(virt + offset, len); 114 + } 115 + static inline void 116 + iommu_pages_stop_incoherent_list(struct iommu_pages_list *list, 117 + struct device *dma_dev) 118 + { 119 + /* 120 + * For performance leave the incoherent flag alone which turns this into 121 + * a NOP. For X86 the rest of the stop/free flow ignores the flag. 122 + */ 123 + } 124 + static inline void iommu_pages_free_incoherent(void *virt, 125 + struct device *dma_dev) 126 + { 127 + iommu_free_pages(virt); 128 + } 129 + #else 130 + #define IOMMU_PAGES_USE_DMA_API 1 131 + #include <linux/dma-mapping.h> 132 + 133 + static inline void iommu_pages_flush_incoherent(struct device *dma_dev, 134 + void *virt, size_t offset, 135 + size_t len) 136 + { 137 + dma_sync_single_for_device(dma_dev, (uintptr_t)virt + offset, len, 138 + DMA_TO_DEVICE); 139 + } 140 + void iommu_pages_stop_incoherent_list(struct iommu_pages_list *list, 141 + struct device *dma_dev); 142 + void iommu_pages_free_incoherent(void *virt, struct device *dma_dev); 143 + #endif 144 + 145 + #endif /* __IOMMU_PAGES_H */