/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
 */
#ifndef __GENERIC_PT_IOMMU_H
#define __GENERIC_PT_IOMMU_H

#include <linux/generic_pt/common.h>
#include <linux/iommu.h>
#include <linux/mm_types.h>

struct iommu_iotlb_gather;
struct pt_iommu_ops;
struct pt_iommu_driver_ops;
struct iommu_dirty_bitmap;

/**
 * DOC: IOMMU Radix Page Table
 *
 * The IOMMU implementation of the Generic Page Table provides an ops struct
 * designed to pair with an iommu_domain to serve the DMA API, IOMMUFD and the
 * generic map/unmap interface.
 *
 * This interface uses caller-provided locking. The caller must have a VA
 * range lock concept that prevents concurrent threads from calling ops on the
 * same VA. Generally the range lock must be at least as large as a single map
 * call.
 */

/**
 * struct pt_iommu - Base structure for IOMMU page tables
 *
 * The format-specific struct will include this as the first member.
 */
struct pt_iommu {
	/**
	 * @domain: The core IOMMU domain. The driver should use a union to
	 * overlay this memory with its previously existing domain struct to
	 * create an alias.
	 */
	struct iommu_domain domain;

	/**
	 * @ops: Function pointers to access the API
	 */
	const struct pt_iommu_ops *ops;

	/**
	 * @driver_ops: Function pointers provided by the HW driver to help
	 * manage HW details like caches.
	 */
	const struct pt_iommu_driver_ops *driver_ops;

	/**
	 * @nid: Node ID to use for table memory allocations. The IOMMU driver
	 * may want to set the NID to the device's NID, if there are multiple
	 * table walkers.
	 */
	int nid;

	/**
	 * @iommu_device: Device pointer used for any DMA cache flushing when
	 * PT_FEAT_DMA_INCOHERENT is set. This is the IOMMU device that
	 * created the page table, which must have DMA ops that perform cache
	 * flushing.
	 */
	struct device *iommu_device;
};

/**
 * struct pt_iommu_info - Details about the IOMMU page table
 *
 * Returned from pt_iommu_ops->get_info()
 */
struct pt_iommu_info {
	/**
	 * @pgsize_bitmap: A bitmask where each set bit indicates a page size
	 * that can be natively stored in the page table.
	 */
	u64 pgsize_bitmap;
};

struct pt_iommu_ops {
	/**
	 * @set_dirty: Make the iova write dirty
	 * @iommu_table: Table to manipulate
	 * @iova: IO virtual address to start
	 *
	 * This is only used by iommufd testing. It makes the iova dirty so
	 * that read_and_clear_dirty() will see it as dirty. Unlike all the
	 * other ops, this one is safe to call without holding any locks. It
	 * may return -EAGAIN if there is a race.
	 */
	int (*set_dirty)(struct pt_iommu *iommu_table, dma_addr_t iova);

	/**
	 * @get_info: Return the pt_iommu_info structure
	 * @iommu_table: Table to query
	 * @info: Filled in with details about the table
	 *
	 * Return some basic static information about the page table.
	 */
	void (*get_info)(struct pt_iommu *iommu_table,
			 struct pt_iommu_info *info);

	/**
	 * @deinit: Undo a format-specific init operation
	 * @iommu_table: Table to destroy
	 *
	 * Release all of the memory. The caller must have already removed the
	 * table from all HW access and all caches.
	 */
	void (*deinit)(struct pt_iommu *iommu_table);
};
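/*
 * Illustrative sketch, not taken from any driver: after a format table has
 * been initialized, its static properties can be read back through
 * get_info() and the supported page sizes published on the core domain.
 * "table" is a hypothetical struct pt_iommu_amdv1 instance:
 *
 *	struct pt_iommu_info info;
 *
 *	table->iommu.ops->get_info(&table->iommu, &info);
 *	table->iommu.domain.pgsize_bitmap = info.pgsize_bitmap;
 */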
/**
 * struct pt_iommu_driver_ops - HW IOTLB cache flushing operations
 *
 * The IOMMU driver should implement these using container_of(iommu_table) to
 * get to its iommu_domain derived structure. All ops can be called in atomic
 * contexts as they are buried under DMA API calls.
 */
struct pt_iommu_driver_ops {
	/**
	 * @change_top: Update the top of table pointer
	 * @iommu_table: Table to operate on
	 * @top_paddr: New CPU physical address of the top pointer
	 * @top_level: IOMMU PT level of the new top
	 *
	 * Called under the get_top_lock() spinlock. The driver must update
	 * all HW references to this domain with a new top address and
	 * configuration. On return, mappings placed in the new top must be
	 * reachable by the HW.
	 *
	 * top_level encodes the level in IOMMU PT format; level 0 is the
	 * smallest page size, increasing from there. This has to be
	 * translated to any HW specific format. During this call the new top
	 * will not be visible to any other API.
	 *
	 * This op is only used by PT_FEAT_DYNAMIC_TOP, and is required if
	 * enabled.
	 */
	void (*change_top)(struct pt_iommu *iommu_table, phys_addr_t top_paddr,
			   unsigned int top_level);

	/**
	 * @get_top_lock: Lock to hold when changing the table top
	 * @iommu_table: Table to operate on
	 *
	 * Return the lock to hold when changing the top of the page table
	 * that is stored in HW. The lock will be acquired prior to calling
	 * change_top() and released once the new top is fully visible.
	 *
	 * Typically this would be a lock that protects the iommu_domain's
	 * attachment list.
	 *
	 * This op is only used by PT_FEAT_DYNAMIC_TOP, and is required if
	 * enabled.
	 */
	spinlock_t *(*get_top_lock)(struct pt_iommu *iommu_table);
};

static inline void pt_iommu_deinit(struct pt_iommu *iommu_table)
{
	/*
	 * It is safe to call pt_iommu_deinit() before an init, or if init
	 * fails. The ops pointer will only become non-NULL if deinit needs to
	 * be run.
	 */
	if (iommu_table->ops)
		iommu_table->ops->deinit(iommu_table);
}

/**
 * struct pt_iommu_cfg - Common configuration values for all formats
 */
struct pt_iommu_cfg {
	/**
	 * @features: Features required. Only these features will be turned
	 * on. The feature list should reflect what the IOMMU HW is capable
	 * of.
	 */
	unsigned int features;
	/**
	 * @hw_max_vasz_lg2: Maximum VA size, as log2, that the IOMMU HW can
	 * support. This will imply the top level of the table.
	 */
	u8 hw_max_vasz_lg2;
	/**
	 * @hw_max_oasz_lg2: Maximum OA size, as log2, that the IOMMU HW can
	 * support. The format might select a lower maximum OA.
	 */
	u8 hw_max_oasz_lg2;
};
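/*
 * Illustrative sketch, not taken from any driver: a possible init/deinit flow
 * for an AMDv1 format table. The feature bit, sizes and starting_level below
 * are assumptions for a hypothetical IOMMU, and PT_FEAT_* values are assumed
 * to select bits in @features. The driver would also fill table->iommu.nid
 * and table->iommu.iommu_device before calling init:
 *
 *	struct pt_iommu_amdv1_cfg cfg = {
 *		.common = {
 *			.features = BIT(PT_FEAT_DMA_INCOHERENT),
 *			.hw_max_vasz_lg2 = 48,
 *			.hw_max_oasz_lg2 = 52,
 *		},
 *		.starting_level = 2,
 *	};
 *	int ret;
 *
 *	ret = pt_iommu_amdv1_init(table, &cfg, GFP_KERNEL);
 *	if (ret)
 *		return ret;
 *	...
 *	pt_iommu_deinit(&table->iommu);
 */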
/* Generate the exported function signatures from iommu_pt.h */
#define IOMMU_PROTOTYPES(fmt)                                                \
	phys_addr_t pt_iommu_##fmt##_iova_to_phys(struct iommu_domain *domain, \
						  dma_addr_t iova);          \
	int pt_iommu_##fmt##_map_pages(struct iommu_domain *domain,          \
				       unsigned long iova, phys_addr_t paddr, \
				       size_t pgsize, size_t pgcount,        \
				       int prot, gfp_t gfp, size_t *mapped); \
	size_t pt_iommu_##fmt##_unmap_pages(                                 \
		struct iommu_domain *domain, unsigned long iova,             \
		size_t pgsize, size_t pgcount,                               \
		struct iommu_iotlb_gather *iotlb_gather);                    \
	int pt_iommu_##fmt##_read_and_clear_dirty(                           \
		struct iommu_domain *domain, unsigned long iova, size_t size, \
		unsigned long flags, struct iommu_dirty_bitmap *dirty);      \
	int pt_iommu_##fmt##_init(struct pt_iommu_##fmt *table,              \
				  const struct pt_iommu_##fmt##_cfg *cfg,    \
				  gfp_t gfp);                                \
	void pt_iommu_##fmt##_hw_info(struct pt_iommu_##fmt *table,          \
				      struct pt_iommu_##fmt##_hw_info *info)
#define IOMMU_FORMAT(fmt, member)                                            \
	struct pt_iommu_##fmt {                                              \
		struct pt_iommu iommu;                                       \
		struct pt_##fmt member;                                      \
	};                                                                   \
	IOMMU_PROTOTYPES(fmt)

/*
 * A driver uses IOMMU_PT_DOMAIN_OPS to populate the iommu_domain_ops for the
 * iommu_pt.
 */
#define IOMMU_PT_DOMAIN_OPS(fmt)                                             \
	.iova_to_phys = &pt_iommu_##fmt##_iova_to_phys,                      \
	.map_pages = &pt_iommu_##fmt##_map_pages,                            \
	.unmap_pages = &pt_iommu_##fmt##_unmap_pages
#define IOMMU_PT_DIRTY_OPS(fmt)                                              \
	.read_and_clear_dirty = &pt_iommu_##fmt##_read_and_clear_dirty

/*
 * The driver should set up its domain struct like:
 *	union {
 *		struct iommu_domain domain;
 *		struct pt_iommu_xxx xx;
 *	};
 *	PT_IOMMU_CHECK_DOMAIN(struct mock_iommu_domain, xx.iommu, domain);
 *
 * Which creates an alias between driver_domain.domain and
 * driver_domain.xx.iommu.domain. This is to avoid a mass rename of existing
 * driver_domain.domain users.
 */
#define PT_IOMMU_CHECK_DOMAIN(s, pt_iommu_memb, domain_memb)                 \
	static_assert(offsetof(s, pt_iommu_memb.domain) ==                   \
		      offsetof(s, domain_memb))

struct pt_iommu_amdv1_cfg {
	struct pt_iommu_cfg common;
	unsigned int starting_level;
};

struct pt_iommu_amdv1_hw_info {
	u64 host_pt_root;
	u8 mode;
};

IOMMU_FORMAT(amdv1, amdpt);

/* amdv1_mock is used by the iommufd selftest */
#define pt_iommu_amdv1_mock pt_iommu_amdv1
#define pt_iommu_amdv1_mock_cfg pt_iommu_amdv1_cfg
struct pt_iommu_amdv1_mock_hw_info;
IOMMU_PROTOTYPES(amdv1_mock);

struct pt_iommu_vtdss_cfg {
	struct pt_iommu_cfg common;
	/* 4 is a 57 bit 5 level table */
	unsigned int top_level;
};

struct pt_iommu_vtdss_hw_info {
	u64 ssptptr;
	u8 aw;
};

IOMMU_FORMAT(vtdss, vtdss_pt);

struct pt_iommu_x86_64_cfg {
	struct pt_iommu_cfg common;
	/* 4 is a 57 bit 5 level table */
	unsigned int top_level;
};

struct pt_iommu_x86_64_hw_info {
	u64 gcr3_pt;
	u8 levels;
};

IOMMU_FORMAT(x86_64, x86_64_pt);

#undef IOMMU_PROTOTYPES
#undef IOMMU_FORMAT
#endif