Repository: git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

iommu/amd: Remove AMD io_pgtable support

None of this is used anymore, delete it.

Reviewed-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
Reviewed-by: Vasant Hegde <vasant.hegde@amd.com>
Tested-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
Tested-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

Authored by Jason Gunthorpe; committed by Joerg Roedel
2fdf6db4 789a5913

6 files changed, 1 insertion(+), 1050 deletions(-)

drivers/iommu/amd/Makefile (+1, -1)

···
 # SPDX-License-Identifier: GPL-2.0-only
-obj-y += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o ppr.o pasid.o
+obj-y += iommu.o init.o quirks.o ppr.o pasid.o
 obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o

drivers/iommu/amd/amd_iommu_types.h (-98)

···
 #include <linux/spinlock.h>
 #include <linux/pci.h>
 #include <linux/irqreturn.h>
-#include <linux/io-pgtable.h>
 #include <linux/generic_pt/iommu.h>

 /*
···
 #define GUEST_PGTABLE_4_LEVEL	0x00
 #define GUEST_PGTABLE_5_LEVEL	0x01

-#define PM_LEVEL_SHIFT(x)	(12 + ((x) * 9))
-#define PM_LEVEL_SIZE(x)	(((x) < 6) ? \
-				((1ULL << PM_LEVEL_SHIFT((x))) - 1): \
-				(0xffffffffffffffffULL))
-#define PM_LEVEL_INDEX(x, a)	(((a) >> PM_LEVEL_SHIFT((x))) & 0x1ffULL)
-#define PM_LEVEL_ENC(x)		(((x) << 9) & 0xe00ULL)
-#define PM_LEVEL_PDE(x, a)	((a) | PM_LEVEL_ENC((x)) | \
-				 IOMMU_PTE_PR | IOMMU_PTE_IR | IOMMU_PTE_IW)
-#define PM_PTE_LEVEL(pte)	(((pte) >> 9) & 0x7ULL)
-
-#define PM_MAP_4k		0
 #define PM_ADDR_MASK		0x000ffffffffff000ULL
-#define PM_MAP_MASK(lvl)	(PM_ADDR_MASK & \
-				(~((1ULL << (12 + ((lvl) * 9))) - 1)))
-#define PM_ALIGNED(lvl, addr)	((PM_MAP_MASK(lvl) & (addr)) == (addr))
-
-/*
- * Returns the page table level to use for a given page size
- * Pagesize is expected to be a power-of-two
- */
-#define PAGE_SIZE_LEVEL(pagesize) \
-		((__ffs(pagesize) - 12) / 9)
-/*
- * Returns the number of ptes to use for a given page size
- * Pagesize is expected to be a power-of-two
- */
-#define PAGE_SIZE_PTE_COUNT(pagesize) \
-		(1ULL << ((__ffs(pagesize) - 12) % 9))
-
-/*
- * Aligns a given io-virtual address to a given page size
- * Pagesize is expected to be a power-of-two
- */
-#define PAGE_SIZE_ALIGN(address, pagesize) \
-		((address) & ~((pagesize) - 1))
-/*
- * Creates an IOMMU PTE for an address and a given pagesize
- * The PTE has no permission bits set
- * Pagesize is expected to be a power-of-two larger than 4096
- */
-#define PAGE_SIZE_PTE(address, pagesize)		\
-		(((address) | ((pagesize) - 1)) &	\
-		 (~(pagesize >> 1)) & PM_ADDR_MASK)
-
-/*
- * Takes a PTE value with mode=0x07 and returns the page size it maps
- */
-#define PTE_PAGE_SIZE(pte) \
-	(1ULL << (1 + ffz(((pte) | 0xfffULL))))
-
-/*
- * Takes a page-table level and returns the default page-size for this level
- */
-#define PTE_LEVEL_PAGE_SIZE(level)	\
-	(1ULL << (12 + (9 * (level))))
-
-/*
- * The IOPTE dirty bit
- */
-#define IOMMU_PTE_HD_BIT (6)
-
-/*
- * Bit value definition for I/O PTE fields
- */
-#define IOMMU_PTE_PR	BIT_ULL(0)
-#define IOMMU_PTE_HD	BIT_ULL(IOMMU_PTE_HD_BIT)
-#define IOMMU_PTE_U	BIT_ULL(59)
-#define IOMMU_PTE_FC	BIT_ULL(60)
-#define IOMMU_PTE_IR	BIT_ULL(61)
-#define IOMMU_PTE_IW	BIT_ULL(62)

 /*
  * Bit value definition for DTE fields
···

 /* DTE[128:179] | DTE[184:191] */
 #define DTE_DATA2_INTR_MASK	~GENMASK_ULL(55, 52)
-
-#define IOMMU_PAGE_MASK   (((1ULL << 52) - 1) & ~0xfffULL)
-#define IOMMU_PTE_PRESENT(pte)	((pte) & IOMMU_PTE_PR)
-#define IOMMU_PTE_DIRTY(pte)	((pte) & IOMMU_PTE_HD)
-#define IOMMU_PTE_PAGE(pte)	(iommu_phys_to_virt((pte) & IOMMU_PAGE_MASK))
-#define IOMMU_PTE_MODE(pte)	(((pte) >> 9) & 0x07)

 #define IOMMU_PROT_MASK 0x03
 #define IOMMU_PROT_IR 0x01
···

 #define AMD_IOMMU_FLAG_TRANS_PRE_ENABLED	(1 << 0)

-#define io_pgtable_to_data(x) \
-	container_of((x), struct amd_io_pgtable, pgtbl)
-
-#define io_pgtable_ops_to_data(x) \
-	io_pgtable_to_data(io_pgtable_ops_to_pgtable(x))
-
-#define io_pgtable_ops_to_domain(x) \
-	container_of(io_pgtable_ops_to_data(x), \
-		     struct protection_domain, iop)
-
-#define io_pgtable_cfg_to_data(x) \
-	container_of((x), struct amd_io_pgtable, pgtbl.cfg)
-
 struct gcr3_tbl_info {
 	u64	*gcr3_tbl;	/* Guest CR3 table */
 	int	glx;		/* Number of levels for GCR3 table */
 	u32	pasid_cnt;	/* Track attached PASIDs */
 	u16	domid;		/* Per device domain ID */
-};
-
-struct amd_io_pgtable {
-	seqcount_t	seqcount;	/* Protects root/mode update */
-	struct io_pgtable	pgtbl;
-	int		mode;
-	u64		*root;
-	u64		*pgd;		/* v2 pgtable pgd pointer */
 };

 enum protection_domain_mode {
···
 		struct pt_iommu_x86_64 amdv2;
 	};
 	struct list_head dev_list; /* List of all devices in this domain */
-	struct amd_io_pgtable iop;
 	spinlock_t lock;	/* mostly used to lock the page table*/
 	u16 id;			/* the domain id written to the device table */
 	enum protection_domain_mode pd_mode; /* Track page table type */
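
For reference (not part of the patch): the v1 host page table removed above translates an IOVA in 9-bit strides on top of a 12-bit page offset. Below is a minimal userspace sketch of the arithmetic behind the deleted PM_LEVEL_SHIFT(), PM_LEVEL_INDEX() and PAGE_SIZE_LEVEL() macros; the helper names are illustrative, not kernel symbols.

/*
 * Illustrative only: the level arithmetic encoded by the removed v1 macros.
 * Each table level indexes 9 bits of IOVA; level 0 sits just above the
 * 12-bit page offset.
 */
#include <stdio.h>

static unsigned level_shift(unsigned level)
{
	return 12 + level * 9;				/* PM_LEVEL_SHIFT() */
}

static unsigned long long level_index(unsigned level, unsigned long long iova)
{
	return (iova >> level_shift(level)) & 0x1ff;	/* PM_LEVEL_INDEX(), 512 entries */
}

static unsigned page_size_level(unsigned long long pgsize)
{
	return (__builtin_ctzll(pgsize) - 12) / 9;	/* PAGE_SIZE_LEVEL() */
}

int main(void)
{
	unsigned long long iova = 0x12345678000ULL;

	/* A 2 MiB mapping lands at level 1, a 4 KiB mapping at level 0. */
	printf("2M maps at level %u, index %llu\n",
	       page_size_level(1ULL << 21), level_index(1, iova));
	printf("4K maps at level %u, index %llu\n",
	       page_size_level(1ULL << 12), level_index(0, iova));
	return 0;
}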

drivers/iommu/amd/io_pgtable.c (-575, file removed)

···
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * CPU-agnostic AMD IO page table allocator.
- *
- * Copyright (C) 2020 Advanced Micro Devices, Inc.
- * Author: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
- */
-
-#define pr_fmt(fmt)	"AMD-Vi: " fmt
-#define dev_fmt(fmt)	pr_fmt(fmt)
-
-#include <linux/atomic.h>
-#include <linux/bitops.h>
-#include <linux/io-pgtable.h>
-#include <linux/kernel.h>
-#include <linux/sizes.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-#include <linux/dma-mapping.h>
-#include <linux/seqlock.h>
-
-#include <asm/barrier.h>
-
-#include "amd_iommu_types.h"
-#include "amd_iommu.h"
-#include "../iommu-pages.h"
-
-/*
- * Helper function to get the first pte of a large mapping
- */
-static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
-			 unsigned long *count)
-{
-	unsigned long pte_mask, pg_size, cnt;
-	u64 *fpte;
-
-	pg_size = PTE_PAGE_SIZE(*pte);
-	cnt = PAGE_SIZE_PTE_COUNT(pg_size);
-	pte_mask = ~((cnt << 3) - 1);
-	fpte = (u64 *)(((unsigned long)pte) & pte_mask);
-
-	if (page_size)
-		*page_size = pg_size;
-
-	if (count)
-		*count = cnt;
-
-	return fpte;
-}
-
-static void free_pt_lvl(u64 *pt, struct iommu_pages_list *freelist, int lvl)
-{
-	u64 *p;
-	int i;
-
-	for (i = 0; i < 512; ++i) {
-		/* PTE present? */
-		if (!IOMMU_PTE_PRESENT(pt[i]))
-			continue;
-
-		/* Large PTE? */
-		if (PM_PTE_LEVEL(pt[i]) == 0 ||
-		    PM_PTE_LEVEL(pt[i]) == 7)
-			continue;
-
-		/*
-		 * Free the next level. No need to look at l1 tables here since
-		 * they can only contain leaf PTEs; just free them directly.
-		 */
-		p = IOMMU_PTE_PAGE(pt[i]);
-		if (lvl > 2)
-			free_pt_lvl(p, freelist, lvl - 1);
-		else
-			iommu_pages_list_add(freelist, p);
-	}
-
-	iommu_pages_list_add(freelist, pt);
-}
-
-static void free_sub_pt(u64 *root, int mode, struct iommu_pages_list *freelist)
-{
-	switch (mode) {
-	case PAGE_MODE_NONE:
-	case PAGE_MODE_7_LEVEL:
-		break;
-	case PAGE_MODE_1_LEVEL:
-		iommu_pages_list_add(freelist, root);
-		break;
-	case PAGE_MODE_2_LEVEL:
-	case PAGE_MODE_3_LEVEL:
-	case PAGE_MODE_4_LEVEL:
-	case PAGE_MODE_5_LEVEL:
-	case PAGE_MODE_6_LEVEL:
-		free_pt_lvl(root, freelist, mode);
-		break;
-	default:
-		BUG();
-	}
-}
-
-/*
- * This function is used to add another level to an IO page table. Adding
- * another level increases the size of the address space by 9 bits to a size up
- * to 64 bits.
- */
-static bool increase_address_space(struct amd_io_pgtable *pgtable,
-				   unsigned long address,
-				   unsigned int page_size_level,
-				   gfp_t gfp)
-{
-	struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
-	struct protection_domain *domain =
-		container_of(pgtable, struct protection_domain, iop);
-	unsigned long flags;
-	bool ret = true;
-	u64 *pte;
-
-	pte = iommu_alloc_pages_node_sz(cfg->amd.nid, gfp, SZ_4K);
-	if (!pte)
-		return false;
-
-	spin_lock_irqsave(&domain->lock, flags);
-
-	if (address <= PM_LEVEL_SIZE(pgtable->mode) &&
-	    pgtable->mode - 1 >= page_size_level)
-		goto out;
-
-	ret = false;
-	if (WARN_ON_ONCE(pgtable->mode == amd_iommu_hpt_level))
-		goto out;
-
-	*pte = PM_LEVEL_PDE(pgtable->mode, iommu_virt_to_phys(pgtable->root));
-
-	write_seqcount_begin(&pgtable->seqcount);
-	pgtable->root = pte;
-	pgtable->mode += 1;
-	write_seqcount_end(&pgtable->seqcount);
-
-	pte = NULL;
-	ret = true;
-
-out:
-	spin_unlock_irqrestore(&domain->lock, flags);
-	iommu_free_pages(pte);
-
-	return ret;
-}
-
-static u64 *alloc_pte(struct amd_io_pgtable *pgtable,
-		      unsigned long address,
-		      unsigned long page_size,
-		      u64 **pte_page,
-		      gfp_t gfp,
-		      bool *updated)
-{
-	unsigned long last_addr = address + (page_size - 1);
-	struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
-	unsigned int seqcount;
-	int level, end_lvl;
-	u64 *pte, *page;
-
-	BUG_ON(!is_power_of_2(page_size));
-
-	while (last_addr > PM_LEVEL_SIZE(pgtable->mode) ||
-	       pgtable->mode - 1 < PAGE_SIZE_LEVEL(page_size)) {
-		/*
-		 * Return an error if there is no memory to update the
-		 * page-table.
-		 */
-		if (!increase_address_space(pgtable, last_addr,
-					    PAGE_SIZE_LEVEL(page_size), gfp))
-			return NULL;
-	}
-
-
-	do {
-		seqcount = read_seqcount_begin(&pgtable->seqcount);
-
-		level = pgtable->mode - 1;
-		pte = &pgtable->root[PM_LEVEL_INDEX(level, address)];
-	} while (read_seqcount_retry(&pgtable->seqcount, seqcount));
-
-
-	address = PAGE_SIZE_ALIGN(address, page_size);
-	end_lvl = PAGE_SIZE_LEVEL(page_size);
-
-	while (level > end_lvl) {
-		u64 __pte, __npte;
-		int pte_level;
-
-		__pte = *pte;
-		pte_level = PM_PTE_LEVEL(__pte);
-
-		/*
-		 * If we replace a series of large PTEs, we need
-		 * to tear down all of them.
-		 */
-		if (IOMMU_PTE_PRESENT(__pte) &&
-		    pte_level == PAGE_MODE_7_LEVEL) {
-			unsigned long count, i;
-			u64 *lpte;
-
-			lpte = first_pte_l7(pte, NULL, &count);
-
-			/*
-			 * Unmap the replicated PTEs that still match the
-			 * original large mapping
-			 */
-			for (i = 0; i < count; ++i)
-				cmpxchg64(&lpte[i], __pte, 0ULL);
-
-			*updated = true;
-			continue;
-		}
-
-		if (!IOMMU_PTE_PRESENT(__pte) ||
-		    pte_level == PAGE_MODE_NONE) {
-			page = iommu_alloc_pages_node_sz(cfg->amd.nid, gfp,
-							 SZ_4K);
-
-			if (!page)
-				return NULL;
-
-			__npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page));
-
-			/* pte could have been changed somewhere. */
-			if (!try_cmpxchg64(pte, &__pte, __npte))
-				iommu_free_pages(page);
-			else if (IOMMU_PTE_PRESENT(__pte))
-				*updated = true;
-
-			continue;
-		}
-
-		/* No level skipping support yet */
-		if (pte_level != level)
-			return NULL;
-
-		level -= 1;
-
-		pte = IOMMU_PTE_PAGE(__pte);
-
-		if (pte_page && level == end_lvl)
-			*pte_page = pte;
-
-		pte = &pte[PM_LEVEL_INDEX(level, address)];
-	}
-
-	return pte;
-}
-
-/*
- * This function checks if there is a PTE for a given dma address. If
- * there is one, it returns the pointer to it.
- */
-static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
-		      unsigned long address,
-		      unsigned long *page_size)
-{
-	int level;
-	unsigned int seqcount;
-	u64 *pte;
-
-	*page_size = 0;
-
-	if (address > PM_LEVEL_SIZE(pgtable->mode))
-		return NULL;
-
-	do {
-		seqcount = read_seqcount_begin(&pgtable->seqcount);
-		level = pgtable->mode - 1;
-		pte = &pgtable->root[PM_LEVEL_INDEX(level, address)];
-	} while (read_seqcount_retry(&pgtable->seqcount, seqcount));
-
-	*page_size = PTE_LEVEL_PAGE_SIZE(level);
-
-	while (level > 0) {
-
-		/* Not Present */
-		if (!IOMMU_PTE_PRESENT(*pte))
-			return NULL;
-
-		/* Large PTE */
-		if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL ||
-		    PM_PTE_LEVEL(*pte) == PAGE_MODE_NONE)
-			break;
-
-		/* No level skipping support yet */
-		if (PM_PTE_LEVEL(*pte) != level)
-			return NULL;
-
-		level -= 1;
-
-		/* Walk to the next level */
-		pte = IOMMU_PTE_PAGE(*pte);
-		pte = &pte[PM_LEVEL_INDEX(level, address)];
-		*page_size = PTE_LEVEL_PAGE_SIZE(level);
-	}
-
-	/*
-	 * If we have a series of large PTEs, make
-	 * sure to return a pointer to the first one.
-	 */
-	if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL)
-		pte = first_pte_l7(pte, page_size, NULL);
-
-	return pte;
-}
-
-static void free_clear_pte(u64 *pte, u64 pteval,
-			   struct iommu_pages_list *freelist)
-{
-	u64 *pt;
-	int mode;
-
-	while (!try_cmpxchg64(pte, &pteval, 0))
-		pr_warn("AMD-Vi: IOMMU pte changed since we read it\n");
-
-	if (!IOMMU_PTE_PRESENT(pteval))
-		return;
-
-	pt = IOMMU_PTE_PAGE(pteval);
-	mode = IOMMU_PTE_MODE(pteval);
-
-	free_sub_pt(pt, mode, freelist);
-}
-
-/*
- * Generic mapping functions. It maps a physical address into a DMA
- * address space. It allocates the page table pages if necessary.
- * In the future it can be extended to a generic mapping function
- * supporting all features of AMD IOMMU page tables like level skipping
- * and full 64 bit address spaces.
- */
-static int iommu_v1_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
-			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
-			      int prot, gfp_t gfp, size_t *mapped)
-{
-	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
-	struct iommu_pages_list freelist = IOMMU_PAGES_LIST_INIT(freelist);
-	bool updated = false;
-	u64 __pte, *pte;
-	int ret, i, count;
-	size_t size = pgcount << __ffs(pgsize);
-	unsigned long o_iova = iova;
-
-	BUG_ON(!IS_ALIGNED(iova, pgsize));
-	BUG_ON(!IS_ALIGNED(paddr, pgsize));
-
-	ret = -EINVAL;
-	if (!(prot & IOMMU_PROT_MASK))
-		goto out;
-
-	while (pgcount > 0) {
-		count = PAGE_SIZE_PTE_COUNT(pgsize);
-		pte = alloc_pte(pgtable, iova, pgsize, NULL, gfp, &updated);
-
-		ret = -ENOMEM;
-		if (!pte)
-			goto out;
-
-		for (i = 0; i < count; ++i)
-			free_clear_pte(&pte[i], pte[i], &freelist);
-
-		if (!iommu_pages_list_empty(&freelist))
-			updated = true;
-
-		if (count > 1) {
-			__pte = PAGE_SIZE_PTE(__sme_set(paddr), pgsize);
-			__pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC;
-		} else
-			__pte = __sme_set(paddr) | IOMMU_PTE_PR | IOMMU_PTE_FC;
-
-		if (prot & IOMMU_PROT_IR)
-			__pte |= IOMMU_PTE_IR;
-		if (prot & IOMMU_PROT_IW)
-			__pte |= IOMMU_PTE_IW;
-
-		for (i = 0; i < count; ++i)
-			pte[i] = __pte;
-
-		iova += pgsize;
-		paddr += pgsize;
-		pgcount--;
-		if (mapped)
-			*mapped += pgsize;
-	}
-
-	ret = 0;
-
-out:
-	if (updated) {
-		struct protection_domain *dom = io_pgtable_ops_to_domain(ops);
-		unsigned long flags;
-
-		spin_lock_irqsave(&dom->lock, flags);
-		/*
-		 * Flush domain TLB(s) and wait for completion. Any Device-Table
-		 * Updates and flushing already happened in
-		 * increase_address_space().
-		 */
-		amd_iommu_domain_flush_pages(dom, o_iova, size);
-		spin_unlock_irqrestore(&dom->lock, flags);
-	}
-
-	/* Everything flushed out, free pages now */
-	iommu_put_pages_list(&freelist);
-
-	return ret;
-}
-
-static unsigned long iommu_v1_unmap_pages(struct io_pgtable_ops *ops,
-					  unsigned long iova,
-					  size_t pgsize, size_t pgcount,
-					  struct iommu_iotlb_gather *gather)
-{
-	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
-	unsigned long long unmapped;
-	unsigned long unmap_size;
-	u64 *pte;
-	size_t size = pgcount << __ffs(pgsize);
-
-	BUG_ON(!is_power_of_2(pgsize));
-
-	unmapped = 0;
-
-	while (unmapped < size) {
-		pte = fetch_pte(pgtable, iova, &unmap_size);
-		if (pte) {
-			int i, count;
-
-			count = PAGE_SIZE_PTE_COUNT(unmap_size);
-			for (i = 0; i < count; i++)
-				pte[i] = 0ULL;
-		} else {
-			return unmapped;
-		}
-
-		iova = (iova & ~(unmap_size - 1)) + unmap_size;
-		unmapped += unmap_size;
-	}
-
-	return unmapped;
-}
-
-static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova)
-{
-	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
-	unsigned long offset_mask, pte_pgsize;
-	u64 *pte, __pte;
-
-	pte = fetch_pte(pgtable, iova, &pte_pgsize);
-
-	if (!pte || !IOMMU_PTE_PRESENT(*pte))
-		return 0;
-
-	offset_mask = pte_pgsize - 1;
-	__pte = __sme_clr(*pte & PM_ADDR_MASK);
-
-	return (__pte & ~offset_mask) | (iova & offset_mask);
-}
-
-static bool pte_test_and_clear_dirty(u64 *ptep, unsigned long size,
-				     unsigned long flags)
-{
-	bool test_only = flags & IOMMU_DIRTY_NO_CLEAR;
-	bool dirty = false;
-	int i, count;
-
-	/*
-	 * 2.2.3.2 Host Dirty Support
-	 * When a non-default page size is used , software must OR the
-	 * Dirty bits in all of the replicated host PTEs used to map
-	 * the page. The IOMMU does not guarantee the Dirty bits are
-	 * set in all of the replicated PTEs. Any portion of the page
-	 * may have been written even if the Dirty bit is set in only
-	 * one of the replicated PTEs.
-	 */
-	count = PAGE_SIZE_PTE_COUNT(size);
-	for (i = 0; i < count && test_only; i++) {
-		if (test_bit(IOMMU_PTE_HD_BIT, (unsigned long *)&ptep[i])) {
-			dirty = true;
-			break;
-		}
-	}
-
-	for (i = 0; i < count && !test_only; i++) {
-		if (test_and_clear_bit(IOMMU_PTE_HD_BIT,
-				       (unsigned long *)&ptep[i])) {
-			dirty = true;
-		}
-	}
-
-	return dirty;
-}
-
-static int iommu_v1_read_and_clear_dirty(struct io_pgtable_ops *ops,
-					 unsigned long iova, size_t size,
-					 unsigned long flags,
-					 struct iommu_dirty_bitmap *dirty)
-{
-	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
-	unsigned long end = iova + size - 1;
-
-	do {
-		unsigned long pgsize = 0;
-		u64 *ptep, pte;
-
-		ptep = fetch_pte(pgtable, iova, &pgsize);
-		if (ptep)
-			pte = READ_ONCE(*ptep);
-		if (!ptep || !IOMMU_PTE_PRESENT(pte)) {
-			pgsize = pgsize ?: PTE_LEVEL_PAGE_SIZE(0);
-			iova += pgsize;
-			continue;
-		}
-
-		/*
-		 * Mark the whole IOVA range as dirty even if only one of
-		 * the replicated PTEs were marked dirty.
-		 */
-		if (pte_test_and_clear_dirty(ptep, pgsize, flags))
-			iommu_dirty_bitmap_record(dirty, iova, pgsize);
-		iova += pgsize;
-	} while (iova < end);
-
-	return 0;
-}
-
-/*
- * ----------------------------------------------------
- */
-static void v1_free_pgtable(struct io_pgtable *iop)
-{
-	struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, pgtbl);
-	struct iommu_pages_list freelist = IOMMU_PAGES_LIST_INIT(freelist);
-
-	if (pgtable->mode == PAGE_MODE_NONE)
-		return;
-
-	/* Page-table is not visible to IOMMU anymore, so free it */
-	BUG_ON(pgtable->mode < PAGE_MODE_NONE ||
-	       pgtable->mode > amd_iommu_hpt_level);
-
-	free_sub_pt(pgtable->root, pgtable->mode, &freelist);
-	iommu_put_pages_list(&freelist);
-}
-
-static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
-{
-	struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);
-
-	pgtable->root =
-		iommu_alloc_pages_node_sz(cfg->amd.nid, GFP_KERNEL, SZ_4K);
-	if (!pgtable->root)
-		return NULL;
-	pgtable->mode = PAGE_MODE_3_LEVEL;
-	seqcount_init(&pgtable->seqcount);
-
-	cfg->pgsize_bitmap = amd_iommu_pgsize_bitmap;
-	cfg->ias = IOMMU_IN_ADDR_BIT_SIZE;
-	cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE;
-
-	pgtable->pgtbl.ops.map_pages = iommu_v1_map_pages;
-	pgtable->pgtbl.ops.unmap_pages = iommu_v1_unmap_pages;
-	pgtable->pgtbl.ops.iova_to_phys = iommu_v1_iova_to_phys;
-	pgtable->pgtbl.ops.read_and_clear_dirty = iommu_v1_read_and_clear_dirty;
-
-	return &pgtable->pgtbl;
-}
-
-struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns = {
-	.alloc	= v1_alloc_pgtable,
-	.free	= v1_free_pgtable,
-};
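
For reference (not part of the patch): for power-of-two sizes that fall between level boundaries, the deleted iommu_v1_map_pages() wrote a run of replicated mode-7 PTEs, with the run length given by PAGE_SIZE_PTE_COUNT() and the value by PAGE_SIZE_PTE(). A minimal sketch of that encoding follows; it is plain userspace C, not kernel code, and omits the SME tagging done by __sme_set().

/*
 * Illustrative only: how the removed v1 mapper encoded a large page that is
 * not a natural level size. The size is split into 2^((ffs(size)-12) % 9)
 * replicated level-7 PTEs, each carrying the same address/size encoding.
 */
#include <stdio.h>

int main(void)
{
	unsigned long long pgsize = 32 * 1024;		/* a 32 KiB mapping */
	unsigned long long paddr = 0x80000000ULL;
	/* PAGE_SIZE_PTE_COUNT(): number of replicated PTEs for this size */
	unsigned count = 1u << ((__builtin_ctzll(pgsize) - 12) % 9);
	/* PAGE_SIZE_PTE(): address ORed with (pgsize - 1), top size bit cleared */
	unsigned long long pte = (paddr | (pgsize - 1)) & ~(pgsize >> 1) &
				 0x000ffffffffff000ULL;

	printf("%u replicated PTEs, encoded value 0x%llx\n", count, pte);
	return 0;
}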

drivers/iommu/amd/io_pgtable_v2.c (-370, file removed)

···
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * CPU-agnostic AMD IO page table v2 allocator.
- *
- * Copyright (C) 2022, 2023 Advanced Micro Devices, Inc.
- * Author: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
- * Author: Vasant Hegde <vasant.hegde@amd.com>
- */
-
-#define pr_fmt(fmt)	"AMD-Vi: " fmt
-#define dev_fmt(fmt)	pr_fmt(fmt)
-
-#include <linux/bitops.h>
-#include <linux/io-pgtable.h>
-#include <linux/kernel.h>
-
-#include <asm/barrier.h>
-
-#include "amd_iommu_types.h"
-#include "amd_iommu.h"
-#include "../iommu-pages.h"
-
-#define IOMMU_PAGE_PRESENT	BIT_ULL(0)	/* Is present */
-#define IOMMU_PAGE_RW		BIT_ULL(1)	/* Writeable */
-#define IOMMU_PAGE_USER		BIT_ULL(2)	/* Userspace addressable */
-#define IOMMU_PAGE_PWT		BIT_ULL(3)	/* Page write through */
-#define IOMMU_PAGE_PCD		BIT_ULL(4)	/* Page cache disabled */
-#define IOMMU_PAGE_ACCESS	BIT_ULL(5)	/* Was accessed (updated by IOMMU) */
-#define IOMMU_PAGE_DIRTY	BIT_ULL(6)	/* Was written to (updated by IOMMU) */
-#define IOMMU_PAGE_PSE		BIT_ULL(7)	/* Page Size Extensions */
-#define IOMMU_PAGE_NX		BIT_ULL(63)	/* No execute */
-
-#define MAX_PTRS_PER_PAGE	512
-
-#define IOMMU_PAGE_SIZE_2M	BIT_ULL(21)
-#define IOMMU_PAGE_SIZE_1G	BIT_ULL(30)
-
-
-static inline int get_pgtable_level(void)
-{
-	return amd_iommu_gpt_level;
-}
-
-static inline bool is_large_pte(u64 pte)
-{
-	return (pte & IOMMU_PAGE_PSE);
-}
-
-static inline u64 set_pgtable_attr(u64 *page)
-{
-	u64 prot;
-
-	prot = IOMMU_PAGE_PRESENT | IOMMU_PAGE_RW | IOMMU_PAGE_USER;
-	prot |= IOMMU_PAGE_ACCESS;
-
-	return (iommu_virt_to_phys(page) | prot);
-}
-
-static inline void *get_pgtable_pte(u64 pte)
-{
-	return iommu_phys_to_virt(pte & PM_ADDR_MASK);
-}
-
-static u64 set_pte_attr(u64 paddr, u64 pg_size, int prot)
-{
-	u64 pte;
-
-	pte = __sme_set(paddr & PM_ADDR_MASK);
-	pte |= IOMMU_PAGE_PRESENT | IOMMU_PAGE_USER;
-	pte |= IOMMU_PAGE_ACCESS | IOMMU_PAGE_DIRTY;
-
-	if (prot & IOMMU_PROT_IW)
-		pte |= IOMMU_PAGE_RW;
-
-	/* Large page */
-	if (pg_size == IOMMU_PAGE_SIZE_1G || pg_size == IOMMU_PAGE_SIZE_2M)
-		pte |= IOMMU_PAGE_PSE;
-
-	return pte;
-}
-
-static inline u64 get_alloc_page_size(u64 size)
-{
-	if (size >= IOMMU_PAGE_SIZE_1G)
-		return IOMMU_PAGE_SIZE_1G;
-
-	if (size >= IOMMU_PAGE_SIZE_2M)
-		return IOMMU_PAGE_SIZE_2M;
-
-	return PAGE_SIZE;
-}
-
-static inline int page_size_to_level(u64 pg_size)
-{
-	if (pg_size == IOMMU_PAGE_SIZE_1G)
-		return PAGE_MODE_3_LEVEL;
-	if (pg_size == IOMMU_PAGE_SIZE_2M)
-		return PAGE_MODE_2_LEVEL;
-
-	return PAGE_MODE_1_LEVEL;
-}
-
-static void free_pgtable(u64 *pt, int level)
-{
-	u64 *p;
-	int i;
-
-	for (i = 0; i < MAX_PTRS_PER_PAGE; i++) {
-		/* PTE present? */
-		if (!IOMMU_PTE_PRESENT(pt[i]))
-			continue;
-
-		if (is_large_pte(pt[i]))
-			continue;
-
-		/*
-		 * Free the next level. No need to look at l1 tables here since
-		 * they can only contain leaf PTEs; just free them directly.
-		 */
-		p = get_pgtable_pte(pt[i]);
-		if (level > 2)
-			free_pgtable(p, level - 1);
-		else
-			iommu_free_pages(p);
-	}
-
-	iommu_free_pages(pt);
-}
-
-/* Allocate page table */
-static u64 *v2_alloc_pte(int nid, u64 *pgd, unsigned long iova,
-			 unsigned long pg_size, gfp_t gfp, bool *updated)
-{
-	u64 *pte, *page;
-	int level, end_level;
-
-	level = get_pgtable_level() - 1;
-	end_level = page_size_to_level(pg_size);
-	pte = &pgd[PM_LEVEL_INDEX(level, iova)];
-	iova = PAGE_SIZE_ALIGN(iova, PAGE_SIZE);
-
-	while (level >= end_level) {
-		u64 __pte, __npte;
-
-		__pte = *pte;
-
-		if (IOMMU_PTE_PRESENT(__pte) && is_large_pte(__pte)) {
-			/* Unmap large pte */
-			cmpxchg64(pte, *pte, 0ULL);
-			*updated = true;
-			continue;
-		}
-
-		if (!IOMMU_PTE_PRESENT(__pte)) {
-			page = iommu_alloc_pages_node_sz(nid, gfp, SZ_4K);
-			if (!page)
-				return NULL;
-
-			__npte = set_pgtable_attr(page);
-			/* pte could have been changed somewhere. */
-			if (!try_cmpxchg64(pte, &__pte, __npte))
-				iommu_free_pages(page);
-			else if (IOMMU_PTE_PRESENT(__pte))
-				*updated = true;
-
-			continue;
-		}
-
-		level -= 1;
-		pte = get_pgtable_pte(__pte);
-		pte = &pte[PM_LEVEL_INDEX(level, iova)];
-	}
-
-	/* Tear down existing pte entries */
-	if (IOMMU_PTE_PRESENT(*pte)) {
-		u64 *__pte;
-
-		*updated = true;
-		__pte = get_pgtable_pte(*pte);
-		cmpxchg64(pte, *pte, 0ULL);
-		if (pg_size == IOMMU_PAGE_SIZE_1G)
-			free_pgtable(__pte, end_level - 1);
-		else if (pg_size == IOMMU_PAGE_SIZE_2M)
-			iommu_free_pages(__pte);
-	}
-
-	return pte;
-}
-
-/*
- * This function checks if there is a PTE for a given dma address.
- * If there is one, it returns the pointer to it.
- */
-static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
-		      unsigned long iova, unsigned long *page_size)
-{
-	u64 *pte;
-	int level;
-
-	level = get_pgtable_level() - 1;
-	pte = &pgtable->pgd[PM_LEVEL_INDEX(level, iova)];
-	/* Default page size is 4K */
-	*page_size = PAGE_SIZE;
-
-	while (level) {
-		/* Not present */
-		if (!IOMMU_PTE_PRESENT(*pte))
-			return NULL;
-
-		/* Walk to the next level */
-		pte = get_pgtable_pte(*pte);
-		pte = &pte[PM_LEVEL_INDEX(level - 1, iova)];
-
-		/* Large page */
-		if (is_large_pte(*pte)) {
-			if (level == PAGE_MODE_3_LEVEL)
-				*page_size = IOMMU_PAGE_SIZE_1G;
-			else if (level == PAGE_MODE_2_LEVEL)
-				*page_size = IOMMU_PAGE_SIZE_2M;
-			else
-				return NULL;	/* Wrongly set PSE bit in PTE */
-
-			break;
-		}
-
-		level -= 1;
-	}
-
-	return pte;
-}
-
-static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
-			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
-			      int prot, gfp_t gfp, size_t *mapped)
-{
-	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
-	struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
-	u64 *pte;
-	unsigned long map_size;
-	unsigned long mapped_size = 0;
-	unsigned long o_iova = iova;
-	size_t size = pgcount << __ffs(pgsize);
-	int ret = 0;
-	bool updated = false;
-
-	if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize) || !pgcount)
-		return -EINVAL;
-
-	if (!(prot & IOMMU_PROT_MASK))
-		return -EINVAL;
-
-	while (mapped_size < size) {
-		map_size = get_alloc_page_size(pgsize);
-		pte = v2_alloc_pte(cfg->amd.nid, pgtable->pgd,
-				   iova, map_size, gfp, &updated);
-		if (!pte) {
-			ret = -ENOMEM;
-			goto out;
-		}
-
-		*pte = set_pte_attr(paddr, map_size, prot);
-
-		iova += map_size;
-		paddr += map_size;
-		mapped_size += map_size;
-	}
-
-out:
-	if (updated) {
-		struct protection_domain *pdom = io_pgtable_ops_to_domain(ops);
-		unsigned long flags;
-
-		spin_lock_irqsave(&pdom->lock, flags);
-		amd_iommu_domain_flush_pages(pdom, o_iova, size);
-		spin_unlock_irqrestore(&pdom->lock, flags);
-	}
-
-	if (mapped)
-		*mapped += mapped_size;
-
-	return ret;
-}
-
-static unsigned long iommu_v2_unmap_pages(struct io_pgtable_ops *ops,
-					  unsigned long iova,
-					  size_t pgsize, size_t pgcount,
-					  struct iommu_iotlb_gather *gather)
-{
-	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
-	struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
-	unsigned long unmap_size;
-	unsigned long unmapped = 0;
-	size_t size = pgcount << __ffs(pgsize);
-	u64 *pte;
-
-	if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize || !pgcount))
-		return 0;
-
-	while (unmapped < size) {
-		pte = fetch_pte(pgtable, iova, &unmap_size);
-		if (!pte)
-			return unmapped;
-
-		*pte = 0ULL;
-
-		iova = (iova & ~(unmap_size - 1)) + unmap_size;
-		unmapped += unmap_size;
-	}
-
-	return unmapped;
-}
-
-static phys_addr_t iommu_v2_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova)
-{
-	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
-	unsigned long offset_mask, pte_pgsize;
-	u64 *pte, __pte;
-
-	pte = fetch_pte(pgtable, iova, &pte_pgsize);
-	if (!pte || !IOMMU_PTE_PRESENT(*pte))
-		return 0;
-
-	offset_mask = pte_pgsize - 1;
-	__pte = __sme_clr(*pte & PM_ADDR_MASK);
-
-	return (__pte & ~offset_mask) | (iova & offset_mask);
-}
-
-/*
- * ----------------------------------------------------
- */
-static void v2_free_pgtable(struct io_pgtable *iop)
-{
-	struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, pgtbl);
-
-	if (!pgtable || !pgtable->pgd)
-		return;
-
-	/* Free page table */
-	free_pgtable(pgtable->pgd, get_pgtable_level());
-	pgtable->pgd = NULL;
-}
-
-static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
-{
-	struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);
-	int ias = IOMMU_IN_ADDR_BIT_SIZE;
-
-	pgtable->pgd = iommu_alloc_pages_node_sz(cfg->amd.nid, GFP_KERNEL, SZ_4K);
-	if (!pgtable->pgd)
-		return NULL;
-
-	if (get_pgtable_level() == PAGE_MODE_5_LEVEL)
-		ias = 57;
-
-	pgtable->pgtbl.ops.map_pages = iommu_v2_map_pages;
-	pgtable->pgtbl.ops.unmap_pages = iommu_v2_unmap_pages;
-	pgtable->pgtbl.ops.iova_to_phys = iommu_v2_iova_to_phys;
-
-	cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES_V2;
-	cfg->ias = ias;
-	cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE;
-
-	return &pgtable->pgtbl;
-}
-
-struct io_pgtable_init_fns io_pgtable_amd_iommu_v2_init_fns = {
-	.alloc	= v2_alloc_pgtable,
-	.free	= v2_free_pgtable,
-};
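
For reference (not part of the patch): the deleted v2 path built x86-style leaf PTEs in set_pte_attr(), setting present/user/accessed/dirty, RW for writable mappings, and PSE for 2M/1G leaves. A minimal userspace model of that layout follows; make_leaf() and the bit macros below are illustrative stand-ins for the removed kernel symbols, and SME tagging via __sme_set() is omitted.

/*
 * Illustrative only: composing a v2 (x86-style) leaf PTE the way the removed
 * set_pte_attr() did.
 */
#include <stdint.h>
#include <stdio.h>

#define PAGE_PRESENT	(1ULL << 0)
#define PAGE_RW		(1ULL << 1)
#define PAGE_USER	(1ULL << 2)
#define PAGE_ACCESS	(1ULL << 5)
#define PAGE_DIRTY	(1ULL << 6)
#define PAGE_PSE	(1ULL << 7)	/* set for 2M/1G leaves */
#define ADDR_MASK	0x000ffffffffff000ULL

static uint64_t make_leaf(uint64_t paddr, uint64_t pg_size, int writable)
{
	uint64_t pte = (paddr & ADDR_MASK) | PAGE_PRESENT | PAGE_USER |
		       PAGE_ACCESS | PAGE_DIRTY;

	if (writable)
		pte |= PAGE_RW;
	if (pg_size == (1ULL << 21) || pg_size == (1ULL << 30))
		pte |= PAGE_PSE;
	return pte;
}

int main(void)
{
	printf("2M RW leaf: 0x%llx\n",
	       (unsigned long long)make_leaf(0x40000000ULL, 1ULL << 21, 1));
	return 0;
}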

drivers/iommu/io-pgtable.c (-4)

···
 #ifdef CONFIG_IOMMU_IO_PGTABLE_ARMV7S
	[ARM_V7S] = &io_pgtable_arm_v7s_init_fns,
 #endif
-#ifdef CONFIG_AMD_IOMMU
-	[AMD_IOMMU_V1] = &io_pgtable_amd_iommu_v1_init_fns,
-	[AMD_IOMMU_V2] = &io_pgtable_amd_iommu_v2_init_fns,
-#endif
 };

 static int check_custom_allocator(enum io_pgtable_fmt fmt,

include/linux/io-pgtable.h (-2)

···
	ARM_64_LPAE_S2,
	ARM_V7S,
	ARM_MALI_LPAE,
-	AMD_IOMMU_V1,
-	AMD_IOMMU_V2,
	APPLE_DART,
	APPLE_DART2,
	IO_PGTABLE_NUM_FMTS,