Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'iommu-updates-v4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu

Pull IOMMU updates from Joerg Roedel:
"The updates include:

- Small code cleanups in the AMD IOMMUv2 driver

- Scalability improvements for the DMA-API implementation of the AMD
IOMMU driver. This is just a starting point, but already showed
some good improvements in my tests.

- Removal of the unused Renesas IPMMU/IPMMUI driver

- Updates for ARM-SMMU include:
* Some fixes to get the driver working nicely on Broadcom hardware
* A change to the io-pgtable API to indicate the unit in which to
flush (all callers converted, with Ack from Laurent)
* Use of devm_* for allocating/freeing the SMMUv3 buffers

- Some other small fixes and improvements for other drivers"

* tag 'iommu-updates-v4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (46 commits)
iommu/vt-d: Fix up error handling in alloc_iommu
iommu/vt-d: Check the return value of iommu_device_create()
iommu/amd: Remove an unneeded condition
iommu/amd: Preallocate dma_ops apertures based on dma_mask
iommu/amd: Use trylock to acquire bitmap_lock
iommu/amd: Make dma_ops_domain->next_index percpu
iommu/amd: Relax locking in dma_ops path
iommu/amd: Initialize new aperture range before making it visible
iommu/amd: Build io page-tables with cmpxchg64
iommu/amd: Allocate new aperture ranges in dma_ops_alloc_addresses
iommu/amd: Optimize dma_ops_free_addresses
iommu/amd: Remove need_flush from struct dma_ops_domain
iommu/amd: Iterate over all aperture ranges in dma_ops_area_alloc
iommu/amd: Flush iommu tlb in dma_ops_free_addresses
iommu/amd: Rename dma_ops_domain->next_address to next_index
iommu/amd: Remove 'start' parameter from dma_ops_area_alloc
iommu/amd: Flush iommu tlb in dma_ops_aperture_alloc()
iommu/amd: Retry address allocation within one aperture
iommu/amd: Move aperture_range.offset to another cache-line
iommu/amd: Add dma_ops_aperture_alloc() function
...

+414 -1053
+10 -2
Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt
··· 7 7 8 8 Required Properties: 9 9 10 - - compatible: Must contain "renesas,ipmmu-vmsa". 10 + - compatible: Must contain SoC-specific and generic entries from below. 11 + 12 + - "renesas,ipmmu-r8a73a4" for the R8A73A4 (R-Mobile APE6) IPMMU. 13 + - "renesas,ipmmu-r8a7790" for the R8A7790 (R-Car H2) IPMMU. 14 + - "renesas,ipmmu-r8a7791" for the R8A7791 (R-Car M2-W) IPMMU. 15 + - "renesas,ipmmu-r8a7793" for the R8A7793 (R-Car M2-N) IPMMU. 16 + - "renesas,ipmmu-r8a7794" for the R8A7794 (R-Car E2) IPMMU. 17 + - "renesas,ipmmu-vmsa" for generic R-Car Gen2 VMSA-compatible IPMMU. 18 + 11 19 - reg: Base address and size of the IPMMU registers. 12 20 - interrupts: Specifiers for the MMU fault interrupts. For instances that 13 21 support secure mode two interrupts must be specified, for non-secure and ··· 35 27 Example: R8A7791 IPMMU-MX and VSP1-D0 bus master 36 28 37 29 ipmmu_mx: mmu@fe951000 { 38 - compatible = "renasas,ipmmu-vmsa"; 30 + compatible = "renasas,ipmmu-r8a7791", "renasas,ipmmu-vmsa"; 39 31 reg = <0 0xfe951000 0 0x1000>; 40 32 interrupts = <0 222 IRQ_TYPE_LEVEL_HIGH>, 41 33 <0 221 IRQ_TYPE_LEVEL_HIGH>;
-75
drivers/iommu/Kconfig
··· 263 263 264 264 Say N unless you need kernel log message for IOMMU debugging. 265 265 266 - config SHMOBILE_IPMMU 267 - bool 268 - 269 - config SHMOBILE_IPMMU_TLB 270 - bool 271 - 272 - config SHMOBILE_IOMMU 273 - bool "IOMMU for Renesas IPMMU/IPMMUI" 274 - default n 275 - depends on ARM && MMU 276 - depends on ARCH_SHMOBILE || COMPILE_TEST 277 - select IOMMU_API 278 - select ARM_DMA_USE_IOMMU 279 - select SHMOBILE_IPMMU 280 - select SHMOBILE_IPMMU_TLB 281 - help 282 - Support for Renesas IPMMU/IPMMUI. This option enables 283 - remapping of DMA memory accesses from all of the IP blocks 284 - on the ICB. 285 - 286 - Warning: Drivers (including userspace drivers of UIO 287 - devices) of the IP blocks on the ICB *must* use addresses 288 - allocated from the IPMMU (iova) for DMA with this option 289 - enabled. 290 - 291 - If unsure, say N. 292 - 293 - choice 294 - prompt "IPMMU/IPMMUI address space size" 295 - default SHMOBILE_IOMMU_ADDRSIZE_2048MB 296 - depends on SHMOBILE_IOMMU 297 - help 298 - This option sets IPMMU/IPMMUI address space size by 299 - adjusting the 1st level page table size. The page table size 300 - is calculated as follows: 301 - 302 - page table size = number of page table entries * 4 bytes 303 - number of page table entries = address space size / 1 MiB 304 - 305 - For example, when the address space size is 2048 MiB, the 306 - 1st level page table size is 8192 bytes. 
307 - 308 - config SHMOBILE_IOMMU_ADDRSIZE_2048MB 309 - bool "2 GiB" 310 - 311 - config SHMOBILE_IOMMU_ADDRSIZE_1024MB 312 - bool "1 GiB" 313 - 314 - config SHMOBILE_IOMMU_ADDRSIZE_512MB 315 - bool "512 MiB" 316 - 317 - config SHMOBILE_IOMMU_ADDRSIZE_256MB 318 - bool "256 MiB" 319 - 320 - config SHMOBILE_IOMMU_ADDRSIZE_128MB 321 - bool "128 MiB" 322 - 323 - config SHMOBILE_IOMMU_ADDRSIZE_64MB 324 - bool "64 MiB" 325 - 326 - config SHMOBILE_IOMMU_ADDRSIZE_32MB 327 - bool "32 MiB" 328 - 329 - endchoice 330 - 331 - config SHMOBILE_IOMMU_L1SIZE 332 - int 333 - default 8192 if SHMOBILE_IOMMU_ADDRSIZE_2048MB 334 - default 4096 if SHMOBILE_IOMMU_ADDRSIZE_1024MB 335 - default 2048 if SHMOBILE_IOMMU_ADDRSIZE_512MB 336 - default 1024 if SHMOBILE_IOMMU_ADDRSIZE_256MB 337 - default 512 if SHMOBILE_IOMMU_ADDRSIZE_128MB 338 - default 256 if SHMOBILE_IOMMU_ADDRSIZE_64MB 339 - default 128 if SHMOBILE_IOMMU_ADDRSIZE_32MB 340 - 341 266 config IPMMU_VMSA 342 267 bool "Renesas VMSA-compatible IPMMU" 343 268 depends on ARM_LPAE
-2
drivers/iommu/Makefile
··· 22 22 obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o 23 23 obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o 24 24 obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o 25 - obj-$(CONFIG_SHMOBILE_IOMMU) += shmobile-iommu.o 26 - obj-$(CONFIG_SHMOBILE_IPMMU) += shmobile-ipmmu.o 27 25 obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o 28 26 obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
+249 -151
drivers/iommu/amd_iommu.c
··· 35 35 #include <linux/msi.h> 36 36 #include <linux/dma-contiguous.h> 37 37 #include <linux/irqdomain.h> 38 + #include <linux/percpu.h> 38 39 #include <asm/irq_remapping.h> 39 40 #include <asm/io_apic.h> 40 41 #include <asm/apic.h> ··· 114 113 115 114 static void update_domain(struct protection_domain *domain); 116 115 static int protection_domain_init(struct protection_domain *domain); 116 + 117 + /* 118 + * For dynamic growth the aperture size is split into ranges of 128MB of 119 + * DMA address space each. This struct represents one such range. 120 + */ 121 + struct aperture_range { 122 + 123 + spinlock_t bitmap_lock; 124 + 125 + /* address allocation bitmap */ 126 + unsigned long *bitmap; 127 + unsigned long offset; 128 + unsigned long next_bit; 129 + 130 + /* 131 + * Array of PTE pages for the aperture. In this array we save all the 132 + * leaf pages of the domain page table used for the aperture. This way 133 + * we don't need to walk the page table to find a specific PTE. We can 134 + * just calculate its address in constant time. 
135 + */ 136 + u64 *pte_pages[64]; 137 + }; 138 + 139 + /* 140 + * Data container for a dma_ops specific protection domain 141 + */ 142 + struct dma_ops_domain { 143 + /* generic protection domain information */ 144 + struct protection_domain domain; 145 + 146 + /* size of the aperture for the mappings */ 147 + unsigned long aperture_size; 148 + 149 + /* aperture index we start searching for free addresses */ 150 + u32 __percpu *next_index; 151 + 152 + /* address space relevant data */ 153 + struct aperture_range *aperture[APERTURE_MAX_RANGES]; 154 + }; 117 155 118 156 /**************************************************************************** 119 157 * ··· 1207 1167 end_lvl = PAGE_SIZE_LEVEL(page_size); 1208 1168 1209 1169 while (level > end_lvl) { 1210 - if (!IOMMU_PTE_PRESENT(*pte)) { 1170 + u64 __pte, __npte; 1171 + 1172 + __pte = *pte; 1173 + 1174 + if (!IOMMU_PTE_PRESENT(__pte)) { 1211 1175 page = (u64 *)get_zeroed_page(gfp); 1212 1176 if (!page) 1213 1177 return NULL; 1214 - *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); 1178 + 1179 + __npte = PM_LEVEL_PDE(level, virt_to_phys(page)); 1180 + 1181 + if (cmpxchg64(pte, __pte, __npte)) { 1182 + free_page((unsigned long)page); 1183 + continue; 1184 + } 1215 1185 } 1216 1186 1217 1187 /* No level skipping support yet */ ··· 1426 1376 bool populate, gfp_t gfp) 1427 1377 { 1428 1378 int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; 1429 - struct amd_iommu *iommu; 1430 1379 unsigned long i, old_size, pte_pgsize; 1380 + struct aperture_range *range; 1381 + struct amd_iommu *iommu; 1382 + unsigned long flags; 1431 1383 1432 1384 #ifdef CONFIG_IOMMU_STRESS 1433 1385 populate = false; ··· 1438 1386 if (index >= APERTURE_MAX_RANGES) 1439 1387 return -ENOMEM; 1440 1388 1441 - dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp); 1442 - if (!dma_dom->aperture[index]) 1389 + range = kzalloc(sizeof(struct aperture_range), gfp); 1390 + if (!range) 1443 1391 return -ENOMEM; 1444 1392 1445 - 
dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp); 1446 - if (!dma_dom->aperture[index]->bitmap) 1393 + range->bitmap = (void *)get_zeroed_page(gfp); 1394 + if (!range->bitmap) 1447 1395 goto out_free; 1448 1396 1449 - dma_dom->aperture[index]->offset = dma_dom->aperture_size; 1397 + range->offset = dma_dom->aperture_size; 1398 + 1399 + spin_lock_init(&range->bitmap_lock); 1450 1400 1451 1401 if (populate) { 1452 1402 unsigned long address = dma_dom->aperture_size; ··· 1461 1407 if (!pte) 1462 1408 goto out_free; 1463 1409 1464 - dma_dom->aperture[index]->pte_pages[i] = pte_page; 1410 + range->pte_pages[i] = pte_page; 1465 1411 1466 1412 address += APERTURE_RANGE_SIZE / 64; 1467 1413 } 1468 1414 } 1469 1415 1470 - old_size = dma_dom->aperture_size; 1471 - dma_dom->aperture_size += APERTURE_RANGE_SIZE; 1416 + spin_lock_irqsave(&dma_dom->domain.lock, flags); 1417 + 1418 + /* First take the bitmap_lock and then publish the range */ 1419 + spin_lock(&range->bitmap_lock); 1420 + 1421 + old_size = dma_dom->aperture_size; 1422 + dma_dom->aperture[index] = range; 1423 + dma_dom->aperture_size += APERTURE_RANGE_SIZE; 1472 1424 1473 1425 /* Reserve address range used for MSI messages */ 1474 1426 if (old_size < MSI_ADDR_BASE_LO && ··· 1521 1461 1522 1462 update_domain(&dma_dom->domain); 1523 1463 1464 + spin_unlock(&range->bitmap_lock); 1465 + 1466 + spin_unlock_irqrestore(&dma_dom->domain.lock, flags); 1467 + 1524 1468 return 0; 1525 1469 1526 1470 out_free: 1527 1471 update_domain(&dma_dom->domain); 1528 1472 1529 - free_page((unsigned long)dma_dom->aperture[index]->bitmap); 1473 + free_page((unsigned long)range->bitmap); 1530 1474 1531 - kfree(dma_dom->aperture[index]); 1532 - dma_dom->aperture[index] = NULL; 1475 + kfree(range); 1533 1476 1534 1477 return -ENOMEM; 1478 + } 1479 + 1480 + static dma_addr_t dma_ops_aperture_alloc(struct dma_ops_domain *dom, 1481 + struct aperture_range *range, 1482 + unsigned long pages, 1483 + unsigned long dma_mask, 1484 + 
unsigned long boundary_size, 1485 + unsigned long align_mask, 1486 + bool trylock) 1487 + { 1488 + unsigned long offset, limit, flags; 1489 + dma_addr_t address; 1490 + bool flush = false; 1491 + 1492 + offset = range->offset >> PAGE_SHIFT; 1493 + limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset, 1494 + dma_mask >> PAGE_SHIFT); 1495 + 1496 + if (trylock) { 1497 + if (!spin_trylock_irqsave(&range->bitmap_lock, flags)) 1498 + return -1; 1499 + } else { 1500 + spin_lock_irqsave(&range->bitmap_lock, flags); 1501 + } 1502 + 1503 + address = iommu_area_alloc(range->bitmap, limit, range->next_bit, 1504 + pages, offset, boundary_size, align_mask); 1505 + if (address == -1) { 1506 + /* Nothing found, retry one time */ 1507 + address = iommu_area_alloc(range->bitmap, limit, 1508 + 0, pages, offset, boundary_size, 1509 + align_mask); 1510 + flush = true; 1511 + } 1512 + 1513 + if (address != -1) 1514 + range->next_bit = address + pages; 1515 + 1516 + spin_unlock_irqrestore(&range->bitmap_lock, flags); 1517 + 1518 + if (flush) { 1519 + domain_flush_tlb(&dom->domain); 1520 + domain_flush_complete(&dom->domain); 1521 + } 1522 + 1523 + return address; 1535 1524 } 1536 1525 1537 1526 static unsigned long dma_ops_area_alloc(struct device *dev, 1538 1527 struct dma_ops_domain *dom, 1539 1528 unsigned int pages, 1540 1529 unsigned long align_mask, 1541 - u64 dma_mask, 1542 - unsigned long start) 1530 + u64 dma_mask) 1543 1531 { 1544 - unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE; 1545 - int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT; 1546 - int i = start >> APERTURE_RANGE_SHIFT; 1547 1532 unsigned long boundary_size, mask; 1548 1533 unsigned long address = -1; 1549 - unsigned long limit; 1534 + bool first = true; 1535 + u32 start, i; 1550 1536 1551 - next_bit >>= PAGE_SHIFT; 1537 + preempt_disable(); 1552 1538 1553 1539 mask = dma_get_seg_boundary(dev); 1540 + 1541 + again: 1542 + start = this_cpu_read(*dom->next_index); 1543 + 1544 + /* 
Sanity check - is it really necessary? */ 1545 + if (unlikely(start > APERTURE_MAX_RANGES)) { 1546 + start = 0; 1547 + this_cpu_write(*dom->next_index, 0); 1548 + } 1554 1549 1555 1550 boundary_size = mask + 1 ? ALIGN(mask + 1, PAGE_SIZE) >> PAGE_SHIFT : 1556 1551 1UL << (BITS_PER_LONG - PAGE_SHIFT); 1557 1552 1558 - for (;i < max_index; ++i) { 1559 - unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT; 1553 + for (i = 0; i < APERTURE_MAX_RANGES; ++i) { 1554 + struct aperture_range *range; 1555 + int index; 1560 1556 1561 - if (dom->aperture[i]->offset >= dma_mask) 1562 - break; 1557 + index = (start + i) % APERTURE_MAX_RANGES; 1563 1558 1564 - limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset, 1565 - dma_mask >> PAGE_SHIFT); 1559 + range = dom->aperture[index]; 1566 1560 1567 - address = iommu_area_alloc(dom->aperture[i]->bitmap, 1568 - limit, next_bit, pages, 0, 1569 - boundary_size, align_mask); 1561 + if (!range || range->offset >= dma_mask) 1562 + continue; 1563 + 1564 + address = dma_ops_aperture_alloc(dom, range, pages, 1565 + dma_mask, boundary_size, 1566 + align_mask, first); 1570 1567 if (address != -1) { 1571 - address = dom->aperture[i]->offset + 1572 - (address << PAGE_SHIFT); 1573 - dom->next_address = address + (pages << PAGE_SHIFT); 1568 + address = range->offset + (address << PAGE_SHIFT); 1569 + this_cpu_write(*dom->next_index, index); 1574 1570 break; 1575 1571 } 1576 - 1577 - next_bit = 0; 1578 1572 } 1573 + 1574 + if (address == -1 && first) { 1575 + first = false; 1576 + goto again; 1577 + } 1578 + 1579 + preempt_enable(); 1579 1580 1580 1581 return address; 1581 1582 } ··· 1647 1526 unsigned long align_mask, 1648 1527 u64 dma_mask) 1649 1528 { 1650 - unsigned long address; 1529 + unsigned long address = -1; 1651 1530 1652 - #ifdef CONFIG_IOMMU_STRESS 1653 - dom->next_address = 0; 1654 - dom->need_flush = true; 1655 - #endif 1531 + while (address == -1) { 1532 + address = dma_ops_area_alloc(dev, dom, pages, 1533 + 
align_mask, dma_mask); 1656 1534 1657 - address = dma_ops_area_alloc(dev, dom, pages, align_mask, 1658 - dma_mask, dom->next_address); 1659 - 1660 - if (address == -1) { 1661 - dom->next_address = 0; 1662 - address = dma_ops_area_alloc(dev, dom, pages, align_mask, 1663 - dma_mask, 0); 1664 - dom->need_flush = true; 1535 + if (address == -1 && alloc_new_range(dom, false, GFP_ATOMIC)) 1536 + break; 1665 1537 } 1666 1538 1667 1539 if (unlikely(address == -1)) ··· 1676 1562 { 1677 1563 unsigned i = address >> APERTURE_RANGE_SHIFT; 1678 1564 struct aperture_range *range = dom->aperture[i]; 1565 + unsigned long flags; 1679 1566 1680 1567 BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL); 1681 1568 ··· 1685 1570 return; 1686 1571 #endif 1687 1572 1688 - if (address >= dom->next_address) 1689 - dom->need_flush = true; 1573 + if (amd_iommu_unmap_flush) { 1574 + domain_flush_tlb(&dom->domain); 1575 + domain_flush_complete(&dom->domain); 1576 + } 1690 1577 1691 1578 address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT; 1692 1579 1580 + spin_lock_irqsave(&range->bitmap_lock, flags); 1581 + if (address + pages > range->next_bit) 1582 + range->next_bit = address + pages; 1693 1583 bitmap_clear(range->bitmap, address, pages); 1584 + spin_unlock_irqrestore(&range->bitmap_lock, flags); 1694 1585 1695 1586 } 1696 1587 ··· 1876 1755 if (!dom) 1877 1756 return; 1878 1757 1758 + free_percpu(dom->next_index); 1759 + 1879 1760 del_domain_from_list(&dom->domain); 1880 1761 1881 1762 free_pagetable(&dom->domain); ··· 1892 1769 kfree(dom); 1893 1770 } 1894 1771 1772 + static int dma_ops_domain_alloc_apertures(struct dma_ops_domain *dma_dom, 1773 + int max_apertures) 1774 + { 1775 + int ret, i, apertures; 1776 + 1777 + apertures = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; 1778 + ret = 0; 1779 + 1780 + for (i = apertures; i < max_apertures; ++i) { 1781 + ret = alloc_new_range(dma_dom, false, GFP_KERNEL); 1782 + if (ret) 1783 + break; 1784 + } 1785 + 1786 + return ret; 1787 + } 1788 + 
1895 1789 /* 1896 1790 * Allocates a new protection domain usable for the dma_ops functions. 1897 1791 * It also initializes the page table and the address allocator data ··· 1917 1777 static struct dma_ops_domain *dma_ops_domain_alloc(void) 1918 1778 { 1919 1779 struct dma_ops_domain *dma_dom; 1780 + int cpu; 1920 1781 1921 1782 dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL); 1922 1783 if (!dma_dom) ··· 1926 1785 if (protection_domain_init(&dma_dom->domain)) 1927 1786 goto free_dma_dom; 1928 1787 1788 + dma_dom->next_index = alloc_percpu(u32); 1789 + if (!dma_dom->next_index) 1790 + goto free_dma_dom; 1791 + 1929 1792 dma_dom->domain.mode = PAGE_MODE_2_LEVEL; 1930 1793 dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); 1931 1794 dma_dom->domain.flags = PD_DMA_OPS_MASK; 1932 1795 dma_dom->domain.priv = dma_dom; 1933 1796 if (!dma_dom->domain.pt_root) 1934 1797 goto free_dma_dom; 1935 - 1936 - dma_dom->need_flush = false; 1937 1798 1938 1799 add_domain_to_list(&dma_dom->domain); 1939 1800 ··· 1947 1804 * a valid dma-address. 
So we can use 0 as error value 1948 1805 */ 1949 1806 dma_dom->aperture[0]->bitmap[0] = 1; 1950 - dma_dom->next_address = 0; 1951 1807 1808 + for_each_possible_cpu(cpu) 1809 + *per_cpu_ptr(dma_dom->next_index, cpu) = 0; 1952 1810 1953 1811 return dma_dom; 1954 1812 ··· 2472 2328 else if (direction == DMA_BIDIRECTIONAL) 2473 2329 __pte |= IOMMU_PTE_IR | IOMMU_PTE_IW; 2474 2330 2475 - WARN_ON(*pte); 2331 + WARN_ON_ONCE(*pte); 2476 2332 2477 2333 *pte = __pte; 2478 2334 ··· 2501 2357 2502 2358 pte += PM_LEVEL_INDEX(0, address); 2503 2359 2504 - WARN_ON(!*pte); 2360 + WARN_ON_ONCE(!*pte); 2505 2361 2506 2362 *pte = 0ULL; 2507 2363 } ··· 2537 2393 if (align) 2538 2394 align_mask = (1UL << get_order(size)) - 1; 2539 2395 2540 - retry: 2541 2396 address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, 2542 2397 dma_mask); 2543 - if (unlikely(address == DMA_ERROR_CODE)) { 2544 - /* 2545 - * setting next_address here will let the address 2546 - * allocator only scan the new allocated range in the 2547 - * first run. This is a small optimization. 
2548 - */ 2549 - dma_dom->next_address = dma_dom->aperture_size; 2550 2398 2551 - if (alloc_new_range(dma_dom, false, GFP_ATOMIC)) 2552 - goto out; 2553 - 2554 - /* 2555 - * aperture was successfully enlarged by 128 MB, try 2556 - * allocation again 2557 - */ 2558 - goto retry; 2559 - } 2399 + if (address == DMA_ERROR_CODE) 2400 + goto out; 2560 2401 2561 2402 start = address; 2562 2403 for (i = 0; i < pages; ++i) { ··· 2556 2427 2557 2428 ADD_STATS_COUNTER(alloced_io_mem, size); 2558 2429 2559 - if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { 2560 - domain_flush_tlb(&dma_dom->domain); 2561 - dma_dom->need_flush = false; 2562 - } else if (unlikely(amd_iommu_np_cache)) 2430 + if (unlikely(amd_iommu_np_cache)) { 2563 2431 domain_flush_pages(&dma_dom->domain, address, size); 2432 + domain_flush_complete(&dma_dom->domain); 2433 + } 2564 2434 2565 2435 out: 2566 2436 return address; ··· 2606 2478 SUB_STATS_COUNTER(alloced_io_mem, size); 2607 2479 2608 2480 dma_ops_free_addresses(dma_dom, dma_addr, pages); 2609 - 2610 - if (amd_iommu_unmap_flush || dma_dom->need_flush) { 2611 - domain_flush_pages(&dma_dom->domain, flush_addr, size); 2612 - dma_dom->need_flush = false; 2613 - } 2614 2481 } 2615 2482 2616 2483 /* ··· 2616 2493 enum dma_data_direction dir, 2617 2494 struct dma_attrs *attrs) 2618 2495 { 2619 - unsigned long flags; 2620 - struct protection_domain *domain; 2621 - dma_addr_t addr; 2622 - u64 dma_mask; 2623 2496 phys_addr_t paddr = page_to_phys(page) + offset; 2497 + struct protection_domain *domain; 2498 + u64 dma_mask; 2624 2499 2625 2500 INC_STATS_COUNTER(cnt_map_single); 2626 2501 ··· 2630 2509 2631 2510 dma_mask = *dev->dma_mask; 2632 2511 2633 - spin_lock_irqsave(&domain->lock, flags); 2634 - 2635 - addr = __map_single(dev, domain->priv, paddr, size, dir, false, 2512 + return __map_single(dev, domain->priv, paddr, size, dir, false, 2636 2513 dma_mask); 2637 - if (addr == DMA_ERROR_CODE) 2638 - goto out; 2639 - 2640 - 
domain_flush_complete(domain); 2641 - 2642 - out: 2643 - spin_unlock_irqrestore(&domain->lock, flags); 2644 - 2645 - return addr; 2646 2514 } 2647 2515 2648 2516 /* ··· 2640 2530 static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, 2641 2531 enum dma_data_direction dir, struct dma_attrs *attrs) 2642 2532 { 2643 - unsigned long flags; 2644 2533 struct protection_domain *domain; 2645 2534 2646 2535 INC_STATS_COUNTER(cnt_unmap_single); ··· 2648 2539 if (IS_ERR(domain)) 2649 2540 return; 2650 2541 2651 - spin_lock_irqsave(&domain->lock, flags); 2652 - 2653 2542 __unmap_single(domain->priv, dma_addr, size, dir); 2654 - 2655 - domain_flush_complete(domain); 2656 - 2657 - spin_unlock_irqrestore(&domain->lock, flags); 2658 2543 } 2659 2544 2660 2545 /* ··· 2659 2556 int nelems, enum dma_data_direction dir, 2660 2557 struct dma_attrs *attrs) 2661 2558 { 2662 - unsigned long flags; 2663 2559 struct protection_domain *domain; 2664 2560 int i; 2665 2561 struct scatterlist *s; ··· 2674 2572 2675 2573 dma_mask = *dev->dma_mask; 2676 2574 2677 - spin_lock_irqsave(&domain->lock, flags); 2678 - 2679 2575 for_each_sg(sglist, s, nelems, i) { 2680 2576 paddr = sg_phys(s); 2681 2577 ··· 2688 2588 goto unmap; 2689 2589 } 2690 2590 2691 - domain_flush_complete(domain); 2692 - 2693 - out: 2694 - spin_unlock_irqrestore(&domain->lock, flags); 2695 - 2696 2591 return mapped_elems; 2592 + 2697 2593 unmap: 2698 2594 for_each_sg(sglist, s, mapped_elems, i) { 2699 2595 if (s->dma_address) ··· 2698 2602 s->dma_address = s->dma_length = 0; 2699 2603 } 2700 2604 2701 - mapped_elems = 0; 2702 - 2703 - goto out; 2605 + return 0; 2704 2606 } 2705 2607 2706 2608 /* ··· 2709 2615 int nelems, enum dma_data_direction dir, 2710 2616 struct dma_attrs *attrs) 2711 2617 { 2712 - unsigned long flags; 2713 2618 struct protection_domain *domain; 2714 2619 struct scatterlist *s; 2715 2620 int i; ··· 2719 2626 if (IS_ERR(domain)) 2720 2627 return; 2721 2628 2722 - 
spin_lock_irqsave(&domain->lock, flags); 2723 - 2724 2629 for_each_sg(sglist, s, nelems, i) { 2725 2630 __unmap_single(domain->priv, s->dma_address, 2726 2631 s->dma_length, dir); 2727 2632 s->dma_address = s->dma_length = 0; 2728 2633 } 2729 - 2730 - domain_flush_complete(domain); 2731 - 2732 - spin_unlock_irqrestore(&domain->lock, flags); 2733 2634 } 2734 2635 2735 2636 /* ··· 2735 2648 { 2736 2649 u64 dma_mask = dev->coherent_dma_mask; 2737 2650 struct protection_domain *domain; 2738 - unsigned long flags; 2739 2651 struct page *page; 2740 2652 2741 2653 INC_STATS_COUNTER(cnt_alloc_coherent); ··· 2766 2680 if (!dma_mask) 2767 2681 dma_mask = *dev->dma_mask; 2768 2682 2769 - spin_lock_irqsave(&domain->lock, flags); 2770 - 2771 2683 *dma_addr = __map_single(dev, domain->priv, page_to_phys(page), 2772 2684 size, DMA_BIDIRECTIONAL, true, dma_mask); 2773 2685 2774 - if (*dma_addr == DMA_ERROR_CODE) { 2775 - spin_unlock_irqrestore(&domain->lock, flags); 2686 + if (*dma_addr == DMA_ERROR_CODE) 2776 2687 goto out_free; 2777 - } 2778 - 2779 - domain_flush_complete(domain); 2780 - 2781 - spin_unlock_irqrestore(&domain->lock, flags); 2782 2688 2783 2689 return page_address(page); 2784 2690 ··· 2790 2712 struct dma_attrs *attrs) 2791 2713 { 2792 2714 struct protection_domain *domain; 2793 - unsigned long flags; 2794 2715 struct page *page; 2795 2716 2796 2717 INC_STATS_COUNTER(cnt_free_coherent); ··· 2801 2724 if (IS_ERR(domain)) 2802 2725 goto free_mem; 2803 2726 2804 - spin_lock_irqsave(&domain->lock, flags); 2805 - 2806 2727 __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); 2807 - 2808 - domain_flush_complete(domain); 2809 - 2810 - spin_unlock_irqrestore(&domain->lock, flags); 2811 2728 2812 2729 free_mem: 2813 2730 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) ··· 2817 2746 return check_device(dev); 2818 2747 } 2819 2748 2749 + static int set_dma_mask(struct device *dev, u64 mask) 2750 + { 2751 + struct protection_domain *domain; 2752 + 
int max_apertures = 1; 2753 + 2754 + domain = get_domain(dev); 2755 + if (IS_ERR(domain)) 2756 + return PTR_ERR(domain); 2757 + 2758 + if (mask == DMA_BIT_MASK(64)) 2759 + max_apertures = 8; 2760 + else if (mask > DMA_BIT_MASK(32)) 2761 + max_apertures = 4; 2762 + 2763 + /* 2764 + * To prevent lock contention it doesn't make sense to allocate more 2765 + * apertures than online cpus 2766 + */ 2767 + if (max_apertures > num_online_cpus()) 2768 + max_apertures = num_online_cpus(); 2769 + 2770 + if (dma_ops_domain_alloc_apertures(domain->priv, max_apertures)) 2771 + dev_err(dev, "Can't allocate %d iommu apertures\n", 2772 + max_apertures); 2773 + 2774 + return 0; 2775 + } 2776 + 2820 2777 static struct dma_map_ops amd_iommu_dma_ops = { 2821 - .alloc = alloc_coherent, 2822 - .free = free_coherent, 2823 - .map_page = map_page, 2824 - .unmap_page = unmap_page, 2825 - .map_sg = map_sg, 2826 - .unmap_sg = unmap_sg, 2827 - .dma_supported = amd_iommu_dma_supported, 2778 + .alloc = alloc_coherent, 2779 + .free = free_coherent, 2780 + .map_page = map_page, 2781 + .unmap_page = unmap_page, 2782 + .map_sg = map_sg, 2783 + .unmap_sg = unmap_sg, 2784 + .dma_supported = amd_iommu_dma_supported, 2785 + .set_dma_mask = set_dma_mask, 2828 2786 }; 2829 2787 2830 2788 int __init amd_iommu_init_api(void) ··· 3857 3757 case X86_IRQ_ALLOC_TYPE_MSI: 3858 3758 case X86_IRQ_ALLOC_TYPE_MSIX: 3859 3759 devid = get_device_id(&info->msi_dev->dev); 3860 - if (devid >= 0) { 3861 - iommu = amd_iommu_rlookup_table[devid]; 3862 - if (iommu) 3863 - return iommu->msi_domain; 3864 - } 3760 + iommu = amd_iommu_rlookup_table[devid]; 3761 + if (iommu) 3762 + return iommu->msi_domain; 3865 3763 break; 3866 3764 default: 3867 3765 break;
-40
drivers/iommu/amd_iommu_types.h
··· 425 425 }; 426 426 427 427 /* 428 - * For dynamic growth the aperture size is split into ranges of 128MB of 429 - * DMA address space each. This struct represents one such range. 430 - */ 431 - struct aperture_range { 432 - 433 - /* address allocation bitmap */ 434 - unsigned long *bitmap; 435 - 436 - /* 437 - * Array of PTE pages for the aperture. In this array we save all the 438 - * leaf pages of the domain page table used for the aperture. This way 439 - * we don't need to walk the page table to find a specific PTE. We can 440 - * just calculate its address in constant time. 441 - */ 442 - u64 *pte_pages[64]; 443 - 444 - unsigned long offset; 445 - }; 446 - 447 - /* 448 - * Data container for a dma_ops specific protection domain 449 - */ 450 - struct dma_ops_domain { 451 - /* generic protection domain information */ 452 - struct protection_domain domain; 453 - 454 - /* size of the aperture for the mappings */ 455 - unsigned long aperture_size; 456 - 457 - /* address we start to search for free addresses */ 458 - unsigned long next_address; 459 - 460 - /* address space relevant data */ 461 - struct aperture_range *aperture[APERTURE_MAX_RANGES]; 462 - 463 - /* This will be set to true when TLB needs to be flushed */ 464 - bool need_flush; 465 - }; 466 - 467 - /* 468 428 * Structure where we save information about one hardware AMD IOMMU in the 469 429 * system. 470 430 */
+18 -22
drivers/iommu/amd_iommu_v2.c
··· 432 432 unbind_pasid(pasid_state); 433 433 } 434 434 435 - static struct mmu_notifier_ops iommu_mn = { 435 + static const struct mmu_notifier_ops iommu_mn = { 436 436 .release = mn_release, 437 437 .clear_flush_young = mn_clear_flush_young, 438 438 .invalidate_page = mn_invalidate_page, ··· 513 513 static void do_fault(struct work_struct *work) 514 514 { 515 515 struct fault *fault = container_of(work, struct fault, work); 516 - struct mm_struct *mm; 517 516 struct vm_area_struct *vma; 517 + int ret = VM_FAULT_ERROR; 518 + unsigned int flags = 0; 519 + struct mm_struct *mm; 518 520 u64 address; 519 - int ret, write; 520 - 521 - write = !!(fault->flags & PPR_FAULT_WRITE); 522 521 523 522 mm = fault->state->mm; 524 523 address = fault->address; 525 524 525 + if (fault->flags & PPR_FAULT_USER) 526 + flags |= FAULT_FLAG_USER; 527 + if (fault->flags & PPR_FAULT_WRITE) 528 + flags |= FAULT_FLAG_WRITE; 529 + 526 530 down_read(&mm->mmap_sem); 527 531 vma = find_extend_vma(mm, address); 528 - if (!vma || address < vma->vm_start) { 532 + if (!vma || address < vma->vm_start) 529 533 /* failed to get a vma in the right range */ 530 - up_read(&mm->mmap_sem); 531 - handle_fault_error(fault); 532 534 goto out; 533 - } 534 535 535 536 /* Check if we have the right permissions on the vma */ 536 - if (access_error(vma, fault)) { 537 - up_read(&mm->mmap_sem); 538 - handle_fault_error(fault); 537 + if (access_error(vma, fault)) 539 538 goto out; 540 - } 541 539 542 - ret = handle_mm_fault(mm, vma, address, write); 543 - if (ret & VM_FAULT_ERROR) { 544 - /* failed to service fault */ 545 - up_read(&mm->mmap_sem); 546 - handle_fault_error(fault); 547 - goto out; 548 - } 549 - 550 - up_read(&mm->mmap_sem); 540 + ret = handle_mm_fault(mm, vma, address, flags); 551 541 552 542 out: 543 + up_read(&mm->mmap_sem); 544 + 545 + if (ret & VM_FAULT_ERROR) 546 + /* failed to service fault */ 547 + handle_fault_error(fault); 548 + 553 549 finish_pri_tag(fault->dev_state, fault->state, 
fault->tag); 554 550 555 551 put_pasid_state(fault->state);
+71 -139
drivers/iommu/arm-smmu-v3.c
··· 40 40 #define IDR0_ST_LVL_SHIFT 27 41 41 #define IDR0_ST_LVL_MASK 0x3 42 42 #define IDR0_ST_LVL_2LVL (1 << IDR0_ST_LVL_SHIFT) 43 - #define IDR0_STALL_MODEL (3 << 24) 43 + #define IDR0_STALL_MODEL_SHIFT 24 44 + #define IDR0_STALL_MODEL_MASK 0x3 45 + #define IDR0_STALL_MODEL_STALL (0 << IDR0_STALL_MODEL_SHIFT) 46 + #define IDR0_STALL_MODEL_FORCE (2 << IDR0_STALL_MODEL_SHIFT) 44 47 #define IDR0_TTENDIAN_SHIFT 21 45 48 #define IDR0_TTENDIAN_MASK 0x3 46 49 #define IDR0_TTENDIAN_LE (2 << IDR0_TTENDIAN_SHIFT) ··· 256 253 #define STRTAB_STE_1_STRW_EL2 2UL 257 254 #define STRTAB_STE_1_STRW_SHIFT 30 258 255 256 + #define STRTAB_STE_1_SHCFG_INCOMING 1UL 257 + #define STRTAB_STE_1_SHCFG_SHIFT 44 258 + 259 259 #define STRTAB_STE_2_S2VMID_SHIFT 0 260 260 #define STRTAB_STE_2_S2VMID_MASK 0xffffUL 261 261 #define STRTAB_STE_2_VTCR_SHIFT 32 ··· 384 378 #define PRIQ_0_SID_MASK 0xffffffffUL 385 379 #define PRIQ_0_SSID_SHIFT 32 386 380 #define PRIQ_0_SSID_MASK 0xfffffUL 387 - #define PRIQ_0_OF (1UL << 57) 388 381 #define PRIQ_0_PERM_PRIV (1UL << 58) 389 382 #define PRIQ_0_PERM_EXEC (1UL << 59) 390 383 #define PRIQ_0_PERM_READ (1UL << 60) ··· 860 855 }; 861 856 862 857 dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons, 863 - cerror_str[idx]); 858 + idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown"); 864 859 865 860 switch (idx) { 866 - case CMDQ_ERR_CERROR_ILL_IDX: 867 - break; 868 861 case CMDQ_ERR_CERROR_ABT_IDX: 869 862 dev_err(smmu->dev, "retrying command fetch\n"); 870 863 case CMDQ_ERR_CERROR_NONE_IDX: 871 864 return; 865 + case CMDQ_ERR_CERROR_ILL_IDX: 866 + /* Fallthrough */ 867 + default: 868 + break; 872 869 } 873 870 874 871 /* ··· 1049 1042 val |= disable_bypass ? 
STRTAB_STE_0_CFG_ABORT 1050 1043 : STRTAB_STE_0_CFG_BYPASS; 1051 1044 dst[0] = cpu_to_le64(val); 1045 + dst[1] = cpu_to_le64(STRTAB_STE_1_SHCFG_INCOMING 1046 + << STRTAB_STE_1_SHCFG_SHIFT); 1052 1047 dst[2] = 0; /* Nuke the VMID */ 1053 1048 if (ste_live) 1054 1049 arm_smmu_sync_ste_for_sid(smmu, sid); ··· 1065 1056 STRTAB_STE_1_S1C_CACHE_WBRA 1066 1057 << STRTAB_STE_1_S1COR_SHIFT | 1067 1058 STRTAB_STE_1_S1C_SH_ISH << STRTAB_STE_1_S1CSH_SHIFT | 1068 - STRTAB_STE_1_S1STALLD | 1069 1059 #ifdef CONFIG_PCI_ATS 1070 1060 STRTAB_STE_1_EATS_TRANS << STRTAB_STE_1_EATS_SHIFT | 1071 1061 #endif 1072 1062 STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT); 1063 + 1064 + if (smmu->features & ARM_SMMU_FEAT_STALLS) 1065 + dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); 1073 1066 1074 1067 val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK 1075 1068 << STRTAB_STE_0_S1CTXPTR_SHIFT) | ··· 1134 1123 strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS]; 1135 1124 1136 1125 desc->span = STRTAB_SPLIT + 1; 1137 - desc->l2ptr = dma_zalloc_coherent(smmu->dev, size, &desc->l2ptr_dma, 1138 - GFP_KERNEL); 1126 + desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma, 1127 + GFP_KERNEL | __GFP_ZERO); 1139 1128 if (!desc->l2ptr) { 1140 1129 dev_err(smmu->dev, 1141 1130 "failed to allocate l2 stream table for SID %u\n", ··· 1261 1250 1262 1251 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev) 1263 1252 { 1264 - u32 gerror, gerrorn; 1253 + u32 gerror, gerrorn, active; 1265 1254 struct arm_smmu_device *smmu = dev; 1266 1255 1267 1256 gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR); 1268 1257 gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN); 1269 1258 1270 - gerror ^= gerrorn; 1271 - if (!(gerror & GERROR_ERR_MASK)) 1259 + active = gerror ^ gerrorn; 1260 + if (!(active & GERROR_ERR_MASK)) 1272 1261 return IRQ_NONE; /* No errors pending */ 1273 1262 1274 1263 dev_warn(smmu->dev, 1275 1264 "unexpected global error reported (0x%08x), this 
could be serious\n", 1276 - gerror); 1265 + active); 1277 1266 1278 - if (gerror & GERROR_SFM_ERR) { 1267 + if (active & GERROR_SFM_ERR) { 1279 1268 dev_err(smmu->dev, "device has entered Service Failure Mode!\n"); 1280 1269 arm_smmu_device_disable(smmu); 1281 1270 } 1282 1271 1283 - if (gerror & GERROR_MSI_GERROR_ABT_ERR) 1272 + if (active & GERROR_MSI_GERROR_ABT_ERR) 1284 1273 dev_warn(smmu->dev, "GERROR MSI write aborted\n"); 1285 1274 1286 - if (gerror & GERROR_MSI_PRIQ_ABT_ERR) { 1275 + if (active & GERROR_MSI_PRIQ_ABT_ERR) { 1287 1276 dev_warn(smmu->dev, "PRIQ MSI write aborted\n"); 1288 1277 arm_smmu_priq_handler(irq, smmu->dev); 1289 1278 } 1290 1279 1291 - if (gerror & GERROR_MSI_EVTQ_ABT_ERR) { 1280 + if (active & GERROR_MSI_EVTQ_ABT_ERR) { 1292 1281 dev_warn(smmu->dev, "EVTQ MSI write aborted\n"); 1293 1282 arm_smmu_evtq_handler(irq, smmu->dev); 1294 1283 } 1295 1284 1296 - if (gerror & GERROR_MSI_CMDQ_ABT_ERR) { 1285 + if (active & GERROR_MSI_CMDQ_ABT_ERR) { 1297 1286 dev_warn(smmu->dev, "CMDQ MSI write aborted\n"); 1298 1287 arm_smmu_cmdq_sync_handler(irq, smmu->dev); 1299 1288 } 1300 1289 1301 - if (gerror & GERROR_PRIQ_ABT_ERR) 1290 + if (active & GERROR_PRIQ_ABT_ERR) 1302 1291 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n"); 1303 1292 1304 - if (gerror & GERROR_EVTQ_ABT_ERR) 1293 + if (active & GERROR_EVTQ_ABT_ERR) 1305 1294 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n"); 1306 1295 1307 - if (gerror & GERROR_CMDQ_ERR) 1296 + if (active & GERROR_CMDQ_ERR) 1308 1297 arm_smmu_cmdq_skip_err(smmu); 1309 1298 1310 1299 writel(gerror, smmu->base + ARM_SMMU_GERRORN); ··· 1346 1335 } 1347 1336 1348 1337 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, 1349 - bool leaf, void *cookie) 1338 + size_t granule, bool leaf, void *cookie) 1350 1339 { 1351 1340 struct arm_smmu_domain *smmu_domain = cookie; 1352 1341 struct arm_smmu_device *smmu = smmu_domain->smmu; ··· 1365 1354 cmd.tlbi.vmid = 
smmu_domain->s2_cfg.vmid; 1366 1355 } 1367 1356 1368 - arm_smmu_cmdq_issue_cmd(smmu, &cmd); 1357 + do { 1358 + arm_smmu_cmdq_issue_cmd(smmu, &cmd); 1359 + cmd.tlbi.addr += granule; 1360 + } while (size -= granule); 1369 1361 } 1370 1362 1371 1363 static struct iommu_gather_ops arm_smmu_gather_ops = { ··· 1443 1429 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; 1444 1430 1445 1431 if (cfg->cdptr) { 1446 - dma_free_coherent(smmu_domain->smmu->dev, 1447 - CTXDESC_CD_DWORDS << 3, 1448 - cfg->cdptr, 1449 - cfg->cdptr_dma); 1432 + dmam_free_coherent(smmu_domain->smmu->dev, 1433 + CTXDESC_CD_DWORDS << 3, 1434 + cfg->cdptr, 1435 + cfg->cdptr_dma); 1450 1436 1451 1437 arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid); 1452 1438 } ··· 1471 1457 if (IS_ERR_VALUE(asid)) 1472 1458 return asid; 1473 1459 1474 - cfg->cdptr = dma_zalloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3, 1475 - &cfg->cdptr_dma, GFP_KERNEL); 1460 + cfg->cdptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3, 1461 + &cfg->cdptr_dma, 1462 + GFP_KERNEL | __GFP_ZERO); 1476 1463 if (!cfg->cdptr) { 1477 1464 dev_warn(smmu->dev, "failed to allocate context descriptor\n"); 1478 1465 ret = -ENOMEM; ··· 1819 1804 smmu = arm_smmu_get_for_pci_dev(pdev); 1820 1805 if (!smmu) { 1821 1806 ret = -ENOENT; 1822 - goto out_put_group; 1807 + goto out_remove_dev; 1823 1808 } 1824 1809 1825 1810 smmu_group = kzalloc(sizeof(*smmu_group), GFP_KERNEL); 1826 1811 if (!smmu_group) { 1827 1812 ret = -ENOMEM; 1828 - goto out_put_group; 1813 + goto out_remove_dev; 1829 1814 } 1830 1815 1831 1816 smmu_group->ste.valid = true; ··· 1841 1826 for (i = 0; i < smmu_group->num_sids; ++i) { 1842 1827 /* If we already know about this SID, then we're done */ 1843 1828 if (smmu_group->sids[i] == sid) 1844 - return 0; 1829 + goto out_put_group; 1845 1830 } 1846 1831 1847 1832 /* Check the SID is in range of the SMMU and our stream table */ 1848 1833 if (!arm_smmu_sid_in_range(smmu, sid)) { 1849 1834 ret = -ERANGE; 1850 - goto 
out_put_group; 1835 + goto out_remove_dev; 1851 1836 } 1852 1837 1853 1838 /* Ensure l2 strtab is initialised */ 1854 1839 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) { 1855 1840 ret = arm_smmu_init_l2_strtab(smmu, sid); 1856 1841 if (ret) 1857 - goto out_put_group; 1842 + goto out_remove_dev; 1858 1843 } 1859 1844 1860 1845 /* Resize the SID array for the group */ ··· 1864 1849 if (!sids) { 1865 1850 smmu_group->num_sids--; 1866 1851 ret = -ENOMEM; 1867 - goto out_put_group; 1852 + goto out_remove_dev; 1868 1853 } 1869 1854 1870 1855 /* Add the new SID */ 1871 1856 sids[smmu_group->num_sids - 1] = sid; 1872 1857 smmu_group->sids = sids; 1873 - return 0; 1874 1858 1875 1859 out_put_group: 1860 + iommu_group_put(group); 1861 + return 0; 1862 + 1863 + out_remove_dev: 1864 + iommu_group_remove_device(dev); 1876 1865 iommu_group_put(group); 1877 1866 return ret; 1878 1867 } ··· 1956 1937 { 1957 1938 size_t qsz = ((1 << q->max_n_shift) * dwords) << 3; 1958 1939 1959 - q->base = dma_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL); 1940 + q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL); 1960 1941 if (!q->base) { 1961 1942 dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n", 1962 1943 qsz); ··· 1976 1957 return 0; 1977 1958 } 1978 1959 1979 - static void arm_smmu_free_one_queue(struct arm_smmu_device *smmu, 1980 - struct arm_smmu_queue *q) 1981 - { 1982 - size_t qsz = ((1 << q->max_n_shift) * q->ent_dwords) << 3; 1983 - 1984 - dma_free_coherent(smmu->dev, qsz, q->base, q->base_dma); 1985 - } 1986 - 1987 - static void arm_smmu_free_queues(struct arm_smmu_device *smmu) 1988 - { 1989 - arm_smmu_free_one_queue(smmu, &smmu->cmdq.q); 1990 - arm_smmu_free_one_queue(smmu, &smmu->evtq.q); 1991 - 1992 - if (smmu->features & ARM_SMMU_FEAT_PRI) 1993 - arm_smmu_free_one_queue(smmu, &smmu->priq.q); 1994 - } 1995 - 1996 1960 static int arm_smmu_init_queues(struct arm_smmu_device *smmu) 1997 1961 { 1998 1962 int ret; ··· 1985 1983 ret = 
arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD, 1986 1984 ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS); 1987 1985 if (ret) 1988 - goto out; 1986 + return ret; 1989 1987 1990 1988 /* evtq */ 1991 1989 ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD, 1992 1990 ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS); 1993 1991 if (ret) 1994 - goto out_free_cmdq; 1992 + return ret; 1995 1993 1996 1994 /* priq */ 1997 1995 if (!(smmu->features & ARM_SMMU_FEAT_PRI)) 1998 1996 return 0; 1999 1997 2000 - ret = arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD, 2001 - ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS); 2002 - if (ret) 2003 - goto out_free_evtq; 2004 - 2005 - return 0; 2006 - 2007 - out_free_evtq: 2008 - arm_smmu_free_one_queue(smmu, &smmu->evtq.q); 2009 - out_free_cmdq: 2010 - arm_smmu_free_one_queue(smmu, &smmu->cmdq.q); 2011 - out: 2012 - return ret; 2013 - } 2014 - 2015 - static void arm_smmu_free_l2_strtab(struct arm_smmu_device *smmu) 2016 - { 2017 - int i; 2018 - size_t size; 2019 - struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; 2020 - 2021 - size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3); 2022 - for (i = 0; i < cfg->num_l1_ents; ++i) { 2023 - struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[i]; 2024 - 2025 - if (!desc->l2ptr) 2026 - continue; 2027 - 2028 - dma_free_coherent(smmu->dev, size, desc->l2ptr, 2029 - desc->l2ptr_dma); 2030 - } 1998 + return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD, 1999 + ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS); 2031 2000 } 2032 2001 2033 2002 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu) ··· 2027 2054 void *strtab; 2028 2055 u64 reg; 2029 2056 u32 size, l1size; 2030 - int ret; 2031 2057 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; 2032 2058 2033 2059 /* ··· 2049 2077 size, smmu->sid_bits); 2050 2078 2051 2079 l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3); 2052 - strtab = dma_zalloc_coherent(smmu->dev, l1size, &cfg->strtab_dma, 2053 - 
GFP_KERNEL); 2080 + strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma, 2081 + GFP_KERNEL | __GFP_ZERO); 2054 2082 if (!strtab) { 2055 2083 dev_err(smmu->dev, 2056 2084 "failed to allocate l1 stream table (%u bytes)\n", ··· 2067 2095 << STRTAB_BASE_CFG_SPLIT_SHIFT; 2068 2096 cfg->strtab_base_cfg = reg; 2069 2097 2070 - ret = arm_smmu_init_l1_strtab(smmu); 2071 - if (ret) 2072 - dma_free_coherent(smmu->dev, 2073 - l1size, 2074 - strtab, 2075 - cfg->strtab_dma); 2076 - return ret; 2098 + return arm_smmu_init_l1_strtab(smmu); 2077 2099 } 2078 2100 2079 2101 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu) ··· 2078 2112 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; 2079 2113 2080 2114 size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3); 2081 - strtab = dma_zalloc_coherent(smmu->dev, size, &cfg->strtab_dma, 2082 - GFP_KERNEL); 2115 + strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma, 2116 + GFP_KERNEL | __GFP_ZERO); 2083 2117 if (!strtab) { 2084 2118 dev_err(smmu->dev, 2085 2119 "failed to allocate linear stream table (%u bytes)\n", ··· 2123 2157 return 0; 2124 2158 } 2125 2159 2126 - static void arm_smmu_free_strtab(struct arm_smmu_device *smmu) 2127 - { 2128 - struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; 2129 - u32 size = cfg->num_l1_ents; 2130 - 2131 - if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) { 2132 - arm_smmu_free_l2_strtab(smmu); 2133 - size *= STRTAB_L1_DESC_DWORDS << 3; 2134 - } else { 2135 - size *= STRTAB_STE_DWORDS * 3; 2136 - } 2137 - 2138 - dma_free_coherent(smmu->dev, size, cfg->strtab, cfg->strtab_dma); 2139 - } 2140 - 2141 2160 static int arm_smmu_init_structures(struct arm_smmu_device *smmu) 2142 2161 { 2143 2162 int ret; ··· 2131 2180 if (ret) 2132 2181 return ret; 2133 2182 2134 - ret = arm_smmu_init_strtab(smmu); 2135 - if (ret) 2136 - goto out_free_queues; 2137 - 2138 - return 0; 2139 - 2140 - out_free_queues: 2141 - arm_smmu_free_queues(smmu); 2142 - return ret; 2143 - } 2144 
- 2145 - static void arm_smmu_free_structures(struct arm_smmu_device *smmu) 2146 - { 2147 - arm_smmu_free_strtab(smmu); 2148 - arm_smmu_free_queues(smmu); 2183 + return arm_smmu_init_strtab(smmu); 2149 2184 } 2150 2185 2151 2186 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val, ··· 2469 2532 dev_warn(smmu->dev, "IDR0.COHACC overridden by dma-coherent property (%s)\n", 2470 2533 coherent ? "true" : "false"); 2471 2534 2472 - if (reg & IDR0_STALL_MODEL) 2535 + switch (reg & IDR0_STALL_MODEL_MASK << IDR0_STALL_MODEL_SHIFT) { 2536 + case IDR0_STALL_MODEL_STALL: 2537 + /* Fallthrough */ 2538 + case IDR0_STALL_MODEL_FORCE: 2473 2539 smmu->features |= ARM_SMMU_FEAT_STALLS; 2540 + } 2474 2541 2475 2542 if (reg & IDR0_S1P) 2476 2543 smmu->features |= ARM_SMMU_FEAT_TRANS_S1; ··· 2640 2699 platform_set_drvdata(pdev, smmu); 2641 2700 2642 2701 /* Reset the device */ 2643 - ret = arm_smmu_device_reset(smmu); 2644 - if (ret) 2645 - goto out_free_structures; 2646 - 2647 - return 0; 2648 - 2649 - out_free_structures: 2650 - arm_smmu_free_structures(smmu); 2651 - return ret; 2702 + return arm_smmu_device_reset(smmu); 2652 2703 } 2653 2704 2654 2705 static int arm_smmu_device_remove(struct platform_device *pdev) ··· 2648 2715 struct arm_smmu_device *smmu = platform_get_drvdata(pdev); 2649 2716 2650 2717 arm_smmu_device_disable(smmu); 2651 - arm_smmu_free_structures(smmu); 2652 2718 return 0; 2653 2719 } 2654 2720
+16 -7
drivers/iommu/arm-smmu.c
··· 582 582 } 583 583 584 584 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, 585 - bool leaf, void *cookie) 585 + size_t granule, bool leaf, void *cookie) 586 586 { 587 587 struct arm_smmu_domain *smmu_domain = cookie; 588 588 struct arm_smmu_cfg *cfg = &smmu_domain->cfg; ··· 597 597 if (!IS_ENABLED(CONFIG_64BIT) || smmu->version == ARM_SMMU_V1) { 598 598 iova &= ~12UL; 599 599 iova |= ARM_SMMU_CB_ASID(cfg); 600 - writel_relaxed(iova, reg); 600 + do { 601 + writel_relaxed(iova, reg); 602 + iova += granule; 603 + } while (size -= granule); 601 604 #ifdef CONFIG_64BIT 602 605 } else { 603 606 iova >>= 12; 604 607 iova |= (u64)ARM_SMMU_CB_ASID(cfg) << 48; 605 - writeq_relaxed(iova, reg); 608 + do { 609 + writeq_relaxed(iova, reg); 610 + iova += granule >> 12; 611 + } while (size -= granule); 606 612 #endif 607 613 } 608 614 #ifdef CONFIG_64BIT ··· 616 610 reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); 617 611 reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L : 618 612 ARM_SMMU_CB_S2_TLBIIPAS2; 619 - writeq_relaxed(iova >> 12, reg); 613 + iova >>= 12; 614 + do { 615 + writeq_relaxed(iova, reg); 616 + iova += granule >> 12; 617 + } while (size -= granule); 620 618 #endif 621 619 } else { 622 620 reg = ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_TLBIVMID; ··· 955 945 free_irq(irq, domain); 956 946 } 957 947 958 - if (smmu_domain->pgtbl_ops) 959 - free_io_pgtable_ops(smmu_domain->pgtbl_ops); 960 - 948 + free_io_pgtable_ops(smmu_domain->pgtbl_ops); 961 949 __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx); 962 950 } 963 951 ··· 1365 1357 if (IS_ERR(group)) 1366 1358 return PTR_ERR(group); 1367 1359 1360 + iommu_group_put(group); 1368 1361 return 0; 1369 1362 } 1370 1363
+9 -3
drivers/iommu/dmar.c
··· 1063 1063 1064 1064 raw_spin_lock_init(&iommu->register_lock); 1065 1065 1066 - drhd->iommu = iommu; 1067 - 1068 - if (intel_iommu_enabled) 1066 + if (intel_iommu_enabled) { 1069 1067 iommu->iommu_dev = iommu_device_create(NULL, iommu, 1070 1068 intel_iommu_groups, 1071 1069 "%s", iommu->name); 1070 + 1071 + if (IS_ERR(iommu->iommu_dev)) { 1072 + err = PTR_ERR(iommu->iommu_dev); 1073 + goto err_unmap; 1074 + } 1075 + } 1076 + 1077 + drhd->iommu = iommu; 1072 1078 1073 1079 return 0; 1074 1080
+26 -23
drivers/iommu/io-pgtable-arm.c
··· 38 38 #define io_pgtable_to_data(x) \ 39 39 container_of((x), struct arm_lpae_io_pgtable, iop) 40 40 41 - #define io_pgtable_ops_to_pgtable(x) \ 42 - container_of((x), struct io_pgtable, ops) 43 - 44 41 #define io_pgtable_ops_to_data(x) \ 45 42 io_pgtable_to_data(io_pgtable_ops_to_pgtable(x)) 46 43 ··· 55 58 ((((d)->levels - ((l) - ARM_LPAE_START_LVL(d) + 1)) \ 56 59 * (d)->bits_per_level) + (d)->pg_shift) 57 60 61 + #define ARM_LPAE_GRANULE(d) (1UL << (d)->pg_shift) 62 + 58 63 #define ARM_LPAE_PAGES_PER_PGD(d) \ 59 - DIV_ROUND_UP((d)->pgd_size, 1UL << (d)->pg_shift) 64 + DIV_ROUND_UP((d)->pgd_size, ARM_LPAE_GRANULE(d)) 60 65 61 66 /* 62 67 * Calculate the index at level l used to map virtual address a using the ··· 168 169 /* IOPTE accessors */ 169 170 #define iopte_deref(pte,d) \ 170 171 (__va((pte) & ((1ULL << ARM_LPAE_MAX_ADDR_BITS) - 1) \ 171 - & ~((1ULL << (d)->pg_shift) - 1))) 172 + & ~(ARM_LPAE_GRANULE(d) - 1ULL))) 172 173 173 174 #define iopte_type(pte,l) \ 174 175 (((pte) >> ARM_LPAE_PTE_TYPE_SHIFT) & ARM_LPAE_PTE_TYPE_MASK) ··· 325 326 /* Grab a pointer to the next level */ 326 327 pte = *ptep; 327 328 if (!pte) { 328 - cptep = __arm_lpae_alloc_pages(1UL << data->pg_shift, 329 + cptep = __arm_lpae_alloc_pages(ARM_LPAE_GRANULE(data), 329 330 GFP_ATOMIC, cfg); 330 331 if (!cptep) 331 332 return -ENOMEM; ··· 404 405 arm_lpae_iopte *start, *end; 405 406 unsigned long table_size; 406 407 407 - /* Only leaf entries at the last level */ 408 - if (lvl == ARM_LPAE_MAX_LEVELS - 1) 409 - return; 410 - 411 408 if (lvl == ARM_LPAE_START_LVL(data)) 412 409 table_size = data->pgd_size; 413 410 else 414 - table_size = 1UL << data->pg_shift; 411 + table_size = ARM_LPAE_GRANULE(data); 415 412 416 413 start = ptep; 417 - end = (void *)ptep + table_size; 414 + 415 + /* Only leaf entries at the last level */ 416 + if (lvl == ARM_LPAE_MAX_LEVELS - 1) 417 + end = ptep; 418 + else 419 + end = (void *)ptep + table_size; 418 420 419 421 while (ptep != end) { 420 422 
arm_lpae_iopte pte = *ptep++; ··· 473 473 474 474 __arm_lpae_set_pte(ptep, table, cfg); 475 475 iova &= ~(blk_size - 1); 476 - cfg->tlb->tlb_add_flush(iova, blk_size, true, data->iop.cookie); 476 + cfg->tlb->tlb_add_flush(iova, blk_size, blk_size, true, data->iop.cookie); 477 477 return size; 478 478 } 479 479 ··· 486 486 void *cookie = data->iop.cookie; 487 487 size_t blk_size = ARM_LPAE_BLOCK_SIZE(lvl, data); 488 488 489 + /* Something went horribly wrong and we ran out of page table */ 490 + if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS)) 491 + return 0; 492 + 489 493 ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); 490 494 pte = *ptep; 491 - 492 - /* Something went horribly wrong and we ran out of page table */ 493 - if (WARN_ON(!pte || (lvl == ARM_LPAE_MAX_LEVELS))) 495 + if (WARN_ON(!pte)) 494 496 return 0; 495 497 496 498 /* If the size matches this level, we're in the right place */ ··· 501 499 502 500 if (!iopte_leaf(pte, lvl)) { 503 501 /* Also flush any partial walks */ 504 - tlb->tlb_add_flush(iova, size, false, cookie); 502 + tlb->tlb_add_flush(iova, size, ARM_LPAE_GRANULE(data), 503 + false, cookie); 505 504 tlb->tlb_sync(cookie); 506 505 ptep = iopte_deref(pte, data); 507 506 __arm_lpae_free_pgtable(data, lvl + 1, ptep); 508 507 } else { 509 - tlb->tlb_add_flush(iova, size, true, cookie); 508 + tlb->tlb_add_flush(iova, size, size, true, cookie); 510 509 } 511 510 512 511 return size; ··· 573 570 return 0; 574 571 575 572 found_translation: 576 - iova &= ((1 << data->pg_shift) - 1); 573 + iova &= (ARM_LPAE_GRANULE(data) - 1); 577 574 return ((phys_addr_t)iopte_to_pfn(pte,data) << data->pg_shift) | iova; 578 575 } 579 576 ··· 671 668 (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) | 672 669 (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT); 673 670 674 - switch (1 << data->pg_shift) { 671 + switch (ARM_LPAE_GRANULE(data)) { 675 672 case SZ_4K: 676 673 reg |= ARM_LPAE_TCR_TG0_4K; 677 674 break; ··· 772 769 773 770 sl = ARM_LPAE_START_LVL(data); 774 771 775 - 
switch (1 << data->pg_shift) { 772 + switch (ARM_LPAE_GRANULE(data)) { 776 773 case SZ_4K: 777 774 reg |= ARM_LPAE_TCR_TG0_4K; 778 775 sl++; /* SL0 format is different for 4K granule size */ ··· 892 889 WARN_ON(cookie != cfg_cookie); 893 890 } 894 891 895 - static void dummy_tlb_add_flush(unsigned long iova, size_t size, bool leaf, 896 - void *cookie) 892 + static void dummy_tlb_add_flush(unsigned long iova, size_t size, 893 + size_t granule, bool leaf, void *cookie) 897 894 { 898 895 WARN_ON(cookie != cfg_cookie); 899 896 WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
+4 -2
drivers/iommu/io-pgtable.h
··· 26 26 */ 27 27 struct iommu_gather_ops { 28 28 void (*tlb_flush_all)(void *cookie); 29 - void (*tlb_add_flush)(unsigned long iova, size_t size, bool leaf, 30 - void *cookie); 29 + void (*tlb_add_flush)(unsigned long iova, size_t size, size_t granule, 30 + bool leaf, void *cookie); 31 31 void (*tlb_sync)(void *cookie); 32 32 }; 33 33 ··· 130 130 struct io_pgtable_cfg cfg; 131 131 struct io_pgtable_ops ops; 132 132 }; 133 + 134 + #define io_pgtable_ops_to_pgtable(x) container_of((x), struct io_pgtable, ops) 133 135 134 136 /** 135 137 * struct io_pgtable_init_fns - Alloc/free a set of page tables for a
+2 -2
drivers/iommu/ipmmu-vmsa.c
··· 277 277 ipmmu_tlb_invalidate(domain); 278 278 } 279 279 280 - static void ipmmu_tlb_add_flush(unsigned long iova, size_t size, bool leaf, 281 - void *cookie) 280 + static void ipmmu_tlb_add_flush(unsigned long iova, size_t size, 281 + size_t granule, bool leaf, void *cookie) 282 282 { 283 283 /* The hardware doesn't support selective TLB flush. */ 284 284 }
+7 -18
drivers/iommu/msm_iommu_dev.c
··· 359 359 .remove = msm_iommu_ctx_remove, 360 360 }; 361 361 362 + static struct platform_driver * const drivers[] = { 363 + &msm_iommu_driver, 364 + &msm_iommu_ctx_driver, 365 + }; 366 + 362 367 static int __init msm_iommu_driver_init(void) 363 368 { 364 - int ret; 365 - ret = platform_driver_register(&msm_iommu_driver); 366 - if (ret != 0) { 367 - pr_err("Failed to register IOMMU driver\n"); 368 - goto error; 369 - } 370 - 371 - ret = platform_driver_register(&msm_iommu_ctx_driver); 372 - if (ret != 0) { 373 - platform_driver_unregister(&msm_iommu_driver); 374 - pr_err("Failed to register IOMMU context driver\n"); 375 - goto error; 376 - } 377 - 378 - error: 379 - return ret; 369 + return platform_register_drivers(drivers, ARRAY_SIZE(drivers)); 380 370 } 381 371 382 372 static void __exit msm_iommu_driver_exit(void) 383 373 { 384 - platform_driver_unregister(&msm_iommu_ctx_driver); 385 - platform_driver_unregister(&msm_iommu_driver); 374 + platform_unregister_drivers(drivers, ARRAY_SIZE(drivers)); 386 375 } 387 376 388 377 subsys_initcall(msm_iommu_driver_init);
+2 -2
drivers/iommu/s390-iommu.c
··· 49 49 } 50 50 } 51 51 52 - struct iommu_domain *s390_domain_alloc(unsigned domain_type) 52 + static struct iommu_domain *s390_domain_alloc(unsigned domain_type) 53 53 { 54 54 struct s390_domain *s390_domain; 55 55 ··· 73 73 return &s390_domain->domain; 74 74 } 75 75 76 - void s390_domain_free(struct iommu_domain *domain) 76 + static void s390_domain_free(struct iommu_domain *domain) 77 77 { 78 78 struct s390_domain *s390_domain = to_s390_domain(domain); 79 79
-402
drivers/iommu/shmobile-iommu.c
··· 1 - /* 2 - * IOMMU for IPMMU/IPMMUI 3 - * Copyright (C) 2012 Hideki EIRAKU 4 - * 5 - * This program is free software; you can redistribute it and/or modify 6 - * it under the terms of the GNU General Public License as published by 7 - * the Free Software Foundation; version 2 of the License. 8 - */ 9 - 10 - #include <linux/dma-mapping.h> 11 - #include <linux/io.h> 12 - #include <linux/iommu.h> 13 - #include <linux/platform_device.h> 14 - #include <linux/sizes.h> 15 - #include <linux/slab.h> 16 - #include <asm/dma-iommu.h> 17 - #include "shmobile-ipmmu.h" 18 - 19 - #define L1_SIZE CONFIG_SHMOBILE_IOMMU_L1SIZE 20 - #define L1_LEN (L1_SIZE / 4) 21 - #define L1_ALIGN L1_SIZE 22 - #define L2_SIZE SZ_1K 23 - #define L2_LEN (L2_SIZE / 4) 24 - #define L2_ALIGN L2_SIZE 25 - 26 - struct shmobile_iommu_domain_pgtable { 27 - uint32_t *pgtable; 28 - dma_addr_t handle; 29 - }; 30 - 31 - struct shmobile_iommu_archdata { 32 - struct list_head attached_list; 33 - struct dma_iommu_mapping *iommu_mapping; 34 - spinlock_t attach_lock; 35 - struct shmobile_iommu_domain *attached; 36 - int num_attached_devices; 37 - struct shmobile_ipmmu *ipmmu; 38 - }; 39 - 40 - struct shmobile_iommu_domain { 41 - struct shmobile_iommu_domain_pgtable l1, l2[L1_LEN]; 42 - spinlock_t map_lock; 43 - spinlock_t attached_list_lock; 44 - struct list_head attached_list; 45 - struct iommu_domain domain; 46 - }; 47 - 48 - static struct shmobile_iommu_archdata *ipmmu_archdata; 49 - static struct kmem_cache *l1cache, *l2cache; 50 - 51 - static struct shmobile_iommu_domain *to_sh_domain(struct iommu_domain *dom) 52 - { 53 - return container_of(dom, struct shmobile_iommu_domain, domain); 54 - } 55 - 56 - static int pgtable_alloc(struct shmobile_iommu_domain_pgtable *pgtable, 57 - struct kmem_cache *cache, size_t size) 58 - { 59 - pgtable->pgtable = kmem_cache_zalloc(cache, GFP_ATOMIC); 60 - if (!pgtable->pgtable) 61 - return -ENOMEM; 62 - pgtable->handle = dma_map_single(NULL, pgtable->pgtable, size, 63 - 
DMA_TO_DEVICE); 64 - return 0; 65 - } 66 - 67 - static void pgtable_free(struct shmobile_iommu_domain_pgtable *pgtable, 68 - struct kmem_cache *cache, size_t size) 69 - { 70 - dma_unmap_single(NULL, pgtable->handle, size, DMA_TO_DEVICE); 71 - kmem_cache_free(cache, pgtable->pgtable); 72 - } 73 - 74 - static uint32_t pgtable_read(struct shmobile_iommu_domain_pgtable *pgtable, 75 - unsigned int index) 76 - { 77 - return pgtable->pgtable[index]; 78 - } 79 - 80 - static void pgtable_write(struct shmobile_iommu_domain_pgtable *pgtable, 81 - unsigned int index, unsigned int count, uint32_t val) 82 - { 83 - unsigned int i; 84 - 85 - for (i = 0; i < count; i++) 86 - pgtable->pgtable[index + i] = val; 87 - dma_sync_single_for_device(NULL, pgtable->handle + index * sizeof(val), 88 - sizeof(val) * count, DMA_TO_DEVICE); 89 - } 90 - 91 - static struct iommu_domain *shmobile_iommu_domain_alloc(unsigned type) 92 - { 93 - struct shmobile_iommu_domain *sh_domain; 94 - int i, ret; 95 - 96 - if (type != IOMMU_DOMAIN_UNMANAGED) 97 - return NULL; 98 - 99 - sh_domain = kzalloc(sizeof(*sh_domain), GFP_KERNEL); 100 - if (!sh_domain) 101 - return NULL; 102 - ret = pgtable_alloc(&sh_domain->l1, l1cache, L1_SIZE); 103 - if (ret < 0) { 104 - kfree(sh_domain); 105 - return NULL; 106 - } 107 - for (i = 0; i < L1_LEN; i++) 108 - sh_domain->l2[i].pgtable = NULL; 109 - spin_lock_init(&sh_domain->map_lock); 110 - spin_lock_init(&sh_domain->attached_list_lock); 111 - INIT_LIST_HEAD(&sh_domain->attached_list); 112 - return &sh_domain->domain; 113 - } 114 - 115 - static void shmobile_iommu_domain_free(struct iommu_domain *domain) 116 - { 117 - struct shmobile_iommu_domain *sh_domain = to_sh_domain(domain); 118 - int i; 119 - 120 - for (i = 0; i < L1_LEN; i++) { 121 - if (sh_domain->l2[i].pgtable) 122 - pgtable_free(&sh_domain->l2[i], l2cache, L2_SIZE); 123 - } 124 - pgtable_free(&sh_domain->l1, l1cache, L1_SIZE); 125 - kfree(sh_domain); 126 - } 127 - 128 - static int 
shmobile_iommu_attach_device(struct iommu_domain *domain, 129 - struct device *dev) 130 - { 131 - struct shmobile_iommu_archdata *archdata = dev->archdata.iommu; 132 - struct shmobile_iommu_domain *sh_domain = to_sh_domain(domain); 133 - int ret = -EBUSY; 134 - 135 - if (!archdata) 136 - return -ENODEV; 137 - spin_lock(&sh_domain->attached_list_lock); 138 - spin_lock(&archdata->attach_lock); 139 - if (archdata->attached != sh_domain) { 140 - if (archdata->attached) 141 - goto err; 142 - ipmmu_tlb_set(archdata->ipmmu, sh_domain->l1.handle, L1_SIZE, 143 - 0); 144 - ipmmu_tlb_flush(archdata->ipmmu); 145 - archdata->attached = sh_domain; 146 - archdata->num_attached_devices = 0; 147 - list_add(&archdata->attached_list, &sh_domain->attached_list); 148 - } 149 - archdata->num_attached_devices++; 150 - ret = 0; 151 - err: 152 - spin_unlock(&archdata->attach_lock); 153 - spin_unlock(&sh_domain->attached_list_lock); 154 - return ret; 155 - } 156 - 157 - static void shmobile_iommu_detach_device(struct iommu_domain *domain, 158 - struct device *dev) 159 - { 160 - struct shmobile_iommu_archdata *archdata = dev->archdata.iommu; 161 - struct shmobile_iommu_domain *sh_domain = to_sh_domain(domain); 162 - 163 - if (!archdata) 164 - return; 165 - spin_lock(&sh_domain->attached_list_lock); 166 - spin_lock(&archdata->attach_lock); 167 - archdata->num_attached_devices--; 168 - if (!archdata->num_attached_devices) { 169 - ipmmu_tlb_set(archdata->ipmmu, 0, 0, 0); 170 - ipmmu_tlb_flush(archdata->ipmmu); 171 - archdata->attached = NULL; 172 - list_del(&archdata->attached_list); 173 - } 174 - spin_unlock(&archdata->attach_lock); 175 - spin_unlock(&sh_domain->attached_list_lock); 176 - } 177 - 178 - static void domain_tlb_flush(struct shmobile_iommu_domain *sh_domain) 179 - { 180 - struct shmobile_iommu_archdata *archdata; 181 - 182 - spin_lock(&sh_domain->attached_list_lock); 183 - list_for_each_entry(archdata, &sh_domain->attached_list, attached_list) 184 - 
ipmmu_tlb_flush(archdata->ipmmu); 185 - spin_unlock(&sh_domain->attached_list_lock); 186 - } 187 - 188 - static int l2alloc(struct shmobile_iommu_domain *sh_domain, 189 - unsigned int l1index) 190 - { 191 - int ret; 192 - 193 - if (!sh_domain->l2[l1index].pgtable) { 194 - ret = pgtable_alloc(&sh_domain->l2[l1index], l2cache, L2_SIZE); 195 - if (ret < 0) 196 - return ret; 197 - } 198 - pgtable_write(&sh_domain->l1, l1index, 1, 199 - sh_domain->l2[l1index].handle | 0x1); 200 - return 0; 201 - } 202 - 203 - static void l2realfree(struct shmobile_iommu_domain_pgtable *l2) 204 - { 205 - if (l2->pgtable) 206 - pgtable_free(l2, l2cache, L2_SIZE); 207 - } 208 - 209 - static void l2free(struct shmobile_iommu_domain *sh_domain, 210 - unsigned int l1index, 211 - struct shmobile_iommu_domain_pgtable *l2) 212 - { 213 - pgtable_write(&sh_domain->l1, l1index, 1, 0); 214 - if (sh_domain->l2[l1index].pgtable) { 215 - *l2 = sh_domain->l2[l1index]; 216 - sh_domain->l2[l1index].pgtable = NULL; 217 - } 218 - } 219 - 220 - static int shmobile_iommu_map(struct iommu_domain *domain, unsigned long iova, 221 - phys_addr_t paddr, size_t size, int prot) 222 - { 223 - struct shmobile_iommu_domain_pgtable l2 = { .pgtable = NULL }; 224 - struct shmobile_iommu_domain *sh_domain = to_sh_domain(domain); 225 - unsigned int l1index, l2index; 226 - int ret; 227 - 228 - l1index = iova >> 20; 229 - switch (size) { 230 - case SZ_4K: 231 - l2index = (iova >> 12) & 0xff; 232 - spin_lock(&sh_domain->map_lock); 233 - ret = l2alloc(sh_domain, l1index); 234 - if (!ret) 235 - pgtable_write(&sh_domain->l2[l1index], l2index, 1, 236 - paddr | 0xff2); 237 - spin_unlock(&sh_domain->map_lock); 238 - break; 239 - case SZ_64K: 240 - l2index = (iova >> 12) & 0xf0; 241 - spin_lock(&sh_domain->map_lock); 242 - ret = l2alloc(sh_domain, l1index); 243 - if (!ret) 244 - pgtable_write(&sh_domain->l2[l1index], l2index, 0x10, 245 - paddr | 0xff1); 246 - spin_unlock(&sh_domain->map_lock); 247 - break; 248 - case SZ_1M: 249 - 
spin_lock(&sh_domain->map_lock); 250 - l2free(sh_domain, l1index, &l2); 251 - pgtable_write(&sh_domain->l1, l1index, 1, paddr | 0xc02); 252 - spin_unlock(&sh_domain->map_lock); 253 - ret = 0; 254 - break; 255 - default: 256 - ret = -EINVAL; 257 - } 258 - if (!ret) 259 - domain_tlb_flush(sh_domain); 260 - l2realfree(&l2); 261 - return ret; 262 - } 263 - 264 - static size_t shmobile_iommu_unmap(struct iommu_domain *domain, 265 - unsigned long iova, size_t size) 266 - { 267 - struct shmobile_iommu_domain_pgtable l2 = { .pgtable = NULL }; 268 - struct shmobile_iommu_domain *sh_domain = to_sh_domain(domain); 269 - unsigned int l1index, l2index; 270 - uint32_t l2entry = 0; 271 - size_t ret = 0; 272 - 273 - l1index = iova >> 20; 274 - if (!(iova & 0xfffff) && size >= SZ_1M) { 275 - spin_lock(&sh_domain->map_lock); 276 - l2free(sh_domain, l1index, &l2); 277 - spin_unlock(&sh_domain->map_lock); 278 - ret = SZ_1M; 279 - goto done; 280 - } 281 - l2index = (iova >> 12) & 0xff; 282 - spin_lock(&sh_domain->map_lock); 283 - if (sh_domain->l2[l1index].pgtable) 284 - l2entry = pgtable_read(&sh_domain->l2[l1index], l2index); 285 - switch (l2entry & 3) { 286 - case 1: 287 - if (l2index & 0xf) 288 - break; 289 - pgtable_write(&sh_domain->l2[l1index], l2index, 0x10, 0); 290 - ret = SZ_64K; 291 - break; 292 - case 2: 293 - pgtable_write(&sh_domain->l2[l1index], l2index, 1, 0); 294 - ret = SZ_4K; 295 - break; 296 - } 297 - spin_unlock(&sh_domain->map_lock); 298 - done: 299 - if (ret) 300 - domain_tlb_flush(sh_domain); 301 - l2realfree(&l2); 302 - return ret; 303 - } 304 - 305 - static phys_addr_t shmobile_iommu_iova_to_phys(struct iommu_domain *domain, 306 - dma_addr_t iova) 307 - { 308 - struct shmobile_iommu_domain *sh_domain = to_sh_domain(domain); 309 - uint32_t l1entry = 0, l2entry = 0; 310 - unsigned int l1index, l2index; 311 - 312 - l1index = iova >> 20; 313 - l2index = (iova >> 12) & 0xff; 314 - spin_lock(&sh_domain->map_lock); 315 - if (sh_domain->l2[l1index].pgtable) 316 - 
l2entry = pgtable_read(&sh_domain->l2[l1index], l2index); 317 - else 318 - l1entry = pgtable_read(&sh_domain->l1, l1index); 319 - spin_unlock(&sh_domain->map_lock); 320 - switch (l2entry & 3) { 321 - case 1: 322 - return (l2entry & ~0xffff) | (iova & 0xffff); 323 - case 2: 324 - return (l2entry & ~0xfff) | (iova & 0xfff); 325 - default: 326 - if ((l1entry & 3) == 2) 327 - return (l1entry & ~0xfffff) | (iova & 0xfffff); 328 - return 0; 329 - } 330 - } 331 - 332 - static int find_dev_name(struct shmobile_ipmmu *ipmmu, const char *dev_name) 333 - { 334 - unsigned int i, n = ipmmu->num_dev_names; 335 - 336 - for (i = 0; i < n; i++) { 337 - if (strcmp(ipmmu->dev_names[i], dev_name) == 0) 338 - return 1; 339 - } 340 - return 0; 341 - } 342 - 343 - static int shmobile_iommu_add_device(struct device *dev) 344 - { 345 - struct shmobile_iommu_archdata *archdata = ipmmu_archdata; 346 - struct dma_iommu_mapping *mapping; 347 - 348 - if (!find_dev_name(archdata->ipmmu, dev_name(dev))) 349 - return 0; 350 - mapping = archdata->iommu_mapping; 351 - if (!mapping) { 352 - mapping = arm_iommu_create_mapping(&platform_bus_type, 0, 353 - L1_LEN << 20); 354 - if (IS_ERR(mapping)) 355 - return PTR_ERR(mapping); 356 - archdata->iommu_mapping = mapping; 357 - } 358 - dev->archdata.iommu = archdata; 359 - if (arm_iommu_attach_device(dev, mapping)) 360 - pr_err("arm_iommu_attach_device failed\n"); 361 - return 0; 362 - } 363 - 364 - static const struct iommu_ops shmobile_iommu_ops = { 365 - .domain_alloc = shmobile_iommu_domain_alloc, 366 - .domain_free = shmobile_iommu_domain_free, 367 - .attach_dev = shmobile_iommu_attach_device, 368 - .detach_dev = shmobile_iommu_detach_device, 369 - .map = shmobile_iommu_map, 370 - .unmap = shmobile_iommu_unmap, 371 - .map_sg = default_iommu_map_sg, 372 - .iova_to_phys = shmobile_iommu_iova_to_phys, 373 - .add_device = shmobile_iommu_add_device, 374 - .pgsize_bitmap = SZ_1M | SZ_64K | SZ_4K, 375 - }; 376 - 377 - int ipmmu_iommu_init(struct 
shmobile_ipmmu *ipmmu) 378 - { 379 - static struct shmobile_iommu_archdata *archdata; 380 - 381 - l1cache = kmem_cache_create("shmobile-iommu-pgtable1", L1_SIZE, 382 - L1_ALIGN, SLAB_HWCACHE_ALIGN, NULL); 383 - if (!l1cache) 384 - return -ENOMEM; 385 - l2cache = kmem_cache_create("shmobile-iommu-pgtable2", L2_SIZE, 386 - L2_ALIGN, SLAB_HWCACHE_ALIGN, NULL); 387 - if (!l2cache) { 388 - kmem_cache_destroy(l1cache); 389 - return -ENOMEM; 390 - } 391 - archdata = kzalloc(sizeof(*archdata), GFP_KERNEL); 392 - if (!archdata) { 393 - kmem_cache_destroy(l1cache); 394 - kmem_cache_destroy(l2cache); 395 - return -ENOMEM; 396 - } 397 - spin_lock_init(&archdata->attach_lock); 398 - archdata->ipmmu = ipmmu; 399 - ipmmu_archdata = archdata; 400 - bus_set_iommu(&platform_bus_type, &shmobile_iommu_ops); 401 - return 0; 402 - }
-129
drivers/iommu/shmobile-ipmmu.c
··· 1 - /* 2 - * IPMMU/IPMMUI 3 - * Copyright (C) 2012 Hideki EIRAKU 4 - * 5 - * This program is free software; you can redistribute it and/or modify 6 - * it under the terms of the GNU General Public License as published by 7 - * the Free Software Foundation; version 2 of the License. 8 - */ 9 - 10 - #include <linux/err.h> 11 - #include <linux/export.h> 12 - #include <linux/io.h> 13 - #include <linux/platform_device.h> 14 - #include <linux/slab.h> 15 - #include <linux/platform_data/sh_ipmmu.h> 16 - #include "shmobile-ipmmu.h" 17 - 18 - #define IMCTR1 0x000 19 - #define IMCTR2 0x004 20 - #define IMASID 0x010 21 - #define IMTTBR 0x014 22 - #define IMTTBCR 0x018 23 - 24 - #define IMCTR1_TLBEN (1 << 0) 25 - #define IMCTR1_FLUSH (1 << 1) 26 - 27 - static void ipmmu_reg_write(struct shmobile_ipmmu *ipmmu, unsigned long reg_off, 28 - unsigned long data) 29 - { 30 - iowrite32(data, ipmmu->ipmmu_base + reg_off); 31 - } 32 - 33 - void ipmmu_tlb_flush(struct shmobile_ipmmu *ipmmu) 34 - { 35 - if (!ipmmu) 36 - return; 37 - 38 - spin_lock(&ipmmu->flush_lock); 39 - if (ipmmu->tlb_enabled) 40 - ipmmu_reg_write(ipmmu, IMCTR1, IMCTR1_FLUSH | IMCTR1_TLBEN); 41 - else 42 - ipmmu_reg_write(ipmmu, IMCTR1, IMCTR1_FLUSH); 43 - spin_unlock(&ipmmu->flush_lock); 44 - } 45 - 46 - void ipmmu_tlb_set(struct shmobile_ipmmu *ipmmu, unsigned long phys, int size, 47 - int asid) 48 - { 49 - if (!ipmmu) 50 - return; 51 - 52 - spin_lock(&ipmmu->flush_lock); 53 - switch (size) { 54 - default: 55 - ipmmu->tlb_enabled = 0; 56 - break; 57 - case 0x2000: 58 - ipmmu_reg_write(ipmmu, IMTTBCR, 1); 59 - ipmmu->tlb_enabled = 1; 60 - break; 61 - case 0x1000: 62 - ipmmu_reg_write(ipmmu, IMTTBCR, 2); 63 - ipmmu->tlb_enabled = 1; 64 - break; 65 - case 0x800: 66 - ipmmu_reg_write(ipmmu, IMTTBCR, 3); 67 - ipmmu->tlb_enabled = 1; 68 - break; 69 - case 0x400: 70 - ipmmu_reg_write(ipmmu, IMTTBCR, 4); 71 - ipmmu->tlb_enabled = 1; 72 - break; 73 - case 0x200: 74 - ipmmu_reg_write(ipmmu, IMTTBCR, 5); 75 - 
ipmmu->tlb_enabled = 1; 76 - break; 77 - case 0x100: 78 - ipmmu_reg_write(ipmmu, IMTTBCR, 6); 79 - ipmmu->tlb_enabled = 1; 80 - break; 81 - case 0x80: 82 - ipmmu_reg_write(ipmmu, IMTTBCR, 7); 83 - ipmmu->tlb_enabled = 1; 84 - break; 85 - } 86 - ipmmu_reg_write(ipmmu, IMTTBR, phys); 87 - ipmmu_reg_write(ipmmu, IMASID, asid); 88 - spin_unlock(&ipmmu->flush_lock); 89 - } 90 - 91 - static int ipmmu_probe(struct platform_device *pdev) 92 - { 93 - struct shmobile_ipmmu *ipmmu; 94 - struct resource *res; 95 - struct shmobile_ipmmu_platform_data *pdata = pdev->dev.platform_data; 96 - 97 - ipmmu = devm_kzalloc(&pdev->dev, sizeof(*ipmmu), GFP_KERNEL); 98 - if (!ipmmu) { 99 - dev_err(&pdev->dev, "cannot allocate device data\n"); 100 - return -ENOMEM; 101 - } 102 - spin_lock_init(&ipmmu->flush_lock); 103 - ipmmu->dev = &pdev->dev; 104 - 105 - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); 106 - ipmmu->ipmmu_base = devm_ioremap_resource(&pdev->dev, res); 107 - if (IS_ERR(ipmmu->ipmmu_base)) 108 - return PTR_ERR(ipmmu->ipmmu_base); 109 - 110 - ipmmu->dev_names = pdata->dev_names; 111 - ipmmu->num_dev_names = pdata->num_dev_names; 112 - platform_set_drvdata(pdev, ipmmu); 113 - ipmmu_reg_write(ipmmu, IMCTR1, 0x0); /* disable TLB */ 114 - ipmmu_reg_write(ipmmu, IMCTR2, 0x0); /* disable PMB */ 115 - return ipmmu_iommu_init(ipmmu); 116 - } 117 - 118 - static struct platform_driver ipmmu_driver = { 119 - .probe = ipmmu_probe, 120 - .driver = { 121 - .name = "ipmmu", 122 - }, 123 - }; 124 - 125 - static int __init ipmmu_init(void) 126 - { 127 - return platform_driver_register(&ipmmu_driver); 128 - } 129 - subsys_initcall(ipmmu_init);
-34
drivers/iommu/shmobile-ipmmu.h
··· 1 - /* shmobile-ipmmu.h 2 - * 3 - * Copyright (C) 2012 Hideki EIRAKU 4 - * 5 - * This program is free software; you can redistribute it and/or modify 6 - * it under the terms of the GNU General Public License as published by 7 - * the Free Software Foundation; version 2 of the License. 8 - */ 9 - 10 - #ifndef __SHMOBILE_IPMMU_H__ 11 - #define __SHMOBILE_IPMMU_H__ 12 - 13 - struct shmobile_ipmmu { 14 - struct device *dev; 15 - void __iomem *ipmmu_base; 16 - int tlb_enabled; 17 - spinlock_t flush_lock; 18 - const char * const *dev_names; 19 - unsigned int num_dev_names; 20 - }; 21 - 22 - #ifdef CONFIG_SHMOBILE_IPMMU_TLB 23 - void ipmmu_tlb_flush(struct shmobile_ipmmu *ipmmu); 24 - void ipmmu_tlb_set(struct shmobile_ipmmu *ipmmu, unsigned long phys, int size, 25 - int asid); 26 - int ipmmu_iommu_init(struct shmobile_ipmmu *ipmmu); 27 - #else 28 - static inline int ipmmu_iommu_init(struct shmobile_ipmmu *ipmmu) 29 - { 30 - return -EINVAL; 31 - } 32 - #endif 33 - 34 - #endif /* __SHMOBILE_IPMMU_H__ */