Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'iommu-v4.15-rc1' of git://github.com/awilliam/linux-vfio

Pull IOMMU updates from Alex Williamson:
"As Joerg mentioned[1], he's out on paternity leave through the end of
the year and I'm filling in for him in the interim:

- Enforce MSI multiple IRQ alignment in AMD IOMMU

- VT-d PASID error handling fixes

- Add r8a7795 IPMMU support

- Manage runtime PM links on exynos at {add,remove}_device callbacks

- Fix Mediatek driver name to avoid conflict

- Add terminate support to qcom fault handler

- 64-bit IOVA optimizations

- Simplify IOVA domain destruction, better use of rcache, and skip
anchor nodes on copy

- Convert to IOMMU TLB sync API in io-pgtable-arm{-v7s}

- Drop command queue lock when waiting for CMD_SYNC completion on ARM
SMMU implementations supporting MSI to cacheable memory

- ipmmu-vmsa cleanup inspired by missed IOTLB sync callbacks

- Fix sleeping lock with preemption disabled for RT

- Dual MMU support for TI DRA7xx DSPs

- Optional flush option on IOVA allocation avoiding overhead when
caller can try other options

[1] https://lkml.org/lkml/2017/10/22/72"

* tag 'iommu-v4.15-rc1' of git://github.com/awilliam/linux-vfio: (54 commits)
iommu/iova: Use raw_cpu_ptr() instead of get_cpu_ptr() for ->fq
iommu/mediatek: Fix driver name
iommu/ipmmu-vmsa: Hook up r8a7795 DT matching code
iommu/ipmmu-vmsa: Allow two bit SL0
iommu/ipmmu-vmsa: Make IMBUSCTR setup optional
iommu/ipmmu-vmsa: Write IMCTR twice
iommu/ipmmu-vmsa: IPMMU device is 40-bit bus master
iommu/ipmmu-vmsa: Make use of IOMMU_OF_DECLARE()
iommu/ipmmu-vmsa: Enable multi context support
iommu/ipmmu-vmsa: Add optional root device feature
iommu/ipmmu-vmsa: Introduce features, break out alias
iommu/ipmmu-vmsa: Unify ipmmu_ops
iommu/ipmmu-vmsa: Clean up struct ipmmu_vmsa_iommu_priv
iommu/ipmmu-vmsa: Simplify group allocation
iommu/ipmmu-vmsa: Unify domain alloc/free
iommu/ipmmu-vmsa: Fix return value check in ipmmu_find_group_dma()
iommu/vt-d: Clear pasid table entry when memory unbound
iommu/vt-d: Clear Page Request Overflow fault bit
iommu/vt-d: Missing checks for pasid tables if allocation fails
iommu/amd: Limit the IOVA page range to the specified addresses
...

+1007 -651
+1 -2
drivers/gpu/drm/tegra/drm.c
··· 155 155 156 156 order = __ffs(tegra->domain->pgsize_bitmap); 157 157 init_iova_domain(&tegra->carveout.domain, 1UL << order, 158 - carveout_start >> order, 159 - carveout_end >> order); 158 + carveout_start >> order); 160 159 161 160 tegra->carveout.shift = iova_shift(&tegra->carveout.domain); 162 161 tegra->carveout.limit = carveout_end >> tegra->carveout.shift;
+1 -2
drivers/gpu/host1x/dev.c
··· 198 198 199 199 order = __ffs(host->domain->pgsize_bitmap); 200 200 init_iova_domain(&host->iova, 1UL << order, 201 - geometry->aperture_start >> order, 202 - geometry->aperture_end >> order); 201 + geometry->aperture_start >> order); 203 202 host->iova_end = geometry->aperture_end; 204 203 } 205 204
+24 -19
drivers/iommu/amd_iommu.c
··· 63 63 /* IO virtual address start page frame number */ 64 64 #define IOVA_START_PFN (1) 65 65 #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) 66 - #define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) 67 66 68 67 /* Reserved IOVA ranges */ 69 68 #define MSI_RANGE_START (0xfee00000) ··· 1546 1547 1547 1548 if (dma_mask > DMA_BIT_MASK(32)) 1548 1549 pfn = alloc_iova_fast(&dma_dom->iovad, pages, 1549 - IOVA_PFN(DMA_BIT_MASK(32))); 1550 + IOVA_PFN(DMA_BIT_MASK(32)), false); 1550 1551 1551 1552 if (!pfn) 1552 - pfn = alloc_iova_fast(&dma_dom->iovad, pages, IOVA_PFN(dma_mask)); 1553 + pfn = alloc_iova_fast(&dma_dom->iovad, pages, 1554 + IOVA_PFN(dma_mask), true); 1553 1555 1554 1556 return (pfn << PAGE_SHIFT); 1555 1557 } ··· 1788 1788 if (!dma_dom->domain.pt_root) 1789 1789 goto free_dma_dom; 1790 1790 1791 - init_iova_domain(&dma_dom->iovad, PAGE_SIZE, 1792 - IOVA_START_PFN, DMA_32BIT_PFN); 1791 + init_iova_domain(&dma_dom->iovad, PAGE_SIZE, IOVA_START_PFN); 1793 1792 1794 1793 if (init_iova_flush_queue(&dma_dom->iovad, iova_domain_flush_tlb, NULL)) 1795 1794 goto free_dma_dom; ··· 2382 2383 size_t size, 2383 2384 int dir) 2384 2385 { 2385 - dma_addr_t flush_addr; 2386 2386 dma_addr_t i, start; 2387 2387 unsigned int pages; 2388 2388 2389 - flush_addr = dma_addr; 2390 2389 pages = iommu_num_pages(dma_addr, size, PAGE_SIZE); 2391 2390 dma_addr &= PAGE_MASK; 2392 2391 start = dma_addr; ··· 2693 2696 struct pci_dev *pdev = NULL; 2694 2697 struct iova *val; 2695 2698 2696 - init_iova_domain(&reserved_iova_ranges, PAGE_SIZE, 2697 - IOVA_START_PFN, DMA_32BIT_PFN); 2699 + init_iova_domain(&reserved_iova_ranges, PAGE_SIZE, IOVA_START_PFN); 2698 2700 2699 2701 lockdep_set_class(&reserved_iova_ranges.iova_rbtree_lock, 2700 2702 &reserved_rbtree_key); ··· 3151 3155 unsigned long start, end; 3152 3156 3153 3157 start = IOVA_PFN(region->start); 3154 - end = IOVA_PFN(region->start + region->length); 3158 + end = IOVA_PFN(region->start + region->length - 1); 3155 3159 3156 3160 
WARN_ON_ONCE(reserve_iova(&dma_dom->iovad, start, end) == NULL); 3157 3161 } ··· 3659 3663 return table; 3660 3664 } 3661 3665 3662 - static int alloc_irq_index(u16 devid, int count) 3666 + static int alloc_irq_index(u16 devid, int count, bool align) 3663 3667 { 3664 3668 struct irq_remap_table *table; 3669 + int index, c, alignment = 1; 3665 3670 unsigned long flags; 3666 - int index, c; 3667 3671 struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; 3668 3672 3669 3673 if (!iommu) ··· 3673 3677 if (!table) 3674 3678 return -ENODEV; 3675 3679 3680 + if (align) 3681 + alignment = roundup_pow_of_two(count); 3682 + 3676 3683 spin_lock_irqsave(&table->lock, flags); 3677 3684 3678 3685 /* Scan table for free entries */ 3679 - for (c = 0, index = table->min_index; 3680 - index < MAX_IRQS_PER_TABLE; 3681 - ++index) { 3682 - if (!iommu->irte_ops->is_allocated(table, index)) 3686 + for (index = ALIGN(table->min_index, alignment), c = 0; 3687 + index < MAX_IRQS_PER_TABLE;) { 3688 + if (!iommu->irte_ops->is_allocated(table, index)) { 3683 3689 c += 1; 3684 - else 3685 - c = 0; 3690 + } else { 3691 + c = 0; 3692 + index = ALIGN(index + 1, alignment); 3693 + continue; 3694 + } 3686 3695 3687 3696 if (c == count) { 3688 3697 for (; c != 0; --c) ··· 3696 3695 index -= count - 1; 3697 3696 goto out; 3698 3697 } 3698 + 3699 + index++; 3699 3700 } 3700 3701 3701 3702 index = -ENOSPC; ··· 4102 4099 else 4103 4100 ret = -ENOMEM; 4104 4101 } else { 4105 - index = alloc_irq_index(devid, nr_irqs); 4102 + bool align = (info->type == X86_IRQ_ALLOC_TYPE_MSI); 4103 + 4104 + index = alloc_irq_index(devid, nr_irqs, align); 4106 4105 } 4107 4106 if (index < 0) { 4108 4107 pr_warn("Failed to allocate IRTE\n");
+150 -66
drivers/iommu/arm-smmu-v3.c
··· 316 316 #define ARM64_TCR_TBI0_MASK 0x1UL 317 317 318 318 #define CTXDESC_CD_0_AA64 (1UL << 41) 319 + #define CTXDESC_CD_0_S (1UL << 44) 319 320 #define CTXDESC_CD_0_R (1UL << 45) 320 321 #define CTXDESC_CD_0_A (1UL << 46) 321 322 #define CTXDESC_CD_0_ASET_SHIFT 47 ··· 378 377 379 378 #define CMDQ_SYNC_0_CS_SHIFT 12 380 379 #define CMDQ_SYNC_0_CS_NONE (0UL << CMDQ_SYNC_0_CS_SHIFT) 380 + #define CMDQ_SYNC_0_CS_IRQ (1UL << CMDQ_SYNC_0_CS_SHIFT) 381 381 #define CMDQ_SYNC_0_CS_SEV (2UL << CMDQ_SYNC_0_CS_SHIFT) 382 + #define CMDQ_SYNC_0_MSH_SHIFT 22 383 + #define CMDQ_SYNC_0_MSH_ISH (3UL << CMDQ_SYNC_0_MSH_SHIFT) 384 + #define CMDQ_SYNC_0_MSIATTR_SHIFT 24 385 + #define CMDQ_SYNC_0_MSIATTR_OIWB (0xfUL << CMDQ_SYNC_0_MSIATTR_SHIFT) 386 + #define CMDQ_SYNC_0_MSIDATA_SHIFT 32 387 + #define CMDQ_SYNC_0_MSIDATA_MASK 0xffffffffUL 388 + #define CMDQ_SYNC_1_MSIADDR_SHIFT 0 389 + #define CMDQ_SYNC_1_MSIADDR_MASK 0xffffffffffffcUL 382 390 383 391 /* Event queue */ 384 392 #define EVTQ_ENT_DWORDS 4 ··· 418 408 419 409 /* High-level queue structures */ 420 410 #define ARM_SMMU_POLL_TIMEOUT_US 100 421 - #define ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US 1000000 /* 1s! */ 411 + #define ARM_SMMU_CMDQ_SYNC_TIMEOUT_US 1000000 /* 1s! */ 412 + #define ARM_SMMU_CMDQ_SYNC_SPIN_COUNT 10 422 413 423 414 #define MSI_IOVA_BASE 0x8000000 424 415 #define MSI_IOVA_LENGTH 0x100000 425 - 426 - /* Until ACPICA headers cover IORT rev. 
C */ 427 - #ifndef ACPI_IORT_SMMU_HISILICON_HI161X 428 - #define ACPI_IORT_SMMU_HISILICON_HI161X 0x1 429 - #endif 430 - 431 - #ifndef ACPI_IORT_SMMU_V3_CAVIUM_CN99XX 432 - #define ACPI_IORT_SMMU_V3_CAVIUM_CN99XX 0x2 433 - #endif 434 416 435 417 static bool disable_bypass; 436 418 module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO); ··· 506 504 } pri; 507 505 508 506 #define CMDQ_OP_CMD_SYNC 0x46 507 + struct { 508 + u32 msidata; 509 + u64 msiaddr; 510 + } sync; 509 511 }; 510 512 }; 511 513 ··· 610 604 #define ARM_SMMU_FEAT_TRANS_S2 (1 << 10) 611 605 #define ARM_SMMU_FEAT_STALLS (1 << 11) 612 606 #define ARM_SMMU_FEAT_HYP (1 << 12) 607 + #define ARM_SMMU_FEAT_STALL_FORCE (1 << 13) 613 608 u32 features; 614 609 615 610 #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) ··· 623 616 624 617 int gerr_irq; 625 618 int combined_irq; 619 + atomic_t sync_nr; 626 620 627 621 unsigned long ias; /* IPA */ 628 622 unsigned long oas; /* PA */ ··· 641 633 unsigned int sid_bits; 642 634 643 635 struct arm_smmu_strtab_cfg strtab_cfg; 636 + 637 + u32 sync_count; 644 638 645 639 /* IOMMU core code handle */ 646 640 struct iommu_device iommu; ··· 767 757 * Wait for the SMMU to consume items. If drain is true, wait until the queue 768 758 * is empty. Otherwise, wait until there is at least one free slot. 769 759 */ 770 - static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe) 760 + static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe) 771 761 { 772 762 ktime_t timeout; 773 - unsigned int delay = 1; 763 + unsigned int delay = 1, spin_cnt = 0; 774 764 775 - /* Wait longer if it's queue drain */ 776 - timeout = ktime_add_us(ktime_get(), drain ? 777 - ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US : 765 + /* Wait longer if it's a CMD_SYNC */ 766 + timeout = ktime_add_us(ktime_get(), sync ? 767 + ARM_SMMU_CMDQ_SYNC_TIMEOUT_US : 778 768 ARM_SMMU_POLL_TIMEOUT_US); 779 769 780 - while (queue_sync_cons(q), (drain ? 
!queue_empty(q) : queue_full(q))) { 770 + while (queue_sync_cons(q), (sync ? !queue_empty(q) : queue_full(q))) { 781 771 if (ktime_compare(ktime_get(), timeout) > 0) 782 772 return -ETIMEDOUT; 783 773 784 774 if (wfe) { 785 775 wfe(); 786 - } else { 776 + } else if (++spin_cnt < ARM_SMMU_CMDQ_SYNC_SPIN_COUNT) { 787 777 cpu_relax(); 778 + continue; 779 + } else { 788 780 udelay(delay); 789 781 delay *= 2; 782 + spin_cnt = 0; 790 783 } 791 784 } 792 785 ··· 891 878 } 892 879 break; 893 880 case CMDQ_OP_CMD_SYNC: 894 - cmd[0] |= CMDQ_SYNC_0_CS_SEV; 881 + if (ent->sync.msiaddr) 882 + cmd[0] |= CMDQ_SYNC_0_CS_IRQ; 883 + else 884 + cmd[0] |= CMDQ_SYNC_0_CS_SEV; 885 + cmd[0] |= CMDQ_SYNC_0_MSH_ISH | CMDQ_SYNC_0_MSIATTR_OIWB; 886 + cmd[0] |= (u64)ent->sync.msidata << CMDQ_SYNC_0_MSIDATA_SHIFT; 887 + cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK; 895 888 break; 896 889 default: 897 890 return -ENOENT; ··· 955 936 queue_write(Q_ENT(q, cons), cmd, q->ent_dwords); 956 937 } 957 938 939 + static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd) 940 + { 941 + struct arm_smmu_queue *q = &smmu->cmdq.q; 942 + bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV); 943 + 944 + while (queue_insert_raw(q, cmd) == -ENOSPC) { 945 + if (queue_poll_cons(q, false, wfe)) 946 + dev_err_ratelimited(smmu->dev, "CMDQ timeout\n"); 947 + } 948 + } 949 + 958 950 static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, 959 951 struct arm_smmu_cmdq_ent *ent) 960 952 { 961 953 u64 cmd[CMDQ_ENT_DWORDS]; 962 954 unsigned long flags; 963 - bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV); 964 - struct arm_smmu_queue *q = &smmu->cmdq.q; 965 955 966 956 if (arm_smmu_cmdq_build_cmd(cmd, ent)) { 967 957 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n", ··· 979 951 } 980 952 981 953 spin_lock_irqsave(&smmu->cmdq.lock, flags); 982 - while (queue_insert_raw(q, cmd) == -ENOSPC) { 983 - if (queue_poll_cons(q, false, wfe)) 984 - dev_err_ratelimited(smmu->dev, "CMDQ 
timeout\n"); 985 - } 986 - 987 - if (ent->opcode == CMDQ_OP_CMD_SYNC && queue_poll_cons(q, true, wfe)) 988 - dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n"); 954 + arm_smmu_cmdq_insert_cmd(smmu, cmd); 989 955 spin_unlock_irqrestore(&smmu->cmdq.lock, flags); 956 + } 957 + 958 + /* 959 + * The difference between val and sync_idx is bounded by the maximum size of 960 + * a queue at 2^20 entries, so 32 bits is plenty for wrap-safe arithmetic. 961 + */ 962 + static int __arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx) 963 + { 964 + ktime_t timeout; 965 + u32 val; 966 + 967 + timeout = ktime_add_us(ktime_get(), ARM_SMMU_CMDQ_SYNC_TIMEOUT_US); 968 + val = smp_cond_load_acquire(&smmu->sync_count, 969 + (int)(VAL - sync_idx) >= 0 || 970 + !ktime_before(ktime_get(), timeout)); 971 + 972 + return (int)(val - sync_idx) < 0 ? -ETIMEDOUT : 0; 973 + } 974 + 975 + static int __arm_smmu_cmdq_issue_sync_msi(struct arm_smmu_device *smmu) 976 + { 977 + u64 cmd[CMDQ_ENT_DWORDS]; 978 + unsigned long flags; 979 + struct arm_smmu_cmdq_ent ent = { 980 + .opcode = CMDQ_OP_CMD_SYNC, 981 + .sync = { 982 + .msidata = atomic_inc_return_relaxed(&smmu->sync_nr), 983 + .msiaddr = virt_to_phys(&smmu->sync_count), 984 + }, 985 + }; 986 + 987 + arm_smmu_cmdq_build_cmd(cmd, &ent); 988 + 989 + spin_lock_irqsave(&smmu->cmdq.lock, flags); 990 + arm_smmu_cmdq_insert_cmd(smmu, cmd); 991 + spin_unlock_irqrestore(&smmu->cmdq.lock, flags); 992 + 993 + return __arm_smmu_sync_poll_msi(smmu, ent.sync.msidata); 994 + } 995 + 996 + static int __arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu) 997 + { 998 + u64 cmd[CMDQ_ENT_DWORDS]; 999 + unsigned long flags; 1000 + bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV); 1001 + struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC }; 1002 + int ret; 1003 + 1004 + arm_smmu_cmdq_build_cmd(cmd, &ent); 1005 + 1006 + spin_lock_irqsave(&smmu->cmdq.lock, flags); 1007 + arm_smmu_cmdq_insert_cmd(smmu, cmd); 1008 + ret = 
queue_poll_cons(&smmu->cmdq.q, true, wfe); 1009 + spin_unlock_irqrestore(&smmu->cmdq.lock, flags); 1010 + 1011 + return ret; 1012 + } 1013 + 1014 + static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu) 1015 + { 1016 + int ret; 1017 + bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) && 1018 + (smmu->features & ARM_SMMU_FEAT_COHERENCY); 1019 + 1020 + ret = msi ? __arm_smmu_cmdq_issue_sync_msi(smmu) 1021 + : __arm_smmu_cmdq_issue_sync(smmu); 1022 + if (ret) 1023 + dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n"); 990 1024 } 991 1025 992 1026 /* Context descriptor manipulation functions */ ··· 1086 996 CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET_PRIVATE | 1087 997 CTXDESC_CD_0_AA64 | (u64)cfg->cd.asid << CTXDESC_CD_0_ASID_SHIFT | 1088 998 CTXDESC_CD_0_V; 999 + 1000 + /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */ 1001 + if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE) 1002 + val |= CTXDESC_CD_0_S; 1003 + 1089 1004 cfg->cdptr[0] = cpu_to_le64(val); 1090 1005 1091 1006 val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK << CTXDESC_CD_1_TTB0_SHIFT; ··· 1124 1029 }; 1125 1030 1126 1031 arm_smmu_cmdq_issue_cmd(smmu, &cmd); 1127 - cmd.opcode = CMDQ_OP_CMD_SYNC; 1128 - arm_smmu_cmdq_issue_cmd(smmu, &cmd); 1032 + arm_smmu_cmdq_issue_sync(smmu); 1129 1033 } 1130 1034 1131 1035 static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, ··· 1188 1094 dst[1] = cpu_to_le64(STRTAB_STE_1_SHCFG_INCOMING 1189 1095 << STRTAB_STE_1_SHCFG_SHIFT); 1190 1096 dst[2] = 0; /* Nuke the VMID */ 1191 - if (ste_live) 1097 + /* 1098 + * The SMMU can perform negative caching, so we must sync 1099 + * the STE regardless of whether the old value was live. 
1100 + */ 1101 + if (smmu) 1192 1102 arm_smmu_sync_ste_for_sid(smmu, sid); 1193 1103 return; 1194 1104 } ··· 1210 1112 #endif 1211 1113 STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT); 1212 1114 1213 - if (smmu->features & ARM_SMMU_FEAT_STALLS) 1115 + if (smmu->features & ARM_SMMU_FEAT_STALLS && 1116 + !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE)) 1214 1117 dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); 1215 1118 1216 1119 val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK ··· 1374 1275 return IRQ_HANDLED; 1375 1276 } 1376 1277 1377 - static irqreturn_t arm_smmu_cmdq_sync_handler(int irq, void *dev) 1378 - { 1379 - /* We don't actually use CMD_SYNC interrupts for anything */ 1380 - return IRQ_HANDLED; 1381 - } 1382 - 1383 1278 static int arm_smmu_device_disable(struct arm_smmu_device *smmu); 1384 1279 1385 1280 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev) ··· 1406 1313 if (active & GERROR_MSI_EVTQ_ABT_ERR) 1407 1314 dev_warn(smmu->dev, "EVTQ MSI write aborted\n"); 1408 1315 1409 - if (active & GERROR_MSI_CMDQ_ABT_ERR) { 1316 + if (active & GERROR_MSI_CMDQ_ABT_ERR) 1410 1317 dev_warn(smmu->dev, "CMDQ MSI write aborted\n"); 1411 - arm_smmu_cmdq_sync_handler(irq, smmu->dev); 1412 - } 1413 1318 1414 1319 if (active & GERROR_PRIQ_ABT_ERR) 1415 1320 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n"); ··· 1436 1345 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev) 1437 1346 { 1438 1347 arm_smmu_gerror_handler(irq, dev); 1439 - arm_smmu_cmdq_sync_handler(irq, dev); 1440 1348 return IRQ_WAKE_THREAD; 1441 1349 } 1442 1350 1443 1351 /* IO_PGTABLE API */ 1444 1352 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu) 1445 1353 { 1446 - struct arm_smmu_cmdq_ent cmd; 1447 - 1448 - cmd.opcode = CMDQ_OP_CMD_SYNC; 1449 - arm_smmu_cmdq_issue_cmd(smmu, &cmd); 1354 + arm_smmu_cmdq_issue_sync(smmu); 1450 1355 } 1451 1356 1452 1357 static void arm_smmu_tlb_sync(void *cookie) ··· 1830 1743 return 
ops->unmap(ops, iova, size); 1831 1744 } 1832 1745 1746 + static void arm_smmu_iotlb_sync(struct iommu_domain *domain) 1747 + { 1748 + struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu; 1749 + 1750 + if (smmu) 1751 + __arm_smmu_tlb_sync(smmu); 1752 + } 1753 + 1833 1754 static phys_addr_t 1834 1755 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) 1835 1756 { ··· 2058 1963 .map = arm_smmu_map, 2059 1964 .unmap = arm_smmu_unmap, 2060 1965 .map_sg = default_iommu_map_sg, 1966 + .flush_iotlb_all = arm_smmu_iotlb_sync, 1967 + .iotlb_sync = arm_smmu_iotlb_sync, 2061 1968 .iova_to_phys = arm_smmu_iova_to_phys, 2062 1969 .add_device = arm_smmu_add_device, 2063 1970 .remove_device = arm_smmu_remove_device, ··· 2244 2147 { 2245 2148 int ret; 2246 2149 2150 + atomic_set(&smmu->sync_nr, 0); 2247 2151 ret = arm_smmu_init_queues(smmu); 2248 2152 if (ret) 2249 2153 return ret; ··· 2361 2263 "arm-smmu-v3-evtq", smmu); 2362 2264 if (ret < 0) 2363 2265 dev_warn(smmu->dev, "failed to enable evtq irq\n"); 2364 - } 2365 - 2366 - irq = smmu->cmdq.q.irq; 2367 - if (irq) { 2368 - ret = devm_request_irq(smmu->dev, irq, 2369 - arm_smmu_cmdq_sync_handler, 0, 2370 - "arm-smmu-v3-cmdq-sync", smmu); 2371 - if (ret < 0) 2372 - dev_warn(smmu->dev, "failed to enable cmdq-sync irq\n"); 2373 2266 } 2374 2267 2375 2268 irq = smmu->gerr_irq; ··· 2488 2399 /* Invalidate any cached configuration */ 2489 2400 cmd.opcode = CMDQ_OP_CFGI_ALL; 2490 2401 arm_smmu_cmdq_issue_cmd(smmu, &cmd); 2491 - cmd.opcode = CMDQ_OP_CMD_SYNC; 2492 - arm_smmu_cmdq_issue_cmd(smmu, &cmd); 2402 + arm_smmu_cmdq_issue_sync(smmu); 2493 2403 2494 2404 /* Invalidate any stale TLB entries */ 2495 2405 if (smmu->features & ARM_SMMU_FEAT_HYP) { ··· 2498 2410 2499 2411 cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL; 2500 2412 arm_smmu_cmdq_issue_cmd(smmu, &cmd); 2501 - cmd.opcode = CMDQ_OP_CMD_SYNC; 2502 - arm_smmu_cmdq_issue_cmd(smmu, &cmd); 2413 + arm_smmu_cmdq_issue_sync(smmu); 2503 2414 2504 2415 /* Event queue */ 
2505 2416 writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE); ··· 2619 2532 * register, but warn on mismatch. 2620 2533 */ 2621 2534 if (!!(reg & IDR0_COHACC) != coherent) 2622 - dev_warn(smmu->dev, "IDR0.COHACC overridden by dma-coherent property (%s)\n", 2535 + dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n", 2623 2536 coherent ? "true" : "false"); 2624 2537 2625 2538 switch (reg & IDR0_STALL_MODEL_MASK << IDR0_STALL_MODEL_SHIFT) { 2626 - case IDR0_STALL_MODEL_STALL: 2627 - /* Fallthrough */ 2628 2539 case IDR0_STALL_MODEL_FORCE: 2540 + smmu->features |= ARM_SMMU_FEAT_STALL_FORCE; 2541 + /* Fallthrough */ 2542 + case IDR0_STALL_MODEL_STALL: 2629 2543 smmu->features |= ARM_SMMU_FEAT_STALLS; 2630 2544 } 2631 2545 ··· 2753 2665 case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX: 2754 2666 smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY; 2755 2667 break; 2756 - case ACPI_IORT_SMMU_HISILICON_HI161X: 2668 + case ACPI_IORT_SMMU_V3_HISILICON_HI161X: 2757 2669 smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH; 2758 2670 break; 2759 2671 } ··· 2870 2782 irq = platform_get_irq_byname(pdev, "priq"); 2871 2783 if (irq > 0) 2872 2784 smmu->priq.q.irq = irq; 2873 - 2874 - irq = platform_get_irq_byname(pdev, "cmdq-sync"); 2875 - if (irq > 0) 2876 - smmu->cmdq.q.irq = irq; 2877 2785 2878 2786 irq = platform_get_irq_byname(pdev, "gerror"); 2879 2787 if (irq > 0)
+17 -14
drivers/iommu/arm-smmu.c
··· 59 59 #define ARM_MMU500_ACTLR_CPRE (1 << 1) 60 60 61 61 #define ARM_MMU500_ACR_CACHE_LOCK (1 << 26) 62 + #define ARM_MMU500_ACR_S2CRB_TLBEN (1 << 10) 62 63 #define ARM_MMU500_ACR_SMTNMB_TLBEN (1 << 8) 63 64 64 65 #define TLB_LOOP_TIMEOUT 1000000 /* 1s! */ ··· 119 118 ARM_MMU500, 120 119 CAVIUM_SMMUV2, 121 120 }; 122 - 123 - /* Until ACPICA headers cover IORT rev. C */ 124 - #ifndef ACPI_IORT_SMMU_CORELINK_MMU401 125 - #define ACPI_IORT_SMMU_CORELINK_MMU401 0x4 126 - #endif 127 - #ifndef ACPI_IORT_SMMU_CAVIUM_THUNDERX 128 - #define ACPI_IORT_SMMU_CAVIUM_THUNDERX 0x5 129 - #endif 130 121 131 122 struct arm_smmu_s2cr { 132 123 struct iommu_group *group; ··· 243 250 struct arm_smmu_domain { 244 251 struct arm_smmu_device *smmu; 245 252 struct io_pgtable_ops *pgtbl_ops; 253 + const struct iommu_gather_ops *tlb_ops; 246 254 struct arm_smmu_cfg cfg; 247 255 enum arm_smmu_domain_stage stage; 248 256 struct mutex init_mutex; /* Protects smmu pointer */ ··· 729 735 enum io_pgtable_fmt fmt; 730 736 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 731 737 struct arm_smmu_cfg *cfg = &smmu_domain->cfg; 732 - const struct iommu_gather_ops *tlb_ops; 733 738 734 739 mutex_lock(&smmu_domain->init_mutex); 735 740 if (smmu_domain->smmu) ··· 806 813 ias = min(ias, 32UL); 807 814 oas = min(oas, 32UL); 808 815 } 809 - tlb_ops = &arm_smmu_s1_tlb_ops; 816 + smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops; 810 817 break; 811 818 case ARM_SMMU_DOMAIN_NESTED: 812 819 /* ··· 826 833 oas = min(oas, 40UL); 827 834 } 828 835 if (smmu->version == ARM_SMMU_V2) 829 - tlb_ops = &arm_smmu_s2_tlb_ops_v2; 836 + smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2; 830 837 else 831 - tlb_ops = &arm_smmu_s2_tlb_ops_v1; 838 + smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1; 832 839 break; 833 840 default: 834 841 ret = -EINVAL; ··· 856 863 .pgsize_bitmap = smmu->pgsize_bitmap, 857 864 .ias = ias, 858 865 .oas = oas, 859 - .tlb = tlb_ops, 866 + .tlb = smmu_domain->tlb_ops, 860 867 .iommu_dev = 
smmu->dev, 861 868 }; 862 869 ··· 1252 1259 return ops->unmap(ops, iova, size); 1253 1260 } 1254 1261 1262 + static void arm_smmu_iotlb_sync(struct iommu_domain *domain) 1263 + { 1264 + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 1265 + 1266 + if (smmu_domain->tlb_ops) 1267 + smmu_domain->tlb_ops->tlb_sync(smmu_domain); 1268 + } 1269 + 1255 1270 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, 1256 1271 dma_addr_t iova) 1257 1272 { ··· 1563 1562 .map = arm_smmu_map, 1564 1563 .unmap = arm_smmu_unmap, 1565 1564 .map_sg = default_iommu_map_sg, 1565 + .flush_iotlb_all = arm_smmu_iotlb_sync, 1566 + .iotlb_sync = arm_smmu_iotlb_sync, 1566 1567 .iova_to_phys = arm_smmu_iova_to_phys, 1567 1568 .add_device = arm_smmu_add_device, 1568 1569 .remove_device = arm_smmu_remove_device, ··· 1609 1606 * Allow unmatched Stream IDs to allocate bypass 1610 1607 * TLB entries for reduced latency. 1611 1608 */ 1612 - reg |= ARM_MMU500_ACR_SMTNMB_TLBEN; 1609 + reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN; 1613 1610 writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR); 1614 1611 } 1615 1612
+5 -19
drivers/iommu/dma-iommu.c
··· 292 292 /* ...then finally give it a kicking to make sure it fits */ 293 293 base_pfn = max_t(unsigned long, base_pfn, 294 294 domain->geometry.aperture_start >> order); 295 - end_pfn = min_t(unsigned long, end_pfn, 296 - domain->geometry.aperture_end >> order); 297 295 } 298 - /* 299 - * PCI devices may have larger DMA masks, but still prefer allocating 300 - * within a 32-bit mask to avoid DAC addressing. Such limitations don't 301 - * apply to the typical platform device, so for those we may as well 302 - * leave the cache limit at the top of their range to save an rb_last() 303 - * traversal on every allocation. 304 - */ 305 - if (dev && dev_is_pci(dev)) 306 - end_pfn &= DMA_BIT_MASK(32) >> order; 307 296 308 297 /* start_pfn is always nonzero for an already-initialised domain */ 309 298 if (iovad->start_pfn) { ··· 301 312 pr_warn("Incompatible range for DMA domain\n"); 302 313 return -EFAULT; 303 314 } 304 - /* 305 - * If we have devices with different DMA masks, move the free 306 - * area cache limit down for the benefit of the smaller one. 307 - */ 308 - iovad->dma_32bit_pfn = min(end_pfn + 1, iovad->dma_32bit_pfn); 309 315 310 316 return 0; 311 317 } 312 318 313 - init_iova_domain(iovad, 1UL << order, base_pfn, end_pfn); 319 + init_iova_domain(iovad, 1UL << order, base_pfn); 314 320 if (!dev) 315 321 return 0; 316 322 ··· 370 386 371 387 /* Try to get PCI devices a SAC address */ 372 388 if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev)) 373 - iova = alloc_iova_fast(iovad, iova_len, DMA_BIT_MASK(32) >> shift); 389 + iova = alloc_iova_fast(iovad, iova_len, 390 + DMA_BIT_MASK(32) >> shift, false); 374 391 375 392 if (!iova) 376 - iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift); 393 + iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift, 394 + true); 377 395 378 396 return (dma_addr_t)iova << shift; 379 397 }
+7 -3
drivers/iommu/dmar.c
··· 801 801 dmar_free_pci_notify_info(info); 802 802 } 803 803 } 804 - 805 - bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb); 806 804 } 807 805 808 806 return dmar_dev_scope_status; 807 + } 808 + 809 + void dmar_register_bus_notifier(void) 810 + { 811 + bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb); 809 812 } 810 813 811 814 ··· 1679 1676 raw_spin_lock_irqsave(&iommu->register_lock, flag); 1680 1677 } 1681 1678 1682 - writel(DMA_FSTS_PFO | DMA_FSTS_PPF, iommu->reg + DMAR_FSTS_REG); 1679 + writel(DMA_FSTS_PFO | DMA_FSTS_PPF | DMA_FSTS_PRO, 1680 + iommu->reg + DMAR_FSTS_REG); 1683 1681 1684 1682 unlock_exit: 1685 1683 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
+16 -7
drivers/iommu/exynos-iommu.c
··· 263 263 struct sysmmu_drvdata { 264 264 struct device *sysmmu; /* SYSMMU controller device */ 265 265 struct device *master; /* master device (owner) */ 266 + struct device_link *link; /* runtime PM link to master */ 266 267 void __iomem *sfrbase; /* our registers */ 267 268 struct clk *clk; /* SYSMMU's clock */ 268 269 struct clk *aclk; /* SYSMMU's aclk clock */ ··· 1251 1250 1252 1251 static int exynos_iommu_add_device(struct device *dev) 1253 1252 { 1253 + struct exynos_iommu_owner *owner = dev->archdata.iommu; 1254 + struct sysmmu_drvdata *data; 1254 1255 struct iommu_group *group; 1255 1256 1256 1257 if (!has_sysmmu(dev)) ··· 1263 1260 if (IS_ERR(group)) 1264 1261 return PTR_ERR(group); 1265 1262 1263 + list_for_each_entry(data, &owner->controllers, owner_node) { 1264 + /* 1265 + * SYSMMU will be runtime activated via device link 1266 + * (dependency) to its master device, so there are no 1267 + * direct calls to pm_runtime_get/put in this driver. 1268 + */ 1269 + data->link = device_link_add(dev, data->sysmmu, 1270 + DL_FLAG_PM_RUNTIME); 1271 + } 1266 1272 iommu_group_put(group); 1267 1273 1268 1274 return 0; ··· 1280 1268 static void exynos_iommu_remove_device(struct device *dev) 1281 1269 { 1282 1270 struct exynos_iommu_owner *owner = dev->archdata.iommu; 1271 + struct sysmmu_drvdata *data; 1283 1272 1284 1273 if (!has_sysmmu(dev)) 1285 1274 return; ··· 1296 1283 } 1297 1284 } 1298 1285 iommu_group_remove_device(dev); 1286 + 1287 + list_for_each_entry(data, &owner->controllers, owner_node) 1288 + device_link_del(data->link); 1299 1289 } 1300 1290 1301 1291 static int exynos_iommu_of_xlate(struct device *dev, ··· 1331 1315 1332 1316 list_add_tail(&data->owner_node, &owner->controllers); 1333 1317 data->master = dev; 1334 - 1335 - /* 1336 - * SYSMMU will be runtime activated via device link (dependency) to its 1337 - * master device, so there are no direct calls to pm_runtime_get/put 1338 - * in this driver. 
1339 - */ 1340 - device_link_add(dev, data->sysmmu, DL_FLAG_PM_RUNTIME); 1341 1318 1342 1319 return 0; 1343 1320 }
+17 -11
drivers/iommu/intel-iommu.c
··· 82 82 #define IOVA_START_PFN (1) 83 83 84 84 #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) 85 - #define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) 86 - #define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64)) 87 85 88 86 /* page table handling */ 89 87 #define LEVEL_STRIDE (9) ··· 1876 1878 struct iova *iova; 1877 1879 int i; 1878 1880 1879 - init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN, 1880 - DMA_32BIT_PFN); 1881 + init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN); 1881 1882 1882 1883 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock, 1883 1884 &reserved_rbtree_key); ··· 1935 1938 unsigned long sagaw; 1936 1939 int err; 1937 1940 1938 - init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN, 1939 - DMA_32BIT_PFN); 1941 + init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); 1940 1942 1941 1943 err = init_iova_flush_queue(&domain->iovad, 1942 1944 iommu_flush_iova, iova_entry_free); ··· 2054 2058 if (context_copied(context)) { 2055 2059 u16 did_old = context_domain_id(context); 2056 2060 2057 - if (did_old >= 0 && did_old < cap_ndoms(iommu->cap)) { 2061 + if (did_old < cap_ndoms(iommu->cap)) { 2058 2062 iommu->flush.flush_context(iommu, did_old, 2059 2063 (((u16)bus) << 8) | devfn, 2060 2064 DMA_CCMD_MASK_NOBIT, ··· 3469 3473 * from higher range 3470 3474 */ 3471 3475 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages, 3472 - IOVA_PFN(DMA_BIT_MASK(32))); 3476 + IOVA_PFN(DMA_BIT_MASK(32)), false); 3473 3477 if (iova_pfn) 3474 3478 return iova_pfn; 3475 3479 } 3476 - iova_pfn = alloc_iova_fast(&domain->iovad, nrpages, IOVA_PFN(dma_mask)); 3480 + iova_pfn = alloc_iova_fast(&domain->iovad, nrpages, 3481 + IOVA_PFN(dma_mask), true); 3477 3482 if (unlikely(!iova_pfn)) { 3478 3483 pr_err("Allocating %ld-page iova for %s failed", 3479 3484 nrpages, dev_name(dev)); ··· 4749 4752 goto out_free_dmar; 4750 4753 } 4751 4754 4755 + up_write(&dmar_global_lock); 4756 + 4757 + /* 4758 + * The bus notifier takes the 
dmar_global_lock, so lockdep will 4759 + * complain later when we register it under the lock. 4760 + */ 4761 + dmar_register_bus_notifier(); 4762 + 4763 + down_write(&dmar_global_lock); 4764 + 4752 4765 if (no_iommu || dmar_disabled) { 4753 4766 /* 4754 4767 * We exit the function here to ensure IOMMU's remapping and ··· 4904 4897 { 4905 4898 int adjust_width; 4906 4899 4907 - init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN, 4908 - DMA_32BIT_PFN); 4900 + init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); 4909 4901 domain_reserve_special_ranges(domain); 4910 4902 4911 4903 /* calculate AGAW */
+3 -1
drivers/iommu/intel-svm.c
··· 292 292 int pasid_max; 293 293 int ret; 294 294 295 - if (WARN_ON(!iommu)) 295 + if (WARN_ON(!iommu || !iommu->pasid_table)) 296 296 return -EINVAL; 297 297 298 298 if (dev_is_pci(dev)) { ··· 458 458 kfree_rcu(sdev, rcu); 459 459 460 460 if (list_empty(&svm->devs)) { 461 + svm->iommu->pasid_table[svm->pasid].val = 0; 462 + wmb(); 461 463 462 464 idr_remove(&svm->iommu->pasid_idr, svm->pasid); 463 465 if (svm->mm)
+1 -6
drivers/iommu/io-pgtable-arm-v7s.c
··· 660 660 size_t size) 661 661 { 662 662 struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops); 663 - size_t unmapped; 664 663 665 664 if (WARN_ON(upper_32_bits(iova))) 666 665 return 0; 667 666 668 - unmapped = __arm_v7s_unmap(data, iova, size, 1, data->pgd); 669 - if (unmapped) 670 - io_pgtable_tlb_sync(&data->iop); 671 - 672 - return unmapped; 667 + return __arm_v7s_unmap(data, iova, size, 1, data->pgd); 673 668 } 674 669 675 670 static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops,
+1 -6
drivers/iommu/io-pgtable-arm.c
··· 609 609 static int arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova, 610 610 size_t size) 611 611 { 612 - size_t unmapped; 613 612 struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); 614 613 arm_lpae_iopte *ptep = data->pgd; 615 614 int lvl = ARM_LPAE_START_LVL(data); ··· 616 617 if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias))) 617 618 return 0; 618 619 619 - unmapped = __arm_lpae_unmap(data, iova, size, lvl, ptep); 620 - if (unmapped) 621 - io_pgtable_tlb_sync(&data->iop); 622 - 623 - return unmapped; 620 + return __arm_lpae_unmap(data, iova, size, lvl, ptep); 624 621 } 625 622 626 623 static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
+89 -133
drivers/iommu/iova.c
··· 24 24 #include <linux/bitops.h> 25 25 #include <linux/cpu.h> 26 26 27 + /* The anchor node sits above the top of the usable address space */ 28 + #define IOVA_ANCHOR ~0UL 29 + 27 30 static bool iova_rcache_insert(struct iova_domain *iovad, 28 31 unsigned long pfn, 29 32 unsigned long size); ··· 40 37 41 38 void 42 39 init_iova_domain(struct iova_domain *iovad, unsigned long granule, 43 - unsigned long start_pfn, unsigned long pfn_32bit) 40 + unsigned long start_pfn) 44 41 { 45 42 /* 46 43 * IOVA granularity will normally be equal to the smallest ··· 51 48 52 49 spin_lock_init(&iovad->iova_rbtree_lock); 53 50 iovad->rbroot = RB_ROOT; 54 - iovad->cached32_node = NULL; 51 + iovad->cached_node = &iovad->anchor.node; 52 + iovad->cached32_node = &iovad->anchor.node; 55 53 iovad->granule = granule; 56 54 iovad->start_pfn = start_pfn; 57 - iovad->dma_32bit_pfn = pfn_32bit + 1; 55 + iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad)); 58 56 iovad->flush_cb = NULL; 59 57 iovad->fq = NULL; 58 + iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR; 59 + rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node); 60 + rb_insert_color(&iovad->anchor.node, &iovad->rbroot); 60 61 init_iova_rcaches(iovad); 61 62 } 62 63 EXPORT_SYMBOL_GPL(init_iova_domain); ··· 115 108 EXPORT_SYMBOL_GPL(init_iova_flush_queue); 116 109 117 110 static struct rb_node * 118 - __get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn) 111 + __get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn) 119 112 { 120 - if ((*limit_pfn > iovad->dma_32bit_pfn) || 121 - (iovad->cached32_node == NULL)) 122 - return rb_last(&iovad->rbroot); 123 - else { 124 - struct rb_node *prev_node = rb_prev(iovad->cached32_node); 125 - struct iova *curr_iova = 126 - rb_entry(iovad->cached32_node, struct iova, node); 127 - *limit_pfn = curr_iova->pfn_lo; 128 - return prev_node; 129 - } 113 + if (limit_pfn <= iovad->dma_32bit_pfn) 114 + return iovad->cached32_node; 115 + 116 + return 
iovad->cached_node; 130 117 } 131 118 132 119 static void 133 - __cached_rbnode_insert_update(struct iova_domain *iovad, 134 - unsigned long limit_pfn, struct iova *new) 120 + __cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new) 135 121 { 136 - if (limit_pfn != iovad->dma_32bit_pfn) 137 - return; 138 - iovad->cached32_node = &new->node; 122 + if (new->pfn_hi < iovad->dma_32bit_pfn) 123 + iovad->cached32_node = &new->node; 124 + else 125 + iovad->cached_node = &new->node; 139 126 } 140 127 141 128 static void 142 129 __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) 143 130 { 144 131 struct iova *cached_iova; 145 - struct rb_node *curr; 146 132 147 - if (!iovad->cached32_node) 148 - return; 149 - curr = iovad->cached32_node; 150 - cached_iova = rb_entry(curr, struct iova, node); 133 + cached_iova = rb_entry(iovad->cached32_node, struct iova, node); 134 + if (free->pfn_hi < iovad->dma_32bit_pfn && 135 + free->pfn_lo >= cached_iova->pfn_lo) 136 + iovad->cached32_node = rb_next(&free->node); 151 137 152 - if (free->pfn_lo >= cached_iova->pfn_lo) { 153 - struct rb_node *node = rb_next(&free->node); 154 - struct iova *iova = rb_entry(node, struct iova, node); 155 - 156 - /* only cache if it's below 32bit pfn */ 157 - if (node && iova->pfn_lo < iovad->dma_32bit_pfn) 158 - iovad->cached32_node = node; 159 - else 160 - iovad->cached32_node = NULL; 161 - } 138 + cached_iova = rb_entry(iovad->cached_node, struct iova, node); 139 + if (free->pfn_lo >= cached_iova->pfn_lo) 140 + iovad->cached_node = rb_next(&free->node); 162 141 } 163 142 164 143 /* Insert the iova into domain rbtree by holding writer lock */ ··· 175 182 rb_insert_color(&iova->node, root); 176 183 } 177 184 178 - /* 179 - * Computes the padding size required, to make the start address 180 - * naturally aligned on the power-of-two order of its size 181 - */ 182 - static unsigned int 183 - iova_get_pad_size(unsigned int size, unsigned int limit_pfn) 184 - { 185 - 
return (limit_pfn - size) & (__roundup_pow_of_two(size) - 1); 186 - } 187 - 188 185 static int __alloc_and_insert_iova_range(struct iova_domain *iovad, 189 186 unsigned long size, unsigned long limit_pfn, 190 187 struct iova *new, bool size_aligned) 191 188 { 192 - struct rb_node *prev, *curr = NULL; 189 + struct rb_node *curr, *prev; 190 + struct iova *curr_iova; 193 191 unsigned long flags; 194 - unsigned long saved_pfn; 195 - unsigned int pad_size = 0; 192 + unsigned long new_pfn; 193 + unsigned long align_mask = ~0UL; 194 + 195 + if (size_aligned) 196 + align_mask <<= fls_long(size - 1); 196 197 197 198 /* Walk the tree backwards */ 198 199 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); 199 - saved_pfn = limit_pfn; 200 - curr = __get_cached_rbnode(iovad, &limit_pfn); 201 - prev = curr; 202 - while (curr) { 203 - struct iova *curr_iova = rb_entry(curr, struct iova, node); 204 - 205 - if (limit_pfn <= curr_iova->pfn_lo) { 206 - goto move_left; 207 - } else if (limit_pfn > curr_iova->pfn_hi) { 208 - if (size_aligned) 209 - pad_size = iova_get_pad_size(size, limit_pfn); 210 - if ((curr_iova->pfn_hi + size + pad_size) < limit_pfn) 211 - break; /* found a free slot */ 212 - } 213 - limit_pfn = curr_iova->pfn_lo; 214 - move_left: 200 + curr = __get_cached_rbnode(iovad, limit_pfn); 201 + curr_iova = rb_entry(curr, struct iova, node); 202 + do { 203 + limit_pfn = min(limit_pfn, curr_iova->pfn_lo); 204 + new_pfn = (limit_pfn - size) & align_mask; 215 205 prev = curr; 216 206 curr = rb_prev(curr); 217 - } 207 + curr_iova = rb_entry(curr, struct iova, node); 208 + } while (curr && new_pfn <= curr_iova->pfn_hi); 218 209 219 - if (!curr) { 220 - if (size_aligned) 221 - pad_size = iova_get_pad_size(size, limit_pfn); 222 - if ((iovad->start_pfn + size + pad_size) > limit_pfn) { 223 - spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); 224 - return -ENOMEM; 225 - } 210 + if (limit_pfn < size || new_pfn < iovad->start_pfn) { 211 + 
spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); 212 + return -ENOMEM; 226 213 } 227 214 228 215 /* pfn_lo will point to size aligned address if size_aligned is set */ 229 - new->pfn_lo = limit_pfn - (size + pad_size); 216 + new->pfn_lo = new_pfn; 230 217 new->pfn_hi = new->pfn_lo + size - 1; 231 218 232 219 /* If we have 'prev', it's a valid place to start the insertion. */ 233 220 iova_insert_rbtree(&iovad->rbroot, new, prev); 234 - __cached_rbnode_insert_update(iovad, saved_pfn, new); 221 + __cached_rbnode_insert_update(iovad, new); 235 222 236 223 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); 237 224 ··· 231 258 232 259 void free_iova_mem(struct iova *iova) 233 260 { 234 - kmem_cache_free(iova_cache, iova); 261 + if (iova->pfn_lo != IOVA_ANCHOR) 262 + kmem_cache_free(iova_cache, iova); 235 263 } 236 264 EXPORT_SYMBOL(free_iova_mem); 237 265 ··· 316 342 while (node) { 317 343 struct iova *iova = rb_entry(node, struct iova, node); 318 344 319 - /* If pfn falls within iova's range, return iova */ 320 - if ((pfn >= iova->pfn_lo) && (pfn <= iova->pfn_hi)) { 321 - return iova; 322 - } 323 - 324 345 if (pfn < iova->pfn_lo) 325 346 node = node->rb_left; 326 - else if (pfn > iova->pfn_lo) 347 + else if (pfn > iova->pfn_hi) 327 348 node = node->rb_right; 349 + else 350 + return iova; /* pfn falls within iova's range */ 328 351 } 329 352 330 353 return NULL; ··· 395 424 * @iovad: - iova domain in question 396 425 * @size: - size of page frames to allocate 397 426 * @limit_pfn: - max limit address 427 + * @flush_rcache: - set to flush rcache on regular allocation failure 398 428 * This function tries to satisfy an iova allocation from the rcache, 399 - * and falls back to regular allocation on failure. 429 + * and falls back to regular allocation on failure. If regular allocation 430 + * fails too and the flush_rcache flag is set then the rcache will be flushed. 
400 431 */ 401 432 unsigned long 402 433 alloc_iova_fast(struct iova_domain *iovad, unsigned long size, 403 - unsigned long limit_pfn) 434 + unsigned long limit_pfn, bool flush_rcache) 404 435 { 405 - bool flushed_rcache = false; 406 436 unsigned long iova_pfn; 407 437 struct iova *new_iova; 408 438 409 - iova_pfn = iova_rcache_get(iovad, size, limit_pfn); 439 + iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1); 410 440 if (iova_pfn) 411 441 return iova_pfn; 412 442 ··· 416 444 if (!new_iova) { 417 445 unsigned int cpu; 418 446 419 - if (flushed_rcache) 447 + if (!flush_rcache) 420 448 return 0; 421 449 422 450 /* Try replenishing IOVAs by flushing rcache. */ 423 - flushed_rcache = true; 451 + flush_rcache = false; 424 452 for_each_online_cpu(cpu) 425 453 free_cpu_cached_iovas(cpu, iovad); 426 454 goto retry; ··· 542 570 unsigned long pfn, unsigned long pages, 543 571 unsigned long data) 544 572 { 545 - struct iova_fq *fq = get_cpu_ptr(iovad->fq); 573 + struct iova_fq *fq = raw_cpu_ptr(iovad->fq); 546 574 unsigned long flags; 547 575 unsigned idx; 548 576 ··· 572 600 if (atomic_cmpxchg(&iovad->fq_timer_on, 0, 1) == 0) 573 601 mod_timer(&iovad->fq_timer, 574 602 jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT)); 575 - 576 - put_cpu_ptr(iovad->fq); 577 603 } 578 604 EXPORT_SYMBOL_GPL(queue_iova); 579 605 ··· 582 612 */ 583 613 void put_iova_domain(struct iova_domain *iovad) 584 614 { 585 - struct rb_node *node; 586 - unsigned long flags; 615 + struct iova *iova, *tmp; 587 616 588 617 free_iova_flush_queue(iovad); 589 618 free_iova_rcaches(iovad); 590 - spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); 591 - node = rb_first(&iovad->rbroot); 592 - while (node) { 593 - struct iova *iova = rb_entry(node, struct iova, node); 594 - 595 - rb_erase(node, &iovad->rbroot); 619 + rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node) 596 620 free_iova_mem(iova); 597 - node = rb_first(&iovad->rbroot); 598 - } 599 - 
spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); 600 621 } 601 622 EXPORT_SYMBOL_GPL(put_iova_domain); 602 623 ··· 656 695 struct iova *iova; 657 696 unsigned int overlap = 0; 658 697 698 + /* Don't allow nonsensical pfns */ 699 + if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad)))) 700 + return NULL; 701 + 659 702 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); 660 703 for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) { 661 704 if (__is_range_overlap(node, pfn_lo, pfn_hi)) { ··· 702 737 for (node = rb_first(&from->rbroot); node; node = rb_next(node)) { 703 738 struct iova *iova = rb_entry(node, struct iova, node); 704 739 struct iova *new_iova; 740 + 741 + if (iova->pfn_lo == IOVA_ANCHOR) 742 + continue; 705 743 706 744 new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi); 707 745 if (!new_iova) ··· 823 855 static unsigned long iova_magazine_pop(struct iova_magazine *mag, 824 856 unsigned long limit_pfn) 825 857 { 858 + int i; 859 + unsigned long pfn; 860 + 826 861 BUG_ON(iova_magazine_empty(mag)); 827 862 828 - if (mag->pfns[mag->size - 1] >= limit_pfn) 829 - return 0; 863 + /* Only fall back to the rbtree if we have no suitable pfns at all */ 864 + for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--) 865 + if (i == 0) 866 + return 0; 830 867 831 - return mag->pfns[--mag->size]; 868 + /* Swap it to pop it */ 869 + pfn = mag->pfns[i]; 870 + mag->pfns[i] = mag->pfns[--mag->size]; 871 + 872 + return pfn; 832 873 } 833 874 834 875 static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn) ··· 988 1011 if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE) 989 1012 return 0; 990 1013 991 - return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn); 992 - } 993 - 994 - /* 995 - * Free a cpu's rcache. 
996 - */ 997 - static void free_cpu_iova_rcache(unsigned int cpu, struct iova_domain *iovad, 998 - struct iova_rcache *rcache) 999 - { 1000 - struct iova_cpu_rcache *cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu); 1001 - unsigned long flags; 1002 - 1003 - spin_lock_irqsave(&cpu_rcache->lock, flags); 1004 - 1005 - iova_magazine_free_pfns(cpu_rcache->loaded, iovad); 1006 - iova_magazine_free(cpu_rcache->loaded); 1007 - 1008 - iova_magazine_free_pfns(cpu_rcache->prev, iovad); 1009 - iova_magazine_free(cpu_rcache->prev); 1010 - 1011 - spin_unlock_irqrestore(&cpu_rcache->lock, flags); 1014 + return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size); 1012 1015 } 1013 1016 1014 1017 /* ··· 997 1040 static void free_iova_rcaches(struct iova_domain *iovad) 998 1041 { 999 1042 struct iova_rcache *rcache; 1000 - unsigned long flags; 1043 + struct iova_cpu_rcache *cpu_rcache; 1001 1044 unsigned int cpu; 1002 1045 int i, j; 1003 1046 1004 1047 for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { 1005 1048 rcache = &iovad->rcaches[i]; 1006 - for_each_possible_cpu(cpu) 1007 - free_cpu_iova_rcache(cpu, iovad, rcache); 1008 - spin_lock_irqsave(&rcache->lock, flags); 1009 - free_percpu(rcache->cpu_rcaches); 1010 - for (j = 0; j < rcache->depot_size; ++j) { 1011 - iova_magazine_free_pfns(rcache->depot[j], iovad); 1012 - iova_magazine_free(rcache->depot[j]); 1049 + for_each_possible_cpu(cpu) { 1050 + cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu); 1051 + iova_magazine_free(cpu_rcache->loaded); 1052 + iova_magazine_free(cpu_rcache->prev); 1013 1053 } 1014 - spin_unlock_irqrestore(&rcache->lock, flags); 1054 + free_percpu(rcache->cpu_rcaches); 1055 + for (j = 0; j < rcache->depot_size; ++j) 1056 + iova_magazine_free(rcache->depot[j]); 1015 1057 } 1016 1058 } 1017 1059
+328 -235
drivers/iommu/ipmmu-vmsa.c
··· 19 19 #include <linux/iommu.h> 20 20 #include <linux/module.h> 21 21 #include <linux/of.h> 22 + #include <linux/of_device.h> 23 + #include <linux/of_iommu.h> 22 24 #include <linux/of_platform.h> 23 25 #include <linux/platform_device.h> 24 26 #include <linux/sizes.h> 25 27 #include <linux/slab.h> 28 + #include <linux/sys_soc.h> 26 29 27 30 #if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) 28 31 #include <asm/dma-iommu.h> 29 32 #include <asm/pgalloc.h> 33 + #else 34 + #define arm_iommu_create_mapping(...) NULL 35 + #define arm_iommu_attach_device(...) -ENODEV 36 + #define arm_iommu_release_mapping(...) do {} while (0) 37 + #define arm_iommu_detach_device(...) do {} while (0) 30 38 #endif 31 39 32 40 #include "io-pgtable.h" 33 41 34 - #define IPMMU_CTX_MAX 1 42 + #define IPMMU_CTX_MAX 8 43 + 44 + struct ipmmu_features { 45 + bool use_ns_alias_offset; 46 + bool has_cache_leaf_nodes; 47 + unsigned int number_of_contexts; 48 + bool setup_imbuscr; 49 + bool twobit_imttbcr_sl0; 50 + }; 35 51 36 52 struct ipmmu_vmsa_device { 37 53 struct device *dev; 38 54 void __iomem *base; 39 55 struct iommu_device iommu; 40 - 56 + struct ipmmu_vmsa_device *root; 57 + const struct ipmmu_features *features; 41 58 unsigned int num_utlbs; 59 + unsigned int num_ctx; 42 60 spinlock_t lock; /* Protects ctx and domains[] */ 43 61 DECLARE_BITMAP(ctx, IPMMU_CTX_MAX); 44 62 struct ipmmu_vmsa_domain *domains[IPMMU_CTX_MAX]; 45 63 64 + struct iommu_group *group; 46 65 struct dma_iommu_mapping *mapping; 47 66 }; 48 67 ··· 76 57 spinlock_t lock; /* Protects mappings */ 77 58 }; 78 59 79 - struct ipmmu_vmsa_iommu_priv { 80 - struct ipmmu_vmsa_device *mmu; 81 - struct device *dev; 82 - struct list_head list; 83 - }; 84 - 85 60 static struct ipmmu_vmsa_domain *to_vmsa_domain(struct iommu_domain *dom) 86 61 { 87 62 return container_of(dom, struct ipmmu_vmsa_domain, io_domain); 88 63 } 89 64 90 - static struct ipmmu_vmsa_iommu_priv *to_priv(struct device *dev) 65 + static struct ipmmu_vmsa_device 
*to_ipmmu(struct device *dev) 91 66 { 92 67 return dev->iommu_fwspec ? dev->iommu_fwspec->iommu_priv : NULL; 93 68 } ··· 146 133 #define IMTTBCR_TSZ0_MASK (7 << 0) 147 134 #define IMTTBCR_TSZ0_SHIFT O 148 135 136 + #define IMTTBCR_SL0_TWOBIT_LVL_3 (0 << 6) 137 + #define IMTTBCR_SL0_TWOBIT_LVL_2 (1 << 6) 138 + #define IMTTBCR_SL0_TWOBIT_LVL_1 (2 << 6) 139 + 149 140 #define IMBUSCR 0x000c 150 141 #define IMBUSCR_DVM (1 << 2) 151 142 #define IMBUSCR_BUSSEL_SYS (0 << 0) ··· 211 194 #define IMUASID_ASID0_SHIFT 0 212 195 213 196 /* ----------------------------------------------------------------------------- 197 + * Root device handling 198 + */ 199 + 200 + static struct platform_driver ipmmu_driver; 201 + 202 + static bool ipmmu_is_root(struct ipmmu_vmsa_device *mmu) 203 + { 204 + return mmu->root == mmu; 205 + } 206 + 207 + static int __ipmmu_check_device(struct device *dev, void *data) 208 + { 209 + struct ipmmu_vmsa_device *mmu = dev_get_drvdata(dev); 210 + struct ipmmu_vmsa_device **rootp = data; 211 + 212 + if (ipmmu_is_root(mmu)) 213 + *rootp = mmu; 214 + 215 + return 0; 216 + } 217 + 218 + static struct ipmmu_vmsa_device *ipmmu_find_root(void) 219 + { 220 + struct ipmmu_vmsa_device *root = NULL; 221 + 222 + return driver_for_each_device(&ipmmu_driver.driver, NULL, &root, 223 + __ipmmu_check_device) == 0 ? 
root : NULL; 224 + } 225 + 226 + /* ----------------------------------------------------------------------------- 214 227 * Read/Write Access 215 228 */ 216 229 ··· 255 208 iowrite32(data, mmu->base + offset); 256 209 } 257 210 258 - static u32 ipmmu_ctx_read(struct ipmmu_vmsa_domain *domain, unsigned int reg) 211 + static u32 ipmmu_ctx_read_root(struct ipmmu_vmsa_domain *domain, 212 + unsigned int reg) 259 213 { 260 - return ipmmu_read(domain->mmu, domain->context_id * IM_CTX_SIZE + reg); 214 + return ipmmu_read(domain->mmu->root, 215 + domain->context_id * IM_CTX_SIZE + reg); 261 216 } 262 217 263 - static void ipmmu_ctx_write(struct ipmmu_vmsa_domain *domain, unsigned int reg, 264 - u32 data) 218 + static void ipmmu_ctx_write_root(struct ipmmu_vmsa_domain *domain, 219 + unsigned int reg, u32 data) 265 220 { 266 - ipmmu_write(domain->mmu, domain->context_id * IM_CTX_SIZE + reg, data); 221 + ipmmu_write(domain->mmu->root, 222 + domain->context_id * IM_CTX_SIZE + reg, data); 223 + } 224 + 225 + static void ipmmu_ctx_write_all(struct ipmmu_vmsa_domain *domain, 226 + unsigned int reg, u32 data) 227 + { 228 + if (domain->mmu != domain->mmu->root) 229 + ipmmu_write(domain->mmu, 230 + domain->context_id * IM_CTX_SIZE + reg, data); 231 + 232 + ipmmu_write(domain->mmu->root, 233 + domain->context_id * IM_CTX_SIZE + reg, data); 267 234 } 268 235 269 236 /* ----------------------------------------------------------------------------- ··· 289 228 { 290 229 unsigned int count = 0; 291 230 292 - while (ipmmu_ctx_read(domain, IMCTR) & IMCTR_FLUSH) { 231 + while (ipmmu_ctx_read_root(domain, IMCTR) & IMCTR_FLUSH) { 293 232 cpu_relax(); 294 233 if (++count == TLB_LOOP_TIMEOUT) { 295 234 dev_err_ratelimited(domain->mmu->dev, ··· 304 243 { 305 244 u32 reg; 306 245 307 - reg = ipmmu_ctx_read(domain, IMCTR); 246 + reg = ipmmu_ctx_read_root(domain, IMCTR); 308 247 reg |= IMCTR_FLUSH; 309 - ipmmu_ctx_write(domain, IMCTR, reg); 248 + ipmmu_ctx_write_all(domain, IMCTR, reg); 310 249 311 
250 ipmmu_tlb_sync(domain); 312 251 } ··· 374 313 375 314 spin_lock_irqsave(&mmu->lock, flags); 376 315 377 - ret = find_first_zero_bit(mmu->ctx, IPMMU_CTX_MAX); 378 - if (ret != IPMMU_CTX_MAX) { 316 + ret = find_first_zero_bit(mmu->ctx, mmu->num_ctx); 317 + if (ret != mmu->num_ctx) { 379 318 mmu->domains[ret] = domain; 380 319 set_bit(ret, mmu->ctx); 381 - } 320 + } else 321 + ret = -EBUSY; 382 322 383 323 spin_unlock_irqrestore(&mmu->lock, flags); 384 324 ··· 402 340 static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) 403 341 { 404 342 u64 ttbr; 343 + u32 tmp; 405 344 int ret; 406 345 407 346 /* ··· 427 364 * TODO: Add support for coherent walk through CCI with DVM and remove 428 365 * cache handling. For now, delegate it to the io-pgtable code. 429 366 */ 430 - domain->cfg.iommu_dev = domain->mmu->dev; 367 + domain->cfg.iommu_dev = domain->mmu->root->dev; 431 368 432 369 /* 433 370 * Find an unused context. 434 371 */ 435 - ret = ipmmu_domain_allocate_context(domain->mmu, domain); 436 - if (ret == IPMMU_CTX_MAX) 437 - return -EBUSY; 372 + ret = ipmmu_domain_allocate_context(domain->mmu->root, domain); 373 + if (ret < 0) 374 + return ret; 438 375 439 376 domain->context_id = ret; 440 377 441 378 domain->iop = alloc_io_pgtable_ops(ARM_32_LPAE_S1, &domain->cfg, 442 379 domain); 443 380 if (!domain->iop) { 444 - ipmmu_domain_free_context(domain->mmu, domain->context_id); 381 + ipmmu_domain_free_context(domain->mmu->root, 382 + domain->context_id); 445 383 return -EINVAL; 446 384 } 447 385 448 386 /* TTBR0 */ 449 387 ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr[0]; 450 - ipmmu_ctx_write(domain, IMTTLBR0, ttbr); 451 - ipmmu_ctx_write(domain, IMTTUBR0, ttbr >> 32); 388 + ipmmu_ctx_write_root(domain, IMTTLBR0, ttbr); 389 + ipmmu_ctx_write_root(domain, IMTTUBR0, ttbr >> 32); 452 390 453 391 /* 454 392 * TTBCR 455 393 * We use long descriptors with inner-shareable WBWA tables and allocate 456 394 * the whole 32-bit VA space to TTBR0. 
457 395 */ 458 - ipmmu_ctx_write(domain, IMTTBCR, IMTTBCR_EAE | 459 - IMTTBCR_SH0_INNER_SHAREABLE | IMTTBCR_ORGN0_WB_WA | 460 - IMTTBCR_IRGN0_WB_WA | IMTTBCR_SL0_LVL_1); 396 + if (domain->mmu->features->twobit_imttbcr_sl0) 397 + tmp = IMTTBCR_SL0_TWOBIT_LVL_1; 398 + else 399 + tmp = IMTTBCR_SL0_LVL_1; 400 + 401 + ipmmu_ctx_write_root(domain, IMTTBCR, IMTTBCR_EAE | 402 + IMTTBCR_SH0_INNER_SHAREABLE | IMTTBCR_ORGN0_WB_WA | 403 + IMTTBCR_IRGN0_WB_WA | tmp); 461 404 462 405 /* MAIR0 */ 463 - ipmmu_ctx_write(domain, IMMAIR0, domain->cfg.arm_lpae_s1_cfg.mair[0]); 406 + ipmmu_ctx_write_root(domain, IMMAIR0, 407 + domain->cfg.arm_lpae_s1_cfg.mair[0]); 464 408 465 409 /* IMBUSCR */ 466 - ipmmu_ctx_write(domain, IMBUSCR, 467 - ipmmu_ctx_read(domain, IMBUSCR) & 468 - ~(IMBUSCR_DVM | IMBUSCR_BUSSEL_MASK)); 410 + if (domain->mmu->features->setup_imbuscr) 411 + ipmmu_ctx_write_root(domain, IMBUSCR, 412 + ipmmu_ctx_read_root(domain, IMBUSCR) & 413 + ~(IMBUSCR_DVM | IMBUSCR_BUSSEL_MASK)); 469 414 470 415 /* 471 416 * IMSTR 472 417 * Clear all interrupt flags. 473 418 */ 474 - ipmmu_ctx_write(domain, IMSTR, ipmmu_ctx_read(domain, IMSTR)); 419 + ipmmu_ctx_write_root(domain, IMSTR, ipmmu_ctx_read_root(domain, IMSTR)); 475 420 476 421 /* 477 422 * IMCTR ··· 488 417 * software management as we have no use for it. Flush the TLB as 489 418 * required when modifying the context registers. 490 419 */ 491 - ipmmu_ctx_write(domain, IMCTR, IMCTR_INTEN | IMCTR_FLUSH | IMCTR_MMUEN); 420 + ipmmu_ctx_write_all(domain, IMCTR, 421 + IMCTR_INTEN | IMCTR_FLUSH | IMCTR_MMUEN); 492 422 493 423 return 0; 494 424 } ··· 502 430 * 503 431 * TODO: Is TLB flush really needed ? 
504 432 */ 505 - ipmmu_ctx_write(domain, IMCTR, IMCTR_FLUSH); 433 + ipmmu_ctx_write_all(domain, IMCTR, IMCTR_FLUSH); 506 434 ipmmu_tlb_sync(domain); 507 - ipmmu_domain_free_context(domain->mmu, domain->context_id); 435 + ipmmu_domain_free_context(domain->mmu->root, domain->context_id); 508 436 } 509 437 510 438 /* ----------------------------------------------------------------------------- ··· 518 446 u32 status; 519 447 u32 iova; 520 448 521 - status = ipmmu_ctx_read(domain, IMSTR); 449 + status = ipmmu_ctx_read_root(domain, IMSTR); 522 450 if (!(status & err_mask)) 523 451 return IRQ_NONE; 524 452 525 - iova = ipmmu_ctx_read(domain, IMEAR); 453 + iova = ipmmu_ctx_read_root(domain, IMEAR); 526 454 527 455 /* 528 456 * Clear the error status flags. Unlike traditional interrupt flag ··· 530 458 * seems to require 0. The error address register must be read before, 531 459 * otherwise its value will be 0. 532 460 */ 533 - ipmmu_ctx_write(domain, IMSTR, 0); 461 + ipmmu_ctx_write_root(domain, IMSTR, 0); 534 462 535 463 /* Log fatal errors. */ 536 464 if (status & IMSTR_MHIT) ··· 571 499 /* 572 500 * Check interrupts for all active contexts. 
573 501 */ 574 - for (i = 0; i < IPMMU_CTX_MAX; i++) { 502 + for (i = 0; i < mmu->num_ctx; i++) { 575 503 if (!mmu->domains[i]) 576 504 continue; 577 505 if (ipmmu_domain_irq(mmu->domains[i]) == IRQ_HANDLED) ··· 600 528 return &domain->io_domain; 601 529 } 602 530 531 + static struct iommu_domain *ipmmu_domain_alloc(unsigned type) 532 + { 533 + struct iommu_domain *io_domain = NULL; 534 + 535 + switch (type) { 536 + case IOMMU_DOMAIN_UNMANAGED: 537 + io_domain = __ipmmu_domain_alloc(type); 538 + break; 539 + 540 + case IOMMU_DOMAIN_DMA: 541 + io_domain = __ipmmu_domain_alloc(type); 542 + if (io_domain && iommu_get_dma_cookie(io_domain)) { 543 + kfree(io_domain); 544 + io_domain = NULL; 545 + } 546 + break; 547 + } 548 + 549 + return io_domain; 550 + } 551 + 603 552 static void ipmmu_domain_free(struct iommu_domain *io_domain) 604 553 { 605 554 struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); ··· 629 536 * Free the domain resources. We assume that all devices have already 630 537 * been detached. 631 538 */ 539 + iommu_put_dma_cookie(io_domain); 632 540 ipmmu_domain_destroy_context(domain); 633 541 free_io_pgtable_ops(domain->iop); 634 542 kfree(domain); ··· 638 544 static int ipmmu_attach_device(struct iommu_domain *io_domain, 639 545 struct device *dev) 640 546 { 641 - struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); 642 547 struct iommu_fwspec *fwspec = dev->iommu_fwspec; 643 - struct ipmmu_vmsa_device *mmu = priv->mmu; 548 + struct ipmmu_vmsa_device *mmu = to_ipmmu(dev); 644 549 struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); 645 550 unsigned long flags; 646 551 unsigned int i; 647 552 int ret = 0; 648 553 649 - if (!priv || !priv->mmu) { 554 + if (!mmu) { 650 555 dev_err(dev, "Cannot attach to IPMMU\n"); 651 556 return -ENXIO; 652 557 } ··· 656 563 /* The domain hasn't been used yet, initialize it. 
*/ 657 564 domain->mmu = mmu; 658 565 ret = ipmmu_domain_init_context(domain); 566 + if (ret < 0) { 567 + dev_err(dev, "Unable to initialize IPMMU context\n"); 568 + domain->mmu = NULL; 569 + } else { 570 + dev_info(dev, "Using IPMMU context %u\n", 571 + domain->context_id); 572 + } 659 573 } else if (domain->mmu != mmu) { 660 574 /* 661 575 * Something is wrong, we can't attach two devices using ··· 719 619 return domain->iop->unmap(domain->iop, iova, size); 720 620 } 721 621 622 + static void ipmmu_iotlb_sync(struct iommu_domain *io_domain) 623 + { 624 + struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); 625 + 626 + if (domain->mmu) 627 + ipmmu_tlb_flush_all(domain); 628 + } 629 + 722 630 static phys_addr_t ipmmu_iova_to_phys(struct iommu_domain *io_domain, 723 631 dma_addr_t iova) 724 632 { ··· 741 633 struct of_phandle_args *args) 742 634 { 743 635 struct platform_device *ipmmu_pdev; 744 - struct ipmmu_vmsa_iommu_priv *priv; 745 636 746 637 ipmmu_pdev = of_find_device_by_node(args->np); 747 638 if (!ipmmu_pdev) 748 639 return -ENODEV; 749 640 750 - priv = kzalloc(sizeof(*priv), GFP_KERNEL); 751 - if (!priv) 752 - return -ENOMEM; 753 - 754 - priv->mmu = platform_get_drvdata(ipmmu_pdev); 755 - priv->dev = dev; 756 - dev->iommu_fwspec->iommu_priv = priv; 641 + dev->iommu_fwspec->iommu_priv = platform_get_drvdata(ipmmu_pdev); 757 642 return 0; 758 643 } 644 + 645 + static bool ipmmu_slave_whitelist(struct device *dev) 646 + { 647 + /* By default, do not allow use of IPMMU */ 648 + return false; 649 + } 650 + 651 + static const struct soc_device_attribute soc_r8a7795[] = { 652 + { .soc_id = "r8a7795", }, 653 + { /* sentinel */ } 654 + }; 759 655 760 656 static int ipmmu_of_xlate(struct device *dev, 761 657 struct of_phandle_args *spec) 762 658 { 659 + /* For R-Car Gen3 use a white list to opt-in slave devices */ 660 + if (soc_device_match(soc_r8a7795) && !ipmmu_slave_whitelist(dev)) 661 + return -ENODEV; 662 + 763 663 iommu_fwspec_add_ids(dev, spec->args, 
1); 764 664 765 665 /* Initialize once - xlate() will call multiple times */ 766 - if (to_priv(dev)) 666 + if (to_ipmmu(dev)) 767 667 return 0; 768 668 769 669 return ipmmu_init_platform_device(dev, spec); 770 670 } 771 671 772 - #if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) 773 - 774 - static struct iommu_domain *ipmmu_domain_alloc(unsigned type) 672 + static int ipmmu_init_arm_mapping(struct device *dev) 775 673 { 776 - if (type != IOMMU_DOMAIN_UNMANAGED) 777 - return NULL; 778 - 779 - return __ipmmu_domain_alloc(type); 780 - } 781 - 782 - static int ipmmu_add_device(struct device *dev) 783 - { 784 - struct ipmmu_vmsa_device *mmu = NULL; 674 + struct ipmmu_vmsa_device *mmu = to_ipmmu(dev); 785 675 struct iommu_group *group; 786 676 int ret; 787 - 788 - /* 789 - * Only let through devices that have been verified in xlate() 790 - */ 791 - if (!to_priv(dev)) 792 - return -ENODEV; 793 677 794 678 /* Create a device group and add the device to it. */ 795 679 group = iommu_group_alloc(); 796 680 if (IS_ERR(group)) { 797 681 dev_err(dev, "Failed to allocate IOMMU group\n"); 798 - ret = PTR_ERR(group); 799 - goto error; 682 + return PTR_ERR(group); 800 683 } 801 684 802 685 ret = iommu_group_add_device(group, dev); ··· 795 696 796 697 if (ret < 0) { 797 698 dev_err(dev, "Failed to add device to IPMMU group\n"); 798 - group = NULL; 799 - goto error; 699 + return ret; 800 700 } 801 701 802 702 /* ··· 807 709 * - Make the mapping size configurable ? We currently use a 2GB mapping 808 710 * at a 1GB offset to ensure that NULL VAs will fault. 
809 711 */ 810 - mmu = to_priv(dev)->mmu; 811 712 if (!mmu->mapping) { 812 713 struct dma_iommu_mapping *mapping; 813 714 ··· 831 734 return 0; 832 735 833 736 error: 834 - if (mmu) 737 + iommu_group_remove_device(dev); 738 + if (mmu->mapping) 835 739 arm_iommu_release_mapping(mmu->mapping); 836 740 837 - if (!IS_ERR_OR_NULL(group)) 838 - iommu_group_remove_device(dev); 839 - 840 741 return ret; 742 + } 743 + 744 + static int ipmmu_add_device(struct device *dev) 745 + { 746 + struct iommu_group *group; 747 + 748 + /* 749 + * Only let through devices that have been verified in xlate() 750 + */ 751 + if (!to_ipmmu(dev)) 752 + return -ENODEV; 753 + 754 + if (IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_IOMMU_DMA)) 755 + return ipmmu_init_arm_mapping(dev); 756 + 757 + group = iommu_group_get_for_dev(dev); 758 + if (IS_ERR(group)) 759 + return PTR_ERR(group); 760 + 761 + iommu_group_put(group); 762 + return 0; 841 763 } 842 764 843 765 static void ipmmu_remove_device(struct device *dev) 844 766 { 845 767 arm_iommu_detach_device(dev); 846 768 iommu_group_remove_device(dev); 769 + } 770 + 771 + static struct iommu_group *ipmmu_find_group(struct device *dev) 772 + { 773 + struct ipmmu_vmsa_device *mmu = to_ipmmu(dev); 774 + struct iommu_group *group; 775 + 776 + if (mmu->group) 777 + return iommu_group_ref_get(mmu->group); 778 + 779 + group = iommu_group_alloc(); 780 + if (!IS_ERR(group)) 781 + mmu->group = group; 782 + 783 + return group; 847 784 } 848 785 849 786 static const struct iommu_ops ipmmu_ops = { ··· 887 756 .detach_dev = ipmmu_detach_device, 888 757 .map = ipmmu_map, 889 758 .unmap = ipmmu_unmap, 759 + .flush_iotlb_all = ipmmu_iotlb_sync, 760 + .iotlb_sync = ipmmu_iotlb_sync, 890 761 .map_sg = default_iommu_map_sg, 891 762 .iova_to_phys = ipmmu_iova_to_phys, 892 763 .add_device = ipmmu_add_device, 893 764 .remove_device = ipmmu_remove_device, 765 + .device_group = ipmmu_find_group, 894 766 .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, 895 767 .of_xlate = 
ipmmu_of_xlate, 896 768 }; 897 - 898 - #endif /* !CONFIG_ARM && CONFIG_IOMMU_DMA */ 899 - 900 - #ifdef CONFIG_IOMMU_DMA 901 - 902 - static DEFINE_SPINLOCK(ipmmu_slave_devices_lock); 903 - static LIST_HEAD(ipmmu_slave_devices); 904 - 905 - static struct iommu_domain *ipmmu_domain_alloc_dma(unsigned type) 906 - { 907 - struct iommu_domain *io_domain = NULL; 908 - 909 - switch (type) { 910 - case IOMMU_DOMAIN_UNMANAGED: 911 - io_domain = __ipmmu_domain_alloc(type); 912 - break; 913 - 914 - case IOMMU_DOMAIN_DMA: 915 - io_domain = __ipmmu_domain_alloc(type); 916 - if (io_domain) 917 - iommu_get_dma_cookie(io_domain); 918 - break; 919 - } 920 - 921 - return io_domain; 922 - } 923 - 924 - static void ipmmu_domain_free_dma(struct iommu_domain *io_domain) 925 - { 926 - switch (io_domain->type) { 927 - case IOMMU_DOMAIN_DMA: 928 - iommu_put_dma_cookie(io_domain); 929 - /* fall-through */ 930 - default: 931 - ipmmu_domain_free(io_domain); 932 - break; 933 - } 934 - } 935 - 936 - static int ipmmu_add_device_dma(struct device *dev) 937 - { 938 - struct iommu_group *group; 939 - 940 - /* 941 - * Only let through devices that have been verified in xlate() 942 - */ 943 - if (!to_priv(dev)) 944 - return -ENODEV; 945 - 946 - group = iommu_group_get_for_dev(dev); 947 - if (IS_ERR(group)) 948 - return PTR_ERR(group); 949 - 950 - spin_lock(&ipmmu_slave_devices_lock); 951 - list_add(&to_priv(dev)->list, &ipmmu_slave_devices); 952 - spin_unlock(&ipmmu_slave_devices_lock); 953 - return 0; 954 - } 955 - 956 - static void ipmmu_remove_device_dma(struct device *dev) 957 - { 958 - struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); 959 - 960 - spin_lock(&ipmmu_slave_devices_lock); 961 - list_del(&priv->list); 962 - spin_unlock(&ipmmu_slave_devices_lock); 963 - 964 - iommu_group_remove_device(dev); 965 - } 966 - 967 - static struct device *ipmmu_find_sibling_device(struct device *dev) 968 - { 969 - struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); 970 - struct ipmmu_vmsa_iommu_priv 
*sibling_priv = NULL; 971 - bool found = false; 972 - 973 - spin_lock(&ipmmu_slave_devices_lock); 974 - 975 - list_for_each_entry(sibling_priv, &ipmmu_slave_devices, list) { 976 - if (priv == sibling_priv) 977 - continue; 978 - if (sibling_priv->mmu == priv->mmu) { 979 - found = true; 980 - break; 981 - } 982 - } 983 - 984 - spin_unlock(&ipmmu_slave_devices_lock); 985 - 986 - return found ? sibling_priv->dev : NULL; 987 - } 988 - 989 - static struct iommu_group *ipmmu_find_group_dma(struct device *dev) 990 - { 991 - struct iommu_group *group; 992 - struct device *sibling; 993 - 994 - sibling = ipmmu_find_sibling_device(dev); 995 - if (sibling) 996 - group = iommu_group_get(sibling); 997 - if (!sibling || IS_ERR(group)) 998 - group = generic_device_group(dev); 999 - 1000 - return group; 1001 - } 1002 - 1003 - static const struct iommu_ops ipmmu_ops = { 1004 - .domain_alloc = ipmmu_domain_alloc_dma, 1005 - .domain_free = ipmmu_domain_free_dma, 1006 - .attach_dev = ipmmu_attach_device, 1007 - .detach_dev = ipmmu_detach_device, 1008 - .map = ipmmu_map, 1009 - .unmap = ipmmu_unmap, 1010 - .map_sg = default_iommu_map_sg, 1011 - .iova_to_phys = ipmmu_iova_to_phys, 1012 - .add_device = ipmmu_add_device_dma, 1013 - .remove_device = ipmmu_remove_device_dma, 1014 - .device_group = ipmmu_find_group_dma, 1015 - .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, 1016 - .of_xlate = ipmmu_of_xlate, 1017 - }; 1018 - 1019 - #endif /* CONFIG_IOMMU_DMA */ 1020 769 1021 770 /* ----------------------------------------------------------------------------- 1022 771 * Probe/remove and init ··· 907 896 unsigned int i; 908 897 909 898 /* Disable all contexts. 
*/ 910 - for (i = 0; i < 4; ++i) 899 + for (i = 0; i < mmu->num_ctx; ++i) 911 900 ipmmu_write(mmu, i * IM_CTX_SIZE + IMCTR, 0); 912 901 } 902 + 903 + static const struct ipmmu_features ipmmu_features_default = { 904 + .use_ns_alias_offset = true, 905 + .has_cache_leaf_nodes = false, 906 + .number_of_contexts = 1, /* software only tested with one context */ 907 + .setup_imbuscr = true, 908 + .twobit_imttbcr_sl0 = false, 909 + }; 910 + 911 + static const struct ipmmu_features ipmmu_features_r8a7795 = { 912 + .use_ns_alias_offset = false, 913 + .has_cache_leaf_nodes = true, 914 + .number_of_contexts = 8, 915 + .setup_imbuscr = false, 916 + .twobit_imttbcr_sl0 = true, 917 + }; 918 + 919 + static const struct of_device_id ipmmu_of_ids[] = { 920 + { 921 + .compatible = "renesas,ipmmu-vmsa", 922 + .data = &ipmmu_features_default, 923 + }, { 924 + .compatible = "renesas,ipmmu-r8a7795", 925 + .data = &ipmmu_features_r8a7795, 926 + }, { 927 + /* Terminator */ 928 + }, 929 + }; 930 + 931 + MODULE_DEVICE_TABLE(of, ipmmu_of_ids); 913 932 914 933 static int ipmmu_probe(struct platform_device *pdev) 915 934 { ··· 958 917 mmu->num_utlbs = 32; 959 918 spin_lock_init(&mmu->lock); 960 919 bitmap_zero(mmu->ctx, IPMMU_CTX_MAX); 920 + mmu->features = of_device_get_match_data(&pdev->dev); 921 + dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)); 961 922 962 923 /* Map I/O memory and request IRQ. */ 963 924 res = platform_get_resource(pdev, IORESOURCE_MEM, 0); ··· 979 936 * Offset the registers base unconditionally to point to the non-secure 980 937 * alias space for now. 
981 938 */ 982 - mmu->base += IM_NS_ALIAS_OFFSET; 939 + if (mmu->features->use_ns_alias_offset) 940 + mmu->base += IM_NS_ALIAS_OFFSET; 941 + 942 + mmu->num_ctx = min_t(unsigned int, IPMMU_CTX_MAX, 943 + mmu->features->number_of_contexts); 983 944 984 945 irq = platform_get_irq(pdev, 0); 985 - if (irq < 0) { 986 - dev_err(&pdev->dev, "no IRQ found\n"); 987 - return irq; 946 + 947 + /* 948 + * Determine if this IPMMU instance is a root device by checking for 949 + * the lack of has_cache_leaf_nodes flag or renesas,ipmmu-main property. 950 + */ 951 + if (!mmu->features->has_cache_leaf_nodes || 952 + !of_find_property(pdev->dev.of_node, "renesas,ipmmu-main", NULL)) 953 + mmu->root = mmu; 954 + else 955 + mmu->root = ipmmu_find_root(); 956 + 957 + /* 958 + * Wait until the root device has been registered for sure. 959 + */ 960 + if (!mmu->root) 961 + return -EPROBE_DEFER; 962 + 963 + /* Root devices have mandatory IRQs */ 964 + if (ipmmu_is_root(mmu)) { 965 + if (irq < 0) { 966 + dev_err(&pdev->dev, "no IRQ found\n"); 967 + return irq; 968 + } 969 + 970 + ret = devm_request_irq(&pdev->dev, irq, ipmmu_irq, 0, 971 + dev_name(&pdev->dev), mmu); 972 + if (ret < 0) { 973 + dev_err(&pdev->dev, "failed to request IRQ %d\n", irq); 974 + return ret; 975 + } 976 + 977 + ipmmu_device_reset(mmu); 988 978 } 989 979 990 - ret = devm_request_irq(&pdev->dev, irq, ipmmu_irq, 0, 991 - dev_name(&pdev->dev), mmu); 992 - if (ret < 0) { 993 - dev_err(&pdev->dev, "failed to request IRQ %d\n", irq); 994 - return ret; 980 + /* 981 + * Register the IPMMU to the IOMMU subsystem in the following cases: 982 + * - R-Car Gen2 IPMMU (all devices registered) 983 + * - R-Car Gen3 IPMMU (leaf devices only - skip root IPMMU-MM device) 984 + */ 985 + if (!mmu->features->has_cache_leaf_nodes || !ipmmu_is_root(mmu)) { 986 + ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL, 987 + dev_name(&pdev->dev)); 988 + if (ret) 989 + return ret; 990 + 991 + iommu_device_set_ops(&mmu->iommu, &ipmmu_ops); 992 + 
iommu_device_set_fwnode(&mmu->iommu, 993 + &pdev->dev.of_node->fwnode); 994 + 995 + ret = iommu_device_register(&mmu->iommu); 996 + if (ret) 997 + return ret; 998 + 999 + #if defined(CONFIG_IOMMU_DMA) 1000 + if (!iommu_present(&platform_bus_type)) 1001 + bus_set_iommu(&platform_bus_type, &ipmmu_ops); 1002 + #endif 995 1003 } 996 - 997 - ipmmu_device_reset(mmu); 998 - 999 - ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL, 1000 - dev_name(&pdev->dev)); 1001 - if (ret) 1002 - return ret; 1003 - 1004 - iommu_device_set_ops(&mmu->iommu, &ipmmu_ops); 1005 - iommu_device_set_fwnode(&mmu->iommu, &pdev->dev.of_node->fwnode); 1006 - 1007 - ret = iommu_device_register(&mmu->iommu); 1008 - if (ret) 1009 - return ret; 1010 1004 1011 1005 /* 1012 1006 * We can't create the ARM mapping here as it requires the bus to have ··· 1063 983 iommu_device_sysfs_remove(&mmu->iommu); 1064 984 iommu_device_unregister(&mmu->iommu); 1065 985 1066 - #if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) 1067 986 arm_iommu_release_mapping(mmu->mapping); 1068 - #endif 1069 987 1070 988 ipmmu_device_reset(mmu); 1071 989 1072 990 return 0; 1073 991 } 1074 - 1075 - static const struct of_device_id ipmmu_of_ids[] = { 1076 - { .compatible = "renesas,ipmmu-vmsa", }, 1077 - { } 1078 - }; 1079 992 1080 993 static struct platform_driver ipmmu_driver = { 1081 994 .driver = { ··· 1081 1008 1082 1009 static int __init ipmmu_init(void) 1083 1010 { 1011 + static bool setup_done; 1084 1012 int ret; 1013 + 1014 + if (setup_done) 1015 + return 0; 1085 1016 1086 1017 ret = platform_driver_register(&ipmmu_driver); 1087 1018 if (ret < 0) 1088 1019 return ret; 1089 1020 1021 + #if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) 1090 1022 if (!iommu_present(&platform_bus_type)) 1091 1023 bus_set_iommu(&platform_bus_type, &ipmmu_ops); 1024 + #endif 1092 1025 1026 + setup_done = true; 1093 1027 return 0; 1094 1028 } 1095 1029 ··· 1107 1027 1108 1028 subsys_initcall(ipmmu_init); 1109 1029 
module_exit(ipmmu_exit); 1030 + 1031 + #ifdef CONFIG_IOMMU_DMA 1032 + static int __init ipmmu_vmsa_iommu_of_setup(struct device_node *np) 1033 + { 1034 + ipmmu_init(); 1035 + return 0; 1036 + } 1037 + 1038 + IOMMU_OF_DECLARE(ipmmu_vmsa_iommu_of, "renesas,ipmmu-vmsa", 1039 + ipmmu_vmsa_iommu_of_setup); 1040 + IOMMU_OF_DECLARE(ipmmu_r8a7795_iommu_of, "renesas,ipmmu-r8a7795", 1041 + ipmmu_vmsa_iommu_of_setup); 1042 + #endif 1110 1043 1111 1044 MODULE_DESCRIPTION("IOMMU API for Renesas VMSA-compatible IPMMU"); 1112 1045 MODULE_AUTHOR("Laurent Pinchart <laurent.pinchart@ideasonboard.com>");
+7
drivers/iommu/mtk_iommu.c
··· 392 392 return unmapsz; 393 393 } 394 394 395 + static void mtk_iommu_iotlb_sync(struct iommu_domain *domain) 396 + { 397 + mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data()); 398 + } 399 + 395 400 static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, 396 401 dma_addr_t iova) 397 402 { ··· 496 491 .map = mtk_iommu_map, 497 492 .unmap = mtk_iommu_unmap, 498 493 .map_sg = default_iommu_map_sg, 494 + .flush_iotlb_all = mtk_iommu_iotlb_sync, 495 + .iotlb_sync = mtk_iommu_iotlb_sync, 499 496 .iova_to_phys = mtk_iommu_iova_to_phys, 500 497 .add_device = mtk_iommu_add_device, 501 498 .remove_device = mtk_iommu_remove_device,
+1 -1
drivers/iommu/mtk_iommu_v1.c
··· 708 708 .probe = mtk_iommu_probe, 709 709 .remove = mtk_iommu_remove, 710 710 .driver = { 711 - .name = "mtk-iommu", 711 + .name = "mtk-iommu-v1", 712 712 .of_match_table = mtk_iommu_of_ids, 713 713 .pm = &mtk_iommu_pm_ops, 714 714 }
+286 -97
drivers/iommu/omap-iommu.c
··· 2 2 * omap iommu: tlb and pagetable primitives 3 3 * 4 4 * Copyright (C) 2008-2010 Nokia Corporation 5 + * Copyright (C) 2013-2017 Texas Instruments Incorporated - http://www.ti.com/ 5 6 * 6 7 * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>, 7 8 * Paul Mundt and Toshihiro Kobayashi ··· 72 71 **/ 73 72 void omap_iommu_save_ctx(struct device *dev) 74 73 { 75 - struct omap_iommu *obj = dev_to_omap_iommu(dev); 76 - u32 *p = obj->ctx; 74 + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; 75 + struct omap_iommu *obj; 76 + u32 *p; 77 77 int i; 78 78 79 - for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) { 80 - p[i] = iommu_read_reg(obj, i * sizeof(u32)); 81 - dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, p[i]); 79 + if (!arch_data) 80 + return; 81 + 82 + while (arch_data->iommu_dev) { 83 + obj = arch_data->iommu_dev; 84 + p = obj->ctx; 85 + for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) { 86 + p[i] = iommu_read_reg(obj, i * sizeof(u32)); 87 + dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, 88 + p[i]); 89 + } 90 + arch_data++; 82 91 } 83 92 } 84 93 EXPORT_SYMBOL_GPL(omap_iommu_save_ctx); ··· 99 88 **/ 100 89 void omap_iommu_restore_ctx(struct device *dev) 101 90 { 102 - struct omap_iommu *obj = dev_to_omap_iommu(dev); 103 - u32 *p = obj->ctx; 91 + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; 92 + struct omap_iommu *obj; 93 + u32 *p; 104 94 int i; 105 95 106 - for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) { 107 - iommu_write_reg(obj, p[i], i * sizeof(u32)); 108 - dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, p[i]); 96 + if (!arch_data) 97 + return; 98 + 99 + while (arch_data->iommu_dev) { 100 + obj = arch_data->iommu_dev; 101 + p = obj->ctx; 102 + for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) { 103 + iommu_write_reg(obj, p[i], i * sizeof(u32)); 104 + dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, 105 + p[i]); 106 + } 107 + arch_data++; 109 108 } 110 109 } 111 110 
EXPORT_SYMBOL_GPL(omap_iommu_restore_ctx); ··· 826 805 struct iommu_domain *domain = obj->domain; 827 806 struct omap_iommu_domain *omap_domain = to_omap_domain(domain); 828 807 829 - if (!omap_domain->iommu_dev) 808 + if (!omap_domain->dev) 830 809 return IRQ_NONE; 831 810 832 811 errs = iommu_report_fault(obj, &da); ··· 912 891 spin_unlock(&obj->iommu_lock); 913 892 914 893 dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name); 894 + } 895 + 896 + static bool omap_iommu_can_register(struct platform_device *pdev) 897 + { 898 + struct device_node *np = pdev->dev.of_node; 899 + 900 + if (!of_device_is_compatible(np, "ti,dra7-dsp-iommu")) 901 + return true; 902 + 903 + /* 904 + * restrict IOMMU core registration only for processor-port MDMA MMUs 905 + * on DRA7 DSPs 906 + */ 907 + if ((!strcmp(dev_name(&pdev->dev), "40d01000.mmu")) || 908 + (!strcmp(dev_name(&pdev->dev), "41501000.mmu"))) 909 + return true; 910 + 911 + return false; 915 912 } 916 913 917 914 static int omap_iommu_dra7_get_dsp_system_cfg(struct platform_device *pdev, ··· 1023 984 return err; 1024 985 platform_set_drvdata(pdev, obj); 1025 986 1026 - obj->group = iommu_group_alloc(); 1027 - if (IS_ERR(obj->group)) 1028 - return PTR_ERR(obj->group); 987 + if (omap_iommu_can_register(pdev)) { 988 + obj->group = iommu_group_alloc(); 989 + if (IS_ERR(obj->group)) 990 + return PTR_ERR(obj->group); 1029 991 1030 - err = iommu_device_sysfs_add(&obj->iommu, obj->dev, NULL, obj->name); 1031 - if (err) 1032 - goto out_group; 992 + err = iommu_device_sysfs_add(&obj->iommu, obj->dev, NULL, 993 + obj->name); 994 + if (err) 995 + goto out_group; 1033 996 1034 - iommu_device_set_ops(&obj->iommu, &omap_iommu_ops); 997 + iommu_device_set_ops(&obj->iommu, &omap_iommu_ops); 1035 998 1036 - err = iommu_device_register(&obj->iommu); 1037 - if (err) 1038 - goto out_sysfs; 999 + err = iommu_device_register(&obj->iommu); 1000 + if (err) 1001 + goto out_sysfs; 1002 + } 1039 1003 1040 1004 pm_runtime_irq_safe(obj->dev); 1041 1005 
pm_runtime_enable(obj->dev); ··· 1060 1018 { 1061 1019 struct omap_iommu *obj = platform_get_drvdata(pdev); 1062 1020 1063 - iommu_group_put(obj->group); 1064 - obj->group = NULL; 1021 + if (obj->group) { 1022 + iommu_group_put(obj->group); 1023 + obj->group = NULL; 1065 1024 1066 - iommu_device_sysfs_remove(&obj->iommu); 1067 - iommu_device_unregister(&obj->iommu); 1025 + iommu_device_sysfs_remove(&obj->iommu); 1026 + iommu_device_unregister(&obj->iommu); 1027 + } 1068 1028 1069 1029 omap_iommu_debugfs_remove(obj); 1070 1030 ··· 1112 1068 phys_addr_t pa, size_t bytes, int prot) 1113 1069 { 1114 1070 struct omap_iommu_domain *omap_domain = to_omap_domain(domain); 1115 - struct omap_iommu *oiommu = omap_domain->iommu_dev; 1116 - struct device *dev = oiommu->dev; 1071 + struct device *dev = omap_domain->dev; 1072 + struct omap_iommu_device *iommu; 1073 + struct omap_iommu *oiommu; 1117 1074 struct iotlb_entry e; 1118 1075 int omap_pgsz; 1119 - u32 ret; 1076 + u32 ret = -EINVAL; 1077 + int i; 1120 1078 1121 1079 omap_pgsz = bytes_to_iopgsz(bytes); 1122 1080 if (omap_pgsz < 0) { ··· 1130 1084 1131 1085 iotlb_init_entry(&e, da, pa, omap_pgsz); 1132 1086 1133 - ret = omap_iopgtable_store_entry(oiommu, &e); 1134 - if (ret) 1135 - dev_err(dev, "omap_iopgtable_store_entry failed: %d\n", ret); 1087 + iommu = omap_domain->iommus; 1088 + for (i = 0; i < omap_domain->num_iommus; i++, iommu++) { 1089 + oiommu = iommu->iommu_dev; 1090 + ret = omap_iopgtable_store_entry(oiommu, &e); 1091 + if (ret) { 1092 + dev_err(dev, "omap_iopgtable_store_entry failed: %d\n", 1093 + ret); 1094 + break; 1095 + } 1096 + } 1097 + 1098 + if (ret) { 1099 + while (i--) { 1100 + iommu--; 1101 + oiommu = iommu->iommu_dev; 1102 + iopgtable_clear_entry(oiommu, da); 1103 + } 1104 + } 1136 1105 1137 1106 return ret; 1138 1107 } ··· 1156 1095 size_t size) 1157 1096 { 1158 1097 struct omap_iommu_domain *omap_domain = to_omap_domain(domain); 1159 - struct omap_iommu *oiommu = omap_domain->iommu_dev; 1160 - 
struct device *dev = oiommu->dev; 1098 + struct device *dev = omap_domain->dev; 1099 + struct omap_iommu_device *iommu; 1100 + struct omap_iommu *oiommu; 1101 + bool error = false; 1102 + size_t bytes = 0; 1103 + int i; 1161 1104 1162 1105 dev_dbg(dev, "unmapping da 0x%lx size %u\n", da, size); 1163 1106 1164 - return iopgtable_clear_entry(oiommu, da); 1107 + iommu = omap_domain->iommus; 1108 + for (i = 0; i < omap_domain->num_iommus; i++, iommu++) { 1109 + oiommu = iommu->iommu_dev; 1110 + bytes = iopgtable_clear_entry(oiommu, da); 1111 + if (!bytes) 1112 + error = true; 1113 + } 1114 + 1115 + /* 1116 + * simplify return - we are only checking if any of the iommus 1117 + * reported an error, but not if all of them are unmapping the 1118 + * same number of entries. This should not occur due to the 1119 + * mirror programming. 1120 + */ 1121 + return error ? 0 : bytes; 1122 + } 1123 + 1124 + static int omap_iommu_count(struct device *dev) 1125 + { 1126 + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; 1127 + int count = 0; 1128 + 1129 + while (arch_data->iommu_dev) { 1130 + count++; 1131 + arch_data++; 1132 + } 1133 + 1134 + return count; 1135 + } 1136 + 1137 + /* caller should call cleanup if this function fails */ 1138 + static int omap_iommu_attach_init(struct device *dev, 1139 + struct omap_iommu_domain *odomain) 1140 + { 1141 + struct omap_iommu_device *iommu; 1142 + int i; 1143 + 1144 + odomain->num_iommus = omap_iommu_count(dev); 1145 + if (!odomain->num_iommus) 1146 + return -EINVAL; 1147 + 1148 + odomain->iommus = kcalloc(odomain->num_iommus, sizeof(*iommu), 1149 + GFP_ATOMIC); 1150 + if (!odomain->iommus) 1151 + return -ENOMEM; 1152 + 1153 + iommu = odomain->iommus; 1154 + for (i = 0; i < odomain->num_iommus; i++, iommu++) { 1155 + iommu->pgtable = kzalloc(IOPGD_TABLE_SIZE, GFP_ATOMIC); 1156 + if (!iommu->pgtable) 1157 + return -ENOMEM; 1158 + 1159 + /* 1160 + * should never fail, but please keep this around to ensure 1161 + * we keep the 
hardware happy 1162 + */ 1163 + if (WARN_ON(!IS_ALIGNED((long)iommu->pgtable, 1164 + IOPGD_TABLE_SIZE))) 1165 + return -EINVAL; 1166 + } 1167 + 1168 + return 0; 1169 + } 1170 + 1171 + static void omap_iommu_detach_fini(struct omap_iommu_domain *odomain) 1172 + { 1173 + int i; 1174 + struct omap_iommu_device *iommu = odomain->iommus; 1175 + 1176 + for (i = 0; iommu && i < odomain->num_iommus; i++, iommu++) 1177 + kfree(iommu->pgtable); 1178 + 1179 + kfree(odomain->iommus); 1180 + odomain->num_iommus = 0; 1181 + odomain->iommus = NULL; 1165 1182 } 1166 1183 1167 1184 static int ··· 1247 1108 { 1248 1109 struct omap_iommu_domain *omap_domain = to_omap_domain(domain); 1249 1110 struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; 1111 + struct omap_iommu_device *iommu; 1250 1112 struct omap_iommu *oiommu; 1251 1113 int ret = 0; 1114 + int i; 1252 1115 1253 1116 if (!arch_data || !arch_data->iommu_dev) { 1254 1117 dev_err(dev, "device doesn't have an associated iommu\n"); ··· 1259 1118 1260 1119 spin_lock(&omap_domain->lock); 1261 1120 1262 - /* only a single device is supported per domain for now */ 1263 - if (omap_domain->iommu_dev) { 1121 + /* only a single client device can be attached to a domain */ 1122 + if (omap_domain->dev) { 1264 1123 dev_err(dev, "iommu domain is already attached\n"); 1265 1124 ret = -EBUSY; 1266 1125 goto out; 1267 1126 } 1268 1127 1269 - oiommu = arch_data->iommu_dev; 1270 - 1271 - /* get a handle to and enable the omap iommu */ 1272 - ret = omap_iommu_attach(oiommu, omap_domain->pgtable); 1128 + ret = omap_iommu_attach_init(dev, omap_domain); 1273 1129 if (ret) { 1274 - dev_err(dev, "can't get omap iommu: %d\n", ret); 1275 - goto out; 1130 + dev_err(dev, "failed to allocate required iommu data %d\n", 1131 + ret); 1132 + goto init_fail; 1276 1133 } 1277 1134 1278 - omap_domain->iommu_dev = oiommu; 1279 - omap_domain->dev = dev; 1280 - oiommu->domain = domain; 1135 + iommu = omap_domain->iommus; 1136 + for (i = 0; i < 
omap_domain->num_iommus; i++, iommu++, arch_data++) { 1137 + /* configure and enable the omap iommu */ 1138 + oiommu = arch_data->iommu_dev; 1139 + ret = omap_iommu_attach(oiommu, iommu->pgtable); 1140 + if (ret) { 1141 + dev_err(dev, "can't get omap iommu: %d\n", ret); 1142 + goto attach_fail; 1143 + } 1281 1144 1145 + oiommu->domain = domain; 1146 + iommu->iommu_dev = oiommu; 1147 + } 1148 + 1149 + omap_domain->dev = dev; 1150 + 1151 + goto out; 1152 + 1153 + attach_fail: 1154 + while (i--) { 1155 + iommu--; 1156 + arch_data--; 1157 + oiommu = iommu->iommu_dev; 1158 + omap_iommu_detach(oiommu); 1159 + iommu->iommu_dev = NULL; 1160 + oiommu->domain = NULL; 1161 + } 1162 + init_fail: 1163 + omap_iommu_detach_fini(omap_domain); 1282 1164 out: 1283 1165 spin_unlock(&omap_domain->lock); 1284 1166 return ret; ··· 1310 1146 static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain, 1311 1147 struct device *dev) 1312 1148 { 1313 - struct omap_iommu *oiommu = dev_to_omap_iommu(dev); 1149 + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; 1150 + struct omap_iommu_device *iommu = omap_domain->iommus; 1151 + struct omap_iommu *oiommu; 1152 + int i; 1314 1153 1315 - /* only a single device is supported per domain for now */ 1316 - if (omap_domain->iommu_dev != oiommu) { 1317 - dev_err(dev, "invalid iommu device\n"); 1154 + if (!omap_domain->dev) { 1155 + dev_err(dev, "domain has no attached device\n"); 1318 1156 return; 1319 1157 } 1320 1158 1321 - iopgtable_clear_entry_all(oiommu); 1159 + /* only a single device is supported per domain for now */ 1160 + if (omap_domain->dev != dev) { 1161 + dev_err(dev, "invalid attached device\n"); 1162 + return; 1163 + } 1322 1164 1323 - omap_iommu_detach(oiommu); 1165 + /* 1166 + * cleanup in the reverse order of attachment - this addresses 1167 + * any h/w dependencies between multiple instances, if any 1168 + */ 1169 + iommu += (omap_domain->num_iommus - 1); 1170 + arch_data += (omap_domain->num_iommus - 1); 
1171 + for (i = 0; i < omap_domain->num_iommus; i++, iommu--, arch_data--) { 1172 + oiommu = iommu->iommu_dev; 1173 + iopgtable_clear_entry_all(oiommu); 1324 1174 1325 - omap_domain->iommu_dev = NULL; 1175 + omap_iommu_detach(oiommu); 1176 + iommu->iommu_dev = NULL; 1177 + oiommu->domain = NULL; 1178 + } 1179 + 1180 + omap_iommu_detach_fini(omap_domain); 1181 + 1326 1182 omap_domain->dev = NULL; 1327 - oiommu->domain = NULL; 1328 1183 } 1329 1184 1330 1185 static void omap_iommu_detach_dev(struct iommu_domain *domain, ··· 1365 1182 1366 1183 omap_domain = kzalloc(sizeof(*omap_domain), GFP_KERNEL); 1367 1184 if (!omap_domain) 1368 - goto out; 1369 - 1370 - omap_domain->pgtable = kzalloc(IOPGD_TABLE_SIZE, GFP_KERNEL); 1371 - if (!omap_domain->pgtable) 1372 - goto fail_nomem; 1373 - 1374 - /* 1375 - * should never fail, but please keep this around to ensure 1376 - * we keep the hardware happy 1377 - */ 1378 - if (WARN_ON(!IS_ALIGNED((long)omap_domain->pgtable, IOPGD_TABLE_SIZE))) 1379 - goto fail_align; 1185 + return NULL; 1380 1186 1381 1187 spin_lock_init(&omap_domain->lock); 1382 1188 ··· 1374 1202 omap_domain->domain.geometry.force_aperture = true; 1375 1203 1376 1204 return &omap_domain->domain; 1377 - 1378 - fail_align: 1379 - kfree(omap_domain->pgtable); 1380 - fail_nomem: 1381 - kfree(omap_domain); 1382 - out: 1383 - return NULL; 1384 1205 } 1385 1206 1386 1207 static void omap_iommu_domain_free(struct iommu_domain *domain) ··· 1384 1219 * An iommu device is still attached 1385 1220 * (currently, only one device can be attached) ? 
1386 1221 */ 1387 - if (omap_domain->iommu_dev) 1222 + if (omap_domain->dev) 1388 1223 _omap_iommu_detach_dev(omap_domain, omap_domain->dev); 1389 1224 1390 - kfree(omap_domain->pgtable); 1391 1225 kfree(omap_domain); 1392 1226 } 1393 1227 ··· 1394 1230 dma_addr_t da) 1395 1231 { 1396 1232 struct omap_iommu_domain *omap_domain = to_omap_domain(domain); 1397 - struct omap_iommu *oiommu = omap_domain->iommu_dev; 1233 + struct omap_iommu_device *iommu = omap_domain->iommus; 1234 + struct omap_iommu *oiommu = iommu->iommu_dev; 1398 1235 struct device *dev = oiommu->dev; 1399 1236 u32 *pgd, *pte; 1400 1237 phys_addr_t ret = 0; 1401 1238 1239 + /* 1240 + * all the iommus within the domain will have identical programming, 1241 + * so perform the lookup using just the first iommu 1242 + */ 1402 1243 iopgtable_lookup_entry(oiommu, da, &pgd, &pte); 1403 1244 1404 1245 if (pte) { ··· 1429 1260 1430 1261 static int omap_iommu_add_device(struct device *dev) 1431 1262 { 1432 - struct omap_iommu_arch_data *arch_data; 1263 + struct omap_iommu_arch_data *arch_data, *tmp; 1433 1264 struct omap_iommu *oiommu; 1434 1265 struct iommu_group *group; 1435 1266 struct device_node *np; 1436 1267 struct platform_device *pdev; 1268 + int num_iommus, i; 1437 1269 int ret; 1438 1270 1439 1271 /* ··· 1446 1276 if (!dev->of_node) 1447 1277 return 0; 1448 1278 1449 - np = of_parse_phandle(dev->of_node, "iommus", 0); 1450 - if (!np) 1279 + /* 1280 + * retrieve the count of IOMMU nodes using phandle size as element size 1281 + * since #iommu-cells = 0 for OMAP 1282 + */ 1283 + num_iommus = of_property_count_elems_of_size(dev->of_node, "iommus", 1284 + sizeof(phandle)); 1285 + if (num_iommus < 0) 1451 1286 return 0; 1452 1287 1453 - pdev = of_find_device_by_node(np); 1454 - if (WARN_ON(!pdev)) { 1455 - of_node_put(np); 1456 - return -EINVAL; 1457 - } 1458 - 1459 - oiommu = platform_get_drvdata(pdev); 1460 - if (!oiommu) { 1461 - of_node_put(np); 1462 - return -EINVAL; 1463 - } 1464 - 1465 - arch_data 
= kzalloc(sizeof(*arch_data), GFP_KERNEL); 1466 - if (!arch_data) { 1467 - of_node_put(np); 1288 + arch_data = kzalloc((num_iommus + 1) * sizeof(*arch_data), GFP_KERNEL); 1289 + if (!arch_data) 1468 1290 return -ENOMEM; 1291 + 1292 + for (i = 0, tmp = arch_data; i < num_iommus; i++, tmp++) { 1293 + np = of_parse_phandle(dev->of_node, "iommus", i); 1294 + if (!np) { 1295 + kfree(arch_data); 1296 + return -EINVAL; 1297 + } 1298 + 1299 + pdev = of_find_device_by_node(np); 1300 + if (WARN_ON(!pdev)) { 1301 + of_node_put(np); 1302 + kfree(arch_data); 1303 + return -EINVAL; 1304 + } 1305 + 1306 + oiommu = platform_get_drvdata(pdev); 1307 + if (!oiommu) { 1308 + of_node_put(np); 1309 + kfree(arch_data); 1310 + return -EINVAL; 1311 + } 1312 + 1313 + tmp->iommu_dev = oiommu; 1314 + 1315 + of_node_put(np); 1469 1316 } 1470 1317 1318 + /* 1319 + * use the first IOMMU alone for the sysfs device linking. 1320 + * TODO: Evaluate if a single iommu_group needs to be 1321 + * maintained for both IOMMUs 1322 + */ 1323 + oiommu = arch_data->iommu_dev; 1471 1324 ret = iommu_device_link(&oiommu->iommu, dev); 1472 1325 if (ret) { 1473 1326 kfree(arch_data); 1474 - of_node_put(np); 1475 1327 return ret; 1476 1328 } 1477 1329 1478 - arch_data->iommu_dev = oiommu; 1479 1330 dev->archdata.iommu = arch_data; 1480 1331 1481 1332 /* ··· 1511 1320 return PTR_ERR(group); 1512 1321 } 1513 1322 iommu_group_put(group); 1514 - 1515 - of_node_put(np); 1516 1323 1517 1324 return 0; 1518 1325 }
+14 -16
drivers/iommu/omap-iommu.h
··· 29 29 }; 30 30 31 31 /** 32 + * struct omap_iommu_device - omap iommu device data 33 + * @pgtable: page table used by an omap iommu attached to a domain 34 + * @iommu_dev: pointer to store an omap iommu instance attached to a domain 35 + */ 36 + struct omap_iommu_device { 37 + u32 *pgtable; 38 + struct omap_iommu *iommu_dev; 39 + }; 40 + 41 + /** 32 42 * struct omap_iommu_domain - omap iommu domain 33 - * @pgtable: the page table 34 - * @iommu_dev: an omap iommu device attached to this domain. only a single 35 - * iommu device can be attached for now. 43 + * @num_iommus: number of iommus in this domain 44 + * @iommus: omap iommu device data for all iommus in this domain 36 45 * @dev: Device using this domain. 37 46 * @lock: domain lock, should be taken when attaching/detaching 38 47 * @domain: generic domain handle used by iommu core code 39 48 */ 40 49 struct omap_iommu_domain { 41 - u32 *pgtable; 42 - struct omap_iommu *iommu_dev; 50 + u32 num_iommus; 51 + struct omap_iommu_device *iommus; 43 52 struct device *dev; 44 53 spinlock_t lock; 45 54 struct iommu_domain domain; ··· 105 96 short base; 106 97 short vict; 107 98 }; 108 - 109 - /** 110 - * dev_to_omap_iommu() - retrieves an omap iommu object from a user device 111 - * @dev: iommu client device 112 - */ 113 - static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev) 114 - { 115 - struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; 116 - 117 - return arch_data->iommu_dev; 118 - } 119 99 120 100 /* 121 101 * MMU Register offsets
+28 -5
drivers/iommu/qcom_iommu.c
··· 66 66 void __iomem *base; 67 67 bool secure_init; 68 68 u8 asid; /* asid and ctx bank # are 1:1 */ 69 + struct iommu_domain *domain; 69 70 }; 70 71 71 72 struct qcom_iommu_domain { ··· 195 194 fsynr = iommu_readl(ctx, ARM_SMMU_CB_FSYNR0); 196 195 iova = iommu_readq(ctx, ARM_SMMU_CB_FAR); 197 196 198 - dev_err_ratelimited(ctx->dev, 199 - "Unhandled context fault: fsr=0x%x, " 200 - "iova=0x%016llx, fsynr=0x%x, cb=%d\n", 201 - fsr, iova, fsynr, ctx->asid); 197 + if (!report_iommu_fault(ctx->domain, ctx->dev, iova, 0)) { 198 + dev_err_ratelimited(ctx->dev, 199 + "Unhandled context fault: fsr=0x%x, " 200 + "iova=0x%016llx, fsynr=0x%x, cb=%d\n", 201 + fsr, iova, fsynr, ctx->asid); 202 + } 202 203 203 204 iommu_writel(ctx, ARM_SMMU_CB_FSR, fsr); 205 + iommu_writel(ctx, ARM_SMMU_CB_RESUME, RESUME_TERMINATE); 204 206 205 207 return IRQ_HANDLED; 206 208 } ··· 278 274 279 275 /* SCTLR */ 280 276 reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | 281 - SCTLR_M | SCTLR_S1_ASIDPNE; 277 + SCTLR_M | SCTLR_S1_ASIDPNE | SCTLR_CFCFG; 282 278 283 279 if (IS_ENABLED(CONFIG_BIG_ENDIAN)) 284 280 reg |= SCTLR_E; 285 281 286 282 iommu_writel(ctx, ARM_SMMU_CB_SCTLR, reg); 283 + 284 + ctx->domain = domain; 287 285 } 288 286 289 287 mutex_unlock(&qcom_domain->init_mutex); ··· 401 395 402 396 /* Disable the context bank: */ 403 397 iommu_writel(ctx, ARM_SMMU_CB_SCTLR, 0); 398 + 399 + ctx->domain = NULL; 404 400 } 405 401 pm_runtime_put_sync(qcom_iommu->dev); 406 402 ··· 449 441 pm_runtime_put_sync(qcom_domain->iommu->dev); 450 442 451 443 return ret; 444 + } 445 + 446 + static void qcom_iommu_iotlb_sync(struct iommu_domain *domain) 447 + { 448 + struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); 449 + struct io_pgtable *pgtable = container_of(qcom_domain->pgtbl_ops, 450 + struct io_pgtable, ops); 451 + if (!qcom_domain->pgtbl_ops) 452 + return; 453 + 454 + pm_runtime_get_sync(qcom_domain->iommu->dev); 455 + qcom_iommu_tlb_sync(pgtable->cookie); 456 + 
pm_runtime_put_sync(qcom_domain->iommu->dev); 452 457 } 453 458 454 459 static phys_addr_t qcom_iommu_iova_to_phys(struct iommu_domain *domain, ··· 591 570 .map = qcom_iommu_map, 592 571 .unmap = qcom_iommu_unmap, 593 572 .map_sg = default_iommu_map_sg, 573 + .flush_iotlb_all = qcom_iommu_iotlb_sync, 574 + .iotlb_sync = qcom_iommu_iotlb_sync, 594 575 .iova_to_phys = qcom_iommu_iova_to_phys, 595 576 .add_device = qcom_iommu_add_device, 596 577 .remove_device = qcom_iommu_remove_device,
+1 -2
drivers/misc/mic/scif/scif_rma.c
··· 39 39 struct scif_endpt_rma_info *rma = &ep->rma_info; 40 40 41 41 mutex_init(&rma->rma_lock); 42 - init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN, 43 - SCIF_DMA_64BIT_PFN); 42 + init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN); 44 43 spin_lock_init(&rma->tc_lock); 45 44 mutex_init(&rma->mmn_lock); 46 45 INIT_LIST_HEAD(&rma->reg_list);
+1
include/linux/dmar.h
··· 112 112 113 113 extern int dmar_table_init(void); 114 114 extern int dmar_dev_scope_init(void); 115 + extern void dmar_register_bus_notifier(void); 115 116 extern int dmar_parse_dev_scope(void *start, void *end, int *cnt, 116 117 struct dmar_dev_scope **devices, u16 segment); 117 118 extern void *dmar_alloc_dev_scope(void *start, void *end, int *cnt);
+1
include/linux/intel-iommu.h
··· 212 212 #define DMA_FSTS_IQE (1 << 4) 213 213 #define DMA_FSTS_ICE (1 << 5) 214 214 #define DMA_FSTS_ITE (1 << 6) 215 + #define DMA_FSTS_PRO (1 << 7) 215 216 #define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff) 216 217 217 218 /* FRCD_REG, 32 bits access */
+8 -6
include/linux/iova.h
··· 70 70 struct iova_domain { 71 71 spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ 72 72 struct rb_root rbroot; /* iova domain rbtree root */ 73 - struct rb_node *cached32_node; /* Save last alloced node */ 73 + struct rb_node *cached_node; /* Save last alloced node */ 74 + struct rb_node *cached32_node; /* Save last 32-bit alloced node */ 74 75 unsigned long granule; /* pfn granularity for this domain */ 75 76 unsigned long start_pfn; /* Lower limit for this domain */ 76 77 unsigned long dma_32bit_pfn; 78 + struct iova anchor; /* rbtree lookup anchor */ 77 79 struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE]; /* IOVA range caches */ 78 80 79 81 iova_flush_cb flush_cb; /* Call-Back function to flush IOMMU ··· 150 148 unsigned long pfn, unsigned long pages, 151 149 unsigned long data); 152 150 unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, 153 - unsigned long limit_pfn); 151 + unsigned long limit_pfn, bool flush_rcache); 154 152 struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, 155 153 unsigned long pfn_hi); 156 154 void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); 157 155 void init_iova_domain(struct iova_domain *iovad, unsigned long granule, 158 - unsigned long start_pfn, unsigned long pfn_32bit); 156 + unsigned long start_pfn); 159 157 int init_iova_flush_queue(struct iova_domain *iovad, 160 158 iova_flush_cb flush_cb, iova_entry_dtor entry_dtor); 161 159 struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); ··· 212 210 213 211 static inline unsigned long alloc_iova_fast(struct iova_domain *iovad, 214 212 unsigned long size, 215 - unsigned long limit_pfn) 213 + unsigned long limit_pfn, 214 + bool flush_rcache) 216 215 { 217 216 return 0; 218 217 } ··· 232 229 233 230 static inline void init_iova_domain(struct iova_domain *iovad, 234 231 unsigned long granule, 235 - unsigned long start_pfn, 236 - unsigned long pfn_32bit) 232 + unsigned long 
start_pfn) 237 233 { 238 234 } 239 235