Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branches 'apple/dart', 'arm/smmu/updates', 'arm/smmu/bindings', 'rockchip', 's390', 'core', 'intel/vt-d' and 'amd/amd-vi' into next

+1097 -866
+2 -1
Documentation/devicetree/bindings/iommu/arm,smmu.yaml
··· 90 90 - enum: 91 91 - qcom,qcm2290-smmu-500 92 92 - qcom,qcs615-smmu-500 93 + - qcom,qcs8300-smmu-500 93 94 - qcom,sa8255p-smmu-500 94 95 - qcom,sa8775p-smmu-500 95 96 - qcom,sar2130p-smmu-500 ··· 398 397 compatible: 399 398 contains: 400 399 enum: 400 + - qcom,qcs8300-smmu-500 401 401 - qcom,sa8775p-smmu-500 402 402 - qcom,sc7280-smmu-500 403 403 - qcom,sc8280xp-smmu-500 ··· 583 581 - cavium,smmu-v2 584 582 - marvell,ap806-smmu-500 585 583 - nvidia,smmu-500 586 - - qcom,qcs8300-smmu-500 587 584 - qcom,qdu1000-smmu-500 588 585 - qcom,sa8255p-smmu-500 589 586 - qcom,sc7180-smmu-500
+1
Documentation/devicetree/bindings/iommu/qcom,iommu.yaml
··· 22 22 - enum: 23 23 - qcom,msm8916-iommu 24 24 - qcom,msm8917-iommu 25 + - qcom,msm8937-iommu 25 26 - qcom,msm8953-iommu 26 27 - const: qcom,msm-iommu-v1 27 28 - items:
+3 -1
arch/s390/include/asm/pci.h
··· 144 144 u8 util_str_avail : 1; 145 145 u8 irqs_registered : 1; 146 146 u8 tid_avail : 1; 147 - u8 reserved : 1; 147 + u8 rtr_avail : 1; /* Relaxed translation allowed */ 148 148 unsigned int devfn; /* DEVFN part of the RID*/ 149 149 150 150 u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */ ··· 217 217 struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state); 218 218 int zpci_add_device(struct zpci_dev *zdev); 219 219 int zpci_enable_device(struct zpci_dev *); 220 + int zpci_reenable_device(struct zpci_dev *zdev); 220 221 int zpci_disable_device(struct zpci_dev *); 221 222 int zpci_scan_configured_device(struct zpci_dev *zdev, u32 fh); 222 223 int zpci_deconfigure_device(struct zpci_dev *zdev); ··· 246 245 /* IOMMU Interface */ 247 246 int zpci_init_iommu(struct zpci_dev *zdev); 248 247 void zpci_destroy_iommu(struct zpci_dev *zdev); 248 + int zpci_iommu_register_ioat(struct zpci_dev *zdev, u8 *status); 249 249 250 250 #ifdef CONFIG_PCI 251 251 static inline bool zpci_use_mio(struct zpci_dev *zdev)
+3 -1
arch/s390/include/asm/pci_clp.h
··· 156 156 u16 : 4; 157 157 u16 noi : 12; /* number of interrupts */ 158 158 u8 version; 159 - u8 : 6; 159 + u8 : 2; 160 + u8 rtr : 1; /* Relaxed translation requirement */ 161 + u8 : 3; 160 162 u8 frame : 1; 161 163 u8 refresh : 1; /* TLB refresh mode */ 162 164 u16 : 3;
+2 -15
arch/s390/kvm/pci.c
··· 433 433 static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm) 434 434 { 435 435 struct zpci_dev *zdev = opaque; 436 - u8 status; 437 436 int rc; 438 437 439 438 if (!zdev) ··· 479 480 */ 480 481 zdev->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa); 481 482 482 - rc = zpci_enable_device(zdev); 483 - if (rc) 484 - goto clear_gisa; 485 - 486 - /* Re-register the IOMMU that was already created */ 487 - rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, 488 - virt_to_phys(zdev->dma_table), &status); 483 + rc = zpci_reenable_device(zdev); 489 484 if (rc) 490 485 goto clear_gisa; 491 486 ··· 509 516 { 510 517 struct zpci_dev *zdev = opaque; 511 518 struct kvm *kvm; 512 - u8 status; 513 519 514 520 if (!zdev) 515 521 return; ··· 542 550 goto out; 543 551 } 544 552 545 - if (zpci_enable_device(zdev)) 546 - goto out; 547 - 548 - /* Re-register the IOMMU that was already created */ 549 - zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, 550 - virt_to_phys(zdev->dma_table), &status); 553 + zpci_reenable_device(zdev); 551 554 552 555 out: 553 556 spin_lock(&kvm->arch.kzdev_list_lock);
+20 -15
arch/s390/pci/pci.c
··· 124 124 struct zpci_fib fib = {0}; 125 125 u8 cc; 126 126 127 - WARN_ON_ONCE(iota & 0x3fff); 128 127 fib.pba = base; 129 128 /* Work around off by one in ISM virt device */ 130 129 if (zdev->pft == PCI_FUNC_TYPE_ISM && limit > base) 131 130 fib.pal = limit + (1 << 12); 132 131 else 133 132 fib.pal = limit; 134 - fib.iota = iota | ZPCI_IOTA_RTTO_FLAG; 133 + fib.iota = iota; 135 134 fib.gd = zdev->gisa; 136 135 cc = zpci_mod_fc(req, &fib, status); 137 136 if (cc) ··· 689 690 } 690 691 EXPORT_SYMBOL_GPL(zpci_enable_device); 691 692 693 + int zpci_reenable_device(struct zpci_dev *zdev) 694 + { 695 + u8 status; 696 + int rc; 697 + 698 + rc = zpci_enable_device(zdev); 699 + if (rc) 700 + return rc; 701 + 702 + rc = zpci_iommu_register_ioat(zdev, &status); 703 + if (rc) 704 + zpci_disable_device(zdev); 705 + 706 + return rc; 707 + } 708 + EXPORT_SYMBOL_GPL(zpci_reenable_device); 709 + 692 710 int zpci_disable_device(struct zpci_dev *zdev) 693 711 { 694 712 u32 fh = zdev->fh; ··· 755 739 */ 756 740 int zpci_hot_reset_device(struct zpci_dev *zdev) 757 741 { 758 - u8 status; 759 742 int rc; 760 743 761 744 lockdep_assert_held(&zdev->state_lock); ··· 773 758 return rc; 774 759 } 775 760 776 - rc = zpci_enable_device(zdev); 777 - if (rc) 778 - return rc; 761 + rc = zpci_reenable_device(zdev); 779 762 780 - if (zdev->dma_table) 781 - rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, 782 - virt_to_phys(zdev->dma_table), &status); 783 - if (rc) { 784 - zpci_disable_device(zdev); 785 - return rc; 786 - } 787 - 788 - return 0; 763 + return rc; 789 764 } 790 765 791 766 /**
+25
arch/s390/pci/pci_bus.c
··· 19 19 #include <linux/jump_label.h> 20 20 #include <linux/pci.h> 21 21 #include <linux/printk.h> 22 + #include <linux/dma-direct.h> 22 23 23 24 #include <asm/pci_clp.h> 24 25 #include <asm/pci_dma.h> ··· 284 283 return zbus; 285 284 } 286 285 286 + static void pci_dma_range_setup(struct pci_dev *pdev) 287 + { 288 + struct zpci_dev *zdev = to_zpci(pdev); 289 + struct bus_dma_region *map; 290 + u64 aligned_end; 291 + 292 + map = kzalloc(sizeof(*map), GFP_KERNEL); 293 + if (!map) 294 + return; 295 + 296 + map->cpu_start = 0; 297 + map->dma_start = PAGE_ALIGN(zdev->start_dma); 298 + aligned_end = PAGE_ALIGN_DOWN(zdev->end_dma + 1); 299 + if (aligned_end >= map->dma_start) 300 + map->size = aligned_end - map->dma_start; 301 + else 302 + map->size = 0; 303 + WARN_ON_ONCE(map->size == 0); 304 + 305 + pdev->dev.dma_range_map = map; 306 + } 307 + 287 308 void pcibios_bus_add_device(struct pci_dev *pdev) 288 309 { 289 310 struct zpci_dev *zdev = to_zpci(pdev); 311 + 312 + pci_dma_range_setup(pdev); 290 313 291 314 /* 292 315 * With pdev->no_vf_scan the common PCI probing code does not
+1
arch/s390/pci/pci_clp.c
··· 112 112 zdev->version = response->version; 113 113 zdev->maxstbl = response->maxstbl; 114 114 zdev->dtsm = response->dtsm; 115 + zdev->rtr_avail = response->rtr; 115 116 116 117 switch (response->version) { 117 118 case 1:
+1 -10
arch/s390/pci/pci_sysfs.c
··· 52 52 53 53 static int _do_recover(struct pci_dev *pdev, struct zpci_dev *zdev) 54 54 { 55 - u8 status; 56 55 int ret; 57 56 58 57 pci_stop_and_remove_bus_device(pdev); ··· 69 70 return ret; 70 71 } 71 72 72 - ret = zpci_enable_device(zdev); 73 - if (ret) 74 - return ret; 73 + ret = zpci_reenable_device(zdev); 75 74 76 - if (zdev->dma_table) { 77 - ret = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, 78 - virt_to_phys(zdev->dma_table), &status); 79 - if (ret) 80 - zpci_disable_device(zdev); 81 - } 82 75 return ret; 83 76 } 84 77
+5
drivers/acpi/arm64/dma.c
··· 26 26 else 27 27 end = (1ULL << 32) - 1; 28 28 29 + if (dev->dma_range_map) { 30 + dev_dbg(dev, "dma_range_map already set\n"); 31 + return; 32 + } 33 + 29 34 ret = acpi_dma_get_range(dev, &map); 30 35 if (!ret && map) { 31 36 end = dma_range_map_max(map);
-7
drivers/acpi/scan.c
··· 1632 1632 err = viot_iommu_configure(dev); 1633 1633 mutex_unlock(&iommu_probe_device_lock); 1634 1634 1635 - /* 1636 - * If we have reason to believe the IOMMU driver missed the initial 1637 - * iommu_probe_device() call for dev, replay it to get things in order. 1638 - */ 1639 - if (!err && dev->bus) 1640 - err = iommu_probe_device(dev); 1641 - 1642 1635 return err; 1643 1636 } 1644 1637
+2 -1
drivers/amba/bus.c
··· 364 364 ret = acpi_dma_configure(dev, attr); 365 365 } 366 366 367 - if (!ret && !drv->driver_managed_dma) { 367 + /* @drv may not be valid when we're called from the IOMMU layer */ 368 + if (!ret && dev->driver && !drv->driver_managed_dma) { 368 369 ret = iommu_device_use_default_domain(dev); 369 370 if (ret) 370 371 arch_teardown_dma_ops(dev);
+2 -1
drivers/base/platform.c
··· 1451 1451 attr = acpi_get_dma_attr(to_acpi_device_node(fwnode)); 1452 1452 ret = acpi_dma_configure(dev, attr); 1453 1453 } 1454 - if (ret || drv->driver_managed_dma) 1454 + /* @drv may not be valid when we're called from the IOMMU layer */ 1455 + if (ret || !dev->driver || drv->driver_managed_dma) 1455 1456 return ret; 1456 1457 1457 1458 ret = iommu_device_use_default_domain(dev);
+2 -1
drivers/bus/fsl-mc/fsl-mc-bus.c
··· 153 153 else 154 154 ret = acpi_dma_configure_id(dev, DEV_DMA_COHERENT, &input_id); 155 155 156 - if (!ret && !mc_drv->driver_managed_dma) { 156 + /* @mc_drv may not be valid when we're called from the IOMMU layer */ 157 + if (!ret && dev->driver && !mc_drv->driver_managed_dma) { 157 158 ret = iommu_device_use_default_domain(dev); 158 159 if (ret) 159 160 arch_teardown_dma_ops(dev);
+2 -1
drivers/cdx/cdx.c
··· 360 360 return ret; 361 361 } 362 362 363 - if (!ret && !cdx_drv->driver_managed_dma) { 363 + /* @cdx_drv may not be valid when we're called from the IOMMU layer */ 364 + if (!ret && dev->driver && !cdx_drv->driver_managed_dma) { 364 365 ret = iommu_device_use_default_domain(dev); 365 366 if (ret) 366 367 arch_teardown_dma_ops(dev);
+1 -3
drivers/iommu/Kconfig
··· 154 154 select DMA_OPS_HELPERS 155 155 select IOMMU_API 156 156 select IOMMU_IOVA 157 - select IRQ_MSI_IOMMU 158 157 select NEED_SG_DMA_LENGTH 159 158 select NEED_SG_DMA_FLAGS if SWIOTLB 160 159 ··· 482 483 483 484 config MTK_IOMMU_V1 484 485 tristate "MediaTek IOMMU Version 1 (M4U gen1) Support" 485 - depends on ARM 486 - depends on ARCH_MEDIATEK || COMPILE_TEST 486 + depends on (ARCH_MEDIATEK && ARM) || COMPILE_TEST 487 487 select ARM_DMA_USE_IOMMU 488 488 select IOMMU_API 489 489 select MEMORY
+3 -5
drivers/iommu/amd/amd_iommu.h
··· 47 47 /* Protection domain ops */ 48 48 void amd_iommu_init_identity_domain(void); 49 49 struct protection_domain *protection_domain_alloc(void); 50 - void protection_domain_free(struct protection_domain *domain); 51 50 struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev, 52 51 struct mm_struct *mm); 53 52 void amd_iommu_domain_free(struct iommu_domain *dom); ··· 175 176 #else 176 177 static inline void amd_iommu_apply_ivrs_quirks(void) { } 177 178 #endif 179 + struct dev_table_entry *amd_iommu_get_ivhd_dte_flags(u16 segid, u16 devid); 178 180 179 181 void amd_iommu_domain_set_pgtable(struct protection_domain *domain, 180 182 u64 *root, int mode); 181 183 struct dev_table_entry *get_dev_table(struct amd_iommu *iommu); 182 - 183 - #endif 184 - 185 - struct dev_table_entry *amd_iommu_get_ivhd_dte_flags(u16 segid, u16 devid); 186 184 struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid); 185 + 186 + #endif /* AMD_IOMMU_H */
+15 -15
drivers/iommu/amd/amd_iommu_types.h
··· 112 112 #define FEATURE_SNPAVICSUP_GAM(x) \ 113 113 (FIELD_GET(FEATURE_SNPAVICSUP, x) == 0x1) 114 114 115 + #define FEATURE_NUM_INT_REMAP_SUP GENMASK_ULL(9, 8) 116 + #define FEATURE_NUM_INT_REMAP_SUP_2K(x) \ 117 + (FIELD_GET(FEATURE_NUM_INT_REMAP_SUP, x) == 0x1) 118 + 115 119 /* Note: 116 120 * The current driver only support 16-bit PASID. 117 121 * Currently, hardware only implement upto 16-bit PASID ··· 179 175 #define CONTROL_GAM_EN 25 180 176 #define CONTROL_GALOG_EN 28 181 177 #define CONTROL_GAINT_EN 29 178 + #define CONTROL_NUM_INT_REMAP_MODE 43 179 + #define CONTROL_NUM_INT_REMAP_MODE_MASK 0x03 180 + #define CONTROL_NUM_INT_REMAP_MODE_2K 0x01 182 181 #define CONTROL_EPH_EN 45 183 182 #define CONTROL_XT_EN 50 184 183 #define CONTROL_INTCAPXT_EN 51 185 184 #define CONTROL_IRTCACHEDIS 59 186 185 #define CONTROL_SNPAVIC_EN 61 187 186 188 - #define CTRL_INV_TO_MASK (7 << CONTROL_INV_TIMEOUT) 187 + #define CTRL_INV_TO_MASK 7 189 188 #define CTRL_INV_TO_NONE 0 190 189 #define CTRL_INV_TO_1MS 1 191 190 #define CTRL_INV_TO_10MS 2 ··· 316 309 #define DTE_IRQ_REMAP_INTCTL (2ULL << 60) 317 310 #define DTE_IRQ_REMAP_ENABLE 1ULL 318 311 319 - /* 320 - * AMD IOMMU hardware only support 512 IRTEs despite 321 - * the architectural limitation of 2048 entries. 
322 - */ 323 - #define DTE_INTTAB_ALIGNMENT 128 324 - #define DTE_INTTABLEN_VALUE 9ULL 325 - #define DTE_INTTABLEN (DTE_INTTABLEN_VALUE << 1) 326 312 #define DTE_INTTABLEN_MASK (0xfULL << 1) 327 - #define MAX_IRQS_PER_TABLE (1 << DTE_INTTABLEN_VALUE) 313 + #define DTE_INTTABLEN_VALUE_512 9ULL 314 + #define DTE_INTTABLEN_512 (DTE_INTTABLEN_VALUE_512 << 1) 315 + #define MAX_IRQS_PER_TABLE_512 BIT(DTE_INTTABLEN_VALUE_512) 316 + #define DTE_INTTABLEN_VALUE_2K 11ULL 317 + #define DTE_INTTABLEN_2K (DTE_INTTABLEN_VALUE_2K << 1) 318 + #define MAX_IRQS_PER_TABLE_2K BIT(DTE_INTTABLEN_VALUE_2K) 328 319 329 320 #define PAGE_MODE_NONE 0x00 330 321 #define PAGE_MODE_1_LEVEL 0x01 ··· 496 491 497 492 /* IVRS indicates that pre-boot remapping was enabled */ 498 493 extern bool amdr_ivrs_remap_support; 499 - 500 - /* kmem_cache to get tables with 128 byte alignement */ 501 - extern struct kmem_cache *amd_iommu_irq_cache; 502 494 503 495 #define PCI_SBDF_TO_SEGID(sbdf) (((sbdf) >> 16) & 0xffff) 504 496 #define PCI_SBDF_TO_DEVID(sbdf) ((sbdf) & 0xffff) ··· 853 851 struct device *dev; 854 852 u16 devid; /* PCI Device ID */ 855 853 854 + unsigned int max_irqs; /* Maximum IRQs supported by device */ 856 855 u32 max_pasids; /* Max supported PASIDs */ 857 856 u32 flags; /* Holds AMD_IOMMU_DEVICE_FLAG_<*> */ 858 857 int ats_qdep; ··· 930 927 /* 931 928 * Data structures for device handling 932 929 */ 933 - 934 - /* size of the dma_ops aperture as power of 2 */ 935 - extern unsigned amd_iommu_aperture_order; 936 930 937 931 extern bool amd_iommu_force_isolation; 938 932
+28 -41
drivers/iommu/amd/init.c
··· 12 12 #include <linux/acpi.h> 13 13 #include <linux/list.h> 14 14 #include <linux/bitmap.h> 15 - #include <linux/slab.h> 16 15 #include <linux/syscore_ops.h> 17 16 #include <linux/interrupt.h> 18 17 #include <linux/msi.h> ··· 218 219 static enum iommu_init_state init_state = IOMMU_START_STATE; 219 220 220 221 static int amd_iommu_enable_interrupts(void); 221 - static int __init iommu_go_to_state(enum iommu_init_state state); 222 222 static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg); 223 223 224 224 static bool amd_iommu_pre_enabled = true; ··· 410 412 &entry, sizeof(entry)); 411 413 } 412 414 413 - /* Generic functions to enable/disable certain features of the IOMMU. */ 414 - void iommu_feature_enable(struct amd_iommu *iommu, u8 bit) 415 + static void iommu_feature_set(struct amd_iommu *iommu, u64 val, u64 mask, u8 shift) 415 416 { 416 417 u64 ctrl; 417 418 418 419 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); 419 - ctrl |= (1ULL << bit); 420 + mask <<= shift; 421 + ctrl &= ~mask; 422 + ctrl |= (val << shift) & mask; 420 423 writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); 424 + } 425 + 426 + /* Generic functions to enable/disable certain features of the IOMMU. 
*/ 427 + void iommu_feature_enable(struct amd_iommu *iommu, u8 bit) 428 + { 429 + iommu_feature_set(iommu, 1ULL, 1ULL, bit); 421 430 } 422 431 423 432 static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit) 424 433 { 425 - u64 ctrl; 426 - 427 - ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); 428 - ctrl &= ~(1ULL << bit); 429 - writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); 430 - } 431 - 432 - static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout) 433 - { 434 - u64 ctrl; 435 - 436 - ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); 437 - ctrl &= ~CTRL_INV_TO_MASK; 438 - ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK; 439 - writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); 434 + iommu_feature_set(iommu, 0ULL, 1ULL, bit); 440 435 } 441 436 442 437 /* Function to enable the hardware */ ··· 1060 1069 int_tab_len = old_devtb[devid].data[2] & DTE_INTTABLEN_MASK; 1061 1070 if (irq_v && (int_ctl || int_tab_len)) { 1062 1071 if ((int_ctl != DTE_IRQ_REMAP_INTCTL) || 1063 - (int_tab_len != DTE_INTTABLEN)) { 1072 + (int_tab_len != DTE_INTTABLEN_512 && 1073 + int_tab_len != DTE_INTTABLEN_2K)) { 1064 1074 pr_err("Wrong old irq remapping flag: %#x\n", devid); 1065 1075 memunmap(old_devtb); 1066 1076 return false; ··· 2644 2652 iommu_feature_enable(iommu, CONTROL_COHERENT_EN); 2645 2653 2646 2654 /* Set IOTLB invalidation timeout to 1s */ 2647 - iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S); 2655 + iommu_feature_set(iommu, CTRL_INV_TO_1S, CTRL_INV_TO_MASK, CONTROL_INV_TIMEOUT); 2648 2656 2649 2657 /* Enable Enhanced Peripheral Page Request Handling */ 2650 2658 if (check_feature(FEATURE_EPHSUP)) ··· 2737 2745 iommu->irtcachedis_enabled ? 
"disabled" : "enabled"); 2738 2746 } 2739 2747 2748 + static void iommu_enable_2k_int(struct amd_iommu *iommu) 2749 + { 2750 + if (!FEATURE_NUM_INT_REMAP_SUP_2K(amd_iommu_efr2)) 2751 + return; 2752 + 2753 + iommu_feature_set(iommu, 2754 + CONTROL_NUM_INT_REMAP_MODE_2K, 2755 + CONTROL_NUM_INT_REMAP_MODE_MASK, 2756 + CONTROL_NUM_INT_REMAP_MODE); 2757 + } 2758 + 2740 2759 static void early_enable_iommu(struct amd_iommu *iommu) 2741 2760 { 2742 2761 iommu_disable(iommu); ··· 2760 2757 iommu_enable_ga(iommu); 2761 2758 iommu_enable_xt(iommu); 2762 2759 iommu_enable_irtcachedis(iommu); 2760 + iommu_enable_2k_int(iommu); 2763 2761 iommu_enable(iommu); 2764 2762 amd_iommu_flush_all_caches(iommu); 2765 2763 } ··· 2817 2813 iommu_enable_ga(iommu); 2818 2814 iommu_enable_xt(iommu); 2819 2815 iommu_enable_irtcachedis(iommu); 2816 + iommu_enable_2k_int(iommu); 2820 2817 iommu_set_device_table(iommu); 2821 2818 amd_iommu_flush_all_caches(iommu); 2822 2819 } ··· 2944 2939 2945 2940 static void __init free_iommu_resources(void) 2946 2941 { 2947 - kmem_cache_destroy(amd_iommu_irq_cache); 2948 - amd_iommu_irq_cache = NULL; 2949 - 2950 2942 free_iommu_all(); 2951 2943 free_pci_segments(); 2952 2944 } ··· 3042 3040 static int __init early_amd_iommu_init(void) 3043 3041 { 3044 3042 struct acpi_table_header *ivrs_base; 3045 - int remap_cache_sz, ret; 3043 + int ret; 3046 3044 acpi_status status; 3047 3045 3048 3046 if (!amd_iommu_detected) ··· 3104 3102 3105 3103 if (amd_iommu_irq_remap) { 3106 3104 struct amd_iommu_pci_seg *pci_seg; 3107 - /* 3108 - * Interrupt remapping enabled, create kmem_cache for the 3109 - * remapping tables. 
3110 - */ 3111 3105 ret = -ENOMEM; 3112 - if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir)) 3113 - remap_cache_sz = MAX_IRQS_PER_TABLE * sizeof(u32); 3114 - else 3115 - remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2); 3116 - amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache", 3117 - remap_cache_sz, 3118 - DTE_INTTAB_ALIGNMENT, 3119 - 0, NULL); 3120 - if (!amd_iommu_irq_cache) 3121 - goto out; 3122 - 3123 3106 for_each_pci_segment(pci_seg) { 3124 3107 if (alloc_irq_lookup_table(pci_seg)) 3125 3108 goto out;
-7
drivers/iommu/amd/io_pgtable.c
··· 47 47 return fpte; 48 48 } 49 49 50 - /**************************************************************************** 51 - * 52 - * The functions below are used the create the page table mappings for 53 - * unity mapped regions. 54 - * 55 - ****************************************************************************/ 56 - 57 50 static void free_pt_page(u64 *pt, struct list_head *freelist) 58 51 { 59 52 struct page *p = virt_to_page(pt);
+1 -1
drivers/iommu/amd/io_pgtable_v2.c
··· 254 254 pte = v2_alloc_pte(cfg->amd.nid, pgtable->pgd, 255 255 iova, map_size, gfp, &updated); 256 256 if (!pte) { 257 - ret = -EINVAL; 257 + ret = -ENOMEM; 258 258 goto out; 259 259 } 260 260
+60 -31
drivers/iommu/amd/iommu.c
··· 75 75 */ 76 76 DEFINE_IDA(pdom_ids); 77 77 78 - struct kmem_cache *amd_iommu_irq_cache; 79 - 80 78 static int amd_iommu_attach_device(struct iommu_domain *dom, 81 79 struct device *dev); 82 80 ··· 866 868 int type, devid, flags, tag; 867 869 volatile u32 *event = __evt; 868 870 int count = 0; 869 - u64 address; 871 + u64 address, ctrl; 870 872 u32 pasid; 871 873 872 874 retry: ··· 876 878 (event[1] & EVENT_DOMID_MASK_LO); 877 879 flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; 878 880 address = (u64)(((u64)event[3]) << 32) | event[2]; 881 + ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); 879 882 880 883 if (type == 0) { 881 884 /* Did we hit the erratum? */ ··· 898 899 dev_err(dev, "Event logged [ILLEGAL_DEV_TABLE_ENTRY device=%04x:%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n", 899 900 iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), 900 901 pasid, address, flags); 902 + dev_err(dev, "Control Reg : 0x%llx\n", ctrl); 901 903 dump_dte_entry(iommu, devid); 902 904 break; 903 905 case EVENT_TYPE_DEV_TAB_ERR: ··· 2394 2394 } 2395 2395 2396 2396 out_err: 2397 + 2397 2398 iommu_completion_wait(iommu); 2399 + 2400 + if (FEATURE_NUM_INT_REMAP_SUP_2K(amd_iommu_efr2)) 2401 + dev_data->max_irqs = MAX_IRQS_PER_TABLE_2K; 2402 + else 2403 + dev_data->max_irqs = MAX_IRQS_PER_TABLE_512; 2398 2404 2399 2405 if (dev_is_pci(dev)) 2400 2406 pci_prepare_ats(to_pci_dev(dev), PAGE_SHIFT); ··· 2437 2431 * which is not possible with the dma_ops interface. 
2438 2432 * 2439 2433 *****************************************************************************/ 2440 - 2441 - void protection_domain_free(struct protection_domain *domain) 2442 - { 2443 - WARN_ON(!list_empty(&domain->dev_list)); 2444 - if (domain->domain.type & __IOMMU_DOMAIN_PAGING) 2445 - free_io_pgtable_ops(&domain->iop.pgtbl.ops); 2446 - pdom_id_free(domain->id); 2447 - kfree(domain); 2448 - } 2449 2434 2450 2435 static void protection_domain_init(struct protection_domain *domain) 2451 2436 { ··· 2575 2578 { 2576 2579 struct protection_domain *domain = to_pdomain(dom); 2577 2580 2578 - protection_domain_free(domain); 2581 + WARN_ON(!list_empty(&domain->dev_list)); 2582 + if (domain->domain.type & __IOMMU_DOMAIN_PAGING) 2583 + free_io_pgtable_ops(&domain->iop.pgtbl.ops); 2584 + pdom_id_free(domain->id); 2585 + kfree(domain); 2579 2586 } 2580 2587 2581 2588 static int blocked_domain_attach_device(struct iommu_domain *domain, ··· 3082 3081 raw_spin_unlock_irqrestore(&iommu->lock, flags); 3083 3082 } 3084 3083 3084 + static inline u8 iommu_get_int_tablen(struct iommu_dev_data *dev_data) 3085 + { 3086 + if (dev_data && dev_data->max_irqs == MAX_IRQS_PER_TABLE_2K) 3087 + return DTE_INTTABLEN_2K; 3088 + return DTE_INTTABLEN_512; 3089 + } 3090 + 3085 3091 static void set_dte_irq_entry(struct amd_iommu *iommu, u16 devid, 3086 3092 struct irq_remap_table *table) 3087 3093 { ··· 3103 3095 new &= ~DTE_IRQ_PHYS_ADDR_MASK; 3104 3096 new |= iommu_virt_to_phys(table->table); 3105 3097 new |= DTE_IRQ_REMAP_INTCTL; 3106 - new |= DTE_INTTABLEN; 3098 + new |= iommu_get_int_tablen(dev_data); 3107 3099 new |= DTE_IRQ_REMAP_ENABLE; 3108 3100 WRITE_ONCE(dte->data[2], new); 3109 3101 ··· 3129 3121 return table; 3130 3122 } 3131 3123 3132 - static struct irq_remap_table *__alloc_irq_table(void) 3124 + static struct irq_remap_table *__alloc_irq_table(int nid, int order) 3133 3125 { 3134 3126 struct irq_remap_table *table; 3135 3127 ··· 3137 3129 if (!table) 3138 3130 return NULL; 
3139 3131 3140 - table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_KERNEL); 3132 + table->table = iommu_alloc_pages_node(nid, GFP_KERNEL, order); 3141 3133 if (!table->table) { 3142 3134 kfree(table); 3143 3135 return NULL; 3144 3136 } 3145 3137 raw_spin_lock_init(&table->lock); 3146 3138 3147 - if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir)) 3148 - memset(table->table, 0, 3149 - MAX_IRQS_PER_TABLE * sizeof(u32)); 3150 - else 3151 - memset(table->table, 0, 3152 - (MAX_IRQS_PER_TABLE * (sizeof(u64) * 2))); 3153 3139 return table; 3154 3140 } 3155 3141 ··· 3175 3173 return 0; 3176 3174 } 3177 3175 3176 + static inline size_t get_irq_table_size(unsigned int max_irqs) 3177 + { 3178 + if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir)) 3179 + return max_irqs * sizeof(u32); 3180 + 3181 + return max_irqs * (sizeof(u64) * 2); 3182 + } 3183 + 3178 3184 static struct irq_remap_table *alloc_irq_table(struct amd_iommu *iommu, 3179 - u16 devid, struct pci_dev *pdev) 3185 + u16 devid, struct pci_dev *pdev, 3186 + unsigned int max_irqs) 3180 3187 { 3181 3188 struct irq_remap_table *table = NULL; 3182 3189 struct irq_remap_table *new_table = NULL; 3183 3190 struct amd_iommu_pci_seg *pci_seg; 3184 3191 unsigned long flags; 3192 + int order = get_order(get_irq_table_size(max_irqs)); 3193 + int nid = iommu && iommu->dev ? 
dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE; 3185 3194 u16 alias; 3186 3195 3187 3196 spin_lock_irqsave(&iommu_table_lock, flags); ··· 3211 3198 spin_unlock_irqrestore(&iommu_table_lock, flags); 3212 3199 3213 3200 /* Nothing there yet, allocate new irq remapping table */ 3214 - new_table = __alloc_irq_table(); 3201 + new_table = __alloc_irq_table(nid, order); 3215 3202 if (!new_table) 3216 3203 return NULL; 3217 3204 ··· 3246 3233 spin_unlock_irqrestore(&iommu_table_lock, flags); 3247 3234 3248 3235 if (new_table) { 3249 - kmem_cache_free(amd_iommu_irq_cache, new_table->table); 3236 + iommu_free_pages(new_table->table, order); 3250 3237 kfree(new_table); 3251 3238 } 3252 3239 return table; 3253 3240 } 3254 3241 3255 3242 static int alloc_irq_index(struct amd_iommu *iommu, u16 devid, int count, 3256 - bool align, struct pci_dev *pdev) 3243 + bool align, struct pci_dev *pdev, 3244 + unsigned long max_irqs) 3257 3245 { 3258 3246 struct irq_remap_table *table; 3259 3247 int index, c, alignment = 1; 3260 3248 unsigned long flags; 3261 3249 3262 - table = alloc_irq_table(iommu, devid, pdev); 3250 + table = alloc_irq_table(iommu, devid, pdev, max_irqs); 3263 3251 if (!table) 3264 3252 return -ENODEV; 3265 3253 ··· 3271 3257 3272 3258 /* Scan table for free entries */ 3273 3259 for (index = ALIGN(table->min_index, alignment), c = 0; 3274 - index < MAX_IRQS_PER_TABLE;) { 3260 + index < max_irqs;) { 3275 3261 if (!iommu->irte_ops->is_allocated(table, index)) { 3276 3262 c += 1; 3277 3263 } else { ··· 3541 3527 msg->data = index; 3542 3528 msg->address_lo = 0; 3543 3529 msg->arch_addr_lo.base_address = X86_MSI_BASE_ADDRESS_LOW; 3530 + /* 3531 + * The struct msi_msg.dest_mode_logical is used to set the DM bit 3532 + * in MSI Message Address Register. For device w/ 2K int-remap support, 3533 + * this bit must be set to 1 regardless of the actual destination 3534 + * mode, which is signified by the IRTE[DM]. 
3535 + */ 3536 + if (FEATURE_NUM_INT_REMAP_SUP_2K(amd_iommu_efr2)) 3537 + msg->arch_addr_lo.dest_mode_logical = true; 3544 3538 msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH; 3545 3539 } 3546 3540 ··· 3611 3589 struct amd_ir_data *data = NULL; 3612 3590 struct amd_iommu *iommu; 3613 3591 struct irq_cfg *cfg; 3592 + struct iommu_dev_data *dev_data; 3593 + unsigned long max_irqs; 3614 3594 int i, ret, devid, seg, sbdf; 3615 3595 int index; 3616 3596 ··· 3631 3607 if (!iommu) 3632 3608 return -EINVAL; 3633 3609 3610 + dev_data = search_dev_data(iommu, devid); 3611 + max_irqs = dev_data ? dev_data->max_irqs : MAX_IRQS_PER_TABLE_512; 3612 + 3634 3613 ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); 3635 3614 if (ret < 0) 3636 3615 return ret; ··· 3641 3614 if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC) { 3642 3615 struct irq_remap_table *table; 3643 3616 3644 - table = alloc_irq_table(iommu, devid, NULL); 3617 + table = alloc_irq_table(iommu, devid, NULL, max_irqs); 3645 3618 if (table) { 3646 3619 if (!table->min_index) { 3647 3620 /* ··· 3662 3635 bool align = (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI); 3663 3636 3664 3637 index = alloc_irq_index(iommu, devid, nr_irqs, align, 3665 - msi_desc_to_pci_dev(info->desc)); 3638 + msi_desc_to_pci_dev(info->desc), 3639 + max_irqs); 3666 3640 } else { 3667 - index = alloc_irq_index(iommu, devid, nr_irqs, false, NULL); 3641 + index = alloc_irq_index(iommu, devid, nr_irqs, false, NULL, 3642 + max_irqs); 3668 3643 } 3669 3644 3670 3645 if (index < 0) {
+1 -1
drivers/iommu/amd/pasid.c
··· 195 195 196 196 ret = mmu_notifier_register(&pdom->mn, mm); 197 197 if (ret) { 198 - protection_domain_free(pdom); 198 + amd_iommu_domain_free(&pdom->domain); 199 199 return ERR_PTR(ret); 200 200 } 201 201
+14 -8
drivers/iommu/apple-dart.c
··· 36 36 37 37 #define DART_MAX_STREAMS 256 38 38 #define DART_MAX_TTBR 4 39 - #define MAX_DARTS_PER_DEVICE 2 39 + #define MAX_DARTS_PER_DEVICE 3 40 40 41 41 /* Common registers */ 42 42 ··· 277 277 * @streams: streams for this device 278 278 */ 279 279 struct apple_dart_master_cfg { 280 + /* Intersection of DART capabilities */ 281 + u32 supports_bypass : 1; 282 + 280 283 struct apple_dart_stream_map stream_maps[MAX_DARTS_PER_DEVICE]; 281 284 }; 282 285 ··· 687 684 struct apple_dart_stream_map *stream_map; 688 685 int i; 689 686 690 - if (!cfg->stream_maps[0].dart->supports_bypass) 687 + if (!cfg->supports_bypass) 691 688 return -EINVAL; 692 689 693 690 for_each_stream_map(i, cfg, stream_map) ··· 795 792 return -EINVAL; 796 793 sid = args->args[0]; 797 794 798 - if (!cfg) 795 + if (!cfg) { 799 796 cfg = kzalloc(sizeof(*cfg), GFP_KERNEL); 800 - if (!cfg) 801 - return -ENOMEM; 797 + if (!cfg) 798 + return -ENOMEM; 799 + /* Will be ANDed with DART capabilities */ 800 + cfg->supports_bypass = true; 801 + } 802 802 dev_iommu_priv_set(dev, cfg); 803 803 804 804 cfg_dart = cfg->stream_maps[0].dart; 805 805 if (cfg_dart) { 806 - if (cfg_dart->supports_bypass != dart->supports_bypass) 807 - return -EINVAL; 808 806 if (cfg_dart->pgsize != dart->pgsize) 809 807 return -EINVAL; 810 808 } 809 + 810 + cfg->supports_bypass &= dart->supports_bypass; 811 811 812 812 for (i = 0; i < MAX_DARTS_PER_DEVICE; ++i) { 813 813 if (cfg->stream_maps[i].dart == dart) { ··· 951 945 952 946 if (cfg->stream_maps[0].dart->pgsize > PAGE_SIZE) 953 947 return IOMMU_DOMAIN_IDENTITY; 954 - if (!cfg->stream_maps[0].dart->supports_bypass) 948 + if (!cfg->supports_bypass) 955 949 return IOMMU_DOMAIN_DMA; 956 950 957 951 return 0;
+6 -5
drivers/iommu/arm/arm-smmu/arm-smmu.c
··· 79 79 80 80 static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu) 81 81 { 82 - if (pm_runtime_enabled(smmu->dev)) 83 - pm_runtime_put_autosuspend(smmu->dev); 82 + if (pm_runtime_enabled(smmu->dev)) { 83 + pm_runtime_mark_last_busy(smmu->dev); 84 + __pm_runtime_put_autosuspend(smmu->dev); 85 + 86 + } 84 87 } 85 88 86 89 static void arm_smmu_rpm_use_autosuspend(struct arm_smmu_device *smmu) ··· 1198 1195 /* Looks ok, so add the device to the domain */ 1199 1196 arm_smmu_master_install_s2crs(cfg, S2CR_TYPE_TRANS, 1200 1197 smmu_domain->cfg.cbndx, fwspec); 1201 - arm_smmu_rpm_use_autosuspend(smmu); 1202 1198 rpm_put: 1203 1199 arm_smmu_rpm_put(smmu); 1204 1200 return ret; ··· 1220 1218 return ret; 1221 1219 1222 1220 arm_smmu_master_install_s2crs(cfg, type, 0, fwspec); 1223 - arm_smmu_rpm_use_autosuspend(smmu); 1224 1221 arm_smmu_rpm_put(smmu); 1225 1222 return 0; 1226 1223 } ··· 1487 1486 out_cfg_free: 1488 1487 kfree(cfg); 1489 1488 out_free: 1490 - iommu_fwspec_free(dev); 1491 1489 return ERR_PTR(ret); 1492 1490 } 1493 1491 ··· 2246 2246 if (dev->pm_domain) { 2247 2247 pm_runtime_set_active(dev); 2248 2248 pm_runtime_enable(dev); 2249 + arm_smmu_rpm_use_autosuspend(smmu); 2249 2250 } 2250 2251 2251 2252 return 0;
+25 -57
drivers/iommu/dma-iommu.c
··· 24 24 #include <linux/memremap.h> 25 25 #include <linux/mm.h> 26 26 #include <linux/mutex.h> 27 + #include <linux/msi.h> 27 28 #include <linux/of_iommu.h> 28 29 #include <linux/pci.h> 29 30 #include <linux/scatterlist.h> ··· 87 86 struct iommu_domain *fq_domain; 88 87 /* Options for dma-iommu use */ 89 88 struct iommu_dma_options options; 90 - struct mutex mutex; 91 89 }; 92 90 93 91 static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled); ··· 101 101 return ret; 102 102 } 103 103 early_param("iommu.forcedac", iommu_dma_forcedac_setup); 104 + 105 + static int iommu_dma_sw_msi(struct iommu_domain *domain, struct msi_desc *desc, 106 + phys_addr_t msi_addr); 104 107 105 108 /* Number of entries per flush queue */ 106 109 #define IOVA_DEFAULT_FQ_SIZE 256 ··· 400 397 if (!domain->iova_cookie) 401 398 return -ENOMEM; 402 399 403 - mutex_init(&domain->iova_cookie->mutex); 400 + iommu_domain_set_sw_msi(domain, iommu_dma_sw_msi); 404 401 return 0; 405 402 } 406 403 ··· 432 429 433 430 cookie->msi_iova = base; 434 431 domain->iova_cookie = cookie; 432 + iommu_domain_set_sw_msi(domain, iommu_dma_sw_msi); 435 433 return 0; 436 434 } 437 435 EXPORT_SYMBOL(iommu_get_msi_cookie); ··· 446 442 { 447 443 struct iommu_dma_cookie *cookie = domain->iova_cookie; 448 444 struct iommu_dma_msi_page *msi, *tmp; 445 + 446 + #if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU) 447 + if (domain->sw_msi != iommu_dma_sw_msi) 448 + return; 449 + #endif 449 450 450 451 if (!cookie) 451 452 return; ··· 707 698 domain->geometry.aperture_start >> order); 708 699 709 700 /* start_pfn is always nonzero for an already-initialised domain */ 710 - mutex_lock(&cookie->mutex); 711 701 if (iovad->start_pfn) { 712 702 if (1UL << order != iovad->granule || 713 703 base_pfn != iovad->start_pfn) { 714 704 pr_warn("Incompatible range for DMA domain\n"); 715 - ret = -EFAULT; 716 - goto done_unlock; 705 + return -EFAULT; 717 706 } 718 707 719 - ret = 0; 720 - goto done_unlock; 708 + return 0; 721 709 } 722 710 723 711 
init_iova_domain(iovad, 1UL << order, base_pfn); 724 712 ret = iova_domain_init_rcaches(iovad); 725 713 if (ret) 726 - goto done_unlock; 714 + return ret; 727 715 728 716 iommu_dma_init_options(&cookie->options, dev); 729 717 ··· 729 723 (!device_iommu_capable(dev, IOMMU_CAP_DEFERRED_FLUSH) || iommu_dma_init_fq(domain))) 730 724 domain->type = IOMMU_DOMAIN_DMA; 731 725 732 - ret = iova_reserve_iommu_regions(dev, domain); 733 - 734 - done_unlock: 735 - mutex_unlock(&cookie->mutex); 736 - return ret; 726 + return iova_reserve_iommu_regions(dev, domain); 737 727 } 738 728 739 729 /** ··· 1802 1800 return NULL; 1803 1801 } 1804 1802 1805 - /** 1806 - * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU domain 1807 - * @desc: MSI descriptor, will store the MSI page 1808 - * @msi_addr: MSI target address to be mapped 1809 - * 1810 - * Return: 0 on success or negative error code if the mapping failed. 1811 - */ 1812 - int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) 1803 + static int iommu_dma_sw_msi(struct iommu_domain *domain, struct msi_desc *desc, 1804 + phys_addr_t msi_addr) 1813 1805 { 1814 1806 struct device *dev = msi_desc_to_dev(desc); 1815 - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); 1816 - struct iommu_dma_msi_page *msi_page; 1817 - static DEFINE_MUTEX(msi_prepare_lock); /* see below */ 1807 + const struct iommu_dma_msi_page *msi_page; 1818 1808 1819 - if (!domain || !domain->iova_cookie) { 1820 - desc->iommu_cookie = NULL; 1809 + if (!domain->iova_cookie) { 1810 + msi_desc_set_iommu_msi_iova(desc, 0, 0); 1821 1811 return 0; 1822 1812 } 1823 1813 1824 - /* 1825 - * In fact the whole prepare operation should already be serialised by 1826 - * irq_domain_mutex further up the callchain, but that's pretty subtle 1827 - * on its own, so consider this locking as failsafe documentation... 
1828 - */ 1829 - mutex_lock(&msi_prepare_lock); 1814 + iommu_group_mutex_assert(dev); 1830 1815 msi_page = iommu_dma_get_msi_page(dev, msi_addr, domain); 1831 - mutex_unlock(&msi_prepare_lock); 1832 - 1833 - msi_desc_set_iommu_cookie(desc, msi_page); 1834 - 1835 1816 if (!msi_page) 1836 1817 return -ENOMEM; 1818 + 1819 + msi_desc_set_iommu_msi_iova( 1820 + desc, msi_page->iova, 1821 + ilog2(cookie_msi_granule(domain->iova_cookie))); 1837 1822 return 0; 1838 - } 1839 - 1840 - /** 1841 - * iommu_dma_compose_msi_msg() - Apply translation to an MSI message 1842 - * @desc: MSI descriptor prepared by iommu_dma_prepare_msi() 1843 - * @msg: MSI message containing target physical address 1844 - */ 1845 - void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg) 1846 - { 1847 - struct device *dev = msi_desc_to_dev(desc); 1848 - const struct iommu_domain *domain = iommu_get_domain_for_dev(dev); 1849 - const struct iommu_dma_msi_page *msi_page; 1850 - 1851 - msi_page = msi_desc_get_iommu_cookie(desc); 1852 - 1853 - if (!domain || !domain->iova_cookie || WARN_ON(!msi_page)) 1854 - return; 1855 - 1856 - msg->address_hi = upper_32_bits(msi_page->iova); 1857 - msg->address_lo &= cookie_msi_granule(domain->iova_cookie) - 1; 1858 - msg->address_lo += lower_32_bits(msi_page->iova); 1859 1823 } 1860 1824 1861 1825 static int iommu_dma_init(void)
+86 -153
drivers/iommu/intel/iommu.c
··· 737 737 return NULL; 738 738 739 739 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE); 740 - pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE; 740 + pteval = virt_to_phys(tmp_page) | DMA_PTE_READ | 741 + DMA_PTE_WRITE; 741 742 if (domain->use_first_level) 742 743 pteval |= DMA_FL_PTE_US | DMA_FL_PTE_ACCESS; 743 744 ··· 1173 1172 return true; 1174 1173 } 1175 1174 1176 - static void iommu_enable_pci_caps(struct device_domain_info *info) 1175 + static void iommu_enable_pci_ats(struct device_domain_info *info) 1177 1176 { 1178 1177 struct pci_dev *pdev; 1179 1178 1180 - if (!dev_is_pci(info->dev)) 1179 + if (!info->ats_supported) 1181 1180 return; 1182 1181 1183 1182 pdev = to_pci_dev(info->dev); 1184 - if (info->ats_supported && pci_ats_page_aligned(pdev) && 1185 - !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) 1183 + if (!pci_ats_page_aligned(pdev)) 1184 + return; 1185 + 1186 + if (!pci_enable_ats(pdev, VTD_PAGE_SHIFT)) 1186 1187 info->ats_enabled = 1; 1187 1188 } 1188 1189 1189 - static void iommu_disable_pci_caps(struct device_domain_info *info) 1190 + static void iommu_disable_pci_ats(struct device_domain_info *info) 1191 + { 1192 + if (!info->ats_enabled) 1193 + return; 1194 + 1195 + pci_disable_ats(to_pci_dev(info->dev)); 1196 + info->ats_enabled = 0; 1197 + } 1198 + 1199 + static void iommu_enable_pci_pri(struct device_domain_info *info) 1190 1200 { 1191 1201 struct pci_dev *pdev; 1192 1202 1193 - if (!dev_is_pci(info->dev)) 1203 + if (!info->ats_enabled || !info->pri_supported) 1194 1204 return; 1195 1205 1196 1206 pdev = to_pci_dev(info->dev); 1207 + /* PASID is required in PRG Response Message. 
*/ 1208 + if (info->pasid_enabled && !pci_prg_resp_pasid_required(pdev)) 1209 + return; 1197 1210 1198 - if (info->ats_enabled) { 1199 - pci_disable_ats(pdev); 1200 - info->ats_enabled = 0; 1201 - } 1211 + if (pci_reset_pri(pdev)) 1212 + return; 1213 + 1214 + if (!pci_enable_pri(pdev, PRQ_DEPTH)) 1215 + info->pri_enabled = 1; 1216 + } 1217 + 1218 + static void iommu_disable_pci_pri(struct device_domain_info *info) 1219 + { 1220 + if (!info->pri_enabled) 1221 + return; 1222 + 1223 + if (WARN_ON(info->iopf_refcount)) 1224 + iopf_queue_remove_device(info->iommu->iopf_queue, info->dev); 1225 + 1226 + pci_disable_pri(to_pci_dev(info->dev)); 1227 + info->pri_enabled = 0; 1202 1228 } 1203 1229 1204 1230 static void intel_flush_iotlb_all(struct iommu_domain *domain) ··· 1584 1556 struct device_domain_info *info = dev_iommu_priv_get(dev); 1585 1557 struct intel_iommu *iommu = info->iommu; 1586 1558 u8 bus = info->bus, devfn = info->devfn; 1559 + int ret; 1587 1560 1588 1561 if (!dev_is_pci(dev)) 1589 1562 return domain_context_mapping_one(domain, iommu, bus, devfn); 1590 1563 1591 - return pci_for_each_dma_alias(to_pci_dev(dev), 1592 - domain_context_mapping_cb, domain); 1564 + ret = pci_for_each_dma_alias(to_pci_dev(dev), 1565 + domain_context_mapping_cb, domain); 1566 + if (ret) 1567 + return ret; 1568 + 1569 + iommu_enable_pci_ats(info); 1570 + 1571 + return 0; 1593 1572 } 1594 1573 1595 1574 /* Return largest possible superpage level for a given mapping */ ··· 1783 1748 context_clear_entry(context); 1784 1749 __iommu_flush_cache(iommu, context, sizeof(*context)); 1785 1750 spin_unlock(&iommu->lock); 1786 - intel_context_flush_present(info, context, did, true); 1751 + intel_context_flush_no_pasid(info, context, did); 1787 1752 } 1788 1753 1789 1754 int __domain_setup_first_level(struct intel_iommu *iommu, ··· 1877 1842 1878 1843 if (ret) 1879 1844 goto out_block_translation; 1880 - 1881 - iommu_enable_pci_caps(info); 1882 1845 1883 1846 ret = 
cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID); 1884 1847 if (ret) ··· 2904 2871 if (no_iommu || dmar_disabled) 2905 2872 return; 2906 2873 2907 - down_write(&dmar_global_lock); 2874 + /* 2875 + * All other CPUs were brought down, hotplug interrupts were disabled, 2876 + * no lock and RCU checking needed anymore 2877 + */ 2878 + list_for_each_entry(drhd, &dmar_drhd_units, list) { 2879 + iommu = drhd->iommu; 2908 2880 2909 - /* Disable PMRs explicitly here. */ 2910 - for_each_iommu(iommu, drhd) 2881 + /* Disable PMRs explicitly here. */ 2911 2882 iommu_disable_protect_mem_regions(iommu); 2912 2883 2913 - /* Make sure the IOMMUs are switched off */ 2914 - intel_disable_iommus(); 2915 - 2916 - up_write(&dmar_global_lock); 2884 + /* Make sure the IOMMUs are switched off */ 2885 + iommu_disable_translation(iommu); 2886 + } 2917 2887 } 2918 2888 2919 2889 static struct intel_iommu *dev_to_intel_iommu(struct device *dev) ··· 3049 3013 if (dev->bus != &acpi_bus_type) 3050 3014 continue; 3051 3015 3016 + up_read(&dmar_global_lock); 3052 3017 adev = to_acpi_device(dev); 3053 3018 mutex_lock(&adev->physical_node_lock); 3054 3019 list_for_each_entry(pn, ··· 3059 3022 break; 3060 3023 } 3061 3024 mutex_unlock(&adev->physical_node_lock); 3025 + down_read(&dmar_global_lock); 3062 3026 3063 3027 if (ret) 3064 3028 return ret; ··· 3243 3205 3244 3206 pci_for_each_dma_alias(to_pci_dev(info->dev), 3245 3207 &domain_context_clear_one_cb, info); 3208 + iommu_disable_pci_ats(info); 3246 3209 } 3247 3210 3248 3211 /* ··· 3260 3221 if (info->domain) 3261 3222 cache_tag_unassign_domain(info->domain, dev, IOMMU_NO_PASID); 3262 3223 3263 - iommu_disable_pci_caps(info); 3264 3224 if (!dev_is_real_dma_subdevice(dev)) { 3265 3225 if (sm_supported(iommu)) 3266 3226 intel_pasid_tear_down_entry(iommu, dev, ··· 3794 3756 !pci_enable_pasid(pdev, info->pasid_supported & ~1)) 3795 3757 info->pasid_enabled = 1; 3796 3758 3759 + if (sm_supported(iommu)) 3760 + iommu_enable_pci_ats(info); 3761 + 
iommu_enable_pci_pri(info); 3762 + 3797 3763 return &iommu->iommu; 3798 3764 free_table: 3799 3765 intel_pasid_free_table(dev); ··· 3813 3771 { 3814 3772 struct device_domain_info *info = dev_iommu_priv_get(dev); 3815 3773 struct intel_iommu *iommu = info->iommu; 3774 + 3775 + iommu_disable_pci_pri(info); 3776 + iommu_disable_pci_ats(info); 3816 3777 3817 3778 if (info->pasid_enabled) { 3818 3779 pci_disable_pasid(to_pci_dev(dev)); ··· 3903 3858 return generic_device_group(dev); 3904 3859 } 3905 3860 3906 - static int intel_iommu_enable_sva(struct device *dev) 3861 + int intel_iommu_enable_iopf(struct device *dev) 3907 3862 { 3908 3863 struct device_domain_info *info = dev_iommu_priv_get(dev); 3909 - struct intel_iommu *iommu; 3910 - 3911 - if (!info || dmar_disabled) 3912 - return -EINVAL; 3913 - 3914 - iommu = info->iommu; 3915 - if (!iommu) 3916 - return -EINVAL; 3917 - 3918 - if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE)) 3919 - return -ENODEV; 3920 - 3921 - if (!info->pasid_enabled || !info->ats_enabled) 3922 - return -EINVAL; 3923 - 3924 - /* 3925 - * Devices having device-specific I/O fault handling should not 3926 - * support PCI/PRI. The IOMMU side has no means to check the 3927 - * capability of device-specific IOPF. Therefore, IOMMU can only 3928 - * default that if the device driver enables SVA on a non-PRI 3929 - * device, it will handle IOPF in its own way. 3930 - */ 3931 - if (!info->pri_supported) 3932 - return 0; 3933 - 3934 - /* Devices supporting PRI should have it enabled. 
*/ 3935 - if (!info->pri_enabled) 3936 - return -EINVAL; 3937 - 3938 - return 0; 3939 - } 3940 - 3941 - static int context_flip_pri(struct device_domain_info *info, bool enable) 3942 - { 3943 3864 struct intel_iommu *iommu = info->iommu; 3944 - u8 bus = info->bus, devfn = info->devfn; 3945 - struct context_entry *context; 3946 - u16 did; 3947 - 3948 - spin_lock(&iommu->lock); 3949 - if (context_copied(iommu, bus, devfn)) { 3950 - spin_unlock(&iommu->lock); 3951 - return -EINVAL; 3952 - } 3953 - 3954 - context = iommu_context_addr(iommu, bus, devfn, false); 3955 - if (!context || !context_present(context)) { 3956 - spin_unlock(&iommu->lock); 3957 - return -ENODEV; 3958 - } 3959 - did = context_domain_id(context); 3960 - 3961 - if (enable) 3962 - context_set_sm_pre(context); 3963 - else 3964 - context_clear_sm_pre(context); 3965 - 3966 - if (!ecap_coherent(iommu->ecap)) 3967 - clflush_cache_range(context, sizeof(*context)); 3968 - intel_context_flush_present(info, context, did, true); 3969 - spin_unlock(&iommu->lock); 3970 - 3971 - return 0; 3972 - } 3973 - 3974 - static int intel_iommu_enable_iopf(struct device *dev) 3975 - { 3976 - struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL; 3977 - struct device_domain_info *info = dev_iommu_priv_get(dev); 3978 - struct intel_iommu *iommu; 3979 3865 int ret; 3980 3866 3981 - if (!pdev || !info || !info->ats_enabled || !info->pri_supported) 3867 + if (!info->pri_enabled) 3982 3868 return -ENODEV; 3983 3869 3984 - if (info->pri_enabled) 3985 - return -EBUSY; 3986 - 3987 - iommu = info->iommu; 3988 - if (!iommu) 3989 - return -EINVAL; 3990 - 3991 - /* PASID is required in PRG Response Message. 
*/ 3992 - if (info->pasid_enabled && !pci_prg_resp_pasid_required(pdev)) 3993 - return -EINVAL; 3994 - 3995 - ret = pci_reset_pri(pdev); 3996 - if (ret) 3997 - return ret; 3870 + if (info->iopf_refcount) { 3871 + info->iopf_refcount++; 3872 + return 0; 3873 + } 3998 3874 3999 3875 ret = iopf_queue_add_device(iommu->iopf_queue, dev); 4000 3876 if (ret) 4001 3877 return ret; 4002 3878 4003 - ret = context_flip_pri(info, true); 4004 - if (ret) 4005 - goto err_remove_device; 4006 - 4007 - ret = pci_enable_pri(pdev, PRQ_DEPTH); 4008 - if (ret) 4009 - goto err_clear_pri; 4010 - 4011 - info->pri_enabled = 1; 3879 + info->iopf_refcount = 1; 4012 3880 4013 3881 return 0; 4014 - err_clear_pri: 4015 - context_flip_pri(info, false); 4016 - err_remove_device: 4017 - iopf_queue_remove_device(iommu->iopf_queue, dev); 4018 - 4019 - return ret; 4020 3882 } 4021 3883 4022 - static int intel_iommu_disable_iopf(struct device *dev) 3884 + void intel_iommu_disable_iopf(struct device *dev) 4023 3885 { 4024 3886 struct device_domain_info *info = dev_iommu_priv_get(dev); 4025 3887 struct intel_iommu *iommu = info->iommu; 4026 3888 4027 - if (!info->pri_enabled) 4028 - return -EINVAL; 3889 + if (WARN_ON(!info->pri_enabled || !info->iopf_refcount)) 3890 + return; 4029 3891 4030 - /* Disable new PRI reception: */ 4031 - context_flip_pri(info, false); 3892 + if (--info->iopf_refcount) 3893 + return; 4032 3894 4033 - /* 4034 - * Remove device from fault queue and acknowledge all outstanding 4035 - * PRQs to the device: 4036 - */ 4037 3895 iopf_queue_remove_device(iommu->iopf_queue, dev); 4038 - 4039 - /* 4040 - * PCIe spec states that by clearing PRI enable bit, the Page 4041 - * Request Interface will not issue new page requests, but has 4042 - * outstanding page requests that have been transmitted or are 4043 - * queued for transmission. 
This is supposed to be called after 4044 - * the device driver has stopped DMA, all PASIDs have been 4045 - * unbound and the outstanding PRQs have been drained. 4046 - */ 4047 - pci_disable_pri(to_pci_dev(dev)); 4048 - info->pri_enabled = 0; 4049 - 4050 - return 0; 4051 3896 } 4052 3897 4053 3898 static int ··· 3948 4013 return intel_iommu_enable_iopf(dev); 3949 4014 3950 4015 case IOMMU_DEV_FEAT_SVA: 3951 - return intel_iommu_enable_sva(dev); 4016 + return 0; 3952 4017 3953 4018 default: 3954 4019 return -ENODEV; ··· 3960 4025 { 3961 4026 switch (feat) { 3962 4027 case IOMMU_DEV_FEAT_IOPF: 3963 - return intel_iommu_disable_iopf(dev); 4028 + intel_iommu_disable_iopf(dev); 4029 + return 0; 3964 4030 3965 4031 case IOMMU_DEV_FEAT_SVA: 3966 4032 return 0; ··· 4346 4410 if (dev_is_real_dma_subdevice(dev)) 4347 4411 return 0; 4348 4412 4349 - if (sm_supported(iommu)) { 4413 + if (sm_supported(iommu)) 4350 4414 ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID); 4351 - if (!ret) 4352 - iommu_enable_pci_caps(info); 4353 - } else { 4415 + else 4354 4416 ret = device_setup_pass_through(dev); 4355 - } 4356 4417 4357 4418 return ret; 4358 4419 }
+6 -22
drivers/iommu/intel/iommu.h
··· 774 774 u8 ats_enabled:1; 775 775 u8 dtlb_extra_inval:1; /* Quirk for devices need extra flush */ 776 776 u8 ats_qdep; 777 + unsigned int iopf_refcount; 777 778 struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ 778 779 struct intel_iommu *iommu; /* IOMMU used by this device */ 779 780 struct dmar_domain *domain; /* pointer to domain */ ··· 952 951 static inline unsigned long lvl_to_nr_pages(unsigned int lvl) 953 952 { 954 953 return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH); 955 - } 956 - 957 - /* VT-d pages must always be _smaller_ than MM pages. Otherwise things 958 - are never going to work. */ 959 - static inline unsigned long mm_to_dma_pfn_start(unsigned long mm_pfn) 960 - { 961 - return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT); 962 - } 963 - static inline unsigned long mm_to_dma_pfn_end(unsigned long mm_pfn) 964 - { 965 - return ((mm_pfn + 1) << (PAGE_SHIFT - VTD_PAGE_SHIFT)) - 1; 966 - } 967 - static inline unsigned long page_to_dma_pfn(struct page *pg) 968 - { 969 - return mm_to_dma_pfn_start(page_to_pfn(pg)); 970 - } 971 - static inline unsigned long virt_to_dma_pfn(void *p) 972 - { 973 - return page_to_dma_pfn(virt_to_page(p)); 974 954 } 975 955 976 956 static inline void context_set_present(struct context_entry *context) ··· 1286 1304 void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start, 1287 1305 unsigned long end); 1288 1306 1289 - void intel_context_flush_present(struct device_domain_info *info, 1290 - struct context_entry *context, 1291 - u16 did, bool affect_domains); 1307 + void intel_context_flush_no_pasid(struct device_domain_info *info, 1308 + struct context_entry *context, u16 did); 1292 1309 1293 1310 int intel_iommu_enable_prq(struct intel_iommu *iommu); 1294 1311 int intel_iommu_finish_prq(struct intel_iommu *iommu); 1295 1312 void intel_iommu_page_response(struct device *dev, struct iopf_fault *evt, 1296 1313 struct iommu_page_response *msg); 1297 1314 void 
intel_iommu_drain_pasid_prq(struct device *dev, u32 pasid); 1315 + 1316 + int intel_iommu_enable_iopf(struct device *dev); 1317 + void intel_iommu_disable_iopf(struct device *dev); 1298 1318 1299 1319 #ifdef CONFIG_INTEL_IOMMU_SVM 1300 1320 void intel_svm_check(struct intel_iommu *iommu);
+27 -15
drivers/iommu/intel/irq_remapping.c
··· 25 25 #include "../irq_remapping.h" 26 26 #include "../iommu-pages.h" 27 27 28 - enum irq_mode { 29 - IRQ_REMAPPING, 30 - IRQ_POSTING, 31 - }; 32 - 33 28 struct ioapic_scope { 34 29 struct intel_iommu *iommu; 35 30 unsigned int id; ··· 44 49 u16 irte_index; 45 50 u16 sub_handle; 46 51 u8 irte_mask; 47 - enum irq_mode mode; 48 52 bool posted_msi; 53 + bool posted_vcpu; 49 54 }; 50 55 51 56 struct intel_ir_data { ··· 133 138 irq_iommu->irte_index = index; 134 139 irq_iommu->sub_handle = 0; 135 140 irq_iommu->irte_mask = mask; 136 - irq_iommu->mode = IRQ_REMAPPING; 137 141 } 138 142 raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); 139 143 ··· 187 193 188 194 rc = qi_flush_iec(iommu, index, 0); 189 195 190 - /* Update iommu mode according to the IRTE mode */ 191 - irq_iommu->mode = irte->pst ? IRQ_POSTING : IRQ_REMAPPING; 192 196 raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); 193 197 194 198 return rc; ··· 1161 1169 static inline void intel_ir_reconfigure_irte_posted(struct irq_data *irqd) {} 1162 1170 #endif 1163 1171 1164 - static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force) 1172 + static void __intel_ir_reconfigure_irte(struct irq_data *irqd, bool force_host) 1173 + { 1174 + struct intel_ir_data *ir_data = irqd->chip_data; 1175 + 1176 + /* 1177 + * Don't modify IRTEs for IRQs that are being posted to vCPUs if the 1178 + * host CPU affinity changes. 
1179 + */ 1180 + if (ir_data->irq_2_iommu.posted_vcpu && !force_host) 1181 + return; 1182 + 1183 + ir_data->irq_2_iommu.posted_vcpu = false; 1184 + 1185 + if (ir_data->irq_2_iommu.posted_msi) 1186 + intel_ir_reconfigure_irte_posted(irqd); 1187 + else 1188 + modify_irte(&ir_data->irq_2_iommu, &ir_data->irte_entry); 1189 + } 1190 + 1191 + static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force_host) 1165 1192 { 1166 1193 struct intel_ir_data *ir_data = irqd->chip_data; 1167 1194 struct irte *irte = &ir_data->irte_entry; ··· 1193 1182 irte->vector = cfg->vector; 1194 1183 irte->dest_id = IRTE_DEST(cfg->dest_apicid); 1195 1184 1196 - if (ir_data->irq_2_iommu.posted_msi) 1197 - intel_ir_reconfigure_irte_posted(irqd); 1198 - else if (force || ir_data->irq_2_iommu.mode == IRQ_REMAPPING) 1199 - modify_irte(&ir_data->irq_2_iommu, irte); 1185 + __intel_ir_reconfigure_irte(irqd, force_host); 1200 1186 } 1201 1187 1202 1188 /* ··· 1248 1240 1249 1241 /* stop posting interrupts, back to the default mode */ 1250 1242 if (!vcpu_pi_info) { 1251 - modify_irte(&ir_data->irq_2_iommu, &ir_data->irte_entry); 1243 + __intel_ir_reconfigure_irte(data, true); 1252 1244 } else { 1253 1245 struct irte irte_pi; 1254 1246 ··· 1271 1263 irte_pi.pda_h = (vcpu_pi_info->pi_desc_addr >> 32) & 1272 1264 ~(-1UL << PDA_HIGH_BIT); 1273 1265 1266 + ir_data->irq_2_iommu.posted_vcpu = true; 1274 1267 modify_irte(&ir_data->irq_2_iommu, &irte_pi); 1275 1268 } 1276 1269 ··· 1497 1488 { 1498 1489 struct intel_ir_data *data = irq_data->chip_data; 1499 1490 struct irte entry; 1491 + 1492 + WARN_ON_ONCE(data->irq_2_iommu.posted_vcpu); 1493 + data->irq_2_iommu.posted_vcpu = false; 1500 1494 1501 1495 memset(&entry, 0, sizeof(entry)); 1502 1496 modify_irte(&data->irq_2_iommu, &entry);
+9 -34
drivers/iommu/intel/pasid.c
··· 932 932 context_clear_entry(context); 933 933 __iommu_flush_cache(iommu, context, sizeof(*context)); 934 934 spin_unlock(&iommu->lock); 935 - intel_context_flush_present(info, context, did, false); 935 + intel_context_flush_no_pasid(info, context, did); 936 936 } 937 937 938 938 static int pci_pasid_table_teardown(struct pci_dev *pdev, u16 alias, void *data) ··· 992 992 context_set_sm_dte(context); 993 993 if (info->pasid_supported) 994 994 context_set_pasid(context); 995 + if (info->pri_supported) 996 + context_set_sm_pre(context); 995 997 996 998 context_set_fault_enable(context); 997 999 context_set_present(context); ··· 1119 1117 1120 1118 /* 1121 1119 * Cache invalidations after change in a context table entry that was present 1122 - * according to the Spec 6.5.3.3 (Guidance to Software for Invalidations). If 1123 - * IOMMU is in scalable mode and all PASID table entries of the device were 1124 - * non-present, set flush_domains to false. Otherwise, true. 1120 + * according to the Spec 6.5.3.3 (Guidance to Software for Invalidations). 1121 + * This helper can only be used when IOMMU is working in the legacy mode or 1122 + * IOMMU is in scalable mode but all PASID table entries of the device are 1123 + * non-present. 1125 1124 */ 1126 - void intel_context_flush_present(struct device_domain_info *info, 1127 - struct context_entry *context, 1128 - u16 did, bool flush_domains) 1125 + void intel_context_flush_no_pasid(struct device_domain_info *info, 1126 + struct context_entry *context, u16 did) 1129 1127 { 1130 1128 struct intel_iommu *iommu = info->iommu; 1131 - struct pasid_entry *pte; 1132 - int i; 1133 1129 1134 1130 /* 1135 1131 * Device-selective context-cache invalidation. 
The Domain-ID field ··· 1148 1148 __context_flush_dev_iotlb(info); 1149 1149 1150 1150 return; 1151 - } 1152 - 1153 - /* 1154 - * For scalable mode: 1155 - * - Domain-selective PASID-cache invalidation to affected domains 1156 - * - Domain-selective IOTLB invalidation to affected domains 1157 - * - Global Device-TLB invalidation to affected functions 1158 - */ 1159 - if (flush_domains) { 1160 - /* 1161 - * If the IOMMU is running in scalable mode and there might 1162 - * be potential PASID translations, the caller should hold 1163 - * the lock to ensure that context changes and cache flushes 1164 - * are atomic. 1165 - */ 1166 - assert_spin_locked(&iommu->lock); 1167 - for (i = 0; i < info->pasid_table->max_pasid; i++) { 1168 - pte = intel_pasid_get_entry(info->dev, i); 1169 - if (!pte || !pasid_pte_is_present(pte)) 1170 - continue; 1171 - 1172 - did = pasid_get_domain_id(pte); 1173 - qi_flush_pasid_cache(iommu, did, QI_PC_ALL_PASIDS, 0); 1174 - iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); 1175 - } 1176 1151 } 1177 1152 1178 1153 __context_flush_dev_iotlb(info);
+1 -1
drivers/iommu/intel/prq.c
··· 67 67 u16 sid, did; 68 68 69 69 info = dev_iommu_priv_get(dev); 70 - if (!info->pri_enabled) 70 + if (!info->iopf_refcount) 71 71 return; 72 72 73 73 iommu = info->iommu;
+43
drivers/iommu/intel/svm.c
··· 110 110 .free_notifier = intel_mm_free_notifier, 111 111 }; 112 112 113 + static int intel_iommu_sva_supported(struct device *dev) 114 + { 115 + struct device_domain_info *info = dev_iommu_priv_get(dev); 116 + struct intel_iommu *iommu; 117 + 118 + if (!info || dmar_disabled) 119 + return -EINVAL; 120 + 121 + iommu = info->iommu; 122 + if (!iommu) 123 + return -EINVAL; 124 + 125 + if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE)) 126 + return -ENODEV; 127 + 128 + if (!info->pasid_enabled || !info->ats_enabled) 129 + return -EINVAL; 130 + 131 + /* 132 + * Devices having device-specific I/O fault handling should not 133 + * support PCI/PRI. The IOMMU side has no means to check the 134 + * capability of device-specific IOPF. Therefore, IOMMU can only 135 + * default that if the device driver enables SVA on a non-PRI 136 + * device, it will handle IOPF in its own way. 137 + */ 138 + if (!info->pri_supported) 139 + return 0; 140 + 141 + /* Devices supporting PRI should have it enabled. */ 142 + if (!info->pri_enabled) 143 + return -EINVAL; 144 + 145 + return 0; 146 + } 147 + 113 148 static int intel_svm_set_dev_pasid(struct iommu_domain *domain, 114 149 struct device *dev, ioasid_t pasid, 115 150 struct iommu_domain *old) ··· 155 120 struct dev_pasid_info *dev_pasid; 156 121 unsigned long sflags; 157 122 int ret = 0; 123 + 124 + ret = intel_iommu_sva_supported(dev); 125 + if (ret) 126 + return ret; 158 127 159 128 dev_pasid = domain_add_dev_pasid(domain, dev, pasid); 160 129 if (IS_ERR(dev_pasid)) ··· 199 160 { 200 161 struct dmar_domain *domain; 201 162 int ret; 163 + 164 + ret = intel_iommu_sva_supported(dev); 165 + if (ret) 166 + return ERR_PTR(ret); 202 167 203 168 domain = kzalloc(sizeof(*domain), GFP_KERNEL); 204 169 if (!domain)
+1 -1
drivers/iommu/io-pgtable-dart.c
··· 135 135 pte |= FIELD_PREP(APPLE_DART_PTE_SUBPAGE_START, 0); 136 136 pte |= FIELD_PREP(APPLE_DART_PTE_SUBPAGE_END, 0xfff); 137 137 138 - pte |= APPLE_DART1_PTE_PROT_SP_DIS; 139 138 pte |= APPLE_DART_PTE_VALID; 140 139 141 140 for (i = 0; i < num_entries; i++) ··· 210 211 dart_iopte pte = 0; 211 212 212 213 if (data->iop.fmt == APPLE_DART) { 214 + pte |= APPLE_DART1_PTE_PROT_SP_DIS; 213 215 if (!(prot & IOMMU_WRITE)) 214 216 pte |= APPLE_DART1_PTE_PROT_NO_WRITE; 215 217 if (!(prot & IOMMU_READ))
+3 -2
drivers/iommu/iommu-priv.h
··· 17 17 return dev->iommu->iommu_dev->ops; 18 18 } 19 19 20 + void dev_iommu_free(struct device *dev); 21 + 20 22 const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode); 21 23 22 24 static inline const struct iommu_ops *iommu_fwspec_ops(struct iommu_fwspec *fwspec) ··· 26 24 return iommu_ops_from_fwnode(fwspec ? fwspec->iommu_fwnode : NULL); 27 25 } 28 26 29 - int iommu_group_replace_domain(struct iommu_group *group, 30 - struct iommu_domain *new_domain); 27 + void iommu_fwspec_free(struct device *dev); 31 28 32 29 int iommu_device_register_bus(struct iommu_device *iommu, 33 30 const struct iommu_ops *ops,
+146 -74
drivers/iommu/iommu.c
··· 45 45 static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT); 46 46 static u32 iommu_cmd_line __read_mostly; 47 47 48 + /* Tags used with xa_tag_pointer() in group->pasid_array */ 49 + enum { IOMMU_PASID_ARRAY_DOMAIN = 0, IOMMU_PASID_ARRAY_HANDLE = 1 }; 50 + 48 51 struct iommu_group { 49 52 struct kobject kobj; 50 53 struct kobject *devices_kobj; ··· 355 352 return param; 356 353 } 357 354 358 - static void dev_iommu_free(struct device *dev) 355 + void dev_iommu_free(struct device *dev) 359 356 { 360 357 struct dev_iommu *param = dev->iommu; 361 358 ··· 407 404 * Init the dev->iommu and dev->iommu_group in the struct device and get the 408 405 * driver probed 409 406 */ 410 - static int iommu_init_device(struct device *dev, const struct iommu_ops *ops) 407 + static int iommu_init_device(struct device *dev) 411 408 { 409 + const struct iommu_ops *ops; 412 410 struct iommu_device *iommu_dev; 413 411 struct iommu_group *group; 414 412 int ret; 415 413 416 414 if (!dev_iommu_get(dev)) 417 415 return -ENOMEM; 416 + /* 417 + * For FDT-based systems and ACPI IORT/VIOT, the common firmware parsing 418 + * is buried in the bus dma_configure path. Properly unpicking that is 419 + * still a big job, so for now just invoke the whole thing. The device 420 + * already having a driver bound means dma_configure has already run and 421 + * either found no IOMMU to wait for, or we're in its replay call right 422 + * now, so either way there's no point calling it again. 
423 + */ 424 + if (!dev->driver && dev->bus->dma_configure) { 425 + mutex_unlock(&iommu_probe_device_lock); 426 + dev->bus->dma_configure(dev); 427 + mutex_lock(&iommu_probe_device_lock); 428 + } 429 + /* 430 + * At this point, relevant devices either now have a fwspec which will 431 + * match ops registered with a non-NULL fwnode, or we can reasonably 432 + * assume that only one of Intel, AMD, s390, PAMU or legacy SMMUv2 can 433 + * be present, and that any of their registered instances has suitable 434 + * ops for probing, and thus cheekily co-opt the same mechanism. 435 + */ 436 + ops = iommu_fwspec_ops(dev->iommu->fwspec); 437 + if (!ops) { 438 + ret = -ENODEV; 439 + goto err_free; 440 + } 418 441 419 442 if (!try_module_get(ops->owner)) { 420 443 ret = -EINVAL; ··· 543 514 544 515 static int __iommu_probe_device(struct device *dev, struct list_head *group_list) 545 516 { 546 - const struct iommu_ops *ops; 547 517 struct iommu_group *group; 548 518 struct group_device *gdev; 549 519 int ret; 550 520 551 - /* 552 - * For FDT-based systems and ACPI IORT/VIOT, drivers register IOMMU 553 - * instances with non-NULL fwnodes, and client devices should have been 554 - * identified with a fwspec by this point. Otherwise, we can currently 555 - * assume that only one of Intel, AMD, s390, PAMU or legacy SMMUv2 can 556 - * be present, and that any of their registered instances has suitable 557 - * ops for probing, and thus cheekily co-opt the same mechanism. 
558 - */ 559 - ops = iommu_fwspec_ops(dev_iommu_fwspec_get(dev)); 560 - if (!ops) 561 - return -ENODEV; 562 521 /* 563 522 * Serialise to avoid races between IOMMU drivers registering in 564 523 * parallel and/or the "replay" calls from ACPI/OF code via client ··· 560 543 if (dev->iommu_group) 561 544 return 0; 562 545 563 - ret = iommu_init_device(dev, ops); 546 + ret = iommu_init_device(dev); 564 547 if (ret) 565 548 return ret; 549 + /* 550 + * And if we do now see any replay calls, they would indicate someone 551 + * misusing the dma_configure path outside bus code. 552 + */ 553 + if (dev->driver) 554 + dev_WARN(dev, "late IOMMU probe at driver bind, something fishy here!\n"); 566 555 567 556 group = dev->iommu_group; 568 557 gdev = iommu_group_alloc_device(group, dev); ··· 2170 2147 return dev->iommu_group->default_domain; 2171 2148 } 2172 2149 2150 + static void *iommu_make_pasid_array_entry(struct iommu_domain *domain, 2151 + struct iommu_attach_handle *handle) 2152 + { 2153 + if (handle) { 2154 + handle->domain = domain; 2155 + return xa_tag_pointer(handle, IOMMU_PASID_ARRAY_HANDLE); 2156 + } 2157 + 2158 + return xa_tag_pointer(domain, IOMMU_PASID_ARRAY_DOMAIN); 2159 + } 2160 + 2173 2161 static int __iommu_attach_group(struct iommu_domain *domain, 2174 2162 struct iommu_group *group) 2175 2163 { ··· 2220 2186 return ret; 2221 2187 } 2222 2188 EXPORT_SYMBOL_GPL(iommu_attach_group); 2223 - 2224 - /** 2225 - * iommu_group_replace_domain - replace the domain that a group is attached to 2226 - * @group: IOMMU group that will be attached to the new domain 2227 - * @new_domain: new IOMMU domain to replace with 2228 - * 2229 - * This API allows the group to switch domains without being forced to go to 2230 - * the blocking domain in-between. 2231 - * 2232 - * If the currently attached domain is a core domain (e.g. a default_domain), 2233 - * it will act just like the iommu_attach_group(). 
2234 - */ 2235 - int iommu_group_replace_domain(struct iommu_group *group, 2236 - struct iommu_domain *new_domain) 2237 - { 2238 - int ret; 2239 - 2240 - if (!new_domain) 2241 - return -EINVAL; 2242 - 2243 - mutex_lock(&group->mutex); 2244 - ret = __iommu_group_set_domain(group, new_domain); 2245 - mutex_unlock(&group->mutex); 2246 - return ret; 2247 - } 2248 - EXPORT_SYMBOL_NS_GPL(iommu_group_replace_domain, "IOMMUFD_INTERNAL"); 2249 2189 2250 2190 static int __iommu_device_set_domain(struct iommu_group *group, 2251 2191 struct device *dev, ··· 2857 2849 dev_iommu_fwspec_set(dev, NULL); 2858 2850 } 2859 2851 } 2860 - EXPORT_SYMBOL_GPL(iommu_fwspec_free); 2861 2852 2862 2853 int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids) 2863 2854 { ··· 3104 3097 return 0; 3105 3098 3106 3099 mutex_lock(&group->mutex); 3100 + /* We may race against bus_iommu_probe() finalising groups here */ 3101 + if (!group->default_domain) { 3102 + ret = -EPROBE_DEFER; 3103 + goto unlock_out; 3104 + } 3107 3105 if (group->owner_cnt) { 3108 3106 if (group->domain != group->default_domain || group->owner || 3109 3107 !xa_empty(&group->pasid_array)) { ··· 3386 3374 struct iommu_group *group = dev->iommu_group; 3387 3375 struct group_device *device; 3388 3376 const struct iommu_ops *ops; 3377 + void *entry; 3389 3378 int ret; 3390 3379 3391 3380 if (!group) ··· 3410 3397 } 3411 3398 } 3412 3399 3413 - if (handle) 3414 - handle->domain = domain; 3400 + entry = iommu_make_pasid_array_entry(domain, handle); 3415 3401 3416 - ret = xa_insert(&group->pasid_array, pasid, handle, GFP_KERNEL); 3402 + /* 3403 + * Entry present is a failure case. Use xa_insert() instead of 3404 + * xa_reserve(). 
3405 + */ 3406 + ret = xa_insert(&group->pasid_array, pasid, XA_ZERO_ENTRY, GFP_KERNEL); 3417 3407 if (ret) 3418 3408 goto out_unlock; 3419 3409 3420 3410 ret = __iommu_set_group_pasid(domain, group, pasid); 3421 - if (ret) 3422 - xa_erase(&group->pasid_array, pasid); 3411 + if (ret) { 3412 + xa_release(&group->pasid_array, pasid); 3413 + goto out_unlock; 3414 + } 3415 + 3416 + /* 3417 + * The xa_insert() above reserved the memory, and the group->mutex is 3418 + * held, this cannot fail. The new domain cannot be visible until the 3419 + * operation succeeds as we cannot tolerate PRIs becoming concurrently 3420 + * queued and then failing attach. 3421 + */ 3422 + WARN_ON(xa_is_err(xa_store(&group->pasid_array, 3423 + pasid, entry, GFP_KERNEL))); 3424 + 3423 3425 out_unlock: 3424 3426 mutex_unlock(&group->mutex); 3425 3427 return ret; ··· 3508 3480 iommu_attach_handle_get(struct iommu_group *group, ioasid_t pasid, unsigned int type) 3509 3481 { 3510 3482 struct iommu_attach_handle *handle; 3483 + void *entry; 3511 3484 3512 3485 xa_lock(&group->pasid_array); 3513 - handle = xa_load(&group->pasid_array, pasid); 3514 - if (!handle) 3486 + entry = xa_load(&group->pasid_array, pasid); 3487 + if (!entry || xa_pointer_tag(entry) != IOMMU_PASID_ARRAY_HANDLE) { 3515 3488 handle = ERR_PTR(-ENOENT); 3516 - else if (type && handle->domain->type != type) 3517 - handle = ERR_PTR(-EBUSY); 3489 + } else { 3490 + handle = xa_untag_pointer(entry); 3491 + if (type && handle->domain->type != type) 3492 + handle = ERR_PTR(-EBUSY); 3493 + } 3518 3494 xa_unlock(&group->pasid_array); 3519 3495 3520 3496 return handle; ··· 3541 3509 struct iommu_group *group, 3542 3510 struct iommu_attach_handle *handle) 3543 3511 { 3512 + void *entry; 3544 3513 int ret; 3545 3514 3546 - if (handle) 3547 - handle->domain = domain; 3515 + if (!handle) 3516 + return -EINVAL; 3548 3517 3549 3518 mutex_lock(&group->mutex); 3550 - ret = xa_insert(&group->pasid_array, IOMMU_NO_PASID, handle, GFP_KERNEL); 3519 + 
entry = iommu_make_pasid_array_entry(domain, handle); 3520 + ret = xa_insert(&group->pasid_array, 3521 + IOMMU_NO_PASID, XA_ZERO_ENTRY, GFP_KERNEL); 3551 3522 if (ret) 3552 - goto err_unlock; 3523 + goto out_unlock; 3553 3524 3554 3525 ret = __iommu_attach_group(domain, group); 3555 - if (ret) 3556 - goto err_erase; 3557 - mutex_unlock(&group->mutex); 3526 + if (ret) { 3527 + xa_release(&group->pasid_array, IOMMU_NO_PASID); 3528 + goto out_unlock; 3529 + } 3558 3530 3559 - return 0; 3560 - err_erase: 3561 - xa_erase(&group->pasid_array, IOMMU_NO_PASID); 3562 - err_unlock: 3531 + /* 3532 + * The xa_insert() above reserved the memory, and the group->mutex is 3533 + * held, this cannot fail. The new domain cannot be visible until the 3534 + * operation succeeds as we cannot tolerate PRIs becoming concurrently 3535 + * queued and then failing attach. 3536 + */ 3537 + WARN_ON(xa_is_err(xa_store(&group->pasid_array, 3538 + IOMMU_NO_PASID, entry, GFP_KERNEL))); 3539 + 3540 + out_unlock: 3563 3541 mutex_unlock(&group->mutex); 3564 3542 return ret; 3565 3543 } ··· 3599 3557 * @new_domain: new IOMMU domain to replace with 3600 3558 * @handle: attach handle 3601 3559 * 3602 - * This is a variant of iommu_group_replace_domain(). It allows the caller to 3603 - * provide an attach handle for the new domain and use it when the domain is 3604 - * attached. 3560 + * This API allows the group to switch domains without being forced to go to 3561 + * the blocking domain in-between. It allows the caller to provide an attach 3562 + * handle for the new domain and use it when the domain is attached. 3563 + * 3564 + * If the currently attached domain is a core domain (e.g. a default_domain), 3565 + * it will act just like the iommu_attach_group_handle(). 
3605 3566 */ 3606 3567 int iommu_replace_group_handle(struct iommu_group *group, 3607 3568 struct iommu_domain *new_domain, 3608 3569 struct iommu_attach_handle *handle) 3609 3570 { 3610 - void *curr; 3571 + void *curr, *entry; 3611 3572 int ret; 3612 3573 3613 - if (!new_domain) 3574 + if (!new_domain || !handle) 3614 3575 return -EINVAL; 3615 3576 3616 3577 mutex_lock(&group->mutex); 3617 - if (handle) { 3618 - ret = xa_reserve(&group->pasid_array, IOMMU_NO_PASID, GFP_KERNEL); 3619 - if (ret) 3620 - goto err_unlock; 3621 - handle->domain = new_domain; 3622 - } 3578 + entry = iommu_make_pasid_array_entry(new_domain, handle); 3579 + ret = xa_reserve(&group->pasid_array, IOMMU_NO_PASID, GFP_KERNEL); 3580 + if (ret) 3581 + goto err_unlock; 3623 3582 3624 3583 ret = __iommu_group_set_domain(group, new_domain); 3625 3584 if (ret) 3626 3585 goto err_release; 3627 3586 3628 - curr = xa_store(&group->pasid_array, IOMMU_NO_PASID, handle, GFP_KERNEL); 3587 + curr = xa_store(&group->pasid_array, IOMMU_NO_PASID, entry, GFP_KERNEL); 3629 3588 WARN_ON(xa_is_err(curr)); 3630 3589 3631 3590 mutex_unlock(&group->mutex); ··· 3639 3596 return ret; 3640 3597 } 3641 3598 EXPORT_SYMBOL_NS_GPL(iommu_replace_group_handle, "IOMMUFD_INTERNAL"); 3599 + 3600 + #if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU) 3601 + /** 3602 + * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU domain 3603 + * @desc: MSI descriptor, will store the MSI page 3604 + * @msi_addr: MSI target address to be mapped 3605 + * 3606 + * The implementation of sw_msi() should take msi_addr and map it to 3607 + * an IOVA in the domain and call msi_desc_set_iommu_msi_iova() with the 3608 + * mapping information. 3609 + * 3610 + * Return: 0 on success or negative error code if the mapping failed. 
3611 + */ 3612 + int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) 3613 + { 3614 + struct device *dev = msi_desc_to_dev(desc); 3615 + struct iommu_group *group = dev->iommu_group; 3616 + int ret = 0; 3617 + 3618 + if (!group) 3619 + return 0; 3620 + 3621 + mutex_lock(&group->mutex); 3622 + if (group->domain && group->domain->sw_msi) 3623 + ret = group->domain->sw_msi(group->domain, desc, msi_addr); 3624 + mutex_unlock(&group->mutex); 3625 + return ret; 3626 + } 3627 + #endif /* CONFIG_IRQ_MSI_IOMMU */
+245 -23
drivers/iommu/iommufd/device.c
··· 5 5 #include <linux/iommufd.h> 6 6 #include <linux/slab.h> 7 7 #include <uapi/linux/iommufd.h> 8 + #include <linux/msi.h> 8 9 9 10 #include "../iommu-priv.h" 10 11 #include "io_pagetable.h" ··· 294 293 } 295 294 EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, "IOMMUFD"); 296 295 297 - static int iommufd_group_setup_msi(struct iommufd_group *igroup, 298 - struct iommufd_hwpt_paging *hwpt_paging) 296 + /* 297 + * Get a iommufd_sw_msi_map for the msi physical address requested by the irq 298 + * layer. The mapping to IOVA is global to the iommufd file descriptor, every 299 + * domain that is attached to a device using the same MSI parameters will use 300 + * the same IOVA. 301 + */ 302 + static __maybe_unused struct iommufd_sw_msi_map * 303 + iommufd_sw_msi_get_map(struct iommufd_ctx *ictx, phys_addr_t msi_addr, 304 + phys_addr_t sw_msi_start) 299 305 { 300 - phys_addr_t sw_msi_start = igroup->sw_msi_start; 306 + struct iommufd_sw_msi_map *cur; 307 + unsigned int max_pgoff = 0; 308 + 309 + lockdep_assert_held(&ictx->sw_msi_lock); 310 + 311 + list_for_each_entry(cur, &ictx->sw_msi_list, sw_msi_item) { 312 + if (cur->sw_msi_start != sw_msi_start) 313 + continue; 314 + max_pgoff = max(max_pgoff, cur->pgoff + 1); 315 + if (cur->msi_addr == msi_addr) 316 + return cur; 317 + } 318 + 319 + if (ictx->sw_msi_id >= 320 + BITS_PER_BYTE * sizeof_field(struct iommufd_sw_msi_maps, bitmap)) 321 + return ERR_PTR(-EOVERFLOW); 322 + 323 + cur = kzalloc(sizeof(*cur), GFP_KERNEL); 324 + if (!cur) 325 + return ERR_PTR(-ENOMEM); 326 + 327 + cur->sw_msi_start = sw_msi_start; 328 + cur->msi_addr = msi_addr; 329 + cur->pgoff = max_pgoff; 330 + cur->id = ictx->sw_msi_id++; 331 + list_add_tail(&cur->sw_msi_item, &ictx->sw_msi_list); 332 + return cur; 333 + } 334 + 335 + static int iommufd_sw_msi_install(struct iommufd_ctx *ictx, 336 + struct iommufd_hwpt_paging *hwpt_paging, 337 + struct iommufd_sw_msi_map *msi_map) 338 + { 339 + unsigned long iova; 340 + 341 + 
lockdep_assert_held(&ictx->sw_msi_lock); 342 + 343 + iova = msi_map->sw_msi_start + msi_map->pgoff * PAGE_SIZE; 344 + if (!test_bit(msi_map->id, hwpt_paging->present_sw_msi.bitmap)) { 345 + int rc; 346 + 347 + rc = iommu_map(hwpt_paging->common.domain, iova, 348 + msi_map->msi_addr, PAGE_SIZE, 349 + IOMMU_WRITE | IOMMU_READ | IOMMU_MMIO, 350 + GFP_KERNEL_ACCOUNT); 351 + if (rc) 352 + return rc; 353 + __set_bit(msi_map->id, hwpt_paging->present_sw_msi.bitmap); 354 + } 355 + return 0; 356 + } 357 + 358 + /* 359 + * Called by the irq code if the platform translates the MSI address through the 360 + * IOMMU. msi_addr is the physical address of the MSI page. iommufd will 361 + * allocate a fd global iova for the physical page that is the same on all 362 + * domains and devices. 363 + */ 364 + #ifdef CONFIG_IRQ_MSI_IOMMU 365 + int iommufd_sw_msi(struct iommu_domain *domain, struct msi_desc *desc, 366 + phys_addr_t msi_addr) 367 + { 368 + struct device *dev = msi_desc_to_dev(desc); 369 + struct iommufd_hwpt_paging *hwpt_paging; 370 + struct iommu_attach_handle *raw_handle; 371 + struct iommufd_attach_handle *handle; 372 + struct iommufd_sw_msi_map *msi_map; 373 + struct iommufd_ctx *ictx; 374 + unsigned long iova; 301 375 int rc; 302 376 303 377 /* 304 - * If the IOMMU driver gives a IOMMU_RESV_SW_MSI then it is asking us to 305 - * call iommu_get_msi_cookie() on its behalf. This is necessary to setup 306 - * the MSI window so iommu_dma_prepare_msi() can install pages into our 307 - * domain after request_irq(). If it is not done interrupts will not 308 - * work on this domain. 309 - * 310 - * FIXME: This is conceptually broken for iommufd since we want to allow 311 - * userspace to change the domains, eg switch from an identity IOAS to a 312 - * DMA IOAS. There is currently no way to create a MSI window that 313 - * matches what the IRQ layer actually expects in a newly created 314 - * domain. 
378 + * It is safe to call iommu_attach_handle_get() here because the iommu 379 + * core code invokes this under the group mutex which also prevents any 380 + * change of the attach handle for the duration of this function. 315 381 */ 316 - if (sw_msi_start != PHYS_ADDR_MAX && !hwpt_paging->msi_cookie) { 317 - rc = iommu_get_msi_cookie(hwpt_paging->common.domain, 318 - sw_msi_start); 382 + iommu_group_mutex_assert(dev); 383 + 384 + raw_handle = 385 + iommu_attach_handle_get(dev->iommu_group, IOMMU_NO_PASID, 0); 386 + if (IS_ERR(raw_handle)) 387 + return 0; 388 + hwpt_paging = find_hwpt_paging(domain->iommufd_hwpt); 389 + 390 + handle = to_iommufd_handle(raw_handle); 391 + /* No IOMMU_RESV_SW_MSI means no change to the msi_msg */ 392 + if (handle->idev->igroup->sw_msi_start == PHYS_ADDR_MAX) 393 + return 0; 394 + 395 + ictx = handle->idev->ictx; 396 + guard(mutex)(&ictx->sw_msi_lock); 397 + /* 398 + * The input msi_addr is the exact byte offset of the MSI doorbell, we 399 + * assume the caller has checked that it is contained with a MMIO region 400 + * that is secure to map at PAGE_SIZE. 
401 + */ 402 + msi_map = iommufd_sw_msi_get_map(handle->idev->ictx, 403 + msi_addr & PAGE_MASK, 404 + handle->idev->igroup->sw_msi_start); 405 + if (IS_ERR(msi_map)) 406 + return PTR_ERR(msi_map); 407 + 408 + rc = iommufd_sw_msi_install(ictx, hwpt_paging, msi_map); 409 + if (rc) 410 + return rc; 411 + __set_bit(msi_map->id, handle->idev->igroup->required_sw_msi.bitmap); 412 + 413 + iova = msi_map->sw_msi_start + msi_map->pgoff * PAGE_SIZE; 414 + msi_desc_set_iommu_msi_iova(desc, iova, PAGE_SHIFT); 415 + return 0; 416 + } 417 + #endif 418 + 419 + static int iommufd_group_setup_msi(struct iommufd_group *igroup, 420 + struct iommufd_hwpt_paging *hwpt_paging) 421 + { 422 + struct iommufd_ctx *ictx = igroup->ictx; 423 + struct iommufd_sw_msi_map *cur; 424 + 425 + if (igroup->sw_msi_start == PHYS_ADDR_MAX) 426 + return 0; 427 + 428 + /* 429 + * Install all the MSI pages the device has been using into the domain 430 + */ 431 + guard(mutex)(&ictx->sw_msi_lock); 432 + list_for_each_entry(cur, &ictx->sw_msi_list, sw_msi_item) { 433 + int rc; 434 + 435 + if (cur->sw_msi_start != igroup->sw_msi_start || 436 + !test_bit(cur->id, igroup->required_sw_msi.bitmap)) 437 + continue; 438 + 439 + rc = iommufd_sw_msi_install(ictx, hwpt_paging, cur); 319 440 if (rc) 320 441 return rc; 321 - 322 - /* 323 - * iommu_get_msi_cookie() can only be called once per domain, 324 - * it returns -EBUSY on later calls. 
325 - */ 326 - hwpt_paging->msi_cookie = true; 327 442 } 328 443 return 0; 329 444 } ··· 467 350 } 468 351 } 469 352 return 0; 353 + } 354 + 355 + /* The device attach/detach/replace helpers for attach_handle */ 356 + 357 + static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt, 358 + struct iommufd_device *idev) 359 + { 360 + struct iommufd_attach_handle *handle; 361 + int rc; 362 + 363 + lockdep_assert_held(&idev->igroup->lock); 364 + 365 + handle = kzalloc(sizeof(*handle), GFP_KERNEL); 366 + if (!handle) 367 + return -ENOMEM; 368 + 369 + if (hwpt->fault) { 370 + rc = iommufd_fault_iopf_enable(idev); 371 + if (rc) 372 + goto out_free_handle; 373 + } 374 + 375 + handle->idev = idev; 376 + rc = iommu_attach_group_handle(hwpt->domain, idev->igroup->group, 377 + &handle->handle); 378 + if (rc) 379 + goto out_disable_iopf; 380 + 381 + return 0; 382 + 383 + out_disable_iopf: 384 + if (hwpt->fault) 385 + iommufd_fault_iopf_disable(idev); 386 + out_free_handle: 387 + kfree(handle); 388 + return rc; 389 + } 390 + 391 + static struct iommufd_attach_handle * 392 + iommufd_device_get_attach_handle(struct iommufd_device *idev) 393 + { 394 + struct iommu_attach_handle *handle; 395 + 396 + lockdep_assert_held(&idev->igroup->lock); 397 + 398 + handle = 399 + iommu_attach_handle_get(idev->igroup->group, IOMMU_NO_PASID, 0); 400 + if (IS_ERR(handle)) 401 + return NULL; 402 + return to_iommufd_handle(handle); 403 + } 404 + 405 + static void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt, 406 + struct iommufd_device *idev) 407 + { 408 + struct iommufd_attach_handle *handle; 409 + 410 + handle = iommufd_device_get_attach_handle(idev); 411 + iommu_detach_group_handle(hwpt->domain, idev->igroup->group); 412 + if (hwpt->fault) { 413 + iommufd_auto_response_faults(hwpt, handle); 414 + iommufd_fault_iopf_disable(idev); 415 + } 416 + kfree(handle); 417 + } 418 + 419 + static int iommufd_hwpt_replace_device(struct iommufd_device *idev, 420 + struct 
iommufd_hw_pagetable *hwpt, 421 + struct iommufd_hw_pagetable *old) 422 + { 423 + struct iommufd_attach_handle *handle, *old_handle = 424 + iommufd_device_get_attach_handle(idev); 425 + int rc; 426 + 427 + handle = kzalloc(sizeof(*handle), GFP_KERNEL); 428 + if (!handle) 429 + return -ENOMEM; 430 + 431 + if (hwpt->fault && !old->fault) { 432 + rc = iommufd_fault_iopf_enable(idev); 433 + if (rc) 434 + goto out_free_handle; 435 + } 436 + 437 + handle->idev = idev; 438 + rc = iommu_replace_group_handle(idev->igroup->group, hwpt->domain, 439 + &handle->handle); 440 + if (rc) 441 + goto out_disable_iopf; 442 + 443 + if (old->fault) { 444 + iommufd_auto_response_faults(hwpt, old_handle); 445 + if (!hwpt->fault) 446 + iommufd_fault_iopf_disable(idev); 447 + } 448 + kfree(old_handle); 449 + 450 + return 0; 451 + 452 + out_disable_iopf: 453 + if (hwpt->fault && !old->fault) 454 + iommufd_fault_iopf_disable(idev); 455 + out_free_handle: 456 + kfree(handle); 457 + return rc; 470 458 } 471 459 472 460 int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
+5 -125
drivers/iommu/iommufd/fault.c
··· 17 17 #include "../iommu-priv.h" 18 18 #include "iommufd_private.h" 19 19 20 - static int iommufd_fault_iopf_enable(struct iommufd_device *idev) 20 + int iommufd_fault_iopf_enable(struct iommufd_device *idev) 21 21 { 22 22 struct device *dev = idev->dev; 23 23 int ret; ··· 50 50 return ret; 51 51 } 52 52 53 - static void iommufd_fault_iopf_disable(struct iommufd_device *idev) 53 + void iommufd_fault_iopf_disable(struct iommufd_device *idev) 54 54 { 55 55 mutex_lock(&idev->iopf_lock); 56 56 if (!WARN_ON(idev->iopf_enabled == 0)) { ··· 60 60 mutex_unlock(&idev->iopf_lock); 61 61 } 62 62 63 - static int __fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt, 64 - struct iommufd_device *idev) 65 - { 66 - struct iommufd_attach_handle *handle; 67 - int ret; 68 - 69 - handle = kzalloc(sizeof(*handle), GFP_KERNEL); 70 - if (!handle) 71 - return -ENOMEM; 72 - 73 - handle->idev = idev; 74 - ret = iommu_attach_group_handle(hwpt->domain, idev->igroup->group, 75 - &handle->handle); 76 - if (ret) 77 - kfree(handle); 78 - 79 - return ret; 80 - } 81 - 82 - int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt, 83 - struct iommufd_device *idev) 84 - { 85 - int ret; 86 - 87 - if (!hwpt->fault) 88 - return -EINVAL; 89 - 90 - ret = iommufd_fault_iopf_enable(idev); 91 - if (ret) 92 - return ret; 93 - 94 - ret = __fault_domain_attach_dev(hwpt, idev); 95 - if (ret) 96 - iommufd_fault_iopf_disable(idev); 97 - 98 - return ret; 99 - } 100 - 101 - static void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt, 102 - struct iommufd_attach_handle *handle) 63 + void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt, 64 + struct iommufd_attach_handle *handle) 103 65 { 104 66 struct iommufd_fault *fault = hwpt->fault; 105 67 struct iopf_group *group, *next; ··· 95 133 iopf_free_group(group); 96 134 } 97 135 mutex_unlock(&fault->mutex); 98 - } 99 - 100 - static struct iommufd_attach_handle * 101 - iommufd_device_get_attach_handle(struct 
iommufd_device *idev) 102 - { 103 - struct iommu_attach_handle *handle; 104 - 105 - handle = iommu_attach_handle_get(idev->igroup->group, IOMMU_NO_PASID, 0); 106 - if (IS_ERR(handle)) 107 - return NULL; 108 - 109 - return to_iommufd_handle(handle); 110 - } 111 - 112 - void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt, 113 - struct iommufd_device *idev) 114 - { 115 - struct iommufd_attach_handle *handle; 116 - 117 - handle = iommufd_device_get_attach_handle(idev); 118 - iommu_detach_group_handle(hwpt->domain, idev->igroup->group); 119 - iommufd_auto_response_faults(hwpt, handle); 120 - iommufd_fault_iopf_disable(idev); 121 - kfree(handle); 122 - } 123 - 124 - static int __fault_domain_replace_dev(struct iommufd_device *idev, 125 - struct iommufd_hw_pagetable *hwpt, 126 - struct iommufd_hw_pagetable *old) 127 - { 128 - struct iommufd_attach_handle *handle, *curr = NULL; 129 - int ret; 130 - 131 - if (old->fault) 132 - curr = iommufd_device_get_attach_handle(idev); 133 - 134 - if (hwpt->fault) { 135 - handle = kzalloc(sizeof(*handle), GFP_KERNEL); 136 - if (!handle) 137 - return -ENOMEM; 138 - 139 - handle->idev = idev; 140 - ret = iommu_replace_group_handle(idev->igroup->group, 141 - hwpt->domain, &handle->handle); 142 - } else { 143 - ret = iommu_replace_group_handle(idev->igroup->group, 144 - hwpt->domain, NULL); 145 - } 146 - 147 - if (!ret && curr) { 148 - iommufd_auto_response_faults(old, curr); 149 - kfree(curr); 150 - } 151 - 152 - return ret; 153 - } 154 - 155 - int iommufd_fault_domain_replace_dev(struct iommufd_device *idev, 156 - struct iommufd_hw_pagetable *hwpt, 157 - struct iommufd_hw_pagetable *old) 158 - { 159 - bool iopf_off = !hwpt->fault && old->fault; 160 - bool iopf_on = hwpt->fault && !old->fault; 161 - int ret; 162 - 163 - if (iopf_on) { 164 - ret = iommufd_fault_iopf_enable(idev); 165 - if (ret) 166 - return ret; 167 - } 168 - 169 - ret = __fault_domain_replace_dev(idev, hwpt, old); 170 - if (ret) { 171 - if (iopf_on) 172 
- iommufd_fault_iopf_disable(idev); 173 - return ret; 174 - } 175 - 176 - if (iopf_off) 177 - iommufd_fault_iopf_disable(idev); 178 - 179 - return 0; 180 136 } 181 137 182 138 void iommufd_fault_destroy(struct iommufd_object *obj) ··· 329 449 struct iommufd_hw_pagetable *hwpt; 330 450 struct iommufd_fault *fault; 331 451 332 - hwpt = group->attach_handle->domain->fault_data; 452 + hwpt = group->attach_handle->domain->iommufd_hwpt; 333 453 fault = hwpt->fault; 334 454 335 455 spin_lock(&fault->lock);
+4 -1
drivers/iommu/iommufd/hw_pagetable.c
··· 156 156 goto out_abort; 157 157 } 158 158 } 159 + iommu_domain_set_sw_msi(hwpt->domain, iommufd_sw_msi); 159 160 160 161 /* 161 162 * Set the coherency mode before we do iopt_table_add_domain() as some ··· 252 251 goto out_abort; 253 252 } 254 253 hwpt->domain->owner = ops; 254 + iommu_domain_set_sw_msi(hwpt->domain, iommufd_sw_msi); 255 255 256 256 if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) { 257 257 rc = -EINVAL; ··· 309 307 goto out_abort; 310 308 } 311 309 hwpt->domain->owner = viommu->iommu_dev->ops; 310 + iommu_domain_set_sw_msi(hwpt->domain, iommufd_sw_msi); 312 311 313 312 if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) { 314 313 rc = -EINVAL; ··· 409 406 } 410 407 hwpt->fault = fault; 411 408 hwpt->domain->iopf_handler = iommufd_fault_iopf_handler; 412 - hwpt->domain->fault_data = hwpt; 413 409 refcount_inc(&fault->obj.users); 414 410 iommufd_put_object(ucmd->ictx, &fault->obj); 415 411 } 412 + hwpt->domain->iommufd_hwpt = hwpt; 416 413 417 414 cmd->out_hwpt_id = hwpt->obj.id; 418 415 rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+26 -38
drivers/iommu/iommufd/iommufd_private.h
··· 19 19 struct iommu_option; 20 20 struct iommufd_device; 21 21 22 + struct iommufd_sw_msi_map { 23 + struct list_head sw_msi_item; 24 + phys_addr_t sw_msi_start; 25 + phys_addr_t msi_addr; 26 + unsigned int pgoff; 27 + unsigned int id; 28 + }; 29 + 30 + /* Bitmap of struct iommufd_sw_msi_map::id */ 31 + struct iommufd_sw_msi_maps { 32 + DECLARE_BITMAP(bitmap, 64); 33 + }; 34 + 35 + int iommufd_sw_msi(struct iommu_domain *domain, struct msi_desc *desc, 36 + phys_addr_t msi_addr); 37 + 22 38 struct iommufd_ctx { 23 39 struct file *file; 24 40 struct xarray objects; 25 41 struct xarray groups; 26 42 wait_queue_head_t destroy_wait; 27 43 struct rw_semaphore ioas_creation_lock; 44 + 45 + struct mutex sw_msi_lock; 46 + struct list_head sw_msi_list; 47 + unsigned int sw_msi_id; 28 48 29 49 u8 account_mode; 30 50 /* Compatibility with VFIO no iommu */ ··· 303 283 struct iommufd_ioas *ioas; 304 284 bool auto_domain : 1; 305 285 bool enforce_cache_coherency : 1; 306 - bool msi_cookie : 1; 307 286 bool nest_parent : 1; 308 287 /* Head at iommufd_ioas::hwpt_list */ 309 288 struct list_head hwpt_item; 289 + struct iommufd_sw_msi_maps present_sw_msi; 310 290 }; 311 291 312 292 struct iommufd_hwpt_nested { ··· 403 383 struct iommu_group *group; 404 384 struct iommufd_hw_pagetable *hwpt; 405 385 struct list_head device_list; 386 + struct iommufd_sw_msi_maps required_sw_msi; 406 387 phys_addr_t sw_msi_start; 407 388 }; 408 389 ··· 517 496 void iommufd_fault_destroy(struct iommufd_object *obj); 518 497 int iommufd_fault_iopf_handler(struct iopf_group *group); 519 498 520 - int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt, 521 - struct iommufd_device *idev); 522 - void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt, 523 - struct iommufd_device *idev); 524 - int iommufd_fault_domain_replace_dev(struct iommufd_device *idev, 525 - struct iommufd_hw_pagetable *hwpt, 526 - struct iommufd_hw_pagetable *old); 527 - 528 - static inline int 
iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt, 529 - struct iommufd_device *idev) 530 - { 531 - if (hwpt->fault) 532 - return iommufd_fault_domain_attach_dev(hwpt, idev); 533 - 534 - return iommu_attach_group(hwpt->domain, idev->igroup->group); 535 - } 536 - 537 - static inline void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt, 538 - struct iommufd_device *idev) 539 - { 540 - if (hwpt->fault) { 541 - iommufd_fault_domain_detach_dev(hwpt, idev); 542 - return; 543 - } 544 - 545 - iommu_detach_group(hwpt->domain, idev->igroup->group); 546 - } 547 - 548 - static inline int iommufd_hwpt_replace_device(struct iommufd_device *idev, 549 - struct iommufd_hw_pagetable *hwpt, 550 - struct iommufd_hw_pagetable *old) 551 - { 552 - if (old->fault || hwpt->fault) 553 - return iommufd_fault_domain_replace_dev(idev, hwpt, old); 554 - 555 - return iommu_group_replace_domain(idev->igroup->group, hwpt->domain); 556 - } 499 + int iommufd_fault_iopf_enable(struct iommufd_device *idev); 500 + void iommufd_fault_iopf_disable(struct iommufd_device *idev); 501 + void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt, 502 + struct iommufd_attach_handle *handle); 557 503 558 504 static inline struct iommufd_viommu * 559 505 iommufd_get_viommu(struct iommufd_ucmd *ucmd, u32 id)
+9
drivers/iommu/iommufd/main.c
··· 227 227 xa_init(&ictx->groups); 228 228 ictx->file = filp; 229 229 init_waitqueue_head(&ictx->destroy_wait); 230 + mutex_init(&ictx->sw_msi_lock); 231 + INIT_LIST_HEAD(&ictx->sw_msi_list); 230 232 filp->private_data = ictx; 231 233 return 0; 232 234 } ··· 236 234 static int iommufd_fops_release(struct inode *inode, struct file *filp) 237 235 { 238 236 struct iommufd_ctx *ictx = filp->private_data; 237 + struct iommufd_sw_msi_map *next; 238 + struct iommufd_sw_msi_map *cur; 239 239 struct iommufd_object *obj; 240 240 241 241 /* ··· 266 262 break; 267 263 } 268 264 WARN_ON(!xa_empty(&ictx->groups)); 265 + 266 + mutex_destroy(&ictx->sw_msi_lock); 267 + list_for_each_entry_safe(cur, next, &ictx->sw_msi_list, sw_msi_item) 268 + kfree(cur); 269 + 269 270 kfree(ictx); 270 271 return 0; 271 272 }
+14 -11
drivers/iommu/mtk_iommu_v1.c
··· 27 27 #include <linux/spinlock.h> 28 28 #include <linux/string_choices.h> 29 29 #include <asm/barrier.h> 30 - #include <asm/dma-iommu.h> 31 30 #include <dt-bindings/memory/mtk-memory-port.h> 32 31 #include <dt-bindings/memory/mt2701-larb-port.h> 33 32 #include <soc/mediatek/smi.h> 33 + 34 + #if defined(CONFIG_ARM) 35 + #include <asm/dma-iommu.h> 36 + #else 37 + #define arm_iommu_create_mapping(...) NULL 38 + #define arm_iommu_attach_device(...) -ENODEV 39 + struct dma_iommu_mapping { 40 + struct iommu_domain *domain; 41 + }; 42 + #endif 34 43 35 44 #define REG_MMU_PT_BASE_ADDR 0x000 36 45 ··· 455 446 456 447 static struct iommu_device *mtk_iommu_v1_probe_device(struct device *dev) 457 448 { 458 - struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 449 + struct iommu_fwspec *fwspec = NULL; 459 450 struct of_phandle_args iommu_spec; 460 451 struct mtk_iommu_v1_data *data; 461 452 int err, idx = 0, larbid, larbidx; 462 453 struct device_link *link; 463 454 struct device *larbdev; 464 - 465 - /* 466 - * In the deferred case, free the existed fwspec. 467 - * Always initialize the fwspec internally. 468 - */ 469 - if (fwspec) { 470 - iommu_fwspec_free(dev); 471 - fwspec = dev_iommu_fwspec_get(dev); 472 - } 473 455 474 456 while (!of_parse_phandle_with_args(dev->of_node, "iommus", 475 457 "#iommu-cells", ··· 475 475 fwspec = dev_iommu_fwspec_get(dev); 476 476 idx++; 477 477 } 478 + 479 + if (!fwspec) 480 + return ERR_PTR(-ENODEV); 478 481 479 482 data = dev_iommu_priv_get(dev); 480 483
+11 -2
drivers/iommu/of_iommu.c
··· 116 116 int of_iommu_configure(struct device *dev, struct device_node *master_np, 117 117 const u32 *id) 118 118 { 119 + bool dev_iommu_present; 119 120 int err; 120 121 121 122 if (!master_np) ··· 128 127 mutex_unlock(&iommu_probe_device_lock); 129 128 return 0; 130 129 } 130 + dev_iommu_present = dev->iommu; 131 131 132 132 /* 133 133 * We don't currently walk up the tree looking for a parent IOMMU. ··· 149 147 err = of_iommu_configure_device(master_np, dev, id); 150 148 } 151 149 152 - if (err) 150 + if (err && dev_iommu_present) 153 151 iommu_fwspec_free(dev); 152 + else if (err && dev->iommu) 153 + dev_iommu_free(dev); 154 154 mutex_unlock(&iommu_probe_device_lock); 155 155 156 - if (!err && dev->bus) 156 + /* 157 + * If we're not on the iommu_probe_device() path (as indicated by the 158 + * initial dev->iommu) then try to simulate it. This should no longer 159 + * happen unless of_dma_configure() is being misused outside bus code. 160 + */ 161 + if (!err && dev->bus && !dev_iommu_present) 157 162 err = iommu_probe_device(dev); 158 163 159 164 if (err && err != -EPROBE_DEFER)
+26 -35
drivers/iommu/rockchip-iommu.c
··· 88 88 dma_addr_t dt_dma; 89 89 spinlock_t iommus_lock; /* lock for iommus list */ 90 90 spinlock_t dt_lock; /* lock for modifying page directory table */ 91 + struct device *dma_dev; 91 92 92 93 struct iommu_domain domain; 93 94 }; ··· 124 123 struct rk_iommu *iommu; 125 124 }; 126 125 127 - static struct device *dma_dev; 128 126 static const struct rk_iommu_ops *rk_ops; 129 127 static struct iommu_domain rk_identity_domain; 130 128 ··· 132 132 { 133 133 size_t size = count * sizeof(u32); /* count of u32 entry */ 134 134 135 - dma_sync_single_for_device(dma_dev, dma, size, DMA_TO_DEVICE); 135 + dma_sync_single_for_device(dom->dma_dev, dma, size, DMA_TO_DEVICE); 136 136 } 137 137 138 138 static struct rk_iommu_domain *to_rk_domain(struct iommu_domain *dom) ··· 734 734 if (!page_table) 735 735 return ERR_PTR(-ENOMEM); 736 736 737 - pt_dma = dma_map_single(dma_dev, page_table, SPAGE_SIZE, DMA_TO_DEVICE); 738 - if (dma_mapping_error(dma_dev, pt_dma)) { 739 - dev_err(dma_dev, "DMA mapping error while allocating page table\n"); 737 + pt_dma = dma_map_single(rk_domain->dma_dev, page_table, SPAGE_SIZE, DMA_TO_DEVICE); 738 + if (dma_mapping_error(rk_domain->dma_dev, pt_dma)) { 739 + dev_err(rk_domain->dma_dev, "DMA mapping error while allocating page table\n"); 740 740 iommu_free_page(page_table); 741 741 return ERR_PTR(-ENOMEM); 742 742 } ··· 1051 1051 static struct iommu_domain *rk_iommu_domain_alloc_paging(struct device *dev) 1052 1052 { 1053 1053 struct rk_iommu_domain *rk_domain; 1054 - 1055 - if (!dma_dev) 1056 - return NULL; 1054 + struct rk_iommu *iommu; 1057 1055 1058 1056 rk_domain = kzalloc(sizeof(*rk_domain), GFP_KERNEL); 1059 1057 if (!rk_domain) ··· 1066 1068 if (!rk_domain->dt) 1067 1069 goto err_free_domain; 1068 1070 1069 - rk_domain->dt_dma = dma_map_single(dma_dev, rk_domain->dt, 1071 + iommu = rk_iommu_from_dev(dev); 1072 + rk_domain->dma_dev = iommu->dev; 1073 + rk_domain->dt_dma = dma_map_single(rk_domain->dma_dev, rk_domain->dt, 1070 1074 
SPAGE_SIZE, DMA_TO_DEVICE); 1071 - if (dma_mapping_error(dma_dev, rk_domain->dt_dma)) { 1072 - dev_err(dma_dev, "DMA map error for DT\n"); 1075 + if (dma_mapping_error(rk_domain->dma_dev, rk_domain->dt_dma)) { 1076 + dev_err(rk_domain->dma_dev, "DMA map error for DT\n"); 1073 1077 goto err_free_dt; 1074 1078 } 1075 1079 ··· 1105 1105 if (rk_dte_is_pt_valid(dte)) { 1106 1106 phys_addr_t pt_phys = rk_ops->pt_address(dte); 1107 1107 u32 *page_table = phys_to_virt(pt_phys); 1108 - dma_unmap_single(dma_dev, pt_phys, 1108 + dma_unmap_single(rk_domain->dma_dev, pt_phys, 1109 1109 SPAGE_SIZE, DMA_TO_DEVICE); 1110 1110 iommu_free_page(page_table); 1111 1111 } 1112 1112 } 1113 1113 1114 - dma_unmap_single(dma_dev, rk_domain->dt_dma, 1114 + dma_unmap_single(rk_domain->dma_dev, rk_domain->dt_dma, 1115 1115 SPAGE_SIZE, DMA_TO_DEVICE); 1116 1116 iommu_free_page(rk_domain->dt); 1117 1117 ··· 1148 1148 struct platform_device *iommu_dev; 1149 1149 struct rk_iommudata *data; 1150 1150 1151 - data = devm_kzalloc(dma_dev, sizeof(*data), GFP_KERNEL); 1151 + iommu_dev = of_find_device_by_node(args->np); 1152 + 1153 + data = devm_kzalloc(&iommu_dev->dev, sizeof(*data), GFP_KERNEL); 1152 1154 if (!data) 1153 1155 return -ENOMEM; 1154 - 1155 - iommu_dev = of_find_device_by_node(args->np); 1156 1156 1157 1157 data->iommu = platform_get_drvdata(iommu_dev); 1158 1158 data->iommu->domain = &rk_identity_domain; ··· 1256 1256 if (err) 1257 1257 return err; 1258 1258 1259 - err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev)); 1260 - if (err) 1261 - goto err_unprepare_clocks; 1262 - 1263 - err = iommu_device_register(&iommu->iommu, &rk_iommu_ops, dev); 1264 - if (err) 1265 - goto err_remove_sysfs; 1266 - 1267 - /* 1268 - * Use the first registered IOMMU device for domain to use with DMA 1269 - * API, since a domain might not physically correspond to a single 1270 - * IOMMU device.. 
1271 - */ 1272 - if (!dma_dev) 1273 - dma_dev = &pdev->dev; 1274 - 1275 1259 pm_runtime_enable(dev); 1276 1260 1277 1261 for (i = 0; i < iommu->num_irq; i++) { ··· 1274 1290 1275 1291 dma_set_mask_and_coherent(dev, rk_ops->dma_bit_mask); 1276 1292 1293 + err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev)); 1294 + if (err) 1295 + goto err_pm_disable; 1296 + 1297 + err = iommu_device_register(&iommu->iommu, &rk_iommu_ops, dev); 1298 + if (err) 1299 + goto err_remove_sysfs; 1300 + 1277 1301 return 0; 1278 - err_pm_disable: 1279 - pm_runtime_disable(dev); 1280 1302 err_remove_sysfs: 1281 1303 iommu_device_sysfs_remove(&iommu->iommu); 1282 - err_unprepare_clocks: 1304 + err_pm_disable: 1305 + pm_runtime_disable(dev); 1283 1306 clk_bulk_unprepare(iommu->num_clocks, iommu->clocks); 1284 1307 return err; 1285 1308 }
+113 -25
drivers/iommu/s390-iommu.c
··· 16 16 17 17 #include "dma-iommu.h" 18 18 19 - static const struct iommu_ops s390_iommu_ops; 19 + static const struct iommu_ops s390_iommu_ops, s390_iommu_rtr_ops; 20 20 21 21 static struct kmem_cache *dma_region_table_cache; 22 22 static struct kmem_cache *dma_page_table_cache; ··· 381 381 spin_unlock_irqrestore(&zdev->dom_lock, flags); 382 382 } 383 383 384 + static int s390_iommu_domain_reg_ioat(struct zpci_dev *zdev, 385 + struct iommu_domain *domain, u8 *status) 386 + { 387 + struct s390_domain *s390_domain; 388 + int rc = 0; 389 + u64 iota; 390 + 391 + switch (domain->type) { 392 + case IOMMU_DOMAIN_IDENTITY: 393 + rc = zpci_register_ioat(zdev, 0, zdev->start_dma, 394 + zdev->end_dma, 0, status); 395 + break; 396 + case IOMMU_DOMAIN_BLOCKED: 397 + /* Nothing to do in this case */ 398 + break; 399 + default: 400 + s390_domain = to_s390_domain(domain); 401 + iota = virt_to_phys(s390_domain->dma_table) | 402 + ZPCI_IOTA_RTTO_FLAG; 403 + rc = zpci_register_ioat(zdev, 0, zdev->start_dma, 404 + zdev->end_dma, iota, status); 405 + } 406 + 407 + return rc; 408 + } 409 + 410 + int zpci_iommu_register_ioat(struct zpci_dev *zdev, u8 *status) 411 + { 412 + unsigned long flags; 413 + int rc; 414 + 415 + spin_lock_irqsave(&zdev->dom_lock, flags); 416 + 417 + rc = s390_iommu_domain_reg_ioat(zdev, zdev->s390_domain, status); 418 + 419 + spin_unlock_irqrestore(&zdev->dom_lock, flags); 420 + 421 + return rc; 422 + } 423 + 384 424 static int blocking_domain_attach_device(struct iommu_domain *domain, 385 425 struct device *dev) 386 426 { ··· 432 392 return 0; 433 393 434 394 s390_domain = to_s390_domain(zdev->s390_domain); 435 - spin_lock_irqsave(&s390_domain->list_lock, flags); 436 - list_del_rcu(&zdev->iommu_list); 437 - spin_unlock_irqrestore(&s390_domain->list_lock, flags); 395 + if (zdev->dma_table) { 396 + spin_lock_irqsave(&s390_domain->list_lock, flags); 397 + list_del_rcu(&zdev->iommu_list); 398 + spin_unlock_irqrestore(&s390_domain->list_lock, flags); 399 + } 438 
400 439 401 zpci_unregister_ioat(zdev, 0); 440 402 zdev->dma_table = NULL; ··· 464 422 blocking_domain_attach_device(&blocking_domain, dev); 465 423 466 424 /* If we fail now DMA remains blocked via blocking domain */ 467 - cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, 468 - virt_to_phys(s390_domain->dma_table), &status); 425 + cc = s390_iommu_domain_reg_ioat(zdev, domain, &status); 469 426 if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL) 470 427 return -EIO; 471 428 zdev->dma_table = s390_domain->dma_table; ··· 764 723 if (rc) 765 724 goto out_err; 766 725 767 - rc = iommu_device_register(&zdev->iommu_dev, &s390_iommu_ops, NULL); 726 + if (zdev->rtr_avail) { 727 + rc = iommu_device_register(&zdev->iommu_dev, 728 + &s390_iommu_rtr_ops, NULL); 729 + } else { 730 + rc = iommu_device_register(&zdev->iommu_dev, &s390_iommu_ops, 731 + NULL); 732 + } 768 733 if (rc) 769 734 goto out_sysfs; 770 735 ··· 834 787 } 835 788 subsys_initcall(s390_iommu_init); 836 789 790 + static int s390_attach_dev_identity(struct iommu_domain *domain, 791 + struct device *dev) 792 + { 793 + struct zpci_dev *zdev = to_zpci_dev(dev); 794 + u8 status; 795 + int cc; 796 + 797 + blocking_domain_attach_device(&blocking_domain, dev); 798 + 799 + /* If we fail now DMA remains blocked via blocking domain */ 800 + cc = s390_iommu_domain_reg_ioat(zdev, domain, &status); 801 + 802 + /* 803 + * If the device is undergoing error recovery the reset code 804 + * will re-establish the new domain. 
805 + */ 806 + if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL) 807 + return -EIO; 808 + 809 + zdev_s390_domain_update(zdev, domain); 810 + 811 + return 0; 812 + } 813 + 814 + static const struct iommu_domain_ops s390_identity_ops = { 815 + .attach_dev = s390_attach_dev_identity, 816 + }; 817 + 818 + static struct iommu_domain s390_identity_domain = { 819 + .type = IOMMU_DOMAIN_IDENTITY, 820 + .ops = &s390_identity_ops, 821 + }; 822 + 837 823 static struct iommu_domain blocking_domain = { 838 824 .type = IOMMU_DOMAIN_BLOCKED, 839 825 .ops = &(const struct iommu_domain_ops) { ··· 874 794 } 875 795 }; 876 796 877 - static const struct iommu_ops s390_iommu_ops = { 878 - .blocked_domain = &blocking_domain, 879 - .release_domain = &blocking_domain, 880 - .capable = s390_iommu_capable, 881 - .domain_alloc_paging = s390_domain_alloc_paging, 882 - .probe_device = s390_iommu_probe_device, 883 - .device_group = generic_device_group, 884 - .pgsize_bitmap = SZ_4K, 885 - .get_resv_regions = s390_iommu_get_resv_regions, 886 - .default_domain_ops = &(const struct iommu_domain_ops) { 887 - .attach_dev = s390_iommu_attach_device, 888 - .map_pages = s390_iommu_map_pages, 889 - .unmap_pages = s390_iommu_unmap_pages, 890 - .flush_iotlb_all = s390_iommu_flush_iotlb_all, 891 - .iotlb_sync = s390_iommu_iotlb_sync, 892 - .iotlb_sync_map = s390_iommu_iotlb_sync_map, 893 - .iova_to_phys = s390_iommu_iova_to_phys, 894 - .free = s390_domain_free, 797 + #define S390_IOMMU_COMMON_OPS() \ 798 + .blocked_domain = &blocking_domain, \ 799 + .release_domain = &blocking_domain, \ 800 + .capable = s390_iommu_capable, \ 801 + .domain_alloc_paging = s390_domain_alloc_paging, \ 802 + .probe_device = s390_iommu_probe_device, \ 803 + .device_group = generic_device_group, \ 804 + .pgsize_bitmap = SZ_4K, \ 805 + .get_resv_regions = s390_iommu_get_resv_regions, \ 806 + .default_domain_ops = &(const struct iommu_domain_ops) { \ 807 + .attach_dev = s390_iommu_attach_device, \ 808 + .map_pages = 
s390_iommu_map_pages, \ 809 + .unmap_pages = s390_iommu_unmap_pages, \ 810 + .flush_iotlb_all = s390_iommu_flush_iotlb_all, \ 811 + .iotlb_sync = s390_iommu_iotlb_sync, \ 812 + .iotlb_sync_map = s390_iommu_iotlb_sync_map, \ 813 + .iova_to_phys = s390_iommu_iova_to_phys, \ 814 + .free = s390_domain_free, \ 895 815 } 816 + 817 + static const struct iommu_ops s390_iommu_ops = { 818 + S390_IOMMU_COMMON_OPS() 819 + }; 820 + 821 + static const struct iommu_ops s390_iommu_rtr_ops = { 822 + .identity_domain = &s390_identity_domain, 823 + S390_IOMMU_COMMON_OPS() 896 824 };
-1
drivers/iommu/tegra-smmu.c
··· 846 846 err = ops->of_xlate(dev, args); 847 847 if (err < 0) { 848 848 dev_err(dev, "failed to parse SW group ID: %d\n", err); 849 - iommu_fwspec_free(dev); 850 849 return err; 851 850 } 852 851
+4
drivers/irqchip/Kconfig
··· 28 28 select ARM_GIC 29 29 select IRQ_MSI_LIB 30 30 select PCI_MSI 31 + select IRQ_MSI_IOMMU 31 32 32 33 config GIC_NON_BANKED 33 34 bool ··· 39 38 select PARTITION_PERCPU 40 39 select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP 41 40 select HAVE_ARM_SMCCC_DISCOVERY 41 + select IRQ_MSI_IOMMU 42 42 43 43 config ARM_GIC_V3_ITS 44 44 bool 45 45 select GENERIC_MSI_IRQ 46 46 select IRQ_MSI_LIB 47 47 default ARM_GIC_V3 48 + select IRQ_MSI_IOMMU 48 49 49 50 config ARM_GIC_V3_ITS_FSL_MC 50 51 bool ··· 411 408 412 409 config LS_SCFG_MSI 413 410 def_bool y if SOC_LS1021A || ARCH_LAYERSCAPE 411 + select IRQ_MSI_IOMMU 414 412 depends on PCI_MSI 415 413 416 414 config PARTITION_PERCPU
+1 -4
drivers/irqchip/irq-gic-v2m.c
··· 87 87 struct v2m_data *v2m = irq_data_get_irq_chip_data(data); 88 88 phys_addr_t addr = gicv2m_get_msi_addr(v2m, data->hwirq); 89 89 90 - msg->address_hi = upper_32_bits(addr); 91 - msg->address_lo = lower_32_bits(addr); 92 - 93 90 if (v2m->flags & GICV2M_GRAVITON_ADDRESS_ONLY) 94 91 msg->data = 0; 95 92 else ··· 94 97 if (v2m->flags & GICV2M_NEEDS_SPI_OFFSET) 95 98 msg->data -= v2m->spi_offset; 96 99 97 - iommu_dma_compose_msi_msg(irq_data_get_msi_desc(data), msg); 100 + msi_msg_set_addr(irq_data_get_msi_desc(data), msg, addr); 98 101 } 99 102 100 103 static struct irq_chip gicv2m_irq_chip = {
+3 -10
drivers/irqchip/irq-gic-v3-its.c
··· 1809 1809 static void its_irq_compose_msi_msg(struct irq_data *d, struct msi_msg *msg) 1810 1810 { 1811 1811 struct its_device *its_dev = irq_data_get_irq_chip_data(d); 1812 - struct its_node *its; 1813 - u64 addr; 1814 1812 1815 - its = its_dev->its; 1816 - addr = its->get_msi_base(its_dev); 1817 - 1818 - msg->address_lo = lower_32_bits(addr); 1819 - msg->address_hi = upper_32_bits(addr); 1820 - msg->data = its_get_event_id(d); 1821 - 1822 - iommu_dma_compose_msi_msg(irq_data_get_msi_desc(d), msg); 1813 + msg->data = its_get_event_id(d); 1814 + msi_msg_set_addr(irq_data_get_msi_desc(d), msg, 1815 + its_dev->its->get_msi_base(its_dev)); 1823 1816 } 1824 1817 1825 1818 static int its_irq_set_irqchip_state(struct irq_data *d,
+4 -8
drivers/irqchip/irq-gic-v3-mbi.c
··· 147 147 148 148 static void mbi_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) 149 149 { 150 - msg[0].address_hi = upper_32_bits(mbi_phys_base + GICD_SETSPI_NSR); 151 - msg[0].address_lo = lower_32_bits(mbi_phys_base + GICD_SETSPI_NSR); 152 150 msg[0].data = data->parent_data->hwirq; 153 - 154 - iommu_dma_compose_msi_msg(irq_data_get_msi_desc(data), msg); 151 + msi_msg_set_addr(irq_data_get_msi_desc(data), &msg[0], 152 + mbi_phys_base + GICD_SETSPI_NSR); 155 153 } 156 154 157 155 static void mbi_compose_mbi_msg(struct irq_data *data, struct msi_msg *msg) 158 156 { 159 157 mbi_compose_msi_msg(data, msg); 160 158 161 - msg[1].address_hi = upper_32_bits(mbi_phys_base + GICD_CLRSPI_NSR); 162 - msg[1].address_lo = lower_32_bits(mbi_phys_base + GICD_CLRSPI_NSR); 163 159 msg[1].data = data->parent_data->hwirq; 164 - 165 - iommu_dma_compose_msi_msg(irq_data_get_msi_desc(data), &msg[1]); 160 + msi_msg_set_addr(irq_data_get_msi_desc(data), &msg[1], 161 + mbi_phys_base + GICD_CLRSPI_NSR); 166 162 } 167 163 168 164 static bool mbi_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
+2 -3
drivers/irqchip/irq-ls-scfg-msi.c
··· 87 87 { 88 88 struct ls_scfg_msi *msi_data = irq_data_get_irq_chip_data(data); 89 89 90 - msg->address_hi = upper_32_bits(msi_data->msiir_addr); 91 - msg->address_lo = lower_32_bits(msi_data->msiir_addr); 92 90 msg->data = data->hwirq; 93 91 94 92 if (msi_affinity_flag) { ··· 96 98 msg->data |= cpumask_first(mask); 97 99 } 98 100 99 - iommu_dma_compose_msi_msg(irq_data_get_msi_desc(data), msg); 101 + msi_msg_set_addr(irq_data_get_msi_desc(data), msg, 102 + msi_data->msiir_addr); 100 103 } 101 104 102 105 static int ls_scfg_msi_set_affinity(struct irq_data *irq_data,
+6 -1
drivers/of/device.c
··· 99 99 bool coherent, set_map = false; 100 100 int ret; 101 101 102 + if (dev->dma_range_map) { 103 + dev_dbg(dev, "dma_range_map already set\n"); 104 + goto skip_map; 105 + } 106 + 102 107 if (np == dev->of_node) 103 108 bus_np = __of_get_dma_parent(np); 104 109 else ··· 124 119 end = dma_range_map_max(map); 125 120 set_map = true; 126 121 } 127 - 122 + skip_map: 128 123 /* 129 124 * If @dev is expected to be DMA-capable then the bus code that created 130 125 * it should have initialised its dma_mask pointer by this point. For
+2 -1
drivers/pci/pci-driver.c
··· 1653 1653 1654 1654 pci_put_host_bridge_device(bridge); 1655 1655 1656 - if (!ret && !driver->driver_managed_dma) { 1656 + /* @driver may not be valid when we're called from the IOMMU layer */ 1657 + if (!ret && dev->driver && !driver->driver_managed_dma) { 1657 1658 ret = iommu_device_use_default_domain(dev); 1658 1659 if (ret) 1659 1660 arch_teardown_dma_ops(dev);
+34 -27
include/linux/iommu.h
··· 44 44 struct iommu_fault_param; 45 45 struct iommufd_ctx; 46 46 struct iommufd_viommu; 47 + struct msi_desc; 48 + struct msi_msg; 47 49 48 50 #define IOMMU_FAULT_PERM_READ (1 << 0) /* read */ 49 51 #define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */ ··· 218 216 struct iommu_domain_geometry geometry; 219 217 struct iommu_dma_cookie *iova_cookie; 220 218 int (*iopf_handler)(struct iopf_group *group); 221 - void *fault_data; 222 - union { 219 + 220 + #if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU) 221 + int (*sw_msi)(struct iommu_domain *domain, struct msi_desc *desc, 222 + phys_addr_t msi_addr); 223 + #endif 224 + 225 + union { /* Pointer usable by owner of the domain */ 226 + struct iommufd_hw_pagetable *iommufd_hwpt; /* iommufd */ 227 + }; 228 + union { /* Fault handler */ 223 229 struct { 224 230 iommu_fault_handler_t handler; 225 231 void *handler_token; ··· 243 233 }; 244 234 }; 245 235 }; 236 + 237 + static inline void iommu_domain_set_sw_msi( 238 + struct iommu_domain *domain, 239 + int (*sw_msi)(struct iommu_domain *domain, struct msi_desc *desc, 240 + phys_addr_t msi_addr)) 241 + { 242 + #if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU) 243 + domain->sw_msi = sw_msi; 244 + #endif 245 + } 246 246 247 247 static inline bool iommu_is_dma_domain(struct iommu_domain *domain) 248 248 { ··· 1099 1079 }; 1100 1080 1101 1081 int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode); 1102 - void iommu_fwspec_free(struct device *dev); 1103 1082 int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids); 1104 1083 1105 1084 static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev) ··· 1409 1390 return -ENODEV; 1410 1391 } 1411 1392 1412 - static inline void iommu_fwspec_free(struct device *dev) 1413 - { 1414 - } 1415 - 1416 1393 static inline int iommu_fwspec_add_ids(struct device *dev, u32 *ids, 1417 1394 int num_ids) 1418 1395 { ··· 1485 1470 static inline void iommu_free_global_pasid(ioasid_t pasid) {} 1486 1471 #endif /* 
CONFIG_IOMMU_API */ 1487 1472 1473 + #ifdef CONFIG_IRQ_MSI_IOMMU 1474 + #ifdef CONFIG_IOMMU_API 1475 + int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr); 1476 + #else 1477 + static inline int iommu_dma_prepare_msi(struct msi_desc *desc, 1478 + phys_addr_t msi_addr) 1479 + { 1480 + return 0; 1481 + } 1482 + #endif /* CONFIG_IOMMU_API */ 1483 + #endif /* CONFIG_IRQ_MSI_IOMMU */ 1484 + 1488 1485 #if IS_ENABLED(CONFIG_LOCKDEP) && IS_ENABLED(CONFIG_IOMMU_API) 1489 1486 void iommu_group_mutex_assert(struct device *dev); 1490 1487 #else ··· 1530 1503 #endif 1531 1504 1532 1505 #ifdef CONFIG_IOMMU_DMA 1533 - #include <linux/msi.h> 1534 - 1535 1506 int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base); 1536 - 1537 - int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr); 1538 - void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg); 1539 - 1540 1507 #else /* CONFIG_IOMMU_DMA */ 1541 - 1542 - struct msi_desc; 1543 - struct msi_msg; 1544 - 1545 1508 static inline int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base) 1546 1509 { 1547 1510 return -ENODEV; 1548 1511 } 1549 - 1550 - static inline int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) 1551 - { 1552 - return 0; 1553 - } 1554 - 1555 - static inline void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg) 1556 - { 1557 - } 1558 - 1559 1512 #endif /* CONFIG_IOMMU_DMA */ 1560 1513 1561 1514 /*
+40 -21
include/linux/msi.h
··· 166 166 * @dev: Pointer to the device which uses this descriptor 167 167 * @msg: The last set MSI message cached for reuse 168 168 * @affinity: Optional pointer to a cpu affinity mask for this descriptor 169 + * @iommu_msi_iova: Optional shifted IOVA from the IOMMU to override the msi_addr. 170 + * Only used if iommu_msi_shift != 0 171 + * @iommu_msi_shift: Indicates how many bits of the original address should be 172 + * preserved when using iommu_msi_iova. 169 173 * @sysfs_attr: Pointer to sysfs device attribute 170 174 * 171 175 * @write_msi_msg: Callback that may be called when the MSI message ··· 188 184 struct msi_msg msg; 189 185 struct irq_affinity_desc *affinity; 190 186 #ifdef CONFIG_IRQ_MSI_IOMMU 191 - const void *iommu_cookie; 187 + u64 iommu_msi_iova : 58; 188 + u64 iommu_msi_shift : 6; 192 189 #endif 193 190 #ifdef CONFIG_SYSFS 194 191 struct device_attribute *sysfs_attrs; ··· 290 285 291 286 #define msi_desc_to_dev(desc) ((desc)->dev) 292 287 288 + static inline void msi_desc_set_iommu_msi_iova(struct msi_desc *desc, u64 msi_iova, 289 + unsigned int msi_shift) 290 + { 293 291 #ifdef CONFIG_IRQ_MSI_IOMMU 294 - static inline const void *msi_desc_get_iommu_cookie(struct msi_desc *desc) 295 - { 296 - return desc->iommu_cookie; 297 - } 298 - 299 - static inline void msi_desc_set_iommu_cookie(struct msi_desc *desc, 300 - const void *iommu_cookie) 301 - { 302 - desc->iommu_cookie = iommu_cookie; 303 - } 304 - #else 305 - static inline const void *msi_desc_get_iommu_cookie(struct msi_desc *desc) 306 - { 307 - return NULL; 308 - } 309 - 310 - static inline void msi_desc_set_iommu_cookie(struct msi_desc *desc, 311 - const void *iommu_cookie) 312 - { 313 - } 292 + desc->iommu_msi_iova = msi_iova >> msi_shift; 293 + desc->iommu_msi_shift = msi_shift; 314 294 #endif 295 + } 296 + 297 + /** 298 + * msi_msg_set_addr() - Set MSI address in an MSI message 299 + * 300 + * @desc: MSI descriptor that may carry an IOVA base address for MSI via @iommu_msi_iova/shift 
301 + * @msg: Target MSI message to set its address_hi and address_lo 302 + * @msi_addr: Physical address to set the MSI message 303 + * 304 + * Notes: 305 + * - Override @msi_addr using the IOVA base address in the @desc if @iommu_msi_shift is set 306 + * - Otherwise, simply set @msi_addr to @msg 307 + */ 308 + static inline void msi_msg_set_addr(struct msi_desc *desc, struct msi_msg *msg, 309 + phys_addr_t msi_addr) 310 + { 311 + #ifdef CONFIG_IRQ_MSI_IOMMU 312 + if (desc->iommu_msi_shift) { 313 + u64 msi_iova = desc->iommu_msi_iova << desc->iommu_msi_shift; 314 + 315 + msg->address_hi = upper_32_bits(msi_iova); 316 + msg->address_lo = lower_32_bits(msi_iova) | 317 + (msi_addr & ((1 << desc->iommu_msi_shift) - 1)); 318 + return; 319 + } 320 + #endif 321 + msg->address_hi = upper_32_bits(msi_addr); 322 + msg->address_lo = lower_32_bits(msi_addr); 323 + } 315 324 316 325 int msi_domain_insert_msi_desc(struct device *dev, unsigned int domid, 317 326 struct msi_desc *init_desc);
+1
kernel/irq/Kconfig
··· 96 96 bool 97 97 select IRQ_DOMAIN_HIERARCHY 98 98 99 + # irqchip drivers should select this if they call iommu_dma_prepare_msi() 99 100 config IRQ_MSI_IOMMU 100 101 bool 101 102