Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branches 'arm/renesas', 'arm/smmu', 'x86/amd', 'x86/vt-d' and 'core' into next

+2389 -1305
+2
Documentation/devicetree/bindings/iommu/arm,smmu.yaml
··· 34 34 items: 35 35 - enum: 36 36 - qcom,sc7180-smmu-500 37 + - qcom,sc8180x-smmu-500 37 38 - qcom,sdm845-smmu-500 38 39 - qcom,sm8150-smmu-500 39 40 - qcom,sm8250-smmu-500 41 + - qcom,sm8350-smmu-500 40 42 - const: arm,mmu-500 41 43 - description: Qcom Adreno GPUs implementing "arm,smmu-v2" 42 44 items:
-105
Documentation/devicetree/bindings/iommu/mediatek,iommu.txt
··· 1 - * Mediatek IOMMU Architecture Implementation 2 - 3 - Some Mediatek SOCs contain a Multimedia Memory Management Unit (M4U), and 4 - this M4U have two generations of HW architecture. Generation one uses flat 5 - pagetable, and only supports 4K size page mapping. Generation two uses the 6 - ARM Short-Descriptor translation table format for address translation. 7 - 8 - About the M4U Hardware Block Diagram, please check below: 9 - 10 - EMI (External Memory Interface) 11 - | 12 - m4u (Multimedia Memory Management Unit) 13 - | 14 - +--------+ 15 - | | 16 - gals0-rx gals1-rx (Global Async Local Sync rx) 17 - | | 18 - | | 19 - gals0-tx gals1-tx (Global Async Local Sync tx) 20 - | | Some SoCs may have GALS. 21 - +--------+ 22 - | 23 - SMI Common(Smart Multimedia Interface Common) 24 - | 25 - +----------------+------- 26 - | | 27 - | gals-rx There may be GALS in some larbs. 28 - | | 29 - | | 30 - | gals-tx 31 - | | 32 - SMI larb0 SMI larb1 ... SoCs have several SMI local arbiter(larb). 33 - (display) (vdec) 34 - | | 35 - | | 36 - +-----+-----+ +----+----+ 37 - | | | | | | 38 - | | |... | | | ... There are different ports in each larb. 39 - | | | | | | 40 - OVL0 RDMA0 WDMA0 MC PP VLD 41 - 42 - As above, The Multimedia HW will go through SMI and M4U while it 43 - access EMI. SMI is a bridge between m4u and the Multimedia HW. It contain 44 - smi local arbiter and smi common. It will control whether the Multimedia 45 - HW should go though the m4u for translation or bypass it and talk 46 - directly with EMI. And also SMI help control the power domain and clocks for 47 - each local arbiter. 48 - Normally we specify a local arbiter(larb) for each multimedia HW 49 - like display, video decode, and camera. And there are different ports 50 - in each larb. Take a example, There are many ports like MC, PP, VLD in the 51 - video decode local arbiter, all these ports are according to the video HW. 52 - In some SoCs, there may be a GALS(Global Async Local Sync) module between 53 - smi-common and m4u, and additional GALS module between smi-larb and 54 - smi-common. GALS can been seen as a "asynchronous fifo" which could help 55 - synchronize for the modules in different clock frequency. 56 - 57 - Required properties: 58 - - compatible : must be one of the following string: 59 - "mediatek,mt2701-m4u" for mt2701 which uses generation one m4u HW. 60 - "mediatek,mt2712-m4u" for mt2712 which uses generation two m4u HW. 61 - "mediatek,mt6779-m4u" for mt6779 which uses generation two m4u HW. 62 - "mediatek,mt7623-m4u", "mediatek,mt2701-m4u" for mt7623 which uses 63 - generation one m4u HW. 64 - "mediatek,mt8167-m4u" for mt8167 which uses generation two m4u HW. 65 - "mediatek,mt8173-m4u" for mt8173 which uses generation two m4u HW. 66 - "mediatek,mt8183-m4u" for mt8183 which uses generation two m4u HW. 67 - - reg : m4u register base and size. 68 - - interrupts : the interrupt of m4u. 69 - - clocks : must contain one entry for each clock-names. 70 - - clock-names : Only 1 optional clock: 71 - - "bclk": the block clock of m4u. 72 - Here is the list which require this "bclk": 73 - - mt2701, mt2712, mt7623 and mt8173. 74 - Note that m4u use the EMI clock which always has been enabled before kernel 75 - if there is no this "bclk". 76 - - mediatek,larbs : List of phandle to the local arbiters in the current Socs. 77 - Refer to bindings/memory-controllers/mediatek,smi-larb.txt. It must sort 78 - according to the local arbiter index, like larb0, larb1, larb2... 79 - - iommu-cells : must be 1. This is the mtk_m4u_id according to the HW. 80 - Specifies the mtk_m4u_id as defined in 81 - dt-binding/memory/mt2701-larb-port.h for mt2701, mt7623 82 - dt-binding/memory/mt2712-larb-port.h for mt2712, 83 - dt-binding/memory/mt6779-larb-port.h for mt6779, 84 - dt-binding/memory/mt8167-larb-port.h for mt8167, 85 - dt-binding/memory/mt8173-larb-port.h for mt8173, and 86 - dt-binding/memory/mt8183-larb-port.h for mt8183. 87 - 88 - Example: 89 - iommu: iommu@10205000 { 90 - compatible = "mediatek,mt8173-m4u"; 91 - reg = <0 0x10205000 0 0x1000>; 92 - interrupts = <GIC_SPI 139 IRQ_TYPE_LEVEL_LOW>; 93 - clocks = <&infracfg CLK_INFRA_M4U>; 94 - clock-names = "bclk"; 95 - mediatek,larbs = <&larb0 &larb1 &larb2 &larb3 &larb4 &larb5>; 96 - #iommu-cells = <1>; 97 - }; 98 - 99 - Example for a client device: 100 - display { 101 - compatible = "mediatek,mt8173-disp"; 102 - iommus = <&iommu M4U_PORT_DISP_OVL0>, 103 - <&iommu M4U_PORT_DISP_RDMA0>; 104 - ... 105 - };
+183
Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml
··· 1 + # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) 2 + %YAML 1.2 3 + --- 4 + $id: http://devicetree.org/schemas/iommu/mediatek,iommu.yaml# 5 + $schema: http://devicetree.org/meta-schemas/core.yaml# 6 + 7 + title: MediaTek IOMMU Architecture Implementation 8 + 9 + maintainers: 10 + - Yong Wu <yong.wu@mediatek.com> 11 + 12 + description: |+ 13 + Some MediaTek SOCs contain a Multimedia Memory Management Unit (M4U), and 14 + this M4U have two generations of HW architecture. Generation one uses flat 15 + pagetable, and only supports 4K size page mapping. Generation two uses the 16 + ARM Short-Descriptor translation table format for address translation. 17 + 18 + About the M4U Hardware Block Diagram, please check below: 19 + 20 + EMI (External Memory Interface) 21 + | 22 + m4u (Multimedia Memory Management Unit) 23 + | 24 + +--------+ 25 + | | 26 + gals0-rx gals1-rx (Global Async Local Sync rx) 27 + | | 28 + | | 29 + gals0-tx gals1-tx (Global Async Local Sync tx) 30 + | | Some SoCs may have GALS. 31 + +--------+ 32 + | 33 + SMI Common(Smart Multimedia Interface Common) 34 + | 35 + +----------------+------- 36 + | | 37 + | gals-rx There may be GALS in some larbs. 38 + | | 39 + | | 40 + | gals-tx 41 + | | 42 + SMI larb0 SMI larb1 ... SoCs have several SMI local arbiter(larb). 43 + (display) (vdec) 44 + | | 45 + | | 46 + +-----+-----+ +----+----+ 47 + | | | | | | 48 + | | |... | | | ... There are different ports in each larb. 49 + | | | | | | 50 + OVL0 RDMA0 WDMA0 MC PP VLD 51 + 52 + As above, The Multimedia HW will go through SMI and M4U while it 53 + access EMI. SMI is a bridge between m4u and the Multimedia HW. It contain 54 + smi local arbiter and smi common. It will control whether the Multimedia 55 + HW should go though the m4u for translation or bypass it and talk 56 + directly with EMI. And also SMI help control the power domain and clocks for 57 + each local arbiter. 58 + 59 + Normally we specify a local arbiter(larb) for each multimedia HW 60 + like display, video decode, and camera. And there are different ports 61 + in each larb. Take a example, There are many ports like MC, PP, VLD in the 62 + video decode local arbiter, all these ports are according to the video HW. 63 + 64 + In some SoCs, there may be a GALS(Global Async Local Sync) module between 65 + smi-common and m4u, and additional GALS module between smi-larb and 66 + smi-common. GALS can been seen as a "asynchronous fifo" which could help 67 + synchronize for the modules in different clock frequency. 68 + 69 + properties: 70 + compatible: 71 + oneOf: 72 + - enum: 73 + - mediatek,mt2701-m4u # generation one 74 + - mediatek,mt2712-m4u # generation two 75 + - mediatek,mt6779-m4u # generation two 76 + - mediatek,mt8167-m4u # generation two 77 + - mediatek,mt8173-m4u # generation two 78 + - mediatek,mt8183-m4u # generation two 79 + - mediatek,mt8192-m4u # generation two 80 + 81 + - description: mt7623 generation one 82 + items: 83 + - const: mediatek,mt7623-m4u 84 + - const: mediatek,mt2701-m4u 85 + 86 + reg: 87 + maxItems: 1 88 + 89 + interrupts: 90 + maxItems: 1 91 + 92 + clocks: 93 + items: 94 + - description: bclk is the block clock. 95 + 96 + clock-names: 97 + items: 98 + - const: bclk 99 + 100 + mediatek,larbs: 101 + $ref: /schemas/types.yaml#/definitions/phandle-array 102 + minItems: 1 103 + maxItems: 32 104 + description: | 105 + List of phandle to the local arbiters in the current Socs. 106 + Refer to bindings/memory-controllers/mediatek,smi-larb.yaml. It must sort 107 + according to the local arbiter index, like larb0, larb1, larb2... 108 + 109 + '#iommu-cells': 110 + const: 1 111 + description: | 112 + This is the mtk_m4u_id according to the HW. Specifies the mtk_m4u_id as 113 + defined in 114 + dt-binding/memory/mt2701-larb-port.h for mt2701 and mt7623, 115 + dt-binding/memory/mt2712-larb-port.h for mt2712, 116 + dt-binding/memory/mt6779-larb-port.h for mt6779, 117 + dt-binding/memory/mt8167-larb-port.h for mt8167, 118 + dt-binding/memory/mt8173-larb-port.h for mt8173, 119 + dt-binding/memory/mt8183-larb-port.h for mt8183, 120 + dt-binding/memory/mt8192-larb-port.h for mt8192. 121 + 122 + power-domains: 123 + maxItems: 1 124 + 125 + required: 126 + - compatible 127 + - reg 128 + - interrupts 129 + - mediatek,larbs 130 + - '#iommu-cells' 131 + 132 + allOf: 133 + - if: 134 + properties: 135 + compatible: 136 + contains: 137 + enum: 138 + - mediatek,mt2701-m4u 139 + - mediatek,mt2712-m4u 140 + - mediatek,mt8173-m4u 141 + - mediatek,mt8192-m4u 142 + 143 + then: 144 + required: 145 + - clocks 146 + 147 + - if: 148 + properties: 149 + compatible: 150 + enum: 151 + - mediatek,mt8192-m4u 152 + 153 + then: 154 + required: 155 + - power-domains 156 + 157 + additionalProperties: false 158 + 159 + examples: 160 + - | 161 + #include <dt-bindings/clock/mt8173-clk.h> 162 + #include <dt-bindings/interrupt-controller/arm-gic.h> 163 + 164 + iommu: iommu@10205000 { 165 + compatible = "mediatek,mt8173-m4u"; 166 + reg = <0x10205000 0x1000>; 167 + interrupts = <GIC_SPI 139 IRQ_TYPE_LEVEL_LOW>; 168 + clocks = <&infracfg CLK_INFRA_M4U>; 169 + clock-names = "bclk"; 170 + mediatek,larbs = <&larb0 &larb1 &larb2 171 + &larb3 &larb4 &larb5>; 172 + #iommu-cells = <1>; 173 + }; 174 + 175 + - | 176 + #include <dt-bindings/memory/mt8173-larb-port.h> 177 + 178 + /* Example for a client device */ 179 + display { 180 + compatible = "mediatek,mt8173-disp"; 181 + iommus = <&iommu M4U_PORT_DISP_OVL0>, 182 + <&iommu M4U_PORT_DISP_RDMA0>; 183 + };
+9
MAINTAINERS
··· 11176 11176 F: Documentation/devicetree/bindings/i2c/i2c-mt65xx.txt 11177 11177 F: drivers/i2c/busses/i2c-mt65xx.c 11178 11178 11179 + MEDIATEK IOMMU DRIVER 11180 + M: Yong Wu <yong.wu@mediatek.com> 11181 + L: iommu@lists.linux-foundation.org 11182 + L: linux-mediatek@lists.infradead.org (moderated for non-subscribers) 11183 + S: Supported 11184 + F: Documentation/devicetree/bindings/iommu/mediatek* 11185 + F: drivers/iommu/mtk_iommu* 11186 + F: include/dt-bindings/memory/mt*-port.h 11187 + 11179 11188 MEDIATEK JPEG DRIVER 11180 11189 M: Rick Chang <rick.chang@mediatek.com> 11181 11190 M: Bin Liu <bin.liu@mediatek.com>
+1
drivers/iommu/amd/Kconfig
··· 10 10 select IOMMU_API 11 11 select IOMMU_IOVA 12 12 select IOMMU_DMA 13 + select IOMMU_IO_PGTABLE 13 14 depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE 14 15 help 15 16 With this option you can enable support for AMD IOMMU hardware in
+1 -1
drivers/iommu/amd/Makefile
··· 1 1 # SPDX-License-Identifier: GPL-2.0-only 2 - obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o 2 + obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o 3 3 obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o 4 4 obj-$(CONFIG_AMD_IOMMU_V2) += iommu_v2.o
+22
drivers/iommu/amd/amd_iommu.h
··· 36 36 extern int amd_iommu_reenable(int); 37 37 extern int amd_iommu_enable_faulting(void); 38 38 extern int amd_iommu_guest_ir; 39 + extern enum io_pgtable_fmt amd_iommu_pgtable; 39 40 40 41 /* IOMMUv2 specific functions */ 41 42 struct iommu_domain; ··· 57 56 extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids); 58 57 extern int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, 59 58 u64 address); 59 + extern void amd_iommu_update_and_flush_device_table(struct protection_domain *domain); 60 + extern void amd_iommu_domain_update(struct protection_domain *domain); 61 + extern void amd_iommu_domain_flush_complete(struct protection_domain *domain); 62 + extern void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain); 60 63 extern int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid); 61 64 extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid, 62 65 unsigned long cr3); ··· 104 99 return phys_to_virt(__sme_clr(paddr)); 105 100 } 106 101 102 + static inline 103 + void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root) 104 + { 105 + atomic64_set(&domain->iop.pt_root, root); 106 + domain->iop.root = (u64 *)(root & PAGE_MASK); 107 + domain->iop.mode = root & 7; /* lowest 3 bits encode pgtable mode */ 108 + } 109 + 110 + static inline 111 + void amd_iommu_domain_clr_pt_root(struct protection_domain *domain) 112 + { 113 + amd_iommu_domain_set_pt_root(domain, 0); 114 + } 115 + 116 + 107 117 extern bool translation_pre_enabled(struct amd_iommu *iommu); 108 118 extern bool amd_iommu_is_attach_deferred(struct iommu_domain *domain, 109 119 struct device *dev); ··· 131 111 static inline void amd_iommu_apply_ivrs_quirks(void) { } 132 112 #endif 133 113 114 + extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain, 115 + u64 *root, int mode); 134 116 #endif
+36 -7
drivers/iommu/amd/amd_iommu_types.h
··· 15 15 #include <linux/spinlock.h> 16 16 #include <linux/pci.h> 17 17 #include <linux/irqreturn.h> 18 + #include <linux/io-pgtable.h> 18 19 19 20 /* 20 21 * Maximum number of IOMMUs supported ··· 253 252 254 253 #define GA_GUEST_NR 0x1 255 254 255 + #define IOMMU_IN_ADDR_BIT_SIZE 52 256 + #define IOMMU_OUT_ADDR_BIT_SIZE 52 257 + 258 + /* 259 + * This bitmap is used to advertise the page sizes our hardware support 260 + * to the IOMMU core, which will then use this information to split 261 + * physically contiguous memory regions it is mapping into page sizes 262 + * that we support. 263 + * 264 + * 512GB Pages are not supported due to a hardware bug 265 + */ 266 + #define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) 267 + 256 268 /* Bit value definition for dte irq remapping fields*/ 257 269 #define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6) 258 270 #define DTE_IRQ_REMAP_INTCTL_MASK (0x3ULL << 60) ··· 484 470 485 471 #define AMD_IOMMU_FLAG_TRANS_PRE_ENABLED (1 << 0) 486 472 473 + #define io_pgtable_to_data(x) \ 474 + container_of((x), struct amd_io_pgtable, iop) 475 + 476 + #define io_pgtable_ops_to_data(x) \ 477 + io_pgtable_to_data(io_pgtable_ops_to_pgtable(x)) 478 + 479 + #define io_pgtable_ops_to_domain(x) \ 480 + container_of(io_pgtable_ops_to_data(x), \ 481 + struct protection_domain, iop) 482 + 483 + #define io_pgtable_cfg_to_data(x) \ 484 + container_of((x), struct amd_io_pgtable, pgtbl_cfg) 485 + 486 + struct amd_io_pgtable { 487 + struct io_pgtable_cfg pgtbl_cfg; 488 + struct io_pgtable iop; 489 + int mode; 490 + u64 *root; 491 + atomic64_t pt_root; /* pgtable root and pgtable mode */ 492 + }; 493 + 487 494 /* 488 495 * This structure contains generic data for IOMMU protection domains 489 496 * independent of their use. ··· 513 478 struct list_head dev_list; /* List of all devices in this domain */ 514 479 struct iommu_domain domain; /* generic domain handle used by 515 480 iommu core code */ 481 + struct amd_io_pgtable iop; 516 482 spinlock_t lock; /* mostly used to lock the page table*/ 517 483 u16 id; /* the domain id written to the device table */ 518 - atomic64_t pt_root; /* pgtable root and pgtable mode */ 519 484 int glx; /* Number of levels for GCR3 table */ 520 485 u64 *gcr3_tbl; /* Guest CR3 table */ 521 486 unsigned long flags; /* flags to find out type of domain */ 522 487 unsigned dev_cnt; /* devices assigned to this domain */ 523 488 unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ 524 - }; 525 - 526 - /* For decocded pt_root */ 527 - struct domain_pgtable { 528 - int mode; 529 - u64 *root; 530 489 }; 531 490 532 491 /*
+39 -15
drivers/iommu/amd/init.c
··· 12 12 #include <linux/acpi.h> 13 13 #include <linux/list.h> 14 14 #include <linux/bitmap.h> 15 + #include <linux/delay.h> 15 16 #include <linux/slab.h> 16 17 #include <linux/syscore_ops.h> 17 18 #include <linux/interrupt.h> ··· 148 147 bool amd_iommu_dump; 149 148 bool amd_iommu_irq_remap __read_mostly; 150 149 150 + enum io_pgtable_fmt amd_iommu_pgtable = AMD_IOMMU_V1; 151 + 151 152 int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC; 152 153 static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE; 153 154 ··· 257 254 static int amd_iommu_enable_interrupts(void); 258 255 static int __init iommu_go_to_state(enum iommu_init_state state); 259 256 static void init_device_table_dma(void); 257 + static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, 258 + u8 fxn, u64 *value, bool is_write); 260 259 261 260 static bool amd_iommu_pre_enabled = true; 262 261 ··· 1717 1712 return 0; 1718 1713 } 1719 1714 1720 - static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, 1721 - u8 fxn, u64 *value, bool is_write); 1722 - 1723 - static void init_iommu_perf_ctr(struct amd_iommu *iommu) 1715 + static void __init init_iommu_perf_ctr(struct amd_iommu *iommu) 1724 1716 { 1717 + int retry; 1725 1718 struct pci_dev *pdev = iommu->dev; 1726 - u64 val = 0xabcd, val2 = 0, save_reg = 0; 1719 + u64 val = 0xabcd, val2 = 0, save_reg, save_src; 1727 1720 1728 1721 if (!iommu_feature(iommu, FEATURE_PC)) 1729 1722 return; ··· 1729 1726 amd_iommu_pc_present = true; 1730 1727 1731 1728 /* save the value to restore, if writable */ 1732 - if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, false)) 1729 + if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, false) || 1730 + iommu_pc_get_set_reg(iommu, 0, 0, 8, &save_src, false)) 1731 + goto pc_false; 1732 + 1733 + /* 1734 + * Disable power gating by programing the performance counter 1735 + * source to 20 (i.e. counts the reads and writes from/to IOMMU 1736 + * Reserved Register [MMIO Offset 1FF8h] that are ignored.), 1737 + * which never get incremented during this init phase. 1738 + * (Note: The event is also deprecated.) 1739 + */ 1740 + val = 20; 1741 + if (iommu_pc_get_set_reg(iommu, 0, 0, 8, &val, true)) 1733 1742 goto pc_false; 1734 1743 1735 1744 /* Check if the performance counters can be written to */ 1736 - if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) || 1737 - (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) || 1738 - (val != val2)) 1739 - goto pc_false; 1745 + val = 0xabcd; 1746 + for (retry = 5; retry; retry--) { 1747 + if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true) || 1748 + iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false) || 1749 + val2) 1750 + break; 1751 + 1752 + /* Wait about 20 msec for power gating to disable and retry. */ 1753 + msleep(20); 1754 + } 1740 1755 1741 1756 /* restore */ 1742 - if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, true)) 1757 + if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, true) || 1758 + iommu_pc_get_set_reg(iommu, 0, 0, 8, &save_src, true)) 1759 + goto pc_false; 1760 + 1761 + if (val != val2) 1743 1762 goto pc_false; 1744 1763 1745 1764 pci_info(pdev, "IOMMU performance counters supported\n"); ··· 1953 1928 struct pci_dev *pdev = iommu->dev; 1954 1929 int i; 1955 1930 1956 - pci_info(pdev, "Found IOMMU cap 0x%hx\n", iommu->cap_ptr); 1931 + pci_info(pdev, "Found IOMMU cap 0x%x\n", iommu->cap_ptr); 1957 1932 1958 1933 if (iommu->cap & (1 << IOMMU_CAP_EFR)) { 1959 1934 pci_info(pdev, "Extended features (%#llx):", ··· 1981 1956 static int __init amd_iommu_init_pci(void) 1982 1957 { 1983 1958 struct amd_iommu *iommu; 1984 - int ret = 0; 1959 + int ret; 1985 1960 1986 1961 for_each_iommu(iommu) { 1987 1962 ret = iommu_init_pci(iommu); ··· 2712 2687 static int __init early_amd_iommu_init(void) 2713 2688 { 2714 2689 struct acpi_table_header *ivrs_base; 2690 + int i, remap_cache_sz, ret; 2715 2691 acpi_status status; 2716 - int i, remap_cache_sz, ret = 0; 2717 2692 u32 pci_id; 2718 2693 2719 2694 if (!amd_iommu_detected) ··· 2857 2832 out: 2858 2833 /* Don't leak any ACPI memory */ 2859 2834 acpi_put_table(ivrs_base); 2860 - ivrs_base = NULL; 2861 2835 2862 2836 return ret; 2863 2837 }
+558
drivers/iommu/amd/io_pgtable.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * CPU-agnostic AMD IO page table allocator. 4 + * 5 + * Copyright (C) 2020 Advanced Micro Devices, Inc. 6 + * Author: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> 7 + */ 8 + 9 + #define pr_fmt(fmt) "AMD-Vi: " fmt 10 + #define dev_fmt(fmt) pr_fmt(fmt) 11 + 12 + #include <linux/atomic.h> 13 + #include <linux/bitops.h> 14 + #include <linux/io-pgtable.h> 15 + #include <linux/kernel.h> 16 + #include <linux/sizes.h> 17 + #include <linux/slab.h> 18 + #include <linux/types.h> 19 + #include <linux/dma-mapping.h> 20 + 21 + #include <asm/barrier.h> 22 + 23 + #include "amd_iommu_types.h" 24 + #include "amd_iommu.h" 25 + 26 + static void v1_tlb_flush_all(void *cookie) 27 + { 28 + } 29 + 30 + static void v1_tlb_flush_walk(unsigned long iova, size_t size, 31 + size_t granule, void *cookie) 32 + { 33 + } 34 + 35 + static void v1_tlb_add_page(struct iommu_iotlb_gather *gather, 36 + unsigned long iova, size_t granule, 37 + void *cookie) 38 + { 39 + } 40 + 41 + static const struct iommu_flush_ops v1_flush_ops = { 42 + .tlb_flush_all = v1_tlb_flush_all, 43 + .tlb_flush_walk = v1_tlb_flush_walk, 44 + .tlb_add_page = v1_tlb_add_page, 45 + }; 46 + 47 + /* 48 + * Helper function to get the first pte of a large mapping 49 + */ 50 + static u64 *first_pte_l7(u64 *pte, unsigned long *page_size, 51 + unsigned long *count) 52 + { 53 + unsigned long pte_mask, pg_size, cnt; 54 + u64 *fpte; 55 + 56 + pg_size = PTE_PAGE_SIZE(*pte); 57 + cnt = PAGE_SIZE_PTE_COUNT(pg_size); 58 + pte_mask = ~((cnt << 3) - 1); 59 + fpte = (u64 *)(((unsigned long)pte) & pte_mask); 60 + 61 + if (page_size) 62 + *page_size = pg_size; 63 + 64 + if (count) 65 + *count = cnt; 66 + 67 + return fpte; 68 + } 69 + 70 + /**************************************************************************** 71 + * 72 + * The functions below are used the create the page table mappings for 73 + * unity mapped regions. 74 + * 75 + ****************************************************************************/ 76 + 77 + static void free_page_list(struct page *freelist) 78 + { 79 + while (freelist != NULL) { 80 + unsigned long p = (unsigned long)page_address(freelist); 81 + 82 + freelist = freelist->freelist; 83 + free_page(p); 84 + } 85 + } 86 + 87 + static struct page *free_pt_page(unsigned long pt, struct page *freelist) 88 + { 89 + struct page *p = virt_to_page((void *)pt); 90 + 91 + p->freelist = freelist; 92 + 93 + return p; 94 + } 95 + 96 + #define DEFINE_FREE_PT_FN(LVL, FN) \ 97 + static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist) \ 98 + { \ 99 + unsigned long p; \ 100 + u64 *pt; \ 101 + int i; \ 102 + \ 103 + pt = (u64 *)__pt; \ 104 + \ 105 + for (i = 0; i < 512; ++i) { \ 106 + /* PTE present? */ \ 107 + if (!IOMMU_PTE_PRESENT(pt[i])) \ 108 + continue; \ 109 + \ 110 + /* Large PTE? */ \ 111 + if (PM_PTE_LEVEL(pt[i]) == 0 || \ 112 + PM_PTE_LEVEL(pt[i]) == 7) \ 113 + continue; \ 114 + \ 115 + p = (unsigned long)IOMMU_PTE_PAGE(pt[i]); \ 116 + freelist = FN(p, freelist); \ 117 + } \ 118 + \ 119 + return free_pt_page((unsigned long)pt, freelist); \ 120 + } 121 + 122 + DEFINE_FREE_PT_FN(l2, free_pt_page) 123 + DEFINE_FREE_PT_FN(l3, free_pt_l2) 124 + DEFINE_FREE_PT_FN(l4, free_pt_l3) 125 + DEFINE_FREE_PT_FN(l5, free_pt_l4) 126 + DEFINE_FREE_PT_FN(l6, free_pt_l5) 127 + 128 + static struct page *free_sub_pt(unsigned long root, int mode, 129 + struct page *freelist) 130 + { 131 + switch (mode) { 132 + case PAGE_MODE_NONE: 133 + case PAGE_MODE_7_LEVEL: 134 + break; 135 + case PAGE_MODE_1_LEVEL: 136 + freelist = free_pt_page(root, freelist); 137 + break; 138 + case PAGE_MODE_2_LEVEL: 139 + freelist = free_pt_l2(root, freelist); 140 + break; 141 + case PAGE_MODE_3_LEVEL: 142 + freelist = free_pt_l3(root, freelist); 143 + break; 144 + case PAGE_MODE_4_LEVEL: 145 + freelist = free_pt_l4(root, freelist); 146 + break; 147 + case PAGE_MODE_5_LEVEL: 148 + freelist = free_pt_l5(root, freelist); 149 + break; 150 + case PAGE_MODE_6_LEVEL: 151 + freelist = free_pt_l6(root, freelist); 152 + break; 153 + default: 154 + BUG(); 155 + } 156 + 157 + return freelist; 158 + } 159 + 160 + void amd_iommu_domain_set_pgtable(struct protection_domain *domain, 161 + u64 *root, int mode) 162 + { 163 + u64 pt_root; 164 + 165 + /* lowest 3 bits encode pgtable mode */ 166 + pt_root = mode & 7; 167 + pt_root |= (u64)root; 168 + 169 + amd_iommu_domain_set_pt_root(domain, pt_root); 170 + } 171 + 172 + /* 173 + * This function is used to add another level to an IO page table. Adding 174 + * another level increases the size of the address space by 9 bits to a size up 175 + * to 64 bits. 176 + */ 177 + static bool increase_address_space(struct protection_domain *domain, 178 + unsigned long address, 179 + gfp_t gfp) 180 + { 181 + unsigned long flags; 182 + bool ret = true; 183 + u64 *pte; 184 + 185 + spin_lock_irqsave(&domain->lock, flags); 186 + 187 + if (address <= PM_LEVEL_SIZE(domain->iop.mode)) 188 + goto out; 189 + 190 + ret = false; 191 + if (WARN_ON_ONCE(domain->iop.mode == PAGE_MODE_6_LEVEL)) 192 + goto out; 193 + 194 + pte = (void *)get_zeroed_page(gfp); 195 + if (!pte) 196 + goto out; 197 + 198 + *pte = PM_LEVEL_PDE(domain->iop.mode, iommu_virt_to_phys(domain->iop.root)); 199 + 200 + domain->iop.root = pte; 201 + domain->iop.mode += 1; 202 + amd_iommu_update_and_flush_device_table(domain); 203 + amd_iommu_domain_flush_complete(domain); 204 + 205 + /* 206 + * Device Table needs to be updated and flushed before the new root can 207 + * be published. 208 + */ 209 + amd_iommu_domain_set_pgtable(domain, pte, domain->iop.mode); 210 + 211 + ret = true; 212 + 213 + out: 214 + spin_unlock_irqrestore(&domain->lock, flags); 215 + 216 + return ret; 217 + } 218 + 219 + static u64 *alloc_pte(struct protection_domain *domain, 220 + unsigned long address, 221 + unsigned long page_size, 222 + u64 **pte_page, 223 + gfp_t gfp, 224 + bool *updated) 225 + { 226 + int level, end_lvl; 227 + u64 *pte, *page; 228 + 229 + BUG_ON(!is_power_of_2(page_size)); 230 + 231 + while (address > PM_LEVEL_SIZE(domain->iop.mode)) { 232 + /* 233 + * Return an error if there is no memory to update the 234 + * page-table. 235 + */ 236 + if (!increase_address_space(domain, address, gfp)) 237 + return NULL; 238 + } 239 + 240 + 241 + level = domain->iop.mode - 1; 242 + pte = &domain->iop.root[PM_LEVEL_INDEX(level, address)]; 243 + address = PAGE_SIZE_ALIGN(address, page_size); 244 + end_lvl = PAGE_SIZE_LEVEL(page_size); 245 + 246 + while (level > end_lvl) { 247 + u64 __pte, __npte; 248 + int pte_level; 249 + 250 + __pte = *pte; 251 + pte_level = PM_PTE_LEVEL(__pte); 252 + 253 + /* 254 + * If we replace a series of large PTEs, we need 255 + * to tear down all of them. 256 + */ 257 + if (IOMMU_PTE_PRESENT(__pte) && 258 + pte_level == PAGE_MODE_7_LEVEL) { 259 + unsigned long count, i; 260 + u64 *lpte; 261 + 262 + lpte = first_pte_l7(pte, NULL, &count); 263 + 264 + /* 265 + * Unmap the replicated PTEs that still match the 266 + * original large mapping 267 + */ 268 + for (i = 0; i < count; ++i) 269 + cmpxchg64(&lpte[i], __pte, 0ULL); 270 + 271 + *updated = true; 272 + continue; 273 + } 274 + 275 + if (!IOMMU_PTE_PRESENT(__pte) || 276 + pte_level == PAGE_MODE_NONE) { 277 + page = (u64 *)get_zeroed_page(gfp); 278 + 279 + if (!page) 280 + return NULL; 281 + 282 + __npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page)); 283 + 284 + /* pte could have been changed somewhere. */ 285 + if (cmpxchg64(pte, __pte, __npte) != __pte) 286 + free_page((unsigned long)page); 287 + else if (IOMMU_PTE_PRESENT(__pte)) 288 + *updated = true; 289 + 290 + continue; 291 + } 292 + 293 + /* No level skipping support yet */ 294 + if (pte_level != level) 295 + return NULL; 296 + 297 + level -= 1; 298 + 299 + pte = IOMMU_PTE_PAGE(__pte); 300 + 301 + if (pte_page && level == end_lvl) 302 + *pte_page = pte; 303 + 304 + pte = &pte[PM_LEVEL_INDEX(level, address)]; 305 + } 306 + 307 + return pte; 308 + } 309 + 310 + /* 311 + * This function checks if there is a PTE for a given dma address. If 312 + * there is one, it returns the pointer to it. 313 + */ 314 + static u64 *fetch_pte(struct amd_io_pgtable *pgtable, 315 + unsigned long address, 316 + unsigned long *page_size) 317 + { 318 + int level; 319 + u64 *pte; 320 + 321 + *page_size = 0; 322 + 323 + if (address > PM_LEVEL_SIZE(pgtable->mode)) 324 + return NULL; 325 + 326 + level = pgtable->mode - 1; 327 + pte = &pgtable->root[PM_LEVEL_INDEX(level, address)]; 328 + *page_size = PTE_LEVEL_PAGE_SIZE(level); 329 + 330 + while (level > 0) { 331 + 332 + /* Not Present */ 333 + if (!IOMMU_PTE_PRESENT(*pte)) 334 + return NULL; 335 + 336 + /* Large PTE */ 337 + if (PM_PTE_LEVEL(*pte) == 7 || 338 + PM_PTE_LEVEL(*pte) == 0) 339 + break; 340 + 341 + /* No level skipping support yet */ 342 + if (PM_PTE_LEVEL(*pte) != level) 343 + return NULL; 344 + 345 + level -= 1; 346 + 347 + /* Walk to the next level */ 348 + pte = IOMMU_PTE_PAGE(*pte); 349 + pte = &pte[PM_LEVEL_INDEX(level, address)]; 350 + *page_size = PTE_LEVEL_PAGE_SIZE(level); 351 + } 352 + 353 + /* 354 + * If we have a series of large PTEs, make 355 + * sure to return a pointer to the first one. 356 + */ 357 + if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL) 358 + pte = first_pte_l7(pte, page_size, NULL); 359 + 360 + return pte; 361 + } 362 + 363 + static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist) 364 + { 365 + unsigned long pt; 366 + int mode; 367 + 368 + while (cmpxchg64(pte, pteval, 0) != pteval) { 369 + pr_warn("AMD-Vi: IOMMU pte changed since we read it\n"); 370 + pteval = *pte; 371 + } 372 + 373 + if (!IOMMU_PTE_PRESENT(pteval)) 374 + return freelist; 375 + 376 + pt = (unsigned long)IOMMU_PTE_PAGE(pteval); 377 + mode = IOMMU_PTE_MODE(pteval); 378 + 379 + return free_sub_pt(pt, mode, freelist); 380 + } 381 + 382 + /* 383 + * Generic mapping functions. It maps a physical address into a DMA 384 + * address space. It allocates the page table pages if necessary. 385 + * In the future it can be extended to a generic mapping function 386 + * supporting all features of AMD IOMMU page tables like level skipping 387 + * and full 64 bit address spaces. 388 + */ 389 + static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova, 390 + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) 391 + { 392 + struct protection_domain *dom = io_pgtable_ops_to_domain(ops); 393 + struct page *freelist = NULL; 394 + bool updated = false; 395 + u64 __pte, *pte; 396 + int ret, i, count; 397 + 398 + BUG_ON(!IS_ALIGNED(iova, size)); 399 + BUG_ON(!IS_ALIGNED(paddr, size)); 400 + 401 + ret = -EINVAL; 402 + if (!(prot & IOMMU_PROT_MASK)) 403 + goto out; 404 + 405 + count = PAGE_SIZE_PTE_COUNT(size); 406 + pte = alloc_pte(dom, iova, size, NULL, gfp, &updated); 407 + 408 + ret = -ENOMEM; 409 + if (!pte) 410 + goto out; 411 + 412 + for (i = 0; i < count; ++i) 413 + freelist = free_clear_pte(&pte[i], pte[i], freelist); 414 + 415 + if (freelist != NULL) 416 + updated = true; 417 + 418 + if (count > 1) { 419 + __pte = PAGE_SIZE_PTE(__sme_set(paddr), size); 420 + __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC; 421 + } else 422 + __pte = __sme_set(paddr) | IOMMU_PTE_PR | IOMMU_PTE_FC; 423 + 424 + if (prot & IOMMU_PROT_IR) 425 + __pte |= IOMMU_PTE_IR; 426 + if (prot & IOMMU_PROT_IW) 427 + __pte |= IOMMU_PTE_IW; 428 + 429 + for (i = 0; i < count; ++i) 430 + pte[i] = __pte; 431 + 432 + ret = 0; 433 + 434 + out: 435 + if (updated) { 436 + unsigned long flags; 437 + 438 + spin_lock_irqsave(&dom->lock, flags); 439 + /* 440 + * Flush domain TLB(s) and wait for completion. Any Device-Table 441 + * Updates and flushing already happened in 442 + * increase_address_space(). 443 + */ 444 + amd_iommu_domain_flush_tlb_pde(dom); 445 + amd_iommu_domain_flush_complete(dom); 446 + spin_unlock_irqrestore(&dom->lock, flags); 447 + } 448 + 449 + /* Everything flushed out, free pages now */ 450 + free_page_list(freelist); 451 + 452 + return ret; 453 + } 454 + 455 + static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops, 456 + unsigned long iova, 457 + size_t size, 458 + struct iommu_iotlb_gather *gather) 459 + { 460 + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); 461 + unsigned long long unmapped; 462 + unsigned long unmap_size; 463 + u64 *pte; 464 + 465 + BUG_ON(!is_power_of_2(size)); 466 + 467 + unmapped = 0; 468 + 469 + while (unmapped < size) { 470 + pte = fetch_pte(pgtable, iova, &unmap_size); 471 + if (pte) { 472 + int i, count; 473 + 474 + count = PAGE_SIZE_PTE_COUNT(unmap_size); 475 + for (i = 0; i < count; i++) 476 + pte[i] = 0ULL; 477 + } 478 + 479 + iova = (iova & ~(unmap_size - 1)) + unmap_size; 480 + unmapped += unmap_size; 481 + } 482 + 483 + BUG_ON(unmapped && !is_power_of_2(unmapped)); 484 + 485 + return unmapped; 486 + } 487 + 488 + static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova) 489 + { 490 + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); 491 + unsigned long offset_mask, pte_pgsize; 492 + u64 *pte, __pte; 493 + 494 + if (pgtable->mode == PAGE_MODE_NONE) 495 + return iova; 496 + 497 + pte = fetch_pte(pgtable, iova, &pte_pgsize); 498 + 499 + if (!pte || !IOMMU_PTE_PRESENT(*pte)) 500 + return 0; 501 + 502 + offset_mask = pte_pgsize - 1; 503 + __pte = __sme_clr(*pte & PM_ADDR_MASK); 504 + 505 + return (__pte & ~offset_mask) | (iova & offset_mask); 506 + } 507 + 508 + /* 509 + * ---------------------------------------------------- 510 + */ 511 + static void v1_free_pgtable(struct io_pgtable *iop) 512 + { 513 + struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop); 514 + struct protection_domain *dom; 515 + struct page *freelist = NULL; 516 + unsigned long root; 517 + 518 + if (pgtable->mode == PAGE_MODE_NONE) 519 + return; 520 + 521 + dom = container_of(pgtable, struct protection_domain, iop); 522 + 523 + /* Update data structure */ 524 + amd_iommu_domain_clr_pt_root(dom); 525 + 526 + /* Make changes visible to IOMMUs */ 527 + amd_iommu_domain_update(dom); 528 + 529 + /* Page-table is not visible to IOMMU anymore, so free it */ 530 + BUG_ON(pgtable->mode < PAGE_MODE_NONE || 531 + pgtable->mode > PAGE_MODE_6_LEVEL); 532 + 533 + root = (unsigned long)pgtable->root; 534 + freelist = free_sub_pt(root, pgtable->mode, freelist); 535 + 536 + free_page_list(freelist); 537 + } 538 + 539 + static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) 540 + { 541 + struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg); 542 + 543 + cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES, 544 + cfg->ias = IOMMU_IN_ADDR_BIT_SIZE, 545 + cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE, 546 + cfg->tlb = &v1_flush_ops; 547 + 548 + pgtable->iop.ops.map = iommu_v1_map_page; 549 + pgtable->iop.ops.unmap = iommu_v1_unmap_page; 550 + pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys; 551 + 552 + return &pgtable->iop; 553 + } 554 + 555 + struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns = { 556 + .alloc = v1_alloc_pgtable, 557 + .free = v1_free_pgtable, 558 + };
+76 -596
drivers/iommu/amd/iommu.c
··· 31 31 #include <linux/irqdomain.h> 32 32 #include <linux/percpu.h> 33 33 #include <linux/iova.h> 34 + #include <linux/io-pgtable.h> 34 35 #include <asm/irq_remapping.h> 35 36 #include <asm/io_apic.h> 36 37 #include <asm/apic.h> ··· 57 56 #define MSI_RANGE_END (0xfeefffff) 58 57 #define HT_RANGE_START (0xfd00000000ULL) 59 58 #define HT_RANGE_END (0xffffffffffULL) 60 - 61 - /* 62 - * This bitmap is used to advertise the page sizes our hardware support 63 - * to the IOMMU core, which will then use this information to split 64 - * physically contiguous memory regions it is mapping into page sizes 65 - * that we support. 66 - * 67 - * 512GB Pages are not supported due to a hardware bug 68 - */ 69 - #define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) 70 59 71 60 #define DEFAULT_PGTABLE_LEVEL PAGE_MODE_3_LEVEL 72 61 ··· 87 96 88 97 struct kmem_cache *amd_iommu_irq_cache; 89 98 90 - static void update_domain(struct protection_domain *domain); 91 99 static void detach_device(struct device *dev); 92 - static void update_and_flush_device_table(struct protection_domain *domain, 93 - struct domain_pgtable *pgtable); 94 100 95 101 /**************************************************************************** 96 102 * ··· 137 149 static struct protection_domain *to_pdomain(struct iommu_domain *dom) 138 150 { 139 151 return container_of(dom, struct protection_domain, domain); 140 - } 141 - 142 - static void amd_iommu_domain_get_pgtable(struct protection_domain *domain, 143 - struct domain_pgtable *pgtable) 144 - { 145 - u64 pt_root = atomic64_read(&domain->pt_root); 146 - 147 - pgtable->root = (u64 *)(pt_root & PAGE_MASK); 148 - pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */ 149 - } 150 - 151 - static void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root) 152 - { 153 - atomic64_set(&domain->pt_root, root); 154 - } 155 - 156 - static void amd_iommu_domain_clr_pt_root(struct protection_domain *domain) 157 - { 158 - amd_iommu_domain_set_pt_root(domain, 0); 159 - } 160 - 161 - static void amd_iommu_domain_set_pgtable(struct protection_domain *domain, 162 - u64 *root, int mode) 163 - { 164 - u64 pt_root; 165 - 166 - /* lowest 3 bits encode pgtable mode */ 167 - pt_root = mode & 7; 168 - pt_root |= (u64)root; 169 - 170 - amd_iommu_domain_set_pt_root(domain, pt_root); 171 152 } 172 153 173 154 static struct iommu_dev_data *alloc_dev_data(u16 devid) ··· 392 435 * We keep dev_data around for unplugged devices and reuse it when the 393 436 * device is re-plugged - not doing so would introduce a ton of races. 394 437 */ 395 - } 396 - 397 - /* 398 - * Helper function to get the first pte of a large mapping 399 - */ 400 - static u64 *first_pte_l7(u64 *pte, unsigned long *page_size, 401 - unsigned long *count) 402 - { 403 - unsigned long pte_mask, pg_size, cnt; 404 - u64 *fpte; 405 - 406 - pg_size = PTE_PAGE_SIZE(*pte); 407 - cnt = PAGE_SIZE_PTE_COUNT(pg_size); 408 - pte_mask = ~((cnt << 3) - 1); 409 - fpte = (u64 *)(((unsigned long)pte) & pte_mask); 410 - 411 - if (page_size) 412 - *page_size = pg_size; 413 - 414 - if (count) 415 - *count = cnt; 416 - 417 - return fpte; 418 438 } 419 439 420 440 /**************************************************************************** ··· 1269 1335 } 1270 1336 1271 1337 /* Flush the whole IO/TLB for a given protection domain - including PDE */ 1272 - static void domain_flush_tlb_pde(struct protection_domain *domain) 1338 + void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain) 1273 1339 { 1274 1340 __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); 1275 1341 } 1276 1342 1277 - static void domain_flush_complete(struct protection_domain *domain) 1343 + void amd_iommu_domain_flush_complete(struct protection_domain *domain) 1278 1344 { 1279 1345 int i; 1280 1346 ··· 1299 1365 1300 1366 spin_lock_irqsave(&domain->lock, flags); 1301 1367 domain_flush_pages(domain, iova, size); 1302 - domain_flush_complete(domain); 1368 + amd_iommu_domain_flush_complete(domain); 1303 1369 spin_unlock_irqrestore(&domain->lock, flags); 1304 1370 } 1305 1371 } ··· 1314 1380 1315 1381 list_for_each_entry(dev_data, &domain->dev_list, list) 1316 1382 device_flush_dte(dev_data); 1317 - } 1318 - 1319 - /**************************************************************************** 1320 - * 1321 - * The functions below are used the create the page table mappings for 1322 - * unity mapped regions. 1323 - * 1324 - ****************************************************************************/ 1325 - 1326 - static void free_page_list(struct page *freelist) 1327 - { 1328 - while (freelist != NULL) { 1329 - unsigned long p = (unsigned long)page_address(freelist); 1330 - freelist = freelist->freelist; 1331 - free_page(p); 1332 - } 1333 - } 1334 - 1335 - static struct page *free_pt_page(unsigned long pt, struct page *freelist) 1336 - { 1337 - struct page *p = virt_to_page((void *)pt); 1338 - 1339 - p->freelist = freelist; 1340 - 1341 - return p; 1342 - } 1343 - 1344 - #define DEFINE_FREE_PT_FN(LVL, FN) \ 1345 - static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist) \ 1346 - { \ 1347 - unsigned long p; \ 1348 - u64 *pt; \ 1349 - int i; \ 1350 - \ 1351 - pt = (u64 *)__pt; \ 1352 - \ 1353 - for (i = 0; i < 512; ++i) { \ 1354 - /* PTE present? */ \ 1355 - if (!IOMMU_PTE_PRESENT(pt[i])) \ 1356 - continue; \ 1357 - \ 1358 - /* Large PTE? */ \ 1359 - if (PM_PTE_LEVEL(pt[i]) == 0 || \ 1360 - PM_PTE_LEVEL(pt[i]) == 7) \ 1361 - continue; \ 1362 - \ 1363 - p = (unsigned long)IOMMU_PTE_PAGE(pt[i]); \ 1364 - freelist = FN(p, freelist); \ 1365 - } \ 1366 - \ 1367 - return free_pt_page((unsigned long)pt, freelist); \ 1368 - } 1369 - 1370 - DEFINE_FREE_PT_FN(l2, free_pt_page) 1371 - DEFINE_FREE_PT_FN(l3, free_pt_l2) 1372 - DEFINE_FREE_PT_FN(l4, free_pt_l3) 1373 - DEFINE_FREE_PT_FN(l5, free_pt_l4) 1374 - DEFINE_FREE_PT_FN(l6, free_pt_l5) 1375 - 1376 - static struct page *free_sub_pt(unsigned long root, int mode, 1377 - struct page *freelist) 1378 - { 1379 - switch (mode) { 1380 - case PAGE_MODE_NONE: 1381 - case PAGE_MODE_7_LEVEL: 1382 - break; 1383 - case PAGE_MODE_1_LEVEL: 1384 - freelist = free_pt_page(root, freelist); 1385 - break; 1386 - case PAGE_MODE_2_LEVEL: 1387 - freelist = free_pt_l2(root, freelist); 1388 - break; 1389 - case PAGE_MODE_3_LEVEL: 1390 - freelist = free_pt_l3(root, freelist); 1391 - break; 1392 - case PAGE_MODE_4_LEVEL: 1393 - freelist = free_pt_l4(root, freelist); 1394 - break; 1395 - case PAGE_MODE_5_LEVEL: 1396 - freelist = free_pt_l5(root, freelist); 1397 - break; 1398 - case PAGE_MODE_6_LEVEL: 1399 - freelist = free_pt_l6(root, freelist); 1400 - break; 1401 - default: 1402 - BUG(); 1403 - } 1404 - 1405 - return freelist; 1406 - } 1407 - 1408 - static void free_pagetable(struct domain_pgtable *pgtable) 1409 - { 1410 - struct page *freelist = NULL; 1411 - unsigned long root; 1412 - 1413 - if (pgtable->mode == PAGE_MODE_NONE) 1414 - return; 1415 - 1416 - BUG_ON(pgtable->mode < PAGE_MODE_NONE || 1417 - pgtable->mode > PAGE_MODE_6_LEVEL); 1418 - 1419 - root = (unsigned long)pgtable->root; 1420 - freelist = free_sub_pt(root, pgtable->mode, freelist); 1421 - 1422 - free_page_list(freelist); 1423 - } 1424 - 1425 - /* 1426 - * This function is used to add another level to an IO page table. Adding 1427 - * another level increases the size of the address space by 9 bits to a size up 1428 - * to 64 bits. 1429 - */ 1430 - static bool increase_address_space(struct protection_domain *domain, 1431 - unsigned long address, 1432 - gfp_t gfp) 1433 - { 1434 - struct domain_pgtable pgtable; 1435 - unsigned long flags; 1436 - bool ret = true; 1437 - u64 *pte; 1438 - 1439 - spin_lock_irqsave(&domain->lock, flags); 1440 - 1441 - amd_iommu_domain_get_pgtable(domain, &pgtable); 1442 - 1443 - if (address <= PM_LEVEL_SIZE(pgtable.mode)) 1444 - goto out; 1445 - 1446 - ret = false; 1447 - if (WARN_ON_ONCE(pgtable.mode == PAGE_MODE_6_LEVEL)) 1448 - goto out; 1449 - 1450 - pte = (void *)get_zeroed_page(gfp); 1451 - if (!pte) 1452 - goto out; 1453 - 1454 - *pte = PM_LEVEL_PDE(pgtable.mode, iommu_virt_to_phys(pgtable.root)); 1455 - 1456 - pgtable.root = pte; 1457 - pgtable.mode += 1; 1458 - update_and_flush_device_table(domain, &pgtable); 1459 - domain_flush_complete(domain); 1460 - 1461 - /* 1462 - * Device Table needs to be updated and flushed before the new root can 1463 - * be published. 1464 - */ 1465 - amd_iommu_domain_set_pgtable(domain, pte, pgtable.mode); 1466 - 1467 - ret = true; 1468 - 1469 - out: 1470 - spin_unlock_irqrestore(&domain->lock, flags); 1471 - 1472 - return ret; 1473 - } 1474 - 1475 - static u64 *alloc_pte(struct protection_domain *domain, 1476 - unsigned long address, 1477 - unsigned long page_size, 1478 - u64 **pte_page, 1479 - gfp_t gfp, 1480 - bool *updated) 1481 - { 1482 - struct domain_pgtable pgtable; 1483 - int level, end_lvl; 1484 - u64 *pte, *page; 1485 - 1486 - BUG_ON(!is_power_of_2(page_size)); 1487 - 1488 - amd_iommu_domain_get_pgtable(domain, &pgtable); 1489 - 1490 - while (address > PM_LEVEL_SIZE(pgtable.mode)) { 1491 - /* 1492 - * Return an error if there is no memory to update the 1493 - * page-table. 1494 - */ 1495 - if (!increase_address_space(domain, address, gfp)) 1496 - return NULL; 1497 - 1498 - /* Read new values to check if update was successful */ 1499 - amd_iommu_domain_get_pgtable(domain, &pgtable); 1500 - } 1501 - 1502 - 1503 - level = pgtable.mode - 1; 1504 - pte = &pgtable.root[PM_LEVEL_INDEX(level, address)]; 1505 - address = PAGE_SIZE_ALIGN(address, page_size); 1506 - end_lvl = PAGE_SIZE_LEVEL(page_size); 1507 - 1508 - while (level > end_lvl) { 1509 - u64 __pte, __npte; 1510 - int pte_level; 1511 - 1512 - __pte = *pte; 1513 - pte_level = PM_PTE_LEVEL(__pte); 1514 - 1515 - /* 1516 - * If we replace a series of large PTEs, we need 1517 - * to tear down all of them. 1518 - */ 1519 - if (IOMMU_PTE_PRESENT(__pte) && 1520 - pte_level == PAGE_MODE_7_LEVEL) { 1521 - unsigned long count, i; 1522 - u64 *lpte; 1523 - 1524 - lpte = first_pte_l7(pte, NULL, &count); 1525 - 1526 - /* 1527 - * Unmap the replicated PTEs that still match the 1528 - * original large mapping 1529 - */ 1530 - for (i = 0; i < count; ++i) 1531 - cmpxchg64(&lpte[i], __pte, 0ULL); 1532 - 1533 - *updated = true; 1534 - continue; 1535 - } 1536 - 1537 - if (!IOMMU_PTE_PRESENT(__pte) || 1538 - pte_level == PAGE_MODE_NONE) { 1539 - page = (u64 *)get_zeroed_page(gfp); 1540 - 1541 - if (!page) 1542 - return NULL; 1543 - 1544 - __npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page)); 1545 - 1546 - /* pte could have been changed somewhere. */ 1547 - if (cmpxchg64(pte, __pte, __npte) != __pte) 1548 - free_page((unsigned long)page); 1549 - else if (IOMMU_PTE_PRESENT(__pte)) 1550 - *updated = true; 1551 - 1552 - continue; 1553 - } 1554 - 1555 - /* No level skipping support yet */ 1556 - if (pte_level != level) 1557 - return NULL; 1558 - 1559 - level -= 1; 1560 - 1561 - pte = IOMMU_PTE_PAGE(__pte); 1562 - 1563 - if (pte_page && level == end_lvl) 1564 - *pte_page = pte; 1565 - 1566 - pte = &pte[PM_LEVEL_INDEX(level, address)]; 1567 - } 1568 - 1569 - return pte; 1570 - } 1571 - 1572 - /* 1573 - * This function checks if there is a PTE for a given dma address. If 1574 - * there is one, it returns the pointer to it. 1575 - */ 1576 - static u64 *fetch_pte(struct protection_domain *domain, 1577 - unsigned long address, 1578 - unsigned long *page_size) 1579 - { 1580 - struct domain_pgtable pgtable; 1581 - int level; 1582 - u64 *pte; 1583 - 1584 - *page_size = 0; 1585 - 1586 - amd_iommu_domain_get_pgtable(domain, &pgtable); 1587 - 1588 - if (address > PM_LEVEL_SIZE(pgtable.mode)) 1589 - return NULL; 1590 - 1591 - level = pgtable.mode - 1; 1592 - pte = &pgtable.root[PM_LEVEL_INDEX(level, address)]; 1593 - *page_size = PTE_LEVEL_PAGE_SIZE(level); 1594 - 1595 - while (level > 0) { 1596 - 1597 - /* Not Present */ 1598 - if (!IOMMU_PTE_PRESENT(*pte)) 1599 - return NULL; 1600 - 1601 - /* Large PTE */ 1602 - if (PM_PTE_LEVEL(*pte) == 7 || 1603 - PM_PTE_LEVEL(*pte) == 0) 1604 - break; 1605 - 1606 - /* No level skipping support yet */ 1607 - if (PM_PTE_LEVEL(*pte) != level) 1608 - return NULL; 1609 - 1610 - level -= 1; 1611 - 1612 - /* Walk to the next level */ 1613 - pte = IOMMU_PTE_PAGE(*pte); 1614 - pte = &pte[PM_LEVEL_INDEX(level, address)]; 1615 - *page_size = PTE_LEVEL_PAGE_SIZE(level); 1616 - } 1617 - 1618 - /* 1619 - * If we have a series of large PTEs, make 1620 - * sure to return a pointer to the first one. 1621 - */ 1622 - if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL) 1623 - pte = first_pte_l7(pte, page_size, NULL); 1624 - 1625 - return pte; 1626 - } 1627 - 1628 - static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist) 1629 - { 1630 - unsigned long pt; 1631 - int mode; 1632 - 1633 - while (cmpxchg64(pte, pteval, 0) != pteval) { 1634 - pr_warn("AMD-Vi: IOMMU pte changed since we read it\n"); 1635 - pteval = *pte; 1636 - } 1637 - 1638 - if (!IOMMU_PTE_PRESENT(pteval)) 1639 - return freelist; 1640 - 1641 - pt = (unsigned long)IOMMU_PTE_PAGE(pteval); 1642 - mode = IOMMU_PTE_MODE(pteval); 1643 - 1644 - return free_sub_pt(pt, mode, freelist); 1645 - } 1646 - 1647 - /* 1648 - * Generic mapping functions. It maps a physical address into a DMA 1649 - * address space. It allocates the page table pages if necessary. 1650 - * In the future it can be extended to a generic mapping function 1651 - * supporting all features of AMD IOMMU page tables like level skipping 1652 - * and full 64 bit address spaces. 1653 - */ 1654 - static int iommu_map_page(struct protection_domain *dom, 1655 - unsigned long bus_addr, 1656 - unsigned long phys_addr, 1657 - unsigned long page_size, 1658 - int prot, 1659 - gfp_t gfp) 1660 - { 1661 - struct page *freelist = NULL; 1662 - bool updated = false; 1663 - u64 __pte, *pte; 1664 - int ret, i, count; 1665 - 1666 - BUG_ON(!IS_ALIGNED(bus_addr, page_size)); 1667 - BUG_ON(!IS_ALIGNED(phys_addr, page_size)); 1668 - 1669 - ret = -EINVAL; 1670 - if (!(prot & IOMMU_PROT_MASK)) 1671 - goto out; 1672 - 1673 - count = PAGE_SIZE_PTE_COUNT(page_size); 1674 - pte = alloc_pte(dom, bus_addr, page_size, NULL, gfp, &updated); 1675 - 1676 - ret = -ENOMEM; 1677 - if (!pte) 1678 - goto out; 1679 - 1680 - for (i = 0; i < count; ++i) 1681 - freelist = free_clear_pte(&pte[i], pte[i], freelist); 1682 - 1683 - if (freelist != NULL) 1684 - updated = true; 1685 - 1686 - if (count > 1) { 1687 - __pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size); 1688 - __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC; 1689 - } else 1690 - __pte = __sme_set(phys_addr) | IOMMU_PTE_PR | IOMMU_PTE_FC; 1691 - 1692 - if (prot & IOMMU_PROT_IR) 1693 - __pte |= IOMMU_PTE_IR; 1694 - if (prot & IOMMU_PROT_IW) 1695 - __pte |= IOMMU_PTE_IW; 1696 - 1697 - for (i = 0; i < count; ++i) 1698 - pte[i] = __pte; 1699 - 1700 - ret = 0; 1701 - 1702 - out: 1703 - if (updated) { 1704 - unsigned long flags; 1705 - 1706 - spin_lock_irqsave(&dom->lock, flags); 1707 - /* 1708 - * Flush domain TLB(s) and wait for completion. Any Device-Table 1709 - * Updates and flushing already happened in 1710 - * increase_address_space(). 1711 - */ 1712 - domain_flush_tlb_pde(dom); 1713 - domain_flush_complete(dom); 1714 - spin_unlock_irqrestore(&dom->lock, flags); 1715 - } 1716 - 1717 - /* Everything flushed out, free pages now */ 1718 - free_page_list(freelist); 1719 - 1720 - return ret; 1721 - } 1722 - 1723 - static unsigned long iommu_unmap_page(struct protection_domain *dom, 1724 - unsigned long bus_addr, 1725 - unsigned long page_size) 1726 - { 1727 - unsigned long long unmapped; 1728 - unsigned long unmap_size; 1729 - u64 *pte; 1730 - 1731 - BUG_ON(!is_power_of_2(page_size)); 1732 - 1733 - unmapped = 0; 1734 - 1735 - while (unmapped < page_size) { 1736 - 1737 - pte = fetch_pte(dom, bus_addr, &unmap_size); 1738 - 1739 - if (pte) { 1740 - int i, count; 1741 - 1742 - count = PAGE_SIZE_PTE_COUNT(unmap_size); 1743 - for (i = 0; i < count; i++) 1744 - pte[i] = 0ULL; 1745 - } 1746 - 1747 - bus_addr = (bus_addr & ~(unmap_size - 1)) + unmap_size; 1748 - unmapped += unmap_size; 1749 - } 1750 - 1751 - BUG_ON(unmapped && !is_power_of_2(unmapped)); 1752 - 1753 - return unmapped; 1754 1383 } 1755 1384 1756 1385 /**************************************************************************** ··· 1393 1896 } 1394 1897 1395 1898 static void set_dte_entry(u16 devid, struct protection_domain *domain, 1396 - struct domain_pgtable *pgtable, 1397 1899 bool ats, bool ppr) 1398 1900 { 1399 1901 u64 pte_root = 0; 1400 1902 u64 flags = 0; 1401 1903 u32 old_domid; 1402 1904 1403 - if (pgtable->mode != PAGE_MODE_NONE) 1404 - pte_root = iommu_virt_to_phys(pgtable->root); 1905 + if (domain->iop.mode != PAGE_MODE_NONE) 1906 + pte_root = iommu_virt_to_phys(domain->iop.root); 1405 1907 1406 - pte_root |= (pgtable->mode & DEV_ENTRY_MODE_MASK) 1908 + pte_root |= (domain->iop.mode & DEV_ENTRY_MODE_MASK) 1407 1909 << DEV_ENTRY_MODE_SHIFT; 1408 1910 pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV; 1409 1911 ··· 1475 1979 static void do_attach(struct iommu_dev_data *dev_data, 1476 1980 struct protection_domain *domain) 1477 1981 { 1478 - struct domain_pgtable pgtable; 1479 1982 struct amd_iommu *iommu; 1480 1983 bool ats; 1481 1984 ··· 1490 1995 domain->dev_cnt += 1; 1491 1996 1492 1997 /* Update device table */ 1493 - amd_iommu_domain_get_pgtable(domain, &pgtable); 1494 - set_dte_entry(dev_data->devid, domain, &pgtable, 1998 + set_dte_entry(dev_data->devid, domain, 1495 1999 ats, dev_data->iommu_v2); 1496 2000 clone_aliases(dev_data->pdev); 1497 2001 ··· 1514 2020 device_flush_dte(dev_data); 1515 2021 1516 2022 /* Flush IOTLB */ 1517 - domain_flush_tlb_pde(domain); 2023 + amd_iommu_domain_flush_tlb_pde(domain); 1518 2024 1519 2025 /* Wait for the flushes to finish */ 1520 - domain_flush_complete(domain); 2026 + amd_iommu_domain_flush_complete(domain); 1521 2027 1522 2028 /* decrease reference counters - needs to happen after the flushes */ 1523 2029 domain->dev_iommu[iommu->index] -= 1; ··· 1650 2156 * left the caches in the IOMMU dirty. So we have to flush 1651 2157 * here to evict all dirty stuff. 1652 2158 */ 1653 - domain_flush_tlb_pde(domain); 2159 + amd_iommu_domain_flush_tlb_pde(domain); 1654 2160 1655 - domain_flush_complete(domain); 2161 + amd_iommu_domain_flush_complete(domain); 1656 2162 1657 2163 out: 1658 2164 spin_unlock(&dev_data->lock); ··· 1797 2303 * 1798 2304 *****************************************************************************/ 1799 2305 1800 - static void update_device_table(struct protection_domain *domain, 1801 - struct domain_pgtable *pgtable) 2306 + static void update_device_table(struct protection_domain *domain) 1802 2307 { 1803 2308 struct iommu_dev_data *dev_data; 1804 2309 1805 2310 list_for_each_entry(dev_data, &domain->dev_list, list) { 1806 - set_dte_entry(dev_data->devid, domain, pgtable, 2311 + set_dte_entry(dev_data->devid, domain, 1807 2312 dev_data->ats.enabled, dev_data->iommu_v2); 1808 2313 clone_aliases(dev_data->pdev); 1809 2314 } 1810 2315 } 1811 2316 1812 - static void update_and_flush_device_table(struct protection_domain *domain, 1813 - struct domain_pgtable *pgtable) 2317 + void amd_iommu_update_and_flush_device_table(struct protection_domain *domain) 1814 2318 { 1815 - update_device_table(domain, pgtable); 2319 + update_device_table(domain); 1816 2320 domain_flush_devices(domain); 1817 2321 } 1818 2322 1819 - static void update_domain(struct protection_domain *domain) 2323 + void amd_iommu_domain_update(struct protection_domain *domain) 1820 2324 { 1821 - struct domain_pgtable pgtable; 1822 - 1823 2325 /* Update device table */ 1824 - amd_iommu_domain_get_pgtable(domain, &pgtable); 1825 - update_and_flush_device_table(domain, &pgtable); 2326 + amd_iommu_update_and_flush_device_table(domain); 1826 2327 1827 2328 /* Flush domain TLB(s) and wait for completion */ 1828 - domain_flush_tlb_pde(domain); 1829 - domain_flush_complete(domain); 2329 + amd_iommu_domain_flush_tlb_pde(domain); 2330 + amd_iommu_domain_flush_complete(domain); 1830 2331 } 1831 2332 1832 2333 int __init amd_iommu_init_api(void) ··· 1889 2400 1890 2401 static void protection_domain_free(struct protection_domain *domain) 1891 2402 { 1892 - struct domain_pgtable pgtable; 1893 - 1894 2403 if (!domain) 1895 2404 return; 1896 2405 1897 2406 if (domain->id) 1898 2407 domain_id_free(domain->id); 1899 2408 1900 - amd_iommu_domain_get_pgtable(domain, &pgtable); 1901 - amd_iommu_domain_clr_pt_root(domain); 1902 - free_pagetable(&pgtable); 2409 + if (domain->iop.pgtbl_cfg.tlb) 2410 + free_io_pgtable_ops(&domain->iop.iop.ops); 1903 2411 1904 2412 kfree(domain); 1905 2413 } 1906 2414 1907 - static int protection_domain_init(struct protection_domain *domain, int mode) 2415 + static int protection_domain_init_v1(struct protection_domain *domain, int mode) 1908 2416 { 1909 2417 u64 *pt_root = NULL; 1910 2418 ··· 1924 2438 return 0; 1925 2439 } 1926 2440 1927 - static struct protection_domain *protection_domain_alloc(int mode) 2441 + static struct protection_domain *protection_domain_alloc(unsigned int type) 1928 2442 { 2443 + struct io_pgtable_ops *pgtbl_ops; 1929 2444 struct protection_domain *domain; 2445 + int pgtable = amd_iommu_pgtable; 2446 + int mode = DEFAULT_PGTABLE_LEVEL; 2447 + int ret; 1930 2448 1931 2449 domain = kzalloc(sizeof(*domain), GFP_KERNEL); 1932 2450 if (!domain) 1933 2451 return NULL; 1934 2452 1935 - if (protection_domain_init(domain, mode)) 2453 + /* 2454 + * Force IOMMU v1 page table when iommu=pt and 2455 + * when allocating domain for pass-through devices. 2456 + */ 2457 + if (type == IOMMU_DOMAIN_IDENTITY) { 2458 + pgtable = AMD_IOMMU_V1; 2459 + mode = PAGE_MODE_NONE; 2460 + } else if (type == IOMMU_DOMAIN_UNMANAGED) { 2461 + pgtable = AMD_IOMMU_V1; 2462 + } 2463 + 2464 + switch (pgtable) { 2465 + case AMD_IOMMU_V1: 2466 + ret = protection_domain_init_v1(domain, mode); 2467 + break; 2468 + default: 2469 + ret = -EINVAL; 2470 + } 2471 + 2472 + if (ret) 2473 + goto out_err; 2474 + 2475 + pgtbl_ops = alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl_cfg, domain); 2476 + if (!pgtbl_ops) 1936 2477 goto out_err; 1937 2478 1938 2479 return domain; 1939 - 1940 2480 out_err: 1941 2481 kfree(domain); 1942 - 1943 2482 return NULL; 1944 2483 } 1945 2484 1946 2485 static struct iommu_domain *amd_iommu_domain_alloc(unsigned type) 1947 2486 { 1948 2487 struct protection_domain *domain; 1949 - int mode = DEFAULT_PGTABLE_LEVEL; 1950 2488 1951 - if (type == IOMMU_DOMAIN_IDENTITY) 1952 - mode = PAGE_MODE_NONE; 1953 - 1954 - domain = protection_domain_alloc(mode); 2489 + domain = protection_domain_alloc(type); 1955 2490 if (!domain) 1956 2491 return NULL; 1957 2492 ··· 2087 2580 gfp_t gfp) 2088 2581 { 2089 2582 struct protection_domain *domain = to_pdomain(dom); 2090 - struct domain_pgtable pgtable; 2583 + struct io_pgtable_ops *ops = &domain->iop.iop.ops; 2091 2584 int prot = 0; 2092 - int ret; 2585 + int ret = -EINVAL; 2093 2586 2094 - amd_iommu_domain_get_pgtable(domain, &pgtable); 2095 - if (pgtable.mode == PAGE_MODE_NONE) 2587 + if ((amd_iommu_pgtable == AMD_IOMMU_V1) && 2588 + (domain->iop.mode == PAGE_MODE_NONE)) 2096 2589 return -EINVAL; 2097 2590 2098 2591 if (iommu_prot & IOMMU_READ) ··· 2100 2593 if (iommu_prot & IOMMU_WRITE) 2101 2594 prot |= IOMMU_PROT_IW; 2102 2595 2103 - ret = iommu_map_page(domain, iova, paddr, page_size, prot, gfp); 2104 - 2105 - domain_flush_np_cache(domain, iova, page_size); 2596 + if (ops->map) { 2597 + ret = ops->map(ops, iova, paddr, page_size, prot, gfp); 2598 + domain_flush_np_cache(domain, iova, page_size); 2599 + } 2106 2600 2107 2601 return ret; 2108 2602 } ··· 2113 2605 struct iommu_iotlb_gather *gather) 2114 2606 { 2115 2607 struct protection_domain *domain = to_pdomain(dom); 2116 - struct domain_pgtable pgtable; 2608 + struct io_pgtable_ops *ops = &domain->iop.iop.ops; 2117 2609 2118 - amd_iommu_domain_get_pgtable(domain, &pgtable); 2119 - if (pgtable.mode == PAGE_MODE_NONE) 2610 + if ((amd_iommu_pgtable == AMD_IOMMU_V1) && 2611 + (domain->iop.mode == PAGE_MODE_NONE)) 2120 2612 return 0; 2121 2613 2122 - return iommu_unmap_page(domain, iova, page_size); 2614 + return (ops->unmap) ? ops->unmap(ops, iova, page_size, gather) : 0; 2123 2615 } 2124 2616 2125 2617 static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, 2126 2618 dma_addr_t iova) 2127 2619 { 2128 2620 struct protection_domain *domain = to_pdomain(dom); 2129 - unsigned long offset_mask, pte_pgsize; 2130 - struct domain_pgtable pgtable; 2131 - u64 *pte, __pte; 2621 + struct io_pgtable_ops *ops = &domain->iop.iop.ops; 2132 2622 2133 - amd_iommu_domain_get_pgtable(domain, &pgtable); 2134 - if (pgtable.mode == PAGE_MODE_NONE) 2135 - return iova; 2136 - 2137 - pte = fetch_pte(domain, iova, &pte_pgsize); 2138 - 2139 - if (!pte || !IOMMU_PTE_PRESENT(*pte)) 2140 - return 0; 2141 - 2142 - offset_mask = pte_pgsize - 1; 2143 - __pte = __sme_clr(*pte & PM_ADDR_MASK); 2144 - 2145 - return (__pte & ~offset_mask) | (iova & offset_mask); 2623 + return ops->iova_to_phys(ops, iova); 2146 2624 } 2147 2625 2148 2626 static bool amd_iommu_capable(enum iommu_cap cap) ··· 2214 2720 unsigned long flags; 2215 2721 2216 2722 spin_lock_irqsave(&dom->lock, flags); 2217 - domain_flush_tlb_pde(dom); 2218 - domain_flush_complete(dom); 2723 + amd_iommu_domain_flush_tlb_pde(dom); 2724 + amd_iommu_domain_flush_complete(dom); 2219 2725 spin_unlock_irqrestore(&dom->lock, flags); 2220 2726 } 2221 2727 ··· 2293 2799 void amd_iommu_domain_direct_map(struct iommu_domain *dom) 2294 2800 { 2295 2801 struct protection_domain *domain = to_pdomain(dom); 2296 - struct domain_pgtable pgtable; 2297 2802 unsigned long flags; 2298 2803 2299 2804 spin_lock_irqsave(&domain->lock, flags); 2300 2805 2301 - /* First save pgtable configuration*/ 2302 - amd_iommu_domain_get_pgtable(domain, &pgtable); 2303 - 2304 - /* Remove page-table from domain */ 2305 - amd_iommu_domain_clr_pt_root(domain); 2306 - 2307 - /* Make changes visible to IOMMUs */ 2308 - update_domain(domain); 2309 - 2310 - /* Page-table is not visible to IOMMU anymore, so free it */ 2311 - free_pagetable(&pgtable); 2806 + if (domain->iop.pgtbl_cfg.tlb) 2807 + free_io_pgtable_ops(&domain->iop.iop.ops); 2312 2808 2313 2809 spin_unlock_irqrestore(&domain->lock, flags); 2314 2810 } ··· 2339 2855 domain->glx = levels; 2340 2856 domain->flags |= PD_IOMMUV2_MASK; 2341 2857 2342 - update_domain(domain); 2858 + amd_iommu_domain_update(domain); 2343 2859 2344 2860 ret = 0; 2345 2861 ··· 2376 2892 } 2377 2893 2378 2894 /* Wait until IOMMU TLB flushes are complete */ 2379 - domain_flush_complete(domain); 2895 + amd_iommu_domain_flush_complete(domain); 2380 2896 2381 2897 /* Now flush device TLBs */ 2382 2898 list_for_each_entry(dev_data, &domain->dev_list, list) { ··· 2402 2918 } 2403 2919 2404 2920 /* Wait until all device TLBs are flushed */ 2405 - domain_flush_complete(domain); 2921 + amd_iommu_domain_flush_complete(domain); 2406 2922 2407 2923 ret = 0; 2408 2924 ··· 2487 3003 static int __set_gcr3(struct protection_domain *domain, u32 pasid, 2488 3004 unsigned long cr3) 2489 3005 { 2490 - struct domain_pgtable pgtable; 2491 3006 u64 *pte; 2492 3007 2493 - amd_iommu_domain_get_pgtable(domain, &pgtable); 2494 - if (pgtable.mode != PAGE_MODE_NONE) 3008 + if (domain->iop.mode != PAGE_MODE_NONE) 2495 3009 return -EINVAL; 2496 3010 2497 3011 pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, true); ··· 2503 3021 2504 3022 static int __clear_gcr3(struct protection_domain *domain, u32 pasid) 2505 3023 { 2506 - struct domain_pgtable pgtable; 2507 3024 u64 *pte; 2508 3025 2509 - amd_iommu_domain_get_pgtable(domain, &pgtable); 2510 - if (pgtable.mode != PAGE_MODE_NONE) 3026 + if (domain->iop.mode != PAGE_MODE_NONE) 2511 3027 return -EINVAL; 2512 3028 2513 3029 pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, false);
+1 -3
drivers/iommu/amd/iommu_v2.c
··· 77 77 }; 78 78 79 79 static LIST_HEAD(state_list); 80 - static spinlock_t state_lock; 80 + static DEFINE_SPINLOCK(state_lock); 81 81 82 82 static struct workqueue_struct *iommu_wq; 83 83 ··· 937 937 */ 938 938 return 0; 939 939 } 940 - 941 - spin_lock_init(&state_lock); 942 940 943 941 ret = -ENOMEM; 944 942 iommu_wq = alloc_workqueue("amd_iommu_v2", WQ_MEM_RECLAIM, 0);
+7 -3
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
··· 182 182 unsigned long start, unsigned long end) 183 183 { 184 184 struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn); 185 + struct arm_smmu_domain *smmu_domain = smmu_mn->domain; 186 + size_t size = end - start + 1; 185 187 186 - arm_smmu_atc_inv_domain(smmu_mn->domain, mm->pasid, start, 187 - end - start + 1); 188 + if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM)) 189 + arm_smmu_tlb_inv_range_asid(start, size, smmu_mn->cd->asid, 190 + PAGE_SIZE, false, smmu_domain); 191 + arm_smmu_atc_inv_domain(smmu_domain, mm->pasid, start, size); 188 192 } 189 193 190 194 static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) ··· 395 391 unsigned long reg, fld; 396 392 unsigned long oas; 397 393 unsigned long asid_bits; 398 - u32 feat_mask = ARM_SMMU_FEAT_BTM | ARM_SMMU_FEAT_COHERENCY; 394 + u32 feat_mask = ARM_SMMU_FEAT_COHERENCY; 399 395 400 396 if (vabits_actual == 52) 401 397 feat_mask |= ARM_SMMU_FEAT_VAX;
+91 -63
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
··· 88 88 { 0, NULL}, 89 89 }; 90 90 91 - static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset, 92 - struct arm_smmu_device *smmu) 93 - { 94 - if (offset > SZ_64K) 95 - return smmu->page1 + offset - SZ_64K; 96 - 97 - return smmu->base + offset; 98 - } 99 - 100 91 static void parse_driver_options(struct arm_smmu_device *smmu) 101 92 { 102 93 int i = 0; ··· 263 272 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31); 264 273 break; 265 274 case CMDQ_OP_TLBI_NH_VA: 275 + cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid); 276 + fallthrough; 277 + case CMDQ_OP_TLBI_EL2_VA: 266 278 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num); 267 279 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale); 268 - cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid); 269 280 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid); 270 281 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf); 271 282 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl); ··· 288 295 fallthrough; 289 296 case CMDQ_OP_TLBI_S12_VMALL: 290 297 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid); 298 + break; 299 + case CMDQ_OP_TLBI_EL2_ASID: 300 + cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid); 291 301 break; 292 302 case CMDQ_OP_ATC_INV: 293 303 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid); ··· 882 886 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid) 883 887 { 884 888 struct arm_smmu_cmdq_ent cmd = { 885 - .opcode = CMDQ_OP_TLBI_NH_ASID, 889 + .opcode = smmu->features & ARM_SMMU_FEAT_E2H ? 890 + CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID, 886 891 .tlbi.asid = asid, 887 892 }; 888 893 ··· 1266 1269 } 1267 1270 1268 1271 if (s1_cfg) { 1272 + u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ? 1273 + STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1; 1274 + 1269 1275 BUG_ON(ste_live); 1270 1276 dst[1] = cpu_to_le64( 1271 1277 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) | 1272 1278 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) | 1273 1279 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) | 1274 1280 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) | 1275 - FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1)); 1281 + FIELD_PREP(STRTAB_STE_1_STRW, strw)); 1276 1282 1277 1283 if (smmu->features & ARM_SMMU_FEAT_STALLS && 1278 1284 !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE)) ··· 1667 1667 arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0); 1668 1668 } 1669 1669 1670 - static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size, 1671 - size_t granule, bool leaf, 1672 - struct arm_smmu_domain *smmu_domain) 1670 + static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd, 1671 + unsigned long iova, size_t size, 1672 + size_t granule, 1673 + struct arm_smmu_domain *smmu_domain) 1673 1674 { 1674 1675 struct arm_smmu_device *smmu = smmu_domain->smmu; 1675 - unsigned long start = iova, end = iova + size, num_pages = 0, tg = 0; 1676 + unsigned long end = iova + size, num_pages = 0, tg = 0; 1676 1677 size_t inv_range = granule; 1677 1678 struct arm_smmu_cmdq_batch cmds = {}; 1678 - struct arm_smmu_cmdq_ent cmd = { 1679 - .tlbi = { 1680 - .leaf = leaf, 1681 - }, 1682 - }; 1683 1679 1684 1680 if (!size) 1685 1681 return; 1686 - 1687 - if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { 1688 - cmd.opcode = CMDQ_OP_TLBI_NH_VA; 1689 - cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid; 1690 - } else { 1691 - cmd.opcode = CMDQ_OP_TLBI_S2_IPA; 1692 - cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; 1693 - } 1694 1682 1695 1683 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) { 1696 1684 /* Get the leaf page size */ 1697 1685 tg = __ffs(smmu_domain->domain.pgsize_bitmap); 1698 1686 1699 1687 /* Convert page size of 12,14,16 (log2) to 1,2,3 */ 1700 - cmd.tlbi.tg = (tg - 10) / 2; 1688 + cmd->tlbi.tg = (tg - 10) / 2; 1701 1689 1702 1690 /* Determine what level the granule is at */ 1703 - cmd.tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3)); 1691 + cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3)); 1704 1692 1705 1693 num_pages = size >> tg; 1706 1694 } ··· 1706 1718 1707 1719 /* Determine the power of 2 multiple number of pages */ 1708 1720 scale = __ffs(num_pages); 1709 - cmd.tlbi.scale = scale; 1721 + cmd->tlbi.scale = scale; 1710 1722 1711 1723 /* Determine how many chunks of 2^scale size we have */ 1712 1724 num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX; 1713 - cmd.tlbi.num = num - 1; 1725 + cmd->tlbi.num = num - 1; 1714 1726 1715 1727 /* range is num * 2^scale * pgsize */ 1716 1728 inv_range = num << (scale + tg); ··· 1719 1731 num_pages -= num << scale; 1720 1732 } 1721 1733 1722 - cmd.tlbi.addr = iova; 1723 - arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); 1734 + cmd->tlbi.addr = iova; 1735 + arm_smmu_cmdq_batch_add(smmu, &cmds, cmd); 1724 1736 iova += inv_range; 1725 1737 } 1726 1738 arm_smmu_cmdq_batch_submit(smmu, &cmds); 1739 + } 1740 + 1741 + static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size, 1742 + size_t granule, bool leaf, 1743 + struct arm_smmu_domain *smmu_domain) 1744 + { 1745 + struct arm_smmu_cmdq_ent cmd = { 1746 + .tlbi = { 1747 + .leaf = leaf, 1748 + }, 1749 + }; 1750 + 1751 + if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { 1752 + cmd.opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ? 1753 + CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA; 1754 + cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid; 1755 + } else { 1756 + cmd.opcode = CMDQ_OP_TLBI_S2_IPA; 1757 + cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; 1758 + } 1759 + __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain); 1727 1760 1728 1761 /* 1729 1762 * Unfortunately, this can't be leaf-only since we may have 1730 1763 * zapped an entire table. 1731 1764 */ 1732 - arm_smmu_atc_inv_domain(smmu_domain, 0, start, size); 1765 + arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size); 1766 + } 1767 + 1768 + void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, 1769 + size_t granule, bool leaf, 1770 + struct arm_smmu_domain *smmu_domain) 1771 + { 1772 + struct arm_smmu_cmdq_ent cmd = { 1773 + .opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ? 1774 + CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA, 1775 + .tlbi = { 1776 + .asid = asid, 1777 + .leaf = leaf, 1778 + }, 1779 + }; 1780 + 1781 + __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain); 1733 1782 } 1734 1783 1735 1784 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather, ··· 1782 1757 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size, 1783 1758 size_t granule, void *cookie) 1784 1759 { 1785 - arm_smmu_tlb_inv_range(iova, size, granule, false, cookie); 1760 + arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie); 1786 1761 } 1787 1762 1788 1763 static const struct iommu_flush_ops arm_smmu_flush_ops = { ··· 2305 2280 { 2306 2281 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 2307 2282 2308 - arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start, 2309 - gather->pgsize, true, smmu_domain); 2283 + arm_smmu_tlb_inv_range_domain(gather->start, 2284 + gather->end - gather->start + 1, 2285 + gather->pgsize, true, smmu_domain); 2310 2286 } 2311 2287 2312 2288 static phys_addr_t ··· 2637 2611 /* Probing and initialisation functions */ 2638 2612 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, 2639 2613 struct arm_smmu_queue *q, 2614 + void __iomem *page, 2640 2615 unsigned long prod_off, 2641 2616 unsigned long cons_off, 2642 2617 size_t dwords, const char *name) ··· 2666 2639 1 << q->llq.max_n_shift, name); 2667 2640 } 2668 2641 2669 - q->prod_reg = arm_smmu_page1_fixup(prod_off, smmu); 2670 - q->cons_reg = arm_smmu_page1_fixup(cons_off, smmu); 2642 + q->prod_reg = page + prod_off; 2643 + q->cons_reg = page + cons_off; 2671 2644 q->ent_dwords = dwords; 2672 2645 2673 2646 q->q_base = Q_BASE_RWA; ··· 2711 2684 int ret; 2712 2685 2713 2686 /* cmdq */ 2714 - ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD, 2715 - ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS, 2716 - "cmdq"); 2687 + ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base, 2688 + ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS, 2689 + CMDQ_ENT_DWORDS, "cmdq"); 2717 2690 if (ret) 2718 2691 return ret; 2719 2692 ··· 2722 2695 return ret; 2723 2696 2724 2697 /* evtq */ 2725 - ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD, 2726 - ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS, 2727 - "evtq"); 2698 + ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1, 2699 + ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS, 2700 + EVTQ_ENT_DWORDS, "evtq"); 2728 2701 if (ret) 2729 2702 return ret; 2730 2703 ··· 2732 2705 if (!(smmu->features & ARM_SMMU_FEAT_PRI)) 2733 2706 return 0; 2734 2707 2735 - return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD, 2736 - ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS, 2737 - "priq"); 2708 + return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1, 2709 + ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS, 2710 + PRIQ_ENT_DWORDS, "priq"); 2738 2711 } 2739 2712 2740 2713 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu) ··· 3087 3060 writel_relaxed(reg, smmu->base + ARM_SMMU_CR1); 3088 3061 3089 3062 /* CR2 (random crap) */ 3090 - reg = CR2_PTM | CR2_RECINVSID | CR2_E2H; 3063 + reg = CR2_PTM | CR2_RECINVSID; 3064 + 3065 + if (smmu->features & ARM_SMMU_FEAT_E2H) 3066 + reg |= CR2_E2H; 3067 + 3091 3068 writel_relaxed(reg, smmu->base + ARM_SMMU_CR2); 3092 3069 3093 3070 /* Stream table */ ··· 3130 3099 3131 3100 /* Event queue */ 3132 3101 writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE); 3133 - writel_relaxed(smmu->evtq.q.llq.prod, 3134 - arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu)); 3135 - writel_relaxed(smmu->evtq.q.llq.cons, 3136 - arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu)); 3102 + writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD); 3103 + writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS); 3137 3104 3138 3105 enables |= CR0_EVTQEN; 3139 3106 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, ··· 3146 3117 writeq_relaxed(smmu->priq.q.q_base, 3147 3118 smmu->base + ARM_SMMU_PRIQ_BASE); 3148 3119 writel_relaxed(smmu->priq.q.llq.prod, 3149 - arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu)); 3120 + smmu->page1 + ARM_SMMU_PRIQ_PROD); 3150 3121 writel_relaxed(smmu->priq.q.llq.cons, 3151 - arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu)); 3122 + smmu->page1 + ARM_SMMU_PRIQ_CONS); 3152 3123 3153 3124 enables |= CR0_PRIQEN; 3154 3125 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, ··· 3250 3221 smmu->options |= ARM_SMMU_OPT_MSIPOLL; 3251 3222 } 3252 3223 3253 - if (reg & IDR0_HYP) 3224 + if (reg & IDR0_HYP) { 3254 3225 smmu->features |= ARM_SMMU_FEAT_HYP; 3226 + if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN)) 3227 + smmu->features |= ARM_SMMU_FEAT_E2H; 3228 + } 3255 3229 3256 3230 /* 3257 3231 * The coherency feature as set by FW is used in preference to the ID ··· 3521 3489 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start, 3522 3490 resource_size_t size) 3523 3491 { 3524 - struct resource res = { 3525 - .flags = IORESOURCE_MEM, 3526 - .start = start, 3527 - .end = start + size - 1, 3528 - }; 3492 + struct resource res = DEFINE_RES_MEM(start, size); 3529 3493 3530 3494 return devm_ioremap_resource(dev, &res); 3531 3495 }
+10 -4
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
··· 139 139 #define ARM_SMMU_CMDQ_CONS 0x9c 140 140 141 141 #define ARM_SMMU_EVTQ_BASE 0xa0 142 - #define ARM_SMMU_EVTQ_PROD 0x100a8 143 - #define ARM_SMMU_EVTQ_CONS 0x100ac 142 + #define ARM_SMMU_EVTQ_PROD 0xa8 143 + #define ARM_SMMU_EVTQ_CONS 0xac 144 144 #define ARM_SMMU_EVTQ_IRQ_CFG0 0xb0 145 145 #define ARM_SMMU_EVTQ_IRQ_CFG1 0xb8 146 146 #define ARM_SMMU_EVTQ_IRQ_CFG2 0xbc 147 147 148 148 #define ARM_SMMU_PRIQ_BASE 0xc0 149 - #define ARM_SMMU_PRIQ_PROD 0x100c8 150 - #define ARM_SMMU_PRIQ_CONS 0x100cc 149 + #define ARM_SMMU_PRIQ_PROD 0xc8 150 + #define ARM_SMMU_PRIQ_CONS 0xcc 151 151 #define ARM_SMMU_PRIQ_IRQ_CFG0 0xd0 152 152 #define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8 153 153 #define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc ··· 430 430 #define CMDQ_OP_TLBI_NH_ASID 0x11 431 431 #define CMDQ_OP_TLBI_NH_VA 0x12 432 432 #define CMDQ_OP_TLBI_EL2_ALL 0x20 433 + #define CMDQ_OP_TLBI_EL2_ASID 0x21 434 + #define CMDQ_OP_TLBI_EL2_VA 0x22 433 435 #define CMDQ_OP_TLBI_S12_VMALL 0x28 434 436 #define CMDQ_OP_TLBI_S2_IPA 0x2a 435 437 #define CMDQ_OP_TLBI_NSNH_ALL 0x30 ··· 606 604 #define ARM_SMMU_FEAT_RANGE_INV (1 << 15) 607 605 #define ARM_SMMU_FEAT_BTM (1 << 16) 608 606 #define ARM_SMMU_FEAT_SVA (1 << 17) 607 + #define ARM_SMMU_FEAT_E2H (1 << 18) 609 608 u32 features; 610 609 611 610 #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) ··· 697 694 int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, 698 695 struct arm_smmu_ctx_desc *cd); 699 696 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid); 697 + void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, 698 + size_t granule, bool leaf, 699 + struct arm_smmu_domain *smmu_domain); 700 700 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd); 701 701 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid, 702 702 unsigned long iova, size_t size);
+5
drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
··· 166 166 { .compatible = "qcom,mdss" }, 167 167 { .compatible = "qcom,sc7180-mdss" }, 168 168 { .compatible = "qcom,sc7180-mss-pil" }, 169 + { .compatible = "qcom,sc8180x-mdss" }, 169 170 { .compatible = "qcom,sdm845-mdss" }, 170 171 { .compatible = "qcom,sdm845-mss-pil" }, 171 172 { } ··· 207 206 smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i)); 208 207 209 208 if (FIELD_GET(ARM_SMMU_SMR_VALID, smr)) { 209 + /* Ignore valid bit for SMR mask extraction. */ 210 + smr &= ~ARM_SMMU_SMR_VALID; 210 211 smmu->smrs[i].id = FIELD_GET(ARM_SMMU_SMR_ID, smr); 211 212 smmu->smrs[i].mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr); 212 213 smmu->smrs[i].valid = true; ··· 330 327 static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { 331 328 { .compatible = "qcom,msm8998-smmu-v2" }, 332 329 { .compatible = "qcom,sc7180-smmu-500" }, 330 + { .compatible = "qcom,sc8180x-smmu-500" }, 333 331 { .compatible = "qcom,sdm630-smmu-v2" }, 334 332 { .compatible = "qcom,sdm845-smmu-500" }, 335 333 { .compatible = "qcom,sm8150-smmu-500" }, 336 334 { .compatible = "qcom,sm8250-smmu-500" }, 335 + { .compatible = "qcom,sm8350-smmu-500" }, 337 336 { } 338 337 }; 339 338
+11 -18
drivers/iommu/dma-iommu.c
··· 51 51 struct iommu_domain *fq_domain; 52 52 }; 53 53 54 + static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled); 55 + 54 56 void iommu_dma_free_cpu_cached_iovas(unsigned int cpu, 55 57 struct iommu_domain *domain) 56 58 { ··· 380 378 return iova_reserve_iommu_regions(dev, domain); 381 379 } 382 380 383 - static int iommu_dma_deferred_attach(struct device *dev, 384 - struct iommu_domain *domain) 385 - { 386 - const struct iommu_ops *ops = domain->ops; 387 - 388 - if (!is_kdump_kernel()) 389 - return 0; 390 - 391 - if (unlikely(ops->is_attach_deferred && 392 - ops->is_attach_deferred(domain, dev))) 393 - return iommu_attach_device(domain, dev); 394 - 395 - return 0; 396 - } 397 - 398 381 /** 399 382 * dma_info_to_prot - Translate DMA API directions and attributes to IOMMU API 400 383 * page flags. ··· 522 535 size_t iova_off = iova_offset(iovad, phys); 523 536 dma_addr_t iova; 524 537 525 - if (unlikely(iommu_dma_deferred_attach(dev, domain))) 538 + if (static_branch_unlikely(&iommu_deferred_attach_enabled) && 539 + iommu_deferred_attach(dev, domain)) 526 540 return DMA_MAPPING_ERROR; 527 541 528 542 size = iova_align(iovad, size + iova_off); ··· 681 693 682 694 *dma_handle = DMA_MAPPING_ERROR; 683 695 684 - if (unlikely(iommu_dma_deferred_attach(dev, domain))) 696 + if (static_branch_unlikely(&iommu_deferred_attach_enabled) && 697 + iommu_deferred_attach(dev, domain)) 685 698 return NULL; 686 699 687 700 min_size = alloc_sizes & -alloc_sizes; ··· 965 976 unsigned long mask = dma_get_seg_boundary(dev); 966 977 int i; 967 978 968 - if (unlikely(iommu_dma_deferred_attach(dev, domain))) 979 + if (static_branch_unlikely(&iommu_deferred_attach_enabled) && 980 + iommu_deferred_attach(dev, domain)) 969 981 return 0; 970 982 971 983 if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) ··· 1414 1424 1415 1425 static int iommu_dma_init(void) 1416 1426 { 1427 + if (is_kdump_kernel()) 1428 + static_branch_enable(&iommu_deferred_attach_enabled); 1429 + 1417 1430 return iova_cache_get(); 1418 1431 } 1419 1432 arch_initcall(iommu_dma_init);
+1 -1
drivers/iommu/intel/Makefile
··· 1 1 # SPDX-License-Identifier: GPL-2.0 2 2 obj-$(CONFIG_DMAR_TABLE) += dmar.o 3 3 obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o 4 - obj-$(CONFIG_INTEL_IOMMU) += trace.o 4 + obj-$(CONFIG_DMAR_TABLE) += trace.o cap_audit.o 5 5 obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o 6 6 obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o 7 7 obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o
+205
drivers/iommu/intel/cap_audit.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * cap_audit.c - audit iommu capabilities for boot time and hot plug 4 + * 5 + * Copyright (C) 2021 Intel Corporation 6 + * 7 + * Author: Kyung Min Park <kyung.min.park@intel.com> 8 + * Lu Baolu <baolu.lu@linux.intel.com> 9 + */ 10 + 11 + #define pr_fmt(fmt) "DMAR: " fmt 12 + 13 + #include <linux/intel-iommu.h> 14 + #include "cap_audit.h" 15 + 16 + static u64 intel_iommu_cap_sanity; 17 + static u64 intel_iommu_ecap_sanity; 18 + 19 + static inline void check_irq_capabilities(struct intel_iommu *a, 20 + struct intel_iommu *b) 21 + { 22 + CHECK_FEATURE_MISMATCH(a, b, cap, pi_support, CAP_PI_MASK); 23 + CHECK_FEATURE_MISMATCH(a, b, ecap, eim_support, ECAP_EIM_MASK); 24 + } 25 + 26 + static inline void check_dmar_capabilities(struct intel_iommu *a, 27 + struct intel_iommu *b) 28 + { 29 + MINIMAL_FEATURE_IOMMU(b, cap, CAP_MAMV_MASK); 30 + MINIMAL_FEATURE_IOMMU(b, cap, CAP_NFR_MASK); 31 + MINIMAL_FEATURE_IOMMU(b, cap, CAP_SLLPS_MASK); 32 + MINIMAL_FEATURE_IOMMU(b, cap, CAP_FRO_MASK); 33 + MINIMAL_FEATURE_IOMMU(b, cap, CAP_MGAW_MASK); 34 + MINIMAL_FEATURE_IOMMU(b, cap, CAP_SAGAW_MASK); 35 + MINIMAL_FEATURE_IOMMU(b, cap, CAP_NDOMS_MASK); 36 + MINIMAL_FEATURE_IOMMU(b, ecap, ECAP_PSS_MASK); 37 + MINIMAL_FEATURE_IOMMU(b, ecap, ECAP_MHMV_MASK); 38 + MINIMAL_FEATURE_IOMMU(b, ecap, ECAP_IRO_MASK); 39 + 40 + CHECK_FEATURE_MISMATCH(a, b, cap, 5lp_support, CAP_FL5LP_MASK); 41 + CHECK_FEATURE_MISMATCH(a, b, cap, fl1gp_support, CAP_FL1GP_MASK); 42 + CHECK_FEATURE_MISMATCH(a, b, cap, read_drain, CAP_RD_MASK); 43 + CHECK_FEATURE_MISMATCH(a, b, cap, write_drain, CAP_WD_MASK); 44 + CHECK_FEATURE_MISMATCH(a, b, cap, pgsel_inv, CAP_PSI_MASK); 45 + CHECK_FEATURE_MISMATCH(a, b, cap, zlr, CAP_ZLR_MASK); 46 + CHECK_FEATURE_MISMATCH(a, b, cap, caching_mode, CAP_CM_MASK); 47 + CHECK_FEATURE_MISMATCH(a, b, cap, phmr, CAP_PHMR_MASK); 48 + CHECK_FEATURE_MISMATCH(a, b, cap, plmr, CAP_PLMR_MASK); 49 + CHECK_FEATURE_MISMATCH(a, b, cap, rwbf, CAP_RWBF_MASK); 50 + CHECK_FEATURE_MISMATCH(a, b, cap, afl, CAP_AFL_MASK); 51 + CHECK_FEATURE_MISMATCH(a, b, ecap, rps, ECAP_RPS_MASK); 52 + CHECK_FEATURE_MISMATCH(a, b, ecap, smpwc, ECAP_SMPWC_MASK); 53 + CHECK_FEATURE_MISMATCH(a, b, ecap, flts, ECAP_FLTS_MASK); 54 + CHECK_FEATURE_MISMATCH(a, b, ecap, slts, ECAP_SLTS_MASK); 55 + CHECK_FEATURE_MISMATCH(a, b, ecap, nwfs, ECAP_NWFS_MASK); 56 + CHECK_FEATURE_MISMATCH(a, b, ecap, slads, ECAP_SLADS_MASK); 57 + CHECK_FEATURE_MISMATCH(a, b, ecap, vcs, ECAP_VCS_MASK); 58 + CHECK_FEATURE_MISMATCH(a, b, ecap, smts, ECAP_SMTS_MASK); 59 + CHECK_FEATURE_MISMATCH(a, b, ecap, pds, ECAP_PDS_MASK); 60 + CHECK_FEATURE_MISMATCH(a, b, ecap, dit, ECAP_DIT_MASK); 61 + CHECK_FEATURE_MISMATCH(a, b, ecap, pasid, ECAP_PASID_MASK); 62 + CHECK_FEATURE_MISMATCH(a, b, ecap, eafs, ECAP_EAFS_MASK); 63 + CHECK_FEATURE_MISMATCH(a, b, ecap, srs, ECAP_SRS_MASK); 64 + CHECK_FEATURE_MISMATCH(a, b, ecap, ers, ECAP_ERS_MASK); 65 + CHECK_FEATURE_MISMATCH(a, b, ecap, prs, ECAP_PRS_MASK); 66 + CHECK_FEATURE_MISMATCH(a, b, ecap, nest, ECAP_NEST_MASK); 67 + CHECK_FEATURE_MISMATCH(a, b, ecap, mts, ECAP_MTS_MASK); 68 + CHECK_FEATURE_MISMATCH(a, b, ecap, sc_support, ECAP_SC_MASK); 69 + CHECK_FEATURE_MISMATCH(a, b, ecap, pass_through, ECAP_PT_MASK); 70 + CHECK_FEATURE_MISMATCH(a, b, ecap, dev_iotlb_support, ECAP_DT_MASK); 71 + CHECK_FEATURE_MISMATCH(a, b, ecap, qis, ECAP_QI_MASK); 72 + CHECK_FEATURE_MISMATCH(a, b, ecap, coherent, ECAP_C_MASK); 73 + } 74 + 75 + static int cap_audit_hotplug(struct intel_iommu *iommu, enum cap_audit_type type) 76 + { 77 + bool mismatch = false; 78 + u64 old_cap = intel_iommu_cap_sanity; 79 + u64 old_ecap = intel_iommu_ecap_sanity; 80 + 81 + if (type == CAP_AUDIT_HOTPLUG_IRQR) { 82 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, pi_support, CAP_PI_MASK); 83 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, eim_support, ECAP_EIM_MASK); 84 + goto out; 85 + } 86 + 87 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, 5lp_support, CAP_FL5LP_MASK); 88 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, fl1gp_support, CAP_FL1GP_MASK); 89 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, read_drain, CAP_RD_MASK); 90 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, write_drain, CAP_WD_MASK); 91 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, pgsel_inv, CAP_PSI_MASK); 92 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, zlr, CAP_ZLR_MASK); 93 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, caching_mode, CAP_CM_MASK); 94 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, phmr, CAP_PHMR_MASK); 95 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, plmr, CAP_PLMR_MASK); 96 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, rwbf, CAP_RWBF_MASK); 97 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, afl, CAP_AFL_MASK); 98 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, rps, ECAP_RPS_MASK); 99 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, smpwc, ECAP_SMPWC_MASK); 100 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, flts, ECAP_FLTS_MASK); 101 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, slts, ECAP_SLTS_MASK); 102 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, nwfs, ECAP_NWFS_MASK); 103 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, slads, ECAP_SLADS_MASK); 104 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, vcs, ECAP_VCS_MASK); 105 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, smts, ECAP_SMTS_MASK); 106 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, pds, ECAP_PDS_MASK); 107 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, dit, ECAP_DIT_MASK); 108 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, pasid, ECAP_PASID_MASK); 109 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, eafs, ECAP_EAFS_MASK); 110 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, srs, ECAP_SRS_MASK); 111 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, ers, ECAP_ERS_MASK); 112 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, prs, ECAP_PRS_MASK); 113 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, nest, ECAP_NEST_MASK); 114 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, mts, ECAP_MTS_MASK); 115 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, sc_support, ECAP_SC_MASK); 116 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, pass_through, ECAP_PT_MASK); 117 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, dev_iotlb_support, ECAP_DT_MASK); 118 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, qis, ECAP_QI_MASK); 119 + CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, coherent, ECAP_C_MASK); 120 + 121 + /* Abort hot plug if the hot plug iommu feature is smaller than global */ 122 + MINIMAL_FEATURE_HOTPLUG(iommu, cap, max_amask_val, CAP_MAMV_MASK, mismatch); 123 + MINIMAL_FEATURE_HOTPLUG(iommu, cap, num_fault_regs, CAP_NFR_MASK, mismatch); 124 + MINIMAL_FEATURE_HOTPLUG(iommu, cap, super_page_val, CAP_SLLPS_MASK, mismatch); 125 + MINIMAL_FEATURE_HOTPLUG(iommu, cap, fault_reg_offset, CAP_FRO_MASK, mismatch); 126 + MINIMAL_FEATURE_HOTPLUG(iommu, cap, mgaw, CAP_MGAW_MASK, mismatch); 127 + MINIMAL_FEATURE_HOTPLUG(iommu, cap, sagaw, CAP_SAGAW_MASK, mismatch); 128 + MINIMAL_FEATURE_HOTPLUG(iommu, cap, ndoms, CAP_NDOMS_MASK, mismatch); 129 + MINIMAL_FEATURE_HOTPLUG(iommu, ecap, pss, ECAP_PSS_MASK, mismatch); 130 + MINIMAL_FEATURE_HOTPLUG(iommu, ecap, max_handle_mask, ECAP_MHMV_MASK, mismatch); 131 + MINIMAL_FEATURE_HOTPLUG(iommu, ecap, iotlb_offset, ECAP_IRO_MASK, mismatch); 132 + 133 + out: 134 + if (mismatch) { 135 + intel_iommu_cap_sanity = old_cap; 136 + intel_iommu_ecap_sanity = old_ecap; 137 + return -EFAULT; 138 + } 139 + 140 + return 0; 141 + } 142 + 143 + static int cap_audit_static(struct intel_iommu *iommu, enum cap_audit_type type) 144 + { 145 + struct dmar_drhd_unit *d; 146 + struct intel_iommu *i; 147 + 148 + rcu_read_lock(); 149 + if (list_empty(&dmar_drhd_units)) 150 + goto out; 151 + 152 + for_each_active_iommu(i, d) { 153 + if (!iommu) { 154 + intel_iommu_ecap_sanity = i->ecap; 155 + intel_iommu_cap_sanity = i->cap; 156 + iommu = i; 157 + continue; 158 + } 159 + 160 + if (type == CAP_AUDIT_STATIC_DMAR) 161 + check_dmar_capabilities(iommu, i); 162 + else 163 + check_irq_capabilities(iommu, i); 164 + } 165 + 166 + out: 167 + rcu_read_unlock(); 168 + return 0; 169 + } 170 + 171 + int intel_cap_audit(enum cap_audit_type type, struct intel_iommu *iommu) 172 + { 173 + switch (type) { 174 + case CAP_AUDIT_STATIC_DMAR: 175 + case CAP_AUDIT_STATIC_IRQR: 176 + return cap_audit_static(iommu, type); 177 + case CAP_AUDIT_HOTPLUG_DMAR: 178 + case CAP_AUDIT_HOTPLUG_IRQR: 179 + return cap_audit_hotplug(iommu, type); 180 + default: 181 + break; 182 + } 183 + 184 + return -EFAULT; 185 + } 186 + 187 + bool intel_cap_smts_sanity(void) 188 + { 189 + return ecap_smts(intel_iommu_ecap_sanity); 190 + } 191 + 192 + bool intel_cap_pasid_sanity(void) 193 + { 194 + return ecap_pasid(intel_iommu_ecap_sanity); 195 + } 196 + 197 + bool intel_cap_nest_sanity(void) 198 + { 199 + return ecap_nest(intel_iommu_ecap_sanity); 200 + } 201 + 202 + bool intel_cap_flts_sanity(void) 203 + { 204 + return ecap_flts(intel_iommu_ecap_sanity); 205 + }
+130
drivers/iommu/intel/cap_audit.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * cap_audit.h - audit iommu capabilities header 4 + * 5 + * Copyright (C) 2021 Intel Corporation 6 + * 7 + * Author: Kyung Min Park <kyung.min.park@intel.com> 8 + */ 9 + 10 + /* 11 + * Capability Register Mask 12 + */ 13 + #define CAP_FL5LP_MASK BIT_ULL(60) 14 + #define CAP_PI_MASK BIT_ULL(59) 15 + #define CAP_FL1GP_MASK BIT_ULL(56) 16 + #define CAP_RD_MASK BIT_ULL(55) 17 + #define CAP_WD_MASK BIT_ULL(54) 18 + #define CAP_MAMV_MASK GENMASK_ULL(53, 48) 19 + #define CAP_NFR_MASK GENMASK_ULL(47, 40) 20 + #define CAP_PSI_MASK BIT_ULL(39) 21 + #define CAP_SLLPS_MASK GENMASK_ULL(37, 34) 22 + #define CAP_FRO_MASK GENMASK_ULL(33, 24) 23 + #define CAP_ZLR_MASK BIT_ULL(22) 24 + #define CAP_MGAW_MASK GENMASK_ULL(21, 16) 25 + #define CAP_SAGAW_MASK GENMASK_ULL(12, 8) 26 + #define CAP_CM_MASK BIT_ULL(7) 27 + #define CAP_PHMR_MASK BIT_ULL(6) 28 + #define CAP_PLMR_MASK BIT_ULL(5) 29 + #define CAP_RWBF_MASK BIT_ULL(4) 30 + #define CAP_AFL_MASK BIT_ULL(3) 31 + #define CAP_NDOMS_MASK GENMASK_ULL(2, 0) 32 + 33 + /* 34 + * Extended Capability Register Mask 35 + */ 36 + #define ECAP_RPS_MASK BIT_ULL(49) 37 + #define ECAP_SMPWC_MASK BIT_ULL(48) 38 + #define ECAP_FLTS_MASK BIT_ULL(47) 39 + #define ECAP_SLTS_MASK BIT_ULL(46) 40 + #define ECAP_SLADS_MASK BIT_ULL(45) 41 + #define ECAP_VCS_MASK BIT_ULL(44) 42 + #define ECAP_SMTS_MASK BIT_ULL(43) 43 + #define ECAP_PDS_MASK BIT_ULL(42) 44 + #define ECAP_DIT_MASK BIT_ULL(41) 45 + #define ECAP_PASID_MASK BIT_ULL(40) 46 + #define ECAP_PSS_MASK GENMASK_ULL(39, 35) 47 + #define ECAP_EAFS_MASK BIT_ULL(34) 48 + #define ECAP_NWFS_MASK BIT_ULL(33) 49 + #define ECAP_SRS_MASK BIT_ULL(31) 50 + #define ECAP_ERS_MASK BIT_ULL(30) 51 + #define ECAP_PRS_MASK BIT_ULL(29) 52 + #define ECAP_NEST_MASK BIT_ULL(26) 53 + #define ECAP_MTS_MASK BIT_ULL(25) 54 + #define ECAP_MHMV_MASK GENMASK_ULL(23, 20) 55 + #define ECAP_IRO_MASK GENMASK_ULL(17, 8) 56 + #define ECAP_SC_MASK BIT_ULL(7) 57 + #define ECAP_PT_MASK BIT_ULL(6) 58 + #define ECAP_EIM_MASK BIT_ULL(4) 59 + #define ECAP_DT_MASK BIT_ULL(2) 60 + #define ECAP_QI_MASK BIT_ULL(1) 61 + #define ECAP_C_MASK BIT_ULL(0) 62 + 63 + /* 64 + * u64 intel_iommu_cap_sanity, intel_iommu_ecap_sanity will be adjusted as each 65 + * IOMMU gets audited. 66 + */ 67 + #define DO_CHECK_FEATURE_MISMATCH(a, b, cap, feature, MASK) \ 68 + do { \ 69 + if (cap##_##feature(a) != cap##_##feature(b)) { \ 70 + intel_iommu_##cap##_sanity &= ~(MASK); \ 71 + pr_info("IOMMU feature %s inconsistent", #feature); \ 72 + } \ 73 + } while (0) 74 + 75 + #define CHECK_FEATURE_MISMATCH(a, b, cap, feature, MASK) \ 76 + DO_CHECK_FEATURE_MISMATCH((a)->cap, (b)->cap, cap, feature, MASK) 77 + 78 + #define CHECK_FEATURE_MISMATCH_HOTPLUG(b, cap, feature, MASK) \ 79 + do { \ 80 + if (cap##_##feature(intel_iommu_##cap##_sanity)) \ 81 + DO_CHECK_FEATURE_MISMATCH(intel_iommu_##cap##_sanity, \ 82 + (b)->cap, cap, feature, MASK); \ 83 + } while (0) 84 + 85 + #define MINIMAL_FEATURE_IOMMU(iommu, cap, MASK) \ 86 + do { \ 87 + u64 min_feature = intel_iommu_##cap##_sanity & (MASK); \ 88 + min_feature = min_t(u64, min_feature, (iommu)->cap & (MASK)); \ 89 + intel_iommu_##cap##_sanity = (intel_iommu_##cap##_sanity & ~(MASK)) | \ 90 + min_feature; \ 91 + } while (0) 92 + 93 + #define MINIMAL_FEATURE_HOTPLUG(iommu, cap, feature, MASK, mismatch) \ 94 + do { \ 95 + if ((intel_iommu_##cap##_sanity & (MASK)) > \ 96 + (cap##_##feature((iommu)->cap))) \ 97 + mismatch = true; \ 98 + else \ 99 + (iommu)->cap = ((iommu)->cap & ~(MASK)) | \ 100 + (intel_iommu_##cap##_sanity & (MASK)); \ 101 + } while (0) 102 + 103 + enum cap_audit_type { 104 + CAP_AUDIT_STATIC_DMAR, 105 + CAP_AUDIT_STATIC_IRQR, 106 + CAP_AUDIT_HOTPLUG_DMAR, 107 + CAP_AUDIT_HOTPLUG_IRQR, 108 + }; 109 + 110 + bool intel_cap_smts_sanity(void); 111 + bool intel_cap_pasid_sanity(void); 112 + bool intel_cap_nest_sanity(void); 113 + bool intel_cap_flts_sanity(void); 114 + 115 + static inline bool scalable_mode_support(void) 116 + { 117 + return (intel_iommu_sm && intel_cap_smts_sanity()); 118 + } 119 + 120 + static inline bool pasid_mode_support(void) 121 + { 122 + return scalable_mode_support() && intel_cap_pasid_sanity(); 123 + } 124 + 125 + static inline bool nested_mode_support(void) 126 + { 127 + return scalable_mode_support() && intel_cap_nest_sanity(); 128 + } 129 + 130 + int intel_cap_audit(enum cap_audit_type type, struct intel_iommu *iommu);
+11
drivers/iommu/intel/dmar.c
··· 31 31 #include <linux/limits.h> 32 32 #include <asm/irq_remapping.h> 33 33 #include <asm/iommu_table.h> 34 + #include <trace/events/intel_iommu.h> 34 35 35 36 #include "../irq_remapping.h" 36 37 ··· 526 525 struct acpi_dmar_reserved_memory *rmrr; 527 526 struct acpi_dmar_atsr *atsr; 528 527 struct acpi_dmar_rhsa *rhsa; 528 + struct acpi_dmar_satc *satc; 529 529 530 530 switch (header->type) { 531 531 case ACPI_DMAR_TYPE_HARDWARE_UNIT: ··· 555 553 case ACPI_DMAR_TYPE_NAMESPACE: 556 554 /* We don't print this here because we need to sanity-check 557 555 it first. So print it in dmar_parse_one_andd() instead. */ 556 + break; 557 + case ACPI_DMAR_TYPE_SATC: 558 + satc = container_of(header, struct acpi_dmar_satc, header); 559 + pr_info("SATC flags: 0x%x\n", satc->flags); 558 560 break; 559 561 } 560 562 } ··· 647 641 .cb[ACPI_DMAR_TYPE_ROOT_ATS] = &dmar_parse_one_atsr, 648 642 .cb[ACPI_DMAR_TYPE_HARDWARE_AFFINITY] = &dmar_parse_one_rhsa, 649 643 .cb[ACPI_DMAR_TYPE_NAMESPACE] = &dmar_parse_one_andd, 644 + .cb[ACPI_DMAR_TYPE_SATC] = &dmar_parse_one_satc, 650 645 }; 651 646 652 647 /* ··· 1314 1307 offset = ((index + i) % QI_LENGTH) << shift; 1315 1308 memcpy(qi->desc + offset, &desc[i], 1 << shift); 1316 1309 qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE; 1310 + trace_qi_submit(iommu, desc[i].qw0, desc[i].qw1, 1311 + desc[i].qw2, desc[i].qw3); 1317 1312 } 1318 1313 qi->desc_status[wait_index] = QI_IN_USE; 1319 1314 ··· 2083 2074 #define DMAR_DSM_FUNC_DRHD 1 2084 2075 #define DMAR_DSM_FUNC_ATSR 2 2085 2076 #define DMAR_DSM_FUNC_RHSA 3 2077 + #define DMAR_DSM_FUNC_SATC 4 2086 2078 2087 2079 static inline bool dmar_detect_dsm(acpi_handle handle, int func) 2088 2080 { ··· 2101 2091 [DMAR_DSM_FUNC_DRHD] = ACPI_DMAR_TYPE_HARDWARE_UNIT, 2102 2092 [DMAR_DSM_FUNC_ATSR] = ACPI_DMAR_TYPE_ROOT_ATS, 2103 2093 [DMAR_DSM_FUNC_RHSA] = ACPI_DMAR_TYPE_HARDWARE_AFFINITY, 2094 + [DMAR_DSM_FUNC_SATC] = ACPI_DMAR_TYPE_SATC, 2104 2095 }; 2105 2096 2106 2097 if (!dmar_detect_dsm(handle, func))
+172 -107
drivers/iommu/intel/iommu.c
··· 44 44 #include <asm/irq_remapping.h> 45 45 #include <asm/cacheflush.h> 46 46 #include <asm/iommu.h> 47 - #include <trace/events/intel_iommu.h> 48 47 49 48 #include "../irq_remapping.h" 50 49 #include "pasid.h" 50 + #include "cap_audit.h" 51 51 52 52 #define ROOT_SIZE VTD_PAGE_SIZE 53 53 #define CONTEXT_SIZE VTD_PAGE_SIZE ··· 316 316 u8 include_all:1; /* include all ports */ 317 317 }; 318 318 319 + struct dmar_satc_unit { 320 + struct list_head list; /* list of SATC units */ 321 + struct acpi_dmar_header *hdr; /* ACPI header */ 322 + struct dmar_dev_scope *devices; /* target devices */ 323 + struct intel_iommu *iommu; /* the corresponding iommu */ 324 + int devices_cnt; /* target device count */ 325 + u8 atc_required:1; /* ATS is required */ 326 + }; 327 + 319 328 static LIST_HEAD(dmar_atsr_units); 320 329 static LIST_HEAD(dmar_rmrr_units); 330 + static LIST_HEAD(dmar_satc_units); 321 331 322 332 #define for_each_rmrr_units(rmrr) \ 323 333 list_for_each_entry(rmrr, &dmar_rmrr_units, list) ··· 1027 1017 1028 1018 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE); 1029 1019 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE; 1030 - if (domain_use_first_level(domain)) 1020 + if (domain_use_first_level(domain)) { 1031 1021 pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US; 1022 + if (domain->domain.type == IOMMU_DOMAIN_DMA) 1023 + pteval |= DMA_FL_PTE_ACCESS; 1024 + } 1032 1025 if (cmpxchg64(&pte->val, 0ULL, pteval)) 1033 1026 /* Someone else set it while we were thinking; use theirs. */ 1034 1027 free_pgtable_page(tmp_page); ··· 1874 1861 */ 1875 1862 static bool first_level_by_default(void) 1876 1863 { 1877 - struct dmar_drhd_unit *drhd; 1878 - struct intel_iommu *iommu; 1879 - static int first_level_support = -1; 1880 - 1881 - if (likely(first_level_support != -1)) 1882 - return first_level_support; 1883 - 1884 - first_level_support = 1; 1885 - 1886 - rcu_read_lock(); 1887 - for_each_active_iommu(iommu, drhd) { 1888 - if (!sm_supported(iommu) || !ecap_flts(iommu->ecap)) { 1889 - first_level_support = 0; 1890 - break; 1891 - } 1892 - } 1893 - rcu_read_unlock(); 1894 - 1895 - return first_level_support; 1864 + return scalable_mode_support() && intel_cap_flts_sanity(); 1896 1865 } 1897 1866 1898 1867 static struct dmar_domain *alloc_domain(int flags) ··· 2293 2298 __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, 2294 2299 unsigned long phys_pfn, unsigned long nr_pages, int prot) 2295 2300 { 2296 - struct dma_pte *first_pte = NULL, *pte = NULL; 2297 2301 unsigned int largepage_lvl = 0; 2298 2302 unsigned long lvl_pages = 0; 2303 + struct dma_pte *pte = NULL; 2299 2304 phys_addr_t pteval; 2300 2305 u64 attr; 2301 2306 ··· 2305 2310 return -EINVAL; 2306 2311 2307 2312 attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP); 2308 - if (domain_use_first_level(domain)) 2313 + if (domain_use_first_level(domain)) { 2309 2314 attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_XD | DMA_FL_PTE_US; 2315 + 2316 + if (domain->domain.type == IOMMU_DOMAIN_DMA) { 2317 + attr |= DMA_FL_PTE_ACCESS; 2318 + if (prot & DMA_PTE_WRITE) 2319 + attr |= DMA_FL_PTE_DIRTY; 2320 + } 2321 + } 2310 2322 2311 2323 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr; 2312 2324 ··· 2324 2322 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, 2325 2323 phys_pfn, nr_pages); 2326 2324 2327 - first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl); 2325 + pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl); 2328 2326 if (!pte) 2329 2327 return -ENOMEM; 2330 2328 /* It is large page*/ ··· 2385 2383 * recalculate 'pte' and switch back to smaller pages for the 2386 2384 * end of the mapping, if the trailing size is not enough to 2387 2385 * use another superpage (i.e. nr_pages < lvl_pages). 2386 + * 2387 + * We leave clflush for the leaf pte changes to iotlb_sync_map() 2388 + * callback. 2388 2389 */ 2389 2390 pte++; 2390 2391 if (!nr_pages || first_pte_in_page(pte) || 2391 - (largepage_lvl > 1 && nr_pages < lvl_pages)) { 2392 - domain_flush_cache(domain, first_pte, 2393 - (void *)pte - (void *)first_pte); 2392 + (largepage_lvl > 1 && nr_pages < lvl_pages)) 2394 2393 pte = NULL; 2395 - } 2396 - } 2397 - 2398 - return 0; 2399 - } 2400 - 2401 - static int 2402 - domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, 2403 - unsigned long phys_pfn, unsigned long nr_pages, int prot) 2404 - { 2405 - int iommu_id, ret; 2406 - struct intel_iommu *iommu; 2407 - 2408 - /* Do the real mapping first */ 2409 - ret = __domain_mapping(domain, iov_pfn, phys_pfn, nr_pages, prot); 2410 - if (ret) 2411 - return ret; 2412 - 2413 - for_each_domain_iommu(iommu_id, domain) { 2414 - iommu = g_iommus[iommu_id]; 2415 - __mapping_notify_one(iommu, domain, iov_pfn, nr_pages); 2416 2394 } 2417 2395 2418 2396 return 0; ··· 3179 3197 goto error; 3180 3198 } 3181 3199 3200 + ret = intel_cap_audit(CAP_AUDIT_STATIC_DMAR, NULL); 3201 + if (ret) 3202 + goto free_iommu; 3203 + 3182 3204 for_each_iommu(iommu, drhd) { 3183 3205 if (drhd->ignored) { 3184 3206 iommu_disable_translation(iommu); ··· 3726 3740 return 0; 3727 3741 } 3728 3742 3743 + static struct dmar_satc_unit *dmar_find_satc(struct acpi_dmar_satc *satc) 3744 + { 3745 + struct dmar_satc_unit *satcu; 3746 + struct acpi_dmar_satc *tmp; 3747 + 3748 + list_for_each_entry_rcu(satcu, &dmar_satc_units, list, 3749 + dmar_rcu_check()) { 3750 + tmp = (struct acpi_dmar_satc *)satcu->hdr; 3751 + if (satc->segment != tmp->segment) 3752 + continue; 3753 + if (satc->header.length != tmp->header.length) 3754 + continue; 3755 + if (memcmp(satc, tmp, satc->header.length) == 0) 3756 + return satcu; 3757 + } 3758 + 3759 + return NULL; 3760 + } 3761 + 3762 + int dmar_parse_one_satc(struct acpi_dmar_header *hdr, void *arg) 3763 + { 3764 + struct acpi_dmar_satc *satc; 3765 + struct dmar_satc_unit *satcu; 3766 + 3767 + if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled) 3768 + return 0; 3769 + 3770 + satc = container_of(hdr, struct acpi_dmar_satc, header); 3771 + satcu = dmar_find_satc(satc); 3772 + if (satcu) 3773 + return 0; 3774 + 3775 + satcu = kzalloc(sizeof(*satcu) + hdr->length, GFP_KERNEL); 3776 + if (!satcu) 3777 + return -ENOMEM; 3778 + 3779 + satcu->hdr = (void *)(satcu + 1); 3780 + memcpy(satcu->hdr, hdr, hdr->length); 3781 + satcu->atc_required = satc->flags & 0x1; 3782 + satcu->devices = dmar_alloc_dev_scope((void *)(satc + 1), 3783 + (void *)satc + satc->header.length, 3784 + &satcu->devices_cnt); 3785 + if (satcu->devices_cnt && !satcu->devices) { 3786 + kfree(satcu); 3787 + return -ENOMEM; 3788 + } 3789 + list_add_rcu(&satcu->list, &dmar_satc_units); 3790 + 3791 + return 0; 3792 + } 3793 + 3729 3794 static int intel_iommu_add(struct dmar_drhd_unit *dmaru) 3730 3795 { 3731 3796 int sp, ret; ··· 3784 3747 3785 3748 if (g_iommus[iommu->seq_id]) 3786 3749 return 0; 3750 + 3751 + ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_DMAR, iommu); 3752 + if (ret) 3753 + goto out; 3787 3754 3788 3755 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) { 3789 3756 pr_warn("%s: Doesn't support hardware pass through.\n", ··· 3884 3843 { 3885 3844 struct dmar_rmrr_unit *rmrru, *rmrr_n; 3886 3845 struct dmar_atsr_unit *atsru, *atsr_n; 3846 + struct dmar_satc_unit *satcu, *satc_n; 3887 3847 3888 3848 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) { 3889 3849 list_del(&rmrru->list); ··· 3895 3853 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) { 3896 3854 list_del(&atsru->list); 3897 3855 intel_iommu_free_atsr(atsru); 3856 + } 3857 + list_for_each_entry_safe(satcu, satc_n, &dmar_satc_units, list) { 3858 + list_del(&satcu->list); 3859 + dmar_free_dev_scope(&satcu->devices, &satcu->devices_cnt); 3860 + kfree(satcu); 3898 3861 } 3899 3862 } 3900 3863 ··· 3952 3905 int ret; 3953 3906 struct dmar_rmrr_unit *rmrru; 3954 3907 struct dmar_atsr_unit *atsru; 3908 + struct dmar_satc_unit *satcu; 3955 3909 struct acpi_dmar_atsr *atsr; 3956 3910 struct acpi_dmar_reserved_memory *rmrr; 3911 + struct acpi_dmar_satc *satc; 3957 3912 3958 3913 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING) 3959 3914 return 0; ··· 3993 3944 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) { 3994 3945 if (dmar_remove_dev_scope(info, atsr->segment, 3995 3946 atsru->devices, atsru->devices_cnt)) 3947 + break; 3948 + } 3949 + } 3950 + list_for_each_entry(satcu, &dmar_satc_units, list) { 3951 + satc = container_of(satcu->hdr, struct acpi_dmar_satc, header); 3952 + if (info->event == BUS_NOTIFY_ADD_DEVICE) { 3953 + ret = dmar_insert_dev_scope(info, (void *)(satc + 1), 3954 + (void *)satc + satc->header.length, 3955 + satc->segment, satcu->devices, 3956 + satcu->devices_cnt); 3957 + if (ret > 0) 3958 + break; 3959 + else if (ret < 0) 3960 + return ret; 3961 + } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) { 3962 + if (dmar_remove_dev_scope(info, satc->segment, 3963 + satcu->devices, satcu->devices_cnt)) 3996 3964 break; 3997 3965 } 3998 3966 } ··· 4355 4289 4356 4290 if (list_empty(&dmar_atsr_units)) 4357 4291 pr_info("No ATSR found\n"); 4292 + 4293 + if (list_empty(&dmar_satc_units)) 4294 + pr_info("No SATC found\n"); 4358 4295 4359 4296 if (dmar_map_gfx) 4360 4297 intel_iommu_gfx_mapped = 1; ··· 5012 4943 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 5013 4944 u64 max_addr; 5014 4945 int prot = 0; 5015 - int ret; 5016 4946 5017 4947 if (iommu_prot & IOMMU_READ) 5018 4948 prot |= DMA_PTE_READ; ··· 5037 4969 /* Round up size to next multiple of PAGE_SIZE, if it and 5038 4970 the low bits of hpa would take us onto the next page */ 5039 4971 size = aligned_nrpages(hpa, size); 5040 - ret = domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT, 5041 - hpa >> VTD_PAGE_SHIFT, size, prot); 5042 - return ret; 4972 + return __domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT, 4973 + hpa >> VTD_PAGE_SHIFT, size, prot); 5043 4974 } 5044 4975 5045 4976 static size_t intel_iommu_unmap(struct iommu_domain *domain, ··· 5105 5038 VTD_PAGE_SHIFT) - 1)); 5106 5039 5107 5040 return phys; 5108 - } 5109 - 5110 - static inline bool scalable_mode_support(void) 5111 - { 5112 - struct dmar_drhd_unit *drhd; 5113 - struct intel_iommu *iommu; 5114 - bool ret = true; 5115 - 5116 - rcu_read_lock(); 5117 - for_each_active_iommu(iommu, drhd) { 5118 - if (!sm_supported(iommu)) { 5119 - ret = false; 5120 - break; 5121 - } 5122 - } 5123 - rcu_read_unlock(); 5124 - 5125 - return ret; 5126 - } 5127 - 5128 - static inline bool iommu_pasid_support(void) 5129 - { 5130 - struct dmar_drhd_unit *drhd; 5131 - struct intel_iommu *iommu; 5132 - bool ret = true; 5133 - 5134 - rcu_read_lock(); 5135 - for_each_active_iommu(iommu, drhd) { 5136 - if (!pasid_supported(iommu)) { 5137 - ret = false; 5138 - break; 5139 - } 5140 - } 5141 - rcu_read_unlock(); 5142 - 5143 - return ret; 5144 - } 5145 - 5146 - static inline bool nested_mode_support(void) 5147 - { 5148 - struct dmar_drhd_unit *drhd; 5149 - struct intel_iommu *iommu; 5150 - bool ret = true; 5151 - 5152 - rcu_read_lock(); 5153 - for_each_active_iommu(iommu, drhd) { 5154 - if (!sm_supported(iommu) || !ecap_nest(iommu->ecap)) { 5155 - ret = false; 5156 - break; 5157 - } 5158 - } 5159 - rcu_read_unlock(); 5160 - 5161 - return ret; 5162 5041 } 5163 5042 5164 5043 static bool intel_iommu_capable(enum iommu_cap cap) ··· 5347 5334 int ret; 5348 5335 5349 5336 if (!dev_is_pci(dev) || dmar_disabled || 5350 - !scalable_mode_support() || !iommu_pasid_support()) 5337 + !scalable_mode_support() || !pasid_mode_support()) 5351 5338 return false; 5352 5339 5353 5340 ret = pci_pasid_features(to_pci_dev(dev)); ··· 5521 5508 return false; 5522 5509 } 5523 5510 5511 + static void clflush_sync_map(struct dmar_domain *domain, unsigned long clf_pfn, 5512 + unsigned long clf_pages) 5513 + { 5514 + struct dma_pte *first_pte = NULL, *pte = NULL; 5515 + unsigned long lvl_pages = 0; 5516 + int level = 0; 5517 + 5518 + while (clf_pages > 0) { 5519 + if (!pte) { 5520 + level = 0; 5521 + pte = pfn_to_dma_pte(domain, clf_pfn, &level); 5522 + if (WARN_ON(!pte)) 5523 + return; 5524 + first_pte = pte; 5525 + lvl_pages = lvl_to_nr_pages(level); 5526 + } 5527 + 5528 + if (WARN_ON(!lvl_pages || clf_pages < lvl_pages)) 5529 + return; 5530 + 5531 + clf_pages -= lvl_pages; 5532 + clf_pfn += lvl_pages; 5533 + pte++; 5534 + 5535 + if (!clf_pages || first_pte_in_page(pte) || 5536 + (level > 1 && clf_pages < lvl_pages)) { 5537 + domain_flush_cache(domain, first_pte, 5538 + (void *)pte - (void *)first_pte); 5539 + pte = NULL; 5540 + } 5541 + } 5542 + } 5543 + 5544 + static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain, 5545 + unsigned long iova, size_t size) 5546 + { 5547 + struct dmar_domain *dmar_domain = to_dmar_domain(domain); 5548 + unsigned long pages = aligned_nrpages(iova, size); 5549 + unsigned long pfn = iova >> VTD_PAGE_SHIFT; 5550 + struct intel_iommu *iommu; 5551 + int iommu_id; 5552 + 5553 + if (!dmar_domain->iommu_coherency) 5554 + clflush_sync_map(dmar_domain, pfn, pages); 5555 + 5556 + for_each_domain_iommu(iommu_id, dmar_domain) { 5557 + iommu = g_iommus[iommu_id]; 5558 + __mapping_notify_one(iommu, dmar_domain, pfn, pages); 5559 + } 5560 + } 5561 + 5524 5562 const struct iommu_ops intel_iommu_ops = { 5525 5563 .capable = intel_iommu_capable, 5526 5564 .domain_alloc = intel_iommu_domain_alloc, ··· 5584 5520 .aux_detach_dev = intel_iommu_aux_detach_device, 5585 5521 .aux_get_pasid = intel_iommu_aux_get_pasid, 5586 5522 .map = intel_iommu_map, 5523 + .iotlb_sync_map = intel_iommu_iotlb_sync_map, 5587 5524 .unmap = intel_iommu_unmap, 5588 5525 .flush_iotlb_all = intel_flush_iotlb_all, 5589 5526 .iotlb_sync = intel_iommu_tlb_sync,
+8
drivers/iommu/intel/irq_remapping.c
··· 22 22 #include <asm/pci-direct.h> 23 23 24 24 #include "../irq_remapping.h" 25 + #include "cap_audit.h" 25 26 26 27 enum irq_mode { 27 28 IRQ_REMAPPING, ··· 735 734 if (dmar_table_init() < 0) 736 735 return -ENODEV; 737 736 737 + if (intel_cap_audit(CAP_AUDIT_STATIC_IRQR, NULL)) 738 + goto error; 739 + 738 740 if (!dmar_ir_support()) 739 741 return -ENODEV; 740 742 ··· 1442 1438 { 1443 1439 int ret; 1444 1440 int eim = x2apic_enabled(); 1441 + 1442 + ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_IRQR, iommu); 1443 + if (ret) 1444 + return ret; 1445 1445 1446 1446 if (eim && !ecap_eim_support(iommu->ecap)) { 1447 1447 pr_info("DRHD %Lx: EIM not supported by DRHD, ecap %Lx\n",
+2 -16
drivers/iommu/intel/pasid.c
··· 457 457 } 458 458 459 459 static void 460 - iotlb_invalidation_with_pasid(struct intel_iommu *iommu, u16 did, u32 pasid) 461 - { 462 - struct qi_desc desc; 463 - 464 - desc.qw0 = QI_EIOTLB_PASID(pasid) | QI_EIOTLB_DID(did) | 465 - QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE; 466 - desc.qw1 = 0; 467 - desc.qw2 = 0; 468 - desc.qw3 = 0; 469 - 470 - qi_submit_sync(iommu, &desc, 1, 0); 471 - } 472 - 473 - static void 474 460 devtlb_invalidation_with_pasid(struct intel_iommu *iommu, 475 461 struct device *dev, u32 pasid) 476 462 { ··· 500 514 clflush_cache_range(pte, sizeof(*pte)); 501 515 502 516 pasid_cache_invalidation_with_pasid(iommu, did, pasid); 503 - iotlb_invalidation_with_pasid(iommu, did, pasid); 517 + qi_flush_piotlb(iommu, did, pasid, 0, -1, 0); 504 518 505 519 /* Device IOTLB doesn't need to be flushed in caching mode. */ 506 520 if (!cap_caching_mode(iommu->cap)) ··· 516 530 517 531 if (cap_caching_mode(iommu->cap)) { 518 532 pasid_cache_invalidation_with_pasid(iommu, did, pasid); 519 - iotlb_invalidation_with_pasid(iommu, did, pasid); 533 + qi_flush_piotlb(iommu, did, pasid, 0, -1, 0); 520 534 } else { 521 535 iommu_flush_write_buffer(iommu); 522 536 }
+20 -51
drivers/iommu/intel/svm.c
··· 123 123 unsigned long address, 124 124 unsigned long pages, int ih) 125 125 { 126 - struct qi_desc desc; 126 + struct device_domain_info *info = get_domain_info(sdev->dev); 127 127 128 - if (pages == -1) { 129 - desc.qw0 = QI_EIOTLB_PASID(svm->pasid) | 130 - QI_EIOTLB_DID(sdev->did) | 131 - QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | 132 - QI_EIOTLB_TYPE; 133 - desc.qw1 = 0; 134 - } else { 135 - int mask = ilog2(__roundup_pow_of_two(pages)); 128 + if (WARN_ON(!pages)) 129 + return; 136 130 137 - desc.qw0 = QI_EIOTLB_PASID(svm->pasid) | 138 - QI_EIOTLB_DID(sdev->did) | 139 - QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | 140 - QI_EIOTLB_TYPE; 141 - desc.qw1 = QI_EIOTLB_ADDR(address) | 142 - QI_EIOTLB_IH(ih) | 143 - QI_EIOTLB_AM(mask); 144 - } 145 - desc.qw2 = 0; 146 - desc.qw3 = 0; 147 - qi_submit_sync(sdev->iommu, &desc, 1, 0); 148 - 149 - if (sdev->dev_iotlb) { 150 - desc.qw0 = QI_DEV_EIOTLB_PASID(svm->pasid) | 151 - QI_DEV_EIOTLB_SID(sdev->sid) | 152 - QI_DEV_EIOTLB_QDEP(sdev->qdep) | 153 - QI_DEIOTLB_TYPE; 154 - if (pages == -1) { 155 - desc.qw1 = QI_DEV_EIOTLB_ADDR(-1ULL >> 1) | 156 - QI_DEV_EIOTLB_SIZE; 157 - } else if (pages > 1) { 158 - /* The least significant zero bit indicates the size. So, 159 - * for example, an "address" value of 0x12345f000 will 160 - * flush from 0x123440000 to 0x12347ffff (256KiB). */ 161 - unsigned long last = address + ((unsigned long)(pages - 1) << VTD_PAGE_SHIFT); 162 - unsigned long mask = __rounddown_pow_of_two(address ^ last); 163 - 164 - desc.qw1 = QI_DEV_EIOTLB_ADDR((address & ~mask) | 165 - (mask - 1)) | QI_DEV_EIOTLB_SIZE; 166 - } else { 167 - desc.qw1 = QI_DEV_EIOTLB_ADDR(address); 168 - } 169 - desc.qw2 = 0; 170 - desc.qw3 = 0; 171 - qi_submit_sync(sdev->iommu, &desc, 1, 0); 172 - } 131 + qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih); 132 + if (info->ats_enabled) 133 + qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid, 134 + svm->pasid, sdev->qdep, address, 135 + order_base_2(pages)); 173 136 } 174 137 175 138 static void intel_flush_svm_range_dev(struct intel_svm *svm, ··· 911 948 u64 address; 912 949 913 950 handled = 1; 914 - 915 951 req = &iommu->prq[head / sizeof(*req)]; 916 - 917 - result = QI_RESP_FAILURE; 952 + result = QI_RESP_INVALID; 918 953 address = (u64)req->addr << VTD_PAGE_SHIFT; 919 954 if (!req->pasid_present) { 920 955 pr_err("%s: Page request without PASID: %08llx %08llx\n", ··· 950 989 rcu_read_unlock(); 951 990 } 952 991 953 - result = QI_RESP_INVALID; 954 992 /* Since we're using init_mm.pgd directly, we should never take 955 993 * any faults on kernel addresses. */ 956 994 if (!svm->mm) ··· 1039 1079 * Clear the page request overflow bit and wake up all threads that 1040 1080 * are waiting for the completion of this handling. 1041 1081 */ 1042 - if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) 1043 - writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG); 1082 + if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) { 1083 + pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n", 1084 + iommu->name); 1085 + head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; 1086 + tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; 1087 + if (head == tail) { 1088 + writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG); 1089 + pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared", 1090 + iommu->name); 1091 + } 1092 + } 1044 1093 1045 1094 if (!completion_done(&iommu->prq_complete)) 1046 1095 complete(&iommu->prq_complete);
+31 -34
drivers/iommu/io-pgtable-arm-v7s.c
··· 44 44 45 45 /* 46 46 * We have 32 bits total; 12 bits resolved at level 1, 8 bits at level 2, 47 - * and 12 bits in a page. With some carefully-chosen coefficients we can 48 - * hide the ugly inconsistencies behind these macros and at least let the 49 - * rest of the code pretend to be somewhat sane. 47 + * and 12 bits in a page. 48 + * MediaTek extend 2 bits to reach 34bits, 14 bits at lvl1 and 8 bits at lvl2. 50 49 */ 51 50 #define ARM_V7S_ADDR_BITS 32 52 - #define _ARM_V7S_LVL_BITS(lvl) (16 - (lvl) * 4) 53 - #define ARM_V7S_LVL_SHIFT(lvl) (ARM_V7S_ADDR_BITS - (4 + 8 * (lvl))) 51 + #define _ARM_V7S_LVL_BITS(lvl, cfg) ((lvl) == 1 ? ((cfg)->ias - 20) : 8) 52 + #define ARM_V7S_LVL_SHIFT(lvl) ((lvl) == 1 ? 20 : 12) 54 53 #define ARM_V7S_TABLE_SHIFT 10 55 54 56 - #define ARM_V7S_PTES_PER_LVL(lvl) (1 << _ARM_V7S_LVL_BITS(lvl)) 57 - #define ARM_V7S_TABLE_SIZE(lvl) \ 58 - (ARM_V7S_PTES_PER_LVL(lvl) * sizeof(arm_v7s_iopte)) 55 + #define ARM_V7S_PTES_PER_LVL(lvl, cfg) (1 << _ARM_V7S_LVL_BITS(lvl, cfg)) 56 + #define ARM_V7S_TABLE_SIZE(lvl, cfg) \ 57 + (ARM_V7S_PTES_PER_LVL(lvl, cfg) * sizeof(arm_v7s_iopte)) 59 58 60 59 #define ARM_V7S_BLOCK_SIZE(lvl) (1UL << ARM_V7S_LVL_SHIFT(lvl)) 61 60 #define ARM_V7S_LVL_MASK(lvl) ((u32)(~0U << ARM_V7S_LVL_SHIFT(lvl))) 62 61 #define ARM_V7S_TABLE_MASK ((u32)(~0U << ARM_V7S_TABLE_SHIFT)) 63 - #define _ARM_V7S_IDX_MASK(lvl) (ARM_V7S_PTES_PER_LVL(lvl) - 1) 64 - #define ARM_V7S_LVL_IDX(addr, lvl) ({ \ 62 + #define _ARM_V7S_IDX_MASK(lvl, cfg) (ARM_V7S_PTES_PER_LVL(lvl, cfg) - 1) 63 + #define ARM_V7S_LVL_IDX(addr, lvl, cfg) ({ \ 65 64 int _l = lvl; \ 66 - ((u32)(addr) >> ARM_V7S_LVL_SHIFT(_l)) & _ARM_V7S_IDX_MASK(_l); \ 65 + ((addr) >> ARM_V7S_LVL_SHIFT(_l)) & _ARM_V7S_IDX_MASK(_l, cfg); \ 67 66 }) 68 67 69 68 /* ··· 111 112 #define ARM_V7S_TEX_MASK 0x7 112 113 #define ARM_V7S_ATTR_TEX(val) (((val) & ARM_V7S_TEX_MASK) << ARM_V7S_TEX_SHIFT) 113 114 114 - /* MediaTek extend the two bits for PA 32bit/33bit */ 115 + /* MediaTek extend the bits below for PA 32bit/33bit/34bit */ 115 116 #define ARM_V7S_ATTR_MTK_PA_BIT32 BIT(9) 116 117 #define ARM_V7S_ATTR_MTK_PA_BIT33 BIT(4) 118 + #define ARM_V7S_ATTR_MTK_PA_BIT34 BIT(5) 117 119 118 120 /* *well, except for TEX on level 2 large pages, of course :( */ 119 121 #define ARM_V7S_CONT_PAGE_TEX_SHIFT 6 ··· 194 194 pte |= ARM_V7S_ATTR_MTK_PA_BIT32; 195 195 if (paddr & BIT_ULL(33)) 196 196 pte |= ARM_V7S_ATTR_MTK_PA_BIT33; 197 + if (paddr & BIT_ULL(34)) 198 + pte |= ARM_V7S_ATTR_MTK_PA_BIT34; 197 199 return pte; 198 200 } 199 201 ··· 220 218 paddr |= BIT_ULL(32); 221 219 if (pte & ARM_V7S_ATTR_MTK_PA_BIT33) 222 220 paddr |= BIT_ULL(33); 221 + if (pte & ARM_V7S_ATTR_MTK_PA_BIT34) 222 + paddr |= BIT_ULL(34); 223 223 return paddr; 224 224 } 225 225 ··· 238 234 struct device *dev = cfg->iommu_dev; 239 235 phys_addr_t phys; 240 236 dma_addr_t dma; 241 - size_t size = ARM_V7S_TABLE_SIZE(lvl); 237 + size_t size = ARM_V7S_TABLE_SIZE(lvl, cfg); 242 238 void *table = NULL; 243 239 244 240 if (lvl == 1) ··· 284 280 { 285 281 struct io_pgtable_cfg *cfg = &data->iop.cfg; 286 282 struct device *dev = cfg->iommu_dev; 287 - size_t size = ARM_V7S_TABLE_SIZE(lvl); 283 + size_t size = ARM_V7S_TABLE_SIZE(lvl, cfg); 288 284 289 285 if (!cfg->coherent_walk) 290 286 dma_unmap_single(dev, __arm_v7s_dma_addr(table), size, ··· 428 424 arm_v7s_iopte *tblp; 429 425 size_t sz = ARM_V7S_BLOCK_SIZE(lvl); 430 426 431 - tblp = ptep - ARM_V7S_LVL_IDX(iova, lvl); 427 + tblp = ptep - ARM_V7S_LVL_IDX(iova, lvl, cfg); 432 428 if (WARN_ON(__arm_v7s_unmap(data, NULL, iova + i * sz, 433 429 sz, lvl, tblp) != sz)) 434 430 return -EINVAL; ··· 481 477 int num_entries = size >> ARM_V7S_LVL_SHIFT(lvl); 482 478 483 479 /* Find our entry at the current level */ 484 - ptep += ARM_V7S_LVL_IDX(iova, lvl); 480 + ptep += ARM_V7S_LVL_IDX(iova, lvl, cfg); 485 481 486 482 /* If we can install a leaf entry at this level, then do so */ 487 483 if (num_entries) ··· 523 519 phys_addr_t paddr, size_t size, int prot, gfp_t gfp) 524 520 { 525 521 struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops); 526 - struct io_pgtable *iop = &data->iop; 527 522 int ret; 528 523 529 524 if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias) || ··· 538 535 * Synchronise all PTE updates for the new mapping before there's 539 536 * a chance for anything to kick off a table walk for the new iova. 540 537 */ 541 - if (iop->cfg.quirks & IO_PGTABLE_QUIRK_TLBI_ON_MAP) { 542 - io_pgtable_tlb_flush_walk(iop, iova, size, 543 - ARM_V7S_BLOCK_SIZE(2)); 544 - } else { 545 - wmb(); 546 - } 538 + wmb(); 547 539 548 540 return ret; 549 541 } ··· 548 550 struct arm_v7s_io_pgtable *data = io_pgtable_to_data(iop); 549 551 int i; 550 552 551 - for (i = 0; i < ARM_V7S_PTES_PER_LVL(1); i++) { 553 + for (i = 0; i < ARM_V7S_PTES_PER_LVL(1, &data->iop.cfg); i++) { 552 554 arm_v7s_iopte pte = data->pgd[i]; 553 555 554 556 if (ARM_V7S_PTE_IS_TABLE(pte, 1)) ··· 600 602 if (!tablep) 601 603 return 0; /* Bytes unmapped */ 602 604 603 - num_ptes = ARM_V7S_PTES_PER_LVL(2); 605 + num_ptes = ARM_V7S_PTES_PER_LVL(2, cfg); 604 606 num_entries = size >> ARM_V7S_LVL_SHIFT(2); 605 - unmap_idx = ARM_V7S_LVL_IDX(iova, 2); 607 + unmap_idx = ARM_V7S_LVL_IDX(iova, 2, cfg); 606 608 607 609 pte = arm_v7s_prot_to_pte(arm_v7s_pte_to_prot(blk_pte, 1), 2, cfg); 608 610 if (num_entries > 1) ··· 644 646 if (WARN_ON(lvl > 2)) 645 647 return 0; 646 648 647 - idx = ARM_V7S_LVL_IDX(iova, lvl); 649 + idx = ARM_V7S_LVL_IDX(iova, lvl, &iop->cfg); 648 650 ptep += idx; 649 651 do { 650 652 pte[i] = READ_ONCE(ptep[i]); ··· 715 717 { 716 718 struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops); 717 719 718 - if (WARN_ON(upper_32_bits(iova))) 720 + if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias))) 719 721 return 0; 720 722 721 723 return __arm_v7s_unmap(data, gather, iova, size, 1, data->pgd); ··· 730 732 u32 mask; 731 733 732 734 do { 733 - ptep += ARM_V7S_LVL_IDX(iova, ++lvl); 735 + ptep += ARM_V7S_LVL_IDX(iova, ++lvl, &data->iop.cfg); 734 736 pte = READ_ONCE(*ptep); 735 737 ptep = iopte_deref(pte, lvl, data); 736 738 } while (ARM_V7S_PTE_IS_TABLE(pte, lvl)); ··· 749 751 { 750 752 struct arm_v7s_io_pgtable *data; 751 753 752 - if (cfg->ias > ARM_V7S_ADDR_BITS) 754 + if (cfg->ias > (arm_v7s_is_mtk_enabled(cfg) ? 34 : ARM_V7S_ADDR_BITS)) 753 755 return NULL; 754 756 755 - if (cfg->oas > (arm_v7s_is_mtk_enabled(cfg) ? 34 : ARM_V7S_ADDR_BITS)) 757 + if (cfg->oas > (arm_v7s_is_mtk_enabled(cfg) ? 35 : ARM_V7S_ADDR_BITS)) 756 758 return NULL; 757 759 758 760 if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | 759 761 IO_PGTABLE_QUIRK_NO_PERMS | 760 - IO_PGTABLE_QUIRK_TLBI_ON_MAP | 761 762 IO_PGTABLE_QUIRK_ARM_MTK_EXT | 762 763 IO_PGTABLE_QUIRK_NON_STRICT)) 763 764 return NULL; ··· 772 775 773 776 spin_lock_init(&data->split_lock); 774 777 data->l2_tables = kmem_cache_create("io-pgtable_armv7s_l2", 775 - ARM_V7S_TABLE_SIZE(2), 776 - ARM_V7S_TABLE_SIZE(2), 778 + ARM_V7S_TABLE_SIZE(2, cfg), 779 + ARM_V7S_TABLE_SIZE(2, cfg), 777 780 ARM_V7S_TABLE_SLAB_FLAGS, NULL); 778 781 if (!data->l2_tables) 779 782 goto out_free_data;
+3
drivers/iommu/io-pgtable.c
··· 24 24 #ifdef CONFIG_IOMMU_IO_PGTABLE_ARMV7S 25 25 [ARM_V7S] = &io_pgtable_arm_v7s_init_fns, 26 26 #endif 27 + #ifdef CONFIG_AMD_IOMMU 28 + [AMD_IOMMU_V1] = &io_pgtable_amd_iommu_v1_init_fns, 29 + #endif 27 30 }; 28 31 29 32 struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt,
+28 -26
drivers/iommu/iommu.c
··· 1980 1980 } 1981 1981 EXPORT_SYMBOL_GPL(iommu_attach_device); 1982 1982 1983 + int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain) 1984 + { 1985 + const struct iommu_ops *ops = domain->ops; 1986 + 1987 + if (ops->is_attach_deferred && ops->is_attach_deferred(domain, dev)) 1988 + return __iommu_attach_device(domain, dev); 1989 + 1990 + return 0; 1991 + } 1992 + 1983 1993 /* 1984 1994 * Check flags and other user provided data for valid combinations. We also 1985 1995 * make sure no reserved fields or unused flags are set. This is to ensure ··· 2436 2426 size -= pgsize; 2437 2427 } 2438 2428 2439 - if (ops->iotlb_sync_map) 2440 - ops->iotlb_sync_map(domain); 2441 - 2442 2429 /* unroll mapping in case something went wrong */ 2443 2430 if (ret) 2444 2431 iommu_unmap(domain, orig_iova, orig_size - size); ··· 2445 2438 return ret; 2446 2439 } 2447 2440 2441 + static int _iommu_map(struct iommu_domain *domain, unsigned long iova, 2442 + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) 2443 + { 2444 + const struct iommu_ops *ops = domain->ops; 2445 + int ret; 2446 + 2447 + ret = __iommu_map(domain, iova, paddr, size, prot, gfp); 2448 + if (ret == 0 && ops->iotlb_sync_map) 2449 + ops->iotlb_sync_map(domain, iova, size); 2450 + 2451 + return ret; 2452 + } 2453 + 2448 2454 int iommu_map(struct iommu_domain *domain, unsigned long iova, 2449 2455 phys_addr_t paddr, size_t size, int prot) 2450 2456 { 2451 2457 might_sleep(); 2452 - return __iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL); 2458 + return _iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL); 2453 2459 } 2454 2460 EXPORT_SYMBOL_GPL(iommu_map); 2455 2461 2456 2462 int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova, 2457 2463 phys_addr_t paddr, size_t size, int prot) 2458 2464 { 2459 - return __iommu_map(domain, iova, paddr, size, prot, GFP_ATOMIC); 2465 + return _iommu_map(domain, iova, paddr, size, prot, GFP_ATOMIC); 2460 2466 } 2461 2467 EXPORT_SYMBOL_GPL(iommu_map_atomic); 2462 2468 ··· 2553 2533 struct scatterlist *sg, unsigned int nents, int prot, 2554 2534 gfp_t gfp) 2555 2535 { 2536 + const struct iommu_ops *ops = domain->ops; 2556 2537 size_t len = 0, mapped = 0; 2557 2538 phys_addr_t start; 2558 2539 unsigned int i = 0; ··· 2584 2563 sg = sg_next(sg); 2585 2564 } 2586 2565 2566 + if (ops->iotlb_sync_map) 2567 + ops->iotlb_sync_map(domain, iova, mapped); 2587 2568 return mapped; 2588 2569 2589 2570 out_err: ··· 2609 2586 { 2610 2587 return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC); 2611 2588 } 2612 - EXPORT_SYMBOL_GPL(iommu_map_sg_atomic); 2613 2589 2614 2590 int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, 2615 2591 phys_addr_t paddr, u64 size, int prot) ··· 2620 2598 prot); 2621 2599 } 2622 2600 EXPORT_SYMBOL_GPL(iommu_domain_window_enable); 2623 - 2624 - void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr) 2625 - { 2626 - if (unlikely(domain->ops->domain_window_disable == NULL)) 2627 - return; 2628 - 2629 - return domain->ops->domain_window_disable(domain, wnd_nr); 2630 - } 2631 - EXPORT_SYMBOL_GPL(iommu_domain_window_disable); 2632 2601 2633 2602 /** 2634 2603 * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework ··· 2876 2863 /* 2877 2864 * Per device IOMMU features. 2878 2865 */ 2879 - bool iommu_dev_has_feature(struct device *dev, enum iommu_dev_features feat) 2880 - { 2881 - const struct iommu_ops *ops = dev->bus->iommu_ops; 2882 - 2883 - if (ops && ops->dev_has_feat) 2884 - return ops->dev_has_feat(dev, feat); 2885 - 2886 - return false; 2887 - } 2888 - EXPORT_SYMBOL_GPL(iommu_dev_has_feature); 2889 - 2890 2866 int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) 2891 2867 { 2892 2868 const struct iommu_ops *ops = dev->bus->iommu_ops;
+1 -34
drivers/iommu/iova.c
··· 55 55 } 56 56 EXPORT_SYMBOL_GPL(init_iova_domain); 57 57 58 - bool has_iova_flush_queue(struct iova_domain *iovad) 58 + static bool has_iova_flush_queue(struct iova_domain *iovad) 59 59 { 60 60 return !!iovad->fq; 61 61 } ··· 112 112 113 113 return 0; 114 114 } 115 - EXPORT_SYMBOL_GPL(init_iova_flush_queue); 116 115 117 116 static struct rb_node * 118 117 __get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn) ··· 450 451 451 452 return new_iova->pfn_lo; 452 453 } 453 - EXPORT_SYMBOL_GPL(alloc_iova_fast); 454 454 455 455 /** 456 456 * free_iova_fast - free iova pfn range into rcache ··· 596 598 mod_timer(&iovad->fq_timer, 597 599 jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT)); 598 600 } 599 - EXPORT_SYMBOL_GPL(queue_iova); 600 601 601 602 /** 602 603 * put_iova_domain - destroys the iova domain ··· 706 709 return iova; 707 710 } 708 711 EXPORT_SYMBOL_GPL(reserve_iova); 709 - 710 - /** 711 - * copy_reserved_iova - copies the reserved between domains 712 - * @from: - source domain from where to copy 713 - * @to: - destination domin where to copy 714 - * This function copies reserved iova's from one domain to 715 - * other. 716 - */ 717 - void 718 - copy_reserved_iova(struct iova_domain *from, struct iova_domain *to) 719 - { 720 - unsigned long flags; 721 - struct rb_node *node; 722 - 723 - spin_lock_irqsave(&from->iova_rbtree_lock, flags); 724 - for (node = rb_first(&from->rbroot); node; node = rb_next(node)) { 725 - struct iova *iova = rb_entry(node, struct iova, node); 726 - struct iova *new_iova; 727 - 728 - if (iova->pfn_lo == IOVA_ANCHOR) 729 - continue; 730 - 731 - new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi); 732 - if (!new_iova) 733 - pr_err("Reserve iova range %lx@%lx failed\n", 734 - iova->pfn_lo, iova->pfn_lo); 735 - } 736 - spin_unlock_irqrestore(&from->iova_rbtree_lock, flags); 737 - } 738 - EXPORT_SYMBOL_GPL(copy_reserved_iova); 739 712 740 713 /* 741 714 * Magazine caches for IOVA ranges. For an introduction to magazines,
+23 -32
drivers/iommu/ipmmu-vmsa.c
··· 734 734 return 0; 735 735 } 736 736 737 - static const struct soc_device_attribute soc_rcar_gen3[] = { 737 + static const struct soc_device_attribute soc_needs_opt_in[] = { 738 + { .family = "R-Car Gen3", }, 739 + { .family = "RZ/G2", }, 740 + { /* sentinel */ } 741 + }; 742 + 743 + static const struct soc_device_attribute soc_denylist[] = { 738 744 { .soc_id = "r8a774a1", }, 739 - { .soc_id = "r8a774b1", }, 740 - { .soc_id = "r8a774c0", }, 741 - { .soc_id = "r8a774e1", }, 742 - { .soc_id = "r8a7795", }, 743 - { .soc_id = "r8a77961", }, 745 + { .soc_id = "r8a7795", .revision = "ES1.*" }, 746 + { .soc_id = "r8a7795", .revision = "ES2.*" }, 744 747 { .soc_id = "r8a7796", }, 745 - { .soc_id = "r8a77965", }, 746 - { .soc_id = "r8a77970", }, 747 - { .soc_id = "r8a77990", }, 748 - { .soc_id = "r8a77995", }, 749 748 { /* sentinel */ } 750 749 }; 751 750 752 - static const struct soc_device_attribute soc_rcar_gen3_whitelist[] = { 753 - { .soc_id = "r8a774b1", }, 754 - { .soc_id = "r8a774c0", }, 755 - { .soc_id = "r8a774e1", }, 756 - { .soc_id = "r8a7795", .revision = "ES3.*" }, 757 - { .soc_id = "r8a77961", }, 758 - { .soc_id = "r8a77965", }, 759 - { .soc_id = "r8a77990", }, 760 - { .soc_id = "r8a77995", }, 761 - { /* sentinel */ } 751 + static const char * const devices_allowlist[] = { 752 + "ee100000.mmc", 753 + "ee120000.mmc", 754 + "ee140000.mmc", 755 + "ee160000.mmc" 762 756 }; 763 757 764 - static const char * const rcar_gen3_slave_whitelist[] = { 765 - }; 766 - 767 - static bool ipmmu_slave_whitelist(struct device *dev) 758 + static bool ipmmu_device_is_allowed(struct device *dev) 768 759 { 769 760 unsigned int i; 770 761 771 762 /* 772 - * For R-Car Gen3 use a white list to opt-in slave devices. 763 + * R-Car Gen3 and RZ/G2 use the allow list to opt-in devices. 773 764 * For Other SoCs, this returns true anyway. 774 765 */ 775 - if (!soc_device_match(soc_rcar_gen3)) 766 + if (!soc_device_match(soc_needs_opt_in)) 776 767 return true; 777 768 778 - /* Check whether this R-Car Gen3 can use the IPMMU correctly or not */ 779 - if (!soc_device_match(soc_rcar_gen3_whitelist)) 769 + /* Check whether this SoC can use the IPMMU correctly or not */ 770 + if (soc_device_match(soc_denylist)) 780 771 return false; 781 772 782 - /* Check whether this slave device can work with the IPMMU */ 783 - for (i = 0; i < ARRAY_SIZE(rcar_gen3_slave_whitelist); i++) { 784 - if (!strcmp(dev_name(dev), rcar_gen3_slave_whitelist[i])) 773 + /* Check whether this device can work with the IPMMU */ 774 + for (i = 0; i < ARRAY_SIZE(devices_allowlist); i++) { 775 + if (!strcmp(dev_name(dev), devices_allowlist[i])) 785 776 return true; 786 777 } 787 778 ··· 783 792 static int ipmmu_of_xlate(struct device *dev, 784 793 struct of_phandle_args *spec) 785 794 { 786 - if (!ipmmu_slave_whitelist(dev)) 795 + if (!ipmmu_device_is_allowed(dev)) 787 796 return -ENODEV; 788 797 789 798 iommu_fwspec_add_ids(dev, spec->args, 1);
+9 -1
drivers/iommu/msm_iommu.c
··· 343 343 spin_lock_init(&priv->pgtlock); 344 344 345 345 priv->cfg = (struct io_pgtable_cfg) { 346 - .quirks = IO_PGTABLE_QUIRK_TLBI_ON_MAP, 347 346 .pgsize_bitmap = msm_iommu_ops.pgsize_bitmap, 348 347 .ias = 32, 349 348 .oas = 32, ··· 487 488 spin_unlock_irqrestore(&priv->pgtlock, flags); 488 489 489 490 return ret; 491 + } 492 + 493 + static void msm_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, 494 + size_t size) 495 + { 496 + struct msm_priv *priv = to_msm_priv(domain); 497 + 498 + __flush_iotlb_range(iova, size, SZ_4K, false, priv); 490 499 } 491 500 492 501 static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long iova, ··· 687 680 * kick starting the other master. 688 681 */ 689 682 .iotlb_sync = NULL, 683 + .iotlb_sync_map = msm_iommu_sync_map, 690 684 .iova_to_phys = msm_iommu_iova_to_phys, 691 685 .probe_device = msm_iommu_probe_device, 692 686 .release_device = msm_iommu_release_device,
+305 -105
drivers/iommu/mtk_iommu.c
··· 3 3 * Copyright (c) 2015-2016 MediaTek Inc. 4 4 * Author: Yong Wu <yong.wu@mediatek.com> 5 5 */ 6 + #include <linux/bitfield.h> 6 7 #include <linux/bug.h> 7 8 #include <linux/clk.h> 8 9 #include <linux/component.h> 9 10 #include <linux/device.h> 11 + #include <linux/dma-direct.h> 10 12 #include <linux/dma-iommu.h> 11 13 #include <linux/err.h> 12 14 #include <linux/interrupt.h> ··· 22 20 #include <linux/of_irq.h> 23 21 #include <linux/of_platform.h> 24 22 #include <linux/platform_device.h> 23 + #include <linux/pm_runtime.h> 25 24 #include <linux/regmap.h> 26 25 #include <linux/slab.h> 27 26 #include <linux/spinlock.h> ··· 91 88 #define F_REG_MMU1_FAULT_MASK GENMASK(13, 7) 92 89 93 90 #define REG_MMU0_FAULT_VA 0x13c 91 + #define F_MMU_INVAL_VA_31_12_MASK GENMASK(31, 12) 92 + #define F_MMU_INVAL_VA_34_32_MASK GENMASK(11, 9) 93 + #define F_MMU_INVAL_PA_34_32_MASK GENMASK(8, 6) 94 94 #define F_MMU_FAULT_VA_WRITE_BIT BIT(1) 95 95 #define F_MMU_FAULT_VA_LAYER_BIT BIT(0) 96 96 ··· 109 103 110 104 #define MTK_PROTECT_PA_ALIGN 256 111 105 112 - /* 113 - * Get the local arbiter ID and the portid within the larb arbiter 114 - * from mtk_m4u_id which is defined by MTK_M4U_ID. 115 - */ 116 - #define MTK_M4U_TO_LARB(id) (((id) >> 5) & 0xf) 117 - #define MTK_M4U_TO_PORT(id) ((id) & 0x1f) 118 - 119 106 #define HAS_4GB_MODE BIT(0) 120 107 /* HW will use the EMI clock if there isn't the "bclk". */ 121 108 #define HAS_BCLK BIT(1) ··· 118 119 #define HAS_SUB_COMM BIT(5) 119 120 #define WR_THROT_EN BIT(6) 120 121 #define HAS_LEGACY_IVRP_PADDR BIT(7) 122 + #define IOVA_34_EN BIT(8) 121 123 122 124 #define MTK_IOMMU_HAS_FLAG(pdata, _x) \ 123 125 ((((pdata)->flags) & (_x)) == (_x)) ··· 127 127 struct io_pgtable_cfg cfg; 128 128 struct io_pgtable_ops *iop; 129 129 130 + struct mtk_iommu_data *data; 130 131 struct iommu_domain domain; 131 132 }; 132 133 133 134 static const struct iommu_ops mtk_iommu_ops; 135 + 136 + static int mtk_iommu_hw_init(const struct mtk_iommu_data *data); 137 + 138 + #define MTK_IOMMU_TLB_ADDR(iova) ({ \ 139 + dma_addr_t _addr = iova; \ 140 + ((lower_32_bits(_addr) & GENMASK(31, 12)) | upper_32_bits(_addr));\ 141 + }) 134 142 135 143 /* 136 144 * In M4U 4GB mode, the physical address is remapped as below: ··· 168 160 169 161 #define for_each_m4u(data) list_for_each_entry(data, &m4ulist, list) 170 162 163 + struct mtk_iommu_iova_region { 164 + dma_addr_t iova_base; 165 + unsigned long long size; 166 + }; 167 + 168 + static const struct mtk_iommu_iova_region single_domain[] = { 169 + {.iova_base = 0, .size = SZ_4G}, 170 + }; 171 + 172 + static const struct mtk_iommu_iova_region mt8192_multi_dom[] = { 173 + { .iova_base = 0x0, .size = SZ_4G}, /* disp: 0 ~ 4G */ 174 + #if IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) 175 + { .iova_base = SZ_4G, .size = SZ_4G}, /* vdec: 4G ~ 8G */ 176 + { .iova_base = SZ_4G * 2, .size = SZ_4G}, /* CAM/MDP: 8G ~ 12G */ 177 + { .iova_base = 0x240000000ULL, .size = 0x4000000}, /* CCU0 */ 178 + { .iova_base = 0x244000000ULL, .size = 0x4000000}, /* CCU1 */ 179 + #endif 180 + }; 181 + 171 182 /* 172 183 * There may be 1 or 2 M4U HWs, But we always expect they are in the same domain 173 184 * for the performance. ··· 209 182 return container_of(dom, struct mtk_iommu_domain, domain); 210 183 } 211 184 212 - static void mtk_iommu_tlb_flush_all(void *cookie) 185 + static void mtk_iommu_tlb_flush_all(struct mtk_iommu_data *data) 213 186 { 214 - struct mtk_iommu_data *data = cookie; 215 - 216 187 for_each_m4u(data) { 188 + if (pm_runtime_get_if_in_use(data->dev) <= 0) 189 + continue; 190 + 217 191 writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, 218 192 data->base + data->plat_data->inv_sel_reg); 219 193 writel_relaxed(F_ALL_INVLD, data->base + REG_MMU_INVALIDATE); 220 194 wmb(); /* Make sure the tlb flush all done */ 195 + 196 + pm_runtime_put(data->dev); 221 197 } 222 198 } 223 199 224 200 static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, 225 - size_t granule, void *cookie) 201 + size_t granule, 202 + struct mtk_iommu_data *data) 226 203 { 227 - struct mtk_iommu_data *data = cookie; 204 + bool has_pm = !!data->dev->pm_domain; 228 205 unsigned long flags; 229 206 int ret; 230 207 u32 tmp; 231 208 232 209 for_each_m4u(data) { 210 + if (has_pm) { 211 + if (pm_runtime_get_if_in_use(data->dev) <= 0) 212 + continue; 213 + } 214 + 233 215 spin_lock_irqsave(&data->tlb_lock, flags); 234 216 writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, 235 217 data->base + data->plat_data->inv_sel_reg); 236 218 237 - writel_relaxed(iova, data->base + REG_MMU_INVLD_START_A); 238 - writel_relaxed(iova + size - 1, 219 + writel_relaxed(MTK_IOMMU_TLB_ADDR(iova), 220 + data->base + REG_MMU_INVLD_START_A); 221 + writel_relaxed(MTK_IOMMU_TLB_ADDR(iova + size - 1), 239 222 data->base + REG_MMU_INVLD_END_A); 240 223 writel_relaxed(F_MMU_INV_RANGE, 241 224 data->base + REG_MMU_INVALIDATE); ··· 256 219 if (ret) { 257 220 dev_warn(data->dev, 258 221 "Partial TLB flush timed out, falling back to full flush\n"); 259 - mtk_iommu_tlb_flush_all(cookie); 222 + mtk_iommu_tlb_flush_all(data); 260 223 } 261 224 /* Clear the CPE status */ 262 225 writel_relaxed(0, data->base + REG_MMU_CPE_DONE); 263 226 spin_unlock_irqrestore(&data->tlb_lock, flags); 227 + 228 + if (has_pm) 229 + pm_runtime_put(data->dev); 264 230 } 265 231 } 266 - 267 - static void mtk_iommu_tlb_flush_page_nosync(struct iommu_iotlb_gather *gather, 268 - unsigned long iova, size_t granule, 269 - void *cookie) 270 - { 271 - struct mtk_iommu_data *data = cookie; 272 - struct iommu_domain *domain = &data->m4u_dom->domain; 273 - 274 - iommu_iotlb_gather_add_page(domain, gather, iova, granule); 275 - } 276 - 277 - static const struct iommu_flush_ops mtk_iommu_flush_ops = { 278 - .tlb_flush_all = mtk_iommu_tlb_flush_all, 279 - .tlb_flush_walk = mtk_iommu_tlb_flush_range_sync, 280 - .tlb_add_page = mtk_iommu_tlb_flush_page_nosync, 281 - }; 282 232 283 233 static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) 284 234 { 285 235 struct mtk_iommu_data *data = dev_id; 286 236 struct mtk_iommu_domain *dom = data->m4u_dom; 287 - u32 int_state, regval, fault_iova, fault_pa; 288 237 unsigned int fault_larb, fault_port, sub_comm = 0; 238 + u32 int_state, regval, va34_32, pa34_32; 239 + u64 fault_iova, fault_pa; 289 240 bool layer, write; 290 241 291 242 /* Read error info from registers */ ··· 289 264 } 290 265 layer = fault_iova & F_MMU_FAULT_VA_LAYER_BIT; 291 266 write = fault_iova & F_MMU_FAULT_VA_WRITE_BIT; 267 + if (MTK_IOMMU_HAS_FLAG(data->plat_data, IOVA_34_EN)) { 268 + va34_32 = FIELD_GET(F_MMU_INVAL_VA_34_32_MASK, fault_iova); 269 + pa34_32 = FIELD_GET(F_MMU_INVAL_PA_34_32_MASK, fault_iova); 270 + fault_iova = fault_iova & F_MMU_INVAL_VA_31_12_MASK; 271 + fault_iova |= (u64)va34_32 << 32; 272 + fault_pa |= (u64)pa34_32 << 32; 273 + } 274 + 292 275 fault_port = F_MMU_INT_ID_PORT_ID(regval); 293 276 if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_SUB_COMM)) { 294 277 fault_larb = F_MMU_INT_ID_COMM_ID(regval); ··· 310 277 write ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ)) { 311 278 dev_err_ratelimited( 312 279 data->dev, 313 - "fault type=0x%x iova=0x%x pa=0x%x larb=%d port=%d layer=%d %s\n", 280 + "fault type=0x%x iova=0x%llx pa=0x%llx larb=%d port=%d layer=%d %s\n", 314 281 int_state, fault_iova, fault_pa, fault_larb, fault_port, 315 282 layer, write ? "write" : "read"); 316 283 } ··· 325 292 return IRQ_HANDLED; 326 293 } 327 294 328 - static void mtk_iommu_config(struct mtk_iommu_data *data, 329 - struct device *dev, bool enable) 295 + static int mtk_iommu_get_domain_id(struct device *dev, 296 + const struct mtk_iommu_plat_data *plat_data) 297 + { 298 + const struct mtk_iommu_iova_region *rgn = plat_data->iova_region; 299 + const struct bus_dma_region *dma_rgn = dev->dma_range_map; 300 + int i, candidate = -1; 301 + dma_addr_t dma_end; 302 + 303 + if (!dma_rgn || plat_data->iova_region_nr == 1) 304 + return 0; 305 + 306 + dma_end = dma_rgn->dma_start + dma_rgn->size - 1; 307 + for (i = 0; i < plat_data->iova_region_nr; i++, rgn++) { 308 + /* Best fit. */ 309 + if (dma_rgn->dma_start == rgn->iova_base && 310 + dma_end == rgn->iova_base + rgn->size - 1) 311 + return i; 312 + /* ok if it is inside this region. */ 313 + if (dma_rgn->dma_start >= rgn->iova_base && 314 + dma_end < rgn->iova_base + rgn->size) 315 + candidate = i; 316 + } 317 + 318 + if (candidate >= 0) 319 + return candidate; 320 + dev_err(dev, "Can NOT find the iommu domain id(%pad 0x%llx).\n", 321 + &dma_rgn->dma_start, dma_rgn->size); 322 + return -EINVAL; 323 + } 324 + 325 + static void mtk_iommu_config(struct mtk_iommu_data *data, struct device *dev, 326 + bool enable, unsigned int domid) 330 327 { 331 328 struct mtk_smi_larb_iommu *larb_mmu; 332 329 unsigned int larbid, portid; 333 330 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 331 + const struct mtk_iommu_iova_region *region; 334 332 int i; 335 333 336 334 for (i = 0; i < fwspec->num_ids; ++i) { 337 335 larbid = MTK_M4U_TO_LARB(fwspec->ids[i]); 338 336 portid = MTK_M4U_TO_PORT(fwspec->ids[i]); 337 + 339 338 larb_mmu = &data->larb_imu[larbid]; 340 339 341 - dev_dbg(dev, "%s iommu port: %d\n", 342 - enable ? "enable" : "disable", portid); 340 + region = data->plat_data->iova_region + domid; 341 + larb_mmu->bank[portid] = upper_32_bits(region->iova_base); 342 + 343 + dev_dbg(dev, "%s iommu for larb(%s) port %d dom %d bank %d.\n", 344 + enable ? "enable" : "disable", dev_name(larb_mmu->dev), 345 + portid, domid, larb_mmu->bank[portid]); 343 346 344 347 if (enable) 345 348 larb_mmu->mmu |= MTK_SMI_MMU_EN(portid); ··· 384 315 } 385 316 } 386 317 387 - static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom) 318 + static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom, 319 + struct mtk_iommu_data *data, 320 + unsigned int domid) 388 321 { 389 - struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); 322 + const struct mtk_iommu_iova_region *region; 323 + 324 + /* Use the exist domain as there is only one pgtable here. */ 325 + if (data->m4u_dom) { 326 + dom->iop = data->m4u_dom->iop; 327 + dom->cfg = data->m4u_dom->cfg; 328 + dom->domain.pgsize_bitmap = data->m4u_dom->cfg.pgsize_bitmap; 329 + goto update_iova_region; 330 + } 390 331 391 332 dom->cfg = (struct io_pgtable_cfg) { 392 333 .quirks = IO_PGTABLE_QUIRK_ARM_NS | 393 334 IO_PGTABLE_QUIRK_NO_PERMS | 394 - IO_PGTABLE_QUIRK_TLBI_ON_MAP | 395 335 IO_PGTABLE_QUIRK_ARM_MTK_EXT, 396 336 .pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap, 397 - .ias = 32, 398 - .oas = 34, 399 - .tlb = &mtk_iommu_flush_ops, 337 + .ias = MTK_IOMMU_HAS_FLAG(data->plat_data, IOVA_34_EN) ? 34 : 32, 400 338 .iommu_dev = data->dev, 401 339 }; 340 + 341 + if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_4GB_MODE)) 342 + dom->cfg.oas = data->enable_4GB ? 33 : 32; 343 + else 344 + dom->cfg.oas = 35; 402 345 403 346 dom->iop = alloc_io_pgtable_ops(ARM_V7S, &dom->cfg, data); 404 347 if (!dom->iop) { ··· 420 339 421 340 /* Update our support page sizes bitmap */ 422 341 dom->domain.pgsize_bitmap = dom->cfg.pgsize_bitmap; 342 + 343 + update_iova_region: 344 + /* Update the iova region for this domain */ 345 + region = data->plat_data->iova_region + domid; 346 + dom->domain.geometry.aperture_start = region->iova_base; 347 + dom->domain.geometry.aperture_end = region->iova_base + region->size - 1; 348 + dom->domain.geometry.force_aperture = true; 423 349 return 0; 424 350 } 425 351 ··· 441 353 if (!dom) 442 354 return NULL; 443 355 444 - if (iommu_get_dma_cookie(&dom->domain)) 445 - goto free_dom; 446 - 447 - if (mtk_iommu_domain_finalise(dom)) 448 - goto put_dma_cookie; 449 - 450 - dom->domain.geometry.aperture_start = 0; 451 - dom->domain.geometry.aperture_end = DMA_BIT_MASK(32); 452 - dom->domain.geometry.force_aperture = true; 356 + if (iommu_get_dma_cookie(&dom->domain)) { 357 + kfree(dom); 358 + return NULL; 359 + } 453 360 454 361 return &dom->domain; 455 - 456 - put_dma_cookie: 457 - iommu_put_dma_cookie(&dom->domain); 458 - free_dom: 459 - kfree(dom); 460 - return NULL; 461 362 } 462 363 463 364 static void mtk_iommu_domain_free(struct iommu_domain *domain) 464 365 { 465 - struct mtk_iommu_domain *dom = to_mtk_domain(domain); 466 - 467 - free_io_pgtable_ops(dom->iop); 468 366 iommu_put_dma_cookie(domain); 469 367 kfree(to_mtk_domain(domain)); 470 368 } ··· 460 386 { 461 387 struct mtk_iommu_data *data = dev_iommu_priv_get(dev); 462 388 struct mtk_iommu_domain *dom = to_mtk_domain(domain); 389 + struct device *m4udev = data->dev; 390 + int ret, domid; 463 391 464 - if (!data) 465 - return -ENODEV; 392 + domid = mtk_iommu_get_domain_id(dev, data->plat_data); 393 + if (domid < 0) 394 + return domid; 466 395 467 - /* Update the pgtable base address register of the M4U HW */ 468 - if (!data->m4u_dom) { 396 + if (!dom->data) { 397 + if (mtk_iommu_domain_finalise(dom, data, domid)) 398 + return -ENODEV; 399 + dom->data = data; 400 + } 401 + 402 + if (!data->m4u_dom) { /* Initialize the M4U HW */ 403 + ret = pm_runtime_resume_and_get(m4udev); 404 + if (ret < 0) 405 + return ret; 406 + 407 + ret = mtk_iommu_hw_init(data); 408 + if (ret) { 409 + pm_runtime_put(m4udev); 410 + return ret; 411 + } 469 412 data->m4u_dom = dom; 470 413 writel(dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK, 471 414 data->base + REG_MMU_PT_BASE_ADDR); 415 + 416 + pm_runtime_put(m4udev); 472 417 } 473 418 474 - mtk_iommu_config(data, dev, true); 419 + mtk_iommu_config(data, dev, true, domid); 475 420 return 0; 476 421 } 477 422 ··· 499 406 { 500 407 struct mtk_iommu_data *data = dev_iommu_priv_get(dev); 501 408 502 - if (!data) 503 - return; 504 - 505 - mtk_iommu_config(data, dev, false); 409 + mtk_iommu_config(data, dev, false, 0); 506 410 } 507 411 508 412 static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova, 509 413 phys_addr_t paddr, size_t size, int prot, gfp_t gfp) 510 414 { 511 415 struct mtk_iommu_domain *dom = to_mtk_domain(domain); 512 - struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); 513 416 514 417 /* The "4GB mode" M4U physically can not use the lower remap of Dram. */ 515 - if (data->enable_4GB) 418 + if (dom->data->enable_4GB) 516 419 paddr |= BIT_ULL(32); 517 420 518 421 /* Synchronize with the tlb_lock */ ··· 520 431 struct iommu_iotlb_gather *gather) 521 432 { 522 433 struct mtk_iommu_domain *dom = to_mtk_domain(domain); 434 + unsigned long end = iova + size - 1; 523 435 436 + if (gather->start > iova) 437 + gather->start = iova; 438 + if (gather->end < end) 439 + gather->end = end; 524 440 return dom->iop->unmap(dom->iop, iova, size, gather); 525 441 } 526 442 527 443 static void mtk_iommu_flush_iotlb_all(struct iommu_domain *domain) 528 444 { 529 - mtk_iommu_tlb_flush_all(mtk_iommu_get_m4u_data()); 445 + struct mtk_iommu_domain *dom = to_mtk_domain(domain); 446 + 447 + mtk_iommu_tlb_flush_all(dom->data); 530 448 } 531 449 532 450 static void mtk_iommu_iotlb_sync(struct iommu_domain *domain, 533 451 struct iommu_iotlb_gather *gather) 534 452 { 535 - struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); 536 - size_t length = gather->end - gather->start; 537 - 538 - if (gather->start == ULONG_MAX) 539 - return; 453 + struct mtk_iommu_domain *dom = to_mtk_domain(domain); 454 + size_t length = gather->end - gather->start + 1; 540 455 541 456 mtk_iommu_tlb_flush_range_sync(gather->start, length, gather->pgsize, 542 - data); 457 + dom->data); 458 + } 459 + 460 + static void mtk_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, 461 + size_t size) 462 + { 463 + struct mtk_iommu_domain *dom = to_mtk_domain(domain); 464 + 465 + mtk_iommu_tlb_flush_range_sync(iova, size, size, dom->data); 543 466 } 544 467 545 468 static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, 546 469 dma_addr_t iova) 547 470 { 548 471 struct mtk_iommu_domain *dom = to_mtk_domain(domain); 549 - struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); 550 472 phys_addr_t pa; 551 473 552 474 pa = dom->iop->iova_to_phys(dom->iop, iova); 553 - if (data->enable_4GB && pa >= MTK_IOMMU_4GB_MODE_REMAP_BASE) 475 + if (dom->data->enable_4GB && pa >= MTK_IOMMU_4GB_MODE_REMAP_BASE) 554 476 pa &= ~BIT_ULL(32); 555 477 556 478 return pa; ··· 593 493 static struct iommu_group *mtk_iommu_device_group(struct device *dev) 594 494 { 595 495 struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); 496 + struct iommu_group *group; 497 + int domid; 596 498 597 499 if (!data) 598 500 return ERR_PTR(-ENODEV); 599 501 600 - /* All the client devices are in the same m4u iommu-group */ 601 - if (!data->m4u_group) { 602 - data->m4u_group = iommu_group_alloc(); 603 - if (IS_ERR(data->m4u_group)) 604 - dev_err(dev, "Failed to allocate M4U IOMMU group\n"); 502 + domid = mtk_iommu_get_domain_id(dev, data->plat_data); 503 + if (domid < 0) 504 + return ERR_PTR(domid); 505 + 506 + group = data->m4u_group[domid]; 507 + if (!group) { 508 + group = iommu_group_alloc(); 509 + if (!IS_ERR(group)) 510 + data->m4u_group[domid] = group; 605 511 } else { 606 - iommu_group_ref_get(data->m4u_group); 512 + iommu_group_ref_get(group); 607 513 } 608 - return data->m4u_group; 514 + return group; 609 515 } 610 516 611 517 static int mtk_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) ··· 636 530 return iommu_fwspec_add_ids(dev, args->args, 1); 637 531 } 638 532 533 + static void mtk_iommu_get_resv_regions(struct device *dev, 534 + struct list_head *head) 535 + { 536 + struct mtk_iommu_data *data = dev_iommu_priv_get(dev); 537 + unsigned int domid = mtk_iommu_get_domain_id(dev, data->plat_data), i; 538 + const struct mtk_iommu_iova_region *resv, *curdom; 539 + struct iommu_resv_region *region; 540 + int prot = IOMMU_WRITE | IOMMU_READ; 541 + 542 + if ((int)domid < 0) 543 + return; 544 + curdom = data->plat_data->iova_region + domid; 545 + for (i = 0; i < data->plat_data->iova_region_nr; i++) { 546 + resv = data->plat_data->iova_region + i; 547 + 548 + /* Only reserve when the region is inside the current domain */ 549 + if (resv->iova_base <= curdom->iova_base || 550 + resv->iova_base + resv->size >= curdom->iova_base + curdom->size) 551 + continue; 552 + 553 + region = iommu_alloc_resv_region(resv->iova_base, resv->size, 554 + prot, IOMMU_RESV_RESERVED); 555 + if (!region) 556 + return; 557 + 558 + list_add_tail(&region->list, head); 559 + } 560 + } 561 + 639 562 static const struct iommu_ops mtk_iommu_ops = { 640 563 .domain_alloc = mtk_iommu_domain_alloc, 641 564 .domain_free = mtk_iommu_domain_free, ··· 674 539 .unmap = mtk_iommu_unmap, 675 540 .flush_iotlb_all = mtk_iommu_flush_iotlb_all, 676 541 .iotlb_sync = mtk_iommu_iotlb_sync, 542 + .iotlb_sync_map = mtk_iommu_sync_map, 677 543 .iova_to_phys = mtk_iommu_iova_to_phys, 678 544 .probe_device = mtk_iommu_probe_device, 679 545 .release_device = mtk_iommu_release_device, 680 546 .device_group = mtk_iommu_device_group, 681 547 .of_xlate = mtk_iommu_of_xlate, 548 + .get_resv_regions = mtk_iommu_get_resv_regions, 549 + .put_resv_regions = generic_iommu_put_resv_regions, 682 550 .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M, 683 551 }; 684 552 ··· 777 639 { 778 640 struct mtk_iommu_data *data; 779 641 struct device *dev = &pdev->dev; 642 + struct device_node *larbnode, *smicomm_node; 643 + struct platform_device *plarbdev; 644 + struct device_link *link; 780 645 struct resource *res; 781 646 resource_size_t ioaddr; 782 647 struct component_match *match = NULL; ··· 846 705 return larb_nr; 847 706 848 707 for (i = 0; i < larb_nr; i++) { 849 - struct device_node *larbnode; 850 - struct platform_device *plarbdev; 851 708 u32 id; 852 709 853 710 larbnode = of_parse_phandle(dev->of_node, "mediatek,larbs", i); ··· 872 733 compare_of, larbnode); 873 734 } 874 735 875 - platform_set_drvdata(pdev, data); 736 + /* Get smi-common dev from the last larb. */ 737 + smicomm_node = of_parse_phandle(larbnode, "mediatek,smi", 0); 738 + if (!smicomm_node) 739 + return -EINVAL; 876 740 877 - ret = mtk_iommu_hw_init(data); 878 - if (ret) 879 - return ret; 741 + plarbdev = of_find_device_by_node(smicomm_node); 742 + of_node_put(smicomm_node); 743 + data->smicomm_dev = &plarbdev->dev; 744 + 745 + pm_runtime_enable(dev); 746 + 747 + link = device_link_add(data->smicomm_dev, dev, 748 + DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME); 749 + if (!link) { 750 + dev_err(dev, "Unable to link %s.\n", dev_name(data->smicomm_dev)); 751 + ret = -EINVAL; 752 + goto out_runtime_disable; 753 + } 754 + 755 + platform_set_drvdata(pdev, data); 880 756 881 757 ret = iommu_device_sysfs_add(&data->iommu, dev, NULL, 882 758 "mtk-iommu.%pa", &ioaddr); 883 759 if (ret) 884 - return ret; 760 + goto out_link_remove; 885 761 886 762 iommu_device_set_ops(&data->iommu, &mtk_iommu_ops); 887 763 iommu_device_set_fwnode(&data->iommu, &pdev->dev.of_node->fwnode); 888 764 889 765 ret = iommu_device_register(&data->iommu); 890 766 if (ret) 891 - return ret; 767 + goto out_sysfs_remove; 892 768 893 769 spin_lock_init(&data->tlb_lock); 894 770 list_add_tail(&data->list, &m4ulist); 895 771 896 - if (!iommu_present(&platform_bus_type)) 897 - bus_set_iommu(&platform_bus_type, &mtk_iommu_ops); 772 + if (!iommu_present(&platform_bus_type)) { 773 + ret = bus_set_iommu(&platform_bus_type, &mtk_iommu_ops); 774 + if (ret) 775 + goto out_list_del; 776 + } 898 777 899 - return component_master_add_with_match(dev, &mtk_iommu_com_ops, match); 778 + ret = component_master_add_with_match(dev, &mtk_iommu_com_ops, match); 779 + if (ret) 780 + goto out_bus_set_null; 781 + return ret; 782 + 783 + out_bus_set_null: 784 + bus_set_iommu(&platform_bus_type, NULL); 785 + out_list_del: 786 + list_del(&data->list); 787 + iommu_device_unregister(&data->iommu); 788 + out_sysfs_remove: 789 + iommu_device_sysfs_remove(&data->iommu); 790 + out_link_remove: 791 + device_link_remove(data->smicomm_dev, dev); 792 + out_runtime_disable: 793 + pm_runtime_disable(dev); 794 + return ret; 900 795 } 901 796 902 797 static int mtk_iommu_remove(struct platform_device *pdev) ··· 944 771 bus_set_iommu(&platform_bus_type, NULL); 945 772 946 773 clk_disable_unprepare(data->bclk); 774 + device_link_remove(data->smicomm_dev, &pdev->dev); 775 + pm_runtime_disable(&pdev->dev); 947 776 devm_free_irq(&pdev->dev, data->irq, data); 948 777 component_master_del(&pdev->dev, &mtk_iommu_com_ops); 949 778 return 0; 950 779 } 951 780 952 - static int __maybe_unused mtk_iommu_suspend(struct device *dev) 781 + static int __maybe_unused mtk_iommu_runtime_suspend(struct device *dev) 953 782 { 954 783 struct mtk_iommu_data *data = dev_get_drvdata(dev); 955 784 struct mtk_iommu_suspend_reg *reg = &data->reg; ··· 969 794 return 0; 970 795 } 971 796 972 - static int __maybe_unused mtk_iommu_resume(struct device *dev) 797 + static int __maybe_unused mtk_iommu_runtime_resume(struct device *dev) 973 798 { 974 799 struct mtk_iommu_data *data = dev_get_drvdata(dev); 975 800 struct mtk_iommu_suspend_reg *reg = &data->reg; ··· 977 802 void __iomem *base = data->base; 978 803 int ret; 979 804 805 + /* Avoid first resume to affect the default value of registers below. */ 806 + if (!m4u_dom) 807 + return 0; 980 808 ret = clk_prepare_enable(data->bclk); 981 809 if (ret) { 982 810 dev_err(data->dev, "Failed to enable clk(%d) in resume\n", ret); ··· 993 815 writel_relaxed(reg->int_main_control, base + REG_MMU_INT_MAIN_CONTROL); 994 816 writel_relaxed(reg->ivrp_paddr, base + REG_MMU_IVRP_PADDR); 995 817 writel_relaxed(reg->vld_pa_rng, base + REG_MMU_VLD_PA_RNG); 996 - if (m4u_dom) 997 - writel(m4u_dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK, 998 - base + REG_MMU_PT_BASE_ADDR); 818 + writel(m4u_dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK, base + REG_MMU_PT_BASE_ADDR); 999 819 return 0; 1000 820 } 1001 821 1002 822 static const struct dev_pm_ops mtk_iommu_pm_ops = { 1003 - SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(mtk_iommu_suspend, mtk_iommu_resume) 823 + SET_RUNTIME_PM_OPS(mtk_iommu_runtime_suspend, mtk_iommu_runtime_resume, NULL) 824 + SET_LATE_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, 825 + pm_runtime_force_resume) 1004 826 }; 1005 827 1006 828 static const struct mtk_iommu_plat_data mt2712_data = { 1007 829 .m4u_plat = M4U_MT2712, 1008 830 .flags = HAS_4GB_MODE | HAS_BCLK | HAS_VLD_PA_RNG, 1009 831 .inv_sel_reg = REG_MMU_INV_SEL_GEN1, 832 + .iova_region = single_domain, 833 + .iova_region_nr = ARRAY_SIZE(single_domain), 1010 834 .larbid_remap = {{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}}, 1011 835 }; 1012 836 ··· 1016 836 .m4u_plat = M4U_MT6779, 1017 837 .flags = HAS_SUB_COMM | OUT_ORDER_WR_EN | WR_THROT_EN, 1018 838 .inv_sel_reg = REG_MMU_INV_SEL_GEN2, 839 + .iova_region = single_domain, 840 + .iova_region_nr = ARRAY_SIZE(single_domain), 1019 841 .larbid_remap = {{0}, {1}, {2}, {3}, {5}, {7, 8}, {10}, {9}}, 1020 842 }; 1021 843 ··· 1025 843 .m4u_plat = M4U_MT8167, 1026 844 .flags = RESET_AXI | HAS_LEGACY_IVRP_PADDR, 1027 845 .inv_sel_reg = REG_MMU_INV_SEL_GEN1, 846 + .iova_region = single_domain, 847 + .iova_region_nr = ARRAY_SIZE(single_domain), 1028 848 .larbid_remap = {{0}, {1}, {2}}, /* Linear mapping. */ 1029 849 }; 1030 850 ··· 1035 851 .flags = HAS_4GB_MODE | HAS_BCLK | RESET_AXI | 1036 852 HAS_LEGACY_IVRP_PADDR, 1037 853 .inv_sel_reg = REG_MMU_INV_SEL_GEN1, 854 + .iova_region = single_domain, 855 + .iova_region_nr = ARRAY_SIZE(single_domain), 1038 856 .larbid_remap = {{0}, {1}, {2}, {3}, {4}, {5}}, /* Linear mapping. */ 1039 857 }; 1040 858 ··· 1044 858 .m4u_plat = M4U_MT8183, 1045 859 .flags = RESET_AXI, 1046 860 .inv_sel_reg = REG_MMU_INV_SEL_GEN1, 861 + .iova_region = single_domain, 862 + .iova_region_nr = ARRAY_SIZE(single_domain), 1047 863 .larbid_remap = {{0}, {4}, {5}, {6}, {7}, {2}, {3}, {1}}, 864 + }; 865 + 866 + static const struct mtk_iommu_plat_data mt8192_data = { 867 + .m4u_plat = M4U_MT8192, 868 + .flags = HAS_BCLK | HAS_SUB_COMM | OUT_ORDER_WR_EN | 869 + WR_THROT_EN | IOVA_34_EN, 870 + .inv_sel_reg = REG_MMU_INV_SEL_GEN2, 871 + .iova_region = mt8192_multi_dom, 872 + .iova_region_nr = ARRAY_SIZE(mt8192_multi_dom), 873 + .larbid_remap = {{0}, {1}, {4, 5}, {7}, {2}, {9, 11, 19, 20}, 874 + {0, 14, 16}, {0, 13, 18, 17}}, 1048 875 }; 1049 876 1050 877 static const struct of_device_id mtk_iommu_of_ids[] = { ··· 1066 867 { .compatible = "mediatek,mt8167-m4u", .data = &mt8167_data}, 1067 868 { .compatible = "mediatek,mt8173-m4u", .data = &mt8173_data}, 1068 869 { .compatible = "mediatek,mt8183-m4u", .data = &mt8183_data}, 870 + { .compatible = "mediatek,mt8192-m4u", .data = &mt8192_data}, 1069 871 {} 1070 872 }; 1071 873
+11 -1
drivers/iommu/mtk_iommu.h
··· 17 17 #include <linux/spinlock.h> 18 18 #include <linux/dma-mapping.h> 19 19 #include <soc/mediatek/smi.h> 20 + #include <dt-bindings/memory/mtk-memory-port.h> 20 21 21 22 #define MTK_LARB_COM_MAX 8 22 23 #define MTK_LARB_SUBCOM_MAX 4 24 + 25 + #define MTK_IOMMU_GROUP_MAX 8 23 26 24 27 struct mtk_iommu_suspend_reg { 25 28 union { ··· 45 42 M4U_MT8167, 46 43 M4U_MT8173, 47 44 M4U_MT8183, 45 + M4U_MT8192, 48 46 }; 47 + 48 + struct mtk_iommu_iova_region; 49 49 50 50 struct mtk_iommu_plat_data { 51 51 enum mtk_iommu_plat m4u_plat; 52 52 u32 flags; 53 53 u32 inv_sel_reg; 54 + 55 + unsigned int iova_region_nr; 56 + const struct mtk_iommu_iova_region *iova_region; 54 57 unsigned char larbid_remap[MTK_LARB_COM_MAX][MTK_LARB_SUBCOM_MAX]; 55 58 }; 56 59 ··· 70 61 phys_addr_t protect_base; /* protect memory base */ 71 62 struct mtk_iommu_suspend_reg reg; 72 63 struct mtk_iommu_domain *m4u_dom; 73 - struct iommu_group *m4u_group; 64 + struct iommu_group *m4u_group[MTK_IOMMU_GROUP_MAX]; 74 65 bool enable_4GB; 75 66 spinlock_t tlb_lock; /* lock for tlb range flush */ 76 67 77 68 struct iommu_device iommu; 78 69 const struct mtk_iommu_plat_data *plat_data; 70 + struct device *smicomm_dev; 79 71 80 72 struct dma_iommu_mapping *mapping; /* For mtk_iommu_v1.c */ 81 73
+5 -2
drivers/iommu/tegra-gart.c
··· 261 261 return 0; 262 262 } 263 263 264 - static void gart_iommu_sync_map(struct iommu_domain *domain) 264 + static void gart_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, 265 + size_t size) 265 266 { 266 267 FLUSH_GART_REGS(gart_handle); 267 268 } ··· 270 269 static void gart_iommu_sync(struct iommu_domain *domain, 271 270 struct iommu_iotlb_gather *gather) 272 271 { 273 - gart_iommu_sync_map(domain); 272 + size_t length = gather->end - gather->start + 1; 273 + 274 + gart_iommu_sync_map(domain, gather->start, length); 274 275 } 275 276 276 277 static const struct iommu_ops gart_iommu_ops = {
+8
drivers/memory/mtk-smi.c
··· 15 15 #include <linux/pm_runtime.h> 16 16 #include <soc/mediatek/smi.h> 17 17 #include <dt-bindings/memory/mt2701-larb-port.h> 18 + #include <dt-bindings/memory/mtk-memory-port.h> 18 19 19 20 /* mt8173 */ 20 21 #define SMI_LARB_MMU_EN 0xf00 ··· 44 43 /* mt2712 */ 45 44 #define SMI_LARB_NONSEC_CON(id) (0x380 + ((id) * 4)) 46 45 #define F_MMU_EN BIT(0) 46 + #define BANK_SEL(id) ({ \ 47 + u32 _id = (id) & 0x3; \ 48 + (_id << 8 | _id << 10 | _id << 12 | _id << 14); \ 49 + }) 47 50 48 51 /* SMI COMMON */ 49 52 #define SMI_BUS_SEL 0x220 ··· 92 87 const struct mtk_smi_larb_gen *larb_gen; 93 88 int larbid; 94 89 u32 *mmu; 90 + unsigned char *bank; 95 91 }; 96 92 97 93 static int mtk_smi_clk_enable(const struct mtk_smi *smi) ··· 159 153 if (dev == larb_mmu[i].dev) { 160 154 larb->larbid = i; 161 155 larb->mmu = &larb_mmu[i].mmu; 156 + larb->bank = larb_mmu[i].bank; 162 157 return 0; 163 158 } 164 159 } ··· 178 171 for_each_set_bit(i, (unsigned long *)larb->mmu, 32) { 179 172 reg = readl_relaxed(larb->base + SMI_LARB_NONSEC_CON(i)); 180 173 reg |= F_MMU_EN; 174 + reg |= BANK_SEL(larb->bank[i]); 181 175 writel(reg, larb->base + SMI_LARB_NONSEC_CON(i)); 182 176 } 183 177 }
+1 -1
drivers/perf/Kconfig
··· 62 62 63 63 config ARM_SMMU_V3_PMU 64 64 tristate "ARM SMMUv3 Performance Monitors Extension" 65 - depends on ARM64 && ACPI && ARM_SMMU_V3 65 + depends on ARM64 && ACPI 66 66 help 67 67 Provides support for the ARM SMMUv3 Performance Monitor Counter 68 68 Groups (PMCG), which provide monitoring of transactions passing
+10 -1
include/acpi/actbl1.h
··· 514 514 ACPI_DMAR_TYPE_ROOT_ATS = 2, 515 515 ACPI_DMAR_TYPE_HARDWARE_AFFINITY = 3, 516 516 ACPI_DMAR_TYPE_NAMESPACE = 4, 517 - ACPI_DMAR_TYPE_RESERVED = 5 /* 5 and greater are reserved */ 517 + ACPI_DMAR_TYPE_SATC = 5, 518 + ACPI_DMAR_TYPE_RESERVED = 6 /* 6 and greater are reserved */ 518 519 }; 519 520 520 521 /* DMAR Device Scope structure */ ··· 608 607 char device_name[1]; 609 608 }; 610 609 610 + /* 5: SOC Integrated Address Translation Cache Reporting Structure */ 611 + 612 + struct acpi_dmar_satc { 613 + struct acpi_dmar_header header; 614 + u8 flags; 615 + u8 reserved; 616 + u16 segment; 617 + }; 611 618 /******************************************************************************* 612 619 * 613 620 * DRTM - Dynamic Root of Trust for Measurement table
+2 -2
include/dt-bindings/memory/mt2701-larb-port.h
··· 4 4 * Author: Honghui Zhang <honghui.zhang@mediatek.com> 5 5 */ 6 6 7 - #ifndef _MT2701_LARB_PORT_H_ 8 - #define _MT2701_LARB_PORT_H_ 7 + #ifndef _DT_BINDINGS_MEMORY_MT2701_LARB_PORT_H_ 8 + #define _DT_BINDINGS_MEMORY_MT2701_LARB_PORT_H_ 9 9 10 10 /* 11 11 * Mediatek m4u generation 1 such as mt2701 has flat m4u port numbers,
+3 -3
include/dt-bindings/memory/mt2712-larb-port.h
··· 3 3 * Copyright (c) 2017 MediaTek Inc. 4 4 * Author: Yong Wu <yong.wu@mediatek.com> 5 5 */ 6 - #ifndef __DTS_IOMMU_PORT_MT2712_H 7 - #define __DTS_IOMMU_PORT_MT2712_H 6 + #ifndef _DT_BINDINGS_MEMORY_MT2712_LARB_PORT_H_ 7 + #define _DT_BINDINGS_MEMORY_MT2712_LARB_PORT_H_ 8 8 9 - #define MTK_M4U_ID(larb, port) (((larb) << 5) | (port)) 9 + #include <dt-bindings/memory/mtk-memory-port.h> 10 10 11 11 #define M4U_LARB0_ID 0 12 12 #define M4U_LARB1_ID 1
+3 -3
include/dt-bindings/memory/mt6779-larb-port.h
··· 4 4 * Author: Chao Hao <chao.hao@mediatek.com> 5 5 */ 6 6 7 - #ifndef _DTS_IOMMU_PORT_MT6779_H_ 8 - #define _DTS_IOMMU_PORT_MT6779_H_ 7 + #ifndef _DT_BINDINGS_MEMORY_MT6779_LARB_PORT_H_ 8 + #define _DT_BINDINGS_MEMORY_MT6779_LARB_PORT_H_ 9 9 10 - #define MTK_M4U_ID(larb, port) (((larb) << 5) | (port)) 10 + #include <dt-bindings/memory/mtk-memory-port.h> 11 11 12 12 #define M4U_LARB0_ID 0 13 13 #define M4U_LARB1_ID 1
+3 -3
include/dt-bindings/memory/mt8167-larb-port.h
··· 5 5 * Author: Honghui Zhang <honghui.zhang@mediatek.com> 6 6 * Author: Fabien Parent <fparent@baylibre.com> 7 7 */ 8 - #ifndef __DTS_IOMMU_PORT_MT8167_H 9 - #define __DTS_IOMMU_PORT_MT8167_H 8 + #ifndef _DT_BINDINGS_MEMORY_MT8167_LARB_PORT_H_ 9 + #define _DT_BINDINGS_MEMORY_MT8167_LARB_PORT_H_ 10 10 11 - #define MTK_M4U_ID(larb, port) (((larb) << 5) | (port)) 11 + #include <dt-bindings/memory/mtk-memory-port.h> 12 12 13 13 #define M4U_LARB0_ID 0 14 14 #define M4U_LARB1_ID 1
+3 -3
include/dt-bindings/memory/mt8173-larb-port.h
··· 3 3 * Copyright (c) 2015-2016 MediaTek Inc. 4 4 * Author: Yong Wu <yong.wu@mediatek.com> 5 5 */ 6 - #ifndef __DTS_IOMMU_PORT_MT8173_H 7 - #define __DTS_IOMMU_PORT_MT8173_H 6 + #ifndef _DT_BINDINGS_MEMORY_MT8173_LARB_PORT_H_ 7 + #define _DT_BINDINGS_MEMORY_MT8173_LARB_PORT_H_ 8 8 9 - #define MTK_M4U_ID(larb, port) (((larb) << 5) | (port)) 9 + #include <dt-bindings/memory/mtk-memory-port.h> 10 10 11 11 #define M4U_LARB0_ID 0 12 12 #define M4U_LARB1_ID 1
+3 -3
include/dt-bindings/memory/mt8183-larb-port.h
··· 3 3 * Copyright (c) 2018 MediaTek Inc. 4 4 * Author: Yong Wu <yong.wu@mediatek.com> 5 5 */ 6 - #ifndef __DTS_IOMMU_PORT_MT8183_H 7 - #define __DTS_IOMMU_PORT_MT8183_H 6 + #ifndef _DT_BINDINGS_MEMORY_MT8183_LARB_PORT_H_ 7 + #define _DT_BINDINGS_MEMORY_MT8183_LARB_PORT_H_ 8 8 9 - #define MTK_M4U_ID(larb, port) (((larb) << 5) | (port)) 9 + #include <dt-bindings/memory/mtk-memory-port.h> 10 10 11 11 #define M4U_LARB0_ID 0 12 12 #define M4U_LARB1_ID 1
+243
include/dt-bindings/memory/mt8192-larb-port.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + /* 3 + * Copyright (c) 2020 MediaTek Inc. 4 + * 5 + * Author: Chao Hao <chao.hao@mediatek.com> 6 + * Author: Yong Wu <yong.wu@mediatek.com> 7 + */ 8 + #ifndef _DT_BINDINGS_MEMORY_MT8192_LARB_PORT_H_ 9 + #define _DT_BINDINGS_MEMORY_MT8192_LARB_PORT_H_ 10 + 11 + #include <dt-bindings/memory/mtk-memory-port.h> 12 + 13 + /* 14 + * MM IOMMU supports 16GB dma address. 15 + * 16 + * The address will preassign like this: 17 + * 18 + * modules dma-address-region larbs-ports 19 + * disp 0 ~ 4G larb0/1 20 + * vcodec 4G ~ 8G larb4/5/7 21 + * cam/mdp 8G ~ 12G larb2/9/11/13/14/16/17/18/19/20 22 + * CCU0 0x4000_0000 ~ 0x43ff_ffff larb13: port 9/10 23 + * CCU1 0x4400_0000 ~ 0x47ff_ffff larb14: port 4/5 24 + * 25 + * larb3/6/8/10/12/15 is null. 26 + */ 27 + 28 + /* larb0 */ 29 + #define M4U_PORT_L0_DISP_POSTMASK0 MTK_M4U_ID(0, 0) 30 + #define M4U_PORT_L0_OVL_RDMA0_HDR MTK_M4U_ID(0, 1) 31 + #define M4U_PORT_L0_OVL_RDMA0 MTK_M4U_ID(0, 2) 32 + #define M4U_PORT_L0_DISP_RDMA0 MTK_M4U_ID(0, 3) 33 + #define M4U_PORT_L0_DISP_WDMA0 MTK_M4U_ID(0, 4) 34 + #define M4U_PORT_L0_DISP_FAKE0 MTK_M4U_ID(0, 5) 35 + 36 + /* larb1 */ 37 + #define M4U_PORT_L1_OVL_2L_RDMA0_HDR MTK_M4U_ID(1, 0) 38 + #define M4U_PORT_L1_OVL_2L_RDMA2_HDR MTK_M4U_ID(1, 1) 39 + #define M4U_PORT_L1_OVL_2L_RDMA0 MTK_M4U_ID(1, 2) 40 + #define M4U_PORT_L1_OVL_2L_RDMA2 MTK_M4U_ID(1, 3) 41 + #define M4U_PORT_L1_DISP_MDP_RDMA4 MTK_M4U_ID(1, 4) 42 + #define M4U_PORT_L1_DISP_RDMA4 MTK_M4U_ID(1, 5) 43 + #define M4U_PORT_L1_DISP_UFBC_WDMA0 MTK_M4U_ID(1, 6) 44 + #define M4U_PORT_L1_DISP_FAKE1 MTK_M4U_ID(1, 7) 45 + 46 + /* larb2 */ 47 + #define M4U_PORT_L2_MDP_RDMA0 MTK_M4U_ID(2, 0) 48 + #define M4U_PORT_L2_MDP_RDMA1 MTK_M4U_ID(2, 1) 49 + #define M4U_PORT_L2_MDP_WROT0 MTK_M4U_ID(2, 2) 50 + #define M4U_PORT_L2_MDP_WROT1 MTK_M4U_ID(2, 3) 51 + #define M4U_PORT_L2_MDP_DISP_FAKE0 MTK_M4U_ID(2, 4) 52 + 53 + /* larb3: null */ 54 + 55 + /* larb4 */ 56 + #define M4U_PORT_L4_VDEC_MC_EXT MTK_M4U_ID(4, 0) 57 + #define M4U_PORT_L4_VDEC_UFO_EXT MTK_M4U_ID(4, 1) 58 + #define M4U_PORT_L4_VDEC_PP_EXT MTK_M4U_ID(4, 2) 59 + #define M4U_PORT_L4_VDEC_PRED_RD_EXT MTK_M4U_ID(4, 3) 60 + #define M4U_PORT_L4_VDEC_PRED_WR_EXT MTK_M4U_ID(4, 4) 61 + #define M4U_PORT_L4_VDEC_PPWRAP_EXT MTK_M4U_ID(4, 5) 62 + #define M4U_PORT_L4_VDEC_TILE_EXT MTK_M4U_ID(4, 6) 63 + #define M4U_PORT_L4_VDEC_VLD_EXT MTK_M4U_ID(4, 7) 64 + #define M4U_PORT_L4_VDEC_VLD2_EXT MTK_M4U_ID(4, 8) 65 + #define M4U_PORT_L4_VDEC_AVC_MV_EXT MTK_M4U_ID(4, 9) 66 + #define M4U_PORT_L4_VDEC_RG_CTRL_DMA_EXT MTK_M4U_ID(4, 10) 67 + 68 + /* larb5 */ 69 + #define M4U_PORT_L5_VDEC_LAT0_VLD_EXT MTK_M4U_ID(5, 0) 70 + #define M4U_PORT_L5_VDEC_LAT0_VLD2_EXT MTK_M4U_ID(5, 1) 71 + #define M4U_PORT_L5_VDEC_LAT0_AVC_MV_EXT MTK_M4U_ID(5, 2) 72 + #define M4U_PORT_L5_VDEC_LAT0_PRED_RD_EXT MTK_M4U_ID(5, 3) 73 + #define M4U_PORT_L5_VDEC_LAT0_TILE_EXT MTK_M4U_ID(5, 4) 74 + #define M4U_PORT_L5_VDEC_LAT0_WDMA_EXT MTK_M4U_ID(5, 5) 75 + #define M4U_PORT_L5_VDEC_LAT0_RG_CTRL_DMA_EXT MTK_M4U_ID(5, 6) 76 + #define M4U_PORT_L5_VDEC_UFO_ENC_EXT MTK_M4U_ID(5, 7) 77 + 78 + /* larb6: null */ 79 + 80 + /* larb7 */ 81 + #define M4U_PORT_L7_VENC_RCPU MTK_M4U_ID(7, 0) 82 + #define M4U_PORT_L7_VENC_REC MTK_M4U_ID(7, 1) 83 + #define M4U_PORT_L7_VENC_BSDMA MTK_M4U_ID(7, 2) 84 + #define M4U_PORT_L7_VENC_SV_COMV MTK_M4U_ID(7, 3) 85 + #define M4U_PORT_L7_VENC_RD_COMV MTK_M4U_ID(7, 4) 86 + #define M4U_PORT_L7_VENC_CUR_LUMA MTK_M4U_ID(7, 5) 87 + #define M4U_PORT_L7_VENC_CUR_CHROMA MTK_M4U_ID(7, 6) 88 + #define M4U_PORT_L7_VENC_REF_LUMA MTK_M4U_ID(7, 7) 89 + #define M4U_PORT_L7_VENC_REF_CHROMA MTK_M4U_ID(7, 8) 90 + #define M4U_PORT_L7_JPGENC_Y_RDMA MTK_M4U_ID(7, 9) 91 + #define M4U_PORT_L7_JPGENC_Q_RDMA MTK_M4U_ID(7, 10) 92 + #define M4U_PORT_L7_JPGENC_C_TABLE MTK_M4U_ID(7, 11) 93 + #define M4U_PORT_L7_JPGENC_BSDMA MTK_M4U_ID(7, 12) 94 + #define M4U_PORT_L7_VENC_SUB_R_LUMA MTK_M4U_ID(7, 13) 95 + #define M4U_PORT_L7_VENC_SUB_W_LUMA MTK_M4U_ID(7, 14) 96 + 97 + /* larb8: null */ 98 + 99 + /* larb9 */ 100 + #define M4U_PORT_L9_IMG_IMGI_D1 MTK_M4U_ID(9, 0) 101 + #define M4U_PORT_L9_IMG_IMGBI_D1 MTK_M4U_ID(9, 1) 102 + #define M4U_PORT_L9_IMG_DMGI_D1 MTK_M4U_ID(9, 2) 103 + #define M4U_PORT_L9_IMG_DEPI_D1 MTK_M4U_ID(9, 3) 104 + #define M4U_PORT_L9_IMG_ICE_D1 MTK_M4U_ID(9, 4) 105 + #define M4U_PORT_L9_IMG_SMTI_D1 MTK_M4U_ID(9, 5) 106 + #define M4U_PORT_L9_IMG_SMTO_D2 MTK_M4U_ID(9, 6) 107 + #define M4U_PORT_L9_IMG_SMTO_D1 MTK_M4U_ID(9, 7) 108 + #define M4U_PORT_L9_IMG_CRZO_D1 MTK_M4U_ID(9, 8) 109 + #define M4U_PORT_L9_IMG_IMG3O_D1 MTK_M4U_ID(9, 9) 110 + #define M4U_PORT_L9_IMG_VIPI_D1 MTK_M4U_ID(9, 10) 111 + #define M4U_PORT_L9_IMG_SMTI_D5 MTK_M4U_ID(9, 11) 112 + #define M4U_PORT_L9_IMG_TIMGO_D1 MTK_M4U_ID(9, 12) 113 + #define M4U_PORT_L9_IMG_UFBC_W0 MTK_M4U_ID(9, 13) 114 + #define M4U_PORT_L9_IMG_UFBC_R0 MTK_M4U_ID(9, 14) 115 + 116 + /* larb10: null */ 117 + 118 + /* larb11 */ 119 + #define M4U_PORT_L11_IMG_IMGI_D1 MTK_M4U_ID(11, 0) 120 + #define M4U_PORT_L11_IMG_IMGBI_D1 MTK_M4U_ID(11, 1) 121 + #define M4U_PORT_L11_IMG_DMGI_D1 MTK_M4U_ID(11, 2) 122 + #define M4U_PORT_L11_IMG_DEPI_D1 MTK_M4U_ID(11, 3) 123 + #define M4U_PORT_L11_IMG_ICE_D1 MTK_M4U_ID(11, 4) 124 + #define M4U_PORT_L11_IMG_SMTI_D1 MTK_M4U_ID(11, 5) 125 + #define M4U_PORT_L11_IMG_SMTO_D2 MTK_M4U_ID(11, 6) 126 + #define M4U_PORT_L11_IMG_SMTO_D1 MTK_M4U_ID(11, 7) 127 + #define M4U_PORT_L11_IMG_CRZO_D1 MTK_M4U_ID(11, 8) 128 + #define M4U_PORT_L11_IMG_IMG3O_D1 MTK_M4U_ID(11, 9) 129 + #define M4U_PORT_L11_IMG_VIPI_D1 MTK_M4U_ID(11, 10) 130 + #define M4U_PORT_L11_IMG_SMTI_D5 MTK_M4U_ID(11, 11) 131 + #define M4U_PORT_L11_IMG_TIMGO_D1 MTK_M4U_ID(11, 12) 132 + #define M4U_PORT_L11_IMG_UFBC_W0 MTK_M4U_ID(11, 13) 133 + #define M4U_PORT_L11_IMG_UFBC_R0 MTK_M4U_ID(11, 14) 134 + #define M4U_PORT_L11_IMG_WPE_RDMA1 MTK_M4U_ID(11, 15) 135 + #define M4U_PORT_L11_IMG_WPE_RDMA0 MTK_M4U_ID(11, 16) 136 + #define M4U_PORT_L11_IMG_WPE_WDMA MTK_M4U_ID(11, 17) 137 + #define M4U_PORT_L11_IMG_MFB_RDMA0 MTK_M4U_ID(11, 18) 138 + #define M4U_PORT_L11_IMG_MFB_RDMA1 MTK_M4U_ID(11, 19) 139 + #define M4U_PORT_L11_IMG_MFB_RDMA2 MTK_M4U_ID(11, 20) 140 + #define M4U_PORT_L11_IMG_MFB_RDMA3 MTK_M4U_ID(11, 21) 141 + #define M4U_PORT_L11_IMG_MFB_RDMA4 MTK_M4U_ID(11, 22) 142 + #define M4U_PORT_L11_IMG_MFB_RDMA5 MTK_M4U_ID(11, 23) 143 + #define M4U_PORT_L11_IMG_MFB_WDMA0 MTK_M4U_ID(11, 24) 144 + #define M4U_PORT_L11_IMG_MFB_WDMA1 MTK_M4U_ID(11, 25) 145 + 146 + /* larb12: null */ 147 + 148 + /* larb13 */ 149 + #define M4U_PORT_L13_CAM_MRAWI MTK_M4U_ID(13, 0) 150 + #define M4U_PORT_L13_CAM_MRAWO0 MTK_M4U_ID(13, 1) 151 + #define M4U_PORT_L13_CAM_MRAWO1 MTK_M4U_ID(13, 2) 152 + #define M4U_PORT_L13_CAM_CAMSV1 MTK_M4U_ID(13, 3) 153 + #define M4U_PORT_L13_CAM_CAMSV2 MTK_M4U_ID(13, 4) 154 + #define M4U_PORT_L13_CAM_CAMSV3 MTK_M4U_ID(13, 5) 155 + #define M4U_PORT_L13_CAM_CAMSV4 MTK_M4U_ID(13, 6) 156 + #define M4U_PORT_L13_CAM_CAMSV5 MTK_M4U_ID(13, 7) 157 + #define M4U_PORT_L13_CAM_CAMSV6 MTK_M4U_ID(13, 8) 158 + #define M4U_PORT_L13_CAM_CCUI MTK_M4U_ID(13, 9) 159 + #define M4U_PORT_L13_CAM_CCUO MTK_M4U_ID(13, 10) 160 + #define M4U_PORT_L13_CAM_FAKE MTK_M4U_ID(13, 11) 161 + 162 + /* larb14 */ 163 + #define M4U_PORT_L14_CAM_RESERVE1 MTK_M4U_ID(14, 0) 164 + #define M4U_PORT_L14_CAM_RESERVE2 MTK_M4U_ID(14, 1) 165 + #define M4U_PORT_L14_CAM_RESERVE3 MTK_M4U_ID(14, 2) 166 + #define M4U_PORT_L14_CAM_CAMSV0 MTK_M4U_ID(14, 3) 167 + #define M4U_PORT_L14_CAM_CCUI MTK_M4U_ID(14, 4) 168 + #define M4U_PORT_L14_CAM_CCUO MTK_M4U_ID(14, 5) 169 + 170 + /* larb15: null */ 171 + 172 + /* larb16 */ 173 + #define M4U_PORT_L16_CAM_IMGO_R1_A MTK_M4U_ID(16, 0) 174 + #define M4U_PORT_L16_CAM_RRZO_R1_A MTK_M4U_ID(16, 1) 175 + #define M4U_PORT_L16_CAM_CQI_R1_A MTK_M4U_ID(16, 2) 176 + #define M4U_PORT_L16_CAM_BPCI_R1_A MTK_M4U_ID(16, 3) 177 + #define M4U_PORT_L16_CAM_YUVO_R1_A MTK_M4U_ID(16, 4) 178 + #define M4U_PORT_L16_CAM_UFDI_R2_A MTK_M4U_ID(16, 5) 179 + #define M4U_PORT_L16_CAM_RAWI_R2_A MTK_M4U_ID(16, 6) 180 + #define M4U_PORT_L16_CAM_RAWI_R3_A MTK_M4U_ID(16, 7) 181 + #define M4U_PORT_L16_CAM_AAO_R1_A MTK_M4U_ID(16, 8) 182 + #define M4U_PORT_L16_CAM_AFO_R1_A MTK_M4U_ID(16, 9) 183 + #define M4U_PORT_L16_CAM_FLKO_R1_A MTK_M4U_ID(16, 10) 184 + #define M4U_PORT_L16_CAM_LCESO_R1_A MTK_M4U_ID(16, 11) 185 + #define M4U_PORT_L16_CAM_CRZO_R1_A MTK_M4U_ID(16, 12) 186 + #define M4U_PORT_L16_CAM_LTMSO_R1_A MTK_M4U_ID(16, 13) 187 + #define M4U_PORT_L16_CAM_RSSO_R1_A MTK_M4U_ID(16, 14) 188 + #define M4U_PORT_L16_CAM_AAHO_R1_A MTK_M4U_ID(16, 15) 189 + #define M4U_PORT_L16_CAM_LSCI_R1_A MTK_M4U_ID(16, 16) 190 + 191 + /* larb17 */ 192 + #define M4U_PORT_L17_CAM_IMGO_R1_B MTK_M4U_ID(17, 0) 193 + #define M4U_PORT_L17_CAM_RRZO_R1_B MTK_M4U_ID(17, 1) 194 + #define M4U_PORT_L17_CAM_CQI_R1_B MTK_M4U_ID(17, 2) 195 + #define M4U_PORT_L17_CAM_BPCI_R1_B MTK_M4U_ID(17, 3) 196 + #define M4U_PORT_L17_CAM_YUVO_R1_B MTK_M4U_ID(17, 4) 197 + #define M4U_PORT_L17_CAM_UFDI_R2_B MTK_M4U_ID(17, 5) 198 + #define M4U_PORT_L17_CAM_RAWI_R2_B MTK_M4U_ID(17, 6) 199 + #define M4U_PORT_L17_CAM_RAWI_R3_B MTK_M4U_ID(17, 7) 200 + #define M4U_PORT_L17_CAM_AAO_R1_B MTK_M4U_ID(17, 8) 201 + #define M4U_PORT_L17_CAM_AFO_R1_B MTK_M4U_ID(17, 9) 202 + #define M4U_PORT_L17_CAM_FLKO_R1_B MTK_M4U_ID(17, 10) 203 + #define M4U_PORT_L17_CAM_LCESO_R1_B MTK_M4U_ID(17, 11) 204 + #define M4U_PORT_L17_CAM_CRZO_R1_B MTK_M4U_ID(17, 12) 205 + #define M4U_PORT_L17_CAM_LTMSO_R1_B MTK_M4U_ID(17, 13) 206 + #define M4U_PORT_L17_CAM_RSSO_R1_B MTK_M4U_ID(17, 14) 207 + #define M4U_PORT_L17_CAM_AAHO_R1_B MTK_M4U_ID(17, 15) 208 + #define M4U_PORT_L17_CAM_LSCI_R1_B MTK_M4U_ID(17, 16) 209 + 210 + /* larb18 */ 211 + #define M4U_PORT_L18_CAM_IMGO_R1_C MTK_M4U_ID(18, 0) 212 + #define M4U_PORT_L18_CAM_RRZO_R1_C MTK_M4U_ID(18, 1) 213 + #define M4U_PORT_L18_CAM_CQI_R1_C MTK_M4U_ID(18, 2) 214 + #define M4U_PORT_L18_CAM_BPCI_R1_C MTK_M4U_ID(18, 3) 215 + #define M4U_PORT_L18_CAM_YUVO_R1_C MTK_M4U_ID(18, 4) 216 + #define M4U_PORT_L18_CAM_UFDI_R2_C MTK_M4U_ID(18, 5) 217 + #define M4U_PORT_L18_CAM_RAWI_R2_C MTK_M4U_ID(18, 6) 218 + #define M4U_PORT_L18_CAM_RAWI_R3_C MTK_M4U_ID(18, 7) 219 + #define M4U_PORT_L18_CAM_AAO_R1_C MTK_M4U_ID(18, 8) 220 + #define M4U_PORT_L18_CAM_AFO_R1_C MTK_M4U_ID(18, 9) 221 + #define M4U_PORT_L18_CAM_FLKO_R1_C MTK_M4U_ID(18, 10) 222 + #define M4U_PORT_L18_CAM_LCESO_R1_C MTK_M4U_ID(18, 11) 223 + #define M4U_PORT_L18_CAM_CRZO_R1_C MTK_M4U_ID(18, 12) 224 + #define M4U_PORT_L18_CAM_LTMSO_R1_C MTK_M4U_ID(18, 13) 225 + #define M4U_PORT_L18_CAM_RSSO_R1_C MTK_M4U_ID(18, 14) 226 + #define M4U_PORT_L18_CAM_AAHO_R1_C MTK_M4U_ID(18, 15) 227 + #define M4U_PORT_L18_CAM_LSCI_R1_C MTK_M4U_ID(18, 16) 228 + 229 + /* larb19 */ 230 + #define M4U_PORT_L19_IPE_DVS_RDMA MTK_M4U_ID(19, 0) 231 + #define M4U_PORT_L19_IPE_DVS_WDMA MTK_M4U_ID(19, 1) 232 + #define M4U_PORT_L19_IPE_DVP_RDMA MTK_M4U_ID(19, 2) 233 + #define M4U_PORT_L19_IPE_DVP_WDMA MTK_M4U_ID(19, 3) 234 + 235 + /* larb20 */ 236 + #define M4U_PORT_L20_IPE_FDVT_RDA MTK_M4U_ID(20, 0) 237 + #define M4U_PORT_L20_IPE_FDVT_RDB MTK_M4U_ID(20, 1) 238 + #define M4U_PORT_L20_IPE_FDVT_WRA MTK_M4U_ID(20, 2) 239 + #define M4U_PORT_L20_IPE_FDVT_WRB MTK_M4U_ID(20, 3) 240 + #define M4U_PORT_L20_IPE_RSC_RDMA0 MTK_M4U_ID(20, 4) 241 + #define M4U_PORT_L20_IPE_RSC_WDMA MTK_M4U_ID(20, 5) 242 + 243 + #endif
+15
include/dt-bindings/memory/mtk-memory-port.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + /* 3 + * Copyright (c) 2020 MediaTek Inc. 4 + * Author: Yong Wu <yong.wu@mediatek.com> 5 + */ 6 + #ifndef __DT_BINDINGS_MEMORY_MTK_MEMORY_PORT_H_ 7 + #define __DT_BINDINGS_MEMORY_MTK_MEMORY_PORT_H_ 8 + 9 + #define MTK_LARB_NR_MAX 32 10 + 11 + #define MTK_M4U_ID(larb, port) (((larb) << 5) | (port)) 12 + #define MTK_M4U_TO_LARB(id) (((id) >> 5) & 0x1f) 13 + #define MTK_M4U_TO_PORT(id) ((id) & 0x1f) 14 + 15 + #endif
+2
include/linux/dmar.h
··· 138 138 extern int dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg); 139 139 extern int dmar_parse_one_atsr(struct acpi_dmar_header *header, void *arg); 140 140 extern int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg); 141 + extern int dmar_parse_one_satc(struct acpi_dmar_header *hdr, void *arg); 141 142 extern int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg); 142 143 extern int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert); 143 144 extern int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info); ··· 150 149 #define dmar_parse_one_atsr dmar_res_noop 151 150 #define dmar_check_one_atsr dmar_res_noop 152 151 #define dmar_release_one_atsr dmar_res_noop 152 + #define dmar_parse_one_satc dmar_res_noop 153 153 154 154 static inline int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) 155 155 {
+24 -19
include/linux/intel-iommu.h
··· 42 42 43 43 #define DMA_FL_PTE_PRESENT BIT_ULL(0) 44 44 #define DMA_FL_PTE_US BIT_ULL(2) 45 + #define DMA_FL_PTE_ACCESS BIT_ULL(5) 46 + #define DMA_FL_PTE_DIRTY BIT_ULL(6) 45 47 #define DMA_FL_PTE_XD BIT_ULL(63) 46 48 47 49 #define ADDR_WIDTH_5LEVEL (57) ··· 170 168 * Extended Capability Register 171 169 */ 172 170 171 + #define ecap_rps(e) (((e) >> 49) & 0x1) 173 172 #define ecap_smpwc(e) (((e) >> 48) & 0x1) 174 173 #define ecap_flts(e) (((e) >> 47) & 0x1) 175 174 #define ecap_slts(e) (((e) >> 46) & 0x1) 175 + #define ecap_slads(e) (((e) >> 45) & 0x1) 176 176 #define ecap_vcs(e) (((e) >> 44) & 0x1) 177 177 #define ecap_smts(e) (((e) >> 43) & 0x1) 178 - #define ecap_dit(e) ((e >> 41) & 0x1) 179 - #define ecap_pasid(e) ((e >> 40) & 0x1) 180 - #define ecap_pss(e) ((e >> 35) & 0x1f) 181 - #define ecap_eafs(e) ((e >> 34) & 0x1) 182 - #define ecap_nwfs(e) ((e >> 33) & 0x1) 183 - #define ecap_srs(e) ((e >> 31) & 0x1) 184 - #define ecap_ers(e) ((e >> 30) & 0x1) 185 - #define ecap_prs(e) ((e >> 29) & 0x1) 186 - #define ecap_broken_pasid(e) ((e >> 28) & 0x1) 187 - #define ecap_dis(e) ((e >> 27) & 0x1) 188 - #define ecap_nest(e) ((e >> 26) & 0x1) 189 - #define ecap_mts(e) ((e >> 25) & 0x1) 190 - #define ecap_ecs(e) ((e >> 24) & 0x1) 178 + #define ecap_dit(e) (((e) >> 41) & 0x1) 179 + #define ecap_pds(e) (((e) >> 42) & 0x1) 180 + #define ecap_pasid(e) (((e) >> 40) & 0x1) 181 + #define ecap_pss(e) (((e) >> 35) & 0x1f) 182 + #define ecap_eafs(e) (((e) >> 34) & 0x1) 183 + #define ecap_nwfs(e) (((e) >> 33) & 0x1) 184 + #define ecap_srs(e) (((e) >> 31) & 0x1) 185 + #define ecap_ers(e) (((e) >> 30) & 0x1) 186 + #define ecap_prs(e) (((e) >> 29) & 0x1) 187 + #define ecap_broken_pasid(e) (((e) >> 28) & 0x1) 188 + #define ecap_dis(e) (((e) >> 27) & 0x1) 189 + #define ecap_nest(e) (((e) >> 26) & 0x1) 190 + #define ecap_mts(e) (((e) >> 25) & 0x1) 191 + #define ecap_ecs(e) (((e) >> 24) & 0x1) 191 192 #define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16) 192 193 #define ecap_max_iotlb_offset(e) (ecap_iotlb_offset(e) + 16) 193 194 #define ecap_coherent(e) ((e) & 0x1) 194 195 #define ecap_qis(e) ((e) & 0x2) 195 - #define ecap_pass_through(e) ((e >> 6) & 0x1) 196 - #define ecap_eim_support(e) ((e >> 4) & 0x1) 197 - #define ecap_ir_support(e) ((e >> 3) & 0x1) 196 + #define ecap_pass_through(e) (((e) >> 6) & 0x1) 197 + #define ecap_eim_support(e) (((e) >> 4) & 0x1) 198 + #define ecap_ir_support(e) (((e) >> 3) & 0x1) 198 199 #define ecap_dev_iotlb_support(e) (((e) >> 2) & 0x1) 199 - #define ecap_max_handle_mask(e) ((e >> 20) & 0xf) 200 - #define ecap_sc_support(e) ((e >> 7) & 0x1) /* Snooping Control */ 200 + #define ecap_max_handle_mask(e) (((e) >> 20) & 0xf) 201 + #define ecap_sc_support(e) (((e) >> 7) & 0x1) /* Snooping Control */ 201 202 202 203 /* Virtual command interface capability */ 203 204 #define vccap_pasid(v) (((v) & DMA_VCS_PAS)) /* PASID allocation */ ··· 667 662 * 7: super page 668 663 * 8-10: available 669 664 * 11: snoop behavior 670 - * 12-63: Host physcial address 665 + * 12-63: Host physical address 671 666 */ 672 667 struct dma_pte { 673 668 u64 val;
+9 -10
include/linux/io-pgtable.h
··· 15 15 ARM_64_LPAE_S2, 16 16 ARM_V7S, 17 17 ARM_MALI_LPAE, 18 + AMD_IOMMU_V1, 18 19 IO_PGTABLE_NUM_FMTS, 19 20 }; 20 21 ··· 69 68 * hardware which does not implement the permissions of a given 70 69 * format, and/or requires some format-specific default value. 71 70 * 72 - * IO_PGTABLE_QUIRK_TLBI_ON_MAP: If the format forbids caching invalid 73 - * (unmapped) entries but the hardware might do so anyway, perform 74 - * TLB maintenance when mapping as well as when unmapping. 75 - * 76 71 * IO_PGTABLE_QUIRK_ARM_MTK_EXT: (ARM v7s format) MediaTek IOMMUs extend 77 - * to support up to 34 bits PA where the bit32 and bit33 are 78 - * encoded in the bit9 and bit4 of the PTE respectively. 72 + * to support up to 35 bits PA where the bit32, bit33 and bit34 are 73 + * encoded in the bit9, bit4 and bit5 of the PTE respectively. 79 74 * 80 75 * IO_PGTABLE_QUIRK_NON_STRICT: Skip issuing synchronous leaf TLBIs 81 76 * on unmap, for DMA domains using the flush queue mechanism for ··· 85 88 */ 86 89 #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) 87 90 #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) 88 - #define IO_PGTABLE_QUIRK_TLBI_ON_MAP BIT(2) 89 91 #define IO_PGTABLE_QUIRK_ARM_MTK_EXT BIT(3) 90 92 #define IO_PGTABLE_QUIRK_NON_STRICT BIT(4) 91 93 #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) ··· 210 214 211 215 static inline void io_pgtable_tlb_flush_all(struct io_pgtable *iop) 212 216 { 213 - iop->cfg.tlb->tlb_flush_all(iop->cookie); 217 + if (iop->cfg.tlb && iop->cfg.tlb->tlb_flush_all) 218 + iop->cfg.tlb->tlb_flush_all(iop->cookie); 214 219 } 215 220 216 221 static inline void 217 222 io_pgtable_tlb_flush_walk(struct io_pgtable *iop, unsigned long iova, 218 223 size_t size, size_t granule) 219 224 { 220 - iop->cfg.tlb->tlb_flush_walk(iova, size, granule, iop->cookie); 225 + if (iop->cfg.tlb && iop->cfg.tlb->tlb_flush_walk) 226 + iop->cfg.tlb->tlb_flush_walk(iova, size, granule, iop->cookie); 221 227 } 222 228 223 229 static inline void ··· 227 229 struct iommu_iotlb_gather * gather, unsigned long iova, 228 230 size_t granule) 229 231 { 230 - if (iop->cfg.tlb->tlb_add_page) 232 + if (iop->cfg.tlb && iop->cfg.tlb->tlb_add_page) 231 233 iop->cfg.tlb->tlb_add_page(gather, iova, granule, iop->cookie); 232 234 } 233 235 ··· 249 251 extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns; 250 252 extern struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns; 251 253 extern struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns; 254 + extern struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns; 252 255 253 256 #endif /* __IO_PGTABLE_H */
+5 -16
include/linux/iommu.h
··· 170 170 * struct iommu_iotlb_gather - Range information for a pending IOTLB flush 171 171 * 172 172 * @start: IOVA representing the start of the range to be flushed 173 - * @end: IOVA representing the end of the range to be flushed (exclusive) 173 + * @end: IOVA representing the end of the range to be flushed (inclusive) 174 174 * @pgsize: The interval at which to perform the flush 175 175 * 176 176 * This structure is intended to be updated by multiple calls to the ··· 246 246 size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, 247 247 size_t size, struct iommu_iotlb_gather *iotlb_gather); 248 248 void (*flush_iotlb_all)(struct iommu_domain *domain); 249 - void (*iotlb_sync_map)(struct iommu_domain *domain); 249 + void (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova, 250 + size_t size); 250 251 void (*iotlb_sync)(struct iommu_domain *domain, 251 252 struct iommu_iotlb_gather *iotlb_gather); 252 253 phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova); ··· 377 376 void iommu_device_sysfs_remove(struct iommu_device *iommu); 378 377 int iommu_device_link(struct iommu_device *iommu, struct device *link); 379 378 void iommu_device_unlink(struct iommu_device *iommu, struct device *link); 379 + int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain); 380 380 381 381 static inline void __iommu_device_set_ops(struct iommu_device *iommu, 382 382 const struct iommu_ops *ops) ··· 516 514 extern int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, 517 515 phys_addr_t offset, u64 size, 518 516 int prot); 519 - extern void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr); 520 517 521 518 extern int report_iommu_fault(struct iommu_domain *domain, struct device *dev, 522 519 unsigned long iova, int flags); ··· 539 538 struct iommu_iotlb_gather *gather, 540 539 unsigned long iova, size_t size) 541 540 { 542 - unsigned long start = iova, end = start + size; 541 + unsigned long start = iova, end = start + size - 1; 543 542 544 543 /* 545 544 * If the new page is disjoint from the current range or is mapped at ··· 631 630 int iommu_probe_device(struct device *dev); 632 631 void iommu_release_device(struct device *dev); 633 632 634 - bool iommu_dev_has_feature(struct device *dev, enum iommu_dev_features f); 635 633 int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f); 636 634 int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f); 637 635 bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features f); ··· 747 747 u64 size, int prot) 748 748 { 749 749 return -ENODEV; 750 - } 751 - 752 - static inline void iommu_domain_window_disable(struct iommu_domain *domain, 753 - u32 wnd_nr) 754 - { 755 750 } 756 751 757 752 static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) ··· 977 982 const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode) 978 983 { 979 984 return NULL; 980 - } 981 - 982 - static inline bool 983 - iommu_dev_has_feature(struct device *dev, enum iommu_dev_features feat) 984 - { 985 - return false; 986 985 } 987 986 988 987 static inline bool
-12
include/linux/iova.h
··· 150 150 unsigned long limit_pfn, bool flush_rcache); 151 151 struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, 152 152 unsigned long pfn_hi); 153 - void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); 154 153 void init_iova_domain(struct iova_domain *iovad, unsigned long granule, 155 154 unsigned long start_pfn); 156 - bool has_iova_flush_queue(struct iova_domain *iovad); 157 155 int init_iova_flush_queue(struct iova_domain *iovad, 158 156 iova_flush_cb flush_cb, iova_entry_dtor entry_dtor); 159 157 struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); ··· 210 212 return NULL; 211 213 } 212 214 213 - static inline void copy_reserved_iova(struct iova_domain *from, 214 - struct iova_domain *to) 215 - { 216 - } 217 - 218 215 static inline void init_iova_domain(struct iova_domain *iovad, 219 216 unsigned long granule, 220 217 unsigned long start_pfn) 221 218 { 222 - } 223 - 224 - static inline bool has_iova_flush_queue(struct iova_domain *iovad) 225 - { 226 - return false; 227 219 } 228 220 229 221 static inline int init_iova_flush_queue(struct iova_domain *iovad,
+1 -2
include/soc/mediatek/smi.h
··· 11 11 12 12 #ifdef CONFIG_MTK_SMI 13 13 14 - #define MTK_LARB_NR_MAX 16 15 - 16 14 #define MTK_SMI_MMU_EN(port) BIT(port) 17 15 18 16 struct mtk_smi_larb_iommu { 19 17 struct device *dev; 20 18 unsigned int mmu; 19 + unsigned char bank[32]; 21 20 }; 22 21 23 22 /*
+37 -2
include/trace/events/intel_iommu.h
··· 6 6 * 7 7 * Author: Lu Baolu <baolu.lu@linux.intel.com> 8 8 */ 9 - #ifdef CONFIG_INTEL_IOMMU 10 9 #undef TRACE_SYSTEM 11 10 #define TRACE_SYSTEM intel_iommu 12 11 ··· 134 135 struct scatterlist *sg), 135 136 TP_ARGS(dev, index, total, sg) 136 137 ); 138 + 139 + TRACE_EVENT(qi_submit, 140 + TP_PROTO(struct intel_iommu *iommu, u64 qw0, u64 qw1, u64 qw2, u64 qw3), 141 + 142 + TP_ARGS(iommu, qw0, qw1, qw2, qw3), 143 + 144 + TP_STRUCT__entry( 145 + __field(u64, qw0) 146 + __field(u64, qw1) 147 + __field(u64, qw2) 148 + __field(u64, qw3) 149 + __string(iommu, iommu->name) 150 + ), 151 + 152 + TP_fast_assign( 153 + __assign_str(iommu, iommu->name); 154 + __entry->qw0 = qw0; 155 + __entry->qw1 = qw1; 156 + __entry->qw2 = qw2; 157 + __entry->qw3 = qw3; 158 + ), 159 + 160 + TP_printk("%s %s: 0x%llx 0x%llx 0x%llx 0x%llx", 161 + __print_symbolic(__entry->qw0 & 0xf, 162 + { QI_CC_TYPE, "cc_inv" }, 163 + { QI_IOTLB_TYPE, "iotlb_inv" }, 164 + { QI_DIOTLB_TYPE, "dev_tlb_inv" }, 165 + { QI_IEC_TYPE, "iec_inv" }, 166 + { QI_IWD_TYPE, "inv_wait" }, 167 + { QI_EIOTLB_TYPE, "p_iotlb_inv" }, 168 + { QI_PC_TYPE, "pc_inv" }, 169 + { QI_DEIOTLB_TYPE, "p_dev_tlb_inv" }, 170 + { QI_PGRP_RESP_TYPE, "page_grp_resp" }), 171 + __get_str(iommu), 172 + __entry->qw0, __entry->qw1, __entry->qw2, __entry->qw3 173 + ) 174 + ); 137 175 #endif /* _TRACE_INTEL_IOMMU_H */ 138 176 139 177 /* This part must be outside protection */ 140 178 #include <trace/define_trace.h> 141 - #endif /* CONFIG_INTEL_IOMMU */