Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'iommu-updates-v6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu

Pull iommu updates from Joerg Roedel:

- Convert to platform remove callback returning void (the conversion
  pattern is sketched after this list)

- Extend changing default domain to normal group

- Intel VT-d updates:
    - Remove VT-d virtual command interface and IOASID
    - Allow the VT-d driver to support non-PRI IOPF
    - Remove PASID supervisor request support
    - Various small and misc cleanups

- ARM SMMU updates:
    - Device-tree binding updates:
        * Allow Qualcomm GPU SMMUs to accept relevant clock properties
        * Document Qualcomm 8550 SoC as implementing an MMU-500
        * Favour new "qcom,smmu-500" binding for Adreno SMMUs
    - Fix S2CR quirk detection on non-architectural Qualcomm SMMU
      implementations
    - Acknowledge SMMUv3 PRI queue overflow when consuming events
    - Document (in a comment) why ATS is disabled for bypass streams

- AMD IOMMU updates:
    - 5-level page-table support
    - NUMA awareness for memory allocations

- Unisoc driver: Support for reattaching an existing domain

- Rockchip driver: Add missing set_platform_dma_ops callback

- Mediatek driver: Adjust the dma-ranges

- Various other small fixes and cleanups
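
A minimal sketch of the void-remove conversion named in the first item, as
it recurs in the driver diffs below (the foo names are illustrative, not
taken from this pull):

    static void foo_remove(struct platform_device *pdev)
    {
            struct foo_priv *priv = platform_get_drvdata(pdev);

            /* Nothing useful can be done with an error at remove time,
             * so the callback returns void instead of int. */
            foo_teardown(priv);
    }

    static struct platform_driver foo_driver = {
            .probe      = foo_probe,
            .remove_new = foo_remove,   /* was .remove, returning int */
            .driver     = { .name = "foo" },
    };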

* tag 'iommu-updates-v6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (82 commits)
iommu: Remove iommu_group_get_by_id()
iommu: Make iommu_release_device() static
iommu/vt-d: Remove BUG_ON in dmar_insert_dev_scope()
iommu/vt-d: Remove a useless BUG_ON(dev->is_virtfn)
iommu/vt-d: Remove BUG_ON in map/unmap()
iommu/vt-d: Remove BUG_ON when domain->pgd is NULL
iommu/vt-d: Remove BUG_ON in handling iotlb cache invalidation
iommu/vt-d: Remove BUG_ON on checking valid pfn range
iommu/vt-d: Make size of operands same in bitwise operations
iommu/vt-d: Remove PASID supervisor request support
iommu/vt-d: Use non-privileged mode for all PASIDs
iommu/vt-d: Remove extern from function prototypes
iommu/vt-d: Do not use GFP_ATOMIC when not needed
iommu/vt-d: Remove unnecessary checks in iopf disabling path
iommu/vt-d: Move PRI handling to IOPF feature path
iommu/vt-d: Move pfsid and ats_qdep calculation to device probe path
iommu/vt-d: Move iopf code from SVA to IOPF enabling path
iommu/vt-d: Allow SVA with device-specific IOPF
dmaengine: idxd: Add enable/disable device IOPF feature
arm64: dts: mt8186: Add dma-ranges for the parent "soc" node
...

+794 -1277
-1
Documentation/ABI/testing/sysfs-kernel-iommu_groups
···
 
 The default domain type of a group may be modified only when
 
-- The group has only one device.
 - The device in the group is not bound to any device driver.
   So, the users must unbind the appropriate driver before
   changing the default domain type.
+1 -1
Documentation/arch/x86/sva.rst
···
 PASID Life Cycle Management
 ===========================
 
-PASID is initialized as INVALID_IOASID (-1) when a process is created.
+PASID is initialized as IOMMU_PASID_INVALID (-1) when a process is created.
 
 Only processes that access SVA-capable devices need to have a PASID
 allocated. This allocation happens when a process opens/binds an SVA-capable
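
(Both names denote the same sentinel: INVALID_IOASID was ((ioasid_t)-1) in
the removed <linux/ioasid.h>, and IOMMU_PASID_INVALID is, as far as we can
tell, (-1U) in <linux/iommu.h>, which is why the "(-1)" gloss stays.)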
+41 -4
Documentation/devicetree/bindings/iommu/arm,smmu.yaml
···
               - qcom,sm8250-smmu-500
               - qcom,sm8350-smmu-500
               - qcom,sm8450-smmu-500
+              - qcom,sm8550-smmu-500
           - const: qcom,smmu-500
           - const: arm,mmu-500
···
               - qcom,sm8350-smmu-500
               - qcom,sm8450-smmu-500
           - const: arm,mmu-500
-
-      - description: Qcom Adreno GPUs implementing "arm,smmu-500"
+      - description: Qcom Adreno GPUs implementing "qcom,smmu-500" and "arm,mmu-500"
         items:
+          - enum:
+              - qcom,sc7280-smmu-500
+              - qcom,sm6115-smmu-500
+              - qcom,sm6125-smmu-500
+              - qcom,sm8150-smmu-500
+              - qcom,sm8250-smmu-500
+              - qcom,sm8350-smmu-500
+          - const: qcom,adreno-smmu
+          - const: qcom,smmu-500
+          - const: arm,mmu-500
+      - description: Qcom Adreno GPUs implementing "arm,mmu-500" (legacy binding)
+        deprecated: true
+        items:
+          # Do not add additional SoC to this list. Instead use previous list.
           - enum:
               - qcom,sc7280-smmu-500
               - qcom,sm8150-smmu-500
···
           - description: interface clock required to access smmu's registers
                          through the TCU's programming interface.
 
+  - if:
+      properties:
+        compatible:
+          items:
+            - enum:
+                - qcom,sm6115-smmu-500
+                - qcom,sm6125-smmu-500
+            - const: qcom,adreno-smmu
+            - const: qcom,smmu-500
+            - const: arm,mmu-500
+    then:
+      properties:
+        clock-names:
+          items:
+            - const: mem
+            - const: hlos
+            - const: iface
+
+        clocks:
+          items:
+            - description: GPU memory bus clock
+            - description: Voter clock required for HLOS SMMU access
+            - description: Interface clock required for register access
+
   # Disallow clocks for all other platforms with specific compatibles
   - if:
       properties:
···
             - qcom,sdm845-smmu-500
             - qcom,sdx55-smmu-500
             - qcom,sdx65-smmu-500
-            - qcom,sm6115-smmu-500
-            - qcom,sm6125-smmu-500
             - qcom,sm6350-smmu-500
             - qcom,sm6375-smmu-500
             - qcom,sm8350-smmu-500
             - qcom,sm8450-smmu-500
+            - qcom,sm8550-smmu-500
     then:
       properties:
         clock-names: false
+24 -8
Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.yaml
···
   renesas,ipmmu-main:
     $ref: /schemas/types.yaml#/definitions/phandle-array
     items:
-      - items:
+      - minItems: 1
+        items:
           - description: phandle to main IPMMU
-          - description: the interrupt bit number associated with the particular
-              cache IPMMU device. The interrupt bit number needs to match the main
-              IPMMU IMSSTR register. Only used by cache IPMMU instances.
+          - description:
+              The interrupt bit number associated with the particular cache
+              IPMMU device. If present, the interrupt bit number needs to match
+              the main IPMMU IMSSTR register. Only used by cache IPMMU
+              instances.
     description:
-      Reference to the main IPMMU phandle plus 1 cell. The cell is
-      the interrupt bit number associated with the particular cache IPMMU
-      device. The interrupt bit number needs to match the main IPMMU IMSSTR
-      register. Only used by cache IPMMU instances.
+      Reference to the main IPMMU.
 
 required:
   - compatible
···
     then:
       required:
         - power-domains
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: renesas,rcar-gen4-ipmmu-vmsa
+    then:
+      properties:
+        renesas,ipmmu-main:
+          items:
+            - maxItems: 1
+    else:
+      properties:
+        renesas,ipmmu-main:
+          items:
+            - minItems: 2
 
 examples:
   - |
-7
Documentation/devicetree/bindings/media/mediatek,mt8195-jpegdec.yaml
···
       Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml for details.
       Ports are according to the HW.
 
-  dma-ranges:
-    maxItems: 1
-    description: |
-      Describes the physical address space of IOMMU maps to memory.
-
   "#address-cells":
     const: 2
 
···
   - compatible
   - power-domains
   - iommus
-  - dma-ranges
   - ranges
 
 additionalProperties: false
···
                  <&iommu_vpp M4U_PORT_L19_JPGDEC_BSDMA1>,
                  <&iommu_vpp M4U_PORT_L19_JPGDEC_BUFF_OFFSET1>,
                  <&iommu_vpp M4U_PORT_L19_JPGDEC_BUFF_OFFSET0>;
-        dma-ranges = <0x1 0x0 0x0 0x40000000 0x0 0xfff00000>;
         #address-cells = <2>;
         #size-cells = <2>;
         ranges;
-7
Documentation/devicetree/bindings/media/mediatek,mt8195-jpegenc.yaml
···
       Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml for details.
       Ports are according to the HW.
 
-  dma-ranges:
-    maxItems: 1
-    description: |
-      Describes the physical address space of IOMMU maps to memory.
-
   "#address-cells":
     const: 2
 
···
   - compatible
   - power-domains
   - iommus
-  - dma-ranges
   - ranges
 
 additionalProperties: false
···
                  <&iommu_vpp M4U_PORT_L20_JPGENC_C_RDMA>,
                  <&iommu_vpp M4U_PORT_L20_JPGENC_Q_TABLE>,
                  <&iommu_vpp M4U_PORT_L20_JPGENC_BSDMA>;
-        dma-ranges = <0x1 0x0 0x0 0x40000000 0x0 0xfff00000>;
         #address-cells = <2>;
         #size-cells = <2>;
         ranges;
-5
Documentation/devicetree/bindings/media/mediatek,vcodec-decoder.yaml
···
       List of the hardware port in respective IOMMU block for current Socs.
       Refer to bindings/iommu/mediatek,iommu.yaml.
 
-  dma-ranges:
-    maxItems: 1
-    description: |
-      Describes the physical address space of IOMMU maps to memory.
-
   mediatek,vpu:
     $ref: /schemas/types.yaml#/definitions/phandle
     description:
-5
Documentation/devicetree/bindings/media/mediatek,vcodec-encoder.yaml
···
       List of the hardware port in respective IOMMU block for current Socs.
       Refer to bindings/iommu/mediatek,iommu.yaml.
 
-  dma-ranges:
-    maxItems: 1
-    description: |
-      Describes the physical address space of IOMMU maps to memory.
-
   mediatek,vpu:
     $ref: /schemas/types.yaml#/definitions/phandle
     description:
-5
Documentation/devicetree/bindings/media/mediatek-jpeg-encoder.yaml
···
       Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml for details.
       Ports are according to the HW.
 
-  dma-ranges:
-    maxItems: 1
-    description: |
-      Describes the physical address space of IOMMU maps to memory.
-
 required:
   - compatible
   - reg
+1
arch/arm64/boot/dts/mediatek/mt8186.dtsi
···
         #address-cells = <2>;
         #size-cells = <2>;
         compatible = "simple-bus";
+        dma-ranges = <0x0 0x0 0x0 0x0 0x4 0x0>;
         ranges;
 
         gic: interrupt-controller@c000000 {
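
A note on the cell arithmetic (standard devicetree dma-ranges, nothing
specific to this patch): with #address-cells = <2> and #size-cells = <2>,
<0x0 0x0 0x0 0x0 0x4 0x0> maps child bus address 0x0_00000000 onto parent
address 0x0_00000000 with size 0x4_00000000 bytes, i.e. 4 * 2^32 = 16 GiB
of DMA-addressable space. This single parent-node range is what allows the
per-device dma-ranges entries to be dropped in the surrounding hunks.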
+1 -3
arch/arm64/boot/dts/mediatek/mt8195.dtsi
···
         #size-cells = <2>;
         compatible = "simple-bus";
         ranges;
+        dma-ranges = <0x0 0x0 0x0 0x0 0x4 0x0>;
 
         gic: interrupt-controller@c000000 {
             compatible = "arm,gic-v3";
···
             power-domains = <&spm MT8195_POWER_DOMAIN_VENC>;
             #address-cells = <2>;
             #size-cells = <2>;
-            dma-ranges = <0x1 0x0 0x0 0x40000000 0x0 0xfff00000>;
         };
 
         jpgdec-master {
···
                      <&iommu_vdo M4U_PORT_L19_JPGDEC_BSDMA1>,
                      <&iommu_vdo M4U_PORT_L19_JPGDEC_BUFF_OFFSET1>,
                      <&iommu_vdo M4U_PORT_L19_JPGDEC_BUFF_OFFSET0>;
-            dma-ranges = <0x1 0x0 0x0 0x40000000 0x0 0xfff00000>;
             #address-cells = <2>;
             #size-cells = <2>;
             ranges;
···
                      <&iommu_vpp M4U_PORT_L20_JPGENC_C_RDMA>,
                      <&iommu_vpp M4U_PORT_L20_JPGENC_Q_TABLE>,
                      <&iommu_vpp M4U_PORT_L20_JPGENC_BSDMA>;
-            dma-ranges = <0x1 0x0 0x0 0x40000000 0x0 0xfff00000>;
             #address-cells = <2>;
             #size-cells = <2>;
             ranges;
+1
arch/x86/kernel/process_64.c
···
 #include <linux/io.h>
 #include <linux/ftrace.h>
 #include <linux/syscalls.h>
+#include <linux/iommu.h>
 
 #include <asm/processor.h>
 #include <asm/pkru.h>
+1 -1
arch/x86/kernel/traps.c
···
 #include <linux/io.h>
 #include <linux/hardirq.h>
 #include <linux/atomic.h>
-#include <linux/ioasid.h>
+#include <linux/iommu.h>
 
 #include <asm/stacktrace.h>
 #include <asm/processor.h>
+4 -4
drivers/dma/idxd/device.c
···
 {
         union msix_perm mperm;
 
-        if (ie->pasid == INVALID_IOASID)
+        if (ie->pasid == IOMMU_PASID_INVALID)
                 return;
 
         mperm.bits = 0;
···
         idxd_device_clear_perm_entry(idxd, ie);
         ie->vector = -1;
         ie->int_handle = INVALID_INT_HANDLE;
-        ie->pasid = INVALID_IOASID;
+        ie->pasid = IOMMU_PASID_INVALID;
 }
 
 int idxd_wq_request_irq(struct idxd_wq *wq)
···
 
         ie = &wq->ie;
         ie->vector = pci_irq_vector(pdev, ie->id);
-        ie->pasid = device_pasid_enabled(idxd) ? idxd->pasid : INVALID_IOASID;
+        ie->pasid = device_pasid_enabled(idxd) ? idxd->pasid : IOMMU_PASID_INVALID;
         idxd_device_set_perm_entry(idxd, ie);
 
         rc = request_threaded_irq(ie->vector, NULL, idxd_wq_thread, 0, "idxd-portal", ie);
···
         free_irq(ie->vector, ie);
 err_irq:
         idxd_device_clear_perm_entry(idxd, ie);
-        ie->pasid = INVALID_IOASID;
+        ie->pasid = IOMMU_PASID_INVALID;
         return rc;
 }
+1 -1
drivers/dma/idxd/idxd.h
···
 #include <linux/cdev.h>
 #include <linux/idr.h>
 #include <linux/pci.h>
-#include <linux/ioasid.h>
 #include <linux/bitmap.h>
 #include <linux/perf_event.h>
+#include <linux/iommu.h>
 #include <uapi/linux/idxd.h>
 #include "registers.h"
+26 -7
drivers/dma/idxd/init.c
···
         ie = idxd_get_ie(idxd, msix_idx);
         ie->id = msix_idx;
         ie->int_handle = INVALID_INT_HANDLE;
-        ie->pasid = INVALID_IOASID;
+        ie->pasid = IOMMU_PASID_INVALID;
 
         spin_lock_init(&ie->list_lock);
         init_llist_head(&ie->pending_llist);
···
         idxd->sva = NULL;
 }
 
+static int idxd_enable_sva(struct pci_dev *pdev)
+{
+        int ret;
+
+        ret = iommu_dev_enable_feature(&pdev->dev, IOMMU_DEV_FEAT_IOPF);
+        if (ret)
+                return ret;
+
+        ret = iommu_dev_enable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
+        if (ret)
+                iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_IOPF);
+
+        return ret;
+}
+
+static void idxd_disable_sva(struct pci_dev *pdev)
+{
+        iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
+        iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_IOPF);
+}
+
 static int idxd_probe(struct idxd_device *idxd)
 {
         struct pci_dev *pdev = idxd->pdev;
···
         dev_dbg(dev, "IDXD reset complete\n");
 
         if (IS_ENABLED(CONFIG_INTEL_IDXD_SVM) && sva) {
-                if (iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA)) {
+                if (idxd_enable_sva(pdev)) {
                         dev_warn(dev, "Unable to turn on user SVA feature.\n");
                 } else {
                         set_bit(IDXD_FLAG_USER_PASID_ENABLED, &idxd->flags);
···
         if (device_pasid_enabled(idxd))
                 idxd_disable_system_pasid(idxd);
         if (device_user_pasid_enabled(idxd))
-                iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);
+                idxd_disable_sva(pdev);
         return rc;
 }
 
 static void idxd_cleanup(struct idxd_device *idxd)
 {
-        struct device *dev = &idxd->pdev->dev;
-
         perfmon_pmu_remove(idxd);
         idxd_cleanup_interrupts(idxd);
         idxd_cleanup_internals(idxd);
         if (device_pasid_enabled(idxd))
                 idxd_disable_system_pasid(idxd);
         if (device_user_pasid_enabled(idxd))
-                iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);
+                idxd_disable_sva(idxd->pdev);
 }
 
 static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
···
         pci_free_irq_vectors(pdev);
         pci_iounmap(pdev, idxd->reg_base);
         if (device_user_pasid_enabled(idxd))
-                iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
+                idxd_disable_sva(pdev);
         pci_disable_device(pdev);
         destroy_workqueue(idxd->wq);
         perfmon_pmu_remove(idxd);
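
The helper pair encodes the iommu core's ordering contract: when SVA will
rely on I/O page faults, IOMMU_DEV_FEAT_IOPF must be enabled before
IOMMU_DEV_FEAT_SVA and disabled after it. Note that idxd_enable_sva() also
rolls IOPF back when the SVA enable fails, so an unsuccessful probe leaves
the device as it found it.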
+1 -1
drivers/dma/idxd/irq.c
···
         desc.opcode = DSA_OPCODE_DRAIN;
         desc.priv = 1;
 
-        if (ie->pasid != INVALID_IOASID)
+        if (ie->pasid != IOMMU_PASID_INVALID)
                 desc.pasid = ie->pasid;
         desc.int_handle = ie->int_handle;
         portal = idxd_wq_portal_addr(wq);
+2 -7
drivers/iommu/Kconfig
···
 config IOMMU_IOVA
         tristate
 
-# The IOASID library may also be used by non-IOMMU_API users
-config IOASID
-        tristate
-
 # IOMMU_API always gets selected by whoever wants it.
 config IOMMU_API
         bool
···
         bool "ARMv7/v8 Long Descriptor Format"
         select IOMMU_IO_PGTABLE
         depends on ARM || ARM64 || COMPILE_TEST
-        depends on !GENERIC_ATOMIC64    # for cpmxchg64()
+        depends on !GENERIC_ATOMIC64    # for cmpxchg64()
         help
           Enable support for the ARM long descriptor pagetable format.
           This allocator supports 4K/2M/1G, 16K/32M and 64K/512M page
···
         bool "Apple DART Formats"
         select IOMMU_IO_PGTABLE
         depends on ARM64 || COMPILE_TEST
-        depends on !GENERIC_ATOMIC64    # for cpmxchg64()
+        depends on !GENERIC_ATOMIC64    # for cmpxchg64()
         help
           Enable support for the Apple DART pagetable formats. These include
           the t8020 and t6000/t8110 DART formats used in Apple M1/M2 family
···
 # Shared Virtual Addressing
 config IOMMU_SVA
         bool
-        select IOASID
 
 config FSL_PAMU
         bool "Freescale IOMMU support"
-1
drivers/iommu/Makefile
···
 obj-$(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) += io-pgtable-arm-v7s.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE_DART) += io-pgtable-dart.o
-obj-$(CONFIG_IOASID) += ioasid.o
 obj-$(CONFIG_IOMMU_IOVA) += iova.o
 obj-$(CONFIG_OF_IOMMU) += of_iommu.o
 obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o
+9
drivers/iommu/amd/amd_iommu.h
···
 extern int amd_iommu_enable_faulting(void);
 extern int amd_iommu_guest_ir;
 extern enum io_pgtable_fmt amd_iommu_pgtable;
+extern int amd_iommu_gpt_level;
 
 /* IOMMUv2 specific functions */
 struct iommu_domain;
···
         u16 devid = pci_dev_id(pdev);
 
         return PCI_SEG_DEVID_TO_SBDF(seg, devid);
+}
+
+static inline void *alloc_pgtable_page(int nid, gfp_t gfp)
+{
+        struct page *page;
+
+        page = alloc_pages_node(nid, gfp | __GFP_ZERO, 0);
+        return page ? page_address(page) : NULL;
 }
 
 extern bool translation_pre_enabled(struct amd_iommu *iommu);
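
The new helper hands back one zeroed page from a caller-chosen NUMA node.
A minimal usage sketch (hypothetical caller; dev and the error path are
illustrative, not from this patch):

    int nid = dev_to_node(dev);     /* NUMA_NO_NODE is fine: the
                                       allocator then falls back */
    u64 *pte = alloc_pgtable_page(nid, GFP_KERNEL);

    if (!pte)
            return -ENOMEM;
    /* ... install into the page table ... */
    free_page((unsigned long)pte);  /* order 0, matching the alloc */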
+10 -2
drivers/iommu/amd/amd_iommu_types.h
···
 #define FEATURE_GA              (1ULL<<7)
 #define FEATURE_HE              (1ULL<<8)
 #define FEATURE_PC              (1ULL<<9)
+#define FEATURE_GATS_SHIFT      (12)
+#define FEATURE_GATS_MASK       (3ULL)
 #define FEATURE_GAM_VAPIC       (1ULL<<21)
 #define FEATURE_GIOSUP          (1ULL<<48)
 #define FEATURE_EPHSUP          (1ULL<<50)
···
 #define PAGE_MODE_6_LEVEL 0x06
 #define PAGE_MODE_7_LEVEL 0x07
 
+#define GUEST_PGTABLE_4_LEVEL   0x00
+#define GUEST_PGTABLE_5_LEVEL   0x01
+
 #define PM_LEVEL_SHIFT(x)       (12 + ((x) * 9))
 #define PM_LEVEL_SIZE(x)        (((x) < 6) ? \
                                 ((1ULL << PM_LEVEL_SHIFT((x))) - 1): \
···
 #define DTE_GCR3_SHIFT_A        58
 #define DTE_GCR3_SHIFT_B        16
 #define DTE_GCR3_SHIFT_C        43
+
+#define DTE_GPT_LEVEL_SHIFT     54
 
 #define GCR3_VALID              0x01ULL
···
         spinlock_t lock;        /* mostly used to lock the page table*/
         u16 id;                 /* the domain id written to the device table */
         int glx;                /* Number of levels for GCR3 table */
+        int nid;                /* Node ID */
         u64 *gcr3_tbl;          /* Guest CR3 table */
         unsigned long flags;    /* flags to find out type of domain */
         unsigned dev_cnt;       /* devices assigned to this domain */
···
          */
         struct irq_cfg *cfg;
         int ga_vector;
-        int ga_root_ptr;
-        int ga_tag;
+        u64 ga_root_ptr;
+        u32 ga_tag;
 };
 
 struct amd_irte_ops {
+25 -5
drivers/iommu/amd/init.c
···
 bool amd_iommu_irq_remap __read_mostly;
 
 enum io_pgtable_fmt amd_iommu_pgtable = AMD_IOMMU_V1;
+/* Guest page table level */
+int amd_iommu_gpt_level = PAGE_MODE_4_LEVEL;
 
 int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
 static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
···
 static bool check_feature_on_all_iommus(u64 mask)
 {
         return !!(amd_iommu_efr & mask);
+}
+
+static inline int check_feature_gpt_level(void)
+{
+        return ((amd_iommu_efr >> FEATURE_GATS_SHIFT) & FEATURE_GATS_MASK);
 }
 
 /*
···
                              char *buf)
 {
         struct amd_iommu *iommu = dev_to_amd_iommu(dev);
-        return sprintf(buf, "%x\n", iommu->cap);
+        return sysfs_emit(buf, "%x\n", iommu->cap);
 }
 static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL);
···
                              char *buf)
 {
         struct amd_iommu *iommu = dev_to_amd_iommu(dev);
-        return sprintf(buf, "%llx:%llx\n", iommu->features2, iommu->features);
+        return sysfs_emit(buf, "%llx:%llx\n", iommu->features2, iommu->features);
 }
 static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL);
···
                 if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
                         pr_info("X2APIC enabled\n");
         }
-        if (amd_iommu_pgtable == AMD_IOMMU_V2)
-                pr_info("V2 page table enabled\n");
+        if (amd_iommu_pgtable == AMD_IOMMU_V2) {
+                pr_info("V2 page table enabled (Paging mode : %d level)\n",
+                        amd_iommu_gpt_level);
+        }
 }
 
 static int __init amd_iommu_init_pci(void)
···
         struct irq_domain *domain;
         struct irq_alloc_info info;
         int irq, ret;
+        int node = dev_to_node(&iommu->dev->dev);
 
         domain = iommu_get_irqdomain();
         if (!domain)
···
         info.type = X86_IRQ_ALLOC_TYPE_AMDVI;
         info.data = iommu;
 
-        irq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, &info);
+        irq = irq_domain_alloc_irqs(domain, 1, node, &info);
         if (irq < 0) {
                 irq_domain_remove(domain);
                 return irq;
···
         if (ret)
                 goto out;
 
+        /* 5 level guest page table */
+        if (cpu_feature_enabled(X86_FEATURE_LA57) &&
+            check_feature_gpt_level() == GUEST_PGTABLE_5_LEVEL)
+                amd_iommu_gpt_level = PAGE_MODE_5_LEVEL;
+
         /* Disable any previously enabled IOMMUs */
         if (!is_kdump_kernel() || amd_iommu_disabled)
                 disable_iommus();
···
 
 bool amd_iommu_v2_supported(void)
 {
+        /* CPU page table size should match IOMMU guest page table size */
+        if (cpu_feature_enabled(X86_FEATURE_LA57) &&
+            amd_iommu_gpt_level != PAGE_MODE_5_LEVEL)
+                return false;
+
         /*
          * Since DTE[Mode]=0 is prohibited on SNP-enabled system
          * (i.e. EFR[SNPSup]=1), IOMMUv2 page table cannot be used without
+2 -2
drivers/iommu/amd/io_pgtable.c
···
         bool ret = true;
         u64 *pte;
 
-        pte = (void *)get_zeroed_page(gfp);
+        pte = alloc_pgtable_page(domain->nid, gfp);
         if (!pte)
                 return false;
···
 
                 if (!IOMMU_PTE_PRESENT(__pte) ||
                     pte_level == PAGE_MODE_NONE) {
-                        page = (u64 *)get_zeroed_page(gfp);
+                        page = alloc_pgtable_page(domain->nid, gfp);
 
                         if (!page)
                                 return NULL;
+12 -13
drivers/iommu/amd/io_pgtable_v2.c
···
 
 static inline int get_pgtable_level(void)
 {
-        /* 5 level page table is not supported */
-        return PAGE_MODE_4_LEVEL;
+        return amd_iommu_gpt_level;
 }
 
 static inline bool is_large_pte(u64 pte)
 {
         return (pte & IOMMU_PAGE_PSE);
-}
-
-static inline void *alloc_pgtable_page(void)
-{
-        return (void *)get_zeroed_page(GFP_KERNEL);
 }
 
 static inline u64 set_pgtable_attr(u64 *page)
···
 }
 
 /* Allocate page table */
-static u64 *v2_alloc_pte(u64 *pgd, unsigned long iova,
-                         unsigned long pg_size, bool *updated)
+static u64 *v2_alloc_pte(int nid, u64 *pgd, unsigned long iova,
+                         unsigned long pg_size, gfp_t gfp, bool *updated)
 {
         u64 *pte, *page;
         int level, end_level;
···
         }
 
         if (!IOMMU_PTE_PRESENT(__pte)) {
-                page = alloc_pgtable_page();
+                page = alloc_pgtable_page(nid, gfp);
                 if (!page)
                         return NULL;
···
 
         while (mapped_size < size) {
                 map_size = get_alloc_page_size(pgsize);
-                pte = v2_alloc_pte(pdom->iop.pgd, iova, map_size, &updated);
+                pte = v2_alloc_pte(pdom->nid, pdom->iop.pgd,
+                                   iova, map_size, gfp, &updated);
                 if (!pte) {
                         ret = -EINVAL;
                         goto out;
···
         struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);
         struct protection_domain *pdom = (struct protection_domain *)cookie;
         int ret;
+        int ias = IOMMU_IN_ADDR_BIT_SIZE;
 
-        pgtable->pgd = alloc_pgtable_page();
+        pgtable->pgd = alloc_pgtable_page(pdom->nid, GFP_ATOMIC);
         if (!pgtable->pgd)
                 return NULL;
···
         if (ret)
                 goto err_free_pgd;
 
+        if (get_pgtable_level() == PAGE_MODE_5_LEVEL)
+                ias = 57;
+
         pgtable->iop.ops.map_pages    = iommu_v2_map_pages;
         pgtable->iop.ops.unmap_pages  = iommu_v2_unmap_pages;
         pgtable->iop.ops.iova_to_phys = iommu_v2_iova_to_phys;
 
         cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES_V2,
-        cfg->ias           = IOMMU_IN_ADDR_BIT_SIZE,
+        cfg->ias           = ias,
         cfg->oas           = IOMMU_OUT_ADDR_BIT_SIZE,
         cfg->tlb           = &v2_flush_ops;
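
The ias value follows from the radix: each paging level resolves 9 bits on
top of the 12-bit page offset (compare PM_LEVEL_SHIFT(x) = 12 + 9x in
amd_iommu_types.h), so a 4-level table covers 12 + 4*9 = 48 input-address
bits and a 5-level table covers 12 + 5*9 = 57, the value hard-coded above.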
+13 -4
drivers/iommu/amd/iommu.c
···
                 tmp = DTE_GCR3_VAL_C(gcr3) << DTE_GCR3_SHIFT_C;
                 flags    |= tmp;
 
+                if (amd_iommu_gpt_level == PAGE_MODE_5_LEVEL) {
+                        dev_table[devid].data[2] |=
+                                ((u64)GUEST_PGTABLE_5_LEVEL << DTE_GPT_LEVEL_SHIFT);
+                }
+
                 if (domain->flags & PD_GIOV_MASK)
                         pte_root |= DTE_FLAG_GIOV;
         }
···
         dev_data->domain = domain;
         list_add(&dev_data->list, &domain->dev_list);
 
+        /* Update NUMA Node ID */
+        if (domain->nid == NUMA_NO_NODE)
+                domain->nid = dev_to_node(dev_data->dev);
+
         /* Do reference counting */
         domain->dev_iommu[iommu->index] += 1;
         domain->dev_cnt                 += 1;
-
-        /* Override supported page sizes */
-        if (domain->flags & PD_GIOV_MASK)
-                domain->domain.pgsize_bitmap = AMD_IOMMU_PGSIZES_V2;
 
         /* Update device table */
         set_dte_entry(iommu, dev_data->devid, domain,
···
 
         domain->flags |= PD_GIOV_MASK;
 
+        domain->domain.pgsize_bitmap = AMD_IOMMU_PGSIZES_V2;
+
         if (domain_enable_v2(domain, 1)) {
                 domain_id_free(domain->id);
                 return -ENOMEM;
···
         /* No need to allocate io pgtable ops in passthrough mode */
         if (type == IOMMU_DOMAIN_IDENTITY)
                 return domain;
+
+        domain->nid = NUMA_NO_NODE;
 
         pgtbl_ops = alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl_cfg, domain);
         if (!pgtbl_ops) {
+2 -4
drivers/iommu/apple-dart.c
···
         return ret;
 }
 
-static int apple_dart_remove(struct platform_device *pdev)
+static void apple_dart_remove(struct platform_device *pdev)
 {
         struct apple_dart *dart = platform_get_drvdata(pdev);
···
         iommu_device_sysfs_remove(&dart->iommu);
 
         clk_bulk_disable_unprepare(dart->num_clks, dart->clks);
-
-        return 0;
 }
 
 static const struct apple_dart_hw apple_dart_hw_t8103 = {
···
                 .pm                     = pm_sleep_ptr(&apple_dart_pm_ops),
         },
         .probe  = apple_dart_probe,
-        .remove = apple_dart_remove,
+        .remove_new = apple_dart_remove,
 };
 
 module_platform_driver(apple_dart_driver);
+23 -9
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
···
         q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
 }
 
+static void queue_sync_cons_ovf(struct arm_smmu_queue *q)
+{
+        struct arm_smmu_ll_queue *llq = &q->llq;
+
+        if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons)))
+                return;
+
+        llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
+                    Q_IDX(llq, llq->cons);
+        queue_sync_cons_out(q);
+}
+
 static int queue_sync_prod_in(struct arm_smmu_queue *q)
 {
         u32 prod;
···
         } while (!queue_empty(llq));
 
         /* Sync our overflow flag, as we believe we're up to speed */
-        llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
-                    Q_IDX(llq, llq->cons);
+        queue_sync_cons_ovf(q);
         return IRQ_HANDLED;
 }
···
         } while (!queue_empty(llq));
 
         /* Sync our overflow flag, as we believe we're up to speed */
-        llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
-                    Q_IDX(llq, llq->cons);
-        queue_sync_cons_out(q);
+        queue_sync_cons_ovf(q);
         return IRQ_HANDLED;
 }
···
 
         master->domain = smmu_domain;
 
+        /*
+         * The SMMU does not support enabling ATS with bypass. When the STE is
+         * in bypass (STE.Config[2:0] == 0b100), ATS Translation Requests and
+         * Translated transactions are denied as though ATS is disabled for the
+         * stream (STE.EATS == 0b00), causing F_BAD_ATS_TREQ and
+         * F_TRANSL_FORBIDDEN events (IHI0070Ea 5.2 Stream Table Entry).
+         */
         if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
                 master->ats_enabled = arm_smmu_ats_supported(master);
···
         return 0;
 }
 
-static int arm_smmu_device_remove(struct platform_device *pdev)
+static void arm_smmu_device_remove(struct platform_device *pdev)
 {
         struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
···
         iommu_device_sysfs_remove(&smmu->iommu);
         arm_smmu_device_disable(smmu);
         iopf_queue_free(smmu->evtq.iopf);
-
-        return 0;
 }
 
 static void arm_smmu_device_shutdown(struct platform_device *pdev)
···
                 .suppress_bind_attrs    = true,
         },
         .probe  = arm_smmu_device_probe,
-        .remove = arm_smmu_device_remove,
+        .remove_new = arm_smmu_device_remove,
         .shutdown = arm_smmu_device_shutdown,
 };
 module_driver(arm_smmu_driver, platform_driver_register,
+15 -1
drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
···
 
 static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu)
 {
-        unsigned int last_s2cr = ARM_SMMU_GR0_S2CR(smmu->num_mapping_groups - 1);
         struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
+        unsigned int last_s2cr;
         u32 reg;
         u32 smr;
         int i;
+
+        /*
+         * Some platforms support more than the Arm SMMU architected maximum of
+         * 128 stream matching groups. For unknown reasons, the additional
+         * groups don't exhibit the same behavior as the architected registers,
+         * so limit the groups to 128 until the behavior is fixed for the other
+         * groups.
+         */
+        if (smmu->num_mapping_groups > 128) {
+                dev_notice(smmu->dev, "\tLimiting the stream matching groups to 128\n");
+                smmu->num_mapping_groups = 128;
+        }
+
+        last_s2cr = ARM_SMMU_GR0_S2CR(smmu->num_mapping_groups - 1);
 
         /*
          * With some firmware versions writes to S2CR of type FAULT are
+3 -11
drivers/iommu/arm/arm-smmu/arm-smmu.c
···
         int err;
 
         np = dev_get_dev_node(dev);
-        if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
+        if (!np || !of_property_present(np, "#stream-id-cells")) {
                 of_node_put(np);
                 return -ENODEV;
         }
···
 {
         struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
 
-        if (!smmu)
-                return;
-
         if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
                 dev_notice(&pdev->dev, "disabling translation\n");
···
         clk_bulk_unprepare(smmu->num_clks, smmu->clks);
 }
 
-static int arm_smmu_device_remove(struct platform_device *pdev)
+static void arm_smmu_device_remove(struct platform_device *pdev)
 {
         struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
-
-        if (!smmu)
-                return -ENODEV;
 
         iommu_device_unregister(&smmu->iommu);
         iommu_device_sysfs_remove(&smmu->iommu);
 
         arm_smmu_device_shutdown(pdev);
-
-        return 0;
 }
 
 static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
···
                 .suppress_bind_attrs    = true,
         },
         .probe  = arm_smmu_device_probe,
-        .remove = arm_smmu_device_remove,
+        .remove_new = arm_smmu_device_remove,
         .shutdown = arm_smmu_device_shutdown,
 };
 module_platform_driver(arm_smmu_driver);
+4 -8
drivers/iommu/arm/arm-smmu/qcom_iommu.c
···
         return 0;
 }
 
-static int qcom_iommu_ctx_remove(struct platform_device *pdev)
+static void qcom_iommu_ctx_remove(struct platform_device *pdev)
 {
         struct qcom_iommu_dev *qcom_iommu = dev_get_drvdata(pdev->dev.parent);
         struct qcom_iommu_ctx *ctx = platform_get_drvdata(pdev);
···
         platform_set_drvdata(pdev, NULL);
 
         qcom_iommu->ctxs[ctx->asid - 1] = NULL;
-
-        return 0;
 }
 
 static const struct of_device_id ctx_of_match[] = {
···
                 .of_match_table = ctx_of_match,
         },
         .probe  = qcom_iommu_ctx_probe,
-        .remove = qcom_iommu_ctx_remove,
+        .remove_new = qcom_iommu_ctx_remove,
 };
 
 static bool qcom_iommu_has_secure_context(struct qcom_iommu_dev *qcom_iommu)
···
         return ret;
 }
 
-static int qcom_iommu_device_remove(struct platform_device *pdev)
+static void qcom_iommu_device_remove(struct platform_device *pdev)
 {
         struct qcom_iommu_dev *qcom_iommu = platform_get_drvdata(pdev);
···
         platform_set_drvdata(pdev, NULL);
         iommu_device_sysfs_remove(&qcom_iommu->iommu);
         iommu_device_unregister(&qcom_iommu->iommu);
-
-        return 0;
 }
 
 static int __maybe_unused qcom_iommu_resume(struct device *dev)
···
                 .pm             = &qcom_iommu_pm_ops,
         },
         .probe  = qcom_iommu_device_probe,
-        .remove = qcom_iommu_device_remove,
+        .remove_new = qcom_iommu_device_remove,
 };
 
 static int __init qcom_iommu_init(void)
+8 -16
drivers/iommu/exynos-iommu.c
···
                 return ret;
         }
 
-        data->clk = devm_clk_get(dev, "sysmmu");
-        if (PTR_ERR(data->clk) == -ENOENT)
-                data->clk = NULL;
-        else if (IS_ERR(data->clk))
+        data->clk = devm_clk_get_optional(dev, "sysmmu");
+        if (IS_ERR(data->clk))
                 return PTR_ERR(data->clk);
 
-        data->aclk = devm_clk_get(dev, "aclk");
-        if (PTR_ERR(data->aclk) == -ENOENT)
-                data->aclk = NULL;
-        else if (IS_ERR(data->aclk))
+        data->aclk = devm_clk_get_optional(dev, "aclk");
+        if (IS_ERR(data->aclk))
                 return PTR_ERR(data->aclk);
 
-        data->pclk = devm_clk_get(dev, "pclk");
-        if (PTR_ERR(data->pclk) == -ENOENT)
-                data->pclk = NULL;
-        else if (IS_ERR(data->pclk))
+        data->pclk = devm_clk_get_optional(dev, "pclk");
+        if (IS_ERR(data->pclk))
                 return PTR_ERR(data->pclk);
 
         if (!data->clk && (!data->aclk || !data->pclk)) {
···
                 return -ENOSYS;
         }
 
-        data->clk_master = devm_clk_get(dev, "master");
-        if (PTR_ERR(data->clk_master) == -ENOENT)
-                data->clk_master = NULL;
-        else if (IS_ERR(data->clk_master))
+        data->clk_master = devm_clk_get_optional(dev, "master");
+        if (IS_ERR(data->clk_master))
                 return PTR_ERR(data->clk_master);
 
         data->sysmmu = dev;
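
devm_clk_get_optional() returns NULL rather than ERR_PTR(-ENOENT) when the
clock is simply absent from the DT, so each four-line "-ENOENT means no
clock" dance collapses into a single IS_ERR() check while the later
!data->clk / !data->aclk / !data->pclk tests keep working unchanged.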
+5 -4
drivers/iommu/fsl_pamu.c
···
 }
 
 /**
- * pamu_config_paace() - Sets up PPAACE entry for specified liodn
+ * pamu_config_ppaace() - Sets up PPAACE entry for specified liodn
  *
  * @liodn: Logical IO device number
  * @omi: Operation mapping index -- if ~omi == 0 then omi not defined
···
 /**
  * get_ome_index() - Returns the index in the operation mapping table
  *                   for device.
- * @*omi_index: pointer for storing the index value
+ * @omi_index: pointer for storing the index value
+ * @dev: target device
  *
  */
 void get_ome_index(u32 *omi_index, struct device *dev)
···
 #define QMAN_PORTAL_PAACE 2
 #define BMAN_PAACE 3
 
-/**
+/*
  * Setup operation mapping and stash destinations for QMAN and QMAN portal.
  * Memory accesses to QMAN and BMAN private memory need not be coherent, so
  * clear the PAACE entry coherency attribute for them.
···
         }
 }
 
-/**
+/*
  * Setup the operation mapping table for various devices. This is a static
  * table where each table index corresponds to a particular device. PAMU uses
  * this table to translate device transaction to appropriate corenet
-1
drivers/iommu/intel/Kconfig
···
         select NEED_DMA_MAP_STATE
         select DMAR_TABLE
         select SWIOTLB
-        select IOASID
         select PCI_ATS
         select PCI_PRI
         select PCI_PASID
-2
drivers/iommu/intel/cap_audit.c
···
         CHECK_FEATURE_MISMATCH(a, b, ecap, slts, ECAP_SLTS_MASK);
         CHECK_FEATURE_MISMATCH(a, b, ecap, nwfs, ECAP_NWFS_MASK);
         CHECK_FEATURE_MISMATCH(a, b, ecap, slads, ECAP_SLADS_MASK);
-        CHECK_FEATURE_MISMATCH(a, b, ecap, vcs, ECAP_VCS_MASK);
         CHECK_FEATURE_MISMATCH(a, b, ecap, smts, ECAP_SMTS_MASK);
         CHECK_FEATURE_MISMATCH(a, b, ecap, pds, ECAP_PDS_MASK);
         CHECK_FEATURE_MISMATCH(a, b, ecap, dit, ECAP_DIT_MASK);
···
         CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, slts, ECAP_SLTS_MASK);
         CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, nwfs, ECAP_NWFS_MASK);
         CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, slads, ECAP_SLADS_MASK);
-        CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, vcs, ECAP_VCS_MASK);
         CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, smts, ECAP_SMTS_MASK);
         CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, pds, ECAP_PDS_MASK);
         CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, dit, ECAP_DIT_MASK);
+5 -8
drivers/iommu/intel/dmar.c
···
         struct pci_dev *tmp;
         struct dmar_pci_notify_info *info;
 
-        BUG_ON(dev->is_virtfn);
-
         /*
          * Ignore devices that have a domain number higher than what can
          * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000
···
                                        get_device(dev));
                         return 1;
                 }
-                BUG_ON(i >= devices_cnt);
+                if (WARN_ON(i >= devices_cnt))
+                        return -EINVAL;
         }
 
         return 0;
···
                 warn_invalid_dmar(phys_addr, " returns all ones");
                 goto unmap;
         }
-        if (ecap_vcs(iommu->ecap))
-                iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG);
 
         /* the registers might be more than one page */
         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
···
          * is present.
          */
         if (ecap_smts(iommu->ecap))
-                val |= (1 << 11) | 1;
+                val |= BIT_ULL(11) | BIT_ULL(0);
 
         raw_spin_lock_irqsave(&iommu->register_lock, flags);
···
                 return 0;
         }
 
-        if (pasid == INVALID_IOASID)
+        if (pasid == IOMMU_PASID_INVALID)
                 pr_err("[%s NO_PASID] Request device [%02x:%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
                        type ? "DMA Read" : "DMA Write",
                        source_id >> 8, PCI_SLOT(source_id & 0xFF),
···
                 if (!ratelimited)
                         /* Using pasid -1 if pasid is not present */
                         dmar_fault_do_one(iommu, type, fault_reason,
-                                          pasid_present ? pasid : INVALID_IOASID,
+                                          pasid_present ? pasid : IOMMU_PASID_INVALID,
                                           source_id, guest_addr);
 
                 fault_index++;
+121 -162
drivers/iommu/intel/iommu.c
···
                 return;
         }
         /* For request-without-pasid, get the pasid from context entry */
-        if (intel_iommu_sm && pasid == INVALID_IOASID)
+        if (intel_iommu_sm && pasid == IOMMU_PASID_INVALID)
                 pasid = PASID_RID2PASID;
 
         dir_index = pasid >> PASID_PDE_SHIFT;
···
         struct dma_pte *parent, *pte;
         int level = agaw_to_level(domain->agaw);
         int offset;
-
-        BUG_ON(!domain->pgd);
 
         if (!domain_pfn_supported(domain, pfn))
                 /* Address beyond IOMMU's addressing capabilities. */
···
         unsigned int large_page;
         struct dma_pte *first_pte, *pte;
 
-        BUG_ON(!domain_pfn_supported(domain, start_pfn));
-        BUG_ON(!domain_pfn_supported(domain, last_pfn));
-        BUG_ON(start_pfn > last_pfn);
+        if (WARN_ON(!domain_pfn_supported(domain, last_pfn)) ||
+            WARN_ON(start_pfn > last_pfn))
+                return;
 
         /* we don't need lock here; nobody else touches the iova range */
         do {
···
 static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn,
                          unsigned long last_pfn, struct list_head *freelist)
 {
-        BUG_ON(!domain_pfn_supported(domain, start_pfn));
-        BUG_ON(!domain_pfn_supported(domain, last_pfn));
-        BUG_ON(start_pfn > last_pfn);
+        if (WARN_ON(!domain_pfn_supported(domain, last_pfn)) ||
+            WARN_ON(start_pfn > last_pfn))
+                return;
 
         /* we don't need lock here; nobody else touches the iova range */
         dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
···
                         | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
                 break;
         default:
-                BUG();
+                pr_warn("%s: Unexpected context-cache invalidation type 0x%llx\n",
+                        iommu->name, type);
+                return;
         }
         val |= DMA_CCMD_ICC;
···
                 val_iva = size_order | addr;
                 break;
         default:
-                BUG();
+                pr_warn("%s: Unexpected iotlb invalidation type 0x%llx\n",
+                        iommu->name, type);
+                return;
         }
         /* Note: set drain read/write */
 #if 0
···
                 return;
 
         pdev = to_pci_dev(info->dev);
-        /* For IOMMU that supports device IOTLB throttling (DIT), we assign
-         * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
-         * queue depth at PF level. If DIT is not set, PFSID will be treated as
-         * reserved, which should be set to 0.
-         */
-        if (!ecap_dit(info->iommu->ecap))
-                info->pfsid = 0;
-        else {
-                struct pci_dev *pf_pdev;
-
-                /* pdev will be returned if device is not a vf */
-                pf_pdev = pci_physfn(pdev);
-                info->pfsid = pci_dev_id(pf_pdev);
-        }
 
         /* The PCIe spec, in its wisdom, declares that the behaviour of
            the device if you enable PASID support after ATS support is
···
         if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
                 info->pasid_enabled = 1;
 
-        if (info->pri_supported &&
-            (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1) &&
-            !pci_reset_pri(pdev) && !pci_enable_pri(pdev, PRQ_DEPTH))
-                info->pri_enabled = 1;
-
         if (info->ats_supported && pci_ats_page_aligned(pdev) &&
             !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
                 info->ats_enabled = 1;
                 domain_update_iotlb(info->domain);
-                info->ats_qdep = pci_ats_queue_depth(pdev);
         }
 }
···
                 pci_disable_ats(pdev);
                 info->ats_enabled = 0;
                 domain_update_iotlb(info->domain);
         }
-
-        if (info->pri_enabled) {
-                pci_disable_pri(pdev);
-                info->pri_enabled = 0;
-        }
 
         if (info->pasid_enabled) {
···
         uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
         u16 did = domain_id_iommu(domain, iommu);
 
-        BUG_ON(pages == 0);
+        if (WARN_ON(!pages))
+                return;
 
         if (ih)
                 ih = 1 << 6;
···
                 if (ecap_prs(iommu->ecap))
                         intel_svm_finish_prq(iommu);
         }
-        if (vccap_pasid(iommu->vccap))
-                ioasid_unregister_allocator(&iommu->pasid_allocator);
-
 #endif
 }
···
  */
 static inline void context_set_sm_dte(struct context_entry *context)
 {
-        context->lo |= (1 << 2);
+        context->lo |= BIT_ULL(2);
 }
 
 /*
···
  */
 static inline void context_set_sm_pre(struct context_entry *context)
 {
-        context->lo |= (1 << 4);
+        context->lo |= BIT_ULL(4);
 }
 
 /* Convert value to context PASID directory size field coding. */
···
 
         pr_debug("Set context mapping for %02x:%02x.%d\n",
                  bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
-
-        BUG_ON(!domain->pgd);
 
         spin_lock(&iommu->lock);
         ret = -ENOMEM;
···
         phys_addr_t pteval;
         u64 attr;
 
-        BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
+        if (unlikely(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1)))
+                return -EINVAL;
 
         if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
                 return -EINVAL;
···
         if (level != 4 && level != 5)
                 return -EINVAL;
 
-        if (pasid != PASID_RID2PASID)
-                flags |= PASID_FLAG_SUPERVISOR_MODE;
         if (level == 5)
                 flags |= PASID_FLAG_FL5LP;
···
         return ret;
 }
 
-#ifdef CONFIG_INTEL_IOMMU_SVM
-static ioasid_t intel_vcmd_ioasid_alloc(ioasid_t min, ioasid_t max, void *data)
-{
-        struct intel_iommu *iommu = data;
-        ioasid_t ioasid;
-
-        if (!iommu)
-                return INVALID_IOASID;
-        /*
-         * VT-d virtual command interface always uses the full 20 bit
-         * PASID range. Host can partition guest PASID range based on
-         * policies but it is out of guest's control.
-         */
-        if (min < PASID_MIN || max > intel_pasid_max_id)
-                return INVALID_IOASID;
-
-        if (vcmd_alloc_pasid(iommu, &ioasid))
-                return INVALID_IOASID;
-
-        return ioasid;
-}
-
-static void intel_vcmd_ioasid_free(ioasid_t ioasid, void *data)
-{
-        struct intel_iommu *iommu = data;
-
-        if (!iommu)
-                return;
-        /*
-         * Sanity check the ioasid owner is done at upper layer, e.g. VFIO
-         * We can only free the PASID when all the devices are unbound.
-         */
-        if (ioasid_find(NULL, ioasid, NULL)) {
-                pr_alert("Cannot free active IOASID %d\n", ioasid);
-                return;
-        }
-        vcmd_free_pasid(iommu, ioasid);
-}
-
-static void register_pasid_allocator(struct intel_iommu *iommu)
-{
-        /*
-         * If we are running in the host, no need for custom allocator
-         * in that PASIDs are allocated from the host system-wide.
-         */
-        if (!cap_caching_mode(iommu->cap))
-                return;
-
-        if (!sm_supported(iommu)) {
-                pr_warn("VT-d Scalable Mode not enabled, no PASID allocation\n");
-                return;
-        }
-
-        /*
-         * Register a custom PASID allocator if we are running in a guest,
-         * guest PASID must be obtained via virtual command interface.
-         * There can be multiple vIOMMUs in each guest but only one allocator
-         * is active. All vIOMMU allocators will eventually be calling the same
-         * host allocator.
-         */
-        if (!vccap_pasid(iommu->vccap))
-                return;
-
-        pr_info("Register custom PASID allocator\n");
-        iommu->pasid_allocator.alloc = intel_vcmd_ioasid_alloc;
-        iommu->pasid_allocator.free = intel_vcmd_ioasid_free;
-        iommu->pasid_allocator.pdata = (void *)iommu;
-        if (ioasid_register_allocator(&iommu->pasid_allocator)) {
-                pr_warn("Custom PASID allocator failed, scalable mode disabled\n");
-                /*
-                 * Disable scalable mode on this IOMMU if there
-                 * is no custom allocator. Mixing SM capable vIOMMU
-                 * and non-SM vIOMMU are not supported.
-                 */
-                intel_iommu_sm = 0;
-        }
-}
-#endif
-
 static int __init init_dmars(void)
 {
         struct dmar_drhd_unit *drhd;
···
          */
         for_each_active_iommu(iommu, drhd) {
                 iommu_flush_write_buffer(iommu);
-#ifdef CONFIG_INTEL_IOMMU_SVM
-                register_pasid_allocator(iommu);
-#endif
                 iommu_set_root_entry(iommu);
         }
···
 {
         struct intel_iommu *iommu = dev_to_intel_iommu(dev);
         u32 ver = readl(iommu->reg + DMAR_VER_REG);
-        return sprintf(buf, "%d:%d\n",
-                       DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
+        return sysfs_emit(buf, "%d:%d\n",
+                          DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
 }
 static DEVICE_ATTR_RO(version);
···
                             struct device_attribute *attr, char *buf)
 {
         struct intel_iommu *iommu = dev_to_intel_iommu(dev);
-        return sprintf(buf, "%llx\n", iommu->reg_phys);
+        return sysfs_emit(buf, "%llx\n", iommu->reg_phys);
 }
 static DEVICE_ATTR_RO(address);
···
                         struct device_attribute *attr, char *buf)
 {
         struct intel_iommu *iommu = dev_to_intel_iommu(dev);
-        return sprintf(buf, "%llx\n", iommu->cap);
+        return sysfs_emit(buf, "%llx\n", iommu->cap);
 }
 static DEVICE_ATTR_RO(cap);
···
                          struct device_attribute *attr, char *buf)
 {
         struct intel_iommu *iommu = dev_to_intel_iommu(dev);
-        return sprintf(buf, "%llx\n", iommu->ecap);
+        return sysfs_emit(buf, "%llx\n", iommu->ecap);
 }
 static DEVICE_ATTR_RO(ecap);
···
                                       struct device_attribute *attr, char *buf)
 {
         struct intel_iommu *iommu = dev_to_intel_iommu(dev);
-        return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
+        return sysfs_emit(buf, "%ld\n", cap_ndoms(iommu->cap));
 }
 static DEVICE_ATTR_RO(domains_supported);
···
                                  struct device_attribute *attr, char *buf)
 {
         struct intel_iommu *iommu = dev_to_intel_iommu(dev);
-        return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
-                                                  cap_ndoms(iommu->cap)));
+        return sysfs_emit(buf, "%d\n",
+                          bitmap_weight(iommu->domain_ids,
+                                        cap_ndoms(iommu->cap)));
 }
 static DEVICE_ATTR_RO(domains_used);
···
 
         /* Cope with horrid API which requires us to unmap more than the
            size argument if it happens to be a large-page mapping. */
-        BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level,
-                               GFP_ATOMIC));
+        if (unlikely(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT,
+                                     &level, GFP_ATOMIC)))
+                return 0;
 
         if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
                 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
···
                     dmar_ats_supported(pdev, iommu)) {
                         info->ats_supported = 1;
                         info->dtlb_extra_inval = dev_needs_extra_dtlb_flush(pdev);
+
+                        /*
+                         * For IOMMU that supports device IOTLB throttling
+                         * (DIT), we assign PFSID to the invalidation desc
+                         * of a VF such that IOMMU HW can gauge queue depth
+                         * at PF level. If DIT is not set, PFSID will be
+                         * treated as reserved, which should be set to 0.
+                         */
+                        if (ecap_dit(iommu->ecap))
+                                info->pfsid = pci_dev_id(pci_physfn(pdev));
+                        info->ats_qdep = pci_ats_queue_depth(pdev);
                 }
                 if (sm_supported(iommu)) {
                         if (pasid_supported(iommu)) {
···
 {
         struct device_domain_info *info = dev_iommu_priv_get(dev);
         struct intel_iommu *iommu;
-        int ret;
 
         if (!info || dmar_disabled)
                 return -EINVAL;
···
         if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE))
                 return -ENODEV;
 
-        if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled)
+        if (!info->pasid_enabled || !info->ats_enabled)
                 return -EINVAL;
+
+        /*
+         * Devices having device-specific I/O fault handling should not
+         * support PCI/PRI. The IOMMU side has no means to check the
+         * capability of device-specific IOPF. Therefore, IOMMU can only
+         * default that if the device driver enables SVA on a non-PRI
+         * device, it will handle IOPF in its own way.
+         */
+        if (!info->pri_supported)
+                return 0;
+
+        /* Devices supporting PRI should have it enabled. */
+        if (!info->pri_enabled)
+                return -EINVAL;
+
+        return 0;
+}
+
+static int intel_iommu_enable_iopf(struct device *dev)
+{
+        struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL;
+        struct device_domain_info *info = dev_iommu_priv_get(dev);
+        struct intel_iommu *iommu;
+        int ret;
+
+        if (!pdev || !info || !info->ats_enabled || !info->pri_supported)
+                return -ENODEV;
+
+        if (info->pri_enabled)
+                return -EBUSY;
+
+        iommu = info->iommu;
+        if (!iommu)
+                return -EINVAL;
+
+        /* PASID is required in PRG Response Message. */
+        if (info->pasid_enabled && !pci_prg_resp_pasid_required(pdev))
+                return -EINVAL;
+
+        ret = pci_reset_pri(pdev);
+        if (ret)
+                return ret;
 
         ret = iopf_queue_add_device(iommu->iopf_queue, dev);
         if (ret)
···
 
         ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
         if (ret)
-                iopf_queue_remove_device(iommu->iopf_queue, dev);
+                goto iopf_remove_device;
+
+        ret = pci_enable_pri(pdev, PRQ_DEPTH);
+        if (ret)
+                goto iopf_unregister_handler;
+        info->pri_enabled = 1;
+
+        return 0;
+
+iopf_unregister_handler:
+        iommu_unregister_device_fault_handler(dev);
+iopf_remove_device:
+        iopf_queue_remove_device(iommu->iopf_queue, dev);
 
         return ret;
 }
 
-static int intel_iommu_disable_sva(struct device *dev)
+static int intel_iommu_disable_iopf(struct device *dev)
 {
         struct device_domain_info *info = dev_iommu_priv_get(dev);
         struct intel_iommu *iommu = info->iommu;
-        int ret;
 
-        ret = iommu_unregister_device_fault_handler(dev);
-        if (ret)
-                return ret;
+        if (!info->pri_enabled)
+                return -EINVAL;
 
-        ret = iopf_queue_remove_device(iommu->iopf_queue, dev);
-        if (ret)
-                iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
+        /*
+         * PCIe spec states that by clearing PRI enable bit, the Page
+         * Request Interface will not issue new page requests, but has
+         * outstanding page requests that have been transmitted or are
+         * queued for transmission. This is supposed to be called after
+         * the device driver has stopped DMA, all PASIDs have been
+         * unbound and the outstanding PRQs have been drained.
+         */
+        pci_disable_pri(to_pci_dev(dev));
+        info->pri_enabled = 0;
 
-        return ret;
-}
+        /*
+         * With PRI disabled and outstanding PRQs drained, unregistering
+         * fault handler and removing device from iopf queue should never
+         * fail.
+         */
+        WARN_ON(iommu_unregister_device_fault_handler(dev));
+        WARN_ON(iopf_queue_remove_device(iommu->iopf_queue, dev));
 
-static int intel_iommu_enable_iopf(struct device *dev)
-{
-        struct device_domain_info *info = dev_iommu_priv_get(dev);
-
-        if (info && info->pri_supported)
-                return 0;
-
-        return -ENODEV;
+        return 0;
 }
 
 static int
···
 {
         switch (feat) {
         case IOMMU_DEV_FEAT_IOPF:
-                return 0;
+                return intel_iommu_disable_iopf(dev);
 
         case IOMMU_DEV_FEAT_SVA:
-                return intel_iommu_disable_sva(dev);
+                return 0;
 
         default:
                 return -ENODEV;
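
Condensed, the new VT-d ordering above (a restatement of this hunk's code,
not an additional API) is:

    /* IOMMU_DEV_FEAT_IOPF enable path (intel_iommu_enable_iopf): */
    pci_reset_pri(pdev);                    /* start from a clean PRI state */
    iopf_queue_add_device(iommu->iopf_queue, dev);
    iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
    pci_enable_pri(pdev, PRQ_DEPTH);        /* last, so page requests only
                                               arrive once a handler exists */

The disable path runs in reverse: pci_disable_pri() comes first, so no new
page requests can be generated while the handler and queue entry are torn
down, which is why those two teardown calls can be plain WARN_ON()s.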
+16 -19
drivers/iommu/intel/iommu.h
···
 #include <linux/iommu.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/dmar.h>
-#include <linux/ioasid.h>
 #include <linux/bitfield.h>
 #include <linux/xarray.h>
 #include <linux/perf_event.h>
···
 #define ecap_flts(e)            (((e) >> 47) & 0x1)
 #define ecap_slts(e)            (((e) >> 46) & 0x1)
 #define ecap_slads(e)           (((e) >> 45) & 0x1)
-#define ecap_vcs(e)             (((e) >> 44) & 0x1)
 #define ecap_smts(e)            (((e) >> 43) & 0x1)
 #define ecap_dit(e)             (((e) >> 41) & 0x1)
 #define ecap_pds(e)             (((e) >> 42) & 0x1)
···
         unsigned char prq_name[16];    /* Name for PRQ interrupt */
         unsigned long prq_seq_number;
         struct completion prq_complete;
-        struct ioasid_allocator_ops pasid_allocator; /* Custom allocator for PASIDs */
 #endif
         struct iopf_queue *iopf_queue;
         unsigned char iopfq_name[16];
···
         return (context->lo & 1);
 }
 
-extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
+struct dmar_drhd_unit *dmar_find_matched_drhd_unit(struct pci_dev *dev);
 
-extern int dmar_enable_qi(struct intel_iommu *iommu);
-extern void dmar_disable_qi(struct intel_iommu *iommu);
-extern int dmar_reenable_qi(struct intel_iommu *iommu);
-extern void qi_global_iec(struct intel_iommu *iommu);
+int dmar_enable_qi(struct intel_iommu *iommu);
+void dmar_disable_qi(struct intel_iommu *iommu);
+int dmar_reenable_qi(struct intel_iommu *iommu);
+void qi_global_iec(struct intel_iommu *iommu);
 
-extern void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid,
-                             u8 fm, u64 type);
-extern void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
-                           unsigned int size_order, u64 type);
-extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
+void qi_flush_context(struct intel_iommu *iommu, u16 did,
+                      u16 sid, u8 fm, u64 type);
+void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
+                    unsigned int size_order, u64 type);
+void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
                         u16 qdep, u64 addr, unsigned mask);
 
 void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
···
  */
 #define QI_OPT_WAIT_DRAIN               BIT(0)
 
-extern int dmar_ir_support(void);
+int dmar_ir_support(void);
 
 void *alloc_pgtable_page(int node, gfp_t gfp);
 void free_pgtable_page(void *vaddr);
···
 struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn);
 
 #ifdef CONFIG_INTEL_IOMMU_SVM
-extern void intel_svm_check(struct intel_iommu *iommu);
-extern int intel_svm_enable_prq(struct intel_iommu *iommu);
-extern int intel_svm_finish_prq(struct intel_iommu *iommu);
+void intel_svm_check(struct intel_iommu *iommu);
+int intel_svm_enable_prq(struct intel_iommu *iommu);
+int intel_svm_finish_prq(struct intel_iommu *iommu);
 int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt,
                             struct iommu_page_response *msg);
 struct iommu_domain *intel_svm_domain_alloc(void);
···
 #ifdef CONFIG_INTEL_IOMMU
 extern int intel_iommu_sm;
-extern int iommu_calculate_agaw(struct intel_iommu *iommu);
-extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu);
+int iommu_calculate_agaw(struct intel_iommu *iommu);
+int iommu_calculate_max_sagaw(struct intel_iommu *iommu);
 int ecmd_submit_sync(struct intel_iommu *iommu, u8 ecmd, u64 oa, u64 ob);
 
 static inline bool ecmd_has_pmu_essential(struct intel_iommu *iommu)
+1 -1
drivers/iommu/intel/irq_remapping.c
··· 548 548 goto out_free_table; 549 549 } 550 550 551 - bitmap = bitmap_zalloc(INTR_REMAP_TABLE_ENTRIES, GFP_ATOMIC); 551 + bitmap = bitmap_zalloc(INTR_REMAP_TABLE_ENTRIES, GFP_KERNEL); 552 552 if (bitmap == NULL) { 553 553 pr_err("IR%d: failed to allocate bitmap\n", iommu->seq_id); 554 554 goto out_free_pages;
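This hunk is part of the "do not use GFP_ATOMIC when not needed" cleanup: the remap-table bitmap is allocated from a sleepable setup path, so the atomic emergency reserves are unnecessary. A minimal sketch of the rule of thumb (hypothetical code, not from this series):

    /* Caller may sleep: let the allocator reclaim instead of failing fast. */
    unsigned long *map = bitmap_zalloc(nbits, GFP_KERNEL);

    /* Caller cannot sleep (IRQ context, under a spinlock): atomic reserve. */
    unsigned long *amap = bitmap_zalloc(nbits, GFP_ATOMIC);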
-43
drivers/iommu/intel/pasid.c
··· 336 336 } 337 337 338 338 /* 339 - * Setup the SRE(Supervisor Request Enable) field (Bit 128) of a 340 - * scalable mode PASID entry. 341 - */ 342 - static inline void pasid_set_sre(struct pasid_entry *pe) 343 - { 344 - pasid_set_bits(&pe->val[2], 1 << 0, 1); 345 - } 346 - 347 - /* 348 339 * Setup the WPE(Write Protect Enable) field (Bit 132) of a 349 340 * scalable mode PASID entry. 350 341 */ ··· 512 521 return -EINVAL; 513 522 } 514 523 515 - if (flags & PASID_FLAG_SUPERVISOR_MODE) { 516 - #ifdef CONFIG_X86 517 - unsigned long cr0 = read_cr0(); 518 - 519 - /* CR0.WP is normally set but just to be sure */ 520 - if (unlikely(!(cr0 & X86_CR0_WP))) { 521 - pr_err("No CPU write protect!\n"); 522 - return -EINVAL; 523 - } 524 - #endif 525 - if (!ecap_srs(iommu->ecap)) { 526 - pr_err("No supervisor request support on %s\n", 527 - iommu->name); 528 - return -EINVAL; 529 - } 530 - } 531 - 532 524 if ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)) { 533 525 pr_err("No 5-level paging support for first-level on %s\n", 534 526 iommu->name); ··· 534 560 535 561 /* Setup the first level page table pointer: */ 536 562 pasid_set_flptr(pte, (u64)__pa(pgd)); 537 - if (flags & PASID_FLAG_SUPERVISOR_MODE) { 538 - pasid_set_sre(pte); 539 - pasid_set_wpe(pte); 540 - } 541 563 542 564 if (flags & PASID_FLAG_FL5LP) 543 565 pasid_set_flpm(pte, 1); ··· 628 658 pasid_set_fault_enable(pte); 629 659 pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); 630 660 631 - /* 632 - * Since it is a second level only translation setup, we should 633 - * set SRE bit as well (addresses are expected to be GPAs). 634 - */ 635 - if (pasid != PASID_RID2PASID && ecap_srs(iommu->ecap)) 636 - pasid_set_sre(pte); 637 661 pasid_set_present(pte); 638 662 spin_unlock(&iommu->lock); 639 663 ··· 664 700 pasid_set_translation_type(pte, PASID_ENTRY_PGTT_PT); 665 701 pasid_set_fault_enable(pte); 666 702 pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); 667 - 668 - /* 669 - * We should set SRE bit as well since the addresses are expected 670 - * to be GPAs. 671 - */ 672 - if (ecap_srs(iommu->ecap)) 673 - pasid_set_sre(pte); 674 703 pasid_set_present(pte); 675 704 spin_unlock(&iommu->lock); 676 705
-7
drivers/iommu/intel/pasid.h
··· 41 41 #define FLPT_DEFAULT_DID 1 42 42 #define NUM_RESERVED_DID 2 43 43 44 - /* 45 - * The SUPERVISOR_MODE flag indicates a first level translation which 46 - * can be used for access to kernel addresses. It is valid only for 47 - * access to the kernel's static 1:1 mapping of physical memory — not 48 - * to vmalloc or even module mappings. 49 - */ 50 - #define PASID_FLAG_SUPERVISOR_MODE BIT(0) 51 44 #define PASID_FLAG_NESTED BIT(1) 52 45 #define PASID_FLAG_PAGE_SNOOP BIT(2) 53 46
+1 -2
drivers/iommu/intel/svm.c
··· 16 16 #include <linux/interrupt.h> 17 17 #include <linux/mm_types.h> 18 18 #include <linux/xarray.h> 19 - #include <linux/ioasid.h> 20 19 #include <asm/page.h> 21 20 #include <asm/fpu/api.h> 22 21 ··· 272 273 if (WARN_ON(!mutex_is_locked(&pasid_mutex))) 273 274 return -EINVAL; 274 275 275 - if (pasid == INVALID_IOASID || pasid >= PASID_MAX) 276 + if (pasid == IOMMU_PASID_INVALID || pasid >= PASID_MAX) 276 277 return -EINVAL; 277 278 278 279 svm = pasid_private_find(pasid);
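With <linux/ioasid.h> gone, the sentinel for "no PASID" is IOMMU_PASID_INVALID from <linux/iommu.h>; both spellings denote the same all-ones u32 value. A minimal sketch of the validity check this hunk performs (helper name is hypothetical):

    #include <linux/iommu.h>	/* provides ioasid_t and IOMMU_PASID_INVALID */

    static inline bool example_pasid_valid(ioasid_t pasid)
    {
    	return pasid != IOMMU_PASID_INVALID;
    }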
-422
drivers/iommu/ioasid.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - /* 3 - * I/O Address Space ID allocator. There is one global IOASID space, split into 4 - * subsets. Users create a subset with DECLARE_IOASID_SET, then allocate and 5 - * free IOASIDs with ioasid_alloc() and ioasid_free(). 6 - */ 7 - #include <linux/ioasid.h> 8 - #include <linux/module.h> 9 - #include <linux/slab.h> 10 - #include <linux/spinlock.h> 11 - #include <linux/xarray.h> 12 - 13 - struct ioasid_data { 14 - ioasid_t id; 15 - struct ioasid_set *set; 16 - void *private; 17 - struct rcu_head rcu; 18 - }; 19 - 20 - /* 21 - * struct ioasid_allocator_data - Internal data structure to hold information 22 - * about an allocator. There are two types of allocators: 23 - * 24 - * - Default allocator always has its own XArray to track the IOASIDs allocated. 25 - * - Custom allocators may share allocation helpers with different private data. 26 - * Custom allocators that share the same helper functions also share the same 27 - * XArray. 28 - * Rules: 29 - * 1. Default allocator is always available, not dynamically registered. This is 30 - * to prevent race conditions with early boot code that want to register 31 - * custom allocators or allocate IOASIDs. 32 - * 2. Custom allocators take precedence over the default allocator. 33 - * 3. When all custom allocators sharing the same helper functions are 34 - * unregistered (e.g. due to hotplug), all outstanding IOASIDs must be 35 - * freed. Otherwise, outstanding IOASIDs will be lost and orphaned. 36 - * 4. When switching between custom allocators sharing the same helper 37 - * functions, outstanding IOASIDs are preserved. 38 - * 5. When switching between custom allocator and default allocator, all IOASIDs 39 - * must be freed to ensure unadulterated space for the new allocator. 
40 - * 41 - * @ops: allocator helper functions and its data 42 - * @list: registered custom allocators 43 - * @slist: allocators share the same ops but different data 44 - * @flags: attributes of the allocator 45 - * @xa: xarray holds the IOASID space 46 - * @rcu: used for kfree_rcu when unregistering allocator 47 - */ 48 - struct ioasid_allocator_data { 49 - struct ioasid_allocator_ops *ops; 50 - struct list_head list; 51 - struct list_head slist; 52 - #define IOASID_ALLOCATOR_CUSTOM BIT(0) /* Needs framework to track results */ 53 - unsigned long flags; 54 - struct xarray xa; 55 - struct rcu_head rcu; 56 - }; 57 - 58 - static DEFINE_SPINLOCK(ioasid_allocator_lock); 59 - static LIST_HEAD(allocators_list); 60 - 61 - static ioasid_t default_alloc(ioasid_t min, ioasid_t max, void *opaque); 62 - static void default_free(ioasid_t ioasid, void *opaque); 63 - 64 - static struct ioasid_allocator_ops default_ops = { 65 - .alloc = default_alloc, 66 - .free = default_free, 67 - }; 68 - 69 - static struct ioasid_allocator_data default_allocator = { 70 - .ops = &default_ops, 71 - .flags = 0, 72 - .xa = XARRAY_INIT(ioasid_xa, XA_FLAGS_ALLOC), 73 - }; 74 - 75 - static struct ioasid_allocator_data *active_allocator = &default_allocator; 76 - 77 - static ioasid_t default_alloc(ioasid_t min, ioasid_t max, void *opaque) 78 - { 79 - ioasid_t id; 80 - 81 - if (xa_alloc(&default_allocator.xa, &id, opaque, XA_LIMIT(min, max), GFP_ATOMIC)) { 82 - pr_err("Failed to alloc ioasid from %d to %d\n", min, max); 83 - return INVALID_IOASID; 84 - } 85 - 86 - return id; 87 - } 88 - 89 - static void default_free(ioasid_t ioasid, void *opaque) 90 - { 91 - struct ioasid_data *ioasid_data; 92 - 93 - ioasid_data = xa_erase(&default_allocator.xa, ioasid); 94 - kfree_rcu(ioasid_data, rcu); 95 - } 96 - 97 - /* Allocate and initialize a new custom allocator with its helper functions */ 98 - static struct ioasid_allocator_data *ioasid_alloc_allocator(struct ioasid_allocator_ops *ops) 99 - { 100 - struct ioasid_allocator_data *ia_data; 101 - 102 - ia_data = kzalloc(sizeof(*ia_data), GFP_ATOMIC); 103 - if (!ia_data) 104 - return NULL; 105 - 106 - xa_init_flags(&ia_data->xa, XA_FLAGS_ALLOC); 107 - INIT_LIST_HEAD(&ia_data->slist); 108 - ia_data->flags |= IOASID_ALLOCATOR_CUSTOM; 109 - ia_data->ops = ops; 110 - 111 - /* For tracking custom allocators that share the same ops */ 112 - list_add_tail(&ops->list, &ia_data->slist); 113 - 114 - return ia_data; 115 - } 116 - 117 - static bool use_same_ops(struct ioasid_allocator_ops *a, struct ioasid_allocator_ops *b) 118 - { 119 - return (a->free == b->free) && (a->alloc == b->alloc); 120 - } 121 - 122 - /** 123 - * ioasid_register_allocator - register a custom allocator 124 - * @ops: the custom allocator ops to be registered 125 - * 126 - * Custom allocators take precedence over the default xarray based allocator. 127 - * Private data associated with the IOASID allocated by the custom allocators 128 - * are managed by IOASID framework similar to data stored in xa by default 129 - * allocator. 130 - * 131 - * There can be multiple allocators registered but only one is active. In case 132 - * of runtime removal of a custom allocator, the next one is activated based 133 - * on the registration ordering. 134 - * 135 - * Multiple allocators can share the same alloc() function, in this case the 136 - * IOASID space is shared. 
137 - */ 138 - int ioasid_register_allocator(struct ioasid_allocator_ops *ops) 139 - { 140 - struct ioasid_allocator_data *ia_data; 141 - struct ioasid_allocator_data *pallocator; 142 - int ret = 0; 143 - 144 - spin_lock(&ioasid_allocator_lock); 145 - 146 - ia_data = ioasid_alloc_allocator(ops); 147 - if (!ia_data) { 148 - ret = -ENOMEM; 149 - goto out_unlock; 150 - } 151 - 152 - /* 153 - * No particular preference, we activate the first one and keep 154 - * the later registered allocators in a list in case the first one gets 155 - * removed due to hotplug. 156 - */ 157 - if (list_empty(&allocators_list)) { 158 - WARN_ON(active_allocator != &default_allocator); 159 - /* Use this new allocator if default is not active */ 160 - if (xa_empty(&active_allocator->xa)) { 161 - rcu_assign_pointer(active_allocator, ia_data); 162 - list_add_tail(&ia_data->list, &allocators_list); 163 - goto out_unlock; 164 - } 165 - pr_warn("Default allocator active with outstanding IOASID\n"); 166 - ret = -EAGAIN; 167 - goto out_free; 168 - } 169 - 170 - /* Check if the allocator is already registered */ 171 - list_for_each_entry(pallocator, &allocators_list, list) { 172 - if (pallocator->ops == ops) { 173 - pr_err("IOASID allocator already registered\n"); 174 - ret = -EEXIST; 175 - goto out_free; 176 - } else if (use_same_ops(pallocator->ops, ops)) { 177 - /* 178 - * If the new allocator shares the same ops, 179 - * then they will share the same IOASID space. 180 - * We should put them under the same xarray. 181 - */ 182 - list_add_tail(&ops->list, &pallocator->slist); 183 - goto out_free; 184 - } 185 - } 186 - list_add_tail(&ia_data->list, &allocators_list); 187 - 188 - spin_unlock(&ioasid_allocator_lock); 189 - return 0; 190 - out_free: 191 - kfree(ia_data); 192 - out_unlock: 193 - spin_unlock(&ioasid_allocator_lock); 194 - return ret; 195 - } 196 - EXPORT_SYMBOL_GPL(ioasid_register_allocator); 197 - 198 - /** 199 - * ioasid_unregister_allocator - Remove a custom IOASID allocator ops 200 - * @ops: the custom allocator to be removed 201 - * 202 - * Remove an allocator from the list, activate the next allocator in 203 - * the order it was registered. Or revert to default allocator if all 204 - * custom allocators are unregistered without outstanding IOASIDs. 205 - */ 206 - void ioasid_unregister_allocator(struct ioasid_allocator_ops *ops) 207 - { 208 - struct ioasid_allocator_data *pallocator; 209 - struct ioasid_allocator_ops *sops; 210 - 211 - spin_lock(&ioasid_allocator_lock); 212 - if (list_empty(&allocators_list)) { 213 - pr_warn("No custom IOASID allocators active!\n"); 214 - goto exit_unlock; 215 - } 216 - 217 - list_for_each_entry(pallocator, &allocators_list, list) { 218 - if (!use_same_ops(pallocator->ops, ops)) 219 - continue; 220 - 221 - if (list_is_singular(&pallocator->slist)) { 222 - /* No shared helper functions */ 223 - list_del(&pallocator->list); 224 - /* 225 - * All IOASIDs should have been freed before 226 - * the last allocator that shares the same ops 227 - * is unregistered. 
228 - */ 229 - WARN_ON(!xa_empty(&pallocator->xa)); 230 - if (list_empty(&allocators_list)) { 231 - pr_info("No custom IOASID allocators, switch to default.\n"); 232 - rcu_assign_pointer(active_allocator, &default_allocator); 233 - } else if (pallocator == active_allocator) { 234 - rcu_assign_pointer(active_allocator, 235 - list_first_entry(&allocators_list, 236 - struct ioasid_allocator_data, list)); 237 - pr_info("IOASID allocator changed"); 238 - } 239 - kfree_rcu(pallocator, rcu); 240 - break; 241 - } 242 - /* 243 - * Find the matching shared ops to delete, 244 - * but keep outstanding IOASIDs 245 - */ 246 - list_for_each_entry(sops, &pallocator->slist, list) { 247 - if (sops == ops) { 248 - list_del(&ops->list); 249 - break; 250 - } 251 - } 252 - break; 253 - } 254 - 255 - exit_unlock: 256 - spin_unlock(&ioasid_allocator_lock); 257 - } 258 - EXPORT_SYMBOL_GPL(ioasid_unregister_allocator); 259 - 260 - /** 261 - * ioasid_set_data - Set private data for an allocated ioasid 262 - * @ioasid: the ID to set data 263 - * @data: the private data 264 - * 265 - * For IOASID that is already allocated, private data can be set 266 - * via this API. Future lookup can be done via ioasid_find. 267 - */ 268 - int ioasid_set_data(ioasid_t ioasid, void *data) 269 - { 270 - struct ioasid_data *ioasid_data; 271 - int ret = 0; 272 - 273 - spin_lock(&ioasid_allocator_lock); 274 - ioasid_data = xa_load(&active_allocator->xa, ioasid); 275 - if (ioasid_data) 276 - rcu_assign_pointer(ioasid_data->private, data); 277 - else 278 - ret = -ENOENT; 279 - spin_unlock(&ioasid_allocator_lock); 280 - 281 - /* 282 - * Wait for readers to stop accessing the old private data, so the 283 - * caller can free it. 284 - */ 285 - if (!ret) 286 - synchronize_rcu(); 287 - 288 - return ret; 289 - } 290 - EXPORT_SYMBOL_GPL(ioasid_set_data); 291 - 292 - /** 293 - * ioasid_alloc - Allocate an IOASID 294 - * @set: the IOASID set 295 - * @min: the minimum ID (inclusive) 296 - * @max: the maximum ID (inclusive) 297 - * @private: data private to the caller 298 - * 299 - * Allocate an ID between @min and @max. The @private pointer is stored 300 - * internally and can be retrieved with ioasid_find(). 301 - * 302 - * Return: the allocated ID on success, or %INVALID_IOASID on failure. 303 - */ 304 - ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max, 305 - void *private) 306 - { 307 - struct ioasid_data *data; 308 - void *adata; 309 - ioasid_t id; 310 - 311 - data = kzalloc(sizeof(*data), GFP_ATOMIC); 312 - if (!data) 313 - return INVALID_IOASID; 314 - 315 - data->set = set; 316 - data->private = private; 317 - 318 - /* 319 - * Custom allocator needs allocator data to perform platform specific 320 - * operations. 321 - */ 322 - spin_lock(&ioasid_allocator_lock); 323 - adata = active_allocator->flags & IOASID_ALLOCATOR_CUSTOM ? 
active_allocator->ops->pdata : data; 324 - id = active_allocator->ops->alloc(min, max, adata); 325 - if (id == INVALID_IOASID) { 326 - pr_err("Failed ASID allocation %lu\n", active_allocator->flags); 327 - goto exit_free; 328 - } 329 - 330 - if ((active_allocator->flags & IOASID_ALLOCATOR_CUSTOM) && 331 - xa_alloc(&active_allocator->xa, &id, data, XA_LIMIT(id, id), GFP_ATOMIC)) { 332 - /* Custom allocator needs framework to store and track allocation results */ 333 - pr_err("Failed to alloc ioasid from %d\n", id); 334 - active_allocator->ops->free(id, active_allocator->ops->pdata); 335 - goto exit_free; 336 - } 337 - data->id = id; 338 - 339 - spin_unlock(&ioasid_allocator_lock); 340 - return id; 341 - exit_free: 342 - spin_unlock(&ioasid_allocator_lock); 343 - kfree(data); 344 - return INVALID_IOASID; 345 - } 346 - EXPORT_SYMBOL_GPL(ioasid_alloc); 347 - 348 - /** 349 - * ioasid_free - Free an ioasid 350 - * @ioasid: the ID to remove 351 - */ 352 - void ioasid_free(ioasid_t ioasid) 353 - { 354 - struct ioasid_data *ioasid_data; 355 - 356 - spin_lock(&ioasid_allocator_lock); 357 - ioasid_data = xa_load(&active_allocator->xa, ioasid); 358 - if (!ioasid_data) { 359 - pr_err("Trying to free unknown IOASID %u\n", ioasid); 360 - goto exit_unlock; 361 - } 362 - 363 - active_allocator->ops->free(ioasid, active_allocator->ops->pdata); 364 - /* Custom allocator needs additional steps to free the xa element */ 365 - if (active_allocator->flags & IOASID_ALLOCATOR_CUSTOM) { 366 - ioasid_data = xa_erase(&active_allocator->xa, ioasid); 367 - kfree_rcu(ioasid_data, rcu); 368 - } 369 - 370 - exit_unlock: 371 - spin_unlock(&ioasid_allocator_lock); 372 - } 373 - EXPORT_SYMBOL_GPL(ioasid_free); 374 - 375 - /** 376 - * ioasid_find - Find IOASID data 377 - * @set: the IOASID set 378 - * @ioasid: the IOASID to find 379 - * @getter: function to call on the found object 380 - * 381 - * The optional getter function allows to take a reference to the found object 382 - * under the rcu lock. The function can also check if the object is still valid: 383 - * if @getter returns false, then the object is invalid and NULL is returned. 384 - * 385 - * If the IOASID exists, return the private pointer passed to ioasid_alloc. 386 - * Private data can be NULL if not set. Return an error if the IOASID is not 387 - * found, or if @set is not NULL and the IOASID does not belong to the set. 388 - */ 389 - void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, 390 - bool (*getter)(void *)) 391 - { 392 - void *priv; 393 - struct ioasid_data *ioasid_data; 394 - struct ioasid_allocator_data *idata; 395 - 396 - rcu_read_lock(); 397 - idata = rcu_dereference(active_allocator); 398 - ioasid_data = xa_load(&idata->xa, ioasid); 399 - if (!ioasid_data) { 400 - priv = ERR_PTR(-ENOENT); 401 - goto unlock; 402 - } 403 - if (set && ioasid_data->set != set) { 404 - /* data found but does not belong to the set */ 405 - priv = ERR_PTR(-EACCES); 406 - goto unlock; 407 - } 408 - /* Now IOASID and its set is verified, we can return the private data */ 409 - priv = rcu_dereference(ioasid_data->private); 410 - if (getter && !getter(priv)) 411 - priv = NULL; 412 - unlock: 413 - rcu_read_unlock(); 414 - 415 - return priv; 416 - } 417 - EXPORT_SYMBOL_GPL(ioasid_find); 418 - 419 - MODULE_AUTHOR("Jean-Philippe Brucker <jean-philippe.brucker@arm.com>"); 420 - MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@linux.intel.com>"); 421 - MODULE_DESCRIPTION("IO Address Space ID (IOASID) allocator"); 422 - MODULE_LICENSE("GPL");
+19 -43
drivers/iommu/iommu-sva.c
··· 10 10 #include "iommu-sva.h" 11 11 12 12 static DEFINE_MUTEX(iommu_sva_lock); 13 - static DECLARE_IOASID_SET(iommu_sva_pasid); 13 + static DEFINE_IDA(iommu_global_pasid_ida); 14 14 15 - /** 16 - * iommu_sva_alloc_pasid - Allocate a PASID for the mm 17 - * @mm: the mm 18 - * @min: minimum PASID value (inclusive) 19 - * @max: maximum PASID value (inclusive) 20 - * 21 - * Try to allocate a PASID for this mm, or take a reference to the existing one 22 - * provided it fits within the [@min, @max] range. On success the PASID is 23 - * available in mm->pasid and will be available for the lifetime of the mm. 24 - * 25 - * Returns 0 on success and < 0 on error. 26 - */ 27 - int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max) 15 + /* Allocate a PASID for the mm within range (inclusive) */ 16 + static int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max) 28 17 { 29 18 int ret = 0; 30 - ioasid_t pasid; 31 19 32 - if (min == INVALID_IOASID || max == INVALID_IOASID || 20 + if (min == IOMMU_PASID_INVALID || 21 + max == IOMMU_PASID_INVALID || 33 22 min == 0 || max < min) 34 23 return -EINVAL; 35 24 ··· 28 39 mutex_lock(&iommu_sva_lock); 29 40 /* Is a PASID already associated with this mm? */ 30 41 if (mm_valid_pasid(mm)) { 31 - if (mm->pasid < min || mm->pasid >= max) 42 + if (mm->pasid < min || mm->pasid > max) 32 43 ret = -EOVERFLOW; 33 44 goto out; 34 45 } 35 46 36 - pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm); 37 - if (pasid == INVALID_IOASID) 38 - ret = -ENOMEM; 39 - else 40 - mm_pasid_set(mm, pasid); 47 + ret = ida_alloc_range(&iommu_global_pasid_ida, min, max, GFP_KERNEL); 48 + if (ret < min) 49 + goto out; 50 + mm->pasid = ret; 51 + ret = 0; 41 52 out: 42 53 mutex_unlock(&iommu_sva_lock); 43 54 return ret; 44 55 } 45 - EXPORT_SYMBOL_GPL(iommu_sva_alloc_pasid); 46 - 47 - /* ioasid_find getter() requires a void * argument */ 48 - static bool __mmget_not_zero(void *mm) 49 - { 50 - return mmget_not_zero(mm); 51 - } 52 - 53 - /** 54 - * iommu_sva_find() - Find mm associated to the given PASID 55 - * @pasid: Process Address Space ID assigned to the mm 56 - * 57 - * On success a reference to the mm is taken, and must be released with mmput(). 58 - * 59 - * Returns the mm corresponding to this PASID, or an error if not found. 60 - */ 61 - struct mm_struct *iommu_sva_find(ioasid_t pasid) 62 - { 63 - return ioasid_find(&iommu_sva_pasid, pasid, __mmget_not_zero); 64 - } 65 - EXPORT_SYMBOL_GPL(iommu_sva_find); 66 56 67 57 /** 68 58 * iommu_sva_bind_device() - Bind a process address space to a device ··· 209 241 mmput(mm); 210 242 211 243 return status; 244 + } 245 + 246 + void mm_pasid_drop(struct mm_struct *mm) 247 + { 248 + if (likely(!mm_valid_pasid(mm))) 249 + return; 250 + 251 + ida_free(&iommu_global_pasid_ida, mm->pasid); 212 252 }
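With the custom ioasid allocator removed, global PASIDs now come from a plain IDA, which hands out the smallest free ID in the requested range and returns a negative errno on failure. A minimal usage sketch of that pattern (names are hypothetical):

    #include <linux/idr.h>

    static DEFINE_IDA(example_pasid_ida);

    static int example_pasid_get(ioasid_t min, ioasid_t max)
    {
    	/* Returns an ID in [min, max], or -ENOMEM/-ENOSPC on failure. */
    	return ida_alloc_range(&example_pasid_ida, min, max, GFP_KERNEL);
    }

    static void example_pasid_put(ioasid_t pasid)
    {
    	ida_free(&example_pasid_ida, pasid);
    }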
-4
drivers/iommu/iommu-sva.h
··· 5 5 #ifndef _IOMMU_SVA_H 6 6 #define _IOMMU_SVA_H 7 7 8 - #include <linux/ioasid.h> 9 8 #include <linux/mm_types.h> 10 - 11 - int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max); 12 - struct mm_struct *iommu_sva_find(ioasid_t pasid); 13 9 14 10 /* I/O Page fault */ 15 11 struct device;
+143 -204
drivers/iommu/iommu.c
··· 88 88 89 89 static int iommu_bus_notifier(struct notifier_block *nb, 90 90 unsigned long action, void *data); 91 + static void iommu_release_device(struct device *dev); 91 92 static int iommu_alloc_default_domain(struct iommu_group *group, 92 93 struct device *dev); 93 94 static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus, ··· 458 457 459 458 } 460 459 461 - void iommu_release_device(struct device *dev) 460 + /* 461 + * Remove a device from a group's device list and return the group device 462 + * if successful. 463 + */ 464 + static struct group_device * 465 + __iommu_group_remove_device(struct iommu_group *group, struct device *dev) 462 466 { 467 + struct group_device *device; 468 + 469 + lockdep_assert_held(&group->mutex); 470 + list_for_each_entry(device, &group->devices, list) { 471 + if (device->dev == dev) { 472 + list_del(&device->list); 473 + return device; 474 + } 475 + } 476 + 477 + return NULL; 478 + } 479 + 480 + /* 481 + * Release a device from its group and decrement the iommu group reference 482 + * count. 483 + */ 484 + static void __iommu_group_release_device(struct iommu_group *group, 485 + struct group_device *grp_dev) 486 + { 487 + struct device *dev = grp_dev->dev; 488 + 489 + sysfs_remove_link(group->devices_kobj, grp_dev->name); 490 + sysfs_remove_link(&dev->kobj, "iommu_group"); 491 + 492 + trace_remove_device_from_group(group->id, dev); 493 + 494 + kfree(grp_dev->name); 495 + kfree(grp_dev); 496 + dev->iommu_group = NULL; 497 + kobject_put(group->devices_kobj); 498 + } 499 + 500 + static void iommu_release_device(struct device *dev) 501 + { 502 + struct iommu_group *group = dev->iommu_group; 503 + struct group_device *device; 463 504 const struct iommu_ops *ops; 464 505 465 - if (!dev->iommu) 506 + if (!dev->iommu || !group) 466 507 return; 467 508 468 509 iommu_device_unlink(dev->iommu->iommu_dev, dev); 469 510 511 + mutex_lock(&group->mutex); 512 + device = __iommu_group_remove_device(group, dev); 513 + 514 + /* 515 + * If the group has become empty then ownership must have been released, 516 + * and the current domain must be set back to NULL or the default 517 + * domain. 518 + */ 519 + if (list_empty(&group->devices)) 520 + WARN_ON(group->owner_cnt || 521 + group->domain != group->default_domain); 522 + 523 + /* 524 + * release_device() must stop using any attached domain on the device. 525 + * If there are still other devices in the group they are not affected 526 + * by this callback. 527 + * 528 + * The IOMMU driver must set the device to either an identity or 529 + * blocking translation and stop using any domain pointer, as it is 530 + * going to be freed.
531 + */ 470 532 ops = dev_iommu_ops(dev); 471 533 if (ops->release_device) 472 534 ops->release_device(dev); 535 + mutex_unlock(&group->mutex); 473 536 474 - iommu_group_remove_device(dev); 537 + if (device) 538 + __iommu_group_release_device(group, device); 539 + 475 540 module_put(ops->owner); 476 541 dev_iommu_free(dev); 477 542 } ··· 621 554 622 555 static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf) 623 556 { 624 - return sprintf(buf, "%s\n", group->name); 557 + return sysfs_emit(buf, "%s\n", group->name); 625 558 } 626 559 627 560 /** ··· 734 667 { 735 668 struct iommu_resv_region *region, *next; 736 669 struct list_head group_resv_regions; 737 - char *str = buf; 670 + int offset = 0; 738 671 739 672 INIT_LIST_HEAD(&group_resv_regions); 740 673 iommu_get_group_resv_regions(group, &group_resv_regions); 741 674 742 675 list_for_each_entry_safe(region, next, &group_resv_regions, list) { 743 - str += sprintf(str, "0x%016llx 0x%016llx %s\n", 744 - (long long int)region->start, 745 - (long long int)(region->start + 746 - region->length - 1), 747 - iommu_group_resv_type_string[region->type]); 676 + offset += sysfs_emit_at(buf, offset, "0x%016llx 0x%016llx %s\n", 677 + (long long)region->start, 678 + (long long)(region->start + 679 + region->length - 1), 680 + iommu_group_resv_type_string[region->type]); 748 681 kfree(region); 749 682 } 750 683 751 - return (str - buf); 684 + return offset; 752 685 } 753 686 754 687 static ssize_t iommu_group_show_type(struct iommu_group *group, 755 688 char *buf) 756 689 { 757 - char *type = "unknown\n"; 690 + char *type = "unknown"; 758 691 759 692 mutex_lock(&group->mutex); 760 693 if (group->default_domain) { 761 694 switch (group->default_domain->type) { 762 695 case IOMMU_DOMAIN_BLOCKED: 763 - type = "blocked\n"; 696 + type = "blocked"; 764 697 break; 765 698 case IOMMU_DOMAIN_IDENTITY: 766 - type = "identity\n"; 699 + type = "identity"; 767 700 break; 768 701 case IOMMU_DOMAIN_UNMANAGED: 769 - type = "unmanaged\n"; 702 + type = "unmanaged"; 770 703 break; 771 704 case IOMMU_DOMAIN_DMA: 772 - type = "DMA\n"; 705 + type = "DMA"; 773 706 break; 774 707 case IOMMU_DOMAIN_DMA_FQ: 775 - type = "DMA-FQ\n"; 708 + type = "DMA-FQ"; 776 709 break; 777 710 } 778 711 } 779 712 mutex_unlock(&group->mutex); 780 - strcpy(buf, type); 781 713 782 - return strlen(type); 714 + return sysfs_emit(buf, "%s\n", type); 783 715 } 784 716 785 717 static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL); ··· 809 743 kfree(group); 810 744 } 811 745 812 - static struct kobj_type iommu_group_ktype = { 746 + static const struct kobj_type iommu_group_ktype = { 813 747 .sysfs_ops = &iommu_group_sysfs_ops, 814 748 .release = iommu_group_release, 815 749 }; ··· 885 819 return group; 886 820 } 887 821 EXPORT_SYMBOL_GPL(iommu_group_alloc); 888 - 889 - struct iommu_group *iommu_group_get_by_id(int id) 890 - { 891 - struct kobject *group_kobj; 892 - struct iommu_group *group; 893 - const char *name; 894 - 895 - if (!iommu_group_kset) 896 - return NULL; 897 - 898 - name = kasprintf(GFP_KERNEL, "%d", id); 899 - if (!name) 900 - return NULL; 901 - 902 - group_kobj = kset_find_obj(iommu_group_kset, name); 903 - kfree(name); 904 - 905 - if (!group_kobj) 906 - return NULL; 907 - 908 - group = container_of(group_kobj, struct iommu_group, kobj); 909 - BUG_ON(group->id != id); 910 - 911 - kobject_get(group->devices_kobj); 912 - kobject_put(&group->kobj); 913 - 914 - return group; 915 - } 916 - EXPORT_SYMBOL_GPL(iommu_group_get_by_id); 917 822 918 823 /** 919 824 * 
iommu_group_get_iommudata - retrieve iommu_data registered for a group ··· 1109 1072 void iommu_group_remove_device(struct device *dev) 1110 1073 { 1111 1074 struct iommu_group *group = dev->iommu_group; 1112 - struct group_device *tmp_device, *device = NULL; 1075 + struct group_device *device; 1113 1076 1114 1077 if (!group) 1115 1078 return; ··· 1117 1080 dev_info(dev, "Removing from iommu group %d\n", group->id); 1118 1081 1119 1082 mutex_lock(&group->mutex); 1120 - list_for_each_entry(tmp_device, &group->devices, list) { 1121 - if (tmp_device->dev == dev) { 1122 - device = tmp_device; 1123 - list_del(&device->list); 1124 - break; 1125 - } 1126 - } 1083 + device = __iommu_group_remove_device(group, dev); 1127 1084 mutex_unlock(&group->mutex); 1128 1085 1129 - if (!device) 1130 - return; 1131 - 1132 - sysfs_remove_link(group->devices_kobj, device->name); 1133 - sysfs_remove_link(&dev->kobj, "iommu_group"); 1134 - 1135 - trace_remove_device_from_group(group->id, dev); 1136 - 1137 - kfree(device->name); 1138 - kfree(device); 1139 - dev->iommu_group = NULL; 1140 - kobject_put(group->devices_kobj); 1086 + if (device) 1087 + __iommu_group_release_device(group, device); 1141 1088 } 1142 1089 EXPORT_SYMBOL_GPL(iommu_group_remove_device); 1143 1090 ··· 1989 1968 return NULL; 1990 1969 1991 1970 domain->type = type; 1992 - /* Assume all sizes by default; the driver may override this later */ 1993 - domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap; 1971 + /* 1972 + * If not already set, assume all sizes by default; the driver 1973 + * may override this later 1974 + */ 1975 + if (!domain->pgsize_bitmap) 1976 + domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap; 1977 + 1994 1978 if (!domain->ops) 1995 1979 domain->ops = bus->iommu_ops->default_domain_ops; 1996 1980 ··· 2847 2821 EXPORT_SYMBOL_GPL(iommu_dev_disable_feature); 2848 2822 2849 2823 /* 2850 - * Changes the default domain of an iommu group that has *only* one device 2824 + * Changes the default domain of an iommu group 2851 2825 * 2852 2826 * @group: The group for which the default domain should be changed 2853 - * @prev_dev: The device in the group (this is used to make sure that the device 2854 - * hasn't changed after the caller has called this function) 2827 + * @dev: The first device in the group 2855 2828 * @type: The type of the new default domain that gets associated with the group 2856 2829 * 2857 2830 * Returns 0 on success and error code on failure ··· 2861 2836 * Please take a closer look if intended to use for other purposes. 2862 2837 */ 2863 2838 static int iommu_change_dev_def_domain(struct iommu_group *group, 2864 - struct device *prev_dev, int type) 2839 + struct device *dev, int type) 2865 2840 { 2841 + struct __group_domain_type gtype = {NULL, 0}; 2866 2842 struct iommu_domain *prev_dom; 2867 - struct group_device *grp_dev; 2868 - int ret, dev_def_dom; 2869 - struct device *dev; 2843 + int ret; 2870 2844 2871 - mutex_lock(&group->mutex); 2872 - 2873 - if (group->default_domain != group->domain) { 2874 - dev_err_ratelimited(prev_dev, "Group not assigned to default domain\n"); 2875 - ret = -EBUSY; 2876 - goto out; 2877 - } 2878 - 2879 - /* 2880 - * iommu group wasn't locked while acquiring device lock in 2881 - * iommu_group_store_type(). So, make sure that the device count hasn't 2882 - * changed while acquiring device lock. 2883 - * 2884 - * Changing default domain of an iommu group with two or more devices 2885 - * isn't supported because there could be a potential deadlock. 
Consider 2886 - * the following scenario. T1 is trying to acquire device locks of all 2887 - * the devices in the group and before it could acquire all of them, 2888 - * there could be another thread T2 (from different sub-system and use 2889 - * case) that has already acquired some of the device locks and might be 2890 - * waiting for T1 to release other device locks. 2891 - */ 2892 - if (iommu_group_device_count(group) != 1) { 2893 - dev_err_ratelimited(prev_dev, "Cannot change default domain: Group has more than one device\n"); 2894 - ret = -EINVAL; 2895 - goto out; 2896 - } 2897 - 2898 - /* Since group has only one device */ 2899 - grp_dev = list_first_entry(&group->devices, struct group_device, list); 2900 - dev = grp_dev->dev; 2901 - 2902 - if (prev_dev != dev) { 2903 - dev_err_ratelimited(prev_dev, "Cannot change default domain: Device has been changed\n"); 2904 - ret = -EBUSY; 2905 - goto out; 2906 - } 2845 + lockdep_assert_held(&group->mutex); 2907 2846 2908 2847 prev_dom = group->default_domain; 2909 - if (!prev_dom) { 2910 - ret = -EINVAL; 2911 - goto out; 2912 - } 2913 - 2914 - dev_def_dom = iommu_get_def_domain_type(dev); 2848 + __iommu_group_for_each_dev(group, &gtype, 2849 + probe_get_default_domain_type); 2915 2850 if (!type) { 2916 2851 /* 2917 2852 * If the user hasn't requested any specific type of domain and 2918 2853 * if the device supports both the domains, then default to the 2919 2854 * domain the device was booted with 2920 2855 */ 2921 - type = dev_def_dom ? : iommu_def_domain_type; 2922 - } else if (dev_def_dom && type != dev_def_dom) { 2923 - dev_err_ratelimited(prev_dev, "Device cannot be in %s domain\n", 2856 + type = gtype.type ? : iommu_def_domain_type; 2857 + } else if (gtype.type && type != gtype.type) { 2858 + dev_err_ratelimited(dev, "Device cannot be in %s domain\n", 2924 2859 iommu_domain_type_str(type)); 2925 - ret = -EINVAL; 2926 - goto out; 2860 + return -EINVAL; 2927 2861 } 2928 2862 2929 2863 /* 2930 2864 * Switch to a new domain only if the requested domain type is different 2931 2865 * from the existing default domain type 2932 2866 */ 2933 - if (prev_dom->type == type) { 2934 - ret = 0; 2935 - goto out; 2936 - } 2867 + if (prev_dom->type == type) 2868 + return 0; 2937 2869 2938 - /* We can bring up a flush queue without tearing down the domain */ 2939 - if (type == IOMMU_DOMAIN_DMA_FQ && prev_dom->type == IOMMU_DOMAIN_DMA) { 2940 - ret = iommu_dma_init_fq(prev_dom); 2941 - if (!ret) 2942 - prev_dom->type = IOMMU_DOMAIN_DMA_FQ; 2943 - goto out; 2944 - } 2870 + group->default_domain = NULL; 2871 + group->domain = NULL; 2945 2872 2946 2873 /* Sets group->default_domain to the newly allocated domain */ 2947 2874 ret = iommu_group_alloc_default_domain(dev->bus, group, type); 2948 2875 if (ret) 2949 - goto out; 2876 + goto restore_old_domain; 2950 2877 2951 - ret = iommu_create_device_direct_mappings(group, dev); 2878 + ret = iommu_group_create_direct_mappings(group); 2952 2879 if (ret) 2953 2880 goto free_new_domain; 2954 2881 2955 - ret = __iommu_attach_device(group->default_domain, dev); 2882 + ret = __iommu_attach_group(group->default_domain, group); 2956 2883 if (ret) 2957 2884 goto free_new_domain; 2958 2885 2959 - group->domain = group->default_domain; 2960 - 2961 - /* 2962 - * Release the mutex here because ops->probe_finalize() call-back of 2963 - * some vendor IOMMU drivers calls arm_iommu_attach_device() which 2964 - * in-turn might call back into IOMMU core code, where it tries to take 2965 - * group->mutex, resulting in a deadlock. 
2966 - */ 2967 - mutex_unlock(&group->mutex); 2968 - 2969 - /* Make sure dma_ops is appropriatley set */ 2970 - iommu_group_do_probe_finalize(dev, group->default_domain); 2971 2886 iommu_domain_free(prev_dom); 2887 + 2972 2888 return 0; 2973 2889 2974 2890 free_new_domain: 2975 2891 iommu_domain_free(group->default_domain); 2892 + restore_old_domain: 2976 2893 group->default_domain = prev_dom; 2977 2894 group->domain = prev_dom; 2978 - 2979 - out: 2980 - mutex_unlock(&group->mutex); 2981 2895 2982 2896 return ret; 2983 2897 } ··· 2927 2963 * transition. Return failure if this isn't met. 2928 2964 * 2929 2965 * We need to consider the race between this and the device release path. 2930 - * device_lock(dev) is used here to guarantee that the device release path 2966 + * group->mutex is used here to guarantee that the device release path 2931 2967 * will not be entered at the same time. 2932 2968 */ 2933 2969 static ssize_t iommu_group_store_type(struct iommu_group *group, ··· 2954 2990 else 2955 2991 return -EINVAL; 2956 2992 2957 - /* 2958 - * Lock/Unlock the group mutex here before device lock to 2959 - * 1. Make sure that the iommu group has only one device (this is a 2960 - * prerequisite for step 2) 2961 - * 2. Get struct *dev which is needed to lock device 2962 - */ 2963 2993 mutex_lock(&group->mutex); 2964 - if (iommu_group_device_count(group) != 1) { 2994 + /* We can bring up a flush queue without tearing down the domain. */ 2995 + if (req_type == IOMMU_DOMAIN_DMA_FQ && 2996 + group->default_domain->type == IOMMU_DOMAIN_DMA) { 2997 + ret = iommu_dma_init_fq(group->default_domain); 2998 + if (!ret) 2999 + group->default_domain->type = IOMMU_DOMAIN_DMA_FQ; 2965 3000 mutex_unlock(&group->mutex); 2966 - pr_err_ratelimited("Cannot change default domain: Group has more than one device\n"); 2967 - return -EINVAL; 3001 + 3002 + return ret ?: count; 2968 3003 } 2969 3004 2970 - /* Since group has only one device */ 3005 + /* Otherwise, ensure that device exists and no driver is bound. */ 3006 + if (list_empty(&group->devices) || group->owner_cnt) { 3007 + mutex_unlock(&group->mutex); 3008 + return -EPERM; 3009 + } 3010 + 2971 3011 grp_dev = list_first_entry(&group->devices, struct group_device, list); 2972 3012 dev = grp_dev->dev; 2973 - get_device(dev); 3013 + 3014 + ret = iommu_change_dev_def_domain(group, dev, req_type); 2974 3015 2975 3016 /* 2976 - * Don't hold the group mutex because taking group mutex first and then 2977 - * the device lock could potentially cause a deadlock as below. Assume 2978 - * two threads T1 and T2. T1 is trying to change default domain of an 2979 - * iommu group and T2 is trying to hot unplug a device or release [1] VF 2980 - * of a PCIe device which is in the same iommu group. T1 takes group 2981 - * mutex and before it could take device lock assume T2 has taken device 2982 - * lock and is yet to take group mutex. Now, both the threads will be 2983 - * waiting for the other thread to release lock. Below, lock order was 2984 - * suggested. 
2985 - * device_lock(dev); 2986 - * mutex_lock(&group->mutex); 2987 - * iommu_change_dev_def_domain(); 2988 - * mutex_unlock(&group->mutex); 2989 - * device_unlock(dev); 2990 - * 2991 - * [1] Typical device release path 2992 - * device_lock() from device/driver core code 2993 - * -> bus_notifier() 2994 - * -> iommu_bus_notifier() 2995 - * -> iommu_release_device() 2996 - * -> ops->release_device() vendor driver calls back iommu core code 2997 - * -> mutex_lock() from iommu core code 3017 + * Release the mutex here because ops->probe_finalize() call-back of 3018 + * some vendor IOMMU drivers calls arm_iommu_attach_device() which 3019 + * in turn might call back into IOMMU core code, where it tries to take 3020 + * group->mutex, resulting in a deadlock. 2998 3021 */ 2999 3022 mutex_unlock(&group->mutex); 3000 3023 3001 - /* Check if the device in the group still has a driver bound to it */ 3002 - device_lock(dev); 3003 - if (device_is_bound(dev) && !(req_type == IOMMU_DOMAIN_DMA_FQ && 3004 - group->default_domain->type == IOMMU_DOMAIN_DMA)) { 3005 - pr_err_ratelimited("Device is still bound to driver\n"); 3006 - ret = -EBUSY; 3007 - goto out; 3008 - } 3024 + /* Make sure dma_ops is appropriately set */ 3025 + if (!ret) 3026 + __iommu_group_dma_finalize(group); 3009 - ret = iommu_change_dev_def_domain(group, dev, req_type); 3011 - ret = ret ?: count; 3012 - 3013 - out: 3014 - device_unlock(dev); 3015 - put_device(dev); 3016 - 3017 - return ret; 3028 + return ret ?: count; 3018 3029 } 3019 3030 3020 3031 static bool iommu_is_default_domain(struct iommu_group *group)
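For context, iommu_group_store_type() above runs when userspace writes a domain type ("identity", "DMA", "DMA-FQ" or "auto") to the group's sysfs type attribute, and with this rework the write is accepted for multi-device groups as long as no driver is bound. A hypothetical invocation (the group number is made up):

    # echo DMA-FQ > /sys/kernel/iommu_groups/7/type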
+15 -8
drivers/iommu/ipmmu-vmsa.c
··· 30 30 #define arm_iommu_create_mapping(...) NULL 31 31 #define arm_iommu_attach_device(...) -ENODEV 32 32 #define arm_iommu_release_mapping(...) do {} while (0) 33 - #define arm_iommu_detach_device(...) do {} while (0) 34 33 #endif 35 34 36 35 #define IPMMU_CTX_MAX 16U ··· 696 697 697 698 static const struct soc_device_attribute soc_denylist[] = { 698 699 { .soc_id = "r8a774a1", }, 699 - { .soc_id = "r8a7795", .revision = "ES1.*" }, 700 700 { .soc_id = "r8a7795", .revision = "ES2.*" }, 701 701 { .soc_id = "r8a7796", }, 702 702 { /* sentinel */ } ··· 818 820 819 821 static void ipmmu_release_device(struct device *dev) 820 822 { 821 - arm_iommu_detach_device(dev); 823 + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 824 + struct ipmmu_vmsa_device *mmu = to_ipmmu(dev); 825 + unsigned int i; 826 + 827 + for (i = 0; i < fwspec->num_ids; ++i) { 828 + unsigned int utlb = fwspec->ids[i]; 829 + 830 + ipmmu_imuctr_write(mmu, utlb, 0); 831 + mmu->utlb_ctx[utlb] = IPMMU_CTX_INVALID; 832 + } 833 + 834 + arm_iommu_release_mapping(mmu->mapping); 822 835 } 823 836 824 837 static struct iommu_group *ipmmu_find_group(struct device *dev) ··· 1023 1014 * the lack of has_cache_leaf_nodes flag or renesas,ipmmu-main property. 1024 1015 */ 1025 1016 if (!mmu->features->has_cache_leaf_nodes || 1026 - !of_find_property(pdev->dev.of_node, "renesas,ipmmu-main", NULL)) 1017 + !of_property_present(pdev->dev.of_node, "renesas,ipmmu-main")) 1027 1018 mmu->root = mmu; 1028 1019 else 1029 1020 mmu->root = ipmmu_find_root(); ··· 1082 1073 return 0; 1083 1074 } 1084 1075 1085 - static int ipmmu_remove(struct platform_device *pdev) 1076 + static void ipmmu_remove(struct platform_device *pdev) 1086 1077 { 1087 1078 struct ipmmu_vmsa_device *mmu = platform_get_drvdata(pdev); 1088 1079 ··· 1092 1083 arm_iommu_release_mapping(mmu->mapping); 1093 1084 1094 1085 ipmmu_device_reset(mmu); 1095 - 1096 - return 0; 1097 1086 } 1098 1087 1099 1088 #ifdef CONFIG_PM_SLEEP ··· 1138 1131 .pm = DEV_PM_OPS, 1139 1132 }, 1140 1133 .probe = ipmmu_probe, 1141 - .remove = ipmmu_remove, 1134 + .remove_new = ipmmu_remove, 1142 1135 }; 1143 1136 builtin_platform_driver(ipmmu_driver);
+2 -3
drivers/iommu/msm_iommu.c
··· 811 811 {} 812 812 }; 813 813 814 - static int msm_iommu_remove(struct platform_device *pdev) 814 + static void msm_iommu_remove(struct platform_device *pdev) 815 815 { 816 816 struct msm_iommu_dev *iommu = platform_get_drvdata(pdev); 817 817 818 818 clk_unprepare(iommu->clk); 819 819 clk_unprepare(iommu->pclk); 820 - return 0; 821 820 } 822 821 823 822 static struct platform_driver msm_iommu_driver = { ··· 825 826 .of_match_table = msm_iommu_dt_match, 826 827 }, 827 828 .probe = msm_iommu_probe, 828 - .remove = msm_iommu_remove, 829 + .remove_new = msm_iommu_remove, 829 830 }; 830 831 builtin_platform_driver(msm_iommu_driver);
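The msm conversion above is the same mechanical change applied to every platform driver in this pull: the remove callback moves to the void-returning .remove_new hook, because a returned error was only logged by the driver core and the device was unbound regardless. A generic sketch of the pattern (driver and function names are hypothetical):

    #include <linux/platform_device.h>

    static int example_probe(struct platform_device *pdev)
    {
    	return 0;
    }

    static void example_remove(struct platform_device *pdev)
    {
    	/* tear down; there is no meaningful error to propagate */
    }

    static struct platform_driver example_driver = {
    	.probe      = example_probe,
    	.remove_new = example_remove,	/* was: int (*remove)(...) */
    };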
+125 -33
drivers/iommu/mtk_iommu.c
··· 8 8 #include <linux/clk.h> 9 9 #include <linux/component.h> 10 10 #include <linux/device.h> 11 - #include <linux/dma-direct.h> 12 11 #include <linux/err.h> 13 12 #include <linux/interrupt.h> 14 13 #include <linux/io.h> ··· 196 197 197 198 char *pericfg_comp_str; 198 199 struct list_head *hw_list; 199 - unsigned int iova_region_nr; 200 - const struct mtk_iommu_iova_region *iova_region; 201 200 202 + /* 203 + * The IOMMU HW may support 16GB iova. In order to balance the IOVA ranges, 204 + * different masters will be put in different iova ranges, for example vcodec 205 + * is in 4G-8G and cam is in 8G-12G. Meanwhile, some masters may have the 206 + * special IOVA range requirement, like CCU can only support the address 207 + * 0x40000000-0x44000000. 208 + * Here are the iova ranges this SoC supports and which larbs/ports are in 209 + * which region. 210 + * 211 + * All 16GB of iova share one pgtable, but each region is an iommu group. 212 + */ 213 + struct { 214 + unsigned int iova_region_nr; 215 + const struct mtk_iommu_iova_region *iova_region; 216 + /* 217 + * Indicate the correspondence between larbs, ports and regions. 218 + * 219 + * The index is the same as iova_region and larb port numbers are 220 + * described as bit positions. 221 + * For example, storing BIT(0) at index 2,1 means "larb 1, port0 is in region 2". 222 + * [2] = { [1] = BIT(0) } 223 + */ 224 + const u32 (*iova_region_larb_msk)[MTK_LARB_NR_MAX]; 225 + }; 226 + 227 + /* 228 + * The IOMMU HW may have 5 banks. Each bank has an independent pgtable. 229 + * This lists how many banks the SoC supports/enables and which ports are in which bank. 230 + */ 231 + struct { 232 + u8 banks_num; 233 + bool banks_enable[MTK_IOMMU_BANK_MAX]; 234 + unsigned int banks_portmsk[MTK_IOMMU_BANK_MAX]; 235 + }; 236 + 205 236 unsigned char larbid_remap[MTK_LARB_COM_MAX][MTK_LARB_SUBCOM_MAX]; 206 237 }; ··· 332 303 333 304 #define for_each_m4u(data, head) list_for_each_entry(data, head, list) 334 305 306 + #define MTK_IOMMU_IOVA_SZ_4G (SZ_4G - SZ_8M) /* 8M as gap */ 307 + 335 308 static const struct mtk_iommu_iova_region single_domain[] = { 336 - {.iova_base = 0, .size = SZ_4G}, 309 + {.iova_base = 0, .size = MTK_IOMMU_IOVA_SZ_4G}, 337 310 }; 338 311 339 - static const struct mtk_iommu_iova_region mt8192_multi_dom[] = { 340 - { .iova_base = 0x0, .size = SZ_4G}, /* 0 ~ 4G */ 312 + #define MT8192_MULTI_REGION_NR_MAX 6 313 + 314 + #define MT8192_MULTI_REGION_NR (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) ?
\ 315 + MT8192_MULTI_REGION_NR_MAX : 1) 316 + 317 + static const struct mtk_iommu_iova_region mt8192_multi_dom[MT8192_MULTI_REGION_NR] = { 318 + { .iova_base = 0x0, .size = MTK_IOMMU_IOVA_SZ_4G}, /* 0 ~ 4G, */ 341 319 #if IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) 342 - { .iova_base = SZ_4G, .size = SZ_4G}, /* 4G ~ 8G */ 343 - { .iova_base = SZ_4G * 2, .size = SZ_4G}, /* 8G ~ 12G */ 344 - { .iova_base = SZ_4G * 3, .size = SZ_4G}, /* 12G ~ 16G */ 320 + { .iova_base = SZ_4G, .size = MTK_IOMMU_IOVA_SZ_4G}, /* 4G ~ 8G */ 321 + { .iova_base = SZ_4G * 2, .size = MTK_IOMMU_IOVA_SZ_4G}, /* 8G ~ 12G */ 322 + { .iova_base = SZ_4G * 3, .size = MTK_IOMMU_IOVA_SZ_4G}, /* 12G ~ 16G */ 345 323 346 324 { .iova_base = 0x240000000ULL, .size = 0x4000000}, /* CCU0 */ 347 325 { .iova_base = 0x244000000ULL, .size = 0x4000000}, /* CCU1 */ ··· 544 508 static int mtk_iommu_get_iova_region_id(struct device *dev, 545 509 const struct mtk_iommu_plat_data *plat_data) 546 510 { 547 - const struct mtk_iommu_iova_region *rgn = plat_data->iova_region; 548 - const struct bus_dma_region *dma_rgn = dev->dma_range_map; 549 - int i, candidate = -1; 550 - dma_addr_t dma_end; 511 + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 512 + unsigned int portidmsk = 0, larbid; 513 + const u32 *rgn_larb_msk; 514 + int i; 551 515 552 - if (!dma_rgn || plat_data->iova_region_nr == 1) 516 + if (plat_data->iova_region_nr == 1) 553 517 return 0; 554 518 555 - dma_end = dma_rgn->dma_start + dma_rgn->size - 1; 556 - for (i = 0; i < plat_data->iova_region_nr; i++, rgn++) { 557 - /* Best fit. */ 558 - if (dma_rgn->dma_start == rgn->iova_base && 559 - dma_end == rgn->iova_base + rgn->size - 1) 519 + larbid = MTK_M4U_TO_LARB(fwspec->ids[0]); 520 + for (i = 0; i < fwspec->num_ids; i++) 521 + portidmsk |= BIT(MTK_M4U_TO_PORT(fwspec->ids[i])); 522 + 523 + for (i = 0; i < plat_data->iova_region_nr; i++) { 524 + rgn_larb_msk = plat_data->iova_region_larb_msk[i]; 525 + if (!rgn_larb_msk) 526 + continue; 527 + 528 + if ((rgn_larb_msk[larbid] & portidmsk) == portidmsk) 560 529 return i; 561 - /* ok if it is inside this region. 
*/ 562 - if (dma_rgn->dma_start >= rgn->iova_base && 563 - dma_end < rgn->iova_base + rgn->size) 564 - candidate = i; 565 530 } 566 531 567 - if (candidate >= 0) 568 - return candidate; 569 - dev_err(dev, "Can NOT find the iommu domain id(%pad 0x%llx).\n", 570 - &dma_rgn->dma_start, dma_rgn->size); 532 + dev_err(dev, "Can NOT find the region for larb(%d-%x).\n", 533 + larbid, portidmsk); 571 534 return -EINVAL; 572 535 } 573 536 ··· 737 702 pm_runtime_put(m4udev); 738 703 } 739 704 mutex_unlock(&data->mutex); 705 + 706 + if (region_id > 0) { 707 + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(34)); 708 + if (ret) { 709 + dev_err(m4udev, "Failed to set dma_mask for %s(%d).\n", dev_name(dev), ret); 710 + return ret; 711 + } 712 + } 740 713 741 714 return mtk_iommu_config(data, dev, true, region_id); 742 715 ··· 1301 1258 return PTR_ERR(data->bclk); 1302 1259 } 1303 1260 1261 + if (MTK_IOMMU_HAS_FLAG(data->plat_data, PGTABLE_PA_35_EN)) { 1262 + ret = dma_set_mask(dev, DMA_BIT_MASK(35)); 1263 + if (ret) { 1264 + dev_err(dev, "Failed to set dma_mask 35.\n"); 1265 + return ret; 1266 + } 1267 + } 1268 + 1304 1269 pm_runtime_enable(dev); 1305 1270 1306 1271 if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) { ··· 1367 1316 return ret; 1368 1317 } 1369 1318 1370 - static int mtk_iommu_remove(struct platform_device *pdev) 1319 + static void mtk_iommu_remove(struct platform_device *pdev) 1371 1320 { 1372 1321 struct mtk_iommu_data *data = platform_get_drvdata(pdev); 1373 1322 struct mtk_iommu_bank_data *bank; ··· 1389 1338 continue; 1390 1339 devm_free_irq(&pdev->dev, bank->irq, bank); 1391 1340 } 1392 - return 0; 1393 1341 } 1394 1342 1395 1343 static int __maybe_unused mtk_iommu_runtime_suspend(struct device *dev) ··· 1542 1492 .larbid_remap = {{0}, {4}, {5}, {6}, {7}, {2}, {3}, {1}}, 1543 1493 }; 1544 1494 1495 + static const unsigned int mt8186_larb_region_msk[MT8192_MULTI_REGION_NR_MAX][MTK_LARB_NR_MAX] = { 1496 + [0] = {~0, ~0, ~0}, /* Region0: all ports for larb0/1/2 */ 1497 + [1] = {0, 0, 0, 0, ~0, 0, 0, ~0}, /* Region1: larb4/7 */ 1498 + [2] = {0, 0, 0, 0, 0, 0, 0, 0, /* Region2: larb8/9/11/13/16/17/19/20 */ 1499 + ~0, ~0, 0, ~0, 0, ~(u32)(BIT(9) | BIT(10)), 0, 0, 1500 + /* larb13: the other ports except port9/10 */ 1501 + ~0, ~0, 0, ~0, ~0}, 1502 + [3] = {0}, 1503 + [4] = {[13] = BIT(9) | BIT(10)}, /* larb13 port9/10 */ 1504 + [5] = {[14] = ~0}, /* larb14 */ 1505 + }; 1506 + 1545 1507 static const struct mtk_iommu_plat_data mt8186_data_mm = { 1546 1508 .m4u_plat = M4U_MT8186, 1547 1509 .flags = HAS_BCLK | HAS_SUB_COMM_2BITS | OUT_ORDER_WR_EN | ··· 1566 1504 .banks_enable = {true}, 1567 1505 .iova_region = mt8192_multi_dom, 1568 1506 .iova_region_nr = ARRAY_SIZE(mt8192_multi_dom), 1507 + .iova_region_larb_msk = mt8186_larb_region_msk, 1508 + }; 1509 + 1510 + static const unsigned int mt8192_larb_region_msk[MT8192_MULTI_REGION_NR_MAX][MTK_LARB_NR_MAX] = { 1511 + [0] = {~0, ~0}, /* Region0: larb0/1 */ 1512 + [1] = {0, 0, 0, 0, ~0, ~0, 0, ~0}, /* Region1: larb4/5/7 */ 1513 + [2] = {0, 0, ~0, 0, 0, 0, 0, 0, /* Region2: larb2/9/11/13/14/16/17/18/19/20 */ 1514 + 0, ~0, 0, ~0, 0, ~(u32)(BIT(9) | BIT(10)), ~(u32)(BIT(4) | BIT(5)), 0, 1515 + ~0, ~0, ~0, ~0, ~0}, 1516 + [3] = {0}, 1517 + [4] = {[13] = BIT(9) | BIT(10)}, /* larb13 port9/10 */ 1518 + [5] = {[14] = BIT(4) | BIT(5)}, /* larb14 port4/5 */ 1569 1519 }; 1570 1520 1571 1521 static const struct mtk_iommu_plat_data mt8192_data = { ··· 1589 1515 .banks_enable = {true}, 1590 1516 .iova_region = mt8192_multi_dom, 1591 1517 
.iova_region_nr = ARRAY_SIZE(mt8192_multi_dom), 1518 + .iova_region_larb_msk = mt8192_larb_region_msk, 1592 1519 .larbid_remap = {{0}, {1}, {4, 5}, {7}, {2}, {9, 11, 19, 20}, 1593 1520 {0, 14, 16}, {0, 13, 18, 17}}, 1594 1521 }; ··· 1609 1534 .iova_region_nr = ARRAY_SIZE(single_domain), 1610 1535 }; 1611 1536 1537 + static const unsigned int mt8195_larb_region_msk[MT8192_MULTI_REGION_NR_MAX][MTK_LARB_NR_MAX] = { 1538 + [0] = {~0, ~0, ~0, ~0}, /* Region0: all ports for larb0/1/2/3 */ 1539 + [1] = {0, 0, 0, 0, 0, 0, 0, 0, 1540 + 0, 0, 0, 0, 0, 0, 0, 0, 1541 + 0, 0, 0, ~0, ~0, ~0, ~0, ~0, /* Region1: larb19/20/21/22/23/24 */ 1542 + ~0}, 1543 + [2] = {0, 0, 0, 0, ~0, ~0, ~0, ~0, /* Region2: the other larbs. */ 1544 + ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 1545 + ~0, ~0, 0, 0, 0, 0, 0, 0, 1546 + 0, ~0, ~0, ~0, ~0}, 1547 + [3] = {0}, 1548 + [4] = {[18] = BIT(0) | BIT(1)}, /* Only larb18 port0/1 */ 1549 + [5] = {[18] = BIT(2) | BIT(3)}, /* Only larb18 port2/3 */ 1550 + }; 1551 + 1612 1552 static const struct mtk_iommu_plat_data mt8195_data_vdo = { 1613 1553 .m4u_plat = M4U_MT8195, 1614 1554 .flags = HAS_BCLK | HAS_SUB_COMM_2BITS | OUT_ORDER_WR_EN | ··· 1634 1544 .banks_enable = {true}, 1635 1545 .iova_region = mt8192_multi_dom, 1636 1546 .iova_region_nr = ARRAY_SIZE(mt8192_multi_dom), 1547 + .iova_region_larb_msk = mt8195_larb_region_msk, 1637 1548 .larbid_remap = {{2, 0}, {21}, {24}, {7}, {19}, {9, 10, 11}, 1638 1549 {13, 17, 15/* 17b */, 25}, {5}}, 1639 1550 }; ··· 1649 1558 .banks_enable = {true}, 1650 1559 .iova_region = mt8192_multi_dom, 1651 1560 .iova_region_nr = ARRAY_SIZE(mt8192_multi_dom), 1561 + .iova_region_larb_msk = mt8195_larb_region_msk, 1652 1562 .larbid_remap = {{1}, {3}, 1653 1563 {22, MTK_INVALID_LARBID, MTK_INVALID_LARBID, MTK_INVALID_LARBID, 23}, 1654 1564 {8}, {20}, {12}, ··· 1687 1595 1688 1596 static struct platform_driver mtk_iommu_driver = { 1689 1597 .probe = mtk_iommu_probe, 1690 - .remove = mtk_iommu_remove, 1598 + .remove_new = mtk_iommu_remove, 1691 1599 .driver = { 1692 1600 .name = "mtk-iommu", 1693 1601 .of_match_table = mtk_iommu_of_ids,
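The iova_region_larb_msk tables above encode, for each region, which ports of every larb belong to it; the driver ORs a device's port numbers into a bitmask and picks the first region whose per-larb mask covers all of them. A condensed, standalone sketch of that lookup (constants and names are hypothetical):

    #define EXAMPLE_LARB_NR_MAX	32

    static int example_region_of(const u32 rgn_msk[][EXAMPLE_LARB_NR_MAX],
    			     int nr_regions, unsigned int larbid,
    			     u32 portidmsk)
    {
    	int i;

    	for (i = 0; i < nr_regions; i++)
    		if ((rgn_msk[i][larbid] & portidmsk) == portidmsk)
    			return i;	/* every port bit falls in region i */
    	return -EINVAL;
    }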
+2 -3
drivers/iommu/mtk_iommu_v1.c
··· 703 703 return ret; 704 704 } 705 705 706 - static int mtk_iommu_v1_remove(struct platform_device *pdev) 706 + static void mtk_iommu_v1_remove(struct platform_device *pdev) 707 707 { 708 708 struct mtk_iommu_v1_data *data = platform_get_drvdata(pdev); 709 709 ··· 713 713 clk_disable_unprepare(data->bclk); 714 714 devm_free_irq(&pdev->dev, data->irq, data); 715 715 component_master_del(&pdev->dev, &mtk_iommu_v1_com_ops); 716 - return 0; 717 716 } 718 717 719 718 static int __maybe_unused mtk_iommu_v1_suspend(struct device *dev) ··· 751 752 752 753 static struct platform_driver mtk_iommu_v1_driver = { 753 754 .probe = mtk_iommu_v1_probe, 754 - .remove = mtk_iommu_v1_remove, 755 + .remove_new = mtk_iommu_v1_remove, 755 756 .driver = { 756 757 .name = "mtk-iommu-v1", 757 758 .of_match_table = mtk_iommu_v1_of_ids,
+3 -4
drivers/iommu/omap-iommu.c
··· 1191 1191 return err; 1192 1192 if (obj->nr_tlb_entries != 32 && obj->nr_tlb_entries != 8) 1193 1193 return -EINVAL; 1194 - if (of_find_property(of, "ti,iommu-bus-err-back", NULL)) 1194 + if (of_property_read_bool(of, "ti,iommu-bus-err-back")) 1195 1195 obj->has_bus_err_back = MMU_GP_REG_BUS_ERR_BACK_EN; 1196 1196 1197 1197 obj->dev = &pdev->dev; ··· 1257 1257 return err; 1258 1258 } 1259 1259 1260 - static int omap_iommu_remove(struct platform_device *pdev) 1260 + static void omap_iommu_remove(struct platform_device *pdev) 1261 1261 { 1262 1262 struct omap_iommu *obj = platform_get_drvdata(pdev); 1263 1263 ··· 1274 1274 pm_runtime_disable(obj->dev); 1275 1275 1276 1276 dev_info(&pdev->dev, "%s removed\n", obj->name); 1277 - return 0; 1278 1277 } 1279 1278 1280 1279 static const struct dev_pm_ops omap_iommu_pm_ops = { ··· 1294 1295 1295 1296 static struct platform_driver omap_iommu_driver = { 1296 1297 .probe = omap_iommu_probe, 1297 - .remove = omap_iommu_remove, 1298 + .remove_new = omap_iommu_remove, 1298 1299 .driver = { 1299 1300 .name = "omap-iommu", 1300 1301 .pm = &omap_iommu_pm_ops,
+47 -14
drivers/iommu/rockchip-iommu.c
··· 124 124 125 125 static struct device *dma_dev; 126 126 static const struct rk_iommu_ops *rk_ops; 127 + static struct iommu_domain rk_identity_domain; 127 128 128 129 static inline void rk_table_flush(struct rk_iommu_domain *dom, dma_addr_t dma, 129 130 unsigned int count) ··· 647 646 * Ignore the return code, though, since we always zap cache 648 647 * and clear the page fault anyway. 649 648 */ 650 - if (iommu->domain) 649 + if (iommu->domain != &rk_identity_domain) 651 650 report_iommu_fault(iommu->domain, iommu->dev, iova, 652 651 flags); 653 652 else ··· 981 980 return ret; 982 981 } 983 982 984 - static void rk_iommu_detach_device(struct iommu_domain *domain, 985 - struct device *dev) 983 + static int rk_iommu_identity_attach(struct iommu_domain *identity_domain, 984 + struct device *dev) 986 985 { 987 986 struct rk_iommu *iommu; 988 - struct rk_iommu_domain *rk_domain = to_rk_domain(domain); 987 + struct rk_iommu_domain *rk_domain; 989 988 unsigned long flags; 990 989 int ret; 991 990 992 991 /* Allow 'virtual devices' (eg drm) to detach from domain */ 993 992 iommu = rk_iommu_from_dev(dev); 994 993 if (!iommu) 995 - return; 994 + return -ENODEV; 995 + 996 + rk_domain = to_rk_domain(iommu->domain); 996 997 997 998 dev_dbg(dev, "Detaching from iommu domain\n"); 998 999 999 - /* iommu already detached */ 1000 - if (iommu->domain != domain) 1001 - return; 1000 + if (iommu->domain == identity_domain) 1001 + return 0; 1002 1002 1003 - iommu->domain = NULL; 1003 + iommu->domain = identity_domain; 1004 1004 1005 1005 spin_lock_irqsave(&rk_domain->iommus_lock, flags); 1006 1006 list_del_init(&iommu->node); ··· 1013 1011 rk_iommu_disable(iommu); 1014 1012 pm_runtime_put(iommu->dev); 1015 1013 } 1014 + 1015 + return 0; 1016 1016 } 1017 + 1018 + static void rk_iommu_identity_free(struct iommu_domain *domain) 1019 + { 1020 + } 1021 + 1022 + static struct iommu_domain_ops rk_identity_ops = { 1023 + .attach_dev = rk_iommu_identity_attach, 1024 + .free = rk_iommu_identity_free, 1025 + }; 1026 + 1027 + static struct iommu_domain rk_identity_domain = { 1028 + .type = IOMMU_DOMAIN_IDENTITY, 1029 + .ops = &rk_identity_ops, 1030 + }; 1031 + 1032 + #ifdef CONFIG_ARM 1033 + static void rk_iommu_set_platform_dma(struct device *dev) 1034 + { 1035 + WARN_ON(rk_iommu_identity_attach(&rk_identity_domain, dev)); 1036 + } 1037 + #endif 1017 1038 1018 1039 static int rk_iommu_attach_device(struct iommu_domain *domain, 1019 1040 struct device *dev) ··· 1060 1035 if (iommu->domain == domain) 1061 1036 return 0; 1062 1037 1063 - if (iommu->domain) 1064 - rk_iommu_detach_device(iommu->domain, dev); 1038 + ret = rk_iommu_identity_attach(&rk_identity_domain, dev); 1039 + if (ret) 1040 + return ret; 1065 1041 1066 1042 iommu->domain = domain; 1067 1043 ··· 1076 1050 1077 1051 ret = rk_iommu_enable(iommu); 1078 1052 if (ret) 1079 - rk_iommu_detach_device(iommu->domain, dev); 1053 + WARN_ON(rk_iommu_identity_attach(&rk_identity_domain, dev)); 1080 1054 1081 1055 pm_runtime_put(iommu->dev); 1082 1056 ··· 1086 1060 static struct iommu_domain *rk_iommu_domain_alloc(unsigned type) 1087 1061 { 1088 1062 struct rk_iommu_domain *rk_domain; 1063 + 1064 + if (type == IOMMU_DOMAIN_IDENTITY) 1065 + return &rk_identity_domain; 1089 1066 1090 1067 if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA) 1091 1068 return NULL; ··· 1205 1176 iommu_dev = of_find_device_by_node(args->np); 1206 1177 1207 1178 data->iommu = platform_get_drvdata(iommu_dev); 1179 + data->iommu->domain = &rk_identity_domain; 1208 1180 
dev_iommu_priv_set(dev, data); 1209 1181 1210 1182 platform_device_put(iommu_dev); ··· 1218 1188 .probe_device = rk_iommu_probe_device, 1219 1189 .release_device = rk_iommu_release_device, 1220 1190 .device_group = rk_iommu_device_group, 1191 + #ifdef CONFIG_ARM 1192 + .set_platform_dma_ops = rk_iommu_set_platform_dma, 1193 + #endif 1221 1194 .pgsize_bitmap = RK_IOMMU_PGSIZE_BITMAP, 1222 1195 .of_xlate = rk_iommu_of_xlate, 1223 1196 .default_domain_ops = &(const struct iommu_domain_ops) { ··· 1376 1343 { 1377 1344 struct rk_iommu *iommu = dev_get_drvdata(dev); 1378 1345 1379 - if (!iommu->domain) 1346 + if (iommu->domain == &rk_identity_domain) 1380 1347 return 0; 1381 1348 1382 1349 rk_iommu_disable(iommu); ··· 1387 1354 { 1388 1355 struct rk_iommu *iommu = dev_get_drvdata(dev); 1389 1356 1390 - if (!iommu->domain) 1357 + if (iommu->domain == &rk_identity_domain) 1391 1358 return 0; 1392 1359 1393 1360 return rk_iommu_enable(iommu);
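Note: the rockchip rework above replaces the driver's NULL "not attached" state with a singleton identity domain, so iommu->domain is always a valid pointer and the ARM-only set_platform_dma_ops hook simply attaches that domain. Stripped of the driver specifics, the pattern reduces to roughly the following (a minimal sketch; the my_* names are illustrative and not part of the driver):

#include <linux/iommu.h>

/* Pass-through attach: nothing to program in this sketch; the real
 * driver also tears down whatever translation was previously enabled. */
static int my_identity_attach(struct iommu_domain *identity_domain,
			      struct device *dev)
{
	return 0;
}

static const struct iommu_domain_ops my_identity_ops = {
	.attach_dev = my_identity_attach,
};

/* One static instance serves every device; domain_alloc() hands it out
 * whenever the core asks for IOMMU_DOMAIN_IDENTITY. */
static struct iommu_domain my_identity_domain = {
	.type = IOMMU_DOMAIN_IDENTITY,
	.ops = &my_identity_ops,
};

Every former "iommu->domain == NULL" test then becomes a comparison against the static identity domain, as the suspend/resume hunks above show.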
+43 -17
drivers/iommu/sprd-iommu.c
··· 62 62 * @eb: gate clock which controls IOMMU access
63 63 */
64 64 struct sprd_iommu_device {
65 + struct sprd_iommu_domain *dom;
65 66 enum sprd_iommu_version ver;
66 67 u32 *prot_page_va;
67 68 dma_addr_t prot_page_pa;
··· 152 151 return &dom->domain;
153 152 }
154 153 
155 - static void sprd_iommu_domain_free(struct iommu_domain *domain)
156 - {
157 - struct sprd_iommu_domain *dom = to_sprd_domain(domain);
158 - 
159 - kfree(dom);
160 - }
161 - 
162 154 static void sprd_iommu_first_vpn(struct sprd_iommu_domain *dom)
163 155 {
164 156 struct sprd_iommu_device *sdev = dom->sdev;
··· 224 230 sprd_iommu_update_bits(sdev, reg_cfg, mask, 0, val);
225 231 }
226 232 
233 + static void sprd_iommu_cleanup(struct sprd_iommu_domain *dom)
234 + {
235 + size_t pgt_size;
236 + 
237 + /* Nothing to do if the domain was never attached */
238 + if (!dom->sdev)
239 + return;
240 + 
241 + pgt_size = sprd_iommu_pgt_size(&dom->domain);
242 + sprd_iommu_hw_en(dom->sdev, false);
243 + dma_free_coherent(dom->sdev->dev, pgt_size, dom->pgt_va, dom->pgt_pa);
244 + dom->sdev = NULL;
245 + }
246 + 
247 + static void sprd_iommu_domain_free(struct iommu_domain *domain)
248 + {
249 + struct sprd_iommu_domain *dom = to_sprd_domain(domain);
250 + 
251 + sprd_iommu_cleanup(dom);
252 + kfree(dom);
253 + }
254 + 
227 255 static int sprd_iommu_attach_device(struct iommu_domain *domain,
228 256 struct device *dev)
229 257 {
··· 253 237 struct sprd_iommu_domain *dom = to_sprd_domain(domain);
254 238 size_t pgt_size = sprd_iommu_pgt_size(domain);
255 239 
256 - if (dom->sdev)
257 - return -EINVAL;
240 + /* The device is already attached to this domain */
241 + if (sdev->dom == dom)
242 + return 0;
258 243 
259 - dom->pgt_va = dma_alloc_coherent(sdev->dev, pgt_size, &dom->pgt_pa, GFP_KERNEL);
260 - if (!dom->pgt_va)
261 - return -ENOMEM;
244 + /* The first time this domain is attached to a device */
245 + if (!dom->pgt_va) {
246 + dom->pgt_va = dma_alloc_coherent(sdev->dev, pgt_size, &dom->pgt_pa, GFP_KERNEL);
247 + if (!dom->pgt_va)
248 + return -ENOMEM;
262 249 
263 - dom->sdev = sdev;
250 + dom->sdev = sdev;
251 + }
264 252 
253 + sdev->dom = dom;
254 + 
255 + /*
256 + * One sprd IOMMU serves one client device only; disable it before
257 + * configuring the mapping table to avoid access conflicts while a
258 + * previous mapping table is still live.
259 + */
260 + sprd_iommu_hw_en(sdev, false);
265 261 sprd_iommu_first_ppn(dom);
266 262 sprd_iommu_first_vpn(dom);
267 263 sprd_iommu_vpn_range(dom);
··· 535 507 return ret;
536 508 }
537 509 
538 - static int sprd_iommu_remove(struct platform_device *pdev)
510 + static void sprd_iommu_remove(struct platform_device *pdev)
539 511 {
540 512 struct sprd_iommu_device *sdev = platform_get_drvdata(pdev);
541 513 
··· 547 519 platform_set_drvdata(pdev, NULL);
548 520 iommu_device_sysfs_remove(&sdev->iommu);
549 521 iommu_device_unregister(&sdev->iommu);
550 - 
551 - return 0;
552 522 }
553 523 
554 524 static struct platform_driver sprd_iommu_driver = {
··· 556 530 .suppress_bind_attrs = true,
557 531 },
558 532 .probe = sprd_iommu_probe,
559 - .remove = sprd_iommu_remove,
533 + .remove_new = sprd_iommu_remove,
560 534 };
561 535 module_platform_driver(sprd_iommu_driver);
562 536 
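Note: alongside the attach and cleanup rework, sprd is converted to the void-returning platform remove callback: the int return is dropped because the driver core ignored it, and the callback is wired up through .remove_new. In isolation the conversion looks like this (a hedged sketch around a hypothetical foo driver, not the sprd code):

#include <linux/platform_device.h>

static int foo_probe(struct platform_device *pdev)
{
	return 0;
}

/* Teardown must not fail; anything that could go wrong belongs in
 * probe-time checks, since the core had nowhere to report it anyway. */
static void foo_remove(struct platform_device *pdev)
{
}

static struct platform_driver foo_driver = {
	.driver = {
		.name = "foo",
	},
	.probe = foo_probe,
	.remove_new = foo_remove,	/* void-returning variant of .remove */
};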
-3
drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c
··· 1782 1782 jpeg->vdev->device_caps = V4L2_CAP_STREAMING | 1783 1783 V4L2_CAP_VIDEO_M2M_MPLANE; 1784 1784 1785 - if (of_property_present(pdev->dev.of_node, "dma-ranges")) 1786 - dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(34)); 1787 - 1788 1785 ret = video_register_device(jpeg->vdev, VFL_TYPE_VIDEO, -1); 1789 1786 if (ret) { 1790 1787 v4l2_err(&jpeg->v4l2_dev, "Failed to register video device\n");
-8
drivers/media/platform/mediatek/vcodec/mtk_vcodec_dec_drv.c
··· 321 321 } 322 322 } 323 323 324 - if (of_property_present(pdev->dev.of_node, "dma-ranges")) { 325 - ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(34)); 326 - if (ret) { 327 - mtk_v4l2_err("Failed to set mask"); 328 - goto err_core_workq; 329 - } 330 - } 331 - 332 324 for (i = 0; i < MTK_VDEC_HW_MAX; i++) 333 325 mutex_init(&dev->dec_mutex[i]); 334 326 mutex_init(&dev->dev_mutex);
-3
drivers/media/platform/mediatek/vcodec/mtk_vcodec_enc_drv.c
··· 352 352 goto err_event_workq; 353 353 } 354 354 355 - if (of_property_present(pdev->dev.of_node, "dma-ranges")) 356 - dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(34)); 357 - 358 355 ret = video_register_device(vfd_enc, VFL_TYPE_VIDEO, -1); 359 356 if (ret) { 360 357 mtk_v4l2_err("Failed to register video device");
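Note: all three MediaTek media drivers above drop the same open-coded block. Presumably, with "dma-ranges" now described on the parent bus node, the OF core derives the device's DMA addressing limits during of_dma_configure(), making the hand-rolled 34-bit mask redundant; that is my reading of the change, the hunks themselves do not say. The removal also irons out an inconsistency: only the vcodec decoder checked the return value, the other two call sites ignored it. For reference, the deleted pattern was:

#include <linux/dma-mapping.h>
#include <linux/of.h>
#include <linux/platform_device.h>

/* The per-driver workaround removed above (return value ignored, as two
 * of the three call sites did): */
static void my_widen_dma_mask(struct platform_device *pdev)
{
	if (of_property_present(pdev->dev.of_node, "dma-ranges"))
		dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(34));
}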
-74
include/linux/ioasid.h
··· 1 - /* SPDX-License-Identifier: GPL-2.0 */ 2 - #ifndef __LINUX_IOASID_H 3 - #define __LINUX_IOASID_H 4 - 5 - #include <linux/types.h> 6 - #include <linux/errno.h> 7 - 8 - #define INVALID_IOASID ((ioasid_t)-1) 9 - typedef unsigned int ioasid_t; 10 - typedef ioasid_t (*ioasid_alloc_fn_t)(ioasid_t min, ioasid_t max, void *data); 11 - typedef void (*ioasid_free_fn_t)(ioasid_t ioasid, void *data); 12 - 13 - struct ioasid_set { 14 - int dummy; 15 - }; 16 - 17 - /** 18 - * struct ioasid_allocator_ops - IOASID allocator helper functions and data 19 - * 20 - * @alloc: helper function to allocate IOASID 21 - * @free: helper function to free IOASID 22 - * @list: for tracking ops that share helper functions but not data 23 - * @pdata: data belong to the allocator, provided when calling alloc() 24 - */ 25 - struct ioasid_allocator_ops { 26 - ioasid_alloc_fn_t alloc; 27 - ioasid_free_fn_t free; 28 - struct list_head list; 29 - void *pdata; 30 - }; 31 - 32 - #define DECLARE_IOASID_SET(name) struct ioasid_set name = { 0 } 33 - 34 - #if IS_ENABLED(CONFIG_IOASID) 35 - ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max, 36 - void *private); 37 - void ioasid_free(ioasid_t ioasid); 38 - void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, 39 - bool (*getter)(void *)); 40 - int ioasid_register_allocator(struct ioasid_allocator_ops *allocator); 41 - void ioasid_unregister_allocator(struct ioasid_allocator_ops *allocator); 42 - int ioasid_set_data(ioasid_t ioasid, void *data); 43 - 44 - #else /* !CONFIG_IOASID */ 45 - static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, 46 - ioasid_t max, void *private) 47 - { 48 - return INVALID_IOASID; 49 - } 50 - 51 - static inline void ioasid_free(ioasid_t ioasid) { } 52 - 53 - static inline void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, 54 - bool (*getter)(void *)) 55 - { 56 - return NULL; 57 - } 58 - 59 - static inline int ioasid_register_allocator(struct ioasid_allocator_ops *allocator) 60 - { 61 - return -ENOTSUPP; 62 - } 63 - 64 - static inline void ioasid_unregister_allocator(struct ioasid_allocator_ops *allocator) 65 - { 66 - } 67 - 68 - static inline int ioasid_set_data(ioasid_t ioasid, void *data) 69 - { 70 - return -ENOTSUPP; 71 - } 72 - 73 - #endif /* CONFIG_IOASID */ 74 - #endif /* __LINUX_IOASID_H */
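Note: with its last users gone, <linux/ioasid.h> is deleted outright. The ioasid_t typedef moves into <linux/iommu.h> (next hunk) and the INVALID_IOASID sentinel is superseded by IOMMU_PASID_INVALID, which has the same (-1) value. For any leftover user the conversion is mechanical (sketch; my_pasid is a placeholder name):

#include <linux/iommu.h>	/* was: #include <linux/ioasid.h> */

static ioasid_t my_pasid = IOMMU_PASID_INVALID;	/* was: INVALID_IOASID */

static bool my_pasid_valid(void)
{
	return my_pasid != IOMMU_PASID_INVALID;
}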
+13 -8
include/linux/iommu.h
··· 13 13 #include <linux/errno.h> 14 14 #include <linux/err.h> 15 15 #include <linux/of.h> 16 - #include <linux/ioasid.h> 17 16 #include <uapi/linux/iommu.h> 18 17 19 18 #define IOMMU_READ (1 << 0) ··· 191 192 }; 192 193 193 194 #define IOMMU_PASID_INVALID (-1U) 195 + typedef unsigned int ioasid_t; 194 196 195 197 #ifdef CONFIG_IOMMU_API 196 198 ··· 460 460 extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap); 461 461 extern bool iommu_group_has_isolated_msi(struct iommu_group *group); 462 462 extern struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus); 463 - extern struct iommu_group *iommu_group_get_by_id(int id); 464 463 extern void iommu_domain_free(struct iommu_domain *domain); 465 464 extern int iommu_attach_device(struct iommu_domain *domain, 466 465 struct device *dev); ··· 698 699 } 699 700 700 701 int iommu_probe_device(struct device *dev); 701 - void iommu_release_device(struct device *dev); 702 702 703 703 int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f); 704 704 int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f); ··· 741 743 } 742 744 743 745 static inline struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus) 744 - { 745 - return NULL; 746 - } 747 - 748 - static inline struct iommu_group *iommu_group_get_by_id(int id) 749 746 { 750 747 return NULL; 751 748 } ··· 1166 1173 } 1167 1174 1168 1175 #ifdef CONFIG_IOMMU_SVA 1176 + static inline void mm_pasid_init(struct mm_struct *mm) 1177 + { 1178 + mm->pasid = IOMMU_PASID_INVALID; 1179 + } 1180 + static inline bool mm_valid_pasid(struct mm_struct *mm) 1181 + { 1182 + return mm->pasid != IOMMU_PASID_INVALID; 1183 + } 1184 + void mm_pasid_drop(struct mm_struct *mm); 1169 1185 struct iommu_sva *iommu_sva_bind_device(struct device *dev, 1170 1186 struct mm_struct *mm); 1171 1187 void iommu_sva_unbind_device(struct iommu_sva *handle); ··· 1194 1192 { 1195 1193 return IOMMU_PASID_INVALID; 1196 1194 } 1195 + static inline void mm_pasid_init(struct mm_struct *mm) {} 1196 + static inline bool mm_valid_pasid(struct mm_struct *mm) { return false; } 1197 + static inline void mm_pasid_drop(struct mm_struct *mm) {} 1197 1198 #endif /* CONFIG_IOMMU_SVA */ 1198 1199 1199 1200 #endif /* __LINUX_IOMMU_H */
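Note: the mm_pasid_*() helpers migrate here from <linux/sched/mm.h> (deleted in the next hunk), and mm_pasid_drop() becomes out of line, since freeing now goes through the IOMMU core rather than ioasid_free(). Consumers of the SVA API are unaffected; under these declarations, binding a process address space still looks roughly like this (a hedged sketch: dev is assumed SVA-capable, my_bind_current_mm is hypothetical, and the caller keeps the handle for a later unbind):

#include <linux/err.h>
#include <linux/iommu.h>
#include <linux/sched.h>

static struct iommu_sva *my_bind_current_mm(struct device *dev)
{
	struct iommu_sva *handle;
	u32 pasid;

	handle = iommu_sva_bind_device(dev, current->mm);
	if (IS_ERR(handle))
		return handle;

	/* The PASID now tagged onto this device's transactions */
	pasid = iommu_sva_get_pasid(handle);
	if (pasid == IOMMU_PASID_INVALID) {
		iommu_sva_unbind_device(handle);
		return ERR_PTR(-ENODEV);
	}
	return handle;
}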
-32
include/linux/sched/mm.h
··· 8 8 #include <linux/mm_types.h> 9 9 #include <linux/gfp.h> 10 10 #include <linux/sync_core.h> 11 - #include <linux/ioasid.h> 12 11 13 12 /* 14 13 * Routines for handling mm_structs ··· 481 482 static inline void membarrier_update_current_mm(struct mm_struct *next_mm) 482 483 { 483 484 } 484 - #endif 485 - 486 - #ifdef CONFIG_IOMMU_SVA 487 - static inline void mm_pasid_init(struct mm_struct *mm) 488 - { 489 - mm->pasid = INVALID_IOASID; 490 - } 491 - 492 - static inline bool mm_valid_pasid(struct mm_struct *mm) 493 - { 494 - return mm->pasid != INVALID_IOASID; 495 - } 496 - 497 - /* Associate a PASID with an mm_struct: */ 498 - static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) 499 - { 500 - mm->pasid = pasid; 501 - } 502 - 503 - static inline void mm_pasid_drop(struct mm_struct *mm) 504 - { 505 - if (mm_valid_pasid(mm)) { 506 - ioasid_free(mm->pasid); 507 - mm->pasid = INVALID_IOASID; 508 - } 509 - } 510 - #else 511 - static inline void mm_pasid_init(struct mm_struct *mm) {} 512 - static inline bool mm_valid_pasid(struct mm_struct *mm) { return false; } 513 - static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) {} 514 - static inline void mm_pasid_drop(struct mm_struct *mm) {} 515 485 #endif 516 486 517 487 #endif /* _LINUX_SCHED_MM_H */
+1
kernel/fork.c
··· 98 98 #include <linux/bpf.h> 99 99 #include <linux/stackprotector.h> 100 100 #include <linux/user_events.h> 101 + #include <linux/iommu.h> 101 102 102 103 #include <asm/pgalloc.h> 103 104 #include <linux/uaccess.h>
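Note: kernel/fork.c gains the <linux/iommu.h> include because mm_pasid_init() and mm_pasid_drop(), which the fork path calls during mm setup and teardown, now live there instead of in <linux/sched/mm.h>. The surrounding logic is roughly the following (a sketch of the call sites, not the exact fork.c code; the !CONFIG_IOMMU_SVA stubs from the iommu.h hunk above keep it building either way):

#include <linux/iommu.h>
#include <linux/mm_types.h>

static void my_mm_setup(struct mm_struct *mm)
{
	mm_pasid_init(mm);	/* start with mm->pasid = IOMMU_PASID_INVALID */
}

static void my_mm_teardown(struct mm_struct *mm)
{
	mm_pasid_drop(mm);	/* release the PASID if the mm ever bound one */
}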
+2 -2
mm/init-mm.c
··· 10 10 11 11 #include <linux/atomic.h> 12 12 #include <linux/user_namespace.h> 13 - #include <linux/ioasid.h> 13 + #include <linux/iommu.h> 14 14 #include <asm/mmu.h> 15 15 16 16 #ifndef INIT_MM_CONTEXT ··· 43 43 .user_ns = &init_user_ns, 44 44 .cpu_bitmap = CPU_BITS_NONE, 45 45 #ifdef CONFIG_IOMMU_SVA 46 - .pasid = INVALID_IOASID, 46 + .pasid = IOMMU_PASID_INVALID, 47 47 #endif 48 48 INIT_MM_CONTEXT(init_mm) 49 49 };