Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branches 'x86/vt-d', 'arm/omap', 'arm/smmu', 's390', 'core' and 'x86/amd' into next

Conflicts:
drivers/iommu/amd_iommu_types.h

+1994 -442
+19
Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
··· 36 36 NOTE: this only applies to the SMMU itself, not 37 37 masters connected upstream of the SMMU. 38 38 39 + - msi-parent : See the generic MSI binding described in 40 + devicetree/bindings/interrupt-controller/msi.txt 41 + for a description of the msi-parent property. 42 + 39 43 - hisilicon,broken-prefetch-cmd 40 44 : Avoid sending CMD_PREFETCH_* commands to the SMMU. 45 + 46 + ** Example 47 + 48 + smmu@2b400000 { 49 + compatible = "arm,smmu-v3"; 50 + reg = <0x0 0x2b400000 0x0 0x20000>; 51 + interrupts = <GIC_SPI 74 IRQ_TYPE_EDGE_RISING>, 52 + <GIC_SPI 75 IRQ_TYPE_EDGE_RISING>, 53 + <GIC_SPI 77 IRQ_TYPE_EDGE_RISING>, 54 + <GIC_SPI 79 IRQ_TYPE_EDGE_RISING>; 55 + interrupt-names = "eventq", "priq", "cmdq-sync", "gerror"; 56 + dma-coherent; 57 + #iommu-cells = <0>; 58 + msi-parent = <&its 0xff0000>; 59 + };
+27
Documentation/devicetree/bindings/iommu/ti,omap-iommu.txt
··· 4 4 - compatible : Should be one of, 5 5 "ti,omap2-iommu" for OMAP2/OMAP3 IOMMU instances 6 6 "ti,omap4-iommu" for OMAP4/OMAP5 IOMMU instances 7 + "ti,dra7-dsp-iommu" for DRA7xx DSP IOMMU instances 7 8 "ti,dra7-iommu" for DRA7xx IOMMU instances 8 9 - ti,hwmods : Name of the hwmod associated with the IOMMU instance 9 10 - reg : Address space for the configuration registers ··· 20 19 Should be either 8 or 32 (default: 32) 21 20 - ti,iommu-bus-err-back : Indicates the IOMMU instance supports throwing 22 21 back a bus error response on MMU faults. 22 + - ti,syscon-mmuconfig : Should be a pair of the phandle to the DSP_SYSTEM 23 + syscon node that contains the additional control 24 + register for enabling the MMU, and the MMU instance 25 + number (0-indexed) within the sub-system. This property 26 + is required for DSP IOMMU instances on DRA7xx SoCs. The 27 + instance number should be 0 for DSP MDMA MMUs and 1 for 28 + DSP EDMA MMUs. 23 29 24 30 Example: 25 31 /* OMAP3 ISP MMU */ ··· 37 29 interrupts = <24>; 38 30 ti,hwmods = "mmu_isp"; 39 31 ti,#tlb-entries = <8>; 32 + }; 33 + 34 + /* DRA74x DSP2 MMUs */ 35 + mmu0_dsp2: mmu@41501000 { 36 + compatible = "ti,dra7-dsp-iommu"; 37 + reg = <0x41501000 0x100>; 38 + interrupts = <GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>; 39 + ti,hwmods = "mmu0_dsp2"; 40 + #iommu-cells = <0>; 41 + ti,syscon-mmuconfig = <&dsp2_system 0x0>; 42 + }; 43 + 44 + mmu1_dsp2: mmu@41502000 { 45 + compatible = "ti,dra7-dsp-iommu"; 46 + reg = <0x41502000 0x100>; 47 + interrupts = <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>; 48 + ti,hwmods = "mmu1_dsp2"; 49 + #iommu-cells = <0>; 50 + ti,syscon-mmuconfig = <&dsp2_system 0x1>; 40 51 };
+7
MAINTAINERS
··· 8966 8966 F: include/net/iucv/ 8967 8967 F: net/iucv/ 8968 8968 8969 + S390 IOMMU (PCI) 8970 + M: Gerald Schaefer <gerald.schaefer@de.ibm.com> 8971 + L: linux-s390@vger.kernel.org 8972 + W: http://www.ibm.com/developerworks/linux/linux390/ 8973 + S: Supported 8974 + F: drivers/iommu/s390-iommu.c 8975 + 8969 8976 S3C24XX SD/MMC Driver 8970 8977 M: Ben Dooks <ben-linux@fluff.org> 8971 8978 L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+1
arch/arm64/Kconfig
··· 75 75 select HAVE_PERF_USER_STACK_DUMP 76 76 select HAVE_RCU_TABLE_FREE 77 77 select HAVE_SYSCALL_TRACEPOINTS 78 + select IOMMU_DMA if IOMMU_SUPPORT 78 79 select IRQ_DOMAIN 79 80 select IRQ_FORCED_THREADING 80 81 select MODULES_USE_ELF_RELA
+7 -8
arch/arm64/include/asm/dma-mapping.h
··· 54 54 return __generic_dma_ops(dev); 55 55 } 56 56 57 - static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, 58 - struct iommu_ops *iommu, bool coherent) 59 - { 60 - if (!acpi_disabled && !dev->archdata.dma_ops) 61 - dev->archdata.dma_ops = dma_ops; 62 - 63 - dev->archdata.dma_coherent = coherent; 64 - } 57 + void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, 58 + struct iommu_ops *iommu, bool coherent); 65 59 #define arch_setup_dma_ops arch_setup_dma_ops 60 + 61 + #ifdef CONFIG_IOMMU_DMA 62 + void arch_teardown_dma_ops(struct device *dev); 63 + #define arch_teardown_dma_ops arch_teardown_dma_ops 64 + #endif 66 65 67 66 /* do not use this function in a driver */ 68 67 static inline bool is_device_dma_coherent(struct device *dev)
+457
arch/arm64/mm/dma-mapping.c
··· 533 533 return 0; 534 534 } 535 535 fs_initcall(dma_debug_do_init); 536 + 537 + 538 + #ifdef CONFIG_IOMMU_DMA 539 + #include <linux/dma-iommu.h> 540 + #include <linux/platform_device.h> 541 + #include <linux/amba/bus.h> 542 + 543 + /* Thankfully, all cache ops are by VA so we can ignore phys here */ 544 + static void flush_page(struct device *dev, const void *virt, phys_addr_t phys) 545 + { 546 + __dma_flush_range(virt, virt + PAGE_SIZE); 547 + } 548 + 549 + static void *__iommu_alloc_attrs(struct device *dev, size_t size, 550 + dma_addr_t *handle, gfp_t gfp, 551 + struct dma_attrs *attrs) 552 + { 553 + bool coherent = is_device_dma_coherent(dev); 554 + int ioprot = dma_direction_to_prot(DMA_BIDIRECTIONAL, coherent); 555 + void *addr; 556 + 557 + if (WARN(!dev, "cannot create IOMMU mapping for unknown device\n")) 558 + return NULL; 559 + /* 560 + * Some drivers rely on this, and we probably don't want the 561 + * possibility of stale kernel data being read by devices anyway. 562 + */ 563 + gfp |= __GFP_ZERO; 564 + 565 + if (gfp & __GFP_WAIT) { 566 + struct page **pages; 567 + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent); 568 + 569 + pages = iommu_dma_alloc(dev, size, gfp, ioprot, handle, 570 + flush_page); 571 + if (!pages) 572 + return NULL; 573 + 574 + addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot, 575 + __builtin_return_address(0)); 576 + if (!addr) 577 + iommu_dma_free(dev, pages, size, handle); 578 + } else { 579 + struct page *page; 580 + /* 581 + * In atomic context we can't remap anything, so we'll only 582 + * get the virtually contiguous buffer we need by way of a 583 + * physically contiguous allocation. 584 + */ 585 + if (coherent) { 586 + page = alloc_pages(gfp, get_order(size)); 587 + addr = page ? 
page_address(page) : NULL; 588 + } else { 589 + addr = __alloc_from_pool(size, &page, gfp); 590 + } 591 + if (!addr) 592 + return NULL; 593 + 594 + *handle = iommu_dma_map_page(dev, page, 0, size, ioprot); 595 + if (iommu_dma_mapping_error(dev, *handle)) { 596 + if (coherent) 597 + __free_pages(page, get_order(size)); 598 + else 599 + __free_from_pool(addr, size); 600 + addr = NULL; 601 + } 602 + } 603 + return addr; 604 + } 605 + 606 + static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, 607 + dma_addr_t handle, struct dma_attrs *attrs) 608 + { 609 + /* 610 + * @cpu_addr will be one of 3 things depending on how it was allocated: 611 + * - A remapped array of pages from iommu_dma_alloc(), for all 612 + * non-atomic allocations. 613 + * - A non-cacheable alias from the atomic pool, for atomic 614 + * allocations by non-coherent devices. 615 + * - A normal lowmem address, for atomic allocations by 616 + * coherent devices. 617 + * Hence how dodgy the below logic looks... 
618 + */ 619 + if (__in_atomic_pool(cpu_addr, size)) { 620 + iommu_dma_unmap_page(dev, handle, size, 0, NULL); 621 + __free_from_pool(cpu_addr, size); 622 + } else if (is_vmalloc_addr(cpu_addr)){ 623 + struct vm_struct *area = find_vm_area(cpu_addr); 624 + 625 + if (WARN_ON(!area || !area->pages)) 626 + return; 627 + iommu_dma_free(dev, area->pages, size, &handle); 628 + dma_common_free_remap(cpu_addr, size, VM_USERMAP); 629 + } else { 630 + iommu_dma_unmap_page(dev, handle, size, 0, NULL); 631 + __free_pages(virt_to_page(cpu_addr), get_order(size)); 632 + } 633 + } 634 + 635 + static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, 636 + void *cpu_addr, dma_addr_t dma_addr, size_t size, 637 + struct dma_attrs *attrs) 638 + { 639 + struct vm_struct *area; 640 + int ret; 641 + 642 + vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, 643 + is_device_dma_coherent(dev)); 644 + 645 + if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) 646 + return ret; 647 + 648 + area = find_vm_area(cpu_addr); 649 + if (WARN_ON(!area || !area->pages)) 650 + return -ENXIO; 651 + 652 + return iommu_dma_mmap(area->pages, size, vma); 653 + } 654 + 655 + static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt, 656 + void *cpu_addr, dma_addr_t dma_addr, 657 + size_t size, struct dma_attrs *attrs) 658 + { 659 + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; 660 + struct vm_struct *area = find_vm_area(cpu_addr); 661 + 662 + if (WARN_ON(!area || !area->pages)) 663 + return -ENXIO; 664 + 665 + return sg_alloc_table_from_pages(sgt, area->pages, count, 0, size, 666 + GFP_KERNEL); 667 + } 668 + 669 + static void __iommu_sync_single_for_cpu(struct device *dev, 670 + dma_addr_t dev_addr, size_t size, 671 + enum dma_data_direction dir) 672 + { 673 + phys_addr_t phys; 674 + 675 + if (is_device_dma_coherent(dev)) 676 + return; 677 + 678 + phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr); 679 + 
__dma_unmap_area(phys_to_virt(phys), size, dir); 680 + } 681 + 682 + static void __iommu_sync_single_for_device(struct device *dev, 683 + dma_addr_t dev_addr, size_t size, 684 + enum dma_data_direction dir) 685 + { 686 + phys_addr_t phys; 687 + 688 + if (is_device_dma_coherent(dev)) 689 + return; 690 + 691 + phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr); 692 + __dma_map_area(phys_to_virt(phys), size, dir); 693 + } 694 + 695 + static dma_addr_t __iommu_map_page(struct device *dev, struct page *page, 696 + unsigned long offset, size_t size, 697 + enum dma_data_direction dir, 698 + struct dma_attrs *attrs) 699 + { 700 + bool coherent = is_device_dma_coherent(dev); 701 + int prot = dma_direction_to_prot(dir, coherent); 702 + dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot); 703 + 704 + if (!iommu_dma_mapping_error(dev, dev_addr) && 705 + !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) 706 + __iommu_sync_single_for_device(dev, dev_addr, size, dir); 707 + 708 + return dev_addr; 709 + } 710 + 711 + static void __iommu_unmap_page(struct device *dev, dma_addr_t dev_addr, 712 + size_t size, enum dma_data_direction dir, 713 + struct dma_attrs *attrs) 714 + { 715 + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) 716 + __iommu_sync_single_for_cpu(dev, dev_addr, size, dir); 717 + 718 + iommu_dma_unmap_page(dev, dev_addr, size, dir, attrs); 719 + } 720 + 721 + static void __iommu_sync_sg_for_cpu(struct device *dev, 722 + struct scatterlist *sgl, int nelems, 723 + enum dma_data_direction dir) 724 + { 725 + struct scatterlist *sg; 726 + int i; 727 + 728 + if (is_device_dma_coherent(dev)) 729 + return; 730 + 731 + for_each_sg(sgl, sg, nelems, i) 732 + __dma_unmap_area(sg_virt(sg), sg->length, dir); 733 + } 734 + 735 + static void __iommu_sync_sg_for_device(struct device *dev, 736 + struct scatterlist *sgl, int nelems, 737 + enum dma_data_direction dir) 738 + { 739 + struct scatterlist *sg; 740 + int i; 741 + 742 + if 
(is_device_dma_coherent(dev)) 743 + return; 744 + 745 + for_each_sg(sgl, sg, nelems, i) 746 + __dma_map_area(sg_virt(sg), sg->length, dir); 747 + } 748 + 749 + static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl, 750 + int nelems, enum dma_data_direction dir, 751 + struct dma_attrs *attrs) 752 + { 753 + bool coherent = is_device_dma_coherent(dev); 754 + 755 + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) 756 + __iommu_sync_sg_for_device(dev, sgl, nelems, dir); 757 + 758 + return iommu_dma_map_sg(dev, sgl, nelems, 759 + dma_direction_to_prot(dir, coherent)); 760 + } 761 + 762 + static void __iommu_unmap_sg_attrs(struct device *dev, 763 + struct scatterlist *sgl, int nelems, 764 + enum dma_data_direction dir, 765 + struct dma_attrs *attrs) 766 + { 767 + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) 768 + __iommu_sync_sg_for_cpu(dev, sgl, nelems, dir); 769 + 770 + iommu_dma_unmap_sg(dev, sgl, nelems, dir, attrs); 771 + } 772 + 773 + static struct dma_map_ops iommu_dma_ops = { 774 + .alloc = __iommu_alloc_attrs, 775 + .free = __iommu_free_attrs, 776 + .mmap = __iommu_mmap_attrs, 777 + .get_sgtable = __iommu_get_sgtable, 778 + .map_page = __iommu_map_page, 779 + .unmap_page = __iommu_unmap_page, 780 + .map_sg = __iommu_map_sg_attrs, 781 + .unmap_sg = __iommu_unmap_sg_attrs, 782 + .sync_single_for_cpu = __iommu_sync_single_for_cpu, 783 + .sync_single_for_device = __iommu_sync_single_for_device, 784 + .sync_sg_for_cpu = __iommu_sync_sg_for_cpu, 785 + .sync_sg_for_device = __iommu_sync_sg_for_device, 786 + .dma_supported = iommu_dma_supported, 787 + .mapping_error = iommu_dma_mapping_error, 788 + }; 789 + 790 + /* 791 + * TODO: Right now __iommu_setup_dma_ops() gets called too early to do 792 + * everything it needs to - the device is only partially created and the 793 + * IOMMU driver hasn't seen it yet, so it can't have a group. Thus we 794 + * need this delayed attachment dance. 
Once IOMMU probe ordering is sorted 795 + * to move the arch_setup_dma_ops() call later, all the notifier bits below 796 + * become unnecessary, and will go away. 797 + */ 798 + struct iommu_dma_notifier_data { 799 + struct list_head list; 800 + struct device *dev; 801 + const struct iommu_ops *ops; 802 + u64 dma_base; 803 + u64 size; 804 + }; 805 + static LIST_HEAD(iommu_dma_masters); 806 + static DEFINE_MUTEX(iommu_dma_notifier_lock); 807 + 808 + /* 809 + * Temporarily "borrow" a domain feature flag to to tell if we had to resort 810 + * to creating our own domain here, in case we need to clean it up again. 811 + */ 812 + #define __IOMMU_DOMAIN_FAKE_DEFAULT (1U << 31) 813 + 814 + static bool do_iommu_attach(struct device *dev, const struct iommu_ops *ops, 815 + u64 dma_base, u64 size) 816 + { 817 + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); 818 + 819 + /* 820 + * Best case: The device is either part of a group which was 821 + * already attached to a domain in a previous call, or it's 822 + * been put in a default DMA domain by the IOMMU core. 823 + */ 824 + if (!domain) { 825 + /* 826 + * Urgh. The IOMMU core isn't going to do default domains 827 + * for non-PCI devices anyway, until it has some means of 828 + * abstracting the entirely implementation-specific 829 + * sideband data/SoC topology/unicorn dust that may or 830 + * may not differentiate upstream masters. 831 + * So until then, HORRIBLE HACKS! 
832 + */ 833 + domain = ops->domain_alloc(IOMMU_DOMAIN_DMA); 834 + if (!domain) 835 + goto out_no_domain; 836 + 837 + domain->ops = ops; 838 + domain->type = IOMMU_DOMAIN_DMA | __IOMMU_DOMAIN_FAKE_DEFAULT; 839 + 840 + if (iommu_attach_device(domain, dev)) 841 + goto out_put_domain; 842 + } 843 + 844 + if (iommu_dma_init_domain(domain, dma_base, size)) 845 + goto out_detach; 846 + 847 + dev->archdata.dma_ops = &iommu_dma_ops; 848 + return true; 849 + 850 + out_detach: 851 + iommu_detach_device(domain, dev); 852 + out_put_domain: 853 + if (domain->type & __IOMMU_DOMAIN_FAKE_DEFAULT) 854 + iommu_domain_free(domain); 855 + out_no_domain: 856 + pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n", 857 + dev_name(dev)); 858 + return false; 859 + } 860 + 861 + static void queue_iommu_attach(struct device *dev, const struct iommu_ops *ops, 862 + u64 dma_base, u64 size) 863 + { 864 + struct iommu_dma_notifier_data *iommudata; 865 + 866 + iommudata = kzalloc(sizeof(*iommudata), GFP_KERNEL); 867 + if (!iommudata) 868 + return; 869 + 870 + iommudata->dev = dev; 871 + iommudata->ops = ops; 872 + iommudata->dma_base = dma_base; 873 + iommudata->size = size; 874 + 875 + mutex_lock(&iommu_dma_notifier_lock); 876 + list_add(&iommudata->list, &iommu_dma_masters); 877 + mutex_unlock(&iommu_dma_notifier_lock); 878 + } 879 + 880 + static int __iommu_attach_notifier(struct notifier_block *nb, 881 + unsigned long action, void *data) 882 + { 883 + struct iommu_dma_notifier_data *master, *tmp; 884 + 885 + if (action != BUS_NOTIFY_ADD_DEVICE) 886 + return 0; 887 + 888 + mutex_lock(&iommu_dma_notifier_lock); 889 + list_for_each_entry_safe(master, tmp, &iommu_dma_masters, list) { 890 + if (do_iommu_attach(master->dev, master->ops, 891 + master->dma_base, master->size)) { 892 + list_del(&master->list); 893 + kfree(master); 894 + } 895 + } 896 + mutex_unlock(&iommu_dma_notifier_lock); 897 + return 0; 898 + } 899 + 900 + static int register_iommu_dma_ops_notifier(struct 
bus_type *bus) 901 + { 902 + struct notifier_block *nb = kzalloc(sizeof(*nb), GFP_KERNEL); 903 + int ret; 904 + 905 + if (!nb) 906 + return -ENOMEM; 907 + /* 908 + * The device must be attached to a domain before the driver probe 909 + * routine gets a chance to start allocating DMA buffers. However, 910 + * the IOMMU driver also needs a chance to configure the iommu_group 911 + * via its add_device callback first, so we need to make the attach 912 + * happen between those two points. Since the IOMMU core uses a bus 913 + * notifier with default priority for add_device, do the same but 914 + * with a lower priority to ensure the appropriate ordering. 915 + */ 916 + nb->notifier_call = __iommu_attach_notifier; 917 + nb->priority = -100; 918 + 919 + ret = bus_register_notifier(bus, nb); 920 + if (ret) { 921 + pr_warn("Failed to register DMA domain notifier; IOMMU DMA ops unavailable on bus '%s'\n", 922 + bus->name); 923 + kfree(nb); 924 + } 925 + return ret; 926 + } 927 + 928 + static int __init __iommu_dma_init(void) 929 + { 930 + int ret; 931 + 932 + ret = iommu_dma_init(); 933 + if (!ret) 934 + ret = register_iommu_dma_ops_notifier(&platform_bus_type); 935 + if (!ret) 936 + ret = register_iommu_dma_ops_notifier(&amba_bustype); 937 + return ret; 938 + } 939 + arch_initcall(__iommu_dma_init); 940 + 941 + static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, 942 + const struct iommu_ops *ops) 943 + { 944 + struct iommu_group *group; 945 + 946 + if (!ops) 947 + return; 948 + /* 949 + * TODO: As a concession to the future, we're ready to handle being 950 + * called both early and late (i.e. after bus_add_device). Once all 951 + * the platform bus code is reworked to call us late and the notifier 952 + * junk above goes away, move the body of do_iommu_attach here. 
953 + */ 954 + group = iommu_group_get(dev); 955 + if (group) { 956 + do_iommu_attach(dev, ops, dma_base, size); 957 + iommu_group_put(group); 958 + } else { 959 + queue_iommu_attach(dev, ops, dma_base, size); 960 + } 961 + } 962 + 963 + void arch_teardown_dma_ops(struct device *dev) 964 + { 965 + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); 966 + 967 + if (domain) { 968 + iommu_detach_device(domain, dev); 969 + if (domain->type & __IOMMU_DOMAIN_FAKE_DEFAULT) 970 + iommu_domain_free(domain); 971 + } 972 + 973 + dev->archdata.dma_ops = NULL; 974 + } 975 + 976 + #else 977 + 978 + static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, 979 + struct iommu_ops *iommu) 980 + { } 981 + 982 + #endif /* CONFIG_IOMMU_DMA */ 983 + 984 + void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, 985 + struct iommu_ops *iommu, bool coherent) 986 + { 987 + if (!acpi_disabled && !dev->archdata.dma_ops) 988 + dev->archdata.dma_ops = dma_ops; 989 + 990 + dev->archdata.dma_coherent = coherent; 991 + __iommu_setup_dma_ops(dev, dma_base, size, iommu); 992 + }
+1
arch/s390/Kconfig
··· 582 582 bool "PCI support" 583 583 select HAVE_DMA_ATTRS 584 584 select PCI_MSI 585 + select IOMMU_SUPPORT 585 586 help 586 587 Enable PCI support. 587 588
+4
arch/s390/include/asm/pci.h
··· 62 62 u8 size; /* order 2 exponent */ 63 63 }; 64 64 65 + struct s390_domain; 66 + 65 67 /* Private data per function */ 66 68 struct zpci_dev { 67 69 struct pci_dev *pdev; ··· 120 118 121 119 struct dentry *debugfs_dev; 122 120 struct dentry *debugfs_perf; 121 + 122 + struct s390_domain *s390_domain; /* s390 IOMMU domain data */ 123 123 }; 124 124 125 125 static inline bool zdev_enabled(struct zpci_dev *zdev)
+4 -1
arch/s390/include/asm/pci_dma.h
··· 192 192 /* Prototypes */ 193 193 int zpci_dma_init_device(struct zpci_dev *); 194 194 void zpci_dma_exit_device(struct zpci_dev *); 195 - 195 + void dma_free_seg_table(unsigned long); 196 + unsigned long *dma_alloc_cpu_table(void); 197 + void dma_cleanup_tables(unsigned long *); 198 + void dma_update_cpu_trans(unsigned long *, void *, dma_addr_t, int); 196 199 #endif
+25 -12
arch/s390/pci/pci_dma.c
··· 24 24 zdev->iommu_pages * PAGE_SIZE); 25 25 } 26 26 27 - static unsigned long *dma_alloc_cpu_table(void) 27 + unsigned long *dma_alloc_cpu_table(void) 28 28 { 29 29 unsigned long *table, *entry; 30 30 ··· 114 114 return &pto[px]; 115 115 } 116 116 117 - static void dma_update_cpu_trans(struct zpci_dev *zdev, void *page_addr, 118 - dma_addr_t dma_addr, int flags) 117 + void dma_update_cpu_trans(unsigned long *dma_table, void *page_addr, 118 + dma_addr_t dma_addr, int flags) 119 119 { 120 120 unsigned long *entry; 121 121 122 - entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr); 122 + entry = dma_walk_cpu_trans(dma_table, dma_addr); 123 123 if (!entry) { 124 124 WARN_ON_ONCE(1); 125 125 return; ··· 156 156 goto no_refresh; 157 157 158 158 for (i = 0; i < nr_pages; i++) { 159 - dma_update_cpu_trans(zdev, page_addr, dma_addr, flags); 159 + dma_update_cpu_trans(zdev->dma_table, page_addr, dma_addr, 160 + flags); 160 161 page_addr += PAGE_SIZE; 161 162 dma_addr += PAGE_SIZE; 162 163 } ··· 182 181 return rc; 183 182 } 184 183 185 - static void dma_free_seg_table(unsigned long entry) 184 + void dma_free_seg_table(unsigned long entry) 186 185 { 187 186 unsigned long *sto = get_rt_sto(entry); 188 187 int sx; ··· 194 193 dma_free_cpu_table(sto); 195 194 } 196 195 197 - static void dma_cleanup_tables(struct zpci_dev *zdev) 196 + void dma_cleanup_tables(unsigned long *table) 198 197 { 199 - unsigned long *table; 200 198 int rtx; 201 199 202 - if (!zdev || !zdev->dma_table) 200 + if (!table) 203 201 return; 204 202 205 - table = zdev->dma_table; 206 203 for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++) 207 204 if (reg_entry_isvalid(table[rtx])) 208 205 dma_free_seg_table(table[rtx]); 209 206 210 207 dma_free_cpu_table(table); 211 - zdev->dma_table = NULL; 212 208 } 213 209 214 210 static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev, ··· 414 416 { 415 417 int rc; 416 418 419 + /* 420 + * At this point, if the device is part of an IOMMU domain, this would 421 + * be a 
strong hint towards a bug in the IOMMU API (common) code and/or 422 + * simultaneous access via IOMMU and DMA API. So let's issue a warning. 423 + */ 424 + WARN_ON(zdev->s390_domain); 425 + 417 426 spin_lock_init(&zdev->iommu_bitmap_lock); 418 427 spin_lock_init(&zdev->dma_table_lock); 419 428 ··· 455 450 456 451 void zpci_dma_exit_device(struct zpci_dev *zdev) 457 452 { 453 + /* 454 + * At this point, if the device is part of an IOMMU domain, this would 455 + * be a strong hint towards a bug in the IOMMU API (common) code and/or 456 + * simultaneous access via IOMMU and DMA API. So let's issue a warning. 457 + */ 458 + WARN_ON(zdev->s390_domain); 459 + 458 460 zpci_unregister_ioat(zdev, 0); 459 - dma_cleanup_tables(zdev); 461 + dma_cleanup_tables(zdev->dma_table); 462 + zdev->dma_table = NULL; 460 463 vfree(zdev->iommu_bitmap); 461 464 zdev->iommu_bitmap = NULL; 462 465 zdev->next_bit = 0;
+15
drivers/iommu/Kconfig
··· 48 48 def_bool y 49 49 depends on OF && IOMMU_API 50 50 51 + # IOMMU-agnostic DMA-mapping layer 52 + config IOMMU_DMA 53 + bool 54 + depends on NEED_SG_DMA_LENGTH 55 + select IOMMU_API 56 + select IOMMU_IOVA 57 + 51 58 config FSL_PAMU 52 59 bool "Freescale IOMMU support" 53 60 depends on PPC32 ··· 368 361 depends on ARM64 && PCI 369 362 select IOMMU_API 370 363 select IOMMU_IO_PGTABLE_LPAE 364 + select GENERIC_MSI_IRQ_DOMAIN 371 365 help 372 366 Support for implementations of the ARM System MMU architecture 373 367 version 3 providing translation support to a PCIe root complex. 374 368 375 369 Say Y here if your system includes an IOMMU device implementing 376 370 the ARM SMMUv3 architecture. 371 + 372 + config S390_IOMMU 373 + def_bool y if S390 && PCI 374 + depends on S390 && PCI 375 + select IOMMU_API 376 + help 377 + Support for the IOMMU API for s390 PCI devices. 377 378 378 379 endif # IOMMU_SUPPORT
+2
drivers/iommu/Makefile
··· 1 1 obj-$(CONFIG_IOMMU_API) += iommu.o 2 2 obj-$(CONFIG_IOMMU_API) += iommu-traces.o 3 3 obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o 4 + obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o 4 5 obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o 5 6 obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o 6 7 obj-$(CONFIG_IOMMU_IOVA) += iova.o ··· 24 23 obj-$(CONFIG_SHMOBILE_IOMMU) += shmobile-iommu.o 25 24 obj-$(CONFIG_SHMOBILE_IPMMU) += shmobile-ipmmu.o 26 25 obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o 26 + obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
+39 -134
drivers/iommu/amd_iommu.c
··· 89 89 struct iommu_dev_data { 90 90 struct list_head list; /* For domain->dev_list */ 91 91 struct list_head dev_data_list; /* For global dev_data_list */ 92 - struct list_head alias_list; /* Link alias-groups together */ 93 - struct iommu_dev_data *alias_data;/* The alias dev_data */ 94 92 struct protection_domain *domain; /* Domain the device is bound to */ 95 93 u16 devid; /* PCI Device ID */ 96 94 bool iommu_v2; /* Device can make use of IOMMUv2 */ ··· 134 136 if (!dev_data) 135 137 return NULL; 136 138 137 - INIT_LIST_HEAD(&dev_data->alias_list); 138 - 139 139 dev_data->devid = devid; 140 140 141 141 spin_lock_irqsave(&dev_data_list_lock, flags); ··· 141 145 spin_unlock_irqrestore(&dev_data_list_lock, flags); 142 146 143 147 return dev_data; 144 - } 145 - 146 - static void free_dev_data(struct iommu_dev_data *dev_data) 147 - { 148 - unsigned long flags; 149 - 150 - spin_lock_irqsave(&dev_data_list_lock, flags); 151 - list_del(&dev_data->dev_data_list); 152 - spin_unlock_irqrestore(&dev_data_list_lock, flags); 153 - 154 - kfree(dev_data); 155 148 } 156 149 157 150 static struct iommu_dev_data *search_dev_data(u16 devid) ··· 296 311 iommu_group_put(group); 297 312 } 298 313 299 - static int __last_alias(struct pci_dev *pdev, u16 alias, void *data) 300 - { 301 - *(u16 *)data = alias; 302 - return 0; 303 - } 304 - 305 - static u16 get_alias(struct device *dev) 306 - { 307 - struct pci_dev *pdev = to_pci_dev(dev); 308 - u16 devid, ivrs_alias, pci_alias; 309 - 310 - devid = get_device_id(dev); 311 - ivrs_alias = amd_iommu_alias_table[devid]; 312 - pci_for_each_dma_alias(pdev, __last_alias, &pci_alias); 313 - 314 - if (ivrs_alias == pci_alias) 315 - return ivrs_alias; 316 - 317 - /* 318 - * DMA alias showdown 319 - * 320 - * The IVRS is fairly reliable in telling us about aliases, but it 321 - * can't know about every screwy device. If we don't have an IVRS 322 - * reported alias, use the PCI reported alias. 
In that case we may 323 - * still need to initialize the rlookup and dev_table entries if the 324 - * alias is to a non-existent device. 325 - */ 326 - if (ivrs_alias == devid) { 327 - if (!amd_iommu_rlookup_table[pci_alias]) { 328 - amd_iommu_rlookup_table[pci_alias] = 329 - amd_iommu_rlookup_table[devid]; 330 - memcpy(amd_iommu_dev_table[pci_alias].data, 331 - amd_iommu_dev_table[devid].data, 332 - sizeof(amd_iommu_dev_table[pci_alias].data)); 333 - } 334 - 335 - return pci_alias; 336 - } 337 - 338 - pr_info("AMD-Vi: Using IVRS reported alias %02x:%02x.%d " 339 - "for device %s[%04x:%04x], kernel reported alias " 340 - "%02x:%02x.%d\n", PCI_BUS_NUM(ivrs_alias), PCI_SLOT(ivrs_alias), 341 - PCI_FUNC(ivrs_alias), dev_name(dev), pdev->vendor, pdev->device, 342 - PCI_BUS_NUM(pci_alias), PCI_SLOT(pci_alias), 343 - PCI_FUNC(pci_alias)); 344 - 345 - /* 346 - * If we don't have a PCI DMA alias and the IVRS alias is on the same 347 - * bus, then the IVRS table may know about a quirk that we don't. 
348 - */ 349 - if (pci_alias == devid && 350 - PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) { 351 - pdev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN; 352 - pdev->dma_alias_devfn = ivrs_alias & 0xff; 353 - pr_info("AMD-Vi: Added PCI DMA alias %02x.%d for %s\n", 354 - PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias), 355 - dev_name(dev)); 356 - } 357 - 358 - return ivrs_alias; 359 - } 360 - 361 314 static int iommu_init_device(struct device *dev) 362 315 { 363 316 struct pci_dev *pdev = to_pci_dev(dev); 364 317 struct iommu_dev_data *dev_data; 365 - u16 alias; 366 318 367 319 if (dev->archdata.iommu) 368 320 return 0; ··· 307 385 dev_data = find_dev_data(get_device_id(dev)); 308 386 if (!dev_data) 309 387 return -ENOMEM; 310 - 311 - alias = get_alias(dev); 312 - 313 - if (alias != dev_data->devid) { 314 - struct iommu_dev_data *alias_data; 315 - 316 - alias_data = find_dev_data(alias); 317 - if (alias_data == NULL) { 318 - pr_err("AMD-Vi: Warning: Unhandled device %s\n", 319 - dev_name(dev)); 320 - free_dev_data(dev_data); 321 - return -ENOTSUPP; 322 - } 323 - dev_data->alias_data = alias_data; 324 - 325 - /* Add device to the alias_list */ 326 - list_add(&dev_data->alias_list, &alias_data->alias_list); 327 - } 328 388 329 389 if (pci_iommuv2_capable(pdev)) { 330 390 struct amd_iommu *iommu; ··· 348 444 dev); 349 445 350 446 iommu_group_remove_device(dev); 351 - 352 - /* Unlink from alias, it may change if another device is re-plugged */ 353 - dev_data->alias_data = NULL; 354 447 355 448 /* Remove dma-ops */ 356 449 dev->archdata.dma_ops = NULL; ··· 534 633 535 634 while (head != tail) { 536 635 iommu_print_event(iommu, iommu->evt_buf + head); 537 - head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size; 636 + head = (head + EVENT_ENTRY_SIZE) % EVT_BUFFER_SIZE; 538 637 } 539 638 540 639 writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); ··· 684 783 u8 *target; 685 784 686 785 target = iommu->cmd_buf + tail; 687 - tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; 786 
+ tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE; 688 787 689 788 /* Copy command to buffer */ 690 789 memcpy(target, cmd, sizeof(*cmd)); ··· 851 950 u32 left, tail, head, next_tail; 852 951 unsigned long flags; 853 952 854 - WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED); 855 - 856 953 again: 857 954 spin_lock_irqsave(&iommu->lock, flags); 858 955 859 956 head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); 860 957 tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); 861 - next_tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; 862 - left = (head - next_tail) % iommu->cmd_buf_size; 958 + next_tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE; 959 + left = (head - next_tail) % CMD_BUFFER_SIZE; 863 960 864 961 if (left <= 2) { 865 962 struct iommu_cmd sync_cmd; ··· 1013 1114 static int device_flush_dte(struct iommu_dev_data *dev_data) 1014 1115 { 1015 1116 struct amd_iommu *iommu; 1117 + u16 alias; 1016 1118 int ret; 1017 1119 1018 1120 iommu = amd_iommu_rlookup_table[dev_data->devid]; 1121 + alias = amd_iommu_alias_table[dev_data->devid]; 1019 1122 1020 1123 ret = iommu_flush_dte(iommu, dev_data->devid); 1124 + if (!ret && alias != dev_data->devid) 1125 + ret = iommu_flush_dte(iommu, alias); 1021 1126 if (ret) 1022 1127 return ret; 1023 1128 ··· 1887 1984 struct protection_domain *domain) 1888 1985 { 1889 1986 struct amd_iommu *iommu; 1987 + u16 alias; 1890 1988 bool ats; 1891 1989 1892 1990 iommu = amd_iommu_rlookup_table[dev_data->devid]; 1991 + alias = amd_iommu_alias_table[dev_data->devid]; 1893 1992 ats = dev_data->ats.enabled; 1894 1993 1895 1994 /* Update data structures */ 1896 1995 dev_data->domain = domain; 1897 1996 list_add(&dev_data->list, &domain->dev_list); 1898 - set_dte_entry(dev_data->devid, domain, ats); 1899 1997 1900 1998 /* Do reference counting */ 1901 1999 domain->dev_iommu[iommu->index] += 1; 1902 2000 domain->dev_cnt += 1; 1903 2001 1904 - /* Flush the DTE entry */ 2002 + /* Update device table */ 2003 + 
set_dte_entry(dev_data->devid, domain, ats); 2004 + if (alias != dev_data->devid) 2005 + set_dte_entry(dev_data->devid, domain, ats); 2006 + 1905 2007 device_flush_dte(dev_data); 1906 2008 } 1907 2009 1908 2010 static void do_detach(struct iommu_dev_data *dev_data) 1909 2011 { 1910 2012 struct amd_iommu *iommu; 2013 + u16 alias; 1911 2014 1912 2015 /* 1913 2016 * First check if the device is still attached. It might already ··· 1925 2016 return; 1926 2017 1927 2018 iommu = amd_iommu_rlookup_table[dev_data->devid]; 2019 + alias = amd_iommu_alias_table[dev_data->devid]; 1928 2020 1929 2021 /* decrease reference counters */ 1930 2022 dev_data->domain->dev_iommu[iommu->index] -= 1; ··· 1935 2025 dev_data->domain = NULL; 1936 2026 list_del(&dev_data->list); 1937 2027 clear_dte_entry(dev_data->devid); 2028 + if (alias != dev_data->devid) 2029 + clear_dte_entry(alias); 1938 2030 1939 2031 /* Flush the DTE entry */ 1940 2032 device_flush_dte(dev_data); ··· 1949 2037 static int __attach_device(struct iommu_dev_data *dev_data, 1950 2038 struct protection_domain *domain) 1951 2039 { 1952 - struct iommu_dev_data *head, *entry; 1953 2040 int ret; 2041 + 2042 + /* 2043 + * Must be called with IRQs disabled. Warn here to detect early 2044 + * when its not. 
2045 + */ 2046 + WARN_ON(!irqs_disabled()); 1954 2047 1955 2048 /* lock domain */ 1956 2049 spin_lock(&domain->lock); 1957 2050 1958 - head = dev_data; 1959 - 1960 - if (head->alias_data != NULL) 1961 - head = head->alias_data; 1962 - 1963 - /* Now we have the root of the alias group, if any */ 1964 - 1965 2051 ret = -EBUSY; 1966 - if (head->domain != NULL) 2052 + if (dev_data->domain != NULL) 1967 2053 goto out_unlock; 1968 2054 1969 2055 /* Attach alias group root */ 1970 - do_attach(head, domain); 1971 - 1972 - /* Attach other devices in the alias group */ 1973 - list_for_each_entry(entry, &head->alias_list, alias_list) 1974 - do_attach(entry, domain); 2056 + do_attach(dev_data, domain); 1975 2057 1976 2058 ret = 0; 1977 2059 ··· 2115 2209 */ 2116 2210 static void __detach_device(struct iommu_dev_data *dev_data) 2117 2211 { 2118 - struct iommu_dev_data *head, *entry; 2119 2212 struct protection_domain *domain; 2120 - unsigned long flags; 2121 2213 2122 - BUG_ON(!dev_data->domain); 2214 + /* 2215 + * Must be called with IRQs disabled. Warn here to detect early 2216 + * when its not. 
2217 + */ 2218 + WARN_ON(!irqs_disabled()); 2219 + 2220 + if (WARN_ON(!dev_data->domain)) 2221 + return; 2123 2222 2124 2223 domain = dev_data->domain; 2125 2224 2126 - spin_lock_irqsave(&domain->lock, flags); 2225 + spin_lock(&domain->lock); 2127 2226 2128 - head = dev_data; 2129 - if (head->alias_data != NULL) 2130 - head = head->alias_data; 2227 + do_detach(dev_data); 2131 2228 2132 - list_for_each_entry(entry, &head->alias_list, alias_list) 2133 - do_detach(entry); 2134 - 2135 - do_detach(head); 2136 - 2137 - spin_unlock_irqrestore(&domain->lock, flags); 2229 + spin_unlock(&domain->lock); 2138 2230 } 2139 2231 2140 2232 /* ··· 3102 3198 .iova_to_phys = amd_iommu_iova_to_phys, 3103 3199 .add_device = amd_iommu_add_device, 3104 3200 .remove_device = amd_iommu_remove_device, 3201 + .device_group = pci_device_group, 3105 3202 .get_dm_regions = amd_iommu_get_dm_regions, 3106 3203 .put_dm_regions = amd_iommu_put_dm_regions, 3107 3204 .pgsize_bitmap = AMD_IOMMU_PGSIZES,
+27 -93
drivers/iommu/amd_iommu_init.c
··· 408 408 } 409 409 410 410 /* 411 - * This function reads the last device id the IOMMU has to handle from the PCI 412 - * capability header for this IOMMU 413 - */ 414 - static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr) 415 - { 416 - u32 cap; 417 - 418 - cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); 419 - update_last_devid(PCI_DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap))); 420 - 421 - return 0; 422 - } 423 - 424 - /* 425 411 * After reading the highest device id from the IOMMU PCI capability header 426 412 * this function looks if there is a higher device id defined in the ACPI table 427 413 */ ··· 419 433 p += sizeof(*h); 420 434 end += h->length; 421 435 422 - find_last_devid_on_pci(PCI_BUS_NUM(h->devid), 423 - PCI_SLOT(h->devid), 424 - PCI_FUNC(h->devid), 425 - h->cap_ptr); 426 - 427 436 while (p < end) { 428 437 dev = (struct ivhd_entry *)p; 429 438 switch (dev->type) { 439 + case IVHD_DEV_ALL: 440 + /* Use maximum BDF value for DEV_ALL */ 441 + update_last_devid(0xffff); 442 + break; 430 443 case IVHD_DEV_SELECT: 431 444 case IVHD_DEV_RANGE_END: 432 445 case IVHD_DEV_ALIAS: ··· 498 513 * write commands to that buffer later and the IOMMU will execute them 499 514 * asynchronously 500 515 */ 501 - static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) 516 + static int __init alloc_command_buffer(struct amd_iommu *iommu) 502 517 { 503 - u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 504 - get_order(CMD_BUFFER_SIZE)); 518 + iommu->cmd_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 519 + get_order(CMD_BUFFER_SIZE)); 505 520 506 - if (cmd_buf == NULL) 507 - return NULL; 508 - 509 - iommu->cmd_buf_size = CMD_BUFFER_SIZE | CMD_BUFFER_UNINITIALIZED; 510 - 511 - return cmd_buf; 521 + return iommu->cmd_buf ? 
0 : -ENOMEM; 512 522 } 513 523 514 524 /* ··· 537 557 &entry, sizeof(entry)); 538 558 539 559 amd_iommu_reset_cmd_buffer(iommu); 540 - iommu->cmd_buf_size &= ~(CMD_BUFFER_UNINITIALIZED); 541 560 } 542 561 543 562 static void __init free_command_buffer(struct amd_iommu *iommu) 544 563 { 545 - free_pages((unsigned long)iommu->cmd_buf, 546 - get_order(iommu->cmd_buf_size & ~(CMD_BUFFER_UNINITIALIZED))); 564 + free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); 547 565 } 548 566 549 567 /* allocates the memory where the IOMMU will log its events to */ 550 - static u8 * __init alloc_event_buffer(struct amd_iommu *iommu) 568 + static int __init alloc_event_buffer(struct amd_iommu *iommu) 551 569 { 552 - iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 553 - get_order(EVT_BUFFER_SIZE)); 570 + iommu->evt_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 571 + get_order(EVT_BUFFER_SIZE)); 554 572 555 - if (iommu->evt_buf == NULL) 556 - return NULL; 557 - 558 - iommu->evt_buf_size = EVT_BUFFER_SIZE; 559 - 560 - return iommu->evt_buf; 573 + return iommu->evt_buf ? 0 : -ENOMEM; 561 574 } 562 575 563 576 static void iommu_enable_event_buffer(struct amd_iommu *iommu) ··· 577 604 } 578 605 579 606 /* allocates the memory where the IOMMU will log its events to */ 580 - static u8 * __init alloc_ppr_log(struct amd_iommu *iommu) 607 + static int __init alloc_ppr_log(struct amd_iommu *iommu) 581 608 { 582 - iommu->ppr_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 583 - get_order(PPR_LOG_SIZE)); 609 + iommu->ppr_log = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 610 + get_order(PPR_LOG_SIZE)); 584 611 585 - if (iommu->ppr_log == NULL) 586 - return NULL; 587 - 588 - return iommu->ppr_log; 612 + return iommu->ppr_log ? 
0 : -ENOMEM; 589 613 } 590 614 591 615 static void iommu_enable_ppr_log(struct amd_iommu *iommu) ··· 805 835 switch (e->type) { 806 836 case IVHD_DEV_ALL: 807 837 808 - DUMP_printk(" DEV_ALL\t\t\t first devid: %02x:%02x.%x" 809 - " last device %02x:%02x.%x flags: %02x\n", 810 - PCI_BUS_NUM(iommu->first_device), 811 - PCI_SLOT(iommu->first_device), 812 - PCI_FUNC(iommu->first_device), 813 - PCI_BUS_NUM(iommu->last_device), 814 - PCI_SLOT(iommu->last_device), 815 - PCI_FUNC(iommu->last_device), 816 - e->flags); 838 + DUMP_printk(" DEV_ALL\t\t\tflags: %02x\n", e->flags); 817 839 818 - for (dev_i = iommu->first_device; 819 - dev_i <= iommu->last_device; ++dev_i) 820 - set_dev_entry_from_acpi(iommu, dev_i, 821 - e->flags, 0); 840 + for (dev_i = 0; dev_i <= amd_iommu_last_bdf; ++dev_i) 841 + set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0); 822 842 break; 823 843 case IVHD_DEV_SELECT: 824 844 ··· 964 1004 return 0; 965 1005 } 966 1006 967 - /* Initializes the device->iommu mapping for the driver */ 968 - static int __init init_iommu_devices(struct amd_iommu *iommu) 969 - { 970 - u32 i; 971 - 972 - for (i = iommu->first_device; i <= iommu->last_device; ++i) 973 - set_iommu_for_device(iommu, i); 974 - 975 - return 0; 976 - } 977 - 978 1007 static void __init free_iommu_one(struct amd_iommu *iommu) 979 1008 { 980 1009 free_command_buffer(iommu); ··· 1060 1111 if (!iommu->mmio_base) 1061 1112 return -ENOMEM; 1062 1113 1063 - iommu->cmd_buf = alloc_command_buffer(iommu); 1064 - if (!iommu->cmd_buf) 1114 + if (alloc_command_buffer(iommu)) 1065 1115 return -ENOMEM; 1066 1116 1067 - iommu->evt_buf = alloc_event_buffer(iommu); 1068 - if (!iommu->evt_buf) 1117 + if (alloc_event_buffer(iommu)) 1069 1118 return -ENOMEM; 1070 1119 1071 1120 iommu->int_enabled = false; ··· 1081 1134 * table tells us so, but this is a lie! 
1082 1135 */ 1083 1136 amd_iommu_rlookup_table[iommu->devid] = NULL; 1084 - 1085 - init_iommu_devices(iommu); 1086 1137 1087 1138 return 0; 1088 1139 } ··· 1211 1266 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET, 1212 1267 &misc); 1213 1268 1214 - iommu->first_device = PCI_DEVID(MMIO_GET_BUS(range), 1215 - MMIO_GET_FD(range)); 1216 - iommu->last_device = PCI_DEVID(MMIO_GET_BUS(range), 1217 - MMIO_GET_LD(range)); 1218 - 1219 1269 if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB))) 1220 1270 amd_iommu_iotlb_sup = false; 1221 1271 ··· 1248 1308 amd_iommu_v2_present = true; 1249 1309 } 1250 1310 1251 - if (iommu_feature(iommu, FEATURE_PPR)) { 1252 - iommu->ppr_log = alloc_ppr_log(iommu); 1253 - if (!iommu->ppr_log) 1254 - return -ENOMEM; 1255 - } 1311 + if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu)) 1312 + return -ENOMEM; 1256 1313 1257 1314 if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) 1258 1315 amd_iommu_np_cache = true; ··· 1695 1758 free_pages((unsigned long)irq_lookup_table, 1696 1759 get_order(rlookup_table_size)); 1697 1760 1698 - if (amd_iommu_irq_cache) { 1699 - kmem_cache_destroy(amd_iommu_irq_cache); 1700 - amd_iommu_irq_cache = NULL; 1701 - 1702 - } 1761 + kmem_cache_destroy(amd_iommu_irq_cache); 1762 + amd_iommu_irq_cache = NULL; 1703 1763 1704 1764 free_pages((unsigned long)amd_iommu_rlookup_table, 1705 1765 get_order(rlookup_table_size)); ··· 2135 2201 iommu_detected = 1; 2136 2202 x86_init.iommu.iommu_init = amd_iommu_init; 2137 2203 2138 - return 0; 2204 + return 1; 2139 2205 } 2140 2206 2141 2207 /****************************************************************************
+2 -11
drivers/iommu/amd_iommu_types.h
··· 295 295 #define IOMMU_PTE_IR (1ULL << 61) 296 296 #define IOMMU_PTE_IW (1ULL << 62) 297 297 298 + #define DTE_FLAG_IOTLB (1ULL << 32) 299 + #define DTE_FLAG_GV (1ULL << 55) 298 300 #define DTE_FLAG_MASK (0x3ffULL << 32) 299 - #define DTE_FLAG_IOTLB (0x01UL << 32) 300 - #define DTE_FLAG_GV (0x01ULL << 55) 301 301 #define DTE_GLX_SHIFT (56) 302 302 #define DTE_GLX_MASK (3) 303 303 ··· 517 517 /* pci domain of this IOMMU */ 518 518 u16 pci_seg; 519 519 520 - /* first device this IOMMU handles. read from PCI */ 521 - u16 first_device; 522 - /* last device this IOMMU handles. read from PCI */ 523 - u16 last_device; 524 - 525 520 /* start of exclusion range of that IOMMU */ 526 521 u64 exclusion_start; 527 522 /* length of exclusion range of that IOMMU */ ··· 524 529 525 530 /* command buffer virtual address */ 526 531 u8 *cmd_buf; 527 - /* size of command buffer */ 528 - u32 cmd_buf_size; 529 532 530 - /* size of event buffer */ 531 - u32 evt_buf_size; 532 533 /* event buffer virtual address */ 533 534 u8 *evt_buf; 534 535
+111 -44
drivers/iommu/arm-smmu-v3.c
··· 26 26 #include <linux/iommu.h> 27 27 #include <linux/iopoll.h> 28 28 #include <linux/module.h> 29 + #include <linux/msi.h> 29 30 #include <linux/of.h> 30 31 #include <linux/of_address.h> 32 + #include <linux/of_platform.h> 31 33 #include <linux/pci.h> 32 34 #include <linux/platform_device.h> 33 35 ··· 405 403 PRI_RESP_SUCC, 406 404 }; 407 405 406 + enum arm_smmu_msi_index { 407 + EVTQ_MSI_INDEX, 408 + GERROR_MSI_INDEX, 409 + PRIQ_MSI_INDEX, 410 + ARM_SMMU_MAX_MSIS, 411 + }; 412 + 413 + static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = { 414 + [EVTQ_MSI_INDEX] = { 415 + ARM_SMMU_EVTQ_IRQ_CFG0, 416 + ARM_SMMU_EVTQ_IRQ_CFG1, 417 + ARM_SMMU_EVTQ_IRQ_CFG2, 418 + }, 419 + [GERROR_MSI_INDEX] = { 420 + ARM_SMMU_GERROR_IRQ_CFG0, 421 + ARM_SMMU_GERROR_IRQ_CFG1, 422 + ARM_SMMU_GERROR_IRQ_CFG2, 423 + }, 424 + [PRIQ_MSI_INDEX] = { 425 + ARM_SMMU_PRIQ_IRQ_CFG0, 426 + ARM_SMMU_PRIQ_IRQ_CFG1, 427 + ARM_SMMU_PRIQ_IRQ_CFG2, 428 + }, 429 + }; 430 + 408 431 struct arm_smmu_cmdq_ent { 409 432 /* Common fields */ 410 433 u8 opcode; ··· 597 570 unsigned int sid_bits; 598 571 599 572 struct arm_smmu_strtab_cfg strtab_cfg; 600 - struct list_head list; 601 573 }; 602 574 603 575 /* SMMU private data for an IOMMU group */ ··· 630 604 631 605 struct iommu_domain domain; 632 606 }; 633 - 634 - /* Our list of SMMU instances */ 635 - static DEFINE_SPINLOCK(arm_smmu_devices_lock); 636 - static LIST_HEAD(arm_smmu_devices); 637 607 638 608 struct arm_smmu_option_prop { 639 609 u32 opt; ··· 1449 1427 struct io_pgtable_cfg *pgtbl_cfg) 1450 1428 { 1451 1429 int ret; 1452 - u16 asid; 1430 + int asid; 1453 1431 struct arm_smmu_device *smmu = smmu_domain->smmu; 1454 1432 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; 1455 1433 ··· 1461 1439 &cfg->cdptr_dma, GFP_KERNEL); 1462 1440 if (!cfg->cdptr) { 1463 1441 dev_warn(smmu->dev, "failed to allocate context descriptor\n"); 1442 + ret = -ENOMEM; 1464 1443 goto out_free_asid; 1465 1444 } 1466 1445 1467 - cfg->cd.asid = asid; 1446 + 
cfg->cd.asid = (u16)asid; 1468 1447 cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0]; 1469 1448 cfg->cd.tcr = pgtbl_cfg->arm_lpae_s1_cfg.tcr; 1470 1449 cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair[0]; ··· 1479 1456 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain, 1480 1457 struct io_pgtable_cfg *pgtbl_cfg) 1481 1458 { 1482 - u16 vmid; 1459 + int vmid; 1483 1460 struct arm_smmu_device *smmu = smmu_domain->smmu; 1484 1461 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; 1485 1462 ··· 1487 1464 if (IS_ERR_VALUE(vmid)) 1488 1465 return vmid; 1489 1466 1490 - cfg->vmid = vmid; 1467 + cfg->vmid = (u16)vmid; 1491 1468 cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; 1492 1469 cfg->vtcr = pgtbl_cfg->arm_lpae_s2_cfg.vtcr; 1493 1470 return 0; ··· 1749 1726 static struct arm_smmu_device *arm_smmu_get_for_pci_dev(struct pci_dev *pdev) 1750 1727 { 1751 1728 struct device_node *of_node; 1752 - struct arm_smmu_device *curr, *smmu = NULL; 1729 + struct platform_device *smmu_pdev; 1730 + struct arm_smmu_device *smmu = NULL; 1753 1731 struct pci_bus *bus = pdev->bus; 1754 1732 1755 1733 /* Walk up to the root bus */ ··· 1763 1739 return NULL; 1764 1740 1765 1741 /* See if we can find an SMMU corresponding to the phandle */ 1766 - spin_lock(&arm_smmu_devices_lock); 1767 - list_for_each_entry(curr, &arm_smmu_devices, list) { 1768 - if (curr->dev->of_node == of_node) { 1769 - smmu = curr; 1770 - break; 1771 - } 1772 - } 1773 - spin_unlock(&arm_smmu_devices_lock); 1742 + smmu_pdev = of_find_device_by_node(of_node); 1743 + if (smmu_pdev) 1744 + smmu = platform_get_drvdata(smmu_pdev); 1745 + 1774 1746 of_node_put(of_node); 1775 1747 return smmu; 1776 1748 } ··· 1922 1902 .iova_to_phys = arm_smmu_iova_to_phys, 1923 1903 .add_device = arm_smmu_add_device, 1924 1904 .remove_device = arm_smmu_remove_device, 1905 + .device_group = pci_device_group, 1925 1906 .domain_get_attr = arm_smmu_domain_get_attr, 1926 1907 .domain_set_attr = arm_smmu_domain_set_attr, 1927 
1908 .pgsize_bitmap = -1UL, /* Restricted during device attach */ ··· 2207 2186 1, ARM_SMMU_POLL_TIMEOUT_US); 2208 2187 } 2209 2188 2189 + static void arm_smmu_free_msis(void *data) 2190 + { 2191 + struct device *dev = data; 2192 + platform_msi_domain_free_irqs(dev); 2193 + } 2194 + 2195 + static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg) 2196 + { 2197 + phys_addr_t doorbell; 2198 + struct device *dev = msi_desc_to_dev(desc); 2199 + struct arm_smmu_device *smmu = dev_get_drvdata(dev); 2200 + phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index]; 2201 + 2202 + doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo; 2203 + doorbell &= MSI_CFG0_ADDR_MASK << MSI_CFG0_ADDR_SHIFT; 2204 + 2205 + writeq_relaxed(doorbell, smmu->base + cfg[0]); 2206 + writel_relaxed(msg->data, smmu->base + cfg[1]); 2207 + writel_relaxed(MSI_CFG2_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]); 2208 + } 2209 + 2210 + static void arm_smmu_setup_msis(struct arm_smmu_device *smmu) 2211 + { 2212 + struct msi_desc *desc; 2213 + int ret, nvec = ARM_SMMU_MAX_MSIS; 2214 + struct device *dev = smmu->dev; 2215 + 2216 + /* Clear the MSI address regs */ 2217 + writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0); 2218 + writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0); 2219 + 2220 + if (smmu->features & ARM_SMMU_FEAT_PRI) 2221 + writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0); 2222 + else 2223 + nvec--; 2224 + 2225 + if (!(smmu->features & ARM_SMMU_FEAT_MSI)) 2226 + return; 2227 + 2228 + /* Allocate MSIs for evtq, gerror and priq. 
Ignore cmdq */ 2229 + ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg); 2230 + if (ret) { 2231 + dev_warn(dev, "failed to allocate MSIs\n"); 2232 + return; 2233 + } 2234 + 2235 + for_each_msi_entry(desc, dev) { 2236 + switch (desc->platform.msi_index) { 2237 + case EVTQ_MSI_INDEX: 2238 + smmu->evtq.q.irq = desc->irq; 2239 + break; 2240 + case GERROR_MSI_INDEX: 2241 + smmu->gerr_irq = desc->irq; 2242 + break; 2243 + case PRIQ_MSI_INDEX: 2244 + smmu->priq.q.irq = desc->irq; 2245 + break; 2246 + default: /* Unknown */ 2247 + continue; 2248 + } 2249 + } 2250 + 2251 + /* Add callback to free MSIs on teardown */ 2252 + devm_add_action(dev, arm_smmu_free_msis, dev); 2253 + } 2254 + 2210 2255 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) 2211 2256 { 2212 2257 int ret, irq; ··· 2286 2199 return ret; 2287 2200 } 2288 2201 2289 - /* Clear the MSI address regs */ 2290 - writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0); 2291 - writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0); 2202 + arm_smmu_setup_msis(smmu); 2292 2203 2293 - /* Request wired interrupt lines */ 2204 + /* Request interrupt lines */ 2294 2205 irq = smmu->evtq.q.irq; 2295 2206 if (irq) { 2296 2207 ret = devm_request_threaded_irq(smmu->dev, irq, ··· 2317 2232 } 2318 2233 2319 2234 if (smmu->features & ARM_SMMU_FEAT_PRI) { 2320 - writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0); 2321 - 2322 2235 irq = smmu->priq.q.irq; 2323 2236 if (irq) { 2324 2237 ret = devm_request_threaded_irq(smmu->dev, irq, ··· 2695 2612 if (ret) 2696 2613 return ret; 2697 2614 2615 + /* Record our private device structure */ 2616 + platform_set_drvdata(pdev, smmu); 2617 + 2698 2618 /* Reset the device */ 2699 2619 ret = arm_smmu_device_reset(smmu); 2700 2620 if (ret) 2701 2621 goto out_free_structures; 2702 2622 2703 - /* Record our private device structure */ 2704 - INIT_LIST_HEAD(&smmu->list); 2705 - spin_lock(&arm_smmu_devices_lock); 2706 - list_add(&smmu->list, &arm_smmu_devices); 
2707 - spin_unlock(&arm_smmu_devices_lock); 2708 2623 return 0; 2709 2624 2710 2625 out_free_structures: ··· 2712 2631 2713 2632 static int arm_smmu_device_remove(struct platform_device *pdev) 2714 2633 { 2715 - struct arm_smmu_device *curr, *smmu = NULL; 2716 - struct device *dev = &pdev->dev; 2717 - 2718 - spin_lock(&arm_smmu_devices_lock); 2719 - list_for_each_entry(curr, &arm_smmu_devices, list) { 2720 - if (curr->dev == dev) { 2721 - smmu = curr; 2722 - list_del(&smmu->list); 2723 - break; 2724 - } 2725 - } 2726 - spin_unlock(&arm_smmu_devices_lock); 2727 - 2728 - if (!smmu) 2729 - return -ENODEV; 2634 + struct arm_smmu_device *smmu = platform_get_drvdata(pdev); 2730 2635 2731 2636 arm_smmu_device_disable(smmu); 2732 2637 arm_smmu_free_structures(smmu);
+74 -56
drivers/iommu/arm-smmu.c
··· 70 70 ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS) \ 71 71 ? 0x400 : 0)) 72 72 73 + #ifdef CONFIG_64BIT 74 + #define smmu_writeq writeq_relaxed 75 + #else 76 + #define smmu_writeq(reg64, addr) \ 77 + do { \ 78 + u64 __val = (reg64); \ 79 + void __iomem *__addr = (addr); \ 80 + writel_relaxed(__val >> 32, __addr + 4); \ 81 + writel_relaxed(__val, __addr); \ 82 + } while (0) 83 + #endif 84 + 73 85 /* Configuration registers */ 74 86 #define ARM_SMMU_GR0_sCR0 0x0 75 87 #define sCR0_CLIENTPD (1 << 0) ··· 197 185 #define ARM_SMMU_CB_SCTLR 0x0 198 186 #define ARM_SMMU_CB_RESUME 0x8 199 187 #define ARM_SMMU_CB_TTBCR2 0x10 200 - #define ARM_SMMU_CB_TTBR0_LO 0x20 201 - #define ARM_SMMU_CB_TTBR0_HI 0x24 202 - #define ARM_SMMU_CB_TTBR1_LO 0x28 203 - #define ARM_SMMU_CB_TTBR1_HI 0x2c 188 + #define ARM_SMMU_CB_TTBR0 0x20 189 + #define ARM_SMMU_CB_TTBR1 0x28 204 190 #define ARM_SMMU_CB_TTBCR 0x30 205 191 #define ARM_SMMU_CB_S1_MAIR0 0x38 206 192 #define ARM_SMMU_CB_S1_MAIR1 0x3c ··· 236 226 #define TTBCR2_SEP_SHIFT 15 237 227 #define TTBCR2_SEP_UPSTREAM (0x7 << TTBCR2_SEP_SHIFT) 238 228 239 - #define TTBRn_HI_ASID_SHIFT 16 229 + #define TTBRn_ASID_SHIFT 48 240 230 241 231 #define FSR_MULTI (1 << 31) 242 232 #define FSR_SS (1 << 30) ··· 705 695 struct io_pgtable_cfg *pgtbl_cfg) 706 696 { 707 697 u32 reg; 698 + u64 reg64; 708 699 bool stage1; 709 700 struct arm_smmu_cfg *cfg = &smmu_domain->cfg; 710 701 struct arm_smmu_device *smmu = smmu_domain->smmu; 711 - void __iomem *cb_base, *gr0_base, *gr1_base; 702 + void __iomem *cb_base, *gr1_base; 712 703 713 - gr0_base = ARM_SMMU_GR0(smmu); 714 704 gr1_base = ARM_SMMU_GR1(smmu); 715 705 stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; 716 706 cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); ··· 748 738 749 739 /* TTBRs */ 750 740 if (stage1) { 751 - reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0]; 752 - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO); 753 - reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0] >> 32; 754 - reg |= 
ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT; 755 - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI); 741 + reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0]; 756 742 757 - reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1]; 758 - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_LO); 759 - reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1] >> 32; 760 - reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT; 761 - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_HI); 743 + reg64 |= ((u64)ARM_SMMU_CB_ASID(cfg)) << TTBRn_ASID_SHIFT; 744 + smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR0); 745 + 746 + reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1]; 747 + reg64 |= ((u64)ARM_SMMU_CB_ASID(cfg)) << TTBRn_ASID_SHIFT; 748 + smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR1); 762 749 } else { 763 - reg = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; 764 - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO); 765 - reg = pgtbl_cfg->arm_lpae_s2_cfg.vttbr >> 32; 766 - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI); 750 + reg64 = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; 751 + smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR0); 767 752 } 768 753 769 754 /* TTBCR */ ··· 1217 1212 1218 1213 /* ATS1 registers can only be written atomically */ 1219 1214 va = iova & ~0xfffUL; 1220 - #ifdef CONFIG_64BIT 1221 1215 if (smmu->version == ARM_SMMU_V2) 1222 - writeq_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR); 1216 + smmu_writeq(va, cb_base + ARM_SMMU_CB_ATS1PR); 1223 1217 else 1224 - #endif 1225 1218 writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR); 1226 1219 1227 1220 if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp, 1228 1221 !(tmp & ATSR_ACTIVE), 5, 50)) { 1229 1222 dev_err(dev, 1230 - "iova to phys timed out on 0x%pad. Falling back to software table walk.\n", 1223 + "iova to phys timed out on %pad. 
Falling back to software table walk.\n", 1231 1224 &iova); 1232 1225 return ops->iova_to_phys(ops, iova); 1233 1226 } ··· 1295 1292 kfree(data); 1296 1293 } 1297 1294 1298 - static int arm_smmu_add_pci_device(struct pci_dev *pdev) 1295 + static int arm_smmu_init_pci_device(struct pci_dev *pdev, 1296 + struct iommu_group *group) 1299 1297 { 1300 - int i, ret; 1301 - u16 sid; 1302 - struct iommu_group *group; 1303 1298 struct arm_smmu_master_cfg *cfg; 1304 - 1305 - group = iommu_group_get_for_dev(&pdev->dev); 1306 - if (IS_ERR(group)) 1307 - return PTR_ERR(group); 1299 + u16 sid; 1300 + int i; 1308 1301 1309 1302 cfg = iommu_group_get_iommudata(group); 1310 1303 if (!cfg) { 1311 1304 cfg = kzalloc(sizeof(*cfg), GFP_KERNEL); 1312 - if (!cfg) { 1313 - ret = -ENOMEM; 1314 - goto out_put_group; 1315 - } 1305 + if (!cfg) 1306 + return -ENOMEM; 1316 1307 1317 1308 iommu_group_set_iommudata(group, cfg, 1318 1309 __arm_smmu_release_pci_iommudata); 1319 1310 } 1320 1311 1321 - if (cfg->num_streamids >= MAX_MASTER_STREAMIDS) { 1322 - ret = -ENOSPC; 1323 - goto out_put_group; 1324 - } 1312 + if (cfg->num_streamids >= MAX_MASTER_STREAMIDS) 1313 + return -ENOSPC; 1325 1314 1326 1315 /* 1327 1316 * Assume Stream ID == Requester ID for now. 
··· 1329 1334 cfg->streamids[cfg->num_streamids++] = sid; 1330 1335 1331 1336 return 0; 1332 - out_put_group: 1333 - iommu_group_put(group); 1334 - return ret; 1335 1337 } 1336 1338 1337 - static int arm_smmu_add_platform_device(struct device *dev) 1339 + static int arm_smmu_init_platform_device(struct device *dev, 1340 + struct iommu_group *group) 1338 1341 { 1339 - struct iommu_group *group; 1340 - struct arm_smmu_master *master; 1341 1342 struct arm_smmu_device *smmu = find_smmu_for_device(dev); 1343 + struct arm_smmu_master *master; 1342 1344 1343 1345 if (!smmu) 1344 1346 return -ENODEV; ··· 1344 1352 if (!master) 1345 1353 return -ENODEV; 1346 1354 1347 - /* No automatic group creation for platform devices */ 1348 - group = iommu_group_alloc(); 1349 - if (IS_ERR(group)) 1350 - return PTR_ERR(group); 1351 - 1352 1355 iommu_group_set_iommudata(group, &master->cfg, NULL); 1353 - return iommu_group_add_device(group, dev); 1356 + 1357 + return 0; 1354 1358 } 1355 1359 1356 1360 static int arm_smmu_add_device(struct device *dev) 1357 1361 { 1358 - if (dev_is_pci(dev)) 1359 - return arm_smmu_add_pci_device(to_pci_dev(dev)); 1362 + struct iommu_group *group; 1360 1363 1361 - return arm_smmu_add_platform_device(dev); 1364 + group = iommu_group_get_for_dev(dev); 1365 + if (IS_ERR(group)) 1366 + return PTR_ERR(group); 1367 + 1368 + return 0; 1362 1369 } 1363 1370 1364 1371 static void arm_smmu_remove_device(struct device *dev) 1365 1372 { 1366 1373 iommu_group_remove_device(dev); 1374 + } 1375 + 1376 + static struct iommu_group *arm_smmu_device_group(struct device *dev) 1377 + { 1378 + struct iommu_group *group; 1379 + int ret; 1380 + 1381 + if (dev_is_pci(dev)) 1382 + group = pci_device_group(dev); 1383 + else 1384 + group = generic_device_group(dev); 1385 + 1386 + if (IS_ERR(group)) 1387 + return group; 1388 + 1389 + if (dev_is_pci(dev)) 1390 + ret = arm_smmu_init_pci_device(to_pci_dev(dev), group); 1391 + else 1392 + ret = arm_smmu_init_platform_device(dev, group); 
1393 + 1394 + if (ret) { 1395 + iommu_group_put(group); 1396 + group = ERR_PTR(ret); 1397 + } 1398 + 1399 + return group; 1367 1400 } 1368 1401 1369 1402 static int arm_smmu_domain_get_attr(struct iommu_domain *domain, ··· 1447 1430 .iova_to_phys = arm_smmu_iova_to_phys, 1448 1431 .add_device = arm_smmu_add_device, 1449 1432 .remove_device = arm_smmu_remove_device, 1433 + .device_group = arm_smmu_device_group, 1450 1434 .domain_get_attr = arm_smmu_domain_get_attr, 1451 1435 .domain_set_attr = arm_smmu_domain_set_attr, 1452 1436 .pgsize_bitmap = -1UL, /* Restricted during device attach */
+524
drivers/iommu/dma-iommu.c
··· 1 + /* 2 + * A fairly generic DMA-API to IOMMU-API glue layer. 3 + * 4 + * Copyright (C) 2014-2015 ARM Ltd. 5 + * 6 + * based in part on arch/arm/mm/dma-mapping.c: 7 + * Copyright (C) 2000-2004 Russell King 8 + * 9 + * This program is free software; you can redistribute it and/or modify 10 + * it under the terms of the GNU General Public License version 2 as 11 + * published by the Free Software Foundation. 12 + * 13 + * This program is distributed in the hope that it will be useful, 14 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 + * GNU General Public License for more details. 17 + * 18 + * You should have received a copy of the GNU General Public License 19 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 20 + */ 21 + 22 + #include <linux/device.h> 23 + #include <linux/dma-iommu.h> 24 + #include <linux/huge_mm.h> 25 + #include <linux/iommu.h> 26 + #include <linux/iova.h> 27 + #include <linux/mm.h> 28 + 29 + int iommu_dma_init(void) 30 + { 31 + return iova_cache_get(); 32 + } 33 + 34 + /** 35 + * iommu_get_dma_cookie - Acquire DMA-API resources for a domain 36 + * @domain: IOMMU domain to prepare for DMA-API usage 37 + * 38 + * IOMMU drivers should normally call this from their domain_alloc 39 + * callback when domain->type == IOMMU_DOMAIN_DMA. 40 + */ 41 + int iommu_get_dma_cookie(struct iommu_domain *domain) 42 + { 43 + struct iova_domain *iovad; 44 + 45 + if (domain->iova_cookie) 46 + return -EEXIST; 47 + 48 + iovad = kzalloc(sizeof(*iovad), GFP_KERNEL); 49 + domain->iova_cookie = iovad; 50 + 51 + return iovad ? 0 : -ENOMEM; 52 + } 53 + EXPORT_SYMBOL(iommu_get_dma_cookie); 54 + 55 + /** 56 + * iommu_put_dma_cookie - Release a domain's DMA mapping resources 57 + * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() 58 + * 59 + * IOMMU drivers should normally call this from their domain_free callback. 
60 + */ 61 + void iommu_put_dma_cookie(struct iommu_domain *domain) 62 + { 63 + struct iova_domain *iovad = domain->iova_cookie; 64 + 65 + if (!iovad) 66 + return; 67 + 68 + put_iova_domain(iovad); 69 + kfree(iovad); 70 + domain->iova_cookie = NULL; 71 + } 72 + EXPORT_SYMBOL(iommu_put_dma_cookie); 73 + 74 + /** 75 + * iommu_dma_init_domain - Initialise a DMA mapping domain 76 + * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() 77 + * @base: IOVA at which the mappable address space starts 78 + * @size: Size of IOVA space 79 + * 80 + * @base and @size should be exact multiples of IOMMU page granularity to 81 + * avoid rounding surprises. If necessary, we reserve the page at address 0 82 + * to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but 83 + * any change which could make prior IOVAs invalid will fail. 84 + */ 85 + int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, u64 size) 86 + { 87 + struct iova_domain *iovad = domain->iova_cookie; 88 + unsigned long order, base_pfn, end_pfn; 89 + 90 + if (!iovad) 91 + return -ENODEV; 92 + 93 + /* Use the smallest supported page size for IOVA granularity */ 94 + order = __ffs(domain->ops->pgsize_bitmap); 95 + base_pfn = max_t(unsigned long, 1, base >> order); 96 + end_pfn = (base + size - 1) >> order; 97 + 98 + /* Check the domain allows at least some access to the device... 
*/ 99 + if (domain->geometry.force_aperture) { 100 + if (base > domain->geometry.aperture_end || 101 + base + size <= domain->geometry.aperture_start) { 102 + pr_warn("specified DMA range outside IOMMU capability\n"); 103 + return -EFAULT; 104 + } 105 + /* ...then finally give it a kicking to make sure it fits */ 106 + base_pfn = max_t(unsigned long, base_pfn, 107 + domain->geometry.aperture_start >> order); 108 + end_pfn = min_t(unsigned long, end_pfn, 109 + domain->geometry.aperture_end >> order); 110 + } 111 + 112 + /* All we can safely do with an existing domain is enlarge it */ 113 + if (iovad->start_pfn) { 114 + if (1UL << order != iovad->granule || 115 + base_pfn != iovad->start_pfn || 116 + end_pfn < iovad->dma_32bit_pfn) { 117 + pr_warn("Incompatible range for DMA domain\n"); 118 + return -EFAULT; 119 + } 120 + iovad->dma_32bit_pfn = end_pfn; 121 + } else { 122 + init_iova_domain(iovad, 1UL << order, base_pfn, end_pfn); 123 + } 124 + return 0; 125 + } 126 + EXPORT_SYMBOL(iommu_dma_init_domain); 127 + 128 + /** 129 + * dma_direction_to_prot - Translate DMA API directions to IOMMU API page flags 130 + * @dir: Direction of DMA transfer 131 + * @coherent: Is the DMA master cache-coherent? 132 + * 133 + * Return: corresponding IOMMU API page protection flags 134 + */ 135 + int dma_direction_to_prot(enum dma_data_direction dir, bool coherent) 136 + { 137 + int prot = coherent ? 
IOMMU_CACHE : 0; 138 + 139 + switch (dir) { 140 + case DMA_BIDIRECTIONAL: 141 + return prot | IOMMU_READ | IOMMU_WRITE; 142 + case DMA_TO_DEVICE: 143 + return prot | IOMMU_READ; 144 + case DMA_FROM_DEVICE: 145 + return prot | IOMMU_WRITE; 146 + default: 147 + return 0; 148 + } 149 + } 150 + 151 + static struct iova *__alloc_iova(struct iova_domain *iovad, size_t size, 152 + dma_addr_t dma_limit) 153 + { 154 + unsigned long shift = iova_shift(iovad); 155 + unsigned long length = iova_align(iovad, size) >> shift; 156 + 157 + /* 158 + * Enforce size-alignment to be safe - there could perhaps be an 159 + * attribute to control this per-device, or at least per-domain... 160 + */ 161 + return alloc_iova(iovad, length, dma_limit >> shift, true); 162 + } 163 + 164 + /* The IOVA allocator knows what we mapped, so just unmap whatever that was */ 165 + static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr) 166 + { 167 + struct iova_domain *iovad = domain->iova_cookie; 168 + unsigned long shift = iova_shift(iovad); 169 + unsigned long pfn = dma_addr >> shift; 170 + struct iova *iova = find_iova(iovad, pfn); 171 + size_t size; 172 + 173 + if (WARN_ON(!iova)) 174 + return; 175 + 176 + size = iova_size(iova) << shift; 177 + size -= iommu_unmap(domain, pfn << shift, size); 178 + /* ...and if we can't, then something is horribly, horribly wrong */ 179 + WARN_ON(size > 0); 180 + __free_iova(iovad, iova); 181 + } 182 + 183 + static void __iommu_dma_free_pages(struct page **pages, int count) 184 + { 185 + while (count--) 186 + __free_page(pages[count]); 187 + kvfree(pages); 188 + } 189 + 190 + static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp) 191 + { 192 + struct page **pages; 193 + unsigned int i = 0, array_size = count * sizeof(*pages); 194 + 195 + if (array_size <= PAGE_SIZE) 196 + pages = kzalloc(array_size, GFP_KERNEL); 197 + else 198 + pages = vzalloc(array_size); 199 + if (!pages) 200 + return NULL; 201 + 202 + /* IOMMU can map 
any pages, so himem can also be used here */ 203 + gfp |= __GFP_NOWARN | __GFP_HIGHMEM; 204 + 205 + while (count) { 206 + struct page *page = NULL; 207 + int j, order = __fls(count); 208 + 209 + /* 210 + * Higher-order allocations are a convenience rather 211 + * than a necessity, hence using __GFP_NORETRY until 212 + * falling back to single-page allocations. 213 + */ 214 + for (order = min(order, MAX_ORDER); order > 0; order--) { 215 + page = alloc_pages(gfp | __GFP_NORETRY, order); 216 + if (!page) 217 + continue; 218 + if (PageCompound(page)) { 219 + if (!split_huge_page(page)) 220 + break; 221 + __free_pages(page, order); 222 + } else { 223 + split_page(page, order); 224 + break; 225 + } 226 + } 227 + if (!page) 228 + page = alloc_page(gfp); 229 + if (!page) { 230 + __iommu_dma_free_pages(pages, i); 231 + return NULL; 232 + } 233 + j = 1 << order; 234 + count -= j; 235 + while (j--) 236 + pages[i++] = page++; 237 + } 238 + return pages; 239 + } 240 + 241 + /** 242 + * iommu_dma_free - Free a buffer allocated by iommu_dma_alloc() 243 + * @dev: Device which owns this buffer 244 + * @pages: Array of buffer pages as returned by iommu_dma_alloc() 245 + * @size: Size of buffer in bytes 246 + * @handle: DMA address of buffer 247 + * 248 + * Frees both the pages associated with the buffer, and the array 249 + * describing them 250 + */ 251 + void iommu_dma_free(struct device *dev, struct page **pages, size_t size, 252 + dma_addr_t *handle) 253 + { 254 + __iommu_dma_unmap(iommu_get_domain_for_dev(dev), *handle); 255 + __iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT); 256 + *handle = DMA_ERROR_CODE; 257 + } 258 + 259 + /** 260 + * iommu_dma_alloc - Allocate and map a buffer contiguous in IOVA space 261 + * @dev: Device to allocate memory for. 
Must be a real device 262 + * attached to an iommu_dma_domain 263 + * @size: Size of buffer in bytes 264 + * @gfp: Allocation flags 265 + * @prot: IOMMU mapping flags 266 + * @handle: Out argument for allocated DMA handle 267 + * @flush_page: Arch callback which must ensure PAGE_SIZE bytes from the 268 + * given VA/PA are visible to the given non-coherent device. 269 + * 270 + * If @size is less than PAGE_SIZE, then a full CPU page will be allocated, 271 + * but an IOMMU which supports smaller pages might not map the whole thing. 272 + * 273 + * Return: Array of struct page pointers describing the buffer, 274 + * or NULL on failure. 275 + */ 276 + struct page **iommu_dma_alloc(struct device *dev, size_t size, 277 + gfp_t gfp, int prot, dma_addr_t *handle, 278 + void (*flush_page)(struct device *, const void *, phys_addr_t)) 279 + { 280 + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); 281 + struct iova_domain *iovad = domain->iova_cookie; 282 + struct iova *iova; 283 + struct page **pages; 284 + struct sg_table sgt; 285 + dma_addr_t dma_addr; 286 + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; 287 + 288 + *handle = DMA_ERROR_CODE; 289 + 290 + pages = __iommu_dma_alloc_pages(count, gfp); 291 + if (!pages) 292 + return NULL; 293 + 294 + iova = __alloc_iova(iovad, size, dev->coherent_dma_mask); 295 + if (!iova) 296 + goto out_free_pages; 297 + 298 + size = iova_align(iovad, size); 299 + if (sg_alloc_table_from_pages(&sgt, pages, count, 0, size, GFP_KERNEL)) 300 + goto out_free_iova; 301 + 302 + if (!(prot & IOMMU_CACHE)) { 303 + struct sg_mapping_iter miter; 304 + /* 305 + * The CPU-centric flushing implied by SG_MITER_TO_SG isn't 306 + * sufficient here, so skip it by using the "wrong" direction. 
307 + */ 308 + sg_miter_start(&miter, sgt.sgl, sgt.orig_nents, SG_MITER_FROM_SG); 309 + while (sg_miter_next(&miter)) 310 + flush_page(dev, miter.addr, page_to_phys(miter.page)); 311 + sg_miter_stop(&miter); 312 + } 313 + 314 + dma_addr = iova_dma_addr(iovad, iova); 315 + if (iommu_map_sg(domain, dma_addr, sgt.sgl, sgt.orig_nents, prot) 316 + < size) 317 + goto out_free_sg; 318 + 319 + *handle = dma_addr; 320 + sg_free_table(&sgt); 321 + return pages; 322 + 323 + out_free_sg: 324 + sg_free_table(&sgt); 325 + out_free_iova: 326 + __free_iova(iovad, iova); 327 + out_free_pages: 328 + __iommu_dma_free_pages(pages, count); 329 + return NULL; 330 + } 331 + 332 + /** 333 + * iommu_dma_mmap - Map a buffer into provided user VMA 334 + * @pages: Array representing buffer from iommu_dma_alloc() 335 + * @size: Size of buffer in bytes 336 + * @vma: VMA describing requested userspace mapping 337 + * 338 + * Maps the pages of the buffer in @pages into @vma. The caller is responsible 339 + * for verifying the correct size and protection of @vma beforehand. 
340 + */ 341 + 342 + int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma) 343 + { 344 + unsigned long uaddr = vma->vm_start; 345 + unsigned int i, count = PAGE_ALIGN(size) >> PAGE_SHIFT; 346 + int ret = -ENXIO; 347 + 348 + for (i = vma->vm_pgoff; i < count && uaddr < vma->vm_end; i++) { 349 + ret = vm_insert_page(vma, uaddr, pages[i]); 350 + if (ret) 351 + break; 352 + uaddr += PAGE_SIZE; 353 + } 354 + return ret; 355 + } 356 + 357 + dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, 358 + unsigned long offset, size_t size, int prot) 359 + { 360 + dma_addr_t dma_addr; 361 + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); 362 + struct iova_domain *iovad = domain->iova_cookie; 363 + phys_addr_t phys = page_to_phys(page) + offset; 364 + size_t iova_off = iova_offset(iovad, phys); 365 + size_t len = iova_align(iovad, size + iova_off); 366 + struct iova *iova = __alloc_iova(iovad, len, dma_get_mask(dev)); 367 + 368 + if (!iova) 369 + return DMA_ERROR_CODE; 370 + 371 + dma_addr = iova_dma_addr(iovad, iova); 372 + if (iommu_map(domain, dma_addr, phys - iova_off, len, prot)) { 373 + __free_iova(iovad, iova); 374 + return DMA_ERROR_CODE; 375 + } 376 + return dma_addr + iova_off; 377 + } 378 + 379 + void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size, 380 + enum dma_data_direction dir, struct dma_attrs *attrs) 381 + { 382 + __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle); 383 + } 384 + 385 + /* 386 + * Prepare a successfully-mapped scatterlist to give back to the caller. 
387 + * Handling IOVA concatenation can come later, if needed 388 + */ 389 + static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents, 390 + dma_addr_t dma_addr) 391 + { 392 + struct scatterlist *s; 393 + int i; 394 + 395 + for_each_sg(sg, s, nents, i) { 396 + /* Un-swizzling the fields here, hence the naming mismatch */ 397 + unsigned int s_offset = sg_dma_address(s); 398 + unsigned int s_length = sg_dma_len(s); 399 + unsigned int s_dma_len = s->length; 400 + 401 + s->offset = s_offset; 402 + s->length = s_length; 403 + sg_dma_address(s) = dma_addr + s_offset; 404 + dma_addr += s_dma_len; 405 + } 406 + return i; 407 + } 408 + 409 + /* 410 + * If mapping failed, then just restore the original list, 411 + * but making sure the DMA fields are invalidated. 412 + */ 413 + static void __invalidate_sg(struct scatterlist *sg, int nents) 414 + { 415 + struct scatterlist *s; 416 + int i; 417 + 418 + for_each_sg(sg, s, nents, i) { 419 + if (sg_dma_address(s) != DMA_ERROR_CODE) 420 + s->offset = sg_dma_address(s); 421 + if (sg_dma_len(s)) 422 + s->length = sg_dma_len(s); 423 + sg_dma_address(s) = DMA_ERROR_CODE; 424 + sg_dma_len(s) = 0; 425 + } 426 + } 427 + 428 + /* 429 + * The DMA API client is passing in a scatterlist which could describe 430 + * any old buffer layout, but the IOMMU API requires everything to be 431 + * aligned to IOMMU pages. Hence the need for this complicated bit of 432 + * impedance-matching, to be able to hand off a suitably-aligned list, 433 + * but still preserve the original offsets and sizes for the caller. 
434 + */ 435 + int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, 436 + int nents, int prot) 437 + { 438 + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); 439 + struct iova_domain *iovad = domain->iova_cookie; 440 + struct iova *iova; 441 + struct scatterlist *s, *prev = NULL; 442 + dma_addr_t dma_addr; 443 + size_t iova_len = 0; 444 + int i; 445 + 446 + /* 447 + * Work out how much IOVA space we need, and align the segments to 448 + * IOVA granules for the IOMMU driver to handle. With some clever 449 + * trickery we can modify the list in-place, but reversibly, by 450 + * hiding the original data in the as-yet-unused DMA fields. 451 + */ 452 + for_each_sg(sg, s, nents, i) { 453 + size_t s_offset = iova_offset(iovad, s->offset); 454 + size_t s_length = s->length; 455 + 456 + sg_dma_address(s) = s->offset; 457 + sg_dma_len(s) = s_length; 458 + s->offset -= s_offset; 459 + s_length = iova_align(iovad, s_length + s_offset); 460 + s->length = s_length; 461 + 462 + /* 463 + * The simple way to avoid the rare case of a segment 464 + * crossing the boundary mask is to pad the previous one 465 + * to end at a naturally-aligned IOVA for this one's size, 466 + * at the cost of potentially over-allocating a little. 467 + */ 468 + if (prev) { 469 + size_t pad_len = roundup_pow_of_two(s_length); 470 + 471 + pad_len = (pad_len - iova_len) & (pad_len - 1); 472 + prev->length += pad_len; 473 + iova_len += pad_len; 474 + } 475 + 476 + iova_len += s_length; 477 + prev = s; 478 + } 479 + 480 + iova = __alloc_iova(iovad, iova_len, dma_get_mask(dev)); 481 + if (!iova) 482 + goto out_restore_sg; 483 + 484 + /* 485 + * We'll leave any physical concatenation to the IOMMU driver's 486 + * implementation - it knows better than we do. 
487 + */ 488 + dma_addr = iova_dma_addr(iovad, iova); 489 + if (iommu_map_sg(domain, dma_addr, sg, nents, prot) < iova_len) 490 + goto out_free_iova; 491 + 492 + return __finalise_sg(dev, sg, nents, dma_addr); 493 + 494 + out_free_iova: 495 + __free_iova(iovad, iova); 496 + out_restore_sg: 497 + __invalidate_sg(sg, nents); 498 + return 0; 499 + } 500 + 501 + void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, 502 + enum dma_data_direction dir, struct dma_attrs *attrs) 503 + { 504 + /* 505 + * The scatterlist segments are mapped into a single 506 + * contiguous IOVA allocation, so this is incredibly easy. 507 + */ 508 + __iommu_dma_unmap(iommu_get_domain_for_dev(dev), sg_dma_address(sg)); 509 + } 510 + 511 + int iommu_dma_supported(struct device *dev, u64 mask) 512 + { 513 + /* 514 + * 'Special' IOMMUs which don't have the same addressing capability 515 + * as the CPU will have to wait until we have some way to query that 516 + * before they'll be able to use this framework. 517 + */ 518 + return 1; 519 + } 520 + 521 + int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) 522 + { 523 + return dma_addr == DMA_ERROR_CODE; 524 + }
+16 -25
drivers/iommu/fsl_pamu_domain.c
··· 923 923 pci_endpt_partioning = check_pci_ctl_endpt_part(pci_ctl); 924 924 /* We can partition PCIe devices so assign device group to the device */ 925 925 if (pci_endpt_partioning) { 926 - group = iommu_group_get_for_dev(&pdev->dev); 926 + group = pci_device_group(&pdev->dev); 927 927 928 928 /* 929 929 * PCIe controller is not a paritionable entity ··· 956 956 return group; 957 957 } 958 958 959 - static int fsl_pamu_add_device(struct device *dev) 959 + static struct iommu_group *fsl_pamu_device_group(struct device *dev) 960 960 { 961 961 struct iommu_group *group = ERR_PTR(-ENODEV); 962 - struct pci_dev *pdev; 963 - const u32 *prop; 964 - int ret = 0, len; 962 + int len; 965 963 966 964 /* 967 965 * For platform devices we allocate a separate group for 968 966 * each of the devices. 969 967 */ 970 - if (dev_is_pci(dev)) { 971 - pdev = to_pci_dev(dev); 972 - /* Don't create device groups for virtual PCI bridges */ 973 - if (pdev->subordinate) 974 - return 0; 968 + if (dev_is_pci(dev)) 969 + group = get_pci_device_group(to_pci_dev(dev)); 970 + else if (of_get_property(dev->of_node, "fsl,liodn", &len)) 971 + group = get_device_iommu_group(dev); 975 972 976 - group = get_pci_device_group(pdev); 973 + return group; 974 + } 977 975 978 - } else { 979 - prop = of_get_property(dev->of_node, "fsl,liodn", &len); 980 - if (prop) 981 - group = get_device_iommu_group(dev); 982 - } 976 + static int fsl_pamu_add_device(struct device *dev) 977 + { 978 + struct iommu_group *group; 983 979 980 + group = iommu_group_get_for_dev(dev); 984 981 if (IS_ERR(group)) 985 982 return PTR_ERR(group); 986 983 987 - /* 988 - * Check if device has already been added to an iommu group. 989 - * Group could have already been created for a PCI device in 990 - * the iommu_group_get_for_dev path. 
991 - */ 992 - if (!dev->iommu_group) 993 - ret = iommu_group_add_device(group, dev); 994 - 995 984 iommu_group_put(group); 996 - return ret; 985 + 986 + return 0; 997 987 } 998 988 999 989 static void fsl_pamu_remove_device(struct device *dev) ··· 1062 1072 .domain_get_attr = fsl_pamu_get_domain_attr, 1063 1073 .add_device = fsl_pamu_add_device, 1064 1074 .remove_device = fsl_pamu_remove_device, 1075 + .device_group = fsl_pamu_device_group, 1065 1076 }; 1066 1077 1067 1078 int __init pamu_domain_init(void)
+54 -29
drivers/iommu/intel-iommu.c
··· 34 34 #include <linux/mempool.h> 35 35 #include <linux/memory.h> 36 36 #include <linux/timer.h> 37 + #include <linux/io.h> 37 38 #include <linux/iova.h> 38 39 #include <linux/iommu.h> 39 40 #include <linux/intel-iommu.h> ··· 2435 2434 DMA_PTE_READ|DMA_PTE_WRITE); 2436 2435 } 2437 2436 2438 - static int iommu_prepare_identity_map(struct device *dev, 2439 - unsigned long long start, 2440 - unsigned long long end) 2437 + static int domain_prepare_identity_map(struct device *dev, 2438 + struct dmar_domain *domain, 2439 + unsigned long long start, 2440 + unsigned long long end) 2441 2441 { 2442 - struct dmar_domain *domain; 2443 - int ret; 2444 - 2445 - domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); 2446 - if (!domain) 2447 - return -ENOMEM; 2448 - 2449 2442 /* For _hardware_ passthrough, don't bother. But for software 2450 2443 passthrough, we do it anyway -- it may indicate a memory 2451 2444 range which is reserved in E820, so which didn't get set ··· 2459 2464 dmi_get_system_info(DMI_BIOS_VENDOR), 2460 2465 dmi_get_system_info(DMI_BIOS_VERSION), 2461 2466 dmi_get_system_info(DMI_PRODUCT_VERSION)); 2462 - ret = -EIO; 2463 - goto error; 2467 + return -EIO; 2464 2468 } 2465 2469 2466 2470 if (end >> agaw_to_width(domain->agaw)) { ··· 2469 2475 dmi_get_system_info(DMI_BIOS_VENDOR), 2470 2476 dmi_get_system_info(DMI_BIOS_VERSION), 2471 2477 dmi_get_system_info(DMI_PRODUCT_VERSION)); 2472 - ret = -EIO; 2473 - goto error; 2478 + return -EIO; 2474 2479 } 2475 2480 2476 - ret = iommu_domain_identity_map(domain, start, end); 2481 + return iommu_domain_identity_map(domain, start, end); 2482 + } 2483 + 2484 + static int iommu_prepare_identity_map(struct device *dev, 2485 + unsigned long long start, 2486 + unsigned long long end) 2487 + { 2488 + struct dmar_domain *domain; 2489 + int ret; 2490 + 2491 + domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); 2492 + if (!domain) 2493 + return -ENOMEM; 2494 + 2495 + ret = 
domain_prepare_identity_map(dev, domain, start, end); 2477 2496 if (ret) 2478 - goto error; 2497 + domain_exit(domain); 2479 2498 2480 - return 0; 2481 - 2482 - error: 2483 - domain_exit(domain); 2484 2499 return ret; 2485 2500 } 2486 2501 ··· 2815 2812 } 2816 2813 2817 2814 static int copy_context_table(struct intel_iommu *iommu, 2818 - struct root_entry __iomem *old_re, 2815 + struct root_entry *old_re, 2819 2816 struct context_entry **tbl, 2820 2817 int bus, bool ext) 2821 2818 { 2822 2819 int tbl_idx, pos = 0, idx, devfn, ret = 0, did; 2823 - struct context_entry __iomem *old_ce = NULL; 2824 2820 struct context_entry *new_ce = NULL, ce; 2821 + struct context_entry *old_ce = NULL; 2825 2822 struct root_entry re; 2826 2823 phys_addr_t old_ce_phys; 2827 2824 2828 2825 tbl_idx = ext ? bus * 2 : bus; 2829 - memcpy_fromio(&re, old_re, sizeof(re)); 2826 + memcpy(&re, old_re, sizeof(re)); 2830 2827 2831 2828 for (devfn = 0; devfn < 256; devfn++) { 2832 2829 /* First calculate the correct index */ ··· 2861 2858 } 2862 2859 2863 2860 ret = -ENOMEM; 2864 - old_ce = ioremap_cache(old_ce_phys, PAGE_SIZE); 2861 + old_ce = memremap(old_ce_phys, PAGE_SIZE, 2862 + MEMREMAP_WB); 2865 2863 if (!old_ce) 2866 2864 goto out; 2867 2865 ··· 2874 2870 } 2875 2871 2876 2872 /* Now copy the context entry */ 2877 - memcpy_fromio(&ce, old_ce + idx, sizeof(ce)); 2873 + memcpy(&ce, old_ce + idx, sizeof(ce)); 2878 2874 2879 2875 if (!__context_present(&ce)) 2880 2876 continue; ··· 2910 2906 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE); 2911 2907 2912 2908 out_unmap: 2913 - iounmap(old_ce); 2909 + memunmap(old_ce); 2914 2910 2915 2911 out: 2916 2912 return ret; ··· 2918 2914 2919 2915 static int copy_translation_tables(struct intel_iommu *iommu) 2920 2916 { 2921 - struct root_entry __iomem *old_rt; 2922 2917 struct context_entry **ctxt_tbls; 2918 + struct root_entry *old_rt; 2923 2919 phys_addr_t old_rt_phys; 2924 2920 int ctxt_table_entries; 2925 2921 unsigned long flags; ··· 2944 2940 
if (!old_rt_phys) 2945 2941 return -EINVAL; 2946 2942 2947 - old_rt = ioremap_cache(old_rt_phys, PAGE_SIZE); 2943 + old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB); 2948 2944 if (!old_rt) 2949 2945 return -ENOMEM; 2950 2946 ··· 2993 2989 ret = 0; 2994 2990 2995 2991 out_unmap: 2996 - iounmap(old_rt); 2992 + memunmap(old_rt); 2997 2993 2998 2994 return ret; 2999 2995 } ··· 3250 3246 3251 3247 static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev) 3252 3248 { 3249 + struct dmar_rmrr_unit *rmrr; 3253 3250 struct dmar_domain *domain; 3251 + struct device *i_dev; 3252 + int i, ret; 3254 3253 3255 3254 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); 3256 3255 if (!domain) { ··· 3261 3254 dev_name(dev)); 3262 3255 return NULL; 3263 3256 } 3257 + 3258 + /* We have a new domain - setup possible RMRRs for the device */ 3259 + rcu_read_lock(); 3260 + for_each_rmrr_units(rmrr) { 3261 + for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, 3262 + i, i_dev) { 3263 + if (i_dev != dev) 3264 + continue; 3265 + 3266 + ret = domain_prepare_identity_map(dev, domain, 3267 + rmrr->base_address, 3268 + rmrr->end_address); 3269 + if (ret) 3270 + dev_err(dev, "Mapping reserved region failed\n"); 3271 + } 3272 + } 3273 + rcu_read_unlock(); 3264 3274 3265 3275 return domain; 3266 3276 } ··· 4901 4877 .iova_to_phys = intel_iommu_iova_to_phys, 4902 4878 .add_device = intel_iommu_add_device, 4903 4879 .remove_device = intel_iommu_remove_device, 4880 + .device_group = pci_device_group, 4904 4881 .pgsize_bitmap = INTEL_IOMMU_PGSIZES, 4905 4882 }; 4906 4883
+48 -16
drivers/iommu/intel_irq_remapping.c
··· 169 169 index = irq_iommu->irte_index + irq_iommu->sub_handle; 170 170 irte = &iommu->ir_table->base[index]; 171 171 172 - set_64bit(&irte->low, irte_modified->low); 173 - set_64bit(&irte->high, irte_modified->high); 172 + #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) 173 + if ((irte->pst == 1) || (irte_modified->pst == 1)) { 174 + bool ret; 175 + 176 + ret = cmpxchg_double(&irte->low, &irte->high, 177 + irte->low, irte->high, 178 + irte_modified->low, irte_modified->high); 179 + /* 180 + * We use cmpxchg16 to atomically update the 128-bit IRTE, 181 + * and it cannot be updated by the hardware or other processors 182 + * behind us, so the return value of cmpxchg16 should be the 183 + * same as the old value. 184 + */ 185 + WARN_ON(!ret); 186 + } else 187 + #endif 188 + { 189 + set_64bit(&irte->low, irte_modified->low); 190 + set_64bit(&irte->high, irte_modified->high); 191 + } 174 192 __iommu_flush_cache(iommu, irte, sizeof(*irte)); 175 193 176 194 rc = qi_flush_iec(iommu, index, 0); ··· 402 384 403 385 static int iommu_load_old_irte(struct intel_iommu *iommu) 404 386 { 405 - struct irte __iomem *old_ir_table; 387 + struct irte *old_ir_table; 406 388 phys_addr_t irt_phys; 407 389 unsigned int i; 408 390 size_t size; ··· 426 408 size = INTR_REMAP_TABLE_ENTRIES*sizeof(struct irte); 427 409 428 410 /* Map the old IR table */ 429 - old_ir_table = ioremap_cache(irt_phys, size); 411 + old_ir_table = memremap(irt_phys, size, MEMREMAP_WB); 430 412 if (!old_ir_table) 431 413 return -ENOMEM; 432 414 433 415 /* Copy data over */ 434 - memcpy_fromio(iommu->ir_table->base, old_ir_table, size); 416 + memcpy(iommu->ir_table->base, old_ir_table, size); 435 417 436 418 __iommu_flush_cache(iommu, iommu->ir_table->base, size); 437 419 ··· 444 426 bitmap_set(iommu->ir_table->bitmap, i, 1); 445 427 } 446 428 447 - iounmap(old_ir_table); 429 + memunmap(old_ir_table); 448 430 449 431 return 0; 450 432 } ··· 690 672 if (!dmar_ir_support()) 691 673 return -ENODEV; 692 674 693 - if 
(parse_ioapics_under_ir() != 1) { 675 + if (parse_ioapics_under_ir()) { 694 676 pr_info("Not enabling interrupt remapping\n"); 695 677 goto error; 696 678 } ··· 745 727 struct intel_iommu *iommu; 746 728 747 729 if (!disable_irq_post) { 748 - intel_irq_remap_ops.capability |= 1 << IRQ_POSTING_CAP; 730 + /* 731 + * If IRTE is in posted format, the 'pda' field goes across the 732 + * 64-bit boundary, we need use cmpxchg16b to atomically update 733 + * it. We only expose posted-interrupt when X86_FEATURE_CX16 734 + * is supported. Actually, hardware platforms supporting PI 735 + * should have X86_FEATURE_CX16 support, this has been confirmed 736 + * with Intel hardware guys. 737 + */ 738 + if ( cpu_has_cx16 ) 739 + intel_irq_remap_ops.capability |= 1 << IRQ_POSTING_CAP; 749 740 750 741 for_each_iommu(iommu, drhd) 751 742 if (!cap_pi_support(iommu->cap)) { ··· 934 907 bool ir_supported = false; 935 908 int ioapic_idx; 936 909 937 - for_each_iommu(iommu, drhd) 938 - if (ecap_ir_support(iommu->ecap)) { 939 - if (ir_parse_ioapic_hpet_scope(drhd->hdr, iommu)) 940 - return -1; 910 + for_each_iommu(iommu, drhd) { 911 + int ret; 941 912 942 - ir_supported = true; 943 - } 913 + if (!ecap_ir_support(iommu->ecap)) 914 + continue; 915 + 916 + ret = ir_parse_ioapic_hpet_scope(drhd->hdr, iommu); 917 + if (ret) 918 + return ret; 919 + 920 + ir_supported = true; 921 + } 944 922 945 923 if (!ir_supported) 946 - return 0; 924 + return -ENODEV; 947 925 948 926 for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) { 949 927 int ioapic_id = mpc_ioapic_id(ioapic_idx); ··· 960 928 } 961 929 } 962 930 963 - return 1; 931 + return 0; 964 932 } 965 933 966 934 static int __init ir_dev_scope_init(void)
+34 -12
drivers/iommu/iommu.c
··· 728 728 } 729 729 730 730 /* 731 + * Generic device_group call-back function. It just allocates one 732 + * iommu-group per device. 733 + */ 734 + struct iommu_group *generic_device_group(struct device *dev) 735 + { 736 + struct iommu_group *group; 737 + 738 + group = iommu_group_alloc(); 739 + if (IS_ERR(group)) 740 + return NULL; 741 + 742 + return group; 743 + } 744 + 745 + /* 731 746 * Use standard PCI bus topology, isolation features, and DMA alias quirks 732 747 * to find or create an IOMMU group for a device. 733 748 */ 734 - static struct iommu_group *iommu_group_get_for_pci_dev(struct pci_dev *pdev) 749 + struct iommu_group *pci_device_group(struct device *dev) 735 750 { 751 + struct pci_dev *pdev = to_pci_dev(dev); 736 752 struct group_for_pci_data data; 737 753 struct pci_bus *bus; 738 754 struct iommu_group *group = NULL; 739 755 u64 devfns[4] = { 0 }; 756 + 757 + if (WARN_ON(!dev_is_pci(dev))) 758 + return ERR_PTR(-EINVAL); 740 759 741 760 /* 742 761 * Find the upstream DMA alias for the device. A device must not ··· 810 791 if (IS_ERR(group)) 811 792 return NULL; 812 793 813 - /* 814 - * Try to allocate a default domain - needs support from the 815 - * IOMMU driver. 
816 - */ 817 - group->default_domain = __iommu_domain_alloc(pdev->dev.bus, 818 - IOMMU_DOMAIN_DMA); 819 - group->domain = group->default_domain; 820 - 821 794 return group; 822 795 } 823 796 ··· 825 814 */ 826 815 struct iommu_group *iommu_group_get_for_dev(struct device *dev) 827 816 { 817 + const struct iommu_ops *ops = dev->bus->iommu_ops; 828 818 struct iommu_group *group; 829 819 int ret; 830 820 ··· 833 821 if (group) 834 822 return group; 835 823 836 - if (!dev_is_pci(dev)) 837 - return ERR_PTR(-EINVAL); 824 + group = ERR_PTR(-EINVAL); 838 825 839 - group = iommu_group_get_for_pci_dev(to_pci_dev(dev)); 826 + if (ops && ops->device_group) 827 + group = ops->device_group(dev); 840 828 841 829 if (IS_ERR(group)) 842 830 return group; 831 + 832 + /* 833 + * Try to allocate a default domain - needs support from the 834 + * IOMMU driver. 835 + */ 836 + if (!group->default_domain) { 837 + group->default_domain = __iommu_domain_alloc(dev->bus, 838 + IOMMU_DOMAIN_DMA); 839 + group->domain = group->default_domain; 840 + } 843 841 844 842 ret = iommu_group_add_device(group, dev); 845 843 if (ret) {
+58
drivers/iommu/omap-iommu.c
··· 26 26 #include <linux/of_iommu.h> 27 27 #include <linux/of_irq.h> 28 28 #include <linux/of_platform.h> 29 + #include <linux/regmap.h> 30 + #include <linux/mfd/syscon.h> 29 31 30 32 #include <asm/cacheflush.h> 31 33 ··· 114 112 } 115 113 EXPORT_SYMBOL_GPL(omap_iommu_restore_ctx); 116 114 115 + static void dra7_cfg_dspsys_mmu(struct omap_iommu *obj, bool enable) 116 + { 117 + u32 val, mask; 118 + 119 + if (!obj->syscfg) 120 + return; 121 + 122 + mask = (1 << (obj->id * DSP_SYS_MMU_CONFIG_EN_SHIFT)); 123 + val = enable ? mask : 0; 124 + regmap_update_bits(obj->syscfg, DSP_SYS_MMU_CONFIG, mask, val); 125 + } 126 + 117 127 static void __iommu_set_twl(struct omap_iommu *obj, bool on) 118 128 { 119 129 u32 l = iommu_read_reg(obj, MMU_CNTL); ··· 161 147 162 148 iommu_write_reg(obj, pa, MMU_TTB); 163 149 150 + dra7_cfg_dspsys_mmu(obj, true); 151 + 164 152 if (obj->has_bus_err_back) 165 153 iommu_write_reg(obj, MMU_GP_REG_BUS_ERR_BACK_EN, MMU_GP_REG); 166 154 ··· 177 161 178 162 l &= ~MMU_CNTL_MASK; 179 163 iommu_write_reg(obj, l, MMU_CNTL); 164 + dra7_cfg_dspsys_mmu(obj, false); 180 165 181 166 dev_dbg(obj->dev, "%s is shutting down\n", obj->name); 182 167 } ··· 881 864 dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name); 882 865 } 883 866 867 + static int omap_iommu_dra7_get_dsp_system_cfg(struct platform_device *pdev, 868 + struct omap_iommu *obj) 869 + { 870 + struct device_node *np = pdev->dev.of_node; 871 + int ret; 872 + 873 + if (!of_device_is_compatible(np, "ti,dra7-dsp-iommu")) 874 + return 0; 875 + 876 + if (!of_property_read_bool(np, "ti,syscon-mmuconfig")) { 877 + dev_err(&pdev->dev, "ti,syscon-mmuconfig property is missing\n"); 878 + return -EINVAL; 879 + } 880 + 881 + obj->syscfg = 882 + syscon_regmap_lookup_by_phandle(np, "ti,syscon-mmuconfig"); 883 + if (IS_ERR(obj->syscfg)) { 884 + /* can fail with -EPROBE_DEFER */ 885 + ret = PTR_ERR(obj->syscfg); 886 + return ret; 887 + } 888 + 889 + if (of_property_read_u32_index(np, "ti,syscon-mmuconfig", 1, 890 + 
&obj->id)) { 891 + dev_err(&pdev->dev, "couldn't get the IOMMU instance id within subsystem\n"); 892 + return -EINVAL; 893 + } 894 + 895 + if (obj->id != 0 && obj->id != 1) { 896 + dev_err(&pdev->dev, "invalid IOMMU instance id\n"); 897 + return -EINVAL; 898 + } 899 + 900 + return 0; 901 + } 902 + 884 903 /* 885 904 * OMAP Device MMU(IOMMU) detection 886 905 */ ··· 960 907 if (IS_ERR(obj->regbase)) 961 908 return PTR_ERR(obj->regbase); 962 909 910 + err = omap_iommu_dra7_get_dsp_system_cfg(pdev, obj); 911 + if (err) 912 + return err; 913 + 963 914 irq = platform_get_irq(pdev, 0); 964 915 if (irq < 0) 965 916 return -ENODEV; ··· 1000 943 { .compatible = "ti,omap2-iommu" }, 1001 944 { .compatible = "ti,omap4-iommu" }, 1002 945 { .compatible = "ti,dra7-iommu" }, 946 + { .compatible = "ti,dra7-dsp-iommu" }, 1003 947 {}, 1004 948 }; 1005 949
+9
drivers/iommu/omap-iommu.h
··· 30 30 struct omap_iommu { 31 31 const char *name; 32 32 void __iomem *regbase; 33 + struct regmap *syscfg; 33 34 struct device *dev; 34 35 struct iommu_domain *domain; 35 36 struct dentry *debug_dir; ··· 49 48 void *ctx; /* iommu context: registres saved area */ 50 49 51 50 int has_bus_err_back; 51 + u32 id; 52 52 }; 53 53 54 54 struct cr_regs { ··· 159 157 ((pgsz) == MMU_CAM_PGSZ_1M) ? 0xfff00000 : \ 160 158 ((pgsz) == MMU_CAM_PGSZ_64K) ? 0xffff0000 : \ 161 159 ((pgsz) == MMU_CAM_PGSZ_4K) ? 0xfffff000 : 0) 160 + 161 + /* 162 + * DSP_SYSTEM registers and bit definitions (applicable only for DRA7xx DSP) 163 + */ 164 + #define DSP_SYS_REVISION 0x00 165 + #define DSP_SYS_MMU_CONFIG 0x18 166 + #define DSP_SYS_MMU_CONFIG_EN_SHIFT 4 162 167 163 168 /* 164 169 * utilities for super page(16MB, 1MB, 64KB and 4KB)
+337
drivers/iommu/s390-iommu.c
··· 1 + /* 2 + * IOMMU API for s390 PCI devices 3 + * 4 + * Copyright IBM Corp. 2015 5 + * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> 6 + */ 7 + 8 + #include <linux/pci.h> 9 + #include <linux/iommu.h> 10 + #include <linux/iommu-helper.h> 11 + #include <linux/pci.h> 12 + #include <linux/sizes.h> 13 + #include <asm/pci_dma.h> 14 + 15 + /* 16 + * Physically contiguous memory regions can be mapped with 4 KiB alignment, 17 + * we allow all page sizes that are an order of 4KiB (no special large page 18 + * support so far). 19 + */ 20 + #define S390_IOMMU_PGSIZES (~0xFFFUL) 21 + 22 + struct s390_domain { 23 + struct iommu_domain domain; 24 + struct list_head devices; 25 + unsigned long *dma_table; 26 + spinlock_t dma_table_lock; 27 + spinlock_t list_lock; 28 + }; 29 + 30 + struct s390_domain_device { 31 + struct list_head list; 32 + struct zpci_dev *zdev; 33 + }; 34 + 35 + static struct s390_domain *to_s390_domain(struct iommu_domain *dom) 36 + { 37 + return container_of(dom, struct s390_domain, domain); 38 + } 39 + 40 + static bool s390_iommu_capable(enum iommu_cap cap) 41 + { 42 + switch (cap) { 43 + case IOMMU_CAP_CACHE_COHERENCY: 44 + return true; 45 + case IOMMU_CAP_INTR_REMAP: 46 + return true; 47 + default: 48 + return false; 49 + } 50 + } 51 + 52 + struct iommu_domain *s390_domain_alloc(unsigned domain_type) 53 + { 54 + struct s390_domain *s390_domain; 55 + 56 + if (domain_type != IOMMU_DOMAIN_UNMANAGED) 57 + return NULL; 58 + 59 + s390_domain = kzalloc(sizeof(*s390_domain), GFP_KERNEL); 60 + if (!s390_domain) 61 + return NULL; 62 + 63 + s390_domain->dma_table = dma_alloc_cpu_table(); 64 + if (!s390_domain->dma_table) { 65 + kfree(s390_domain); 66 + return NULL; 67 + } 68 + 69 + spin_lock_init(&s390_domain->dma_table_lock); 70 + spin_lock_init(&s390_domain->list_lock); 71 + INIT_LIST_HEAD(&s390_domain->devices); 72 + 73 + return &s390_domain->domain; 74 + } 75 + 76 + void s390_domain_free(struct iommu_domain *domain) 77 + { 78 + struct s390_domain 
*s390_domain = to_s390_domain(domain); 79 + 80 + dma_cleanup_tables(s390_domain->dma_table); 81 + kfree(s390_domain); 82 + } 83 + 84 + static int s390_iommu_attach_device(struct iommu_domain *domain, 85 + struct device *dev) 86 + { 87 + struct s390_domain *s390_domain = to_s390_domain(domain); 88 + struct zpci_dev *zdev = to_pci_dev(dev)->sysdata; 89 + struct s390_domain_device *domain_device; 90 + unsigned long flags; 91 + int rc; 92 + 93 + if (!zdev) 94 + return -ENODEV; 95 + 96 + domain_device = kzalloc(sizeof(*domain_device), GFP_KERNEL); 97 + if (!domain_device) 98 + return -ENOMEM; 99 + 100 + if (zdev->dma_table) 101 + zpci_dma_exit_device(zdev); 102 + 103 + zdev->dma_table = s390_domain->dma_table; 104 + rc = zpci_register_ioat(zdev, 0, zdev->start_dma + PAGE_OFFSET, 105 + zdev->start_dma + zdev->iommu_size - 1, 106 + (u64) zdev->dma_table); 107 + if (rc) 108 + goto out_restore; 109 + 110 + spin_lock_irqsave(&s390_domain->list_lock, flags); 111 + /* First device defines the DMA range limits */ 112 + if (list_empty(&s390_domain->devices)) { 113 + domain->geometry.aperture_start = zdev->start_dma; 114 + domain->geometry.aperture_end = zdev->end_dma; 115 + domain->geometry.force_aperture = true; 116 + /* Allow only devices with identical DMA range limits */ 117 + } else if (domain->geometry.aperture_start != zdev->start_dma || 118 + domain->geometry.aperture_end != zdev->end_dma) { 119 + rc = -EINVAL; 120 + spin_unlock_irqrestore(&s390_domain->list_lock, flags); 121 + goto out_restore; 122 + } 123 + domain_device->zdev = zdev; 124 + zdev->s390_domain = s390_domain; 125 + list_add(&domain_device->list, &s390_domain->devices); 126 + spin_unlock_irqrestore(&s390_domain->list_lock, flags); 127 + 128 + return 0; 129 + 130 + out_restore: 131 + zpci_dma_init_device(zdev); 132 + kfree(domain_device); 133 + 134 + return rc; 135 + } 136 + 137 + static void s390_iommu_detach_device(struct iommu_domain *domain, 138 + struct device *dev) 139 + { 140 + struct s390_domain 
*s390_domain = to_s390_domain(domain); 141 + struct zpci_dev *zdev = to_pci_dev(dev)->sysdata; 142 + struct s390_domain_device *domain_device, *tmp; 143 + unsigned long flags; 144 + int found = 0; 145 + 146 + if (!zdev) 147 + return; 148 + 149 + spin_lock_irqsave(&s390_domain->list_lock, flags); 150 + list_for_each_entry_safe(domain_device, tmp, &s390_domain->devices, 151 + list) { 152 + if (domain_device->zdev == zdev) { 153 + list_del(&domain_device->list); 154 + kfree(domain_device); 155 + found = 1; 156 + break; 157 + } 158 + } 159 + spin_unlock_irqrestore(&s390_domain->list_lock, flags); 160 + 161 + if (found) { 162 + zdev->s390_domain = NULL; 163 + zpci_unregister_ioat(zdev, 0); 164 + zpci_dma_init_device(zdev); 165 + } 166 + } 167 + 168 + static int s390_iommu_add_device(struct device *dev) 169 + { 170 + struct iommu_group *group; 171 + int rc; 172 + 173 + group = iommu_group_get(dev); 174 + if (!group) { 175 + group = iommu_group_alloc(); 176 + if (IS_ERR(group)) 177 + return PTR_ERR(group); 178 + } 179 + 180 + rc = iommu_group_add_device(group, dev); 181 + iommu_group_put(group); 182 + 183 + return rc; 184 + } 185 + 186 + static void s390_iommu_remove_device(struct device *dev) 187 + { 188 + struct zpci_dev *zdev = to_pci_dev(dev)->sysdata; 189 + struct iommu_domain *domain; 190 + 191 + /* 192 + * This is a workaround for a scenario where the IOMMU API common code 193 + * "forgets" to call the detach_dev callback: After binding a device 194 + * to vfio-pci and completing the VFIO_SET_IOMMU ioctl (which triggers 195 + * the attach_dev), removing the device via 196 + * "echo 1 > /sys/bus/pci/devices/.../remove" won't trigger detach_dev, 197 + * only remove_device will be called via the BUS_NOTIFY_REMOVED_DEVICE 198 + * notifier. 199 + * 200 + * So let's call detach_dev from here if it hasn't been called before. 
201 + */ 202 + if (zdev && zdev->s390_domain) { 203 + domain = iommu_get_domain_for_dev(dev); 204 + if (domain) 205 + s390_iommu_detach_device(domain, dev); 206 + } 207 + 208 + iommu_group_remove_device(dev); 209 + } 210 + 211 + static int s390_iommu_update_trans(struct s390_domain *s390_domain, 212 + unsigned long pa, dma_addr_t dma_addr, 213 + size_t size, int flags) 214 + { 215 + struct s390_domain_device *domain_device; 216 + u8 *page_addr = (u8 *) (pa & PAGE_MASK); 217 + dma_addr_t start_dma_addr = dma_addr; 218 + unsigned long irq_flags, nr_pages, i; 219 + int rc = 0; 220 + 221 + if (dma_addr < s390_domain->domain.geometry.aperture_start || 222 + dma_addr + size > s390_domain->domain.geometry.aperture_end) 223 + return -EINVAL; 224 + 225 + nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; 226 + if (!nr_pages) 227 + return 0; 228 + 229 + spin_lock_irqsave(&s390_domain->dma_table_lock, irq_flags); 230 + for (i = 0; i < nr_pages; i++) { 231 + dma_update_cpu_trans(s390_domain->dma_table, page_addr, 232 + dma_addr, flags); 233 + page_addr += PAGE_SIZE; 234 + dma_addr += PAGE_SIZE; 235 + } 236 + 237 + spin_lock(&s390_domain->list_lock); 238 + list_for_each_entry(domain_device, &s390_domain->devices, list) { 239 + rc = zpci_refresh_trans((u64) domain_device->zdev->fh << 32, 240 + start_dma_addr, nr_pages * PAGE_SIZE); 241 + if (rc) 242 + break; 243 + } 244 + spin_unlock(&s390_domain->list_lock); 245 + spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags); 246 + 247 + return rc; 248 + } 249 + 250 + static int s390_iommu_map(struct iommu_domain *domain, unsigned long iova, 251 + phys_addr_t paddr, size_t size, int prot) 252 + { 253 + struct s390_domain *s390_domain = to_s390_domain(domain); 254 + int flags = ZPCI_PTE_VALID, rc = 0; 255 + 256 + if (!(prot & IOMMU_READ)) 257 + return -EINVAL; 258 + 259 + if (!(prot & IOMMU_WRITE)) 260 + flags |= ZPCI_TABLE_PROTECTED; 261 + 262 + rc = s390_iommu_update_trans(s390_domain, (unsigned long) paddr, iova, 263 + size, 
flags); 264 + 265 + return rc; 266 + } 267 + 268 + static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain, 269 + dma_addr_t iova) 270 + { 271 + struct s390_domain *s390_domain = to_s390_domain(domain); 272 + unsigned long *sto, *pto, *rto, flags; 273 + unsigned int rtx, sx, px; 274 + phys_addr_t phys = 0; 275 + 276 + if (iova < domain->geometry.aperture_start || 277 + iova > domain->geometry.aperture_end) 278 + return 0; 279 + 280 + rtx = calc_rtx(iova); 281 + sx = calc_sx(iova); 282 + px = calc_px(iova); 283 + rto = s390_domain->dma_table; 284 + 285 + spin_lock_irqsave(&s390_domain->dma_table_lock, flags); 286 + if (rto && reg_entry_isvalid(rto[rtx])) { 287 + sto = get_rt_sto(rto[rtx]); 288 + if (sto && reg_entry_isvalid(sto[sx])) { 289 + pto = get_st_pto(sto[sx]); 290 + if (pto && pt_entry_isvalid(pto[px])) 291 + phys = pto[px] & ZPCI_PTE_ADDR_MASK; 292 + } 293 + } 294 + spin_unlock_irqrestore(&s390_domain->dma_table_lock, flags); 295 + 296 + return phys; 297 + } 298 + 299 + static size_t s390_iommu_unmap(struct iommu_domain *domain, 300 + unsigned long iova, size_t size) 301 + { 302 + struct s390_domain *s390_domain = to_s390_domain(domain); 303 + int flags = ZPCI_PTE_INVALID; 304 + phys_addr_t paddr; 305 + int rc; 306 + 307 + paddr = s390_iommu_iova_to_phys(domain, iova); 308 + if (!paddr) 309 + return 0; 310 + 311 + rc = s390_iommu_update_trans(s390_domain, (unsigned long) paddr, iova, 312 + size, flags); 313 + if (rc) 314 + return 0; 315 + 316 + return size; 317 + } 318 + 319 + static struct iommu_ops s390_iommu_ops = { 320 + .capable = s390_iommu_capable, 321 + .domain_alloc = s390_domain_alloc, 322 + .domain_free = s390_domain_free, 323 + .attach_dev = s390_iommu_attach_device, 324 + .detach_dev = s390_iommu_detach_device, 325 + .map = s390_iommu_map, 326 + .unmap = s390_iommu_unmap, 327 + .iova_to_phys = s390_iommu_iova_to_phys, 328 + .add_device = s390_iommu_add_device, 329 + .remove_device = s390_iommu_remove_device, 330 + .pgsize_bitmap 
= S390_IOMMU_PGSIZES, 331 + }; 332 + 333 + static int __init s390_iommu_init(void) 334 + { 335 + return bus_set_iommu(&pci_bus_type, &s390_iommu_ops); 336 + } 337 + subsys_initcall(s390_iommu_init);
+85
include/linux/dma-iommu.h
/*
 * Copyright (C) 2014-2015 ARM Ltd.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
#ifndef __DMA_IOMMU_H
#define __DMA_IOMMU_H

#ifdef __KERNEL__
#include <asm/errno.h>

#ifdef CONFIG_IOMMU_DMA
#include <linux/iommu.h>

int iommu_dma_init(void);

/* Cookie handling, used by IOMMU drivers that support DMA-API domains */
int iommu_get_dma_cookie(struct iommu_domain *domain);
void iommu_put_dma_cookie(struct iommu_domain *domain);

/* Called by arch DMA mapping code to prepare a domain for DMA-API use */
int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, u64 size);

/* Helper translating a DMA direction/coherency pair into IOMMU prot flags */
int dma_direction_to_prot(enum dma_data_direction dir, bool coherent);

/*
 * The bulk of the DMA mapping callbacks lives here; arch code remains
 * responsible for handling attributes and performing cache maintenance
 * around these calls.
 */
struct page **iommu_dma_alloc(struct device *dev, size_t size,
		gfp_t gfp, int prot, dma_addr_t *handle,
		void (*flush_page)(struct device *, const void *, phys_addr_t));
void iommu_dma_free(struct device *dev, struct page **pages, size_t size,
		dma_addr_t *handle);

int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma);

dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
		unsigned long offset, size_t size, int prot);
int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
		int nents, int prot);

/*
 * These need no extra attribute handling, so arch code without special
 * requirements can wire them up as dma_map_ops callbacks directly.
 */
void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size,
		enum dma_data_direction dir, struct dma_attrs *attrs);
void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
		enum dma_data_direction dir, struct dma_attrs *attrs);
int iommu_dma_supported(struct device *dev, u64 mask);
int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr);

#else

struct iommu_domain;

/* !CONFIG_IOMMU_DMA: inert stubs so callers need no ifdefs of their own */
static inline int iommu_dma_init(void)
{
	return 0;
}

static inline int iommu_get_dma_cookie(struct iommu_domain *domain)
{
	return -ENODEV;
}

static inline void iommu_put_dma_cookie(struct iommu_domain *domain)
{
}

#endif	/* CONFIG_IOMMU_DMA */
#endif	/* __KERNEL__ */
#endif	/* __DMA_IOMMU_H */
+7 -1
include/linux/iommu.h
··· 81 81 iommu_fault_handler_t handler; 82 82 void *handler_token; 83 83 struct iommu_domain_geometry geometry; 84 + void *iova_cookie; 84 85 }; 85 86 86 87 enum iommu_cap { ··· 168 167 phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova); 169 168 int (*add_device)(struct device *dev); 170 169 void (*remove_device)(struct device *dev); 171 - int (*device_group)(struct device *dev, unsigned int *groupid); 170 + struct iommu_group *(*device_group)(struct device *dev); 172 171 int (*domain_get_attr)(struct iommu_domain *domain, 173 172 enum iommu_attr attr, void *data); 174 173 int (*domain_set_attr)(struct iommu_domain *domain, ··· 316 315 { 317 316 return domain->ops->map_sg(domain, iova, sg, nents, prot); 318 317 } 318 + 319 + /* PCI device grouping function */ 320 + extern struct iommu_group *pci_device_group(struct device *dev); 321 + /* Generic device grouping function */ 322 + extern struct iommu_group *generic_device_group(struct device *dev); 319 323 320 324 #else /* CONFIG_IOMMU_API */ 321 325