Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branches 'iommu/fixes', 'arm/omap' and 'x86/amd' into next

Conflicts:
drivers/pci/hotplug/acpiphp_glue.c

+2787 -315
+18
Documentation/ABI/testing/sysfs-bus-pci
··· 66 66 re-discover previously removed devices. 67 67 Depends on CONFIG_HOTPLUG. 68 68 69 + What: /sys/bus/pci/devices/.../msi_irqs/ 70 + Date: September, 2011 71 + Contact: Neil Horman <nhorman@tuxdriver.com> 72 + Description: 73 + The /sys/devices/.../msi_irqs directory contains a variable set 74 + of sub-directories, with each sub-directory being named after a 75 + corresponding msi irq vector allocated to that device. Each 76 + numbered sub-directory N contains attributes of that irq. 77 + Note that this directory is not created for device drivers which 78 + do not support msi irqs 79 + 80 + What: /sys/bus/pci/devices/.../msi_irqs/<N>/mode 81 + Date: September 2011 82 + Contact: Neil Horman <nhorman@tuxdriver.com> 83 + Description: 84 + This attribute indicates the mode that the irq vector named by 85 + the parent directory is in (msi vs. msix) 86 + 69 87 What: /sys/bus/pci/devices/.../remove 70 88 Date: January 2009 71 89 Contact: Linux PCI developers <linux-pci@vger.kernel.org>
+5
Documentation/kernel-parameters.txt
··· 329 329 is a lot faster 330 330 off - do not initialize any AMD IOMMU found in 331 331 the system 332 + force_isolation - Force device isolation for all 333 + devices. The IOMMU driver is not 334 + allowed anymore to lift isolation 335 + requirements as needed. This option 336 + does not override iommu=pt 332 337 333 338 amijoy.map= [HW,JOY] Amiga joystick support 334 339 Map of devices attached to JOY0DAT and JOY1DAT
+7
arch/arm/mach-omap2/devices.c
··· 28 28 #include <plat/board.h> 29 29 #include <plat/mcbsp.h> 30 30 #include <plat/mmc.h> 31 + #include <plat/iommu.h> 31 32 #include <plat/dma.h> 32 33 #include <plat/omap_hwmod.h> 33 34 #include <plat/omap_device.h> ··· 212 211 .resource = omap3isp_resources, 213 212 }; 214 213 214 + static struct omap_iommu_arch_data omap3_isp_iommu = { 215 + .name = "isp", 216 + }; 217 + 215 218 int omap3_init_camera(struct isp_platform_data *pdata) 216 219 { 217 220 omap3isp_device.dev.platform_data = pdata; 221 + omap3isp_device.dev.archdata.iommu = &omap3_isp_iommu; 222 + 218 223 return platform_device_register(&omap3isp_device); 219 224 } 220 225
+28 -3
arch/arm/plat-omap/include/plat/iommu.h
··· 111 111 u32 da_end; 112 112 }; 113 113 114 + /** 115 + * struct omap_iommu_arch_data - omap iommu private data 116 + * @name: name of the iommu device 117 + * @iommu_dev: handle of the iommu device 118 + * 119 + * This is an omap iommu private data object, which binds an iommu user 120 + * to its iommu device. This object should be placed at the iommu user's 121 + * dev_archdata so generic IOMMU API can be used without having to 122 + * utilize omap-specific plumbing anymore. 123 + */ 124 + struct omap_iommu_arch_data { 125 + const char *name; 126 + struct omap_iommu *iommu_dev; 127 + }; 128 + 129 + /** 130 + * dev_to_omap_iommu() - retrieves an omap iommu object from a user device 131 + * @dev: iommu client device 132 + */ 133 + static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev) 134 + { 135 + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; 136 + 137 + return arch_data->iommu_dev; 138 + } 139 + 114 140 /* IOMMU errors */ 115 141 #define OMAP_IOMMU_ERR_TLB_MISS (1 << 0) 116 142 #define OMAP_IOMMU_ERR_TRANS_FAULT (1 << 1) ··· 189 163 void *priv), 190 164 void *isr_priv); 191 165 192 - extern void omap_iommu_save_ctx(struct omap_iommu *obj); 193 - extern void omap_iommu_restore_ctx(struct omap_iommu *obj); 166 + extern void omap_iommu_save_ctx(struct device *dev); 167 + extern void omap_iommu_restore_ctx(struct device *dev); 194 168 195 169 extern int omap_install_iommu_arch(const struct iommu_functions *ops); 196 170 extern void omap_uninstall_iommu_arch(const struct iommu_functions *ops); ··· 202 176 omap_iommu_dump_ctx(struct omap_iommu *obj, char *buf, ssize_t len); 203 177 extern size_t 204 178 omap_dump_tlb_entries(struct omap_iommu *obj, char *buf, ssize_t len); 205 - struct device *omap_find_iommu_device(const char *name); 206 179 207 180 #endif /* __MACH_IOMMU_H */
+6 -6
arch/arm/plat-omap/include/plat/iovmm.h
··· 72 72 #define IOVMF_DA_FIXED (1 << (4 + IOVMF_SW_SHIFT)) 73 73 74 74 75 - extern struct iovm_struct *omap_find_iovm_area(struct omap_iommu *obj, u32 da); 75 + extern struct iovm_struct *omap_find_iovm_area(struct device *dev, u32 da); 76 76 extern u32 77 - omap_iommu_vmap(struct iommu_domain *domain, struct omap_iommu *obj, u32 da, 77 + omap_iommu_vmap(struct iommu_domain *domain, struct device *dev, u32 da, 78 78 const struct sg_table *sgt, u32 flags); 79 79 extern struct sg_table *omap_iommu_vunmap(struct iommu_domain *domain, 80 - struct omap_iommu *obj, u32 da); 80 + struct device *dev, u32 da); 81 81 extern u32 82 - omap_iommu_vmalloc(struct iommu_domain *domain, struct omap_iommu *obj, 82 + omap_iommu_vmalloc(struct iommu_domain *domain, struct device *dev, 83 83 u32 da, size_t bytes, u32 flags); 84 84 extern void 85 - omap_iommu_vfree(struct iommu_domain *domain, struct omap_iommu *obj, 85 + omap_iommu_vfree(struct iommu_domain *domain, struct device *dev, 86 86 const u32 da); 87 - extern void *omap_da_to_va(struct omap_iommu *obj, u32 da); 87 + extern void *omap_da_to_va(struct device *dev, u32 da); 88 88 89 89 #endif /* __IOMMU_MMAP_H */
+7
drivers/acpi/pci_root.c
··· 596 596 if (ACPI_SUCCESS(status)) { 597 597 dev_info(root->bus->bridge, 598 598 "ACPI _OSC control (0x%02x) granted\n", flags); 599 + if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) { 600 + /* 601 + * We have ASPM control, but the FADT indicates 602 + * that it's unsupported. Clear it. 603 + */ 604 + pcie_clear_aspm(root->bus); 605 + } 599 606 } else { 600 607 dev_info(root->bus->bridge, 601 608 "ACPI _OSC request failed (%s), "
+12 -1
drivers/iommu/Kconfig
··· 34 34 bool "AMD IOMMU support" 35 35 select SWIOTLB 36 36 select PCI_MSI 37 - select PCI_IOV 37 + select PCI_ATS 38 + select PCI_PRI 39 + select PCI_PASID 38 40 select IOMMU_API 39 41 depends on X86_64 && PCI && ACPI 40 42 ---help--- ··· 59 57 statistics about what's happening in the driver and exports that 60 58 information to userspace via debugfs. 61 59 If unsure, say N. 60 + 61 + config AMD_IOMMU_V2 62 + tristate "AMD IOMMU Version 2 driver (EXPERIMENTAL)" 63 + depends on AMD_IOMMU && PROFILING && EXPERIMENTAL 64 + select MMU_NOTIFIER 65 + ---help--- 66 + This option enables support for the AMD IOMMUv2 features of the IOMMU 67 + hardware. Select this option if you want to use devices that support 68 + the PCI PRI and PASID interface. 62 69 63 70 # Intel IOMMU support 64 71 config DMAR_TABLE
+1
drivers/iommu/Makefile
··· 1 1 obj-$(CONFIG_IOMMU_API) += iommu.o 2 2 obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o 3 3 obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o 4 + obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o 4 5 obj-$(CONFIG_DMAR_TABLE) += dmar.o 5 6 obj-$(CONFIG_INTEL_IOMMU) += iova.o intel-iommu.o 6 7 obj-$(CONFIG_IRQ_REMAP) += intr_remapping.o
+824 -38
drivers/iommu/amd_iommu.c
··· 17 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 18 18 */ 19 19 20 + #include <linux/ratelimit.h> 20 21 #include <linux/pci.h> 21 22 #include <linux/pci-ats.h> 22 23 #include <linux/bitmap.h> ··· 29 28 #include <linux/iommu.h> 30 29 #include <linux/delay.h> 31 30 #include <linux/amd-iommu.h> 31 + #include <linux/notifier.h> 32 + #include <linux/export.h> 32 33 #include <asm/msidef.h> 33 34 #include <asm/proto.h> 34 35 #include <asm/iommu.h> ··· 43 40 #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) 44 41 45 42 #define LOOP_TIMEOUT 100000 43 + 44 + /* 45 + * This bitmap is used to advertise the page sizes our hardware support 46 + * to the IOMMU core, which will then use this information to split 47 + * physically contiguous memory regions it is mapping into page sizes 48 + * that we support. 49 + * 50 + * Traditionally the IOMMU core just handed us the mappings directly, 51 + * after making sure the size is an order of a 4KiB page and that the 52 + * mapping has natural alignment. 53 + * 54 + * To retain this behavior, we currently advertise that we support 55 + * all page sizes that are an order of 4KiB. 56 + * 57 + * If at some point we'd like to utilize the IOMMU core's new behavior, 58 + * we could change this to advertise the real page sizes we support. 
59 + */ 60 + #define AMD_IOMMU_PGSIZES (~0xFFFUL) 46 61 47 62 static DEFINE_RWLOCK(amd_iommu_devtable_lock); 48 63 ··· 80 59 81 60 static struct iommu_ops amd_iommu_ops; 82 61 62 + static ATOMIC_NOTIFIER_HEAD(ppr_notifier); 63 + int amd_iommu_max_glx_val = -1; 64 + 83 65 /* 84 66 * general struct to manage commands send to an IOMMU 85 67 */ ··· 91 67 }; 92 68 93 69 static void update_domain(struct protection_domain *domain); 70 + static int __init alloc_passthrough_domain(void); 94 71 95 72 /**************************************************************************** 96 73 * ··· 172 147 return dev->archdata.iommu; 173 148 } 174 149 150 + static bool pci_iommuv2_capable(struct pci_dev *pdev) 151 + { 152 + static const int caps[] = { 153 + PCI_EXT_CAP_ID_ATS, 154 + PCI_EXT_CAP_ID_PRI, 155 + PCI_EXT_CAP_ID_PASID, 156 + }; 157 + int i, pos; 158 + 159 + for (i = 0; i < 3; ++i) { 160 + pos = pci_find_ext_capability(pdev, caps[i]); 161 + if (pos == 0) 162 + return false; 163 + } 164 + 165 + return true; 166 + } 167 + 168 + static bool pdev_pri_erratum(struct pci_dev *pdev, u32 erratum) 169 + { 170 + struct iommu_dev_data *dev_data; 171 + 172 + dev_data = get_dev_data(&pdev->dev); 173 + 174 + return dev_data->errata & (1 << erratum) ? 
true : false; 175 + } 176 + 175 177 /* 176 178 * In this function the list of preallocated protection domains is traversed to 177 179 * find the domain for a specific device ··· 256 204 257 205 static int iommu_init_device(struct device *dev) 258 206 { 207 + struct pci_dev *pdev = to_pci_dev(dev); 259 208 struct iommu_dev_data *dev_data; 260 209 u16 alias; 261 210 ··· 279 226 return -ENOTSUPP; 280 227 } 281 228 dev_data->alias_data = alias_data; 229 + } 230 + 231 + if (pci_iommuv2_capable(pdev)) { 232 + struct amd_iommu *iommu; 233 + 234 + iommu = amd_iommu_rlookup_table[dev_data->devid]; 235 + dev_data->iommu_v2 = iommu->is_iommu_v2; 282 236 } 283 237 284 238 dev->archdata.iommu = dev_data; ··· 377 317 DECLARE_STATS_COUNTER(domain_flush_all); 378 318 DECLARE_STATS_COUNTER(alloced_io_mem); 379 319 DECLARE_STATS_COUNTER(total_map_requests); 320 + DECLARE_STATS_COUNTER(complete_ppr); 321 + DECLARE_STATS_COUNTER(invalidate_iotlb); 322 + DECLARE_STATS_COUNTER(invalidate_iotlb_all); 323 + DECLARE_STATS_COUNTER(pri_requests); 324 + 380 325 381 326 static struct dentry *stats_dir; 382 327 static struct dentry *de_fflush; ··· 416 351 amd_iommu_stats_add(&domain_flush_all); 417 352 amd_iommu_stats_add(&alloced_io_mem); 418 353 amd_iommu_stats_add(&total_map_requests); 354 + amd_iommu_stats_add(&complete_ppr); 355 + amd_iommu_stats_add(&invalidate_iotlb); 356 + amd_iommu_stats_add(&invalidate_iotlb_all); 357 + amd_iommu_stats_add(&pri_requests); 419 358 } 420 359 421 360 #endif ··· 434 365 { 435 366 int i; 436 367 437 - for (i = 0; i < 8; ++i) 438 - pr_err("AMD-Vi: DTE[%d]: %08x\n", i, 368 + for (i = 0; i < 4; ++i) 369 + pr_err("AMD-Vi: DTE[%d]: %016llx\n", i, 439 370 amd_iommu_dev_table[devid].data[i]); 440 371 } 441 372 ··· 530 461 spin_unlock_irqrestore(&iommu->lock, flags); 531 462 } 532 463 464 + static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u32 head) 465 + { 466 + struct amd_iommu_fault fault; 467 + volatile u64 *raw; 468 + int i; 469 + 470 + 
INC_STATS_COUNTER(pri_requests); 471 + 472 + raw = (u64 *)(iommu->ppr_log + head); 473 + 474 + /* 475 + * Hardware bug: Interrupt may arrive before the entry is written to 476 + * memory. If this happens we need to wait for the entry to arrive. 477 + */ 478 + for (i = 0; i < LOOP_TIMEOUT; ++i) { 479 + if (PPR_REQ_TYPE(raw[0]) != 0) 480 + break; 481 + udelay(1); 482 + } 483 + 484 + if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) { 485 + pr_err_ratelimited("AMD-Vi: Unknown PPR request received\n"); 486 + return; 487 + } 488 + 489 + fault.address = raw[1]; 490 + fault.pasid = PPR_PASID(raw[0]); 491 + fault.device_id = PPR_DEVID(raw[0]); 492 + fault.tag = PPR_TAG(raw[0]); 493 + fault.flags = PPR_FLAGS(raw[0]); 494 + 495 + /* 496 + * To detect the hardware bug we need to clear the entry 497 + * to back to zero. 498 + */ 499 + raw[0] = raw[1] = 0; 500 + 501 + atomic_notifier_call_chain(&ppr_notifier, 0, &fault); 502 + } 503 + 504 + static void iommu_poll_ppr_log(struct amd_iommu *iommu) 505 + { 506 + unsigned long flags; 507 + u32 head, tail; 508 + 509 + if (iommu->ppr_log == NULL) 510 + return; 511 + 512 + spin_lock_irqsave(&iommu->lock, flags); 513 + 514 + head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); 515 + tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); 516 + 517 + while (head != tail) { 518 + 519 + /* Handle PPR entry */ 520 + iommu_handle_ppr_entry(iommu, head); 521 + 522 + /* Update and refresh ring-buffer state*/ 523 + head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE; 524 + writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); 525 + tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); 526 + } 527 + 528 + /* enable ppr interrupts again */ 529 + writel(MMIO_STATUS_PPR_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET); 530 + 531 + spin_unlock_irqrestore(&iommu->lock, flags); 532 + } 533 + 533 534 irqreturn_t amd_iommu_int_thread(int irq, void *data) 534 535 { 535 536 struct amd_iommu *iommu; 536 537 537 - for_each_iommu(iommu) 538 + for_each_iommu(iommu) { 
538 539 iommu_poll_events(iommu); 540 + iommu_poll_ppr_log(iommu); 541 + } 539 542 540 543 return IRQ_HANDLED; 541 544 } ··· 734 593 CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES); 735 594 if (s) 736 595 cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; 596 + } 597 + 598 + static void build_inv_iommu_pasid(struct iommu_cmd *cmd, u16 domid, int pasid, 599 + u64 address, bool size) 600 + { 601 + memset(cmd, 0, sizeof(*cmd)); 602 + 603 + address &= ~(0xfffULL); 604 + 605 + cmd->data[0] = pasid & PASID_MASK; 606 + cmd->data[1] = domid; 607 + cmd->data[2] = lower_32_bits(address); 608 + cmd->data[3] = upper_32_bits(address); 609 + cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; 610 + cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK; 611 + if (size) 612 + cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; 613 + CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); 614 + } 615 + 616 + static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, int pasid, 617 + int qdep, u64 address, bool size) 618 + { 619 + memset(cmd, 0, sizeof(*cmd)); 620 + 621 + address &= ~(0xfffULL); 622 + 623 + cmd->data[0] = devid; 624 + cmd->data[0] |= (pasid & 0xff) << 16; 625 + cmd->data[0] |= (qdep & 0xff) << 24; 626 + cmd->data[1] = devid; 627 + cmd->data[1] |= ((pasid >> 8) & 0xfff) << 16; 628 + cmd->data[2] = lower_32_bits(address); 629 + cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK; 630 + cmd->data[3] = upper_32_bits(address); 631 + if (size) 632 + cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; 633 + CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES); 634 + } 635 + 636 + static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, int pasid, 637 + int status, int tag, bool gn) 638 + { 639 + memset(cmd, 0, sizeof(*cmd)); 640 + 641 + cmd->data[0] = devid; 642 + if (gn) { 643 + cmd->data[1] = pasid & PASID_MASK; 644 + cmd->data[2] = CMD_INV_IOMMU_PAGES_GN_MASK; 645 + } 646 + cmd->data[3] = tag & 0x1ff; 647 + cmd->data[3] |= (status & PPR_STATUS_MASK) << PPR_STATUS_SHIFT; 648 + 649 + CMD_SET_TYPE(cmd, CMD_COMPLETE_PPR); 737 650 } 
738 651 739 652 static void build_inv_all(struct iommu_cmd *cmd) ··· 1691 1496 domain->pt_root = NULL; 1692 1497 } 1693 1498 1499 + static void free_gcr3_tbl_level1(u64 *tbl) 1500 + { 1501 + u64 *ptr; 1502 + int i; 1503 + 1504 + for (i = 0; i < 512; ++i) { 1505 + if (!(tbl[i] & GCR3_VALID)) 1506 + continue; 1507 + 1508 + ptr = __va(tbl[i] & PAGE_MASK); 1509 + 1510 + free_page((unsigned long)ptr); 1511 + } 1512 + } 1513 + 1514 + static void free_gcr3_tbl_level2(u64 *tbl) 1515 + { 1516 + u64 *ptr; 1517 + int i; 1518 + 1519 + for (i = 0; i < 512; ++i) { 1520 + if (!(tbl[i] & GCR3_VALID)) 1521 + continue; 1522 + 1523 + ptr = __va(tbl[i] & PAGE_MASK); 1524 + 1525 + free_gcr3_tbl_level1(ptr); 1526 + } 1527 + } 1528 + 1529 + static void free_gcr3_table(struct protection_domain *domain) 1530 + { 1531 + if (domain->glx == 2) 1532 + free_gcr3_tbl_level2(domain->gcr3_tbl); 1533 + else if (domain->glx == 1) 1534 + free_gcr3_tbl_level1(domain->gcr3_tbl); 1535 + else if (domain->glx != 0) 1536 + BUG(); 1537 + 1538 + free_page((unsigned long)domain->gcr3_tbl); 1539 + } 1540 + 1694 1541 /* 1695 1542 * Free a domain, only used if something went wrong in the 1696 1543 * allocation path and we need to free an already allocated page table ··· 1819 1582 1820 1583 static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats) 1821 1584 { 1822 - u64 pte_root = virt_to_phys(domain->pt_root); 1823 - u32 flags = 0; 1585 + u64 pte_root = 0; 1586 + u64 flags = 0; 1587 + 1588 + if (domain->mode != PAGE_MODE_NONE) 1589 + pte_root = virt_to_phys(domain->pt_root); 1824 1590 1825 1591 pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) 1826 1592 << DEV_ENTRY_MODE_SHIFT; 1827 1593 pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; 1828 1594 1595 + flags = amd_iommu_dev_table[devid].data[1]; 1596 + 1829 1597 if (ats) 1830 1598 flags |= DTE_FLAG_IOTLB; 1831 1599 1832 - amd_iommu_dev_table[devid].data[3] |= flags; 1833 - amd_iommu_dev_table[devid].data[2] = 
domain->id; 1834 - amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); 1835 - amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); 1600 + if (domain->flags & PD_IOMMUV2_MASK) { 1601 + u64 gcr3 = __pa(domain->gcr3_tbl); 1602 + u64 glx = domain->glx; 1603 + u64 tmp; 1604 + 1605 + pte_root |= DTE_FLAG_GV; 1606 + pte_root |= (glx & DTE_GLX_MASK) << DTE_GLX_SHIFT; 1607 + 1608 + /* First mask out possible old values for GCR3 table */ 1609 + tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B; 1610 + flags &= ~tmp; 1611 + 1612 + tmp = DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C; 1613 + flags &= ~tmp; 1614 + 1615 + /* Encode GCR3 table into DTE */ 1616 + tmp = DTE_GCR3_VAL_A(gcr3) << DTE_GCR3_SHIFT_A; 1617 + pte_root |= tmp; 1618 + 1619 + tmp = DTE_GCR3_VAL_B(gcr3) << DTE_GCR3_SHIFT_B; 1620 + flags |= tmp; 1621 + 1622 + tmp = DTE_GCR3_VAL_C(gcr3) << DTE_GCR3_SHIFT_C; 1623 + flags |= tmp; 1624 + } 1625 + 1626 + flags &= ~(0xffffUL); 1627 + flags |= domain->id; 1628 + 1629 + amd_iommu_dev_table[devid].data[1] = flags; 1630 + amd_iommu_dev_table[devid].data[0] = pte_root; 1836 1631 } 1837 1632 1838 1633 static void clear_dte_entry(u16 devid) ··· 1872 1603 /* remove entry from the device table seen by the hardware */ 1873 1604 amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; 1874 1605 amd_iommu_dev_table[devid].data[1] = 0; 1875 - amd_iommu_dev_table[devid].data[2] = 0; 1876 1606 1877 1607 amd_iommu_apply_erratum_63(devid); 1878 1608 } ··· 1964 1696 return ret; 1965 1697 } 1966 1698 1699 + 1700 + static void pdev_iommuv2_disable(struct pci_dev *pdev) 1701 + { 1702 + pci_disable_ats(pdev); 1703 + pci_disable_pri(pdev); 1704 + pci_disable_pasid(pdev); 1705 + } 1706 + 1707 + /* FIXME: Change generic reset-function to do the same */ 1708 + static int pri_reset_while_enabled(struct pci_dev *pdev) 1709 + { 1710 + u16 control; 1711 + int pos; 1712 + 1713 + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); 1714 + if (!pos) 1715 + return -EINVAL; 
1716 + 1717 + pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control); 1718 + control |= PCI_PRI_CTRL_RESET; 1719 + pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control); 1720 + 1721 + return 0; 1722 + } 1723 + 1724 + static int pdev_iommuv2_enable(struct pci_dev *pdev) 1725 + { 1726 + bool reset_enable; 1727 + int reqs, ret; 1728 + 1729 + /* FIXME: Hardcode number of outstanding requests for now */ 1730 + reqs = 32; 1731 + if (pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE)) 1732 + reqs = 1; 1733 + reset_enable = pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_ENABLE_RESET); 1734 + 1735 + /* Only allow access to user-accessible pages */ 1736 + ret = pci_enable_pasid(pdev, 0); 1737 + if (ret) 1738 + goto out_err; 1739 + 1740 + /* First reset the PRI state of the device */ 1741 + ret = pci_reset_pri(pdev); 1742 + if (ret) 1743 + goto out_err; 1744 + 1745 + /* Enable PRI */ 1746 + ret = pci_enable_pri(pdev, reqs); 1747 + if (ret) 1748 + goto out_err; 1749 + 1750 + if (reset_enable) { 1751 + ret = pri_reset_while_enabled(pdev); 1752 + if (ret) 1753 + goto out_err; 1754 + } 1755 + 1756 + ret = pci_enable_ats(pdev, PAGE_SHIFT); 1757 + if (ret) 1758 + goto out_err; 1759 + 1760 + return 0; 1761 + 1762 + out_err: 1763 + pci_disable_pri(pdev); 1764 + pci_disable_pasid(pdev); 1765 + 1766 + return ret; 1767 + } 1768 + 1769 + /* FIXME: Move this to PCI code */ 1770 + #define PCI_PRI_TLP_OFF (1 << 2) 1771 + 1772 + bool pci_pri_tlp_required(struct pci_dev *pdev) 1773 + { 1774 + u16 control; 1775 + int pos; 1776 + 1777 + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); 1778 + if (!pos) 1779 + return false; 1780 + 1781 + pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control); 1782 + 1783 + return (control & PCI_PRI_TLP_OFF) ? 
true : false; 1784 + } 1785 + 1967 1786 /* 1968 1787 * If a device is not yet associated with a domain, this function does 1969 1788 * assigns it visible for the hardware ··· 2065 1710 2066 1711 dev_data = get_dev_data(dev); 2067 1712 2068 - if (amd_iommu_iotlb_sup && pci_enable_ats(pdev, PAGE_SHIFT) == 0) { 1713 + if (domain->flags & PD_IOMMUV2_MASK) { 1714 + if (!dev_data->iommu_v2 || !dev_data->passthrough) 1715 + return -EINVAL; 1716 + 1717 + if (pdev_iommuv2_enable(pdev) != 0) 1718 + return -EINVAL; 1719 + 1720 + dev_data->ats.enabled = true; 1721 + dev_data->ats.qdep = pci_ats_queue_depth(pdev); 1722 + dev_data->pri_tlp = pci_pri_tlp_required(pdev); 1723 + } else if (amd_iommu_iotlb_sup && 1724 + pci_enable_ats(pdev, PAGE_SHIFT) == 0) { 2069 1725 dev_data->ats.enabled = true; 2070 1726 dev_data->ats.qdep = pci_ats_queue_depth(pdev); 2071 1727 } ··· 2126 1760 * passthrough domain if it is detached from any other domain. 2127 1761 * Make sure we can deassign from the pt_domain itself. 
2128 1762 */ 2129 - if (iommu_pass_through && 1763 + if (dev_data->passthrough && 2130 1764 (dev_data->domain == NULL && domain != pt_domain)) 2131 1765 __attach_device(dev_data, pt_domain); 2132 1766 } ··· 2136 1770 */ 2137 1771 static void detach_device(struct device *dev) 2138 1772 { 1773 + struct protection_domain *domain; 2139 1774 struct iommu_dev_data *dev_data; 2140 1775 unsigned long flags; 2141 1776 2142 1777 dev_data = get_dev_data(dev); 1778 + domain = dev_data->domain; 2143 1779 2144 1780 /* lock device table */ 2145 1781 write_lock_irqsave(&amd_iommu_devtable_lock, flags); 2146 1782 __detach_device(dev_data); 2147 1783 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); 2148 1784 2149 - if (dev_data->ats.enabled) { 1785 + if (domain->flags & PD_IOMMUV2_MASK) 1786 + pdev_iommuv2_disable(to_pci_dev(dev)); 1787 + else if (dev_data->ats.enabled) 2150 1788 pci_disable_ats(to_pci_dev(dev)); 2151 - dev_data->ats.enabled = false; 2152 - } 1789 + 1790 + dev_data->ats.enabled = false; 2153 1791 } 2154 1792 2155 1793 /* ··· 2188 1818 static int device_change_notifier(struct notifier_block *nb, 2189 1819 unsigned long action, void *data) 2190 1820 { 2191 - struct device *dev = data; 2192 - u16 devid; 2193 - struct protection_domain *domain; 2194 1821 struct dma_ops_domain *dma_domain; 1822 + struct protection_domain *domain; 1823 + struct iommu_dev_data *dev_data; 1824 + struct device *dev = data; 2195 1825 struct amd_iommu *iommu; 2196 1826 unsigned long flags; 1827 + u16 devid; 2197 1828 2198 1829 if (!check_device(dev)) 2199 1830 return 0; 2200 1831 2201 - devid = get_device_id(dev); 2202 - iommu = amd_iommu_rlookup_table[devid]; 1832 + devid = get_device_id(dev); 1833 + iommu = amd_iommu_rlookup_table[devid]; 1834 + dev_data = get_dev_data(dev); 2203 1835 2204 1836 switch (action) { 2205 1837 case BUS_NOTIFY_UNBOUND_DRIVER: ··· 2210 1838 2211 1839 if (!domain) 2212 1840 goto out; 2213 - if (iommu_pass_through) 1841 + if (dev_data->passthrough) 2214 1842 
break; 2215 1843 detach_device(dev); 2216 1844 break; ··· 2806 2434 */ 2807 2435 static void prealloc_protection_domains(void) 2808 2436 { 2809 - struct pci_dev *dev = NULL; 2437 + struct iommu_dev_data *dev_data; 2810 2438 struct dma_ops_domain *dma_dom; 2439 + struct pci_dev *dev = NULL; 2811 2440 u16 devid; 2812 2441 2813 2442 for_each_pci_dev(dev) { ··· 2816 2443 /* Do we handle this device? */ 2817 2444 if (!check_device(&dev->dev)) 2818 2445 continue; 2446 + 2447 + dev_data = get_dev_data(&dev->dev); 2448 + if (!amd_iommu_force_isolation && dev_data->iommu_v2) { 2449 + /* Make sure passthrough domain is allocated */ 2450 + alloc_passthrough_domain(); 2451 + dev_data->passthrough = true; 2452 + attach_device(&dev->dev, pt_domain); 2453 + pr_info("AMD-Vi: Using passthough domain for device %s\n", 2454 + dev_name(&dev->dev)); 2455 + } 2819 2456 2820 2457 /* Is there already any domain for it? */ 2821 2458 if (domain_for_device(&dev->dev)) ··· 2857 2474 2858 2475 static unsigned device_dma_ops_init(void) 2859 2476 { 2477 + struct iommu_dev_data *dev_data; 2860 2478 struct pci_dev *pdev = NULL; 2861 2479 unsigned unhandled = 0; 2862 2480 ··· 2867 2483 continue; 2868 2484 } 2869 2485 2870 - pdev->dev.archdata.dma_ops = &amd_iommu_dma_ops; 2486 + dev_data = get_dev_data(&pdev->dev); 2487 + 2488 + if (!dev_data->passthrough) 2489 + pdev->dev.archdata.dma_ops = &amd_iommu_dma_ops; 2490 + else 2491 + pdev->dev.archdata.dma_ops = &nommu_dma_ops; 2871 2492 } 2872 2493 2873 2494 return unhandled; ··· 2999 2610 return NULL; 3000 2611 } 3001 2612 2613 + static int __init alloc_passthrough_domain(void) 2614 + { 2615 + if (pt_domain != NULL) 2616 + return 0; 2617 + 2618 + /* allocate passthrough domain */ 2619 + pt_domain = protection_domain_alloc(); 2620 + if (!pt_domain) 2621 + return -ENOMEM; 2622 + 2623 + pt_domain->mode = PAGE_MODE_NONE; 2624 + 2625 + return 0; 2626 + } 3002 2627 static int amd_iommu_domain_init(struct iommu_domain *dom) 3003 2628 { 3004 2629 struct 
protection_domain *domain; ··· 3025 2622 domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); 3026 2623 if (!domain->pt_root) 3027 2624 goto out_free; 2625 + 2626 + domain->iommu_domain = dom; 3028 2627 3029 2628 dom->priv = domain; 3030 2629 ··· 3050 2645 3051 2646 BUG_ON(domain->dev_cnt != 0); 3052 2647 3053 - free_pagetable(domain); 2648 + if (domain->mode != PAGE_MODE_NONE) 2649 + free_pagetable(domain); 2650 + 2651 + if (domain->flags & PD_IOMMUV2_MASK) 2652 + free_gcr3_table(domain); 3054 2653 3055 2654 protection_domain_free(domain); 3056 2655 ··· 3111 2702 } 3112 2703 3113 2704 static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, 3114 - phys_addr_t paddr, int gfp_order, int iommu_prot) 2705 + phys_addr_t paddr, size_t page_size, int iommu_prot) 3115 2706 { 3116 - unsigned long page_size = 0x1000UL << gfp_order; 3117 2707 struct protection_domain *domain = dom->priv; 3118 2708 int prot = 0; 3119 2709 int ret; 2710 + 2711 + if (domain->mode == PAGE_MODE_NONE) 2712 + return -EINVAL; 3120 2713 3121 2714 if (iommu_prot & IOMMU_READ) 3122 2715 prot |= IOMMU_PROT_IR; ··· 3132 2721 return ret; 3133 2722 } 3134 2723 3135 - static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, 3136 - int gfp_order) 2724 + static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, 2725 + size_t page_size) 3137 2726 { 3138 2727 struct protection_domain *domain = dom->priv; 3139 - unsigned long page_size, unmap_size; 2728 + size_t unmap_size; 3140 2729 3141 - page_size = 0x1000UL << gfp_order; 2730 + if (domain->mode == PAGE_MODE_NONE) 2731 + return -EINVAL; 3142 2732 3143 2733 mutex_lock(&domain->api_lock); 3144 2734 unmap_size = iommu_unmap_page(domain, iova, page_size); ··· 3147 2735 3148 2736 domain_flush_tlb_pde(domain); 3149 2737 3150 - return get_order(unmap_size); 2738 + return unmap_size; 3151 2739 } 3152 2740 3153 2741 static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, ··· 3157 2745 unsigned long 
offset_mask; 3158 2746 phys_addr_t paddr; 3159 2747 u64 *pte, __pte; 2748 + 2749 + if (domain->mode == PAGE_MODE_NONE) 2750 + return iova; 3160 2751 3161 2752 pte = fetch_pte(domain, iova); 3162 2753 ··· 3197 2782 .unmap = amd_iommu_unmap, 3198 2783 .iova_to_phys = amd_iommu_iova_to_phys, 3199 2784 .domain_has_cap = amd_iommu_domain_has_cap, 2785 + .pgsize_bitmap = AMD_IOMMU_PGSIZES, 3200 2786 }; 3201 2787 3202 2788 /***************************************************************************** ··· 3212 2796 3213 2797 int __init amd_iommu_init_passthrough(void) 3214 2798 { 3215 - struct amd_iommu *iommu; 2799 + struct iommu_dev_data *dev_data; 3216 2800 struct pci_dev *dev = NULL; 2801 + struct amd_iommu *iommu; 3217 2802 u16 devid; 2803 + int ret; 3218 2804 3219 - /* allocate passthrough domain */ 3220 - pt_domain = protection_domain_alloc(); 3221 - if (!pt_domain) 3222 - return -ENOMEM; 3223 - 3224 - pt_domain->mode |= PAGE_MODE_NONE; 2805 + ret = alloc_passthrough_domain(); 2806 + if (ret) 2807 + return ret; 3225 2808 3226 2809 for_each_pci_dev(dev) { 3227 2810 if (!check_device(&dev->dev)) 3228 2811 continue; 2812 + 2813 + dev_data = get_dev_data(&dev->dev); 2814 + dev_data->passthrough = true; 3229 2815 3230 2816 devid = get_device_id(&dev->dev); 3231 2817 ··· 3238 2820 attach_device(&dev->dev, pt_domain); 3239 2821 } 3240 2822 2823 + amd_iommu_stats_init(); 2824 + 3241 2825 pr_info("AMD-Vi: Initialized for Passthrough Mode\n"); 3242 2826 3243 2827 return 0; 3244 2828 } 2829 + 2830 + /* IOMMUv2 specific functions */ 2831 + int amd_iommu_register_ppr_notifier(struct notifier_block *nb) 2832 + { 2833 + return atomic_notifier_chain_register(&ppr_notifier, nb); 2834 + } 2835 + EXPORT_SYMBOL(amd_iommu_register_ppr_notifier); 2836 + 2837 + int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb) 2838 + { 2839 + return atomic_notifier_chain_unregister(&ppr_notifier, nb); 2840 + } 2841 + EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier); 2842 + 2843 + void 
amd_iommu_domain_direct_map(struct iommu_domain *dom) 2844 + { 2845 + struct protection_domain *domain = dom->priv; 2846 + unsigned long flags; 2847 + 2848 + spin_lock_irqsave(&domain->lock, flags); 2849 + 2850 + /* Update data structure */ 2851 + domain->mode = PAGE_MODE_NONE; 2852 + domain->updated = true; 2853 + 2854 + /* Make changes visible to IOMMUs */ 2855 + update_domain(domain); 2856 + 2857 + /* Page-table is not visible to IOMMU anymore, so free it */ 2858 + free_pagetable(domain); 2859 + 2860 + spin_unlock_irqrestore(&domain->lock, flags); 2861 + } 2862 + EXPORT_SYMBOL(amd_iommu_domain_direct_map); 2863 + 2864 + int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids) 2865 + { 2866 + struct protection_domain *domain = dom->priv; 2867 + unsigned long flags; 2868 + int levels, ret; 2869 + 2870 + if (pasids <= 0 || pasids > (PASID_MASK + 1)) 2871 + return -EINVAL; 2872 + 2873 + /* Number of GCR3 table levels required */ 2874 + for (levels = 0; (pasids - 1) & ~0x1ff; pasids >>= 9) 2875 + levels += 1; 2876 + 2877 + if (levels > amd_iommu_max_glx_val) 2878 + return -EINVAL; 2879 + 2880 + spin_lock_irqsave(&domain->lock, flags); 2881 + 2882 + /* 2883 + * Save us all sanity checks whether devices already in the 2884 + * domain support IOMMUv2. Just force that the domain has no 2885 + * devices attached when it is switched into IOMMUv2 mode. 
2886 + */ 2887 + ret = -EBUSY; 2888 + if (domain->dev_cnt > 0 || domain->flags & PD_IOMMUV2_MASK) 2889 + goto out; 2890 + 2891 + ret = -ENOMEM; 2892 + domain->gcr3_tbl = (void *)get_zeroed_page(GFP_ATOMIC); 2893 + if (domain->gcr3_tbl == NULL) 2894 + goto out; 2895 + 2896 + domain->glx = levels; 2897 + domain->flags |= PD_IOMMUV2_MASK; 2898 + domain->updated = true; 2899 + 2900 + update_domain(domain); 2901 + 2902 + ret = 0; 2903 + 2904 + out: 2905 + spin_unlock_irqrestore(&domain->lock, flags); 2906 + 2907 + return ret; 2908 + } 2909 + EXPORT_SYMBOL(amd_iommu_domain_enable_v2); 2910 + 2911 + static int __flush_pasid(struct protection_domain *domain, int pasid, 2912 + u64 address, bool size) 2913 + { 2914 + struct iommu_dev_data *dev_data; 2915 + struct iommu_cmd cmd; 2916 + int i, ret; 2917 + 2918 + if (!(domain->flags & PD_IOMMUV2_MASK)) 2919 + return -EINVAL; 2920 + 2921 + build_inv_iommu_pasid(&cmd, domain->id, pasid, address, size); 2922 + 2923 + /* 2924 + * IOMMU TLB needs to be flushed before Device TLB to 2925 + * prevent device TLB refill from IOMMU TLB 2926 + */ 2927 + for (i = 0; i < amd_iommus_present; ++i) { 2928 + if (domain->dev_iommu[i] == 0) 2929 + continue; 2930 + 2931 + ret = iommu_queue_command(amd_iommus[i], &cmd); 2932 + if (ret != 0) 2933 + goto out; 2934 + } 2935 + 2936 + /* Wait until IOMMU TLB flushes are complete */ 2937 + domain_flush_complete(domain); 2938 + 2939 + /* Now flush device TLBs */ 2940 + list_for_each_entry(dev_data, &domain->dev_list, list) { 2941 + struct amd_iommu *iommu; 2942 + int qdep; 2943 + 2944 + BUG_ON(!dev_data->ats.enabled); 2945 + 2946 + qdep = dev_data->ats.qdep; 2947 + iommu = amd_iommu_rlookup_table[dev_data->devid]; 2948 + 2949 + build_inv_iotlb_pasid(&cmd, dev_data->devid, pasid, 2950 + qdep, address, size); 2951 + 2952 + ret = iommu_queue_command(iommu, &cmd); 2953 + if (ret != 0) 2954 + goto out; 2955 + } 2956 + 2957 + /* Wait until all device TLBs are flushed */ 2958 + domain_flush_complete(domain); 2959 
+ 2960 + ret = 0; 2961 + 2962 + out: 2963 + 2964 + return ret; 2965 + } 2966 + 2967 + static int __amd_iommu_flush_page(struct protection_domain *domain, int pasid, 2968 + u64 address) 2969 + { 2970 + INC_STATS_COUNTER(invalidate_iotlb); 2971 + 2972 + return __flush_pasid(domain, pasid, address, false); 2973 + } 2974 + 2975 + int amd_iommu_flush_page(struct iommu_domain *dom, int pasid, 2976 + u64 address) 2977 + { 2978 + struct protection_domain *domain = dom->priv; 2979 + unsigned long flags; 2980 + int ret; 2981 + 2982 + spin_lock_irqsave(&domain->lock, flags); 2983 + ret = __amd_iommu_flush_page(domain, pasid, address); 2984 + spin_unlock_irqrestore(&domain->lock, flags); 2985 + 2986 + return ret; 2987 + } 2988 + EXPORT_SYMBOL(amd_iommu_flush_page); 2989 + 2990 + static int __amd_iommu_flush_tlb(struct protection_domain *domain, int pasid) 2991 + { 2992 + INC_STATS_COUNTER(invalidate_iotlb_all); 2993 + 2994 + return __flush_pasid(domain, pasid, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 2995 + true); 2996 + } 2997 + 2998 + int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid) 2999 + { 3000 + struct protection_domain *domain = dom->priv; 3001 + unsigned long flags; 3002 + int ret; 3003 + 3004 + spin_lock_irqsave(&domain->lock, flags); 3005 + ret = __amd_iommu_flush_tlb(domain, pasid); 3006 + spin_unlock_irqrestore(&domain->lock, flags); 3007 + 3008 + return ret; 3009 + } 3010 + EXPORT_SYMBOL(amd_iommu_flush_tlb); 3011 + 3012 + static u64 *__get_gcr3_pte(u64 *root, int level, int pasid, bool alloc) 3013 + { 3014 + int index; 3015 + u64 *pte; 3016 + 3017 + while (true) { 3018 + 3019 + index = (pasid >> (9 * level)) & 0x1ff; 3020 + pte = &root[index]; 3021 + 3022 + if (level == 0) 3023 + break; 3024 + 3025 + if (!(*pte & GCR3_VALID)) { 3026 + if (!alloc) 3027 + return NULL; 3028 + 3029 + root = (void *)get_zeroed_page(GFP_ATOMIC); 3030 + if (root == NULL) 3031 + return NULL; 3032 + 3033 + *pte = __pa(root) | GCR3_VALID; 3034 + } 3035 + 3036 + root = __va(*pte & 
PAGE_MASK); 3037 + 3038 + level -= 1; 3039 + } 3040 + 3041 + return pte; 3042 + } 3043 + 3044 + static int __set_gcr3(struct protection_domain *domain, int pasid, 3045 + unsigned long cr3) 3046 + { 3047 + u64 *pte; 3048 + 3049 + if (domain->mode != PAGE_MODE_NONE) 3050 + return -EINVAL; 3051 + 3052 + pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, true); 3053 + if (pte == NULL) 3054 + return -ENOMEM; 3055 + 3056 + *pte = (cr3 & PAGE_MASK) | GCR3_VALID; 3057 + 3058 + return __amd_iommu_flush_tlb(domain, pasid); 3059 + } 3060 + 3061 + static int __clear_gcr3(struct protection_domain *domain, int pasid) 3062 + { 3063 + u64 *pte; 3064 + 3065 + if (domain->mode != PAGE_MODE_NONE) 3066 + return -EINVAL; 3067 + 3068 + pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, false); 3069 + if (pte == NULL) 3070 + return 0; 3071 + 3072 + *pte = 0; 3073 + 3074 + return __amd_iommu_flush_tlb(domain, pasid); 3075 + } 3076 + 3077 + int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid, 3078 + unsigned long cr3) 3079 + { 3080 + struct protection_domain *domain = dom->priv; 3081 + unsigned long flags; 3082 + int ret; 3083 + 3084 + spin_lock_irqsave(&domain->lock, flags); 3085 + ret = __set_gcr3(domain, pasid, cr3); 3086 + spin_unlock_irqrestore(&domain->lock, flags); 3087 + 3088 + return ret; 3089 + } 3090 + EXPORT_SYMBOL(amd_iommu_domain_set_gcr3); 3091 + 3092 + int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid) 3093 + { 3094 + struct protection_domain *domain = dom->priv; 3095 + unsigned long flags; 3096 + int ret; 3097 + 3098 + spin_lock_irqsave(&domain->lock, flags); 3099 + ret = __clear_gcr3(domain, pasid); 3100 + spin_unlock_irqrestore(&domain->lock, flags); 3101 + 3102 + return ret; 3103 + } 3104 + EXPORT_SYMBOL(amd_iommu_domain_clear_gcr3); 3105 + 3106 + int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid, 3107 + int status, int tag) 3108 + { 3109 + struct iommu_dev_data *dev_data; 3110 + struct amd_iommu *iommu; 3111 + 
struct iommu_cmd cmd; 3112 + 3113 + INC_STATS_COUNTER(complete_ppr); 3114 + 3115 + dev_data = get_dev_data(&pdev->dev); 3116 + iommu = amd_iommu_rlookup_table[dev_data->devid]; 3117 + 3118 + build_complete_ppr(&cmd, dev_data->devid, pasid, status, 3119 + tag, dev_data->pri_tlp); 3120 + 3121 + return iommu_queue_command(iommu, &cmd); 3122 + } 3123 + EXPORT_SYMBOL(amd_iommu_complete_ppr); 3124 + 3125 + struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev) 3126 + { 3127 + struct protection_domain *domain; 3128 + 3129 + domain = get_domain(&pdev->dev); 3130 + if (IS_ERR(domain)) 3131 + return NULL; 3132 + 3133 + /* Only return IOMMUv2 domains */ 3134 + if (!(domain->flags & PD_IOMMUV2_MASK)) 3135 + return NULL; 3136 + 3137 + return domain->iommu_domain; 3138 + } 3139 + EXPORT_SYMBOL(amd_iommu_get_v2_domain); 3140 + 3141 + void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum) 3142 + { 3143 + struct iommu_dev_data *dev_data; 3144 + 3145 + if (!amd_iommu_v2_supported()) 3146 + return; 3147 + 3148 + dev_data = get_dev_data(&pdev->dev); 3149 + dev_data->errata |= (1 << erratum); 3150 + } 3151 + EXPORT_SYMBOL(amd_iommu_enable_device_erratum); 3152 + 3153 + int amd_iommu_device_info(struct pci_dev *pdev, 3154 + struct amd_iommu_device_info *info) 3155 + { 3156 + int max_pasids; 3157 + int pos; 3158 + 3159 + if (pdev == NULL || info == NULL) 3160 + return -EINVAL; 3161 + 3162 + if (!amd_iommu_v2_supported()) 3163 + return -EINVAL; 3164 + 3165 + memset(info, 0, sizeof(*info)); 3166 + 3167 + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS); 3168 + if (pos) 3169 + info->flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP; 3170 + 3171 + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); 3172 + if (pos) 3173 + info->flags |= AMD_IOMMU_DEVICE_FLAG_PRI_SUP; 3174 + 3175 + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID); 3176 + if (pos) { 3177 + int features; 3178 + 3179 + max_pasids = 1 << (9 * (amd_iommu_max_glx_val + 1)); 3180 + max_pasids = 
min(max_pasids, (1 << 20)); /* NOTE(review): max_pasids is an int and is computed just before this clamp as 1 << (9 * (amd_iommu_max_glx_val + 1)); if amd_iommu_max_glx_val can be 3 the shift count is 36, which is wider than int, so the clamp to 1 << 20 would come too late - confirm the value range */ 3181 + 3182 + info->flags |= AMD_IOMMU_DEVICE_FLAG_PASID_SUP; 3183 + info->max_pasids = min(pci_max_pasids(pdev), max_pasids); 3184 + 3185 + features = pci_pasid_features(pdev); 3186 + if (features & PCI_PASID_CAP_EXEC) 3187 + info->flags |= AMD_IOMMU_DEVICE_FLAG_EXEC_SUP; 3188 + if (features & PCI_PASID_CAP_PRIV) 3189 + info->flags |= AMD_IOMMU_DEVICE_FLAG_PRIV_SUP; 3190 + } 3191 + 3192 + return 0; 3193 + } 3194 + EXPORT_SYMBOL(amd_iommu_device_info);
+120 -13
drivers/iommu/amd_iommu_init.c
··· 25 25 #include <linux/interrupt.h> 26 26 #include <linux/msi.h> 27 27 #include <linux/amd-iommu.h> 28 + #include <linux/export.h> 28 29 #include <asm/pci-direct.h> 29 30 #include <asm/iommu.h> 30 31 #include <asm/gart.h> ··· 141 140 /* IOMMUs have a non-present cache? */ 142 141 bool amd_iommu_np_cache __read_mostly; 143 142 bool amd_iommu_iotlb_sup __read_mostly = true; 143 + 144 + u32 amd_iommu_max_pasids __read_mostly = ~0; 145 + 146 + bool amd_iommu_v2_present __read_mostly; 147 + 148 + bool amd_iommu_force_isolation __read_mostly; 144 149 145 150 /* 146 151 * The ACPI table parsing functions set this variable on an error ··· 303 296 304 297 ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); 305 298 ctrl &= ~(1 << bit); 299 + writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); 300 + } 301 + 302 + static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout) 303 + { 304 + u32 ctrl; 305 + 306 + ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); 307 + ctrl &= ~CTRL_INV_TO_MASK; 308 + ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK; 306 309 writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); 307 310 } 308 311 ··· 598 581 free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE)); 599 582 } 600 583 584 + /* allocates the zeroed PPR log buffer (sized by PPR_LOG_SIZE), stores it in iommu->ppr_log and returns it, or NULL on failure */ 585 + static u8 * __init alloc_ppr_log(struct amd_iommu *iommu) 586 + { 587 + iommu->ppr_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 588 + get_order(PPR_LOG_SIZE)); 589 + 590 + if (iommu->ppr_log == NULL) 591 + return NULL; 592 + 593 + return iommu->ppr_log; 594 + } 595 + 596 + static void iommu_enable_ppr_log(struct amd_iommu *iommu) 597 + { 598 + u64 entry; 599 + 600 + if (iommu->ppr_log == NULL) 601 + return; 602 + 603 + entry = (u64)virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512; 604 + 605 + memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET, 606 + &entry, sizeof(entry)); 607 + 608 + /* set head and tail to zero manually */ 609 + 
writel(0x00, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); 610 + writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); 611 + 612 + iommu_feature_enable(iommu, CONTROL_PPFLOG_EN); 613 + iommu_feature_enable(iommu, CONTROL_PPR_EN); 614 + } 615 + 616 + static void __init free_ppr_log(struct amd_iommu *iommu) 617 + { 618 + if (iommu->ppr_log == NULL) 619 + return; 620 + 621 + free_pages((unsigned long)iommu->ppr_log, get_order(PPR_LOG_SIZE)); 622 + } 623 + 624 + static void iommu_enable_gt(struct amd_iommu *iommu) 625 + { 626 + if (!iommu_feature(iommu, FEATURE_GT)) 627 + return; 628 + 629 + iommu_feature_enable(iommu, CONTROL_GT_EN); 630 + } 631 + 601 632 /* sets a specific bit in the device table entry. */ 602 633 static void set_dev_entry_bit(u16 devid, u8 bit) 603 634 { 604 - int i = (bit >> 5) & 0x07; 605 - int _bit = bit & 0x1f; 635 + int i = (bit >> 6) & 0x03; 636 + int _bit = bit & 0x3f; 606 637 607 - amd_iommu_dev_table[devid].data[i] |= (1 << _bit); 638 + amd_iommu_dev_table[devid].data[i] |= (1UL << _bit); 608 639 } 609 640 610 641 static int get_dev_entry_bit(u16 devid, u8 bit) 611 642 { 612 - int i = (bit >> 5) & 0x07; 613 - int _bit = bit & 0x1f; 643 + int i = (bit >> 6) & 0x03; 644 + int _bit = bit & 0x3f; 614 645 615 - return (amd_iommu_dev_table[devid].data[i] & (1 << _bit)) >> _bit; 646 + return (amd_iommu_dev_table[devid].data[i] & (1UL << _bit)) >> _bit; 616 647 } 617 648 618 649 ··· 763 698 high = readl(iommu->mmio_base + MMIO_EXT_FEATURES + 4); 764 699 765 700 iommu->features = ((u64)high << 32) | low; 701 + 702 + if (iommu_feature(iommu, FEATURE_GT)) { 703 + int glxval; 704 + u32 pasids; 705 + u64 shift; 706 + 707 + shift = iommu->features & FEATURE_PASID_MASK; 708 + shift >>= FEATURE_PASID_SHIFT; 709 + pasids = (1 << shift); 710 + 711 + amd_iommu_max_pasids = min(amd_iommu_max_pasids, pasids); 712 + 713 + glxval = iommu->features & FEATURE_GLXVAL_MASK; 714 + glxval >>= FEATURE_GLXVAL_SHIFT; 715 + 716 + if (amd_iommu_max_glx_val == -1) 717 + 
amd_iommu_max_glx_val = glxval; 718 + else 719 + amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval); 720 + } 721 + 722 + if (iommu_feature(iommu, FEATURE_GT) && 723 + iommu_feature(iommu, FEATURE_PPR)) { 724 + iommu->is_iommu_v2 = true; 725 + amd_iommu_v2_present = true; 726 + } 766 727 767 728 if (!is_rd890_iommu(iommu->dev)) 768 729 return; ··· 992 901 { 993 902 free_command_buffer(iommu); 994 903 free_event_buffer(iommu); 904 + free_ppr_log(iommu); 995 905 iommu_unmap_mmio_space(iommu); 996 906 } 997 907 ··· 1055 963 init_iommu_from_pci(iommu); 1056 964 init_iommu_from_acpi(iommu, h); 1057 965 init_iommu_devices(iommu); 966 + 967 + if (iommu_feature(iommu, FEATURE_PPR)) { 968 + iommu->ppr_log = alloc_ppr_log(iommu); 969 + if (!iommu->ppr_log) 970 + return -ENOMEM; 971 + } 1058 972 1059 973 if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) 1060 974 amd_iommu_np_cache = true; ··· 1147 1049 1148 1050 iommu->int_enabled = true; 1149 1051 iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); 1052 + 1053 + if (iommu->ppr_log != NULL) 1054 + iommu_feature_enable(iommu, CONTROL_PPFINT_EN); 1150 1055 1151 1056 return 0; 1152 1057 } ··· 1310 1209 * make IOMMU memory accesses cache coherent 1311 1210 */ 1312 1211 iommu_feature_enable(iommu, CONTROL_COHERENT_EN); 1212 + 1213 + /* Set IOTLB invalidation timeout to 1s */ 1214 + iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S); 1313 1215 } 1314 1216 1315 1217 static void iommu_apply_resume_quirks(struct amd_iommu *iommu) ··· 1378 1274 iommu_set_device_table(iommu); 1379 1275 iommu_enable_command_buffer(iommu); 1380 1276 iommu_enable_event_buffer(iommu); 1277 + iommu_enable_ppr_log(iommu); 1278 + iommu_enable_gt(iommu); 1381 1279 iommu_set_exclusion_range(iommu); 1382 1280 iommu_init_msi(iommu); 1383 1281 iommu_enable(iommu); ··· 1409 1303 1410 1304 /* re-load the hardware */ 1411 1305 enable_iommus(); 1412 - 1413 - /* 1414 - * we have to flush after the IOMMUs are enabled because a 1415 - * disabled IOMMU will never execute the 
commands we send 1416 - */ 1417 - for_each_iommu(iommu) 1418 - iommu_flush_all_caches(iommu); 1419 1306 } 1420 1307 1421 1308 static int amd_iommu_suspend(void) ··· 1659 1560 amd_iommu_unmap_flush = true; 1660 1561 if (strncmp(str, "off", 3) == 0) 1661 1562 amd_iommu_disabled = true; 1563 + if (strncmp(str, "force_isolation", 15) == 0) 1564 + amd_iommu_force_isolation = true; 1662 1565 } 1663 1566 1664 1567 return 1; ··· 1673 1572 gart_iommu_hole_init, 1674 1573 0, 1675 1574 0); 1575 + 1576 + bool amd_iommu_v2_supported(void) 1577 + { 1578 + return amd_iommu_v2_present; 1579 + } 1580 + EXPORT_SYMBOL(amd_iommu_v2_supported);
+24
drivers/iommu/amd_iommu_proto.h
··· 31 31 extern void amd_iommu_uninit_devices(void); 32 32 extern void amd_iommu_init_notifier(void); 33 33 extern void amd_iommu_init_api(void); 34 + 35 + /* IOMMUv2 specific functions */ 36 + struct iommu_domain; 37 + 38 + extern bool amd_iommu_v2_supported(void); 39 + extern int amd_iommu_register_ppr_notifier(struct notifier_block *nb); 40 + extern int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb); 41 + extern void amd_iommu_domain_direct_map(struct iommu_domain *dom); 42 + extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids); 43 + extern int amd_iommu_flush_page(struct iommu_domain *dom, int pasid, 44 + u64 address); 45 + extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid); 46 + extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid, 47 + unsigned long cr3); 48 + extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid); 49 + extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev); 50 + 51 + #define PPR_SUCCESS 0x0 52 + #define PPR_INVALID 0x1 53 + #define PPR_FAILURE 0xf 54 + 55 + extern int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid, 56 + int status, int tag); 57 + 34 58 #ifndef CONFIG_AMD_IOMMU_STATS 35 59 36 60 static inline void amd_iommu_stats_init(void) { }
+115 -3
drivers/iommu/amd_iommu_types.h
··· 69 69 #define MMIO_EXCL_BASE_OFFSET 0x0020 70 70 #define MMIO_EXCL_LIMIT_OFFSET 0x0028 71 71 #define MMIO_EXT_FEATURES 0x0030 72 + #define MMIO_PPR_LOG_OFFSET 0x0038 72 73 #define MMIO_CMD_HEAD_OFFSET 0x2000 73 74 #define MMIO_CMD_TAIL_OFFSET 0x2008 74 75 #define MMIO_EVT_HEAD_OFFSET 0x2010 75 76 #define MMIO_EVT_TAIL_OFFSET 0x2018 76 77 #define MMIO_STATUS_OFFSET 0x2020 78 + #define MMIO_PPR_HEAD_OFFSET 0x2030 79 + #define MMIO_PPR_TAIL_OFFSET 0x2038 77 80 78 81 79 82 /* Extended Feature Bits */ ··· 90 87 #define FEATURE_HE (1ULL<<8) 91 88 #define FEATURE_PC (1ULL<<9) 92 89 90 + #define FEATURE_PASID_SHIFT 32 91 + #define FEATURE_PASID_MASK (0x1fULL << FEATURE_PASID_SHIFT) 92 + 93 + #define FEATURE_GLXVAL_SHIFT 14 94 + #define FEATURE_GLXVAL_MASK (0x03ULL << FEATURE_GLXVAL_SHIFT) 95 + 96 + #define PASID_MASK 0x000fffff 97 + 93 98 /* MMIO status bits */ 94 - #define MMIO_STATUS_COM_WAIT_INT_MASK 0x04 99 + #define MMIO_STATUS_COM_WAIT_INT_MASK (1 << 2) 100 + #define MMIO_STATUS_PPR_INT_MASK (1 << 6) 95 101 96 102 /* event logging constants */ 97 103 #define EVENT_ENTRY_SIZE 0x10 ··· 127 115 #define CONTROL_EVT_LOG_EN 0x02ULL 128 116 #define CONTROL_EVT_INT_EN 0x03ULL 129 117 #define CONTROL_COMWAIT_EN 0x04ULL 118 + #define CONTROL_INV_TIMEOUT 0x05ULL 130 119 #define CONTROL_PASSPW_EN 0x08ULL 131 120 #define CONTROL_RESPASSPW_EN 0x09ULL 132 121 #define CONTROL_COHERENT_EN 0x0aULL ··· 135 122 #define CONTROL_CMDBUF_EN 0x0cULL 136 123 #define CONTROL_PPFLOG_EN 0x0dULL 137 124 #define CONTROL_PPFINT_EN 0x0eULL 125 + #define CONTROL_PPR_EN 0x0fULL 126 + #define CONTROL_GT_EN 0x10ULL 127 + 128 + #define CTRL_INV_TO_MASK (7 << CONTROL_INV_TIMEOUT) 129 + #define CTRL_INV_TO_NONE 0 130 + #define CTRL_INV_TO_1MS 1 131 + #define CTRL_INV_TO_10MS 2 132 + #define CTRL_INV_TO_100MS 3 133 + #define CTRL_INV_TO_1S 4 134 + #define CTRL_INV_TO_10S 5 135 + #define CTRL_INV_TO_100S 6 138 136 139 137 /* command specific defines */ 140 138 #define CMD_COMPL_WAIT 0x01 141 139 #define 
CMD_INV_DEV_ENTRY 0x02 142 140 #define CMD_INV_IOMMU_PAGES 0x03 143 141 #define CMD_INV_IOTLB_PAGES 0x04 142 + #define CMD_COMPLETE_PPR 0x07 144 143 #define CMD_INV_ALL 0x08 145 144 146 145 #define CMD_COMPL_WAIT_STORE_MASK 0x01 147 146 #define CMD_COMPL_WAIT_INT_MASK 0x02 148 147 #define CMD_INV_IOMMU_PAGES_SIZE_MASK 0x01 149 148 #define CMD_INV_IOMMU_PAGES_PDE_MASK 0x02 149 + #define CMD_INV_IOMMU_PAGES_GN_MASK 0x04 150 + 151 + #define PPR_STATUS_MASK 0xf 152 + #define PPR_STATUS_SHIFT 12 150 153 151 154 #define CMD_INV_IOMMU_ALL_PAGES_ADDRESS 0x7fffffffffffffffULL 152 155 ··· 193 164 /* constants for event buffer handling */ 194 165 #define EVT_BUFFER_SIZE 8192 /* 512 entries */ 195 166 #define EVT_LEN_MASK (0x9ULL << 56) 167 + 168 + /* Constants for PPR Log handling */ 169 + #define PPR_LOG_ENTRIES 512 170 + #define PPR_LOG_SIZE_SHIFT 56 171 + #define PPR_LOG_SIZE_512 (0x9ULL << PPR_LOG_SIZE_SHIFT) 172 + #define PPR_ENTRY_SIZE 16 173 + #define PPR_LOG_SIZE (PPR_ENTRY_SIZE * PPR_LOG_ENTRIES) 174 + 175 + #define PPR_REQ_TYPE(x) (((x) >> 60) & 0xfULL) 176 + #define PPR_FLAGS(x) (((x) >> 48) & 0xfffULL) 177 + #define PPR_DEVID(x) ((x) & 0xffffULL) 178 + #define PPR_TAG(x) (((x) >> 32) & 0x3ffULL) 179 + #define PPR_PASID1(x) (((x) >> 16) & 0xffffULL) 180 + #define PPR_PASID2(x) (((x) >> 42) & 0xfULL) 181 + #define PPR_PASID(x) ((PPR_PASID2(x) << 16) | PPR_PASID1(x)) 182 + 183 + #define PPR_REQ_FAULT 0x01 196 184 197 185 #define PAGE_MODE_NONE 0x00 198 186 #define PAGE_MODE_1_LEVEL 0x01 ··· 276 230 #define IOMMU_PTE_IR (1ULL << 61) 277 231 #define IOMMU_PTE_IW (1ULL << 62) 278 232 279 - #define DTE_FLAG_IOTLB 0x01 233 + #define DTE_FLAG_IOTLB (0x01UL << 32) 234 + #define DTE_FLAG_GV (0x01ULL << 55) 235 + #define DTE_GLX_SHIFT (56) 236 + #define DTE_GLX_MASK (3) 237 + 238 + #define DTE_GCR3_VAL_A(x) (((x) >> 12) & 0x00007ULL) 239 + #define DTE_GCR3_VAL_B(x) (((x) >> 15) & 0x0ffffULL) 240 + #define DTE_GCR3_VAL_C(x) (((x) >> 31) & 0xfffffULL) 241 + 242 + #define 
DTE_GCR3_INDEX_A 0 243 + #define DTE_GCR3_INDEX_B 1 244 + #define DTE_GCR3_INDEX_C 1 245 + 246 + #define DTE_GCR3_SHIFT_A 58 247 + #define DTE_GCR3_SHIFT_B 16 248 + #define DTE_GCR3_SHIFT_C 43 249 + 250 + #define GCR3_VALID 0x01ULL 280 251 281 252 #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL) 282 253 #define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P) ··· 320 257 domain for an IOMMU */ 321 258 #define PD_PASSTHROUGH_MASK (1UL << 2) /* domain has no page 322 259 translation */ 260 + #define PD_IOMMUV2_MASK (1UL << 3) /* domain has gcr3 table */ 323 261 324 262 extern bool amd_iommu_dump; 325 263 #define DUMP_printk(format, arg...) \ ··· 349 285 #define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT) 350 286 #define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL) 351 287 288 + 289 + /* 290 + * This struct is used to pass information about 291 + * incoming PPR faults around. 292 + */ 293 + struct amd_iommu_fault { 294 + u64 address; /* IO virtual address of the fault*/ 295 + u32 pasid; /* Address space identifier */ 296 + u16 device_id; /* Originating PCI device id */ 297 + u16 tag; /* PPR tag */ 298 + u16 flags; /* Fault flags */ 299 + 300 + }; 301 + 302 + #define PPR_FAULT_EXEC (1 << 1) 303 + #define PPR_FAULT_READ (1 << 2) 304 + #define PPR_FAULT_WRITE (1 << 5) 305 + #define PPR_FAULT_USER (1 << 6) 306 + #define PPR_FAULT_RSVD (1 << 7) 307 + #define PPR_FAULT_GN (1 << 8) 308 + 309 + struct iommu_domain; 310 + 352 311 /* 353 312 * This structure contains generic data for IOMMU protection domains 354 313 * independent of their use. 
··· 384 297 u16 id; /* the domain id written to the device table */ 385 298 int mode; /* paging mode (0-6 levels) */ 386 299 u64 *pt_root; /* page table root pointer */ 300 + int glx; /* Number of levels for GCR3 table */ 301 + u64 *gcr3_tbl; /* Guest CR3 table */ 387 302 unsigned long flags; /* flags to find out type of domain */ 388 303 bool updated; /* complete domain flush required */ 389 304 unsigned dev_cnt; /* devices assigned to this domain */ 390 305 unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ 391 306 void *priv; /* private data */ 307 + struct iommu_domain *iommu_domain; /* Pointer to generic 308 + domain structure */ 392 309 393 310 }; 394 311 ··· 406 315 struct protection_domain *domain; /* Domain the device is bound to */ 407 316 atomic_t bind; /* Domain attach reverent count */ 408 317 u16 devid; /* PCI Device ID */ 318 + bool iommu_v2; /* Device can make use of IOMMUv2 */ 319 + bool passthrough; /* Default for device is pt_domain */ 409 320 struct { 410 321 bool enabled; 411 322 int qdep; 412 323 } ats; /* ATS state */ 324 + bool pri_tlp; /* PASID TLB required for 325 + PPR completions */ 326 + u32 errata; /* Bitmap for errata to apply */ 413 327 }; 414 328 415 329 /* ··· 495 399 /* Extended features */ 496 400 u64 features; 497 401 402 + /* IOMMUv2 */ 403 + bool is_iommu_v2; 404 + 498 405 /* 499 406 * Capability pointer. 
There could be more than one IOMMU per PCI 500 407 * device function if there are more than one AMD IOMMU capability ··· 529 430 u8 *evt_buf; 530 431 /* MSI number for event interrupt */ 531 432 u16 evt_msi_num; 433 + 434 + /* Base of the PPR log, if present */ 435 + u8 *ppr_log; 532 436 533 437 /* true if interrupts for this IOMMU are already enabled */ 534 438 bool int_enabled; ··· 586 484 * Structure defining one entry in the device table 587 485 */ 588 486 struct dev_table_entry { 589 - u32 data[8]; 487 + u64 data[4]; 590 488 }; 591 489 592 490 /* ··· 650 548 * they are reused 651 549 */ 652 550 extern bool amd_iommu_unmap_flush; 551 + 552 + /* Smallest number of PASIDs supported by any IOMMU in the system */ 553 + extern u32 amd_iommu_max_pasids; 554 + 555 + extern bool amd_iommu_v2_present; 556 + 557 + extern bool amd_iommu_force_isolation; 558 + 559 + /* Max levels of glxval supported */ 560 + extern int amd_iommu_max_glx_val; 653 561 654 562 /* takes bus and device/function and returns the device id 655 563 * FIXME: should that be in generic PCI code? */
+994
drivers/iommu/amd_iommu_v2.c
··· 1 + /* 2 + * Copyright (C) 2010-2012 Advanced Micro Devices, Inc. 3 + * Author: Joerg Roedel <joerg.roedel@amd.com> 4 + * 5 + * This program is free software; you can redistribute it and/or modify it 6 + * under the terms of the GNU General Public License version 2 as published 7 + * by the Free Software Foundation. 8 + * 9 + * This program is distributed in the hope that it will be useful, 10 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 + * GNU General Public License for more details. 13 + * 14 + * You should have received a copy of the GNU General Public License 15 + * along with this program; if not, write to the Free Software 16 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 + */ 18 + 19 + #include <linux/mmu_notifier.h> 20 + #include <linux/amd-iommu.h> 21 + #include <linux/mm_types.h> 22 + #include <linux/profile.h> 23 + #include <linux/module.h> 24 + #include <linux/sched.h> 25 + #include <linux/iommu.h> 26 + #include <linux/wait.h> 27 + #include <linux/pci.h> 28 + #include <linux/gfp.h> 29 + 30 + #include "amd_iommu_types.h" 31 + #include "amd_iommu_proto.h" 32 + 33 + MODULE_LICENSE("GPL v2"); 34 + MODULE_AUTHOR("Joerg Roedel <joerg.roedel@amd.com>"); 35 + 36 + #define MAX_DEVICES 0x10000 37 + #define PRI_QUEUE_SIZE 512 38 + 39 + struct pri_queue { 40 + atomic_t inflight; 41 + bool finish; 42 + int status; 43 + }; 44 + 45 + struct pasid_state { 46 + struct list_head list; /* For global state-list */ 47 + atomic_t count; /* Reference count */ 48 + struct task_struct *task; /* Task bound to this PASID */ 49 + struct mm_struct *mm; /* mm_struct for the faults */ 50 + struct mmu_notifier mn; /* mmu_notifier handle */ 51 + struct pri_queue pri[PRI_QUEUE_SIZE]; /* PRI tag states */ 52 + struct device_state *device_state; /* Link to our device_state */ 53 + int pasid; /* PASID index */ 54 + spinlock_t lock; /* Protect pri_queues */ 55 + 
wait_queue_head_t wq; /* To wait for count == 0 */ 56 + }; 57 + 58 + struct device_state { 59 + atomic_t count; 60 + struct pci_dev *pdev; 61 + struct pasid_state **states; 62 + struct iommu_domain *domain; 63 + int pasid_levels; 64 + int max_pasids; 65 + amd_iommu_invalid_ppr_cb inv_ppr_cb; 66 + amd_iommu_invalidate_ctx inv_ctx_cb; 67 + spinlock_t lock; 68 + wait_queue_head_t wq; 69 + }; 70 + 71 + struct fault { 72 + struct work_struct work; 73 + struct device_state *dev_state; 74 + struct pasid_state *state; 75 + struct mm_struct *mm; 76 + u64 address; 77 + u16 devid; 78 + u16 pasid; /* NOTE(review): only 16 bits wide, while pasid_state.pasid and the pasid parameters of the functions in this file are ints - confirm no wider PASID is ever stored here */ 79 + u16 tag; 80 + u16 finish; 81 + u16 flags; 82 + }; 83 + 84 + /* NOTE(review): state_table is not static, unlike the other file-scope objects declared here - confirm external linkage is intended */ struct device_state **state_table; 85 + static spinlock_t state_lock; 86 + 87 + /* List and lock for all pasid_states */ 88 + static LIST_HEAD(pasid_state_list); 89 + static DEFINE_SPINLOCK(ps_lock); 90 + 91 + static struct workqueue_struct *iommu_wq; 92 + 93 + /* 94 + * Empty page table - Used between 95 + * mmu_notifier_invalidate_range_start and 96 + * mmu_notifier_invalidate_range_end 97 + */ 98 + static u64 *empty_page_table; 99 + 100 + static void free_pasid_states(struct device_state *dev_state); 101 + static void unbind_pasid(struct device_state *dev_state, int pasid); 102 + static int task_exit(struct notifier_block *nb, unsigned long e, void *data); 103 + 104 + /* Builds a 16-bit id with the bus number in the high byte and devfn in the low byte */ static u16 device_id(struct pci_dev *pdev) 105 + { 106 + u16 devid; 107 + 108 + devid = pdev->bus->number; 109 + devid = (devid << 8) | pdev->devfn; 110 + 111 + return devid; 112 + } 113 + 114 + /* Looks up state_table[devid] under state_lock and takes a reference on a non-NULL entry; returns the entry or NULL */ static struct device_state *get_device_state(u16 devid) 115 + { 116 + struct device_state *dev_state; 117 + unsigned long flags; 118 + 119 + spin_lock_irqsave(&state_lock, flags); 120 + dev_state = state_table[devid]; 121 + if (dev_state != NULL) 122 + atomic_inc(&dev_state->count); 123 + spin_unlock_irqrestore(&state_lock, flags); 124 + 125 + return dev_state; 126 + } 127 + 128 + static void free_device_state(struct device_state *dev_state) 129 + { 130 + /* 131
+ * First detach device from domain - No more PRI requests will arrive 132 + * from that device after it is unbound from the IOMMUv2 domain. 133 + */ 134 + iommu_detach_device(dev_state->domain, &dev_state->pdev->dev); 135 + 136 + /* Everything is down now, free the IOMMUv2 domain */ 137 + iommu_domain_free(dev_state->domain); 138 + 139 + /* Finally get rid of the device-state */ 140 + kfree(dev_state); 141 + } 142 + 143 + static void put_device_state(struct device_state *dev_state) 144 + { 145 + if (atomic_dec_and_test(&dev_state->count)) 146 + wake_up(&dev_state->wq); 147 + } 148 + 149 + static void put_device_state_wait(struct device_state *dev_state) 150 + { 151 + DEFINE_WAIT(wait); 152 + 153 + prepare_to_wait(&dev_state->wq, &wait, TASK_UNINTERRUPTIBLE); 154 + if (!atomic_dec_and_test(&dev_state->count)) 155 + schedule(); 156 + finish_wait(&dev_state->wq, &wait); 157 + 158 + free_device_state(dev_state); 159 + } 160 + 161 + static struct notifier_block profile_nb = { 162 + .notifier_call = task_exit, 163 + }; 164 + 165 + static void link_pasid_state(struct pasid_state *pasid_state) 166 + { 167 + spin_lock(&ps_lock); 168 + list_add_tail(&pasid_state->list, &pasid_state_list); 169 + spin_unlock(&ps_lock); 170 + } 171 + 172 + static void __unlink_pasid_state(struct pasid_state *pasid_state) 173 + { 174 + list_del(&pasid_state->list); 175 + } 176 + 177 + static void unlink_pasid_state(struct pasid_state *pasid_state) 178 + { 179 + spin_lock(&ps_lock); 180 + __unlink_pasid_state(pasid_state); 181 + spin_unlock(&ps_lock); 182 + } 183 + 184 + /* Must be called under dev_state->lock */ 185 + static struct pasid_state **__get_pasid_state_ptr(struct device_state *dev_state, 186 + int pasid, bool alloc) 187 + { 188 + struct pasid_state **root, **ptr; 189 + int level, index; 190 + 191 + level = dev_state->pasid_levels; 192 + root = dev_state->states; 193 + 194 + while (true) { 195 + 196 + index = (pasid >> (9 * level)) & 0x1ff; 197 + ptr = &root[index]; 198 + 199 + if 
(level == 0) 200 + break; 201 + 202 + if (*ptr == NULL) { 203 + if (!alloc) 204 + return NULL; 205 + 206 + *ptr = (void *)get_zeroed_page(GFP_ATOMIC); 207 + if (*ptr == NULL) 208 + return NULL; 209 + } 210 + 211 + root = (struct pasid_state **)*ptr; 212 + level -= 1; 213 + } 214 + 215 + return ptr; 216 + } 217 + 218 + /* Stores pasid_state in the table slot for pasid under dev_state->lock; returns 0 on success and -ENOMEM if no slot could be obtained or the slot is already in use */ static int set_pasid_state(struct device_state *dev_state, 219 + struct pasid_state *pasid_state, 220 + int pasid) 221 + { 222 + struct pasid_state **ptr; 223 + unsigned long flags; 224 + int ret; 225 + 226 + spin_lock_irqsave(&dev_state->lock, flags); 227 + ptr = __get_pasid_state_ptr(dev_state, pasid, true); 228 + 229 + ret = -ENOMEM; 230 + if (ptr == NULL) 231 + goto out_unlock; 232 + 233 + ret = -ENOMEM; 234 + if (*ptr != NULL) 235 + goto out_unlock; /* NOTE(review): an occupied slot is reported with the same -ENOMEM as an allocation failure - confirm callers do not need to tell the two apart */ 236 + 237 + *ptr = pasid_state; 238 + 239 + ret = 0; 240 + 241 + out_unlock: 242 + spin_unlock_irqrestore(&dev_state->lock, flags); 243 + 244 + return ret; 245 + } 246 + 247 + /* Resets the table slot for pasid to NULL under dev_state->lock */ static void clear_pasid_state(struct device_state *dev_state, int pasid) 248 + { 249 + struct pasid_state **ptr; 250 + unsigned long flags; 251 + 252 + spin_lock_irqsave(&dev_state->lock, flags); 253 + ptr = __get_pasid_state_ptr(dev_state, pasid, true); /* NOTE(review): alloc == true lets a clear allocate intermediate table pages for a PASID that was never set, whereas get_pasid_state() passes false - confirm this is intended */ 254 + 255 + if (ptr == NULL) 256 + goto out_unlock; 257 + 258 + *ptr = NULL; 259 + 260 + out_unlock: 261 + spin_unlock_irqrestore(&dev_state->lock, flags); 262 + } 263 + 264 + /* Returns the pasid_state stored for pasid with its reference count incremented, or NULL if there is none; never allocates table pages */ static struct pasid_state *get_pasid_state(struct device_state *dev_state, 265 + int pasid) 266 + { 267 + struct pasid_state **ptr, *ret = NULL; 268 + unsigned long flags; 269 + 270 + spin_lock_irqsave(&dev_state->lock, flags); 271 + ptr = __get_pasid_state_ptr(dev_state, pasid, false); 272 + 273 + if (ptr == NULL) 274 + goto out_unlock; 275 + 276 + ret = *ptr; 277 + if (ret) 278 + atomic_inc(&ret->count); 279 + 280 + out_unlock: 281 + spin_unlock_irqrestore(&dev_state->lock, flags); 282 + 283 + return ret; 284 + } 285 + 286 + static void free_pasid_state(struct pasid_state *pasid_state) 287 + { 288 + 
kfree(pasid_state); 289 + } 290 + 291 + static void put_pasid_state(struct pasid_state *pasid_state) 292 + { 293 + if (atomic_dec_and_test(&pasid_state->count)) { 294 + put_device_state(pasid_state->device_state); 295 + wake_up(&pasid_state->wq); 296 + } 297 + } 298 + 299 + static void put_pasid_state_wait(struct pasid_state *pasid_state) 300 + { 301 + DEFINE_WAIT(wait); 302 + 303 + prepare_to_wait(&pasid_state->wq, &wait, TASK_UNINTERRUPTIBLE); 304 + 305 + if (atomic_dec_and_test(&pasid_state->count)) 306 + put_device_state(pasid_state->device_state); 307 + else 308 + schedule(); 309 + 310 + finish_wait(&pasid_state->wq, &wait); 311 + mmput(pasid_state->mm); 312 + free_pasid_state(pasid_state); 313 + } 314 + 315 + static void __unbind_pasid(struct pasid_state *pasid_state) 316 + { 317 + struct iommu_domain *domain; 318 + 319 + domain = pasid_state->device_state->domain; 320 + 321 + amd_iommu_domain_clear_gcr3(domain, pasid_state->pasid); 322 + clear_pasid_state(pasid_state->device_state, pasid_state->pasid); 323 + 324 + /* Make sure no more pending faults are in the queue */ 325 + flush_workqueue(iommu_wq); 326 + 327 + mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm); 328 + 329 + put_pasid_state(pasid_state); /* Reference taken in bind() function */ 330 + } 331 + 332 + static void unbind_pasid(struct device_state *dev_state, int pasid) 333 + { 334 + struct pasid_state *pasid_state; 335 + 336 + pasid_state = get_pasid_state(dev_state, pasid); 337 + if (pasid_state == NULL) 338 + return; 339 + 340 + unlink_pasid_state(pasid_state); 341 + __unbind_pasid(pasid_state); 342 + put_pasid_state_wait(pasid_state); /* Reference taken in this function */ 343 + } 344 + 345 + static void free_pasid_states_level1(struct pasid_state **tbl) 346 + { 347 + int i; 348 + 349 + for (i = 0; i < 512; ++i) { 350 + if (tbl[i] == NULL) 351 + continue; 352 + 353 + free_page((unsigned long)tbl[i]); 354 + } 355 + } 356 + 357 + static void free_pasid_states_level2(struct pasid_state 
**tbl) 358 + { 359 + struct pasid_state **ptr; 360 + int i; 361 + 362 + for (i = 0; i < 512; ++i) { 363 + if (tbl[i] == NULL) 364 + continue; 365 + 366 + ptr = (struct pasid_state **)tbl[i]; 367 + free_pasid_states_level1(ptr); 368 + } 369 + } 370 + 371 + static void free_pasid_states(struct device_state *dev_state) 372 + { 373 + struct pasid_state *pasid_state; 374 + int i; 375 + 376 + for (i = 0; i < dev_state->max_pasids; ++i) { 377 + pasid_state = get_pasid_state(dev_state, i); 378 + if (pasid_state == NULL) 379 + continue; 380 + 381 + put_pasid_state(pasid_state); 382 + unbind_pasid(dev_state, i); 383 + } 384 + 385 + if (dev_state->pasid_levels == 2) 386 + free_pasid_states_level2(dev_state->states); 387 + else if (dev_state->pasid_levels == 1) 388 + free_pasid_states_level1(dev_state->states); 389 + else if (dev_state->pasid_levels != 0) 390 + BUG(); 391 + 392 + free_page((unsigned long)dev_state->states); 393 + } 394 + 395 + static struct pasid_state *mn_to_state(struct mmu_notifier *mn) 396 + { 397 + return container_of(mn, struct pasid_state, mn); 398 + } 399 + 400 + static void __mn_flush_page(struct mmu_notifier *mn, 401 + unsigned long address) 402 + { 403 + struct pasid_state *pasid_state; 404 + struct device_state *dev_state; 405 + 406 + pasid_state = mn_to_state(mn); 407 + dev_state = pasid_state->device_state; 408 + 409 + amd_iommu_flush_page(dev_state->domain, pasid_state->pasid, address); 410 + } 411 + 412 + static int mn_clear_flush_young(struct mmu_notifier *mn, 413 + struct mm_struct *mm, 414 + unsigned long address) 415 + { 416 + __mn_flush_page(mn, address); 417 + 418 + return 0; 419 + } 420 + 421 + static void mn_change_pte(struct mmu_notifier *mn, 422 + struct mm_struct *mm, 423 + unsigned long address, 424 + pte_t pte) 425 + { 426 + __mn_flush_page(mn, address); 427 + } 428 + 429 + static void mn_invalidate_page(struct mmu_notifier *mn, 430 + struct mm_struct *mm, 431 + unsigned long address) 432 + { 433 + __mn_flush_page(mn, address); 434 
+ } 435 + 436 + static void mn_invalidate_range_start(struct mmu_notifier *mn, 437 + struct mm_struct *mm, 438 + unsigned long start, unsigned long end) 439 + { 440 + struct pasid_state *pasid_state; 441 + struct device_state *dev_state; 442 + 443 + pasid_state = mn_to_state(mn); 444 + dev_state = pasid_state->device_state; 445 + 446 + amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid, 447 + __pa(empty_page_table)); 448 + } 449 + 450 + static void mn_invalidate_range_end(struct mmu_notifier *mn, 451 + struct mm_struct *mm, 452 + unsigned long start, unsigned long end) 453 + { 454 + struct pasid_state *pasid_state; 455 + struct device_state *dev_state; 456 + 457 + pasid_state = mn_to_state(mn); 458 + dev_state = pasid_state->device_state; 459 + 460 + amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid, 461 + __pa(pasid_state->mm->pgd)); 462 + } 463 + 464 + static struct mmu_notifier_ops iommu_mn = { 465 + .clear_flush_young = mn_clear_flush_young, 466 + .change_pte = mn_change_pte, 467 + .invalidate_page = mn_invalidate_page, 468 + .invalidate_range_start = mn_invalidate_range_start, 469 + .invalidate_range_end = mn_invalidate_range_end, 470 + }; 471 + 472 + static void set_pri_tag_status(struct pasid_state *pasid_state, 473 + u16 tag, int status) 474 + { 475 + unsigned long flags; 476 + 477 + spin_lock_irqsave(&pasid_state->lock, flags); 478 + pasid_state->pri[tag].status = status; 479 + spin_unlock_irqrestore(&pasid_state->lock, flags); 480 + } 481 + 482 + static void finish_pri_tag(struct device_state *dev_state, 483 + struct pasid_state *pasid_state, 484 + u16 tag) 485 + { 486 + unsigned long flags; 487 + 488 + spin_lock_irqsave(&pasid_state->lock, flags); 489 + if (atomic_dec_and_test(&pasid_state->pri[tag].inflight) && 490 + pasid_state->pri[tag].finish) { 491 + amd_iommu_complete_ppr(dev_state->pdev, pasid_state->pasid, 492 + pasid_state->pri[tag].status, tag); 493 + pasid_state->pri[tag].finish = false; 494 + 
pasid_state->pri[tag].status = PPR_SUCCESS; 495 + } 496 + spin_unlock_irqrestore(&pasid_state->lock, flags); 497 + } 498 + 499 + static void do_fault(struct work_struct *work) 500 + { 501 + struct fault *fault = container_of(work, struct fault, work); 502 + int npages, write; 503 + struct page *page; 504 + 505 + write = !!(fault->flags & PPR_FAULT_WRITE); 506 + 507 + npages = get_user_pages(fault->state->task, fault->state->mm, 508 + fault->address, 1, write, 0, &page, NULL); 509 + 510 + if (npages == 1) { 511 + put_page(page); 512 + } else if (fault->dev_state->inv_ppr_cb) { 513 + int status; 514 + 515 + status = fault->dev_state->inv_ppr_cb(fault->dev_state->pdev, 516 + fault->pasid, 517 + fault->address, 518 + fault->flags); 519 + switch (status) { 520 + case AMD_IOMMU_INV_PRI_RSP_SUCCESS: 521 + set_pri_tag_status(fault->state, fault->tag, PPR_SUCCESS); 522 + break; 523 + case AMD_IOMMU_INV_PRI_RSP_INVALID: 524 + set_pri_tag_status(fault->state, fault->tag, PPR_INVALID); 525 + break; 526 + case AMD_IOMMU_INV_PRI_RSP_FAIL: 527 + set_pri_tag_status(fault->state, fault->tag, PPR_FAILURE); 528 + break; 529 + default: 530 + BUG(); 531 + } 532 + } else { 533 + set_pri_tag_status(fault->state, fault->tag, PPR_INVALID); 534 + } 535 + 536 + finish_pri_tag(fault->dev_state, fault->state, fault->tag); 537 + 538 + put_pasid_state(fault->state); 539 + 540 + kfree(fault); 541 + } 542 + 543 + static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data) 544 + { 545 + struct amd_iommu_fault *iommu_fault; 546 + struct pasid_state *pasid_state; 547 + struct device_state *dev_state; 548 + unsigned long flags; 549 + struct fault *fault; 550 + bool finish; 551 + u16 tag; 552 + int ret; 553 + 554 + iommu_fault = data; 555 + tag = iommu_fault->tag & 0x1ff; 556 + finish = (iommu_fault->tag >> 9) & 1; 557 + 558 + ret = NOTIFY_DONE; 559 + dev_state = get_device_state(iommu_fault->device_id); 560 + if (dev_state == NULL) 561 + goto out; 562 + 563 + pasid_state = 
get_pasid_state(dev_state, iommu_fault->pasid); 564 + if (pasid_state == NULL) { 565 + /* We know the device but not the PASID -> send INVALID */ 566 + amd_iommu_complete_ppr(dev_state->pdev, iommu_fault->pasid, 567 + PPR_INVALID, tag); 568 + goto out_drop_state; 569 + } 570 + 571 + spin_lock_irqsave(&pasid_state->lock, flags); 572 + atomic_inc(&pasid_state->pri[tag].inflight); 573 + if (finish) 574 + pasid_state->pri[tag].finish = true; 575 + spin_unlock_irqrestore(&pasid_state->lock, flags); 576 + 577 + fault = kzalloc(sizeof(*fault), GFP_ATOMIC); 578 + if (fault == NULL) { 579 + /* We are OOM - send success and let the device re-fault */ 580 + finish_pri_tag(dev_state, pasid_state, tag); 581 + goto out_drop_state; 582 + } 583 + 584 + fault->dev_state = dev_state; 585 + fault->address = iommu_fault->address; 586 + fault->state = pasid_state; 587 + fault->tag = tag; 588 + fault->finish = finish; 589 + fault->flags = iommu_fault->flags; 590 + INIT_WORK(&fault->work, do_fault); 591 + 592 + queue_work(iommu_wq, &fault->work); 593 + 594 + ret = NOTIFY_OK; 595 + 596 + out_drop_state: 597 + put_device_state(dev_state); 598 + 599 + out: 600 + return ret; 601 + } 602 + 603 + static struct notifier_block ppr_nb = { 604 + .notifier_call = ppr_notifier, 605 + }; 606 + 607 + static int task_exit(struct notifier_block *nb, unsigned long e, void *data) 608 + { 609 + struct pasid_state *pasid_state; 610 + struct task_struct *task; 611 + 612 + task = data; 613 + 614 + /* 615 + * Using this notifier is a hack - but there is no other choice 616 + * at the moment. What I really want is a sleeping notifier that 617 + * is called when an MM goes down. But such a notifier doesn't 618 + * exist yet. The notifier needs to sleep because it has to make 619 + * sure that the device does not use the PASID and the address 620 + * space anymore before it is destroyed. This includes waiting 621 + * for pending PRI requests to pass the workqueue. 
The 622 + * MMU-Notifiers would be a good fit, but they use RCU and so 623 + * they are not allowed to sleep. Lets see how we can solve this 624 + * in a more intelligent way in the future. 625 + */ 626 + again: 627 + spin_lock(&ps_lock); 628 + list_for_each_entry(pasid_state, &pasid_state_list, list) { 629 + struct device_state *dev_state; 630 + int pasid; 631 + 632 + if (pasid_state->task != task) 633 + continue; 634 + 635 + /* Drop Lock and unbind */ 636 + spin_unlock(&ps_lock); 637 + 638 + dev_state = pasid_state->device_state; 639 + pasid = pasid_state->pasid; 640 + 641 + if (pasid_state->device_state->inv_ctx_cb) 642 + dev_state->inv_ctx_cb(dev_state->pdev, pasid); 643 + 644 + unbind_pasid(dev_state, pasid); 645 + 646 + /* Task may be in the list multiple times */ 647 + goto again; 648 + } 649 + spin_unlock(&ps_lock); 650 + 651 + return NOTIFY_OK; 652 + } 653 + 654 + int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid, 655 + struct task_struct *task) 656 + { 657 + struct pasid_state *pasid_state; 658 + struct device_state *dev_state; 659 + u16 devid; 660 + int ret; 661 + 662 + might_sleep(); 663 + 664 + if (!amd_iommu_v2_supported()) 665 + return -ENODEV; 666 + 667 + devid = device_id(pdev); 668 + dev_state = get_device_state(devid); 669 + 670 + if (dev_state == NULL) 671 + return -EINVAL; 672 + 673 + ret = -EINVAL; 674 + if (pasid < 0 || pasid >= dev_state->max_pasids) 675 + goto out; 676 + 677 + ret = -ENOMEM; 678 + pasid_state = kzalloc(sizeof(*pasid_state), GFP_KERNEL); 679 + if (pasid_state == NULL) 680 + goto out; 681 + 682 + atomic_set(&pasid_state->count, 1); 683 + init_waitqueue_head(&pasid_state->wq); 684 + pasid_state->task = task; 685 + pasid_state->mm = get_task_mm(task); 686 + pasid_state->device_state = dev_state; 687 + pasid_state->pasid = pasid; 688 + pasid_state->mn.ops = &iommu_mn; 689 + 690 + if (pasid_state->mm == NULL) 691 + goto out_free; 692 + 693 + mmu_notifier_register(&pasid_state->mn, pasid_state->mm); 694 + 695 + ret = 
set_pasid_state(dev_state, pasid_state, pasid); 696 + if (ret) 697 + goto out_unregister; 698 + 699 + ret = amd_iommu_domain_set_gcr3(dev_state->domain, pasid, 700 + __pa(pasid_state->mm->pgd)); 701 + if (ret) 702 + goto out_clear_state; 703 + 704 + link_pasid_state(pasid_state); 705 + 706 + return 0; 707 + 708 + out_clear_state: 709 + clear_pasid_state(dev_state, pasid); 710 + 711 + out_unregister: 712 + mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm); 713 + 714 + out_free: 715 + free_pasid_state(pasid_state); 716 + 717 + out: 718 + put_device_state(dev_state); 719 + 720 + return ret; 721 + } 722 + EXPORT_SYMBOL(amd_iommu_bind_pasid); 723 + 724 + void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid) 725 + { 726 + struct device_state *dev_state; 727 + u16 devid; 728 + 729 + might_sleep(); 730 + 731 + if (!amd_iommu_v2_supported()) 732 + return; 733 + 734 + devid = device_id(pdev); 735 + dev_state = get_device_state(devid); 736 + if (dev_state == NULL) 737 + return; 738 + 739 + if (pasid < 0 || pasid >= dev_state->max_pasids) 740 + goto out; 741 + 742 + unbind_pasid(dev_state, pasid); 743 + 744 + out: 745 + put_device_state(dev_state); 746 + } 747 + EXPORT_SYMBOL(amd_iommu_unbind_pasid); 748 + 749 + int amd_iommu_init_device(struct pci_dev *pdev, int pasids) 750 + { 751 + struct device_state *dev_state; 752 + unsigned long flags; 753 + int ret, tmp; 754 + u16 devid; 755 + 756 + might_sleep(); 757 + 758 + if (!amd_iommu_v2_supported()) 759 + return -ENODEV; 760 + 761 + if (pasids <= 0 || pasids > (PASID_MASK + 1)) 762 + return -EINVAL; 763 + 764 + devid = device_id(pdev); 765 + 766 + dev_state = kzalloc(sizeof(*dev_state), GFP_KERNEL); 767 + if (dev_state == NULL) 768 + return -ENOMEM; 769 + 770 + spin_lock_init(&dev_state->lock); 771 + init_waitqueue_head(&dev_state->wq); 772 + dev_state->pdev = pdev; 773 + 774 + tmp = pasids; 775 + for (dev_state->pasid_levels = 0; (tmp - 1) & ~0x1ff; tmp >>= 9) 776 + dev_state->pasid_levels += 1; 777 + 778 + 
atomic_set(&dev_state->count, 1); 779 + dev_state->max_pasids = pasids; 780 + 781 + ret = -ENOMEM; 782 + dev_state->states = (void *)get_zeroed_page(GFP_KERNEL); 783 + if (dev_state->states == NULL) 784 + goto out_free_dev_state; 785 + 786 + dev_state->domain = iommu_domain_alloc(&pci_bus_type); 787 + if (dev_state->domain == NULL) 788 + goto out_free_states; 789 + 790 + amd_iommu_domain_direct_map(dev_state->domain); 791 + 792 + ret = amd_iommu_domain_enable_v2(dev_state->domain, pasids); 793 + if (ret) 794 + goto out_free_domain; 795 + 796 + ret = iommu_attach_device(dev_state->domain, &pdev->dev); 797 + if (ret != 0) 798 + goto out_free_domain; 799 + 800 + spin_lock_irqsave(&state_lock, flags); 801 + 802 + if (state_table[devid] != NULL) { 803 + spin_unlock_irqrestore(&state_lock, flags); 804 + ret = -EBUSY; 805 + goto out_free_domain; 806 + } 807 + 808 + state_table[devid] = dev_state; 809 + 810 + spin_unlock_irqrestore(&state_lock, flags); 811 + 812 + return 0; 813 + 814 + out_free_domain: 815 + iommu_domain_free(dev_state->domain); 816 + 817 + out_free_states: 818 + free_page((unsigned long)dev_state->states); 819 + 820 + out_free_dev_state: 821 + kfree(dev_state); 822 + 823 + return ret; 824 + } 825 + EXPORT_SYMBOL(amd_iommu_init_device); 826 + 827 + void amd_iommu_free_device(struct pci_dev *pdev) 828 + { 829 + struct device_state *dev_state; 830 + unsigned long flags; 831 + u16 devid; 832 + 833 + if (!amd_iommu_v2_supported()) 834 + return; 835 + 836 + devid = device_id(pdev); 837 + 838 + spin_lock_irqsave(&state_lock, flags); 839 + 840 + dev_state = state_table[devid]; 841 + if (dev_state == NULL) { 842 + spin_unlock_irqrestore(&state_lock, flags); 843 + return; 844 + } 845 + 846 + state_table[devid] = NULL; 847 + 848 + spin_unlock_irqrestore(&state_lock, flags); 849 + 850 + /* Get rid of any remaining pasid states */ 851 + free_pasid_states(dev_state); 852 + 853 + put_device_state_wait(dev_state); 854 + } 855 + EXPORT_SYMBOL(amd_iommu_free_device); 856 + 
857 + int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev, 858 + amd_iommu_invalid_ppr_cb cb) 859 + { 860 + struct device_state *dev_state; 861 + unsigned long flags; 862 + u16 devid; 863 + int ret; 864 + 865 + if (!amd_iommu_v2_supported()) 866 + return -ENODEV; 867 + 868 + devid = device_id(pdev); 869 + 870 + spin_lock_irqsave(&state_lock, flags); 871 + 872 + ret = -EINVAL; 873 + dev_state = state_table[devid]; 874 + if (dev_state == NULL) 875 + goto out_unlock; 876 + 877 + dev_state->inv_ppr_cb = cb; 878 + 879 + ret = 0; 880 + 881 + out_unlock: 882 + spin_unlock_irqrestore(&state_lock, flags); 883 + 884 + return ret; 885 + } 886 + EXPORT_SYMBOL(amd_iommu_set_invalid_ppr_cb); 887 + 888 + int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev, 889 + amd_iommu_invalidate_ctx cb) 890 + { 891 + struct device_state *dev_state; 892 + unsigned long flags; 893 + u16 devid; 894 + int ret; 895 + 896 + if (!amd_iommu_v2_supported()) 897 + return -ENODEV; 898 + 899 + devid = device_id(pdev); 900 + 901 + spin_lock_irqsave(&state_lock, flags); 902 + 903 + ret = -EINVAL; 904 + dev_state = state_table[devid]; 905 + if (dev_state == NULL) 906 + goto out_unlock; 907 + 908 + dev_state->inv_ctx_cb = cb; 909 + 910 + ret = 0; 911 + 912 + out_unlock: 913 + spin_unlock_irqrestore(&state_lock, flags); 914 + 915 + return ret; 916 + } 917 + EXPORT_SYMBOL(amd_iommu_set_invalidate_ctx_cb); 918 + 919 + static int __init amd_iommu_v2_init(void) 920 + { 921 + size_t state_table_size; 922 + int ret; 923 + 924 + pr_info("AMD IOMMUv2 driver by Joerg Roedel <joerg.roedel@amd.com>"); 925 + 926 + spin_lock_init(&state_lock); 927 + 928 + state_table_size = MAX_DEVICES * sizeof(struct device_state *); 929 + state_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 930 + get_order(state_table_size)); 931 + if (state_table == NULL) 932 + return -ENOMEM; 933 + 934 + ret = -ENOMEM; 935 + iommu_wq = create_workqueue("amd_iommu_v2"); 936 + if (iommu_wq == NULL) 937 + goto out_free; 938 + 939 + ret 
= -ENOMEM; 940 + empty_page_table = (u64 *)get_zeroed_page(GFP_KERNEL); 941 + if (empty_page_table == NULL) 942 + goto out_destroy_wq; 943 + 944 + amd_iommu_register_ppr_notifier(&ppr_nb); 945 + profile_event_register(PROFILE_TASK_EXIT, &profile_nb); 946 + 947 + return 0; 948 + 949 + out_destroy_wq: 950 + destroy_workqueue(iommu_wq); 951 + 952 + out_free: 953 + free_pages((unsigned long)state_table, get_order(state_table_size)); 954 + 955 + return ret; 956 + } 957 + 958 + static void __exit amd_iommu_v2_exit(void) 959 + { 960 + struct device_state *dev_state; 961 + size_t state_table_size; 962 + int i; 963 + 964 + profile_event_unregister(PROFILE_TASK_EXIT, &profile_nb); 965 + amd_iommu_unregister_ppr_notifier(&ppr_nb); 966 + 967 + flush_workqueue(iommu_wq); 968 + 969 + /* 970 + * The loop below might call flush_workqueue(), so call 971 + * destroy_workqueue() after it 972 + */ 973 + for (i = 0; i < MAX_DEVICES; ++i) { 974 + dev_state = get_device_state(i); 975 + 976 + if (dev_state == NULL) 977 + continue; 978 + 979 + WARN_ON_ONCE(1); 980 + 981 + put_device_state(dev_state); 982 + amd_iommu_free_device(dev_state->pdev); 983 + } 984 + 985 + destroy_workqueue(iommu_wq); 986 + 987 + state_table_size = MAX_DEVICES * sizeof(struct device_state *); 988 + free_pages((unsigned long)state_table, get_order(state_table_size)); 989 + 990 + free_page((unsigned long)empty_page_table); 991 + } 992 + 993 + module_init(amd_iommu_v2_init); 994 + module_exit(amd_iommu_v2_exit);
+23 -7
drivers/iommu/intel-iommu.c
··· 78 78 #define LEVEL_STRIDE (9) 79 79 #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1) 80 80 81 + /* 82 + * This bitmap is used to advertise the page sizes our hardware support 83 + * to the IOMMU core, which will then use this information to split 84 + * physically contiguous memory regions it is mapping into page sizes 85 + * that we support. 86 + * 87 + * Traditionally the IOMMU core just handed us the mappings directly, 88 + * after making sure the size is an order of a 4KiB page and that the 89 + * mapping has natural alignment. 90 + * 91 + * To retain this behavior, we currently advertise that we support 92 + * all page sizes that are an order of 4KiB. 93 + * 94 + * If at some point we'd like to utilize the IOMMU core's new behavior, 95 + * we could change this to advertise the real page sizes we support. 96 + */ 97 + #define INTEL_IOMMU_PGSIZES (~0xFFFUL) 98 + 81 99 static inline int agaw_to_level(int agaw) 82 100 { 83 101 return agaw + 2; ··· 4002 3984 4003 3985 static int intel_iommu_map(struct iommu_domain *domain, 4004 3986 unsigned long iova, phys_addr_t hpa, 4005 - int gfp_order, int iommu_prot) 3987 + size_t size, int iommu_prot) 4006 3988 { 4007 3989 struct dmar_domain *dmar_domain = domain->priv; 4008 3990 u64 max_addr; 4009 3991 int prot = 0; 4010 - size_t size; 4011 3992 int ret; 4012 3993 4013 3994 if (iommu_prot & IOMMU_READ) ··· 4016 3999 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping) 4017 4000 prot |= DMA_PTE_SNP; 4018 4001 4019 - size = PAGE_SIZE << gfp_order; 4020 4002 max_addr = iova + size; 4021 4003 if (dmar_domain->max_addr < max_addr) { 4022 4004 u64 end; ··· 4038 4022 return ret; 4039 4023 } 4040 4024 4041 - static int intel_iommu_unmap(struct iommu_domain *domain, 4042 - unsigned long iova, int gfp_order) 4025 + static size_t intel_iommu_unmap(struct iommu_domain *domain, 4026 + unsigned long iova, size_t size) 4043 4027 { 4044 4028 struct dmar_domain *dmar_domain = domain->priv; 4045 - size_t size = PAGE_SIZE << 
gfp_order; 4046 4029 int order; 4047 4030 4048 4031 order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT, ··· 4050 4035 if (dmar_domain->max_addr == iova + size) 4051 4036 dmar_domain->max_addr = iova; 4052 4037 4053 - return order; 4038 + return PAGE_SIZE << order; 4054 4039 } 4055 4040 4056 4041 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, ··· 4089 4074 .unmap = intel_iommu_unmap, 4090 4075 .iova_to_phys = intel_iommu_iova_to_phys, 4091 4076 .domain_has_cap = intel_iommu_domain_has_cap, 4077 + .pgsize_bitmap = INTEL_IOMMU_PGSIZES, 4092 4078 }; 4093 4079 4094 4080 static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
+105 -10
drivers/iommu/iommu.c
··· 16 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 17 */ 18 18 19 + #define pr_fmt(fmt) "%s: " fmt, __func__ 20 + 19 21 #include <linux/device.h> 20 22 #include <linux/kernel.h> 21 23 #include <linux/bug.h> ··· 159 157 EXPORT_SYMBOL_GPL(iommu_domain_has_cap); 160 158 161 159 int iommu_map(struct iommu_domain *domain, unsigned long iova, 162 - phys_addr_t paddr, int gfp_order, int prot) 160 + phys_addr_t paddr, size_t size, int prot) 163 161 { 164 - size_t size; 162 + unsigned long orig_iova = iova; 163 + unsigned int min_pagesz; 164 + size_t orig_size = size; 165 + int ret = 0; 165 166 166 167 if (unlikely(domain->ops->map == NULL)) 167 168 return -ENODEV; 168 169 169 - size = PAGE_SIZE << gfp_order; 170 + /* find out the minimum page size supported */ 171 + min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap); 170 172 171 - BUG_ON(!IS_ALIGNED(iova | paddr, size)); 173 + /* 174 + * both the virtual address and the physical one, as well as 175 + * the size of the mapping, must be aligned (at least) to the 176 + * size of the smallest page supported by the hardware 177 + */ 178 + if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) { 179 + pr_err("unaligned: iova 0x%lx pa 0x%lx size 0x%lx min_pagesz " 180 + "0x%x\n", iova, (unsigned long)paddr, 181 + (unsigned long)size, min_pagesz); 182 + return -EINVAL; 183 + } 172 184 173 - return domain->ops->map(domain, iova, paddr, gfp_order, prot); 185 + pr_debug("map: iova 0x%lx pa 0x%lx size 0x%lx\n", iova, 186 + (unsigned long)paddr, (unsigned long)size); 187 + 188 + while (size) { 189 + unsigned long pgsize, addr_merge = iova | paddr; 190 + unsigned int pgsize_idx; 191 + 192 + /* Max page size that still fits into 'size' */ 193 + pgsize_idx = __fls(size); 194 + 195 + /* need to consider alignment requirements ? 
*/ 196 + if (likely(addr_merge)) { 197 + /* Max page size allowed by both iova and paddr */ 198 + unsigned int align_pgsize_idx = __ffs(addr_merge); 199 + 200 + pgsize_idx = min(pgsize_idx, align_pgsize_idx); 201 + } 202 + 203 + /* build a mask of acceptable page sizes */ 204 + pgsize = (1UL << (pgsize_idx + 1)) - 1; 205 + 206 + /* throw away page sizes not supported by the hardware */ 207 + pgsize &= domain->ops->pgsize_bitmap; 208 + 209 + /* make sure we're still sane */ 210 + BUG_ON(!pgsize); 211 + 212 + /* pick the biggest page */ 213 + pgsize_idx = __fls(pgsize); 214 + pgsize = 1UL << pgsize_idx; 215 + 216 + pr_debug("mapping: iova 0x%lx pa 0x%lx pgsize %lu\n", iova, 217 + (unsigned long)paddr, pgsize); 218 + 219 + ret = domain->ops->map(domain, iova, paddr, pgsize, prot); 220 + if (ret) 221 + break; 222 + 223 + iova += pgsize; 224 + paddr += pgsize; 225 + size -= pgsize; 226 + } 227 + 228 + /* unroll mapping in case something went wrong */ 229 + if (ret) 230 + iommu_unmap(domain, orig_iova, orig_size - size); 231 + 232 + return ret; 174 233 } 175 234 EXPORT_SYMBOL_GPL(iommu_map); 176 235 177 - int iommu_unmap(struct iommu_domain *domain, unsigned long iova, int gfp_order) 236 + size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) 178 237 { 179 - size_t size; 238 + size_t unmapped_page, unmapped = 0; 239 + unsigned int min_pagesz; 180 240 181 241 if (unlikely(domain->ops->unmap == NULL)) 182 242 return -ENODEV; 183 243 184 - size = PAGE_SIZE << gfp_order; 244 + /* find out the minimum page size supported */ 245 + min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap); 185 246 186 - BUG_ON(!IS_ALIGNED(iova, size)); 247 + /* 248 + * The virtual address, as well as the size of the mapping, must be 249 + * aligned (at least) to the size of the smallest page supported 250 + * by the hardware 251 + */ 252 + if (!IS_ALIGNED(iova | size, min_pagesz)) { 253 + pr_err("unaligned: iova 0x%lx size 0x%lx min_pagesz 0x%x\n", 254 + iova, (unsigned 
long)size, min_pagesz); 255 + return -EINVAL; 256 + } 187 257 188 - return domain->ops->unmap(domain, iova, gfp_order); 258 + pr_debug("unmap this: iova 0x%lx size 0x%lx\n", iova, 259 + (unsigned long)size); 260 + 261 + /* 262 + * Keep iterating until we either unmap 'size' bytes (or more) 263 + * or we hit an area that isn't mapped. 264 + */ 265 + while (unmapped < size) { 266 + size_t left = size - unmapped; 267 + 268 + unmapped_page = domain->ops->unmap(domain, iova, left); 269 + if (!unmapped_page) 270 + break; 271 + 272 + pr_debug("unmapped: iova 0x%lx size %lx\n", iova, 273 + (unsigned long)unmapped_page); 274 + 275 + iova += unmapped_page; 276 + unmapped += unmapped_page; 277 + } 278 + 279 + return unmapped; 189 280 } 190 281 EXPORT_SYMBOL_GPL(iommu_unmap);
+12 -13
drivers/iommu/msm_iommu.c
··· 42 42 #define RCP15_PRRR(reg) MRC(reg, p15, 0, c10, c2, 0) 43 43 #define RCP15_NMRR(reg) MRC(reg, p15, 0, c10, c2, 1) 44 44 45 + /* bitmap of the page sizes currently supported */ 46 + #define MSM_IOMMU_PGSIZES (SZ_4K | SZ_64K | SZ_1M | SZ_16M) 47 + 45 48 static int msm_iommu_tex_class[4]; 46 49 47 50 DEFINE_SPINLOCK(msm_iommu_lock); ··· 355 352 } 356 353 357 354 static int msm_iommu_map(struct iommu_domain *domain, unsigned long va, 358 - phys_addr_t pa, int order, int prot) 355 + phys_addr_t pa, size_t len, int prot) 359 356 { 360 357 struct msm_priv *priv; 361 358 unsigned long flags; ··· 366 363 unsigned long *sl_pte; 367 364 unsigned long sl_offset; 368 365 unsigned int pgprot; 369 - size_t len = 0x1000UL << order; 370 366 int ret = 0, tex, sh; 371 367 372 368 spin_lock_irqsave(&msm_iommu_lock, flags); ··· 465 463 return ret; 466 464 } 467 465 468 - static int msm_iommu_unmap(struct iommu_domain *domain, unsigned long va, 469 - int order) 466 + static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long va, 467 + size_t len) 470 468 { 471 469 struct msm_priv *priv; 472 470 unsigned long flags; ··· 476 474 unsigned long *sl_table; 477 475 unsigned long *sl_pte; 478 476 unsigned long sl_offset; 479 - size_t len = 0x1000UL << order; 480 477 int i, ret = 0; 481 478 482 479 spin_lock_irqsave(&msm_iommu_lock, flags); ··· 545 544 546 545 ret = __flush_iotlb(domain); 547 546 548 - /* 549 - * the IOMMU API requires us to return the order of the unmapped 550 - * page (on success). 551 - */ 552 - if (!ret) 553 - ret = order; 554 547 fail: 555 548 spin_unlock_irqrestore(&msm_iommu_lock, flags); 556 - return ret; 549 + 550 + /* the IOMMU API requires us to return how many bytes were unmapped */ 551 + len = ret ? 
0 : len; 552 + return len; 557 553 } 558 554 559 555 static phys_addr_t msm_iommu_iova_to_phys(struct iommu_domain *domain, ··· 682 684 .map = msm_iommu_map, 683 685 .unmap = msm_iommu_unmap, 684 686 .iova_to_phys = msm_iommu_iova_to_phys, 685 - .domain_has_cap = msm_iommu_domain_has_cap 687 + .domain_has_cap = msm_iommu_domain_has_cap, 688 + .pgsize_bitmap = MSM_IOMMU_PGSIZES, 686 689 }; 687 690 688 691 static int __init get_tex_class(int icp, int ocp, int mt, int nos)
+37 -43
drivers/iommu/omap-iommu.c
··· 33 33 (__i < (n)) && (cr = __iotlb_read_cr((obj), __i), true); \ 34 34 __i++) 35 35 36 + /* bitmap of the page sizes currently supported */ 37 + #define OMAP_IOMMU_PGSIZES (SZ_4K | SZ_64K | SZ_1M | SZ_16M) 38 + 36 39 /** 37 40 * struct omap_iommu_domain - omap iommu domain 38 41 * @pgtable: the page table ··· 89 86 90 87 /** 91 88 * omap_iommu_save_ctx - Save registers for pm off-mode support 92 - * @obj: target iommu 89 + * @dev: client device 93 90 **/ 94 - void omap_iommu_save_ctx(struct omap_iommu *obj) 91 + void omap_iommu_save_ctx(struct device *dev) 95 92 { 93 + struct omap_iommu *obj = dev_to_omap_iommu(dev); 94 + 96 95 arch_iommu->save_ctx(obj); 97 96 } 98 97 EXPORT_SYMBOL_GPL(omap_iommu_save_ctx); 99 98 100 99 /** 101 100 * omap_iommu_restore_ctx - Restore registers for pm off-mode support 102 - * @obj: target iommu 101 + * @dev: client device 103 102 **/ 104 - void omap_iommu_restore_ctx(struct omap_iommu *obj) 103 + void omap_iommu_restore_ctx(struct device *dev) 105 104 { 105 + struct omap_iommu *obj = dev_to_omap_iommu(dev); 106 + 106 107 arch_iommu->restore_ctx(obj); 107 108 } 108 109 EXPORT_SYMBOL_GPL(omap_iommu_restore_ctx); ··· 827 820 } 828 821 829 822 /** 830 - * omap_find_iommu_device() - find an omap iommu device by name 831 - * @name: name of the iommu device 832 - * 833 - * The generic iommu API requires the caller to provide the device 834 - * he wishes to attach to a certain iommu domain. 835 - * 836 - * Drivers generally should not bother with this as it should just 837 - * be taken care of by the DMA-API using dev_archdata. 838 - * 839 - * This function is provided as an interim solution until the latter 840 - * materializes, and omap3isp is fully migrated to the DMA-API. 
841 - */ 842 - struct device *omap_find_iommu_device(const char *name) 843 - { 844 - return driver_find_device(&omap_iommu_driver.driver, NULL, 845 - (void *)name, 846 - device_match_by_alias); 847 - } 848 - EXPORT_SYMBOL_GPL(omap_find_iommu_device); 849 - 850 - /** 851 823 * omap_iommu_attach() - attach iommu device to an iommu domain 852 - * @dev: target omap iommu device 824 + * @name: name of target omap iommu device 853 825 * @iopgd: page table 854 826 **/ 855 - static struct omap_iommu *omap_iommu_attach(struct device *dev, u32 *iopgd) 827 + static struct omap_iommu *omap_iommu_attach(const char *name, u32 *iopgd) 856 828 { 857 829 int err = -ENOMEM; 858 - struct omap_iommu *obj = to_iommu(dev); 830 + struct device *dev; 831 + struct omap_iommu *obj; 832 + 833 + dev = driver_find_device(&omap_iommu_driver.driver, NULL, 834 + (void *)name, 835 + device_match_by_alias); 836 + if (!dev) 837 + return NULL; 838 + 839 + obj = to_iommu(dev); 859 840 860 841 spin_lock(&obj->iommu_lock); 861 842 ··· 1014 1019 } 1015 1020 1016 1021 static int omap_iommu_map(struct iommu_domain *domain, unsigned long da, 1017 - phys_addr_t pa, int order, int prot) 1022 + phys_addr_t pa, size_t bytes, int prot) 1018 1023 { 1019 1024 struct omap_iommu_domain *omap_domain = domain->priv; 1020 1025 struct omap_iommu *oiommu = omap_domain->iommu_dev; 1021 1026 struct device *dev = oiommu->dev; 1022 - size_t bytes = PAGE_SIZE << order; 1023 1027 struct iotlb_entry e; 1024 1028 int omap_pgsz; 1025 1029 u32 ret, flags; ··· 1043 1049 return ret; 1044 1050 } 1045 1051 1046 - static int omap_iommu_unmap(struct iommu_domain *domain, unsigned long da, 1047 - int order) 1052 + static size_t omap_iommu_unmap(struct iommu_domain *domain, unsigned long da, 1053 + size_t size) 1048 1054 { 1049 1055 struct omap_iommu_domain *omap_domain = domain->priv; 1050 1056 struct omap_iommu *oiommu = omap_domain->iommu_dev; 1051 1057 struct device *dev = oiommu->dev; 1052 - size_t unmap_size; 1053 1058 1054 - 
dev_dbg(dev, "unmapping da 0x%lx order %d\n", da, order); 1059 + dev_dbg(dev, "unmapping da 0x%lx size %u\n", da, size); 1055 1060 1056 - unmap_size = iopgtable_clear_entry(oiommu, da); 1057 - 1058 - return unmap_size ? get_order(unmap_size) : -EINVAL; 1061 + return iopgtable_clear_entry(oiommu, da); 1059 1062 } 1060 1063 1061 1064 static int ··· 1060 1069 { 1061 1070 struct omap_iommu_domain *omap_domain = domain->priv; 1062 1071 struct omap_iommu *oiommu; 1072 + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; 1063 1073 int ret = 0; 1064 1074 1065 1075 spin_lock(&omap_domain->lock); ··· 1073 1081 } 1074 1082 1075 1083 /* get a handle to and enable the omap iommu */ 1076 - oiommu = omap_iommu_attach(dev, omap_domain->pgtable); 1084 + oiommu = omap_iommu_attach(arch_data->name, omap_domain->pgtable); 1077 1085 if (IS_ERR(oiommu)) { 1078 1086 ret = PTR_ERR(oiommu); 1079 1087 dev_err(dev, "can't get omap iommu: %d\n", ret); 1080 1088 goto out; 1081 1089 } 1082 1090 1083 - omap_domain->iommu_dev = oiommu; 1091 + omap_domain->iommu_dev = arch_data->iommu_dev = oiommu; 1084 1092 oiommu->domain = domain; 1085 1093 1086 1094 out: ··· 1092 1100 struct device *dev) 1093 1101 { 1094 1102 struct omap_iommu_domain *omap_domain = domain->priv; 1095 - struct omap_iommu *oiommu = to_iommu(dev); 1103 + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; 1104 + struct omap_iommu *oiommu = dev_to_omap_iommu(dev); 1096 1105 1097 1106 spin_lock(&omap_domain->lock); 1098 1107 ··· 1107 1114 1108 1115 omap_iommu_detach(oiommu); 1109 1116 1110 - omap_domain->iommu_dev = NULL; 1117 + omap_domain->iommu_dev = arch_data->iommu_dev = NULL; 1111 1118 1112 1119 out: 1113 1120 spin_unlock(&omap_domain->lock); ··· 1176 1183 else if (iopte_is_large(*pte)) 1177 1184 ret = omap_iommu_translate(*pte, da, IOLARGE_MASK); 1178 1185 else 1179 - dev_err(dev, "bogus pte 0x%x", *pte); 1186 + dev_err(dev, "bogus pte 0x%x, da 0x%lx", *pte, da); 1180 1187 } else { 1181 1188 if 
(iopgd_is_section(*pgd)) 1182 1189 ret = omap_iommu_translate(*pgd, da, IOSECTION_MASK); 1183 1190 else if (iopgd_is_super(*pgd)) 1184 1191 ret = omap_iommu_translate(*pgd, da, IOSUPER_MASK); 1185 1192 else 1186 - dev_err(dev, "bogus pgd 0x%x", *pgd); 1193 + dev_err(dev, "bogus pgd 0x%x, da 0x%lx", *pgd, da); 1187 1194 } 1188 1195 1189 1196 return ret; ··· 1204 1211 .unmap = omap_iommu_unmap, 1205 1212 .iova_to_phys = omap_iommu_iova_to_phys, 1206 1213 .domain_has_cap = omap_iommu_domain_has_cap, 1214 + .pgsize_bitmap = OMAP_IOMMU_PGSIZES, 1207 1215 }; 1208 1216 1209 1217 static int __init omap_iommu_init(void)
+26 -22
drivers/iommu/omap-iovmm.c
··· 231 231 232 232 /** 233 233 * omap_find_iovm_area - find iovma which includes @da 234 + * @dev: client device 234 235 * @da: iommu device virtual address 235 236 * 236 237 * Find the existing iovma starting at @da 237 238 */ 238 - struct iovm_struct *omap_find_iovm_area(struct omap_iommu *obj, u32 da) 239 + struct iovm_struct *omap_find_iovm_area(struct device *dev, u32 da) 239 240 { 241 + struct omap_iommu *obj = dev_to_omap_iommu(dev); 240 242 struct iovm_struct *area; 241 243 242 244 mutex_lock(&obj->mmap_lock); ··· 345 343 346 344 /** 347 345 * omap_da_to_va - convert (d) to (v) 348 - * @obj: objective iommu 346 + * @dev: client device 349 347 * @da: iommu device virtual address 350 348 * @va: mpu virtual address 351 349 * 352 350 * Returns mpu virtual addr which corresponds to a given device virtual addr 353 351 */ 354 - void *omap_da_to_va(struct omap_iommu *obj, u32 da) 352 + void *omap_da_to_va(struct device *dev, u32 da) 355 353 { 354 + struct omap_iommu *obj = dev_to_omap_iommu(dev); 356 355 void *va = NULL; 357 356 struct iovm_struct *area; 358 357 ··· 413 410 unsigned int i, j; 414 411 struct scatterlist *sg; 415 412 u32 da = new->da_start; 416 - int order; 417 413 418 414 if (!domain || !sgt) 419 415 return -EINVAL; ··· 431 429 if (bytes_to_iopgsz(bytes) < 0) 432 430 goto err_out; 433 431 434 - order = get_order(bytes); 435 - 436 432 pr_debug("%s: [%d] %08x %08x(%x)\n", __func__, 437 433 i, da, pa, bytes); 438 434 439 - err = iommu_map(domain, da, pa, order, flags); 435 + err = iommu_map(domain, da, pa, bytes, flags); 440 436 if (err) 441 437 goto err_out; 442 438 ··· 449 449 size_t bytes; 450 450 451 451 bytes = sg->length + sg->offset; 452 - order = get_order(bytes); 453 452 454 453 /* ignore failures.. 
we're already handling one */ 455 - iommu_unmap(domain, da, order); 454 + iommu_unmap(domain, da, bytes); 456 455 457 456 da += bytes; 458 457 } ··· 466 467 size_t total = area->da_end - area->da_start; 467 468 const struct sg_table *sgt = area->sgt; 468 469 struct scatterlist *sg; 469 - int i, err; 470 + int i; 471 + size_t unmapped; 470 472 471 473 BUG_ON(!sgtable_ok(sgt)); 472 474 BUG_ON((!total) || !IS_ALIGNED(total, PAGE_SIZE)); ··· 475 475 start = area->da_start; 476 476 for_each_sg(sgt->sgl, sg, sgt->nents, i) { 477 477 size_t bytes; 478 - int order; 479 478 480 479 bytes = sg->length + sg->offset; 481 - order = get_order(bytes); 482 480 483 - err = iommu_unmap(domain, start, order); 484 - if (err < 0) 481 + unmapped = iommu_unmap(domain, start, bytes); 482 + if (unmapped < bytes) 485 483 break; 486 484 487 485 dev_dbg(obj->dev, "%s: unmap %08x(%x) %08x\n", ··· 580 582 581 583 /** 582 584 * omap_iommu_vmap - (d)-(p)-(v) address mapper 583 - * @obj: objective iommu 585 + * @domain: iommu domain 586 + * @dev: client device 584 587 * @sgt: address of scatter gather table 585 588 * @flags: iovma and page property 586 589 * 587 590 * Creates 1-n-1 mapping with given @sgt and returns @da. 588 591 * All @sgt element must be io page size aligned. 589 592 */ 590 - u32 omap_iommu_vmap(struct iommu_domain *domain, struct omap_iommu *obj, u32 da, 593 + u32 omap_iommu_vmap(struct iommu_domain *domain, struct device *dev, u32 da, 591 594 const struct sg_table *sgt, u32 flags) 592 595 { 596 + struct omap_iommu *obj = dev_to_omap_iommu(dev); 593 597 size_t bytes; 594 598 void *va = NULL; 595 599 ··· 622 622 623 623 /** 624 624 * omap_iommu_vunmap - release virtual mapping obtained by 'omap_iommu_vmap()' 625 - * @obj: objective iommu 625 + * @domain: iommu domain 626 + * @dev: client device 626 627 * @da: iommu device virtual address 627 628 * 628 629 * Free the iommu virtually contiguous memory area starting at 629 630 * @da, which was returned by 'omap_iommu_vmap()'. 
630 631 */ 631 632 struct sg_table * 632 - omap_iommu_vunmap(struct iommu_domain *domain, struct omap_iommu *obj, u32 da) 633 + omap_iommu_vunmap(struct iommu_domain *domain, struct device *dev, u32 da) 633 634 { 635 + struct omap_iommu *obj = dev_to_omap_iommu(dev); 634 636 struct sg_table *sgt; 635 637 /* 636 638 * 'sgt' is allocated before 'omap_iommu_vmalloc()' is called. ··· 649 647 650 648 /** 651 649 * omap_iommu_vmalloc - (d)-(p)-(v) address allocator and mapper 652 - * @obj: objective iommu 650 + * @dev: client device 653 651 * @da: contiguous iommu virtual memory 654 652 * @bytes: allocation size 655 653 * @flags: iovma and page property ··· 658 656 * @da again, which might be adjusted if 'IOVMF_DA_FIXED' is not set. 659 657 */ 660 658 u32 661 - omap_iommu_vmalloc(struct iommu_domain *domain, struct omap_iommu *obj, u32 da, 659 + omap_iommu_vmalloc(struct iommu_domain *domain, struct device *dev, u32 da, 662 660 size_t bytes, u32 flags) 663 661 { 662 + struct omap_iommu *obj = dev_to_omap_iommu(dev); 664 663 void *va; 665 664 struct sg_table *sgt; 666 665 ··· 701 698 702 699 /** 703 700 * omap_iommu_vfree - release memory allocated by 'omap_iommu_vmalloc()' 704 - * @obj: objective iommu 701 + * @dev: client device 705 702 * @da: iommu device virtual address 706 703 * 707 704 * Frees the iommu virtually continuous memory area starting at 708 705 * @da, as obtained from 'omap_iommu_vmalloc()'. 709 706 */ 710 - void omap_iommu_vfree(struct iommu_domain *domain, struct omap_iommu *obj, 707 + void omap_iommu_vfree(struct iommu_domain *domain, struct device *dev, 711 708 const u32 da) 712 709 { 710 + struct omap_iommu *obj = dev_to_omap_iommu(dev); 713 711 struct sg_table *sgt; 714 712 715 713 sgt = unmap_vm_area(domain, obj, da, vfree,
+5 -25
drivers/media/video/omap3isp/isp.c
··· 80 80 #include "isph3a.h" 81 81 #include "isphist.h" 82 82 83 - /* 84 - * this is provided as an interim solution until omap3isp doesn't need 85 - * any omap-specific iommu API 86 - */ 87 - #define to_iommu(dev) \ 88 - (struct omap_iommu *)platform_get_drvdata(to_platform_device(dev)) 89 - 90 83 static unsigned int autoidle; 91 84 module_param(autoidle, int, 0444); 92 85 MODULE_PARM_DESC(autoidle, "Enable OMAP3ISP AUTOIDLE support"); ··· 1107 1114 static void isp_save_ctx(struct isp_device *isp) 1108 1115 { 1109 1116 isp_save_context(isp, isp_reg_list); 1110 - if (isp->iommu) 1111 - omap_iommu_save_ctx(isp->iommu); 1117 + omap_iommu_save_ctx(isp->dev); 1112 1118 } 1113 1119 1114 1120 /* ··· 1120 1128 static void isp_restore_ctx(struct isp_device *isp) 1121 1129 { 1122 1130 isp_restore_context(isp, isp_reg_list); 1123 - if (isp->iommu) 1124 - omap_iommu_restore_ctx(isp->iommu); 1131 + omap_iommu_restore_ctx(isp->dev); 1125 1132 omap3isp_ccdc_restore_context(isp); 1126 1133 omap3isp_preview_restore_context(isp); 1127 1134 } ··· 1974 1983 isp_cleanup_modules(isp); 1975 1984 1976 1985 omap3isp_get(isp); 1977 - iommu_detach_device(isp->domain, isp->iommu_dev); 1986 + iommu_detach_device(isp->domain, &pdev->dev); 1978 1987 iommu_domain_free(isp->domain); 1979 1988 omap3isp_put(isp); 1980 1989 ··· 2122 2131 } 2123 2132 } 2124 2133 2125 - /* IOMMU */ 2126 - isp->iommu_dev = omap_find_iommu_device("isp"); 2127 - if (!isp->iommu_dev) { 2128 - dev_err(isp->dev, "omap_find_iommu_device failed\n"); 2129 - ret = -ENODEV; 2130 - goto error_isp; 2131 - } 2132 - 2133 - /* to be removed once iommu migration is complete */ 2134 - isp->iommu = to_iommu(isp->iommu_dev); 2135 - 2136 2134 isp->domain = iommu_domain_alloc(pdev->dev.bus); 2137 2135 if (!isp->domain) { 2138 2136 dev_err(isp->dev, "can't alloc iommu domain\n"); ··· 2129 2149 goto error_isp; 2130 2150 } 2131 2151 2132 - ret = iommu_attach_device(isp->domain, isp->iommu_dev); 2152 + ret = iommu_attach_device(isp->domain, 
&pdev->dev); 2133 2153 if (ret) { 2134 2154 dev_err(&pdev->dev, "can't attach iommu device: %d\n", ret); 2135 2155 goto free_domain; ··· 2168 2188 error_irq: 2169 2189 free_irq(isp->irq_num, isp); 2170 2190 detach_dev: 2171 - iommu_detach_device(isp->domain, isp->iommu_dev); 2191 + iommu_detach_device(isp->domain, &pdev->dev); 2172 2192 free_domain: 2173 2193 iommu_domain_free(isp->domain); 2174 2194 error_isp:
-2
drivers/media/video/omap3isp/isp.h
··· 212 212 unsigned int sbl_resources; 213 213 unsigned int subclk_resources; 214 214 215 - struct omap_iommu *iommu; 216 215 struct iommu_domain *domain; 217 - struct device *iommu_dev; 218 216 219 217 struct isp_platform_callback platform_cb; 220 218 };
+9 -9
drivers/media/video/omap3isp/ispccdc.c
··· 366 366 dma_unmap_sg(isp->dev, req->iovm->sgt->sgl, 367 367 req->iovm->sgt->nents, DMA_TO_DEVICE); 368 368 if (req->table) 369 - omap_iommu_vfree(isp->domain, isp->iommu, req->table); 369 + omap_iommu_vfree(isp->domain, isp->dev, req->table); 370 370 kfree(req); 371 371 } 372 372 ··· 438 438 439 439 req->enable = 1; 440 440 441 - req->table = omap_iommu_vmalloc(isp->domain, isp->iommu, 0, 441 + req->table = omap_iommu_vmalloc(isp->domain, isp->dev, 0, 442 442 req->config.size, IOMMU_FLAG); 443 443 if (IS_ERR_VALUE(req->table)) { 444 444 req->table = 0; ··· 446 446 goto done; 447 447 } 448 448 449 - req->iovm = omap_find_iovm_area(isp->iommu, req->table); 449 + req->iovm = omap_find_iovm_area(isp->dev, req->table); 450 450 if (req->iovm == NULL) { 451 451 ret = -ENOMEM; 452 452 goto done; ··· 462 462 dma_sync_sg_for_cpu(isp->dev, req->iovm->sgt->sgl, 463 463 req->iovm->sgt->nents, DMA_TO_DEVICE); 464 464 465 - table = omap_da_to_va(isp->iommu, req->table); 465 + table = omap_da_to_va(isp->dev, req->table); 466 466 if (copy_from_user(table, config->lsc, req->config.size)) { 467 467 ret = -EFAULT; 468 468 goto done; ··· 734 734 * already done by omap_iommu_vmalloc(). 
735 735 */ 736 736 size = ccdc->fpc.fpnum * 4; 737 - table_new = omap_iommu_vmalloc(isp->domain, isp->iommu, 737 + table_new = omap_iommu_vmalloc(isp->domain, isp->dev, 738 738 0, size, IOMMU_FLAG); 739 739 if (IS_ERR_VALUE(table_new)) 740 740 return -ENOMEM; 741 741 742 - if (copy_from_user(omap_da_to_va(isp->iommu, table_new), 742 + if (copy_from_user(omap_da_to_va(isp->dev, table_new), 743 743 (__force void __user *) 744 744 ccdc->fpc.fpcaddr, size)) { 745 - omap_iommu_vfree(isp->domain, isp->iommu, 745 + omap_iommu_vfree(isp->domain, isp->dev, 746 746 table_new); 747 747 return -EFAULT; 748 748 } ··· 753 753 754 754 ccdc_configure_fpc(ccdc); 755 755 if (table_old != 0) 756 - omap_iommu_vfree(isp->domain, isp->iommu, table_old); 756 + omap_iommu_vfree(isp->domain, isp->dev, table_old); 757 757 } 758 758 759 759 return ccdc_lsc_config(ccdc, ccdc_struct); ··· 2309 2309 ccdc_lsc_free_queue(ccdc, &ccdc->lsc.free_queue); 2310 2310 2311 2311 if (ccdc->fpc.fpcaddr != 0) 2312 - omap_iommu_vfree(isp->domain, isp->iommu, ccdc->fpc.fpcaddr); 2312 + omap_iommu_vfree(isp->domain, isp->dev, ccdc->fpc.fpcaddr); 2313 2313 2314 2314 mutex_destroy(&ccdc->ioctl_lock); 2315 2315 }
+4 -4
drivers/media/video/omap3isp/ispstat.c
··· 366 366 dma_unmap_sg(isp->dev, buf->iovm->sgt->sgl, 367 367 buf->iovm->sgt->nents, 368 368 DMA_FROM_DEVICE); 369 - omap_iommu_vfree(isp->domain, isp->iommu, 369 + omap_iommu_vfree(isp->domain, isp->dev, 370 370 buf->iommu_addr); 371 371 } else { 372 372 if (!buf->virt_addr) ··· 400 400 struct iovm_struct *iovm; 401 401 402 402 WARN_ON(buf->dma_addr); 403 - buf->iommu_addr = omap_iommu_vmalloc(isp->domain, isp->iommu, 0, 403 + buf->iommu_addr = omap_iommu_vmalloc(isp->domain, isp->dev, 0, 404 404 size, IOMMU_FLAG); 405 405 if (IS_ERR((void *)buf->iommu_addr)) { 406 406 dev_err(stat->isp->dev, ··· 410 410 return -ENOMEM; 411 411 } 412 412 413 - iovm = omap_find_iovm_area(isp->iommu, buf->iommu_addr); 413 + iovm = omap_find_iovm_area(isp->dev, buf->iommu_addr); 414 414 if (!iovm || 415 415 !dma_map_sg(isp->dev, iovm->sgt->sgl, iovm->sgt->nents, 416 416 DMA_FROM_DEVICE)) { ··· 419 419 } 420 420 buf->iovm = iovm; 421 421 422 - buf->virt_addr = omap_da_to_va(stat->isp->iommu, 422 + buf->virt_addr = omap_da_to_va(stat->isp->dev, 423 423 (u32)buf->iommu_addr); 424 424 buf->empty = 1; 425 425 dev_dbg(stat->isp->dev, "%s: buffer[%d] allocated."
+2 -2
drivers/media/video/omap3isp/ispvideo.c
··· 453 453 sgt->nents = sglen; 454 454 sgt->orig_nents = sglen; 455 455 456 - da = omap_iommu_vmap(isp->domain, isp->iommu, 0, sgt, IOMMU_FLAG); 456 + da = omap_iommu_vmap(isp->domain, isp->dev, 0, sgt, IOMMU_FLAG); 457 457 if (IS_ERR_VALUE(da)) 458 458 kfree(sgt); 459 459 ··· 469 469 { 470 470 struct sg_table *sgt; 471 471 472 - sgt = omap_iommu_vunmap(isp->domain, isp->iommu, (u32)da); 472 + sgt = omap_iommu_vunmap(isp->domain, isp->dev, (u32)da); 473 473 kfree(sgt); 474 474 } 475 475
+45 -45
drivers/pci/ats.c
··· 175 175 u32 max_requests; 176 176 int pos; 177 177 178 - pos = pci_find_ext_capability(pdev, PCI_PRI_CAP); 178 + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); 179 179 if (!pos) 180 180 return -EINVAL; 181 181 182 - pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control); 183 - pci_read_config_word(pdev, pos + PCI_PRI_STATUS_OFF, &status); 184 - if ((control & PCI_PRI_ENABLE) || !(status & PCI_PRI_STATUS_STOPPED)) 182 + pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control); 183 + pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status); 184 + if ((control & PCI_PRI_CTRL_ENABLE) || 185 + !(status & PCI_PRI_STATUS_STOPPED)) 185 186 return -EBUSY; 186 187 187 - pci_read_config_dword(pdev, pos + PCI_PRI_MAX_REQ_OFF, &max_requests); 188 + pci_read_config_dword(pdev, pos + PCI_PRI_MAX_REQ, &max_requests); 188 189 reqs = min(max_requests, reqs); 189 - pci_write_config_dword(pdev, pos + PCI_PRI_ALLOC_REQ_OFF, reqs); 190 + pci_write_config_dword(pdev, pos + PCI_PRI_ALLOC_REQ, reqs); 190 191 191 - control |= PCI_PRI_ENABLE; 192 - pci_write_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, control); 192 + control |= PCI_PRI_CTRL_ENABLE; 193 + pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control); 193 194 194 195 return 0; 195 196 } ··· 207 206 u16 control; 208 207 int pos; 209 208 210 - pos = pci_find_ext_capability(pdev, PCI_PRI_CAP); 209 + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); 211 210 if (!pos) 212 211 return; 213 212 214 - pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control); 215 - control &= ~PCI_PRI_ENABLE; 216 - pci_write_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, control); 213 + pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control); 214 + control &= ~PCI_PRI_CTRL_ENABLE; 215 + pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control); 217 216 } 218 217 EXPORT_SYMBOL_GPL(pci_disable_pri); 219 218 ··· 228 227 u16 control; 229 228 int pos; 230 229 231 - pos = pci_find_ext_capability(pdev, PCI_PRI_CAP); 230 + pos = 
pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); 232 231 if (!pos) 233 232 return false; 234 233 235 - pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control); 234 + pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control); 236 235 237 - return (control & PCI_PRI_ENABLE) ? true : false; 236 + return (control & PCI_PRI_CTRL_ENABLE) ? true : false; 238 237 } 239 238 EXPORT_SYMBOL_GPL(pci_pri_enabled); 240 239 ··· 250 249 u16 control; 251 250 int pos; 252 251 253 - pos = pci_find_ext_capability(pdev, PCI_PRI_CAP); 252 + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); 254 253 if (!pos) 255 254 return -EINVAL; 256 255 257 - pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control); 258 - if (control & PCI_PRI_ENABLE) 256 + pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control); 257 + if (control & PCI_PRI_CTRL_ENABLE) 259 258 return -EBUSY; 260 259 261 - control |= PCI_PRI_RESET; 260 + control |= PCI_PRI_CTRL_RESET; 262 261 263 - pci_write_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, control); 262 + pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control); 264 263 265 264 return 0; 266 265 } ··· 283 282 u16 control, status; 284 283 int pos; 285 284 286 - pos = pci_find_ext_capability(pdev, PCI_PRI_CAP); 285 + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); 287 286 if (!pos) 288 287 return true; 289 288 290 - pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control); 291 - pci_read_config_word(pdev, pos + PCI_PRI_STATUS_OFF, &status); 289 + pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control); 290 + pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status); 292 291 293 - if (control & PCI_PRI_ENABLE) 292 + if (control & PCI_PRI_CTRL_ENABLE) 294 293 return false; 295 294 296 295 return (status & PCI_PRI_STATUS_STOPPED) ? 
true : false; ··· 312 311 u16 status, control; 313 312 int pos; 314 313 315 - pos = pci_find_ext_capability(pdev, PCI_PRI_CAP); 314 + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); 316 315 if (!pos) 317 316 return -EINVAL; 318 317 319 - pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control); 320 - pci_read_config_word(pdev, pos + PCI_PRI_STATUS_OFF, &status); 318 + pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control); 319 + pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status); 321 320 322 321 /* Stopped bit is undefined when enable == 1, so clear it */ 323 - if (control & PCI_PRI_ENABLE) 322 + if (control & PCI_PRI_CTRL_ENABLE) 324 323 status &= ~PCI_PRI_STATUS_STOPPED; 325 324 326 325 return status; ··· 343 342 u16 control, supported; 344 343 int pos; 345 344 346 - pos = pci_find_ext_capability(pdev, PCI_PASID_CAP); 345 + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID); 347 346 if (!pos) 348 347 return -EINVAL; 349 348 350 - pci_read_config_word(pdev, pos + PCI_PASID_CONTROL_OFF, &control); 351 - pci_read_config_word(pdev, pos + PCI_PASID_CAP_OFF, &supported); 349 + pci_read_config_word(pdev, pos + PCI_PASID_CTRL, &control); 350 + pci_read_config_word(pdev, pos + PCI_PASID_CAP, &supported); 352 351 353 - if (!(supported & PCI_PASID_ENABLE)) 352 + if (control & PCI_PASID_CTRL_ENABLE) 354 353 return -EINVAL; 355 354 356 - supported &= PCI_PASID_EXEC | PCI_PASID_PRIV; 355 + supported &= PCI_PASID_CAP_EXEC | PCI_PASID_CAP_PRIV; 357 356 358 357 /* User wants to enable anything unsupported? 
*/ 359 358 if ((supported & features) != features) 360 359 return -EINVAL; 361 360 362 - control = PCI_PASID_ENABLE | features; 361 + control = PCI_PASID_CTRL_ENABLE | features; 363 362 364 - pci_write_config_word(pdev, pos + PCI_PASID_CONTROL_OFF, control); 363 + pci_write_config_word(pdev, pos + PCI_PASID_CTRL, control); 365 364 366 365 return 0; 367 366 } ··· 377 376 u16 control = 0; 378 377 int pos; 379 378 380 - pos = pci_find_ext_capability(pdev, PCI_PASID_CAP); 379 + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID); 381 380 if (!pos) 382 381 return; 383 382 384 - pci_write_config_word(pdev, pos + PCI_PASID_CONTROL_OFF, control); 383 + pci_write_config_word(pdev, pos + PCI_PASID_CTRL, control); 385 384 } 386 385 EXPORT_SYMBOL_GPL(pci_disable_pasid); 387 386 ··· 392 391 * Returns a negative value when no PASI capability is present. 393 392 * Otherwise is returns a bitmask with supported features. Current 394 393 * features reported are: 395 - * PCI_PASID_ENABLE - PASID capability can be enabled 396 - * PCI_PASID_EXEC - Execute permission supported 397 - * PCI_PASID_PRIV - Priviledged mode supported 394 + * PCI_PASID_CAP_EXEC - Execute permission supported 395 + * PCI_PASID_CAP_PRIV - Priviledged mode supported 398 396 */ 399 397 int pci_pasid_features(struct pci_dev *pdev) 400 398 { 401 399 u16 supported; 402 400 int pos; 403 401 404 - pos = pci_find_ext_capability(pdev, PCI_PASID_CAP); 402 + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID); 405 403 if (!pos) 406 404 return -EINVAL; 407 405 408 - pci_read_config_word(pdev, pos + PCI_PASID_CAP_OFF, &supported); 406 + pci_read_config_word(pdev, pos + PCI_PASID_CAP, &supported); 409 407 410 - supported &= PCI_PASID_ENABLE | PCI_PASID_EXEC | PCI_PASID_PRIV; 408 + supported &= PCI_PASID_CAP_EXEC | PCI_PASID_CAP_PRIV; 411 409 412 410 return supported; 413 411 } ··· 426 426 u16 supported; 427 427 int pos; 428 428 429 - pos = pci_find_ext_capability(pdev, PCI_PASID_CAP); 429 + pos = 
pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID); 430 430 if (!pos) 431 431 return -EINVAL; 432 432 433 - pci_read_config_word(pdev, pos + PCI_PASID_CAP_OFF, &supported); 433 + pci_read_config_word(pdev, pos + PCI_PASID_CAP, &supported); 434 434 435 435 supported = (supported & PASID_NUMBER_MASK) >> PASID_NUMBER_SHIFT; 436 436
-1
drivers/pci/hotplug/pciehp.h
··· 45 45 extern int pciehp_debug; 46 46 extern int pciehp_force; 47 47 extern struct workqueue_struct *pciehp_wq; 48 - extern struct workqueue_struct *pciehp_ordered_wq; 49 48 50 49 #define dbg(format, arg...) \ 51 50 do { \
+1 -10
drivers/pci/hotplug/pciehp_core.c
··· 43 43 int pciehp_poll_time; 44 44 int pciehp_force; 45 45 struct workqueue_struct *pciehp_wq; 46 - struct workqueue_struct *pciehp_ordered_wq; 47 46 48 47 #define DRIVER_VERSION "0.4" 49 48 #define DRIVER_AUTHOR "Dan Zink <dan.zink@compaq.com>, Greg Kroah-Hartman <greg@kroah.com>, Dely Sy <dely.l.sy@intel.com>" ··· 344 345 if (!pciehp_wq) 345 346 return -ENOMEM; 346 347 347 - pciehp_ordered_wq = alloc_ordered_workqueue("pciehp_ordered", 0); 348 - if (!pciehp_ordered_wq) { 349 - destroy_workqueue(pciehp_wq); 350 - return -ENOMEM; 351 - } 352 - 353 348 pciehp_firmware_init(); 354 349 retval = pcie_port_service_register(&hpdriver_portdrv); 355 350 dbg("pcie_port_service_register = %d\n", retval); 356 351 info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); 357 352 if (retval) { 358 - destroy_workqueue(pciehp_ordered_wq); 359 353 destroy_workqueue(pciehp_wq); 360 354 dbg("Failure to register service\n"); 361 355 } ··· 358 366 static void __exit pcied_cleanup(void) 359 367 { 360 368 dbg("unload_pciehpd()\n"); 361 - destroy_workqueue(pciehp_ordered_wq); 362 - destroy_workqueue(pciehp_wq); 363 369 pcie_port_service_unregister(&hpdriver_portdrv); 370 + destroy_workqueue(pciehp_wq); 364 371 info(DRIVER_DESC " version: " DRIVER_VERSION " unloaded\n"); 365 372 } 366 373
+2 -2
drivers/pci/hotplug/pciehp_ctrl.c
··· 344 344 kfree(info); 345 345 goto out; 346 346 } 347 - queue_work(pciehp_ordered_wq, &info->work); 347 + queue_work(pciehp_wq, &info->work); 348 348 out: 349 349 mutex_unlock(&p_slot->lock); 350 350 } ··· 439 439 else 440 440 p_slot->state = POWERON_STATE; 441 441 442 - queue_work(pciehp_ordered_wq, &info->work); 442 + queue_work(pciehp_wq, &info->work); 443 443 } 444 444 445 445 static void interrupt_event_handler(struct work_struct *work)
-1
drivers/pci/hotplug/pciehp_hpc.c
··· 806 806 struct slot *slot = ctrl->slot; 807 807 cancel_delayed_work(&slot->work); 808 808 flush_workqueue(pciehp_wq); 809 - flush_workqueue(pciehp_ordered_wq); 810 809 kfree(slot); 811 810 } 812 811
+121
drivers/pci/msi.c
··· 323 323 if (list_is_last(&entry->list, &dev->msi_list)) 324 324 iounmap(entry->mask_base); 325 325 } 326 + kobject_del(&entry->kobj); 327 + kobject_put(&entry->kobj); 326 328 list_del(&entry->list); 327 329 kfree(entry); 328 330 } ··· 405 403 } 406 404 EXPORT_SYMBOL_GPL(pci_restore_msi_state); 407 405 406 + 407 + #define to_msi_attr(obj) container_of(obj, struct msi_attribute, attr) 408 + #define to_msi_desc(obj) container_of(obj, struct msi_desc, kobj) 409 + 410 + struct msi_attribute { 411 + struct attribute attr; 412 + ssize_t (*show)(struct msi_desc *entry, struct msi_attribute *attr, 413 + char *buf); 414 + ssize_t (*store)(struct msi_desc *entry, struct msi_attribute *attr, 415 + const char *buf, size_t count); 416 + }; 417 + 418 + static ssize_t show_msi_mode(struct msi_desc *entry, struct msi_attribute *atr, 419 + char *buf) 420 + { 421 + return sprintf(buf, "%s\n", entry->msi_attrib.is_msix ? "msix" : "msi"); 422 + } 423 + 424 + static ssize_t msi_irq_attr_show(struct kobject *kobj, 425 + struct attribute *attr, char *buf) 426 + { 427 + struct msi_attribute *attribute = to_msi_attr(attr); 428 + struct msi_desc *entry = to_msi_desc(kobj); 429 + 430 + if (!attribute->show) 431 + return -EIO; 432 + 433 + return attribute->show(entry, attribute, buf); 434 + } 435 + 436 + static const struct sysfs_ops msi_irq_sysfs_ops = { 437 + .show = msi_irq_attr_show, 438 + }; 439 + 440 + static struct msi_attribute mode_attribute = 441 + __ATTR(mode, S_IRUGO, show_msi_mode, NULL); 442 + 443 + 444 + struct attribute *msi_irq_default_attrs[] = { 445 + &mode_attribute.attr, 446 + NULL 447 + }; 448 + 449 + void msi_kobj_release(struct kobject *kobj) 450 + { 451 + struct msi_desc *entry = to_msi_desc(kobj); 452 + 453 + pci_dev_put(entry->dev); 454 + } 455 + 456 + static struct kobj_type msi_irq_ktype = { 457 + .release = msi_kobj_release, 458 + .sysfs_ops = &msi_irq_sysfs_ops, 459 + .default_attrs = msi_irq_default_attrs, 460 + }; 461 + 462 + static int 
populate_msi_sysfs(struct pci_dev *pdev) 463 + { 464 + struct msi_desc *entry; 465 + struct kobject *kobj; 466 + int ret; 467 + int count = 0; 468 + 469 + pdev->msi_kset = kset_create_and_add("msi_irqs", NULL, &pdev->dev.kobj); 470 + if (!pdev->msi_kset) 471 + return -ENOMEM; 472 + 473 + list_for_each_entry(entry, &pdev->msi_list, list) { 474 + kobj = &entry->kobj; 475 + kobj->kset = pdev->msi_kset; 476 + pci_dev_get(pdev); 477 + ret = kobject_init_and_add(kobj, &msi_irq_ktype, NULL, 478 + "%u", entry->irq); 479 + if (ret) 480 + goto out_unroll; 481 + 482 + count++; 483 + } 484 + 485 + return 0; 486 + 487 + out_unroll: 488 + list_for_each_entry(entry, &pdev->msi_list, list) { 489 + if (!count) 490 + break; 491 + kobject_del(&entry->kobj); 492 + kobject_put(&entry->kobj); 493 + count--; 494 + } 495 + return ret; 496 + } 497 + 408 498 /** 409 499 * msi_capability_init - configure device's MSI capability structure 410 500 * @dev: pointer to the pci_dev data structure of MSI device function ··· 542 448 543 449 /* Configure MSI capability structure */ 544 450 ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI); 451 + if (ret) { 452 + msi_mask_irq(entry, mask, ~mask); 453 + free_msi_irqs(dev); 454 + return ret; 455 + } 456 + 457 + ret = populate_msi_sysfs(dev); 545 458 if (ret) { 546 459 msi_mask_irq(entry, mask, ~mask); 547 460 free_msi_irqs(dev); ··· 674 573 pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control); 675 574 676 575 msix_program_entries(dev, entries); 576 + 577 + ret = populate_msi_sysfs(dev); 578 + if (ret) { 579 + ret = 0; 580 + goto error; 581 + } 677 582 678 583 /* Set MSI-X enabled bits and unmask the function */ 679 584 pci_intx_for_msi(dev, 0); ··· 839 732 840 733 pci_msi_shutdown(dev); 841 734 free_msi_irqs(dev); 735 + kset_unregister(dev->msi_kset); 736 + dev->msi_kset = NULL; 842 737 } 843 738 EXPORT_SYMBOL(pci_disable_msi); 844 739 ··· 939 830 940 831 pci_msix_shutdown(dev); 941 832 free_msi_irqs(dev); 833 + kset_unregister(dev->msi_kset); 
834 + dev->msi_kset = NULL; 942 835 } 943 836 EXPORT_SYMBOL(pci_disable_msix); 944 837 ··· 981 870 982 871 void pci_msi_init_pci_dev(struct pci_dev *dev) 983 872 { 873 + int pos; 984 874 INIT_LIST_HEAD(&dev->msi_list); 875 + 876 + /* Disable the msi hardware to avoid screaming interrupts 877 + * during boot. This is the power on reset default so 878 + * usually this should be a noop. 879 + */ 880 + pos = pci_find_capability(dev, PCI_CAP_ID_MSI); 881 + if (pos) 882 + msi_set_enable(dev, pos, 0); 883 + msix_set_enable(dev, 0); 985 884 }
+8 -5
drivers/pci/pci-acpi.c
··· 45 45 { 46 46 struct pci_dev *pci_dev = context; 47 47 48 - if (event == ACPI_NOTIFY_DEVICE_WAKE && pci_dev) { 48 + if (event != ACPI_NOTIFY_DEVICE_WAKE || !pci_dev) 49 + return; 50 + 51 + if (!pci_dev->pm_cap || !pci_dev->pme_support 52 + || pci_check_pme_status(pci_dev)) { 49 53 if (pci_dev->pme_poll) 50 54 pci_dev->pme_poll = false; 51 55 52 56 pci_wakeup_event(pci_dev); 53 - pci_check_pme_status(pci_dev); 54 57 pm_runtime_resume(&pci_dev->dev); 55 - if (pci_dev->subordinate) 56 - pci_pme_wakeup_bus(pci_dev->subordinate); 57 58 } 59 + 60 + if (pci_dev->subordinate) 61 + pci_pme_wakeup_bus(pci_dev->subordinate); 58 62 } 59 63 60 64 /** ··· 399 395 400 396 if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) { 401 397 printk(KERN_INFO"ACPI FADT declares the system doesn't support PCIe ASPM, so disable it\n"); 402 - pcie_clear_aspm(); 403 398 pcie_no_aspm(); 404 399 } 405 400
+37 -21
drivers/pci/pcie/aspm.c
··· 68 68 struct aspm_latency acceptable[8]; 69 69 }; 70 70 71 - static int aspm_disabled, aspm_force, aspm_clear_state; 71 + static int aspm_disabled, aspm_force; 72 72 static bool aspm_support_enabled = true; 73 73 static DEFINE_MUTEX(aspm_lock); 74 74 static LIST_HEAD(link_list); ··· 500 500 int pos; 501 501 u32 reg32; 502 502 503 - if (aspm_clear_state) 504 - return -EINVAL; 505 - 506 503 /* 507 504 * Some functions in a slot might not all be PCIe functions, 508 505 * very strange. Disable ASPM for the whole slot ··· 571 574 pdev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM) 572 575 return; 573 576 574 - if (aspm_disabled && !aspm_clear_state) 575 - return; 576 - 577 577 /* VIA has a strange chipset, root port is under a bridge */ 578 578 if (pdev->pcie_type == PCI_EXP_TYPE_ROOT_PORT && 579 579 pdev->bus->self) ··· 602 608 * the BIOS's expectation, we'll do so once pci_enable_device() is 603 609 * called. 604 610 */ 605 - if (aspm_policy != POLICY_POWERSAVE || aspm_clear_state) { 611 + if (aspm_policy != POLICY_POWERSAVE) { 606 612 pcie_config_aspm_path(link); 607 613 pcie_set_clkpm(link, policy_to_clkpm_state(link)); 608 614 } ··· 643 649 struct pci_dev *parent = pdev->bus->self; 644 650 struct pcie_link_state *link, *root, *parent_link; 645 651 646 - if ((aspm_disabled && !aspm_clear_state) || !pci_is_pcie(pdev) || 647 - !parent || !parent->link_state) 652 + if (!pci_is_pcie(pdev) || !parent || !parent->link_state) 648 653 return; 649 654 if ((parent->pcie_type != PCI_EXP_TYPE_ROOT_PORT) && 650 655 (parent->pcie_type != PCI_EXP_TYPE_DOWNSTREAM)) ··· 727 734 * pci_disable_link_state - disable pci device's link state, so the link will 728 735 * never enter specific states 729 736 */ 730 - static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem) 737 + static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem, 738 + bool force) 731 739 { 732 740 struct pci_dev *parent = pdev->bus->self; 733 741 struct pcie_link_state *link; 
734 742 735 - if (aspm_disabled || !pci_is_pcie(pdev)) 743 + if (aspm_disabled && !force) 736 744 return; 745 + 746 + if (!pci_is_pcie(pdev)) 747 + return; 748 + 737 749 if (pdev->pcie_type == PCI_EXP_TYPE_ROOT_PORT || 738 750 pdev->pcie_type == PCI_EXP_TYPE_DOWNSTREAM) 739 751 parent = pdev; ··· 766 768 767 769 void pci_disable_link_state_locked(struct pci_dev *pdev, int state) 768 770 { 769 - __pci_disable_link_state(pdev, state, false); 771 + __pci_disable_link_state(pdev, state, false, false); 770 772 } 771 773 EXPORT_SYMBOL(pci_disable_link_state_locked); 772 774 773 775 void pci_disable_link_state(struct pci_dev *pdev, int state) 774 776 { 775 - __pci_disable_link_state(pdev, state, true); 777 + __pci_disable_link_state(pdev, state, true, false); 776 778 } 777 779 EXPORT_SYMBOL(pci_disable_link_state); 780 + 781 + void pcie_clear_aspm(struct pci_bus *bus) 782 + { 783 + struct pci_dev *child; 784 + 785 + /* 786 + * Clear any ASPM setup that the firmware has carried out on this bus 787 + */ 788 + list_for_each_entry(child, &bus->devices, bus_list) { 789 + __pci_disable_link_state(child, PCIE_LINK_STATE_L0S | 790 + PCIE_LINK_STATE_L1 | 791 + PCIE_LINK_STATE_CLKPM, 792 + false, true); 793 + } 794 + } 778 795 779 796 static int pcie_aspm_set_policy(const char *val, struct kernel_param *kp) 780 797 { ··· 948 935 static int __init pcie_aspm_disable(char *str) 949 936 { 950 937 if (!strcmp(str, "off")) { 938 + aspm_policy = POLICY_DEFAULT; 951 939 aspm_disabled = 1; 952 940 aspm_support_enabled = false; 953 941 printk(KERN_INFO "PCIe ASPM is disabled\n"); ··· 961 947 962 948 __setup("pcie_aspm=", pcie_aspm_disable); 963 949 964 - void pcie_clear_aspm(void) 965 - { 966 - if (!aspm_force) 967 - aspm_clear_state = 1; 968 - } 969 - 970 950 void pcie_no_aspm(void) 971 951 { 972 - if (!aspm_force) 952 + /* 953 + * Disabling ASPM is intended to prevent the kernel from modifying 954 + * existing hardware state, not to clear existing state. 
To that end: 955 + * (a) set policy to POLICY_DEFAULT in order to avoid changing state 956 + * (b) prevent userspace from changing policy 957 + */ 958 + if (!aspm_force) { 959 + aspm_policy = POLICY_DEFAULT; 973 960 aspm_disabled = 1; 961 + } 974 962 } 975 963 976 964 /**
+4
include/linux/acpi.h
··· 302 302 OSC_PCI_EXPRESS_PME_CONTROL | \ 303 303 OSC_PCI_EXPRESS_AER_CONTROL | \ 304 304 OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL) 305 + 306 + #define OSC_PCI_NATIVE_HOTPLUG (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | \ 307 + OSC_SHPC_NATIVE_HP_CONTROL) 308 + 305 309 extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, 306 310 u32 *mask, u32 req); 307 311 extern void acpi_early_init(void);
+137 -1
include/linux/amd-iommu.h
··· 20 20 #ifndef _ASM_X86_AMD_IOMMU_H 21 21 #define _ASM_X86_AMD_IOMMU_H 22 22 23 - #include <linux/irqreturn.h> 23 + #include <linux/types.h> 24 24 25 25 #ifdef CONFIG_AMD_IOMMU 26 26 27 + struct task_struct; 28 + struct pci_dev; 29 + 27 30 extern int amd_iommu_detect(void); 31 + 32 + 33 + /** 34 + * amd_iommu_enable_device_erratum() - Enable erratum workaround for device 35 + * in the IOMMUv2 driver 36 + * @pdev: The PCI device the workaround is necessary for 37 + * @erratum: The erratum workaround to enable 38 + * 39 + * The function needs to be called before amd_iommu_init_device(). 40 + * Possible values for the erratum number are for now: 41 + * - AMD_PRI_DEV_ERRATUM_ENABLE_RESET - Reset PRI capability when PRI 42 + * is enabled 43 + * - AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE - Limit number of outstanding PRI 44 + * requests to one 45 + */ 46 + #define AMD_PRI_DEV_ERRATUM_ENABLE_RESET 0 47 + #define AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE 1 48 + 49 + extern void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum); 50 + 51 + /** 52 + * amd_iommu_init_device() - Init device for use with IOMMUv2 driver 53 + * @pdev: The PCI device to initialize 54 + * @pasids: Number of PASIDs to support for this device 55 + * 56 + * This function does all setup for the device pdev so that it can be 57 + * used with IOMMUv2. 58 + * Returns 0 on success or negative value on error. 
59 + */ 60 + extern int amd_iommu_init_device(struct pci_dev *pdev, int pasids); 61 + 62 + /** 63 + * amd_iommu_free_device() - Free all IOMMUv2 related device resources 64 + * and disable IOMMUv2 usage for this device 65 + * @pdev: The PCI device to disable IOMMUv2 usage for 66 + */ 67 + extern void amd_iommu_free_device(struct pci_dev *pdev); 68 + 69 + /** 70 + * amd_iommu_bind_pasid() - Bind a given task to a PASID on a device 71 + * @pdev: The PCI device to bind the task to 72 + * @pasid: The PASID on the device the task should be bound to 73 + * @task: the task to bind 74 + * 75 + * The function returns 0 on success or a negative value on error. 76 + */ 77 + extern int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid, 78 + struct task_struct *task); 79 + 80 + /** 81 + * amd_iommu_unbind_pasid() - Unbind a PASID from its task on 82 + * a device 83 + * @pdev: The device of the PASID 84 + * @pasid: The PASID to unbind 85 + * 86 + * When this function returns the device is no longer using the PASID 87 + * and the PASID is no longer bound to its task. 88 + */ 89 + extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid); 90 + 91 + /** 92 + * amd_iommu_set_invalid_ppr_cb() - Register a call-back for failed 93 + * PRI requests 94 + * @pdev: The PCI device the call-back should be registered for 95 + * @cb: The call-back function 96 + * 97 + * The IOMMUv2 driver invokes this call-back when it is unable to 98 + * successfully handle a PRI request. The device driver can then decide 99 + * which PRI response the device should see. 
Possible return values for 100 + * the call-back are: 101 + * 102 + * - AMD_IOMMU_INV_PRI_RSP_SUCCESS - Send SUCCESS back to the device 103 + * - AMD_IOMMU_INV_PRI_RSP_INVALID - Send INVALID back to the device 104 + * - AMD_IOMMU_INV_PRI_RSP_FAIL - Send Failure back to the device, 105 + * the device is required to disable 106 + * PRI when it receives this response 107 + * 108 + * The function returns 0 on success or negative value on error. 109 + */ 110 + #define AMD_IOMMU_INV_PRI_RSP_SUCCESS 0 111 + #define AMD_IOMMU_INV_PRI_RSP_INVALID 1 112 + #define AMD_IOMMU_INV_PRI_RSP_FAIL 2 113 + 114 + typedef int (*amd_iommu_invalid_ppr_cb)(struct pci_dev *pdev, 115 + int pasid, 116 + unsigned long address, 117 + u16); 118 + 119 + extern int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev, 120 + amd_iommu_invalid_ppr_cb cb); 121 + 122 + /** 123 + * amd_iommu_device_info() - Get information about IOMMUv2 support of a 124 + * PCI device 125 + * @pdev: PCI device to query information from 126 + * @info: A pointer to an amd_iommu_device_info structure which will contain 127 + * the information about the PCI device 128 + * 129 + * Returns 0 on success, negative value on error 130 + */ 131 + 132 + #define AMD_IOMMU_DEVICE_FLAG_ATS_SUP 0x1 /* ATS feature supported */ 133 + #define AMD_IOMMU_DEVICE_FLAG_PRI_SUP 0x2 /* PRI feature supported */ 134 + #define AMD_IOMMU_DEVICE_FLAG_PASID_SUP 0x4 /* PASID context supported */ 135 + #define AMD_IOMMU_DEVICE_FLAG_EXEC_SUP 0x8 /* Device may request execution 136 + on memory pages */ 137 + #define AMD_IOMMU_DEVICE_FLAG_PRIV_SUP 0x10 /* Device may request 138 + super-user privileges */ 139 + 140 + struct amd_iommu_device_info { 141 + int max_pasids; 142 + u32 flags; 143 + }; 144 + 145 + extern int amd_iommu_device_info(struct pci_dev *pdev, 146 + struct amd_iommu_device_info *info); 147 + 148 + /** 149 + * amd_iommu_set_invalidate_ctx_cb() - Register a call-back for invalidating 150 + * a pasid context. 
This call-back is 151 + * invoked when the IOMMUv2 driver needs to 152 + * invalidate a PASID context, for example 153 + * because the task that is bound to that 154 + * context is about to exit. 155 + * 156 + * @pdev: The PCI device the call-back should be registered for 157 + * @cb: The call-back function 158 + */ 159 + 160 + typedef void (*amd_iommu_invalidate_ctx)(struct pci_dev *pdev, int pasid); 161 + 162 + extern int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev, 163 + amd_iommu_invalidate_ctx cb); 28 164 29 165 #else 30 166
+20 -6
include/linux/iommu.h
··· 48 48 49 49 #ifdef CONFIG_IOMMU_API 50 50 51 + /** 52 + * struct iommu_ops - iommu ops and capabilities 53 + * @domain_init: init iommu domain 54 + * @domain_destroy: destroy iommu domain 55 + * @attach_dev: attach device to an iommu domain 56 + * @detach_dev: detach device from an iommu domain 57 + * @map: map a physically contiguous memory region to an iommu domain 58 + * @unmap: unmap a physically contiguous memory region from an iommu domain 59 + * @iova_to_phys: translate iova to physical address 60 + * @domain_has_cap: domain capabilities query 61 + * @commit: commit iommu domain 62 + * @pgsize_bitmap: bitmap of supported page sizes 63 + */ 51 64 struct iommu_ops { 52 65 int (*domain_init)(struct iommu_domain *domain); 53 66 void (*domain_destroy)(struct iommu_domain *domain); 54 67 int (*attach_dev)(struct iommu_domain *domain, struct device *dev); 55 68 void (*detach_dev)(struct iommu_domain *domain, struct device *dev); 56 69 int (*map)(struct iommu_domain *domain, unsigned long iova, 57 - phys_addr_t paddr, int gfp_order, int prot); 58 - int (*unmap)(struct iommu_domain *domain, unsigned long iova, 59 - int gfp_order); 70 + phys_addr_t paddr, size_t size, int prot); 71 + size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, 72 + size_t size); 60 73 phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, 61 74 unsigned long iova); 62 75 int (*domain_has_cap)(struct iommu_domain *domain, 63 76 unsigned long cap); 77 + unsigned long pgsize_bitmap; 64 78 }; 65 79 66 80 extern int bus_set_iommu(struct bus_type *bus, struct iommu_ops *ops); ··· 86 72 extern void iommu_detach_device(struct iommu_domain *domain, 87 73 struct device *dev); 88 74 extern int iommu_map(struct iommu_domain *domain, unsigned long iova, 89 - phys_addr_t paddr, int gfp_order, int prot); 90 - extern int iommu_unmap(struct iommu_domain *domain, unsigned long iova, 91 - int gfp_order); 75 + phys_addr_t paddr, size_t size, int prot); 76 + extern size_t iommu_unmap(struct 
iommu_domain *domain, unsigned long iova, 77 + size_t size); 92 78 extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, 93 79 unsigned long iova); 94 80 extern int iommu_domain_has_cap(struct iommu_domain *domain,
+3
include/linux/msi.h
··· 1 1 #ifndef LINUX_MSI_H 2 2 #define LINUX_MSI_H 3 3 4 + #include <linux/kobject.h> 4 5 #include <linux/list.h> 5 6 6 7 struct msi_msg { ··· 45 44 46 45 /* Last set MSI message */ 47 46 struct msi_msg msg; 47 + 48 + struct kobject kobj; 48 49 }; 49 50 50 51 /*
+2 -2
include/linux/pci-aspm.h
··· 29 29 extern void pcie_aspm_powersave_config_link(struct pci_dev *pdev); 30 30 extern void pci_disable_link_state(struct pci_dev *pdev, int state); 31 31 extern void pci_disable_link_state_locked(struct pci_dev *pdev, int state); 32 - extern void pcie_clear_aspm(void); 32 + extern void pcie_clear_aspm(struct pci_bus *bus); 33 33 extern void pcie_no_aspm(void); 34 34 #else 35 35 static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) ··· 47 47 static inline void pci_disable_link_state(struct pci_dev *pdev, int state) 48 48 { 49 49 } 50 - static inline void pcie_clear_aspm(void) 50 + static inline void pcie_clear_aspm(struct pci_bus *bus) 51 51 { 52 52 } 53 53 static inline void pcie_no_aspm(void)
+1
include/linux/pci.h
··· 336 336 struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */ 337 337 #ifdef CONFIG_PCI_MSI 338 338 struct list_head msi_list; 339 + struct kset *msi_kset; 339 340 #endif 340 341 struct pci_vpd *vpd; 341 342 #ifdef CONFIG_PCI_ATS
+18 -16
include/linux/pci_regs.h
··· 537 537 #define PCI_EXT_CAP_ID_ARI 14 538 538 #define PCI_EXT_CAP_ID_ATS 15 539 539 #define PCI_EXT_CAP_ID_SRIOV 16 540 + #define PCI_EXT_CAP_ID_PRI 19 540 541 #define PCI_EXT_CAP_ID_LTR 24 542 + #define PCI_EXT_CAP_ID_PASID 27 541 543 542 544 /* Advanced Error Reporting */ 543 545 #define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */ ··· 666 664 #define PCI_ATS_MIN_STU 12 /* shift of minimum STU block */ 667 665 668 666 /* Page Request Interface */ 669 - #define PCI_PRI_CAP 0x13 /* PRI capability ID */ 670 - #define PCI_PRI_CONTROL_OFF 0x04 /* Offset of control register */ 671 - #define PCI_PRI_STATUS_OFF 0x06 /* Offset of status register */ 672 - #define PCI_PRI_ENABLE 0x0001 /* Enable mask */ 673 - #define PCI_PRI_RESET 0x0002 /* Reset bit mask */ 674 - #define PCI_PRI_STATUS_RF 0x0001 /* Request Failure */ 675 - #define PCI_PRI_STATUS_UPRGI 0x0002 /* Unexpected PRG index */ 676 - #define PCI_PRI_STATUS_STOPPED 0x0100 /* PRI Stopped */ 677 - #define PCI_PRI_MAX_REQ_OFF 0x08 /* Cap offset for max reqs supported */ 678 - #define PCI_PRI_ALLOC_REQ_OFF 0x0c /* Cap offset for max reqs allowed */ 667 + #define PCI_PRI_CTRL 0x04 /* PRI control register */ 668 + #define PCI_PRI_CTRL_ENABLE 0x01 /* Enable */ 669 + #define PCI_PRI_CTRL_RESET 0x02 /* Reset */ 670 + #define PCI_PRI_STATUS 0x06 /* PRI status register */ 671 + #define PCI_PRI_STATUS_RF 0x001 /* Response Failure */ 672 + #define PCI_PRI_STATUS_UPRGI 0x002 /* Unexpected PRG index */ 673 + #define PCI_PRI_STATUS_STOPPED 0x100 /* PRI Stopped */ 674 + #define PCI_PRI_MAX_REQ 0x08 /* PRI max reqs supported */ 675 + #define PCI_PRI_ALLOC_REQ 0x0c /* PRI max reqs allowed */ 679 676 680 677 /* PASID capability */ 681 - #define PCI_PASID_CAP 0x1b /* PASID capability ID */ 682 - #define PCI_PASID_CAP_OFF 0x04 /* PASID feature register */ 683 - #define PCI_PASID_CONTROL_OFF 0x06 /* PASID control register */ 684 - #define PCI_PASID_ENABLE 0x01 /* Enable/Supported bit */ 685 - #define PCI_PASID_EXEC 0x02 /* 
Exec permissions Enable/Supported */ 686 - #define PCI_PASID_PRIV 0x04 /* Priviledge Mode Enable/Support */ 678 + #define PCI_PASID_CAP 0x04 /* PASID feature register */ 679 + #define PCI_PASID_CAP_EXEC 0x02 /* Exec permissions Supported */ 680 + #define PCI_PASID_CAP_PRIV 0x04 /* Privilege Mode Supported */ 681 + #define PCI_PASID_CTRL 0x06 /* PASID control register */ 682 + #define PCI_PASID_CTRL_ENABLE 0x01 /* Enable bit */ 683 + #define PCI_PASID_CTRL_EXEC 0x02 /* Exec permissions Enable */ 684 + #define PCI_PASID_CTRL_PRIV 0x04 /* Privilege Mode Enable */ 687 685 688 686 /* Single Root I/O Virtualization */ 689 687 #define PCI_SRIOV_CAP 0x04 /* SR-IOV Capabilities */
+4 -4
virt/kvm/iommu.c
··· 113 113 114 114 /* Map into IO address space */ 115 115 r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn), 116 - get_order(page_size), flags); 116 + page_size, flags); 117 117 if (r) { 118 118 printk(KERN_ERR "kvm_iommu_map_address:" 119 119 "iommu failed to map pfn=%llx\n", pfn); ··· 292 292 293 293 while (gfn < end_gfn) { 294 294 unsigned long unmap_pages; 295 - int order; 295 + size_t size; 296 296 297 297 /* Get physical address */ 298 298 phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn)); 299 299 pfn = phys >> PAGE_SHIFT; 300 300 301 301 /* Unmap address from IO address space */ 302 - order = iommu_unmap(domain, gfn_to_gpa(gfn), 0); 303 - unmap_pages = 1ULL << order; 302 + size = iommu_unmap(domain, gfn_to_gpa(gfn), PAGE_SIZE); 303 + unmap_pages = 1ULL << get_order(size); 304 304 305 305 /* Unpin all pages we just unmapped to not leak any memory */ 306 306 kvm_unpin_pages(kvm, pfn, unmap_pages);