Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/linux-2.6-iommu

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/linux-2.6-iommu: (89 commits)
AMD IOMMU: remove now unnecessary #ifdefs
AMD IOMMU: prealloc_protection_domains should be static
kvm/iommu: fix compile warning
AMD IOMMU: add statistics about total number of map requests
AMD IOMMU: add statistics about allocated io memory
AMD IOMMU: add stats counter for domain tlb flushes
AMD IOMMU: add stats counter for single iommu domain tlb flushes
AMD IOMMU: add stats counter for cross-page request
AMD IOMMU: add stats counter for free_coherent requests
AMD IOMMU: add stats counter for alloc_coherent requests
AMD IOMMU: add stats counter for unmap_sg requests
AMD IOMMU: add stats counter for map_sg requests
AMD IOMMU: add stats counter for unmap_single requests
AMD IOMMU: add stats counter for map_single requests
AMD IOMMU: add stats counter for completion wait events
AMD IOMMU: add init code for statistic collection
AMD IOMMU: add necessary header defines for stats counting
AMD IOMMU: add Kconfig entry for statistic collection code
AMD IOMMU: use dev_name in iommu_enable function
AMD IOMMU: use calc_devid in prealloc_protection_domains
...

+1907 -477
+3
arch/ia64/Kconfig
···
 
 config IOMMU_HELPER
 	def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC || SWIOTLB)
+
+config IOMMU_API
+	def_bool (DMAR)
+1 -1
arch/ia64/include/asm/kvm_host.h
···
 	struct kvm_sal_data rdv_sal_data;
 
 	struct list_head assigned_dev_head;
-	struct dmar_domain *intel_iommu_domain;
+	struct iommu_domain *iommu_domain;
 	struct hlist_head irq_ack_notifier_list;
 
 	unsigned long irq_sources_bitmap;
+2 -2
arch/ia64/kvm/Makefile
···
 common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
 		coalesced_mmio.o irq_comm.o)
 
-ifeq ($(CONFIG_DMAR),y)
-common-objs += $(addprefix ../../../virt/kvm/, vtd.o)
+ifeq ($(CONFIG_IOMMU_API),y)
+common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
 endif
 
 kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
+2 -1
arch/ia64/kvm/kvm-ia64.c
···
 #include <linux/bitops.h>
 #include <linux/hrtimer.h>
 #include <linux/uaccess.h>
+#include <linux/iommu.h>
 #include <linux/intel-iommu.h>
 
 #include <asm/pgtable.h>
···
 		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
 		break;
 	case KVM_CAP_IOMMU:
-		r = intel_iommu_found();
+		r = iommu_found();
 		break;
 	default:
 		r = 0;
+13
arch/x86/Kconfig
···
 	  your BIOS for an option to enable it or if you have an IVRS ACPI
 	  table.
 
+config AMD_IOMMU_STATS
+	bool "Export AMD IOMMU statistics to debugfs"
+	depends on AMD_IOMMU
+	select DEBUG_FS
+	help
+	  This option enables code in the AMD IOMMU driver to collect various
+	  statistics about whats happening in the driver and exports that
+	  information to userspace via debugfs.
+	  If unsure, say N.
+
 # need this always selected by IOMMU for the VIA workaround
 config SWIOTLB
 	def_bool y if X86_64
···
 
 config IOMMU_HELPER
 	def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU)
+
+config IOMMU_API
+	def_bool (AMD_IOMMU || DMAR)
 
 config MAXSMP
 	bool "Configure Maximum number of SMP Processors and NUMA Nodes"
+42 -19
arch/x86/include/asm/amd_iommu_types.h
···
 /* FIXME: move this macro to <linux/pci.h> */
 #define PCI_BUS(x) (((x) >> 8) & 0xff)
 
+/* Protection domain flags */
+#define PD_DMA_OPS_MASK		(1UL << 0) /* domain used for dma_ops */
+#define PD_DEFAULT_MASK		(1UL << 1) /* domain is a default dma_ops
+					      domain for an IOMMU */
+
 /*
  * This structure contains generic data for IOMMU protection domains
  * independent of their use.
  */
 struct protection_domain {
-	spinlock_t lock;	/* mostly used to lock the page table*/
-	u16 id;			/* the domain id written to the device table */
-	int mode;		/* paging mode (0-6 levels) */
-	u64 *pt_root;		/* page table root pointer */
-	void *priv;		/* private data */
+	spinlock_t lock;	/* mostly used to lock the page table*/
+	u16 id;			/* the domain id written to the device table */
+	int mode;		/* paging mode (0-6 levels) */
+	u64 *pt_root;		/* page table root pointer */
+	unsigned long flags;	/* flags to find out type of domain */
+	unsigned dev_cnt;	/* devices assigned to this domain */
+	void *priv;		/* private data */
 };
···
 	bool int_enabled;
 
 	/* if one, we need to send a completion wait command */
-	int need_sync;
+	bool need_sync;
 
 	/* default dma_ops domain for that IOMMU */
 	struct dma_ops_domain *default_dom;
···
 extern unsigned long *amd_iommu_pd_alloc_bitmap;
 
 /* will be 1 if device isolation is enabled */
-extern int amd_iommu_isolate;
+extern bool amd_iommu_isolate;
 
 /*
  * If true, the addresses will be flushed on unmap time, not when
···
  */
 extern bool amd_iommu_unmap_flush;
 
-/* takes a PCI device id and prints it out in a readable form */
-static inline void print_devid(u16 devid, int nl)
-{
-	int bus = devid >> 8;
-	int dev = devid >> 3 & 0x1f;
-	int fn  = devid & 0x07;
-
-	printk("%02x:%02x.%x", bus, dev, fn);
-	if (nl)
-		printk("\n");
-}
-
 /* takes bus and device/function and returns the device id
  * FIXME: should that be in generic PCI code? */
 static inline u16 calc_devid(u8 bus, u8 devfn)
 {
 	return (((u16)bus) << 8) | devfn;
 }
+
+#ifdef CONFIG_AMD_IOMMU_STATS
+
+struct __iommu_counter {
+	char *name;
+	struct dentry *dent;
+	u64 value;
+};
+
+#define DECLARE_STATS_COUNTER(nm) \
+	static struct __iommu_counter nm = {	\
+		.name = #nm,			\
+	}
+
+#define INC_STATS_COUNTER(name)		name.value += 1
+#define ADD_STATS_COUNTER(name, x)	name.value += (x)
+#define SUB_STATS_COUNTER(name, x)	name.value -= (x)
+
+#else /* CONFIG_AMD_IOMMU_STATS */
+
+#define DECLARE_STATS_COUNTER(name)
+#define INC_STATS_COUNTER(name)
+#define ADD_STATS_COUNTER(name, x)
+#define SUB_STATS_COUNTER(name, x)
+
+static inline void amd_iommu_stats_init(void) { }
+
+#endif /* CONFIG_AMD_IOMMU_STATS */
 
 #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */
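[Editor's note: a minimal, hypothetical sketch of how the counter macros above are meant to be used. The counter name cnt_example_op is made up for illustration; the real counters (cnt_map_single, compl_wait, alloced_io_mem, ...) are declared in amd_iommu.c, and a counter only shows up under debugfs once amd_iommu_stats_add() registers it from amd_iommu_stats_init(). With CONFIG_AMD_IOMMU_STATS=n everything compiles away to nothing.]

	/* file scope: declare a counter; becomes a debugfs u64 once registered */
	DECLARE_STATS_COUNTER(cnt_example_op);
	DECLARE_STATS_COUNTER(example_io_mem);

	static void example_op(size_t size)
	{
		INC_STATS_COUNTER(cnt_example_op);	 /* count this request */
		ADD_STATS_COUNTER(example_io_mem, size); /* track bytes mapped */
	}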
+1 -1
arch/x86/include/asm/kvm_host.h
···
 	struct list_head active_mmu_pages;
 	struct list_head assigned_dev_head;
 	struct list_head oos_global_pages;
-	struct dmar_domain *intel_iommu_domain;
+	struct iommu_domain *iommu_domain;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
+607 -59
arch/x86/kernel/amd_iommu.c
··· 20 #include <linux/pci.h> 21 #include <linux/gfp.h> 22 #include <linux/bitops.h> 23 #include <linux/scatterlist.h> 24 #include <linux/iommu-helper.h> 25 #include <asm/proto.h> 26 #include <asm/iommu.h> 27 #include <asm/gart.h> ··· 42 static LIST_HEAD(iommu_pd_list); 43 static DEFINE_SPINLOCK(iommu_pd_list_lock); 44 45 /* 46 * general struct to manage commands send to an IOMMU 47 */ ··· 55 56 static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, 57 struct unity_map_entry *e); 58 59 /* returns !0 if the IOMMU is caching non-present entries in its TLB */ 60 static int iommu_has_npcache(struct amd_iommu *iommu) ··· 259 spin_lock_irqsave(&iommu->lock, flags); 260 ret = __iommu_queue_command(iommu, cmd); 261 if (!ret) 262 - iommu->need_sync = 1; 263 spin_unlock_irqrestore(&iommu->lock, flags); 264 265 return ret; 266 } 267 268 /* 269 - * This function is called whenever we need to ensure that the IOMMU has 270 - * completed execution of all commands we sent. It sends a 271 - * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs 272 - * us about that by writing a value to a physical address we pass with 273 - * the command. 274 */ 275 - static int iommu_completion_wait(struct amd_iommu *iommu) 276 { 277 - int ret = 0, ready = 0; 278 unsigned status = 0; 279 - struct iommu_cmd cmd; 280 - unsigned long flags, i = 0; 281 282 - memset(&cmd, 0, sizeof(cmd)); 283 - cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; 284 - CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); 285 - 286 - spin_lock_irqsave(&iommu->lock, flags); 287 - 288 - if (!iommu->need_sync) 289 - goto out; 290 - 291 - iommu->need_sync = 0; 292 - 293 - ret = __iommu_queue_command(iommu, &cmd); 294 - 295 - if (ret) 296 - goto out; 297 298 while (!ready && (i < EXIT_LOOP_COUNT)) { 299 ++i; ··· 290 291 if (unlikely(i == EXIT_LOOP_COUNT)) 292 panic("AMD IOMMU: Completion wait loop failed\n"); 293 294 out: 295 spin_unlock_irqrestore(&iommu->lock, flags); ··· 358 return ret; 359 } 360 361 /* 362 * Generic command send function for invalidaing TLB entries 363 */ ··· 382 struct iommu_cmd cmd; 383 int ret; 384 385 - memset(&cmd, 0, sizeof(cmd)); 386 - address &= PAGE_MASK; 387 - CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES); 388 - cmd.data[1] |= domid; 389 - cmd.data[2] = lower_32_bits(address); 390 - cmd.data[3] = upper_32_bits(address); 391 - if (s) /* size bit - we flush more than one 4kb page */ 392 - cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; 393 - if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ 394 - cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; 395 396 ret = iommu_queue_command(iommu, &cmd); 397 ··· 421 { 422 u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; 423 424 iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); 425 } 426 427 /**************************************************************************** ··· 464 * supporting all features of AMD IOMMU page tables like level skipping 465 * and full 64 bit address spaces. 466 */ 467 - static int iommu_map(struct protection_domain *dom, 468 - unsigned long bus_addr, 469 - unsigned long phys_addr, 470 - int prot) 471 { 472 u64 __pte, *pte, *page; 473 ··· 512 *pte = __pte; 513 514 return 0; 515 } 516 517 /* ··· 588 589 for (addr = e->address_start; addr < e->address_end; 590 addr += PAGE_SIZE) { 591 - ret = iommu_map(&dma_dom->domain, addr, addr, e->prot); 592 if (ret) 593 return ret; 594 /* ··· 719 return id; 720 } 721 722 /* 723 * Used to reserve address ranges in the aperture (e.g. for exclusion 724 * ranges. 
··· 745 iommu_area_reserve(dom->bitmap, start_page, pages); 746 } 747 748 - static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) 749 { 750 int i, j; 751 u64 *p1, *p2, *p3; 752 753 - p1 = dma_dom->domain.pt_root; 754 755 if (!p1) 756 return; ··· 771 } 772 773 free_page((unsigned long)p1); 774 } 775 776 /* ··· 784 if (!dom) 785 return; 786 787 - dma_ops_free_pagetable(dom); 788 789 kfree(dom->pte_pages); 790 ··· 823 goto free_dma_dom; 824 dma_dom->domain.mode = PAGE_MODE_3_LEVEL; 825 dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); 826 dma_dom->domain.priv = dma_dom; 827 if (!dma_dom->domain.pt_root) 828 goto free_dma_dom; ··· 886 } 887 888 /* 889 * Find out the protection domain structure for a given PCI device. This 890 * will give us the pointer to the page table root for example. 891 */ ··· 914 * If a device is not yet associated with a domain, this function does 915 * assigns it visible for the hardware 916 */ 917 - static void set_device_domain(struct amd_iommu *iommu, 918 - struct protection_domain *domain, 919 - u16 devid) 920 { 921 unsigned long flags; 922 - 923 u64 pte_root = virt_to_phys(domain->pt_root); 924 925 pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) 926 << DEV_ENTRY_MODE_SHIFT; ··· 937 938 iommu_queue_inv_dev_entry(iommu, devid); 939 } 940 941 /***************************************************************************** 942 * ··· 1083 list_for_each_entry(entry, &iommu_pd_list, list) { 1084 if (entry->target_dev == devid) { 1085 ret = entry; 1086 - list_del(&ret->list); 1087 break; 1088 } 1089 } ··· 1133 if (!dma_dom) 1134 dma_dom = (*iommu)->default_dom; 1135 *domain = &dma_dom->domain; 1136 - set_device_domain(*iommu, *domain, *bdf); 1137 printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " 1138 - "device ", (*domain)->id); 1139 - print_devid(_bdf, 1); 1140 } 1141 1142 if (domain_for_device(_bdf) == NULL) 1143 - set_device_domain(*iommu, *domain, _bdf); 1144 1145 return 1; 1146 } ··· 1225 pages = iommu_num_pages(paddr, size, PAGE_SIZE); 1226 paddr &= PAGE_MASK; 1227 1228 if (align) 1229 align_mask = (1UL << get_order(size)) - 1; 1230 ··· 1245 start += PAGE_SIZE; 1246 } 1247 address += offset; 1248 1249 if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { 1250 iommu_flush_tlb(iommu, dma_dom->domain.id); ··· 1284 start += PAGE_SIZE; 1285 } 1286 1287 dma_ops_free_addresses(dma_dom, dma_addr, pages); 1288 1289 if (amd_iommu_unmap_flush || dma_dom->need_flush) { ··· 1307 dma_addr_t addr; 1308 u64 dma_mask; 1309 1310 if (!check_device(dev)) 1311 return bad_dma_address; 1312 ··· 1319 if (iommu == NULL || domain == NULL) 1320 /* device not handled by any AMD IOMMU */ 1321 return (dma_addr_t)paddr; 1322 1323 spin_lock_irqsave(&domain->lock, flags); 1324 addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, ··· 1348 struct protection_domain *domain; 1349 u16 devid; 1350 1351 if (!check_device(dev) || 1352 !get_device_resources(dev, &iommu, &domain, &devid)) 1353 /* device not handled by any AMD IOMMU */ 1354 return; 1355 1356 spin_lock_irqsave(&domain->lock, flags); ··· 1402 int mapped_elems = 0; 1403 u64 dma_mask; 1404 1405 if (!check_device(dev)) 1406 return 0; 1407 ··· 1413 1414 if (!iommu || !domain) 1415 return map_sg_no_iommu(dev, sglist, nelems, dir); 1416 1417 spin_lock_irqsave(&domain->lock, flags); 1418 ··· 1466 u16 devid; 1467 int i; 1468 1469 if (!check_device(dev) || 1470 !get_device_resources(dev, &iommu, &domain, &devid)) 1471 return; 1472 1473 spin_lock_irqsave(&domain->lock, flags); ··· 1502 
phys_addr_t paddr; 1503 u64 dma_mask = dev->coherent_dma_mask; 1504 1505 if (!check_device(dev)) 1506 return NULL; 1507 ··· 1522 return virt_addr; 1523 } 1524 1525 if (!dma_mask) 1526 dma_mask = *dev->dma_mask; 1527 ··· 1533 *dma_addr = __map_single(dev, iommu, domain->priv, paddr, 1534 size, DMA_BIDIRECTIONAL, true, dma_mask); 1535 1536 - if (*dma_addr == bad_dma_address) { 1537 - free_pages((unsigned long)virt_addr, get_order(size)); 1538 - virt_addr = NULL; 1539 - goto out; 1540 - } 1541 1542 iommu_completion_wait(iommu); 1543 1544 - out: 1545 spin_unlock_irqrestore(&domain->lock, flags); 1546 1547 return virt_addr; 1548 } 1549 1550 /* ··· 1560 struct protection_domain *domain; 1561 u16 devid; 1562 1563 if (!check_device(dev)) 1564 return; 1565 1566 get_device_resources(dev, &iommu, &domain, &devid); 1567 1568 if (!iommu || !domain) 1569 goto free_mem; 1570 1571 spin_lock_irqsave(&domain->lock, flags); ··· 1625 u16 devid; 1626 1627 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { 1628 - devid = (dev->bus->number << 8) | dev->devfn; 1629 if (devid > amd_iommu_last_bdf) 1630 continue; 1631 devid = amd_iommu_alias_table[devid]; ··· 1672 iommu->default_dom = dma_ops_domain_alloc(iommu, order); 1673 if (iommu->default_dom == NULL) 1674 return -ENOMEM; 1675 ret = iommu_init_unity_mappings(iommu); 1676 if (ret) 1677 goto free_domains; ··· 1696 /* Make the driver finally visible to the drivers */ 1697 dma_ops = &amd_iommu_dma_ops; 1698 1699 return 0; 1700 1701 free_domains: ··· 1713 1714 return ret; 1715 }
··· 20 #include <linux/pci.h> 21 #include <linux/gfp.h> 22 #include <linux/bitops.h> 23 + #include <linux/debugfs.h> 24 #include <linux/scatterlist.h> 25 #include <linux/iommu-helper.h> 26 + #ifdef CONFIG_IOMMU_API 27 + #include <linux/iommu.h> 28 + #endif 29 #include <asm/proto.h> 30 #include <asm/iommu.h> 31 #include <asm/gart.h> ··· 38 static LIST_HEAD(iommu_pd_list); 39 static DEFINE_SPINLOCK(iommu_pd_list_lock); 40 41 + #ifdef CONFIG_IOMMU_API 42 + static struct iommu_ops amd_iommu_ops; 43 + #endif 44 + 45 /* 46 * general struct to manage commands send to an IOMMU 47 */ ··· 47 48 static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, 49 struct unity_map_entry *e); 50 + static struct dma_ops_domain *find_protection_domain(u16 devid); 51 + 52 + 53 + #ifdef CONFIG_AMD_IOMMU_STATS 54 + 55 + /* 56 + * Initialization code for statistics collection 57 + */ 58 + 59 + DECLARE_STATS_COUNTER(compl_wait); 60 + DECLARE_STATS_COUNTER(cnt_map_single); 61 + DECLARE_STATS_COUNTER(cnt_unmap_single); 62 + DECLARE_STATS_COUNTER(cnt_map_sg); 63 + DECLARE_STATS_COUNTER(cnt_unmap_sg); 64 + DECLARE_STATS_COUNTER(cnt_alloc_coherent); 65 + DECLARE_STATS_COUNTER(cnt_free_coherent); 66 + DECLARE_STATS_COUNTER(cross_page); 67 + DECLARE_STATS_COUNTER(domain_flush_single); 68 + DECLARE_STATS_COUNTER(domain_flush_all); 69 + DECLARE_STATS_COUNTER(alloced_io_mem); 70 + DECLARE_STATS_COUNTER(total_map_requests); 71 + 72 + static struct dentry *stats_dir; 73 + static struct dentry *de_isolate; 74 + static struct dentry *de_fflush; 75 + 76 + static void amd_iommu_stats_add(struct __iommu_counter *cnt) 77 + { 78 + if (stats_dir == NULL) 79 + return; 80 + 81 + cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir, 82 + &cnt->value); 83 + } 84 + 85 + static void amd_iommu_stats_init(void) 86 + { 87 + stats_dir = debugfs_create_dir("amd-iommu", NULL); 88 + if (stats_dir == NULL) 89 + return; 90 + 91 + de_isolate = debugfs_create_bool("isolation", 0444, stats_dir, 92 + (u32 *)&amd_iommu_isolate); 93 + 94 + de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir, 95 + (u32 *)&amd_iommu_unmap_flush); 96 + 97 + amd_iommu_stats_add(&compl_wait); 98 + amd_iommu_stats_add(&cnt_map_single); 99 + amd_iommu_stats_add(&cnt_unmap_single); 100 + amd_iommu_stats_add(&cnt_map_sg); 101 + amd_iommu_stats_add(&cnt_unmap_sg); 102 + amd_iommu_stats_add(&cnt_alloc_coherent); 103 + amd_iommu_stats_add(&cnt_free_coherent); 104 + amd_iommu_stats_add(&cross_page); 105 + amd_iommu_stats_add(&domain_flush_single); 106 + amd_iommu_stats_add(&domain_flush_all); 107 + amd_iommu_stats_add(&alloced_io_mem); 108 + amd_iommu_stats_add(&total_map_requests); 109 + } 110 + 111 + #endif 112 113 /* returns !0 if the IOMMU is caching non-present entries in its TLB */ 114 static int iommu_has_npcache(struct amd_iommu *iommu) ··· 189 spin_lock_irqsave(&iommu->lock, flags); 190 ret = __iommu_queue_command(iommu, cmd); 191 if (!ret) 192 + iommu->need_sync = true; 193 spin_unlock_irqrestore(&iommu->lock, flags); 194 195 return ret; 196 } 197 198 /* 199 + * This function waits until an IOMMU has completed a completion 200 + * wait command 201 */ 202 + static void __iommu_wait_for_completion(struct amd_iommu *iommu) 203 { 204 + int ready = 0; 205 unsigned status = 0; 206 + unsigned long i = 0; 207 208 + INC_STATS_COUNTER(compl_wait); 209 210 while (!ready && (i < EXIT_LOOP_COUNT)) { 211 ++i; ··· 238 239 if (unlikely(i == EXIT_LOOP_COUNT)) 240 panic("AMD IOMMU: Completion wait loop failed\n"); 241 + } 242 + 243 + /* 244 + * This function queues a completion 
wait command into the command 245 + * buffer of an IOMMU 246 + */ 247 + static int __iommu_completion_wait(struct amd_iommu *iommu) 248 + { 249 + struct iommu_cmd cmd; 250 + 251 + memset(&cmd, 0, sizeof(cmd)); 252 + cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; 253 + CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); 254 + 255 + return __iommu_queue_command(iommu, &cmd); 256 + } 257 + 258 + /* 259 + * This function is called whenever we need to ensure that the IOMMU has 260 + * completed execution of all commands we sent. It sends a 261 + * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs 262 + * us about that by writing a value to a physical address we pass with 263 + * the command. 264 + */ 265 + static int iommu_completion_wait(struct amd_iommu *iommu) 266 + { 267 + int ret = 0; 268 + unsigned long flags; 269 + 270 + spin_lock_irqsave(&iommu->lock, flags); 271 + 272 + if (!iommu->need_sync) 273 + goto out; 274 + 275 + ret = __iommu_completion_wait(iommu); 276 + 277 + iommu->need_sync = false; 278 + 279 + if (ret) 280 + goto out; 281 + 282 + __iommu_wait_for_completion(iommu); 283 284 out: 285 spin_unlock_irqrestore(&iommu->lock, flags); ··· 264 return ret; 265 } 266 267 + static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, 268 + u16 domid, int pde, int s) 269 + { 270 + memset(cmd, 0, sizeof(*cmd)); 271 + address &= PAGE_MASK; 272 + CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); 273 + cmd->data[1] |= domid; 274 + cmd->data[2] = lower_32_bits(address); 275 + cmd->data[3] = upper_32_bits(address); 276 + if (s) /* size bit - we flush more than one 4kb page */ 277 + cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; 278 + if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ 279 + cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; 280 + } 281 + 282 /* 283 * Generic command send function for invalidaing TLB entries 284 */ ··· 273 struct iommu_cmd cmd; 274 int ret; 275 276 + __iommu_build_inv_iommu_pages(&cmd, address, domid, pde, s); 277 278 ret = iommu_queue_command(iommu, &cmd); 279 ··· 321 { 322 u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; 323 324 + INC_STATS_COUNTER(domain_flush_single); 325 + 326 iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); 327 + } 328 + 329 + /* 330 + * This function is used to flush the IO/TLB for a given protection domain 331 + * on every IOMMU in the system 332 + */ 333 + static void iommu_flush_domain(u16 domid) 334 + { 335 + unsigned long flags; 336 + struct amd_iommu *iommu; 337 + struct iommu_cmd cmd; 338 + 339 + INC_STATS_COUNTER(domain_flush_all); 340 + 341 + __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 342 + domid, 1, 1); 343 + 344 + list_for_each_entry(iommu, &amd_iommu_list, list) { 345 + spin_lock_irqsave(&iommu->lock, flags); 346 + __iommu_queue_command(iommu, &cmd); 347 + __iommu_completion_wait(iommu); 348 + __iommu_wait_for_completion(iommu); 349 + spin_unlock_irqrestore(&iommu->lock, flags); 350 + } 351 } 352 353 /**************************************************************************** ··· 338 * supporting all features of AMD IOMMU page tables like level skipping 339 * and full 64 bit address spaces. 
340 */ 341 + static int iommu_map_page(struct protection_domain *dom, 342 + unsigned long bus_addr, 343 + unsigned long phys_addr, 344 + int prot) 345 { 346 u64 __pte, *pte, *page; 347 ··· 386 *pte = __pte; 387 388 return 0; 389 + } 390 + 391 + static void iommu_unmap_page(struct protection_domain *dom, 392 + unsigned long bus_addr) 393 + { 394 + u64 *pte; 395 + 396 + pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)]; 397 + 398 + if (!IOMMU_PTE_PRESENT(*pte)) 399 + return; 400 + 401 + pte = IOMMU_PTE_PAGE(*pte); 402 + pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; 403 + 404 + if (!IOMMU_PTE_PRESENT(*pte)) 405 + return; 406 + 407 + pte = IOMMU_PTE_PAGE(*pte); 408 + pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; 409 + 410 + *pte = 0; 411 } 412 413 /* ··· 440 441 for (addr = e->address_start; addr < e->address_end; 442 addr += PAGE_SIZE) { 443 + ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot); 444 if (ret) 445 return ret; 446 /* ··· 571 return id; 572 } 573 574 + static void domain_id_free(int id) 575 + { 576 + unsigned long flags; 577 + 578 + write_lock_irqsave(&amd_iommu_devtable_lock, flags); 579 + if (id > 0 && id < MAX_DOMAIN_ID) 580 + __clear_bit(id, amd_iommu_pd_alloc_bitmap); 581 + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); 582 + } 583 + 584 /* 585 * Used to reserve address ranges in the aperture (e.g. for exclusion 586 * ranges. ··· 587 iommu_area_reserve(dom->bitmap, start_page, pages); 588 } 589 590 + static void free_pagetable(struct protection_domain *domain) 591 { 592 int i, j; 593 u64 *p1, *p2, *p3; 594 595 + p1 = domain->pt_root; 596 597 if (!p1) 598 return; ··· 613 } 614 615 free_page((unsigned long)p1); 616 + 617 + domain->pt_root = NULL; 618 } 619 620 /* ··· 624 if (!dom) 625 return; 626 627 + free_pagetable(&dom->domain); 628 629 kfree(dom->pte_pages); 630 ··· 663 goto free_dma_dom; 664 dma_dom->domain.mode = PAGE_MODE_3_LEVEL; 665 dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); 666 + dma_dom->domain.flags = PD_DMA_OPS_MASK; 667 dma_dom->domain.priv = dma_dom; 668 if (!dma_dom->domain.pt_root) 669 goto free_dma_dom; ··· 725 } 726 727 /* 728 + * little helper function to check whether a given protection domain is a 729 + * dma_ops domain 730 + */ 731 + static bool dma_ops_domain(struct protection_domain *domain) 732 + { 733 + return domain->flags & PD_DMA_OPS_MASK; 734 + } 735 + 736 + /* 737 * Find out the protection domain structure for a given PCI device. This 738 * will give us the pointer to the page table root for example. 
739 */ ··· 744 * If a device is not yet associated with a domain, this function does 745 * assigns it visible for the hardware 746 */ 747 + static void attach_device(struct amd_iommu *iommu, 748 + struct protection_domain *domain, 749 + u16 devid) 750 { 751 unsigned long flags; 752 u64 pte_root = virt_to_phys(domain->pt_root); 753 + 754 + domain->dev_cnt += 1; 755 756 pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) 757 << DEV_ENTRY_MODE_SHIFT; ··· 766 767 iommu_queue_inv_dev_entry(iommu, devid); 768 } 769 + 770 + /* 771 + * Removes a device from a protection domain (unlocked) 772 + */ 773 + static void __detach_device(struct protection_domain *domain, u16 devid) 774 + { 775 + 776 + /* lock domain */ 777 + spin_lock(&domain->lock); 778 + 779 + /* remove domain from the lookup table */ 780 + amd_iommu_pd_table[devid] = NULL; 781 + 782 + /* remove entry from the device table seen by the hardware */ 783 + amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; 784 + amd_iommu_dev_table[devid].data[1] = 0; 785 + amd_iommu_dev_table[devid].data[2] = 0; 786 + 787 + /* decrease reference counter */ 788 + domain->dev_cnt -= 1; 789 + 790 + /* ready */ 791 + spin_unlock(&domain->lock); 792 + } 793 + 794 + /* 795 + * Removes a device from a protection domain (with devtable_lock held) 796 + */ 797 + static void detach_device(struct protection_domain *domain, u16 devid) 798 + { 799 + unsigned long flags; 800 + 801 + /* lock device table */ 802 + write_lock_irqsave(&amd_iommu_devtable_lock, flags); 803 + __detach_device(domain, devid); 804 + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); 805 + } 806 + 807 + static int device_change_notifier(struct notifier_block *nb, 808 + unsigned long action, void *data) 809 + { 810 + struct device *dev = data; 811 + struct pci_dev *pdev = to_pci_dev(dev); 812 + u16 devid = calc_devid(pdev->bus->number, pdev->devfn); 813 + struct protection_domain *domain; 814 + struct dma_ops_domain *dma_domain; 815 + struct amd_iommu *iommu; 816 + int order = amd_iommu_aperture_order; 817 + unsigned long flags; 818 + 819 + if (devid > amd_iommu_last_bdf) 820 + goto out; 821 + 822 + devid = amd_iommu_alias_table[devid]; 823 + 824 + iommu = amd_iommu_rlookup_table[devid]; 825 + if (iommu == NULL) 826 + goto out; 827 + 828 + domain = domain_for_device(devid); 829 + 830 + if (domain && !dma_ops_domain(domain)) 831 + WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound " 832 + "to a non-dma-ops domain\n", dev_name(dev)); 833 + 834 + switch (action) { 835 + case BUS_NOTIFY_BOUND_DRIVER: 836 + if (domain) 837 + goto out; 838 + dma_domain = find_protection_domain(devid); 839 + if (!dma_domain) 840 + dma_domain = iommu->default_dom; 841 + attach_device(iommu, &dma_domain->domain, devid); 842 + printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " 843 + "device %s\n", dma_domain->domain.id, dev_name(dev)); 844 + break; 845 + case BUS_NOTIFY_UNBIND_DRIVER: 846 + if (!domain) 847 + goto out; 848 + detach_device(domain, devid); 849 + break; 850 + case BUS_NOTIFY_ADD_DEVICE: 851 + /* allocate a protection domain if a device is added */ 852 + dma_domain = find_protection_domain(devid); 853 + if (dma_domain) 854 + goto out; 855 + dma_domain = dma_ops_domain_alloc(iommu, order); 856 + if (!dma_domain) 857 + goto out; 858 + dma_domain->target_dev = devid; 859 + 860 + spin_lock_irqsave(&iommu_pd_list_lock, flags); 861 + list_add_tail(&dma_domain->list, &iommu_pd_list); 862 + spin_unlock_irqrestore(&iommu_pd_list_lock, flags); 863 + 864 + break; 865 + default: 866 + goto 
out; 867 + } 868 + 869 + iommu_queue_inv_dev_entry(iommu, devid); 870 + iommu_completion_wait(iommu); 871 + 872 + out: 873 + return 0; 874 + } 875 + 876 + struct notifier_block device_nb = { 877 + .notifier_call = device_change_notifier, 878 + }; 879 880 /***************************************************************************** 881 * ··· 802 list_for_each_entry(entry, &iommu_pd_list, list) { 803 if (entry->target_dev == devid) { 804 ret = entry; 805 break; 806 } 807 } ··· 853 if (!dma_dom) 854 dma_dom = (*iommu)->default_dom; 855 *domain = &dma_dom->domain; 856 + attach_device(*iommu, *domain, *bdf); 857 printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " 858 + "device %s\n", (*domain)->id, dev_name(dev)); 859 } 860 861 if (domain_for_device(_bdf) == NULL) 862 + attach_device(*iommu, *domain, _bdf); 863 864 return 1; 865 } ··· 946 pages = iommu_num_pages(paddr, size, PAGE_SIZE); 947 paddr &= PAGE_MASK; 948 949 + INC_STATS_COUNTER(total_map_requests); 950 + 951 + if (pages > 1) 952 + INC_STATS_COUNTER(cross_page); 953 + 954 if (align) 955 align_mask = (1UL << get_order(size)) - 1; 956 ··· 961 start += PAGE_SIZE; 962 } 963 address += offset; 964 + 965 + ADD_STATS_COUNTER(alloced_io_mem, size); 966 967 if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { 968 iommu_flush_tlb(iommu, dma_dom->domain.id); ··· 998 start += PAGE_SIZE; 999 } 1000 1001 + SUB_STATS_COUNTER(alloced_io_mem, size); 1002 + 1003 dma_ops_free_addresses(dma_dom, dma_addr, pages); 1004 1005 if (amd_iommu_unmap_flush || dma_dom->need_flush) { ··· 1019 dma_addr_t addr; 1020 u64 dma_mask; 1021 1022 + INC_STATS_COUNTER(cnt_map_single); 1023 + 1024 if (!check_device(dev)) 1025 return bad_dma_address; 1026 ··· 1029 if (iommu == NULL || domain == NULL) 1030 /* device not handled by any AMD IOMMU */ 1031 return (dma_addr_t)paddr; 1032 + 1033 + if (!dma_ops_domain(domain)) 1034 + return bad_dma_address; 1035 1036 spin_lock_irqsave(&domain->lock, flags); 1037 addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, ··· 1055 struct protection_domain *domain; 1056 u16 devid; 1057 1058 + INC_STATS_COUNTER(cnt_unmap_single); 1059 + 1060 if (!check_device(dev) || 1061 !get_device_resources(dev, &iommu, &domain, &devid)) 1062 /* device not handled by any AMD IOMMU */ 1063 + return; 1064 + 1065 + if (!dma_ops_domain(domain)) 1066 return; 1067 1068 spin_lock_irqsave(&domain->lock, flags); ··· 1104 int mapped_elems = 0; 1105 u64 dma_mask; 1106 1107 + INC_STATS_COUNTER(cnt_map_sg); 1108 + 1109 if (!check_device(dev)) 1110 return 0; 1111 ··· 1113 1114 if (!iommu || !domain) 1115 return map_sg_no_iommu(dev, sglist, nelems, dir); 1116 + 1117 + if (!dma_ops_domain(domain)) 1118 + return 0; 1119 1120 spin_lock_irqsave(&domain->lock, flags); 1121 ··· 1163 u16 devid; 1164 int i; 1165 1166 + INC_STATS_COUNTER(cnt_unmap_sg); 1167 + 1168 if (!check_device(dev) || 1169 !get_device_resources(dev, &iommu, &domain, &devid)) 1170 + return; 1171 + 1172 + if (!dma_ops_domain(domain)) 1173 return; 1174 1175 spin_lock_irqsave(&domain->lock, flags); ··· 1194 phys_addr_t paddr; 1195 u64 dma_mask = dev->coherent_dma_mask; 1196 1197 + INC_STATS_COUNTER(cnt_alloc_coherent); 1198 + 1199 if (!check_device(dev)) 1200 return NULL; 1201 ··· 1212 return virt_addr; 1213 } 1214 1215 + if (!dma_ops_domain(domain)) 1216 + goto out_free; 1217 + 1218 if (!dma_mask) 1219 dma_mask = *dev->dma_mask; 1220 ··· 1220 *dma_addr = __map_single(dev, iommu, domain->priv, paddr, 1221 size, DMA_BIDIRECTIONAL, true, dma_mask); 1222 1223 + if (*dma_addr == 
bad_dma_address) 1224 + goto out_free; 1225 1226 iommu_completion_wait(iommu); 1227 1228 spin_unlock_irqrestore(&domain->lock, flags); 1229 1230 return virt_addr; 1231 + 1232 + out_free: 1233 + 1234 + free_pages((unsigned long)virt_addr, get_order(size)); 1235 + 1236 + return NULL; 1237 } 1238 1239 /* ··· 1245 struct protection_domain *domain; 1246 u16 devid; 1247 1248 + INC_STATS_COUNTER(cnt_free_coherent); 1249 + 1250 if (!check_device(dev)) 1251 return; 1252 1253 get_device_resources(dev, &iommu, &domain, &devid); 1254 1255 if (!iommu || !domain) 1256 + goto free_mem; 1257 + 1258 + if (!dma_ops_domain(domain)) 1259 goto free_mem; 1260 1261 spin_lock_irqsave(&domain->lock, flags); ··· 1305 u16 devid; 1306 1307 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { 1308 + devid = calc_devid(dev->bus->number, dev->devfn); 1309 if (devid > amd_iommu_last_bdf) 1310 continue; 1311 devid = amd_iommu_alias_table[devid]; ··· 1352 iommu->default_dom = dma_ops_domain_alloc(iommu, order); 1353 if (iommu->default_dom == NULL) 1354 return -ENOMEM; 1355 + iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; 1356 ret = iommu_init_unity_mappings(iommu); 1357 if (ret) 1358 goto free_domains; ··· 1375 /* Make the driver finally visible to the drivers */ 1376 dma_ops = &amd_iommu_dma_ops; 1377 1378 + register_iommu(&amd_iommu_ops); 1379 + 1380 + bus_register_notifier(&pci_bus_type, &device_nb); 1381 + 1382 + amd_iommu_stats_init(); 1383 + 1384 return 0; 1385 1386 free_domains: ··· 1386 1387 return ret; 1388 } 1389 + 1390 + /***************************************************************************** 1391 + * 1392 + * The following functions belong to the exported interface of AMD IOMMU 1393 + * 1394 + * This interface allows access to lower level functions of the IOMMU 1395 + * like protection domain handling and assignement of devices to domains 1396 + * which is not possible with the dma_ops interface. 
1397 + * 1398 + *****************************************************************************/ 1399 + 1400 + static void cleanup_domain(struct protection_domain *domain) 1401 + { 1402 + unsigned long flags; 1403 + u16 devid; 1404 + 1405 + write_lock_irqsave(&amd_iommu_devtable_lock, flags); 1406 + 1407 + for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) 1408 + if (amd_iommu_pd_table[devid] == domain) 1409 + __detach_device(domain, devid); 1410 + 1411 + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); 1412 + } 1413 + 1414 + static int amd_iommu_domain_init(struct iommu_domain *dom) 1415 + { 1416 + struct protection_domain *domain; 1417 + 1418 + domain = kzalloc(sizeof(*domain), GFP_KERNEL); 1419 + if (!domain) 1420 + return -ENOMEM; 1421 + 1422 + spin_lock_init(&domain->lock); 1423 + domain->mode = PAGE_MODE_3_LEVEL; 1424 + domain->id = domain_id_alloc(); 1425 + if (!domain->id) 1426 + goto out_free; 1427 + domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); 1428 + if (!domain->pt_root) 1429 + goto out_free; 1430 + 1431 + dom->priv = domain; 1432 + 1433 + return 0; 1434 + 1435 + out_free: 1436 + kfree(domain); 1437 + 1438 + return -ENOMEM; 1439 + } 1440 + 1441 + static void amd_iommu_domain_destroy(struct iommu_domain *dom) 1442 + { 1443 + struct protection_domain *domain = dom->priv; 1444 + 1445 + if (!domain) 1446 + return; 1447 + 1448 + if (domain->dev_cnt > 0) 1449 + cleanup_domain(domain); 1450 + 1451 + BUG_ON(domain->dev_cnt != 0); 1452 + 1453 + free_pagetable(domain); 1454 + 1455 + domain_id_free(domain->id); 1456 + 1457 + kfree(domain); 1458 + 1459 + dom->priv = NULL; 1460 + } 1461 + 1462 + static void amd_iommu_detach_device(struct iommu_domain *dom, 1463 + struct device *dev) 1464 + { 1465 + struct protection_domain *domain = dom->priv; 1466 + struct amd_iommu *iommu; 1467 + struct pci_dev *pdev; 1468 + u16 devid; 1469 + 1470 + if (dev->bus != &pci_bus_type) 1471 + return; 1472 + 1473 + pdev = to_pci_dev(dev); 1474 + 1475 + devid = calc_devid(pdev->bus->number, pdev->devfn); 1476 + 1477 + if (devid > 0) 1478 + detach_device(domain, devid); 1479 + 1480 + iommu = amd_iommu_rlookup_table[devid]; 1481 + if (!iommu) 1482 + return; 1483 + 1484 + iommu_queue_inv_dev_entry(iommu, devid); 1485 + iommu_completion_wait(iommu); 1486 + } 1487 + 1488 + static int amd_iommu_attach_device(struct iommu_domain *dom, 1489 + struct device *dev) 1490 + { 1491 + struct protection_domain *domain = dom->priv; 1492 + struct protection_domain *old_domain; 1493 + struct amd_iommu *iommu; 1494 + struct pci_dev *pdev; 1495 + u16 devid; 1496 + 1497 + if (dev->bus != &pci_bus_type) 1498 + return -EINVAL; 1499 + 1500 + pdev = to_pci_dev(dev); 1501 + 1502 + devid = calc_devid(pdev->bus->number, pdev->devfn); 1503 + 1504 + if (devid >= amd_iommu_last_bdf || 1505 + devid != amd_iommu_alias_table[devid]) 1506 + return -EINVAL; 1507 + 1508 + iommu = amd_iommu_rlookup_table[devid]; 1509 + if (!iommu) 1510 + return -EINVAL; 1511 + 1512 + old_domain = domain_for_device(devid); 1513 + if (old_domain) 1514 + return -EBUSY; 1515 + 1516 + attach_device(iommu, domain, devid); 1517 + 1518 + iommu_completion_wait(iommu); 1519 + 1520 + return 0; 1521 + } 1522 + 1523 + static int amd_iommu_map_range(struct iommu_domain *dom, 1524 + unsigned long iova, phys_addr_t paddr, 1525 + size_t size, int iommu_prot) 1526 + { 1527 + struct protection_domain *domain = dom->priv; 1528 + unsigned long i, npages = iommu_num_pages(paddr, size, PAGE_SIZE); 1529 + int prot = 0; 1530 + int ret; 1531 + 1532 + if (iommu_prot & 
IOMMU_READ) 1533 + prot |= IOMMU_PROT_IR; 1534 + if (iommu_prot & IOMMU_WRITE) 1535 + prot |= IOMMU_PROT_IW; 1536 + 1537 + iova &= PAGE_MASK; 1538 + paddr &= PAGE_MASK; 1539 + 1540 + for (i = 0; i < npages; ++i) { 1541 + ret = iommu_map_page(domain, iova, paddr, prot); 1542 + if (ret) 1543 + return ret; 1544 + 1545 + iova += PAGE_SIZE; 1546 + paddr += PAGE_SIZE; 1547 + } 1548 + 1549 + return 0; 1550 + } 1551 + 1552 + static void amd_iommu_unmap_range(struct iommu_domain *dom, 1553 + unsigned long iova, size_t size) 1554 + { 1555 + 1556 + struct protection_domain *domain = dom->priv; 1557 + unsigned long i, npages = iommu_num_pages(iova, size, PAGE_SIZE); 1558 + 1559 + iova &= PAGE_MASK; 1560 + 1561 + for (i = 0; i < npages; ++i) { 1562 + iommu_unmap_page(domain, iova); 1563 + iova += PAGE_SIZE; 1564 + } 1565 + 1566 + iommu_flush_domain(domain->id); 1567 + } 1568 + 1569 + static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, 1570 + unsigned long iova) 1571 + { 1572 + struct protection_domain *domain = dom->priv; 1573 + unsigned long offset = iova & ~PAGE_MASK; 1574 + phys_addr_t paddr; 1575 + u64 *pte; 1576 + 1577 + pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(iova)]; 1578 + 1579 + if (!IOMMU_PTE_PRESENT(*pte)) 1580 + return 0; 1581 + 1582 + pte = IOMMU_PTE_PAGE(*pte); 1583 + pte = &pte[IOMMU_PTE_L1_INDEX(iova)]; 1584 + 1585 + if (!IOMMU_PTE_PRESENT(*pte)) 1586 + return 0; 1587 + 1588 + pte = IOMMU_PTE_PAGE(*pte); 1589 + pte = &pte[IOMMU_PTE_L0_INDEX(iova)]; 1590 + 1591 + if (!IOMMU_PTE_PRESENT(*pte)) 1592 + return 0; 1593 + 1594 + paddr = *pte & IOMMU_PAGE_MASK; 1595 + paddr |= offset; 1596 + 1597 + return paddr; 1598 + } 1599 + 1600 + static struct iommu_ops amd_iommu_ops = { 1601 + .domain_init = amd_iommu_domain_init, 1602 + .domain_destroy = amd_iommu_domain_destroy, 1603 + .attach_dev = amd_iommu_attach_device, 1604 + .detach_dev = amd_iommu_detach_device, 1605 + .map = amd_iommu_map_range, 1606 + .unmap = amd_iommu_unmap_range, 1607 + .iova_to_phys = amd_iommu_iova_to_phys, 1608 + }; 1609 +
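[Editor's note: the net effect of the large amd_iommu.c hunk above is that the AMD driver now implements the generic iommu_ops interface and hands it to the new core layer with register_iommu(). A condensed, hypothetical provider-side sketch follows; the names my_* are placeholders, the real callbacks are the amd_iommu_* functions added in this hunk.]

	/* hypothetical IOMMU driver registering with the generic layer */
	static int my_domain_init(struct iommu_domain *dom)
	{
		/* allocate page tables, stash driver data in dom->priv */
		return 0;
	}

	static struct iommu_ops my_iommu_ops = {
		.domain_init	= my_domain_init,
		/* .domain_destroy, .attach_dev, .detach_dev,
		 * .map, .unmap and .iova_to_phys follow the same pattern */
	};

	static int __init my_iommu_setup(void)
	{
		register_iommu(&my_iommu_ops);	/* only one provider may register */
		return 0;
	}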
+6 -9
arch/x86/kernel/amd_iommu_init.c
···
 LIST_HEAD(amd_iommu_unity_map);		/* a list of required unity mappings
 					   we find in ACPI */
 unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
-int amd_iommu_isolate = 1;		/* if 1, device isolation is enabled */
+bool amd_iommu_isolate = true;		/* if true, device isolation is
+					   enabled */
 bool amd_iommu_unmap_flush;		/* if true, flush on every unmap */
 
 LIST_HEAD(amd_iommu_list);		/* list of all AMD IOMMUs in the
···
 /* Function to enable the hardware */
 static void __init iommu_enable(struct amd_iommu *iommu)
 {
-	printk(KERN_INFO "AMD IOMMU: Enabling IOMMU "
-	       "at %02x:%02x.%x cap 0x%hx\n",
-	       iommu->dev->bus->number,
-	       PCI_SLOT(iommu->dev->devfn),
-	       PCI_FUNC(iommu->dev->devfn),
-	       iommu->cap_ptr);
+	printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n",
+	       dev_name(&iommu->dev->dev), iommu->cap_ptr);
 
 	iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
 }
···
 {
 	for (; *str; ++str) {
 		if (strncmp(str, "isolate", 7) == 0)
-			amd_iommu_isolate = 1;
+			amd_iommu_isolate = true;
 		if (strncmp(str, "share", 5) == 0)
-			amd_iommu_isolate = 0;
+			amd_iommu_isolate = false;
 		if (strncmp(str, "fullflush", 9) == 0)
 			amd_iommu_unmap_flush = true;
 	}
+2 -2
arch/x86/kvm/Makefile
···
 ifeq ($(CONFIG_KVM_TRACE),y)
 common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
 endif
-ifeq ($(CONFIG_DMAR),y)
-common-objs += $(addprefix ../../../virt/kvm/, vtd.o)
+ifeq ($(CONFIG_IOMMU_API),y)
+common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
 endif
 
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
+2 -1
arch/x86/kvm/x86.c
···
 #include <linux/module.h>
 #include <linux/mman.h>
 #include <linux/highmem.h>
+#include <linux/iommu.h>
 #include <linux/intel-iommu.h>
 
 #include <asm/uaccess.h>
···
 		r = !tdp_enabled;
 		break;
 	case KVM_CAP_IOMMU:
-		r = intel_iommu_found();
+		r = iommu_found();
 		break;
 	default:
 		r = 0;
+1
drivers/base/Makefile
···
 obj-$(CONFIG_NUMA)	+= node.o
 obj-$(CONFIG_MEMORY_HOTPLUG_SPARSE) += memory.o
 obj-$(CONFIG_SMP)	+= topology.o
+obj-$(CONFIG_IOMMU_API) += iommu.o
 ifeq ($(CONFIG_SYSFS),y)
 obj-$(CONFIG_MODULES)	+= module.o
 endif
+100
drivers/base/iommu.c
+/*
+ * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
+ * Author: Joerg Roedel <joerg.roedel@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/bug.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/iommu.h>
+
+static struct iommu_ops *iommu_ops;
+
+void register_iommu(struct iommu_ops *ops)
+{
+	if (iommu_ops)
+		BUG();
+
+	iommu_ops = ops;
+}
+
+bool iommu_found()
+{
+	return iommu_ops != NULL;
+}
+EXPORT_SYMBOL_GPL(iommu_found);
+
+struct iommu_domain *iommu_domain_alloc(void)
+{
+	struct iommu_domain *domain;
+	int ret;
+
+	domain = kmalloc(sizeof(*domain), GFP_KERNEL);
+	if (!domain)
+		return NULL;
+
+	ret = iommu_ops->domain_init(domain);
+	if (ret)
+		goto out_free;
+
+	return domain;
+
+out_free:
+	kfree(domain);
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(iommu_domain_alloc);
+
+void iommu_domain_free(struct iommu_domain *domain)
+{
+	iommu_ops->domain_destroy(domain);
+	kfree(domain);
+}
+EXPORT_SYMBOL_GPL(iommu_domain_free);
+
+int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
+{
+	return iommu_ops->attach_dev(domain, dev);
+}
+EXPORT_SYMBOL_GPL(iommu_attach_device);
+
+void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
+{
+	iommu_ops->detach_dev(domain, dev);
+}
+EXPORT_SYMBOL_GPL(iommu_detach_device);
+
+int iommu_map_range(struct iommu_domain *domain, unsigned long iova,
+		    phys_addr_t paddr, size_t size, int prot)
+{
+	return iommu_ops->map(domain, iova, paddr, size, prot);
+}
+EXPORT_SYMBOL_GPL(iommu_map_range);
+
+void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova,
+		       size_t size)
+{
+	iommu_ops->unmap(domain, iova, size);
+}
+EXPORT_SYMBOL_GPL(iommu_unmap_range);
+
+phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
+			       unsigned long iova)
+{
+	return iommu_ops->iova_to_phys(domain, iova);
+}
+EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
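[Editor's note: the new file above is the whole of the generic IOMMU API layer. Below is a minimal, hypothetical sketch of how a consumer such as virt/kvm/iommu.o is expected to drive it; the function example_assign_device and the addresses are placeholders, everything else (iommu_found, iommu_domain_alloc, iommu_attach_device, iommu_map_range, IOMMU_READ/IOMMU_WRITE, ...) comes from this merge.]

	/* hypothetical consumer of the generic IOMMU API */
	static int example_assign_device(struct device *dev)
	{
		struct iommu_domain *domain;
		int ret;

		if (!iommu_found())		/* no IOMMU driver registered */
			return -ENODEV;

		domain = iommu_domain_alloc();	/* backed by e.g. amd_iommu_ops */
		if (!domain)
			return -ENOMEM;

		ret = iommu_attach_device(domain, dev);
		if (ret)
			goto out_free;

		/* identity-map one example page: bus address == physical address */
		ret = iommu_map_range(domain, 0x100000, 0x100000, PAGE_SIZE,
				      IOMMU_READ | IOMMU_WRITE);
		if (ret)
			goto out_detach;

		return 0;

	out_detach:
		iommu_detach_device(domain, dev);
	out_free:
		iommu_domain_free(domain);
		return ret;
	}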
+28 -18
drivers/pci/dmar.c
···
 static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
 {
 	struct acpi_dmar_hardware_unit *drhd;
-	static int include_all;
 	int ret = 0;
 
 	drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
 
-	if (!dmaru->include_all)
-		ret = dmar_parse_dev_scope((void *)(drhd + 1),
+	if (dmaru->include_all)
+		return 0;
+
+	ret = dmar_parse_dev_scope((void *)(drhd + 1),
 				((void *)drhd) + drhd->header.length,
 				&dmaru->devices_cnt, &dmaru->devices,
 				drhd->segment);
-	else {
-		/* Only allow one INCLUDE_ALL */
-		if (include_all) {
-			printk(KERN_WARNING PREFIX "Only one INCLUDE_ALL "
-				"device scope is allowed\n");
-			ret = -EINVAL;
-		}
-		include_all = 1;
-	}
-
 	if (ret) {
 		list_del(&dmaru->list);
 		kfree(dmaru);
···
 struct dmar_drhd_unit *
 dmar_find_matched_drhd_unit(struct pci_dev *dev)
 {
-	struct dmar_drhd_unit *drhd = NULL;
+	struct dmar_drhd_unit *dmaru = NULL;
+	struct acpi_dmar_hardware_unit *drhd;
 
-	list_for_each_entry(drhd, &dmar_drhd_units, list) {
-		if (drhd->include_all || dmar_pci_device_match(drhd->devices,
-						drhd->devices_cnt, dev))
-			return drhd;
+	list_for_each_entry(dmaru, &dmar_drhd_units, list) {
+		drhd = container_of(dmaru->hdr,
+				    struct acpi_dmar_hardware_unit,
+				    header);
+
+		if (dmaru->include_all &&
+		    drhd->segment == pci_domain_nr(dev->bus))
+			return dmaru;
+
+		if (dmar_pci_device_match(dmaru->devices,
+					  dmaru->devices_cnt, dev))
+			return dmaru;
 	}
 
 	return NULL;
···
 	int map_size;
 	u32 ver;
 	static int iommu_allocated = 0;
+	int agaw;
 
 	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
 	if (!iommu)
···
 	}
 	iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
 	iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
+
+	agaw = iommu_calculate_agaw(iommu);
+	if (agaw < 0) {
+		printk(KERN_ERR
+			"Cannot get a valid agaw for iommu (seq_id = %d)\n",
+			iommu->seq_id);
+		goto error;
+	}
+	iommu->agaw = agaw;
 
 	/* the registers might be more than one page */
 	map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
+828 -136
drivers/pci/intel-iommu.c
··· 27 #include <linux/slab.h> 28 #include <linux/irq.h> 29 #include <linux/interrupt.h> 30 - #include <linux/sysdev.h> 31 #include <linux/spinlock.h> 32 #include <linux/pci.h> 33 #include <linux/dmar.h> ··· 34 #include <linux/mempool.h> 35 #include <linux/timer.h> 36 #include <linux/iova.h> 37 #include <linux/intel-iommu.h> 38 #include <asm/cacheflush.h> 39 #include <asm/iommu.h> ··· 54 55 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) 56 57 58 static void flush_unmaps_timeout(unsigned long data); 59 ··· 276 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) 277 static DEFINE_SPINLOCK(device_domain_lock); 278 static LIST_HEAD(device_domain_list); 279 280 static int __init intel_iommu_setup(char *str) 281 { ··· 375 kmem_cache_free(iommu_iova_cache, iova); 376 } 377 378 /* Gets context entry for a given bus and devfn */ 379 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, 380 u8 bus, u8 devfn) ··· 498 ret = 0; 499 goto out; 500 } 501 - ret = context_present(context[devfn]); 502 out: 503 spin_unlock_irqrestore(&iommu->lock, flags); 504 return ret; ··· 514 root = &iommu->root_entry[bus]; 515 context = get_context_addr_from_root(root); 516 if (context) { 517 - context_clear_entry(context[devfn]); 518 __iommu_flush_cache(iommu, &context[devfn], \ 519 sizeof(*context)); 520 } ··· 611 if (level == 1) 612 break; 613 614 - if (!dma_pte_present(*pte)) { 615 tmp_page = alloc_pgtable_page(); 616 617 if (!tmp_page) { ··· 619 flags); 620 return NULL; 621 } 622 - __iommu_flush_cache(domain->iommu, tmp_page, 623 - PAGE_SIZE); 624 - dma_set_pte_addr(*pte, virt_to_phys(tmp_page)); 625 /* 626 * high level table always sets r/w, last level page 627 * table control read/write 628 */ 629 - dma_set_pte_readable(*pte); 630 - dma_set_pte_writable(*pte); 631 - __iommu_flush_cache(domain->iommu, pte, sizeof(*pte)); 632 } 633 - parent = phys_to_virt(dma_pte_addr(*pte)); 634 level--; 635 } 636 ··· 652 if (level == total) 653 return pte; 654 655 - if (!dma_pte_present(*pte)) 656 break; 657 - parent = phys_to_virt(dma_pte_addr(*pte)); 658 total--; 659 } 660 return NULL; ··· 669 pte = dma_addr_level_pte(domain, addr, 1); 670 671 if (pte) { 672 - dma_clear_pte(*pte); 673 - __iommu_flush_cache(domain->iommu, pte, sizeof(*pte)); 674 } 675 } 676 ··· 716 pte = dma_addr_level_pte(domain, tmp, level); 717 if (pte) { 718 free_pgtable_page( 719 - phys_to_virt(dma_pte_addr(*pte))); 720 - dma_clear_pte(*pte); 721 - __iommu_flush_cache(domain->iommu, 722 - pte, sizeof(*pte)); 723 } 724 tmp += level_size(level); 725 } ··· 1220 1221 1222 static void domain_exit(struct dmar_domain *domain); 1223 1224 void free_dmar_iommu(struct intel_iommu *iommu) 1225 { 1226 struct dmar_domain *domain; 1227 int i; 1228 1229 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap)); 1230 for (; i < cap_ndoms(iommu->cap); ) { 1231 domain = iommu->domains[i]; 1232 clear_bit(i, iommu->domain_ids); 1233 - domain_exit(domain); 1234 i = find_next_bit(iommu->domain_ids, 1235 cap_ndoms(iommu->cap), i+1); 1236 } ··· 1258 1259 kfree(iommu->domains); 1260 kfree(iommu->domain_ids); 1261 1262 /* free context mapping */ 1263 free_context_table(iommu); ··· 1298 1299 set_bit(num, iommu->domain_ids); 1300 domain->id = num; 1301 - domain->iommu = iommu; 1302 iommu->domains[num] = domain; 1303 spin_unlock_irqrestore(&iommu->lock, flags); 1304 ··· 1310 static void iommu_free_domain(struct dmar_domain *domain) 1311 { 1312 unsigned long flags; 1313 1314 - spin_lock_irqsave(&domain->iommu->lock, flags); 1315 - 
clear_bit(domain->id, domain->iommu->domain_ids); 1316 - spin_unlock_irqrestore(&domain->iommu->lock, flags); 1317 } 1318 1319 static struct iova_domain reserved_iova_list; ··· 1391 1392 init_iova_domain(&domain->iovad, DMA_32BIT_PFN); 1393 spin_lock_init(&domain->mapping_lock); 1394 1395 domain_reserve_special_ranges(domain); 1396 1397 /* calculate AGAW */ 1398 - iommu = domain->iommu; 1399 if (guest_width > cap_mgaw(iommu->cap)) 1400 guest_width = cap_mgaw(iommu->cap); 1401 domain->gaw = guest_width; ··· 1412 } 1413 domain->agaw = agaw; 1414 INIT_LIST_HEAD(&domain->devices); 1415 1416 /* always allocate the top pgd */ 1417 domain->pgd = (struct dma_pte *)alloc_pgtable_page(); ··· 1456 u8 bus, u8 devfn) 1457 { 1458 struct context_entry *context; 1459 - struct intel_iommu *iommu = domain->iommu; 1460 unsigned long flags; 1461 1462 pr_debug("Set context mapping for %02x:%02x.%d\n", 1463 bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); 1464 BUG_ON(!domain->pgd); 1465 context = device_to_context_entry(iommu, bus, devfn); 1466 if (!context) 1467 return -ENOMEM; 1468 spin_lock_irqsave(&iommu->lock, flags); 1469 - if (context_present(*context)) { 1470 spin_unlock_irqrestore(&iommu->lock, flags); 1471 return 0; 1472 } 1473 1474 - context_set_domain_id(*context, domain->id); 1475 - context_set_address_width(*context, domain->agaw); 1476 - context_set_address_root(*context, virt_to_phys(domain->pgd)); 1477 - context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL); 1478 - context_set_fault_enable(*context); 1479 - context_set_present(*context); 1480 - __iommu_flush_cache(iommu, context, sizeof(*context)); 1481 1482 /* it's a non-present to present mapping */ 1483 if (iommu->flush.flush_context(iommu, domain->id, ··· 1542 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0); 1543 1544 spin_unlock_irqrestore(&iommu->lock, flags); 1545 return 0; 1546 } 1547 ··· 1584 tmp->bus->number, tmp->devfn); 1585 } 1586 1587 - static int domain_context_mapped(struct dmar_domain *domain, 1588 - struct pci_dev *pdev) 1589 { 1590 int ret; 1591 struct pci_dev *tmp, *parent; 1592 1593 - ret = device_context_mapped(domain->iommu, 1594 pdev->bus->number, pdev->devfn); 1595 if (!ret) 1596 return ret; ··· 1605 /* Secondary interface's bus number and devfn 0 */ 1606 parent = pdev->bus->self; 1607 while (parent != tmp) { 1608 - ret = device_context_mapped(domain->iommu, parent->bus->number, 1609 parent->devfn); 1610 if (!ret) 1611 return ret; 1612 parent = parent->bus->self; 1613 } 1614 if (tmp->is_pcie) 1615 - return device_context_mapped(domain->iommu, 1616 tmp->subordinate->number, 0); 1617 else 1618 - return device_context_mapped(domain->iommu, 1619 tmp->bus->number, tmp->devfn); 1620 } 1621 ··· 1643 /* We don't need lock here, nobody else 1644 * touches the iova range 1645 */ 1646 - BUG_ON(dma_pte_addr(*pte)); 1647 - dma_set_pte_addr(*pte, start_pfn << VTD_PAGE_SHIFT); 1648 - dma_set_pte_prot(*pte, prot); 1649 - __iommu_flush_cache(domain->iommu, pte, sizeof(*pte)); 1650 start_pfn++; 1651 index++; 1652 } 1653 return 0; 1654 } 1655 1656 - static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn) 1657 { 1658 - clear_context_table(domain->iommu, bus, devfn); 1659 - domain->iommu->flush.flush_context(domain->iommu, 0, 0, 0, 1660 DMA_CCMD_GLOBAL_INVL, 0); 1661 - domain->iommu->flush.flush_iotlb(domain->iommu, 0, 0, 0, 1662 DMA_TLB_GLOBAL_FLUSH, 0); 1663 } 1664 ··· 1669 { 1670 struct device_domain_info *info; 1671 unsigned long flags; 1672 1673 spin_lock_irqsave(&device_domain_lock, flags); 1674 
while (!list_empty(&domain->devices)) { ··· 1681 info->dev->dev.archdata.iommu = NULL; 1682 spin_unlock_irqrestore(&device_domain_lock, flags); 1683 1684 - detach_domain_for_dev(info->domain, info->bus, info->devfn); 1685 free_devinfo_mem(info); 1686 1687 spin_lock_irqsave(&device_domain_lock, flags); ··· 1775 info->dev = NULL; 1776 info->domain = domain; 1777 /* This domain is shared by devices under p2p bridge */ 1778 - domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES; 1779 1780 /* pcie-to-pci bridge already has a domain, uses it */ 1781 found = NULL; ··· 1938 printk(KERN_ERR "IOMMU: mapping reserved region failed\n"); 1939 } 1940 } 1941 #endif 1942 1943 #ifdef CONFIG_DMAR_FLOPPY_WA ··· 1970 } 1971 #endif /* !CONFIG_DMAR_FLPY_WA */ 1972 1973 - int __init init_dmars(void) 1974 { 1975 struct dmar_drhd_unit *drhd; 1976 struct dmar_rmrr_unit *rmrr; ··· 1993 */ 1994 } 1995 1996 deferred_flush = kzalloc(g_num_of_iommus * 1997 sizeof(struct deferred_flush_tables), GFP_KERNEL); 1998 if (!deferred_flush) { 1999 ret = -ENOMEM; 2000 goto error; 2001 } ··· 2014 continue; 2015 2016 iommu = drhd->iommu; 2017 2018 ret = iommu_init_domains(iommu); 2019 if (ret) ··· 2127 iommu = drhd->iommu; 2128 free_iommu(iommu); 2129 } 2130 return ret; 2131 } 2132 ··· 2196 } 2197 2198 /* make sure context mapping is ok */ 2199 - if (unlikely(!domain_context_mapped(domain, pdev))) { 2200 ret = domain_context_mapping(domain, pdev); 2201 if (ret) { 2202 printk(KERN_ERR ··· 2218 struct iova *iova; 2219 int prot = 0; 2220 int ret; 2221 2222 BUG_ON(dir == DMA_NONE); 2223 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) ··· 2228 if (!domain) 2229 return 0; 2230 2231 size = aligned_size((u64)paddr, size); 2232 2233 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); ··· 2242 * mappings.. 
2243 */ 2244 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \ 2245 - !cap_zlr(domain->iommu->cap)) 2246 prot |= DMA_PTE_READ; 2247 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) 2248 prot |= DMA_PTE_WRITE; ··· 2258 goto error; 2259 2260 /* it's a non-present to present mapping */ 2261 - ret = iommu_flush_iotlb_psi(domain->iommu, domain->id, 2262 start_paddr, size >> VTD_PAGE_SHIFT, 1); 2263 if (ret) 2264 - iommu_flush_write_buffer(domain->iommu); 2265 2266 return start_paddr + ((u64)paddr & (~PAGE_MASK)); 2267 ··· 2288 2289 /* just flush them all */ 2290 for (i = 0; i < g_num_of_iommus; i++) { 2291 - if (deferred_flush[i].next) { 2292 - struct intel_iommu *iommu = 2293 - deferred_flush[i].domain[0]->iommu; 2294 2295 iommu->flush.flush_iotlb(iommu, 0, 0, 0, 2296 DMA_TLB_GLOBAL_FLUSH, 0); 2297 for (j = 0; j < deferred_flush[i].next; j++) { ··· 2319 { 2320 unsigned long flags; 2321 int next, iommu_id; 2322 2323 spin_lock_irqsave(&async_umap_flush_lock, flags); 2324 if (list_size == HIGH_WATER_MARK) 2325 flush_unmaps(); 2326 2327 - iommu_id = dom->iommu->seq_id; 2328 2329 next = deferred_flush[iommu_id].next; 2330 deferred_flush[iommu_id].domain[next] = dom; ··· 2348 struct dmar_domain *domain; 2349 unsigned long start_addr; 2350 struct iova *iova; 2351 2352 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) 2353 return; 2354 domain = find_domain(pdev); 2355 BUG_ON(!domain); 2356 2357 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr)); 2358 if (!iova) ··· 2372 /* free page tables */ 2373 dma_pte_free_pagetable(domain, start_addr, start_addr + size); 2374 if (intel_iommu_strict) { 2375 - if (iommu_flush_iotlb_psi(domain->iommu, 2376 domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0)) 2377 - iommu_flush_write_buffer(domain->iommu); 2378 /* free iova */ 2379 __free_iova(&domain->iovad, iova); 2380 } else { ··· 2435 size_t size = 0; 2436 void *addr; 2437 struct scatterlist *sg; 2438 2439 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) 2440 return; 2441 2442 domain = find_domain(pdev); 2443 2444 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address)); 2445 if (!iova) ··· 2460 /* free page tables */ 2461 dma_pte_free_pagetable(domain, start_addr, start_addr + size); 2462 2463 - if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr, 2464 size >> VTD_PAGE_SHIFT, 0)) 2465 - iommu_flush_write_buffer(domain->iommu); 2466 2467 /* free iova */ 2468 __free_iova(&domain->iovad, iova); ··· 2496 int ret; 2497 struct scatterlist *sg; 2498 unsigned long start_addr; 2499 2500 BUG_ON(dir == DMA_NONE); 2501 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) ··· 2505 domain = get_valid_domain_for_dev(pdev); 2506 if (!domain) 2507 return 0; 2508 2509 for_each_sg(sglist, sg, nelems, i) { 2510 addr = SG_ENT_VIRT_ADDRESS(sg); ··· 2525 * mappings.. 
2526 */ 2527 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \ 2528 - !cap_zlr(domain->iommu->cap)) 2529 prot |= DMA_PTE_READ; 2530 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) 2531 prot |= DMA_PTE_WRITE; ··· 2557 } 2558 2559 /* it's a non-present to present mapping */ 2560 - if (iommu_flush_iotlb_psi(domain->iommu, domain->id, 2561 start_addr, offset >> VTD_PAGE_SHIFT, 1)) 2562 - iommu_flush_write_buffer(domain->iommu); 2563 return nelems; 2564 } 2565 ··· 2731 init_timer(&unmap_timer); 2732 force_iommu = 1; 2733 dma_ops = &intel_dma_ops; 2734 return 0; 2735 } 2736 2737 - void intel_iommu_domain_exit(struct dmar_domain *domain) 2738 { 2739 u64 end; 2740 ··· 2952 if (!domain) 2953 return; 2954 2955 end = DOMAIN_MAX_ADDR(domain->gaw); 2956 end = end & (~VTD_PAGE_MASK); 2957 ··· 2964 /* free page tables */ 2965 dma_pte_free_pagetable(domain, 0, end); 2966 2967 - iommu_free_domain(domain); 2968 free_domain_mem(domain); 2969 } 2970 - EXPORT_SYMBOL_GPL(intel_iommu_domain_exit); 2971 2972 - struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev) 2973 { 2974 - struct dmar_drhd_unit *drhd; 2975 - struct dmar_domain *domain; 2976 struct intel_iommu *iommu; 2977 2978 - drhd = dmar_find_matched_drhd_unit(pdev); 2979 - if (!drhd) { 2980 - printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n"); 2981 - return NULL; 2982 } 2983 2984 - iommu = drhd->iommu; 2985 - if (!iommu) { 2986 - printk(KERN_ERR 2987 - "intel_iommu_domain_alloc: iommu == NULL\n"); 2988 - return NULL; 2989 } 2990 - domain = iommu_alloc_domain(iommu); 2991 - if (!domain) { 2992 - printk(KERN_ERR 2993 - "intel_iommu_domain_alloc: domain == NULL\n"); 2994 - return NULL; 2995 } 2996 - if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { 2997 - printk(KERN_ERR 2998 - "intel_iommu_domain_alloc: domain_init() failed\n"); 2999 - intel_iommu_domain_exit(domain); 3000 - return NULL; 3001 - } 3002 - return domain; 3003 - } 3004 - EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc); 3005 3006 - int intel_iommu_context_mapping( 3007 - struct dmar_domain *domain, struct pci_dev *pdev) 3008 - { 3009 - int rc; 3010 - rc = domain_context_mapping(domain, pdev); 3011 - return rc; 3012 } 3013 - EXPORT_SYMBOL_GPL(intel_iommu_context_mapping); 3014 3015 - int intel_iommu_page_mapping( 3016 - struct dmar_domain *domain, dma_addr_t iova, 3017 - u64 hpa, size_t size, int prot) 3018 { 3019 - int rc; 3020 - rc = domain_page_mapping(domain, iova, hpa, size, prot); 3021 - return rc; 3022 } 3023 - EXPORT_SYMBOL_GPL(intel_iommu_page_mapping); 3024 3025 - void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn) 3026 { 3027 - detach_domain_for_dev(domain, bus, devfn); 3028 - } 3029 - EXPORT_SYMBOL_GPL(intel_iommu_detach_dev); 3030 - 3031 - struct dmar_domain * 3032 - intel_iommu_find_domain(struct pci_dev *pdev) 3033 - { 3034 - return find_domain(pdev); 3035 - } 3036 - EXPORT_SYMBOL_GPL(intel_iommu_find_domain); 3037 - 3038 - int intel_iommu_found(void) 3039 - { 3040 - return g_num_of_iommus; 3041 - } 3042 - EXPORT_SYMBOL_GPL(intel_iommu_found); 3043 - 3044 - u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova) 3045 - { 3046 struct dma_pte *pte; 3047 - u64 pfn; 3048 3049 - pfn = 0; 3050 - pte = addr_to_dma_pte(domain, iova); 3051 - 3052 if (pte) 3053 - pfn = dma_pte_addr(*pte); 3054 3055 - return pfn >> VTD_PAGE_SHIFT; 3056 } 3057 - EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);
··· 27 #include <linux/slab.h> 28 #include <linux/irq.h> 29 #include <linux/interrupt.h> 30 #include <linux/spinlock.h> 31 #include <linux/pci.h> 32 #include <linux/dmar.h> ··· 35 #include <linux/mempool.h> 36 #include <linux/timer.h> 37 #include <linux/iova.h> 38 + #include <linux/iommu.h> 39 #include <linux/intel-iommu.h> 40 #include <asm/cacheflush.h> 41 #include <asm/iommu.h> ··· 54 55 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) 56 57 + #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) 58 + #define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK) 59 + #define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK) 60 + 61 + /* global iommu list, set NULL for ignored DMAR units */ 62 + static struct intel_iommu **g_iommus; 63 + 64 + /* 65 + * 0: Present 66 + * 1-11: Reserved 67 + * 12-63: Context Ptr (12 - (haw-1)) 68 + * 64-127: Reserved 69 + */ 70 + struct root_entry { 71 + u64 val; 72 + u64 rsvd1; 73 + }; 74 + #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry)) 75 + static inline bool root_present(struct root_entry *root) 76 + { 77 + return (root->val & 1); 78 + } 79 + static inline void set_root_present(struct root_entry *root) 80 + { 81 + root->val |= 1; 82 + } 83 + static inline void set_root_value(struct root_entry *root, unsigned long value) 84 + { 85 + root->val |= value & VTD_PAGE_MASK; 86 + } 87 + 88 + static inline struct context_entry * 89 + get_context_addr_from_root(struct root_entry *root) 90 + { 91 + return (struct context_entry *) 92 + (root_present(root)?phys_to_virt( 93 + root->val & VTD_PAGE_MASK) : 94 + NULL); 95 + } 96 + 97 + /* 98 + * low 64 bits: 99 + * 0: present 100 + * 1: fault processing disable 101 + * 2-3: translation type 102 + * 12-63: address space root 103 + * high 64 bits: 104 + * 0-2: address width 105 + * 3-6: aval 106 + * 8-23: domain id 107 + */ 108 + struct context_entry { 109 + u64 lo; 110 + u64 hi; 111 + }; 112 + 113 + static inline bool context_present(struct context_entry *context) 114 + { 115 + return (context->lo & 1); 116 + } 117 + static inline void context_set_present(struct context_entry *context) 118 + { 119 + context->lo |= 1; 120 + } 121 + 122 + static inline void context_set_fault_enable(struct context_entry *context) 123 + { 124 + context->lo &= (((u64)-1) << 2) | 1; 125 + } 126 + 127 + #define CONTEXT_TT_MULTI_LEVEL 0 128 + 129 + static inline void context_set_translation_type(struct context_entry *context, 130 + unsigned long value) 131 + { 132 + context->lo &= (((u64)-1) << 4) | 3; 133 + context->lo |= (value & 3) << 2; 134 + } 135 + 136 + static inline void context_set_address_root(struct context_entry *context, 137 + unsigned long value) 138 + { 139 + context->lo |= value & VTD_PAGE_MASK; 140 + } 141 + 142 + static inline void context_set_address_width(struct context_entry *context, 143 + unsigned long value) 144 + { 145 + context->hi |= value & 7; 146 + } 147 + 148 + static inline void context_set_domain_id(struct context_entry *context, 149 + unsigned long value) 150 + { 151 + context->hi |= (value & ((1 << 16) - 1)) << 8; 152 + } 153 + 154 + static inline void context_clear_entry(struct context_entry *context) 155 + { 156 + context->lo = 0; 157 + context->hi = 0; 158 + } 159 + 160 + /* 161 + * 0: readable 162 + * 1: writable 163 + * 2-6: reserved 164 + * 7: super page 165 + * 8-11: available 166 + * 12-63: Host physcial address 167 + */ 168 + struct dma_pte { 169 + u64 val; 170 + }; 171 + 172 + static inline void dma_clear_pte(struct dma_pte *pte) 173 + { 174 + pte->val = 0; 175 + } 176 + 177 + static inline void 
dma_set_pte_readable(struct dma_pte *pte) 178 + { 179 + pte->val |= DMA_PTE_READ; 180 + } 181 + 182 + static inline void dma_set_pte_writable(struct dma_pte *pte) 183 + { 184 + pte->val |= DMA_PTE_WRITE; 185 + } 186 + 187 + static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot) 188 + { 189 + pte->val = (pte->val & ~3) | (prot & 3); 190 + } 191 + 192 + static inline u64 dma_pte_addr(struct dma_pte *pte) 193 + { 194 + return (pte->val & VTD_PAGE_MASK); 195 + } 196 + 197 + static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr) 198 + { 199 + pte->val |= (addr & VTD_PAGE_MASK); 200 + } 201 + 202 + static inline bool dma_pte_present(struct dma_pte *pte) 203 + { 204 + return (pte->val & 3) != 0; 205 + } 206 + 207 + /* devices under the same p2p bridge are owned in one domain */ 208 + #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0) 209 + 210 + /* domain represents a virtual machine, more than one devices 211 + * across iommus may be owned in one domain, e.g. kvm guest. 212 + */ 213 + #define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1) 214 + 215 + struct dmar_domain { 216 + int id; /* domain id */ 217 + unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/ 218 + 219 + struct list_head devices; /* all devices' list */ 220 + struct iova_domain iovad; /* iova's that belong to this domain */ 221 + 222 + struct dma_pte *pgd; /* virtual address */ 223 + spinlock_t mapping_lock; /* page table lock */ 224 + int gaw; /* max guest address width */ 225 + 226 + /* adjusted guest address width, 0 is level 2 30-bit */ 227 + int agaw; 228 + 229 + int flags; /* flags to find out type of domain */ 230 + 231 + int iommu_coherency;/* indicate coherency of iommu access */ 232 + int iommu_count; /* reference count of iommu */ 233 + spinlock_t iommu_lock; /* protect iommu set in domain */ 234 + u64 max_addr; /* maximum mapped address */ 235 + }; 236 + 237 + /* PCI domain-device relationship */ 238 + struct device_domain_info { 239 + struct list_head link; /* link to domain siblings */ 240 + struct list_head global; /* link to global list */ 241 + u8 bus; /* PCI bus numer */ 242 + u8 devfn; /* PCI devfn number */ 243 + struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ 244 + struct dmar_domain *domain; /* pointer to domain */ 245 + }; 246 247 static void flush_unmaps_timeout(unsigned long data); 248 ··· 87 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) 88 static DEFINE_SPINLOCK(device_domain_lock); 89 static LIST_HEAD(device_domain_list); 90 + 91 + static struct iommu_ops intel_iommu_ops; 92 93 static int __init intel_iommu_setup(char *str) 94 { ··· 184 kmem_cache_free(iommu_iova_cache, iova); 185 } 186 187 + 188 + static inline int width_to_agaw(int width); 189 + 190 + /* calculate agaw for each iommu. 191 + * "SAGAW" may be different across iommus, use a default agaw, and 192 + * get a supported less agaw for iommus that don't support the default agaw. 
193 + */ 194 + int iommu_calculate_agaw(struct intel_iommu *iommu) 195 + { 196 + unsigned long sagaw; 197 + int agaw = -1; 198 + 199 + sagaw = cap_sagaw(iommu->cap); 200 + for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH); 201 + agaw >= 0; agaw--) { 202 + if (test_bit(agaw, &sagaw)) 203 + break; 204 + } 205 + 206 + return agaw; 207 + } 208 + 209 + /* in native case, each domain is related to only one iommu */ 210 + static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) 211 + { 212 + int iommu_id; 213 + 214 + BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE); 215 + 216 + iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus); 217 + if (iommu_id < 0 || iommu_id >= g_num_of_iommus) 218 + return NULL; 219 + 220 + return g_iommus[iommu_id]; 221 + } 222 + 223 + /* "Coherency" capability may be different across iommus */ 224 + static void domain_update_iommu_coherency(struct dmar_domain *domain) 225 + { 226 + int i; 227 + 228 + domain->iommu_coherency = 1; 229 + 230 + i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus); 231 + for (; i < g_num_of_iommus; ) { 232 + if (!ecap_coherent(g_iommus[i]->ecap)) { 233 + domain->iommu_coherency = 0; 234 + break; 235 + } 236 + i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1); 237 + } 238 + } 239 + 240 + static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn) 241 + { 242 + struct dmar_drhd_unit *drhd = NULL; 243 + int i; 244 + 245 + for_each_drhd_unit(drhd) { 246 + if (drhd->ignored) 247 + continue; 248 + 249 + for (i = 0; i < drhd->devices_cnt; i++) 250 + if (drhd->devices[i]->bus->number == bus && 251 + drhd->devices[i]->devfn == devfn) 252 + return drhd->iommu; 253 + 254 + if (drhd->include_all) 255 + return drhd->iommu; 256 + } 257 + 258 + return NULL; 259 + } 260 + 261 + static void domain_flush_cache(struct dmar_domain *domain, 262 + void *addr, int size) 263 + { 264 + if (!domain->iommu_coherency) 265 + clflush_cache_range(addr, size); 266 + } 267 + 268 /* Gets context entry for a given bus and devfn */ 269 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, 270 u8 bus, u8 devfn) ··· 226 ret = 0; 227 goto out; 228 } 229 + ret = context_present(&context[devfn]); 230 out: 231 spin_unlock_irqrestore(&iommu->lock, flags); 232 return ret; ··· 242 root = &iommu->root_entry[bus]; 243 context = get_context_addr_from_root(root); 244 if (context) { 245 + context_clear_entry(&context[devfn]); 246 __iommu_flush_cache(iommu, &context[devfn], \ 247 sizeof(*context)); 248 } ··· 339 if (level == 1) 340 break; 341 342 + if (!dma_pte_present(pte)) { 343 tmp_page = alloc_pgtable_page(); 344 345 if (!tmp_page) { ··· 347 flags); 348 return NULL; 349 } 350 + domain_flush_cache(domain, tmp_page, PAGE_SIZE); 351 + dma_set_pte_addr(pte, virt_to_phys(tmp_page)); 352 /* 353 * high level table always sets r/w, last level page 354 * table control read/write 355 */ 356 + dma_set_pte_readable(pte); 357 + dma_set_pte_writable(pte); 358 + domain_flush_cache(domain, pte, sizeof(*pte)); 359 } 360 + parent = phys_to_virt(dma_pte_addr(pte)); 361 level--; 362 } 363 ··· 381 if (level == total) 382 return pte; 383 384 + if (!dma_pte_present(pte)) 385 break; 386 + parent = phys_to_virt(dma_pte_addr(pte)); 387 total--; 388 } 389 return NULL; ··· 398 pte = dma_addr_level_pte(domain, addr, 1); 399 400 if (pte) { 401 + dma_clear_pte(pte); 402 + domain_flush_cache(domain, pte, sizeof(*pte)); 403 } 404 } 405 ··· 445 pte = dma_addr_level_pte(domain, tmp, level); 446 if (pte) { 447 free_pgtable_page( 448 + 
phys_to_virt(dma_pte_addr(pte))); 449 + dma_clear_pte(pte); 450 + domain_flush_cache(domain, pte, sizeof(*pte)); 451 } 452 tmp += level_size(level); 453 } ··· 950 951 952 static void domain_exit(struct dmar_domain *domain); 953 + static void vm_domain_exit(struct dmar_domain *domain); 954 955 void free_dmar_iommu(struct intel_iommu *iommu) 956 { 957 struct dmar_domain *domain; 958 int i; 959 + unsigned long flags; 960 961 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap)); 962 for (; i < cap_ndoms(iommu->cap); ) { 963 domain = iommu->domains[i]; 964 clear_bit(i, iommu->domain_ids); 965 + 966 + spin_lock_irqsave(&domain->iommu_lock, flags); 967 + if (--domain->iommu_count == 0) { 968 + if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) 969 + vm_domain_exit(domain); 970 + else 971 + domain_exit(domain); 972 + } 973 + spin_unlock_irqrestore(&domain->iommu_lock, flags); 974 + 975 i = find_next_bit(iommu->domain_ids, 976 cap_ndoms(iommu->cap), i+1); 977 } ··· 977 978 kfree(iommu->domains); 979 kfree(iommu->domain_ids); 980 + 981 + g_iommus[iommu->seq_id] = NULL; 982 + 983 + /* if all iommus are freed, free g_iommus */ 984 + for (i = 0; i < g_num_of_iommus; i++) { 985 + if (g_iommus[i]) 986 + break; 987 + } 988 + 989 + if (i == g_num_of_iommus) 990 + kfree(g_iommus); 991 992 /* free context mapping */ 993 free_context_table(iommu); ··· 1006 1007 set_bit(num, iommu->domain_ids); 1008 domain->id = num; 1009 + memset(&domain->iommu_bmp, 0, sizeof(unsigned long)); 1010 + set_bit(iommu->seq_id, &domain->iommu_bmp); 1011 + domain->flags = 0; 1012 iommu->domains[num] = domain; 1013 spin_unlock_irqrestore(&iommu->lock, flags); 1014 ··· 1016 static void iommu_free_domain(struct dmar_domain *domain) 1017 { 1018 unsigned long flags; 1019 + struct intel_iommu *iommu; 1020 1021 + iommu = domain_get_iommu(domain); 1022 + 1023 + spin_lock_irqsave(&iommu->lock, flags); 1024 + clear_bit(domain->id, iommu->domain_ids); 1025 + spin_unlock_irqrestore(&iommu->lock, flags); 1026 } 1027 1028 static struct iova_domain reserved_iova_list; ··· 1094 1095 init_iova_domain(&domain->iovad, DMA_32BIT_PFN); 1096 spin_lock_init(&domain->mapping_lock); 1097 + spin_lock_init(&domain->iommu_lock); 1098 1099 domain_reserve_special_ranges(domain); 1100 1101 /* calculate AGAW */ 1102 + iommu = domain_get_iommu(domain); 1103 if (guest_width > cap_mgaw(iommu->cap)) 1104 guest_width = cap_mgaw(iommu->cap); 1105 domain->gaw = guest_width; ··· 1114 } 1115 domain->agaw = agaw; 1116 INIT_LIST_HEAD(&domain->devices); 1117 + 1118 + if (ecap_coherent(iommu->ecap)) 1119 + domain->iommu_coherency = 1; 1120 + else 1121 + domain->iommu_coherency = 0; 1122 + 1123 + domain->iommu_count = 1; 1124 1125 /* always allocate the top pgd */ 1126 domain->pgd = (struct dma_pte *)alloc_pgtable_page(); ··· 1151 u8 bus, u8 devfn) 1152 { 1153 struct context_entry *context; 1154 unsigned long flags; 1155 + struct intel_iommu *iommu; 1156 + struct dma_pte *pgd; 1157 + unsigned long num; 1158 + unsigned long ndomains; 1159 + int id; 1160 + int agaw; 1161 1162 pr_debug("Set context mapping for %02x:%02x.%d\n", 1163 bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); 1164 BUG_ON(!domain->pgd); 1165 + 1166 + iommu = device_to_iommu(bus, devfn); 1167 + if (!iommu) 1168 + return -ENODEV; 1169 + 1170 context = device_to_context_entry(iommu, bus, devfn); 1171 if (!context) 1172 return -ENOMEM; 1173 spin_lock_irqsave(&iommu->lock, flags); 1174 + if (context_present(context)) { 1175 spin_unlock_irqrestore(&iommu->lock, flags); 1176 return 0; 1177 } 1178 1179 + id = domain->id; 
1180 + pgd = domain->pgd; 1181 + 1182 + if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) { 1183 + int found = 0; 1184 + 1185 + /* find an available domain id for this device in iommu */ 1186 + ndomains = cap_ndoms(iommu->cap); 1187 + num = find_first_bit(iommu->domain_ids, ndomains); 1188 + for (; num < ndomains; ) { 1189 + if (iommu->domains[num] == domain) { 1190 + id = num; 1191 + found = 1; 1192 + break; 1193 + } 1194 + num = find_next_bit(iommu->domain_ids, 1195 + cap_ndoms(iommu->cap), num+1); 1196 + } 1197 + 1198 + if (found == 0) { 1199 + num = find_first_zero_bit(iommu->domain_ids, ndomains); 1200 + if (num >= ndomains) { 1201 + spin_unlock_irqrestore(&iommu->lock, flags); 1202 + printk(KERN_ERR "IOMMU: no free domain ids\n"); 1203 + return -EFAULT; 1204 + } 1205 + 1206 + set_bit(num, iommu->domain_ids); 1207 + iommu->domains[num] = domain; 1208 + id = num; 1209 + } 1210 + 1211 + /* Skip top levels of page tables for 1212 + * iommu which has less agaw than default. 1213 + */ 1214 + for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) { 1215 + pgd = phys_to_virt(dma_pte_addr(pgd)); 1216 + if (!dma_pte_present(pgd)) { 1217 + spin_unlock_irqrestore(&iommu->lock, flags); 1218 + return -ENOMEM; 1219 + } 1220 + } 1221 + } 1222 + 1223 + context_set_domain_id(context, id); 1224 + context_set_address_width(context, iommu->agaw); 1225 + context_set_address_root(context, virt_to_phys(pgd)); 1226 + context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL); 1227 + context_set_fault_enable(context); 1228 + context_set_present(context); 1229 + domain_flush_cache(domain, context, sizeof(*context)); 1230 1231 /* it's a non-present to present mapping */ 1232 if (iommu->flush.flush_context(iommu, domain->id, ··· 1183 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0); 1184 1185 spin_unlock_irqrestore(&iommu->lock, flags); 1186 + 1187 + spin_lock_irqsave(&domain->iommu_lock, flags); 1188 + if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) { 1189 + domain->iommu_count++; 1190 + domain_update_iommu_coherency(domain); 1191 + } 1192 + spin_unlock_irqrestore(&domain->iommu_lock, flags); 1193 return 0; 1194 } 1195 ··· 1218 tmp->bus->number, tmp->devfn); 1219 } 1220 1221 + static int domain_context_mapped(struct pci_dev *pdev) 1222 { 1223 int ret; 1224 struct pci_dev *tmp, *parent; 1225 + struct intel_iommu *iommu; 1226 1227 + iommu = device_to_iommu(pdev->bus->number, pdev->devfn); 1228 + if (!iommu) 1229 + return -ENODEV; 1230 + 1231 + ret = device_context_mapped(iommu, 1232 pdev->bus->number, pdev->devfn); 1233 if (!ret) 1234 return ret; ··· 1235 /* Secondary interface's bus number and devfn 0 */ 1236 parent = pdev->bus->self; 1237 while (parent != tmp) { 1238 + ret = device_context_mapped(iommu, parent->bus->number, 1239 parent->devfn); 1240 if (!ret) 1241 return ret; 1242 parent = parent->bus->self; 1243 } 1244 if (tmp->is_pcie) 1245 + return device_context_mapped(iommu, 1246 tmp->subordinate->number, 0); 1247 else 1248 + return device_context_mapped(iommu, 1249 tmp->bus->number, tmp->devfn); 1250 } 1251 ··· 1273 /* We don't need lock here, nobody else 1274 * touches the iova range 1275 */ 1276 + BUG_ON(dma_pte_addr(pte)); 1277 + dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT); 1278 + dma_set_pte_prot(pte, prot); 1279 + domain_flush_cache(domain, pte, sizeof(*pte)); 1280 start_pfn++; 1281 index++; 1282 } 1283 return 0; 1284 } 1285 1286 + static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn) 1287 { 1288 + if (!iommu) 1289 + return; 1290 + 1291 + 
clear_context_table(iommu, bus, devfn); 1292 + iommu->flush.flush_context(iommu, 0, 0, 0, 1293 DMA_CCMD_GLOBAL_INVL, 0); 1294 + iommu->flush.flush_iotlb(iommu, 0, 0, 0, 1295 DMA_TLB_GLOBAL_FLUSH, 0); 1296 } 1297 ··· 1296 { 1297 struct device_domain_info *info; 1298 unsigned long flags; 1299 + struct intel_iommu *iommu; 1300 1301 spin_lock_irqsave(&device_domain_lock, flags); 1302 while (!list_empty(&domain->devices)) { ··· 1307 info->dev->dev.archdata.iommu = NULL; 1308 spin_unlock_irqrestore(&device_domain_lock, flags); 1309 1310 + iommu = device_to_iommu(info->bus, info->devfn); 1311 + iommu_detach_dev(iommu, info->bus, info->devfn); 1312 free_devinfo_mem(info); 1313 1314 spin_lock_irqsave(&device_domain_lock, flags); ··· 1400 info->dev = NULL; 1401 info->domain = domain; 1402 /* This domain is shared by devices under p2p bridge */ 1403 + domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES; 1404 1405 /* pcie-to-pci bridge already has a domain, uses it */ 1406 found = NULL; ··· 1563 printk(KERN_ERR "IOMMU: mapping reserved region failed\n"); 1564 } 1565 } 1566 + #else /* !CONFIG_DMAR_GFX_WA */ 1567 + static inline void iommu_prepare_gfx_mapping(void) 1568 + { 1569 + return; 1570 + } 1571 #endif 1572 1573 #ifdef CONFIG_DMAR_FLOPPY_WA ··· 1590 } 1591 #endif /* !CONFIG_DMAR_FLPY_WA */ 1592 1593 + static int __init init_dmars(void) 1594 { 1595 struct dmar_drhd_unit *drhd; 1596 struct dmar_rmrr_unit *rmrr; ··· 1613 */ 1614 } 1615 1616 + g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *), 1617 + GFP_KERNEL); 1618 + if (!g_iommus) { 1619 + printk(KERN_ERR "Allocating global iommu array failed\n"); 1620 + ret = -ENOMEM; 1621 + goto error; 1622 + } 1623 + 1624 deferred_flush = kzalloc(g_num_of_iommus * 1625 sizeof(struct deferred_flush_tables), GFP_KERNEL); 1626 if (!deferred_flush) { 1627 + kfree(g_iommus); 1628 ret = -ENOMEM; 1629 goto error; 1630 } ··· 1625 continue; 1626 1627 iommu = drhd->iommu; 1628 + g_iommus[iommu->seq_id] = iommu; 1629 1630 ret = iommu_init_domains(iommu); 1631 if (ret) ··· 1737 iommu = drhd->iommu; 1738 free_iommu(iommu); 1739 } 1740 + kfree(g_iommus); 1741 return ret; 1742 } 1743 ··· 1805 } 1806 1807 /* make sure context mapping is ok */ 1808 + if (unlikely(!domain_context_mapped(pdev))) { 1809 ret = domain_context_mapping(domain, pdev); 1810 if (ret) { 1811 printk(KERN_ERR ··· 1827 struct iova *iova; 1828 int prot = 0; 1829 int ret; 1830 + struct intel_iommu *iommu; 1831 1832 BUG_ON(dir == DMA_NONE); 1833 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) ··· 1836 if (!domain) 1837 return 0; 1838 1839 + iommu = domain_get_iommu(domain); 1840 size = aligned_size((u64)paddr, size); 1841 1842 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); ··· 1849 * mappings.. 
1850 */ 1851 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \ 1852 + !cap_zlr(iommu->cap)) 1853 prot |= DMA_PTE_READ; 1854 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) 1855 prot |= DMA_PTE_WRITE; ··· 1865 goto error; 1866 1867 /* it's a non-present to present mapping */ 1868 + ret = iommu_flush_iotlb_psi(iommu, domain->id, 1869 start_paddr, size >> VTD_PAGE_SHIFT, 1); 1870 if (ret) 1871 + iommu_flush_write_buffer(iommu); 1872 1873 return start_paddr + ((u64)paddr & (~PAGE_MASK)); 1874 ··· 1895 1896 /* just flush them all */ 1897 for (i = 0; i < g_num_of_iommus; i++) { 1898 + struct intel_iommu *iommu = g_iommus[i]; 1899 + if (!iommu) 1900 + continue; 1901 1902 + if (deferred_flush[i].next) { 1903 iommu->flush.flush_iotlb(iommu, 0, 0, 0, 1904 DMA_TLB_GLOBAL_FLUSH, 0); 1905 for (j = 0; j < deferred_flush[i].next; j++) { ··· 1925 { 1926 unsigned long flags; 1927 int next, iommu_id; 1928 + struct intel_iommu *iommu; 1929 1930 spin_lock_irqsave(&async_umap_flush_lock, flags); 1931 if (list_size == HIGH_WATER_MARK) 1932 flush_unmaps(); 1933 1934 + iommu = domain_get_iommu(dom); 1935 + iommu_id = iommu->seq_id; 1936 1937 next = deferred_flush[iommu_id].next; 1938 deferred_flush[iommu_id].domain[next] = dom; ··· 1952 struct dmar_domain *domain; 1953 unsigned long start_addr; 1954 struct iova *iova; 1955 + struct intel_iommu *iommu; 1956 1957 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) 1958 return; 1959 domain = find_domain(pdev); 1960 BUG_ON(!domain); 1961 + 1962 + iommu = domain_get_iommu(domain); 1963 1964 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr)); 1965 if (!iova) ··· 1973 /* free page tables */ 1974 dma_pte_free_pagetable(domain, start_addr, start_addr + size); 1975 if (intel_iommu_strict) { 1976 + if (iommu_flush_iotlb_psi(iommu, 1977 domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0)) 1978 + iommu_flush_write_buffer(iommu); 1979 /* free iova */ 1980 __free_iova(&domain->iovad, iova); 1981 } else { ··· 2036 size_t size = 0; 2037 void *addr; 2038 struct scatterlist *sg; 2039 + struct intel_iommu *iommu; 2040 2041 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) 2042 return; 2043 2044 domain = find_domain(pdev); 2045 + BUG_ON(!domain); 2046 + 2047 + iommu = domain_get_iommu(domain); 2048 2049 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address)); 2050 if (!iova) ··· 2057 /* free page tables */ 2058 dma_pte_free_pagetable(domain, start_addr, start_addr + size); 2059 2060 + if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr, 2061 size >> VTD_PAGE_SHIFT, 0)) 2062 + iommu_flush_write_buffer(iommu); 2063 2064 /* free iova */ 2065 __free_iova(&domain->iovad, iova); ··· 2093 int ret; 2094 struct scatterlist *sg; 2095 unsigned long start_addr; 2096 + struct intel_iommu *iommu; 2097 2098 BUG_ON(dir == DMA_NONE); 2099 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) ··· 2101 domain = get_valid_domain_for_dev(pdev); 2102 if (!domain) 2103 return 0; 2104 + 2105 + iommu = domain_get_iommu(domain); 2106 2107 for_each_sg(sglist, sg, nelems, i) { 2108 addr = SG_ENT_VIRT_ADDRESS(sg); ··· 2119 * mappings.. 
2120 */ 2121 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \ 2122 + !cap_zlr(iommu->cap)) 2123 prot |= DMA_PTE_READ; 2124 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) 2125 prot |= DMA_PTE_WRITE; ··· 2151 } 2152 2153 /* it's a non-present to present mapping */ 2154 + if (iommu_flush_iotlb_psi(iommu, domain->id, 2155 start_addr, offset >> VTD_PAGE_SHIFT, 1)) 2156 + iommu_flush_write_buffer(iommu); 2157 return nelems; 2158 } 2159 ··· 2325 init_timer(&unmap_timer); 2326 force_iommu = 1; 2327 dma_ops = &intel_dma_ops; 2328 + 2329 + register_iommu(&intel_iommu_ops); 2330 + 2331 return 0; 2332 } 2333 2334 + static int vm_domain_add_dev_info(struct dmar_domain *domain, 2335 + struct pci_dev *pdev) 2336 + { 2337 + struct device_domain_info *info; 2338 + unsigned long flags; 2339 + 2340 + info = alloc_devinfo_mem(); 2341 + if (!info) 2342 + return -ENOMEM; 2343 + 2344 + info->bus = pdev->bus->number; 2345 + info->devfn = pdev->devfn; 2346 + info->dev = pdev; 2347 + info->domain = domain; 2348 + 2349 + spin_lock_irqsave(&device_domain_lock, flags); 2350 + list_add(&info->link, &domain->devices); 2351 + list_add(&info->global, &device_domain_list); 2352 + pdev->dev.archdata.iommu = info; 2353 + spin_unlock_irqrestore(&device_domain_lock, flags); 2354 + 2355 + return 0; 2356 + } 2357 + 2358 + static void vm_domain_remove_one_dev_info(struct dmar_domain *domain, 2359 + struct pci_dev *pdev) 2360 + { 2361 + struct device_domain_info *info; 2362 + struct intel_iommu *iommu; 2363 + unsigned long flags; 2364 + int found = 0; 2365 + struct list_head *entry, *tmp; 2366 + 2367 + iommu = device_to_iommu(pdev->bus->number, pdev->devfn); 2368 + if (!iommu) 2369 + return; 2370 + 2371 + spin_lock_irqsave(&device_domain_lock, flags); 2372 + list_for_each_safe(entry, tmp, &domain->devices) { 2373 + info = list_entry(entry, struct device_domain_info, link); 2374 + if (info->bus == pdev->bus->number && 2375 + info->devfn == pdev->devfn) { 2376 + list_del(&info->link); 2377 + list_del(&info->global); 2378 + if (info->dev) 2379 + info->dev->dev.archdata.iommu = NULL; 2380 + spin_unlock_irqrestore(&device_domain_lock, flags); 2381 + 2382 + iommu_detach_dev(iommu, info->bus, info->devfn); 2383 + free_devinfo_mem(info); 2384 + 2385 + spin_lock_irqsave(&device_domain_lock, flags); 2386 + 2387 + if (found) 2388 + break; 2389 + else 2390 + continue; 2391 + } 2392 + 2393 + /* if there is no other devices under the same iommu 2394 + * owned by this domain, clear this iommu in iommu_bmp 2395 + * update iommu count and coherency 2396 + */ 2397 + if (device_to_iommu(info->bus, info->devfn) == iommu) 2398 + found = 1; 2399 + } 2400 + 2401 + if (found == 0) { 2402 + unsigned long tmp_flags; 2403 + spin_lock_irqsave(&domain->iommu_lock, tmp_flags); 2404 + clear_bit(iommu->seq_id, &domain->iommu_bmp); 2405 + domain->iommu_count--; 2406 + domain_update_iommu_coherency(domain); 2407 + spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags); 2408 + } 2409 + 2410 + spin_unlock_irqrestore(&device_domain_lock, flags); 2411 + } 2412 + 2413 + static void vm_domain_remove_all_dev_info(struct dmar_domain *domain) 2414 + { 2415 + struct device_domain_info *info; 2416 + struct intel_iommu *iommu; 2417 + unsigned long flags1, flags2; 2418 + 2419 + spin_lock_irqsave(&device_domain_lock, flags1); 2420 + while (!list_empty(&domain->devices)) { 2421 + info = list_entry(domain->devices.next, 2422 + struct device_domain_info, link); 2423 + list_del(&info->link); 2424 + list_del(&info->global); 2425 + if (info->dev) 2426 + 
info->dev->dev.archdata.iommu = NULL; 2427 + 2428 + spin_unlock_irqrestore(&device_domain_lock, flags1); 2429 + 2430 + iommu = device_to_iommu(info->bus, info->devfn); 2431 + iommu_detach_dev(iommu, info->bus, info->devfn); 2432 + 2433 + /* clear this iommu in iommu_bmp, update iommu count 2434 + * and coherency 2435 + */ 2436 + spin_lock_irqsave(&domain->iommu_lock, flags2); 2437 + if (test_and_clear_bit(iommu->seq_id, 2438 + &domain->iommu_bmp)) { 2439 + domain->iommu_count--; 2440 + domain_update_iommu_coherency(domain); 2441 + } 2442 + spin_unlock_irqrestore(&domain->iommu_lock, flags2); 2443 + 2444 + free_devinfo_mem(info); 2445 + spin_lock_irqsave(&device_domain_lock, flags1); 2446 + } 2447 + spin_unlock_irqrestore(&device_domain_lock, flags1); 2448 + } 2449 + 2450 + /* domain id for virtual machine, it won't be set in context */ 2451 + static unsigned long vm_domid; 2452 + 2453 + static int vm_domain_min_agaw(struct dmar_domain *domain) 2454 + { 2455 + int i; 2456 + int min_agaw = domain->agaw; 2457 + 2458 + i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus); 2459 + for (; i < g_num_of_iommus; ) { 2460 + if (min_agaw > g_iommus[i]->agaw) 2461 + min_agaw = g_iommus[i]->agaw; 2462 + 2463 + i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1); 2464 + } 2465 + 2466 + return min_agaw; 2467 + } 2468 + 2469 + static struct dmar_domain *iommu_alloc_vm_domain(void) 2470 + { 2471 + struct dmar_domain *domain; 2472 + 2473 + domain = alloc_domain_mem(); 2474 + if (!domain) 2475 + return NULL; 2476 + 2477 + domain->id = vm_domid++; 2478 + memset(&domain->iommu_bmp, 0, sizeof(unsigned long)); 2479 + domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE; 2480 + 2481 + return domain; 2482 + } 2483 + 2484 + static int vm_domain_init(struct dmar_domain *domain, int guest_width) 2485 + { 2486 + int adjust_width; 2487 + 2488 + init_iova_domain(&domain->iovad, DMA_32BIT_PFN); 2489 + spin_lock_init(&domain->mapping_lock); 2490 + spin_lock_init(&domain->iommu_lock); 2491 + 2492 + domain_reserve_special_ranges(domain); 2493 + 2494 + /* calculate AGAW */ 2495 + domain->gaw = guest_width; 2496 + adjust_width = guestwidth_to_adjustwidth(guest_width); 2497 + domain->agaw = width_to_agaw(adjust_width); 2498 + 2499 + INIT_LIST_HEAD(&domain->devices); 2500 + 2501 + domain->iommu_count = 0; 2502 + domain->iommu_coherency = 0; 2503 + domain->max_addr = 0; 2504 + 2505 + /* always allocate the top pgd */ 2506 + domain->pgd = (struct dma_pte *)alloc_pgtable_page(); 2507 + if (!domain->pgd) 2508 + return -ENOMEM; 2509 + domain_flush_cache(domain, domain->pgd, PAGE_SIZE); 2510 + return 0; 2511 + } 2512 + 2513 + static void iommu_free_vm_domain(struct dmar_domain *domain) 2514 + { 2515 + unsigned long flags; 2516 + struct dmar_drhd_unit *drhd; 2517 + struct intel_iommu *iommu; 2518 + unsigned long i; 2519 + unsigned long ndomains; 2520 + 2521 + for_each_drhd_unit(drhd) { 2522 + if (drhd->ignored) 2523 + continue; 2524 + iommu = drhd->iommu; 2525 + 2526 + ndomains = cap_ndoms(iommu->cap); 2527 + i = find_first_bit(iommu->domain_ids, ndomains); 2528 + for (; i < ndomains; ) { 2529 + if (iommu->domains[i] == domain) { 2530 + spin_lock_irqsave(&iommu->lock, flags); 2531 + clear_bit(i, iommu->domain_ids); 2532 + iommu->domains[i] = NULL; 2533 + spin_unlock_irqrestore(&iommu->lock, flags); 2534 + break; 2535 + } 2536 + i = find_next_bit(iommu->domain_ids, ndomains, i+1); 2537 + } 2538 + } 2539 + } 2540 + 2541 + static void vm_domain_exit(struct dmar_domain *domain) 2542 { 2543 u64 end; 2544 ··· 2336 if (!domain) 2337 return; 
2338 2339 + vm_domain_remove_all_dev_info(domain); 2340 + /* destroy iovas */ 2341 + put_iova_domain(&domain->iovad); 2342 end = DOMAIN_MAX_ADDR(domain->gaw); 2343 end = end & (~VTD_PAGE_MASK); 2344 ··· 2345 /* free page tables */ 2346 dma_pte_free_pagetable(domain, 0, end); 2347 2348 + iommu_free_vm_domain(domain); 2349 free_domain_mem(domain); 2350 } 2351 2352 + static int intel_iommu_domain_init(struct iommu_domain *domain) 2353 { 2354 + struct dmar_domain *dmar_domain; 2355 + 2356 + dmar_domain = iommu_alloc_vm_domain(); 2357 + if (!dmar_domain) { 2358 + printk(KERN_ERR 2359 + "intel_iommu_domain_init: dmar_domain == NULL\n"); 2360 + return -ENOMEM; 2361 + } 2362 + if (vm_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { 2363 + printk(KERN_ERR 2364 + "intel_iommu_domain_init() failed\n"); 2365 + vm_domain_exit(dmar_domain); 2366 + return -ENOMEM; 2367 + } 2368 + domain->priv = dmar_domain; 2369 + 2370 + return 0; 2371 + } 2372 + 2373 + static void intel_iommu_domain_destroy(struct iommu_domain *domain) 2374 + { 2375 + struct dmar_domain *dmar_domain = domain->priv; 2376 + 2377 + domain->priv = NULL; 2378 + vm_domain_exit(dmar_domain); 2379 + } 2380 + 2381 + static int intel_iommu_attach_device(struct iommu_domain *domain, 2382 + struct device *dev) 2383 + { 2384 + struct dmar_domain *dmar_domain = domain->priv; 2385 + struct pci_dev *pdev = to_pci_dev(dev); 2386 struct intel_iommu *iommu; 2387 + int addr_width; 2388 + u64 end; 2389 + int ret; 2390 2391 + /* normally pdev is not mapped */ 2392 + if (unlikely(domain_context_mapped(pdev))) { 2393 + struct dmar_domain *old_domain; 2394 + 2395 + old_domain = find_domain(pdev); 2396 + if (old_domain) { 2397 + if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) 2398 + vm_domain_remove_one_dev_info(old_domain, pdev); 2399 + else 2400 + domain_remove_dev_info(old_domain); 2401 + } 2402 } 2403 2404 + iommu = device_to_iommu(pdev->bus->number, pdev->devfn); 2405 + if (!iommu) 2406 + return -ENODEV; 2407 + 2408 + /* check if this iommu agaw is sufficient for max mapped address */ 2409 + addr_width = agaw_to_width(iommu->agaw); 2410 + end = DOMAIN_MAX_ADDR(addr_width); 2411 + end = end & VTD_PAGE_MASK; 2412 + if (end < dmar_domain->max_addr) { 2413 + printk(KERN_ERR "%s: iommu agaw (%d) is not " 2414 + "sufficient for the mapped address (%llx)\n", 2415 + __func__, iommu->agaw, dmar_domain->max_addr); 2416 + return -EFAULT; 2417 } 2418 + 2419 + ret = domain_context_mapping(dmar_domain, pdev); 2420 + if (ret) 2421 + return ret; 2422 + 2423 + ret = vm_domain_add_dev_info(dmar_domain, pdev); 2424 + return ret; 2425 + } 2426 + 2427 + static void intel_iommu_detach_device(struct iommu_domain *domain, 2428 + struct device *dev) 2429 + { 2430 + struct dmar_domain *dmar_domain = domain->priv; 2431 + struct pci_dev *pdev = to_pci_dev(dev); 2432 + 2433 + vm_domain_remove_one_dev_info(dmar_domain, pdev); 2434 + } 2435 + 2436 + static int intel_iommu_map_range(struct iommu_domain *domain, 2437 + unsigned long iova, phys_addr_t hpa, 2438 + size_t size, int iommu_prot) 2439 + { 2440 + struct dmar_domain *dmar_domain = domain->priv; 2441 + u64 max_addr; 2442 + int addr_width; 2443 + int prot = 0; 2444 + int ret; 2445 + 2446 + if (iommu_prot & IOMMU_READ) 2447 + prot |= DMA_PTE_READ; 2448 + if (iommu_prot & IOMMU_WRITE) 2449 + prot |= DMA_PTE_WRITE; 2450 + 2451 + max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size); 2452 + if (dmar_domain->max_addr < max_addr) { 2453 + int min_agaw; 2454 + u64 end; 2455 + 2456 + /* check if minimum agaw is sufficient 
for mapped address */ 2457 + min_agaw = vm_domain_min_agaw(dmar_domain); 2458 + addr_width = agaw_to_width(min_agaw); 2459 + end = DOMAIN_MAX_ADDR(addr_width); 2460 + end = end & VTD_PAGE_MASK; 2461 + if (end < max_addr) { 2462 + printk(KERN_ERR "%s: iommu agaw (%d) is not " 2463 + "sufficient for the mapped address (%llx)\n", 2464 + __func__, min_agaw, max_addr); 2465 + return -EFAULT; 2466 + } 2467 + dmar_domain->max_addr = max_addr; 2468 } 2469 2470 + ret = domain_page_mapping(dmar_domain, iova, hpa, size, prot); 2471 + return ret; 2472 } 2473 2474 + static void intel_iommu_unmap_range(struct iommu_domain *domain, 2475 + unsigned long iova, size_t size) 2476 { 2477 + struct dmar_domain *dmar_domain = domain->priv; 2478 + dma_addr_t base; 2479 + 2480 + /* The address might not be aligned */ 2481 + base = iova & VTD_PAGE_MASK; 2482 + size = VTD_PAGE_ALIGN(size); 2483 + dma_pte_clear_range(dmar_domain, base, base + size); 2484 + 2485 + if (dmar_domain->max_addr == base + size) 2486 + dmar_domain->max_addr = base; 2487 } 2488 2489 + static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, 2490 + unsigned long iova) 2491 { 2492 + struct dmar_domain *dmar_domain = domain->priv; 2493 struct dma_pte *pte; 2494 + u64 phys = 0; 2495 2496 + pte = addr_to_dma_pte(dmar_domain, iova); 2497 if (pte) 2498 + phys = dma_pte_addr(pte); 2499 2500 + return phys; 2501 } 2502 + 2503 + static struct iommu_ops intel_iommu_ops = { 2504 + .domain_init = intel_iommu_domain_init, 2505 + .domain_destroy = intel_iommu_domain_destroy, 2506 + .attach_dev = intel_iommu_attach_device, 2507 + .detach_dev = intel_iommu_detach_device, 2508 + .map = intel_iommu_map_range, 2509 + .unmap = intel_iommu_unmap_range, 2510 + .iova_to_phys = intel_iommu_iova_to_phys, 2511 + };
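The intel_iommu_ops table above is the driver half of the new interface: intel_iommu_init() hands it to register_iommu(), and the generic iommu_* entry points dispatch through it. A rough sketch of how any other IOMMU driver would plug in the same way (every my_* name below is invented for illustration; only struct iommu_ops, register_iommu() and the callback signatures come from linux/iommu.h):

#include <linux/types.h>
#include <linux/init.h>
#include <linux/iommu.h>

/* Illustrative stubs only -- a real driver would program its hardware here. */
static int my_domain_init(struct iommu_domain *d)
{
        d->priv = NULL;                 /* driver-private state goes here */
        return 0;
}
static void my_domain_destroy(struct iommu_domain *d) { d->priv = NULL; }
static int my_attach_dev(struct iommu_domain *d, struct device *dev) { return 0; }
static void my_detach_dev(struct iommu_domain *d, struct device *dev) { }
static int my_map(struct iommu_domain *d, unsigned long iova,
                  phys_addr_t paddr, size_t size, int prot) { return 0; }
static void my_unmap(struct iommu_domain *d, unsigned long iova, size_t size) { }
static phys_addr_t my_iova_to_phys(struct iommu_domain *d, unsigned long iova) { return 0; }

static struct iommu_ops my_iommu_ops = {
        .domain_init    = my_domain_init,
        .domain_destroy = my_domain_destroy,
        .attach_dev     = my_attach_dev,
        .detach_dev     = my_detach_dev,
        .map            = my_map,
        .unmap          = my_unmap,
        .iova_to_phys   = my_iova_to_phys,
};

static int __init my_iommu_driver_init(void)
{
        /* make the generic iommu_* calls dispatch to these callbacks */
        register_iommu(&my_iommu_ops);
        return 0;
}
device_initcall(my_iommu_driver_init);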
+3 -135
include/linux/dma_remapping.h
··· 9 #define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT) 10 #define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK) 11 12 - #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) 13 - #define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK) 14 - #define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK) 15 - 16 - 17 - /* 18 - * 0: Present 19 - * 1-11: Reserved 20 - * 12-63: Context Ptr (12 - (haw-1)) 21 - * 64-127: Reserved 22 - */ 23 - struct root_entry { 24 - u64 val; 25 - u64 rsvd1; 26 - }; 27 - #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry)) 28 - static inline bool root_present(struct root_entry *root) 29 - { 30 - return (root->val & 1); 31 - } 32 - static inline void set_root_present(struct root_entry *root) 33 - { 34 - root->val |= 1; 35 - } 36 - static inline void set_root_value(struct root_entry *root, unsigned long value) 37 - { 38 - root->val |= value & VTD_PAGE_MASK; 39 - } 40 - 41 - struct context_entry; 42 - static inline struct context_entry * 43 - get_context_addr_from_root(struct root_entry *root) 44 - { 45 - return (struct context_entry *) 46 - (root_present(root)?phys_to_virt( 47 - root->val & VTD_PAGE_MASK) : 48 - NULL); 49 - } 50 - 51 - /* 52 - * low 64 bits: 53 - * 0: present 54 - * 1: fault processing disable 55 - * 2-3: translation type 56 - * 12-63: address space root 57 - * high 64 bits: 58 - * 0-2: address width 59 - * 3-6: aval 60 - * 8-23: domain id 61 - */ 62 - struct context_entry { 63 - u64 lo; 64 - u64 hi; 65 - }; 66 - #define context_present(c) ((c).lo & 1) 67 - #define context_fault_disable(c) (((c).lo >> 1) & 1) 68 - #define context_translation_type(c) (((c).lo >> 2) & 3) 69 - #define context_address_root(c) ((c).lo & VTD_PAGE_MASK) 70 - #define context_address_width(c) ((c).hi & 7) 71 - #define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1)) 72 - 73 - #define context_set_present(c) do {(c).lo |= 1;} while (0) 74 - #define context_set_fault_enable(c) \ 75 - do {(c).lo &= (((u64)-1) << 2) | 1;} while (0) 76 - #define context_set_translation_type(c, val) \ 77 - do { \ 78 - (c).lo &= (((u64)-1) << 4) | 3; \ 79 - (c).lo |= ((val) & 3) << 2; \ 80 - } while (0) 81 - #define CONTEXT_TT_MULTI_LEVEL 0 82 - #define context_set_address_root(c, val) \ 83 - do {(c).lo |= (val) & VTD_PAGE_MASK; } while (0) 84 - #define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0) 85 - #define context_set_domain_id(c, val) \ 86 - do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0) 87 - #define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0) 88 - 89 - /* 90 - * 0: readable 91 - * 1: writable 92 - * 2-6: reserved 93 - * 7: super page 94 - * 8-11: available 95 - * 12-63: Host physcial address 96 - */ 97 - struct dma_pte { 98 - u64 val; 99 - }; 100 - #define dma_clear_pte(p) do {(p).val = 0;} while (0) 101 - 102 #define DMA_PTE_READ (1) 103 #define DMA_PTE_WRITE (2) 104 105 - #define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0) 106 - #define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0) 107 - #define dma_set_pte_prot(p, prot) \ 108 - do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) 109 - #define dma_pte_addr(p) ((p).val & VTD_PAGE_MASK) 110 - #define dma_set_pte_addr(p, addr) do {\ 111 - (p).val |= ((addr) & VTD_PAGE_MASK); } while (0) 112 - #define dma_pte_present(p) (((p).val & 3) != 0) 113 - 114 struct intel_iommu; 115 116 - struct dmar_domain { 117 - int id; /* domain id */ 118 - struct intel_iommu *iommu; /* back pointer to owning iommu */ 119 - 120 - struct list_head devices; /* all 
devices' list */ 121 - struct iova_domain iovad; /* iova's that belong to this domain */ 122 - 123 - struct dma_pte *pgd; /* virtual address */ 124 - spinlock_t mapping_lock; /* page table lock */ 125 - int gaw; /* max guest address width */ 126 - 127 - /* adjusted guest address width, 0 is level 2 30-bit */ 128 - int agaw; 129 - 130 - #define DOMAIN_FLAG_MULTIPLE_DEVICES 1 131 - int flags; 132 - }; 133 - 134 - /* PCI domain-device relationship */ 135 - struct device_domain_info { 136 - struct list_head link; /* link to domain siblings */ 137 - struct list_head global; /* link to global list */ 138 - u8 bus; /* PCI bus numer */ 139 - u8 devfn; /* PCI devfn number */ 140 - struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ 141 - struct dmar_domain *domain; /* pointer to domain */ 142 - }; 143 - 144 - extern int init_dmars(void); 145 extern void free_dmar_iommu(struct intel_iommu *iommu); 146 147 extern int dmar_disabled; 148 - 149 - #ifndef CONFIG_DMAR_GFX_WA 150 - static inline void iommu_prepare_gfx_mapping(void) 151 - { 152 - return; 153 - } 154 - #endif /* !CONFIG_DMAR_GFX_WA */ 155 156 #endif
··· 9 #define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT) 10 #define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK) 11 12 #define DMA_PTE_READ (1) 13 #define DMA_PTE_WRITE (2) 14 15 struct intel_iommu; 16 + struct dmar_domain; 17 + struct root_entry; 18 19 extern void free_dmar_iommu(struct intel_iommu *iommu); 20 + extern int iommu_calculate_agaw(struct intel_iommu *iommu); 21 22 extern int dmar_disabled; 23 24 #endif
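With the root/context/PTE helpers moved into intel-iommu.c, this header shrinks to the two page-table permission bits plus opaque forward declarations. The translation from the generic IOMMU_READ/IOMMU_WRITE flags to these bits is exactly what intel_iommu_map_range() does above; restated on its own as a sketch (the helper name is invented for illustration):

#include <linux/iommu.h>
#include <linux/dma_remapping.h>

/* Restates the prot translation performed in intel_iommu_map_range();
 * the helper itself is hypothetical and exists only for illustration. */
static int example_prot_to_dma_pte(int iommu_prot)
{
        int prot = 0;

        if (iommu_prot & IOMMU_READ)
                prot |= DMA_PTE_READ;
        if (iommu_prot & IOMMU_WRITE)
                prot |= DMA_PTE_WRITE;

        return prot;
}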
-1
include/linux/dmar.h
··· 144 list_for_each_entry(rmrr, &dmar_rmrr_units, list) 145 /* Intel DMAR initialization functions */ 146 extern int intel_iommu_init(void); 147 - extern int dmar_disabled; 148 #else 149 static inline int intel_iommu_init(void) 150 {
··· 144 list_for_each_entry(rmrr, &dmar_rmrr_units, list) 145 /* Intel DMAR initialization functions */ 146 extern int intel_iommu_init(void); 147 #else 148 static inline int intel_iommu_init(void) 149 {
+1 -24
include/linux/intel-iommu.h
··· 23 #define _INTEL_IOMMU_H_ 24 25 #include <linux/types.h> 26 - #include <linux/msi.h> 27 - #include <linux/sysdev.h> 28 #include <linux/iova.h> 29 #include <linux/io.h> 30 #include <linux/dma_remapping.h> ··· 287 void __iomem *reg; /* Pointer to hardware regs, virtual addr */ 288 u64 cap; 289 u64 ecap; 290 - int seg; 291 u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ 292 spinlock_t register_lock; /* protect register handling */ 293 int seq_id; /* sequence id of the iommu */ 294 295 #ifdef CONFIG_DMAR 296 unsigned long *domain_ids; /* bitmap of domains */ ··· 300 301 unsigned int irq; 302 unsigned char name[7]; /* Device Name */ 303 - struct msi_msg saved_msg; 304 - struct sys_device sysdev; 305 struct iommu_flush flush; 306 #endif 307 struct q_inval *qi; /* Queued invalidation info */ ··· 329 int non_present_entry_flush); 330 331 extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); 332 - 333 - void intel_iommu_domain_exit(struct dmar_domain *domain); 334 - struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev); 335 - int intel_iommu_context_mapping(struct dmar_domain *domain, 336 - struct pci_dev *pdev); 337 - int intel_iommu_page_mapping(struct dmar_domain *domain, dma_addr_t iova, 338 - u64 hpa, size_t size, int prot); 339 - void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn); 340 - struct dmar_domain *intel_iommu_find_domain(struct pci_dev *pdev); 341 - u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova); 342 - 343 - #ifdef CONFIG_DMAR 344 - int intel_iommu_found(void); 345 - #else /* CONFIG_DMAR */ 346 - static inline int intel_iommu_found(void) 347 - { 348 - return 0; 349 - } 350 - #endif /* CONFIG_DMAR */ 351 352 extern void *intel_alloc_coherent(struct device *, size_t, dma_addr_t *, gfp_t); 353 extern void intel_free_coherent(struct device *, size_t, void *, dma_addr_t);
··· 23 #define _INTEL_IOMMU_H_ 24 25 #include <linux/types.h> 26 #include <linux/iova.h> 27 #include <linux/io.h> 28 #include <linux/dma_remapping.h> ··· 289 void __iomem *reg; /* Pointer to hardware regs, virtual addr */ 290 u64 cap; 291 u64 ecap; 292 u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ 293 spinlock_t register_lock; /* protect register handling */ 294 int seq_id; /* sequence id of the iommu */ 295 + int agaw; /* agaw of this iommu */ 296 297 #ifdef CONFIG_DMAR 298 unsigned long *domain_ids; /* bitmap of domains */ ··· 302 303 unsigned int irq; 304 unsigned char name[7]; /* Device Name */ 305 struct iommu_flush flush; 306 #endif 307 struct q_inval *qi; /* Queued invalidation info */ ··· 333 int non_present_entry_flush); 334 335 extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); 336 337 extern void *intel_alloc_coherent(struct device *, size_t, dma_addr_t *, gfp_t); 338 extern void intel_free_coherent(struct device *, size_t, void *, dma_addr_t);
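struct intel_iommu gains a per-unit agaw field, intended to be filled from iommu_calculate_agaw() (declared in dma_remapping.h above) when the DMAR unit is brought up. The real caller sits in the DMAR init code outside this diff, so the sketch below is illustrative only; the function name and error handling are invented, while iommu_calculate_agaw() and the agaw/seq_id fields are real:

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/intel-iommu.h>

/* Hypothetical helper showing the intended use of iommu_calculate_agaw(). */
static int example_set_iommu_agaw(struct intel_iommu *iommu)
{
        int agaw = iommu_calculate_agaw(iommu);

        if (agaw < 0) {
                printk(KERN_ERR
                       "Cannot get a valid agaw for iommu (seq_id = %d)\n",
                       iommu->seq_id);
                return -EINVAL;
        }

        iommu->agaw = agaw;
        return 0;
}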
+112
include/linux/iommu.h
···
··· 1 + /* 2 + * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. 3 + * Author: Joerg Roedel <joerg.roedel@amd.com> 4 + * 5 + * This program is free software; you can redistribute it and/or modify it 6 + * under the terms of the GNU General Public License version 2 as published 7 + * by the Free Software Foundation. 8 + * 9 + * This program is distributed in the hope that it will be useful, 10 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 + * GNU General Public License for more details. 13 + * 14 + * You should have received a copy of the GNU General Public License 15 + * along with this program; if not, write to the Free Software 16 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 + */ 18 + 19 + #ifndef __LINUX_IOMMU_H 20 + #define __LINUX_IOMMU_H 21 + 22 + #define IOMMU_READ (1) 23 + #define IOMMU_WRITE (2) 24 + 25 + struct device; 26 + 27 + struct iommu_domain { 28 + void *priv; 29 + }; 30 + 31 + struct iommu_ops { 32 + int (*domain_init)(struct iommu_domain *domain); 33 + void (*domain_destroy)(struct iommu_domain *domain); 34 + int (*attach_dev)(struct iommu_domain *domain, struct device *dev); 35 + void (*detach_dev)(struct iommu_domain *domain, struct device *dev); 36 + int (*map)(struct iommu_domain *domain, unsigned long iova, 37 + phys_addr_t paddr, size_t size, int prot); 38 + void (*unmap)(struct iommu_domain *domain, unsigned long iova, 39 + size_t size); 40 + phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, 41 + unsigned long iova); 42 + }; 43 + 44 + #ifdef CONFIG_IOMMU_API 45 + 46 + extern void register_iommu(struct iommu_ops *ops); 47 + extern bool iommu_found(void); 48 + extern struct iommu_domain *iommu_domain_alloc(void); 49 + extern void iommu_domain_free(struct iommu_domain *domain); 50 + extern int iommu_attach_device(struct iommu_domain *domain, 51 + struct device *dev); 52 + extern void iommu_detach_device(struct iommu_domain *domain, 53 + struct device *dev); 54 + extern int iommu_map_range(struct iommu_domain *domain, unsigned long iova, 55 + phys_addr_t paddr, size_t size, int prot); 56 + extern void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova, 57 + size_t size); 58 + extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, 59 + unsigned long iova); 60 + 61 + #else /* CONFIG_IOMMU_API */ 62 + 63 + static inline void register_iommu(struct iommu_ops *ops) 64 + { 65 + } 66 + 67 + static inline bool iommu_found(void) 68 + { 69 + return false; 70 + } 71 + 72 + static inline struct iommu_domain *iommu_domain_alloc(void) 73 + { 74 + return NULL; 75 + } 76 + 77 + static inline void iommu_domain_free(struct iommu_domain *domain) 78 + { 79 + } 80 + 81 + static inline int iommu_attach_device(struct iommu_domain *domain, 82 + struct device *dev) 83 + { 84 + return -ENODEV; 85 + } 86 + 87 + static inline void iommu_detach_device(struct iommu_domain *domain, 88 + struct device *dev) 89 + { 90 + } 91 + 92 + static inline int iommu_map_range(struct iommu_domain *domain, 93 + unsigned long iova, phys_addr_t paddr, 94 + size_t size, int prot) 95 + { 96 + return -ENODEV; 97 + } 98 + 99 + static inline void iommu_unmap_range(struct iommu_domain *domain, 100 + unsigned long iova, size_t size) 101 + { 102 + } 103 + 104 + static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, 105 + unsigned long iova) 106 + { 107 + return 0; 108 + } 109 + 110 + #endif /* CONFIG_IOMMU_API */ 111 + 112 + #endif /* 
__LINUX_IOMMU_H */
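On the consumer side the API reads naturally, and the stubs above let it be called even when CONFIG_IOMMU_API is off. A minimal sketch of mapping one page, assuming a registered IOMMU driver; dev, iova and paddr are placeholders supplied by the caller (paddr page-aligned):

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/device.h>
#include <linux/iommu.h>

/* Sketch only: create a domain, attach a device, map/unmap one page. */
static int example_map_one_page(struct device *dev,
                                unsigned long iova, phys_addr_t paddr)
{
        struct iommu_domain *domain;
        int ret;

        if (!iommu_found())
                return -ENODEV;

        domain = iommu_domain_alloc();
        if (!domain)
                return -ENOMEM;

        ret = iommu_attach_device(domain, dev);
        if (ret)
                goto out_free;

        ret = iommu_map_range(domain, iova, paddr, PAGE_SIZE,
                              IOMMU_READ | IOMMU_WRITE);
        if (ret)
                goto out_detach;

        /* the translation can be read back through the same interface */
        WARN_ON(iommu_iova_to_phys(domain, iova) != paddr);

        iommu_unmap_range(domain, iova, PAGE_SIZE);
out_detach:
        iommu_detach_device(domain, dev);
out_free:
        iommu_domain_free(domain);
        return ret;
}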
+22 -8
include/linux/kvm_host.h
··· 316 #define KVM_ASSIGNED_DEV_HOST_MSI (1 << 9) 317 unsigned long irq_requested_type; 318 int irq_source_id; 319 struct pci_dev *dev; 320 struct kvm *kvm; 321 }; ··· 328 int kvm_request_irq_source_id(struct kvm *kvm); 329 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); 330 331 - #ifdef CONFIG_DMAR 332 int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, 333 unsigned long npages); 334 - int kvm_iommu_map_guest(struct kvm *kvm, 335 - struct kvm_assigned_dev_kernel *assigned_dev); 336 int kvm_iommu_unmap_guest(struct kvm *kvm); 337 - #else /* CONFIG_DMAR */ 338 static inline int kvm_iommu_map_pages(struct kvm *kvm, 339 gfn_t base_gfn, 340 unsigned long npages) ··· 345 return 0; 346 } 347 348 - static inline int kvm_iommu_map_guest(struct kvm *kvm, 349 - struct kvm_assigned_dev_kernel 350 - *assigned_dev) 351 { 352 return -ENODEV; 353 } ··· 354 { 355 return 0; 356 } 357 - #endif /* CONFIG_DMAR */ 358 359 static inline void kvm_guest_enter(void) 360 {
··· 316 #define KVM_ASSIGNED_DEV_HOST_MSI (1 << 9) 317 unsigned long irq_requested_type; 318 int irq_source_id; 319 + int flags; 320 struct pci_dev *dev; 321 struct kvm *kvm; 322 }; ··· 327 int kvm_request_irq_source_id(struct kvm *kvm); 328 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); 329 330 + #ifdef CONFIG_IOMMU_API 331 int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, 332 unsigned long npages); 333 + int kvm_iommu_map_guest(struct kvm *kvm); 334 int kvm_iommu_unmap_guest(struct kvm *kvm); 335 + int kvm_assign_device(struct kvm *kvm, 336 + struct kvm_assigned_dev_kernel *assigned_dev); 337 + int kvm_deassign_device(struct kvm *kvm, 338 + struct kvm_assigned_dev_kernel *assigned_dev); 339 + #else /* CONFIG_IOMMU_API */ 340 static inline int kvm_iommu_map_pages(struct kvm *kvm, 341 gfn_t base_gfn, 342 unsigned long npages) ··· 341 return 0; 342 } 343 344 + static inline int kvm_iommu_map_guest(struct kvm *kvm) 345 { 346 return -ENODEV; 347 } ··· 352 { 353 return 0; 354 } 355 + 356 + static inline int kvm_assign_device(struct kvm *kvm, 357 + struct kvm_assigned_dev_kernel *assigned_dev) 358 + { 359 + return 0; 360 + } 361 + 362 + static inline int kvm_deassign_device(struct kvm *kvm, 363 + struct kvm_assigned_dev_kernel *assigned_dev) 364 + { 365 + return 0; 366 + } 367 + #endif /* CONFIG_IOMMU_API */ 368 369 static inline void kvm_guest_enter(void) 370 {
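The KVM interface is now split in two: kvm_iommu_map_guest() only sets up the per-VM domain, while kvm_assign_device()/kvm_deassign_device() attach or detach one device at a time. A condensed restatement of the call order used by kvm_main.c further down; the wrapper function itself is invented for illustration:

#include <linux/kvm_host.h>

/* Hypothetical wrapper condensing the assignment path shown in the
 * kvm_main.c hunk below; kvm and match come from that code. */
static int example_assign_with_iommu(struct kvm *kvm,
                                     struct kvm_assigned_dev_kernel *match,
                                     u32 flags)
{
        int r = 0;

        if (flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
                /* the VM-wide IOMMU domain is created lazily, only once */
                if (!kvm->arch.iommu_domain) {
                        r = kvm_iommu_map_guest(kvm);
                        if (r)
                                return r;
                }
                /* each assigned device is then attached individually */
                r = kvm_assign_device(kvm, match);
        }

        return r;
}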
+49 -1
virt/kvm/kvm_main.c
··· 496 match->assigned_dev_id = assigned_dev->assigned_dev_id; 497 match->host_busnr = assigned_dev->busnr; 498 match->host_devfn = assigned_dev->devfn; 499 match->dev = dev; 500 match->irq_source_id = -1; 501 match->kvm = kvm; ··· 504 list_add(&match->list, &kvm->arch.assigned_dev_head); 505 506 if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { 507 - r = kvm_iommu_map_guest(kvm, match); 508 if (r) 509 goto out_list_del; 510 } ··· 526 pci_dev_put(dev); 527 out_free: 528 kfree(match); 529 mutex_unlock(&kvm->lock); 530 return r; 531 } ··· 1888 if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) 1889 goto out; 1890 r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); 1891 if (r) 1892 goto out; 1893 break;
··· 496 match->assigned_dev_id = assigned_dev->assigned_dev_id; 497 match->host_busnr = assigned_dev->busnr; 498 match->host_devfn = assigned_dev->devfn; 499 + match->flags = assigned_dev->flags; 500 match->dev = dev; 501 match->irq_source_id = -1; 502 match->kvm = kvm; ··· 503 list_add(&match->list, &kvm->arch.assigned_dev_head); 504 505 if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { 506 + if (!kvm->arch.iommu_domain) { 507 + r = kvm_iommu_map_guest(kvm); 508 + if (r) 509 + goto out_list_del; 510 + } 511 + r = kvm_assign_device(kvm, match); 512 if (r) 513 goto out_list_del; 514 } ··· 520 pci_dev_put(dev); 521 out_free: 522 kfree(match); 523 + mutex_unlock(&kvm->lock); 524 + return r; 525 + } 526 + #endif 527 + 528 + #ifdef KVM_CAP_DEVICE_DEASSIGNMENT 529 + static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, 530 + struct kvm_assigned_pci_dev *assigned_dev) 531 + { 532 + int r = 0; 533 + struct kvm_assigned_dev_kernel *match; 534 + 535 + mutex_lock(&kvm->lock); 536 + 537 + match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, 538 + assigned_dev->assigned_dev_id); 539 + if (!match) { 540 + printk(KERN_INFO "%s: device hasn't been assigned before, " 541 + "so cannot be deassigned\n", __func__); 542 + r = -EINVAL; 543 + goto out; 544 + } 545 + 546 + if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) 547 + kvm_deassign_device(kvm, match); 548 + 549 + kvm_free_assigned_device(kvm, match); 550 + 551 + out: 552 mutex_unlock(&kvm->lock); 553 return r; 554 } ··· 1853 if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) 1854 goto out; 1855 r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); 1856 + if (r) 1857 + goto out; 1858 + break; 1859 + } 1860 + #endif 1861 + #ifdef KVM_CAP_DEVICE_DEASSIGNMENT 1862 + case KVM_DEASSIGN_PCI_DEVICE: { 1863 + struct kvm_assigned_pci_dev assigned_dev; 1864 + 1865 + r = -EFAULT; 1866 + if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) 1867 + goto out; 1868 + r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev); 1869 if (r) 1870 goto out; 1871 break;
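The new KVM_DEASSIGN_PCI_DEVICE ioctl takes the same struct kvm_assigned_pci_dev used for assignment and is keyed on assigned_dev_id; its presence is advertised via KVM_CAP_DEVICE_DEASSIGNMENT. A hypothetical userspace sketch is below; the field usage is assumed from the handler above, and real code should probe the capability with KVM_CHECK_EXTENSION first.

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hypothetical: undo an earlier KVM_ASSIGN_PCI_DEVICE on the VM fd,
 * using the same assigned_dev_id and flags passed at assign time. */
static int example_deassign(int vm_fd, __u32 dev_id)
{
	struct kvm_assigned_pci_dev dev;

	memset(&dev, 0, sizeof(dev));
	dev.assigned_dev_id = dev_id;
	dev.flags = KVM_DEV_ASSIGN_ENABLE_IOMMU;	/* also detach from the IOMMU domain */

	return ioctl(vm_fd, KVM_DEASSIGN_PCI_DEVICE, &dev);
}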
+82 -59
virt/kvm/{vtd.c => iommu.c}
··· 25 #include <linux/kvm_host.h> 26 #include <linux/pci.h> 27 #include <linux/dmar.h> 28 #include <linux/intel-iommu.h> 29 30 static int kvm_iommu_unmap_memslots(struct kvm *kvm); ··· 38 gfn_t gfn = base_gfn; 39 pfn_t pfn; 40 int i, r = 0; 41 - struct dmar_domain *domain = kvm->arch.intel_iommu_domain; 42 43 /* check if iommu exists and in use */ 44 if (!domain) ··· 46 47 for (i = 0; i < npages; i++) { 48 /* check if already mapped */ 49 - pfn = (pfn_t)intel_iommu_iova_to_pfn(domain, 50 - gfn_to_gpa(gfn)); 51 - if (pfn) 52 continue; 53 54 pfn = gfn_to_pfn(kvm, gfn); 55 - r = intel_iommu_page_mapping(domain, 56 - gfn_to_gpa(gfn), 57 - pfn_to_hpa(pfn), 58 - PAGE_SIZE, 59 - DMA_PTE_READ | 60 - DMA_PTE_WRITE); 61 if (r) { 62 - printk(KERN_ERR "kvm_iommu_map_pages:" 63 "iommu failed to map pfn=%lx\n", pfn); 64 goto unmap_pages; 65 } ··· 71 72 static int kvm_iommu_map_memslots(struct kvm *kvm) 73 { 74 - int i, r; 75 76 down_read(&kvm->slots_lock); 77 for (i = 0; i < kvm->nmemslots; i++) { ··· 84 return r; 85 } 86 87 - int kvm_iommu_map_guest(struct kvm *kvm, 88 - struct kvm_assigned_dev_kernel *assigned_dev) 89 { 90 struct pci_dev *pdev = NULL; 91 int r; 92 93 - if (!intel_iommu_found()) { 94 - printk(KERN_ERR "%s: intel iommu not found\n", __func__); 95 - return -ENODEV; 96 - } 97 - 98 - printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n", 99 - assigned_dev->host_busnr, 100 - PCI_SLOT(assigned_dev->host_devfn), 101 - PCI_FUNC(assigned_dev->host_devfn)); 102 103 pdev = assigned_dev->dev; 104 105 - if (pdev == NULL) { 106 - if (kvm->arch.intel_iommu_domain) { 107 - intel_iommu_domain_exit(kvm->arch.intel_iommu_domain); 108 - kvm->arch.intel_iommu_domain = NULL; 109 - } 110 return -ENODEV; 111 } 112 113 - kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev); 114 - if (!kvm->arch.intel_iommu_domain) 115 - return -ENODEV; 116 117 r = kvm_iommu_map_memslots(kvm); 118 if (r) 119 goto out_unmap; 120 121 - intel_iommu_detach_dev(kvm->arch.intel_iommu_domain, 122 - pdev->bus->number, pdev->devfn); 123 - 124 - r = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain, 125 - pdev); 126 - if (r) { 127 - printk(KERN_ERR "Domain context map for %s failed", 128 - pci_name(pdev)); 129 - goto out_unmap; 130 - } 131 return 0; 132 133 out_unmap: ··· 165 } 166 167 static void kvm_iommu_put_pages(struct kvm *kvm, 168 - gfn_t base_gfn, unsigned long npages) 169 { 170 gfn_t gfn = base_gfn; 171 pfn_t pfn; 172 - struct dmar_domain *domain = kvm->arch.intel_iommu_domain; 173 - int i; 174 175 for (i = 0; i < npages; i++) { 176 - pfn = (pfn_t)intel_iommu_iova_to_pfn(domain, 177 - gfn_to_gpa(gfn)); 178 kvm_release_pfn_clean(pfn); 179 gfn++; 180 } 181 } 182 183 static int kvm_iommu_unmap_memslots(struct kvm *kvm) ··· 202 203 int kvm_iommu_unmap_guest(struct kvm *kvm) 204 { 205 - struct kvm_assigned_dev_kernel *entry; 206 - struct dmar_domain *domain = kvm->arch.intel_iommu_domain; 207 208 /* check if iommu exists and in use */ 209 if (!domain) 210 return 0; 211 212 - list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) { 213 - printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n", 214 - entry->host_busnr, 215 - PCI_SLOT(entry->host_devfn), 216 - PCI_FUNC(entry->host_devfn)); 217 - 218 - /* detach kvm dmar domain */ 219 - intel_iommu_detach_dev(domain, entry->host_busnr, 220 - entry->host_devfn); 221 - } 222 kvm_iommu_unmap_memslots(kvm); 223 - intel_iommu_domain_exit(domain); 224 return 0; 225 }
··· 25 #include <linux/kvm_host.h> 26 #include <linux/pci.h> 27 #include <linux/dmar.h> 28 + #include <linux/iommu.h> 29 #include <linux/intel-iommu.h> 30 31 static int kvm_iommu_unmap_memslots(struct kvm *kvm); ··· 37 gfn_t gfn = base_gfn; 38 pfn_t pfn; 39 int i, r = 0; 40 + struct iommu_domain *domain = kvm->arch.iommu_domain; 41 42 /* check if iommu exists and in use */ 43 if (!domain) ··· 45 46 for (i = 0; i < npages; i++) { 47 /* check if already mapped */ 48 + if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) 49 continue; 50 51 pfn = gfn_to_pfn(kvm, gfn); 52 + r = iommu_map_range(domain, 53 + gfn_to_gpa(gfn), 54 + pfn_to_hpa(pfn), 55 + PAGE_SIZE, 56 + IOMMU_READ | IOMMU_WRITE); 57 if (r) { 58 + printk(KERN_ERR "kvm_iommu_map_address:" 59 "iommu failed to map pfn=%lx\n", pfn); 60 goto unmap_pages; 61 } ··· 73 74 static int kvm_iommu_map_memslots(struct kvm *kvm) 75 { 76 + int i, r = 0; 77 78 down_read(&kvm->slots_lock); 79 for (i = 0; i < kvm->nmemslots; i++) { ··· 86 return r; 87 } 88 89 + int kvm_assign_device(struct kvm *kvm, 90 + struct kvm_assigned_dev_kernel *assigned_dev) 91 { 92 struct pci_dev *pdev = NULL; 93 + struct iommu_domain *domain = kvm->arch.iommu_domain; 94 int r; 95 96 + /* check if iommu exists and in use */ 97 + if (!domain) 98 + return 0; 99 100 pdev = assigned_dev->dev; 101 + if (pdev == NULL) 102 + return -ENODEV; 103 104 + r = iommu_attach_device(domain, &pdev->dev); 105 + if (r) { 106 + printk(KERN_ERR "assign device %x:%x.%x failed", 107 + pdev->bus->number, 108 + PCI_SLOT(pdev->devfn), 109 + PCI_FUNC(pdev->devfn)); 110 + return r; 111 + } 112 + 113 + printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n", 114 + assigned_dev->host_busnr, 115 + PCI_SLOT(assigned_dev->host_devfn), 116 + PCI_FUNC(assigned_dev->host_devfn)); 117 + 118 + return 0; 119 + } 120 + 121 + int kvm_deassign_device(struct kvm *kvm, 122 + struct kvm_assigned_dev_kernel *assigned_dev) 123 + { 124 + struct iommu_domain *domain = kvm->arch.iommu_domain; 125 + struct pci_dev *pdev = NULL; 126 + 127 + /* check if iommu exists and in use */ 128 + if (!domain) 129 + return 0; 130 + 131 + pdev = assigned_dev->dev; 132 + if (pdev == NULL) 133 + return -ENODEV; 134 + 135 + iommu_detach_device(domain, &pdev->dev); 136 + 137 + printk(KERN_DEBUG "deassign device: host bdf = %x:%x:%x\n", 138 + assigned_dev->host_busnr, 139 + PCI_SLOT(assigned_dev->host_devfn), 140 + PCI_FUNC(assigned_dev->host_devfn)); 141 + 142 + return 0; 143 + } 144 + 145 + int kvm_iommu_map_guest(struct kvm *kvm) 146 + { 147 + int r; 148 + 149 + if (!iommu_found()) { 150 + printk(KERN_ERR "%s: iommu not found\n", __func__); 151 return -ENODEV; 152 } 153 154 + kvm->arch.iommu_domain = iommu_domain_alloc(); 155 + if (!kvm->arch.iommu_domain) 156 + return -ENOMEM; 157 158 r = kvm_iommu_map_memslots(kvm); 159 if (r) 160 goto out_unmap; 161 162 return 0; 163 164 out_unmap: ··· 138 } 139 140 static void kvm_iommu_put_pages(struct kvm *kvm, 141 + gfn_t base_gfn, unsigned long npages) 142 { 143 gfn_t gfn = base_gfn; 144 pfn_t pfn; 145 + struct iommu_domain *domain = kvm->arch.iommu_domain; 146 + unsigned long i; 147 + u64 phys; 148 + 149 + /* check if iommu exists and in use */ 150 + if (!domain) 151 + return; 152 153 for (i = 0; i < npages; i++) { 154 + phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn)); 155 + pfn = phys >> PAGE_SHIFT; 156 kvm_release_pfn_clean(pfn); 157 gfn++; 158 } 159 + 160 + iommu_unmap_range(domain, gfn_to_gpa(base_gfn), PAGE_SIZE * npages); 161 } 162 163 static int kvm_iommu_unmap_memslots(struct kvm *kvm) ··· 168 
169 int kvm_iommu_unmap_guest(struct kvm *kvm) 170 { 171 + struct iommu_domain *domain = kvm->arch.iommu_domain; 172 173 /* check if iommu exists and in use */ 174 if (!domain) 175 return 0; 176 177 kvm_iommu_unmap_memslots(kvm); 178 + iommu_domain_free(domain); 179 return 0; 180 }
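On the provider side, an IOMMU driver (the AMD IOMMU driver in this series; VT-d is converted the same way) fills in a struct iommu_ops and hands it to register_iommu() once at init time. A skeletal, purely hypothetical backend is sketched below; the foo_* callbacks are placeholders with the real page-table work left as comments.

#include <linux/iommu.h>
#include <linux/errno.h>

static int foo_domain_init(struct iommu_domain *domain)
{
	/* allocate the page-table root etc. and hang it off domain->priv */
	domain->priv = NULL;			/* placeholder */
	return 0;
}

static void foo_domain_destroy(struct iommu_domain *domain)
{
	/* free whatever foo_domain_init() allocated */
}

static int foo_attach_dev(struct iommu_domain *domain, struct device *dev)
{
	/* point the device's translation at domain->priv's page table */
	return 0;
}

static void foo_detach_dev(struct iommu_domain *domain, struct device *dev)
{
	/* restore the device's previous translation */
}

static int foo_map(struct iommu_domain *domain, unsigned long iova,
		   phys_addr_t paddr, size_t size, int prot)
{
	/* install PTEs for [iova, iova + size) honouring IOMMU_READ/WRITE */
	return -ENOSYS;				/* placeholder */
}

static void foo_unmap(struct iommu_domain *domain, unsigned long iova,
		      size_t size)
{
	/* clear PTEs and flush the IOTLB */
}

static phys_addr_t foo_iova_to_phys(struct iommu_domain *domain,
				    unsigned long iova)
{
	/* walk the page table */
	return 0;				/* placeholder */
}

static struct iommu_ops foo_iommu_ops = {
	.domain_init	= foo_domain_init,
	.domain_destroy	= foo_domain_destroy,
	.attach_dev	= foo_attach_dev,
	.detach_dev	= foo_detach_dev,
	.map		= foo_map,
	.unmap		= foo_unmap,
	.iova_to_phys	= foo_iova_to_phys,
};

/* called from the driver's init path once the hardware is set up */
static void foo_register(void)
{
	register_iommu(&foo_iommu_ops);
}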