Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/linux-2.6-iommu

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/linux-2.6-iommu: (89 commits)
AMD IOMMU: remove now unnecessary #ifdefs
AMD IOMMU: prealloc_protection_domains should be static
kvm/iommu: fix compile warning
AMD IOMMU: add statistics about total number of map requests
AMD IOMMU: add statistics about allocated io memory
AMD IOMMU: add stats counter for domain tlb flushes
AMD IOMMU: add stats counter for single iommu domain tlb flushes
AMD IOMMU: add stats counter for cross-page request
AMD IOMMU: add stats counter for free_coherent requests
AMD IOMMU: add stats counter for alloc_coherent requests
AMD IOMMU: add stats counter for unmap_sg requests
AMD IOMMU: add stats counter for map_sg requests
AMD IOMMU: add stats counter for unmap_single requests
AMD IOMMU: add stats counter for map_single requests
AMD IOMMU: add stats counter for completion wait events
AMD IOMMU: add init code for statistic collection
AMD IOMMU: add necessary header defines for stats counting
AMD IOMMU: add Kconfig entry for statistic collection code
AMD IOMMU: use dev_name in iommu_enable function
AMD IOMMU: use calc_devid in prealloc_protection_domains
...

+1907 -477
+3
arch/ia64/Kconfig
···
 
 config IOMMU_HELPER
 	def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC || SWIOTLB)
+
+config IOMMU_API
+	def_bool (DMAR)
+1 -1
arch/ia64/include/asm/kvm_host.h
···
 	struct kvm_sal_data rdv_sal_data;
 
 	struct list_head assigned_dev_head;
-	struct dmar_domain *intel_iommu_domain;
+	struct iommu_domain *iommu_domain;
 	struct hlist_head irq_ack_notifier_list;
 
 	unsigned long irq_sources_bitmap;
+2 -2
arch/ia64/kvm/Makefile
···
 common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
 		coalesced_mmio.o irq_comm.o)
 
-ifeq ($(CONFIG_DMAR),y)
-common-objs += $(addprefix ../../../virt/kvm/, vtd.o)
+ifeq ($(CONFIG_IOMMU_API),y)
+common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
 endif
 
 kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
+2 -1
arch/ia64/kvm/kvm-ia64.c
···
 #include <linux/bitops.h>
 #include <linux/hrtimer.h>
 #include <linux/uaccess.h>
+#include <linux/iommu.h>
 #include <linux/intel-iommu.h>
 
 #include <asm/pgtable.h>
···
 		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
 		break;
 	case KVM_CAP_IOMMU:
-		r = intel_iommu_found();
+		r = iommu_found();
 		break;
 	default:
 		r = 0;
+13
arch/x86/Kconfig
···
 	  your BIOS for an option to enable it or if you have an IVRS ACPI
 	  table.
 
+config AMD_IOMMU_STATS
+	bool "Export AMD IOMMU statistics to debugfs"
+	depends on AMD_IOMMU
+	select DEBUG_FS
+	help
+	  This option enables code in the AMD IOMMU driver to collect various
+	  statistics about whats happening in the driver and exports that
+	  information to userspace via debugfs.
+	  If unsure, say N.
+
 # need this always selected by IOMMU for the VIA workaround
 config SWIOTLB
 	def_bool y if X86_64
···
 
 config IOMMU_HELPER
 	def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU)
+
+config IOMMU_API
+	def_bool (AMD_IOMMU || DMAR)
 
 config MAXSMP
 	bool "Configure Maximum number of SMP Processors and NUMA Nodes"
+42 -19
arch/x86/include/asm/amd_iommu_types.h
···
 /* FIXME: move this macro to <linux/pci.h> */
 #define PCI_BUS(x) (((x) >> 8) & 0xff)
 
+/* Protection domain flags */
+#define PD_DMA_OPS_MASK		(1UL << 0) /* domain used for dma_ops */
+#define PD_DEFAULT_MASK		(1UL << 1) /* domain is a default dma_ops
+					      domain for an IOMMU */
+
 /*
  * This structure contains generic data for IOMMU protection domains
  * independent of their use.
  */
 struct protection_domain {
-	spinlock_t lock; /* mostly used to lock the page table*/
-	u16 id;		 /* the domain id written to the device table */
-	int mode;	 /* paging mode (0-6 levels) */
-	u64 *pt_root;	 /* page table root pointer */
-	void *priv;	 /* private data */
+	spinlock_t lock;	/* mostly used to lock the page table*/
+	u16 id;			/* the domain id written to the device table */
+	int mode;		/* paging mode (0-6 levels) */
+	u64 *pt_root;		/* page table root pointer */
+	unsigned long flags;	/* flags to find out type of domain */
+	unsigned dev_cnt;	/* devices assigned to this domain */
+	void *priv;		/* private data */
 };
 
 /*
···
 	bool int_enabled;
 
 	/* if one, we need to send a completion wait command */
-	int need_sync;
+	bool need_sync;
 
 	/* default dma_ops domain for that IOMMU */
 	struct dma_ops_domain *default_dom;
···
 extern unsigned long *amd_iommu_pd_alloc_bitmap;
 
 /* will be 1 if device isolation is enabled */
-extern int amd_iommu_isolate;
+extern bool amd_iommu_isolate;
 
 /*
  * If true, the addresses will be flushed on unmap time, not when
···
  */
 extern bool amd_iommu_unmap_flush;
 
-/* takes a PCI device id and prints it out in a readable form */
-static inline void print_devid(u16 devid, int nl)
-{
-	int bus = devid >> 8;
-	int dev = devid >> 3 & 0x1f;
-	int fn  = devid & 0x07;
-
-	printk("%02x:%02x.%x", bus, dev, fn);
-	if (nl)
-		printk("\n");
-}
-
 /* takes bus and device/function and returns the device id
  * FIXME: should that be in generic PCI code? */
 static inline u16 calc_devid(u8 bus, u8 devfn)
 {
 	return (((u16)bus) << 8) | devfn;
 }
+
+#ifdef CONFIG_AMD_IOMMU_STATS
+
+struct __iommu_counter {
+	char *name;
+	struct dentry *dent;
+	u64 value;
+};
+
+#define DECLARE_STATS_COUNTER(nm) \
+	static struct __iommu_counter nm = {	\
+		.name = #nm,			\
+	}
+
+#define INC_STATS_COUNTER(name)		name.value += 1
+#define ADD_STATS_COUNTER(name, x)	name.value += (x)
+#define SUB_STATS_COUNTER(name, x)	name.value -= (x)
+
+#else /* CONFIG_AMD_IOMMU_STATS */
+
+#define DECLARE_STATS_COUNTER(name)
+#define INC_STATS_COUNTER(name)
+#define ADD_STATS_COUNTER(name, x)
+#define SUB_STATS_COUNTER(name, x)
+
+static inline void amd_iommu_stats_init(void) { }
+
+#endif /* CONFIG_AMD_IOMMU_STATS */
 
 #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */
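The counter machinery above is deliberately header-only so that call sites need no #ifdef: with CONFIG_AMD_IOMMU_STATS=n every macro expands to nothing and amd_iommu_stats_init() becomes an empty inline. A minimal sketch of how a driver path would use it (the counter names and functions below are illustrative only, not code from this merge; the real call sites are in arch/x86/kernel/amd_iommu.c further down):

#include <linux/types.h>
#include <asm/amd_iommu_types.h>	/* DECLARE_STATS_COUNTER() and friends */

/* file-scope counters; they compile away entirely without AMD_IOMMU_STATS */
DECLARE_STATS_COUNTER(cnt_example_requests);
DECLARE_STATS_COUNTER(example_io_mem);

static void example_map_path(size_t size)
{
	INC_STATS_COUNTER(cnt_example_requests);	/* one more request seen */
	ADD_STATS_COUNTER(example_io_mem, size);	/* bytes currently mapped */
}

static void example_unmap_path(size_t size)
{
	SUB_STATS_COUNTER(example_io_mem, size);	/* give the bytes back */
}

When the option is enabled, amd_iommu.c registers each of its counters as a read-only u64 file under the "amd-iommu" debugfs directory created in amd_iommu_stats_init() below, so the values can simply be read from userspace.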
+1 -1
arch/x86/include/asm/kvm_host.h
···
 	struct list_head active_mmu_pages;
 	struct list_head assigned_dev_head;
 	struct list_head oos_global_pages;
-	struct dmar_domain *intel_iommu_domain;
+	struct iommu_domain *iommu_domain;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
+607 -59
arch/x86/kernel/amd_iommu.c
··· 20 20 #include <linux/pci.h> 21 21 #include <linux/gfp.h> 22 22 #include <linux/bitops.h> 23 + #include <linux/debugfs.h> 23 24 #include <linux/scatterlist.h> 24 25 #include <linux/iommu-helper.h> 26 + #ifdef CONFIG_IOMMU_API 27 + #include <linux/iommu.h> 28 + #endif 25 29 #include <asm/proto.h> 26 30 #include <asm/iommu.h> 27 31 #include <asm/gart.h> ··· 42 38 static LIST_HEAD(iommu_pd_list); 43 39 static DEFINE_SPINLOCK(iommu_pd_list_lock); 44 40 41 + #ifdef CONFIG_IOMMU_API 42 + static struct iommu_ops amd_iommu_ops; 43 + #endif 44 + 45 45 /* 46 46 * general struct to manage commands send to an IOMMU 47 47 */ ··· 55 47 56 48 static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, 57 49 struct unity_map_entry *e); 50 + static struct dma_ops_domain *find_protection_domain(u16 devid); 51 + 52 + 53 + #ifdef CONFIG_AMD_IOMMU_STATS 54 + 55 + /* 56 + * Initialization code for statistics collection 57 + */ 58 + 59 + DECLARE_STATS_COUNTER(compl_wait); 60 + DECLARE_STATS_COUNTER(cnt_map_single); 61 + DECLARE_STATS_COUNTER(cnt_unmap_single); 62 + DECLARE_STATS_COUNTER(cnt_map_sg); 63 + DECLARE_STATS_COUNTER(cnt_unmap_sg); 64 + DECLARE_STATS_COUNTER(cnt_alloc_coherent); 65 + DECLARE_STATS_COUNTER(cnt_free_coherent); 66 + DECLARE_STATS_COUNTER(cross_page); 67 + DECLARE_STATS_COUNTER(domain_flush_single); 68 + DECLARE_STATS_COUNTER(domain_flush_all); 69 + DECLARE_STATS_COUNTER(alloced_io_mem); 70 + DECLARE_STATS_COUNTER(total_map_requests); 71 + 72 + static struct dentry *stats_dir; 73 + static struct dentry *de_isolate; 74 + static struct dentry *de_fflush; 75 + 76 + static void amd_iommu_stats_add(struct __iommu_counter *cnt) 77 + { 78 + if (stats_dir == NULL) 79 + return; 80 + 81 + cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir, 82 + &cnt->value); 83 + } 84 + 85 + static void amd_iommu_stats_init(void) 86 + { 87 + stats_dir = debugfs_create_dir("amd-iommu", NULL); 88 + if (stats_dir == NULL) 89 + return; 90 + 91 + de_isolate = debugfs_create_bool("isolation", 0444, stats_dir, 92 + (u32 *)&amd_iommu_isolate); 93 + 94 + de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir, 95 + (u32 *)&amd_iommu_unmap_flush); 96 + 97 + amd_iommu_stats_add(&compl_wait); 98 + amd_iommu_stats_add(&cnt_map_single); 99 + amd_iommu_stats_add(&cnt_unmap_single); 100 + amd_iommu_stats_add(&cnt_map_sg); 101 + amd_iommu_stats_add(&cnt_unmap_sg); 102 + amd_iommu_stats_add(&cnt_alloc_coherent); 103 + amd_iommu_stats_add(&cnt_free_coherent); 104 + amd_iommu_stats_add(&cross_page); 105 + amd_iommu_stats_add(&domain_flush_single); 106 + amd_iommu_stats_add(&domain_flush_all); 107 + amd_iommu_stats_add(&alloced_io_mem); 108 + amd_iommu_stats_add(&total_map_requests); 109 + } 110 + 111 + #endif 58 112 59 113 /* returns !0 if the IOMMU is caching non-present entries in its TLB */ 60 114 static int iommu_has_npcache(struct amd_iommu *iommu) ··· 259 189 spin_lock_irqsave(&iommu->lock, flags); 260 190 ret = __iommu_queue_command(iommu, cmd); 261 191 if (!ret) 262 - iommu->need_sync = 1; 192 + iommu->need_sync = true; 263 193 spin_unlock_irqrestore(&iommu->lock, flags); 264 194 265 195 return ret; 266 196 } 267 197 268 198 /* 269 - * This function is called whenever we need to ensure that the IOMMU has 270 - * completed execution of all commands we sent. It sends a 271 - * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs 272 - * us about that by writing a value to a physical address we pass with 273 - * the command. 
199 + * This function waits until an IOMMU has completed a completion 200 + * wait command 274 201 */ 275 - static int iommu_completion_wait(struct amd_iommu *iommu) 202 + static void __iommu_wait_for_completion(struct amd_iommu *iommu) 276 203 { 277 - int ret = 0, ready = 0; 204 + int ready = 0; 278 205 unsigned status = 0; 279 - struct iommu_cmd cmd; 280 - unsigned long flags, i = 0; 206 + unsigned long i = 0; 281 207 282 - memset(&cmd, 0, sizeof(cmd)); 283 - cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; 284 - CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); 285 - 286 - spin_lock_irqsave(&iommu->lock, flags); 287 - 288 - if (!iommu->need_sync) 289 - goto out; 290 - 291 - iommu->need_sync = 0; 292 - 293 - ret = __iommu_queue_command(iommu, &cmd); 294 - 295 - if (ret) 296 - goto out; 208 + INC_STATS_COUNTER(compl_wait); 297 209 298 210 while (!ready && (i < EXIT_LOOP_COUNT)) { 299 211 ++i; ··· 290 238 291 239 if (unlikely(i == EXIT_LOOP_COUNT)) 292 240 panic("AMD IOMMU: Completion wait loop failed\n"); 241 + } 242 + 243 + /* 244 + * This function queues a completion wait command into the command 245 + * buffer of an IOMMU 246 + */ 247 + static int __iommu_completion_wait(struct amd_iommu *iommu) 248 + { 249 + struct iommu_cmd cmd; 250 + 251 + memset(&cmd, 0, sizeof(cmd)); 252 + cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; 253 + CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); 254 + 255 + return __iommu_queue_command(iommu, &cmd); 256 + } 257 + 258 + /* 259 + * This function is called whenever we need to ensure that the IOMMU has 260 + * completed execution of all commands we sent. It sends a 261 + * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs 262 + * us about that by writing a value to a physical address we pass with 263 + * the command. 264 + */ 265 + static int iommu_completion_wait(struct amd_iommu *iommu) 266 + { 267 + int ret = 0; 268 + unsigned long flags; 269 + 270 + spin_lock_irqsave(&iommu->lock, flags); 271 + 272 + if (!iommu->need_sync) 273 + goto out; 274 + 275 + ret = __iommu_completion_wait(iommu); 276 + 277 + iommu->need_sync = false; 278 + 279 + if (ret) 280 + goto out; 281 + 282 + __iommu_wait_for_completion(iommu); 293 283 294 284 out: 295 285 spin_unlock_irqrestore(&iommu->lock, flags); ··· 358 264 return ret; 359 265 } 360 266 267 + static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, 268 + u16 domid, int pde, int s) 269 + { 270 + memset(cmd, 0, sizeof(*cmd)); 271 + address &= PAGE_MASK; 272 + CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); 273 + cmd->data[1] |= domid; 274 + cmd->data[2] = lower_32_bits(address); 275 + cmd->data[3] = upper_32_bits(address); 276 + if (s) /* size bit - we flush more than one 4kb page */ 277 + cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; 278 + if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ 279 + cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; 280 + } 281 + 361 282 /* 362 283 * Generic command send function for invalidaing TLB entries 363 284 */ ··· 382 273 struct iommu_cmd cmd; 383 274 int ret; 384 275 385 - memset(&cmd, 0, sizeof(cmd)); 386 - address &= PAGE_MASK; 387 - CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES); 388 - cmd.data[1] |= domid; 389 - cmd.data[2] = lower_32_bits(address); 390 - cmd.data[3] = upper_32_bits(address); 391 - if (s) /* size bit - we flush more than one 4kb page */ 392 - cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; 393 - if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ 394 - cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; 276 + __iommu_build_inv_iommu_pages(&cmd, address, 
domid, pde, s); 395 277 396 278 ret = iommu_queue_command(iommu, &cmd); 397 279 ··· 421 321 { 422 322 u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; 423 323 324 + INC_STATS_COUNTER(domain_flush_single); 325 + 424 326 iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); 327 + } 328 + 329 + /* 330 + * This function is used to flush the IO/TLB for a given protection domain 331 + * on every IOMMU in the system 332 + */ 333 + static void iommu_flush_domain(u16 domid) 334 + { 335 + unsigned long flags; 336 + struct amd_iommu *iommu; 337 + struct iommu_cmd cmd; 338 + 339 + INC_STATS_COUNTER(domain_flush_all); 340 + 341 + __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 342 + domid, 1, 1); 343 + 344 + list_for_each_entry(iommu, &amd_iommu_list, list) { 345 + spin_lock_irqsave(&iommu->lock, flags); 346 + __iommu_queue_command(iommu, &cmd); 347 + __iommu_completion_wait(iommu); 348 + __iommu_wait_for_completion(iommu); 349 + spin_unlock_irqrestore(&iommu->lock, flags); 350 + } 425 351 } 426 352 427 353 /**************************************************************************** ··· 464 338 * supporting all features of AMD IOMMU page tables like level skipping 465 339 * and full 64 bit address spaces. 466 340 */ 467 - static int iommu_map(struct protection_domain *dom, 468 - unsigned long bus_addr, 469 - unsigned long phys_addr, 470 - int prot) 341 + static int iommu_map_page(struct protection_domain *dom, 342 + unsigned long bus_addr, 343 + unsigned long phys_addr, 344 + int prot) 471 345 { 472 346 u64 __pte, *pte, *page; 473 347 ··· 512 386 *pte = __pte; 513 387 514 388 return 0; 389 + } 390 + 391 + static void iommu_unmap_page(struct protection_domain *dom, 392 + unsigned long bus_addr) 393 + { 394 + u64 *pte; 395 + 396 + pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)]; 397 + 398 + if (!IOMMU_PTE_PRESENT(*pte)) 399 + return; 400 + 401 + pte = IOMMU_PTE_PAGE(*pte); 402 + pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; 403 + 404 + if (!IOMMU_PTE_PRESENT(*pte)) 405 + return; 406 + 407 + pte = IOMMU_PTE_PAGE(*pte); 408 + pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; 409 + 410 + *pte = 0; 515 411 } 516 412 517 413 /* ··· 588 440 589 441 for (addr = e->address_start; addr < e->address_end; 590 442 addr += PAGE_SIZE) { 591 - ret = iommu_map(&dma_dom->domain, addr, addr, e->prot); 443 + ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot); 592 444 if (ret) 593 445 return ret; 594 446 /* ··· 719 571 return id; 720 572 } 721 573 574 + static void domain_id_free(int id) 575 + { 576 + unsigned long flags; 577 + 578 + write_lock_irqsave(&amd_iommu_devtable_lock, flags); 579 + if (id > 0 && id < MAX_DOMAIN_ID) 580 + __clear_bit(id, amd_iommu_pd_alloc_bitmap); 581 + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); 582 + } 583 + 722 584 /* 723 585 * Used to reserve address ranges in the aperture (e.g. for exclusion 724 586 * ranges. 
··· 745 587 iommu_area_reserve(dom->bitmap, start_page, pages); 746 588 } 747 589 748 - static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) 590 + static void free_pagetable(struct protection_domain *domain) 749 591 { 750 592 int i, j; 751 593 u64 *p1, *p2, *p3; 752 594 753 - p1 = dma_dom->domain.pt_root; 595 + p1 = domain->pt_root; 754 596 755 597 if (!p1) 756 598 return; ··· 771 613 } 772 614 773 615 free_page((unsigned long)p1); 616 + 617 + domain->pt_root = NULL; 774 618 } 775 619 776 620 /* ··· 784 624 if (!dom) 785 625 return; 786 626 787 - dma_ops_free_pagetable(dom); 627 + free_pagetable(&dom->domain); 788 628 789 629 kfree(dom->pte_pages); 790 630 ··· 823 663 goto free_dma_dom; 824 664 dma_dom->domain.mode = PAGE_MODE_3_LEVEL; 825 665 dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); 666 + dma_dom->domain.flags = PD_DMA_OPS_MASK; 826 667 dma_dom->domain.priv = dma_dom; 827 668 if (!dma_dom->domain.pt_root) 828 669 goto free_dma_dom; ··· 886 725 } 887 726 888 727 /* 728 + * little helper function to check whether a given protection domain is a 729 + * dma_ops domain 730 + */ 731 + static bool dma_ops_domain(struct protection_domain *domain) 732 + { 733 + return domain->flags & PD_DMA_OPS_MASK; 734 + } 735 + 736 + /* 889 737 * Find out the protection domain structure for a given PCI device. This 890 738 * will give us the pointer to the page table root for example. 891 739 */ ··· 914 744 * If a device is not yet associated with a domain, this function does 915 745 * assigns it visible for the hardware 916 746 */ 917 - static void set_device_domain(struct amd_iommu *iommu, 918 - struct protection_domain *domain, 919 - u16 devid) 747 + static void attach_device(struct amd_iommu *iommu, 748 + struct protection_domain *domain, 749 + u16 devid) 920 750 { 921 751 unsigned long flags; 922 - 923 752 u64 pte_root = virt_to_phys(domain->pt_root); 753 + 754 + domain->dev_cnt += 1; 924 755 925 756 pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) 926 757 << DEV_ENTRY_MODE_SHIFT; ··· 937 766 938 767 iommu_queue_inv_dev_entry(iommu, devid); 939 768 } 769 + 770 + /* 771 + * Removes a device from a protection domain (unlocked) 772 + */ 773 + static void __detach_device(struct protection_domain *domain, u16 devid) 774 + { 775 + 776 + /* lock domain */ 777 + spin_lock(&domain->lock); 778 + 779 + /* remove domain from the lookup table */ 780 + amd_iommu_pd_table[devid] = NULL; 781 + 782 + /* remove entry from the device table seen by the hardware */ 783 + amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; 784 + amd_iommu_dev_table[devid].data[1] = 0; 785 + amd_iommu_dev_table[devid].data[2] = 0; 786 + 787 + /* decrease reference counter */ 788 + domain->dev_cnt -= 1; 789 + 790 + /* ready */ 791 + spin_unlock(&domain->lock); 792 + } 793 + 794 + /* 795 + * Removes a device from a protection domain (with devtable_lock held) 796 + */ 797 + static void detach_device(struct protection_domain *domain, u16 devid) 798 + { 799 + unsigned long flags; 800 + 801 + /* lock device table */ 802 + write_lock_irqsave(&amd_iommu_devtable_lock, flags); 803 + __detach_device(domain, devid); 804 + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); 805 + } 806 + 807 + static int device_change_notifier(struct notifier_block *nb, 808 + unsigned long action, void *data) 809 + { 810 + struct device *dev = data; 811 + struct pci_dev *pdev = to_pci_dev(dev); 812 + u16 devid = calc_devid(pdev->bus->number, pdev->devfn); 813 + struct protection_domain *domain; 814 + struct 
dma_ops_domain *dma_domain; 815 + struct amd_iommu *iommu; 816 + int order = amd_iommu_aperture_order; 817 + unsigned long flags; 818 + 819 + if (devid > amd_iommu_last_bdf) 820 + goto out; 821 + 822 + devid = amd_iommu_alias_table[devid]; 823 + 824 + iommu = amd_iommu_rlookup_table[devid]; 825 + if (iommu == NULL) 826 + goto out; 827 + 828 + domain = domain_for_device(devid); 829 + 830 + if (domain && !dma_ops_domain(domain)) 831 + WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound " 832 + "to a non-dma-ops domain\n", dev_name(dev)); 833 + 834 + switch (action) { 835 + case BUS_NOTIFY_BOUND_DRIVER: 836 + if (domain) 837 + goto out; 838 + dma_domain = find_protection_domain(devid); 839 + if (!dma_domain) 840 + dma_domain = iommu->default_dom; 841 + attach_device(iommu, &dma_domain->domain, devid); 842 + printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " 843 + "device %s\n", dma_domain->domain.id, dev_name(dev)); 844 + break; 845 + case BUS_NOTIFY_UNBIND_DRIVER: 846 + if (!domain) 847 + goto out; 848 + detach_device(domain, devid); 849 + break; 850 + case BUS_NOTIFY_ADD_DEVICE: 851 + /* allocate a protection domain if a device is added */ 852 + dma_domain = find_protection_domain(devid); 853 + if (dma_domain) 854 + goto out; 855 + dma_domain = dma_ops_domain_alloc(iommu, order); 856 + if (!dma_domain) 857 + goto out; 858 + dma_domain->target_dev = devid; 859 + 860 + spin_lock_irqsave(&iommu_pd_list_lock, flags); 861 + list_add_tail(&dma_domain->list, &iommu_pd_list); 862 + spin_unlock_irqrestore(&iommu_pd_list_lock, flags); 863 + 864 + break; 865 + default: 866 + goto out; 867 + } 868 + 869 + iommu_queue_inv_dev_entry(iommu, devid); 870 + iommu_completion_wait(iommu); 871 + 872 + out: 873 + return 0; 874 + } 875 + 876 + struct notifier_block device_nb = { 877 + .notifier_call = device_change_notifier, 878 + }; 940 879 941 880 /***************************************************************************** 942 881 * ··· 1083 802 list_for_each_entry(entry, &iommu_pd_list, list) { 1084 803 if (entry->target_dev == devid) { 1085 804 ret = entry; 1086 - list_del(&ret->list); 1087 805 break; 1088 806 } 1089 807 } ··· 1133 853 if (!dma_dom) 1134 854 dma_dom = (*iommu)->default_dom; 1135 855 *domain = &dma_dom->domain; 1136 - set_device_domain(*iommu, *domain, *bdf); 856 + attach_device(*iommu, *domain, *bdf); 1137 857 printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " 1138 - "device ", (*domain)->id); 1139 - print_devid(_bdf, 1); 858 + "device %s\n", (*domain)->id, dev_name(dev)); 1140 859 } 1141 860 1142 861 if (domain_for_device(_bdf) == NULL) 1143 - set_device_domain(*iommu, *domain, _bdf); 862 + attach_device(*iommu, *domain, _bdf); 1144 863 1145 864 return 1; 1146 865 } ··· 1225 946 pages = iommu_num_pages(paddr, size, PAGE_SIZE); 1226 947 paddr &= PAGE_MASK; 1227 948 949 + INC_STATS_COUNTER(total_map_requests); 950 + 951 + if (pages > 1) 952 + INC_STATS_COUNTER(cross_page); 953 + 1228 954 if (align) 1229 955 align_mask = (1UL << get_order(size)) - 1; 1230 956 ··· 1245 961 start += PAGE_SIZE; 1246 962 } 1247 963 address += offset; 964 + 965 + ADD_STATS_COUNTER(alloced_io_mem, size); 1248 966 1249 967 if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { 1250 968 iommu_flush_tlb(iommu, dma_dom->domain.id); ··· 1284 998 start += PAGE_SIZE; 1285 999 } 1286 1000 1001 + SUB_STATS_COUNTER(alloced_io_mem, size); 1002 + 1287 1003 dma_ops_free_addresses(dma_dom, dma_addr, pages); 1288 1004 1289 1005 if (amd_iommu_unmap_flush || dma_dom->need_flush) { ··· 1307 1019 
dma_addr_t addr; 1308 1020 u64 dma_mask; 1309 1021 1022 + INC_STATS_COUNTER(cnt_map_single); 1023 + 1310 1024 if (!check_device(dev)) 1311 1025 return bad_dma_address; 1312 1026 ··· 1319 1029 if (iommu == NULL || domain == NULL) 1320 1030 /* device not handled by any AMD IOMMU */ 1321 1031 return (dma_addr_t)paddr; 1032 + 1033 + if (!dma_ops_domain(domain)) 1034 + return bad_dma_address; 1322 1035 1323 1036 spin_lock_irqsave(&domain->lock, flags); 1324 1037 addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, ··· 1348 1055 struct protection_domain *domain; 1349 1056 u16 devid; 1350 1057 1058 + INC_STATS_COUNTER(cnt_unmap_single); 1059 + 1351 1060 if (!check_device(dev) || 1352 1061 !get_device_resources(dev, &iommu, &domain, &devid)) 1353 1062 /* device not handled by any AMD IOMMU */ 1063 + return; 1064 + 1065 + if (!dma_ops_domain(domain)) 1354 1066 return; 1355 1067 1356 1068 spin_lock_irqsave(&domain->lock, flags); ··· 1402 1104 int mapped_elems = 0; 1403 1105 u64 dma_mask; 1404 1106 1107 + INC_STATS_COUNTER(cnt_map_sg); 1108 + 1405 1109 if (!check_device(dev)) 1406 1110 return 0; 1407 1111 ··· 1413 1113 1414 1114 if (!iommu || !domain) 1415 1115 return map_sg_no_iommu(dev, sglist, nelems, dir); 1116 + 1117 + if (!dma_ops_domain(domain)) 1118 + return 0; 1416 1119 1417 1120 spin_lock_irqsave(&domain->lock, flags); 1418 1121 ··· 1466 1163 u16 devid; 1467 1164 int i; 1468 1165 1166 + INC_STATS_COUNTER(cnt_unmap_sg); 1167 + 1469 1168 if (!check_device(dev) || 1470 1169 !get_device_resources(dev, &iommu, &domain, &devid)) 1170 + return; 1171 + 1172 + if (!dma_ops_domain(domain)) 1471 1173 return; 1472 1174 1473 1175 spin_lock_irqsave(&domain->lock, flags); ··· 1502 1194 phys_addr_t paddr; 1503 1195 u64 dma_mask = dev->coherent_dma_mask; 1504 1196 1197 + INC_STATS_COUNTER(cnt_alloc_coherent); 1198 + 1505 1199 if (!check_device(dev)) 1506 1200 return NULL; 1507 1201 ··· 1522 1212 return virt_addr; 1523 1213 } 1524 1214 1215 + if (!dma_ops_domain(domain)) 1216 + goto out_free; 1217 + 1525 1218 if (!dma_mask) 1526 1219 dma_mask = *dev->dma_mask; 1527 1220 ··· 1533 1220 *dma_addr = __map_single(dev, iommu, domain->priv, paddr, 1534 1221 size, DMA_BIDIRECTIONAL, true, dma_mask); 1535 1222 1536 - if (*dma_addr == bad_dma_address) { 1537 - free_pages((unsigned long)virt_addr, get_order(size)); 1538 - virt_addr = NULL; 1539 - goto out; 1540 - } 1223 + if (*dma_addr == bad_dma_address) 1224 + goto out_free; 1541 1225 1542 1226 iommu_completion_wait(iommu); 1543 1227 1544 - out: 1545 1228 spin_unlock_irqrestore(&domain->lock, flags); 1546 1229 1547 1230 return virt_addr; 1231 + 1232 + out_free: 1233 + 1234 + free_pages((unsigned long)virt_addr, get_order(size)); 1235 + 1236 + return NULL; 1548 1237 } 1549 1238 1550 1239 /* ··· 1560 1245 struct protection_domain *domain; 1561 1246 u16 devid; 1562 1247 1248 + INC_STATS_COUNTER(cnt_free_coherent); 1249 + 1563 1250 if (!check_device(dev)) 1564 1251 return; 1565 1252 1566 1253 get_device_resources(dev, &iommu, &domain, &devid); 1567 1254 1568 1255 if (!iommu || !domain) 1256 + goto free_mem; 1257 + 1258 + if (!dma_ops_domain(domain)) 1569 1259 goto free_mem; 1570 1260 1571 1261 spin_lock_irqsave(&domain->lock, flags); ··· 1625 1305 u16 devid; 1626 1306 1627 1307 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { 1628 - devid = (dev->bus->number << 8) | dev->devfn; 1308 + devid = calc_devid(dev->bus->number, dev->devfn); 1629 1309 if (devid > amd_iommu_last_bdf) 1630 1310 continue; 1631 1311 devid = 
amd_iommu_alias_table[devid]; ··· 1672 1352 iommu->default_dom = dma_ops_domain_alloc(iommu, order); 1673 1353 if (iommu->default_dom == NULL) 1674 1354 return -ENOMEM; 1355 + iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; 1675 1356 ret = iommu_init_unity_mappings(iommu); 1676 1357 if (ret) 1677 1358 goto free_domains; ··· 1696 1375 /* Make the driver finally visible to the drivers */ 1697 1376 dma_ops = &amd_iommu_dma_ops; 1698 1377 1378 + register_iommu(&amd_iommu_ops); 1379 + 1380 + bus_register_notifier(&pci_bus_type, &device_nb); 1381 + 1382 + amd_iommu_stats_init(); 1383 + 1699 1384 return 0; 1700 1385 1701 1386 free_domains: ··· 1713 1386 1714 1387 return ret; 1715 1388 } 1389 + 1390 + /***************************************************************************** 1391 + * 1392 + * The following functions belong to the exported interface of AMD IOMMU 1393 + * 1394 + * This interface allows access to lower level functions of the IOMMU 1395 + * like protection domain handling and assignement of devices to domains 1396 + * which is not possible with the dma_ops interface. 1397 + * 1398 + *****************************************************************************/ 1399 + 1400 + static void cleanup_domain(struct protection_domain *domain) 1401 + { 1402 + unsigned long flags; 1403 + u16 devid; 1404 + 1405 + write_lock_irqsave(&amd_iommu_devtable_lock, flags); 1406 + 1407 + for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) 1408 + if (amd_iommu_pd_table[devid] == domain) 1409 + __detach_device(domain, devid); 1410 + 1411 + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); 1412 + } 1413 + 1414 + static int amd_iommu_domain_init(struct iommu_domain *dom) 1415 + { 1416 + struct protection_domain *domain; 1417 + 1418 + domain = kzalloc(sizeof(*domain), GFP_KERNEL); 1419 + if (!domain) 1420 + return -ENOMEM; 1421 + 1422 + spin_lock_init(&domain->lock); 1423 + domain->mode = PAGE_MODE_3_LEVEL; 1424 + domain->id = domain_id_alloc(); 1425 + if (!domain->id) 1426 + goto out_free; 1427 + domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); 1428 + if (!domain->pt_root) 1429 + goto out_free; 1430 + 1431 + dom->priv = domain; 1432 + 1433 + return 0; 1434 + 1435 + out_free: 1436 + kfree(domain); 1437 + 1438 + return -ENOMEM; 1439 + } 1440 + 1441 + static void amd_iommu_domain_destroy(struct iommu_domain *dom) 1442 + { 1443 + struct protection_domain *domain = dom->priv; 1444 + 1445 + if (!domain) 1446 + return; 1447 + 1448 + if (domain->dev_cnt > 0) 1449 + cleanup_domain(domain); 1450 + 1451 + BUG_ON(domain->dev_cnt != 0); 1452 + 1453 + free_pagetable(domain); 1454 + 1455 + domain_id_free(domain->id); 1456 + 1457 + kfree(domain); 1458 + 1459 + dom->priv = NULL; 1460 + } 1461 + 1462 + static void amd_iommu_detach_device(struct iommu_domain *dom, 1463 + struct device *dev) 1464 + { 1465 + struct protection_domain *domain = dom->priv; 1466 + struct amd_iommu *iommu; 1467 + struct pci_dev *pdev; 1468 + u16 devid; 1469 + 1470 + if (dev->bus != &pci_bus_type) 1471 + return; 1472 + 1473 + pdev = to_pci_dev(dev); 1474 + 1475 + devid = calc_devid(pdev->bus->number, pdev->devfn); 1476 + 1477 + if (devid > 0) 1478 + detach_device(domain, devid); 1479 + 1480 + iommu = amd_iommu_rlookup_table[devid]; 1481 + if (!iommu) 1482 + return; 1483 + 1484 + iommu_queue_inv_dev_entry(iommu, devid); 1485 + iommu_completion_wait(iommu); 1486 + } 1487 + 1488 + static int amd_iommu_attach_device(struct iommu_domain *dom, 1489 + struct device *dev) 1490 + { 1491 + struct protection_domain *domain = dom->priv; 
1492 + struct protection_domain *old_domain; 1493 + struct amd_iommu *iommu; 1494 + struct pci_dev *pdev; 1495 + u16 devid; 1496 + 1497 + if (dev->bus != &pci_bus_type) 1498 + return -EINVAL; 1499 + 1500 + pdev = to_pci_dev(dev); 1501 + 1502 + devid = calc_devid(pdev->bus->number, pdev->devfn); 1503 + 1504 + if (devid >= amd_iommu_last_bdf || 1505 + devid != amd_iommu_alias_table[devid]) 1506 + return -EINVAL; 1507 + 1508 + iommu = amd_iommu_rlookup_table[devid]; 1509 + if (!iommu) 1510 + return -EINVAL; 1511 + 1512 + old_domain = domain_for_device(devid); 1513 + if (old_domain) 1514 + return -EBUSY; 1515 + 1516 + attach_device(iommu, domain, devid); 1517 + 1518 + iommu_completion_wait(iommu); 1519 + 1520 + return 0; 1521 + } 1522 + 1523 + static int amd_iommu_map_range(struct iommu_domain *dom, 1524 + unsigned long iova, phys_addr_t paddr, 1525 + size_t size, int iommu_prot) 1526 + { 1527 + struct protection_domain *domain = dom->priv; 1528 + unsigned long i, npages = iommu_num_pages(paddr, size, PAGE_SIZE); 1529 + int prot = 0; 1530 + int ret; 1531 + 1532 + if (iommu_prot & IOMMU_READ) 1533 + prot |= IOMMU_PROT_IR; 1534 + if (iommu_prot & IOMMU_WRITE) 1535 + prot |= IOMMU_PROT_IW; 1536 + 1537 + iova &= PAGE_MASK; 1538 + paddr &= PAGE_MASK; 1539 + 1540 + for (i = 0; i < npages; ++i) { 1541 + ret = iommu_map_page(domain, iova, paddr, prot); 1542 + if (ret) 1543 + return ret; 1544 + 1545 + iova += PAGE_SIZE; 1546 + paddr += PAGE_SIZE; 1547 + } 1548 + 1549 + return 0; 1550 + } 1551 + 1552 + static void amd_iommu_unmap_range(struct iommu_domain *dom, 1553 + unsigned long iova, size_t size) 1554 + { 1555 + 1556 + struct protection_domain *domain = dom->priv; 1557 + unsigned long i, npages = iommu_num_pages(iova, size, PAGE_SIZE); 1558 + 1559 + iova &= PAGE_MASK; 1560 + 1561 + for (i = 0; i < npages; ++i) { 1562 + iommu_unmap_page(domain, iova); 1563 + iova += PAGE_SIZE; 1564 + } 1565 + 1566 + iommu_flush_domain(domain->id); 1567 + } 1568 + 1569 + static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, 1570 + unsigned long iova) 1571 + { 1572 + struct protection_domain *domain = dom->priv; 1573 + unsigned long offset = iova & ~PAGE_MASK; 1574 + phys_addr_t paddr; 1575 + u64 *pte; 1576 + 1577 + pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(iova)]; 1578 + 1579 + if (!IOMMU_PTE_PRESENT(*pte)) 1580 + return 0; 1581 + 1582 + pte = IOMMU_PTE_PAGE(*pte); 1583 + pte = &pte[IOMMU_PTE_L1_INDEX(iova)]; 1584 + 1585 + if (!IOMMU_PTE_PRESENT(*pte)) 1586 + return 0; 1587 + 1588 + pte = IOMMU_PTE_PAGE(*pte); 1589 + pte = &pte[IOMMU_PTE_L0_INDEX(iova)]; 1590 + 1591 + if (!IOMMU_PTE_PRESENT(*pte)) 1592 + return 0; 1593 + 1594 + paddr = *pte & IOMMU_PAGE_MASK; 1595 + paddr |= offset; 1596 + 1597 + return paddr; 1598 + } 1599 + 1600 + static struct iommu_ops amd_iommu_ops = { 1601 + .domain_init = amd_iommu_domain_init, 1602 + .domain_destroy = amd_iommu_domain_destroy, 1603 + .attach_dev = amd_iommu_attach_device, 1604 + .detach_dev = amd_iommu_detach_device, 1605 + .map = amd_iommu_map_range, 1606 + .unmap = amd_iommu_unmap_range, 1607 + .iova_to_phys = amd_iommu_iova_to_phys, 1608 + }; 1609 +
+6 -9
arch/x86/kernel/amd_iommu_init.c
···
 LIST_HEAD(amd_iommu_unity_map);		/* a list of required unity mappings
 					   we find in ACPI */
 unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
-int amd_iommu_isolate = 1;		/* if 1, device isolation is enabled */
+bool amd_iommu_isolate = true;		/* if true, device isolation is
+					   enabled */
 bool amd_iommu_unmap_flush;		/* if true, flush on every unmap */
 
 LIST_HEAD(amd_iommu_list);		/* list of all AMD IOMMUs in the
···
 /* Function to enable the hardware */
 static void __init iommu_enable(struct amd_iommu *iommu)
 {
-	printk(KERN_INFO "AMD IOMMU: Enabling IOMMU "
-	       "at %02x:%02x.%x cap 0x%hx\n",
-	       iommu->dev->bus->number,
-	       PCI_SLOT(iommu->dev->devfn),
-	       PCI_FUNC(iommu->dev->devfn),
-	       iommu->cap_ptr);
+	printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n",
+	       dev_name(&iommu->dev->dev), iommu->cap_ptr);
 
 	iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
 }
···
 {
 	for (; *str; ++str) {
 		if (strncmp(str, "isolate", 7) == 0)
-			amd_iommu_isolate = 1;
+			amd_iommu_isolate = true;
 		if (strncmp(str, "share", 5) == 0)
-			amd_iommu_isolate = 0;
+			amd_iommu_isolate = false;
 		if (strncmp(str, "fullflush", 9) == 0)
 			amd_iommu_unmap_flush = true;
 	}
+2 -2
arch/x86/kvm/Makefile
···
 ifeq ($(CONFIG_KVM_TRACE),y)
 common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
 endif
-ifeq ($(CONFIG_DMAR),y)
-common-objs += $(addprefix ../../../virt/kvm/, vtd.o)
+ifeq ($(CONFIG_IOMMU_API),y)
+common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
 endif
 
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
+2 -1
arch/x86/kvm/x86.c
···
 #include <linux/module.h>
 #include <linux/mman.h>
 #include <linux/highmem.h>
+#include <linux/iommu.h>
 #include <linux/intel-iommu.h>
 
 #include <asm/uaccess.h>
···
 		r = !tdp_enabled;
 		break;
 	case KVM_CAP_IOMMU:
-		r = intel_iommu_found();
+		r = iommu_found();
 		break;
 	default:
 		r = 0;
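With this hunk, KVM_CAP_IOMMU reports whether any IOMMU driver has registered with the new generic layer (AMD IOMMU or VT-d), instead of being hard-wired to intel_iommu_found(). For reference, this is the standard way userspace queries that capability; the snippet below is ordinary KVM ioctl usage, not code from this merge:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);

	if (kvm < 0) {
		perror("open /dev/kvm");
		return 1;
	}

	/* Positive return value when iommu_found() is true in the kernel. */
	if (ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_IOMMU) > 0)
		printf("KVM device assignment can use an IOMMU\n");
	else
		printf("no usable IOMMU for KVM device assignment\n");

	return 0;
}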
+1
drivers/base/Makefile
···
 obj-$(CONFIG_NUMA)	+= node.o
 obj-$(CONFIG_MEMORY_HOTPLUG_SPARSE) += memory.o
 obj-$(CONFIG_SMP)	+= topology.o
+obj-$(CONFIG_IOMMU_API) += iommu.o
 ifeq ($(CONFIG_SYSFS),y)
 obj-$(CONFIG_MODULES)	+= module.o
 endif
+100
drivers/base/iommu.c
···
+/*
+ * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
+ * Author: Joerg Roedel <joerg.roedel@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/bug.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/iommu.h>
+
+static struct iommu_ops *iommu_ops;
+
+void register_iommu(struct iommu_ops *ops)
+{
+	if (iommu_ops)
+		BUG();
+
+	iommu_ops = ops;
+}
+
+bool iommu_found()
+{
+	return iommu_ops != NULL;
+}
+EXPORT_SYMBOL_GPL(iommu_found);
+
+struct iommu_domain *iommu_domain_alloc(void)
+{
+	struct iommu_domain *domain;
+	int ret;
+
+	domain = kmalloc(sizeof(*domain), GFP_KERNEL);
+	if (!domain)
+		return NULL;
+
+	ret = iommu_ops->domain_init(domain);
+	if (ret)
+		goto out_free;
+
+	return domain;
+
+out_free:
+	kfree(domain);
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(iommu_domain_alloc);
+
+void iommu_domain_free(struct iommu_domain *domain)
+{
+	iommu_ops->domain_destroy(domain);
+	kfree(domain);
+}
+EXPORT_SYMBOL_GPL(iommu_domain_free);
+
+int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
+{
+	return iommu_ops->attach_dev(domain, dev);
+}
+EXPORT_SYMBOL_GPL(iommu_attach_device);
+
+void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
+{
+	iommu_ops->detach_dev(domain, dev);
+}
+EXPORT_SYMBOL_GPL(iommu_detach_device);
+
+int iommu_map_range(struct iommu_domain *domain, unsigned long iova,
+		    phys_addr_t paddr, size_t size, int prot)
+{
+	return iommu_ops->map(domain, iova, paddr, size, prot);
+}
+EXPORT_SYMBOL_GPL(iommu_map_range);
+
+void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova,
+		       size_t size)
+{
+	iommu_ops->unmap(domain, iova, size);
+}
+EXPORT_SYMBOL_GPL(iommu_unmap_range);
+
+phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
+			       unsigned long iova)
+{
+	return iommu_ops->iova_to_phys(domain, iova);
+}
+EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
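This new file is the whole generic layer: one backend registers an iommu_ops table (amd_iommu.c does so via register_iommu() in this merge) and consumers such as the new virt/kvm/iommu.o call the exported wrappers without caring which hardware sits underneath. A hedged sketch of the consumer-side call pattern (the function, device pointer and addresses below are placeholders, not code from this merge):

#include <linux/iommu.h>
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/mm.h>		/* PAGE_SIZE */

/* Map one page at a fixed I/O virtual address for a passed-through device. */
static int example_assign_device(struct device *dev,
				 unsigned long iova, phys_addr_t paddr)
{
	struct iommu_domain *domain;
	int ret;

	if (!iommu_found())		/* nobody called register_iommu() */
		return -ENODEV;

	domain = iommu_domain_alloc();	/* backend ->domain_init() runs here */
	if (!domain)
		return -ENOMEM;

	ret = iommu_attach_device(domain, dev);		/* ->attach_dev() */
	if (ret)
		goto out_free;

	ret = iommu_map_range(domain, iova, paddr, PAGE_SIZE,
			      IOMMU_READ | IOMMU_WRITE);	/* ->map() */
	if (ret)
		goto out_detach;

	return 0;

out_detach:
	iommu_detach_device(domain, dev);
out_free:
	iommu_domain_free(domain);
	return ret;
}

The single static iommu_ops pointer, guarded by BUG() on double registration, means only one IOMMU driver can own this interface at a time, mirroring how dma_ops is handled on x86.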
+28 -18
drivers/pci/dmar.c
···
 static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
 {
 	struct acpi_dmar_hardware_unit *drhd;
-	static int include_all;
 	int ret = 0;
 
 	drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
 
-	if (!dmaru->include_all)
-		ret = dmar_parse_dev_scope((void *)(drhd + 1),
+	if (dmaru->include_all)
+		return 0;
+
+	ret = dmar_parse_dev_scope((void *)(drhd + 1),
 				((void *)drhd) + drhd->header.length,
 				&dmaru->devices_cnt, &dmaru->devices,
 				drhd->segment);
-	else {
-		/* Only allow one INCLUDE_ALL */
-		if (include_all) {
-			printk(KERN_WARNING PREFIX "Only one INCLUDE_ALL "
-				"device scope is allowed\n");
-			ret = -EINVAL;
-		}
-		include_all = 1;
-	}
-
 	if (ret) {
 		list_del(&dmaru->list);
 		kfree(dmaru);
···
 struct dmar_drhd_unit *
 dmar_find_matched_drhd_unit(struct pci_dev *dev)
 {
-	struct dmar_drhd_unit *drhd = NULL;
+	struct dmar_drhd_unit *dmaru = NULL;
+	struct acpi_dmar_hardware_unit *drhd;
 
-	list_for_each_entry(drhd, &dmar_drhd_units, list) {
-		if (drhd->include_all || dmar_pci_device_match(drhd->devices,
-						drhd->devices_cnt, dev))
-			return drhd;
+	list_for_each_entry(dmaru, &dmar_drhd_units, list) {
+		drhd = container_of(dmaru->hdr,
+				    struct acpi_dmar_hardware_unit,
+				    header);
+
+		if (dmaru->include_all &&
+		    drhd->segment == pci_domain_nr(dev->bus))
+			return dmaru;
+
+		if (dmar_pci_device_match(dmaru->devices,
+					  dmaru->devices_cnt, dev))
+			return dmaru;
 	}
 
 	return NULL;
···
 	int map_size;
 	u32 ver;
 	static int iommu_allocated = 0;
+	int agaw;
 
 	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
 	if (!iommu)
···
 	}
 	iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
 	iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
+
+	agaw = iommu_calculate_agaw(iommu);
+	if (agaw < 0) {
+		printk(KERN_ERR
+			"Cannot get a valid agaw for iommu (seq_id = %d)\n",
+			iommu->seq_id);
+		goto error;
+	}
+	iommu->agaw = agaw;
 
 	/* the registers might be more than one page */
 	map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
+828 -136
drivers/pci/intel-iommu.c
··· 27 27 #include <linux/slab.h> 28 28 #include <linux/irq.h> 29 29 #include <linux/interrupt.h> 30 - #include <linux/sysdev.h> 31 30 #include <linux/spinlock.h> 32 31 #include <linux/pci.h> 33 32 #include <linux/dmar.h> ··· 34 35 #include <linux/mempool.h> 35 36 #include <linux/timer.h> 36 37 #include <linux/iova.h> 38 + #include <linux/iommu.h> 37 39 #include <linux/intel-iommu.h> 38 40 #include <asm/cacheflush.h> 39 41 #include <asm/iommu.h> ··· 54 54 55 55 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) 56 56 57 + #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) 58 + #define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK) 59 + #define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK) 60 + 61 + /* global iommu list, set NULL for ignored DMAR units */ 62 + static struct intel_iommu **g_iommus; 63 + 64 + /* 65 + * 0: Present 66 + * 1-11: Reserved 67 + * 12-63: Context Ptr (12 - (haw-1)) 68 + * 64-127: Reserved 69 + */ 70 + struct root_entry { 71 + u64 val; 72 + u64 rsvd1; 73 + }; 74 + #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry)) 75 + static inline bool root_present(struct root_entry *root) 76 + { 77 + return (root->val & 1); 78 + } 79 + static inline void set_root_present(struct root_entry *root) 80 + { 81 + root->val |= 1; 82 + } 83 + static inline void set_root_value(struct root_entry *root, unsigned long value) 84 + { 85 + root->val |= value & VTD_PAGE_MASK; 86 + } 87 + 88 + static inline struct context_entry * 89 + get_context_addr_from_root(struct root_entry *root) 90 + { 91 + return (struct context_entry *) 92 + (root_present(root)?phys_to_virt( 93 + root->val & VTD_PAGE_MASK) : 94 + NULL); 95 + } 96 + 97 + /* 98 + * low 64 bits: 99 + * 0: present 100 + * 1: fault processing disable 101 + * 2-3: translation type 102 + * 12-63: address space root 103 + * high 64 bits: 104 + * 0-2: address width 105 + * 3-6: aval 106 + * 8-23: domain id 107 + */ 108 + struct context_entry { 109 + u64 lo; 110 + u64 hi; 111 + }; 112 + 113 + static inline bool context_present(struct context_entry *context) 114 + { 115 + return (context->lo & 1); 116 + } 117 + static inline void context_set_present(struct context_entry *context) 118 + { 119 + context->lo |= 1; 120 + } 121 + 122 + static inline void context_set_fault_enable(struct context_entry *context) 123 + { 124 + context->lo &= (((u64)-1) << 2) | 1; 125 + } 126 + 127 + #define CONTEXT_TT_MULTI_LEVEL 0 128 + 129 + static inline void context_set_translation_type(struct context_entry *context, 130 + unsigned long value) 131 + { 132 + context->lo &= (((u64)-1) << 4) | 3; 133 + context->lo |= (value & 3) << 2; 134 + } 135 + 136 + static inline void context_set_address_root(struct context_entry *context, 137 + unsigned long value) 138 + { 139 + context->lo |= value & VTD_PAGE_MASK; 140 + } 141 + 142 + static inline void context_set_address_width(struct context_entry *context, 143 + unsigned long value) 144 + { 145 + context->hi |= value & 7; 146 + } 147 + 148 + static inline void context_set_domain_id(struct context_entry *context, 149 + unsigned long value) 150 + { 151 + context->hi |= (value & ((1 << 16) - 1)) << 8; 152 + } 153 + 154 + static inline void context_clear_entry(struct context_entry *context) 155 + { 156 + context->lo = 0; 157 + context->hi = 0; 158 + } 159 + 160 + /* 161 + * 0: readable 162 + * 1: writable 163 + * 2-6: reserved 164 + * 7: super page 165 + * 8-11: available 166 + * 12-63: Host physcial address 167 + */ 168 + struct dma_pte { 169 + u64 val; 170 + }; 171 + 172 + static inline void dma_clear_pte(struct dma_pte *pte) 173 + { 174 
+ pte->val = 0; 175 + } 176 + 177 + static inline void dma_set_pte_readable(struct dma_pte *pte) 178 + { 179 + pte->val |= DMA_PTE_READ; 180 + } 181 + 182 + static inline void dma_set_pte_writable(struct dma_pte *pte) 183 + { 184 + pte->val |= DMA_PTE_WRITE; 185 + } 186 + 187 + static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot) 188 + { 189 + pte->val = (pte->val & ~3) | (prot & 3); 190 + } 191 + 192 + static inline u64 dma_pte_addr(struct dma_pte *pte) 193 + { 194 + return (pte->val & VTD_PAGE_MASK); 195 + } 196 + 197 + static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr) 198 + { 199 + pte->val |= (addr & VTD_PAGE_MASK); 200 + } 201 + 202 + static inline bool dma_pte_present(struct dma_pte *pte) 203 + { 204 + return (pte->val & 3) != 0; 205 + } 206 + 207 + /* devices under the same p2p bridge are owned in one domain */ 208 + #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0) 209 + 210 + /* domain represents a virtual machine, more than one devices 211 + * across iommus may be owned in one domain, e.g. kvm guest. 212 + */ 213 + #define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1) 214 + 215 + struct dmar_domain { 216 + int id; /* domain id */ 217 + unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/ 218 + 219 + struct list_head devices; /* all devices' list */ 220 + struct iova_domain iovad; /* iova's that belong to this domain */ 221 + 222 + struct dma_pte *pgd; /* virtual address */ 223 + spinlock_t mapping_lock; /* page table lock */ 224 + int gaw; /* max guest address width */ 225 + 226 + /* adjusted guest address width, 0 is level 2 30-bit */ 227 + int agaw; 228 + 229 + int flags; /* flags to find out type of domain */ 230 + 231 + int iommu_coherency;/* indicate coherency of iommu access */ 232 + int iommu_count; /* reference count of iommu */ 233 + spinlock_t iommu_lock; /* protect iommu set in domain */ 234 + u64 max_addr; /* maximum mapped address */ 235 + }; 236 + 237 + /* PCI domain-device relationship */ 238 + struct device_domain_info { 239 + struct list_head link; /* link to domain siblings */ 240 + struct list_head global; /* link to global list */ 241 + u8 bus; /* PCI bus numer */ 242 + u8 devfn; /* PCI devfn number */ 243 + struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ 244 + struct dmar_domain *domain; /* pointer to domain */ 245 + }; 57 246 58 247 static void flush_unmaps_timeout(unsigned long data); 59 248 ··· 276 87 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) 277 88 static DEFINE_SPINLOCK(device_domain_lock); 278 89 static LIST_HEAD(device_domain_list); 90 + 91 + static struct iommu_ops intel_iommu_ops; 279 92 280 93 static int __init intel_iommu_setup(char *str) 281 94 { ··· 375 184 kmem_cache_free(iommu_iova_cache, iova); 376 185 } 377 186 187 + 188 + static inline int width_to_agaw(int width); 189 + 190 + /* calculate agaw for each iommu. 191 + * "SAGAW" may be different across iommus, use a default agaw, and 192 + * get a supported less agaw for iommus that don't support the default agaw. 
193 + */ 194 + int iommu_calculate_agaw(struct intel_iommu *iommu) 195 + { 196 + unsigned long sagaw; 197 + int agaw = -1; 198 + 199 + sagaw = cap_sagaw(iommu->cap); 200 + for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH); 201 + agaw >= 0; agaw--) { 202 + if (test_bit(agaw, &sagaw)) 203 + break; 204 + } 205 + 206 + return agaw; 207 + } 208 + 209 + /* in native case, each domain is related to only one iommu */ 210 + static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) 211 + { 212 + int iommu_id; 213 + 214 + BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE); 215 + 216 + iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus); 217 + if (iommu_id < 0 || iommu_id >= g_num_of_iommus) 218 + return NULL; 219 + 220 + return g_iommus[iommu_id]; 221 + } 222 + 223 + /* "Coherency" capability may be different across iommus */ 224 + static void domain_update_iommu_coherency(struct dmar_domain *domain) 225 + { 226 + int i; 227 + 228 + domain->iommu_coherency = 1; 229 + 230 + i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus); 231 + for (; i < g_num_of_iommus; ) { 232 + if (!ecap_coherent(g_iommus[i]->ecap)) { 233 + domain->iommu_coherency = 0; 234 + break; 235 + } 236 + i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1); 237 + } 238 + } 239 + 240 + static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn) 241 + { 242 + struct dmar_drhd_unit *drhd = NULL; 243 + int i; 244 + 245 + for_each_drhd_unit(drhd) { 246 + if (drhd->ignored) 247 + continue; 248 + 249 + for (i = 0; i < drhd->devices_cnt; i++) 250 + if (drhd->devices[i]->bus->number == bus && 251 + drhd->devices[i]->devfn == devfn) 252 + return drhd->iommu; 253 + 254 + if (drhd->include_all) 255 + return drhd->iommu; 256 + } 257 + 258 + return NULL; 259 + } 260 + 261 + static void domain_flush_cache(struct dmar_domain *domain, 262 + void *addr, int size) 263 + { 264 + if (!domain->iommu_coherency) 265 + clflush_cache_range(addr, size); 266 + } 267 + 378 268 /* Gets context entry for a given bus and devfn */ 379 269 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, 380 270 u8 bus, u8 devfn) ··· 498 226 ret = 0; 499 227 goto out; 500 228 } 501 - ret = context_present(context[devfn]); 229 + ret = context_present(&context[devfn]); 502 230 out: 503 231 spin_unlock_irqrestore(&iommu->lock, flags); 504 232 return ret; ··· 514 242 root = &iommu->root_entry[bus]; 515 243 context = get_context_addr_from_root(root); 516 244 if (context) { 517 - context_clear_entry(context[devfn]); 245 + context_clear_entry(&context[devfn]); 518 246 __iommu_flush_cache(iommu, &context[devfn], \ 519 247 sizeof(*context)); 520 248 } ··· 611 339 if (level == 1) 612 340 break; 613 341 614 - if (!dma_pte_present(*pte)) { 342 + if (!dma_pte_present(pte)) { 615 343 tmp_page = alloc_pgtable_page(); 616 344 617 345 if (!tmp_page) { ··· 619 347 flags); 620 348 return NULL; 621 349 } 622 - __iommu_flush_cache(domain->iommu, tmp_page, 623 - PAGE_SIZE); 624 - dma_set_pte_addr(*pte, virt_to_phys(tmp_page)); 350 + domain_flush_cache(domain, tmp_page, PAGE_SIZE); 351 + dma_set_pte_addr(pte, virt_to_phys(tmp_page)); 625 352 /* 626 353 * high level table always sets r/w, last level page 627 354 * table control read/write 628 355 */ 629 - dma_set_pte_readable(*pte); 630 - dma_set_pte_writable(*pte); 631 - __iommu_flush_cache(domain->iommu, pte, sizeof(*pte)); 356 + dma_set_pte_readable(pte); 357 + dma_set_pte_writable(pte); 358 + domain_flush_cache(domain, pte, sizeof(*pte)); 632 359 } 633 - parent = 
phys_to_virt(dma_pte_addr(*pte)); 360 + parent = phys_to_virt(dma_pte_addr(pte)); 634 361 level--; 635 362 } 636 363 ··· 652 381 if (level == total) 653 382 return pte; 654 383 655 - if (!dma_pte_present(*pte)) 384 + if (!dma_pte_present(pte)) 656 385 break; 657 - parent = phys_to_virt(dma_pte_addr(*pte)); 386 + parent = phys_to_virt(dma_pte_addr(pte)); 658 387 total--; 659 388 } 660 389 return NULL; ··· 669 398 pte = dma_addr_level_pte(domain, addr, 1); 670 399 671 400 if (pte) { 672 - dma_clear_pte(*pte); 673 - __iommu_flush_cache(domain->iommu, pte, sizeof(*pte)); 401 + dma_clear_pte(pte); 402 + domain_flush_cache(domain, pte, sizeof(*pte)); 674 403 } 675 404 } 676 405 ··· 716 445 pte = dma_addr_level_pte(domain, tmp, level); 717 446 if (pte) { 718 447 free_pgtable_page( 719 - phys_to_virt(dma_pte_addr(*pte))); 720 - dma_clear_pte(*pte); 721 - __iommu_flush_cache(domain->iommu, 722 - pte, sizeof(*pte)); 448 + phys_to_virt(dma_pte_addr(pte))); 449 + dma_clear_pte(pte); 450 + domain_flush_cache(domain, pte, sizeof(*pte)); 723 451 } 724 452 tmp += level_size(level); 725 453 } ··· 1220 950 1221 951 1222 952 static void domain_exit(struct dmar_domain *domain); 953 + static void vm_domain_exit(struct dmar_domain *domain); 1223 954 1224 955 void free_dmar_iommu(struct intel_iommu *iommu) 1225 956 { 1226 957 struct dmar_domain *domain; 1227 958 int i; 959 + unsigned long flags; 1228 960 1229 961 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap)); 1230 962 for (; i < cap_ndoms(iommu->cap); ) { 1231 963 domain = iommu->domains[i]; 1232 964 clear_bit(i, iommu->domain_ids); 1233 - domain_exit(domain); 965 + 966 + spin_lock_irqsave(&domain->iommu_lock, flags); 967 + if (--domain->iommu_count == 0) { 968 + if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) 969 + vm_domain_exit(domain); 970 + else 971 + domain_exit(domain); 972 + } 973 + spin_unlock_irqrestore(&domain->iommu_lock, flags); 974 + 1234 975 i = find_next_bit(iommu->domain_ids, 1235 976 cap_ndoms(iommu->cap), i+1); 1236 977 } ··· 1258 977 1259 978 kfree(iommu->domains); 1260 979 kfree(iommu->domain_ids); 980 + 981 + g_iommus[iommu->seq_id] = NULL; 982 + 983 + /* if all iommus are freed, free g_iommus */ 984 + for (i = 0; i < g_num_of_iommus; i++) { 985 + if (g_iommus[i]) 986 + break; 987 + } 988 + 989 + if (i == g_num_of_iommus) 990 + kfree(g_iommus); 1261 991 1262 992 /* free context mapping */ 1263 993 free_context_table(iommu); ··· 1298 1006 1299 1007 set_bit(num, iommu->domain_ids); 1300 1008 domain->id = num; 1301 - domain->iommu = iommu; 1009 + memset(&domain->iommu_bmp, 0, sizeof(unsigned long)); 1010 + set_bit(iommu->seq_id, &domain->iommu_bmp); 1011 + domain->flags = 0; 1302 1012 iommu->domains[num] = domain; 1303 1013 spin_unlock_irqrestore(&iommu->lock, flags); 1304 1014 ··· 1310 1016 static void iommu_free_domain(struct dmar_domain *domain) 1311 1017 { 1312 1018 unsigned long flags; 1019 + struct intel_iommu *iommu; 1313 1020 1314 - spin_lock_irqsave(&domain->iommu->lock, flags); 1315 - clear_bit(domain->id, domain->iommu->domain_ids); 1316 - spin_unlock_irqrestore(&domain->iommu->lock, flags); 1021 + iommu = domain_get_iommu(domain); 1022 + 1023 + spin_lock_irqsave(&iommu->lock, flags); 1024 + clear_bit(domain->id, iommu->domain_ids); 1025 + spin_unlock_irqrestore(&iommu->lock, flags); 1317 1026 } 1318 1027 1319 1028 static struct iova_domain reserved_iova_list; ··· 1391 1094 1392 1095 init_iova_domain(&domain->iovad, DMA_32BIT_PFN); 1393 1096 spin_lock_init(&domain->mapping_lock); 1097 + 
spin_lock_init(&domain->iommu_lock); 1394 1098 1395 1099 domain_reserve_special_ranges(domain); 1396 1100 1397 1101 /* calculate AGAW */ 1398 - iommu = domain->iommu; 1102 + iommu = domain_get_iommu(domain); 1399 1103 if (guest_width > cap_mgaw(iommu->cap)) 1400 1104 guest_width = cap_mgaw(iommu->cap); 1401 1105 domain->gaw = guest_width; ··· 1412 1114 } 1413 1115 domain->agaw = agaw; 1414 1116 INIT_LIST_HEAD(&domain->devices); 1117 + 1118 + if (ecap_coherent(iommu->ecap)) 1119 + domain->iommu_coherency = 1; 1120 + else 1121 + domain->iommu_coherency = 0; 1122 + 1123 + domain->iommu_count = 1; 1415 1124 1416 1125 /* always allocate the top pgd */ 1417 1126 domain->pgd = (struct dma_pte *)alloc_pgtable_page(); ··· 1456 1151 u8 bus, u8 devfn) 1457 1152 { 1458 1153 struct context_entry *context; 1459 - struct intel_iommu *iommu = domain->iommu; 1460 1154 unsigned long flags; 1155 + struct intel_iommu *iommu; 1156 + struct dma_pte *pgd; 1157 + unsigned long num; 1158 + unsigned long ndomains; 1159 + int id; 1160 + int agaw; 1461 1161 1462 1162 pr_debug("Set context mapping for %02x:%02x.%d\n", 1463 1163 bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); 1464 1164 BUG_ON(!domain->pgd); 1165 + 1166 + iommu = device_to_iommu(bus, devfn); 1167 + if (!iommu) 1168 + return -ENODEV; 1169 + 1465 1170 context = device_to_context_entry(iommu, bus, devfn); 1466 1171 if (!context) 1467 1172 return -ENOMEM; 1468 1173 spin_lock_irqsave(&iommu->lock, flags); 1469 - if (context_present(*context)) { 1174 + if (context_present(context)) { 1470 1175 spin_unlock_irqrestore(&iommu->lock, flags); 1471 1176 return 0; 1472 1177 } 1473 1178 1474 - context_set_domain_id(*context, domain->id); 1475 - context_set_address_width(*context, domain->agaw); 1476 - context_set_address_root(*context, virt_to_phys(domain->pgd)); 1477 - context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL); 1478 - context_set_fault_enable(*context); 1479 - context_set_present(*context); 1480 - __iommu_flush_cache(iommu, context, sizeof(*context)); 1179 + id = domain->id; 1180 + pgd = domain->pgd; 1181 + 1182 + if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) { 1183 + int found = 0; 1184 + 1185 + /* find an available domain id for this device in iommu */ 1186 + ndomains = cap_ndoms(iommu->cap); 1187 + num = find_first_bit(iommu->domain_ids, ndomains); 1188 + for (; num < ndomains; ) { 1189 + if (iommu->domains[num] == domain) { 1190 + id = num; 1191 + found = 1; 1192 + break; 1193 + } 1194 + num = find_next_bit(iommu->domain_ids, 1195 + cap_ndoms(iommu->cap), num+1); 1196 + } 1197 + 1198 + if (found == 0) { 1199 + num = find_first_zero_bit(iommu->domain_ids, ndomains); 1200 + if (num >= ndomains) { 1201 + spin_unlock_irqrestore(&iommu->lock, flags); 1202 + printk(KERN_ERR "IOMMU: no free domain ids\n"); 1203 + return -EFAULT; 1204 + } 1205 + 1206 + set_bit(num, iommu->domain_ids); 1207 + iommu->domains[num] = domain; 1208 + id = num; 1209 + } 1210 + 1211 + /* Skip top levels of page tables for 1212 + * iommu which has less agaw than default. 
1213 + */ 1214 + for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) { 1215 + pgd = phys_to_virt(dma_pte_addr(pgd)); 1216 + if (!dma_pte_present(pgd)) { 1217 + spin_unlock_irqrestore(&iommu->lock, flags); 1218 + return -ENOMEM; 1219 + } 1220 + } 1221 + } 1222 + 1223 + context_set_domain_id(context, id); 1224 + context_set_address_width(context, iommu->agaw); 1225 + context_set_address_root(context, virt_to_phys(pgd)); 1226 + context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL); 1227 + context_set_fault_enable(context); 1228 + context_set_present(context); 1229 + domain_flush_cache(domain, context, sizeof(*context)); 1481 1230 1482 1231 /* it's a non-present to present mapping */ 1483 1232 if (iommu->flush.flush_context(iommu, domain->id, ··· 1542 1183 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0); 1543 1184 1544 1185 spin_unlock_irqrestore(&iommu->lock, flags); 1186 + 1187 + spin_lock_irqsave(&domain->iommu_lock, flags); 1188 + if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) { 1189 + domain->iommu_count++; 1190 + domain_update_iommu_coherency(domain); 1191 + } 1192 + spin_unlock_irqrestore(&domain->iommu_lock, flags); 1545 1193 return 0; 1546 1194 } 1547 1195 ··· 1584 1218 tmp->bus->number, tmp->devfn); 1585 1219 } 1586 1220 1587 - static int domain_context_mapped(struct dmar_domain *domain, 1588 - struct pci_dev *pdev) 1221 + static int domain_context_mapped(struct pci_dev *pdev) 1589 1222 { 1590 1223 int ret; 1591 1224 struct pci_dev *tmp, *parent; 1225 + struct intel_iommu *iommu; 1592 1226 1593 - ret = device_context_mapped(domain->iommu, 1227 + iommu = device_to_iommu(pdev->bus->number, pdev->devfn); 1228 + if (!iommu) 1229 + return -ENODEV; 1230 + 1231 + ret = device_context_mapped(iommu, 1594 1232 pdev->bus->number, pdev->devfn); 1595 1233 if (!ret) 1596 1234 return ret; ··· 1605 1235 /* Secondary interface's bus number and devfn 0 */ 1606 1236 parent = pdev->bus->self; 1607 1237 while (parent != tmp) { 1608 - ret = device_context_mapped(domain->iommu, parent->bus->number, 1238 + ret = device_context_mapped(iommu, parent->bus->number, 1609 1239 parent->devfn); 1610 1240 if (!ret) 1611 1241 return ret; 1612 1242 parent = parent->bus->self; 1613 1243 } 1614 1244 if (tmp->is_pcie) 1615 - return device_context_mapped(domain->iommu, 1245 + return device_context_mapped(iommu, 1616 1246 tmp->subordinate->number, 0); 1617 1247 else 1618 - return device_context_mapped(domain->iommu, 1248 + return device_context_mapped(iommu, 1619 1249 tmp->bus->number, tmp->devfn); 1620 1250 } 1621 1251 ··· 1643 1273 /* We don't need lock here, nobody else 1644 1274 * touches the iova range 1645 1275 */ 1646 - BUG_ON(dma_pte_addr(*pte)); 1647 - dma_set_pte_addr(*pte, start_pfn << VTD_PAGE_SHIFT); 1648 - dma_set_pte_prot(*pte, prot); 1649 - __iommu_flush_cache(domain->iommu, pte, sizeof(*pte)); 1276 + BUG_ON(dma_pte_addr(pte)); 1277 + dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT); 1278 + dma_set_pte_prot(pte, prot); 1279 + domain_flush_cache(domain, pte, sizeof(*pte)); 1650 1280 start_pfn++; 1651 1281 index++; 1652 1282 } 1653 1283 return 0; 1654 1284 } 1655 1285 1656 - static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn) 1286 + static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn) 1657 1287 { 1658 - clear_context_table(domain->iommu, bus, devfn); 1659 - domain->iommu->flush.flush_context(domain->iommu, 0, 0, 0, 1288 + if (!iommu) 1289 + return; 1290 + 1291 + clear_context_table(iommu, bus, devfn); 1292 + 
iommu->flush.flush_context(iommu, 0, 0, 0, 1660 1293 DMA_CCMD_GLOBAL_INVL, 0); 1661 - domain->iommu->flush.flush_iotlb(domain->iommu, 0, 0, 0, 1294 + iommu->flush.flush_iotlb(iommu, 0, 0, 0, 1662 1295 DMA_TLB_GLOBAL_FLUSH, 0); 1663 1296 } 1664 1297 ··· 1669 1296 { 1670 1297 struct device_domain_info *info; 1671 1298 unsigned long flags; 1299 + struct intel_iommu *iommu; 1672 1300 1673 1301 spin_lock_irqsave(&device_domain_lock, flags); 1674 1302 while (!list_empty(&domain->devices)) { ··· 1681 1307 info->dev->dev.archdata.iommu = NULL; 1682 1308 spin_unlock_irqrestore(&device_domain_lock, flags); 1683 1309 1684 - detach_domain_for_dev(info->domain, info->bus, info->devfn); 1310 + iommu = device_to_iommu(info->bus, info->devfn); 1311 + iommu_detach_dev(iommu, info->bus, info->devfn); 1685 1312 free_devinfo_mem(info); 1686 1313 1687 1314 spin_lock_irqsave(&device_domain_lock, flags); ··· 1775 1400 info->dev = NULL; 1776 1401 info->domain = domain; 1777 1402 /* This domain is shared by devices under p2p bridge */ 1778 - domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES; 1403 + domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES; 1779 1404 1780 1405 /* pcie-to-pci bridge already has a domain, uses it */ 1781 1406 found = NULL; ··· 1938 1563 printk(KERN_ERR "IOMMU: mapping reserved region failed\n"); 1939 1564 } 1940 1565 } 1566 + #else /* !CONFIG_DMAR_GFX_WA */ 1567 + static inline void iommu_prepare_gfx_mapping(void) 1568 + { 1569 + return; 1570 + } 1941 1571 #endif 1942 1572 1943 1573 #ifdef CONFIG_DMAR_FLOPPY_WA ··· 1970 1590 } 1971 1591 #endif /* !CONFIG_DMAR_FLPY_WA */ 1972 1592 1973 - int __init init_dmars(void) 1593 + static int __init init_dmars(void) 1974 1594 { 1975 1595 struct dmar_drhd_unit *drhd; 1976 1596 struct dmar_rmrr_unit *rmrr; ··· 1993 1613 */ 1994 1614 } 1995 1615 1616 + g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *), 1617 + GFP_KERNEL); 1618 + if (!g_iommus) { 1619 + printk(KERN_ERR "Allocating global iommu array failed\n"); 1620 + ret = -ENOMEM; 1621 + goto error; 1622 + } 1623 + 1996 1624 deferred_flush = kzalloc(g_num_of_iommus * 1997 1625 sizeof(struct deferred_flush_tables), GFP_KERNEL); 1998 1626 if (!deferred_flush) { 1627 + kfree(g_iommus); 1999 1628 ret = -ENOMEM; 2000 1629 goto error; 2001 1630 } ··· 2014 1625 continue; 2015 1626 2016 1627 iommu = drhd->iommu; 1628 + g_iommus[iommu->seq_id] = iommu; 2017 1629 2018 1630 ret = iommu_init_domains(iommu); 2019 1631 if (ret) ··· 2127 1737 iommu = drhd->iommu; 2128 1738 free_iommu(iommu); 2129 1739 } 1740 + kfree(g_iommus); 2130 1741 return ret; 2131 1742 } 2132 1743 ··· 2196 1805 } 2197 1806 2198 1807 /* make sure context mapping is ok */ 2199 - if (unlikely(!domain_context_mapped(domain, pdev))) { 1808 + if (unlikely(!domain_context_mapped(pdev))) { 2200 1809 ret = domain_context_mapping(domain, pdev); 2201 1810 if (ret) { 2202 1811 printk(KERN_ERR ··· 2218 1827 struct iova *iova; 2219 1828 int prot = 0; 2220 1829 int ret; 1830 + struct intel_iommu *iommu; 2221 1831 2222 1832 BUG_ON(dir == DMA_NONE); 2223 1833 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) ··· 2228 1836 if (!domain) 2229 1837 return 0; 2230 1838 1839 + iommu = domain_get_iommu(domain); 2231 1840 size = aligned_size((u64)paddr, size); 2232 1841 2233 1842 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); ··· 2242 1849 * mappings.. 
2243 1850 */ 2244 1851 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \ 2245 - !cap_zlr(domain->iommu->cap)) 1852 + !cap_zlr(iommu->cap)) 2246 1853 prot |= DMA_PTE_READ; 2247 1854 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) 2248 1855 prot |= DMA_PTE_WRITE; ··· 2258 1865 goto error; 2259 1866 2260 1867 /* it's a non-present to present mapping */ 2261 - ret = iommu_flush_iotlb_psi(domain->iommu, domain->id, 1868 + ret = iommu_flush_iotlb_psi(iommu, domain->id, 2262 1869 start_paddr, size >> VTD_PAGE_SHIFT, 1); 2263 1870 if (ret) 2264 - iommu_flush_write_buffer(domain->iommu); 1871 + iommu_flush_write_buffer(iommu); 2265 1872 2266 1873 return start_paddr + ((u64)paddr & (~PAGE_MASK)); 2267 1874 ··· 2288 1895 2289 1896 /* just flush them all */ 2290 1897 for (i = 0; i < g_num_of_iommus; i++) { 2291 - if (deferred_flush[i].next) { 2292 - struct intel_iommu *iommu = 2293 - deferred_flush[i].domain[0]->iommu; 1898 + struct intel_iommu *iommu = g_iommus[i]; 1899 + if (!iommu) 1900 + continue; 2294 1901 1902 + if (deferred_flush[i].next) { 2295 1903 iommu->flush.flush_iotlb(iommu, 0, 0, 0, 2296 1904 DMA_TLB_GLOBAL_FLUSH, 0); 2297 1905 for (j = 0; j < deferred_flush[i].next; j++) { ··· 2319 1925 { 2320 1926 unsigned long flags; 2321 1927 int next, iommu_id; 1928 + struct intel_iommu *iommu; 2322 1929 2323 1930 spin_lock_irqsave(&async_umap_flush_lock, flags); 2324 1931 if (list_size == HIGH_WATER_MARK) 2325 1932 flush_unmaps(); 2326 1933 2327 - iommu_id = dom->iommu->seq_id; 1934 + iommu = domain_get_iommu(dom); 1935 + iommu_id = iommu->seq_id; 2328 1936 2329 1937 next = deferred_flush[iommu_id].next; 2330 1938 deferred_flush[iommu_id].domain[next] = dom; ··· 2348 1952 struct dmar_domain *domain; 2349 1953 unsigned long start_addr; 2350 1954 struct iova *iova; 1955 + struct intel_iommu *iommu; 2351 1956 2352 1957 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) 2353 1958 return; 2354 1959 domain = find_domain(pdev); 2355 1960 BUG_ON(!domain); 1961 + 1962 + iommu = domain_get_iommu(domain); 2356 1963 2357 1964 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr)); 2358 1965 if (!iova) ··· 2372 1973 /* free page tables */ 2373 1974 dma_pte_free_pagetable(domain, start_addr, start_addr + size); 2374 1975 if (intel_iommu_strict) { 2375 - if (iommu_flush_iotlb_psi(domain->iommu, 1976 + if (iommu_flush_iotlb_psi(iommu, 2376 1977 domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0)) 2377 - iommu_flush_write_buffer(domain->iommu); 1978 + iommu_flush_write_buffer(iommu); 2378 1979 /* free iova */ 2379 1980 __free_iova(&domain->iovad, iova); 2380 1981 } else { ··· 2435 2036 size_t size = 0; 2436 2037 void *addr; 2437 2038 struct scatterlist *sg; 2039 + struct intel_iommu *iommu; 2438 2040 2439 2041 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) 2440 2042 return; 2441 2043 2442 2044 domain = find_domain(pdev); 2045 + BUG_ON(!domain); 2046 + 2047 + iommu = domain_get_iommu(domain); 2443 2048 2444 2049 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address)); 2445 2050 if (!iova) ··· 2460 2057 /* free page tables */ 2461 2058 dma_pte_free_pagetable(domain, start_addr, start_addr + size); 2462 2059 2463 - if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr, 2060 + if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr, 2464 2061 size >> VTD_PAGE_SHIFT, 0)) 2465 - iommu_flush_write_buffer(domain->iommu); 2062 + iommu_flush_write_buffer(iommu); 2466 2063 2467 2064 /* free iova */ 2468 2065 __free_iova(&domain->iovad, iova); ··· 2496 2093 int ret; 2497 2094 
struct scatterlist *sg; 2498 2095 unsigned long start_addr; 2096 + struct intel_iommu *iommu; 2499 2097 2500 2098 BUG_ON(dir == DMA_NONE); 2501 2099 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) ··· 2505 2101 domain = get_valid_domain_for_dev(pdev); 2506 2102 if (!domain) 2507 2103 return 0; 2104 + 2105 + iommu = domain_get_iommu(domain); 2508 2106 2509 2107 for_each_sg(sglist, sg, nelems, i) { 2510 2108 addr = SG_ENT_VIRT_ADDRESS(sg); ··· 2525 2119 * mappings.. 2526 2120 */ 2527 2121 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \ 2528 - !cap_zlr(domain->iommu->cap)) 2122 + !cap_zlr(iommu->cap)) 2529 2123 prot |= DMA_PTE_READ; 2530 2124 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) 2531 2125 prot |= DMA_PTE_WRITE; ··· 2557 2151 } 2558 2152 2559 2153 /* it's a non-present to present mapping */ 2560 - if (iommu_flush_iotlb_psi(domain->iommu, domain->id, 2154 + if (iommu_flush_iotlb_psi(iommu, domain->id, 2561 2155 start_addr, offset >> VTD_PAGE_SHIFT, 1)) 2562 - iommu_flush_write_buffer(domain->iommu); 2156 + iommu_flush_write_buffer(iommu); 2563 2157 return nelems; 2564 2158 } 2565 2159 ··· 2731 2325 init_timer(&unmap_timer); 2732 2326 force_iommu = 1; 2733 2327 dma_ops = &intel_dma_ops; 2328 + 2329 + register_iommu(&intel_iommu_ops); 2330 + 2734 2331 return 0; 2735 2332 } 2736 2333 2737 - void intel_iommu_domain_exit(struct dmar_domain *domain) 2334 + static int vm_domain_add_dev_info(struct dmar_domain *domain, 2335 + struct pci_dev *pdev) 2336 + { 2337 + struct device_domain_info *info; 2338 + unsigned long flags; 2339 + 2340 + info = alloc_devinfo_mem(); 2341 + if (!info) 2342 + return -ENOMEM; 2343 + 2344 + info->bus = pdev->bus->number; 2345 + info->devfn = pdev->devfn; 2346 + info->dev = pdev; 2347 + info->domain = domain; 2348 + 2349 + spin_lock_irqsave(&device_domain_lock, flags); 2350 + list_add(&info->link, &domain->devices); 2351 + list_add(&info->global, &device_domain_list); 2352 + pdev->dev.archdata.iommu = info; 2353 + spin_unlock_irqrestore(&device_domain_lock, flags); 2354 + 2355 + return 0; 2356 + } 2357 + 2358 + static void vm_domain_remove_one_dev_info(struct dmar_domain *domain, 2359 + struct pci_dev *pdev) 2360 + { 2361 + struct device_domain_info *info; 2362 + struct intel_iommu *iommu; 2363 + unsigned long flags; 2364 + int found = 0; 2365 + struct list_head *entry, *tmp; 2366 + 2367 + iommu = device_to_iommu(pdev->bus->number, pdev->devfn); 2368 + if (!iommu) 2369 + return; 2370 + 2371 + spin_lock_irqsave(&device_domain_lock, flags); 2372 + list_for_each_safe(entry, tmp, &domain->devices) { 2373 + info = list_entry(entry, struct device_domain_info, link); 2374 + if (info->bus == pdev->bus->number && 2375 + info->devfn == pdev->devfn) { 2376 + list_del(&info->link); 2377 + list_del(&info->global); 2378 + if (info->dev) 2379 + info->dev->dev.archdata.iommu = NULL; 2380 + spin_unlock_irqrestore(&device_domain_lock, flags); 2381 + 2382 + iommu_detach_dev(iommu, info->bus, info->devfn); 2383 + free_devinfo_mem(info); 2384 + 2385 + spin_lock_irqsave(&device_domain_lock, flags); 2386 + 2387 + if (found) 2388 + break; 2389 + else 2390 + continue; 2391 + } 2392 + 2393 + /* if there is no other devices under the same iommu 2394 + * owned by this domain, clear this iommu in iommu_bmp 2395 + * update iommu count and coherency 2396 + */ 2397 + if (device_to_iommu(info->bus, info->devfn) == iommu) 2398 + found = 1; 2399 + } 2400 + 2401 + if (found == 0) { 2402 + unsigned long tmp_flags; 2403 + spin_lock_irqsave(&domain->iommu_lock, tmp_flags); 
2404 + clear_bit(iommu->seq_id, &domain->iommu_bmp); 2405 + domain->iommu_count--; 2406 + domain_update_iommu_coherency(domain); 2407 + spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags); 2408 + } 2409 + 2410 + spin_unlock_irqrestore(&device_domain_lock, flags); 2411 + } 2412 + 2413 + static void vm_domain_remove_all_dev_info(struct dmar_domain *domain) 2414 + { 2415 + struct device_domain_info *info; 2416 + struct intel_iommu *iommu; 2417 + unsigned long flags1, flags2; 2418 + 2419 + spin_lock_irqsave(&device_domain_lock, flags1); 2420 + while (!list_empty(&domain->devices)) { 2421 + info = list_entry(domain->devices.next, 2422 + struct device_domain_info, link); 2423 + list_del(&info->link); 2424 + list_del(&info->global); 2425 + if (info->dev) 2426 + info->dev->dev.archdata.iommu = NULL; 2427 + 2428 + spin_unlock_irqrestore(&device_domain_lock, flags1); 2429 + 2430 + iommu = device_to_iommu(info->bus, info->devfn); 2431 + iommu_detach_dev(iommu, info->bus, info->devfn); 2432 + 2433 + /* clear this iommu in iommu_bmp, update iommu count 2434 + * and coherency 2435 + */ 2436 + spin_lock_irqsave(&domain->iommu_lock, flags2); 2437 + if (test_and_clear_bit(iommu->seq_id, 2438 + &domain->iommu_bmp)) { 2439 + domain->iommu_count--; 2440 + domain_update_iommu_coherency(domain); 2441 + } 2442 + spin_unlock_irqrestore(&domain->iommu_lock, flags2); 2443 + 2444 + free_devinfo_mem(info); 2445 + spin_lock_irqsave(&device_domain_lock, flags1); 2446 + } 2447 + spin_unlock_irqrestore(&device_domain_lock, flags1); 2448 + } 2449 + 2450 + /* domain id for virtual machine, it won't be set in context */ 2451 + static unsigned long vm_domid; 2452 + 2453 + static int vm_domain_min_agaw(struct dmar_domain *domain) 2454 + { 2455 + int i; 2456 + int min_agaw = domain->agaw; 2457 + 2458 + i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus); 2459 + for (; i < g_num_of_iommus; ) { 2460 + if (min_agaw > g_iommus[i]->agaw) 2461 + min_agaw = g_iommus[i]->agaw; 2462 + 2463 + i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1); 2464 + } 2465 + 2466 + return min_agaw; 2467 + } 2468 + 2469 + static struct dmar_domain *iommu_alloc_vm_domain(void) 2470 + { 2471 + struct dmar_domain *domain; 2472 + 2473 + domain = alloc_domain_mem(); 2474 + if (!domain) 2475 + return NULL; 2476 + 2477 + domain->id = vm_domid++; 2478 + memset(&domain->iommu_bmp, 0, sizeof(unsigned long)); 2479 + domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE; 2480 + 2481 + return domain; 2482 + } 2483 + 2484 + static int vm_domain_init(struct dmar_domain *domain, int guest_width) 2485 + { 2486 + int adjust_width; 2487 + 2488 + init_iova_domain(&domain->iovad, DMA_32BIT_PFN); 2489 + spin_lock_init(&domain->mapping_lock); 2490 + spin_lock_init(&domain->iommu_lock); 2491 + 2492 + domain_reserve_special_ranges(domain); 2493 + 2494 + /* calculate AGAW */ 2495 + domain->gaw = guest_width; 2496 + adjust_width = guestwidth_to_adjustwidth(guest_width); 2497 + domain->agaw = width_to_agaw(adjust_width); 2498 + 2499 + INIT_LIST_HEAD(&domain->devices); 2500 + 2501 + domain->iommu_count = 0; 2502 + domain->iommu_coherency = 0; 2503 + domain->max_addr = 0; 2504 + 2505 + /* always allocate the top pgd */ 2506 + domain->pgd = (struct dma_pte *)alloc_pgtable_page(); 2507 + if (!domain->pgd) 2508 + return -ENOMEM; 2509 + domain_flush_cache(domain, domain->pgd, PAGE_SIZE); 2510 + return 0; 2511 + } 2512 + 2513 + static void iommu_free_vm_domain(struct dmar_domain *domain) 2514 + { 2515 + unsigned long flags; 2516 + struct dmar_drhd_unit *drhd; 2517 + struct 
intel_iommu *iommu; 2518 + unsigned long i; 2519 + unsigned long ndomains; 2520 + 2521 + for_each_drhd_unit(drhd) { 2522 + if (drhd->ignored) 2523 + continue; 2524 + iommu = drhd->iommu; 2525 + 2526 + ndomains = cap_ndoms(iommu->cap); 2527 + i = find_first_bit(iommu->domain_ids, ndomains); 2528 + for (; i < ndomains; ) { 2529 + if (iommu->domains[i] == domain) { 2530 + spin_lock_irqsave(&iommu->lock, flags); 2531 + clear_bit(i, iommu->domain_ids); 2532 + iommu->domains[i] = NULL; 2533 + spin_unlock_irqrestore(&iommu->lock, flags); 2534 + break; 2535 + } 2536 + i = find_next_bit(iommu->domain_ids, ndomains, i+1); 2537 + } 2538 + } 2539 + } 2540 + 2541 + static void vm_domain_exit(struct dmar_domain *domain) 2738 2542 { 2739 2543 u64 end; 2740 2544 ··· 2952 2336 if (!domain) 2953 2337 return; 2954 2338 2339 + vm_domain_remove_all_dev_info(domain); 2340 + /* destroy iovas */ 2341 + put_iova_domain(&domain->iovad); 2955 2342 end = DOMAIN_MAX_ADDR(domain->gaw); 2956 2343 end = end & (~VTD_PAGE_MASK); 2957 2344 ··· 2964 2345 /* free page tables */ 2965 2346 dma_pte_free_pagetable(domain, 0, end); 2966 2347 2967 - iommu_free_domain(domain); 2348 + iommu_free_vm_domain(domain); 2968 2349 free_domain_mem(domain); 2969 2350 } 2970 - EXPORT_SYMBOL_GPL(intel_iommu_domain_exit); 2971 2351 2972 - struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev) 2352 + static int intel_iommu_domain_init(struct iommu_domain *domain) 2973 2353 { 2974 - struct dmar_drhd_unit *drhd; 2975 - struct dmar_domain *domain; 2354 + struct dmar_domain *dmar_domain; 2355 + 2356 + dmar_domain = iommu_alloc_vm_domain(); 2357 + if (!dmar_domain) { 2358 + printk(KERN_ERR 2359 + "intel_iommu_domain_init: dmar_domain == NULL\n"); 2360 + return -ENOMEM; 2361 + } 2362 + if (vm_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { 2363 + printk(KERN_ERR 2364 + "intel_iommu_domain_init() failed\n"); 2365 + vm_domain_exit(dmar_domain); 2366 + return -ENOMEM; 2367 + } 2368 + domain->priv = dmar_domain; 2369 + 2370 + return 0; 2371 + } 2372 + 2373 + static void intel_iommu_domain_destroy(struct iommu_domain *domain) 2374 + { 2375 + struct dmar_domain *dmar_domain = domain->priv; 2376 + 2377 + domain->priv = NULL; 2378 + vm_domain_exit(dmar_domain); 2379 + } 2380 + 2381 + static int intel_iommu_attach_device(struct iommu_domain *domain, 2382 + struct device *dev) 2383 + { 2384 + struct dmar_domain *dmar_domain = domain->priv; 2385 + struct pci_dev *pdev = to_pci_dev(dev); 2976 2386 struct intel_iommu *iommu; 2387 + int addr_width; 2388 + u64 end; 2389 + int ret; 2977 2390 2978 - drhd = dmar_find_matched_drhd_unit(pdev); 2979 - if (!drhd) { 2980 - printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n"); 2981 - return NULL; 2391 + /* normally pdev is not mapped */ 2392 + if (unlikely(domain_context_mapped(pdev))) { 2393 + struct dmar_domain *old_domain; 2394 + 2395 + old_domain = find_domain(pdev); 2396 + if (old_domain) { 2397 + if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) 2398 + vm_domain_remove_one_dev_info(old_domain, pdev); 2399 + else 2400 + domain_remove_dev_info(old_domain); 2401 + } 2982 2402 } 2983 2403 2984 - iommu = drhd->iommu; 2985 - if (!iommu) { 2986 - printk(KERN_ERR 2987 - "intel_iommu_domain_alloc: iommu == NULL\n"); 2988 - return NULL; 2404 + iommu = device_to_iommu(pdev->bus->number, pdev->devfn); 2405 + if (!iommu) 2406 + return -ENODEV; 2407 + 2408 + /* check if this iommu agaw is sufficient for max mapped address */ 2409 + addr_width = agaw_to_width(iommu->agaw); 2410 + end = 
DOMAIN_MAX_ADDR(addr_width); 2411 + end = end & VTD_PAGE_MASK; 2412 + if (end < dmar_domain->max_addr) { 2413 + printk(KERN_ERR "%s: iommu agaw (%d) is not " 2414 + "sufficient for the mapped address (%llx)\n", 2415 + __func__, iommu->agaw, dmar_domain->max_addr); 2416 + return -EFAULT; 2989 2417 } 2990 - domain = iommu_alloc_domain(iommu); 2991 - if (!domain) { 2992 - printk(KERN_ERR 2993 - "intel_iommu_domain_alloc: domain == NULL\n"); 2994 - return NULL; 2418 + 2419 + ret = domain_context_mapping(dmar_domain, pdev); 2420 + if (ret) 2421 + return ret; 2422 + 2423 + ret = vm_domain_add_dev_info(dmar_domain, pdev); 2424 + return ret; 2425 + } 2426 + 2427 + static void intel_iommu_detach_device(struct iommu_domain *domain, 2428 + struct device *dev) 2429 + { 2430 + struct dmar_domain *dmar_domain = domain->priv; 2431 + struct pci_dev *pdev = to_pci_dev(dev); 2432 + 2433 + vm_domain_remove_one_dev_info(dmar_domain, pdev); 2434 + } 2435 + 2436 + static int intel_iommu_map_range(struct iommu_domain *domain, 2437 + unsigned long iova, phys_addr_t hpa, 2438 + size_t size, int iommu_prot) 2439 + { 2440 + struct dmar_domain *dmar_domain = domain->priv; 2441 + u64 max_addr; 2442 + int addr_width; 2443 + int prot = 0; 2444 + int ret; 2445 + 2446 + if (iommu_prot & IOMMU_READ) 2447 + prot |= DMA_PTE_READ; 2448 + if (iommu_prot & IOMMU_WRITE) 2449 + prot |= DMA_PTE_WRITE; 2450 + 2451 + max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size); 2452 + if (dmar_domain->max_addr < max_addr) { 2453 + int min_agaw; 2454 + u64 end; 2455 + 2456 + /* check if minimum agaw is sufficient for mapped address */ 2457 + min_agaw = vm_domain_min_agaw(dmar_domain); 2458 + addr_width = agaw_to_width(min_agaw); 2459 + end = DOMAIN_MAX_ADDR(addr_width); 2460 + end = end & VTD_PAGE_MASK; 2461 + if (end < max_addr) { 2462 + printk(KERN_ERR "%s: iommu agaw (%d) is not " 2463 + "sufficient for the mapped address (%llx)\n", 2464 + __func__, min_agaw, max_addr); 2465 + return -EFAULT; 2466 + } 2467 + dmar_domain->max_addr = max_addr; 2995 2468 } 2996 - if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { 2997 - printk(KERN_ERR 2998 - "intel_iommu_domain_alloc: domain_init() failed\n"); 2999 - intel_iommu_domain_exit(domain); 3000 - return NULL; 3001 - } 3002 - return domain; 3003 - } 3004 - EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc); 3005 2469 3006 - int intel_iommu_context_mapping( 3007 - struct dmar_domain *domain, struct pci_dev *pdev) 3008 - { 3009 - int rc; 3010 - rc = domain_context_mapping(domain, pdev); 3011 - return rc; 2470 + ret = domain_page_mapping(dmar_domain, iova, hpa, size, prot); 2471 + return ret; 3012 2472 } 3013 - EXPORT_SYMBOL_GPL(intel_iommu_context_mapping); 3014 2473 3015 - int intel_iommu_page_mapping( 3016 - struct dmar_domain *domain, dma_addr_t iova, 3017 - u64 hpa, size_t size, int prot) 2474 + static void intel_iommu_unmap_range(struct iommu_domain *domain, 2475 + unsigned long iova, size_t size) 3018 2476 { 3019 - int rc; 3020 - rc = domain_page_mapping(domain, iova, hpa, size, prot); 3021 - return rc; 2477 + struct dmar_domain *dmar_domain = domain->priv; 2478 + dma_addr_t base; 2479 + 2480 + /* The address might not be aligned */ 2481 + base = iova & VTD_PAGE_MASK; 2482 + size = VTD_PAGE_ALIGN(size); 2483 + dma_pte_clear_range(dmar_domain, base, base + size); 2484 + 2485 + if (dmar_domain->max_addr == base + size) 2486 + dmar_domain->max_addr = base; 3022 2487 } 3023 - EXPORT_SYMBOL_GPL(intel_iommu_page_mapping); 3024 2488 3025 - void intel_iommu_detach_dev(struct dmar_domain *domain, u8 
bus, u8 devfn) 2489 + static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, 2490 + unsigned long iova) 3026 2491 { 3027 - detach_domain_for_dev(domain, bus, devfn); 3028 - } 3029 - EXPORT_SYMBOL_GPL(intel_iommu_detach_dev); 3030 - 3031 - struct dmar_domain * 3032 - intel_iommu_find_domain(struct pci_dev *pdev) 3033 - { 3034 - return find_domain(pdev); 3035 - } 3036 - EXPORT_SYMBOL_GPL(intel_iommu_find_domain); 3037 - 3038 - int intel_iommu_found(void) 3039 - { 3040 - return g_num_of_iommus; 3041 - } 3042 - EXPORT_SYMBOL_GPL(intel_iommu_found); 3043 - 3044 - u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova) 3045 - { 2492 + struct dmar_domain *dmar_domain = domain->priv; 3046 2493 struct dma_pte *pte; 3047 - u64 pfn; 2494 + u64 phys = 0; 3048 2495 3049 - pfn = 0; 3050 - pte = addr_to_dma_pte(domain, iova); 3051 - 2496 + pte = addr_to_dma_pte(dmar_domain, iova); 3052 2497 if (pte) 3053 - pfn = dma_pte_addr(*pte); 2498 + phys = dma_pte_addr(pte); 3054 2499 3055 - return pfn >> VTD_PAGE_SHIFT; 2500 + return phys; 3056 2501 } 3057 - EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn); 2502 + 2503 + static struct iommu_ops intel_iommu_ops = { 2504 + .domain_init = intel_iommu_domain_init, 2505 + .domain_destroy = intel_iommu_domain_destroy, 2506 + .attach_dev = intel_iommu_attach_device, 2507 + .detach_dev = intel_iommu_detach_device, 2508 + .map = intel_iommu_map_range, 2509 + .unmap = intel_iommu_unmap_range, 2510 + .iova_to_phys = intel_iommu_iova_to_phys, 2511 + };
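For orientation, the block below is a condensed, hypothetical sketch of the registration pattern the Intel driver follows above: a hardware IOMMU driver implements the struct iommu_ops callbacks (declared in the new include/linux/iommu.h further down in this diff) and publishes them with register_iommu() from its init path, just as intel_iommu_init() now does with intel_iommu_ops. The my_* names, the stub callback bodies and the device_initcall() hook are placeholders for illustration only; nothing here is part of the merged code.

#include <linux/init.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/iommu.h>

/* Placeholder callbacks: a real driver programs its hardware here. */
static int my_domain_init(struct iommu_domain *domain)
{
        domain->priv = NULL;    /* driver-private per-domain state */
        return 0;
}

static void my_domain_destroy(struct iommu_domain *domain)
{
        domain->priv = NULL;
}

static int my_attach_dev(struct iommu_domain *domain, struct device *dev)
{
        return 0;               /* stub: would set up a context/device-table entry */
}

static void my_detach_dev(struct iommu_domain *domain, struct device *dev)
{
}

static int my_map(struct iommu_domain *domain, unsigned long iova,
                  phys_addr_t paddr, size_t size, int prot)
{
        return 0;               /* stub: would write I/O page-table entries */
}

static void my_unmap(struct iommu_domain *domain, unsigned long iova,
                     size_t size)
{
}

static phys_addr_t my_iova_to_phys(struct iommu_domain *domain,
                                   unsigned long iova)
{
        return 0;               /* stub: would walk the I/O page table */
}

static struct iommu_ops my_iommu_ops = {
        .domain_init    = my_domain_init,
        .domain_destroy = my_domain_destroy,
        .attach_dev     = my_attach_dev,
        .detach_dev     = my_detach_dev,
        .map            = my_map,
        .unmap          = my_unmap,
        .iova_to_phys   = my_iova_to_phys,
};

static int __init my_iommu_init(void)
{
        register_iommu(&my_iommu_ops);  /* generic layer now reports iommu_found() */
        return 0;
}
device_initcall(my_iommu_init);

Once an ops table is registered, the generic iommu_* entry points are expected to dispatch into these callbacks, which is what lets KVM stay vendor-agnostic later in this diff.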
+3 -135
include/linux/dma_remapping.h
··· 9 9 #define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT) 10 10 #define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK) 11 11 12 - #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) 13 - #define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK) 14 - #define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK) 15 - 16 - 17 - /* 18 - * 0: Present 19 - * 1-11: Reserved 20 - * 12-63: Context Ptr (12 - (haw-1)) 21 - * 64-127: Reserved 22 - */ 23 - struct root_entry { 24 - u64 val; 25 - u64 rsvd1; 26 - }; 27 - #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry)) 28 - static inline bool root_present(struct root_entry *root) 29 - { 30 - return (root->val & 1); 31 - } 32 - static inline void set_root_present(struct root_entry *root) 33 - { 34 - root->val |= 1; 35 - } 36 - static inline void set_root_value(struct root_entry *root, unsigned long value) 37 - { 38 - root->val |= value & VTD_PAGE_MASK; 39 - } 40 - 41 - struct context_entry; 42 - static inline struct context_entry * 43 - get_context_addr_from_root(struct root_entry *root) 44 - { 45 - return (struct context_entry *) 46 - (root_present(root)?phys_to_virt( 47 - root->val & VTD_PAGE_MASK) : 48 - NULL); 49 - } 50 - 51 - /* 52 - * low 64 bits: 53 - * 0: present 54 - * 1: fault processing disable 55 - * 2-3: translation type 56 - * 12-63: address space root 57 - * high 64 bits: 58 - * 0-2: address width 59 - * 3-6: aval 60 - * 8-23: domain id 61 - */ 62 - struct context_entry { 63 - u64 lo; 64 - u64 hi; 65 - }; 66 - #define context_present(c) ((c).lo & 1) 67 - #define context_fault_disable(c) (((c).lo >> 1) & 1) 68 - #define context_translation_type(c) (((c).lo >> 2) & 3) 69 - #define context_address_root(c) ((c).lo & VTD_PAGE_MASK) 70 - #define context_address_width(c) ((c).hi & 7) 71 - #define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1)) 72 - 73 - #define context_set_present(c) do {(c).lo |= 1;} while (0) 74 - #define context_set_fault_enable(c) \ 75 - do {(c).lo &= (((u64)-1) << 2) | 1;} while (0) 76 - #define context_set_translation_type(c, val) \ 77 - do { \ 78 - (c).lo &= (((u64)-1) << 4) | 3; \ 79 - (c).lo |= ((val) & 3) << 2; \ 80 - } while (0) 81 - #define CONTEXT_TT_MULTI_LEVEL 0 82 - #define context_set_address_root(c, val) \ 83 - do {(c).lo |= (val) & VTD_PAGE_MASK; } while (0) 84 - #define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0) 85 - #define context_set_domain_id(c, val) \ 86 - do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0) 87 - #define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0) 88 - 89 - /* 90 - * 0: readable 91 - * 1: writable 92 - * 2-6: reserved 93 - * 7: super page 94 - * 8-11: available 95 - * 12-63: Host physcial address 96 - */ 97 - struct dma_pte { 98 - u64 val; 99 - }; 100 - #define dma_clear_pte(p) do {(p).val = 0;} while (0) 101 - 102 12 #define DMA_PTE_READ (1) 103 13 #define DMA_PTE_WRITE (2) 104 14 105 - #define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0) 106 - #define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0) 107 - #define dma_set_pte_prot(p, prot) \ 108 - do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) 109 - #define dma_pte_addr(p) ((p).val & VTD_PAGE_MASK) 110 - #define dma_set_pte_addr(p, addr) do {\ 111 - (p).val |= ((addr) & VTD_PAGE_MASK); } while (0) 112 - #define dma_pte_present(p) (((p).val & 3) != 0) 113 - 114 15 struct intel_iommu; 16 + struct dmar_domain; 17 + struct root_entry; 115 18 116 - struct dmar_domain { 117 - int id; /* domain id */ 118 - struct intel_iommu *iommu; /* back 
pointer to owning iommu */ 119 - 120 - struct list_head devices; /* all devices' list */ 121 - struct iova_domain iovad; /* iova's that belong to this domain */ 122 - 123 - struct dma_pte *pgd; /* virtual address */ 124 - spinlock_t mapping_lock; /* page table lock */ 125 - int gaw; /* max guest address width */ 126 - 127 - /* adjusted guest address width, 0 is level 2 30-bit */ 128 - int agaw; 129 - 130 - #define DOMAIN_FLAG_MULTIPLE_DEVICES 1 131 - int flags; 132 - }; 133 - 134 - /* PCI domain-device relationship */ 135 - struct device_domain_info { 136 - struct list_head link; /* link to domain siblings */ 137 - struct list_head global; /* link to global list */ 138 - u8 bus; /* PCI bus numer */ 139 - u8 devfn; /* PCI devfn number */ 140 - struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ 141 - struct dmar_domain *domain; /* pointer to domain */ 142 - }; 143 - 144 - extern int init_dmars(void); 145 19 extern void free_dmar_iommu(struct intel_iommu *iommu); 20 + extern int iommu_calculate_agaw(struct intel_iommu *iommu); 146 21 147 22 extern int dmar_disabled; 148 - 149 - #ifndef CONFIG_DMAR_GFX_WA 150 - static inline void iommu_prepare_gfx_mapping(void) 151 - { 152 - return; 153 - } 154 - #endif /* !CONFIG_DMAR_GFX_WA */ 155 23 156 24 #endif
-1
include/linux/dmar.h
··· 144 144 list_for_each_entry(rmrr, &dmar_rmrr_units, list) 145 145 /* Intel DMAR initialization functions */ 146 146 extern int intel_iommu_init(void); 147 - extern int dmar_disabled; 148 147 #else 149 148 static inline int intel_iommu_init(void) 150 149 {
+1 -24
include/linux/intel-iommu.h
··· 23 23 #define _INTEL_IOMMU_H_ 24 24 25 25 #include <linux/types.h> 26 - #include <linux/msi.h> 27 - #include <linux/sysdev.h> 28 26 #include <linux/iova.h> 29 27 #include <linux/io.h> 30 28 #include <linux/dma_remapping.h> ··· 287 289 void __iomem *reg; /* Pointer to hardware regs, virtual addr */ 288 290 u64 cap; 289 291 u64 ecap; 290 - int seg; 291 292 u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ 292 293 spinlock_t register_lock; /* protect register handling */ 293 294 int seq_id; /* sequence id of the iommu */ 295 + int agaw; /* agaw of this iommu */ 294 296 295 297 #ifdef CONFIG_DMAR 296 298 unsigned long *domain_ids; /* bitmap of domains */ ··· 300 302 301 303 unsigned int irq; 302 304 unsigned char name[7]; /* Device Name */ 303 - struct msi_msg saved_msg; 304 - struct sys_device sysdev; 305 305 struct iommu_flush flush; 306 306 #endif 307 307 struct q_inval *qi; /* Queued invalidation info */ ··· 329 333 int non_present_entry_flush); 330 334 331 335 extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); 332 - 333 - void intel_iommu_domain_exit(struct dmar_domain *domain); 334 - struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev); 335 - int intel_iommu_context_mapping(struct dmar_domain *domain, 336 - struct pci_dev *pdev); 337 - int intel_iommu_page_mapping(struct dmar_domain *domain, dma_addr_t iova, 338 - u64 hpa, size_t size, int prot); 339 - void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn); 340 - struct dmar_domain *intel_iommu_find_domain(struct pci_dev *pdev); 341 - u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova); 342 - 343 - #ifdef CONFIG_DMAR 344 - int intel_iommu_found(void); 345 - #else /* CONFIG_DMAR */ 346 - static inline int intel_iommu_found(void) 347 - { 348 - return 0; 349 - } 350 - #endif /* CONFIG_DMAR */ 351 336 352 337 extern void *intel_alloc_coherent(struct device *, size_t, dma_addr_t *, gfp_t); 353 338 extern void intel_free_coherent(struct device *, size_t, void *, dma_addr_t);
+112
include/linux/iommu.h
··· 1 + /* 2 + * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. 3 + * Author: Joerg Roedel <joerg.roedel@amd.com> 4 + * 5 + * This program is free software; you can redistribute it and/or modify it 6 + * under the terms of the GNU General Public License version 2 as published 7 + * by the Free Software Foundation. 8 + * 9 + * This program is distributed in the hope that it will be useful, 10 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 + * GNU General Public License for more details. 13 + * 14 + * You should have received a copy of the GNU General Public License 15 + * along with this program; if not, write to the Free Software 16 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 + */ 18 + 19 + #ifndef __LINUX_IOMMU_H 20 + #define __LINUX_IOMMU_H 21 + 22 + #define IOMMU_READ (1) 23 + #define IOMMU_WRITE (2) 24 + 25 + struct device; 26 + 27 + struct iommu_domain { 28 + void *priv; 29 + }; 30 + 31 + struct iommu_ops { 32 + int (*domain_init)(struct iommu_domain *domain); 33 + void (*domain_destroy)(struct iommu_domain *domain); 34 + int (*attach_dev)(struct iommu_domain *domain, struct device *dev); 35 + void (*detach_dev)(struct iommu_domain *domain, struct device *dev); 36 + int (*map)(struct iommu_domain *domain, unsigned long iova, 37 + phys_addr_t paddr, size_t size, int prot); 38 + void (*unmap)(struct iommu_domain *domain, unsigned long iova, 39 + size_t size); 40 + phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, 41 + unsigned long iova); 42 + }; 43 + 44 + #ifdef CONFIG_IOMMU_API 45 + 46 + extern void register_iommu(struct iommu_ops *ops); 47 + extern bool iommu_found(void); 48 + extern struct iommu_domain *iommu_domain_alloc(void); 49 + extern void iommu_domain_free(struct iommu_domain *domain); 50 + extern int iommu_attach_device(struct iommu_domain *domain, 51 + struct device *dev); 52 + extern void iommu_detach_device(struct iommu_domain *domain, 53 + struct device *dev); 54 + extern int iommu_map_range(struct iommu_domain *domain, unsigned long iova, 55 + phys_addr_t paddr, size_t size, int prot); 56 + extern void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova, 57 + size_t size); 58 + extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, 59 + unsigned long iova); 60 + 61 + #else /* CONFIG_IOMMU_API */ 62 + 63 + static inline void register_iommu(struct iommu_ops *ops) 64 + { 65 + } 66 + 67 + static inline bool iommu_found(void) 68 + { 69 + return false; 70 + } 71 + 72 + static inline struct iommu_domain *iommu_domain_alloc(void) 73 + { 74 + return NULL; 75 + } 76 + 77 + static inline void iommu_domain_free(struct iommu_domain *domain) 78 + { 79 + } 80 + 81 + static inline int iommu_attach_device(struct iommu_domain *domain, 82 + struct device *dev) 83 + { 84 + return -ENODEV; 85 + } 86 + 87 + static inline void iommu_detach_device(struct iommu_domain *domain, 88 + struct device *dev) 89 + { 90 + } 91 + 92 + static inline int iommu_map_range(struct iommu_domain *domain, 93 + unsigned long iova, phys_addr_t paddr, 94 + size_t size, int prot) 95 + { 96 + return -ENODEV; 97 + } 98 + 99 + static inline void iommu_unmap_range(struct iommu_domain *domain, 100 + unsigned long iova, size_t size) 101 + { 102 + } 103 + 104 + static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, 105 + unsigned long iova) 106 + { 107 + return 0; 108 + } 109 + 110 + #endif /* CONFIG_IOMMU_API */ 111 + 112 + #endif /* 
__LINUX_IOMMU_H */
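To make the calling convention concrete, here is a minimal, hypothetical consumer of the API declared above, roughly the shape of what the reworked virt/kvm/iommu.c at the end of this diff does per VM. The my_* names and the single-range policy are illustrative assumptions; the iommu_*() calls and the IOMMU_READ/IOMMU_WRITE flags are the ones exported by this header.

#include <linux/errno.h>
#include <linux/types.h>
#include <linux/iommu.h>

static struct iommu_domain *my_domain;  /* illustrative; real users keep this in
                                           their own per-context state */

static int my_setup_mapping(struct device *dev, unsigned long iova,
                            phys_addr_t paddr, size_t size)
{
        int ret;

        if (!iommu_found())             /* no hardware IOMMU driver registered */
                return -ENODEV;

        my_domain = iommu_domain_alloc();
        if (!my_domain)
                return -ENOMEM;

        ret = iommu_attach_device(my_domain, dev);
        if (ret)
                goto out_free;

        /* map [paddr, paddr + size) at iova, readable and writable */
        ret = iommu_map_range(my_domain, iova, paddr, size,
                              IOMMU_READ | IOMMU_WRITE);
        if (ret)
                goto out_detach;

        return 0;

out_detach:
        iommu_detach_device(my_domain, dev);
out_free:
        iommu_domain_free(my_domain);
        my_domain = NULL;
        return ret;
}

static void my_teardown_mapping(struct device *dev, unsigned long iova,
                                size_t size)
{
        iommu_unmap_range(my_domain, iova, size);
        iommu_detach_device(my_domain, dev);
        iommu_domain_free(my_domain);
        my_domain = NULL;
}

iommu_iova_to_phys() completes the interface by translating an IOVA back to the physical address currently mapped there, which is how the KVM code below looks up pages before releasing them.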
+22 -8
include/linux/kvm_host.h
··· 316 316 #define KVM_ASSIGNED_DEV_HOST_MSI (1 << 9) 317 317 unsigned long irq_requested_type; 318 318 int irq_source_id; 319 + int flags; 319 320 struct pci_dev *dev; 320 321 struct kvm *kvm; 321 322 }; ··· 328 327 int kvm_request_irq_source_id(struct kvm *kvm); 329 328 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); 330 329 331 - #ifdef CONFIG_DMAR 330 + #ifdef CONFIG_IOMMU_API 332 331 int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, 333 332 unsigned long npages); 334 - int kvm_iommu_map_guest(struct kvm *kvm, 335 - struct kvm_assigned_dev_kernel *assigned_dev); 333 + int kvm_iommu_map_guest(struct kvm *kvm); 336 334 int kvm_iommu_unmap_guest(struct kvm *kvm); 337 - #else /* CONFIG_DMAR */ 335 + int kvm_assign_device(struct kvm *kvm, 336 + struct kvm_assigned_dev_kernel *assigned_dev); 337 + int kvm_deassign_device(struct kvm *kvm, 338 + struct kvm_assigned_dev_kernel *assigned_dev); 339 + #else /* CONFIG_IOMMU_API */ 338 340 static inline int kvm_iommu_map_pages(struct kvm *kvm, 339 341 gfn_t base_gfn, 340 342 unsigned long npages) ··· 345 341 return 0; 346 342 } 347 343 348 - static inline int kvm_iommu_map_guest(struct kvm *kvm, 349 - struct kvm_assigned_dev_kernel 350 - *assigned_dev) 344 + static inline int kvm_iommu_map_guest(struct kvm *kvm) 351 345 { 352 346 return -ENODEV; 353 347 } ··· 354 352 { 355 353 return 0; 356 354 } 357 - #endif /* CONFIG_DMAR */ 355 + 356 + static inline int kvm_assign_device(struct kvm *kvm, 357 + struct kvm_assigned_dev_kernel *assigned_dev) 358 + { 359 + return 0; 360 + } 361 + 362 + static inline int kvm_deassign_device(struct kvm *kvm, 363 + struct kvm_assigned_dev_kernel *assigned_dev) 364 + { 365 + return 0; 366 + } 367 + #endif /* CONFIG_IOMMU_API */ 358 368 359 369 static inline void kvm_guest_enter(void) 360 370 {
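The declarations above change the shape of device assignment: kvm_iommu_map_guest() now only creates the per-VM IOMMU domain and maps guest memory, while attaching and detaching individual devices is done by kvm_assign_device() and kvm_deassign_device(). A condensed sketch of the sequence the assignment path follows under CONFIG_IOMMU_API is below; error handling is trimmed, and the example_* wrappers are illustrative, the real callers being in virt/kvm/kvm_main.c just after this.

#include <linux/kvm_host.h>

/* Assign: create the VM's IOMMU domain lazily, then attach the device. */
static int example_assign(struct kvm *kvm,
                          struct kvm_assigned_dev_kernel *adev)
{
        int r;

        if (!kvm->arch.iommu_domain) {
                r = kvm_iommu_map_guest(kvm);   /* alloc domain, map memslots */
                if (r)
                        return r;
        }
        return kvm_assign_device(kvm, adev);    /* iommu_attach_device() inside */
}

/* Deassign: detach just this device; the VM keeps its domain. */
static void example_deassign(struct kvm *kvm,
                             struct kvm_assigned_dev_kernel *adev)
{
        kvm_deassign_device(kvm, adev);         /* iommu_detach_device() inside */
}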
+49 -1
virt/kvm/kvm_main.c
··· 496 496 match->assigned_dev_id = assigned_dev->assigned_dev_id; 497 497 match->host_busnr = assigned_dev->busnr; 498 498 match->host_devfn = assigned_dev->devfn; 499 + match->flags = assigned_dev->flags; 499 500 match->dev = dev; 500 501 match->irq_source_id = -1; 501 502 match->kvm = kvm; ··· 504 503 list_add(&match->list, &kvm->arch.assigned_dev_head); 505 504 506 505 if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { 507 - r = kvm_iommu_map_guest(kvm, match); 506 + if (!kvm->arch.iommu_domain) { 507 + r = kvm_iommu_map_guest(kvm); 508 + if (r) 509 + goto out_list_del; 510 + } 511 + r = kvm_assign_device(kvm, match); 508 512 if (r) 509 513 goto out_list_del; 510 514 } ··· 526 520 pci_dev_put(dev); 527 521 out_free: 528 522 kfree(match); 523 + mutex_unlock(&kvm->lock); 524 + return r; 525 + } 526 + #endif 527 + 528 + #ifdef KVM_CAP_DEVICE_DEASSIGNMENT 529 + static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, 530 + struct kvm_assigned_pci_dev *assigned_dev) 531 + { 532 + int r = 0; 533 + struct kvm_assigned_dev_kernel *match; 534 + 535 + mutex_lock(&kvm->lock); 536 + 537 + match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, 538 + assigned_dev->assigned_dev_id); 539 + if (!match) { 540 + printk(KERN_INFO "%s: device hasn't been assigned before, " 541 + "so cannot be deassigned\n", __func__); 542 + r = -EINVAL; 543 + goto out; 544 + } 545 + 546 + if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) 547 + kvm_deassign_device(kvm, match); 548 + 549 + kvm_free_assigned_device(kvm, match); 550 + 551 + out: 529 552 mutex_unlock(&kvm->lock); 530 553 return r; 531 554 } ··· 1888 1853 if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) 1889 1854 goto out; 1890 1855 r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); 1856 + if (r) 1857 + goto out; 1858 + break; 1859 + } 1860 + #endif 1861 + #ifdef KVM_CAP_DEVICE_DEASSIGNMENT 1862 + case KVM_DEASSIGN_PCI_DEVICE: { 1863 + struct kvm_assigned_pci_dev assigned_dev; 1864 + 1865 + r = -EFAULT; 1866 + if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) 1867 + goto out; 1868 + r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev); 1891 1869 if (r) 1892 1870 goto out; 1893 1871 break;
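From userspace, the new KVM_DEASSIGN_PCI_DEVICE ioctl handled above is the counterpart of device assignment and takes the same struct kvm_assigned_pci_dev. The hedged sketch below assumes a VM file descriptor obtained from /dev/kvm in the usual way, a kernel that advertises KVM_CAP_DEVICE_DEASSIGNMENT, and a dev_id equal to the identifier used when the device was assigned; the deassign_device() helper name is illustrative only.

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int deassign_device(int vm_fd, __u32 dev_id, __u32 busnr, __u32 devfn)
{
        struct kvm_assigned_pci_dev dev;

        memset(&dev, 0, sizeof(dev));
        dev.assigned_dev_id = dev_id;   /* id chosen when the device was assigned */
        dev.busnr = busnr;
        dev.devfn = devfn;
        /* also drop the IOMMU attachment, mirroring the assignment flags */
        dev.flags = KVM_DEV_ASSIGN_ENABLE_IOMMU;

        if (ioctl(vm_fd, KVM_DEASSIGN_PCI_DEVICE, &dev) < 0) {
                perror("KVM_DEASSIGN_PCI_DEVICE");
                return -1;
        }
        return 0;
}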
+82 -59
virt/kvm/vtd.c → virt/kvm/iommu.c
··· 25 25 #include <linux/kvm_host.h> 26 26 #include <linux/pci.h> 27 27 #include <linux/dmar.h> 28 + #include <linux/iommu.h> 28 29 #include <linux/intel-iommu.h> 29 30 30 31 static int kvm_iommu_unmap_memslots(struct kvm *kvm); ··· 38 37 gfn_t gfn = base_gfn; 39 38 pfn_t pfn; 40 39 int i, r = 0; 41 - struct dmar_domain *domain = kvm->arch.intel_iommu_domain; 40 + struct iommu_domain *domain = kvm->arch.iommu_domain; 42 41 43 42 /* check if iommu exists and in use */ 44 43 if (!domain) ··· 46 45 47 46 for (i = 0; i < npages; i++) { 48 47 /* check if already mapped */ 49 - pfn = (pfn_t)intel_iommu_iova_to_pfn(domain, 50 - gfn_to_gpa(gfn)); 51 - if (pfn) 48 + if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) 52 49 continue; 53 50 54 51 pfn = gfn_to_pfn(kvm, gfn); 55 - r = intel_iommu_page_mapping(domain, 56 - gfn_to_gpa(gfn), 57 - pfn_to_hpa(pfn), 58 - PAGE_SIZE, 59 - DMA_PTE_READ | 60 - DMA_PTE_WRITE); 52 + r = iommu_map_range(domain, 53 + gfn_to_gpa(gfn), 54 + pfn_to_hpa(pfn), 55 + PAGE_SIZE, 56 + IOMMU_READ | IOMMU_WRITE); 61 57 if (r) { 62 - printk(KERN_ERR "kvm_iommu_map_pages:" 58 + printk(KERN_ERR "kvm_iommu_map_address:" 63 59 "iommu failed to map pfn=%lx\n", pfn); 64 60 goto unmap_pages; 65 61 } ··· 71 73 72 74 static int kvm_iommu_map_memslots(struct kvm *kvm) 73 75 { 74 - int i, r; 76 + int i, r = 0; 75 77 76 78 down_read(&kvm->slots_lock); 77 79 for (i = 0; i < kvm->nmemslots; i++) { ··· 84 86 return r; 85 87 } 86 88 87 - int kvm_iommu_map_guest(struct kvm *kvm, 88 - struct kvm_assigned_dev_kernel *assigned_dev) 89 + int kvm_assign_device(struct kvm *kvm, 90 + struct kvm_assigned_dev_kernel *assigned_dev) 89 91 { 90 92 struct pci_dev *pdev = NULL; 93 + struct iommu_domain *domain = kvm->arch.iommu_domain; 91 94 int r; 92 95 93 - if (!intel_iommu_found()) { 94 - printk(KERN_ERR "%s: intel iommu not found\n", __func__); 95 - return -ENODEV; 96 - } 97 - 98 - printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n", 99 - assigned_dev->host_busnr, 100 - PCI_SLOT(assigned_dev->host_devfn), 101 - PCI_FUNC(assigned_dev->host_devfn)); 96 + /* check if iommu exists and in use */ 97 + if (!domain) 98 + return 0; 102 99 103 100 pdev = assigned_dev->dev; 101 + if (pdev == NULL) 102 + return -ENODEV; 104 103 105 - if (pdev == NULL) { 106 - if (kvm->arch.intel_iommu_domain) { 107 - intel_iommu_domain_exit(kvm->arch.intel_iommu_domain); 108 - kvm->arch.intel_iommu_domain = NULL; 109 - } 104 + r = iommu_attach_device(domain, &pdev->dev); 105 + if (r) { 106 + printk(KERN_ERR "assign device %x:%x.%x failed", 107 + pdev->bus->number, 108 + PCI_SLOT(pdev->devfn), 109 + PCI_FUNC(pdev->devfn)); 110 + return r; 111 + } 112 + 113 + printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n", 114 + assigned_dev->host_busnr, 115 + PCI_SLOT(assigned_dev->host_devfn), 116 + PCI_FUNC(assigned_dev->host_devfn)); 117 + 118 + return 0; 119 + } 120 + 121 + int kvm_deassign_device(struct kvm *kvm, 122 + struct kvm_assigned_dev_kernel *assigned_dev) 123 + { 124 + struct iommu_domain *domain = kvm->arch.iommu_domain; 125 + struct pci_dev *pdev = NULL; 126 + 127 + /* check if iommu exists and in use */ 128 + if (!domain) 129 + return 0; 130 + 131 + pdev = assigned_dev->dev; 132 + if (pdev == NULL) 133 + return -ENODEV; 134 + 135 + iommu_detach_device(domain, &pdev->dev); 136 + 137 + printk(KERN_DEBUG "deassign device: host bdf = %x:%x:%x\n", 138 + assigned_dev->host_busnr, 139 + PCI_SLOT(assigned_dev->host_devfn), 140 + PCI_FUNC(assigned_dev->host_devfn)); 141 + 142 + return 0; 143 + } 144 + 145 + int 
kvm_iommu_map_guest(struct kvm *kvm) 146 + { 147 + int r; 148 + 149 + if (!iommu_found()) { 150 + printk(KERN_ERR "%s: iommu not found\n", __func__); 110 151 return -ENODEV; 111 152 } 112 153 113 - kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev); 114 - if (!kvm->arch.intel_iommu_domain) 115 - return -ENODEV; 154 + kvm->arch.iommu_domain = iommu_domain_alloc(); 155 + if (!kvm->arch.iommu_domain) 156 + return -ENOMEM; 116 157 117 158 r = kvm_iommu_map_memslots(kvm); 118 159 if (r) 119 160 goto out_unmap; 120 161 121 - intel_iommu_detach_dev(kvm->arch.intel_iommu_domain, 122 - pdev->bus->number, pdev->devfn); 123 - 124 - r = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain, 125 - pdev); 126 - if (r) { 127 - printk(KERN_ERR "Domain context map for %s failed", 128 - pci_name(pdev)); 129 - goto out_unmap; 130 - } 131 162 return 0; 132 163 133 164 out_unmap: ··· 165 138 } 166 139 167 140 static void kvm_iommu_put_pages(struct kvm *kvm, 168 - gfn_t base_gfn, unsigned long npages) 141 + gfn_t base_gfn, unsigned long npages) 169 142 { 170 143 gfn_t gfn = base_gfn; 171 144 pfn_t pfn; 172 - struct dmar_domain *domain = kvm->arch.intel_iommu_domain; 173 - int i; 145 + struct iommu_domain *domain = kvm->arch.iommu_domain; 146 + unsigned long i; 147 + u64 phys; 148 + 149 + /* check if iommu exists and in use */ 150 + if (!domain) 151 + return; 174 152 175 153 for (i = 0; i < npages; i++) { 176 - pfn = (pfn_t)intel_iommu_iova_to_pfn(domain, 177 - gfn_to_gpa(gfn)); 154 + phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn)); 155 + pfn = phys >> PAGE_SHIFT; 178 156 kvm_release_pfn_clean(pfn); 179 157 gfn++; 180 158 } 159 + 160 + iommu_unmap_range(domain, gfn_to_gpa(base_gfn), PAGE_SIZE * npages); 181 161 } 182 162 183 163 static int kvm_iommu_unmap_memslots(struct kvm *kvm) ··· 202 168 203 169 int kvm_iommu_unmap_guest(struct kvm *kvm) 204 170 { 205 - struct kvm_assigned_dev_kernel *entry; 206 - struct dmar_domain *domain = kvm->arch.intel_iommu_domain; 171 + struct iommu_domain *domain = kvm->arch.iommu_domain; 207 172 208 173 /* check if iommu exists and in use */ 209 174 if (!domain) 210 175 return 0; 211 176 212 - list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) { 213 - printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n", 214 - entry->host_busnr, 215 - PCI_SLOT(entry->host_devfn), 216 - PCI_FUNC(entry->host_devfn)); 217 - 218 - /* detach kvm dmar domain */ 219 - intel_iommu_detach_dev(domain, entry->host_busnr, 220 - entry->host_devfn); 221 - } 222 177 kvm_iommu_unmap_memslots(kvm); 223 - intel_iommu_domain_exit(domain); 178 + iommu_domain_free(domain); 224 179 return 0; 225 180 }