Merge branches 'iommu/api' and 'iommu/amd' into for-linus

4 files changed, 677 insertions(+), 88 deletions(-)

arch/x86/Kconfig  +10
··· 586 586 your BIOS for an option to enable it or if you have an IVRS ACPI 587 587 table. 588 588 589 + config AMD_IOMMU_STATS 590 + bool "Export AMD IOMMU statistics to debugfs" 591 + depends on AMD_IOMMU 592 + select DEBUG_FS 593 + help 594 + This option enables code in the AMD IOMMU driver to collect various 595 + statistics about what's happening in the driver and export that 596 + information to userspace via debugfs. 597 + If unsure, say N. 598 + 589 599 # need this always selected by IOMMU for the VIA workaround 590 600 config SWIOTLB 591 601 def_bool y if X86_64
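With this option enabled, the counters show up as plain u64 files under the "amd-iommu" debugfs directory created by amd_iommu_stats_init() in the amd_iommu.c hunk below. A minimal userspace sketch for reading one of them, assuming debugfs is mounted at the usual /sys/kernel/debug location; the counter name cnt_map_single comes from the DECLARE_STATS_COUNTER() declarations added later in this series:

	/* Sketch only: dump one AMD IOMMU counter exported via debugfs.
	 * Assumes debugfs is mounted at /sys/kernel/debug and that
	 * CONFIG_AMD_IOMMU_STATS has populated /sys/kernel/debug/amd-iommu/.
	 */
	#include <stdio.h>

	int main(void)
	{
		const char *path = "/sys/kernel/debug/amd-iommu/cnt_map_single";
		unsigned long long value;
		FILE *f = fopen(path, "r");

		if (!f) {
			perror(path);
			return 1;
		}
		if (fscanf(f, "%llu", &value) == 1)
			printf("map_single calls: %llu\n", value);
		fclose(f);
		return 0;
	}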
arch/x86/include/asm/amd_iommu_types.h  +42 -19
··· 190 190 /* FIXME: move this macro to <linux/pci.h> */ 191 191 #define PCI_BUS(x) (((x) >> 8) & 0xff) 192 192 193 + /* Protection domain flags */ 194 + #define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */ 195 + #define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops 196 + domain for an IOMMU */ 197 + 193 198 /* 194 199 * This structure contains generic data for IOMMU protection domains 195 200 * independent of their use. 196 201 */ 197 202 struct protection_domain { 198 - spinlock_t lock; /* mostly used to lock the page table*/ 199 - u16 id; /* the domain id written to the device table */ 200 - int mode; /* paging mode (0-6 levels) */ 201 - u64 *pt_root; /* page table root pointer */ 202 - void *priv; /* private data */ 203 + spinlock_t lock; /* mostly used to lock the page table*/ 204 + u16 id; /* the domain id written to the device table */ 205 + int mode; /* paging mode (0-6 levels) */ 206 + u64 *pt_root; /* page table root pointer */ 207 + unsigned long flags; /* flags to find out type of domain */ 208 + unsigned dev_cnt; /* devices assigned to this domain */ 209 + void *priv; /* private data */ 203 210 }; 204 211 205 212 /* ··· 302 295 bool int_enabled; 303 296 304 297 /* if one, we need to send a completion wait command */ 305 - int need_sync; 298 + bool need_sync; 306 299 307 300 /* default dma_ops domain for that IOMMU */ 308 301 struct dma_ops_domain *default_dom; ··· 381 374 extern unsigned long *amd_iommu_pd_alloc_bitmap; 382 375 383 376 /* will be 1 if device isolation is enabled */ 384 - extern int amd_iommu_isolate; 377 + extern bool amd_iommu_isolate; 385 378 386 379 /* 387 380 * If true, the addresses will be flushed on unmap time, not when ··· 389 382 */ 390 383 extern bool amd_iommu_unmap_flush; 391 384 392 - /* takes a PCI device id and prints it out in a readable form */ 393 - static inline void print_devid(u16 devid, int nl) 394 - { 395 - int bus = devid >> 8; 396 - int dev = devid >> 3 & 0x1f; 397 - int fn = devid & 0x07; 398 - 399 - printk("%02x:%02x.%x", bus, dev, fn); 400 - if (nl) 401 - printk("\n"); 402 - } 403 - 404 385 /* takes bus and device/function and returns the device id 405 386 * FIXME: should that be in generic PCI code? */ 406 387 static inline u16 calc_devid(u8 bus, u8 devfn) 407 388 { 408 389 return (((u16)bus) << 8) | devfn; 409 390 } 391 + 392 + #ifdef CONFIG_AMD_IOMMU_STATS 393 + 394 + struct __iommu_counter { 395 + char *name; 396 + struct dentry *dent; 397 + u64 value; 398 + }; 399 + 400 + #define DECLARE_STATS_COUNTER(nm) \ 401 + static struct __iommu_counter nm = { \ 402 + .name = #nm, \ 403 + } 404 + 405 + #define INC_STATS_COUNTER(name) name.value += 1 406 + #define ADD_STATS_COUNTER(name, x) name.value += (x) 407 + #define SUB_STATS_COUNTER(name, x) name.value -= (x) 408 + 409 + #else /* CONFIG_AMD_IOMMU_STATS */ 410 + 411 + #define DECLARE_STATS_COUNTER(name) 412 + #define INC_STATS_COUNTER(name) 413 + #define ADD_STATS_COUNTER(name, x) 414 + #define SUB_STATS_COUNTER(name, x) 415 + 416 + static inline void amd_iommu_stats_init(void) { } 417 + 418 + #endif /* CONFIG_AMD_IOMMU_STATS */ 410 419 411 420 #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */
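The statistics helpers above are deliberately plain macros so that they compile away to nothing when CONFIG_AMD_IOMMU_STATS is off. A condensed, standalone illustration of the pattern the driver follows (struct and macros adapted from this header, with the debugfs dentry dropped so it builds outside the kernel):

	/* Standalone sketch of the DECLARE_STATS_COUNTER()/INC_STATS_COUNTER()
	 * usage pattern; the real counters live in arch/x86/kernel/amd_iommu.c.
	 */
	#include <stdio.h>

	struct __iommu_counter {
		const char *name;
		unsigned long long value;
	};

	#define DECLARE_STATS_COUNTER(nm) \
		static struct __iommu_counter nm = { .name = #nm, }

	#define INC_STATS_COUNTER(name)		name.value += 1
	#define ADD_STATS_COUNTER(name, x)	name.value += (x)

	DECLARE_STATS_COUNTER(cnt_map_single);
	DECLARE_STATS_COUNTER(alloced_io_mem);

	int main(void)
	{
		/* what the driver does once per map_single() call */
		INC_STATS_COUNTER(cnt_map_single);
		ADD_STATS_COUNTER(alloced_io_mem, 4096);

		printf("%s = %llu\n", cnt_map_single.name, cnt_map_single.value);
		printf("%s = %llu\n", alloced_io_mem.name, alloced_io_mem.value);
		return 0;
	}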
arch/x86/kernel/amd_iommu.c  +619 -60
··· 20 20 #include <linux/pci.h> 21 21 #include <linux/gfp.h> 22 22 #include <linux/bitops.h> 23 + #include <linux/debugfs.h> 23 24 #include <linux/scatterlist.h> 24 25 #include <linux/iommu-helper.h> 26 + #ifdef CONFIG_IOMMU_API 27 + #include <linux/iommu.h> 28 + #endif 25 29 #include <asm/proto.h> 26 30 #include <asm/iommu.h> 27 31 #include <asm/gart.h> ··· 42 38 static LIST_HEAD(iommu_pd_list); 43 39 static DEFINE_SPINLOCK(iommu_pd_list_lock); 44 40 41 + #ifdef CONFIG_IOMMU_API 42 + static struct iommu_ops amd_iommu_ops; 43 + #endif 44 + 45 45 /* 46 46 * general struct to manage commands send to an IOMMU 47 47 */ ··· 55 47 56 48 static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, 57 49 struct unity_map_entry *e); 50 + static struct dma_ops_domain *find_protection_domain(u16 devid); 51 + 52 + 53 + #ifdef CONFIG_AMD_IOMMU_STATS 54 + 55 + /* 56 + * Initialization code for statistics collection 57 + */ 58 + 59 + DECLARE_STATS_COUNTER(compl_wait); 60 + DECLARE_STATS_COUNTER(cnt_map_single); 61 + DECLARE_STATS_COUNTER(cnt_unmap_single); 62 + DECLARE_STATS_COUNTER(cnt_map_sg); 63 + DECLARE_STATS_COUNTER(cnt_unmap_sg); 64 + DECLARE_STATS_COUNTER(cnt_alloc_coherent); 65 + DECLARE_STATS_COUNTER(cnt_free_coherent); 66 + DECLARE_STATS_COUNTER(cross_page); 67 + DECLARE_STATS_COUNTER(domain_flush_single); 68 + DECLARE_STATS_COUNTER(domain_flush_all); 69 + DECLARE_STATS_COUNTER(alloced_io_mem); 70 + DECLARE_STATS_COUNTER(total_map_requests); 71 + 72 + static struct dentry *stats_dir; 73 + static struct dentry *de_isolate; 74 + static struct dentry *de_fflush; 75 + 76 + static void amd_iommu_stats_add(struct __iommu_counter *cnt) 77 + { 78 + if (stats_dir == NULL) 79 + return; 80 + 81 + cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir, 82 + &cnt->value); 83 + } 84 + 85 + static void amd_iommu_stats_init(void) 86 + { 87 + stats_dir = debugfs_create_dir("amd-iommu", NULL); 88 + if (stats_dir == NULL) 89 + return; 90 + 91 + de_isolate = debugfs_create_bool("isolation", 0444, stats_dir, 92 + (u32 *)&amd_iommu_isolate); 93 + 94 + de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir, 95 + (u32 *)&amd_iommu_unmap_flush); 96 + 97 + amd_iommu_stats_add(&compl_wait); 98 + amd_iommu_stats_add(&cnt_map_single); 99 + amd_iommu_stats_add(&cnt_unmap_single); 100 + amd_iommu_stats_add(&cnt_map_sg); 101 + amd_iommu_stats_add(&cnt_unmap_sg); 102 + amd_iommu_stats_add(&cnt_alloc_coherent); 103 + amd_iommu_stats_add(&cnt_free_coherent); 104 + amd_iommu_stats_add(&cross_page); 105 + amd_iommu_stats_add(&domain_flush_single); 106 + amd_iommu_stats_add(&domain_flush_all); 107 + amd_iommu_stats_add(&alloced_io_mem); 108 + amd_iommu_stats_add(&total_map_requests); 109 + } 110 + 111 + #endif 58 112 59 113 /* returns !0 if the IOMMU is caching non-present entries in its TLB */ 60 114 static int iommu_has_npcache(struct amd_iommu *iommu) ··· 259 189 spin_lock_irqsave(&iommu->lock, flags); 260 190 ret = __iommu_queue_command(iommu, cmd); 261 191 if (!ret) 262 - iommu->need_sync = 1; 192 + iommu->need_sync = true; 263 193 spin_unlock_irqrestore(&iommu->lock, flags); 264 194 265 195 return ret; 266 196 } 267 197 268 198 /* 269 - * This function is called whenever we need to ensure that the IOMMU has 270 - * completed execution of all commands we sent. It sends a 271 - * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs 272 - * us about that by writing a value to a physical address we pass with 273 - * the command. 
199 + * This function waits until an IOMMU has completed a completion 200 + * wait command 274 201 */ 275 - static int iommu_completion_wait(struct amd_iommu *iommu) 202 + static void __iommu_wait_for_completion(struct amd_iommu *iommu) 276 203 { 277 - int ret = 0, ready = 0; 204 + int ready = 0; 278 205 unsigned status = 0; 279 - struct iommu_cmd cmd; 280 - unsigned long flags, i = 0; 206 + unsigned long i = 0; 281 207 282 - memset(&cmd, 0, sizeof(cmd)); 283 - cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; 284 - CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); 285 - 286 - spin_lock_irqsave(&iommu->lock, flags); 287 - 288 - if (!iommu->need_sync) 289 - goto out; 290 - 291 - iommu->need_sync = 0; 292 - 293 - ret = __iommu_queue_command(iommu, &cmd); 294 - 295 - if (ret) 296 - goto out; 208 + INC_STATS_COUNTER(compl_wait); 297 209 298 210 while (!ready && (i < EXIT_LOOP_COUNT)) { 299 211 ++i; ··· 290 238 291 239 if (unlikely(i == EXIT_LOOP_COUNT)) 292 240 panic("AMD IOMMU: Completion wait loop failed\n"); 241 + } 242 + 243 + /* 244 + * This function queues a completion wait command into the command 245 + * buffer of an IOMMU 246 + */ 247 + static int __iommu_completion_wait(struct amd_iommu *iommu) 248 + { 249 + struct iommu_cmd cmd; 250 + 251 + memset(&cmd, 0, sizeof(cmd)); 252 + cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; 253 + CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); 254 + 255 + return __iommu_queue_command(iommu, &cmd); 256 + } 257 + 258 + /* 259 + * This function is called whenever we need to ensure that the IOMMU has 260 + * completed execution of all commands we sent. It sends a 261 + * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs 262 + * us about that by writing a value to a physical address we pass with 263 + * the command. 264 + */ 265 + static int iommu_completion_wait(struct amd_iommu *iommu) 266 + { 267 + int ret = 0; 268 + unsigned long flags; 269 + 270 + spin_lock_irqsave(&iommu->lock, flags); 271 + 272 + if (!iommu->need_sync) 273 + goto out; 274 + 275 + ret = __iommu_completion_wait(iommu); 276 + 277 + iommu->need_sync = false; 278 + 279 + if (ret) 280 + goto out; 281 + 282 + __iommu_wait_for_completion(iommu); 293 283 294 284 out: 295 285 spin_unlock_irqrestore(&iommu->lock, flags); ··· 358 264 return ret; 359 265 } 360 266 267 + static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, 268 + u16 domid, int pde, int s) 269 + { 270 + memset(cmd, 0, sizeof(*cmd)); 271 + address &= PAGE_MASK; 272 + CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); 273 + cmd->data[1] |= domid; 274 + cmd->data[2] = lower_32_bits(address); 275 + cmd->data[3] = upper_32_bits(address); 276 + if (s) /* size bit - we flush more than one 4kb page */ 277 + cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; 278 + if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ 279 + cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; 280 + } 281 + 361 282 /* 362 283 * Generic command send function for invalidaing TLB entries 363 284 */ ··· 382 273 struct iommu_cmd cmd; 383 274 int ret; 384 275 385 - memset(&cmd, 0, sizeof(cmd)); 386 - address &= PAGE_MASK; 387 - CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES); 388 - cmd.data[1] |= domid; 389 - cmd.data[2] = lower_32_bits(address); 390 - cmd.data[3] = upper_32_bits(address); 391 - if (s) /* size bit - we flush more than one 4kb page */ 392 - cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; 393 - if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ 394 - cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; 276 + __iommu_build_inv_iommu_pages(&cmd, address, 
domid, pde, s); 395 277 396 278 ret = iommu_queue_command(iommu, &cmd); 397 279 ··· 421 321 { 422 322 u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; 423 323 324 + INC_STATS_COUNTER(domain_flush_single); 325 + 424 326 iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); 425 327 } 328 + 329 + #ifdef CONFIG_IOMMU_API 330 + /* 331 + * This function is used to flush the IO/TLB for a given protection domain 332 + * on every IOMMU in the system 333 + */ 334 + static void iommu_flush_domain(u16 domid) 335 + { 336 + unsigned long flags; 337 + struct amd_iommu *iommu; 338 + struct iommu_cmd cmd; 339 + 340 + INC_STATS_COUNTER(domain_flush_all); 341 + 342 + __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 343 + domid, 1, 1); 344 + 345 + list_for_each_entry(iommu, &amd_iommu_list, list) { 346 + spin_lock_irqsave(&iommu->lock, flags); 347 + __iommu_queue_command(iommu, &cmd); 348 + __iommu_completion_wait(iommu); 349 + __iommu_wait_for_completion(iommu); 350 + spin_unlock_irqrestore(&iommu->lock, flags); 351 + } 352 + } 353 + #endif 426 354 427 355 /**************************************************************************** 428 356 * ··· 466 338 * supporting all features of AMD IOMMU page tables like level skipping 467 339 * and full 64 bit address spaces. 468 340 */ 469 - static int iommu_map(struct protection_domain *dom, 470 - unsigned long bus_addr, 471 - unsigned long phys_addr, 472 - int prot) 341 + static int iommu_map_page(struct protection_domain *dom, 342 + unsigned long bus_addr, 343 + unsigned long phys_addr, 344 + int prot) 473 345 { 474 346 u64 __pte, *pte, *page; 475 347 ··· 515 387 516 388 return 0; 517 389 } 390 + 391 + #ifdef CONFIG_IOMMU_API 392 + static void iommu_unmap_page(struct protection_domain *dom, 393 + unsigned long bus_addr) 394 + { 395 + u64 *pte; 396 + 397 + pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)]; 398 + 399 + if (!IOMMU_PTE_PRESENT(*pte)) 400 + return; 401 + 402 + pte = IOMMU_PTE_PAGE(*pte); 403 + pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; 404 + 405 + if (!IOMMU_PTE_PRESENT(*pte)) 406 + return; 407 + 408 + pte = IOMMU_PTE_PAGE(*pte); 409 + pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; 410 + 411 + *pte = 0; 412 + } 413 + #endif 518 414 519 415 /* 520 416 * This function checks if a specific unity mapping entry is needed for ··· 592 440 593 441 for (addr = e->address_start; addr < e->address_end; 594 442 addr += PAGE_SIZE) { 595 - ret = iommu_map(&dma_dom->domain, addr, addr, e->prot); 443 + ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot); 596 444 if (ret) 597 445 return ret; 598 446 /* ··· 723 571 return id; 724 572 } 725 573 574 + #ifdef CONFIG_IOMMU_API 575 + static void domain_id_free(int id) 576 + { 577 + unsigned long flags; 578 + 579 + write_lock_irqsave(&amd_iommu_devtable_lock, flags); 580 + if (id > 0 && id < MAX_DOMAIN_ID) 581 + __clear_bit(id, amd_iommu_pd_alloc_bitmap); 582 + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); 583 + } 584 + #endif 585 + 726 586 /* 727 587 * Used to reserve address ranges in the aperture (e.g. for exclusion 728 588 * ranges. 
··· 751 587 iommu_area_reserve(dom->bitmap, start_page, pages); 752 588 } 753 589 754 - static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) 590 + static void free_pagetable(struct protection_domain *domain) 755 591 { 756 592 int i, j; 757 593 u64 *p1, *p2, *p3; 758 594 759 - p1 = dma_dom->domain.pt_root; 595 + p1 = domain->pt_root; 760 596 761 597 if (!p1) 762 598 return; ··· 777 613 } 778 614 779 615 free_page((unsigned long)p1); 616 + 617 + domain->pt_root = NULL; 780 618 } 781 619 782 620 /* ··· 790 624 if (!dom) 791 625 return; 792 626 793 - dma_ops_free_pagetable(dom); 627 + free_pagetable(&dom->domain); 794 628 795 629 kfree(dom->pte_pages); 796 630 ··· 829 663 goto free_dma_dom; 830 664 dma_dom->domain.mode = PAGE_MODE_3_LEVEL; 831 665 dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); 666 + dma_dom->domain.flags = PD_DMA_OPS_MASK; 832 667 dma_dom->domain.priv = dma_dom; 833 668 if (!dma_dom->domain.pt_root) 834 669 goto free_dma_dom; ··· 892 725 } 893 726 894 727 /* 728 + * little helper function to check whether a given protection domain is a 729 + * dma_ops domain 730 + */ 731 + static bool dma_ops_domain(struct protection_domain *domain) 732 + { 733 + return domain->flags & PD_DMA_OPS_MASK; 734 + } 735 + 736 + /* 895 737 * Find out the protection domain structure for a given PCI device. This 896 738 * will give us the pointer to the page table root for example. 897 739 */ ··· 920 744 * If a device is not yet associated with a domain, this function does 921 745 * assigns it visible for the hardware 922 746 */ 923 - static void set_device_domain(struct amd_iommu *iommu, 924 - struct protection_domain *domain, 925 - u16 devid) 747 + static void attach_device(struct amd_iommu *iommu, 748 + struct protection_domain *domain, 749 + u16 devid) 926 750 { 927 751 unsigned long flags; 928 - 929 752 u64 pte_root = virt_to_phys(domain->pt_root); 753 + 754 + domain->dev_cnt += 1; 930 755 931 756 pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) 932 757 << DEV_ENTRY_MODE_SHIFT; ··· 943 766 944 767 iommu_queue_inv_dev_entry(iommu, devid); 945 768 } 769 + 770 + /* 771 + * Removes a device from a protection domain (unlocked) 772 + */ 773 + static void __detach_device(struct protection_domain *domain, u16 devid) 774 + { 775 + 776 + /* lock domain */ 777 + spin_lock(&domain->lock); 778 + 779 + /* remove domain from the lookup table */ 780 + amd_iommu_pd_table[devid] = NULL; 781 + 782 + /* remove entry from the device table seen by the hardware */ 783 + amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; 784 + amd_iommu_dev_table[devid].data[1] = 0; 785 + amd_iommu_dev_table[devid].data[2] = 0; 786 + 787 + /* decrease reference counter */ 788 + domain->dev_cnt -= 1; 789 + 790 + /* ready */ 791 + spin_unlock(&domain->lock); 792 + } 793 + 794 + /* 795 + * Removes a device from a protection domain (with devtable_lock held) 796 + */ 797 + static void detach_device(struct protection_domain *domain, u16 devid) 798 + { 799 + unsigned long flags; 800 + 801 + /* lock device table */ 802 + write_lock_irqsave(&amd_iommu_devtable_lock, flags); 803 + __detach_device(domain, devid); 804 + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); 805 + } 806 + 807 + static int device_change_notifier(struct notifier_block *nb, 808 + unsigned long action, void *data) 809 + { 810 + struct device *dev = data; 811 + struct pci_dev *pdev = to_pci_dev(dev); 812 + u16 devid = calc_devid(pdev->bus->number, pdev->devfn); 813 + struct protection_domain *domain; 814 + struct 
dma_ops_domain *dma_domain; 815 + struct amd_iommu *iommu; 816 + int order = amd_iommu_aperture_order; 817 + unsigned long flags; 818 + 819 + if (devid > amd_iommu_last_bdf) 820 + goto out; 821 + 822 + devid = amd_iommu_alias_table[devid]; 823 + 824 + iommu = amd_iommu_rlookup_table[devid]; 825 + if (iommu == NULL) 826 + goto out; 827 + 828 + domain = domain_for_device(devid); 829 + 830 + if (domain && !dma_ops_domain(domain)) 831 + WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound " 832 + "to a non-dma-ops domain\n", dev_name(dev)); 833 + 834 + switch (action) { 835 + case BUS_NOTIFY_BOUND_DRIVER: 836 + if (domain) 837 + goto out; 838 + dma_domain = find_protection_domain(devid); 839 + if (!dma_domain) 840 + dma_domain = iommu->default_dom; 841 + attach_device(iommu, &dma_domain->domain, devid); 842 + printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " 843 + "device %s\n", dma_domain->domain.id, dev_name(dev)); 844 + break; 845 + case BUS_NOTIFY_UNBIND_DRIVER: 846 + if (!domain) 847 + goto out; 848 + detach_device(domain, devid); 849 + break; 850 + case BUS_NOTIFY_ADD_DEVICE: 851 + /* allocate a protection domain if a device is added */ 852 + dma_domain = find_protection_domain(devid); 853 + if (dma_domain) 854 + goto out; 855 + dma_domain = dma_ops_domain_alloc(iommu, order); 856 + if (!dma_domain) 857 + goto out; 858 + dma_domain->target_dev = devid; 859 + 860 + spin_lock_irqsave(&iommu_pd_list_lock, flags); 861 + list_add_tail(&dma_domain->list, &iommu_pd_list); 862 + spin_unlock_irqrestore(&iommu_pd_list_lock, flags); 863 + 864 + break; 865 + default: 866 + goto out; 867 + } 868 + 869 + iommu_queue_inv_dev_entry(iommu, devid); 870 + iommu_completion_wait(iommu); 871 + 872 + out: 873 + return 0; 874 + } 875 + 876 + struct notifier_block device_nb = { 877 + .notifier_call = device_change_notifier, 878 + }; 946 879 947 880 /***************************************************************************** 948 881 * ··· 1089 802 list_for_each_entry(entry, &iommu_pd_list, list) { 1090 803 if (entry->target_dev == devid) { 1091 804 ret = entry; 1092 - list_del(&ret->list); 1093 805 break; 1094 806 } 1095 807 } ··· 1139 853 if (!dma_dom) 1140 854 dma_dom = (*iommu)->default_dom; 1141 855 *domain = &dma_dom->domain; 1142 - set_device_domain(*iommu, *domain, *bdf); 856 + attach_device(*iommu, *domain, *bdf); 1143 857 printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " 1144 - "device ", (*domain)->id); 1145 - print_devid(_bdf, 1); 858 + "device %s\n", (*domain)->id, dev_name(dev)); 1146 859 } 1147 860 1148 861 if (domain_for_device(_bdf) == NULL) 1149 - set_device_domain(*iommu, *domain, _bdf); 862 + attach_device(*iommu, *domain, _bdf); 1150 863 1151 864 return 1; 1152 865 } ··· 1231 946 pages = iommu_num_pages(paddr, size, PAGE_SIZE); 1232 947 paddr &= PAGE_MASK; 1233 948 949 + INC_STATS_COUNTER(total_map_requests); 950 + 951 + if (pages > 1) 952 + INC_STATS_COUNTER(cross_page); 953 + 1234 954 if (align) 1235 955 align_mask = (1UL << get_order(size)) - 1; 1236 956 ··· 1251 961 start += PAGE_SIZE; 1252 962 } 1253 963 address += offset; 964 + 965 + ADD_STATS_COUNTER(alloced_io_mem, size); 1254 966 1255 967 if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { 1256 968 iommu_flush_tlb(iommu, dma_dom->domain.id); ··· 1290 998 start += PAGE_SIZE; 1291 999 } 1292 1000 1001 + SUB_STATS_COUNTER(alloced_io_mem, size); 1002 + 1293 1003 dma_ops_free_addresses(dma_dom, dma_addr, pages); 1294 1004 1295 1005 if (amd_iommu_unmap_flush || dma_dom->need_flush) { ··· 1313 1019 
dma_addr_t addr; 1314 1020 u64 dma_mask; 1315 1021 1022 + INC_STATS_COUNTER(cnt_map_single); 1023 + 1316 1024 if (!check_device(dev)) 1317 1025 return bad_dma_address; 1318 1026 ··· 1325 1029 if (iommu == NULL || domain == NULL) 1326 1030 /* device not handled by any AMD IOMMU */ 1327 1031 return (dma_addr_t)paddr; 1032 + 1033 + if (!dma_ops_domain(domain)) 1034 + return bad_dma_address; 1328 1035 1329 1036 spin_lock_irqsave(&domain->lock, flags); 1330 1037 addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, ··· 1354 1055 struct protection_domain *domain; 1355 1056 u16 devid; 1356 1057 1058 + INC_STATS_COUNTER(cnt_unmap_single); 1059 + 1357 1060 if (!check_device(dev) || 1358 1061 !get_device_resources(dev, &iommu, &domain, &devid)) 1359 1062 /* device not handled by any AMD IOMMU */ 1063 + return; 1064 + 1065 + if (!dma_ops_domain(domain)) 1360 1066 return; 1361 1067 1362 1068 spin_lock_irqsave(&domain->lock, flags); ··· 1408 1104 int mapped_elems = 0; 1409 1105 u64 dma_mask; 1410 1106 1107 + INC_STATS_COUNTER(cnt_map_sg); 1108 + 1411 1109 if (!check_device(dev)) 1412 1110 return 0; 1413 1111 ··· 1419 1113 1420 1114 if (!iommu || !domain) 1421 1115 return map_sg_no_iommu(dev, sglist, nelems, dir); 1116 + 1117 + if (!dma_ops_domain(domain)) 1118 + return 0; 1422 1119 1423 1120 spin_lock_irqsave(&domain->lock, flags); 1424 1121 ··· 1472 1163 u16 devid; 1473 1164 int i; 1474 1165 1166 + INC_STATS_COUNTER(cnt_unmap_sg); 1167 + 1475 1168 if (!check_device(dev) || 1476 1169 !get_device_resources(dev, &iommu, &domain, &devid)) 1170 + return; 1171 + 1172 + if (!dma_ops_domain(domain)) 1477 1173 return; 1478 1174 1479 1175 spin_lock_irqsave(&domain->lock, flags); ··· 1508 1194 phys_addr_t paddr; 1509 1195 u64 dma_mask = dev->coherent_dma_mask; 1510 1196 1197 + INC_STATS_COUNTER(cnt_alloc_coherent); 1198 + 1511 1199 if (!check_device(dev)) 1512 1200 return NULL; 1513 1201 ··· 1528 1212 return virt_addr; 1529 1213 } 1530 1214 1215 + if (!dma_ops_domain(domain)) 1216 + goto out_free; 1217 + 1531 1218 if (!dma_mask) 1532 1219 dma_mask = *dev->dma_mask; 1533 1220 ··· 1539 1220 *dma_addr = __map_single(dev, iommu, domain->priv, paddr, 1540 1221 size, DMA_BIDIRECTIONAL, true, dma_mask); 1541 1222 1542 - if (*dma_addr == bad_dma_address) { 1543 - free_pages((unsigned long)virt_addr, get_order(size)); 1544 - virt_addr = NULL; 1545 - goto out; 1546 - } 1223 + if (*dma_addr == bad_dma_address) 1224 + goto out_free; 1547 1225 1548 1226 iommu_completion_wait(iommu); 1549 1227 1550 - out: 1551 1228 spin_unlock_irqrestore(&domain->lock, flags); 1552 1229 1553 1230 return virt_addr; 1231 + 1232 + out_free: 1233 + 1234 + free_pages((unsigned long)virt_addr, get_order(size)); 1235 + 1236 + return NULL; 1554 1237 } 1555 1238 1556 1239 /* ··· 1566 1245 struct protection_domain *domain; 1567 1246 u16 devid; 1568 1247 1248 + INC_STATS_COUNTER(cnt_free_coherent); 1249 + 1569 1250 if (!check_device(dev)) 1570 1251 return; 1571 1252 1572 1253 get_device_resources(dev, &iommu, &domain, &devid); 1573 1254 1574 1255 if (!iommu || !domain) 1256 + goto free_mem; 1257 + 1258 + if (!dma_ops_domain(domain)) 1575 1259 goto free_mem; 1576 1260 1577 1261 spin_lock_irqsave(&domain->lock, flags); ··· 1622 1296 * we don't need to preallocate the protection domains anymore. 1623 1297 * For now we have to. 
1624 1298 */ 1625 - void prealloc_protection_domains(void) 1299 + static void prealloc_protection_domains(void) 1626 1300 { 1627 1301 struct pci_dev *dev = NULL; 1628 1302 struct dma_ops_domain *dma_dom; ··· 1631 1305 u16 devid; 1632 1306 1633 1307 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { 1634 - devid = (dev->bus->number << 8) | dev->devfn; 1308 + devid = calc_devid(dev->bus->number, dev->devfn); 1635 1309 if (devid > amd_iommu_last_bdf) 1636 1310 continue; 1637 1311 devid = amd_iommu_alias_table[devid]; ··· 1678 1352 iommu->default_dom = dma_ops_domain_alloc(iommu, order); 1679 1353 if (iommu->default_dom == NULL) 1680 1354 return -ENOMEM; 1355 + iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; 1681 1356 ret = iommu_init_unity_mappings(iommu); 1682 1357 if (ret) 1683 1358 goto free_domains; ··· 1702 1375 /* Make the driver finally visible to the drivers */ 1703 1376 dma_ops = &amd_iommu_dma_ops; 1704 1377 1378 + #ifdef CONFIG_IOMMU_API 1379 + register_iommu(&amd_iommu_ops); 1380 + #endif 1381 + 1382 + bus_register_notifier(&pci_bus_type, &device_nb); 1383 + 1384 + amd_iommu_stats_init(); 1385 + 1705 1386 return 0; 1706 1387 1707 1388 free_domains: ··· 1721 1386 1722 1387 return ret; 1723 1388 } 1389 + 1390 + /***************************************************************************** 1391 + * 1392 + * The following functions belong to the exported interface of AMD IOMMU 1393 + * 1394 + * This interface allows access to lower level functions of the IOMMU 1395 + * like protection domain handling and assignement of devices to domains 1396 + * which is not possible with the dma_ops interface. 1397 + * 1398 + *****************************************************************************/ 1399 + 1400 + #ifdef CONFIG_IOMMU_API 1401 + 1402 + static void cleanup_domain(struct protection_domain *domain) 1403 + { 1404 + unsigned long flags; 1405 + u16 devid; 1406 + 1407 + write_lock_irqsave(&amd_iommu_devtable_lock, flags); 1408 + 1409 + for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) 1410 + if (amd_iommu_pd_table[devid] == domain) 1411 + __detach_device(domain, devid); 1412 + 1413 + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); 1414 + } 1415 + 1416 + static int amd_iommu_domain_init(struct iommu_domain *dom) 1417 + { 1418 + struct protection_domain *domain; 1419 + 1420 + domain = kzalloc(sizeof(*domain), GFP_KERNEL); 1421 + if (!domain) 1422 + return -ENOMEM; 1423 + 1424 + spin_lock_init(&domain->lock); 1425 + domain->mode = PAGE_MODE_3_LEVEL; 1426 + domain->id = domain_id_alloc(); 1427 + if (!domain->id) 1428 + goto out_free; 1429 + domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); 1430 + if (!domain->pt_root) 1431 + goto out_free; 1432 + 1433 + dom->priv = domain; 1434 + 1435 + return 0; 1436 + 1437 + out_free: 1438 + kfree(domain); 1439 + 1440 + return -ENOMEM; 1441 + } 1442 + 1443 + static void amd_iommu_domain_destroy(struct iommu_domain *dom) 1444 + { 1445 + struct protection_domain *domain = dom->priv; 1446 + 1447 + if (!domain) 1448 + return; 1449 + 1450 + if (domain->dev_cnt > 0) 1451 + cleanup_domain(domain); 1452 + 1453 + BUG_ON(domain->dev_cnt != 0); 1454 + 1455 + free_pagetable(domain); 1456 + 1457 + domain_id_free(domain->id); 1458 + 1459 + kfree(domain); 1460 + 1461 + dom->priv = NULL; 1462 + } 1463 + 1464 + static void amd_iommu_detach_device(struct iommu_domain *dom, 1465 + struct device *dev) 1466 + { 1467 + struct protection_domain *domain = dom->priv; 1468 + struct amd_iommu *iommu; 1469 + struct pci_dev *pdev; 1470 + u16 
devid; 1471 + 1472 + if (dev->bus != &pci_bus_type) 1473 + return; 1474 + 1475 + pdev = to_pci_dev(dev); 1476 + 1477 + devid = calc_devid(pdev->bus->number, pdev->devfn); 1478 + 1479 + if (devid > 0) 1480 + detach_device(domain, devid); 1481 + 1482 + iommu = amd_iommu_rlookup_table[devid]; 1483 + if (!iommu) 1484 + return; 1485 + 1486 + iommu_queue_inv_dev_entry(iommu, devid); 1487 + iommu_completion_wait(iommu); 1488 + } 1489 + 1490 + static int amd_iommu_attach_device(struct iommu_domain *dom, 1491 + struct device *dev) 1492 + { 1493 + struct protection_domain *domain = dom->priv; 1494 + struct protection_domain *old_domain; 1495 + struct amd_iommu *iommu; 1496 + struct pci_dev *pdev; 1497 + u16 devid; 1498 + 1499 + if (dev->bus != &pci_bus_type) 1500 + return -EINVAL; 1501 + 1502 + pdev = to_pci_dev(dev); 1503 + 1504 + devid = calc_devid(pdev->bus->number, pdev->devfn); 1505 + 1506 + if (devid >= amd_iommu_last_bdf || 1507 + devid != amd_iommu_alias_table[devid]) 1508 + return -EINVAL; 1509 + 1510 + iommu = amd_iommu_rlookup_table[devid]; 1511 + if (!iommu) 1512 + return -EINVAL; 1513 + 1514 + old_domain = domain_for_device(devid); 1515 + if (old_domain) 1516 + return -EBUSY; 1517 + 1518 + attach_device(iommu, domain, devid); 1519 + 1520 + iommu_completion_wait(iommu); 1521 + 1522 + return 0; 1523 + } 1524 + 1525 + static int amd_iommu_map_range(struct iommu_domain *dom, 1526 + unsigned long iova, phys_addr_t paddr, 1527 + size_t size, int iommu_prot) 1528 + { 1529 + struct protection_domain *domain = dom->priv; 1530 + unsigned long i, npages = iommu_num_pages(paddr, size, PAGE_SIZE); 1531 + int prot = 0; 1532 + int ret; 1533 + 1534 + if (iommu_prot & IOMMU_READ) 1535 + prot |= IOMMU_PROT_IR; 1536 + if (iommu_prot & IOMMU_WRITE) 1537 + prot |= IOMMU_PROT_IW; 1538 + 1539 + iova &= PAGE_MASK; 1540 + paddr &= PAGE_MASK; 1541 + 1542 + for (i = 0; i < npages; ++i) { 1543 + ret = iommu_map_page(domain, iova, paddr, prot); 1544 + if (ret) 1545 + return ret; 1546 + 1547 + iova += PAGE_SIZE; 1548 + paddr += PAGE_SIZE; 1549 + } 1550 + 1551 + return 0; 1552 + } 1553 + 1554 + static void amd_iommu_unmap_range(struct iommu_domain *dom, 1555 + unsigned long iova, size_t size) 1556 + { 1557 + 1558 + struct protection_domain *domain = dom->priv; 1559 + unsigned long i, npages = iommu_num_pages(iova, size, PAGE_SIZE); 1560 + 1561 + iova &= PAGE_MASK; 1562 + 1563 + for (i = 0; i < npages; ++i) { 1564 + iommu_unmap_page(domain, iova); 1565 + iova += PAGE_SIZE; 1566 + } 1567 + 1568 + iommu_flush_domain(domain->id); 1569 + } 1570 + 1571 + static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, 1572 + unsigned long iova) 1573 + { 1574 + struct protection_domain *domain = dom->priv; 1575 + unsigned long offset = iova & ~PAGE_MASK; 1576 + phys_addr_t paddr; 1577 + u64 *pte; 1578 + 1579 + pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(iova)]; 1580 + 1581 + if (!IOMMU_PTE_PRESENT(*pte)) 1582 + return 0; 1583 + 1584 + pte = IOMMU_PTE_PAGE(*pte); 1585 + pte = &pte[IOMMU_PTE_L1_INDEX(iova)]; 1586 + 1587 + if (!IOMMU_PTE_PRESENT(*pte)) 1588 + return 0; 1589 + 1590 + pte = IOMMU_PTE_PAGE(*pte); 1591 + pte = &pte[IOMMU_PTE_L0_INDEX(iova)]; 1592 + 1593 + if (!IOMMU_PTE_PRESENT(*pte)) 1594 + return 0; 1595 + 1596 + paddr = *pte & IOMMU_PAGE_MASK; 1597 + paddr |= offset; 1598 + 1599 + return paddr; 1600 + } 1601 + 1602 + static struct iommu_ops amd_iommu_ops = { 1603 + .domain_init = amd_iommu_domain_init, 1604 + .domain_destroy = amd_iommu_domain_destroy, 1605 + .attach_dev = amd_iommu_attach_device, 1606 + 
.detach_dev = amd_iommu_detach_device, 1607 + .map = amd_iommu_map_range, 1608 + .unmap = amd_iommu_unmap_range, 1609 + .iova_to_phys = amd_iommu_iova_to_phys, 1610 + }; 1611 + 1612 + #endif
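The amd_iommu_ops registered above are only reachable through the generic IOMMU API merged from the 'iommu/api' branch. A minimal in-kernel sketch of a consumer (error handling trimmed), assuming the wrapper functions provided by the linux/iommu.h frontend in this series (iommu_found(), iommu_domain_alloc(), iommu_attach_device(), iommu_map_range(), iommu_iova_to_phys(), iommu_domain_free()); the function name assign_one_page() and its pdev/paddr arguments are made up for the example:

	/* Sketch of a generic IOMMU API consumer, e.g. device-assignment code.
	 * Maps one 4k page read/write at bus address 0 for the given device.
	 */
	#include <linux/kernel.h>
	#include <linux/iommu.h>
	#include <linux/pci.h>

	static int assign_one_page(struct pci_dev *pdev, phys_addr_t paddr)
	{
		struct iommu_domain *dom;
		int ret;

		if (!iommu_found())		/* no IOMMU driver registered */
			return -ENODEV;

		dom = iommu_domain_alloc();	/* ends up in amd_iommu_domain_init() */
		if (!dom)
			return -ENOMEM;

		/* ends up in amd_iommu_attach_device() via ->attach_dev */
		ret = iommu_attach_device(dom, &pdev->dev);
		if (ret)
			goto out_free;

		/* ends up in amd_iommu_map_range(): one page, IR|IW, iova 0 */
		ret = iommu_map_range(dom, 0, paddr, PAGE_SIZE,
				      IOMMU_READ | IOMMU_WRITE);
		if (ret)
			goto out_detach;

		/* ->iova_to_phys lets the caller verify the mapping */
		WARN_ON(iommu_iova_to_phys(dom, 0) != paddr);

		return 0;

	out_detach:
		iommu_detach_device(dom, &pdev->dev);
	out_free:
		iommu_domain_free(dom);
		return ret;
	}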
arch/x86/kernel/amd_iommu_init.c  +6 -9
··· 122 122 LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings 123 123 we find in ACPI */ 124 124 unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ 125 - int amd_iommu_isolate = 1; /* if 1, device isolation is enabled */ 125 + bool amd_iommu_isolate = true; /* if true, device isolation is 126 + enabled */ 126 127 bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ 127 128 128 129 LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the ··· 246 245 /* Function to enable the hardware */ 247 246 void __init iommu_enable(struct amd_iommu *iommu) 248 247 { 249 - printk(KERN_INFO "AMD IOMMU: Enabling IOMMU " 250 - "at %02x:%02x.%x cap 0x%hx\n", 251 - iommu->dev->bus->number, 252 - PCI_SLOT(iommu->dev->devfn), 253 - PCI_FUNC(iommu->dev->devfn), 254 - iommu->cap_ptr); 248 + printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n", 249 + dev_name(&iommu->dev->dev), iommu->cap_ptr); 255 250 256 251 iommu_feature_enable(iommu, CONTROL_IOMMU_EN); 257 252 } ··· 1215 1218 { 1216 1219 for (; *str; ++str) { 1217 1220 if (strncmp(str, "isolate", 7) == 0) 1218 - amd_iommu_isolate = 1; 1221 + amd_iommu_isolate = true; 1219 1222 if (strncmp(str, "share", 5) == 0) 1220 - amd_iommu_isolate = 0; 1223 + amd_iommu_isolate = false; 1221 1224 if (strncmp(str, "fullflush", 9) == 0) 1222 1225 amd_iommu_unmap_flush = true; 1223 1226 }
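For reference, the switch from int to bool above does not change the accepted option keywords. Assuming this parser remains the handler behind the amd_iommu= boot parameter (as elsewhere in this driver), the boot-time choices still look like:

	amd_iommu=isolate	per-device protection domains (already the default)
	amd_iommu=share		disable device isolation, devices share a default domain
	amd_iommu=fullflush	flush the IO/TLB at unmap time instead of when addresses are reused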