Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v2.6.34-rc1 3791 lines 94 kB view raw
1/* 2 * Copyright (c) 2006, Intel Corporation. 3 * 4 * This program is free software; you can redistribute it and/or modify it 5 * under the terms and conditions of the GNU General Public License, 6 * version 2, as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope it will be useful, but WITHOUT 9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 11 * more details. 12 * 13 * You should have received a copy of the GNU General Public License along with 14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple 15 * Place - Suite 330, Boston, MA 02111-1307 USA. 16 * 17 * Copyright (C) 2006-2008 Intel Corporation 18 * Author: Ashok Raj <ashok.raj@intel.com> 19 * Author: Shaohua Li <shaohua.li@intel.com> 20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> 21 * Author: Fenghua Yu <fenghua.yu@intel.com> 22 */ 23 24#include <linux/init.h> 25#include <linux/bitmap.h> 26#include <linux/debugfs.h> 27#include <linux/slab.h> 28#include <linux/irq.h> 29#include <linux/interrupt.h> 30#include <linux/spinlock.h> 31#include <linux/pci.h> 32#include <linux/dmar.h> 33#include <linux/dma-mapping.h> 34#include <linux/mempool.h> 35#include <linux/timer.h> 36#include <linux/iova.h> 37#include <linux/iommu.h> 38#include <linux/intel-iommu.h> 39#include <linux/sysdev.h> 40#include <linux/tboot.h> 41#include <linux/dmi.h> 42#include <asm/cacheflush.h> 43#include <asm/iommu.h> 44#include "pci.h" 45 46#define ROOT_SIZE VTD_PAGE_SIZE 47#define CONTEXT_SIZE VTD_PAGE_SIZE 48 49#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) 50#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) 51#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e) 52 53#define IOAPIC_RANGE_START (0xfee00000) 54#define IOAPIC_RANGE_END (0xfeefffff) 55#define IOVA_START_ADDR 
(0x1000) 56 57#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 58 59#define MAX_AGAW_WIDTH 64 60 61#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1) 62#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1) 63 64/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR 65 to match. That way, we can use 'unsigned long' for PFNs with impunity. */ 66#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \ 67 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1)) 68#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT) 69 70#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) 71#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) 72#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64)) 73 74 75/* VT-d pages must always be _smaller_ than MM pages. Otherwise things 76 are never going to work. */ 77static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn) 78{ 79 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT); 80} 81 82static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn) 83{ 84 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT); 85} 86static inline unsigned long page_to_dma_pfn(struct page *pg) 87{ 88 return mm_to_dma_pfn(page_to_pfn(pg)); 89} 90static inline unsigned long virt_to_dma_pfn(void *p) 91{ 92 return page_to_dma_pfn(virt_to_page(p)); 93} 94 95/* global iommu list, set NULL for ignored DMAR units */ 96static struct intel_iommu **g_iommus; 97 98static void __init check_tylersburg_isoch(void); 99static int rwbf_quirk; 100 101/* 102 * 0: Present 103 * 1-11: Reserved 104 * 12-63: Context Ptr (12 - (haw-1)) 105 * 64-127: Reserved 106 */ 107struct root_entry { 108 u64 val; 109 u64 rsvd1; 110}; 111#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry)) 112static inline bool root_present(struct root_entry *root) 113{ 114 return (root->val & 1); 115} 116static inline void set_root_present(struct root_entry *root) 117{ 118 root->val |= 1; 119} 120static inline void set_root_value(struct root_entry *root, 
unsigned long value) 121{ 122 root->val |= value & VTD_PAGE_MASK; 123} 124 125static inline struct context_entry * 126get_context_addr_from_root(struct root_entry *root) 127{ 128 return (struct context_entry *) 129 (root_present(root)?phys_to_virt( 130 root->val & VTD_PAGE_MASK) : 131 NULL); 132} 133 134/* 135 * low 64 bits: 136 * 0: present 137 * 1: fault processing disable 138 * 2-3: translation type 139 * 12-63: address space root 140 * high 64 bits: 141 * 0-2: address width 142 * 3-6: aval 143 * 8-23: domain id 144 */ 145struct context_entry { 146 u64 lo; 147 u64 hi; 148}; 149 150static inline bool context_present(struct context_entry *context) 151{ 152 return (context->lo & 1); 153} 154static inline void context_set_present(struct context_entry *context) 155{ 156 context->lo |= 1; 157} 158 159static inline void context_set_fault_enable(struct context_entry *context) 160{ 161 context->lo &= (((u64)-1) << 2) | 1; 162} 163 164static inline void context_set_translation_type(struct context_entry *context, 165 unsigned long value) 166{ 167 context->lo &= (((u64)-1) << 4) | 3; 168 context->lo |= (value & 3) << 2; 169} 170 171static inline void context_set_address_root(struct context_entry *context, 172 unsigned long value) 173{ 174 context->lo |= value & VTD_PAGE_MASK; 175} 176 177static inline void context_set_address_width(struct context_entry *context, 178 unsigned long value) 179{ 180 context->hi |= value & 7; 181} 182 183static inline void context_set_domain_id(struct context_entry *context, 184 unsigned long value) 185{ 186 context->hi |= (value & ((1 << 16) - 1)) << 8; 187} 188 189static inline void context_clear_entry(struct context_entry *context) 190{ 191 context->lo = 0; 192 context->hi = 0; 193} 194 195/* 196 * 0: readable 197 * 1: writable 198 * 2-6: reserved 199 * 7: super page 200 * 8-10: available 201 * 11: snoop behavior 202 * 12-63: Host physcial address 203 */ 204struct dma_pte { 205 u64 val; 206}; 207 208static inline void dma_clear_pte(struct 
dma_pte *pte) 209{ 210 pte->val = 0; 211} 212 213static inline void dma_set_pte_readable(struct dma_pte *pte) 214{ 215 pte->val |= DMA_PTE_READ; 216} 217 218static inline void dma_set_pte_writable(struct dma_pte *pte) 219{ 220 pte->val |= DMA_PTE_WRITE; 221} 222 223static inline void dma_set_pte_snp(struct dma_pte *pte) 224{ 225 pte->val |= DMA_PTE_SNP; 226} 227 228static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot) 229{ 230 pte->val = (pte->val & ~3) | (prot & 3); 231} 232 233static inline u64 dma_pte_addr(struct dma_pte *pte) 234{ 235#ifdef CONFIG_64BIT 236 return pte->val & VTD_PAGE_MASK; 237#else 238 /* Must have a full atomic 64-bit read */ 239 return __cmpxchg64(pte, 0ULL, 0ULL) & VTD_PAGE_MASK; 240#endif 241} 242 243static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn) 244{ 245 pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT; 246} 247 248static inline bool dma_pte_present(struct dma_pte *pte) 249{ 250 return (pte->val & 3) != 0; 251} 252 253static inline int first_pte_in_page(struct dma_pte *pte) 254{ 255 return !((unsigned long)pte & ~VTD_PAGE_MASK); 256} 257 258/* 259 * This domain is a statically identity mapping domain. 260 * 1. This domain creats a static 1:1 mapping to all usable memory. 261 * 2. It maps to each iommu if successful. 262 * 3. Each iommu mapps to this domain if successful. 263 */ 264static struct dmar_domain *si_domain; 265static int hw_pass_through = 1; 266 267/* devices under the same p2p bridge are owned in one domain */ 268#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0) 269 270/* domain represents a virtual machine, more than one devices 271 * across iommus may be owned in one domain, e.g. kvm guest. 
272 */ 273#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1) 274 275/* si_domain contains mulitple devices */ 276#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 2) 277 278struct dmar_domain { 279 int id; /* domain id */ 280 int nid; /* node id */ 281 unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/ 282 283 struct list_head devices; /* all devices' list */ 284 struct iova_domain iovad; /* iova's that belong to this domain */ 285 286 struct dma_pte *pgd; /* virtual address */ 287 int gaw; /* max guest address width */ 288 289 /* adjusted guest address width, 0 is level 2 30-bit */ 290 int agaw; 291 292 int flags; /* flags to find out type of domain */ 293 294 int iommu_coherency;/* indicate coherency of iommu access */ 295 int iommu_snooping; /* indicate snooping control feature*/ 296 int iommu_count; /* reference count of iommu */ 297 spinlock_t iommu_lock; /* protect iommu set in domain */ 298 u64 max_addr; /* maximum mapped address */ 299}; 300 301/* PCI domain-device relationship */ 302struct device_domain_info { 303 struct list_head link; /* link to domain siblings */ 304 struct list_head global; /* link to global list */ 305 int segment; /* PCI domain */ 306 u8 bus; /* PCI bus number */ 307 u8 devfn; /* PCI devfn number */ 308 struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */ 309 struct intel_iommu *iommu; /* IOMMU used by this device */ 310 struct dmar_domain *domain; /* pointer to domain */ 311}; 312 313static void flush_unmaps_timeout(unsigned long data); 314 315DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0); 316 317#define HIGH_WATER_MARK 250 318struct deferred_flush_tables { 319 int next; 320 struct iova *iova[HIGH_WATER_MARK]; 321 struct dmar_domain *domain[HIGH_WATER_MARK]; 322}; 323 324static struct deferred_flush_tables *deferred_flush; 325 326/* bitmap for indexing intel_iommus */ 327static int g_num_of_iommus; 328 329static DEFINE_SPINLOCK(async_umap_flush_lock); 330static LIST_HEAD(unmaps_to_do); 331 332static int timer_on; 
333static long list_size; 334 335static void domain_remove_dev_info(struct dmar_domain *domain); 336 337#ifdef CONFIG_DMAR_DEFAULT_ON 338int dmar_disabled = 0; 339#else 340int dmar_disabled = 1; 341#endif /*CONFIG_DMAR_DEFAULT_ON*/ 342 343static int __initdata dmar_map_gfx = 1; 344static int dmar_forcedac; 345static int intel_iommu_strict; 346 347#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) 348static DEFINE_SPINLOCK(device_domain_lock); 349static LIST_HEAD(device_domain_list); 350 351static struct iommu_ops intel_iommu_ops; 352 353static int __init intel_iommu_setup(char *str) 354{ 355 if (!str) 356 return -EINVAL; 357 while (*str) { 358 if (!strncmp(str, "on", 2)) { 359 dmar_disabled = 0; 360 printk(KERN_INFO "Intel-IOMMU: enabled\n"); 361 } else if (!strncmp(str, "off", 3)) { 362 dmar_disabled = 1; 363 printk(KERN_INFO "Intel-IOMMU: disabled\n"); 364 } else if (!strncmp(str, "igfx_off", 8)) { 365 dmar_map_gfx = 0; 366 printk(KERN_INFO 367 "Intel-IOMMU: disable GFX device mapping\n"); 368 } else if (!strncmp(str, "forcedac", 8)) { 369 printk(KERN_INFO 370 "Intel-IOMMU: Forcing DAC for PCI devices\n"); 371 dmar_forcedac = 1; 372 } else if (!strncmp(str, "strict", 6)) { 373 printk(KERN_INFO 374 "Intel-IOMMU: disable batched IOTLB flush\n"); 375 intel_iommu_strict = 1; 376 } 377 378 str += strcspn(str, ","); 379 while (*str == ',') 380 str++; 381 } 382 return 0; 383} 384__setup("intel_iommu=", intel_iommu_setup); 385 386static struct kmem_cache *iommu_domain_cache; 387static struct kmem_cache *iommu_devinfo_cache; 388static struct kmem_cache *iommu_iova_cache; 389 390static inline void *alloc_pgtable_page(int node) 391{ 392 struct page *page; 393 void *vaddr = NULL; 394 395 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0); 396 if (page) 397 vaddr = page_address(page); 398 return vaddr; 399} 400 401static inline void free_pgtable_page(void *vaddr) 402{ 403 free_page((unsigned long)vaddr); 404} 405 406static inline void 
*alloc_domain_mem(void) 407{ 408 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC); 409} 410 411static void free_domain_mem(void *vaddr) 412{ 413 kmem_cache_free(iommu_domain_cache, vaddr); 414} 415 416static inline void * alloc_devinfo_mem(void) 417{ 418 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC); 419} 420 421static inline void free_devinfo_mem(void *vaddr) 422{ 423 kmem_cache_free(iommu_devinfo_cache, vaddr); 424} 425 426struct iova *alloc_iova_mem(void) 427{ 428 return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC); 429} 430 431void free_iova_mem(struct iova *iova) 432{ 433 kmem_cache_free(iommu_iova_cache, iova); 434} 435 436 437static inline int width_to_agaw(int width); 438 439static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw) 440{ 441 unsigned long sagaw; 442 int agaw = -1; 443 444 sagaw = cap_sagaw(iommu->cap); 445 for (agaw = width_to_agaw(max_gaw); 446 agaw >= 0; agaw--) { 447 if (test_bit(agaw, &sagaw)) 448 break; 449 } 450 451 return agaw; 452} 453 454/* 455 * Calculate max SAGAW for each iommu. 456 */ 457int iommu_calculate_max_sagaw(struct intel_iommu *iommu) 458{ 459 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH); 460} 461 462/* 463 * calculate agaw for each iommu. 464 * "SAGAW" may be different across iommus, use a default agaw, and 465 * get a supported less agaw for iommus that don't support the default agaw. 466 */ 467int iommu_calculate_agaw(struct intel_iommu *iommu) 468{ 469 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH); 470} 471 472/* This functionin only returns single iommu in a domain */ 473static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) 474{ 475 int iommu_id; 476 477 /* si_domain and vm domain should not get here. 
*/ 478 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE); 479 BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY); 480 481 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus); 482 if (iommu_id < 0 || iommu_id >= g_num_of_iommus) 483 return NULL; 484 485 return g_iommus[iommu_id]; 486} 487 488static void domain_update_iommu_coherency(struct dmar_domain *domain) 489{ 490 int i; 491 492 domain->iommu_coherency = 1; 493 494 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus); 495 for (; i < g_num_of_iommus; ) { 496 if (!ecap_coherent(g_iommus[i]->ecap)) { 497 domain->iommu_coherency = 0; 498 break; 499 } 500 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1); 501 } 502} 503 504static void domain_update_iommu_snooping(struct dmar_domain *domain) 505{ 506 int i; 507 508 domain->iommu_snooping = 1; 509 510 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus); 511 for (; i < g_num_of_iommus; ) { 512 if (!ecap_sc_support(g_iommus[i]->ecap)) { 513 domain->iommu_snooping = 0; 514 break; 515 } 516 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1); 517 } 518} 519 520/* Some capabilities may be different across iommus */ 521static void domain_update_iommu_cap(struct dmar_domain *domain) 522{ 523 domain_update_iommu_coherency(domain); 524 domain_update_iommu_snooping(domain); 525} 526 527static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn) 528{ 529 struct dmar_drhd_unit *drhd = NULL; 530 int i; 531 532 for_each_drhd_unit(drhd) { 533 if (drhd->ignored) 534 continue; 535 if (segment != drhd->segment) 536 continue; 537 538 for (i = 0; i < drhd->devices_cnt; i++) { 539 if (drhd->devices[i] && 540 drhd->devices[i]->bus->number == bus && 541 drhd->devices[i]->devfn == devfn) 542 return drhd->iommu; 543 if (drhd->devices[i] && 544 drhd->devices[i]->subordinate && 545 drhd->devices[i]->subordinate->number <= bus && 546 drhd->devices[i]->subordinate->subordinate >= bus) 547 return drhd->iommu; 548 } 549 550 if (drhd->include_all) 
551 return drhd->iommu; 552 } 553 554 return NULL; 555} 556 557static void domain_flush_cache(struct dmar_domain *domain, 558 void *addr, int size) 559{ 560 if (!domain->iommu_coherency) 561 clflush_cache_range(addr, size); 562} 563 564/* Gets context entry for a given bus and devfn */ 565static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, 566 u8 bus, u8 devfn) 567{ 568 struct root_entry *root; 569 struct context_entry *context; 570 unsigned long phy_addr; 571 unsigned long flags; 572 573 spin_lock_irqsave(&iommu->lock, flags); 574 root = &iommu->root_entry[bus]; 575 context = get_context_addr_from_root(root); 576 if (!context) { 577 context = (struct context_entry *) 578 alloc_pgtable_page(iommu->node); 579 if (!context) { 580 spin_unlock_irqrestore(&iommu->lock, flags); 581 return NULL; 582 } 583 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE); 584 phy_addr = virt_to_phys((void *)context); 585 set_root_value(root, phy_addr); 586 set_root_present(root); 587 __iommu_flush_cache(iommu, root, sizeof(*root)); 588 } 589 spin_unlock_irqrestore(&iommu->lock, flags); 590 return &context[devfn]; 591} 592 593static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn) 594{ 595 struct root_entry *root; 596 struct context_entry *context; 597 int ret; 598 unsigned long flags; 599 600 spin_lock_irqsave(&iommu->lock, flags); 601 root = &iommu->root_entry[bus]; 602 context = get_context_addr_from_root(root); 603 if (!context) { 604 ret = 0; 605 goto out; 606 } 607 ret = context_present(&context[devfn]); 608out: 609 spin_unlock_irqrestore(&iommu->lock, flags); 610 return ret; 611} 612 613static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn) 614{ 615 struct root_entry *root; 616 struct context_entry *context; 617 unsigned long flags; 618 619 spin_lock_irqsave(&iommu->lock, flags); 620 root = &iommu->root_entry[bus]; 621 context = get_context_addr_from_root(root); 622 if (context) { 623 
context_clear_entry(&context[devfn]); 624 __iommu_flush_cache(iommu, &context[devfn], \ 625 sizeof(*context)); 626 } 627 spin_unlock_irqrestore(&iommu->lock, flags); 628} 629 630static void free_context_table(struct intel_iommu *iommu) 631{ 632 struct root_entry *root; 633 int i; 634 unsigned long flags; 635 struct context_entry *context; 636 637 spin_lock_irqsave(&iommu->lock, flags); 638 if (!iommu->root_entry) { 639 goto out; 640 } 641 for (i = 0; i < ROOT_ENTRY_NR; i++) { 642 root = &iommu->root_entry[i]; 643 context = get_context_addr_from_root(root); 644 if (context) 645 free_pgtable_page(context); 646 } 647 free_pgtable_page(iommu->root_entry); 648 iommu->root_entry = NULL; 649out: 650 spin_unlock_irqrestore(&iommu->lock, flags); 651} 652 653/* page table handling */ 654#define LEVEL_STRIDE (9) 655#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1) 656 657static inline int agaw_to_level(int agaw) 658{ 659 return agaw + 2; 660} 661 662static inline int agaw_to_width(int agaw) 663{ 664 return 30 + agaw * LEVEL_STRIDE; 665 666} 667 668static inline int width_to_agaw(int width) 669{ 670 return (width - 30) / LEVEL_STRIDE; 671} 672 673static inline unsigned int level_to_offset_bits(int level) 674{ 675 return (level - 1) * LEVEL_STRIDE; 676} 677 678static inline int pfn_level_offset(unsigned long pfn, int level) 679{ 680 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK; 681} 682 683static inline unsigned long level_mask(int level) 684{ 685 return -1UL << level_to_offset_bits(level); 686} 687 688static inline unsigned long level_size(int level) 689{ 690 return 1UL << level_to_offset_bits(level); 691} 692 693static inline unsigned long align_to_level(unsigned long pfn, int level) 694{ 695 return (pfn + level_size(level) - 1) & level_mask(level); 696} 697 698static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, 699 unsigned long pfn) 700{ 701 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; 702 struct dma_pte *parent, *pte = NULL; 703 
int level = agaw_to_level(domain->agaw); 704 int offset; 705 706 BUG_ON(!domain->pgd); 707 BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width); 708 parent = domain->pgd; 709 710 while (level > 0) { 711 void *tmp_page; 712 713 offset = pfn_level_offset(pfn, level); 714 pte = &parent[offset]; 715 if (level == 1) 716 break; 717 718 if (!dma_pte_present(pte)) { 719 uint64_t pteval; 720 721 tmp_page = alloc_pgtable_page(domain->nid); 722 723 if (!tmp_page) 724 return NULL; 725 726 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE); 727 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE; 728 if (cmpxchg64(&pte->val, 0ULL, pteval)) { 729 /* Someone else set it while we were thinking; use theirs. */ 730 free_pgtable_page(tmp_page); 731 } else { 732 dma_pte_addr(pte); 733 domain_flush_cache(domain, pte, sizeof(*pte)); 734 } 735 } 736 parent = phys_to_virt(dma_pte_addr(pte)); 737 level--; 738 } 739 740 return pte; 741} 742 743/* return address's pte at specific level */ 744static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, 745 unsigned long pfn, 746 int level) 747{ 748 struct dma_pte *parent, *pte = NULL; 749 int total = agaw_to_level(domain->agaw); 750 int offset; 751 752 parent = domain->pgd; 753 while (level <= total) { 754 offset = pfn_level_offset(pfn, total); 755 pte = &parent[offset]; 756 if (level == total) 757 return pte; 758 759 if (!dma_pte_present(pte)) 760 break; 761 parent = phys_to_virt(dma_pte_addr(pte)); 762 total--; 763 } 764 return NULL; 765} 766 767/* clear last level pte, a tlb flush should be followed */ 768static void dma_pte_clear_range(struct dmar_domain *domain, 769 unsigned long start_pfn, 770 unsigned long last_pfn) 771{ 772 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; 773 struct dma_pte *first_pte, *pte; 774 775 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); 776 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); 777 BUG_ON(start_pfn > 
last_pfn); 778 779 /* we don't need lock here; nobody else touches the iova range */ 780 do { 781 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1); 782 if (!pte) { 783 start_pfn = align_to_level(start_pfn + 1, 2); 784 continue; 785 } 786 do { 787 dma_clear_pte(pte); 788 start_pfn++; 789 pte++; 790 } while (start_pfn <= last_pfn && !first_pte_in_page(pte)); 791 792 domain_flush_cache(domain, first_pte, 793 (void *)pte - (void *)first_pte); 794 795 } while (start_pfn && start_pfn <= last_pfn); 796} 797 798/* free page table pages. last level pte should already be cleared */ 799static void dma_pte_free_pagetable(struct dmar_domain *domain, 800 unsigned long start_pfn, 801 unsigned long last_pfn) 802{ 803 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; 804 struct dma_pte *first_pte, *pte; 805 int total = agaw_to_level(domain->agaw); 806 int level; 807 unsigned long tmp; 808 809 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); 810 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); 811 BUG_ON(start_pfn > last_pfn); 812 813 /* We don't need lock here; nobody else touches the iova range */ 814 level = 2; 815 while (level <= total) { 816 tmp = align_to_level(start_pfn, level); 817 818 /* If we can't even clear one PTE at this level, we're done */ 819 if (tmp + level_size(level) - 1 > last_pfn) 820 return; 821 822 do { 823 first_pte = pte = dma_pfn_level_pte(domain, tmp, level); 824 if (!pte) { 825 tmp = align_to_level(tmp + 1, level + 1); 826 continue; 827 } 828 do { 829 if (dma_pte_present(pte)) { 830 free_pgtable_page(phys_to_virt(dma_pte_addr(pte))); 831 dma_clear_pte(pte); 832 } 833 pte++; 834 tmp += level_size(level); 835 } while (!first_pte_in_page(pte) && 836 tmp + level_size(level) - 1 <= last_pfn); 837 838 domain_flush_cache(domain, first_pte, 839 (void *)pte - (void *)first_pte); 840 841 } while (tmp && tmp + level_size(level) - 1 <= last_pfn); 842 level++; 843 } 844 /* free pgd */ 845 if (start_pfn == 0 && last_pfn 
== DOMAIN_MAX_PFN(domain->gaw)) { 846 free_pgtable_page(domain->pgd); 847 domain->pgd = NULL; 848 } 849} 850 851/* iommu handling */ 852static int iommu_alloc_root_entry(struct intel_iommu *iommu) 853{ 854 struct root_entry *root; 855 unsigned long flags; 856 857 root = (struct root_entry *)alloc_pgtable_page(iommu->node); 858 if (!root) 859 return -ENOMEM; 860 861 __iommu_flush_cache(iommu, root, ROOT_SIZE); 862 863 spin_lock_irqsave(&iommu->lock, flags); 864 iommu->root_entry = root; 865 spin_unlock_irqrestore(&iommu->lock, flags); 866 867 return 0; 868} 869 870static void iommu_set_root_entry(struct intel_iommu *iommu) 871{ 872 void *addr; 873 u32 sts; 874 unsigned long flag; 875 876 addr = iommu->root_entry; 877 878 spin_lock_irqsave(&iommu->register_lock, flag); 879 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr)); 880 881 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG); 882 883 /* Make sure hardware complete it */ 884 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 885 readl, (sts & DMA_GSTS_RTPS), sts); 886 887 spin_unlock_irqrestore(&iommu->register_lock, flag); 888} 889 890static void iommu_flush_write_buffer(struct intel_iommu *iommu) 891{ 892 u32 val; 893 unsigned long flag; 894 895 if (!rwbf_quirk && !cap_rwbf(iommu->cap)) 896 return; 897 898 spin_lock_irqsave(&iommu->register_lock, flag); 899 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG); 900 901 /* Make sure hardware complete it */ 902 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 903 readl, (!(val & DMA_GSTS_WBFS)), val); 904 905 spin_unlock_irqrestore(&iommu->register_lock, flag); 906} 907 908/* return value determine if we need a write buffer flush */ 909static void __iommu_flush_context(struct intel_iommu *iommu, 910 u16 did, u16 source_id, u8 function_mask, 911 u64 type) 912{ 913 u64 val = 0; 914 unsigned long flag; 915 916 switch (type) { 917 case DMA_CCMD_GLOBAL_INVL: 918 val = DMA_CCMD_GLOBAL_INVL; 919 break; 920 case DMA_CCMD_DOMAIN_INVL: 921 val = 
DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did); 922 break; 923 case DMA_CCMD_DEVICE_INVL: 924 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did) 925 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask); 926 break; 927 default: 928 BUG(); 929 } 930 val |= DMA_CCMD_ICC; 931 932 spin_lock_irqsave(&iommu->register_lock, flag); 933 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val); 934 935 /* Make sure hardware complete it */ 936 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG, 937 dmar_readq, (!(val & DMA_CCMD_ICC)), val); 938 939 spin_unlock_irqrestore(&iommu->register_lock, flag); 940} 941 942/* return value determine if we need a write buffer flush */ 943static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, 944 u64 addr, unsigned int size_order, u64 type) 945{ 946 int tlb_offset = ecap_iotlb_offset(iommu->ecap); 947 u64 val = 0, val_iva = 0; 948 unsigned long flag; 949 950 switch (type) { 951 case DMA_TLB_GLOBAL_FLUSH: 952 /* global flush doesn't need set IVA_REG */ 953 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT; 954 break; 955 case DMA_TLB_DSI_FLUSH: 956 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did); 957 break; 958 case DMA_TLB_PSI_FLUSH: 959 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did); 960 /* Note: always flush non-leaf currently */ 961 val_iva = size_order | addr; 962 break; 963 default: 964 BUG(); 965 } 966 /* Note: set drain read/write */ 967#if 0 968 /* 969 * This is probably to be super secure.. Looks like we can 970 * ignore it without any impact. 
971 */ 972 if (cap_read_drain(iommu->cap)) 973 val |= DMA_TLB_READ_DRAIN; 974#endif 975 if (cap_write_drain(iommu->cap)) 976 val |= DMA_TLB_WRITE_DRAIN; 977 978 spin_lock_irqsave(&iommu->register_lock, flag); 979 /* Note: Only uses first TLB reg currently */ 980 if (val_iva) 981 dmar_writeq(iommu->reg + tlb_offset, val_iva); 982 dmar_writeq(iommu->reg + tlb_offset + 8, val); 983 984 /* Make sure hardware complete it */ 985 IOMMU_WAIT_OP(iommu, tlb_offset + 8, 986 dmar_readq, (!(val & DMA_TLB_IVT)), val); 987 988 spin_unlock_irqrestore(&iommu->register_lock, flag); 989 990 /* check IOTLB invalidation granularity */ 991 if (DMA_TLB_IAIG(val) == 0) 992 printk(KERN_ERR"IOMMU: flush IOTLB failed\n"); 993 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type)) 994 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n", 995 (unsigned long long)DMA_TLB_IIRG(type), 996 (unsigned long long)DMA_TLB_IAIG(val)); 997} 998 999static struct device_domain_info *iommu_support_dev_iotlb( 1000 struct dmar_domain *domain, int segment, u8 bus, u8 devfn) 1001{ 1002 int found = 0; 1003 unsigned long flags; 1004 struct device_domain_info *info; 1005 struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn); 1006 1007 if (!ecap_dev_iotlb_support(iommu->ecap)) 1008 return NULL; 1009 1010 if (!iommu->qi) 1011 return NULL; 1012 1013 spin_lock_irqsave(&device_domain_lock, flags); 1014 list_for_each_entry(info, &domain->devices, link) 1015 if (info->bus == bus && info->devfn == devfn) { 1016 found = 1; 1017 break; 1018 } 1019 spin_unlock_irqrestore(&device_domain_lock, flags); 1020 1021 if (!found || !info->dev) 1022 return NULL; 1023 1024 if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS)) 1025 return NULL; 1026 1027 if (!dmar_find_matched_atsr_unit(info->dev)) 1028 return NULL; 1029 1030 info->iommu = iommu; 1031 1032 return info; 1033} 1034 1035static void iommu_enable_dev_iotlb(struct device_domain_info *info) 1036{ 1037 if (!info) 1038 return; 1039 1040 pci_enable_ats(info->dev, 
VTD_PAGE_SHIFT); 1041} 1042 1043static void iommu_disable_dev_iotlb(struct device_domain_info *info) 1044{ 1045 if (!info->dev || !pci_ats_enabled(info->dev)) 1046 return; 1047 1048 pci_disable_ats(info->dev); 1049} 1050 1051static void iommu_flush_dev_iotlb(struct dmar_domain *domain, 1052 u64 addr, unsigned mask) 1053{ 1054 u16 sid, qdep; 1055 unsigned long flags; 1056 struct device_domain_info *info; 1057 1058 spin_lock_irqsave(&device_domain_lock, flags); 1059 list_for_each_entry(info, &domain->devices, link) { 1060 if (!info->dev || !pci_ats_enabled(info->dev)) 1061 continue; 1062 1063 sid = info->bus << 8 | info->devfn; 1064 qdep = pci_ats_queue_depth(info->dev); 1065 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask); 1066 } 1067 spin_unlock_irqrestore(&device_domain_lock, flags); 1068} 1069 1070static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, 1071 unsigned long pfn, unsigned int pages) 1072{ 1073 unsigned int mask = ilog2(__roundup_pow_of_two(pages)); 1074 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT; 1075 1076 BUG_ON(pages == 0); 1077 1078 /* 1079 * Fallback to domain selective flush if no PSI support or the size is 1080 * too big. 1081 * PSI requires page size to be 2 ^ x, and the base address is naturally 1082 * aligned to the size 1083 */ 1084 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap)) 1085 iommu->flush.flush_iotlb(iommu, did, 0, 0, 1086 DMA_TLB_DSI_FLUSH); 1087 else 1088 iommu->flush.flush_iotlb(iommu, did, addr, mask, 1089 DMA_TLB_PSI_FLUSH); 1090 1091 /* 1092 * In caching mode, domain ID 0 is reserved for non-present to present 1093 * mapping flush. Device IOTLB doesn't need to be flushed in this case. 
1094 */ 1095 if (!cap_caching_mode(iommu->cap) || did) 1096 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask); 1097} 1098 1099static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) 1100{ 1101 u32 pmen; 1102 unsigned long flags; 1103 1104 spin_lock_irqsave(&iommu->register_lock, flags); 1105 pmen = readl(iommu->reg + DMAR_PMEN_REG); 1106 pmen &= ~DMA_PMEN_EPM; 1107 writel(pmen, iommu->reg + DMAR_PMEN_REG); 1108 1109 /* wait for the protected region status bit to clear */ 1110 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG, 1111 readl, !(pmen & DMA_PMEN_PRS), pmen); 1112 1113 spin_unlock_irqrestore(&iommu->register_lock, flags); 1114} 1115 1116static int iommu_enable_translation(struct intel_iommu *iommu) 1117{ 1118 u32 sts; 1119 unsigned long flags; 1120 1121 spin_lock_irqsave(&iommu->register_lock, flags); 1122 iommu->gcmd |= DMA_GCMD_TE; 1123 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); 1124 1125 /* Make sure hardware complete it */ 1126 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 1127 readl, (sts & DMA_GSTS_TES), sts); 1128 1129 spin_unlock_irqrestore(&iommu->register_lock, flags); 1130 return 0; 1131} 1132 1133static int iommu_disable_translation(struct intel_iommu *iommu) 1134{ 1135 u32 sts; 1136 unsigned long flag; 1137 1138 spin_lock_irqsave(&iommu->register_lock, flag); 1139 iommu->gcmd &= ~DMA_GCMD_TE; 1140 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); 1141 1142 /* Make sure hardware complete it */ 1143 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 1144 readl, (!(sts & DMA_GSTS_TES)), sts); 1145 1146 spin_unlock_irqrestore(&iommu->register_lock, flag); 1147 return 0; 1148} 1149 1150 1151static int iommu_init_domains(struct intel_iommu *iommu) 1152{ 1153 unsigned long ndomains; 1154 unsigned long nlongs; 1155 1156 ndomains = cap_ndoms(iommu->cap); 1157 pr_debug("Number of Domains supportd <%ld>\n", ndomains); 1158 nlongs = BITS_TO_LONGS(ndomains); 1159 1160 spin_lock_init(&iommu->lock); 1161 1162 /* TBD: there might be 64K domains, 1163 * consider other 
allocation for future chip 1164 */ 1165 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL); 1166 if (!iommu->domain_ids) { 1167 printk(KERN_ERR "Allocating domain id array failed\n"); 1168 return -ENOMEM; 1169 } 1170 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *), 1171 GFP_KERNEL); 1172 if (!iommu->domains) { 1173 printk(KERN_ERR "Allocating domain array failed\n"); 1174 return -ENOMEM; 1175 } 1176 1177 /* 1178 * if Caching mode is set, then invalid translations are tagged 1179 * with domainid 0. Hence we need to pre-allocate it. 1180 */ 1181 if (cap_caching_mode(iommu->cap)) 1182 set_bit(0, iommu->domain_ids); 1183 return 0; 1184} 1185 1186 1187static void domain_exit(struct dmar_domain *domain); 1188static void vm_domain_exit(struct dmar_domain *domain); 1189 1190void free_dmar_iommu(struct intel_iommu *iommu) 1191{ 1192 struct dmar_domain *domain; 1193 int i; 1194 unsigned long flags; 1195 1196 if ((iommu->domains) && (iommu->domain_ids)) { 1197 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap)); 1198 for (; i < cap_ndoms(iommu->cap); ) { 1199 domain = iommu->domains[i]; 1200 clear_bit(i, iommu->domain_ids); 1201 1202 spin_lock_irqsave(&domain->iommu_lock, flags); 1203 if (--domain->iommu_count == 0) { 1204 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) 1205 vm_domain_exit(domain); 1206 else 1207 domain_exit(domain); 1208 } 1209 spin_unlock_irqrestore(&domain->iommu_lock, flags); 1210 1211 i = find_next_bit(iommu->domain_ids, 1212 cap_ndoms(iommu->cap), i+1); 1213 } 1214 } 1215 1216 if (iommu->gcmd & DMA_GCMD_TE) 1217 iommu_disable_translation(iommu); 1218 1219 if (iommu->irq) { 1220 set_irq_data(iommu->irq, NULL); 1221 /* This will mask the irq */ 1222 free_irq(iommu->irq, iommu); 1223 destroy_irq(iommu->irq); 1224 } 1225 1226 kfree(iommu->domains); 1227 kfree(iommu->domain_ids); 1228 1229 g_iommus[iommu->seq_id] = NULL; 1230 1231 /* if all iommus are freed, free g_iommus */ 1232 for (i = 0; i < g_num_of_iommus; 
i++) { 1233 if (g_iommus[i]) 1234 break; 1235 } 1236 1237 if (i == g_num_of_iommus) 1238 kfree(g_iommus); 1239 1240 /* free context mapping */ 1241 free_context_table(iommu); 1242} 1243 1244static struct dmar_domain *alloc_domain(void) 1245{ 1246 struct dmar_domain *domain; 1247 1248 domain = alloc_domain_mem(); 1249 if (!domain) 1250 return NULL; 1251 1252 domain->nid = -1; 1253 memset(&domain->iommu_bmp, 0, sizeof(unsigned long)); 1254 domain->flags = 0; 1255 1256 return domain; 1257} 1258 1259static int iommu_attach_domain(struct dmar_domain *domain, 1260 struct intel_iommu *iommu) 1261{ 1262 int num; 1263 unsigned long ndomains; 1264 unsigned long flags; 1265 1266 ndomains = cap_ndoms(iommu->cap); 1267 1268 spin_lock_irqsave(&iommu->lock, flags); 1269 1270 num = find_first_zero_bit(iommu->domain_ids, ndomains); 1271 if (num >= ndomains) { 1272 spin_unlock_irqrestore(&iommu->lock, flags); 1273 printk(KERN_ERR "IOMMU: no free domain ids\n"); 1274 return -ENOMEM; 1275 } 1276 1277 domain->id = num; 1278 set_bit(num, iommu->domain_ids); 1279 set_bit(iommu->seq_id, &domain->iommu_bmp); 1280 iommu->domains[num] = domain; 1281 spin_unlock_irqrestore(&iommu->lock, flags); 1282 1283 return 0; 1284} 1285 1286static void iommu_detach_domain(struct dmar_domain *domain, 1287 struct intel_iommu *iommu) 1288{ 1289 unsigned long flags; 1290 int num, ndomains; 1291 int found = 0; 1292 1293 spin_lock_irqsave(&iommu->lock, flags); 1294 ndomains = cap_ndoms(iommu->cap); 1295 num = find_first_bit(iommu->domain_ids, ndomains); 1296 for (; num < ndomains; ) { 1297 if (iommu->domains[num] == domain) { 1298 found = 1; 1299 break; 1300 } 1301 num = find_next_bit(iommu->domain_ids, 1302 cap_ndoms(iommu->cap), num+1); 1303 } 1304 1305 if (found) { 1306 clear_bit(num, iommu->domain_ids); 1307 clear_bit(iommu->seq_id, &domain->iommu_bmp); 1308 iommu->domains[num] = NULL; 1309 } 1310 spin_unlock_irqrestore(&iommu->lock, flags); 1311} 1312 1313static struct iova_domain reserved_iova_list; 
/*
 * Round a guest address width up to an adjusted width of the form
 * 12 + 9 * k, and cap the result at 64.
 *
 * A width that already has that form is returned unchanged.
 */
static inline int guestwidth_to_adjustwidth(int gaw)
{
	int rem = (gaw - 12) % 9;
	int width = rem ? gaw + 9 - rem : gaw;

	return width > 64 ? 64 : width;
}
1387 agaw = width_to_agaw(adjust_width); 1388 sagaw = cap_sagaw(iommu->cap); 1389 if (!test_bit(agaw, &sagaw)) { 1390 /* hardware doesn't support it, choose a bigger one */ 1391 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw); 1392 agaw = find_next_bit(&sagaw, 5, agaw); 1393 if (agaw >= 5) 1394 return -ENODEV; 1395 } 1396 domain->agaw = agaw; 1397 INIT_LIST_HEAD(&domain->devices); 1398 1399 if (ecap_coherent(iommu->ecap)) 1400 domain->iommu_coherency = 1; 1401 else 1402 domain->iommu_coherency = 0; 1403 1404 if (ecap_sc_support(iommu->ecap)) 1405 domain->iommu_snooping = 1; 1406 else 1407 domain->iommu_snooping = 0; 1408 1409 domain->iommu_count = 1; 1410 domain->nid = iommu->node; 1411 1412 /* always allocate the top pgd */ 1413 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid); 1414 if (!domain->pgd) 1415 return -ENOMEM; 1416 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE); 1417 return 0; 1418} 1419 1420static void domain_exit(struct dmar_domain *domain) 1421{ 1422 struct dmar_drhd_unit *drhd; 1423 struct intel_iommu *iommu; 1424 1425 /* Domain 0 is reserved, so dont process it */ 1426 if (!domain) 1427 return; 1428 1429 domain_remove_dev_info(domain); 1430 /* destroy iovas */ 1431 put_iova_domain(&domain->iovad); 1432 1433 /* clear ptes */ 1434 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); 1435 1436 /* free page tables */ 1437 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); 1438 1439 for_each_active_iommu(iommu, drhd) 1440 if (test_bit(iommu->seq_id, &domain->iommu_bmp)) 1441 iommu_detach_domain(domain, iommu); 1442 1443 free_domain_mem(domain); 1444} 1445 1446static int domain_context_mapping_one(struct dmar_domain *domain, int segment, 1447 u8 bus, u8 devfn, int translation) 1448{ 1449 struct context_entry *context; 1450 unsigned long flags; 1451 struct intel_iommu *iommu; 1452 struct dma_pte *pgd; 1453 unsigned long num; 1454 unsigned long ndomains; 1455 int id; 1456 int agaw; 1457 struct 
device_domain_info *info = NULL; 1458 1459 pr_debug("Set context mapping for %02x:%02x.%d\n", 1460 bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); 1461 1462 BUG_ON(!domain->pgd); 1463 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH && 1464 translation != CONTEXT_TT_MULTI_LEVEL); 1465 1466 iommu = device_to_iommu(segment, bus, devfn); 1467 if (!iommu) 1468 return -ENODEV; 1469 1470 context = device_to_context_entry(iommu, bus, devfn); 1471 if (!context) 1472 return -ENOMEM; 1473 spin_lock_irqsave(&iommu->lock, flags); 1474 if (context_present(context)) { 1475 spin_unlock_irqrestore(&iommu->lock, flags); 1476 return 0; 1477 } 1478 1479 id = domain->id; 1480 pgd = domain->pgd; 1481 1482 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE || 1483 domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) { 1484 int found = 0; 1485 1486 /* find an available domain id for this device in iommu */ 1487 ndomains = cap_ndoms(iommu->cap); 1488 num = find_first_bit(iommu->domain_ids, ndomains); 1489 for (; num < ndomains; ) { 1490 if (iommu->domains[num] == domain) { 1491 id = num; 1492 found = 1; 1493 break; 1494 } 1495 num = find_next_bit(iommu->domain_ids, 1496 cap_ndoms(iommu->cap), num+1); 1497 } 1498 1499 if (found == 0) { 1500 num = find_first_zero_bit(iommu->domain_ids, ndomains); 1501 if (num >= ndomains) { 1502 spin_unlock_irqrestore(&iommu->lock, flags); 1503 printk(KERN_ERR "IOMMU: no free domain ids\n"); 1504 return -EFAULT; 1505 } 1506 1507 set_bit(num, iommu->domain_ids); 1508 iommu->domains[num] = domain; 1509 id = num; 1510 } 1511 1512 /* Skip top levels of page tables for 1513 * iommu which has less agaw than default. 1514 * Unnecessary for PT mode. 
1515 */ 1516 if (translation != CONTEXT_TT_PASS_THROUGH) { 1517 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) { 1518 pgd = phys_to_virt(dma_pte_addr(pgd)); 1519 if (!dma_pte_present(pgd)) { 1520 spin_unlock_irqrestore(&iommu->lock, flags); 1521 return -ENOMEM; 1522 } 1523 } 1524 } 1525 } 1526 1527 context_set_domain_id(context, id); 1528 1529 if (translation != CONTEXT_TT_PASS_THROUGH) { 1530 info = iommu_support_dev_iotlb(domain, segment, bus, devfn); 1531 translation = info ? CONTEXT_TT_DEV_IOTLB : 1532 CONTEXT_TT_MULTI_LEVEL; 1533 } 1534 /* 1535 * In pass through mode, AW must be programmed to indicate the largest 1536 * AGAW value supported by hardware. And ASR is ignored by hardware. 1537 */ 1538 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH)) 1539 context_set_address_width(context, iommu->msagaw); 1540 else { 1541 context_set_address_root(context, virt_to_phys(pgd)); 1542 context_set_address_width(context, iommu->agaw); 1543 } 1544 1545 context_set_translation_type(context, translation); 1546 context_set_fault_enable(context); 1547 context_set_present(context); 1548 domain_flush_cache(domain, context, sizeof(*context)); 1549 1550 /* 1551 * It's a non-present to present mapping. If hardware doesn't cache 1552 * non-present entry we only need to flush the write-buffer. 
If the 1553 * _does_ cache non-present entries, then it does so in the special 1554 * domain #0, which we have to flush: 1555 */ 1556 if (cap_caching_mode(iommu->cap)) { 1557 iommu->flush.flush_context(iommu, 0, 1558 (((u16)bus) << 8) | devfn, 1559 DMA_CCMD_MASK_NOBIT, 1560 DMA_CCMD_DEVICE_INVL); 1561 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH); 1562 } else { 1563 iommu_flush_write_buffer(iommu); 1564 } 1565 iommu_enable_dev_iotlb(info); 1566 spin_unlock_irqrestore(&iommu->lock, flags); 1567 1568 spin_lock_irqsave(&domain->iommu_lock, flags); 1569 if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) { 1570 domain->iommu_count++; 1571 if (domain->iommu_count == 1) 1572 domain->nid = iommu->node; 1573 domain_update_iommu_cap(domain); 1574 } 1575 spin_unlock_irqrestore(&domain->iommu_lock, flags); 1576 return 0; 1577} 1578 1579static int 1580domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev, 1581 int translation) 1582{ 1583 int ret; 1584 struct pci_dev *tmp, *parent; 1585 1586 ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus), 1587 pdev->bus->number, pdev->devfn, 1588 translation); 1589 if (ret) 1590 return ret; 1591 1592 /* dependent device mapping */ 1593 tmp = pci_find_upstream_pcie_bridge(pdev); 1594 if (!tmp) 1595 return 0; 1596 /* Secondary interface's bus number and devfn 0 */ 1597 parent = pdev->bus->self; 1598 while (parent != tmp) { 1599 ret = domain_context_mapping_one(domain, 1600 pci_domain_nr(parent->bus), 1601 parent->bus->number, 1602 parent->devfn, translation); 1603 if (ret) 1604 return ret; 1605 parent = parent->bus->self; 1606 } 1607 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */ 1608 return domain_context_mapping_one(domain, 1609 pci_domain_nr(tmp->subordinate), 1610 tmp->subordinate->number, 0, 1611 translation); 1612 else /* this is a legacy PCI bridge */ 1613 return domain_context_mapping_one(domain, 1614 pci_domain_nr(tmp->bus), 1615 tmp->bus->number, 1616 tmp->devfn, 1617 
translation); 1618} 1619 1620static int domain_context_mapped(struct pci_dev *pdev) 1621{ 1622 int ret; 1623 struct pci_dev *tmp, *parent; 1624 struct intel_iommu *iommu; 1625 1626 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number, 1627 pdev->devfn); 1628 if (!iommu) 1629 return -ENODEV; 1630 1631 ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn); 1632 if (!ret) 1633 return ret; 1634 /* dependent device mapping */ 1635 tmp = pci_find_upstream_pcie_bridge(pdev); 1636 if (!tmp) 1637 return ret; 1638 /* Secondary interface's bus number and devfn 0 */ 1639 parent = pdev->bus->self; 1640 while (parent != tmp) { 1641 ret = device_context_mapped(iommu, parent->bus->number, 1642 parent->devfn); 1643 if (!ret) 1644 return ret; 1645 parent = parent->bus->self; 1646 } 1647 if (pci_is_pcie(tmp)) 1648 return device_context_mapped(iommu, tmp->subordinate->number, 1649 0); 1650 else 1651 return device_context_mapped(iommu, tmp->bus->number, 1652 tmp->devfn); 1653} 1654 1655/* Returns a number of VTD pages, but aligned to MM page size */ 1656static inline unsigned long aligned_nrpages(unsigned long host_addr, 1657 size_t size) 1658{ 1659 host_addr &= ~PAGE_MASK; 1660 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT; 1661} 1662 1663static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, 1664 struct scatterlist *sg, unsigned long phys_pfn, 1665 unsigned long nr_pages, int prot) 1666{ 1667 struct dma_pte *first_pte = NULL, *pte = NULL; 1668 phys_addr_t uninitialized_var(pteval); 1669 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; 1670 unsigned long sg_res; 1671 1672 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width); 1673 1674 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) 1675 return -EINVAL; 1676 1677 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP; 1678 1679 if (sg) 1680 sg_res = 0; 1681 else { 1682 sg_res = nr_pages + 1; 1683 pteval = ((phys_addr_t)phys_pfn << 
VTD_PAGE_SHIFT) | prot; 1684 } 1685 1686 while (nr_pages--) { 1687 uint64_t tmp; 1688 1689 if (!sg_res) { 1690 sg_res = aligned_nrpages(sg->offset, sg->length); 1691 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset; 1692 sg->dma_length = sg->length; 1693 pteval = page_to_phys(sg_page(sg)) | prot; 1694 } 1695 if (!pte) { 1696 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn); 1697 if (!pte) 1698 return -ENOMEM; 1699 } 1700 /* We don't need lock here, nobody else 1701 * touches the iova range 1702 */ 1703 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval); 1704 if (tmp) { 1705 static int dumps = 5; 1706 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n", 1707 iov_pfn, tmp, (unsigned long long)pteval); 1708 if (dumps) { 1709 dumps--; 1710 debug_dma_dump_mappings(NULL); 1711 } 1712 WARN_ON(1); 1713 } 1714 pte++; 1715 if (!nr_pages || first_pte_in_page(pte)) { 1716 domain_flush_cache(domain, first_pte, 1717 (void *)pte - (void *)first_pte); 1718 pte = NULL; 1719 } 1720 iov_pfn++; 1721 pteval += VTD_PAGE_SIZE; 1722 sg_res--; 1723 if (!sg_res) 1724 sg = sg_next(sg); 1725 } 1726 return 0; 1727} 1728 1729static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn, 1730 struct scatterlist *sg, unsigned long nr_pages, 1731 int prot) 1732{ 1733 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot); 1734} 1735 1736static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn, 1737 unsigned long phys_pfn, unsigned long nr_pages, 1738 int prot) 1739{ 1740 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot); 1741} 1742 1743static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn) 1744{ 1745 if (!iommu) 1746 return; 1747 1748 clear_context_table(iommu, bus, devfn); 1749 iommu->flush.flush_context(iommu, 0, 0, 0, 1750 DMA_CCMD_GLOBAL_INVL); 1751 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); 1752} 1753 1754static void 
domain_remove_dev_info(struct dmar_domain *domain) 1755{ 1756 struct device_domain_info *info; 1757 unsigned long flags; 1758 struct intel_iommu *iommu; 1759 1760 spin_lock_irqsave(&device_domain_lock, flags); 1761 while (!list_empty(&domain->devices)) { 1762 info = list_entry(domain->devices.next, 1763 struct device_domain_info, link); 1764 list_del(&info->link); 1765 list_del(&info->global); 1766 if (info->dev) 1767 info->dev->dev.archdata.iommu = NULL; 1768 spin_unlock_irqrestore(&device_domain_lock, flags); 1769 1770 iommu_disable_dev_iotlb(info); 1771 iommu = device_to_iommu(info->segment, info->bus, info->devfn); 1772 iommu_detach_dev(iommu, info->bus, info->devfn); 1773 free_devinfo_mem(info); 1774 1775 spin_lock_irqsave(&device_domain_lock, flags); 1776 } 1777 spin_unlock_irqrestore(&device_domain_lock, flags); 1778} 1779 1780/* 1781 * find_domain 1782 * Note: we use struct pci_dev->dev.archdata.iommu stores the info 1783 */ 1784static struct dmar_domain * 1785find_domain(struct pci_dev *pdev) 1786{ 1787 struct device_domain_info *info; 1788 1789 /* No lock here, assumes no domain exit in normal case */ 1790 info = pdev->dev.archdata.iommu; 1791 if (info) 1792 return info->domain; 1793 return NULL; 1794} 1795 1796/* domain is initialized */ 1797static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) 1798{ 1799 struct dmar_domain *domain, *found = NULL; 1800 struct intel_iommu *iommu; 1801 struct dmar_drhd_unit *drhd; 1802 struct device_domain_info *info, *tmp; 1803 struct pci_dev *dev_tmp; 1804 unsigned long flags; 1805 int bus = 0, devfn = 0; 1806 int segment; 1807 int ret; 1808 1809 domain = find_domain(pdev); 1810 if (domain) 1811 return domain; 1812 1813 segment = pci_domain_nr(pdev->bus); 1814 1815 dev_tmp = pci_find_upstream_pcie_bridge(pdev); 1816 if (dev_tmp) { 1817 if (pci_is_pcie(dev_tmp)) { 1818 bus = dev_tmp->subordinate->number; 1819 devfn = 0; 1820 } else { 1821 bus = dev_tmp->bus->number; 1822 devfn = dev_tmp->devfn; 1823 
} 1824 spin_lock_irqsave(&device_domain_lock, flags); 1825 list_for_each_entry(info, &device_domain_list, global) { 1826 if (info->segment == segment && 1827 info->bus == bus && info->devfn == devfn) { 1828 found = info->domain; 1829 break; 1830 } 1831 } 1832 spin_unlock_irqrestore(&device_domain_lock, flags); 1833 /* pcie-pci bridge already has a domain, uses it */ 1834 if (found) { 1835 domain = found; 1836 goto found_domain; 1837 } 1838 } 1839 1840 domain = alloc_domain(); 1841 if (!domain) 1842 goto error; 1843 1844 /* Allocate new domain for the device */ 1845 drhd = dmar_find_matched_drhd_unit(pdev); 1846 if (!drhd) { 1847 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n", 1848 pci_name(pdev)); 1849 return NULL; 1850 } 1851 iommu = drhd->iommu; 1852 1853 ret = iommu_attach_domain(domain, iommu); 1854 if (ret) { 1855 domain_exit(domain); 1856 goto error; 1857 } 1858 1859 if (domain_init(domain, gaw)) { 1860 domain_exit(domain); 1861 goto error; 1862 } 1863 1864 /* register pcie-to-pci device */ 1865 if (dev_tmp) { 1866 info = alloc_devinfo_mem(); 1867 if (!info) { 1868 domain_exit(domain); 1869 goto error; 1870 } 1871 info->segment = segment; 1872 info->bus = bus; 1873 info->devfn = devfn; 1874 info->dev = NULL; 1875 info->domain = domain; 1876 /* This domain is shared by devices under p2p bridge */ 1877 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES; 1878 1879 /* pcie-to-pci bridge already has a domain, uses it */ 1880 found = NULL; 1881 spin_lock_irqsave(&device_domain_lock, flags); 1882 list_for_each_entry(tmp, &device_domain_list, global) { 1883 if (tmp->segment == segment && 1884 tmp->bus == bus && tmp->devfn == devfn) { 1885 found = tmp->domain; 1886 break; 1887 } 1888 } 1889 if (found) { 1890 free_devinfo_mem(info); 1891 domain_exit(domain); 1892 domain = found; 1893 } else { 1894 list_add(&info->link, &domain->devices); 1895 list_add(&info->global, &device_domain_list); 1896 } 1897 spin_unlock_irqrestore(&device_domain_lock, flags); 1898 } 
1899 1900found_domain: 1901 info = alloc_devinfo_mem(); 1902 if (!info) 1903 goto error; 1904 info->segment = segment; 1905 info->bus = pdev->bus->number; 1906 info->devfn = pdev->devfn; 1907 info->dev = pdev; 1908 info->domain = domain; 1909 spin_lock_irqsave(&device_domain_lock, flags); 1910 /* somebody is fast */ 1911 found = find_domain(pdev); 1912 if (found != NULL) { 1913 spin_unlock_irqrestore(&device_domain_lock, flags); 1914 if (found != domain) { 1915 domain_exit(domain); 1916 domain = found; 1917 } 1918 free_devinfo_mem(info); 1919 return domain; 1920 } 1921 list_add(&info->link, &domain->devices); 1922 list_add(&info->global, &device_domain_list); 1923 pdev->dev.archdata.iommu = info; 1924 spin_unlock_irqrestore(&device_domain_lock, flags); 1925 return domain; 1926error: 1927 /* recheck it here, maybe others set it */ 1928 return find_domain(pdev); 1929} 1930 1931static int iommu_identity_mapping; 1932#define IDENTMAP_ALL 1 1933#define IDENTMAP_GFX 2 1934#define IDENTMAP_AZALIA 4 1935 1936static int iommu_domain_identity_map(struct dmar_domain *domain, 1937 unsigned long long start, 1938 unsigned long long end) 1939{ 1940 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT; 1941 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT; 1942 1943 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn), 1944 dma_to_mm_pfn(last_vpfn))) { 1945 printk(KERN_ERR "IOMMU: reserve iova failed\n"); 1946 return -ENOMEM; 1947 } 1948 1949 pr_debug("Mapping reserved region %llx-%llx for domain %d\n", 1950 start, end, domain->id); 1951 /* 1952 * RMRR range might have overlap with physical memory range, 1953 * clear it first 1954 */ 1955 dma_pte_clear_range(domain, first_vpfn, last_vpfn); 1956 1957 return domain_pfn_mapping(domain, first_vpfn, first_vpfn, 1958 last_vpfn - first_vpfn + 1, 1959 DMA_PTE_READ|DMA_PTE_WRITE); 1960} 1961 1962static int iommu_prepare_identity_map(struct pci_dev *pdev, 1963 unsigned long long start, 1964 unsigned long long end) 1965{ 1966 struct 
dmar_domain *domain; 1967 int ret; 1968 1969 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH); 1970 if (!domain) 1971 return -ENOMEM; 1972 1973 /* For _hardware_ passthrough, don't bother. But for software 1974 passthrough, we do it anyway -- it may indicate a memory 1975 range which is reserved in E820, so which didn't get set 1976 up to start with in si_domain */ 1977 if (domain == si_domain && hw_pass_through) { 1978 printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n", 1979 pci_name(pdev), start, end); 1980 return 0; 1981 } 1982 1983 printk(KERN_INFO 1984 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", 1985 pci_name(pdev), start, end); 1986 1987 if (end < start) { 1988 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n" 1989 "BIOS vendor: %s; Ver: %s; Product Version: %s\n", 1990 dmi_get_system_info(DMI_BIOS_VENDOR), 1991 dmi_get_system_info(DMI_BIOS_VERSION), 1992 dmi_get_system_info(DMI_PRODUCT_VERSION)); 1993 ret = -EIO; 1994 goto error; 1995 } 1996 1997 if (end >> agaw_to_width(domain->agaw)) { 1998 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n" 1999 "BIOS vendor: %s; Ver: %s; Product Version: %s\n", 2000 agaw_to_width(domain->agaw), 2001 dmi_get_system_info(DMI_BIOS_VENDOR), 2002 dmi_get_system_info(DMI_BIOS_VERSION), 2003 dmi_get_system_info(DMI_PRODUCT_VERSION)); 2004 ret = -EIO; 2005 goto error; 2006 } 2007 2008 ret = iommu_domain_identity_map(domain, start, end); 2009 if (ret) 2010 goto error; 2011 2012 /* context entry init */ 2013 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL); 2014 if (ret) 2015 goto error; 2016 2017 return 0; 2018 2019 error: 2020 domain_exit(domain); 2021 return ret; 2022} 2023 2024static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, 2025 struct pci_dev *pdev) 2026{ 2027 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) 2028 return 0; 2029 return iommu_prepare_identity_map(pdev, 
rmrr->base_address, 2030 rmrr->end_address + 1); 2031} 2032 2033#ifdef CONFIG_DMAR_FLOPPY_WA 2034static inline void iommu_prepare_isa(void) 2035{ 2036 struct pci_dev *pdev; 2037 int ret; 2038 2039 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL); 2040 if (!pdev) 2041 return; 2042 2043 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n"); 2044 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024); 2045 2046 if (ret) 2047 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; " 2048 "floppy might not work\n"); 2049 2050} 2051#else 2052static inline void iommu_prepare_isa(void) 2053{ 2054 return; 2055} 2056#endif /* !CONFIG_DMAR_FLPY_WA */ 2057 2058static int md_domain_init(struct dmar_domain *domain, int guest_width); 2059 2060static int __init si_domain_work_fn(unsigned long start_pfn, 2061 unsigned long end_pfn, void *datax) 2062{ 2063 int *ret = datax; 2064 2065 *ret = iommu_domain_identity_map(si_domain, 2066 (uint64_t)start_pfn << PAGE_SHIFT, 2067 (uint64_t)end_pfn << PAGE_SHIFT); 2068 return *ret; 2069 2070} 2071 2072static int __init si_domain_init(int hw) 2073{ 2074 struct dmar_drhd_unit *drhd; 2075 struct intel_iommu *iommu; 2076 int nid, ret = 0; 2077 2078 si_domain = alloc_domain(); 2079 if (!si_domain) 2080 return -EFAULT; 2081 2082 pr_debug("Identity mapping domain is domain %d\n", si_domain->id); 2083 2084 for_each_active_iommu(iommu, drhd) { 2085 ret = iommu_attach_domain(si_domain, iommu); 2086 if (ret) { 2087 domain_exit(si_domain); 2088 return -EFAULT; 2089 } 2090 } 2091 2092 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { 2093 domain_exit(si_domain); 2094 return -EFAULT; 2095 } 2096 2097 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY; 2098 2099 if (hw) 2100 return 0; 2101 2102 for_each_online_node(nid) { 2103 work_with_active_regions(nid, si_domain_work_fn, &ret); 2104 if (ret) 2105 return ret; 2106 } 2107 2108 return 0; 2109} 2110 2111static void domain_remove_one_dev_info(struct dmar_domain *domain, 
2112 struct pci_dev *pdev); 2113static int identity_mapping(struct pci_dev *pdev) 2114{ 2115 struct device_domain_info *info; 2116 2117 if (likely(!iommu_identity_mapping)) 2118 return 0; 2119 2120 2121 list_for_each_entry(info, &si_domain->devices, link) 2122 if (info->dev == pdev) 2123 return 1; 2124 return 0; 2125} 2126 2127static int domain_add_dev_info(struct dmar_domain *domain, 2128 struct pci_dev *pdev, 2129 int translation) 2130{ 2131 struct device_domain_info *info; 2132 unsigned long flags; 2133 int ret; 2134 2135 info = alloc_devinfo_mem(); 2136 if (!info) 2137 return -ENOMEM; 2138 2139 ret = domain_context_mapping(domain, pdev, translation); 2140 if (ret) { 2141 free_devinfo_mem(info); 2142 return ret; 2143 } 2144 2145 info->segment = pci_domain_nr(pdev->bus); 2146 info->bus = pdev->bus->number; 2147 info->devfn = pdev->devfn; 2148 info->dev = pdev; 2149 info->domain = domain; 2150 2151 spin_lock_irqsave(&device_domain_lock, flags); 2152 list_add(&info->link, &domain->devices); 2153 list_add(&info->global, &device_domain_list); 2154 pdev->dev.archdata.iommu = info; 2155 spin_unlock_irqrestore(&device_domain_lock, flags); 2156 2157 return 0; 2158} 2159 2160static int iommu_should_identity_map(struct pci_dev *pdev, int startup) 2161{ 2162 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev)) 2163 return 1; 2164 2165 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev)) 2166 return 1; 2167 2168 if (!(iommu_identity_mapping & IDENTMAP_ALL)) 2169 return 0; 2170 2171 /* 2172 * We want to start off with all devices in the 1:1 domain, and 2173 * take them out later if we find they can't access all of memory. 2174 * 2175 * However, we can't do this for PCI devices behind bridges, 2176 * because all PCI devices behind the same bridge will end up 2177 * with the same source-id on their transactions. 
2178 * 2179 * Practically speaking, we can't change things around for these 2180 * devices at run-time, because we can't be sure there'll be no 2181 * DMA transactions in flight for any of their siblings. 2182 * 2183 * So PCI devices (unless they're on the root bus) as well as 2184 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of 2185 * the 1:1 domain, just in _case_ one of their siblings turns out 2186 * not to be able to map all of memory. 2187 */ 2188 if (!pci_is_pcie(pdev)) { 2189 if (!pci_is_root_bus(pdev->bus)) 2190 return 0; 2191 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI) 2192 return 0; 2193 } else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE) 2194 return 0; 2195 2196 /* 2197 * At boot time, we don't yet know if devices will be 64-bit capable. 2198 * Assume that they will -- if they turn out not to be, then we can 2199 * take them out of the 1:1 domain later. 2200 */ 2201 if (!startup) 2202 return pdev->dma_mask > DMA_BIT_MASK(32); 2203 2204 return 1; 2205} 2206 2207static int __init iommu_prepare_static_identity_mapping(int hw) 2208{ 2209 struct pci_dev *pdev = NULL; 2210 int ret; 2211 2212 ret = si_domain_init(hw); 2213 if (ret) 2214 return -EFAULT; 2215 2216 for_each_pci_dev(pdev) { 2217 if (iommu_should_identity_map(pdev, 1)) { 2218 printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n", 2219 hw ? "hardware" : "software", pci_name(pdev)); 2220 2221 ret = domain_add_dev_info(si_domain, pdev, 2222 hw ? 
CONTEXT_TT_PASS_THROUGH : 2223 CONTEXT_TT_MULTI_LEVEL); 2224 if (ret) 2225 return ret; 2226 } 2227 } 2228 2229 return 0; 2230} 2231 2232int __init init_dmars(void) 2233{ 2234 struct dmar_drhd_unit *drhd; 2235 struct dmar_rmrr_unit *rmrr; 2236 struct pci_dev *pdev; 2237 struct intel_iommu *iommu; 2238 int i, ret; 2239 2240 /* 2241 * for each drhd 2242 * allocate root 2243 * initialize and program root entry to not present 2244 * endfor 2245 */ 2246 for_each_drhd_unit(drhd) { 2247 g_num_of_iommus++; 2248 /* 2249 * lock not needed as this is only incremented in the single 2250 * threaded kernel __init code path all other access are read 2251 * only 2252 */ 2253 } 2254 2255 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *), 2256 GFP_KERNEL); 2257 if (!g_iommus) { 2258 printk(KERN_ERR "Allocating global iommu array failed\n"); 2259 ret = -ENOMEM; 2260 goto error; 2261 } 2262 2263 deferred_flush = kzalloc(g_num_of_iommus * 2264 sizeof(struct deferred_flush_tables), GFP_KERNEL); 2265 if (!deferred_flush) { 2266 ret = -ENOMEM; 2267 goto error; 2268 } 2269 2270 for_each_drhd_unit(drhd) { 2271 if (drhd->ignored) 2272 continue; 2273 2274 iommu = drhd->iommu; 2275 g_iommus[iommu->seq_id] = iommu; 2276 2277 ret = iommu_init_domains(iommu); 2278 if (ret) 2279 goto error; 2280 2281 /* 2282 * TBD: 2283 * we could share the same root & context tables 2284 * amoung all IOMMU's. Need to Split it later. 2285 */ 2286 ret = iommu_alloc_root_entry(iommu); 2287 if (ret) { 2288 printk(KERN_ERR "IOMMU: allocate root entry failed\n"); 2289 goto error; 2290 } 2291 if (!ecap_pass_through(iommu->ecap)) 2292 hw_pass_through = 0; 2293 } 2294 2295 /* 2296 * Start from the sane iommu hardware state. 
2297 */ 2298 for_each_drhd_unit(drhd) { 2299 if (drhd->ignored) 2300 continue; 2301 2302 iommu = drhd->iommu; 2303 2304 /* 2305 * If the queued invalidation is already initialized by us 2306 * (for example, while enabling interrupt-remapping) then 2307 * we got the things already rolling from a sane state. 2308 */ 2309 if (iommu->qi) 2310 continue; 2311 2312 /* 2313 * Clear any previous faults. 2314 */ 2315 dmar_fault(-1, iommu); 2316 /* 2317 * Disable queued invalidation if supported and already enabled 2318 * before OS handover. 2319 */ 2320 dmar_disable_qi(iommu); 2321 } 2322 2323 for_each_drhd_unit(drhd) { 2324 if (drhd->ignored) 2325 continue; 2326 2327 iommu = drhd->iommu; 2328 2329 if (dmar_enable_qi(iommu)) { 2330 /* 2331 * Queued Invalidate not enabled, use Register Based 2332 * Invalidate 2333 */ 2334 iommu->flush.flush_context = __iommu_flush_context; 2335 iommu->flush.flush_iotlb = __iommu_flush_iotlb; 2336 printk(KERN_INFO "IOMMU 0x%Lx: using Register based " 2337 "invalidation\n", 2338 (unsigned long long)drhd->reg_base_addr); 2339 } else { 2340 iommu->flush.flush_context = qi_flush_context; 2341 iommu->flush.flush_iotlb = qi_flush_iotlb; 2342 printk(KERN_INFO "IOMMU 0x%Lx: using Queued " 2343 "invalidation\n", 2344 (unsigned long long)drhd->reg_base_addr); 2345 } 2346 } 2347 2348 if (iommu_pass_through) 2349 iommu_identity_mapping |= IDENTMAP_ALL; 2350 2351#ifdef CONFIG_DMAR_BROKEN_GFX_WA 2352 iommu_identity_mapping |= IDENTMAP_GFX; 2353#endif 2354 2355 check_tylersburg_isoch(); 2356 2357 /* 2358 * If pass through is not set or not enabled, setup context entries for 2359 * identity mappings for rmrr, gfx, and isa and may fall back to static 2360 * identity mapping if iommu_identity_mapping is set. 
2361 */ 2362 if (iommu_identity_mapping) { 2363 ret = iommu_prepare_static_identity_mapping(hw_pass_through); 2364 if (ret) { 2365 printk(KERN_CRIT "Failed to setup IOMMU pass-through\n"); 2366 goto error; 2367 } 2368 } 2369 /* 2370 * For each rmrr 2371 * for each dev attached to rmrr 2372 * do 2373 * locate drhd for dev, alloc domain for dev 2374 * allocate free domain 2375 * allocate page table entries for rmrr 2376 * if context not allocated for bus 2377 * allocate and init context 2378 * set present in root table for this bus 2379 * init context with domain, translation etc 2380 * endfor 2381 * endfor 2382 */ 2383 printk(KERN_INFO "IOMMU: Setting RMRR:\n"); 2384 for_each_rmrr_units(rmrr) { 2385 for (i = 0; i < rmrr->devices_cnt; i++) { 2386 pdev = rmrr->devices[i]; 2387 /* 2388 * some BIOS lists non-exist devices in DMAR 2389 * table. 2390 */ 2391 if (!pdev) 2392 continue; 2393 ret = iommu_prepare_rmrr_dev(rmrr, pdev); 2394 if (ret) 2395 printk(KERN_ERR 2396 "IOMMU: mapping reserved region failed\n"); 2397 } 2398 } 2399 2400 iommu_prepare_isa(); 2401 2402 /* 2403 * for each drhd 2404 * enable fault log 2405 * global invalidate context cache 2406 * global invalidate iotlb 2407 * enable translation 2408 */ 2409 for_each_drhd_unit(drhd) { 2410 if (drhd->ignored) 2411 continue; 2412 iommu = drhd->iommu; 2413 2414 iommu_flush_write_buffer(iommu); 2415 2416 ret = dmar_set_interrupt(iommu); 2417 if (ret) 2418 goto error; 2419 2420 iommu_set_root_entry(iommu); 2421 2422 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); 2423 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); 2424 2425 ret = iommu_enable_translation(iommu); 2426 if (ret) 2427 goto error; 2428 2429 iommu_disable_protect_mem_regions(iommu); 2430 } 2431 2432 return 0; 2433error: 2434 for_each_drhd_unit(drhd) { 2435 if (drhd->ignored) 2436 continue; 2437 iommu = drhd->iommu; 2438 free_iommu(iommu); 2439 } 2440 kfree(g_iommus); 2441 return ret; 2442} 2443 2444/* This takes a 
number of _MM_ pages, not VTD pages */ 2445static struct iova *intel_alloc_iova(struct device *dev, 2446 struct dmar_domain *domain, 2447 unsigned long nrpages, uint64_t dma_mask) 2448{ 2449 struct pci_dev *pdev = to_pci_dev(dev); 2450 struct iova *iova = NULL; 2451 2452 /* Restrict dma_mask to the width that the iommu can handle */ 2453 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask); 2454 2455 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) { 2456 /* 2457 * First try to allocate an io virtual address in 2458 * DMA_BIT_MASK(32) and if that fails then try allocating 2459 * from higher range 2460 */ 2461 iova = alloc_iova(&domain->iovad, nrpages, 2462 IOVA_PFN(DMA_BIT_MASK(32)), 1); 2463 if (iova) 2464 return iova; 2465 } 2466 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1); 2467 if (unlikely(!iova)) { 2468 printk(KERN_ERR "Allocating %ld-page iova for %s failed", 2469 nrpages, pci_name(pdev)); 2470 return NULL; 2471 } 2472 2473 return iova; 2474} 2475 2476static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev) 2477{ 2478 struct dmar_domain *domain; 2479 int ret; 2480 2481 domain = get_domain_for_dev(pdev, 2482 DEFAULT_DOMAIN_ADDRESS_WIDTH); 2483 if (!domain) { 2484 printk(KERN_ERR 2485 "Allocating domain for %s failed", pci_name(pdev)); 2486 return NULL; 2487 } 2488 2489 /* make sure context mapping is ok */ 2490 if (unlikely(!domain_context_mapped(pdev))) { 2491 ret = domain_context_mapping(domain, pdev, 2492 CONTEXT_TT_MULTI_LEVEL); 2493 if (ret) { 2494 printk(KERN_ERR 2495 "Domain context map for %s failed", 2496 pci_name(pdev)); 2497 return NULL; 2498 } 2499 } 2500 2501 return domain; 2502} 2503 2504static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev) 2505{ 2506 struct device_domain_info *info; 2507 2508 /* No lock here, assumes no domain exit in normal case */ 2509 info = dev->dev.archdata.iommu; 2510 if (likely(info)) 2511 return info->domain; 2512 2513 return 
__get_valid_domain_for_dev(dev); 2514} 2515 2516static int iommu_dummy(struct pci_dev *pdev) 2517{ 2518 return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO; 2519} 2520 2521/* Check if the pdev needs to go through non-identity map and unmap process.*/ 2522static int iommu_no_mapping(struct device *dev) 2523{ 2524 struct pci_dev *pdev; 2525 int found; 2526 2527 if (unlikely(dev->bus != &pci_bus_type)) 2528 return 1; 2529 2530 pdev = to_pci_dev(dev); 2531 if (iommu_dummy(pdev)) 2532 return 1; 2533 2534 if (!iommu_identity_mapping) 2535 return 0; 2536 2537 found = identity_mapping(pdev); 2538 if (found) { 2539 if (iommu_should_identity_map(pdev, 0)) 2540 return 1; 2541 else { 2542 /* 2543 * 32 bit DMA is removed from si_domain and fall back 2544 * to non-identity mapping. 2545 */ 2546 domain_remove_one_dev_info(si_domain, pdev); 2547 printk(KERN_INFO "32bit %s uses non-identity mapping\n", 2548 pci_name(pdev)); 2549 return 0; 2550 } 2551 } else { 2552 /* 2553 * In case of a detached 64 bit DMA device from vm, the device 2554 * is put into si_domain for identity mapping. 2555 */ 2556 if (iommu_should_identity_map(pdev, 0)) { 2557 int ret; 2558 ret = domain_add_dev_info(si_domain, pdev, 2559 hw_pass_through ? 
2560 CONTEXT_TT_PASS_THROUGH : 2561 CONTEXT_TT_MULTI_LEVEL); 2562 if (!ret) { 2563 printk(KERN_INFO "64bit %s uses identity mapping\n", 2564 pci_name(pdev)); 2565 return 1; 2566 } 2567 } 2568 } 2569 2570 return 0; 2571} 2572 2573static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, 2574 size_t size, int dir, u64 dma_mask) 2575{ 2576 struct pci_dev *pdev = to_pci_dev(hwdev); 2577 struct dmar_domain *domain; 2578 phys_addr_t start_paddr; 2579 struct iova *iova; 2580 int prot = 0; 2581 int ret; 2582 struct intel_iommu *iommu; 2583 unsigned long paddr_pfn = paddr >> PAGE_SHIFT; 2584 2585 BUG_ON(dir == DMA_NONE); 2586 2587 if (iommu_no_mapping(hwdev)) 2588 return paddr; 2589 2590 domain = get_valid_domain_for_dev(pdev); 2591 if (!domain) 2592 return 0; 2593 2594 iommu = domain_get_iommu(domain); 2595 size = aligned_nrpages(paddr, size); 2596 2597 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), 2598 pdev->dma_mask); 2599 if (!iova) 2600 goto error; 2601 2602 /* 2603 * Check if DMAR supports zero-length reads on write only 2604 * mappings.. 2605 */ 2606 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \ 2607 !cap_zlr(iommu->cap)) 2608 prot |= DMA_PTE_READ; 2609 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) 2610 prot |= DMA_PTE_WRITE; 2611 /* 2612 * paddr - (paddr + size) might be partial page, we should map the whole 2613 * page. Note: if two part of one page are separately mapped, we 2614 * might have two guest_addr mapping to the same host paddr, but this 2615 * is not a big problem 2616 */ 2617 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo), 2618 mm_to_dma_pfn(paddr_pfn), size, prot); 2619 if (ret) 2620 goto error; 2621 2622 /* it's a non-present to present mapping. 
Only flush if caching mode */ 2623 if (cap_caching_mode(iommu->cap)) 2624 iommu_flush_iotlb_psi(iommu, 0, mm_to_dma_pfn(iova->pfn_lo), size); 2625 else 2626 iommu_flush_write_buffer(iommu); 2627 2628 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT; 2629 start_paddr += paddr & ~PAGE_MASK; 2630 return start_paddr; 2631 2632error: 2633 if (iova) 2634 __free_iova(&domain->iovad, iova); 2635 printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n", 2636 pci_name(pdev), size, (unsigned long long)paddr, dir); 2637 return 0; 2638} 2639 2640static dma_addr_t intel_map_page(struct device *dev, struct page *page, 2641 unsigned long offset, size_t size, 2642 enum dma_data_direction dir, 2643 struct dma_attrs *attrs) 2644{ 2645 return __intel_map_single(dev, page_to_phys(page) + offset, size, 2646 dir, to_pci_dev(dev)->dma_mask); 2647} 2648 2649static void flush_unmaps(void) 2650{ 2651 int i, j; 2652 2653 timer_on = 0; 2654 2655 /* just flush them all */ 2656 for (i = 0; i < g_num_of_iommus; i++) { 2657 struct intel_iommu *iommu = g_iommus[i]; 2658 if (!iommu) 2659 continue; 2660 2661 if (!deferred_flush[i].next) 2662 continue; 2663 2664 iommu->flush.flush_iotlb(iommu, 0, 0, 0, 2665 DMA_TLB_GLOBAL_FLUSH); 2666 for (j = 0; j < deferred_flush[i].next; j++) { 2667 unsigned long mask; 2668 struct iova *iova = deferred_flush[i].iova[j]; 2669 2670 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1)); 2671 iommu_flush_dev_iotlb(deferred_flush[i].domain[j], 2672 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask); 2673 __free_iova(&deferred_flush[i].domain[j]->iovad, iova); 2674 } 2675 deferred_flush[i].next = 0; 2676 } 2677 2678 list_size = 0; 2679} 2680 2681static void flush_unmaps_timeout(unsigned long data) 2682{ 2683 unsigned long flags; 2684 2685 spin_lock_irqsave(&async_umap_flush_lock, flags); 2686 flush_unmaps(); 2687 spin_unlock_irqrestore(&async_umap_flush_lock, flags); 2688} 2689 2690static void add_unmap(struct dmar_domain *dom, struct iova *iova) 2691{ 2692 
unsigned long flags;
	int next, iommu_id;
	struct intel_iommu *iommu;

	spin_lock_irqsave(&async_umap_flush_lock, flags);
	/* table is full: drain it before queueing another entry */
	if (list_size == HIGH_WATER_MARK)
		flush_unmaps();

	iommu = domain_get_iommu(dom);
	iommu_id = iommu->seq_id;

	/* append (domain, iova) to this iommu's deferred table */
	next = deferred_flush[iommu_id].next;
	deferred_flush[iommu_id].domain[next] = dom;
	deferred_flush[iommu_id].iova[next] = iova;
	deferred_flush[iommu_id].next++;

	/* arm the 10ms flush timer if it is not already pending */
	if (!timer_on) {
		mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
		timer_on = 1;
	}
	list_size++;
	spin_unlock_irqrestore(&async_umap_flush_lock, flags);
}

/*
 * dma_map_ops.unmap_page: tear down the mapping that contains @dev_addr.
 *
 * The whole iova range found for @dev_addr is cleared and its page tables
 * freed.  In strict mode the IOTLB is flushed and the iova released
 * immediately; otherwise both are deferred through add_unmap().
 * @size, @dir and @attrs are not used by the body.
 */
static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
			     size_t size, enum dma_data_direction dir,
			     struct dma_attrs *attrs)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct dmar_domain *domain;
	unsigned long start_pfn, last_pfn;
	struct iova *iova;
	struct intel_iommu *iommu;

	if (iommu_no_mapping(dev))
		return;

	domain = find_domain(pdev);
	BUG_ON(!domain);

	iommu = domain_get_iommu(domain);

	iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
	if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
		      (unsigned long long)dev_addr))
		return;

	/* convert the MM-page iova bounds to VT-d page frame numbers */
	start_pfn = mm_to_dma_pfn(iova->pfn_lo);
	last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;

	pr_debug("Device %s unmapping: pfn %lx-%lx\n",
		 pci_name(pdev), start_pfn, last_pfn);

	/*  clear the whole page */
	dma_pte_clear_range(domain, start_pfn, last_pfn);

	/* free page tables */
	dma_pte_free_pagetable(domain, start_pfn, last_pfn);

	if (intel_iommu_strict) {
		iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
				      last_pfn - start_pfn + 1);
		/* free iova */
		__free_iova(&domain->iovad, iova);
	} else {
		add_unmap(domain, iova);
		/*
		 * queue up the release of the unmap to save the 1/6th of the
		 * cpu used up by the iotlb flush operation...
		 */
	}
}

/*
 * dma_map_ops.alloc_coherent: allocate zeroed pages and map them
 * bidirectionally using the device's coherent DMA mask.
 *
 * Returns the kernel virtual address and stores the bus address in
 * @dma_handle, or returns NULL if the allocation or the mapping fails.
 */
static void *intel_alloc_coherent(struct device *hwdev, size_t size,
				  dma_addr_t *dma_handle, gfp_t flags)
{
	void *vaddr;
	int order;

	size = PAGE_ALIGN(size);
	order = get_order(size);

	/*
	 * Translated devices drop the DMA zone flags; untranslated devices
	 * with a restrictive coherent mask get a DMA zone selected for them.
	 */
	if (!iommu_no_mapping(hwdev))
		flags &= ~(GFP_DMA | GFP_DMA32);
	else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
		if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
			flags |= GFP_DMA;
		else
			flags |= GFP_DMA32;
	}

	vaddr = (void *)__get_free_pages(flags, order);
	if (!vaddr)
		return NULL;
	memset(vaddr, 0, size);

	*dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
					 DMA_BIDIRECTIONAL,
					 hwdev->coherent_dma_mask);
	if (*dma_handle)
		return vaddr;
	/* mapping failed: give the pages back */
	free_pages((unsigned long)vaddr, order);
	return NULL;
}

/* dma_map_ops.free_coherent: unmap @dma_handle and free the backing pages. */
static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
				dma_addr_t dma_handle)
{
	int order;

	size = PAGE_ALIGN(size);
	order = get_order(size);

	intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
	free_pages((unsigned long)vaddr, order);
}

/*
 * dma_map_ops.unmap_sg: tear down the single iova range that backs a mapped
 * scatterlist, located through the first entry's dma_address.
 */
static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
			   int nelems, enum dma_data_direction dir,
			   struct dma_attrs *attrs)
{
	struct pci_dev *pdev = to_pci_dev(hwdev);
	struct dmar_domain *domain;
	unsigned long start_pfn, last_pfn;
	struct iova *iova;
	struct intel_iommu *iommu;

	if (iommu_no_mapping(hwdev))
		return;

	domain = find_domain(pdev);
	BUG_ON(!domain);

	iommu = domain_get_iommu(domain);

	iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
	if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
		      (unsigned long long)sglist[0].dma_address))
return; 2831 2832 start_pfn = mm_to_dma_pfn(iova->pfn_lo); 2833 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1; 2834 2835 /* clear the whole page */ 2836 dma_pte_clear_range(domain, start_pfn, last_pfn); 2837 2838 /* free page tables */ 2839 dma_pte_free_pagetable(domain, start_pfn, last_pfn); 2840 2841 if (intel_iommu_strict) { 2842 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn, 2843 last_pfn - start_pfn + 1); 2844 /* free iova */ 2845 __free_iova(&domain->iovad, iova); 2846 } else { 2847 add_unmap(domain, iova); 2848 /* 2849 * queue up the release of the unmap to save the 1/6th of the 2850 * cpu used up by the iotlb flush operation... 2851 */ 2852 } 2853} 2854 2855static int intel_nontranslate_map_sg(struct device *hddev, 2856 struct scatterlist *sglist, int nelems, int dir) 2857{ 2858 int i; 2859 struct scatterlist *sg; 2860 2861 for_each_sg(sglist, sg, nelems, i) { 2862 BUG_ON(!sg_page(sg)); 2863 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset; 2864 sg->dma_length = sg->length; 2865 } 2866 return nelems; 2867} 2868 2869static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, 2870 enum dma_data_direction dir, struct dma_attrs *attrs) 2871{ 2872 int i; 2873 struct pci_dev *pdev = to_pci_dev(hwdev); 2874 struct dmar_domain *domain; 2875 size_t size = 0; 2876 int prot = 0; 2877 size_t offset_pfn = 0; 2878 struct iova *iova = NULL; 2879 int ret; 2880 struct scatterlist *sg; 2881 unsigned long start_vpfn; 2882 struct intel_iommu *iommu; 2883 2884 BUG_ON(dir == DMA_NONE); 2885 if (iommu_no_mapping(hwdev)) 2886 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir); 2887 2888 domain = get_valid_domain_for_dev(pdev); 2889 if (!domain) 2890 return 0; 2891 2892 iommu = domain_get_iommu(domain); 2893 2894 for_each_sg(sglist, sg, nelems, i) 2895 size += aligned_nrpages(sg->offset, sg->length); 2896 2897 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), 2898 pdev->dma_mask); 2899 if (!iova) { 2900 sglist->dma_length 
= 0; 2901 return 0; 2902 } 2903 2904 /* 2905 * Check if DMAR supports zero-length reads on write only 2906 * mappings.. 2907 */ 2908 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \ 2909 !cap_zlr(iommu->cap)) 2910 prot |= DMA_PTE_READ; 2911 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) 2912 prot |= DMA_PTE_WRITE; 2913 2914 start_vpfn = mm_to_dma_pfn(iova->pfn_lo); 2915 2916 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot); 2917 if (unlikely(ret)) { 2918 /* clear the page */ 2919 dma_pte_clear_range(domain, start_vpfn, 2920 start_vpfn + size - 1); 2921 /* free page tables */ 2922 dma_pte_free_pagetable(domain, start_vpfn, 2923 start_vpfn + size - 1); 2924 /* free iova */ 2925 __free_iova(&domain->iovad, iova); 2926 return 0; 2927 } 2928 2929 /* it's a non-present to present mapping. Only flush if caching mode */ 2930 if (cap_caching_mode(iommu->cap)) 2931 iommu_flush_iotlb_psi(iommu, 0, start_vpfn, offset_pfn); 2932 else 2933 iommu_flush_write_buffer(iommu); 2934 2935 return nelems; 2936} 2937 2938static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr) 2939{ 2940 return !dma_addr; 2941} 2942 2943struct dma_map_ops intel_dma_ops = { 2944 .alloc_coherent = intel_alloc_coherent, 2945 .free_coherent = intel_free_coherent, 2946 .map_sg = intel_map_sg, 2947 .unmap_sg = intel_unmap_sg, 2948 .map_page = intel_map_page, 2949 .unmap_page = intel_unmap_page, 2950 .mapping_error = intel_mapping_error, 2951}; 2952 2953static inline int iommu_domain_cache_init(void) 2954{ 2955 int ret = 0; 2956 2957 iommu_domain_cache = kmem_cache_create("iommu_domain", 2958 sizeof(struct dmar_domain), 2959 0, 2960 SLAB_HWCACHE_ALIGN, 2961 2962 NULL); 2963 if (!iommu_domain_cache) { 2964 printk(KERN_ERR "Couldn't create iommu_domain cache\n"); 2965 ret = -ENOMEM; 2966 } 2967 2968 return ret; 2969} 2970 2971static inline int iommu_devinfo_cache_init(void) 2972{ 2973 int ret = 0; 2974 2975 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo", 
2976 sizeof(struct device_domain_info), 2977 0, 2978 SLAB_HWCACHE_ALIGN, 2979 NULL); 2980 if (!iommu_devinfo_cache) { 2981 printk(KERN_ERR "Couldn't create devinfo cache\n"); 2982 ret = -ENOMEM; 2983 } 2984 2985 return ret; 2986} 2987 2988static inline int iommu_iova_cache_init(void) 2989{ 2990 int ret = 0; 2991 2992 iommu_iova_cache = kmem_cache_create("iommu_iova", 2993 sizeof(struct iova), 2994 0, 2995 SLAB_HWCACHE_ALIGN, 2996 NULL); 2997 if (!iommu_iova_cache) { 2998 printk(KERN_ERR "Couldn't create iova cache\n"); 2999 ret = -ENOMEM; 3000 } 3001 3002 return ret; 3003} 3004 3005static int __init iommu_init_mempool(void) 3006{ 3007 int ret; 3008 ret = iommu_iova_cache_init(); 3009 if (ret) 3010 return ret; 3011 3012 ret = iommu_domain_cache_init(); 3013 if (ret) 3014 goto domain_error; 3015 3016 ret = iommu_devinfo_cache_init(); 3017 if (!ret) 3018 return ret; 3019 3020 kmem_cache_destroy(iommu_domain_cache); 3021domain_error: 3022 kmem_cache_destroy(iommu_iova_cache); 3023 3024 return -ENOMEM; 3025} 3026 3027static void __init iommu_exit_mempool(void) 3028{ 3029 kmem_cache_destroy(iommu_devinfo_cache); 3030 kmem_cache_destroy(iommu_domain_cache); 3031 kmem_cache_destroy(iommu_iova_cache); 3032 3033} 3034 3035static void __init init_no_remapping_devices(void) 3036{ 3037 struct dmar_drhd_unit *drhd; 3038 3039 for_each_drhd_unit(drhd) { 3040 if (!drhd->include_all) { 3041 int i; 3042 for (i = 0; i < drhd->devices_cnt; i++) 3043 if (drhd->devices[i] != NULL) 3044 break; 3045 /* ignore DMAR unit if no pci devices exist */ 3046 if (i == drhd->devices_cnt) 3047 drhd->ignored = 1; 3048 } 3049 } 3050 3051 if (dmar_map_gfx) 3052 return; 3053 3054 for_each_drhd_unit(drhd) { 3055 int i; 3056 if (drhd->ignored || drhd->include_all) 3057 continue; 3058 3059 for (i = 0; i < drhd->devices_cnt; i++) 3060 if (drhd->devices[i] && 3061 !IS_GFX_DEVICE(drhd->devices[i])) 3062 break; 3063 3064 if (i < drhd->devices_cnt) 3065 continue; 3066 3067 /* bypass IOMMU if it is just for gfx 
devices */ 3068 drhd->ignored = 1; 3069 for (i = 0; i < drhd->devices_cnt; i++) { 3070 if (!drhd->devices[i]) 3071 continue; 3072 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; 3073 } 3074 } 3075} 3076 3077#ifdef CONFIG_SUSPEND 3078static int init_iommu_hw(void) 3079{ 3080 struct dmar_drhd_unit *drhd; 3081 struct intel_iommu *iommu = NULL; 3082 3083 for_each_active_iommu(iommu, drhd) 3084 if (iommu->qi) 3085 dmar_reenable_qi(iommu); 3086 3087 for_each_active_iommu(iommu, drhd) { 3088 iommu_flush_write_buffer(iommu); 3089 3090 iommu_set_root_entry(iommu); 3091 3092 iommu->flush.flush_context(iommu, 0, 0, 0, 3093 DMA_CCMD_GLOBAL_INVL); 3094 iommu->flush.flush_iotlb(iommu, 0, 0, 0, 3095 DMA_TLB_GLOBAL_FLUSH); 3096 iommu_enable_translation(iommu); 3097 iommu_disable_protect_mem_regions(iommu); 3098 } 3099 3100 return 0; 3101} 3102 3103static void iommu_flush_all(void) 3104{ 3105 struct dmar_drhd_unit *drhd; 3106 struct intel_iommu *iommu; 3107 3108 for_each_active_iommu(iommu, drhd) { 3109 iommu->flush.flush_context(iommu, 0, 0, 0, 3110 DMA_CCMD_GLOBAL_INVL); 3111 iommu->flush.flush_iotlb(iommu, 0, 0, 0, 3112 DMA_TLB_GLOBAL_FLUSH); 3113 } 3114} 3115 3116static int iommu_suspend(struct sys_device *dev, pm_message_t state) 3117{ 3118 struct dmar_drhd_unit *drhd; 3119 struct intel_iommu *iommu = NULL; 3120 unsigned long flag; 3121 3122 for_each_active_iommu(iommu, drhd) { 3123 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS, 3124 GFP_ATOMIC); 3125 if (!iommu->iommu_state) 3126 goto nomem; 3127 } 3128 3129 iommu_flush_all(); 3130 3131 for_each_active_iommu(iommu, drhd) { 3132 iommu_disable_translation(iommu); 3133 3134 spin_lock_irqsave(&iommu->register_lock, flag); 3135 3136 iommu->iommu_state[SR_DMAR_FECTL_REG] = 3137 readl(iommu->reg + DMAR_FECTL_REG); 3138 iommu->iommu_state[SR_DMAR_FEDATA_REG] = 3139 readl(iommu->reg + DMAR_FEDATA_REG); 3140 iommu->iommu_state[SR_DMAR_FEADDR_REG] = 3141 readl(iommu->reg + DMAR_FEADDR_REG); 3142 
iommu->iommu_state[SR_DMAR_FEUADDR_REG] = 3143 readl(iommu->reg + DMAR_FEUADDR_REG); 3144 3145 spin_unlock_irqrestore(&iommu->register_lock, flag); 3146 } 3147 return 0; 3148 3149nomem: 3150 for_each_active_iommu(iommu, drhd) 3151 kfree(iommu->iommu_state); 3152 3153 return -ENOMEM; 3154} 3155 3156static int iommu_resume(struct sys_device *dev) 3157{ 3158 struct dmar_drhd_unit *drhd; 3159 struct intel_iommu *iommu = NULL; 3160 unsigned long flag; 3161 3162 if (init_iommu_hw()) { 3163 WARN(1, "IOMMU setup failed, DMAR can not resume!\n"); 3164 return -EIO; 3165 } 3166 3167 for_each_active_iommu(iommu, drhd) { 3168 3169 spin_lock_irqsave(&iommu->register_lock, flag); 3170 3171 writel(iommu->iommu_state[SR_DMAR_FECTL_REG], 3172 iommu->reg + DMAR_FECTL_REG); 3173 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG], 3174 iommu->reg + DMAR_FEDATA_REG); 3175 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG], 3176 iommu->reg + DMAR_FEADDR_REG); 3177 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG], 3178 iommu->reg + DMAR_FEUADDR_REG); 3179 3180 spin_unlock_irqrestore(&iommu->register_lock, flag); 3181 } 3182 3183 for_each_active_iommu(iommu, drhd) 3184 kfree(iommu->iommu_state); 3185 3186 return 0; 3187} 3188 3189static struct sysdev_class iommu_sysclass = { 3190 .name = "iommu", 3191 .resume = iommu_resume, 3192 .suspend = iommu_suspend, 3193}; 3194 3195static struct sys_device device_iommu = { 3196 .cls = &iommu_sysclass, 3197}; 3198 3199static int __init init_iommu_sysfs(void) 3200{ 3201 int error; 3202 3203 error = sysdev_class_register(&iommu_sysclass); 3204 if (error) 3205 return error; 3206 3207 error = sysdev_register(&device_iommu); 3208 if (error) 3209 sysdev_class_unregister(&iommu_sysclass); 3210 3211 return error; 3212} 3213 3214#else 3215static int __init init_iommu_sysfs(void) 3216{ 3217 return 0; 3218} 3219#endif /* CONFIG_PM */ 3220 3221/* 3222 * Here we only respond to action of unbound device from driver. 
3223 * 3224 * Added device is not attached to its DMAR domain here yet. That will happen 3225 * when mapping the device to iova. 3226 */ 3227static int device_notifier(struct notifier_block *nb, 3228 unsigned long action, void *data) 3229{ 3230 struct device *dev = data; 3231 struct pci_dev *pdev = to_pci_dev(dev); 3232 struct dmar_domain *domain; 3233 3234 if (iommu_no_mapping(dev)) 3235 return 0; 3236 3237 domain = find_domain(pdev); 3238 if (!domain) 3239 return 0; 3240 3241 if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) 3242 domain_remove_one_dev_info(domain, pdev); 3243 3244 return 0; 3245} 3246 3247static struct notifier_block device_nb = { 3248 .notifier_call = device_notifier, 3249}; 3250 3251int __init intel_iommu_init(void) 3252{ 3253 int ret = 0; 3254 int force_on = 0; 3255 3256 /* VT-d is required for a TXT/tboot launch, so enforce that */ 3257 force_on = tboot_force_iommu(); 3258 3259 if (dmar_table_init()) { 3260 if (force_on) 3261 panic("tboot: Failed to initialize DMAR table\n"); 3262 return -ENODEV; 3263 } 3264 3265 if (dmar_dev_scope_init()) { 3266 if (force_on) 3267 panic("tboot: Failed to initialize DMAR device scope\n"); 3268 return -ENODEV; 3269 } 3270 3271 /* 3272 * Check the need for DMA-remapping initialization now. 3273 * Above initialization will also be used by Interrupt-remapping. 
3274 */ 3275 if (no_iommu || dmar_disabled) 3276 return -ENODEV; 3277 3278 iommu_init_mempool(); 3279 dmar_init_reserved_ranges(); 3280 3281 init_no_remapping_devices(); 3282 3283 ret = init_dmars(); 3284 if (ret) { 3285 if (force_on) 3286 panic("tboot: Failed to initialize DMARs\n"); 3287 printk(KERN_ERR "IOMMU: dmar init failed\n"); 3288 put_iova_domain(&reserved_iova_list); 3289 iommu_exit_mempool(); 3290 return ret; 3291 } 3292 printk(KERN_INFO 3293 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n"); 3294 3295 init_timer(&unmap_timer); 3296#ifdef CONFIG_SWIOTLB 3297 swiotlb = 0; 3298#endif 3299 dma_ops = &intel_dma_ops; 3300 3301 init_iommu_sysfs(); 3302 3303 register_iommu(&intel_iommu_ops); 3304 3305 bus_register_notifier(&pci_bus_type, &device_nb); 3306 3307 return 0; 3308} 3309 3310static void iommu_detach_dependent_devices(struct intel_iommu *iommu, 3311 struct pci_dev *pdev) 3312{ 3313 struct pci_dev *tmp, *parent; 3314 3315 if (!iommu || !pdev) 3316 return; 3317 3318 /* dependent device detach */ 3319 tmp = pci_find_upstream_pcie_bridge(pdev); 3320 /* Secondary interface's bus number and devfn 0 */ 3321 if (tmp) { 3322 parent = pdev->bus->self; 3323 while (parent != tmp) { 3324 iommu_detach_dev(iommu, parent->bus->number, 3325 parent->devfn); 3326 parent = parent->bus->self; 3327 } 3328 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */ 3329 iommu_detach_dev(iommu, 3330 tmp->subordinate->number, 0); 3331 else /* this is a legacy PCI bridge */ 3332 iommu_detach_dev(iommu, tmp->bus->number, 3333 tmp->devfn); 3334 } 3335} 3336 3337static void domain_remove_one_dev_info(struct dmar_domain *domain, 3338 struct pci_dev *pdev) 3339{ 3340 struct device_domain_info *info; 3341 struct intel_iommu *iommu; 3342 unsigned long flags; 3343 int found = 0; 3344 struct list_head *entry, *tmp; 3345 3346 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number, 3347 pdev->devfn); 3348 if (!iommu) 3349 return; 3350 3351 
spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_safe(entry, tmp, &domain->devices) {
		info = list_entry(entry, struct device_domain_info, link);
		/* No need to compare PCI domain; it has to be the same */
		if (info->bus == pdev->bus->number &&
		    info->devfn == pdev->devfn) {
			list_del(&info->link);
			list_del(&info->global);
			if (info->dev)
				info->dev->dev.archdata.iommu = NULL;
			/* the detach calls below run with the lock dropped */
			spin_unlock_irqrestore(&device_domain_lock, flags);

			iommu_disable_dev_iotlb(info);
			iommu_detach_dev(iommu, info->bus, info->devfn);
			iommu_detach_dependent_devices(iommu, pdev);
			free_devinfo_mem(info);

			spin_lock_irqsave(&device_domain_lock, flags);

			if (found)
				break;
			else
				continue;
		}

		/* if there is no other devices under the same iommu
		 * owned by this domain, clear this iommu in iommu_bmp
		 * update iommu count and coherency
		 */
		if (iommu == device_to_iommu(info->segment, info->bus,
					    info->devfn))
			found = 1;
	}

	/* no remaining device of this domain sits behind this iommu */
	if (found == 0) {
		unsigned long tmp_flags;
		spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
		clear_bit(iommu->seq_id, &domain->iommu_bmp);
		domain->iommu_count--;
		domain_update_iommu_cap(domain);
		spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
	}

	spin_unlock_irqrestore(&device_domain_lock, flags);
}

/*
 * Detach every device from a virtual-machine domain.  device_domain_lock is
 * dropped around the per-device detach work and re-taken before the list is
 * examined again.
 */
static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
{
	struct device_domain_info *info;
	struct intel_iommu *iommu;
	unsigned long flags1, flags2;

	spin_lock_irqsave(&device_domain_lock, flags1);
	while (!list_empty(&domain->devices)) {
		info = list_entry(domain->devices.next,
			struct device_domain_info, link);
		list_del(&info->link);
		list_del(&info->global);
		if (info->dev)
			info->dev->dev.archdata.iommu = NULL;

		spin_unlock_irqrestore(&device_domain_lock, flags1);

		iommu_disable_dev_iotlb(info);
		iommu = device_to_iommu(info->segment, info->bus, info->devfn);
		iommu_detach_dev(iommu, info->bus, info->devfn);
		iommu_detach_dependent_devices(iommu, info->dev);

		/* clear this iommu in iommu_bmp, update iommu count
		 * and capabilities
		 */
		spin_lock_irqsave(&domain->iommu_lock, flags2);
		if (test_and_clear_bit(iommu->seq_id,
				       &domain->iommu_bmp)) {
			domain->iommu_count--;
			domain_update_iommu_cap(domain);
		}
		spin_unlock_irqrestore(&domain->iommu_lock, flags2);

		free_devinfo_mem(info);
		spin_lock_irqsave(&device_domain_lock, flags1);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags1);
}

/* domain id for virtual machine, it won't be set in context */
static unsigned long vm_domid;

/*
 * Return the smallest agaw among the domain's own agaw and that of every
 * iommu whose bit is set in the domain's iommu_bmp.
 */
static int vm_domain_min_agaw(struct dmar_domain *domain)
{
	int i;
	int min_agaw = domain->agaw;

	i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
	for (; i < g_num_of_iommus; ) {
		if (min_agaw > g_iommus[i]->agaw)
			min_agaw = g_iommus[i]->agaw;

		i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
	}

	return min_agaw;
}

/*
 * Allocate a dmar_domain flagged as a virtual-machine domain and give it
 * the next vm_domid.  Returns NULL if the allocation fails.
 */
static struct dmar_domain *iommu_alloc_vm_domain(void)
{
	struct dmar_domain *domain;

	domain = alloc_domain_mem();
	if (!domain)
		return NULL;

	domain->id = vm_domid++;
	domain->nid = -1;
	memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
	domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;

	return domain;
}

/*
 * Initialise a freshly allocated domain for @guest_width address bits:
 * iova allocator, lock, reserved ranges, agaw and the top-level page table.
 * Returns 0, or -ENOMEM if the top-level table cannot be allocated.
 */
static int md_domain_init(struct dmar_domain *domain, int guest_width)
{
	int adjust_width;

	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
	spin_lock_init(&domain->iommu_lock);

	domain_reserve_special_ranges(domain);

	/* calculate AGAW */
	domain->gaw = guest_width;
	adjust_width = guestwidth_to_adjustwidth(guest_width);
domain->agaw = width_to_agaw(adjust_width);

	INIT_LIST_HEAD(&domain->devices);

	domain->iommu_count = 0;
	domain->iommu_coherency = 0;
	domain->iommu_snooping = 0;
	domain->max_addr = 0;
	domain->nid = -1;

	/* always allocate the top pgd */
	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
	if (!domain->pgd)
		return -ENOMEM;
	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
	return 0;
}

/*
 * Remove @domain from the domain-id bitmap and domain table of every
 * non-ignored iommu that references it.
 */
static void iommu_free_vm_domain(struct dmar_domain *domain)
{
	unsigned long flags;
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	unsigned long i;
	unsigned long ndomains;

	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;
		iommu = drhd->iommu;

		ndomains = cap_ndoms(iommu->cap);
		/* walk only the domain ids currently in use on this iommu */
		i = find_first_bit(iommu->domain_ids, ndomains);
		for (; i < ndomains; ) {
			if (iommu->domains[i] == domain) {
				spin_lock_irqsave(&iommu->lock, flags);
				clear_bit(i, iommu->domain_ids);
				iommu->domains[i] = NULL;
				spin_unlock_irqrestore(&iommu->lock, flags);
				break;
			}
			i = find_next_bit(iommu->domain_ids, ndomains, i+1);
		}
	}
}

/*
 * Tear down a virtual-machine domain: detach all devices, release the iova
 * space, clear and free the page tables, drop the per-iommu references and
 * free the domain itself.  A NULL @domain is ignored.
 */
static void vm_domain_exit(struct dmar_domain *domain)
{
	/* Domain 0 is reserved, so dont process it */
	if (!domain)
		return;

	vm_domain_remove_all_dev_info(domain);
	/* destroy iovas */
	put_iova_domain(&domain->iovad);

	/* clear ptes */
	dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));

	/* free page tables */
	dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));

	iommu_free_vm_domain(domain);
	free_domain_mem(domain);
}

/*
 * iommu_ops.domain_init: allocate and initialise a VM dmar_domain and store
 * it in @domain->priv.  Returns 0 or -ENOMEM.
 */
static int intel_iommu_domain_init(struct iommu_domain *domain)
{
	struct dmar_domain *dmar_domain;

	dmar_domain = iommu_alloc_vm_domain();
	if (!dmar_domain) {
		printk(KERN_ERR
			"intel_iommu_domain_init: dmar_domain == NULL\n");
		return -ENOMEM;
	}
	if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
		printk(KERN_ERR
			"intel_iommu_domain_init() failed\n");
		vm_domain_exit(dmar_domain);
		return -ENOMEM;
	}
	domain->priv = dmar_domain;

	return 0;
}

/* iommu_ops.domain_destroy: detach the private domain and tear it down. */
static void intel_iommu_domain_destroy(struct iommu_domain *domain)
{
	struct dmar_domain *dmar_domain = domain->priv;

	domain->priv = NULL;
	vm_domain_exit(dmar_domain);
}

/*
 * iommu_ops.attach_dev: attach @dev to @domain.
 *
 * A device that is already context-mapped is first removed from its old
 * domain.  Returns -ENODEV when no iommu covers the device, -EFAULT when
 * that iommu's address width cannot reach the domain's highest mapped
 * address, otherwise the result of domain_add_dev_info().
 */
static int intel_iommu_attach_device(struct iommu_domain *domain,
				     struct device *dev)
{
	struct dmar_domain *dmar_domain = domain->priv;
	struct pci_dev *pdev = to_pci_dev(dev);
	struct intel_iommu *iommu;
	int addr_width;
	u64 end;

	/* normally pdev is not mapped */
	if (unlikely(domain_context_mapped(pdev))) {
		struct dmar_domain *old_domain;

		old_domain = find_domain(pdev);
		if (old_domain) {
			if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
			    dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
				domain_remove_one_dev_info(old_domain, pdev);
			else
				domain_remove_dev_info(old_domain);
		}
	}

	iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
				pdev->devfn);
	if (!iommu)
		return -ENODEV;

	/* check if this iommu agaw is sufficient for max mapped address */
	addr_width = agaw_to_width(iommu->agaw);
	end = DOMAIN_MAX_ADDR(addr_width);
	end = end & VTD_PAGE_MASK;
	if (end < dmar_domain->max_addr) {
		printk(KERN_ERR "%s: iommu agaw (%d) is not "
		       "sufficient for the mapped address (%llx)\n",
		       __func__, iommu->agaw, dmar_domain->max_addr);
		return -EFAULT;
	}

	return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
}

static void intel_iommu_detach_device(struct iommu_domain *domain,
				      struct device *dev)
{
struct dmar_domain *dmar_domain = domain->priv; 3624 struct pci_dev *pdev = to_pci_dev(dev); 3625 3626 domain_remove_one_dev_info(dmar_domain, pdev); 3627} 3628 3629static int intel_iommu_map_range(struct iommu_domain *domain, 3630 unsigned long iova, phys_addr_t hpa, 3631 size_t size, int iommu_prot) 3632{ 3633 struct dmar_domain *dmar_domain = domain->priv; 3634 u64 max_addr; 3635 int addr_width; 3636 int prot = 0; 3637 int ret; 3638 3639 if (iommu_prot & IOMMU_READ) 3640 prot |= DMA_PTE_READ; 3641 if (iommu_prot & IOMMU_WRITE) 3642 prot |= DMA_PTE_WRITE; 3643 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping) 3644 prot |= DMA_PTE_SNP; 3645 3646 max_addr = iova + size; 3647 if (dmar_domain->max_addr < max_addr) { 3648 int min_agaw; 3649 u64 end; 3650 3651 /* check if minimum agaw is sufficient for mapped address */ 3652 min_agaw = vm_domain_min_agaw(dmar_domain); 3653 addr_width = agaw_to_width(min_agaw); 3654 end = DOMAIN_MAX_ADDR(addr_width); 3655 end = end & VTD_PAGE_MASK; 3656 if (end < max_addr) { 3657 printk(KERN_ERR "%s: iommu agaw (%d) is not " 3658 "sufficient for the mapped address (%llx)\n", 3659 __func__, min_agaw, max_addr); 3660 return -EFAULT; 3661 } 3662 dmar_domain->max_addr = max_addr; 3663 } 3664 /* Round up size to next multiple of PAGE_SIZE, if it and 3665 the low bits of hpa would take us onto the next page */ 3666 size = aligned_nrpages(hpa, size); 3667 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT, 3668 hpa >> VTD_PAGE_SHIFT, size, prot); 3669 return ret; 3670} 3671 3672static void intel_iommu_unmap_range(struct iommu_domain *domain, 3673 unsigned long iova, size_t size) 3674{ 3675 struct dmar_domain *dmar_domain = domain->priv; 3676 3677 if (!size) 3678 return; 3679 3680 dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT, 3681 (iova + size - 1) >> VTD_PAGE_SHIFT); 3682 3683 if (dmar_domain->max_addr == iova + size) 3684 dmar_domain->max_addr = iova; 3685} 3686 3687static phys_addr_t 
intel_iommu_iova_to_phys(struct iommu_domain *domain, 3688 unsigned long iova) 3689{ 3690 struct dmar_domain *dmar_domain = domain->priv; 3691 struct dma_pte *pte; 3692 u64 phys = 0; 3693 3694 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT); 3695 if (pte) 3696 phys = dma_pte_addr(pte); 3697 3698 return phys; 3699} 3700 3701static int intel_iommu_domain_has_cap(struct iommu_domain *domain, 3702 unsigned long cap) 3703{ 3704 struct dmar_domain *dmar_domain = domain->priv; 3705 3706 if (cap == IOMMU_CAP_CACHE_COHERENCY) 3707 return dmar_domain->iommu_snooping; 3708 3709 return 0; 3710} 3711 3712static struct iommu_ops intel_iommu_ops = { 3713 .domain_init = intel_iommu_domain_init, 3714 .domain_destroy = intel_iommu_domain_destroy, 3715 .attach_dev = intel_iommu_attach_device, 3716 .detach_dev = intel_iommu_detach_device, 3717 .map = intel_iommu_map_range, 3718 .unmap = intel_iommu_unmap_range, 3719 .iova_to_phys = intel_iommu_iova_to_phys, 3720 .domain_has_cap = intel_iommu_domain_has_cap, 3721}; 3722 3723static void __devinit quirk_iommu_rwbf(struct pci_dev *dev) 3724{ 3725 /* 3726 * Mobile 4 Series Chipset neglects to set RWBF capability, 3727 * but needs it: 3728 */ 3729 printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n"); 3730 rwbf_quirk = 1; 3731} 3732 3733DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf); 3734 3735/* On Tylersburg chipsets, some BIOSes have been known to enable the 3736 ISOCH DMAR unit for the Azalia sound device, but not give it any 3737 TLB entries, which causes it to deadlock. Check for that. We do 3738 this in a function called from init_dmars(), instead of in a PCI 3739 quirk, because we don't want to print the obnoxious "BIOS broken" 3740 message if VT-d is actually disabled. 3741*/ 3742static void __init check_tylersburg_isoch(void) 3743{ 3744 struct pci_dev *pdev; 3745 uint32_t vtisochctrl; 3746 3747 /* If there's no Azalia in the system anyway, forget it. 
*/ 3748 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL); 3749 if (!pdev) 3750 return; 3751 pci_dev_put(pdev); 3752 3753 /* System Management Registers. Might be hidden, in which case 3754 we can't do the sanity check. But that's OK, because the 3755 known-broken BIOSes _don't_ actually hide it, so far. */ 3756 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL); 3757 if (!pdev) 3758 return; 3759 3760 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) { 3761 pci_dev_put(pdev); 3762 return; 3763 } 3764 3765 pci_dev_put(pdev); 3766 3767 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */ 3768 if (vtisochctrl & 1) 3769 return; 3770 3771 /* Drop all bits other than the number of TLB entries */ 3772 vtisochctrl &= 0x1c; 3773 3774 /* If we have the recommended number of TLB entries (16), fine. */ 3775 if (vtisochctrl == 0x10) 3776 return; 3777 3778 /* Zero TLB entries? You get to ride the short bus to school. */ 3779 if (!vtisochctrl) { 3780 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n" 3781 "BIOS vendor: %s; Ver: %s; Product Version: %s\n", 3782 dmi_get_system_info(DMI_BIOS_VENDOR), 3783 dmi_get_system_info(DMI_BIOS_VERSION), 3784 dmi_get_system_info(DMI_PRODUCT_VERSION)); 3785 iommu_identity_mapping |= IDENTMAP_AZALIA; 3786 return; 3787 } 3788 3789 printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n", 3790 vtisochctrl); 3791}