Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
at v5.2-rc7 · 5846 lines · 148 kB
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2006-2014 Intel Corporation.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>,
 *          Ashok Raj <ashok.raj@intel.com>,
 *          Shaohua Li <shaohua.li@intel.com>,
 *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
 *          Fenghua Yu <fenghua.yu@intel.com>
 *          Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt)	"DMAR: " fmt
#define dev_fmt(fmt)	pr_fmt(fmt)

#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/memory.h>
#include <linux/cpu.h>
#include <linux/timer.h>
#include <linux/io.h>
#include <linux/iova.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/syscore_ops.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <linux/pci-ats.h>
#include <linux/memblock.h>
#include <linux/dma-contiguous.h>
#include <linux/dma-direct.h>
#include <linux/crash_dump.h>
#include <linux/numa.h>
#include <asm/irq_remapping.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>

#include "irq_remapping.h"
#include "intel-pasid.h"

#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 57

#define MAX_AGAW_WIDTH 64
#define MAX_AGAW_PFN_WIDTH	(MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)

#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)

/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
   to match. That way, we can use 'unsigned long' for PFNs with impunity. */
#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)

/* IO virtual address start page frame number */
#define IOVA_START_PFN		(1)

#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)

/* page table handling */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)

/*
 * This bitmap is used to advertise the page sizes our hardware supports
 * to the IOMMU core, which will then use this information to split
 * physically contiguous memory regions it is mapping into page sizes
 * that we support.
 *
 * Traditionally the IOMMU core just handed us the mappings directly,
 * after making sure the size is an order of a 4KiB page and that the
 * mapping has natural alignment.
 *
 * To retain this behavior, we currently advertise that we support
 * all page sizes that are an order of 4KiB.
 *
 * If at some point we'd like to utilize the IOMMU core's new behavior,
 * we could change this to advertise the real page sizes we support.
 */
#define INTEL_IOMMU_PGSIZES	(~0xFFFUL)

static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
}

static inline int width_to_agaw(int width)
{
	return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
}

static inline unsigned int level_to_offset_bits(int level)
{
	return (level - 1) * LEVEL_STRIDE;
}

static inline int pfn_level_offset(unsigned long pfn, int level)
{
	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
}

static inline unsigned long level_mask(int level)
{
	return -1UL << level_to_offset_bits(level);
}

static inline unsigned long level_size(int level)
{
	return 1UL << level_to_offset_bits(level);
}

static inline unsigned long align_to_level(unsigned long pfn, int level)
{
	return (pfn + level_size(level) - 1) & level_mask(level);
}

static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
{
	return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
}
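/*
 * For example: with the 9-bit LEVEL_STRIDE above, agaw 2 selects a
 * 4-level page table (agaw_to_level(2) == 4) covering 30 + 2 * 9 == 48
 * bits of address, and width_to_agaw(48) gives 2 back. A level-2 PTE
 * then spans level_size(2) == 1 << 9 == 512 VT-d pages, i.e. 2MiB.
 */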

/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
   are never going to work. */
static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
{
	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
{
	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
}
static inline unsigned long page_to_dma_pfn(struct page *pg)
{
	return mm_to_dma_pfn(page_to_pfn(pg));
}
static inline unsigned long virt_to_dma_pfn(void *p)
{
	return page_to_dma_pfn(virt_to_page(p));
}

/* global iommu list, set NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;

static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;

/*
 * set to 1 to panic kernel if can't successfully enable VT-d
 * (used when kernel is launched w/ TXT)
 */
static int force_on = 0;
int intel_iommu_tboot_noforce;
static int no_platform_optin;

#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))

/*
 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
 * if marked present.
 */
static phys_addr_t root_entry_lctp(struct root_entry *re)
{
	if (!(re->lo & 1))
		return 0;

	return re->lo & VTD_PAGE_MASK;
}

/*
 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
 * if marked present.
 */
static phys_addr_t root_entry_uctp(struct root_entry *re)
{
	if (!(re->hi & 1))
		return 0;

	return re->hi & VTD_PAGE_MASK;
}

static inline void context_clear_pasid_enable(struct context_entry *context)
{
	context->lo &= ~(1ULL << 11);
}

static inline bool context_pasid_enabled(struct context_entry *context)
{
	return !!(context->lo & (1ULL << 11));
}

static inline void context_set_copied(struct context_entry *context)
{
	context->hi |= (1ull << 3);
}

static inline bool context_copied(struct context_entry *context)
{
	return !!(context->hi & (1ULL << 3));
}

static inline bool __context_present(struct context_entry *context)
{
	return (context->lo & 1);
}

bool context_present(struct context_entry *context)
{
	return context_pasid_enabled(context) ?
		     __context_present(context) :
		     __context_present(context) && !context_copied(context);
}

static inline void context_set_present(struct context_entry *context)
{
	context->lo |= 1;
}

static inline void context_set_fault_enable(struct context_entry *context)
{
	context->lo &= (((u64)-1) << 2) | 1;
}

static inline void context_set_translation_type(struct context_entry *context,
						unsigned long value)
{
	context->lo &= (((u64)-1) << 4) | 3;
	context->lo |= (value & 3) << 2;
}

static inline void context_set_address_root(struct context_entry *context,
					    unsigned long value)
{
	context->lo &= ~VTD_PAGE_MASK;
	context->lo |= value & VTD_PAGE_MASK;
}

static inline void context_set_address_width(struct context_entry *context,
					     unsigned long value)
{
	context->hi |= value & 7;
}

static inline void context_set_domain_id(struct context_entry *context,
					 unsigned long value)
{
	context->hi |= (value & ((1 << 16) - 1)) << 8;
}

static inline int context_domain_id(struct context_entry *c)
{
	return (c->hi >> 8) & 0xffff;
}

static inline void context_clear_entry(struct context_entry *context)
{
	context->lo = 0;
	context->hi = 0;
}
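/*
 * Taken together, the helpers above encode the legacy context entry
 * layout: lo bit 0 is Present, bit 1 the fault-processing disable bit
 * (cleared by context_set_fault_enable()), bits 3:2 the Translation
 * Type, bit 11 PASID Enable, and the page-table root in the
 * page-aligned upper bits; hi bits 2:0 hold the Address Width, bits
 * 23:8 the Domain ID, and bit 3 marks an entry copied from a previous
 * kernel during kdump.
 */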

/*
 * This domain is a statically identity mapping domain.
 *	1. This domain creates a static 1:1 mapping to all usable memory.
 *	2. It maps to each iommu if successful.
 *	3. Each iommu maps to this domain if successful.
 */
static struct dmar_domain *si_domain;
static int hw_pass_through = 1;

/*
 * Domain represents a virtual machine; more than one device
 * across iommus may be owned by one domain, e.g. a kvm guest.
 */
#define DOMAIN_FLAG_VIRTUAL_MACHINE	(1 << 0)

/* si_domain contains multiple devices */
#define DOMAIN_FLAG_STATIC_IDENTITY	(1 << 1)

#define for_each_domain_iommu(idx, domain)			\
	for (idx = 0; idx < g_num_of_iommus; idx++)		\
		if (domain->iommu_refcnt[idx])

struct dmar_rmrr_unit {
	struct list_head list;		/* list of rmrr units	*/
	struct acpi_dmar_header *hdr;	/* ACPI header		*/
	u64	base_address;		/* reserved base address*/
	u64	end_address;		/* reserved end address */
	struct dmar_dev_scope *devices;	/* target devices */
	int	devices_cnt;		/* target device count */
	struct iommu_resv_region *resv;	/* reserved region handle */
};

struct dmar_atsr_unit {
	struct list_head list;		/* list of ATSR units */
	struct acpi_dmar_header *hdr;	/* ACPI header */
	struct dmar_dev_scope *devices;	/* target devices */
	int devices_cnt;		/* target device count */
	u8 include_all:1;		/* include all ports */
};

static LIST_HEAD(dmar_atsr_units);
static LIST_HEAD(dmar_rmrr_units);

#define for_each_rmrr_units(rmrr) \
	list_for_each_entry(rmrr, &dmar_rmrr_units, list)

/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;

static void domain_exit(struct dmar_domain *domain);
static void domain_remove_dev_info(struct dmar_domain *domain);
static void dmar_remove_one_dev_info(struct device *dev);
static void __dmar_remove_one_dev_info(struct device_domain_info *info);
static void domain_context_clear(struct intel_iommu *iommu,
				 struct device *dev);
static int domain_detach_iommu(struct dmar_domain *domain,
			       struct intel_iommu *iommu);

#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
int dmar_disabled = 0;
#else
int dmar_disabled = 1;
#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/

int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);

static int dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;
static int intel_iommu_superpage = 1;
static int intel_iommu_sm;
static int iommu_identity_mapping;

#define IDENTMAP_ALL		1
#define IDENTMAP_GFX		2
#define IDENTMAP_AZALIA		4

#define sm_supported(iommu)	(intel_iommu_sm && ecap_smts((iommu)->ecap))
#define pasid_supported(iommu)	(sm_supported(iommu) &&			\
				 ecap_pasid((iommu)->ecap))

int intel_iommu_gfx_mapped;
EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);

#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);

/*
 * Iterate over elements in device_domain_list and call the specified
 * callback @fn against each element.
 */
int for_each_device_domain(int (*fn)(struct device_domain_info *info,
				     void *data), void *data)
{
	int ret = 0;
	unsigned long flags;
	struct device_domain_info *info;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry(info, &device_domain_list, global) {
		ret = fn(info, data);
		if (ret) {
			spin_unlock_irqrestore(&device_domain_lock, flags);
			return ret;
		}
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return 0;
}

const struct iommu_ops intel_iommu_ops;

static bool translation_pre_enabled(struct intel_iommu *iommu)
{
	return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
}

static void clear_translation_pre_enabled(struct intel_iommu *iommu)
{
	iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
}

static void init_translation_status(struct intel_iommu *iommu)
{
	u32 gsts;

	gsts = readl(iommu->reg + DMAR_GSTS_REG);
	if (gsts & DMA_GSTS_TES)
		iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
}

/* Convert a generic 'struct iommu_domain' to the private 'struct dmar_domain' */
static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct dmar_domain, domain);
}

static int __init intel_iommu_setup(char *str)
{
	if (!str)
		return -EINVAL;
	while (*str) {
		if (!strncmp(str, "on", 2)) {
			dmar_disabled = 0;
			pr_info("IOMMU enabled\n");
		} else if (!strncmp(str, "off", 3)) {
			dmar_disabled = 1;
			no_platform_optin = 1;
			pr_info("IOMMU disabled\n");
		} else if (!strncmp(str, "igfx_off", 8)) {
			dmar_map_gfx = 0;
			pr_info("Disable GFX device mapping\n");
		} else if (!strncmp(str, "forcedac", 8)) {
			pr_info("Forcing DAC for PCI devices\n");
			dmar_forcedac = 1;
		} else if (!strncmp(str, "strict", 6)) {
			pr_info("Disable batched IOTLB flush\n");
			intel_iommu_strict = 1;
		} else if (!strncmp(str, "sp_off", 6)) {
			pr_info("Disable supported super page\n");
			intel_iommu_superpage = 0;
		} else if (!strncmp(str, "sm_on", 5)) {
			pr_info("Intel-IOMMU: scalable mode supported\n");
			intel_iommu_sm = 1;
		} else if (!strncmp(str, "tboot_noforce", 13)) {
			printk(KERN_INFO
				"Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
			intel_iommu_tboot_noforce = 1;
		}

		str += strcspn(str, ",");
		while (*str == ',')
			str++;
	}
	return 0;
}
__setup("intel_iommu=", intel_iommu_setup);
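/*
 * The options above combine as a comma-separated list on the kernel
 * command line, e.g.:
 *
 *	intel_iommu=on,sm_on,strict
 *
 * which enables the IOMMU, opts in to scalable mode and disables
 * batched IOTLB flushing.
 */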

static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;

static struct dmar_domain *get_iommu_domain(struct intel_iommu *iommu, u16 did)
{
	struct dmar_domain **domains;
	int idx = did >> 8;

	domains = iommu->domains[idx];
	if (!domains)
		return NULL;

	return domains[did & 0xff];
}

static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
			     struct dmar_domain *domain)
{
	struct dmar_domain **domains;
	int idx = did >> 8;

	if (!iommu->domains[idx]) {
		size_t size = 256 * sizeof(struct dmar_domain *);
		iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
	}

	domains = iommu->domains[idx];
	if (WARN_ON(!domains))
		return;
	else
		domains[did & 0xff] = domain;
}
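/*
 * For example: domain id 0x1234 lands in iommu->domains[0x12][0x34];
 * the 256-entry second-level arrays are only allocated when a domain
 * id in their range is first used.
 */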

void *alloc_pgtable_page(int node)
{
	struct page *page;
	void *vaddr = NULL;

	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
	if (page)
		vaddr = page_address(page);
	return vaddr;
}

void free_pgtable_page(void *vaddr)
{
	free_page((unsigned long)vaddr);
}

static inline void *alloc_domain_mem(void)
{
	return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
}

static void free_domain_mem(void *vaddr)
{
	kmem_cache_free(iommu_domain_cache, vaddr);
}

static inline void *alloc_devinfo_mem(void)
{
	return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
}

static inline void free_devinfo_mem(void *vaddr)
{
	kmem_cache_free(iommu_devinfo_cache, vaddr);
}

static inline int domain_type_is_vm(struct dmar_domain *domain)
{
	return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
}

static inline int domain_type_is_si(struct dmar_domain *domain)
{
	return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
}

static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
{
	return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
				DOMAIN_FLAG_STATIC_IDENTITY);
}

static inline int domain_pfn_supported(struct dmar_domain *domain,
				       unsigned long pfn)
{
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;

	return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
}

static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
{
	unsigned long sagaw;
	int agaw = -1;

	sagaw = cap_sagaw(iommu->cap);
	for (agaw = width_to_agaw(max_gaw);
	     agaw >= 0; agaw--) {
		if (test_bit(agaw, &sagaw))
			break;
	}

	return agaw;
}

/*
 * Calculate max SAGAW for each iommu.
 */
int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
}

/*
 * calculate agaw for each iommu.
 * "SAGAW" may be different across iommus, so use a default agaw and
 * get a lower supported agaw for iommus that don't support the default.
 */
int iommu_calculate_agaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
}
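/*
 * For example: a unit whose SAGAW capability field has bits 1 and 2
 * set supports 3- and 4-level tables; __iommu_calculate_agaw() with
 * the default 57-bit width starts at agaw 3 (width_to_agaw(57)),
 * finds that bit unset, and falls back to agaw 2 (48 bits).
 */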

/* This function only returns a single iommu in a domain */
struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
{
	int iommu_id;

	/* si_domain and vm domain should not get here. */
	BUG_ON(domain_type_is_vm_or_si(domain));
	for_each_domain_iommu(iommu_id, domain)
		break;

	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
		return NULL;

	return g_iommus[iommu_id];
}

static void domain_update_iommu_coherency(struct dmar_domain *domain)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	bool found = false;
	int i;

	domain->iommu_coherency = 1;

	for_each_domain_iommu(i, domain) {
		found = true;
		if (!ecap_coherent(g_iommus[i]->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
	}
	if (found)
		return;

	/* No hardware attached; use lowest common denominator */
	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (!ecap_coherent(iommu->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
	}
	rcu_read_unlock();
}

static int domain_update_iommu_snooping(struct intel_iommu *skip)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int ret = 1;

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (iommu != skip) {
			if (!ecap_sc_support(iommu->ecap)) {
				ret = 0;
				break;
			}
		}
	}
	rcu_read_unlock();

	return ret;
}

static int domain_update_iommu_superpage(struct intel_iommu *skip)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int mask = 0xf;

	if (!intel_iommu_superpage) {
		return 0;
	}

	/* set iommu_superpage to the smallest common denominator */
	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (iommu != skip) {
			mask &= cap_super_page_val(iommu->cap);
			if (!mask)
				break;
		}
	}
	rcu_read_unlock();

	return fls(mask);
}

/* Some capabilities may be different across iommus */
static void domain_update_iommu_cap(struct dmar_domain *domain)
{
	domain_update_iommu_coherency(domain);
	domain->iommu_snooping = domain_update_iommu_snooping(NULL);
	domain->iommu_superpage = domain_update_iommu_superpage(NULL);
}

struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
					 u8 devfn, int alloc)
{
	struct root_entry *root = &iommu->root_entry[bus];
	struct context_entry *context;
	u64 *entry;

	entry = &root->lo;
	if (sm_supported(iommu)) {
		if (devfn >= 0x80) {
			devfn -= 0x80;
			entry = &root->hi;
		}
		devfn *= 2;
	}
	if (*entry & 1)
		context = phys_to_virt(*entry & VTD_PAGE_MASK);
	else {
		unsigned long phy_addr;
		if (!alloc)
			return NULL;

		context = alloc_pgtable_page(iommu->node);
		if (!context)
			return NULL;

		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
		phy_addr = virt_to_phys((void *)context);
		*entry = phy_addr | 1;
		__iommu_flush_cache(iommu, entry, sizeof(*entry));
	}
	return &context[devfn];
}
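/*
 * For example: in scalable mode each half of a root entry covers only
 * 128 devfns and the extended context entries are twice as wide, so
 * devfn 0x85 selects root->hi and index (0x85 - 0x80) * 2 == 10, i.e.
 * the pair of 128-bit slots forming the sixth extended context entry.
 */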

static int iommu_dummy(struct device *dev)
{
	return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
}

static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
{
	struct dmar_drhd_unit *drhd = NULL;
	struct intel_iommu *iommu;
	struct device *tmp;
	struct pci_dev *ptmp, *pdev = NULL;
	u16 segment = 0;
	int i;

	if (iommu_dummy(dev))
		return NULL;

	if (dev_is_pci(dev)) {
		struct pci_dev *pf_pdev;

		pdev = to_pci_dev(dev);

#ifdef CONFIG_X86
		/* VMD child devices currently cannot be handled individually */
		if (is_vmd(pdev->bus))
			return NULL;
#endif

		/* VFs aren't listed in scope tables; we need to look up
		 * the PF instead to find the IOMMU. */
		pf_pdev = pci_physfn(pdev);
		dev = &pf_pdev->dev;
		segment = pci_domain_nr(pdev->bus);
	} else if (has_acpi_companion(dev))
		dev = &ACPI_COMPANION(dev)->dev;

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (pdev && segment != drhd->segment)
			continue;

		for_each_active_dev_scope(drhd->devices,
					  drhd->devices_cnt, i, tmp) {
			if (tmp == dev) {
				/* For a VF use its original BDF# not that of the PF
				 * which we used for the IOMMU lookup. Strictly speaking
				 * we could do this for all PCI devices; we only need to
				 * get the BDF# from the scope table for ACPI matches. */
				if (pdev && pdev->is_virtfn)
					goto got_pdev;

				*bus = drhd->devices[i].bus;
				*devfn = drhd->devices[i].devfn;
				goto out;
			}

			if (!pdev || !dev_is_pci(tmp))
				continue;

			ptmp = to_pci_dev(tmp);
			if (ptmp->subordinate &&
			    ptmp->subordinate->number <= pdev->bus->number &&
			    ptmp->subordinate->busn_res.end >= pdev->bus->number)
				goto got_pdev;
		}

		if (pdev && drhd->include_all) {
		got_pdev:
			*bus = pdev->bus->number;
			*devfn = pdev->devfn;
			goto out;
		}
	}
	iommu = NULL;
 out:
	rcu_read_unlock();

	return iommu;
}

static void domain_flush_cache(struct dmar_domain *domain,
			       void *addr, int size)
{
	if (!domain->iommu_coherency)
		clflush_cache_range(addr, size);
}

static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct context_entry *context;
	int ret = 0;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	context = iommu_context_addr(iommu, bus, devfn, 0);
	if (context)
		ret = context_present(context);
	spin_unlock_irqrestore(&iommu->lock, flags);
	return ret;
}

static void free_context_table(struct intel_iommu *iommu)
{
	int i;
	unsigned long flags;
	struct context_entry *context;

	spin_lock_irqsave(&iommu->lock, flags);
	if (!iommu->root_entry) {
		goto out;
	}
	for (i = 0; i < ROOT_ENTRY_NR; i++) {
		context = iommu_context_addr(iommu, i, 0, 0);
		if (context)
			free_pgtable_page(context);

		if (!sm_supported(iommu))
			continue;

		context = iommu_context_addr(iommu, i, 0x80, 0);
		if (context)
			free_pgtable_page(context);

	}
	free_pgtable_page(iommu->root_entry);
	iommu->root_entry = NULL;
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
}

static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
				      unsigned long pfn, int *target_level)
{
	struct dma_pte *parent, *pte;
	int level = agaw_to_level(domain->agaw);
	int offset;

	BUG_ON(!domain->pgd);

	if (!domain_pfn_supported(domain, pfn))
		/* Address beyond IOMMU's addressing capabilities. */
		return NULL;

	parent = domain->pgd;

	while (1) {
		void *tmp_page;

		offset = pfn_level_offset(pfn, level);
		pte = &parent[offset];
		if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
			break;
		if (level == *target_level)
			break;

		if (!dma_pte_present(pte)) {
			uint64_t pteval;

			tmp_page = alloc_pgtable_page(domain->nid);

			if (!tmp_page)
				return NULL;

			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
			if (cmpxchg64(&pte->val, 0ULL, pteval))
				/* Someone else set it while we were thinking; use theirs. */
				free_pgtable_page(tmp_page);
			else
				domain_flush_cache(domain, pte, sizeof(*pte));
		}
		if (level == 1)
			break;

		parent = phys_to_virt(dma_pte_addr(pte));
		level--;
	}

	if (!*target_level)
		*target_level = level;

	return pte;
}
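/*
 * For example: looking up pfn 0x12345 with *target_level == 1 in a
 * 4-level (agaw 2) table walks offsets pfn_level_offset(pfn, 4..2)
 * through three directory levels, allocating any missing ones, and
 * returns the leaf PTE at index 0x12345 & LEVEL_MASK == 0x145.
 */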


/* return address's pte at specific level */
static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
					 unsigned long pfn,
					 int level, int *large_page)
{
	struct dma_pte *parent, *pte;
	int total = agaw_to_level(domain->agaw);
	int offset;

	parent = domain->pgd;
	while (level <= total) {
		offset = pfn_level_offset(pfn, total);
		pte = &parent[offset];
		if (level == total)
			return pte;

		if (!dma_pte_present(pte)) {
			*large_page = total;
			break;
		}

		if (dma_pte_superpage(pte)) {
			*large_page = total;
			return pte;
		}

		parent = phys_to_virt(dma_pte_addr(pte));
		total--;
	}
	return NULL;
}

/* clear last level pte, a tlb flush should be followed */
static void dma_pte_clear_range(struct dmar_domain *domain,
				unsigned long start_pfn,
				unsigned long last_pfn)
{
	unsigned int large_page;
	struct dma_pte *first_pte, *pte;

	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	do {
		large_page = 1;
		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
		if (!pte) {
			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
			continue;
		}
		do {
			dma_clear_pte(pte);
			start_pfn += lvl_to_nr_pages(large_page);
			pte++;
		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));

		domain_flush_cache(domain, first_pte,
				   (void *)pte - (void *)first_pte);

	} while (start_pfn && start_pfn <= last_pfn);
}

static void dma_pte_free_level(struct dmar_domain *domain, int level,
			       int retain_level, struct dma_pte *pte,
			       unsigned long pfn, unsigned long start_pfn,
			       unsigned long last_pfn)
{
	pfn = max(start_pfn, pfn);
	pte = &pte[pfn_level_offset(pfn, level)];

	do {
		unsigned long level_pfn;
		struct dma_pte *level_pte;

		if (!dma_pte_present(pte) || dma_pte_superpage(pte))
			goto next;

		level_pfn = pfn & level_mask(level);
		level_pte = phys_to_virt(dma_pte_addr(pte));

		if (level > 2) {
			dma_pte_free_level(domain, level - 1, retain_level,
					   level_pte, level_pfn, start_pfn,
					   last_pfn);
		}

		/*
		 * Free the page table if we're below the level we want to
		 * retain and the range covers the entire table.
		 */
		if (level < retain_level && !(start_pfn > level_pfn ||
		      last_pfn < level_pfn + level_size(level) - 1)) {
			dma_clear_pte(pte);
			domain_flush_cache(domain, pte, sizeof(*pte));
			free_pgtable_page(level_pte);
		}
next:
		pfn += level_size(level);
	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
}

/*
 * clear last level (leaf) ptes and free page table pages below the
 * level we wish to keep intact.
 */
static void dma_pte_free_pagetable(struct dmar_domain *domain,
				   unsigned long start_pfn,
				   unsigned long last_pfn,
				   int retain_level)
{
	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	dma_pte_clear_range(domain, start_pfn, last_pfn);

	/* We don't need lock here; nobody else touches the iova range */
	dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
			   domain->pgd, 0, start_pfn, last_pfn);

	/* free pgd */
	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		free_pgtable_page(domain->pgd);
		domain->pgd = NULL;
	}
}

/* When a page at a given level is being unlinked from its parent, we don't
   need to *modify* it at all. All we need to do is make a list of all the
   pages which can be freed just as soon as we've flushed the IOTLB and we
   know the hardware page-walk will no longer touch them.
   The 'pte' argument is the *parent* PTE, pointing to the page that is to
   be freed. */
static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
					    int level, struct dma_pte *pte,
					    struct page *freelist)
{
	struct page *pg;

	pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
	pg->freelist = freelist;
	freelist = pg;

	if (level == 1)
		return freelist;

	pte = page_address(pg);
	do {
		if (dma_pte_present(pte) && !dma_pte_superpage(pte))
			freelist = dma_pte_list_pagetables(domain, level - 1,
							   pte, freelist);
		pte++;
	} while (!first_pte_in_page(pte));

	return freelist;
}

static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
					struct dma_pte *pte, unsigned long pfn,
					unsigned long start_pfn,
					unsigned long last_pfn,
					struct page *freelist)
{
	struct dma_pte *first_pte = NULL, *last_pte = NULL;

	pfn = max(start_pfn, pfn);
	pte = &pte[pfn_level_offset(pfn, level)];

	do {
		unsigned long level_pfn;

		if (!dma_pte_present(pte))
			goto next;

		level_pfn = pfn & level_mask(level);

		/* If range covers entire pagetable, free it */
		if (start_pfn <= level_pfn &&
		    last_pfn >= level_pfn + level_size(level) - 1) {
			/* These subordinate page tables are going away entirely. Don't
			   bother to clear them; we're just going to *free* them. */
			if (level > 1 && !dma_pte_superpage(pte))
				freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);

			dma_clear_pte(pte);
			if (!first_pte)
				first_pte = pte;
			last_pte = pte;
		} else if (level > 1) {
			/* Recurse down into a level that isn't *entirely* obsolete */
			freelist = dma_pte_clear_level(domain, level - 1,
						       phys_to_virt(dma_pte_addr(pte)),
						       level_pfn, start_pfn, last_pfn,
						       freelist);
		}
next:
		pfn += level_size(level);
	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);

	if (first_pte)
		domain_flush_cache(domain, first_pte,
				   (void *)++last_pte - (void *)first_pte);

	return freelist;
}

/* We can't just free the pages because the IOMMU may still be walking
   the page tables, and may have cached the intermediate levels. The
   pages can only be freed after the IOTLB flush has been done. */
static struct page *domain_unmap(struct dmar_domain *domain,
				 unsigned long start_pfn,
				 unsigned long last_pfn)
{
	struct page *freelist;

	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
				       domain->pgd, 0, start_pfn, last_pfn, NULL);

	/* free pgd */
	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		struct page *pgd_page = virt_to_page(domain->pgd);
		pgd_page->freelist = freelist;
		freelist = pgd_page;

		domain->pgd = NULL;
	}

	return freelist;
}

static void dma_free_pagelist(struct page *freelist)
{
	struct page *pg;

	while ((pg = freelist)) {
		freelist = pg->freelist;
		free_pgtable_page(page_address(pg));
	}
}

static void iova_entry_free(unsigned long data)
{
	struct page *freelist = (struct page *)data;

	dma_free_pagelist(freelist);
}

/* iommu handling */
static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
	struct root_entry *root;
	unsigned long flags;

	root = (struct root_entry *)alloc_pgtable_page(iommu->node);
	if (!root) {
		pr_err("Allocating root entry for %s failed\n",
			iommu->name);
		return -ENOMEM;
	}

	__iommu_flush_cache(iommu, root, ROOT_SIZE);

	spin_lock_irqsave(&iommu->lock, flags);
	iommu->root_entry = root;
	spin_unlock_irqrestore(&iommu->lock, flags);

	return 0;
}

static void iommu_set_root_entry(struct intel_iommu *iommu)
{
	u64 addr;
	u32 sts;
	unsigned long flag;

	addr = virt_to_phys(iommu->root_entry);
	if (sm_supported(iommu))
		addr |= DMA_RTADDR_SMT;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);

	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_RTPS), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void iommu_flush_write_buffer(struct intel_iommu *iommu)
{
	u32 val;
	unsigned long flag;

	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
		return;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(val & DMA_GSTS_WBFS)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

/* return value determines whether we need a write buffer flush */
static void __iommu_flush_context(struct intel_iommu *iommu,
				  u16 did, u16 source_id, u8 function_mask,
				  u64 type)
{
	u64 val = 0;
	unsigned long flag;

	switch (type) {
	case DMA_CCMD_GLOBAL_INVL:
		val = DMA_CCMD_GLOBAL_INVL;
		break;
	case DMA_CCMD_DOMAIN_INVL:
		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
		break;
	case DMA_CCMD_DEVICE_INVL:
		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
		break;
	default:
		BUG();
	}
	val |= DMA_CCMD_ICC;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
		      dmar_readq, (!(val & DMA_CCMD_ICC)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

/* return value determines whether we need a write buffer flush */
static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
				u64 addr, unsigned int size_order, u64 type)
{
	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
	u64 val = 0, val_iva = 0;
	unsigned long flag;

	switch (type) {
	case DMA_TLB_GLOBAL_FLUSH:
		/* global flush doesn't need set IVA_REG */
		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
		break;
	case DMA_TLB_DSI_FLUSH:
		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		break;
	case DMA_TLB_PSI_FLUSH:
		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		/* IH bit is passed in as part of address */
		val_iva = size_order | addr;
		break;
	default:
		BUG();
	}
	/* Note: set drain read/write */
#if 0
	/*
	 * This is probably to be super secure.. Looks like we can
	 * ignore it without any impact.
	 */
	if (cap_read_drain(iommu->cap))
		val |= DMA_TLB_READ_DRAIN;
#endif
	if (cap_write_drain(iommu->cap))
		val |= DMA_TLB_WRITE_DRAIN;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	/* Note: Only uses first TLB reg currently */
	if (val_iva)
		dmar_writeq(iommu->reg + tlb_offset, val_iva);
	dmar_writeq(iommu->reg + tlb_offset + 8, val);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
		      dmar_readq, (!(val & DMA_TLB_IVT)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* check IOTLB invalidation granularity */
	if (DMA_TLB_IAIG(val) == 0)
		pr_err("Flush IOTLB failed\n");
	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
		pr_debug("TLB flush request %Lx, actual %Lx\n",
			(unsigned long long)DMA_TLB_IIRG(type),
			(unsigned long long)DMA_TLB_IAIG(val));
}
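/*
 * For a page-selective (PSI) flush above, the low bits of the IVA
 * value encode the size order: addr | 3, for instance, invalidates
 * 2^3 == 8 pages starting at the naturally 32KiB-aligned addr.
 */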

static struct device_domain_info *
iommu_support_dev_iotlb(struct dmar_domain *domain, struct intel_iommu *iommu,
			u8 bus, u8 devfn)
{
	struct device_domain_info *info;

	assert_spin_locked(&device_domain_lock);

	if (!iommu->qi)
		return NULL;

	list_for_each_entry(info, &domain->devices, link)
		if (info->iommu == iommu && info->bus == bus &&
		    info->devfn == devfn) {
			if (info->ats_supported && info->dev)
				return info;
			break;
		}

	return NULL;
}

static void domain_update_iotlb(struct dmar_domain *domain)
{
	struct device_domain_info *info;
	bool has_iotlb_device = false;

	assert_spin_locked(&device_domain_lock);

	list_for_each_entry(info, &domain->devices, link) {
		struct pci_dev *pdev;

		if (!info->dev || !dev_is_pci(info->dev))
			continue;

		pdev = to_pci_dev(info->dev);
		if (pdev->ats_enabled) {
			has_iotlb_device = true;
			break;
		}
	}

	domain->has_iotlb_device = has_iotlb_device;
}

static void iommu_enable_dev_iotlb(struct device_domain_info *info)
{
	struct pci_dev *pdev;

	assert_spin_locked(&device_domain_lock);

	if (!info || !dev_is_pci(info->dev))
		return;

	pdev = to_pci_dev(info->dev);
	/* For IOMMU that supports device IOTLB throttling (DIT), we assign
	 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
	 * queue depth at PF level. If DIT is not set, PFSID will be treated as
	 * reserved, which should be set to 0.
	 */
	if (!ecap_dit(info->iommu->ecap))
		info->pfsid = 0;
	else {
		struct pci_dev *pf_pdev;

		/* pdev will be returned if device is not a vf */
		pf_pdev = pci_physfn(pdev);
		info->pfsid = pci_dev_id(pf_pdev);
	}

#ifdef CONFIG_INTEL_IOMMU_SVM
	/* The PCIe spec, in its wisdom, declares that the behaviour of
	   the device if you enable PASID support after ATS support is
	   undefined. So always enable PASID support on devices which
	   have it, even if we can't yet know if we're ever going to
	   use it. */
	if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
		info->pasid_enabled = 1;

	if (info->pri_supported &&
	    (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1) &&
	    !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
		info->pri_enabled = 1;
#endif
	if (!pdev->untrusted && info->ats_supported &&
	    pci_ats_page_aligned(pdev) &&
	    !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
		info->ats_enabled = 1;
		domain_update_iotlb(info->domain);
		info->ats_qdep = pci_ats_queue_depth(pdev);
	}
}

static void iommu_disable_dev_iotlb(struct device_domain_info *info)
{
	struct pci_dev *pdev;

	assert_spin_locked(&device_domain_lock);

	if (!dev_is_pci(info->dev))
		return;

	pdev = to_pci_dev(info->dev);

	if (info->ats_enabled) {
		pci_disable_ats(pdev);
		info->ats_enabled = 0;
		domain_update_iotlb(info->domain);
	}
#ifdef CONFIG_INTEL_IOMMU_SVM
	if (info->pri_enabled) {
		pci_disable_pri(pdev);
		info->pri_enabled = 0;
	}
	if (info->pasid_enabled) {
		pci_disable_pasid(pdev);
		info->pasid_enabled = 0;
	}
#endif
}

static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
				  u64 addr, unsigned mask)
{
	u16 sid, qdep;
	unsigned long flags;
	struct device_domain_info *info;

	if (!domain->has_iotlb_device)
		return;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry(info, &domain->devices, link) {
		if (!info->ats_enabled)
			continue;

		sid = info->bus << 8 | info->devfn;
		qdep = info->ats_qdep;
		qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
				qdep, addr, mask);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);
}

static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
				  struct dmar_domain *domain,
				  unsigned long pfn, unsigned int pages,
				  int ih, int map)
{
	unsigned int mask = ilog2(__roundup_pow_of_two(pages));
	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
	u16 did = domain->iommu_did[iommu->seq_id];

	BUG_ON(pages == 0);

	if (ih)
		ih = 1 << 6;
	/*
	 * Fall back to domain selective flush if no PSI support or the size
	 * is too big.
	 * PSI requires page size to be 2 ^ x, and the base address is
	 * naturally aligned to the size.
	 */
	if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
		iommu->flush.flush_iotlb(iommu, did, 0, 0,
						DMA_TLB_DSI_FLUSH);
	else
		iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
						DMA_TLB_PSI_FLUSH);

	/*
	 * In caching mode, changes of pages from non-present to present require
	 * flush. However, device IOTLB doesn't need to be flushed in this case.
	 */
	if (!cap_caching_mode(iommu->cap) || !map)
		iommu_flush_dev_iotlb(domain, addr, mask);
}
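/*
 * For example: flushing 300 pages gives mask == ilog2(512) == 9, so
 * the whole naturally aligned 512-page (2MiB) window containing them
 * is invalidated in one PSI operation.
 */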

/* Notification for newly created mappings */
static inline void __mapping_notify_one(struct intel_iommu *iommu,
					struct dmar_domain *domain,
					unsigned long pfn, unsigned int pages)
{
	/* It's a non-present to present mapping. Only flush if caching mode */
	if (cap_caching_mode(iommu->cap))
		iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
	else
		iommu_flush_write_buffer(iommu);
}

static void iommu_flush_iova(struct iova_domain *iovad)
{
	struct dmar_domain *domain;
	int idx;

	domain = container_of(iovad, struct dmar_domain, iovad);

	for_each_domain_iommu(idx, domain) {
		struct intel_iommu *iommu = g_iommus[idx];
		u16 did = domain->iommu_did[iommu->seq_id];

		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);

		if (!cap_caching_mode(iommu->cap))
			iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
					      0, MAX_AGAW_PFN_WIDTH);
	}
}

static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
{
	u32 pmen;
	unsigned long flags;

	if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
		return;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	pmen = readl(iommu->reg + DMAR_PMEN_REG);
	pmen &= ~DMA_PMEN_EPM;
	writel(pmen, iommu->reg + DMAR_PMEN_REG);

	/* wait for the protected region status bit to clear */
	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
		readl, !(pmen & DMA_PMEN_PRS), pmen);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}

static void iommu_enable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flags;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	iommu->gcmd |= DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_TES), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}

static void iommu_disable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flag;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	iommu->gcmd &= ~DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(sts & DMA_GSTS_TES)), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}


static int iommu_init_domains(struct intel_iommu *iommu)
{
	u32 ndomains, nlongs;
	size_t size;

	ndomains = cap_ndoms(iommu->cap);
	pr_debug("%s: Number of Domains supported <%d>\n",
		 iommu->name, ndomains);
	nlongs = BITS_TO_LONGS(ndomains);

	spin_lock_init(&iommu->lock);

	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
	if (!iommu->domain_ids) {
		pr_err("%s: Allocating domain id array failed\n",
		       iommu->name);
		return -ENOMEM;
	}

	size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
	iommu->domains = kzalloc(size, GFP_KERNEL);

	if (iommu->domains) {
		size = 256 * sizeof(struct dmar_domain *);
		iommu->domains[0] = kzalloc(size, GFP_KERNEL);
	}

	if (!iommu->domains || !iommu->domains[0]) {
		pr_err("%s: Allocating domain array failed\n",
		       iommu->name);
		kfree(iommu->domain_ids);
		kfree(iommu->domains);
		iommu->domain_ids = NULL;
		iommu->domains    = NULL;
		return -ENOMEM;
	}



	/*
	 * If Caching mode is set, then invalid translations are tagged
	 * with domain-id 0, hence we need to pre-allocate it. We also
	 * use domain-id 0 as a marker for non-allocated domain-id, so
	 * make sure it is not used for a real domain.
	 */
	set_bit(0, iommu->domain_ids);

	/*
	 * VT-d spec rev 3.0 (section 6.2.3.1) requires that each pasid
	 * entry for first-level or pass-through translation modes should
	 * be programmed with a domain id different from those used for
	 * second-level or nested translation. We reserve a domain id for
	 * this purpose.
	 */
	if (sm_supported(iommu))
		set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);

	return 0;
}

static void disable_dmar_iommu(struct intel_iommu *iommu)
{
	struct device_domain_info *info, *tmp;
	unsigned long flags;

	if (!iommu->domains || !iommu->domain_ids)
		return;

again:
	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
		struct dmar_domain *domain;

		if (info->iommu != iommu)
			continue;

		if (!info->dev || !info->domain)
			continue;

		domain = info->domain;

		__dmar_remove_one_dev_info(info);

		if (!domain_type_is_vm_or_si(domain)) {
			/*
			 * The domain_exit() function can't be called under
			 * device_domain_lock, as it takes this lock itself.
			 * So release the lock here and re-run the loop
			 * afterwards.
			 */
			spin_unlock_irqrestore(&device_domain_lock, flags);
			domain_exit(domain);
			goto again;
		}
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);

	if (iommu->gcmd & DMA_GCMD_TE)
		iommu_disable_translation(iommu);
}

static void free_dmar_iommu(struct intel_iommu *iommu)
{
	if ((iommu->domains) && (iommu->domain_ids)) {
		int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
		int i;

		for (i = 0; i < elems; i++)
			kfree(iommu->domains[i]);
		kfree(iommu->domains);
		kfree(iommu->domain_ids);
		iommu->domains = NULL;
		iommu->domain_ids = NULL;
	}

	g_iommus[iommu->seq_id] = NULL;

	/* free context mapping */
	free_context_table(iommu);

#ifdef CONFIG_INTEL_IOMMU_SVM
	if (pasid_supported(iommu)) {
		if (ecap_prs(iommu->ecap))
			intel_svm_finish_prq(iommu);
	}
#endif
}

static struct dmar_domain *alloc_domain(int flags)
{
	struct dmar_domain *domain;

	domain = alloc_domain_mem();
	if (!domain)
		return NULL;

	memset(domain, 0, sizeof(*domain));
	domain->nid = NUMA_NO_NODE;
	domain->flags = flags;
	domain->has_iotlb_device = false;
	INIT_LIST_HEAD(&domain->devices);

	return domain;
}

/* Must be called with iommu->lock */
static int domain_attach_iommu(struct dmar_domain *domain,
			       struct intel_iommu *iommu)
{
	unsigned long ndomains;
	int num;

	assert_spin_locked(&device_domain_lock);
	assert_spin_locked(&iommu->lock);

	domain->iommu_refcnt[iommu->seq_id] += 1;
	domain->iommu_count += 1;
	if (domain->iommu_refcnt[iommu->seq_id] == 1) {
		ndomains = cap_ndoms(iommu->cap);
		num      = find_first_zero_bit(iommu->domain_ids, ndomains);

		if (num >= ndomains) {
			pr_err("%s: No free domain ids\n", iommu->name);
			domain->iommu_refcnt[iommu->seq_id] -= 1;
			domain->iommu_count -= 1;
			return -ENOSPC;
		}

		set_bit(num, iommu->domain_ids);
		set_iommu_domain(iommu, num, domain);

		domain->iommu_did[iommu->seq_id] = num;
		domain->nid			 = iommu->node;

		domain_update_iommu_cap(domain);
	}

	return 0;
}

static int domain_detach_iommu(struct dmar_domain *domain,
			       struct intel_iommu *iommu)
{
	int num, count;

	assert_spin_locked(&device_domain_lock);
	assert_spin_locked(&iommu->lock);

	domain->iommu_refcnt[iommu->seq_id] -= 1;
	count = --domain->iommu_count;
	if (domain->iommu_refcnt[iommu->seq_id] == 0) {
		num = domain->iommu_did[iommu->seq_id];
		clear_bit(num, iommu->domain_ids);
		set_iommu_domain(iommu, num, NULL);

		domain_update_iommu_cap(domain);
		domain->iommu_did[iommu->seq_id] = 0;
	}

	return count;
}

static struct iova_domain reserved_iova_list;
static struct lock_class_key reserved_rbtree_key;

static int dmar_init_reserved_ranges(void)
{
	struct pci_dev *pdev = NULL;
	struct iova *iova;
	int i;

	init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);

	lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
		&reserved_rbtree_key);

	/* IOAPIC ranges shouldn't be accessed by DMA */
	iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
		IOVA_PFN(IOAPIC_RANGE_END));
	if (!iova) {
		pr_err("Reserve IOAPIC range failed\n");
		return -ENODEV;
	}

	/* Reserve all PCI MMIO to avoid peer-to-peer access */
	for_each_pci_dev(pdev) {
		struct resource *r;

		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
			r = &pdev->resource[i];
			if (!r->flags || !(r->flags & IORESOURCE_MEM))
				continue;
			iova = reserve_iova(&reserved_iova_list,
					    IOVA_PFN(r->start),
					    IOVA_PFN(r->end));
			if (!iova) {
				pci_err(pdev, "Reserve iova for %pR failed\n", r);
				return -ENODEV;
			}
		}
	}
	return 0;
}

static void domain_reserve_special_ranges(struct dmar_domain *domain)
{
	copy_reserved_iova(&reserved_iova_list, &domain->iovad);
}

static inline int guestwidth_to_adjustwidth(int gaw)
{
	int agaw;
	int r = (gaw - 12) % 9;

	if (r == 0)
		agaw = gaw;
	else
		agaw = gaw + 9 - r;
	if (agaw > 64)
		agaw = 64;
	return agaw;
}
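/*
 * For example: a 48-bit guest width already sits on a level boundary
 * ((48 - 12) % 9 == 0) and is returned unchanged, while 40 bits is
 * rounded up to the next 9-bit stride: 40 + 9 - 1 == 48.
 */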

static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
		       int guest_width)
{
	int adjust_width, agaw;
	unsigned long sagaw;
	int err;

	init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);

	err = init_iova_flush_queue(&domain->iovad,
				    iommu_flush_iova, iova_entry_free);
	if (err)
		return err;

	domain_reserve_special_ranges(domain);

	/* calculate AGAW */
	if (guest_width > cap_mgaw(iommu->cap))
		guest_width = cap_mgaw(iommu->cap);
	domain->gaw = guest_width;
	adjust_width = guestwidth_to_adjustwidth(guest_width);
	agaw = width_to_agaw(adjust_width);
	sagaw = cap_sagaw(iommu->cap);
	if (!test_bit(agaw, &sagaw)) {
		/* hardware doesn't support it, choose a bigger one */
		pr_debug("Hardware doesn't support agaw %d\n", agaw);
		agaw = find_next_bit(&sagaw, 5, agaw);
		if (agaw >= 5)
			return -ENODEV;
	}
	domain->agaw = agaw;

	if (ecap_coherent(iommu->ecap))
		domain->iommu_coherency = 1;
	else
		domain->iommu_coherency = 0;

	if (ecap_sc_support(iommu->ecap))
		domain->iommu_snooping = 1;
	else
		domain->iommu_snooping = 0;

	if (intel_iommu_superpage)
		domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
	else
		domain->iommu_superpage = 0;

	domain->nid = iommu->node;

	/* always allocate the top pgd */
	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
	if (!domain->pgd)
		return -ENOMEM;
	__iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
	return 0;
}

static void domain_exit(struct dmar_domain *domain)
{
	struct page *freelist;

	/* Remove associated devices and clear attached or cached domains */
	rcu_read_lock();
	domain_remove_dev_info(domain);
	rcu_read_unlock();

	/* destroy iovas */
	put_iova_domain(&domain->iovad);

	freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));

	dma_free_pagelist(freelist);

	free_domain_mem(domain);
}

/*
 * Get the PASID directory size for scalable mode context entry.
 * Value of X in the PDTS field of a scalable mode context entry
 * indicates PASID directory with 2^(X + 7) entries.
 */
static inline unsigned long context_get_sm_pds(struct pasid_table *table)
{
	int pds, max_pde;

	max_pde = table->max_pasid >> PASID_PDE_SHIFT;
	pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS);
	if (pds < 7)
		return 0;

	return pds - 7;
}
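/*
 * For example: if max_pde works out to 0x400, its first set bit is
 * bit 10, so the field is coded as 10 - 7 == 3, i.e. a PASID
 * directory of 2^(3 + 7) == 1024 entries.
 */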

/*
 * Set the RID_PASID field of a scalable mode context entry. The
 * IOMMU hardware will use the PASID value set in this field for
 * DMA translations of DMA requests without PASID.
 */
static inline void
context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
{
	context->hi |= pasid & ((1 << 20) - 1);
	context->hi |= (1 << 20);
}

/*
 * Set the DTE(Device-TLB Enable) field of a scalable mode context
 * entry.
 */
static inline void context_set_sm_dte(struct context_entry *context)
{
	context->lo |= (1 << 2);
}

/*
 * Set the PRE(Page Request Enable) field of a scalable mode context
 * entry.
 */
static inline void context_set_sm_pre(struct context_entry *context)
{
	context->lo |= (1 << 4);
}

/* Convert value to context PASID directory size field coding. */
#define context_pdts(pds)	(((pds) & 0x7) << 9)

static int domain_context_mapping_one(struct dmar_domain *domain,
				      struct intel_iommu *iommu,
				      struct pasid_table *table,
				      u8 bus, u8 devfn)
{
	u16 did = domain->iommu_did[iommu->seq_id];
	int translation = CONTEXT_TT_MULTI_LEVEL;
	struct device_domain_info *info = NULL;
	struct context_entry *context;
	unsigned long flags;
	int ret;

	WARN_ON(did == 0);

	if (hw_pass_through && domain_type_is_si(domain))
		translation = CONTEXT_TT_PASS_THROUGH;

	pr_debug("Set context mapping for %02x:%02x.%d\n",
		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));

	BUG_ON(!domain->pgd);

	spin_lock_irqsave(&device_domain_lock, flags);
	spin_lock(&iommu->lock);

	ret = -ENOMEM;
	context = iommu_context_addr(iommu, bus, devfn, 1);
	if (!context)
		goto out_unlock;

	ret = 0;
	if (context_present(context))
		goto out_unlock;

	/*
	 * For kdump cases, old valid entries may be cached due to the
	 * in-flight DMA and copied pgtable, but there is no unmapping
	 * behaviour for them, thus we need an explicit cache flush for
	 * the newly-mapped device. For kdump, at this point, the device
	 * is supposed to finish reset at its driver probe stage, so no
	 * in-flight DMA will exist, and we don't need to worry anymore
	 * hereafter.
	 */
	if (context_copied(context)) {
		u16 did_old = context_domain_id(context);

		if (did_old < cap_ndoms(iommu->cap)) {
			iommu->flush.flush_context(iommu, did_old,
						   (((u16)bus) << 8) | devfn,
						   DMA_CCMD_MASK_NOBIT,
						   DMA_CCMD_DEVICE_INVL);
			iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
						 DMA_TLB_DSI_FLUSH);
		}
	}

	context_clear_entry(context);

	if (sm_supported(iommu)) {
		unsigned long pds;

		WARN_ON(!table);

		/* Setup the PASID DIR pointer: */
		pds = context_get_sm_pds(table);
		context->lo = (u64)virt_to_phys(table->table) |
				context_pdts(pds);

		/* Setup the RID_PASID field: */
		context_set_sm_rid2pasid(context, PASID_RID2PASID);

		/*
		 * Setup the Device-TLB enable bit and Page request
		 * Enable bit:
		 */
		info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
		if (info && info->ats_supported)
			context_set_sm_dte(context);
		if (info && info->pri_supported)
			context_set_sm_pre(context);
	} else {
		struct dma_pte *pgd = domain->pgd;
		int agaw;

		context_set_domain_id(context, did);

		if (translation != CONTEXT_TT_PASS_THROUGH) {
			/*
			 * Skip top levels of page tables for iommu which has
			 * less agaw than default. Unnecessary for PT mode.
			 */
			for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
				ret = -ENOMEM;
				pgd = phys_to_virt(dma_pte_addr(pgd));
				if (!dma_pte_present(pgd))
					goto out_unlock;
			}

			info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
			if (info && info->ats_supported)
				translation = CONTEXT_TT_DEV_IOTLB;
			else
				translation = CONTEXT_TT_MULTI_LEVEL;

			context_set_address_root(context, virt_to_phys(pgd));
			context_set_address_width(context, agaw);
		} else {
			/*
			 * In pass through mode, AW must be programmed to
			 * indicate the largest AGAW value supported by
			 * hardware. And ASR is ignored by hardware.
			 */
			context_set_address_width(context, iommu->msagaw);
		}

		context_set_translation_type(context, translation);
	}

	context_set_fault_enable(context);
	context_set_present(context);
	domain_flush_cache(domain, context, sizeof(*context));

	/*
	 * It's a non-present to present mapping. If hardware doesn't cache
	 * non-present entries we only need to flush the write-buffer. If it
	 * _does_ cache non-present entries, then it does so in the special
	 * domain #0, which we have to flush:
	 */
	if (cap_caching_mode(iommu->cap)) {
		iommu->flush.flush_context(iommu, 0,
					   (((u16)bus) << 8) | devfn,
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
	} else {
		iommu_flush_write_buffer(iommu);
	}
	iommu_enable_dev_iotlb(info);

	ret = 0;

out_unlock:
	spin_unlock(&iommu->lock);
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return ret;
}

struct domain_context_mapping_data {
	struct dmar_domain *domain;
	struct intel_iommu *iommu;
	struct pasid_table *table;
};

static int domain_context_mapping_cb(struct pci_dev *pdev,
				     u16 alias, void *opaque)
{
	struct domain_context_mapping_data *data = opaque;

	return domain_context_mapping_one(data->domain, data->iommu,
					  data->table, PCI_BUS_NUM(alias),
					  alias & 0xff);
}

static int
domain_context_mapping(struct dmar_domain *domain, struct device *dev)
{
	struct domain_context_mapping_data data;
	struct pasid_table *table;
	struct intel_iommu *iommu;
	u8 bus, devfn;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return -ENODEV;

	table = intel_pasid_get_table(dev);

	if (!dev_is_pci(dev))
		return domain_context_mapping_one(domain, iommu, table,
						  bus, devfn);

	data.domain = domain;
	data.iommu = iommu;
	data.table = table;

	return pci_for_each_dma_alias(to_pci_dev(dev),
				      &domain_context_mapping_cb, &data);
}

static int domain_context_mapped_cb(struct pci_dev *pdev,
				    u16 alias, void *opaque)
{
	struct intel_iommu *iommu = opaque;

	return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
}

static int domain_context_mapped(struct device *dev)
{
	struct intel_iommu *iommu;
	u8 bus, devfn;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return -ENODEV;

	if (!dev_is_pci(dev))
		return device_context_mapped(iommu, bus, devfn);

	return !pci_for_each_dma_alias(to_pci_dev(dev),
				       domain_context_mapped_cb, iommu);
}

/* Returns a number of VTD pages, but aligned to MM page size */
static inline unsigned long aligned_nrpages(unsigned long host_addr,
					    size_t size)
{
	host_addr &= ~PAGE_MASK;
	return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
}
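/*
 * For example: with 4KiB MM pages, a 0x1800-byte buffer starting at
 * offset 0x800 into a page spans PAGE_ALIGN(0x800 + 0x1800) >> 12
 * == 2 MM pages, i.e. two VT-d pages.
 */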
2081 */ 2082 context_set_address_width(context, iommu->msagaw); 2083 } 2084 2085 context_set_translation_type(context, translation); 2086 } 2087 2088 context_set_fault_enable(context); 2089 context_set_present(context); 2090 domain_flush_cache(domain, context, sizeof(*context)); 2091 2092 /* 2093 * It's a non-present to present mapping. If hardware doesn't cache a 2094 * non-present entry we only need to flush the write-buffer. If it 2095 * _does_ cache non-present entries, then it does so in the special 2096 * domain #0, which we have to flush: 2097 */ 2098 if (cap_caching_mode(iommu->cap)) { 2099 iommu->flush.flush_context(iommu, 0, 2100 (((u16)bus) << 8) | devfn, 2101 DMA_CCMD_MASK_NOBIT, 2102 DMA_CCMD_DEVICE_INVL); 2103 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); 2104 } else { 2105 iommu_flush_write_buffer(iommu); 2106 } 2107 iommu_enable_dev_iotlb(info); 2108 2109 ret = 0; 2110 2111out_unlock: 2112 spin_unlock(&iommu->lock); 2113 spin_unlock_irqrestore(&device_domain_lock, flags); 2114 2115 return ret; 2116} 2117 2118struct domain_context_mapping_data { 2119 struct dmar_domain *domain; 2120 struct intel_iommu *iommu; 2121 struct pasid_table *table; 2122}; 2123 2124static int domain_context_mapping_cb(struct pci_dev *pdev, 2125 u16 alias, void *opaque) 2126{ 2127 struct domain_context_mapping_data *data = opaque; 2128 2129 return domain_context_mapping_one(data->domain, data->iommu, 2130 data->table, PCI_BUS_NUM(alias), 2131 alias & 0xff); 2132} 2133 2134static int 2135domain_context_mapping(struct dmar_domain *domain, struct device *dev) 2136{ 2137 struct domain_context_mapping_data data; 2138 struct pasid_table *table; 2139 struct intel_iommu *iommu; 2140 u8 bus, devfn; 2141 2142 iommu = device_to_iommu(dev, &bus, &devfn); 2143 if (!iommu) 2144 return -ENODEV; 2145 2146 table = intel_pasid_get_table(dev); 2147 2148 if (!dev_is_pci(dev)) 2149 return domain_context_mapping_one(domain, iommu, table, 2150 bus, devfn); 2151 2152 data.domain = domain; 2153 data.iommu = iommu; 2154 data.table = table; 2155 2156 return pci_for_each_dma_alias(to_pci_dev(dev), 2157 &domain_context_mapping_cb, &data); 2158} 2159 2160static int domain_context_mapped_cb(struct pci_dev *pdev, 2161 u16 alias, void *opaque) 2162{ 2163 struct intel_iommu *iommu = opaque; 2164 2165 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff); 2166} 2167 2168static int domain_context_mapped(struct device *dev) 2169{ 2170 struct intel_iommu *iommu; 2171 u8 bus, devfn; 2172 2173 iommu = device_to_iommu(dev, &bus, &devfn); 2174 if (!iommu) 2175 return -ENODEV; 2176 2177 if (!dev_is_pci(dev)) 2178 return device_context_mapped(iommu, bus, devfn); 2179 2180 return !pci_for_each_dma_alias(to_pci_dev(dev), 2181 domain_context_mapped_cb, iommu); 2182} 2183 2184/* Returns a number of VTD pages, but aligned to MM page size */ 2185static inline unsigned long aligned_nrpages(unsigned long host_addr, 2186 size_t size) 2187{ 2188 host_addr &= ~PAGE_MASK; 2189 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT; 2190} 2191 2192/* Return largest possible superpage level for a given mapping */ 2193static inline int hardware_largepage_caps(struct dmar_domain *domain, 2194 unsigned long iov_pfn, 2195 unsigned long phy_pfn, 2196 unsigned long pages) 2197{ 2198 int support, level = 1; 2199 unsigned long pfnmerge; 2200 2201 support = domain->iommu_superpage; 2202 2203 /* To use a large page, the virtual *and* physical addresses 2204 must be aligned to 2MiB/1GiB/etc.
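(With the 9-bit VT-d stride, that means the low 9 bits of both PFNs must be clear for a 2MiB page, and the low 18 for a 1GiB page.)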
Lower bits set in either 2205 of them will mean we have to use smaller pages. So just 2206 merge them and check both at once. */ 2207 pfnmerge = iov_pfn | phy_pfn; 2208 2209 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) { 2210 pages >>= VTD_STRIDE_SHIFT; 2211 if (!pages) 2212 break; 2213 pfnmerge >>= VTD_STRIDE_SHIFT; 2214 level++; 2215 support--; 2216 } 2217 return level; 2218} 2219 2220static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, 2221 struct scatterlist *sg, unsigned long phys_pfn, 2222 unsigned long nr_pages, int prot) 2223{ 2224 struct dma_pte *first_pte = NULL, *pte = NULL; 2225 phys_addr_t uninitialized_var(pteval); 2226 unsigned long sg_res = 0; 2227 unsigned int largepage_lvl = 0; 2228 unsigned long lvl_pages = 0; 2229 2230 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1)); 2231 2232 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) 2233 return -EINVAL; 2234 2235 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP; 2236 2237 if (!sg) { 2238 sg_res = nr_pages; 2239 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot; 2240 } 2241 2242 while (nr_pages > 0) { 2243 uint64_t tmp; 2244 2245 if (!sg_res) { 2246 unsigned int pgoff = sg->offset & ~PAGE_MASK; 2247 2248 sg_res = aligned_nrpages(sg->offset, sg->length); 2249 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff; 2250 sg->dma_length = sg->length; 2251 pteval = (sg_phys(sg) - pgoff) | prot; 2252 phys_pfn = pteval >> VTD_PAGE_SHIFT; 2253 } 2254 2255 if (!pte) { 2256 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res); 2257 2258 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl); 2259 if (!pte) 2260 return -ENOMEM; 2261 /* It is a large page */ 2262 if (largepage_lvl > 1) { 2263 unsigned long nr_superpages, end_pfn; 2264 2265 pteval |= DMA_PTE_LARGE_PAGE; 2266 lvl_pages = lvl_to_nr_pages(largepage_lvl); 2267 2268 nr_superpages = sg_res / lvl_pages; 2269 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1; 2270 2271 /* 2272 * Ensure that old small page tables are 2273 * removed to make room for superpage(s). 2274 * We're adding new large pages, so make sure 2275 * we don't remove their parent tables. 2276 */ 2277 dma_pte_free_pagetable(domain, iov_pfn, end_pfn, 2278 largepage_lvl + 1); 2279 } else { 2280 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE; 2281 } 2282 2283 } 2284 /* We don't need a lock here, nobody else 2285 * touches the iova range 2286 */ 2287 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval); 2288 if (tmp) { 2289 static int dumps = 5; 2290 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n", 2291 iov_pfn, tmp, (unsigned long long)pteval); 2292 if (dumps) { 2293 dumps--; 2294 debug_dma_dump_mappings(NULL); 2295 } 2296 WARN_ON(1); 2297 } 2298 2299 lvl_pages = lvl_to_nr_pages(largepage_lvl); 2300 2301 BUG_ON(nr_pages < lvl_pages); 2302 BUG_ON(sg_res < lvl_pages); 2303 2304 nr_pages -= lvl_pages; 2305 iov_pfn += lvl_pages; 2306 phys_pfn += lvl_pages; 2307 pteval += lvl_pages * VTD_PAGE_SIZE; 2308 sg_res -= lvl_pages; 2309 2310 /* If the next PTE would be the first in a new page, then we 2311 need to flush the cache on the entries we've just written. 2312 And then we'll need to recalculate 'pte', so clear it and 2313 let it get set again in the if (!pte) block above. 2314 2315 If we're done (!nr_pages) we need to flush the cache too.
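(Flushing stays bounded: a 4KiB page table holds 512 8-byte PTEs, so first_pte_in_page() becomes true at most once every 512 entries.)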
2316 2317 Also if we've been setting superpages, we may need to 2318 recalculate 'pte' and switch back to smaller pages for the 2319 end of the mapping, if the trailing size is not enough to 2320 use another superpage (i.e. sg_res < lvl_pages). */ 2321 pte++; 2322 if (!nr_pages || first_pte_in_page(pte) || 2323 (largepage_lvl > 1 && sg_res < lvl_pages)) { 2324 domain_flush_cache(domain, first_pte, 2325 (void *)pte - (void *)first_pte); 2326 pte = NULL; 2327 } 2328 2329 if (!sg_res && nr_pages) 2330 sg = sg_next(sg); 2331 } 2332 return 0; 2333} 2334 2335static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, 2336 struct scatterlist *sg, unsigned long phys_pfn, 2337 unsigned long nr_pages, int prot) 2338{ 2339 int ret; 2340 struct intel_iommu *iommu; 2341 2342 /* Do the real mapping first */ 2343 ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot); 2344 if (ret) 2345 return ret; 2346 2347 /* Notify about the new mapping */ 2348 if (domain_type_is_vm(domain)) { 2349 /* VM typed domains can have more than one IOMMU */ 2350 int iommu_id; 2351 2352 for_each_domain_iommu(iommu_id, domain) { 2353 iommu = g_iommus[iommu_id]; 2354 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages); 2355 } 2356 } else { 2357 /* General domains only have one IOMMU */ 2358 iommu = domain_get_iommu(domain); 2359 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages); 2360 } 2361 2362 return 0; 2363} 2364 2365static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn, 2366 struct scatterlist *sg, unsigned long nr_pages, 2367 int prot) 2368{ 2369 return domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot); 2370} 2371 2372static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn, 2373 unsigned long phys_pfn, unsigned long nr_pages, 2374 int prot) 2375{ 2376 return domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot); 2377} 2378 2379static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn) 2380{ 2381 unsigned long flags; 2382 struct context_entry *context; 2383 u16 did_old; 2384 2385 if (!iommu) 2386 return; 2387 2388 spin_lock_irqsave(&iommu->lock, flags); 2389 context = iommu_context_addr(iommu, bus, devfn, 0); 2390 if (!context) { 2391 spin_unlock_irqrestore(&iommu->lock, flags); 2392 return; 2393 } 2394 did_old = context_domain_id(context); 2395 context_clear_entry(context); 2396 __iommu_flush_cache(iommu, context, sizeof(*context)); 2397 spin_unlock_irqrestore(&iommu->lock, flags); 2398 iommu->flush.flush_context(iommu, 2399 did_old, 2400 (((u16)bus) << 8) | devfn, 2401 DMA_CCMD_MASK_NOBIT, 2402 DMA_CCMD_DEVICE_INVL); 2403 iommu->flush.flush_iotlb(iommu, 2404 did_old, 2405 0, 2406 0, 2407 DMA_TLB_DSI_FLUSH); 2408} 2409 2410static inline void unlink_domain_info(struct device_domain_info *info) 2411{ 2412 assert_spin_locked(&device_domain_lock); 2413 list_del(&info->link); 2414 list_del(&info->global); 2415 if (info->dev) 2416 info->dev->archdata.iommu = NULL; 2417} 2418 2419static void domain_remove_dev_info(struct dmar_domain *domain) 2420{ 2421 struct device_domain_info *info, *tmp; 2422 unsigned long flags; 2423 2424 spin_lock_irqsave(&device_domain_lock, flags); 2425 list_for_each_entry_safe(info, tmp, &domain->devices, link) 2426 __dmar_remove_one_dev_info(info); 2427 spin_unlock_irqrestore(&device_domain_lock, flags); 2428} 2429 2430/* 2431 * find_domain 2432 * Note: struct device->archdata.iommu stores the info 2433 */ 2434static struct dmar_domain *find_domain(struct
device *dev) 2435{ 2436 struct device_domain_info *info; 2437 2438 /* No lock here, assumes no domain exit in normal case */ 2439 info = dev->archdata.iommu; 2440 if (likely(info)) 2441 return info->domain; 2442 return NULL; 2443} 2444 2445static inline struct device_domain_info * 2446dmar_search_domain_by_dev_info(int segment, int bus, int devfn) 2447{ 2448 struct device_domain_info *info; 2449 2450 list_for_each_entry(info, &device_domain_list, global) 2451 if (info->iommu->segment == segment && info->bus == bus && 2452 info->devfn == devfn) 2453 return info; 2454 2455 return NULL; 2456} 2457 2458static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, 2459 int bus, int devfn, 2460 struct device *dev, 2461 struct dmar_domain *domain) 2462{ 2463 struct dmar_domain *found = NULL; 2464 struct device_domain_info *info; 2465 unsigned long flags; 2466 int ret; 2467 2468 info = alloc_devinfo_mem(); 2469 if (!info) 2470 return NULL; 2471 2472 info->bus = bus; 2473 info->devfn = devfn; 2474 info->ats_supported = info->pasid_supported = info->pri_supported = 0; 2475 info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0; 2476 info->ats_qdep = 0; 2477 info->dev = dev; 2478 info->domain = domain; 2479 info->iommu = iommu; 2480 info->pasid_table = NULL; 2481 info->auxd_enabled = 0; 2482 INIT_LIST_HEAD(&info->auxiliary_domains); 2483 2484 if (dev && dev_is_pci(dev)) { 2485 struct pci_dev *pdev = to_pci_dev(info->dev); 2486 2487 if (!pdev->untrusted && 2488 !pci_ats_disabled() && 2489 ecap_dev_iotlb_support(iommu->ecap) && 2490 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) && 2491 dmar_find_matched_atsr_unit(pdev)) 2492 info->ats_supported = 1; 2493 2494 if (sm_supported(iommu)) { 2495 if (pasid_supported(iommu)) { 2496 int features = pci_pasid_features(pdev); 2497 if (features >= 0) 2498 info->pasid_supported = features | 1; 2499 } 2500 2501 if (info->ats_supported && ecap_prs(iommu->ecap) && 2502 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI)) 2503 info->pri_supported = 1; 2504 } 2505 } 2506 2507 spin_lock_irqsave(&device_domain_lock, flags); 2508 if (dev) 2509 found = find_domain(dev); 2510 2511 if (!found) { 2512 struct device_domain_info *info2; 2513 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn); 2514 if (info2) { 2515 found = info2->domain; 2516 info2->dev = dev; 2517 } 2518 } 2519 2520 if (found) { 2521 spin_unlock_irqrestore(&device_domain_lock, flags); 2522 free_devinfo_mem(info); 2523 /* Caller must free the original domain */ 2524 return found; 2525 } 2526 2527 spin_lock(&iommu->lock); 2528 ret = domain_attach_iommu(domain, iommu); 2529 spin_unlock(&iommu->lock); 2530 2531 if (ret) { 2532 spin_unlock_irqrestore(&device_domain_lock, flags); 2533 free_devinfo_mem(info); 2534 return NULL; 2535 } 2536 2537 list_add(&info->link, &domain->devices); 2538 list_add(&info->global, &device_domain_list); 2539 if (dev) 2540 dev->archdata.iommu = info; 2541 spin_unlock_irqrestore(&device_domain_lock, flags); 2542 2543 /* PASID table is mandatory for a PCI device in scalable mode. 
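Even requests without a PASID are translated through it, tagged with the reserved RID2PASID value (0 in this driver) that gets programmed just below.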
*/ 2544 if (dev && dev_is_pci(dev) && sm_supported(iommu)) { 2545 ret = intel_pasid_alloc_table(dev); 2546 if (ret) { 2547 dev_err(dev, "PASID table allocation failed\n"); 2548 dmar_remove_one_dev_info(dev); 2549 return NULL; 2550 } 2551 2552 /* Setup the PASID entry for requests without PASID: */ 2553 spin_lock(&iommu->lock); 2554 if (hw_pass_through && domain_type_is_si(domain)) 2555 ret = intel_pasid_setup_pass_through(iommu, domain, 2556 dev, PASID_RID2PASID); 2557 else 2558 ret = intel_pasid_setup_second_level(iommu, domain, 2559 dev, PASID_RID2PASID); 2560 spin_unlock(&iommu->lock); 2561 if (ret) { 2562 dev_err(dev, "Setup RID2PASID failed\n"); 2563 dmar_remove_one_dev_info(dev); 2564 return NULL; 2565 } 2566 } 2567 2568 if (dev && domain_context_mapping(domain, dev)) { 2569 dev_err(dev, "Domain context map failed\n"); 2570 dmar_remove_one_dev_info(dev); 2571 return NULL; 2572 } 2573 2574 return domain; 2575} 2576 2577static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque) 2578{ 2579 *(u16 *)opaque = alias; 2580 return 0; 2581} 2582 2583static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw) 2584{ 2585 struct device_domain_info *info; 2586 struct dmar_domain *domain = NULL; 2587 struct intel_iommu *iommu; 2588 u16 dma_alias; 2589 unsigned long flags; 2590 u8 bus, devfn; 2591 2592 iommu = device_to_iommu(dev, &bus, &devfn); 2593 if (!iommu) 2594 return NULL; 2595 2596 if (dev_is_pci(dev)) { 2597 struct pci_dev *pdev = to_pci_dev(dev); 2598 2599 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias); 2600 2601 spin_lock_irqsave(&device_domain_lock, flags); 2602 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus), 2603 PCI_BUS_NUM(dma_alias), 2604 dma_alias & 0xff); 2605 if (info) { 2606 iommu = info->iommu; 2607 domain = info->domain; 2608 } 2609 spin_unlock_irqrestore(&device_domain_lock, flags); 2610 2611 /* DMA alias already has a domain, use it */ 2612 if (info) 2613 goto out; 2614 } 2615 2616 /* Allocate and initialize new domain for the device */ 2617 domain = alloc_domain(0); 2618 if (!domain) 2619 return NULL; 2620 if (domain_init(domain, iommu, gaw)) { 2621 domain_exit(domain); 2622 return NULL; 2623 } 2624 2625out: 2626 2627 return domain; 2628} 2629 2630static struct dmar_domain *set_domain_for_dev(struct device *dev, 2631 struct dmar_domain *domain) 2632{ 2633 struct intel_iommu *iommu; 2634 struct dmar_domain *tmp; 2635 u16 req_id, dma_alias; 2636 u8 bus, devfn; 2637 2638 iommu = device_to_iommu(dev, &bus, &devfn); 2639 if (!iommu) 2640 return NULL; 2641 2642 req_id = ((u16)bus << 8) | devfn; 2643 2644 if (dev_is_pci(dev)) { 2645 struct pci_dev *pdev = to_pci_dev(dev); 2646 2647 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias); 2648 2649 /* register PCI DMA alias device */ 2650 if (req_id != dma_alias) { 2651 tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias), 2652 dma_alias & 0xff, NULL, domain); 2653 2654 if (!tmp || tmp != domain) 2655 return tmp; 2656 } 2657 } 2658 2659 tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain); 2660 if (!tmp || tmp != domain) 2661 return tmp; 2662 2663 return domain; 2664} 2665 2666static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw) 2667{ 2668 struct dmar_domain *domain, *tmp; 2669 2670 domain = find_domain(dev); 2671 if (domain) 2672 goto out; 2673 2674 domain = find_or_alloc_domain(dev, gaw); 2675 if (!domain) 2676 goto out; 2677 2678 tmp = set_domain_for_dev(dev, domain); 2679 if (!tmp || domain != tmp) { 2680 domain_exit(domain); 2681 
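/* set_domain_for_dev() returned a different domain: either an existing one for this device or its DMA alias, or NULL on failure. Ours is freed above; adopt tmp either way. */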
domain = tmp; 2682 } 2683 2684out: 2685 2686 return domain; 2687} 2688 2689static int iommu_domain_identity_map(struct dmar_domain *domain, 2690 unsigned long long start, 2691 unsigned long long end) 2692{ 2693 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT; 2694 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT; 2695 2696 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn), 2697 dma_to_mm_pfn(last_vpfn))) { 2698 pr_err("Reserving iova failed\n"); 2699 return -ENOMEM; 2700 } 2701 2702 pr_debug("Mapping reserved region %llx-%llx\n", start, end); 2703 /* 2704 * The RMRR range might overlap the physical memory range, 2705 * so clear it first 2706 */ 2707 dma_pte_clear_range(domain, first_vpfn, last_vpfn); 2708 2709 return __domain_mapping(domain, first_vpfn, NULL, 2710 first_vpfn, last_vpfn - first_vpfn + 1, 2711 DMA_PTE_READ|DMA_PTE_WRITE); 2712} 2713 2714static int domain_prepare_identity_map(struct device *dev, 2715 struct dmar_domain *domain, 2716 unsigned long long start, 2717 unsigned long long end) 2718{ 2719 /* For _hardware_ passthrough, don't bother. But for software 2720 passthrough, we do it anyway -- it may indicate a memory 2721 range which is reserved in E820, and so didn't get set 2722 up to begin with in si_domain */ 2723 if (domain == si_domain && hw_pass_through) { 2724 dev_warn(dev, "Ignoring identity map for HW passthrough [0x%Lx - 0x%Lx]\n", 2725 start, end); 2726 return 0; 2727 } 2728 2729 dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx]\n", start, end); 2730 2731 if (end < start) { 2732 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n" 2733 "BIOS vendor: %s; Ver: %s; Product Version: %s\n", 2734 dmi_get_system_info(DMI_BIOS_VENDOR), 2735 dmi_get_system_info(DMI_BIOS_VERSION), 2736 dmi_get_system_info(DMI_PRODUCT_VERSION)); 2737 return -EIO; 2738 } 2739 2740 if (end >> agaw_to_width(domain->agaw)) { 2741 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n" 2742 "BIOS vendor: %s; Ver: %s; Product Version: %s\n", 2743 agaw_to_width(domain->agaw), 2744 dmi_get_system_info(DMI_BIOS_VENDOR), 2745 dmi_get_system_info(DMI_BIOS_VERSION), 2746 dmi_get_system_info(DMI_PRODUCT_VERSION)); 2747 return -EIO; 2748 } 2749 2750 return iommu_domain_identity_map(domain, start, end); 2751} 2752 2753static int iommu_prepare_identity_map(struct device *dev, 2754 unsigned long long start, 2755 unsigned long long end) 2756{ 2757 struct dmar_domain *domain; 2758 int ret; 2759 2760 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); 2761 if (!domain) 2762 return -ENOMEM; 2763 2764 ret = domain_prepare_identity_map(dev, domain, start, end); 2765 if (ret) 2766 domain_exit(domain); 2767 2768 return ret; 2769} 2770 2771static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, 2772 struct device *dev) 2773{ 2774 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) 2775 return 0; 2776 return iommu_prepare_identity_map(dev, rmrr->base_address, 2777 rmrr->end_address); 2778} 2779 2780#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA 2781static inline void iommu_prepare_isa(void) 2782{ 2783 struct pci_dev *pdev; 2784 int ret; 2785 2786 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL); 2787 if (!pdev) 2788 return; 2789 2790 pr_info("Prepare 0-16MiB unity mapping for LPC\n"); 2791 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1); 2792 2793 if (ret) 2794 pr_err("Failed to create 0-16MiB identity map - floppy might not work\n"); 2795 2796 pci_dev_put(pdev); 2797} 2798#else 2799static inline void iommu_prepare_isa(void) 2800{
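/* Stub: without CONFIG_INTEL_IOMMU_FLOPPY_WA, no 0-16MiB ISA/LPC identity map is created. */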
2801 return; 2802} 2803#endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */ 2804 2805static int md_domain_init(struct dmar_domain *domain, int guest_width); 2806 2807static int __init si_domain_init(int hw) 2808{ 2809 int nid, ret; 2810 2811 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY); 2812 if (!si_domain) 2813 return -EFAULT; 2814 2815 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { 2816 domain_exit(si_domain); 2817 return -EFAULT; 2818 } 2819 2820 pr_debug("Identity mapping domain allocated\n"); 2821 2822 if (hw) 2823 return 0; 2824 2825 for_each_online_node(nid) { 2826 unsigned long start_pfn, end_pfn; 2827 int i; 2828 2829 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) { 2830 ret = iommu_domain_identity_map(si_domain, 2831 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn)); 2832 if (ret) 2833 return ret; 2834 } 2835 } 2836 2837 return 0; 2838} 2839 2840static int identity_mapping(struct device *dev) 2841{ 2842 struct device_domain_info *info; 2843 2844 if (likely(!iommu_identity_mapping)) 2845 return 0; 2846 2847 info = dev->archdata.iommu; 2848 if (info && info != DUMMY_DEVICE_DOMAIN_INFO) 2849 return (info->domain == si_domain); 2850 2851 return 0; 2852} 2853 2854static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev) 2855{ 2856 struct dmar_domain *ndomain; 2857 struct intel_iommu *iommu; 2858 u8 bus, devfn; 2859 2860 iommu = device_to_iommu(dev, &bus, &devfn); 2861 if (!iommu) 2862 return -ENODEV; 2863 2864 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain); 2865 if (ndomain != domain) 2866 return -EBUSY; 2867 2868 return 0; 2869} 2870 2871static bool device_has_rmrr(struct device *dev) 2872{ 2873 struct dmar_rmrr_unit *rmrr; 2874 struct device *tmp; 2875 int i; 2876 2877 rcu_read_lock(); 2878 for_each_rmrr_units(rmrr) { 2879 /* 2880 * Return TRUE if this RMRR contains the device that 2881 * is passed in. 2882 */ 2883 for_each_active_dev_scope(rmrr->devices, 2884 rmrr->devices_cnt, i, tmp) 2885 if (tmp == dev) { 2886 rcu_read_unlock(); 2887 return true; 2888 } 2889 } 2890 rcu_read_unlock(); 2891 return false; 2892} 2893 2894/* 2895 * There are a couple of cases where we need to restrict the functionality of 2896 * devices associated with RMRRs. The first is when evaluating a device for 2897 * identity mapping because problems exist when devices are moved in and out 2898 * of domains and their respective RMRR information is lost. This means that 2899 * a device with associated RMRRs will never be in a "passthrough" domain. 2900 * The second is use of the device through the IOMMU API. This interface 2901 * expects to have full control of the IOVA space for the device. We cannot 2902 * satisfy both the requirement that RMRR access is maintained and have an 2903 * unencumbered IOVA space. We also have no ability to quiesce the device's 2904 * use of the RMRR space or even inform the IOMMU API user of the restriction. 2905 * We therefore prevent devices associated with an RMRR from participating in 2906 * the IOMMU API, which eliminates them from device assignment. 2907 * 2908 * In both cases we assume that PCI USB devices with RMRRs have them largely 2909 * for historical reasons and that the RMRR space is not actively used post 2910 * boot. This exclusion may change if vendors begin to abuse it. 2911 * 2912 * The same exception is made for graphics devices, with the requirement that 2913 * any use of the RMRR regions will be torn down before assigning the device 2914 * to a guest.
2915 */ 2916static bool device_is_rmrr_locked(struct device *dev) 2917{ 2918 if (!device_has_rmrr(dev)) 2919 return false; 2920 2921 if (dev_is_pci(dev)) { 2922 struct pci_dev *pdev = to_pci_dev(dev); 2923 2924 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev)) 2925 return false; 2926 } 2927 2928 return true; 2929} 2930 2931static int iommu_should_identity_map(struct device *dev, int startup) 2932{ 2933 if (dev_is_pci(dev)) { 2934 struct pci_dev *pdev = to_pci_dev(dev); 2935 2936 if (device_is_rmrr_locked(dev)) 2937 return 0; 2938 2939 /* 2940 * Prevent any device marked as untrusted from getting 2941 * placed into the static identity mapping domain. 2942 */ 2943 if (pdev->untrusted) 2944 return 0; 2945 2946 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev)) 2947 return 1; 2948 2949 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev)) 2950 return 1; 2951 2952 if (!(iommu_identity_mapping & IDENTMAP_ALL)) 2953 return 0; 2954 2955 /* 2956 * We want to start off with all devices in the 1:1 domain, and 2957 * take them out later if we find they can't access all of memory. 2958 * 2959 * However, we can't do this for PCI devices behind bridges, 2960 * because all PCI devices behind the same bridge will end up 2961 * with the same source-id on their transactions. 2962 * 2963 * Practically speaking, we can't change things around for these 2964 * devices at run-time, because we can't be sure there'll be no 2965 * DMA transactions in flight for any of their siblings. 2966 * 2967 * So PCI devices (unless they're on the root bus) as well as 2968 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of 2969 * the 1:1 domain, just in _case_ one of their siblings turns out 2970 * not to be able to map all of memory. 2971 */ 2972 if (!pci_is_pcie(pdev)) { 2973 if (!pci_is_root_bus(pdev->bus)) 2974 return 0; 2975 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI) 2976 return 0; 2977 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE) 2978 return 0; 2979 } else { 2980 if (device_has_rmrr(dev)) 2981 return 0; 2982 } 2983 2984 /* 2985 * At boot time, we don't yet know if devices will be 64-bit capable. 2986 * Assume that they will -- if they turn out not to be, then we can 2987 * take them out of the 1:1 domain later. 2988 */ 2989 if (!startup) { 2990 /* 2991 * If the device's dma_mask is less than the system's memory 2992 * size then this is not a candidate for identity mapping. 2993 */ 2994 u64 dma_mask = *dev->dma_mask; 2995 2996 if (dev->coherent_dma_mask && 2997 dev->coherent_dma_mask < dma_mask) 2998 dma_mask = dev->coherent_dma_mask; 2999 3000 return dma_mask >= dma_get_required_mask(dev); 3001 } 3002 3003 return 1; 3004} 3005 3006static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw) 3007{ 3008 int ret; 3009 3010 if (!iommu_should_identity_map(dev, 1)) 3011 return 0; 3012 3013 ret = domain_add_dev_info(si_domain, dev); 3014 if (!ret) 3015 dev_info(dev, "%s identity mapping\n", 3016 hw ? "Hardware" : "Software"); 3017 else if (ret == -ENODEV) 3018 /* device not associated with an iommu */ 3019 ret = 0; 3020 3021 return ret; 3022} 3023 3024 3025static int __init iommu_prepare_static_identity_mapping(int hw) 3026{ 3027 struct pci_dev *pdev = NULL; 3028 struct dmar_drhd_unit *drhd; 3029 /* To avoid a -Wunused-but-set-variable warning.
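for_each_active_iommu() assigns 'iommu' on every iteration, while the loop body below only reads 'drhd' and 'dev', hence the __maybe_unused annotation.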
*/ 3030 struct intel_iommu *iommu __maybe_unused; 3031 struct device *dev; 3032 int i; 3033 int ret = 0; 3034 3035 for_each_pci_dev(pdev) { 3036 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw); 3037 if (ret) 3038 return ret; 3039 } 3040 3041 for_each_active_iommu(iommu, drhd) 3042 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) { 3043 struct acpi_device_physical_node *pn; 3044 struct acpi_device *adev; 3045 3046 if (dev->bus != &acpi_bus_type) 3047 continue; 3048 3049 adev = to_acpi_device(dev); 3050 mutex_lock(&adev->physical_node_lock); 3051 list_for_each_entry(pn, &adev->physical_node_list, node) { 3052 ret = dev_prepare_static_identity_mapping(pn->dev, hw); 3053 if (ret) 3054 break; 3055 } 3056 mutex_unlock(&adev->physical_node_lock); 3057 if (ret) 3058 return ret; 3059 } 3060 3061 return 0; 3062} 3063 3064static void intel_iommu_init_qi(struct intel_iommu *iommu) 3065{ 3066 /* 3067 * Start from a sane IOMMU hardware state. 3068 * If the queued invalidation is already initialized by us 3069 * (for example, while enabling interrupt-remapping) then 3070 * things are already rolling from a sane state. 3071 */ 3072 if (!iommu->qi) { 3073 /* 3074 * Clear any previous faults. 3075 */ 3076 dmar_fault(-1, iommu); 3077 /* 3078 * Disable queued invalidation if supported and already enabled 3079 * before OS handover. 3080 */ 3081 dmar_disable_qi(iommu); 3082 } 3083 3084 if (dmar_enable_qi(iommu)) { 3085 /* 3086 * Queued Invalidate not enabled, use Register Based Invalidate 3087 */ 3088 iommu->flush.flush_context = __iommu_flush_context; 3089 iommu->flush.flush_iotlb = __iommu_flush_iotlb; 3090 pr_info("%s: Using Register based invalidation\n", 3091 iommu->name); 3092 } else { 3093 iommu->flush.flush_context = qi_flush_context; 3094 iommu->flush.flush_iotlb = qi_flush_iotlb; 3095 pr_info("%s: Using Queued invalidation\n", iommu->name); 3096 } 3097} 3098 3099static int copy_context_table(struct intel_iommu *iommu, 3100 struct root_entry *old_re, 3101 struct context_entry **tbl, 3102 int bus, bool ext) 3103{ 3104 int tbl_idx, pos = 0, idx, devfn, ret = 0, did; 3105 struct context_entry *new_ce = NULL, ce; 3106 struct context_entry *old_ce = NULL; 3107 struct root_entry re; 3108 phys_addr_t old_ce_phys; 3109 3110 tbl_idx = ext ? bus * 2 : bus; 3111 memcpy(&re, old_re, sizeof(re)); 3112 3113 for (devfn = 0; devfn < 256; devfn++) { 3114 /* First calculate the correct index */ 3115 idx = (ext ?
devfn * 2 : devfn) % 256; 3116 3117 if (idx == 0) { 3118 /* First save what we may have and clean up */ 3119 if (new_ce) { 3120 tbl[tbl_idx] = new_ce; 3121 __iommu_flush_cache(iommu, new_ce, 3122 VTD_PAGE_SIZE); 3123 pos = 1; 3124 } 3125 3126 if (old_ce) 3127 memunmap(old_ce); 3128 3129 ret = 0; 3130 if (devfn < 0x80) 3131 old_ce_phys = root_entry_lctp(&re); 3132 else 3133 old_ce_phys = root_entry_uctp(&re); 3134 3135 if (!old_ce_phys) { 3136 if (ext && devfn == 0) { 3137 /* No LCTP, try UCTP */ 3138 devfn = 0x7f; 3139 continue; 3140 } else { 3141 goto out; 3142 } 3143 } 3144 3145 ret = -ENOMEM; 3146 old_ce = memremap(old_ce_phys, PAGE_SIZE, 3147 MEMREMAP_WB); 3148 if (!old_ce) 3149 goto out; 3150 3151 new_ce = alloc_pgtable_page(iommu->node); 3152 if (!new_ce) 3153 goto out_unmap; 3154 3155 ret = 0; 3156 } 3157 3158 /* Now copy the context entry */ 3159 memcpy(&ce, old_ce + idx, sizeof(ce)); 3160 3161 if (!__context_present(&ce)) 3162 continue; 3163 3164 did = context_domain_id(&ce); 3165 if (did >= 0 && did < cap_ndoms(iommu->cap)) 3166 set_bit(did, iommu->domain_ids); 3167 3168 /* 3169 * We need a marker for copied context entries. This 3170 * marker needs to work for the old format as well as 3171 * for extended context entries. 3172 * 3173 * Bit 67 of the context entry is used. In the old 3174 * format this bit is available to software, in the 3175 * extended format it is the PGE bit, but PGE is ignored 3176 * by HW if PASIDs are disabled (and thus still 3177 * available). 3178 * 3179 * So disable PASIDs first and then mark the entry 3180 * copied. This means that we don't copy PASID 3181 * translations from the old kernel, but this is fine as 3182 * faults there are not fatal. 3183 */ 3184 context_clear_pasid_enable(&ce); 3185 context_set_copied(&ce); 3186 3187 new_ce[idx] = ce; 3188 } 3189 3190 tbl[tbl_idx + pos] = new_ce; 3191 3192 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE); 3193 3194out_unmap: 3195 memunmap(old_ce); 3196 3197out: 3198 return ret; 3199} 3200 3201static int copy_translation_tables(struct intel_iommu *iommu) 3202{ 3203 struct context_entry **ctxt_tbls; 3204 struct root_entry *old_rt; 3205 phys_addr_t old_rt_phys; 3206 int ctxt_table_entries; 3207 unsigned long flags; 3208 u64 rtaddr_reg; 3209 int bus, ret; 3210 bool new_ext, ext; 3211 3212 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG); 3213 ext = !!(rtaddr_reg & DMA_RTADDR_RTT); 3214 new_ext = !!ecap_ecs(iommu->ecap); 3215 3216 /* 3217 * The RTT bit can only be changed when translation is disabled, 3218 * but disabling translation would open a window for data 3219 * corruption. So bail out and don't copy anything if we would 3220 * have to change the bit. 3221 */ 3222 if (new_ext != ext) 3223 return -EINVAL; 3224 3225 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK; 3226 if (!old_rt_phys) 3227 return -EINVAL; 3228 3229 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB); 3230 if (!old_rt) 3231 return -ENOMEM; 3232 3233 /* This is too big for the stack - allocate it from slab */ 3234 ctxt_table_entries = ext ?
512 : 256; 3235 ret = -ENOMEM; 3236 ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL); 3237 if (!ctxt_tbls) 3238 goto out_unmap; 3239 3240 for (bus = 0; bus < 256; bus++) { 3241 ret = copy_context_table(iommu, &old_rt[bus], 3242 ctxt_tbls, bus, ext); 3243 if (ret) { 3244 pr_err("%s: Failed to copy context table for bus %d\n", 3245 iommu->name, bus); 3246 continue; 3247 } 3248 } 3249 3250 spin_lock_irqsave(&iommu->lock, flags); 3251 3252 /* Context tables are copied, now write them to the root_entry table */ 3253 for (bus = 0; bus < 256; bus++) { 3254 int idx = ext ? bus * 2 : bus; 3255 u64 val; 3256 3257 if (ctxt_tbls[idx]) { 3258 val = virt_to_phys(ctxt_tbls[idx]) | 1; 3259 iommu->root_entry[bus].lo = val; 3260 } 3261 3262 if (!ext || !ctxt_tbls[idx + 1]) 3263 continue; 3264 3265 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1; 3266 iommu->root_entry[bus].hi = val; 3267 } 3268 3269 spin_unlock_irqrestore(&iommu->lock, flags); 3270 3271 kfree(ctxt_tbls); 3272 3273 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE); 3274 3275 ret = 0; 3276 3277out_unmap: 3278 memunmap(old_rt); 3279 3280 return ret; 3281} 3282 3283static int __init init_dmars(void) 3284{ 3285 struct dmar_drhd_unit *drhd; 3286 struct dmar_rmrr_unit *rmrr; 3287 bool copied_tables = false; 3288 struct device *dev; 3289 struct intel_iommu *iommu; 3290 int i, ret; 3291 3292 /* 3293 * for each drhd 3294 * allocate root 3295 * initialize and program root entry to not present 3296 * endfor 3297 */ 3298 for_each_drhd_unit(drhd) { 3299 /* 3300 * lock not needed as this is only incremented in the single- 3301 * threaded kernel __init code path; all other access is read- 3302 * only 3303 */ 3304 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) { 3305 g_num_of_iommus++; 3306 continue; 3307 } 3308 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED); 3309 } 3310 3311 /* Preallocate enough resources for IOMMU hot-addition */ 3312 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) 3313 g_num_of_iommus = DMAR_UNITS_SUPPORTED; 3314 3315 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *), 3316 GFP_KERNEL); 3317 if (!g_iommus) { 3318 pr_err("Allocating global iommu array failed\n"); 3319 ret = -ENOMEM; 3320 goto error; 3321 } 3322 3323 for_each_active_iommu(iommu, drhd) { 3324 /* 3325 * Find the max pasid size of all IOMMUs in the system. 3326 * We need to ensure the system pasid table is no bigger 3327 * than the smallest supported. 3328 */ 3329 if (pasid_supported(iommu)) { 3330 u32 temp = 2 << ecap_pss(iommu->ecap); 3331 3332 intel_pasid_max_id = min_t(u32, temp, 3333 intel_pasid_max_id); 3334 } 3335 3336 g_iommus[iommu->seq_id] = iommu; 3337 3338 intel_iommu_init_qi(iommu); 3339 3340 ret = iommu_init_domains(iommu); 3341 if (ret) 3342 goto free_iommu; 3343 3344 init_translation_status(iommu); 3345 3346 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) { 3347 iommu_disable_translation(iommu); 3348 clear_translation_pre_enabled(iommu); 3349 pr_warn("Translation was enabled for %s but we are not in kdump mode\n", 3350 iommu->name); 3351 } 3352 3353 /* 3354 * TBD: 3355 * we could share the same root & context tables 3356 * among all IOMMUs. Need to split it later.
3357 */ 3358 ret = iommu_alloc_root_entry(iommu); 3359 if (ret) 3360 goto free_iommu; 3361 3362 if (translation_pre_enabled(iommu)) { 3363 pr_info("Translation already enabled - trying to copy translation structures\n"); 3364 3365 ret = copy_translation_tables(iommu); 3366 if (ret) { 3367 /* 3368 * We found the IOMMU with translation 3369 * enabled - but failed to copy over the 3370 * old root-entry table. Try to proceed 3371 * by disabling translation now and 3372 * allocating a clean root-entry table. 3373 * This might cause DMAR faults, but 3374 * probably the dump will still succeed. 3375 */ 3376 pr_err("Failed to copy translation tables from previous kernel for %s\n", 3377 iommu->name); 3378 iommu_disable_translation(iommu); 3379 clear_translation_pre_enabled(iommu); 3380 } else { 3381 pr_info("Copied translation tables from previous kernel for %s\n", 3382 iommu->name); 3383 copied_tables = true; 3384 } 3385 } 3386 3387 if (!ecap_pass_through(iommu->ecap)) 3388 hw_pass_through = 0; 3389#ifdef CONFIG_INTEL_IOMMU_SVM 3390 if (pasid_supported(iommu)) 3391 intel_svm_init(iommu); 3392#endif 3393 } 3394 3395 /* 3396 * Now that qi is enabled on all iommus, set the root entry and flush 3397 * caches. This is required on some Intel X58 chipsets, otherwise the 3398 * flush_context function will loop forever and the boot hangs. 3399 */ 3400 for_each_active_iommu(iommu, drhd) { 3401 iommu_flush_write_buffer(iommu); 3402 iommu_set_root_entry(iommu); 3403 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); 3404 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); 3405 } 3406 3407 if (iommu_pass_through) 3408 iommu_identity_mapping |= IDENTMAP_ALL; 3409 3410#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA 3411 dmar_map_gfx = 0; 3412#endif 3413 3414 if (!dmar_map_gfx) 3415 iommu_identity_mapping |= IDENTMAP_GFX; 3416 3417 check_tylersburg_isoch(); 3418 3419 if (iommu_identity_mapping) { 3420 ret = si_domain_init(hw_pass_through); 3421 if (ret) 3422 goto free_iommu; 3423 } 3424 3425 3426 /* 3427 * If we copied translations from a previous kernel in the kdump 3428 * case, we cannot assign the devices to domains now, as that 3429 * would eliminate the old mappings. So skip this part and defer 3430 * the assignment to device driver initialization time. 3431 */ 3432 if (copied_tables) 3433 goto domains_done; 3434 3435 /* 3436 * If pass through is not set or not enabled, set up context entries 3437 * for identity mappings for RMRR, GFX, and ISA, and maybe fall back to 3438 * static identity mapping if iommu_identity_mapping is set. 3439 */ 3440 if (iommu_identity_mapping) { 3441 ret = iommu_prepare_static_identity_mapping(hw_pass_through); 3442 if (ret) { 3443 pr_crit("Failed to setup IOMMU pass-through\n"); 3444 goto free_iommu; 3445 } 3446 } 3447 /* 3448 * For each rmrr 3449 * for each dev attached to rmrr 3450 * do 3451 * locate drhd for dev, alloc domain for dev 3452 * allocate free domain 3453 * allocate page table entries for rmrr 3454 * if context not allocated for bus 3455 * allocate and init context 3456 * set present in root table for this bus 3457 * init context with domain, translation etc 3458 * endfor 3459 * endfor 3460 */ 3461 pr_info("Setting RMRR:\n"); 3462 for_each_rmrr_units(rmrr) { 3463 /* some BIOSes list non-existent devices in the DMAR table.
*/ 3464 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, 3465 i, dev) { 3466 ret = iommu_prepare_rmrr_dev(rmrr, dev); 3467 if (ret) 3468 pr_err("Mapping reserved region failed\n"); 3469 } 3470 } 3471 3472 iommu_prepare_isa(); 3473 3474domains_done: 3475 3476 /* 3477 * for each drhd 3478 * enable fault log 3479 * global invalidate context cache 3480 * global invalidate iotlb 3481 * enable translation 3482 */ 3483 for_each_iommu(iommu, drhd) { 3484 if (drhd->ignored) { 3485 /* 3486 * we always have to disable PMRs or DMA may fail on 3487 * this device 3488 */ 3489 if (force_on) 3490 iommu_disable_protect_mem_regions(iommu); 3491 continue; 3492 } 3493 3494 iommu_flush_write_buffer(iommu); 3495 3496#ifdef CONFIG_INTEL_IOMMU_SVM 3497 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) { 3498 /* 3499 * Calling dmar_alloc_hwirq() with dmar_global_lock held 3500 * could cause a lock ordering problem, so drop the lock. 3501 */ 3502 up_write(&dmar_global_lock); 3503 ret = intel_svm_enable_prq(iommu); 3504 down_write(&dmar_global_lock); 3505 if (ret) 3506 goto free_iommu; 3507 } 3508#endif 3509 ret = dmar_set_interrupt(iommu); 3510 if (ret) 3511 goto free_iommu; 3512 3513 if (!translation_pre_enabled(iommu)) 3514 iommu_enable_translation(iommu); 3515 3516 iommu_disable_protect_mem_regions(iommu); 3517 } 3518 3519 return 0; 3520 3521free_iommu: 3522 for_each_active_iommu(iommu, drhd) { 3523 disable_dmar_iommu(iommu); 3524 free_dmar_iommu(iommu); 3525 } 3526 3527 kfree(g_iommus); 3528 3529error: 3530 return ret; 3531} 3532 3533/* This takes a number of _MM_ pages, not VTD pages */ 3534static unsigned long intel_alloc_iova(struct device *dev, 3535 struct dmar_domain *domain, 3536 unsigned long nrpages, uint64_t dma_mask) 3537{ 3538 unsigned long iova_pfn; 3539 3540 /* Restrict dma_mask to the width that the iommu can handle */ 3541 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask); 3542 /* Ensure we reserve the whole size-aligned region */ 3543 nrpages = __roundup_pow_of_two(nrpages); 3544 3545 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) { 3546 /* 3547 * First try to allocate an io virtual address in 3548 * DMA_BIT_MASK(32) and if that fails then try allocating 3549 * from higher range 3550 */ 3551 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages, 3552 IOVA_PFN(DMA_BIT_MASK(32)), false); 3553 if (iova_pfn) 3554 return iova_pfn; 3555 } 3556 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages, 3557 IOVA_PFN(dma_mask), true); 3558 if (unlikely(!iova_pfn)) { 3559 dev_err(dev, "Allocating %ld-page iova failed", nrpages); 3560 return 0; 3561 } 3562 3563 return iova_pfn; 3564} 3565 3566struct dmar_domain *get_valid_domain_for_dev(struct device *dev) 3567{ 3568 struct dmar_domain *domain, *tmp; 3569 struct dmar_rmrr_unit *rmrr; 3570 struct device *i_dev; 3571 int i, ret; 3572 3573 domain = find_domain(dev); 3574 if (domain) 3575 goto out; 3576 3577 domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); 3578 if (!domain) 3579 goto out; 3580 3581 /* We have a new domain - setup possible RMRRs for the device */ 3582 rcu_read_lock(); 3583 for_each_rmrr_units(rmrr) { 3584 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, 3585 i, i_dev) { 3586 if (i_dev != dev) 3587 continue; 3588 3589 ret = domain_prepare_identity_map(dev, domain, 3590 rmrr->base_address, 3591 rmrr->end_address); 3592 if (ret) 3593 dev_err(dev, "Mapping reserved region failed\n"); 3594 } 3595 } 3596 rcu_read_unlock(); 3597 3598 tmp = set_domain_for_dev(dev, domain); 3599 if (!tmp || domain != tmp) { 3600
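/* Another path already attached a domain for this device (or the insertion failed and tmp is NULL); free the domain we just built, RMRR mappings included, and use tmp. */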
domain_exit(domain); 3601 domain = tmp; 3602 } 3603 3604out: 3605 3606 if (!domain) 3607 dev_err(dev, "Allocating domain failed\n"); 3608 3609 3610 return domain; 3611} 3612 3613/* Check if the dev needs to go through the non-identity map and unmap process. */ 3614static bool iommu_need_mapping(struct device *dev) 3615{ 3616 int found; 3617 3618 if (iommu_dummy(dev)) 3619 return false; 3620 3621 if (!iommu_identity_mapping) 3622 return true; 3623 3624 found = identity_mapping(dev); 3625 if (found) { 3626 if (iommu_should_identity_map(dev, 0)) 3627 return false; 3628 3629 /* 3630 * The device is removed from si_domain and falls back to 3631 * non-identity mapping for 32-bit DMA. 3632 */ 3633 dmar_remove_one_dev_info(dev); 3634 dev_info(dev, "32bit DMA uses non-identity mapping\n"); 3635 } else { 3636 /* 3637 * When a 64-bit DMA device is detached from a VM, the device 3638 * is put back into si_domain for identity mapping. 3639 */ 3640 if (iommu_should_identity_map(dev, 0) && 3641 !domain_add_dev_info(si_domain, dev)) { 3642 dev_info(dev, "64bit DMA uses identity mapping\n"); 3643 return false; 3644 } 3645 } 3646 3647 return true; 3648} 3649 3650static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, 3651 size_t size, int dir, u64 dma_mask) 3652{ 3653 struct dmar_domain *domain; 3654 phys_addr_t start_paddr; 3655 unsigned long iova_pfn; 3656 int prot = 0; 3657 int ret; 3658 struct intel_iommu *iommu; 3659 unsigned long paddr_pfn = paddr >> PAGE_SHIFT; 3660 3661 BUG_ON(dir == DMA_NONE); 3662 3663 domain = get_valid_domain_for_dev(dev); 3664 if (!domain) 3665 return DMA_MAPPING_ERROR; 3666 3667 iommu = domain_get_iommu(domain); 3668 size = aligned_nrpages(paddr, size); 3669 3670 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask); 3671 if (!iova_pfn) 3672 goto error; 3673 3674 /* 3675 * Check if DMAR supports zero-length reads on write only 3676 * mappings.. 3677 */ 3678 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \ 3679 !cap_zlr(iommu->cap)) 3680 prot |= DMA_PTE_READ; 3681 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) 3682 prot |= DMA_PTE_WRITE; 3683 /* 3684 * paddr - (paddr + size) might be a partial page, so we should map the 3685 * whole page.
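(E.g., with illustrative numbers: 0x1234 bytes at paddr 0x2008 touch two 4KiB pages, and aligned_nrpages() rounds the request up to 2 pages.)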
Note: if two parts of one page are mapped separately, we 3686 * might have two guest addresses mapping to the same host paddr, but this 3687 * is not a big problem 3688 */ 3689 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn), 3690 mm_to_dma_pfn(paddr_pfn), size, prot); 3691 if (ret) 3692 goto error; 3693 3694 start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT; 3695 start_paddr += paddr & ~PAGE_MASK; 3696 return start_paddr; 3697 3698error: 3699 if (iova_pfn) 3700 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size)); 3701 dev_err(dev, "Device request: %zx@%llx dir %d --- failed\n", 3702 size, (unsigned long long)paddr, dir); 3703 return DMA_MAPPING_ERROR; 3704} 3705 3706static dma_addr_t intel_map_page(struct device *dev, struct page *page, 3707 unsigned long offset, size_t size, 3708 enum dma_data_direction dir, 3709 unsigned long attrs) 3710{ 3711 if (iommu_need_mapping(dev)) 3712 return __intel_map_single(dev, page_to_phys(page) + offset, 3713 size, dir, *dev->dma_mask); 3714 return dma_direct_map_page(dev, page, offset, size, dir, attrs); 3715} 3716 3717static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr, 3718 size_t size, enum dma_data_direction dir, 3719 unsigned long attrs) 3720{ 3721 if (iommu_need_mapping(dev)) 3722 return __intel_map_single(dev, phys_addr, size, dir, 3723 *dev->dma_mask); 3724 return dma_direct_map_resource(dev, phys_addr, size, dir, attrs); 3725} 3726 3727static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) 3728{ 3729 struct dmar_domain *domain; 3730 unsigned long start_pfn, last_pfn; 3731 unsigned long nrpages; 3732 unsigned long iova_pfn; 3733 struct intel_iommu *iommu; 3734 struct page *freelist; 3735 struct pci_dev *pdev = NULL; 3736 3737 domain = find_domain(dev); 3738 BUG_ON(!domain); 3739 3740 iommu = domain_get_iommu(domain); 3741 3742 iova_pfn = IOVA_PFN(dev_addr); 3743 3744 nrpages = aligned_nrpages(dev_addr, size); 3745 start_pfn = mm_to_dma_pfn(iova_pfn); 3746 last_pfn = start_pfn + nrpages - 1; 3747 3748 if (dev_is_pci(dev)) 3749 pdev = to_pci_dev(dev); 3750 3751 dev_dbg(dev, "Device unmapping: pfn %lx-%lx\n", start_pfn, last_pfn); 3752 3753 freelist = domain_unmap(domain, start_pfn, last_pfn); 3754 3755 if (intel_iommu_strict || (pdev && pdev->untrusted)) { 3756 iommu_flush_iotlb_psi(iommu, domain, start_pfn, 3757 nrpages, !freelist, 0); 3758 /* free iova */ 3759 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages)); 3760 dma_free_pagelist(freelist); 3761 } else { 3762 queue_iova(&domain->iovad, iova_pfn, nrpages, 3763 (unsigned long)freelist); 3764 /* 3765 * queue up the release of the unmap to save the roughly 1/6th of 3766 * the CPU time used up by the iotlb flush operation...
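The freelist pointer is stashed with the queued IOVA entry, so the old page-table pages are freed only after the deferred IOTLB invalidation has actually run.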
3767 */ 3768 } 3769} 3770 3771static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, 3772 size_t size, enum dma_data_direction dir, 3773 unsigned long attrs) 3774{ 3775 if (iommu_need_mapping(dev)) 3776 intel_unmap(dev, dev_addr, size); 3777 else 3778 dma_direct_unmap_page(dev, dev_addr, size, dir, attrs); 3779} 3780 3781static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr, 3782 size_t size, enum dma_data_direction dir, unsigned long attrs) 3783{ 3784 if (iommu_need_mapping(dev)) 3785 intel_unmap(dev, dev_addr, size); 3786} 3787 3788static void *intel_alloc_coherent(struct device *dev, size_t size, 3789 dma_addr_t *dma_handle, gfp_t flags, 3790 unsigned long attrs) 3791{ 3792 struct page *page = NULL; 3793 int order; 3794 3795 if (!iommu_need_mapping(dev)) 3796 return dma_direct_alloc(dev, size, dma_handle, flags, attrs); 3797 3798 size = PAGE_ALIGN(size); 3799 order = get_order(size); 3800 3801 if (gfpflags_allow_blocking(flags)) { 3802 unsigned int count = size >> PAGE_SHIFT; 3803 3804 page = dma_alloc_from_contiguous(dev, count, order, 3805 flags & __GFP_NOWARN); 3806 } 3807 3808 if (!page) 3809 page = alloc_pages(flags, order); 3810 if (!page) 3811 return NULL; 3812 memset(page_address(page), 0, size); 3813 3814 *dma_handle = __intel_map_single(dev, page_to_phys(page), size, 3815 DMA_BIDIRECTIONAL, 3816 dev->coherent_dma_mask); 3817 if (*dma_handle != DMA_MAPPING_ERROR) 3818 return page_address(page); 3819 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) 3820 __free_pages(page, order); 3821 3822 return NULL; 3823} 3824 3825static void intel_free_coherent(struct device *dev, size_t size, void *vaddr, 3826 dma_addr_t dma_handle, unsigned long attrs) 3827{ 3828 int order; 3829 struct page *page = virt_to_page(vaddr); 3830 3831 if (!iommu_need_mapping(dev)) 3832 return dma_direct_free(dev, size, vaddr, dma_handle, attrs); 3833 3834 size = PAGE_ALIGN(size); 3835 order = get_order(size); 3836 3837 intel_unmap(dev, dma_handle, size); 3838 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) 3839 __free_pages(page, order); 3840} 3841 3842static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist, 3843 int nelems, enum dma_data_direction dir, 3844 unsigned long attrs) 3845{ 3846 dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK; 3847 unsigned long nrpages = 0; 3848 struct scatterlist *sg; 3849 int i; 3850 3851 if (!iommu_need_mapping(dev)) 3852 return dma_direct_unmap_sg(dev, sglist, nelems, dir, attrs); 3853 3854 for_each_sg(sglist, sg, nelems, i) { 3855 nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg)); 3856 } 3857 3858 intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT); 3859} 3860 3861static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, 3862 enum dma_data_direction dir, unsigned long attrs) 3863{ 3864 int i; 3865 struct dmar_domain *domain; 3866 size_t size = 0; 3867 int prot = 0; 3868 unsigned long iova_pfn; 3869 int ret; 3870 struct scatterlist *sg; 3871 unsigned long start_vpfn; 3872 struct intel_iommu *iommu; 3873 3874 BUG_ON(dir == DMA_NONE); 3875 if (!iommu_need_mapping(dev)) 3876 return dma_direct_map_sg(dev, sglist, nelems, dir, attrs); 3877 3878 domain = get_valid_domain_for_dev(dev); 3879 if (!domain) 3880 return 0; 3881 3882 iommu = domain_get_iommu(domain); 3883 3884 for_each_sg(sglist, sg, nelems, i) 3885 size += aligned_nrpages(sg->offset, sg->length); 3886 3887 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), 3888 *dev->dma_mask); 
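/* Note: a single contiguous IOVA block covers the whole scatterlist; __domain_mapping() then lays the sg entries out back to back inside it. */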
3889 if (!iova_pfn) { 3890 sglist->dma_length = 0; 3891 return 0; 3892 } 3893 3894 /* 3895 * Check if DMAR supports zero-length reads on write only 3896 * mappings.. 3897 */ 3898 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \ 3899 !cap_zlr(iommu->cap)) 3900 prot |= DMA_PTE_READ; 3901 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) 3902 prot |= DMA_PTE_WRITE; 3903 3904 start_vpfn = mm_to_dma_pfn(iova_pfn); 3905 3906 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot); 3907 if (unlikely(ret)) { 3908 dma_pte_free_pagetable(domain, start_vpfn, 3909 start_vpfn + size - 1, 3910 agaw_to_level(domain->agaw) + 1); 3911 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size)); 3912 return 0; 3913 } 3914 3915 return nelems; 3916} 3917 3918static const struct dma_map_ops intel_dma_ops = { 3919 .alloc = intel_alloc_coherent, 3920 .free = intel_free_coherent, 3921 .map_sg = intel_map_sg, 3922 .unmap_sg = intel_unmap_sg, 3923 .map_page = intel_map_page, 3924 .unmap_page = intel_unmap_page, 3925 .map_resource = intel_map_resource, 3926 .unmap_resource = intel_unmap_resource, 3927 .dma_supported = dma_direct_supported, 3928}; 3929 3930static inline int iommu_domain_cache_init(void) 3931{ 3932 int ret = 0; 3933 3934 iommu_domain_cache = kmem_cache_create("iommu_domain", 3935 sizeof(struct dmar_domain), 3936 0, 3937 SLAB_HWCACHE_ALIGN, 3938 3939 NULL); 3940 if (!iommu_domain_cache) { 3941 pr_err("Couldn't create iommu_domain cache\n"); 3942 ret = -ENOMEM; 3943 } 3944 3945 return ret; 3946} 3947 3948static inline int iommu_devinfo_cache_init(void) 3949{ 3950 int ret = 0; 3951 3952 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo", 3953 sizeof(struct device_domain_info), 3954 0, 3955 SLAB_HWCACHE_ALIGN, 3956 NULL); 3957 if (!iommu_devinfo_cache) { 3958 pr_err("Couldn't create devinfo cache\n"); 3959 ret = -ENOMEM; 3960 } 3961 3962 return ret; 3963} 3964 3965static int __init iommu_init_mempool(void) 3966{ 3967 int ret; 3968 ret = iova_cache_get(); 3969 if (ret) 3970 return ret; 3971 3972 ret = iommu_domain_cache_init(); 3973 if (ret) 3974 goto domain_error; 3975 3976 ret = iommu_devinfo_cache_init(); 3977 if (!ret) 3978 return ret; 3979 3980 kmem_cache_destroy(iommu_domain_cache); 3981domain_error: 3982 iova_cache_put(); 3983 3984 return -ENOMEM; 3985} 3986 3987static void __init iommu_exit_mempool(void) 3988{ 3989 kmem_cache_destroy(iommu_devinfo_cache); 3990 kmem_cache_destroy(iommu_domain_cache); 3991 iova_cache_put(); 3992} 3993 3994static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev) 3995{ 3996 struct dmar_drhd_unit *drhd; 3997 u32 vtbar; 3998 int rc; 3999 4000 /* We know that this device on this chipset has its own IOMMU. 4001 * If we find it under a different IOMMU, then the BIOS is lying 4002 * to us. Hope that the IOMMU for this device is actually 4003 * disabled, and it needs no translation... 
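If the sanity check below trips, we taint the kernel with TAINT_FIRMWARE_WORKAROUND and give the device DUMMY_DEVICE_DOMAIN_INFO so it is never translated.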
4004 */ 4005 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar); 4006 if (rc) { 4007 /* "can't" happen */ 4008 dev_info(&pdev->dev, "failed to run vt-d quirk\n"); 4009 return; 4010 } 4011 vtbar &= 0xffff0000; 4012 4013 /* we know that this IOMMU should be at offset 0xa000 from vtbar */ 4014 drhd = dmar_find_matched_drhd_unit(pdev); 4015 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000, 4016 TAINT_FIRMWARE_WORKAROUND, 4017 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n")) 4018 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; 4019} 4020DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu); 4021 4022static void __init init_no_remapping_devices(void) 4023{ 4024 struct dmar_drhd_unit *drhd; 4025 struct device *dev; 4026 int i; 4027 4028 for_each_drhd_unit(drhd) { 4029 if (!drhd->include_all) { 4030 for_each_active_dev_scope(drhd->devices, 4031 drhd->devices_cnt, i, dev) 4032 break; 4033 /* ignore DMAR unit if no devices exist */ 4034 if (i == drhd->devices_cnt) 4035 drhd->ignored = 1; 4036 } 4037 } 4038 4039 for_each_active_drhd_unit(drhd) { 4040 if (drhd->include_all) 4041 continue; 4042 4043 for_each_active_dev_scope(drhd->devices, 4044 drhd->devices_cnt, i, dev) 4045 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev))) 4046 break; 4047 if (i < drhd->devices_cnt) 4048 continue; 4049 4050 /* This IOMMU has *only* gfx devices. Either bypass it or 4051 set the gfx_mapped flag, as appropriate */ 4052 if (!dmar_map_gfx) { 4053 drhd->ignored = 1; 4054 for_each_active_dev_scope(drhd->devices, 4055 drhd->devices_cnt, i, dev) 4056 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; 4057 } 4058 } 4059} 4060 4061#ifdef CONFIG_SUSPEND 4062static int init_iommu_hw(void) 4063{ 4064 struct dmar_drhd_unit *drhd; 4065 struct intel_iommu *iommu = NULL; 4066 4067 for_each_active_iommu(iommu, drhd) 4068 if (iommu->qi) 4069 dmar_reenable_qi(iommu); 4070 4071 for_each_iommu(iommu, drhd) { 4072 if (drhd->ignored) { 4073 /* 4074 * we always have to disable PMRs or DMA may fail on 4075 * this device 4076 */ 4077 if (force_on) 4078 iommu_disable_protect_mem_regions(iommu); 4079 continue; 4080 } 4081 4082 iommu_flush_write_buffer(iommu); 4083 4084 iommu_set_root_entry(iommu); 4085 4086 iommu->flush.flush_context(iommu, 0, 0, 0, 4087 DMA_CCMD_GLOBAL_INVL); 4088 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); 4089 iommu_enable_translation(iommu); 4090 iommu_disable_protect_mem_regions(iommu); 4091 } 4092 4093 return 0; 4094} 4095 4096static void iommu_flush_all(void) 4097{ 4098 struct dmar_drhd_unit *drhd; 4099 struct intel_iommu *iommu; 4100 4101 for_each_active_iommu(iommu, drhd) { 4102 iommu->flush.flush_context(iommu, 0, 0, 0, 4103 DMA_CCMD_GLOBAL_INVL); 4104 iommu->flush.flush_iotlb(iommu, 0, 0, 0, 4105 DMA_TLB_GLOBAL_FLUSH); 4106 } 4107} 4108 4109static int iommu_suspend(void) 4110{ 4111 struct dmar_drhd_unit *drhd; 4112 struct intel_iommu *iommu = NULL; 4113 unsigned long flag; 4114 4115 for_each_active_iommu(iommu, drhd) { 4116 iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32), 4117 GFP_ATOMIC); 4118 if (!iommu->iommu_state) 4119 goto nomem; 4120 } 4121 4122 iommu_flush_all(); 4123 4124 for_each_active_iommu(iommu, drhd) { 4125 iommu_disable_translation(iommu); 4126 4127 raw_spin_lock_irqsave(&iommu->register_lock, flag); 4128 4129 iommu->iommu_state[SR_DMAR_FECTL_REG] = 4130 readl(iommu->reg + DMAR_FECTL_REG); 4131 iommu->iommu_state[SR_DMAR_FEDATA_REG] = 4132
static int iommu_suspend(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu = NULL;
	unsigned long flag;

	for_each_active_iommu(iommu, drhd) {
		iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
					     GFP_ATOMIC);
		if (!iommu->iommu_state)
			goto nomem;
	}

	iommu_flush_all();

	for_each_active_iommu(iommu, drhd) {
		iommu_disable_translation(iommu);

		raw_spin_lock_irqsave(&iommu->register_lock, flag);

		iommu->iommu_state[SR_DMAR_FECTL_REG] =
			readl(iommu->reg + DMAR_FECTL_REG);
		iommu->iommu_state[SR_DMAR_FEDATA_REG] =
			readl(iommu->reg + DMAR_FEDATA_REG);
		iommu->iommu_state[SR_DMAR_FEADDR_REG] =
			readl(iommu->reg + DMAR_FEADDR_REG);
		iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
			readl(iommu->reg + DMAR_FEUADDR_REG);

		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
	}
	return 0;

nomem:
	for_each_active_iommu(iommu, drhd)
		kfree(iommu->iommu_state);

	return -ENOMEM;
}

static void iommu_resume(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu = NULL;
	unsigned long flag;

	if (init_iommu_hw()) {
		if (force_on)
			panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
		else
			WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
		return;
	}

	for_each_active_iommu(iommu, drhd) {

		raw_spin_lock_irqsave(&iommu->register_lock, flag);

		writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
		       iommu->reg + DMAR_FECTL_REG);
		writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
		       iommu->reg + DMAR_FEDATA_REG);
		writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
		       iommu->reg + DMAR_FEADDR_REG);
		writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
		       iommu->reg + DMAR_FEUADDR_REG);

		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
	}

	for_each_active_iommu(iommu, drhd)
		kfree(iommu->iommu_state);
}

static struct syscore_ops iommu_syscore_ops = {
	.resume		= iommu_resume,
	.suspend	= iommu_suspend,
};

static void __init init_iommu_pm_ops(void)
{
	register_syscore_ops(&iommu_syscore_ops);
}

#else
static inline void init_iommu_pm_ops(void) {}
#endif	/* CONFIG_SUSPEND */

int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
{
	struct acpi_dmar_reserved_memory *rmrr;
	int prot = DMA_PTE_READ|DMA_PTE_WRITE;
	struct dmar_rmrr_unit *rmrru;
	size_t length;

	rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
	if (!rmrru)
		goto out;

	rmrru->hdr = header;
	rmrr = (struct acpi_dmar_reserved_memory *)header;
	rmrru->base_address = rmrr->base_address;
	rmrru->end_address = rmrr->end_address;

	length = rmrr->end_address - rmrr->base_address + 1;
	rmrru->resv = iommu_alloc_resv_region(rmrr->base_address, length, prot,
					      IOMMU_RESV_DIRECT);
	if (!rmrru->resv)
		goto free_rmrru;

	rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
				((void *)rmrr) + rmrr->header.length,
				&rmrru->devices_cnt);
	if (rmrru->devices_cnt && rmrru->devices == NULL)
		goto free_all;

	list_add(&rmrru->list, &dmar_rmrr_units);

	return 0;
free_all:
	kfree(rmrru->resv);
free_rmrru:
	kfree(rmrru);
out:
	return -ENOMEM;
}
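/*
 * RMRR entries in the DMAR table carry an inclusive end address, hence
 * the "end - base + 1" above. A hypothetical RMRR covering
 * [0xdf800000, 0xdfffffff] (addresses invented for illustration)
 * would produce this reserved region:
 *
 *	length = 0xdfffffff - 0xdf800000 + 1;	(0x00800000, 8 MiB)
 *	iommu_alloc_resv_region(0xdf800000, 0x00800000,
 *				DMA_PTE_READ | DMA_PTE_WRITE,
 *				IOMMU_RESV_DIRECT);
 */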
static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
{
	struct dmar_atsr_unit *atsru;
	struct acpi_dmar_atsr *tmp;

	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
		tmp = (struct acpi_dmar_atsr *)atsru->hdr;
		if (atsr->segment != tmp->segment)
			continue;
		if (atsr->header.length != tmp->header.length)
			continue;
		if (memcmp(atsr, tmp, atsr->header.length) == 0)
			return atsru;
	}

	return NULL;
}

int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
{
	struct acpi_dmar_atsr *atsr;
	struct dmar_atsr_unit *atsru;

	if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
		return 0;

	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
	atsru = dmar_find_atsr(atsr);
	if (atsru)
		return 0;

	atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
	if (!atsru)
		return -ENOMEM;

	/*
	 * If memory is allocated from slab by ACPI _DSM method, we need to
	 * copy the memory content because the memory buffer will be freed
	 * on return.
	 */
	atsru->hdr = (void *)(atsru + 1);
	memcpy(atsru->hdr, hdr, hdr->length);
	atsru->include_all = atsr->flags & 0x1;
	if (!atsru->include_all) {
		atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
				(void *)atsr + atsr->header.length,
				&atsru->devices_cnt);
		if (atsru->devices_cnt && atsru->devices == NULL) {
			kfree(atsru);
			return -ENOMEM;
		}
	}

	list_add_rcu(&atsru->list, &dmar_atsr_units);

	return 0;
}

static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
{
	dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
	kfree(atsru);
}

int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
{
	struct acpi_dmar_atsr *atsr;
	struct dmar_atsr_unit *atsru;

	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
	atsru = dmar_find_atsr(atsr);
	if (atsru) {
		list_del_rcu(&atsru->list);
		synchronize_rcu();
		intel_iommu_free_atsr(atsru);
	}

	return 0;
}

int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
{
	int i;
	struct device *dev;
	struct acpi_dmar_atsr *atsr;
	struct dmar_atsr_unit *atsru;

	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
	atsru = dmar_find_atsr(atsr);
	if (!atsru)
		return 0;

	if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
		for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
					  i, dev)
			return -EBUSY;
	}

	return 0;
}
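/*
 * The three ATSR callbacks above back the ACPI DMAR hot-plug path:
 * dmar_parse_one_atsr() runs when a unit is inserted,
 * dmar_check_one_atsr() runs before an eject and returns -EBUSY while
 * any device in the unit's scope is still present, and
 * dmar_release_one_atsr() drops the bookkeeping once the eject has
 * gone through. The memcpy() of the header in the parse path is what
 * allows the ACPI buffer handed in by _DSM to be freed on return.
 */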
static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
{
	int sp, ret;
	struct intel_iommu *iommu = dmaru->iommu;

	if (g_iommus[iommu->seq_id])
		return 0;

	if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
		pr_warn("%s: Doesn't support hardware pass through.\n",
			iommu->name);
		return -ENXIO;
	}
	if (!ecap_sc_support(iommu->ecap) &&
	    domain_update_iommu_snooping(iommu)) {
		pr_warn("%s: Doesn't support snooping.\n",
			iommu->name);
		return -ENXIO;
	}
	sp = domain_update_iommu_superpage(iommu) - 1;
	if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
		pr_warn("%s: Doesn't support large page.\n",
			iommu->name);
		return -ENXIO;
	}

	/*
	 * Disable translation if already enabled prior to OS handover.
	 */
	if (iommu->gcmd & DMA_GCMD_TE)
		iommu_disable_translation(iommu);

	g_iommus[iommu->seq_id] = iommu;
	ret = iommu_init_domains(iommu);
	if (ret == 0)
		ret = iommu_alloc_root_entry(iommu);
	if (ret)
		goto out;

#ifdef CONFIG_INTEL_IOMMU_SVM
	if (pasid_supported(iommu))
		intel_svm_init(iommu);
#endif

	if (dmaru->ignored) {
		/*
		 * we always have to disable PMRs or DMA may fail on this device
		 */
		if (force_on)
			iommu_disable_protect_mem_regions(iommu);
		return 0;
	}

	intel_iommu_init_qi(iommu);
	iommu_flush_write_buffer(iommu);

#ifdef CONFIG_INTEL_IOMMU_SVM
	if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
		ret = intel_svm_enable_prq(iommu);
		if (ret)
			goto disable_iommu;
	}
#endif
	ret = dmar_set_interrupt(iommu);
	if (ret)
		goto disable_iommu;

	iommu_set_root_entry(iommu);
	iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
	iommu_enable_translation(iommu);

	iommu_disable_protect_mem_regions(iommu);
	return 0;

disable_iommu:
	disable_dmar_iommu(iommu);
out:
	free_dmar_iommu(iommu);
	return ret;
}

int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
{
	int ret = 0;
	struct intel_iommu *iommu = dmaru->iommu;

	if (!intel_iommu_enabled)
		return 0;
	if (iommu == NULL)
		return -EINVAL;

	if (insert) {
		ret = intel_iommu_add(dmaru);
	} else {
		disable_dmar_iommu(iommu);
		free_dmar_iommu(iommu);
	}

	return ret;
}

static void intel_iommu_free_dmars(void)
{
	struct dmar_rmrr_unit *rmrru, *rmrr_n;
	struct dmar_atsr_unit *atsru, *atsr_n;

	list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
		list_del(&rmrru->list);
		dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
		kfree(rmrru->resv);
		kfree(rmrru);
	}

	list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
		list_del(&atsru->list);
		intel_iommu_free_atsr(atsru);
	}
}

int dmar_find_matched_atsr_unit(struct pci_dev *dev)
{
	int i, ret = 1;
	struct pci_bus *bus;
	struct pci_dev *bridge = NULL;
	struct device *tmp;
	struct acpi_dmar_atsr *atsr;
	struct dmar_atsr_unit *atsru;

	dev = pci_physfn(dev);
	for (bus = dev->bus; bus; bus = bus->parent) {
		bridge = bus->self;
		/* If it's an integrated device, allow ATS */
		if (!bridge)
			return 1;
		/* Connected via non-PCIe: no ATS */
		if (!pci_is_pcie(bridge) ||
		    pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
			return 0;
		/* If we found the root port, look it up in the ATSR */
		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
			break;
	}

	rcu_read_lock();
	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
		if (atsr->segment != pci_domain_nr(dev->bus))
			continue;

		for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
			if (tmp == &bridge->dev)
				goto out;

		if (atsru->include_all)
			goto out;
	}
	ret = 0;
out:
	rcu_read_unlock();

	return ret;
}
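/*
 * dmar_find_matched_atsr_unit() above walks from the endpoint up to
 * the root port before consulting the ATSR scopes. As a sketch, with
 * a topology invented purely for illustration:
 *
 *	root port 0000:00:01.0        <- looked up in the ATSR scope
 *	  switch upstream port
 *	    switch downstream port
 *	      endpoint 0000:03:00.0   <- pci_physfn() of the caller
 *
 * Integrated root-complex devices have no upstream bridge at all, so
 * the walk returns 1 (ATS allowed) before any ATSR is consulted, and
 * a conventional-PCI hop anywhere on the path returns 0.
 */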
int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
{
	int ret;
	struct dmar_rmrr_unit *rmrru;
	struct dmar_atsr_unit *atsru;
	struct acpi_dmar_atsr *atsr;
	struct acpi_dmar_reserved_memory *rmrr;

	if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
		return 0;

	list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
		rmrr = container_of(rmrru->hdr,
				    struct acpi_dmar_reserved_memory, header);
		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
			ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
				((void *)rmrr) + rmrr->header.length,
				rmrr->segment, rmrru->devices,
				rmrru->devices_cnt);
			if (ret < 0)
				return ret;
		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
			dmar_remove_dev_scope(info, rmrr->segment,
				rmrru->devices, rmrru->devices_cnt);
		}
	}

	list_for_each_entry(atsru, &dmar_atsr_units, list) {
		if (atsru->include_all)
			continue;

		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
			ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
					(void *)atsr + atsr->header.length,
					atsr->segment, atsru->devices,
					atsru->devices_cnt);
			if (ret > 0)
				break;
			else if (ret < 0)
				return ret;
		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
			if (dmar_remove_dev_scope(info, atsr->segment,
					atsru->devices, atsru->devices_cnt))
				break;
		}
	}

	return 0;
}

/*
 * Here we only respond to the device being unbound from its driver.
 *
 * A newly added device is not attached to its DMAR domain here yet;
 * that only happens once the device is first mapped to an iova.
 */
static int device_notifier(struct notifier_block *nb,
			   unsigned long action, void *data)
{
	struct device *dev = data;
	struct dmar_domain *domain;

	if (iommu_dummy(dev))
		return 0;

	if (action == BUS_NOTIFY_REMOVED_DEVICE) {
		domain = find_domain(dev);
		if (!domain)
			return 0;

		dmar_remove_one_dev_info(dev);
		if (!domain_type_is_vm_or_si(domain) &&
		    list_empty(&domain->devices))
			domain_exit(domain);
	} else if (action == BUS_NOTIFY_ADD_DEVICE) {
		if (iommu_should_identity_map(dev, 1))
			domain_add_dev_info(si_domain, dev);
	}

	return 0;
}

static struct notifier_block device_nb = {
	.notifier_call = device_notifier,
};
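/*
 * Memory hot-plug support for the identity-mapped (si) domain: newly
 * onlined RAM must be added to the 1:1 map before any device DMAs to
 * it, and offlined RAM must be unmapped with its IOTLB entries shot
 * down. The range arithmetic below is a straight shift; assuming
 * PAGE_SHIFT == 12 and a hypothetical 128 MiB section starting at
 * PFN 0x40000:
 *
 *	start = 0x40000 << PAGE_SHIFT;                  = 0x40000000
 *	end   = ((0x40000 + 0x8000) << PAGE_SHIFT) - 1; = 0x47ffffff
 */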
static int intel_iommu_memory_notifier(struct notifier_block *nb,
				       unsigned long val, void *v)
{
	struct memory_notify *mhp = v;
	unsigned long long start, end;
	unsigned long start_vpfn, last_vpfn;

	switch (val) {
	case MEM_GOING_ONLINE:
		start = mhp->start_pfn << PAGE_SHIFT;
		end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
		if (iommu_domain_identity_map(si_domain, start, end)) {
			pr_warn("Failed to build identity map for [%llx-%llx]\n",
				start, end);
			return NOTIFY_BAD;
		}
		break;

	case MEM_OFFLINE:
	case MEM_CANCEL_ONLINE:
		start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
		last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
		while (start_vpfn <= last_vpfn) {
			struct iova *iova;
			struct dmar_drhd_unit *drhd;
			struct intel_iommu *iommu;
			struct page *freelist;

			iova = find_iova(&si_domain->iovad, start_vpfn);
			if (iova == NULL) {
				pr_debug("Failed get IOVA for PFN %lx\n",
					 start_vpfn);
				break;
			}

			iova = split_and_remove_iova(&si_domain->iovad, iova,
						     start_vpfn, last_vpfn);
			if (iova == NULL) {
				pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
					start_vpfn, last_vpfn);
				return NOTIFY_BAD;
			}

			freelist = domain_unmap(si_domain, iova->pfn_lo,
						iova->pfn_hi);

			rcu_read_lock();
			for_each_active_iommu(iommu, drhd)
				iommu_flush_iotlb_psi(iommu, si_domain,
					iova->pfn_lo, iova_size(iova),
					!freelist, 0);
			rcu_read_unlock();
			dma_free_pagelist(freelist);

			start_vpfn = iova->pfn_hi + 1;
			free_iova_mem(iova);
		}
		break;
	}

	return NOTIFY_OK;
}

static struct notifier_block intel_iommu_memory_nb = {
	.notifier_call = intel_iommu_memory_notifier,
	.priority = 0
};

static void free_all_cpu_cached_iovas(unsigned int cpu)
{
	int i;

	for (i = 0; i < g_num_of_iommus; i++) {
		struct intel_iommu *iommu = g_iommus[i];
		struct dmar_domain *domain;
		int did;

		if (!iommu)
			continue;

		for (did = 0; did < cap_ndoms(iommu->cap); did++) {
			domain = get_iommu_domain(iommu, (u16)did);

			if (!domain)
				continue;
			free_cpu_cached_iovas(cpu, &domain->iovad);
		}
	}
}

static int intel_iommu_cpu_dead(unsigned int cpu)
{
	free_all_cpu_cached_iovas(cpu);
	return 0;
}
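/*
 * intel_iommu_cpu_dead() is wired into the CPU hot-plug state machine
 * from intel_iommu_init() below, so a departing CPU's per-CPU IOVA
 * caches are drained back into the per-domain rcaches instead of
 * being leaked. The registration looks like this (the real call site
 * follows later in this file):
 *
 *	cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead",
 *			  NULL, intel_iommu_cpu_dead);
 */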
static void intel_disable_iommus(void)
{
	struct intel_iommu *iommu = NULL;
	struct dmar_drhd_unit *drhd;

	for_each_iommu(iommu, drhd)
		iommu_disable_translation(iommu);
}

static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
{
	struct iommu_device *iommu_dev = dev_to_iommu_device(dev);

	return container_of(iommu_dev, struct intel_iommu, iommu);
}

static ssize_t intel_iommu_show_version(struct device *dev,
					struct device_attribute *attr,
					char *buf)
{
	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
	u32 ver = readl(iommu->reg + DMAR_VER_REG);
	return sprintf(buf, "%d:%d\n",
		       DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
}
static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);

static ssize_t intel_iommu_show_address(struct device *dev,
					struct device_attribute *attr,
					char *buf)
{
	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
	return sprintf(buf, "%llx\n", iommu->reg_phys);
}
static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);

static ssize_t intel_iommu_show_cap(struct device *dev,
				    struct device_attribute *attr,
				    char *buf)
{
	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
	return sprintf(buf, "%llx\n", iommu->cap);
}
static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);

static ssize_t intel_iommu_show_ecap(struct device *dev,
				     struct device_attribute *attr,
				     char *buf)
{
	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
	return sprintf(buf, "%llx\n", iommu->ecap);
}
static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);

static ssize_t intel_iommu_show_ndoms(struct device *dev,
				      struct device_attribute *attr,
				      char *buf)
{
	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
	return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
}
static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);

static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
					   struct device_attribute *attr,
					   char *buf)
{
	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
	return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
						  cap_ndoms(iommu->cap)));
}
static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);

static struct attribute *intel_iommu_attrs[] = {
	&dev_attr_version.attr,
	&dev_attr_address.attr,
	&dev_attr_cap.attr,
	&dev_attr_ecap.attr,
	&dev_attr_domains_supported.attr,
	&dev_attr_domains_used.attr,
	NULL,
};

static struct attribute_group intel_iommu_group = {
	.name = "intel-iommu",
	.attrs = intel_iommu_attrs,
};

const struct attribute_group *intel_iommu_groups[] = {
	&intel_iommu_group,
	NULL,
};
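/*
 * The attribute group above is registered per unit through
 * iommu_device_sysfs_add() in intel_iommu_init(), so each IOMMU
 * exports its registers read-only via sysfs. For a hypothetical
 * first unit named "dmar0" that would look like:
 *
 *	$ cat /sys/class/iommu/dmar0/intel-iommu/version
 *	1:0
 *	$ cat /sys/class/iommu/dmar0/intel-iommu/domains_supported
 *	256
 *
 * (The values are illustrative; they depend entirely on the hardware.)
 */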
static int __init platform_optin_force_iommu(void)
{
	struct pci_dev *pdev = NULL;
	bool has_untrusted_dev = false;

	if (!dmar_platform_optin() || no_platform_optin)
		return 0;

	for_each_pci_dev(pdev) {
		if (pdev->untrusted) {
			has_untrusted_dev = true;
			break;
		}
	}

	if (!has_untrusted_dev)
		return 0;

	if (no_iommu || dmar_disabled)
		pr_info("Intel-IOMMU force enabled due to platform opt in\n");

	/*
	 * If Intel-IOMMU is disabled by default, we will apply identity
	 * map for all devices except those marked as being untrusted.
	 */
	if (dmar_disabled)
		iommu_identity_mapping |= IDENTMAP_ALL;

	dmar_disabled = 0;
#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
	swiotlb = 0;
#endif
	no_iommu = 0;

	return 1;
}

int __init intel_iommu_init(void)
{
	int ret = -ENODEV;
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;

	/*
	 * Intel IOMMU is required for a TXT/tboot launch or platform
	 * opt in, so enforce that.
	 */
	force_on = tboot_force_iommu() || platform_optin_force_iommu();

	if (iommu_init_mempool()) {
		if (force_on)
			panic("tboot: Failed to initialize iommu memory\n");
		return -ENOMEM;
	}

	down_write(&dmar_global_lock);
	if (dmar_table_init()) {
		if (force_on)
			panic("tboot: Failed to initialize DMAR table\n");
		goto out_free_dmar;
	}

	if (dmar_dev_scope_init() < 0) {
		if (force_on)
			panic("tboot: Failed to initialize DMAR device scope\n");
		goto out_free_dmar;
	}

	up_write(&dmar_global_lock);

	/*
	 * The bus notifier takes the dmar_global_lock, so lockdep will
	 * complain later when we register it under the lock.
	 */
	dmar_register_bus_notifier();

	down_write(&dmar_global_lock);

	if (no_iommu || dmar_disabled) {
		/*
		 * We exit the function here to ensure IOMMU's remapping and
		 * mempool aren't setup, which means that the IOMMU's PMRs
		 * won't be disabled via the call to init_dmars(). So disable
		 * it explicitly here. The PMRs were setup by tboot prior to
		 * calling SENTER, but the kernel is expected to reset/tear
		 * down the PMRs.
		 */
		if (intel_iommu_tboot_noforce) {
			for_each_iommu(iommu, drhd)
				iommu_disable_protect_mem_regions(iommu);
		}

		/*
		 * Make sure the IOMMUs are switched off, even when we
		 * boot into a kexec kernel and the previous kernel left
		 * them enabled
		 */
		intel_disable_iommus();
		goto out_free_dmar;
	}

	if (list_empty(&dmar_rmrr_units))
		pr_info("No RMRR found\n");

	if (list_empty(&dmar_atsr_units))
		pr_info("No ATSR found\n");

	if (dmar_init_reserved_ranges()) {
		if (force_on)
			panic("tboot: Failed to reserve iommu ranges\n");
		goto out_free_reserved_range;
	}

	if (dmar_map_gfx)
		intel_iommu_gfx_mapped = 1;

	init_no_remapping_devices();

	ret = init_dmars();
	if (ret) {
		if (force_on)
			panic("tboot: Failed to initialize DMARs\n");
		pr_err("Initialization failed\n");
		goto out_free_reserved_range;
	}
	up_write(&dmar_global_lock);
	pr_info("Intel(R) Virtualization Technology for Directed I/O\n");

#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
	swiotlb = 0;
#endif
	dma_ops = &intel_dma_ops;

	init_iommu_pm_ops();

	for_each_active_iommu(iommu, drhd) {
		iommu_device_sysfs_add(&iommu->iommu, NULL,
				       intel_iommu_groups,
				       "%s", iommu->name);
		iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
		iommu_device_register(&iommu->iommu);
	}

	bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
	bus_register_notifier(&pci_bus_type, &device_nb);
	if (si_domain && !hw_pass_through)
		register_memory_notifier(&intel_iommu_memory_nb);
	cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
			  intel_iommu_cpu_dead);
	intel_iommu_enabled = 1;
	intel_iommu_debugfs_init();

	return 0;

out_free_reserved_range:
	put_iova_domain(&reserved_iova_list);
out_free_dmar:
	intel_iommu_free_dmars();
	up_write(&dmar_global_lock);
	iommu_exit_mempool();
	return ret;
}
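/*
 * Context entries are indexed by bus/devfn, but a single endpoint can
 * be reachable through several such aliases (PCIe-to-PCI bridges,
 * devices that emit requests with a different requester ID, and so
 * on). pci_for_each_dma_alias() below therefore visits every alias:
 * a hypothetical device at 02:00.0 behind a PCIe-to-PCI bridge, for
 * example, also gets the context entry for the bridge's subordinate
 * alias cleared.
 */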
static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
{
	struct intel_iommu *iommu = opaque;

	domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
	return 0;
}

/*
 * NB - intel-iommu lacks any sort of reference counting for the users of
 * dependent devices. If multiple endpoints have intersecting dependent
 * devices, unbinding the driver from any one of them will possibly leave
 * the others unable to operate.
 */
static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
{
	if (!iommu || !dev || !dev_is_pci(dev))
		return;

	pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
}

static void __dmar_remove_one_dev_info(struct device_domain_info *info)
{
	struct intel_iommu *iommu;
	unsigned long flags;

	assert_spin_locked(&device_domain_lock);

	if (WARN_ON(!info))
		return;

	iommu = info->iommu;

	if (info->dev) {
		if (dev_is_pci(info->dev) && sm_supported(iommu))
			intel_pasid_tear_down_entry(iommu, info->dev,
					PASID_RID2PASID);

		iommu_disable_dev_iotlb(info);
		domain_context_clear(iommu, info->dev);
		intel_pasid_free_table(info->dev);
	}

	unlink_domain_info(info);

	spin_lock_irqsave(&iommu->lock, flags);
	domain_detach_iommu(info->domain, iommu);
	spin_unlock_irqrestore(&iommu->lock, flags);

	free_devinfo_mem(info);
}

static void dmar_remove_one_dev_info(struct device *dev)
{
	struct device_domain_info *info;
	unsigned long flags;

	spin_lock_irqsave(&device_domain_lock, flags);
	info = dev->archdata.iommu;
	__dmar_remove_one_dev_info(info);
	spin_unlock_irqrestore(&device_domain_lock, flags);
}

static int md_domain_init(struct dmar_domain *domain, int guest_width)
{
	int adjust_width;

	init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
	domain_reserve_special_ranges(domain);

	/* calculate AGAW */
	domain->gaw = guest_width;
	adjust_width = guestwidth_to_adjustwidth(guest_width);
	domain->agaw = width_to_agaw(adjust_width);

	domain->iommu_coherency = 0;
	domain->iommu_snooping = 0;
	domain->iommu_superpage = 0;
	domain->max_addr = 0;

	/* always allocate the top pgd */
	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
	if (!domain->pgd)
		return -ENOMEM;
	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
	return 0;
}

static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
{
	struct dmar_domain *dmar_domain;
	struct iommu_domain *domain;

	if (type != IOMMU_DOMAIN_UNMANAGED)
		return NULL;

	dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
	if (!dmar_domain) {
		pr_err("Can't allocate dmar_domain\n");
		return NULL;
	}
	if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
		pr_err("Domain initialization failed\n");
		domain_exit(dmar_domain);
		return NULL;
	}
	domain_update_iommu_cap(dmar_domain);

	domain = &dmar_domain->domain;
	domain->geometry.aperture_start = 0;
	domain->geometry.aperture_end   = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
	domain->geometry.force_aperture = true;

	return domain;
}

static void intel_iommu_domain_free(struct iommu_domain *domain)
{
	domain_exit(to_dmar_domain(domain));
}
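/*
 * Consumers (VFIO being the main one) reach the two functions above
 * through the generic IOMMU API rather than calling them directly.
 * A minimal sketch, assuming a PCI device and with error handling
 * elided:
 *
 *	struct iommu_domain *domain;
 *
 *	domain = iommu_domain_alloc(&pci_bus_type);
 *	if (domain)
 *		iommu_domain_free(domain);
 *
 * Only IOMMU_DOMAIN_UNMANAGED requests succeed here; DMA-API domains
 * for this driver are still managed internally at this point.
 */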
/*
 * Check whether a @domain could be attached to the @dev through the
 * aux-domain attach/detach APIs.
 */
static inline bool
is_aux_domain(struct device *dev, struct iommu_domain *domain)
{
	struct device_domain_info *info = dev->archdata.iommu;

	return info && info->auxd_enabled &&
			domain->type == IOMMU_DOMAIN_UNMANAGED;
}

static void auxiliary_link_device(struct dmar_domain *domain,
				  struct device *dev)
{
	struct device_domain_info *info = dev->archdata.iommu;

	assert_spin_locked(&device_domain_lock);
	if (WARN_ON(!info))
		return;

	domain->auxd_refcnt++;
	list_add(&domain->auxd, &info->auxiliary_domains);
}

static void auxiliary_unlink_device(struct dmar_domain *domain,
				    struct device *dev)
{
	struct device_domain_info *info = dev->archdata.iommu;

	assert_spin_locked(&device_domain_lock);
	if (WARN_ON(!info))
		return;

	list_del(&domain->auxd);
	domain->auxd_refcnt--;

	if (!domain->auxd_refcnt && domain->default_pasid > 0)
		intel_pasid_free_id(domain->default_pasid);
}
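/*
 * An aux-domain is addressed through a "default PASID" instead of a
 * context entry: the first aux_domain_add_dev() below allocates it,
 * auxd_refcnt counts how many devices still reference it, and the
 * last auxiliary_unlink_device() frees it again. A domain shared by,
 * say, two mediated devices therefore keeps a single PASID alive
 * until both have been detached.
 */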
static int aux_domain_add_dev(struct dmar_domain *domain,
			      struct device *dev)
{
	int ret;
	u8 bus, devfn;
	unsigned long flags;
	struct intel_iommu *iommu;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return -ENODEV;

	if (domain->default_pasid <= 0) {
		int pasid;

		pasid = intel_pasid_alloc_id(domain, PASID_MIN,
					     pci_max_pasids(to_pci_dev(dev)),
					     GFP_KERNEL);
		if (pasid <= 0) {
			pr_err("Can't allocate default pasid\n");
			return -ENODEV;
		}
		domain->default_pasid = pasid;
	}

	spin_lock_irqsave(&device_domain_lock, flags);
	/*
	 * iommu->lock must be held to attach domain to iommu and setup the
	 * pasid entry for second level translation.
	 */
	spin_lock(&iommu->lock);
	ret = domain_attach_iommu(domain, iommu);
	if (ret)
		goto attach_failed;

	/* Setup the PASID entry for mediated devices: */
	ret = intel_pasid_setup_second_level(iommu, domain, dev,
					     domain->default_pasid);
	if (ret)
		goto table_failed;
	spin_unlock(&iommu->lock);

	auxiliary_link_device(domain, dev);

	spin_unlock_irqrestore(&device_domain_lock, flags);

	return 0;

table_failed:
	domain_detach_iommu(domain, iommu);
attach_failed:
	spin_unlock(&iommu->lock);
	spin_unlock_irqrestore(&device_domain_lock, flags);
	if (!domain->auxd_refcnt && domain->default_pasid > 0)
		intel_pasid_free_id(domain->default_pasid);

	return ret;
}

static void aux_domain_remove_dev(struct dmar_domain *domain,
				  struct device *dev)
{
	struct device_domain_info *info;
	struct intel_iommu *iommu;
	unsigned long flags;

	if (!is_aux_domain(dev, &domain->domain))
		return;

	spin_lock_irqsave(&device_domain_lock, flags);
	info = dev->archdata.iommu;
	iommu = info->iommu;

	auxiliary_unlink_device(domain, dev);

	spin_lock(&iommu->lock);
	intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid);
	domain_detach_iommu(domain, iommu);
	spin_unlock(&iommu->lock);

	spin_unlock_irqrestore(&device_domain_lock, flags);
}

static int prepare_domain_attach_device(struct iommu_domain *domain,
					struct device *dev)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	struct intel_iommu *iommu;
	int addr_width;
	u8 bus, devfn;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return -ENODEV;

	/* check if this iommu agaw is sufficient for max mapped address */
	addr_width = agaw_to_width(iommu->agaw);
	if (addr_width > cap_mgaw(iommu->cap))
		addr_width = cap_mgaw(iommu->cap);

	if (dmar_domain->max_addr > (1LL << addr_width)) {
		dev_err(dev, "%s: iommu width (%d) is not "
			"sufficient for the mapped address (%llx)\n",
			__func__, addr_width, dmar_domain->max_addr);
		return -EFAULT;
	}
	dmar_domain->gaw = addr_width;

	/*
	 * Knock out extra levels of page tables if necessary
	 */
	while (iommu->agaw < dmar_domain->agaw) {
		struct dma_pte *pte;

		pte = dmar_domain->pgd;
		if (dma_pte_present(pte)) {
			dmar_domain->pgd = (struct dma_pte *)
				phys_to_virt(dma_pte_addr(pte));
			free_pgtable_page(pte);
		}
		dmar_domain->agaw--;
	}

	return 0;
}
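/*
 * The loop above trims unused top levels so the domain's table depth
 * matches what the IOMMU can actually walk. As a worked example
 * (widths illustrative): a domain created with the default 57-bit
 * width gets a 5-level table (agaw 3 in the VT-d encoding); attaching
 * it to a unit that only supports a 48-bit, 4-level walk (agaw 2)
 * runs the loop once, re-rooting the domain at the table referenced
 * by entry 0 of the old top level and freeing the old page.
 */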
static int intel_iommu_attach_device(struct iommu_domain *domain,
				     struct device *dev)
{
	int ret;

	if (device_is_rmrr_locked(dev)) {
		dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
		return -EPERM;
	}

	if (is_aux_domain(dev, domain))
		return -EPERM;

	/* normally dev is not mapped */
	if (unlikely(domain_context_mapped(dev))) {
		struct dmar_domain *old_domain;

		old_domain = find_domain(dev);
		if (old_domain) {
			rcu_read_lock();
			dmar_remove_one_dev_info(dev);
			rcu_read_unlock();

			if (!domain_type_is_vm_or_si(old_domain) &&
			    list_empty(&old_domain->devices))
				domain_exit(old_domain);
		}
	}

	ret = prepare_domain_attach_device(domain, dev);
	if (ret)
		return ret;

	return domain_add_dev_info(to_dmar_domain(domain), dev);
}

static int intel_iommu_aux_attach_device(struct iommu_domain *domain,
					 struct device *dev)
{
	int ret;

	if (!is_aux_domain(dev, domain))
		return -EPERM;

	ret = prepare_domain_attach_device(domain, dev);
	if (ret)
		return ret;

	return aux_domain_add_dev(to_dmar_domain(domain), dev);
}

static void intel_iommu_detach_device(struct iommu_domain *domain,
				      struct device *dev)
{
	dmar_remove_one_dev_info(dev);
}

static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
					  struct device *dev)
{
	aux_domain_remove_dev(to_dmar_domain(domain), dev);
}

static int intel_iommu_map(struct iommu_domain *domain,
			   unsigned long iova, phys_addr_t hpa,
			   size_t size, int iommu_prot)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	u64 max_addr;
	int prot = 0;
	int ret;

	if (iommu_prot & IOMMU_READ)
		prot |= DMA_PTE_READ;
	if (iommu_prot & IOMMU_WRITE)
		prot |= DMA_PTE_WRITE;
	if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
		prot |= DMA_PTE_SNP;

	max_addr = iova + size;
	if (dmar_domain->max_addr < max_addr) {
		u64 end;

		/* check if minimum agaw is sufficient for mapped address */
		end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
		if (end < max_addr) {
			pr_err("%s: iommu width (%d) is not "
			       "sufficient for the mapped address (%llx)\n",
			       __func__, dmar_domain->gaw, max_addr);
			return -EFAULT;
		}
		dmar_domain->max_addr = max_addr;
	}
	/* Round up size to next multiple of PAGE_SIZE, if it and
	   the low bits of hpa would take us onto the next page */
	size = aligned_nrpages(hpa, size);
	ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
				 hpa >> VTD_PAGE_SHIFT, size, prot);
	return ret;
}
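/*
 * intel_iommu_map() is reached through the generic iommu_map() entry
 * point. A minimal sketch of a caller mapping one page read/write
 * (the iova value is invented for illustration):
 *
 *	ret = iommu_map(domain, 0x100000, page_to_phys(page),
 *			PAGE_SIZE, IOMMU_READ | IOMMU_WRITE);
 *
 * Note that IOMMU_CACHE only turns into DMA_PTE_SNP when the hardware
 * advertises snoop control; otherwise it is silently dropped.
 */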
	 */
	BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));

	if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
		size = VTD_PAGE_SIZE << level_to_offset_bits(level);

	start_pfn = iova >> VTD_PAGE_SHIFT;
	last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;

	freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);

	npages = last_pfn - start_pfn + 1;

	for_each_domain_iommu(iommu_id, dmar_domain)
		iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
				      start_pfn, npages, !freelist, 0);

	dma_free_pagelist(freelist);

	if (dmar_domain->max_addr == iova + size)
		dmar_domain->max_addr = iova;

	return size;
}

static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
					    dma_addr_t iova)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	struct dma_pte *pte;
	int level = 0;
	u64 phys = 0;

	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
	if (pte)
		phys = dma_pte_addr(pte);

	return phys;
}

static inline bool scalable_mode_support(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	bool ret = true;

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (!sm_supported(iommu)) {
			ret = false;
			break;
		}
	}
	rcu_read_unlock();

	return ret;
}

static inline bool iommu_pasid_support(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	bool ret = true;

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (!pasid_supported(iommu)) {
			ret = false;
			break;
		}
	}
	rcu_read_unlock();

	return ret;
}

static bool intel_iommu_capable(enum iommu_cap cap)
{
	if (cap == IOMMU_CAP_CACHE_COHERENCY)
		return domain_update_iommu_snooping(NULL) == 1;
	if (cap == IOMMU_CAP_INTR_REMAP)
		return irq_remapping_enabled == 1;

	return false;
}

static int intel_iommu_add_device(struct device *dev)
{
	struct intel_iommu *iommu;
	struct iommu_group *group;
	u8 bus, devfn;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return -ENODEV;

	iommu_device_link(&iommu->iommu, dev);

	group = iommu_group_get_for_dev(dev);

	if (IS_ERR(group))
		return PTR_ERR(group);

	iommu_group_put(group);
	return 0;
}

static void intel_iommu_remove_device(struct device *dev)
{
	struct intel_iommu *iommu;
	u8 bus, devfn;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return;

	iommu_group_remove_device(dev);

	iommu_device_unlink(&iommu->iommu, dev);
}

static void intel_iommu_get_resv_regions(struct device *device,
					 struct list_head *head)
{
	struct iommu_resv_region *reg;
	struct dmar_rmrr_unit *rmrr;
	struct device *i_dev;
	int i;

	rcu_read_lock();
	for_each_rmrr_units(rmrr) {
		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
					  i, i_dev) {
			if (i_dev != device)
				continue;

			list_add_tail(&rmrr->resv->list, head);
		}
	}
	rcu_read_unlock();

	reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
				      IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
				      0, IOMMU_RESV_MSI);
	if (!reg)
		return;
	list_add_tail(&reg->list, head);
}
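/*
 * Userspace can inspect the union of these regions per group. For a
 * device sitting in a hypothetical group 7, something along the lines
 * of:
 *
 *	$ cat /sys/kernel/iommu_groups/7/reserved_regions
 *	0x00000000fee00000 0x00000000feefffff msi
 *
 * would show the IO-APIC/MSI window added above, plus one "direct"
 * line per matching RMRR (the exact formatting comes from the IOMMU
 * core, not from this driver).
 */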
static void intel_iommu_put_resv_regions(struct device *dev,
					 struct list_head *head)
{
	struct iommu_resv_region *entry, *next;

	list_for_each_entry_safe(entry, next, head, list) {
		if (entry->type == IOMMU_RESV_MSI)
			kfree(entry);
	}
}

int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
{
	struct device_domain_info *info;
	struct context_entry *context;
	struct dmar_domain *domain;
	unsigned long flags;
	u64 ctx_lo;
	int ret;

	domain = get_valid_domain_for_dev(dev);
	if (!domain)
		return -EINVAL;

	spin_lock_irqsave(&device_domain_lock, flags);
	spin_lock(&iommu->lock);

	ret = -EINVAL;
	info = dev->archdata.iommu;
	if (!info || !info->pasid_supported)
		goto out;

	context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
	if (WARN_ON(!context))
		goto out;

	ctx_lo = context[0].lo;

	if (!(ctx_lo & CONTEXT_PASIDE)) {
		ctx_lo |= CONTEXT_PASIDE;
		context[0].lo = ctx_lo;
		wmb();
		iommu->flush.flush_context(iommu,
					   domain->iommu_did[iommu->seq_id],
					   PCI_DEVID(info->bus, info->devfn),
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
	}

	/* Enable PASID support in the device, if it wasn't already */
	if (!info->pasid_enabled)
		iommu_enable_dev_iotlb(info);

	ret = 0;

 out:
	spin_unlock(&iommu->lock);
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return ret;
}

#ifdef CONFIG_INTEL_IOMMU_SVM
struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
{
	struct intel_iommu *iommu;
	u8 bus, devfn;

	if (iommu_dummy(dev)) {
		dev_warn(dev,
			 "No IOMMU translation for device; cannot enable SVM\n");
		return NULL;
	}

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu) {
		dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
		return NULL;
	}

	return iommu;
}
#endif /* CONFIG_INTEL_IOMMU_SVM */

static int intel_iommu_enable_auxd(struct device *dev)
{
	struct device_domain_info *info;
	struct intel_iommu *iommu;
	unsigned long flags;
	u8 bus, devfn;
	int ret;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu || dmar_disabled)
		return -EINVAL;

	if (!sm_supported(iommu) || !pasid_supported(iommu))
		return -EINVAL;

	ret = intel_iommu_enable_pasid(iommu, dev);
	if (ret)
		return -ENODEV;

	spin_lock_irqsave(&device_domain_lock, flags);
	info = dev->archdata.iommu;
	info->auxd_enabled = 1;
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return 0;
}

static int intel_iommu_disable_auxd(struct device *dev)
{
	struct device_domain_info *info;
	unsigned long flags;

	spin_lock_irqsave(&device_domain_lock, flags);
	info = dev->archdata.iommu;
	if (!WARN_ON(!info))
		info->auxd_enabled = 0;
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return 0;
}
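/*
 * Scalable-IOV capable endpoints are detected through a PCIe
 * Designated Vendor-Specific Extended Capability (extended capability
 * ID 0x23). The fields read by siov_find_pci_dvsec() below sit at
 * fixed offsets from the capability header:
 *
 *	pos + 0: extended capability header (ID 0x23)
 *	pos + 4: DVSEC vendor ID  -> must be PCI_VENDOR_ID_INTEL
 *	pos + 8: DVSEC ID         -> 5 identifies Scalable IOV
 *
 * A device may carry several DVSECs, hence the loop over
 * pci_find_next_ext_capability().
 */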
/*
 * A PCI Express Designated Vendor-Specific Extended Capability is
 * defined in section 3.7 of the Intel Scalable I/O Virtualization
 * technical specification so that system software and tools can
 * detect endpoint devices supporting Intel Scalable IOV without any
 * host driver dependency.
 *
 * Returns the address of the matching extended capability structure
 * within the device's PCI configuration space or 0 if the device does
 * not support it.
 */
static int siov_find_pci_dvsec(struct pci_dev *pdev)
{
	int pos;
	u16 vendor, id;

	pos = pci_find_next_ext_capability(pdev, 0, 0x23);
	while (pos) {
		pci_read_config_word(pdev, pos + 4, &vendor);
		pci_read_config_word(pdev, pos + 8, &id);
		if (vendor == PCI_VENDOR_ID_INTEL && id == 5)
			return pos;

		pos = pci_find_next_ext_capability(pdev, pos, 0x23);
	}

	return 0;
}

static bool
intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
{
	if (feat == IOMMU_DEV_FEAT_AUX) {
		int ret;

		if (!dev_is_pci(dev) || dmar_disabled ||
		    !scalable_mode_support() || !iommu_pasid_support())
			return false;

		ret = pci_pasid_features(to_pci_dev(dev));
		if (ret < 0)
			return false;

		return !!siov_find_pci_dvsec(to_pci_dev(dev));
	}

	return false;
}

static int
intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
{
	if (feat == IOMMU_DEV_FEAT_AUX)
		return intel_iommu_enable_auxd(dev);

	return -ENODEV;
}

static int
intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
{
	if (feat == IOMMU_DEV_FEAT_AUX)
		return intel_iommu_disable_auxd(dev);

	return -ENODEV;
}

static bool
intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
{
	struct device_domain_info *info = dev->archdata.iommu;

	if (feat == IOMMU_DEV_FEAT_AUX)
		return scalable_mode_support() && info && info->auxd_enabled;

	return false;
}

static int
intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);

	return dmar_domain->default_pasid > 0 ?
			dmar_domain->default_pasid : -EINVAL;
}

const struct iommu_ops intel_iommu_ops = {
	.capable		= intel_iommu_capable,
	.domain_alloc		= intel_iommu_domain_alloc,
	.domain_free		= intel_iommu_domain_free,
	.attach_dev		= intel_iommu_attach_device,
	.detach_dev		= intel_iommu_detach_device,
	.aux_attach_dev		= intel_iommu_aux_attach_device,
	.aux_detach_dev		= intel_iommu_aux_detach_device,
	.aux_get_pasid		= intel_iommu_aux_get_pasid,
	.map			= intel_iommu_map,
	.unmap			= intel_iommu_unmap,
	.iova_to_phys		= intel_iommu_iova_to_phys,
	.add_device		= intel_iommu_add_device,
	.remove_device		= intel_iommu_remove_device,
	.get_resv_regions	= intel_iommu_get_resv_regions,
	.put_resv_regions	= intel_iommu_put_resv_regions,
	.device_group		= pci_device_group,
	.dev_has_feat		= intel_iommu_dev_has_feat,
	.dev_feat_enabled	= intel_iommu_dev_feat_enabled,
	.dev_enable_feat	= intel_iommu_dev_enable_feat,
	.dev_disable_feat	= intel_iommu_dev_disable_feat,
	.pgsize_bitmap		= INTEL_IOMMU_PGSIZES,
};
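/*
 * Putting the aux-domain pieces together, a consumer such as a
 * mediated-device framework would go through the generic API roughly
 * like this (a sketch with error handling elided; "dev" is the parent
 * PCI device):
 *
 *	if (iommu_dev_has_feature(dev, IOMMU_DEV_FEAT_AUX))
 *		iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_AUX);
 *	domain = iommu_domain_alloc(&pci_bus_type);
 *	iommu_aux_attach_device(domain, dev);
 *	pasid = iommu_aux_get_pasid(domain, dev);
 *	... program the device to tag DMA with "pasid" ...
 *	iommu_aux_detach_device(domain, dev);
 */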
	 */
	pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
	dmar_map_gfx = 0;
}

DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);

static void quirk_iommu_rwbf(struct pci_dev *dev)
{
	/*
	 * Mobile 4 Series Chipset neglects to set RWBF capability,
	 * but needs it. Same seems to hold for the desktop versions.
	 */
	pci_info(dev, "Forcing write-buffer flush capability\n");
	rwbf_quirk = 1;
}

DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);

#define GGC 0x52
#define GGC_MEMORY_SIZE_MASK	(0xf << 8)
#define GGC_MEMORY_SIZE_NONE	(0x0 << 8)
#define GGC_MEMORY_SIZE_1M	(0x1 << 8)
#define GGC_MEMORY_SIZE_2M	(0x3 << 8)
#define GGC_MEMORY_VT_ENABLED	(0x8 << 8)
#define GGC_MEMORY_SIZE_2M_VT	(0x9 << 8)
#define GGC_MEMORY_SIZE_3M_VT	(0xa << 8)
#define GGC_MEMORY_SIZE_4M_VT	(0xb << 8)

static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
{
	unsigned short ggc;

	if (pci_read_config_word(dev, GGC, &ggc))
		return;

	if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
		pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
		dmar_map_gfx = 0;
	} else if (dmar_map_gfx) {
		/* we have to ensure the gfx device is idle before we flush */
		pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
		intel_iommu_strict = 1;
	}
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
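/*
 * GGC is the 16-bit graphics control register at config offset 0x52;
 * bits 11:8 encode how much stolen memory the BIOS set aside for the
 * shadow GTT and whether VT is enabled for it. For example, a
 * hypothetical read of ggc = 0x0b50 gives (ggc & GGC_MEMORY_SIZE_MASK)
 * == GGC_MEMORY_SIZE_4M_VT: the VT-enabled bit (0x8 << 8) is set, so
 * the quirk above leaves the IOMMU on for graphics but switches to
 * strict IOTLB flushing.
 */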
/* On Tylersburg chipsets, some BIOSes have been known to enable the
   ISOCH DMAR unit for the Azalia sound device, but not give it any
   TLB entries, which causes it to deadlock. Check for that. We do
   this in a function called from init_dmars(), instead of in a PCI
   quirk, because we don't want to print the obnoxious "BIOS broken"
   message if VT-d is actually disabled.
*/
static void __init check_tylersburg_isoch(void)
{
	struct pci_dev *pdev;
	uint32_t vtisochctrl;

	/* If there's no Azalia in the system anyway, forget it. */
	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
	if (!pdev)
		return;
	pci_dev_put(pdev);

	/* System Management Registers. Might be hidden, in which case
	   we can't do the sanity check. But that's OK, because the
	   known-broken BIOSes _don't_ actually hide it, so far. */
	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
	if (!pdev)
		return;

	if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
		pci_dev_put(pdev);
		return;
	}

	pci_dev_put(pdev);

	/* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
	if (vtisochctrl & 1)
		return;

	/* Drop all bits other than the number of TLB entries */
	vtisochctrl &= 0x1c;

	/* If we have the recommended number of TLB entries (16), fine. */
	if (vtisochctrl == 0x10)
		return;

	/* Zero TLB entries? You get to ride the short bus to school. */
	if (!vtisochctrl) {
		WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		     dmi_get_system_info(DMI_BIOS_VENDOR),
		     dmi_get_system_info(DMI_BIOS_VERSION),
		     dmi_get_system_info(DMI_PRODUCT_VERSION));
		iommu_identity_mapping |= IDENTMAP_AZALIA;
		return;
	}

	pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
		vtisochctrl);
}