Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright © 2006-2014 Intel Corporation.
4 *
5 * Authors: David Woodhouse <dwmw2@infradead.org>,
6 * Ashok Raj <ashok.raj@intel.com>,
7 * Shaohua Li <shaohua.li@intel.com>,
8 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
9 * Fenghua Yu <fenghua.yu@intel.com>
10 * Joerg Roedel <jroedel@suse.de>
11 */
12
13#define pr_fmt(fmt) "DMAR: " fmt
14#define dev_fmt(fmt) pr_fmt(fmt)
15
16#include <linux/init.h>
17#include <linux/bitmap.h>
18#include <linux/debugfs.h>
19#include <linux/export.h>
20#include <linux/slab.h>
21#include <linux/irq.h>
22#include <linux/interrupt.h>
23#include <linux/spinlock.h>
24#include <linux/pci.h>
25#include <linux/dmar.h>
26#include <linux/dma-mapping.h>
27#include <linux/mempool.h>
28#include <linux/memory.h>
29#include <linux/cpu.h>
30#include <linux/timer.h>
31#include <linux/io.h>
32#include <linux/iova.h>
33#include <linux/iommu.h>
34#include <linux/intel-iommu.h>
35#include <linux/syscore_ops.h>
36#include <linux/tboot.h>
37#include <linux/dmi.h>
38#include <linux/pci-ats.h>
39#include <linux/memblock.h>
40#include <linux/dma-contiguous.h>
41#include <linux/dma-direct.h>
42#include <linux/crash_dump.h>
43#include <linux/numa.h>
44#include <asm/irq_remapping.h>
45#include <asm/cacheflush.h>
46#include <asm/iommu.h>
47
48#include "irq_remapping.h"
49#include "intel-pasid.h"
50
51#define ROOT_SIZE VTD_PAGE_SIZE
52#define CONTEXT_SIZE VTD_PAGE_SIZE
53
54#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
55#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
56#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
57#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
58
59#define IOAPIC_RANGE_START (0xfee00000)
60#define IOAPIC_RANGE_END (0xfeefffff)
61#define IOVA_START_ADDR (0x1000)
62
63#define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
64
65#define MAX_AGAW_WIDTH 64
66#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
67
68#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
69#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
70
71/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
72 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
73#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
74 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
75#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
76
77/* IO virtual address start page frame number */
78#define IOVA_START_PFN (1)
79
80#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
81
82/* page table handling */
83#define LEVEL_STRIDE (9)
84#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
85
86/*
87 * This bitmap is used to advertise the page sizes our hardware support
88 * to the IOMMU core, which will then use this information to split
89 * physically contiguous memory regions it is mapping into page sizes
90 * that we support.
91 *
92 * Traditionally the IOMMU core just handed us the mappings directly,
93 * after making sure the size is an order of a 4KiB page and that the
94 * mapping has natural alignment.
95 *
96 * To retain this behavior, we currently advertise that we support
97 * all page sizes that are an order of 4KiB.
98 *
99 * If at some point we'd like to utilize the IOMMU core's new behavior,
100 * we could change this to advertise the real page sizes we support.
101 */
102#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
103
104static inline int agaw_to_level(int agaw)
105{
106 return agaw + 2;
107}
108
109static inline int agaw_to_width(int agaw)
110{
111 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
112}
113
114static inline int width_to_agaw(int width)
115{
116 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
117}
118
119static inline unsigned int level_to_offset_bits(int level)
120{
121 return (level - 1) * LEVEL_STRIDE;
122}
123
124static inline int pfn_level_offset(unsigned long pfn, int level)
125{
126 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
127}
128
129static inline unsigned long level_mask(int level)
130{
131 return -1UL << level_to_offset_bits(level);
132}
133
134static inline unsigned long level_size(int level)
135{
136 return 1UL << level_to_offset_bits(level);
137}
138
139static inline unsigned long align_to_level(unsigned long pfn, int level)
140{
141 return (pfn + level_size(level) - 1) & level_mask(level);
142}
143
144static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
145{
146 return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
147}
148
149/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
150 are never going to work. */
151static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
152{
153 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
154}
155
156static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
157{
158 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
159}
160static inline unsigned long page_to_dma_pfn(struct page *pg)
161{
162 return mm_to_dma_pfn(page_to_pfn(pg));
163}
164static inline unsigned long virt_to_dma_pfn(void *p)
165{
166 return page_to_dma_pfn(virt_to_page(p));
167}
168
169/* global iommu list, set NULL for ignored DMAR units */
170static struct intel_iommu **g_iommus;
171
172static void __init check_tylersburg_isoch(void);
173static int rwbf_quirk;
174
175/*
176 * set to 1 to panic kernel if can't successfully enable VT-d
177 * (used when kernel is launched w/ TXT)
178 */
179static int force_on = 0;
180int intel_iommu_tboot_noforce;
181static int no_platform_optin;
182
183#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
184
185/*
186 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
187 * if marked present.
188 */
189static phys_addr_t root_entry_lctp(struct root_entry *re)
190{
191 if (!(re->lo & 1))
192 return 0;
193
194 return re->lo & VTD_PAGE_MASK;
195}
196
197/*
198 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
199 * if marked present.
200 */
201static phys_addr_t root_entry_uctp(struct root_entry *re)
202{
203 if (!(re->hi & 1))
204 return 0;
205
206 return re->hi & VTD_PAGE_MASK;
207}
208
209static inline void context_clear_pasid_enable(struct context_entry *context)
210{
211 context->lo &= ~(1ULL << 11);
212}
213
214static inline bool context_pasid_enabled(struct context_entry *context)
215{
216 return !!(context->lo & (1ULL << 11));
217}
218
219static inline void context_set_copied(struct context_entry *context)
220{
221 context->hi |= (1ull << 3);
222}
223
224static inline bool context_copied(struct context_entry *context)
225{
226 return !!(context->hi & (1ULL << 3));
227}
228
229static inline bool __context_present(struct context_entry *context)
230{
231 return (context->lo & 1);
232}
233
234bool context_present(struct context_entry *context)
235{
236 return context_pasid_enabled(context) ?
237 __context_present(context) :
238 __context_present(context) && !context_copied(context);
239}
240
241static inline void context_set_present(struct context_entry *context)
242{
243 context->lo |= 1;
244}
245
246static inline void context_set_fault_enable(struct context_entry *context)
247{
248 context->lo &= (((u64)-1) << 2) | 1;
249}
250
251static inline void context_set_translation_type(struct context_entry *context,
252 unsigned long value)
253{
254 context->lo &= (((u64)-1) << 4) | 3;
255 context->lo |= (value & 3) << 2;
256}
257
258static inline void context_set_address_root(struct context_entry *context,
259 unsigned long value)
260{
261 context->lo &= ~VTD_PAGE_MASK;
262 context->lo |= value & VTD_PAGE_MASK;
263}
264
265static inline void context_set_address_width(struct context_entry *context,
266 unsigned long value)
267{
268 context->hi |= value & 7;
269}
270
271static inline void context_set_domain_id(struct context_entry *context,
272 unsigned long value)
273{
274 context->hi |= (value & ((1 << 16) - 1)) << 8;
275}
276
277static inline int context_domain_id(struct context_entry *c)
278{
279 return((c->hi >> 8) & 0xffff);
280}
281
282static inline void context_clear_entry(struct context_entry *context)
283{
284 context->lo = 0;
285 context->hi = 0;
286}
287
/*
 * This domain is a statically identity mapping domain.
 *	1. This domain creates a static 1:1 mapping to all usable memory.
 *	2. It maps to each iommu if successful.
 *	3. Each iommu maps to this domain if successful.
 */
294static struct dmar_domain *si_domain;
295static int hw_pass_through = 1;
296
/*
 * Domain represents a virtual machine; more than one device
 * across iommus may be owned by one domain, e.g. kvm guest.
 */
301#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 0)
302
/* si_domain contains multiple devices */
304#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 1)
305
306#define for_each_domain_iommu(idx, domain) \
307 for (idx = 0; idx < g_num_of_iommus; idx++) \
308 if (domain->iommu_refcnt[idx])
309
310struct dmar_rmrr_unit {
311 struct list_head list; /* list of rmrr units */
312 struct acpi_dmar_header *hdr; /* ACPI header */
313 u64 base_address; /* reserved base address*/
314 u64 end_address; /* reserved end address */
315 struct dmar_dev_scope *devices; /* target devices */
316 int devices_cnt; /* target device count */
317 struct iommu_resv_region *resv; /* reserved region handle */
318};
319
320struct dmar_atsr_unit {
321 struct list_head list; /* list of ATSR units */
322 struct acpi_dmar_header *hdr; /* ACPI header */
323 struct dmar_dev_scope *devices; /* target devices */
324 int devices_cnt; /* target device count */
325 u8 include_all:1; /* include all ports */
326};
327
328static LIST_HEAD(dmar_atsr_units);
329static LIST_HEAD(dmar_rmrr_units);
330
331#define for_each_rmrr_units(rmrr) \
332 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
333
334/* bitmap for indexing intel_iommus */
335static int g_num_of_iommus;
336
337static void domain_exit(struct dmar_domain *domain);
338static void domain_remove_dev_info(struct dmar_domain *domain);
339static void dmar_remove_one_dev_info(struct device *dev);
340static void __dmar_remove_one_dev_info(struct device_domain_info *info);
341static void domain_context_clear(struct intel_iommu *iommu,
342 struct device *dev);
343static int domain_detach_iommu(struct dmar_domain *domain,
344 struct intel_iommu *iommu);
345
346#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
347int dmar_disabled = 0;
348#else
349int dmar_disabled = 1;
350#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
351
352int intel_iommu_enabled = 0;
353EXPORT_SYMBOL_GPL(intel_iommu_enabled);
354
355static int dmar_map_gfx = 1;
356static int dmar_forcedac;
357static int intel_iommu_strict;
358static int intel_iommu_superpage = 1;
359static int intel_iommu_sm;
360static int iommu_identity_mapping;
361
362#define IDENTMAP_ALL 1
363#define IDENTMAP_GFX 2
364#define IDENTMAP_AZALIA 4
365
366#define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap))
367#define pasid_supported(iommu) (sm_supported(iommu) && \
368 ecap_pasid((iommu)->ecap))
369
370int intel_iommu_gfx_mapped;
371EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
372
373#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
374static DEFINE_SPINLOCK(device_domain_lock);
375static LIST_HEAD(device_domain_list);
376
377/*
378 * Iterate over elements in device_domain_list and call the specified
379 * callback @fn against each element.
380 */
381int for_each_device_domain(int (*fn)(struct device_domain_info *info,
382 void *data), void *data)
383{
384 int ret = 0;
385 unsigned long flags;
386 struct device_domain_info *info;
387
388 spin_lock_irqsave(&device_domain_lock, flags);
389 list_for_each_entry(info, &device_domain_list, global) {
390 ret = fn(info, data);
391 if (ret) {
392 spin_unlock_irqrestore(&device_domain_lock, flags);
393 return ret;
394 }
395 }
396 spin_unlock_irqrestore(&device_domain_lock, flags);
397
398 return 0;
399}
400
401const struct iommu_ops intel_iommu_ops;
402
403static bool translation_pre_enabled(struct intel_iommu *iommu)
404{
405 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
406}
407
408static void clear_translation_pre_enabled(struct intel_iommu *iommu)
409{
410 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
411}
412
413static void init_translation_status(struct intel_iommu *iommu)
414{
415 u32 gsts;
416
417 gsts = readl(iommu->reg + DMAR_GSTS_REG);
418 if (gsts & DMA_GSTS_TES)
419 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
420}
421
422/* Convert generic 'struct iommu_domain to private struct dmar_domain */
423static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
424{
425 return container_of(dom, struct dmar_domain, domain);
426}
427
428static int __init intel_iommu_setup(char *str)
429{
430 if (!str)
431 return -EINVAL;
432 while (*str) {
433 if (!strncmp(str, "on", 2)) {
434 dmar_disabled = 0;
435 pr_info("IOMMU enabled\n");
436 } else if (!strncmp(str, "off", 3)) {
437 dmar_disabled = 1;
438 no_platform_optin = 1;
439 pr_info("IOMMU disabled\n");
440 } else if (!strncmp(str, "igfx_off", 8)) {
441 dmar_map_gfx = 0;
442 pr_info("Disable GFX device mapping\n");
443 } else if (!strncmp(str, "forcedac", 8)) {
444 pr_info("Forcing DAC for PCI devices\n");
445 dmar_forcedac = 1;
446 } else if (!strncmp(str, "strict", 6)) {
447 pr_info("Disable batched IOTLB flush\n");
448 intel_iommu_strict = 1;
449 } else if (!strncmp(str, "sp_off", 6)) {
450 pr_info("Disable supported super page\n");
451 intel_iommu_superpage = 0;
452 } else if (!strncmp(str, "sm_on", 5)) {
453 pr_info("Intel-IOMMU: scalable mode supported\n");
454 intel_iommu_sm = 1;
455 } else if (!strncmp(str, "tboot_noforce", 13)) {
456 printk(KERN_INFO
457 "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
458 intel_iommu_tboot_noforce = 1;
459 }
460
461 str += strcspn(str, ",");
462 while (*str == ',')
463 str++;
464 }
465 return 0;
466}
467__setup("intel_iommu=", intel_iommu_setup);
468
469static struct kmem_cache *iommu_domain_cache;
470static struct kmem_cache *iommu_devinfo_cache;
471
472static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
473{
474 struct dmar_domain **domains;
475 int idx = did >> 8;
476
477 domains = iommu->domains[idx];
478 if (!domains)
479 return NULL;
480
481 return domains[did & 0xff];
482}
483
484static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
485 struct dmar_domain *domain)
486{
487 struct dmar_domain **domains;
488 int idx = did >> 8;
489
490 if (!iommu->domains[idx]) {
491 size_t size = 256 * sizeof(struct dmar_domain *);
492 iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
493 }
494
495 domains = iommu->domains[idx];
496 if (WARN_ON(!domains))
497 return;
498 else
499 domains[did & 0xff] = domain;
500}
501
502void *alloc_pgtable_page(int node)
503{
504 struct page *page;
505 void *vaddr = NULL;
506
507 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
508 if (page)
509 vaddr = page_address(page);
510 return vaddr;
511}
512
513void free_pgtable_page(void *vaddr)
514{
515 free_page((unsigned long)vaddr);
516}
517
518static inline void *alloc_domain_mem(void)
519{
520 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
521}
522
523static void free_domain_mem(void *vaddr)
524{
525 kmem_cache_free(iommu_domain_cache, vaddr);
526}
527
528static inline void * alloc_devinfo_mem(void)
529{
530 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
531}
532
533static inline void free_devinfo_mem(void *vaddr)
534{
535 kmem_cache_free(iommu_devinfo_cache, vaddr);
536}
537
538static inline int domain_type_is_vm(struct dmar_domain *domain)
539{
540 return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
541}
542
543static inline int domain_type_is_si(struct dmar_domain *domain)
544{
545 return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
546}
547
548static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
549{
550 return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
551 DOMAIN_FLAG_STATIC_IDENTITY);
552}
553
554static inline int domain_pfn_supported(struct dmar_domain *domain,
555 unsigned long pfn)
556{
557 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
558
559 return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
560}
561
562static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
563{
564 unsigned long sagaw;
565 int agaw = -1;
566
567 sagaw = cap_sagaw(iommu->cap);
568 for (agaw = width_to_agaw(max_gaw);
569 agaw >= 0; agaw--) {
570 if (test_bit(agaw, &sagaw))
571 break;
572 }
573
574 return agaw;
575}
576
577/*
578 * Calculate max SAGAW for each iommu.
579 */
580int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
581{
582 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
583}
584
585/*
586 * calculate agaw for each iommu.
587 * "SAGAW" may be different across iommus, use a default agaw, and
588 * get a supported less agaw for iommus that don't support the default agaw.
589 */
590int iommu_calculate_agaw(struct intel_iommu *iommu)
591{
592 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
593}
594
/* This function only returns a single iommu in a domain */
struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
{
	int iommu_id;

	/* si_domain and vm domain should not get here. */
	BUG_ON(domain_type_is_vm_or_si(domain));
	/* Stop at the first iommu index with a nonzero refcount. */
	for_each_domain_iommu(iommu_id, domain)
		break;

	/* No attached iommu: the loop above ran off the end of the range. */
	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
		return NULL;

	return g_iommus[iommu_id];
}
610
/*
 * Recompute domain->iommu_coherency: it stays set only if every IOMMU
 * the domain is attached to (or, with none attached, every active
 * IOMMU in the system) reports coherent page-walks in its ECAP.
 */
static void domain_update_iommu_coherency(struct dmar_domain *domain)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	bool found = false;
	int i;

	domain->iommu_coherency = 1;

	/* Check the iommus this domain is actually attached to. */
	for_each_domain_iommu(i, domain) {
		found = true;
		if (!ecap_coherent(g_iommus[i]->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
	}
	if (found)
		return;

	/* No hardware attached; use lowest common denominator */
	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (!ecap_coherent(iommu->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
	}
	rcu_read_unlock();
}
640
641static int domain_update_iommu_snooping(struct intel_iommu *skip)
642{
643 struct dmar_drhd_unit *drhd;
644 struct intel_iommu *iommu;
645 int ret = 1;
646
647 rcu_read_lock();
648 for_each_active_iommu(iommu, drhd) {
649 if (iommu != skip) {
650 if (!ecap_sc_support(iommu->ecap)) {
651 ret = 0;
652 break;
653 }
654 }
655 }
656 rcu_read_unlock();
657
658 return ret;
659}
660
661static int domain_update_iommu_superpage(struct intel_iommu *skip)
662{
663 struct dmar_drhd_unit *drhd;
664 struct intel_iommu *iommu;
665 int mask = 0xf;
666
667 if (!intel_iommu_superpage) {
668 return 0;
669 }
670
671 /* set iommu_superpage to the smallest common denominator */
672 rcu_read_lock();
673 for_each_active_iommu(iommu, drhd) {
674 if (iommu != skip) {
675 mask &= cap_super_page_val(iommu->cap);
676 if (!mask)
677 break;
678 }
679 }
680 rcu_read_unlock();
681
682 return fls(mask);
683}
684
685/* Some capabilities may be different across iommus */
686static void domain_update_iommu_cap(struct dmar_domain *domain)
687{
688 domain_update_iommu_coherency(domain);
689 domain->iommu_snooping = domain_update_iommu_snooping(NULL);
690 domain->iommu_superpage = domain_update_iommu_superpage(NULL);
691}
692
/*
 * Return the context entry for (bus, devfn), allocating the context
 * table page on first use when @alloc is nonzero (GFP_ATOMIC).
 * Returns NULL if the table is absent and @alloc is 0, or if the
 * allocation fails.
 */
struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
					 u8 devfn, int alloc)
{
	struct root_entry *root = &iommu->root_entry[bus];
	struct context_entry *context;
	u64 *entry;

	entry = &root->lo;
	if (sm_supported(iommu)) {
		/*
		 * Scalable mode: the root entry's high qword covers
		 * devfns 0x80..0xff, and each device uses two entries.
		 */
		if (devfn >= 0x80) {
			devfn -= 0x80;
			entry = &root->hi;
		}
		devfn *= 2;
	}
	if (*entry & 1)
		/* Present bit set: table already exists. */
		context = phys_to_virt(*entry & VTD_PAGE_MASK);
	else {
		unsigned long phy_addr;
		if (!alloc)
			return NULL;

		context = alloc_pgtable_page(iommu->node);
		if (!context)
			return NULL;

		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
		phy_addr = virt_to_phys((void *)context);
		*entry = phy_addr | 1;	/* install and mark present */
		__iommu_flush_cache(iommu, entry, sizeof(*entry));
	}
	return &context[devfn];
}
726
727static int iommu_dummy(struct device *dev)
728{
729 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
730}
731
/*
 * Find the IOMMU (DRHD unit) responsible for @dev and report, via
 * @bus/@devfn, the BDF under which the unit knows the device.
 * Returns NULL for devices excluded from translation or not covered
 * by any unit.
 */
static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
{
	struct dmar_drhd_unit *drhd = NULL;
	struct intel_iommu *iommu;
	struct device *tmp;
	struct pci_dev *ptmp, *pdev = NULL;
	u16 segment = 0;
	int i;

	if (iommu_dummy(dev))
		return NULL;

	if (dev_is_pci(dev)) {
		struct pci_dev *pf_pdev;

		pdev = to_pci_dev(dev);

#ifdef CONFIG_X86
		/* VMD child devices currently cannot be handled individually */
		if (is_vmd(pdev->bus))
			return NULL;
#endif

		/* VFs aren't listed in scope tables; we need to look up
		 * the PF instead to find the IOMMU. */
		pf_pdev = pci_physfn(pdev);
		dev = &pf_pdev->dev;
		segment = pci_domain_nr(pdev->bus);
	} else if (has_acpi_companion(dev))
		dev = &ACPI_COMPANION(dev)->dev;

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		/* PCI devices must match the unit's segment. */
		if (pdev && segment != drhd->segment)
			continue;

		for_each_active_dev_scope(drhd->devices,
					  drhd->devices_cnt, i, tmp) {
			if (tmp == dev) {
				/* For a VF use its original BDF# not that of the PF
				 * which we used for the IOMMU lookup. Strictly speaking
				 * we could do this for all PCI devices; we only need to
				 * get the BDF# from the scope table for ACPI matches. */
				if (pdev && pdev->is_virtfn)
					goto got_pdev;

				*bus = drhd->devices[i].bus;
				*devfn = drhd->devices[i].devfn;
				goto out;
			}

			if (!pdev || !dev_is_pci(tmp))
				continue;

			/* Scope entry is a bridge whose secondary bus
			 * range contains pdev's bus: covered. */
			ptmp = to_pci_dev(tmp);
			if (ptmp->subordinate &&
			    ptmp->subordinate->number <= pdev->bus->number &&
			    ptmp->subordinate->busn_res.end >= pdev->bus->number)
				goto got_pdev;
		}

		if (pdev && drhd->include_all) {
		got_pdev:
			*bus = pdev->bus->number;
			*devfn = pdev->devfn;
			goto out;
		}
	}
	iommu = NULL;
 out:
	rcu_read_unlock();

	return iommu;
}
806
807static void domain_flush_cache(struct dmar_domain *domain,
808 void *addr, int size)
809{
810 if (!domain->iommu_coherency)
811 clflush_cache_range(addr, size);
812}
813
814static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
815{
816 struct context_entry *context;
817 int ret = 0;
818 unsigned long flags;
819
820 spin_lock_irqsave(&iommu->lock, flags);
821 context = iommu_context_addr(iommu, bus, devfn, 0);
822 if (context)
823 ret = context_present(context);
824 spin_unlock_irqrestore(&iommu->lock, flags);
825 return ret;
826}
827
/* Free every context-table page referenced by the root table, then the
 * root table itself. */
static void free_context_table(struct intel_iommu *iommu)
{
	int i;
	unsigned long flags;
	struct context_entry *context;

	spin_lock_irqsave(&iommu->lock, flags);
	if (!iommu->root_entry) {
		goto out;
	}
	for (i = 0; i < ROOT_ENTRY_NR; i++) {
		context = iommu_context_addr(iommu, i, 0, 0);
		if (context)
			free_pgtable_page(context);

		/* Scalable mode keeps a second table per root entry for
		 * devfns 0x80..0xff. */
		if (!sm_supported(iommu))
			continue;

		context = iommu_context_addr(iommu, i, 0x80, 0);
		if (context)
			free_pgtable_page(context);

	}
	free_pgtable_page(iommu->root_entry);
	iommu->root_entry = NULL;
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
}
856
/*
 * Walk (and, if necessary, build) the page table down to @pfn.
 *
 * On entry, *target_level is the level at which the caller wants a PTE
 * (0 means "whatever level the mapping stops at"); on return it holds
 * the level of the returned PTE.  Returns NULL if @pfn is beyond the
 * domain's addressing capability or a table page cannot be allocated.
 */
static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
				      unsigned long pfn, int *target_level)
{
	struct dma_pte *parent, *pte;
	int level = agaw_to_level(domain->agaw);
	int offset;

	BUG_ON(!domain->pgd);

	if (!domain_pfn_supported(domain, pfn))
		/* Address beyond IOMMU's addressing capabilities. */
		return NULL;

	parent = domain->pgd;

	while (1) {
		void *tmp_page;

		offset = pfn_level_offset(pfn, level);
		pte = &parent[offset];
		/* target_level == 0: stop at the first hole or superpage. */
		if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
			break;
		if (level == *target_level)
			break;

		if (!dma_pte_present(pte)) {
			uint64_t pteval;

			tmp_page = alloc_pgtable_page(domain->nid);

			if (!tmp_page)
				return NULL;

			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
			/* Lockless install: cmpxchg loses gracefully to a
			 * concurrent walker that installed its own page. */
			if (cmpxchg64(&pte->val, 0ULL, pteval))
				/* Someone else set it while we were thinking; use theirs. */
				free_pgtable_page(tmp_page);
			else
				domain_flush_cache(domain, pte, sizeof(*pte));
		}
		if (level == 1)
			break;

		parent = phys_to_virt(dma_pte_addr(pte));
		level--;
	}

	if (!*target_level)
		*target_level = level;

	return pte;
}
910
911
/* return address's pte at specific level
 *
 * Walks from the top level down to @level.  If the walk ends early on a
 * hole or a superpage, *large_page records the level where it stopped:
 * a superpage returns its PTE, a hole returns NULL.
 */
static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
					 unsigned long pfn,
					 int level, int *large_page)
{
	struct dma_pte *parent, *pte;
	int total = agaw_to_level(domain->agaw);
	int offset;

	parent = domain->pgd;
	while (level <= total) {
		offset = pfn_level_offset(pfn, total);
		pte = &parent[offset];
		if (level == total)
			return pte;

		if (!dma_pte_present(pte)) {
			/* Hole above the requested level. */
			*large_page = total;
			break;
		}

		if (dma_pte_superpage(pte)) {
			/* Mapping ends here, above the requested level. */
			*large_page = total;
			return pte;
		}

		parent = phys_to_virt(dma_pte_addr(pte));
		total--;
	}
	return NULL;
}
943
/* clear last level pte, a tlb flush should be followed */
static void dma_pte_clear_range(struct dmar_domain *domain,
				unsigned long start_pfn,
				unsigned long last_pfn)
{
	unsigned int large_page;
	struct dma_pte *first_pte, *pte;

	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	do {
		large_page = 1;
		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
		if (!pte) {
			/* No mapping here: skip past the hole whose level
			 * dma_pfn_level_pte reported in large_page. */
			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
			continue;
		}
		/* Clear consecutive PTEs within this table page. */
		do {
			dma_clear_pte(pte);
			start_pfn += lvl_to_nr_pages(large_page);
			pte++;
		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));

		/* Write back the span of cleared PTEs for non-coherent HW. */
		domain_flush_cache(domain, first_pte,
				   (void *)pte - (void *)first_pte);

	} while (start_pfn && start_pfn <= last_pfn);
}
975
/*
 * Recursively free page-table pages within [start_pfn, last_pfn] that
 * sit strictly below @retain_level and whose span is entirely covered
 * by the range.  Leaf PTEs are assumed already cleared by the caller.
 */
static void dma_pte_free_level(struct dmar_domain *domain, int level,
			       int retain_level, struct dma_pte *pte,
			       unsigned long pfn, unsigned long start_pfn,
			       unsigned long last_pfn)
{
	pfn = max(start_pfn, pfn);
	pte = &pte[pfn_level_offset(pfn, level)];

	do {
		unsigned long level_pfn;
		struct dma_pte *level_pte;

		/* Nothing to descend into at holes or superpages. */
		if (!dma_pte_present(pte) || dma_pte_superpage(pte))
			goto next;

		level_pfn = pfn & level_mask(level);
		level_pte = phys_to_virt(dma_pte_addr(pte));

		if (level > 2) {
			dma_pte_free_level(domain, level - 1, retain_level,
					   level_pte, level_pfn, start_pfn,
					   last_pfn);
		}

		/*
		 * Free the page table if we're below the level we want to
		 * retain and the range covers the entire table.
		 */
		if (level < retain_level && !(start_pfn > level_pfn ||
		      last_pfn < level_pfn + level_size(level) - 1)) {
			dma_clear_pte(pte);
			domain_flush_cache(domain, pte, sizeof(*pte));
			free_pgtable_page(level_pte);
		}
next:
		pfn += level_size(level);
	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
}
1014
/*
 * clear last level (leaf) ptes and free page table pages below the
 * level we wish to keep intact.
 */
static void dma_pte_free_pagetable(struct dmar_domain *domain,
				   unsigned long start_pfn,
				   unsigned long last_pfn,
				   int retain_level)
{
	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	/* First clear the leaf mappings themselves. */
	dma_pte_clear_range(domain, start_pfn, last_pfn);

	/* We don't need lock here; nobody else touches the iova range */
	dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
			   domain->pgd, 0, start_pfn, last_pfn);

	/* free pgd */
	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		free_pgtable_page(domain->pgd);
		domain->pgd = NULL;
	}
}
1040
/* When a page at a given level is being unlinked from its parent, we don't
   need to *modify* it at all. All we need to do is make a list of all the
   pages which can be freed just as soon as we've flushed the IOTLB and we
   know the hardware page-walk will no longer touch them.
   The 'pte' argument is the *parent* PTE, pointing to the page that is to
   be freed. */
static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
					    int level, struct dma_pte *pte,
					    struct page *freelist)
{
	struct page *pg;

	/* Push this table page onto the freelist via page->freelist. */
	pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
	pg->freelist = freelist;
	freelist = pg;

	if (level == 1)
		return freelist;

	/* Recurse into any child tables referenced from this page. */
	pte = page_address(pg);
	do {
		if (dma_pte_present(pte) && !dma_pte_superpage(pte))
			freelist = dma_pte_list_pagetables(domain, level - 1,
							   pte, freelist);
		pte++;
	} while (!first_pte_in_page(pte));

	return freelist;
}
1070
/*
 * Clear PTEs covering [start_pfn, last_pfn] at and below @level, and
 * collect fully-covered page-table pages on @freelist so they can be
 * freed after the IOTLB flush.  Returns the updated freelist.
 */
static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
					struct dma_pte *pte, unsigned long pfn,
					unsigned long start_pfn,
					unsigned long last_pfn,
					struct page *freelist)
{
	struct dma_pte *first_pte = NULL, *last_pte = NULL;

	pfn = max(start_pfn, pfn);
	pte = &pte[pfn_level_offset(pfn, level)];

	do {
		unsigned long level_pfn;

		if (!dma_pte_present(pte))
			goto next;

		level_pfn = pfn & level_mask(level);

		/* If range covers entire pagetable, free it */
		if (start_pfn <= level_pfn &&
		    last_pfn >= level_pfn + level_size(level) - 1) {
			/* These subordinate page tables are going away entirely. Don't
			   bother to clear them; we're just going to *free* them. */
			if (level > 1 && !dma_pte_superpage(pte))
				freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);

			dma_clear_pte(pte);
			if (!first_pte)
				first_pte = pte;
			last_pte = pte;
		} else if (level > 1) {
			/* Recurse down into a level that isn't *entirely* obsolete */
			freelist = dma_pte_clear_level(domain, level - 1,
						       phys_to_virt(dma_pte_addr(pte)),
						       level_pfn, start_pfn, last_pfn,
						       freelist);
		}
next:
		pfn += level_size(level);
	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);

	/* Flush the contiguous span of PTEs cleared at this level. */
	if (first_pte)
		domain_flush_cache(domain, first_pte,
				   (void *)++last_pte - (void *)first_pte);

	return freelist;
}
1119
/* We can't just free the pages because the IOMMU may still be walking
   the page tables, and may have cached the intermediate levels. The
   pages can only be freed after the IOTLB flush has been done. */
static struct page *domain_unmap(struct dmar_domain *domain,
				 unsigned long start_pfn,
				 unsigned long last_pfn)
{
	struct page *freelist;

	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
				       domain->pgd, 0, start_pfn, last_pfn, NULL);

	/* free pgd too, if the whole address space was unmapped */
	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		struct page *pgd_page = virt_to_page(domain->pgd);
		pgd_page->freelist = freelist;
		freelist = pgd_page;

		domain->pgd = NULL;
	}

	return freelist;
}
1148
1149static void dma_free_pagelist(struct page *freelist)
1150{
1151 struct page *pg;
1152
1153 while ((pg = freelist)) {
1154 freelist = pg->freelist;
1155 free_pgtable_page(page_address(pg));
1156 }
1157}
1158
/*
 * IOVA flush-queue callback: @data is an opaque handle that was stashed
 * as a dma_free_pagelist() chain of page-table pages.
 */
static void iova_entry_free(unsigned long data)
{
	dma_free_pagelist((struct page *)data);
}
1165
1166/* iommu handling */
1167static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1168{
1169 struct root_entry *root;
1170 unsigned long flags;
1171
1172 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1173 if (!root) {
1174 pr_err("Allocating root entry for %s failed\n",
1175 iommu->name);
1176 return -ENOMEM;
1177 }
1178
1179 __iommu_flush_cache(iommu, root, ROOT_SIZE);
1180
1181 spin_lock_irqsave(&iommu->lock, flags);
1182 iommu->root_entry = root;
1183 spin_unlock_irqrestore(&iommu->lock, flags);
1184
1185 return 0;
1186}
1187
/*
 * Program the root-table address register and latch it with the SRTP
 * command, busy-waiting until hardware reports the new pointer active.
 */
static void iommu_set_root_entry(struct intel_iommu *iommu)
{
	u64 addr;
	u32 sts;
	unsigned long flag;

	addr = virt_to_phys(iommu->root_entry);
	/* Scalable mode is signalled through the root-table address. */
	if (sm_supported(iommu))
		addr |= DMA_RTADDR_SMT;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);

	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_RTPS), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
1209
/*
 * Drain the IOMMU's internal write buffer so prior page-table updates
 * become visible to hardware.  No-op unless the chipset quirk or the
 * RWBF capability requires it.
 */
void iommu_flush_write_buffer(struct intel_iommu *iommu)
{
	u32 val;
	unsigned long flag;

	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
		return;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(val & DMA_GSTS_WBFS)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
1227
/*
 * Issue a context-cache invalidation of the requested granularity
 * (global, domain-selective or device-selective) through the register
 * interface, spinning until hardware clears the ICC bit.
 */
static void __iommu_flush_context(struct intel_iommu *iommu,
				  u16 did, u16 source_id, u8 function_mask,
				  u64 type)
{
	u64 val = 0;
	unsigned long flag;

	switch (type) {
	case DMA_CCMD_GLOBAL_INVL:
		val = DMA_CCMD_GLOBAL_INVL;
		break;
	case DMA_CCMD_DOMAIN_INVL:
		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
		break;
	case DMA_CCMD_DEVICE_INVL:
		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
		break;
	default:
		BUG();
	}
	val |= DMA_CCMD_ICC;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
		      dmar_readq, (!(val & DMA_CCMD_ICC)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
1261
/*
 * Issue an IOTLB invalidation (global, domain-selective or
 * page-selective) through the register interface and wait for the IVT
 * bit to clear.  Complains if hardware performed a coarser flush than
 * was requested.
 */
static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
				u64 addr, unsigned int size_order, u64 type)
{
	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
	u64 val = 0, val_iva = 0;
	unsigned long flag;

	switch (type) {
	case DMA_TLB_GLOBAL_FLUSH:
		/* global flush doesn't need set IVA_REG */
		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
		break;
	case DMA_TLB_DSI_FLUSH:
		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		break;
	case DMA_TLB_PSI_FLUSH:
		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		/* IH bit is passed in as part of address */
		val_iva = size_order | addr;
		break;
	default:
		BUG();
	}
	/* Note: set drain read/write */
#if 0
	/*
	 * This is probably to be super secure.. Looks like we can
	 * ignore it without any impact.
	 */
	if (cap_read_drain(iommu->cap))
		val |= DMA_TLB_READ_DRAIN;
#endif
	if (cap_write_drain(iommu->cap))
		val |= DMA_TLB_WRITE_DRAIN;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	/* Note: Only uses first TLB reg currently */
	if (val_iva)
		dmar_writeq(iommu->reg + tlb_offset, val_iva);
	dmar_writeq(iommu->reg + tlb_offset + 8, val);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
		      dmar_readq, (!(val & DMA_TLB_IVT)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* check IOTLB invalidation granularity */
	if (DMA_TLB_IAIG(val) == 0)
		pr_err("Flush IOTLB failed\n");
	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
		pr_debug("TLB flush request %Lx, actual %Lx\n",
			(unsigned long long)DMA_TLB_IIRG(type),
			(unsigned long long)DMA_TLB_IAIG(val));
}
1318
1319static struct device_domain_info *
1320iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1321 u8 bus, u8 devfn)
1322{
1323 struct device_domain_info *info;
1324
1325 assert_spin_locked(&device_domain_lock);
1326
1327 if (!iommu->qi)
1328 return NULL;
1329
1330 list_for_each_entry(info, &domain->devices, link)
1331 if (info->iommu == iommu && info->bus == bus &&
1332 info->devfn == devfn) {
1333 if (info->ats_supported && info->dev)
1334 return info;
1335 break;
1336 }
1337
1338 return NULL;
1339}
1340
1341static void domain_update_iotlb(struct dmar_domain *domain)
1342{
1343 struct device_domain_info *info;
1344 bool has_iotlb_device = false;
1345
1346 assert_spin_locked(&device_domain_lock);
1347
1348 list_for_each_entry(info, &domain->devices, link) {
1349 struct pci_dev *pdev;
1350
1351 if (!info->dev || !dev_is_pci(info->dev))
1352 continue;
1353
1354 pdev = to_pci_dev(info->dev);
1355 if (pdev->ats_enabled) {
1356 has_iotlb_device = true;
1357 break;
1358 }
1359 }
1360
1361 domain->has_iotlb_device = has_iotlb_device;
1362}
1363
/*
 * Enable per-device DMA features for @info's PCI device: PASID and
 * page requests (only with CONFIG_INTEL_IOMMU_SVM) and ATS.  Also
 * derives the PFSID used to tag device-IOTLB invalidations when the
 * IOMMU supports device-IOTLB throttling (DIT).
 */
static void iommu_enable_dev_iotlb(struct device_domain_info *info)
{
	struct pci_dev *pdev;

	assert_spin_locked(&device_domain_lock);

	if (!info || !dev_is_pci(info->dev))
		return;

	pdev = to_pci_dev(info->dev);
	/* For IOMMU that supports device IOTLB throttling (DIT), we assign
	 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
	 * queue depth at PF level. If DIT is not set, PFSID will be treated as
	 * reserved, which should be set to 0.
	 */
	if (!ecap_dit(info->iommu->ecap))
		info->pfsid = 0;
	else {
		struct pci_dev *pf_pdev;

		/* pdev will be returned if device is not a vf */
		pf_pdev = pci_physfn(pdev);
		info->pfsid = pci_dev_id(pf_pdev);
	}

#ifdef CONFIG_INTEL_IOMMU_SVM
	/* The PCIe spec, in its wisdom, declares that the behaviour of
	   the device if you enable PASID support after ATS support is
	   undefined. So always enable PASID support on devices which
	   have it, even if we can't yet know if we're ever going to
	   use it. */
	if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
		info->pasid_enabled = 1;

	/* PRI is only useful if the device can take PASID-tagged page
	   requests (or doesn't need PASID at all). */
	if (info->pri_supported &&
	    (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1)  &&
	    !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
		info->pri_enabled = 1;
#endif
	/* ATS stays off for untrusted (external) devices. */
	if (!pdev->untrusted && info->ats_supported &&
	    pci_ats_page_aligned(pdev) &&
	    !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
		info->ats_enabled = 1;
		domain_update_iotlb(info->domain);
		info->ats_qdep = pci_ats_queue_depth(pdev);
	}
}
1411
/* Undo iommu_enable_dev_iotlb(): turn off ATS, PRI and PASID again. */
static void iommu_disable_dev_iotlb(struct device_domain_info *info)
{
	struct pci_dev *pdev;

	assert_spin_locked(&device_domain_lock);

	if (!dev_is_pci(info->dev))
		return;

	pdev = to_pci_dev(info->dev);

	if (info->ats_enabled) {
		pci_disable_ats(pdev);
		info->ats_enabled = 0;
		/* Domain may no longer contain any ATS device. */
		domain_update_iotlb(info->domain);
	}
#ifdef CONFIG_INTEL_IOMMU_SVM
	if (info->pri_enabled) {
		pci_disable_pri(pdev);
		info->pri_enabled = 0;
	}
	if (info->pasid_enabled) {
		pci_disable_pasid(pdev);
		info->pasid_enabled = 0;
	}
#endif
}
1439
/*
 * Send a device-IOTLB (ATS) invalidation covering @addr with the given
 * @mask order to every ATS-enabled device in @domain.  Cheap early
 * exit when the domain is known to contain no such device.
 */
static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
				  u64 addr, unsigned mask)
{
	u16 sid, qdep;
	unsigned long flags;
	struct device_domain_info *info;

	if (!domain->has_iotlb_device)
		return;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry(info, &domain->devices, link) {
		if (!info->ats_enabled)
			continue;

		sid = info->bus << 8 | info->devfn;
		qdep = info->ats_qdep;
		qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
				qdep, addr, mask);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);
}
1462
/*
 * Page-selective IOTLB flush for @pages pages starting at @pfn on
 * @iommu, falling back to a domain-selective flush when PSI is not
 * supported or the range is too large.  @ih is the invalidation hint;
 * @map is non-zero when the change was non-present -> present.
 */
static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
				  struct dmar_domain *domain,
				  unsigned long pfn, unsigned int pages,
				  int ih, int map)
{
	/* PSI needs a power-of-two page count, naturally aligned. */
	unsigned int mask = ilog2(__roundup_pow_of_two(pages));
	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
	u16 did = domain->iommu_did[iommu->seq_id];

	BUG_ON(pages == 0);

	if (ih)
		ih = 1 << 6;
	/*
	 * Fallback to domain selective flush if no PSI support or the size is
	 * too big.
	 * PSI requires page size to be 2 ^ x, and the base address is naturally
	 * aligned to the size
	 */
	if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
		iommu->flush.flush_iotlb(iommu, did, 0, 0,
						DMA_TLB_DSI_FLUSH);
	else
		iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
						DMA_TLB_PSI_FLUSH);

	/*
	 * In caching mode, changes of pages from non-present to present require
	 * flush. However, device IOTLB doesn't need to be flushed in this case.
	 */
	if (!cap_caching_mode(iommu->cap) || !map)
		iommu_flush_dev_iotlb(domain, addr, mask);
}
1496
1497/* Notification for newly created mappings */
1498static inline void __mapping_notify_one(struct intel_iommu *iommu,
1499 struct dmar_domain *domain,
1500 unsigned long pfn, unsigned int pages)
1501{
1502 /* It's a non-present to present mapping. Only flush if caching mode */
1503 if (cap_caching_mode(iommu->cap))
1504 iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
1505 else
1506 iommu_flush_write_buffer(iommu);
1507}
1508
/*
 * IOVA flush-queue callback, run before deferred IOVAs are released:
 * perform a domain-selective IOTLB flush on every IOMMU the domain is
 * attached to, plus a full-width device-IOTLB flush when hardware is
 * not in caching mode.
 */
static void iommu_flush_iova(struct iova_domain *iovad)
{
	struct dmar_domain *domain;
	int idx;

	domain = container_of(iovad, struct dmar_domain, iovad);

	for_each_domain_iommu(idx, domain) {
		struct intel_iommu *iommu = g_iommus[idx];
		u16 did = domain->iommu_did[iommu->seq_id];

		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);

		if (!cap_caching_mode(iommu->cap))
			iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
					      0, MAX_AGAW_PFN_WIDTH);
	}
}
1527
/*
 * Disable the protected low/high memory regions so devices can DMA to
 * them; no-op when neither region is supported by the hardware.
 */
static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
{
	u32 pmen;
	unsigned long flags;

	if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
		return;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	pmen = readl(iommu->reg + DMAR_PMEN_REG);
	pmen &= ~DMA_PMEN_EPM;
	writel(pmen, iommu->reg + DMAR_PMEN_REG);

	/* wait for the protected region status bit to clear */
	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
		readl, !(pmen & DMA_PMEN_PRS), pmen);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}
1547
/*
 * Set the translation-enable bit in the global command register and
 * wait until hardware reports translation active.
 */
static void iommu_enable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flags;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	iommu->gcmd |= DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_TES), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}
1563
/*
 * Clear the translation-enable bit and wait until hardware reports
 * translation disabled.
 */
static void iommu_disable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flag;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	iommu->gcmd &= ~DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(sts & DMA_GSTS_TES)), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
1579
1580
/*
 * Allocate the per-IOMMU domain-id bitmap and the two-level domain
 * pointer array (256 pointers per second-level chunk; only chunk 0 is
 * allocated eagerly, the rest on demand).  Domain-id 0 — and
 * FLPT_DEFAULT_DID in scalable mode — are reserved and never handed to
 * a real domain.
 */
static int iommu_init_domains(struct intel_iommu *iommu)
{
	u32 ndomains, nlongs;
	size_t size;

	ndomains = cap_ndoms(iommu->cap);
	pr_debug("%s: Number of Domains supported <%d>\n",
		 iommu->name, ndomains);
	nlongs = BITS_TO_LONGS(ndomains);

	spin_lock_init(&iommu->lock);

	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
	if (!iommu->domain_ids) {
		pr_err("%s: Allocating domain id array failed\n",
		       iommu->name);
		return -ENOMEM;
	}

	size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
	iommu->domains = kzalloc(size, GFP_KERNEL);

	if (iommu->domains) {
		size = 256 * sizeof(struct dmar_domain *);
		iommu->domains[0] = kzalloc(size, GFP_KERNEL);
	}

	if (!iommu->domains || !iommu->domains[0]) {
		pr_err("%s: Allocating domain array failed\n",
		       iommu->name);
		kfree(iommu->domain_ids);
		kfree(iommu->domains);
		iommu->domain_ids = NULL;
		iommu->domains    = NULL;
		return -ENOMEM;
	}



	/*
	 * If Caching mode is set, then invalid translations are tagged
	 * with domain-id 0, hence we need to pre-allocate it. We also
	 * use domain-id 0 as a marker for non-allocated domain-id, so
	 * make sure it is not used for a real domain.
	 */
	set_bit(0, iommu->domain_ids);

	/*
	 * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid
	 * entry for first-level or pass-through translation modes should
	 * be programmed with a domain id different from those used for
	 * second-level or nested translation. We reserve a domain id for
	 * this purpose.
	 */
	if (sm_supported(iommu))
		set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);

	return 0;
}
1640
/*
 * Detach every device attached through @iommu and finally disable DMA
 * translation on it.  The list walk restarts from scratch whenever a
 * domain must be destroyed, because domain_exit() takes
 * device_domain_lock itself and so cannot run under it.
 */
static void disable_dmar_iommu(struct intel_iommu *iommu)
{
	struct device_domain_info *info, *tmp;
	unsigned long flags;

	if (!iommu->domains || !iommu->domain_ids)
		return;

again:
	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
		struct dmar_domain *domain;

		if (info->iommu != iommu)
			continue;

		if (!info->dev || !info->domain)
			continue;

		domain = info->domain;

		__dmar_remove_one_dev_info(info);

		if (!domain_type_is_vm_or_si(domain)) {
			/*
			 * The domain_exit() function can't be called under
			 * device_domain_lock, as it takes this lock itself.
			 * So release the lock here and re-run the loop
			 * afterwards.
			 */
			spin_unlock_irqrestore(&device_domain_lock, flags);
			domain_exit(domain);
			goto again;
		}
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);

	if (iommu->gcmd & DMA_GCMD_TE)
		iommu_disable_translation(iommu);
}
1681
/*
 * Final teardown of per-IOMMU state: free the two-level domain array
 * and the domain-id bitmap, clear the g_iommus[] slot, free the context
 * tables and (with SVM) shut down the page-request queue.
 */
static void free_dmar_iommu(struct intel_iommu *iommu)
{
	if ((iommu->domains) && (iommu->domain_ids)) {
		/* Number of second-level chunks in the domain array. */
		int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
		int i;

		for (i = 0; i < elems; i++)
			kfree(iommu->domains[i]);
		kfree(iommu->domains);
		kfree(iommu->domain_ids);
		iommu->domains = NULL;
		iommu->domain_ids = NULL;
	}

	g_iommus[iommu->seq_id] = NULL;

	/* free context mapping */
	free_context_table(iommu);

#ifdef CONFIG_INTEL_IOMMU_SVM
	if (pasid_supported(iommu)) {
		if (ecap_prs(iommu->ecap))
			intel_svm_finish_prq(iommu);
	}
#endif
}
1708
1709static struct dmar_domain *alloc_domain(int flags)
1710{
1711 struct dmar_domain *domain;
1712
1713 domain = alloc_domain_mem();
1714 if (!domain)
1715 return NULL;
1716
1717 memset(domain, 0, sizeof(*domain));
1718 domain->nid = NUMA_NO_NODE;
1719 domain->flags = flags;
1720 domain->has_iotlb_device = false;
1721 INIT_LIST_HEAD(&domain->devices);
1722
1723 return domain;
1724}
1725
/*
 * Take a reference on @iommu for @domain, allocating a domain id on
 * that IOMMU the first time the domain is attached there.  Must be
 * called with device_domain_lock and iommu->lock held.  Returns 0 or
 * -ENOSPC when the IOMMU has no free domain ids.
 */
static int domain_attach_iommu(struct dmar_domain *domain,
			       struct intel_iommu *iommu)
{
	unsigned long ndomains;
	int num;

	assert_spin_locked(&device_domain_lock);
	assert_spin_locked(&iommu->lock);

	domain->iommu_refcnt[iommu->seq_id] += 1;
	domain->iommu_count += 1;
	if (domain->iommu_refcnt[iommu->seq_id] == 1) {
		ndomains = cap_ndoms(iommu->cap);
		num      = find_first_zero_bit(iommu->domain_ids, ndomains);

		if (num >= ndomains) {
			pr_err("%s: No free domain ids\n", iommu->name);
			/* Roll back the refcounts taken above. */
			domain->iommu_refcnt[iommu->seq_id] -= 1;
			domain->iommu_count -= 1;
			return -ENOSPC;
		}

		set_bit(num, iommu->domain_ids);
		set_iommu_domain(iommu, num, domain);

		domain->iommu_did[iommu->seq_id] = num;
		domain->nid			 = iommu->node;

		domain_update_iommu_cap(domain);
	}

	return 0;
}
1760
/*
 * Drop @domain's reference on @iommu, releasing its domain id there
 * when the last per-IOMMU reference goes away.  Returns the remaining
 * total attachment count across all IOMMUs.  Caller holds
 * device_domain_lock and iommu->lock.
 */
static int domain_detach_iommu(struct dmar_domain *domain,
			       struct intel_iommu *iommu)
{
	int num, count;

	assert_spin_locked(&device_domain_lock);
	assert_spin_locked(&iommu->lock);

	domain->iommu_refcnt[iommu->seq_id] -= 1;
	count = --domain->iommu_count;
	if (domain->iommu_refcnt[iommu->seq_id] == 0) {
		num = domain->iommu_did[iommu->seq_id];
		clear_bit(num, iommu->domain_ids);
		set_iommu_domain(iommu, num, NULL);

		domain_update_iommu_cap(domain);
		domain->iommu_did[iommu->seq_id] = 0;
	}

	return count;
}
1782
/* IOVA ranges (IOAPIC window, PCI MMIO) that must never be allocated. */
static struct iova_domain reserved_iova_list;
/* NOTE(review): separate lockdep class for the reserved tree — presumably
 * so it can nest with per-domain iova locks; confirm against callers. */
static struct lock_class_key reserved_rbtree_key;
1785
/*
 * Build the global list of IOVA ranges that must never be handed to a
 * device: the IOAPIC MMIO window and every PCI MMIO resource (to block
 * peer-to-peer DMA).  Returns -ENODEV if any reservation fails.
 */
static int dmar_init_reserved_ranges(void)
{
	struct pci_dev *pdev = NULL;
	struct iova *iova;
	int i;

	init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);

	lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
		&reserved_rbtree_key);

	/* IOAPIC ranges shouldn't be accessed by DMA */
	iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
		IOVA_PFN(IOAPIC_RANGE_END));
	if (!iova) {
		pr_err("Reserve IOAPIC range failed\n");
		return -ENODEV;
	}

	/* Reserve all PCI MMIO to avoid peer-to-peer access */
	for_each_pci_dev(pdev) {
		struct resource *r;

		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
			r = &pdev->resource[i];
			if (!r->flags || !(r->flags & IORESOURCE_MEM))
				continue;
			iova = reserve_iova(&reserved_iova_list,
					    IOVA_PFN(r->start),
					    IOVA_PFN(r->end));
			if (!iova) {
				pci_err(pdev, "Reserve iova for %pR failed\n", r);
				return -ENODEV;
			}
		}
	}
	return 0;
}
1824
/* Copy the globally reserved IOVA ranges into @domain's allocator. */
static void domain_reserve_special_ranges(struct dmar_domain *domain)
{
	copy_reserved_iova(&reserved_iova_list, &domain->iovad);
}
1829
/*
 * Round @gaw up to the nearest width the page-table layout can express:
 * 12 bits of page offset plus a whole number of 9-bit levels, capped
 * at 64 bits.
 */
static inline int guestwidth_to_adjustwidth(int gaw)
{
	int rem = (gaw - 12) % 9;
	int agaw = rem ? gaw + 9 - rem : gaw;

	return agaw > 64 ? 64 : agaw;
}
1843
/*
 * One-time initialisation of @domain against @iommu: set up the IOVA
 * allocator and its flush queue, copy the reserved ranges, pick an
 * address width (AGAW) the hardware supports, record coherency /
 * snooping / superpage capabilities, and allocate the top-level page
 * directory.  Returns 0 or a negative errno.
 */
static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
		       int guest_width)
{
	int adjust_width, agaw;
	unsigned long sagaw;
	int err;

	init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);

	err = init_iova_flush_queue(&domain->iovad,
				    iommu_flush_iova, iova_entry_free);
	if (err)
		return err;

	domain_reserve_special_ranges(domain);

	/* calculate AGAW */
	if (guest_width > cap_mgaw(iommu->cap))
		guest_width = cap_mgaw(iommu->cap);
	domain->gaw = guest_width;
	adjust_width = guestwidth_to_adjustwidth(guest_width);
	agaw = width_to_agaw(adjust_width);
	sagaw = cap_sagaw(iommu->cap);
	if (!test_bit(agaw, &sagaw)) {
		/* hardware doesn't support it, choose a bigger one */
		pr_debug("Hardware doesn't support agaw %d\n", agaw);
		agaw = find_next_bit(&sagaw, 5, agaw);
		if (agaw >= 5)
			return -ENODEV;
	}
	domain->agaw = agaw;

	if (ecap_coherent(iommu->ecap))
		domain->iommu_coherency = 1;
	else
		domain->iommu_coherency = 0;

	if (ecap_sc_support(iommu->ecap))
		domain->iommu_snooping = 1;
	else
		domain->iommu_snooping = 0;

	if (intel_iommu_superpage)
		domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
	else
		domain->iommu_superpage = 0;

	domain->nid = iommu->node;

	/* always allocate the top pgd */
	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
	if (!domain->pgd)
		return -ENOMEM;
	__iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
	return 0;
}
1900
/*
 * Destroy @domain: detach its devices, drop the IOVA allocator, tear
 * down the whole page-table hierarchy and release all of its pages.
 */
static void domain_exit(struct dmar_domain *domain)
{
	struct page *freelist;

	/* Remove associated devices and clear attached or cached domains */
	rcu_read_lock();
	domain_remove_dev_info(domain);
	rcu_read_unlock();

	/* destroy iovas */
	put_iova_domain(&domain->iovad);

	freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));

	dma_free_pagelist(freelist);

	free_domain_mem(domain);
}
1919
/*
 * Get the PASID directory size for scalable mode context entry.
 * Value of X in the PDTS field of a scalable mode context entry
 * indicates PASID directory with 2^(X + 7) entries.
 */
static inline unsigned long context_get_sm_pds(struct pasid_table *table)
{
	int pds, max_pde;

	max_pde = table->max_pasid >> PASID_PDE_SHIFT;
	/*
	 * NOTE(review): find_first_bit() yields the *lowest* set bit, so
	 * this equals the directory size only when max_pde is a power of
	 * two — presumably guaranteed by the table allocator; confirm.
	 */
	pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS);
	if (pds < 7)
		return 0;

	return pds - 7;
}
1936
/*
 * Set the RID_PASID field of a scalable mode context entry. The
 * IOMMU hardware will use the PASID value set in this field for
 * DMA translations of DMA requests without PASID.
 */
static inline void
context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
{
	/* Low 20 bits of the high qword carry the PASID value. */
	context->hi |= pasid & ((1 << 20) - 1);
	/* NOTE(review): bit 20 set per the scalable-mode context-entry
	 * layout — confirm its meaning against the VT-d specification. */
	context->hi |= (1 << 20);
}
1948
/*
 * Set the DTE(Device-TLB Enable) field of a scalable mode context
 * entry.
 */
static inline void context_set_sm_dte(struct context_entry *context)
{
	context->lo |= (1 << 2);
}
1957
/*
 * Set the PRE(Page Request Enable) field of a scalable mode context
 * entry.
 */
static inline void context_set_sm_pre(struct context_entry *context)
{
	context->lo |= (1 << 4);
}
1966
/* Convert value to context PASID directory size field coding. */
#define context_pdts(pds)	(((pds) & 0x7) << 9)

/*
 * Install the context entry for @bus/@devfn on @iommu, pointing it at
 * @domain's translation structures.  In scalable mode the entry points
 * at the PASID directory in @table; in legacy mode it points directly
 * at the second-level page tables.  Returns 0 on success (including
 * when a valid entry is already present) or a negative errno.
 */
static int domain_context_mapping_one(struct dmar_domain *domain,
				      struct intel_iommu *iommu,
				      struct pasid_table *table,
				      u8 bus, u8 devfn)
{
	u16 did = domain->iommu_did[iommu->seq_id];
	int translation = CONTEXT_TT_MULTI_LEVEL;
	struct device_domain_info *info = NULL;
	struct context_entry *context;
	unsigned long flags;
	int ret;

	WARN_ON(did == 0);

	if (hw_pass_through && domain_type_is_si(domain))
		translation = CONTEXT_TT_PASS_THROUGH;

	pr_debug("Set context mapping for %02x:%02x.%d\n",
		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));

	BUG_ON(!domain->pgd);

	spin_lock_irqsave(&device_domain_lock, flags);
	spin_lock(&iommu->lock);

	ret = -ENOMEM;
	context = iommu_context_addr(iommu, bus, devfn, 1);
	if (!context)
		goto out_unlock;

	ret = 0;
	if (context_present(context))
		goto out_unlock;

	/*
	 * For kdump cases, old valid entries may be cached due to the
	 * in-flight DMA and copied pgtable, but there is no unmapping
	 * behaviour for them, thus we need an explicit cache flush for
	 * the newly-mapped device. For kdump, at this point, the device
	 * is supposed to finish reset at its driver probe stage, so no
	 * in-flight DMA will exist, and we don't need to worry anymore
	 * hereafter.
	 */
	if (context_copied(context)) {
		u16 did_old = context_domain_id(context);

		if (did_old < cap_ndoms(iommu->cap)) {
			iommu->flush.flush_context(iommu, did_old,
						   (((u16)bus) << 8) | devfn,
						   DMA_CCMD_MASK_NOBIT,
						   DMA_CCMD_DEVICE_INVL);
			iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
						 DMA_TLB_DSI_FLUSH);
		}
	}

	context_clear_entry(context);

	if (sm_supported(iommu)) {
		unsigned long pds;

		WARN_ON(!table);

		/* Setup the PASID DIR pointer: */
		pds = context_get_sm_pds(table);
		context->lo = (u64)virt_to_phys(table->table) |
				context_pdts(pds);

		/* Setup the RID_PASID field: */
		context_set_sm_rid2pasid(context, PASID_RID2PASID);

		/*
		 * Setup the Device-TLB enable bit and Page request
		 * Enable bit:
		 */
		info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
		if (info && info->ats_supported)
			context_set_sm_dte(context);
		if (info && info->pri_supported)
			context_set_sm_pre(context);
	} else {
		struct dma_pte *pgd = domain->pgd;
		int agaw;

		context_set_domain_id(context, did);

		if (translation != CONTEXT_TT_PASS_THROUGH) {
			/*
			 * Skip top levels of page tables for iommu which has
			 * less agaw than default. Unnecessary for PT mode.
			 */
			for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
				ret = -ENOMEM;
				pgd = phys_to_virt(dma_pte_addr(pgd));
				if (!dma_pte_present(pgd))
					goto out_unlock;
			}

			info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
			if (info && info->ats_supported)
				translation = CONTEXT_TT_DEV_IOTLB;
			else
				translation = CONTEXT_TT_MULTI_LEVEL;

			context_set_address_root(context, virt_to_phys(pgd));
			context_set_address_width(context, agaw);
		} else {
			/*
			 * In pass through mode, AW must be programmed to
			 * indicate the largest AGAW value supported by
			 * hardware. And ASR is ignored by hardware.
			 */
			context_set_address_width(context, iommu->msagaw);
		}

		context_set_translation_type(context, translation);
	}

	context_set_fault_enable(context);
	context_set_present(context);
	domain_flush_cache(domain, context, sizeof(*context));

	/*
	 * It's a non-present to present mapping. If hardware doesn't cache
	 * non-present entry we only need to flush the write-buffer. If it
	 * _does_ cache non-present entries, then it does so in the special
	 * domain #0, which we have to flush:
	 */
	if (cap_caching_mode(iommu->cap)) {
		iommu->flush.flush_context(iommu, 0,
					   (((u16)bus) << 8) | devfn,
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
	} else {
		iommu_flush_write_buffer(iommu);
	}
	iommu_enable_dev_iotlb(info);

	ret = 0;

out_unlock:
	spin_unlock(&iommu->lock);
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return ret;
}
2117
/* Context passed through pci_for_each_dma_alias() when mapping a domain. */
struct domain_context_mapping_data {
	struct dmar_domain *domain;	/* domain being context-mapped */
	struct intel_iommu *iommu;	/* IOMMU serving the device */
	struct pasid_table *table;	/* PASID table (scalable mode) */
};
2123
2124static int domain_context_mapping_cb(struct pci_dev *pdev,
2125 u16 alias, void *opaque)
2126{
2127 struct domain_context_mapping_data *data = opaque;
2128
2129 return domain_context_mapping_one(data->domain, data->iommu,
2130 data->table, PCI_BUS_NUM(alias),
2131 alias & 0xff);
2132}
2133
2134static int
2135domain_context_mapping(struct dmar_domain *domain, struct device *dev)
2136{
2137 struct domain_context_mapping_data data;
2138 struct pasid_table *table;
2139 struct intel_iommu *iommu;
2140 u8 bus, devfn;
2141
2142 iommu = device_to_iommu(dev, &bus, &devfn);
2143 if (!iommu)
2144 return -ENODEV;
2145
2146 table = intel_pasid_get_table(dev);
2147
2148 if (!dev_is_pci(dev))
2149 return domain_context_mapping_one(domain, iommu, table,
2150 bus, devfn);
2151
2152 data.domain = domain;
2153 data.iommu = iommu;
2154 data.table = table;
2155
2156 return pci_for_each_dma_alias(to_pci_dev(dev),
2157 &domain_context_mapping_cb, &data);
2158}
2159
/*
 * pci_for_each_dma_alias() callback: returns non-zero (stopping the
 * walk) when an alias has NO context mapping; the caller inverts the
 * overall result.
 */
static int domain_context_mapped_cb(struct pci_dev *pdev,
				    u16 alias, void *opaque)
{
	struct intel_iommu *iommu = opaque;

	return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
}
2167
/*
 * Return non-zero when @dev (and, for PCI, every DMA alias of it)
 * already has a present context entry on its IOMMU.
 */
static int domain_context_mapped(struct device *dev)
{
	struct intel_iommu *iommu;
	u8 bus, devfn;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return -ENODEV;

	if (!dev_is_pci(dev))
		return device_context_mapped(iommu, bus, devfn);

	/* Walk aborts (non-zero) on the first unmapped alias. */
	return !pci_for_each_dma_alias(to_pci_dev(dev),
				       domain_context_mapped_cb, iommu);
}
2183
/* Returns a number of VTD pages, but aligned to MM page size */
static inline unsigned long aligned_nrpages(unsigned long host_addr,
					    size_t size)
{
	/* Only the sub-page offset of host_addr affects the count. */
	host_addr &= ~PAGE_MASK;
	return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
}
2191
/* Return largest possible superpage level for a given mapping */
static inline int hardware_largepage_caps(struct dmar_domain *domain,
					  unsigned long iov_pfn,
					  unsigned long phy_pfn,
					  unsigned long pages)
{
	int support, level = 1;
	unsigned long pfnmerge;

	/* Highest superpage level the hardware/domain supports. */
	support = domain->iommu_superpage;

	/* To use a large page, the virtual *and* physical addresses
	   must be aligned to 2MiB/1GiB/etc. Lower bits set in either
	   of them will mean we have to use smaller pages. So just
	   merge them and check both at once. */
	pfnmerge = iov_pfn | phy_pfn;

	/* Climb levels while both pfns stay stride-aligned and there
	   are enough pages left to fill a whole superpage. */
	while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
		pages >>= VTD_STRIDE_SHIFT;
		if (!pages)
			break;
		pfnmerge >>= VTD_STRIDE_SHIFT;
		level++;
		support--;
	}
	return level;
}
2219
/*
 * __domain_mapping - fill in DMA page-table entries for a range of pages.
 * @domain:   domain whose page table is being populated
 * @iov_pfn:  first IO virtual pfn (VT-d page granularity) to map
 * @sg:       scatterlist to map, or NULL to map a flat physical range
 * @phys_pfn: first physical pfn; only consulted when @sg is NULL
 * @nr_pages: total number of VT-d pages to map
 * @prot:     protection bits (DMA_PTE_READ/WRITE/SNP)
 *
 * Writes leaf PTEs directly, using superpages where the alignment and
 * hardware_largepage_caps() allow.  Returns 0 on success, -EINVAL if @prot
 * grants neither read nor write, -ENOMEM on page-table allocation failure.
 */
static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
			    struct scatterlist *sg, unsigned long phys_pfn,
			    unsigned long nr_pages, int prot)
{
	struct dma_pte *first_pte = NULL, *pte = NULL;
	phys_addr_t uninitialized_var(pteval);
	unsigned long sg_res = 0;
	unsigned int largepage_lvl = 0;
	unsigned long lvl_pages = 0;

	BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));

	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
		return -EINVAL;

	prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;

	/* No scatterlist: treat the whole range as a single "segment". */
	if (!sg) {
		sg_res = nr_pages;
		pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
	}

	while (nr_pages > 0) {
		uint64_t tmp;

		/* Current segment exhausted: advance to the next sg entry
		 * and derive the next pteval/phys_pfn from it. */
		if (!sg_res) {
			unsigned int pgoff = sg->offset & ~PAGE_MASK;

			sg_res = aligned_nrpages(sg->offset, sg->length);
			sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
			sg->dma_length = sg->length;
			pteval = (sg_phys(sg) - pgoff) | prot;
			phys_pfn = pteval >> VTD_PAGE_SHIFT;
		}

		if (!pte) {
			largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);

			first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
			if (!pte)
				return -ENOMEM;
			/* It is large page*/
			if (largepage_lvl > 1) {
				unsigned long nr_superpages, end_pfn;

				pteval |= DMA_PTE_LARGE_PAGE;
				lvl_pages = lvl_to_nr_pages(largepage_lvl);

				nr_superpages = sg_res / lvl_pages;
				end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;

				/*
				 * Ensure that old small page tables are
				 * removed to make room for superpage(s).
				 * We're adding new large pages, so make sure
				 * we don't remove their parent tables.
				 */
				dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
						       largepage_lvl + 1);
			} else {
				pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
			}

		}
		/* We don't need lock here, nobody else
		 * touches the iova range
		 */
		tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
		if (tmp) {
			/* Non-zero old value: the PTE was already set, so the
			 * cmpxchg did NOT install our value.  Complain loudly
			 * but keep going; the stale entry is left in place. */
			static int dumps = 5;
			pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
				iov_pfn, tmp, (unsigned long long)pteval);
			if (dumps) {
				dumps--;
				debug_dma_dump_mappings(NULL);
			}
			WARN_ON(1);
		}

		lvl_pages = lvl_to_nr_pages(largepage_lvl);

		BUG_ON(nr_pages < lvl_pages);
		BUG_ON(sg_res < lvl_pages);

		nr_pages -= lvl_pages;
		iov_pfn += lvl_pages;
		phys_pfn += lvl_pages;
		pteval += lvl_pages * VTD_PAGE_SIZE;
		sg_res -= lvl_pages;

		/* If the next PTE would be the first in a new page, then we
		   need to flush the cache on the entries we've just written.
		   And then we'll need to recalculate 'pte', so clear it and
		   let it get set again in the if (!pte) block above.

		   If we're done (!nr_pages) we need to flush the cache too.

		   Also if we've been setting superpages, we may need to
		   recalculate 'pte' and switch back to smaller pages for the
		   end of the mapping, if the trailing size is not enough to
		   use another superpage (i.e. sg_res < lvl_pages). */
		pte++;
		if (!nr_pages || first_pte_in_page(pte) ||
		    (largepage_lvl > 1 && sg_res < lvl_pages)) {
			domain_flush_cache(domain, first_pte,
					   (void *)pte - (void *)first_pte);
			pte = NULL;
		}

		if (!sg_res && nr_pages)
			sg = sg_next(sg);
	}
	return 0;
}
2334
2335static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2336 struct scatterlist *sg, unsigned long phys_pfn,
2337 unsigned long nr_pages, int prot)
2338{
2339 int ret;
2340 struct intel_iommu *iommu;
2341
2342 /* Do the real mapping first */
2343 ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
2344 if (ret)
2345 return ret;
2346
2347 /* Notify about the new mapping */
2348 if (domain_type_is_vm(domain)) {
2349 /* VM typed domains can have more than one IOMMUs */
2350 int iommu_id;
2351
2352 for_each_domain_iommu(iommu_id, domain) {
2353 iommu = g_iommus[iommu_id];
2354 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2355 }
2356 } else {
2357 /* General domains only have one IOMMU */
2358 iommu = domain_get_iommu(domain);
2359 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2360 }
2361
2362 return 0;
2363}
2364
/* Map a scatterlist into @domain at @iov_pfn.  The phys_pfn argument of
 * domain_mapping() is unused when a scatterlist is supplied, so 0 is
 * passed as a placeholder. */
static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
				    struct scatterlist *sg, unsigned long nr_pages,
				    int prot)
{
	return domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
}
2371
/* Map a physically contiguous pfn range into @domain at @iov_pfn; the
 * NULL scatterlist selects the flat-range path in __domain_mapping(). */
static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
				     unsigned long phys_pfn, unsigned long nr_pages,
				     int prot)
{
	return domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
}
2378
2379static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
2380{
2381 unsigned long flags;
2382 struct context_entry *context;
2383 u16 did_old;
2384
2385 if (!iommu)
2386 return;
2387
2388 spin_lock_irqsave(&iommu->lock, flags);
2389 context = iommu_context_addr(iommu, bus, devfn, 0);
2390 if (!context) {
2391 spin_unlock_irqrestore(&iommu->lock, flags);
2392 return;
2393 }
2394 did_old = context_domain_id(context);
2395 context_clear_entry(context);
2396 __iommu_flush_cache(iommu, context, sizeof(*context));
2397 spin_unlock_irqrestore(&iommu->lock, flags);
2398 iommu->flush.flush_context(iommu,
2399 did_old,
2400 (((u16)bus) << 8) | devfn,
2401 DMA_CCMD_MASK_NOBIT,
2402 DMA_CCMD_DEVICE_INVL);
2403 iommu->flush.flush_iotlb(iommu,
2404 did_old,
2405 0,
2406 0,
2407 DMA_TLB_DSI_FLUSH);
2408}
2409
/* Remove @info from its domain's device list and the global list, and
 * clear the device's archdata pointer so find_domain() no longer sees it.
 * Caller must hold device_domain_lock. */
static inline void unlink_domain_info(struct device_domain_info *info)
{
	assert_spin_locked(&device_domain_lock);
	list_del(&info->link);
	list_del(&info->global);
	if (info->dev)
		info->dev->archdata.iommu = NULL;
}
2418
2419static void domain_remove_dev_info(struct dmar_domain *domain)
2420{
2421 struct device_domain_info *info, *tmp;
2422 unsigned long flags;
2423
2424 spin_lock_irqsave(&device_domain_lock, flags);
2425 list_for_each_entry_safe(info, tmp, &domain->devices, link)
2426 __dmar_remove_one_dev_info(info);
2427 spin_unlock_irqrestore(&device_domain_lock, flags);
2428}
2429
2430/*
2431 * find_domain
2432 * Note: we use struct device->archdata.iommu stores the info
2433 */
2434static struct dmar_domain *find_domain(struct device *dev)
2435{
2436 struct device_domain_info *info;
2437
2438 /* No lock here, assumes no domain exit in normal case */
2439 info = dev->archdata.iommu;
2440 if (likely(info))
2441 return info->domain;
2442 return NULL;
2443}
2444
2445static inline struct device_domain_info *
2446dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2447{
2448 struct device_domain_info *info;
2449
2450 list_for_each_entry(info, &device_domain_list, global)
2451 if (info->iommu->segment == segment && info->bus == bus &&
2452 info->devfn == devfn)
2453 return info;
2454
2455 return NULL;
2456}
2457
/*
 * dmar_insert_one_dev_info - bind a device (or bare DMA alias) to a domain.
 * @iommu:  IOMMU serving the device
 * @bus:    bus number of the device/alias
 * @devfn:  device/function of the device/alias
 * @dev:    the struct device, or NULL when registering only an alias
 * @domain: domain the caller wants the device attached to
 *
 * Returns the domain the device ends up in: @domain on success, a
 * pre-existing domain if the device or its alias was attached concurrently
 * (the caller must then dispose of its own @domain), or NULL on error.
 */
static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
						    int bus, int devfn,
						    struct device *dev,
						    struct dmar_domain *domain)
{
	struct dmar_domain *found = NULL;
	struct device_domain_info *info;
	unsigned long flags;
	int ret;

	info = alloc_devinfo_mem();
	if (!info)
		return NULL;

	info->bus = bus;
	info->devfn = devfn;
	info->ats_supported = info->pasid_supported = info->pri_supported = 0;
	info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
	info->ats_qdep = 0;
	info->dev = dev;
	info->domain = domain;
	info->iommu = iommu;
	info->pasid_table = NULL;
	info->auxd_enabled = 0;
	INIT_LIST_HEAD(&info->auxiliary_domains);

	/* Probe optional PCI capabilities (ATS, and in scalable mode also
	 * PASID/PRI) so later attach paths can enable them. */
	if (dev && dev_is_pci(dev)) {
		struct pci_dev *pdev = to_pci_dev(info->dev);

		if (!pdev->untrusted &&
		    !pci_ats_disabled() &&
		    ecap_dev_iotlb_support(iommu->ecap) &&
		    pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
		    dmar_find_matched_atsr_unit(pdev))
			info->ats_supported = 1;

		if (sm_supported(iommu)) {
			if (pasid_supported(iommu)) {
				int features = pci_pasid_features(pdev);
				if (features >= 0)
					info->pasid_supported = features | 1;
			}

			if (info->ats_supported && ecap_prs(iommu->ecap) &&
			    pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
				info->pri_supported = 1;
		}
	}

	spin_lock_irqsave(&device_domain_lock, flags);
	if (dev)
		found = find_domain(dev);

	if (!found) {
		/* Not attached directly - maybe via a DMA alias entry. */
		struct device_domain_info *info2;
		info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
		if (info2) {
			found = info2->domain;
			info2->dev = dev;
		}
	}

	if (found) {
		spin_unlock_irqrestore(&device_domain_lock, flags);
		free_devinfo_mem(info);
		/* Caller must free the original domain */
		return found;
	}

	/* Nested lock: iommu->lock inside device_domain_lock. */
	spin_lock(&iommu->lock);
	ret = domain_attach_iommu(domain, iommu);
	spin_unlock(&iommu->lock);

	if (ret) {
		spin_unlock_irqrestore(&device_domain_lock, flags);
		free_devinfo_mem(info);
		return NULL;
	}

	list_add(&info->link, &domain->devices);
	list_add(&info->global, &device_domain_list);
	if (dev)
		dev->archdata.iommu = info;
	spin_unlock_irqrestore(&device_domain_lock, flags);

	/* PASID table is mandatory for a PCI device in scalable mode. */
	if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
		ret = intel_pasid_alloc_table(dev);
		if (ret) {
			dev_err(dev, "PASID table allocation failed\n");
			dmar_remove_one_dev_info(dev);
			return NULL;
		}

		/* Setup the PASID entry for requests without PASID: */
		spin_lock(&iommu->lock);
		if (hw_pass_through && domain_type_is_si(domain))
			ret = intel_pasid_setup_pass_through(iommu, domain,
					dev, PASID_RID2PASID);
		else
			ret = intel_pasid_setup_second_level(iommu, domain,
					dev, PASID_RID2PASID);
		spin_unlock(&iommu->lock);
		if (ret) {
			dev_err(dev, "Setup RID2PASID failed\n");
			dmar_remove_one_dev_info(dev);
			return NULL;
		}
	}

	if (dev && domain_context_mapping(domain, dev)) {
		dev_err(dev, "Domain context map failed\n");
		dmar_remove_one_dev_info(dev);
		return NULL;
	}

	return domain;
}
2576
/* pci_for_each_dma_alias() callback: store each alias seen into the u16
 * pointed to by @opaque, so after the walk it holds the last alias. */
static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
{
	*(u16 *)opaque = alias;
	return 0;
}
2582
/*
 * find_or_alloc_domain - get a DMA domain for @dev.
 * @gaw: guest address width used when a fresh domain must be allocated.
 *
 * If the device's PCI DMA alias is already bound to a domain, that domain
 * is reused; otherwise a new domain is allocated and initialized.  Returns
 * NULL on failure.  A freshly allocated domain is not yet attached to the
 * device - see set_domain_for_dev().
 */
static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
{
	struct device_domain_info *info;
	struct dmar_domain *domain = NULL;
	struct intel_iommu *iommu;
	u16 dma_alias;
	unsigned long flags;
	u8 bus, devfn;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return NULL;

	if (dev_is_pci(dev)) {
		struct pci_dev *pdev = to_pci_dev(dev);

		pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);

		spin_lock_irqsave(&device_domain_lock, flags);
		info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
						      PCI_BUS_NUM(dma_alias),
						      dma_alias & 0xff);
		if (info) {
			iommu = info->iommu;
			domain = info->domain;
		}
		spin_unlock_irqrestore(&device_domain_lock, flags);

		/* DMA alias already has a domain, use it */
		if (info)
			goto out;
	}

	/* Allocate and initialize new domain for the device */
	domain = alloc_domain(0);
	if (!domain)
		return NULL;
	if (domain_init(domain, iommu, gaw)) {
		domain_exit(domain);
		return NULL;
	}

out:

	return domain;
}
2629
2630static struct dmar_domain *set_domain_for_dev(struct device *dev,
2631 struct dmar_domain *domain)
2632{
2633 struct intel_iommu *iommu;
2634 struct dmar_domain *tmp;
2635 u16 req_id, dma_alias;
2636 u8 bus, devfn;
2637
2638 iommu = device_to_iommu(dev, &bus, &devfn);
2639 if (!iommu)
2640 return NULL;
2641
2642 req_id = ((u16)bus << 8) | devfn;
2643
2644 if (dev_is_pci(dev)) {
2645 struct pci_dev *pdev = to_pci_dev(dev);
2646
2647 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2648
2649 /* register PCI DMA alias device */
2650 if (req_id != dma_alias) {
2651 tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2652 dma_alias & 0xff, NULL, domain);
2653
2654 if (!tmp || tmp != domain)
2655 return tmp;
2656 }
2657 }
2658
2659 tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2660 if (!tmp || tmp != domain)
2661 return tmp;
2662
2663 return domain;
2664}
2665
/*
 * get_domain_for_dev - return the device's domain, creating and attaching
 * one (width @gaw) when none exists yet.
 *
 * On a lost attach race the freshly created domain is torn down and the
 * winning domain is returned instead.  Returns NULL on failure.
 */
static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
{
	struct dmar_domain *domain, *attached;

	domain = find_domain(dev);
	if (!domain) {
		domain = find_or_alloc_domain(dev, gaw);
		if (domain) {
			attached = set_domain_for_dev(dev, domain);
			if (!attached || domain != attached) {
				domain_exit(domain);
				domain = attached;
			}
		}
	}

	return domain;
}
2688
2689static int iommu_domain_identity_map(struct dmar_domain *domain,
2690 unsigned long long start,
2691 unsigned long long end)
2692{
2693 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2694 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2695
2696 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2697 dma_to_mm_pfn(last_vpfn))) {
2698 pr_err("Reserving iova failed\n");
2699 return -ENOMEM;
2700 }
2701
2702 pr_debug("Mapping reserved region %llx-%llx\n", start, end);
2703 /*
2704 * RMRR range might have overlap with physical memory range,
2705 * clear it first
2706 */
2707 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2708
2709 return __domain_mapping(domain, first_vpfn, NULL,
2710 first_vpfn, last_vpfn - first_vpfn + 1,
2711 DMA_PTE_READ|DMA_PTE_WRITE);
2712}
2713
/*
 * domain_prepare_identity_map - validate and create an identity mapping.
 *
 * Sanity-checks the [start, end] range (typically an RMRR from the BIOS)
 * against obvious firmware bugs before mapping it 1:1 into @domain.
 * Returns 0 on success, -EIO for a bogus range, or the mapping error.
 */
static int domain_prepare_identity_map(struct device *dev,
				       struct dmar_domain *domain,
				       unsigned long long start,
				       unsigned long long end)
{
	/* For _hardware_ passthrough, don't bother. But for software
	   passthrough, we do it anyway -- it may indicate a memory
	   range which is reserved in E820, so which didn't get set
	   up to start with in si_domain */
	if (domain == si_domain && hw_pass_through) {
		dev_warn(dev, "Ignoring identity map for HW passthrough [0x%Lx - 0x%Lx]\n",
			 start, end);
		return 0;
	}

	dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx]\n", start, end);

	if (end < start) {
		WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		     dmi_get_system_info(DMI_BIOS_VENDOR),
		     dmi_get_system_info(DMI_BIOS_VERSION),
		     dmi_get_system_info(DMI_PRODUCT_VERSION));
		return -EIO;
	}

	/* Reject ranges that do not fit the domain's address width. */
	if (end >> agaw_to_width(domain->agaw)) {
		WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		     agaw_to_width(domain->agaw),
		     dmi_get_system_info(DMI_BIOS_VENDOR),
		     dmi_get_system_info(DMI_BIOS_VERSION),
		     dmi_get_system_info(DMI_PRODUCT_VERSION));
		return -EIO;
	}

	return iommu_domain_identity_map(domain, start, end);
}
2752
2753static int iommu_prepare_identity_map(struct device *dev,
2754 unsigned long long start,
2755 unsigned long long end)
2756{
2757 struct dmar_domain *domain;
2758 int ret;
2759
2760 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2761 if (!domain)
2762 return -ENOMEM;
2763
2764 ret = domain_prepare_identity_map(dev, domain, start, end);
2765 if (ret)
2766 domain_exit(domain);
2767
2768 return ret;
2769}
2770
/* Create the identity mapping an RMRR requires for @dev.  Devices marked
 * with DUMMY_DEVICE_DOMAIN_INFO are deliberately skipped. */
static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
					 struct device *dev)
{
	if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
		return 0;
	return iommu_prepare_identity_map(dev, rmrr->base_address,
					  rmrr->end_address);
}
2779
#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
/* Legacy floppy workaround: identity-map the first 16MiB for the LPC/ISA
 * bridge, since ISA DMA addresses cannot be remapped. */
static inline void iommu_prepare_isa(void)
{
	struct pci_dev *pdev;
	int ret;

	pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
	if (!pdev)
		return;

	pr_info("Prepare 0-16MiB unity mapping for LPC\n");
	ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);

	if (ret)
		pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");

	pci_dev_put(pdev);
}
#else
static inline void iommu_prepare_isa(void)
{
	return;
}
#endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
2804
2805static int md_domain_init(struct dmar_domain *domain, int guest_width);
2806
/*
 * si_domain_init - allocate and populate the static identity (si) domain.
 * @hw: non-zero when hardware pass-through is in use; then only the domain
 *      itself is needed, no page-table entries.
 *
 * For software identity mapping, every usable RAM range of every online
 * node is mapped 1:1.  Returns 0 on success or a negative errno.
 */
static int __init si_domain_init(int hw)
{
	int nid, ret;

	si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
	if (!si_domain)
		return -EFAULT;

	if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
		domain_exit(si_domain);
		return -EFAULT;
	}

	pr_debug("Identity mapping domain allocated\n");

	/* Hardware pass-through bypasses translation entirely, so no
	 * identity page-table entries are required. */
	if (hw)
		return 0;

	for_each_online_node(nid) {
		unsigned long start_pfn, end_pfn;
		int i;

		for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
			ret = iommu_domain_identity_map(si_domain,
					PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
			if (ret)
				return ret;
		}
	}

	return 0;
}
2839
2840static int identity_mapping(struct device *dev)
2841{
2842 struct device_domain_info *info;
2843
2844 if (likely(!iommu_identity_mapping))
2845 return 0;
2846
2847 info = dev->archdata.iommu;
2848 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2849 return (info->domain == si_domain);
2850
2851 return 0;
2852}
2853
2854static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2855{
2856 struct dmar_domain *ndomain;
2857 struct intel_iommu *iommu;
2858 u8 bus, devfn;
2859
2860 iommu = device_to_iommu(dev, &bus, &devfn);
2861 if (!iommu)
2862 return -ENODEV;
2863
2864 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2865 if (ndomain != domain)
2866 return -EBUSY;
2867
2868 return 0;
2869}
2870
2871static bool device_has_rmrr(struct device *dev)
2872{
2873 struct dmar_rmrr_unit *rmrr;
2874 struct device *tmp;
2875 int i;
2876
2877 rcu_read_lock();
2878 for_each_rmrr_units(rmrr) {
2879 /*
2880 * Return TRUE if this RMRR contains the device that
2881 * is passed in.
2882 */
2883 for_each_active_dev_scope(rmrr->devices,
2884 rmrr->devices_cnt, i, tmp)
2885 if (tmp == dev) {
2886 rcu_read_unlock();
2887 return true;
2888 }
2889 }
2890 rcu_read_unlock();
2891 return false;
2892}
2893
2894/*
2895 * There are a couple cases where we need to restrict the functionality of
2896 * devices associated with RMRRs. The first is when evaluating a device for
2897 * identity mapping because problems exist when devices are moved in and out
2898 * of domains and their respective RMRR information is lost. This means that
2899 * a device with associated RMRRs will never be in a "passthrough" domain.
2900 * The second is use of the device through the IOMMU API. This interface
2901 * expects to have full control of the IOVA space for the device. We cannot
2902 * satisfy both the requirement that RMRR access is maintained and have an
2903 * unencumbered IOVA space. We also have no ability to quiesce the device's
2904 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2905 * We therefore prevent devices associated with an RMRR from participating in
2906 * the IOMMU API, which eliminates them from device assignment.
2907 *
2908 * In both cases we assume that PCI USB devices with RMRRs have them largely
2909 * for historical reasons and that the RMRR space is not actively used post
2910 * boot. This exclusion may change if vendors begin to abuse it.
2911 *
2912 * The same exception is made for graphics devices, with the requirement that
2913 * any use of the RMRR regions will be torn down before assigning the device
2914 * to a guest.
2915 */
2916static bool device_is_rmrr_locked(struct device *dev)
2917{
2918 if (!device_has_rmrr(dev))
2919 return false;
2920
2921 if (dev_is_pci(dev)) {
2922 struct pci_dev *pdev = to_pci_dev(dev);
2923
2924 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2925 return false;
2926 }
2927
2928 return true;
2929}
2930
/*
 * iommu_should_identity_map - policy: should @dev get a 1:1 mapping?
 * @dev:     device being evaluated
 * @startup: non-zero during boot-time setup, zero at run time
 *
 * Returns 1 when the device should live in the static identity domain,
 * 0 otherwise.  At run time (!startup) the decision additionally depends
 * on whether the device's DMA mask covers all required memory.
 */
static int iommu_should_identity_map(struct device *dev, int startup)
{
	if (dev_is_pci(dev)) {
		struct pci_dev *pdev = to_pci_dev(dev);

		/* RMRR-locked devices must never be in a passthrough domain. */
		if (device_is_rmrr_locked(dev))
			return 0;

		/*
		 * Prevent any device marked as untrusted from getting
		 * placed into the statically identity mapping domain.
		 */
		if (pdev->untrusted)
			return 0;

		if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
			return 1;

		if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
			return 1;

		if (!(iommu_identity_mapping & IDENTMAP_ALL))
			return 0;

		/*
		 * We want to start off with all devices in the 1:1 domain, and
		 * take them out later if we find they can't access all of memory.
		 *
		 * However, we can't do this for PCI devices behind bridges,
		 * because all PCI devices behind the same bridge will end up
		 * with the same source-id on their transactions.
		 *
		 * Practically speaking, we can't change things around for these
		 * devices at run-time, because we can't be sure there'll be no
		 * DMA transactions in flight for any of their siblings.
		 *
		 * So PCI devices (unless they're on the root bus) as well as
		 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
		 * the 1:1 domain, just in _case_ one of their siblings turns out
		 * not to be able to map all of memory.
		 */
		if (!pci_is_pcie(pdev)) {
			if (!pci_is_root_bus(pdev->bus))
				return 0;
			if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
				return 0;
		} else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
			return 0;
	} else {
		if (device_has_rmrr(dev))
			return 0;
	}

	/*
	 * At boot time, we don't yet know if devices will be 64-bit capable.
	 * Assume that they will — if they turn out not to be, then we can
	 * take them out of the 1:1 domain later.
	 */
	if (!startup) {
		/*
		 * If the device's dma_mask is less than the system's memory
		 * size then this is not a candidate for identity mapping.
		 */
		u64 dma_mask = *dev->dma_mask;

		/* The effective mask is the smaller of the streaming and
		 * coherent masks. */
		if (dev->coherent_dma_mask &&
		    dev->coherent_dma_mask < dma_mask)
			dma_mask = dev->coherent_dma_mask;

		return dma_mask >= dma_get_required_mask(dev);
	}

	return 1;
}
3005
3006static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
3007{
3008 int ret;
3009
3010 if (!iommu_should_identity_map(dev, 1))
3011 return 0;
3012
3013 ret = domain_add_dev_info(si_domain, dev);
3014 if (!ret)
3015 dev_info(dev, "%s identity mapping\n",
3016 hw ? "Hardware" : "Software");
3017 else if (ret == -ENODEV)
3018 /* device not associated with an iommu */
3019 ret = 0;
3020
3021 return ret;
3022}
3023
3024
3025static int __init iommu_prepare_static_identity_mapping(int hw)
3026{
3027 struct pci_dev *pdev = NULL;
3028 struct dmar_drhd_unit *drhd;
3029 /* To avoid a -Wunused-but-set-variable warning. */
3030 struct intel_iommu *iommu __maybe_unused;
3031 struct device *dev;
3032 int i;
3033 int ret = 0;
3034
3035 for_each_pci_dev(pdev) {
3036 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
3037 if (ret)
3038 return ret;
3039 }
3040
3041 for_each_active_iommu(iommu, drhd)
3042 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
3043 struct acpi_device_physical_node *pn;
3044 struct acpi_device *adev;
3045
3046 if (dev->bus != &acpi_bus_type)
3047 continue;
3048
3049 adev= to_acpi_device(dev);
3050 mutex_lock(&adev->physical_node_lock);
3051 list_for_each_entry(pn, &adev->physical_node_list, node) {
3052 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
3053 if (ret)
3054 break;
3055 }
3056 mutex_unlock(&adev->physical_node_lock);
3057 if (ret)
3058 return ret;
3059 }
3060
3061 return 0;
3062}
3063
3064static void intel_iommu_init_qi(struct intel_iommu *iommu)
3065{
3066 /*
3067 * Start from the sane iommu hardware state.
3068 * If the queued invalidation is already initialized by us
3069 * (for example, while enabling interrupt-remapping) then
3070 * we got the things already rolling from a sane state.
3071 */
3072 if (!iommu->qi) {
3073 /*
3074 * Clear any previous faults.
3075 */
3076 dmar_fault(-1, iommu);
3077 /*
3078 * Disable queued invalidation if supported and already enabled
3079 * before OS handover.
3080 */
3081 dmar_disable_qi(iommu);
3082 }
3083
3084 if (dmar_enable_qi(iommu)) {
3085 /*
3086 * Queued Invalidate not enabled, use Register Based Invalidate
3087 */
3088 iommu->flush.flush_context = __iommu_flush_context;
3089 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
3090 pr_info("%s: Using Register based invalidation\n",
3091 iommu->name);
3092 } else {
3093 iommu->flush.flush_context = qi_flush_context;
3094 iommu->flush.flush_iotlb = qi_flush_iotlb;
3095 pr_info("%s: Using Queued invalidation\n", iommu->name);
3096 }
3097}
3098
/*
 * copy_context_table - copy one bus's context table from the old kernel.
 * @iommu:  IOMMU whose old tables are being inherited (kdump path)
 * @old_re: root entry of the old kernel for this bus
 * @tbl:    output array of new context-table pages
 * @bus:    bus number being copied
 * @ext:    true for the extended root/context table format
 *
 * In extended mode each bus has two context tables (lower/upper half of
 * the devfn space), hence the tbl_idx/pos arithmetic.  Returns 0 on
 * success or a negative errno.
 */
static int copy_context_table(struct intel_iommu *iommu,
			      struct root_entry *old_re,
			      struct context_entry **tbl,
			      int bus, bool ext)
{
	int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
	struct context_entry *new_ce = NULL, ce;
	struct context_entry *old_ce = NULL;
	struct root_entry re;
	phys_addr_t old_ce_phys;

	tbl_idx = ext ? bus * 2 : bus;
	memcpy(&re, old_re, sizeof(re));

	for (devfn = 0; devfn < 256; devfn++) {
		/* First calculate the correct index */
		idx = (ext ? devfn * 2 : devfn) % 256;

		if (idx == 0) {
			/* First save what we may have and clean up */
			if (new_ce) {
				tbl[tbl_idx] = new_ce;
				__iommu_flush_cache(iommu, new_ce,
						    VTD_PAGE_SIZE);
				pos = 1;
			}

			if (old_ce)
				memunmap(old_ce);

			ret = 0;
			/* Extended mode: lower devfn half from LCTP, upper
			 * half from UCTP. */
			if (devfn < 0x80)
				old_ce_phys = root_entry_lctp(&re);
			else
				old_ce_phys = root_entry_uctp(&re);

			if (!old_ce_phys) {
				if (ext && devfn == 0) {
					/* No LCTP, try UCTP */
					devfn = 0x7f;
					continue;
				} else {
					goto out;
				}
			}

			ret = -ENOMEM;
			old_ce = memremap(old_ce_phys, PAGE_SIZE,
					  MEMREMAP_WB);
			if (!old_ce)
				goto out;

			new_ce = alloc_pgtable_page(iommu->node);
			if (!new_ce)
				goto out_unmap;

			ret = 0;
		}

		/* Now copy the context entry */
		memcpy(&ce, old_ce + idx, sizeof(ce));

		if (!__context_present(&ce))
			continue;

		/* Keep the old kernel's domain-id reserved. */
		did = context_domain_id(&ce);
		if (did >= 0 && did < cap_ndoms(iommu->cap))
			set_bit(did, iommu->domain_ids);

		/*
		 * We need a marker for copied context entries. This
		 * marker needs to work for the old format as well as
		 * for extended context entries.
		 *
		 * Bit 67 of the context entry is used. In the old
		 * format this bit is available to software, in the
		 * extended format it is the PGE bit, but PGE is ignored
		 * by HW if PASIDs are disabled (and thus still
		 * available).
		 *
		 * So disable PASIDs first and then mark the entry
		 * copied. This means that we don't copy PASID
		 * translations from the old kernel, but this is fine as
		 * faults there are not fatal.
		 */
		context_clear_pasid_enable(&ce);
		context_set_copied(&ce);

		new_ce[idx] = ce;
	}

	tbl[tbl_idx + pos] = new_ce;

	__iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);

out_unmap:
	memunmap(old_ce);

out:
	return ret;
}
3200
/*
 * copy_translation_tables - inherit the previous kernel's translation
 * tables (kdump case).
 *
 * Reads the old root table address from DMAR_RTADDR_REG, copies every
 * bus's context table into freshly allocated pages, then points the new
 * root table at the copies.  Refuses to run (-EINVAL) when the old and
 * new kernels disagree on the extended (RTT) table format, since RTT can
 * only be changed with translation disabled.  Returns 0 or an errno.
 */
static int copy_translation_tables(struct intel_iommu *iommu)
{
	struct context_entry **ctxt_tbls;
	struct root_entry *old_rt;
	phys_addr_t old_rt_phys;
	int ctxt_table_entries;
	unsigned long flags;
	u64 rtaddr_reg;
	int bus, ret;
	bool new_ext, ext;

	rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
	ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
	new_ext = !!ecap_ecs(iommu->ecap);

	/*
	 * The RTT bit can only be changed when translation is disabled,
	 * but disabling translation means to open a window for data
	 * corruption. So bail out and don't copy anything if we would
	 * have to change the bit.
	 */
	if (new_ext != ext)
		return -EINVAL;

	old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
	if (!old_rt_phys)
		return -EINVAL;

	old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
	if (!old_rt)
		return -ENOMEM;

	/* This is too big for the stack - allocate it from slab */
	ctxt_table_entries = ext ? 512 : 256;
	ret = -ENOMEM;
	ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
	if (!ctxt_tbls)
		goto out_unmap;

	/* A failed bus is skipped, not fatal: the remaining buses are
	 * still copied. */
	for (bus = 0; bus < 256; bus++) {
		ret = copy_context_table(iommu, &old_rt[bus],
					 ctxt_tbls, bus, ext);
		if (ret) {
			pr_err("%s: Failed to copy context table for bus %d\n",
			       iommu->name, bus);
			continue;
		}
	}

	spin_lock_irqsave(&iommu->lock, flags);

	/* Context tables are copied, now write them to the root_entry table */
	for (bus = 0; bus < 256; bus++) {
		int idx = ext ? bus * 2 : bus;
		u64 val;

		if (ctxt_tbls[idx]) {
			/* Low bit set marks the entry present. */
			val = virt_to_phys(ctxt_tbls[idx]) | 1;
			iommu->root_entry[bus].lo = val;
		}

		if (!ext || !ctxt_tbls[idx + 1])
			continue;

		val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
		iommu->root_entry[bus].hi = val;
	}

	spin_unlock_irqrestore(&iommu->lock, flags);

	kfree(ctxt_tbls);

	__iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);

	ret = 0;

out_unmap:
	memunmap(old_rt);

	return ret;
}
3282
3283static int __init init_dmars(void)
3284{
3285 struct dmar_drhd_unit *drhd;
3286 struct dmar_rmrr_unit *rmrr;
3287 bool copied_tables = false;
3288 struct device *dev;
3289 struct intel_iommu *iommu;
3290 int i, ret;
3291
3292 /*
3293 * for each drhd
3294 * allocate root
3295 * initialize and program root entry to not present
3296 * endfor
3297 */
3298 for_each_drhd_unit(drhd) {
3299 /*
3300 * lock not needed as this is only incremented in the single
3301 * threaded kernel __init code path all other access are read
3302 * only
3303 */
3304 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
3305 g_num_of_iommus++;
3306 continue;
3307 }
3308 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
3309 }
3310
3311 /* Preallocate enough resources for IOMMU hot-addition */
3312 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3313 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3314
3315 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3316 GFP_KERNEL);
3317 if (!g_iommus) {
3318 pr_err("Allocating global iommu array failed\n");
3319 ret = -ENOMEM;
3320 goto error;
3321 }
3322
3323 for_each_active_iommu(iommu, drhd) {
3324 /*
3325 * Find the max pasid size of all IOMMU's in the system.
3326 * We need to ensure the system pasid table is no bigger
3327 * than the smallest supported.
3328 */
3329 if (pasid_supported(iommu)) {
3330 u32 temp = 2 << ecap_pss(iommu->ecap);
3331
3332 intel_pasid_max_id = min_t(u32, temp,
3333 intel_pasid_max_id);
3334 }
3335
3336 g_iommus[iommu->seq_id] = iommu;
3337
3338 intel_iommu_init_qi(iommu);
3339
3340 ret = iommu_init_domains(iommu);
3341 if (ret)
3342 goto free_iommu;
3343
3344 init_translation_status(iommu);
3345
3346 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3347 iommu_disable_translation(iommu);
3348 clear_translation_pre_enabled(iommu);
3349 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3350 iommu->name);
3351 }
3352
3353 /*
3354 * TBD:
3355 * we could share the same root & context tables
3356 * among all IOMMU's. Need to Split it later.
3357 */
3358 ret = iommu_alloc_root_entry(iommu);
3359 if (ret)
3360 goto free_iommu;
3361
3362 if (translation_pre_enabled(iommu)) {
3363 pr_info("Translation already enabled - trying to copy translation structures\n");
3364
3365 ret = copy_translation_tables(iommu);
3366 if (ret) {
3367 /*
3368 * We found the IOMMU with translation
3369 * enabled - but failed to copy over the
3370 * old root-entry table. Try to proceed
3371 * by disabling translation now and
3372 * allocating a clean root-entry table.
3373 * This might cause DMAR faults, but
3374 * probably the dump will still succeed.
3375 */
3376 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3377 iommu->name);
3378 iommu_disable_translation(iommu);
3379 clear_translation_pre_enabled(iommu);
3380 } else {
3381 pr_info("Copied translation tables from previous kernel for %s\n",
3382 iommu->name);
3383 copied_tables = true;
3384 }
3385 }
3386
3387 if (!ecap_pass_through(iommu->ecap))
3388 hw_pass_through = 0;
3389#ifdef CONFIG_INTEL_IOMMU_SVM
3390 if (pasid_supported(iommu))
3391 intel_svm_init(iommu);
3392#endif
3393 }
3394
3395 /*
3396 * Now that qi is enabled on all iommus, set the root entry and flush
3397 * caches. This is required on some Intel X58 chipsets, otherwise the
3398 * flush_context function will loop forever and the boot hangs.
3399 */
3400 for_each_active_iommu(iommu, drhd) {
3401 iommu_flush_write_buffer(iommu);
3402 iommu_set_root_entry(iommu);
3403 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3404 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3405 }
3406
3407 if (iommu_pass_through)
3408 iommu_identity_mapping |= IDENTMAP_ALL;
3409
3410#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
3411 dmar_map_gfx = 0;
3412#endif
3413
3414 if (!dmar_map_gfx)
3415 iommu_identity_mapping |= IDENTMAP_GFX;
3416
3417 check_tylersburg_isoch();
3418
3419 if (iommu_identity_mapping) {
3420 ret = si_domain_init(hw_pass_through);
3421 if (ret)
3422 goto free_iommu;
3423 }
3424
3425
3426 /*
3427 * If we copied translations from a previous kernel in the kdump
3428 * case, we can not assign the devices to domains now, as that
3429 * would eliminate the old mappings. So skip this part and defer
3430 * the assignment to device driver initialization time.
3431 */
3432 if (copied_tables)
3433 goto domains_done;
3434
3435 /*
3436 * If pass through is not set or not enabled, setup context entries for
3437 * identity mappings for rmrr, gfx, and isa and may fall back to static
3438 * identity mapping if iommu_identity_mapping is set.
3439 */
3440 if (iommu_identity_mapping) {
3441 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
3442 if (ret) {
3443 pr_crit("Failed to setup IOMMU pass-through\n");
3444 goto free_iommu;
3445 }
3446 }
3447 /*
3448 * For each rmrr
3449 * for each dev attached to rmrr
3450 * do
3451 * locate drhd for dev, alloc domain for dev
3452 * allocate free domain
3453 * allocate page table entries for rmrr
3454 * if context not allocated for bus
3455 * allocate and init context
3456 * set present in root table for this bus
3457 * init context with domain, translation etc
3458 * endfor
3459 * endfor
3460 */
3461 pr_info("Setting RMRR:\n");
3462 for_each_rmrr_units(rmrr) {
3463 /* some BIOS lists non-exist devices in DMAR table. */
3464 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3465 i, dev) {
3466 ret = iommu_prepare_rmrr_dev(rmrr, dev);
3467 if (ret)
3468 pr_err("Mapping reserved region failed\n");
3469 }
3470 }
3471
3472 iommu_prepare_isa();
3473
3474domains_done:
3475
3476 /*
3477 * for each drhd
3478 * enable fault log
3479 * global invalidate context cache
3480 * global invalidate iotlb
3481 * enable translation
3482 */
3483 for_each_iommu(iommu, drhd) {
3484 if (drhd->ignored) {
3485 /*
3486 * we always have to disable PMRs or DMA may fail on
3487 * this device
3488 */
3489 if (force_on)
3490 iommu_disable_protect_mem_regions(iommu);
3491 continue;
3492 }
3493
3494 iommu_flush_write_buffer(iommu);
3495
3496#ifdef CONFIG_INTEL_IOMMU_SVM
3497 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
3498 /*
3499 * Call dmar_alloc_hwirq() with dmar_global_lock held,
3500 * could cause possible lock race condition.
3501 */
3502 up_write(&dmar_global_lock);
3503 ret = intel_svm_enable_prq(iommu);
3504 down_write(&dmar_global_lock);
3505 if (ret)
3506 goto free_iommu;
3507 }
3508#endif
3509 ret = dmar_set_interrupt(iommu);
3510 if (ret)
3511 goto free_iommu;
3512
3513 if (!translation_pre_enabled(iommu))
3514 iommu_enable_translation(iommu);
3515
3516 iommu_disable_protect_mem_regions(iommu);
3517 }
3518
3519 return 0;
3520
3521free_iommu:
3522 for_each_active_iommu(iommu, drhd) {
3523 disable_dmar_iommu(iommu);
3524 free_dmar_iommu(iommu);
3525 }
3526
3527 kfree(g_iommus);
3528
3529error:
3530 return ret;
3531}
3532
3533/* This takes a number of _MM_ pages, not VTD pages */
3534static unsigned long intel_alloc_iova(struct device *dev,
3535 struct dmar_domain *domain,
3536 unsigned long nrpages, uint64_t dma_mask)
3537{
3538 unsigned long iova_pfn;
3539
3540 /* Restrict dma_mask to the width that the iommu can handle */
3541 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
3542 /* Ensure we reserve the whole size-aligned region */
3543 nrpages = __roundup_pow_of_two(nrpages);
3544
3545 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
3546 /*
3547 * First try to allocate an io virtual address in
3548 * DMA_BIT_MASK(32) and if that fails then try allocating
3549 * from higher range
3550 */
3551 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3552 IOVA_PFN(DMA_BIT_MASK(32)), false);
3553 if (iova_pfn)
3554 return iova_pfn;
3555 }
3556 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3557 IOVA_PFN(dma_mask), true);
3558 if (unlikely(!iova_pfn)) {
3559 dev_err(dev, "Allocating %ld-page iova failed", nrpages);
3560 return 0;
3561 }
3562
3563 return iova_pfn;
3564}
3565
/*
 * Return the DMAR domain @dev should use for DMA-API mappings, allocating
 * one on first use and replaying any RMRR identity mappings that list the
 * device.  Returns NULL if no domain could be allocated.
 */
struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
{
	struct dmar_domain *domain, *tmp;
	struct dmar_rmrr_unit *rmrr;
	struct device *i_dev;
	int i, ret;

	/* Fast path: the device already has a domain attached. */
	domain = find_domain(dev);
	if (domain)
		goto out;

	domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
	if (!domain)
		goto out;

	/* We have a new domain - setup possible RMRRs for the device */
	rcu_read_lock();
	for_each_rmrr_units(rmrr) {
		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
					  i, i_dev) {
			if (i_dev != dev)
				continue;

			/* Failure is logged but not fatal for the domain. */
			ret = domain_prepare_identity_map(dev, domain,
							  rmrr->base_address,
							  rmrr->end_address);
			if (ret)
				dev_err(dev, "Mapping reserved region failed\n");
		}
	}
	rcu_read_unlock();

	/*
	 * If a different domain was installed for the device in the
	 * meantime, drop ours and use the one already attached.
	 */
	tmp = set_domain_for_dev(dev, domain);
	if (!tmp || domain != tmp) {
		domain_exit(domain);
		domain = tmp;
	}

out:

	if (!domain)
		dev_err(dev, "Allocating domain failed\n");


	return domain;
}
3612
/* Check if the dev needs to go through non-identity map and unmap process.*/
/*
 * NOTE: this is not a pure predicate — it may move the device into or out
 * of si_domain (the static identity domain) as a side effect, based on
 * what iommu_should_identity_map() decides for the device now.
 */
static bool iommu_need_mapping(struct device *dev)
{
	int found;

	/* Devices marked dummy bypass the IOMMU entirely. */
	if (iommu_dummy(dev))
		return false;

	if (!iommu_identity_mapping)
		return true;

	found = identity_mapping(dev);
	if (found) {
		if (iommu_should_identity_map(dev, 0))
			return false;

		/*
		 * 32 bit DMA is removed from si_domain and fall back to
		 * non-identity mapping.
		 */
		dmar_remove_one_dev_info(dev);
		dev_info(dev, "32bit DMA uses non-identity mapping\n");
	} else {
		/*
		 * In case of a detached 64 bit DMA device from vm, the device
		 * is put into si_domain for identity mapping.
		 */
		if (iommu_should_identity_map(dev, 0) &&
		    !domain_add_dev_info(si_domain, dev)) {
			dev_info(dev, "64bit DMA uses identity mapping\n");
			return false;
		}
	}

	return true;
}
3649
/*
 * Map @size bytes at physical address @paddr for DMA by @dev, restricted
 * by @dma_mask.  Returns the IOVA to program into the device, or
 * DMA_MAPPING_ERROR on failure.  @dir must not be DMA_NONE.
 */
static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
				     size_t size, int dir, u64 dma_mask)
{
	struct dmar_domain *domain;
	phys_addr_t start_paddr;
	unsigned long iova_pfn;
	int prot = 0;
	int ret;
	struct intel_iommu *iommu;
	unsigned long paddr_pfn = paddr >> PAGE_SHIFT;

	BUG_ON(dir == DMA_NONE);

	domain = get_valid_domain_for_dev(dev);
	if (!domain)
		return DMA_MAPPING_ERROR;

	iommu = domain_get_iommu(domain);
	/* From here on, "size" is a VT-d page count, not a byte count. */
	size = aligned_nrpages(paddr, size);

	iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
	if (!iova_pfn)
		goto error;

	/*
	 * Check if DMAR supports zero-length reads on write only
	 * mappings..
	 */
	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
			!cap_zlr(iommu->cap))
		prot |= DMA_PTE_READ;
	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
		prot |= DMA_PTE_WRITE;
	/*
	 * paddr - (paddr + size) might be partial page, we should map the whole
	 * page.  Note: if two part of one page are separately mapped, we
	 * might have two guest_addr mapping to the same host paddr, but this
	 * is not a big problem
	 */
	ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
				 mm_to_dma_pfn(paddr_pfn), size, prot);
	if (ret)
		goto error;

	/* Re-attach the caller's sub-page offset to the returned IOVA. */
	start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
	start_paddr += paddr & ~PAGE_MASK;
	return start_paddr;

error:
	/* iova_pfn is 0 when the IOVA allocation itself failed. */
	if (iova_pfn)
		free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
	dev_err(dev, "Device request: %zx@%llx dir %d --- failed\n",
		size, (unsigned long long)paddr, dir);
	return DMA_MAPPING_ERROR;
}
3705
3706static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3707 unsigned long offset, size_t size,
3708 enum dma_data_direction dir,
3709 unsigned long attrs)
3710{
3711 if (iommu_need_mapping(dev))
3712 return __intel_map_single(dev, page_to_phys(page) + offset,
3713 size, dir, *dev->dma_mask);
3714 return dma_direct_map_page(dev, page, offset, size, dir, attrs);
3715}
3716
3717static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr,
3718 size_t size, enum dma_data_direction dir,
3719 unsigned long attrs)
3720{
3721 if (iommu_need_mapping(dev))
3722 return __intel_map_single(dev, phys_addr, size, dir,
3723 *dev->dma_mask);
3724 return dma_direct_map_resource(dev, phys_addr, size, dir, attrs);
3725}
3726
/*
 * Tear down the IOMMU mapping covering [@dev_addr, @dev_addr + @size).
 * In strict mode (or for untrusted PCI devices) the IOTLB is flushed and
 * the IOVA freed synchronously; otherwise the release is deferred via the
 * flush queue to amortize the cost of the invalidation.
 */
static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
{
	struct dmar_domain *domain;
	unsigned long start_pfn, last_pfn;
	unsigned long nrpages;
	unsigned long iova_pfn;
	struct intel_iommu *iommu;
	struct page *freelist;
	struct pci_dev *pdev = NULL;

	domain = find_domain(dev);
	BUG_ON(!domain);

	iommu = domain_get_iommu(domain);

	iova_pfn = IOVA_PFN(dev_addr);

	nrpages = aligned_nrpages(dev_addr, size);
	start_pfn = mm_to_dma_pfn(iova_pfn);
	last_pfn = start_pfn + nrpages - 1;

	/* Needed below to check pdev->untrusted for the strict path. */
	if (dev_is_pci(dev))
		pdev = to_pci_dev(dev);

	dev_dbg(dev, "Device unmapping: pfn %lx-%lx\n", start_pfn, last_pfn);

	/* Clear the page tables; freed table pages come back in freelist. */
	freelist = domain_unmap(domain, start_pfn, last_pfn);

	if (intel_iommu_strict || (pdev && pdev->untrusted)) {
		iommu_flush_iotlb_psi(iommu, domain, start_pfn,
				      nrpages, !freelist, 0);
		/* free iova */
		free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
		dma_free_pagelist(freelist);
	} else {
		queue_iova(&domain->iovad, iova_pfn, nrpages,
			   (unsigned long)freelist);
		/*
		 * queue up the release of the unmap to save the 1/6th of the
		 * cpu used up by the iotlb flush operation...
		 */
	}
}
3770
3771static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3772 size_t size, enum dma_data_direction dir,
3773 unsigned long attrs)
3774{
3775 if (iommu_need_mapping(dev))
3776 intel_unmap(dev, dev_addr, size);
3777 else
3778 dma_direct_unmap_page(dev, dev_addr, size, dir, attrs);
3779}
3780
static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	/*
	 * Only IOMMU-translated mappings need tearing down; there is no
	 * dma-direct unmap counterpart to call for the bypass case.
	 */
	if (iommu_need_mapping(dev))
		intel_unmap(dev, dev_addr, size);
}
3787
/*
 * DMA-API .alloc: allocate a zeroed, page-aligned buffer and map it
 * bidirectionally through the IOMMU.  Tries the CMA pool first when the
 * caller may block, falling back to the page allocator.  Returns the
 * kernel virtual address and fills *@dma_handle, or NULL on failure.
 */
static void *intel_alloc_coherent(struct device *dev, size_t size,
				  dma_addr_t *dma_handle, gfp_t flags,
				  unsigned long attrs)
{
	struct page *page = NULL;
	int order;

	if (!iommu_need_mapping(dev))
		return dma_direct_alloc(dev, size, dma_handle, flags, attrs);

	size = PAGE_ALIGN(size);
	order = get_order(size);

	if (gfpflags_allow_blocking(flags)) {
		unsigned int count = size >> PAGE_SHIFT;

		page = dma_alloc_from_contiguous(dev, count, order,
						 flags & __GFP_NOWARN);
	}

	if (!page)
		page = alloc_pages(flags, order);
	if (!page)
		return NULL;
	memset(page_address(page), 0, size);

	*dma_handle = __intel_map_single(dev, page_to_phys(page), size,
					 DMA_BIDIRECTIONAL,
					 dev->coherent_dma_mask);
	if (*dma_handle != DMA_MAPPING_ERROR)
		return page_address(page);
	/* Mapping failed: return the pages to whichever pool they came from. */
	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
		__free_pages(page, order);

	return NULL;
}
3824
3825static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
3826 dma_addr_t dma_handle, unsigned long attrs)
3827{
3828 int order;
3829 struct page *page = virt_to_page(vaddr);
3830
3831 if (!iommu_need_mapping(dev))
3832 return dma_direct_free(dev, size, vaddr, dma_handle, attrs);
3833
3834 size = PAGE_ALIGN(size);
3835 order = get_order(size);
3836
3837 intel_unmap(dev, dma_handle, size);
3838 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3839 __free_pages(page, order);
3840}
3841
3842static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
3843 int nelems, enum dma_data_direction dir,
3844 unsigned long attrs)
3845{
3846 dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK;
3847 unsigned long nrpages = 0;
3848 struct scatterlist *sg;
3849 int i;
3850
3851 if (!iommu_need_mapping(dev))
3852 return dma_direct_unmap_sg(dev, sglist, nelems, dir, attrs);
3853
3854 for_each_sg(sglist, sg, nelems, i) {
3855 nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
3856 }
3857
3858 intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
3859}
3860
/*
 * DMA-API .map_sg: map a scatterlist into one contiguous IOVA range.
 * Returns the number of entries mapped, or 0 on failure (the DMA-API
 * error convention for map_sg).
 */
static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
			enum dma_data_direction dir, unsigned long attrs)
{
	int i;
	struct dmar_domain *domain;
	size_t size = 0;
	int prot = 0;
	unsigned long iova_pfn;
	int ret;
	struct scatterlist *sg;
	unsigned long start_vpfn;
	struct intel_iommu *iommu;

	BUG_ON(dir == DMA_NONE);
	if (!iommu_need_mapping(dev))
		return dma_direct_map_sg(dev, sglist, nelems, dir, attrs);

	domain = get_valid_domain_for_dev(dev);
	if (!domain)
		return 0;

	iommu = domain_get_iommu(domain);

	/* Total VT-d pages needed to cover every (offset, length) pair. */
	for_each_sg(sglist, sg, nelems, i)
		size += aligned_nrpages(sg->offset, sg->length);

	iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
				*dev->dma_mask);
	if (!iova_pfn) {
		sglist->dma_length = 0;
		return 0;
	}

	/*
	 * Check if DMAR supports zero-length reads on write only
	 * mappings..
	 */
	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
			!cap_zlr(iommu->cap))
		prot |= DMA_PTE_READ;
	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
		prot |= DMA_PTE_WRITE;

	start_vpfn = mm_to_dma_pfn(iova_pfn);

	ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
	if (unlikely(ret)) {
		/* Undo any partially-built page tables and free the IOVA. */
		dma_pte_free_pagetable(domain, start_vpfn,
				       start_vpfn + size - 1,
				       agaw_to_level(domain->agaw) + 1);
		free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
		return 0;
	}

	return nelems;
}
3917
/*
 * DMA operations used when the IOMMU is active.  Each callback falls
 * back to the dma-direct implementation for devices that bypass
 * translation (see iommu_need_mapping()).
 */
static const struct dma_map_ops intel_dma_ops = {
	.alloc = intel_alloc_coherent,
	.free = intel_free_coherent,
	.map_sg = intel_map_sg,
	.unmap_sg = intel_unmap_sg,
	.map_page = intel_map_page,
	.unmap_page = intel_unmap_page,
	.map_resource = intel_map_resource,
	.unmap_resource = intel_unmap_resource,
	.dma_supported = dma_direct_supported,
};
3929
3930static inline int iommu_domain_cache_init(void)
3931{
3932 int ret = 0;
3933
3934 iommu_domain_cache = kmem_cache_create("iommu_domain",
3935 sizeof(struct dmar_domain),
3936 0,
3937 SLAB_HWCACHE_ALIGN,
3938
3939 NULL);
3940 if (!iommu_domain_cache) {
3941 pr_err("Couldn't create iommu_domain cache\n");
3942 ret = -ENOMEM;
3943 }
3944
3945 return ret;
3946}
3947
3948static inline int iommu_devinfo_cache_init(void)
3949{
3950 int ret = 0;
3951
3952 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3953 sizeof(struct device_domain_info),
3954 0,
3955 SLAB_HWCACHE_ALIGN,
3956 NULL);
3957 if (!iommu_devinfo_cache) {
3958 pr_err("Couldn't create devinfo cache\n");
3959 ret = -ENOMEM;
3960 }
3961
3962 return ret;
3963}
3964
3965static int __init iommu_init_mempool(void)
3966{
3967 int ret;
3968 ret = iova_cache_get();
3969 if (ret)
3970 return ret;
3971
3972 ret = iommu_domain_cache_init();
3973 if (ret)
3974 goto domain_error;
3975
3976 ret = iommu_devinfo_cache_init();
3977 if (!ret)
3978 return ret;
3979
3980 kmem_cache_destroy(iommu_domain_cache);
3981domain_error:
3982 iova_cache_put();
3983
3984 return -ENOMEM;
3985}
3986
/* Tear down what iommu_init_mempool() set up, in reverse order. */
static void __init iommu_exit_mempool(void)
{
	kmem_cache_destroy(iommu_devinfo_cache);
	kmem_cache_destroy(iommu_domain_cache);
	iova_cache_put();
}
3993
/*
 * Quirk for Intel IOAT (QuickData) on SNB: detect a BIOS that reports
 * the wrong DMAR unit for this device and, if so, mark the device to
 * bypass the IOMMU entirely.
 */
static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
{
	struct dmar_drhd_unit *drhd;
	u32 vtbar;
	int rc;

	/* We know that this device on this chipset has its own IOMMU.
	 * If we find it under a different IOMMU, then the BIOS is lying
	 * to us. Hope that the IOMMU for this device is actually
	 * disabled, and it needs no translation...
	 */
	rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
	if (rc) {
		/* "can't" happen */
		dev_info(&pdev->dev, "failed to run vt-d quirk\n");
		return;
	}
	vtbar &= 0xffff0000;

	/* we know that the this iommu should be at offset 0xa000 from vtbar */
	drhd = dmar_find_matched_drhd_unit(pdev);
	if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
			    TAINT_FIRMWARE_WORKAROUND,
			    "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
		pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
}
DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
4021
/*
 * Mark DMAR units that should not be used: units whose device scope is
 * empty, and (when graphics mapping is disabled) units that cover only
 * graphics devices, whose devices are then set to bypass the IOMMU.
 */
static void __init init_no_remapping_devices(void)
{
	struct dmar_drhd_unit *drhd;
	struct device *dev;
	int i;

	for_each_drhd_unit(drhd) {
		if (!drhd->include_all) {
			for_each_active_dev_scope(drhd->devices,
						  drhd->devices_cnt, i, dev)
				break;
			/* ignore DMAR unit if no devices exist */
			if (i == drhd->devices_cnt)
				drhd->ignored = 1;
		}
	}

	for_each_active_drhd_unit(drhd) {
		if (drhd->include_all)
			continue;

		/* Stop at the first device that is not graphics. */
		for_each_active_dev_scope(drhd->devices,
					  drhd->devices_cnt, i, dev)
			if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
				break;
		if (i < drhd->devices_cnt)
			continue;

		/* This IOMMU has *only* gfx devices. Either bypass it or
		   set the gfx_mapped flag, as appropriate */
		if (!dmar_map_gfx) {
			drhd->ignored = 1;
			for_each_active_dev_scope(drhd->devices,
						  drhd->devices_cnt, i, dev)
				dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
		}
	}
}
4060
4061#ifdef CONFIG_SUSPEND
/*
 * Re-initialize IOMMU hardware on resume: re-enable queued invalidation
 * where it was in use, reinstall root entries, flush caches and turn
 * translation back on.  Ignored units only get their PMRs disabled.
 */
static int init_iommu_hw(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu = NULL;

	for_each_active_iommu(iommu, drhd)
		if (iommu->qi)
			dmar_reenable_qi(iommu);

	for_each_iommu(iommu, drhd) {
		if (drhd->ignored) {
			/*
			 * we always have to disable PMRs or DMA may fail on
			 * this device
			 */
			if (force_on)
				iommu_disable_protect_mem_regions(iommu);
			continue;
		}

		iommu_flush_write_buffer(iommu);

		iommu_set_root_entry(iommu);

		/* Global invalidation after installing the root entry. */
		iommu->flush.flush_context(iommu, 0, 0, 0,
					   DMA_CCMD_GLOBAL_INVL);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
		iommu_enable_translation(iommu);
		iommu_disable_protect_mem_regions(iommu);
	}

	return 0;
}
4095
/* Globally invalidate the context cache and IOTLB on every active IOMMU. */
static void iommu_flush_all(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;

	for_each_active_iommu(iommu, drhd) {
		iommu->flush.flush_context(iommu, 0, 0, 0,
					   DMA_CCMD_GLOBAL_INVL);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
					 DMA_TLB_GLOBAL_FLUSH);
	}
}
4108
/*
 * Syscore suspend hook: flush all caches, disable translation and save
 * the fault-event registers of every active IOMMU into a per-IOMMU
 * buffer freed again by iommu_resume().  Returns 0 or -ENOMEM.
 */
static int iommu_suspend(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu = NULL;
	unsigned long flag;

	/* GFP_ATOMIC: syscore suspend runs with interrupts disabled. */
	for_each_active_iommu(iommu, drhd) {
		iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
					     GFP_ATOMIC);
		if (!iommu->iommu_state)
			goto nomem;
	}

	iommu_flush_all();

	for_each_active_iommu(iommu, drhd) {
		iommu_disable_translation(iommu);

		raw_spin_lock_irqsave(&iommu->register_lock, flag);

		/* Save the fault event control/data/address registers. */
		iommu->iommu_state[SR_DMAR_FECTL_REG] =
			readl(iommu->reg + DMAR_FECTL_REG);
		iommu->iommu_state[SR_DMAR_FEDATA_REG] =
			readl(iommu->reg + DMAR_FEDATA_REG);
		iommu->iommu_state[SR_DMAR_FEADDR_REG] =
			readl(iommu->reg + DMAR_FEADDR_REG);
		iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
			readl(iommu->reg + DMAR_FEUADDR_REG);

		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
	}
	return 0;

nomem:
	/* kfree(NULL) is a no-op for units not yet allocated. */
	for_each_active_iommu(iommu, drhd)
		kfree(iommu->iommu_state);

	return -ENOMEM;
}
4148
4149static void iommu_resume(void)
4150{
4151 struct dmar_drhd_unit *drhd;
4152 struct intel_iommu *iommu = NULL;
4153 unsigned long flag;
4154
4155 if (init_iommu_hw()) {
4156 if (force_on)
4157 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4158 else
4159 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
4160 return;
4161 }
4162
4163 for_each_active_iommu(iommu, drhd) {
4164
4165 raw_spin_lock_irqsave(&iommu->register_lock, flag);
4166
4167 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4168 iommu->reg + DMAR_FECTL_REG);
4169 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4170 iommu->reg + DMAR_FEDATA_REG);
4171 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4172 iommu->reg + DMAR_FEADDR_REG);
4173 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4174 iommu->reg + DMAR_FEUADDR_REG);
4175
4176 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4177 }
4178
4179 for_each_active_iommu(iommu, drhd)
4180 kfree(iommu->iommu_state);
4181}
4182
/* Save/restore IOMMU state across system suspend and resume. */
static struct syscore_ops iommu_syscore_ops = {
	.resume		= iommu_resume,
	.suspend	= iommu_suspend,
};
4187
/* Hook the IOMMU suspend/resume callbacks into the syscore framework. */
static void __init init_iommu_pm_ops(void)
{
	register_syscore_ops(&iommu_syscore_ops);
}
4192
4193#else
/* Without CONFIG_SUSPEND the power-management hooks compile away. */
static inline void init_iommu_pm_ops(void) {}
#endif /* CONFIG_SUSPEND */
4196
4197
/*
 * Parse one RMRR (Reserved Memory Region Reporting) structure from the
 * DMAR table: record its address range, build the matching IOMMU reserved
 * region and device scope, and add it to dmar_rmrr_units.
 * Returns 0 on success, -ENOMEM on any allocation failure.
 */
int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
{
	struct acpi_dmar_reserved_memory *rmrr;
	int prot = DMA_PTE_READ|DMA_PTE_WRITE;
	struct dmar_rmrr_unit *rmrru;
	size_t length;

	rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
	if (!rmrru)
		goto out;

	rmrru->hdr = header;
	rmrr = (struct acpi_dmar_reserved_memory *)header;
	rmrru->base_address = rmrr->base_address;
	rmrru->end_address = rmrr->end_address;

	/* end_address is inclusive, hence the +1. */
	length = rmrr->end_address - rmrr->base_address + 1;
	rmrru->resv = iommu_alloc_resv_region(rmrr->base_address, length, prot,
					      IOMMU_RESV_DIRECT);
	if (!rmrru->resv)
		goto free_rmrru;

	/* The device scope entries follow the fixed-size RMRR header. */
	rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
				((void *)rmrr) + rmrr->header.length,
				&rmrru->devices_cnt);
	if (rmrru->devices_cnt && rmrru->devices == NULL)
		goto free_all;

	list_add(&rmrru->list, &dmar_rmrr_units);

	return 0;
free_all:
	kfree(rmrru->resv);
free_rmrru:
	kfree(rmrru);
out:
	return -ENOMEM;
}
4236
4237static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4238{
4239 struct dmar_atsr_unit *atsru;
4240 struct acpi_dmar_atsr *tmp;
4241
4242 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4243 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4244 if (atsr->segment != tmp->segment)
4245 continue;
4246 if (atsr->header.length != tmp->header.length)
4247 continue;
4248 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4249 return atsru;
4250 }
4251
4252 return NULL;
4253}
4254
/*
 * Parse one ATSR (ATS Reporting) structure: copy it, build its device
 * scope (unless it is an include-all entry) and register it.  Duplicate
 * entries are silently accepted.  Returns 0 or -ENOMEM.
 */
int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
{
	struct acpi_dmar_atsr *atsr;
	struct dmar_atsr_unit *atsru;

	/* Nothing to do if the IOMMU never came up on a running system. */
	if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
		return 0;

	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
	atsru = dmar_find_atsr(atsr);
	if (atsru)
		return 0;

	atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
	if (!atsru)
		return -ENOMEM;

	/*
	 * If memory is allocated from slab by ACPI _DSM method, we need to
	 * copy the memory content because the memory buffer will be freed
	 * on return.
	 */
	atsru->hdr = (void *)(atsru + 1);
	memcpy(atsru->hdr, hdr, hdr->length);
	atsru->include_all = atsr->flags & 0x1;
	if (!atsru->include_all) {
		atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
				(void *)atsr + atsr->header.length,
				&atsru->devices_cnt);
		if (atsru->devices_cnt && atsru->devices == NULL) {
			kfree(atsru);
			return -ENOMEM;
		}
	}

	list_add_rcu(&atsru->list, &dmar_atsr_units);

	return 0;
}
4294
/* Free an ATSR unit together with its device scope list. */
static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
{
	dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
	kfree(atsru);
}
4300
/*
 * Unregister and free the ATSR unit matching @hdr, if one exists.
 * Waits for an RCU grace period before freeing; always returns 0.
 */
int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
{
	struct acpi_dmar_atsr *atsr;
	struct dmar_atsr_unit *atsru;

	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
	atsru = dmar_find_atsr(atsr);
	if (atsru) {
		list_del_rcu(&atsru->list);
		synchronize_rcu();
		intel_iommu_free_atsr(atsru);
	}

	return 0;
}
4316
/*
 * Check whether the ATSR unit matching @hdr can be removed.  Returns
 * -EBUSY when any active device is still referenced by its scope,
 * 0 otherwise (including when no matching unit exists).
 */
int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
{
	int i;
	struct device *dev;
	struct acpi_dmar_atsr *atsr;
	struct dmar_atsr_unit *atsru;

	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
	atsru = dmar_find_atsr(atsr);
	if (!atsru)
		return 0;

	if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
		/* Any active entry at all means the unit is in use. */
		for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
					  i, dev)
			return -EBUSY;
	}

	return 0;
}
4337
/*
 * Bring up a hot-added DMAR unit: validate its capabilities against the
 * system-wide configuration, initialize domains and the root entry, and
 * enable interrupts and translation.  Returns 0 on success or a negative
 * errno; on failure the IOMMU is torn down again.
 */
static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
{
	int sp, ret;
	struct intel_iommu *iommu = dmaru->iommu;

	/* Already registered. */
	if (g_iommus[iommu->seq_id])
		return 0;

	/* The new unit must not weaken system-wide feature guarantees. */
	if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
		pr_warn("%s: Doesn't support hardware pass through.\n",
			iommu->name);
		return -ENXIO;
	}
	if (!ecap_sc_support(iommu->ecap) &&
	    domain_update_iommu_snooping(iommu)) {
		pr_warn("%s: Doesn't support snooping.\n",
			iommu->name);
		return -ENXIO;
	}
	sp = domain_update_iommu_superpage(iommu) - 1;
	if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
		pr_warn("%s: Doesn't support large page.\n",
			iommu->name);
		return -ENXIO;
	}

	/*
	 * Disable translation if already enabled prior to OS handover.
	 */
	if (iommu->gcmd & DMA_GCMD_TE)
		iommu_disable_translation(iommu);

	g_iommus[iommu->seq_id] = iommu;
	ret = iommu_init_domains(iommu);
	if (ret == 0)
		ret = iommu_alloc_root_entry(iommu);
	if (ret)
		goto out;

#ifdef CONFIG_INTEL_IOMMU_SVM
	if (pasid_supported(iommu))
		intel_svm_init(iommu);
#endif

	if (dmaru->ignored) {
		/*
		 * we always have to disable PMRs or DMA may fail on this device
		 */
		if (force_on)
			iommu_disable_protect_mem_regions(iommu);
		return 0;
	}

	intel_iommu_init_qi(iommu);
	iommu_flush_write_buffer(iommu);

#ifdef CONFIG_INTEL_IOMMU_SVM
	if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
		ret = intel_svm_enable_prq(iommu);
		if (ret)
			goto disable_iommu;
	}
#endif
	ret = dmar_set_interrupt(iommu);
	if (ret)
		goto disable_iommu;

	/* Install the root entry, flush globally, then turn translation on. */
	iommu_set_root_entry(iommu);
	iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
	iommu_enable_translation(iommu);

	iommu_disable_protect_mem_regions(iommu);
	return 0;

disable_iommu:
	disable_dmar_iommu(iommu);
out:
	free_dmar_iommu(iommu);
	return ret;
}
4419
4420int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4421{
4422 int ret = 0;
4423 struct intel_iommu *iommu = dmaru->iommu;
4424
4425 if (!intel_iommu_enabled)
4426 return 0;
4427 if (iommu == NULL)
4428 return -EINVAL;
4429
4430 if (insert) {
4431 ret = intel_iommu_add(dmaru);
4432 } else {
4433 disable_dmar_iommu(iommu);
4434 free_dmar_iommu(iommu);
4435 }
4436
4437 return ret;
4438}
4439
/* Release every registered RMRR and ATSR unit and their resources. */
static void intel_iommu_free_dmars(void)
{
	struct dmar_rmrr_unit *rmrru, *rmrr_n;
	struct dmar_atsr_unit *atsru, *atsr_n;

	list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
		list_del(&rmrru->list);
		dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
		kfree(rmrru->resv);
		kfree(rmrru);
	}

	list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
		list_del(&atsru->list);
		intel_iommu_free_atsr(atsru);
	}
}
4457
/*
 * Decide whether ATS may be used for @dev by walking up to its root port
 * and matching that port against the registered ATSR units.
 * Returns 1 if ATS is allowed, 0 otherwise.
 */
int dmar_find_matched_atsr_unit(struct pci_dev *dev)
{
	int i, ret = 1;
	struct pci_bus *bus;
	struct pci_dev *bridge = NULL;
	struct device *tmp;
	struct acpi_dmar_atsr *atsr;
	struct dmar_atsr_unit *atsru;

	/* For VFs, use the physical function's topology. */
	dev = pci_physfn(dev);
	for (bus = dev->bus; bus; bus = bus->parent) {
		bridge = bus->self;
		/* If it's an integrated device, allow ATS */
		if (!bridge)
			return 1;
		/* Connected via non-PCIe: no ATS */
		if (!pci_is_pcie(bridge) ||
		    pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
			return 0;
		/* If we found the root port, look it up in the ATSR */
		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
			break;
	}

	rcu_read_lock();
	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
		if (atsr->segment != pci_domain_nr(dev->bus))
			continue;

		for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
			if (tmp == &bridge->dev)
				goto out;

		if (atsru->include_all)
			goto out;
	}
	/* No ATSR covers this root port: deny ATS. */
	ret = 0;
out:
	rcu_read_unlock();

	return ret;
}
4501
/*
 * PCI hotplug notification: keep the cached RMRR and ATSR device scope
 * lists in sync when a device in their scope is added or removed.
 * Returns 0 on success or a negative errno from scope insertion.
 */
int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
{
	int ret;
	struct dmar_rmrr_unit *rmrru;
	struct dmar_atsr_unit *atsru;
	struct acpi_dmar_atsr *atsr;
	struct acpi_dmar_reserved_memory *rmrr;

	if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
		return 0;

	list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
		rmrr = container_of(rmrru->hdr,
				    struct acpi_dmar_reserved_memory, header);
		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
			ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
				((void *)rmrr) + rmrr->header.length,
				rmrr->segment, rmrru->devices,
				rmrru->devices_cnt);
			if (ret < 0)
				return ret;
		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
			dmar_remove_dev_scope(info, rmrr->segment,
				rmrru->devices, rmrru->devices_cnt);
		}
	}

	list_for_each_entry(atsru, &dmar_atsr_units, list) {
		/* include_all units match everything; no list to maintain. */
		if (atsru->include_all)
			continue;

		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
			ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
					(void *)atsr + atsr->header.length,
					atsr->segment, atsru->devices,
					atsru->devices_cnt);
			/* > 0 means the device was claimed by this unit. */
			if (ret > 0)
				break;
			else if (ret < 0)
				return ret;
		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
			if (dmar_remove_dev_scope(info, atsr->segment,
					atsru->devices, atsru->devices_cnt))
				break;
		}
	}

	return 0;
}
4552
4553/*
4554 * Here we only respond to action of unbound device from driver.
4555 *
4556 * Added device is not attached to its DMAR domain here yet. That will happen
4557 * when mapping the device to iova.
4558 */
4559static int device_notifier(struct notifier_block *nb,
4560 unsigned long action, void *data)
4561{
4562 struct device *dev = data;
4563 struct dmar_domain *domain;
4564
4565 if (iommu_dummy(dev))
4566 return 0;
4567
4568 if (action == BUS_NOTIFY_REMOVED_DEVICE) {
4569 domain = find_domain(dev);
4570 if (!domain)
4571 return 0;
4572
4573 dmar_remove_one_dev_info(dev);
4574 if (!domain_type_is_vm_or_si(domain) &&
4575 list_empty(&domain->devices))
4576 domain_exit(domain);
4577 } else if (action == BUS_NOTIFY_ADD_DEVICE) {
4578 if (iommu_should_identity_map(dev, 1))
4579 domain_add_dev_info(si_domain, dev);
4580 }
4581
4582 return 0;
4583}
4584
/* PCI bus notifier used to track device add/remove for domain bookkeeping. */
static struct notifier_block device_nb = {
	.notifier_call = device_notifier,
};
4588
/*
 * Memory-hotplug notifier: keep the static-identity (si) domain's 1:1
 * mapping in sync with physical memory coming online or going away.
 */
static int intel_iommu_memory_notifier(struct notifier_block *nb,
				       unsigned long val, void *v)
{
	struct memory_notify *mhp = v;
	unsigned long long start, end;
	unsigned long start_vpfn, last_vpfn;

	switch (val) {
	case MEM_GOING_ONLINE:
		/* Identity-map the incoming range before it is used for DMA. */
		start = mhp->start_pfn << PAGE_SHIFT;
		end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
		if (iommu_domain_identity_map(si_domain, start, end)) {
			pr_warn("Failed to build identity map for [%llx-%llx]\n",
				start, end);
			return NOTIFY_BAD;
		}
		break;

	case MEM_OFFLINE:
	case MEM_CANCEL_ONLINE:
		start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
		last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
		while (start_vpfn <= last_vpfn) {
			struct iova *iova;
			struct dmar_drhd_unit *drhd;
			struct intel_iommu *iommu;
			struct page *freelist;

			iova = find_iova(&si_domain->iovad, start_vpfn);
			if (iova == NULL) {
				pr_debug("Failed get IOVA for PFN %lx\n",
					 start_vpfn);
				break;
			}

			/*
			 * The IOVA node may extend past the offlined range;
			 * split it so only the affected part is removed.
			 */
			iova = split_and_remove_iova(&si_domain->iovad, iova,
						     start_vpfn, last_vpfn);
			if (iova == NULL) {
				pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
					start_vpfn, last_vpfn);
				return NOTIFY_BAD;
			}

			freelist = domain_unmap(si_domain, iova->pfn_lo,
					       iova->pfn_hi);

			/* Flush the stale translations on every IOMMU. */
			rcu_read_lock();
			for_each_active_iommu(iommu, drhd)
				iommu_flush_iotlb_psi(iommu, si_domain,
					iova->pfn_lo, iova_size(iova),
					!freelist, 0);
			rcu_read_unlock();
			/* Page tables can only be freed after the flush. */
			dma_free_pagelist(freelist);

			start_vpfn = iova->pfn_hi + 1;
			free_iova_mem(iova);
		}
		break;
	}

	return NOTIFY_OK;
}
4651
/* Registered only when the si_domain is in use (see intel_iommu_init()). */
static struct notifier_block intel_iommu_memory_nb = {
	.notifier_call = intel_iommu_memory_notifier,
	.priority = 0
};
4656
4657static void free_all_cpu_cached_iovas(unsigned int cpu)
4658{
4659 int i;
4660
4661 for (i = 0; i < g_num_of_iommus; i++) {
4662 struct intel_iommu *iommu = g_iommus[i];
4663 struct dmar_domain *domain;
4664 int did;
4665
4666 if (!iommu)
4667 continue;
4668
4669 for (did = 0; did < cap_ndoms(iommu->cap); did++) {
4670 domain = get_iommu_domain(iommu, (u16)did);
4671
4672 if (!domain)
4673 continue;
4674 free_cpu_cached_iovas(cpu, &domain->iovad);
4675 }
4676 }
4677}
4678
/* CPU hotplug (CPUHP_IOMMU_INTEL_DEAD) callback: reclaim dead CPU's IOVAs. */
static int intel_iommu_cpu_dead(unsigned int cpu)
{
	free_all_cpu_cached_iovas(cpu);
	return 0;
}
4684
4685static void intel_disable_iommus(void)
4686{
4687 struct intel_iommu *iommu = NULL;
4688 struct dmar_drhd_unit *drhd;
4689
4690 for_each_iommu(iommu, drhd)
4691 iommu_disable_translation(iommu);
4692}
4693
4694static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4695{
4696 struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
4697
4698 return container_of(iommu_dev, struct intel_iommu, iommu);
4699}
4700
4701static ssize_t intel_iommu_show_version(struct device *dev,
4702 struct device_attribute *attr,
4703 char *buf)
4704{
4705 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4706 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4707 return sprintf(buf, "%d:%d\n",
4708 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4709}
4710static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4711
/* sysfs: physical base address of this IOMMU's register set. */
static ssize_t intel_iommu_show_address(struct device *dev,
					struct device_attribute *attr,
					char *buf)
{
	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
	return sprintf(buf, "%llx\n", iommu->reg_phys);
}
static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4720
/* sysfs: raw value of the capability (CAP) register. */
static ssize_t intel_iommu_show_cap(struct device *dev,
				    struct device_attribute *attr,
				    char *buf)
{
	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
	return sprintf(buf, "%llx\n", iommu->cap);
}
static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4729
/* sysfs: raw value of the extended capability (ECAP) register. */
static ssize_t intel_iommu_show_ecap(struct device *dev,
				     struct device_attribute *attr,
				     char *buf)
{
	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
	return sprintf(buf, "%llx\n", iommu->ecap);
}
static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4738
/* sysfs: number of domain IDs this IOMMU supports (from CAP.ND). */
static ssize_t intel_iommu_show_ndoms(struct device *dev,
				      struct device_attribute *attr,
				      char *buf)
{
	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
	return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
}
static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4747
/* sysfs: number of domain IDs currently allocated on this IOMMU. */
static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
					   struct device_attribute *attr,
					   char *buf)
{
	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
	return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
						  cap_ndoms(iommu->cap)));
}
static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4757
/* sysfs attributes exported under /sys/class/iommu/<name>/intel-iommu/. */
static struct attribute *intel_iommu_attrs[] = {
	&dev_attr_version.attr,
	&dev_attr_address.attr,
	&dev_attr_cap.attr,
	&dev_attr_ecap.attr,
	&dev_attr_domains_supported.attr,
	&dev_attr_domains_used.attr,
	NULL,
};

static struct attribute_group intel_iommu_group = {
	.name = "intel-iommu",
	.attrs = intel_iommu_attrs,
};

const struct attribute_group *intel_iommu_groups[] = {
	&intel_iommu_group,
	NULL,
};
4777
/*
 * Honor the DMAR platform opt-in flag: if the firmware requested DMA
 * protection and an untrusted (e.g. external/Thunderbolt) device is
 * present, force the IOMMU on even if the user disabled it.
 * Returns 1 when the IOMMU was force-enabled, 0 otherwise.
 */
static int __init platform_optin_force_iommu(void)
{
	struct pci_dev *pdev = NULL;
	bool has_untrusted_dev = false;

	if (!dmar_platform_optin() || no_platform_optin)
		return 0;

	for_each_pci_dev(pdev) {
		if (pdev->untrusted) {
			has_untrusted_dev = true;
			break;
		}
	}

	if (!has_untrusted_dev)
		return 0;

	if (no_iommu || dmar_disabled)
		pr_info("Intel-IOMMU force enabled due to platform opt in\n");

	/*
	 * If Intel-IOMMU is disabled by default, we will apply identity
	 * map for all devices except those marked as being untrusted.
	 */
	if (dmar_disabled)
		iommu_identity_mapping |= IDENTMAP_ALL;

	dmar_disabled = 0;
#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
	swiotlb = 0;
#endif
	no_iommu = 0;

	return 1;
}
4814
/*
 * VT-d initialization entry point: parse the DMAR table, bring up every
 * IOMMU, install the Intel DMA ops and register with the IOMMU core.
 * Returns 0 on success, negative errno on failure; panics instead when
 * the IOMMU is mandatory (tboot launch or platform opt-in).
 */
int __init intel_iommu_init(void)
{
	int ret = -ENODEV;
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;

	/*
	 * Intel IOMMU is required for a TXT/tboot launch or platform
	 * opt in, so enforce that.
	 */
	force_on = tboot_force_iommu() || platform_optin_force_iommu();

	if (iommu_init_mempool()) {
		if (force_on)
			panic("tboot: Failed to initialize iommu memory\n");
		return -ENOMEM;
	}

	down_write(&dmar_global_lock);
	if (dmar_table_init()) {
		if (force_on)
			panic("tboot: Failed to initialize DMAR table\n");
		goto out_free_dmar;
	}

	if (dmar_dev_scope_init() < 0) {
		if (force_on)
			panic("tboot: Failed to initialize DMAR device scope\n");
		goto out_free_dmar;
	}

	up_write(&dmar_global_lock);

	/*
	 * The bus notifier takes the dmar_global_lock, so lockdep will
	 * complain later when we register it under the lock.
	 */
	dmar_register_bus_notifier();

	down_write(&dmar_global_lock);

	if (no_iommu || dmar_disabled) {
		/*
		 * We exit the function here to ensure IOMMU's remapping and
		 * mempool aren't setup, which means that the IOMMU's PMRs
		 * won't be disabled via the call to init_dmars(). So disable
		 * it explicitly here. The PMRs were setup by tboot prior to
		 * calling SENTER, but the kernel is expected to reset/tear
		 * down the PMRs.
		 */
		if (intel_iommu_tboot_noforce) {
			for_each_iommu(iommu, drhd)
				iommu_disable_protect_mem_regions(iommu);
		}

		/*
		 * Make sure the IOMMUs are switched off, even when we
		 * boot into a kexec kernel and the previous kernel left
		 * them enabled
		 */
		intel_disable_iommus();
		goto out_free_dmar;
	}

	if (list_empty(&dmar_rmrr_units))
		pr_info("No RMRR found\n");

	if (list_empty(&dmar_atsr_units))
		pr_info("No ATSR found\n");

	if (dmar_init_reserved_ranges()) {
		if (force_on)
			panic("tboot: Failed to reserve iommu ranges\n");
		goto out_free_reserved_range;
	}

	if (dmar_map_gfx)
		intel_iommu_gfx_mapped = 1;

	init_no_remapping_devices();

	/* Main per-IOMMU setup: root tables, queued invalidation, IRQs. */
	ret = init_dmars();
	if (ret) {
		if (force_on)
			panic("tboot: Failed to initialize DMARs\n");
		pr_err("Initialization failed\n");
		goto out_free_reserved_range;
	}
	up_write(&dmar_global_lock);
	pr_info("Intel(R) Virtualization Technology for Directed I/O\n");

#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
	/* The IOMMU replaces the need for the software bounce buffer. */
	swiotlb = 0;
#endif
	dma_ops = &intel_dma_ops;

	init_iommu_pm_ops();

	/* Expose each unit via sysfs and register it with the IOMMU core. */
	for_each_active_iommu(iommu, drhd) {
		iommu_device_sysfs_add(&iommu->iommu, NULL,
				       intel_iommu_groups,
				       "%s", iommu->name);
		iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
		iommu_device_register(&iommu->iommu);
	}

	bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
	bus_register_notifier(&pci_bus_type, &device_nb);
	if (si_domain && !hw_pass_through)
		register_memory_notifier(&intel_iommu_memory_nb);
	cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
			  intel_iommu_cpu_dead);
	intel_iommu_enabled = 1;
	intel_iommu_debugfs_init();

	return 0;

out_free_reserved_range:
	put_iova_domain(&reserved_iova_list);
out_free_dmar:
	intel_iommu_free_dmars();
	up_write(&dmar_global_lock);
	iommu_exit_mempool();
	return ret;
}
4940
/* pci_for_each_dma_alias() callback: clear the context entry for one alias. */
static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
{
	struct intel_iommu *iommu = opaque;

	domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
	return 0;
}
4948
4949/*
4950 * NB - intel-iommu lacks any sort of reference counting for the users of
4951 * dependent devices. If multiple endpoints have intersecting dependent
4952 * devices, unbinding the driver from any one of them will possibly leave
4953 * the others unable to operate.
4954 */
4955static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
4956{
4957 if (!iommu || !dev || !dev_is_pci(dev))
4958 return;
4959
4960 pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
4961}
4962
/*
 * Detach @info's device from its domain and release the per-device state.
 * Caller must hold device_domain_lock (asserted below).
 */
static void __dmar_remove_one_dev_info(struct device_domain_info *info)
{
	struct intel_iommu *iommu;
	unsigned long flags;

	assert_spin_locked(&device_domain_lock);

	if (WARN_ON(!info))
		return;

	iommu = info->iommu;

	if (info->dev) {
		/* Scalable mode: tear down the RID2PASID entry first. */
		if (dev_is_pci(info->dev) && sm_supported(iommu))
			intel_pasid_tear_down_entry(iommu, info->dev,
					PASID_RID2PASID);

		iommu_disable_dev_iotlb(info);
		domain_context_clear(iommu, info->dev);
		intel_pasid_free_table(info->dev);
	}

	unlink_domain_info(info);

	spin_lock_irqsave(&iommu->lock, flags);
	domain_detach_iommu(info->domain, iommu);
	spin_unlock_irqrestore(&iommu->lock, flags);

	free_devinfo_mem(info);
}
4993
/* Locked wrapper: look up @dev's domain info and tear it down. */
static void dmar_remove_one_dev_info(struct device *dev)
{
	struct device_domain_info *info;
	unsigned long flags;

	spin_lock_irqsave(&device_domain_lock, flags);
	info = dev->archdata.iommu;
	__dmar_remove_one_dev_info(info);
	spin_unlock_irqrestore(&device_domain_lock, flags);
}
5004
/*
 * Initialize a freshly allocated (API-managed) domain: IOVA allocator,
 * address-width bookkeeping and the top-level page directory.
 * Returns 0 on success, -ENOMEM if the root page table can't be allocated.
 */
static int md_domain_init(struct dmar_domain *domain, int guest_width)
{
	int adjust_width;

	init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
	domain_reserve_special_ranges(domain);

	/* calculate AGAW */
	domain->gaw = guest_width;
	adjust_width = guestwidth_to_adjustwidth(guest_width);
	domain->agaw = width_to_agaw(adjust_width);

	/* Capabilities are recomputed once IOMMUs get attached. */
	domain->iommu_coherency = 0;
	domain->iommu_snooping = 0;
	domain->iommu_superpage = 0;
	domain->max_addr = 0;

	/* always allocate the top pgd */
	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
	if (!domain->pgd)
		return -ENOMEM;
	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
	return 0;
}
5029
/*
 * IOMMU-API domain constructor. Only unmanaged (VFIO-style) domains are
 * supported; DMA-API domains are handled internally by this driver.
 */
static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
{
	struct dmar_domain *dmar_domain;
	struct iommu_domain *domain;

	if (type != IOMMU_DOMAIN_UNMANAGED)
		return NULL;

	dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
	if (!dmar_domain) {
		pr_err("Can't allocate dmar_domain\n");
		return NULL;
	}
	if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
		pr_err("Domain initialization failed\n");
		domain_exit(dmar_domain);
		return NULL;
	}
	domain_update_iommu_cap(dmar_domain);

	/* Advertise the addressable range to the IOMMU core. */
	domain = &dmar_domain->domain;
	domain->geometry.aperture_start = 0;
	domain->geometry.aperture_end   = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
	domain->geometry.force_aperture = true;

	return domain;
}
5057
/* IOMMU-API domain destructor: release page tables and IOVA state. */
static void intel_iommu_domain_free(struct iommu_domain *domain)
{
	domain_exit(to_dmar_domain(domain));
}
5062
5063/*
5064 * Check whether a @domain could be attached to the @dev through the
5065 * aux-domain attach/detach APIs.
5066 */
5067static inline bool
5068is_aux_domain(struct device *dev, struct iommu_domain *domain)
5069{
5070 struct device_domain_info *info = dev->archdata.iommu;
5071
5072 return info && info->auxd_enabled &&
5073 domain->type == IOMMU_DOMAIN_UNMANAGED;
5074}
5075
/*
 * Record that @domain is attached to @dev as an auxiliary domain.
 * Caller must hold device_domain_lock.
 */
static void auxiliary_link_device(struct dmar_domain *domain,
				  struct device *dev)
{
	struct device_domain_info *info = dev->archdata.iommu;

	assert_spin_locked(&device_domain_lock);
	if (WARN_ON(!info))
		return;

	domain->auxd_refcnt++;
	list_add(&domain->auxd, &info->auxiliary_domains);
}
5088
/*
 * Drop the auxiliary attachment of @domain to @dev; release the default
 * PASID once no device uses the domain any more.
 * Caller must hold device_domain_lock.
 */
static void auxiliary_unlink_device(struct dmar_domain *domain,
				    struct device *dev)
{
	struct device_domain_info *info = dev->archdata.iommu;

	assert_spin_locked(&device_domain_lock);
	if (WARN_ON(!info))
		return;

	list_del(&domain->auxd);
	domain->auxd_refcnt--;

	if (!domain->auxd_refcnt && domain->default_pasid > 0)
		intel_pasid_free_id(domain->default_pasid);
}
5104
/*
 * Attach @domain to @dev as an auxiliary domain: allocate the domain's
 * default PASID if needed, attach the domain to @dev's IOMMU and install
 * a second-level PASID entry.
 *
 * Lock order: device_domain_lock -> iommu->lock.
 */
static int aux_domain_add_dev(struct dmar_domain *domain,
			      struct device *dev)
{
	int ret;
	u8 bus, devfn;
	unsigned long flags;
	struct intel_iommu *iommu;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return -ENODEV;

	if (domain->default_pasid <= 0) {
		int pasid;

		pasid = intel_pasid_alloc_id(domain, PASID_MIN,
					     pci_max_pasids(to_pci_dev(dev)),
					     GFP_KERNEL);
		if (pasid <= 0) {
			pr_err("Can't allocate default pasid\n");
			return -ENODEV;
		}
		domain->default_pasid = pasid;
	}

	spin_lock_irqsave(&device_domain_lock, flags);
	/*
	 * iommu->lock must be held to attach domain to iommu and setup the
	 * pasid entry for second level translation.
	 */
	spin_lock(&iommu->lock);
	ret = domain_attach_iommu(domain, iommu);
	if (ret)
		goto attach_failed;

	/* Setup the PASID entry for mediated devices: */
	ret = intel_pasid_setup_second_level(iommu, domain, dev,
					     domain->default_pasid);
	if (ret)
		goto table_failed;
	spin_unlock(&iommu->lock);

	auxiliary_link_device(domain, dev);

	spin_unlock_irqrestore(&device_domain_lock, flags);

	return 0;

table_failed:
	domain_detach_iommu(domain, iommu);
attach_failed:
	spin_unlock(&iommu->lock);
	spin_unlock_irqrestore(&device_domain_lock, flags);
	/* Free the PASID again if nobody else ended up using the domain. */
	if (!domain->auxd_refcnt && domain->default_pasid > 0)
		intel_pasid_free_id(domain->default_pasid);

	return ret;
}
5163
/*
 * Undo aux_domain_add_dev(): unlink the auxiliary attachment, tear down
 * the PASID entry and detach the domain from @dev's IOMMU.
 */
static void aux_domain_remove_dev(struct dmar_domain *domain,
				  struct device *dev)
{
	struct device_domain_info *info;
	struct intel_iommu *iommu;
	unsigned long flags;

	if (!is_aux_domain(dev, &domain->domain))
		return;

	spin_lock_irqsave(&device_domain_lock, flags);
	info = dev->archdata.iommu;
	iommu = info->iommu;

	auxiliary_unlink_device(domain, dev);

	spin_lock(&iommu->lock);
	intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid);
	domain_detach_iommu(domain, iommu);
	spin_unlock(&iommu->lock);

	spin_unlock_irqrestore(&device_domain_lock, flags);
}
5187
/*
 * Validate that @dev's IOMMU can address everything already mapped in
 * @domain and shrink the domain's page-table depth to what the hardware
 * supports. Returns 0 on success, -ENODEV/-EFAULT on mismatch.
 */
static int prepare_domain_attach_device(struct iommu_domain *domain,
					struct device *dev)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	struct intel_iommu *iommu;
	int addr_width;
	u8 bus, devfn;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return -ENODEV;

	/* check if this iommu agaw is sufficient for max mapped address */
	addr_width = agaw_to_width(iommu->agaw);
	if (addr_width > cap_mgaw(iommu->cap))
		addr_width = cap_mgaw(iommu->cap);

	if (dmar_domain->max_addr > (1LL << addr_width)) {
		dev_err(dev, "%s: iommu width (%d) is not "
		        "sufficient for the mapped address (%llx)\n",
		        __func__, addr_width, dmar_domain->max_addr);
		return -EFAULT;
	}
	dmar_domain->gaw = addr_width;

	/*
	 * Knock out extra levels of page tables if necessary
	 */
	while (iommu->agaw < dmar_domain->agaw) {
		struct dma_pte *pte;

		pte = dmar_domain->pgd;
		if (dma_pte_present(pte)) {
			dmar_domain->pgd = (struct dma_pte *)
				phys_to_virt(dma_pte_addr(pte));
			free_pgtable_page(pte);
		}
		dmar_domain->agaw--;
	}

	return 0;
}
5230
/*
 * IOMMU-API attach: move @dev into @domain, first detaching it from any
 * previous domain. RMRR-locked devices must stay identity-mapped and are
 * refused; aux-capable devices must use the aux attach path instead.
 */
static int intel_iommu_attach_device(struct iommu_domain *domain,
				     struct device *dev)
{
	int ret;

	if (device_is_rmrr_locked(dev)) {
		dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement.  Contact your platform vendor.\n");
		return -EPERM;
	}

	if (is_aux_domain(dev, domain))
		return -EPERM;

	/* normally dev is not mapped */
	if (unlikely(domain_context_mapped(dev))) {
		struct dmar_domain *old_domain;

		old_domain = find_domain(dev);
		if (old_domain) {
			rcu_read_lock();
			dmar_remove_one_dev_info(dev);
			rcu_read_unlock();

			/* Reap the old domain if it just became empty. */
			if (!domain_type_is_vm_or_si(old_domain) &&
			    list_empty(&old_domain->devices))
				domain_exit(old_domain);
		}
	}

	ret = prepare_domain_attach_device(domain, dev);
	if (ret)
		return ret;

	return domain_add_dev_info(to_dmar_domain(domain), dev);
}
5266
/* IOMMU-API aux attach: PASID-granular attachment for mediated devices. */
static int intel_iommu_aux_attach_device(struct iommu_domain *domain,
					 struct device *dev)
{
	int ret;

	if (!is_aux_domain(dev, domain))
		return -EPERM;

	ret = prepare_domain_attach_device(domain, dev);
	if (ret)
		return ret;

	return aux_domain_add_dev(to_dmar_domain(domain), dev);
}
5281
/* IOMMU-API detach: drop @dev's attachment to its current domain. */
static void intel_iommu_detach_device(struct iommu_domain *domain,
				      struct device *dev)
{
	dmar_remove_one_dev_info(dev);
}
5287
/* IOMMU-API aux detach: counterpart of intel_iommu_aux_attach_device(). */
static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
					  struct device *dev)
{
	aux_domain_remove_dev(to_dmar_domain(domain), dev);
}
5293
/*
 * IOMMU-API map: install a mapping of [@iova, @iova+@size) to @hpa with
 * permissions derived from @iommu_prot. Grows the domain's max_addr
 * tracking; fails with -EFAULT if the range exceeds the domain width.
 */
static int intel_iommu_map(struct iommu_domain *domain,
			   unsigned long iova, phys_addr_t hpa,
			   size_t size, int iommu_prot)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	u64 max_addr;
	int prot = 0;
	int ret;

	if (iommu_prot & IOMMU_READ)
		prot |= DMA_PTE_READ;
	if (iommu_prot & IOMMU_WRITE)
		prot |= DMA_PTE_WRITE;
	/* Snoop bit only if the hardware can enforce it. */
	if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
		prot |= DMA_PTE_SNP;

	max_addr = iova + size;
	if (dmar_domain->max_addr < max_addr) {
		u64 end;

		/* check if minimum agaw is sufficient for mapped address */
		end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
		if (end < max_addr) {
			pr_err("%s: iommu width (%d) is not "
			       "sufficient for the mapped address (%llx)\n",
			       __func__, dmar_domain->gaw, max_addr);
			return -EFAULT;
		}
		dmar_domain->max_addr = max_addr;
	}
	/* Round up size to next multiple of PAGE_SIZE, if it and
	   the low bits of hpa would take us onto the next page */
	size = aligned_nrpages(hpa, size);
	ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
				 hpa >> VTD_PAGE_SHIFT, size, prot);
	return ret;
}
5331
/*
 * IOMMU-API unmap: remove translations for [@iova, @iova+@size), flush
 * the IOTLBs on every IOMMU the domain spans, then free the page tables.
 * May unmap more than @size for large-page mappings; returns the actual
 * amount unmapped.
 */
static size_t intel_iommu_unmap(struct iommu_domain *domain,
				unsigned long iova, size_t size)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	struct page *freelist = NULL;
	unsigned long start_pfn, last_pfn;
	unsigned int npages;
	int iommu_id, level = 0;

	/* Cope with horrid API which requires us to unmap more than the
	   size argument if it happens to be a large-page mapping. */
	BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));

	if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
		size = VTD_PAGE_SIZE << level_to_offset_bits(level);

	start_pfn = iova >> VTD_PAGE_SHIFT;
	last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;

	freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);

	npages = last_pfn - start_pfn + 1;

	for_each_domain_iommu(iommu_id, dmar_domain)
		iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
				      start_pfn, npages, !freelist, 0);

	/* Safe to free page-table pages only after the IOTLB flush. */
	dma_free_pagelist(freelist);

	if (dmar_domain->max_addr == iova + size)
		dmar_domain->max_addr = iova;

	return size;
}
5366
5367static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
5368 dma_addr_t iova)
5369{
5370 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5371 struct dma_pte *pte;
5372 int level = 0;
5373 u64 phys = 0;
5374
5375 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
5376 if (pte)
5377 phys = dma_pte_addr(pte);
5378
5379 return phys;
5380}
5381
5382static inline bool scalable_mode_support(void)
5383{
5384 struct dmar_drhd_unit *drhd;
5385 struct intel_iommu *iommu;
5386 bool ret = true;
5387
5388 rcu_read_lock();
5389 for_each_active_iommu(iommu, drhd) {
5390 if (!sm_supported(iommu)) {
5391 ret = false;
5392 break;
5393 }
5394 }
5395 rcu_read_unlock();
5396
5397 return ret;
5398}
5399
5400static inline bool iommu_pasid_support(void)
5401{
5402 struct dmar_drhd_unit *drhd;
5403 struct intel_iommu *iommu;
5404 bool ret = true;
5405
5406 rcu_read_lock();
5407 for_each_active_iommu(iommu, drhd) {
5408 if (!pasid_supported(iommu)) {
5409 ret = false;
5410 break;
5411 }
5412 }
5413 rcu_read_unlock();
5414
5415 return ret;
5416}
5417
5418static bool intel_iommu_capable(enum iommu_cap cap)
5419{
5420 if (cap == IOMMU_CAP_CACHE_COHERENCY)
5421 return domain_update_iommu_snooping(NULL) == 1;
5422 if (cap == IOMMU_CAP_INTR_REMAP)
5423 return irq_remapping_enabled == 1;
5424
5425 return false;
5426}
5427
5428static int intel_iommu_add_device(struct device *dev)
5429{
5430 struct intel_iommu *iommu;
5431 struct iommu_group *group;
5432 u8 bus, devfn;
5433
5434 iommu = device_to_iommu(dev, &bus, &devfn);
5435 if (!iommu)
5436 return -ENODEV;
5437
5438 iommu_device_link(&iommu->iommu, dev);
5439
5440 group = iommu_group_get_for_dev(dev);
5441
5442 if (IS_ERR(group))
5443 return PTR_ERR(group);
5444
5445 iommu_group_put(group);
5446 return 0;
5447}
5448
/* IOMMU-API remove_device hook: undo intel_iommu_add_device(). */
static void intel_iommu_remove_device(struct device *dev)
{
	struct intel_iommu *iommu;
	u8 bus, devfn;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return;

	iommu_group_remove_device(dev);

	iommu_device_unlink(&iommu->iommu, dev);
}
5462
5463static void intel_iommu_get_resv_regions(struct device *device,
5464 struct list_head *head)
5465{
5466 struct iommu_resv_region *reg;
5467 struct dmar_rmrr_unit *rmrr;
5468 struct device *i_dev;
5469 int i;
5470
5471 rcu_read_lock();
5472 for_each_rmrr_units(rmrr) {
5473 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
5474 i, i_dev) {
5475 if (i_dev != device)
5476 continue;
5477
5478 list_add_tail(&rmrr->resv->list, head);
5479 }
5480 }
5481 rcu_read_unlock();
5482
5483 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
5484 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
5485 0, IOMMU_RESV_MSI);
5486 if (!reg)
5487 return;
5488 list_add_tail(®->list, head);
5489}
5490
5491static void intel_iommu_put_resv_regions(struct device *dev,
5492 struct list_head *head)
5493{
5494 struct iommu_resv_region *entry, *next;
5495
5496 list_for_each_entry_safe(entry, next, head, list) {
5497 if (entry->type == IOMMU_RESV_MSI)
5498 kfree(entry);
5499 }
5500}
5501
/*
 * Enable PASID usage for @dev: set the PASID-enable bit in its context
 * entry (flushing the context cache) and turn on PASID in the device.
 * Returns 0 on success, -EINVAL when the device has no domain or no
 * PASID support.
 */
int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
{
	struct device_domain_info *info;
	struct context_entry *context;
	struct dmar_domain *domain;
	unsigned long flags;
	u64 ctx_lo;
	int ret;

	domain = get_valid_domain_for_dev(dev);
	if (!domain)
		return -EINVAL;

	spin_lock_irqsave(&device_domain_lock, flags);
	spin_lock(&iommu->lock);

	ret = -EINVAL;
	info = dev->archdata.iommu;
	if (!info || !info->pasid_supported)
		goto out;

	context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
	if (WARN_ON(!context))
		goto out;

	ctx_lo = context[0].lo;

	if (!(ctx_lo & CONTEXT_PASIDE)) {
		ctx_lo |= CONTEXT_PASIDE;
		context[0].lo = ctx_lo;
		/* Order the context-entry update before the flush below. */
		wmb();
		iommu->flush.flush_context(iommu,
					   domain->iommu_did[iommu->seq_id],
					   PCI_DEVID(info->bus, info->devfn),
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
	}

	/* Enable PASID support in the device, if it wasn't already */
	if (!info->pasid_enabled)
		iommu_enable_dev_iotlb(info);

	ret = 0;

 out:
	spin_unlock(&iommu->lock);
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return ret;
}
5552
#ifdef CONFIG_INTEL_IOMMU_SVM
/* Resolve the IOMMU for @dev for SVM use, warning when none applies. */
struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
{
	struct intel_iommu *iommu;
	u8 bus, devfn;

	if (iommu_dummy(dev)) {
		dev_warn(dev,
			 "No IOMMU translation for device; cannot enable SVM\n");
		return NULL;
	}

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu) {
		dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
		return NULL;
	}

	return iommu;
}
#endif /* CONFIG_INTEL_IOMMU_SVM */
5574
/*
 * Enable auxiliary-domain (PASID-granular) support for @dev. Requires a
 * scalable-mode, PASID-capable IOMMU; also enables PASID on the device.
 */
static int intel_iommu_enable_auxd(struct device *dev)
{
	struct device_domain_info *info;
	struct intel_iommu *iommu;
	unsigned long flags;
	u8 bus, devfn;
	int ret;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu || dmar_disabled)
		return -EINVAL;

	if (!sm_supported(iommu) || !pasid_supported(iommu))
		return -EINVAL;

	ret = intel_iommu_enable_pasid(iommu, dev);
	if (ret)
		return -ENODEV;

	spin_lock_irqsave(&device_domain_lock, flags);
	info = dev->archdata.iommu;
	info->auxd_enabled = 1;
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return 0;
}
5601
/* Clear the auxiliary-domain enable flag for @dev. Always returns 0. */
static int intel_iommu_disable_auxd(struct device *dev)
{
	struct device_domain_info *info;
	unsigned long flags;

	spin_lock_irqsave(&device_domain_lock, flags);
	info = dev->archdata.iommu;
	if (!WARN_ON(!info))
		info->auxd_enabled = 0;
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return 0;
}
5615
5616/*
5617 * A PCI express designated vendor specific extended capability is defined
5618 * in the section 3.7 of Intel scalable I/O virtualization technical spec
5619 * for system software and tools to detect endpoint devices supporting the
5620 * Intel scalable IO virtualization without host driver dependency.
5621 *
5622 * Returns the address of the matching extended capability structure within
5623 * the device's PCI configuration space or 0 if the device does not support
5624 * it.
5625 */
5626static int siov_find_pci_dvsec(struct pci_dev *pdev)
5627{
5628 int pos;
5629 u16 vendor, id;
5630
5631 pos = pci_find_next_ext_capability(pdev, 0, 0x23);
5632 while (pos) {
5633 pci_read_config_word(pdev, pos + 4, &vendor);
5634 pci_read_config_word(pdev, pos + 8, &id);
5635 if (vendor == PCI_VENDOR_ID_INTEL && id == 5)
5636 return pos;
5637
5638 pos = pci_find_next_ext_capability(pdev, pos, 0x23);
5639 }
5640
5641 return 0;
5642}
5643
/*
 * IOMMU-API dev_has_feat hook. For IOMMU_DEV_FEAT_AUX, require system-wide
 * scalable-mode and PASID support plus the device's PASID capability and
 * the Intel Scalable-IOV DVSEC. All other features are unsupported.
 */
static bool
intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
{
	if (feat == IOMMU_DEV_FEAT_AUX) {
		int ret;

		if (!dev_is_pci(dev) || dmar_disabled ||
		    !scalable_mode_support() || !iommu_pasid_support())
			return false;

		ret = pci_pasid_features(to_pci_dev(dev));
		if (ret < 0)
			return false;

		return !!siov_find_pci_dvsec(to_pci_dev(dev));
	}

	return false;
}
5663
5664static int
5665intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
5666{
5667 if (feat == IOMMU_DEV_FEAT_AUX)
5668 return intel_iommu_enable_auxd(dev);
5669
5670 return -ENODEV;
5671}
5672
5673static int
5674intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
5675{
5676 if (feat == IOMMU_DEV_FEAT_AUX)
5677 return intel_iommu_disable_auxd(dev);
5678
5679 return -ENODEV;
5680}
5681
5682static bool
5683intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
5684{
5685 struct device_domain_info *info = dev->archdata.iommu;
5686
5687 if (feat == IOMMU_DEV_FEAT_AUX)
5688 return scalable_mode_support() && info && info->auxd_enabled;
5689
5690 return false;
5691}
5692
5693static int
5694intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
5695{
5696 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5697
5698 return dmar_domain->default_pasid > 0 ?
5699 dmar_domain->default_pasid : -EINVAL;
5700}
5701
/* Hooks registered with the IOMMU core for every VT-d unit. */
const struct iommu_ops intel_iommu_ops = {
	.capable		= intel_iommu_capable,
	.domain_alloc		= intel_iommu_domain_alloc,
	.domain_free		= intel_iommu_domain_free,
	.attach_dev		= intel_iommu_attach_device,
	.detach_dev		= intel_iommu_detach_device,
	.aux_attach_dev		= intel_iommu_aux_attach_device,
	.aux_detach_dev		= intel_iommu_aux_detach_device,
	.aux_get_pasid		= intel_iommu_aux_get_pasid,
	.map			= intel_iommu_map,
	.unmap			= intel_iommu_unmap,
	.iova_to_phys		= intel_iommu_iova_to_phys,
	.add_device		= intel_iommu_add_device,
	.remove_device		= intel_iommu_remove_device,
	.get_resv_regions	= intel_iommu_get_resv_regions,
	.put_resv_regions	= intel_iommu_put_resv_regions,
	.device_group		= pci_device_group,
	.dev_has_feat		= intel_iommu_dev_has_feat,
	.dev_feat_enabled	= intel_iommu_dev_feat_enabled,
	.dev_enable_feat	= intel_iommu_dev_enable_feat,
	.dev_disable_feat	= intel_iommu_dev_disable_feat,
	.pgsize_bitmap		= INTEL_IOMMU_PGSIZES,
};
5725
5726static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
5727{
5728 /* G4x/GM45 integrated gfx dmar support is totally busted. */
5729 pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
5730 dmar_map_gfx = 0;
5731}
5732
/* G4x/GM45 integrated graphics device IDs affected by the DMAR quirk. */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
5740
static void quirk_iommu_rwbf(struct pci_dev *dev)
{
	/*
	 * Mobile 4 Series Chipset neglects to set RWBF capability,
	 * but needs it. Same seems to hold for the desktop versions.
	 * Setting rwbf_quirk forces write-buffer flushing as if the
	 * capability bit had been set in the hardware.
	 */
	pci_info(dev, "Forcing write-buffer flush capability\n");
	rwbf_quirk = 1;
}
5750
/* 4 Series chipset device IDs that need the forced-RWBF quirk. */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
5758
/*
 * GGC is the Graphics Control register in PCI config space; bits 11:8
 * encode the graphics stolen-memory / GTT size selection, including
 * whether any memory was set aside for a shadow GTT when VT-d is on.
 */
#define GGC 0x52
#define GGC_MEMORY_SIZE_MASK	(0xf << 8)
#define GGC_MEMORY_SIZE_NONE	(0x0 << 8)
#define GGC_MEMORY_SIZE_1M	(0x1 << 8)
#define GGC_MEMORY_SIZE_2M	(0x3 << 8)
#define GGC_MEMORY_VT_ENABLED	(0x8 << 8)
#define GGC_MEMORY_SIZE_2M_VT	(0x9 << 8)
#define GGC_MEMORY_SIZE_3M_VT	(0xa << 8)
#define GGC_MEMORY_SIZE_4M_VT	(0xb << 8)
5768
5769static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
5770{
5771 unsigned short ggc;
5772
5773 if (pci_read_config_word(dev, GGC, &ggc))
5774 return;
5775
5776 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
5777 pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
5778 dmar_map_gfx = 0;
5779 } else if (dmar_map_gfx) {
5780 /* we have to ensure the gfx device is idle before we flush */
5781 pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
5782 intel_iommu_strict = 1;
5783 }
5784}
/* Ironlake (Calpella) host-bridge device IDs needing the shadow-GTT check. */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
5789
/* On Tylersburg chipsets, some BIOSes have been known to enable the
   ISOCH DMAR unit for the Azalia sound device, but not give it any
   TLB entries, which causes it to deadlock. Check for that. We do
   this in a function called from init_dmars(), instead of in a PCI
   quirk, because we don't want to print the obnoxious "BIOS broken"
   message if VT-d is actually disabled.
*/
static void __init check_tylersburg_isoch(void)
{
	struct pci_dev *pdev;
	uint32_t vtisochctrl;

	/* If there's no Azalia in the system anyway, forget it. */
	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
	if (!pdev)
		return;
	pci_dev_put(pdev);

	/* System Management Registers. Might be hidden, in which case
	   we can't do the sanity check. But that's OK, because the
	   known-broken BIOSes _don't_ actually hide it, so far. */
	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
	if (!pdev)
		return;

	/* NOTE(review): 0x188 is presumably the VTISOCHCTRL register in the
	   Tylersburg system-management config space -- confirm against the
	   chipset datasheet. */
	if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
		pci_dev_put(pdev);
		return;
	}

	pci_dev_put(pdev);

	/* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
	if (vtisochctrl & 1)
		return;

	/* Drop all bits other than the number of TLB entries */
	vtisochctrl &= 0x1c;

	/* If we have the recommended number of TLB entries (16), fine. */
	if (vtisochctrl == 0x10)
		return;

	/* Zero TLB entries? You get to ride the short bus to school. */
	if (!vtisochctrl) {
		/* Work around the deadlock by identity-mapping Azalia so it
		   never touches the starved ISOCH DMAR unit. */
		WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		     dmi_get_system_info(DMI_BIOS_VENDOR),
		     dmi_get_system_info(DMI_BIOS_VERSION),
		     dmi_get_system_info(DMI_PRODUCT_VERSION));
		iommu_identity_mapping |= IDENTMAP_AZALIA;
		return;
	}

	/* Non-zero but fewer than the recommended 16 entries: warn only. */
	pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
	       vtisochctrl);
}