@@ -586,6 +586,16 @@
 	  your BIOS for an option to enable it or if you have an IVRS ACPI
 	  table.
 
+config AMD_IOMMU_STATS
+	bool "Export AMD IOMMU statistics to debugfs"
+	depends on AMD_IOMMU
+	select DEBUG_FS
+	help
+	  This option enables code in the AMD IOMMU driver to collect various
+	  statistics about what's happening in the driver and exports that
+	  information to userspace via debugfs.
+	  If unsure, say N.
+
 # need this always selected by IOMMU for the VIA workaround
 config SWIOTLB
 	def_bool y if X86_64
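When the option is enabled, the driver creates one directory in debugfs and exposes each counter as a read-only u64 file; the changes to arch/x86/kernel/amd_iommu.c further down use the directory name "amd-iommu" for this. A minimal sketch of that export pattern, with an illustrative counter name (the real driver also keeps the returned dentry around):

#include <linux/debugfs.h>

static u64 cnt_example;			/* illustrative counter, not from the patch */
static struct dentry *stats_dir;

static void example_stats_init(void)
{
	/* the real driver puts everything under <debugfs>/amd-iommu/ */
	stats_dir = debugfs_create_dir("amd-iommu", NULL);
	if (stats_dir == NULL)
		return;

	/* one read-only u64 file per counter */
	debugfs_create_u64("cnt_example", 0444, stats_dir, &cnt_example);
}

With debugfs mounted (usually on /sys/kernel/debug), the counters can then be read from userspace with plain cat.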
arch/x86/include/asm/amd_iommu_types.h | 42 insertions(+), 19 deletions(-)
@@ -190,14 +190,21 @@
 /* FIXME: move this macro to <linux/pci.h> */
 #define PCI_BUS(x) (((x) >> 8) & 0xff)
 
+/* Protection domain flags */
+#define PD_DMA_OPS_MASK		(1UL << 0) /* domain used for dma_ops */
+#define PD_DEFAULT_MASK		(1UL << 1) /* domain is a default dma_ops
+					      domain for an IOMMU */
+
 /*
  * This structure contains generic data for IOMMU protection domains
  * independent of their use.
  */
 struct protection_domain {
-	spinlock_t lock; /* mostly used to lock the page table*/
-	u16 id;		 /* the domain id written to the device table */
-	int mode;	 /* paging mode (0-6 levels) */
-	u64 *pt_root;	 /* page table root pointer */
-	void *priv;	 /* private data */
+	spinlock_t lock;	/* mostly used to lock the page table*/
+	u16 id;			/* the domain id written to the device table */
+	int mode;		/* paging mode (0-6 levels) */
+	u64 *pt_root;		/* page table root pointer */
+	unsigned long flags;	/* flags to find out type of domain */
+	unsigned dev_cnt;	/* devices assigned to this domain */
+	void *priv;		/* private data */
 };
@@ -302,7 +295,7 @@
 	bool int_enabled;
 
 	/* if one, we need to send a completion wait command */
-	int need_sync;
+	bool need_sync;
 
 	/* default dma_ops domain for that IOMMU */
 	struct dma_ops_domain *default_dom;
@@ -381,7 +374,7 @@
 extern unsigned long *amd_iommu_pd_alloc_bitmap;
 
 /* will be 1 if device isolation is enabled */
-extern int amd_iommu_isolate;
+extern bool amd_iommu_isolate;
 
 /*
  * If true, the addresses will be flushed on unmap time, not when
@@ -389,23 +382,39 @@
  */
 extern bool amd_iommu_unmap_flush;
 
-/* takes a PCI device id and prints it out in a readable form */
-static inline void print_devid(u16 devid, int nl)
-{
-	int bus = devid >> 8;
-	int dev = devid >> 3 & 0x1f;
-	int fn  = devid & 0x07;
-
-	printk("%02x:%02x.%x", bus, dev, fn);
-	if (nl)
-		printk("\n");
-}
-
 /* takes bus and device/function and returns the device id
  * FIXME: should that be in generic PCI code? */
 static inline u16 calc_devid(u8 bus, u8 devfn)
 {
 	return (((u16)bus) << 8) | devfn;
 }
+
+#ifdef CONFIG_AMD_IOMMU_STATS
+
+struct __iommu_counter {
+	char *name;
+	struct dentry *dent;
+	u64 value;
+};
+
+#define DECLARE_STATS_COUNTER(nm) \
+	static struct __iommu_counter nm = {	\
+		.name = #nm,			\
+	}
+
+#define INC_STATS_COUNTER(name)		name.value += 1
+#define ADD_STATS_COUNTER(name, x)	name.value += (x)
+#define SUB_STATS_COUNTER(name, x)	name.value -= (x)
+
+#else /* CONFIG_AMD_IOMMU_STATS */
+
+#define DECLARE_STATS_COUNTER(name)
+#define INC_STATS_COUNTER(name)
+#define ADD_STATS_COUNTER(name, x)
+#define SUB_STATS_COUNTER(name, x)
+
+static inline void amd_iommu_stats_init(void) { }
+
+#endif /* CONFIG_AMD_IOMMU_STATS */
 
 #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */
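The point of routing everything through these macros is that call sites stay unconditional: with CONFIG_AMD_IOMMU_STATS unset they all expand to nothing, so the hot paths carry no overhead. A short sketch of the intended call-site pattern, assuming the macros above are in scope; the counter and function names here are illustrative, but the driver code below uses exactly this shape for compl_wait, cnt_map_single and friends:

/* file scope: DECLARE_ names the counter and its debugfs file */
DECLARE_STATS_COUNTER(cnt_example_calls);
DECLARE_STATS_COUNTER(example_bytes_in_flight);

static void example_hot_path(size_t size)
{
	/* expands to "cnt_example_calls.value += 1" with stats enabled,
	 * and to nothing at all when CONFIG_AMD_IOMMU_STATS is unset */
	INC_STATS_COUNTER(cnt_example_calls);

	/* gauges are kept with ADD/SUB pairs, as the driver does for
	 * alloced_io_mem in its map and unmap paths */
	ADD_STATS_COUNTER(example_bytes_in_flight, size);
	/* ... do the actual work ... */
	SUB_STATS_COUNTER(example_bytes_in_flight, size);
}

A counter additionally has to be registered once in amd_iommu_stats_init() (via amd_iommu_stats_add() in the driver changes below) before it shows up under debugfs.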
arch/x86/kernel/amd_iommu.c | 619 insertions(+), 60 deletions(-)
···20#include <linux/pci.h>21#include <linux/gfp.h>22#include <linux/bitops.h>023#include <linux/scatterlist.h>24#include <linux/iommu-helper.h>00025#include <asm/proto.h>26#include <asm/iommu.h>27#include <asm/gart.h>···42static LIST_HEAD(iommu_pd_list);43static DEFINE_SPINLOCK(iommu_pd_list_lock);44000045/*46 * general struct to manage commands send to an IOMMU47 */···5556static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,57 struct unity_map_entry *e);000000000000000000000000000000000000000000000000000000000000005859/* returns !0 if the IOMMU is caching non-present entries in its TLB */60static int iommu_has_npcache(struct amd_iommu *iommu)···259 spin_lock_irqsave(&iommu->lock, flags);260 ret = __iommu_queue_command(iommu, cmd);261 if (!ret)262- iommu->need_sync = 1;263 spin_unlock_irqrestore(&iommu->lock, flags);264265 return ret;266}267268/*269- * This function is called whenever we need to ensure that the IOMMU has270- * completed execution of all commands we sent. It sends a271- * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs272- * us about that by writing a value to a physical address we pass with273- * the command.274 */275-static int iommu_completion_wait(struct amd_iommu *iommu)276{277- int ret = 0, ready = 0;278 unsigned status = 0;279- struct iommu_cmd cmd;280- unsigned long flags, i = 0;281282- memset(&cmd, 0, sizeof(cmd));283- cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;284- CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);285-286- spin_lock_irqsave(&iommu->lock, flags);287-288- if (!iommu->need_sync)289- goto out;290-291- iommu->need_sync = 0;292-293- ret = __iommu_queue_command(iommu, &cmd);294-295- if (ret)296- goto out;297298 while (!ready && (i < EXIT_LOOP_COUNT)) {299 ++i;···290291 if (unlikely(i == EXIT_LOOP_COUNT))292 panic("AMD IOMMU: Completion wait loop failed\n");000000000000000000000000000000000000000000293294out:295 spin_unlock_irqrestore(&iommu->lock, flags);···358 return ret;359}360000000000000000361/*362 * Generic command send function for invalidaing TLB entries363 */···382 struct iommu_cmd cmd;383 int ret;384385- memset(&cmd, 0, sizeof(cmd));386- address &= PAGE_MASK;387- CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);388- cmd.data[1] |= domid;389- cmd.data[2] = lower_32_bits(address);390- cmd.data[3] = upper_32_bits(address);391- if (s) /* size bit - we flush more than one 4kb page */392- cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;393- if (pde) /* PDE bit - we wan't flush everything not only the PTEs */394- cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;395396 ret = iommu_queue_command(iommu, &cmd);397···421{422 u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;42300424 iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);425}00000000000000000000000000426427/****************************************************************************428 *···466 * supporting all features of AMD IOMMU page tables like level skipping467 * and full 64 bit address spaces.468 */469-static int iommu_map(struct protection_domain *dom,470- unsigned long bus_addr,471- unsigned long phys_addr,472- int prot)473{474 u64 __pte, *pte, *page;475···515516 return 0;517}000000000000000000000000518519/*520 * This function checks if a specific unity mapping entry is needed for···592593 for (addr = e->address_start; addr < e->address_end;594 addr += PAGE_SIZE) {595- ret = iommu_map(&dma_dom->domain, addr, addr, e->prot);596 if (ret)597 return ret;598 /*···723 return id;724}725000000000000726/*727 * Used to reserve address ranges in the aperture (e.g. 
for exclusion728 * ranges.···751 iommu_area_reserve(dom->bitmap, start_page, pages);752}753754-static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)755{756 int i, j;757 u64 *p1, *p2, *p3;758759- p1 = dma_dom->domain.pt_root;760761 if (!p1)762 return;···777 }778779 free_page((unsigned long)p1);00780}781782/*···790 if (!dom)791 return;792793- dma_ops_free_pagetable(dom);794795 kfree(dom->pte_pages);796···829 goto free_dma_dom;830 dma_dom->domain.mode = PAGE_MODE_3_LEVEL;831 dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);0832 dma_dom->domain.priv = dma_dom;833 if (!dma_dom->domain.pt_root)834 goto free_dma_dom;···892}893894/*000000000895 * Find out the protection domain structure for a given PCI device. This896 * will give us the pointer to the page table root for example.897 */···920 * If a device is not yet associated with a domain, this function does921 * assigns it visible for the hardware922 */923-static void set_device_domain(struct amd_iommu *iommu,924- struct protection_domain *domain,925- u16 devid)926{927 unsigned long flags;928-929 u64 pte_root = virt_to_phys(domain->pt_root);00930931 pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)932 << DEV_ENTRY_MODE_SHIFT;···943944 iommu_queue_inv_dev_entry(iommu, devid);945}00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000946947/*****************************************************************************948 *···1089 list_for_each_entry(entry, &iommu_pd_list, list) {1090 if (entry->target_dev == devid) {1091 ret = entry;1092- list_del(&ret->list);1093 break;1094 }1095 }···1139 if (!dma_dom)1140 dma_dom = (*iommu)->default_dom;1141 *domain = &dma_dom->domain;1142- set_device_domain(*iommu, *domain, *bdf);1143 printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "1144- "device ", (*domain)->id);1145- print_devid(_bdf, 1);1146 }11471148 if (domain_for_device(_bdf) == NULL)1149- set_device_domain(*iommu, *domain, _bdf);11501151 return 1;1152}···1231 pages = iommu_num_pages(paddr, size, PAGE_SIZE);1232 paddr &= PAGE_MASK;1233000001234 if (align)1235 align_mask = (1UL << get_order(size)) - 1;1236···1251 start += PAGE_SIZE;1252 }1253 address += offset;0012541255 if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {1256 iommu_flush_tlb(iommu, dma_dom->domain.id);···1290 start += PAGE_SIZE;1291 }1292001293 dma_ops_free_addresses(dma_dom, dma_addr, pages);12941295 if (amd_iommu_unmap_flush || dma_dom->need_flush) {···1313 dma_addr_t addr;1314 u64 dma_mask;1315001316 if (!check_device(dev))1317 return bad_dma_address;1318···1325 if (iommu == NULL || domain == NULL)1326 /* device not handled by any AMD IOMMU */1327 return (dma_addr_t)paddr;00013281329 spin_lock_irqsave(&domain->lock, flags);1330 addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false,···1354 struct protection_domain *domain;1355 u16 devid;1356001357 if (!check_device(dev) ||1358 !get_device_resources(dev, &iommu, &domain, &devid))1359 /* device not handled by any AMD IOMMU */0001360 return;13611362 spin_lock_irqsave(&domain->lock, flags);···1408 int mapped_elems = 0;1409 u64 dma_mask;1410001411 if (!check_device(dev))1412 return 0;1413···14191420 if (!iommu || !domain)1421 return map_sg_no_iommu(dev, sglist, nelems, dir);00014221423 spin_lock_irqsave(&domain->lock, flags);1424···1472 u16 devid;1473 int i;1474001475 if (!check_device(dev) ||1476 !get_device_resources(dev, &iommu, &domain, &devid))0001477 return;14781479 spin_lock_irqsave(&domain->lock, flags);···1508 
phys_addr_t paddr;1509 u64 dma_mask = dev->coherent_dma_mask;1510001511 if (!check_device(dev))1512 return NULL;1513···1528 return virt_addr;1529 }15300001531 if (!dma_mask)1532 dma_mask = *dev->dma_mask;1533···1539 *dma_addr = __map_single(dev, iommu, domain->priv, paddr,1540 size, DMA_BIDIRECTIONAL, true, dma_mask);15411542- if (*dma_addr == bad_dma_address) {1543- free_pages((unsigned long)virt_addr, get_order(size));1544- virt_addr = NULL;1545- goto out;1546- }15471548 iommu_completion_wait(iommu);15491550-out:1551 spin_unlock_irqrestore(&domain->lock, flags);15521553 return virt_addr;0000001554}15551556/*···1566 struct protection_domain *domain;1567 u16 devid;1568001569 if (!check_device(dev))1570 return;15711572 get_device_resources(dev, &iommu, &domain, &devid);15731574 if (!iommu || !domain)0001575 goto free_mem;15761577 spin_lock_irqsave(&domain->lock, flags);···1622 * we don't need to preallocate the protection domains anymore.1623 * For now we have to.1624 */1625-void prealloc_protection_domains(void)1626{1627 struct pci_dev *dev = NULL;1628 struct dma_ops_domain *dma_dom;···1631 u16 devid;16321633 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {1634- devid = (dev->bus->number << 8) | dev->devfn;1635 if (devid > amd_iommu_last_bdf)1636 continue;1637 devid = amd_iommu_alias_table[devid];···1678 iommu->default_dom = dma_ops_domain_alloc(iommu, order);1679 if (iommu->default_dom == NULL)1680 return -ENOMEM;01681 ret = iommu_init_unity_mappings(iommu);1682 if (ret)1683 goto free_domains;···1702 /* Make the driver finally visible to the drivers */1703 dma_ops = &amd_iommu_dma_ops;1704000000001705 return 0;17061707free_domains:···17211722 return ret;1723}00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
···20#include <linux/pci.h>21#include <linux/gfp.h>22#include <linux/bitops.h>23+#include <linux/debugfs.h>24#include <linux/scatterlist.h>25#include <linux/iommu-helper.h>26+#ifdef CONFIG_IOMMU_API27+#include <linux/iommu.h>28+#endif29#include <asm/proto.h>30#include <asm/iommu.h>31#include <asm/gart.h>···38static LIST_HEAD(iommu_pd_list);39static DEFINE_SPINLOCK(iommu_pd_list_lock);4041+#ifdef CONFIG_IOMMU_API42+static struct iommu_ops amd_iommu_ops;43+#endif44+45/*46 * general struct to manage commands send to an IOMMU47 */···4748static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,49 struct unity_map_entry *e);50+static struct dma_ops_domain *find_protection_domain(u16 devid);51+52+53+#ifdef CONFIG_AMD_IOMMU_STATS54+55+/*56+ * Initialization code for statistics collection57+ */58+59+DECLARE_STATS_COUNTER(compl_wait);60+DECLARE_STATS_COUNTER(cnt_map_single);61+DECLARE_STATS_COUNTER(cnt_unmap_single);62+DECLARE_STATS_COUNTER(cnt_map_sg);63+DECLARE_STATS_COUNTER(cnt_unmap_sg);64+DECLARE_STATS_COUNTER(cnt_alloc_coherent);65+DECLARE_STATS_COUNTER(cnt_free_coherent);66+DECLARE_STATS_COUNTER(cross_page);67+DECLARE_STATS_COUNTER(domain_flush_single);68+DECLARE_STATS_COUNTER(domain_flush_all);69+DECLARE_STATS_COUNTER(alloced_io_mem);70+DECLARE_STATS_COUNTER(total_map_requests);71+72+static struct dentry *stats_dir;73+static struct dentry *de_isolate;74+static struct dentry *de_fflush;75+76+static void amd_iommu_stats_add(struct __iommu_counter *cnt)77+{78+ if (stats_dir == NULL)79+ return;80+81+ cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir,82+ &cnt->value);83+}84+85+static void amd_iommu_stats_init(void)86+{87+ stats_dir = debugfs_create_dir("amd-iommu", NULL);88+ if (stats_dir == NULL)89+ return;90+91+ de_isolate = debugfs_create_bool("isolation", 0444, stats_dir,92+ (u32 *)&amd_iommu_isolate);93+94+ de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir,95+ (u32 *)&amd_iommu_unmap_flush);96+97+ amd_iommu_stats_add(&compl_wait);98+ amd_iommu_stats_add(&cnt_map_single);99+ amd_iommu_stats_add(&cnt_unmap_single);100+ amd_iommu_stats_add(&cnt_map_sg);101+ amd_iommu_stats_add(&cnt_unmap_sg);102+ amd_iommu_stats_add(&cnt_alloc_coherent);103+ amd_iommu_stats_add(&cnt_free_coherent);104+ amd_iommu_stats_add(&cross_page);105+ amd_iommu_stats_add(&domain_flush_single);106+ amd_iommu_stats_add(&domain_flush_all);107+ amd_iommu_stats_add(&alloced_io_mem);108+ amd_iommu_stats_add(&total_map_requests);109+}110+111+#endif112113/* returns !0 if the IOMMU is caching non-present entries in its TLB */114static int iommu_has_npcache(struct amd_iommu *iommu)···189 spin_lock_irqsave(&iommu->lock, flags);190 ret = __iommu_queue_command(iommu, cmd);191 if (!ret)192+ iommu->need_sync = true;193 spin_unlock_irqrestore(&iommu->lock, flags);194195 return ret;196}197198/*199+ * This function waits until an IOMMU has completed a completion200+ * wait command000201 */202+static void __iommu_wait_for_completion(struct amd_iommu *iommu)203{204+ int ready = 0;205 unsigned status = 0;206+ unsigned long i = 0;0207208+ INC_STATS_COUNTER(compl_wait);00000000000000209210 while (!ready && (i < EXIT_LOOP_COUNT)) {211 ++i;···238239 if (unlikely(i == EXIT_LOOP_COUNT))240 panic("AMD IOMMU: Completion wait loop failed\n");241+}242+243+/*244+ * This function queues a completion wait command into the command245+ * buffer of an IOMMU246+ */247+static int __iommu_completion_wait(struct amd_iommu *iommu)248+{249+ struct iommu_cmd cmd;250+251+ memset(&cmd, 0, sizeof(cmd));252+ cmd.data[0] = 
CMD_COMPL_WAIT_INT_MASK;253+ CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);254+255+ return __iommu_queue_command(iommu, &cmd);256+}257+258+/*259+ * This function is called whenever we need to ensure that the IOMMU has260+ * completed execution of all commands we sent. It sends a261+ * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs262+ * us about that by writing a value to a physical address we pass with263+ * the command.264+ */265+static int iommu_completion_wait(struct amd_iommu *iommu)266+{267+ int ret = 0;268+ unsigned long flags;269+270+ spin_lock_irqsave(&iommu->lock, flags);271+272+ if (!iommu->need_sync)273+ goto out;274+275+ ret = __iommu_completion_wait(iommu);276+277+ iommu->need_sync = false;278+279+ if (ret)280+ goto out;281+282+ __iommu_wait_for_completion(iommu);283284out:285 spin_unlock_irqrestore(&iommu->lock, flags);···264 return ret;265}266267+static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,268+ u16 domid, int pde, int s)269+{270+ memset(cmd, 0, sizeof(*cmd));271+ address &= PAGE_MASK;272+ CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);273+ cmd->data[1] |= domid;274+ cmd->data[2] = lower_32_bits(address);275+ cmd->data[3] = upper_32_bits(address);276+ if (s) /* size bit - we flush more than one 4kb page */277+ cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;278+ if (pde) /* PDE bit - we wan't flush everything not only the PTEs */279+ cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;280+}281+282/*283 * Generic command send function for invalidaing TLB entries284 */···273 struct iommu_cmd cmd;274 int ret;275276+ __iommu_build_inv_iommu_pages(&cmd, address, domid, pde, s);000000000277278 ret = iommu_queue_command(iommu, &cmd);279···321{322 u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;323324+ INC_STATS_COUNTER(domain_flush_single);325+326 iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);327}328+329+#ifdef CONFIG_IOMMU_API330+/*331+ * This function is used to flush the IO/TLB for a given protection domain332+ * on every IOMMU in the system333+ */334+static void iommu_flush_domain(u16 domid)335+{336+ unsigned long flags;337+ struct amd_iommu *iommu;338+ struct iommu_cmd cmd;339+340+ INC_STATS_COUNTER(domain_flush_all);341+342+ __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,343+ domid, 1, 1);344+345+ list_for_each_entry(iommu, &amd_iommu_list, list) {346+ spin_lock_irqsave(&iommu->lock, flags);347+ __iommu_queue_command(iommu, &cmd);348+ __iommu_completion_wait(iommu);349+ __iommu_wait_for_completion(iommu);350+ spin_unlock_irqrestore(&iommu->lock, flags);351+ }352+}353+#endif354355/****************************************************************************356 *···338 * supporting all features of AMD IOMMU page tables like level skipping339 * and full 64 bit address spaces.340 */341+static int iommu_map_page(struct protection_domain *dom,342+ unsigned long bus_addr,343+ unsigned long phys_addr,344+ int prot)345{346 u64 __pte, *pte, *page;347···387388 return 0;389}390+391+#ifdef CONFIG_IOMMU_API392+static void iommu_unmap_page(struct protection_domain *dom,393+ unsigned long bus_addr)394+{395+ u64 *pte;396+397+ pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];398+399+ if (!IOMMU_PTE_PRESENT(*pte))400+ return;401+402+ pte = IOMMU_PTE_PAGE(*pte);403+ pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];404+405+ if (!IOMMU_PTE_PRESENT(*pte))406+ return;407+408+ pte = IOMMU_PTE_PAGE(*pte);409+ pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];410+411+ *pte = 0;412+}413+#endif414415/*416 * This function checks if a specific unity mapping entry is 
needed for···440441 for (addr = e->address_start; addr < e->address_end;442 addr += PAGE_SIZE) {443+ ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot);444 if (ret)445 return ret;446 /*···571 return id;572}573574+#ifdef CONFIG_IOMMU_API575+static void domain_id_free(int id)576+{577+ unsigned long flags;578+579+ write_lock_irqsave(&amd_iommu_devtable_lock, flags);580+ if (id > 0 && id < MAX_DOMAIN_ID)581+ __clear_bit(id, amd_iommu_pd_alloc_bitmap);582+ write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);583+}584+#endif585+586/*587 * Used to reserve address ranges in the aperture (e.g. for exclusion588 * ranges.···587 iommu_area_reserve(dom->bitmap, start_page, pages);588}589590+static void free_pagetable(struct protection_domain *domain)591{592 int i, j;593 u64 *p1, *p2, *p3;594595+ p1 = domain->pt_root;596597 if (!p1)598 return;···613 }614615 free_page((unsigned long)p1);616+617+ domain->pt_root = NULL;618}619620/*···624 if (!dom)625 return;626627+ free_pagetable(&dom->domain);628629 kfree(dom->pte_pages);630···663 goto free_dma_dom;664 dma_dom->domain.mode = PAGE_MODE_3_LEVEL;665 dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);666+ dma_dom->domain.flags = PD_DMA_OPS_MASK;667 dma_dom->domain.priv = dma_dom;668 if (!dma_dom->domain.pt_root)669 goto free_dma_dom;···725}726727/*728+ * little helper function to check whether a given protection domain is a729+ * dma_ops domain730+ */731+static bool dma_ops_domain(struct protection_domain *domain)732+{733+ return domain->flags & PD_DMA_OPS_MASK;734+}735+736+/*737 * Find out the protection domain structure for a given PCI device. This738 * will give us the pointer to the page table root for example.739 */···744 * If a device is not yet associated with a domain, this function does745 * assigns it visible for the hardware746 */747+static void attach_device(struct amd_iommu *iommu,748+ struct protection_domain *domain,749+ u16 devid)750{751 unsigned long flags;0752 u64 pte_root = virt_to_phys(domain->pt_root);753+754+ domain->dev_cnt += 1;755756 pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)757 << DEV_ENTRY_MODE_SHIFT;···766767 iommu_queue_inv_dev_entry(iommu, devid);768}769+770+/*771+ * Removes a device from a protection domain (unlocked)772+ */773+static void __detach_device(struct protection_domain *domain, u16 devid)774+{775+776+ /* lock domain */777+ spin_lock(&domain->lock);778+779+ /* remove domain from the lookup table */780+ amd_iommu_pd_table[devid] = NULL;781+782+ /* remove entry from the device table seen by the hardware */783+ amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;784+ amd_iommu_dev_table[devid].data[1] = 0;785+ amd_iommu_dev_table[devid].data[2] = 0;786+787+ /* decrease reference counter */788+ domain->dev_cnt -= 1;789+790+ /* ready */791+ spin_unlock(&domain->lock);792+}793+794+/*795+ * Removes a device from a protection domain (with devtable_lock held)796+ */797+static void detach_device(struct protection_domain *domain, u16 devid)798+{799+ unsigned long flags;800+801+ /* lock device table */802+ write_lock_irqsave(&amd_iommu_devtable_lock, flags);803+ __detach_device(domain, devid);804+ write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);805+}806+807+static int device_change_notifier(struct notifier_block *nb,808+ unsigned long action, void *data)809+{810+ struct device *dev = data;811+ struct pci_dev *pdev = to_pci_dev(dev);812+ u16 devid = calc_devid(pdev->bus->number, pdev->devfn);813+ struct protection_domain *domain;814+ struct dma_ops_domain *dma_domain;815+ struct 
amd_iommu *iommu;816+ int order = amd_iommu_aperture_order;817+ unsigned long flags;818+819+ if (devid > amd_iommu_last_bdf)820+ goto out;821+822+ devid = amd_iommu_alias_table[devid];823+824+ iommu = amd_iommu_rlookup_table[devid];825+ if (iommu == NULL)826+ goto out;827+828+ domain = domain_for_device(devid);829+830+ if (domain && !dma_ops_domain(domain))831+ WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound "832+ "to a non-dma-ops domain\n", dev_name(dev));833+834+ switch (action) {835+ case BUS_NOTIFY_BOUND_DRIVER:836+ if (domain)837+ goto out;838+ dma_domain = find_protection_domain(devid);839+ if (!dma_domain)840+ dma_domain = iommu->default_dom;841+ attach_device(iommu, &dma_domain->domain, devid);842+ printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "843+ "device %s\n", dma_domain->domain.id, dev_name(dev));844+ break;845+ case BUS_NOTIFY_UNBIND_DRIVER:846+ if (!domain)847+ goto out;848+ detach_device(domain, devid);849+ break;850+ case BUS_NOTIFY_ADD_DEVICE:851+ /* allocate a protection domain if a device is added */852+ dma_domain = find_protection_domain(devid);853+ if (dma_domain)854+ goto out;855+ dma_domain = dma_ops_domain_alloc(iommu, order);856+ if (!dma_domain)857+ goto out;858+ dma_domain->target_dev = devid;859+860+ spin_lock_irqsave(&iommu_pd_list_lock, flags);861+ list_add_tail(&dma_domain->list, &iommu_pd_list);862+ spin_unlock_irqrestore(&iommu_pd_list_lock, flags);863+864+ break;865+ default:866+ goto out;867+ }868+869+ iommu_queue_inv_dev_entry(iommu, devid);870+ iommu_completion_wait(iommu);871+872+out:873+ return 0;874+}875+876+struct notifier_block device_nb = {877+ .notifier_call = device_change_notifier,878+};879880/*****************************************************************************881 *···802 list_for_each_entry(entry, &iommu_pd_list, list) {803 if (entry->target_dev == devid) {804 ret = entry;0805 break;806 }807 }···853 if (!dma_dom)854 dma_dom = (*iommu)->default_dom;855 *domain = &dma_dom->domain;856+ attach_device(*iommu, *domain, *bdf);857 printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "858+ "device %s\n", (*domain)->id, dev_name(dev));0859 }860861 if (domain_for_device(_bdf) == NULL)862+ attach_device(*iommu, *domain, _bdf);863864 return 1;865}···946 pages = iommu_num_pages(paddr, size, PAGE_SIZE);947 paddr &= PAGE_MASK;948949+ INC_STATS_COUNTER(total_map_requests);950+951+ if (pages > 1)952+ INC_STATS_COUNTER(cross_page);953+954 if (align)955 align_mask = (1UL << get_order(size)) - 1;956···961 start += PAGE_SIZE;962 }963 address += offset;964+965+ ADD_STATS_COUNTER(alloced_io_mem, size);966967 if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {968 iommu_flush_tlb(iommu, dma_dom->domain.id);···998 start += PAGE_SIZE;999 }10001001+ SUB_STATS_COUNTER(alloced_io_mem, size);1002+1003 dma_ops_free_addresses(dma_dom, dma_addr, pages);10041005 if (amd_iommu_unmap_flush || dma_dom->need_flush) {···1019 dma_addr_t addr;1020 u64 dma_mask;10211022+ INC_STATS_COUNTER(cnt_map_single);1023+1024 if (!check_device(dev))1025 return bad_dma_address;1026···1029 if (iommu == NULL || domain == NULL)1030 /* device not handled by any AMD IOMMU */1031 return (dma_addr_t)paddr;1032+1033+ if (!dma_ops_domain(domain))1034+ return bad_dma_address;10351036 spin_lock_irqsave(&domain->lock, flags);1037 addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false,···1055 struct protection_domain *domain;1056 u16 devid;10571058+ INC_STATS_COUNTER(cnt_unmap_single);1059+1060 if (!check_device(dev) ||1061 
!get_device_resources(dev, &iommu, &domain, &devid))1062 /* device not handled by any AMD IOMMU */1063+ return;1064+1065+ if (!dma_ops_domain(domain))1066 return;10671068 spin_lock_irqsave(&domain->lock, flags);···1104 int mapped_elems = 0;1105 u64 dma_mask;11061107+ INC_STATS_COUNTER(cnt_map_sg);1108+1109 if (!check_device(dev))1110 return 0;1111···11131114 if (!iommu || !domain)1115 return map_sg_no_iommu(dev, sglist, nelems, dir);1116+1117+ if (!dma_ops_domain(domain))1118+ return 0;11191120 spin_lock_irqsave(&domain->lock, flags);1121···1163 u16 devid;1164 int i;11651166+ INC_STATS_COUNTER(cnt_unmap_sg);1167+1168 if (!check_device(dev) ||1169 !get_device_resources(dev, &iommu, &domain, &devid))1170+ return;1171+1172+ if (!dma_ops_domain(domain))1173 return;11741175 spin_lock_irqsave(&domain->lock, flags);···1194 phys_addr_t paddr;1195 u64 dma_mask = dev->coherent_dma_mask;11961197+ INC_STATS_COUNTER(cnt_alloc_coherent);1198+1199 if (!check_device(dev))1200 return NULL;1201···1212 return virt_addr;1213 }12141215+ if (!dma_ops_domain(domain))1216+ goto out_free;1217+1218 if (!dma_mask)1219 dma_mask = *dev->dma_mask;1220···1220 *dma_addr = __map_single(dev, iommu, domain->priv, paddr,1221 size, DMA_BIDIRECTIONAL, true, dma_mask);12221223+ if (*dma_addr == bad_dma_address)1224+ goto out_free;00012251226 iommu_completion_wait(iommu);122701228 spin_unlock_irqrestore(&domain->lock, flags);12291230 return virt_addr;1231+1232+out_free:1233+1234+ free_pages((unsigned long)virt_addr, get_order(size));1235+1236+ return NULL;1237}12381239/*···1245 struct protection_domain *domain;1246 u16 devid;12471248+ INC_STATS_COUNTER(cnt_free_coherent);1249+1250 if (!check_device(dev))1251 return;12521253 get_device_resources(dev, &iommu, &domain, &devid);12541255 if (!iommu || !domain)1256+ goto free_mem;1257+1258+ if (!dma_ops_domain(domain))1259 goto free_mem;12601261 spin_lock_irqsave(&domain->lock, flags);···1296 * we don't need to preallocate the protection domains anymore.1297 * For now we have to.1298 */1299+static void prealloc_protection_domains(void)1300{1301 struct pci_dev *dev = NULL;1302 struct dma_ops_domain *dma_dom;···1305 u16 devid;13061307 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {1308+ devid = calc_devid(dev->bus->number, dev->devfn);1309 if (devid > amd_iommu_last_bdf)1310 continue;1311 devid = amd_iommu_alias_table[devid];···1352 iommu->default_dom = dma_ops_domain_alloc(iommu, order);1353 if (iommu->default_dom == NULL)1354 return -ENOMEM;1355+ iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;1356 ret = iommu_init_unity_mappings(iommu);1357 if (ret)1358 goto free_domains;···1375 /* Make the driver finally visible to the drivers */1376 dma_ops = &amd_iommu_dma_ops;13771378+#ifdef CONFIG_IOMMU_API1379+ register_iommu(&amd_iommu_ops);1380+#endif1381+1382+ bus_register_notifier(&pci_bus_type, &device_nb);1383+1384+ amd_iommu_stats_init();1385+1386 return 0;13871388free_domains:···13861387 return ret;1388}1389+1390+/*****************************************************************************1391+ *1392+ * The following functions belong to the exported interface of AMD IOMMU1393+ *1394+ * This interface allows access to lower level functions of the IOMMU1395+ * like protection domain handling and assignement of devices to domains1396+ * which is not possible with the dma_ops interface.1397+ *1398+ *****************************************************************************/1399+1400+#ifdef CONFIG_IOMMU_API1401+1402+static void cleanup_domain(struct 
protection_domain *domain)1403+{1404+ unsigned long flags;1405+ u16 devid;1406+1407+ write_lock_irqsave(&amd_iommu_devtable_lock, flags);1408+1409+ for (devid = 0; devid <= amd_iommu_last_bdf; ++devid)1410+ if (amd_iommu_pd_table[devid] == domain)1411+ __detach_device(domain, devid);1412+1413+ write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);1414+}1415+1416+static int amd_iommu_domain_init(struct iommu_domain *dom)1417+{1418+ struct protection_domain *domain;1419+1420+ domain = kzalloc(sizeof(*domain), GFP_KERNEL);1421+ if (!domain)1422+ return -ENOMEM;1423+1424+ spin_lock_init(&domain->lock);1425+ domain->mode = PAGE_MODE_3_LEVEL;1426+ domain->id = domain_id_alloc();1427+ if (!domain->id)1428+ goto out_free;1429+ domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);1430+ if (!domain->pt_root)1431+ goto out_free;1432+1433+ dom->priv = domain;1434+1435+ return 0;1436+1437+out_free:1438+ kfree(domain);1439+1440+ return -ENOMEM;1441+}1442+1443+static void amd_iommu_domain_destroy(struct iommu_domain *dom)1444+{1445+ struct protection_domain *domain = dom->priv;1446+1447+ if (!domain)1448+ return;1449+1450+ if (domain->dev_cnt > 0)1451+ cleanup_domain(domain);1452+1453+ BUG_ON(domain->dev_cnt != 0);1454+1455+ free_pagetable(domain);1456+1457+ domain_id_free(domain->id);1458+1459+ kfree(domain);1460+1461+ dom->priv = NULL;1462+}1463+1464+static void amd_iommu_detach_device(struct iommu_domain *dom,1465+ struct device *dev)1466+{1467+ struct protection_domain *domain = dom->priv;1468+ struct amd_iommu *iommu;1469+ struct pci_dev *pdev;1470+ u16 devid;1471+1472+ if (dev->bus != &pci_bus_type)1473+ return;1474+1475+ pdev = to_pci_dev(dev);1476+1477+ devid = calc_devid(pdev->bus->number, pdev->devfn);1478+1479+ if (devid > 0)1480+ detach_device(domain, devid);1481+1482+ iommu = amd_iommu_rlookup_table[devid];1483+ if (!iommu)1484+ return;1485+1486+ iommu_queue_inv_dev_entry(iommu, devid);1487+ iommu_completion_wait(iommu);1488+}1489+1490+static int amd_iommu_attach_device(struct iommu_domain *dom,1491+ struct device *dev)1492+{1493+ struct protection_domain *domain = dom->priv;1494+ struct protection_domain *old_domain;1495+ struct amd_iommu *iommu;1496+ struct pci_dev *pdev;1497+ u16 devid;1498+1499+ if (dev->bus != &pci_bus_type)1500+ return -EINVAL;1501+1502+ pdev = to_pci_dev(dev);1503+1504+ devid = calc_devid(pdev->bus->number, pdev->devfn);1505+1506+ if (devid >= amd_iommu_last_bdf ||1507+ devid != amd_iommu_alias_table[devid])1508+ return -EINVAL;1509+1510+ iommu = amd_iommu_rlookup_table[devid];1511+ if (!iommu)1512+ return -EINVAL;1513+1514+ old_domain = domain_for_device(devid);1515+ if (old_domain)1516+ return -EBUSY;1517+1518+ attach_device(iommu, domain, devid);1519+1520+ iommu_completion_wait(iommu);1521+1522+ return 0;1523+}1524+1525+static int amd_iommu_map_range(struct iommu_domain *dom,1526+ unsigned long iova, phys_addr_t paddr,1527+ size_t size, int iommu_prot)1528+{1529+ struct protection_domain *domain = dom->priv;1530+ unsigned long i, npages = iommu_num_pages(paddr, size, PAGE_SIZE);1531+ int prot = 0;1532+ int ret;1533+1534+ if (iommu_prot & IOMMU_READ)1535+ prot |= IOMMU_PROT_IR;1536+ if (iommu_prot & IOMMU_WRITE)1537+ prot |= IOMMU_PROT_IW;1538+1539+ iova &= PAGE_MASK;1540+ paddr &= PAGE_MASK;1541+1542+ for (i = 0; i < npages; ++i) {1543+ ret = iommu_map_page(domain, iova, paddr, prot);1544+ if (ret)1545+ return ret;1546+1547+ iova += PAGE_SIZE;1548+ paddr += PAGE_SIZE;1549+ }1550+1551+ return 0;1552+}1553+1554+static void amd_iommu_unmap_range(struct 
iommu_domain *dom,1555+ unsigned long iova, size_t size)1556+{1557+1558+ struct protection_domain *domain = dom->priv;1559+ unsigned long i, npages = iommu_num_pages(iova, size, PAGE_SIZE);1560+1561+ iova &= PAGE_MASK;1562+1563+ for (i = 0; i < npages; ++i) {1564+ iommu_unmap_page(domain, iova);1565+ iova += PAGE_SIZE;1566+ }1567+1568+ iommu_flush_domain(domain->id);1569+}1570+1571+static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,1572+ unsigned long iova)1573+{1574+ struct protection_domain *domain = dom->priv;1575+ unsigned long offset = iova & ~PAGE_MASK;1576+ phys_addr_t paddr;1577+ u64 *pte;1578+1579+ pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(iova)];1580+1581+ if (!IOMMU_PTE_PRESENT(*pte))1582+ return 0;1583+1584+ pte = IOMMU_PTE_PAGE(*pte);1585+ pte = &pte[IOMMU_PTE_L1_INDEX(iova)];1586+1587+ if (!IOMMU_PTE_PRESENT(*pte))1588+ return 0;1589+1590+ pte = IOMMU_PTE_PAGE(*pte);1591+ pte = &pte[IOMMU_PTE_L0_INDEX(iova)];1592+1593+ if (!IOMMU_PTE_PRESENT(*pte))1594+ return 0;1595+1596+ paddr = *pte & IOMMU_PAGE_MASK;1597+ paddr |= offset;1598+1599+ return paddr;1600+}1601+1602+static struct iommu_ops amd_iommu_ops = {1603+ .domain_init = amd_iommu_domain_init,1604+ .domain_destroy = amd_iommu_domain_destroy,1605+ .attach_dev = amd_iommu_attach_device,1606+ .detach_dev = amd_iommu_detach_device,1607+ .map = amd_iommu_map_range,1608+ .unmap = amd_iommu_unmap_range,1609+ .iova_to_phys = amd_iommu_iova_to_phys,1610+};1611+1612+#endif
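The iommu_ops table registered above is what turns this driver into a backend for the new generic IOMMU API, so other subsystems (KVM device assignment being the obvious consumer) no longer need AMD-specific calls. A hypothetical consumer sketch, assuming the generic layer introduced alongside this code exposes thin wrappers named iommu_domain_alloc()/iommu_attach_device()/iommu_map_range() and so on that simply dispatch into the registered ops; error handling is trimmed:

#include <linux/device.h>
#include <linux/errno.h>
#include <linux/iommu.h>
#include <linux/kernel.h>
#include <linux/mm.h>

static int assign_one_page(struct device *dev, unsigned long iova,
			   phys_addr_t paddr)
{
	struct iommu_domain *dom;
	int ret;

	dom = iommu_domain_alloc();		/* dispatches to amd_iommu_domain_init() */
	if (!dom)
		return -ENOMEM;

	ret = iommu_attach_device(dom, dev);	/* amd_iommu_attach_device() */
	if (ret)
		goto out_free;

	/* amd_iommu_map_range() fills the page table via iommu_map_page() */
	ret = iommu_map_range(dom, iova, paddr, PAGE_SIZE,
			      IOMMU_READ | IOMMU_WRITE);
	if (ret)
		goto out_detach;

	/* the translation can be read back through amd_iommu_iova_to_phys() */
	WARN_ON(iommu_iova_to_phys(dom, iova) != paddr);

	return 0;

out_detach:
	iommu_detach_device(dom, dev);		/* amd_iommu_detach_device() */
out_free:
	iommu_domain_free(dom);			/* amd_iommu_domain_destroy() */
	return ret;
}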
arch/x86/kernel/amd_iommu_init.c | 6 insertions(+), 9 deletions(-)
@@ -122,7 +122,8 @@
 LIST_HEAD(amd_iommu_unity_map);		/* a list of required unity mappings
					   we find in ACPI */
 unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
-int amd_iommu_isolate = 1;	/* if 1, device isolation is enabled */
+bool amd_iommu_isolate = true;	/* if true, device isolation is
+				   enabled */
 bool amd_iommu_unmap_flush;	/* if true, flush on every unmap */
 
 LIST_HEAD(amd_iommu_list);	/* list of all AMD IOMMUs in the
@@ -246,12 +245,8 @@
 /* Function to enable the hardware */
 void __init iommu_enable(struct amd_iommu *iommu)
 {
-	printk(KERN_INFO "AMD IOMMU: Enabling IOMMU "
-	       "at %02x:%02x.%x cap 0x%hx\n",
-	       iommu->dev->bus->number,
-	       PCI_SLOT(iommu->dev->devfn),
-	       PCI_FUNC(iommu->dev->devfn),
-	       iommu->cap_ptr);
+	printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n",
+	       dev_name(&iommu->dev->dev), iommu->cap_ptr);
 
 	iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
 }
@@ -1215,9 +1218,9 @@
 {
 	for (; *str; ++str) {
 		if (strncmp(str, "isolate", 7) == 0)
-			amd_iommu_isolate = 1;
+			amd_iommu_isolate = true;
 		if (strncmp(str, "share", 5) == 0)
-			amd_iommu_isolate = 0;
+			amd_iommu_isolate = false;
 		if (strncmp(str, "fullflush", 9) == 0)
 			amd_iommu_unmap_flush = true;
 	}
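For reference, the option strings parsed here map onto boot-time behaviour as follows (assuming, as elsewhere in this file, that the parser is wired up to the amd_iommu= kernel parameter):

/*
 * amd_iommu=isolate    device isolation on  (amd_iommu_isolate = true,
 *                      already the default after this change)
 * amd_iommu=share      device isolation off (amd_iommu_isolate = false)
 * amd_iommu=fullflush  flush the IO/TLB on every unmap instead of lazily
 *                      (amd_iommu_unmap_flush = true)
 */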