Merge tag 'iommu-fixes-v6.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu

Pull iommu fixes from Joerg Roedel:

 - Intel VT-d fixes from Lu Baolu:
     - Boot kdump kernels with VT-d scalable mode on
     - Calculate the right page table levels
     - Fix two recursive locking issues
     - Fix a lockdep splat issue

 - AMD IOMMU fixes:
     - Fix for completion-wait command to use full 64 bits of data
     - Fix PASID related issue where GPU sound devices failed to
       initialize

 - Fix for Virtio-IOMMU to report correct caching behavior, needed for
   use with VFIO

* tag 'iommu-fixes-v6.0-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu:
iommu: Fix false ownership failure on AMD systems with PASID activated
iommu/vt-d: Fix possible recursive locking in intel_iommu_init()
iommu/virtio: Fix interaction with VFIO
iommu/vt-d: Fix lockdep splat due to klist iteration in atomic context
iommu/vt-d: Fix recursive lock issue in iommu_flush_dev_iotlb()
iommu/vt-d: Correctly calculate sagaw value of IOMMU
iommu/vt-d: Fix kdump kernels boot failure with scalable mode
iommu/amd: use full 64-bit value in build_completion_wait()

8 files changed, 163 insertions(+), 135 deletions(-)
+2 -1
drivers/iommu/amd/iommu.c
···
         memset(cmd, 0, sizeof(*cmd));
         cmd->data[0] = lower_32_bits(paddr) | CMD_COMPL_WAIT_STORE_MASK;
         cmd->data[1] = upper_32_bits(paddr);
-        cmd->data[2] = data;
+        cmd->data[2] = lower_32_bits(data);
+        cmd->data[3] = upper_32_bits(data);
         CMD_SET_TYPE(cmd, CMD_COMPL_WAIT);
 }
+2
drivers/iommu/amd/iommu_v2.c
···
         if (dev_state->domain == NULL)
                 goto out_free_states;
 
+        /* See iommu_is_default_domain() */
+        dev_state->domain->type = IOMMU_DOMAIN_IDENTITY;
         amd_iommu_domain_direct_map(dev_state->domain);
 
         ret = amd_iommu_domain_enable_v2(dev_state->domain, pasids);
+7
drivers/iommu/intel/dmar.c
···
         if (!dmar_in_use())
                 return 0;
 
+        /*
+         * It's unlikely that any I/O board is hot added before the IOMMU
+         * subsystem is initialized.
+         */
+        if (IS_ENABLED(CONFIG_INTEL_IOMMU) && !intel_iommu_enabled)
+                return -EOPNOTSUPP;
+
         if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
                 tmp = handle;
         } else {
+113 -128
drivers/iommu/intel/iommu.c
···
         return re->hi & VTD_PAGE_MASK;
 }
 
-static inline void context_clear_pasid_enable(struct context_entry *context)
-{
-        context->lo &= ~(1ULL << 11);
-}
-
-static inline bool context_pasid_enabled(struct context_entry *context)
-{
-        return !!(context->lo & (1ULL << 11));
-}
-
-static inline void context_set_copied(struct context_entry *context)
-{
-        context->hi |= (1ull << 3);
-}
-
-static inline bool context_copied(struct context_entry *context)
-{
-        return !!(context->hi & (1ULL << 3));
-}
-
-static inline bool __context_present(struct context_entry *context)
-{
-        return (context->lo & 1);
-}
-
-bool context_present(struct context_entry *context)
-{
-        return context_pasid_enabled(context) ?
-               __context_present(context) :
-               __context_present(context) && !context_copied(context);
-}
-
 static inline void context_set_present(struct context_entry *context)
 {
         context->lo |= 1;
···
 {
         context->lo = 0;
         context->hi = 0;
+}
+
+static inline bool context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn)
+{
+        if (!iommu->copied_tables)
+                return false;
+
+        return test_bit(((long)bus << 8) | devfn, iommu->copied_tables);
+}
+
+static inline void
+set_context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn)
+{
+        set_bit(((long)bus << 8) | devfn, iommu->copied_tables);
+}
+
+static inline void
+clear_context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn)
+{
+        clear_bit(((long)bus << 8) | devfn, iommu->copied_tables);
 }
 
 /*
···
         return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
 }
 
+/*
+ * Calculate the Supported Adjusted Guest Address Widths of an IOMMU.
+ * Refer to 11.4.2 of the VT-d spec for the encoding of each bit of
+ * the returned SAGAW.
+ */
+static unsigned long __iommu_calculate_sagaw(struct intel_iommu *iommu)
+{
+        unsigned long fl_sagaw, sl_sagaw;
+
+        fl_sagaw = BIT(2) | (cap_fl1gp_support(iommu->cap) ? BIT(3) : 0);
+        sl_sagaw = cap_sagaw(iommu->cap);
+
+        /* Second level only. */
+        if (!sm_supported(iommu) || !ecap_flts(iommu->ecap))
+                return sl_sagaw;
+
+        /* First level only. */
+        if (!ecap_slts(iommu->ecap))
+                return fl_sagaw;
+
+        return fl_sagaw & sl_sagaw;
+}
+
 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
 {
         unsigned long sagaw;
         int agaw;
 
-        sagaw = cap_sagaw(iommu->cap);
-        for (agaw = width_to_agaw(max_gaw);
-             agaw >= 0; agaw--) {
+        sagaw = __iommu_calculate_sagaw(iommu);
+        for (agaw = width_to_agaw(max_gaw); agaw >= 0; agaw--) {
                 if (test_bit(agaw, &sagaw))
                         break;
         }
···
 {
         struct device_domain_info *info;
         int nid = NUMA_NO_NODE;
+        unsigned long flags;
 
-        spin_lock(&domain->lock);
+        spin_lock_irqsave(&domain->lock, flags);
         list_for_each_entry(info, &domain->devices, link) {
                 /*
                  * There could possibly be multiple device numa nodes as devices
···
                 if (nid != NUMA_NO_NODE)
                         break;
         }
-        spin_unlock(&domain->lock);
+        spin_unlock_irqrestore(&domain->lock, flags);
 
         return nid;
 }
···
         struct root_entry *root = &iommu->root_entry[bus];
         struct context_entry *context;
         u64 *entry;
+
+        /*
+         * Except that the caller requested to allocate a new entry,
+         * returning a copied context entry makes no sense.
+         */
+        if (!alloc && context_copied(iommu, bus, devfn))
+                return NULL;
 
         entry = &root->lo;
         if (sm_supported(iommu)) {
···
 }
 
 #ifdef CONFIG_DMAR_DEBUG
-static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn, u8 bus, u8 devfn)
+static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn,
+                         u8 bus, u8 devfn, struct dma_pte *parent, int level)
 {
-        struct device_domain_info *info;
-        struct dma_pte *parent, *pte;
-        struct dmar_domain *domain;
-        struct pci_dev *pdev;
-        int offset, level;
-
-        pdev = pci_get_domain_bus_and_slot(iommu->segment, bus, devfn);
-        if (!pdev)
-                return;
-
-        info = dev_iommu_priv_get(&pdev->dev);
-        if (!info || !info->domain) {
-                pr_info("device [%02x:%02x.%d] not probed\n",
-                        bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
-                return;
-        }
-
-        domain = info->domain;
-        level = agaw_to_level(domain->agaw);
-        parent = domain->pgd;
-        if (!parent) {
-                pr_info("no page table setup\n");
-                return;
-        }
+        struct dma_pte *pte;
+        int offset;
 
         while (1) {
                 offset = pfn_level_offset(pfn, level);
···
         struct pasid_entry *entries, *pte;
         struct context_entry *ctx_entry;
         struct root_entry *rt_entry;
+        int i, dir_index, index, level;
         u8 devfn = source_id & 0xff;
         u8 bus = source_id >> 8;
-        int i, dir_index, index;
+        struct dma_pte *pgtable;
 
         pr_info("Dump %s table entries for IOVA 0x%llx\n", iommu->name, addr);
···
                 ctx_entry->hi, ctx_entry->lo);
 
         /* legacy mode does not require PASID entries */
-        if (!sm_supported(iommu))
+        if (!sm_supported(iommu)) {
+                level = agaw_to_level(ctx_entry->hi & 7);
+                pgtable = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
                 goto pgtable_walk;
+        }
 
         /* get the pointer to pasid directory entry */
         dir = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
···
         for (i = 0; i < ARRAY_SIZE(pte->val); i++)
                 pr_info("pasid table entry[%d]: 0x%016llx\n", i, pte->val[i]);
 
+        if (pasid_pte_get_pgtt(pte) == PASID_ENTRY_PGTT_FL_ONLY) {
+                level = pte->val[2] & BIT_ULL(2) ? 5 : 4;
+                pgtable = phys_to_virt(pte->val[2] & VTD_PAGE_MASK);
+        } else {
+                level = agaw_to_level((pte->val[0] >> 2) & 0x7);
+                pgtable = phys_to_virt(pte->val[0] & VTD_PAGE_MASK);
+        }
+
 pgtable_walk:
-        pgtable_walk(iommu, addr >> VTD_PAGE_SHIFT, bus, devfn);
+        pgtable_walk(iommu, addr >> VTD_PAGE_SHIFT, bus, devfn, pgtable, level);
 }
 #endif
···
                                   u8 bus, u8 devfn)
 {
         struct device_domain_info *info;
+        unsigned long flags;
 
         if (!iommu->qi)
                 return NULL;
 
-        spin_lock(&domain->lock);
+        spin_lock_irqsave(&domain->lock, flags);
         list_for_each_entry(info, &domain->devices, link) {
                 if (info->iommu == iommu && info->bus == bus &&
                     info->devfn == devfn) {
-                        spin_unlock(&domain->lock);
+                        spin_unlock_irqrestore(&domain->lock, flags);
                         return info->ats_supported ? info : NULL;
                 }
         }
-        spin_unlock(&domain->lock);
+        spin_unlock_irqrestore(&domain->lock, flags);
 
         return NULL;
 }
···
 {
         struct device_domain_info *info;
         bool has_iotlb_device = false;
+        unsigned long flags;
 
-        spin_lock(&domain->lock);
+        spin_lock_irqsave(&domain->lock, flags);
         list_for_each_entry(info, &domain->devices, link) {
                 if (info->ats_enabled) {
                         has_iotlb_device = true;
···
                 }
         }
         domain->has_iotlb_device = has_iotlb_device;
-        spin_unlock(&domain->lock);
+        spin_unlock_irqrestore(&domain->lock, flags);
 }
 
 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
···
                                   u64 addr, unsigned mask)
 {
         struct device_domain_info *info;
+        unsigned long flags;
 
         if (!domain->has_iotlb_device)
                 return;
 
-        spin_lock(&domain->lock);
+        spin_lock_irqsave(&domain->lock, flags);
         list_for_each_entry(info, &domain->devices, link)
                 __iommu_flush_dev_iotlb(info, addr, mask);
-        spin_unlock(&domain->lock);
+        spin_unlock_irqrestore(&domain->lock, flags);
 }
 
 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
···
         if (iommu->domain_ids) {
                 bitmap_free(iommu->domain_ids);
                 iommu->domain_ids = NULL;
+        }
+
+        if (iommu->copied_tables) {
+                bitmap_free(iommu->copied_tables);
+                iommu->copied_tables = NULL;
         }
 
         /* free context mapping */
···
                 goto out_unlock;
 
         ret = 0;
-        if (context_present(context))
+        if (context_present(context) && !context_copied(iommu, bus, devfn))
                 goto out_unlock;
 
         /*
···
          * in-flight DMA will exist, and we don't need to worry anymore
          * hereafter.
          */
-        if (context_copied(context)) {
+        if (context_copied(iommu, bus, devfn)) {
                 u16 did_old = context_domain_id(context);
 
                 if (did_old < cap_ndoms(iommu->cap)) {
···
                         iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
                                                  DMA_TLB_DSI_FLUSH);
                 }
+
+                clear_context_copied(iommu, bus, devfn);
         }
 
         context_clear_entry(context);
···
 {
         struct device_domain_info *info = dev_iommu_priv_get(dev);
         struct intel_iommu *iommu;
+        unsigned long flags;
         u8 bus, devfn;
         int ret;
 
···
         if (ret)
                 return ret;
         info->domain = domain;
-        spin_lock(&domain->lock);
+        spin_lock_irqsave(&domain->lock, flags);
         list_add(&info->link, &domain->devices);
-        spin_unlock(&domain->lock);
+        spin_unlock_irqrestore(&domain->lock, flags);
 
         /* PASID table is mandatory for a PCI device in scalable mode. */
         if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
···
                 /* Now copy the context entry */
                 memcpy(&ce, old_ce + idx, sizeof(ce));
 
-                if (!__context_present(&ce))
+                if (!context_present(&ce))
                         continue;
 
                 did = context_domain_id(&ce);
                 if (did >= 0 && did < cap_ndoms(iommu->cap))
                         set_bit(did, iommu->domain_ids);
 
-                /*
-                 * We need a marker for copied context entries. This
-                 * marker needs to work for the old format as well as
-                 * for extended context entries.
-                 *
-                 * Bit 67 of the context entry is used. In the old
-                 * format this bit is available to software, in the
-                 * extended format it is the PGE bit, but PGE is ignored
-                 * by HW if PASIDs are disabled (and thus still
-                 * available).
-                 *
-                 * So disable PASIDs first and then mark the entry
-                 * copied. This means that we don't copy PASID
-                 * translations from the old kernel, but this is fine as
-                 * faults there are not fatal.
-                 */
-                context_clear_pasid_enable(&ce);
-                context_set_copied(&ce);
-
+                set_context_copied(iommu, bus, devfn);
                 new_ce[idx] = ce;
         }
···
         bool new_ext, ext;
 
         rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
-        ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
-        new_ext = !!ecap_ecs(iommu->ecap);
+        ext = !!(rtaddr_reg & DMA_RTADDR_SMT);
+        new_ext = !!sm_supported(iommu);
 
         /*
          * The RTT bit can only be changed when translation is disabled,
···
          */
         if (new_ext != ext)
                 return -EINVAL;
+
+        iommu->copied_tables = bitmap_zalloc(BIT_ULL(16), GFP_KERNEL);
+        if (!iommu->copied_tables)
+                return -ENOMEM;
 
         old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
         if (!old_rt_phys)
···
 
 #ifdef CONFIG_INTEL_IOMMU_SVM
                 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
-                        /*
-                         * Call dmar_alloc_hwirq() with dmar_global_lock held,
-                         * could cause possible lock race condition.
-                         */
-                        up_write(&dmar_global_lock);
                         ret = intel_svm_enable_prq(iommu);
-                        down_write(&dmar_global_lock);
                         if (ret)
                                 goto free_iommu;
                 }
···
         force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) ||
                     platform_optin_force_iommu();
 
-        down_write(&dmar_global_lock);
         if (dmar_table_init()) {
                 if (force_on)
                         panic("tboot: Failed to initialize DMAR table\n");
···
                         panic("tboot: Failed to initialize DMAR device scope\n");
                 goto out_free_dmar;
         }
-
-        up_write(&dmar_global_lock);
-
-        /*
-         * The bus notifier takes the dmar_global_lock, so lockdep will
-         * complain later when we register it under the lock.
-         */
-        dmar_register_bus_notifier();
-
-        down_write(&dmar_global_lock);
 
         if (!no_iommu)
                 intel_iommu_debugfs_init();
···
                 pr_err("Initialization failed\n");
                 goto out_free_dmar;
         }
-        up_write(&dmar_global_lock);
 
         init_iommu_pm_ops();
 
-        down_read(&dmar_global_lock);
         for_each_active_iommu(iommu, drhd) {
                 /*
                  * The flush queue implementation does not perform
···
                                "%s", iommu->name);
                 iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
         }
-        up_read(&dmar_global_lock);
 
         bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
         if (si_domain && !hw_pass_through)
                 register_memory_notifier(&intel_iommu_memory_nb);
 
-        down_read(&dmar_global_lock);
         if (probe_acpi_namespace_devices())
                 pr_warn("ACPI name space devices didn't probe correctly\n");
 
···
 
                 iommu_disable_protect_mem_regions(iommu);
         }
-        up_read(&dmar_global_lock);
-
-        pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
 
         intel_iommu_enabled = 1;
+        dmar_register_bus_notifier();
+        pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
 
         return 0;
 
 out_free_dmar:
         intel_iommu_free_dmars();
-        up_write(&dmar_global_lock);
         return ret;
 }
···
         struct device_domain_info *info = dev_iommu_priv_get(dev);
         struct dmar_domain *domain = info->domain;
         struct intel_iommu *iommu = info->iommu;
+        unsigned long flags;
 
         if (!dev_is_real_dma_subdevice(info->dev)) {
                 if (dev_is_pci(info->dev) && sm_supported(iommu))
···
                 intel_pasid_free_table(info->dev);
         }
 
-        spin_lock(&domain->lock);
+        spin_lock_irqsave(&domain->lock, flags);
         list_del(&info->link);
-        spin_unlock(&domain->lock);
+        spin_unlock_irqrestore(&domain->lock, flags);
 
         domain_detach_iommu(domain, iommu);
         info->domain = NULL;
···
 static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain)
 {
         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+        unsigned long flags;
 
         if (dmar_domain->force_snooping)
                 return true;
 
-        spin_lock(&dmar_domain->lock);
+        spin_lock_irqsave(&dmar_domain->lock, flags);
         if (!domain_support_force_snooping(dmar_domain)) {
-                spin_unlock(&dmar_domain->lock);
+                spin_unlock_irqrestore(&dmar_domain->lock, flags);
                 return false;
         }
 
         domain_set_force_snooping(dmar_domain);
         dmar_domain->force_snooping = true;
-        spin_unlock(&dmar_domain->lock);
+        spin_unlock_irqrestore(&dmar_domain->lock, flags);
 
         return true;
 }
+6 -3
drivers/iommu/intel/iommu.h
···
 #define ecap_dis(e) (((e) >> 27) & 0x1)
 #define ecap_nest(e) (((e) >> 26) & 0x1)
 #define ecap_mts(e) (((e) >> 25) & 0x1)
-#define ecap_ecs(e) (((e) >> 24) & 0x1)
 #define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16)
 #define ecap_max_iotlb_offset(e) (ecap_iotlb_offset(e) + 16)
 #define ecap_coherent(e) ((e) & 0x1)
···
 #define DMA_GSTS_CFIS (((u32)1) << 23)
 
 /* DMA_RTADDR_REG */
-#define DMA_RTADDR_RTT (((u64)1) << 11)
 #define DMA_RTADDR_SMT (((u64)1) << 10)
 
 /* CCMD_REG */
···
 
 #ifdef CONFIG_INTEL_IOMMU
         unsigned long *domain_ids; /* bitmap of domains */
+        unsigned long *copied_tables; /* bitmap of copied tables */
         spinlock_t lock; /* protect context, domain ids */
         struct root_entry *root_entry; /* virtual address */
···
         (struct dma_pte *)ALIGN((unsigned long)pte, VTD_PAGE_SIZE) - pte;
 }
 
+static inline bool context_present(struct context_entry *context)
+{
+        return (context->lo & 1);
+}
+
 extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
 
 extern int dmar_enable_qi(struct intel_iommu *iommu);
···
 #endif /* CONFIG_INTEL_IOMMU_DEBUGFS */
 
 extern const struct attribute_group *intel_iommu_groups[];
-bool context_present(struct context_entry *context);
 struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
                                          u8 devfn, int alloc);
+19 -2
drivers/iommu/iommu.c
···
         return ret;
 }
 
+static bool iommu_is_default_domain(struct iommu_group *group)
+{
+        if (group->domain == group->default_domain)
+                return true;
+
+        /*
+         * If the default domain was set to identity and it is still an identity
+         * domain then we consider this a pass. This happens because of
+         * amd_iommu_init_device() replacing the default idenytity domain with an
+         * identity domain that has a different configuration for AMDGPU.
+         */
+        if (group->default_domain &&
+            group->default_domain->type == IOMMU_DOMAIN_IDENTITY &&
+            group->domain && group->domain->type == IOMMU_DOMAIN_IDENTITY)
+                return true;
+        return false;
+}
+
 /**
  * iommu_device_use_default_domain() - Device driver wants to handle device
  *                                     DMA through the kernel DMA API.
···
 
         mutex_lock(&group->mutex);
         if (group->owner_cnt) {
-                if (group->domain != group->default_domain ||
-                    group->owner) {
+                if (group->owner || !iommu_is_default_domain(group)) {
                         ret = -EBUSY;
                         goto unlock_out;
                 }
+11
drivers/iommu/virtio-iommu.c
···
         return iommu_fwspec_add_ids(dev, args->args, 1);
 }
 
+static bool viommu_capable(enum iommu_cap cap)
+{
+        switch (cap) {
+        case IOMMU_CAP_CACHE_COHERENCY:
+                return true;
+        default:
+                return false;
+        }
+}
+
 static struct iommu_ops viommu_ops = {
+        .capable = viommu_capable,
         .domain_alloc = viommu_domain_alloc,
         .probe_device = viommu_probe_device,
         .probe_finalize = viommu_probe_finalize,
+3 -1
include/linux/dmar.h
···
 
 extern struct rw_semaphore dmar_global_lock;
 extern struct list_head dmar_drhd_units;
+extern int intel_iommu_enabled;
 
 #define for_each_drhd_unit(drhd) \
         list_for_each_entry_rcu(drhd, &dmar_drhd_units, list, \
···
 static inline bool dmar_rcu_check(void)
 {
         return rwsem_is_locked(&dmar_global_lock) ||
-               system_state == SYSTEM_BOOTING;
+               system_state == SYSTEM_BOOTING ||
+               (IS_ENABLED(CONFIG_INTEL_IOMMU) && !intel_iommu_enabled);
 }
 
 #define dmar_rcu_dereference(p) rcu_dereference_check((p), dmar_rcu_check())