Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

iommu/vt-d: Fix general protection fault in aux_detach_device()

Aux-domain attach and detach operations are not tracked, so some data
structures might be used after they are freed. This causes general
protection faults when multiple subdevices are created and assigned to
the same guest machine:

| general protection fault, probably for non-canonical address 0xdead000000000100: 0000 [#1] SMP NOPTI
| RIP: 0010:intel_iommu_aux_detach_device+0x12a/0x1f0
| [...]
| Call Trace:
| iommu_aux_detach_device+0x24/0x70
| vfio_mdev_detach_domain+0x3b/0x60
| ? vfio_mdev_set_domain+0x50/0x50
| iommu_group_for_each_dev+0x4f/0x80
| vfio_iommu_detach_group.isra.0+0x22/0x30
| vfio_iommu_type1_detach_group.cold+0x71/0x211
| ? find_exported_symbol_in_section+0x4a/0xd0
| ? each_symbol_section+0x28/0x50
| __vfio_group_unset_container+0x4d/0x150
| vfio_group_try_dissolve_container+0x25/0x30
| vfio_group_put_external_user+0x13/0x20
| kvm_vfio_group_put_external_user+0x27/0x40 [kvm]
| kvm_vfio_destroy+0x45/0xb0 [kvm]
| kvm_put_kvm+0x1bb/0x2e0 [kvm]
| kvm_vm_release+0x22/0x30 [kvm]
| __fput+0xcc/0x260
| ____fput+0xe/0x10
| task_work_run+0x8f/0xb0
| do_exit+0x358/0xaf0
| ? wake_up_state+0x10/0x20
| ? signal_wake_up_state+0x1a/0x30
| do_group_exit+0x47/0xb0
| __x64_sys_exit_group+0x18/0x20
| do_syscall_64+0x57/0x1d0
| entry_SYSCALL_64_after_hwframe+0x44/0xa9

Fix the crash by tracking the subdevices when attaching and detaching
aux-domains.

Fixes: 67b8e02b5e76 ("iommu/vt-d: Aux-domain specific domain attach/detach")
Co-developed-by: Xin Zeng <xin.zeng@intel.com>
Signed-off-by: Xin Zeng <xin.zeng@intel.com>
Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
Acked-by: Lu Baolu <baolu.lu@linux.intel.com>
Link: https://lore.kernel.org/r/1609949037-25291-3-git-send-email-yi.l.liu@intel.com
Signed-off-by: Will Deacon <will@kernel.org>

authored by

Liu Yi L and committed by
Will Deacon
18abda7a 9ad9f45b

+84 -31
+73 -26
drivers/iommu/intel/iommu.c
··· 1877 1877 domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL; 1878 1878 domain->has_iotlb_device = false; 1879 1879 INIT_LIST_HEAD(&domain->devices); 1880 + INIT_LIST_HEAD(&domain->subdevices); 1880 1881 1881 1882 return domain; 1882 1883 } ··· 2548 2547 info->iommu = iommu; 2549 2548 info->pasid_table = NULL; 2550 2549 info->auxd_enabled = 0; 2551 - INIT_LIST_HEAD(&info->auxiliary_domains); 2550 + INIT_LIST_HEAD(&info->subdevices); 2552 2551 2553 2552 if (dev && dev_is_pci(dev)) { 2554 2553 struct pci_dev *pdev = to_pci_dev(info->dev); ··· 4476 4475 domain->type == IOMMU_DOMAIN_UNMANAGED; 4477 4476 } 4478 4477 4479 - static void auxiliary_link_device(struct dmar_domain *domain, 4480 - struct device *dev) 4478 + static inline struct subdev_domain_info * 4479 + lookup_subdev_info(struct dmar_domain *domain, struct device *dev) 4481 4480 { 4482 - struct device_domain_info *info = get_domain_info(dev); 4481 + struct subdev_domain_info *sinfo; 4483 4482 4484 - assert_spin_locked(&device_domain_lock); 4485 - if (WARN_ON(!info)) 4486 - return; 4483 + if (!list_empty(&domain->subdevices)) { 4484 + list_for_each_entry(sinfo, &domain->subdevices, link_domain) { 4485 + if (sinfo->pdev == dev) 4486 + return sinfo; 4487 + } 4488 + } 4487 4489 4488 - domain->auxd_refcnt++; 4489 - list_add(&domain->auxd, &info->auxiliary_domains); 4490 + return NULL; 4490 4491 } 4491 4492 4492 - static void auxiliary_unlink_device(struct dmar_domain *domain, 4493 - struct device *dev) 4493 + static int auxiliary_link_device(struct dmar_domain *domain, 4494 + struct device *dev) 4494 4495 { 4495 4496 struct device_domain_info *info = get_domain_info(dev); 4497 + struct subdev_domain_info *sinfo = lookup_subdev_info(domain, dev); 4496 4498 4497 4499 assert_spin_locked(&device_domain_lock); 4498 4500 if (WARN_ON(!info)) 4499 - return; 4501 + return -EINVAL; 4500 4502 4501 - list_del(&domain->auxd); 4502 - domain->auxd_refcnt--; 4503 + if (!sinfo) { 4504 + sinfo = kzalloc(sizeof(*sinfo), GFP_ATOMIC); 
4505 + sinfo->domain = domain; 4506 + sinfo->pdev = dev; 4507 + list_add(&sinfo->link_phys, &info->subdevices); 4508 + list_add(&sinfo->link_domain, &domain->subdevices); 4509 + } 4503 4510 4504 - if (!domain->auxd_refcnt && domain->default_pasid > 0) 4505 - ioasid_put(domain->default_pasid); 4511 + return ++sinfo->users; 4512 + } 4513 + 4514 + static int auxiliary_unlink_device(struct dmar_domain *domain, 4515 + struct device *dev) 4516 + { 4517 + struct device_domain_info *info = get_domain_info(dev); 4518 + struct subdev_domain_info *sinfo = lookup_subdev_info(domain, dev); 4519 + int ret; 4520 + 4521 + assert_spin_locked(&device_domain_lock); 4522 + if (WARN_ON(!info || !sinfo || sinfo->users <= 0)) 4523 + return -EINVAL; 4524 + 4525 + ret = --sinfo->users; 4526 + if (!ret) { 4527 + list_del(&sinfo->link_phys); 4528 + list_del(&sinfo->link_domain); 4529 + kfree(sinfo); 4530 + } 4531 + 4532 + return ret; 4506 4533 } 4507 4534 4508 4535 static int aux_domain_add_dev(struct dmar_domain *domain, ··· 4559 4530 } 4560 4531 4561 4532 spin_lock_irqsave(&device_domain_lock, flags); 4533 + ret = auxiliary_link_device(domain, dev); 4534 + if (ret <= 0) 4535 + goto link_failed; 4536 + 4537 + /* 4538 + * Subdevices from the same physical device can be attached to the 4539 + * same domain. For such cases, only the first subdevice attachment 4540 + * needs to go through the full steps in this function. So if ret > 4541 + * 1, just goto out. 4542 + */ 4543 + if (ret > 1) 4544 + goto out; 4545 + 4562 4546 /* 4563 4547 * iommu->lock must be held to attach domain to iommu and setup the 4564 4548 * pasid entry for second level translation. 
··· 4590 4548 domain->default_pasid); 4591 4549 if (ret) 4592 4550 goto table_failed; 4551 + 4593 4552 spin_unlock(&iommu->lock); 4594 - 4595 - auxiliary_link_device(domain, dev); 4596 - 4553 + out: 4597 4554 spin_unlock_irqrestore(&device_domain_lock, flags); 4598 4555 4599 4556 return 0; ··· 4601 4560 domain_detach_iommu(domain, iommu); 4602 4561 attach_failed: 4603 4562 spin_unlock(&iommu->lock); 4563 + auxiliary_unlink_device(domain, dev); 4564 + link_failed: 4604 4565 spin_unlock_irqrestore(&device_domain_lock, flags); 4605 - if (!domain->auxd_refcnt && domain->default_pasid > 0) 4566 + if (list_empty(&domain->subdevices) && domain->default_pasid > 0) 4606 4567 ioasid_put(domain->default_pasid); 4607 4568 4608 4569 return ret; ··· 4624 4581 info = get_domain_info(dev); 4625 4582 iommu = info->iommu; 4626 4583 4627 - auxiliary_unlink_device(domain, dev); 4628 - 4629 - spin_lock(&iommu->lock); 4630 - intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid, false); 4631 - domain_detach_iommu(domain, iommu); 4632 - spin_unlock(&iommu->lock); 4584 + if (!auxiliary_unlink_device(domain, dev)) { 4585 + spin_lock(&iommu->lock); 4586 + intel_pasid_tear_down_entry(iommu, dev, 4587 + domain->default_pasid, false); 4588 + domain_detach_iommu(domain, iommu); 4589 + spin_unlock(&iommu->lock); 4590 + } 4633 4591 4634 4592 spin_unlock_irqrestore(&device_domain_lock, flags); 4593 + 4594 + if (list_empty(&domain->subdevices) && domain->default_pasid > 0) 4595 + ioasid_put(domain->default_pasid); 4635 4596 } 4636 4597 4637 4598 static int prepare_domain_attach_device(struct iommu_domain *domain,
+11 -5
include/linux/intel-iommu.h
··· 533 533 /* Domain ids per IOMMU. Use u16 since 534 534 * domain ids are 16 bit wide according 535 535 * to VT-d spec, section 9.3 */ 536 - unsigned int auxd_refcnt; /* Refcount of auxiliary attaching */ 537 536 538 537 bool has_iotlb_device; 539 538 struct list_head devices; /* all devices' list */ 540 - struct list_head auxd; /* link to device's auxiliary list */ 539 + struct list_head subdevices; /* all subdevices' list */ 541 540 struct iova_domain iovad; /* iova's that belong to this domain */ 542 541 543 542 struct dma_pte *pgd; /* virtual address */ ··· 609 610 struct dmar_drhd_unit *drhd; 610 611 }; 611 612 613 + /* Per subdevice private data */ 614 + struct subdev_domain_info { 615 + struct list_head link_phys; /* link to phys device siblings */ 616 + struct list_head link_domain; /* link to domain siblings */ 617 + struct device *pdev; /* physical device derived from */ 618 + struct dmar_domain *domain; /* aux-domain */ 619 + int users; /* user count */ 620 + }; 621 + 612 622 /* PCI domain-device relationship */ 613 623 struct device_domain_info { 614 624 struct list_head link; /* link to domain siblings */ 615 625 struct list_head global; /* link to global list */ 616 626 struct list_head table; /* link to pasid table */ 617 - struct list_head auxiliary_domains; /* auxiliary domains 618 - * attached to this device 619 - */ 627 + struct list_head subdevices; /* subdevices sibling */ 620 628 u32 segment; /* PCI segment number */ 621 629 u8 bus; /* PCI bus number */ 622 630 u8 devfn; /* PCI devfn number */