Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'vfio-v5.4-rc1' of git://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:

- Fix spapr iommu error case (Alexey Kardashevskiy)

- Consolidate region type definitions (Cornelia Huck)

- Restore saved original PCI state on release (hexin)

- Simplify mtty sample driver interrupt path (Parav Pandit)

- Support for reporting valid IOVA regions to user (Shameer Kolothum)

* tag 'vfio-v5.4-rc1' of git://github.com/awilliam/linux-vfio:
vfio_pci: Restore original state on release
vfio/type1: remove duplicate retrieval of reserved regions
vfio/type1: Add IOVA range capability support
vfio/type1: check dma map request is within a valid iova range
vfio/spapr_tce: Fix incorrect tce_iommu_group memory free
vfio-mdev/mtty: Simplify interrupt generation
vfio: re-arrange vfio region definitions
vfio/type1: Update iova list on detach
vfio/type1: Check reserved region conflict and update iova list
vfio/type1: Introduce iova list and add iommu aperture validity check

+583 -71
+13 -4
drivers/vfio/pci/vfio_pci.c
··· 438 438 pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE); 439 439 440 440 /* 441 - * Try to reset the device. The success of this is dependent on 442 - * being able to lock the device, which is not always possible. 441 + * Try to get the locks ourselves to prevent a deadlock. The 442 + * success of this is dependent on being able to lock the device, 443 + * which is not always possible. 444 + * We can not use the "try" reset interface here, which will 445 + * overwrite the previously restored configuration information. 443 446 */ 444 - if (vdev->reset_works && !pci_try_reset_function(pdev)) 445 - vdev->needs_reset = false; 447 + if (vdev->reset_works && pci_cfg_access_trylock(pdev)) { 448 + if (device_trylock(&pdev->dev)) { 449 + if (!__pci_reset_function_locked(pdev)) 450 + vdev->needs_reset = false; 451 + device_unlock(&pdev->dev); 452 + } 453 + pci_cfg_access_unlock(pdev); 454 + } 446 455 447 456 pci_restore_state(pdev); 448 457 out:
+5 -4
drivers/vfio/vfio_iommu_spapr_tce.c
··· 1240 1240 static int tce_iommu_attach_group(void *iommu_data, 1241 1241 struct iommu_group *iommu_group) 1242 1242 { 1243 - int ret; 1243 + int ret = 0; 1244 1244 struct tce_container *container = iommu_data; 1245 1245 struct iommu_table_group *table_group; 1246 1246 struct tce_iommu_group *tcegrp = NULL; ··· 1293 1293 !table_group->ops->release_ownership) { 1294 1294 if (container->v2) { 1295 1295 ret = -EPERM; 1296 - goto unlock_exit; 1296 + goto free_exit; 1297 1297 } 1298 1298 ret = tce_iommu_take_ownership(container, table_group); 1299 1299 } else { 1300 1300 if (!container->v2) { 1301 1301 ret = -EPERM; 1302 - goto unlock_exit; 1302 + goto free_exit; 1303 1303 } 1304 1304 ret = tce_iommu_take_ownership_ddw(container, table_group); 1305 1305 if (!tce_groups_attached(container) && !container->tables[0]) ··· 1311 1311 list_add(&tcegrp->next, &container->group_list); 1312 1312 } 1313 1313 1314 - unlock_exit: 1314 + free_exit: 1315 1315 if (ret && tcegrp) 1316 1316 kfree(tcegrp); 1317 1317 1318 + unlock_exit: 1318 1319 mutex_unlock(&container->lock); 1319 1320 1320 1321 return ret;
+506 -12
drivers/vfio/vfio_iommu_type1.c
··· 62 62 63 63 struct vfio_iommu { 64 64 struct list_head domain_list; 65 + struct list_head iova_list; 65 66 struct vfio_domain *external_domain; /* domain for external user */ 66 67 struct mutex lock; 67 68 struct rb_root dma_list; ··· 96 95 struct iommu_group *iommu_group; 97 96 struct list_head next; 98 97 bool mdev_group; /* An mdev group */ 98 + }; 99 + 100 + struct vfio_iova { 101 + struct list_head list; 102 + dma_addr_t start; 103 + dma_addr_t end; 99 104 }; 100 105 101 106 /* ··· 1045 1038 return ret; 1046 1039 } 1047 1040 1041 + /* 1042 + * Check dma map request is within a valid iova range 1043 + */ 1044 + static bool vfio_iommu_iova_dma_valid(struct vfio_iommu *iommu, 1045 + dma_addr_t start, dma_addr_t end) 1046 + { 1047 + struct list_head *iova = &iommu->iova_list; 1048 + struct vfio_iova *node; 1049 + 1050 + list_for_each_entry(node, iova, list) { 1051 + if (start >= node->start && end <= node->end) 1052 + return true; 1053 + } 1054 + 1055 + /* 1056 + * Check for list_empty() as well since a container with 1057 + * a single mdev device will have an empty list. 
1058 + */ 1059 + return list_empty(iova); 1060 + } 1061 + 1048 1062 static int vfio_dma_do_map(struct vfio_iommu *iommu, 1049 1063 struct vfio_iommu_type1_dma_map *map) 1050 1064 { ··· 1106 1078 1107 1079 if (!iommu->dma_avail) { 1108 1080 ret = -ENOSPC; 1081 + goto out_unlock; 1082 + } 1083 + 1084 + if (!vfio_iommu_iova_dma_valid(iommu, iova, iova + size - 1)) { 1085 + ret = -EINVAL; 1109 1086 goto out_unlock; 1110 1087 } 1111 1088 ··· 1303 1270 return NULL; 1304 1271 } 1305 1272 1306 - static bool vfio_iommu_has_sw_msi(struct iommu_group *group, phys_addr_t *base) 1273 + static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions, 1274 + phys_addr_t *base) 1307 1275 { 1308 - struct list_head group_resv_regions; 1309 - struct iommu_resv_region *region, *next; 1276 + struct iommu_resv_region *region; 1310 1277 bool ret = false; 1311 1278 1312 - INIT_LIST_HEAD(&group_resv_regions); 1313 - iommu_get_group_resv_regions(group, &group_resv_regions); 1314 - list_for_each_entry(region, &group_resv_regions, list) { 1279 + list_for_each_entry(region, group_resv_regions, list) { 1315 1280 /* 1316 1281 * The presence of any 'real' MSI regions should take 1317 1282 * precedence over the software-managed one if the ··· 1325 1294 ret = true; 1326 1295 } 1327 1296 } 1328 - list_for_each_entry_safe(region, next, &group_resv_regions, list) 1329 - kfree(region); 1297 + 1330 1298 return ret; 1331 1299 } 1332 1300 ··· 1425 1395 return 0; 1426 1396 } 1427 1397 1398 + /* 1399 + * This is a helper function to insert an address range to iova list. 1400 + * The list is initially created with a single entry corresponding to 1401 + * the IOMMU domain geometry to which the device group is attached. 1402 + * The list aperture gets modified when a new domain is added to the 1403 + * container if the new aperture doesn't conflict with the current one 1404 + * or with any existing dma mappings. 
The list is also modified to 1405 + * exclude any reserved regions associated with the device group. 1406 + */ 1407 + static int vfio_iommu_iova_insert(struct list_head *head, 1408 + dma_addr_t start, dma_addr_t end) 1409 + { 1410 + struct vfio_iova *region; 1411 + 1412 + region = kmalloc(sizeof(*region), GFP_KERNEL); 1413 + if (!region) 1414 + return -ENOMEM; 1415 + 1416 + INIT_LIST_HEAD(&region->list); 1417 + region->start = start; 1418 + region->end = end; 1419 + 1420 + list_add_tail(&region->list, head); 1421 + return 0; 1422 + } 1423 + 1424 + /* 1425 + * Check the new iommu aperture conflicts with existing aper or with any 1426 + * existing dma mappings. 1427 + */ 1428 + static bool vfio_iommu_aper_conflict(struct vfio_iommu *iommu, 1429 + dma_addr_t start, dma_addr_t end) 1430 + { 1431 + struct vfio_iova *first, *last; 1432 + struct list_head *iova = &iommu->iova_list; 1433 + 1434 + if (list_empty(iova)) 1435 + return false; 1436 + 1437 + /* Disjoint sets, return conflict */ 1438 + first = list_first_entry(iova, struct vfio_iova, list); 1439 + last = list_last_entry(iova, struct vfio_iova, list); 1440 + if (start > last->end || end < first->start) 1441 + return true; 1442 + 1443 + /* Check for any existing dma mappings below the new start */ 1444 + if (start > first->start) { 1445 + if (vfio_find_dma(iommu, first->start, start - first->start)) 1446 + return true; 1447 + } 1448 + 1449 + /* Check for any existing dma mappings beyond the new end */ 1450 + if (end < last->end) { 1451 + if (vfio_find_dma(iommu, end + 1, last->end - end)) 1452 + return true; 1453 + } 1454 + 1455 + return false; 1456 + } 1457 + 1458 + /* 1459 + * Resize iommu iova aperture window. This is called only if the new 1460 + * aperture has no conflict with existing aperture and dma mappings. 
1461 + */ 1462 + static int vfio_iommu_aper_resize(struct list_head *iova, 1463 + dma_addr_t start, dma_addr_t end) 1464 + { 1465 + struct vfio_iova *node, *next; 1466 + 1467 + if (list_empty(iova)) 1468 + return vfio_iommu_iova_insert(iova, start, end); 1469 + 1470 + /* Adjust iova list start */ 1471 + list_for_each_entry_safe(node, next, iova, list) { 1472 + if (start < node->start) 1473 + break; 1474 + if (start >= node->start && start < node->end) { 1475 + node->start = start; 1476 + break; 1477 + } 1478 + /* Delete nodes before new start */ 1479 + list_del(&node->list); 1480 + kfree(node); 1481 + } 1482 + 1483 + /* Adjust iova list end */ 1484 + list_for_each_entry_safe(node, next, iova, list) { 1485 + if (end > node->end) 1486 + continue; 1487 + if (end > node->start && end <= node->end) { 1488 + node->end = end; 1489 + continue; 1490 + } 1491 + /* Delete nodes after new end */ 1492 + list_del(&node->list); 1493 + kfree(node); 1494 + } 1495 + 1496 + return 0; 1497 + } 1498 + 1499 + /* 1500 + * Check reserved region conflicts with existing dma mappings 1501 + */ 1502 + static bool vfio_iommu_resv_conflict(struct vfio_iommu *iommu, 1503 + struct list_head *resv_regions) 1504 + { 1505 + struct iommu_resv_region *region; 1506 + 1507 + /* Check for conflict with existing dma mappings */ 1508 + list_for_each_entry(region, resv_regions, list) { 1509 + if (region->type == IOMMU_RESV_DIRECT_RELAXABLE) 1510 + continue; 1511 + 1512 + if (vfio_find_dma(iommu, region->start, region->length)) 1513 + return true; 1514 + } 1515 + 1516 + return false; 1517 + } 1518 + 1519 + /* 1520 + * Check iova region overlap with reserved regions and 1521 + * exclude them from the iommu iova range 1522 + */ 1523 + static int vfio_iommu_resv_exclude(struct list_head *iova, 1524 + struct list_head *resv_regions) 1525 + { 1526 + struct iommu_resv_region *resv; 1527 + struct vfio_iova *n, *next; 1528 + 1529 + list_for_each_entry(resv, resv_regions, list) { 1530 + phys_addr_t start, end; 1531 + 
1532 + if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE) 1533 + continue; 1534 + 1535 + start = resv->start; 1536 + end = resv->start + resv->length - 1; 1537 + 1538 + list_for_each_entry_safe(n, next, iova, list) { 1539 + int ret = 0; 1540 + 1541 + /* No overlap */ 1542 + if (start > n->end || end < n->start) 1543 + continue; 1544 + /* 1545 + * Insert a new node if current node overlaps with the 1546 + * reserve region to exlude that from valid iova range. 1547 + * Note that, new node is inserted before the current 1548 + * node and finally the current node is deleted keeping 1549 + * the list updated and sorted. 1550 + */ 1551 + if (start > n->start) 1552 + ret = vfio_iommu_iova_insert(&n->list, n->start, 1553 + start - 1); 1554 + if (!ret && end < n->end) 1555 + ret = vfio_iommu_iova_insert(&n->list, end + 1, 1556 + n->end); 1557 + if (ret) 1558 + return ret; 1559 + 1560 + list_del(&n->list); 1561 + kfree(n); 1562 + } 1563 + } 1564 + 1565 + if (list_empty(iova)) 1566 + return -EINVAL; 1567 + 1568 + return 0; 1569 + } 1570 + 1571 + static void vfio_iommu_resv_free(struct list_head *resv_regions) 1572 + { 1573 + struct iommu_resv_region *n, *next; 1574 + 1575 + list_for_each_entry_safe(n, next, resv_regions, list) { 1576 + list_del(&n->list); 1577 + kfree(n); 1578 + } 1579 + } 1580 + 1581 + static void vfio_iommu_iova_free(struct list_head *iova) 1582 + { 1583 + struct vfio_iova *n, *next; 1584 + 1585 + list_for_each_entry_safe(n, next, iova, list) { 1586 + list_del(&n->list); 1587 + kfree(n); 1588 + } 1589 + } 1590 + 1591 + static int vfio_iommu_iova_get_copy(struct vfio_iommu *iommu, 1592 + struct list_head *iova_copy) 1593 + { 1594 + struct list_head *iova = &iommu->iova_list; 1595 + struct vfio_iova *n; 1596 + int ret; 1597 + 1598 + list_for_each_entry(n, iova, list) { 1599 + ret = vfio_iommu_iova_insert(iova_copy, n->start, n->end); 1600 + if (ret) 1601 + goto out_free; 1602 + } 1603 + 1604 + return 0; 1605 + 1606 + out_free: 1607 + 
vfio_iommu_iova_free(iova_copy); 1608 + return ret; 1609 + } 1610 + 1611 + static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu, 1612 + struct list_head *iova_copy) 1613 + { 1614 + struct list_head *iova = &iommu->iova_list; 1615 + 1616 + vfio_iommu_iova_free(iova); 1617 + 1618 + list_splice_tail(iova_copy, iova); 1619 + } 1428 1620 static int vfio_iommu_type1_attach_group(void *iommu_data, 1429 1621 struct iommu_group *iommu_group) 1430 1622 { ··· 1657 1405 int ret; 1658 1406 bool resv_msi, msi_remap; 1659 1407 phys_addr_t resv_msi_base; 1408 + struct iommu_domain_geometry geo; 1409 + LIST_HEAD(iova_copy); 1410 + LIST_HEAD(group_resv_regions); 1660 1411 1661 1412 mutex_lock(&iommu->lock); 1662 1413 ··· 1736 1481 if (ret) 1737 1482 goto out_domain; 1738 1483 1739 - resv_msi = vfio_iommu_has_sw_msi(iommu_group, &resv_msi_base); 1484 + /* Get aperture info */ 1485 + iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_GEOMETRY, &geo); 1486 + 1487 + if (vfio_iommu_aper_conflict(iommu, geo.aperture_start, 1488 + geo.aperture_end)) { 1489 + ret = -EINVAL; 1490 + goto out_detach; 1491 + } 1492 + 1493 + ret = iommu_get_group_resv_regions(iommu_group, &group_resv_regions); 1494 + if (ret) 1495 + goto out_detach; 1496 + 1497 + if (vfio_iommu_resv_conflict(iommu, &group_resv_regions)) { 1498 + ret = -EINVAL; 1499 + goto out_detach; 1500 + } 1501 + 1502 + /* 1503 + * We don't want to work on the original iova list as the list 1504 + * gets modified and in case of failure we have to retain the 1505 + * original list. Get a copy here. 
1506 + */ 1507 + ret = vfio_iommu_iova_get_copy(iommu, &iova_copy); 1508 + if (ret) 1509 + goto out_detach; 1510 + 1511 + ret = vfio_iommu_aper_resize(&iova_copy, geo.aperture_start, 1512 + geo.aperture_end); 1513 + if (ret) 1514 + goto out_detach; 1515 + 1516 + ret = vfio_iommu_resv_exclude(&iova_copy, &group_resv_regions); 1517 + if (ret) 1518 + goto out_detach; 1519 + 1520 + resv_msi = vfio_iommu_has_sw_msi(&group_resv_regions, &resv_msi_base); 1740 1521 1741 1522 INIT_LIST_HEAD(&domain->group_list); 1742 1523 list_add(&group->next, &domain->group_list); ··· 1805 1514 list_add(&group->next, &d->group_list); 1806 1515 iommu_domain_free(domain->domain); 1807 1516 kfree(domain); 1808 - mutex_unlock(&iommu->lock); 1809 - return 0; 1517 + goto done; 1810 1518 } 1811 1519 1812 1520 ret = vfio_iommu_attach_group(domain, group); ··· 1828 1538 } 1829 1539 1830 1540 list_add(&domain->next, &iommu->domain_list); 1831 - 1541 + done: 1542 + /* Delete the old one and insert new iova list */ 1543 + vfio_iommu_iova_insert_copy(iommu, &iova_copy); 1832 1544 mutex_unlock(&iommu->lock); 1545 + vfio_iommu_resv_free(&group_resv_regions); 1833 1546 1834 1547 return 0; 1835 1548 ··· 1840 1547 vfio_iommu_detach_group(domain, group); 1841 1548 out_domain: 1842 1549 iommu_domain_free(domain->domain); 1550 + vfio_iommu_iova_free(&iova_copy); 1551 + vfio_iommu_resv_free(&group_resv_regions); 1843 1552 out_free: 1844 1553 kfree(domain); 1845 1554 kfree(group); ··· 1897 1602 WARN_ON(iommu->notifier.head); 1898 1603 } 1899 1604 1605 + /* 1606 + * Called when a domain is removed in detach. It is possible that 1607 + * the removed domain decided the iova aperture window. Modify the 1608 + * iova aperture with the smallest window among existing domains. 
1609 + */ 1610 + static void vfio_iommu_aper_expand(struct vfio_iommu *iommu, 1611 + struct list_head *iova_copy) 1612 + { 1613 + struct vfio_domain *domain; 1614 + struct iommu_domain_geometry geo; 1615 + struct vfio_iova *node; 1616 + dma_addr_t start = 0; 1617 + dma_addr_t end = (dma_addr_t)~0; 1618 + 1619 + if (list_empty(iova_copy)) 1620 + return; 1621 + 1622 + list_for_each_entry(domain, &iommu->domain_list, next) { 1623 + iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_GEOMETRY, 1624 + &geo); 1625 + if (geo.aperture_start > start) 1626 + start = geo.aperture_start; 1627 + if (geo.aperture_end < end) 1628 + end = geo.aperture_end; 1629 + } 1630 + 1631 + /* Modify aperture limits. The new aper is either same or bigger */ 1632 + node = list_first_entry(iova_copy, struct vfio_iova, list); 1633 + node->start = start; 1634 + node = list_last_entry(iova_copy, struct vfio_iova, list); 1635 + node->end = end; 1636 + } 1637 + 1638 + /* 1639 + * Called when a group is detached. The reserved regions for that 1640 + * group can be part of valid iova now. But since reserved regions 1641 + * may be duplicated among groups, populate the iova valid regions 1642 + * list again. 
1643 + */ 1644 + static int vfio_iommu_resv_refresh(struct vfio_iommu *iommu, 1645 + struct list_head *iova_copy) 1646 + { 1647 + struct vfio_domain *d; 1648 + struct vfio_group *g; 1649 + struct vfio_iova *node; 1650 + dma_addr_t start, end; 1651 + LIST_HEAD(resv_regions); 1652 + int ret; 1653 + 1654 + if (list_empty(iova_copy)) 1655 + return -EINVAL; 1656 + 1657 + list_for_each_entry(d, &iommu->domain_list, next) { 1658 + list_for_each_entry(g, &d->group_list, next) { 1659 + ret = iommu_get_group_resv_regions(g->iommu_group, 1660 + &resv_regions); 1661 + if (ret) 1662 + goto done; 1663 + } 1664 + } 1665 + 1666 + node = list_first_entry(iova_copy, struct vfio_iova, list); 1667 + start = node->start; 1668 + node = list_last_entry(iova_copy, struct vfio_iova, list); 1669 + end = node->end; 1670 + 1671 + /* purge the iova list and create new one */ 1672 + vfio_iommu_iova_free(iova_copy); 1673 + 1674 + ret = vfio_iommu_aper_resize(iova_copy, start, end); 1675 + if (ret) 1676 + goto done; 1677 + 1678 + /* Exclude current reserved regions from iova ranges */ 1679 + ret = vfio_iommu_resv_exclude(iova_copy, &resv_regions); 1680 + done: 1681 + vfio_iommu_resv_free(&resv_regions); 1682 + return ret; 1683 + } 1684 + 1900 1685 static void vfio_iommu_type1_detach_group(void *iommu_data, 1901 1686 struct iommu_group *iommu_group) 1902 1687 { 1903 1688 struct vfio_iommu *iommu = iommu_data; 1904 1689 struct vfio_domain *domain; 1905 1690 struct vfio_group *group; 1691 + LIST_HEAD(iova_copy); 1906 1692 1907 1693 mutex_lock(&iommu->lock); 1908 1694 ··· 2005 1629 goto detach_group_done; 2006 1630 } 2007 1631 } 1632 + 1633 + /* 1634 + * Get a copy of iova list. This will be used to update 1635 + * and to replace the current one later. Please note that 1636 + * we will leave the original list as it is if update fails. 
1637 + */ 1638 + vfio_iommu_iova_get_copy(iommu, &iova_copy); 2008 1639 2009 1640 list_for_each_entry(domain, &iommu->domain_list, next) { 2010 1641 group = find_iommu_group(domain, iommu_group); ··· 2038 1655 iommu_domain_free(domain->domain); 2039 1656 list_del(&domain->next); 2040 1657 kfree(domain); 1658 + vfio_iommu_aper_expand(iommu, &iova_copy); 2041 1659 } 2042 1660 break; 2043 1661 } 1662 + 1663 + if (!vfio_iommu_resv_refresh(iommu, &iova_copy)) 1664 + vfio_iommu_iova_insert_copy(iommu, &iova_copy); 1665 + else 1666 + vfio_iommu_iova_free(&iova_copy); 2044 1667 2045 1668 detach_group_done: 2046 1669 mutex_unlock(&iommu->lock); ··· 2075 1686 } 2076 1687 2077 1688 INIT_LIST_HEAD(&iommu->domain_list); 1689 + INIT_LIST_HEAD(&iommu->iova_list); 2078 1690 iommu->dma_list = RB_ROOT; 2079 1691 iommu->dma_avail = dma_entry_limit; 2080 1692 mutex_init(&iommu->lock); ··· 2119 1729 list_del(&domain->next); 2120 1730 kfree(domain); 2121 1731 } 1732 + 1733 + vfio_iommu_iova_free(&iommu->iova_list); 1734 + 2122 1735 kfree(iommu); 2123 1736 } 2124 1737 ··· 2139 1746 } 2140 1747 mutex_unlock(&iommu->lock); 2141 1748 1749 + return ret; 1750 + } 1751 + 1752 + static int vfio_iommu_iova_add_cap(struct vfio_info_cap *caps, 1753 + struct vfio_iommu_type1_info_cap_iova_range *cap_iovas, 1754 + size_t size) 1755 + { 1756 + struct vfio_info_cap_header *header; 1757 + struct vfio_iommu_type1_info_cap_iova_range *iova_cap; 1758 + 1759 + header = vfio_info_cap_add(caps, size, 1760 + VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE, 1); 1761 + if (IS_ERR(header)) 1762 + return PTR_ERR(header); 1763 + 1764 + iova_cap = container_of(header, 1765 + struct vfio_iommu_type1_info_cap_iova_range, 1766 + header); 1767 + iova_cap->nr_iovas = cap_iovas->nr_iovas; 1768 + memcpy(iova_cap->iova_ranges, cap_iovas->iova_ranges, 1769 + cap_iovas->nr_iovas * sizeof(*cap_iovas->iova_ranges)); 1770 + return 0; 1771 + } 1772 + 1773 + static int vfio_iommu_iova_build_caps(struct vfio_iommu *iommu, 1774 + struct 
vfio_info_cap *caps) 1775 + { 1776 + struct vfio_iommu_type1_info_cap_iova_range *cap_iovas; 1777 + struct vfio_iova *iova; 1778 + size_t size; 1779 + int iovas = 0, i = 0, ret; 1780 + 1781 + mutex_lock(&iommu->lock); 1782 + 1783 + list_for_each_entry(iova, &iommu->iova_list, list) 1784 + iovas++; 1785 + 1786 + if (!iovas) { 1787 + /* 1788 + * Return 0 as a container with a single mdev device 1789 + * will have an empty list 1790 + */ 1791 + ret = 0; 1792 + goto out_unlock; 1793 + } 1794 + 1795 + size = sizeof(*cap_iovas) + (iovas * sizeof(*cap_iovas->iova_ranges)); 1796 + 1797 + cap_iovas = kzalloc(size, GFP_KERNEL); 1798 + if (!cap_iovas) { 1799 + ret = -ENOMEM; 1800 + goto out_unlock; 1801 + } 1802 + 1803 + cap_iovas->nr_iovas = iovas; 1804 + 1805 + list_for_each_entry(iova, &iommu->iova_list, list) { 1806 + cap_iovas->iova_ranges[i].start = iova->start; 1807 + cap_iovas->iova_ranges[i].end = iova->end; 1808 + i++; 1809 + } 1810 + 1811 + ret = vfio_iommu_iova_add_cap(caps, cap_iovas, size); 1812 + 1813 + kfree(cap_iovas); 1814 + out_unlock: 1815 + mutex_unlock(&iommu->lock); 2142 1816 return ret; 2143 1817 } 2144 1818 ··· 2230 1770 } 2231 1771 } else if (cmd == VFIO_IOMMU_GET_INFO) { 2232 1772 struct vfio_iommu_type1_info info; 1773 + struct vfio_info_cap caps = { .buf = NULL, .size = 0 }; 1774 + unsigned long capsz; 1775 + int ret; 2233 1776 2234 1777 minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes); 1778 + 1779 + /* For backward compatibility, cannot require this */ 1780 + capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset); 2235 1781 2236 1782 if (copy_from_user(&info, (void __user *)arg, minsz)) 2237 1783 return -EFAULT; ··· 2245 1779 if (info.argsz < minsz) 2246 1780 return -EINVAL; 2247 1781 1782 + if (info.argsz >= capsz) { 1783 + minsz = capsz; 1784 + info.cap_offset = 0; /* output, no-recopy necessary */ 1785 + } 1786 + 2248 1787 info.flags = VFIO_IOMMU_INFO_PGSIZES; 2249 1788 2250 1789 info.iova_pgsizes = 
vfio_pgsize_bitmap(iommu); 1790 + 1791 + ret = vfio_iommu_iova_build_caps(iommu, &caps); 1792 + if (ret) 1793 + return ret; 1794 + 1795 + if (caps.size) { 1796 + info.flags |= VFIO_IOMMU_INFO_CAPS; 1797 + 1798 + if (info.argsz < sizeof(info) + caps.size) { 1799 + info.argsz = sizeof(info) + caps.size; 1800 + } else { 1801 + vfio_info_cap_shift(&caps, sizeof(info)); 1802 + if (copy_to_user((void __user *)arg + 1803 + sizeof(info), caps.buf, 1804 + caps.size)) { 1805 + kfree(caps.buf); 1806 + return -EFAULT; 1807 + } 1808 + info.cap_offset = sizeof(info); 1809 + } 1810 + 1811 + kfree(caps.buf); 1812 + } 2251 1813 2252 1814 return copy_to_user((void __user *)arg, &info, minsz) ? 2253 1815 -EFAULT : 0;
+51 -20
include/uapi/linux/vfio.h
··· 295 295 __u32 subtype; /* type specific */ 296 296 }; 297 297 298 + /* 299 + * List of region types, global per bus driver. 300 + * If you introduce a new type, please add it here. 301 + */ 302 + 303 + /* PCI region type containing a PCI vendor part */ 298 304 #define VFIO_REGION_TYPE_PCI_VENDOR_TYPE (1 << 31) 299 305 #define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff) 306 + #define VFIO_REGION_TYPE_GFX (1) 307 + #define VFIO_REGION_TYPE_CCW (2) 300 308 301 - /* 8086 Vendor sub-types */ 309 + /* sub-types for VFIO_REGION_TYPE_PCI_* */ 310 + 311 + /* 8086 vendor PCI sub-types */ 302 312 #define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1) 303 313 #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2) 304 314 #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3) 305 315 306 - #define VFIO_REGION_TYPE_GFX (1) 316 + /* 10de vendor PCI sub-types */ 317 + /* 318 + * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space. 319 + */ 320 + #define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM (1) 321 + 322 + /* 1014 vendor PCI sub-types */ 323 + /* 324 + * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU 325 + * to do TLB invalidation on a GPU. 326 + */ 327 + #define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD (1) 328 + 329 + /* sub-types for VFIO_REGION_TYPE_GFX */ 307 330 #define VFIO_REGION_SUBTYPE_GFX_EDID (1) 308 331 309 332 /** ··· 376 353 #define VFIO_DEVICE_GFX_LINK_STATE_DOWN 2 377 354 }; 378 355 379 - #define VFIO_REGION_TYPE_CCW (2) 380 - /* ccw sub-types */ 356 + /* sub-types for VFIO_REGION_TYPE_CCW */ 381 357 #define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD (1) 382 - 383 - /* 384 - * 10de vendor sub-type 385 - * 386 - * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space. 387 - */ 388 - #define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM (1) 389 - 390 - /* 391 - * 1014 vendor sub-type 392 - * 393 - * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU 394 - * to do TLB invalidation on a GPU. 
395 - */ 396 - #define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD (1) 397 358 398 359 /* 399 360 * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped ··· 721 714 __u32 argsz; 722 715 __u32 flags; 723 716 #define VFIO_IOMMU_INFO_PGSIZES (1 << 0) /* supported page sizes info */ 724 - __u64 iova_pgsizes; /* Bitmap of supported page sizes */ 717 + #define VFIO_IOMMU_INFO_CAPS (1 << 1) /* Info supports caps */ 718 + __u64 iova_pgsizes; /* Bitmap of supported page sizes */ 719 + __u32 cap_offset; /* Offset within info struct of first cap */ 720 + }; 721 + 722 + /* 723 + * The IOVA capability allows to report the valid IOVA range(s) 724 + * excluding any non-relaxable reserved regions exposed by 725 + * devices attached to the container. Any DMA map attempt 726 + * outside the valid iova range will return error. 727 + * 728 + * The structures below define version 1 of this capability. 729 + */ 730 + #define VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE 1 731 + 732 + struct vfio_iova_range { 733 + __u64 start; 734 + __u64 end; 735 + }; 736 + 737 + struct vfio_iommu_type1_info_cap_iova_range { 738 + struct vfio_info_cap_header header; 739 + __u32 nr_iovas; 740 + __u32 reserved; 741 + struct vfio_iova_range iova_ranges[]; 725 742 }; 726 743 727 744 #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
+8 -31
samples/vfio-mdev/mtty.c
··· 152 152 153 153 /* function prototypes */ 154 154 155 - static int mtty_trigger_interrupt(const guid_t *uuid); 155 + static int mtty_trigger_interrupt(struct mdev_state *mdev_state); 156 156 157 157 /* Helper functions */ 158 - static struct mdev_state *find_mdev_state_by_uuid(const guid_t *uuid) 159 - { 160 - struct mdev_state *mds; 161 - 162 - list_for_each_entry(mds, &mdev_devices_list, next) { 163 - if (guid_equal(mdev_uuid(mds->mdev), uuid)) 164 - return mds; 165 - } 166 - 167 - return NULL; 168 - } 169 158 170 159 static void dump_buffer(u8 *buf, uint32_t count) 171 160 { ··· 326 337 pr_err("Serial port %d: Fifo level trigger\n", 327 338 index); 328 339 #endif 329 - mtty_trigger_interrupt( 330 - mdev_uuid(mdev_state->mdev)); 340 + mtty_trigger_interrupt(mdev_state); 331 341 } 332 342 } else { 333 343 #if defined(DEBUG_INTR) ··· 340 352 */ 341 353 if (mdev_state->s[index].uart_reg[UART_IER] & 342 354 UART_IER_RLSI) 343 - mtty_trigger_interrupt( 344 - mdev_uuid(mdev_state->mdev)); 355 + mtty_trigger_interrupt(mdev_state); 345 356 } 346 357 mutex_unlock(&mdev_state->rxtx_lock); 347 358 break; ··· 359 372 pr_err("Serial port %d: IER_THRI write\n", 360 373 index); 361 374 #endif 362 - mtty_trigger_interrupt( 363 - mdev_uuid(mdev_state->mdev)); 375 + mtty_trigger_interrupt(mdev_state); 364 376 } 365 377 366 378 mutex_unlock(&mdev_state->rxtx_lock); ··· 430 444 #if defined(DEBUG_INTR) 431 445 pr_err("Serial port %d: MCR_OUT2 write\n", index); 432 446 #endif 433 - mtty_trigger_interrupt(mdev_uuid(mdev_state->mdev)); 447 + mtty_trigger_interrupt(mdev_state); 434 448 } 435 449 436 450 if ((mdev_state->s[index].uart_reg[UART_IER] & UART_IER_MSI) && ··· 438 452 #if defined(DEBUG_INTR) 439 453 pr_err("Serial port %d: MCR RTS/DTR write\n", index); 440 454 #endif 441 - mtty_trigger_interrupt(mdev_uuid(mdev_state->mdev)); 455 + mtty_trigger_interrupt(mdev_state); 442 456 } 443 457 break; 444 458 ··· 489 503 #endif 490 504 if (mdev_state->s[index].uart_reg[UART_IER] & 491 
505 UART_IER_THRI) 492 - mtty_trigger_interrupt( 493 - mdev_uuid(mdev_state->mdev)); 506 + mtty_trigger_interrupt(mdev_state); 494 507 } 495 508 mutex_unlock(&mdev_state->rxtx_lock); 496 509 ··· 1013 1028 return ret; 1014 1029 } 1015 1030 1016 - static int mtty_trigger_interrupt(const guid_t *uuid) 1031 + static int mtty_trigger_interrupt(struct mdev_state *mdev_state) 1017 1032 { 1018 1033 int ret = -1; 1019 - struct mdev_state *mdev_state; 1020 - 1021 - mdev_state = find_mdev_state_by_uuid(uuid); 1022 - 1023 - if (!mdev_state) { 1024 - pr_info("%s: mdev not found\n", __func__); 1025 - return -EINVAL; 1026 - } 1027 1034 1028 1035 if ((mdev_state->irq_index == VFIO_PCI_MSI_IRQ_INDEX) && 1029 1036 (!mdev_state->msi_evtfd))