Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/gpuvm: take GEM lock inside drm_gpuvm_bo_obtain_prealloc()

When drm_gpuvm_bo_obtain_prealloc() is called in immediate mode, it may
result in a call to ops->vm_bo_free(vm_bo) while holding the GEM's
gpuva mutex. This is a problem if ops->vm_bo_free(vm_bo) performs
any operations that are not safe in the fence signalling critical path,
and it turns out that Panthor (the only current user of the method)
calls drm_gem_shmem_unpin() which takes a resv lock internally.

This constitutes both a violation of fence signalling safety and a lock
inversion. To fix this, we modify the method to take the GEM's gpuva
mutex internally, so that the mutex can be unlocked before freeing the
preallocated vm_bo.

Note that this modification introduces a requirement that the driver
use immediate mode when calling drm_gpuvm_bo_obtain_prealloc(), as the
function would otherwise take the wrong lock.

Fixes: 63e919a31625 ("panthor: use drm_gpuva_unlink_defer()")
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Alice Ryhl <aliceryhl@google.com>
Link: https://patch.msgid.link/20260108-gpuvm-rust-v2-1-dbd014005a0b@google.com
Signed-off-by: Danilo Krummrich <dakr@kernel.org>

authored by

Alice Ryhl and committed by
Danilo Krummrich
9ce4aef9 7f6721b7

+51 -34
+51 -24
drivers/gpu/drm/drm_gpuvm.c
··· 1602 1602 } 1603 1603 EXPORT_SYMBOL_GPL(drm_gpuvm_bo_create); 1604 1604 1605 + /* 1606 + * drm_gpuvm_bo_destroy_not_in_lists() - final part of drm_gpuvm_bo cleanup 1607 + * @vm_bo: the &drm_gpuvm_bo to destroy 1608 + * 1609 + * It is illegal to call this method if the @vm_bo is present in the GEMs gpuva 1610 + * list, the extobj list, or the evicted list. 1611 + * 1612 + * Note that this puts a refcount on the GEM object, which may destroy the GEM 1613 + * object if the refcount reaches zero. It's illegal for this to happen if the 1614 + * caller holds the GEMs gpuva mutex because it would free the mutex. 1615 + */ 1605 1616 static void 1606 - drm_gpuvm_bo_destroy(struct kref *kref) 1617 + drm_gpuvm_bo_destroy_not_in_lists(struct drm_gpuvm_bo *vm_bo) 1607 1618 { 1608 - struct drm_gpuvm_bo *vm_bo = container_of(kref, struct drm_gpuvm_bo, 1609 - kref); 1610 1619 struct drm_gpuvm *gpuvm = vm_bo->vm; 1611 1620 const struct drm_gpuvm_ops *ops = gpuvm->ops; 1612 1621 struct drm_gem_object *obj = vm_bo->obj; 1613 - bool lock = !drm_gpuvm_resv_protected(gpuvm); 1614 - 1615 - if (!lock) 1616 - drm_gpuvm_resv_assert_held(gpuvm); 1617 - 1618 - drm_gpuvm_bo_list_del(vm_bo, extobj, lock); 1619 - drm_gpuvm_bo_list_del(vm_bo, evict, lock); 1620 - 1621 - drm_gem_gpuva_assert_lock_held(gpuvm, obj); 1622 - list_del(&vm_bo->list.entry.gem); 1623 1622 1624 1623 if (ops && ops->vm_bo_free) 1625 1624 ops->vm_bo_free(vm_bo); ··· 1627 1628 1628 1629 drm_gpuvm_put(gpuvm); 1629 1630 drm_gem_object_put(obj); 1631 + } 1632 + 1633 + static void 1634 + drm_gpuvm_bo_destroy_not_in_lists_kref(struct kref *kref) 1635 + { 1636 + struct drm_gpuvm_bo *vm_bo = container_of(kref, struct drm_gpuvm_bo, 1637 + kref); 1638 + 1639 + drm_gpuvm_bo_destroy_not_in_lists(vm_bo); 1640 + } 1641 + 1642 + static void 1643 + drm_gpuvm_bo_destroy(struct kref *kref) 1644 + { 1645 + struct drm_gpuvm_bo *vm_bo = container_of(kref, struct drm_gpuvm_bo, 1646 + kref); 1647 + struct drm_gpuvm *gpuvm = vm_bo->vm; 1648 + 
bool lock = !drm_gpuvm_resv_protected(gpuvm); 1649 + 1650 + if (!lock) 1651 + drm_gpuvm_resv_assert_held(gpuvm); 1652 + 1653 + drm_gpuvm_bo_list_del(vm_bo, extobj, lock); 1654 + drm_gpuvm_bo_list_del(vm_bo, evict, lock); 1655 + 1656 + drm_gem_gpuva_assert_lock_held(gpuvm, vm_bo->obj); 1657 + list_del(&vm_bo->list.entry.gem); 1658 + 1659 + drm_gpuvm_bo_destroy_not_in_lists(vm_bo); 1630 1660 } 1631 1661 1632 1662 /** ··· 1773 1745 void 1774 1746 drm_gpuvm_bo_deferred_cleanup(struct drm_gpuvm *gpuvm) 1775 1747 { 1776 - const struct drm_gpuvm_ops *ops = gpuvm->ops; 1777 1748 struct drm_gpuvm_bo *vm_bo; 1778 - struct drm_gem_object *obj; 1779 1749 struct llist_node *bo_defer; 1780 1750 1781 1751 bo_defer = llist_del_all(&gpuvm->bo_defer); ··· 1792 1766 while (bo_defer) { 1793 1767 vm_bo = llist_entry(bo_defer, struct drm_gpuvm_bo, list.entry.bo_defer); 1794 1768 bo_defer = bo_defer->next; 1795 - obj = vm_bo->obj; 1796 - if (ops && ops->vm_bo_free) 1797 - ops->vm_bo_free(vm_bo); 1798 - else 1799 - kfree(vm_bo); 1800 - 1801 - drm_gpuvm_put(gpuvm); 1802 - drm_gem_object_put(obj); 1769 + drm_gpuvm_bo_destroy_not_in_lists(vm_bo); 1803 1770 } 1804 1771 } 1805 1772 EXPORT_SYMBOL_GPL(drm_gpuvm_bo_deferred_cleanup); ··· 1880 1861 * count is decreased. If not found @__vm_bo is returned without further 1881 1862 * increase of the reference count. 1882 1863 * 1864 + * The provided @__vm_bo must not already be in the gpuva, evict, or extobj 1865 + * lists prior to calling this method. 1866 + * 1883 1867 * A new &drm_gpuvm_bo is added to the GEMs gpuva list. 
1884 1868 * 1885 1869 * Returns: a pointer to the found &drm_gpuvm_bo or @__vm_bo if no existing ··· 1895 1873 struct drm_gem_object *obj = __vm_bo->obj; 1896 1874 struct drm_gpuvm_bo *vm_bo; 1897 1875 1876 + drm_WARN_ON(gpuvm->drm, !drm_gpuvm_immediate_mode(gpuvm)); 1877 + 1878 + mutex_lock(&obj->gpuva.lock); 1898 1879 vm_bo = drm_gpuvm_bo_find(gpuvm, obj); 1899 1880 if (vm_bo) { 1900 - drm_gpuvm_bo_put(__vm_bo); 1881 + mutex_unlock(&obj->gpuva.lock); 1882 + kref_put(&__vm_bo->kref, drm_gpuvm_bo_destroy_not_in_lists_kref); 1901 1883 return vm_bo; 1902 1884 } 1903 1885 1904 1886 drm_gem_gpuva_assert_lock_held(gpuvm, obj); 1905 1887 list_add_tail(&__vm_bo->list.entry.gem, &obj->gpuva.list); 1888 + mutex_unlock(&obj->gpuva.lock); 1906 1889 1907 1890 return __vm_bo; 1908 1891 }
-10
drivers/gpu/drm/panthor/panthor_mmu.c
··· 1252 1252 goto err_cleanup; 1253 1253 } 1254 1254 1255 - /* drm_gpuvm_bo_obtain_prealloc() will call drm_gpuvm_bo_put() on our 1256 - * pre-allocated BO if the <BO,VM> association exists. Given we 1257 - * only have one ref on preallocated_vm_bo, drm_gpuvm_bo_destroy() will 1258 - * be called immediately, and we have to hold the VM resv lock when 1259 - * calling this function. 1260 - */ 1261 - dma_resv_lock(panthor_vm_resv(vm), NULL); 1262 - mutex_lock(&bo->base.base.gpuva.lock); 1263 1255 op_ctx->map.vm_bo = drm_gpuvm_bo_obtain_prealloc(preallocated_vm_bo); 1264 - mutex_unlock(&bo->base.base.gpuva.lock); 1265 - dma_resv_unlock(panthor_vm_resv(vm)); 1266 1256 1267 1257 op_ctx->map.bo_offset = offset; 1268 1258