Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

RDMA/core: Introduce a DMAH object and its alloc/free APIs

Introduce a new DMA handle (DMAH) object along with its corresponding
allocation and deallocation APIs.

This DMAH object encapsulates attributes intended for use in DMA
transactions.

While its initial purpose is to support TPH functionality, it is
designed to be extensible for future features such as DMA PCI multipath,
PCI UIO configurations, PCI traffic class selection, and more.

Further details:
----------------
We ensure that a caller requesting a DMA handle for a specific CPU ID is
permitted to be scheduled on it. This prevents a potential security issue
where a non-privileged user may trigger DMA operations toward a CPU that
it is not allowed to run on.

We manage reference counting for the DMAH object and its consumers
(e.g., memory regions) as will be detailed in subsequent patches in the
series.

Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Reviewed-by: Edward Srouji <edwards@nvidia.com>
Link: https://patch.msgid.link/2cad097e849597e49d6b61e6865dba878257f371.1752752567.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>

authored by

Yishai Hadas and committed by
Leon Romanovsky
d83edab5 5b2e4504

+200
+1
drivers/infiniband/core/Makefile
··· 33 33 ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \ 34 34 rdma_core.o uverbs_std_types.o uverbs_ioctl.o \ 35 35 uverbs_std_types_cq.o \ 36 + uverbs_std_types_dmah.o \ 36 37 uverbs_std_types_flow_action.o uverbs_std_types_dm.o \ 37 38 uverbs_std_types_mr.o uverbs_std_types_counters.o \ 38 39 uverbs_uapi.o uverbs_std_types_device.o \
+3
drivers/infiniband/core/device.c
··· 2708 2708 SET_DEVICE_OP(dev_ops, add_sub_dev); 2709 2709 SET_DEVICE_OP(dev_ops, advise_mr); 2710 2710 SET_DEVICE_OP(dev_ops, alloc_dm); 2711 + SET_DEVICE_OP(dev_ops, alloc_dmah); 2711 2712 SET_DEVICE_OP(dev_ops, alloc_hw_device_stats); 2712 2713 SET_DEVICE_OP(dev_ops, alloc_hw_port_stats); 2713 2714 SET_DEVICE_OP(dev_ops, alloc_mr); ··· 2737 2736 SET_DEVICE_OP(dev_ops, create_user_ah); 2738 2737 SET_DEVICE_OP(dev_ops, create_wq); 2739 2738 SET_DEVICE_OP(dev_ops, dealloc_dm); 2739 + SET_DEVICE_OP(dev_ops, dealloc_dmah); 2740 2740 SET_DEVICE_OP(dev_ops, dealloc_driver); 2741 2741 SET_DEVICE_OP(dev_ops, dealloc_mw); 2742 2742 SET_DEVICE_OP(dev_ops, dealloc_pd); ··· 2835 2833 SET_OBJ_SIZE(dev_ops, ib_ah); 2836 2834 SET_OBJ_SIZE(dev_ops, ib_counters); 2837 2835 SET_OBJ_SIZE(dev_ops, ib_cq); 2836 + SET_OBJ_SIZE(dev_ops, ib_dmah); 2838 2837 SET_OBJ_SIZE(dev_ops, ib_mw); 2839 2838 SET_OBJ_SIZE(dev_ops, ib_pd); 2840 2839 SET_OBJ_SIZE(dev_ops, ib_qp);
+1
drivers/infiniband/core/rdma_core.h
··· 156 156 extern const struct uapi_definition uverbs_def_obj_cq[]; 157 157 extern const struct uapi_definition uverbs_def_obj_device[]; 158 158 extern const struct uapi_definition uverbs_def_obj_dm[]; 159 + extern const struct uapi_definition uverbs_def_obj_dmah[]; 159 160 extern const struct uapi_definition uverbs_def_obj_flow_action[]; 160 161 extern const struct uapi_definition uverbs_def_obj_intf[]; 161 162 extern const struct uapi_definition uverbs_def_obj_mr[];
+2
drivers/infiniband/core/restrack.c
··· 100 100 return container_of(res, struct rdma_counter, res)->device; 101 101 case RDMA_RESTRACK_SRQ: 102 102 return container_of(res, struct ib_srq, res)->device; 103 + case RDMA_RESTRACK_DMAH: 104 + return container_of(res, struct ib_dmah, res)->device; 103 105 default: 104 106 WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type); 105 107 return NULL;
+145
drivers/infiniband/core/uverbs_std_types_dmah.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 + /* 3 + * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved 4 + */ 5 + 6 + #include "rdma_core.h" 7 + #include "uverbs.h" 8 + #include <rdma/uverbs_std_types.h> 9 + #include "restrack.h" 10 + 11 + static int uverbs_free_dmah(struct ib_uobject *uobject, 12 + enum rdma_remove_reason why, 13 + struct uverbs_attr_bundle *attrs) 14 + { 15 + struct ib_dmah *dmah = uobject->object; 16 + int ret; 17 + 18 + if (atomic_read(&dmah->usecnt)) 19 + return -EBUSY; 20 + 21 + ret = dmah->device->ops.dealloc_dmah(dmah, attrs); 22 + if (ret) 23 + return ret; 24 + 25 + rdma_restrack_del(&dmah->res); 26 + kfree(dmah); 27 + return 0; 28 + } 29 + 30 + static int UVERBS_HANDLER(UVERBS_METHOD_DMAH_ALLOC)( 31 + struct uverbs_attr_bundle *attrs) 32 + { 33 + struct ib_uobject *uobj = 34 + uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DMAH_HANDLE) 35 + ->obj_attr.uobject; 36 + struct ib_device *ib_dev = attrs->context->device; 37 + struct ib_dmah *dmah; 38 + int ret; 39 + 40 + dmah = rdma_zalloc_drv_obj(ib_dev, ib_dmah); 41 + if (!dmah) 42 + return -ENOMEM; 43 + 44 + if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_ALLOC_DMAH_CPU_ID)) { 45 + ret = uverbs_copy_from(&dmah->cpu_id, attrs, 46 + UVERBS_ATTR_ALLOC_DMAH_CPU_ID); 47 + if (ret) 48 + goto err; 49 + 50 + if (!cpumask_test_cpu(dmah->cpu_id, current->cpus_ptr)) { 51 + ret = -EPERM; 52 + goto err; 53 + } 54 + 55 + dmah->valid_fields |= BIT(IB_DMAH_CPU_ID_EXISTS); 56 + } 57 + 58 + if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_ALLOC_DMAH_TPH_MEM_TYPE)) { 59 + dmah->mem_type = uverbs_attr_get_enum_id(attrs, 60 + UVERBS_ATTR_ALLOC_DMAH_TPH_MEM_TYPE); 61 + dmah->valid_fields |= BIT(IB_DMAH_MEM_TYPE_EXISTS); 62 + } 63 + 64 + if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_ALLOC_DMAH_PH)) { 65 + ret = uverbs_copy_from(&dmah->ph, attrs, 66 + UVERBS_ATTR_ALLOC_DMAH_PH); 67 + if (ret) 68 + goto err; 69 + 70 + /* Per PCIe spec 6.2-1.0, only the lowest two bits are applicable */ 71 + 
if (dmah->ph & 0xFC) { 72 + ret = -EINVAL; 73 + goto err; 74 + } 75 + 76 + dmah->valid_fields |= BIT(IB_DMAH_PH_EXISTS); 77 + } 78 + 79 + dmah->device = ib_dev; 80 + dmah->uobject = uobj; 81 + atomic_set(&dmah->usecnt, 0); 82 + 83 + rdma_restrack_new(&dmah->res, RDMA_RESTRACK_DMAH); 84 + rdma_restrack_set_name(&dmah->res, NULL); 85 + 86 + ret = ib_dev->ops.alloc_dmah(dmah, attrs); 87 + if (ret) { 88 + rdma_restrack_put(&dmah->res); 89 + goto err; 90 + } 91 + 92 + uobj->object = dmah; 93 + rdma_restrack_add(&dmah->res); 94 + uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_ALLOC_DMAH_HANDLE); 95 + return 0; 96 + err: 97 + kfree(dmah); 98 + return ret; 99 + } 100 + 101 + static const struct uverbs_attr_spec uverbs_dmah_mem_type[] = { 102 + [TPH_MEM_TYPE_VM] = { 103 + .type = UVERBS_ATTR_TYPE_PTR_IN, 104 + UVERBS_ATTR_NO_DATA(), 105 + }, 106 + [TPH_MEM_TYPE_PM] = { 107 + .type = UVERBS_ATTR_TYPE_PTR_IN, 108 + UVERBS_ATTR_NO_DATA(), 109 + }, 110 + }; 111 + 112 + DECLARE_UVERBS_NAMED_METHOD( 113 + UVERBS_METHOD_DMAH_ALLOC, 114 + UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DMAH_HANDLE, 115 + UVERBS_OBJECT_DMAH, 116 + UVERBS_ACCESS_NEW, 117 + UA_MANDATORY), 118 + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DMAH_CPU_ID, 119 + UVERBS_ATTR_TYPE(u32), 120 + UA_OPTIONAL), 121 + UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_ALLOC_DMAH_TPH_MEM_TYPE, 122 + uverbs_dmah_mem_type, 123 + UA_OPTIONAL), 124 + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DMAH_PH, 125 + UVERBS_ATTR_TYPE(u8), 126 + UA_OPTIONAL)); 127 + 128 + DECLARE_UVERBS_NAMED_METHOD_DESTROY( 129 + UVERBS_METHOD_DMAH_FREE, 130 + UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DMA_HANDLE, 131 + UVERBS_OBJECT_DMAH, 132 + UVERBS_ACCESS_DESTROY, 133 + UA_MANDATORY)); 134 + 135 + DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DMAH, 136 + UVERBS_TYPE_ALLOC_IDR(uverbs_free_dmah), 137 + &UVERBS_METHOD(UVERBS_METHOD_DMAH_ALLOC), 138 + &UVERBS_METHOD(UVERBS_METHOD_DMAH_FREE)); 139 + 140 + const struct uapi_definition uverbs_def_obj_dmah[] = { 141 + 
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DMAH, 142 + UAPI_DEF_OBJ_NEEDS_FN(dealloc_dmah), 143 + UAPI_DEF_OBJ_NEEDS_FN(alloc_dmah)), 144 + {} 145 + };
+1
drivers/infiniband/core/uverbs_uapi.c
··· 631 631 UAPI_DEF_CHAIN(uverbs_def_obj_cq), 632 632 UAPI_DEF_CHAIN(uverbs_def_obj_device), 633 633 UAPI_DEF_CHAIN(uverbs_def_obj_dm), 634 + UAPI_DEF_CHAIN(uverbs_def_obj_dmah), 634 635 UAPI_DEF_CHAIN(uverbs_def_obj_flow_action), 635 636 UAPI_DEF_CHAIN(uverbs_def_obj_intf), 636 637 UAPI_DEF_CHAIN(uverbs_def_obj_mr),
+26
include/rdma/ib_verbs.h
··· 42 42 #include <rdma/signature.h> 43 43 #include <uapi/rdma/rdma_user_ioctl.h> 44 44 #include <uapi/rdma/ib_user_ioctl_verbs.h> 45 + #include <linux/pci-tph.h> 45 46 46 47 #define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN 47 48 ··· 1847 1846 atomic_t usecnt; 1848 1847 }; 1849 1848 1849 + /* bit values to mark existence of ib_dmah fields */ 1850 + enum { 1851 + IB_DMAH_CPU_ID_EXISTS, 1852 + IB_DMAH_MEM_TYPE_EXISTS, 1853 + IB_DMAH_PH_EXISTS, 1854 + }; 1855 + 1856 + struct ib_dmah { 1857 + struct ib_device *device; 1858 + struct ib_uobject *uobject; 1859 + /* 1860 + * Implementation details of the RDMA core, don't use in drivers: 1861 + */ 1862 + struct rdma_restrack_entry res; 1863 + u32 cpu_id; 1864 + enum tph_mem_type mem_type; 1865 + atomic_t usecnt; 1866 + u8 ph; 1867 + u8 valid_fields; /* use IB_DMAH_XXX_EXISTS */ 1868 + }; 1869 + 1850 1870 struct ib_mr { 1851 1871 struct ib_device *device; 1852 1872 struct ib_pd *pd; ··· 2595 2573 struct ib_dm_alloc_attr *attr, 2596 2574 struct uverbs_attr_bundle *attrs); 2597 2575 int (*dealloc_dm)(struct ib_dm *dm, struct uverbs_attr_bundle *attrs); 2576 + int (*alloc_dmah)(struct ib_dmah *ibdmah, 2577 + struct uverbs_attr_bundle *attrs); 2578 + int (*dealloc_dmah)(struct ib_dmah *dmah, struct uverbs_attr_bundle *attrs); 2598 2579 struct ib_mr *(*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm, 2599 2580 struct ib_dm_mr_attr *attr, 2600 2581 struct uverbs_attr_bundle *attrs); ··· 2755 2730 DECLARE_RDMA_OBJ_SIZE(ib_ah); 2756 2731 DECLARE_RDMA_OBJ_SIZE(ib_counters); 2757 2732 DECLARE_RDMA_OBJ_SIZE(ib_cq); 2733 + DECLARE_RDMA_OBJ_SIZE(ib_dmah); 2758 2734 DECLARE_RDMA_OBJ_SIZE(ib_mw); 2759 2735 DECLARE_RDMA_OBJ_SIZE(ib_pd); 2760 2736 DECLARE_RDMA_OBJ_SIZE(ib_qp);
+4
include/rdma/restrack.h
··· 57 57 */ 58 58 RDMA_RESTRACK_SRQ, 59 59 /** 60 + * @RDMA_RESTRACK_DMAH: DMA handle 61 + */ 62 + RDMA_RESTRACK_DMAH, 63 + /** 60 64 * @RDMA_RESTRACK_MAX: Last entry, used for array dclarations 61 65 */ 62 66 RDMA_RESTRACK_MAX
+17
include/uapi/rdma/ib_user_ioctl_cmds.h
··· 55 55 UVERBS_OBJECT_DM, 56 56 UVERBS_OBJECT_COUNTERS, 57 57 UVERBS_OBJECT_ASYNC_EVENT, 58 + UVERBS_OBJECT_DMAH, 58 59 }; 59 60 60 61 enum { ··· 239 238 enum uverbs_methods_dm { 240 239 UVERBS_METHOD_DM_ALLOC, 241 240 UVERBS_METHOD_DM_FREE, 241 + }; 242 + 243 + enum uverbs_attrs_alloc_dmah_cmd_attr_ids { 244 + UVERBS_ATTR_ALLOC_DMAH_HANDLE, 245 + UVERBS_ATTR_ALLOC_DMAH_CPU_ID, 246 + UVERBS_ATTR_ALLOC_DMAH_TPH_MEM_TYPE, 247 + UVERBS_ATTR_ALLOC_DMAH_PH, 248 + }; 249 + 250 + enum uverbs_attrs_free_dmah_cmd_attr_ids { 251 + UVERBS_ATTR_FREE_DMA_HANDLE, 252 + }; 253 + 254 + enum uverbs_methods_dmah { 255 + UVERBS_METHOD_DMAH_ALLOC, 256 + UVERBS_METHOD_DMAH_FREE, 242 257 }; 243 258 244 259 enum uverbs_attrs_reg_dm_mr_cmd_attr_ids {