Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

RDMA/core: Add RDMA_NLDEV_CMD_NEWLINK/DELLINK support

Add support for new LINK messages to allow adding and deleting RDMA
interfaces. These will be used initially by soft RDMA drivers, which
instantiate device instances dynamically when the admin specifies a
netdev to use. The rdma_rxe module will be the first user of these
messages.

The design is modeled after RTNL_NEWLINK/DELLINK: RDMA drivers register
with the RDMA core if they provide link add/delete functions. Each driver
registers with a unique "type" string, which is used to dispatch messages
coming from user space. A new RDMA_NLDEV_ATTR is defined for the "type"
string. User mode will pass three attributes in a NEWLINK message:
RDMA_NLDEV_ATTR_DEV_NAME for the name of the RDMA device to be created,
RDMA_NLDEV_ATTR_LINK_TYPE for the "type" of link being added, and
RDMA_NLDEV_ATTR_NDEV_NAME for the net_device interface to use for this
link. The DELLINK message will contain the RDMA_NLDEV_ATTR_DEV_INDEX of
the device to delete.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>

Authored by Steve Wise and committed by Jason Gunthorpe
3856ec4b 5bb3c1e9

+144 -2
+122
drivers/infiniband/core/nldev.c
··· 33 33 #include <linux/module.h> 34 34 #include <linux/pid.h> 35 35 #include <linux/pid_namespace.h> 36 + #include <linux/mutex.h> 36 37 #include <net/netlink.h> 37 38 #include <rdma/rdma_cm.h> 38 39 #include <rdma/rdma_netlink.h> ··· 114 113 [RDMA_NLDEV_ATTR_RES_MRN] = { .type = NLA_U32 }, 115 114 [RDMA_NLDEV_ATTR_RES_CM_IDN] = { .type = NLA_U32 }, 116 115 [RDMA_NLDEV_ATTR_RES_CTXN] = { .type = NLA_U32 }, 116 + [RDMA_NLDEV_ATTR_LINK_TYPE] = { .type = NLA_NUL_STRING, 117 + .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN }, 117 118 }; 118 119 119 120 static int put_driver_name_print_type(struct sk_buff *msg, const char *name, ··· 1203 1200 RES_GET_FUNCS(pd, RDMA_RESTRACK_PD); 1204 1201 RES_GET_FUNCS(mr, RDMA_RESTRACK_MR); 1205 1202 1203 + static LIST_HEAD(link_ops); 1204 + static DECLARE_RWSEM(link_ops_rwsem); 1205 + 1206 + static const struct rdma_link_ops *link_ops_get(const char *type) 1207 + { 1208 + const struct rdma_link_ops *ops; 1209 + 1210 + list_for_each_entry(ops, &link_ops, list) { 1211 + if (!strcmp(ops->type, type)) 1212 + goto out; 1213 + } 1214 + ops = NULL; 1215 + out: 1216 + return ops; 1217 + } 1218 + 1219 + void rdma_link_register(struct rdma_link_ops *ops) 1220 + { 1221 + down_write(&link_ops_rwsem); 1222 + if (link_ops_get(ops->type)) { 1223 + WARN_ONCE("Duplicate rdma_link_ops! 
%s\n", ops->type); 1224 + goto out; 1225 + } 1226 + list_add(&ops->list, &link_ops); 1227 + out: 1228 + up_write(&link_ops_rwsem); 1229 + } 1230 + EXPORT_SYMBOL(rdma_link_register); 1231 + 1232 + void rdma_link_unregister(struct rdma_link_ops *ops) 1233 + { 1234 + down_write(&link_ops_rwsem); 1235 + list_del(&ops->list); 1236 + up_write(&link_ops_rwsem); 1237 + } 1238 + EXPORT_SYMBOL(rdma_link_unregister); 1239 + 1240 + static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, 1241 + struct netlink_ext_ack *extack) 1242 + { 1243 + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1244 + char ibdev_name[IB_DEVICE_NAME_MAX]; 1245 + const struct rdma_link_ops *ops; 1246 + char ndev_name[IFNAMSIZ]; 1247 + struct net_device *ndev; 1248 + char type[IFNAMSIZ]; 1249 + int err; 1250 + 1251 + err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1252 + nldev_policy, extack); 1253 + if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] || 1254 + !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME]) 1255 + return -EINVAL; 1256 + 1257 + nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME], 1258 + sizeof(ibdev_name)); 1259 + if (strchr(ibdev_name, '%')) 1260 + return -EINVAL; 1261 + 1262 + nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type)); 1263 + nla_strlcpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME], 1264 + sizeof(ndev_name)); 1265 + 1266 + ndev = dev_get_by_name(&init_net, ndev_name); 1267 + if (!ndev) 1268 + return -ENODEV; 1269 + 1270 + down_read(&link_ops_rwsem); 1271 + ops = link_ops_get(type); 1272 + #ifdef CONFIG_MODULES 1273 + if (!ops) { 1274 + up_read(&link_ops_rwsem); 1275 + request_module("rdma-link-%s", type); 1276 + down_read(&link_ops_rwsem); 1277 + ops = link_ops_get(type); 1278 + } 1279 + #endif 1280 + err = ops ? 
ops->newlink(ibdev_name, ndev) : -EINVAL; 1281 + up_read(&link_ops_rwsem); 1282 + dev_put(ndev); 1283 + 1284 + return err; 1285 + } 1286 + 1287 + static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, 1288 + struct netlink_ext_ack *extack) 1289 + { 1290 + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1291 + struct ib_device *device; 1292 + u32 index; 1293 + int err; 1294 + 1295 + err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1296 + nldev_policy, extack); 1297 + if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) 1298 + return -EINVAL; 1299 + 1300 + index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 1301 + device = ib_device_get_by_index(index); 1302 + if (!device) 1303 + return -EINVAL; 1304 + 1305 + if (!(device->attrs.device_cap_flags & IB_DEVICE_ALLOW_USER_UNREG)) { 1306 + ib_device_put(device); 1307 + return -EINVAL; 1308 + } 1309 + 1310 + ib_unregister_device_and_put(device); 1311 + return 0; 1312 + } 1313 + 1206 1314 static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { 1207 1315 [RDMA_NLDEV_CMD_GET] = { 1208 1316 .doit = nldev_get_doit, ··· 1321 1207 }, 1322 1208 [RDMA_NLDEV_CMD_SET] = { 1323 1209 .doit = nldev_set_doit, 1210 + .flags = RDMA_NL_ADMIN_PERM, 1211 + }, 1212 + [RDMA_NLDEV_CMD_NEWLINK] = { 1213 + .doit = nldev_newlink, 1214 + .flags = RDMA_NL_ADMIN_PERM, 1215 + }, 1216 + [RDMA_NLDEV_CMD_DELLINK] = { 1217 + .doit = nldev_dellink, 1324 1218 .flags = RDMA_NL_ADMIN_PERM, 1325 1219 }, 1326 1220 [RDMA_NLDEV_CMD_PORT_GET] = {
+3
include/rdma/ib_verbs.h
··· 238 238 IB_DEVICE_RDMA_NETDEV_OPA_VNIC = (1ULL << 35), 239 239 /* The device supports padding incoming writes to cacheline. */ 240 240 IB_DEVICE_PCI_WRITE_END_PADDING = (1ULL << 36), 241 + IB_DEVICE_ALLOW_USER_UNREG = (1ULL << 37), 241 242 }; 242 243 243 244 enum ib_signature_prot_cap { ··· 2623 2622 refcount_t refcount; 2624 2623 struct completion unreg_completion; 2625 2624 struct work_struct unregistration_work; 2625 + 2626 + const struct rdma_link_ops *link_ops; 2626 2627 }; 2627 2628 2628 2629 struct ib_client {
+11
include/rdma/rdma_netlink.h
··· 99 99 * Returns true on success or false if no listeners. 100 100 */ 101 101 bool rdma_nl_chk_listeners(unsigned int group); 102 + 103 + struct rdma_link_ops { 104 + struct list_head list; 105 + const char *type; 106 + int (*newlink)(const char *ibdev_name, struct net_device *ndev); 107 + }; 108 + 109 + void rdma_link_register(struct rdma_link_ops *ops); 110 + void rdma_link_unregister(struct rdma_link_ops *ops); 111 + 112 + #define MODULE_ALIAS_RDMA_LINK(type) MODULE_ALIAS("rdma-link-" type) 102 113 #endif /* _RDMA_NETLINK_H */
+8 -2
include/uapi/rdma/rdma_netlink.h
··· 255 255 RDMA_NLDEV_CMD_GET, /* can dump */ 256 256 RDMA_NLDEV_CMD_SET, 257 257 258 - /* 3 - 4 are free to use */ 258 + RDMA_NLDEV_CMD_NEWLINK, 259 259 260 - RDMA_NLDEV_CMD_PORT_GET = 5, /* can dump */ 260 + RDMA_NLDEV_CMD_DELLINK, 261 + 262 + RDMA_NLDEV_CMD_PORT_GET, /* can dump */ 261 263 262 264 /* 6 - 8 are free to use */ 263 265 ··· 467 465 RDMA_NLDEV_ATTR_RES_MRN, /* u32 */ 468 466 RDMA_NLDEV_ATTR_RES_CM_IDN, /* u32 */ 469 467 RDMA_NLDEV_ATTR_RES_CTXN, /* u32 */ 468 + /* 469 + * Identifies the rdma driver. eg: "rxe" or "siw" 470 + */ 471 + RDMA_NLDEV_ATTR_LINK_TYPE, /* string */ 470 472 471 473 /* 472 474 * Always the end