Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v4.19-rc3 670 lines 16 kB view raw
1// SPDX-License-Identifier: GPL-2.0 2/* 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE 4 * 5 * Generic netlink support functions to configure an SMC-R PNET table 6 * 7 * Copyright IBM Corp. 2016 8 * 9 * Author(s): Thomas Richter <tmricht@linux.vnet.ibm.com> 10 */ 11 12#include <linux/module.h> 13#include <linux/list.h> 14#include <linux/ctype.h> 15#include <net/netlink.h> 16#include <net/genetlink.h> 17 18#include <uapi/linux/if.h> 19#include <uapi/linux/smc.h> 20 21#include <rdma/ib_verbs.h> 22 23#include "smc_pnet.h" 24#include "smc_ib.h" 25#include "smc_ism.h" 26 27static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { 28 [SMC_PNETID_NAME] = { 29 .type = NLA_NUL_STRING, 30 .len = SMC_MAX_PNETID_LEN - 1 31 }, 32 [SMC_PNETID_ETHNAME] = { 33 .type = NLA_NUL_STRING, 34 .len = IFNAMSIZ - 1 35 }, 36 [SMC_PNETID_IBNAME] = { 37 .type = NLA_NUL_STRING, 38 .len = IB_DEVICE_NAME_MAX - 1 39 }, 40 [SMC_PNETID_IBPORT] = { .type = NLA_U8 } 41}; 42 43static struct genl_family smc_pnet_nl_family; 44 45/** 46 * struct smc_pnettable - SMC PNET table anchor 47 * @lock: Lock for list action 48 * @pnetlist: List of PNETIDs 49 */ 50static struct smc_pnettable { 51 rwlock_t lock; 52 struct list_head pnetlist; 53} smc_pnettable = { 54 .pnetlist = LIST_HEAD_INIT(smc_pnettable.pnetlist), 55 .lock = __RW_LOCK_UNLOCKED(smc_pnettable.lock) 56}; 57 58/** 59 * struct smc_pnetentry - pnet identifier name entry 60 * @list: List node. 61 * @pnet_name: Pnet identifier name 62 * @ndev: pointer to network device. 63 * @smcibdev: Pointer to IB device. 64 */ 65struct smc_pnetentry { 66 struct list_head list; 67 char pnet_name[SMC_MAX_PNETID_LEN + 1]; 68 struct net_device *ndev; 69 struct smc_ib_device *smcibdev; 70 u8 ib_port; 71}; 72 73/* Check if two RDMA device entries are identical. Use device name and port 74 * number for comparison. 75 */ 76static bool smc_pnet_same_ibname(struct smc_pnetentry *pnetelem, char *ibname, 77 u8 ibport) 78{ 79 return pnetelem->ib_port == ibport && 80 !strncmp(pnetelem->smcibdev->ibdev->name, ibname, 81 sizeof(pnetelem->smcibdev->ibdev->name)); 82} 83 84/* Find a pnetid in the pnet table. 85 */ 86static struct smc_pnetentry *smc_pnet_find_pnetid(char *pnet_name) 87{ 88 struct smc_pnetentry *pnetelem, *found_pnetelem = NULL; 89 90 read_lock(&smc_pnettable.lock); 91 list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { 92 if (!strncmp(pnetelem->pnet_name, pnet_name, 93 sizeof(pnetelem->pnet_name))) { 94 found_pnetelem = pnetelem; 95 break; 96 } 97 } 98 read_unlock(&smc_pnettable.lock); 99 return found_pnetelem; 100} 101 102/* Remove a pnetid from the pnet table. 103 */ 104static int smc_pnet_remove_by_pnetid(char *pnet_name) 105{ 106 struct smc_pnetentry *pnetelem, *tmp_pe; 107 int rc = -ENOENT; 108 109 write_lock(&smc_pnettable.lock); 110 list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist, 111 list) { 112 if (!strncmp(pnetelem->pnet_name, pnet_name, 113 sizeof(pnetelem->pnet_name))) { 114 list_del(&pnetelem->list); 115 dev_put(pnetelem->ndev); 116 kfree(pnetelem); 117 rc = 0; 118 break; 119 } 120 } 121 write_unlock(&smc_pnettable.lock); 122 return rc; 123} 124 125/* Remove a pnet entry mentioning a given network device from the pnet table. 126 */ 127static int smc_pnet_remove_by_ndev(struct net_device *ndev) 128{ 129 struct smc_pnetentry *pnetelem, *tmp_pe; 130 int rc = -ENOENT; 131 132 write_lock(&smc_pnettable.lock); 133 list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist, 134 list) { 135 if (pnetelem->ndev == ndev) { 136 list_del(&pnetelem->list); 137 dev_put(pnetelem->ndev); 138 kfree(pnetelem); 139 rc = 0; 140 break; 141 } 142 } 143 write_unlock(&smc_pnettable.lock); 144 return rc; 145} 146 147/* Remove a pnet entry mentioning a given ib device from the pnet table. 148 */ 149int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev) 150{ 151 struct smc_pnetentry *pnetelem, *tmp_pe; 152 int rc = -ENOENT; 153 154 write_lock(&smc_pnettable.lock); 155 list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist, 156 list) { 157 if (pnetelem->smcibdev == ibdev) { 158 list_del(&pnetelem->list); 159 dev_put(pnetelem->ndev); 160 kfree(pnetelem); 161 rc = 0; 162 break; 163 } 164 } 165 write_unlock(&smc_pnettable.lock); 166 return rc; 167} 168 169/* Append a pnetid to the end of the pnet table if not already on this list. 170 */ 171static int smc_pnet_enter(struct smc_pnetentry *new_pnetelem) 172{ 173 struct smc_pnetentry *pnetelem; 174 int rc = -EEXIST; 175 176 write_lock(&smc_pnettable.lock); 177 list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { 178 if (!strncmp(pnetelem->pnet_name, new_pnetelem->pnet_name, 179 sizeof(new_pnetelem->pnet_name)) || 180 !strncmp(pnetelem->ndev->name, new_pnetelem->ndev->name, 181 sizeof(new_pnetelem->ndev->name)) || 182 smc_pnet_same_ibname(pnetelem, 183 new_pnetelem->smcibdev->ibdev->name, 184 new_pnetelem->ib_port)) { 185 dev_put(pnetelem->ndev); 186 goto found; 187 } 188 } 189 list_add_tail(&new_pnetelem->list, &smc_pnettable.pnetlist); 190 rc = 0; 191found: 192 write_unlock(&smc_pnettable.lock); 193 return rc; 194} 195 196/* The limit for pnetid is 16 characters. 197 * Valid characters should be (single-byte character set) a-z, A-Z, 0-9. 198 * Lower case letters are converted to upper case. 199 * Interior blanks should not be used. 200 */ 201static bool smc_pnetid_valid(const char *pnet_name, char *pnetid) 202{ 203 char *bf = skip_spaces(pnet_name); 204 size_t len = strlen(bf); 205 char *end = bf + len; 206 207 if (!len) 208 return false; 209 while (--end >= bf && isspace(*end)) 210 ; 211 if (end - bf >= SMC_MAX_PNETID_LEN) 212 return false; 213 while (bf <= end) { 214 if (!isalnum(*bf)) 215 return false; 216 *pnetid++ = islower(*bf) ? toupper(*bf) : *bf; 217 bf++; 218 } 219 *pnetid = '\0'; 220 return true; 221} 222 223/* Find an infiniband device by a given name. The device might not exist. */ 224static struct smc_ib_device *smc_pnet_find_ib(char *ib_name) 225{ 226 struct smc_ib_device *ibdev; 227 228 spin_lock(&smc_ib_devices.lock); 229 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 230 if (!strncmp(ibdev->ibdev->name, ib_name, 231 sizeof(ibdev->ibdev->name))) { 232 goto out; 233 } 234 } 235 ibdev = NULL; 236out: 237 spin_unlock(&smc_ib_devices.lock); 238 return ibdev; 239} 240 241/* Parse the supplied netlink attributes and fill a pnetentry structure. 242 * For ethernet and infiniband device names verify that the devices exist. 243 */ 244static int smc_pnet_fill_entry(struct net *net, struct smc_pnetentry *pnetelem, 245 struct nlattr *tb[]) 246{ 247 char *string, *ibname; 248 int rc; 249 250 memset(pnetelem, 0, sizeof(*pnetelem)); 251 INIT_LIST_HEAD(&pnetelem->list); 252 253 rc = -EINVAL; 254 if (!tb[SMC_PNETID_NAME]) 255 goto error; 256 string = (char *)nla_data(tb[SMC_PNETID_NAME]); 257 if (!smc_pnetid_valid(string, pnetelem->pnet_name)) 258 goto error; 259 260 rc = -EINVAL; 261 if (!tb[SMC_PNETID_ETHNAME]) 262 goto error; 263 rc = -ENOENT; 264 string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]); 265 pnetelem->ndev = dev_get_by_name(net, string); 266 if (!pnetelem->ndev) 267 goto error; 268 269 rc = -EINVAL; 270 if (!tb[SMC_PNETID_IBNAME]) 271 goto error; 272 rc = -ENOENT; 273 ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]); 274 ibname = strim(ibname); 275 pnetelem->smcibdev = smc_pnet_find_ib(ibname); 276 if (!pnetelem->smcibdev) 277 goto error; 278 279 rc = -EINVAL; 280 if (!tb[SMC_PNETID_IBPORT]) 281 goto error; 282 pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]); 283 if (pnetelem->ib_port < 1 || pnetelem->ib_port > SMC_MAX_PORTS) 284 goto error; 285 286 return 0; 287 288error: 289 if (pnetelem->ndev) 290 dev_put(pnetelem->ndev); 291 return rc; 292} 293 294/* Convert an smc_pnetentry to a netlink attribute sequence */ 295static int smc_pnet_set_nla(struct sk_buff *msg, struct smc_pnetentry *pnetelem) 296{ 297 if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name) || 298 nla_put_string(msg, SMC_PNETID_ETHNAME, pnetelem->ndev->name) || 299 nla_put_string(msg, SMC_PNETID_IBNAME, 300 pnetelem->smcibdev->ibdev->name) || 301 nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port)) 302 return -1; 303 return 0; 304} 305 306/* Retrieve one PNETID entry */ 307static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info) 308{ 309 struct smc_pnetentry *pnetelem; 310 struct sk_buff *msg; 311 void *hdr; 312 int rc; 313 314 if (!info->attrs[SMC_PNETID_NAME]) 315 return -EINVAL; 316 pnetelem = smc_pnet_find_pnetid( 317 (char *)nla_data(info->attrs[SMC_PNETID_NAME])); 318 if (!pnetelem) 319 return -ENOENT; 320 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 321 if (!msg) 322 return -ENOMEM; 323 324 hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, 325 &smc_pnet_nl_family, 0, SMC_PNETID_GET); 326 if (!hdr) { 327 rc = -EMSGSIZE; 328 goto err_out; 329 } 330 331 if (smc_pnet_set_nla(msg, pnetelem)) { 332 rc = -ENOBUFS; 333 goto err_out; 334 } 335 336 genlmsg_end(msg, hdr); 337 return genlmsg_reply(msg, info); 338 339err_out: 340 nlmsg_free(msg); 341 return rc; 342} 343 344static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info) 345{ 346 struct net *net = genl_info_net(info); 347 struct smc_pnetentry *pnetelem; 348 int rc; 349 350 pnetelem = kzalloc(sizeof(*pnetelem), GFP_KERNEL); 351 if (!pnetelem) 352 return -ENOMEM; 353 rc = smc_pnet_fill_entry(net, pnetelem, info->attrs); 354 if (!rc) 355 rc = smc_pnet_enter(pnetelem); 356 if (rc) { 357 kfree(pnetelem); 358 return rc; 359 } 360 return rc; 361} 362 363static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info) 364{ 365 if (!info->attrs[SMC_PNETID_NAME]) 366 return -EINVAL; 367 return smc_pnet_remove_by_pnetid( 368 (char *)nla_data(info->attrs[SMC_PNETID_NAME])); 369} 370 371static int smc_pnet_dump_start(struct netlink_callback *cb) 372{ 373 cb->args[0] = 0; 374 return 0; 375} 376 377static int smc_pnet_dumpinfo(struct sk_buff *skb, 378 u32 portid, u32 seq, u32 flags, 379 struct smc_pnetentry *pnetelem) 380{ 381 void *hdr; 382 383 hdr = genlmsg_put(skb, portid, seq, &smc_pnet_nl_family, 384 flags, SMC_PNETID_GET); 385 if (!hdr) 386 return -ENOMEM; 387 if (smc_pnet_set_nla(skb, pnetelem) < 0) { 388 genlmsg_cancel(skb, hdr); 389 return -EMSGSIZE; 390 } 391 genlmsg_end(skb, hdr); 392 return 0; 393} 394 395static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb) 396{ 397 struct smc_pnetentry *pnetelem; 398 int idx = 0; 399 400 read_lock(&smc_pnettable.lock); 401 list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { 402 if (idx++ < cb->args[0]) 403 continue; 404 if (smc_pnet_dumpinfo(skb, NETLINK_CB(cb->skb).portid, 405 cb->nlh->nlmsg_seq, NLM_F_MULTI, 406 pnetelem)) { 407 --idx; 408 break; 409 } 410 } 411 cb->args[0] = idx; 412 read_unlock(&smc_pnettable.lock); 413 return skb->len; 414} 415 416/* Remove and delete all pnetids from pnet table. 417 */ 418static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info) 419{ 420 struct smc_pnetentry *pnetelem, *tmp_pe; 421 422 write_lock(&smc_pnettable.lock); 423 list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist, 424 list) { 425 list_del(&pnetelem->list); 426 dev_put(pnetelem->ndev); 427 kfree(pnetelem); 428 } 429 write_unlock(&smc_pnettable.lock); 430 return 0; 431} 432 433/* SMC_PNETID generic netlink operation definition */ 434static const struct genl_ops smc_pnet_ops[] = { 435 { 436 .cmd = SMC_PNETID_GET, 437 .flags = GENL_ADMIN_PERM, 438 .policy = smc_pnet_policy, 439 .doit = smc_pnet_get, 440 .dumpit = smc_pnet_dump, 441 .start = smc_pnet_dump_start 442 }, 443 { 444 .cmd = SMC_PNETID_ADD, 445 .flags = GENL_ADMIN_PERM, 446 .policy = smc_pnet_policy, 447 .doit = smc_pnet_add 448 }, 449 { 450 .cmd = SMC_PNETID_DEL, 451 .flags = GENL_ADMIN_PERM, 452 .policy = smc_pnet_policy, 453 .doit = smc_pnet_del 454 }, 455 { 456 .cmd = SMC_PNETID_FLUSH, 457 .flags = GENL_ADMIN_PERM, 458 .policy = smc_pnet_policy, 459 .doit = smc_pnet_flush 460 } 461}; 462 463/* SMC_PNETID family definition */ 464static struct genl_family smc_pnet_nl_family = { 465 .hdrsize = 0, 466 .name = SMCR_GENL_FAMILY_NAME, 467 .version = SMCR_GENL_FAMILY_VERSION, 468 .maxattr = SMC_PNETID_MAX, 469 .netnsok = true, 470 .module = THIS_MODULE, 471 .ops = smc_pnet_ops, 472 .n_ops = ARRAY_SIZE(smc_pnet_ops) 473}; 474 475static int smc_pnet_netdev_event(struct notifier_block *this, 476 unsigned long event, void *ptr) 477{ 478 struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); 479 480 switch (event) { 481 case NETDEV_REBOOT: 482 case NETDEV_UNREGISTER: 483 smc_pnet_remove_by_ndev(event_dev); 484 return NOTIFY_OK; 485 default: 486 return NOTIFY_DONE; 487 } 488} 489 490static struct notifier_block smc_netdev_notifier = { 491 .notifier_call = smc_pnet_netdev_event 492}; 493 494int __init smc_pnet_init(void) 495{ 496 int rc; 497 498 rc = genl_register_family(&smc_pnet_nl_family); 499 if (rc) 500 return rc; 501 rc = register_netdevice_notifier(&smc_netdev_notifier); 502 if (rc) 503 genl_unregister_family(&smc_pnet_nl_family); 504 return rc; 505} 506 507void smc_pnet_exit(void) 508{ 509 smc_pnet_flush(NULL, NULL); 510 unregister_netdevice_notifier(&smc_netdev_notifier); 511 genl_unregister_family(&smc_pnet_nl_family); 512} 513 514/* Determine one base device for stacked net devices. 515 * If the lower device level contains more than one devices 516 * (for instance with bonding slaves), just the first device 517 * is used to reach a base device. 518 */ 519static struct net_device *pnet_find_base_ndev(struct net_device *ndev) 520{ 521 int i, nest_lvl; 522 523 rtnl_lock(); 524 nest_lvl = dev_get_nest_level(ndev); 525 for (i = 0; i < nest_lvl; i++) { 526 struct list_head *lower = &ndev->adj_list.lower; 527 528 if (list_empty(lower)) 529 break; 530 lower = lower->next; 531 ndev = netdev_lower_get_next(ndev, &lower); 532 } 533 rtnl_unlock(); 534 return ndev; 535} 536 537/* Determine the corresponding IB device port based on the hardware PNETID. 538 * Searching stops at the first matching active IB device port with vlan_id 539 * configured. 540 */ 541static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, 542 struct smc_ib_device **smcibdev, 543 u8 *ibport, unsigned short vlan_id, 544 u8 gid[]) 545{ 546 u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; 547 struct smc_ib_device *ibdev; 548 int i; 549 550 ndev = pnet_find_base_ndev(ndev); 551 if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, 552 ndev_pnetid)) 553 return; /* pnetid could not be determined */ 554 555 spin_lock(&smc_ib_devices.lock); 556 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 557 for (i = 1; i <= SMC_MAX_PORTS; i++) { 558 if (!rdma_is_port_valid(ibdev->ibdev, i)) 559 continue; 560 if (!memcmp(ibdev->pnetid[i - 1], ndev_pnetid, 561 SMC_MAX_PNETID_LEN) && 562 smc_ib_port_active(ibdev, i) && 563 !smc_ib_determine_gid(ibdev, i, vlan_id, gid, 564 NULL)) { 565 *smcibdev = ibdev; 566 *ibport = i; 567 goto out; 568 } 569 } 570 } 571out: 572 spin_unlock(&smc_ib_devices.lock); 573} 574 575static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, 576 struct smcd_dev **smcismdev) 577{ 578 u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; 579 struct smcd_dev *ismdev; 580 581 ndev = pnet_find_base_ndev(ndev); 582 if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, 583 ndev_pnetid)) 584 return; /* pnetid could not be determined */ 585 586 spin_lock(&smcd_dev_list.lock); 587 list_for_each_entry(ismdev, &smcd_dev_list.list, list) { 588 if (!memcmp(ismdev->pnetid, ndev_pnetid, SMC_MAX_PNETID_LEN)) { 589 *smcismdev = ismdev; 590 break; 591 } 592 } 593 spin_unlock(&smcd_dev_list.lock); 594} 595 596/* Lookup of coupled ib_device via SMC pnet table */ 597static void smc_pnet_find_roce_by_table(struct net_device *netdev, 598 struct smc_ib_device **smcibdev, 599 u8 *ibport, unsigned short vlan_id, 600 u8 gid[]) 601{ 602 struct smc_pnetentry *pnetelem; 603 604 read_lock(&smc_pnettable.lock); 605 list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { 606 if (netdev == pnetelem->ndev) { 607 if (smc_ib_port_active(pnetelem->smcibdev, 608 pnetelem->ib_port) && 609 !smc_ib_determine_gid(pnetelem->smcibdev, 610 pnetelem->ib_port, vlan_id, 611 gid, NULL)) { 612 *smcibdev = pnetelem->smcibdev; 613 *ibport = pnetelem->ib_port; 614 } 615 break; 616 } 617 } 618 read_unlock(&smc_pnettable.lock); 619} 620 621/* PNET table analysis for a given sock: 622 * determine ib_device and port belonging to used internal TCP socket 623 * ethernet interface. 624 */ 625void smc_pnet_find_roce_resource(struct sock *sk, 626 struct smc_ib_device **smcibdev, u8 *ibport, 627 unsigned short vlan_id, u8 gid[]) 628{ 629 struct dst_entry *dst = sk_dst_get(sk); 630 631 *smcibdev = NULL; 632 *ibport = 0; 633 634 if (!dst) 635 goto out; 636 if (!dst->dev) 637 goto out_rel; 638 639 /* if possible, lookup via hardware-defined pnetid */ 640 smc_pnet_find_roce_by_pnetid(dst->dev, smcibdev, ibport, vlan_id, gid); 641 if (*smcibdev) 642 goto out_rel; 643 644 /* lookup via SMC PNET table */ 645 smc_pnet_find_roce_by_table(dst->dev, smcibdev, ibport, vlan_id, gid); 646 647out_rel: 648 dst_release(dst); 649out: 650 return; 651} 652 653void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev) 654{ 655 struct dst_entry *dst = sk_dst_get(sk); 656 657 *smcismdev = NULL; 658 if (!dst) 659 goto out; 660 if (!dst->dev) 661 goto out_rel; 662 663 /* if possible, lookup via hardware-defined pnetid */ 664 smc_pnet_find_ism_by_pnetid(dst->dev, smcismdev); 665 666out_rel: 667 dst_release(dst); 668out: 669 return; 670}