Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

Pull infiniband updates from Roland Dreier:
"Main batch of InfiniBand/RDMA changes for 3.14:
- Flow steering for InfiniBand UD traffic
- IP-based addressing for IBoE aka RoCE
- Pass SRP submaintainership from Dave to Bart
- SRP transport fixes from Bart
- Add the new Cisco usNIC low-level device driver
- Various other fixes"

* tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (75 commits)
IB/mlx5: Verify reserved fields are cleared
IB/mlx5: Remove old field for create mkey mailbox
IB/mlx5: Abort driver cleanup if teardown hca fails
IB/mlx5: Allow creation of QPs with zero-length work queues
mlx5_core: Fix PowerPC support
mlx5_core: Improve debugfs readability
IB/mlx5: Add support for resize CQ
IB/mlx5: Implement modify CQ
IB/mlx5: Make sure doorbell record is visible before doorbell
mlx5_core: Use mlx5 core style warning
IB/mlx5: Clear out struct before create QP command
mlx5_core: Fix out arg size in access_register command
RDMA/nes: Slight optimization of Ethernet address compare
IB/qib: Fix QP check when looping back to/from QP1
RDMA/cxgb4: Fix gcc warning on 32-bit arch
IB/usnic: Remove unused includes of <linux/version.h>
RDMA/amso1100: Add check if cache memory was allocated before freeing it
IPoIB: Report operstate consistently when brought up without a link
IB/core: Fix unused variable warning
RDMA/cma: Handle global/non-linklocal IPv6 addresses in cma_check_linklocal()
...

+7678 -582
+7
Documentation/scsi/scsi_transport_srp/Makefile
··· 1 + all: rport_state_diagram.svg rport_state_diagram.png 2 + 3 + rport_state_diagram.svg: rport_state_diagram.dot 4 + dot -Tsvg -o $@ $< 5 + 6 + rport_state_diagram.png: rport_state_diagram.dot 7 + dot -Tpng -o $@ $<
+26
Documentation/scsi/scsi_transport_srp/rport_state_diagram.dot
··· 1 + digraph srp_initiator { 2 + node [shape = doublecircle]; running lost; 3 + node [shape = circle]; 4 + 5 + { 6 + rank = min; 7 + running_rta [ label = "running;\nreconnect\ntimer\nactive" ]; 8 + }; 9 + running [ label = "running;\nreconnect\ntimer\nstopped" ]; 10 + blocked; 11 + failfast [ label = "fail I/O\nfast" ]; 12 + lost; 13 + 14 + running -> running_rta [ label = "fast_io_fail_tmo = off and\ndev_loss_tmo = off;\nsrp_start_tl_fail_timers()" ]; 15 + running_rta -> running [ label = "fast_io_fail_tmo = off and\ndev_loss_tmo = off;\nreconnecting succeeded" ]; 16 + running -> blocked [ label = "fast_io_fail_tmo >= 0 or\ndev_loss_tmo >= 0;\nsrp_start_tl_fail_timers()" ]; 17 + running -> failfast [ label = "fast_io_fail_tmo = off and\ndev_loss_tmo = off;\nreconnecting failed\n" ]; 18 + blocked -> failfast [ label = "fast_io_fail_tmo\nexpired or\nreconnecting\nfailed" ]; 19 + blocked -> lost [ label = "dev_loss_tmo\nexpired or\nsrp_stop_rport_timers()" ]; 20 + failfast -> lost [ label = "dev_loss_tmo\nexpired or\nsrp_stop_rport_timers()" ]; 21 + blocked -> running [ label = "reconnecting\nsucceeded" ]; 22 + failfast -> failfast [ label = "reconnecting\nfailed" ]; 23 + failfast -> running [ label = "reconnecting\nsucceeded" ]; 24 + running -> lost [ label = "srp_stop_rport_timers()" ]; 25 + running_rta -> lost [ label = "srp_stop_rport_timers()" ]; 26 + }
+6 -1
MAINTAINERS
··· 2195 2195 S: Supported 2196 2196 F: drivers/net/ethernet/cisco/enic/ 2197 2197 2198 + CISCO VIC LOW LATENCY NIC DRIVER 2199 + M: Upinder Malhi <umalhi@cisco.com> 2200 + S: Supported 2201 + F: drivers/infiniband/hw/usnic 2202 + 2198 2203 CIRRUS LOGIC EP93XX ETHERNET DRIVER 2199 2204 M: Hartley Sweeten <hsweeten@visionengravers.com> 2200 2205 L: netdev@vger.kernel.org ··· 7533 7528 F: drivers/scsi/sr* 7534 7529 7535 7530 SCSI RDMA PROTOCOL (SRP) INITIATOR 7536 - M: David Dillow <dillowda@ornl.gov> 7531 + M: Bart Van Assche <bvanassche@acm.org> 7537 7532 L: linux-rdma@vger.kernel.org 7538 7533 S: Supported 7539 7534 W: http://www.openfabrics.org
+4 -2
drivers/infiniband/Kconfig
··· 3 3 depends on PCI || BROKEN 4 4 depends on HAS_IOMEM 5 5 depends on NET 6 + depends on INET 7 + depends on m || IPV6 != m 6 8 ---help--- 7 9 Core support for InfiniBand (IB). Make sure to also select 8 10 any protocols you wish to use as well as drivers for your ··· 40 38 41 39 config INFINIBAND_ADDR_TRANS 42 40 bool 43 - depends on INET 44 - depends on !(INFINIBAND = y && IPV6 = m) 41 + depends on INFINIBAND 45 42 default y 46 43 47 44 source "drivers/infiniband/hw/mthca/Kconfig" ··· 54 53 source "drivers/infiniband/hw/mlx5/Kconfig" 55 54 source "drivers/infiniband/hw/nes/Kconfig" 56 55 source "drivers/infiniband/hw/ocrdma/Kconfig" 56 + source "drivers/infiniband/hw/usnic/Kconfig" 57 57 58 58 source "drivers/infiniband/ulp/ipoib/Kconfig" 59 59
+1
drivers/infiniband/Makefile
··· 10 10 obj-$(CONFIG_MLX5_INFINIBAND) += hw/mlx5/ 11 11 obj-$(CONFIG_INFINIBAND_NES) += hw/nes/ 12 12 obj-$(CONFIG_INFINIBAND_OCRDMA) += hw/ocrdma/ 13 + obj-$(CONFIG_INFINIBAND_USNIC) += hw/usnic/ 13 14 obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/ 14 15 obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/ 15 16 obj-$(CONFIG_INFINIBAND_SRPT) += ulp/srpt/
+3 -2
drivers/infiniband/core/Makefile
··· 1 - infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := ib_addr.o rdma_cm.o 1 + infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_cm.o 2 2 user_access-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_ucm.o 3 3 4 4 obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \ 5 - ib_cm.o iw_cm.o $(infiniband-y) 5 + ib_cm.o iw_cm.o ib_addr.o \ 6 + $(infiniband-y) 6 7 obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o 7 8 obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \ 8 9 $(user_access-y)
+94 -3
drivers/infiniband/core/addr.c
··· 86 86 } 87 87 EXPORT_SYMBOL(rdma_addr_size); 88 88 89 + static struct rdma_addr_client self; 90 + 89 91 void rdma_addr_register_client(struct rdma_addr_client *client) 90 92 { 91 93 atomic_set(&client->refcount, 1); ··· 121 119 } 122 120 EXPORT_SYMBOL(rdma_copy_addr); 123 121 124 - int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) 122 + int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr, 123 + u16 *vlan_id) 125 124 { 126 125 struct net_device *dev; 127 126 int ret = -EADDRNOTAVAIL; ··· 145 142 return ret; 146 143 147 144 ret = rdma_copy_addr(dev_addr, dev, NULL); 145 + if (vlan_id) 146 + *vlan_id = rdma_vlan_dev_vlan_id(dev); 148 147 dev_put(dev); 149 148 break; 150 149 ··· 158 153 &((struct sockaddr_in6 *) addr)->sin6_addr, 159 154 dev, 1)) { 160 155 ret = rdma_copy_addr(dev_addr, dev, NULL); 156 + if (vlan_id) 157 + *vlan_id = rdma_vlan_dev_vlan_id(dev); 161 158 break; 162 159 } 163 160 } ··· 245 238 src_in->sin_addr.s_addr = fl4.saddr; 246 239 247 240 if (rt->dst.dev->flags & IFF_LOOPBACK) { 248 - ret = rdma_translate_ip((struct sockaddr *) dst_in, addr); 241 + ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL); 249 242 if (!ret) 250 243 memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); 251 244 goto put; ··· 293 286 } 294 287 295 288 if (dst->dev->flags & IFF_LOOPBACK) { 296 - ret = rdma_translate_ip((struct sockaddr *) dst_in, addr); 289 + ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL); 297 290 if (!ret) 298 291 memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); 299 292 goto put; ··· 444 437 } 445 438 EXPORT_SYMBOL(rdma_addr_cancel); 446 439 440 + struct resolve_cb_context { 441 + struct rdma_dev_addr *addr; 442 + struct completion comp; 443 + }; 444 + 445 + static void resolve_cb(int status, struct sockaddr *src_addr, 446 + struct rdma_dev_addr *addr, void *context) 447 + { 448 + memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct 449 + rdma_dev_addr)); 450 + complete(&((struct resolve_cb_context *)context)->comp); 451 + } 452 + 453 + int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac, 454 + u16 *vlan_id) 455 + { 456 + int ret = 0; 457 + struct rdma_dev_addr dev_addr; 458 + struct resolve_cb_context ctx; 459 + struct net_device *dev; 460 + 461 + union { 462 + struct sockaddr _sockaddr; 463 + struct sockaddr_in _sockaddr_in; 464 + struct sockaddr_in6 _sockaddr_in6; 465 + } sgid_addr, dgid_addr; 466 + 467 + 468 + ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid); 469 + if (ret) 470 + return ret; 471 + 472 + ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid); 473 + if (ret) 474 + return ret; 475 + 476 + memset(&dev_addr, 0, sizeof(dev_addr)); 477 + 478 + ctx.addr = &dev_addr; 479 + init_completion(&ctx.comp); 480 + ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr, 481 + &dev_addr, 1000, resolve_cb, &ctx); 482 + if (ret) 483 + return ret; 484 + 485 + wait_for_completion(&ctx.comp); 486 + 487 + memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN); 488 + dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if); 489 + if (!dev) 490 + return -ENODEV; 491 + if (vlan_id) 492 + *vlan_id = rdma_vlan_dev_vlan_id(dev); 493 + dev_put(dev); 494 + return ret; 495 + } 496 + EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh); 497 + 498 + int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id) 499 + { 500 + int ret = 0; 501 + struct rdma_dev_addr dev_addr; 502 + union { 503 + struct sockaddr _sockaddr; 504 + struct sockaddr_in 
_sockaddr_in; 505 + struct sockaddr_in6 _sockaddr_in6; 506 + } gid_addr; 507 + 508 + ret = rdma_gid2ip(&gid_addr._sockaddr, sgid); 509 + 510 + if (ret) 511 + return ret; 512 + memset(&dev_addr, 0, sizeof(dev_addr)); 513 + ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id); 514 + if (ret) 515 + return ret; 516 + 517 + memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN); 518 + return ret; 519 + } 520 + EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid); 521 + 447 522 static int netevent_callback(struct notifier_block *self, unsigned long event, 448 523 void *ctx) 449 524 { ··· 550 461 return -ENOMEM; 551 462 552 463 register_netevent_notifier(&nb); 464 + rdma_addr_register_client(&self); 553 465 return 0; 554 466 } 555 467 556 468 static void __exit addr_cleanup(void) 557 469 { 470 + rdma_addr_unregister_client(&self); 558 471 unregister_netevent_notifier(&nb); 559 472 destroy_workqueue(addr_wq); 560 473 }
+52
drivers/infiniband/core/cm.c
··· 47 47 #include <linux/sysfs.h> 48 48 #include <linux/workqueue.h> 49 49 #include <linux/kdev_t.h> 50 + #include <linux/etherdevice.h> 50 51 51 52 #include <rdma/ib_cache.h> 52 53 #include <rdma/ib_cm.h> ··· 178 177 struct ib_ah_attr ah_attr; 179 178 u16 pkey_index; 180 179 u8 timeout; 180 + u8 valid; 181 + u8 smac[ETH_ALEN]; 181 182 }; 182 183 183 184 struct cm_work { ··· 349 346 grh, &av->ah_attr); 350 347 } 351 348 349 + int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac) 350 + { 351 + struct cm_id_private *cm_id_priv; 352 + 353 + cm_id_priv = container_of(id, struct cm_id_private, id); 354 + 355 + if (smac != NULL) 356 + memcpy(cm_id_priv->av.smac, smac, sizeof(cm_id_priv->av.smac)); 357 + 358 + if (alt_smac != NULL) 359 + memcpy(cm_id_priv->alt_av.smac, alt_smac, 360 + sizeof(cm_id_priv->alt_av.smac)); 361 + 362 + return 0; 363 + } 364 + EXPORT_SYMBOL(ib_update_cm_av); 365 + 352 366 static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) 353 367 { 354 368 struct cm_device *cm_dev; ··· 396 376 ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path, 397 377 &av->ah_attr); 398 378 av->timeout = path->packet_life_time + 1; 379 + memcpy(av->smac, path->smac, sizeof(av->smac)); 380 + 381 + av->valid = 1; 399 382 return 0; 400 383 } 401 384 ··· 1577 1554 1578 1555 cm_process_routed_req(req_msg, work->mad_recv_wc->wc); 1579 1556 cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]); 1557 + 1558 + memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN); 1559 + work->path[0].vlan_id = cm_id_priv->av.ah_attr.vlan_id; 1580 1560 ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av); 1581 1561 if (ret) { 1582 1562 ib_get_cached_gid(work->port->cm_dev->ib_device, ··· 3526 3500 *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | 3527 3501 IB_QP_DEST_QPN | IB_QP_RQ_PSN; 3528 3502 qp_attr->ah_attr = cm_id_priv->av.ah_attr; 3503 + if (!cm_id_priv->av.valid) { 3504 + spin_unlock_irqrestore(&cm_id_priv->lock, flags); 3505 + return -EINVAL; 3506 + } 3507 + if (cm_id_priv->av.ah_attr.vlan_id != 0xffff) { 3508 + qp_attr->vlan_id = cm_id_priv->av.ah_attr.vlan_id; 3509 + *qp_attr_mask |= IB_QP_VID; 3510 + } 3511 + if (!is_zero_ether_addr(cm_id_priv->av.smac)) { 3512 + memcpy(qp_attr->smac, cm_id_priv->av.smac, 3513 + sizeof(qp_attr->smac)); 3514 + *qp_attr_mask |= IB_QP_SMAC; 3515 + } 3516 + if (cm_id_priv->alt_av.valid) { 3517 + if (cm_id_priv->alt_av.ah_attr.vlan_id != 0xffff) { 3518 + qp_attr->alt_vlan_id = 3519 + cm_id_priv->alt_av.ah_attr.vlan_id; 3520 + *qp_attr_mask |= IB_QP_ALT_VID; 3521 + } 3522 + if (!is_zero_ether_addr(cm_id_priv->alt_av.smac)) { 3523 + memcpy(qp_attr->alt_smac, 3524 + cm_id_priv->alt_av.smac, 3525 + sizeof(qp_attr->alt_smac)); 3526 + *qp_attr_mask |= IB_QP_ALT_SMAC; 3527 + } 3528 + } 3529 3529 qp_attr->path_mtu = cm_id_priv->path_mtu; 3530 3530 qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn); 3531 3531 qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
+66 -15
drivers/infiniband/core/cma.c
··· 340 340 int ret; 341 341 342 342 if (addr->sa_family != AF_IB) { 343 - ret = rdma_translate_ip(addr, dev_addr); 343 + ret = rdma_translate_ip(addr, dev_addr, NULL); 344 344 } else { 345 345 cma_translate_ib((struct sockaddr_ib *) addr, dev_addr); 346 346 ret = 0; ··· 365 365 return -EINVAL; 366 366 367 367 mutex_lock(&lock); 368 - iboe_addr_get_sgid(dev_addr, &iboe_gid); 368 + rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, 369 + &iboe_gid); 370 + 369 371 memcpy(&gid, dev_addr->src_dev_addr + 370 372 rdma_addr_gid_offset(dev_addr), sizeof gid); 371 373 if (listen_id_priv && ··· 605 603 { 606 604 struct ib_qp_attr qp_attr; 607 605 int qp_attr_mask, ret; 606 + union ib_gid sgid; 608 607 609 608 mutex_lock(&id_priv->qp_mutex); 610 609 if (!id_priv->id.qp) { ··· 628 625 if (ret) 629 626 goto out; 630 627 628 + ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num, 629 + qp_attr.ah_attr.grh.sgid_index, &sgid); 630 + if (ret) 631 + goto out; 632 + 633 + if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) 634 + == RDMA_TRANSPORT_IB && 635 + rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num) 636 + == IB_LINK_LAYER_ETHERNET) { 637 + ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr.smac, NULL); 638 + 639 + if (ret) 640 + goto out; 641 + } 631 642 if (conn_param) 632 643 qp_attr.max_dest_rd_atomic = conn_param->responder_resources; 633 644 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); ··· 742 725 else 743 726 ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr, 744 727 qp_attr_mask); 728 + 745 729 if (qp_attr->qp_state == IB_QPS_RTR) 746 730 qp_attr->rq_psn = id_priv->seq_num; 747 731 break; ··· 1284 1266 struct rdma_id_private *listen_id, *conn_id; 1285 1267 struct rdma_cm_event event; 1286 1268 int offset, ret; 1269 + u8 smac[ETH_ALEN]; 1270 + u8 alt_smac[ETH_ALEN]; 1271 + u8 *psmac = smac; 1272 + u8 *palt_smac = alt_smac; 1273 + int is_iboe = ((rdma_node_get_transport(cm_id->device->node_type) == 1274 + RDMA_TRANSPORT_IB) && 1275 + (rdma_port_get_link_layer(cm_id->device, 1276 + ib_event->param.req_rcvd.port) == 1277 + IB_LINK_LAYER_ETHERNET)); 1287 1278 1288 1279 listen_id = cm_id->context; 1289 1280 if (!cma_check_req_qp_type(&listen_id->id, ib_event)) ··· 1337 1310 if (ret) 1338 1311 goto err3; 1339 1312 1313 + if (is_iboe) { 1314 + if (ib_event->param.req_rcvd.primary_path != NULL) 1315 + rdma_addr_find_smac_by_sgid( 1316 + &ib_event->param.req_rcvd.primary_path->sgid, 1317 + psmac, NULL); 1318 + else 1319 + psmac = NULL; 1320 + if (ib_event->param.req_rcvd.alternate_path != NULL) 1321 + rdma_addr_find_smac_by_sgid( 1322 + &ib_event->param.req_rcvd.alternate_path->sgid, 1323 + palt_smac, NULL); 1324 + else 1325 + palt_smac = NULL; 1326 + } 1340 1327 /* 1341 1328 * Acquire mutex to prevent user executing rdma_destroy_id() 1342 1329 * while we're accessing the cm_id. 
1343 1330 */ 1344 1331 mutex_lock(&lock); 1345 - if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD)) 1332 + if (is_iboe) 1333 + ib_update_cm_av(cm_id, psmac, palt_smac); 1334 + if (cma_comp(conn_id, RDMA_CM_CONNECT) && 1335 + (conn_id->id.qp_type != IB_QPT_UD)) 1346 1336 ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); 1347 1337 mutex_unlock(&lock); 1348 1338 mutex_unlock(&conn_id->handler_mutex); ··· 1518 1474 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); 1519 1475 conn_id->state = RDMA_CM_CONNECT; 1520 1476 1521 - ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr); 1477 + ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr, NULL); 1522 1478 if (ret) { 1523 1479 mutex_unlock(&conn_id->handler_mutex); 1524 1480 rdma_destroy_id(new_cm_id); ··· 1917 1873 struct cma_work *work; 1918 1874 int ret; 1919 1875 struct net_device *ndev = NULL; 1920 - u16 vid; 1876 + 1921 1877 1922 1878 work = kzalloc(sizeof *work, GFP_KERNEL); 1923 1879 if (!work) ··· 1941 1897 goto err2; 1942 1898 } 1943 1899 1944 - vid = rdma_vlan_dev_vlan_id(ndev); 1900 + route->path_rec->vlan_id = rdma_vlan_dev_vlan_id(ndev); 1901 + memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN); 1902 + memcpy(route->path_rec->smac, ndev->dev_addr, ndev->addr_len); 1945 1903 1946 - iboe_mac_vlan_to_ll(&route->path_rec->sgid, addr->dev_addr.src_dev_addr, vid); 1947 - iboe_mac_vlan_to_ll(&route->path_rec->dgid, addr->dev_addr.dst_dev_addr, vid); 1904 + rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, 1905 + &route->path_rec->sgid); 1906 + rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr, 1907 + &route->path_rec->dgid); 1948 1908 1949 1909 route->path_rec->hop_limit = 1; 1950 1910 route->path_rec->reversible = 1; ··· 2111 2063 RDMA_CM_ADDR_RESOLVED)) 2112 2064 goto out; 2113 2065 2066 + memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr)); 2114 2067 if (!status && !id_priv->cma_dev) 2115 2068 status = cma_acquire_dev(id_priv, NULL); 2116 2069 ··· 2121 2072 goto out; 2122 2073 event.event = RDMA_CM_EVENT_ADDR_ERROR; 2123 2074 event.status = status; 2124 - } else { 2125 - memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr)); 2075 + } else 2126 2076 event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 2127 - } 2128 2077 2129 2078 if (id_priv->id.event_handler(&id_priv->id, &event)) { 2130 2079 cma_exch(id_priv, RDMA_CM_DESTROYING); ··· 2527 2480 return 0; 2528 2481 2529 2482 sin6 = (struct sockaddr_in6 *) addr; 2530 - if ((ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) && 2531 - !sin6->sin6_scope_id) 2483 + 2484 + if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) 2485 + return 0; 2486 + 2487 + if (!sin6->sin6_scope_id) 2532 2488 return -EINVAL; 2533 2489 2534 2490 dev_addr->bound_dev_if = sin6->sin6_scope_id; ··· 2606 2556 if (ret) 2607 2557 goto err1; 2608 2558 2559 + memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr)); 2609 2560 if (!cma_any_addr(addr)) { 2610 2561 ret = cma_translate_addr(addr, &id->route.addr.dev_addr); 2611 2562 if (ret) ··· 2617 2566 goto err1; 2618 2567 } 2619 2568 2620 - memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr)); 2621 2569 if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) { 2622 2570 if (addr->sa_family == AF_INET) 2623 2571 id_priv->afonly = 1; ··· 3345 3295 err = -EINVAL; 3346 3296 goto out2; 3347 3297 } 3348 - iboe_addr_get_sgid(dev_addr, &mc->multicast.ib->rec.port_gid); 3298 + rdma_ip2gid((struct sockaddr 
*)&id_priv->id.route.addr.src_addr, 3299 + &mc->multicast.ib->rec.port_gid); 3349 3300 work->id = id_priv; 3350 3301 work->mc = mc; 3351 3302 INIT_WORK(&work->work, iboe_mcast_work_handler);
+2
drivers/infiniband/core/core_priv.h
··· 49 49 int ib_cache_setup(void); 50 50 void ib_cache_cleanup(void); 51 51 52 + int ib_resolve_eth_l2_attrs(struct ib_qp *qp, 53 + struct ib_qp_attr *qp_attr, int *qp_attr_mask); 52 54 #endif /* _CORE_PRIV_H */
+1 -2
drivers/infiniband/core/iwcm.c
··· 334 334 { 335 335 struct iwcm_id_private *cm_id_priv; 336 336 unsigned long flags; 337 - int ret; 338 337 339 338 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 340 339 /* ··· 349 350 cm_id_priv->state = IW_CM_STATE_DESTROYING; 350 351 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 351 352 /* destroy the listening endpoint */ 352 - ret = cm_id->device->iwcm->destroy_listen(cm_id); 353 + cm_id->device->iwcm->destroy_listen(cm_id); 353 354 spin_lock_irqsave(&cm_id_priv->lock, flags); 354 355 break; 355 356 case IW_CM_STATE_ESTABLISHED:
+11 -1
drivers/infiniband/core/sa_query.c
··· 42 42 #include <linux/kref.h> 43 43 #include <linux/idr.h> 44 44 #include <linux/workqueue.h> 45 - 45 + #include <uapi/linux/if_ether.h> 46 46 #include <rdma/ib_pack.h> 47 47 #include <rdma/ib_cache.h> 48 48 #include "sa.h" ··· 556 556 ah_attr->grh.hop_limit = rec->hop_limit; 557 557 ah_attr->grh.traffic_class = rec->traffic_class; 558 558 } 559 + if (force_grh) { 560 + memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN); 561 + ah_attr->vlan_id = rec->vlan_id; 562 + } else { 563 + ah_attr->vlan_id = 0xffff; 564 + } 565 + 559 566 return 0; 560 567 } 561 568 EXPORT_SYMBOL(ib_init_ah_from_path); ··· 677 670 678 671 ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), 679 672 mad->data, &rec); 673 + rec.vlan_id = 0xffff; 674 + memset(rec.dmac, 0, ETH_ALEN); 675 + memset(rec.smac, 0, ETH_ALEN); 680 676 query->callback(status, &rec, query->context); 681 677 } else 682 678 query->callback(status, NULL, query->context);
+1
drivers/infiniband/core/sysfs.c
··· 613 613 case RDMA_NODE_IB_CA: return sprintf(buf, "%d: CA\n", dev->node_type); 614 614 case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type); 615 615 case RDMA_NODE_USNIC: return sprintf(buf, "%d: usNIC\n", dev->node_type); 616 + case RDMA_NODE_USNIC_UDP: return sprintf(buf, "%d: usNIC UDP\n", dev->node_type); 616 617 case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type); 617 618 case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type); 618 619 default: return sprintf(buf, "%d: <unknown>\n", dev->node_type);
+4 -14
drivers/infiniband/core/ucma.c
··· 655 655 static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp, 656 656 struct rdma_route *route) 657 657 { 658 - struct rdma_dev_addr *dev_addr; 659 - struct net_device *dev; 660 - u16 vid = 0; 661 658 662 659 resp->num_paths = route->num_paths; 663 660 switch (route->num_paths) { 664 661 case 0: 665 - dev_addr = &route->addr.dev_addr; 666 - dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); 667 - if (dev) { 668 - vid = rdma_vlan_dev_vlan_id(dev); 669 - dev_put(dev); 670 - } 671 - 672 - iboe_mac_vlan_to_ll((union ib_gid *) &resp->ib_route[0].dgid, 673 - dev_addr->dst_dev_addr, vid); 674 - iboe_addr_get_sgid(dev_addr, 675 - (union ib_gid *) &resp->ib_route[0].sgid); 662 + rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr, 663 + (union ib_gid *)&resp->ib_route[0].dgid); 664 + rdma_ip2gid((struct sockaddr *)&route->addr.src_addr, 665 + (union ib_gid *)&resp->ib_route[0].sgid); 676 666 resp->ib_route[0].pkey = cpu_to_be16(0xffff); 677 667 break; 678 668 case 2:
+4
drivers/infiniband/core/uverbs_cmd.c
··· 40 40 #include <asm/uaccess.h> 41 41 42 42 #include "uverbs.h" 43 + #include "core_priv.h" 43 44 44 45 struct uverbs_lock_class { 45 46 struct lock_class_key key; ··· 1962 1961 attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; 1963 1962 1964 1963 if (qp->real_qp == qp) { 1964 + ret = ib_resolve_eth_l2_attrs(qp, attr, &cmd.attr_mask); 1965 + if (ret) 1966 + goto out; 1965 1967 ret = qp->device->modify_qp(qp, attr, 1966 1968 modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata); 1967 1969 } else {
+99 -2
drivers/infiniband/core/verbs.c
··· 44 44 45 45 #include <rdma/ib_verbs.h> 46 46 #include <rdma/ib_cache.h> 47 + #include <rdma/ib_addr.h> 48 + 49 + #include "core_priv.h" 47 50 48 51 int ib_rate_to_mult(enum ib_rate rate) 49 52 { ··· 119 116 return RDMA_TRANSPORT_IWARP; 120 117 case RDMA_NODE_USNIC: 121 118 return RDMA_TRANSPORT_USNIC; 119 + case RDMA_NODE_USNIC_UDP: 120 + return RDMA_TRANSPORT_USNIC_UDP; 122 121 default: 123 122 BUG(); 124 123 return 0; ··· 138 133 return IB_LINK_LAYER_INFINIBAND; 139 134 case RDMA_TRANSPORT_IWARP: 140 135 case RDMA_TRANSPORT_USNIC: 136 + case RDMA_TRANSPORT_USNIC_UDP: 141 137 return IB_LINK_LAYER_ETHERNET; 142 138 default: 143 139 return IB_LINK_LAYER_UNSPECIFIED; ··· 198 192 u32 flow_class; 199 193 u16 gid_index; 200 194 int ret; 195 + int is_eth = (rdma_port_get_link_layer(device, port_num) == 196 + IB_LINK_LAYER_ETHERNET); 201 197 202 198 memset(ah_attr, 0, sizeof *ah_attr); 199 + if (is_eth) { 200 + if (!(wc->wc_flags & IB_WC_GRH)) 201 + return -EPROTOTYPE; 202 + 203 + if (wc->wc_flags & IB_WC_WITH_SMAC && 204 + wc->wc_flags & IB_WC_WITH_VLAN) { 205 + memcpy(ah_attr->dmac, wc->smac, ETH_ALEN); 206 + ah_attr->vlan_id = wc->vlan_id; 207 + } else { 208 + ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid, 209 + ah_attr->dmac, &ah_attr->vlan_id); 210 + if (ret) 211 + return ret; 212 + } 213 + } else { 214 + ah_attr->vlan_id = 0xffff; 215 + } 216 + 203 217 ah_attr->dlid = wc->slid; 204 218 ah_attr->sl = wc->sl; 205 219 ah_attr->src_path_bits = wc->dlid_path_bits; ··· 502 476 static const struct { 503 477 int valid; 504 478 enum ib_qp_attr_mask req_param[IB_QPT_MAX]; 479 + enum ib_qp_attr_mask req_param_add_eth[IB_QPT_MAX]; 505 480 enum ib_qp_attr_mask opt_param[IB_QPT_MAX]; 481 + enum ib_qp_attr_mask opt_param_add_eth[IB_QPT_MAX]; 506 482 } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { 507 483 [IB_QPS_RESET] = { 508 484 [IB_QPS_RESET] = { .valid = 1 }, ··· 585 557 IB_QP_MAX_DEST_RD_ATOMIC | 586 558 IB_QP_MIN_RNR_TIMER), 587 559 }, 560 + .req_param_add_eth = { 561 + [IB_QPT_RC] = (IB_QP_SMAC), 562 + [IB_QPT_UC] = (IB_QP_SMAC), 563 + [IB_QPT_XRC_INI] = (IB_QP_SMAC), 564 + [IB_QPT_XRC_TGT] = (IB_QP_SMAC) 565 + }, 588 566 .opt_param = { 589 567 [IB_QPT_UD] = (IB_QP_PKEY_INDEX | 590 568 IB_QP_QKEY), ··· 610 576 IB_QP_QKEY), 611 577 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | 612 578 IB_QP_QKEY), 613 - } 579 + }, 580 + .opt_param_add_eth = { 581 + [IB_QPT_RC] = (IB_QP_ALT_SMAC | 582 + IB_QP_VID | 583 + IB_QP_ALT_VID), 584 + [IB_QPT_UC] = (IB_QP_ALT_SMAC | 585 + IB_QP_VID | 586 + IB_QP_ALT_VID), 587 + [IB_QPT_XRC_INI] = (IB_QP_ALT_SMAC | 588 + IB_QP_VID | 589 + IB_QP_ALT_VID), 590 + [IB_QPT_XRC_TGT] = (IB_QP_ALT_SMAC | 591 + IB_QP_VID | 592 + IB_QP_ALT_VID) 593 + } 614 594 } 615 595 }, 616 596 [IB_QPS_RTR] = { ··· 827 779 }; 828 780 829 781 int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, 830 - enum ib_qp_type type, enum ib_qp_attr_mask mask) 782 + enum ib_qp_type type, enum ib_qp_attr_mask mask, 783 + enum rdma_link_layer ll) 831 784 { 832 785 enum ib_qp_attr_mask req_param, opt_param; 833 786 ··· 847 798 req_param = qp_state_table[cur_state][next_state].req_param[type]; 848 799 opt_param = qp_state_table[cur_state][next_state].opt_param[type]; 849 800 801 + if (ll == IB_LINK_LAYER_ETHERNET) { 802 + req_param |= qp_state_table[cur_state][next_state]. 803 + req_param_add_eth[type]; 804 + opt_param |= qp_state_table[cur_state][next_state]. 
805 + opt_param_add_eth[type]; 806 + } 807 + 850 808 if ((mask & req_param) != req_param) 851 809 return 0; 852 810 ··· 864 808 } 865 809 EXPORT_SYMBOL(ib_modify_qp_is_ok); 866 810 811 + int ib_resolve_eth_l2_attrs(struct ib_qp *qp, 812 + struct ib_qp_attr *qp_attr, int *qp_attr_mask) 813 + { 814 + int ret = 0; 815 + union ib_gid sgid; 816 + 817 + if ((*qp_attr_mask & IB_QP_AV) && 818 + (rdma_port_get_link_layer(qp->device, qp_attr->ah_attr.port_num) == IB_LINK_LAYER_ETHERNET)) { 819 + ret = ib_query_gid(qp->device, qp_attr->ah_attr.port_num, 820 + qp_attr->ah_attr.grh.sgid_index, &sgid); 821 + if (ret) 822 + goto out; 823 + if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) { 824 + rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw, qp_attr->ah_attr.dmac); 825 + rdma_get_ll_mac((struct in6_addr *)sgid.raw, qp_attr->smac); 826 + qp_attr->vlan_id = rdma_get_vlan_id(&sgid); 827 + } else { 828 + ret = rdma_addr_find_dmac_by_grh(&sgid, &qp_attr->ah_attr.grh.dgid, 829 + qp_attr->ah_attr.dmac, &qp_attr->vlan_id); 830 + if (ret) 831 + goto out; 832 + ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr->smac, NULL); 833 + if (ret) 834 + goto out; 835 + } 836 + *qp_attr_mask |= IB_QP_SMAC; 837 + if (qp_attr->vlan_id < 0xFFFF) 838 + *qp_attr_mask |= IB_QP_VID; 839 + } 840 + out: 841 + return ret; 842 + } 843 + EXPORT_SYMBOL(ib_resolve_eth_l2_attrs); 844 + 845 + 867 846 int ib_modify_qp(struct ib_qp *qp, 868 847 struct ib_qp_attr *qp_attr, 869 848 int qp_attr_mask) 870 849 { 850 + int ret; 851 + 852 + ret = ib_resolve_eth_l2_attrs(qp, qp_attr, &qp_attr_mask); 853 + if (ret) 854 + return ret; 855 + 871 856 return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL); 872 857 } 873 858 EXPORT_SYMBOL(ib_modify_qp);
+2 -1
drivers/infiniband/hw/amso1100/c2_intr.c
··· 169 169 * We should never get here, as the adapter should 170 170 * never send us a reply that we're not expecting. 171 171 */ 172 - vq_repbuf_free(c2dev, host_msg); 172 + if (reply_msg != NULL) 173 + vq_repbuf_free(c2dev, host_msg); 173 174 pr_debug("handle_vq: UNEXPECTEDLY got NULL req\n"); 174 175 return; 175 176 }
+1 -1
drivers/infiniband/hw/cxgb4/mem.c
··· 76 76 INIT_ULPTX_WR(req, wr_len, 0, 0); 77 77 req->wr.wr_hi = cpu_to_be32(FW_WR_OP(FW_ULPTX_WR) | 78 78 (wait ? FW_WR_COMPL(1) : 0)); 79 - req->wr.wr_lo = wait ? (__force __be64)&wr_wait : 0; 79 + req->wr.wr_lo = wait ? (__force __be64)(unsigned long) &wr_wait : 0L; 80 80 req->wr.wr_mid = cpu_to_be32(FW_WR_LEN16(DIV_ROUND_UP(wr_len, 16))); 81 81 req->cmd = cpu_to_be32(ULPTX_CMD(ULP_TX_MEM_WRITE)); 82 82 req->cmd |= cpu_to_be32(V_T5_ULP_MEMIO_ORDER(1));
+1 -1
drivers/infiniband/hw/ehca/ehca_qp.c
··· 1329 1329 qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state; 1330 1330 if (!smi_reset2init && 1331 1331 !ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type, 1332 - attr_mask)) { 1332 + attr_mask, IB_LINK_LAYER_UNSPECIFIED)) { 1333 1333 ret = -EINVAL; 1334 1334 ehca_err(ibqp->device, 1335 1335 "Invalid qp transition new_state=%x cur_state=%x "
+1 -1
drivers/infiniband/hw/ipath/ipath_qp.c
··· 463 463 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; 464 464 465 465 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, 466 - attr_mask)) 466 + attr_mask, IB_LINK_LAYER_UNSPECIFIED)) 467 467 goto inval; 468 468 469 469 if (attr_mask & IB_QP_AV) {
+1 -1
drivers/infiniband/hw/mlx4/Kconfig
··· 1 1 config MLX4_INFINIBAND 2 2 tristate "Mellanox ConnectX HCA support" 3 - depends on NETDEVICES && ETHERNET && PCI 3 + depends on NETDEVICES && ETHERNET && PCI && INET 4 4 select NET_VENDOR_MELLANOX 5 5 select MLX4_CORE 6 6 ---help---
+9 -31
drivers/infiniband/hw/mlx4/ah.c
··· 39 39 40 40 #include "mlx4_ib.h" 41 41 42 - int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr, 43 - u8 *mac, int *is_mcast, u8 port) 44 - { 45 - struct in6_addr in6; 46 - 47 - *is_mcast = 0; 48 - 49 - memcpy(&in6, ah_attr->grh.dgid.raw, sizeof in6); 50 - if (rdma_link_local_addr(&in6)) 51 - rdma_get_ll_mac(&in6, mac); 52 - else if (rdma_is_multicast_addr(&in6)) { 53 - rdma_get_mcast_mac(&in6, mac); 54 - *is_mcast = 1; 55 - } else 56 - return -EINVAL; 57 - 58 - return 0; 59 - } 60 - 61 42 static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, 62 43 struct mlx4_ib_ah *ah) 63 44 { ··· 73 92 { 74 93 struct mlx4_ib_dev *ibdev = to_mdev(pd->device); 75 94 struct mlx4_dev *dev = ibdev->dev; 76 - union ib_gid sgid; 77 - u8 mac[6]; 78 - int err; 79 95 int is_mcast; 96 + struct in6_addr in6; 80 97 u16 vlan_tag; 81 98 82 - err = mlx4_ib_resolve_grh(ibdev, ah_attr, mac, &is_mcast, ah_attr->port_num); 83 - if (err) 84 - return ERR_PTR(err); 85 - 86 - memcpy(ah->av.eth.mac, mac, 6); 87 - err = ib_get_cached_gid(pd->device, ah_attr->port_num, ah_attr->grh.sgid_index, &sgid); 88 - if (err) 89 - return ERR_PTR(err); 90 - vlan_tag = rdma_get_vlan_id(&sgid); 99 + memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6)); 100 + if (rdma_is_multicast_addr(&in6)) { 101 + is_mcast = 1; 102 + rdma_get_mcast_mac(&in6, ah->av.eth.mac); 103 + } else { 104 + memcpy(ah->av.eth.mac, ah_attr->dmac, ETH_ALEN); 105 + } 106 + vlan_tag = ah_attr->vlan_id; 91 107 if (vlan_tag < 0x1000) 92 108 vlan_tag |= (ah_attr->sl & 7) << 13; 93 109 ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
+9
drivers/infiniband/hw/mlx4/cq.c
··· 798 798 wc->sl = be16_to_cpu(cqe->sl_vid) >> 13; 799 799 else 800 800 wc->sl = be16_to_cpu(cqe->sl_vid) >> 12; 801 + if (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_VLAN_PRESENT_MASK) { 802 + wc->vlan_id = be16_to_cpu(cqe->sl_vid) & 803 + MLX4_CQE_VID_MASK; 804 + } else { 805 + wc->vlan_id = 0xffff; 806 + } 807 + wc->wc_flags |= IB_WC_WITH_VLAN; 808 + memcpy(wc->smac, cqe->smac, ETH_ALEN); 809 + wc->wc_flags |= IB_WC_WITH_SMAC; 801 810 } 802 811 803 812 return 0;
+604 -171
drivers/infiniband/hw/mlx4/main.c
··· 39 39 #include <linux/inetdevice.h> 40 40 #include <linux/rtnetlink.h> 41 41 #include <linux/if_vlan.h> 42 + #include <net/ipv6.h> 43 + #include <net/addrconf.h> 42 44 43 45 #include <rdma/ib_smi.h> 44 46 #include <rdma/ib_user_verbs.h> ··· 57 55 #define DRV_RELDATE "April 4, 2008" 58 56 59 57 #define MLX4_IB_FLOW_MAX_PRIO 0xFFF 58 + #define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF 60 59 61 60 MODULE_AUTHOR("Roland Dreier"); 62 61 MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver"); ··· 95 92 96 93 static int check_flow_steering_support(struct mlx4_dev *dev) 97 94 { 95 + int eth_num_ports = 0; 98 96 int ib_num_ports = 0; 99 - int i; 100 97 101 - mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) 102 - ib_num_ports++; 98 + int dmfs = dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED; 103 99 104 - if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { 105 - if (ib_num_ports || mlx4_is_mfunc(dev)) { 106 - pr_warn("Device managed flow steering is unavailable " 107 - "for IB ports or in multifunction env.\n"); 108 - return 0; 100 + if (dmfs) { 101 + int i; 102 + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) 103 + eth_num_ports++; 104 + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) 105 + ib_num_ports++; 106 + dmfs &= (!ib_num_ports || 107 + (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB)) && 108 + (!eth_num_ports || 109 + (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN)); 110 + if (ib_num_ports && mlx4_is_mfunc(dev)) { 111 + pr_warn("Device managed flow steering is unavailable for IB port in multifunction env.\n"); 112 + dmfs = 0; 109 113 } 110 - return 1; 111 114 } 112 - return 0; 115 + return dmfs; 113 116 } 114 117 115 118 static int mlx4_ib_query_device(struct ib_device *ibdev, ··· 174 165 props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B; 175 166 else 176 167 props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A; 177 - if (check_flow_steering_support(dev->dev)) 168 + if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) 178 169 props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING; 179 170 } 180 171 ··· 796 787 int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, 797 788 union ib_gid *gid) 798 789 { 799 - u8 mac[6]; 800 790 struct net_device *ndev; 801 791 int ret = 0; 802 792 ··· 809 801 spin_unlock(&mdev->iboe.lock); 810 802 811 803 if (ndev) { 812 - rdma_get_mcast_mac((struct in6_addr *)gid, mac); 813 - rtnl_lock(); 814 - dev_mc_add(mdev->iboe.netdevs[mqp->port - 1], mac); 815 804 ret = 1; 816 - rtnl_unlock(); 817 805 dev_put(ndev); 818 806 } 819 807 ··· 823 819 }; 824 820 825 821 static int parse_flow_attr(struct mlx4_dev *dev, 822 + u32 qp_num, 826 823 union ib_flow_spec *ib_spec, 827 824 struct _rule_hw *mlx4_spec) 828 825 { ··· 839 834 mlx4_spec->eth.vlan_tag = ib_spec->eth.val.vlan_tag; 840 835 mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag; 841 836 break; 837 + case IB_FLOW_SPEC_IB: 838 + type = MLX4_NET_TRANS_RULE_ID_IB; 839 + mlx4_spec->ib.l3_qpn = 840 + cpu_to_be32(qp_num); 841 + mlx4_spec->ib.qpn_mask = 842 + cpu_to_be32(MLX4_IB_FLOW_QPN_MASK); 843 + break; 844 + 842 845 843 846 case IB_FLOW_SPEC_IPV4: 844 847 type = MLX4_NET_TRANS_RULE_ID_IPV4; ··· 878 865 return mlx4_hw_rule_sz(dev, type); 879 866 } 880 867 868 + struct default_rules { 869 + __u32 mandatory_fields[IB_FLOW_SPEC_SUPPORT_LAYERS]; 870 + __u32 mandatory_not_fields[IB_FLOW_SPEC_SUPPORT_LAYERS]; 871 + __u32 rules_create_list[IB_FLOW_SPEC_SUPPORT_LAYERS]; 872 + __u8 link_layer; 873 + }; 874 + static const struct default_rules default_table[] = { 
875 + { 876 + .mandatory_fields = {IB_FLOW_SPEC_IPV4}, 877 + .mandatory_not_fields = {IB_FLOW_SPEC_ETH}, 878 + .rules_create_list = {IB_FLOW_SPEC_IB}, 879 + .link_layer = IB_LINK_LAYER_INFINIBAND 880 + } 881 + }; 882 + 883 + static int __mlx4_ib_default_rules_match(struct ib_qp *qp, 884 + struct ib_flow_attr *flow_attr) 885 + { 886 + int i, j, k; 887 + void *ib_flow; 888 + const struct default_rules *pdefault_rules = default_table; 889 + u8 link_layer = rdma_port_get_link_layer(qp->device, flow_attr->port); 890 + 891 + for (i = 0; i < sizeof(default_table)/sizeof(default_table[0]); i++, 892 + pdefault_rules++) { 893 + __u32 field_types[IB_FLOW_SPEC_SUPPORT_LAYERS]; 894 + memset(&field_types, 0, sizeof(field_types)); 895 + 896 + if (link_layer != pdefault_rules->link_layer) 897 + continue; 898 + 899 + ib_flow = flow_attr + 1; 900 + /* we assume the specs are sorted */ 901 + for (j = 0, k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS && 902 + j < flow_attr->num_of_specs; k++) { 903 + union ib_flow_spec *current_flow = 904 + (union ib_flow_spec *)ib_flow; 905 + 906 + /* same layer but different type */ 907 + if (((current_flow->type & IB_FLOW_SPEC_LAYER_MASK) == 908 + (pdefault_rules->mandatory_fields[k] & 909 + IB_FLOW_SPEC_LAYER_MASK)) && 910 + (current_flow->type != 911 + pdefault_rules->mandatory_fields[k])) 912 + goto out; 913 + 914 + /* same layer, try match next one */ 915 + if (current_flow->type == 916 + pdefault_rules->mandatory_fields[k]) { 917 + j++; 918 + ib_flow += 919 + ((union ib_flow_spec *)ib_flow)->size; 920 + } 921 + } 922 + 923 + ib_flow = flow_attr + 1; 924 + for (j = 0; j < flow_attr->num_of_specs; 925 + j++, ib_flow += ((union ib_flow_spec *)ib_flow)->size) 926 + for (k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS; k++) 927 + /* same layer and same type */ 928 + if (((union ib_flow_spec *)ib_flow)->type == 929 + pdefault_rules->mandatory_not_fields[k]) 930 + goto out; 931 + 932 + return i; 933 + } 934 + out: 935 + return -1; 936 + } 937 + 938 + static int __mlx4_ib_create_default_rules( 939 + struct mlx4_ib_dev *mdev, 940 + struct ib_qp *qp, 941 + const struct default_rules *pdefault_rules, 942 + struct _rule_hw *mlx4_spec) { 943 + int size = 0; 944 + int i; 945 + 946 + for (i = 0; i < sizeof(pdefault_rules->rules_create_list)/ 947 + sizeof(pdefault_rules->rules_create_list[0]); i++) { 948 + int ret; 949 + union ib_flow_spec ib_spec; 950 + switch (pdefault_rules->rules_create_list[i]) { 951 + case 0: 952 + /* no rule */ 953 + continue; 954 + case IB_FLOW_SPEC_IB: 955 + ib_spec.type = IB_FLOW_SPEC_IB; 956 + ib_spec.size = sizeof(struct ib_flow_spec_ib); 957 + 958 + break; 959 + default: 960 + /* invalid rule */ 961 + return -EINVAL; 962 + } 963 + /* We must put empty rule, qpn is being ignored */ 964 + ret = parse_flow_attr(mdev->dev, 0, &ib_spec, 965 + mlx4_spec); 966 + if (ret < 0) { 967 + pr_info("invalid parsing\n"); 968 + return -EINVAL; 969 + } 970 + 971 + mlx4_spec = (void *)mlx4_spec + ret; 972 + size += ret; 973 + } 974 + return size; 975 + } 976 + 881 977 static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, 882 978 int domain, 883 979 enum mlx4_net_trans_promisc_mode flow_type, ··· 998 876 struct mlx4_ib_dev *mdev = to_mdev(qp->device); 999 877 struct mlx4_cmd_mailbox *mailbox; 1000 878 struct mlx4_net_trans_rule_hw_ctrl *ctrl; 879 + int default_flow; 1001 880 1002 881 static const u16 __mlx4_domain[] = { 1003 882 [IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS, ··· 1033 910 1034 911 ib_flow = flow_attr + 1; 1035 912 size += sizeof(struct 
mlx4_net_trans_rule_hw_ctrl); 913 + /* Add default flows */ 914 + default_flow = __mlx4_ib_default_rules_match(qp, flow_attr); 915 + if (default_flow >= 0) { 916 + ret = __mlx4_ib_create_default_rules( 917 + mdev, qp, default_table + default_flow, 918 + mailbox->buf + size); 919 + if (ret < 0) { 920 + mlx4_free_cmd_mailbox(mdev->dev, mailbox); 921 + return -EINVAL; 922 + } 923 + size += ret; 924 + } 1036 925 for (i = 0; i < flow_attr->num_of_specs; i++) { 1037 - ret = parse_flow_attr(mdev->dev, ib_flow, mailbox->buf + size); 926 + ret = parse_flow_attr(mdev->dev, qp->qp_num, ib_flow, 927 + mailbox->buf + size); 1038 928 if (ret < 0) { 1039 929 mlx4_free_cmd_mailbox(mdev->dev, mailbox); 1040 930 return -EINVAL; ··· 1161 1025 struct mlx4_ib_qp *mqp = to_mqp(ibqp); 1162 1026 u64 reg_id; 1163 1027 struct mlx4_ib_steering *ib_steering = NULL; 1028 + enum mlx4_protocol prot = (gid->raw[1] == 0x0e) ? 1029 + MLX4_PROT_IB_IPV4 : MLX4_PROT_IB_IPV6; 1164 1030 1165 1031 if (mdev->dev->caps.steering_mode == 1166 1032 MLX4_STEERING_MODE_DEVICE_MANAGED) { ··· 1174 1036 err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port, 1175 1037 !!(mqp->flags & 1176 1038 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK), 1177 - MLX4_PROT_IB_IPV6, &reg_id); 1039 + prot, &reg_id); 1178 1040 if (err) 1179 1041 goto err_malloc; 1180 1042 ··· 1193 1055 1194 1056 err_add: 1195 1057 mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, 1196 - MLX4_PROT_IB_IPV6, reg_id); 1058 + prot, reg_id); 1197 1059 err_malloc: 1198 1060 kfree(ib_steering); 1199 1061 ··· 1221 1083 int err; 1222 1084 struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); 1223 1085 struct mlx4_ib_qp *mqp = to_mqp(ibqp); 1224 - u8 mac[6]; 1225 1086 struct net_device *ndev; 1226 1087 struct mlx4_ib_gid_entry *ge; 1227 1088 u64 reg_id = 0; 1089 + enum mlx4_protocol prot = (gid->raw[1] == 0x0e) ? 
1090 + MLX4_PROT_IB_IPV4 : MLX4_PROT_IB_IPV6; 1228 1091 1229 1092 if (mdev->dev->caps.steering_mode == 1230 1093 MLX4_STEERING_MODE_DEVICE_MANAGED) { ··· 1248 1109 } 1249 1110 1250 1111 err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, 1251 - MLX4_PROT_IB_IPV6, reg_id); 1112 + prot, reg_id); 1252 1113 if (err) 1253 1114 return err; 1254 1115 ··· 1260 1121 if (ndev) 1261 1122 dev_hold(ndev); 1262 1123 spin_unlock(&mdev->iboe.lock); 1263 - rdma_get_mcast_mac((struct in6_addr *)gid, mac); 1264 - if (ndev) { 1265 - rtnl_lock(); 1266 - dev_mc_del(mdev->iboe.netdevs[ge->port - 1], mac); 1267 - rtnl_unlock(); 1124 + if (ndev) 1268 1125 dev_put(ndev); 1269 - } 1270 1126 list_del(&ge->list); 1271 1127 kfree(ge); 1272 1128 } else ··· 1357 1223 &dev_attr_board_id 1358 1224 }; 1359 1225 1360 - static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, struct net_device *dev) 1361 - { 1362 - memcpy(eui, dev->dev_addr, 3); 1363 - memcpy(eui + 5, dev->dev_addr + 3, 3); 1364 - if (vlan_id < 0x1000) { 1365 - eui[3] = vlan_id >> 8; 1366 - eui[4] = vlan_id & 0xff; 1367 - } else { 1368 - eui[3] = 0xff; 1369 - eui[4] = 0xfe; 1370 - } 1371 - eui[0] ^= 2; 1372 - } 1373 - 1374 1226 static void update_gids_task(struct work_struct *work) 1375 1227 { 1376 1228 struct update_gid_work *gw = container_of(work, struct update_gid_work, work); ··· 1379 1259 MLX4_CMD_WRAPPED); 1380 1260 if (err) 1381 1261 pr_warn("set port command failed\n"); 1382 - else { 1383 - memcpy(gw->dev->iboe.gid_table[gw->port - 1], gw->gids, sizeof gw->gids); 1262 + else 1384 1263 mlx4_ib_dispatch_event(gw->dev, gw->port, IB_EVENT_GID_CHANGE); 1385 - } 1386 1264 1387 1265 mlx4_free_cmd_mailbox(dev, mailbox); 1388 1266 kfree(gw); 1389 1267 } 1390 1268 1391 - static int update_ipv6_gids(struct mlx4_ib_dev *dev, int port, int clear) 1269 + static void reset_gids_task(struct work_struct *work) 1392 1270 { 1393 - struct net_device *ndev = dev->iboe.netdevs[port - 1]; 1394 - struct update_gid_work *work; 1395 - struct net_device *tmp; 1271 + struct update_gid_work *gw = 1272 + container_of(work, struct update_gid_work, work); 1273 + struct mlx4_cmd_mailbox *mailbox; 1274 + union ib_gid *gids; 1275 + int err; 1396 1276 int i; 1397 - u8 *hits; 1398 - int ret; 1399 - union ib_gid gid; 1400 - int free; 1401 - int found; 1402 - int need_update = 0; 1403 - u16 vid; 1277 + struct mlx4_dev *dev = gw->dev->dev; 1404 1278 1405 - work = kzalloc(sizeof *work, GFP_ATOMIC); 1279 + mailbox = mlx4_alloc_cmd_mailbox(dev); 1280 + if (IS_ERR(mailbox)) { 1281 + pr_warn("reset gid table failed\n"); 1282 + goto free; 1283 + } 1284 + 1285 + gids = mailbox->buf; 1286 + memcpy(gids, gw->gids, sizeof(gw->gids)); 1287 + 1288 + for (i = 1; i < gw->dev->num_ports + 1; i++) { 1289 + if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, i) == 1290 + IB_LINK_LAYER_ETHERNET) { 1291 + err = mlx4_cmd(dev, mailbox->dma, 1292 + MLX4_SET_PORT_GID_TABLE << 8 | i, 1293 + 1, MLX4_CMD_SET_PORT, 1294 + MLX4_CMD_TIME_CLASS_B, 1295 + MLX4_CMD_WRAPPED); 1296 + if (err) 1297 + pr_warn(KERN_WARNING 1298 + "set port %d command failed\n", i); 1299 + } 1300 + } 1301 + 1302 + mlx4_free_cmd_mailbox(dev, mailbox); 1303 + free: 1304 + kfree(gw); 1305 + } 1306 + 1307 + static int update_gid_table(struct mlx4_ib_dev *dev, int port, 1308 + union ib_gid *gid, int clear) 1309 + { 1310 + struct update_gid_work *work; 1311 + int i; 1312 + int need_update = 0; 1313 + int free = -1; 1314 + int found = -1; 1315 + int max_gids; 1316 + 1317 + max_gids = dev->dev->caps.gid_table_len[port]; 1318 + for (i = 0; i < 
max_gids; ++i) { 1319 + if (!memcmp(&dev->iboe.gid_table[port - 1][i], gid, 1320 + sizeof(*gid))) 1321 + found = i; 1322 + 1323 + if (clear) { 1324 + if (found >= 0) { 1325 + need_update = 1; 1326 + dev->iboe.gid_table[port - 1][found] = zgid; 1327 + break; 1328 + } 1329 + } else { 1330 + if (found >= 0) 1331 + break; 1332 + 1333 + if (free < 0 && 1334 + !memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, 1335 + sizeof(*gid))) 1336 + free = i; 1337 + } 1338 + } 1339 + 1340 + if (found == -1 && !clear && free >= 0) { 1341 + dev->iboe.gid_table[port - 1][free] = *gid; 1342 + need_update = 1; 1343 + } 1344 + 1345 + if (!need_update) 1346 + return 0; 1347 + 1348 + work = kzalloc(sizeof(*work), GFP_ATOMIC); 1406 1349 if (!work) 1407 1350 return -ENOMEM; 1408 1351 1409 - hits = kzalloc(128, GFP_ATOMIC); 1410 - if (!hits) { 1411 - ret = -ENOMEM; 1412 - goto out; 1413 - } 1352 + memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof(work->gids)); 1353 + INIT_WORK(&work->work, update_gids_task); 1354 + work->port = port; 1355 + work->dev = dev; 1356 + queue_work(wq, &work->work); 1414 1357 1415 - rcu_read_lock(); 1416 - for_each_netdev_rcu(&init_net, tmp) { 1417 - if (ndev && (tmp == ndev || rdma_vlan_dev_real_dev(tmp) == ndev)) { 1418 - gid.global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); 1419 - vid = rdma_vlan_dev_vlan_id(tmp); 1420 - mlx4_addrconf_ifid_eui48(&gid.raw[8], vid, ndev); 1421 - found = 0; 1422 - free = -1; 1423 - for (i = 0; i < 128; ++i) { 1424 - if (free < 0 && 1425 - !memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid)) 1426 - free = i; 1427 - if (!memcmp(&dev->iboe.gid_table[port - 1][i], &gid, sizeof gid)) { 1428 - hits[i] = 1; 1429 - found = 1; 1430 - break; 1431 - } 1432 - } 1358 + return 0; 1359 + } 1433 1360 1434 - if (!found) { 1435 - if (tmp == ndev && 1436 - (memcmp(&dev->iboe.gid_table[port - 1][0], 1437 - &gid, sizeof gid) || 1438 - !memcmp(&dev->iboe.gid_table[port - 1][0], 1439 - &zgid, sizeof gid))) { 1440 - dev->iboe.gid_table[port - 1][0] = gid; 1441 - ++need_update; 1442 - hits[0] = 1; 1443 - } else if (free >= 0) { 1444 - dev->iboe.gid_table[port - 1][free] = gid; 1445 - hits[free] = 1; 1446 - ++need_update; 1447 - } 1448 - } 1449 - } 1450 - } 1451 - rcu_read_unlock(); 1361 + static int reset_gid_table(struct mlx4_ib_dev *dev) 1362 + { 1363 + struct update_gid_work *work; 1452 1364 1453 - for (i = 0; i < 128; ++i) 1454 - if (!hits[i]) { 1455 - if (memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid)) 1456 - ++need_update; 1457 - dev->iboe.gid_table[port - 1][i] = zgid; 1458 - } 1459 1365 1460 - if (need_update) { 1461 - memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof work->gids); 1462 - INIT_WORK(&work->work, update_gids_task); 1463 - work->port = port; 1464 - work->dev = dev; 1465 - queue_work(wq, &work->work); 1466 - } else 1467 - kfree(work); 1366 + work = kzalloc(sizeof(*work), GFP_ATOMIC); 1367 + if (!work) 1368 + return -ENOMEM; 1369 + memset(dev->iboe.gid_table, 0, sizeof(dev->iboe.gid_table)); 1370 + memset(work->gids, 0, sizeof(work->gids)); 1371 + INIT_WORK(&work->work, reset_gids_task); 1372 + work->dev = dev; 1373 + queue_work(wq, &work->work); 1374 + return 0; 1375 + } 1468 1376 1469 - kfree(hits); 1377 + static int mlx4_ib_addr_event(int event, struct net_device *event_netdev, 1378 + struct mlx4_ib_dev *ibdev, union ib_gid *gid) 1379 + { 1380 + struct mlx4_ib_iboe *iboe; 1381 + int port = 0; 1382 + struct net_device *real_dev = rdma_vlan_dev_real_dev(event_netdev) ? 
1383 + rdma_vlan_dev_real_dev(event_netdev) : 1384 + event_netdev; 1385 + 1386 + if (event != NETDEV_DOWN && event != NETDEV_UP) 1387 + return 0; 1388 + 1389 + if ((real_dev != event_netdev) && 1390 + (event == NETDEV_DOWN) && 1391 + rdma_link_local_addr((struct in6_addr *)gid)) 1392 + return 0; 1393 + 1394 + iboe = &ibdev->iboe; 1395 + spin_lock(&iboe->lock); 1396 + 1397 + for (port = 1; port <= MLX4_MAX_PORTS; ++port) 1398 + if ((netif_is_bond_master(real_dev) && 1399 + (real_dev == iboe->masters[port - 1])) || 1400 + (!netif_is_bond_master(real_dev) && 1401 + (real_dev == iboe->netdevs[port - 1]))) 1402 + update_gid_table(ibdev, port, gid, 1403 + event == NETDEV_DOWN); 1404 + 1405 + spin_unlock(&iboe->lock); 1470 1406 return 0; 1471 1407 1472 - out: 1473 - kfree(work); 1474 - return ret; 1475 1408 } 1476 1409 1477 - static void handle_en_event(struct mlx4_ib_dev *dev, int port, unsigned long event) 1410 + static u8 mlx4_ib_get_dev_port(struct net_device *dev, 1411 + struct mlx4_ib_dev *ibdev) 1478 1412 { 1479 - switch (event) { 1480 - case NETDEV_UP: 1481 - case NETDEV_CHANGEADDR: 1482 - update_ipv6_gids(dev, port, 0); 1483 - break; 1413 + u8 port = 0; 1414 + struct mlx4_ib_iboe *iboe; 1415 + struct net_device *real_dev = rdma_vlan_dev_real_dev(dev) ? 1416 + rdma_vlan_dev_real_dev(dev) : dev; 1484 1417 1485 - case NETDEV_DOWN: 1486 - update_ipv6_gids(dev, port, 1); 1487 - dev->iboe.netdevs[port - 1] = NULL; 1488 - } 1418 + iboe = &ibdev->iboe; 1419 + spin_lock(&iboe->lock); 1420 + 1421 + for (port = 1; port <= MLX4_MAX_PORTS; ++port) 1422 + if ((netif_is_bond_master(real_dev) && 1423 + (real_dev == iboe->masters[port - 1])) || 1424 + (!netif_is_bond_master(real_dev) && 1425 + (real_dev == iboe->netdevs[port - 1]))) 1426 + break; 1427 + 1428 + spin_unlock(&iboe->lock); 1429 + 1430 + if ((port == 0) || (port > MLX4_MAX_PORTS)) 1431 + return 0; 1432 + else 1433 + return port; 1489 1434 } 1490 1435 1491 - static void netdev_added(struct mlx4_ib_dev *dev, int port) 1492 - { 1493 - update_ipv6_gids(dev, port, 0); 1494 - } 1495 - 1496 - static void netdev_removed(struct mlx4_ib_dev *dev, int port) 1497 - { 1498 - update_ipv6_gids(dev, port, 1); 1499 - } 1500 - 1501 - static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event, 1436 + static int mlx4_ib_inet_event(struct notifier_block *this, unsigned long event, 1502 1437 void *ptr) 1438 + { 1439 + struct mlx4_ib_dev *ibdev; 1440 + struct in_ifaddr *ifa = ptr; 1441 + union ib_gid gid; 1442 + struct net_device *event_netdev = ifa->ifa_dev->dev; 1443 + 1444 + ipv6_addr_set_v4mapped(ifa->ifa_address, (struct in6_addr *)&gid); 1445 + 1446 + ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb_inet); 1447 + 1448 + mlx4_ib_addr_event(event, event_netdev, ibdev, &gid); 1449 + return NOTIFY_DONE; 1450 + } 1451 + 1452 + #if IS_ENABLED(CONFIG_IPV6) 1453 + static int mlx4_ib_inet6_event(struct notifier_block *this, unsigned long event, 1454 + void *ptr) 1455 + { 1456 + struct mlx4_ib_dev *ibdev; 1457 + struct inet6_ifaddr *ifa = ptr; 1458 + union ib_gid *gid = (union ib_gid *)&ifa->addr; 1459 + struct net_device *event_netdev = ifa->idev->dev; 1460 + 1461 + ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb_inet6); 1462 + 1463 + mlx4_ib_addr_event(event, event_netdev, ibdev, gid); 1464 + return NOTIFY_DONE; 1465 + } 1466 + #endif 1467 + 1468 + static void mlx4_ib_get_dev_addr(struct net_device *dev, 1469 + struct mlx4_ib_dev *ibdev, u8 port) 1470 + { 1471 + struct in_device *in_dev; 1472 + #if IS_ENABLED(CONFIG_IPV6) 1473 + 
struct inet6_dev *in6_dev; 1474 + union ib_gid *pgid; 1475 + struct inet6_ifaddr *ifp; 1476 + #endif 1477 + union ib_gid gid; 1478 + 1479 + 1480 + if ((port == 0) || (port > MLX4_MAX_PORTS)) 1481 + return; 1482 + 1483 + /* IPv4 gids */ 1484 + in_dev = in_dev_get(dev); 1485 + if (in_dev) { 1486 + for_ifa(in_dev) { 1487 + /*ifa->ifa_address;*/ 1488 + ipv6_addr_set_v4mapped(ifa->ifa_address, 1489 + (struct in6_addr *)&gid); 1490 + update_gid_table(ibdev, port, &gid, 0); 1491 + } 1492 + endfor_ifa(in_dev); 1493 + in_dev_put(in_dev); 1494 + } 1495 + #if IS_ENABLED(CONFIG_IPV6) 1496 + /* IPv6 gids */ 1497 + in6_dev = in6_dev_get(dev); 1498 + if (in6_dev) { 1499 + read_lock_bh(&in6_dev->lock); 1500 + list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { 1501 + pgid = (union ib_gid *)&ifp->addr; 1502 + update_gid_table(ibdev, port, pgid, 0); 1503 + } 1504 + read_unlock_bh(&in6_dev->lock); 1505 + in6_dev_put(in6_dev); 1506 + } 1507 + #endif 1508 + } 1509 + 1510 + static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev) 1511 + { 1512 + struct net_device *dev; 1513 + 1514 + if (reset_gid_table(ibdev)) 1515 + return -1; 1516 + 1517 + read_lock(&dev_base_lock); 1518 + 1519 + for_each_netdev(&init_net, dev) { 1520 + u8 port = mlx4_ib_get_dev_port(dev, ibdev); 1521 + if (port) 1522 + mlx4_ib_get_dev_addr(dev, ibdev, port); 1523 + } 1524 + 1525 + read_unlock(&dev_base_lock); 1526 + 1527 + return 0; 1528 + } 1529 + 1530 + static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev) 1531 + { 1532 + struct mlx4_ib_iboe *iboe; 1533 + int port; 1534 + 1535 + iboe = &ibdev->iboe; 1536 + 1537 + spin_lock(&iboe->lock); 1538 + mlx4_foreach_ib_transport_port(port, ibdev->dev) { 1539 + struct net_device *old_master = iboe->masters[port - 1]; 1540 + struct net_device *curr_master; 1541 + iboe->netdevs[port - 1] = 1542 + mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port); 1543 + 1544 + if (iboe->netdevs[port - 1] && 1545 + netif_is_bond_slave(iboe->netdevs[port - 1])) { 1546 + rtnl_lock(); 1547 + iboe->masters[port - 1] = netdev_master_upper_dev_get( 1548 + iboe->netdevs[port - 1]); 1549 + rtnl_unlock(); 1550 + } 1551 + curr_master = iboe->masters[port - 1]; 1552 + 1553 + /* if bonding is used it is possible that we add it to masters 1554 + only after IP address is assigned to the net bonding 1555 + interface */ 1556 + if (curr_master && (old_master != curr_master)) 1557 + mlx4_ib_get_dev_addr(curr_master, ibdev, port); 1558 + } 1559 + 1560 + spin_unlock(&iboe->lock); 1561 + } 1562 + 1563 + static int mlx4_ib_netdev_event(struct notifier_block *this, 1564 + unsigned long event, void *ptr) 1503 1565 { 1504 1566 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1505 1567 struct mlx4_ib_dev *ibdev; 1506 - struct net_device *oldnd; 1507 - struct mlx4_ib_iboe *iboe; 1508 - int port; 1509 1568 1510 1569 if (!net_eq(dev_net(dev), &init_net)) 1511 1570 return NOTIFY_DONE; 1512 1571 1513 1572 ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb); 1514 - iboe = &ibdev->iboe; 1515 - 1516 - spin_lock(&iboe->lock); 1517 - mlx4_foreach_ib_transport_port(port, ibdev->dev) { 1518 - oldnd = iboe->netdevs[port - 1]; 1519 - iboe->netdevs[port - 1] = 1520 - mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port); 1521 - if (oldnd != iboe->netdevs[port - 1]) { 1522 - if (iboe->netdevs[port - 1]) 1523 - netdev_added(ibdev, port); 1524 - else 1525 - netdev_removed(ibdev, port); 1526 - } 1527 - } 1528 - 1529 - if (dev == iboe->netdevs[0] || 1530 - (iboe->netdevs[0] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[0])) 1531 
- handle_en_event(ibdev, 1, event); 1532 - else if (dev == iboe->netdevs[1] 1533 - || (iboe->netdevs[1] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[1])) 1534 - handle_en_event(ibdev, 2, event); 1535 - 1536 - spin_unlock(&iboe->lock); 1573 + mlx4_ib_scan_netdevs(ibdev); 1537 1574 1538 1575 return NOTIFY_DONE; 1539 1576 } ··· 1959 1682 } 1960 1683 1961 1684 if (check_flow_steering_support(dev)) { 1685 + ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED; 1962 1686 ibdev->ib_dev.create_flow = mlx4_ib_create_flow; 1963 1687 ibdev->ib_dev.destroy_flow = mlx4_ib_destroy_flow; 1964 1688 ··· 1988 1710 spin_lock_init(&ibdev->sm_lock); 1989 1711 mutex_init(&ibdev->cap_mask_mutex); 1990 1712 1713 + if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) { 1714 + ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS; 1715 + err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count, 1716 + MLX4_IB_UC_STEER_QPN_ALIGN, 1717 + &ibdev->steer_qpn_base); 1718 + if (err) 1719 + goto err_counter; 1720 + 1721 + ibdev->ib_uc_qpns_bitmap = 1722 + kmalloc(BITS_TO_LONGS(ibdev->steer_qpn_count) * 1723 + sizeof(long), 1724 + GFP_KERNEL); 1725 + if (!ibdev->ib_uc_qpns_bitmap) { 1726 + dev_err(&dev->pdev->dev, "bit map alloc failed\n"); 1727 + goto err_steer_qp_release; 1728 + } 1729 + 1730 + bitmap_zero(ibdev->ib_uc_qpns_bitmap, ibdev->steer_qpn_count); 1731 + 1732 + err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE( 1733 + dev, ibdev->steer_qpn_base, 1734 + ibdev->steer_qpn_base + 1735 + ibdev->steer_qpn_count - 1); 1736 + if (err) 1737 + goto err_steer_free_bitmap; 1738 + } 1739 + 1991 1740 if (ib_register_device(&ibdev->ib_dev, NULL)) 1992 - goto err_counter; 1741 + goto err_steer_free_bitmap; 1993 1742 1994 1743 if (mlx4_ib_mad_init(ibdev)) 1995 1744 goto err_reg; ··· 2024 1719 if (mlx4_ib_init_sriov(ibdev)) 2025 1720 goto err_mad; 2026 1721 2027 - if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) { 2028 - iboe->nb.notifier_call = mlx4_ib_netdev_event; 2029 - err = register_netdevice_notifier(&iboe->nb); 2030 - if (err) 2031 - goto err_sriov; 1722 + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) { 1723 + if (!iboe->nb.notifier_call) { 1724 + iboe->nb.notifier_call = mlx4_ib_netdev_event; 1725 + err = register_netdevice_notifier(&iboe->nb); 1726 + if (err) { 1727 + iboe->nb.notifier_call = NULL; 1728 + goto err_notif; 1729 + } 1730 + } 1731 + if (!iboe->nb_inet.notifier_call) { 1732 + iboe->nb_inet.notifier_call = mlx4_ib_inet_event; 1733 + err = register_inetaddr_notifier(&iboe->nb_inet); 1734 + if (err) { 1735 + iboe->nb_inet.notifier_call = NULL; 1736 + goto err_notif; 1737 + } 1738 + } 1739 + #if IS_ENABLED(CONFIG_IPV6) 1740 + if (!iboe->nb_inet6.notifier_call) { 1741 + iboe->nb_inet6.notifier_call = mlx4_ib_inet6_event; 1742 + err = register_inet6addr_notifier(&iboe->nb_inet6); 1743 + if (err) { 1744 + iboe->nb_inet6.notifier_call = NULL; 1745 + goto err_notif; 1746 + } 1747 + } 1748 + #endif 1749 + mlx4_ib_scan_netdevs(ibdev); 1750 + mlx4_ib_init_gid_table(ibdev); 2032 1751 } 2033 1752 2034 1753 for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) { ··· 2078 1749 return ibdev; 2079 1750 2080 1751 err_notif: 2081 - if (unregister_netdevice_notifier(&ibdev->iboe.nb)) 2082 - pr_warn("failure unregistering notifier\n"); 1752 + if (ibdev->iboe.nb.notifier_call) { 1753 + if (unregister_netdevice_notifier(&ibdev->iboe.nb)) 1754 + pr_warn("failure unregistering notifier\n"); 1755 + ibdev->iboe.nb.notifier_call = NULL; 1756 + } 1757 + if (ibdev->iboe.nb_inet.notifier_call) { 1758 + if 
(unregister_inetaddr_notifier(&ibdev->iboe.nb_inet)) 1759 + pr_warn("failure unregistering notifier\n"); 1760 + ibdev->iboe.nb_inet.notifier_call = NULL; 1761 + } 1762 + #if IS_ENABLED(CONFIG_IPV6) 1763 + if (ibdev->iboe.nb_inet6.notifier_call) { 1764 + if (unregister_inet6addr_notifier(&ibdev->iboe.nb_inet6)) 1765 + pr_warn("failure unregistering notifier\n"); 1766 + ibdev->iboe.nb_inet6.notifier_call = NULL; 1767 + } 1768 + #endif 2083 1769 flush_workqueue(wq); 2084 1770 2085 - err_sriov: 2086 1771 mlx4_ib_close_sriov(ibdev); 2087 1772 2088 1773 err_mad: ··· 2105 1762 err_reg: 2106 1763 ib_unregister_device(&ibdev->ib_dev); 2107 1764 1765 + err_steer_free_bitmap: 1766 + kfree(ibdev->ib_uc_qpns_bitmap); 1767 + 1768 + err_steer_qp_release: 1769 + if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) 1770 + mlx4_qp_release_range(dev, ibdev->steer_qpn_base, 1771 + ibdev->steer_qpn_count); 2108 1772 err_counter: 2109 1773 for (; i; --i) 2110 1774 if (ibdev->counters[i - 1] != -1) ··· 2132 1782 return NULL; 2133 1783 } 2134 1784 1785 + int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn) 1786 + { 1787 + int offset; 1788 + 1789 + WARN_ON(!dev->ib_uc_qpns_bitmap); 1790 + 1791 + offset = bitmap_find_free_region(dev->ib_uc_qpns_bitmap, 1792 + dev->steer_qpn_count, 1793 + get_count_order(count)); 1794 + if (offset < 0) 1795 + return offset; 1796 + 1797 + *qpn = dev->steer_qpn_base + offset; 1798 + return 0; 1799 + } 1800 + 1801 + void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count) 1802 + { 1803 + if (!qpn || 1804 + dev->steering_support != MLX4_STEERING_MODE_DEVICE_MANAGED) 1805 + return; 1806 + 1807 + BUG_ON(qpn < dev->steer_qpn_base); 1808 + 1809 + bitmap_release_region(dev->ib_uc_qpns_bitmap, 1810 + qpn - dev->steer_qpn_base, 1811 + get_count_order(count)); 1812 + } 1813 + 1814 + int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, 1815 + int is_attach) 1816 + { 1817 + int err; 1818 + size_t flow_size; 1819 + struct ib_flow_attr *flow = NULL; 1820 + struct ib_flow_spec_ib *ib_spec; 1821 + 1822 + if (is_attach) { 1823 + flow_size = sizeof(struct ib_flow_attr) + 1824 + sizeof(struct ib_flow_spec_ib); 1825 + flow = kzalloc(flow_size, GFP_KERNEL); 1826 + if (!flow) 1827 + return -ENOMEM; 1828 + flow->port = mqp->port; 1829 + flow->num_of_specs = 1; 1830 + flow->size = flow_size; 1831 + ib_spec = (struct ib_flow_spec_ib *)(flow + 1); 1832 + ib_spec->type = IB_FLOW_SPEC_IB; 1833 + ib_spec->size = sizeof(struct ib_flow_spec_ib); 1834 + /* Add an empty rule for IB L2 */ 1835 + memset(&ib_spec->mask, 0, sizeof(ib_spec->mask)); 1836 + 1837 + err = __mlx4_ib_create_flow(&mqp->ibqp, flow, 1838 + IB_FLOW_DOMAIN_NIC, 1839 + MLX4_FS_REGULAR, 1840 + &mqp->reg_id); 1841 + } else { 1842 + err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id); 1843 + } 1844 + kfree(flow); 1845 + return err; 1846 + } 1847 + 2135 1848 static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) 2136 1849 { 2137 1850 struct mlx4_ib_dev *ibdev = ibdev_ptr; ··· 2208 1795 pr_warn("failure unregistering notifier\n"); 2209 1796 ibdev->iboe.nb.notifier_call = NULL; 2210 1797 } 1798 + 1799 + if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) { 1800 + mlx4_qp_release_range(dev, ibdev->steer_qpn_base, 1801 + ibdev->steer_qpn_count); 1802 + kfree(ibdev->ib_uc_qpns_bitmap); 1803 + } 1804 + 1805 + if (ibdev->iboe.nb_inet.notifier_call) { 1806 + if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet)) 1807 + pr_warn("failure unregistering notifier\n"); 
1808 + ibdev->iboe.nb_inet.notifier_call = NULL; 1809 + } 1810 + #if IS_ENABLED(CONFIG_IPV6) 1811 + if (ibdev->iboe.nb_inet6.notifier_call) { 1812 + if (unregister_inet6addr_notifier(&ibdev->iboe.nb_inet6)) 1813 + pr_warn("failure unregistering notifier\n"); 1814 + ibdev->iboe.nb_inet6.notifier_call = NULL; 1815 + } 1816 + #endif 1817 + 2211 1818 iounmap(ibdev->uar_map); 2212 1819 for (p = 0; p < ibdev->num_ports; ++p) 2213 1820 if (ibdev->counters[p] != -1)
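The GID table rework above is the core of IP-based addressing for IBoE/RoCE: IPv4 addresses learned from the net_device are stored in the port GID table as v4-mapped IPv6 addresses, and IPv6 addresses are copied in directly. A standalone sketch of the v4-mapped byte layout (illustration only, not driver code; the address is made up):

/*
 * Illustration only: lay out an IPv4 address as a v4-mapped 16-byte
 * GID (::ffff:a.b.c.d), the format ipv6_addr_set_v4mapped() produces
 * for the port GID table above.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

static void ipv4_to_gid(uint32_t ipv4_be, uint8_t gid[16])
{
        memset(gid, 0, 10);             /* bytes 0..9 are zero */
        gid[10] = 0xff;                 /* ::ffff: marker */
        gid[11] = 0xff;
        memcpy(&gid[12], &ipv4_be, 4);  /* IPv4 address, network order */
}

int main(void)
{
        uint8_t gid[16];
        int i;

        ipv4_to_gid(inet_addr("192.0.2.1"), gid);       /* made-up address */
        for (i = 0; i < 16; i++)
                printf("%02x%s", gid[i], i == 15 ? "\n" : ":");
        return 0;
}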
+15 -3
drivers/infiniband/hw/mlx4/mlx4_ib.h
··· 68 68 /*module param to indicate if SM assigns the alias_GUID*/ 69 69 extern int mlx4_ib_sm_guid_assign; 70 70 71 + #define MLX4_IB_UC_STEER_QPN_ALIGN 1 72 + #define MLX4_IB_UC_MAX_NUM_QPS 256 71 73 struct mlx4_ib_ucontext { 72 74 struct ib_ucontext ibucontext; 73 75 struct mlx4_uar uar; ··· 155 153 enum mlx4_ib_qp_flags { 156 154 MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO, 157 155 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK, 156 + MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP, 158 157 MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30, 159 158 MLX4_IB_SRIOV_SQP = 1 << 31, 160 159 }; ··· 273 270 struct list_head gid_list; 274 271 struct list_head steering_rules; 275 272 struct mlx4_ib_buf *sqp_proxy_rcv; 273 + u64 reg_id; 276 274 277 275 }; 278 276 ··· 432 428 struct mlx4_ib_iboe { 433 429 spinlock_t lock; 434 430 struct net_device *netdevs[MLX4_MAX_PORTS]; 431 + struct net_device *masters[MLX4_MAX_PORTS]; 435 432 struct notifier_block nb; 433 + struct notifier_block nb_inet; 434 + struct notifier_block nb_inet6; 436 435 union ib_gid gid_table[MLX4_MAX_PORTS][128]; 437 436 }; 438 437 ··· 501 494 struct kobject *dev_ports_parent[MLX4_MFUNC_MAX]; 502 495 struct mlx4_ib_iov_port iov_ports[MLX4_MAX_PORTS]; 503 496 struct pkey_mgt pkeys; 497 + unsigned long *ib_uc_qpns_bitmap; 498 + int steer_qpn_count; 499 + int steer_qpn_base; 500 + int steering_support; 504 501 }; 505 502 506 503 struct ib_event_work { ··· 686 675 int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, 687 676 union ib_gid *gid, int netw_view); 688 677 689 - int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr, 690 - u8 *mac, int *is_mcast, u8 port); 691 - 692 678 static inline bool mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah) 693 679 { 694 680 u8 port = be32_to_cpu(ah->av.ib.port_pd) >> 24 & 3; ··· 760 752 761 753 __be64 mlx4_ib_gen_node_guid(void); 762 754 755 + int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn); 756 + void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count); 757 + int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, 758 + int is_attach); 763 759 764 760 #endif /* MLX4_IB_H */
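The new steer_qpn_base/steer_qpn_count fields and the mlx4_ib_steer_qp_alloc()/mlx4_ib_steer_qp_free() prototypes above back a small QPN sub-allocator: a range of QPNs is reserved from the device once, then power-of-two blocks are handed out from a bitmap. A toy stand-alone version of that scheme, with made-up base and pool size:

/*
 * Toy stand-in for the steering QPN sub-allocator: the driver reserves
 * steer_qpn_count QPNs starting at steer_qpn_base and then uses
 * bitmap_find_free_region()/bitmap_release_region() to hand out
 * power-of-two blocks. Base and pool size below are made up.
 */
#include <stdio.h>
#include <stdint.h>

#define STEER_QPN_BASE  0x400
#define STEER_QPN_COUNT 64

static uint64_t qpn_bitmap;     /* 64 QPNs tracked in one word */

/* find a free, naturally aligned block of (1 << order) QPNs */
static int alloc_region(int order)
{
        int size = 1 << order;
        uint64_t mask = (size == 64) ? ~0ULL : ((1ULL << size) - 1);
        int off;

        for (off = 0; off + size <= STEER_QPN_COUNT; off += size) {
                if (!(qpn_bitmap & (mask << off))) {
                        qpn_bitmap |= mask << off;
                        return off;
                }
        }
        return -1;
}

int main(void)
{
        int off = alloc_region(2);      /* a block of four QPNs */

        if (off >= 0)
                printf("steering QPNs 0x%x..0x%x\n",
                       STEER_QPN_BASE + off, STEER_QPN_BASE + off + 3);
        return 0;
}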
+130 -27
drivers/infiniband/hw/mlx4/qp.c
··· 90 90 MLX4_RAW_QP_MSGMAX = 31, 91 91 }; 92 92 93 + #ifndef ETH_ALEN 94 + #define ETH_ALEN 6 95 + #endif 96 + static inline u64 mlx4_mac_to_u64(u8 *addr) 97 + { 98 + u64 mac = 0; 99 + int i; 100 + 101 + for (i = 0; i < ETH_ALEN; i++) { 102 + mac <<= 8; 103 + mac |= addr[i]; 104 + } 105 + return mac; 106 + } 107 + 93 108 static const __be32 mlx4_ib_opcode[] = { 94 109 [IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND), 95 110 [IB_WR_LSO] = cpu_to_be32(MLX4_OPCODE_LSO), ··· 731 716 if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) 732 717 qp->flags |= MLX4_IB_QP_LSO; 733 718 719 + if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) { 720 + if (dev->steering_support == 721 + MLX4_STEERING_MODE_DEVICE_MANAGED) 722 + qp->flags |= MLX4_IB_QP_NETIF; 723 + else 724 + goto err; 725 + } 726 + 734 727 err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp); 735 728 if (err) 736 729 goto err; ··· 788 765 if (init_attr->qp_type == IB_QPT_RAW_PACKET) 789 766 err = mlx4_qp_reserve_range(dev->dev, 1, 1 << 8, &qpn); 790 767 else 791 - err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn); 768 + if (qp->flags & MLX4_IB_QP_NETIF) 769 + err = mlx4_ib_steer_qp_alloc(dev, 1, &qpn); 770 + else 771 + err = mlx4_qp_reserve_range(dev->dev, 1, 1, 772 + &qpn); 792 773 if (err) 793 774 goto err_proxy; 794 775 } ··· 817 790 return 0; 818 791 819 792 err_qpn: 820 - if (!sqpn) 821 - mlx4_qp_release_range(dev->dev, qpn, 1); 793 + if (!sqpn) { 794 + if (qp->flags & MLX4_IB_QP_NETIF) 795 + mlx4_ib_steer_qp_free(dev, qpn, 1); 796 + else 797 + mlx4_qp_release_range(dev->dev, qpn, 1); 798 + } 822 799 err_proxy: 823 800 if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI) 824 801 free_proxy_bufs(pd->device, qp); ··· 963 932 964 933 mlx4_qp_free(dev->dev, &qp->mqp); 965 934 966 - if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp)) 967 - mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1); 935 + if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp)) { 936 + if (qp->flags & MLX4_IB_QP_NETIF) 937 + mlx4_ib_steer_qp_free(dev, qp->mqp.qpn, 1); 938 + else 939 + mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1); 940 + } 968 941 969 942 mlx4_mtt_cleanup(dev->dev, &qp->mtt); 970 943 ··· 1022 987 */ 1023 988 if (init_attr->create_flags & ~(MLX4_IB_QP_LSO | 1024 989 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK | 1025 - MLX4_IB_SRIOV_TUNNEL_QP | MLX4_IB_SRIOV_SQP)) 990 + MLX4_IB_SRIOV_TUNNEL_QP | 991 + MLX4_IB_SRIOV_SQP | 992 + MLX4_IB_QP_NETIF)) 1026 993 return ERR_PTR(-EINVAL); 994 + 995 + if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) { 996 + if (init_attr->qp_type != IB_QPT_UD) 997 + return ERR_PTR(-EINVAL); 998 + } 1027 999 1028 1000 if (init_attr->create_flags && 1029 1001 (udata || ··· 1186 1144 path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6); 1187 1145 } 1188 1146 1189 - static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, 1190 - struct mlx4_qp_path *path, u8 port) 1147 + static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, 1148 + u64 smac, u16 vlan_tag, struct mlx4_qp_path *path, 1149 + u8 port) 1191 1150 { 1192 - int err; 1193 1151 int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port) == 1194 1152 IB_LINK_LAYER_ETHERNET; 1195 - u8 mac[6]; 1196 - int is_mcast; 1197 - u16 vlan_tag; 1198 1153 int vidx; 1154 + int smac_index; 1155 + 1199 1156 1200 1157 path->grh_mylmc = ah->src_path_bits & 0x7f; 1201 1158 path->rlid = cpu_to_be16(ah->dlid); ··· 1229 1188 if (!(ah->ah_flags & IB_AH_GRH)) 1230 1189 return -1; 1231 1190 1232 - err = mlx4_ib_resolve_grh(dev, ah, mac, &is_mcast, 
port); 1233 - if (err) 1234 - return err; 1235 - 1236 - memcpy(path->dmac, mac, 6); 1191 + memcpy(path->dmac, ah->dmac, ETH_ALEN); 1237 1192 path->ackto = MLX4_IB_LINK_TYPE_ETH; 1238 - /* use index 0 into MAC table for IBoE */ 1239 - path->grh_mylmc &= 0x80; 1193 + /* find the index into MAC table for IBoE */ 1194 + if (!is_zero_ether_addr((const u8 *)&smac)) { 1195 + if (mlx4_find_cached_mac(dev->dev, port, smac, 1196 + &smac_index)) 1197 + return -ENOENT; 1198 + } else { 1199 + smac_index = 0; 1200 + } 1240 1201 1241 - vlan_tag = rdma_get_vlan_id(&dev->iboe.gid_table[port - 1][ah->grh.sgid_index]); 1202 + path->grh_mylmc &= 0x80 | smac_index; 1203 + 1204 + path->feup |= MLX4_FEUP_FORCE_ETH_UP; 1242 1205 if (vlan_tag < 0x1000) { 1243 1206 if (mlx4_find_cached_vlan(dev->dev, port, vlan_tag, &vidx)) 1244 1207 return -ENOENT; 1245 1208 1246 1209 path->vlan_index = vidx; 1247 1210 path->fl = 1 << 6; 1211 + path->feup |= MLX4_FVL_FORCE_ETH_VLAN; 1248 1212 } 1249 1213 } else 1250 1214 path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | 1251 1215 ((port - 1) << 6) | ((ah->sl & 0xf) << 2); 1252 1216 1253 1217 return 0; 1218 + } 1219 + 1220 + static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_qp_attr *qp, 1221 + enum ib_qp_attr_mask qp_attr_mask, 1222 + struct mlx4_qp_path *path, u8 port) 1223 + { 1224 + return _mlx4_set_path(dev, &qp->ah_attr, 1225 + mlx4_mac_to_u64((u8 *)qp->smac), 1226 + (qp_attr_mask & IB_QP_VID) ? qp->vlan_id : 0xffff, 1227 + path, port); 1228 + } 1229 + 1230 + static int mlx4_set_alt_path(struct mlx4_ib_dev *dev, 1231 + const struct ib_qp_attr *qp, 1232 + enum ib_qp_attr_mask qp_attr_mask, 1233 + struct mlx4_qp_path *path, u8 port) 1234 + { 1235 + return _mlx4_set_path(dev, &qp->alt_ah_attr, 1236 + mlx4_mac_to_u64((u8 *)qp->alt_smac), 1237 + (qp_attr_mask & IB_QP_ALT_VID) ? 1238 + qp->alt_vlan_id : 0xffff, 1239 + path, port); 1254 1240 } 1255 1241 1256 1242 static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) ··· 1303 1235 struct mlx4_qp_context *context; 1304 1236 enum mlx4_qp_optpar optpar = 0; 1305 1237 int sqd_event; 1238 + int steer_qp = 0; 1306 1239 int err = -EINVAL; 1307 1240 1308 1241 context = kzalloc(sizeof *context, GFP_KERNEL); ··· 1388 1319 optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX; 1389 1320 } else 1390 1321 context->pri_path.counter_index = 0xff; 1322 + 1323 + if (qp->flags & MLX4_IB_QP_NETIF) { 1324 + mlx4_ib_steer_qp_reg(dev, qp, 1); 1325 + steer_qp = 1; 1326 + } 1391 1327 } 1392 1328 1393 1329 if (attr_mask & IB_QP_PKEY_INDEX) { ··· 1403 1329 } 1404 1330 1405 1331 if (attr_mask & IB_QP_AV) { 1406 - if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path, 1332 + if (mlx4_set_path(dev, attr, attr_mask, &context->pri_path, 1407 1333 attr_mask & IB_QP_PORT ? 
1408 1334 attr->port_num : qp->port)) 1409 1335 goto out; ··· 1426 1352 dev->dev->caps.pkey_table_len[attr->alt_port_num]) 1427 1353 goto out; 1428 1354 1429 - if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path, 1430 - attr->alt_port_num)) 1355 + if (mlx4_set_alt_path(dev, attr, attr_mask, &context->alt_path, 1356 + attr->alt_port_num)) 1431 1357 goto out; 1432 1358 1433 1359 context->alt_path.pkey_index = attr->alt_pkey_index; ··· 1538 1464 context->pri_path.ackto = (context->pri_path.ackto & 0xf8) | 1539 1465 MLX4_IB_LINK_TYPE_ETH; 1540 1466 1467 + if (ibqp->qp_type == IB_QPT_UD && (new_state == IB_QPS_RTR)) { 1468 + int is_eth = rdma_port_get_link_layer( 1469 + &dev->ib_dev, qp->port) == 1470 + IB_LINK_LAYER_ETHERNET; 1471 + if (is_eth) { 1472 + context->pri_path.ackto = MLX4_IB_LINK_TYPE_ETH; 1473 + optpar |= MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH; 1474 + } 1475 + } 1476 + 1477 + 1541 1478 if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD && 1542 1479 attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify) 1543 1480 sqd_event = 1; ··· 1632 1547 qp->sq_next_wqe = 0; 1633 1548 if (qp->rq.wqe_cnt) 1634 1549 *qp->db.db = 0; 1550 + 1551 + if (qp->flags & MLX4_IB_QP_NETIF) 1552 + mlx4_ib_steer_qp_reg(dev, qp, 0); 1635 1553 } 1636 1554 1637 1555 out: 1556 + if (err && steer_qp) 1557 + mlx4_ib_steer_qp_reg(dev, qp, 0); 1638 1558 kfree(context); 1639 1559 return err; 1640 1560 } ··· 1651 1561 struct mlx4_ib_qp *qp = to_mqp(ibqp); 1652 1562 enum ib_qp_state cur_state, new_state; 1653 1563 int err = -EINVAL; 1654 - 1564 + int ll; 1655 1565 mutex_lock(&qp->mutex); 1656 1566 1657 1567 cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; 1658 1568 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; 1659 1569 1660 - if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) { 1570 + if (cur_state == new_state && cur_state == IB_QPS_RESET) { 1571 + ll = IB_LINK_LAYER_UNSPECIFIED; 1572 + } else { 1573 + int port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; 1574 + ll = rdma_port_get_link_layer(&dev->ib_dev, port); 1575 + } 1576 + 1577 + if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, 1578 + attr_mask, ll)) { 1661 1579 pr_debug("qpn 0x%x: invalid attribute mask specified " 1662 1580 "for transition %d to %d. qp_type %d," 1663 1581 " attr_mask 0x%x\n", ··· 1882 1784 return err; 1883 1785 } 1884 1786 1885 - vlan = rdma_get_vlan_id(&sgid); 1886 - is_vlan = vlan < 0x1000; 1787 + if (ah->av.eth.vlan != 0xffff) { 1788 + vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff; 1789 + is_vlan = 1; 1790 + } 1887 1791 } 1888 1792 ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header); 1889 1793 ··· 2861 2761 2862 2762 if (qp->flags & MLX4_IB_QP_LSO) 2863 2763 qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO; 2764 + 2765 + if (qp->flags & MLX4_IB_QP_NETIF) 2766 + qp_init_attr->create_flags |= IB_QP_CREATE_NETIF_QP; 2864 2767 2865 2768 qp_init_attr->sq_sig_type = 2866 2769 qp->sq_signal_bits == cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) ?
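Among the mlx4 QP changes above is a helper that packs a 6-byte MAC into a 64-bit value before looking up its index in the port MAC table. A stand-alone sketch of the same big-endian packing (the MAC value is made up):

/*
 * Stand-alone sketch of packing a 6-byte MAC into a 64-bit integer,
 * most significant byte first, as the new helper above does before
 * calling mlx4_find_cached_mac(). MAC value is made up.
 */
#include <stdio.h>
#include <stdint.h>

static uint64_t mac_to_u64(const uint8_t addr[6])
{
        uint64_t mac = 0;
        int i;

        for (i = 0; i < 6; i++) {
                mac <<= 8;
                mac |= addr[i];
        }
        return mac;
}

int main(void)
{
        const uint8_t mac[6] = { 0x00, 0x25, 0xb5, 0x00, 0x00, 0x01 };

        printf("0x%012llx\n", (unsigned long long)mac_to_u64(mac));
        return 0;
}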
+6 -2
drivers/infiniband/hw/mlx4/sysfs.c
··· 582 582 p->pkey_group.attrs = 583 583 alloc_group_attrs(show_port_pkey, store_port_pkey, 584 584 dev->dev->caps.pkey_table_len[port_num]); 585 - if (!p->pkey_group.attrs) 585 + if (!p->pkey_group.attrs) { 586 + ret = -ENOMEM; 586 587 goto err_alloc; 588 + } 587 589 588 590 ret = sysfs_create_group(&p->kobj, &p->pkey_group); 589 591 if (ret) ··· 593 591 594 592 p->gid_group.name = "gid_idx"; 595 593 p->gid_group.attrs = alloc_group_attrs(show_port_gid_idx, NULL, 1); 596 - if (!p->gid_group.attrs) 594 + if (!p->gid_group.attrs) { 595 + ret = -ENOMEM; 597 596 goto err_free_pkey; 597 + } 598 598 599 599 ret = sysfs_create_group(&p->kobj, &p->gid_group); 600 600 if (ret)
+293 -17
drivers/infiniband/hw/mlx5/cq.c
··· 73 73 return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz); 74 74 } 75 75 76 + static u8 sw_ownership_bit(int n, int nent) 77 + { 78 + return (n & nent) ? 1 : 0; 79 + } 80 + 76 81 static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n) 77 82 { 78 83 void *cqe = get_cqe(cq, n & cq->ibcq.cqe); 79 84 struct mlx5_cqe64 *cqe64; 80 85 81 86 cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64; 82 - return ((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ 83 - !!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe; 87 + 88 + if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) && 89 + !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) { 90 + return cqe; 91 + } else { 92 + return NULL; 93 + } 84 94 } 85 95 86 96 static void *next_cqe_sw(struct mlx5_ib_cq *cq) ··· 361 351 qp->sq.last_poll = tail; 362 352 } 363 353 354 + static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf) 355 + { 356 + mlx5_buf_free(&dev->mdev, &buf->buf); 357 + } 358 + 364 359 static int mlx5_poll_one(struct mlx5_ib_cq *cq, 365 360 struct mlx5_ib_qp **cur_qp, 366 361 struct ib_wc *wc) ··· 381 366 void *cqe; 382 367 int idx; 383 368 369 + repoll: 384 370 cqe = next_cqe_sw(cq); 385 371 if (!cqe) 386 372 return -EAGAIN; ··· 395 379 */ 396 380 rmb(); 397 381 398 - /* TBD: resize CQ */ 382 + opcode = cqe64->op_own >> 4; 383 + if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) { 384 + if (likely(cq->resize_buf)) { 385 + free_cq_buf(dev, &cq->buf); 386 + cq->buf = *cq->resize_buf; 387 + kfree(cq->resize_buf); 388 + cq->resize_buf = NULL; 389 + goto repoll; 390 + } else { 391 + mlx5_ib_warn(dev, "unexpected resize cqe\n"); 392 + } 393 + } 399 394 400 395 qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff; 401 396 if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) { ··· 425 398 } 426 399 427 400 wc->qp = &(*cur_qp)->ibqp; 428 - opcode = cqe64->op_own >> 4; 429 401 switch (opcode) { 430 402 case MLX5_CQE_REQ: 431 403 wq = &(*cur_qp)->sq; ··· 529 503 return err; 530 504 531 505 buf->cqe_size = cqe_size; 506 + buf->nent = nent; 532 507 533 508 return 0; 534 - } 535 - 536 - static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf) 537 - { 538 - mlx5_buf_free(&dev->mdev, &buf->buf); 539 509 } 540 510 541 511 static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, ··· 598 576 ib_umem_release(cq->buf.umem); 599 577 } 600 578 601 - static void init_cq_buf(struct mlx5_ib_cq *cq, int nent) 579 + static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf) 602 580 { 603 581 int i; 604 582 void *cqe; 605 583 struct mlx5_cqe64 *cqe64; 606 584 607 - for (i = 0; i < nent; i++) { 608 - cqe = get_cqe(cq, i); 609 - cqe64 = (cq->buf.cqe_size == 64) ? cqe : cqe + 64; 610 - cqe64->op_own = 0xf1; 585 + for (i = 0; i < buf->nent; i++) { 586 + cqe = get_cqe_from_buf(buf, i, buf->cqe_size); 587 + cqe64 = buf->cqe_size == 64 ? 
cqe : cqe + 64; 588 + cqe64->op_own = MLX5_CQE_INVALID << 4; 611 589 } 612 590 } 613 591 ··· 632 610 if (err) 633 611 goto err_db; 634 612 635 - init_cq_buf(cq, entries); 613 + init_cq_buf(cq, &cq->buf); 636 614 637 615 *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages; 638 616 *cqb = mlx5_vzalloc(*inlen); ··· 840 818 841 819 int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) 842 820 { 843 - return -ENOSYS; 821 + struct mlx5_modify_cq_mbox_in *in; 822 + struct mlx5_ib_dev *dev = to_mdev(cq->device); 823 + struct mlx5_ib_cq *mcq = to_mcq(cq); 824 + int err; 825 + u32 fsel; 826 + 827 + if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_CQ_MODER)) 828 + return -ENOSYS; 829 + 830 + in = kzalloc(sizeof(*in), GFP_KERNEL); 831 + if (!in) 832 + return -ENOMEM; 833 + 834 + in->cqn = cpu_to_be32(mcq->mcq.cqn); 835 + fsel = (MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT); 836 + in->ctx.cq_period = cpu_to_be16(cq_period); 837 + in->ctx.cq_max_count = cpu_to_be16(cq_count); 838 + in->field_select = cpu_to_be32(fsel); 839 + err = mlx5_core_modify_cq(&dev->mdev, &mcq->mcq, in, sizeof(*in)); 840 + kfree(in); 841 + 842 + if (err) 843 + mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn); 844 + 845 + return err; 846 + } 847 + 848 + static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, 849 + int entries, struct ib_udata *udata, int *npas, 850 + int *page_shift, int *cqe_size) 851 + { 852 + struct mlx5_ib_resize_cq ucmd; 853 + struct ib_umem *umem; 854 + int err; 855 + int npages; 856 + struct ib_ucontext *context = cq->buf.umem->context; 857 + 858 + err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); 859 + if (err) 860 + return err; 861 + 862 + if (ucmd.reserved0 || ucmd.reserved1) 863 + return -EINVAL; 864 + 865 + umem = ib_umem_get(context, ucmd.buf_addr, entries * ucmd.cqe_size, 866 + IB_ACCESS_LOCAL_WRITE, 1); 867 + if (IS_ERR(umem)) { 868 + err = PTR_ERR(umem); 869 + return err; 870 + } 871 + 872 + mlx5_ib_cont_pages(umem, ucmd.buf_addr, &npages, page_shift, 873 + npas, NULL); 874 + 875 + cq->resize_umem = umem; 876 + *cqe_size = ucmd.cqe_size; 877 + 878 + return 0; 879 + } 880 + 881 + static void un_resize_user(struct mlx5_ib_cq *cq) 882 + { 883 + ib_umem_release(cq->resize_umem); 884 + } 885 + 886 + static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, 887 + int entries, int cqe_size) 888 + { 889 + int err; 890 + 891 + cq->resize_buf = kzalloc(sizeof(*cq->resize_buf), GFP_KERNEL); 892 + if (!cq->resize_buf) 893 + return -ENOMEM; 894 + 895 + err = alloc_cq_buf(dev, cq->resize_buf, entries, cqe_size); 896 + if (err) 897 + goto ex; 898 + 899 + init_cq_buf(cq, cq->resize_buf); 900 + 901 + return 0; 902 + 903 + ex: 904 + kfree(cq->resize_buf); 905 + return err; 906 + } 907 + 908 + static void un_resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq) 909 + { 910 + free_cq_buf(dev, cq->resize_buf); 911 + cq->resize_buf = NULL; 912 + } 913 + 914 + static int copy_resize_cqes(struct mlx5_ib_cq *cq) 915 + { 916 + struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); 917 + struct mlx5_cqe64 *scqe64; 918 + struct mlx5_cqe64 *dcqe64; 919 + void *start_cqe; 920 + void *scqe; 921 + void *dcqe; 922 + int ssize; 923 + int dsize; 924 + int i; 925 + u8 sw_own; 926 + 927 + ssize = cq->buf.cqe_size; 928 + dsize = cq->resize_buf->cqe_size; 929 + if (ssize != dsize) { 930 + mlx5_ib_warn(dev, "resize from different cqe size is not supported\n"); 931 + return -EINVAL; 932 + } 933 + 934 + i = cq->mcq.cons_index; 935 + scqe = get_sw_cqe(cq, i); 936 
+ scqe64 = ssize == 64 ? scqe : scqe + 64; 937 + start_cqe = scqe; 938 + if (!scqe) { 939 + mlx5_ib_warn(dev, "expected cqe in sw ownership\n"); 940 + return -EINVAL; 941 + } 942 + 943 + while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) { 944 + dcqe = get_cqe_from_buf(cq->resize_buf, 945 + (i + 1) & (cq->resize_buf->nent), 946 + dsize); 947 + dcqe64 = dsize == 64 ? dcqe : dcqe + 64; 948 + sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent); 949 + memcpy(dcqe, scqe, dsize); 950 + dcqe64->op_own = (dcqe64->op_own & ~MLX5_CQE_OWNER_MASK) | sw_own; 951 + 952 + ++i; 953 + scqe = get_sw_cqe(cq, i); 954 + scqe64 = ssize == 64 ? scqe : scqe + 64; 955 + if (!scqe) { 956 + mlx5_ib_warn(dev, "expected cqe in sw ownership\n"); 957 + return -EINVAL; 958 + } 959 + 960 + if (scqe == start_cqe) { 961 + pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n", 962 + cq->mcq.cqn); 963 + return -ENOMEM; 964 + } 965 + } 966 + ++cq->mcq.cons_index; 967 + return 0; 844 968 } 845 969 846 970 int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) 847 971 { 848 - return -ENOSYS; 972 + struct mlx5_ib_dev *dev = to_mdev(ibcq->device); 973 + struct mlx5_ib_cq *cq = to_mcq(ibcq); 974 + struct mlx5_modify_cq_mbox_in *in; 975 + int err; 976 + int npas; 977 + int page_shift; 978 + int inlen; 979 + int uninitialized_var(cqe_size); 980 + unsigned long flags; 981 + 982 + if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_RESIZE_CQ)) { 983 + pr_info("Firmware does not support resize CQ\n"); 984 + return -ENOSYS; 985 + } 986 + 987 + if (entries < 1) 988 + return -EINVAL; 989 + 990 + entries = roundup_pow_of_two(entries + 1); 991 + if (entries > dev->mdev.caps.max_cqes + 1) 992 + return -EINVAL; 993 + 994 + if (entries == ibcq->cqe + 1) 995 + return 0; 996 + 997 + mutex_lock(&cq->resize_mutex); 998 + if (udata) { 999 + err = resize_user(dev, cq, entries, udata, &npas, &page_shift, 1000 + &cqe_size); 1001 + } else { 1002 + cqe_size = 64; 1003 + err = resize_kernel(dev, cq, entries, cqe_size); 1004 + if (!err) { 1005 + npas = cq->resize_buf->buf.npages; 1006 + page_shift = cq->resize_buf->buf.page_shift; 1007 + } 1008 + } 1009 + 1010 + if (err) 1011 + goto ex; 1012 + 1013 + inlen = sizeof(*in) + npas * sizeof(in->pas[0]); 1014 + in = mlx5_vzalloc(inlen); 1015 + if (!in) { 1016 + err = -ENOMEM; 1017 + goto ex_resize; 1018 + } 1019 + 1020 + if (udata) 1021 + mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift, 1022 + in->pas, 0); 1023 + else 1024 + mlx5_fill_page_array(&cq->resize_buf->buf, in->pas); 1025 + 1026 + in->field_select = cpu_to_be32(MLX5_MODIFY_CQ_MASK_LOG_SIZE | 1027 + MLX5_MODIFY_CQ_MASK_PG_OFFSET | 1028 + MLX5_MODIFY_CQ_MASK_PG_SIZE); 1029 + in->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT; 1030 + in->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5; 1031 + in->ctx.page_offset = 0; 1032 + in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(entries) << 24); 1033 + in->hdr.opmod = cpu_to_be16(MLX5_CQ_OPMOD_RESIZE); 1034 + in->cqn = cpu_to_be32(cq->mcq.cqn); 1035 + 1036 + err = mlx5_core_modify_cq(&dev->mdev, &cq->mcq, in, inlen); 1037 + if (err) 1038 + goto ex_alloc; 1039 + 1040 + if (udata) { 1041 + cq->ibcq.cqe = entries - 1; 1042 + ib_umem_release(cq->buf.umem); 1043 + cq->buf.umem = cq->resize_umem; 1044 + cq->resize_umem = NULL; 1045 + } else { 1046 + struct mlx5_ib_cq_buf tbuf; 1047 + int resized = 0; 1048 + 1049 + spin_lock_irqsave(&cq->lock, flags); 1050 + if (cq->resize_buf) { 1051 + err = copy_resize_cqes(cq); 1052 + if (!err) { 1053 + tbuf = cq->buf; 1054 + cq->buf = 
*cq->resize_buf; 1055 + kfree(cq->resize_buf); 1056 + cq->resize_buf = NULL; 1057 + resized = 1; 1058 + } 1059 + } 1060 + cq->ibcq.cqe = entries - 1; 1061 + spin_unlock_irqrestore(&cq->lock, flags); 1062 + if (resized) 1063 + free_cq_buf(dev, &tbuf); 1064 + } 1065 + mutex_unlock(&cq->resize_mutex); 1066 + 1067 + mlx5_vfree(in); 1068 + return 0; 1069 + 1070 + ex_alloc: 1071 + mlx5_vfree(in); 1072 + 1073 + ex_resize: 1074 + if (udata) 1075 + un_resize_user(cq); 1076 + else 1077 + un_resize_kernel(dev, cq); 1078 + ex: 1079 + mutex_unlock(&cq->resize_mutex); 1080 + return err; 849 1081 } 850 1082 851 1083 int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq)
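The CQ resize support above relies on the CQE ownership convention: with a power-of-two number of entries, the owner bit software expects flips each time the consumer index wraps, which is what sw_ownership_bit() computes. A stand-alone sketch of that alternation (hypothetical 8-entry CQ):

/*
 * Stand-alone sketch of the CQE software-ownership convention the
 * resize code above depends on: with a power-of-two CQ, the owner bit
 * software expects flips once per wrap of the consumer index.
 */
#include <stdio.h>

static int sw_ownership_bit(int n, int nent)
{
        return (n & nent) ? 1 : 0;
}

int main(void)
{
        int nent = 8;   /* hypothetical 8-entry CQ */
        int n;

        for (n = 0; n < 4 * nent; n += nent)
                printf("cons_index %2d -> expected owner bit %d\n",
                       n, sw_ownership_bit(n, nent));
        return 0;
}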
+8 -5
drivers/infiniband/hw/mlx5/main.c
··· 541 541 struct mlx5_ib_ucontext *context; 542 542 struct mlx5_uuar_info *uuari; 543 543 struct mlx5_uar *uars; 544 + int gross_uuars; 544 545 int num_uars; 545 546 int uuarn; 546 547 int err; ··· 560 559 if (req.total_num_uuars == 0) 561 560 return ERR_PTR(-EINVAL); 562 561 563 - req.total_num_uuars = ALIGN(req.total_num_uuars, MLX5_BF_REGS_PER_PAGE); 562 + req.total_num_uuars = ALIGN(req.total_num_uuars, 563 + MLX5_NON_FP_BF_REGS_PER_PAGE); 564 564 if (req.num_low_latency_uuars > req.total_num_uuars - 1) 565 565 return ERR_PTR(-EINVAL); 566 566 567 - num_uars = req.total_num_uuars / MLX5_BF_REGS_PER_PAGE; 567 + num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE; 568 + gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE; 568 569 resp.qp_tab_size = 1 << dev->mdev.caps.log_max_qp; 569 570 resp.bf_reg_size = dev->mdev.caps.bf_reg_size; 570 571 resp.cache_line_size = L1_CACHE_BYTES; ··· 588 585 goto out_ctx; 589 586 } 590 587 591 - uuari->bitmap = kcalloc(BITS_TO_LONGS(req.total_num_uuars), 588 + uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars), 592 589 sizeof(*uuari->bitmap), 593 590 GFP_KERNEL); 594 591 if (!uuari->bitmap) { ··· 598 595 /* 599 596 * clear all fast path uuars 600 597 */ 601 - for (i = 0; i < req.total_num_uuars; i++) { 598 + for (i = 0; i < gross_uuars; i++) { 602 599 uuarn = i & 3; 603 600 if (uuarn == 2 || uuarn == 3) 604 601 set_bit(i, uuari->bitmap); 605 602 } 606 603 607 - uuari->count = kcalloc(req.total_num_uuars, sizeof(*uuari->count), GFP_KERNEL); 604 + uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL); 608 605 if (!uuari->count) { 609 606 err = -ENOMEM; 610 607 goto out_bitmap;
+2 -2
drivers/infiniband/hw/mlx5/mlx5_ib.h
··· 195 195 struct mlx5_buf buf; 196 196 struct ib_umem *umem; 197 197 int cqe_size; 198 + int nent; 198 199 }; 199 200 200 201 enum mlx5_ib_qp_flags { ··· 221 220 /* protect resize cq 222 221 */ 223 222 struct mutex resize_mutex; 224 - struct mlx5_ib_cq_resize *resize_buf; 223 + struct mlx5_ib_cq_buf *resize_buf; 225 224 struct ib_umem *resize_umem; 226 225 int cqe_size; 227 226 }; ··· 265 264 enum ib_wc_status status; 266 265 struct mlx5_ib_dev *dev; 267 266 struct mlx5_create_mkey_mbox_out out; 268 - unsigned long start; 269 267 }; 270 268 271 269 struct mlx5_ib_fast_reg_page_list {
-1
drivers/infiniband/hw/mlx5/mr.c
··· 146 146 spin_lock_irq(&ent->lock); 147 147 ent->pending++; 148 148 spin_unlock_irq(&ent->lock); 149 - mr->start = jiffies; 150 149 err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, 151 150 sizeof(*in), reg_mr_callback, 152 151 mr, &mr->out);
+93 -40
drivers/infiniband/hw/mlx5/qp.c
··· 340 340 return 1; 341 341 } 342 342 343 + static int first_med_uuar(void) 344 + { 345 + return 1; 346 + } 347 + 348 + static int next_uuar(int n) 349 + { 350 + n++; 351 + 352 + while (((n % 4) & 2)) 353 + n++; 354 + 355 + return n; 356 + } 357 + 358 + static int num_med_uuar(struct mlx5_uuar_info *uuari) 359 + { 360 + int n; 361 + 362 + n = uuari->num_uars * MLX5_NON_FP_BF_REGS_PER_PAGE - 363 + uuari->num_low_latency_uuars - 1; 364 + 365 + return n >= 0 ? n : 0; 366 + } 367 + 368 + static int max_uuari(struct mlx5_uuar_info *uuari) 369 + { 370 + return uuari->num_uars * 4; 371 + } 372 + 373 + static int first_hi_uuar(struct mlx5_uuar_info *uuari) 374 + { 375 + int med; 376 + int i; 377 + int t; 378 + 379 + med = num_med_uuar(uuari); 380 + for (t = 0, i = first_med_uuar();; i = next_uuar(i)) { 381 + t++; 382 + if (t == med) 383 + return next_uuar(i); 384 + } 385 + 386 + return 0; 387 + } 388 + 343 389 static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari) 344 390 { 345 - int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE; 346 - int start_uuar; 347 391 int i; 348 392 349 - start_uuar = nuuars - uuari->num_low_latency_uuars; 350 - for (i = start_uuar; i < nuuars; i++) { 393 + for (i = first_hi_uuar(uuari); i < max_uuari(uuari); i = next_uuar(i)) { 351 394 if (!test_bit(i, uuari->bitmap)) { 352 395 set_bit(i, uuari->bitmap); 353 396 uuari->count[i]++; ··· 403 360 404 361 static int alloc_med_class_uuar(struct mlx5_uuar_info *uuari) 405 362 { 406 - int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE; 407 - int minidx = 1; 408 - int uuarn; 409 - int end; 363 + int minidx = first_med_uuar(); 410 364 int i; 411 365 412 - end = nuuars - uuari->num_low_latency_uuars; 413 - 414 - for (i = 1; i < end; i++) { 415 - uuarn = i & 3; 416 - if (uuarn == 2 || uuarn == 3) 417 - continue; 418 - 366 + for (i = first_med_uuar(); i < first_hi_uuar(uuari); i = next_uuar(i)) { 419 367 if (uuari->count[i] < uuari->count[minidx]) 420 368 minidx = i; 421 369 } ··· 523 489 { 524 490 struct mlx5_ib_ucontext *context; 525 491 struct mlx5_ib_create_qp ucmd; 526 - int page_shift; 492 + int page_shift = 0; 527 493 int uar_index; 528 494 int npages; 529 - u32 offset; 495 + u32 offset = 0; 530 496 int uuarn; 531 - int ncont; 497 + int ncont = 0; 532 498 int err; 533 499 534 500 err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); ··· 544 510 uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH); 545 511 if (uuarn < 0) { 546 512 mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n"); 547 - mlx5_ib_dbg(dev, "reverting to high latency\n"); 548 - uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW); 513 + mlx5_ib_dbg(dev, "reverting to medium latency\n"); 514 + uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM); 549 515 if (uuarn < 0) { 550 - mlx5_ib_dbg(dev, "uuar allocation failed\n"); 551 - return uuarn; 516 + mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n"); 517 + mlx5_ib_dbg(dev, "reverting to high latency\n"); 518 + uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW); 519 + if (uuarn < 0) { 520 + mlx5_ib_warn(dev, "uuar allocation failed\n"); 521 + return uuarn; 522 + } 552 523 } 553 524 } 554 525 ··· 564 525 if (err) 565 526 goto err_uuar; 566 527 567 - qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, 568 - qp->buf_size, 0, 0); 569 - if (IS_ERR(qp->umem)) { 570 - mlx5_ib_dbg(dev, "umem_get failed\n"); 571 - err = PTR_ERR(qp->umem); 572 - goto err_uuar; 528 + if (ucmd.buf_addr && qp->buf_size) { 529 + qp->umem = 
ib_umem_get(pd->uobject->context, ucmd.buf_addr, 530 + qp->buf_size, 0, 0); 531 + if (IS_ERR(qp->umem)) { 532 + mlx5_ib_dbg(dev, "umem_get failed\n"); 533 + err = PTR_ERR(qp->umem); 534 + goto err_uuar; 535 + } 536 + } else { 537 + qp->umem = NULL; 573 538 } 574 539 575 - mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift, 576 - &ncont, NULL); 577 - err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset); 578 - if (err) { 579 - mlx5_ib_warn(dev, "bad offset\n"); 580 - goto err_umem; 540 + if (qp->umem) { 541 + mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift, 542 + &ncont, NULL); 543 + err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset); 544 + if (err) { 545 + mlx5_ib_warn(dev, "bad offset\n"); 546 + goto err_umem; 547 + } 548 + mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n", 549 + ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset); 581 550 } 582 - mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n", 583 - ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset); 584 551 585 552 *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont; 586 553 *in = mlx5_vzalloc(*inlen); ··· 594 549 err = -ENOMEM; 595 550 goto err_umem; 596 551 } 597 - mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0); 552 + if (qp->umem) 553 + mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0); 598 554 (*in)->ctx.log_pg_sz_remote_qpn = 599 555 cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24); 600 556 (*in)->ctx.params2 = cpu_to_be32(offset << 6); ··· 626 580 mlx5_vfree(*in); 627 581 628 582 err_umem: 629 - ib_umem_release(qp->umem); 583 + if (qp->umem) 584 + ib_umem_release(qp->umem); 630 585 631 586 err_uuar: 632 587 free_uuar(&context->uuari, uuarn); ··· 640 593 641 594 context = to_mucontext(pd->uobject->context); 642 595 mlx5_ib_db_unmap_user(context, &qp->db); 643 - ib_umem_release(qp->umem); 596 + if (qp->umem) 597 + ib_umem_release(qp->umem); 644 598 free_uuar(&context->uuari, qp->uuarn); 645 599 } 646 600 ··· 1664 1616 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; 1665 1617 1666 1618 if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR && 1667 - !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) 1619 + !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask, 1620 + IB_LINK_LAYER_UNSPECIFIED)) 1668 1621 goto out; 1669 1622 1670 1623 if ((attr_mask & IB_QP_PORT) && ··· 2260 2211 wmb(); 2261 2212 2262 2213 qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post); 2214 + 2215 + /* Make sure doorbell record is visible to the HCA before 2216 + * we hit doorbell */ 2217 + wmb(); 2263 2218 2264 2219 if (bf->need_lock) 2265 2220 spin_lock(&bf->lock);
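The UUAR allocation rework above (here and in main.c) treats slots 2 and 3 of every group of four blue-flame registers as fast-path only, so next_uuar() steps over them when searching for medium- and high-class UUARs. A stand-alone sketch of the index walk (illustration only):

/*
 * Stand-alone sketch of the UUAR index walk: in every group of four
 * blue-flame register slots, slots 2 and 3 are fast-path only, so the
 * walk skips any index whose (n % 4) has bit 1 set. The walk starts at
 * index 1, mirroring first_med_uuar() above.
 */
#include <stdio.h>

static int next_uuar(int n)
{
        n++;
        while ((n % 4) & 2)
                n++;
        return n;
}

int main(void)
{
        int n = 0;
        int i;

        printf("allocatable uuar indices:");
        for (i = 0; i < 8; i++) {
                n = next_uuar(n);
                printf(" %d", n);
        }
        printf("\n");   /* prints: 1 4 5 8 9 12 13 16 */
        return 0;
}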
+3
drivers/infiniband/hw/mlx5/user.h
··· 93 93 94 94 struct mlx5_ib_resize_cq { 95 95 __u64 buf_addr; 96 + __u16 cqe_size; 97 + __u16 reserved0; 98 + __u32 reserved1; 96 99 }; 97 100 98 101 struct mlx5_ib_create_srq {
+2 -1
drivers/infiniband/hw/mthca/mthca_qp.c
··· 860 860 861 861 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; 862 862 863 - if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) { 863 + if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask, 864 + IB_LINK_LAYER_UNSPECIFIED)) { 864 865 mthca_dbg(dev, "Bad QP transition (transport %d) " 865 866 "%d->%d with attr 0x%08x\n", 866 867 qp->transport, cur_state, new_state,
+1 -2
drivers/infiniband/hw/nes/nes_cm.c
··· 1354 1354 neigh->ha, ntohl(rt->rt_gateway)); 1355 1355 1356 1356 if (arpindex >= 0) { 1357 - if (!memcmp(nesadapter->arp_table[arpindex].mac_addr, 1358 - neigh->ha, ETH_ALEN)) { 1357 + if (ether_addr_equal(nesadapter->arp_table[arpindex].mac_addr, neigh->ha)) { 1359 1358 /* Mac address same as in nes_arp_table */ 1360 1359 goto out; 1361 1360 }
+1 -1
drivers/infiniband/hw/ocrdma/Kconfig
··· 1 1 config INFINIBAND_OCRDMA 2 2 tristate "Emulex One Connect HCA support" 3 - depends on ETHERNET && NETDEVICES && PCI && (IPV6 || IPV6=n) 3 + depends on ETHERNET && NETDEVICES && PCI && INET && (IPV6 || IPV6=n) 4 4 select NET_VENDOR_EMULEX 5 5 select BE2NET 6 6 ---help---
+12
drivers/infiniband/hw/ocrdma/ocrdma.h
··· 423 423 OCRDMA_CQE_WRITE_IMM) ? 1 : 0; 424 424 } 425 425 426 + static inline int ocrdma_resolve_dmac(struct ocrdma_dev *dev, 427 + struct ib_ah_attr *ah_attr, u8 *mac_addr) 428 + { 429 + struct in6_addr in6; 430 + 431 + memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6)); 432 + if (rdma_is_multicast_addr(&in6)) 433 + rdma_get_mcast_mac(&in6, mac_addr); 434 + else 435 + memcpy(mac_addr, ah_attr->dmac, ETH_ALEN); 436 + return 0; 437 + } 426 438 427 439 #endif
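ocrdma_resolve_dmac() above takes the destination MAC straight from ah_attr->dmac except for multicast GIDs, which are mapped to a multicast MAC via rdma_get_mcast_mac(): 33:33 followed by the low 32 bits of the address. A stand-alone sketch of that mapping (GID bytes are made up):

/*
 * Stand-alone sketch of the multicast branch of ocrdma_resolve_dmac():
 * an IPv6-style multicast MAC is 33:33 followed by the low 32 bits of
 * the GID, which is what rdma_get_mcast_mac() produces.
 */
#include <stdio.h>
#include <stdint.h>

static void mcast_gid_to_mac(const uint8_t gid[16], uint8_t mac[6])
{
        int i;

        mac[0] = 0x33;
        mac[1] = 0x33;
        for (i = 2; i < 6; i++)
                mac[i] = gid[i + 10];   /* GID bytes 12..15 */
}

int main(void)
{
        const uint8_t gid[16] = { 0xff, 0x12, [12] = 0xde, 0xad, 0xbe, 0xef };
        uint8_t mac[6];
        int i;

        mcast_gid_to_mac(gid, mac);
        for (i = 0; i < 6; i++)
                printf("%02x%s", mac[i], i == 5 ? "\n" : ":");
        return 0;
}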
+4 -2
drivers/infiniband/hw/ocrdma/ocrdma_ah.c
··· 49 49 50 50 ah->sgid_index = attr->grh.sgid_index; 51 51 52 - vlan_tag = rdma_get_vlan_id(&attr->grh.dgid); 52 + vlan_tag = attr->vlan_id; 53 53 if (!vlan_tag || (vlan_tag > 0xFFF)) 54 54 vlan_tag = dev->pvid; 55 55 if (vlan_tag && (vlan_tag < 0x1000)) { ··· 64 64 eth_sz = sizeof(struct ocrdma_eth_basic); 65 65 } 66 66 memcpy(&eth.smac[0], &dev->nic_info.mac_addr[0], ETH_ALEN); 67 - status = ocrdma_resolve_dgid(dev, &attr->grh.dgid, &eth.dmac[0]); 67 + memcpy(&eth.dmac[0], attr->dmac, ETH_ALEN); 68 + status = ocrdma_resolve_dmac(dev, attr, &eth.dmac[0]); 68 69 if (status) 69 70 return status; 70 71 status = ocrdma_query_gid(&dev->ibdev, 1, attr->grh.sgid_index, ··· 85 84 memcpy((u8 *)ah->av + eth_sz, &grh, sizeof(struct ocrdma_grh)); 86 85 if (vlan_enabled) 87 86 ah->av->valid |= OCRDMA_AV_VLAN_VALID; 87 + ah->av->valid = cpu_to_le32(ah->av->valid); 88 88 return status; 89 89 } 90 90
+2 -19
drivers/infiniband/hw/ocrdma/ocrdma_hw.c
··· 2076 2076 return status; 2077 2077 } 2078 2078 2079 - int ocrdma_resolve_dgid(struct ocrdma_dev *dev, union ib_gid *dgid, 2080 - u8 *mac_addr) 2081 - { 2082 - struct in6_addr in6; 2083 - 2084 - memcpy(&in6, dgid, sizeof in6); 2085 - if (rdma_is_multicast_addr(&in6)) { 2086 - rdma_get_mcast_mac(&in6, mac_addr); 2087 - } else if (rdma_link_local_addr(&in6)) { 2088 - rdma_get_ll_mac(&in6, mac_addr); 2089 - } else { 2090 - pr_err("%s() fail to resolve mac_addr.\n", __func__); 2091 - return -EINVAL; 2092 - } 2093 - return 0; 2094 - } 2095 - 2096 2079 static int ocrdma_set_av_params(struct ocrdma_qp *qp, 2097 2080 struct ocrdma_modify_qp *cmd, 2098 2081 struct ib_qp_attr *attrs) ··· 2109 2126 2110 2127 qp->sgid_idx = ah_attr->grh.sgid_index; 2111 2128 memcpy(&cmd->params.sgid[0], &sgid.raw[0], sizeof(cmd->params.sgid)); 2112 - ocrdma_resolve_dgid(qp->dev, &ah_attr->grh.dgid, &mac_addr[0]); 2129 + ocrdma_resolve_dmac(qp->dev, ah_attr, &mac_addr[0]); 2113 2130 cmd->params.dmac_b0_to_b3 = mac_addr[0] | (mac_addr[1] << 8) | 2114 2131 (mac_addr[2] << 16) | (mac_addr[3] << 24); 2115 2132 /* convert them to LE format. */ 2116 2133 ocrdma_cpu_to_le32(&cmd->params.dgid[0], sizeof(cmd->params.dgid)); 2117 2134 ocrdma_cpu_to_le32(&cmd->params.sgid[0], sizeof(cmd->params.sgid)); 2118 2135 cmd->params.vlan_dmac_b4_to_b5 = mac_addr[4] | (mac_addr[5] << 8); 2119 - vlan_id = rdma_get_vlan_id(&sgid); 2136 + vlan_id = ah_attr->vlan_id; 2120 2137 if (vlan_id && (vlan_id < 0x1000)) { 2121 2138 cmd->params.vlan_dmac_b4_to_b5 |= 2122 2139 vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT;
-1
drivers/infiniband/hw/ocrdma/ocrdma_hw.h
··· 94 94 int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed); 95 95 int ocrdma_query_config(struct ocrdma_dev *, 96 96 struct ocrdma_mbx_query_config *config); 97 - int ocrdma_resolve_dgid(struct ocrdma_dev *, union ib_gid *dgid, u8 *mac_addr); 98 97 99 98 int ocrdma_mbx_alloc_pd(struct ocrdma_dev *, struct ocrdma_pd *); 100 99 int ocrdma_mbx_dealloc_pd(struct ocrdma_dev *, struct ocrdma_pd *);
+40 -98
drivers/infiniband/hw/ocrdma/ocrdma_main.c
··· 67 67 guid[7] = mac_addr[5]; 68 68 } 69 69 70 - static void ocrdma_build_sgid_mac(union ib_gid *sgid, unsigned char *mac_addr, 71 - bool is_vlan, u16 vlan_id) 72 - { 73 - sgid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); 74 - sgid->raw[8] = mac_addr[0] ^ 2; 75 - sgid->raw[9] = mac_addr[1]; 76 - sgid->raw[10] = mac_addr[2]; 77 - if (is_vlan) { 78 - sgid->raw[11] = vlan_id >> 8; 79 - sgid->raw[12] = vlan_id & 0xff; 80 - } else { 81 - sgid->raw[11] = 0xff; 82 - sgid->raw[12] = 0xfe; 83 - } 84 - sgid->raw[13] = mac_addr[3]; 85 - sgid->raw[14] = mac_addr[4]; 86 - sgid->raw[15] = mac_addr[5]; 87 - } 88 - 89 - static bool ocrdma_add_sgid(struct ocrdma_dev *dev, unsigned char *mac_addr, 90 - bool is_vlan, u16 vlan_id) 70 + static bool ocrdma_add_sgid(struct ocrdma_dev *dev, union ib_gid *new_sgid) 91 71 { 92 72 int i; 93 - union ib_gid new_sgid; 94 73 unsigned long flags; 95 74 96 75 memset(&ocrdma_zero_sgid, 0, sizeof(union ib_gid)); 97 76 98 - ocrdma_build_sgid_mac(&new_sgid, mac_addr, is_vlan, vlan_id); 99 77 100 78 spin_lock_irqsave(&dev->sgid_lock, flags); 101 79 for (i = 0; i < OCRDMA_MAX_SGID; i++) { 102 80 if (!memcmp(&dev->sgid_tbl[i], &ocrdma_zero_sgid, 103 81 sizeof(union ib_gid))) { 104 82 /* found free entry */ 105 - memcpy(&dev->sgid_tbl[i], &new_sgid, 83 + memcpy(&dev->sgid_tbl[i], new_sgid, 106 84 sizeof(union ib_gid)); 107 85 spin_unlock_irqrestore(&dev->sgid_lock, flags); 108 86 return true; 109 - } else if (!memcmp(&dev->sgid_tbl[i], &new_sgid, 87 + } else if (!memcmp(&dev->sgid_tbl[i], new_sgid, 110 88 sizeof(union ib_gid))) { 111 89 /* entry already present, no addition is required. */ 112 90 spin_unlock_irqrestore(&dev->sgid_lock, flags); ··· 95 117 return false; 96 118 } 97 119 98 - static bool ocrdma_del_sgid(struct ocrdma_dev *dev, unsigned char *mac_addr, 99 - bool is_vlan, u16 vlan_id) 120 + static bool ocrdma_del_sgid(struct ocrdma_dev *dev, union ib_gid *sgid) 100 121 { 101 122 int found = false; 102 123 int i; 103 - union ib_gid sgid; 104 124 unsigned long flags; 105 125 106 - ocrdma_build_sgid_mac(&sgid, mac_addr, is_vlan, vlan_id); 107 126 108 127 spin_lock_irqsave(&dev->sgid_lock, flags); 109 128 /* first is default sgid, which cannot be deleted. 
*/ 110 129 for (i = 1; i < OCRDMA_MAX_SGID; i++) { 111 - if (!memcmp(&dev->sgid_tbl[i], &sgid, sizeof(union ib_gid))) { 130 + if (!memcmp(&dev->sgid_tbl[i], sgid, sizeof(union ib_gid))) { 112 131 /* found matching entry */ 113 132 memset(&dev->sgid_tbl[i], 0, sizeof(union ib_gid)); 114 133 found = true; ··· 116 141 return found; 117 142 } 118 143 119 - static void ocrdma_add_default_sgid(struct ocrdma_dev *dev) 144 + static int ocrdma_addr_event(unsigned long event, struct net_device *netdev, 145 + union ib_gid *gid) 120 146 { 121 - /* GID Index 0 - Invariant manufacturer-assigned EUI-64 */ 122 - union ib_gid *sgid = &dev->sgid_tbl[0]; 123 - 124 - sgid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); 125 - ocrdma_get_guid(dev, &sgid->raw[8]); 126 - } 127 - 128 - #if IS_ENABLED(CONFIG_VLAN_8021Q) 129 - static void ocrdma_add_vlan_sgids(struct ocrdma_dev *dev) 130 - { 131 - struct net_device *netdev, *tmp; 132 - u16 vlan_id; 133 - bool is_vlan; 134 - 135 - netdev = dev->nic_info.netdev; 136 - 137 - rcu_read_lock(); 138 - for_each_netdev_rcu(&init_net, tmp) { 139 - if (netdev == tmp || vlan_dev_real_dev(tmp) == netdev) { 140 - if (!netif_running(tmp) || !netif_oper_up(tmp)) 141 - continue; 142 - if (netdev != tmp) { 143 - vlan_id = vlan_dev_vlan_id(tmp); 144 - is_vlan = true; 145 - } else { 146 - is_vlan = false; 147 - vlan_id = 0; 148 - tmp = netdev; 149 - } 150 - ocrdma_add_sgid(dev, tmp->dev_addr, is_vlan, vlan_id); 151 - } 152 - } 153 - rcu_read_unlock(); 154 - } 155 - #else 156 - static void ocrdma_add_vlan_sgids(struct ocrdma_dev *dev) 157 - { 158 - 159 - } 160 - #endif /* VLAN */ 161 - 162 - static int ocrdma_build_sgid_tbl(struct ocrdma_dev *dev) 163 - { 164 - ocrdma_add_default_sgid(dev); 165 - ocrdma_add_vlan_sgids(dev); 166 - return 0; 167 - } 168 - 169 - #if IS_ENABLED(CONFIG_IPV6) 170 - 171 - static int ocrdma_inet6addr_event(struct notifier_block *notifier, 172 - unsigned long event, void *ptr) 173 - { 174 - struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr; 175 - struct net_device *netdev = ifa->idev->dev; 176 147 struct ib_event gid_event; 177 148 struct ocrdma_dev *dev; 178 149 bool found = false; 179 150 bool updated = false; 180 151 bool is_vlan = false; 181 - u16 vid = 0; 182 152 183 153 is_vlan = netdev->priv_flags & IFF_802_1Q_VLAN; 184 - if (is_vlan) { 185 - vid = vlan_dev_vlan_id(netdev); 154 + if (is_vlan) 186 155 netdev = vlan_dev_real_dev(netdev); 187 - } 188 156 189 157 rcu_read_lock(); 190 158 list_for_each_entry_rcu(dev, &ocrdma_dev_list, entry) { ··· 140 222 141 223 if (!found) 142 224 return NOTIFY_DONE; 143 - if (!rdma_link_local_addr((struct in6_addr *)&ifa->addr)) 144 - return NOTIFY_DONE; 145 225 146 226 mutex_lock(&dev->dev_lock); 147 227 switch (event) { 148 228 case NETDEV_UP: 149 - updated = ocrdma_add_sgid(dev, netdev->dev_addr, is_vlan, vid); 229 + updated = ocrdma_add_sgid(dev, gid); 150 230 break; 151 231 case NETDEV_DOWN: 152 - updated = ocrdma_del_sgid(dev, netdev->dev_addr, is_vlan, vid); 232 + updated = ocrdma_del_sgid(dev, gid); 153 233 break; 154 234 default: 155 235 break; ··· 161 245 } 162 246 mutex_unlock(&dev->dev_lock); 163 247 return NOTIFY_OK; 248 + } 249 + 250 + static int ocrdma_inetaddr_event(struct notifier_block *notifier, 251 + unsigned long event, void *ptr) 252 + { 253 + struct in_ifaddr *ifa = ptr; 254 + union ib_gid gid; 255 + struct net_device *netdev = ifa->ifa_dev->dev; 256 + 257 + ipv6_addr_set_v4mapped(ifa->ifa_address, (struct in6_addr *)&gid); 258 + return ocrdma_addr_event(event, netdev, &gid); 259 + } 
260 + 261 + static struct notifier_block ocrdma_inetaddr_notifier = { 262 + .notifier_call = ocrdma_inetaddr_event 263 + }; 264 + 265 + #if IS_ENABLED(CONFIG_IPV6) 266 + 267 + static int ocrdma_inet6addr_event(struct notifier_block *notifier, 268 + unsigned long event, void *ptr) 269 + { 270 + struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr; 271 + union ib_gid *gid = (union ib_gid *)&ifa->addr; 272 + struct net_device *netdev = ifa->idev->dev; 273 + return ocrdma_addr_event(event, netdev, gid); 164 274 } 165 275 166 276 static struct notifier_block ocrdma_inet6addr_notifier = { ··· 365 423 if (status) 366 424 goto alloc_err; 367 425 368 - status = ocrdma_build_sgid_tbl(dev); 369 - if (status) 370 - goto alloc_err; 371 - 372 426 status = ocrdma_register_device(dev); 373 427 if (status) 374 428 goto alloc_err; ··· 490 552 static int __init ocrdma_init_module(void) 491 553 { 492 554 int status; 555 + 556 + status = register_inetaddr_notifier(&ocrdma_inetaddr_notifier); 557 + if (status) 558 + return status; 493 559 494 560 #if IS_ENABLED(CONFIG_IPV6) 495 561 status = register_inet6addr_notifier(&ocrdma_inet6addr_notifier);
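ocrdma now learns its SGIDs from address notifiers instead of walking VLAN devices: each IPv4 address event is converted to a v4-mapped GID and added to or removed from the table. A minimal notifier-module sketch of that pattern (assumed skeleton, not the ocrdma code):

/*
 * Minimal notifier-module sketch (assumed skeleton, not ocrdma code):
 * IPv4 address events arrive as struct in_ifaddr, the address is
 * converted to a v4-mapped GID, and a real driver would then add or
 * remove the corresponding SGID table entry.
 */
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>
#include <net/ipv6.h>

static int demo_inetaddr_event(struct notifier_block *nb,
                               unsigned long event, void *ptr)
{
        struct in_ifaddr *ifa = ptr;
        struct in6_addr gid;

        ipv6_addr_set_v4mapped(ifa->ifa_address, &gid);
        pr_info("demo: event %lu, gid %pI6\n", event, &gid);
        return NOTIFY_DONE;
}

static struct notifier_block demo_nb = {
        .notifier_call = demo_inetaddr_event,
};

static int __init demo_init(void)
{
        return register_inetaddr_notifier(&demo_nb);
}

static void __exit demo_exit(void)
{
        unregister_inetaddr_notifier(&demo_nb);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");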
+2 -2
drivers/infiniband/hw/ocrdma/ocrdma_sli.h
··· 31 31 #define Bit(_b) (1 << (_b)) 32 32 33 33 #define OCRDMA_GEN1_FAMILY 0xB 34 - #define OCRDMA_GEN2_FAMILY 0x2 34 + #define OCRDMA_GEN2_FAMILY 0x0F 35 35 36 36 #define OCRDMA_SUBSYS_ROCE 10 37 37 enum { ··· 1694 1694 u16 rsvd; 1695 1695 } __packed; 1696 1696 1697 - #define OCRDMA_AV_VALID Bit(0) 1697 + #define OCRDMA_AV_VALID Bit(7) 1698 1698 #define OCRDMA_AV_VLAN_VALID Bit(1) 1699 1699 1700 1700 struct ocrdma_av {
+2 -1
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
··· 1326 1326 new_qps = old_qps; 1327 1327 spin_unlock_irqrestore(&qp->q_lock, flags); 1328 1328 1329 - if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask)) { 1329 + if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask, 1330 + IB_LINK_LAYER_ETHERNET)) { 1330 1331 pr_err("%s(%d) invalid attribute mask=0x%x specified for\n" 1331 1332 "qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n", 1332 1333 __func__, dev->id, attr_mask, qp->id, ibqp->qp_type,
+1 -1
drivers/infiniband/hw/qib/qib_qp.c
··· 585 585 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; 586 586 587 587 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, 588 - attr_mask)) 588 + attr_mask, IB_LINK_LAYER_UNSPECIFIED)) 589 589 goto inval; 590 590 591 591 if (attr_mask & IB_QP_AV) {
+8 -1
drivers/infiniband/hw/qib/qib_ud.c
··· 57 57 struct qib_sge *sge; 58 58 struct ib_wc wc; 59 59 u32 length; 60 + enum ib_qp_type sqptype, dqptype; 60 61 61 62 qp = qib_lookup_qpn(ibp, swqe->wr.wr.ud.remote_qpn); 62 63 if (!qp) { 63 64 ibp->n_pkt_drops++; 64 65 return; 65 66 } 66 - if (qp->ibqp.qp_type != sqp->ibqp.qp_type || 67 + 68 + sqptype = sqp->ibqp.qp_type == IB_QPT_GSI ? 69 + IB_QPT_UD : sqp->ibqp.qp_type; 70 + dqptype = qp->ibqp.qp_type == IB_QPT_GSI ? 71 + IB_QPT_UD : qp->ibqp.qp_type; 72 + 73 + if (dqptype != sqptype || 67 74 !(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) { 68 75 ibp->n_pkt_drops++; 69 76 goto drop;
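The qib_ud.c fix above normalizes QP types before the loopback check so that a packet sent to or from QP1 (GSI) is treated as plain UD rather than being dropped for a type mismatch. A stand-alone sketch of that normalization (illustration only):

/*
 * Stand-alone sketch of the QP-type normalization above: for the
 * purpose of the loopback compatibility check, a GSI QP (QP1) is
 * treated as an ordinary UD QP on both the send and receive side.
 */
#include <stdio.h>

enum qp_type { QPT_UD, QPT_GSI, QPT_RC };

static int loopback_types_match(enum qp_type s, enum qp_type d)
{
        if (s == QPT_GSI)
                s = QPT_UD;
        if (d == QPT_GSI)
                d = QPT_UD;
        return s == d;
}

int main(void)
{
        printf("GSI -> UD: %d\n", loopback_types_match(QPT_GSI, QPT_UD));
        printf("GSI -> RC: %d\n", loopback_types_match(QPT_GSI, QPT_RC));
        return 0;
}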
+10
drivers/infiniband/hw/usnic/Kconfig
··· 1 + config INFINIBAND_USNIC 2 + tristate "Verbs support for Cisco VIC" 3 + depends on NETDEVICES && ETHERNET && INET && PCI && INTEL_IOMMU 4 + select ENIC 5 + select NET_VENDOR_CISCO 6 + select PCI_IOV 7 + select INFINIBAND_USER_ACCESS 8 + ---help--- 9 + This is a low-level driver for Cisco's Virtual Interface 10 + Cards (VICs), including the VIC 1240 and 1280 cards.
+15
drivers/infiniband/hw/usnic/Makefile
··· 1 + ccflags-y := -Idrivers/net/ethernet/cisco/enic 2 + 3 + obj-$(CONFIG_INFINIBAND_USNIC)+= usnic_verbs.o 4 + 5 + usnic_verbs-y=\ 6 + usnic_fwd.o \ 7 + usnic_transport.o \ 8 + usnic_uiom.o \ 9 + usnic_uiom_interval_tree.o \ 10 + usnic_vnic.o \ 11 + usnic_ib_main.o \ 12 + usnic_ib_qp_grp.o \ 13 + usnic_ib_sysfs.o \ 14 + usnic_ib_verbs.o \ 15 + usnic_debugfs.o \
+29
drivers/infiniband/hw/usnic/usnic.h
··· 1 + /* 2 + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. 3 + * 4 + * This program is free software; you may redistribute it and/or modify 5 + * it under the terms of the GNU General Public License as published by 6 + * the Free Software Foundation; version 2 of the License. 7 + * 8 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 9 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 10 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 11 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 12 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 13 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 + * SOFTWARE. 16 + * 17 + */ 18 + 19 + #ifndef USNIC_H_ 20 + #define USNIC_H_ 21 + 22 + #define DRV_NAME "usnic_verbs" 23 + 24 + #define PCI_DEVICE_ID_CISCO_VIC_USPACE_NIC 0x00cf /* User space NIC */ 25 + 26 + #define DRV_VERSION "1.0.3" 27 + #define DRV_RELDATE "December 19, 2013" 28 + 29 + #endif /* USNIC_H_ */
+73
drivers/infiniband/hw/usnic/usnic_abi.h
··· 1 + /* 2 + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. 3 + * 4 + * This program is free software; you may redistribute it and/or modify 5 + * it under the terms of the GNU General Public License as published by 6 + * the Free Software Foundation; version 2 of the License. 7 + * 8 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 9 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 10 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 11 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 12 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 13 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 + * SOFTWARE. 16 + * 17 + */ 18 + 19 + 20 + #ifndef USNIC_ABI_H 21 + #define USNIC_ABI_H 22 + 23 + /* ABI between userspace and kernel */ 24 + #define USNIC_UVERBS_ABI_VERSION 4 25 + 26 + #define USNIC_QP_GRP_MAX_WQS 8 27 + #define USNIC_QP_GRP_MAX_RQS 8 28 + #define USNIC_QP_GRP_MAX_CQS 16 29 + 30 + enum usnic_transport_type { 31 + USNIC_TRANSPORT_UNKNOWN = 0, 32 + USNIC_TRANSPORT_ROCE_CUSTOM = 1, 33 + USNIC_TRANSPORT_IPV4_UDP = 2, 34 + USNIC_TRANSPORT_MAX = 3, 35 + }; 36 + 37 + struct usnic_transport_spec { 38 + enum usnic_transport_type trans_type; 39 + union { 40 + struct { 41 + uint16_t port_num; 42 + } usnic_roce; 43 + struct { 44 + uint32_t sock_fd; 45 + } udp; 46 + }; 47 + }; 48 + 49 + struct usnic_ib_create_qp_cmd { 50 + struct usnic_transport_spec spec; 51 + }; 52 + 53 + /*TODO: Future - usnic_modify_qp needs to pass in generic filters */ 54 + struct usnic_ib_create_qp_resp { 55 + u32 vfid; 56 + u32 qp_grp_id; 57 + u64 bar_bus_addr; 58 + u32 bar_len; 59 + /* 60 + * WQ, RQ, CQ are explicity specified bc exposing a generic resources inteface 61 + * expands the scope of ABI to many files. 62 + */ 63 + u32 wq_cnt; 64 + u32 rq_cnt; 65 + u32 cq_cnt; 66 + u32 wq_idx[USNIC_QP_GRP_MAX_WQS]; 67 + u32 rq_idx[USNIC_QP_GRP_MAX_RQS]; 68 + u32 cq_idx[USNIC_QP_GRP_MAX_CQS]; 69 + u32 transport; 70 + u32 reserved[9]; 71 + }; 72 + 73 + #endif /* USNIC_ABI_H */
+27
drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h
··· 1 + /* 2 + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. 3 + * 4 + * This program is free software; you may redistribute it and/or modify 5 + * it under the terms of the GNU General Public License as published by 6 + * the Free Software Foundation; version 2 of the License. 7 + * 8 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 9 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 10 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 11 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 12 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 13 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 + * SOFTWARE. 16 + * 17 + */ 18 + 19 + #ifndef USNIC_CMN_PKT_HDR_H 20 + #define USNIC_CMN_PKT_HDR_H 21 + 22 + #define USNIC_ROCE_ETHERTYPE (0x8915) 23 + #define USNIC_ROCE_GRH_VER (8) 24 + #define USNIC_PROTO_VER (1) 25 + #define USNIC_ROCE_GRH_VER_SHIFT (4) 26 + 27 + #endif /* USNIC_COMMON_PKT_HDR_H */
+68
drivers/infiniband/hw/usnic/usnic_common_util.h
··· 1 + /* 2 + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. 3 + * 4 + * This program is free software; you may redistribute it and/or modify 5 + * it under the terms of the GNU General Public License as published by 6 + * the Free Software Foundation; version 2 of the License. 7 + * 8 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 9 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 10 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 11 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 12 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 13 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 + * SOFTWARE. 16 + * 17 + */ 18 + 19 + #ifndef USNIC_CMN_UTIL_H 20 + #define USNIC_CMN_UTIL_H 21 + 22 + static inline void 23 + usnic_mac_to_gid(const char *const mac, char *raw_gid) 24 + { 25 + raw_gid[0] = 0xfe; 26 + raw_gid[1] = 0x80; 27 + memset(&raw_gid[2], 0, 6); 28 + raw_gid[8] = mac[0]^2; 29 + raw_gid[9] = mac[1]; 30 + raw_gid[10] = mac[2]; 31 + raw_gid[11] = 0xff; 32 + raw_gid[12] = 0xfe; 33 + raw_gid[13] = mac[3]; 34 + raw_gid[14] = mac[4]; 35 + raw_gid[15] = mac[5]; 36 + } 37 + 38 + static inline void 39 + usnic_mac_ip_to_gid(const char *const mac, const __be32 inaddr, char *raw_gid) 40 + { 41 + raw_gid[0] = 0xfe; 42 + raw_gid[1] = 0x80; 43 + memset(&raw_gid[2], 0, 2); 44 + memcpy(&raw_gid[4], &inaddr, 4); 45 + raw_gid[8] = mac[0]^2; 46 + raw_gid[9] = mac[1]; 47 + raw_gid[10] = mac[2]; 48 + raw_gid[11] = 0xff; 49 + raw_gid[12] = 0xfe; 50 + raw_gid[13] = mac[3]; 51 + raw_gid[14] = mac[4]; 52 + raw_gid[15] = mac[5]; 53 + } 54 + 55 + static inline void 56 + usnic_write_gid_if_id_from_mac(char *mac, char *raw_gid) 57 + { 58 + raw_gid[8] = mac[0]^2; 59 + raw_gid[9] = mac[1]; 60 + raw_gid[10] = mac[2]; 61 + raw_gid[11] = 0xff; 62 + raw_gid[12] = 0xfe; 63 + raw_gid[13] = mac[3]; 64 + raw_gid[14] = mac[4]; 65 + raw_gid[15] = mac[5]; 66 + } 67 + 68 + #endif /* USNIC_COMMON_UTIL_H */
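usnic_mac_ip_to_gid() above builds the usNIC GID from an fe80-style prefix, the IPv4 address in bytes 4-7, and an EUI-64 style interface ID derived from the MAC. A stand-alone sketch of the same layout (MAC and IP values are made up):

/*
 * Stand-alone sketch of the usNIC GID layout built above: an fe80-style
 * prefix, the IPv4 address in bytes 4-7, and an EUI-64 style interface
 * ID from the MAC (universal/local bit flipped, ff:fe in the middle).
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

static void mac_ip_to_gid(const uint8_t mac[6], uint32_t inaddr_be,
                          uint8_t gid[16])
{
        gid[0] = 0xfe;
        gid[1] = 0x80;
        gid[2] = 0;
        gid[3] = 0;
        memcpy(&gid[4], &inaddr_be, 4);
        gid[8]  = mac[0] ^ 2;           /* flip universal/local bit */
        gid[9]  = mac[1];
        gid[10] = mac[2];
        gid[11] = 0xff;
        gid[12] = 0xfe;
        gid[13] = mac[3];
        gid[14] = mac[4];
        gid[15] = mac[5];
}

int main(void)
{
        const uint8_t mac[6] = { 0x00, 0x25, 0xb5, 0xaa, 0xbb, 0xcc };
        uint8_t gid[16];
        int i;

        mac_ip_to_gid(mac, inet_addr("10.0.0.1"), gid);
        for (i = 0; i < 16; i++)
                printf("%02x%s", gid[i], i == 15 ? "\n" : ":");
        return 0;
}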
+154
drivers/infiniband/hw/usnic/usnic_debugfs.c
··· 1 + /* 2 + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. 3 + * 4 + * This program is free software; you may redistribute it and/or modify 5 + * it under the terms of the GNU General Public License as published by 6 + * the Free Software Foundation; version 2 of the License. 7 + * 8 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 9 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 10 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 11 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 12 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 13 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 + * SOFTWARE. 16 + * 17 + */ 18 + 19 + #include <linux/debugfs.h> 20 + #include <linux/module.h> 21 + 22 + #include "usnic.h" 23 + #include "usnic_log.h" 24 + #include "usnic_debugfs.h" 25 + #include "usnic_ib_qp_grp.h" 26 + #include "usnic_transport.h" 27 + 28 + static struct dentry *debugfs_root; 29 + static struct dentry *flows_dentry; 30 + 31 + static ssize_t usnic_debugfs_buildinfo_read(struct file *f, char __user *data, 32 + size_t count, loff_t *ppos) 33 + { 34 + char buf[500]; 35 + int res; 36 + 37 + if (*ppos > 0) 38 + return 0; 39 + 40 + res = scnprintf(buf, sizeof(buf), 41 + "version: %s\n" 42 + "build date: %s\n", 43 + DRV_VERSION, DRV_RELDATE); 44 + 45 + return simple_read_from_buffer(data, count, ppos, buf, res); 46 + } 47 + 48 + static const struct file_operations usnic_debugfs_buildinfo_ops = { 49 + .owner = THIS_MODULE, 50 + .open = simple_open, 51 + .read = usnic_debugfs_buildinfo_read 52 + }; 53 + 54 + static ssize_t flowinfo_read(struct file *f, char __user *data, 55 + size_t count, loff_t *ppos) 56 + { 57 + struct usnic_ib_qp_grp_flow *qp_flow; 58 + int n; 59 + int left; 60 + char *ptr; 61 + char buf[512]; 62 + 63 + qp_flow = f->private_data; 64 + ptr = buf; 65 + left = count; 66 + 67 + if (*ppos > 0) 68 + return 0; 69 + 70 + spin_lock(&qp_flow->qp_grp->lock); 71 + n = scnprintf(ptr, left, 72 + "QP Grp ID: %d Transport: %s ", 73 + qp_flow->qp_grp->grp_id, 74 + usnic_transport_to_str(qp_flow->trans_type)); 75 + UPDATE_PTR_LEFT(n, ptr, left); 76 + if (qp_flow->trans_type == USNIC_TRANSPORT_ROCE_CUSTOM) { 77 + n = scnprintf(ptr, left, "Port_Num:%hu\n", 78 + qp_flow->usnic_roce.port_num); 79 + UPDATE_PTR_LEFT(n, ptr, left); 80 + } else if (qp_flow->trans_type == USNIC_TRANSPORT_IPV4_UDP) { 81 + n = usnic_transport_sock_to_str(ptr, left, 82 + qp_flow->udp.sock); 83 + UPDATE_PTR_LEFT(n, ptr, left); 84 + n = scnprintf(ptr, left, "\n"); 85 + UPDATE_PTR_LEFT(n, ptr, left); 86 + } 87 + spin_unlock(&qp_flow->qp_grp->lock); 88 + 89 + return simple_read_from_buffer(data, count, ppos, buf, ptr - buf); 90 + } 91 + 92 + static const struct file_operations flowinfo_ops = { 93 + .owner = THIS_MODULE, 94 + .open = simple_open, 95 + .read = flowinfo_read, 96 + }; 97 + 98 + void usnic_debugfs_init(void) 99 + { 100 + debugfs_root = debugfs_create_dir(DRV_NAME, NULL); 101 + if (IS_ERR(debugfs_root)) { 102 + usnic_err("Failed to create debugfs root dir, check if debugfs is enabled in kernel configuration\n"); 103 + goto out_clear_root; 104 + } 105 + 106 + flows_dentry = debugfs_create_dir("flows", debugfs_root); 107 + if (IS_ERR_OR_NULL(flows_dentry)) { 108 + usnic_err("Failed to create debugfs flow dir with err %ld\n", 109 + PTR_ERR(flows_dentry)); 110 + goto out_free_root; 111 + } 112 + 113 + 
debugfs_create_file("build-info", S_IRUGO, debugfs_root, 114 + NULL, &usnic_debugfs_buildinfo_ops); 115 + return; 116 + 117 + out_free_root: 118 + debugfs_remove_recursive(debugfs_root); 119 + out_clear_root: 120 + debugfs_root = NULL; 121 + } 122 + 123 + void usnic_debugfs_exit(void) 124 + { 125 + if (!debugfs_root) 126 + return; 127 + 128 + debugfs_remove_recursive(debugfs_root); 129 + debugfs_root = NULL; 130 + } 131 + 132 + void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow) 133 + { 134 + if (IS_ERR_OR_NULL(flows_dentry)) 135 + return; 136 + 137 + scnprintf(qp_flow->dentry_name, sizeof(qp_flow->dentry_name), 138 + "%u", qp_flow->flow->flow_id); 139 + qp_flow->dbgfs_dentry = debugfs_create_file(qp_flow->dentry_name, 140 + S_IRUGO, 141 + flows_dentry, 142 + qp_flow, 143 + &flowinfo_ops); 144 + if (IS_ERR_OR_NULL(qp_flow->dbgfs_dentry)) { 145 + usnic_err("Failed to create dbg fs entry for flow %u\n", 146 + qp_flow->flow->flow_id); 147 + } 148 + } 149 + 150 + void usnic_debugfs_flow_remove(struct usnic_ib_qp_grp_flow *qp_flow) 151 + { 152 + if (!IS_ERR_OR_NULL(qp_flow->dbgfs_dentry)) 153 + debugfs_remove(qp_flow->dbgfs_dentry); 154 + }
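usnic_debugfs_init() creates a DRV_NAME directory in debugfs holding a read-only build-info file and a flows/ subdirectory; usnic_debugfs_flow_add() later adds one file per allocated flow, named after its flow ID and reporting the QP group, transport, and port or socket address. A user-space sketch of reading build-info; the path assumes debugfs is mounted at /sys/kernel/debug and that DRV_NAME expands to "usnic_verbs", neither of which is visible in this hunk:

	#include <stdio.h>

	int main(void)
	{
		char line[256];
		FILE *f = fopen("/sys/kernel/debug/usnic_verbs/build-info", "r");

		if (!f) {
			perror("open build-info");
			return 1;
		}
		/* Expect two lines: "version: ..." and "build date: ..." */
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);
		fclose(f);
		return 0;
	}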
+29
drivers/infiniband/hw/usnic/usnic_debugfs.h
··· 1 + /* 2 + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. 3 + * 4 + * This program is free software; you may redistribute it and/or modify 5 + * it under the terms of the GNU General Public License as published by 6 + * the Free Software Foundation; version 2 of the License. 7 + * 8 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 9 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 10 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 11 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 12 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 13 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 + * SOFTWARE. 16 + * 17 + */ 18 + #ifndef USNIC_DEBUGFS_H_ 19 + #define USNIC_DEBUGFS_H_ 20 + 21 + #include "usnic_ib_qp_grp.h" 22 + 23 + void usnic_debugfs_init(void); 24 + 25 + void usnic_debugfs_exit(void); 26 + void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow); 27 + void usnic_debugfs_flow_remove(struct usnic_ib_qp_grp_flow *qp_flow); 28 + 29 + #endif /*!USNIC_DEBUGFS_H_ */
+350
drivers/infiniband/hw/usnic/usnic_fwd.c
··· 1 + /* 2 + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. 3 + * 4 + * This program is free software; you may redistribute it and/or modify 5 + * it under the terms of the GNU General Public License as published by 6 + * the Free Software Foundation; version 2 of the License. 7 + * 8 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 9 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 10 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 11 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 12 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 13 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 + * SOFTWARE. 16 + * 17 + */ 18 + #include <linux/netdevice.h> 19 + #include <linux/pci.h> 20 + 21 + #include "enic_api.h" 22 + #include "usnic_common_pkt_hdr.h" 23 + #include "usnic_fwd.h" 24 + #include "usnic_log.h" 25 + 26 + static int usnic_fwd_devcmd_locked(struct usnic_fwd_dev *ufdev, int vnic_idx, 27 + enum vnic_devcmd_cmd cmd, u64 *a0, 28 + u64 *a1) 29 + { 30 + int status; 31 + struct net_device *netdev = ufdev->netdev; 32 + 33 + lockdep_assert_held(&ufdev->lock); 34 + 35 + status = enic_api_devcmd_proxy_by_index(netdev, 36 + vnic_idx, 37 + cmd, 38 + a0, a1, 39 + 1000); 40 + if (status) { 41 + if (status == ERR_EINVAL && cmd == CMD_DEL_FILTER) { 42 + usnic_dbg("Dev %s vnic idx %u cmd %u already deleted", 43 + ufdev->name, vnic_idx, cmd); 44 + } else { 45 + usnic_err("Dev %s vnic idx %u cmd %u failed with status %d\n", 46 + ufdev->name, vnic_idx, cmd, 47 + status); 48 + } 49 + } else { 50 + usnic_dbg("Dev %s vnic idx %u cmd %u success", 51 + ufdev->name, vnic_idx, cmd); 52 + } 53 + 54 + return status; 55 + } 56 + 57 + static int usnic_fwd_devcmd(struct usnic_fwd_dev *ufdev, int vnic_idx, 58 + enum vnic_devcmd_cmd cmd, u64 *a0, u64 *a1) 59 + { 60 + int status; 61 + 62 + spin_lock(&ufdev->lock); 63 + status = usnic_fwd_devcmd_locked(ufdev, vnic_idx, cmd, a0, a1); 64 + spin_unlock(&ufdev->lock); 65 + 66 + return status; 67 + } 68 + 69 + struct usnic_fwd_dev *usnic_fwd_dev_alloc(struct pci_dev *pdev) 70 + { 71 + struct usnic_fwd_dev *ufdev; 72 + 73 + ufdev = kzalloc(sizeof(*ufdev), GFP_KERNEL); 74 + if (!ufdev) 75 + return NULL; 76 + 77 + ufdev->pdev = pdev; 78 + ufdev->netdev = pci_get_drvdata(pdev); 79 + spin_lock_init(&ufdev->lock); 80 + strncpy(ufdev->name, netdev_name(ufdev->netdev), 81 + sizeof(ufdev->name) - 1); 82 + 83 + return ufdev; 84 + } 85 + 86 + void usnic_fwd_dev_free(struct usnic_fwd_dev *ufdev) 87 + { 88 + kfree(ufdev); 89 + } 90 + 91 + void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, char mac[ETH_ALEN]) 92 + { 93 + spin_lock(&ufdev->lock); 94 + memcpy(&ufdev->mac, mac, sizeof(ufdev->mac)); 95 + spin_unlock(&ufdev->lock); 96 + } 97 + 98 + int usnic_fwd_add_ipaddr(struct usnic_fwd_dev *ufdev, __be32 inaddr) 99 + { 100 + int status; 101 + 102 + spin_lock(&ufdev->lock); 103 + if (ufdev->inaddr == 0) { 104 + ufdev->inaddr = inaddr; 105 + status = 0; 106 + } else { 107 + status = -EFAULT; 108 + } 109 + spin_unlock(&ufdev->lock); 110 + 111 + return status; 112 + } 113 + 114 + void usnic_fwd_del_ipaddr(struct usnic_fwd_dev *ufdev) 115 + { 116 + spin_lock(&ufdev->lock); 117 + ufdev->inaddr = 0; 118 + spin_unlock(&ufdev->lock); 119 + } 120 + 121 + void usnic_fwd_carrier_up(struct usnic_fwd_dev *ufdev) 122 + { 123 + spin_lock(&ufdev->lock); 124 + ufdev->link_up = 1; 125 + 
spin_unlock(&ufdev->lock); 126 + } 127 + 128 + void usnic_fwd_carrier_down(struct usnic_fwd_dev *ufdev) 129 + { 130 + spin_lock(&ufdev->lock); 131 + ufdev->link_up = 0; 132 + spin_unlock(&ufdev->lock); 133 + } 134 + 135 + void usnic_fwd_set_mtu(struct usnic_fwd_dev *ufdev, unsigned int mtu) 136 + { 137 + spin_lock(&ufdev->lock); 138 + ufdev->mtu = mtu; 139 + spin_unlock(&ufdev->lock); 140 + } 141 + 142 + static int usnic_fwd_dev_ready_locked(struct usnic_fwd_dev *ufdev) 143 + { 144 + lockdep_assert_held(&ufdev->lock); 145 + 146 + if (!ufdev->link_up) 147 + return -EPERM; 148 + 149 + return 0; 150 + } 151 + 152 + static int validate_filter_locked(struct usnic_fwd_dev *ufdev, 153 + struct filter *filter) 154 + { 155 + 156 + lockdep_assert_held(&ufdev->lock); 157 + 158 + if (filter->type == FILTER_IPV4_5TUPLE) { 159 + if (!(filter->u.ipv4.flags & FILTER_FIELD_5TUP_DST_AD)) 160 + return -EACCES; 161 + if (!(filter->u.ipv4.flags & FILTER_FIELD_5TUP_DST_PT)) 162 + return -EBUSY; 163 + else if (ufdev->inaddr == 0) 164 + return -EINVAL; 165 + else if (filter->u.ipv4.dst_port == 0) 166 + return -ERANGE; 167 + else if (ntohl(ufdev->inaddr) != filter->u.ipv4.dst_addr) 168 + return -EFAULT; 169 + else 170 + return 0; 171 + } 172 + 173 + return 0; 174 + } 175 + 176 + static void fill_tlv(struct filter_tlv *tlv, struct filter *filter, 177 + struct filter_action *action) 178 + { 179 + tlv->type = CLSF_TLV_FILTER; 180 + tlv->length = sizeof(struct filter); 181 + *((struct filter *)&tlv->val) = *filter; 182 + 183 + tlv = (struct filter_tlv *)((char *)tlv + sizeof(struct filter_tlv) + 184 + sizeof(struct filter)); 185 + tlv->type = CLSF_TLV_ACTION; 186 + tlv->length = sizeof(struct filter_action); 187 + *((struct filter_action *)&tlv->val) = *action; 188 + } 189 + 190 + struct usnic_fwd_flow* 191 + usnic_fwd_alloc_flow(struct usnic_fwd_dev *ufdev, struct filter *filter, 192 + struct usnic_filter_action *uaction) 193 + { 194 + struct filter_tlv *tlv; 195 + struct pci_dev *pdev; 196 + struct usnic_fwd_flow *flow; 197 + uint64_t a0, a1; 198 + uint64_t tlv_size; 199 + dma_addr_t tlv_pa; 200 + int status; 201 + 202 + pdev = ufdev->pdev; 203 + tlv_size = (2*sizeof(struct filter_tlv) + sizeof(struct filter) + 204 + sizeof(struct filter_action)); 205 + 206 + flow = kzalloc(sizeof(*flow), GFP_ATOMIC); 207 + if (!flow) 208 + return ERR_PTR(-ENOMEM); 209 + 210 + tlv = pci_alloc_consistent(pdev, tlv_size, &tlv_pa); 211 + if (!tlv) { 212 + usnic_err("Failed to allocate memory\n"); 213 + status = -ENOMEM; 214 + goto out_free_flow; 215 + } 216 + 217 + fill_tlv(tlv, filter, &uaction->action); 218 + 219 + spin_lock(&ufdev->lock); 220 + status = usnic_fwd_dev_ready_locked(ufdev); 221 + if (status) { 222 + usnic_err("Forwarding dev %s not ready with status %d\n", 223 + ufdev->name, status); 224 + goto out_free_tlv; 225 + } 226 + 227 + status = validate_filter_locked(ufdev, filter); 228 + if (status) { 229 + usnic_err("Failed to validate filter with status %d\n", 230 + status); 231 + goto out_free_tlv; 232 + } 233 + 234 + /* Issue Devcmd */ 235 + a0 = tlv_pa; 236 + a1 = tlv_size; 237 + status = usnic_fwd_devcmd_locked(ufdev, uaction->vnic_idx, 238 + CMD_ADD_FILTER, &a0, &a1); 239 + if (status) { 240 + usnic_err("VF %s Filter add failed with status:%d", 241 + ufdev->name, status); 242 + status = -EFAULT; 243 + goto out_free_tlv; 244 + } else { 245 + usnic_dbg("VF %s FILTER ID:%llu", ufdev->name, a0); 246 + } 247 + 248 + flow->flow_id = (uint32_t) a0; 249 + flow->vnic_idx = uaction->vnic_idx; 250 + flow->ufdev = ufdev; 251 + 252 
+ out_free_tlv: 253 + spin_unlock(&ufdev->lock); 254 + pci_free_consistent(pdev, tlv_size, tlv, tlv_pa); 255 + if (!status) 256 + return flow; 257 + out_free_flow: 258 + kfree(flow); 259 + return ERR_PTR(status); 260 + } 261 + 262 + int usnic_fwd_dealloc_flow(struct usnic_fwd_flow *flow) 263 + { 264 + int status; 265 + u64 a0, a1; 266 + 267 + a0 = flow->flow_id; 268 + 269 + status = usnic_fwd_devcmd(flow->ufdev, flow->vnic_idx, 270 + CMD_DEL_FILTER, &a0, &a1); 271 + if (status) { 272 + if (status == ERR_EINVAL) { 273 + usnic_dbg("Filter %u already deleted for VF Idx %u pf: %s status: %d", 274 + flow->flow_id, flow->vnic_idx, 275 + flow->ufdev->name, status); 276 + } else { 277 + usnic_err("PF %s VF Idx %u Filter: %u FILTER DELETE failed with status %d", 278 + flow->ufdev->name, flow->vnic_idx, 279 + flow->flow_id, status); 280 + } 281 + status = 0; 282 + /* 283 + * Log the error and fake success to the caller because if 284 + * a flow fails to be deleted in the firmware, it is an 285 + * unrecoverable error. 286 + */ 287 + } else { 288 + usnic_dbg("PF %s VF Idx %u Filter: %u FILTER DELETED", 289 + flow->ufdev->name, flow->vnic_idx, 290 + flow->flow_id); 291 + } 292 + 293 + kfree(flow); 294 + return status; 295 + } 296 + 297 + int usnic_fwd_enable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx) 298 + { 299 + int status; 300 + struct net_device *pf_netdev; 301 + u64 a0, a1; 302 + 303 + pf_netdev = ufdev->netdev; 304 + a0 = qp_idx; 305 + a1 = CMD_QP_RQWQ; 306 + 307 + status = usnic_fwd_devcmd(ufdev, vnic_idx, CMD_QP_ENABLE, 308 + &a0, &a1); 309 + if (status) { 310 + usnic_err("PF %s VNIC Index %u RQ Index: %u ENABLE Failed with status %d", 311 + netdev_name(pf_netdev), 312 + vnic_idx, 313 + qp_idx, 314 + status); 315 + } else { 316 + usnic_dbg("PF %s VNIC Index %u RQ Index: %u ENABLED", 317 + netdev_name(pf_netdev), 318 + vnic_idx, qp_idx); 319 + } 320 + 321 + return status; 322 + } 323 + 324 + int usnic_fwd_disable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx) 325 + { 326 + int status; 327 + u64 a0, a1; 328 + struct net_device *pf_netdev; 329 + 330 + pf_netdev = ufdev->netdev; 331 + a0 = qp_idx; 332 + a1 = CMD_QP_RQWQ; 333 + 334 + status = usnic_fwd_devcmd(ufdev, vnic_idx, CMD_QP_DISABLE, 335 + &a0, &a1); 336 + if (status) { 337 + usnic_err("PF %s VNIC Index %u RQ Index: %u DISABLE Failed with status %d", 338 + netdev_name(pf_netdev), 339 + vnic_idx, 340 + qp_idx, 341 + status); 342 + } else { 343 + usnic_dbg("PF %s VNIC Index %u RQ Index: %u DISABLED", 344 + netdev_name(pf_netdev), 345 + vnic_idx, 346 + qp_idx); 347 + } 348 + 349 + return status; 350 + }
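The flow API above is consumed by the QP-group code added later in this commit (create_roce_custom_flow() and create_udp_flow() in usnic_ib_qp_grp.c). A condensed sketch of that calling pattern, with hypothetical vNIC and RQ indices: build a filter, describe the RQ-steering action, allocate the flow against the forwarding device, and release it with usnic_fwd_dealloc_flow() when it is no longer needed:

	/* Sketch only; assumes the caller already includes usnic_fwd.h. */
	static int example_steer_port_to_rq(struct usnic_fwd_dev *ufdev,
					    int vnic_idx, int rq_idx, uint16_t port)
	{
		struct filter filter;
		struct usnic_filter_action uaction;
		struct usnic_fwd_flow *flow;

		/* Match usNIC ethertype traffic for this port; proto_version
		 * becomes (USNIC_ROCE_GRH_VER << 4) | USNIC_PROTO_VER = 0x81. */
		usnic_fwd_init_usnic_filter(&filter, port);

		uaction.vnic_idx = vnic_idx;
		uaction.action.type = FILTER_ACTION_RQ_STEERING;
		uaction.action.u.rq_idx = rq_idx;

		flow = usnic_fwd_alloc_flow(ufdev, &filter, &uaction);
		if (IS_ERR_OR_NULL(flow))
			return flow ? PTR_ERR(flow) : -EFAULT;

		/* ... traffic matching the filter now lands on rq_idx ... */

		return usnic_fwd_dealloc_flow(flow);
	}

As the comment in usnic_fwd.h notes, whoever allocates a flow must watch for NETDEV_REBOOT/NETDEV_DOWN on the underlying netdevice and free the flow immediately when either is seen.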
+113
drivers/infiniband/hw/usnic/usnic_fwd.h
··· 1 + /* 2 + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. 3 + * 4 + * This program is free software; you may redistribute it and/or modify 5 + * it under the terms of the GNU General Public License as published by 6 + * the Free Software Foundation; version 2 of the License. 7 + * 8 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 9 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 10 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 11 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 12 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 13 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 + * SOFTWARE. 16 + * 17 + */ 18 + 19 + #ifndef USNIC_FWD_H_ 20 + #define USNIC_FWD_H_ 21 + 22 + #include <linux/if.h> 23 + #include <linux/netdevice.h> 24 + #include <linux/pci.h> 25 + #include <linux/in.h> 26 + 27 + #include "usnic_abi.h" 28 + #include "usnic_common_pkt_hdr.h" 29 + #include "vnic_devcmd.h" 30 + 31 + struct usnic_fwd_dev { 32 + struct pci_dev *pdev; 33 + struct net_device *netdev; 34 + spinlock_t lock; 35 + /* 36 + * The following fields can be read directly off the device. 37 + * However, they should be set by a accessor function, except name, 38 + * which cannot be changed. 39 + */ 40 + bool link_up; 41 + char mac[ETH_ALEN]; 42 + unsigned int mtu; 43 + __be32 inaddr; 44 + char name[IFNAMSIZ+1]; 45 + }; 46 + 47 + struct usnic_fwd_flow { 48 + uint32_t flow_id; 49 + struct usnic_fwd_dev *ufdev; 50 + unsigned int vnic_idx; 51 + }; 52 + 53 + struct usnic_filter_action { 54 + int vnic_idx; 55 + struct filter_action action; 56 + }; 57 + 58 + struct usnic_fwd_dev *usnic_fwd_dev_alloc(struct pci_dev *pdev); 59 + void usnic_fwd_dev_free(struct usnic_fwd_dev *ufdev); 60 + 61 + void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, char mac[ETH_ALEN]); 62 + int usnic_fwd_add_ipaddr(struct usnic_fwd_dev *ufdev, __be32 inaddr); 63 + void usnic_fwd_del_ipaddr(struct usnic_fwd_dev *ufdev); 64 + void usnic_fwd_carrier_up(struct usnic_fwd_dev *ufdev); 65 + void usnic_fwd_carrier_down(struct usnic_fwd_dev *ufdev); 66 + void usnic_fwd_set_mtu(struct usnic_fwd_dev *ufdev, unsigned int mtu); 67 + 68 + /* 69 + * Allocate a flow on this forwarding device. Whoever calls this function, 70 + * must monitor netdev events on ufdev's netdevice. If NETDEV_REBOOT or 71 + * NETDEV_DOWN is seen, flow will no longer function and must be 72 + * immediately freed by calling usnic_dealloc_flow. 
73 + */ 74 + struct usnic_fwd_flow* 75 + usnic_fwd_alloc_flow(struct usnic_fwd_dev *ufdev, struct filter *filter, 76 + struct usnic_filter_action *action); 77 + int usnic_fwd_dealloc_flow(struct usnic_fwd_flow *flow); 78 + int usnic_fwd_enable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx); 79 + int usnic_fwd_disable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx); 80 + 81 + static inline void usnic_fwd_init_usnic_filter(struct filter *filter, 82 + uint32_t usnic_id) 83 + { 84 + filter->type = FILTER_USNIC_ID; 85 + filter->u.usnic.ethtype = USNIC_ROCE_ETHERTYPE; 86 + filter->u.usnic.flags = FILTER_FIELD_USNIC_ETHTYPE | 87 + FILTER_FIELD_USNIC_ID | 88 + FILTER_FIELD_USNIC_PROTO; 89 + filter->u.usnic.proto_version = (USNIC_ROCE_GRH_VER << 90 + USNIC_ROCE_GRH_VER_SHIFT) | 91 + USNIC_PROTO_VER; 92 + filter->u.usnic.usnic_id = usnic_id; 93 + } 94 + 95 + static inline void usnic_fwd_init_udp_filter(struct filter *filter, 96 + uint32_t daddr, uint16_t dport) 97 + { 98 + filter->type = FILTER_IPV4_5TUPLE; 99 + filter->u.ipv4.flags = FILTER_FIELD_5TUP_PROTO; 100 + filter->u.ipv4.protocol = PROTO_UDP; 101 + 102 + if (daddr) { 103 + filter->u.ipv4.flags |= FILTER_FIELD_5TUP_DST_AD; 104 + filter->u.ipv4.dst_addr = daddr; 105 + } 106 + 107 + if (dport) { 108 + filter->u.ipv4.flags |= FILTER_FIELD_5TUP_DST_PT; 109 + filter->u.ipv4.dst_port = dport; 110 + } 111 + } 112 + 113 + #endif /* !USNIC_FWD_H_ */
+118
drivers/infiniband/hw/usnic/usnic_ib.h
··· 1 + /* 2 + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. 3 + * 4 + * This program is free software; you may redistribute it and/or modify 5 + * it under the terms of the GNU General Public License as published by 6 + * the Free Software Foundation; version 2 of the License. 7 + * 8 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 9 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 10 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 11 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 12 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 13 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 + * SOFTWARE. 16 + * 17 + */ 18 + 19 + #ifndef USNIC_IB_H_ 20 + #define USNIC_IB_H_ 21 + 22 + #include <linux/iommu.h> 23 + #include <linux/netdevice.h> 24 + 25 + #include <rdma/ib_verbs.h> 26 + 27 + 28 + #include "usnic.h" 29 + #include "usnic_abi.h" 30 + #include "usnic_vnic.h" 31 + 32 + #define USNIC_IB_PORT_CNT 1 33 + #define USNIC_IB_NUM_COMP_VECTORS 1 34 + 35 + extern unsigned int usnic_ib_share_vf; 36 + 37 + struct usnic_ib_ucontext { 38 + struct ib_ucontext ibucontext; 39 + /* Protected by usnic_ib_dev->usdev_lock */ 40 + struct list_head qp_grp_list; 41 + struct list_head link; 42 + }; 43 + 44 + struct usnic_ib_pd { 45 + struct ib_pd ibpd; 46 + struct usnic_uiom_pd *umem_pd; 47 + }; 48 + 49 + struct usnic_ib_mr { 50 + struct ib_mr ibmr; 51 + struct usnic_uiom_reg *umem; 52 + }; 53 + 54 + struct usnic_ib_dev { 55 + struct ib_device ib_dev; 56 + struct pci_dev *pdev; 57 + struct net_device *netdev; 58 + struct usnic_fwd_dev *ufdev; 59 + struct list_head ib_dev_link; 60 + struct list_head vf_dev_list; 61 + struct list_head ctx_list; 62 + struct mutex usdev_lock; 63 + 64 + /* provisioning information */ 65 + struct kref vf_cnt; 66 + unsigned int vf_res_cnt[USNIC_VNIC_RES_TYPE_MAX]; 67 + 68 + /* sysfs vars for QPN reporting */ 69 + struct kobject *qpn_kobj; 70 + }; 71 + 72 + struct usnic_ib_vf { 73 + struct usnic_ib_dev *pf; 74 + spinlock_t lock; 75 + struct usnic_vnic *vnic; 76 + unsigned int qp_grp_ref_cnt; 77 + struct usnic_ib_pd *pd; 78 + struct list_head link; 79 + }; 80 + 81 + static inline 82 + struct usnic_ib_dev *to_usdev(struct ib_device *ibdev) 83 + { 84 + return container_of(ibdev, struct usnic_ib_dev, ib_dev); 85 + } 86 + 87 + static inline 88 + struct usnic_ib_ucontext *to_ucontext(struct ib_ucontext *ibucontext) 89 + { 90 + return container_of(ibucontext, struct usnic_ib_ucontext, ibucontext); 91 + } 92 + 93 + static inline 94 + struct usnic_ib_pd *to_upd(struct ib_pd *ibpd) 95 + { 96 + return container_of(ibpd, struct usnic_ib_pd, ibpd); 97 + } 98 + 99 + static inline 100 + struct usnic_ib_ucontext *to_uucontext(struct ib_ucontext *ibucontext) 101 + { 102 + return container_of(ibucontext, struct usnic_ib_ucontext, ibucontext); 103 + } 104 + 105 + static inline 106 + struct usnic_ib_mr *to_umr(struct ib_mr *ibmr) 107 + { 108 + return container_of(ibmr, struct usnic_ib_mr, ibmr); 109 + } 110 + void usnic_ib_log_vf(struct usnic_ib_vf *vf); 111 + 112 + #define UPDATE_PTR_LEFT(N, P, L) \ 113 + do { \ 114 + L -= (N); \ 115 + P += (N); \ 116 + } while (0) 117 + 118 + #endif /* USNIC_IB_H_ */
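UPDATE_PTR_LEFT() is the helper behind the incremental buffer building in flowinfo_read() above: each scnprintf() consumes part of the buffer, and the macro advances the write pointer while shrinking the remaining length. A user-space analogue, using snprintf() in place of the kernel's scnprintf() (snprintf() returns the would-be length on truncation, so real code would clamp the return value before applying the macro):

	#include <stdio.h>

	#define UPDATE_PTR_LEFT(N, P, L) \
		do {                     \
			L -= (N);        \
			P += (N);        \
		} while (0)

	int main(void)
	{
		char buf[64];
		char *ptr = buf;
		int left = sizeof(buf);
		int n;

		/* Append one formatted chunk at a time, like flowinfo_read(). */
		n = snprintf(ptr, left, "QP Grp ID: %d ", 7);
		UPDATE_PTR_LEFT(n, ptr, left);
		n = snprintf(ptr, left, "Transport: %s\n", "roce custom");
		UPDATE_PTR_LEFT(n, ptr, left);

		fputs(buf, stdout);
		return 0;
	}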
+682
drivers/infiniband/hw/usnic/usnic_ib_main.c
··· 1 + /* 2 + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. 3 + * 4 + * This program is free software; you may redistribute it and/or modify 5 + * it under the terms of the GNU General Public License as published by 6 + * the Free Software Foundation; version 2 of the License. 7 + * 8 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 9 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 10 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 11 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 12 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 13 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 + * SOFTWARE. 16 + * 17 + * Author: Upinder Malhi <umalhi@cisco.com> 18 + * Author: Anant Deepak <anadeepa@cisco.com> 19 + * Author: Cesare Cantu' <cantuc@cisco.com> 20 + * Author: Jeff Squyres <jsquyres@cisco.com> 21 + * Author: Kiran Thirumalai <kithirum@cisco.com> 22 + * Author: Xuyang Wang <xuywang@cisco.com> 23 + * Author: Reese Faucette <rfaucett@cisco.com> 24 + * 25 + */ 26 + 27 + #include <linux/module.h> 28 + #include <linux/inetdevice.h> 29 + #include <linux/init.h> 30 + #include <linux/slab.h> 31 + #include <linux/errno.h> 32 + #include <linux/pci.h> 33 + #include <linux/netdevice.h> 34 + 35 + #include <rdma/ib_user_verbs.h> 36 + #include <rdma/ib_addr.h> 37 + 38 + #include "usnic_abi.h" 39 + #include "usnic_common_util.h" 40 + #include "usnic_ib.h" 41 + #include "usnic_ib_qp_grp.h" 42 + #include "usnic_log.h" 43 + #include "usnic_fwd.h" 44 + #include "usnic_debugfs.h" 45 + #include "usnic_ib_verbs.h" 46 + #include "usnic_transport.h" 47 + #include "usnic_uiom.h" 48 + #include "usnic_ib_sysfs.h" 49 + 50 + unsigned int usnic_log_lvl = USNIC_LOG_LVL_ERR; 51 + unsigned int usnic_ib_share_vf = 1; 52 + 53 + static const char usnic_version[] = 54 + DRV_NAME ": Cisco VIC (USNIC) Verbs Driver v" 55 + DRV_VERSION " (" DRV_RELDATE ")\n"; 56 + 57 + static DEFINE_MUTEX(usnic_ib_ibdev_list_lock); 58 + static LIST_HEAD(usnic_ib_ibdev_list); 59 + 60 + /* Callback dump funcs */ 61 + static int usnic_ib_dump_vf_hdr(void *obj, char *buf, int buf_sz) 62 + { 63 + struct usnic_ib_vf *vf = obj; 64 + return scnprintf(buf, buf_sz, "PF: %s ", vf->pf->ib_dev.name); 65 + } 66 + /* End callback dump funcs */ 67 + 68 + static void usnic_ib_dump_vf(struct usnic_ib_vf *vf, char *buf, int buf_sz) 69 + { 70 + usnic_vnic_dump(vf->vnic, buf, buf_sz, vf, 71 + usnic_ib_dump_vf_hdr, 72 + usnic_ib_qp_grp_dump_hdr, usnic_ib_qp_grp_dump_rows); 73 + } 74 + 75 + void usnic_ib_log_vf(struct usnic_ib_vf *vf) 76 + { 77 + char buf[1000]; 78 + usnic_ib_dump_vf(vf, buf, sizeof(buf)); 79 + usnic_dbg("%s\n", buf); 80 + } 81 + 82 + /* Start of netdev section */ 83 + static inline const char *usnic_ib_netdev_event_to_string(unsigned long event) 84 + { 85 + const char *event2str[] = {"NETDEV_NONE", "NETDEV_UP", "NETDEV_DOWN", 86 + "NETDEV_REBOOT", "NETDEV_CHANGE", 87 + "NETDEV_REGISTER", "NETDEV_UNREGISTER", "NETDEV_CHANGEMTU", 88 + "NETDEV_CHANGEADDR", "NETDEV_GOING_DOWN", "NETDEV_FEAT_CHANGE", 89 + "NETDEV_BONDING_FAILOVER", "NETDEV_PRE_UP", 90 + "NETDEV_PRE_TYPE_CHANGE", "NETDEV_POST_TYPE_CHANGE", 91 + "NETDEV_POST_INT", "NETDEV_UNREGISTER_FINAL", "NETDEV_RELEASE", 92 + "NETDEV_NOTIFY_PEERS", "NETDEV_JOIN" 93 + }; 94 + 95 + if (event >= ARRAY_SIZE(event2str)) 96 + return "UNKNOWN_NETDEV_EVENT"; 97 + else 98 + return event2str[event]; 99 + } 
100 + 101 + static void usnic_ib_qp_grp_modify_active_to_err(struct usnic_ib_dev *us_ibdev) 102 + { 103 + struct usnic_ib_ucontext *ctx; 104 + struct usnic_ib_qp_grp *qp_grp; 105 + enum ib_qp_state cur_state; 106 + int status; 107 + 108 + BUG_ON(!mutex_is_locked(&us_ibdev->usdev_lock)); 109 + 110 + list_for_each_entry(ctx, &us_ibdev->ctx_list, link) { 111 + list_for_each_entry(qp_grp, &ctx->qp_grp_list, link) { 112 + cur_state = qp_grp->state; 113 + if (cur_state == IB_QPS_INIT || 114 + cur_state == IB_QPS_RTR || 115 + cur_state == IB_QPS_RTS) { 116 + status = usnic_ib_qp_grp_modify(qp_grp, 117 + IB_QPS_ERR, 118 + NULL); 119 + if (status) { 120 + usnic_err("Failed to transistion qp grp %u from %s to %s\n", 121 + qp_grp->grp_id, 122 + usnic_ib_qp_grp_state_to_string 123 + (cur_state), 124 + usnic_ib_qp_grp_state_to_string 125 + (IB_QPS_ERR)); 126 + } 127 + } 128 + } 129 + } 130 + } 131 + 132 + static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev, 133 + unsigned long event) 134 + { 135 + struct net_device *netdev; 136 + struct ib_event ib_event; 137 + 138 + memset(&ib_event, 0, sizeof(ib_event)); 139 + 140 + mutex_lock(&us_ibdev->usdev_lock); 141 + netdev = us_ibdev->netdev; 142 + switch (event) { 143 + case NETDEV_REBOOT: 144 + usnic_info("PF Reset on %s\n", us_ibdev->ib_dev.name); 145 + usnic_ib_qp_grp_modify_active_to_err(us_ibdev); 146 + ib_event.event = IB_EVENT_PORT_ERR; 147 + ib_event.device = &us_ibdev->ib_dev; 148 + ib_event.element.port_num = 1; 149 + ib_dispatch_event(&ib_event); 150 + break; 151 + case NETDEV_UP: 152 + case NETDEV_DOWN: 153 + case NETDEV_CHANGE: 154 + if (!us_ibdev->ufdev->link_up && 155 + netif_carrier_ok(netdev)) { 156 + usnic_fwd_carrier_up(us_ibdev->ufdev); 157 + usnic_info("Link UP on %s\n", us_ibdev->ib_dev.name); 158 + ib_event.event = IB_EVENT_PORT_ACTIVE; 159 + ib_event.device = &us_ibdev->ib_dev; 160 + ib_event.element.port_num = 1; 161 + ib_dispatch_event(&ib_event); 162 + } else if (us_ibdev->ufdev->link_up && 163 + !netif_carrier_ok(netdev)) { 164 + usnic_fwd_carrier_down(us_ibdev->ufdev); 165 + usnic_info("Link DOWN on %s\n", us_ibdev->ib_dev.name); 166 + usnic_ib_qp_grp_modify_active_to_err(us_ibdev); 167 + ib_event.event = IB_EVENT_PORT_ERR; 168 + ib_event.device = &us_ibdev->ib_dev; 169 + ib_event.element.port_num = 1; 170 + ib_dispatch_event(&ib_event); 171 + } else { 172 + usnic_dbg("Ignoring %s on %s\n", 173 + usnic_ib_netdev_event_to_string(event), 174 + us_ibdev->ib_dev.name); 175 + } 176 + break; 177 + case NETDEV_CHANGEADDR: 178 + if (!memcmp(us_ibdev->ufdev->mac, netdev->dev_addr, 179 + sizeof(us_ibdev->ufdev->mac))) { 180 + usnic_dbg("Ignoring addr change on %s\n", 181 + us_ibdev->ib_dev.name); 182 + } else { 183 + usnic_info(" %s old mac: %pM new mac: %pM\n", 184 + us_ibdev->ib_dev.name, 185 + us_ibdev->ufdev->mac, 186 + netdev->dev_addr); 187 + usnic_fwd_set_mac(us_ibdev->ufdev, netdev->dev_addr); 188 + usnic_ib_qp_grp_modify_active_to_err(us_ibdev); 189 + ib_event.event = IB_EVENT_GID_CHANGE; 190 + ib_event.device = &us_ibdev->ib_dev; 191 + ib_event.element.port_num = 1; 192 + ib_dispatch_event(&ib_event); 193 + } 194 + 195 + break; 196 + case NETDEV_CHANGEMTU: 197 + if (us_ibdev->ufdev->mtu != netdev->mtu) { 198 + usnic_info("MTU Change on %s old: %u new: %u\n", 199 + us_ibdev->ib_dev.name, 200 + us_ibdev->ufdev->mtu, netdev->mtu); 201 + usnic_fwd_set_mtu(us_ibdev->ufdev, netdev->mtu); 202 + usnic_ib_qp_grp_modify_active_to_err(us_ibdev); 203 + } else { 204 + usnic_dbg("Ignoring MTU change on %s\n", 205 + 
us_ibdev->ib_dev.name); 206 + } 207 + break; 208 + default: 209 + usnic_dbg("Ignoring event %s on %s", 210 + usnic_ib_netdev_event_to_string(event), 211 + us_ibdev->ib_dev.name); 212 + } 213 + mutex_unlock(&us_ibdev->usdev_lock); 214 + } 215 + 216 + static int usnic_ib_netdevice_event(struct notifier_block *notifier, 217 + unsigned long event, void *ptr) 218 + { 219 + struct usnic_ib_dev *us_ibdev; 220 + 221 + struct net_device *netdev = netdev_notifier_info_to_dev(ptr); 222 + 223 + mutex_lock(&usnic_ib_ibdev_list_lock); 224 + list_for_each_entry(us_ibdev, &usnic_ib_ibdev_list, ib_dev_link) { 225 + if (us_ibdev->netdev == netdev) { 226 + usnic_ib_handle_usdev_event(us_ibdev, event); 227 + break; 228 + } 229 + } 230 + mutex_unlock(&usnic_ib_ibdev_list_lock); 231 + 232 + return NOTIFY_DONE; 233 + } 234 + 235 + static struct notifier_block usnic_ib_netdevice_notifier = { 236 + .notifier_call = usnic_ib_netdevice_event 237 + }; 238 + /* End of netdev section */ 239 + 240 + /* Start of inet section */ 241 + static int usnic_ib_handle_inet_event(struct usnic_ib_dev *us_ibdev, 242 + unsigned long event, void *ptr) 243 + { 244 + struct in_ifaddr *ifa = ptr; 245 + struct ib_event ib_event; 246 + 247 + mutex_lock(&us_ibdev->usdev_lock); 248 + 249 + switch (event) { 250 + case NETDEV_DOWN: 251 + usnic_info("%s via ip notifiers", 252 + usnic_ib_netdev_event_to_string(event)); 253 + usnic_fwd_del_ipaddr(us_ibdev->ufdev); 254 + usnic_ib_qp_grp_modify_active_to_err(us_ibdev); 255 + ib_event.event = IB_EVENT_GID_CHANGE; 256 + ib_event.device = &us_ibdev->ib_dev; 257 + ib_event.element.port_num = 1; 258 + ib_dispatch_event(&ib_event); 259 + break; 260 + case NETDEV_UP: 261 + usnic_fwd_add_ipaddr(us_ibdev->ufdev, ifa->ifa_address); 262 + usnic_info("%s via ip notifiers: ip %pI4", 263 + usnic_ib_netdev_event_to_string(event), 264 + &us_ibdev->ufdev->inaddr); 265 + ib_event.event = IB_EVENT_GID_CHANGE; 266 + ib_event.device = &us_ibdev->ib_dev; 267 + ib_event.element.port_num = 1; 268 + ib_dispatch_event(&ib_event); 269 + break; 270 + default: 271 + usnic_info("Ignoring event %s on %s", 272 + usnic_ib_netdev_event_to_string(event), 273 + us_ibdev->ib_dev.name); 274 + } 275 + mutex_unlock(&us_ibdev->usdev_lock); 276 + 277 + return NOTIFY_DONE; 278 + } 279 + 280 + static int usnic_ib_inetaddr_event(struct notifier_block *notifier, 281 + unsigned long event, void *ptr) 282 + { 283 + struct usnic_ib_dev *us_ibdev; 284 + struct in_ifaddr *ifa = ptr; 285 + struct net_device *netdev = ifa->ifa_dev->dev; 286 + 287 + mutex_lock(&usnic_ib_ibdev_list_lock); 288 + list_for_each_entry(us_ibdev, &usnic_ib_ibdev_list, ib_dev_link) { 289 + if (us_ibdev->netdev == netdev) { 290 + usnic_ib_handle_inet_event(us_ibdev, event, ptr); 291 + break; 292 + } 293 + } 294 + mutex_unlock(&usnic_ib_ibdev_list_lock); 295 + 296 + return NOTIFY_DONE; 297 + } 298 + static struct notifier_block usnic_ib_inetaddr_notifier = { 299 + .notifier_call = usnic_ib_inetaddr_event 300 + }; 301 + /* End of inet section*/ 302 + 303 + /* Start of PF discovery section */ 304 + static void *usnic_ib_device_add(struct pci_dev *dev) 305 + { 306 + struct usnic_ib_dev *us_ibdev; 307 + union ib_gid gid; 308 + struct in_ifaddr *in; 309 + struct net_device *netdev; 310 + 311 + usnic_dbg("\n"); 312 + netdev = pci_get_drvdata(dev); 313 + 314 + us_ibdev = (struct usnic_ib_dev *)ib_alloc_device(sizeof(*us_ibdev)); 315 + if (IS_ERR_OR_NULL(us_ibdev)) { 316 + usnic_err("Device %s context alloc failed\n", 317 + netdev_name(pci_get_drvdata(dev))); 318 + return 
ERR_PTR(us_ibdev ? PTR_ERR(us_ibdev) : -EFAULT); 319 + } 320 + 321 + us_ibdev->ufdev = usnic_fwd_dev_alloc(dev); 322 + if (IS_ERR_OR_NULL(us_ibdev->ufdev)) { 323 + usnic_err("Failed to alloc ufdev for %s with err %ld\n", 324 + pci_name(dev), PTR_ERR(us_ibdev->ufdev)); 325 + goto err_dealloc; 326 + } 327 + 328 + mutex_init(&us_ibdev->usdev_lock); 329 + INIT_LIST_HEAD(&us_ibdev->vf_dev_list); 330 + INIT_LIST_HEAD(&us_ibdev->ctx_list); 331 + 332 + us_ibdev->pdev = dev; 333 + us_ibdev->netdev = pci_get_drvdata(dev); 334 + us_ibdev->ib_dev.owner = THIS_MODULE; 335 + us_ibdev->ib_dev.node_type = RDMA_NODE_USNIC_UDP; 336 + us_ibdev->ib_dev.phys_port_cnt = USNIC_IB_PORT_CNT; 337 + us_ibdev->ib_dev.num_comp_vectors = USNIC_IB_NUM_COMP_VECTORS; 338 + us_ibdev->ib_dev.dma_device = &dev->dev; 339 + us_ibdev->ib_dev.uverbs_abi_ver = USNIC_UVERBS_ABI_VERSION; 340 + strlcpy(us_ibdev->ib_dev.name, "usnic_%d", IB_DEVICE_NAME_MAX); 341 + 342 + us_ibdev->ib_dev.uverbs_cmd_mask = 343 + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | 344 + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | 345 + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | 346 + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | 347 + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | 348 + (1ull << IB_USER_VERBS_CMD_REG_MR) | 349 + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | 350 + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | 351 + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | 352 + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | 353 + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | 354 + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | 355 + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | 356 + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | 357 + (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | 358 + (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | 359 + (1ull << IB_USER_VERBS_CMD_OPEN_QP); 360 + 361 + us_ibdev->ib_dev.query_device = usnic_ib_query_device; 362 + us_ibdev->ib_dev.query_port = usnic_ib_query_port; 363 + us_ibdev->ib_dev.query_pkey = usnic_ib_query_pkey; 364 + us_ibdev->ib_dev.query_gid = usnic_ib_query_gid; 365 + us_ibdev->ib_dev.get_link_layer = usnic_ib_port_link_layer; 366 + us_ibdev->ib_dev.alloc_pd = usnic_ib_alloc_pd; 367 + us_ibdev->ib_dev.dealloc_pd = usnic_ib_dealloc_pd; 368 + us_ibdev->ib_dev.create_qp = usnic_ib_create_qp; 369 + us_ibdev->ib_dev.modify_qp = usnic_ib_modify_qp; 370 + us_ibdev->ib_dev.query_qp = usnic_ib_query_qp; 371 + us_ibdev->ib_dev.destroy_qp = usnic_ib_destroy_qp; 372 + us_ibdev->ib_dev.create_cq = usnic_ib_create_cq; 373 + us_ibdev->ib_dev.destroy_cq = usnic_ib_destroy_cq; 374 + us_ibdev->ib_dev.reg_user_mr = usnic_ib_reg_mr; 375 + us_ibdev->ib_dev.dereg_mr = usnic_ib_dereg_mr; 376 + us_ibdev->ib_dev.alloc_ucontext = usnic_ib_alloc_ucontext; 377 + us_ibdev->ib_dev.dealloc_ucontext = usnic_ib_dealloc_ucontext; 378 + us_ibdev->ib_dev.mmap = usnic_ib_mmap; 379 + us_ibdev->ib_dev.create_ah = usnic_ib_create_ah; 380 + us_ibdev->ib_dev.destroy_ah = usnic_ib_destroy_ah; 381 + us_ibdev->ib_dev.post_send = usnic_ib_post_send; 382 + us_ibdev->ib_dev.post_recv = usnic_ib_post_recv; 383 + us_ibdev->ib_dev.poll_cq = usnic_ib_poll_cq; 384 + us_ibdev->ib_dev.req_notify_cq = usnic_ib_req_notify_cq; 385 + us_ibdev->ib_dev.get_dma_mr = usnic_ib_get_dma_mr; 386 + 387 + 388 + if (ib_register_device(&us_ibdev->ib_dev, NULL)) 389 + goto err_fwd_dealloc; 390 + 391 + usnic_fwd_set_mtu(us_ibdev->ufdev, us_ibdev->netdev->mtu); 392 + usnic_fwd_set_mac(us_ibdev->ufdev, us_ibdev->netdev->dev_addr); 393 + if (netif_carrier_ok(us_ibdev->netdev)) 394 + usnic_fwd_carrier_up(us_ibdev->ufdev); 395 + 396 + in = ((struct 
in_device *)(netdev->ip_ptr))->ifa_list; 397 + if (in != NULL) 398 + usnic_fwd_add_ipaddr(us_ibdev->ufdev, in->ifa_address); 399 + 400 + usnic_mac_ip_to_gid(us_ibdev->netdev->perm_addr, 401 + us_ibdev->ufdev->inaddr, &gid.raw[0]); 402 + memcpy(&us_ibdev->ib_dev.node_guid, &gid.global.interface_id, 403 + sizeof(gid.global.interface_id)); 404 + kref_init(&us_ibdev->vf_cnt); 405 + 406 + usnic_info("Added ibdev: %s netdev: %s with mac %pM Link: %u MTU: %u\n", 407 + us_ibdev->ib_dev.name, netdev_name(us_ibdev->netdev), 408 + us_ibdev->ufdev->mac, us_ibdev->ufdev->link_up, 409 + us_ibdev->ufdev->mtu); 410 + return us_ibdev; 411 + 412 + err_fwd_dealloc: 413 + usnic_fwd_dev_free(us_ibdev->ufdev); 414 + err_dealloc: 415 + usnic_err("failed -- deallocing device\n"); 416 + ib_dealloc_device(&us_ibdev->ib_dev); 417 + return NULL; 418 + } 419 + 420 + static void usnic_ib_device_remove(struct usnic_ib_dev *us_ibdev) 421 + { 422 + usnic_info("Unregistering %s\n", us_ibdev->ib_dev.name); 423 + usnic_ib_sysfs_unregister_usdev(us_ibdev); 424 + usnic_fwd_dev_free(us_ibdev->ufdev); 425 + ib_unregister_device(&us_ibdev->ib_dev); 426 + ib_dealloc_device(&us_ibdev->ib_dev); 427 + } 428 + 429 + static void usnic_ib_undiscover_pf(struct kref *kref) 430 + { 431 + struct usnic_ib_dev *us_ibdev, *tmp; 432 + struct pci_dev *dev; 433 + bool found = false; 434 + 435 + dev = container_of(kref, struct usnic_ib_dev, vf_cnt)->pdev; 436 + mutex_lock(&usnic_ib_ibdev_list_lock); 437 + list_for_each_entry_safe(us_ibdev, tmp, 438 + &usnic_ib_ibdev_list, ib_dev_link) { 439 + if (us_ibdev->pdev == dev) { 440 + list_del(&us_ibdev->ib_dev_link); 441 + usnic_ib_device_remove(us_ibdev); 442 + found = true; 443 + break; 444 + } 445 + } 446 + 447 + WARN(!found, "Failed to remove PF %s\n", pci_name(dev)); 448 + 449 + mutex_unlock(&usnic_ib_ibdev_list_lock); 450 + } 451 + 452 + static struct usnic_ib_dev *usnic_ib_discover_pf(struct usnic_vnic *vnic) 453 + { 454 + struct usnic_ib_dev *us_ibdev; 455 + struct pci_dev *parent_pci, *vf_pci; 456 + int err; 457 + 458 + vf_pci = usnic_vnic_get_pdev(vnic); 459 + parent_pci = pci_physfn(vf_pci); 460 + 461 + BUG_ON(!parent_pci); 462 + 463 + mutex_lock(&usnic_ib_ibdev_list_lock); 464 + list_for_each_entry(us_ibdev, &usnic_ib_ibdev_list, ib_dev_link) { 465 + if (us_ibdev->pdev == parent_pci) { 466 + kref_get(&us_ibdev->vf_cnt); 467 + goto out; 468 + } 469 + } 470 + 471 + us_ibdev = usnic_ib_device_add(parent_pci); 472 + if (IS_ERR_OR_NULL(us_ibdev)) { 473 + us_ibdev = us_ibdev ? 
us_ibdev : ERR_PTR(-EFAULT); 474 + goto out; 475 + } 476 + 477 + err = usnic_ib_sysfs_register_usdev(us_ibdev); 478 + if (err) { 479 + usnic_ib_device_remove(us_ibdev); 480 + us_ibdev = ERR_PTR(err); 481 + goto out; 482 + } 483 + 484 + list_add(&us_ibdev->ib_dev_link, &usnic_ib_ibdev_list); 485 + out: 486 + mutex_unlock(&usnic_ib_ibdev_list_lock); 487 + return us_ibdev; 488 + } 489 + /* End of PF discovery section */ 490 + 491 + /* Start of PCI section */ 492 + 493 + static DEFINE_PCI_DEVICE_TABLE(usnic_ib_pci_ids) = { 494 + {PCI_DEVICE(PCI_VENDOR_ID_CISCO, PCI_DEVICE_ID_CISCO_VIC_USPACE_NIC)}, 495 + {0,} 496 + }; 497 + 498 + static int usnic_ib_pci_probe(struct pci_dev *pdev, 499 + const struct pci_device_id *id) 500 + { 501 + int err; 502 + struct usnic_ib_dev *pf; 503 + struct usnic_ib_vf *vf; 504 + enum usnic_vnic_res_type res_type; 505 + 506 + vf = kzalloc(sizeof(*vf), GFP_KERNEL); 507 + if (!vf) 508 + return -ENOMEM; 509 + 510 + err = pci_enable_device(pdev); 511 + if (err) { 512 + usnic_err("Failed to enable %s with err %d\n", 513 + pci_name(pdev), err); 514 + goto out_clean_vf; 515 + } 516 + 517 + err = pci_request_regions(pdev, DRV_NAME); 518 + if (err) { 519 + usnic_err("Failed to request region for %s with err %d\n", 520 + pci_name(pdev), err); 521 + goto out_disable_device; 522 + } 523 + 524 + pci_set_master(pdev); 525 + pci_set_drvdata(pdev, vf); 526 + 527 + vf->vnic = usnic_vnic_alloc(pdev); 528 + if (IS_ERR_OR_NULL(vf->vnic)) { 529 + err = vf->vnic ? PTR_ERR(vf->vnic) : -ENOMEM; 530 + usnic_err("Failed to alloc vnic for %s with err %d\n", 531 + pci_name(pdev), err); 532 + goto out_release_regions; 533 + } 534 + 535 + pf = usnic_ib_discover_pf(vf->vnic); 536 + if (IS_ERR_OR_NULL(pf)) { 537 + usnic_err("Failed to discover pf of vnic %s with err%ld\n", 538 + pci_name(pdev), PTR_ERR(pf)); 539 + err = pf ? 
PTR_ERR(pf) : -EFAULT; 540 + goto out_clean_vnic; 541 + } 542 + 543 + vf->pf = pf; 544 + spin_lock_init(&vf->lock); 545 + mutex_lock(&pf->usdev_lock); 546 + list_add_tail(&vf->link, &pf->vf_dev_list); 547 + /* 548 + * Save max settings (will be same for each VF, easier to re-write than 549 + * to say "if (!set) { set_values(); set=1; } 550 + */ 551 + for (res_type = USNIC_VNIC_RES_TYPE_EOL+1; 552 + res_type < USNIC_VNIC_RES_TYPE_MAX; 553 + res_type++) { 554 + pf->vf_res_cnt[res_type] = usnic_vnic_res_cnt(vf->vnic, 555 + res_type); 556 + } 557 + 558 + mutex_unlock(&pf->usdev_lock); 559 + 560 + usnic_info("Registering usnic VF %s into PF %s\n", pci_name(pdev), 561 + pf->ib_dev.name); 562 + usnic_ib_log_vf(vf); 563 + return 0; 564 + 565 + out_clean_vnic: 566 + usnic_vnic_free(vf->vnic); 567 + out_release_regions: 568 + pci_set_drvdata(pdev, NULL); 569 + pci_clear_master(pdev); 570 + pci_release_regions(pdev); 571 + out_disable_device: 572 + pci_disable_device(pdev); 573 + out_clean_vf: 574 + kfree(vf); 575 + return err; 576 + } 577 + 578 + static void usnic_ib_pci_remove(struct pci_dev *pdev) 579 + { 580 + struct usnic_ib_vf *vf = pci_get_drvdata(pdev); 581 + struct usnic_ib_dev *pf = vf->pf; 582 + 583 + mutex_lock(&pf->usdev_lock); 584 + list_del(&vf->link); 585 + mutex_unlock(&pf->usdev_lock); 586 + 587 + kref_put(&pf->vf_cnt, usnic_ib_undiscover_pf); 588 + usnic_vnic_free(vf->vnic); 589 + pci_set_drvdata(pdev, NULL); 590 + pci_clear_master(pdev); 591 + pci_release_regions(pdev); 592 + pci_disable_device(pdev); 593 + kfree(vf); 594 + 595 + usnic_info("Removed VF %s\n", pci_name(pdev)); 596 + } 597 + 598 + /* PCI driver entry points */ 599 + static struct pci_driver usnic_ib_pci_driver = { 600 + .name = DRV_NAME, 601 + .id_table = usnic_ib_pci_ids, 602 + .probe = usnic_ib_pci_probe, 603 + .remove = usnic_ib_pci_remove, 604 + }; 605 + /* End of PCI section */ 606 + 607 + /* Start of module section */ 608 + static int __init usnic_ib_init(void) 609 + { 610 + int err; 611 + 612 + printk_once(KERN_INFO "%s", usnic_version); 613 + 614 + err = usnic_uiom_init(DRV_NAME); 615 + if (err) { 616 + usnic_err("Unable to initalize umem with err %d\n", err); 617 + return err; 618 + } 619 + 620 + if (pci_register_driver(&usnic_ib_pci_driver)) { 621 + usnic_err("Unable to register with PCI\n"); 622 + goto out_umem_fini; 623 + } 624 + 625 + err = register_netdevice_notifier(&usnic_ib_netdevice_notifier); 626 + if (err) { 627 + usnic_err("Failed to register netdev notifier\n"); 628 + goto out_pci_unreg; 629 + } 630 + 631 + err = register_inetaddr_notifier(&usnic_ib_inetaddr_notifier); 632 + if (err) { 633 + usnic_err("Failed to register inet addr notifier\n"); 634 + goto out_unreg_netdev_notifier; 635 + } 636 + 637 + err = usnic_transport_init(); 638 + if (err) { 639 + usnic_err("Failed to initialize transport\n"); 640 + goto out_unreg_inetaddr_notifier; 641 + } 642 + 643 + usnic_debugfs_init(); 644 + 645 + return 0; 646 + 647 + out_unreg_inetaddr_notifier: 648 + unregister_inetaddr_notifier(&usnic_ib_inetaddr_notifier); 649 + out_unreg_netdev_notifier: 650 + unregister_netdevice_notifier(&usnic_ib_netdevice_notifier); 651 + out_pci_unreg: 652 + pci_unregister_driver(&usnic_ib_pci_driver); 653 + out_umem_fini: 654 + usnic_uiom_fini(); 655 + 656 + return err; 657 + } 658 + 659 + static void __exit usnic_ib_destroy(void) 660 + { 661 + usnic_dbg("\n"); 662 + usnic_debugfs_exit(); 663 + usnic_transport_fini(); 664 + unregister_inetaddr_notifier(&usnic_ib_inetaddr_notifier); 665 + 
unregister_netdevice_notifier(&usnic_ib_netdevice_notifier); 666 + pci_unregister_driver(&usnic_ib_pci_driver); 667 + usnic_uiom_fini(); 668 + } 669 + 670 + MODULE_DESCRIPTION("Cisco VIC (usNIC) Verbs Driver"); 671 + MODULE_AUTHOR("Upinder Malhi <umalhi@cisco.com>"); 672 + MODULE_LICENSE("Dual BSD/GPL"); 673 + MODULE_VERSION(DRV_VERSION); 674 + module_param(usnic_log_lvl, uint, S_IRUGO | S_IWUSR); 675 + module_param(usnic_ib_share_vf, uint, S_IRUGO | S_IWUSR); 676 + MODULE_PARM_DESC(usnic_log_lvl, " Off=0, Err=1, Info=2, Debug=3"); 677 + MODULE_PARM_DESC(usnic_ib_share_vf, "Off=0, On=1 VF sharing amongst QPs"); 678 + MODULE_DEVICE_TABLE(pci, usnic_ib_pci_ids); 679 + 680 + module_init(usnic_ib_init); 681 + module_exit(usnic_ib_destroy); 682 + /* End of module section */
+754
drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
··· 1 + /* 2 + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. 3 + * 4 + * This program is free software; you may redistribute it and/or modify 5 + * it under the terms of the GNU General Public License as published by 6 + * the Free Software Foundation; version 2 of the License. 7 + * 8 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 9 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 10 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 11 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 12 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 13 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 + * SOFTWARE. 16 + * 17 + */ 18 + #include <linux/bug.h> 19 + #include <linux/errno.h> 20 + #include <linux/module.h> 21 + #include <linux/spinlock.h> 22 + 23 + #include "usnic_log.h" 24 + #include "usnic_vnic.h" 25 + #include "usnic_fwd.h" 26 + #include "usnic_uiom.h" 27 + #include "usnic_debugfs.h" 28 + #include "usnic_ib_qp_grp.h" 29 + #include "usnic_ib_sysfs.h" 30 + #include "usnic_transport.h" 31 + 32 + #define DFLT_RQ_IDX 0 33 + 34 + const char *usnic_ib_qp_grp_state_to_string(enum ib_qp_state state) 35 + { 36 + switch (state) { 37 + case IB_QPS_RESET: 38 + return "Rst"; 39 + case IB_QPS_INIT: 40 + return "Init"; 41 + case IB_QPS_RTR: 42 + return "RTR"; 43 + case IB_QPS_RTS: 44 + return "RTS"; 45 + case IB_QPS_SQD: 46 + return "SQD"; 47 + case IB_QPS_SQE: 48 + return "SQE"; 49 + case IB_QPS_ERR: 50 + return "ERR"; 51 + default: 52 + return "UNKOWN STATE"; 53 + 54 + } 55 + } 56 + 57 + int usnic_ib_qp_grp_dump_hdr(char *buf, int buf_sz) 58 + { 59 + return scnprintf(buf, buf_sz, "|QPN\t|State\t|PID\t|VF Idx\t|Fil ID"); 60 + } 61 + 62 + int usnic_ib_qp_grp_dump_rows(void *obj, char *buf, int buf_sz) 63 + { 64 + struct usnic_ib_qp_grp *qp_grp = obj; 65 + struct usnic_ib_qp_grp_flow *default_flow; 66 + if (obj) { 67 + default_flow = list_first_entry(&qp_grp->flows_lst, 68 + struct usnic_ib_qp_grp_flow, link); 69 + return scnprintf(buf, buf_sz, "|%d\t|%s\t|%d\t|%hu\t|%d", 70 + qp_grp->ibqp.qp_num, 71 + usnic_ib_qp_grp_state_to_string( 72 + qp_grp->state), 73 + qp_grp->owner_pid, 74 + usnic_vnic_get_index(qp_grp->vf->vnic), 75 + default_flow->flow->flow_id); 76 + } else { 77 + return scnprintf(buf, buf_sz, "|N/A\t|N/A\t|N/A\t|N/A\t|N/A"); 78 + } 79 + } 80 + 81 + static struct usnic_vnic_res_chunk * 82 + get_qp_res_chunk(struct usnic_ib_qp_grp *qp_grp) 83 + { 84 + lockdep_assert_held(&qp_grp->lock); 85 + /* 86 + * The QP res chunk, used to derive qp indices, 87 + * are just indices of the RQs 88 + */ 89 + return usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ); 90 + } 91 + 92 + static int enable_qp_grp(struct usnic_ib_qp_grp *qp_grp) 93 + { 94 + 95 + int status; 96 + int i, vnic_idx; 97 + struct usnic_vnic_res_chunk *res_chunk; 98 + struct usnic_vnic_res *res; 99 + 100 + lockdep_assert_held(&qp_grp->lock); 101 + 102 + vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic); 103 + 104 + res_chunk = get_qp_res_chunk(qp_grp); 105 + if (IS_ERR_OR_NULL(res_chunk)) { 106 + usnic_err("Unable to get qp res with err %ld\n", 107 + PTR_ERR(res_chunk)); 108 + return res_chunk ? 
PTR_ERR(res_chunk) : -ENOMEM; 109 + } 110 + 111 + for (i = 0; i < res_chunk->cnt; i++) { 112 + res = res_chunk->res[i]; 113 + status = usnic_fwd_enable_qp(qp_grp->ufdev, vnic_idx, 114 + res->vnic_idx); 115 + if (status) { 116 + usnic_err("Failed to enable qp %d of %s:%d\n with err %d\n", 117 + res->vnic_idx, qp_grp->ufdev->name, 118 + vnic_idx, status); 119 + goto out_err; 120 + } 121 + } 122 + 123 + return 0; 124 + 125 + out_err: 126 + for (i--; i >= 0; i--) { 127 + res = res_chunk->res[i]; 128 + usnic_fwd_disable_qp(qp_grp->ufdev, vnic_idx, 129 + res->vnic_idx); 130 + } 131 + 132 + return status; 133 + } 134 + 135 + static int disable_qp_grp(struct usnic_ib_qp_grp *qp_grp) 136 + { 137 + int i, vnic_idx; 138 + struct usnic_vnic_res_chunk *res_chunk; 139 + struct usnic_vnic_res *res; 140 + int status = 0; 141 + 142 + lockdep_assert_held(&qp_grp->lock); 143 + vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic); 144 + 145 + res_chunk = get_qp_res_chunk(qp_grp); 146 + if (IS_ERR_OR_NULL(res_chunk)) { 147 + usnic_err("Unable to get qp res with err %ld\n", 148 + PTR_ERR(res_chunk)); 149 + return res_chunk ? PTR_ERR(res_chunk) : -ENOMEM; 150 + } 151 + 152 + for (i = 0; i < res_chunk->cnt; i++) { 153 + res = res_chunk->res[i]; 154 + status = usnic_fwd_disable_qp(qp_grp->ufdev, vnic_idx, 155 + res->vnic_idx); 156 + if (status) { 157 + usnic_err("Failed to disable rq %d of %s:%d\n with err %d\n", 158 + res->vnic_idx, 159 + qp_grp->ufdev->name, 160 + vnic_idx, status); 161 + } 162 + } 163 + 164 + return status; 165 + 166 + } 167 + 168 + static int init_filter_action(struct usnic_ib_qp_grp *qp_grp, 169 + struct usnic_filter_action *uaction) 170 + { 171 + struct usnic_vnic_res_chunk *res_chunk; 172 + 173 + res_chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ); 174 + if (IS_ERR_OR_NULL(res_chunk)) { 175 + usnic_err("Unable to get %s with err %ld\n", 176 + usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_RQ), 177 + PTR_ERR(res_chunk)); 178 + return res_chunk ? PTR_ERR(res_chunk) : -ENOMEM; 179 + } 180 + 181 + uaction->vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic); 182 + uaction->action.type = FILTER_ACTION_RQ_STEERING; 183 + uaction->action.u.rq_idx = res_chunk->res[DFLT_RQ_IDX]->vnic_idx; 184 + 185 + return 0; 186 + } 187 + 188 + static struct usnic_ib_qp_grp_flow* 189 + create_roce_custom_flow(struct usnic_ib_qp_grp *qp_grp, 190 + struct usnic_transport_spec *trans_spec) 191 + { 192 + uint16_t port_num; 193 + int err; 194 + struct filter filter; 195 + struct usnic_filter_action uaction; 196 + struct usnic_ib_qp_grp_flow *qp_flow; 197 + struct usnic_fwd_flow *flow; 198 + enum usnic_transport_type trans_type; 199 + 200 + trans_type = trans_spec->trans_type; 201 + port_num = trans_spec->usnic_roce.port_num; 202 + 203 + /* Reserve Port */ 204 + port_num = usnic_transport_rsrv_port(trans_type, port_num); 205 + if (port_num == 0) 206 + return ERR_PTR(-EINVAL); 207 + 208 + /* Create Flow */ 209 + usnic_fwd_init_usnic_filter(&filter, port_num); 210 + err = init_filter_action(qp_grp, &uaction); 211 + if (err) 212 + goto out_unreserve_port; 213 + 214 + flow = usnic_fwd_alloc_flow(qp_grp->ufdev, &filter, &uaction); 215 + if (IS_ERR_OR_NULL(flow)) { 216 + usnic_err("Unable to alloc flow failed with err %ld\n", 217 + PTR_ERR(flow)); 218 + err = flow ? PTR_ERR(flow) : -EFAULT; 219 + goto out_unreserve_port; 220 + } 221 + 222 + /* Create Flow Handle */ 223 + qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC); 224 + if (IS_ERR_OR_NULL(qp_flow)) { 225 + err = qp_flow ? 
PTR_ERR(qp_flow) : -ENOMEM; 226 + goto out_dealloc_flow; 227 + } 228 + qp_flow->flow = flow; 229 + qp_flow->trans_type = trans_type; 230 + qp_flow->usnic_roce.port_num = port_num; 231 + qp_flow->qp_grp = qp_grp; 232 + return qp_flow; 233 + 234 + out_dealloc_flow: 235 + usnic_fwd_dealloc_flow(flow); 236 + out_unreserve_port: 237 + usnic_transport_unrsrv_port(trans_type, port_num); 238 + return ERR_PTR(err); 239 + } 240 + 241 + static void release_roce_custom_flow(struct usnic_ib_qp_grp_flow *qp_flow) 242 + { 243 + usnic_fwd_dealloc_flow(qp_flow->flow); 244 + usnic_transport_unrsrv_port(qp_flow->trans_type, 245 + qp_flow->usnic_roce.port_num); 246 + kfree(qp_flow); 247 + } 248 + 249 + static struct usnic_ib_qp_grp_flow* 250 + create_udp_flow(struct usnic_ib_qp_grp *qp_grp, 251 + struct usnic_transport_spec *trans_spec) 252 + { 253 + struct socket *sock; 254 + int sock_fd; 255 + int err; 256 + struct filter filter; 257 + struct usnic_filter_action uaction; 258 + struct usnic_ib_qp_grp_flow *qp_flow; 259 + struct usnic_fwd_flow *flow; 260 + enum usnic_transport_type trans_type; 261 + uint32_t addr; 262 + uint16_t port_num; 263 + int proto; 264 + 265 + trans_type = trans_spec->trans_type; 266 + sock_fd = trans_spec->udp.sock_fd; 267 + 268 + /* Get and check socket */ 269 + sock = usnic_transport_get_socket(sock_fd); 270 + if (IS_ERR_OR_NULL(sock)) 271 + return ERR_CAST(sock); 272 + 273 + err = usnic_transport_sock_get_addr(sock, &proto, &addr, &port_num); 274 + if (err) 275 + goto out_put_sock; 276 + 277 + if (proto != IPPROTO_UDP) { 278 + usnic_err("Protocol for fd %d is not UDP", sock_fd); 279 + err = -EPERM; 280 + goto out_put_sock; 281 + } 282 + 283 + /* Create flow */ 284 + usnic_fwd_init_udp_filter(&filter, addr, port_num); 285 + err = init_filter_action(qp_grp, &uaction); 286 + if (err) 287 + goto out_put_sock; 288 + 289 + flow = usnic_fwd_alloc_flow(qp_grp->ufdev, &filter, &uaction); 290 + if (IS_ERR_OR_NULL(flow)) { 291 + usnic_err("Unable to alloc flow failed with err %ld\n", 292 + PTR_ERR(flow)); 293 + err = flow ? PTR_ERR(flow) : -EFAULT; 294 + goto out_put_sock; 295 + } 296 + 297 + /* Create qp_flow */ 298 + qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC); 299 + if (IS_ERR_OR_NULL(qp_flow)) { 300 + err = qp_flow ? 
PTR_ERR(qp_flow) : -ENOMEM; 301 + goto out_dealloc_flow; 302 + } 303 + qp_flow->flow = flow; 304 + qp_flow->trans_type = trans_type; 305 + qp_flow->udp.sock = sock; 306 + qp_flow->qp_grp = qp_grp; 307 + return qp_flow; 308 + 309 + out_dealloc_flow: 310 + usnic_fwd_dealloc_flow(flow); 311 + out_put_sock: 312 + usnic_transport_put_socket(sock); 313 + return ERR_PTR(err); 314 + } 315 + 316 + static void release_udp_flow(struct usnic_ib_qp_grp_flow *qp_flow) 317 + { 318 + usnic_fwd_dealloc_flow(qp_flow->flow); 319 + usnic_transport_put_socket(qp_flow->udp.sock); 320 + kfree(qp_flow); 321 + } 322 + 323 + static struct usnic_ib_qp_grp_flow* 324 + create_and_add_flow(struct usnic_ib_qp_grp *qp_grp, 325 + struct usnic_transport_spec *trans_spec) 326 + { 327 + struct usnic_ib_qp_grp_flow *qp_flow; 328 + enum usnic_transport_type trans_type; 329 + 330 + trans_type = trans_spec->trans_type; 331 + switch (trans_type) { 332 + case USNIC_TRANSPORT_ROCE_CUSTOM: 333 + qp_flow = create_roce_custom_flow(qp_grp, trans_spec); 334 + break; 335 + case USNIC_TRANSPORT_IPV4_UDP: 336 + qp_flow = create_udp_flow(qp_grp, trans_spec); 337 + break; 338 + default: 339 + usnic_err("Unsupported transport %u\n", 340 + trans_spec->trans_type); 341 + return ERR_PTR(-EINVAL); 342 + } 343 + 344 + if (!IS_ERR_OR_NULL(qp_flow)) { 345 + list_add_tail(&qp_flow->link, &qp_grp->flows_lst); 346 + usnic_debugfs_flow_add(qp_flow); 347 + } 348 + 349 + 350 + return qp_flow; 351 + } 352 + 353 + static void release_and_remove_flow(struct usnic_ib_qp_grp_flow *qp_flow) 354 + { 355 + usnic_debugfs_flow_remove(qp_flow); 356 + list_del(&qp_flow->link); 357 + 358 + switch (qp_flow->trans_type) { 359 + case USNIC_TRANSPORT_ROCE_CUSTOM: 360 + release_roce_custom_flow(qp_flow); 361 + break; 362 + case USNIC_TRANSPORT_IPV4_UDP: 363 + release_udp_flow(qp_flow); 364 + break; 365 + default: 366 + WARN(1, "Unsupported transport %u\n", 367 + qp_flow->trans_type); 368 + break; 369 + } 370 + } 371 + 372 + static void release_and_remove_all_flows(struct usnic_ib_qp_grp *qp_grp) 373 + { 374 + struct usnic_ib_qp_grp_flow *qp_flow, *tmp; 375 + list_for_each_entry_safe(qp_flow, tmp, &qp_grp->flows_lst, link) 376 + release_and_remove_flow(qp_flow); 377 + } 378 + 379 + int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp, 380 + enum ib_qp_state new_state, 381 + void *data) 382 + { 383 + int status = 0; 384 + int vnic_idx; 385 + struct ib_event ib_event; 386 + enum ib_qp_state old_state; 387 + struct usnic_transport_spec *trans_spec; 388 + struct usnic_ib_qp_grp_flow *qp_flow; 389 + 390 + old_state = qp_grp->state; 391 + vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic); 392 + trans_spec = (struct usnic_transport_spec *) data; 393 + 394 + spin_lock(&qp_grp->lock); 395 + switch (new_state) { 396 + case IB_QPS_RESET: 397 + switch (old_state) { 398 + case IB_QPS_RESET: 399 + /* NO-OP */ 400 + break; 401 + case IB_QPS_INIT: 402 + release_and_remove_all_flows(qp_grp); 403 + status = 0; 404 + break; 405 + case IB_QPS_RTR: 406 + case IB_QPS_RTS: 407 + case IB_QPS_ERR: 408 + status = disable_qp_grp(qp_grp); 409 + release_and_remove_all_flows(qp_grp); 410 + break; 411 + default: 412 + status = -EINVAL; 413 + } 414 + break; 415 + case IB_QPS_INIT: 416 + switch (old_state) { 417 + case IB_QPS_RESET: 418 + if (trans_spec) { 419 + qp_flow = create_and_add_flow(qp_grp, 420 + trans_spec); 421 + if (IS_ERR_OR_NULL(qp_flow)) { 422 + status = qp_flow ? PTR_ERR(qp_flow) : -EFAULT; 423 + break; 424 + } 425 + } else { 426 + /* 427 + * Optional to specify filters. 
428 + */ 429 + status = 0; 430 + } 431 + break; 432 + case IB_QPS_INIT: 433 + if (trans_spec) { 434 + qp_flow = create_and_add_flow(qp_grp, 435 + trans_spec); 436 + if (IS_ERR_OR_NULL(qp_flow)) { 437 + status = qp_flow ? PTR_ERR(qp_flow) : -EFAULT; 438 + break; 439 + } 440 + } else { 441 + /* 442 + * Doesn't make sense to go into INIT state 443 + * from INIT state w/o adding filters. 444 + */ 445 + status = -EINVAL; 446 + } 447 + break; 448 + case IB_QPS_RTR: 449 + status = disable_qp_grp(qp_grp); 450 + break; 451 + case IB_QPS_RTS: 452 + status = disable_qp_grp(qp_grp); 453 + break; 454 + default: 455 + status = -EINVAL; 456 + } 457 + break; 458 + case IB_QPS_RTR: 459 + switch (old_state) { 460 + case IB_QPS_INIT: 461 + status = enable_qp_grp(qp_grp); 462 + break; 463 + default: 464 + status = -EINVAL; 465 + } 466 + break; 467 + case IB_QPS_RTS: 468 + switch (old_state) { 469 + case IB_QPS_RTR: 470 + /* NO-OP FOR NOW */ 471 + break; 472 + default: 473 + status = -EINVAL; 474 + } 475 + break; 476 + case IB_QPS_ERR: 477 + ib_event.device = &qp_grp->vf->pf->ib_dev; 478 + ib_event.element.qp = &qp_grp->ibqp; 479 + ib_event.event = IB_EVENT_QP_FATAL; 480 + 481 + switch (old_state) { 482 + case IB_QPS_RESET: 483 + qp_grp->ibqp.event_handler(&ib_event, 484 + qp_grp->ibqp.qp_context); 485 + break; 486 + case IB_QPS_INIT: 487 + release_and_remove_all_flows(qp_grp); 488 + qp_grp->ibqp.event_handler(&ib_event, 489 + qp_grp->ibqp.qp_context); 490 + break; 491 + case IB_QPS_RTR: 492 + case IB_QPS_RTS: 493 + status = disable_qp_grp(qp_grp); 494 + release_and_remove_all_flows(qp_grp); 495 + qp_grp->ibqp.event_handler(&ib_event, 496 + qp_grp->ibqp.qp_context); 497 + break; 498 + default: 499 + status = -EINVAL; 500 + } 501 + break; 502 + default: 503 + status = -EINVAL; 504 + } 505 + spin_unlock(&qp_grp->lock); 506 + 507 + if (!status) { 508 + qp_grp->state = new_state; 509 + usnic_info("Transistioned %u from %s to %s", 510 + qp_grp->grp_id, 511 + usnic_ib_qp_grp_state_to_string(old_state), 512 + usnic_ib_qp_grp_state_to_string(new_state)); 513 + } else { 514 + usnic_err("Failed to transistion %u from %s to %s", 515 + qp_grp->grp_id, 516 + usnic_ib_qp_grp_state_to_string(old_state), 517 + usnic_ib_qp_grp_state_to_string(new_state)); 518 + } 519 + 520 + return status; 521 + } 522 + 523 + static struct usnic_vnic_res_chunk** 524 + alloc_res_chunk_list(struct usnic_vnic *vnic, 525 + struct usnic_vnic_res_spec *res_spec, void *owner_obj) 526 + { 527 + enum usnic_vnic_res_type res_type; 528 + struct usnic_vnic_res_chunk **res_chunk_list; 529 + int err, i, res_cnt, res_lst_sz; 530 + 531 + for (res_lst_sz = 0; 532 + res_spec->resources[res_lst_sz].type != USNIC_VNIC_RES_TYPE_EOL; 533 + res_lst_sz++) { 534 + /* Do Nothing */ 535 + } 536 + 537 + res_chunk_list = kzalloc(sizeof(*res_chunk_list)*(res_lst_sz+1), 538 + GFP_ATOMIC); 539 + if (!res_chunk_list) 540 + return ERR_PTR(-ENOMEM); 541 + 542 + for (i = 0; res_spec->resources[i].type != USNIC_VNIC_RES_TYPE_EOL; 543 + i++) { 544 + res_type = res_spec->resources[i].type; 545 + res_cnt = res_spec->resources[i].cnt; 546 + 547 + res_chunk_list[i] = usnic_vnic_get_resources(vnic, res_type, 548 + res_cnt, owner_obj); 549 + if (IS_ERR_OR_NULL(res_chunk_list[i])) { 550 + err = res_chunk_list[i] ? 
551 + PTR_ERR(res_chunk_list[i]) : -ENOMEM; 552 + usnic_err("Failed to get %s from %s with err %d\n", 553 + usnic_vnic_res_type_to_str(res_type), 554 + usnic_vnic_pci_name(vnic), 555 + err); 556 + goto out_free_res; 557 + } 558 + } 559 + 560 + return res_chunk_list; 561 + 562 + out_free_res: 563 + for (i--; i > 0; i--) 564 + usnic_vnic_put_resources(res_chunk_list[i]); 565 + kfree(res_chunk_list); 566 + return ERR_PTR(err); 567 + } 568 + 569 + static void free_qp_grp_res(struct usnic_vnic_res_chunk **res_chunk_list) 570 + { 571 + int i; 572 + for (i = 0; res_chunk_list[i]; i++) 573 + usnic_vnic_put_resources(res_chunk_list[i]); 574 + kfree(res_chunk_list); 575 + } 576 + 577 + static int qp_grp_and_vf_bind(struct usnic_ib_vf *vf, 578 + struct usnic_ib_pd *pd, 579 + struct usnic_ib_qp_grp *qp_grp) 580 + { 581 + int err; 582 + struct pci_dev *pdev; 583 + 584 + lockdep_assert_held(&vf->lock); 585 + 586 + pdev = usnic_vnic_get_pdev(vf->vnic); 587 + if (vf->qp_grp_ref_cnt == 0) { 588 + err = usnic_uiom_attach_dev_to_pd(pd->umem_pd, &pdev->dev); 589 + if (err) { 590 + usnic_err("Failed to attach %s to domain\n", 591 + pci_name(pdev)); 592 + return err; 593 + } 594 + vf->pd = pd; 595 + } 596 + vf->qp_grp_ref_cnt++; 597 + 598 + WARN_ON(vf->pd != pd); 599 + qp_grp->vf = vf; 600 + 601 + return 0; 602 + } 603 + 604 + static void qp_grp_and_vf_unbind(struct usnic_ib_qp_grp *qp_grp) 605 + { 606 + struct pci_dev *pdev; 607 + struct usnic_ib_pd *pd; 608 + 609 + lockdep_assert_held(&qp_grp->vf->lock); 610 + 611 + pd = qp_grp->vf->pd; 612 + pdev = usnic_vnic_get_pdev(qp_grp->vf->vnic); 613 + if (--qp_grp->vf->qp_grp_ref_cnt == 0) { 614 + qp_grp->vf->pd = NULL; 615 + usnic_uiom_detach_dev_from_pd(pd->umem_pd, &pdev->dev); 616 + } 617 + qp_grp->vf = NULL; 618 + } 619 + 620 + static void log_spec(struct usnic_vnic_res_spec *res_spec) 621 + { 622 + char buf[512]; 623 + usnic_vnic_spec_dump(buf, sizeof(buf), res_spec); 624 + usnic_dbg("%s\n", buf); 625 + } 626 + 627 + static int qp_grp_id_from_flow(struct usnic_ib_qp_grp_flow *qp_flow, 628 + uint32_t *id) 629 + { 630 + enum usnic_transport_type trans_type = qp_flow->trans_type; 631 + int err; 632 + 633 + switch (trans_type) { 634 + case USNIC_TRANSPORT_ROCE_CUSTOM: 635 + *id = qp_flow->usnic_roce.port_num; 636 + break; 637 + case USNIC_TRANSPORT_IPV4_UDP: 638 + err = usnic_transport_sock_get_addr(qp_flow->udp.sock, 639 + NULL, NULL, 640 + (uint16_t *) id); 641 + if (err) 642 + return err; 643 + break; 644 + default: 645 + usnic_err("Unsupported transport %u\n", trans_type); 646 + return -EINVAL; 647 + } 648 + 649 + return 0; 650 + } 651 + 652 + struct usnic_ib_qp_grp * 653 + usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf, 654 + struct usnic_ib_pd *pd, 655 + struct usnic_vnic_res_spec *res_spec, 656 + struct usnic_transport_spec *transport_spec) 657 + { 658 + struct usnic_ib_qp_grp *qp_grp; 659 + int err; 660 + enum usnic_transport_type transport = transport_spec->trans_type; 661 + struct usnic_ib_qp_grp_flow *qp_flow; 662 + 663 + lockdep_assert_held(&vf->lock); 664 + 665 + err = usnic_vnic_res_spec_satisfied(&min_transport_spec[transport], 666 + res_spec); 667 + if (err) { 668 + usnic_err("Spec does not meet miniumum req for transport %d\n", 669 + transport); 670 + log_spec(res_spec); 671 + return ERR_PTR(err); 672 + } 673 + 674 + qp_grp = kzalloc(sizeof(*qp_grp), GFP_ATOMIC); 675 + if (!qp_grp) { 676 + usnic_err("Unable to alloc qp_grp - Out of memory\n"); 677 + return NULL; 678 + } 679 + 680 + qp_grp->res_chunk_list = 
alloc_res_chunk_list(vf->vnic, res_spec, 681 + qp_grp); 682 + if (IS_ERR_OR_NULL(qp_grp->res_chunk_list)) { 683 + err = qp_grp->res_chunk_list ? 684 + PTR_ERR(qp_grp->res_chunk_list) : -ENOMEM; 685 + usnic_err("Unable to alloc res for %d with err %d\n", 686 + qp_grp->grp_id, err); 687 + goto out_free_qp_grp; 688 + } 689 + 690 + err = qp_grp_and_vf_bind(vf, pd, qp_grp); 691 + if (err) 692 + goto out_free_res; 693 + 694 + INIT_LIST_HEAD(&qp_grp->flows_lst); 695 + spin_lock_init(&qp_grp->lock); 696 + qp_grp->ufdev = ufdev; 697 + qp_grp->state = IB_QPS_RESET; 698 + qp_grp->owner_pid = current->pid; 699 + 700 + qp_flow = create_and_add_flow(qp_grp, transport_spec); 701 + if (IS_ERR_OR_NULL(qp_flow)) { 702 + usnic_err("Unable to create and add flow with err %ld\n", 703 + PTR_ERR(qp_flow)); 704 + err = qp_flow ? PTR_ERR(qp_flow) : -EFAULT; 705 + goto out_qp_grp_vf_unbind; 706 + } 707 + 708 + err = qp_grp_id_from_flow(qp_flow, &qp_grp->grp_id); 709 + if (err) 710 + goto out_release_flow; 711 + qp_grp->ibqp.qp_num = qp_grp->grp_id; 712 + 713 + usnic_ib_sysfs_qpn_add(qp_grp); 714 + 715 + return qp_grp; 716 + 717 + out_release_flow: 718 + release_and_remove_flow(qp_flow); 719 + out_qp_grp_vf_unbind: 720 + qp_grp_and_vf_unbind(qp_grp); 721 + out_free_res: 722 + free_qp_grp_res(qp_grp->res_chunk_list); 723 + out_free_qp_grp: 724 + kfree(qp_grp); 725 + 726 + return ERR_PTR(err); 727 + } 728 + 729 + void usnic_ib_qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp) 730 + { 731 + 732 + WARN_ON(qp_grp->state != IB_QPS_RESET); 733 + lockdep_assert_held(&qp_grp->vf->lock); 734 + 735 + release_and_remove_all_flows(qp_grp); 736 + usnic_ib_sysfs_qpn_remove(qp_grp); 737 + qp_grp_and_vf_unbind(qp_grp); 738 + free_qp_grp_res(qp_grp->res_chunk_list); 739 + kfree(qp_grp); 740 + } 741 + 742 + struct usnic_vnic_res_chunk* 743 + usnic_ib_qp_grp_get_chunk(struct usnic_ib_qp_grp *qp_grp, 744 + enum usnic_vnic_res_type res_type) 745 + { 746 + int i; 747 + 748 + for (i = 0; qp_grp->res_chunk_list[i]; i++) { 749 + if (qp_grp->res_chunk_list[i]->type == res_type) 750 + return qp_grp->res_chunk_list[i]; 751 + } 752 + 753 + return ERR_PTR(-EINVAL); 754 + }
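The large switch in usnic_ib_qp_grp_modify() above is effectively a small verbs-style state machine: RESET, INIT, RTR, RTS and ERR, with flows attached on the way into INIT and resources disabled/released on the way back to RESET or into ERR. Below is a minimal user-space sketch of just the transition rules that switch accepts; the enum names mirror the IB_QPS_* states, but the table and main() are illustrative only (the driver additionally requires a transport spec for INIT->INIT and creates/releases flows along the way, which the sketch leaves out).

/* Build (user space): cc -std=c99 -o qp_fsm qp_fsm.c */
#include <stdio.h>
#include <stdbool.h>

enum qp_state { QPS_RESET, QPS_INIT, QPS_RTR, QPS_RTS, QPS_ERR, QPS_MAX };

/* valid[new][old] is true when the switch in usnic_ib_qp_grp_modify()
 * accepts the transition; everything else falls through to -EINVAL. */
static const bool valid[QPS_MAX][QPS_MAX] = {
	[QPS_RESET] = { [QPS_RESET] = true, [QPS_INIT] = true,
			[QPS_RTR] = true,  [QPS_RTS] = true, [QPS_ERR] = true },
	[QPS_INIT]  = { [QPS_RESET] = true, [QPS_INIT] = true,
			[QPS_RTR] = true,  [QPS_RTS] = true },
	[QPS_RTR]   = { [QPS_INIT] = true },
	[QPS_RTS]   = { [QPS_RTR] = true },
	[QPS_ERR]   = { [QPS_RESET] = true, [QPS_INIT] = true,
			[QPS_RTR] = true,  [QPS_RTS] = true },
};

int main(void)
{
	/* INIT -> RTR -> RTS is the normal bring-up path for a usNIC QP. */
	printf("INIT->RTR:  %s\n", valid[QPS_RTR][QPS_INIT] ? "ok" : "-EINVAL");
	printf("RESET->RTS: %s\n", valid[QPS_RTS][QPS_RESET] ? "ok" : "-EINVAL");
	return 0;
}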
+117
drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h
··· 1 + /* 2 + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. 3 + * 4 + * This program is free software; you may redistribute it and/or modify 5 + * it under the terms of the GNU General Public License as published by 6 + * the Free Software Foundation; version 2 of the License. 7 + * 8 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 9 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 10 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 11 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 12 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 13 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 + * SOFTWARE. 16 + * 17 + */ 18 + 19 + #ifndef USNIC_IB_QP_GRP_H_ 20 + #define USNIC_IB_QP_GRP_H_ 21 + 22 + #include <linux/debugfs.h> 23 + #include <rdma/ib_verbs.h> 24 + 25 + #include "usnic_ib.h" 26 + #include "usnic_abi.h" 27 + #include "usnic_fwd.h" 28 + #include "usnic_vnic.h" 29 + 30 + /* 31 + * The qp group struct represents all the hw resources needed to present a ib_qp 32 + */ 33 + struct usnic_ib_qp_grp { 34 + struct ib_qp ibqp; 35 + enum ib_qp_state state; 36 + int grp_id; 37 + 38 + struct usnic_fwd_dev *ufdev; 39 + struct usnic_ib_ucontext *ctx; 40 + struct list_head flows_lst; 41 + 42 + struct usnic_vnic_res_chunk **res_chunk_list; 43 + 44 + pid_t owner_pid; 45 + struct usnic_ib_vf *vf; 46 + struct list_head link; 47 + 48 + spinlock_t lock; 49 + 50 + struct kobject kobj; 51 + }; 52 + 53 + struct usnic_ib_qp_grp_flow { 54 + struct usnic_fwd_flow *flow; 55 + enum usnic_transport_type trans_type; 56 + union { 57 + struct { 58 + uint16_t port_num; 59 + } usnic_roce; 60 + struct { 61 + struct socket *sock; 62 + } udp; 63 + }; 64 + struct usnic_ib_qp_grp *qp_grp; 65 + struct list_head link; 66 + 67 + /* Debug FS */ 68 + struct dentry *dbgfs_dentry; 69 + char dentry_name[32]; 70 + }; 71 + 72 + static const struct 73 + usnic_vnic_res_spec min_transport_spec[USNIC_TRANSPORT_MAX] = { 74 + { /*USNIC_TRANSPORT_UNKNOWN*/ 75 + .resources = { 76 + {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,}, 77 + }, 78 + }, 79 + { /*USNIC_TRANSPORT_ROCE_CUSTOM*/ 80 + .resources = { 81 + {.type = USNIC_VNIC_RES_TYPE_WQ, .cnt = 1,}, 82 + {.type = USNIC_VNIC_RES_TYPE_RQ, .cnt = 1,}, 83 + {.type = USNIC_VNIC_RES_TYPE_CQ, .cnt = 1,}, 84 + {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,}, 85 + }, 86 + }, 87 + { /*USNIC_TRANSPORT_IPV4_UDP*/ 88 + .resources = { 89 + {.type = USNIC_VNIC_RES_TYPE_WQ, .cnt = 1,}, 90 + {.type = USNIC_VNIC_RES_TYPE_RQ, .cnt = 1,}, 91 + {.type = USNIC_VNIC_RES_TYPE_CQ, .cnt = 1,}, 92 + {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,}, 93 + }, 94 + }, 95 + }; 96 + 97 + const char *usnic_ib_qp_grp_state_to_string(enum ib_qp_state state); 98 + int usnic_ib_qp_grp_dump_hdr(char *buf, int buf_sz); 99 + int usnic_ib_qp_grp_dump_rows(void *obj, char *buf, int buf_sz); 100 + struct usnic_ib_qp_grp * 101 + usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf, 102 + struct usnic_ib_pd *pd, 103 + struct usnic_vnic_res_spec *res_spec, 104 + struct usnic_transport_spec *trans_spec); 105 + void usnic_ib_qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp); 106 + int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp, 107 + enum ib_qp_state new_state, 108 + void *data); 109 + struct usnic_vnic_res_chunk 110 + *usnic_ib_qp_grp_get_chunk(struct usnic_ib_qp_grp *qp_grp, 111 + enum usnic_vnic_res_type 
type); 112 + static inline 113 + struct usnic_ib_qp_grp *to_uqp_grp(struct ib_qp *ibqp) 114 + { 115 + return container_of(ibqp, struct usnic_ib_qp_grp, ibqp); 116 + } 117 + #endif /* USNIC_IB_QP_GRP_H_ */
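min_transport_spec[] above is indexed by transport type, and each entry is a list of per-resource minimum counts terminated by a USNIC_VNIC_RES_TYPE_EOL sentinel; alloc_res_chunk_list() in usnic_ib_qp_grp.c walks the same sentinel-terminated layout. A small stand-alone sketch of that walk is below; spec_satisfied() is only an assumed stand-in for usnic_vnic_res_spec_satisfied(), which is not part of this diff.

#include <stdio.h>

enum res_type { RES_EOL, RES_WQ, RES_RQ, RES_CQ };

struct res_entry { enum res_type type; unsigned cnt; };

/* Minimum spec for one transport: 1 WQ, 1 RQ, 1 CQ, EOL-terminated,
 * mirroring the min_transport_spec[] initializers above. */
static const struct res_entry min_spec[] = {
	{ RES_WQ, 1 }, { RES_RQ, 1 }, { RES_CQ, 1 }, { RES_EOL, 0 },
};

/* Hypothetical stand-in for usnic_vnic_res_spec_satisfied(): every
 * minimum count in the spec must be met by what the vNIC can offer. */
static int spec_satisfied(const struct res_entry *min,
			  unsigned avail_wq, unsigned avail_rq, unsigned avail_cq)
{
	int i;

	for (i = 0; min[i].type != RES_EOL; i++) {
		unsigned avail = (min[i].type == RES_WQ) ? avail_wq :
				 (min[i].type == RES_RQ) ? avail_rq : avail_cq;
		if (avail < min[i].cnt)
			return -1;	/* spec not met */
	}
	return 0;
}

int main(void)
{
	printf("1 WQ/1 RQ/1 CQ available: %s\n",
	       spec_satisfied(min_spec, 1, 1, 1) ? "unsatisfied" : "satisfied");
	printf("no CQ available:          %s\n",
	       spec_satisfied(min_spec, 1, 1, 0) ? "unsatisfied" : "satisfied");
	return 0;
}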
+341
drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
··· 1 + /* 2 + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. 3 + * 4 + * This program is free software; you may redistribute it and/or modify 5 + * it under the terms of the GNU General Public License as published by 6 + * the Free Software Foundation; version 2 of the License. 7 + * 8 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 9 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 10 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 11 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 12 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 13 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 + * SOFTWARE. 16 + * 17 + */ 18 + 19 + #include <linux/module.h> 20 + #include <linux/init.h> 21 + #include <linux/errno.h> 22 + 23 + #include <rdma/ib_user_verbs.h> 24 + #include <rdma/ib_addr.h> 25 + 26 + #include "usnic_common_util.h" 27 + #include "usnic_ib.h" 28 + #include "usnic_ib_qp_grp.h" 29 + #include "usnic_vnic.h" 30 + #include "usnic_ib_verbs.h" 31 + #include "usnic_log.h" 32 + 33 + static ssize_t usnic_ib_show_fw_ver(struct device *device, 34 + struct device_attribute *attr, 35 + char *buf) 36 + { 37 + struct usnic_ib_dev *us_ibdev = 38 + container_of(device, struct usnic_ib_dev, ib_dev.dev); 39 + struct ethtool_drvinfo info; 40 + 41 + mutex_lock(&us_ibdev->usdev_lock); 42 + us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info); 43 + mutex_unlock(&us_ibdev->usdev_lock); 44 + 45 + return scnprintf(buf, PAGE_SIZE, "%s\n", info.fw_version); 46 + } 47 + 48 + static ssize_t usnic_ib_show_board(struct device *device, 49 + struct device_attribute *attr, 50 + char *buf) 51 + { 52 + struct usnic_ib_dev *us_ibdev = 53 + container_of(device, struct usnic_ib_dev, ib_dev.dev); 54 + unsigned short subsystem_device_id; 55 + 56 + mutex_lock(&us_ibdev->usdev_lock); 57 + subsystem_device_id = us_ibdev->pdev->subsystem_device; 58 + mutex_unlock(&us_ibdev->usdev_lock); 59 + 60 + return scnprintf(buf, PAGE_SIZE, "%hu\n", subsystem_device_id); 61 + } 62 + 63 + /* 64 + * Report the configuration for this PF 65 + */ 66 + static ssize_t 67 + usnic_ib_show_config(struct device *device, struct device_attribute *attr, 68 + char *buf) 69 + { 70 + struct usnic_ib_dev *us_ibdev; 71 + char *ptr; 72 + unsigned left; 73 + unsigned n; 74 + enum usnic_vnic_res_type res_type; 75 + 76 + us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev); 77 + 78 + /* Buffer space limit is 1 page */ 79 + ptr = buf; 80 + left = PAGE_SIZE; 81 + 82 + mutex_lock(&us_ibdev->usdev_lock); 83 + if (atomic_read(&us_ibdev->vf_cnt.refcount) > 0) { 84 + char *busname; 85 + 86 + /* 87 + * bus name seems to come with annoying prefix. 
88 + * Remove it if it is predictable 89 + */ 90 + busname = us_ibdev->pdev->bus->name; 91 + if (strncmp(busname, "PCI Bus ", 8) == 0) 92 + busname += 8; 93 + 94 + n = scnprintf(ptr, left, 95 + "%s: %s:%d.%d, %s, %pM, %u VFs\n Per VF:", 96 + us_ibdev->ib_dev.name, 97 + busname, 98 + PCI_SLOT(us_ibdev->pdev->devfn), 99 + PCI_FUNC(us_ibdev->pdev->devfn), 100 + netdev_name(us_ibdev->netdev), 101 + us_ibdev->ufdev->mac, 102 + atomic_read(&us_ibdev->vf_cnt.refcount)); 103 + UPDATE_PTR_LEFT(n, ptr, left); 104 + 105 + for (res_type = USNIC_VNIC_RES_TYPE_EOL; 106 + res_type < USNIC_VNIC_RES_TYPE_MAX; 107 + res_type++) { 108 + if (us_ibdev->vf_res_cnt[res_type] == 0) 109 + continue; 110 + n = scnprintf(ptr, left, " %d %s%s", 111 + us_ibdev->vf_res_cnt[res_type], 112 + usnic_vnic_res_type_to_str(res_type), 113 + (res_type < (USNIC_VNIC_RES_TYPE_MAX - 1)) ? 114 + "," : ""); 115 + UPDATE_PTR_LEFT(n, ptr, left); 116 + } 117 + n = scnprintf(ptr, left, "\n"); 118 + UPDATE_PTR_LEFT(n, ptr, left); 119 + } else { 120 + n = scnprintf(ptr, left, "%s: no VFs\n", 121 + us_ibdev->ib_dev.name); 122 + UPDATE_PTR_LEFT(n, ptr, left); 123 + } 124 + mutex_unlock(&us_ibdev->usdev_lock); 125 + 126 + return ptr - buf; 127 + } 128 + 129 + static ssize_t 130 + usnic_ib_show_iface(struct device *device, struct device_attribute *attr, 131 + char *buf) 132 + { 133 + struct usnic_ib_dev *us_ibdev; 134 + 135 + us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev); 136 + 137 + return scnprintf(buf, PAGE_SIZE, "%s\n", 138 + netdev_name(us_ibdev->netdev)); 139 + } 140 + 141 + static ssize_t 142 + usnic_ib_show_max_vf(struct device *device, struct device_attribute *attr, 143 + char *buf) 144 + { 145 + struct usnic_ib_dev *us_ibdev; 146 + 147 + us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev); 148 + 149 + return scnprintf(buf, PAGE_SIZE, "%u\n", 150 + atomic_read(&us_ibdev->vf_cnt.refcount)); 151 + } 152 + 153 + static ssize_t 154 + usnic_ib_show_qp_per_vf(struct device *device, struct device_attribute *attr, 155 + char *buf) 156 + { 157 + struct usnic_ib_dev *us_ibdev; 158 + int qp_per_vf; 159 + 160 + us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev); 161 + qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ], 162 + us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]); 163 + 164 + return scnprintf(buf, PAGE_SIZE, 165 + "%d\n", qp_per_vf); 166 + } 167 + 168 + static ssize_t 169 + usnic_ib_show_cq_per_vf(struct device *device, struct device_attribute *attr, 170 + char *buf) 171 + { 172 + struct usnic_ib_dev *us_ibdev; 173 + 174 + us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev); 175 + 176 + return scnprintf(buf, PAGE_SIZE, "%d\n", 177 + us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]); 178 + } 179 + 180 + static DEVICE_ATTR(fw_ver, S_IRUGO, usnic_ib_show_fw_ver, NULL); 181 + static DEVICE_ATTR(board_id, S_IRUGO, usnic_ib_show_board, NULL); 182 + static DEVICE_ATTR(config, S_IRUGO, usnic_ib_show_config, NULL); 183 + static DEVICE_ATTR(iface, S_IRUGO, usnic_ib_show_iface, NULL); 184 + static DEVICE_ATTR(max_vf, S_IRUGO, usnic_ib_show_max_vf, NULL); 185 + static DEVICE_ATTR(qp_per_vf, S_IRUGO, usnic_ib_show_qp_per_vf, NULL); 186 + static DEVICE_ATTR(cq_per_vf, S_IRUGO, usnic_ib_show_cq_per_vf, NULL); 187 + 188 + static struct device_attribute *usnic_class_attributes[] = { 189 + &dev_attr_fw_ver, 190 + &dev_attr_board_id, 191 + &dev_attr_config, 192 + &dev_attr_iface, 193 + &dev_attr_max_vf, 194 + &dev_attr_qp_per_vf, 195 + &dev_attr_cq_per_vf, 196 + }; 197 + 198 + struct 
qpn_attribute { 199 + struct attribute attr; 200 + ssize_t (*show)(struct usnic_ib_qp_grp *, char *buf); 201 + }; 202 + 203 + /* 204 + * Definitions for supporting QPN entries in sysfs 205 + */ 206 + static ssize_t 207 + usnic_ib_qpn_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) 208 + { 209 + struct usnic_ib_qp_grp *qp_grp; 210 + struct qpn_attribute *qpn_attr; 211 + 212 + qp_grp = container_of(kobj, struct usnic_ib_qp_grp, kobj); 213 + qpn_attr = container_of(attr, struct qpn_attribute, attr); 214 + 215 + return qpn_attr->show(qp_grp, buf); 216 + } 217 + 218 + static const struct sysfs_ops usnic_ib_qpn_sysfs_ops = { 219 + .show = usnic_ib_qpn_attr_show 220 + }; 221 + 222 + #define QPN_ATTR_RO(NAME) \ 223 + struct qpn_attribute qpn_attr_##NAME = __ATTR_RO(NAME) 224 + 225 + static ssize_t context_show(struct usnic_ib_qp_grp *qp_grp, char *buf) 226 + { 227 + return scnprintf(buf, PAGE_SIZE, "0x%p\n", qp_grp->ctx); 228 + } 229 + 230 + static ssize_t summary_show(struct usnic_ib_qp_grp *qp_grp, char *buf) 231 + { 232 + int i, j, n; 233 + int left; 234 + char *ptr; 235 + struct usnic_vnic_res_chunk *res_chunk; 236 + struct usnic_vnic_res *vnic_res; 237 + 238 + left = PAGE_SIZE; 239 + ptr = buf; 240 + 241 + n = scnprintf(ptr, left, 242 + "QPN: %d State: (%s) PID: %u VF Idx: %hu ", 243 + qp_grp->ibqp.qp_num, 244 + usnic_ib_qp_grp_state_to_string(qp_grp->state), 245 + qp_grp->owner_pid, 246 + usnic_vnic_get_index(qp_grp->vf->vnic)); 247 + UPDATE_PTR_LEFT(n, ptr, left); 248 + 249 + for (i = 0; qp_grp->res_chunk_list[i]; i++) { 250 + res_chunk = qp_grp->res_chunk_list[i]; 251 + for (j = 0; j < res_chunk->cnt; j++) { 252 + vnic_res = res_chunk->res[j]; 253 + n = scnprintf(ptr, left, "%s[%d] ", 254 + usnic_vnic_res_type_to_str(vnic_res->type), 255 + vnic_res->vnic_idx); 256 + UPDATE_PTR_LEFT(n, ptr, left); 257 + } 258 + } 259 + 260 + n = scnprintf(ptr, left, "\n"); 261 + UPDATE_PTR_LEFT(n, ptr, left); 262 + 263 + return ptr - buf; 264 + } 265 + 266 + static QPN_ATTR_RO(context); 267 + static QPN_ATTR_RO(summary); 268 + 269 + static struct attribute *usnic_ib_qpn_default_attrs[] = { 270 + &qpn_attr_context.attr, 271 + &qpn_attr_summary.attr, 272 + NULL 273 + }; 274 + 275 + static struct kobj_type usnic_ib_qpn_type = { 276 + .sysfs_ops = &usnic_ib_qpn_sysfs_ops, 277 + .default_attrs = usnic_ib_qpn_default_attrs 278 + }; 279 + 280 + int usnic_ib_sysfs_register_usdev(struct usnic_ib_dev *us_ibdev) 281 + { 282 + int i; 283 + int err; 284 + for (i = 0; i < ARRAY_SIZE(usnic_class_attributes); ++i) { 285 + err = device_create_file(&us_ibdev->ib_dev.dev, 286 + usnic_class_attributes[i]); 287 + if (err) { 288 + usnic_err("Failed to create device file %d for %s eith err %d", 289 + i, us_ibdev->ib_dev.name, err); 290 + return -EINVAL; 291 + } 292 + } 293 + 294 + /* create kernel object for looking at individual QPs */ 295 + kobject_get(&us_ibdev->ib_dev.dev.kobj); 296 + us_ibdev->qpn_kobj = kobject_create_and_add("qpn", 297 + &us_ibdev->ib_dev.dev.kobj); 298 + if (us_ibdev->qpn_kobj == NULL) { 299 + kobject_put(&us_ibdev->ib_dev.dev.kobj); 300 + return -ENOMEM; 301 + } 302 + 303 + return 0; 304 + } 305 + 306 + void usnic_ib_sysfs_unregister_usdev(struct usnic_ib_dev *us_ibdev) 307 + { 308 + int i; 309 + for (i = 0; i < ARRAY_SIZE(usnic_class_attributes); ++i) { 310 + device_remove_file(&us_ibdev->ib_dev.dev, 311 + usnic_class_attributes[i]); 312 + } 313 + 314 + kobject_put(us_ibdev->qpn_kobj); 315 + } 316 + 317 + void usnic_ib_sysfs_qpn_add(struct usnic_ib_qp_grp *qp_grp) 318 + { 319 + struct 
usnic_ib_dev *us_ibdev; 320 + int err; 321 + 322 + us_ibdev = qp_grp->vf->pf; 323 + 324 + err = kobject_init_and_add(&qp_grp->kobj, &usnic_ib_qpn_type, 325 + kobject_get(us_ibdev->qpn_kobj), 326 + "%d", qp_grp->grp_id); 327 + if (err) { 328 + kobject_put(us_ibdev->qpn_kobj); 329 + return; 330 + } 331 + } 332 + 333 + void usnic_ib_sysfs_qpn_remove(struct usnic_ib_qp_grp *qp_grp) 334 + { 335 + struct usnic_ib_dev *us_ibdev; 336 + 337 + us_ibdev = qp_grp->vf->pf; 338 + 339 + kobject_put(&qp_grp->kobj); 340 + kobject_put(us_ibdev->qpn_kobj); 341 + }
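The per-QPN sysfs entries above use a private qpn_attribute type whose show() callback is dispatched through usnic_ib_qpn_sysfs_ops: the generic show handler recovers the QP group from the kobject and the specific attribute from the struct attribute with two container_of() steps. A user-space sketch of that dispatch pattern follows; it defines its own minimal container_of and stand-in kobject/attribute types, since this is plain C rather than kernel code.

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct kobj { const char *name; };
struct attr { const char *name; };

struct qp_grp { int qp_num; struct kobj kobj; };

struct qpn_attribute {
	struct attr attr;
	int (*show)(struct qp_grp *grp, char *buf, size_t len);
};

static int summary_show(struct qp_grp *grp, char *buf, size_t len)
{
	return snprintf(buf, len, "QPN: %d\n", grp->qp_num);
}

/* Equivalent of usnic_ib_qpn_attr_show(): recover both containers and
 * forward to the attribute-specific show() callback. */
static int qpn_attr_show(struct kobj *kobj, struct attr *attr,
			 char *buf, size_t len)
{
	struct qp_grp *grp = container_of(kobj, struct qp_grp, kobj);
	struct qpn_attribute *qattr =
		container_of(attr, struct qpn_attribute, attr);

	return qattr->show(grp, buf, len);
}

static struct qpn_attribute qpn_attr_summary = {
	.attr = { .name = "summary" }, .show = summary_show,
};

int main(void)
{
	struct qp_grp grp = { .qp_num = 42, .kobj = { .name = "42" } };
	char buf[64];

	qpn_attr_show(&grp.kobj, &qpn_attr_summary.attr, buf, sizeof(buf));
	fputs(buf, stdout);
	return 0;
}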
+29
drivers/infiniband/hw/usnic/usnic_ib_sysfs.h
··· 1 + /* 2 + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. 3 + * 4 + * This program is free software; you may redistribute it and/or modify 5 + * it under the terms of the GNU General Public License as published by 6 + * the Free Software Foundation; version 2 of the License. 7 + * 8 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 9 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 10 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 11 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 12 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 13 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 + * SOFTWARE. 16 + * 17 + */ 18 + 19 + #ifndef USNIC_IB_SYSFS_H_ 20 + #define USNIC_IB_SYSFS_H_ 21 + 22 + #include "usnic_ib.h" 23 + 24 + int usnic_ib_sysfs_register_usdev(struct usnic_ib_dev *us_ibdev); 25 + void usnic_ib_sysfs_unregister_usdev(struct usnic_ib_dev *us_ibdev); 26 + void usnic_ib_sysfs_qpn_add(struct usnic_ib_qp_grp *qp_grp); 27 + void usnic_ib_sysfs_qpn_remove(struct usnic_ib_qp_grp *qp_grp); 28 + 29 + #endif /* !USNIC_IB_SYSFS_H_ */
+765
drivers/infiniband/hw/usnic/usnic_ib_verbs.c
··· 1 + /* 2 + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. 3 + * 4 + * This program is free software; you may redistribute it and/or modify 5 + * it under the terms of the GNU General Public License as published by 6 + * the Free Software Foundation; version 2 of the License. 7 + * 8 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 9 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 10 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 11 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 12 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 13 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 + * SOFTWARE. 16 + * 17 + */ 18 + #include <linux/module.h> 19 + #include <linux/init.h> 20 + #include <linux/slab.h> 21 + #include <linux/errno.h> 22 + 23 + #include <rdma/ib_user_verbs.h> 24 + #include <rdma/ib_addr.h> 25 + 26 + #include "usnic_abi.h" 27 + #include "usnic_ib.h" 28 + #include "usnic_common_util.h" 29 + #include "usnic_ib_qp_grp.h" 30 + #include "usnic_fwd.h" 31 + #include "usnic_log.h" 32 + #include "usnic_uiom.h" 33 + #include "usnic_transport.h" 34 + 35 + #define USNIC_DEFAULT_TRANSPORT USNIC_TRANSPORT_ROCE_CUSTOM 36 + 37 + static void usnic_ib_fw_string_to_u64(char *fw_ver_str, u64 *fw_ver) 38 + { 39 + *fw_ver = (u64) *fw_ver_str; 40 + } 41 + 42 + static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp, 43 + struct ib_udata *udata) 44 + { 45 + struct usnic_ib_dev *us_ibdev; 46 + struct usnic_ib_create_qp_resp resp; 47 + struct pci_dev *pdev; 48 + struct vnic_dev_bar *bar; 49 + struct usnic_vnic_res_chunk *chunk; 50 + struct usnic_ib_qp_grp_flow *default_flow; 51 + int i, err; 52 + 53 + memset(&resp, 0, sizeof(resp)); 54 + 55 + us_ibdev = qp_grp->vf->pf; 56 + pdev = usnic_vnic_get_pdev(qp_grp->vf->vnic); 57 + if (!pdev) { 58 + usnic_err("Failed to get pdev of qp_grp %d\n", 59 + qp_grp->grp_id); 60 + return -EFAULT; 61 + } 62 + 63 + bar = usnic_vnic_get_bar(qp_grp->vf->vnic, 0); 64 + if (!bar) { 65 + usnic_err("Failed to get bar0 of qp_grp %d vf %s", 66 + qp_grp->grp_id, pci_name(pdev)); 67 + return -EFAULT; 68 + } 69 + 70 + resp.vfid = usnic_vnic_get_index(qp_grp->vf->vnic); 71 + resp.bar_bus_addr = bar->bus_addr; 72 + resp.bar_len = bar->len; 73 + 74 + chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ); 75 + if (IS_ERR_OR_NULL(chunk)) { 76 + usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n", 77 + usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_RQ), 78 + qp_grp->grp_id, 79 + PTR_ERR(chunk)); 80 + return chunk ? PTR_ERR(chunk) : -ENOMEM; 81 + } 82 + 83 + WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_RQ); 84 + resp.rq_cnt = chunk->cnt; 85 + for (i = 0; i < chunk->cnt; i++) 86 + resp.rq_idx[i] = chunk->res[i]->vnic_idx; 87 + 88 + chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_WQ); 89 + if (IS_ERR_OR_NULL(chunk)) { 90 + usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n", 91 + usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_WQ), 92 + qp_grp->grp_id, 93 + PTR_ERR(chunk)); 94 + return chunk ? 
PTR_ERR(chunk) : -ENOMEM; 95 + } 96 + 97 + WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_WQ); 98 + resp.wq_cnt = chunk->cnt; 99 + for (i = 0; i < chunk->cnt; i++) 100 + resp.wq_idx[i] = chunk->res[i]->vnic_idx; 101 + 102 + chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_CQ); 103 + if (IS_ERR_OR_NULL(chunk)) { 104 + usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n", 105 + usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_CQ), 106 + qp_grp->grp_id, 107 + PTR_ERR(chunk)); 108 + return chunk ? PTR_ERR(chunk) : -ENOMEM; 109 + } 110 + 111 + WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_CQ); 112 + resp.cq_cnt = chunk->cnt; 113 + for (i = 0; i < chunk->cnt; i++) 114 + resp.cq_idx[i] = chunk->res[i]->vnic_idx; 115 + 116 + default_flow = list_first_entry(&qp_grp->flows_lst, 117 + struct usnic_ib_qp_grp_flow, link); 118 + resp.transport = default_flow->trans_type; 119 + 120 + err = ib_copy_to_udata(udata, &resp, sizeof(resp)); 121 + if (err) { 122 + usnic_err("Failed to copy udata for %s", us_ibdev->ib_dev.name); 123 + return err; 124 + } 125 + 126 + return 0; 127 + } 128 + 129 + static struct usnic_ib_qp_grp* 130 + find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev, 131 + struct usnic_ib_pd *pd, 132 + struct usnic_transport_spec *trans_spec, 133 + struct usnic_vnic_res_spec *res_spec) 134 + { 135 + struct usnic_ib_vf *vf; 136 + struct usnic_vnic *vnic; 137 + struct usnic_ib_qp_grp *qp_grp; 138 + struct device *dev, **dev_list; 139 + int i, found = 0; 140 + 141 + BUG_ON(!mutex_is_locked(&us_ibdev->usdev_lock)); 142 + 143 + if (list_empty(&us_ibdev->vf_dev_list)) { 144 + usnic_info("No vfs to allocate\n"); 145 + return NULL; 146 + } 147 + 148 + if (usnic_ib_share_vf) { 149 + /* Try to find resouces on a used vf which is in pd */ 150 + dev_list = usnic_uiom_get_dev_list(pd->umem_pd); 151 + for (i = 0; dev_list[i]; i++) { 152 + dev = dev_list[i]; 153 + vf = pci_get_drvdata(to_pci_dev(dev)); 154 + spin_lock(&vf->lock); 155 + vnic = vf->vnic; 156 + if (!usnic_vnic_check_room(vnic, res_spec)) { 157 + usnic_dbg("Found used vnic %s from %s\n", 158 + us_ibdev->ib_dev.name, 159 + pci_name(usnic_vnic_get_pdev( 160 + vnic))); 161 + found = 1; 162 + break; 163 + } 164 + spin_unlock(&vf->lock); 165 + 166 + } 167 + usnic_uiom_free_dev_list(dev_list); 168 + } 169 + 170 + if (!found) { 171 + /* Try to find resources on an unused vf */ 172 + list_for_each_entry(vf, &us_ibdev->vf_dev_list, link) { 173 + spin_lock(&vf->lock); 174 + vnic = vf->vnic; 175 + if (vf->qp_grp_ref_cnt == 0 && 176 + usnic_vnic_check_room(vnic, res_spec) == 0) { 177 + found = 1; 178 + break; 179 + } 180 + spin_unlock(&vf->lock); 181 + } 182 + } 183 + 184 + if (!found) { 185 + usnic_info("No free qp grp found on %s\n", 186 + us_ibdev->ib_dev.name); 187 + return ERR_PTR(-ENOMEM); 188 + } 189 + 190 + qp_grp = usnic_ib_qp_grp_create(us_ibdev->ufdev, vf, pd, res_spec, 191 + trans_spec); 192 + spin_unlock(&vf->lock); 193 + if (IS_ERR_OR_NULL(qp_grp)) { 194 + usnic_err("Failed to allocate qp_grp\n"); 195 + return ERR_PTR(qp_grp ? 
PTR_ERR(qp_grp) : -ENOMEM); 196 + } 197 + 198 + return qp_grp; 199 + } 200 + 201 + static void qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp) 202 + { 203 + struct usnic_ib_vf *vf = qp_grp->vf; 204 + 205 + WARN_ON(qp_grp->state != IB_QPS_RESET); 206 + 207 + spin_lock(&vf->lock); 208 + usnic_ib_qp_grp_destroy(qp_grp); 209 + spin_unlock(&vf->lock); 210 + } 211 + 212 + static void eth_speed_to_ib_speed(int speed, u8 *active_speed, 213 + u8 *active_width) 214 + { 215 + if (speed <= 10000) { 216 + *active_width = IB_WIDTH_1X; 217 + *active_speed = IB_SPEED_FDR10; 218 + } else if (speed <= 20000) { 219 + *active_width = IB_WIDTH_4X; 220 + *active_speed = IB_SPEED_DDR; 221 + } else if (speed <= 30000) { 222 + *active_width = IB_WIDTH_4X; 223 + *active_speed = IB_SPEED_QDR; 224 + } else if (speed <= 40000) { 225 + *active_width = IB_WIDTH_4X; 226 + *active_speed = IB_SPEED_FDR10; 227 + } else { 228 + *active_width = IB_WIDTH_4X; 229 + *active_speed = IB_SPEED_EDR; 230 + } 231 + } 232 + 233 + static int create_qp_validate_user_data(struct usnic_ib_create_qp_cmd cmd) 234 + { 235 + if (cmd.spec.trans_type <= USNIC_TRANSPORT_UNKNOWN || 236 + cmd.spec.trans_type >= USNIC_TRANSPORT_MAX) 237 + return -EINVAL; 238 + 239 + return 0; 240 + } 241 + 242 + /* Start of ib callback functions */ 243 + 244 + enum rdma_link_layer usnic_ib_port_link_layer(struct ib_device *device, 245 + u8 port_num) 246 + { 247 + return IB_LINK_LAYER_ETHERNET; 248 + } 249 + 250 + int usnic_ib_query_device(struct ib_device *ibdev, 251 + struct ib_device_attr *props) 252 + { 253 + struct usnic_ib_dev *us_ibdev = to_usdev(ibdev); 254 + union ib_gid gid; 255 + struct ethtool_drvinfo info; 256 + struct ethtool_cmd cmd; 257 + int qp_per_vf; 258 + 259 + usnic_dbg("\n"); 260 + mutex_lock(&us_ibdev->usdev_lock); 261 + us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info); 262 + us_ibdev->netdev->ethtool_ops->get_settings(us_ibdev->netdev, &cmd); 263 + memset(props, 0, sizeof(*props)); 264 + usnic_mac_ip_to_gid(us_ibdev->ufdev->mac, us_ibdev->ufdev->inaddr, 265 + &gid.raw[0]); 266 + memcpy(&props->sys_image_guid, &gid.global.interface_id, 267 + sizeof(gid.global.interface_id)); 268 + usnic_ib_fw_string_to_u64(&info.fw_version[0], &props->fw_ver); 269 + props->max_mr_size = USNIC_UIOM_MAX_MR_SIZE; 270 + props->page_size_cap = USNIC_UIOM_PAGE_SIZE; 271 + props->vendor_id = PCI_VENDOR_ID_CISCO; 272 + props->vendor_part_id = PCI_DEVICE_ID_CISCO_VIC_USPACE_NIC; 273 + props->hw_ver = us_ibdev->pdev->subsystem_device; 274 + qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ], 275 + us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]); 276 + props->max_qp = qp_per_vf * 277 + atomic_read(&us_ibdev->vf_cnt.refcount); 278 + props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT | 279 + IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; 280 + props->max_cq = us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ] * 281 + atomic_read(&us_ibdev->vf_cnt.refcount); 282 + props->max_pd = USNIC_UIOM_MAX_PD_CNT; 283 + props->max_mr = USNIC_UIOM_MAX_MR_CNT; 284 + props->local_ca_ack_delay = 0; 285 + props->max_pkeys = 0; 286 + props->atomic_cap = IB_ATOMIC_NONE; 287 + props->masked_atomic_cap = props->atomic_cap; 288 + props->max_qp_rd_atom = 0; 289 + props->max_qp_init_rd_atom = 0; 290 + props->max_res_rd_atom = 0; 291 + props->max_srq = 0; 292 + props->max_srq_wr = 0; 293 + props->max_srq_sge = 0; 294 + props->max_fast_reg_page_list_len = 0; 295 + props->max_mcast_grp = 0; 296 + props->max_mcast_qp_attach = 0; 297 + 
props->max_total_mcast_qp_attach = 0; 298 + props->max_map_per_fmr = 0; 299 + /* Owned by Userspace 300 + * max_qp_wr, max_sge, max_sge_rd, max_cqe */ 301 + mutex_unlock(&us_ibdev->usdev_lock); 302 + 303 + return 0; 304 + } 305 + 306 + int usnic_ib_query_port(struct ib_device *ibdev, u8 port, 307 + struct ib_port_attr *props) 308 + { 309 + struct usnic_ib_dev *us_ibdev = to_usdev(ibdev); 310 + struct ethtool_cmd cmd; 311 + 312 + usnic_dbg("\n"); 313 + 314 + mutex_lock(&us_ibdev->usdev_lock); 315 + us_ibdev->netdev->ethtool_ops->get_settings(us_ibdev->netdev, &cmd); 316 + memset(props, 0, sizeof(*props)); 317 + 318 + props->lid = 0; 319 + props->lmc = 1; 320 + props->sm_lid = 0; 321 + props->sm_sl = 0; 322 + 323 + if (!us_ibdev->ufdev->link_up) { 324 + props->state = IB_PORT_DOWN; 325 + props->phys_state = 3; 326 + } else if (!us_ibdev->ufdev->inaddr) { 327 + props->state = IB_PORT_INIT; 328 + props->phys_state = 4; 329 + } else { 330 + props->state = IB_PORT_ACTIVE; 331 + props->phys_state = 5; 332 + } 333 + 334 + props->port_cap_flags = 0; 335 + props->gid_tbl_len = 1; 336 + props->pkey_tbl_len = 1; 337 + props->bad_pkey_cntr = 0; 338 + props->qkey_viol_cntr = 0; 339 + eth_speed_to_ib_speed(cmd.speed, &props->active_speed, 340 + &props->active_width); 341 + props->max_mtu = IB_MTU_4096; 342 + props->active_mtu = iboe_get_mtu(us_ibdev->ufdev->mtu); 343 + /* Userspace will adjust for hdrs */ 344 + props->max_msg_sz = us_ibdev->ufdev->mtu; 345 + props->max_vl_num = 1; 346 + mutex_unlock(&us_ibdev->usdev_lock); 347 + 348 + return 0; 349 + } 350 + 351 + int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, 352 + int qp_attr_mask, 353 + struct ib_qp_init_attr *qp_init_attr) 354 + { 355 + struct usnic_ib_qp_grp *qp_grp; 356 + struct usnic_ib_vf *vf; 357 + int err; 358 + 359 + usnic_dbg("\n"); 360 + 361 + memset(qp_attr, 0, sizeof(*qp_attr)); 362 + memset(qp_init_attr, 0, sizeof(*qp_init_attr)); 363 + 364 + qp_grp = to_uqp_grp(qp); 365 + vf = qp_grp->vf; 366 + mutex_lock(&vf->pf->usdev_lock); 367 + usnic_dbg("\n"); 368 + qp_attr->qp_state = qp_grp->state; 369 + qp_attr->cur_qp_state = qp_grp->state; 370 + 371 + switch (qp_grp->ibqp.qp_type) { 372 + case IB_QPT_UD: 373 + qp_attr->qkey = 0; 374 + break; 375 + default: 376 + usnic_err("Unexpected qp_type %d\n", qp_grp->ibqp.qp_type); 377 + err = -EINVAL; 378 + goto err_out; 379 + } 380 + 381 + mutex_unlock(&vf->pf->usdev_lock); 382 + return 0; 383 + 384 + err_out: 385 + mutex_unlock(&vf->pf->usdev_lock); 386 + return err; 387 + } 388 + 389 + int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index, 390 + union ib_gid *gid) 391 + { 392 + 393 + struct usnic_ib_dev *us_ibdev = to_usdev(ibdev); 394 + usnic_dbg("\n"); 395 + 396 + if (index > 1) 397 + return -EINVAL; 398 + 399 + mutex_lock(&us_ibdev->usdev_lock); 400 + memset(&(gid->raw[0]), 0, sizeof(gid->raw)); 401 + usnic_mac_ip_to_gid(us_ibdev->ufdev->mac, us_ibdev->ufdev->inaddr, 402 + &gid->raw[0]); 403 + mutex_unlock(&us_ibdev->usdev_lock); 404 + 405 + return 0; 406 + } 407 + 408 + int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, 409 + u16 *pkey) 410 + { 411 + if (index > 1) 412 + return -EINVAL; 413 + 414 + *pkey = 0xffff; 415 + return 0; 416 + } 417 + 418 + struct ib_pd *usnic_ib_alloc_pd(struct ib_device *ibdev, 419 + struct ib_ucontext *context, 420 + struct ib_udata *udata) 421 + { 422 + struct usnic_ib_pd *pd; 423 + void *umem_pd; 424 + 425 + usnic_dbg("\n"); 426 + 427 + pd = kzalloc(sizeof(*pd), GFP_KERNEL); 428 + if (!pd) 429 + return 
ERR_PTR(-ENOMEM); 430 + 431 + umem_pd = pd->umem_pd = usnic_uiom_alloc_pd(); 432 + if (IS_ERR_OR_NULL(umem_pd)) { 433 + kfree(pd); 434 + return ERR_PTR(umem_pd ? PTR_ERR(umem_pd) : -ENOMEM); 435 + } 436 + 437 + usnic_info("domain 0x%p allocated for context 0x%p and device %s\n", 438 + pd, context, ibdev->name); 439 + return &pd->ibpd; 440 + } 441 + 442 + int usnic_ib_dealloc_pd(struct ib_pd *pd) 443 + { 444 + usnic_info("freeing domain 0x%p\n", pd); 445 + 446 + usnic_uiom_dealloc_pd((to_upd(pd))->umem_pd); 447 + kfree(pd); 448 + return 0; 449 + } 450 + 451 + struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, 452 + struct ib_qp_init_attr *init_attr, 453 + struct ib_udata *udata) 454 + { 455 + int err; 456 + struct usnic_ib_dev *us_ibdev; 457 + struct usnic_ib_qp_grp *qp_grp; 458 + struct usnic_ib_ucontext *ucontext; 459 + int cq_cnt; 460 + struct usnic_vnic_res_spec res_spec; 461 + struct usnic_ib_create_qp_cmd cmd; 462 + struct usnic_transport_spec trans_spec; 463 + 464 + usnic_dbg("\n"); 465 + 466 + ucontext = to_uucontext(pd->uobject->context); 467 + us_ibdev = to_usdev(pd->device); 468 + 469 + err = ib_copy_from_udata(&cmd, udata, sizeof(cmd)); 470 + if (err) { 471 + usnic_err("%s: cannot copy udata for create_qp\n", 472 + us_ibdev->ib_dev.name); 473 + return ERR_PTR(-EINVAL); 474 + } 475 + 476 + err = create_qp_validate_user_data(cmd); 477 + if (err) { 478 + usnic_err("%s: Failed to validate user data\n", 479 + us_ibdev->ib_dev.name); 480 + return ERR_PTR(-EINVAL); 481 + } 482 + 483 + if (init_attr->qp_type != IB_QPT_UD) { 484 + usnic_err("%s asked to make a non-UD QP: %d\n", 485 + us_ibdev->ib_dev.name, init_attr->qp_type); 486 + return ERR_PTR(-EINVAL); 487 + } 488 + 489 + trans_spec = cmd.spec; 490 + mutex_lock(&us_ibdev->usdev_lock); 491 + cq_cnt = (init_attr->send_cq == init_attr->recv_cq) ? 1 : 2; 492 + res_spec = min_transport_spec[trans_spec.trans_type]; 493 + usnic_vnic_res_spec_update(&res_spec, USNIC_VNIC_RES_TYPE_CQ, cq_cnt); 494 + qp_grp = find_free_vf_and_create_qp_grp(us_ibdev, to_upd(pd), 495 + &trans_spec, 496 + &res_spec); 497 + if (IS_ERR_OR_NULL(qp_grp)) { 498 + err = qp_grp ? 
PTR_ERR(qp_grp) : -ENOMEM; 499 + goto out_release_mutex; 500 + } 501 + 502 + err = usnic_ib_fill_create_qp_resp(qp_grp, udata); 503 + if (err) { 504 + err = -EBUSY; 505 + goto out_release_qp_grp; 506 + } 507 + 508 + qp_grp->ctx = ucontext; 509 + list_add_tail(&qp_grp->link, &ucontext->qp_grp_list); 510 + usnic_ib_log_vf(qp_grp->vf); 511 + mutex_unlock(&us_ibdev->usdev_lock); 512 + return &qp_grp->ibqp; 513 + 514 + out_release_qp_grp: 515 + qp_grp_destroy(qp_grp); 516 + out_release_mutex: 517 + mutex_unlock(&us_ibdev->usdev_lock); 518 + return ERR_PTR(err); 519 + } 520 + 521 + int usnic_ib_destroy_qp(struct ib_qp *qp) 522 + { 523 + struct usnic_ib_qp_grp *qp_grp; 524 + struct usnic_ib_vf *vf; 525 + 526 + usnic_dbg("\n"); 527 + 528 + qp_grp = to_uqp_grp(qp); 529 + vf = qp_grp->vf; 530 + mutex_lock(&vf->pf->usdev_lock); 531 + if (usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RESET, NULL)) { 532 + usnic_err("Failed to move qp grp %u to reset\n", 533 + qp_grp->grp_id); 534 + } 535 + 536 + list_del(&qp_grp->link); 537 + qp_grp_destroy(qp_grp); 538 + mutex_unlock(&vf->pf->usdev_lock); 539 + 540 + return 0; 541 + } 542 + 543 + int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 544 + int attr_mask, struct ib_udata *udata) 545 + { 546 + struct usnic_ib_qp_grp *qp_grp; 547 + int status; 548 + usnic_dbg("\n"); 549 + 550 + qp_grp = to_uqp_grp(ibqp); 551 + 552 + /* TODO: Future Support All States */ 553 + mutex_lock(&qp_grp->vf->pf->usdev_lock); 554 + if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_INIT) { 555 + status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_INIT, NULL); 556 + } else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTR) { 557 + status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTR, NULL); 558 + } else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTS) { 559 + status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTS, NULL); 560 + } else { 561 + usnic_err("Unexpected combination mask: %u state: %u\n", 562 + attr_mask & IB_QP_STATE, attr->qp_state); 563 + status = -EINVAL; 564 + } 565 + 566 + mutex_unlock(&qp_grp->vf->pf->usdev_lock); 567 + return status; 568 + } 569 + 570 + struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, int entries, 571 + int vector, struct ib_ucontext *context, 572 + struct ib_udata *udata) 573 + { 574 + struct ib_cq *cq; 575 + 576 + usnic_dbg("\n"); 577 + cq = kzalloc(sizeof(*cq), GFP_KERNEL); 578 + if (!cq) 579 + return ERR_PTR(-EBUSY); 580 + 581 + return cq; 582 + } 583 + 584 + int usnic_ib_destroy_cq(struct ib_cq *cq) 585 + { 586 + usnic_dbg("\n"); 587 + kfree(cq); 588 + return 0; 589 + } 590 + 591 + struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length, 592 + u64 virt_addr, int access_flags, 593 + struct ib_udata *udata) 594 + { 595 + struct usnic_ib_mr *mr; 596 + int err; 597 + 598 + usnic_dbg("start 0x%llx va 0x%llx length 0x%llx\n", start, 599 + virt_addr, length); 600 + 601 + mr = kzalloc(sizeof(*mr), GFP_KERNEL); 602 + if (IS_ERR_OR_NULL(mr)) 603 + return ERR_PTR(mr ? PTR_ERR(mr) : -ENOMEM); 604 + 605 + mr->umem = usnic_uiom_reg_get(to_upd(pd)->umem_pd, start, length, 606 + access_flags, 0); 607 + if (IS_ERR_OR_NULL(mr->umem)) { 608 + err = mr->umem ? 
PTR_ERR(mr->umem) : -EFAULT; 609 + goto err_free; 610 + } 611 + 612 + mr->ibmr.lkey = mr->ibmr.rkey = 0; 613 + return &mr->ibmr; 614 + 615 + err_free: 616 + kfree(mr); 617 + return ERR_PTR(err); 618 + } 619 + 620 + int usnic_ib_dereg_mr(struct ib_mr *ibmr) 621 + { 622 + struct usnic_ib_mr *mr = to_umr(ibmr); 623 + 624 + usnic_dbg("va 0x%lx length 0x%zx\n", mr->umem->va, mr->umem->length); 625 + 626 + usnic_uiom_reg_release(mr->umem, ibmr->pd->uobject->context->closing); 627 + kfree(mr); 628 + return 0; 629 + } 630 + 631 + struct ib_ucontext *usnic_ib_alloc_ucontext(struct ib_device *ibdev, 632 + struct ib_udata *udata) 633 + { 634 + struct usnic_ib_ucontext *context; 635 + struct usnic_ib_dev *us_ibdev = to_usdev(ibdev); 636 + usnic_dbg("\n"); 637 + 638 + context = kmalloc(sizeof(*context), GFP_KERNEL); 639 + if (!context) 640 + return ERR_PTR(-ENOMEM); 641 + 642 + INIT_LIST_HEAD(&context->qp_grp_list); 643 + mutex_lock(&us_ibdev->usdev_lock); 644 + list_add_tail(&context->link, &us_ibdev->ctx_list); 645 + mutex_unlock(&us_ibdev->usdev_lock); 646 + 647 + return &context->ibucontext; 648 + } 649 + 650 + int usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) 651 + { 652 + struct usnic_ib_ucontext *context = to_uucontext(ibcontext); 653 + struct usnic_ib_dev *us_ibdev = to_usdev(ibcontext->device); 654 + usnic_dbg("\n"); 655 + 656 + mutex_lock(&us_ibdev->usdev_lock); 657 + BUG_ON(!list_empty(&context->qp_grp_list)); 658 + list_del(&context->link); 659 + mutex_unlock(&us_ibdev->usdev_lock); 660 + kfree(context); 661 + return 0; 662 + } 663 + 664 + int usnic_ib_mmap(struct ib_ucontext *context, 665 + struct vm_area_struct *vma) 666 + { 667 + struct usnic_ib_ucontext *uctx = to_ucontext(context); 668 + struct usnic_ib_dev *us_ibdev; 669 + struct usnic_ib_qp_grp *qp_grp; 670 + struct usnic_ib_vf *vf; 671 + struct vnic_dev_bar *bar; 672 + dma_addr_t bus_addr; 673 + unsigned int len; 674 + unsigned int vfid; 675 + 676 + usnic_dbg("\n"); 677 + 678 + us_ibdev = to_usdev(context->device); 679 + vma->vm_flags |= VM_IO; 680 + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); 681 + vfid = vma->vm_pgoff; 682 + usnic_dbg("Page Offset %lu PAGE_SHIFT %u VFID %u\n", 683 + vma->vm_pgoff, PAGE_SHIFT, vfid); 684 + 685 + mutex_lock(&us_ibdev->usdev_lock); 686 + list_for_each_entry(qp_grp, &uctx->qp_grp_list, link) { 687 + vf = qp_grp->vf; 688 + if (usnic_vnic_get_index(vf->vnic) == vfid) { 689 + bar = usnic_vnic_get_bar(vf->vnic, 0); 690 + if ((vma->vm_end - vma->vm_start) != bar->len) { 691 + usnic_err("Bar0 Len %lu - Request map %lu\n", 692 + bar->len, 693 + vma->vm_end - vma->vm_start); 694 + mutex_unlock(&us_ibdev->usdev_lock); 695 + return -EINVAL; 696 + } 697 + bus_addr = bar->bus_addr; 698 + len = bar->len; 699 + usnic_dbg("bus: %pa vaddr: %p size: %ld\n", 700 + &bus_addr, bar->vaddr, bar->len); 701 + mutex_unlock(&us_ibdev->usdev_lock); 702 + 703 + return remap_pfn_range(vma, 704 + vma->vm_start, 705 + bus_addr >> PAGE_SHIFT, 706 + len, vma->vm_page_prot); 707 + } 708 + } 709 + 710 + mutex_unlock(&us_ibdev->usdev_lock); 711 + usnic_err("No VF %u found\n", vfid); 712 + return -EINVAL; 713 + } 714 + 715 + /* In ib callbacks section - Start of stub funcs */ 716 + struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, 717 + struct ib_ah_attr *ah_attr) 718 + { 719 + usnic_dbg("\n"); 720 + return ERR_PTR(-EPERM); 721 + } 722 + 723 + int usnic_ib_destroy_ah(struct ib_ah *ah) 724 + { 725 + usnic_dbg("\n"); 726 + return -EINVAL; 727 + } 728 + 729 + int usnic_ib_post_send(struct ib_qp *ibqp, struct 
ib_send_wr *wr, 730 + struct ib_send_wr **bad_wr) 731 + { 732 + usnic_dbg("\n"); 733 + return -EINVAL; 734 + } 735 + 736 + int usnic_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, 737 + struct ib_recv_wr **bad_wr) 738 + { 739 + usnic_dbg("\n"); 740 + return -EINVAL; 741 + } 742 + 743 + int usnic_ib_poll_cq(struct ib_cq *ibcq, int num_entries, 744 + struct ib_wc *wc) 745 + { 746 + usnic_dbg("\n"); 747 + return -EINVAL; 748 + } 749 + 750 + int usnic_ib_req_notify_cq(struct ib_cq *cq, 751 + enum ib_cq_notify_flags flags) 752 + { 753 + usnic_dbg("\n"); 754 + return -EINVAL; 755 + } 756 + 757 + struct ib_mr *usnic_ib_get_dma_mr(struct ib_pd *pd, int acc) 758 + { 759 + usnic_dbg("\n"); 760 + return ERR_PTR(-ENOMEM); 761 + } 762 + 763 + 764 + /* In ib callbacks section - End of stub funcs */ 765 + /* End of ib callbacks section */
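eth_speed_to_ib_speed() above folds the ethtool link speed (in Mb/s) into the nearest IB width/speed pair that usnic_ib_query_port() can report. A stand-alone sketch of the same thresholds is below; the enum values are illustrative stand-ins for the real IB_WIDTH_*/IB_SPEED_* constants.

#include <stdio.h>

enum ib_width { WIDTH_1X, WIDTH_4X };
enum ib_speed { SPEED_DDR, SPEED_QDR, SPEED_FDR10, SPEED_EDR };

/* Same thresholds as eth_speed_to_ib_speed(); speed is in Mb/s. */
static void eth_to_ib(int speed, enum ib_speed *s, enum ib_width *w)
{
	if (speed <= 10000)      { *w = WIDTH_1X; *s = SPEED_FDR10; }
	else if (speed <= 20000) { *w = WIDTH_4X; *s = SPEED_DDR;   }
	else if (speed <= 30000) { *w = WIDTH_4X; *s = SPEED_QDR;   }
	else if (speed <= 40000) { *w = WIDTH_4X; *s = SPEED_FDR10; }
	else                     { *w = WIDTH_4X; *s = SPEED_EDR;   }
}

int main(void)
{
	enum ib_speed s;
	enum ib_width w;

	eth_to_ib(40000, &s, &w);	/* e.g. a 40G VIC uplink */
	printf("40000 Mb/s -> width %d speed %d\n", w, s);
	return 0;
}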
+72
drivers/infiniband/hw/usnic/usnic_ib_verbs.h
··· 1 + /* 2 + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. 3 + * 4 + * This program is free software; you may redistribute it and/or modify 5 + * it under the terms of the GNU General Public License as published by 6 + * the Free Software Foundation; version 2 of the License. 7 + * 8 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 9 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 10 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 11 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 12 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 13 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 + * SOFTWARE. 16 + * 17 + */ 18 + 19 + #ifndef USNIC_IB_VERBS_H_ 20 + #define USNIC_IB_VERBS_H_ 21 + 22 + #include "usnic_ib.h" 23 + 24 + enum rdma_link_layer usnic_ib_port_link_layer(struct ib_device *device, 25 + u8 port_num); 26 + int usnic_ib_query_device(struct ib_device *ibdev, 27 + struct ib_device_attr *props); 28 + int usnic_ib_query_port(struct ib_device *ibdev, u8 port, 29 + struct ib_port_attr *props); 30 + int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, 31 + int qp_attr_mask, 32 + struct ib_qp_init_attr *qp_init_attr); 33 + int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index, 34 + union ib_gid *gid); 35 + int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, 36 + u16 *pkey); 37 + struct ib_pd *usnic_ib_alloc_pd(struct ib_device *ibdev, 38 + struct ib_ucontext *context, 39 + struct ib_udata *udata); 40 + int usnic_ib_dealloc_pd(struct ib_pd *pd); 41 + struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, 42 + struct ib_qp_init_attr *init_attr, 43 + struct ib_udata *udata); 44 + int usnic_ib_destroy_qp(struct ib_qp *qp); 45 + int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 46 + int attr_mask, struct ib_udata *udata); 47 + struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, int entries, 48 + int vector, struct ib_ucontext *context, 49 + struct ib_udata *udata); 50 + int usnic_ib_destroy_cq(struct ib_cq *cq); 51 + struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length, 52 + u64 virt_addr, int access_flags, 53 + struct ib_udata *udata); 54 + int usnic_ib_dereg_mr(struct ib_mr *ibmr); 55 + struct ib_ucontext *usnic_ib_alloc_ucontext(struct ib_device *ibdev, 56 + struct ib_udata *udata); 57 + int usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext); 58 + int usnic_ib_mmap(struct ib_ucontext *context, 59 + struct vm_area_struct *vma); 60 + struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, 61 + struct ib_ah_attr *ah_attr); 62 + int usnic_ib_destroy_ah(struct ib_ah *ah); 63 + int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 64 + struct ib_send_wr **bad_wr); 65 + int usnic_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, 66 + struct ib_recv_wr **bad_wr); 67 + int usnic_ib_poll_cq(struct ib_cq *ibcq, int num_entries, 68 + struct ib_wc *wc); 69 + int usnic_ib_req_notify_cq(struct ib_cq *cq, 70 + enum ib_cq_notify_flags flags); 71 + struct ib_mr *usnic_ib_get_dma_mr(struct ib_pd *pd, int acc); 72 + #endif /* !USNIC_IB_VERBS_H */
+58
drivers/infiniband/hw/usnic/usnic_log.h
··· 1 + /* 2 + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. 3 + * 4 + * This program is free software; you may redistribute it and/or modify 5 + * it under the terms of the GNU General Public License as published by 6 + * the Free Software Foundation; version 2 of the License. 7 + * 8 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 9 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 10 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 11 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 12 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 13 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 + * SOFTWARE. 16 + * 17 + */ 18 + 19 + #ifndef USNIC_LOG_H_ 20 + #define USNIC_LOG_H_ 21 + 22 + #include "usnic.h" 23 + 24 + extern unsigned int usnic_log_lvl; 25 + 26 + #define USNIC_LOG_LVL_NONE (0) 27 + #define USNIC_LOG_LVL_ERR (1) 28 + #define USNIC_LOG_LVL_INFO (2) 29 + #define USNIC_LOG_LVL_DBG (3) 30 + 31 + #define usnic_printk(lvl, args...) \ 32 + do { \ 33 + printk(lvl "%s:%s:%d: ", DRV_NAME, __func__, \ 34 + __LINE__); \ 35 + printk(args); \ 36 + } while (0) 37 + 38 + #define usnic_dbg(args...) \ 39 + do { \ 40 + if (unlikely(usnic_log_lvl >= USNIC_LOG_LVL_DBG)) { \ 41 + usnic_printk(KERN_INFO, args); \ 42 + } \ 43 + } while (0) 44 + 45 + #define usnic_info(args...) \ 46 + do { \ 47 + if (usnic_log_lvl >= USNIC_LOG_LVL_INFO) { \ 48 + usnic_printk(KERN_INFO, args); \ 49 + } \ 50 + } while (0) 51 + 52 + #define usnic_err(args...) \ 53 + do { \ 54 + if (usnic_log_lvl >= USNIC_LOG_LVL_ERR) { \ 55 + usnic_printk(KERN_ERR, args); \ 56 + } \ 57 + } while (0) 58 + #endif /* !USNIC_LOG_H_ */
+202
drivers/infiniband/hw/usnic/usnic_transport.c
··· 1 + /* 2 + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. 3 + * 4 + * This program is free software; you may redistribute it and/or modify 5 + * it under the terms of the GNU General Public License as published by 6 + * the Free Software Foundation; version 2 of the License. 7 + * 8 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 9 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 10 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 11 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 12 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 13 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 + * SOFTWARE. 16 + * 17 + */ 18 + #include <linux/bitmap.h> 19 + #include <linux/file.h> 20 + #include <linux/module.h> 21 + #include <linux/slab.h> 22 + #include <net/inet_sock.h> 23 + 24 + #include "usnic_transport.h" 25 + #include "usnic_log.h" 26 + 27 + /* ROCE */ 28 + static unsigned long *roce_bitmap; 29 + static u16 roce_next_port = 1; 30 + #define ROCE_BITMAP_SZ ((1 << (8 /*CHAR_BIT*/ * sizeof(u16)))/8 /*CHAR BIT*/) 31 + static DEFINE_SPINLOCK(roce_bitmap_lock); 32 + 33 + const char *usnic_transport_to_str(enum usnic_transport_type type) 34 + { 35 + switch (type) { 36 + case USNIC_TRANSPORT_UNKNOWN: 37 + return "Unknown"; 38 + case USNIC_TRANSPORT_ROCE_CUSTOM: 39 + return "roce custom"; 40 + case USNIC_TRANSPORT_IPV4_UDP: 41 + return "IPv4 UDP"; 42 + case USNIC_TRANSPORT_MAX: 43 + return "Max?"; 44 + default: 45 + return "Not known"; 46 + } 47 + } 48 + 49 + int usnic_transport_sock_to_str(char *buf, int buf_sz, 50 + struct socket *sock) 51 + { 52 + int err; 53 + uint32_t addr; 54 + uint16_t port; 55 + int proto; 56 + 57 + memset(buf, 0, buf_sz); 58 + err = usnic_transport_sock_get_addr(sock, &proto, &addr, &port); 59 + if (err) 60 + return 0; 61 + 62 + return scnprintf(buf, buf_sz, "Proto:%u Addr:%pI4h Port:%hu", 63 + proto, &addr, port); 64 + } 65 + 66 + /* 67 + * reserve a port number. if "0" specified, we will try to pick one 68 + * starting at roce_next_port. 
roce_next_port will take on the values 69 + * 1..4096 70 + */ 71 + u16 usnic_transport_rsrv_port(enum usnic_transport_type type, u16 port_num) 72 + { 73 + if (type == USNIC_TRANSPORT_ROCE_CUSTOM) { 74 + spin_lock(&roce_bitmap_lock); 75 + if (!port_num) { 76 + port_num = bitmap_find_next_zero_area(roce_bitmap, 77 + ROCE_BITMAP_SZ, 78 + roce_next_port /* start */, 79 + 1 /* nr */, 80 + 0 /* align */); 81 + roce_next_port = (port_num & 4095) + 1; 82 + } else if (test_bit(port_num, roce_bitmap)) { 83 + usnic_err("Failed to allocate port for %s\n", 84 + usnic_transport_to_str(type)); 85 + spin_unlock(&roce_bitmap_lock); 86 + goto out_fail; 87 + } 88 + bitmap_set(roce_bitmap, port_num, 1); 89 + spin_unlock(&roce_bitmap_lock); 90 + } else { 91 + usnic_err("Failed to allocate port - transport %s unsupported\n", 92 + usnic_transport_to_str(type)); 93 + goto out_fail; 94 + } 95 + 96 + usnic_dbg("Allocating port %hu for %s\n", port_num, 97 + usnic_transport_to_str(type)); 98 + return port_num; 99 + 100 + out_fail: 101 + return 0; 102 + } 103 + 104 + void usnic_transport_unrsrv_port(enum usnic_transport_type type, u16 port_num) 105 + { 106 + if (type == USNIC_TRANSPORT_ROCE_CUSTOM) { 107 + spin_lock(&roce_bitmap_lock); 108 + if (!port_num) { 109 + usnic_err("Unreserved unvalid port num 0 for %s\n", 110 + usnic_transport_to_str(type)); 111 + goto out_roce_custom; 112 + } 113 + 114 + if (!test_bit(port_num, roce_bitmap)) { 115 + usnic_err("Unreserving invalid %hu for %s\n", 116 + port_num, 117 + usnic_transport_to_str(type)); 118 + goto out_roce_custom; 119 + } 120 + bitmap_clear(roce_bitmap, port_num, 1); 121 + usnic_dbg("Freeing port %hu for %s\n", port_num, 122 + usnic_transport_to_str(type)); 123 + out_roce_custom: 124 + spin_unlock(&roce_bitmap_lock); 125 + } else { 126 + usnic_err("Freeing invalid port %hu for %d\n", port_num, type); 127 + } 128 + } 129 + 130 + struct socket *usnic_transport_get_socket(int sock_fd) 131 + { 132 + struct socket *sock; 133 + int err; 134 + char buf[25]; 135 + 136 + /* sockfd_lookup will internally do a fget */ 137 + sock = sockfd_lookup(sock_fd, &err); 138 + if (!sock) { 139 + usnic_err("Unable to lookup socket for fd %d with err %d\n", 140 + sock_fd, err); 141 + return ERR_PTR(-ENOENT); 142 + } 143 + 144 + usnic_transport_sock_to_str(buf, sizeof(buf), sock); 145 + usnic_dbg("Get sock %s\n", buf); 146 + 147 + return sock; 148 + } 149 + 150 + void usnic_transport_put_socket(struct socket *sock) 151 + { 152 + char buf[100]; 153 + 154 + usnic_transport_sock_to_str(buf, sizeof(buf), sock); 155 + usnic_dbg("Put sock %s\n", buf); 156 + sockfd_put(sock); 157 + } 158 + 159 + int usnic_transport_sock_get_addr(struct socket *sock, int *proto, 160 + uint32_t *addr, uint16_t *port) 161 + { 162 + int len; 163 + int err; 164 + struct sockaddr_in sock_addr; 165 + 166 + err = sock->ops->getname(sock, 167 + (struct sockaddr *)&sock_addr, 168 + &len, 0); 169 + if (err) 170 + return err; 171 + 172 + if (sock_addr.sin_family != AF_INET) 173 + return -EINVAL; 174 + 175 + if (proto) 176 + *proto = sock->sk->sk_protocol; 177 + if (port) 178 + *port = ntohs(((struct sockaddr_in *)&sock_addr)->sin_port); 179 + if (addr) 180 + *addr = ntohl(((struct sockaddr_in *) 181 + &sock_addr)->sin_addr.s_addr); 182 + 183 + return 0; 184 + } 185 + 186 + int usnic_transport_init(void) 187 + { 188 + roce_bitmap = kzalloc(ROCE_BITMAP_SZ, GFP_KERNEL); 189 + if (!roce_bitmap) { 190 + usnic_err("Failed to allocate bit map"); 191 + return -ENOMEM; 192 + } 193 + 194 + /* Do not ever allocate bit 0, hence set it 
here */ 195 + bitmap_set(roce_bitmap, 0, 1); 196 + return 0; 197 + } 198 + 199 + void usnic_transport_fini(void) 200 + { 201 + kfree(roce_bitmap); 202 + }
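Editor's note: the helpers above implement a small bitmap-backed port namespace for USNIC_TRANSPORT_ROCE_CUSTOM — passing port_num = 0 asks for the next free port, a non-zero port_num requests that specific port, and a return of 0 signals failure. A hedged caller sketch (not part of this patch; the error code choice is an assumption):

static int example_use_custom_port(void)
{
	u16 port;

	/* 0 = let the allocator pick the next free ROCE_CUSTOM port */
	port = usnic_transport_rsrv_port(USNIC_TRANSPORT_ROCE_CUSTOM, 0);
	if (!port)
		return -EADDRINUSE;	/* 0 means the reservation failed */

	/* ... tie hardware resources to "port" here ... */

	usnic_transport_unrsrv_port(USNIC_TRANSPORT_ROCE_CUSTOM, port);
	return 0;
}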
+51
drivers/infiniband/hw/usnic/usnic_transport.h
··· 1 + /* 2 + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. 3 + * 4 + * This program is free software; you may redistribute it and/or modify 5 + * it under the terms of the GNU General Public License as published by 6 + * the Free Software Foundation; version 2 of the License. 7 + * 8 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 9 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 10 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 11 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 12 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 13 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 + * SOFTWARE. 16 + * 17 + */ 18 + 19 + #ifndef USNIC_TRANSPORT_H_ 20 + #define USNIC_TRANSPORT_H_ 21 + 22 + #include "usnic_abi.h" 23 + 24 + const char *usnic_transport_to_str(enum usnic_transport_type trans_type); 25 + /* 26 + * Returns number of bytes written, excluding null terminator. If 27 + * nothing was written, the function returns 0. 28 + */ 29 + int usnic_transport_sock_to_str(char *buf, int buf_sz, 30 + struct socket *sock); 31 + /* 32 + * Reserve a port. If "port_num" is set, then the function will try 33 + * to reserve that particular port. 34 + */ 35 + u16 usnic_transport_rsrv_port(enum usnic_transport_type type, u16 port_num); 36 + void usnic_transport_unrsrv_port(enum usnic_transport_type type, u16 port_num); 37 + /* 38 + * Do a fget on the socket refered to by sock_fd and returns the socket. 39 + * Socket will not be destroyed before usnic_transport_put_socket has 40 + * been called. 41 + */ 42 + struct socket *usnic_transport_get_socket(int sock_fd); 43 + void usnic_transport_put_socket(struct socket *sock); 44 + /* 45 + * Call usnic_transport_get_socket before calling *_sock_get_addr 46 + */ 47 + int usnic_transport_sock_get_addr(struct socket *sock, int *proto, 48 + uint32_t *addr, uint16_t *port); 49 + int usnic_transport_init(void); 50 + void usnic_transport_fini(void); 51 + #endif /* !USNIC_TRANSPORT_H */
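Editor's note: the socket helpers declared above pair a sockfd_lookup-style get with an explicit put, and the address helper returns the bound IPv4 address and port in host byte order. A hedged usage sketch (the function name and printout below are illustrative, not from the patch):

static int example_query_socket(int sock_fd)
{
	struct socket *sock;
	uint32_t addr;
	uint16_t port;
	int proto;
	int err;

	sock = usnic_transport_get_socket(sock_fd);
	if (IS_ERR(sock))
		return PTR_ERR(sock);

	err = usnic_transport_sock_get_addr(sock, &proto, &addr, &port);
	if (!err)
		pr_info("fd %d bound to 0x%08x:%hu (proto %d)\n",
			sock_fd, addr, port, proto);

	usnic_transport_put_socket(sock);
	return err;
}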
+604
drivers/infiniband/hw/usnic/usnic_uiom.c
··· 1 + /* 2 + * Copyright (c) 2005 Topspin Communications. All rights reserved. 3 + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. 4 + * Copyright (c) 2013 Cisco Systems. All rights reserved. 5 + * 6 + * This software is available to you under a choice of one of two 7 + * licenses. You may choose to be licensed under the terms of the GNU 8 + * General Public License (GPL) Version 2, available from the file 9 + * COPYING in the main directory of this source tree, or the 10 + * OpenIB.org BSD license below: 11 + * 12 + * Redistribution and use in source and binary forms, with or 13 + * without modification, are permitted provided that the following 14 + * conditions are met: 15 + * 16 + * - Redistributions of source code must retain the above 17 + * copyright notice, this list of conditions and the following 18 + * disclaimer. 19 + * 20 + * - Redistributions in binary form must reproduce the above 21 + * copyright notice, this list of conditions and the following 22 + * disclaimer in the documentation and/or other materials 23 + * provided with the distribution. 24 + * 25 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 + * SOFTWARE. 33 + */ 34 + 35 + #include <linux/mm.h> 36 + #include <linux/dma-mapping.h> 37 + #include <linux/sched.h> 38 + #include <linux/hugetlb.h> 39 + #include <linux/dma-attrs.h> 40 + #include <linux/iommu.h> 41 + #include <linux/workqueue.h> 42 + #include <linux/list.h> 43 + #include <linux/pci.h> 44 + 45 + #include "usnic_log.h" 46 + #include "usnic_uiom.h" 47 + #include "usnic_uiom_interval_tree.h" 48 + 49 + static struct workqueue_struct *usnic_uiom_wq; 50 + 51 + #define USNIC_UIOM_PAGE_CHUNK \ 52 + ((PAGE_SIZE - offsetof(struct usnic_uiom_chunk, page_list)) /\ 53 + ((void *) &((struct usnic_uiom_chunk *) 0)->page_list[1] - \ 54 + (void *) &((struct usnic_uiom_chunk *) 0)->page_list[0])) 55 + 56 + static void usnic_uiom_reg_account(struct work_struct *work) 57 + { 58 + struct usnic_uiom_reg *umem = container_of(work, 59 + struct usnic_uiom_reg, work); 60 + 61 + down_write(&umem->mm->mmap_sem); 62 + umem->mm->locked_vm -= umem->diff; 63 + up_write(&umem->mm->mmap_sem); 64 + mmput(umem->mm); 65 + kfree(umem); 66 + } 67 + 68 + static int usnic_uiom_dma_fault(struct iommu_domain *domain, 69 + struct device *dev, 70 + unsigned long iova, int flags, 71 + void *token) 72 + { 73 + usnic_err("Device %s iommu fault domain 0x%pK va 0x%lx flags 0x%x\n", 74 + dev_name(dev), 75 + domain, iova, flags); 76 + return -ENOSYS; 77 + } 78 + 79 + static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty) 80 + { 81 + struct usnic_uiom_chunk *chunk, *tmp; 82 + struct page *page; 83 + struct scatterlist *sg; 84 + int i; 85 + dma_addr_t pa; 86 + 87 + list_for_each_entry_safe(chunk, tmp, chunk_list, list) { 88 + for_each_sg(chunk->page_list, sg, chunk->nents, i) { 89 + page = sg_page(sg); 90 + pa = sg_phys(sg); 91 + if (dirty) 92 + set_page_dirty_lock(page); 93 + put_page(page); 94 + usnic_dbg("pa: %pa\n", &pa); 95 + } 96 + kfree(chunk); 97 + } 98 + } 99 + 100 + static int usnic_uiom_get_pages(unsigned long addr, 
size_t size, int writable, 101 + int dmasync, struct list_head *chunk_list) 102 + { 103 + struct page **page_list; 104 + struct scatterlist *sg; 105 + struct usnic_uiom_chunk *chunk; 106 + unsigned long locked; 107 + unsigned long lock_limit; 108 + unsigned long cur_base; 109 + unsigned long npages; 110 + int ret; 111 + int off; 112 + int i; 113 + int flags; 114 + dma_addr_t pa; 115 + DEFINE_DMA_ATTRS(attrs); 116 + 117 + if (dmasync) 118 + dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs); 119 + 120 + if (!can_do_mlock()) 121 + return -EPERM; 122 + 123 + INIT_LIST_HEAD(chunk_list); 124 + 125 + page_list = (struct page **) __get_free_page(GFP_KERNEL); 126 + if (!page_list) 127 + return -ENOMEM; 128 + 129 + npages = PAGE_ALIGN(size + (addr & ~PAGE_MASK)) >> PAGE_SHIFT; 130 + 131 + down_write(&current->mm->mmap_sem); 132 + 133 + locked = npages + current->mm->locked_vm; 134 + lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 135 + 136 + if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { 137 + ret = -ENOMEM; 138 + goto out; 139 + } 140 + 141 + flags = IOMMU_READ | IOMMU_CACHE; 142 + flags |= (writable) ? IOMMU_WRITE : 0; 143 + cur_base = addr & PAGE_MASK; 144 + ret = 0; 145 + 146 + while (npages) { 147 + ret = get_user_pages(current, current->mm, cur_base, 148 + min_t(unsigned long, npages, 149 + PAGE_SIZE / sizeof(struct page *)), 150 + 1, !writable, page_list, NULL); 151 + 152 + if (ret < 0) 153 + goto out; 154 + 155 + npages -= ret; 156 + off = 0; 157 + 158 + while (ret) { 159 + chunk = kmalloc(sizeof(*chunk) + 160 + sizeof(struct scatterlist) * 161 + min_t(int, ret, USNIC_UIOM_PAGE_CHUNK), 162 + GFP_KERNEL); 163 + if (!chunk) { 164 + ret = -ENOMEM; 165 + goto out; 166 + } 167 + 168 + chunk->nents = min_t(int, ret, USNIC_UIOM_PAGE_CHUNK); 169 + sg_init_table(chunk->page_list, chunk->nents); 170 + for_each_sg(chunk->page_list, sg, chunk->nents, i) { 171 + sg_set_page(sg, page_list[i + off], 172 + PAGE_SIZE, 0); 173 + pa = sg_phys(sg); 174 + usnic_dbg("va: 0x%lx pa: %pa\n", 175 + cur_base + i*PAGE_SIZE, &pa); 176 + } 177 + cur_base += chunk->nents * PAGE_SIZE; 178 + ret -= chunk->nents; 179 + off += chunk->nents; 180 + list_add_tail(&chunk->list, chunk_list); 181 + } 182 + 183 + ret = 0; 184 + } 185 + 186 + out: 187 + if (ret < 0) 188 + usnic_uiom_put_pages(chunk_list, 0); 189 + else 190 + current->mm->locked_vm = locked; 191 + 192 + up_write(&current->mm->mmap_sem); 193 + free_page((unsigned long) page_list); 194 + return ret; 195 + } 196 + 197 + static void usnic_uiom_unmap_sorted_intervals(struct list_head *intervals, 198 + struct usnic_uiom_pd *pd) 199 + { 200 + struct usnic_uiom_interval_node *interval, *tmp; 201 + long unsigned va, size; 202 + 203 + list_for_each_entry_safe(interval, tmp, intervals, link) { 204 + va = interval->start << PAGE_SHIFT; 205 + size = ((interval->last - interval->start) + 1) << PAGE_SHIFT; 206 + while (size > 0) { 207 + /* Workaround for RH 970401 */ 208 + usnic_dbg("va 0x%lx size 0x%lx", va, PAGE_SIZE); 209 + iommu_unmap(pd->domain, va, PAGE_SIZE); 210 + va += PAGE_SIZE; 211 + size -= PAGE_SIZE; 212 + } 213 + } 214 + } 215 + 216 + static void __usnic_uiom_reg_release(struct usnic_uiom_pd *pd, 217 + struct usnic_uiom_reg *uiomr, 218 + int dirty) 219 + { 220 + int npages; 221 + unsigned long vpn_start, vpn_last; 222 + struct usnic_uiom_interval_node *interval, *tmp; 223 + int writable = 0; 224 + LIST_HEAD(rm_intervals); 225 + 226 + npages = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT; 227 + vpn_start = (uiomr->va & PAGE_MASK) >> PAGE_SHIFT; 228 + 
vpn_last = vpn_start + npages - 1; 229 + 230 + spin_lock(&pd->lock); 231 + usnic_uiom_remove_interval(&pd->rb_root, vpn_start, 232 + vpn_last, &rm_intervals); 233 + usnic_uiom_unmap_sorted_intervals(&rm_intervals, pd); 234 + 235 + list_for_each_entry_safe(interval, tmp, &rm_intervals, link) { 236 + if (interval->flags & IOMMU_WRITE) 237 + writable = 1; 238 + list_del(&interval->link); 239 + kfree(interval); 240 + } 241 + 242 + usnic_uiom_put_pages(&uiomr->chunk_list, dirty & writable); 243 + spin_unlock(&pd->lock); 244 + } 245 + 246 + static int usnic_uiom_map_sorted_intervals(struct list_head *intervals, 247 + struct usnic_uiom_reg *uiomr) 248 + { 249 + int i, err; 250 + size_t size; 251 + struct usnic_uiom_chunk *chunk; 252 + struct usnic_uiom_interval_node *interval_node; 253 + dma_addr_t pa; 254 + dma_addr_t pa_start = 0; 255 + dma_addr_t pa_end = 0; 256 + long int va_start = -EINVAL; 257 + struct usnic_uiom_pd *pd = uiomr->pd; 258 + long int va = uiomr->va & PAGE_MASK; 259 + int flags = IOMMU_READ | IOMMU_CACHE; 260 + 261 + flags |= (uiomr->writable) ? IOMMU_WRITE : 0; 262 + chunk = list_first_entry(&uiomr->chunk_list, struct usnic_uiom_chunk, 263 + list); 264 + list_for_each_entry(interval_node, intervals, link) { 265 + iter_chunk: 266 + for (i = 0; i < chunk->nents; i++, va += PAGE_SIZE) { 267 + pa = sg_phys(&chunk->page_list[i]); 268 + if ((va >> PAGE_SHIFT) < interval_node->start) 269 + continue; 270 + 271 + if ((va >> PAGE_SHIFT) == interval_node->start) { 272 + /* First page of the interval */ 273 + va_start = va; 274 + pa_start = pa; 275 + pa_end = pa; 276 + } 277 + 278 + WARN_ON(va_start == -EINVAL); 279 + 280 + if ((pa_end + PAGE_SIZE != pa) && 281 + (pa != pa_start)) { 282 + /* PAs are not contiguous */ 283 + size = pa_end - pa_start + PAGE_SIZE; 284 + usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x", 285 + va_start, &pa_start, size, flags); 286 + err = iommu_map(pd->domain, va_start, pa_start, 287 + size, flags); 288 + if (err) { 289 + usnic_err("Failed to map va 0x%lx pa 0x%pa size 0x%zx with err %d\n", 290 + va_start, &pa_start, size, err); 291 + goto err_out; 292 + } 293 + va_start = va; 294 + pa_start = pa; 295 + pa_end = pa; 296 + } 297 + 298 + if ((va >> PAGE_SHIFT) == interval_node->last) { 299 + /* Last page of the interval */ 300 + size = pa - pa_start + PAGE_SIZE; 301 + usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x\n", 302 + va_start, &pa_start, size, flags); 303 + err = iommu_map(pd->domain, va_start, pa_start, 304 + size, flags); 305 + if (err) { 306 + usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n", 307 + va_start, &pa_start, size, err); 308 + goto err_out; 309 + } 310 + break; 311 + } 312 + 313 + if (pa != pa_start) 314 + pa_end += PAGE_SIZE; 315 + } 316 + 317 + if (i == chunk->nents) { 318 + /* 319 + * Hit last entry of the chunk, 320 + * hence advance to next chunk 321 + */ 322 + chunk = list_first_entry(&chunk->list, 323 + struct usnic_uiom_chunk, 324 + list); 325 + goto iter_chunk; 326 + } 327 + } 328 + 329 + return 0; 330 + 331 + err_out: 332 + usnic_uiom_unmap_sorted_intervals(intervals, pd); 333 + return err; 334 + } 335 + 336 + struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd, 337 + unsigned long addr, size_t size, 338 + int writable, int dmasync) 339 + { 340 + struct usnic_uiom_reg *uiomr; 341 + unsigned long va_base, vpn_start, vpn_last; 342 + unsigned long npages; 343 + int offset, err; 344 + LIST_HEAD(sorted_diff_intervals); 345 + 346 + /* 347 + * Intel IOMMU map throws an error if a translation entry is 348 
+ * changed from read to write. This module may not unmap 349 + * and then remap the entry after fixing the permission 350 + * b/c this open up a small windows where hw DMA may page fault 351 + * Hence, make all entries to be writable. 352 + */ 353 + writable = 1; 354 + 355 + va_base = addr & PAGE_MASK; 356 + offset = addr & ~PAGE_MASK; 357 + npages = PAGE_ALIGN(size + offset) >> PAGE_SHIFT; 358 + vpn_start = (addr & PAGE_MASK) >> PAGE_SHIFT; 359 + vpn_last = vpn_start + npages - 1; 360 + 361 + uiomr = kmalloc(sizeof(*uiomr), GFP_KERNEL); 362 + if (!uiomr) 363 + return ERR_PTR(-ENOMEM); 364 + 365 + uiomr->va = va_base; 366 + uiomr->offset = offset; 367 + uiomr->length = size; 368 + uiomr->writable = writable; 369 + uiomr->pd = pd; 370 + 371 + err = usnic_uiom_get_pages(addr, size, writable, dmasync, 372 + &uiomr->chunk_list); 373 + if (err) { 374 + usnic_err("Failed get_pages vpn [0x%lx,0x%lx] err %d\n", 375 + vpn_start, vpn_last, err); 376 + goto out_free_uiomr; 377 + } 378 + 379 + spin_lock(&pd->lock); 380 + err = usnic_uiom_get_intervals_diff(vpn_start, vpn_last, 381 + (writable) ? IOMMU_WRITE : 0, 382 + IOMMU_WRITE, 383 + &pd->rb_root, 384 + &sorted_diff_intervals); 385 + if (err) { 386 + usnic_err("Failed disjoint interval vpn [0x%lx,0x%lx] err %d\n", 387 + vpn_start, vpn_last, err); 388 + goto out_put_pages; 389 + } 390 + 391 + err = usnic_uiom_map_sorted_intervals(&sorted_diff_intervals, uiomr); 392 + if (err) { 393 + usnic_err("Failed map interval vpn [0x%lx,0x%lx] err %d\n", 394 + vpn_start, vpn_last, err); 395 + goto out_put_intervals; 396 + 397 + } 398 + 399 + err = usnic_uiom_insert_interval(&pd->rb_root, vpn_start, vpn_last, 400 + (writable) ? IOMMU_WRITE : 0); 401 + if (err) { 402 + usnic_err("Failed insert interval vpn [0x%lx,0x%lx] err %d\n", 403 + vpn_start, vpn_last, err); 404 + goto out_unmap_intervals; 405 + } 406 + 407 + usnic_uiom_put_interval_set(&sorted_diff_intervals); 408 + spin_unlock(&pd->lock); 409 + 410 + return uiomr; 411 + 412 + out_unmap_intervals: 413 + usnic_uiom_unmap_sorted_intervals(&sorted_diff_intervals, pd); 414 + out_put_intervals: 415 + usnic_uiom_put_interval_set(&sorted_diff_intervals); 416 + out_put_pages: 417 + usnic_uiom_put_pages(&uiomr->chunk_list, 0); 418 + spin_unlock(&pd->lock); 419 + out_free_uiomr: 420 + kfree(uiomr); 421 + return ERR_PTR(err); 422 + } 423 + 424 + void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing) 425 + { 426 + struct mm_struct *mm; 427 + unsigned long diff; 428 + 429 + __usnic_uiom_reg_release(uiomr->pd, uiomr, 1); 430 + 431 + mm = get_task_mm(current); 432 + if (!mm) { 433 + kfree(uiomr); 434 + return; 435 + } 436 + 437 + diff = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT; 438 + 439 + /* 440 + * We may be called with the mm's mmap_sem already held. This 441 + * can happen when a userspace munmap() is the call that drops 442 + * the last reference to our file and calls our release 443 + * method. If there are memory regions to destroy, we'll end 444 + * up here and not be able to take the mmap_sem. In that case 445 + * we defer the vm_locked accounting to the system workqueue. 
446 + */ 447 + if (closing) { 448 + if (!down_write_trylock(&mm->mmap_sem)) { 449 + INIT_WORK(&uiomr->work, usnic_uiom_reg_account); 450 + uiomr->mm = mm; 451 + uiomr->diff = diff; 452 + 453 + queue_work(usnic_uiom_wq, &uiomr->work); 454 + return; 455 + } 456 + } else 457 + down_write(&mm->mmap_sem); 458 + 459 + current->mm->locked_vm -= diff; 460 + up_write(&mm->mmap_sem); 461 + mmput(mm); 462 + kfree(uiomr); 463 + } 464 + 465 + struct usnic_uiom_pd *usnic_uiom_alloc_pd(void) 466 + { 467 + struct usnic_uiom_pd *pd; 468 + void *domain; 469 + 470 + pd = kzalloc(sizeof(*pd), GFP_KERNEL); 471 + if (!pd) 472 + return ERR_PTR(-ENOMEM); 473 + 474 + pd->domain = domain = iommu_domain_alloc(&pci_bus_type); 475 + if (IS_ERR_OR_NULL(domain)) { 476 + usnic_err("Failed to allocate IOMMU domain with err %ld\n", 477 + PTR_ERR(pd->domain)); 478 + kfree(pd); 479 + return ERR_PTR(domain ? PTR_ERR(domain) : -ENOMEM); 480 + } 481 + 482 + iommu_set_fault_handler(pd->domain, usnic_uiom_dma_fault, NULL); 483 + 484 + spin_lock_init(&pd->lock); 485 + INIT_LIST_HEAD(&pd->devs); 486 + 487 + return pd; 488 + } 489 + 490 + void usnic_uiom_dealloc_pd(struct usnic_uiom_pd *pd) 491 + { 492 + iommu_domain_free(pd->domain); 493 + kfree(pd); 494 + } 495 + 496 + int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev) 497 + { 498 + struct usnic_uiom_dev *uiom_dev; 499 + int err; 500 + 501 + uiom_dev = kzalloc(sizeof(*uiom_dev), GFP_ATOMIC); 502 + if (!uiom_dev) 503 + return -ENOMEM; 504 + uiom_dev->dev = dev; 505 + 506 + err = iommu_attach_device(pd->domain, dev); 507 + if (err) 508 + goto out_free_dev; 509 + 510 + if (!iommu_domain_has_cap(pd->domain, IOMMU_CAP_CACHE_COHERENCY)) { 511 + usnic_err("IOMMU of %s does not support cache coherency\n", 512 + dev_name(dev)); 513 + err = -EINVAL; 514 + goto out_detach_device; 515 + } 516 + 517 + spin_lock(&pd->lock); 518 + list_add_tail(&uiom_dev->link, &pd->devs); 519 + pd->dev_cnt++; 520 + spin_unlock(&pd->lock); 521 + 522 + return 0; 523 + 524 + out_detach_device: 525 + iommu_detach_device(pd->domain, dev); 526 + out_free_dev: 527 + kfree(uiom_dev); 528 + return err; 529 + } 530 + 531 + void usnic_uiom_detach_dev_from_pd(struct usnic_uiom_pd *pd, struct device *dev) 532 + { 533 + struct usnic_uiom_dev *uiom_dev; 534 + int found = 0; 535 + 536 + spin_lock(&pd->lock); 537 + list_for_each_entry(uiom_dev, &pd->devs, link) { 538 + if (uiom_dev->dev == dev) { 539 + found = 1; 540 + break; 541 + } 542 + } 543 + 544 + if (!found) { 545 + usnic_err("Unable to free dev %s - not found\n", 546 + dev_name(dev)); 547 + spin_unlock(&pd->lock); 548 + return; 549 + } 550 + 551 + list_del(&uiom_dev->link); 552 + pd->dev_cnt--; 553 + spin_unlock(&pd->lock); 554 + 555 + return iommu_detach_device(pd->domain, dev); 556 + } 557 + 558 + struct device **usnic_uiom_get_dev_list(struct usnic_uiom_pd *pd) 559 + { 560 + struct usnic_uiom_dev *uiom_dev; 561 + struct device **devs; 562 + int i = 0; 563 + 564 + spin_lock(&pd->lock); 565 + devs = kcalloc(pd->dev_cnt + 1, sizeof(*devs), GFP_ATOMIC); 566 + if (!devs) { 567 + devs = ERR_PTR(-ENOMEM); 568 + goto out; 569 + } 570 + 571 + list_for_each_entry(uiom_dev, &pd->devs, link) { 572 + devs[i++] = uiom_dev->dev; 573 + } 574 + out: 575 + spin_unlock(&pd->lock); 576 + return devs; 577 + } 578 + 579 + void usnic_uiom_free_dev_list(struct device **devs) 580 + { 581 + kfree(devs); 582 + } 583 + 584 + int usnic_uiom_init(char *drv_name) 585 + { 586 + if (!iommu_present(&pci_bus_type)) { 587 + usnic_err("IOMMU required but not present or 
enabled. USNIC QPs will not function w/o enabling IOMMU\n"); 588 + return -EPERM; 589 + } 590 + 591 + usnic_uiom_wq = create_workqueue(drv_name); 592 + if (!usnic_uiom_wq) { 593 + usnic_err("Unable to alloc wq for drv %s\n", drv_name); 594 + return -ENOMEM; 595 + } 596 + 597 + return 0; 598 + } 599 + 600 + void usnic_uiom_fini(void) 601 + { 602 + flush_workqueue(usnic_uiom_wq); 603 + destroy_workqueue(usnic_uiom_wq); 604 + }
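Editor's note: taken together, the code above gives the driver an ib_umem-like flow — allocate an IOMMU-backed protection domain, attach the device, pin and map user memory, and tear everything down in reverse order. A hedged lifecycle sketch (the helper and its parameters are hypothetical):

static int example_uiom_lifecycle(struct device *dev,
				  unsigned long user_addr, size_t user_len)
{
	struct usnic_uiom_pd *pd;
	struct usnic_uiom_reg *reg;
	int err;

	pd = usnic_uiom_alloc_pd();
	if (IS_ERR(pd))
		return PTR_ERR(pd);

	err = usnic_uiom_attach_dev_to_pd(pd, dev);
	if (err)
		goto out_dealloc_pd;

	/* pin the user buffer and map it writable into the PD's domain */
	reg = usnic_uiom_reg_get(pd, user_addr, user_len, 1, 0);
	if (IS_ERR(reg)) {
		err = PTR_ERR(reg);
		goto out_detach;
	}

	/* ... hand the mapped region to hardware here ... */

	usnic_uiom_reg_release(reg, 0 /* not called from ->release */);
out_detach:
	usnic_uiom_detach_dev_from_pd(pd, dev);
out_dealloc_pd:
	usnic_uiom_dealloc_pd(pd);
	return err;
}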
+80
drivers/infiniband/hw/usnic/usnic_uiom.h
··· 1 + /* 2 + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. 3 + * 4 + * This program is free software; you may redistribute it and/or modify 5 + * it under the terms of the GNU General Public License as published by 6 + * the Free Software Foundation; version 2 of the License. 7 + * 8 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 9 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 10 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 11 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 12 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 13 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 + * SOFTWARE. 16 + * 17 + */ 18 + 19 + #ifndef USNIC_UIOM_H_ 20 + #define USNIC_UIOM_H_ 21 + 22 + #include <linux/list.h> 23 + #include <linux/scatterlist.h> 24 + 25 + #include "usnic_uiom_interval_tree.h" 26 + 27 + #define USNIC_UIOM_READ (1) 28 + #define USNIC_UIOM_WRITE (2) 29 + 30 + #define USNIC_UIOM_MAX_PD_CNT (1000) 31 + #define USNIC_UIOM_MAX_MR_CNT (1000000) 32 + #define USNIC_UIOM_MAX_MR_SIZE (~0UL) 33 + #define USNIC_UIOM_PAGE_SIZE (PAGE_SIZE) 34 + 35 + struct usnic_uiom_dev { 36 + struct device *dev; 37 + struct list_head link; 38 + }; 39 + 40 + struct usnic_uiom_pd { 41 + struct iommu_domain *domain; 42 + spinlock_t lock; 43 + struct rb_root rb_root; 44 + struct list_head devs; 45 + int dev_cnt; 46 + }; 47 + 48 + struct usnic_uiom_reg { 49 + struct usnic_uiom_pd *pd; 50 + unsigned long va; 51 + size_t length; 52 + int offset; 53 + int page_size; 54 + int writable; 55 + struct list_head chunk_list; 56 + struct work_struct work; 57 + struct mm_struct *mm; 58 + unsigned long diff; 59 + }; 60 + 61 + struct usnic_uiom_chunk { 62 + struct list_head list; 63 + int nents; 64 + struct scatterlist page_list[0]; 65 + }; 66 + 67 + struct usnic_uiom_pd *usnic_uiom_alloc_pd(void); 68 + void usnic_uiom_dealloc_pd(struct usnic_uiom_pd *pd); 69 + int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev); 70 + void usnic_uiom_detach_dev_from_pd(struct usnic_uiom_pd *pd, 71 + struct device *dev); 72 + struct device **usnic_uiom_get_dev_list(struct usnic_uiom_pd *pd); 73 + void usnic_uiom_free_dev_list(struct device **devs); 74 + struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd, 75 + unsigned long addr, size_t size, 76 + int access, int dmasync); 77 + void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing); 78 + int usnic_uiom_init(char *drv_name); 79 + void usnic_uiom_fini(void); 80 + #endif /* USNIC_UIOM_H_ */
+236
drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c
··· 1 + #include <linux/init.h> 2 + #include <linux/list.h> 3 + #include <linux/slab.h> 4 + #include <linux/list_sort.h> 5 + 6 + #include <linux/interval_tree_generic.h> 7 + #include "usnic_uiom_interval_tree.h" 8 + 9 + #define START(node) ((node)->start) 10 + #define LAST(node) ((node)->last) 11 + 12 + #define MAKE_NODE(node, start, end, ref_cnt, flags, err, err_out) \ 13 + do { \ 14 + node = usnic_uiom_interval_node_alloc(start, \ 15 + end, ref_cnt, flags); \ 16 + if (!node) { \ 17 + err = -ENOMEM; \ 18 + goto err_out; \ 19 + } \ 20 + } while (0) 21 + 22 + #define MARK_FOR_ADD(node, list) (list_add_tail(&node->link, list)) 23 + 24 + #define MAKE_NODE_AND_APPEND(node, start, end, ref_cnt, flags, err, \ 25 + err_out, list) \ 26 + do { \ 27 + MAKE_NODE(node, start, end, \ 28 + ref_cnt, flags, err, \ 29 + err_out); \ 30 + MARK_FOR_ADD(node, list); \ 31 + } while (0) 32 + 33 + #define FLAGS_EQUAL(flags1, flags2, mask) \ 34 + (((flags1) & (mask)) == ((flags2) & (mask))) 35 + 36 + static struct usnic_uiom_interval_node* 37 + usnic_uiom_interval_node_alloc(long int start, long int last, int ref_cnt, 38 + int flags) 39 + { 40 + struct usnic_uiom_interval_node *interval = kzalloc(sizeof(*interval), 41 + GFP_ATOMIC); 42 + if (!interval) 43 + return NULL; 44 + 45 + interval->start = start; 46 + interval->last = last; 47 + interval->flags = flags; 48 + interval->ref_cnt = ref_cnt; 49 + 50 + return interval; 51 + } 52 + 53 + static int interval_cmp(void *priv, struct list_head *a, struct list_head *b) 54 + { 55 + struct usnic_uiom_interval_node *node_a, *node_b; 56 + 57 + node_a = list_entry(a, struct usnic_uiom_interval_node, link); 58 + node_b = list_entry(b, struct usnic_uiom_interval_node, link); 59 + 60 + /* long to int */ 61 + if (node_a->start < node_b->start) 62 + return -1; 63 + else if (node_a->start > node_b->start) 64 + return 1; 65 + 66 + return 0; 67 + } 68 + 69 + static void 70 + find_intervals_intersection_sorted(struct rb_root *root, unsigned long start, 71 + unsigned long last, 72 + struct list_head *list) 73 + { 74 + struct usnic_uiom_interval_node *node; 75 + 76 + INIT_LIST_HEAD(list); 77 + 78 + for (node = usnic_uiom_interval_tree_iter_first(root, start, last); 79 + node; 80 + node = usnic_uiom_interval_tree_iter_next(node, start, last)) 81 + list_add_tail(&node->link, list); 82 + 83 + list_sort(NULL, list, interval_cmp); 84 + } 85 + 86 + int usnic_uiom_get_intervals_diff(unsigned long start, unsigned long last, 87 + int flags, int flag_mask, 88 + struct rb_root *root, 89 + struct list_head *diff_set) 90 + { 91 + struct usnic_uiom_interval_node *interval, *tmp; 92 + int err = 0; 93 + long int pivot = start; 94 + LIST_HEAD(intersection_set); 95 + 96 + INIT_LIST_HEAD(diff_set); 97 + 98 + find_intervals_intersection_sorted(root, start, last, 99 + &intersection_set); 100 + 101 + list_for_each_entry(interval, &intersection_set, link) { 102 + if (pivot < interval->start) { 103 + MAKE_NODE_AND_APPEND(tmp, pivot, interval->start - 1, 104 + 1, flags, err, err_out, 105 + diff_set); 106 + pivot = interval->start; 107 + } 108 + 109 + /* 110 + * Invariant: Set [start, pivot] is either in diff_set or root, 111 + * but not in both. 
112 + */ 113 + 114 + if (pivot > interval->last) { 115 + continue; 116 + } else if (pivot <= interval->last && 117 + FLAGS_EQUAL(interval->flags, flags, 118 + flag_mask)) { 119 + pivot = interval->last + 1; 120 + } 121 + } 122 + 123 + if (pivot <= last) 124 + MAKE_NODE_AND_APPEND(tmp, pivot, last, 1, flags, err, err_out, 125 + diff_set); 126 + 127 + return 0; 128 + 129 + err_out: 130 + list_for_each_entry_safe(interval, tmp, diff_set, link) { 131 + list_del(&interval->link); 132 + kfree(interval); 133 + } 134 + 135 + return err; 136 + } 137 + 138 + void usnic_uiom_put_interval_set(struct list_head *intervals) 139 + { 140 + struct usnic_uiom_interval_node *interval, *tmp; 141 + list_for_each_entry_safe(interval, tmp, intervals, link) 142 + kfree(interval); 143 + } 144 + 145 + int usnic_uiom_insert_interval(struct rb_root *root, unsigned long start, 146 + unsigned long last, int flags) 147 + { 148 + struct usnic_uiom_interval_node *interval, *tmp; 149 + unsigned long istart, ilast; 150 + int iref_cnt, iflags; 151 + unsigned long lpivot = start; 152 + int err = 0; 153 + LIST_HEAD(to_add); 154 + LIST_HEAD(intersection_set); 155 + 156 + find_intervals_intersection_sorted(root, start, last, 157 + &intersection_set); 158 + 159 + list_for_each_entry(interval, &intersection_set, link) { 160 + /* 161 + * Invariant - lpivot is the left edge of next interval to be 162 + * inserted 163 + */ 164 + istart = interval->start; 165 + ilast = interval->last; 166 + iref_cnt = interval->ref_cnt; 167 + iflags = interval->flags; 168 + 169 + if (istart < lpivot) { 170 + MAKE_NODE_AND_APPEND(tmp, istart, lpivot - 1, iref_cnt, 171 + iflags, err, err_out, &to_add); 172 + } else if (istart > lpivot) { 173 + MAKE_NODE_AND_APPEND(tmp, lpivot, istart - 1, 1, flags, 174 + err, err_out, &to_add); 175 + lpivot = istart; 176 + } else { 177 + lpivot = istart; 178 + } 179 + 180 + if (ilast > last) { 181 + MAKE_NODE_AND_APPEND(tmp, lpivot, last, iref_cnt + 1, 182 + iflags | flags, err, err_out, 183 + &to_add); 184 + MAKE_NODE_AND_APPEND(tmp, last + 1, ilast, iref_cnt, 185 + iflags, err, err_out, &to_add); 186 + } else { 187 + MAKE_NODE_AND_APPEND(tmp, lpivot, ilast, iref_cnt + 1, 188 + iflags | flags, err, err_out, 189 + &to_add); 190 + } 191 + 192 + lpivot = ilast + 1; 193 + } 194 + 195 + if (lpivot <= last) 196 + MAKE_NODE_AND_APPEND(tmp, lpivot, last, 1, flags, err, err_out, 197 + &to_add); 198 + 199 + list_for_each_entry_safe(interval, tmp, &intersection_set, link) { 200 + usnic_uiom_interval_tree_remove(interval, root); 201 + kfree(interval); 202 + } 203 + 204 + list_for_each_entry(interval, &to_add, link) 205 + usnic_uiom_interval_tree_insert(interval, root); 206 + 207 + return 0; 208 + 209 + err_out: 210 + list_for_each_entry_safe(interval, tmp, &to_add, link) 211 + kfree(interval); 212 + 213 + return err; 214 + } 215 + 216 + void usnic_uiom_remove_interval(struct rb_root *root, unsigned long start, 217 + unsigned long last, struct list_head *removed) 218 + { 219 + struct usnic_uiom_interval_node *interval; 220 + 221 + for (interval = usnic_uiom_interval_tree_iter_first(root, start, last); 222 + interval; 223 + interval = usnic_uiom_interval_tree_iter_next(interval, 224 + start, 225 + last)) { 226 + if (--interval->ref_cnt == 0) 227 + list_add_tail(&interval->link, removed); 228 + } 229 + 230 + list_for_each_entry(interval, removed, link) 231 + usnic_uiom_interval_tree_remove(interval, root); 232 + } 233 + 234 + INTERVAL_TREE_DEFINE(struct usnic_uiom_interval_node, rb, 235 + unsigned long, __subtree_last, 236 + START, 
LAST, , usnic_uiom_interval_tree)
+73
drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h
··· 1 + /* 2 + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. 3 + * 4 + * This program is free software; you may redistribute it and/or modify 5 + * it under the terms of the GNU General Public License as published by 6 + * the Free Software Foundation; version 2 of the License. 7 + * 8 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 9 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 10 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 11 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 12 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 13 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 + * SOFTWARE. 16 + * 17 + */ 18 + 19 + #ifndef USNIC_UIOM_INTERVAL_TREE_H_ 20 + #define USNIC_UIOM_INTERVAL_TREE_H_ 21 + 22 + #include <linux/rbtree.h> 23 + 24 + struct usnic_uiom_interval_node { 25 + struct rb_node rb; 26 + struct list_head link; 27 + unsigned long start; 28 + unsigned long last; 29 + unsigned long __subtree_last; 30 + unsigned int ref_cnt; 31 + int flags; 32 + }; 33 + 34 + extern void 35 + usnic_uiom_interval_tree_insert(struct usnic_uiom_interval_node *node, 36 + struct rb_root *root); 37 + extern void 38 + usnic_uiom_interval_tree_remove(struct usnic_uiom_interval_node *node, 39 + struct rb_root *root); 40 + extern struct usnic_uiom_interval_node * 41 + usnic_uiom_interval_tree_iter_first(struct rb_root *root, 42 + unsigned long start, 43 + unsigned long last); 44 + extern struct usnic_uiom_interval_node * 45 + usnic_uiom_interval_tree_iter_next(struct usnic_uiom_interval_node *node, 46 + unsigned long start, unsigned long last); 47 + /* 48 + * Inserts {start...last} into {root}. If there are overlaps, 49 + * nodes will be broken up and merged 50 + */ 51 + int usnic_uiom_insert_interval(struct rb_root *root, 52 + unsigned long start, unsigned long last, 53 + int flags); 54 + /* 55 + * Removed {start...last} from {root}. The nodes removed are returned in 56 + * 'removed.' The caller is responsibile for freeing memory of nodes in 57 + * 'removed.' 58 + */ 59 + void usnic_uiom_remove_interval(struct rb_root *root, 60 + unsigned long start, unsigned long last, 61 + struct list_head *removed); 62 + /* 63 + * Returns {start...last} - {root} (relative complement of {start...last} in 64 + * {root}) in diff_set sorted ascendingly 65 + */ 66 + int usnic_uiom_get_intervals_diff(unsigned long start, 67 + unsigned long last, int flags, 68 + int flag_mask, 69 + struct rb_root *root, 70 + struct list_head *diff_set); 71 + /* Call this to free diff_set returned by usnic_uiom_get_intervals_diff */ 72 + void usnic_uiom_put_interval_set(struct list_head *intervals); 73 + #endif /* USNIC_UIOM_INTERVAL_TREE_H_ */
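Editor's note: the three operations above are meant to compose — diff what is already tracked, map only the missing pages, then insert the full range; on release, remove drops reference counts and hands back the nodes that fell out. A hedged sketch of that composition (the PD locking that usnic_uiom.c holds around these calls is omitted):

static int example_track_interval(struct rb_root *root,
				  unsigned long vpn_start,
				  unsigned long vpn_last, int flags)
{
	LIST_HEAD(unmapped);	/* pages not yet covered by the tree */
	LIST_HEAD(removed);
	int err;

	err = usnic_uiom_get_intervals_diff(vpn_start, vpn_last, flags,
					    IOMMU_WRITE, root, &unmapped);
	if (err)
		return err;

	/* ... iommu_map() the ranges listed on "unmapped" here ... */

	err = usnic_uiom_insert_interval(root, vpn_start, vpn_last, flags);
	usnic_uiom_put_interval_set(&unmapped);
	if (err)
		return err;

	/* later, on release: drop refcounts and free whatever fell out */
	usnic_uiom_remove_interval(root, vpn_start, vpn_last, &removed);
	usnic_uiom_put_interval_set(&removed);
	return 0;
}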
+467
drivers/infiniband/hw/usnic/usnic_vnic.c
··· 1 + /* 2 + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. 3 + * 4 + * This program is free software; you may redistribute it and/or modify 5 + * it under the terms of the GNU General Public License as published by 6 + * the Free Software Foundation; version 2 of the License. 7 + * 8 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 9 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 10 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 11 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 12 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 13 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 + * SOFTWARE. 16 + * 17 + */ 18 + #include <linux/errno.h> 19 + #include <linux/module.h> 20 + #include <linux/pci.h> 21 + 22 + #include "usnic_ib.h" 23 + #include "vnic_resource.h" 24 + #include "usnic_log.h" 25 + #include "usnic_vnic.h" 26 + 27 + struct usnic_vnic { 28 + struct vnic_dev *vdev; 29 + struct vnic_dev_bar bar[PCI_NUM_RESOURCES]; 30 + struct usnic_vnic_res_chunk chunks[USNIC_VNIC_RES_TYPE_MAX]; 31 + spinlock_t res_lock; 32 + }; 33 + 34 + static enum vnic_res_type _to_vnic_res_type(enum usnic_vnic_res_type res_type) 35 + { 36 + #define DEFINE_USNIC_VNIC_RES_AT(usnic_vnic_res_t, vnic_res_type, desc, val) \ 37 + vnic_res_type, 38 + #define DEFINE_USNIC_VNIC_RES(usnic_vnic_res_t, vnic_res_type, desc) \ 39 + vnic_res_type, 40 + static enum vnic_res_type usnic_vnic_type_2_vnic_type[] = { 41 + USNIC_VNIC_RES_TYPES}; 42 + #undef DEFINE_USNIC_VNIC_RES 43 + #undef DEFINE_USNIC_VNIC_RES_AT 44 + 45 + if (res_type >= USNIC_VNIC_RES_TYPE_MAX) 46 + return RES_TYPE_MAX; 47 + 48 + return usnic_vnic_type_2_vnic_type[res_type]; 49 + } 50 + 51 + const char *usnic_vnic_res_type_to_str(enum usnic_vnic_res_type res_type) 52 + { 53 + #define DEFINE_USNIC_VNIC_RES_AT(usnic_vnic_res_t, vnic_res_type, desc, val) \ 54 + desc, 55 + #define DEFINE_USNIC_VNIC_RES(usnic_vnic_res_t, vnic_res_type, desc) \ 56 + desc, 57 + static const char * const usnic_vnic_res_type_desc[] = { 58 + USNIC_VNIC_RES_TYPES}; 59 + #undef DEFINE_USNIC_VNIC_RES 60 + #undef DEFINE_USNIC_VNIC_RES_AT 61 + 62 + if (res_type >= USNIC_VNIC_RES_TYPE_MAX) 63 + return "unknown"; 64 + 65 + return usnic_vnic_res_type_desc[res_type]; 66 + 67 + } 68 + 69 + const char *usnic_vnic_pci_name(struct usnic_vnic *vnic) 70 + { 71 + return pci_name(usnic_vnic_get_pdev(vnic)); 72 + } 73 + 74 + int usnic_vnic_dump(struct usnic_vnic *vnic, char *buf, 75 + int buf_sz, 76 + void *hdr_obj, 77 + int (*printtitle)(void *, char*, int), 78 + int (*printcols)(char *, int), 79 + int (*printrow)(void *, char *, int)) 80 + { 81 + struct usnic_vnic_res_chunk *chunk; 82 + struct usnic_vnic_res *res; 83 + struct vnic_dev_bar *bar0; 84 + int i, j, offset; 85 + 86 + offset = 0; 87 + bar0 = usnic_vnic_get_bar(vnic, 0); 88 + offset += scnprintf(buf + offset, buf_sz - offset, 89 + "VF:%hu BAR0 bus_addr=%pa vaddr=0x%p size=%ld ", 90 + usnic_vnic_get_index(vnic), 91 + &bar0->bus_addr, 92 + bar0->vaddr, bar0->len); 93 + if (printtitle) 94 + offset += printtitle(hdr_obj, buf + offset, buf_sz - offset); 95 + offset += scnprintf(buf + offset, buf_sz - offset, "\n"); 96 + offset += scnprintf(buf + offset, buf_sz - offset, 97 + "|RES\t|CTRL_PIN\t\t|IN_USE\t"); 98 + if (printcols) 99 + offset += printcols(buf + offset, buf_sz - offset); 100 + offset += scnprintf(buf + offset, buf_sz - offset, 
"\n"); 101 + 102 + spin_lock(&vnic->res_lock); 103 + for (i = 0; i < ARRAY_SIZE(vnic->chunks); i++) { 104 + chunk = &vnic->chunks[i]; 105 + for (j = 0; j < chunk->cnt; j++) { 106 + res = chunk->res[j]; 107 + offset += scnprintf(buf + offset, buf_sz - offset, 108 + "|%s[%u]\t|0x%p\t|%u\t", 109 + usnic_vnic_res_type_to_str(res->type), 110 + res->vnic_idx, res->ctrl, !!res->owner); 111 + if (printrow) { 112 + offset += printrow(res->owner, buf + offset, 113 + buf_sz - offset); 114 + } 115 + offset += scnprintf(buf + offset, buf_sz - offset, 116 + "\n"); 117 + } 118 + } 119 + spin_unlock(&vnic->res_lock); 120 + return offset; 121 + } 122 + 123 + void usnic_vnic_res_spec_update(struct usnic_vnic_res_spec *spec, 124 + enum usnic_vnic_res_type trgt_type, 125 + u16 cnt) 126 + { 127 + int i; 128 + 129 + for (i = 0; i < USNIC_VNIC_RES_TYPE_MAX; i++) { 130 + if (spec->resources[i].type == trgt_type) { 131 + spec->resources[i].cnt = cnt; 132 + return; 133 + } 134 + } 135 + 136 + WARN_ON(1); 137 + } 138 + 139 + int usnic_vnic_res_spec_satisfied(const struct usnic_vnic_res_spec *min_spec, 140 + struct usnic_vnic_res_spec *res_spec) 141 + { 142 + int found, i, j; 143 + 144 + for (i = 0; i < USNIC_VNIC_RES_TYPE_MAX; i++) { 145 + found = 0; 146 + 147 + for (j = 0; j < USNIC_VNIC_RES_TYPE_MAX; j++) { 148 + if (res_spec->resources[i].type != 149 + min_spec->resources[i].type) 150 + continue; 151 + found = 1; 152 + if (min_spec->resources[i].cnt > 153 + res_spec->resources[i].cnt) 154 + return -EINVAL; 155 + break; 156 + } 157 + 158 + if (!found) 159 + return -EINVAL; 160 + } 161 + return 0; 162 + } 163 + 164 + int usnic_vnic_spec_dump(char *buf, int buf_sz, 165 + struct usnic_vnic_res_spec *res_spec) 166 + { 167 + enum usnic_vnic_res_type res_type; 168 + int res_cnt; 169 + int i; 170 + int offset = 0; 171 + 172 + for (i = 0; i < USNIC_VNIC_RES_TYPE_MAX; i++) { 173 + res_type = res_spec->resources[i].type; 174 + res_cnt = res_spec->resources[i].cnt; 175 + offset += scnprintf(buf + offset, buf_sz - offset, 176 + "Res: %s Cnt: %d ", 177 + usnic_vnic_res_type_to_str(res_type), 178 + res_cnt); 179 + } 180 + 181 + return offset; 182 + } 183 + 184 + int usnic_vnic_check_room(struct usnic_vnic *vnic, 185 + struct usnic_vnic_res_spec *res_spec) 186 + { 187 + int i; 188 + enum usnic_vnic_res_type res_type; 189 + int res_cnt; 190 + 191 + for (i = 0; i < USNIC_VNIC_RES_TYPE_MAX; i++) { 192 + res_type = res_spec->resources[i].type; 193 + res_cnt = res_spec->resources[i].cnt; 194 + 195 + if (res_type == USNIC_VNIC_RES_TYPE_EOL) 196 + break; 197 + 198 + if (res_cnt > usnic_vnic_res_free_cnt(vnic, res_type)) 199 + return -EBUSY; 200 + } 201 + 202 + return 0; 203 + } 204 + 205 + int usnic_vnic_res_cnt(struct usnic_vnic *vnic, 206 + enum usnic_vnic_res_type type) 207 + { 208 + return vnic->chunks[type].cnt; 209 + } 210 + 211 + int usnic_vnic_res_free_cnt(struct usnic_vnic *vnic, 212 + enum usnic_vnic_res_type type) 213 + { 214 + return vnic->chunks[type].free_cnt; 215 + } 216 + 217 + struct usnic_vnic_res_chunk * 218 + usnic_vnic_get_resources(struct usnic_vnic *vnic, enum usnic_vnic_res_type type, 219 + int cnt, void *owner) 220 + { 221 + struct usnic_vnic_res_chunk *src, *ret; 222 + struct usnic_vnic_res *res; 223 + int i; 224 + 225 + if (usnic_vnic_res_free_cnt(vnic, type) < cnt || cnt < 1 || !owner) 226 + return ERR_PTR(-EINVAL); 227 + 228 + ret = kzalloc(sizeof(*ret), GFP_ATOMIC); 229 + if (!ret) { 230 + usnic_err("Failed to allocate chunk for %s - Out of memory\n", 231 + usnic_vnic_pci_name(vnic)); 232 + return 
ERR_PTR(-ENOMEM); 233 + } 234 + 235 + ret->res = kzalloc(sizeof(*(ret->res))*cnt, GFP_ATOMIC); 236 + if (!ret->res) { 237 + usnic_err("Failed to allocate resources for %s. Out of memory\n", 238 + usnic_vnic_pci_name(vnic)); 239 + kfree(ret); 240 + return ERR_PTR(-ENOMEM); 241 + } 242 + 243 + spin_lock(&vnic->res_lock); 244 + src = &vnic->chunks[type]; 245 + for (i = 0; i < src->cnt && ret->cnt < cnt; i++) { 246 + res = src->res[i]; 247 + if (!res->owner) { 248 + src->free_cnt--; 249 + res->owner = owner; 250 + ret->res[ret->cnt++] = res; 251 + } 252 + } 253 + 254 + spin_unlock(&vnic->res_lock); 255 + ret->type = type; 256 + ret->vnic = vnic; 257 + WARN_ON(ret->cnt != cnt); 258 + 259 + return ret; 260 + } 261 + 262 + void usnic_vnic_put_resources(struct usnic_vnic_res_chunk *chunk) 263 + { 264 + 265 + struct usnic_vnic_res *res; 266 + int i; 267 + struct usnic_vnic *vnic = chunk->vnic; 268 + 269 + spin_lock(&vnic->res_lock); 270 + while ((i = --chunk->cnt) >= 0) { 271 + res = chunk->res[i]; 272 + chunk->res[i] = NULL; 273 + res->owner = NULL; 274 + vnic->chunks[res->type].free_cnt++; 275 + } 276 + spin_unlock(&vnic->res_lock); 277 + 278 + kfree(chunk->res); 279 + kfree(chunk); 280 + } 281 + 282 + u16 usnic_vnic_get_index(struct usnic_vnic *vnic) 283 + { 284 + return usnic_vnic_get_pdev(vnic)->devfn - 1; 285 + } 286 + 287 + static int usnic_vnic_alloc_res_chunk(struct usnic_vnic *vnic, 288 + enum usnic_vnic_res_type type, 289 + struct usnic_vnic_res_chunk *chunk) 290 + { 291 + int cnt, err, i; 292 + struct usnic_vnic_res *res; 293 + 294 + cnt = vnic_dev_get_res_count(vnic->vdev, _to_vnic_res_type(type)); 295 + if (cnt < 1) 296 + return -EINVAL; 297 + 298 + chunk->cnt = chunk->free_cnt = cnt; 299 + chunk->res = kzalloc(sizeof(*(chunk->res))*cnt, GFP_KERNEL); 300 + if (!chunk->res) 301 + return -ENOMEM; 302 + 303 + for (i = 0; i < cnt; i++) { 304 + res = kzalloc(sizeof(*res), GFP_KERNEL); 305 + if (!res) { 306 + err = -ENOMEM; 307 + goto fail; 308 + } 309 + res->type = type; 310 + res->vnic_idx = i; 311 + res->vnic = vnic; 312 + res->ctrl = vnic_dev_get_res(vnic->vdev, 313 + _to_vnic_res_type(type), i); 314 + chunk->res[i] = res; 315 + } 316 + 317 + chunk->vnic = vnic; 318 + return 0; 319 + fail: 320 + for (i--; i >= 0; i--) 321 + kfree(chunk->res[i]); 322 + kfree(chunk->res); 323 + return err; 324 + } 325 + 326 + static void usnic_vnic_free_res_chunk(struct usnic_vnic_res_chunk *chunk) 327 + { 328 + int i; 329 + for (i = 0; i < chunk->cnt; i++) 330 + kfree(chunk->res[i]); 331 + kfree(chunk->res); 332 + } 333 + 334 + static int usnic_vnic_discover_resources(struct pci_dev *pdev, 335 + struct usnic_vnic *vnic) 336 + { 337 + enum usnic_vnic_res_type res_type; 338 + int i; 339 + int err = 0; 340 + 341 + for (i = 0; i < ARRAY_SIZE(vnic->bar); i++) { 342 + if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM)) 343 + continue; 344 + vnic->bar[i].len = pci_resource_len(pdev, i); 345 + vnic->bar[i].vaddr = pci_iomap(pdev, i, vnic->bar[i].len); 346 + if (!vnic->bar[i].vaddr) { 347 + usnic_err("Cannot memory-map BAR %d, aborting\n", 348 + i); 349 + err = -ENODEV; 350 + goto out_clean_bar; 351 + } 352 + vnic->bar[i].bus_addr = pci_resource_start(pdev, i); 353 + } 354 + 355 + vnic->vdev = vnic_dev_register(NULL, pdev, pdev, vnic->bar, 356 + ARRAY_SIZE(vnic->bar)); 357 + if (!vnic->vdev) { 358 + usnic_err("Failed to register device %s\n", 359 + pci_name(pdev)); 360 + err = -EINVAL; 361 + goto out_clean_bar; 362 + } 363 + 364 + for (res_type = USNIC_VNIC_RES_TYPE_EOL + 1; 365 + res_type < 
USNIC_VNIC_RES_TYPE_MAX; res_type++) { 366 + err = usnic_vnic_alloc_res_chunk(vnic, res_type, 367 + &vnic->chunks[res_type]); 368 + if (err) { 369 + usnic_err("Failed to alloc res %s with err %d\n", 370 + usnic_vnic_res_type_to_str(res_type), 371 + err); 372 + goto out_clean_chunks; 373 + } 374 + } 375 + 376 + return 0; 377 + 378 + out_clean_chunks: 379 + for (res_type--; res_type > USNIC_VNIC_RES_TYPE_EOL; res_type--) 380 + usnic_vnic_free_res_chunk(&vnic->chunks[res_type]); 381 + vnic_dev_unregister(vnic->vdev); 382 + out_clean_bar: 383 + for (i = 0; i < ARRAY_SIZE(vnic->bar); i++) { 384 + if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM)) 385 + continue; 386 + if (!vnic->bar[i].vaddr) 387 + break; 388 + 389 + iounmap(vnic->bar[i].vaddr); 390 + } 391 + 392 + return err; 393 + } 394 + 395 + struct pci_dev *usnic_vnic_get_pdev(struct usnic_vnic *vnic) 396 + { 397 + return vnic_dev_get_pdev(vnic->vdev); 398 + } 399 + 400 + struct vnic_dev_bar *usnic_vnic_get_bar(struct usnic_vnic *vnic, 401 + int bar_num) 402 + { 403 + return (bar_num < ARRAY_SIZE(vnic->bar)) ? &vnic->bar[bar_num] : NULL; 404 + } 405 + 406 + static void usnic_vnic_release_resources(struct usnic_vnic *vnic) 407 + { 408 + int i; 409 + struct pci_dev *pdev; 410 + enum usnic_vnic_res_type res_type; 411 + 412 + pdev = usnic_vnic_get_pdev(vnic); 413 + 414 + for (res_type = USNIC_VNIC_RES_TYPE_EOL + 1; 415 + res_type < USNIC_VNIC_RES_TYPE_MAX; res_type++) 416 + usnic_vnic_free_res_chunk(&vnic->chunks[res_type]); 417 + 418 + vnic_dev_unregister(vnic->vdev); 419 + 420 + for (i = 0; i < ARRAY_SIZE(vnic->bar); i++) { 421 + if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM)) 422 + continue; 423 + iounmap(vnic->bar[i].vaddr); 424 + } 425 + } 426 + 427 + struct usnic_vnic *usnic_vnic_alloc(struct pci_dev *pdev) 428 + { 429 + struct usnic_vnic *vnic; 430 + int err = 0; 431 + 432 + if (!pci_is_enabled(pdev)) { 433 + usnic_err("PCI dev %s is disabled\n", pci_name(pdev)); 434 + return ERR_PTR(-EINVAL); 435 + } 436 + 437 + vnic = kzalloc(sizeof(*vnic), GFP_KERNEL); 438 + if (!vnic) { 439 + usnic_err("Failed to alloc vnic for %s - out of memory\n", 440 + pci_name(pdev)); 441 + return ERR_PTR(-ENOMEM); 442 + } 443 + 444 + spin_lock_init(&vnic->res_lock); 445 + 446 + err = usnic_vnic_discover_resources(pdev, vnic); 447 + if (err) { 448 + usnic_err("Failed to discover %s resources with err %d\n", 449 + pci_name(pdev), err); 450 + goto out_free_vnic; 451 + } 452 + 453 + usnic_dbg("Allocated vnic for %s\n", usnic_vnic_pci_name(vnic)); 454 + 455 + return vnic; 456 + 457 + out_free_vnic: 458 + kfree(vnic); 459 + 460 + return ERR_PTR(err); 461 + } 462 + 463 + void usnic_vnic_free(struct usnic_vnic *vnic) 464 + { 465 + usnic_vnic_release_resources(vnic); 466 + kfree(vnic); 467 + }
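Editor's note: resources discovered per VF are parcelled out as chunks and must be returned with usnic_vnic_put_resources(). A hedged caller sketch (the owner pointer is whatever object the caller uses for accounting):

static int example_claim_cqs(struct usnic_vnic *vnic, void *owner)
{
	struct usnic_vnic_res_chunk *cqs;

	cqs = usnic_vnic_get_resources(vnic, USNIC_VNIC_RES_TYPE_CQ, 2, owner);
	if (IS_ERR(cqs))
		return PTR_ERR(cqs);

	/* cqs->res[0..cnt-1]->ctrl point at the BAR-mapped control registers */

	usnic_vnic_put_resources(cqs);
	return 0;
}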
+103
drivers/infiniband/hw/usnic/usnic_vnic.h
··· 1 + /* 2 + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. 3 + * 4 + * This program is free software; you may redistribute it and/or modify 5 + * it under the terms of the GNU General Public License as published by 6 + * the Free Software Foundation; version 2 of the License. 7 + * 8 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 9 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 10 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 11 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 12 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 13 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 15 + * SOFTWARE. 16 + * 17 + */ 18 + 19 + #ifndef USNIC_VNIC_H_ 20 + #define USNIC_VNIC_H_ 21 + 22 + #include <linux/pci.h> 23 + 24 + #include "vnic_dev.h" 25 + 26 + /* =USNIC_VNIC_RES_TYPE= =VNIC_RES= =DESC= */ 27 + #define USNIC_VNIC_RES_TYPES \ 28 + DEFINE_USNIC_VNIC_RES_AT(EOL, RES_TYPE_EOL, "EOL", 0) \ 29 + DEFINE_USNIC_VNIC_RES(WQ, RES_TYPE_WQ, "WQ") \ 30 + DEFINE_USNIC_VNIC_RES(RQ, RES_TYPE_RQ, "RQ") \ 31 + DEFINE_USNIC_VNIC_RES(CQ, RES_TYPE_CQ, "CQ") \ 32 + DEFINE_USNIC_VNIC_RES(INTR, RES_TYPE_INTR_CTRL, "INT") \ 33 + DEFINE_USNIC_VNIC_RES(MAX, RES_TYPE_MAX, "MAX")\ 34 + 35 + #define DEFINE_USNIC_VNIC_RES_AT(usnic_vnic_res_t, vnic_res_type, desc, val) \ 36 + USNIC_VNIC_RES_TYPE_##usnic_vnic_res_t = val, 37 + #define DEFINE_USNIC_VNIC_RES(usnic_vnic_res_t, vnic_res_type, desc) \ 38 + USNIC_VNIC_RES_TYPE_##usnic_vnic_res_t, 39 + enum usnic_vnic_res_type { 40 + USNIC_VNIC_RES_TYPES 41 + }; 42 + #undef DEFINE_USNIC_VNIC_RES 43 + #undef DEFINE_USNIC_VNIC_RES_AT 44 + 45 + struct usnic_vnic_res { 46 + enum usnic_vnic_res_type type; 47 + unsigned int vnic_idx; 48 + struct usnic_vnic *vnic; 49 + void __iomem *ctrl; 50 + void *owner; 51 + }; 52 + 53 + struct usnic_vnic_res_chunk { 54 + enum usnic_vnic_res_type type; 55 + int cnt; 56 + int free_cnt; 57 + struct usnic_vnic_res **res; 58 + struct usnic_vnic *vnic; 59 + }; 60 + 61 + struct usnic_vnic_res_desc { 62 + enum usnic_vnic_res_type type; 63 + uint16_t cnt; 64 + }; 65 + 66 + struct usnic_vnic_res_spec { 67 + struct usnic_vnic_res_desc resources[USNIC_VNIC_RES_TYPE_MAX]; 68 + }; 69 + 70 + const char *usnic_vnic_res_type_to_str(enum usnic_vnic_res_type res_type); 71 + const char *usnic_vnic_pci_name(struct usnic_vnic *vnic); 72 + int usnic_vnic_dump(struct usnic_vnic *vnic, char *buf, int buf_sz, 73 + void *hdr_obj, 74 + int (*printtitle)(void *, char*, int), 75 + int (*printcols)(char *, int), 76 + int (*printrow)(void *, char *, int)); 77 + void usnic_vnic_res_spec_update(struct usnic_vnic_res_spec *spec, 78 + enum usnic_vnic_res_type trgt_type, 79 + u16 cnt); 80 + int usnic_vnic_res_spec_satisfied(const struct usnic_vnic_res_spec *min_spec, 81 + struct usnic_vnic_res_spec *res_spec); 82 + int usnic_vnic_spec_dump(char *buf, int buf_sz, 83 + struct usnic_vnic_res_spec *res_spec); 84 + int usnic_vnic_check_room(struct usnic_vnic *vnic, 85 + struct usnic_vnic_res_spec *res_spec); 86 + int usnic_vnic_res_cnt(struct usnic_vnic *vnic, 87 + enum usnic_vnic_res_type type); 88 + int usnic_vnic_res_free_cnt(struct usnic_vnic *vnic, 89 + enum usnic_vnic_res_type type); 90 + struct usnic_vnic_res_chunk * 91 + usnic_vnic_get_resources(struct usnic_vnic *vnic, 92 + enum usnic_vnic_res_type type, 93 + int cnt, 94 + void *owner); 95 + void 
usnic_vnic_put_resources(struct usnic_vnic_res_chunk *chunk); 96 + struct pci_dev *usnic_vnic_get_pdev(struct usnic_vnic *vnic); 97 + struct vnic_dev_bar *usnic_vnic_get_bar(struct usnic_vnic *vnic, 98 + int bar_num); 99 + struct usnic_vnic *usnic_vnic_alloc(struct pci_dev *pdev); 100 + void usnic_vnic_free(struct usnic_vnic *vnic); 101 + u16 usnic_vnic_get_index(struct usnic_vnic *vnic); 102 + 103 + #endif /*!USNIC_VNIC_H_*/
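Editor's note: a resource spec is an EOL-terminated array of (type, count) pairs, which is how usnic_vnic_check_room() knows where to stop scanning. A hedged example spec (the counts are arbitrary, for illustration only):

static struct usnic_vnic_res_spec example_spec = {
	.resources = {
		{ .type = USNIC_VNIC_RES_TYPE_WQ,  .cnt = 2 },
		{ .type = USNIC_VNIC_RES_TYPE_RQ,  .cnt = 2 },
		{ .type = USNIC_VNIC_RES_TYPE_CQ,  .cnt = 2 },
		{ .type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0 },
	},
};

static bool example_vnic_fits(struct usnic_vnic *vnic)
{
	return usnic_vnic_check_room(vnic, &example_spec) == 0;
}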
+2 -2
drivers/infiniband/ulp/ipoib/ipoib_main.c
··· 104 104 105 105 ipoib_dbg(priv, "bringing up interface\n"); 106 106 107 + netif_carrier_off(dev); 108 + 107 109 set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); 108 110 109 111 if (ipoib_pkey_dev_delay_open(dev)) ··· 1367 1365 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; 1368 1366 1369 1367 memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN); 1370 - 1371 - netif_carrier_off(dev); 1372 1368 1373 1369 priv->dev = dev; 1374 1370
+3
drivers/infiniband/ulp/ipoib/ipoib_verbs.c
··· 192 192 if (priv->hca_caps & IB_DEVICE_BLOCK_MULTICAST_LOOPBACK) 193 193 init_attr.create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; 194 194 195 + if (priv->hca_caps & IB_DEVICE_MANAGED_FLOW_STEERING) 196 + init_attr.create_flags |= IB_QP_CREATE_NETIF_QP; 197 + 195 198 if (dev->features & NETIF_F_SG) 196 199 init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1; 197 200
+1
drivers/infiniband/ulp/srp/ib_srp.c
··· 660 660 srp_rport_get(target->rport); 661 661 srp_remove_host(target->scsi_host); 662 662 scsi_remove_host(target->scsi_host); 663 + srp_stop_rport_timers(target->rport); 663 664 srp_disconnect_target(target); 664 665 ib_destroy_cm_id(target->cm_id); 665 666 srp_free_target_ib(target);
+9
drivers/net/ethernet/mellanox/mlx4/cmd.c
··· 1371 1371 .verify = NULL, 1372 1372 .wrapper = mlx4_QP_FLOW_STEERING_DETACH_wrapper 1373 1373 }, 1374 + { 1375 + .opcode = MLX4_FLOW_STEERING_IB_UC_QP_RANGE, 1376 + .has_inbox = false, 1377 + .has_outbox = false, 1378 + .out_is_imm = false, 1379 + .encode_slave_id = false, 1380 + .verify = NULL, 1381 + .wrapper = mlx4_FLOW_STEERING_IB_UC_QP_RANGE_wrapper 1382 + }, 1374 1383 }; 1375 1384 1376 1385 static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave,
+10
drivers/net/ethernet/mellanox/mlx4/fw.c
··· 513 513 #define QUERY_DEV_CAP_MAX_XRC_OFFSET 0x67 514 514 #define QUERY_DEV_CAP_MAX_COUNTERS_OFFSET 0x68 515 515 #define QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET 0x70 516 + #define QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET 0x74 516 517 #define QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET 0x76 517 518 #define QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET 0x77 518 519 #define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET 0x80 ··· 604 603 if (field & 0x80) 605 604 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FS_EN; 606 605 dev_cap->fs_log_max_ucast_qp_range_size = field & 0x1f; 606 + MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET); 607 + if (field & 0x80) 608 + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DMFS_IPOIB; 607 609 MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET); 608 610 dev_cap->fs_max_num_qp_per_entry = field; 609 611 MLX4_GET(stat_rate, outbox, QUERY_DEV_CAP_RATE_SUPPORT_OFFSET); ··· 864 860 MLX4_PUT(outbox->buf, field, 865 861 QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET); 866 862 } 863 + 864 + /* turn off ipoib managed steering for guests */ 865 + MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET); 866 + field &= ~0x80; 867 + MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET); 868 + 867 869 return 0; 868 870 } 869 871
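Editor's note: the new DMFS-for-IPoIB capability read at offset 0x74 is deliberately cleared in the slave wrapper, so only the PF ever sees it; IPoIB ultimately consumes it as IB_DEVICE_MANAGED_FLOW_STEERING (see the ipoib_verbs.c hunk above). A hedged consumer sketch — the dev->caps.flags2 test is an assumption modelled on how other FLAG2 bits are checked, not something shown in this hunk:

static bool example_dmfs_ipoib_supported(struct mlx4_dev *dev)
{
	/* guests never see this bit: the slave wrapper above clears it */
	return !!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB);
}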
+17
drivers/net/ethernet/mellanox/mlx4/mcg.c
··· 895 895 } 896 896 EXPORT_SYMBOL_GPL(mlx4_flow_detach); 897 897 898 + int mlx4_FLOW_STEERING_IB_UC_QP_RANGE(struct mlx4_dev *dev, u32 min_range_qpn, 899 + u32 max_range_qpn) 900 + { 901 + int err; 902 + u64 in_param; 903 + 904 + in_param = ((u64) min_range_qpn) << 32; 905 + in_param |= ((u64) max_range_qpn) & 0xFFFFFFFF; 906 + 907 + err = mlx4_cmd(dev, in_param, 0, 0, 908 + MLX4_FLOW_STEERING_IB_UC_QP_RANGE, 909 + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); 910 + 911 + return err; 912 + } 913 + EXPORT_SYMBOL_GPL(mlx4_FLOW_STEERING_IB_UC_QP_RANGE); 914 + 898 915 int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], 899 916 int block_mcast_loopback, enum mlx4_protocol prot, 900 917 enum mlx4_steer_type steer)
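Editor's note: mlx4_FLOW_STEERING_IB_UC_QP_RANGE() packs the minimum QPN into the upper 32 bits of the immediate argument and the maximum into the lower 32. A hedged wrapper sketch for a caller that reserves qp_cnt consecutive QPNs (editor's illustration, assuming the range is inclusive at both ends as the parameter names suggest):

static int example_set_uc_qp_range(struct mlx4_dev *dev,
				   u32 base_qpn, u32 qp_cnt)
{
	return mlx4_FLOW_STEERING_IB_UC_QP_RANGE(dev, base_qpn,
						 base_qpn + qp_cnt - 1);
}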
+5
drivers/net/ethernet/mellanox/mlx4/mlx4.h
··· 1236 1236 struct mlx4_cmd_mailbox *inbox, 1237 1237 struct mlx4_cmd_mailbox *outbox, 1238 1238 struct mlx4_cmd_info *cmd); 1239 + int mlx4_FLOW_STEERING_IB_UC_QP_RANGE_wrapper(struct mlx4_dev *dev, int slave, 1240 + struct mlx4_vhcr *vhcr, 1241 + struct mlx4_cmd_mailbox *inbox, 1242 + struct mlx4_cmd_mailbox *outbox, 1243 + struct mlx4_cmd_info *cmd); 1239 1244 1240 1245 int mlx4_get_mgm_entry_size(struct mlx4_dev *dev); 1241 1246 int mlx4_get_qp_per_mgm(struct mlx4_dev *dev);
+20
drivers/net/ethernet/mellanox/mlx4/port.c
··· 123 123 return err; 124 124 } 125 125 126 + int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx) 127 + { 128 + struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; 129 + struct mlx4_mac_table *table = &info->mac_table; 130 + int i; 131 + 132 + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { 133 + if (!table->refs[i]) 134 + continue; 135 + 136 + if (mac == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i]))) { 137 + *idx = i; 138 + return 0; 139 + } 140 + } 141 + 142 + return -ENOENT; 143 + } 144 + EXPORT_SYMBOL_GPL(mlx4_find_cached_mac); 145 + 126 146 int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac) 127 147 { 128 148 struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
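Editor's note: mlx4_find_cached_mac() resolves an already-registered unicast MAC to its index in the per-port MAC table, returning -ENOENT when the address was never registered — which is what a caller needs when it must refer to a table entry rather than to the address itself. A hedged sketch:

static int example_mac_to_index(struct mlx4_dev *dev, u8 port, u64 mac)
{
	int idx, err;

	err = mlx4_find_cached_mac(dev, port, mac, &idx);
	if (err)
		return err;		/* -ENOENT: MAC not in the table */

	return idx;			/* index into the port's MAC table */
}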
+10
drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
··· 3844 3844 return err; 3845 3845 } 3846 3846 3847 + int mlx4_FLOW_STEERING_IB_UC_QP_RANGE_wrapper(struct mlx4_dev *dev, int slave, 3848 + struct mlx4_vhcr *vhcr, 3849 + struct mlx4_cmd_mailbox *inbox, 3850 + struct mlx4_cmd_mailbox *outbox, 3851 + struct mlx4_cmd_info *cmd) 3852 + { 3853 + return -EPERM; 3854 + } 3855 + 3856 + 3847 3857 static void detach_qp(struct mlx4_dev *dev, int slave, struct res_qp *rqp) 3848 3858 { 3849 3859 struct res_gid *rgid;
+15 -2
drivers/net/ethernet/mellanox/mlx5/core/cq.c
··· 201 201 202 202 203 203 int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, 204 - int type, struct mlx5_cq_modify_params *params) 204 + struct mlx5_modify_cq_mbox_in *in, int in_sz) 205 205 { 206 - return -ENOSYS; 206 + struct mlx5_modify_cq_mbox_out out; 207 + int err; 208 + 209 + memset(&out, 0, sizeof(out)); 210 + in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MODIFY_CQ); 211 + err = mlx5_cmd_exec(dev, in, in_sz, &out, sizeof(out)); 212 + if (err) 213 + return err; 214 + 215 + if (out.hdr.status) 216 + return mlx5_cmd_status_to_err(&out.hdr); 217 + 218 + return 0; 207 219 } 220 + EXPORT_SYMBOL(mlx5_core_modify_cq); 208 221 209 222 int mlx5_init_cq_table(struct mlx5_core_dev *dev) 210 223 {
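Editor's note: mlx5_core_modify_cq() is now a thin wrapper that stamps the MODIFY_CQ opcode on a caller-built mailbox and checks the returned status, so the CQ context and field-select mask are entirely the caller's responsibility. A heavily hedged caller sketch follows; the mailbox field names (cqn, field_select, ctx.cq_period, ctx.cq_max_count) and the MLX5_CQ_MODIFY_* mask values are assumptions about the mlx5 headers of this series, not visible in this hunk:

static int example_modify_cq_moderation(struct mlx5_core_dev *dev,
					struct mlx5_core_cq *cq,
					u16 count, u16 period)
{
	struct mlx5_modify_cq_mbox_in in;

	memset(&in, 0, sizeof(in));
	in.cqn = cpu_to_be32(cq->cqn);			/* assumed field */
	in.ctx.cq_max_count = cpu_to_be16(count);	/* assumed field */
	in.ctx.cq_period = cpu_to_be16(period);		/* assumed field */
	in.field_select = cpu_to_be32(MLX5_CQ_MODIFY_COUNT |
				      MLX5_CQ_MODIFY_PERIOD);	/* assumed */

	return mlx5_core_modify_cq(dev, cq, &in, sizeof(in));
}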
+33 -6
drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
··· 275 275 } 276 276 277 277 static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, 278 - int index) 278 + int index, int *is_str) 279 279 { 280 280 struct mlx5_query_qp_mbox_out *out; 281 281 struct mlx5_qp_context *ctx; ··· 293 293 goto out; 294 294 } 295 295 296 + *is_str = 0; 296 297 ctx = &out->ctx; 297 298 switch (index) { 298 299 case QP_PID: 299 300 param = qp->pid; 300 301 break; 301 302 case QP_STATE: 302 - param = be32_to_cpu(ctx->flags) >> 28; 303 + param = (u64)mlx5_qp_state_str(be32_to_cpu(ctx->flags) >> 28); 304 + *is_str = 1; 303 305 break; 304 306 case QP_XPORT: 305 - param = (be32_to_cpu(ctx->flags) >> 16) & 0xff; 307 + param = (u64)mlx5_qp_type_str((be32_to_cpu(ctx->flags) >> 16) & 0xff); 308 + *is_str = 1; 306 309 break; 307 310 case QP_MTU: 308 - param = ctx->mtu_msgmax >> 5; 311 + switch (ctx->mtu_msgmax >> 5) { 312 + case IB_MTU_256: 313 + param = 256; 314 + break; 315 + case IB_MTU_512: 316 + param = 512; 317 + break; 318 + case IB_MTU_1024: 319 + param = 1024; 320 + break; 321 + case IB_MTU_2048: 322 + param = 2048; 323 + break; 324 + case IB_MTU_4096: 325 + param = 4096; 326 + break; 327 + default: 328 + param = 0; 329 + } 309 330 break; 310 331 case QP_N_RECV: 311 332 param = 1 << ((ctx->rq_size_stride >> 3) & 0xf); ··· 435 414 struct mlx5_field_desc *desc; 436 415 struct mlx5_rsc_debug *d; 437 416 char tbuf[18]; 417 + int is_str = 0; 438 418 u64 field; 439 419 int ret; 440 420 ··· 446 424 d = (void *)(desc - desc->i) - sizeof(*d); 447 425 switch (d->type) { 448 426 case MLX5_DBG_RSC_QP: 449 - field = qp_read_field(d->dev, d->object, desc->i); 427 + field = qp_read_field(d->dev, d->object, desc->i, &is_str); 450 428 break; 451 429 452 430 case MLX5_DBG_RSC_EQ: ··· 462 440 return -EINVAL; 463 441 } 464 442 465 - ret = snprintf(tbuf, sizeof(tbuf), "0x%llx\n", field); 443 + 444 + if (is_str) 445 + ret = snprintf(tbuf, sizeof(tbuf), "%s\n", (const char *)field); 446 + else 447 + ret = snprintf(tbuf, sizeof(tbuf), "0x%llx\n", field); 448 + 466 449 if (ret > 0) { 467 450 if (copy_to_user(buf, tbuf, ret)) 468 451 return -EFAULT;
+8 -2
drivers/net/ethernet/mellanox/mlx5/core/main.c
··· 460 460 461 461 err_stop_poll: 462 462 mlx5_stop_health_poll(dev); 463 - mlx5_cmd_teardown_hca(dev); 463 + if (mlx5_cmd_teardown_hca(dev)) { 464 + dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n"); 465 + return err; 466 + } 464 467 465 468 err_pagealloc_stop: 466 469 mlx5_pagealloc_stop(dev); ··· 506 503 mlx5_eq_cleanup(dev); 507 504 mlx5_disable_msix(dev); 508 505 mlx5_stop_health_poll(dev); 509 - mlx5_cmd_teardown_hca(dev); 506 + if (mlx5_cmd_teardown_hca(dev)) { 507 + dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n"); 508 + return; 509 + } 510 510 mlx5_pagealloc_stop(dev); 511 511 mlx5_reclaim_startup_pages(dev); 512 512 mlx5_core_disable_hca(dev);
+6 -7
drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
··· 99 99 100 100 enum { 101 101 MLX5_MAX_RECLAIM_TIME_MILI = 5000, 102 - MLX5_NUM_4K_IN_PAGE = PAGE_SIZE / 4096, 102 + MLX5_NUM_4K_IN_PAGE = PAGE_SIZE / MLX5_ADAPTER_PAGE_SIZE, 103 103 }; 104 104 105 105 static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u16 func_id) ··· 192 192 struct fw_page *fp; 193 193 unsigned n; 194 194 195 - if (list_empty(&dev->priv.free_list)) { 195 + if (list_empty(&dev->priv.free_list)) 196 196 return -ENOMEM; 197 - mlx5_core_warn(dev, "\n"); 198 - } 199 197 200 198 fp = list_entry(dev->priv.free_list.next, struct fw_page, list); 201 199 n = find_first_bit(&fp->bitmask, 8 * sizeof(fp->bitmask)); ··· 206 208 if (!fp->free_count) 207 209 list_del(&fp->list); 208 210 209 - *addr = fp->addr + n * 4096; 211 + *addr = fp->addr + n * MLX5_ADAPTER_PAGE_SIZE; 210 212 211 213 return 0; 212 214 } ··· 222 224 return; 223 225 } 224 226 225 - n = (addr & ~PAGE_MASK) % 4096; 227 + n = (addr & ~PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT; 226 228 fwp->free_count++; 227 229 set_bit(n, &fwp->bitmask); 228 230 if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) { 229 231 rb_erase(&fwp->rb_node, &dev->priv.page_root); 230 232 if (fwp->free_count != 1) 231 233 list_del(&fwp->list); 232 - dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL); 234 + dma_unmap_page(&dev->pdev->dev, addr & PAGE_MASK, PAGE_SIZE, 235 + DMA_BIDIRECTIONAL); 233 236 __free_page(fwp->page); 234 237 kfree(fwp); 235 238 } else if (fwp->free_count == 1) {
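The reason for replacing the hard-coded 4096 is kernels whose PAGE_SIZE is larger than the adapter's page size: with 64 KiB pages, MLX5_NUM_4K_IN_PAGE = 65536 / 4096 = 16, alloc_4k() hands out sixteen 4 KiB chunks per host page, and free_4k() now recovers the chunk index as (addr & ~PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT (0..15). The old "% 4096" always evaluated to 0 for 4 KiB-aligned addresses, so only the first chunk of each page was ever marked free again.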
+1 -1
drivers/net/ethernet/mellanox/mlx5/core/port.c
··· 57 57 in->arg = cpu_to_be32(arg); 58 58 in->register_id = cpu_to_be16(reg_num); 59 59 err = mlx5_cmd_exec(dev, in, sizeof(*in) + size_in, out, 60 - sizeof(out) + size_out); 60 + sizeof(*out) + size_out); 61 61 if (err) 62 62 goto ex2; 63 63
+3 -2
drivers/net/ethernet/mellanox/mlx5/core/qp.c
··· 74 74 struct mlx5_destroy_qp_mbox_out dout; 75 75 int err; 76 76 77 - memset(&dout, 0, sizeof(dout)); 77 + memset(&out, 0, sizeof(out)); 78 78 in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_QP); 79 79 80 80 err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); ··· 84 84 } 85 85 86 86 if (out.hdr.status) { 87 - pr_warn("current num of QPs 0x%x\n", atomic_read(&dev->num_qps)); 87 + mlx5_core_warn(dev, "current num of QPs 0x%x\n", 88 + atomic_read(&dev->num_qps)); 88 89 return mlx5_cmd_status_to_err(&out.hdr); 89 90 } 90 91
+57 -38
drivers/scsi/scsi_transport_srp.c
··· 64 64 65 65 /** 66 66 * srp_tmo_valid() - check timeout combination validity 67 + * @reconnect_delay: Reconnect delay in seconds. 68 + * @fast_io_fail_tmo: Fast I/O fail timeout in seconds. 69 + * @dev_loss_tmo: Device loss timeout in seconds. 67 70 * 68 71 * The combination of the timeout parameters must be such that SCSI commands 69 72 * are finished in a reasonable time. Hence do not allow the fast I/O fail 70 - * timeout to exceed SCSI_DEVICE_BLOCK_MAX_TIMEOUT. Furthermore, these 73 + * timeout to exceed SCSI_DEVICE_BLOCK_MAX_TIMEOUT nor allow dev_loss_tmo to 74 + * exceed that limit if failing I/O fast has been disabled. Furthermore, these 71 75 * parameters must be such that multipath can detect failed paths timely. 72 76 * Hence do not allow all three parameters to be disabled simultaneously. 73 77 */ ··· 82 78 if (reconnect_delay == 0) 83 79 return -EINVAL; 84 80 if (fast_io_fail_tmo > SCSI_DEVICE_BLOCK_MAX_TIMEOUT) 81 + return -EINVAL; 82 + if (fast_io_fail_tmo < 0 && 83 + dev_loss_tmo > SCSI_DEVICE_BLOCK_MAX_TIMEOUT) 85 84 return -EINVAL; 86 85 if (dev_loss_tmo >= LONG_MAX / HZ) 87 86 return -EINVAL; ··· 375 368 376 369 /** 377 370 * srp_reconnect_work() - reconnect and schedule a new attempt if necessary 371 + * @work: Work structure used for scheduling this operation. 378 372 */ 379 373 static void srp_reconnect_work(struct work_struct *work) 380 374 { ··· 416 408 417 409 /** 418 410 * rport_fast_io_fail_timedout() - fast I/O failure timeout handler 411 + * @work: Work structure used for scheduling this operation. 419 412 */ 420 413 static void rport_fast_io_fail_timedout(struct work_struct *work) 421 414 { ··· 435 426 436 427 /** 437 428 * rport_dev_loss_timedout() - device loss timeout handler 429 + * @work: Work structure used for scheduling this operation. 
438 430 */ 439 431 static void rport_dev_loss_timedout(struct work_struct *work) 440 432 { ··· 462 452 463 453 lockdep_assert_held(&rport->mutex); 464 454 465 - if (!rport->deleted) { 466 - delay = rport->reconnect_delay; 467 - fast_io_fail_tmo = rport->fast_io_fail_tmo; 468 - dev_loss_tmo = rport->dev_loss_tmo; 469 - pr_debug("%s current state: %d\n", 470 - dev_name(&shost->shost_gendev), rport->state); 455 + delay = rport->reconnect_delay; 456 + fast_io_fail_tmo = rport->fast_io_fail_tmo; 457 + dev_loss_tmo = rport->dev_loss_tmo; 458 + pr_debug("%s current state: %d\n", dev_name(&shost->shost_gendev), 459 + rport->state); 471 460 472 - if (delay > 0) 473 - queue_delayed_work(system_long_wq, 474 - &rport->reconnect_work, 475 - 1UL * delay * HZ); 476 - if (fast_io_fail_tmo >= 0 && 477 - srp_rport_set_state(rport, SRP_RPORT_BLOCKED) == 0) { 478 - pr_debug("%s new state: %d\n", 479 - dev_name(&shost->shost_gendev), 480 - rport->state); 481 - scsi_target_block(&shost->shost_gendev); 461 + if (rport->state == SRP_RPORT_LOST) 462 + return; 463 + if (delay > 0) 464 + queue_delayed_work(system_long_wq, &rport->reconnect_work, 465 + 1UL * delay * HZ); 466 + if (srp_rport_set_state(rport, SRP_RPORT_BLOCKED) == 0) { 467 + pr_debug("%s new state: %d\n", dev_name(&shost->shost_gendev), 468 + rport->state); 469 + scsi_target_block(&shost->shost_gendev); 470 + if (fast_io_fail_tmo >= 0) 482 471 queue_delayed_work(system_long_wq, 483 472 &rport->fast_io_fail_work, 484 473 1UL * fast_io_fail_tmo * HZ); 485 - } 486 474 if (dev_loss_tmo >= 0) 487 475 queue_delayed_work(system_long_wq, 488 476 &rport->dev_loss_work, 489 477 1UL * dev_loss_tmo * HZ); 490 - } else { 491 - pr_debug("%s has already been deleted\n", 492 - dev_name(&shost->shost_gendev)); 493 - srp_rport_set_state(rport, SRP_RPORT_FAIL_FAST); 494 - scsi_target_unblock(&shost->shost_gendev, 495 - SDEV_TRANSPORT_OFFLINE); 496 478 } 497 479 } 498 480 499 481 /** 500 482 * srp_start_tl_fail_timers() - start the transport layer failure timers 483 + * @rport: SRP target port. 501 484 * 502 485 * Start the transport layer fast I/O failure and device loss timers. Do not 503 486 * modify a timer that was already started. ··· 505 502 506 503 /** 507 504 * scsi_request_fn_active() - number of kernel threads inside scsi_request_fn() 505 + * @shost: SCSI host for which to count the number of scsi_request_fn() callers. 508 506 */ 509 507 static int scsi_request_fn_active(struct Scsi_Host *shost) 510 508 { ··· 526 522 527 523 /** 528 524 * srp_reconnect_rport() - reconnect to an SRP target port 525 + * @rport: SRP target port. 529 526 * 530 527 * Blocks SCSI command queueing before invoking reconnect() such that 531 528 * queuecommand() won't be invoked concurrently with reconnect() from outside ··· 561 556 scsi_target_block(&shost->shost_gendev); 562 557 while (scsi_request_fn_active(shost)) 563 558 msleep(20); 564 - res = i->f->reconnect(rport); 559 + res = rport->state != SRP_RPORT_LOST ? i->f->reconnect(rport) : -ENODEV; 565 560 pr_debug("%s (state %d): transport.reconnect() returned %d\n", 566 561 dev_name(&shost->shost_gendev), rport->state, res); 567 562 if (res == 0) { ··· 583 578 spin_unlock_irq(shost->host_lock); 584 579 } else if (rport->state == SRP_RPORT_RUNNING) { 585 580 /* 586 - * srp_reconnect_rport() was invoked with fast_io_fail 587 - * off. Mark the port as failed and start the TL failure 588 - * timers if these had not yet been started. 581 + * srp_reconnect_rport() has been invoked with fast_io_fail 582 + * and dev_loss off. 
Mark the port as failed and start the TL 583 + * failure timers if these had not yet been started. 589 584 */ 590 585 __rport_fail_io_fast(rport); 591 586 scsi_target_unblock(&shost->shost_gendev, ··· 604 599 605 600 /** 606 601 * srp_timed_out() - SRP transport intercept of the SCSI timeout EH 602 + * @scmd: SCSI command. 607 603 * 608 604 * If a timeout occurs while an rport is in the blocked state, ask the SCSI 609 605 * EH to continue waiting (BLK_EH_RESET_TIMER). Otherwise let the SCSI core ··· 627 621 static void srp_rport_release(struct device *dev) 628 622 { 629 623 struct srp_rport *rport = dev_to_rport(dev); 630 - 631 - cancel_delayed_work_sync(&rport->reconnect_work); 632 - cancel_delayed_work_sync(&rport->fast_io_fail_work); 633 - cancel_delayed_work_sync(&rport->dev_loss_work); 634 624 635 625 put_device(dev->parent); 636 626 kfree(rport); ··· 676 674 677 675 /** 678 676 * srp_rport_get() - increment rport reference count 677 + * @rport: SRP target port. 679 678 */ 680 679 void srp_rport_get(struct srp_rport *rport) 681 680 { ··· 686 683 687 684 /** 688 685 * srp_rport_put() - decrement rport reference count 686 + * @rport: SRP target port. 689 687 */ 690 688 void srp_rport_put(struct srp_rport *rport) 691 689 { ··· 784 780 device_del(dev); 785 781 transport_destroy_device(dev); 786 782 787 - mutex_lock(&rport->mutex); 788 - if (rport->state == SRP_RPORT_BLOCKED) 789 - __rport_fail_io_fast(rport); 790 - rport->deleted = true; 791 - mutex_unlock(&rport->mutex); 792 - 793 783 put_device(dev); 794 784 } 795 785 EXPORT_SYMBOL_GPL(srp_rport_del); ··· 807 809 device_for_each_child(&shost->shost_gendev, NULL, do_srp_rport_del); 808 810 } 809 811 EXPORT_SYMBOL_GPL(srp_remove_host); 812 + 813 + /** 814 + * srp_stop_rport_timers - stop the transport layer recovery timers 815 + * 816 + * Must be called after srp_remove_host() and scsi_remove_host(). The caller 817 + * must hold a reference on the rport (rport->dev) and on the SCSI host 818 + * (rport->dev.parent). 819 + */ 820 + void srp_stop_rport_timers(struct srp_rport *rport) 821 + { 822 + mutex_lock(&rport->mutex); 823 + if (rport->state == SRP_RPORT_BLOCKED) 824 + __rport_fail_io_fast(rport); 825 + srp_rport_set_state(rport, SRP_RPORT_LOST); 826 + mutex_unlock(&rport->mutex); 827 + 828 + cancel_delayed_work_sync(&rport->reconnect_work); 829 + cancel_delayed_work_sync(&rport->fast_io_fail_work); 830 + cancel_delayed_work_sync(&rport->dev_loss_work); 831 + } 832 + EXPORT_SYMBOL_GPL(srp_stop_rport_timers); 810 833 811 834 static int srp_tsk_mgmt_response(struct Scsi_Host *shost, u64 nexus, u64 tm_id, 812 835 int result)
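srp_stop_rport_timers() takes over the cancel_delayed_work_sync() calls that used to sit in srp_rport_release(), and its comment pins down the ordering on the removal path. A hedged sketch of how an SRP initiator driver's remove path might use it (the target structure and helper names are hypothetical):

/* Hypothetical teardown; only the ordering matters: remove the rport and
 * the SCSI host first, stop the recovery timers (this also moves the rport
 * to SRP_RPORT_LOST), then drop the references - taken earlier with
 * srp_rport_get()/scsi_host_get() - that kept rport->dev and the SCSI host
 * alive while the timers could still fire. */
static void example_remove_target(struct example_target *target)
{
	srp_remove_host(target->scsi_host);
	scsi_remove_host(target->scsi_host);
	srp_stop_rport_timers(target->rport);

	/* ... free connection state, command ring, etc. ... */

	srp_rport_put(target->rport);
	scsi_host_put(target->scsi_host);
}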
+1
include/linux/mlx4/cmd.h
··· 157 157 /* register/delete flow steering network rules */ 158 158 MLX4_QP_FLOW_STEERING_ATTACH = 0x65, 159 159 MLX4_QP_FLOW_STEERING_DETACH = 0x66, 160 + MLX4_FLOW_STEERING_IB_UC_QP_RANGE = 0x64, 160 161 }; 161 162 162 163 enum {
+11 -4
include/linux/mlx4/cq.h
··· 34 34 #define MLX4_CQ_H 35 35 36 36 #include <linux/types.h> 37 + #include <uapi/linux/if_ether.h> 37 38 38 39 #include <linux/mlx4/device.h> 39 40 #include <linux/mlx4/doorbell.h> ··· 44 43 __be32 immed_rss_invalid; 45 44 __be32 g_mlpath_rqpn; 46 45 __be16 sl_vid; 47 - __be16 rlid; 48 - __be16 status; 49 - u8 ipv6_ext_mask; 50 - u8 badfcs_enc; 46 + union { 47 + struct { 48 + __be16 rlid; 49 + __be16 status; 50 + u8 ipv6_ext_mask; 51 + u8 badfcs_enc; 52 + }; 53 + u8 smac[ETH_ALEN]; 54 + }; 51 55 __be32 byte_cnt; 52 56 __be16 wqe_index; 53 57 __be16 checksum; ··· 89 83 enum { 90 84 MLX4_CQE_VLAN_PRESENT_MASK = 1 << 29, 91 85 MLX4_CQE_QPN_MASK = 0xffffff, 86 + MLX4_CQE_VID_MASK = 0xfff, 92 87 }; 93 88 94 89 enum {
+6 -1
include/linux/mlx4/device.h
··· 160 160 MLX4_DEV_CAP_FLAG2_TS = 1LL << 5, 161 161 MLX4_DEV_CAP_FLAG2_VLAN_CONTROL = 1LL << 6, 162 162 MLX4_DEV_CAP_FLAG2_FSM = 1LL << 7, 163 - MLX4_DEV_CAP_FLAG2_UPDATE_QP = 1LL << 8 163 + MLX4_DEV_CAP_FLAG2_UPDATE_QP = 1LL << 8, 164 + MLX4_DEV_CAP_FLAG2_DMFS_IPOIB = 1LL << 9 164 165 }; 165 166 166 167 enum { ··· 1096 1095 int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc); 1097 1096 int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw, 1098 1097 u8 *pg, u16 *ratelimit); 1098 + int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx); 1099 1099 int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); 1100 1100 int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); 1101 1101 void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan); ··· 1145 1143 1146 1144 void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid); 1147 1145 __be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave); 1146 + 1147 + int mlx4_FLOW_STEERING_IB_UC_QP_RANGE(struct mlx4_dev *dev, u32 min_range_qpn, 1148 + u32 max_range_qpn); 1148 1149 1149 1150 cycle_t mlx4_read_clock(struct mlx4_dev *dev); 1150 1151
+13 -5
include/linux/mlx5/cq.h
··· 79 79 MLX5_CQE_RESP_SEND = 2, 80 80 MLX5_CQE_RESP_SEND_IMM = 3, 81 81 MLX5_CQE_RESP_SEND_INV = 4, 82 - MLX5_CQE_RESIZE_CQ = 0xff, /* TBD */ 82 + MLX5_CQE_RESIZE_CQ = 5, 83 83 MLX5_CQE_REQ_ERR = 13, 84 84 MLX5_CQE_RESP_ERR = 14, 85 + MLX5_CQE_INVALID = 15, 85 86 }; 86 87 87 88 enum { 88 - MLX5_CQ_MODIFY_RESEIZE = 0, 89 - MLX5_CQ_MODIFY_MODER = 1, 90 - MLX5_CQ_MODIFY_MAPPING = 2, 89 + MLX5_CQ_MODIFY_PERIOD = 1 << 0, 90 + MLX5_CQ_MODIFY_COUNT = 1 << 1, 91 + MLX5_CQ_MODIFY_OVERRUN = 1 << 2, 92 + }; 93 + 94 + enum { 95 + MLX5_CQ_OPMOD_RESIZE = 1, 96 + MLX5_MODIFY_CQ_MASK_LOG_SIZE = 1 << 0, 97 + MLX5_MODIFY_CQ_MASK_PG_OFFSET = 1 << 1, 98 + MLX5_MODIFY_CQ_MASK_PG_SIZE = 1 << 2, 91 99 }; 92 100 93 101 struct mlx5_cq_modify_params { ··· 166 158 int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, 167 159 struct mlx5_query_cq_mbox_out *out); 168 160 int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, 169 - int type, struct mlx5_cq_modify_params *params); 161 + struct mlx5_modify_cq_mbox_in *in, int in_sz); 170 162 int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); 171 163 void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); 172 164
+25 -6
include/linux/mlx5/device.h
··· 104 104 }; 105 105 106 106 enum { 107 - MLX5_BF_REGS_PER_PAGE = 4, 108 - MLX5_MAX_UAR_PAGES = 1 << 8, 109 - MLX5_MAX_UUARS = MLX5_MAX_UAR_PAGES * MLX5_BF_REGS_PER_PAGE, 107 + MLX5_BF_REGS_PER_PAGE = 4, 108 + MLX5_MAX_UAR_PAGES = 1 << 8, 109 + MLX5_NON_FP_BF_REGS_PER_PAGE = 2, 110 + MLX5_MAX_UUARS = MLX5_MAX_UAR_PAGES * MLX5_NON_FP_BF_REGS_PER_PAGE, 110 111 }; 111 112 112 113 enum { ··· 177 176 MLX5_DEV_CAP_FLAG_APM = 1LL << 17, 178 177 MLX5_DEV_CAP_FLAG_ATOMIC = 1LL << 18, 179 178 MLX5_DEV_CAP_FLAG_ON_DMND_PG = 1LL << 24, 179 + MLX5_DEV_CAP_FLAG_CQ_MODER = 1LL << 29, 180 + MLX5_DEV_CAP_FLAG_RESIZE_CQ = 1LL << 30, 180 181 MLX5_DEV_CAP_FLAG_RESIZE_SRQ = 1LL << 32, 181 182 MLX5_DEV_CAP_FLAG_REMOTE_FENCE = 1LL << 38, 182 183 MLX5_DEV_CAP_FLAG_TLP_HINTS = 1LL << 39, ··· 234 231 }; 235 232 236 233 enum { 237 - MLX5_ADAPTER_PAGE_SHIFT = 12 234 + MLX5_ADAPTER_PAGE_SHIFT = 12, 235 + MLX5_ADAPTER_PAGE_SIZE = 1 << MLX5_ADAPTER_PAGE_SHIFT, 238 236 }; 239 237 240 238 enum { ··· 701 697 __be64 pas[0]; 702 698 }; 703 699 700 + struct mlx5_modify_cq_mbox_in { 701 + struct mlx5_inbox_hdr hdr; 702 + __be32 cqn; 703 + __be32 field_select; 704 + struct mlx5_cq_context ctx; 705 + u8 rsvd[192]; 706 + __be64 pas[0]; 707 + }; 708 + 709 + struct mlx5_modify_cq_mbox_out { 710 + struct mlx5_outbox_hdr hdr; 711 + u8 rsvd[8]; 712 + }; 713 + 704 714 struct mlx5_enable_hca_mbox_in { 705 715 struct mlx5_inbox_hdr hdr; 706 716 u8 rsvd[8]; ··· 849 831 struct mlx5_mkey_seg seg; 850 832 u8 rsvd1[16]; 851 833 __be32 xlat_oct_act_size; 852 - __be32 bsf_coto_act_size; 853 - u8 rsvd2[168]; 834 + __be32 rsvd2; 835 + u8 rsvd3[168]; 854 836 __be64 pas[0]; 855 837 }; 856 838 ··· 889 871 890 872 struct mlx5_modify_mkey_mbox_out { 891 873 struct mlx5_outbox_hdr hdr; 874 + u8 rsvd[8]; 892 875 }; 893 876 894 877 struct mlx5_dump_mkey_mbox_in {
+45
include/linux/mlx5/qp.h
··· 464 464 int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp); 465 465 void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp); 466 466 467 + static inline const char *mlx5_qp_type_str(int type) 468 + { 469 + switch (type) { 470 + case MLX5_QP_ST_RC: return "RC"; 471 + case MLX5_QP_ST_UC: return "C"; 472 + case MLX5_QP_ST_UD: return "UD"; 473 + case MLX5_QP_ST_XRC: return "XRC"; 474 + case MLX5_QP_ST_MLX: return "MLX"; 475 + case MLX5_QP_ST_QP0: return "QP0"; 476 + case MLX5_QP_ST_QP1: return "QP1"; 477 + case MLX5_QP_ST_RAW_ETHERTYPE: return "RAW_ETHERTYPE"; 478 + case MLX5_QP_ST_RAW_IPV6: return "RAW_IPV6"; 479 + case MLX5_QP_ST_SNIFFER: return "SNIFFER"; 480 + case MLX5_QP_ST_SYNC_UMR: return "SYNC_UMR"; 481 + case MLX5_QP_ST_PTP_1588: return "PTP_1588"; 482 + case MLX5_QP_ST_REG_UMR: return "REG_UMR"; 483 + default: return "Invalid transport type"; 484 + } 485 + } 486 + 487 + static inline const char *mlx5_qp_state_str(int state) 488 + { 489 + switch (state) { 490 + case MLX5_QP_STATE_RST: 491 + return "RST"; 492 + case MLX5_QP_STATE_INIT: 493 + return "INIT"; 494 + case MLX5_QP_STATE_RTR: 495 + return "RTR"; 496 + case MLX5_QP_STATE_RTS: 497 + return "RTS"; 498 + case MLX5_QP_STATE_SQER: 499 + return "SQER"; 500 + case MLX5_QP_STATE_SQD: 501 + return "SQD"; 502 + case MLX5_QP_STATE_ERR: 503 + return "ERR"; 504 + case MLX5_QP_STATE_SQ_DRAINING: 505 + return "SQ_DRAINING"; 506 + case MLX5_QP_STATE_SUSPENDED: 507 + return "SUSPENDED"; 508 + default: return "Invalid QP state"; 509 + } 510 + } 511 + 467 512 #endif /* MLX5_QP_H */
+50 -21
include/rdma/ib_addr.h
··· 38 38 #include <linux/in6.h> 39 39 #include <linux/if_arp.h> 40 40 #include <linux/netdevice.h> 41 + #include <linux/inetdevice.h> 41 42 #include <linux/socket.h> 42 43 #include <linux/if_vlan.h> 44 + #include <net/ipv6.h> 45 + #include <net/if_inet6.h> 46 + #include <net/ip.h> 43 47 #include <rdma/ib_verbs.h> 44 48 #include <rdma/ib_pack.h> 49 + #include <net/ipv6.h> 45 50 46 51 struct rdma_addr_client { 47 52 atomic_t refcount; ··· 77 72 * rdma_translate_ip - Translate a local IP address to an RDMA hardware 78 73 * address. 79 74 */ 80 - int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr); 75 + int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr, 76 + u16 *vlan_id); 81 77 82 78 /** 83 79 * rdma_resolve_ip - Resolve source and destination IP addresses to ··· 110 104 111 105 int rdma_addr_size(struct sockaddr *addr); 112 106 107 + int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id); 108 + int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *smac, 109 + u16 *vlan_id); 110 + 113 111 static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr) 114 112 { 115 113 return ((u16)dev_addr->broadcast[8] << 8) | (u16)dev_addr->broadcast[9]; ··· 136 126 return dev_addr->dev_type == ARPHRD_INFINIBAND ? 4 : 0; 137 127 } 138 128 139 - static inline void iboe_mac_vlan_to_ll(union ib_gid *gid, u8 *mac, u16 vid) 140 - { 141 - memset(gid->raw, 0, 16); 142 - *((__be32 *) gid->raw) = cpu_to_be32(0xfe800000); 143 - if (vid < 0x1000) { 144 - gid->raw[12] = vid & 0xff; 145 - gid->raw[11] = vid >> 8; 146 - } else { 147 - gid->raw[12] = 0xfe; 148 - gid->raw[11] = 0xff; 149 - } 150 - memcpy(gid->raw + 13, mac + 3, 3); 151 - memcpy(gid->raw + 8, mac, 3); 152 - gid->raw[8] ^= 2; 153 - } 154 - 155 129 static inline u16 rdma_vlan_dev_vlan_id(const struct net_device *dev) 156 130 { 157 131 return dev->priv_flags & IFF_802_1Q_VLAN ? 
158 132 vlan_dev_vlan_id(dev) : 0xffff; 159 133 } 160 134 135 + static inline int rdma_ip2gid(struct sockaddr *addr, union ib_gid *gid) 136 + { 137 + switch (addr->sa_family) { 138 + case AF_INET: 139 + ipv6_addr_set_v4mapped(((struct sockaddr_in *) 140 + addr)->sin_addr.s_addr, 141 + (struct in6_addr *)gid); 142 + break; 143 + case AF_INET6: 144 + memcpy(gid->raw, &((struct sockaddr_in6 *)addr)->sin6_addr, 16); 145 + break; 146 + default: 147 + return -EINVAL; 148 + } 149 + return 0; 150 + } 151 + 152 + /* Important - sockaddr should be a union of sockaddr_in and sockaddr_in6 */ 153 + static inline int rdma_gid2ip(struct sockaddr *out, union ib_gid *gid) 154 + { 155 + if (ipv6_addr_v4mapped((struct in6_addr *)gid)) { 156 + struct sockaddr_in *out_in = (struct sockaddr_in *)out; 157 + memset(out_in, 0, sizeof(*out_in)); 158 + out_in->sin_family = AF_INET; 159 + memcpy(&out_in->sin_addr.s_addr, gid->raw + 12, 4); 160 + } else { 161 + struct sockaddr_in6 *out_in = (struct sockaddr_in6 *)out; 162 + memset(out_in, 0, sizeof(*out_in)); 163 + out_in->sin6_family = AF_INET6; 164 + memcpy(&out_in->sin6_addr.s6_addr, gid->raw, 16); 165 + } 166 + return 0; 167 + } 168 + 161 169 static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr, 162 170 union ib_gid *gid) 163 171 { 164 172 struct net_device *dev; 165 - u16 vid = 0xffff; 173 + struct in_device *ip4; 166 174 167 175 dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); 168 176 if (dev) { 169 - vid = rdma_vlan_dev_vlan_id(dev); 177 + ip4 = (struct in_device *)dev->ip_ptr; 178 + if (ip4 && ip4->ifa_list && ip4->ifa_list->ifa_address) 179 + ipv6_addr_set_v4mapped(ip4->ifa_list->ifa_address, 180 + (struct in6_addr *)gid); 170 181 dev_put(dev); 171 182 } 172 - 173 - iboe_mac_vlan_to_ll(gid, dev_addr->src_dev_addr, vid); 174 183 } 175 184 176 185 static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
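These helpers replace iboe_mac_vlan_to_ll(): an IBoE GID is now derived directly from an IP address (v4-mapped for IPv4), and rdma_ip2gid()/rdma_gid2ip() are exact inverses. A small sketch of the IPv4 round trip (the address value is arbitrary):

/* Illustrative: 192.168.0.1 becomes the v4-mapped GID ::ffff:192.168.0.1
 * and is recovered again by rdma_gid2ip(). */
static void example_ip_gid_roundtrip(void)
{
	struct sockaddr_in in = {
		.sin_family = AF_INET,
		.sin_addr.s_addr = cpu_to_be32(0xc0a80001),	/* 192.168.0.1 */
	};
	struct sockaddr_storage out;
	union ib_gid gid;

	rdma_ip2gid((struct sockaddr *)&in, &gid);
	/* gid.raw[10..11] == 0xff 0xff, gid.raw[12..15] == 192 168 0 1 */
	rdma_gid2ip((struct sockaddr *)&out, &gid);
	/* out now holds an AF_INET sockaddr_in with 192.168.0.1 again */
}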
+1
include/rdma/ib_cm.h
··· 601 601 int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, 602 602 struct ib_cm_sidr_rep_param *param); 603 603 604 + int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac); 604 605 #endif /* IB_CM_H */
+1
include/rdma/ib_pack.h
··· 34 34 #define IB_PACK_H 35 35 36 36 #include <rdma/ib_verbs.h> 37 + #include <uapi/linux/if_ether.h> 37 38 38 39 enum { 39 40 IB_LRH_BYTES = 8,
+3
include/rdma/ib_sa.h
··· 154 154 u8 packet_life_time_selector; 155 155 u8 packet_life_time; 156 156 u8 preference; 157 + u8 smac[ETH_ALEN]; 158 + u8 dmac[ETH_ALEN]; 159 + u16 vlan_id; 157 160 }; 158 161 159 162 #define IB_SA_MCMEMBER_REC_MGID IB_SA_COMP_MASK( 0)
+38 -4
include/rdma/ib_verbs.h
··· 48 48 #include <linux/rwsem.h> 49 49 #include <linux/scatterlist.h> 50 50 #include <linux/workqueue.h> 51 + #include <uapi/linux/if_ether.h> 51 52 52 53 #include <linux/atomic.h> 53 54 #include <asm/uaccess.h> ··· 70 69 RDMA_NODE_IB_ROUTER, 71 70 RDMA_NODE_RNIC, 72 71 RDMA_NODE_USNIC, 72 + RDMA_NODE_USNIC_UDP, 73 73 }; 74 74 75 75 enum rdma_transport_type { 76 76 RDMA_TRANSPORT_IB, 77 77 RDMA_TRANSPORT_IWARP, 78 - RDMA_TRANSPORT_USNIC 78 + RDMA_TRANSPORT_USNIC, 79 + RDMA_TRANSPORT_USNIC_UDP 79 80 }; 80 81 81 82 enum rdma_transport_type ··· 475 472 u8 static_rate; 476 473 u8 ah_flags; 477 474 u8 port_num; 475 + u8 dmac[ETH_ALEN]; 476 + u16 vlan_id; 478 477 }; 479 478 480 479 enum ib_wc_status { ··· 529 524 IB_WC_WITH_IMM = (1<<1), 530 525 IB_WC_WITH_INVALIDATE = (1<<2), 531 526 IB_WC_IP_CSUM_OK = (1<<3), 527 + IB_WC_WITH_SMAC = (1<<4), 528 + IB_WC_WITH_VLAN = (1<<5), 532 529 }; 533 530 534 531 struct ib_wc { ··· 551 544 u8 sl; 552 545 u8 dlid_path_bits; 553 546 u8 port_num; /* valid only for DR SMPs on switches */ 547 + u8 smac[ETH_ALEN]; 548 + u16 vlan_id; 554 549 }; 555 550 556 551 enum ib_cq_notify_flags { ··· 642 633 enum ib_qp_create_flags { 643 634 IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0, 644 635 IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1, 636 + IB_QP_CREATE_NETIF_QP = 1 << 5, 645 637 /* reserve bits 26-31 for low level drivers' internal use */ 646 638 IB_QP_CREATE_RESERVED_START = 1 << 26, 647 639 IB_QP_CREATE_RESERVED_END = 1 << 31, ··· 731 721 IB_QP_MAX_DEST_RD_ATOMIC = (1<<17), 732 722 IB_QP_PATH_MIG_STATE = (1<<18), 733 723 IB_QP_CAP = (1<<19), 734 - IB_QP_DEST_QPN = (1<<20) 724 + IB_QP_DEST_QPN = (1<<20), 725 + IB_QP_SMAC = (1<<21), 726 + IB_QP_ALT_SMAC = (1<<22), 727 + IB_QP_VID = (1<<23), 728 + IB_QP_ALT_VID = (1<<24), 735 729 }; 736 730 737 731 enum ib_qp_state { ··· 785 771 u8 rnr_retry; 786 772 u8 alt_port_num; 787 773 u8 alt_timeout; 774 + u8 smac[ETH_ALEN]; 775 + u8 alt_smac[ETH_ALEN]; 776 + u16 vlan_id; 777 + u16 alt_vlan_id; 788 778 }; 789 779 790 780 enum ib_wr_opcode { ··· 1117 1099 enum ib_flow_spec_type { 1118 1100 /* L2 headers*/ 1119 1101 IB_FLOW_SPEC_ETH = 0x20, 1102 + IB_FLOW_SPEC_IB = 0x22, 1120 1103 /* L3 header*/ 1121 1104 IB_FLOW_SPEC_IPV4 = 0x30, 1122 1105 /* L4 headers*/ 1123 1106 IB_FLOW_SPEC_TCP = 0x40, 1124 1107 IB_FLOW_SPEC_UDP = 0x41 1125 1108 }; 1126 - 1109 + #define IB_FLOW_SPEC_LAYER_MASK 0xF0 1127 1110 #define IB_FLOW_SPEC_SUPPORT_LAYERS 4 1128 1111 1129 1112 /* Flow steering rule priority is set according to it's domain. ··· 1150 1131 u16 size; 1151 1132 struct ib_flow_eth_filter val; 1152 1133 struct ib_flow_eth_filter mask; 1134 + }; 1135 + 1136 + struct ib_flow_ib_filter { 1137 + __be16 dlid; 1138 + __u8 sl; 1139 + }; 1140 + 1141 + struct ib_flow_spec_ib { 1142 + enum ib_flow_spec_type type; 1143 + u16 size; 1144 + struct ib_flow_ib_filter val; 1145 + struct ib_flow_ib_filter mask; 1153 1146 }; 1154 1147 1155 1148 struct ib_flow_ipv4_filter { ··· 1194 1163 u16 size; 1195 1164 }; 1196 1165 struct ib_flow_spec_eth eth; 1166 + struct ib_flow_spec_ib ib; 1197 1167 struct ib_flow_spec_ipv4 ipv4; 1198 1168 struct ib_flow_spec_tcp_udp tcp_udp; 1199 1169 }; ··· 1520 1488 * @next_state: Next QP state 1521 1489 * @type: QP type 1522 1490 * @mask: Mask of supplied QP attributes 1491 + * @ll : link layer of port 1523 1492 * 1524 1493 * This function is a helper function that a low-level driver's 1525 1494 * modify_qp method can use to validate the consumer's input. It ··· 1529 1496 * and that the attribute mask supplied is allowed for the transition. 
1530 1497 */ 1531 1498 int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, 1532 - enum ib_qp_type type, enum ib_qp_attr_mask mask); 1499 + enum ib_qp_type type, enum ib_qp_attr_mask mask, 1500 + enum rdma_link_layer ll); 1533 1501 1534 1502 int ib_register_event_handler (struct ib_event_handler *event_handler); 1535 1503 int ib_unregister_event_handler(struct ib_event_handler *event_handler);
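The new IB_FLOW_SPEC_IB spec type is what lets flow steering match native IB fields (DLID and SL) rather than only Ethernet/IP headers. A hedged sketch of attaching such a rule to a QP, assuming the pre-existing ib_flow_attr layout and the ib_create_flow() verb; the attr/spec buffer layout is simplified here, and a real caller packs the specs immediately after the attr and sizes the whole buffer accordingly:

/* Illustrative: steer traffic for one DLID to this QP. */
static struct ib_flow *example_steer_dlid(struct ib_qp *qp, __be16 dlid)
{
	struct {
		struct ib_flow_attr	attr;
		struct ib_flow_spec_ib	ib;
	} rule = {
		.attr = {
			.type		= IB_FLOW_ATTR_NORMAL,
			.size		= sizeof(rule),
			.num_of_specs	= 1,
			.port		= 1,
		},
		.ib = {
			.type		= IB_FLOW_SPEC_IB,
			.size		= sizeof(struct ib_flow_spec_ib),
			.val.dlid	= dlid,
			.mask.dlid	= cpu_to_be16(0xffff),
		},
	};

	return ib_create_flow(qp, &rule.attr, IB_FLOW_DOMAIN_USER);
}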
+30 -6
include/scsi/scsi_transport_srp.h
··· 19 19 * @SRP_RPORT_BLOCKED: Transport layer not operational; fast I/O fail timer 20 20 * is running and I/O has been blocked. 21 21 * @SRP_RPORT_FAIL_FAST: Fast I/O fail timer has expired; fail I/O fast. 22 - * @SRP_RPORT_LOST: Device loss timer has expired; port is being removed. 22 + * @SRP_RPORT_LOST: Port is being removed. 23 23 */ 24 24 enum srp_rport_state { 25 25 SRP_RPORT_RUNNING, ··· 29 29 }; 30 30 31 31 /** 32 - * struct srp_rport 33 - * @lld_data: LLD private data. 34 - * @mutex: Protects against concurrent rport reconnect / fast_io_fail / 35 - * dev_loss_tmo activity. 32 + * struct srp_rport - SRP initiator or target port 33 + * 34 + * Fields that are relevant for SRP initiator and SRP target drivers: 35 + * @dev: Device associated with this rport. 36 + * @port_id: 16-byte port identifier. 37 + * @roles: Role of this port - initiator or target. 38 + * 39 + * Fields that are only relevant for SRP initiator drivers: 40 + * @lld_data: LLD private data. 41 + * @mutex: Protects against concurrent rport reconnect / 42 + * fast_io_fail / dev_loss_tmo activity. 43 + * @state: rport state. 44 + * @deleted: Whether or not srp_rport_del() has already been invoked. 45 + * @reconnect_delay: Reconnect delay in seconds. 46 + * @failed_reconnects: Number of failed reconnect attempts. 47 + * @reconnect_work: Work structure used for scheduling reconnect attempts. 48 + * @fast_io_fail_tmo: Fast I/O fail timeout in seconds. 49 + * @dev_loss_tmo: Device loss timeout in seconds. 50 + * @fast_io_fail_work: Work structure used for scheduling fast I/O fail work. 51 + * @dev_loss_work: Work structure used for scheduling device loss work. 36 52 */ 37 53 struct srp_rport { 38 54 /* for initiator and target drivers */ ··· 64 48 65 49 struct mutex mutex; 66 50 enum srp_rport_state state; 67 - bool deleted; 68 51 int reconnect_delay; 69 52 int failed_reconnects; 70 53 struct delayed_work reconnect_work; ··· 75 60 76 61 /** 77 62 * struct srp_function_template 63 + * 64 + * Fields that are only relevant for SRP initiator drivers: 78 65 * @has_rport_state: Whether or not to create the state, fast_io_fail_tmo and 79 66 * dev_loss_tmo sysfs attribute for an rport. 80 67 * @reset_timer_if_blocked: Whether or srp_timed_out() should reset the command ··· 88 71 * srp_reconnect_rport(). 89 72 * @terminate_rport_io: Callback function for terminating all outstanding I/O 90 73 * requests for an rport. 74 + * @rport_delete: Callback function that deletes an rport. 75 + * 76 + * Fields that are only relevant for SRP target drivers: 77 + * @tsk_mgmt_response: Callback function for sending a task management response. 78 + * @it_nexus_response: Callback function for processing an IT nexus response. 91 79 */ 92 80 struct srp_function_template { 93 81 /* for initiator drivers */ ··· 123 101 extern int srp_reconnect_rport(struct srp_rport *rport); 124 102 extern void srp_start_tl_fail_timers(struct srp_rport *rport); 125 103 extern void srp_remove_host(struct Scsi_Host *); 104 + extern void srp_stop_rport_timers(struct srp_rport *rport); 126 105 127 106 /** 128 107 * srp_chkready() - evaluate the transport layer state before I/O 108 + * @rport: SRP target port pointer. 129 109 * 130 110 * Returns a SCSI result code that can be returned by the LLD queuecommand() 131 111 * implementation. The role of this function is similar to that of
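For an initiator driver the new knobs end up in its srp_function_template. A hedged sketch, assuming the template's timeout members are pointers to module-parameter-style ints; all names and values below are illustrative, and the timeouts are chosen to satisfy srp_tmo_valid() (a finite fast_io_fail_tmo well under the SCSI block-timeout limit and a positive reconnect_delay):

static int example_reconnect_delay = 10;	/* seconds between attempts */
static int example_fast_io_fail_tmo = 15;	/* fail I/O fast after 15 s */
static int example_dev_loss_tmo = 600;		/* give up after 10 min */

static int example_rport_reconnect(struct srp_rport *rport)
{
	/* re-establish the RDMA connection behind this rport */
	return 0;
}

static void example_terminate_io(struct srp_rport *rport)
{
	/* fail every outstanding request queued on this rport */
}

static void example_rport_delete(struct srp_rport *rport)
{
	/* final cleanup once the rport is being removed */
}

static struct srp_function_template example_srp_transport_functions = {
	.has_rport_state	= true,
	.reset_timer_if_blocked	= true,
	.reconnect_delay	= &example_reconnect_delay,
	.fast_io_fail_tmo	= &example_fast_io_fail_tmo,
	.dev_loss_tmo		= &example_dev_loss_tmo,
	.reconnect		= example_rport_reconnect,
	.terminate_rport_io	= example_terminate_io,
	.rport_delete		= example_rport_delete,
};

The template is passed to srp_attach_transport() as before; per the kerneldoc above, the state, fast_io_fail_tmo and dev_loss_tmo sysfs attributes are only created when has_rport_state is set.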