Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma updates from Jason Gunthorpe:
"This has been a smaller cycle with many of the commits being smallish
code fixes and improvements across the drivers.

- Driver updates for bnxt_re, cxgb4, hfi1, hns, mlx5, nes, qedr, and
rxe

- Memory window support in hns

- mlx5 user API 'flow mutate/steering' allows accessing the full
packet mangling and matching machinery from user space

- Support inter-working with verbs API calls in the 'devx' mlx5 user
API, and provide options to use devx with less privilege

- Modernize the use of sysfs and the device interface to use attribute
groups and cdev properly for uverbs, and clean up some of the core
code's device list management

- More progress on net namespaces for RDMA devices

- Consolidate driver BAR mmapping support into core code helpers and
rework how RDMA holds pointers to mm_struct for get_user_pages
cases

- First pass to use 'dev_name' instead of ib_device->name

- Device renaming for RDMA devices"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (242 commits)
IB/mlx5: Add support for extended atomic operations
RDMA/core: Fix comment for hw stats init for port == 0
RDMA/core: Refactor ib_register_device() function
RDMA/core: Fix unwinding flow in case of error to register device
ib_srp: Remove WARN_ON in srp_terminate_io()
IB/mlx5: Allow scatter to CQE without global signaled WRs
IB/mlx5: Verify that driver supports user flags
IB/mlx5: Support scatter to CQE for DC transport type
RDMA/drivers: Use core provided API for registering device attributes
RDMA/core: Allow existing drivers to set one sysfs group per device
IB/rxe: Remove unnecessary enum values
RDMA/umad: Use kernel API to allocate umad indexes
RDMA/uverbs: Use kernel API to allocate uverbs indexes
RDMA/core: Increase total number of RDMA ports across all devices
IB/mlx4: Add port and TID to MAD debug print
IB/mlx4: Enable debug print of SMPs
RDMA/core: Rename ports_parent to ports_kobj
RDMA/core: Do not expose unsupported counters
IB/mlx4: Refer to the device kobject instead of ports_parent
RDMA/nldev: Allow IB device rename through RDMA netlink
...

+7633 -5205
+18
Documentation/ABI/testing/sysfs-class-net
··· 91 91 stacked (e.g: VLAN interfaces) but still have the same MAC 92 92 address as their parent device. 93 93 94 + What: /sys/class/net/<iface>/dev_port 95 + Date: February 2014 96 + KernelVersion: 3.15 97 + Contact: netdev@vger.kernel.org 98 + Description: 99 + Indicates the port number of this network device, formatted 100 + as a decimal value. Some NICs have multiple independent ports 101 + on the same PCI bus, device and function. This attribute allows 102 + userspace to distinguish the respective interfaces. 103 + 104 + Note: some device drivers started to use 'dev_id' for this 105 + purpose since long before 3.15 and have not adopted the new 106 + attribute ever since. To query the port number, some tools look 107 + exclusively at 'dev_port', while others only consult 'dev_id'. 108 + If a network device has multiple client adapter ports as 109 + described in the previous paragraph and does not set this 110 + attribute to its port number, it's a kernel bug. 111 + 94 112 What: /sys/class/net/<iface>/dormant 95 113 Date: March 2006 96 114 KernelVersion: 2.6.17
+1
drivers/infiniband/Kconfig
··· 26 26 config INFINIBAND_USER_ACCESS 27 27 tristate "InfiniBand userspace access (verbs and CM)" 28 28 select ANON_INODES 29 + depends on MMU 29 30 ---help--- 30 31 Userspace InfiniBand access support. This enables the 31 32 kernel side of userspace verbs and the userspace
+253 -155
drivers/infiniband/core/addr.c
··· 45 45 #include <net/addrconf.h> 46 46 #include <net/ip6_route.h> 47 47 #include <rdma/ib_addr.h> 48 + #include <rdma/ib_sa.h> 48 49 #include <rdma/ib.h> 49 50 #include <rdma/rdma_netlink.h> 50 51 #include <net/netlink.h> ··· 62 61 struct rdma_dev_addr *addr, void *context); 63 62 unsigned long timeout; 64 63 struct delayed_work work; 64 + bool resolve_by_gid_attr; /* Consider gid attr in resolve phase */ 65 65 int status; 66 66 u32 seq; 67 67 }; ··· 221 219 } 222 220 EXPORT_SYMBOL(rdma_addr_size_kss); 223 221 224 - void rdma_copy_addr(struct rdma_dev_addr *dev_addr, 225 - const struct net_device *dev, 226 - const unsigned char *dst_dev_addr) 222 + /** 223 + * rdma_copy_src_l2_addr - Copy netdevice source addresses 224 + * @dev_addr: Destination address pointer where to copy the addresses 225 + * @dev: Netdevice whose source addresses to copy 226 + * 227 + * rdma_copy_src_l2_addr() copies source addresses from the specified netdevice. 228 + * This includes unicast address, broadcast address, device type and 229 + * interface index. 
230 + */ 231 + void rdma_copy_src_l2_addr(struct rdma_dev_addr *dev_addr, 232 + const struct net_device *dev) 227 233 { 228 234 dev_addr->dev_type = dev->type; 229 235 memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); 230 236 memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); 231 - if (dst_dev_addr) 232 - memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); 233 237 dev_addr->bound_dev_if = dev->ifindex; 234 238 } 235 - EXPORT_SYMBOL(rdma_copy_addr); 239 + EXPORT_SYMBOL(rdma_copy_src_l2_addr); 240 + 241 + static struct net_device * 242 + rdma_find_ndev_for_src_ip_rcu(struct net *net, const struct sockaddr *src_in) 243 + { 244 + struct net_device *dev = NULL; 245 + int ret = -EADDRNOTAVAIL; 246 + 247 + switch (src_in->sa_family) { 248 + case AF_INET: 249 + dev = __ip_dev_find(net, 250 + ((const struct sockaddr_in *)src_in)->sin_addr.s_addr, 251 + false); 252 + if (dev) 253 + ret = 0; 254 + break; 255 + #if IS_ENABLED(CONFIG_IPV6) 256 + case AF_INET6: 257 + for_each_netdev_rcu(net, dev) { 258 + if (ipv6_chk_addr(net, 259 + &((const struct sockaddr_in6 *)src_in)->sin6_addr, 260 + dev, 1)) { 261 + ret = 0; 262 + break; 263 + } 264 + } 265 + break; 266 + #endif 267 + } 268 + return ret ? 
ERR_PTR(ret) : dev; 269 + } 236 270 237 271 int rdma_translate_ip(const struct sockaddr *addr, 238 272 struct rdma_dev_addr *dev_addr) ··· 279 241 dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); 280 242 if (!dev) 281 243 return -ENODEV; 282 - rdma_copy_addr(dev_addr, dev, NULL); 244 + rdma_copy_src_l2_addr(dev_addr, dev); 283 245 dev_put(dev); 284 246 return 0; 285 247 } 286 248 287 - switch (addr->sa_family) { 288 - case AF_INET: 289 - dev = ip_dev_find(dev_addr->net, 290 - ((const struct sockaddr_in *)addr)->sin_addr.s_addr); 291 - 292 - if (!dev) 293 - return -EADDRNOTAVAIL; 294 - 295 - rdma_copy_addr(dev_addr, dev, NULL); 296 - dev_put(dev); 297 - break; 298 - #if IS_ENABLED(CONFIG_IPV6) 299 - case AF_INET6: 300 - rcu_read_lock(); 301 - for_each_netdev_rcu(dev_addr->net, dev) { 302 - if (ipv6_chk_addr(dev_addr->net, 303 - &((const struct sockaddr_in6 *)addr)->sin6_addr, 304 - dev, 1)) { 305 - rdma_copy_addr(dev_addr, dev, NULL); 306 - break; 307 - } 308 - } 309 - rcu_read_unlock(); 310 - break; 311 - #endif 312 - } 313 - return 0; 249 + rcu_read_lock(); 250 + dev = rdma_find_ndev_for_src_ip_rcu(dev_addr->net, addr); 251 + if (!IS_ERR(dev)) 252 + rdma_copy_src_l2_addr(dev_addr, dev); 253 + rcu_read_unlock(); 254 + return PTR_ERR_OR_ZERO(dev); 314 255 } 315 256 EXPORT_SYMBOL(rdma_translate_ip); 316 257 ··· 312 295 spin_unlock_bh(&lock); 313 296 } 314 297 315 - static int ib_nl_fetch_ha(const struct dst_entry *dst, 316 - struct rdma_dev_addr *dev_addr, 298 + static int ib_nl_fetch_ha(struct rdma_dev_addr *dev_addr, 317 299 const void *daddr, u32 seq, u16 family) 318 300 { 319 - if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) 301 + if (!rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) 320 302 return -EADDRNOTAVAIL; 321 303 322 - /* We fill in what we can, the response will fill the rest */ 323 - rdma_copy_addr(dev_addr, dst->dev, NULL); 324 304 return ib_nl_ip_send_msg(dev_addr, daddr, seq, family); 325 305 } 326 306 ··· 336 322 neigh_event_send(n, NULL); 337 
323 ret = -ENODATA; 338 324 } else { 339 - rdma_copy_addr(dev_addr, dst->dev, n->ha); 325 + memcpy(dev_addr->dst_dev_addr, n->ha, MAX_ADDR_LEN); 340 326 } 341 327 342 328 neigh_release(n); ··· 370 356 (const void *)&dst_in6->sin6_addr; 371 357 sa_family_t family = dst_in->sa_family; 372 358 373 - /* Gateway + ARPHRD_INFINIBAND -> IB router */ 374 - if (has_gateway(dst, family) && dst->dev->type == ARPHRD_INFINIBAND) 375 - return ib_nl_fetch_ha(dst, dev_addr, daddr, seq, family); 359 + /* If we have a gateway in IB mode then it must be an IB network */ 360 + if (has_gateway(dst, family) && dev_addr->network == RDMA_NETWORK_IB) 361 + return ib_nl_fetch_ha(dev_addr, daddr, seq, family); 376 362 else 377 363 return dst_fetch_ha(dst, dev_addr, daddr); 378 364 } 379 365 380 - static int addr4_resolve(struct sockaddr_in *src_in, 381 - const struct sockaddr_in *dst_in, 366 + static int addr4_resolve(struct sockaddr *src_sock, 367 + const struct sockaddr *dst_sock, 382 368 struct rdma_dev_addr *addr, 383 369 struct rtable **prt) 384 370 { 371 + struct sockaddr_in *src_in = (struct sockaddr_in *)src_sock; 372 + const struct sockaddr_in *dst_in = 373 + (const struct sockaddr_in *)dst_sock; 374 + 385 375 __be32 src_ip = src_in->sin_addr.s_addr; 386 376 __be32 dst_ip = dst_in->sin_addr.s_addr; 387 377 struct rtable *rt; ··· 401 383 if (ret) 402 384 return ret; 403 385 404 - src_in->sin_family = AF_INET; 405 386 src_in->sin_addr.s_addr = fl4.saddr; 406 - 407 - /* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're 408 - * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network 409 - * type accordingly. 
410 - */ 411 - if (rt->rt_uses_gateway && rt->dst.dev->type != ARPHRD_INFINIBAND) 412 - addr->network = RDMA_NETWORK_IPV4; 413 387 414 388 addr->hoplimit = ip4_dst_hoplimit(&rt->dst); 415 389 ··· 410 400 } 411 401 412 402 #if IS_ENABLED(CONFIG_IPV6) 413 - static int addr6_resolve(struct sockaddr_in6 *src_in, 414 - const struct sockaddr_in6 *dst_in, 403 + static int addr6_resolve(struct sockaddr *src_sock, 404 + const struct sockaddr *dst_sock, 415 405 struct rdma_dev_addr *addr, 416 406 struct dst_entry **pdst) 417 407 { 408 + struct sockaddr_in6 *src_in = (struct sockaddr_in6 *)src_sock; 409 + const struct sockaddr_in6 *dst_in = 410 + (const struct sockaddr_in6 *)dst_sock; 418 411 struct flowi6 fl6; 419 412 struct dst_entry *dst; 420 - struct rt6_info *rt; 421 413 int ret; 422 414 423 415 memset(&fl6, 0, sizeof fl6); ··· 431 419 if (ret < 0) 432 420 return ret; 433 421 434 - rt = (struct rt6_info *)dst; 435 - if (ipv6_addr_any(&src_in->sin6_addr)) { 436 - src_in->sin6_family = AF_INET6; 422 + if (ipv6_addr_any(&src_in->sin6_addr)) 437 423 src_in->sin6_addr = fl6.saddr; 438 - } 439 - 440 - /* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're 441 - * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network 442 - * type accordingly. 
443 - */ 444 - if (rt->rt6i_flags & RTF_GATEWAY && 445 - ip6_dst_idev(dst)->dev->type != ARPHRD_INFINIBAND) 446 - addr->network = RDMA_NETWORK_IPV6; 447 424 448 425 addr->hoplimit = ip6_dst_hoplimit(dst); 449 426 ··· 440 439 return 0; 441 440 } 442 441 #else 443 - static int addr6_resolve(struct sockaddr_in6 *src_in, 444 - const struct sockaddr_in6 *dst_in, 442 + static int addr6_resolve(struct sockaddr *src_sock, 443 + const struct sockaddr *dst_sock, 445 444 struct rdma_dev_addr *addr, 446 445 struct dst_entry **pdst) 447 446 { ··· 452 451 static int addr_resolve_neigh(const struct dst_entry *dst, 453 452 const struct sockaddr *dst_in, 454 453 struct rdma_dev_addr *addr, 454 + unsigned int ndev_flags, 455 455 u32 seq) 456 456 { 457 - if (dst->dev->flags & IFF_LOOPBACK) { 458 - int ret; 457 + int ret = 0; 459 458 460 - ret = rdma_translate_ip(dst_in, addr); 461 - if (!ret) 462 - memcpy(addr->dst_dev_addr, addr->src_dev_addr, 463 - MAX_ADDR_LEN); 459 + if (ndev_flags & IFF_LOOPBACK) { 460 + memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); 461 + } else { 462 + if (!(ndev_flags & IFF_NOARP)) { 463 + /* If the device doesn't do ARP internally */ 464 + ret = fetch_ha(dst, addr, dst_in, seq); 465 + } 466 + } 467 + return ret; 468 + } 464 469 465 - return ret; 470 + static int copy_src_l2_addr(struct rdma_dev_addr *dev_addr, 471 + const struct sockaddr *dst_in, 472 + const struct dst_entry *dst, 473 + const struct net_device *ndev) 474 + { 475 + int ret = 0; 476 + 477 + if (dst->dev->flags & IFF_LOOPBACK) 478 + ret = rdma_translate_ip(dst_in, dev_addr); 479 + else 480 + rdma_copy_src_l2_addr(dev_addr, dst->dev); 481 + 482 + /* 483 + * If there's a gateway and type of device not ARPHRD_INFINIBAND, 484 + * we're definitely in RoCE v2 (as RoCE v1 isn't routable) set the 485 + * network type accordingly. 486 + */ 487 + if (has_gateway(dst, dst_in->sa_family) && 488 + ndev->type != ARPHRD_INFINIBAND) 489 + dev_addr->network = dst_in->sa_family == AF_INET ? 
490 + RDMA_NETWORK_IPV4 : 491 + RDMA_NETWORK_IPV6; 492 + else 493 + dev_addr->network = RDMA_NETWORK_IB; 494 + 495 + return ret; 496 + } 497 + 498 + static int rdma_set_src_addr_rcu(struct rdma_dev_addr *dev_addr, 499 + unsigned int *ndev_flags, 500 + const struct sockaddr *dst_in, 501 + const struct dst_entry *dst) 502 + { 503 + struct net_device *ndev = READ_ONCE(dst->dev); 504 + 505 + *ndev_flags = ndev->flags; 506 + /* A physical device must be the RDMA device to use */ 507 + if (ndev->flags & IFF_LOOPBACK) { 508 + /* 509 + * RDMA (IB/RoCE, iWarp) doesn't run on lo interface or 510 + * loopback IP address. So if route is resolved to loopback 511 + * interface, translate that to a real ndev based on non 512 + * loopback IP address. 513 + */ 514 + ndev = rdma_find_ndev_for_src_ip_rcu(dev_net(ndev), dst_in); 515 + if (IS_ERR(ndev)) 516 + return -ENODEV; 466 517 } 467 518 468 - /* If the device doesn't do ARP internally */ 469 - if (!(dst->dev->flags & IFF_NOARP)) 470 - return fetch_ha(dst, addr, dst_in, seq); 519 + return copy_src_l2_addr(dev_addr, dst_in, dst, ndev); 520 + } 471 521 472 - rdma_copy_addr(addr, dst->dev, NULL); 522 + static int set_addr_netns_by_gid_rcu(struct rdma_dev_addr *addr) 523 + { 524 + struct net_device *ndev; 473 525 526 + ndev = rdma_read_gid_attr_ndev_rcu(addr->sgid_attr); 527 + if (IS_ERR(ndev)) 528 + return PTR_ERR(ndev); 529 + 530 + /* 531 + * Since we are holding the rcu, reading net and ifindex 532 + * are safe without any additional reference; because 533 + * change_net_namespace() in net/core/dev.c does rcu sync 534 + * after it changes the state to IFF_DOWN and before 535 + * updating netdev fields {net, ifindex}. 
536 + */ 537 + addr->net = dev_net(ndev); 538 + addr->bound_dev_if = ndev->ifindex; 474 539 return 0; 540 + } 541 + 542 + static void rdma_addr_set_net_defaults(struct rdma_dev_addr *addr) 543 + { 544 + addr->net = &init_net; 545 + addr->bound_dev_if = 0; 475 546 } 476 547 477 548 static int addr_resolve(struct sockaddr *src_in, 478 549 const struct sockaddr *dst_in, 479 550 struct rdma_dev_addr *addr, 480 551 bool resolve_neigh, 552 + bool resolve_by_gid_attr, 481 553 u32 seq) 482 554 { 483 - struct net_device *ndev; 484 - struct dst_entry *dst; 555 + struct dst_entry *dst = NULL; 556 + unsigned int ndev_flags = 0; 557 + struct rtable *rt = NULL; 485 558 int ret; 486 559 487 560 if (!addr->net) { ··· 563 488 return -EINVAL; 564 489 } 565 490 491 + rcu_read_lock(); 492 + if (resolve_by_gid_attr) { 493 + if (!addr->sgid_attr) { 494 + rcu_read_unlock(); 495 + pr_warn_ratelimited("%s: missing gid_attr\n", __func__); 496 + return -EINVAL; 497 + } 498 + /* 499 + * If the request is for a specific gid attribute of the 500 + * rdma_dev_addr, derive net from the netdevice of the 501 + * GID attribute. 
502 + */ 503 + ret = set_addr_netns_by_gid_rcu(addr); 504 + if (ret) { 505 + rcu_read_unlock(); 506 + return ret; 507 + } 508 + } 566 509 if (src_in->sa_family == AF_INET) { 567 - struct rtable *rt = NULL; 568 - const struct sockaddr_in *dst_in4 = 569 - (const struct sockaddr_in *)dst_in; 570 - 571 - ret = addr4_resolve((struct sockaddr_in *)src_in, 572 - dst_in4, addr, &rt); 573 - if (ret) 574 - return ret; 575 - 576 - if (resolve_neigh) 577 - ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq); 578 - 579 - if (addr->bound_dev_if) { 580 - ndev = dev_get_by_index(addr->net, addr->bound_dev_if); 581 - } else { 582 - ndev = rt->dst.dev; 583 - dev_hold(ndev); 584 - } 585 - 586 - ip_rt_put(rt); 510 + ret = addr4_resolve(src_in, dst_in, addr, &rt); 511 + dst = &rt->dst; 587 512 } else { 588 - const struct sockaddr_in6 *dst_in6 = 589 - (const struct sockaddr_in6 *)dst_in; 513 + ret = addr6_resolve(src_in, dst_in, addr, &dst); 514 + } 515 + if (ret) { 516 + rcu_read_unlock(); 517 + goto done; 518 + } 519 + ret = rdma_set_src_addr_rcu(addr, &ndev_flags, dst_in, dst); 520 + rcu_read_unlock(); 590 521 591 - ret = addr6_resolve((struct sockaddr_in6 *)src_in, 592 - dst_in6, addr, 593 - &dst); 594 - if (ret) 595 - return ret; 522 + /* 523 + * Resolve neighbor destination address if requested and 524 + * only if src addr translation didn't fail. 
525 + */ 526 + if (!ret && resolve_neigh) 527 + ret = addr_resolve_neigh(dst, dst_in, addr, ndev_flags, seq); 596 528 597 - if (resolve_neigh) 598 - ret = addr_resolve_neigh(dst, dst_in, addr, seq); 599 - 600 - if (addr->bound_dev_if) { 601 - ndev = dev_get_by_index(addr->net, addr->bound_dev_if); 602 - } else { 603 - ndev = dst->dev; 604 - dev_hold(ndev); 605 - } 606 - 529 + if (src_in->sa_family == AF_INET) 530 + ip_rt_put(rt); 531 + else 607 532 dst_release(dst); 608 - } 609 - 610 - if (ndev) { 611 - if (ndev->flags & IFF_LOOPBACK) 612 - ret = rdma_translate_ip(dst_in, addr); 613 - else 614 - addr->bound_dev_if = ndev->ifindex; 615 - dev_put(ndev); 616 - } 617 - 533 + done: 534 + /* 535 + * Clear the addr net to go back to its original state, only if it was 536 + * derived from GID attribute in this context. 537 + */ 538 + if (resolve_by_gid_attr) 539 + rdma_addr_set_net_defaults(addr); 618 540 return ret; 619 541 } 620 542 ··· 626 554 src_in = (struct sockaddr *)&req->src_addr; 627 555 dst_in = (struct sockaddr *)&req->dst_addr; 628 556 req->status = addr_resolve(src_in, dst_in, req->addr, 629 - true, req->seq); 557 + true, req->resolve_by_gid_attr, 558 + req->seq); 630 559 if (req->status && time_after_eq(jiffies, req->timeout)) { 631 560 req->status = -ETIMEDOUT; 632 561 } else if (req->status == -ENODATA) { ··· 659 586 } 660 587 661 588 int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr, 662 - struct rdma_dev_addr *addr, int timeout_ms, 589 + struct rdma_dev_addr *addr, unsigned long timeout_ms, 663 590 void (*callback)(int status, struct sockaddr *src_addr, 664 591 struct rdma_dev_addr *addr, void *context), 665 - void *context) 592 + bool resolve_by_gid_attr, void *context) 666 593 { 667 594 struct sockaddr *src_in, *dst_in; 668 595 struct addr_req *req; ··· 690 617 req->addr = addr; 691 618 req->callback = callback; 692 619 req->context = context; 620 + req->resolve_by_gid_attr = resolve_by_gid_attr; 693 621 
INIT_DELAYED_WORK(&req->work, process_one_req); 694 622 req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq); 695 623 696 - req->status = addr_resolve(src_in, dst_in, addr, true, req->seq); 624 + req->status = addr_resolve(src_in, dst_in, addr, true, 625 + req->resolve_by_gid_attr, req->seq); 697 626 switch (req->status) { 698 627 case 0: 699 628 req->timeout = jiffies; ··· 716 641 } 717 642 EXPORT_SYMBOL(rdma_resolve_ip); 718 643 719 - int rdma_resolve_ip_route(struct sockaddr *src_addr, 720 - const struct sockaddr *dst_addr, 721 - struct rdma_dev_addr *addr) 644 + int roce_resolve_route_from_path(struct sa_path_rec *rec, 645 + const struct ib_gid_attr *attr) 722 646 { 723 - struct sockaddr_storage ssrc_addr = {}; 724 - struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr; 647 + union { 648 + struct sockaddr _sockaddr; 649 + struct sockaddr_in _sockaddr_in; 650 + struct sockaddr_in6 _sockaddr_in6; 651 + } sgid, dgid; 652 + struct rdma_dev_addr dev_addr = {}; 653 + int ret; 725 654 726 - if (src_addr) { 727 - if (src_addr->sa_family != dst_addr->sa_family) 728 - return -EINVAL; 655 + if (rec->roce.route_resolved) 656 + return 0; 729 657 730 - memcpy(src_in, src_addr, rdma_addr_size(src_addr)); 731 - } else { 732 - src_in->sa_family = dst_addr->sa_family; 733 - } 658 + rdma_gid2ip(&sgid._sockaddr, &rec->sgid); 659 + rdma_gid2ip(&dgid._sockaddr, &rec->dgid); 734 660 735 - return addr_resolve(src_in, dst_addr, addr, false, 0); 661 + if (sgid._sockaddr.sa_family != dgid._sockaddr.sa_family) 662 + return -EINVAL; 663 + 664 + if (!attr || !attr->ndev) 665 + return -EINVAL; 666 + 667 + dev_addr.net = &init_net; 668 + dev_addr.sgid_attr = attr; 669 + 670 + ret = addr_resolve(&sgid._sockaddr, &dgid._sockaddr, 671 + &dev_addr, false, true, 0); 672 + if (ret) 673 + return ret; 674 + 675 + if ((dev_addr.network == RDMA_NETWORK_IPV4 || 676 + dev_addr.network == RDMA_NETWORK_IPV6) && 677 + rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2) 678 + return -EINVAL; 679 + 680 + 
rec->roce.route_resolved = true; 681 + return 0; 736 682 } 737 683 684 + /** 685 + * rdma_addr_cancel - Cancel resolve ip request 686 + * @addr: Pointer to address structure given previously 687 + * during rdma_resolve_ip(). 688 + * rdma_addr_cancel() is synchronous function which cancels any pending 689 + * request if there is any. 690 + */ 738 691 void rdma_addr_cancel(struct rdma_dev_addr *addr) 739 692 { 740 693 struct addr_req *req, *temp_req; ··· 790 687 * guarentees no work is running and none will be started. 791 688 */ 792 689 cancel_delayed_work_sync(&found->work); 793 - 794 - if (found->callback) 795 - found->callback(-ECANCELED, (struct sockaddr *)&found->src_addr, 796 - found->addr, found->context); 797 - 798 690 kfree(found); 799 691 } 800 692 EXPORT_SYMBOL(rdma_addr_cancel); ··· 808 710 809 711 int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, 810 712 const union ib_gid *dgid, 811 - u8 *dmac, const struct net_device *ndev, 713 + u8 *dmac, const struct ib_gid_attr *sgid_attr, 812 714 int *hoplimit) 813 715 { 814 716 struct rdma_dev_addr dev_addr; ··· 824 726 rdma_gid2ip(&dgid_addr._sockaddr, dgid); 825 727 826 728 memset(&dev_addr, 0, sizeof(dev_addr)); 827 - dev_addr.bound_dev_if = ndev->ifindex; 828 729 dev_addr.net = &init_net; 730 + dev_addr.sgid_attr = sgid_attr; 829 731 830 732 init_completion(&ctx.comp); 831 733 ret = rdma_resolve_ip(&sgid_addr._sockaddr, &dgid_addr._sockaddr, 832 - &dev_addr, 1000, resolve_cb, &ctx); 734 + &dev_addr, 1000, resolve_cb, true, &ctx); 833 735 if (ret) 834 736 return ret; 835 737
+55 -24
drivers/infiniband/core/cache.c
··· 212 212 u8 port_num = entry->attr.port_num; 213 213 struct ib_gid_table *table = rdma_gid_table(device, port_num); 214 214 215 - pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, 216 - device->name, port_num, entry->attr.index, 217 - entry->attr.gid.raw); 215 + dev_dbg(&device->dev, "%s port=%d index=%d gid %pI6\n", __func__, 216 + port_num, entry->attr.index, entry->attr.gid.raw); 218 217 219 218 if (rdma_cap_roce_gid_table(device, port_num) && 220 219 entry->state != GID_TABLE_ENTRY_INVALID) ··· 288 289 { 289 290 entry->state = GID_TABLE_ENTRY_VALID; 290 291 291 - pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, 292 - entry->attr.device->name, entry->attr.port_num, 293 - entry->attr.index, entry->attr.gid.raw); 292 + dev_dbg(&entry->attr.device->dev, "%s port=%d index=%d gid %pI6\n", 293 + __func__, entry->attr.port_num, entry->attr.index, 294 + entry->attr.gid.raw); 294 295 295 296 lockdep_assert_held(&table->lock); 296 297 write_lock_irq(&table->rwlock); ··· 319 320 int ret; 320 321 321 322 if (!attr->ndev) { 322 - pr_err("%s NULL netdev device=%s port=%d index=%d\n", 323 - __func__, attr->device->name, attr->port_num, 324 - attr->index); 323 + dev_err(&attr->device->dev, "%s NULL netdev port=%d index=%d\n", 324 + __func__, attr->port_num, attr->index); 325 325 return -EINVAL; 326 326 } 327 327 if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) { 328 328 ret = attr->device->add_gid(attr, &entry->context); 329 329 if (ret) { 330 - pr_err("%s GID add failed device=%s port=%d index=%d\n", 331 - __func__, attr->device->name, attr->port_num, 332 - attr->index); 330 + dev_err(&attr->device->dev, 331 + "%s GID add failed port=%d index=%d\n", 332 + __func__, attr->port_num, attr->index); 333 333 return ret; 334 334 } 335 335 } ··· 351 353 352 354 lockdep_assert_held(&table->lock); 353 355 354 - pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, 355 - ib_dev->name, port, ix, 356 - table->data_vec[ix]->attr.gid.raw); 356 
+ dev_dbg(&ib_dev->dev, "%s port=%d index=%d gid %pI6\n", __func__, port, 357 + ix, table->data_vec[ix]->attr.gid.raw); 357 358 358 359 write_lock_irq(&table->rwlock); 359 360 entry = table->data_vec[ix]; ··· 779 782 if (is_gid_entry_free(table->data_vec[i])) 780 783 continue; 781 784 if (kref_read(&table->data_vec[i]->kref) > 1) { 782 - pr_err("GID entry ref leak for %s (index %d) ref=%d\n", 783 - device->name, i, 784 - kref_read(&table->data_vec[i]->kref)); 785 + dev_err(&device->dev, 786 + "GID entry ref leak for index %d ref=%d\n", i, 787 + kref_read(&table->data_vec[i]->kref)); 785 788 leak = true; 786 789 } 787 790 } ··· 1249 1252 } 1250 1253 EXPORT_SYMBOL(rdma_hold_gid_attr); 1251 1254 1255 + /** 1256 + * rdma_read_gid_attr_ndev_rcu - Read GID attribute netdevice 1257 + * which must be in UP state. 1258 + * 1259 + * @attr:Pointer to the GID attribute 1260 + * 1261 + * Returns pointer to netdevice if the netdevice was attached to GID and 1262 + * netdevice is in UP state. Caller must hold RCU lock as this API 1263 + * reads the netdev flags which can change while netdevice migrates to 1264 + * different net namespace. Returns ERR_PTR with error code otherwise. 
1265 + * 1266 + */ 1267 + struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr) 1268 + { 1269 + struct ib_gid_table_entry *entry = 1270 + container_of(attr, struct ib_gid_table_entry, attr); 1271 + struct ib_device *device = entry->attr.device; 1272 + struct net_device *ndev = ERR_PTR(-ENODEV); 1273 + u8 port_num = entry->attr.port_num; 1274 + struct ib_gid_table *table; 1275 + unsigned long flags; 1276 + bool valid; 1277 + 1278 + table = rdma_gid_table(device, port_num); 1279 + 1280 + read_lock_irqsave(&table->rwlock, flags); 1281 + valid = is_gid_entry_valid(table->data_vec[attr->index]); 1282 + if (valid && attr->ndev && (READ_ONCE(attr->ndev->flags) & IFF_UP)) 1283 + ndev = attr->ndev; 1284 + read_unlock_irqrestore(&table->rwlock, flags); 1285 + return ndev; 1286 + } 1287 + 1252 1288 static int config_non_roce_gid_cache(struct ib_device *device, 1253 1289 u8 port, int gid_tbl_len) 1254 1290 { ··· 1300 1270 continue; 1301 1271 ret = device->query_gid(device, port, i, &gid_attr.gid); 1302 1272 if (ret) { 1303 - pr_warn("query_gid failed (%d) for %s (index %d)\n", 1304 - ret, device->name, i); 1273 + dev_warn(&device->dev, 1274 + "query_gid failed (%d) for index %d\n", ret, 1275 + i); 1305 1276 goto err; 1306 1277 } 1307 1278 gid_attr.index = i; ··· 1331 1300 1332 1301 ret = ib_query_port(device, port, tprops); 1333 1302 if (ret) { 1334 - pr_warn("ib_query_port failed (%d) for %s\n", 1335 - ret, device->name); 1303 + dev_warn(&device->dev, "ib_query_port failed (%d)\n", ret); 1336 1304 goto err; 1337 1305 } 1338 1306 ··· 1353 1323 for (i = 0; i < pkey_cache->table_len; ++i) { 1354 1324 ret = ib_query_pkey(device, port, i, pkey_cache->table + i); 1355 1325 if (ret) { 1356 - pr_warn("ib_query_pkey failed (%d) for %s (index %d)\n", 1357 - ret, device->name, i); 1326 + dev_warn(&device->dev, 1327 + "ib_query_pkey failed (%d) for index %d\n", 1328 + ret, i); 1358 1329 goto err; 1359 1330 } 1360 1331 }
+6 -3
drivers/infiniband/core/cm.c
··· 3292 3292 if (ret) 3293 3293 goto unlock; 3294 3294 3295 - cm_init_av_by_path(param->alternate_path, NULL, &cm_id_priv->alt_av, 3296 - cm_id_priv); 3295 + ret = cm_init_av_by_path(param->alternate_path, NULL, 3296 + &cm_id_priv->alt_av, cm_id_priv); 3297 + if (ret) 3298 + goto unlock; 3299 + 3297 3300 cm_id_priv->id.lap_state = IB_CM_LAP_RCVD; 3298 3301 cm_id_priv->tid = lap_msg->hdr.tid; 3299 3302 ret = atomic_inc_and_test(&cm_id_priv->work_count); ··· 4370 4367 cm_dev->going_down = 0; 4371 4368 cm_dev->device = device_create(&cm_class, &ib_device->dev, 4372 4369 MKDEV(0, 0), NULL, 4373 - "%s", ib_device->name); 4370 + "%s", dev_name(&ib_device->dev)); 4374 4371 if (IS_ERR(cm_dev->device)) { 4375 4372 kfree(cm_dev); 4376 4373 return;
+180 -77
drivers/infiniband/core/cma.c
··· 639 639 id_priv->id.route.addr.dev_addr.sgid_attr = sgid_attr; 640 640 } 641 641 642 - static int cma_acquire_dev(struct rdma_id_private *id_priv, 643 - const struct rdma_id_private *listen_id_priv) 642 + /** 643 + * cma_acquire_dev_by_src_ip - Acquire cma device, port, gid attribute 644 + * based on source ip address. 645 + * @id_priv: cm_id which should be bound to cma device 646 + * 647 + * cma_acquire_dev_by_src_ip() binds cm id to cma device, port and GID attribute 648 + * based on source IP address. It returns 0 on success or error code otherwise. 649 + * It is applicable to active and passive side cm_id. 650 + */ 651 + static int cma_acquire_dev_by_src_ip(struct rdma_id_private *id_priv) 644 652 { 645 653 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 646 654 const struct ib_gid_attr *sgid_attr; 647 - struct cma_device *cma_dev; 648 655 union ib_gid gid, iboe_gid, *gidp; 656 + struct cma_device *cma_dev; 649 657 enum ib_gid_type gid_type; 650 658 int ret = -ENODEV; 651 659 u8 port; ··· 662 654 id_priv->id.ps == RDMA_PS_IPOIB) 663 655 return -EINVAL; 664 656 665 - mutex_lock(&lock); 666 657 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, 667 658 &iboe_gid); 668 659 669 660 memcpy(&gid, dev_addr->src_dev_addr + 670 - rdma_addr_gid_offset(dev_addr), sizeof gid); 661 + rdma_addr_gid_offset(dev_addr), sizeof(gid)); 671 662 672 - if (listen_id_priv) { 673 - cma_dev = listen_id_priv->cma_dev; 674 - port = listen_id_priv->id.port_num; 675 - gidp = rdma_protocol_roce(cma_dev->device, port) ? 
676 - &iboe_gid : &gid; 677 - gid_type = listen_id_priv->gid_type; 678 - sgid_attr = cma_validate_port(cma_dev->device, port, 679 - gid_type, gidp, id_priv); 680 - if (!IS_ERR(sgid_attr)) { 681 - id_priv->id.port_num = port; 682 - cma_bind_sgid_attr(id_priv, sgid_attr); 683 - ret = 0; 684 - goto out; 685 - } 686 - } 687 - 663 + mutex_lock(&lock); 688 664 list_for_each_entry(cma_dev, &dev_list, list) { 689 - for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) { 690 - if (listen_id_priv && 691 - listen_id_priv->cma_dev == cma_dev && 692 - listen_id_priv->id.port_num == port) 693 - continue; 694 - 665 + for (port = rdma_start_port(cma_dev->device); 666 + port <= rdma_end_port(cma_dev->device); port++) { 695 667 gidp = rdma_protocol_roce(cma_dev->device, port) ? 696 668 &iboe_gid : &gid; 697 669 gid_type = cma_dev->default_gid_type[port - 1]; 698 670 sgid_attr = cma_validate_port(cma_dev->device, port, 699 671 gid_type, gidp, id_priv); 672 + if (!IS_ERR(sgid_attr)) { 673 + id_priv->id.port_num = port; 674 + cma_bind_sgid_attr(id_priv, sgid_attr); 675 + cma_attach_to_dev(id_priv, cma_dev); 676 + ret = 0; 677 + goto out; 678 + } 679 + } 680 + } 681 + out: 682 + mutex_unlock(&lock); 683 + return ret; 684 + } 685 + 686 + /** 687 + * cma_ib_acquire_dev - Acquire cma device, port and SGID attribute 688 + * @id_priv: cm id to bind to cma device 689 + * @listen_id_priv: listener cm id to match against 690 + * @req: Pointer to req structure containaining incoming 691 + * request information 692 + * cma_ib_acquire_dev() acquires cma device, port and SGID attribute when 693 + * rdma device matches for listen_id and incoming request. It also verifies 694 + * that a GID table entry is present for the source address. 695 + * Returns 0 on success, or returns error code otherwise. 
696 + */ 697 + static int cma_ib_acquire_dev(struct rdma_id_private *id_priv, 698 + const struct rdma_id_private *listen_id_priv, 699 + struct cma_req_info *req) 700 + { 701 + struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 702 + const struct ib_gid_attr *sgid_attr; 703 + enum ib_gid_type gid_type; 704 + union ib_gid gid; 705 + 706 + if (dev_addr->dev_type != ARPHRD_INFINIBAND && 707 + id_priv->id.ps == RDMA_PS_IPOIB) 708 + return -EINVAL; 709 + 710 + if (rdma_protocol_roce(req->device, req->port)) 711 + rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, 712 + &gid); 713 + else 714 + memcpy(&gid, dev_addr->src_dev_addr + 715 + rdma_addr_gid_offset(dev_addr), sizeof(gid)); 716 + 717 + gid_type = listen_id_priv->cma_dev->default_gid_type[req->port - 1]; 718 + sgid_attr = cma_validate_port(req->device, req->port, 719 + gid_type, &gid, id_priv); 720 + if (IS_ERR(sgid_attr)) 721 + return PTR_ERR(sgid_attr); 722 + 723 + id_priv->id.port_num = req->port; 724 + cma_bind_sgid_attr(id_priv, sgid_attr); 725 + /* Need to acquire lock to protect against reader 726 + * of cma_dev->id_list such as cma_netdev_callback() and 727 + * cma_process_remove(). 
728 + */ 729 + mutex_lock(&lock); 730 + cma_attach_to_dev(id_priv, listen_id_priv->cma_dev); 731 + mutex_unlock(&lock); 732 + return 0; 733 + } 734 + 735 + static int cma_iw_acquire_dev(struct rdma_id_private *id_priv, 736 + const struct rdma_id_private *listen_id_priv) 737 + { 738 + struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 739 + const struct ib_gid_attr *sgid_attr; 740 + struct cma_device *cma_dev; 741 + enum ib_gid_type gid_type; 742 + int ret = -ENODEV; 743 + union ib_gid gid; 744 + u8 port; 745 + 746 + if (dev_addr->dev_type != ARPHRD_INFINIBAND && 747 + id_priv->id.ps == RDMA_PS_IPOIB) 748 + return -EINVAL; 749 + 750 + memcpy(&gid, dev_addr->src_dev_addr + 751 + rdma_addr_gid_offset(dev_addr), sizeof(gid)); 752 + 753 + mutex_lock(&lock); 754 + 755 + cma_dev = listen_id_priv->cma_dev; 756 + port = listen_id_priv->id.port_num; 757 + gid_type = listen_id_priv->gid_type; 758 + sgid_attr = cma_validate_port(cma_dev->device, port, 759 + gid_type, &gid, id_priv); 760 + if (!IS_ERR(sgid_attr)) { 761 + id_priv->id.port_num = port; 762 + cma_bind_sgid_attr(id_priv, sgid_attr); 763 + ret = 0; 764 + goto out; 765 + } 766 + 767 + list_for_each_entry(cma_dev, &dev_list, list) { 768 + for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) { 769 + if (listen_id_priv->cma_dev == cma_dev && 770 + listen_id_priv->id.port_num == port) 771 + continue; 772 + 773 + gid_type = cma_dev->default_gid_type[port - 1]; 774 + sgid_attr = cma_validate_port(cma_dev->device, port, 775 + gid_type, &gid, id_priv); 700 776 if (!IS_ERR(sgid_attr)) { 701 777 id_priv->id.port_num = port; 702 778 cma_bind_sgid_attr(id_priv, sgid_attr); ··· 877 785 if (!id_priv) 878 786 return ERR_PTR(-ENOMEM); 879 787 880 - if (caller) 881 - id_priv->res.kern_name = caller; 882 - else 883 - rdma_restrack_set_task(&id_priv->res, current); 788 + rdma_restrack_set_task(&id_priv->res, caller); 884 789 id_priv->res.type = RDMA_RESTRACK_CM_ID; 885 790 id_priv->state = RDMA_CM_IDLE; 886 791 
id_priv->id.context = context; ··· 1551 1462 return rdma_protocol_roce(device, port_num); 1552 1463 } 1553 1464 1465 + static bool cma_is_req_ipv6_ll(const struct cma_req_info *req) 1466 + { 1467 + const struct sockaddr *daddr = 1468 + (const struct sockaddr *)&req->listen_addr_storage; 1469 + const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr; 1470 + 1471 + /* Returns true if the req is for IPv6 link local */ 1472 + return (daddr->sa_family == AF_INET6 && 1473 + (ipv6_addr_type(&daddr6->sin6_addr) & IPV6_ADDR_LINKLOCAL)); 1474 + } 1475 + 1554 1476 static bool cma_match_net_dev(const struct rdma_cm_id *id, 1555 1477 const struct net_device *net_dev, 1556 - u8 port_num) 1478 + const struct cma_req_info *req) 1557 1479 { 1558 1480 const struct rdma_addr *addr = &id->route.addr; 1559 1481 1560 1482 if (!net_dev) 1561 1483 /* This request is an AF_IB request */ 1562 - return (!id->port_num || id->port_num == port_num) && 1484 + return (!id->port_num || id->port_num == req->port) && 1563 1485 (addr->src_addr.ss_family == AF_IB); 1564 1486 1487 + /* 1488 + * If the request is not for IPv6 link local, allow matching 1489 + * request to any netdevice of the one or multiport rdma device. 1490 + */ 1491 + if (!cma_is_req_ipv6_ll(req)) 1492 + return true; 1565 1493 /* 1566 1494 * Net namespaces must match, and if the listner is listening 1567 1495 * on a specific netdevice than netdevice must match as well. 
··· 1606 1500 hlist_for_each_entry(id_priv, &bind_list->owners, node) { 1607 1501 if (cma_match_private_data(id_priv, ib_event->private_data)) { 1608 1502 if (id_priv->id.device == cm_id->device && 1609 - cma_match_net_dev(&id_priv->id, net_dev, req->port)) 1503 + cma_match_net_dev(&id_priv->id, net_dev, req)) 1610 1504 return id_priv; 1611 1505 list_for_each_entry(id_priv_dev, 1612 1506 &id_priv->listen_list, 1613 1507 listen_list) { 1614 1508 if (id_priv_dev->id.device == cm_id->device && 1615 - cma_match_net_dev(&id_priv_dev->id, net_dev, req->port)) 1509 + cma_match_net_dev(&id_priv_dev->id, 1510 + net_dev, req)) 1616 1511 return id_priv_dev; 1617 1512 } 1618 1513 } ··· 1625 1518 static struct rdma_id_private * 1626 1519 cma_ib_id_from_event(struct ib_cm_id *cm_id, 1627 1520 const struct ib_cm_event *ib_event, 1521 + struct cma_req_info *req, 1628 1522 struct net_device **net_dev) 1629 1523 { 1630 - struct cma_req_info req; 1631 1524 struct rdma_bind_list *bind_list; 1632 1525 struct rdma_id_private *id_priv; 1633 1526 int err; 1634 1527 1635 - err = cma_save_req_info(ib_event, &req); 1528 + err = cma_save_req_info(ib_event, req); 1636 1529 if (err) 1637 1530 return ERR_PTR(err); 1638 1531 1639 - *net_dev = cma_get_net_dev(ib_event, &req); 1532 + *net_dev = cma_get_net_dev(ib_event, req); 1640 1533 if (IS_ERR(*net_dev)) { 1641 1534 if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) { 1642 1535 /* Assuming the protocol is AF_IB */ ··· 1674 1567 } 1675 1568 1676 1569 if (!validate_net_dev(*net_dev, 1677 - (struct sockaddr *)&req.listen_addr_storage, 1678 - (struct sockaddr *)&req.src_addr_storage)) { 1570 + (struct sockaddr *)&req->listen_addr_storage, 1571 + (struct sockaddr *)&req->src_addr_storage)) { 1679 1572 id_priv = ERR_PTR(-EHOSTUNREACH); 1680 1573 goto err; 1681 1574 } 1682 1575 } 1683 1576 1684 1577 bind_list = cma_ps_find(*net_dev ? 
dev_net(*net_dev) : &init_net, 1685 - rdma_ps_from_service_id(req.service_id), 1686 - cma_port_from_service_id(req.service_id)); 1687 - id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev); 1578 + rdma_ps_from_service_id(req->service_id), 1579 + cma_port_from_service_id(req->service_id)); 1580 + id_priv = cma_find_listener(bind_list, cm_id, ib_event, req, *net_dev); 1688 1581 err: 1689 1582 rcu_read_unlock(); 1690 1583 if (IS_ERR(id_priv) && *net_dev) { ··· 1817 1710 mutex_lock(&id_priv->handler_mutex); 1818 1711 mutex_unlock(&id_priv->handler_mutex); 1819 1712 1713 + rdma_restrack_del(&id_priv->res); 1820 1714 if (id_priv->cma_dev) { 1821 - rdma_restrack_del(&id_priv->res); 1822 1715 if (rdma_cap_ib_cm(id_priv->id.device, 1)) { 1823 1716 if (id_priv->cm_id.ib) 1824 1717 ib_destroy_cm_id(id_priv->cm_id.ib); ··· 2009 1902 rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; 2010 1903 2011 1904 if (net_dev) { 2012 - rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL); 1905 + rdma_copy_src_l2_addr(&rt->addr.dev_addr, net_dev); 2013 1906 } else { 2014 1907 if (!cma_protocol_roce(listen_id) && 2015 1908 cma_any_addr(cma_src_addr(id_priv))) { ··· 2059 1952 goto err; 2060 1953 2061 1954 if (net_dev) { 2062 - rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL); 1955 + rdma_copy_src_l2_addr(&id->route.addr.dev_addr, net_dev); 2063 1956 } else { 2064 1957 if (!cma_any_addr(cma_src_addr(id_priv))) { 2065 1958 ret = cma_translate_addr(cma_src_addr(id_priv), ··· 2106 1999 { 2107 2000 struct rdma_id_private *listen_id, *conn_id = NULL; 2108 2001 struct rdma_cm_event event = {}; 2002 + struct cma_req_info req = {}; 2109 2003 struct net_device *net_dev; 2110 2004 u8 offset; 2111 2005 int ret; 2112 2006 2113 - listen_id = cma_ib_id_from_event(cm_id, ib_event, &net_dev); 2007 + listen_id = cma_ib_id_from_event(cm_id, ib_event, &req, &net_dev); 2114 2008 if (IS_ERR(listen_id)) 2115 2009 return PTR_ERR(listen_id); 2116 2010 ··· 2144 2036 } 2145 2037 2146 
2038 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); 2147 - ret = cma_acquire_dev(conn_id, listen_id); 2039 + ret = cma_ib_acquire_dev(conn_id, listen_id, &req); 2148 2040 if (ret) 2149 2041 goto err2; 2150 2042 ··· 2340 2232 goto out; 2341 2233 } 2342 2234 2343 - ret = cma_acquire_dev(conn_id, listen_id); 2235 + ret = cma_iw_acquire_dev(conn_id, listen_id); 2344 2236 if (ret) { 2345 2237 mutex_unlock(&conn_id->handler_mutex); 2346 2238 rdma_destroy_id(new_cm_id); ··· 2462 2354 2463 2355 ret = rdma_listen(id, id_priv->backlog); 2464 2356 if (ret) 2465 - pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n", 2466 - ret, cma_dev->device->name); 2357 + dev_warn(&cma_dev->device->dev, 2358 + "RDMA CMA: cma_listen_on_dev, error %d\n", ret); 2467 2359 } 2468 2360 2469 2361 static void cma_listen_on_all(struct rdma_id_private *id_priv) ··· 2510 2402 queue_work(cma_wq, &work->work); 2511 2403 } 2512 2404 2513 - static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, 2514 - struct cma_work *work) 2405 + static int cma_query_ib_route(struct rdma_id_private *id_priv, 2406 + unsigned long timeout_ms, struct cma_work *work) 2515 2407 { 2516 2408 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 2517 2409 struct sa_path_rec path_rec; ··· 2629 2521 work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 2630 2522 } 2631 2523 2632 - static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms) 2524 + static int cma_resolve_ib_route(struct rdma_id_private *id_priv, 2525 + unsigned long timeout_ms) 2633 2526 { 2634 2527 struct rdma_route *route = &id_priv->id.route; 2635 2528 struct cma_work *work; ··· 2752 2643 } 2753 2644 EXPORT_SYMBOL(rdma_set_ib_path); 2754 2645 2755 - static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms) 2646 + static int cma_resolve_iw_route(struct rdma_id_private *id_priv) 2756 2647 { 2757 2648 struct cma_work *work; 2758 2649 ··· 2853 2744 
return ret; 2854 2745 } 2855 2746 2856 - int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) 2747 + int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms) 2857 2748 { 2858 2749 struct rdma_id_private *id_priv; 2859 2750 int ret; ··· 2868 2759 else if (rdma_protocol_roce(id->device, id->port_num)) 2869 2760 ret = cma_resolve_iboe_route(id_priv); 2870 2761 else if (rdma_protocol_iwarp(id->device, id->port_num)) 2871 - ret = cma_resolve_iw_route(id_priv, timeout_ms); 2762 + ret = cma_resolve_iw_route(id_priv); 2872 2763 else 2873 2764 ret = -ENOSYS; 2874 2765 ··· 2971 2862 2972 2863 memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr)); 2973 2864 if (!status && !id_priv->cma_dev) { 2974 - status = cma_acquire_dev(id_priv, NULL); 2865 + status = cma_acquire_dev_by_src_ip(id_priv); 2975 2866 if (status) 2976 2867 pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n", 2977 2868 status); ··· 2991 2882 if (id_priv->id.event_handler(&id_priv->id, &event)) { 2992 2883 cma_exch(id_priv, RDMA_CM_DESTROYING); 2993 2884 mutex_unlock(&id_priv->handler_mutex); 2994 - cma_deref_id(id_priv); 2995 2885 rdma_destroy_id(&id_priv->id); 2996 2886 return; 2997 2887 } 2998 2888 out: 2999 2889 mutex_unlock(&id_priv->handler_mutex); 3000 - cma_deref_id(id_priv); 3001 2890 } 3002 2891 3003 2892 static int cma_resolve_loopback(struct rdma_id_private *id_priv) ··· 3073 2966 } 3074 2967 3075 2968 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, 3076 - const struct sockaddr *dst_addr, int timeout_ms) 2969 + const struct sockaddr *dst_addr, unsigned long timeout_ms) 3077 2970 { 3078 2971 struct rdma_id_private *id_priv; 3079 2972 int ret; ··· 3092 2985 return -EINVAL; 3093 2986 3094 2987 memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); 3095 - atomic_inc(&id_priv->refcount); 3096 2988 if (cma_any_addr(dst_addr)) { 3097 2989 ret = cma_resolve_loopback(id_priv); 3098 2990 } else { 3099 2991 if 
(dst_addr->sa_family == AF_IB) { 3100 2992 ret = cma_resolve_ib_addr(id_priv); 3101 2993 } else { 3102 - ret = rdma_resolve_ip(cma_src_addr(id_priv), 3103 - dst_addr, &id->route.addr.dev_addr, 3104 - timeout_ms, addr_handler, id_priv); 2994 + ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr, 2995 + &id->route.addr.dev_addr, 2996 + timeout_ms, addr_handler, 2997 + false, id_priv); 3105 2998 } 3106 2999 } 3107 3000 if (ret) ··· 3110 3003 return 0; 3111 3004 err: 3112 3005 cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); 3113 - cma_deref_id(id_priv); 3114 3006 return ret; 3115 3007 } 3116 3008 EXPORT_SYMBOL(rdma_resolve_addr); ··· 3520 3414 if (ret) 3521 3415 goto err1; 3522 3416 3523 - ret = cma_acquire_dev(id_priv, NULL); 3417 + ret = cma_acquire_dev_by_src_ip(id_priv); 3524 3418 if (ret) 3525 3419 goto err1; 3526 3420 } ··· 3545 3439 3546 3440 return 0; 3547 3441 err2: 3548 - if (id_priv->cma_dev) { 3549 - rdma_restrack_del(&id_priv->res); 3442 + rdma_restrack_del(&id_priv->res); 3443 + if (id_priv->cma_dev) 3550 3444 cma_release_dev(id_priv); 3551 - } 3552 3445 err1: 3553 3446 cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE); 3554 3447 return ret; ··· 3944 3839 3945 3840 id_priv = container_of(id, struct rdma_id_private, id); 3946 3841 3947 - if (caller) 3948 - id_priv->res.kern_name = caller; 3949 - else 3950 - rdma_restrack_set_task(&id_priv->res, current); 3842 + rdma_restrack_set_task(&id_priv->res, caller); 3951 3843 3952 3844 if (!cma_comp(id_priv, RDMA_CM_CONNECT)) 3953 3845 return -EINVAL; ··· 4189 4087 (!ib_sa_sendonly_fullmem_support(&sa_client, 4190 4088 id_priv->id.device, 4191 4089 id_priv->id.port_num))) { 4192 - pr_warn("RDMA CM: %s port %u Unable to multicast join\n" 4193 - "RDMA CM: SM doesn't support Send Only Full Member option\n", 4194 - id_priv->id.device->name, id_priv->id.port_num); 4090 + dev_warn( 4091 + &id_priv->id.device->dev, 4092 + "RDMA CM: port %u Unable to multicast join: SM doesn't support Send Only 
Full Member option\n", 4093 + id_priv->id.port_num); 4195 4094 return -EOPNOTSUPP; 4196 4095 } 4197 4096
+1 -1
drivers/infiniband/core/cma_configfs.c
··· 65 65 66 66 static bool filter_by_name(struct ib_device *ib_dev, void *cookie) 67 67 { 68 - return !strcmp(ib_dev->name, cookie); 68 + return !strcmp(dev_name(&ib_dev->dev), cookie); 69 69 } 70 70 71 71 static int cma_configfs_params_get(struct config_item *item,
+10 -2
drivers/infiniband/core/core_priv.h
··· 44 44 #include "mad_priv.h" 45 45 46 46 /* Total number of ports combined across all struct ib_devices's */ 47 - #define RDMA_MAX_PORTS 1024 47 + #define RDMA_MAX_PORTS 8192 48 48 49 49 struct pkey_index_qp_list { 50 50 struct list_head pkey_index_list; ··· 87 87 int (*port_callback)(struct ib_device *, 88 88 u8, struct kobject *)); 89 89 void ib_device_unregister_sysfs(struct ib_device *device); 90 + int ib_device_rename(struct ib_device *ibdev, const char *name); 90 91 91 92 typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port, 92 93 struct net_device *idev, void *cookie); ··· 339 338 340 339 int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, 341 340 const union ib_gid *dgid, 342 - u8 *dmac, const struct net_device *ndev, 341 + u8 *dmac, const struct ib_gid_attr *sgid_attr, 343 342 int *hoplimit); 343 + void rdma_copy_src_l2_addr(struct rdma_dev_addr *dev_addr, 344 + const struct net_device *dev); 344 345 346 + struct sa_path_rec; 347 + int roce_resolve_route_from_path(struct sa_path_rec *rec, 348 + const struct ib_gid_attr *attr); 349 + 350 + struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr); 345 351 #endif /* _CORE_PRIV_H */
+7 -3
drivers/infiniband/core/cq.c
··· 112 112 IB_POLL_BATCH); 113 113 if (completed >= IB_POLL_BUDGET_WORKQUEUE || 114 114 ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0) 115 - queue_work(ib_comp_wq, &cq->work); 115 + queue_work(cq->comp_wq, &cq->work); 116 116 } 117 117 118 118 static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private) 119 119 { 120 - queue_work(ib_comp_wq, &cq->work); 120 + queue_work(cq->comp_wq, &cq->work); 121 121 } 122 122 123 123 /** ··· 161 161 goto out_destroy_cq; 162 162 163 163 cq->res.type = RDMA_RESTRACK_CQ; 164 - cq->res.kern_name = caller; 164 + rdma_restrack_set_task(&cq->res, caller); 165 165 rdma_restrack_add(&cq->res); 166 166 167 167 switch (cq->poll_ctx) { ··· 175 175 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); 176 176 break; 177 177 case IB_POLL_WORKQUEUE: 178 + case IB_POLL_UNBOUND_WORKQUEUE: 178 179 cq->comp_handler = ib_cq_completion_workqueue; 179 180 INIT_WORK(&cq->work, ib_cq_poll_work); 180 181 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); 182 + cq->comp_wq = (cq->poll_ctx == IB_POLL_WORKQUEUE) ? 183 + ib_comp_wq : ib_comp_unbound_wq; 181 184 break; 182 185 default: 183 186 ret = -EINVAL; ··· 216 213 irq_poll_disable(&cq->iop); 217 214 break; 218 215 case IB_POLL_WORKQUEUE: 216 + case IB_POLL_UNBOUND_WORKQUEUE: 219 217 cancel_work_sync(&cq->work); 220 218 break; 221 219 default:
+177 -103
drivers/infiniband/core/device.c
··· 61 61 }; 62 62 63 63 struct workqueue_struct *ib_comp_wq; 64 + struct workqueue_struct *ib_comp_unbound_wq; 64 65 struct workqueue_struct *ib_wq; 65 66 EXPORT_SYMBOL_GPL(ib_wq); 66 67 ··· 123 122 124 123 for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) { 125 124 if (!*(void **) ((void *) device + mandatory_table[i].offset)) { 126 - pr_warn("Device %s is missing mandatory function %s\n", 127 - device->name, mandatory_table[i].name); 125 + dev_warn(&device->dev, 126 + "Device is missing mandatory function %s\n", 127 + mandatory_table[i].name); 128 128 return -EINVAL; 129 129 } 130 130 } ··· 165 163 struct ib_device *device; 166 164 167 165 list_for_each_entry(device, &device_list, core_list) 168 - if (!strncmp(name, device->name, IB_DEVICE_NAME_MAX)) 166 + if (!strcmp(name, dev_name(&device->dev))) 169 167 return device; 170 168 171 169 return NULL; 172 170 } 173 171 174 - static int alloc_name(char *name) 172 + int ib_device_rename(struct ib_device *ibdev, const char *name) 173 + { 174 + struct ib_device *device; 175 + int ret = 0; 176 + 177 + if (!strcmp(name, dev_name(&ibdev->dev))) 178 + return ret; 179 + 180 + mutex_lock(&device_mutex); 181 + list_for_each_entry(device, &device_list, core_list) { 182 + if (!strcmp(name, dev_name(&device->dev))) { 183 + ret = -EEXIST; 184 + goto out; 185 + } 186 + } 187 + 188 + ret = device_rename(&ibdev->dev, name); 189 + if (ret) 190 + goto out; 191 + strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX); 192 + out: 193 + mutex_unlock(&device_mutex); 194 + return ret; 195 + } 196 + 197 + static int alloc_name(struct ib_device *ibdev, const char *name) 175 198 { 176 199 unsigned long *inuse; 177 - char buf[IB_DEVICE_NAME_MAX]; 178 200 struct ib_device *device; 179 201 int i; 180 202 ··· 207 181 return -ENOMEM; 208 182 209 183 list_for_each_entry(device, &device_list, core_list) { 210 - if (!sscanf(device->name, name, &i)) 184 + char buf[IB_DEVICE_NAME_MAX]; 185 + 186 + if (sscanf(dev_name(&device->dev), name, &i) != 1) 211 187 
continue; 212 188 if (i < 0 || i >= PAGE_SIZE * 8) 213 189 continue; 214 190 snprintf(buf, sizeof buf, name, i); 215 - if (!strncmp(buf, device->name, IB_DEVICE_NAME_MAX)) 191 + if (!strcmp(buf, dev_name(&device->dev))) 216 192 set_bit(i, inuse); 217 193 } 218 194 219 195 i = find_first_zero_bit(inuse, PAGE_SIZE * 8); 220 196 free_page((unsigned long) inuse); 221 - snprintf(buf, sizeof buf, name, i); 222 197 223 - if (__ib_device_get_by_name(buf)) 224 - return -ENFILE; 225 - 226 - strlcpy(name, buf, IB_DEVICE_NAME_MAX); 227 - return 0; 198 + return dev_set_name(&ibdev->dev, name, i); 228 199 } 229 200 230 201 static void ib_device_release(struct device *device) ··· 244 221 static int ib_device_uevent(struct device *device, 245 222 struct kobj_uevent_env *env) 246 223 { 247 - struct ib_device *dev = container_of(device, struct ib_device, dev); 248 - 249 - if (add_uevent_var(env, "NAME=%s", dev->name)) 224 + if (add_uevent_var(env, "NAME=%s", dev_name(device))) 250 225 return -ENOMEM; 251 226 252 227 /* ··· 290 269 291 270 INIT_LIST_HEAD(&device->event_handler_list); 292 271 spin_lock_init(&device->event_handler_lock); 293 - spin_lock_init(&device->client_data_lock); 272 + rwlock_init(&device->client_data_lock); 294 273 INIT_LIST_HEAD(&device->client_data_list); 295 274 INIT_LIST_HEAD(&device->port_list); 296 275 ··· 306 285 */ 307 286 void ib_dealloc_device(struct ib_device *device) 308 287 { 288 + WARN_ON(!list_empty(&device->client_data_list)); 309 289 WARN_ON(device->reg_state != IB_DEV_UNREGISTERED && 310 290 device->reg_state != IB_DEV_UNINITIALIZED); 311 291 rdma_restrack_clean(&device->res); ··· 317 295 static int add_client_context(struct ib_device *device, struct ib_client *client) 318 296 { 319 297 struct ib_client_data *context; 320 - unsigned long flags; 321 298 322 - context = kmalloc(sizeof *context, GFP_KERNEL); 299 + context = kmalloc(sizeof(*context), GFP_KERNEL); 323 300 if (!context) 324 301 return -ENOMEM; 325 302 ··· 327 306 context->going_down 
= false; 328 307 329 308 down_write(&lists_rwsem); 330 - spin_lock_irqsave(&device->client_data_lock, flags); 309 + write_lock_irq(&device->client_data_lock); 331 310 list_add(&context->list, &device->client_data_list); 332 - spin_unlock_irqrestore(&device->client_data_lock, flags); 311 + write_unlock_irq(&device->client_data_lock); 333 312 up_write(&lists_rwsem); 334 313 335 314 return 0; ··· 465 444 } 466 445 } 467 446 468 - /** 469 - * ib_register_device - Register an IB device with IB core 470 - * @device:Device to register 471 - * 472 - * Low-level drivers use ib_register_device() to register their 473 - * devices with the IB core. All registered clients will receive a 474 - * callback for each device that is added. @device must be allocated 475 - * with ib_alloc_device(). 476 - */ 477 - int ib_register_device(struct ib_device *device, 478 - int (*port_callback)(struct ib_device *, 479 - u8, struct kobject *)) 447 + static void setup_dma_device(struct ib_device *device) 480 448 { 481 - int ret; 482 - struct ib_client *client; 483 - struct ib_udata uhw = {.outlen = 0, .inlen = 0}; 484 449 struct device *parent = device->dev.parent; 485 450 486 451 WARN_ON_ONCE(device->dma_device); ··· 498 491 WARN_ON_ONCE(!parent); 499 492 device->dma_device = parent; 500 493 } 494 + } 501 495 502 - mutex_lock(&device_mutex); 496 + static void cleanup_device(struct ib_device *device) 497 + { 498 + ib_cache_cleanup_one(device); 499 + ib_cache_release_one(device); 500 + kfree(device->port_pkey_list); 501 + kfree(device->port_immutable); 502 + } 503 503 504 - if (strchr(device->name, '%')) { 505 - ret = alloc_name(device->name); 506 - if (ret) 507 - goto out; 508 - } 504 + static int setup_device(struct ib_device *device) 505 + { 506 + struct ib_udata uhw = {.outlen = 0, .inlen = 0}; 507 + int ret; 509 508 510 - if (ib_device_check_mandatory(device)) { 511 - ret = -EINVAL; 512 - goto out; 513 - } 509 + ret = ib_device_check_mandatory(device); 510 + if (ret) 511 + return ret; 514 
512 515 513 ret = read_port_immutable(device); 516 514 if (ret) { 517 - pr_warn("Couldn't create per port immutable data %s\n", 518 - device->name); 519 - goto out; 520 - } 521 - 522 - ret = setup_port_pkey_list(device); 523 - if (ret) { 524 - pr_warn("Couldn't create per port_pkey_list\n"); 525 - goto out; 526 - } 527 - 528 - ret = ib_cache_setup_one(device); 529 - if (ret) { 530 - pr_warn("Couldn't set up InfiniBand P_Key/GID cache\n"); 531 - goto port_cleanup; 532 - } 533 - 534 - ret = ib_device_register_rdmacg(device); 535 - if (ret) { 536 - pr_warn("Couldn't register device with rdma cgroup\n"); 537 - goto cache_cleanup; 515 + dev_warn(&device->dev, 516 + "Couldn't create per port immutable data\n"); 517 + return ret; 538 518 } 539 519 540 520 memset(&device->attrs, 0, sizeof(device->attrs)); 541 521 ret = device->query_device(device, &device->attrs, &uhw); 542 522 if (ret) { 543 - pr_warn("Couldn't query the device attributes\n"); 544 - goto cg_cleanup; 523 + dev_warn(&device->dev, 524 + "Couldn't query the device attributes\n"); 525 + goto port_cleanup; 526 + } 527 + 528 + ret = setup_port_pkey_list(device); 529 + if (ret) { 530 + dev_warn(&device->dev, "Couldn't create per port_pkey_list\n"); 531 + goto port_cleanup; 532 + } 533 + 534 + ret = ib_cache_setup_one(device); 535 + if (ret) { 536 + dev_warn(&device->dev, 537 + "Couldn't set up InfiniBand P_Key/GID cache\n"); 538 + goto pkey_cleanup; 539 + } 540 + return 0; 541 + 542 + pkey_cleanup: 543 + kfree(device->port_pkey_list); 544 + port_cleanup: 545 + kfree(device->port_immutable); 546 + return ret; 547 + } 548 + 549 + /** 550 + * ib_register_device - Register an IB device with IB core 551 + * @device:Device to register 552 + * 553 + * Low-level drivers use ib_register_device() to register their 554 + * devices with the IB core. All registered clients will receive a 555 + * callback for each device that is added. @device must be allocated 556 + * with ib_alloc_device(). 
557 + */ 558 + int ib_register_device(struct ib_device *device, const char *name, 559 + int (*port_callback)(struct ib_device *, u8, 560 + struct kobject *)) 561 + { 562 + int ret; 563 + struct ib_client *client; 564 + 565 + setup_dma_device(device); 566 + 567 + mutex_lock(&device_mutex); 568 + 569 + if (strchr(name, '%')) { 570 + ret = alloc_name(device, name); 571 + if (ret) 572 + goto out; 573 + } else { 574 + ret = dev_set_name(&device->dev, name); 575 + if (ret) 576 + goto out; 577 + } 578 + if (__ib_device_get_by_name(dev_name(&device->dev))) { 579 + ret = -ENFILE; 580 + goto out; 581 + } 582 + strlcpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX); 583 + 584 + ret = setup_device(device); 585 + if (ret) 586 + goto out; 587 + 588 + device->index = __dev_new_index(); 589 + 590 + ret = ib_device_register_rdmacg(device); 591 + if (ret) { 592 + dev_warn(&device->dev, 593 + "Couldn't register device with rdma cgroup\n"); 594 + goto dev_cleanup; 545 595 } 546 596 547 597 ret = ib_device_register_sysfs(device, port_callback); 548 598 if (ret) { 549 - pr_warn("Couldn't register device %s with driver model\n", 550 - device->name); 599 + dev_warn(&device->dev, 600 + "Couldn't register device with driver model\n"); 551 601 goto cg_cleanup; 552 602 } 553 603 ··· 614 550 if (!add_client_context(device, client) && client->add) 615 551 client->add(device); 616 552 617 - device->index = __dev_new_index(); 618 553 down_write(&lists_rwsem); 619 554 list_add_tail(&device->core_list, &device_list); 620 555 up_write(&lists_rwsem); ··· 622 559 623 560 cg_cleanup: 624 561 ib_device_unregister_rdmacg(device); 625 - cache_cleanup: 626 - ib_cache_cleanup_one(device); 627 - ib_cache_release_one(device); 628 - port_cleanup: 629 - kfree(device->port_immutable); 562 + dev_cleanup: 563 + cleanup_device(device); 630 564 out: 631 565 mutex_unlock(&device_mutex); 632 566 return ret; ··· 645 585 646 586 down_write(&lists_rwsem); 647 587 list_del(&device->core_list); 648 - 
spin_lock_irqsave(&device->client_data_lock, flags); 649 - list_for_each_entry_safe(context, tmp, &device->client_data_list, list) 588 + write_lock_irq(&device->client_data_lock); 589 + list_for_each_entry(context, &device->client_data_list, list) 650 590 context->going_down = true; 651 - spin_unlock_irqrestore(&device->client_data_lock, flags); 591 + write_unlock_irq(&device->client_data_lock); 652 592 downgrade_write(&lists_rwsem); 653 593 654 - list_for_each_entry_safe(context, tmp, &device->client_data_list, 655 - list) { 594 + list_for_each_entry(context, &device->client_data_list, list) { 656 595 if (context->client->remove) 657 596 context->client->remove(device, context->data); 658 597 } 659 598 up_read(&lists_rwsem); 660 599 661 - ib_device_unregister_rdmacg(device); 662 600 ib_device_unregister_sysfs(device); 601 + ib_device_unregister_rdmacg(device); 663 602 664 603 mutex_unlock(&device_mutex); 665 604 ··· 668 609 kfree(device->port_pkey_list); 669 610 670 611 down_write(&lists_rwsem); 671 - spin_lock_irqsave(&device->client_data_lock, flags); 672 - list_for_each_entry_safe(context, tmp, &device->client_data_list, list) 612 + write_lock_irqsave(&device->client_data_lock, flags); 613 + list_for_each_entry_safe(context, tmp, &device->client_data_list, 614 + list) { 615 + list_del(&context->list); 673 616 kfree(context); 674 - spin_unlock_irqrestore(&device->client_data_lock, flags); 617 + } 618 + write_unlock_irqrestore(&device->client_data_lock, flags); 675 619 up_write(&lists_rwsem); 676 620 677 621 device->reg_state = IB_DEV_UNREGISTERED; ··· 724 662 */ 725 663 void ib_unregister_client(struct ib_client *client) 726 664 { 727 - struct ib_client_data *context, *tmp; 665 + struct ib_client_data *context; 728 666 struct ib_device *device; 729 - unsigned long flags; 730 667 731 668 mutex_lock(&device_mutex); 732 669 ··· 737 676 struct ib_client_data *found_context = NULL; 738 677 739 678 down_write(&lists_rwsem); 740 - 
spin_lock_irqsave(&device->client_data_lock, flags); 741 - list_for_each_entry_safe(context, tmp, &device->client_data_list, list) 679 + write_lock_irq(&device->client_data_lock); 680 + list_for_each_entry(context, &device->client_data_list, list) 742 681 if (context->client == client) { 743 682 context->going_down = true; 744 683 found_context = context; 745 684 break; 746 685 } 747 - spin_unlock_irqrestore(&device->client_data_lock, flags); 686 + write_unlock_irq(&device->client_data_lock); 748 687 up_write(&lists_rwsem); 749 688 750 689 if (client->remove) ··· 752 691 found_context->data : NULL); 753 692 754 693 if (!found_context) { 755 - pr_warn("No client context found for %s/%s\n", 756 - device->name, client->name); 694 + dev_warn(&device->dev, 695 + "No client context found for %s\n", 696 + client->name); 757 697 continue; 758 698 } 759 699 760 700 down_write(&lists_rwsem); 761 - spin_lock_irqsave(&device->client_data_lock, flags); 701 + write_lock_irq(&device->client_data_lock); 762 702 list_del(&found_context->list); 763 - kfree(found_context); 764 - spin_unlock_irqrestore(&device->client_data_lock, flags); 703 + write_unlock_irq(&device->client_data_lock); 765 704 up_write(&lists_rwsem); 705 + kfree(found_context); 766 706 } 767 707 768 708 mutex_unlock(&device_mutex); ··· 784 722 void *ret = NULL; 785 723 unsigned long flags; 786 724 787 - spin_lock_irqsave(&device->client_data_lock, flags); 725 + read_lock_irqsave(&device->client_data_lock, flags); 788 726 list_for_each_entry(context, &device->client_data_list, list) 789 727 if (context->client == client) { 790 728 ret = context->data; 791 729 break; 792 730 } 793 - spin_unlock_irqrestore(&device->client_data_lock, flags); 731 + read_unlock_irqrestore(&device->client_data_lock, flags); 794 732 795 733 return ret; 796 734 } ··· 811 749 struct ib_client_data *context; 812 750 unsigned long flags; 813 751 814 - spin_lock_irqsave(&device->client_data_lock, flags); 752 + 
write_lock_irqsave(&device->client_data_lock, flags); 815 753 list_for_each_entry(context, &device->client_data_list, list) 816 754 if (context->client == client) { 817 755 context->data = data; 818 756 goto out; 819 757 } 820 758 821 - pr_warn("No client context found for %s/%s\n", 822 - device->name, client->name); 759 + dev_warn(&device->dev, "No client context found for %s\n", 760 + client->name); 823 761 824 762 out: 825 - spin_unlock_irqrestore(&device->client_data_lock, flags); 763 + write_unlock_irqrestore(&device->client_data_lock, flags); 826 764 } 827 765 EXPORT_SYMBOL(ib_set_client_data); 828 766 ··· 1228 1166 goto err; 1229 1167 } 1230 1168 1169 + ib_comp_unbound_wq = 1170 + alloc_workqueue("ib-comp-unb-wq", 1171 + WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM | 1172 + WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE); 1173 + if (!ib_comp_unbound_wq) { 1174 + ret = -ENOMEM; 1175 + goto err_comp; 1176 + } 1177 + 1231 1178 ret = class_register(&ib_class); 1232 1179 if (ret) { 1233 1180 pr_warn("Couldn't create InfiniBand device class\n"); 1234 - goto err_comp; 1181 + goto err_comp_unbound; 1235 1182 } 1236 1183 1237 1184 ret = rdma_nl_init(); ··· 1289 1218 rdma_nl_exit(); 1290 1219 err_sysfs: 1291 1220 class_unregister(&ib_class); 1221 + err_comp_unbound: 1222 + destroy_workqueue(ib_comp_unbound_wq); 1292 1223 err_comp: 1293 1224 destroy_workqueue(ib_comp_wq); 1294 1225 err: ··· 1309 1236 addr_cleanup(); 1310 1237 rdma_nl_exit(); 1311 1238 class_unregister(&ib_class); 1239 + destroy_workqueue(ib_comp_unbound_wq); 1312 1240 destroy_workqueue(ib_comp_wq); 1313 1241 /* Make sure that any pending umem accounting work is done. */ 1314 1242 destroy_workqueue(ib_wq);
+3 -2
drivers/infiniband/core/fmr_pool.c
··· 213 213 device = pd->device; 214 214 if (!device->alloc_fmr || !device->dealloc_fmr || 215 215 !device->map_phys_fmr || !device->unmap_fmr) { 216 - pr_info(PFX "Device %s does not support FMRs\n", device->name); 216 + dev_info(&device->dev, "Device does not support FMRs\n"); 217 217 return ERR_PTR(-ENOSYS); 218 218 } 219 219 ··· 257 257 atomic_set(&pool->flush_ser, 0); 258 258 init_waitqueue_head(&pool->force_wait); 259 259 260 - pool->worker = kthread_create_worker(0, "ib_fmr(%s)", device->name); 260 + pool->worker = 261 + kthread_create_worker(0, "ib_fmr(%s)", dev_name(&device->dev)); 261 262 if (IS_ERR(pool->worker)) { 262 263 pr_warn(PFX "couldn't start cleanup kthread worker\n"); 263 264 ret = PTR_ERR(pool->worker);
+1 -1
drivers/infiniband/core/iwcm.c
··· 509 509 cm_id->m_local_addr = cm_id->local_addr; 510 510 cm_id->m_remote_addr = cm_id->remote_addr; 511 511 512 - memcpy(pm_reg_msg.dev_name, cm_id->device->name, 512 + memcpy(pm_reg_msg.dev_name, dev_name(&cm_id->device->dev), 513 513 sizeof(pm_reg_msg.dev_name)); 514 514 memcpy(pm_reg_msg.if_name, cm_id->device->iwcm->ifname, 515 515 sizeof(pm_reg_msg.if_name));
+43 -37
drivers/infiniband/core/mad.c
··· 220 220 int ret2, qpn; 221 221 u8 mgmt_class, vclass; 222 222 223 + if ((qp_type == IB_QPT_SMI && !rdma_cap_ib_smi(device, port_num)) || 224 + (qp_type == IB_QPT_GSI && !rdma_cap_ib_cm(device, port_num))) 225 + return ERR_PTR(-EPROTONOSUPPORT); 226 + 223 227 /* Validate parameters */ 224 228 qpn = get_spl_qp_index(qp_type); 225 229 if (qpn == -1) { 226 - dev_notice(&device->dev, 227 - "ib_register_mad_agent: invalid QP Type %d\n", 228 - qp_type); 230 + dev_dbg_ratelimited(&device->dev, "%s: invalid QP Type %d\n", 231 + __func__, qp_type); 229 232 goto error1; 230 233 } 231 234 232 235 if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) { 233 - dev_notice(&device->dev, 234 - "ib_register_mad_agent: invalid RMPP Version %u\n", 235 - rmpp_version); 236 + dev_dbg_ratelimited(&device->dev, 237 + "%s: invalid RMPP Version %u\n", 238 + __func__, rmpp_version); 236 239 goto error1; 237 240 } 238 241 239 242 /* Validate MAD registration request if supplied */ 240 243 if (mad_reg_req) { 241 244 if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) { 242 - dev_notice(&device->dev, 243 - "ib_register_mad_agent: invalid Class Version %u\n", 244 - mad_reg_req->mgmt_class_version); 245 + dev_dbg_ratelimited(&device->dev, 246 + "%s: invalid Class Version %u\n", 247 + __func__, 248 + mad_reg_req->mgmt_class_version); 245 249 goto error1; 246 250 } 247 251 if (!recv_handler) { 248 - dev_notice(&device->dev, 249 - "ib_register_mad_agent: no recv_handler\n"); 252 + dev_dbg_ratelimited(&device->dev, 253 + "%s: no recv_handler\n", __func__); 250 254 goto error1; 251 255 } 252 256 if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) { ··· 260 256 */ 261 257 if (mad_reg_req->mgmt_class != 262 258 IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { 263 - dev_notice(&device->dev, 264 - "ib_register_mad_agent: Invalid Mgmt Class 0x%x\n", 265 - mad_reg_req->mgmt_class); 259 + dev_dbg_ratelimited(&device->dev, 260 + "%s: Invalid Mgmt Class 0x%x\n", 261 + __func__, mad_reg_req->mgmt_class); 266 262 
goto error1; 267 263 } 268 264 } else if (mad_reg_req->mgmt_class == 0) { ··· 270 266 * Class 0 is reserved in IBA and is used for 271 267 * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE 272 268 */ 273 - dev_notice(&device->dev, 274 - "ib_register_mad_agent: Invalid Mgmt Class 0\n"); 269 + dev_dbg_ratelimited(&device->dev, 270 + "%s: Invalid Mgmt Class 0\n", 271 + __func__); 275 272 goto error1; 276 273 } else if (is_vendor_class(mad_reg_req->mgmt_class)) { 277 274 /* ··· 280 275 * ensure supplied OUI is not zero 281 276 */ 282 277 if (!is_vendor_oui(mad_reg_req->oui)) { 283 - dev_notice(&device->dev, 284 - "ib_register_mad_agent: No OUI specified for class 0x%x\n", 285 - mad_reg_req->mgmt_class); 278 + dev_dbg_ratelimited(&device->dev, 279 + "%s: No OUI specified for class 0x%x\n", 280 + __func__, 281 + mad_reg_req->mgmt_class); 286 282 goto error1; 287 283 } 288 284 } 289 285 /* Make sure class supplied is consistent with RMPP */ 290 286 if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) { 291 287 if (rmpp_version) { 292 - dev_notice(&device->dev, 293 - "ib_register_mad_agent: RMPP version for non-RMPP class 0x%x\n", 294 - mad_reg_req->mgmt_class); 288 + dev_dbg_ratelimited(&device->dev, 289 + "%s: RMPP version for non-RMPP class 0x%x\n", 290 + __func__, mad_reg_req->mgmt_class); 295 291 goto error1; 296 292 } 297 293 } ··· 303 297 IB_MGMT_CLASS_SUBN_LID_ROUTED) && 304 298 (mad_reg_req->mgmt_class != 305 299 IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { 306 - dev_notice(&device->dev, 307 - "ib_register_mad_agent: Invalid SM QP type: class 0x%x\n", 308 - mad_reg_req->mgmt_class); 300 + dev_dbg_ratelimited(&device->dev, 301 + "%s: Invalid SM QP type: class 0x%x\n", 302 + __func__, mad_reg_req->mgmt_class); 309 303 goto error1; 310 304 } 311 305 } else { ··· 313 307 IB_MGMT_CLASS_SUBN_LID_ROUTED) || 314 308 (mad_reg_req->mgmt_class == 315 309 IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { 316 - dev_notice(&device->dev, 317 - "ib_register_mad_agent: Invalid GS QP type: class 
0x%x\n", 318 - mad_reg_req->mgmt_class); 310 + dev_dbg_ratelimited(&device->dev, 311 + "%s: Invalid GS QP type: class 0x%x\n", 312 + __func__, mad_reg_req->mgmt_class); 319 313 goto error1; 320 314 } 321 315 } ··· 330 324 /* Validate device and port */ 331 325 port_priv = ib_get_mad_port(device, port_num); 332 326 if (!port_priv) { 333 - dev_notice(&device->dev, 334 - "ib_register_mad_agent: Invalid port %d\n", 335 - port_num); 327 + dev_dbg_ratelimited(&device->dev, "%s: Invalid port %d\n", 328 + __func__, port_num); 336 329 ret = ERR_PTR(-ENODEV); 337 330 goto error1; 338 331 } 339 332 340 - /* Verify the QP requested is supported. For example, Ethernet devices 341 - * will not have QP0 */ 333 + /* Verify the QP requested is supported. For example, Ethernet devices 334 + * will not have QP0. 335 + */ 342 336 if (!port_priv->qp_info[qpn].qp) { 343 - dev_notice(&device->dev, 344 - "ib_register_mad_agent: QP %d not supported\n", qpn); 337 + dev_dbg_ratelimited(&device->dev, "%s: QP %d not supported\n", 338 + __func__, qpn); 345 339 ret = ERR_PTR(-EPROTONOSUPPORT); 346 340 goto error1; 347 341 } ··· 2414 2408 } 2415 2409 2416 2410 void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr, 2417 - int timeout_ms) 2411 + unsigned long timeout_ms) 2418 2412 { 2419 2413 mad_send_wr->timeout = msecs_to_jiffies(timeout_ms); 2420 2414 wait_for_response(mad_send_wr); ··· 3189 3183 cq_size *= 2; 3190 3184 3191 3185 port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0, 3192 - IB_POLL_WORKQUEUE); 3186 + IB_POLL_UNBOUND_WORKQUEUE); 3193 3187 if (IS_ERR(port_priv->cq)) { 3194 3188 dev_err(&device->dev, "Couldn't create ib_mad CQ\n"); 3195 3189 ret = PTR_ERR(port_priv->cq);
+1 -1
drivers/infiniband/core/mad_priv.h
··· 221 221 void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr); 222 222 223 223 void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr, 224 - int timeout_ms); 224 + unsigned long timeout_ms); 225 225 226 226 #endif /* __IB_MAD_PRIV_H__ */
+2 -2
drivers/infiniband/core/netlink.c
··· 47 47 const struct rdma_nl_cbs *cb_table; 48 48 } rdma_nl_types[RDMA_NL_NUM_CLIENTS]; 49 49 50 - int rdma_nl_chk_listeners(unsigned int group) 50 + bool rdma_nl_chk_listeners(unsigned int group) 51 51 { 52 - return (netlink_has_listeners(nls, group)) ? 0 : -1; 52 + return netlink_has_listeners(nls, group); 53 53 } 54 54 EXPORT_SYMBOL(rdma_nl_chk_listeners); 55 55
+36 -1
drivers/infiniband/core/nldev.c
··· 179 179 { 180 180 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index)) 181 181 return -EMSGSIZE; 182 - if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name)) 182 + if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, 183 + dev_name(&device->dev))) 183 184 return -EMSGSIZE; 184 185 185 186 return 0; ··· 646 645 return err; 647 646 } 648 647 648 + static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 649 + struct netlink_ext_ack *extack) 650 + { 651 + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 652 + struct ib_device *device; 653 + u32 index; 654 + int err; 655 + 656 + err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, 657 + extack); 658 + if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) 659 + return -EINVAL; 660 + 661 + index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 662 + device = ib_device_get_by_index(index); 663 + if (!device) 664 + return -EINVAL; 665 + 666 + if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) { 667 + char name[IB_DEVICE_NAME_MAX] = {}; 668 + 669 + nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME], 670 + IB_DEVICE_NAME_MAX); 671 + err = ib_device_rename(device, name); 672 + } 673 + 674 + put_device(&device->dev); 675 + return err; 676 + } 677 + 649 678 static int _nldev_get_dumpit(struct ib_device *device, 650 679 struct sk_buff *skb, 651 680 struct netlink_callback *cb, ··· 1107 1076 [RDMA_NLDEV_CMD_GET] = { 1108 1077 .doit = nldev_get_doit, 1109 1078 .dump = nldev_get_dumpit, 1079 + }, 1080 + [RDMA_NLDEV_CMD_SET] = { 1081 + .doit = nldev_set_doit, 1082 + .flags = RDMA_NL_ADMIN_PERM, 1110 1083 }, 1111 1084 [RDMA_NLDEV_CMD_PORT_GET] = { 1112 1085 .doit = nldev_port_get_doit,
+13 -43
drivers/infiniband/core/rdma_core.c
··· 794 794 uverbs_uobject_put(uobj); 795 795 } 796 796 797 - static void ufile_disassociate_ucontext(struct ib_ucontext *ibcontext) 798 - { 799 - struct ib_device *ib_dev = ibcontext->device; 800 - struct task_struct *owning_process = NULL; 801 - struct mm_struct *owning_mm = NULL; 802 - 803 - owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID); 804 - if (!owning_process) 805 - return; 806 - 807 - owning_mm = get_task_mm(owning_process); 808 - if (!owning_mm) { 809 - pr_info("no mm, disassociate ucontext is pending task termination\n"); 810 - while (1) { 811 - put_task_struct(owning_process); 812 - usleep_range(1000, 2000); 813 - owning_process = get_pid_task(ibcontext->tgid, 814 - PIDTYPE_PID); 815 - if (!owning_process || 816 - owning_process->state == TASK_DEAD) { 817 - pr_info("disassociate ucontext done, task was terminated\n"); 818 - /* in case task was dead need to release the 819 - * task struct. 820 - */ 821 - if (owning_process) 822 - put_task_struct(owning_process); 823 - return; 824 - } 825 - } 826 - } 827 - 828 - down_write(&owning_mm->mmap_sem); 829 - ib_dev->disassociate_ucontext(ibcontext); 830 - up_write(&owning_mm->mmap_sem); 831 - mmput(owning_mm); 832 - put_task_struct(owning_process); 833 - } 834 - 835 797 /* 836 798 * Drop the ucontext off the ufile and completely disconnect it from the 837 799 * ib_device ··· 802 840 enum rdma_remove_reason reason) 803 841 { 804 842 struct ib_ucontext *ucontext = ufile->ucontext; 843 + struct ib_device *ib_dev = ucontext->device; 805 844 int ret; 806 845 807 - if (reason == RDMA_REMOVE_DRIVER_REMOVE) 808 - ufile_disassociate_ucontext(ucontext); 846 + /* 847 + * If we are closing the FD then the user mmap VMAs must have 848 + * already been destroyed as they hold on to the filep, otherwise 849 + * they need to be zap'd. 
850 + */ 851 + if (reason == RDMA_REMOVE_DRIVER_REMOVE) { 852 + uverbs_user_mmap_disassociate(ufile); 853 + if (ib_dev->disassociate_ucontext) 854 + ib_dev->disassociate_ucontext(ucontext); 855 + } 809 856 810 - put_pid(ucontext->tgid); 811 - ib_rdmacg_uncharge(&ucontext->cg_obj, ucontext->device, 857 + ib_rdmacg_uncharge(&ucontext->cg_obj, ib_dev, 812 858 RDMACG_RESOURCE_HCA_HANDLE); 813 859 814 860 /* 815 861 * FIXME: Drivers are not permitted to fail dealloc_ucontext, remove 816 862 * the error return. 817 863 */ 818 - ret = ucontext->device->dealloc_ucontext(ucontext); 864 + ret = ib_dev->dealloc_ucontext(ucontext); 819 865 WARN_ON(ret); 820 866 821 867 ufile->ucontext = NULL;
+1
drivers/infiniband/core/rdma_core.h
··· 160 160 void uverbs_destroy_api(struct uverbs_api *uapi); 161 161 void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm, 162 162 unsigned int num_attrs); 163 + void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile); 163 164 164 165 #endif /* RDMA_CORE_H */
+24 -6
drivers/infiniband/core/restrack.c
··· 50 50 51 51 dev = container_of(res, struct ib_device, res); 52 52 pr_err("restrack: %s", CUT_HERE); 53 - pr_err("restrack: BUG: RESTRACK detected leak of resources on %s\n", 54 - dev->name); 53 + dev_err(&dev->dev, "BUG: RESTRACK detected leak of resources\n"); 55 54 hash_for_each(res->hash, bkt, e, node) { 56 55 if (rdma_is_kernel_res(e)) { 57 56 owner = e->kern_name; ··· 155 156 } 156 157 } 157 158 159 + void rdma_restrack_set_task(struct rdma_restrack_entry *res, 160 + const char *caller) 161 + { 162 + if (caller) { 163 + res->kern_name = caller; 164 + return; 165 + } 166 + 167 + if (res->task) 168 + put_task_struct(res->task); 169 + get_task_struct(current); 170 + res->task = current; 171 + } 172 + EXPORT_SYMBOL(rdma_restrack_set_task); 173 + 158 174 void rdma_restrack_add(struct rdma_restrack_entry *res) 159 175 { 160 176 struct ib_device *dev = res_to_dev(res); ··· 182 168 183 169 if (res_is_user(res)) { 184 170 if (!res->task) 185 - rdma_restrack_set_task(res, current); 171 + rdma_restrack_set_task(res, NULL); 186 172 res->kern_name = NULL; 187 173 } else { 188 174 set_kern_name(res); ··· 223 209 struct ib_device *dev; 224 210 225 211 if (!res->valid) 226 - return; 212 + goto out; 227 213 228 214 dev = res_to_dev(res); 229 215 if (!dev) ··· 236 222 down_write(&dev->res.rwsem); 237 223 hash_del(&res->node); 238 224 res->valid = false; 239 - if (res->task) 240 - put_task_struct(res->task); 241 225 up_write(&dev->res.rwsem); 226 + 227 + out: 228 + if (res->task) { 229 + put_task_struct(res->task); 230 + res->task = NULL; 231 + } 242 232 } 243 233 EXPORT_SYMBOL(rdma_restrack_del);
+3 -5
drivers/infiniband/core/sa.h
··· 49 49 } 50 50 51 51 int ib_sa_mcmember_rec_query(struct ib_sa_client *client, 52 - struct ib_device *device, u8 port_num, 53 - u8 method, 52 + struct ib_device *device, u8 port_num, u8 method, 54 53 struct ib_sa_mcmember_rec *rec, 55 54 ib_sa_comp_mask comp_mask, 56 - int timeout_ms, gfp_t gfp_mask, 55 + unsigned long timeout_ms, gfp_t gfp_mask, 57 56 void (*callback)(int status, 58 57 struct ib_sa_mcmember_rec *resp, 59 58 void *context), 60 - void *context, 61 - struct ib_sa_query **sa_query); 59 + void *context, struct ib_sa_query **sa_query); 62 60 63 61 int mcast_init(void); 64 62 void mcast_cleanup(void);
+11 -59
drivers/infiniband/core/sa_query.c
··· 761 761 762 762 /* Construct the family header first */ 763 763 header = skb_put(skb, NLMSG_ALIGN(sizeof(*header))); 764 - memcpy(header->device_name, query->port->agent->device->name, 764 + memcpy(header->device_name, dev_name(&query->port->agent->device->dev), 765 765 LS_DEVICE_NAME_MAX); 766 766 header->port_num = query->port->port_num; 767 767 ··· 835 835 struct sk_buff *skb = NULL; 836 836 struct nlmsghdr *nlh; 837 837 void *data; 838 - int ret = 0; 839 838 struct ib_sa_mad *mad; 840 839 int len; 841 840 ··· 861 862 /* Repair the nlmsg header length */ 862 863 nlmsg_end(skb, nlh); 863 864 864 - ret = rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, gfp_mask); 865 - if (!ret) 866 - ret = len; 867 - else 868 - ret = 0; 869 - 870 - return ret; 865 + return rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, gfp_mask); 871 866 } 872 867 873 868 static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask) ··· 884 891 spin_unlock_irqrestore(&ib_nl_request_lock, flags); 885 892 886 893 ret = ib_nl_send_msg(query, gfp_mask); 887 - if (ret <= 0) { 894 + if (ret) { 888 895 ret = -EIO; 889 896 /* Remove the request */ 890 897 spin_lock_irqsave(&ib_nl_request_lock, flags); 891 898 list_del(&query->list); 892 899 spin_unlock_irqrestore(&ib_nl_request_lock, flags); 893 - } else { 894 - ret = 0; 895 900 } 896 901 897 902 return ret; ··· 1218 1227 return src_path_mask; 1219 1228 } 1220 1229 1221 - static int roce_resolve_route_from_path(struct sa_path_rec *rec, 1222 - const struct ib_gid_attr *attr) 1223 - { 1224 - struct rdma_dev_addr dev_addr = {}; 1225 - union { 1226 - struct sockaddr _sockaddr; 1227 - struct sockaddr_in _sockaddr_in; 1228 - struct sockaddr_in6 _sockaddr_in6; 1229 - } sgid_addr, dgid_addr; 1230 - int ret; 1231 - 1232 - if (rec->roce.route_resolved) 1233 - return 0; 1234 - if (!attr || !attr->ndev) 1235 - return -EINVAL; 1236 - 1237 - dev_addr.bound_dev_if = attr->ndev->ifindex; 1238 - /* TODO: Use net from the ib_gid_attr once it is added to it, 1239 - * 
until than, limit itself to init_net. 1240 - */ 1241 - dev_addr.net = &init_net; 1242 - 1243 - rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid); 1244 - rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid); 1245 - 1246 - /* validate the route */ 1247 - ret = rdma_resolve_ip_route(&sgid_addr._sockaddr, 1248 - &dgid_addr._sockaddr, &dev_addr); 1249 - if (ret) 1250 - return ret; 1251 - 1252 - if ((dev_addr.network == RDMA_NETWORK_IPV4 || 1253 - dev_addr.network == RDMA_NETWORK_IPV6) && 1254 - rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2) 1255 - return -EINVAL; 1256 - 1257 - rec->roce.route_resolved = true; 1258 - return 0; 1259 - } 1260 - 1261 1230 static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num, 1262 1231 struct sa_path_rec *rec, 1263 1232 struct rdma_ah_attr *ah_attr, ··· 1360 1409 spin_unlock_irqrestore(&tid_lock, flags); 1361 1410 } 1362 1411 1363 - static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask) 1412 + static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms, 1413 + gfp_t gfp_mask) 1364 1414 { 1365 1415 bool preload = gfpflags_allow_blocking(gfp_mask); 1366 1416 unsigned long flags; ··· 1385 1433 1386 1434 if ((query->flags & IB_SA_ENABLE_LOCAL_SERVICE) && 1387 1435 (!(query->flags & IB_SA_QUERY_OPA))) { 1388 - if (!rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) { 1436 + if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) { 1389 1437 if (!ib_nl_make_request(query, gfp_mask)) 1390 1438 return id; 1391 1439 } ··· 1551 1599 struct ib_device *device, u8 port_num, 1552 1600 struct sa_path_rec *rec, 1553 1601 ib_sa_comp_mask comp_mask, 1554 - int timeout_ms, gfp_t gfp_mask, 1602 + unsigned long timeout_ms, gfp_t gfp_mask, 1555 1603 void (*callback)(int status, 1556 1604 struct sa_path_rec *resp, 1557 1605 void *context), ··· 1705 1753 struct ib_device *device, u8 port_num, u8 method, 1706 1754 struct ib_sa_service_rec *rec, 1707 1755 ib_sa_comp_mask comp_mask, 1708 - int timeout_ms, gfp_t gfp_mask, 1756 + unsigned long 
timeout_ms, gfp_t gfp_mask, 1709 1757 void (*callback)(int status, 1710 1758 struct ib_sa_service_rec *resp, 1711 1759 void *context), ··· 1802 1850 u8 method, 1803 1851 struct ib_sa_mcmember_rec *rec, 1804 1852 ib_sa_comp_mask comp_mask, 1805 - int timeout_ms, gfp_t gfp_mask, 1853 + unsigned long timeout_ms, gfp_t gfp_mask, 1806 1854 void (*callback)(int status, 1807 1855 struct ib_sa_mcmember_rec *resp, 1808 1856 void *context), ··· 1893 1941 struct ib_device *device, u8 port_num, 1894 1942 struct ib_sa_guidinfo_rec *rec, 1895 1943 ib_sa_comp_mask comp_mask, u8 method, 1896 - int timeout_ms, gfp_t gfp_mask, 1944 + unsigned long timeout_ms, gfp_t gfp_mask, 1897 1945 void (*callback)(int status, 1898 1946 struct ib_sa_guidinfo_rec *resp, 1899 1947 void *context), ··· 2060 2108 } 2061 2109 2062 2110 static int ib_sa_classport_info_rec_query(struct ib_sa_port *port, 2063 - int timeout_ms, 2111 + unsigned long timeout_ms, 2064 2112 void (*callback)(void *context), 2065 2113 void *context, 2066 2114 struct ib_sa_query **sa_query)
+3 -4
drivers/infiniband/core/security.c
··· 685 685 if (event != LSM_POLICY_CHANGE) 686 686 return NOTIFY_DONE; 687 687 688 - ag->smp_allowed = !security_ib_endport_manage_subnet(ag->security, 689 - ag->device->name, 690 - ag->port_num); 688 + ag->smp_allowed = !security_ib_endport_manage_subnet( 689 + ag->security, dev_name(&ag->device->dev), ag->port_num); 691 690 692 691 return NOTIFY_OK; 693 692 } ··· 707 708 return 0; 708 709 709 710 ret = security_ib_endport_manage_subnet(agent->security, 710 - agent->device->name, 711 + dev_name(&agent->device->dev), 711 712 agent->port_num); 712 713 if (ret) 713 714 return ret;
+46 -53
drivers/infiniband/core/sysfs.c
··· 512 512 ret = get_perf_mad(p->ibdev, p->port_num, tab_attr->attr_id, &data, 513 513 40 + offset / 8, sizeof(data)); 514 514 if (ret < 0) 515 - return sprintf(buf, "N/A (no PMA)\n"); 515 + return ret; 516 516 517 517 switch (width) { 518 518 case 4: ··· 1036 1036 p->port_num = port_num; 1037 1037 1038 1038 ret = kobject_init_and_add(&p->kobj, &port_type, 1039 - device->ports_parent, 1039 + device->ports_kobj, 1040 1040 "%d", port_num); 1041 1041 if (ret) { 1042 1042 kfree(p); ··· 1057 1057 goto err_put; 1058 1058 } 1059 1059 1060 - p->pma_table = get_counter_table(device, port_num); 1061 - ret = sysfs_create_group(&p->kobj, p->pma_table); 1062 - if (ret) 1063 - goto err_put_gid_attrs; 1060 + if (device->process_mad) { 1061 + p->pma_table = get_counter_table(device, port_num); 1062 + ret = sysfs_create_group(&p->kobj, p->pma_table); 1063 + if (ret) 1064 + goto err_put_gid_attrs; 1065 + } 1064 1066 1065 1067 p->gid_group.name = "gids"; 1066 1068 p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len); ··· 1120 1118 } 1121 1119 1122 1120 /* 1123 - * If port == 0, it means we have only one port and the parent 1124 - * device, not this port device, should be the holder of the 1125 - * hw_counters 1121 + * If port == 0, it means hw_counters are per device and not per 1122 + * port, so holder should be device. Therefore skip per port conunter 1123 + * initialization. 
1126 1124 */ 1127 1125 if (device->alloc_hw_stats && port_num) 1128 1126 setup_hw_stats(device, p, port_num); ··· 1175 1173 p->gid_group.attrs = NULL; 1176 1174 1177 1175 err_remove_pma: 1178 - sysfs_remove_group(&p->kobj, p->pma_table); 1176 + if (p->pma_table) 1177 + sysfs_remove_group(&p->kobj, p->pma_table); 1179 1178 1180 1179 err_put_gid_attrs: 1181 1180 kobject_put(&p->gid_attr_group->kobj); ··· 1186 1183 return ret; 1187 1184 } 1188 1185 1189 - static ssize_t show_node_type(struct device *device, 1186 + static ssize_t node_type_show(struct device *device, 1190 1187 struct device_attribute *attr, char *buf) 1191 1188 { 1192 1189 struct ib_device *dev = container_of(device, struct ib_device, dev); ··· 1201 1198 default: return sprintf(buf, "%d: <unknown>\n", dev->node_type); 1202 1199 } 1203 1200 } 1201 + static DEVICE_ATTR_RO(node_type); 1204 1202 1205 - static ssize_t show_sys_image_guid(struct device *device, 1203 + static ssize_t sys_image_guid_show(struct device *device, 1206 1204 struct device_attribute *dev_attr, char *buf) 1207 1205 { 1208 1206 struct ib_device *dev = container_of(device, struct ib_device, dev); ··· 1214 1210 be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[2]), 1215 1211 be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[3])); 1216 1212 } 1213 + static DEVICE_ATTR_RO(sys_image_guid); 1217 1214 1218 - static ssize_t show_node_guid(struct device *device, 1215 + static ssize_t node_guid_show(struct device *device, 1219 1216 struct device_attribute *attr, char *buf) 1220 1217 { 1221 1218 struct ib_device *dev = container_of(device, struct ib_device, dev); ··· 1227 1222 be16_to_cpu(((__be16 *) &dev->node_guid)[2]), 1228 1223 be16_to_cpu(((__be16 *) &dev->node_guid)[3])); 1229 1224 } 1225 + static DEVICE_ATTR_RO(node_guid); 1230 1226 1231 - static ssize_t show_node_desc(struct device *device, 1227 + static ssize_t node_desc_show(struct device *device, 1232 1228 struct device_attribute *attr, char *buf) 1233 1229 { 1234 1230 struct 
ib_device *dev = container_of(device, struct ib_device, dev); ··· 1237 1231 return sprintf(buf, "%.64s\n", dev->node_desc); 1238 1232 } 1239 1233 1240 - static ssize_t set_node_desc(struct device *device, 1241 - struct device_attribute *attr, 1242 - const char *buf, size_t count) 1234 + static ssize_t node_desc_store(struct device *device, 1235 + struct device_attribute *attr, 1236 + const char *buf, size_t count) 1243 1237 { 1244 1238 struct ib_device *dev = container_of(device, struct ib_device, dev); 1245 1239 struct ib_device_modify desc = {}; ··· 1255 1249 1256 1250 return count; 1257 1251 } 1252 + static DEVICE_ATTR_RW(node_desc); 1258 1253 1259 - static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, 1254 + static ssize_t fw_ver_show(struct device *device, struct device_attribute *attr, 1260 1255 char *buf) 1261 1256 { 1262 1257 struct ib_device *dev = container_of(device, struct ib_device, dev); ··· 1266 1259 strlcat(buf, "\n", IB_FW_VERSION_NAME_MAX); 1267 1260 return strlen(buf); 1268 1261 } 1262 + static DEVICE_ATTR_RO(fw_ver); 1269 1263 1270 - static DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL); 1271 - static DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL); 1272 - static DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL); 1273 - static DEVICE_ATTR(node_desc, S_IRUGO | S_IWUSR, show_node_desc, set_node_desc); 1274 - static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); 1264 + static struct attribute *ib_dev_attrs[] = { 1265 + &dev_attr_node_type.attr, 1266 + &dev_attr_node_guid.attr, 1267 + &dev_attr_sys_image_guid.attr, 1268 + &dev_attr_fw_ver.attr, 1269 + &dev_attr_node_desc.attr, 1270 + NULL, 1271 + }; 1275 1272 1276 - static struct device_attribute *ib_class_attributes[] = { 1277 - &dev_attr_node_type, 1278 - &dev_attr_sys_image_guid, 1279 - &dev_attr_node_guid, 1280 - &dev_attr_node_desc, 1281 - &dev_attr_fw_ver, 1273 + static const struct attribute_group dev_attr_group = { 1274 + .attrs = 
ib_dev_attrs, 1282 1275 }; 1283 1276 1284 1277 static void free_port_list_attributes(struct ib_device *device) ··· 1292 1285 kfree(port->hw_stats); 1293 1286 free_hsag(&port->kobj, port->hw_stats_ag); 1294 1287 } 1295 - sysfs_remove_group(p, port->pma_table); 1288 + 1289 + if (port->pma_table) 1290 + sysfs_remove_group(p, port->pma_table); 1296 1291 sysfs_remove_group(p, &port->pkey_group); 1297 1292 sysfs_remove_group(p, &port->gid_group); 1298 1293 sysfs_remove_group(&port->gid_attr_group->kobj, ··· 1305 1296 kobject_put(p); 1306 1297 } 1307 1298 1308 - kobject_put(device->ports_parent); 1299 + kobject_put(device->ports_kobj); 1309 1300 } 1310 1301 1311 1302 int ib_device_register_sysfs(struct ib_device *device, ··· 1316 1307 int ret; 1317 1308 int i; 1318 1309 1319 - ret = dev_set_name(class_dev, "%s", device->name); 1320 - if (ret) 1321 - return ret; 1310 + device->groups[0] = &dev_attr_group; 1311 + class_dev->groups = device->groups; 1322 1312 1323 1313 ret = device_add(class_dev); 1324 1314 if (ret) 1325 1315 goto err; 1326 1316 1327 - for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) { 1328 - ret = device_create_file(class_dev, ib_class_attributes[i]); 1329 - if (ret) 1330 - goto err_unregister; 1331 - } 1332 - 1333 - device->ports_parent = kobject_create_and_add("ports", 1334 - &class_dev->kobj); 1335 - if (!device->ports_parent) { 1317 + device->ports_kobj = kobject_create_and_add("ports", &class_dev->kobj); 1318 + if (!device->ports_kobj) { 1336 1319 ret = -ENOMEM; 1337 1320 goto err_put; 1338 1321 } ··· 1348 1347 1349 1348 err_put: 1350 1349 free_port_list_attributes(device); 1351 - 1352 - err_unregister: 1353 1350 device_del(class_dev); 1354 - 1355 1351 err: 1356 1352 return ret; 1357 1353 } 1358 1354 1359 1355 void ib_device_unregister_sysfs(struct ib_device *device) 1360 1356 { 1361 - int i; 1362 - 1363 - /* Hold kobject until ib_dealloc_device() */ 1364 - kobject_get(&device->dev.kobj); 1357 + /* Hold device until ib_dealloc_device() */ 1358 + 
get_device(&device->dev); 1365 1359 1366 1360 free_port_list_attributes(device); 1367 1361 ··· 1364 1368 kfree(device->hw_stats); 1365 1369 free_hsag(&device->dev.kobj, device->hw_stats_ag); 1366 1370 } 1367 - 1368 - for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) 1369 - device_remove_file(&device->dev, ib_class_attributes[i]); 1370 1371 1371 1372 device_unregister(&device->dev); 1372 1373 }
+66 -59
drivers/infiniband/core/umem.c
··· 85 85 struct page **page_list; 86 86 struct vm_area_struct **vma_list; 87 87 unsigned long lock_limit; 88 + unsigned long new_pinned; 88 89 unsigned long cur_base; 90 + struct mm_struct *mm; 89 91 unsigned long npages; 90 92 int ret; 91 93 int i; ··· 109 107 if (!can_do_mlock()) 110 108 return ERR_PTR(-EPERM); 111 109 112 - umem = kzalloc(sizeof *umem, GFP_KERNEL); 113 - if (!umem) 114 - return ERR_PTR(-ENOMEM); 110 + if (access & IB_ACCESS_ON_DEMAND) { 111 + umem = kzalloc(sizeof(struct ib_umem_odp), GFP_KERNEL); 112 + if (!umem) 113 + return ERR_PTR(-ENOMEM); 114 + umem->is_odp = 1; 115 + } else { 116 + umem = kzalloc(sizeof(*umem), GFP_KERNEL); 117 + if (!umem) 118 + return ERR_PTR(-ENOMEM); 119 + } 115 120 116 121 umem->context = context; 117 122 umem->length = size; 118 123 umem->address = addr; 119 124 umem->page_shift = PAGE_SHIFT; 120 125 umem->writable = ib_access_writable(access); 126 + umem->owning_mm = mm = current->mm; 127 + mmgrab(mm); 121 128 122 129 if (access & IB_ACCESS_ON_DEMAND) { 123 - ret = ib_umem_odp_get(context, umem, access); 130 + ret = ib_umem_odp_get(to_ib_umem_odp(umem), access); 124 131 if (ret) 125 132 goto umem_kfree; 126 133 return umem; 127 134 } 128 - 129 - umem->odp_data = NULL; 130 135 131 136 /* We assume the memory is from hugetlb until proved otherwise */ 132 137 umem->hugetlb = 1; ··· 153 144 umem->hugetlb = 0; 154 145 155 146 npages = ib_umem_num_pages(umem); 147 + if (npages == 0 || npages > UINT_MAX) { 148 + ret = -EINVAL; 149 + goto out; 150 + } 156 151 157 152 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 158 153 159 - down_write(&current->mm->mmap_sem); 160 - current->mm->pinned_vm += npages; 161 - if ((current->mm->pinned_vm > lock_limit) && !capable(CAP_IPC_LOCK)) { 162 - up_write(&current->mm->mmap_sem); 154 + down_write(&mm->mmap_sem); 155 + if (check_add_overflow(mm->pinned_vm, npages, &new_pinned) || 156 + (new_pinned > lock_limit && !capable(CAP_IPC_LOCK))) { 157 + up_write(&mm->mmap_sem); 163 158 ret 
= -ENOMEM; 164 - goto vma; 159 + goto out; 165 160 } 166 - up_write(&current->mm->mmap_sem); 161 + mm->pinned_vm = new_pinned; 162 + up_write(&mm->mmap_sem); 167 163 168 164 cur_base = addr & PAGE_MASK; 169 - 170 - if (npages == 0 || npages > UINT_MAX) { 171 - ret = -EINVAL; 172 - goto vma; 173 - } 174 165 175 166 ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL); 176 167 if (ret) ··· 181 172 182 173 sg_list_start = umem->sg_head.sgl; 183 174 184 - down_read(&current->mm->mmap_sem); 185 175 while (npages) { 176 + down_read(&mm->mmap_sem); 186 177 ret = get_user_pages_longterm(cur_base, 187 178 min_t(unsigned long, npages, 188 179 PAGE_SIZE / sizeof (struct page *)), 189 180 gup_flags, page_list, vma_list); 190 181 if (ret < 0) { 191 - up_read(&current->mm->mmap_sem); 182 + up_read(&mm->mmap_sem); 192 183 goto umem_release; 193 184 } 194 185 ··· 196 187 cur_base += ret * PAGE_SIZE; 197 188 npages -= ret; 198 189 190 + /* Continue to hold the mmap_sem as vma_list access 191 + * needs to be protected. 
192 + */ 199 193 for_each_sg(sg_list_start, sg, ret, i) { 200 194 if (vma_list && !is_vm_hugetlb_page(vma_list[i])) 201 195 umem->hugetlb = 0; 202 196 203 197 sg_set_page(sg, page_list[i], PAGE_SIZE, 0); 204 198 } 199 + up_read(&mm->mmap_sem); 205 200 206 201 /* preparing for next loop */ 207 202 sg_list_start = sg; 208 203 } 209 - up_read(&current->mm->mmap_sem); 210 204 211 205 umem->nmap = ib_dma_map_sg_attrs(context->device, 212 206 umem->sg_head.sgl, ··· 228 216 umem_release: 229 217 __ib_umem_release(context->device, umem, 0); 230 218 vma: 231 - down_write(&current->mm->mmap_sem); 232 - current->mm->pinned_vm -= ib_umem_num_pages(umem); 233 - up_write(&current->mm->mmap_sem); 219 + down_write(&mm->mmap_sem); 220 + mm->pinned_vm -= ib_umem_num_pages(umem); 221 + up_write(&mm->mmap_sem); 234 222 out: 235 223 if (vma_list) 236 224 free_page((unsigned long) vma_list); 237 225 free_page((unsigned long) page_list); 238 226 umem_kfree: 239 - if (ret) 227 + if (ret) { 228 + mmdrop(umem->owning_mm); 240 229 kfree(umem); 230 + } 241 231 return ret ? 
ERR_PTR(ret) : umem; 242 232 } 243 233 EXPORT_SYMBOL(ib_umem_get); 244 234 245 - static void ib_umem_account(struct work_struct *work) 235 + static void __ib_umem_release_tail(struct ib_umem *umem) 236 + { 237 + mmdrop(umem->owning_mm); 238 + if (umem->is_odp) 239 + kfree(to_ib_umem_odp(umem)); 240 + else 241 + kfree(umem); 242 + } 243 + 244 + static void ib_umem_release_defer(struct work_struct *work) 246 245 { 247 246 struct ib_umem *umem = container_of(work, struct ib_umem, work); 248 247 249 - down_write(&umem->mm->mmap_sem); 250 - umem->mm->pinned_vm -= umem->diff; 251 - up_write(&umem->mm->mmap_sem); 252 - mmput(umem->mm); 253 - kfree(umem); 248 + down_write(&umem->owning_mm->mmap_sem); 249 + umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem); 250 + up_write(&umem->owning_mm->mmap_sem); 251 + 252 + __ib_umem_release_tail(umem); 254 253 } 255 254 256 255 /** ··· 271 248 void ib_umem_release(struct ib_umem *umem) 272 249 { 273 250 struct ib_ucontext *context = umem->context; 274 - struct mm_struct *mm; 275 - struct task_struct *task; 276 - unsigned long diff; 277 251 278 - if (umem->odp_data) { 279 - ib_umem_odp_release(umem); 252 + if (umem->is_odp) { 253 + ib_umem_odp_release(to_ib_umem_odp(umem)); 254 + __ib_umem_release_tail(umem); 280 255 return; 281 256 } 282 257 283 258 __ib_umem_release(umem->context->device, umem, 1); 284 - 285 - task = get_pid_task(umem->context->tgid, PIDTYPE_PID); 286 - if (!task) 287 - goto out; 288 - mm = get_task_mm(task); 289 - put_task_struct(task); 290 - if (!mm) 291 - goto out; 292 - 293 - diff = ib_umem_num_pages(umem); 294 259 295 260 /* 296 261 * We may be called with the mm's mmap_sem already held. This ··· 286 275 * the last reference to our file and calls our release 287 276 * method. If there are memory regions to destroy, we'll end 288 277 * up here and not be able to take the mmap_sem. In that case 289 - * we defer the vm_locked accounting to the system workqueue. 
278 + * we defer the vm_locked accounting a workqueue. 290 279 */ 291 280 if (context->closing) { 292 - if (!down_write_trylock(&mm->mmap_sem)) { 293 - INIT_WORK(&umem->work, ib_umem_account); 294 - umem->mm = mm; 295 - umem->diff = diff; 296 - 281 + if (!down_write_trylock(&umem->owning_mm->mmap_sem)) { 282 + INIT_WORK(&umem->work, ib_umem_release_defer); 297 283 queue_work(ib_wq, &umem->work); 298 284 return; 299 285 } 300 - } else 301 - down_write(&mm->mmap_sem); 286 + } else { 287 + down_write(&umem->owning_mm->mmap_sem); 288 + } 289 + umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem); 290 + up_write(&umem->owning_mm->mmap_sem); 302 291 303 - mm->pinned_vm -= diff; 304 - up_write(&mm->mmap_sem); 305 - mmput(mm); 306 - out: 307 - kfree(umem); 292 + __ib_umem_release_tail(umem); 308 293 } 309 294 EXPORT_SYMBOL(ib_umem_release); 310 295 ··· 310 303 int n; 311 304 struct scatterlist *sg; 312 305 313 - if (umem->odp_data) 306 + if (umem->is_odp) 314 307 return ib_umem_num_pages(umem); 315 308 316 309 n = 0;
+285 -334
drivers/infiniband/core/umem_odp.c
··· 58 58 struct ib_umem_odp *umem_odp = 59 59 container_of(n, struct ib_umem_odp, interval_tree); 60 60 61 - return ib_umem_start(umem_odp->umem); 61 + return ib_umem_start(&umem_odp->umem); 62 62 } 63 63 64 64 /* Note that the representation of the intervals in the interval tree ··· 71 71 struct ib_umem_odp *umem_odp = 72 72 container_of(n, struct ib_umem_odp, interval_tree); 73 73 74 - return ib_umem_end(umem_odp->umem) - 1; 74 + return ib_umem_end(&umem_odp->umem) - 1; 75 75 } 76 76 77 77 INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last, 78 78 node_start, node_last, static, rbt_ib_umem) 79 79 80 - static void ib_umem_notifier_start_account(struct ib_umem *item) 80 + static void ib_umem_notifier_start_account(struct ib_umem_odp *umem_odp) 81 81 { 82 - mutex_lock(&item->odp_data->umem_mutex); 83 - 84 - /* Only update private counters for this umem if it has them. 85 - * Otherwise skip it. All page faults will be delayed for this umem. */ 86 - if (item->odp_data->mn_counters_active) { 87 - int notifiers_count = item->odp_data->notifiers_count++; 88 - 89 - if (notifiers_count == 0) 90 - /* Initialize the completion object for waiting on 91 - * notifiers. Since notifier_count is zero, no one 92 - * should be waiting right now. */ 93 - reinit_completion(&item->odp_data->notifier_completion); 94 - } 95 - mutex_unlock(&item->odp_data->umem_mutex); 96 - } 97 - 98 - static void ib_umem_notifier_end_account(struct ib_umem *item) 99 - { 100 - mutex_lock(&item->odp_data->umem_mutex); 101 - 102 - /* Only update private counters for this umem if it has them. 103 - * Otherwise skip it. All page faults will be delayed for this umem. */ 104 - if (item->odp_data->mn_counters_active) { 82 + mutex_lock(&umem_odp->umem_mutex); 83 + if (umem_odp->notifiers_count++ == 0) 105 84 /* 106 - * This sequence increase will notify the QP page fault that 107 - * the page that is going to be mapped in the spte could have 108 - * been freed. 
85 + * Initialize the completion object for waiting on 86 + * notifiers. Since notifier_count is zero, no one should be 87 + * waiting right now. 109 88 */ 110 - ++item->odp_data->notifiers_seq; 111 - if (--item->odp_data->notifiers_count == 0) 112 - complete_all(&item->odp_data->notifier_completion); 113 - } 114 - mutex_unlock(&item->odp_data->umem_mutex); 89 + reinit_completion(&umem_odp->notifier_completion); 90 + mutex_unlock(&umem_odp->umem_mutex); 115 91 } 116 92 117 - /* Account for a new mmu notifier in an ib_ucontext. */ 118 - static void ib_ucontext_notifier_start_account(struct ib_ucontext *context) 93 + static void ib_umem_notifier_end_account(struct ib_umem_odp *umem_odp) 119 94 { 120 - atomic_inc(&context->notifier_count); 95 + mutex_lock(&umem_odp->umem_mutex); 96 + /* 97 + * This sequence increase will notify the QP page fault that the page 98 + * that is going to be mapped in the spte could have been freed. 99 + */ 100 + ++umem_odp->notifiers_seq; 101 + if (--umem_odp->notifiers_count == 0) 102 + complete_all(&umem_odp->notifier_completion); 103 + mutex_unlock(&umem_odp->umem_mutex); 121 104 } 122 105 123 - /* Account for a terminating mmu notifier in an ib_ucontext. 124 - * 125 - * Must be called with the ib_ucontext->umem_rwsem semaphore unlocked, since 126 - * the function takes the semaphore itself. */ 127 - static void ib_ucontext_notifier_end_account(struct ib_ucontext *context) 106 + static int ib_umem_notifier_release_trampoline(struct ib_umem_odp *umem_odp, 107 + u64 start, u64 end, void *cookie) 128 108 { 129 - int zero_notifiers = atomic_dec_and_test(&context->notifier_count); 109 + struct ib_umem *umem = &umem_odp->umem; 130 110 131 - if (zero_notifiers && 132 - !list_empty(&context->no_private_counters)) { 133 - /* No currently running mmu notifiers. Now is the chance to 134 - * add private accounting to all previously added umems. 
*/ 135 - struct ib_umem_odp *odp_data, *next; 136 - 137 - /* Prevent concurrent mmu notifiers from working on the 138 - * no_private_counters list. */ 139 - down_write(&context->umem_rwsem); 140 - 141 - /* Read the notifier_count again, with the umem_rwsem 142 - * semaphore taken for write. */ 143 - if (!atomic_read(&context->notifier_count)) { 144 - list_for_each_entry_safe(odp_data, next, 145 - &context->no_private_counters, 146 - no_private_counters) { 147 - mutex_lock(&odp_data->umem_mutex); 148 - odp_data->mn_counters_active = true; 149 - list_del(&odp_data->no_private_counters); 150 - complete_all(&odp_data->notifier_completion); 151 - mutex_unlock(&odp_data->umem_mutex); 152 - } 153 - } 154 - 155 - up_write(&context->umem_rwsem); 156 - } 157 - } 158 - 159 - static int ib_umem_notifier_release_trampoline(struct ib_umem *item, u64 start, 160 - u64 end, void *cookie) { 161 111 /* 162 112 * Increase the number of notifiers running, to 163 113 * prevent any further fault handling on this MR. 164 114 */ 165 - ib_umem_notifier_start_account(item); 166 - item->odp_data->dying = 1; 115 + ib_umem_notifier_start_account(umem_odp); 116 + umem_odp->dying = 1; 167 117 /* Make sure that the fact the umem is dying is out before we release 168 118 * all pending page faults. 
*/ 169 119 smp_wmb(); 170 - complete_all(&item->odp_data->notifier_completion); 171 - item->context->invalidate_range(item, ib_umem_start(item), 172 - ib_umem_end(item)); 120 + complete_all(&umem_odp->notifier_completion); 121 + umem->context->invalidate_range(umem_odp, ib_umem_start(umem), 122 + ib_umem_end(umem)); 173 123 return 0; 174 124 } 175 125 176 126 static void ib_umem_notifier_release(struct mmu_notifier *mn, 177 127 struct mm_struct *mm) 178 128 { 179 - struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn); 129 + struct ib_ucontext_per_mm *per_mm = 130 + container_of(mn, struct ib_ucontext_per_mm, mn); 180 131 181 - if (!context->invalidate_range) 182 - return; 183 - 184 - ib_ucontext_notifier_start_account(context); 185 - down_read(&context->umem_rwsem); 186 - rbt_ib_umem_for_each_in_range(&context->umem_tree, 0, 187 - ULLONG_MAX, 188 - ib_umem_notifier_release_trampoline, 189 - true, 190 - NULL); 191 - up_read(&context->umem_rwsem); 132 + down_read(&per_mm->umem_rwsem); 133 + if (per_mm->active) 134 + rbt_ib_umem_for_each_in_range( 135 + &per_mm->umem_tree, 0, ULLONG_MAX, 136 + ib_umem_notifier_release_trampoline, true, NULL); 137 + up_read(&per_mm->umem_rwsem); 192 138 } 193 139 194 - static int invalidate_page_trampoline(struct ib_umem *item, u64 start, 140 + static int invalidate_page_trampoline(struct ib_umem_odp *item, u64 start, 195 141 u64 end, void *cookie) 196 142 { 197 143 ib_umem_notifier_start_account(item); 198 - item->context->invalidate_range(item, start, start + PAGE_SIZE); 144 + item->umem.context->invalidate_range(item, start, start + PAGE_SIZE); 199 145 ib_umem_notifier_end_account(item); 200 146 return 0; 201 147 } 202 148 203 - static int invalidate_range_start_trampoline(struct ib_umem *item, u64 start, 204 - u64 end, void *cookie) 149 + static int invalidate_range_start_trampoline(struct ib_umem_odp *item, 150 + u64 start, u64 end, void *cookie) 205 151 { 206 152 ib_umem_notifier_start_account(item); 207 - 
item->context->invalidate_range(item, start, end); 153 + item->umem.context->invalidate_range(item, start, end); 208 154 return 0; 209 155 } 210 156 ··· 160 214 unsigned long end, 161 215 bool blockable) 162 216 { 163 - struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn); 164 - int ret; 165 - 166 - if (!context->invalidate_range) 167 - return 0; 217 + struct ib_ucontext_per_mm *per_mm = 218 + container_of(mn, struct ib_ucontext_per_mm, mn); 168 219 169 220 if (blockable) 170 - down_read(&context->umem_rwsem); 171 - else if (!down_read_trylock(&context->umem_rwsem)) 221 + down_read(&per_mm->umem_rwsem); 222 + else if (!down_read_trylock(&per_mm->umem_rwsem)) 172 223 return -EAGAIN; 173 224 174 - ib_ucontext_notifier_start_account(context); 175 - ret = rbt_ib_umem_for_each_in_range(&context->umem_tree, start, 176 - end, 177 - invalidate_range_start_trampoline, 178 - blockable, NULL); 179 - up_read(&context->umem_rwsem); 225 + if (!per_mm->active) { 226 + up_read(&per_mm->umem_rwsem); 227 + /* 228 + * At this point active is permanently set and visible to this 229 + * CPU without a lock, that fact is relied on to skip the unlock 230 + * in range_end. 
231 + */ 232 + return 0; 233 + } 180 234 181 - return ret; 235 + return rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, start, end, 236 + invalidate_range_start_trampoline, 237 + blockable, NULL); 182 238 } 183 239 184 - static int invalidate_range_end_trampoline(struct ib_umem *item, u64 start, 240 + static int invalidate_range_end_trampoline(struct ib_umem_odp *item, u64 start, 185 241 u64 end, void *cookie) 186 242 { 187 243 ib_umem_notifier_end_account(item); ··· 195 247 unsigned long start, 196 248 unsigned long end) 197 249 { 198 - struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn); 250 + struct ib_ucontext_per_mm *per_mm = 251 + container_of(mn, struct ib_ucontext_per_mm, mn); 199 252 200 - if (!context->invalidate_range) 253 + if (unlikely(!per_mm->active)) 201 254 return; 202 255 203 - /* 204 - * TODO: we currently bail out if there is any sleepable work to be done 205 - * in ib_umem_notifier_invalidate_range_start so we shouldn't really block 206 - * here. But this is ugly and fragile. 
207 - */ 208 - down_read(&context->umem_rwsem); 209 - rbt_ib_umem_for_each_in_range(&context->umem_tree, start, 256 + rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, start, 210 257 end, 211 258 invalidate_range_end_trampoline, true, NULL); 212 - up_read(&context->umem_rwsem); 213 - ib_ucontext_notifier_end_account(context); 259 + up_read(&per_mm->umem_rwsem); 214 260 } 215 261 216 262 static const struct mmu_notifier_ops ib_umem_notifiers = { ··· 213 271 .invalidate_range_end = ib_umem_notifier_invalidate_range_end, 214 272 }; 215 273 216 - struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context, 217 - unsigned long addr, 218 - size_t size) 274 + static void add_umem_to_per_mm(struct ib_umem_odp *umem_odp) 219 275 { 220 - struct ib_umem *umem; 276 + struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm; 277 + struct ib_umem *umem = &umem_odp->umem; 278 + 279 + down_write(&per_mm->umem_rwsem); 280 + if (likely(ib_umem_start(umem) != ib_umem_end(umem))) 281 + rbt_ib_umem_insert(&umem_odp->interval_tree, 282 + &per_mm->umem_tree); 283 + up_write(&per_mm->umem_rwsem); 284 + } 285 + 286 + static void remove_umem_from_per_mm(struct ib_umem_odp *umem_odp) 287 + { 288 + struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm; 289 + struct ib_umem *umem = &umem_odp->umem; 290 + 291 + down_write(&per_mm->umem_rwsem); 292 + if (likely(ib_umem_start(umem) != ib_umem_end(umem))) 293 + rbt_ib_umem_remove(&umem_odp->interval_tree, 294 + &per_mm->umem_tree); 295 + complete_all(&umem_odp->notifier_completion); 296 + 297 + up_write(&per_mm->umem_rwsem); 298 + } 299 + 300 + static struct ib_ucontext_per_mm *alloc_per_mm(struct ib_ucontext *ctx, 301 + struct mm_struct *mm) 302 + { 303 + struct ib_ucontext_per_mm *per_mm; 304 + int ret; 305 + 306 + per_mm = kzalloc(sizeof(*per_mm), GFP_KERNEL); 307 + if (!per_mm) 308 + return ERR_PTR(-ENOMEM); 309 + 310 + per_mm->context = ctx; 311 + per_mm->mm = mm; 312 + per_mm->umem_tree = RB_ROOT_CACHED; 313 + init_rwsem(&per_mm->umem_rwsem); 
314 + per_mm->active = ctx->invalidate_range; 315 + 316 + rcu_read_lock(); 317 + per_mm->tgid = get_task_pid(current->group_leader, PIDTYPE_PID); 318 + rcu_read_unlock(); 319 + 320 + WARN_ON(mm != current->mm); 321 + 322 + per_mm->mn.ops = &ib_umem_notifiers; 323 + ret = mmu_notifier_register(&per_mm->mn, per_mm->mm); 324 + if (ret) { 325 + dev_err(&ctx->device->dev, 326 + "Failed to register mmu_notifier %d\n", ret); 327 + goto out_pid; 328 + } 329 + 330 + list_add(&per_mm->ucontext_list, &ctx->per_mm_list); 331 + return per_mm; 332 + 333 + out_pid: 334 + put_pid(per_mm->tgid); 335 + kfree(per_mm); 336 + return ERR_PTR(ret); 337 + } 338 + 339 + static int get_per_mm(struct ib_umem_odp *umem_odp) 340 + { 341 + struct ib_ucontext *ctx = umem_odp->umem.context; 342 + struct ib_ucontext_per_mm *per_mm; 343 + 344 + /* 345 + * Generally speaking we expect only one or two per_mm in this list, 346 + * so no reason to optimize this search today. 347 + */ 348 + mutex_lock(&ctx->per_mm_list_lock); 349 + list_for_each_entry(per_mm, &ctx->per_mm_list, ucontext_list) { 350 + if (per_mm->mm == umem_odp->umem.owning_mm) 351 + goto found; 352 + } 353 + 354 + per_mm = alloc_per_mm(ctx, umem_odp->umem.owning_mm); 355 + if (IS_ERR(per_mm)) { 356 + mutex_unlock(&ctx->per_mm_list_lock); 357 + return PTR_ERR(per_mm); 358 + } 359 + 360 + found: 361 + umem_odp->per_mm = per_mm; 362 + per_mm->odp_mrs_count++; 363 + mutex_unlock(&ctx->per_mm_list_lock); 364 + 365 + return 0; 366 + } 367 + 368 + static void free_per_mm(struct rcu_head *rcu) 369 + { 370 + kfree(container_of(rcu, struct ib_ucontext_per_mm, rcu)); 371 + } 372 + 373 + void put_per_mm(struct ib_umem_odp *umem_odp) 374 + { 375 + struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm; 376 + struct ib_ucontext *ctx = umem_odp->umem.context; 377 + bool need_free; 378 + 379 + mutex_lock(&ctx->per_mm_list_lock); 380 + umem_odp->per_mm = NULL; 381 + per_mm->odp_mrs_count--; 382 + need_free = per_mm->odp_mrs_count == 0; 383 + if 
(need_free) 384 + list_del(&per_mm->ucontext_list); 385 + mutex_unlock(&ctx->per_mm_list_lock); 386 + 387 + if (!need_free) 388 + return; 389 + 390 + /* 391 + * NOTE! mmu_notifier_unregister() can happen between a start/end 392 + * callback, resulting in an start/end, and thus an unbalanced 393 + * lock. This doesn't really matter to us since we are about to kfree 394 + * the memory that holds the lock, however LOCKDEP doesn't like this. 395 + */ 396 + down_write(&per_mm->umem_rwsem); 397 + per_mm->active = false; 398 + up_write(&per_mm->umem_rwsem); 399 + 400 + WARN_ON(!RB_EMPTY_ROOT(&per_mm->umem_tree.rb_root)); 401 + mmu_notifier_unregister_no_release(&per_mm->mn, per_mm->mm); 402 + put_pid(per_mm->tgid); 403 + mmu_notifier_call_srcu(&per_mm->rcu, free_per_mm); 404 + } 405 + 406 + struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm, 407 + unsigned long addr, size_t size) 408 + { 409 + struct ib_ucontext *ctx = per_mm->context; 221 410 struct ib_umem_odp *odp_data; 411 + struct ib_umem *umem; 222 412 int pages = size >> PAGE_SHIFT; 223 413 int ret; 224 414 225 - umem = kzalloc(sizeof(*umem), GFP_KERNEL); 226 - if (!umem) 415 + odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL); 416 + if (!odp_data) 227 417 return ERR_PTR(-ENOMEM); 228 - 229 - umem->context = context; 418 + umem = &odp_data->umem; 419 + umem->context = ctx; 230 420 umem->length = size; 231 421 umem->address = addr; 232 422 umem->page_shift = PAGE_SHIFT; 233 423 umem->writable = 1; 234 - 235 - odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL); 236 - if (!odp_data) { 237 - ret = -ENOMEM; 238 - goto out_umem; 239 - } 240 - odp_data->umem = umem; 424 + umem->is_odp = 1; 425 + odp_data->per_mm = per_mm; 241 426 242 427 mutex_init(&odp_data->umem_mutex); 243 428 init_completion(&odp_data->notifier_completion); ··· 383 314 goto out_page_list; 384 315 } 385 316 386 - down_write(&context->umem_rwsem); 387 - context->odp_mrs_count++; 388 - rbt_ib_umem_insert(&odp_data->interval_tree, 
&context->umem_tree); 389 - if (likely(!atomic_read(&context->notifier_count))) 390 - odp_data->mn_counters_active = true; 391 - else 392 - list_add(&odp_data->no_private_counters, 393 - &context->no_private_counters); 394 - up_write(&context->umem_rwsem); 317 + /* 318 + * Caller must ensure that the umem_odp that the per_mm came from 319 + * cannot be freed during the call to ib_alloc_odp_umem. 320 + */ 321 + mutex_lock(&ctx->per_mm_list_lock); 322 + per_mm->odp_mrs_count++; 323 + mutex_unlock(&ctx->per_mm_list_lock); 324 + add_umem_to_per_mm(odp_data); 395 325 396 - umem->odp_data = odp_data; 397 - 398 - return umem; 326 + return odp_data; 399 327 400 328 out_page_list: 401 329 vfree(odp_data->page_list); 402 330 out_odp_data: 403 331 kfree(odp_data); 404 - out_umem: 405 - kfree(umem); 406 332 return ERR_PTR(ret); 407 333 } 408 334 EXPORT_SYMBOL(ib_alloc_odp_umem); 409 335 410 - int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem, 411 - int access) 336 + int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access) 412 337 { 338 + struct ib_umem *umem = &umem_odp->umem; 339 + /* 340 + * NOTE: This must called in a process context where umem->owning_mm 341 + * == current->mm 342 + */ 343 + struct mm_struct *mm = umem->owning_mm; 413 344 int ret_val; 414 - struct pid *our_pid; 415 - struct mm_struct *mm = get_task_mm(current); 416 - 417 - if (!mm) 418 - return -EINVAL; 419 345 420 346 if (access & IB_ACCESS_HUGETLB) { 421 347 struct vm_area_struct *vma; ··· 430 366 umem->hugetlb = 0; 431 367 } 432 368 433 - /* Prevent creating ODP MRs in child processes */ 434 - rcu_read_lock(); 435 - our_pid = get_task_pid(current->group_leader, PIDTYPE_PID); 436 - rcu_read_unlock(); 437 - put_pid(our_pid); 438 - if (context->tgid != our_pid) { 439 - ret_val = -EINVAL; 440 - goto out_mm; 441 - } 369 + mutex_init(&umem_odp->umem_mutex); 442 370 443 - umem->odp_data = kzalloc(sizeof(*umem->odp_data), GFP_KERNEL); 444 - if (!umem->odp_data) { 445 - ret_val = 
-ENOMEM; 446 - goto out_mm; 447 - } 448 - umem->odp_data->umem = umem; 449 - 450 - mutex_init(&umem->odp_data->umem_mutex); 451 - 452 - init_completion(&umem->odp_data->notifier_completion); 371 + init_completion(&umem_odp->notifier_completion); 453 372 454 373 if (ib_umem_num_pages(umem)) { 455 - umem->odp_data->page_list = 456 - vzalloc(array_size(sizeof(*umem->odp_data->page_list), 374 + umem_odp->page_list = 375 + vzalloc(array_size(sizeof(*umem_odp->page_list), 457 376 ib_umem_num_pages(umem))); 458 - if (!umem->odp_data->page_list) { 459 - ret_val = -ENOMEM; 460 - goto out_odp_data; 461 - } 377 + if (!umem_odp->page_list) 378 + return -ENOMEM; 462 379 463 - umem->odp_data->dma_list = 464 - vzalloc(array_size(sizeof(*umem->odp_data->dma_list), 380 + umem_odp->dma_list = 381 + vzalloc(array_size(sizeof(*umem_odp->dma_list), 465 382 ib_umem_num_pages(umem))); 466 - if (!umem->odp_data->dma_list) { 383 + if (!umem_odp->dma_list) { 467 384 ret_val = -ENOMEM; 468 385 goto out_page_list; 469 386 } 470 387 } 471 388 472 - /* 473 - * When using MMU notifiers, we will get a 474 - * notification before the "current" task (and MM) is 475 - * destroyed. We use the umem_rwsem semaphore to synchronize. 476 - */ 477 - down_write(&context->umem_rwsem); 478 - context->odp_mrs_count++; 479 - if (likely(ib_umem_start(umem) != ib_umem_end(umem))) 480 - rbt_ib_umem_insert(&umem->odp_data->interval_tree, 481 - &context->umem_tree); 482 - if (likely(!atomic_read(&context->notifier_count)) || 483 - context->odp_mrs_count == 1) 484 - umem->odp_data->mn_counters_active = true; 485 - else 486 - list_add(&umem->odp_data->no_private_counters, 487 - &context->no_private_counters); 488 - downgrade_write(&context->umem_rwsem); 389 + ret_val = get_per_mm(umem_odp); 390 + if (ret_val) 391 + goto out_dma_list; 392 + add_umem_to_per_mm(umem_odp); 489 393 490 - if (context->odp_mrs_count == 1) { 491 - /* 492 - * Note that at this point, no MMU notifier is running 493 - * for this context! 
494 - */ 495 - atomic_set(&context->notifier_count, 0); 496 - INIT_HLIST_NODE(&context->mn.hlist); 497 - context->mn.ops = &ib_umem_notifiers; 498 - /* 499 - * Lock-dep detects a false positive for mmap_sem vs. 500 - * umem_rwsem, due to not grasping downgrade_write correctly. 501 - */ 502 - lockdep_off(); 503 - ret_val = mmu_notifier_register(&context->mn, mm); 504 - lockdep_on(); 505 - if (ret_val) { 506 - pr_err("Failed to register mmu_notifier %d\n", ret_val); 507 - ret_val = -EBUSY; 508 - goto out_mutex; 509 - } 510 - } 511 - 512 - up_read(&context->umem_rwsem); 513 - 514 - /* 515 - * Note that doing an mmput can cause a notifier for the relevant mm. 516 - * If the notifier is called while we hold the umem_rwsem, this will 517 - * cause a deadlock. Therefore, we release the reference only after we 518 - * released the semaphore. 519 - */ 520 - mmput(mm); 521 394 return 0; 522 395 523 - out_mutex: 524 - up_read(&context->umem_rwsem); 525 - vfree(umem->odp_data->dma_list); 396 + out_dma_list: 397 + vfree(umem_odp->dma_list); 526 398 out_page_list: 527 - vfree(umem->odp_data->page_list); 528 - out_odp_data: 529 - kfree(umem->odp_data); 530 - out_mm: 531 - mmput(mm); 399 + vfree(umem_odp->page_list); 532 400 return ret_val; 533 401 } 534 402 535 - void ib_umem_odp_release(struct ib_umem *umem) 403 + void ib_umem_odp_release(struct ib_umem_odp *umem_odp) 536 404 { 537 - struct ib_ucontext *context = umem->context; 405 + struct ib_umem *umem = &umem_odp->umem; 538 406 539 407 /* 540 408 * Ensure that no more pages are mapped in the umem. ··· 474 478 * It is the driver's responsibility to ensure, before calling us, 475 479 * that the hardware will not attempt to access the MR any more. 
476 480 */ 477 - ib_umem_odp_unmap_dma_pages(umem, ib_umem_start(umem), 481 + ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem), 478 482 ib_umem_end(umem)); 479 483 480 - down_write(&context->umem_rwsem); 481 - if (likely(ib_umem_start(umem) != ib_umem_end(umem))) 482 - rbt_ib_umem_remove(&umem->odp_data->interval_tree, 483 - &context->umem_tree); 484 - context->odp_mrs_count--; 485 - if (!umem->odp_data->mn_counters_active) { 486 - list_del(&umem->odp_data->no_private_counters); 487 - complete_all(&umem->odp_data->notifier_completion); 488 - } 489 - 490 - /* 491 - * Downgrade the lock to a read lock. This ensures that the notifiers 492 - * (who lock the mutex for reading) will be able to finish, and we 493 - * will be able to enventually obtain the mmu notifiers SRCU. Note 494 - * that since we are doing it atomically, no other user could register 495 - * and unregister while we do the check. 496 - */ 497 - downgrade_write(&context->umem_rwsem); 498 - if (!context->odp_mrs_count) { 499 - struct task_struct *owning_process = NULL; 500 - struct mm_struct *owning_mm = NULL; 501 - 502 - owning_process = get_pid_task(context->tgid, 503 - PIDTYPE_PID); 504 - if (owning_process == NULL) 505 - /* 506 - * The process is already dead, notifier were removed 507 - * already. 508 - */ 509 - goto out; 510 - 511 - owning_mm = get_task_mm(owning_process); 512 - if (owning_mm == NULL) 513 - /* 514 - * The process' mm is already dead, notifier were 515 - * removed already. 
516 - */ 517 - goto out_put_task; 518 - mmu_notifier_unregister(&context->mn, owning_mm); 519 - 520 - mmput(owning_mm); 521 - 522 - out_put_task: 523 - put_task_struct(owning_process); 524 - } 525 - out: 526 - up_read(&context->umem_rwsem); 527 - 528 - vfree(umem->odp_data->dma_list); 529 - vfree(umem->odp_data->page_list); 530 - kfree(umem->odp_data); 531 - kfree(umem); 484 + remove_umem_from_per_mm(umem_odp); 485 + put_per_mm(umem_odp); 486 + vfree(umem_odp->dma_list); 487 + vfree(umem_odp->page_list); 532 488 } 533 489 534 490 /* ··· 492 544 * @access_mask: access permissions needed for this page. 493 545 * @current_seq: sequence number for synchronization with invalidations. 494 546 * the sequence number is taken from 495 - * umem->odp_data->notifiers_seq. 547 + * umem_odp->notifiers_seq. 496 548 * 497 549 * The function returns -EFAULT if the DMA mapping operation fails. It returns 498 550 * -EAGAIN if a concurrent invalidation prevents us from updating the page. ··· 502 554 * umem. 503 555 */ 504 556 static int ib_umem_odp_map_dma_single_page( 505 - struct ib_umem *umem, 557 + struct ib_umem_odp *umem_odp, 506 558 int page_index, 507 559 struct page *page, 508 560 u64 access_mask, 509 561 unsigned long current_seq) 510 562 { 563 + struct ib_umem *umem = &umem_odp->umem; 511 564 struct ib_device *dev = umem->context->device; 512 565 dma_addr_t dma_addr; 513 566 int stored_page = 0; ··· 520 571 * handle case of a racing notifier. This check also allows us to bail 521 572 * early if we have a notifier running in parallel with us. 
522 573 */ 523 - if (ib_umem_mmu_notifier_retry(umem, current_seq)) { 574 + if (ib_umem_mmu_notifier_retry(umem_odp, current_seq)) { 524 575 ret = -EAGAIN; 525 576 goto out; 526 577 } 527 - if (!(umem->odp_data->dma_list[page_index])) { 578 + if (!(umem_odp->dma_list[page_index])) { 528 579 dma_addr = ib_dma_map_page(dev, 529 580 page, 530 581 0, BIT(umem->page_shift), ··· 533 584 ret = -EFAULT; 534 585 goto out; 535 586 } 536 - umem->odp_data->dma_list[page_index] = dma_addr | access_mask; 537 - umem->odp_data->page_list[page_index] = page; 587 + umem_odp->dma_list[page_index] = dma_addr | access_mask; 588 + umem_odp->page_list[page_index] = page; 538 589 umem->npages++; 539 590 stored_page = 1; 540 - } else if (umem->odp_data->page_list[page_index] == page) { 541 - umem->odp_data->dma_list[page_index] |= access_mask; 591 + } else if (umem_odp->page_list[page_index] == page) { 592 + umem_odp->dma_list[page_index] |= access_mask; 542 593 } else { 543 594 pr_err("error: got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n", 544 - umem->odp_data->page_list[page_index], page); 595 + umem_odp->page_list[page_index], page); 545 596 /* Better remove the mapping now, to prevent any further 546 597 * damage. */ 547 598 remove_existing_mapping = 1; ··· 554 605 555 606 if (remove_existing_mapping && umem->context->invalidate_range) { 556 607 invalidate_page_trampoline( 557 - umem, 608 + umem_odp, 558 609 ib_umem_start(umem) + (page_index >> umem->page_shift), 559 610 ib_umem_start(umem) + ((page_index + 1) >> 560 611 umem->page_shift), ··· 570 621 * 571 622 * Pins the range of pages passed in the argument, and maps them to 572 623 * DMA addresses. The DMA addresses of the mapped pages is updated in 573 - * umem->odp_data->dma_list. 624 + * umem_odp->dma_list. 574 625 * 575 626 * Returns the number of pages mapped in success, negative error code 576 627 * for failure. ··· 578 629 * the function from completing its task. 
579 630 * An -ENOENT error code indicates that userspace process is being terminated 580 631 * and mm was already destroyed. 581 - * @umem: the umem to map and pin 632 + * @umem_odp: the umem to map and pin 582 633 * @user_virt: the address from which we need to map. 583 634 * @bcnt: the minimal number of bytes to pin and map. The mapping might be 584 635 * bigger due to alignment, and may also be smaller in case of an error ··· 588 639 * range. 589 640 * @current_seq: the MMU notifiers sequance value for synchronization with 590 641 * invalidations. the sequance number is read from 591 - * umem->odp_data->notifiers_seq before calling this function 642 + * umem_odp->notifiers_seq before calling this function 592 643 */ 593 - int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, 594 - u64 access_mask, unsigned long current_seq) 644 + int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, 645 + u64 bcnt, u64 access_mask, 646 + unsigned long current_seq) 595 647 { 648 + struct ib_umem *umem = &umem_odp->umem; 596 649 struct task_struct *owning_process = NULL; 597 - struct mm_struct *owning_mm = NULL; 650 + struct mm_struct *owning_mm = umem_odp->umem.owning_mm; 598 651 struct page **local_page_list = NULL; 599 652 u64 page_mask, off; 600 653 int j, k, ret = 0, start_idx, npages = 0, page_shift; ··· 620 669 user_virt = user_virt & page_mask; 621 670 bcnt += off; /* Charge for the first page offset as well. */ 622 671 623 - owning_process = get_pid_task(umem->context->tgid, PIDTYPE_PID); 624 - if (owning_process == NULL) { 672 + /* 673 + * owning_process is allowed to be NULL, this means somehow the mm is 674 + * existing beyond the lifetime of the originating process.. Presumably 675 + * mmget_not_zero will fail in this case. 
676 + */ 677 + owning_process = get_pid_task(umem_odp->per_mm->tgid, PIDTYPE_PID); 678 + if (WARN_ON(!mmget_not_zero(umem_odp->umem.owning_mm))) { 625 679 ret = -EINVAL; 626 - goto out_no_task; 627 - } 628 - 629 - owning_mm = get_task_mm(owning_process); 630 - if (owning_mm == NULL) { 631 - ret = -ENOENT; 632 680 goto out_put_task; 633 681 } 634 682 ··· 659 709 break; 660 710 661 711 bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt); 662 - mutex_lock(&umem->odp_data->umem_mutex); 712 + mutex_lock(&umem_odp->umem_mutex); 663 713 for (j = 0; j < npages; j++, user_virt += PAGE_SIZE) { 664 714 if (user_virt & ~page_mask) { 665 715 p += PAGE_SIZE; ··· 672 722 } 673 723 674 724 ret = ib_umem_odp_map_dma_single_page( 675 - umem, k, local_page_list[j], 725 + umem_odp, k, local_page_list[j], 676 726 access_mask, current_seq); 677 727 if (ret < 0) 678 728 break; ··· 680 730 p = page_to_phys(local_page_list[j]); 681 731 k++; 682 732 } 683 - mutex_unlock(&umem->odp_data->umem_mutex); 733 + mutex_unlock(&umem_odp->umem_mutex); 684 734 685 735 if (ret < 0) { 686 736 /* Release left over pages when handling errors. */ ··· 699 749 700 750 mmput(owning_mm); 701 751 out_put_task: 702 - put_task_struct(owning_process); 703 - out_no_task: 752 + if (owning_process) 753 + put_task_struct(owning_process); 704 754 free_page((unsigned long)local_page_list); 705 755 return ret; 706 756 } 707 757 EXPORT_SYMBOL(ib_umem_odp_map_dma_pages); 708 758 709 - void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt, 759 + void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, 710 760 u64 bound) 711 761 { 762 + struct ib_umem *umem = &umem_odp->umem; 712 763 int idx; 713 764 u64 addr; 714 765 struct ib_device *dev = umem->context->device; ··· 721 770 * faults from completion. We might be racing with other 722 771 * invalidations, so we must make sure we free each page only 723 772 * once. 
*/ 724 - mutex_lock(&umem->odp_data->umem_mutex); 773 + mutex_lock(&umem_odp->umem_mutex); 725 774 for (addr = virt; addr < bound; addr += BIT(umem->page_shift)) { 726 775 idx = (addr - ib_umem_start(umem)) >> umem->page_shift; 727 - if (umem->odp_data->page_list[idx]) { 728 - struct page *page = umem->odp_data->page_list[idx]; 729 - dma_addr_t dma = umem->odp_data->dma_list[idx]; 776 + if (umem_odp->page_list[idx]) { 777 + struct page *page = umem_odp->page_list[idx]; 778 + dma_addr_t dma = umem_odp->dma_list[idx]; 730 779 dma_addr_t dma_addr = dma & ODP_DMA_ADDR_MASK; 731 780 732 781 WARN_ON(!dma_addr); ··· 749 798 /* on demand pinning support */ 750 799 if (!umem->context->invalidate_range) 751 800 put_page(page); 752 - umem->odp_data->page_list[idx] = NULL; 753 - umem->odp_data->dma_list[idx] = 0; 801 + umem_odp->page_list[idx] = NULL; 802 + umem_odp->dma_list[idx] = 0; 754 803 umem->npages--; 755 804 } 756 805 } 757 - mutex_unlock(&umem->odp_data->umem_mutex); 806 + mutex_unlock(&umem_odp->umem_mutex); 758 807 } 759 808 EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages); 760 809 ··· 781 830 return -EAGAIN; 782 831 next = rbt_ib_umem_iter_next(node, start, last - 1); 783 832 umem = container_of(node, struct ib_umem_odp, interval_tree); 784 - ret_val = cb(umem->umem, start, last, cookie) || ret_val; 833 + ret_val = cb(umem, start, last, cookie) || ret_val; 785 834 } 786 835 787 836 return ret_val;
+6 -7
drivers/infiniband/core/user_mad.c
··· 138 138 static dev_t dynamic_umad_dev; 139 139 static dev_t dynamic_issm_dev; 140 140 141 - static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS); 141 + static DEFINE_IDA(umad_ida); 142 142 143 143 static void ib_umad_add_one(struct ib_device *device); 144 144 static void ib_umad_remove_one(struct ib_device *device, void *client_data); ··· 1132 1132 if (!port) 1133 1133 return -ENODEV; 1134 1134 1135 - return sprintf(buf, "%s\n", port->ib_dev->name); 1135 + return sprintf(buf, "%s\n", dev_name(&port->ib_dev->dev)); 1136 1136 } 1137 1137 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); 1138 1138 ··· 1159 1159 dev_t base_umad; 1160 1160 dev_t base_issm; 1161 1161 1162 - devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS); 1163 - if (devnum >= IB_UMAD_MAX_PORTS) 1162 + devnum = ida_alloc_max(&umad_ida, IB_UMAD_MAX_PORTS - 1, GFP_KERNEL); 1163 + if (devnum < 0) 1164 1164 return -1; 1165 1165 port->dev_num = devnum; 1166 - set_bit(devnum, dev_map); 1167 1166 if (devnum >= IB_UMAD_NUM_FIXED_MINOR) { 1168 1167 base_umad = dynamic_umad_dev + devnum - IB_UMAD_NUM_FIXED_MINOR; 1169 1168 base_issm = dynamic_issm_dev + devnum - IB_UMAD_NUM_FIXED_MINOR; ··· 1226 1227 1227 1228 err_cdev: 1228 1229 cdev_del(&port->cdev); 1229 - clear_bit(devnum, dev_map); 1230 + ida_free(&umad_ida, devnum); 1230 1231 1231 1232 return -1; 1232 1233 } ··· 1260 1261 } 1261 1262 1262 1263 mutex_unlock(&port->file_mutex); 1263 - clear_bit(port->dev_num, dev_map); 1264 + ida_free(&umad_ida, port->dev_num); 1264 1265 } 1265 1266 1266 1267 static void ib_umad_add_one(struct ib_device *device)
+6 -9
drivers/infiniband/core/uverbs.h
··· 100 100 atomic_t refcount; 101 101 int num_comp_vectors; 102 102 struct completion comp; 103 - struct device *dev; 103 + struct device dev; 104 + /* First group for device attributes, NULL terminated array */ 105 + const struct attribute_group *groups[2]; 104 106 struct ib_device __rcu *ib_dev; 105 107 int devnum; 106 108 struct cdev cdev; 107 109 struct rb_root xrcd_tree; 108 110 struct mutex xrcd_tree_mutex; 109 - struct kobject kobj; 110 111 struct srcu_struct disassociate_srcu; 111 112 struct mutex lists_mutex; /* protect lists */ 112 113 struct list_head uverbs_file_list; ··· 147 146 struct ib_event_handler event_handler; 148 147 struct ib_uverbs_async_event_file *async_file; 149 148 struct list_head list; 150 - int is_closed; 151 149 152 150 /* 153 151 * To access the uobjects list hw_destroy_rwsem must be held for write ··· 157 157 struct rw_semaphore hw_destroy_rwsem; 158 158 spinlock_t uobjects_lock; 159 159 struct list_head uobjects; 160 + 161 + struct mutex umap_lock; 162 + struct list_head umaps; 160 163 161 164 u64 uverbs_cmd_mask; 162 165 u64 uverbs_ex_cmd_mask; ··· 219 216 struct list_head async_list; 220 217 u32 comp_events_reported; 221 218 u32 async_events_reported; 222 - }; 223 - 224 - struct ib_uflow_resources; 225 - struct ib_uflow_object { 226 - struct ib_uobject uobject; 227 - struct ib_uflow_resources *resources; 228 219 }; 229 220 230 221 extern const struct file_operations uverbs_event_fops;
+12 -31
drivers/infiniband/core/uverbs_cmd.c
··· 117 117 /* ufile is required when some objects are released */ 118 118 ucontext->ufile = file; 119 119 120 - rcu_read_lock(); 121 - ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID); 122 - rcu_read_unlock(); 123 - ucontext->closing = 0; 120 + ucontext->closing = false; 124 121 ucontext->cleanup_retryable = false; 125 122 126 123 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 127 - ucontext->umem_tree = RB_ROOT_CACHED; 128 - init_rwsem(&ucontext->umem_rwsem); 129 - ucontext->odp_mrs_count = 0; 130 - INIT_LIST_HEAD(&ucontext->no_private_counters); 131 - 124 + mutex_init(&ucontext->per_mm_list_lock); 125 + INIT_LIST_HEAD(&ucontext->per_mm_list); 132 126 if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)) 133 127 ucontext->invalidate_range = NULL; 134 128 ··· 166 172 put_unused_fd(resp.async_fd); 167 173 168 174 err_free: 169 - put_pid(ucontext->tgid); 170 175 ib_dev->dealloc_ucontext(ucontext); 171 176 172 177 err_alloc: ··· 2762 2769 return ret ? ret : in_len; 2763 2770 } 2764 2771 2765 - struct ib_uflow_resources { 2766 - size_t max; 2767 - size_t num; 2768 - size_t collection_num; 2769 - size_t counters_num; 2770 - struct ib_counters **counters; 2771 - struct ib_flow_action **collection; 2772 - }; 2773 - 2774 - static struct ib_uflow_resources *flow_resources_alloc(size_t num_specs) 2772 + struct ib_uflow_resources *flow_resources_alloc(size_t num_specs) 2775 2773 { 2776 2774 struct ib_uflow_resources *resources; 2777 2775 ··· 2792 2808 2793 2809 return NULL; 2794 2810 } 2811 + EXPORT_SYMBOL(flow_resources_alloc); 2795 2812 2796 2813 void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res) 2797 2814 { ··· 2811 2826 kfree(uflow_res->counters); 2812 2827 kfree(uflow_res); 2813 2828 } 2829 + EXPORT_SYMBOL(ib_uverbs_flow_resources_free); 2814 2830 2815 - static void flow_resources_add(struct ib_uflow_resources *uflow_res, 2816 - enum ib_flow_spec_type type, 2817 - void *ibobj) 2831 + void flow_resources_add(struct 
ib_uflow_resources *uflow_res, 2832 + enum ib_flow_spec_type type, 2833 + void *ibobj) 2818 2834 { 2819 2835 WARN_ON(uflow_res->num >= uflow_res->max); 2820 2836 ··· 2836 2850 2837 2851 uflow_res->num++; 2838 2852 } 2853 + EXPORT_SYMBOL(flow_resources_add); 2839 2854 2840 2855 static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile, 2841 2856 struct ib_uverbs_flow_spec *kern_spec, ··· 3471 3484 struct ib_uverbs_create_flow cmd; 3472 3485 struct ib_uverbs_create_flow_resp resp; 3473 3486 struct ib_uobject *uobj; 3474 - struct ib_uflow_object *uflow; 3475 3487 struct ib_flow *flow_id; 3476 3488 struct ib_uverbs_flow_attr *kern_flow_attr; 3477 3489 struct ib_flow_attr *flow_attr; ··· 3609 3623 err = PTR_ERR(flow_id); 3610 3624 goto err_free; 3611 3625 } 3612 - atomic_inc(&qp->usecnt); 3613 - flow_id->qp = qp; 3614 - flow_id->device = qp->device; 3615 - flow_id->uobject = uobj; 3616 - uobj->object = flow_id; 3617 - uflow = container_of(uobj, typeof(*uflow), uobject); 3618 - uflow->resources = uflow_res; 3626 + 3627 + ib_set_flow(uobj, flow_id, qp, qp->device, uflow_res); 3619 3628 3620 3629 memset(&resp, 0, sizeof(resp)); 3621 3630 resp.flow_handle = uobj->id;
+137 -3
drivers/infiniband/core/uverbs_ioctl.c
··· 57 57 struct ib_uverbs_attr *uattrs; 58 58 59 59 DECLARE_BITMAP(uobj_finalize, UVERBS_API_ATTR_BKEY_LEN); 60 + DECLARE_BITMAP(spec_finalize, UVERBS_API_ATTR_BKEY_LEN); 60 61 61 62 /* 62 63 * Must be last. bundle ends in a flex array which overlaps ··· 142 141 143 142 return !memchr_inv((const void *)&uattr->data + len, 144 143 0, uattr->len - len); 144 + } 145 + 146 + static int uverbs_process_idrs_array(struct bundle_priv *pbundle, 147 + const struct uverbs_api_attr *attr_uapi, 148 + struct uverbs_objs_arr_attr *attr, 149 + struct ib_uverbs_attr *uattr, 150 + u32 attr_bkey) 151 + { 152 + const struct uverbs_attr_spec *spec = &attr_uapi->spec; 153 + size_t array_len; 154 + u32 *idr_vals; 155 + int ret = 0; 156 + size_t i; 157 + 158 + if (uattr->attr_data.reserved) 159 + return -EINVAL; 160 + 161 + if (uattr->len % sizeof(u32)) 162 + return -EINVAL; 163 + 164 + array_len = uattr->len / sizeof(u32); 165 + if (array_len < spec->u2.objs_arr.min_len || 166 + array_len > spec->u2.objs_arr.max_len) 167 + return -EINVAL; 168 + 169 + attr->uobjects = 170 + uverbs_alloc(&pbundle->bundle, 171 + array_size(array_len, sizeof(*attr->uobjects))); 172 + if (IS_ERR(attr->uobjects)) 173 + return PTR_ERR(attr->uobjects); 174 + 175 + /* 176 + * Since idr is 4B and *uobjects is >= 4B, we can use attr->uobjects 177 + * to store idrs array and avoid additional memory allocation. The 178 + * idrs array is offset to the end of the uobjects array so we will be 179 + * able to read idr and replace with a pointer. 
180 + */ 181 + idr_vals = (u32 *)(attr->uobjects + array_len) - array_len; 182 + 183 + if (uattr->len > sizeof(uattr->data)) { 184 + ret = copy_from_user(idr_vals, u64_to_user_ptr(uattr->data), 185 + uattr->len); 186 + if (ret) 187 + return -EFAULT; 188 + } else { 189 + memcpy(idr_vals, &uattr->data, uattr->len); 190 + } 191 + 192 + for (i = 0; i != array_len; i++) { 193 + attr->uobjects[i] = uverbs_get_uobject_from_file( 194 + spec->u2.objs_arr.obj_type, pbundle->bundle.ufile, 195 + spec->u2.objs_arr.access, idr_vals[i]); 196 + if (IS_ERR(attr->uobjects[i])) { 197 + ret = PTR_ERR(attr->uobjects[i]); 198 + break; 199 + } 200 + } 201 + 202 + attr->len = i; 203 + __set_bit(attr_bkey, pbundle->spec_finalize); 204 + return ret; 205 + } 206 + 207 + static int uverbs_free_idrs_array(const struct uverbs_api_attr *attr_uapi, 208 + struct uverbs_objs_arr_attr *attr, 209 + bool commit) 210 + { 211 + const struct uverbs_attr_spec *spec = &attr_uapi->spec; 212 + int current_ret; 213 + int ret = 0; 214 + size_t i; 215 + 216 + for (i = 0; i != attr->len; i++) { 217 + current_ret = uverbs_finalize_object( 218 + attr->uobjects[i], spec->u2.objs_arr.access, commit); 219 + if (!ret) 220 + ret = current_ret; 221 + } 222 + 223 + return ret; 145 224 } 146 225 147 226 static int uverbs_process_attr(struct bundle_priv *pbundle, ··· 327 246 } 328 247 329 248 break; 249 + 250 + case UVERBS_ATTR_TYPE_IDRS_ARRAY: 251 + return uverbs_process_idrs_array(pbundle, attr_uapi, 252 + &e->objs_arr_attr, uattr, 253 + attr_bkey); 330 254 default: 331 255 return -EOPNOTSUPP; 332 256 } ··· 386 300 return -EPROTONOSUPPORT; 387 301 return 0; 388 302 } 389 - attr = srcu_dereference( 390 - *slot, &pbundle->bundle.ufile->device->disassociate_srcu); 303 + attr = rcu_dereference_protected(*slot, true); 391 304 392 305 /* Reject duplicate attributes from user-space */ 393 306 if (test_bit(attr_bkey, pbundle->bundle.attr_present)) ··· 469 384 unsigned int i; 470 385 int ret = 0; 471 386 387 + /* fast path for 
simple uobjects */ 472 388 i = -1; 473 389 while ((i = find_next_bit(pbundle->uobj_finalize, key_bitmap_len, 474 390 i + 1)) < key_bitmap_len) { ··· 481 395 attr->obj_attr.attr_elm->spec.u.obj.access, commit); 482 396 if (!ret) 483 397 ret = current_ret; 398 + } 399 + 400 + i = -1; 401 + while ((i = find_next_bit(pbundle->spec_finalize, key_bitmap_len, 402 + i + 1)) < key_bitmap_len) { 403 + struct uverbs_attr *attr = &pbundle->bundle.attrs[i]; 404 + const struct uverbs_api_attr *attr_uapi; 405 + void __rcu **slot; 406 + int current_ret; 407 + 408 + slot = uapi_get_attr_for_method( 409 + pbundle, 410 + pbundle->method_key | uapi_bkey_to_key_attr(i)); 411 + if (WARN_ON(!slot)) 412 + continue; 413 + 414 + attr_uapi = rcu_dereference_protected(*slot, true); 415 + 416 + if (attr_uapi->spec.type == UVERBS_ATTR_TYPE_IDRS_ARRAY) { 417 + current_ret = uverbs_free_idrs_array( 418 + attr_uapi, &attr->objs_arr_attr, commit); 419 + if (!ret) 420 + ret = current_ret; 421 + } 484 422 } 485 423 486 424 for (memblock = pbundle->allocated_mem; memblock;) { ··· 539 429 uapi_key_ioctl_method(hdr->method_id)); 540 430 if (unlikely(!slot)) 541 431 return -EPROTONOSUPPORT; 542 - method_elm = srcu_dereference(*slot, &ufile->device->disassociate_srcu); 432 + method_elm = rcu_dereference_protected(*slot, true); 543 433 544 434 if (!method_elm->use_stack) { 545 435 pbundle = kmalloc(method_elm->bundle_size, GFP_KERNEL); ··· 571 461 memset(pbundle->bundle.attr_present, 0, 572 462 sizeof(pbundle->bundle.attr_present)); 573 463 memset(pbundle->uobj_finalize, 0, sizeof(pbundle->uobj_finalize)); 464 + memset(pbundle->spec_finalize, 0, sizeof(pbundle->spec_finalize)); 574 465 575 466 ret = ib_uverbs_run_method(pbundle, hdr->num_attrs); 576 467 destroy_ret = bundle_destroy(pbundle, ret == 0); ··· 722 611 return 0; 723 612 } 724 613 EXPORT_SYMBOL(uverbs_copy_to); 614 + 615 + int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle, 616 + size_t idx, s64 lower_bound, u64 
upper_bound, 617 + s64 *def_val) 618 + { 619 + const struct uverbs_attr *attr; 620 + 621 + attr = uverbs_attr_get(attrs_bundle, idx); 622 + if (IS_ERR(attr)) { 623 + if ((PTR_ERR(attr) != -ENOENT) || !def_val) 624 + return PTR_ERR(attr); 625 + 626 + *to = *def_val; 627 + } else { 628 + *to = attr->ptr_attr.data; 629 + } 630 + 631 + if (*to < lower_bound || (*to > 0 && (u64)*to > upper_bound)) 632 + return -EINVAL; 633 + 634 + return 0; 635 + } 636 + EXPORT_SYMBOL(_uverbs_get_const);
+275 -63
drivers/infiniband/core/uverbs_main.c
··· 45 45 #include <linux/cdev.h> 46 46 #include <linux/anon_inodes.h> 47 47 #include <linux/slab.h> 48 + #include <linux/sched/mm.h> 48 49 49 50 #include <linux/uaccess.h> 50 51 ··· 73 72 static dev_t dynamic_uverbs_dev; 74 73 static struct class *uverbs_class; 75 74 76 - static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); 75 + static DEFINE_IDA(uverbs_ida); 77 76 78 77 static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, 79 78 const char __user *buf, int in_len, ··· 170 169 return ret; 171 170 } 172 171 173 - static void ib_uverbs_release_dev(struct kobject *kobj) 172 + static void ib_uverbs_release_dev(struct device *device) 174 173 { 175 174 struct ib_uverbs_device *dev = 176 - container_of(kobj, struct ib_uverbs_device, kobj); 175 + container_of(device, struct ib_uverbs_device, dev); 177 176 178 177 uverbs_destroy_api(dev->uapi); 179 178 cleanup_srcu_struct(&dev->disassociate_srcu); 180 179 kfree(dev); 181 180 } 182 - 183 - static struct kobj_type ib_uverbs_dev_ktype = { 184 - .release = ib_uverbs_release_dev, 185 - }; 186 181 187 182 static void ib_uverbs_release_async_event_file(struct kref *ref) 188 183 { ··· 262 265 if (atomic_dec_and_test(&file->device->refcount)) 263 266 ib_uverbs_comp_dev(file->device); 264 267 265 - kobject_put(&file->device->kobj); 268 + put_device(&file->device->dev); 266 269 kfree(file); 267 270 } 268 271 ··· 814 817 } 815 818 816 819 /* 820 + * Each time we map IO memory into user space this keeps track of the mapping. 821 + * When the device is hot-unplugged we 'zap' the mmaps in user space to point 822 + * to the zero page and allow the hot unplug to proceed. 823 + * 824 + * This is necessary for cases like PCI physical hot unplug as the actual BAR 825 + * memory may vanish after this and access to it from userspace could MCE. 826 + * 827 + * RDMA drivers supporting disassociation must have their user space designed 828 + * to cope in some way with their IO pages going to the zero page. 
829 + */ 830 + struct rdma_umap_priv { 831 + struct vm_area_struct *vma; 832 + struct list_head list; 833 + }; 834 + 835 + static const struct vm_operations_struct rdma_umap_ops; 836 + 837 + static void rdma_umap_priv_init(struct rdma_umap_priv *priv, 838 + struct vm_area_struct *vma) 839 + { 840 + struct ib_uverbs_file *ufile = vma->vm_file->private_data; 841 + 842 + priv->vma = vma; 843 + vma->vm_private_data = priv; 844 + vma->vm_ops = &rdma_umap_ops; 845 + 846 + mutex_lock(&ufile->umap_lock); 847 + list_add(&priv->list, &ufile->umaps); 848 + mutex_unlock(&ufile->umap_lock); 849 + } 850 + 851 + /* 852 + * The VMA has been dup'd, initialize the vm_private_data with a new tracking 853 + * struct 854 + */ 855 + static void rdma_umap_open(struct vm_area_struct *vma) 856 + { 857 + struct ib_uverbs_file *ufile = vma->vm_file->private_data; 858 + struct rdma_umap_priv *opriv = vma->vm_private_data; 859 + struct rdma_umap_priv *priv; 860 + 861 + if (!opriv) 862 + return; 863 + 864 + /* We are racing with disassociation */ 865 + if (!down_read_trylock(&ufile->hw_destroy_rwsem)) 866 + goto out_zap; 867 + /* 868 + * Disassociation already completed, the VMA should already be zapped. 869 + */ 870 + if (!ufile->ucontext) 871 + goto out_unlock; 872 + 873 + priv = kzalloc(sizeof(*priv), GFP_KERNEL); 874 + if (!priv) 875 + goto out_unlock; 876 + rdma_umap_priv_init(priv, vma); 877 + 878 + up_read(&ufile->hw_destroy_rwsem); 879 + return; 880 + 881 + out_unlock: 882 + up_read(&ufile->hw_destroy_rwsem); 883 + out_zap: 884 + /* 885 + * We can't allow the VMA to be created with the actual IO pages, that 886 + * would break our API contract, and it can't be stopped at this 887 + * point, so zap it. 
888 + */ 889 + vma->vm_private_data = NULL; 890 + zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); 891 + } 892 + 893 + static void rdma_umap_close(struct vm_area_struct *vma) 894 + { 895 + struct ib_uverbs_file *ufile = vma->vm_file->private_data; 896 + struct rdma_umap_priv *priv = vma->vm_private_data; 897 + 898 + if (!priv) 899 + return; 900 + 901 + /* 902 + * The vma holds a reference on the struct file that created it, which 903 + * in turn means that the ib_uverbs_file is guaranteed to exist at 904 + * this point. 905 + */ 906 + mutex_lock(&ufile->umap_lock); 907 + list_del(&priv->list); 908 + mutex_unlock(&ufile->umap_lock); 909 + kfree(priv); 910 + } 911 + 912 + static const struct vm_operations_struct rdma_umap_ops = { 913 + .open = rdma_umap_open, 914 + .close = rdma_umap_close, 915 + }; 916 + 917 + static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext, 918 + struct vm_area_struct *vma, 919 + unsigned long size) 920 + { 921 + struct ib_uverbs_file *ufile = ucontext->ufile; 922 + struct rdma_umap_priv *priv; 923 + 924 + if (vma->vm_end - vma->vm_start != size) 925 + return ERR_PTR(-EINVAL); 926 + 927 + /* Driver is using this wrong, must be called by ib_uverbs_mmap */ 928 + if (WARN_ON(!vma->vm_file || 929 + vma->vm_file->private_data != ufile)) 930 + return ERR_PTR(-EINVAL); 931 + lockdep_assert_held(&ufile->device->disassociate_srcu); 932 + 933 + priv = kzalloc(sizeof(*priv), GFP_KERNEL); 934 + if (!priv) 935 + return ERR_PTR(-ENOMEM); 936 + return priv; 937 + } 938 + 939 + /* 940 + * Map IO memory into a process. This is to be called by drivers as part of 941 + * their mmap() functions if they wish to send something like PCI-E BAR memory 942 + * to userspace. 
943 + */ 944 + int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, 945 + unsigned long pfn, unsigned long size, pgprot_t prot) 946 + { 947 + struct rdma_umap_priv *priv = rdma_user_mmap_pre(ucontext, vma, size); 948 + 949 + if (IS_ERR(priv)) 950 + return PTR_ERR(priv); 951 + 952 + vma->vm_page_prot = prot; 953 + if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) { 954 + kfree(priv); 955 + return -EAGAIN; 956 + } 957 + 958 + rdma_umap_priv_init(priv, vma); 959 + return 0; 960 + } 961 + EXPORT_SYMBOL(rdma_user_mmap_io); 962 + 963 + /* 964 + * The page case is here for a slightly different reason, the driver expects 965 + * to be able to free the page it is sharing to user space when it destroys 966 + * its ucontext, which means we need to zap the user space references. 967 + * 968 + * We could handle this differently by providing an API to allocate a shared 969 + * page and then only freeing the shared page when the last ufile is 970 + * destroyed. 971 + */ 972 + int rdma_user_mmap_page(struct ib_ucontext *ucontext, 973 + struct vm_area_struct *vma, struct page *page, 974 + unsigned long size) 975 + { 976 + struct rdma_umap_priv *priv = rdma_user_mmap_pre(ucontext, vma, size); 977 + 978 + if (IS_ERR(priv)) 979 + return PTR_ERR(priv); 980 + 981 + if (remap_pfn_range(vma, vma->vm_start, page_to_pfn(page), size, 982 + vma->vm_page_prot)) { 983 + kfree(priv); 984 + return -EAGAIN; 985 + } 986 + 987 + rdma_umap_priv_init(priv, vma); 988 + return 0; 989 + } 990 + EXPORT_SYMBOL(rdma_user_mmap_page); 991 + 992 + void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) 993 + { 994 + struct rdma_umap_priv *priv, *next_priv; 995 + 996 + lockdep_assert_held(&ufile->hw_destroy_rwsem); 997 + 998 + while (1) { 999 + struct mm_struct *mm = NULL; 1000 + 1001 + /* Get an arbitrary mm pointer that hasn't been cleaned yet */ 1002 + mutex_lock(&ufile->umap_lock); 1003 + if (!list_empty(&ufile->umaps)) { 1004 + mm = 
list_first_entry(&ufile->umaps, 1005 + struct rdma_umap_priv, list) 1006 + ->vma->vm_mm; 1007 + mmget(mm); 1008 + } 1009 + mutex_unlock(&ufile->umap_lock); 1010 + if (!mm) 1011 + return; 1012 + 1013 + /* 1014 + * The umap_lock is nested under mmap_sem since it used within 1015 + * the vma_ops callbacks, so we have to clean the list one mm 1016 + * at a time to get the lock ordering right. Typically there 1017 + * will only be one mm, so no big deal. 1018 + */ 1019 + down_write(&mm->mmap_sem); 1020 + mutex_lock(&ufile->umap_lock); 1021 + list_for_each_entry_safe (priv, next_priv, &ufile->umaps, 1022 + list) { 1023 + struct vm_area_struct *vma = priv->vma; 1024 + 1025 + if (vma->vm_mm != mm) 1026 + continue; 1027 + list_del_init(&priv->list); 1028 + 1029 + zap_vma_ptes(vma, vma->vm_start, 1030 + vma->vm_end - vma->vm_start); 1031 + vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE); 1032 + } 1033 + mutex_unlock(&ufile->umap_lock); 1034 + up_write(&mm->mmap_sem); 1035 + mmput(mm); 1036 + } 1037 + } 1038 + 1039 + /* 817 1040 * ib_uverbs_open() does not need the BKL: 818 1041 * 819 1042 * - the ib_uverbs_device structures are properly reference counted and ··· 1056 839 if (!atomic_inc_not_zero(&dev->refcount)) 1057 840 return -ENXIO; 1058 841 842 + get_device(&dev->dev); 1059 843 srcu_key = srcu_read_lock(&dev->disassociate_srcu); 1060 844 mutex_lock(&dev->lists_mutex); 1061 845 ib_dev = srcu_dereference(dev->ib_dev, ··· 1094 876 spin_lock_init(&file->uobjects_lock); 1095 877 INIT_LIST_HEAD(&file->uobjects); 1096 878 init_rwsem(&file->hw_destroy_rwsem); 879 + mutex_init(&file->umap_lock); 880 + INIT_LIST_HEAD(&file->umaps); 1097 881 1098 882 filp->private_data = file; 1099 - kobject_get(&dev->kobj); 1100 883 list_add_tail(&file->list, &dev->uverbs_file_list); 1101 884 mutex_unlock(&dev->lists_mutex); 1102 885 srcu_read_unlock(&dev->disassociate_srcu, srcu_key); ··· 1118 899 if (atomic_dec_and_test(&dev->refcount)) 1119 900 ib_uverbs_comp_dev(dev); 1120 901 902 + 
put_device(&dev->dev); 1121 903 return ret; 1122 904 } 1123 905 ··· 1129 909 uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE); 1130 910 1131 911 mutex_lock(&file->device->lists_mutex); 1132 - if (!file->is_closed) { 1133 - list_del(&file->list); 1134 - file->is_closed = 1; 1135 - } 912 + list_del_init(&file->list); 1136 913 mutex_unlock(&file->device->lists_mutex); 1137 914 1138 915 if (file->async_file) ··· 1168 951 .remove = ib_uverbs_remove_one 1169 952 }; 1170 953 1171 - static ssize_t show_ibdev(struct device *device, struct device_attribute *attr, 954 + static ssize_t ibdev_show(struct device *device, struct device_attribute *attr, 1172 955 char *buf) 1173 956 { 957 + struct ib_uverbs_device *dev = 958 + container_of(device, struct ib_uverbs_device, dev); 1174 959 int ret = -ENODEV; 1175 960 int srcu_key; 1176 - struct ib_uverbs_device *dev = dev_get_drvdata(device); 1177 961 struct ib_device *ib_dev; 1178 - 1179 - if (!dev) 1180 - return -ENODEV; 1181 962 1182 963 srcu_key = srcu_read_lock(&dev->disassociate_srcu); 1183 964 ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); 1184 965 if (ib_dev) 1185 - ret = sprintf(buf, "%s\n", ib_dev->name); 966 + ret = sprintf(buf, "%s\n", dev_name(&ib_dev->dev)); 1186 967 srcu_read_unlock(&dev->disassociate_srcu, srcu_key); 1187 968 1188 969 return ret; 1189 970 } 1190 - static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); 971 + static DEVICE_ATTR_RO(ibdev); 1191 972 1192 - static ssize_t show_dev_abi_version(struct device *device, 1193 - struct device_attribute *attr, char *buf) 973 + static ssize_t abi_version_show(struct device *device, 974 + struct device_attribute *attr, char *buf) 1194 975 { 1195 - struct ib_uverbs_device *dev = dev_get_drvdata(device); 976 + struct ib_uverbs_device *dev = 977 + container_of(device, struct ib_uverbs_device, dev); 1196 978 int ret = -ENODEV; 1197 979 int srcu_key; 1198 980 struct ib_device *ib_dev; 1199 981 1200 - if (!dev) 1201 - return -ENODEV; 1202 982 srcu_key = 
srcu_read_lock(&dev->disassociate_srcu); 1203 983 ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); 1204 984 if (ib_dev) ··· 1204 990 1205 991 return ret; 1206 992 } 1207 - static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL); 993 + static DEVICE_ATTR_RO(abi_version); 994 + 995 + static struct attribute *ib_dev_attrs[] = { 996 + &dev_attr_abi_version.attr, 997 + &dev_attr_ibdev.attr, 998 + NULL, 999 + }; 1000 + 1001 + static const struct attribute_group dev_attr_group = { 1002 + .attrs = ib_dev_attrs, 1003 + }; 1208 1004 1209 1005 static CLASS_ATTR_STRING(abi_version, S_IRUGO, 1210 1006 __stringify(IB_USER_VERBS_ABI_VERSION)); ··· 1252 1028 return; 1253 1029 } 1254 1030 1031 + device_initialize(&uverbs_dev->dev); 1032 + uverbs_dev->dev.class = uverbs_class; 1033 + uverbs_dev->dev.parent = device->dev.parent; 1034 + uverbs_dev->dev.release = ib_uverbs_release_dev; 1035 + uverbs_dev->groups[0] = &dev_attr_group; 1036 + uverbs_dev->dev.groups = uverbs_dev->groups; 1255 1037 atomic_set(&uverbs_dev->refcount, 1); 1256 1038 init_completion(&uverbs_dev->comp); 1257 1039 uverbs_dev->xrcd_tree = RB_ROOT; 1258 1040 mutex_init(&uverbs_dev->xrcd_tree_mutex); 1259 - kobject_init(&uverbs_dev->kobj, &ib_uverbs_dev_ktype); 1260 1041 mutex_init(&uverbs_dev->lists_mutex); 1261 1042 INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list); 1262 1043 INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list); 1044 + rcu_assign_pointer(uverbs_dev->ib_dev, device); 1045 + uverbs_dev->num_comp_vectors = device->num_comp_vectors; 1263 1046 1264 - devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); 1265 - if (devnum >= IB_UVERBS_MAX_DEVICES) 1047 + devnum = ida_alloc_max(&uverbs_ida, IB_UVERBS_MAX_DEVICES - 1, 1048 + GFP_KERNEL); 1049 + if (devnum < 0) 1266 1050 goto err; 1267 1051 uverbs_dev->devnum = devnum; 1268 - set_bit(devnum, dev_map); 1269 1052 if (devnum >= IB_UVERBS_NUM_FIXED_MINOR) 1270 1053 base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR; 
1271 1054 else 1272 1055 base = IB_UVERBS_BASE_DEV + devnum; 1273 1056 1274 - rcu_assign_pointer(uverbs_dev->ib_dev, device); 1275 - uverbs_dev->num_comp_vectors = device->num_comp_vectors; 1276 - 1277 1057 if (ib_uverbs_create_uapi(device, uverbs_dev)) 1278 1058 goto err_uapi; 1279 1059 1280 - cdev_init(&uverbs_dev->cdev, NULL); 1060 + uverbs_dev->dev.devt = base; 1061 + dev_set_name(&uverbs_dev->dev, "uverbs%d", uverbs_dev->devnum); 1062 + 1063 + cdev_init(&uverbs_dev->cdev, 1064 + device->mmap ? &uverbs_mmap_fops : &uverbs_fops); 1281 1065 uverbs_dev->cdev.owner = THIS_MODULE; 1282 - uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops; 1283 - cdev_set_parent(&uverbs_dev->cdev, &uverbs_dev->kobj); 1284 - kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum); 1285 - if (cdev_add(&uverbs_dev->cdev, base, 1)) 1286 - goto err_cdev; 1287 1066 1288 - uverbs_dev->dev = device_create(uverbs_class, device->dev.parent, 1289 - uverbs_dev->cdev.dev, uverbs_dev, 1290 - "uverbs%d", uverbs_dev->devnum); 1291 - if (IS_ERR(uverbs_dev->dev)) 1292 - goto err_cdev; 1293 - 1294 - if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev)) 1295 - goto err_class; 1296 - if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version)) 1297 - goto err_class; 1067 + ret = cdev_device_add(&uverbs_dev->cdev, &uverbs_dev->dev); 1068 + if (ret) 1069 + goto err_uapi; 1298 1070 1299 1071 ib_set_client_data(device, &uverbs_client, uverbs_dev); 1300 - 1301 1072 return; 1302 1073 1303 - err_class: 1304 - device_destroy(uverbs_class, uverbs_dev->cdev.dev); 1305 - err_cdev: 1306 - cdev_del(&uverbs_dev->cdev); 1307 1074 err_uapi: 1308 - clear_bit(devnum, dev_map); 1075 + ida_free(&uverbs_ida, devnum); 1309 1076 err: 1310 1077 if (atomic_dec_and_test(&uverbs_dev->refcount)) 1311 1078 ib_uverbs_comp_dev(uverbs_dev); 1312 1079 wait_for_completion(&uverbs_dev->comp); 1313 - kobject_put(&uverbs_dev->kobj); 1080 + put_device(&uverbs_dev->dev); 1314 1081 return; 1315 1082 
} 1316 1083 ··· 1322 1107 while (!list_empty(&uverbs_dev->uverbs_file_list)) { 1323 1108 file = list_first_entry(&uverbs_dev->uverbs_file_list, 1324 1109 struct ib_uverbs_file, list); 1325 - file->is_closed = 1; 1326 - list_del(&file->list); 1110 + list_del_init(&file->list); 1327 1111 kref_get(&file->ref); 1328 1112 1329 1113 /* We must release the mutex before going ahead and calling ··· 1370 1156 if (!uverbs_dev) 1371 1157 return; 1372 1158 1373 - dev_set_drvdata(uverbs_dev->dev, NULL); 1374 - device_destroy(uverbs_class, uverbs_dev->cdev.dev); 1375 - cdev_del(&uverbs_dev->cdev); 1376 - clear_bit(uverbs_dev->devnum, dev_map); 1159 + cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev); 1160 + ida_free(&uverbs_ida, uverbs_dev->devnum); 1377 1161 1378 1162 if (device->disassociate_ucontext) { 1379 1163 /* We disassociate HW resources and immediately return. ··· 1394 1182 if (wait_clients) 1395 1183 wait_for_completion(&uverbs_dev->comp); 1396 1184 1397 - kobject_put(&uverbs_dev->kobj); 1185 + put_device(&uverbs_dev->dev); 1398 1186 } 1399 1187 1400 1188 static char *uverbs_devnode(struct device *dev, umode_t *mode)
+2 -5
drivers/infiniband/core/uverbs_std_types_flow_action.c
··· 326 326 if (IS_ERR(action)) 327 327 return PTR_ERR(action); 328 328 329 - atomic_set(&action->usecnt, 0); 330 - action->device = ib_dev; 331 - action->type = IB_FLOW_ACTION_ESP; 332 - action->uobject = uobj; 333 - uobj->object = action; 329 + uverbs_flow_action_fill_action(action, uobj, ib_dev, 330 + IB_FLOW_ACTION_ESP); 334 331 335 332 return 0; 336 333 }
+12
drivers/infiniband/core/uverbs_uapi.c
··· 73 73 if (attr->attr.type == UVERBS_ATTR_TYPE_ENUM_IN) 74 74 method_elm->driver_method |= is_driver; 75 75 76 + /* 77 + * Like other uobject based things we only support a single 78 + * uobject being NEW'd or DESTROY'd 79 + */ 80 + if (attr->attr.type == UVERBS_ATTR_TYPE_IDRS_ARRAY) { 81 + u8 access = attr->attr.u2.objs_arr.access; 82 + 83 + if (WARN_ON(access == UVERBS_ACCESS_NEW || 84 + access == UVERBS_ACCESS_DESTROY)) 85 + return -EINVAL; 86 + } 87 + 76 88 attr_slot = 77 89 uapi_add_elm(uapi, method_key | uapi_key_attr(attr->id), 78 90 sizeof(*attr_slot));
+10 -9
drivers/infiniband/core/verbs.c
··· 264 264 } 265 265 266 266 pd->res.type = RDMA_RESTRACK_PD; 267 - pd->res.kern_name = caller; 267 + rdma_restrack_set_task(&pd->res, caller); 268 268 rdma_restrack_add(&pd->res); 269 269 270 270 if (mr_access_flags) { ··· 710 710 711 711 ret = rdma_addr_find_l2_eth_by_grh(&sgid_attr->gid, &grh->dgid, 712 712 ah_attr->roce.dmac, 713 - sgid_attr->ndev, &hop_limit); 713 + sgid_attr, &hop_limit); 714 714 715 715 grh->hop_limit = hop_limit; 716 716 return ret; ··· 1509 1509 }; 1510 1510 1511 1511 bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, 1512 - enum ib_qp_type type, enum ib_qp_attr_mask mask, 1513 - enum rdma_link_layer ll) 1512 + enum ib_qp_type type, enum ib_qp_attr_mask mask) 1514 1513 { 1515 1514 enum ib_qp_attr_mask req_param, opt_param; 1516 1515 ··· 1628 1629 1629 1630 if (rdma_ib_or_roce(qp->device, port)) { 1630 1631 if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) { 1631 - pr_warn("%s: %s rq_psn overflow, masking to 24 bits\n", 1632 - __func__, qp->device->name); 1632 + dev_warn(&qp->device->dev, 1633 + "%s rq_psn overflow, masking to 24 bits\n", 1634 + __func__); 1633 1635 attr->rq_psn &= 0xffffff; 1634 1636 } 1635 1637 1636 1638 if (attr_mask & IB_QP_SQ_PSN && attr->sq_psn & ~0xffffff) { 1637 - pr_warn("%s: %s sq_psn overflow, masking to 24 bits\n", 1638 - __func__, qp->device->name); 1639 + dev_warn(&qp->device->dev, 1640 + " %s sq_psn overflow, masking to 24 bits\n", 1641 + __func__); 1639 1642 attr->sq_psn &= 0xffffff; 1640 1643 } 1641 1644 } ··· 1889 1888 cq->cq_context = cq_context; 1890 1889 atomic_set(&cq->usecnt, 0); 1891 1890 cq->res.type = RDMA_RESTRACK_CQ; 1892 - cq->res.kern_name = caller; 1891 + rdma_restrack_set_task(&cq->res, caller); 1893 1892 rdma_restrack_add(&cq->res); 1894 1893 } 1895 1894
+2 -1
drivers/infiniband/hw/bnxt_re/bnxt_re.h
··· 40 40 #ifndef __BNXT_RE_H__ 41 41 #define __BNXT_RE_H__ 42 42 #define ROCE_DRV_MODULE_NAME "bnxt_re" 43 - #define ROCE_DRV_MODULE_VERSION "1.0.0" 44 43 45 44 #define BNXT_RE_DESC "Broadcom NetXtreme-C/E RoCE Driver" 46 45 #define BNXT_RE_PAGE_SHIFT_4K (12) ··· 119 120 #define BNXT_RE_FLAG_HAVE_L2_REF 3 120 121 #define BNXT_RE_FLAG_RCFW_CHANNEL_EN 4 121 122 #define BNXT_RE_FLAG_QOS_WORK_REG 5 123 + #define BNXT_RE_FLAG_RESOURCES_ALLOCATED 7 124 + #define BNXT_RE_FLAG_RESOURCES_INITIALIZED 8 122 125 #define BNXT_RE_FLAG_ISSUE_ROCE_STATS 29 123 126 struct net_device *netdev; 124 127 unsigned int version, major, minor;
+10 -1
drivers/infiniband/hw/bnxt_re/hw_counters.c
··· 68 68 [BNXT_RE_TX_PKTS] = "tx_pkts", 69 69 [BNXT_RE_TX_BYTES] = "tx_bytes", 70 70 [BNXT_RE_RECOVERABLE_ERRORS] = "recoverable_errors", 71 + [BNXT_RE_RX_DROPS] = "rx_roce_drops", 72 + [BNXT_RE_RX_DISCARDS] = "rx_roce_discards", 71 73 [BNXT_RE_TO_RETRANSMITS] = "to_retransmits", 72 74 [BNXT_RE_SEQ_ERR_NAKS_RCVD] = "seq_err_naks_rcvd", 73 75 [BNXT_RE_MAX_RETRY_EXCEEDED] = "max_retry_exceeded", ··· 108 106 [BNXT_RE_RES_CQ_LOAD_ERR] = "res_cq_load_err", 109 107 [BNXT_RE_RES_SRQ_LOAD_ERR] = "res_srq_load_err", 110 108 [BNXT_RE_RES_TX_PCI_ERR] = "res_tx_pci_err", 111 - [BNXT_RE_RES_RX_PCI_ERR] = "res_rx_pci_err" 109 + [BNXT_RE_RES_RX_PCI_ERR] = "res_rx_pci_err", 110 + [BNXT_RE_OUT_OF_SEQ_ERR] = "oos_drop_count" 112 111 }; 113 112 114 113 int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, ··· 131 128 if (bnxt_re_stats) { 132 129 stats->value[BNXT_RE_RECOVERABLE_ERRORS] = 133 130 le64_to_cpu(bnxt_re_stats->tx_bcast_pkts); 131 + stats->value[BNXT_RE_RX_DROPS] = 132 + le64_to_cpu(bnxt_re_stats->rx_drop_pkts); 133 + stats->value[BNXT_RE_RX_DISCARDS] = 134 + le64_to_cpu(bnxt_re_stats->rx_discard_pkts); 134 135 stats->value[BNXT_RE_RX_PKTS] = 135 136 le64_to_cpu(bnxt_re_stats->rx_ucast_pkts); 136 137 stats->value[BNXT_RE_RX_BYTES] = ··· 227 220 rdev->stats.res_tx_pci_err; 228 221 stats->value[BNXT_RE_RES_RX_PCI_ERR] = 229 222 rdev->stats.res_rx_pci_err; 223 + stats->value[BNXT_RE_OUT_OF_SEQ_ERR] = 224 + rdev->stats.res_oos_drop_count; 230 225 } 231 226 232 227 return ARRAY_SIZE(bnxt_re_stat_name);
+3
drivers/infiniband/hw/bnxt_re/hw_counters.h
··· 51 51 BNXT_RE_TX_PKTS, 52 52 BNXT_RE_TX_BYTES, 53 53 BNXT_RE_RECOVERABLE_ERRORS, 54 + BNXT_RE_RX_DROPS, 55 + BNXT_RE_RX_DISCARDS, 54 56 BNXT_RE_TO_RETRANSMITS, 55 57 BNXT_RE_SEQ_ERR_NAKS_RCVD, 56 58 BNXT_RE_MAX_RETRY_EXCEEDED, ··· 92 90 BNXT_RE_RES_SRQ_LOAD_ERR, 93 91 BNXT_RE_RES_TX_PCI_ERR, 94 92 BNXT_RE_RES_RX_PCI_ERR, 93 + BNXT_RE_OUT_OF_SEQ_ERR, 95 94 BNXT_RE_NUM_COUNTERS 96 95 }; 97 96
+2 -2
drivers/infiniband/hw/bnxt_re/ib_verbs.c
··· 1598 1598 curr_qp_state = __to_ib_qp_state(qp->qplib_qp.cur_qp_state); 1599 1599 new_qp_state = qp_attr->qp_state; 1600 1600 if (!ib_modify_qp_is_ok(curr_qp_state, new_qp_state, 1601 - ib_qp->qp_type, qp_attr_mask, 1602 - IB_LINK_LAYER_ETHERNET)) { 1601 + ib_qp->qp_type, qp_attr_mask)) { 1603 1602 dev_err(rdev_to_dev(rdev), 1604 1603 "Invalid attribute mask: %#x specified ", 1605 1604 qp_attr_mask); ··· 2663 2664 nq->budget++; 2664 2665 2665 2666 atomic_inc(&rdev->cq_count); 2667 + spin_lock_init(&cq->cq_lock); 2666 2668 2667 2669 if (context) { 2668 2670 struct bnxt_re_cq_resp resp;
+65 -60
drivers/infiniband/hw/bnxt_re/main.c
··· 67 67 #include "hw_counters.h" 68 68 69 69 static char version[] = 70 - BNXT_RE_DESC " v" ROCE_DRV_MODULE_VERSION "\n"; 70 + BNXT_RE_DESC "\n"; 71 71 72 72 MODULE_AUTHOR("Eddie Wai <eddie.wai@broadcom.com>"); 73 73 MODULE_DESCRIPTION(BNXT_RE_DESC " Driver"); ··· 535 535 return en_dev; 536 536 } 537 537 538 + static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, 539 + char *buf) 540 + { 541 + struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev); 542 + 543 + return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor); 544 + } 545 + static DEVICE_ATTR_RO(hw_rev); 546 + 547 + static ssize_t hca_type_show(struct device *device, 548 + struct device_attribute *attr, char *buf) 549 + { 550 + struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev); 551 + 552 + return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc); 553 + } 554 + static DEVICE_ATTR_RO(hca_type); 555 + 556 + static struct attribute *bnxt_re_attributes[] = { 557 + &dev_attr_hw_rev.attr, 558 + &dev_attr_hca_type.attr, 559 + NULL 560 + }; 561 + 562 + static const struct attribute_group bnxt_re_dev_attr_group = { 563 + .attrs = bnxt_re_attributes, 564 + }; 565 + 538 566 static void bnxt_re_unregister_ib(struct bnxt_re_dev *rdev) 539 567 { 540 568 ib_unregister_device(&rdev->ibdev); ··· 575 547 /* ib device init */ 576 548 ibdev->owner = THIS_MODULE; 577 549 ibdev->node_type = RDMA_NODE_IB_CA; 578 - strlcpy(ibdev->name, "bnxt_re%d", IB_DEVICE_NAME_MAX); 579 550 strlcpy(ibdev->node_desc, BNXT_RE_DESC " HCA", 580 551 strlen(BNXT_RE_DESC) + 5); 581 552 ibdev->phys_port_cnt = 1; ··· 666 639 ibdev->get_hw_stats = bnxt_re_ib_get_hw_stats; 667 640 ibdev->alloc_hw_stats = bnxt_re_ib_alloc_hw_stats; 668 641 642 + rdma_set_device_sysfs_group(ibdev, &bnxt_re_dev_attr_group); 669 643 ibdev->driver_id = RDMA_DRIVER_BNXT_RE; 670 - return ib_register_device(ibdev, NULL); 644 + return ib_register_device(ibdev, "bnxt_re%d", NULL); 671 645 } 672 - 673 - static ssize_t 
show_rev(struct device *device, struct device_attribute *attr, 674 - char *buf) 675 - { 676 - struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev); 677 - 678 - return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor); 679 - } 680 - 681 - static ssize_t show_hca(struct device *device, struct device_attribute *attr, 682 - char *buf) 683 - { 684 - struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev); 685 - 686 - return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc); 687 - } 688 - 689 - static DEVICE_ATTR(hw_rev, 0444, show_rev, NULL); 690 - static DEVICE_ATTR(hca_type, 0444, show_hca, NULL); 691 - 692 - static struct device_attribute *bnxt_re_attributes[] = { 693 - &dev_attr_hw_rev, 694 - &dev_attr_hca_type 695 - }; 696 646 697 647 static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev) 698 648 { ··· 868 864 { 869 865 int i; 870 866 871 - if (rdev->nq[0].hwq.max_elements) { 872 - for (i = 1; i < rdev->num_msix; i++) 873 - bnxt_qplib_disable_nq(&rdev->nq[i - 1]); 874 - } 867 + for (i = 1; i < rdev->num_msix; i++) 868 + bnxt_qplib_disable_nq(&rdev->nq[i - 1]); 875 869 876 870 if (rdev->qplib_res.rcfw) 877 871 bnxt_qplib_cleanup_res(&rdev->qplib_res); ··· 878 876 static int bnxt_re_init_res(struct bnxt_re_dev *rdev) 879 877 { 880 878 int rc = 0, i; 879 + int num_vec_enabled = 0; 881 880 882 881 bnxt_qplib_init_res(&rdev->qplib_res); 883 882 ··· 894 891 "Failed to enable NQ with rc = 0x%x", rc); 895 892 goto fail; 896 893 } 894 + num_vec_enabled++; 897 895 } 898 896 return 0; 899 897 fail: 898 + for (i = num_vec_enabled; i >= 0; i--) 899 + bnxt_qplib_disable_nq(&rdev->nq[i]); 900 + 900 901 return rc; 901 902 } 902 903 ··· 932 925 static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) 933 926 { 934 927 int rc = 0, i; 928 + int num_vec_created = 0; 935 929 936 930 /* Configure and allocate resources for qplib */ 937 931 rdev->qplib_res.rcfw = &rdev->rcfw; ··· 959 951 if (rc) { 960 952 dev_err(rdev_to_dev(rdev), "Alloc Failed NQ%d 
rc:%#x", 961 953 i, rc); 962 - goto dealloc_dpi; 954 + goto free_nq; 963 955 } 964 956 rc = bnxt_re_net_ring_alloc 965 957 (rdev, rdev->nq[i].hwq.pbl[PBL_LVL_0].pg_map_arr, ··· 972 964 dev_err(rdev_to_dev(rdev), 973 965 "Failed to allocate NQ fw id with rc = 0x%x", 974 966 rc); 967 + bnxt_qplib_free_nq(&rdev->nq[i]); 975 968 goto free_nq; 976 969 } 970 + num_vec_created++; 977 971 } 978 972 return 0; 979 973 free_nq: 980 - for (i = 0; i < rdev->num_msix - 1; i++) 974 + for (i = num_vec_created; i >= 0; i--) { 975 + bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id); 981 976 bnxt_qplib_free_nq(&rdev->nq[i]); 982 - dealloc_dpi: 977 + } 983 978 bnxt_qplib_dealloc_dpi(&rdev->qplib_res, 984 979 &rdev->qplib_res.dpi_tbl, 985 980 &rdev->dpi_privileged); ··· 1000 989 struct ib_event ib_event; 1001 990 1002 991 ib_event.device = ibdev; 1003 - if (qp) 992 + if (qp) { 1004 993 ib_event.element.qp = qp; 1005 - else 994 + ib_event.event = event; 995 + if (qp->event_handler) 996 + qp->event_handler(&ib_event, qp->qp_context); 997 + 998 + } else { 1006 999 ib_event.element.port_num = port_num; 1007 - ib_event.event = event; 1008 - ib_dispatch_event(&ib_event); 1000 + ib_event.event = event; 1001 + ib_dispatch_event(&ib_event); 1002 + } 1009 1003 } 1010 1004 1011 1005 #define HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_IVLAN 0x02 ··· 1205 1189 1206 1190 static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev) 1207 1191 { 1208 - int i, rc; 1192 + int rc; 1209 1193 1210 1194 if (test_and_clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) { 1211 - for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++) 1212 - device_remove_file(&rdev->ibdev.dev, 1213 - bnxt_re_attributes[i]); 1214 1195 /* Cleanup ib dev */ 1215 1196 bnxt_re_unregister_ib(rdev); 1216 1197 } 1217 1198 if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags)) 1218 - cancel_delayed_work(&rdev->worker); 1199 + cancel_delayed_work_sync(&rdev->worker); 1219 1200 1220 - bnxt_re_cleanup_res(rdev); 1221 - bnxt_re_free_res(rdev); 
1201 + if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED, 1202 + &rdev->flags)) 1203 + bnxt_re_cleanup_res(rdev); 1204 + if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_ALLOCATED, &rdev->flags)) 1205 + bnxt_re_free_res(rdev); 1222 1206 1223 1207 if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) { 1224 1208 rc = bnxt_qplib_deinit_rcfw(&rdev->rcfw); ··· 1257 1241 1258 1242 static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) 1259 1243 { 1260 - int i, j, rc; 1244 + int rc; 1261 1245 1262 1246 bool locked; 1263 1247 ··· 1347 1331 pr_err("Failed to allocate resources: %#x\n", rc); 1348 1332 goto fail; 1349 1333 } 1334 + set_bit(BNXT_RE_FLAG_RESOURCES_ALLOCATED, &rdev->flags); 1350 1335 rc = bnxt_re_init_res(rdev); 1351 1336 if (rc) { 1352 1337 pr_err("Failed to initialize resources: %#x\n", rc); 1353 1338 goto fail; 1354 1339 } 1340 + 1341 + set_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED, &rdev->flags); 1355 1342 1356 1343 if (!rdev->is_virtfn) { 1357 1344 rc = bnxt_re_setup_qos(rdev); ··· 1377 1358 } 1378 1359 set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags); 1379 1360 dev_info(rdev_to_dev(rdev), "Device registered successfully"); 1380 - for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++) { 1381 - rc = device_create_file(&rdev->ibdev.dev, 1382 - bnxt_re_attributes[i]); 1383 - if (rc) { 1384 - dev_err(rdev_to_dev(rdev), 1385 - "Failed to create IB sysfs: %#x", rc); 1386 - /* Must clean up all created device files */ 1387 - for (j = 0; j < i; j++) 1388 - device_remove_file(&rdev->ibdev.dev, 1389 - bnxt_re_attributes[j]); 1390 - bnxt_re_unregister_ib(rdev); 1391 - goto fail; 1392 - } 1393 - } 1394 1361 ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed, 1395 1362 &rdev->active_width); 1396 1363 set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
+54 -80
drivers/infiniband/hw/bnxt_re/qplib_fp.c
··· 36 36 * Description: Fast Path Operators 37 37 */ 38 38 39 + #define dev_fmt(fmt) "QPLIB: " fmt 40 + 39 41 #include <linux/interrupt.h> 40 42 #include <linux/spinlock.h> 41 43 #include <linux/sched.h> ··· 73 71 74 72 if (!qp->sq.flushed) { 75 73 dev_dbg(&scq->hwq.pdev->dev, 76 - "QPLIB: FP: Adding to SQ Flush list = %p", 77 - qp); 74 + "FP: Adding to SQ Flush list = %p\n", qp); 78 75 bnxt_qplib_cancel_phantom_processing(qp); 79 76 list_add_tail(&qp->sq_flush, &scq->sqf_head); 80 77 qp->sq.flushed = true; ··· 81 80 if (!qp->srq) { 82 81 if (!qp->rq.flushed) { 83 82 dev_dbg(&rcq->hwq.pdev->dev, 84 - "QPLIB: FP: Adding to RQ Flush list = %p", 85 - qp); 83 + "FP: Adding to RQ Flush list = %p\n", qp); 86 84 list_add_tail(&qp->rq_flush, &rcq->rqf_head); 87 85 qp->rq.flushed = true; 88 86 } ··· 207 207 if (!qp->sq_hdr_buf) { 208 208 rc = -ENOMEM; 209 209 dev_err(&res->pdev->dev, 210 - "QPLIB: Failed to create sq_hdr_buf"); 210 + "Failed to create sq_hdr_buf\n"); 211 211 goto fail; 212 212 } 213 213 } ··· 221 221 if (!qp->rq_hdr_buf) { 222 222 rc = -ENOMEM; 223 223 dev_err(&res->pdev->dev, 224 - "QPLIB: Failed to create rq_hdr_buf"); 224 + "Failed to create rq_hdr_buf\n"); 225 225 goto fail; 226 226 } 227 227 } ··· 277 277 num_cqne_processed++; 278 278 else 279 279 dev_warn(&nq->pdev->dev, 280 - "QPLIB: cqn - type 0x%x not handled", 281 - type); 280 + "cqn - type 0x%x not handled\n", type); 282 281 spin_unlock_bh(&cq->compl_lock); 283 282 break; 284 283 } ··· 297 298 num_srqne_processed++; 298 299 else 299 300 dev_warn(&nq->pdev->dev, 300 - "QPLIB: SRQ event 0x%x not handled", 301 + "SRQ event 0x%x not handled\n", 301 302 nqsrqe->event); 302 303 break; 303 304 } ··· 305 306 break; 306 307 default: 307 308 dev_warn(&nq->pdev->dev, 308 - "QPLIB: nqe with type = 0x%x not handled", 309 - type); 309 + "nqe with type = 0x%x not handled\n", type); 310 310 break; 311 311 } 312 312 raw_cons++; ··· 358 360 } 359 361 360 362 /* Make sure the HW is stopped! 
*/ 361 - bnxt_qplib_nq_stop_irq(nq, true); 363 + if (nq->requested) 364 + bnxt_qplib_nq_stop_irq(nq, true); 362 365 363 366 if (nq->bar_reg_iomem) 364 367 iounmap(nq->bar_reg_iomem); ··· 395 396 rc = irq_set_affinity_hint(nq->vector, &nq->mask); 396 397 if (rc) { 397 398 dev_warn(&nq->pdev->dev, 398 - "QPLIB: set affinity failed; vector: %d nq_idx: %d\n", 399 + "set affinity failed; vector: %d nq_idx: %d\n", 399 400 nq->vector, nq_indx); 400 401 } 401 402 nq->requested = true; ··· 442 443 rc = bnxt_qplib_nq_start_irq(nq, nq_idx, msix_vector, true); 443 444 if (rc) { 444 445 dev_err(&nq->pdev->dev, 445 - "QPLIB: Failed to request irq for nq-idx %d", nq_idx); 446 + "Failed to request irq for nq-idx %d\n", nq_idx); 446 447 goto fail; 447 448 } 448 449 ··· 661 662 662 663 spin_lock(&srq_hwq->lock); 663 664 if (srq->start_idx == srq->last_idx) { 664 - dev_err(&srq_hwq->pdev->dev, "QPLIB: FP: SRQ (0x%x) is full!", 665 - srq->id); 665 + dev_err(&srq_hwq->pdev->dev, 666 + "FP: SRQ (0x%x) is full!\n", srq->id); 666 667 rc = -EINVAL; 667 668 spin_unlock(&srq_hwq->lock); 668 669 goto done; ··· 1323 1324 } 1324 1325 } 1325 1326 if (i == res->sgid_tbl.max) 1326 - dev_warn(&res->pdev->dev, "QPLIB: SGID not found??"); 1327 + dev_warn(&res->pdev->dev, "SGID not found??\n"); 1327 1328 1328 1329 qp->ah.hop_limit = sb->hop_limit; 1329 1330 qp->ah.traffic_class = sb->traffic_class; ··· 1535 1536 1536 1537 if (bnxt_qplib_queue_full(sq)) { 1537 1538 dev_err(&sq->hwq.pdev->dev, 1538 - "QPLIB: prod = %#x cons = %#x qdepth = %#x delta = %#x", 1539 + "prod = %#x cons = %#x qdepth = %#x delta = %#x\n", 1539 1540 sq->hwq.prod, sq->hwq.cons, sq->hwq.max_elements, 1540 1541 sq->q_full_delta); 1541 1542 rc = -ENOMEM; ··· 1560 1561 /* Copy the inline data */ 1561 1562 if (wqe->inline_len > BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) { 1562 1563 dev_warn(&sq->hwq.pdev->dev, 1563 - "QPLIB: Inline data length > 96 detected"); 1564 + "Inline data length > 96 detected\n"); 1564 1565 data_len = 
BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH; 1565 1566 } else { 1566 1567 data_len = wqe->inline_len; ··· 1775 1776 queue_work(qp->scq->nq->cqn_wq, &nq_work->work); 1776 1777 } else { 1777 1778 dev_err(&sq->hwq.pdev->dev, 1778 - "QPLIB: FP: Failed to allocate SQ nq_work!"); 1779 + "FP: Failed to allocate SQ nq_work!\n"); 1779 1780 rc = -ENOMEM; 1780 1781 } 1781 1782 } ··· 1814 1815 if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { 1815 1816 sch_handler = true; 1816 1817 dev_dbg(&rq->hwq.pdev->dev, 1817 - "%s Error QP. Scheduling for poll_cq\n", 1818 - __func__); 1818 + "%s: Error QP. Scheduling for poll_cq\n", __func__); 1819 1819 goto queue_err; 1820 1820 } 1821 1821 if (bnxt_qplib_queue_full(rq)) { 1822 1822 dev_err(&rq->hwq.pdev->dev, 1823 - "QPLIB: FP: QP (0x%x) RQ is full!", qp->id); 1823 + "FP: QP (0x%x) RQ is full!\n", qp->id); 1824 1824 rc = -EINVAL; 1825 1825 goto done; 1826 1826 } ··· 1868 1870 queue_work(qp->rcq->nq->cqn_wq, &nq_work->work); 1869 1871 } else { 1870 1872 dev_err(&rq->hwq.pdev->dev, 1871 - "QPLIB: FP: Failed to allocate RQ nq_work!"); 1873 + "FP: Failed to allocate RQ nq_work!\n"); 1872 1874 rc = -ENOMEM; 1873 1875 } 1874 1876 } ··· 1930 1932 1931 1933 if (!cq->dpi) { 1932 1934 dev_err(&rcfw->pdev->dev, 1933 - "QPLIB: FP: CREATE_CQ failed due to NULL DPI"); 1935 + "FP: CREATE_CQ failed due to NULL DPI\n"); 1934 1936 return -EINVAL; 1935 1937 } 1936 1938 req.dpi = cpu_to_le32(cq->dpi->dpi); ··· 1967 1969 INIT_LIST_HEAD(&cq->sqf_head); 1968 1970 INIT_LIST_HEAD(&cq->rqf_head); 1969 1971 spin_lock_init(&cq->compl_lock); 1972 + spin_lock_init(&cq->flush_lock); 1970 1973 1971 1974 bnxt_qplib_arm_cq_enable(cq); 1972 1975 return 0; ··· 2171 2172 * comes back 2172 2173 */ 2173 2174 dev_dbg(&cq->hwq.pdev->dev, 2174 - "FP:Got Phantom CQE"); 2175 + "FP: Got Phantom CQE\n"); 2175 2176 sq->condition = false; 2176 2177 sq->single = true; 2177 2178 rc = 0; ··· 2188 2189 peek_raw_cq_cons++; 2189 2190 } 2190 2191 dev_err(&cq->hwq.pdev->dev, 2191 - "Should not have 
come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x", 2192 + "Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x\n", 2192 2193 cq_cons, qp->id, sw_sq_cons, cqe_sq_cons); 2193 2194 rc = -EINVAL; 2194 2195 } ··· 2212 2213 le64_to_cpu(hwcqe->qp_handle)); 2213 2214 if (!qp) { 2214 2215 dev_err(&cq->hwq.pdev->dev, 2215 - "QPLIB: FP: Process Req qp is NULL"); 2216 + "FP: Process Req qp is NULL\n"); 2216 2217 return -EINVAL; 2217 2218 } 2218 2219 sq = &qp->sq; ··· 2220 2221 cqe_sq_cons = HWQ_CMP(le16_to_cpu(hwcqe->sq_cons_idx), &sq->hwq); 2221 2222 if (cqe_sq_cons > sq->hwq.max_elements) { 2222 2223 dev_err(&cq->hwq.pdev->dev, 2223 - "QPLIB: FP: CQ Process req reported "); 2224 - dev_err(&cq->hwq.pdev->dev, 2225 - "QPLIB: sq_cons_idx 0x%x which exceeded max 0x%x", 2224 + "FP: CQ Process req reported sq_cons_idx 0x%x which exceeded max 0x%x\n", 2226 2225 cqe_sq_cons, sq->hwq.max_elements); 2227 2226 return -EINVAL; 2228 2227 } 2229 2228 2230 2229 if (qp->sq.flushed) { 2231 2230 dev_dbg(&cq->hwq.pdev->dev, 2232 - "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); 2231 + "%s: QP in Flush QP = %p\n", __func__, qp); 2233 2232 goto done; 2234 2233 } 2235 2234 /* Require to walk the sq's swq to fabricate CQEs for all previously ··· 2259 2262 hwcqe->status != CQ_REQ_STATUS_OK) { 2260 2263 cqe->status = hwcqe->status; 2261 2264 dev_err(&cq->hwq.pdev->dev, 2262 - "QPLIB: FP: CQ Processed Req "); 2263 - dev_err(&cq->hwq.pdev->dev, 2264 - "QPLIB: wr_id[%d] = 0x%llx with status 0x%x", 2265 + "FP: CQ Processed Req wr_id[%d] = 0x%llx with status 0x%x\n", 2265 2266 sw_sq_cons, cqe->wr_id, cqe->status); 2266 2267 cqe++; 2267 2268 (*budget)--; ··· 2325 2330 qp = (struct bnxt_qplib_qp *)((unsigned long) 2326 2331 le64_to_cpu(hwcqe->qp_handle)); 2327 2332 if (!qp) { 2328 - dev_err(&cq->hwq.pdev->dev, "QPLIB: process_cq RC qp is NULL"); 2333 + dev_err(&cq->hwq.pdev->dev, "process_cq RC qp is NULL\n"); 2329 2334 return -EINVAL; 2330 2335 } 2331 2336 if (qp->rq.flushed) 
{ 2332 2337 dev_dbg(&cq->hwq.pdev->dev, 2333 - "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); 2338 + "%s: QP in Flush QP = %p\n", __func__, qp); 2334 2339 goto done; 2335 2340 } 2336 2341 ··· 2351 2356 return -EINVAL; 2352 2357 if (wr_id_idx >= srq->hwq.max_elements) { 2353 2358 dev_err(&cq->hwq.pdev->dev, 2354 - "QPLIB: FP: CQ Process RC "); 2355 - dev_err(&cq->hwq.pdev->dev, 2356 - "QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x", 2359 + "FP: CQ Process RC wr_id idx 0x%x exceeded SRQ max 0x%x\n", 2357 2360 wr_id_idx, srq->hwq.max_elements); 2358 2361 return -EINVAL; 2359 2362 } ··· 2364 2371 rq = &qp->rq; 2365 2372 if (wr_id_idx >= rq->hwq.max_elements) { 2366 2373 dev_err(&cq->hwq.pdev->dev, 2367 - "QPLIB: FP: CQ Process RC "); 2368 - dev_err(&cq->hwq.pdev->dev, 2369 - "QPLIB: wr_id idx 0x%x exceeded RQ max 0x%x", 2374 + "FP: CQ Process RC wr_id idx 0x%x exceeded RQ max 0x%x\n", 2370 2375 wr_id_idx, rq->hwq.max_elements); 2371 2376 return -EINVAL; 2372 2377 } ··· 2400 2409 qp = (struct bnxt_qplib_qp *)((unsigned long) 2401 2410 le64_to_cpu(hwcqe->qp_handle)); 2402 2411 if (!qp) { 2403 - dev_err(&cq->hwq.pdev->dev, "QPLIB: process_cq UD qp is NULL"); 2412 + dev_err(&cq->hwq.pdev->dev, "process_cq UD qp is NULL\n"); 2404 2413 return -EINVAL; 2405 2414 } 2406 2415 if (qp->rq.flushed) { 2407 2416 dev_dbg(&cq->hwq.pdev->dev, 2408 - "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); 2417 + "%s: QP in Flush QP = %p\n", __func__, qp); 2409 2418 goto done; 2410 2419 } 2411 2420 cqe = *pcqe; ··· 2430 2439 2431 2440 if (wr_id_idx >= srq->hwq.max_elements) { 2432 2441 dev_err(&cq->hwq.pdev->dev, 2433 - "QPLIB: FP: CQ Process UD "); 2434 - dev_err(&cq->hwq.pdev->dev, 2435 - "QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x", 2442 + "FP: CQ Process UD wr_id idx 0x%x exceeded SRQ max 0x%x\n", 2436 2443 wr_id_idx, srq->hwq.max_elements); 2437 2444 return -EINVAL; 2438 2445 } ··· 2443 2454 rq = &qp->rq; 2444 2455 if (wr_id_idx >= rq->hwq.max_elements) { 2445 2456 
dev_err(&cq->hwq.pdev->dev, 2446 - "QPLIB: FP: CQ Process UD "); 2447 - dev_err(&cq->hwq.pdev->dev, 2448 - "QPLIB: wr_id idx 0x%x exceeded RQ max 0x%x", 2457 + "FP: CQ Process UD wr_id idx 0x%x exceeded RQ max 0x%x\n", 2449 2458 wr_id_idx, rq->hwq.max_elements); 2450 2459 return -EINVAL; 2451 2460 } ··· 2495 2508 qp = (struct bnxt_qplib_qp *)((unsigned long) 2496 2509 le64_to_cpu(hwcqe->qp_handle)); 2497 2510 if (!qp) { 2498 - dev_err(&cq->hwq.pdev->dev, 2499 - "QPLIB: process_cq Raw/QP1 qp is NULL"); 2511 + dev_err(&cq->hwq.pdev->dev, "process_cq Raw/QP1 qp is NULL\n"); 2500 2512 return -EINVAL; 2501 2513 } 2502 2514 if (qp->rq.flushed) { 2503 2515 dev_dbg(&cq->hwq.pdev->dev, 2504 - "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); 2516 + "%s: QP in Flush QP = %p\n", __func__, qp); 2505 2517 goto done; 2506 2518 } 2507 2519 cqe = *pcqe; ··· 2529 2543 srq = qp->srq; 2530 2544 if (!srq) { 2531 2545 dev_err(&cq->hwq.pdev->dev, 2532 - "QPLIB: FP: SRQ used but not defined??"); 2546 + "FP: SRQ used but not defined??\n"); 2533 2547 return -EINVAL; 2534 2548 } 2535 2549 if (wr_id_idx >= srq->hwq.max_elements) { 2536 2550 dev_err(&cq->hwq.pdev->dev, 2537 - "QPLIB: FP: CQ Process Raw/QP1 "); 2538 - dev_err(&cq->hwq.pdev->dev, 2539 - "QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x", 2551 + "FP: CQ Process Raw/QP1 wr_id idx 0x%x exceeded SRQ max 0x%x\n", 2540 2552 wr_id_idx, srq->hwq.max_elements); 2541 2553 return -EINVAL; 2542 2554 } ··· 2547 2563 rq = &qp->rq; 2548 2564 if (wr_id_idx >= rq->hwq.max_elements) { 2549 2565 dev_err(&cq->hwq.pdev->dev, 2550 - "QPLIB: FP: CQ Process Raw/QP1 RQ wr_id "); 2551 - dev_err(&cq->hwq.pdev->dev, 2552 - "QPLIB: ix 0x%x exceeded RQ max 0x%x", 2566 + "FP: CQ Process Raw/QP1 RQ wr_id idx 0x%x exceeded RQ max 0x%x\n", 2553 2567 wr_id_idx, rq->hwq.max_elements); 2554 2568 return -EINVAL; 2555 2569 } ··· 2582 2600 /* Check the Status */ 2583 2601 if (hwcqe->status != CQ_TERMINAL_STATUS_OK) 2584 2602 dev_warn(&cq->hwq.pdev->dev, 2585 - "QPLIB: 
FP: CQ Process Terminal Error status = 0x%x", 2603 + "FP: CQ Process Terminal Error status = 0x%x\n", 2586 2604 hwcqe->status); 2587 2605 2588 2606 qp = (struct bnxt_qplib_qp *)((unsigned long) 2589 2607 le64_to_cpu(hwcqe->qp_handle)); 2590 2608 if (!qp) { 2591 2609 dev_err(&cq->hwq.pdev->dev, 2592 - "QPLIB: FP: CQ Process terminal qp is NULL"); 2610 + "FP: CQ Process terminal qp is NULL\n"); 2593 2611 return -EINVAL; 2594 2612 } 2595 2613 ··· 2605 2623 2606 2624 if (cqe_cons > sq->hwq.max_elements) { 2607 2625 dev_err(&cq->hwq.pdev->dev, 2608 - "QPLIB: FP: CQ Process terminal reported "); 2609 - dev_err(&cq->hwq.pdev->dev, 2610 - "QPLIB: sq_cons_idx 0x%x which exceeded max 0x%x", 2626 + "FP: CQ Process terminal reported sq_cons_idx 0x%x which exceeded max 0x%x\n", 2611 2627 cqe_cons, sq->hwq.max_elements); 2612 2628 goto do_rq; 2613 2629 } 2614 2630 2615 2631 if (qp->sq.flushed) { 2616 2632 dev_dbg(&cq->hwq.pdev->dev, 2617 - "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); 2633 + "%s: QP in Flush QP = %p\n", __func__, qp); 2618 2634 goto sq_done; 2619 2635 } 2620 2636 ··· 2653 2673 goto done; 2654 2674 } else if (cqe_cons > rq->hwq.max_elements) { 2655 2675 dev_err(&cq->hwq.pdev->dev, 2656 - "QPLIB: FP: CQ Processed terminal "); 2657 - dev_err(&cq->hwq.pdev->dev, 2658 - "QPLIB: reported rq_cons_idx 0x%x exceeds max 0x%x", 2676 + "FP: CQ Processed terminal reported rq_cons_idx 0x%x exceeds max 0x%x\n", 2659 2677 cqe_cons, rq->hwq.max_elements); 2660 2678 goto done; 2661 2679 } 2662 2680 2663 2681 if (qp->rq.flushed) { 2664 2682 dev_dbg(&cq->hwq.pdev->dev, 2665 - "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); 2683 + "%s: QP in Flush QP = %p\n", __func__, qp); 2666 2684 rc = 0; 2667 2685 goto done; 2668 2686 } ··· 2682 2704 /* Check the Status */ 2683 2705 if (hwcqe->status != CQ_CUTOFF_STATUS_OK) { 2684 2706 dev_err(&cq->hwq.pdev->dev, 2685 - "QPLIB: FP: CQ Process Cutoff Error status = 0x%x", 2707 + "FP: CQ Process Cutoff Error status = 0x%x\n", 2686 2708 
hwcqe->status); 2687 2709 return -EINVAL; 2688 2710 } ··· 2702 2724 2703 2725 spin_lock_irqsave(&cq->flush_lock, flags); 2704 2726 list_for_each_entry(qp, &cq->sqf_head, sq_flush) { 2705 - dev_dbg(&cq->hwq.pdev->dev, 2706 - "QPLIB: FP: Flushing SQ QP= %p", 2707 - qp); 2727 + dev_dbg(&cq->hwq.pdev->dev, "FP: Flushing SQ QP= %p\n", qp); 2708 2728 __flush_sq(&qp->sq, qp, &cqe, &budget); 2709 2729 } 2710 2730 2711 2731 list_for_each_entry(qp, &cq->rqf_head, rq_flush) { 2712 - dev_dbg(&cq->hwq.pdev->dev, 2713 - "QPLIB: FP: Flushing RQ QP= %p", 2714 - qp); 2732 + dev_dbg(&cq->hwq.pdev->dev, "FP: Flushing RQ QP= %p\n", qp); 2715 2733 __flush_rq(&qp->rq, qp, &cqe, &budget); 2716 2734 } 2717 2735 spin_unlock_irqrestore(&cq->flush_lock, flags); ··· 2775 2801 goto exit; 2776 2802 default: 2777 2803 dev_err(&cq->hwq.pdev->dev, 2778 - "QPLIB: process_cq unknown type 0x%lx", 2804 + "process_cq unknown type 0x%lx\n", 2779 2805 hw_cqe->cqe_type_toggle & 2780 2806 CQ_BASE_CQE_TYPE_MASK); 2781 2807 rc = -EINVAL; ··· 2788 2814 * next one 2789 2815 */ 2790 2816 dev_err(&cq->hwq.pdev->dev, 2791 - "QPLIB: process_cqe error rc = 0x%x", rc); 2817 + "process_cqe error rc = 0x%x\n", rc); 2792 2818 } 2793 2819 raw_cons++; 2794 2820 }
+49 -39
drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
··· 35 35 * 36 36 * Description: RDMA Controller HW interface 37 37 */ 38 + 39 + #define dev_fmt(fmt) "QPLIB: " fmt 40 + 38 41 #include <linux/interrupt.h> 39 42 #include <linux/spinlock.h> 40 43 #include <linux/pci.h> ··· 99 96 opcode != CMDQ_BASE_OPCODE_INITIALIZE_FW && 100 97 opcode != CMDQ_BASE_OPCODE_QUERY_VERSION)) { 101 98 dev_err(&rcfw->pdev->dev, 102 - "QPLIB: RCFW not initialized, reject opcode 0x%x", 103 - opcode); 99 + "RCFW not initialized, reject opcode 0x%x\n", opcode); 104 100 return -EINVAL; 105 101 } 106 102 107 103 if (test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) && 108 104 opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW) { 109 - dev_err(&rcfw->pdev->dev, "QPLIB: RCFW already initialized!"); 105 + dev_err(&rcfw->pdev->dev, "RCFW already initialized!\n"); 110 106 return -EINVAL; 111 107 } 112 108 ··· 117 115 */ 118 116 spin_lock_irqsave(&cmdq->lock, flags); 119 117 if (req->cmd_size >= HWQ_FREE_SLOTS(cmdq)) { 120 - dev_err(&rcfw->pdev->dev, "QPLIB: RCFW: CMDQ is full!"); 118 + dev_err(&rcfw->pdev->dev, "RCFW: CMDQ is full!\n"); 121 119 spin_unlock_irqrestore(&cmdq->lock, flags); 122 120 return -EAGAIN; 123 121 } ··· 156 154 cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod)][get_cmdq_idx(sw_prod)]; 157 155 if (!cmdqe) { 158 156 dev_err(&rcfw->pdev->dev, 159 - "QPLIB: RCFW request failed with no cmdqe!"); 157 + "RCFW request failed with no cmdqe!\n"); 160 158 goto done; 161 159 } 162 160 /* Copy a segment of the req cmd to the cmdq */ ··· 212 210 213 211 if (!retry_cnt || (rc != -EAGAIN && rc != -EBUSY)) { 214 212 /* send failed */ 215 - dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x send failed", 213 + dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x send failed\n", 216 214 cookie, opcode); 217 215 return rc; 218 216 } ··· 226 224 rc = __wait_for_resp(rcfw, cookie); 227 225 if (rc) { 228 226 /* timed out */ 229 - dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x timedout (%d)msec", 227 + dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x timedout (%d)msec\n", 230 228 cookie, 
opcode, RCFW_CMD_WAIT_TIME_MS); 231 229 set_bit(FIRMWARE_TIMED_OUT, &rcfw->flags); 232 230 return rc; ··· 234 232 235 233 if (evnt->status) { 236 234 /* failed with status */ 237 - dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x status %#x", 235 + dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x status %#x\n", 238 236 cookie, opcode, evnt->status); 239 237 rc = -EFAULT; 240 238 } ··· 300 298 qp_id = le32_to_cpu(err_event->xid); 301 299 qp = rcfw->qp_tbl[qp_id].qp_handle; 302 300 dev_dbg(&rcfw->pdev->dev, 303 - "QPLIB: Received QP error notification"); 301 + "Received QP error notification\n"); 304 302 dev_dbg(&rcfw->pdev->dev, 305 - "QPLIB: qpid 0x%x, req_err=0x%x, resp_err=0x%x\n", 303 + "qpid 0x%x, req_err=0x%x, resp_err=0x%x\n", 306 304 qp_id, err_event->req_err_state_reason, 307 305 err_event->res_err_state_reason); 308 306 if (!qp) ··· 311 309 rcfw->aeq_handler(rcfw, qp_event, qp); 312 310 break; 313 311 default: 314 - /* Command Response */ 315 - spin_lock_irqsave(&cmdq->lock, flags); 312 + /* 313 + * Command Response 314 + * cmdq->lock needs to be acquired to synchronie 315 + * the command send and completion reaping. This function 316 + * is always called with creq->lock held. Using 317 + * the nested variant of spin_lock. 318 + * 319 + */ 320 + 321 + spin_lock_irqsave_nested(&cmdq->lock, flags, 322 + SINGLE_DEPTH_NESTING); 316 323 cookie = le16_to_cpu(qp_event->cookie); 317 324 mcookie = qp_event->cookie; 318 325 blocked = cookie & RCFW_CMD_IS_BLOCKING; ··· 333 322 memcpy(crsqe->resp, qp_event, sizeof(*qp_event)); 334 323 crsqe->resp = NULL; 335 324 } else { 336 - dev_err(&rcfw->pdev->dev, 337 - "QPLIB: CMD %s resp->cookie = %#x, evnt->cookie = %#x", 338 - crsqe->resp ? "mismatch" : "collision", 339 - crsqe->resp ? crsqe->resp->cookie : 0, mcookie); 325 + if (crsqe->resp && crsqe->resp->cookie) 326 + dev_err(&rcfw->pdev->dev, 327 + "CMD %s cookie sent=%#x, recd=%#x\n", 328 + crsqe->resp ? "mismatch" : "collision", 329 + crsqe->resp ? 
crsqe->resp->cookie : 0, 330 + mcookie); 340 331 } 341 332 if (!test_and_clear_bit(cbit, rcfw->cmdq_bitmap)) 342 333 dev_warn(&rcfw->pdev->dev, 343 - "QPLIB: CMD bit %d was not requested", cbit); 334 + "CMD bit %d was not requested\n", cbit); 344 335 cmdq->cons += crsqe->req_size; 345 336 crsqe->req_size = 0; 346 337 ··· 389 376 (rcfw, (struct creq_func_event *)creqe)) 390 377 rcfw->creq_func_event_processed++; 391 378 else 392 - dev_warn 393 - (&rcfw->pdev->dev, "QPLIB:aeqe:%#x Not handled", 394 - type); 379 + dev_warn(&rcfw->pdev->dev, 380 + "aeqe:%#x Not handled\n", type); 395 381 break; 396 382 default: 397 - dev_warn(&rcfw->pdev->dev, "QPLIB: creqe with "); 398 - dev_warn(&rcfw->pdev->dev, 399 - "QPLIB: op_event = 0x%x not handled", type); 383 + if (type != ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT) 384 + dev_warn(&rcfw->pdev->dev, 385 + "creqe with event 0x%x not handled\n", 386 + type); 400 387 break; 401 388 } 402 389 raw_cons++; ··· 564 551 BNXT_QPLIB_CREQE_UNITS, 0, PAGE_SIZE, 565 552 HWQ_TYPE_L2_CMPL)) { 566 553 dev_err(&rcfw->pdev->dev, 567 - "QPLIB: HW channel CREQ allocation failed"); 554 + "HW channel CREQ allocation failed\n"); 568 555 goto fail; 569 556 } 570 557 rcfw->cmdq.max_elements = BNXT_QPLIB_CMDQE_MAX_CNT; ··· 573 560 BNXT_QPLIB_CMDQE_UNITS, 0, PAGE_SIZE, 574 561 HWQ_TYPE_CTX)) { 575 562 dev_err(&rcfw->pdev->dev, 576 - "QPLIB: HW channel CMDQ allocation failed"); 563 + "HW channel CMDQ allocation failed\n"); 577 564 goto fail; 578 565 } 579 566 ··· 618 605 619 606 bnxt_qplib_rcfw_stop_irq(rcfw, true); 620 607 621 - if (rcfw->cmdq_bar_reg_iomem) 622 - iounmap(rcfw->cmdq_bar_reg_iomem); 623 - rcfw->cmdq_bar_reg_iomem = NULL; 624 - 625 - if (rcfw->creq_bar_reg_iomem) 626 - iounmap(rcfw->creq_bar_reg_iomem); 627 - rcfw->creq_bar_reg_iomem = NULL; 608 + iounmap(rcfw->cmdq_bar_reg_iomem); 609 + iounmap(rcfw->creq_bar_reg_iomem); 628 610 629 611 indx = find_first_bit(rcfw->cmdq_bitmap, rcfw->bmap_size); 630 612 if (indx != rcfw->bmap_size) 631 613 
dev_err(&rcfw->pdev->dev, 632 - "QPLIB: disabling RCFW with pending cmd-bit %lx", indx); 614 + "disabling RCFW with pending cmd-bit %lx\n", indx); 633 615 kfree(rcfw->cmdq_bitmap); 634 616 rcfw->bmap_size = 0; 635 617 618 + rcfw->cmdq_bar_reg_iomem = NULL; 619 + rcfw->creq_bar_reg_iomem = NULL; 636 620 rcfw->aeq_handler = NULL; 637 621 rcfw->vector = 0; 638 622 } ··· 691 681 RCFW_COMM_BASE_OFFSET, 692 682 RCFW_COMM_SIZE); 693 683 if (!rcfw->cmdq_bar_reg_iomem) { 694 - dev_err(&rcfw->pdev->dev, 695 - "QPLIB: CMDQ BAR region %d mapping failed", 684 + dev_err(&rcfw->pdev->dev, "CMDQ BAR region %d mapping failed\n", 696 685 rcfw->cmdq_bar_reg); 697 686 return -ENOMEM; 698 687 } ··· 706 697 res_base = pci_resource_start(pdev, rcfw->creq_bar_reg); 707 698 if (!res_base) 708 699 dev_err(&rcfw->pdev->dev, 709 - "QPLIB: CREQ BAR region %d resc start is 0!", 700 + "CREQ BAR region %d resc start is 0!\n", 710 701 rcfw->creq_bar_reg); 711 702 rcfw->creq_bar_reg_iomem = ioremap_nocache(res_base + cp_bar_reg_off, 712 703 4); 713 704 if (!rcfw->creq_bar_reg_iomem) { 714 - dev_err(&rcfw->pdev->dev, 715 - "QPLIB: CREQ BAR region %d mapping failed", 705 + dev_err(&rcfw->pdev->dev, "CREQ BAR region %d mapping failed\n", 716 706 rcfw->creq_bar_reg); 707 + iounmap(rcfw->cmdq_bar_reg_iomem); 708 + rcfw->cmdq_bar_reg_iomem = NULL; 717 709 return -ENOMEM; 718 710 } 719 711 rcfw->creq_qp_event_processed = 0; ··· 727 717 rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_vector, true); 728 718 if (rc) { 729 719 dev_err(&rcfw->pdev->dev, 730 - "QPLIB: Failed to request IRQ for CREQ rc = 0x%x", rc); 720 + "Failed to request IRQ for CREQ rc = 0x%x\n", rc); 731 721 bnxt_qplib_disable_rcfw_channel(rcfw); 732 722 return rc; 733 723 }
+4
drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
··· 154 154 void *qp_handle; /* ptr to qplib_qp */ 155 155 }; 156 156 157 + #define BNXT_QPLIB_OOS_COUNT_MASK 0xFFFFFFFF 158 + 157 159 /* RCFW Communication Channels */ 158 160 struct bnxt_qplib_rcfw { 159 161 struct pci_dev *pdev; ··· 192 190 struct bnxt_qplib_crsq *crsqe_tbl; 193 191 int qp_tbl_size; 194 192 struct bnxt_qplib_qp_node *qp_tbl; 193 + u64 oos_prev; 194 + u32 init_oos_stats; 195 195 }; 196 196 197 197 void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
+14 -15
drivers/infiniband/hw/bnxt_re/qplib_res.c
··· 36 36 * Description: QPLib resource manager 37 37 */ 38 38 39 + #define dev_fmt(fmt) "QPLIB: " fmt 40 + 39 41 #include <linux/spinlock.h> 40 42 #include <linux/pci.h> 41 43 #include <linux/interrupt.h> ··· 70 68 pbl->pg_map_arr[i]); 71 69 else 72 70 dev_warn(&pdev->dev, 73 - "QPLIB: PBL free pg_arr[%d] empty?!", 74 - i); 71 + "PBL free pg_arr[%d] empty?!\n", i); 75 72 pbl->pg_arr[i] = NULL; 76 73 } 77 74 } ··· 538 537 struct bnxt_qplib_pkey_tbl *pkey_tbl) 539 538 { 540 539 if (!pkey_tbl->tbl) 541 - dev_dbg(&res->pdev->dev, "QPLIB: PKEY tbl not present"); 540 + dev_dbg(&res->pdev->dev, "PKEY tbl not present\n"); 542 541 else 543 542 kfree(pkey_tbl->tbl); 544 543 ··· 579 578 struct bnxt_qplib_pd *pd) 580 579 { 581 580 if (test_and_set_bit(pd->id, pdt->tbl)) { 582 - dev_warn(&res->pdev->dev, "Freeing an unused PD? pdn = %d", 581 + dev_warn(&res->pdev->dev, "Freeing an unused PD? pdn = %d\n", 583 582 pd->id); 584 583 return -EINVAL; 585 584 } ··· 640 639 struct bnxt_qplib_dpi *dpi) 641 640 { 642 641 if (dpi->dpi >= dpit->max) { 643 - dev_warn(&res->pdev->dev, "Invalid DPI? dpi = %d", dpi->dpi); 642 + dev_warn(&res->pdev->dev, "Invalid DPI? dpi = %d\n", dpi->dpi); 644 643 return -EINVAL; 645 644 } 646 645 if (test_and_set_bit(dpi->dpi, dpit->tbl)) { 647 - dev_warn(&res->pdev->dev, "Freeing an unused DPI? dpi = %d", 646 + dev_warn(&res->pdev->dev, "Freeing an unused DPI? 
dpi = %d\n", 648 647 dpi->dpi); 649 648 return -EINVAL; 650 649 } ··· 674 673 u32 dbr_len, bytes; 675 674 676 675 if (dpit->dbr_bar_reg_iomem) { 677 - dev_err(&res->pdev->dev, 678 - "QPLIB: DBR BAR region %d already mapped", dbr_bar_reg); 676 + dev_err(&res->pdev->dev, "DBR BAR region %d already mapped\n", 677 + dbr_bar_reg); 679 678 return -EALREADY; 680 679 } 681 680 682 681 bar_reg_base = pci_resource_start(res->pdev, dbr_bar_reg); 683 682 if (!bar_reg_base) { 684 - dev_err(&res->pdev->dev, 685 - "QPLIB: BAR region %d resc start failed", dbr_bar_reg); 683 + dev_err(&res->pdev->dev, "BAR region %d resc start failed\n", 684 + dbr_bar_reg); 686 685 return -ENOMEM; 687 686 } 688 687 689 688 dbr_len = pci_resource_len(res->pdev, dbr_bar_reg) - dbr_offset; 690 689 if (!dbr_len || ((dbr_len & (PAGE_SIZE - 1)) != 0)) { 691 - dev_err(&res->pdev->dev, "QPLIB: Invalid DBR length %d", 692 - dbr_len); 690 + dev_err(&res->pdev->dev, "Invalid DBR length %d\n", dbr_len); 693 691 return -ENOMEM; 694 692 } 695 693 ··· 696 696 dbr_len); 697 697 if (!dpit->dbr_bar_reg_iomem) { 698 698 dev_err(&res->pdev->dev, 699 - "QPLIB: FP: DBR BAR region %d mapping failed", 700 - dbr_bar_reg); 699 + "FP: DBR BAR region %d mapping failed\n", dbr_bar_reg); 701 700 return -ENOMEM; 702 701 } 703 702 ··· 766 767 stats->dma = dma_alloc_coherent(&pdev->dev, stats->size, 767 768 &stats->dma_map, GFP_KERNEL); 768 769 if (!stats->dma) { 769 - dev_err(&pdev->dev, "QPLIB: Stats DMA allocation failed"); 770 + dev_err(&pdev->dev, "Stats DMA allocation failed\n"); 770 771 return -ENOMEM; 771 772 } 772 773 return 0;
+45 -32
drivers/infiniband/hw/bnxt_re/qplib_sp.c
··· 36 36 * Description: Slow Path Operators 37 37 */ 38 38 39 + #define dev_fmt(fmt) "QPLIB: " fmt 40 + 39 41 #include <linux/interrupt.h> 40 42 #include <linux/spinlock.h> 41 43 #include <linux/sched.h> ··· 91 89 sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb)); 92 90 if (!sbuf) { 93 91 dev_err(&rcfw->pdev->dev, 94 - "QPLIB: SP: QUERY_FUNC alloc side buffer failed"); 92 + "SP: QUERY_FUNC alloc side buffer failed\n"); 95 93 return -ENOMEM; 96 94 } 97 95 ··· 137 135 attr->max_srq = le16_to_cpu(sb->max_srq); 138 136 attr->max_srq_wqes = le32_to_cpu(sb->max_srq_wr) - 1; 139 137 attr->max_srq_sges = sb->max_srq_sge; 140 - /* Bono only reports 1 PKEY for now, but it can support > 1 */ 141 138 attr->max_pkey = le32_to_cpu(sb->max_pkeys); 139 + /* 140 + * Some versions of FW reports more than 0xFFFF. 141 + * Restrict it for now to 0xFFFF to avoid 142 + * reporting trucated value 143 + */ 144 + if (attr->max_pkey > 0xFFFF) { 145 + /* ib_port_attr::pkey_tbl_len is u16 */ 146 + attr->max_pkey = 0xFFFF; 147 + } 142 148 143 149 attr->max_inline_data = le32_to_cpu(sb->max_inline_data); 144 150 attr->l2_db_size = (sb->l2_db_space_size + 1) * ··· 196 186 (void *)&resp, 197 187 NULL, 0); 198 188 if (rc) { 199 - dev_err(&res->pdev->dev, 200 - "QPLIB: Failed to set function resources"); 189 + dev_err(&res->pdev->dev, "Failed to set function resources\n"); 201 190 } 202 191 return rc; 203 192 } ··· 208 199 { 209 200 if (index >= sgid_tbl->max) { 210 201 dev_err(&res->pdev->dev, 211 - "QPLIB: Index %d exceeded SGID table max (%d)", 202 + "Index %d exceeded SGID table max (%d)\n", 212 203 index, sgid_tbl->max); 213 204 return -EINVAL; 214 205 } ··· 226 217 int index; 227 218 228 219 if (!sgid_tbl) { 229 - dev_err(&res->pdev->dev, "QPLIB: SGID table not allocated"); 220 + dev_err(&res->pdev->dev, "SGID table not allocated\n"); 230 221 return -EINVAL; 231 222 } 232 223 /* Do we need a sgid_lock here? 
*/ 233 224 if (!sgid_tbl->active) { 234 - dev_err(&res->pdev->dev, 235 - "QPLIB: SGID table has no active entries"); 225 + dev_err(&res->pdev->dev, "SGID table has no active entries\n"); 236 226 return -ENOMEM; 237 227 } 238 228 for (index = 0; index < sgid_tbl->max; index++) { ··· 239 231 break; 240 232 } 241 233 if (index == sgid_tbl->max) { 242 - dev_warn(&res->pdev->dev, "GID not found in the SGID table"); 234 + dev_warn(&res->pdev->dev, "GID not found in the SGID table\n"); 243 235 return 0; 244 236 } 245 237 /* Remove GID from the SGID table */ ··· 252 244 RCFW_CMD_PREP(req, DELETE_GID, cmd_flags); 253 245 if (sgid_tbl->hw_id[index] == 0xFFFF) { 254 246 dev_err(&res->pdev->dev, 255 - "QPLIB: GID entry contains an invalid HW id"); 247 + "GID entry contains an invalid HW id\n"); 256 248 return -EINVAL; 257 249 } 258 250 req.gid_index = cpu_to_le16(sgid_tbl->hw_id[index]); ··· 266 258 sgid_tbl->vlan[index] = 0; 267 259 sgid_tbl->active--; 268 260 dev_dbg(&res->pdev->dev, 269 - "QPLIB: SGID deleted hw_id[0x%x] = 0x%x active = 0x%x", 261 + "SGID deleted hw_id[0x%x] = 0x%x active = 0x%x\n", 270 262 index, sgid_tbl->hw_id[index], sgid_tbl->active); 271 263 sgid_tbl->hw_id[index] = (u16)-1; 272 264 ··· 285 277 int i, free_idx; 286 278 287 279 if (!sgid_tbl) { 288 - dev_err(&res->pdev->dev, "QPLIB: SGID table not allocated"); 280 + dev_err(&res->pdev->dev, "SGID table not allocated\n"); 289 281 return -EINVAL; 290 282 } 291 283 /* Do we need a sgid_lock here? 
*/ 292 284 if (sgid_tbl->active == sgid_tbl->max) { 293 - dev_err(&res->pdev->dev, "QPLIB: SGID table is full"); 285 + dev_err(&res->pdev->dev, "SGID table is full\n"); 294 286 return -ENOMEM; 295 287 } 296 288 free_idx = sgid_tbl->max; 297 289 for (i = 0; i < sgid_tbl->max; i++) { 298 290 if (!memcmp(&sgid_tbl->tbl[i], gid, sizeof(*gid))) { 299 291 dev_dbg(&res->pdev->dev, 300 - "QPLIB: SGID entry already exist in entry %d!", 301 - i); 292 + "SGID entry already exist in entry %d!\n", i); 302 293 *index = i; 303 294 return -EALREADY; 304 295 } else if (!memcmp(&sgid_tbl->tbl[i], &bnxt_qplib_gid_zero, ··· 308 301 } 309 302 if (free_idx == sgid_tbl->max) { 310 303 dev_err(&res->pdev->dev, 311 - "QPLIB: SGID table is FULL but count is not MAX??"); 304 + "SGID table is FULL but count is not MAX??\n"); 312 305 return -ENOMEM; 313 306 } 314 307 if (update) { ··· 355 348 sgid_tbl->vlan[free_idx] = 1; 356 349 357 350 dev_dbg(&res->pdev->dev, 358 - "QPLIB: SGID added hw_id[0x%x] = 0x%x active = 0x%x", 351 + "SGID added hw_id[0x%x] = 0x%x active = 0x%x\n", 359 352 free_idx, sgid_tbl->hw_id[free_idx], sgid_tbl->active); 360 353 361 354 *index = free_idx; ··· 411 404 } 412 405 if (index >= pkey_tbl->max) { 413 406 dev_err(&res->pdev->dev, 414 - "QPLIB: Index %d exceeded PKEY table max (%d)", 407 + "Index %d exceeded PKEY table max (%d)\n", 415 408 index, pkey_tbl->max); 416 409 return -EINVAL; 417 410 } ··· 426 419 int i, rc = 0; 427 420 428 421 if (!pkey_tbl) { 429 - dev_err(&res->pdev->dev, "QPLIB: PKEY table not allocated"); 422 + dev_err(&res->pdev->dev, "PKEY table not allocated\n"); 430 423 return -EINVAL; 431 424 } 432 425 433 426 /* Do we need a pkey_lock here? 
*/ 434 427 if (!pkey_tbl->active) { 435 - dev_err(&res->pdev->dev, 436 - "QPLIB: PKEY table has no active entries"); 428 + dev_err(&res->pdev->dev, "PKEY table has no active entries\n"); 437 429 return -ENOMEM; 438 430 } 439 431 for (i = 0; i < pkey_tbl->max; i++) { ··· 441 435 } 442 436 if (i == pkey_tbl->max) { 443 437 dev_err(&res->pdev->dev, 444 - "QPLIB: PKEY 0x%04x not found in the pkey table", 445 - *pkey); 438 + "PKEY 0x%04x not found in the pkey table\n", *pkey); 446 439 return -ENOMEM; 447 440 } 448 441 memset(&pkey_tbl->tbl[i], 0, sizeof(*pkey)); ··· 458 453 int i, free_idx, rc = 0; 459 454 460 455 if (!pkey_tbl) { 461 - dev_err(&res->pdev->dev, "QPLIB: PKEY table not allocated"); 456 + dev_err(&res->pdev->dev, "PKEY table not allocated\n"); 462 457 return -EINVAL; 463 458 } 464 459 465 460 /* Do we need a pkey_lock here? */ 466 461 if (pkey_tbl->active == pkey_tbl->max) { 467 - dev_err(&res->pdev->dev, "QPLIB: PKEY table is full"); 462 + dev_err(&res->pdev->dev, "PKEY table is full\n"); 468 463 return -ENOMEM; 469 464 } 470 465 free_idx = pkey_tbl->max; ··· 476 471 } 477 472 if (free_idx == pkey_tbl->max) { 478 473 dev_err(&res->pdev->dev, 479 - "QPLIB: PKEY table is FULL but count is not MAX??"); 474 + "PKEY table is FULL but count is not MAX??\n"); 480 475 return -ENOMEM; 481 476 } 482 477 /* Add PKEY to the pkey_tbl */ ··· 560 555 int rc; 561 556 562 557 if (mrw->lkey == 0xFFFFFFFF) { 563 - dev_info(&res->pdev->dev, 564 - "QPLIB: SP: Free a reserved lkey MRW"); 558 + dev_info(&res->pdev->dev, "SP: Free a reserved lkey MRW\n"); 565 559 return 0; 566 560 } 567 561 ··· 670 666 pages++; 671 667 672 668 if (pages > MAX_PBL_LVL_1_PGS) { 673 - dev_err(&res->pdev->dev, "QPLIB: SP: Reg MR pages "); 674 669 dev_err(&res->pdev->dev, 675 - "requested (0x%x) exceeded max (0x%x)", 670 + "SP: Reg MR pages requested (0x%x) exceeded max (0x%x)\n", 676 671 pages, MAX_PBL_LVL_1_PGS); 677 672 return -ENOMEM; 678 673 } ··· 687 684 HWQ_TYPE_CTX); 688 685 if (rc) { 689 686 
dev_err(&res->pdev->dev, 690 - "SP: Reg MR memory allocation failed"); 687 + "SP: Reg MR memory allocation failed\n"); 691 688 return -ENOMEM; 692 689 } 693 690 /* Write to the hwq */ ··· 798 795 sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb)); 799 796 if (!sbuf) { 800 797 dev_err(&rcfw->pdev->dev, 801 - "QPLIB: SP: QUERY_ROCE_STATS alloc side buffer failed"); 798 + "SP: QUERY_ROCE_STATS alloc side buffer failed\n"); 802 799 return -ENOMEM; 803 800 } 804 801 ··· 848 845 stats->res_srq_load_err = le64_to_cpu(sb->res_srq_load_err); 849 846 stats->res_tx_pci_err = le64_to_cpu(sb->res_tx_pci_err); 850 847 stats->res_rx_pci_err = le64_to_cpu(sb->res_rx_pci_err); 848 + if (!rcfw->init_oos_stats) { 849 + rcfw->oos_prev = le64_to_cpu(sb->res_oos_drop_count); 850 + rcfw->init_oos_stats = 1; 851 + } else { 852 + stats->res_oos_drop_count += 853 + (le64_to_cpu(sb->res_oos_drop_count) - 854 + rcfw->oos_prev) & BNXT_QPLIB_OOS_COUNT_MASK; 855 + rcfw->oos_prev = le64_to_cpu(sb->res_oos_drop_count); 856 + } 857 + 851 858 bail: 852 859 bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf); 853 860 return rc;
+10
drivers/infiniband/hw/bnxt_re/qplib_sp.h
··· 205 205 /* res_tx_pci_err is 64 b */ 206 206 u64 res_rx_pci_err; 207 207 /* res_rx_pci_err is 64 b */ 208 + u64 res_oos_drop_count; 209 + /* res_oos_drop_count */ 210 + u64 active_qp_count_p0; 211 + /* port 0 active qps */ 212 + u64 active_qp_count_p1; 213 + /* port 1 active qps */ 214 + u64 active_qp_count_p2; 215 + /* port 2 active qps */ 216 + u64 active_qp_count_p3; 217 + /* port 3 active qps */ 208 218 }; 209 219 210 220 int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
+5
drivers/infiniband/hw/bnxt_re/roce_hsi.h
··· 2929 2929 __le64 res_srq_load_err; 2930 2930 __le64 res_tx_pci_err; 2931 2931 __le64 res_rx_pci_err; 2932 + __le64 res_oos_drop_count; 2933 + __le64 active_qp_count_p0; 2934 + __le64 active_qp_count_p1; 2935 + __le64 active_qp_count_p2; 2936 + __le64 active_qp_count_p3; 2932 2937 }; 2933 2938 2934 2939 /* QP error notification event (16 bytes) */
+20 -35
drivers/infiniband/hw/cxgb3/iwch_provider.c
··· 1127 1127 return 0; 1128 1128 } 1129 1129 1130 - static ssize_t show_rev(struct device *dev, struct device_attribute *attr, 1131 - char *buf) 1130 + static ssize_t hw_rev_show(struct device *dev, 1131 + struct device_attribute *attr, char *buf) 1132 1132 { 1133 1133 struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev, 1134 1134 ibdev.dev); 1135 1135 pr_debug("%s dev 0x%p\n", __func__, dev); 1136 1136 return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type); 1137 1137 } 1138 + static DEVICE_ATTR_RO(hw_rev); 1138 1139 1139 - static ssize_t show_hca(struct device *dev, struct device_attribute *attr, 1140 - char *buf) 1140 + static ssize_t hca_type_show(struct device *dev, 1141 + struct device_attribute *attr, char *buf) 1141 1142 { 1142 1143 struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev, 1143 1144 ibdev.dev); ··· 1149 1148 lldev->ethtool_ops->get_drvinfo(lldev, &info); 1150 1149 return sprintf(buf, "%s\n", info.driver); 1151 1150 } 1151 + static DEVICE_ATTR_RO(hca_type); 1152 1152 1153 - static ssize_t show_board(struct device *dev, struct device_attribute *attr, 1154 - char *buf) 1153 + static ssize_t board_id_show(struct device *dev, 1154 + struct device_attribute *attr, char *buf) 1155 1155 { 1156 1156 struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev, 1157 1157 ibdev.dev); ··· 1160 1158 return sprintf(buf, "%x.%x\n", iwch_dev->rdev.rnic_info.pdev->vendor, 1161 1159 iwch_dev->rdev.rnic_info.pdev->device); 1162 1160 } 1161 + static DEVICE_ATTR_RO(board_id); 1163 1162 1164 1163 enum counters { 1165 1164 IPINRECEIVES, ··· 1277 1274 return stats->num_counters; 1278 1275 } 1279 1276 1280 - static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 1281 - static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); 1282 - static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); 1277 + static struct attribute *iwch_class_attributes[] = { 1278 + &dev_attr_hw_rev.attr, 1279 + &dev_attr_hca_type.attr, 1280 + &dev_attr_board_id.attr, 1281 + 
NULL 1282 + }; 1283 1283 1284 - static struct device_attribute *iwch_class_attributes[] = { 1285 - &dev_attr_hw_rev, 1286 - &dev_attr_hca_type, 1287 - &dev_attr_board_id, 1284 + static const struct attribute_group iwch_attr_group = { 1285 + .attrs = iwch_class_attributes, 1288 1286 }; 1289 1287 1290 1288 static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num, ··· 1320 1316 int iwch_register_device(struct iwch_dev *dev) 1321 1317 { 1322 1318 int ret; 1323 - int i; 1324 1319 1325 1320 pr_debug("%s iwch_dev %p\n", __func__, dev); 1326 - strlcpy(dev->ibdev.name, "cxgb3_%d", IB_DEVICE_NAME_MAX); 1327 1321 memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); 1328 1322 memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); 1329 1323 dev->ibdev.owner = THIS_MODULE; ··· 1404 1402 sizeof(dev->ibdev.iwcm->ifname)); 1405 1403 1406 1404 dev->ibdev.driver_id = RDMA_DRIVER_CXGB3; 1407 - ret = ib_register_device(&dev->ibdev, NULL); 1405 + rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group); 1406 + ret = ib_register_device(&dev->ibdev, "cxgb3_%d", NULL); 1408 1407 if (ret) 1409 - goto bail1; 1410 - 1411 - for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i) { 1412 - ret = device_create_file(&dev->ibdev.dev, 1413 - iwch_class_attributes[i]); 1414 - if (ret) { 1415 - goto bail2; 1416 - } 1417 - } 1418 - return 0; 1419 - bail2: 1420 - ib_unregister_device(&dev->ibdev); 1421 - bail1: 1422 - kfree(dev->ibdev.iwcm); 1408 + kfree(dev->ibdev.iwcm); 1423 1409 return ret; 1424 1410 } 1425 1411 1426 1412 void iwch_unregister_device(struct iwch_dev *dev) 1427 1413 { 1428 - int i; 1429 - 1430 1414 pr_debug("%s iwch_dev %p\n", __func__, dev); 1431 - for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i) 1432 - device_remove_file(&dev->ibdev.dev, 1433 - iwch_class_attributes[i]); 1434 1415 ib_unregister_device(&dev->ibdev); 1435 1416 kfree(dev->ibdev.iwcm); 1436 1417 return;
+1 -2
drivers/infiniband/hw/cxgb4/cm.c
··· 403 403 ep->com.local_addr.ss_family); 404 404 dst_release(ep->dst); 405 405 cxgb4_l2t_release(ep->l2t); 406 - if (ep->mpa_skb) 407 - kfree_skb(ep->mpa_skb); 406 + kfree_skb(ep->mpa_skb); 408 407 } 409 408 if (!skb_queue_empty(&ep->com.ep_skb_list)) 410 409 skb_queue_purge(&ep->com.ep_skb_list);
+1 -1
drivers/infiniband/hw/cxgb4/cq.c
··· 161 161 cq->gts = rdev->lldi.gts_reg; 162 162 cq->rdev = rdev; 163 163 164 - cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, T4_BAR2_QTYPE_INGRESS, 164 + cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, CXGB4_BAR2_QTYPE_INGRESS, 165 165 &cq->bar2_qid, 166 166 user ? &cq->bar2_pa : NULL); 167 167 if (user && !cq->bar2_pa) {
+20 -30
drivers/infiniband/hw/cxgb4/provider.c
··· 373 373 return 0; 374 374 } 375 375 376 - static ssize_t show_rev(struct device *dev, struct device_attribute *attr, 377 - char *buf) 376 + static ssize_t hw_rev_show(struct device *dev, 377 + struct device_attribute *attr, char *buf) 378 378 { 379 379 struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, 380 380 ibdev.dev); ··· 382 382 return sprintf(buf, "%d\n", 383 383 CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type)); 384 384 } 385 + static DEVICE_ATTR_RO(hw_rev); 385 386 386 - static ssize_t show_hca(struct device *dev, struct device_attribute *attr, 387 - char *buf) 387 + static ssize_t hca_type_show(struct device *dev, 388 + struct device_attribute *attr, char *buf) 388 389 { 389 390 struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, 390 391 ibdev.dev); ··· 396 395 lldev->ethtool_ops->get_drvinfo(lldev, &info); 397 396 return sprintf(buf, "%s\n", info.driver); 398 397 } 398 + static DEVICE_ATTR_RO(hca_type); 399 399 400 - static ssize_t show_board(struct device *dev, struct device_attribute *attr, 401 - char *buf) 400 + static ssize_t board_id_show(struct device *dev, struct device_attribute *attr, 401 + char *buf) 402 402 { 403 403 struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, 404 404 ibdev.dev); ··· 407 405 return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor, 408 406 c4iw_dev->rdev.lldi.pdev->device); 409 407 } 408 + static DEVICE_ATTR_RO(board_id); 410 409 411 410 enum counters { 412 411 IP4INSEGS, ··· 464 461 return stats->num_counters; 465 462 } 466 463 467 - static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 468 - static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); 469 - static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); 464 + static struct attribute *c4iw_class_attributes[] = { 465 + &dev_attr_hw_rev.attr, 466 + &dev_attr_hca_type.attr, 467 + &dev_attr_board_id.attr, 468 + NULL 469 + }; 470 470 471 - static struct device_attribute *c4iw_class_attributes[] = { 472 - 
&dev_attr_hw_rev, 473 - &dev_attr_hca_type, 474 - &dev_attr_board_id, 471 + static const struct attribute_group c4iw_attr_group = { 472 + .attrs = c4iw_class_attributes, 475 473 }; 476 474 477 475 static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num, ··· 534 530 void c4iw_register_device(struct work_struct *work) 535 531 { 536 532 int ret; 537 - int i; 538 533 struct uld_ctx *ctx = container_of(work, struct uld_ctx, reg_work); 539 534 struct c4iw_dev *dev = ctx->dev; 540 535 541 536 pr_debug("c4iw_dev %p\n", dev); 542 - strlcpy(dev->ibdev.name, "cxgb4_%d", IB_DEVICE_NAME_MAX); 543 537 memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); 544 538 memcpy(&dev->ibdev.node_guid, dev->rdev.lldi.ports[0]->dev_addr, 6); 545 539 dev->ibdev.owner = THIS_MODULE; ··· 628 626 memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name, 629 627 sizeof(dev->ibdev.iwcm->ifname)); 630 628 629 + rdma_set_device_sysfs_group(&dev->ibdev, &c4iw_attr_group); 631 630 dev->ibdev.driver_id = RDMA_DRIVER_CXGB4; 632 - ret = ib_register_device(&dev->ibdev, NULL); 631 + ret = ib_register_device(&dev->ibdev, "cxgb4_%d", NULL); 633 632 if (ret) 634 633 goto err_kfree_iwcm; 635 - 636 - for (i = 0; i < ARRAY_SIZE(c4iw_class_attributes); ++i) { 637 - ret = device_create_file(&dev->ibdev.dev, 638 - c4iw_class_attributes[i]); 639 - if (ret) 640 - goto err_unregister_device; 641 - } 642 634 return; 643 - err_unregister_device: 644 - ib_unregister_device(&dev->ibdev); 635 + 645 636 err_kfree_iwcm: 646 637 kfree(dev->ibdev.iwcm); 647 638 err_dealloc_ctx: ··· 646 651 647 652 void c4iw_unregister_device(struct c4iw_dev *dev) 648 653 { 649 - int i; 650 - 651 654 pr_debug("c4iw_dev %p\n", dev); 652 - for (i = 0; i < ARRAY_SIZE(c4iw_class_attributes); ++i) 653 - device_remove_file(&dev->ibdev.dev, 654 - c4iw_class_attributes[i]); 655 655 ib_unregister_device(&dev->ibdev); 656 656 kfree(dev->ibdev.iwcm); 657 657 return;
+5 -5
drivers/infiniband/hw/cxgb4/qp.c
··· 279 279 280 280 wq->db = rdev->lldi.db_reg; 281 281 282 - wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid, T4_BAR2_QTYPE_EGRESS, 282 + wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid, 283 + CXGB4_BAR2_QTYPE_EGRESS, 283 284 &wq->sq.bar2_qid, 284 285 user ? &wq->sq.bar2_pa : NULL); 285 286 if (need_rq) 286 287 wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid, 287 - T4_BAR2_QTYPE_EGRESS, 288 + CXGB4_BAR2_QTYPE_EGRESS, 288 289 &wq->rq.bar2_qid, 289 290 user ? &wq->rq.bar2_pa : NULL); 290 291 ··· 2573 2572 memset(wq->queue, 0, wq->memsize); 2574 2573 dma_unmap_addr_set(wq, mapping, wq->dma_addr); 2575 2574 2576 - wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, T4_BAR2_QTYPE_EGRESS, 2575 + wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, CXGB4_BAR2_QTYPE_EGRESS, 2577 2576 &wq->bar2_qid, 2578 2577 user ? &wq->bar2_pa : NULL); 2579 2578 ··· 2814 2813 free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, 2815 2814 srq->wr_waitp); 2816 2815 err_free_skb: 2817 - if (srq->destroy_skb) 2818 - kfree_skb(srq->destroy_skb); 2816 + kfree_skb(srq->destroy_skb); 2819 2817 err_free_srq_idx: 2820 2818 c4iw_free_srq_idx(&rhp->rdev, srq->idx); 2821 2819 err_free_wr_wait:
+36 -6
drivers/infiniband/hw/hfi1/Makefile
··· 8 8 # 9 9 obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o 10 10 11 - hfi1-y := affinity.o chip.o device.o driver.o efivar.o \ 12 - eprom.o exp_rcv.o file_ops.o firmware.o \ 13 - init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \ 14 - qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \ 15 - uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \ 16 - verbs_txreq.o vnic_main.o vnic_sdma.o 11 + hfi1-y := \ 12 + affinity.o \ 13 + chip.o \ 14 + device.o \ 15 + driver.o \ 16 + efivar.o \ 17 + eprom.o \ 18 + exp_rcv.o \ 19 + file_ops.o \ 20 + firmware.o \ 21 + init.o \ 22 + intr.o \ 23 + iowait.o \ 24 + mad.o \ 25 + mmu_rb.o \ 26 + msix.o \ 27 + pcie.o \ 28 + pio.o \ 29 + pio_copy.o \ 30 + platform.o \ 31 + qp.o \ 32 + qsfp.o \ 33 + rc.o \ 34 + ruc.o \ 35 + sdma.o \ 36 + sysfs.o \ 37 + trace.o \ 38 + uc.o \ 39 + ud.o \ 40 + user_exp_rcv.o \ 41 + user_pages.o \ 42 + user_sdma.o \ 43 + verbs.o \ 44 + verbs_txreq.o \ 45 + vnic_main.o \ 46 + vnic_sdma.o 17 47 18 48 ifdef CONFIG_DEBUG_FS 19 49 hfi1-y += debugfs.o
+2 -2
drivers/infiniband/hw/hfi1/affinity.c
··· 817 817 set = &entry->def_intr; 818 818 cpumask_set_cpu(cpu, &set->mask); 819 819 cpumask_set_cpu(cpu, &set->used); 820 - for (i = 0; i < dd->num_msix_entries; i++) { 820 + for (i = 0; i < dd->msix_info.max_requested; i++) { 821 821 struct hfi1_msix_entry *other_msix; 822 822 823 - other_msix = &dd->msix_entries[i]; 823 + other_msix = &dd->msix_info.msix_entries[i]; 824 824 if (other_msix->type != IRQ_SDMA || other_msix == msix) 825 825 continue; 826 826
+139 -357
drivers/infiniband/hw/hfi1/chip.c
··· 67 67 #include "debugfs.h" 68 68 #include "fault.h" 69 69 70 - #define NUM_IB_PORTS 1 71 - 72 70 uint kdeth_qp; 73 71 module_param_named(kdeth_qp, kdeth_qp, uint, S_IRUGO); 74 72 MODULE_PARM_DESC(kdeth_qp, "Set the KDETH queue pair prefix"); ··· 1098 1100 const char *desc; 1099 1101 }; 1100 1102 1101 - #define NUM_MISC_ERRS (IS_GENERAL_ERR_END - IS_GENERAL_ERR_START) 1102 - #define NUM_DC_ERRS (IS_DC_END - IS_DC_START) 1103 - #define NUM_VARIOUS (IS_VARIOUS_END - IS_VARIOUS_START) 1103 + #define NUM_MISC_ERRS (IS_GENERAL_ERR_END + 1 - IS_GENERAL_ERR_START) 1104 + #define NUM_DC_ERRS (IS_DC_END + 1 - IS_DC_START) 1105 + #define NUM_VARIOUS (IS_VARIOUS_END + 1 - IS_VARIOUS_START) 1104 1106 1105 1107 /* 1106 1108 * Helpers for building HFI and DC error interrupt table entries. Different ··· 8179 8181 /** 8180 8182 * is_rcv_urgent_int() - User receive context urgent IRQ handler 8181 8183 * @dd: valid dd 8182 - * @source: logical IRQ source (ofse from IS_RCVURGENT_START) 8184 + * @source: logical IRQ source (offset from IS_RCVURGENT_START) 8183 8185 * 8184 8186 * RX block receive urgent interrupt. Source is < 160. 8185 8187 * ··· 8229 8231 is_sdma_eng_err_name, is_sdma_eng_err_int }, 8230 8232 { IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END, 8231 8233 is_sendctxt_err_name, is_sendctxt_err_int }, 8232 - { IS_SDMA_START, IS_SDMA_END, 8234 + { IS_SDMA_START, IS_SDMA_IDLE_END, 8233 8235 is_sdma_eng_name, is_sdma_eng_int }, 8234 8236 { IS_VARIOUS_START, IS_VARIOUS_END, 8235 8237 is_various_name, is_various_int }, ··· 8255 8257 8256 8258 /* avoids a double compare by walking the table in-order */ 8257 8259 for (entry = &is_table[0]; entry->is_name; entry++) { 8258 - if (source < entry->end) { 8260 + if (source <= entry->end) { 8259 8261 trace_hfi1_interrupt(dd, entry, source); 8260 8262 entry->is_int(dd, source - entry->start); 8261 8263 return; ··· 8274 8276 * context DATA IRQs are threaded and are not supported by this handler. 
8275 8277 * 8276 8278 */ 8277 - static irqreturn_t general_interrupt(int irq, void *data) 8279 + irqreturn_t general_interrupt(int irq, void *data) 8278 8280 { 8279 8281 struct hfi1_devdata *dd = data; 8280 8282 u64 regs[CCE_NUM_INT_CSRS]; ··· 8307 8309 return handled; 8308 8310 } 8309 8311 8310 - static irqreturn_t sdma_interrupt(int irq, void *data) 8312 + irqreturn_t sdma_interrupt(int irq, void *data) 8311 8313 { 8312 8314 struct sdma_engine *sde = data; 8313 8315 struct hfi1_devdata *dd = sde->dd; ··· 8399 8401 * invoked) is finished. The intent is to avoid extra interrupts while we 8400 8402 * are processing packets anyway. 8401 8403 */ 8402 - static irqreturn_t receive_context_interrupt(int irq, void *data) 8404 + irqreturn_t receive_context_interrupt(int irq, void *data) 8403 8405 { 8404 8406 struct hfi1_ctxtdata *rcd = data; 8405 8407 struct hfi1_devdata *dd = rcd->dd; ··· 8439 8441 * Receive packet thread handler. This expects to be invoked with the 8440 8442 * receive interrupt still blocked. 
8441 8443 */ 8442 - static irqreturn_t receive_context_thread(int irq, void *data) 8444 + irqreturn_t receive_context_thread(int irq, void *data) 8443 8445 { 8444 8446 struct hfi1_ctxtdata *rcd = data; 8445 8447 int present; ··· 9649 9651 } 9650 9652 } 9651 9653 9652 - static void init_qsfp_int(struct hfi1_devdata *dd) 9654 + void init_qsfp_int(struct hfi1_devdata *dd) 9653 9655 { 9654 9656 struct hfi1_pportdata *ppd = dd->pport; 9655 - u64 qsfp_mask, cce_int_mask; 9656 - const int qsfp1_int_smask = QSFP1_INT % 64; 9657 - const int qsfp2_int_smask = QSFP2_INT % 64; 9658 - 9659 - /* 9660 - * disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0 9661 - * Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR, 9662 - * therefore just one of QSFP1_INT/QSFP2_INT can be used to find 9663 - * the index of the appropriate CSR in the CCEIntMask CSR array 9664 - */ 9665 - cce_int_mask = read_csr(dd, CCE_INT_MASK + 9666 - (8 * (QSFP1_INT / 64))); 9667 - if (dd->hfi1_id) { 9668 - cce_int_mask &= ~((u64)1 << qsfp1_int_smask); 9669 - write_csr(dd, CCE_INT_MASK + (8 * (QSFP1_INT / 64)), 9670 - cce_int_mask); 9671 - } else { 9672 - cce_int_mask &= ~((u64)1 << qsfp2_int_smask); 9673 - write_csr(dd, CCE_INT_MASK + (8 * (QSFP2_INT / 64)), 9674 - cce_int_mask); 9675 - } 9657 + u64 qsfp_mask; 9676 9658 9677 9659 qsfp_mask = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N); 9678 9660 /* Clear current status to avoid spurious interrupts */ ··· 9669 9691 write_csr(dd, 9670 9692 dd->hfi1_id ? ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT, 9671 9693 qsfp_mask); 9694 + 9695 + /* Enable the appropriate QSFP IRQ source */ 9696 + if (!dd->hfi1_id) 9697 + set_intr_bits(dd, QSFP1_INT, QSFP1_INT, true); 9698 + else 9699 + set_intr_bits(dd, QSFP2_INT, QSFP2_INT, true); 9672 9700 } 9673 9701 9674 9702 /* ··· 10561 10577 } 10562 10578 } 10563 10579 10564 - /* 10565 - * Verify if BCT for data VLs is non-zero. 
10580 + /** 10581 + * data_vls_operational() - Verify if data VL BCT credits and MTU 10582 + * are both set. 10583 + * @ppd: pointer to hfi1_pportdata structure 10584 + * 10585 + * Return: true - Ok, false -otherwise. 10566 10586 */ 10567 10587 static inline bool data_vls_operational(struct hfi1_pportdata *ppd) 10568 10588 { 10569 - return !!ppd->actual_vls_operational; 10589 + int i; 10590 + u64 reg; 10591 + 10592 + if (!ppd->actual_vls_operational) 10593 + return false; 10594 + 10595 + for (i = 0; i < ppd->vls_supported; i++) { 10596 + reg = read_csr(ppd->dd, SEND_CM_CREDIT_VL + (8 * i)); 10597 + if ((reg && !ppd->dd->vld[i].mtu) || 10598 + (!reg && ppd->dd->vld[i].mtu)) 10599 + return false; 10600 + } 10601 + 10602 + return true; 10570 10603 } 10571 10604 10572 10605 /* ··· 10696 10695 10697 10696 if (!data_vls_operational(ppd)) { 10698 10697 dd_dev_err(dd, 10699 - "%s: data VLs not operational\n", __func__); 10698 + "%s: Invalid data VL credits or mtu\n", 10699 + __func__); 10700 10700 ret = -EINVAL; 10701 10701 break; 10702 10702 } ··· 11934 11932 11935 11933 rcvctrl &= ~RCV_CTXT_CTRL_ENABLE_SMASK; 11936 11934 } 11937 - if (op & HFI1_RCVCTRL_INTRAVAIL_ENB) 11935 + if (op & HFI1_RCVCTRL_INTRAVAIL_ENB) { 11936 + set_intr_bits(dd, IS_RCVAVAIL_START + rcd->ctxt, 11937 + IS_RCVAVAIL_START + rcd->ctxt, true); 11938 11938 rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK; 11939 - if (op & HFI1_RCVCTRL_INTRAVAIL_DIS) 11939 + } 11940 + if (op & HFI1_RCVCTRL_INTRAVAIL_DIS) { 11941 + set_intr_bits(dd, IS_RCVAVAIL_START + rcd->ctxt, 11942 + IS_RCVAVAIL_START + rcd->ctxt, false); 11940 11943 rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK; 11944 + } 11941 11945 if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && rcd->rcvhdrtail_kvaddr) 11942 11946 rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK; 11943 11947 if (op & HFI1_RCVCTRL_TAILUPD_DIS) { ··· 11973 11965 rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK; 11974 11966 if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS) 11975 11967 rcvctrl &= 
~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK; 11968 + if (op & HFI1_RCVCTRL_URGENT_ENB) 11969 + set_intr_bits(dd, IS_RCVURGENT_START + rcd->ctxt, 11970 + IS_RCVURGENT_START + rcd->ctxt, true); 11971 + if (op & HFI1_RCVCTRL_URGENT_DIS) 11972 + set_intr_bits(dd, IS_RCVURGENT_START + rcd->ctxt, 11973 + IS_RCVURGENT_START + rcd->ctxt, false); 11974 + 11976 11975 hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl); 11977 11976 write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcvctrl); 11978 11977 ··· 12978 12963 return ret; 12979 12964 } 12980 12965 12981 - /** 12982 - * get_int_mask - get 64 bit int mask 12983 - * @dd - the devdata 12984 - * @i - the csr (relative to CCE_INT_MASK) 12985 - * 12986 - * Returns the mask with the urgent interrupt mask 12987 - * bit clear for kernel receive contexts. 12988 - */ 12989 - static u64 get_int_mask(struct hfi1_devdata *dd, u32 i) 12990 - { 12991 - u64 mask = U64_MAX; /* default to no change */ 12992 - 12993 - if (i >= (IS_RCVURGENT_START / 64) && i < (IS_RCVURGENT_END / 64)) { 12994 - int j = (i - (IS_RCVURGENT_START / 64)) * 64; 12995 - int k = !j ? IS_RCVURGENT_START % 64 : 0; 12996 - 12997 - if (j) 12998 - j -= IS_RCVURGENT_START % 64; 12999 - /* j = 0..dd->first_dyn_alloc_ctxt - 1,k = 0..63 */ 13000 - for (; j < dd->first_dyn_alloc_ctxt && k < 64; j++, k++) 13001 - /* convert to bit in mask and clear */ 13002 - mask &= ~BIT_ULL(k); 13003 - } 13004 - return mask; 13005 - } 13006 - 13007 12966 /* ========================================================================= */ 13008 12967 13009 - /* 13010 - * Enable/disable chip from delivering interrupts. 
12968 + /** 12969 + * read_mod_write() - Calculate the IRQ register index and set/clear the bits 12970 + * @dd: valid devdata 12971 + * @src: IRQ source to determine register index from 12972 + * @bits: the bits to set or clear 12973 + * @set: true == set the bits, false == clear the bits 12974 + * 13011 12975 */ 13012 - void set_intr_state(struct hfi1_devdata *dd, u32 enable) 12976 + static void read_mod_write(struct hfi1_devdata *dd, u16 src, u64 bits, 12977 + bool set) 13013 12978 { 13014 - int i; 12979 + u64 reg; 12980 + u16 idx = src / BITS_PER_REGISTER; 13015 12981 13016 - /* 13017 - * In HFI, the mask needs to be 1 to allow interrupts. 13018 - */ 13019 - if (enable) { 13020 - /* enable all interrupts but urgent on kernel contexts */ 13021 - for (i = 0; i < CCE_NUM_INT_CSRS; i++) { 13022 - u64 mask = get_int_mask(dd, i); 12982 + spin_lock(&dd->irq_src_lock); 12983 + reg = read_csr(dd, CCE_INT_MASK + (8 * idx)); 12984 + if (set) 12985 + reg |= bits; 12986 + else 12987 + reg &= ~bits; 12988 + write_csr(dd, CCE_INT_MASK + (8 * idx), reg); 12989 + spin_unlock(&dd->irq_src_lock); 12990 + } 13023 12991 13024 - write_csr(dd, CCE_INT_MASK + (8 * i), mask); 12992 + /** 12993 + * set_intr_bits() - Enable/disable a range (one or more) IRQ sources 12994 + * @dd: valid devdata 12995 + * @first: first IRQ source to set/clear 12996 + * @last: last IRQ source (inclusive) to set/clear 12997 + * @set: true == set the bits, false == clear the bits 12998 + * 12999 + * If first == last, set the exact source. 13000 + */ 13001 + int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set) 13002 + { 13003 + u64 bits = 0; 13004 + u64 bit; 13005 + u16 src; 13006 + 13007 + if (first > NUM_INTERRUPT_SOURCES || last > NUM_INTERRUPT_SOURCES) 13008 + return -EINVAL; 13009 + 13010 + if (last < first) 13011 + return -ERANGE; 13012 + 13013 + for (src = first; src <= last; src++) { 13014 + bit = src % BITS_PER_REGISTER; 13015 + /* wrapped to next register? 
*/ 13016 + if (!bit && bits) { 13017 + read_mod_write(dd, src - 1, bits, set); 13018 + bits = 0; 13025 13019 } 13026 - 13027 - init_qsfp_int(dd); 13028 - } else { 13029 - for (i = 0; i < CCE_NUM_INT_CSRS; i++) 13030 - write_csr(dd, CCE_INT_MASK + (8 * i), 0ull); 13020 + bits |= BIT_ULL(bit); 13031 13021 } 13022 + read_mod_write(dd, last, bits, set); 13023 + 13024 + return 0; 13032 13025 } 13033 13026 13034 13027 /* 13035 13028 * Clear all interrupt sources on the chip. 13036 13029 */ 13037 - static void clear_all_interrupts(struct hfi1_devdata *dd) 13030 + void clear_all_interrupts(struct hfi1_devdata *dd) 13038 13031 { 13039 13032 int i; 13040 13033 ··· 13066 13043 write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0); 13067 13044 } 13068 13045 13069 - /** 13070 - * hfi1_clean_up_interrupts() - Free all IRQ resources 13071 - * @dd: valid device data data structure 13072 - * 13073 - * Free the MSIx and assoicated PCI resources, if they have been allocated. 13074 - */ 13075 - void hfi1_clean_up_interrupts(struct hfi1_devdata *dd) 13076 - { 13077 - int i; 13078 - struct hfi1_msix_entry *me = dd->msix_entries; 13079 - 13080 - /* remove irqs - must happen before disabling/turning off */ 13081 - for (i = 0; i < dd->num_msix_entries; i++, me++) { 13082 - if (!me->arg) /* => no irq, no affinity */ 13083 - continue; 13084 - hfi1_put_irq_affinity(dd, me); 13085 - pci_free_irq(dd->pcidev, i, me->arg); 13086 - } 13087 - 13088 - /* clean structures */ 13089 - kfree(dd->msix_entries); 13090 - dd->msix_entries = NULL; 13091 - dd->num_msix_entries = 0; 13092 - 13093 - pci_free_irq_vectors(dd->pcidev); 13094 - } 13095 - 13096 13046 /* 13097 13047 * Remap the interrupt source from the general handler to the given MSI-X 13098 13048 * interrupt. 
13099 13049 */ 13100 - static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr) 13050 + void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr) 13101 13051 { 13102 13052 u64 reg; 13103 13053 int m, n; ··· 13094 13098 write_csr(dd, CCE_INT_MAP + (8 * m), reg); 13095 13099 } 13096 13100 13097 - static void remap_sdma_interrupts(struct hfi1_devdata *dd, 13098 - int engine, int msix_intr) 13101 + void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr) 13099 13102 { 13100 13103 /* 13101 13104 * SDMA engine interrupt sources grouped by type, rather than ··· 13103 13108 * SDMAProgress 13104 13109 * SDMAIdle 13105 13110 */ 13106 - remap_intr(dd, IS_SDMA_START + 0 * TXE_NUM_SDMA_ENGINES + engine, 13107 - msix_intr); 13108 - remap_intr(dd, IS_SDMA_START + 1 * TXE_NUM_SDMA_ENGINES + engine, 13109 - msix_intr); 13110 - remap_intr(dd, IS_SDMA_START + 2 * TXE_NUM_SDMA_ENGINES + engine, 13111 - msix_intr); 13112 - } 13113 - 13114 - static int request_msix_irqs(struct hfi1_devdata *dd) 13115 - { 13116 - int first_general, last_general; 13117 - int first_sdma, last_sdma; 13118 - int first_rx, last_rx; 13119 - int i, ret = 0; 13120 - 13121 - /* calculate the ranges we are going to use */ 13122 - first_general = 0; 13123 - last_general = first_general + 1; 13124 - first_sdma = last_general; 13125 - last_sdma = first_sdma + dd->num_sdma; 13126 - first_rx = last_sdma; 13127 - last_rx = first_rx + dd->n_krcv_queues + dd->num_vnic_contexts; 13128 - 13129 - /* VNIC MSIx interrupts get mapped when VNIC contexts are created */ 13130 - dd->first_dyn_msix_idx = first_rx + dd->n_krcv_queues; 13131 - 13132 - /* 13133 - * Sanity check - the code expects all SDMA chip source 13134 - * interrupts to be in the same CSR, starting at bit 0. Verify 13135 - * that this is true by checking the bit location of the start. 
13136 - */ 13137 - BUILD_BUG_ON(IS_SDMA_START % 64); 13138 - 13139 - for (i = 0; i < dd->num_msix_entries; i++) { 13140 - struct hfi1_msix_entry *me = &dd->msix_entries[i]; 13141 - const char *err_info; 13142 - irq_handler_t handler; 13143 - irq_handler_t thread = NULL; 13144 - void *arg = NULL; 13145 - int idx; 13146 - struct hfi1_ctxtdata *rcd = NULL; 13147 - struct sdma_engine *sde = NULL; 13148 - char name[MAX_NAME_SIZE]; 13149 - 13150 - /* obtain the arguments to pci_request_irq */ 13151 - if (first_general <= i && i < last_general) { 13152 - idx = i - first_general; 13153 - handler = general_interrupt; 13154 - arg = dd; 13155 - snprintf(name, sizeof(name), 13156 - DRIVER_NAME "_%d", dd->unit); 13157 - err_info = "general"; 13158 - me->type = IRQ_GENERAL; 13159 - } else if (first_sdma <= i && i < last_sdma) { 13160 - idx = i - first_sdma; 13161 - sde = &dd->per_sdma[idx]; 13162 - handler = sdma_interrupt; 13163 - arg = sde; 13164 - snprintf(name, sizeof(name), 13165 - DRIVER_NAME "_%d sdma%d", dd->unit, idx); 13166 - err_info = "sdma"; 13167 - remap_sdma_interrupts(dd, idx, i); 13168 - me->type = IRQ_SDMA; 13169 - } else if (first_rx <= i && i < last_rx) { 13170 - idx = i - first_rx; 13171 - rcd = hfi1_rcd_get_by_index_safe(dd, idx); 13172 - if (rcd) { 13173 - /* 13174 - * Set the interrupt register and mask for this 13175 - * context's interrupt. 
13176 - */ 13177 - rcd->ireg = (IS_RCVAVAIL_START + idx) / 64; 13178 - rcd->imask = ((u64)1) << 13179 - ((IS_RCVAVAIL_START + idx) % 64); 13180 - handler = receive_context_interrupt; 13181 - thread = receive_context_thread; 13182 - arg = rcd; 13183 - snprintf(name, sizeof(name), 13184 - DRIVER_NAME "_%d kctxt%d", 13185 - dd->unit, idx); 13186 - err_info = "receive context"; 13187 - remap_intr(dd, IS_RCVAVAIL_START + idx, i); 13188 - me->type = IRQ_RCVCTXT; 13189 - rcd->msix_intr = i; 13190 - hfi1_rcd_put(rcd); 13191 - } 13192 - } else { 13193 - /* not in our expected range - complain, then 13194 - * ignore it 13195 - */ 13196 - dd_dev_err(dd, 13197 - "Unexpected extra MSI-X interrupt %d\n", i); 13198 - continue; 13199 - } 13200 - /* no argument, no interrupt */ 13201 - if (!arg) 13202 - continue; 13203 - /* make sure the name is terminated */ 13204 - name[sizeof(name) - 1] = 0; 13205 - me->irq = pci_irq_vector(dd->pcidev, i); 13206 - ret = pci_request_irq(dd->pcidev, i, handler, thread, arg, 13207 - name); 13208 - if (ret) { 13209 - dd_dev_err(dd, 13210 - "unable to allocate %s interrupt, irq %d, index %d, err %d\n", 13211 - err_info, me->irq, idx, ret); 13212 - return ret; 13213 - } 13214 - /* 13215 - * assign arg after pci_request_irq call, so it will be 13216 - * cleaned up 13217 - */ 13218 - me->arg = arg; 13219 - 13220 - ret = hfi1_get_irq_affinity(dd, me); 13221 - if (ret) 13222 - dd_dev_err(dd, "unable to pin IRQ %d\n", ret); 13223 - } 13224 - 13225 - return ret; 13226 - } 13227 - 13228 - void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd) 13229 - { 13230 - int i; 13231 - 13232 - for (i = 0; i < dd->vnic.num_ctxt; i++) { 13233 - struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i]; 13234 - struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr]; 13235 - 13236 - synchronize_irq(me->irq); 13237 - } 13238 - } 13239 - 13240 - void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd) 13241 - { 13242 - struct hfi1_devdata *dd = rcd->dd; 13243 - struct 
hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr]; 13244 - 13245 - if (!me->arg) /* => no irq, no affinity */ 13246 - return; 13247 - 13248 - hfi1_put_irq_affinity(dd, me); 13249 - pci_free_irq(dd->pcidev, rcd->msix_intr, me->arg); 13250 - 13251 - me->arg = NULL; 13252 - } 13253 - 13254 - void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd) 13255 - { 13256 - struct hfi1_devdata *dd = rcd->dd; 13257 - struct hfi1_msix_entry *me; 13258 - int idx = rcd->ctxt; 13259 - void *arg = rcd; 13260 - int ret; 13261 - 13262 - rcd->msix_intr = dd->vnic.msix_idx++; 13263 - me = &dd->msix_entries[rcd->msix_intr]; 13264 - 13265 - /* 13266 - * Set the interrupt register and mask for this 13267 - * context's interrupt. 13268 - */ 13269 - rcd->ireg = (IS_RCVAVAIL_START + idx) / 64; 13270 - rcd->imask = ((u64)1) << 13271 - ((IS_RCVAVAIL_START + idx) % 64); 13272 - me->type = IRQ_RCVCTXT; 13273 - me->irq = pci_irq_vector(dd->pcidev, rcd->msix_intr); 13274 - remap_intr(dd, IS_RCVAVAIL_START + idx, rcd->msix_intr); 13275 - 13276 - ret = pci_request_irq(dd->pcidev, rcd->msix_intr, 13277 - receive_context_interrupt, 13278 - receive_context_thread, arg, 13279 - DRIVER_NAME "_%d kctxt%d", dd->unit, idx); 13280 - if (ret) { 13281 - dd_dev_err(dd, "vnic irq request (irq %d, idx %d) fail %d\n", 13282 - me->irq, idx, ret); 13283 - return; 13284 - } 13285 - /* 13286 - * assign arg after pci_request_irq call, so it will be 13287 - * cleaned up 13288 - */ 13289 - me->arg = arg; 13290 - 13291 - ret = hfi1_get_irq_affinity(dd, me); 13292 - if (ret) { 13293 - dd_dev_err(dd, 13294 - "unable to pin IRQ %d\n", ret); 13295 - pci_free_irq(dd->pcidev, rcd->msix_intr, me->arg); 13296 - } 13111 + remap_intr(dd, IS_SDMA_START + engine, msix_intr); 13112 + remap_intr(dd, IS_SDMA_PROGRESS_START + engine, msix_intr); 13113 + remap_intr(dd, IS_SDMA_IDLE_START + engine, msix_intr); 13297 13114 } 13298 13115 13299 13116 /* 13300 13117 * Set the general handler to accept all interrupts, remap all 13301 13118 
* chip interrupts back to MSI-X 0. 13302 13119 */ 13303 - static void reset_interrupts(struct hfi1_devdata *dd) 13120 + void reset_interrupts(struct hfi1_devdata *dd) 13304 13121 { 13305 13122 int i; 13306 13123 ··· 13125 13318 write_csr(dd, CCE_INT_MAP + (8 * i), 0); 13126 13319 } 13127 13320 13321 + /** 13322 + * set_up_interrupts() - Initialize the IRQ resources and state 13323 + * @dd: valid devdata 13324 + * 13325 + */ 13128 13326 static int set_up_interrupts(struct hfi1_devdata *dd) 13129 13327 { 13130 - u32 total; 13131 - int ret, request; 13132 - 13133 - /* 13134 - * Interrupt count: 13135 - * 1 general, "slow path" interrupt (includes the SDMA engines 13136 - * slow source, SDMACleanupDone) 13137 - * N interrupts - one per used SDMA engine 13138 - * M interrupt - one per kernel receive context 13139 - * V interrupt - one for each VNIC context 13140 - */ 13141 - total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_vnic_contexts; 13142 - 13143 - /* ask for MSI-X interrupts */ 13144 - request = request_msix(dd, total); 13145 - if (request < 0) { 13146 - ret = request; 13147 - goto fail; 13148 - } else { 13149 - dd->msix_entries = kcalloc(total, sizeof(*dd->msix_entries), 13150 - GFP_KERNEL); 13151 - if (!dd->msix_entries) { 13152 - ret = -ENOMEM; 13153 - goto fail; 13154 - } 13155 - /* using MSI-X */ 13156 - dd->num_msix_entries = total; 13157 - dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total); 13158 - } 13328 + int ret; 13159 13329 13160 13330 /* mask all interrupts */ 13161 - set_intr_state(dd, 0); 13331 + set_intr_bits(dd, IS_FIRST_SOURCE, IS_LAST_SOURCE, false); 13332 + 13162 13333 /* clear all pending interrupts */ 13163 13334 clear_all_interrupts(dd); 13164 13335 13165 13336 /* reset general handler mask, chip MSI-X mappings */ 13166 13337 reset_interrupts(dd); 13167 13338 13168 - ret = request_msix_irqs(dd); 13339 + /* ask for MSI-X interrupts */ 13340 + ret = msix_initialize(dd); 13169 13341 if (ret) 13170 - goto fail; 13342 + return ret; 
13171 13343 13172 - return 0; 13344 + ret = msix_request_irqs(dd); 13345 + if (ret) 13346 + msix_clean_up_interrupts(dd); 13173 13347 13174 - fail: 13175 - hfi1_clean_up_interrupts(dd); 13176 13348 return ret; 13177 13349 } 13178 13350 ··· 14704 14918 } 14705 14919 14706 14920 /** 14707 - * Allocate and initialize the device structure for the hfi. 14921 + * hfi1_init_dd() - Initialize most of the dd structure. 14708 14922 * @dev: the pci_dev for hfi1_ib device 14709 14923 * @ent: pci_device_id struct for this dev 14710 - * 14711 - * Also allocates, initializes, and returns the devdata struct for this 14712 - * device instance 14713 14924 * 14714 14925 * This is global, and is called directly at init to set up the 14715 14926 * chip-specific function pointers for later use. 14716 14927 */ 14717 - struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, 14718 - const struct pci_device_id *ent) 14928 + int hfi1_init_dd(struct hfi1_devdata *dd) 14719 14929 { 14720 - struct hfi1_devdata *dd; 14930 + struct pci_dev *pdev = dd->pcidev; 14721 14931 struct hfi1_pportdata *ppd; 14722 14932 u64 reg; 14723 14933 int i, ret; ··· 14724 14942 "Functional simulator" 14725 14943 }; 14726 14944 struct pci_dev *parent = pdev->bus->self; 14727 - u32 sdma_engines; 14945 + u32 sdma_engines = chip_sdma_engines(dd); 14728 14946 14729 - dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS * 14730 - sizeof(struct hfi1_pportdata)); 14731 - if (IS_ERR(dd)) 14732 - goto bail; 14733 - sdma_engines = chip_sdma_engines(dd); 14734 14947 ppd = dd->pport; 14735 14948 for (i = 0; i < dd->num_pports; i++, ppd++) { 14736 14949 int vl; ··· 14904 15127 if (ret) 14905 15128 goto bail_cleanup; 14906 15129 15130 + /* 15131 + * This should probably occur in hfi1_pcie_init(), but historically 15132 + * occurs after the do_pcie_gen3_transition() code. 
15133 + */ 15134 + tune_pcie_caps(dd); 15135 + 14907 15136 /* start setting dd values and adjusting CSRs */ 14908 15137 init_early_variables(dd); 14909 15138 ··· 15022 15239 free_cntrs(dd); 15023 15240 bail_clear_intr: 15024 15241 hfi1_comp_vectors_clean_up(dd); 15025 - hfi1_clean_up_interrupts(dd); 15242 + msix_clean_up_interrupts(dd); 15026 15243 bail_cleanup: 15027 15244 hfi1_pcie_ddcleanup(dd); 15028 15245 bail_free: 15029 15246 hfi1_free_devdata(dd); 15030 - dd = ERR_PTR(ret); 15031 15247 bail: 15032 - return dd; 15248 + return ret; 15033 15249 } 15034 15250 15035 15251 static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate,
+48 -23
drivers/infiniband/hw/hfi1/chip.h
··· 52 52 */ 53 53 54 54 /* sizes */ 55 - #define CCE_NUM_MSIX_VECTORS 256 56 - #define CCE_NUM_INT_CSRS 12 57 - #define CCE_NUM_INT_MAP_CSRS 96 55 + #define BITS_PER_REGISTER (BITS_PER_BYTE * sizeof(u64)) 58 56 #define NUM_INTERRUPT_SOURCES 768 59 57 #define RXE_NUM_CONTEXTS 160 60 58 #define RXE_PER_CONTEXT_SIZE 0x1000 /* 4k */ ··· 159 161 (CR_CREDIT_RETURN_DUE_TO_FORCE_MASK << \ 160 162 CR_CREDIT_RETURN_DUE_TO_FORCE_SHIFT) 161 163 162 - /* interrupt source numbers */ 163 - #define IS_GENERAL_ERR_START 0 164 - #define IS_SDMAENG_ERR_START 16 165 - #define IS_SENDCTXT_ERR_START 32 166 - #define IS_SDMA_START 192 /* includes SDmaProgress,SDmaIdle */ 164 + /* Specific IRQ sources */ 165 + #define CCE_ERR_INT 0 166 + #define RXE_ERR_INT 1 167 + #define MISC_ERR_INT 2 168 + #define PIO_ERR_INT 4 169 + #define SDMA_ERR_INT 5 170 + #define EGRESS_ERR_INT 6 171 + #define TXE_ERR_INT 7 172 + #define PBC_INT 240 173 + #define GPIO_ASSERT_INT 241 174 + #define QSFP1_INT 242 175 + #define QSFP2_INT 243 176 + #define TCRIT_INT 244 177 + 178 + /* interrupt source ranges */ 179 + #define IS_FIRST_SOURCE CCE_ERR_INT 180 + #define IS_GENERAL_ERR_START 0 181 + #define IS_SDMAENG_ERR_START 16 182 + #define IS_SENDCTXT_ERR_START 32 183 + #define IS_SDMA_START 192 184 + #define IS_SDMA_PROGRESS_START 208 185 + #define IS_SDMA_IDLE_START 224 167 186 #define IS_VARIOUS_START 240 168 187 #define IS_DC_START 248 169 188 #define IS_RCVAVAIL_START 256 170 189 #define IS_RCVURGENT_START 416 171 190 #define IS_SENDCREDIT_START 576 172 191 #define IS_RESERVED_START 736 173 - #define IS_MAX_SOURCES 768 192 + #define IS_LAST_SOURCE 767 174 193 175 194 /* derived interrupt source values */ 176 - #define IS_GENERAL_ERR_END IS_SDMAENG_ERR_START 177 - #define IS_SDMAENG_ERR_END IS_SENDCTXT_ERR_START 178 - #define IS_SENDCTXT_ERR_END IS_SDMA_START 179 - #define IS_SDMA_END IS_VARIOUS_START 180 - #define IS_VARIOUS_END IS_DC_START 181 - #define IS_DC_END IS_RCVAVAIL_START 182 - #define 
IS_RCVAVAIL_END IS_RCVURGENT_START 183 - #define IS_RCVURGENT_END IS_SENDCREDIT_START 184 - #define IS_SENDCREDIT_END IS_RESERVED_START 185 - #define IS_RESERVED_END IS_MAX_SOURCES 186 - 187 - /* absolute interrupt numbers for QSFP1Int and QSFP2Int */ 188 - #define QSFP1_INT 242 189 - #define QSFP2_INT 243 195 + #define IS_GENERAL_ERR_END 7 196 + #define IS_SDMAENG_ERR_END 31 197 + #define IS_SENDCTXT_ERR_END 191 198 + #define IS_SDMA_END 207 199 + #define IS_SDMA_PROGRESS_END 223 200 + #define IS_SDMA_IDLE_END 239 201 + #define IS_VARIOUS_END 244 202 + #define IS_DC_END 255 203 + #define IS_RCVAVAIL_END 415 204 + #define IS_RCVURGENT_END 575 205 + #define IS_SENDCREDIT_END 735 206 + #define IS_RESERVED_END IS_LAST_SOURCE 190 207 191 208 /* DCC_CFG_PORT_CONFIG logical link states */ 192 209 #define LSTATE_DOWN 0x1 ··· 1428 1415 void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality); 1429 1416 void hfi1_init_vnic_rsm(struct hfi1_devdata *dd); 1430 1417 void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd); 1418 + 1419 + irqreturn_t general_interrupt(int irq, void *data); 1420 + irqreturn_t sdma_interrupt(int irq, void *data); 1421 + irqreturn_t receive_context_interrupt(int irq, void *data); 1422 + irqreturn_t receive_context_thread(int irq, void *data); 1423 + 1424 + int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set); 1425 + void init_qsfp_int(struct hfi1_devdata *dd); 1426 + void clear_all_interrupts(struct hfi1_devdata *dd); 1427 + void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr); 1428 + void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr); 1429 + void reset_interrupts(struct hfi1_devdata *dd); 1431 1430 1432 1431 /* 1433 1432 * Interrupt source table.
+4
drivers/infiniband/hw/hfi1/chip_registers.h
··· 878 878 #define SEND_CTRL (TXE + 0x000000000000) 879 879 #define SEND_CTRL_CM_RESET_SMASK 0x4ull 880 880 #define SEND_CTRL_SEND_ENABLE_SMASK 0x1ull 881 + #define SEND_CTRL_UNSUPPORTED_VL_SHIFT 3 882 + #define SEND_CTRL_UNSUPPORTED_VL_MASK 0xFFull 883 + #define SEND_CTRL_UNSUPPORTED_VL_SMASK (SEND_CTRL_UNSUPPORTED_VL_MASK \ 884 + << SEND_CTRL_UNSUPPORTED_VL_SHIFT) 881 885 #define SEND_CTRL_VL_ARBITER_ENABLE_SMASK 0x2ull 882 886 #define SEND_CTXT_CHECK_ENABLE (TXE + 0x000000100080) 883 887 #define SEND_CTXT_CHECK_ENABLE_CHECK_BYPASS_VL_MAPPING_SMASK 0x80ull
+3 -1
drivers/infiniband/hw/hfi1/file_ops.c
··· 681 681 HFI1_RCVCTRL_TAILUPD_DIS | 682 682 HFI1_RCVCTRL_ONE_PKT_EGR_DIS | 683 683 HFI1_RCVCTRL_NO_RHQ_DROP_DIS | 684 - HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt); 684 + HFI1_RCVCTRL_NO_EGR_DROP_DIS | 685 + HFI1_RCVCTRL_URGENT_DIS, uctxt); 685 686 /* Clear the context's J_KEY */ 686 687 hfi1_clear_ctxt_jkey(dd, uctxt); 687 688 /* ··· 1097 1096 hfi1_set_ctxt_jkey(uctxt->dd, uctxt, uctxt->jkey); 1098 1097 1099 1098 rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB; 1099 + rcvctrl_ops |= HFI1_RCVCTRL_URGENT_ENB; 1100 1100 if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP)) 1101 1101 rcvctrl_ops |= HFI1_RCVCTRL_TIDFLOW_ENB; 1102 1102 /*
+20 -28
drivers/infiniband/hw/hfi1/hfi.h
··· 80 80 #include "qsfp.h" 81 81 #include "platform.h" 82 82 #include "affinity.h" 83 + #include "msix.h" 83 84 84 85 /* bumped 1 from s/w major version of TrueScale */ 85 86 #define HFI1_CHIP_VERS_MAJ 3U ··· 621 620 #define HFI1_RCVCTRL_NO_RHQ_DROP_DIS 0x8000 622 621 #define HFI1_RCVCTRL_NO_EGR_DROP_ENB 0x10000 623 622 #define HFI1_RCVCTRL_NO_EGR_DROP_DIS 0x20000 623 + #define HFI1_RCVCTRL_URGENT_ENB 0x40000 624 + #define HFI1_RCVCTRL_URGENT_DIS 0x80000 624 625 625 626 /* partition enforcement flags */ 626 627 #define HFI1_PART_ENFORCE_IN 0x1 ··· 668 665 void *arg; 669 666 cpumask_t mask; 670 667 struct irq_affinity_notify notify; 668 + }; 669 + 670 + struct hfi1_msix_info { 671 + /* lock to synchronize in_use_msix access */ 672 + spinlock_t msix_lock; 673 + DECLARE_BITMAP(in_use_msix, CCE_NUM_MSIX_VECTORS); 674 + struct hfi1_msix_entry *msix_entries; 675 + u16 max_requested; 671 676 }; 672 677 673 678 /* per-SL CCA information */ ··· 1003 992 struct idr vesw_idr; 1004 993 u8 rmt_start; 1005 994 u8 num_ctxt; 1006 - u32 msix_idx; 1007 995 }; 1008 996 1009 997 struct hfi1_vnic_vport_info; ··· 1215 1205 1216 1206 struct diag_client *diag_client; 1217 1207 1218 - /* MSI-X information */ 1219 - struct hfi1_msix_entry *msix_entries; 1220 - u32 num_msix_entries; 1221 - u32 first_dyn_msix_idx; 1222 - 1223 1208 /* general interrupt: mask of handled interrupts */ 1224 1209 u64 gi_mask[CCE_NUM_INT_CSRS]; 1225 1210 ··· 1227 1222 * 64 bit synthetic counters 1228 1223 */ 1229 1224 struct timer_list synth_stats_timer; 1225 + 1226 + /* MSI-X information */ 1227 + struct hfi1_msix_info msix_info; 1230 1228 1231 1229 /* 1232 1230 * device counters ··· 1357 1349 1358 1350 /* vnic data */ 1359 1351 struct hfi1_vnic_data vnic; 1352 + /* Lock to protect IRQ SRC register access */ 1353 + spinlock_t irq_src_lock; 1360 1354 }; 1361 1355 1362 1356 static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare) ··· 1441 1431 int handle_receive_interrupt_nodma_rtail(struct 
hfi1_ctxtdata *rcd, int thread); 1442 1432 int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread); 1443 1433 void set_all_slowpath(struct hfi1_devdata *dd); 1444 - void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd); 1445 - void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd); 1446 - void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd); 1447 1434 1448 1435 extern const struct pci_device_id hfi1_pci_tbl[]; 1449 1436 void hfi1_make_ud_req_9B(struct rvt_qp *qp, ··· 1894 1887 #define HFI1_CTXT_WAITING_URG 4 1895 1888 1896 1889 /* free up any allocated data at closes */ 1897 - struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, 1898 - const struct pci_device_id *ent); 1890 + int hfi1_init_dd(struct hfi1_devdata *dd); 1899 1891 void hfi1_free_devdata(struct hfi1_devdata *dd); 1900 - struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra); 1901 1892 1902 1893 /* LED beaconing functions */ 1903 1894 void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon, ··· 1968 1963 */ 1969 1964 1970 1965 extern const char ib_hfi1_version[]; 1966 + extern const struct attribute_group ib_hfi1_attr_group; 1971 1967 1972 1968 int hfi1_device_create(struct hfi1_devdata *dd); 1973 1969 void hfi1_device_remove(struct hfi1_devdata *dd); ··· 1980 1974 /* Hook for sysfs read of QSFP */ 1981 1975 int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len); 1982 1976 1983 - int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent); 1984 - void hfi1_clean_up_interrupts(struct hfi1_devdata *dd); 1977 + int hfi1_pcie_init(struct hfi1_devdata *dd); 1985 1978 void hfi1_pcie_cleanup(struct pci_dev *pdev); 1986 1979 int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev); 1987 1980 void hfi1_pcie_ddcleanup(struct hfi1_devdata *); 1988 1981 int pcie_speeds(struct hfi1_devdata *dd); 1989 - int request_msix(struct hfi1_devdata *dd, u32 msireq); 1990 1982 int restore_pci_variables(struct hfi1_devdata 
*dd); 1991 1983 int save_pci_variables(struct hfi1_devdata *dd); 1992 1984 int do_pcie_gen3_transition(struct hfi1_devdata *dd); 1985 + void tune_pcie_caps(struct hfi1_devdata *dd); 1993 1986 int parse_platform_config(struct hfi1_devdata *dd); 1994 1987 int get_platform_config_field(struct hfi1_devdata *dd, 1995 1988 enum platform_config_table_type_encoding ··· 2128 2123 2129 2124 return base_sdma_integrity; 2130 2125 } 2131 - 2132 - /* 2133 - * hfi1_early_err is used (only!) to print early errors before devdata is 2134 - * allocated, or when dd->pcidev may not be valid, and at the tail end of 2135 - * cleanup when devdata may have been freed, etc. hfi1_dev_porterr is 2136 - * the same as dd_dev_err, but is used when the message really needs 2137 - * the IB port# to be definitive as to what's happening.. 2138 - */ 2139 - #define hfi1_early_err(dev, fmt, ...) \ 2140 - dev_err(dev, fmt, ##__VA_ARGS__) 2141 - 2142 - #define hfi1_early_info(dev, fmt, ...) \ 2143 - dev_info(dev, fmt, ##__VA_ARGS__) 2144 2126 2145 2127 #define dd_dev_emerg(dd, fmt, ...) \ 2146 2128 dev_emerg(&(dd)->pcidev->dev, "%s: " fmt, \
+67 -46
drivers/infiniband/hw/hfi1/init.c
··· 83 83 #define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */ 84 84 #define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */ 85 85 86 + #define NUM_IB_PORTS 1 87 + 86 88 /* 87 89 * Number of user receive contexts we are configured to use (to allow for more 88 90 * pio buffers per ctxt, etc.) Zero means use one user context per CPU. ··· 656 654 ppd->part_enforce |= HFI1_PART_ENFORCE_IN; 657 655 658 656 if (loopback) { 659 - hfi1_early_err(&pdev->dev, 660 - "Faking data partition 0x8001 in idx %u\n", 661 - !default_pkey_idx); 657 + dd_dev_err(dd, "Faking data partition 0x8001 in idx %u\n", 658 + !default_pkey_idx); 662 659 ppd->pkeys[!default_pkey_idx] = 0x8001; 663 660 } 664 661 ··· 703 702 return; 704 703 705 704 bail: 706 - 707 - hfi1_early_err(&pdev->dev, 708 - "Congestion Control Agent disabled for port %d\n", port); 705 + dd_dev_err(dd, "Congestion Control Agent disabled for port %d\n", port); 709 706 } 710 707 711 708 /* ··· 832 833 } 833 834 834 835 /** 836 + * enable_general_intr() - Enable the IRQs that will be handled by the 837 + * general interrupt handler. 
838 + * @dd: valid devdata 839 + * 840 + */ 841 + static void enable_general_intr(struct hfi1_devdata *dd) 842 + { 843 + set_intr_bits(dd, CCE_ERR_INT, MISC_ERR_INT, true); 844 + set_intr_bits(dd, PIO_ERR_INT, TXE_ERR_INT, true); 845 + set_intr_bits(dd, IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END, true); 846 + set_intr_bits(dd, PBC_INT, GPIO_ASSERT_INT, true); 847 + set_intr_bits(dd, TCRIT_INT, TCRIT_INT, true); 848 + set_intr_bits(dd, IS_DC_START, IS_DC_END, true); 849 + set_intr_bits(dd, IS_SENDCREDIT_START, IS_SENDCREDIT_END, true); 850 + } 851 + 852 + /** 835 853 * hfi1_init - do the actual initialization sequence on the chip 836 854 * @dd: the hfi1_ib device 837 855 * @reinit: re-initializing, so don't allocate new memory ··· 932 916 "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n"); 933 917 ret = lastfail; 934 918 } 919 + /* enable IRQ */ 935 920 hfi1_rcd_put(rcd); 936 921 } 937 922 ··· 971 954 HFI1_STATUS_INITTED; 972 955 if (!ret) { 973 956 /* enable all interrupts from the chip */ 974 - set_intr_state(dd, 1); 957 + enable_general_intr(dd); 958 + init_qsfp_int(dd); 975 959 976 960 /* chip is OK for user apps; mark it as initialized */ 977 961 for (pidx = 0; pidx < dd->num_pports; ++pidx) { ··· 1069 1051 } 1070 1052 dd->flags &= ~HFI1_INITTED; 1071 1053 1072 - /* mask and clean up interrupts, but not errors */ 1073 - set_intr_state(dd, 0); 1074 - hfi1_clean_up_interrupts(dd); 1054 + /* mask and clean up interrupts */ 1055 + set_intr_bits(dd, IS_FIRST_SOURCE, IS_LAST_SOURCE, false); 1056 + msix_clean_up_interrupts(dd); 1075 1057 1076 1058 for (pidx = 0; pidx < dd->num_pports; ++pidx) { 1077 1059 ppd = dd->pport + pidx; ··· 1264 1246 kobject_put(&dd->kobj); 1265 1247 } 1266 1248 1267 - /* 1268 - * Allocate our primary per-unit data structure. Must be done via verbs 1269 - * allocator, because the verbs cleanup process both does cleanup and 1270 - * free of the data structure. 
1249 + /** 1250 + * hfi1_alloc_devdata - Allocate our primary per-unit data structure. 1251 + * @pdev: Valid PCI device 1252 + * @extra: How many bytes to alloc past the default 1253 + * 1254 + * Must be done via verbs allocator, because the verbs cleanup process 1255 + * both does cleanup and free of the data structure. 1271 1256 * "extra" is for chip-specific data. 1272 1257 * 1273 1258 * Use the idr mechanism to get a unit number for this unit. 1274 1259 */ 1275 - struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) 1260 + static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, 1261 + size_t extra) 1276 1262 { 1277 1263 unsigned long flags; 1278 1264 struct hfi1_devdata *dd; ··· 1309 1287 idr_preload_end(); 1310 1288 1311 1289 if (ret < 0) { 1312 - hfi1_early_err(&pdev->dev, 1313 - "Could not allocate unit ID: error %d\n", -ret); 1290 + dev_err(&pdev->dev, 1291 + "Could not allocate unit ID: error %d\n", -ret); 1314 1292 goto bail; 1315 1293 } 1316 1294 rvt_set_ibdev_name(&dd->verbs_dev.rdi, "%s_%d", class_name(), dd->unit); ··· 1331 1309 spin_lock_init(&dd->pio_map_lock); 1332 1310 mutex_init(&dd->dc8051_lock); 1333 1311 init_waitqueue_head(&dd->event_queue); 1312 + spin_lock_init(&dd->irq_src_lock); 1334 1313 1335 1314 dd->int_counter = alloc_percpu(u64); 1336 1315 if (!dd->int_counter) { ··· 1504 1481 idr_init(&hfi1_unit_table); 1505 1482 1506 1483 hfi1_dbg_init(); 1507 - ret = hfi1_wss_init(); 1508 - if (ret < 0) 1509 - goto bail_wss; 1510 1484 ret = pci_register_driver(&hfi1_pci_driver); 1511 1485 if (ret < 0) { 1512 1486 pr_err("Unable to register driver: error %d\n", -ret); ··· 1512 1492 goto bail; /* all OK */ 1513 1493 1514 1494 bail_dev: 1515 - hfi1_wss_exit(); 1516 - bail_wss: 1517 1495 hfi1_dbg_exit(); 1518 1496 idr_destroy(&hfi1_unit_table); 1519 1497 dev_cleanup(); ··· 1528 1510 { 1529 1511 pci_unregister_driver(&hfi1_pci_driver); 1530 1512 node_affinity_destroy_all(); 1531 - hfi1_wss_exit(); 1532 1513 
hfi1_dbg_exit(); 1533 1514 1534 1515 idr_destroy(&hfi1_unit_table); ··· 1621 1604 hfi1_free_devdata(dd); 1622 1605 } 1623 1606 1624 - static int init_validate_rcvhdrcnt(struct device *dev, uint thecnt) 1607 + static int init_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt) 1625 1608 { 1626 1609 if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) { 1627 - hfi1_early_err(dev, "Receive header queue count too small\n"); 1610 + dd_dev_err(dd, "Receive header queue count too small\n"); 1628 1611 return -EINVAL; 1629 1612 } 1630 1613 1631 1614 if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) { 1632 - hfi1_early_err(dev, 1633 - "Receive header queue count cannot be greater than %u\n", 1634 - HFI1_MAX_HDRQ_EGRBUF_CNT); 1615 + dd_dev_err(dd, 1616 + "Receive header queue count cannot be greater than %u\n", 1617 + HFI1_MAX_HDRQ_EGRBUF_CNT); 1635 1618 return -EINVAL; 1636 1619 } 1637 1620 1638 1621 if (thecnt % HDRQ_INCREMENT) { 1639 - hfi1_early_err(dev, "Receive header queue count %d must be divisible by %lu\n", 1640 - thecnt, HDRQ_INCREMENT); 1622 + dd_dev_err(dd, "Receive header queue count %d must be divisible by %lu\n", 1623 + thecnt, HDRQ_INCREMENT); 1641 1624 return -EINVAL; 1642 1625 } 1643 1626 ··· 1656 1639 /* Validate dev ids */ 1657 1640 if (!(ent->device == PCI_DEVICE_ID_INTEL0 || 1658 1641 ent->device == PCI_DEVICE_ID_INTEL1)) { 1659 - hfi1_early_err(&pdev->dev, 1660 - "Failing on unknown Intel deviceid 0x%x\n", 1661 - ent->device); 1642 + dev_err(&pdev->dev, "Failing on unknown Intel deviceid 0x%x\n", 1643 + ent->device); 1662 1644 ret = -ENODEV; 1663 1645 goto bail; 1664 1646 } 1665 1647 1648 + /* Allocate the dd so we can get to work */ 1649 + dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS * 1650 + sizeof(struct hfi1_pportdata)); 1651 + if (IS_ERR(dd)) { 1652 + ret = PTR_ERR(dd); 1653 + goto bail; 1654 + } 1655 + 1666 1656 /* Validate some global module parameters */ 1667 - ret = init_validate_rcvhdrcnt(&pdev->dev, rcvhdrcnt); 1657 + ret = init_validate_rcvhdrcnt(dd, 
rcvhdrcnt); 1668 1658 if (ret) 1669 1659 goto bail; 1670 1660 1671 1661 /* use the encoding function as a sanitization check */ 1672 1662 if (!encode_rcv_header_entry_size(hfi1_hdrq_entsize)) { 1673 - hfi1_early_err(&pdev->dev, "Invalid HdrQ Entry size %u\n", 1674 - hfi1_hdrq_entsize); 1663 + dd_dev_err(dd, "Invalid HdrQ Entry size %u\n", 1664 + hfi1_hdrq_entsize); 1675 1665 ret = -EINVAL; 1676 1666 goto bail; 1677 1667 } ··· 1700 1676 clamp_val(eager_buffer_size, 1701 1677 MIN_EAGER_BUFFER * 8, 1702 1678 MAX_EAGER_BUFFER_TOTAL); 1703 - hfi1_early_info(&pdev->dev, "Eager buffer size %u\n", 1704 - eager_buffer_size); 1679 + dd_dev_info(dd, "Eager buffer size %u\n", 1680 + eager_buffer_size); 1705 1681 } else { 1706 - hfi1_early_err(&pdev->dev, "Invalid Eager buffer size of 0\n"); 1682 + dd_dev_err(dd, "Invalid Eager buffer size of 0\n"); 1707 1683 ret = -EINVAL; 1708 1684 goto bail; 1709 1685 } ··· 1711 1687 /* restrict value of hfi1_rcvarr_split */ 1712 1688 hfi1_rcvarr_split = clamp_val(hfi1_rcvarr_split, 0, 100); 1713 1689 1714 - ret = hfi1_pcie_init(pdev, ent); 1690 + ret = hfi1_pcie_init(dd); 1715 1691 if (ret) 1716 1692 goto bail; 1717 1693 ··· 1719 1695 * Do device-specific initialization, function table setup, dd 1720 1696 * allocation, etc. 1721 1697 */ 1722 - dd = hfi1_init_dd(pdev, ent); 1723 - 1724 - if (IS_ERR(dd)) { 1725 - ret = PTR_ERR(dd); 1698 + ret = hfi1_init_dd(dd); 1699 + if (ret) 1726 1700 goto clean_bail; /* error already printed */ 1727 - } 1728 1701 1729 1702 ret = create_workqueues(dd); 1730 1703 if (ret) ··· 1752 1731 dd_dev_err(dd, "Failed to create /dev devices: %d\n", -j); 1753 1732 1754 1733 if (initfail || ret) { 1755 - hfi1_clean_up_interrupts(dd); 1734 + msix_clean_up_interrupts(dd); 1756 1735 stop_timers(dd); 1757 1736 flush_workqueue(ib_wq); 1758 1737 for (pidx = 0; pidx < dd->num_pports; ++pidx) {
+94
drivers/infiniband/hw/hfi1/iowait.c
··· 1 + // SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) 2 + /* 3 + * Copyright(c) 2018 Intel Corporation. 4 + * 5 + */ 6 + #include "iowait.h" 7 + #include "trace_iowait.h" 8 + 9 + void iowait_set_flag(struct iowait *wait, u32 flag) 10 + { 11 + trace_hfi1_iowait_set(wait, flag); 12 + set_bit(flag, &wait->flags); 13 + } 14 + 15 + bool iowait_flag_set(struct iowait *wait, u32 flag) 16 + { 17 + return test_bit(flag, &wait->flags); 18 + } 19 + 20 + inline void iowait_clear_flag(struct iowait *wait, u32 flag) 21 + { 22 + trace_hfi1_iowait_clear(wait, flag); 23 + clear_bit(flag, &wait->flags); 24 + } 25 + 26 + /** 27 + * iowait_init() - initialize wait structure 28 + * @wait: wait struct to initialize 29 + * @tx_limit: limit for overflow queuing 30 + * @func: restart function for workqueue 31 + * @sleep: sleep function for no space 32 + * @resume: wakeup function for no space 33 + * 34 + * This function initializes the iowait 35 + * structure embedded in the QP or PQ. 36 + * 37 + */ 38 + void iowait_init(struct iowait *wait, u32 tx_limit, 39 + void (*func)(struct work_struct *work), 40 + void (*tidfunc)(struct work_struct *work), 41 + int (*sleep)(struct sdma_engine *sde, 42 + struct iowait_work *wait, 43 + struct sdma_txreq *tx, 44 + uint seq, 45 + bool pkts_sent), 46 + void (*wakeup)(struct iowait *wait, int reason), 47 + void (*sdma_drained)(struct iowait *wait)) 48 + { 49 + int i; 50 + 51 + wait->count = 0; 52 + INIT_LIST_HEAD(&wait->list); 53 + init_waitqueue_head(&wait->wait_dma); 54 + init_waitqueue_head(&wait->wait_pio); 55 + atomic_set(&wait->sdma_busy, 0); 56 + atomic_set(&wait->pio_busy, 0); 57 + wait->tx_limit = tx_limit; 58 + wait->sleep = sleep; 59 + wait->wakeup = wakeup; 60 + wait->sdma_drained = sdma_drained; 61 + wait->flags = 0; 62 + for (i = 0; i < IOWAIT_SES; i++) { 63 + wait->wait[i].iow = wait; 64 + INIT_LIST_HEAD(&wait->wait[i].tx_head); 65 + if (i == IOWAIT_IB_SE) 66 + INIT_WORK(&wait->wait[i].iowork, func); 67 + else 68 + 
INIT_WORK(&wait->wait[i].iowork, tidfunc); 69 + } 70 + } 71 + 72 + /** 73 + * iowait_cancel_work - cancel all work in iowait 74 + * @w: the iowait struct 75 + */ 76 + void iowait_cancel_work(struct iowait *w) 77 + { 78 + cancel_work_sync(&iowait_get_ib_work(w)->iowork); 79 + cancel_work_sync(&iowait_get_tid_work(w)->iowork); 80 + } 81 + 82 + /** 83 + * iowait_set_work_flag - set work flag based on leg 84 + * @w - the iowait work struct 85 + */ 86 + int iowait_set_work_flag(struct iowait_work *w) 87 + { 88 + if (w == &w->iow->wait[IOWAIT_IB_SE]) { 89 + iowait_set_flag(w->iow, IOWAIT_PENDING_IB); 90 + return IOWAIT_IB_SE; 91 + } 92 + iowait_set_flag(w->iow, IOWAIT_PENDING_TID); 93 + return IOWAIT_TID_SE; 94 + }
+134 -58
drivers/infiniband/hw/hfi1/iowait.h
··· 1 1 #ifndef _HFI1_IOWAIT_H 2 2 #define _HFI1_IOWAIT_H 3 3 /* 4 - * Copyright(c) 2015, 2016 Intel Corporation. 4 + * Copyright(c) 2015 - 2018 Intel Corporation. 5 5 * 6 6 * This file is provided under a dual BSD/GPLv2 license. When using or 7 7 * redistributing this file, you may do so under either license. ··· 49 49 50 50 #include <linux/list.h> 51 51 #include <linux/workqueue.h> 52 + #include <linux/wait.h> 52 53 #include <linux/sched.h> 53 54 54 55 #include "sdma_txreq.h" ··· 60 59 */ 61 60 typedef void (*restart_t)(struct work_struct *work); 62 61 62 + #define IOWAIT_PENDING_IB 0x0 63 + #define IOWAIT_PENDING_TID 0x1 64 + 65 + /* 66 + * A QP can have multiple Send Engines (SEs). 67 + * 68 + * The current use case is for supporting a TID RDMA 69 + * packet build/xmit mechanism independent from verbs. 70 + */ 71 + #define IOWAIT_SES 2 72 + #define IOWAIT_IB_SE 0 73 + #define IOWAIT_TID_SE 1 74 + 63 75 struct sdma_txreq; 64 76 struct sdma_engine; 65 77 /** 66 - * struct iowait - linkage for delayed progress/waiting 78 + * @iowork: the work struct 79 + * @tx_head: list of prebuilt packets 80 + * @iow: the parent iowait structure 81 + * 82 + * This structure is the work item (process) specific 83 + * details associated with the each of the two SEs of the 84 + * QP. 85 + * 86 + * The workstruct and the queued TXs are unique to each 87 + * SE. 
88 + */ 89 + struct iowait; 90 + struct iowait_work { 91 + struct work_struct iowork; 92 + struct list_head tx_head; 93 + struct iowait *iow; 94 + }; 95 + 96 + /** 67 97 * @list: used to add/insert into QP/PQ wait lists 68 - * @lock: uses to record the list head lock 69 98 * @tx_head: overflow list of sdma_txreq's 70 99 * @sleep: no space callback 71 100 * @wakeup: space callback wakeup 72 101 * @sdma_drained: sdma count drained 102 + * @lock: lock protected head of wait queue 73 103 * @iowork: workqueue overhead 74 104 * @wait_dma: wait for sdma_busy == 0 75 105 * @wait_pio: wait for pio_busy == 0 ··· 108 76 * @count: total number of descriptors in tx_head'ed list 109 77 * @tx_limit: limit for overflow queuing 110 78 * @tx_count: number of tx entry's in tx_head'ed list 79 + * @flags: wait flags (one per QP) 80 + * @wait: SE array 111 81 * 112 82 * This is to be embedded in user's state structure 113 83 * (QP or PQ). ··· 132 98 * Waiters explicity know that, but the destroy 133 99 * code that unwaits QPs does not. 
134 100 */ 135 - 136 101 struct iowait { 137 102 struct list_head list; 138 - struct list_head tx_head; 139 103 int (*sleep)( 140 104 struct sdma_engine *sde, 141 - struct iowait *wait, 105 + struct iowait_work *wait, 142 106 struct sdma_txreq *tx, 143 107 uint seq, 144 108 bool pkts_sent ··· 144 112 void (*wakeup)(struct iowait *wait, int reason); 145 113 void (*sdma_drained)(struct iowait *wait); 146 114 seqlock_t *lock; 147 - struct work_struct iowork; 148 115 wait_queue_head_t wait_dma; 149 116 wait_queue_head_t wait_pio; 150 117 atomic_t sdma_busy; ··· 152 121 u32 tx_limit; 153 122 u32 tx_count; 154 123 u8 starved_cnt; 124 + unsigned long flags; 125 + struct iowait_work wait[IOWAIT_SES]; 155 126 }; 156 127 157 128 #define SDMA_AVAIL_REASON 0 158 129 159 - /** 160 - * iowait_init() - initialize wait structure 161 - * @wait: wait struct to initialize 162 - * @tx_limit: limit for overflow queuing 163 - * @func: restart function for workqueue 164 - * @sleep: sleep function for no space 165 - * @resume: wakeup function for no space 166 - * 167 - * This function initializes the iowait 168 - * structure embedded in the QP or PQ. 
169 - * 170 - */ 130 + void iowait_set_flag(struct iowait *wait, u32 flag); 131 + bool iowait_flag_set(struct iowait *wait, u32 flag); 132 + void iowait_clear_flag(struct iowait *wait, u32 flag); 171 133 172 - static inline void iowait_init( 173 - struct iowait *wait, 174 - u32 tx_limit, 175 - void (*func)(struct work_struct *work), 176 - int (*sleep)( 177 - struct sdma_engine *sde, 178 - struct iowait *wait, 179 - struct sdma_txreq *tx, 180 - uint seq, 181 - bool pkts_sent), 182 - void (*wakeup)(struct iowait *wait, int reason), 183 - void (*sdma_drained)(struct iowait *wait)) 184 - { 185 - wait->count = 0; 186 - wait->lock = NULL; 187 - INIT_LIST_HEAD(&wait->list); 188 - INIT_LIST_HEAD(&wait->tx_head); 189 - INIT_WORK(&wait->iowork, func); 190 - init_waitqueue_head(&wait->wait_dma); 191 - init_waitqueue_head(&wait->wait_pio); 192 - atomic_set(&wait->sdma_busy, 0); 193 - atomic_set(&wait->pio_busy, 0); 194 - wait->tx_limit = tx_limit; 195 - wait->sleep = sleep; 196 - wait->wakeup = wakeup; 197 - wait->sdma_drained = sdma_drained; 198 - } 134 + void iowait_init(struct iowait *wait, u32 tx_limit, 135 + void (*func)(struct work_struct *work), 136 + void (*tidfunc)(struct work_struct *work), 137 + int (*sleep)(struct sdma_engine *sde, 138 + struct iowait_work *wait, 139 + struct sdma_txreq *tx, 140 + uint seq, 141 + bool pkts_sent), 142 + void (*wakeup)(struct iowait *wait, int reason), 143 + void (*sdma_drained)(struct iowait *wait)); 199 144 200 145 /** 201 - * iowait_schedule() - initialize wait structure 146 + * iowait_schedule() - schedule the default send engine work 202 147 * @wait: wait struct to schedule 203 148 * @wq: workqueue for schedule 204 149 * @cpu: cpu 205 150 */ 206 - static inline void iowait_schedule( 207 - struct iowait *wait, 208 - struct workqueue_struct *wq, 209 - int cpu) 151 + static inline bool iowait_schedule(struct iowait *wait, 152 + struct workqueue_struct *wq, int cpu) 210 153 { 211 - queue_work_on(cpu, wq, &wait->iowork); 154 + return 
!!queue_work_on(cpu, wq, &wait->wait[IOWAIT_IB_SE].iowork); 212 155 } 213 156 214 157 /** ··· 233 228 */ 234 229 static inline int iowait_sdma_dec(struct iowait *wait) 235 230 { 231 + if (!wait) 232 + return 0; 236 233 return atomic_dec_and_test(&wait->sdma_busy); 237 234 } 238 235 ··· 274 267 } 275 268 276 269 /** 277 - * iowait_sdma_dec - note pio complete 270 + * iowait_pio_dec - note pio complete 278 271 * @wait: iowait structure 279 272 */ 280 273 static inline int iowait_pio_dec(struct iowait *wait) 281 274 { 275 + if (!wait) 276 + return 0; 282 277 return atomic_dec_and_test(&wait->pio_busy); 283 278 } 284 279 ··· 302 293 /** 303 294 * iowait_get_txhead() - get packet off of iowait list 304 295 * 305 - * @wait wait struture 296 + * @wait iowait_work struture 306 297 */ 307 - static inline struct sdma_txreq *iowait_get_txhead(struct iowait *wait) 298 + static inline struct sdma_txreq *iowait_get_txhead(struct iowait_work *wait) 308 299 { 309 300 struct sdma_txreq *tx = NULL; 310 301 ··· 316 307 list_del_init(&tx->list); 317 308 } 318 309 return tx; 310 + } 311 + 312 + static inline u16 iowait_get_desc(struct iowait_work *w) 313 + { 314 + u16 num_desc = 0; 315 + struct sdma_txreq *tx = NULL; 316 + 317 + if (!list_empty(&w->tx_head)) { 318 + tx = list_first_entry(&w->tx_head, struct sdma_txreq, 319 + list); 320 + num_desc = tx->num_desc; 321 + } 322 + return num_desc; 323 + } 324 + 325 + static inline u32 iowait_get_all_desc(struct iowait *w) 326 + { 327 + u32 num_desc = 0; 328 + 329 + num_desc = iowait_get_desc(&w->wait[IOWAIT_IB_SE]); 330 + num_desc += iowait_get_desc(&w->wait[IOWAIT_TID_SE]); 331 + return num_desc; 319 332 } 320 333 321 334 /** ··· 403 372 } 404 373 405 374 /** 406 - * iowait_packet_queued() - determine if a packet is already built 407 - * @wait: the wait structure 375 + * iowait_packet_queued() - determine if a packet is queued 376 + * @wait: the iowait_work structure 408 377 */ 409 - static inline bool iowait_packet_queued(struct iowait 
*wait) 378 + static inline bool iowait_packet_queued(struct iowait_work *wait) 410 379 { 411 380 return !list_empty(&wait->tx_head); 412 381 } 382 + 383 + /** 384 + * inc_wait_count - increment wait counts 385 + * @w: the log work struct 386 + * @n: the count 387 + */ 388 + static inline void iowait_inc_wait_count(struct iowait_work *w, u16 n) 389 + { 390 + if (!w) 391 + return; 392 + w->iow->tx_count++; 393 + w->iow->count += n; 394 + } 395 + 396 + /** 397 + * iowait_get_tid_work - return iowait_work for tid SE 398 + * @w: the iowait struct 399 + */ 400 + static inline struct iowait_work *iowait_get_tid_work(struct iowait *w) 401 + { 402 + return &w->wait[IOWAIT_TID_SE]; 403 + } 404 + 405 + /** 406 + * iowait_get_ib_work - return iowait_work for ib SE 407 + * @w: the iowait struct 408 + */ 409 + static inline struct iowait_work *iowait_get_ib_work(struct iowait *w) 410 + { 411 + return &w->wait[IOWAIT_IB_SE]; 412 + } 413 + 414 + /** 415 + * iowait_ioww_to_iow - return iowait given iowait_work 416 + * @w: the iowait_work struct 417 + */ 418 + static inline struct iowait *iowait_ioww_to_iow(struct iowait_work *w) 419 + { 420 + if (likely(w)) 421 + return w->iow; 422 + return NULL; 423 + } 424 + 425 + void iowait_cancel_work(struct iowait *w); 426 + int iowait_set_work_flag(struct iowait_work *w); 413 427 414 428 #endif
+2 -2
drivers/infiniband/hw/hfi1/mad.c
··· 1 1 /* 2 - * Copyright(c) 2015-2017 Intel Corporation. 2 + * Copyright(c) 2015-2018 Intel Corporation. 3 3 * 4 4 * This file is provided under a dual BSD/GPLv2 license. When using or 5 5 * redistributing this file, you may do so under either license. ··· 4836 4836 int ret; 4837 4837 int pkey_idx; 4838 4838 int local_mad = 0; 4839 - u32 resp_len = 0; 4839 + u32 resp_len = in_wc->byte_len - sizeof(*in_grh); 4840 4840 struct hfi1_ibport *ibp = to_iport(ibdev, port); 4841 4841 4842 4842 pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
+363
drivers/infiniband/hw/hfi1/msix.c
··· 1 + // SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) 2 + /* 3 + * Copyright(c) 2018 Intel Corporation. 4 + * 5 + * This file is provided under a dual BSD/GPLv2 license. When using or 6 + * redistributing this file, you may do so under either license. 7 + * 8 + * GPL LICENSE SUMMARY 9 + * 10 + * This program is free software; you can redistribute it and/or modify 11 + * it under the terms of version 2 of the GNU General Public License as 12 + * published by the Free Software Foundation. 13 + * 14 + * This program is distributed in the hope that it will be useful, but 15 + * WITHOUT ANY WARRANTY; without even the implied warranty of 16 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 + * General Public License for more details. 18 + * 19 + * BSD LICENSE 20 + * 21 + * Redistribution and use in source and binary forms, with or without 22 + * modification, are permitted provided that the following conditions 23 + * are met: 24 + * 25 + * - Redistributions of source code must retain the above copyright 26 + * notice, this list of conditions and the following disclaimer. 27 + * - Redistributions in binary form must reproduce the above copyright 28 + * notice, this list of conditions and the following disclaimer in 29 + * the documentation and/or other materials provided with the 30 + * distribution. 31 + * - Neither the name of Intel Corporation nor the names of its 32 + * contributors may be used to endorse or promote products derived 33 + * from this software without specific prior written permission. 34 + * 35 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 36 + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 37 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 38 + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 39 + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 40 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 41 + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 42 + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 43 + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 44 + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 45 + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 46 + * 47 + */ 48 + 49 + #include "hfi.h" 50 + #include "affinity.h" 51 + #include "sdma.h" 52 + 53 + /** 54 + * msix_initialize() - Calculate, request and configure MSIx IRQs 55 + * @dd: valid hfi1 devdata 56 + * 57 + */ 58 + int msix_initialize(struct hfi1_devdata *dd) 59 + { 60 + u32 total; 61 + int ret; 62 + struct hfi1_msix_entry *entries; 63 + 64 + /* 65 + * MSIx interrupt count: 66 + * one for the general, "slow path" interrupt 67 + * one per used SDMA engine 68 + * one per kernel receive context 69 + * one for each VNIC context 70 + * ...any new IRQs should be added here. 
71 + */ 72 + total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_vnic_contexts; 73 + 74 + if (total >= CCE_NUM_MSIX_VECTORS) 75 + return -EINVAL; 76 + 77 + ret = pci_alloc_irq_vectors(dd->pcidev, total, total, PCI_IRQ_MSIX); 78 + if (ret < 0) { 79 + dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", ret); 80 + return ret; 81 + } 82 + 83 + entries = kcalloc(total, sizeof(*dd->msix_info.msix_entries), 84 + GFP_KERNEL); 85 + if (!entries) { 86 + pci_free_irq_vectors(dd->pcidev); 87 + return -ENOMEM; 88 + } 89 + 90 + dd->msix_info.msix_entries = entries; 91 + spin_lock_init(&dd->msix_info.msix_lock); 92 + bitmap_zero(dd->msix_info.in_use_msix, total); 93 + dd->msix_info.max_requested = total; 94 + dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total); 95 + 96 + return 0; 97 + } 98 + 99 + /** 100 + * msix_request_irq() - Allocate a free MSIx IRQ 101 + * @dd: valid devdata 102 + * @arg: context information for the IRQ 103 + * @handler: IRQ handler 104 + * @thread: IRQ thread handler (could be NULL) 105 + * @idx: zero base idx if multiple devices are needed 106 + * @type: affinty IRQ type 107 + * 108 + * Allocated an MSIx vector if available, and then create the appropriate 109 + * meta data needed to keep track of the pci IRQ request. 
110 + * 111 + * Return: 112 + * < 0 Error 113 + * >= 0 MSIx vector 114 + * 115 + */ 116 + static int msix_request_irq(struct hfi1_devdata *dd, void *arg, 117 + irq_handler_t handler, irq_handler_t thread, 118 + u32 idx, enum irq_type type) 119 + { 120 + unsigned long nr; 121 + int irq; 122 + int ret; 123 + const char *err_info; 124 + char name[MAX_NAME_SIZE]; 125 + struct hfi1_msix_entry *me; 126 + 127 + /* Allocate an MSIx vector */ 128 + spin_lock(&dd->msix_info.msix_lock); 129 + nr = find_first_zero_bit(dd->msix_info.in_use_msix, 130 + dd->msix_info.max_requested); 131 + if (nr < dd->msix_info.max_requested) 132 + __set_bit(nr, dd->msix_info.in_use_msix); 133 + spin_unlock(&dd->msix_info.msix_lock); 134 + 135 + if (nr == dd->msix_info.max_requested) 136 + return -ENOSPC; 137 + 138 + /* Specific verification and determine the name */ 139 + switch (type) { 140 + case IRQ_GENERAL: 141 + /* general interrupt must be MSIx vector 0 */ 142 + if (nr) { 143 + spin_lock(&dd->msix_info.msix_lock); 144 + __clear_bit(nr, dd->msix_info.in_use_msix); 145 + spin_unlock(&dd->msix_info.msix_lock); 146 + dd_dev_err(dd, "Invalid index %lu for GENERAL IRQ\n", 147 + nr); 148 + return -EINVAL; 149 + } 150 + snprintf(name, sizeof(name), DRIVER_NAME "_%d", dd->unit); 151 + err_info = "general"; 152 + break; 153 + case IRQ_SDMA: 154 + snprintf(name, sizeof(name), DRIVER_NAME "_%d sdma%d", 155 + dd->unit, idx); 156 + err_info = "sdma"; 157 + break; 158 + case IRQ_RCVCTXT: 159 + snprintf(name, sizeof(name), DRIVER_NAME "_%d kctxt%d", 160 + dd->unit, idx); 161 + err_info = "receive context"; 162 + break; 163 + case IRQ_OTHER: 164 + default: 165 + return -EINVAL; 166 + } 167 + name[sizeof(name) - 1] = 0; 168 + 169 + irq = pci_irq_vector(dd->pcidev, nr); 170 + ret = pci_request_irq(dd->pcidev, nr, handler, thread, arg, name); 171 + if (ret) { 172 + dd_dev_err(dd, 173 + "%s: request for IRQ %d failed, MSIx %d, err %d\n", 174 + err_info, irq, idx, ret); 175 + 
spin_lock(&dd->msix_info.msix_lock); 176 + __clear_bit(nr, dd->msix_info.in_use_msix); 177 + spin_unlock(&dd->msix_info.msix_lock); 178 + return ret; 179 + } 180 + 181 + /* 182 + * assign arg after pci_request_irq call, so it will be 183 + * cleaned up 184 + */ 185 + me = &dd->msix_info.msix_entries[nr]; 186 + me->irq = irq; 187 + me->arg = arg; 188 + me->type = type; 189 + 190 + /* This is a request, so a failure is not fatal */ 191 + ret = hfi1_get_irq_affinity(dd, me); 192 + if (ret) 193 + dd_dev_err(dd, "unable to pin IRQ %d\n", ret); 194 + 195 + return nr; 196 + } 197 + 198 + /** 199 + * msix_request_rcd_irq() - Helper function for RCVAVAIL IRQs 200 + * @rcd: valid rcd context 201 + * 202 + */ 203 + int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd) 204 + { 205 + int nr; 206 + 207 + nr = msix_request_irq(rcd->dd, rcd, receive_context_interrupt, 208 + receive_context_thread, rcd->ctxt, IRQ_RCVCTXT); 209 + if (nr < 0) 210 + return nr; 211 + 212 + /* 213 + * Set the interrupt register and mask for this 214 + * context's interrupt. 
215 + */ 216 + rcd->ireg = (IS_RCVAVAIL_START + rcd->ctxt) / 64; 217 + rcd->imask = ((u64)1) << ((IS_RCVAVAIL_START + rcd->ctxt) % 64); 218 + rcd->msix_intr = nr; 219 + remap_intr(rcd->dd, IS_RCVAVAIL_START + rcd->ctxt, nr); 220 + 221 + return 0; 222 + } 223 + 224 + /** 225 + * msix_request_smda_ira() - Helper for getting SDMA IRQ resources 226 + * @sde: valid sdma engine 227 + * 228 + */ 229 + int msix_request_sdma_irq(struct sdma_engine *sde) 230 + { 231 + int nr; 232 + 233 + nr = msix_request_irq(sde->dd, sde, sdma_interrupt, NULL, 234 + sde->this_idx, IRQ_SDMA); 235 + if (nr < 0) 236 + return nr; 237 + sde->msix_intr = nr; 238 + remap_sdma_interrupts(sde->dd, sde->this_idx, nr); 239 + 240 + return 0; 241 + } 242 + 243 + /** 244 + * enable_sdma_src() - Helper to enable SDMA IRQ srcs 245 + * @dd: valid devdata structure 246 + * @i: index of SDMA engine 247 + */ 248 + static void enable_sdma_srcs(struct hfi1_devdata *dd, int i) 249 + { 250 + set_intr_bits(dd, IS_SDMA_START + i, IS_SDMA_START + i, true); 251 + set_intr_bits(dd, IS_SDMA_PROGRESS_START + i, 252 + IS_SDMA_PROGRESS_START + i, true); 253 + set_intr_bits(dd, IS_SDMA_IDLE_START + i, IS_SDMA_IDLE_START + i, true); 254 + set_intr_bits(dd, IS_SDMAENG_ERR_START + i, IS_SDMAENG_ERR_START + i, 255 + true); 256 + } 257 + 258 + /** 259 + * msix_request_irqs() - Allocate all MSIx IRQs 260 + * @dd: valid devdata structure 261 + * 262 + * Helper function to request the used MSIx IRQs. 
263 + * 264 + */ 265 + int msix_request_irqs(struct hfi1_devdata *dd) 266 + { 267 + int i; 268 + int ret; 269 + 270 + ret = msix_request_irq(dd, dd, general_interrupt, NULL, 0, IRQ_GENERAL); 271 + if (ret < 0) 272 + return ret; 273 + 274 + for (i = 0; i < dd->num_sdma; i++) { 275 + struct sdma_engine *sde = &dd->per_sdma[i]; 276 + 277 + ret = msix_request_sdma_irq(sde); 278 + if (ret) 279 + return ret; 280 + enable_sdma_srcs(sde->dd, i); 281 + } 282 + 283 + for (i = 0; i < dd->n_krcv_queues; i++) { 284 + struct hfi1_ctxtdata *rcd = hfi1_rcd_get_by_index_safe(dd, i); 285 + 286 + if (rcd) 287 + ret = msix_request_rcd_irq(rcd); 288 + hfi1_rcd_put(rcd); 289 + if (ret) 290 + return ret; 291 + } 292 + 293 + return 0; 294 + } 295 + 296 + /** 297 + * msix_free_irq() - Free the specified MSIx resources and IRQ 298 + * @dd: valid devdata 299 + * @msix_intr: MSIx vector to free. 300 + * 301 + */ 302 + void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr) 303 + { 304 + struct hfi1_msix_entry *me; 305 + 306 + if (msix_intr >= dd->msix_info.max_requested) 307 + return; 308 + 309 + me = &dd->msix_info.msix_entries[msix_intr]; 310 + 311 + if (!me->arg) /* => no irq, no affinity */ 312 + return; 313 + 314 + hfi1_put_irq_affinity(dd, me); 315 + pci_free_irq(dd->pcidev, msix_intr, me->arg); 316 + 317 + me->arg = NULL; 318 + 319 + spin_lock(&dd->msix_info.msix_lock); 320 + __clear_bit(msix_intr, dd->msix_info.in_use_msix); 321 + spin_unlock(&dd->msix_info.msix_lock); 322 + } 323 + 324 + /** 325 + * hfi1_clean_up_msix_interrupts() - Free all MSIx IRQ resources 326 + * @dd: valid device data data structure 327 + * 328 + * Free the MSIx and associated PCI resources, if they have been allocated. 
329 + */ 330 + void msix_clean_up_interrupts(struct hfi1_devdata *dd) 331 + { 332 + int i; 333 + struct hfi1_msix_entry *me = dd->msix_info.msix_entries; 334 + 335 + /* remove irqs - must happen before disabling/turning off */ 336 + for (i = 0; i < dd->msix_info.max_requested; i++, me++) 337 + msix_free_irq(dd, i); 338 + 339 + /* clean structures */ 340 + kfree(dd->msix_info.msix_entries); 341 + dd->msix_info.msix_entries = NULL; 342 + dd->msix_info.max_requested = 0; 343 + 344 + pci_free_irq_vectors(dd->pcidev); 345 + } 346 + 347 + /** 348 + * msix_vnic_syncrhonize_irq() - Vnic IRQ synchronize 349 + * @dd: valid devdata 350 + */ 351 + void msix_vnic_synchronize_irq(struct hfi1_devdata *dd) 352 + { 353 + int i; 354 + 355 + for (i = 0; i < dd->vnic.num_ctxt; i++) { 356 + struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i]; 357 + struct hfi1_msix_entry *me; 358 + 359 + me = &dd->msix_info.msix_entries[rcd->msix_intr]; 360 + 361 + synchronize_irq(me->irq); 362 + } 363 + }
+64
drivers/infiniband/hw/hfi1/msix.h
··· 1 + /* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ 2 + /* 3 + * Copyright(c) 2018 Intel Corporation. 4 + * 5 + * This file is provided under a dual BSD/GPLv2 license. When using or 6 + * redistributing this file, you may do so under either license. 7 + * 8 + * GPL LICENSE SUMMARY 9 + * 10 + * This program is free software; you can redistribute it and/or modify 11 + * it under the terms of version 2 of the GNU General Public License as 12 + * published by the Free Software Foundation. 13 + * 14 + * This program is distributed in the hope that it will be useful, but 15 + * WITHOUT ANY WARRANTY; without even the implied warranty of 16 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 + * General Public License for more details. 18 + * 19 + * BSD LICENSE 20 + * 21 + * Redistribution and use in source and binary forms, with or without 22 + * modification, are permitted provided that the following conditions 23 + * are met: 24 + * 25 + * - Redistributions of source code must retain the above copyright 26 + * notice, this list of conditions and the following disclaimer. 27 + * - Redistributions in binary form must reproduce the above copyright 28 + * notice, this list of conditions and the following disclaimer in 29 + * the documentation and/or other materials provided with the 30 + * distribution. 31 + * - Neither the name of Intel Corporation nor the names of its 32 + * contributors may be used to endorse or promote products derived 33 + * from this software without specific prior written permission. 34 + * 35 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 36 + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 37 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 38 + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 39 + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 40 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 41 + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 42 + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 43 + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 44 + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 45 + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 46 + * 47 + */ 48 + #ifndef _HFI1_MSIX_H 49 + #define _HFI1_MSIX_H 50 + 51 + #include "hfi.h" 52 + 53 + /* MSIx interface */ 54 + int msix_initialize(struct hfi1_devdata *dd); 55 + int msix_request_irqs(struct hfi1_devdata *dd); 56 + void msix_clean_up_interrupts(struct hfi1_devdata *dd); 57 + int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd); 58 + int msix_request_sdma_irq(struct sdma_engine *sde); 59 + void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr); 60 + 61 + /* VNIC interface */ 62 + void msix_vnic_synchronize_irq(struct hfi1_devdata *dd); 63 + 64 + #endif
+26 -48
drivers/infiniband/hw/hfi1/pcie.c
··· 1 1 /* 2 - * Copyright(c) 2015 - 2017 Intel Corporation. 2 + * Copyright(c) 2015 - 2018 Intel Corporation. 3 3 * 4 4 * This file is provided under a dual BSD/GPLv2 license. When using or 5 5 * redistributing this file, you may do so under either license. ··· 61 61 */ 62 62 63 63 /* 64 - * Code to adjust PCIe capabilities. 65 - */ 66 - static void tune_pcie_caps(struct hfi1_devdata *); 67 - 68 - /* 69 64 * Do all the common PCIe setup and initialization. 70 - * devdata is not yet allocated, and is not allocated until after this 71 - * routine returns success. Therefore dd_dev_err() can't be used for error 72 - * printing. 73 65 */ 74 - int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent) 66 + int hfi1_pcie_init(struct hfi1_devdata *dd) 75 67 { 76 68 int ret; 69 + struct pci_dev *pdev = dd->pcidev; 77 70 78 71 ret = pci_enable_device(pdev); 79 72 if (ret) { ··· 82 89 * about that, it appears. If the original BAR was retained 83 90 * in the kernel data structures, this may be OK. 
84 91 */ 85 - hfi1_early_err(&pdev->dev, "pci enable failed: error %d\n", 86 - -ret); 87 - goto done; 92 + dd_dev_err(dd, "pci enable failed: error %d\n", -ret); 93 + return ret; 88 94 } 89 95 90 96 ret = pci_request_regions(pdev, DRIVER_NAME); 91 97 if (ret) { 92 - hfi1_early_err(&pdev->dev, 93 - "pci_request_regions fails: err %d\n", -ret); 98 + dd_dev_err(dd, "pci_request_regions fails: err %d\n", -ret); 94 99 goto bail; 95 100 } 96 101 ··· 101 110 */ 102 111 ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); 103 112 if (ret) { 104 - hfi1_early_err(&pdev->dev, 105 - "Unable to set DMA mask: %d\n", ret); 113 + dd_dev_err(dd, "Unable to set DMA mask: %d\n", ret); 106 114 goto bail; 107 115 } 108 116 ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); ··· 109 119 ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); 110 120 } 111 121 if (ret) { 112 - hfi1_early_err(&pdev->dev, 113 - "Unable to set DMA consistent mask: %d\n", ret); 122 + dd_dev_err(dd, "Unable to set DMA consistent mask: %d\n", ret); 114 123 goto bail; 115 124 } 116 125 117 126 pci_set_master(pdev); 118 127 (void)pci_enable_pcie_error_reporting(pdev); 119 - goto done; 128 + return 0; 120 129 121 130 bail: 122 131 hfi1_pcie_cleanup(pdev); 123 - done: 124 132 return ret; 125 133 } 126 134 ··· 194 206 dd_dev_err(dd, "WC mapping of send buffers failed\n"); 195 207 goto nomem; 196 208 } 197 - dd_dev_info(dd, "WC piobase: %p\n for %x", dd->piobase, TXE_PIO_SIZE); 209 + dd_dev_info(dd, "WC piobase: %p for %x\n", dd->piobase, TXE_PIO_SIZE); 198 210 199 211 dd->physaddr = addr; /* used for io_remap, etc. 
*/ 200 212 ··· 332 344 return 0; 333 345 } 334 346 335 - /* 336 - * Returns: 337 - * - actual number of interrupts allocated or 338 - * - error 339 - */ 340 - int request_msix(struct hfi1_devdata *dd, u32 msireq) 341 - { 342 - int nvec; 343 - 344 - nvec = pci_alloc_irq_vectors(dd->pcidev, msireq, msireq, PCI_IRQ_MSIX); 345 - if (nvec < 0) { 346 - dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", nvec); 347 - return nvec; 348 - } 349 - 350 - tune_pcie_caps(dd); 351 - 352 - return nvec; 353 - } 354 - 355 347 /* restore command and BARs after a reset has wiped them out */ 356 348 int restore_pci_variables(struct hfi1_devdata *dd) 357 349 { ··· 447 479 * Check and optionally adjust them to maximize our throughput. 448 480 */ 449 481 static int hfi1_pcie_caps; 450 - module_param_named(pcie_caps, hfi1_pcie_caps, int, S_IRUGO); 482 + module_param_named(pcie_caps, hfi1_pcie_caps, int, 0444); 451 483 MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)"); 452 484 453 485 uint aspm_mode = ASPM_MODE_DISABLED; 454 - module_param_named(aspm, aspm_mode, uint, S_IRUGO); 486 + module_param_named(aspm, aspm_mode, uint, 0444); 455 487 MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic"); 456 488 457 - static void tune_pcie_caps(struct hfi1_devdata *dd) 489 + /** 490 + * tune_pcie_caps() - Code to adjust PCIe capabilities. 
491 + * @dd: Valid device data structure 492 + * 493 + */ 494 + void tune_pcie_caps(struct hfi1_devdata *dd) 458 495 { 459 496 struct pci_dev *parent; 460 497 u16 rc_mpss, rc_mps, ep_mpss, ep_mps; ··· 1001 1028 const u8 (*ctle_tunings)[4]; 1002 1029 uint static_ctle_mode; 1003 1030 int return_error = 0; 1031 + u32 target_width; 1004 1032 1005 1033 /* PCIe Gen3 is for the ASIC only */ 1006 1034 if (dd->icode != ICODE_RTL_SILICON) ··· 1040 1066 __func__); 1041 1067 return 0; 1042 1068 } 1069 + 1070 + /* Previous Gen1/Gen2 bus width */ 1071 + target_width = dd->lbus_width; 1043 1072 1044 1073 /* 1045 1074 * Do the Gen3 transition. Steps are those of the PCIe Gen3 ··· 1412 1435 dd_dev_info(dd, "%s: new speed and width: %s\n", __func__, 1413 1436 dd->lbus_info); 1414 1437 1415 - if (dd->lbus_speed != target_speed) { /* not target */ 1438 + if (dd->lbus_speed != target_speed || 1439 + dd->lbus_width < target_width) { /* not target */ 1416 1440 /* maybe retry */ 1417 1441 do_retry = retry_count < pcie_retry; 1418 - dd_dev_err(dd, "PCIe link speed did not switch to Gen%d%s\n", 1419 - pcie_target, do_retry ? ", retrying" : ""); 1442 + dd_dev_err(dd, "PCIe link speed or width did not match target%s\n", 1443 + do_retry ? ", retrying" : ""); 1420 1444 retry_count++; 1421 1445 if (do_retry) { 1422 1446 msleep(100); /* allow time to settle */
-8
drivers/infiniband/hw/hfi1/pio.c
··· 71 71 } 72 72 } 73 73 74 - /* defined in header release 48 and higher */ 75 - #ifndef SEND_CTRL_UNSUPPORTED_VL_SHIFT 76 - #define SEND_CTRL_UNSUPPORTED_VL_SHIFT 3 77 - #define SEND_CTRL_UNSUPPORTED_VL_MASK 0xffull 78 - #define SEND_CTRL_UNSUPPORTED_VL_SMASK (SEND_CTRL_UNSUPPORTED_VL_MASK \ 79 - << SEND_CTRL_UNSUPPORTED_VL_SHIFT) 80 - #endif 81 - 82 74 /* global control of PIO send */ 83 75 void pio_send_control(struct hfi1_devdata *dd, int op) 84 76 {
+73 -27
drivers/infiniband/hw/hfi1/qp.c
··· 66 66 static void flush_tx_list(struct rvt_qp *qp); 67 67 static int iowait_sleep( 68 68 struct sdma_engine *sde, 69 - struct iowait *wait, 69 + struct iowait_work *wait, 70 70 struct sdma_txreq *stx, 71 71 unsigned int seq, 72 72 bool pkts_sent); ··· 134 134 135 135 }; 136 136 137 - static void flush_tx_list(struct rvt_qp *qp) 137 + static void flush_list_head(struct list_head *l) 138 138 { 139 - struct hfi1_qp_priv *priv = qp->priv; 140 - 141 - while (!list_empty(&priv->s_iowait.tx_head)) { 139 + while (!list_empty(l)) { 142 140 struct sdma_txreq *tx; 143 141 144 142 tx = list_first_entry( 145 - &priv->s_iowait.tx_head, 143 + l, 146 144 struct sdma_txreq, 147 145 list); 148 146 list_del_init(&tx->list); 149 147 hfi1_put_txreq( 150 148 container_of(tx, struct verbs_txreq, txreq)); 151 149 } 150 + } 151 + 152 + static void flush_tx_list(struct rvt_qp *qp) 153 + { 154 + struct hfi1_qp_priv *priv = qp->priv; 155 + 156 + flush_list_head(&iowait_get_ib_work(&priv->s_iowait)->tx_head); 157 + flush_list_head(&iowait_get_tid_work(&priv->s_iowait)->tx_head); 152 158 } 153 159 154 160 static void flush_iowait(struct rvt_qp *qp) ··· 288 282 } 289 283 290 284 /** 291 - * hfi1_check_send_wqe - validate wqe 285 + * hfi1_setup_wqe - set up the wqe 292 286 * @qp - The qp 293 287 * @wqe - The built wqe 288 + * @call_send - Determine if the send should be posted or scheduled. 294 289 * 295 - * validate wqe. This is called 296 - * prior to inserting the wqe into 297 - * the ring but after the wqe has been 298 - * setup. 290 + * Perform setup of the wqe. This is called 291 + * prior to inserting the wqe into the ring but after 292 + * the wqe has been setup by RDMAVT. This function 293 + * allows the driver the opportunity to perform 294 + * validation and additional setup of the wqe. 
299 295 * 300 296 * Returns 0 on success, -EINVAL on failure 301 297 * 302 298 */ 303 - int hfi1_check_send_wqe(struct rvt_qp *qp, 304 - struct rvt_swqe *wqe) 299 + int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send) 305 300 { 306 301 struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); 307 302 struct rvt_ah *ah; 303 + struct hfi1_pportdata *ppd; 304 + struct hfi1_devdata *dd; 308 305 309 306 switch (qp->ibqp.qp_type) { 310 307 case IB_QPT_RC: 311 308 case IB_QPT_UC: 312 309 if (wqe->length > 0x80000000U) 313 310 return -EINVAL; 311 + if (wqe->length > qp->pmtu) 312 + *call_send = false; 314 313 break; 315 314 case IB_QPT_SMI: 316 - ah = ibah_to_rvtah(wqe->ud_wr.ah); 317 - if (wqe->length > (1 << ah->log_pmtu)) 315 + /* 316 + * SM packets should exclusively use VL15 and their SL is 317 + * ignored (IBTA v1.3, Section 3.5.8.2). Therefore, when ah 318 + * is created, SL is 0 in most cases and as a result some 319 + * fields (vl and pmtu) in ah may not be set correctly, 320 + * depending on the SL2SC and SC2VL tables at the time. 321 + */ 322 + ppd = ppd_from_ibp(ibp); 323 + dd = dd_from_ppd(ppd); 324 + if (wqe->length > dd->vld[15].mtu) 318 325 return -EINVAL; 319 326 break; 320 327 case IB_QPT_GSI: ··· 340 321 default: 341 322 break; 342 323 } 343 - return wqe->length <= piothreshold; 324 + return 0; 344 325 } 345 326 346 327 /** ··· 352 333 * It is only used in the post send, which doesn't hold 353 334 * the s_lock. 354 335 */ 355 - void _hfi1_schedule_send(struct rvt_qp *qp) 336 + bool _hfi1_schedule_send(struct rvt_qp *qp) 356 337 { 357 338 struct hfi1_qp_priv *priv = qp->priv; 358 339 struct hfi1_ibport *ibp = ··· 360 341 struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); 361 342 struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); 362 343 363 - iowait_schedule(&priv->s_iowait, ppd->hfi1_wq, 364 - priv->s_sde ? 
365 - priv->s_sde->cpu : 366 - cpumask_first(cpumask_of_node(dd->node))); 344 + return iowait_schedule(&priv->s_iowait, ppd->hfi1_wq, 345 + priv->s_sde ? 346 + priv->s_sde->cpu : 347 + cpumask_first(cpumask_of_node(dd->node))); 367 348 } 368 349 369 350 static void qp_pio_drain(struct rvt_qp *qp) ··· 391 372 * 392 373 * This schedules qp progress and caller should hold 393 374 * the s_lock. 375 + * @return true if the first leg is scheduled; 376 + * false if the first leg is not scheduled. 394 377 */ 395 - void hfi1_schedule_send(struct rvt_qp *qp) 378 + bool hfi1_schedule_send(struct rvt_qp *qp) 396 379 { 397 380 lockdep_assert_held(&qp->s_lock); 398 - if (hfi1_send_ok(qp)) 381 + if (hfi1_send_ok(qp)) { 399 382 _hfi1_schedule_send(qp); 383 + return true; 384 + } 385 + if (qp->s_flags & HFI1_S_ANY_WAIT_IO) 386 + iowait_set_flag(&((struct hfi1_qp_priv *)qp->priv)->s_iowait, 387 + IOWAIT_PENDING_IB); 388 + return false; 389 + } 390 + 391 + static void hfi1_qp_schedule(struct rvt_qp *qp) 392 + { 393 + struct hfi1_qp_priv *priv = qp->priv; 394 + bool ret; 395 + 396 + if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_IB)) { 397 + ret = hfi1_schedule_send(qp); 398 + if (ret) 399 + iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB); 400 + } 400 401 } 401 402 402 403 void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag) ··· 427 388 if (qp->s_flags & flag) { 428 389 qp->s_flags &= ~flag; 429 390 trace_hfi1_qpwakeup(qp, flag); 430 - hfi1_schedule_send(qp); 391 + hfi1_qp_schedule(qp); 431 392 } 432 393 spin_unlock_irqrestore(&qp->s_lock, flags); 433 394 /* Notify hfi1_destroy_qp() if it is waiting. 
*/ 434 395 rvt_put_qp(qp); 435 396 } 436 397 398 + void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait) 399 + { 400 + if (iowait_set_work_flag(wait) == IOWAIT_IB_SE) 401 + qp->s_flags &= ~RVT_S_BUSY; 402 + } 403 + 437 404 static int iowait_sleep( 438 405 struct sdma_engine *sde, 439 - struct iowait *wait, 406 + struct iowait_work *wait, 440 407 struct sdma_txreq *stx, 441 408 uint seq, 442 409 bool pkts_sent) ··· 483 438 rvt_get_qp(qp); 484 439 } 485 440 write_sequnlock(&dev->iowait_lock); 486 - qp->s_flags &= ~RVT_S_BUSY; 441 + hfi1_qp_unbusy(qp, wait); 487 442 spin_unlock_irqrestore(&qp->s_lock, flags); 488 443 ret = -EBUSY; 489 444 } else { ··· 682 637 &priv->s_iowait, 683 638 1, 684 639 _hfi1_do_send, 640 + NULL, 685 641 iowait_sleep, 686 642 iowait_wakeup, 687 643 iowait_sdma_drained); ··· 732 686 { 733 687 struct hfi1_qp_priv *priv = qp->priv; 734 688 735 - cancel_work_sync(&priv->s_iowait.iowork); 689 + iowait_cancel_work(&priv->s_iowait); 736 690 } 737 691 738 692 void quiesce_qp(struct rvt_qp *qp)
+17 -14
drivers/infiniband/hw/hfi1/qp.h
··· 58 58 extern const struct rvt_operation_params hfi1_post_parms[]; 59 59 60 60 /* 61 - * Send if not busy or waiting for I/O and either 62 - * a RC response is pending or we can process send work requests. 63 - */ 64 - static inline int hfi1_send_ok(struct rvt_qp *qp) 65 - { 66 - return !(qp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT_IO)) && 67 - (verbs_txreq_queued(qp) || 68 - (qp->s_flags & RVT_S_RESP_PENDING) || 69 - !(qp->s_flags & RVT_S_ANY_WAIT_SEND)); 70 - } 71 - 72 - /* 73 61 * Driver specific s_flags starting at bit 31 down to HFI1_S_MIN_BIT_MASK 74 62 * 75 63 * HFI1_S_AHG_VALID - ahg header valid on chip ··· 76 88 77 89 #define HFI1_S_ANY_WAIT_IO (RVT_S_ANY_WAIT_IO | HFI1_S_WAIT_PIO_DRAIN) 78 90 #define HFI1_S_ANY_WAIT (HFI1_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND) 91 + 92 + /* 93 + * Send if not busy or waiting for I/O and either 94 + * a RC response is pending or we can process send work requests. 95 + */ 96 + static inline int hfi1_send_ok(struct rvt_qp *qp) 97 + { 98 + struct hfi1_qp_priv *priv = qp->priv; 99 + 100 + return !(qp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT_IO)) && 101 + (verbs_txreq_queued(iowait_get_ib_work(&priv->s_iowait)) || 102 + (qp->s_flags & RVT_S_RESP_PENDING) || 103 + !(qp->s_flags & RVT_S_ANY_WAIT_SEND)); 104 + } 79 105 80 106 /* 81 107 * free_ahg - clear ahg from QP ··· 131 129 132 130 void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter); 133 131 134 - void _hfi1_schedule_send(struct rvt_qp *qp); 135 - void hfi1_schedule_send(struct rvt_qp *qp); 132 + bool _hfi1_schedule_send(struct rvt_qp *qp); 133 + bool hfi1_schedule_send(struct rvt_qp *qp); 136 134 137 135 void hfi1_migrate_qp(struct rvt_qp *qp); 138 136 ··· 152 150 u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu); 153 151 int mtu_to_path_mtu(u32 mtu); 154 152 void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl); 153 + void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait); 155 154 #endif /* _QP_H */
+13 -11
drivers/infiniband/hw/hfi1/rc.c
··· 309 309 } 310 310 clear_ahg(qp); 311 311 wqe = rvt_get_swqe_ptr(qp, qp->s_last); 312 - hfi1_send_complete(qp, wqe, qp->s_last != qp->s_acked ? 312 + rvt_send_complete(qp, wqe, qp->s_last != qp->s_acked ? 313 313 IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR); 314 314 /* will get called again */ 315 315 goto done_free_tx; ··· 378 378 wqe->wr.ex.invalidate_rkey); 379 379 local_ops = 1; 380 380 } 381 - hfi1_send_complete(qp, wqe, 382 - err ? IB_WC_LOC_PROT_ERR 383 - : IB_WC_SUCCESS); 381 + rvt_send_complete(qp, wqe, 382 + err ? IB_WC_LOC_PROT_ERR 383 + : IB_WC_SUCCESS); 384 384 if (local_ops) 385 385 atomic_dec(&qp->local_ops_pending); 386 386 goto done_free_tx; ··· 1043 1043 hfi1_migrate_qp(qp); 1044 1044 qp->s_retry = qp->s_retry_cnt; 1045 1045 } else if (qp->s_last == qp->s_acked) { 1046 - hfi1_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR); 1046 + rvt_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR); 1047 1047 rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); 1048 1048 return; 1049 1049 } else { /* need to handle delayed completion */ ··· 1468 1468 ibp->rvp.n_other_naks++; 1469 1469 class_b: 1470 1470 if (qp->s_last == qp->s_acked) { 1471 - hfi1_send_complete(qp, wqe, status); 1471 + rvt_send_complete(qp, wqe, status); 1472 1472 rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); 1473 1473 } 1474 1474 break; ··· 1644 1644 qp->s_rdma_read_len -= pmtu; 1645 1645 update_last_psn(qp, psn); 1646 1646 spin_unlock_irqrestore(&qp->s_lock, flags); 1647 - hfi1_copy_sge(&qp->s_rdma_read_sge, data, pmtu, false, false); 1647 + rvt_copy_sge(qp, &qp->s_rdma_read_sge, 1648 + data, pmtu, false, false); 1648 1649 goto bail; 1649 1650 1650 1651 case OP(RDMA_READ_RESPONSE_ONLY): ··· 1685 1684 if (unlikely(tlen != qp->s_rdma_read_len)) 1686 1685 goto ack_len_err; 1687 1686 aeth = be32_to_cpu(ohdr->u.aeth); 1688 - hfi1_copy_sge(&qp->s_rdma_read_sge, data, tlen, false, false); 1687 + rvt_copy_sge(qp, &qp->s_rdma_read_sge, 1688 + data, tlen, false, false); 1689 1689 WARN_ON(qp->s_rdma_read_sge.num_sge); 1690 1690 
(void)do_rc_ack(qp, aeth, psn, 1691 1691 OP(RDMA_READ_RESPONSE_LAST), 0, rcd); ··· 1706 1704 status = IB_WC_LOC_LEN_ERR; 1707 1705 ack_err: 1708 1706 if (qp->s_last == qp->s_acked) { 1709 - hfi1_send_complete(qp, wqe, status); 1707 + rvt_send_complete(qp, wqe, status); 1710 1708 rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); 1711 1709 } 1712 1710 ack_done: ··· 2146 2144 qp->r_rcv_len += pmtu; 2147 2145 if (unlikely(qp->r_rcv_len > qp->r_len)) 2148 2146 goto nack_inv; 2149 - hfi1_copy_sge(&qp->r_sge, data, pmtu, true, false); 2147 + rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false); 2150 2148 break; 2151 2149 2152 2150 case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): ··· 2202 2200 wc.byte_len = tlen + qp->r_rcv_len; 2203 2201 if (unlikely(wc.byte_len > qp->r_len)) 2204 2202 goto nack_inv; 2205 - hfi1_copy_sge(&qp->r_sge, data, tlen, true, copy_last); 2203 + rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, copy_last); 2206 2204 rvt_put_ss(&qp->r_sge); 2207 2205 qp->r_msn++; 2208 2206 if (!__test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
+9 -373
drivers/infiniband/hw/hfi1/ruc.c
··· 156 156 } 157 157 158 158 /** 159 - * ruc_loopback - handle UC and RC loopback requests 160 - * @sqp: the sending QP 161 - * 162 - * This is called from hfi1_do_send() to 163 - * forward a WQE addressed to the same HFI. 164 - * Note that although we are single threaded due to the send engine, we still 165 - * have to protect against post_send(). We don't have to worry about 166 - * receive interrupts since this is a connected protocol and all packets 167 - * will pass through here. 168 - */ 169 - static void ruc_loopback(struct rvt_qp *sqp) 170 - { 171 - struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num); 172 - struct rvt_qp *qp; 173 - struct rvt_swqe *wqe; 174 - struct rvt_sge *sge; 175 - unsigned long flags; 176 - struct ib_wc wc; 177 - u64 sdata; 178 - atomic64_t *maddr; 179 - enum ib_wc_status send_status; 180 - bool release; 181 - int ret; 182 - bool copy_last = false; 183 - int local_ops = 0; 184 - 185 - rcu_read_lock(); 186 - 187 - /* 188 - * Note that we check the responder QP state after 189 - * checking the requester's state. 190 - */ 191 - qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp, 192 - sqp->remote_qpn); 193 - 194 - spin_lock_irqsave(&sqp->s_lock, flags); 195 - 196 - /* Return if we are already busy processing a work request. */ 197 - if ((sqp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT)) || 198 - !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND)) 199 - goto unlock; 200 - 201 - sqp->s_flags |= RVT_S_BUSY; 202 - 203 - again: 204 - if (sqp->s_last == READ_ONCE(sqp->s_head)) 205 - goto clr_busy; 206 - wqe = rvt_get_swqe_ptr(sqp, sqp->s_last); 207 - 208 - /* Return if it is not OK to start a new work request. */ 209 - if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) { 210 - if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND)) 211 - goto clr_busy; 212 - /* We are in the error state, flush the work request. 
*/ 213 - send_status = IB_WC_WR_FLUSH_ERR; 214 - goto flush_send; 215 - } 216 - 217 - /* 218 - * We can rely on the entry not changing without the s_lock 219 - * being held until we update s_last. 220 - * We increment s_cur to indicate s_last is in progress. 221 - */ 222 - if (sqp->s_last == sqp->s_cur) { 223 - if (++sqp->s_cur >= sqp->s_size) 224 - sqp->s_cur = 0; 225 - } 226 - spin_unlock_irqrestore(&sqp->s_lock, flags); 227 - 228 - if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) || 229 - qp->ibqp.qp_type != sqp->ibqp.qp_type) { 230 - ibp->rvp.n_pkt_drops++; 231 - /* 232 - * For RC, the requester would timeout and retry so 233 - * shortcut the timeouts and just signal too many retries. 234 - */ 235 - if (sqp->ibqp.qp_type == IB_QPT_RC) 236 - send_status = IB_WC_RETRY_EXC_ERR; 237 - else 238 - send_status = IB_WC_SUCCESS; 239 - goto serr; 240 - } 241 - 242 - memset(&wc, 0, sizeof(wc)); 243 - send_status = IB_WC_SUCCESS; 244 - 245 - release = true; 246 - sqp->s_sge.sge = wqe->sg_list[0]; 247 - sqp->s_sge.sg_list = wqe->sg_list + 1; 248 - sqp->s_sge.num_sge = wqe->wr.num_sge; 249 - sqp->s_len = wqe->length; 250 - switch (wqe->wr.opcode) { 251 - case IB_WR_REG_MR: 252 - goto send_comp; 253 - 254 - case IB_WR_LOCAL_INV: 255 - if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) { 256 - if (rvt_invalidate_rkey(sqp, 257 - wqe->wr.ex.invalidate_rkey)) 258 - send_status = IB_WC_LOC_PROT_ERR; 259 - local_ops = 1; 260 - } 261 - goto send_comp; 262 - 263 - case IB_WR_SEND_WITH_INV: 264 - if (!rvt_invalidate_rkey(qp, wqe->wr.ex.invalidate_rkey)) { 265 - wc.wc_flags = IB_WC_WITH_INVALIDATE; 266 - wc.ex.invalidate_rkey = wqe->wr.ex.invalidate_rkey; 267 - } 268 - goto send; 269 - 270 - case IB_WR_SEND_WITH_IMM: 271 - wc.wc_flags = IB_WC_WITH_IMM; 272 - wc.ex.imm_data = wqe->wr.ex.imm_data; 273 - /* FALLTHROUGH */ 274 - case IB_WR_SEND: 275 - send: 276 - ret = rvt_get_rwqe(qp, false); 277 - if (ret < 0) 278 - goto op_err; 279 - if (!ret) 280 - goto rnr_nak; 281 - 
break; 282 - 283 - case IB_WR_RDMA_WRITE_WITH_IMM: 284 - if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) 285 - goto inv_err; 286 - wc.wc_flags = IB_WC_WITH_IMM; 287 - wc.ex.imm_data = wqe->wr.ex.imm_data; 288 - ret = rvt_get_rwqe(qp, true); 289 - if (ret < 0) 290 - goto op_err; 291 - if (!ret) 292 - goto rnr_nak; 293 - /* skip copy_last set and qp_access_flags recheck */ 294 - goto do_write; 295 - case IB_WR_RDMA_WRITE: 296 - copy_last = rvt_is_user_qp(qp); 297 - if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) 298 - goto inv_err; 299 - do_write: 300 - if (wqe->length == 0) 301 - break; 302 - if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length, 303 - wqe->rdma_wr.remote_addr, 304 - wqe->rdma_wr.rkey, 305 - IB_ACCESS_REMOTE_WRITE))) 306 - goto acc_err; 307 - qp->r_sge.sg_list = NULL; 308 - qp->r_sge.num_sge = 1; 309 - qp->r_sge.total_len = wqe->length; 310 - break; 311 - 312 - case IB_WR_RDMA_READ: 313 - if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) 314 - goto inv_err; 315 - if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length, 316 - wqe->rdma_wr.remote_addr, 317 - wqe->rdma_wr.rkey, 318 - IB_ACCESS_REMOTE_READ))) 319 - goto acc_err; 320 - release = false; 321 - sqp->s_sge.sg_list = NULL; 322 - sqp->s_sge.num_sge = 1; 323 - qp->r_sge.sge = wqe->sg_list[0]; 324 - qp->r_sge.sg_list = wqe->sg_list + 1; 325 - qp->r_sge.num_sge = wqe->wr.num_sge; 326 - qp->r_sge.total_len = wqe->length; 327 - break; 328 - 329 - case IB_WR_ATOMIC_CMP_AND_SWP: 330 - case IB_WR_ATOMIC_FETCH_AND_ADD: 331 - if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) 332 - goto inv_err; 333 - if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), 334 - wqe->atomic_wr.remote_addr, 335 - wqe->atomic_wr.rkey, 336 - IB_ACCESS_REMOTE_ATOMIC))) 337 - goto acc_err; 338 - /* Perform atomic OP and save result. 
*/ 339 - maddr = (atomic64_t *)qp->r_sge.sge.vaddr; 340 - sdata = wqe->atomic_wr.compare_add; 341 - *(u64 *)sqp->s_sge.sge.vaddr = 342 - (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ? 343 - (u64)atomic64_add_return(sdata, maddr) - sdata : 344 - (u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr, 345 - sdata, wqe->atomic_wr.swap); 346 - rvt_put_mr(qp->r_sge.sge.mr); 347 - qp->r_sge.num_sge = 0; 348 - goto send_comp; 349 - 350 - default: 351 - send_status = IB_WC_LOC_QP_OP_ERR; 352 - goto serr; 353 - } 354 - 355 - sge = &sqp->s_sge.sge; 356 - while (sqp->s_len) { 357 - u32 len = sqp->s_len; 358 - 359 - if (len > sge->length) 360 - len = sge->length; 361 - if (len > sge->sge_length) 362 - len = sge->sge_length; 363 - WARN_ON_ONCE(len == 0); 364 - hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, release, copy_last); 365 - sge->vaddr += len; 366 - sge->length -= len; 367 - sge->sge_length -= len; 368 - if (sge->sge_length == 0) { 369 - if (!release) 370 - rvt_put_mr(sge->mr); 371 - if (--sqp->s_sge.num_sge) 372 - *sge = *sqp->s_sge.sg_list++; 373 - } else if (sge->length == 0 && sge->mr->lkey) { 374 - if (++sge->n >= RVT_SEGSZ) { 375 - if (++sge->m >= sge->mr->mapsz) 376 - break; 377 - sge->n = 0; 378 - } 379 - sge->vaddr = 380 - sge->mr->map[sge->m]->segs[sge->n].vaddr; 381 - sge->length = 382 - sge->mr->map[sge->m]->segs[sge->n].length; 383 - } 384 - sqp->s_len -= len; 385 - } 386 - if (release) 387 - rvt_put_ss(&qp->r_sge); 388 - 389 - if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) 390 - goto send_comp; 391 - 392 - if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM) 393 - wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; 394 - else 395 - wc.opcode = IB_WC_RECV; 396 - wc.wr_id = qp->r_wr_id; 397 - wc.status = IB_WC_SUCCESS; 398 - wc.byte_len = wqe->length; 399 - wc.qp = &qp->ibqp; 400 - wc.src_qp = qp->remote_qpn; 401 - wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX; 402 - wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr); 403 - wc.port_num = 1; 404 - /* Signal completion event if 
the solicited bit is set. */ 405 - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 406 - wqe->wr.send_flags & IB_SEND_SOLICITED); 407 - 408 - send_comp: 409 - spin_lock_irqsave(&sqp->s_lock, flags); 410 - ibp->rvp.n_loop_pkts++; 411 - flush_send: 412 - sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; 413 - hfi1_send_complete(sqp, wqe, send_status); 414 - if (local_ops) { 415 - atomic_dec(&sqp->local_ops_pending); 416 - local_ops = 0; 417 - } 418 - goto again; 419 - 420 - rnr_nak: 421 - /* Handle RNR NAK */ 422 - if (qp->ibqp.qp_type == IB_QPT_UC) 423 - goto send_comp; 424 - ibp->rvp.n_rnr_naks++; 425 - /* 426 - * Note: we don't need the s_lock held since the BUSY flag 427 - * makes this single threaded. 428 - */ 429 - if (sqp->s_rnr_retry == 0) { 430 - send_status = IB_WC_RNR_RETRY_EXC_ERR; 431 - goto serr; 432 - } 433 - if (sqp->s_rnr_retry_cnt < 7) 434 - sqp->s_rnr_retry--; 435 - spin_lock_irqsave(&sqp->s_lock, flags); 436 - if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK)) 437 - goto clr_busy; 438 - rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer << 439 - IB_AETH_CREDIT_SHIFT); 440 - goto clr_busy; 441 - 442 - op_err: 443 - send_status = IB_WC_REM_OP_ERR; 444 - wc.status = IB_WC_LOC_QP_OP_ERR; 445 - goto err; 446 - 447 - inv_err: 448 - send_status = IB_WC_REM_INV_REQ_ERR; 449 - wc.status = IB_WC_LOC_QP_OP_ERR; 450 - goto err; 451 - 452 - acc_err: 453 - send_status = IB_WC_REM_ACCESS_ERR; 454 - wc.status = IB_WC_LOC_PROT_ERR; 455 - err: 456 - /* responder goes to error state */ 457 - rvt_rc_error(qp, wc.status); 458 - 459 - serr: 460 - spin_lock_irqsave(&sqp->s_lock, flags); 461 - hfi1_send_complete(sqp, wqe, send_status); 462 - if (sqp->ibqp.qp_type == IB_QPT_RC) { 463 - int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR); 464 - 465 - sqp->s_flags &= ~RVT_S_BUSY; 466 - spin_unlock_irqrestore(&sqp->s_lock, flags); 467 - if (lastwqe) { 468 - struct ib_event ev; 469 - 470 - ev.device = sqp->ibqp.device; 471 - ev.element.qp = &sqp->ibqp; 472 - ev.event = 
IB_EVENT_QP_LAST_WQE_REACHED; 473 - sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context); 474 - } 475 - goto done; 476 - } 477 - clr_busy: 478 - sqp->s_flags &= ~RVT_S_BUSY; 479 - unlock: 480 - spin_unlock_irqrestore(&sqp->s_lock, flags); 481 - done: 482 - rcu_read_unlock(); 483 - } 484 - 485 - /** 486 159 * hfi1_make_grh - construct a GRH header 487 160 * @ibp: a pointer to the IB port 488 161 * @hdr: a pointer to the GRH header being constructed ··· 498 825 499 826 void _hfi1_do_send(struct work_struct *work) 500 827 { 501 - struct iowait *wait = container_of(work, struct iowait, iowork); 502 - struct rvt_qp *qp = iowait_to_qp(wait); 828 + struct iowait_work *w = container_of(work, struct iowait_work, iowork); 829 + struct rvt_qp *qp = iowait_to_qp(w->iow); 503 830 504 831 hfi1_do_send(qp, true); 505 832 } ··· 523 850 ps.ibp = to_iport(qp->ibqp.device, qp->port_num); 524 851 ps.ppd = ppd_from_ibp(ps.ibp); 525 852 ps.in_thread = in_thread; 853 + ps.wait = iowait_get_ib_work(&priv->s_iowait); 526 854 527 855 trace_hfi1_rc_do_send(qp, in_thread); 528 856 ··· 532 858 if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) & 533 859 ~((1 << ps.ppd->lmc) - 1)) == 534 860 ps.ppd->lid)) { 535 - ruc_loopback(qp); 861 + rvt_ruc_loopback(qp); 536 862 return; 537 863 } 538 864 make_req = hfi1_make_rc_req; ··· 542 868 if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) & 543 869 ~((1 << ps.ppd->lmc) - 1)) == 544 870 ps.ppd->lid)) { 545 - ruc_loopback(qp); 871 + rvt_ruc_loopback(qp); 546 872 return; 547 873 } 548 874 make_req = hfi1_make_uc_req; ··· 557 883 558 884 /* Return if we are already busy processing a work request. 
*/ 559 885 if (!hfi1_send_ok(qp)) { 886 + if (qp->s_flags & HFI1_S_ANY_WAIT_IO) 887 + iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_IB); 560 888 spin_unlock_irqrestore(&qp->s_lock, ps.flags); 561 889 return; 562 890 } ··· 572 896 ps.pkts_sent = false; 573 897 574 898 /* insure a pre-built packet is handled */ 575 - ps.s_txreq = get_waiting_verbs_txreq(qp); 899 + ps.s_txreq = get_waiting_verbs_txreq(ps.wait); 576 900 do { 577 901 /* Check for a constructed packet to be sent. */ 578 902 if (ps.s_txreq) { ··· 583 907 */ 584 908 if (hfi1_verbs_send(qp, &ps)) 585 909 return; 910 + 586 911 /* allow other tasks to run */ 587 912 if (schedule_send_yield(qp, &ps)) 588 913 return; ··· 593 916 } while (make_req(qp, &ps)); 594 917 iowait_starve_clear(ps.pkts_sent, &priv->s_iowait); 595 918 spin_unlock_irqrestore(&qp->s_lock, ps.flags); 596 - } 597 - 598 - /* 599 - * This should be called with s_lock held. 600 - */ 601 - void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, 602 - enum ib_wc_status status) 603 - { 604 - u32 old_last, last; 605 - 606 - if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND)) 607 - return; 608 - 609 - last = qp->s_last; 610 - old_last = last; 611 - trace_hfi1_qp_send_completion(qp, wqe, last); 612 - if (++last >= qp->s_size) 613 - last = 0; 614 - trace_hfi1_qp_send_completion(qp, wqe, last); 615 - qp->s_last = last; 616 - /* See post_send() */ 617 - barrier(); 618 - rvt_put_swqe(wqe); 619 - if (qp->ibqp.qp_type == IB_QPT_UD || 620 - qp->ibqp.qp_type == IB_QPT_SMI || 621 - qp->ibqp.qp_type == IB_QPT_GSI) 622 - atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount); 623 - 624 - rvt_qp_swqe_complete(qp, 625 - wqe, 626 - ib_hfi1_wc_opcode[wqe->wr.opcode], 627 - status); 628 - 629 - if (qp->s_acked == old_last) 630 - qp->s_acked = last; 631 - if (qp->s_cur == old_last) 632 - qp->s_cur = last; 633 - if (qp->s_tail == old_last) 634 - qp->s_tail = last; 635 - if (qp->state == IB_QPS_SQD && last == qp->s_cur) 636 - qp->s_draining = 0; 637 
919 }
+22 -34
drivers/infiniband/hw/hfi1/sdma.c
··· 378 378 __sdma_txclean(sde->dd, tx); 379 379 if (complete) 380 380 (*complete)(tx, res); 381 - if (wait && iowait_sdma_dec(wait)) 381 + if (iowait_sdma_dec(wait)) 382 382 iowait_drain_wakeup(wait); 383 383 } 384 384 ··· 1758 1758 struct iowait *wait, *nw; 1759 1759 struct iowait *waits[SDMA_WAIT_BATCH_SIZE]; 1760 1760 uint i, n = 0, seq, max_idx = 0; 1761 - struct sdma_txreq *stx; 1762 1761 struct hfi1_ibdev *dev = &sde->dd->verbs_dev; 1763 1762 u8 max_starved_cnt = 0; 1764 1763 ··· 1778 1779 nw, 1779 1780 &sde->dmawait, 1780 1781 list) { 1781 - u16 num_desc = 0; 1782 + u32 num_desc; 1782 1783 1783 1784 if (!wait->wakeup) 1784 1785 continue; 1785 1786 if (n == ARRAY_SIZE(waits)) 1786 1787 break; 1787 - if (!list_empty(&wait->tx_head)) { 1788 - stx = list_first_entry( 1789 - &wait->tx_head, 1790 - struct sdma_txreq, 1791 - list); 1792 - num_desc = stx->num_desc; 1793 - } 1788 + num_desc = iowait_get_all_desc(wait); 1794 1789 if (num_desc > avail) 1795 1790 break; 1796 1791 avail -= num_desc; ··· 2339 2346 */ 2340 2347 static int sdma_check_progress( 2341 2348 struct sdma_engine *sde, 2342 - struct iowait *wait, 2349 + struct iowait_work *wait, 2343 2350 struct sdma_txreq *tx, 2344 2351 bool pkts_sent) 2345 2352 { ··· 2349 2356 if (tx->num_desc <= sde->desc_avail) 2350 2357 return -EAGAIN; 2351 2358 /* pulse the head_lock */ 2352 - if (wait && wait->sleep) { 2359 + if (wait && iowait_ioww_to_iow(wait)->sleep) { 2353 2360 unsigned seq; 2354 2361 2355 2362 seq = raw_seqcount_begin( 2356 2363 (const seqcount_t *)&sde->head_lock.seqcount); 2357 - ret = wait->sleep(sde, wait, tx, seq, pkts_sent); 2364 + ret = wait->iow->sleep(sde, wait, tx, seq, pkts_sent); 2358 2365 if (ret == -EAGAIN) 2359 2366 sde->desc_avail = sdma_descq_freecnt(sde); 2360 2367 } else { ··· 2366 2373 /** 2367 2374 * sdma_send_txreq() - submit a tx req to ring 2368 2375 * @sde: sdma engine to use 2369 - * @wait: wait structure to use when full (may be NULL) 2376 + * @wait: SE wait structure to use 
when full (may be NULL) 2370 2377 * @tx: sdma_txreq to submit 2371 2378 * @pkts_sent: has any packet been sent yet? 2372 2379 * ··· 2379 2386 * -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state 2380 2387 */ 2381 2388 int sdma_send_txreq(struct sdma_engine *sde, 2382 - struct iowait *wait, 2389 + struct iowait_work *wait, 2383 2390 struct sdma_txreq *tx, 2384 2391 bool pkts_sent) 2385 2392 { ··· 2390 2397 /* user should have supplied entire packet */ 2391 2398 if (unlikely(tx->tlen)) 2392 2399 return -EINVAL; 2393 - tx->wait = wait; 2400 + tx->wait = iowait_ioww_to_iow(wait); 2394 2401 spin_lock_irqsave(&sde->tail_lock, flags); 2395 2402 retry: 2396 2403 if (unlikely(!__sdma_running(sde))) ··· 2399 2406 goto nodesc; 2400 2407 tail = submit_tx(sde, tx); 2401 2408 if (wait) 2402 - iowait_sdma_inc(wait); 2409 + iowait_sdma_inc(iowait_ioww_to_iow(wait)); 2403 2410 sdma_update_tail(sde, tail); 2404 2411 unlock: 2405 2412 spin_unlock_irqrestore(&sde->tail_lock, flags); 2406 2413 return ret; 2407 2414 unlock_noconn: 2408 2415 if (wait) 2409 - iowait_sdma_inc(wait); 2416 + iowait_sdma_inc(iowait_ioww_to_iow(wait)); 2410 2417 tx->next_descq_idx = 0; 2411 2418 #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER 2412 2419 tx->sn = sde->tail_sn++; ··· 2415 2422 spin_lock(&sde->flushlist_lock); 2416 2423 list_add_tail(&tx->list, &sde->flushlist); 2417 2424 spin_unlock(&sde->flushlist_lock); 2418 - if (wait) { 2419 - wait->tx_count++; 2420 - wait->count += tx->num_desc; 2421 - } 2425 + iowait_inc_wait_count(wait, tx->num_desc); 2422 2426 schedule_work(&sde->flush_worker); 2423 2427 ret = -ECOMM; 2424 2428 goto unlock; ··· 2432 2442 /** 2433 2443 * sdma_send_txlist() - submit a list of tx req to ring 2434 2444 * @sde: sdma engine to use 2435 - * @wait: wait structure to use when full (may be NULL) 2445 + * @wait: SE wait structure to use when full (may be NULL) 2436 2446 * @tx_list: list of sdma_txreqs to submit 2437 - * @count: pointer to a u32 which, after return will contain the total 
number of 2447 + * @count: pointer to a u16 which, after return will contain the total number of 2438 2448 * sdma_txreqs removed from the tx_list. This will include sdma_txreqs 2439 2449 * whose SDMA descriptors are submitted to the ring and the sdma_txreqs 2440 2450 * which are added to SDMA engine flush list if the SDMA engine state is ··· 2457 2467 * -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL) 2458 2468 * -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state 2459 2469 */ 2460 - int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait, 2461 - struct list_head *tx_list, u32 *count_out) 2470 + int sdma_send_txlist(struct sdma_engine *sde, struct iowait_work *wait, 2471 + struct list_head *tx_list, u16 *count_out) 2462 2472 { 2463 2473 struct sdma_txreq *tx, *tx_next; 2464 2474 int ret = 0; ··· 2469 2479 spin_lock_irqsave(&sde->tail_lock, flags); 2470 2480 retry: 2471 2481 list_for_each_entry_safe(tx, tx_next, tx_list, list) { 2472 - tx->wait = wait; 2482 + tx->wait = iowait_ioww_to_iow(wait); 2473 2483 if (unlikely(!__sdma_running(sde))) 2474 2484 goto unlock_noconn; 2475 2485 if (unlikely(tx->num_desc > sde->desc_avail)) ··· 2490 2500 update_tail: 2491 2501 total_count = submit_count + flush_count; 2492 2502 if (wait) { 2493 - iowait_sdma_add(wait, total_count); 2494 - iowait_starve_clear(submit_count > 0, wait); 2503 + iowait_sdma_add(iowait_ioww_to_iow(wait), total_count); 2504 + iowait_starve_clear(submit_count > 0, 2505 + iowait_ioww_to_iow(wait)); 2495 2506 } 2496 2507 if (tail != INVALID_TAIL) 2497 2508 sdma_update_tail(sde, tail); ··· 2502 2511 unlock_noconn: 2503 2512 spin_lock(&sde->flushlist_lock); 2504 2513 list_for_each_entry_safe(tx, tx_next, tx_list, list) { 2505 - tx->wait = wait; 2514 + tx->wait = iowait_ioww_to_iow(wait); 2506 2515 list_del_init(&tx->list); 2507 2516 tx->next_descq_idx = 0; 2508 2517 #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER ··· 2511 2520 #endif 2512 2521 list_add_tail(&tx->list, 
&sde->flushlist); 2513 2522 flush_count++; 2514 - if (wait) { 2515 - wait->tx_count++; 2516 - wait->count += tx->num_desc; 2517 - } 2523 + iowait_inc_wait_count(wait, tx->num_desc); 2518 2524 } 2519 2525 spin_unlock(&sde->flushlist_lock); 2520 2526 schedule_work(&sde->flush_worker);
+6 -15
drivers/infiniband/hw/hfi1/sdma.h
··· 1 1 #ifndef _HFI1_SDMA_H 2 2 #define _HFI1_SDMA_H 3 3 /* 4 - * Copyright(c) 2015, 2016 Intel Corporation. 4 + * Copyright(c) 2015 - 2018 Intel Corporation. 5 5 * 6 6 * This file is provided under a dual BSD/GPLv2 license. When using or 7 7 * redistributing this file, you may do so under either license. ··· 61 61 #define MAX_DESC 64 62 62 /* Hardware limit for SDMA packet size */ 63 63 #define MAX_SDMA_PKT_SIZE ((16 * 1024) - 1) 64 - 65 - #define SDMA_TXREQ_S_OK 0 66 - #define SDMA_TXREQ_S_SENDERROR 1 67 - #define SDMA_TXREQ_S_ABORTED 2 68 - #define SDMA_TXREQ_S_SHUTDOWN 3 69 - 70 - /* flags bits */ 71 - #define SDMA_TXREQ_F_URGENT 0x0001 72 - #define SDMA_TXREQ_F_AHG_COPY 0x0002 73 - #define SDMA_TXREQ_F_USE_AHG 0x0004 74 64 75 65 #define SDMA_MAP_NONE 0 76 66 #define SDMA_MAP_SINGLE 1 ··· 405 415 struct list_head flushlist; 406 416 struct cpumask cpu_mask; 407 417 struct kobject kobj; 418 + u32 msix_intr; 408 419 }; 409 420 410 421 int sdma_init(struct hfi1_devdata *dd, u8 port); ··· 840 849 dd, SDMA_MAP_SINGLE, tx, addr, len); 841 850 } 842 851 843 - struct iowait; 852 + struct iowait_work; 844 853 845 854 int sdma_send_txreq(struct sdma_engine *sde, 846 - struct iowait *wait, 855 + struct iowait_work *wait, 847 856 struct sdma_txreq *tx, 848 857 bool pkts_sent); 849 858 int sdma_send_txlist(struct sdma_engine *sde, 850 - struct iowait *wait, 859 + struct iowait_work *wait, 851 860 struct list_head *tx_list, 852 - u32 *count); 861 + u16 *count_out); 853 862 854 863 int sdma_ahg_alloc(struct sdma_engine *sde); 855 864 void sdma_ahg_free(struct sdma_engine *sde, int ahg_index);
+31 -36
drivers/infiniband/hw/hfi1/sysfs.c
··· 494 494 * Start of per-unit (or driver, in some cases, but replicated 495 495 * per unit) functions (these get a device *) 496 496 */ 497 - static ssize_t show_rev(struct device *device, struct device_attribute *attr, 498 - char *buf) 497 + static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, 498 + char *buf) 499 499 { 500 500 struct hfi1_ibdev *dev = 501 501 container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); 502 502 503 503 return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev); 504 504 } 505 + static DEVICE_ATTR_RO(hw_rev); 505 506 506 - static ssize_t show_hfi(struct device *device, struct device_attribute *attr, 507 - char *buf) 507 + static ssize_t board_id_show(struct device *device, 508 + struct device_attribute *attr, char *buf) 508 509 { 509 510 struct hfi1_ibdev *dev = 510 511 container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); ··· 518 517 ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname); 519 518 return ret; 520 519 } 520 + static DEVICE_ATTR_RO(board_id); 521 521 522 - static ssize_t show_boardversion(struct device *device, 522 + static ssize_t boardversion_show(struct device *device, 523 523 struct device_attribute *attr, char *buf) 524 524 { 525 525 struct hfi1_ibdev *dev = ··· 530 528 /* The string printed here is already newline-terminated. 
*/ 531 529 return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion); 532 530 } 531 + static DEVICE_ATTR_RO(boardversion); 533 532 534 - static ssize_t show_nctxts(struct device *device, 533 + static ssize_t nctxts_show(struct device *device, 535 534 struct device_attribute *attr, char *buf) 536 535 { 537 536 struct hfi1_ibdev *dev = ··· 549 546 min(dd->num_user_contexts, 550 547 (u32)dd->sc_sizes[SC_USER].count)); 551 548 } 549 + static DEVICE_ATTR_RO(nctxts); 552 550 553 - static ssize_t show_nfreectxts(struct device *device, 551 + static ssize_t nfreectxts_show(struct device *device, 554 552 struct device_attribute *attr, char *buf) 555 553 { 556 554 struct hfi1_ibdev *dev = ··· 561 557 /* Return the number of free user ports (contexts) available. */ 562 558 return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts); 563 559 } 560 + static DEVICE_ATTR_RO(nfreectxts); 564 561 565 - static ssize_t show_serial(struct device *device, 562 + static ssize_t serial_show(struct device *device, 566 563 struct device_attribute *attr, char *buf) 567 564 { 568 565 struct hfi1_ibdev *dev = ··· 572 567 573 568 return scnprintf(buf, PAGE_SIZE, "%s", dd->serial); 574 569 } 570 + static DEVICE_ATTR_RO(serial); 575 571 576 - static ssize_t store_chip_reset(struct device *device, 572 + static ssize_t chip_reset_store(struct device *device, 577 573 struct device_attribute *attr, const char *buf, 578 574 size_t count) 579 575 { ··· 592 586 bail: 593 587 return ret < 0 ? ret : count; 594 588 } 589 + static DEVICE_ATTR_WO(chip_reset); 595 590 596 591 /* 597 592 * Convert the reported temperature from an integer (reported in ··· 605 598 /* 606 599 * Dump tempsense values, in decimal, to ease shell-scripts. 
607 600 */ 608 - static ssize_t show_tempsense(struct device *device, 601 + static ssize_t tempsense_show(struct device *device, 609 602 struct device_attribute *attr, char *buf) 610 603 { 611 604 struct hfi1_ibdev *dev = ··· 629 622 } 630 623 return ret; 631 624 } 625 + static DEVICE_ATTR_RO(tempsense); 632 626 633 627 /* 634 628 * end of per-unit (or driver, in some cases, but replicated ··· 637 629 */ 638 630 639 631 /* start of per-unit file structures and support code */ 640 - static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 641 - static DEVICE_ATTR(board_id, S_IRUGO, show_hfi, NULL); 642 - static DEVICE_ATTR(nctxts, S_IRUGO, show_nctxts, NULL); 643 - static DEVICE_ATTR(nfreectxts, S_IRUGO, show_nfreectxts, NULL); 644 - static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL); 645 - static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL); 646 - static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL); 647 - static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset); 632 + static struct attribute *hfi1_attributes[] = { 633 + &dev_attr_hw_rev.attr, 634 + &dev_attr_board_id.attr, 635 + &dev_attr_nctxts.attr, 636 + &dev_attr_nfreectxts.attr, 637 + &dev_attr_serial.attr, 638 + &dev_attr_boardversion.attr, 639 + &dev_attr_tempsense.attr, 640 + &dev_attr_chip_reset.attr, 641 + NULL, 642 + }; 648 643 649 - static struct device_attribute *hfi1_attributes[] = { 650 - &dev_attr_hw_rev, 651 - &dev_attr_board_id, 652 - &dev_attr_nctxts, 653 - &dev_attr_nfreectxts, 654 - &dev_attr_serial, 655 - &dev_attr_boardversion, 656 - &dev_attr_tempsense, 657 - &dev_attr_chip_reset, 644 + const struct attribute_group ib_hfi1_attr_group = { 645 + .attrs = hfi1_attributes, 658 646 }; 659 647 660 648 int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, ··· 836 832 struct device *class_dev = &dev->dev; 837 833 int i, j, ret; 838 834 839 - for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) { 840 - ret = device_create_file(&dev->dev, hfi1_attributes[i]); 841 
- if (ret) 842 - goto bail; 843 - } 844 - 845 835 for (i = 0; i < dd->num_sdma; i++) { 846 836 ret = kobject_init_and_add(&dd->per_sdma[i].kobj, 847 837 &sde_ktype, &class_dev->kobj, ··· 853 855 854 856 return 0; 855 857 bail: 856 - for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) 857 - device_remove_file(&dev->dev, hfi1_attributes[i]); 858 - 859 858 for (i = 0; i < dd->num_sdma; i++) 860 859 kobject_del(&dd->per_sdma[i].kobj); 861 860
+2 -1
drivers/infiniband/hw/hfi1/trace.h
··· 1 1 /* 2 - * Copyright(c) 2015 - 2017 Intel Corporation. 2 + * Copyright(c) 2015 - 2018 Intel Corporation. 3 3 * 4 4 * This file is provided under a dual BSD/GPLv2 license. When using or 5 5 * redistributing this file, you may do so under either license. ··· 62 62 #include "trace_rx.h" 63 63 #include "trace_tx.h" 64 64 #include "trace_mmu.h" 65 + #include "trace_iowait.h"
+54
drivers/infiniband/hw/hfi1/trace_iowait.h
··· 1 + /* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ 2 + /* 3 + * Copyright(c) 2018 Intel Corporation. 4 + * 5 + */ 6 + #if !defined(__HFI1_TRACE_IOWAIT_H) || defined(TRACE_HEADER_MULTI_READ) 7 + #define __HFI1_TRACE_IOWAIT_H 8 + 9 + #include <linux/tracepoint.h> 10 + #include "iowait.h" 11 + #include "verbs.h" 12 + 13 + #undef TRACE_SYSTEM 14 + #define TRACE_SYSTEM hfi1_iowait 15 + 16 + DECLARE_EVENT_CLASS(hfi1_iowait_template, 17 + TP_PROTO(struct iowait *wait, u32 flag), 18 + TP_ARGS(wait, flag), 19 + TP_STRUCT__entry(/* entry */ 20 + __field(unsigned long, addr) 21 + __field(unsigned long, flags) 22 + __field(u32, flag) 23 + __field(u32, qpn) 24 + ), 25 + TP_fast_assign(/* assign */ 26 + __entry->addr = (unsigned long)wait; 27 + __entry->flags = wait->flags; 28 + __entry->flag = (1 << flag); 29 + __entry->qpn = iowait_to_qp(wait)->ibqp.qp_num; 30 + ), 31 + TP_printk(/* print */ 32 + "iowait 0x%lx qp %u flags 0x%lx flag 0x%x", 33 + __entry->addr, 34 + __entry->qpn, 35 + __entry->flags, 36 + __entry->flag 37 + ) 38 + ); 39 + 40 + DEFINE_EVENT(hfi1_iowait_template, hfi1_iowait_set, 41 + TP_PROTO(struct iowait *wait, u32 flag), 42 + TP_ARGS(wait, flag)); 43 + 44 + DEFINE_EVENT(hfi1_iowait_template, hfi1_iowait_clear, 45 + TP_PROTO(struct iowait *wait, u32 flag), 46 + TP_ARGS(wait, flag)); 47 + 48 + #endif /* __HFI1_TRACE_IOWAIT_H */ 49 + 50 + #undef TRACE_INCLUDE_PATH 51 + #undef TRACE_INCLUDE_FILE 52 + #define TRACE_INCLUDE_PATH . 53 + #define TRACE_INCLUDE_FILE trace_iowait 54 + #include <trace/define_trace.h>
+7 -7
drivers/infiniband/hw/hfi1/uc.c
··· 88 88 } 89 89 clear_ahg(qp); 90 90 wqe = rvt_get_swqe_ptr(qp, qp->s_last); 91 - hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); 91 + rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); 92 92 goto done_free_tx; 93 93 } 94 94 ··· 140 140 qp, wqe->wr.ex.invalidate_rkey); 141 141 local_ops = 1; 142 142 } 143 - hfi1_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR 143 + rvt_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR 144 144 : IB_WC_SUCCESS); 145 145 if (local_ops) 146 146 atomic_dec(&qp->local_ops_pending); ··· 426 426 qp->r_rcv_len += pmtu; 427 427 if (unlikely(qp->r_rcv_len > qp->r_len)) 428 428 goto rewind; 429 - hfi1_copy_sge(&qp->r_sge, data, pmtu, false, false); 429 + rvt_copy_sge(qp, &qp->r_sge, data, pmtu, false, false); 430 430 break; 431 431 432 432 case OP(SEND_LAST_WITH_IMMEDIATE): ··· 449 449 if (unlikely(wc.byte_len > qp->r_len)) 450 450 goto rewind; 451 451 wc.opcode = IB_WC_RECV; 452 - hfi1_copy_sge(&qp->r_sge, data, tlen, false, false); 452 + rvt_copy_sge(qp, &qp->r_sge, data, tlen, false, false); 453 453 rvt_put_ss(&qp->s_rdma_read_sge); 454 454 last_imm: 455 455 wc.wr_id = qp->r_wr_id; ··· 523 523 qp->r_rcv_len += pmtu; 524 524 if (unlikely(qp->r_rcv_len > qp->r_len)) 525 525 goto drop; 526 - hfi1_copy_sge(&qp->r_sge, data, pmtu, true, false); 526 + rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false); 527 527 break; 528 528 529 529 case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): ··· 550 550 } 551 551 wc.byte_len = qp->r_len; 552 552 wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; 553 - hfi1_copy_sge(&qp->r_sge, data, tlen, true, false); 553 + rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false); 554 554 rvt_put_ss(&qp->r_sge); 555 555 goto last_imm; 556 556 ··· 564 564 tlen -= (hdrsize + extra_bytes); 565 565 if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) 566 566 goto drop; 567 - hfi1_copy_sge(&qp->r_sge, data, tlen, true, false); 567 + rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false); 568 568 rvt_put_ss(&qp->r_sge); 569 569 break; 570 570
+11 -11
drivers/infiniband/hw/hfi1/ud.c
··· 210 210 } 211 211 212 212 hfi1_make_grh(ibp, &grh, &grd, 0, 0); 213 - hfi1_copy_sge(&qp->r_sge, &grh, 214 - sizeof(grh), true, false); 213 + rvt_copy_sge(qp, &qp->r_sge, &grh, 214 + sizeof(grh), true, false); 215 215 wc.wc_flags |= IB_WC_GRH; 216 216 } else { 217 217 rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true); ··· 228 228 if (len > sge->sge_length) 229 229 len = sge->sge_length; 230 230 WARN_ON_ONCE(len == 0); 231 - hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, true, false); 231 + rvt_copy_sge(qp, &qp->r_sge, sge->vaddr, len, true, false); 232 232 sge->vaddr += len; 233 233 sge->length -= len; 234 234 sge->sge_length -= len; ··· 518 518 goto bail; 519 519 } 520 520 wqe = rvt_get_swqe_ptr(qp, qp->s_last); 521 - hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); 521 + rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); 522 522 goto done_free_tx; 523 523 } 524 524 ··· 560 560 ud_loopback(qp, wqe); 561 561 spin_lock_irqsave(&qp->s_lock, tflags); 562 562 ps->flags = tflags; 563 - hfi1_send_complete(qp, wqe, IB_WC_SUCCESS); 563 + rvt_send_complete(qp, wqe, IB_WC_SUCCESS); 564 564 goto done_free_tx; 565 565 } 566 566 } ··· 1019 1019 goto drop; 1020 1020 } 1021 1021 if (packet->grh) { 1022 - hfi1_copy_sge(&qp->r_sge, packet->grh, 1023 - sizeof(struct ib_grh), true, false); 1022 + rvt_copy_sge(qp, &qp->r_sge, packet->grh, 1023 + sizeof(struct ib_grh), true, false); 1024 1024 wc.wc_flags |= IB_WC_GRH; 1025 1025 } else if (packet->etype == RHF_RCV_TYPE_BYPASS) { 1026 1026 struct ib_grh grh; ··· 1030 1030 * out when creating 16B, add back the GRH here. 
1031 1031 */ 1032 1032 hfi1_make_ext_grh(packet, &grh, slid, dlid); 1033 - hfi1_copy_sge(&qp->r_sge, &grh, 1034 - sizeof(struct ib_grh), true, false); 1033 + rvt_copy_sge(qp, &qp->r_sge, &grh, 1034 + sizeof(struct ib_grh), true, false); 1035 1035 wc.wc_flags |= IB_WC_GRH; 1036 1036 } else { 1037 1037 rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true); 1038 1038 } 1039 - hfi1_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1040 - true, false); 1039 + rvt_copy_sge(qp, &qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1040 + true, false); 1041 1041 rvt_put_ss(&qp->r_sge); 1042 1042 if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) 1043 1043 return;
+63 -72
drivers/infiniband/hw/hfi1/user_sdma.c
··· 1 1 /* 2 - * Copyright(c) 2015 - 2017 Intel Corporation. 2 + * Copyright(c) 2015 - 2018 Intel Corporation. 3 3 * 4 4 * This file is provided under a dual BSD/GPLv2 license. When using or 5 5 * redistributing this file, you may do so under either license. ··· 76 76 77 77 static unsigned initial_pkt_count = 8; 78 78 79 - static int user_sdma_send_pkts(struct user_sdma_request *req, 80 - unsigned maxpkts); 79 + static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts); 81 80 static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status); 82 81 static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq); 83 82 static void user_sdma_free_request(struct user_sdma_request *req, bool unpin); ··· 100 101 101 102 static int defer_packet_queue( 102 103 struct sdma_engine *sde, 103 - struct iowait *wait, 104 + struct iowait_work *wait, 104 105 struct sdma_txreq *txreq, 105 106 uint seq, 106 107 bool pkts_sent); ··· 123 124 124 125 static int defer_packet_queue( 125 126 struct sdma_engine *sde, 126 - struct iowait *wait, 127 + struct iowait_work *wait, 127 128 struct sdma_txreq *txreq, 128 129 uint seq, 129 130 bool pkts_sent) 130 131 { 131 132 struct hfi1_user_sdma_pkt_q *pq = 132 - container_of(wait, struct hfi1_user_sdma_pkt_q, busy); 133 + container_of(wait->iow, struct hfi1_user_sdma_pkt_q, busy); 133 134 struct hfi1_ibdev *dev = &pq->dd->verbs_dev; 134 135 struct user_sdma_txreq *tx = 135 136 container_of(txreq, struct user_sdma_txreq, txreq); ··· 186 187 pq->ctxt = uctxt->ctxt; 187 188 pq->subctxt = fd->subctxt; 188 189 pq->n_max_reqs = hfi1_sdma_comp_ring_size; 189 - pq->state = SDMA_PKT_Q_INACTIVE; 190 190 atomic_set(&pq->n_reqs, 0); 191 191 init_waitqueue_head(&pq->wait); 192 192 atomic_set(&pq->n_locked, 0); 193 193 pq->mm = fd->mm; 194 194 195 - iowait_init(&pq->busy, 0, NULL, defer_packet_queue, 195 + iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue, 196 196 activate_packet_queue, NULL); 197 197 pq->reqidx = 0; 198 198 ··· 274 
276 /* Wait until all requests have been freed. */ 275 277 wait_event_interruptible( 276 278 pq->wait, 277 - (READ_ONCE(pq->state) == SDMA_PKT_Q_INACTIVE)); 279 + !atomic_read(&pq->n_reqs)); 278 280 kfree(pq->reqs); 279 281 kfree(pq->req_in_use); 280 282 kmem_cache_destroy(pq->txreq_cache); ··· 310 312 return mapping[hash]; 311 313 } 312 314 315 + /** 316 + * hfi1_user_sdma_process_request() - Process and start a user sdma request 317 + * @fd: valid file descriptor 318 + * @iovec: array of io vectors to process 319 + * @dim: overall iovec array size 320 + * @count: number of io vector array entries processed 321 + */ 313 322 int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, 314 323 struct iovec *iovec, unsigned long dim, 315 324 unsigned long *count) ··· 333 328 u8 opcode, sc, vl; 334 329 u16 pkey; 335 330 u32 slid; 336 - int req_queued = 0; 337 331 u16 dlid; 338 332 u32 selector; 339 333 ··· 396 392 req->data_len = 0; 397 393 req->pq = pq; 398 394 req->cq = cq; 399 - req->status = -1; 400 395 req->ahg_idx = -1; 401 396 req->iov_idx = 0; 402 397 req->sent = 0; ··· 403 400 req->seqcomp = 0; 404 401 req->seqsubmitted = 0; 405 402 req->tids = NULL; 406 - req->done = 0; 407 403 req->has_error = 0; 408 404 INIT_LIST_HEAD(&req->txps); 409 405 410 406 memcpy(&req->info, &info, sizeof(info)); 407 + 408 + /* The request is initialized, count it */ 409 + atomic_inc(&pq->n_reqs); 411 410 412 411 if (req_opcode(info.ctrl) == EXPECTED) { 413 412 /* expected must have a TID info and at least one data vector */ ··· 505 500 ret = pin_vector_pages(req, &req->iovs[i]); 506 501 if (ret) { 507 502 req->data_iovs = i; 508 - req->status = ret; 509 503 goto free_req; 510 504 } 511 505 req->data_len += req->iovs[i].iov.iov_len; ··· 565 561 req->ahg_idx = sdma_ahg_alloc(req->sde); 566 562 567 563 set_comp_state(pq, cq, info.comp_idx, QUEUED, 0); 568 - atomic_inc(&pq->n_reqs); 569 - req_queued = 1; 564 + pq->state = SDMA_PKT_Q_ACTIVE; 570 565 /* Send the first N packets in the 
request to buy us some time */ 571 566 ret = user_sdma_send_pkts(req, pcount); 572 - if (unlikely(ret < 0 && ret != -EBUSY)) { 573 - req->status = ret; 567 + if (unlikely(ret < 0 && ret != -EBUSY)) 574 568 goto free_req; 575 - } 576 - 577 - /* 578 - * It is possible that the SDMA engine would have processed all the 579 - * submitted packets by the time we get here. Therefore, only set 580 - * packet queue state to ACTIVE if there are still uncompleted 581 - * requests. 582 - */ 583 - if (atomic_read(&pq->n_reqs)) 584 - xchg(&pq->state, SDMA_PKT_Q_ACTIVE); 585 569 586 570 /* 587 571 * This is a somewhat blocking send implementation. ··· 580 588 while (req->seqsubmitted != req->info.npkts) { 581 589 ret = user_sdma_send_pkts(req, pcount); 582 590 if (ret < 0) { 583 - if (ret != -EBUSY) { 584 - req->status = ret; 585 - WRITE_ONCE(req->has_error, 1); 586 - if (READ_ONCE(req->seqcomp) == 587 - req->seqsubmitted - 1) 588 - goto free_req; 589 - return ret; 590 - } 591 + if (ret != -EBUSY) 592 + goto free_req; 591 593 wait_event_interruptible_timeout( 592 594 pq->busy.wait_dma, 593 595 (pq->state == SDMA_PKT_Q_ACTIVE), ··· 592 606 *count += idx; 593 607 return 0; 594 608 free_req: 595 - user_sdma_free_request(req, true); 596 - if (req_queued) 609 + /* 610 + * If the submitted seqsubmitted == npkts, the completion routine 611 + * controls the final state. If sequbmitted < npkts, wait for any 612 + * outstanding packets to finish before cleaning up. 
613 + */ 614 + if (req->seqsubmitted < req->info.npkts) { 615 + if (req->seqsubmitted) 616 + wait_event(pq->busy.wait_dma, 617 + (req->seqcomp == req->seqsubmitted - 1)); 618 + user_sdma_free_request(req, true); 597 619 pq_update(pq); 598 - set_comp_state(pq, cq, info.comp_idx, ERROR, req->status); 620 + set_comp_state(pq, cq, info.comp_idx, ERROR, ret); 621 + } 599 622 return ret; 600 623 } 601 624 ··· 755 760 return ret; 756 761 } 757 762 758 - static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) 763 + static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts) 759 764 { 760 - int ret = 0, count; 765 + int ret = 0; 766 + u16 count; 761 767 unsigned npkts = 0; 762 768 struct user_sdma_txreq *tx = NULL; 763 769 struct hfi1_user_sdma_pkt_q *pq = NULL; ··· 860 864 861 865 changes = set_txreq_header_ahg(req, tx, 862 866 datalen); 863 - if (changes < 0) 867 + if (changes < 0) { 868 + ret = changes; 864 869 goto free_tx; 870 + } 865 871 } 866 872 } else { 867 873 ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) + ··· 912 914 npkts++; 913 915 } 914 916 dosend: 915 - ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count); 917 + ret = sdma_send_txlist(req->sde, 918 + iowait_get_ib_work(&pq->busy), 919 + &req->txps, &count); 916 920 req->seqsubmitted += count; 917 921 if (req->seqsubmitted == req->info.npkts) { 918 - WRITE_ONCE(req->done, 1); 919 922 /* 920 923 * The txreq has already been submitted to the HW queue 921 924 * so we can free the AHG entry now. Corruption will not ··· 1364 1365 return idx; 1365 1366 } 1366 1367 1367 - /* 1368 - * SDMA tx request completion callback. Called when the SDMA progress 1369 - * state machine gets notification that the SDMA descriptors for this 1370 - * tx request have been processed by the DMA engine. Called in 1371 - * interrupt context. 1368 + /** 1369 + * user_sdma_txreq_cb() - SDMA tx request completion callback. 
1370 + * @txreq: valid sdma tx request 1371 + * @status: success/failure of request 1372 + * 1373 + * Called when the SDMA progress state machine gets notification that 1374 + * the SDMA descriptors for this tx request have been processed by the 1375 + * DMA engine. Called in interrupt context. 1376 + * Only do work on completed sequences. 1372 1377 */ 1373 1378 static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) 1374 1379 { ··· 1381 1378 struct user_sdma_request *req; 1382 1379 struct hfi1_user_sdma_pkt_q *pq; 1383 1380 struct hfi1_user_sdma_comp_q *cq; 1384 - u16 idx; 1381 + enum hfi1_sdma_comp_state state = COMPLETE; 1385 1382 1386 1383 if (!tx->req) 1387 1384 return; ··· 1394 1391 SDMA_DBG(req, "SDMA completion with error %d", 1395 1392 status); 1396 1393 WRITE_ONCE(req->has_error, 1); 1394 + state = ERROR; 1397 1395 } 1398 1396 1399 1397 req->seqcomp = tx->seqnum; 1400 1398 kmem_cache_free(pq->txreq_cache, tx); 1401 - tx = NULL; 1402 1399 1403 - idx = req->info.comp_idx; 1404 - if (req->status == -1 && status == SDMA_TXREQ_S_OK) { 1405 - if (req->seqcomp == req->info.npkts - 1) { 1406 - req->status = 0; 1407 - user_sdma_free_request(req, false); 1408 - pq_update(pq); 1409 - set_comp_state(pq, cq, idx, COMPLETE, 0); 1410 - } 1411 - } else { 1412 - if (status != SDMA_TXREQ_S_OK) 1413 - req->status = status; 1414 - if (req->seqcomp == (READ_ONCE(req->seqsubmitted) - 1) && 1415 - (READ_ONCE(req->done) || 1416 - READ_ONCE(req->has_error))) { 1417 - user_sdma_free_request(req, false); 1418 - pq_update(pq); 1419 - set_comp_state(pq, cq, idx, ERROR, req->status); 1420 - } 1421 - } 1400 + /* sequence isn't complete? 
We are done */ 1401 + if (req->seqcomp != req->info.npkts - 1) 1402 + return; 1403 + 1404 + user_sdma_free_request(req, false); 1405 + set_comp_state(pq, cq, req->info.comp_idx, state, status); 1406 + pq_update(pq); 1422 1407 } 1423 1408 1424 1409 static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq) 1425 1410 { 1426 - if (atomic_dec_and_test(&pq->n_reqs)) { 1427 - xchg(&pq->state, SDMA_PKT_Q_INACTIVE); 1411 + if (atomic_dec_and_test(&pq->n_reqs)) 1428 1412 wake_up(&pq->wait); 1429 - } 1430 1413 } 1431 1414 1432 1415 static void user_sdma_free_request(struct user_sdma_request *req, bool unpin) ··· 1436 1447 1437 1448 if (!node) 1438 1449 continue; 1450 + 1451 + req->iovs[i].node = NULL; 1439 1452 1440 1453 if (unpin) 1441 1454 hfi1_mmu_rb_remove(req->pq->handler,
+9 -11
drivers/infiniband/hw/hfi1/user_sdma.h
··· 105 105 #define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */ 106 106 #define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */ 107 107 108 - #define SDMA_PKT_Q_INACTIVE BIT(0) 109 - #define SDMA_PKT_Q_ACTIVE BIT(1) 110 - #define SDMA_PKT_Q_DEFERRED BIT(2) 108 + enum pkt_q_sdma_state { 109 + SDMA_PKT_Q_ACTIVE, 110 + SDMA_PKT_Q_DEFERRED, 111 + }; 111 112 112 113 /* 113 114 * Maximum retry attempts to submit a TX request ··· 134 133 struct user_sdma_request *reqs; 135 134 unsigned long *req_in_use; 136 135 struct iowait busy; 137 - unsigned state; 136 + enum pkt_q_sdma_state state; 138 137 wait_queue_head_t wait; 139 138 unsigned long unpinned; 140 139 struct mmu_rb_handler *handler; ··· 204 203 s8 ahg_idx; 205 204 206 205 /* Writeable fields shared with interrupt */ 207 - u64 seqcomp ____cacheline_aligned_in_smp; 208 - u64 seqsubmitted; 209 - /* status of the last txreq completed */ 210 - int status; 206 + u16 seqcomp ____cacheline_aligned_in_smp; 207 + u16 seqsubmitted; 211 208 212 209 /* Send side fields */ 213 210 struct list_head txps ____cacheline_aligned_in_smp; 214 - u64 seqnum; 211 + u16 seqnum; 215 212 /* 216 213 * KDETH.OFFSET (TID) field 217 214 * The offset can cover multiple packets, depending on the ··· 227 228 u16 tididx; 228 229 /* progress index moving along the iovs array */ 229 230 u8 iov_idx; 230 - u8 done; 231 231 u8 has_error; 232 232 233 233 struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ]; ··· 246 248 struct user_sdma_request *req; 247 249 u16 flags; 248 250 unsigned int busycount; 249 - u64 seqnum; 251 + u16 seqnum; 250 252 }; 251 253 252 254 int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
+19 -232
drivers/infiniband/hw/hfi1/verbs.c
··· 129 129 module_param(piothreshold, ushort, S_IRUGO); 130 130 MODULE_PARM_DESC(piothreshold, "size used to determine sdma vs. pio"); 131 131 132 - #define COPY_CACHELESS 1 133 - #define COPY_ADAPTIVE 2 134 132 static unsigned int sge_copy_mode; 135 133 module_param(sge_copy_mode, uint, S_IRUGO); 136 134 MODULE_PARM_DESC(sge_copy_mode, ··· 149 151 /* 16B trailing buffer */ 150 152 static const u8 trail_buf[MAX_16B_PADDING]; 151 153 152 - static uint wss_threshold; 154 + static uint wss_threshold = 80; 153 155 module_param(wss_threshold, uint, S_IRUGO); 154 156 MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy"); 155 157 static uint wss_clean_period = 256; 156 158 module_param(wss_clean_period, uint, S_IRUGO); 157 159 MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the page copy table is cleaned"); 158 - 159 - /* memory working set size */ 160 - struct hfi1_wss { 161 - unsigned long *entries; 162 - atomic_t total_count; 163 - atomic_t clean_counter; 164 - atomic_t clean_entry; 165 - 166 - int threshold; 167 - int num_entries; 168 - long pages_mask; 169 - }; 170 - 171 - static struct hfi1_wss wss; 172 - 173 - int hfi1_wss_init(void) 174 - { 175 - long llc_size; 176 - long llc_bits; 177 - long table_size; 178 - long table_bits; 179 - 180 - /* check for a valid percent range - default to 80 if none or invalid */ 181 - if (wss_threshold < 1 || wss_threshold > 100) 182 - wss_threshold = 80; 183 - /* reject a wildly large period */ 184 - if (wss_clean_period > 1000000) 185 - wss_clean_period = 256; 186 - /* reject a zero period */ 187 - if (wss_clean_period == 0) 188 - wss_clean_period = 1; 189 - 190 - /* 191 - * Calculate the table size - the next power of 2 larger than the 192 - * LLC size. LLC size is in KiB. 
193 - */ 194 - llc_size = wss_llc_size() * 1024; 195 - table_size = roundup_pow_of_two(llc_size); 196 - 197 - /* one bit per page in rounded up table */ 198 - llc_bits = llc_size / PAGE_SIZE; 199 - table_bits = table_size / PAGE_SIZE; 200 - wss.pages_mask = table_bits - 1; 201 - wss.num_entries = table_bits / BITS_PER_LONG; 202 - 203 - wss.threshold = (llc_bits * wss_threshold) / 100; 204 - if (wss.threshold == 0) 205 - wss.threshold = 1; 206 - 207 - atomic_set(&wss.clean_counter, wss_clean_period); 208 - 209 - wss.entries = kcalloc(wss.num_entries, sizeof(*wss.entries), 210 - GFP_KERNEL); 211 - if (!wss.entries) { 212 - hfi1_wss_exit(); 213 - return -ENOMEM; 214 - } 215 - 216 - return 0; 217 - } 218 - 219 - void hfi1_wss_exit(void) 220 - { 221 - /* coded to handle partially initialized and repeat callers */ 222 - kfree(wss.entries); 223 - wss.entries = NULL; 224 - } 225 - 226 - /* 227 - * Advance the clean counter. When the clean period has expired, 228 - * clean an entry. 229 - * 230 - * This is implemented in atomics to avoid locking. Because multiple 231 - * variables are involved, it can be racy which can lead to slightly 232 - * inaccurate information. Since this is only a heuristic, this is 233 - * OK. Any innaccuracies will clean themselves out as the counter 234 - * advances. That said, it is unlikely the entry clean operation will 235 - * race - the next possible racer will not start until the next clean 236 - * period. 237 - * 238 - * The clean counter is implemented as a decrement to zero. When zero 239 - * is reached an entry is cleaned. 240 - */ 241 - static void wss_advance_clean_counter(void) 242 - { 243 - int entry; 244 - int weight; 245 - unsigned long bits; 246 - 247 - /* become the cleaner if we decrement the counter to zero */ 248 - if (atomic_dec_and_test(&wss.clean_counter)) { 249 - /* 250 - * Set, not add, the clean period. This avoids an issue 251 - * where the counter could decrement below the clean period. 
252 - * Doing a set can result in lost decrements, slowing the 253 - * clean advance. Since this a heuristic, this possible 254 - * slowdown is OK. 255 - * 256 - * An alternative is to loop, advancing the counter by a 257 - * clean period until the result is > 0. However, this could 258 - * lead to several threads keeping another in the clean loop. 259 - * This could be mitigated by limiting the number of times 260 - * we stay in the loop. 261 - */ 262 - atomic_set(&wss.clean_counter, wss_clean_period); 263 - 264 - /* 265 - * Uniquely grab the entry to clean and move to next. 266 - * The current entry is always the lower bits of 267 - * wss.clean_entry. The table size, wss.num_entries, 268 - * is always a power-of-2. 269 - */ 270 - entry = (atomic_inc_return(&wss.clean_entry) - 1) 271 - & (wss.num_entries - 1); 272 - 273 - /* clear the entry and count the bits */ 274 - bits = xchg(&wss.entries[entry], 0); 275 - weight = hweight64((u64)bits); 276 - /* only adjust the contended total count if needed */ 277 - if (weight) 278 - atomic_sub(weight, &wss.total_count); 279 - } 280 - } 281 - 282 - /* 283 - * Insert the given address into the working set array. 284 - */ 285 - static void wss_insert(void *address) 286 - { 287 - u32 page = ((unsigned long)address >> PAGE_SHIFT) & wss.pages_mask; 288 - u32 entry = page / BITS_PER_LONG; /* assumes this ends up a shift */ 289 - u32 nr = page & (BITS_PER_LONG - 1); 290 - 291 - if (!test_and_set_bit(nr, &wss.entries[entry])) 292 - atomic_inc(&wss.total_count); 293 - 294 - wss_advance_clean_counter(); 295 - } 296 - 297 - /* 298 - * Is the working set larger than the threshold? 299 - */ 300 - static inline bool wss_exceeds_threshold(void) 301 - { 302 - return atomic_read(&wss.total_count) >= wss.threshold; 303 - } 304 160 305 161 /* 306 162 * Translate ib_wr_opcode into ib_wc_opcode. ··· 289 437 * System image GUID. 
290 438 */ 291 439 __be64 ib_hfi1_sys_image_guid; 292 - 293 - /** 294 - * hfi1_copy_sge - copy data to SGE memory 295 - * @ss: the SGE state 296 - * @data: the data to copy 297 - * @length: the length of the data 298 - * @release: boolean to release MR 299 - * @copy_last: do a separate copy of the last 8 bytes 300 - */ 301 - void hfi1_copy_sge( 302 - struct rvt_sge_state *ss, 303 - void *data, u32 length, 304 - bool release, 305 - bool copy_last) 306 - { 307 - struct rvt_sge *sge = &ss->sge; 308 - int i; 309 - bool in_last = false; 310 - bool cacheless_copy = false; 311 - 312 - if (sge_copy_mode == COPY_CACHELESS) { 313 - cacheless_copy = length >= PAGE_SIZE; 314 - } else if (sge_copy_mode == COPY_ADAPTIVE) { 315 - if (length >= PAGE_SIZE) { 316 - /* 317 - * NOTE: this *assumes*: 318 - * o The first vaddr is the dest. 319 - * o If multiple pages, then vaddr is sequential. 320 - */ 321 - wss_insert(sge->vaddr); 322 - if (length >= (2 * PAGE_SIZE)) 323 - wss_insert(sge->vaddr + PAGE_SIZE); 324 - 325 - cacheless_copy = wss_exceeds_threshold(); 326 - } else { 327 - wss_advance_clean_counter(); 328 - } 329 - } 330 - if (copy_last) { 331 - if (length > 8) { 332 - length -= 8; 333 - } else { 334 - copy_last = false; 335 - in_last = true; 336 - } 337 - } 338 - 339 - again: 340 - while (length) { 341 - u32 len = rvt_get_sge_length(sge, length); 342 - 343 - WARN_ON_ONCE(len == 0); 344 - if (unlikely(in_last)) { 345 - /* enforce byte transfer ordering */ 346 - for (i = 0; i < len; i++) 347 - ((u8 *)sge->vaddr)[i] = ((u8 *)data)[i]; 348 - } else if (cacheless_copy) { 349 - cacheless_memcpy(sge->vaddr, data, len); 350 - } else { 351 - memcpy(sge->vaddr, data, len); 352 - } 353 - rvt_update_sge(ss, len, release); 354 - data += len; 355 - length -= len; 356 - } 357 - 358 - if (copy_last) { 359 - copy_last = false; 360 - in_last = true; 361 - length = 8; 362 - goto again; 363 - } 364 - } 365 440 366 441 /* 367 442 * Make sure the QP is ready and able to accept the given opcode. 
··· 492 713 493 714 spin_lock(&qp->s_lock); 494 715 if (tx->wqe) { 495 - hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS); 716 + rvt_send_complete(qp, tx->wqe, IB_WC_SUCCESS); 496 717 } else if (qp->ibqp.qp_type == IB_QPT_RC) { 497 718 struct hfi1_opa_header *hdr; 498 719 ··· 516 737 if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { 517 738 write_seqlock(&dev->iowait_lock); 518 739 list_add_tail(&ps->s_txreq->txreq.list, 519 - &priv->s_iowait.tx_head); 740 + &ps->wait->tx_head); 520 741 if (list_empty(&priv->s_iowait.list)) { 521 742 if (list_empty(&dev->memwait)) 522 743 mod_timer(&dev->mem_timer, jiffies + 1); ··· 527 748 rvt_get_qp(qp); 528 749 } 529 750 write_sequnlock(&dev->iowait_lock); 530 - qp->s_flags &= ~RVT_S_BUSY; 751 + hfi1_qp_unbusy(qp, ps->wait); 531 752 ret = -EBUSY; 532 753 } 533 754 spin_unlock_irqrestore(&qp->s_lock, flags); ··· 729 950 if (unlikely(ret)) 730 951 goto bail_build; 731 952 } 732 - ret = sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq, 733 - ps->pkts_sent); 953 + ret = sdma_send_txreq(tx->sde, ps->wait, &tx->txreq, ps->pkts_sent); 734 954 if (unlikely(ret < 0)) { 735 955 if (ret == -ECOMM) 736 956 goto bail_ecomm; ··· 779 1001 if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { 780 1002 write_seqlock(&dev->iowait_lock); 781 1003 list_add_tail(&ps->s_txreq->txreq.list, 782 - &priv->s_iowait.tx_head); 1004 + &ps->wait->tx_head); 783 1005 if (list_empty(&priv->s_iowait.list)) { 784 1006 struct hfi1_ibdev *dev = &dd->verbs_dev; 785 1007 int was_empty; ··· 798 1020 hfi1_sc_wantpiobuf_intr(sc, 1); 799 1021 } 800 1022 write_sequnlock(&dev->iowait_lock); 801 - qp->s_flags &= ~RVT_S_BUSY; 1023 + hfi1_qp_unbusy(qp, ps->wait); 802 1024 ret = -EBUSY; 803 1025 } 804 1026 spin_unlock_irqrestore(&qp->s_lock, flags); ··· 938 1160 pio_bail: 939 1161 if (qp->s_wqe) { 940 1162 spin_lock_irqsave(&qp->s_lock, flags); 941 - hfi1_send_complete(qp, qp->s_wqe, wc_status); 1163 + rvt_send_complete(qp, qp->s_wqe, wc_status); 942 1164 
spin_unlock_irqrestore(&qp->s_lock, flags); 943 1165 } else if (qp->ibqp.qp_type == IB_QPT_RC) { 944 1166 spin_lock_irqsave(&qp->s_lock, flags); ··· 1145 1367 hfi1_cdbg(PIO, "%s() Failed. Completing with err", 1146 1368 __func__); 1147 1369 spin_lock_irqsave(&qp->s_lock, flags); 1148 - hfi1_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR); 1370 + rvt_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR); 1149 1371 spin_unlock_irqrestore(&qp->s_lock, flags); 1150 1372 } 1151 1373 return -EINVAL; ··· 1721 1943 dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp; 1722 1944 dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp; 1723 1945 dd->verbs_dev.rdi.driver_f.notify_restart_rc = hfi1_restart_rc; 1724 - dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe; 1946 + dd->verbs_dev.rdi.driver_f.setup_wqe = hfi1_setup_wqe; 1725 1947 dd->verbs_dev.rdi.driver_f.comp_vect_cpu_lookup = 1726 1948 hfi1_comp_vect_mappings_lookup; 1727 1949 ··· 1734 1956 dd->verbs_dev.rdi.dparms.lkey_table_size = hfi1_lkey_table_size; 1735 1957 dd->verbs_dev.rdi.dparms.nports = dd->num_pports; 1736 1958 dd->verbs_dev.rdi.dparms.npkeys = hfi1_get_npkeys(dd); 1959 + dd->verbs_dev.rdi.dparms.sge_copy_mode = sge_copy_mode; 1960 + dd->verbs_dev.rdi.dparms.wss_threshold = wss_threshold; 1961 + dd->verbs_dev.rdi.dparms.wss_clean_period = wss_clean_period; 1737 1962 1738 1963 /* post send table */ 1739 1964 dd->verbs_dev.rdi.post_parms = hfi1_post_parms; 1965 + 1966 + /* opcode translation table */ 1967 + dd->verbs_dev.rdi.wc_opcode = ib_hfi1_wc_opcode; 1740 1968 1741 1969 ppd = dd->pport; 1742 1970 for (i = 0; i < dd->num_pports; i++, ppd++) ··· 1750 1966 &ppd->ibport_data.rvp, 1751 1967 i, 1752 1968 ppd->pkeys); 1969 + 1970 + rdma_set_device_sysfs_group(&dd->verbs_dev.rdi.ibdev, 1971 + &ib_hfi1_attr_group); 1753 1972 1754 1973 ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_HFI1); 1755 1974 if (ret)
+5 -30
drivers/infiniband/hw/hfi1/verbs.h
··· 166 166 * This structure is used to hold commonly lookedup and computed values during 167 167 * the send engine progress. 168 168 */ 169 + struct iowait_work; 169 170 struct hfi1_pkt_state { 170 171 struct hfi1_ibdev *dev; 171 172 struct hfi1_ibport *ibp; 172 173 struct hfi1_pportdata *ppd; 173 174 struct verbs_txreq *s_txreq; 175 + struct iowait_work *wait; 174 176 unsigned long flags; 175 177 unsigned long timeout; 176 178 unsigned long timeout_int; ··· 249 247 return container_of(rdi, struct hfi1_ibdev, rdi); 250 248 } 251 249 252 - static inline struct rvt_qp *iowait_to_qp(struct iowait *s_iowait) 250 + static inline struct rvt_qp *iowait_to_qp(struct iowait *s_iowait) 253 251 { 254 252 struct hfi1_qp_priv *priv; 255 253 ··· 315 313 316 314 int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps); 317 315 318 - void hfi1_copy_sge(struct rvt_sge_state *ss, void *data, u32 length, 319 - bool release, bool copy_last); 320 - 321 316 void hfi1_cnp_rcv(struct hfi1_packet *packet); 322 317 323 318 void hfi1_uc_rcv(struct hfi1_packet *packet); ··· 342 343 void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, 343 344 int attr_mask, struct ib_udata *udata); 344 345 void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait); 345 - int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe); 346 + int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, 347 + bool *call_send); 346 348 347 349 extern const u32 rc_only_opcode; 348 350 extern const u32 uc_only_opcode; ··· 362 362 void hfi1_do_send_from_rvt(struct rvt_qp *qp); 363 363 364 364 void hfi1_do_send(struct rvt_qp *qp, bool in_thread); 365 - 366 - void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, 367 - enum ib_wc_status status); 368 365 369 366 void hfi1_send_rc_ack(struct hfi1_packet *packet, bool is_fecn); 370 367 ··· 386 389 387 390 int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, 388 391 u64 pbc); 389 - 390 - int hfi1_wss_init(void); 391 - void 
hfi1_wss_exit(void); 392 - 393 - /* platform specific: return the lowest level cache (llc) size, in KiB */ 394 - static inline int wss_llc_size(void) 395 - { 396 - /* assume that the boot CPU value is universal for all CPUs */ 397 - return boot_cpu_data.x86_cache_size; 398 - } 399 - 400 - /* platform specific: cacheless copy */ 401 - static inline void cacheless_memcpy(void *dst, void *src, size_t n) 402 - { 403 - /* 404 - * Use the only available X64 cacheless copy. Add a __user cast 405 - * to quiet sparse. The src agument is already in the kernel so 406 - * there are no security issues. The extra fault recovery machinery 407 - * is not invoked. 408 - */ 409 - __copy_user_nocache(dst, (void __user *)src, n, 0); 410 - } 411 392 412 393 static inline bool opa_bth_is_migration(struct ib_other_headers *ohdr) 413 394 {
+4 -7
drivers/infiniband/hw/hfi1/verbs_txreq.h
··· 102 102 return &tx->txreq; 103 103 } 104 104 105 - static inline struct verbs_txreq *get_waiting_verbs_txreq(struct rvt_qp *qp) 105 + static inline struct verbs_txreq *get_waiting_verbs_txreq(struct iowait_work *w) 106 106 { 107 107 struct sdma_txreq *stx; 108 - struct hfi1_qp_priv *priv = qp->priv; 109 108 110 - stx = iowait_get_txhead(&priv->s_iowait); 109 + stx = iowait_get_txhead(w); 111 110 if (stx) 112 111 return container_of(stx, struct verbs_txreq, txreq); 113 112 return NULL; 114 113 } 115 114 116 - static inline bool verbs_txreq_queued(struct rvt_qp *qp) 115 + static inline bool verbs_txreq_queued(struct iowait_work *w) 117 116 { 118 - struct hfi1_qp_priv *priv = qp->priv; 119 - 120 - return iowait_packet_queued(&priv->s_iowait); 117 + return iowait_packet_queued(w); 121 118 } 122 119 123 120 void hfi1_put_txreq(struct verbs_txreq *tx);
+6 -6
drivers/infiniband/hw/hfi1/vnic_main.c
··· 120 120 uctxt->seq_cnt = 1; 121 121 uctxt->is_vnic = true; 122 122 123 - hfi1_set_vnic_msix_info(uctxt); 123 + msix_request_rcd_irq(uctxt); 124 124 125 125 hfi1_stats.sps_ctxts++; 126 126 dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt); ··· 135 135 dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt); 136 136 flush_wc(); 137 137 138 - hfi1_reset_vnic_msix_info(uctxt); 139 - 140 138 /* 141 139 * Disable receive context and interrupt available, reset all 142 140 * RcvCtxtCtrl bits to default values. ··· 145 147 HFI1_RCVCTRL_ONE_PKT_EGR_DIS | 146 148 HFI1_RCVCTRL_NO_RHQ_DROP_DIS | 147 149 HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt); 150 + 151 + /* msix_intr will always be > 0, only clean up if this is true */ 152 + if (uctxt->msix_intr) 153 + msix_free_irq(dd, uctxt->msix_intr); 148 154 149 155 uctxt->event_flags = 0; 150 156 ··· 628 626 idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id); 629 627 630 628 /* ensure irqs see the change */ 631 - hfi1_vnic_synchronize_irq(dd); 629 + msix_vnic_synchronize_irq(dd); 632 630 633 631 /* remove unread skbs */ 634 632 for (i = 0; i < vinfo->num_rx_q; i++) { ··· 692 690 rc = hfi1_vnic_txreq_init(dd); 693 691 if (rc) 694 692 goto txreq_fail; 695 - 696 - dd->vnic.msix_idx = dd->first_dyn_msix_idx; 697 693 } 698 694 699 695 for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) {
+12 -9
drivers/infiniband/hw/hfi1/vnic_sdma.c
··· 1 1 /* 2 - * Copyright(c) 2017 Intel Corporation. 2 + * Copyright(c) 2017 - 2018 Intel Corporation. 3 3 * 4 4 * This file is provided under a dual BSD/GPLv2 license. When using or 5 5 * redistributing this file, you may do so under either license. ··· 198 198 goto free_desc; 199 199 tx->retry_count = 0; 200 200 201 - ret = sdma_send_txreq(sde, &vnic_sdma->wait, &tx->txreq, 202 - vnic_sdma->pkts_sent); 201 + ret = sdma_send_txreq(sde, iowait_get_ib_work(&vnic_sdma->wait), 202 + &tx->txreq, vnic_sdma->pkts_sent); 203 203 /* When -ECOMM, sdma callback will be called with ABORT status */ 204 204 if (unlikely(ret && unlikely(ret != -ECOMM))) 205 205 goto free_desc; ··· 230 230 * become available. 231 231 */ 232 232 static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde, 233 - struct iowait *wait, 233 + struct iowait_work *wait, 234 234 struct sdma_txreq *txreq, 235 235 uint seq, 236 236 bool pkts_sent) 237 237 { 238 238 struct hfi1_vnic_sdma *vnic_sdma = 239 - container_of(wait, struct hfi1_vnic_sdma, wait); 239 + container_of(wait->iow, struct hfi1_vnic_sdma, wait); 240 240 struct hfi1_ibdev *dev = &vnic_sdma->dd->verbs_dev; 241 241 struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq); 242 242 ··· 247 247 vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED; 248 248 write_seqlock(&dev->iowait_lock); 249 249 if (list_empty(&vnic_sdma->wait.list)) 250 - iowait_queue(pkts_sent, wait, &sde->dmawait); 250 + iowait_queue(pkts_sent, wait->iow, &sde->dmawait); 251 251 write_sequnlock(&dev->iowait_lock); 252 252 return -EBUSY; 253 253 } ··· 285 285 for (i = 0; i < vinfo->num_tx_q; i++) { 286 286 struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[i]; 287 287 288 - iowait_init(&vnic_sdma->wait, 0, NULL, hfi1_vnic_sdma_sleep, 288 + iowait_init(&vnic_sdma->wait, 0, NULL, NULL, 289 + hfi1_vnic_sdma_sleep, 289 290 hfi1_vnic_sdma_wakeup, NULL); 290 291 vnic_sdma->sde = &vinfo->dd->per_sdma[i]; 291 292 vnic_sdma->dd = vinfo->dd; ··· 296 295 297 296 /* Add a free descriptor 
watermark for wakeups */ 298 297 if (vnic_sdma->sde->descq_cnt > HFI1_VNIC_SDMA_DESC_WTRMRK) { 298 + struct iowait_work *work; 299 + 299 300 INIT_LIST_HEAD(&vnic_sdma->stx.list); 300 301 vnic_sdma->stx.num_desc = HFI1_VNIC_SDMA_DESC_WTRMRK; 301 - list_add_tail(&vnic_sdma->stx.list, 302 - &vnic_sdma->wait.tx_head); 302 + work = iowait_get_ib_work(&vnic_sdma->wait); 303 + list_add_tail(&vnic_sdma->stx.list, &work->tx_head); 303 304 } 304 305 } 305 306 }
+1
drivers/infiniband/hw/hns/Kconfig
··· 1 1 config INFINIBAND_HNS 2 2 tristate "HNS RoCE Driver" 3 3 depends on NET_VENDOR_HISILICON 4 + depends on INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS 4 5 depends on ARM64 || (COMPILE_TEST && 64BIT) 5 6 ---help--- 6 7 This is a RoCE/RDMA driver for the Hisilicon RoCE engine. The engine
+5 -1
drivers/infiniband/hw/hns/hns_roce_ah.c
··· 49 49 struct hns_roce_ah *ah; 50 50 u16 vlan_tag = 0xffff; 51 51 const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); 52 + bool vlan_en = false; 52 53 53 54 ah = kzalloc(sizeof(*ah), GFP_ATOMIC); 54 55 if (!ah) ··· 59 58 memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); 60 59 61 60 gid_attr = ah_attr->grh.sgid_attr; 62 - if (is_vlan_dev(gid_attr->ndev)) 61 + if (is_vlan_dev(gid_attr->ndev)) { 63 62 vlan_tag = vlan_dev_vlan_id(gid_attr->ndev); 63 + vlan_en = true; 64 + } 64 65 65 66 if (vlan_tag < 0x1000) 66 67 vlan_tag |= (rdma_ah_get_sl(ah_attr) & ··· 74 71 HNS_ROCE_PORT_NUM_SHIFT)); 75 72 ah->av.gid_index = grh->sgid_index; 76 73 ah->av.vlan = cpu_to_le16(vlan_tag); 74 + ah->av.vlan_en = vlan_en; 77 75 dev_dbg(dev, "gid_index = 0x%x,vlan = 0x%x\n", ah->av.gid_index, 78 76 ah->av.vlan); 79 77
+37 -8
drivers/infiniband/hw/hns/hns_roce_device.h
··· 88 88 #define BITMAP_RR 1 89 89 90 90 #define MR_TYPE_MR 0x00 91 + #define MR_TYPE_FRMR 0x01 91 92 #define MR_TYPE_DMA 0x03 93 + 94 + #define HNS_ROCE_FRMR_MAX_PA 512 92 95 93 96 #define PKEY_ID 0xffff 94 97 #define GUID_LEN 8 ··· 196 193 HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2), 197 194 HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3), 198 195 HNS_ROCE_CAP_FLAG_SQ_RECORD_DB = BIT(4), 196 + HNS_ROCE_CAP_FLAG_MW = BIT(7), 197 + HNS_ROCE_CAP_FLAG_FRMR = BIT(8), 198 + HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10), 199 199 }; 200 200 201 201 enum hns_roce_mtt_type { ··· 225 219 unsigned long logic_idx; 226 220 }; 227 221 228 - struct hns_roce_vma_data { 229 - struct list_head list; 230 - struct vm_area_struct *vma; 231 - struct mutex *vma_list_mutex; 232 - }; 233 - 234 222 struct hns_roce_ucontext { 235 223 struct ib_ucontext ibucontext; 236 224 struct hns_roce_uar uar; 237 225 struct list_head page_list; 238 226 struct mutex page_mutex; 239 - struct list_head vma_list; 240 - struct mutex vma_list_mutex; 241 227 }; 242 228 243 229 struct hns_roce_pd { ··· 291 293 enum hns_roce_mtt_type mtt_type; 292 294 }; 293 295 296 + struct hns_roce_mw { 297 + struct ib_mw ibmw; 298 + u32 pdn; 299 + u32 rkey; 300 + int enabled; /* MW's active status */ 301 + u32 pbl_hop_num; 302 + u32 pbl_ba_pg_sz; 303 + u32 pbl_buf_pg_sz; 304 + }; 305 + 294 306 /* Only support 4K page size for mr register */ 295 307 #define MR_SIZE_4K 0 296 308 ··· 312 304 u32 key; /* Key of MR */ 313 305 u32 pd; /* PD num of MR */ 314 306 u32 access;/* Access permission of MR */ 307 + u32 npages; 315 308 int enabled; /* MR's active status */ 316 309 int type; /* MR's register type */ 317 310 u64 *pbl_buf;/* MR's PBL space */ ··· 466 457 u8 dgid[HNS_ROCE_GID_SIZE]; 467 458 u8 mac[6]; 468 459 __le16 vlan; 460 + bool vlan_en; 469 461 }; 470 462 471 463 struct hns_roce_ah { ··· 666 656 }; 667 657 668 658 struct hns_roce_caps { 659 + u64 fw_ver; 669 660 u8 num_ports; 670 661 int gid_table_len[HNS_ROCE_MAX_PORTS]; 671 662 int 
pkey_table_len[HNS_ROCE_MAX_PORTS]; ··· 676 665 u32 max_sq_sg; /* 2 */ 677 666 u32 max_sq_inline; /* 32 */ 678 667 u32 max_rq_sg; /* 2 */ 668 + u32 max_extend_sg; 679 669 int num_qps; /* 256k */ 670 + int reserved_qps; 680 671 u32 max_wqes; /* 16k */ 681 672 u32 max_sq_desc_sz; /* 64 */ 682 673 u32 max_rq_desc_sz; /* 64 */ ··· 751 738 struct hns_roce_dev *hr_dev; 752 739 struct work_struct work; 753 740 u32 qpn; 741 + u32 cqn; 754 742 int event_type; 755 743 int sub_type; 756 744 }; ··· 778 764 struct hns_roce_mr *mr, int flags, u32 pdn, 779 765 int mr_access_flags, u64 iova, u64 size, 780 766 void *mb_buf); 767 + int (*frmr_write_mtpt)(void *mb_buf, struct hns_roce_mr *mr); 768 + int (*mw_write_mtpt)(void *mb_buf, struct hns_roce_mw *mw); 781 769 void (*write_cqc)(struct hns_roce_dev *hr_dev, 782 770 struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts, 783 771 dma_addr_t dma_handle, int nent, u32 vector); ··· 877 861 static inline struct hns_roce_mr *to_hr_mr(struct ib_mr *ibmr) 878 862 { 879 863 return container_of(ibmr, struct hns_roce_mr, ibmr); 864 + } 865 + 866 + static inline struct hns_roce_mw *to_hr_mw(struct ib_mw *ibmw) 867 + { 868 + return container_of(ibmw, struct hns_roce_mw, ibmw); 880 869 } 881 870 882 871 static inline struct hns_roce_qp *to_hr_qp(struct ib_qp *ibqp) ··· 989 968 int hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, u64 length, 990 969 u64 virt_addr, int mr_access_flags, struct ib_pd *pd, 991 970 struct ib_udata *udata); 971 + struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 972 + u32 max_num_sg); 973 + int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, 974 + unsigned int *sg_offset); 992 975 int hns_roce_dereg_mr(struct ib_mr *ibmr); 993 976 int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev, 994 977 struct hns_roce_cmd_mailbox *mailbox, 995 978 unsigned long mpt_index); 996 979 unsigned long key_to_hw_index(u32 key); 980 + 981 + struct ib_mw 
*hns_roce_alloc_mw(struct ib_pd *pd, enum ib_mw_type, 982 + struct ib_udata *udata); 983 + int hns_roce_dealloc_mw(struct ib_mw *ibmw); 997 984 998 985 void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size, 999 986 struct hns_roce_buf *buf);
+2 -2
drivers/infiniband/hw/hns/hns_roce_hw_v1.c
··· 731 731 cq_init_attr.comp_vector = 0; 732 732 cq = hns_roce_ib_create_cq(&hr_dev->ib_dev, &cq_init_attr, NULL, NULL); 733 733 if (IS_ERR(cq)) { 734 - dev_err(dev, "Create cq for reseved loop qp failed!"); 734 + dev_err(dev, "Create cq for reserved loop qp failed!"); 735 735 return -ENOMEM; 736 736 } 737 737 free_mr->mr_free_cq = to_hr_cq(cq); ··· 744 744 745 745 pd = hns_roce_alloc_pd(&hr_dev->ib_dev, NULL, NULL); 746 746 if (IS_ERR(pd)) { 747 - dev_err(dev, "Create pd for reseved loop qp failed!"); 747 + dev_err(dev, "Create pd for reserved loop qp failed!"); 748 748 ret = -ENOMEM; 749 749 goto alloc_pd_failed; 750 750 }
+396 -237
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
··· 54 54 dseg->len = cpu_to_le32(sg->length); 55 55 } 56 56 57 + static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, 58 + struct hns_roce_wqe_frmr_seg *fseg, 59 + const struct ib_reg_wr *wr) 60 + { 61 + struct hns_roce_mr *mr = to_hr_mr(wr->mr); 62 + 63 + /* use ib_access_flags */ 64 + roce_set_bit(rc_sq_wqe->byte_4, 65 + V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S, 66 + wr->access & IB_ACCESS_MW_BIND ? 1 : 0); 67 + roce_set_bit(rc_sq_wqe->byte_4, 68 + V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S, 69 + wr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0); 70 + roce_set_bit(rc_sq_wqe->byte_4, 71 + V2_RC_FRMR_WQE_BYTE_4_RR_S, 72 + wr->access & IB_ACCESS_REMOTE_READ ? 1 : 0); 73 + roce_set_bit(rc_sq_wqe->byte_4, 74 + V2_RC_FRMR_WQE_BYTE_4_RW_S, 75 + wr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0); 76 + roce_set_bit(rc_sq_wqe->byte_4, 77 + V2_RC_FRMR_WQE_BYTE_4_LW_S, 78 + wr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0); 79 + 80 + /* Data structure reuse may lead to confusion */ 81 + rc_sq_wqe->msg_len = cpu_to_le32(mr->pbl_ba & 0xffffffff); 82 + rc_sq_wqe->inv_key = cpu_to_le32(mr->pbl_ba >> 32); 83 + 84 + rc_sq_wqe->byte_16 = cpu_to_le32(wr->mr->length & 0xffffffff); 85 + rc_sq_wqe->byte_20 = cpu_to_le32(wr->mr->length >> 32); 86 + rc_sq_wqe->rkey = cpu_to_le32(wr->key); 87 + rc_sq_wqe->va = cpu_to_le64(wr->mr->iova); 88 + 89 + fseg->pbl_size = cpu_to_le32(mr->pbl_size); 90 + roce_set_field(fseg->mode_buf_pg_sz, 91 + V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M, 92 + V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S, 93 + mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET); 94 + roce_set_bit(fseg->mode_buf_pg_sz, 95 + V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0); 96 + } 97 + 98 + static void set_atomic_seg(struct hns_roce_wqe_atomic_seg *aseg, 99 + const struct ib_atomic_wr *wr) 100 + { 101 + if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { 102 + aseg->fetchadd_swap_data = cpu_to_le64(wr->swap); 103 + aseg->cmp_data = cpu_to_le64(wr->compare_add); 104 + } else { 105 + aseg->fetchadd_swap_data = cpu_to_le64(wr->compare_add); 106 
+ aseg->cmp_data = 0; 107 + } 108 + } 109 + 57 110 static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, 58 111 unsigned int *sge_ind) 59 112 { ··· 174 121 } 175 122 176 123 if (wr->opcode == IB_WR_RDMA_READ) { 124 + *bad_wr = wr; 177 125 dev_err(hr_dev->dev, "Not support inline data!\n"); 178 126 return -EINVAL; 179 127 } ··· 233 179 struct hns_roce_v2_ud_send_wqe *ud_sq_wqe; 234 180 struct hns_roce_v2_rc_send_wqe *rc_sq_wqe; 235 181 struct hns_roce_qp *qp = to_hr_qp(ibqp); 182 + struct hns_roce_wqe_frmr_seg *fseg; 236 183 struct device *dev = hr_dev->dev; 237 184 struct hns_roce_v2_db sq_db; 238 185 struct ib_qp_attr attr; ··· 246 191 int attr_mask; 247 192 u32 tmp_len; 248 193 int ret = 0; 194 + u32 hr_op; 249 195 u8 *smac; 250 196 int nreq; 251 197 int i; ··· 412 356 V2_UD_SEND_WQE_BYTE_40_PORTN_S, 413 357 qp->port); 414 358 359 + roce_set_bit(ud_sq_wqe->byte_40, 360 + V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S, 361 + ah->av.vlan_en ? 1 : 0); 415 362 roce_set_field(ud_sq_wqe->byte_48, 416 363 V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M, 417 364 V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S, ··· 465 406 roce_set_bit(rc_sq_wqe->byte_4, 466 407 V2_RC_SEND_WQE_BYTE_4_OWNER_S, owner_bit); 467 408 409 + wqe += sizeof(struct hns_roce_v2_rc_send_wqe); 468 410 switch (wr->opcode) { 469 411 case IB_WR_RDMA_READ: 470 - roce_set_field(rc_sq_wqe->byte_4, 471 - V2_RC_SEND_WQE_BYTE_4_OPCODE_M, 472 - V2_RC_SEND_WQE_BYTE_4_OPCODE_S, 473 - HNS_ROCE_V2_WQE_OP_RDMA_READ); 412 + hr_op = HNS_ROCE_V2_WQE_OP_RDMA_READ; 474 413 rc_sq_wqe->rkey = 475 414 cpu_to_le32(rdma_wr(wr)->rkey); 476 415 rc_sq_wqe->va = 477 416 cpu_to_le64(rdma_wr(wr)->remote_addr); 478 417 break; 479 418 case IB_WR_RDMA_WRITE: 480 - roce_set_field(rc_sq_wqe->byte_4, 481 - V2_RC_SEND_WQE_BYTE_4_OPCODE_M, 482 - V2_RC_SEND_WQE_BYTE_4_OPCODE_S, 483 - HNS_ROCE_V2_WQE_OP_RDMA_WRITE); 419 + hr_op = HNS_ROCE_V2_WQE_OP_RDMA_WRITE; 484 420 rc_sq_wqe->rkey = 485 421 cpu_to_le32(rdma_wr(wr)->rkey); 486 422 rc_sq_wqe->va = 487 
423 cpu_to_le64(rdma_wr(wr)->remote_addr); 488 424 break; 489 425 case IB_WR_RDMA_WRITE_WITH_IMM: 490 - roce_set_field(rc_sq_wqe->byte_4, 491 - V2_RC_SEND_WQE_BYTE_4_OPCODE_M, 492 - V2_RC_SEND_WQE_BYTE_4_OPCODE_S, 493 - HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM); 426 + hr_op = HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM; 494 427 rc_sq_wqe->rkey = 495 428 cpu_to_le32(rdma_wr(wr)->rkey); 496 429 rc_sq_wqe->va = 497 430 cpu_to_le64(rdma_wr(wr)->remote_addr); 498 431 break; 499 432 case IB_WR_SEND: 500 - roce_set_field(rc_sq_wqe->byte_4, 501 - V2_RC_SEND_WQE_BYTE_4_OPCODE_M, 502 - V2_RC_SEND_WQE_BYTE_4_OPCODE_S, 503 - HNS_ROCE_V2_WQE_OP_SEND); 433 + hr_op = HNS_ROCE_V2_WQE_OP_SEND; 504 434 break; 505 435 case IB_WR_SEND_WITH_INV: 506 - roce_set_field(rc_sq_wqe->byte_4, 507 - V2_RC_SEND_WQE_BYTE_4_OPCODE_M, 508 - V2_RC_SEND_WQE_BYTE_4_OPCODE_S, 509 - HNS_ROCE_V2_WQE_OP_SEND_WITH_INV); 436 + hr_op = HNS_ROCE_V2_WQE_OP_SEND_WITH_INV; 510 437 break; 511 438 case IB_WR_SEND_WITH_IMM: 512 - roce_set_field(rc_sq_wqe->byte_4, 513 - V2_RC_SEND_WQE_BYTE_4_OPCODE_M, 514 - V2_RC_SEND_WQE_BYTE_4_OPCODE_S, 515 - HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM); 439 + hr_op = HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM; 516 440 break; 517 441 case IB_WR_LOCAL_INV: 518 - roce_set_field(rc_sq_wqe->byte_4, 519 - V2_RC_SEND_WQE_BYTE_4_OPCODE_M, 520 - V2_RC_SEND_WQE_BYTE_4_OPCODE_S, 521 - HNS_ROCE_V2_WQE_OP_LOCAL_INV); 442 + hr_op = HNS_ROCE_V2_WQE_OP_LOCAL_INV; 443 + roce_set_bit(rc_sq_wqe->byte_4, 444 + V2_RC_SEND_WQE_BYTE_4_SO_S, 1); 445 + rc_sq_wqe->inv_key = 446 + cpu_to_le32(wr->ex.invalidate_rkey); 447 + break; 448 + case IB_WR_REG_MR: 449 + hr_op = HNS_ROCE_V2_WQE_OP_FAST_REG_PMR; 450 + fseg = wqe; 451 + set_frmr_seg(rc_sq_wqe, fseg, reg_wr(wr)); 522 452 break; 523 453 case IB_WR_ATOMIC_CMP_AND_SWP: 524 - roce_set_field(rc_sq_wqe->byte_4, 525 - V2_RC_SEND_WQE_BYTE_4_OPCODE_M, 526 - V2_RC_SEND_WQE_BYTE_4_OPCODE_S, 527 - HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP); 454 + hr_op = HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP; 455 + 
rc_sq_wqe->rkey = 456 + cpu_to_le32(atomic_wr(wr)->rkey); 457 + rc_sq_wqe->va = 458 + cpu_to_le64(atomic_wr(wr)->remote_addr); 528 459 break; 529 460 case IB_WR_ATOMIC_FETCH_AND_ADD: 530 - roce_set_field(rc_sq_wqe->byte_4, 531 - V2_RC_SEND_WQE_BYTE_4_OPCODE_M, 532 - V2_RC_SEND_WQE_BYTE_4_OPCODE_S, 533 - HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD); 461 + hr_op = HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD; 462 + rc_sq_wqe->rkey = 463 + cpu_to_le32(atomic_wr(wr)->rkey); 464 + rc_sq_wqe->va = 465 + cpu_to_le64(atomic_wr(wr)->remote_addr); 534 466 break; 535 467 case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: 536 - roce_set_field(rc_sq_wqe->byte_4, 537 - V2_RC_SEND_WQE_BYTE_4_OPCODE_M, 538 - V2_RC_SEND_WQE_BYTE_4_OPCODE_S, 539 - HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP); 468 + hr_op = 469 + HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP; 540 470 break; 541 471 case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD: 542 - roce_set_field(rc_sq_wqe->byte_4, 543 - V2_RC_SEND_WQE_BYTE_4_OPCODE_M, 544 - V2_RC_SEND_WQE_BYTE_4_OPCODE_S, 545 - HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD); 472 + hr_op = 473 + HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD; 546 474 break; 547 475 default: 548 - roce_set_field(rc_sq_wqe->byte_4, 549 - V2_RC_SEND_WQE_BYTE_4_OPCODE_M, 550 - V2_RC_SEND_WQE_BYTE_4_OPCODE_S, 551 - HNS_ROCE_V2_WQE_OP_MASK); 476 + hr_op = HNS_ROCE_V2_WQE_OP_MASK; 552 477 break; 553 478 } 554 479 555 - wqe += sizeof(struct hns_roce_v2_rc_send_wqe); 480 + roce_set_field(rc_sq_wqe->byte_4, 481 + V2_RC_SEND_WQE_BYTE_4_OPCODE_M, 482 + V2_RC_SEND_WQE_BYTE_4_OPCODE_S, hr_op); 556 483 557 - ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe, wqe, 558 - &sge_ind, bad_wr); 559 - if (ret) 560 - goto out; 484 + if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || 485 + wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) { 486 + struct hns_roce_v2_wqe_data_seg *dseg; 487 + 488 + dseg = wqe; 489 + set_data_seg_v2(dseg, wr->sg_list); 490 + wqe += sizeof(struct hns_roce_v2_wqe_data_seg); 491 + set_atomic_seg(wqe, atomic_wr(wr)); 492 + 
roce_set_field(rc_sq_wqe->byte_16, 493 + V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, 494 + V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, 495 + wr->num_sge); 496 + } else if (wr->opcode != IB_WR_REG_MR) { 497 + ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe, 498 + wqe, &sge_ind, bad_wr); 499 + if (ret) 500 + goto out; 501 + } 502 + 561 503 ind++; 562 504 } else { 563 505 dev_err(dev, "Illegal qp_type(0x%x)\n", ibqp->qp_type); ··· 995 935 996 936 resp = (struct hns_roce_query_version *)desc.data; 997 937 hr_dev->hw_rev = le32_to_cpu(resp->rocee_hw_version); 998 - hr_dev->vendor_id = le32_to_cpu(resp->rocee_vendor_id); 938 + hr_dev->vendor_id = hr_dev->pci_dev->vendor; 939 + 940 + return 0; 941 + } 942 + 943 + static int hns_roce_query_fw_ver(struct hns_roce_dev *hr_dev) 944 + { 945 + struct hns_roce_query_fw_info *resp; 946 + struct hns_roce_cmq_desc desc; 947 + int ret; 948 + 949 + hns_roce_cmq_setup_basic_desc(&desc, HNS_QUERY_FW_VER, true); 950 + ret = hns_roce_cmq_send(hr_dev, &desc, 1); 951 + if (ret) 952 + return ret; 953 + 954 + resp = (struct hns_roce_query_fw_info *)desc.data; 955 + hr_dev->caps.fw_ver = (u64)(le32_to_cpu(resp->fw_ver)); 999 956 1000 957 return 0; 1001 958 } ··· 1235 1158 1236 1159 ret = hns_roce_cmq_query_hw_info(hr_dev); 1237 1160 if (ret) { 1161 + dev_err(hr_dev->dev, "Query hardware version fail, ret = %d.\n", 1162 + ret); 1163 + return ret; 1164 + } 1165 + 1166 + ret = hns_roce_query_fw_ver(hr_dev); 1167 + if (ret) { 1238 1168 dev_err(hr_dev->dev, "Query firmware version fail, ret = %d.\n", 1239 1169 ret); 1240 1170 return ret; ··· 1269 1185 return ret; 1270 1186 } 1271 1187 1272 - hr_dev->vendor_part_id = 0; 1273 - hr_dev->sys_image_guid = 0; 1188 + 1189 + hr_dev->vendor_part_id = hr_dev->pci_dev->device; 1190 + hr_dev->sys_image_guid = be64_to_cpu(hr_dev->ib_dev.node_guid); 1274 1191 1275 1192 caps->num_qps = HNS_ROCE_V2_MAX_QP_NUM; 1276 1193 caps->max_wqes = HNS_ROCE_V2_MAX_WQE_NUM; 1277 1194 caps->num_cqs = HNS_ROCE_V2_MAX_CQ_NUM; 1278 1195 caps->max_cqes = 
HNS_ROCE_V2_MAX_CQE_NUM; 1279 1196 caps->max_sq_sg = HNS_ROCE_V2_MAX_SQ_SGE_NUM; 1197 + caps->max_extend_sg = HNS_ROCE_V2_MAX_EXTEND_SGE_NUM; 1280 1198 caps->max_rq_sg = HNS_ROCE_V2_MAX_RQ_SGE_NUM; 1281 1199 caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE; 1282 1200 caps->num_uars = HNS_ROCE_V2_UAR_NUM; ··· 1308 1222 caps->reserved_mrws = 1; 1309 1223 caps->reserved_uars = 0; 1310 1224 caps->reserved_cqs = 0; 1225 + caps->reserved_qps = HNS_ROCE_V2_RSV_QPS; 1311 1226 1312 1227 caps->qpc_ba_pg_sz = 0; 1313 1228 caps->qpc_buf_pg_sz = 0; ··· 1342 1255 HNS_ROCE_CAP_FLAG_RQ_INLINE | 1343 1256 HNS_ROCE_CAP_FLAG_RECORD_DB | 1344 1257 HNS_ROCE_CAP_FLAG_SQ_RECORD_DB; 1258 + 1259 + if (hr_dev->pci_dev->revision == 0x21) 1260 + caps->flags |= HNS_ROCE_CAP_FLAG_MW | 1261 + HNS_ROCE_CAP_FLAG_FRMR; 1262 + 1345 1263 caps->pkey_table_len[0] = 1; 1346 1264 caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM; 1347 1265 caps->ceqe_depth = HNS_ROCE_V2_COMP_EQE_NUM; 1348 1266 caps->aeqe_depth = HNS_ROCE_V2_ASYNC_EQE_NUM; 1349 1267 caps->local_ca_ack_delay = 0; 1350 1268 caps->max_mtu = IB_MTU_4096; 1269 + 1270 + if (hr_dev->pci_dev->revision == 0x21) 1271 + caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC; 1351 1272 1352 1273 ret = hns_roce_v2_set_bt(hr_dev); 1353 1274 if (ret) ··· 1785 1690 1786 1691 roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 0); 1787 1692 roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1); 1788 - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 0); 1693 + roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1); 1789 1694 roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_BIND_EN_S, 1790 1695 (mr->access & IB_ACCESS_MW_BIND ? 1 : 0)); 1791 - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_ATOMIC_EN_S, 0); 1696 + roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_ATOMIC_EN_S, 1697 + mr->access & IB_ACCESS_REMOTE_ATOMIC ? 
1 : 0); 1792 1698 roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S, 1793 1699 (mr->access & IB_ACCESS_REMOTE_READ ? 1 : 0)); 1794 1700 roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S, ··· 1909 1813 mr->iova = iova; 1910 1814 mr->size = size; 1911 1815 } 1816 + 1817 + return 0; 1818 + } 1819 + 1820 + static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr) 1821 + { 1822 + struct hns_roce_v2_mpt_entry *mpt_entry; 1823 + 1824 + mpt_entry = mb_buf; 1825 + memset(mpt_entry, 0, sizeof(*mpt_entry)); 1826 + 1827 + roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M, 1828 + V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE); 1829 + roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M, 1830 + V2_MPT_BYTE_4_PBL_HOP_NUM_S, 1); 1831 + roce_set_field(mpt_entry->byte_4_pd_hop_st, 1832 + V2_MPT_BYTE_4_PBL_BA_PG_SZ_M, 1833 + V2_MPT_BYTE_4_PBL_BA_PG_SZ_S, 1834 + mr->pbl_ba_pg_sz + PG_SHIFT_OFFSET); 1835 + roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, 1836 + V2_MPT_BYTE_4_PD_S, mr->pd); 1837 + 1838 + roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 1); 1839 + roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1); 1840 + roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1); 1841 + 1842 + roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_FRE_S, 1); 1843 + roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0); 1844 + roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 0); 1845 + roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1); 1846 + 1847 + mpt_entry->pbl_size = cpu_to_le32(mr->pbl_size); 1848 + 1849 + mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(mr->pbl_ba >> 3)); 1850 + roce_set_field(mpt_entry->byte_48_mode_ba, V2_MPT_BYTE_48_PBL_BA_H_M, 1851 + V2_MPT_BYTE_48_PBL_BA_H_S, 1852 + upper_32_bits(mr->pbl_ba >> 3)); 1853 + 1854 + roce_set_field(mpt_entry->byte_64_buf_pa1, 1855 + 
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M, 1856 + V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S, 1857 + mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET); 1858 + 1859 + return 0; 1860 + } 1861 + 1862 + static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw) 1863 + { 1864 + struct hns_roce_v2_mpt_entry *mpt_entry; 1865 + 1866 + mpt_entry = mb_buf; 1867 + memset(mpt_entry, 0, sizeof(*mpt_entry)); 1868 + 1869 + roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M, 1870 + V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE); 1871 + roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, 1872 + V2_MPT_BYTE_4_PD_S, mw->pdn); 1873 + roce_set_field(mpt_entry->byte_4_pd_hop_st, 1874 + V2_MPT_BYTE_4_PBL_HOP_NUM_M, 1875 + V2_MPT_BYTE_4_PBL_HOP_NUM_S, 1876 + mw->pbl_hop_num == HNS_ROCE_HOP_NUM_0 ? 1877 + 0 : mw->pbl_hop_num); 1878 + roce_set_field(mpt_entry->byte_4_pd_hop_st, 1879 + V2_MPT_BYTE_4_PBL_BA_PG_SZ_M, 1880 + V2_MPT_BYTE_4_PBL_BA_PG_SZ_S, 1881 + mw->pbl_ba_pg_sz + PG_SHIFT_OFFSET); 1882 + 1883 + roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1); 1884 + roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1); 1885 + 1886 + roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0); 1887 + roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 1); 1888 + roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1); 1889 + roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BQP_S, 1890 + mw->ibmw.type == IB_MW_TYPE_1 ? 
0 : 1); 1891 + 1892 + roce_set_field(mpt_entry->byte_64_buf_pa1, 1893 + V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M, 1894 + V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S, 1895 + mw->pbl_buf_pg_sz + PG_SHIFT_OFFSET); 1896 + 1897 + mpt_entry->lkey = cpu_to_le32(mw->rkey); 1912 1898 1913 1899 return 0; 1914 1900 } ··· 2452 2274 wc->src_qp = (u8)roce_get_field(cqe->byte_32, 2453 2275 V2_CQE_BYTE_32_RMT_QPN_M, 2454 2276 V2_CQE_BYTE_32_RMT_QPN_S); 2277 + wc->slid = 0; 2455 2278 wc->wc_flags |= (roce_get_bit(cqe->byte_32, 2456 2279 V2_CQE_BYTE_32_GRH_S) ? 2457 2280 IB_WC_GRH : 0); ··· 2466 2287 wc->smac[5] = roce_get_field(cqe->byte_28, 2467 2288 V2_CQE_BYTE_28_SMAC_5_M, 2468 2289 V2_CQE_BYTE_28_SMAC_5_S); 2469 - wc->vlan_id = 0xffff; 2290 + if (roce_get_bit(cqe->byte_28, V2_CQE_BYTE_28_VID_VLD_S)) { 2291 + wc->vlan_id = (u16)roce_get_field(cqe->byte_28, 2292 + V2_CQE_BYTE_28_VID_M, 2293 + V2_CQE_BYTE_28_VID_S); 2294 + } else { 2295 + wc->vlan_id = 0xffff; 2296 + } 2297 + 2470 2298 wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC); 2471 2299 wc->network_hdr_type = roce_get_field(cqe->byte_28, 2472 2300 V2_CQE_BYTE_28_PORT_TYPE_M, ··· 2775 2589 roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_RQ_TX_ERR_S, 0); 2776 2590 roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_RQ_RX_ERR_S, 0); 2777 2591 2778 - roce_set_field(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_MAPID_M, 2779 - V2_QPC_BYTE_60_MAPID_S, 0); 2592 + roce_set_field(qpc_mask->byte_60_qpst_tempid, V2_QPC_BYTE_60_TEMPID_M, 2593 + V2_QPC_BYTE_60_TEMPID_S, 0); 2780 2594 2781 - roce_set_bit(qpc_mask->byte_60_qpst_mapid, 2782 - V2_QPC_BYTE_60_INNER_MAP_IND_S, 0); 2783 - roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_SQ_MAP_IND_S, 2784 - 0); 2785 - roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_RQ_MAP_IND_S, 2786 - 0); 2787 - roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_EXT_MAP_IND_S, 2788 - 0); 2789 - roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_SQ_RLS_IND_S, 2790 - 0); 2791 - 
roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_SQ_EXT_IND_S, 2792 - 0); 2595 + roce_set_field(qpc_mask->byte_60_qpst_tempid, 2596 + V2_QPC_BYTE_60_SCC_TOKEN_M, V2_QPC_BYTE_60_SCC_TOKEN_S, 2597 + 0); 2598 + roce_set_bit(qpc_mask->byte_60_qpst_tempid, 2599 + V2_QPC_BYTE_60_SQ_DB_DOING_S, 0); 2600 + roce_set_bit(qpc_mask->byte_60_qpst_tempid, 2601 + V2_QPC_BYTE_60_RQ_DB_DOING_S, 0); 2793 2602 roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CNP_TX_FLAG_S, 0); 2794 2603 roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CE_FLAG_S, 0); 2795 2604 ··· 2866 2685 roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_TAIL_MAX_M, 2867 2686 V2_QPC_BYTE_132_TRRL_TAIL_MAX_S, 0); 2868 2687 2869 - roce_set_bit(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RSVD_RAQ_MAP_S, 0); 2688 + roce_set_bit(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RQ_RTY_WAIT_DO_S, 2689 + 0); 2870 2690 roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RAQ_TRRL_HEAD_M, 2871 2691 V2_QPC_BYTE_140_RAQ_TRRL_HEAD_S, 0); 2872 2692 roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RAQ_TRRL_TAIL_M, ··· 2876 2694 roce_set_field(qpc_mask->byte_144_raq, 2877 2695 V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_M, 2878 2696 V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_S, 0); 2879 - roce_set_bit(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RAQ_RTY_INI_IND_S, 2880 - 0); 2881 2697 roce_set_field(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RAQ_CREDIT_M, 2882 2698 V2_QPC_BYTE_144_RAQ_CREDIT_S, 0); 2883 2699 roce_set_bit(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RESP_RTY_FLG_S, 0); ··· 2901 2721 V2_QPC_BYTE_160_SQ_CONSUMER_IDX_M, 2902 2722 V2_QPC_BYTE_160_SQ_CONSUMER_IDX_S, 0); 2903 2723 2904 - roce_set_field(context->byte_168_irrl_idx, 2905 - V2_QPC_BYTE_168_SQ_SHIFT_BAK_M, 2906 - V2_QPC_BYTE_168_SQ_SHIFT_BAK_S, 2907 - ilog2((unsigned int)hr_qp->sq.wqe_cnt)); 2908 - roce_set_field(qpc_mask->byte_168_irrl_idx, 2909 - V2_QPC_BYTE_168_SQ_SHIFT_BAK_M, 2910 - V2_QPC_BYTE_168_SQ_SHIFT_BAK_S, 0); 2911 - 2724 + 
roce_set_bit(qpc_mask->byte_168_irrl_idx, 2725 + V2_QPC_BYTE_168_POLL_DB_WAIT_DO_S, 0); 2726 + roce_set_bit(qpc_mask->byte_168_irrl_idx, 2727 + V2_QPC_BYTE_168_SCC_TOKEN_FORBID_SQ_DEQ_S, 0); 2728 + roce_set_bit(qpc_mask->byte_168_irrl_idx, 2729 + V2_QPC_BYTE_168_WAIT_ACK_TIMEOUT_S, 0); 2912 2730 roce_set_bit(qpc_mask->byte_168_irrl_idx, 2913 2731 V2_QPC_BYTE_168_MSG_RTY_LP_FLG_S, 0); 2914 2732 roce_set_bit(qpc_mask->byte_168_irrl_idx, ··· 2923 2745 2924 2746 roce_set_bit(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_MSG_RNR_FLG_S, 2925 2747 0); 2748 + 2749 + roce_set_bit(context->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 1); 2750 + roce_set_bit(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 0); 2926 2751 2927 2752 roce_set_field(qpc_mask->byte_176_msg_pktn, 2928 2753 V2_QPC_BYTE_176_MSG_USE_PKTN_M, ··· 2970 2789 roce_set_field(qpc_mask->byte_232_irrl_sge, 2971 2790 V2_QPC_BYTE_232_IRRL_SGE_IDX_M, 2972 2791 V2_QPC_BYTE_232_IRRL_SGE_IDX_S, 0); 2792 + 2793 + roce_set_bit(qpc_mask->byte_232_irrl_sge, V2_QPC_BYTE_232_SO_LP_VLD_S, 2794 + 0); 2795 + roce_set_bit(qpc_mask->byte_232_irrl_sge, 2796 + V2_QPC_BYTE_232_FENCE_LP_VLD_S, 0); 2797 + roce_set_bit(qpc_mask->byte_232_irrl_sge, V2_QPC_BYTE_232_IRRL_LP_VLD_S, 2798 + 0); 2973 2799 2974 2800 qpc_mask->irrl_cur_sge_offset = 0; 2975 2801 ··· 3143 2955 roce_set_field(qpc_mask->byte_56_dqpn_err, 3144 2956 V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, 0); 3145 2957 } 3146 - roce_set_field(context->byte_168_irrl_idx, 3147 - V2_QPC_BYTE_168_SQ_SHIFT_BAK_M, 3148 - V2_QPC_BYTE_168_SQ_SHIFT_BAK_S, 3149 - ilog2((unsigned int)hr_qp->sq.wqe_cnt)); 3150 - roce_set_field(qpc_mask->byte_168_irrl_idx, 3151 - V2_QPC_BYTE_168_SQ_SHIFT_BAK_M, 3152 - V2_QPC_BYTE_168_SQ_SHIFT_BAK_S, 0); 3153 2958 } 3154 2959 3155 2960 static int modify_qp_init_to_rtr(struct ib_qp *ibqp, ··· 3452 3271 * we should set all bits of the relevant fields in context mask to 3453 3272 * 0 at the same time, else set them to 0x1. 
3454 3273 */ 3455 - roce_set_field(context->byte_60_qpst_mapid, 3456 - V2_QPC_BYTE_60_RTY_NUM_INI_BAK_M, 3457 - V2_QPC_BYTE_60_RTY_NUM_INI_BAK_S, attr->retry_cnt); 3458 - roce_set_field(qpc_mask->byte_60_qpst_mapid, 3459 - V2_QPC_BYTE_60_RTY_NUM_INI_BAK_M, 3460 - V2_QPC_BYTE_60_RTY_NUM_INI_BAK_S, 0); 3461 - 3462 3274 context->sq_cur_blk_addr = (u32)(mtts[0] >> PAGE_ADDR_SHIFT); 3463 3275 roce_set_field(context->byte_168_irrl_idx, 3464 3276 V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M, ··· 3712 3538 memcpy(src_mac, gid_attr->ndev->dev_addr, ETH_ALEN); 3713 3539 } 3714 3540 3541 + if (is_vlan_dev(gid_attr->ndev)) { 3542 + roce_set_bit(context->byte_76_srqn_op_en, 3543 + V2_QPC_BYTE_76_RQ_VLAN_EN_S, 1); 3544 + roce_set_bit(qpc_mask->byte_76_srqn_op_en, 3545 + V2_QPC_BYTE_76_RQ_VLAN_EN_S, 0); 3546 + roce_set_bit(context->byte_168_irrl_idx, 3547 + V2_QPC_BYTE_168_SQ_VLAN_EN_S, 1); 3548 + roce_set_bit(qpc_mask->byte_168_irrl_idx, 3549 + V2_QPC_BYTE_168_SQ_VLAN_EN_S, 0); 3550 + } 3551 + 3715 3552 roce_set_field(context->byte_24_mtu_tc, 3716 3553 V2_QPC_BYTE_24_VLAN_ID_M, 3717 3554 V2_QPC_BYTE_24_VLAN_ID_S, vlan); ··· 3769 3584 V2_QPC_BYTE_24_HOP_LIMIT_M, 3770 3585 V2_QPC_BYTE_24_HOP_LIMIT_S, 0); 3771 3586 3772 - roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, 3773 - V2_QPC_BYTE_24_TC_S, grh->traffic_class); 3587 + if (hr_dev->pci_dev->revision == 0x21 && 3588 + gid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) 3589 + roce_set_field(context->byte_24_mtu_tc, 3590 + V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S, 3591 + grh->traffic_class >> 2); 3592 + else 3593 + roce_set_field(context->byte_24_mtu_tc, 3594 + V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S, 3595 + grh->traffic_class); 3774 3596 roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, 3775 3597 V2_QPC_BYTE_24_TC_S, 0); 3776 3598 roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, ··· 3798 3606 set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask); 3799 3607 3800 3608 /* Every status migrate must 
change state */ 3801 - roce_set_field(context->byte_60_qpst_mapid, V2_QPC_BYTE_60_QP_ST_M, 3609 + roce_set_field(context->byte_60_qpst_tempid, V2_QPC_BYTE_60_QP_ST_M, 3802 3610 V2_QPC_BYTE_60_QP_ST_S, new_state); 3803 - roce_set_field(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_QP_ST_M, 3611 + roce_set_field(qpc_mask->byte_60_qpst_tempid, V2_QPC_BYTE_60_QP_ST_M, 3804 3612 V2_QPC_BYTE_60_QP_ST_S, 0); 3805 3613 3806 3614 /* SW pass context to HW */ ··· 3920 3728 goto out; 3921 3729 } 3922 3730 3923 - state = roce_get_field(context->byte_60_qpst_mapid, 3731 + state = roce_get_field(context->byte_60_qpst_tempid, 3924 3732 V2_QPC_BYTE_60_QP_ST_M, V2_QPC_BYTE_60_QP_ST_S); 3925 3733 tmp_qp_state = to_ib_qp_st((enum hns_roce_v2_qp_state)state); 3926 3734 if (tmp_qp_state == -1) { ··· 4187 3995 { 4188 3996 struct hns_roce_work *irq_work = 4189 3997 container_of(work, struct hns_roce_work, work); 3998 + struct device *dev = irq_work->hr_dev->dev; 4190 3999 u32 qpn = irq_work->qpn; 4000 + u32 cqn = irq_work->cqn; 4191 4001 4192 4002 switch (irq_work->event_type) { 4003 + case HNS_ROCE_EVENT_TYPE_PATH_MIG: 4004 + dev_info(dev, "Path migrated succeeded.\n"); 4005 + break; 4006 + case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED: 4007 + dev_warn(dev, "Path migration failed.\n"); 4008 + break; 4009 + case HNS_ROCE_EVENT_TYPE_COMM_EST: 4010 + dev_info(dev, "Communication established.\n"); 4011 + break; 4012 + case HNS_ROCE_EVENT_TYPE_SQ_DRAINED: 4013 + dev_warn(dev, "Send queue drained.\n"); 4014 + break; 4193 4015 case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: 4194 - case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: 4195 - case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: 4016 + dev_err(dev, "Local work queue catastrophic error.\n"); 4196 4017 hns_roce_set_qps_to_err(irq_work->hr_dev, qpn); 4018 + switch (irq_work->sub_type) { 4019 + case HNS_ROCE_LWQCE_QPC_ERROR: 4020 + dev_err(dev, "QP %d, QPC error.\n", qpn); 4021 + break; 4022 + case HNS_ROCE_LWQCE_MTU_ERROR: 4023 + dev_err(dev, "QP %d, MTU 
error.\n", qpn); 4024 + break; 4025 + case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR: 4026 + dev_err(dev, "QP %d, WQE BA addr error.\n", qpn); 4027 + break; 4028 + case HNS_ROCE_LWQCE_WQE_ADDR_ERROR: 4029 + dev_err(dev, "QP %d, WQE addr error.\n", qpn); 4030 + break; 4031 + case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR: 4032 + dev_err(dev, "QP %d, WQE shift error.\n", qpn); 4033 + break; 4034 + default: 4035 + dev_err(dev, "Unhandled sub_event type %d.\n", 4036 + irq_work->sub_type); 4037 + break; 4038 + } 4039 + break; 4040 + case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: 4041 + dev_err(dev, "Invalid request local work queue error.\n"); 4042 + hns_roce_set_qps_to_err(irq_work->hr_dev, qpn); 4043 + break; 4044 + case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: 4045 + dev_err(dev, "Local access violation work queue error.\n"); 4046 + hns_roce_set_qps_to_err(irq_work->hr_dev, qpn); 4047 + switch (irq_work->sub_type) { 4048 + case HNS_ROCE_LAVWQE_R_KEY_VIOLATION: 4049 + dev_err(dev, "QP %d, R_key violation.\n", qpn); 4050 + break; 4051 + case HNS_ROCE_LAVWQE_LENGTH_ERROR: 4052 + dev_err(dev, "QP %d, length error.\n", qpn); 4053 + break; 4054 + case HNS_ROCE_LAVWQE_VA_ERROR: 4055 + dev_err(dev, "QP %d, VA error.\n", qpn); 4056 + break; 4057 + case HNS_ROCE_LAVWQE_PD_ERROR: 4058 + dev_err(dev, "QP %d, PD error.\n", qpn); 4059 + break; 4060 + case HNS_ROCE_LAVWQE_RW_ACC_ERROR: 4061 + dev_err(dev, "QP %d, rw acc error.\n", qpn); 4062 + break; 4063 + case HNS_ROCE_LAVWQE_KEY_STATE_ERROR: 4064 + dev_err(dev, "QP %d, key state error.\n", qpn); 4065 + break; 4066 + case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR: 4067 + dev_err(dev, "QP %d, MR operation error.\n", qpn); 4068 + break; 4069 + default: 4070 + dev_err(dev, "Unhandled sub_event type %d.\n", 4071 + irq_work->sub_type); 4072 + break; 4073 + } 4074 + break; 4075 + case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: 4076 + dev_warn(dev, "SRQ limit reach.\n"); 4077 + break; 4078 + case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: 4079 + dev_warn(dev, "SRQ 
last wqe reach.\n"); 4080 + break; 4081 + case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR: 4082 + dev_err(dev, "SRQ catas error.\n"); 4083 + break; 4084 + case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: 4085 + dev_err(dev, "CQ 0x%x access err.\n", cqn); 4086 + break; 4087 + case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: 4088 + dev_warn(dev, "CQ 0x%x overflow\n", cqn); 4089 + break; 4090 + case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: 4091 + dev_warn(dev, "DB overflow.\n"); 4092 + break; 4093 + case HNS_ROCE_EVENT_TYPE_FLR: 4094 + dev_warn(dev, "Function level reset.\n"); 4197 4095 break; 4198 4096 default: 4199 4097 break; ··· 4293 4011 } 4294 4012 4295 4013 static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev, 4296 - struct hns_roce_eq *eq, u32 qpn) 4014 + struct hns_roce_eq *eq, 4015 + u32 qpn, u32 cqn) 4297 4016 { 4298 4017 struct hns_roce_work *irq_work; 4299 4018 ··· 4305 4022 INIT_WORK(&(irq_work->work), hns_roce_irq_work_handle); 4306 4023 irq_work->hr_dev = hr_dev; 4307 4024 irq_work->qpn = qpn; 4025 + irq_work->cqn = cqn; 4308 4026 irq_work->event_type = eq->event_type; 4309 4027 irq_work->sub_type = eq->sub_type; 4310 4028 queue_work(hr_dev->irq_workq, &(irq_work->work)); ··· 4340 4056 (eq->cons_index & HNS_ROCE_V2_CONS_IDX_M)); 4341 4057 4342 4058 hns_roce_write64_k(doorbell, eq->doorbell); 4343 - } 4344 - 4345 - static void hns_roce_v2_wq_catas_err_handle(struct hns_roce_dev *hr_dev, 4346 - struct hns_roce_aeqe *aeqe, 4347 - u32 qpn) 4348 - { 4349 - struct device *dev = hr_dev->dev; 4350 - int sub_type; 4351 - 4352 - dev_warn(dev, "Local work queue catastrophic error.\n"); 4353 - sub_type = roce_get_field(aeqe->asyn, HNS_ROCE_V2_AEQE_SUB_TYPE_M, 4354 - HNS_ROCE_V2_AEQE_SUB_TYPE_S); 4355 - switch (sub_type) { 4356 - case HNS_ROCE_LWQCE_QPC_ERROR: 4357 - dev_warn(dev, "QP %d, QPC error.\n", qpn); 4358 - break; 4359 - case HNS_ROCE_LWQCE_MTU_ERROR: 4360 - dev_warn(dev, "QP %d, MTU error.\n", qpn); 4361 - break; 4362 - case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR: 4363 - 
dev_warn(dev, "QP %d, WQE BA addr error.\n", qpn); 4364 - break; 4365 - case HNS_ROCE_LWQCE_WQE_ADDR_ERROR: 4366 - dev_warn(dev, "QP %d, WQE addr error.\n", qpn); 4367 - break; 4368 - case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR: 4369 - dev_warn(dev, "QP %d, WQE shift error.\n", qpn); 4370 - break; 4371 - default: 4372 - dev_err(dev, "Unhandled sub_event type %d.\n", sub_type); 4373 - break; 4374 - } 4375 - } 4376 - 4377 - static void hns_roce_v2_local_wq_access_err_handle(struct hns_roce_dev *hr_dev, 4378 - struct hns_roce_aeqe *aeqe, u32 qpn) 4379 - { 4380 - struct device *dev = hr_dev->dev; 4381 - int sub_type; 4382 - 4383 - dev_warn(dev, "Local access violation work queue error.\n"); 4384 - sub_type = roce_get_field(aeqe->asyn, HNS_ROCE_V2_AEQE_SUB_TYPE_M, 4385 - HNS_ROCE_V2_AEQE_SUB_TYPE_S); 4386 - switch (sub_type) { 4387 - case HNS_ROCE_LAVWQE_R_KEY_VIOLATION: 4388 - dev_warn(dev, "QP %d, R_key violation.\n", qpn); 4389 - break; 4390 - case HNS_ROCE_LAVWQE_LENGTH_ERROR: 4391 - dev_warn(dev, "QP %d, length error.\n", qpn); 4392 - break; 4393 - case HNS_ROCE_LAVWQE_VA_ERROR: 4394 - dev_warn(dev, "QP %d, VA error.\n", qpn); 4395 - break; 4396 - case HNS_ROCE_LAVWQE_PD_ERROR: 4397 - dev_err(dev, "QP %d, PD error.\n", qpn); 4398 - break; 4399 - case HNS_ROCE_LAVWQE_RW_ACC_ERROR: 4400 - dev_warn(dev, "QP %d, rw acc error.\n", qpn); 4401 - break; 4402 - case HNS_ROCE_LAVWQE_KEY_STATE_ERROR: 4403 - dev_warn(dev, "QP %d, key state error.\n", qpn); 4404 - break; 4405 - case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR: 4406 - dev_warn(dev, "QP %d, MR operation error.\n", qpn); 4407 - break; 4408 - default: 4409 - dev_err(dev, "Unhandled sub_event type %d.\n", sub_type); 4410 - break; 4411 - } 4412 - } 4413 - 4414 - static void hns_roce_v2_qp_err_handle(struct hns_roce_dev *hr_dev, 4415 - struct hns_roce_aeqe *aeqe, 4416 - int event_type, u32 qpn) 4417 - { 4418 - struct device *dev = hr_dev->dev; 4419 - 4420 - switch (event_type) { 4421 - case HNS_ROCE_EVENT_TYPE_COMM_EST: 4422 - 
dev_warn(dev, "Communication established.\n"); 4423 - break; 4424 - case HNS_ROCE_EVENT_TYPE_SQ_DRAINED: 4425 - dev_warn(dev, "Send queue drained.\n"); 4426 - break; 4427 - case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: 4428 - hns_roce_v2_wq_catas_err_handle(hr_dev, aeqe, qpn); 4429 - break; 4430 - case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: 4431 - dev_warn(dev, "Invalid request local work queue error.\n"); 4432 - break; 4433 - case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: 4434 - hns_roce_v2_local_wq_access_err_handle(hr_dev, aeqe, qpn); 4435 - break; 4436 - default: 4437 - break; 4438 - } 4439 - 4440 - hns_roce_qp_event(hr_dev, qpn, event_type); 4441 - } 4442 - 4443 - static void hns_roce_v2_cq_err_handle(struct hns_roce_dev *hr_dev, 4444 - struct hns_roce_aeqe *aeqe, 4445 - int event_type, u32 cqn) 4446 - { 4447 - struct device *dev = hr_dev->dev; 4448 - 4449 - switch (event_type) { 4450 - case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: 4451 - dev_warn(dev, "CQ 0x%x access err.\n", cqn); 4452 - break; 4453 - case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: 4454 - dev_warn(dev, "CQ 0x%x overflow\n", cqn); 4455 - break; 4456 - default: 4457 - break; 4458 - } 4459 - 4460 - hns_roce_cq_event(hr_dev, cqn, event_type); 4461 4059 } 4462 4060 4463 4061 static struct hns_roce_aeqe *get_aeqe_v2(struct hns_roce_eq *eq, u32 entry) ··· 4417 4251 4418 4252 switch (event_type) { 4419 4253 case HNS_ROCE_EVENT_TYPE_PATH_MIG: 4420 - dev_warn(dev, "Path migrated succeeded.\n"); 4421 - break; 4422 4254 case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED: 4423 - dev_warn(dev, "Path migration failed.\n"); 4424 - break; 4425 4255 case HNS_ROCE_EVENT_TYPE_COMM_EST: 4426 4256 case HNS_ROCE_EVENT_TYPE_SQ_DRAINED: 4427 4257 case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: 4428 4258 case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: 4429 4259 case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: 4430 - hns_roce_v2_qp_err_handle(hr_dev, aeqe, event_type, 4431 - qpn); 4260 + hns_roce_qp_event(hr_dev, qpn, event_type); 4432 4261 break; 
4433 4262 case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: 4434 4263 case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: 4435 4264 case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR: 4436 - dev_warn(dev, "SRQ not support.\n"); 4437 4265 break; 4438 4266 case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: 4439 4267 case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: 4440 - hns_roce_v2_cq_err_handle(hr_dev, aeqe, event_type, 4441 - cqn); 4268 + hns_roce_cq_event(hr_dev, cqn, event_type); 4442 4269 break; 4443 4270 case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: 4444 - dev_warn(dev, "DB overflow.\n"); 4445 4271 break; 4446 4272 case HNS_ROCE_EVENT_TYPE_MB: 4447 4273 hns_roce_cmd_event(hr_dev, ··· 4442 4284 le64_to_cpu(aeqe->event.cmd.out_param)); 4443 4285 break; 4444 4286 case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW: 4445 - dev_warn(dev, "CEQ overflow.\n"); 4446 4287 break; 4447 4288 case HNS_ROCE_EVENT_TYPE_FLR: 4448 - dev_warn(dev, "Function level reset.\n"); 4449 4289 break; 4450 4290 default: 4451 4291 dev_err(dev, "Unhandled event %d on EQ %d at idx %u.\n", ··· 4460 4304 dev_warn(dev, "cons_index overflow, set back to 0.\n"); 4461 4305 eq->cons_index = 0; 4462 4306 } 4463 - hns_roce_v2_init_irq_work(hr_dev, eq, qpn); 4307 + hns_roce_v2_init_irq_work(hr_dev, eq, qpn, cqn); 4464 4308 } 4465 4309 4466 4310 set_eq_cons_index_v2(eq); ··· 5281 5125 create_singlethread_workqueue("hns_roce_irq_workqueue"); 5282 5126 if (!hr_dev->irq_workq) { 5283 5127 dev_err(dev, "Create irq workqueue failed!\n"); 5128 + ret = -ENOMEM; 5284 5129 goto err_request_irq_fail; 5285 5130 } 5286 5131 ··· 5352 5195 .set_mac = hns_roce_v2_set_mac, 5353 5196 .write_mtpt = hns_roce_v2_write_mtpt, 5354 5197 .rereg_write_mtpt = hns_roce_v2_rereg_write_mtpt, 5198 + .frmr_write_mtpt = hns_roce_v2_frmr_write_mtpt, 5199 + .mw_write_mtpt = hns_roce_v2_mw_write_mtpt, 5355 5200 .write_cqc = hns_roce_v2_write_cqc, 5356 5201 .set_hem = hns_roce_v2_set_hem, 5357 5202 .clear_hem = hns_roce_v2_clear_hem,
+72 -24
drivers/infiniband/hw/hns/hns_roce_hw_v2.h
··· 50 50 #define HNS_ROCE_V2_MAX_CQE_NUM 0x10000 51 51 #define HNS_ROCE_V2_MAX_RQ_SGE_NUM 0x100 52 52 #define HNS_ROCE_V2_MAX_SQ_SGE_NUM 0xff 53 + #define HNS_ROCE_V2_MAX_EXTEND_SGE_NUM 0x200000 53 54 #define HNS_ROCE_V2_MAX_SQ_INLINE 0x20 54 55 #define HNS_ROCE_V2_UAR_NUM 256 55 56 #define HNS_ROCE_V2_PHY_UAR_NUM 1 ··· 79 78 #define HNS_ROCE_INVALID_LKEY 0x100 80 79 #define HNS_ROCE_CMQ_TX_TIMEOUT 30000 81 80 #define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE 2 81 + #define HNS_ROCE_V2_RSV_QPS 8 82 82 83 83 #define HNS_ROCE_CONTEXT_HOP_NUM 1 84 84 #define HNS_ROCE_MTT_HOP_NUM 1 ··· 203 201 204 202 /* CMQ command */ 205 203 enum hns_roce_opcode_type { 204 + HNS_QUERY_FW_VER = 0x0001, 206 205 HNS_ROCE_OPC_QUERY_HW_VER = 0x8000, 207 206 HNS_ROCE_OPC_CFG_GLOBAL_PARAM = 0x8001, 208 207 HNS_ROCE_OPC_ALLOC_PF_RES = 0x8004, ··· 327 324 328 325 enum{ 329 326 V2_MPT_ST_VALID = 0x1, 327 + V2_MPT_ST_FREE = 0x2, 330 328 }; 331 329 332 330 enum hns_roce_v2_qp_state { ··· 354 350 __le32 dmac; 355 351 __le32 byte_52_udpspn_dmac; 356 352 __le32 byte_56_dqpn_err; 357 - __le32 byte_60_qpst_mapid; 353 + __le32 byte_60_qpst_tempid; 358 354 __le32 qkey_xrcd; 359 355 __le32 byte_68_rq_db; 360 356 __le32 rq_db_record_addr; ··· 496 492 #define V2_QPC_BYTE_56_LP_PKTN_INI_S 28 497 493 #define V2_QPC_BYTE_56_LP_PKTN_INI_M GENMASK(31, 28) 498 494 499 - #define V2_QPC_BYTE_60_MAPID_S 0 500 - #define V2_QPC_BYTE_60_MAPID_M GENMASK(12, 0) 495 + #define V2_QPC_BYTE_60_TEMPID_S 0 496 + #define V2_QPC_BYTE_60_TEMPID_M GENMASK(7, 0) 501 497 502 - #define V2_QPC_BYTE_60_INNER_MAP_IND_S 13 498 + #define V2_QPC_BYTE_60_SCC_TOKEN_S 8 499 + #define V2_QPC_BYTE_60_SCC_TOKEN_M GENMASK(26, 8) 503 500 504 - #define V2_QPC_BYTE_60_SQ_MAP_IND_S 14 501 + #define V2_QPC_BYTE_60_SQ_DB_DOING_S 27 505 502 506 - #define V2_QPC_BYTE_60_RQ_MAP_IND_S 15 507 - 508 - #define V2_QPC_BYTE_60_TEMPID_S 16 509 - #define V2_QPC_BYTE_60_TEMPID_M GENMASK(22, 16) 510 - 511 - #define V2_QPC_BYTE_60_EXT_MAP_IND_S 23 512 - 513 - #define 
V2_QPC_BYTE_60_RTY_NUM_INI_BAK_S 24 514 - #define V2_QPC_BYTE_60_RTY_NUM_INI_BAK_M GENMASK(26, 24) 515 - 516 - #define V2_QPC_BYTE_60_SQ_RLS_IND_S 27 517 - 518 - #define V2_QPC_BYTE_60_SQ_EXT_IND_S 28 503 + #define V2_QPC_BYTE_60_RQ_DB_DOING_S 28 519 504 520 505 #define V2_QPC_BYTE_60_QP_ST_S 29 521 506 #define V2_QPC_BYTE_60_QP_ST_M GENMASK(31, 29) ··· 527 534 528 535 #define V2_QPC_BYTE_76_RQIE_S 28 529 536 537 + #define V2_QPC_BYTE_76_RQ_VLAN_EN_S 30 530 538 #define V2_QPC_BYTE_80_RX_CQN_S 0 531 539 #define V2_QPC_BYTE_80_RX_CQN_M GENMASK(23, 0) 532 540 ··· 582 588 #define V2_QPC_BYTE_140_RR_MAX_S 12 583 589 #define V2_QPC_BYTE_140_RR_MAX_M GENMASK(14, 12) 584 590 585 - #define V2_QPC_BYTE_140_RSVD_RAQ_MAP_S 15 591 + #define V2_QPC_BYTE_140_RQ_RTY_WAIT_DO_S 15 586 592 587 593 #define V2_QPC_BYTE_140_RAQ_TRRL_HEAD_S 16 588 594 #define V2_QPC_BYTE_140_RAQ_TRRL_HEAD_M GENMASK(23, 16) ··· 592 598 593 599 #define V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_S 0 594 600 #define V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_M GENMASK(23, 0) 595 - 596 - #define V2_QPC_BYTE_144_RAQ_RTY_INI_IND_S 24 597 601 598 602 #define V2_QPC_BYTE_144_RAQ_CREDIT_S 25 599 603 #define V2_QPC_BYTE_144_RAQ_CREDIT_M GENMASK(29, 25) ··· 629 637 #define V2_QPC_BYTE_168_LP_SGEN_INI_S 22 630 638 #define V2_QPC_BYTE_168_LP_SGEN_INI_M GENMASK(23, 22) 631 639 632 - #define V2_QPC_BYTE_168_SQ_SHIFT_BAK_S 24 633 - #define V2_QPC_BYTE_168_SQ_SHIFT_BAK_M GENMASK(27, 24) 634 - 640 + #define V2_QPC_BYTE_168_SQ_VLAN_EN_S 24 641 + #define V2_QPC_BYTE_168_POLL_DB_WAIT_DO_S 25 642 + #define V2_QPC_BYTE_168_SCC_TOKEN_FORBID_SQ_DEQ_S 26 643 + #define V2_QPC_BYTE_168_WAIT_ACK_TIMEOUT_S 27 635 644 #define V2_QPC_BYTE_168_IRRL_IDX_LSB_S 28 636 645 #define V2_QPC_BYTE_168_IRRL_IDX_LSB_M GENMASK(31, 28) 637 646 ··· 718 725 #define V2_QPC_BYTE_232_IRRL_SGE_IDX_S 20 719 726 #define V2_QPC_BYTE_232_IRRL_SGE_IDX_M GENMASK(28, 20) 720 727 728 + #define V2_QPC_BYTE_232_SO_LP_VLD_S 29 729 + #define V2_QPC_BYTE_232_FENCE_LP_VLD_S 30 730 + #define 
V2_QPC_BYTE_232_IRRL_LP_VLD_S 31 731 + 721 732 #define V2_QPC_BYTE_240_IRRL_TAIL_REAL_S 0 722 733 #define V2_QPC_BYTE_240_IRRL_TAIL_REAL_M GENMASK(7, 0) 723 734 ··· 739 742 740 743 #define V2_QPC_BYTE_244_RNR_CNT_S 27 741 744 #define V2_QPC_BYTE_244_RNR_CNT_M GENMASK(29, 27) 745 + 746 + #define V2_QPC_BYTE_244_LCL_OP_FLG_S 30 747 + #define V2_QPC_BYTE_244_IRRL_RD_FLG_S 31 742 748 743 749 #define V2_QPC_BYTE_248_IRRL_PSN_S 0 744 750 #define V2_QPC_BYTE_248_IRRL_PSN_M GENMASK(23, 0) ··· 818 818 #define V2_CQE_BYTE_28_PORT_TYPE_S 16 819 819 #define V2_CQE_BYTE_28_PORT_TYPE_M GENMASK(17, 16) 820 820 821 + #define V2_CQE_BYTE_28_VID_S 18 822 + #define V2_CQE_BYTE_28_VID_M GENMASK(29, 18) 823 + 824 + #define V2_CQE_BYTE_28_VID_VLD_S 30 825 + 821 826 #define V2_CQE_BYTE_32_RMT_QPN_S 0 822 827 #define V2_CQE_BYTE_32_RMT_QPN_M GENMASK(23, 0) 823 828 ··· 883 878 884 879 #define V2_MPT_BYTE_8_LW_EN_S 7 885 880 881 + #define V2_MPT_BYTE_8_MW_CNT_S 8 882 + #define V2_MPT_BYTE_8_MW_CNT_M GENMASK(31, 8) 883 + 884 + #define V2_MPT_BYTE_12_FRE_S 0 885 + 886 886 #define V2_MPT_BYTE_12_PA_S 1 887 + 888 + #define V2_MPT_BYTE_12_MR_MW_S 4 889 + 890 + #define V2_MPT_BYTE_12_BPD_S 5 891 + 892 + #define V2_MPT_BYTE_12_BQP_S 6 887 893 888 894 #define V2_MPT_BYTE_12_INNER_PA_VLD_S 7 889 895 ··· 1004 988 #define V2_UD_SEND_WQE_BYTE_40_PORTN_S 24 1005 989 #define V2_UD_SEND_WQE_BYTE_40_PORTN_M GENMASK(26, 24) 1006 990 991 + #define V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S 30 992 + 1007 993 #define V2_UD_SEND_WQE_BYTE_40_LBI_S 31 1008 994 1009 995 #define V2_UD_SEND_WQE_DMAC_0_S 0 ··· 1060 1042 1061 1043 #define V2_RC_SEND_WQE_BYTE_4_INLINE_S 12 1062 1044 1045 + #define V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S 19 1046 + 1047 + #define V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S 20 1048 + 1049 + #define V2_RC_FRMR_WQE_BYTE_4_RR_S 21 1050 + 1051 + #define V2_RC_FRMR_WQE_BYTE_4_RW_S 22 1052 + 1053 + #define V2_RC_FRMR_WQE_BYTE_4_LW_S 23 1054 + 1063 1055 #define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_S 0 1064 1056 #define 
V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_M GENMASK(23, 0) 1065 1057 ··· 1078 1050 1079 1051 #define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0 1080 1052 #define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0) 1053 + 1054 + struct hns_roce_wqe_frmr_seg { 1055 + __le32 pbl_size; 1056 + __le32 mode_buf_pg_sz; 1057 + }; 1058 + 1059 + #define V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S 4 1060 + #define V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M GENMASK(7, 4) 1061 + 1062 + #define V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S 8 1081 1063 1082 1064 struct hns_roce_v2_wqe_data_seg { 1083 1065 __le32 len; ··· 1103 1065 struct hns_roce_query_version { 1104 1066 __le16 rocee_vendor_id; 1105 1067 __le16 rocee_hw_version; 1068 + __le32 rsv[5]; 1069 + }; 1070 + 1071 + struct hns_roce_query_fw_info { 1072 + __le32 fw_ver; 1106 1073 __le32 rsv[5]; 1107 1074 }; 1108 1075 ··· 1606 1563 1607 1564 #define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S 0 1608 1565 #define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M GENMASK(23, 0) 1566 + 1567 + struct hns_roce_wqe_atomic_seg { 1568 + __le64 fetchadd_swap_data; 1569 + __le64 cmp_data; 1570 + }; 1609 1571 1610 1572 #endif
+40 -83
drivers/infiniband/hw/hns/hns_roce_main.c
··· 196 196 197 197 memset(props, 0, sizeof(*props)); 198 198 199 + props->fw_ver = hr_dev->caps.fw_ver; 199 200 props->sys_image_guid = cpu_to_be64(hr_dev->sys_image_guid); 200 201 props->max_mr_size = (u64)(~(0ULL)); 201 202 props->page_size_cap = hr_dev->caps.page_size_cap; ··· 216 215 props->max_pd = hr_dev->caps.num_pds; 217 216 props->max_qp_rd_atom = hr_dev->caps.max_qp_dest_rdma; 218 217 props->max_qp_init_rd_atom = hr_dev->caps.max_qp_init_rdma; 219 - props->atomic_cap = IB_ATOMIC_NONE; 218 + props->atomic_cap = hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_ATOMIC ? 219 + IB_ATOMIC_HCA : IB_ATOMIC_NONE; 220 220 props->max_pkeys = 1; 221 221 props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay; 222 222 ··· 346 344 if (ret) 347 345 goto error_fail_uar_alloc; 348 346 349 - INIT_LIST_HEAD(&context->vma_list); 350 - mutex_init(&context->vma_list_mutex); 351 347 if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) { 352 348 INIT_LIST_HEAD(&context->page_list); 353 349 mutex_init(&context->page_mutex); ··· 376 376 return 0; 377 377 } 378 378 379 - static void hns_roce_vma_open(struct vm_area_struct *vma) 380 - { 381 - vma->vm_ops = NULL; 382 - } 383 - 384 - static void hns_roce_vma_close(struct vm_area_struct *vma) 385 - { 386 - struct hns_roce_vma_data *vma_data; 387 - 388 - vma_data = (struct hns_roce_vma_data *)vma->vm_private_data; 389 - vma_data->vma = NULL; 390 - mutex_lock(vma_data->vma_list_mutex); 391 - list_del(&vma_data->list); 392 - mutex_unlock(vma_data->vma_list_mutex); 393 - kfree(vma_data); 394 - } 395 - 396 - static const struct vm_operations_struct hns_roce_vm_ops = { 397 - .open = hns_roce_vma_open, 398 - .close = hns_roce_vma_close, 399 - }; 400 - 401 - static int hns_roce_set_vma_data(struct vm_area_struct *vma, 402 - struct hns_roce_ucontext *context) 403 - { 404 - struct list_head *vma_head = &context->vma_list; 405 - struct hns_roce_vma_data *vma_data; 406 - 407 - vma_data = kzalloc(sizeof(*vma_data), GFP_KERNEL); 408 - if (!vma_data) 409 
- return -ENOMEM; 410 - 411 - vma_data->vma = vma; 412 - vma_data->vma_list_mutex = &context->vma_list_mutex; 413 - vma->vm_private_data = vma_data; 414 - vma->vm_ops = &hns_roce_vm_ops; 415 - 416 - mutex_lock(&context->vma_list_mutex); 417 - list_add(&vma_data->list, vma_head); 418 - mutex_unlock(&context->vma_list_mutex); 419 - 420 - return 0; 421 - } 422 - 423 379 static int hns_roce_mmap(struct ib_ucontext *context, 424 380 struct vm_area_struct *vma) 425 381 { 426 382 struct hns_roce_dev *hr_dev = to_hr_dev(context->device); 427 383 428 - if (((vma->vm_end - vma->vm_start) % PAGE_SIZE) != 0) 429 - return -EINVAL; 384 + switch (vma->vm_pgoff) { 385 + case 0: 386 + return rdma_user_mmap_io(context, vma, 387 + to_hr_ucontext(context)->uar.pfn, 388 + PAGE_SIZE, 389 + pgprot_noncached(vma->vm_page_prot)); 430 390 431 - if (vma->vm_pgoff == 0) { 432 - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); 433 - if (io_remap_pfn_range(vma, vma->vm_start, 434 - to_hr_ucontext(context)->uar.pfn, 435 - PAGE_SIZE, vma->vm_page_prot)) 436 - return -EAGAIN; 437 - } else if (vma->vm_pgoff == 1 && hr_dev->tptr_dma_addr && 438 - hr_dev->tptr_size) { 439 - /* vm_pgoff: 1 -- TPTR */ 440 - if (io_remap_pfn_range(vma, vma->vm_start, 441 - hr_dev->tptr_dma_addr >> PAGE_SHIFT, 442 - hr_dev->tptr_size, 443 - vma->vm_page_prot)) 444 - return -EAGAIN; 445 - } else 446 - return -EINVAL; 391 + /* vm_pgoff: 1 -- TPTR */ 392 + case 1: 393 + if (!hr_dev->tptr_dma_addr || !hr_dev->tptr_size) 394 + return -EINVAL; 395 + /* 396 + * FIXME: using io_remap_pfn_range on the dma address returned 397 + * by dma_alloc_coherent is totally wrong. 
398 + */ 399 + return rdma_user_mmap_io(context, vma, 400 + hr_dev->tptr_dma_addr >> PAGE_SHIFT, 401 + hr_dev->tptr_size, 402 + vma->vm_page_prot); 447 403 448 - return hns_roce_set_vma_data(vma, to_hr_ucontext(context)); 404 + default: 405 + return -EINVAL; 406 + } 449 407 } 450 408 451 409 static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num, ··· 429 471 430 472 static void hns_roce_disassociate_ucontext(struct ib_ucontext *ibcontext) 431 473 { 432 - struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext); 433 - struct hns_roce_vma_data *vma_data, *n; 434 - struct vm_area_struct *vma; 435 - 436 - mutex_lock(&context->vma_list_mutex); 437 - list_for_each_entry_safe(vma_data, n, &context->vma_list, list) { 438 - vma = vma_data->vma; 439 - zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE); 440 - 441 - vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE); 442 - vma->vm_ops = NULL; 443 - list_del(&vma_data->list); 444 - kfree(vma_data); 445 - } 446 - mutex_unlock(&context->vma_list_mutex); 447 474 } 448 475 449 476 static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev) ··· 451 508 spin_lock_init(&iboe->lock); 452 509 453 510 ib_dev = &hr_dev->ib_dev; 454 - strlcpy(ib_dev->name, "hns_%d", IB_DEVICE_NAME_MAX); 455 511 456 512 ib_dev->owner = THIS_MODULE; 457 513 ib_dev->node_type = RDMA_NODE_IB_CA; ··· 526 584 ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_REREG_MR); 527 585 } 528 586 587 + /* MW */ 588 + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW) { 589 + ib_dev->alloc_mw = hns_roce_alloc_mw; 590 + ib_dev->dealloc_mw = hns_roce_dealloc_mw; 591 + ib_dev->uverbs_cmd_mask |= 592 + (1ULL << IB_USER_VERBS_CMD_ALLOC_MW) | 593 + (1ULL << IB_USER_VERBS_CMD_DEALLOC_MW); 594 + } 595 + 596 + /* FRMR */ 597 + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) { 598 + ib_dev->alloc_mr = hns_roce_alloc_mr; 599 + ib_dev->map_mr_sg = hns_roce_map_mr_sg; 600 + } 601 + 529 602 /* OTHERS */ 530 603 ib_dev->get_port_immutable = hns_roce_port_immutable; 531 
604 ib_dev->disassociate_ucontext = hns_roce_disassociate_ucontext; 532 605 533 606 ib_dev->driver_id = RDMA_DRIVER_HNS; 534 - ret = ib_register_device(ib_dev, NULL); 607 + ret = ib_register_device(ib_dev, "hns_%d", NULL); 535 608 if (ret) { 536 609 dev_err(dev, "ib_register_device failed!\n"); 537 610 return ret;
+204 -8
drivers/infiniband/hw/hns/hns_roce_mr.c
··· 329 329 u64 bt_idx; 330 330 u64 size; 331 331 332 - mhop_num = hr_dev->caps.pbl_hop_num; 332 + mhop_num = (mr->type == MR_TYPE_FRMR ? 1 : hr_dev->caps.pbl_hop_num); 333 333 pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT); 334 334 pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8); 335 335 ··· 351 351 352 352 mr->pbl_size = npages; 353 353 mr->pbl_ba = mr->pbl_dma_addr; 354 - mr->pbl_hop_num = hr_dev->caps.pbl_hop_num; 354 + mr->pbl_hop_num = mhop_num; 355 355 mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz; 356 356 mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz; 357 357 return 0; ··· 511 511 mr->key = hw_index_to_key(index); /* MR key */ 512 512 513 513 if (size == ~0ull) { 514 - mr->type = MR_TYPE_DMA; 515 514 mr->pbl_buf = NULL; 516 515 mr->pbl_dma_addr = 0; 517 516 /* PBL multi-hop addressing parameters */ ··· 521 522 mr->pbl_l1_dma_addr = NULL; 522 523 mr->pbl_l0_dma_addr = 0; 523 524 } else { 524 - mr->type = MR_TYPE_MR; 525 525 if (!hr_dev->caps.pbl_hop_num) { 526 526 mr->pbl_buf = dma_alloc_coherent(dev, npages * 8, 527 527 &(mr->pbl_dma_addr), ··· 546 548 u32 mhop_num; 547 549 u64 bt_idx; 548 550 549 - npages = ib_umem_page_count(mr->umem); 551 + npages = mr->pbl_size; 550 552 pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT); 551 - mhop_num = hr_dev->caps.pbl_hop_num; 553 + mhop_num = (mr->type == MR_TYPE_FRMR) ? 
1 : hr_dev->caps.pbl_hop_num; 552 554 553 555 if (mhop_num == HNS_ROCE_HOP_NUM_0) 554 556 return; ··· 634 636 } 635 637 636 638 if (mr->size != ~0ULL) { 637 - npages = ib_umem_page_count(mr->umem); 639 + if (mr->type == MR_TYPE_MR) 640 + npages = ib_umem_page_count(mr->umem); 638 641 639 642 if (!hr_dev->caps.pbl_hop_num) 640 643 dma_free_coherent(dev, (unsigned int)(npages * 8), ··· 673 674 goto err_table; 674 675 } 675 676 676 - ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx); 677 + if (mr->type != MR_TYPE_FRMR) 678 + ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx); 679 + else 680 + ret = hr_dev->hw->frmr_write_mtpt(mailbox->buf, mr); 677 681 if (ret) { 678 682 dev_err(dev, "Write mtpt fail!\n"); 679 683 goto err_page; ··· 857 855 if (mr == NULL) 858 856 return ERR_PTR(-ENOMEM); 859 857 858 + mr->type = MR_TYPE_DMA; 859 + 860 860 /* Allocate memory region key */ 861 861 ret = hns_roce_mr_alloc(to_hr_dev(pd->device), to_hr_pd(pd)->pdn, 0, 862 862 ~0ULL, acc, 0, mr); ··· 1035 1031 } 1036 1032 } 1037 1033 1034 + mr->type = MR_TYPE_MR; 1035 + 1038 1036 ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length, 1039 1037 access_flags, n, mr); 1040 1038 if (ret) ··· 1206 1200 } 1207 1201 1208 1202 return ret; 1203 + } 1204 + 1205 + struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 1206 + u32 max_num_sg) 1207 + { 1208 + struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); 1209 + struct device *dev = hr_dev->dev; 1210 + struct hns_roce_mr *mr; 1211 + u64 length; 1212 + u32 page_size; 1213 + int ret; 1214 + 1215 + page_size = 1 << (hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT); 1216 + length = max_num_sg * page_size; 1217 + 1218 + if (mr_type != IB_MR_TYPE_MEM_REG) 1219 + return ERR_PTR(-EINVAL); 1220 + 1221 + if (max_num_sg > HNS_ROCE_FRMR_MAX_PA) { 1222 + dev_err(dev, "max_num_sg larger than %d\n", 1223 + HNS_ROCE_FRMR_MAX_PA); 1224 + return ERR_PTR(-EINVAL); 1225 + } 1226 + 1227 + mr = kzalloc(sizeof(*mr), GFP_KERNEL); 
1228 + if (!mr) 1229 + return ERR_PTR(-ENOMEM); 1230 + 1231 + mr->type = MR_TYPE_FRMR; 1232 + 1233 + /* Allocate memory region key */ 1234 + ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, 0, length, 1235 + 0, max_num_sg, mr); 1236 + if (ret) 1237 + goto err_free; 1238 + 1239 + ret = hns_roce_mr_enable(hr_dev, mr); 1240 + if (ret) 1241 + goto err_mr; 1242 + 1243 + mr->ibmr.rkey = mr->ibmr.lkey = mr->key; 1244 + mr->umem = NULL; 1245 + 1246 + return &mr->ibmr; 1247 + 1248 + err_mr: 1249 + hns_roce_mr_free(to_hr_dev(pd->device), mr); 1250 + 1251 + err_free: 1252 + kfree(mr); 1253 + return ERR_PTR(ret); 1254 + } 1255 + 1256 + static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr) 1257 + { 1258 + struct hns_roce_mr *mr = to_hr_mr(ibmr); 1259 + 1260 + mr->pbl_buf[mr->npages++] = cpu_to_le64(addr); 1261 + 1262 + return 0; 1263 + } 1264 + 1265 + int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, 1266 + unsigned int *sg_offset) 1267 + { 1268 + struct hns_roce_mr *mr = to_hr_mr(ibmr); 1269 + 1270 + mr->npages = 0; 1271 + 1272 + return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page); 1273 + } 1274 + 1275 + static void hns_roce_mw_free(struct hns_roce_dev *hr_dev, 1276 + struct hns_roce_mw *mw) 1277 + { 1278 + struct device *dev = hr_dev->dev; 1279 + int ret; 1280 + 1281 + if (mw->enabled) { 1282 + ret = hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mw->rkey) 1283 + & (hr_dev->caps.num_mtpts - 1)); 1284 + if (ret) 1285 + dev_warn(dev, "MW HW2SW_MPT failed (%d)\n", ret); 1286 + 1287 + hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table, 1288 + key_to_hw_index(mw->rkey)); 1289 + } 1290 + 1291 + hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, 1292 + key_to_hw_index(mw->rkey), BITMAP_NO_RR); 1293 + } 1294 + 1295 + static int hns_roce_mw_enable(struct hns_roce_dev *hr_dev, 1296 + struct hns_roce_mw *mw) 1297 + { 1298 + struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; 1299 + struct hns_roce_cmd_mailbox 
*mailbox; 1300 + struct device *dev = hr_dev->dev; 1301 + unsigned long mtpt_idx = key_to_hw_index(mw->rkey); 1302 + int ret; 1303 + 1304 + /* prepare HEM entry memory */ 1305 + ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx); 1306 + if (ret) 1307 + return ret; 1308 + 1309 + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); 1310 + if (IS_ERR(mailbox)) { 1311 + ret = PTR_ERR(mailbox); 1312 + goto err_table; 1313 + } 1314 + 1315 + ret = hr_dev->hw->mw_write_mtpt(mailbox->buf, mw); 1316 + if (ret) { 1317 + dev_err(dev, "MW write mtpt fail!\n"); 1318 + goto err_page; 1319 + } 1320 + 1321 + ret = hns_roce_sw2hw_mpt(hr_dev, mailbox, 1322 + mtpt_idx & (hr_dev->caps.num_mtpts - 1)); 1323 + if (ret) { 1324 + dev_err(dev, "MW sw2hw_mpt failed (%d)\n", ret); 1325 + goto err_page; 1326 + } 1327 + 1328 + mw->enabled = 1; 1329 + 1330 + hns_roce_free_cmd_mailbox(hr_dev, mailbox); 1331 + 1332 + return 0; 1333 + 1334 + err_page: 1335 + hns_roce_free_cmd_mailbox(hr_dev, mailbox); 1336 + 1337 + err_table: 1338 + hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx); 1339 + 1340 + return ret; 1341 + } 1342 + 1343 + struct ib_mw *hns_roce_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type, 1344 + struct ib_udata *udata) 1345 + { 1346 + struct hns_roce_dev *hr_dev = to_hr_dev(ib_pd->device); 1347 + struct hns_roce_mw *mw; 1348 + unsigned long index = 0; 1349 + int ret; 1350 + 1351 + mw = kmalloc(sizeof(*mw), GFP_KERNEL); 1352 + if (!mw) 1353 + return ERR_PTR(-ENOMEM); 1354 + 1355 + /* Allocate a key for mw from bitmap */ 1356 + ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index); 1357 + if (ret) 1358 + goto err_bitmap; 1359 + 1360 + mw->rkey = hw_index_to_key(index); 1361 + 1362 + mw->ibmw.rkey = mw->rkey; 1363 + mw->ibmw.type = type; 1364 + mw->pdn = to_hr_pd(ib_pd)->pdn; 1365 + mw->pbl_hop_num = hr_dev->caps.pbl_hop_num; 1366 + mw->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz; 1367 + mw->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz; 1368 + 1369 + ret = 
hns_roce_mw_enable(hr_dev, mw); 1370 + if (ret) 1371 + goto err_mw; 1372 + 1373 + return &mw->ibmw; 1374 + 1375 + err_mw: 1376 + hns_roce_mw_free(hr_dev, mw); 1377 + 1378 + err_bitmap: 1379 + kfree(mw); 1380 + 1381 + return ERR_PTR(ret); 1382 + } 1383 + 1384 + int hns_roce_dealloc_mw(struct ib_mw *ibmw) 1385 + { 1386 + struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device); 1387 + struct hns_roce_mw *mw = to_hr_mw(ibmw); 1388 + 1389 + hns_roce_mw_free(hr_dev, mw); 1390 + kfree(mw); 1391 + 1392 + return 0; 1209 1393 }
+36 -5
drivers/infiniband/hw/hns/hns_roce_qp.c
··· 31 31 * SOFTWARE. 32 32 */ 33 33 34 + #include <linux/pci.h> 34 35 #include <linux/platform_device.h> 35 36 #include <rdma/ib_addr.h> 36 37 #include <rdma/ib_umem.h> ··· 344 343 { 345 344 u32 roundup_sq_stride = roundup_pow_of_two(hr_dev->caps.max_sq_desc_sz); 346 345 u8 max_sq_stride = ilog2(roundup_sq_stride); 346 + u32 ex_sge_num; 347 347 u32 page_size; 348 348 u32 max_cnt; 349 349 ··· 374 372 if (hr_qp->sq.max_gs > 2) 375 373 hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt * 376 374 (hr_qp->sq.max_gs - 2)); 375 + 376 + if ((hr_qp->sq.max_gs > 2) && (hr_dev->pci_dev->revision == 0x20)) { 377 + if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) { 378 + dev_err(hr_dev->dev, 379 + "The extended sge cnt error! sge_cnt=%d\n", 380 + hr_qp->sge.sge_cnt); 381 + return -EINVAL; 382 + } 383 + } 384 + 377 385 hr_qp->sge.sge_shift = 4; 386 + ex_sge_num = hr_qp->sge.sge_cnt; 378 387 379 388 /* Get buf size, SQ and RQ are aligned to page_szie */ 380 389 if (hr_dev->caps.max_sq_sg <= 2) { ··· 399 386 hr_qp->sq.wqe_shift), PAGE_SIZE); 400 387 } else { 401 388 page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); 389 + hr_qp->sge.sge_cnt = 390 + max(page_size / (1 << hr_qp->sge.sge_shift), ex_sge_num); 402 391 hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << 403 392 hr_qp->rq.wqe_shift), page_size) + 404 393 HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt << ··· 409 394 hr_qp->sq.wqe_shift), page_size); 410 395 411 396 hr_qp->sq.offset = 0; 412 - if (hr_qp->sge.sge_cnt) { 397 + if (ex_sge_num) { 413 398 hr_qp->sge.offset = HNS_ROCE_ALOGN_UP( 414 399 (hr_qp->sq.wqe_cnt << 415 400 hr_qp->sq.wqe_shift), ··· 480 465 hr_qp->sge.sge_shift = 4; 481 466 } 482 467 468 + if ((hr_qp->sq.max_gs > 2) && hr_dev->pci_dev->revision == 0x20) { 469 + if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) { 470 + dev_err(dev, "The extended sge cnt error! 
sge_cnt=%d\n", 471 + hr_qp->sge.sge_cnt); 472 + return -EINVAL; 473 + } 474 + } 475 + 483 476 /* Get buf size, SQ and RQ are aligned to PAGE_SIZE */ 484 477 page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); 485 478 hr_qp->sq.offset = 0; ··· 495 472 page_size); 496 473 497 474 if (hr_dev->caps.max_sq_sg > 2 && hr_qp->sge.sge_cnt) { 475 + hr_qp->sge.sge_cnt = max(page_size/(1 << hr_qp->sge.sge_shift), 476 + (u32)hr_qp->sge.sge_cnt); 498 477 hr_qp->sge.offset = size; 499 478 size += HNS_ROCE_ALOGN_UP(hr_qp->sge.sge_cnt << 500 479 hr_qp->sge.sge_shift, page_size); ··· 977 952 } 978 953 } 979 954 980 - if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask, 981 - IB_LINK_LAYER_ETHERNET)) { 955 + if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, 956 + attr_mask)) { 982 957 dev_err(dev, "ib_modify_qp_is_ok failed\n"); 983 958 goto out; 984 959 } ··· 1131 1106 { 1132 1107 struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; 1133 1108 int reserved_from_top = 0; 1109 + int reserved_from_bot; 1134 1110 int ret; 1135 1111 1136 1112 spin_lock_init(&qp_table->lock); 1137 1113 INIT_RADIX_TREE(&hr_dev->qp_table_tree, GFP_ATOMIC); 1138 1114 1139 - /* A port include two SQP, six port total 12 */ 1115 + /* In hw v1, a port include two SQP, six ports total 12 */ 1116 + if (hr_dev->caps.max_sq_sg <= 2) 1117 + reserved_from_bot = SQP_NUM; 1118 + else 1119 + reserved_from_bot = hr_dev->caps.reserved_qps; 1120 + 1140 1121 ret = hns_roce_bitmap_init(&qp_table->bitmap, hr_dev->caps.num_qps, 1141 - hr_dev->caps.num_qps - 1, SQP_NUM, 1122 + hr_dev->caps.num_qps - 1, reserved_from_bot, 1142 1123 reserved_from_top); 1143 1124 if (ret) { 1144 1125 dev_err(hr_dev->dev, "qp bitmap init failed!error=%d\n",
+1 -1
drivers/infiniband/hw/i40iw/i40iw_cm.c
··· 1689 1689 unsigned long flags; 1690 1690 1691 1691 rtnl_lock(); 1692 - for_each_netdev_rcu(&init_net, ip_dev) { 1692 + for_each_netdev(&init_net, ip_dev) { 1693 1693 if ((((rdma_vlan_dev_vlan_id(ip_dev) < I40IW_NO_VLAN) && 1694 1694 (rdma_vlan_dev_real_dev(ip_dev) == iwdev->netdev)) || 1695 1695 (ip_dev == iwdev->netdev)) && (ip_dev->flags & IFF_UP)) {
+24 -49
drivers/infiniband/hw/i40iw/i40iw_verbs.c
··· 2135 2135 } 2136 2136 2137 2137 /** 2138 - * i40iw_show_rev 2138 + * hw_rev_show 2139 2139 */ 2140 - static ssize_t i40iw_show_rev(struct device *dev, 2141 - struct device_attribute *attr, char *buf) 2140 + static ssize_t hw_rev_show(struct device *dev, 2141 + struct device_attribute *attr, char *buf) 2142 2142 { 2143 2143 struct i40iw_ib_device *iwibdev = container_of(dev, 2144 2144 struct i40iw_ib_device, ··· 2147 2147 2148 2148 return sprintf(buf, "%x\n", hw_rev); 2149 2149 } 2150 + static DEVICE_ATTR_RO(hw_rev); 2150 2151 2151 2152 /** 2152 - * i40iw_show_hca 2153 + * hca_type_show 2153 2154 */ 2154 - static ssize_t i40iw_show_hca(struct device *dev, 2155 - struct device_attribute *attr, char *buf) 2155 + static ssize_t hca_type_show(struct device *dev, 2156 + struct device_attribute *attr, char *buf) 2156 2157 { 2157 2158 return sprintf(buf, "I40IW\n"); 2158 2159 } 2160 + static DEVICE_ATTR_RO(hca_type); 2159 2161 2160 2162 /** 2161 - * i40iw_show_board 2163 + * board_id_show 2162 2164 */ 2163 - static ssize_t i40iw_show_board(struct device *dev, 2164 - struct device_attribute *attr, 2165 - char *buf) 2165 + static ssize_t board_id_show(struct device *dev, 2166 + struct device_attribute *attr, char *buf) 2166 2167 { 2167 2168 return sprintf(buf, "%.*s\n", 32, "I40IW Board ID"); 2168 2169 } 2170 + static DEVICE_ATTR_RO(board_id); 2169 2171 2170 - static DEVICE_ATTR(hw_rev, S_IRUGO, i40iw_show_rev, NULL); 2171 - static DEVICE_ATTR(hca_type, S_IRUGO, i40iw_show_hca, NULL); 2172 - static DEVICE_ATTR(board_id, S_IRUGO, i40iw_show_board, NULL); 2172 + static struct attribute *i40iw_dev_attributes[] = { 2173 + &dev_attr_hw_rev.attr, 2174 + &dev_attr_hca_type.attr, 2175 + &dev_attr_board_id.attr, 2176 + NULL 2177 + }; 2173 2178 2174 - static struct device_attribute *i40iw_dev_attributes[] = { 2175 - &dev_attr_hw_rev, 2176 - &dev_attr_hca_type, 2177 - &dev_attr_board_id 2179 + static const struct attribute_group i40iw_attr_group = { 2180 + .attrs = 
i40iw_dev_attributes, 2178 2181 }; 2179 2182 2180 2183 /** ··· 2755 2752 i40iw_pr_err("iwdev == NULL\n"); 2756 2753 return NULL; 2757 2754 } 2758 - strlcpy(iwibdev->ibdev.name, "i40iw%d", IB_DEVICE_NAME_MAX); 2759 2755 iwibdev->ibdev.owner = THIS_MODULE; 2760 2756 iwdev->iwibdev = iwibdev; 2761 2757 iwibdev->iwdev = iwdev; ··· 2853 2851 } 2854 2852 2855 2853 /** 2856 - * i40iw_unregister_rdma_device - unregister of iwarp from IB 2857 - * @iwibdev: rdma device ptr 2858 - */ 2859 - static void i40iw_unregister_rdma_device(struct i40iw_ib_device *iwibdev) 2860 - { 2861 - int i; 2862 - 2863 - for (i = 0; i < ARRAY_SIZE(i40iw_dev_attributes); ++i) 2864 - device_remove_file(&iwibdev->ibdev.dev, 2865 - i40iw_dev_attributes[i]); 2866 - ib_unregister_device(&iwibdev->ibdev); 2867 - } 2868 - 2869 - /** 2870 2854 * i40iw_destroy_rdma_device - destroy rdma device and free resources 2871 2855 * @iwibdev: IB device ptr 2872 2856 */ ··· 2861 2873 if (!iwibdev) 2862 2874 return; 2863 2875 2864 - i40iw_unregister_rdma_device(iwibdev); 2876 + ib_unregister_device(&iwibdev->ibdev); 2865 2877 kfree(iwibdev->ibdev.iwcm); 2866 2878 iwibdev->ibdev.iwcm = NULL; 2867 2879 wait_event_timeout(iwibdev->iwdev->close_wq, ··· 2876 2888 */ 2877 2889 int i40iw_register_rdma_device(struct i40iw_device *iwdev) 2878 2890 { 2879 - int i, ret; 2891 + int ret; 2880 2892 struct i40iw_ib_device *iwibdev; 2881 2893 2882 2894 iwdev->iwibdev = i40iw_init_rdma_device(iwdev); 2883 2895 if (!iwdev->iwibdev) 2884 2896 return -ENOMEM; 2885 2897 iwibdev = iwdev->iwibdev; 2886 - 2898 + rdma_set_device_sysfs_group(&iwibdev->ibdev, &i40iw_attr_group); 2887 2899 iwibdev->ibdev.driver_id = RDMA_DRIVER_I40IW; 2888 - ret = ib_register_device(&iwibdev->ibdev, NULL); 2900 + ret = ib_register_device(&iwibdev->ibdev, "i40iw%d", NULL); 2889 2901 if (ret) 2890 2902 goto error; 2891 2903 2892 - for (i = 0; i < ARRAY_SIZE(i40iw_dev_attributes); ++i) { 2893 - ret = 2894 - device_create_file(&iwibdev->ibdev.dev, 2895 - 
i40iw_dev_attributes[i]); 2896 - if (ret) { 2897 - while (i > 0) { 2898 - i--; 2899 - device_remove_file(&iwibdev->ibdev.dev, i40iw_dev_attributes[i]); 2900 - } 2901 - ib_unregister_device(&iwibdev->ibdev); 2902 - goto error; 2903 - } 2904 - } 2905 2904 return 0; 2906 2905 error: 2907 2906 kfree(iwdev->iwibdev->ibdev.iwcm);
+1
drivers/infiniband/hw/mlx4/Kconfig
··· 1 1 config MLX4_INFINIBAND 2 2 tristate "Mellanox ConnectX HCA support" 3 3 depends on NETDEVICES && ETHERNET && PCI && INET 4 + depends on INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS 4 5 depends on MAY_USE_DEVLINK 5 6 select NET_VENDOR_MELLANOX 6 7 select MLX4_CORE
+11 -9
drivers/infiniband/hw/mlx4/mad.c
··· 807 807 int err; 808 808 struct ib_port_attr pattr; 809 809 810 - if (in_wc && in_wc->qp->qp_num) { 811 - pr_debug("received MAD: slid:%d sqpn:%d " 812 - "dlid_bits:%d dqpn:%d wc_flags:0x%x, cls %x, mtd %x, atr %x\n", 813 - in_wc->slid, in_wc->src_qp, 814 - in_wc->dlid_path_bits, 815 - in_wc->qp->qp_num, 816 - in_wc->wc_flags, 817 - in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method, 818 - be16_to_cpu(in_mad->mad_hdr.attr_id)); 810 + if (in_wc && in_wc->qp) { 811 + pr_debug("received MAD: port:%d slid:%d sqpn:%d " 812 + "dlid_bits:%d dqpn:%d wc_flags:0x%x tid:%016llx cls:%x mtd:%x atr:%x\n", 813 + port_num, 814 + in_wc->slid, in_wc->src_qp, 815 + in_wc->dlid_path_bits, 816 + in_wc->qp->qp_num, 817 + in_wc->wc_flags, 818 + be64_to_cpu(in_mad->mad_hdr.tid), 819 + in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method, 820 + be16_to_cpu(in_mad->mad_hdr.attr_id)); 819 821 if (in_wc->wc_flags & IB_WC_GRH) { 820 822 pr_debug("sgid_hi:0x%016llx sgid_lo:0x%016llx\n", 821 823 be64_to_cpu(in_grh->sgid.global.subnet_prefix),
+43 -139
drivers/infiniband/hw/mlx4/main.c
··· 1140 1140 return 0; 1141 1141 } 1142 1142 1143 - static void mlx4_ib_vma_open(struct vm_area_struct *area) 1144 - { 1145 - /* vma_open is called when a new VMA is created on top of our VMA. 1146 - * This is done through either mremap flow or split_vma (usually due 1147 - * to mlock, madvise, munmap, etc.). We do not support a clone of the 1148 - * vma, as this VMA is strongly hardware related. Therefore we set the 1149 - * vm_ops of the newly created/cloned VMA to NULL, to prevent it from 1150 - * calling us again and trying to do incorrect actions. We assume that 1151 - * the original vma size is exactly a single page that there will be no 1152 - * "splitting" operations on. 1153 - */ 1154 - area->vm_ops = NULL; 1155 - } 1156 - 1157 - static void mlx4_ib_vma_close(struct vm_area_struct *area) 1158 - { 1159 - struct mlx4_ib_vma_private_data *mlx4_ib_vma_priv_data; 1160 - 1161 - /* It's guaranteed that all VMAs opened on a FD are closed before the 1162 - * file itself is closed, therefore no sync is needed with the regular 1163 - * closing flow. (e.g. mlx4_ib_dealloc_ucontext) However need a sync 1164 - * with accessing the vma as part of mlx4_ib_disassociate_ucontext. 1165 - * The close operation is usually called under mm->mmap_sem except when 1166 - * process is exiting. The exiting case is handled explicitly as part 1167 - * of mlx4_ib_disassociate_ucontext. 1168 - */ 1169 - mlx4_ib_vma_priv_data = (struct mlx4_ib_vma_private_data *) 1170 - area->vm_private_data; 1171 - 1172 - /* set the vma context pointer to null in the mlx4_ib driver's private 1173 - * data to protect against a race condition in mlx4_ib_dissassociate_ucontext(). 
1174 - */ 1175 - mlx4_ib_vma_priv_data->vma = NULL; 1176 - } 1177 - 1178 - static const struct vm_operations_struct mlx4_ib_vm_ops = { 1179 - .open = mlx4_ib_vma_open, 1180 - .close = mlx4_ib_vma_close 1181 - }; 1182 - 1183 1143 static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) 1184 1144 { 1185 - int i; 1186 - struct vm_area_struct *vma; 1187 - struct mlx4_ib_ucontext *context = to_mucontext(ibcontext); 1188 - 1189 - /* need to protect from a race on closing the vma as part of 1190 - * mlx4_ib_vma_close(). 1191 - */ 1192 - for (i = 0; i < HW_BAR_COUNT; i++) { 1193 - vma = context->hw_bar_info[i].vma; 1194 - if (!vma) 1195 - continue; 1196 - 1197 - zap_vma_ptes(context->hw_bar_info[i].vma, 1198 - context->hw_bar_info[i].vma->vm_start, PAGE_SIZE); 1199 - 1200 - context->hw_bar_info[i].vma->vm_flags &= 1201 - ~(VM_SHARED | VM_MAYSHARE); 1202 - /* context going to be destroyed, should not access ops any more */ 1203 - context->hw_bar_info[i].vma->vm_ops = NULL; 1204 - } 1205 - } 1206 - 1207 - static void mlx4_ib_set_vma_data(struct vm_area_struct *vma, 1208 - struct mlx4_ib_vma_private_data *vma_private_data) 1209 - { 1210 - vma_private_data->vma = vma; 1211 - vma->vm_private_data = vma_private_data; 1212 - vma->vm_ops = &mlx4_ib_vm_ops; 1213 1145 } 1214 1146 1215 1147 static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) 1216 1148 { 1217 1149 struct mlx4_ib_dev *dev = to_mdev(context->device); 1218 - struct mlx4_ib_ucontext *mucontext = to_mucontext(context); 1219 1150 1220 - if (vma->vm_end - vma->vm_start != PAGE_SIZE) 1221 - return -EINVAL; 1151 + switch (vma->vm_pgoff) { 1152 + case 0: 1153 + return rdma_user_mmap_io(context, vma, 1154 + to_mucontext(context)->uar.pfn, 1155 + PAGE_SIZE, 1156 + pgprot_noncached(vma->vm_page_prot)); 1222 1157 1223 - if (vma->vm_pgoff == 0) { 1224 - /* We prevent double mmaping on same context */ 1225 - if (mucontext->hw_bar_info[HW_BAR_DB].vma) 1158 + case 1: 1159 + if 
(dev->dev->caps.bf_reg_size == 0) 1226 1160 return -EINVAL; 1161 + return rdma_user_mmap_io( 1162 + context, vma, 1163 + to_mucontext(context)->uar.pfn + 1164 + dev->dev->caps.num_uars, 1165 + PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot)); 1227 1166 1228 - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); 1229 - 1230 - if (io_remap_pfn_range(vma, vma->vm_start, 1231 - to_mucontext(context)->uar.pfn, 1232 - PAGE_SIZE, vma->vm_page_prot)) 1233 - return -EAGAIN; 1234 - 1235 - mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_DB]); 1236 - 1237 - } else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) { 1238 - /* We prevent double mmaping on same context */ 1239 - if (mucontext->hw_bar_info[HW_BAR_BF].vma) 1240 - return -EINVAL; 1241 - 1242 - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); 1243 - 1244 - if (io_remap_pfn_range(vma, vma->vm_start, 1245 - to_mucontext(context)->uar.pfn + 1246 - dev->dev->caps.num_uars, 1247 - PAGE_SIZE, vma->vm_page_prot)) 1248 - return -EAGAIN; 1249 - 1250 - mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_BF]); 1251 - 1252 - } else if (vma->vm_pgoff == 3) { 1167 + case 3: { 1253 1168 struct mlx4_clock_params params; 1254 1169 int ret; 1255 1170 1256 - /* We prevent double mmaping on same context */ 1257 - if (mucontext->hw_bar_info[HW_BAR_CLOCK].vma) 1258 - return -EINVAL; 1259 - 1260 1171 ret = mlx4_get_internal_clock_params(dev->dev, &params); 1261 - 1262 1172 if (ret) 1263 1173 return ret; 1264 1174 1265 - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); 1266 - if (io_remap_pfn_range(vma, vma->vm_start, 1267 - (pci_resource_start(dev->dev->persist->pdev, 1268 - params.bar) + 1269 - params.offset) 1270 - >> PAGE_SHIFT, 1271 - PAGE_SIZE, vma->vm_page_prot)) 1272 - return -EAGAIN; 1273 - 1274 - mlx4_ib_set_vma_data(vma, 1275 - &mucontext->hw_bar_info[HW_BAR_CLOCK]); 1276 - } else { 1277 - return -EINVAL; 1175 + return rdma_user_mmap_io( 1176 + context, vma, 1177 + 
(pci_resource_start(dev->dev->persist->pdev, 1178 + params.bar) + 1179 + params.offset) >> 1180 + PAGE_SHIFT, 1181 + PAGE_SIZE, pgprot_noncached(vma->vm_page_prot)); 1278 1182 } 1279 1183 1280 - return 0; 1184 + default: 1185 + return -EINVAL; 1186 + } 1281 1187 } 1282 1188 1283 1189 static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev, ··· 2039 2133 return err; 2040 2134 } 2041 2135 2042 - static ssize_t show_hca(struct device *device, struct device_attribute *attr, 2043 - char *buf) 2136 + static ssize_t hca_type_show(struct device *device, 2137 + struct device_attribute *attr, char *buf) 2044 2138 { 2045 2139 struct mlx4_ib_dev *dev = 2046 2140 container_of(device, struct mlx4_ib_dev, ib_dev.dev); 2047 2141 return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device); 2048 2142 } 2143 + static DEVICE_ATTR_RO(hca_type); 2049 2144 2050 - static ssize_t show_rev(struct device *device, struct device_attribute *attr, 2051 - char *buf) 2145 + static ssize_t hw_rev_show(struct device *device, 2146 + struct device_attribute *attr, char *buf) 2052 2147 { 2053 2148 struct mlx4_ib_dev *dev = 2054 2149 container_of(device, struct mlx4_ib_dev, ib_dev.dev); 2055 2150 return sprintf(buf, "%x\n", dev->dev->rev_id); 2056 2151 } 2152 + static DEVICE_ATTR_RO(hw_rev); 2057 2153 2058 - static ssize_t show_board(struct device *device, struct device_attribute *attr, 2059 - char *buf) 2154 + static ssize_t board_id_show(struct device *device, 2155 + struct device_attribute *attr, char *buf) 2060 2156 { 2061 2157 struct mlx4_ib_dev *dev = 2062 2158 container_of(device, struct mlx4_ib_dev, ib_dev.dev); 2063 2159 return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN, 2064 2160 dev->dev->board_id); 2065 2161 } 2162 + static DEVICE_ATTR_RO(board_id); 2066 2163 2067 - static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 2068 - static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); 2069 - static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); 2164 + static struct attribute 
*mlx4_class_attributes[] = { 2165 + &dev_attr_hw_rev.attr, 2166 + &dev_attr_hca_type.attr, 2167 + &dev_attr_board_id.attr, 2168 + NULL 2169 + }; 2070 2170 2071 - static struct device_attribute *mlx4_class_attributes[] = { 2072 - &dev_attr_hw_rev, 2073 - &dev_attr_hca_type, 2074 - &dev_attr_board_id 2171 + static const struct attribute_group mlx4_attr_group = { 2172 + .attrs = mlx4_class_attributes, 2075 2173 }; 2076 2174 2077 2175 struct diag_counter { ··· 2546 2636 ibdev->dev = dev; 2547 2637 ibdev->bond_next_port = 0; 2548 2638 2549 - strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX); 2550 2639 ibdev->ib_dev.owner = THIS_MODULE; 2551 2640 ibdev->ib_dev.node_type = RDMA_NODE_IB_CA; 2552 2641 ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey; ··· 2807 2898 if (mlx4_ib_alloc_diag_counters(ibdev)) 2808 2899 goto err_steer_free_bitmap; 2809 2900 2901 + rdma_set_device_sysfs_group(&ibdev->ib_dev, &mlx4_attr_group); 2810 2902 ibdev->ib_dev.driver_id = RDMA_DRIVER_MLX4; 2811 - if (ib_register_device(&ibdev->ib_dev, NULL)) 2903 + if (ib_register_device(&ibdev->ib_dev, "mlx4_%d", NULL)) 2812 2904 goto err_diag_counters; 2813 2905 2814 2906 if (mlx4_ib_mad_init(ibdev)) ··· 2829 2919 if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) { 2830 2920 err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT); 2831 2921 if (err) 2832 - goto err_notif; 2833 - } 2834 - 2835 - for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) { 2836 - if (device_create_file(&ibdev->ib_dev.dev, 2837 - mlx4_class_attributes[j])) 2838 2922 goto err_notif; 2839 2923 } 2840 2924
+1 -1
drivers/infiniband/hw/mlx4/mcg.c
··· 673 673 if (!list_empty(&group->pending_list)) 674 674 req = list_first_entry(&group->pending_list, 675 675 struct mcast_req, group_list); 676 - if ((method == IB_MGMT_METHOD_GET_RESP)) { 676 + if (method == IB_MGMT_METHOD_GET_RESP) { 677 677 if (req) { 678 678 send_reply_to_slave(req->func, group, &req->sa_mad, status); 679 679 --group->func[req->func].num_pend_reqs;
-5
drivers/infiniband/hw/mlx4/mlx4_ib.h
··· 80 80 HW_BAR_COUNT 81 81 }; 82 82 83 - struct mlx4_ib_vma_private_data { 84 - struct vm_area_struct *vma; 85 - }; 86 - 87 83 struct mlx4_ib_ucontext { 88 84 struct ib_ucontext ibucontext; 89 85 struct mlx4_uar uar; 90 86 struct list_head db_page_list; 91 87 struct mutex db_page_mutex; 92 - struct mlx4_ib_vma_private_data hw_bar_info[HW_BAR_COUNT]; 93 88 struct list_head wqn_ranges_list; 94 89 struct mutex wqn_ranges_mutex; /* protect wqn_ranges_list */ 95 90 };
+1 -7
drivers/infiniband/hw/mlx4/qp.c
··· 2629 2629 static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 2630 2630 int attr_mask, struct ib_udata *udata) 2631 2631 { 2632 - enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED; 2633 2632 struct mlx4_ib_dev *dev = to_mdev(ibqp->device); 2634 2633 struct mlx4_ib_qp *qp = to_mqp(ibqp); 2635 2634 enum ib_qp_state cur_state, new_state; ··· 2638 2639 cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; 2639 2640 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; 2640 2641 2641 - if (cur_state != new_state || cur_state != IB_QPS_RESET) { 2642 - int port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; 2643 - ll = rdma_port_get_link_layer(&dev->ib_dev, port); 2644 - } 2645 - 2646 2642 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, 2647 - attr_mask, ll)) { 2643 + attr_mask)) { 2648 2644 pr_debug("qpn 0x%x: invalid attribute mask specified " 2649 2645 "for transition %d to %d. qp_type %d," 2650 2646 " attr_mask 0x%x\n",
+1 -5
drivers/infiniband/hw/mlx4/sysfs.c
··· 818 818 if (!mlx4_is_master(dev->dev)) 819 819 return 0; 820 820 821 - dev->iov_parent = 822 - kobject_create_and_add("iov", 823 - kobject_get(dev->ib_dev.ports_parent->parent)); 821 + dev->iov_parent = kobject_create_and_add("iov", &dev->ib_dev.dev.kobj); 824 822 if (!dev->iov_parent) { 825 823 ret = -ENOMEM; 826 824 goto err; ··· 848 850 err_ports: 849 851 kobject_put(dev->iov_parent); 850 852 err: 851 - kobject_put(dev->ib_dev.ports_parent->parent); 852 853 pr_err("mlx4_ib_device_register_sysfs error (%d)\n", ret); 853 854 return ret; 854 855 } ··· 883 886 kobject_put(device->ports_parent); 884 887 kobject_put(device->iov_parent); 885 888 kobject_put(device->iov_parent); 886 - kobject_put(device->ib_dev.ports_parent->parent); 887 889 }
+129
drivers/infiniband/hw/mlx5/cmd.c
··· 197 197 return mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPCNT, 198 198 0, 0); 199 199 } 200 + 201 + void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid) 202 + { 203 + u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {}; 204 + u32 out[MLX5_ST_SZ_DW(destroy_tir_out)] = {}; 205 + 206 + MLX5_SET(destroy_tir_in, in, opcode, MLX5_CMD_OP_DESTROY_TIR); 207 + MLX5_SET(destroy_tir_in, in, tirn, tirn); 208 + MLX5_SET(destroy_tir_in, in, uid, uid); 209 + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); 210 + } 211 + 212 + void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid) 213 + { 214 + u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {0}; 215 + u32 out[MLX5_ST_SZ_DW(destroy_tis_out)] = {0}; 216 + 217 + MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS); 218 + MLX5_SET(destroy_tis_in, in, tisn, tisn); 219 + MLX5_SET(destroy_tis_in, in, uid, uid); 220 + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); 221 + } 222 + 223 + void mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid) 224 + { 225 + u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {}; 226 + u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {}; 227 + 228 + MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT); 229 + MLX5_SET(destroy_rqt_in, in, rqtn, rqtn); 230 + MLX5_SET(destroy_rqt_in, in, uid, uid); 231 + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); 232 + } 233 + 234 + int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn, 235 + u16 uid) 236 + { 237 + u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)] = {0}; 238 + u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {0}; 239 + int err; 240 + 241 + MLX5_SET(alloc_transport_domain_in, in, opcode, 242 + MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN); 243 + 244 + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); 245 + if (!err) 246 + *tdn = MLX5_GET(alloc_transport_domain_out, out, 247 + transport_domain); 248 + 249 + return err; 250 + } 251 + 252 + void 
mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn, 253 + u16 uid) 254 + { 255 + u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)] = {0}; 256 + u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)] = {0}; 257 + 258 + MLX5_SET(dealloc_transport_domain_in, in, opcode, 259 + MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN); 260 + MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn); 261 + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); 262 + } 263 + 264 + void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid) 265 + { 266 + u32 out[MLX5_ST_SZ_DW(dealloc_pd_out)] = {}; 267 + u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {}; 268 + 269 + MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD); 270 + MLX5_SET(dealloc_pd_in, in, pd, pdn); 271 + MLX5_SET(dealloc_pd_in, in, uid, uid); 272 + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); 273 + } 274 + 275 + int mlx5_cmd_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, 276 + u32 qpn, u16 uid) 277 + { 278 + u32 out[MLX5_ST_SZ_DW(attach_to_mcg_out)] = {}; 279 + u32 in[MLX5_ST_SZ_DW(attach_to_mcg_in)] = {}; 280 + void *gid; 281 + 282 + MLX5_SET(attach_to_mcg_in, in, opcode, MLX5_CMD_OP_ATTACH_TO_MCG); 283 + MLX5_SET(attach_to_mcg_in, in, qpn, qpn); 284 + MLX5_SET(attach_to_mcg_in, in, uid, uid); 285 + gid = MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid); 286 + memcpy(gid, mgid, sizeof(*mgid)); 287 + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); 288 + } 289 + 290 + int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, 291 + u32 qpn, u16 uid) 292 + { 293 + u32 out[MLX5_ST_SZ_DW(detach_from_mcg_out)] = {}; 294 + u32 in[MLX5_ST_SZ_DW(detach_from_mcg_in)] = {}; 295 + void *gid; 296 + 297 + MLX5_SET(detach_from_mcg_in, in, opcode, MLX5_CMD_OP_DETACH_FROM_MCG); 298 + MLX5_SET(detach_from_mcg_in, in, qpn, qpn); 299 + MLX5_SET(detach_from_mcg_in, in, uid, uid); 300 + gid = MLX5_ADDR_OF(detach_from_mcg_in, in, multicast_gid); 301 + memcpy(gid, mgid, 
sizeof(*mgid)); 302 + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); 303 + } 304 + 305 + int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid) 306 + { 307 + u32 out[MLX5_ST_SZ_DW(alloc_xrcd_out)] = {}; 308 + u32 in[MLX5_ST_SZ_DW(alloc_xrcd_in)] = {}; 309 + int err; 310 + 311 + MLX5_SET(alloc_xrcd_in, in, opcode, MLX5_CMD_OP_ALLOC_XRCD); 312 + MLX5_SET(alloc_xrcd_in, in, uid, uid); 313 + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); 314 + if (!err) 315 + *xrcdn = MLX5_GET(alloc_xrcd_out, out, xrcd); 316 + return err; 317 + } 318 + 319 + int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid) 320 + { 321 + u32 out[MLX5_ST_SZ_DW(dealloc_xrcd_out)] = {}; 322 + u32 in[MLX5_ST_SZ_DW(dealloc_xrcd_in)] = {}; 323 + 324 + MLX5_SET(dealloc_xrcd_in, in, opcode, MLX5_CMD_OP_DEALLOC_XRCD); 325 + MLX5_SET(dealloc_xrcd_in, in, xrcd, xrcdn); 326 + MLX5_SET(dealloc_xrcd_in, in, uid, uid); 327 + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); 328 + }
+14
drivers/infiniband/hw/mlx5/cmd.h
··· 47 47 int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr, 48 48 u64 length, u32 alignment); 49 49 int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length); 50 + void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid); 51 + void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid); 52 + void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid); 53 + void mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid); 54 + int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn, 55 + u16 uid); 56 + void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn, 57 + u16 uid); 58 + int mlx5_cmd_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, 59 + u32 qpn, u16 uid); 60 + int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, 61 + u32 qpn, u16 uid); 62 + int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid); 63 + int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid); 50 64 #endif /* MLX5_IB_CMD_H */
+2 -1
drivers/infiniband/hw/mlx5/cq.c
··· 874 874 cq->private_flags |= MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD; 875 875 } 876 876 877 + MLX5_SET(create_cq_in, *cqb, uid, to_mucontext(context)->devx_uid); 877 878 return 0; 878 879 879 880 err_cqb: ··· 1455 1454 return err; 1456 1455 } 1457 1456 1458 - int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq) 1457 + int mlx5_ib_get_cqe_size(struct ib_cq *ibcq) 1459 1458 { 1460 1459 struct mlx5_ib_cq *cq; 1461 1460
+285 -73
drivers/infiniband/hw/mlx5/devx.c
··· 19 19 #define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in) 20 20 struct devx_obj { 21 21 struct mlx5_core_dev *mdev; 22 - u32 obj_id; 22 + u64 obj_id; 23 23 u32 dinlen; /* destroy inbox length */ 24 24 u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW]; 25 25 }; ··· 45 45 return to_mucontext(ib_uverbs_get_ucontext(file)); 46 46 } 47 47 48 - int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context) 48 + int mlx5_ib_devx_create(struct mlx5_ib_dev *dev) 49 49 { 50 50 u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0}; 51 51 u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; 52 52 u64 general_obj_types; 53 53 void *hdr; 54 54 int err; 55 + u16 uid; 55 56 56 57 hdr = MLX5_ADDR_OF(create_uctx_in, in, hdr); 57 58 ··· 61 60 !(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UMEM)) 62 61 return -EINVAL; 63 62 64 - if (!capable(CAP_NET_RAW)) 65 - return -EPERM; 66 - 67 63 MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); 68 64 MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_UCTX); 69 65 ··· 68 70 if (err) 69 71 return err; 70 72 71 - context->devx_uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); 72 - return 0; 73 + uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); 74 + return uid; 73 75 } 74 76 75 - void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, 76 - struct mlx5_ib_ucontext *context) 77 + void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) 77 78 { 78 79 u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {0}; 79 80 u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; 80 81 81 82 MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); 82 83 MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_UCTX); 83 - MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, context->devx_uid); 84 + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, uid); 84 85 85 86 mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); 86 87 } ··· 106 109 } 107 110 } 108 111 112 + /* 113 + 
* As the obj_id in the firmware is not globally unique the object type 114 + * must be considered upon checking for a valid object id. 115 + * For that the opcode of the creator command is encoded as part of the obj_id. 116 + */ 117 + static u64 get_enc_obj_id(u16 opcode, u32 obj_id) 118 + { 119 + return ((u64)opcode << 32) | obj_id; 120 + } 121 + 109 122 static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in) 110 123 { 111 124 u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); 112 - u32 obj_id; 125 + u64 obj_id; 113 126 114 127 switch (opcode) { 115 128 case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT: 116 129 case MLX5_CMD_OP_QUERY_GENERAL_OBJECT: 117 - obj_id = MLX5_GET(general_obj_in_cmd_hdr, in, obj_id); 130 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_GENERAL_OBJECT, 131 + MLX5_GET(general_obj_in_cmd_hdr, in, 132 + obj_id)); 118 133 break; 119 134 case MLX5_CMD_OP_QUERY_MKEY: 120 - obj_id = MLX5_GET(query_mkey_in, in, mkey_index); 135 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_MKEY, 136 + MLX5_GET(query_mkey_in, in, 137 + mkey_index)); 121 138 break; 122 139 case MLX5_CMD_OP_QUERY_CQ: 123 - obj_id = MLX5_GET(query_cq_in, in, cqn); 140 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ, 141 + MLX5_GET(query_cq_in, in, cqn)); 124 142 break; 125 143 case MLX5_CMD_OP_MODIFY_CQ: 126 - obj_id = MLX5_GET(modify_cq_in, in, cqn); 144 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ, 145 + MLX5_GET(modify_cq_in, in, cqn)); 127 146 break; 128 147 case MLX5_CMD_OP_QUERY_SQ: 129 - obj_id = MLX5_GET(query_sq_in, in, sqn); 148 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ, 149 + MLX5_GET(query_sq_in, in, sqn)); 130 150 break; 131 151 case MLX5_CMD_OP_MODIFY_SQ: 132 - obj_id = MLX5_GET(modify_sq_in, in, sqn); 152 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ, 153 + MLX5_GET(modify_sq_in, in, sqn)); 133 154 break; 134 155 case MLX5_CMD_OP_QUERY_RQ: 135 - obj_id = MLX5_GET(query_rq_in, in, rqn); 156 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ, 157 + 
MLX5_GET(query_rq_in, in, rqn)); 136 158 break; 137 159 case MLX5_CMD_OP_MODIFY_RQ: 138 - obj_id = MLX5_GET(modify_rq_in, in, rqn); 160 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ, 161 + MLX5_GET(modify_rq_in, in, rqn)); 139 162 break; 140 163 case MLX5_CMD_OP_QUERY_RMP: 141 - obj_id = MLX5_GET(query_rmp_in, in, rmpn); 164 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RMP, 165 + MLX5_GET(query_rmp_in, in, rmpn)); 142 166 break; 143 167 case MLX5_CMD_OP_MODIFY_RMP: 144 - obj_id = MLX5_GET(modify_rmp_in, in, rmpn); 168 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RMP, 169 + MLX5_GET(modify_rmp_in, in, rmpn)); 145 170 break; 146 171 case MLX5_CMD_OP_QUERY_RQT: 147 - obj_id = MLX5_GET(query_rqt_in, in, rqtn); 172 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT, 173 + MLX5_GET(query_rqt_in, in, rqtn)); 148 174 break; 149 175 case MLX5_CMD_OP_MODIFY_RQT: 150 - obj_id = MLX5_GET(modify_rqt_in, in, rqtn); 176 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT, 177 + MLX5_GET(modify_rqt_in, in, rqtn)); 151 178 break; 152 179 case MLX5_CMD_OP_QUERY_TIR: 153 - obj_id = MLX5_GET(query_tir_in, in, tirn); 180 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR, 181 + MLX5_GET(query_tir_in, in, tirn)); 154 182 break; 155 183 case MLX5_CMD_OP_MODIFY_TIR: 156 - obj_id = MLX5_GET(modify_tir_in, in, tirn); 184 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR, 185 + MLX5_GET(modify_tir_in, in, tirn)); 157 186 break; 158 187 case MLX5_CMD_OP_QUERY_TIS: 159 - obj_id = MLX5_GET(query_tis_in, in, tisn); 188 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS, 189 + MLX5_GET(query_tis_in, in, tisn)); 160 190 break; 161 191 case MLX5_CMD_OP_MODIFY_TIS: 162 - obj_id = MLX5_GET(modify_tis_in, in, tisn); 192 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS, 193 + MLX5_GET(modify_tis_in, in, tisn)); 163 194 break; 164 195 case MLX5_CMD_OP_QUERY_FLOW_TABLE: 165 - obj_id = MLX5_GET(query_flow_table_in, in, table_id); 196 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_TABLE, 197 + 
MLX5_GET(query_flow_table_in, in, 198 + table_id)); 166 199 break; 167 200 case MLX5_CMD_OP_MODIFY_FLOW_TABLE: 168 - obj_id = MLX5_GET(modify_flow_table_in, in, table_id); 201 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_TABLE, 202 + MLX5_GET(modify_flow_table_in, in, 203 + table_id)); 169 204 break; 170 205 case MLX5_CMD_OP_QUERY_FLOW_GROUP: 171 - obj_id = MLX5_GET(query_flow_group_in, in, group_id); 206 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_GROUP, 207 + MLX5_GET(query_flow_group_in, in, 208 + group_id)); 172 209 break; 173 210 case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: 174 - obj_id = MLX5_GET(query_fte_in, in, flow_index); 211 + obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY, 212 + MLX5_GET(query_fte_in, in, 213 + flow_index)); 175 214 break; 176 215 case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: 177 - obj_id = MLX5_GET(set_fte_in, in, flow_index); 216 + obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY, 217 + MLX5_GET(set_fte_in, in, flow_index)); 178 218 break; 179 219 case MLX5_CMD_OP_QUERY_Q_COUNTER: 180 - obj_id = MLX5_GET(query_q_counter_in, in, counter_set_id); 220 + obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_Q_COUNTER, 221 + MLX5_GET(query_q_counter_in, in, 222 + counter_set_id)); 181 223 break; 182 224 case MLX5_CMD_OP_QUERY_FLOW_COUNTER: 183 - obj_id = MLX5_GET(query_flow_counter_in, in, flow_counter_id); 225 + obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_FLOW_COUNTER, 226 + MLX5_GET(query_flow_counter_in, in, 227 + flow_counter_id)); 184 228 break; 185 229 case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT: 186 - obj_id = MLX5_GET(general_obj_in_cmd_hdr, in, obj_id); 230 + obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT, 231 + MLX5_GET(general_obj_in_cmd_hdr, in, 232 + obj_id)); 187 233 break; 188 234 case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: 189 - obj_id = MLX5_GET(query_scheduling_element_in, in, 190 - scheduling_element_id); 235 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT, 236 + 
MLX5_GET(query_scheduling_element_in, 237 + in, scheduling_element_id)); 191 238 break; 192 239 case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT: 193 - obj_id = MLX5_GET(modify_scheduling_element_in, in, 194 - scheduling_element_id); 240 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT, 241 + MLX5_GET(modify_scheduling_element_in, 242 + in, scheduling_element_id)); 195 243 break; 196 244 case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: 197 - obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port); 245 + obj_id = get_enc_obj_id(MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT, 246 + MLX5_GET(add_vxlan_udp_dport_in, in, 247 + vxlan_udp_port)); 198 248 break; 199 249 case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY: 200 - obj_id = MLX5_GET(query_l2_table_entry_in, in, table_index); 250 + obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_L2_TABLE_ENTRY, 251 + MLX5_GET(query_l2_table_entry_in, in, 252 + table_index)); 201 253 break; 202 254 case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: 203 - obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index); 255 + obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_L2_TABLE_ENTRY, 256 + MLX5_GET(set_l2_table_entry_in, in, 257 + table_index)); 204 258 break; 205 259 case MLX5_CMD_OP_QUERY_QP: 206 - obj_id = MLX5_GET(query_qp_in, in, qpn); 260 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, 261 + MLX5_GET(query_qp_in, in, qpn)); 207 262 break; 208 263 case MLX5_CMD_OP_RST2INIT_QP: 209 - obj_id = MLX5_GET(rst2init_qp_in, in, qpn); 264 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, 265 + MLX5_GET(rst2init_qp_in, in, qpn)); 210 266 break; 211 267 case MLX5_CMD_OP_INIT2RTR_QP: 212 - obj_id = MLX5_GET(init2rtr_qp_in, in, qpn); 268 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, 269 + MLX5_GET(init2rtr_qp_in, in, qpn)); 213 270 break; 214 271 case MLX5_CMD_OP_RTR2RTS_QP: 215 - obj_id = MLX5_GET(rtr2rts_qp_in, in, qpn); 272 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, 273 + MLX5_GET(rtr2rts_qp_in, in, qpn)); 216 274 break; 217 275 case MLX5_CMD_OP_RTS2RTS_QP: 218 - obj_id = 
MLX5_GET(rts2rts_qp_in, in, qpn); 276 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, 277 + MLX5_GET(rts2rts_qp_in, in, qpn)); 219 278 break; 220 279 case MLX5_CMD_OP_SQERR2RTS_QP: 221 - obj_id = MLX5_GET(sqerr2rts_qp_in, in, qpn); 280 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, 281 + MLX5_GET(sqerr2rts_qp_in, in, qpn)); 222 282 break; 223 283 case MLX5_CMD_OP_2ERR_QP: 224 - obj_id = MLX5_GET(qp_2err_in, in, qpn); 284 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, 285 + MLX5_GET(qp_2err_in, in, qpn)); 225 286 break; 226 287 case MLX5_CMD_OP_2RST_QP: 227 - obj_id = MLX5_GET(qp_2rst_in, in, qpn); 288 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, 289 + MLX5_GET(qp_2rst_in, in, qpn)); 228 290 break; 229 291 case MLX5_CMD_OP_QUERY_DCT: 230 - obj_id = MLX5_GET(query_dct_in, in, dctn); 292 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT, 293 + MLX5_GET(query_dct_in, in, dctn)); 231 294 break; 232 295 case MLX5_CMD_OP_QUERY_XRQ: 233 - obj_id = MLX5_GET(query_xrq_in, in, xrqn); 296 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ, 297 + MLX5_GET(query_xrq_in, in, xrqn)); 234 298 break; 235 299 case MLX5_CMD_OP_QUERY_XRC_SRQ: 236 - obj_id = MLX5_GET(query_xrc_srq_in, in, xrc_srqn); 300 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRC_SRQ, 301 + MLX5_GET(query_xrc_srq_in, in, 302 + xrc_srqn)); 237 303 break; 238 304 case MLX5_CMD_OP_ARM_XRC_SRQ: 239 - obj_id = MLX5_GET(arm_xrc_srq_in, in, xrc_srqn); 305 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRC_SRQ, 306 + MLX5_GET(arm_xrc_srq_in, in, xrc_srqn)); 240 307 break; 241 308 case MLX5_CMD_OP_QUERY_SRQ: 242 - obj_id = MLX5_GET(query_srq_in, in, srqn); 309 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SRQ, 310 + MLX5_GET(query_srq_in, in, srqn)); 243 311 break; 244 312 case MLX5_CMD_OP_ARM_RQ: 245 - obj_id = MLX5_GET(arm_rq_in, in, srq_number); 313 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ, 314 + MLX5_GET(arm_rq_in, in, srq_number)); 246 315 break; 247 316 case MLX5_CMD_OP_DRAIN_DCT: 248 317 case 
MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION: 249 - obj_id = MLX5_GET(drain_dct_in, in, dctn); 318 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT, 319 + MLX5_GET(drain_dct_in, in, dctn)); 250 320 break; 251 321 case MLX5_CMD_OP_ARM_XRQ: 252 - obj_id = MLX5_GET(arm_xrq_in, in, xrqn); 322 + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ, 323 + MLX5_GET(arm_xrq_in, in, xrqn)); 253 324 break; 254 325 default: 255 326 return false; ··· 329 264 return false; 330 265 } 331 266 332 - static bool devx_is_obj_create_cmd(const void *in) 267 + static void devx_set_umem_valid(const void *in) 333 268 { 334 269 u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); 335 270 336 271 switch (opcode) { 272 + case MLX5_CMD_OP_CREATE_MKEY: 273 + MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1); 274 + break; 275 + case MLX5_CMD_OP_CREATE_CQ: 276 + { 277 + void *cqc; 278 + 279 + MLX5_SET(create_cq_in, in, cq_umem_valid, 1); 280 + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); 281 + MLX5_SET(cqc, cqc, dbr_umem_valid, 1); 282 + break; 283 + } 284 + case MLX5_CMD_OP_CREATE_QP: 285 + { 286 + void *qpc; 287 + 288 + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); 289 + MLX5_SET(qpc, qpc, dbr_umem_valid, 1); 290 + MLX5_SET(create_qp_in, in, wq_umem_valid, 1); 291 + break; 292 + } 293 + 294 + case MLX5_CMD_OP_CREATE_RQ: 295 + { 296 + void *rqc, *wq; 297 + 298 + rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); 299 + wq = MLX5_ADDR_OF(rqc, rqc, wq); 300 + MLX5_SET(wq, wq, dbr_umem_valid, 1); 301 + MLX5_SET(wq, wq, wq_umem_valid, 1); 302 + break; 303 + } 304 + 305 + case MLX5_CMD_OP_CREATE_SQ: 306 + { 307 + void *sqc, *wq; 308 + 309 + sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); 310 + wq = MLX5_ADDR_OF(sqc, sqc, wq); 311 + MLX5_SET(wq, wq, dbr_umem_valid, 1); 312 + MLX5_SET(wq, wq, wq_umem_valid, 1); 313 + break; 314 + } 315 + 316 + case MLX5_CMD_OP_MODIFY_CQ: 317 + MLX5_SET(modify_cq_in, in, cq_umem_valid, 1); 318 + break; 319 + 320 + case MLX5_CMD_OP_CREATE_RMP: 321 + { 322 + void *rmpc, *wq; 323 + 324 + 
rmpc = MLX5_ADDR_OF(create_rmp_in, in, ctx); 325 + wq = MLX5_ADDR_OF(rmpc, rmpc, wq); 326 + MLX5_SET(wq, wq, dbr_umem_valid, 1); 327 + MLX5_SET(wq, wq, wq_umem_valid, 1); 328 + break; 329 + } 330 + 331 + case MLX5_CMD_OP_CREATE_XRQ: 332 + { 333 + void *xrqc, *wq; 334 + 335 + xrqc = MLX5_ADDR_OF(create_xrq_in, in, xrq_context); 336 + wq = MLX5_ADDR_OF(xrqc, xrqc, wq); 337 + MLX5_SET(wq, wq, dbr_umem_valid, 1); 338 + MLX5_SET(wq, wq, wq_umem_valid, 1); 339 + break; 340 + } 341 + 342 + case MLX5_CMD_OP_CREATE_XRC_SRQ: 343 + { 344 + void *xrc_srqc; 345 + 346 + MLX5_SET(create_xrc_srq_in, in, xrc_srq_umem_valid, 1); 347 + xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, in, 348 + xrc_srq_context_entry); 349 + MLX5_SET(xrc_srqc, xrc_srqc, dbr_umem_valid, 1); 350 + break; 351 + } 352 + 353 + default: 354 + return; 355 + } 356 + } 357 + 358 + static bool devx_is_obj_create_cmd(const void *in, u16 *opcode) 359 + { 360 + *opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); 361 + 362 + switch (*opcode) { 337 363 case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: 338 364 case MLX5_CMD_OP_CREATE_MKEY: 339 365 case MLX5_CMD_OP_CREATE_CQ: ··· 541 385 } 542 386 } 543 387 388 + static bool devx_is_whitelist_cmd(void *in) 389 + { 390 + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); 391 + 392 + switch (opcode) { 393 + case MLX5_CMD_OP_QUERY_HCA_CAP: 394 + case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: 395 + return true; 396 + default: 397 + return false; 398 + } 399 + } 400 + 401 + static int devx_get_uid(struct mlx5_ib_ucontext *c, void *cmd_in) 402 + { 403 + if (devx_is_whitelist_cmd(cmd_in)) { 404 + struct mlx5_ib_dev *dev; 405 + 406 + if (c->devx_uid) 407 + return c->devx_uid; 408 + 409 + dev = to_mdev(c->ibucontext.device); 410 + if (dev->devx_whitelist_uid) 411 + return dev->devx_whitelist_uid; 412 + 413 + return -EOPNOTSUPP; 414 + } 415 + 416 + if (!c->devx_uid) 417 + return -EINVAL; 418 + 419 + if (!capable(CAP_NET_RAW)) 420 + return -EPERM; 421 + 422 + return c->devx_uid; 423 + } 
544 424 static bool devx_is_general_cmd(void *in) 545 425 { 546 426 u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); 547 427 548 428 switch (opcode) { 549 429 case MLX5_CMD_OP_QUERY_HCA_CAP: 430 + case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: 550 431 case MLX5_CMD_OP_QUERY_VPORT_STATE: 551 432 case MLX5_CMD_OP_QUERY_ADAPTER: 552 433 case MLX5_CMD_OP_QUERY_ISSI: ··· 691 498 MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT); 692 499 void *cmd_out; 693 500 int err; 501 + int uid; 694 502 695 503 c = devx_ufile2uctx(file); 696 504 if (IS_ERR(c)) 697 505 return PTR_ERR(c); 698 506 dev = to_mdev(c->ibucontext.device); 699 507 700 - if (!c->devx_uid) 701 - return -EPERM; 508 + uid = devx_get_uid(c, cmd_in); 509 + if (uid < 0) 510 + return uid; 702 511 703 512 /* Only white list of some general HCA commands are allowed for this method. */ 704 513 if (!devx_is_general_cmd(cmd_in)) ··· 710 515 if (IS_ERR(cmd_out)) 711 516 return PTR_ERR(cmd_out); 712 517 713 - MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); 518 + MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); 714 519 err = mlx5_cmd_exec(dev->mdev, cmd_in, 715 520 uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN), 716 521 cmd_out, cmd_out_len); ··· 921 726 u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; 922 727 struct devx_obj *obj; 923 728 int err; 729 + int uid; 730 + u32 obj_id; 731 + u16 opcode; 924 732 925 - if (!c->devx_uid) 926 - return -EPERM; 733 + uid = devx_get_uid(c, cmd_in); 734 + if (uid < 0) 735 + return uid; 927 736 928 - if (!devx_is_obj_create_cmd(cmd_in)) 737 + if (!devx_is_obj_create_cmd(cmd_in, &opcode)) 929 738 return -EINVAL; 930 739 931 740 cmd_out = uverbs_zalloc(attrs, cmd_out_len); ··· 940 741 if (!obj) 941 742 return -ENOMEM; 942 743 943 - MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); 744 + MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); 745 + devx_set_umem_valid(cmd_in); 746 + 944 747 err = mlx5_cmd_exec(dev->mdev, cmd_in, 945 748 uverbs_attr_get_len(attrs, 
MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN), 946 749 cmd_out, cmd_out_len); ··· 951 750 952 751 uobj->object = obj; 953 752 obj->mdev = dev->mdev; 954 - devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen, &obj->obj_id); 753 + devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen, 754 + &obj_id); 955 755 WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32)); 956 756 957 757 err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len); 958 758 if (err) 959 759 goto obj_destroy; 960 760 761 + obj->obj_id = get_enc_obj_id(opcode, obj_id); 961 762 return 0; 962 763 963 764 obj_destroy: ··· 981 778 struct devx_obj *obj = uobj->object; 982 779 void *cmd_out; 983 780 int err; 781 + int uid; 984 782 985 - if (!c->devx_uid) 986 - return -EPERM; 783 + uid = devx_get_uid(c, cmd_in); 784 + if (uid < 0) 785 + return uid; 987 786 988 787 if (!devx_is_obj_modify_cmd(cmd_in)) 989 788 return -EINVAL; ··· 997 792 if (IS_ERR(cmd_out)) 998 793 return PTR_ERR(cmd_out); 999 794 1000 - MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); 795 + MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); 796 + devx_set_umem_valid(cmd_in); 797 + 1001 798 err = mlx5_cmd_exec(obj->mdev, cmd_in, 1002 799 uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN), 1003 800 cmd_out, cmd_out_len); ··· 1022 815 struct devx_obj *obj = uobj->object; 1023 816 void *cmd_out; 1024 817 int err; 818 + int uid; 1025 819 1026 - if (!c->devx_uid) 1027 - return -EPERM; 820 + uid = devx_get_uid(c, cmd_in); 821 + if (uid < 0) 822 + return uid; 1028 823 1029 824 if (!devx_is_obj_query_cmd(cmd_in)) 1030 825 return -EINVAL; ··· 1038 829 if (IS_ERR(cmd_out)) 1039 830 return PTR_ERR(cmd_out); 1040 831 1041 - MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); 832 + MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); 1042 833 err = mlx5_cmd_exec(obj->mdev, cmd_in, 1043 834 uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN), 
1044 835 cmd_out, cmd_out_len); ··· 1137 928 int err; 1138 929 1139 930 if (!c->devx_uid) 931 + return -EINVAL; 932 + 933 + if (!capable(CAP_NET_RAW)) 1140 934 return -EPERM; 1141 935 1142 936 obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL);
+384 -11
drivers/infiniband/hw/mlx5/flow.c
··· 7 7 #include <rdma/ib_verbs.h> 8 8 #include <rdma/uverbs_types.h> 9 9 #include <rdma/uverbs_ioctl.h> 10 + #include <rdma/uverbs_std_types.h> 10 11 #include <rdma/mlx5_user_ioctl_cmds.h> 12 + #include <rdma/mlx5_user_ioctl_verbs.h> 11 13 #include <rdma/ib_umem.h> 12 14 #include <linux/mlx5/driver.h> 13 15 #include <linux/mlx5/fs.h> ··· 17 15 18 16 #define UVERBS_MODULE_NAME mlx5_ib 19 17 #include <rdma/uverbs_named_ioctl.h> 18 + 19 + static int 20 + mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type, 21 + enum mlx5_flow_namespace_type *namespace) 22 + { 23 + switch (table_type) { 24 + case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX: 25 + *namespace = MLX5_FLOW_NAMESPACE_BYPASS; 26 + break; 27 + case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX: 28 + *namespace = MLX5_FLOW_NAMESPACE_EGRESS; 29 + break; 30 + default: 31 + return -EINVAL; 32 + } 33 + 34 + return 0; 35 + } 20 36 21 37 static const struct uverbs_attr_spec mlx5_ib_flow_type[] = { 22 38 [MLX5_IB_FLOW_TYPE_NORMAL] = { ··· 58 38 }, 59 39 }; 60 40 41 + #define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2 61 42 static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( 62 43 struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) 63 44 { 45 + struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG}; 64 46 struct mlx5_ib_flow_handler *flow_handler; 65 47 struct mlx5_ib_flow_matcher *fs_matcher; 48 + struct ib_uobject **arr_flow_actions; 49 + struct ib_uflow_resources *uflow_res; 66 50 void *devx_obj; 67 51 int dest_id, dest_type; 68 52 void *cmd_in; ··· 76 52 struct ib_uobject *uobj = 77 53 uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE); 78 54 struct mlx5_ib_dev *dev = to_mdev(uobj->context->device); 55 + int len, ret, i; 79 56 80 57 if (!capable(CAP_NET_RAW)) 81 58 return -EPERM; ··· 86 61 dest_qp = uverbs_attr_is_valid(attrs, 87 62 MLX5_IB_ATTR_CREATE_FLOW_DEST_QP); 88 63 89 - if ((dest_devx && dest_qp) || (!dest_devx && !dest_qp)) 64 + fs_matcher = 
uverbs_attr_get_obj(attrs, 65 + MLX5_IB_ATTR_CREATE_FLOW_MATCHER); 66 + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS && 67 + ((dest_devx && dest_qp) || (!dest_devx && !dest_qp))) 68 + return -EINVAL; 69 + 70 + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS && 71 + (dest_devx || dest_qp)) 90 72 return -EINVAL; 91 73 92 74 if (dest_devx) { ··· 107 75 */ 108 76 if (!mlx5_ib_devx_is_flow_dest(devx_obj, &dest_id, &dest_type)) 109 77 return -EINVAL; 110 - } else { 78 + } else if (dest_qp) { 111 79 struct mlx5_ib_qp *mqp; 112 80 113 81 qp = uverbs_attr_get_obj(attrs, ··· 124 92 else 125 93 dest_id = mqp->raw_packet_qp.rq.tirn; 126 94 dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR; 95 + } else { 96 + dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT; 127 97 } 128 98 129 99 if (dev->rep) ··· 135 101 attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); 136 102 inlen = uverbs_attr_get_len(attrs, 137 103 MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); 138 - fs_matcher = uverbs_attr_get_obj(attrs, 139 - MLX5_IB_ATTR_CREATE_FLOW_MATCHER); 140 - flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, cmd_in, inlen, 141 - dest_id, dest_type); 142 - if (IS_ERR(flow_handler)) 143 - return PTR_ERR(flow_handler); 144 104 145 - ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev); 105 + uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS); 106 + if (!uflow_res) 107 + return -ENOMEM; 108 + 109 + len = uverbs_attr_get_uobjs_arr(attrs, 110 + MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions); 111 + for (i = 0; i < len; i++) { 112 + struct mlx5_ib_flow_action *maction = 113 + to_mflow_act(arr_flow_actions[i]->object); 114 + 115 + ret = parse_flow_flow_action(maction, false, &flow_act); 116 + if (ret) 117 + goto err_out; 118 + flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE, 119 + arr_flow_actions[i]->object); 120 + } 121 + 122 + ret = uverbs_copy_from(&flow_act.flow_tag, attrs, 123 + MLX5_IB_ATTR_CREATE_FLOW_TAG); 124 + if (!ret) { 125 + if 
(flow_act.flow_tag >= BIT(24)) { 126 + ret = -EINVAL; 127 + goto err_out; 128 + } 129 + flow_act.flags |= FLOW_ACT_HAS_TAG; 130 + } 131 + 132 + flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, &flow_act, 133 + cmd_in, inlen, 134 + dest_id, dest_type); 135 + if (IS_ERR(flow_handler)) { 136 + ret = PTR_ERR(flow_handler); 137 + goto err_out; 138 + } 139 + 140 + ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res); 146 141 147 142 return 0; 143 + err_out: 144 + ib_uverbs_flow_resources_free(uflow_res); 145 + return ret; 148 146 } 149 147 150 148 static int flow_matcher_cleanup(struct ib_uobject *uobject, ··· 200 134 attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE); 201 135 struct mlx5_ib_dev *dev = to_mdev(uobj->context->device); 202 136 struct mlx5_ib_flow_matcher *obj; 137 + u32 flags; 203 138 int err; 204 139 205 140 obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL); 206 141 if (!obj) 207 142 return -ENOMEM; 208 143 144 + obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS; 209 145 obj->mask_len = uverbs_attr_get_len( 210 146 attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK); 211 147 err = uverbs_copy_from(&obj->matcher_mask, ··· 233 165 if (err) 234 166 goto end; 235 167 168 + err = uverbs_get_flags32(&flags, attrs, 169 + MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, 170 + IB_FLOW_ATTR_FLAGS_EGRESS); 171 + if (err) 172 + goto end; 173 + 174 + if (flags) { 175 + err = mlx5_ib_ft_type_to_namespace( 176 + MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX, &obj->ns_type); 177 + if (err) 178 + goto end; 179 + } 180 + 236 181 uobj->object = obj; 237 182 obj->mdev = dev->mdev; 238 183 atomic_set(&obj->usecnt, 0); ··· 254 173 end: 255 174 kfree(obj); 256 175 return err; 176 + } 177 + 178 + void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction) 179 + { 180 + switch (maction->flow_action_raw.sub_type) { 181 + case MLX5_IB_FLOW_ACTION_MODIFY_HEADER: 182 + mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev, 183 + 
maction->flow_action_raw.action_id); 184 + break; 185 + case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT: 186 + mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev, 187 + maction->flow_action_raw.action_id); 188 + break; 189 + case MLX5_IB_FLOW_ACTION_DECAP: 190 + break; 191 + default: 192 + break; 193 + } 194 + } 195 + 196 + static struct ib_flow_action * 197 + mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev, 198 + enum mlx5_ib_uapi_flow_table_type ft_type, 199 + u8 num_actions, void *in) 200 + { 201 + enum mlx5_flow_namespace_type namespace; 202 + struct mlx5_ib_flow_action *maction; 203 + int ret; 204 + 205 + ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace); 206 + if (ret) 207 + return ERR_PTR(-EINVAL); 208 + 209 + maction = kzalloc(sizeof(*maction), GFP_KERNEL); 210 + if (!maction) 211 + return ERR_PTR(-ENOMEM); 212 + 213 + ret = mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in, 214 + &maction->flow_action_raw.action_id); 215 + 216 + if (ret) { 217 + kfree(maction); 218 + return ERR_PTR(ret); 219 + } 220 + maction->flow_action_raw.sub_type = 221 + MLX5_IB_FLOW_ACTION_MODIFY_HEADER; 222 + maction->flow_action_raw.dev = dev; 223 + 224 + return &maction->ib_action; 225 + } 226 + 227 + static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev) 228 + { 229 + return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, 230 + max_modify_header_actions) || 231 + MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, max_modify_header_actions); 232 + } 233 + 234 + static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)( 235 + struct ib_uverbs_file *file, 236 + struct uverbs_attr_bundle *attrs) 237 + { 238 + struct ib_uobject *uobj = uverbs_attr_get_uobject( 239 + attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE); 240 + struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); 241 + enum mlx5_ib_uapi_flow_table_type ft_type; 242 + struct ib_flow_action *action; 243 + size_t num_actions; 244 + void *in; 245 + int len; 246 + int ret; 247 + 248 + if 
(!mlx5_ib_modify_header_supported(mdev)) 249 + return -EOPNOTSUPP; 250 + 251 + in = uverbs_attr_get_alloced_ptr(attrs, 252 + MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM); 253 + len = uverbs_attr_get_len(attrs, 254 + MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM); 255 + 256 + if (len % MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)) 257 + return -EINVAL; 258 + 259 + ret = uverbs_get_const(&ft_type, attrs, 260 + MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE); 261 + if (ret) 262 + return ret; 263 + 264 + num_actions = len / MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto), 265 + action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in); 266 + if (IS_ERR(action)) 267 + return PTR_ERR(action); 268 + 269 + uverbs_flow_action_fill_action(action, uobj, uobj->context->device, 270 + IB_FLOW_ACTION_UNSPECIFIED); 271 + 272 + return 0; 273 + } 274 + 275 + static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev, 276 + u8 packet_reformat_type, 277 + u8 ft_type) 278 + { 279 + switch (packet_reformat_type) { 280 + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL: 281 + if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX) 282 + return MLX5_CAP_FLOWTABLE(ibdev->mdev, 283 + encap_general_header); 284 + break; 285 + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL: 286 + if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX) 287 + return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev, 288 + reformat_l2_to_l3_tunnel); 289 + break; 290 + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2: 291 + if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX) 292 + return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, 293 + reformat_l3_tunnel_to_l2); 294 + break; 295 + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2: 296 + if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX) 297 + return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap); 298 + break; 299 + default: 300 + break; 301 + } 302 + 303 + return false; 304 + 
} 305 + 306 + static int mlx5_ib_dv_to_prm_packet_reforamt_type(u8 dv_prt, u8 *prm_prt) 307 + { 308 + switch (dv_prt) { 309 + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL: 310 + *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL; 311 + break; 312 + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2: 313 + *prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2; 314 + break; 315 + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL: 316 + *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL; 317 + break; 318 + default: 319 + return -EINVAL; 320 + } 321 + 322 + return 0; 323 + } 324 + 325 + static int mlx5_ib_flow_action_create_packet_reformat_ctx( 326 + struct mlx5_ib_dev *dev, 327 + struct mlx5_ib_flow_action *maction, 328 + u8 ft_type, u8 dv_prt, 329 + void *in, size_t len) 330 + { 331 + enum mlx5_flow_namespace_type namespace; 332 + u8 prm_prt; 333 + int ret; 334 + 335 + ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace); 336 + if (ret) 337 + return ret; 338 + 339 + ret = mlx5_ib_dv_to_prm_packet_reforamt_type(dv_prt, &prm_prt); 340 + if (ret) 341 + return ret; 342 + 343 + ret = mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len, 344 + in, namespace, 345 + &maction->flow_action_raw.action_id); 346 + if (ret) 347 + return ret; 348 + 349 + maction->flow_action_raw.sub_type = 350 + MLX5_IB_FLOW_ACTION_PACKET_REFORMAT; 351 + maction->flow_action_raw.dev = dev; 352 + 353 + return 0; 354 + } 355 + 356 + static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)( 357 + struct ib_uverbs_file *file, 358 + struct uverbs_attr_bundle *attrs) 359 + { 360 + struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, 361 + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE); 362 + struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); 363 + enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt; 364 + enum mlx5_ib_uapi_flow_table_type ft_type; 365 + struct mlx5_ib_flow_action *maction; 366 + int ret; 367 + 368 + ret = 
uverbs_get_const(&ft_type, attrs, 369 + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE); 370 + if (ret) 371 + return ret; 372 + 373 + ret = uverbs_get_const(&dv_prt, attrs, 374 + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE); 375 + if (ret) 376 + return ret; 377 + 378 + if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type)) 379 + return -EOPNOTSUPP; 380 + 381 + maction = kzalloc(sizeof(*maction), GFP_KERNEL); 382 + if (!maction) 383 + return -ENOMEM; 384 + 385 + if (dv_prt == 386 + MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) { 387 + maction->flow_action_raw.sub_type = 388 + MLX5_IB_FLOW_ACTION_DECAP; 389 + maction->flow_action_raw.dev = mdev; 390 + } else { 391 + void *in; 392 + int len; 393 + 394 + in = uverbs_attr_get_alloced_ptr(attrs, 395 + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF); 396 + if (IS_ERR(in)) { 397 + ret = PTR_ERR(in); 398 + goto free_maction; 399 + } 400 + 401 + len = uverbs_attr_get_len(attrs, 402 + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF); 403 + 404 + ret = mlx5_ib_flow_action_create_packet_reformat_ctx(mdev, 405 + maction, ft_type, dv_prt, in, len); 406 + if (ret) 407 + goto free_maction; 408 + } 409 + 410 + uverbs_flow_action_fill_action(&maction->ib_action, uobj, 411 + uobj->context->device, 412 + IB_FLOW_ACTION_UNSPECIFIED); 413 + return 0; 414 + 415 + free_maction: 416 + kfree(maction); 417 + return ret; 257 418 } 258 419 259 420 DECLARE_UVERBS_NAMED_METHOD( ··· 518 195 UVERBS_ACCESS_READ), 519 196 UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX, 520 197 MLX5_IB_OBJECT_DEVX_OBJ, 521 - UVERBS_ACCESS_READ)); 198 + UVERBS_ACCESS_READ), 199 + UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, 200 + UVERBS_OBJECT_FLOW_ACTION, 201 + UVERBS_ACCESS_READ, 1, 202 + MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS, 203 + UA_OPTIONAL), 204 + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG, 205 + UVERBS_ATTR_TYPE(u32), 206 + UA_OPTIONAL)); 522 207 523 208 DECLARE_UVERBS_NAMED_METHOD_DESTROY( 524 209 
MLX5_IB_METHOD_DESTROY_FLOW, ··· 539 208 UVERBS_OBJECT_FLOW, 540 209 &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW), 541 210 &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW)); 211 + 212 + DECLARE_UVERBS_NAMED_METHOD( 213 + MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER, 214 + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE, 215 + UVERBS_OBJECT_FLOW_ACTION, 216 + UVERBS_ACCESS_NEW, 217 + UA_MANDATORY), 218 + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM, 219 + UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES( 220 + set_action_in_add_action_in_auto)), 221 + UA_MANDATORY, 222 + UA_ALLOC_AND_COPY), 223 + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE, 224 + enum mlx5_ib_uapi_flow_table_type, 225 + UA_MANDATORY)); 226 + 227 + DECLARE_UVERBS_NAMED_METHOD( 228 + MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT, 229 + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE, 230 + UVERBS_OBJECT_FLOW_ACTION, 231 + UVERBS_ACCESS_NEW, 232 + UA_MANDATORY), 233 + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF, 234 + UVERBS_ATTR_MIN_SIZE(1), 235 + UA_ALLOC_AND_COPY, 236 + UA_OPTIONAL), 237 + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE, 238 + enum mlx5_ib_uapi_flow_action_packet_reformat_type, 239 + UA_MANDATORY), 240 + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE, 241 + enum mlx5_ib_uapi_flow_table_type, 242 + UA_MANDATORY)); 243 + 244 + ADD_UVERBS_METHODS( 245 + mlx5_ib_flow_actions, 246 + UVERBS_OBJECT_FLOW_ACTION, 247 + &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER), 248 + &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)); 542 249 543 250 DECLARE_UVERBS_NAMED_METHOD( 544 251 MLX5_IB_METHOD_FLOW_MATCHER_CREATE, ··· 593 224 UA_MANDATORY), 594 225 UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA, 595 226 UVERBS_ATTR_TYPE(u8), 596 - UA_MANDATORY)); 227 + UA_MANDATORY), 228 + UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, 229 + enum 
ib_flow_flags, 230 + UA_OPTIONAL)); 597 231 598 232 DECLARE_UVERBS_NAMED_METHOD_DESTROY( 599 233 MLX5_IB_METHOD_FLOW_MATCHER_DESTROY, ··· 619 247 620 248 root[i++] = &flow_objects; 621 249 root[i++] = &mlx5_ib_fs; 250 + root[i++] = &mlx5_ib_flow_actions; 622 251 623 252 return i; 624 253 }
-3
drivers/infiniband/hw/mlx5/ib_rep.c
··· 39 39 STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR, 40 40 mlx5_ib_stage_post_ib_reg_umr_init, 41 41 NULL), 42 - STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR, 43 - mlx5_ib_stage_class_attr_init, 44 - NULL), 45 42 }; 46 43 47 44 static int
+264 -246
drivers/infiniband/hw/mlx5/main.c
··· 1571 1571 mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]); 1572 1572 } 1573 1573 1574 - static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn) 1574 + int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp) 1575 + { 1576 + int err = 0; 1577 + 1578 + mutex_lock(&dev->lb.mutex); 1579 + if (td) 1580 + dev->lb.user_td++; 1581 + if (qp) 1582 + dev->lb.qps++; 1583 + 1584 + if (dev->lb.user_td == 2 || 1585 + dev->lb.qps == 1) { 1586 + if (!dev->lb.enabled) { 1587 + err = mlx5_nic_vport_update_local_lb(dev->mdev, true); 1588 + dev->lb.enabled = true; 1589 + } 1590 + } 1591 + 1592 + mutex_unlock(&dev->lb.mutex); 1593 + 1594 + return err; 1595 + } 1596 + 1597 + void mlx5_ib_disable_lb(struct mlx5_ib_dev *dev, bool td, bool qp) 1598 + { 1599 + mutex_lock(&dev->lb.mutex); 1600 + if (td) 1601 + dev->lb.user_td--; 1602 + if (qp) 1603 + dev->lb.qps--; 1604 + 1605 + if (dev->lb.user_td == 1 && 1606 + dev->lb.qps == 0) { 1607 + if (dev->lb.enabled) { 1608 + mlx5_nic_vport_update_local_lb(dev->mdev, false); 1609 + dev->lb.enabled = false; 1610 + } 1611 + } 1612 + 1613 + mutex_unlock(&dev->lb.mutex); 1614 + } 1615 + 1616 + static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn, 1617 + u16 uid) 1575 1618 { 1576 1619 int err; 1577 1620 1578 1621 if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) 1579 1622 return 0; 1580 1623 1581 - err = mlx5_core_alloc_transport_domain(dev->mdev, tdn); 1624 + err = mlx5_cmd_alloc_transport_domain(dev->mdev, tdn, uid); 1582 1625 if (err) 1583 1626 return err; 1584 1627 ··· 1630 1587 !MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc))) 1631 1588 return err; 1632 1589 1633 - mutex_lock(&dev->lb_mutex); 1634 - dev->user_td++; 1635 - 1636 - if (dev->user_td == 2) 1637 - err = mlx5_nic_vport_update_local_lb(dev->mdev, true); 1638 - 1639 - mutex_unlock(&dev->lb_mutex); 1640 - return err; 1590 + return mlx5_ib_enable_lb(dev, true, false); 1641 1591 } 1642 1592 1643 - static void 
mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn) 1593 + static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn, 1594 + u16 uid) 1644 1595 { 1645 1596 if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) 1646 1597 return; 1647 1598 1648 - mlx5_core_dealloc_transport_domain(dev->mdev, tdn); 1599 + mlx5_cmd_dealloc_transport_domain(dev->mdev, tdn, uid); 1649 1600 1650 1601 if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) || 1651 1602 (!MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) && 1652 1603 !MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc))) 1653 1604 return; 1654 1605 1655 - mutex_lock(&dev->lb_mutex); 1656 - dev->user_td--; 1657 - 1658 - if (dev->user_td < 2) 1659 - mlx5_nic_vport_update_local_lb(dev->mdev, false); 1660 - 1661 - mutex_unlock(&dev->lb_mutex); 1606 + mlx5_ib_disable_lb(dev, true, false); 1662 1607 } 1663 1608 1664 1609 static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, ··· 1758 1727 context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range; 1759 1728 #endif 1760 1729 1761 - err = mlx5_ib_alloc_transport_domain(dev, &context->tdn); 1762 - if (err) 1763 - goto out_uars; 1764 - 1765 1730 if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) { 1766 - /* Block DEVX on Infiniband as of SELinux */ 1767 - if (mlx5_ib_port_link_layer(ibdev, 1) != IB_LINK_LAYER_ETHERNET) { 1768 - err = -EPERM; 1769 - goto out_td; 1770 - } 1771 - 1772 - err = mlx5_ib_devx_create(dev, context); 1773 - if (err) 1774 - goto out_td; 1731 + err = mlx5_ib_devx_create(dev); 1732 + if (err < 0) 1733 + goto out_uars; 1734 + context->devx_uid = err; 1775 1735 } 1736 + 1737 + err = mlx5_ib_alloc_transport_domain(dev, &context->tdn, 1738 + context->devx_uid); 1739 + if (err) 1740 + goto out_devx; 1776 1741 1777 1742 if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) { 1778 1743 err = mlx5_cmd_dump_fill_mkey(dev->mdev, &dump_fill_mkey); ··· 1776 1749 goto out_mdev; 1777 1750 } 1778 1751 1779 - 
INIT_LIST_HEAD(&context->vma_private_list); 1780 - mutex_init(&context->vma_private_list_mutex); 1781 1752 INIT_LIST_HEAD(&context->db_page_list); 1782 1753 mutex_init(&context->db_page_mutex); 1783 1754 ··· 1851 1826 context->lib_caps = req.lib_caps; 1852 1827 print_lib_caps(dev, context->lib_caps); 1853 1828 1829 + if (mlx5_lag_is_active(dev->mdev)) { 1830 + u8 port = mlx5_core_native_port_num(dev->mdev); 1831 + 1832 + atomic_set(&context->tx_port_affinity, 1833 + atomic_add_return( 1834 + 1, &dev->roce[port].tx_port_affinity)); 1835 + } 1836 + 1854 1837 return &context->ibucontext; 1855 1838 1856 1839 out_mdev: 1840 + mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid); 1841 + out_devx: 1857 1842 if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) 1858 - mlx5_ib_devx_destroy(dev, context); 1859 - out_td: 1860 - mlx5_ib_dealloc_transport_domain(dev, context->tdn); 1843 + mlx5_ib_devx_destroy(dev, context->devx_uid); 1861 1844 1862 1845 out_uars: 1863 1846 deallocate_uars(dev, context); ··· 1888 1855 struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); 1889 1856 struct mlx5_bfreg_info *bfregi; 1890 1857 1891 - if (context->devx_uid) 1892 - mlx5_ib_devx_destroy(dev, context); 1858 + #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 1859 + /* All umem's must be destroyed before destroying the ucontext. 
*/ 1860 + mutex_lock(&ibcontext->per_mm_list_lock); 1861 + WARN_ON(!list_empty(&ibcontext->per_mm_list)); 1862 + mutex_unlock(&ibcontext->per_mm_list_lock); 1863 + #endif 1893 1864 1894 1865 bfregi = &context->bfregi; 1895 - mlx5_ib_dealloc_transport_domain(dev, context->tdn); 1866 + mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid); 1867 + 1868 + if (context->devx_uid) 1869 + mlx5_ib_devx_destroy(dev, context->devx_uid); 1896 1870 1897 1871 deallocate_uars(dev, context); 1898 1872 kfree(bfregi->sys_pages); ··· 1940 1900 return get_arg(offset) | ((offset >> 16) & 0xff) << 8; 1941 1901 } 1942 1902 1943 - static void mlx5_ib_vma_open(struct vm_area_struct *area) 1944 - { 1945 - /* vma_open is called when a new VMA is created on top of our VMA. This 1946 - * is done through either mremap flow or split_vma (usually due to 1947 - * mlock, madvise, munmap, etc.) We do not support a clone of the VMA, 1948 - * as this VMA is strongly hardware related. Therefore we set the 1949 - * vm_ops of the newly created/cloned VMA to NULL, to prevent it from 1950 - * calling us again and trying to do incorrect actions. We assume that 1951 - * the original VMA size is exactly a single page, and therefore all 1952 - * "splitting" operation will not happen to it. 1953 - */ 1954 - area->vm_ops = NULL; 1955 - } 1956 - 1957 - static void mlx5_ib_vma_close(struct vm_area_struct *area) 1958 - { 1959 - struct mlx5_ib_vma_private_data *mlx5_ib_vma_priv_data; 1960 - 1961 - /* It's guaranteed that all VMAs opened on a FD are closed before the 1962 - * file itself is closed, therefore no sync is needed with the regular 1963 - * closing flow. (e.g. mlx5 ib_dealloc_ucontext) 1964 - * However need a sync with accessing the vma as part of 1965 - * mlx5_ib_disassociate_ucontext. 1966 - * The close operation is usually called under mm->mmap_sem except when 1967 - * process is exiting. 1968 - * The exiting case is handled explicitly as part of 1969 - * mlx5_ib_disassociate_ucontext. 
1970 - */ 1971 - mlx5_ib_vma_priv_data = (struct mlx5_ib_vma_private_data *)area->vm_private_data; 1972 - 1973 - /* setting the vma context pointer to null in the mlx5_ib driver's 1974 - * private data, to protect a race condition in 1975 - * mlx5_ib_disassociate_ucontext(). 1976 - */ 1977 - mlx5_ib_vma_priv_data->vma = NULL; 1978 - mutex_lock(mlx5_ib_vma_priv_data->vma_private_list_mutex); 1979 - list_del(&mlx5_ib_vma_priv_data->list); 1980 - mutex_unlock(mlx5_ib_vma_priv_data->vma_private_list_mutex); 1981 - kfree(mlx5_ib_vma_priv_data); 1982 - } 1983 - 1984 - static const struct vm_operations_struct mlx5_ib_vm_ops = { 1985 - .open = mlx5_ib_vma_open, 1986 - .close = mlx5_ib_vma_close 1987 - }; 1988 - 1989 - static int mlx5_ib_set_vma_data(struct vm_area_struct *vma, 1990 - struct mlx5_ib_ucontext *ctx) 1991 - { 1992 - struct mlx5_ib_vma_private_data *vma_prv; 1993 - struct list_head *vma_head = &ctx->vma_private_list; 1994 - 1995 - vma_prv = kzalloc(sizeof(*vma_prv), GFP_KERNEL); 1996 - if (!vma_prv) 1997 - return -ENOMEM; 1998 - 1999 - vma_prv->vma = vma; 2000 - vma_prv->vma_private_list_mutex = &ctx->vma_private_list_mutex; 2001 - vma->vm_private_data = vma_prv; 2002 - vma->vm_ops = &mlx5_ib_vm_ops; 2003 - 2004 - mutex_lock(&ctx->vma_private_list_mutex); 2005 - list_add(&vma_prv->list, vma_head); 2006 - mutex_unlock(&ctx->vma_private_list_mutex); 2007 - 2008 - return 0; 2009 - } 2010 1903 2011 1904 static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) 2012 1905 { 2013 - struct vm_area_struct *vma; 2014 - struct mlx5_ib_vma_private_data *vma_private, *n; 2015 - struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); 2016 - 2017 - mutex_lock(&context->vma_private_list_mutex); 2018 - list_for_each_entry_safe(vma_private, n, &context->vma_private_list, 2019 - list) { 2020 - vma = vma_private->vma; 2021 - zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE); 2022 - /* context going to be destroyed, should 2023 - * not access ops any more. 
2024 - */ 2025 - vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE); 2026 - vma->vm_ops = NULL; 2027 - list_del(&vma_private->list); 2028 - kfree(vma_private); 2029 - } 2030 - mutex_unlock(&context->vma_private_list_mutex); 2031 1906 } 2032 1907 2033 1908 static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd) ··· 1965 2010 struct vm_area_struct *vma, 1966 2011 struct mlx5_ib_ucontext *context) 1967 2012 { 1968 - phys_addr_t pfn; 1969 - int err; 1970 - 1971 2013 if (vma->vm_end - vma->vm_start != PAGE_SIZE) 1972 2014 return -EINVAL; 1973 2015 ··· 1977 2025 if (!dev->mdev->clock_info_page) 1978 2026 return -EOPNOTSUPP; 1979 2027 1980 - pfn = page_to_pfn(dev->mdev->clock_info_page); 1981 - err = remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE, 1982 - vma->vm_page_prot); 1983 - if (err) 1984 - return err; 1985 - 1986 - return mlx5_ib_set_vma_data(vma, context); 2028 + return rdma_user_mmap_page(&context->ibucontext, vma, 2029 + dev->mdev->clock_info_page, PAGE_SIZE); 1987 2030 } 1988 2031 1989 2032 static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, ··· 2068 2121 pfn = uar_index2pfn(dev, uar_index); 2069 2122 mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn); 2070 2123 2071 - vma->vm_page_prot = prot; 2072 - err = io_remap_pfn_range(vma, vma->vm_start, pfn, 2073 - PAGE_SIZE, vma->vm_page_prot); 2124 + err = rdma_user_mmap_io(&context->ibucontext, vma, pfn, PAGE_SIZE, 2125 + prot); 2074 2126 if (err) { 2075 2127 mlx5_ib_err(dev, 2076 - "io_remap_pfn_range failed with error=%d, mmap_cmd=%s\n", 2128 + "rdma_user_mmap_io failed with error=%d, mmap_cmd=%s\n", 2077 2129 err, mmap_cmd2str(cmd)); 2078 - err = -EAGAIN; 2079 2130 goto err; 2080 2131 } 2081 - 2082 - err = mlx5_ib_set_vma_data(vma, context); 2083 - if (err) 2084 - goto err; 2085 2132 2086 2133 if (dyn_uar) 2087 2134 bfregi->sys_pages[idx] = uar_index; ··· 2101 2160 size_t map_size = vma->vm_end - vma->vm_start; 2102 2161 u32 npages = map_size >> PAGE_SHIFT; 2103 2162 phys_addr_t pfn; 
2104 - pgprot_t prot; 2105 2163 2106 2164 if (find_next_zero_bit(mctx->dm_pages, page_idx + npages, page_idx) != 2107 2165 page_idx + npages) ··· 2110 2170 MLX5_CAP64_DEV_MEM(dev->mdev, memic_bar_start_addr)) >> 2111 2171 PAGE_SHIFT) + 2112 2172 page_idx; 2113 - prot = pgprot_writecombine(vma->vm_page_prot); 2114 - vma->vm_page_prot = prot; 2115 - 2116 - if (io_remap_pfn_range(vma, vma->vm_start, pfn, map_size, 2117 - vma->vm_page_prot)) 2118 - return -EAGAIN; 2119 - 2120 - return mlx5_ib_set_vma_data(vma, mctx); 2173 + return rdma_user_mmap_io(context, vma, pfn, map_size, 2174 + pgprot_writecombine(vma->vm_page_prot)); 2121 2175 } 2122 2176 2123 2177 static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) ··· 2252 2318 struct mlx5_ib_alloc_pd_resp resp; 2253 2319 struct mlx5_ib_pd *pd; 2254 2320 int err; 2321 + u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {}; 2322 + u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {}; 2323 + u16 uid = 0; 2255 2324 2256 2325 pd = kmalloc(sizeof(*pd), GFP_KERNEL); 2257 2326 if (!pd) 2258 2327 return ERR_PTR(-ENOMEM); 2259 2328 2260 - err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn); 2329 + uid = context ? 
to_mucontext(context)->devx_uid : 0; 2330 + MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD); 2331 + MLX5_SET(alloc_pd_in, in, uid, uid); 2332 + err = mlx5_cmd_exec(to_mdev(ibdev)->mdev, in, sizeof(in), 2333 + out, sizeof(out)); 2261 2334 if (err) { 2262 2335 kfree(pd); 2263 2336 return ERR_PTR(err); 2264 2337 } 2265 2338 2339 + pd->pdn = MLX5_GET(alloc_pd_out, out, pd); 2340 + pd->uid = uid; 2266 2341 if (context) { 2267 2342 resp.pdn = pd->pdn; 2268 2343 if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { 2269 - mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn); 2344 + mlx5_cmd_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn, uid); 2270 2345 kfree(pd); 2271 2346 return ERR_PTR(-EFAULT); 2272 2347 } ··· 2289 2346 struct mlx5_ib_dev *mdev = to_mdev(pd->device); 2290 2347 struct mlx5_ib_pd *mpd = to_mpd(pd); 2291 2348 2292 - mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn); 2349 + mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid); 2293 2350 kfree(mpd); 2294 2351 2295 2352 return 0; ··· 2395 2452 offsetof(typeof(filter), field) -\ 2396 2453 sizeof(filter.field)) 2397 2454 2398 - static int parse_flow_flow_action(const union ib_flow_spec *ib_spec, 2399 - const struct ib_flow_attr *flow_attr, 2400 - struct mlx5_flow_act *action) 2455 + int parse_flow_flow_action(struct mlx5_ib_flow_action *maction, 2456 + bool is_egress, 2457 + struct mlx5_flow_act *action) 2401 2458 { 2402 - struct mlx5_ib_flow_action *maction = to_mflow_act(ib_spec->action.act); 2403 2459 2404 2460 switch (maction->ib_action.type) { 2405 2461 case IB_FLOW_ACTION_ESP: 2462 + if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | 2463 + MLX5_FLOW_CONTEXT_ACTION_DECRYPT)) 2464 + return -EINVAL; 2406 2465 /* Currently only AES_GCM keymat is supported by the driver */ 2407 2466 action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx; 2408 - action->action |= flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS ? 2467 + action->action |= is_egress ? 
2409 2468 MLX5_FLOW_CONTEXT_ACTION_ENCRYPT : 2410 2469 MLX5_FLOW_CONTEXT_ACTION_DECRYPT; 2411 2470 return 0; 2471 + case IB_FLOW_ACTION_UNSPECIFIED: 2472 + if (maction->flow_action_raw.sub_type == 2473 + MLX5_IB_FLOW_ACTION_MODIFY_HEADER) { 2474 + if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) 2475 + return -EINVAL; 2476 + action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 2477 + action->modify_id = maction->flow_action_raw.action_id; 2478 + return 0; 2479 + } 2480 + if (maction->flow_action_raw.sub_type == 2481 + MLX5_IB_FLOW_ACTION_DECAP) { 2482 + if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) 2483 + return -EINVAL; 2484 + action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; 2485 + return 0; 2486 + } 2487 + if (maction->flow_action_raw.sub_type == 2488 + MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) { 2489 + if (action->action & 2490 + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) 2491 + return -EINVAL; 2492 + action->action |= 2493 + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; 2494 + action->reformat_id = 2495 + maction->flow_action_raw.action_id; 2496 + return 0; 2497 + } 2498 + /* fall through */ 2412 2499 default: 2413 2500 return -EOPNOTSUPP; 2414 2501 } ··· 2775 2802 action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP; 2776 2803 break; 2777 2804 case IB_FLOW_SPEC_ACTION_HANDLE: 2778 - ret = parse_flow_flow_action(ib_spec, flow_attr, action); 2805 + ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act), 2806 + flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action); 2779 2807 if (ret) 2780 2808 return ret; 2781 2809 break; ··· 2857 2883 * rules would be supported, always return VALID_SPEC_NA. 2858 2884 */ 2859 2885 if (!is_crypto) 2860 - return egress ? VALID_SPEC_INVALID : VALID_SPEC_NA; 2886 + return VALID_SPEC_NA; 2861 2887 2862 2888 return is_crypto && is_ipsec && 2863 2889 (!egress || (!is_drop && !(flow_act->flags & FLOW_ACT_HAS_TAG))) ? 
··· 3000 3026 static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns, 3001 3027 struct mlx5_ib_flow_prio *prio, 3002 3028 int priority, 3003 - int num_entries, int num_groups) 3029 + int num_entries, int num_groups, 3030 + u32 flags) 3004 3031 { 3005 3032 struct mlx5_flow_table *ft; 3006 3033 3007 3034 ft = mlx5_create_auto_grouped_flow_table(ns, priority, 3008 3035 num_entries, 3009 3036 num_groups, 3010 - 0, 0); 3037 + 0, flags); 3011 3038 if (IS_ERR(ft)) 3012 3039 return ERR_CAST(ft); 3013 3040 ··· 3028 3053 int max_table_size; 3029 3054 int num_entries; 3030 3055 int num_groups; 3056 + u32 flags = 0; 3031 3057 int priority; 3032 3058 3033 3059 max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, 3034 3060 log_max_ft_size)); 3035 3061 if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { 3036 - if (ft_type == MLX5_IB_FT_TX) 3037 - priority = 0; 3038 - else if (flow_is_multicast_only(flow_attr) && 3039 - !dont_trap) 3062 + enum mlx5_flow_namespace_type fn_type; 3063 + 3064 + if (flow_is_multicast_only(flow_attr) && 3065 + !dont_trap) 3040 3066 priority = MLX5_IB_FLOW_MCAST_PRIO; 3041 3067 else 3042 3068 priority = ib_prio_to_core_prio(flow_attr->priority, 3043 3069 dont_trap); 3044 - ns = mlx5_get_flow_namespace(dev->mdev, 3045 - ft_type == MLX5_IB_FT_TX ? 
3046 - MLX5_FLOW_NAMESPACE_EGRESS : 3047 - MLX5_FLOW_NAMESPACE_BYPASS); 3070 + if (ft_type == MLX5_IB_FT_RX) { 3071 + fn_type = MLX5_FLOW_NAMESPACE_BYPASS; 3072 + prio = &dev->flow_db->prios[priority]; 3073 + if (!dev->rep && 3074 + MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap)) 3075 + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; 3076 + if (!dev->rep && 3077 + MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, 3078 + reformat_l3_tunnel_to_l2)) 3079 + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; 3080 + } else { 3081 + max_table_size = 3082 + BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, 3083 + log_max_ft_size)); 3084 + fn_type = MLX5_FLOW_NAMESPACE_EGRESS; 3085 + prio = &dev->flow_db->egress_prios[priority]; 3086 + if (!dev->rep && 3087 + MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat)) 3088 + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; 3089 + } 3090 + ns = mlx5_get_flow_namespace(dev->mdev, fn_type); 3048 3091 num_entries = MLX5_FS_MAX_ENTRIES; 3049 3092 num_groups = MLX5_FS_MAX_TYPES; 3050 - prio = &dev->flow_db->prios[priority]; 3051 3093 } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || 3052 3094 flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { 3053 3095 ns = mlx5_get_flow_namespace(dev->mdev, ··· 3096 3104 3097 3105 ft = prio->flow_table; 3098 3106 if (!ft) 3099 - return _get_prio(ns, prio, priority, num_entries, num_groups); 3107 + return _get_prio(ns, prio, priority, num_entries, num_groups, 3108 + flags); 3100 3109 3101 3110 return prio; 3102 3111 } ··· 3262 3269 bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS; 3263 3270 3264 3271 if (!is_valid_attr(dev->mdev, flow_attr)) 3272 + return ERR_PTR(-EINVAL); 3273 + 3274 + if (dev->rep && is_egress) 3265 3275 return ERR_PTR(-EINVAL); 3266 3276 3267 3277 spec = kvzalloc(sizeof(*spec), GFP_KERNEL); ··· 3657 3661 return ERR_PTR(err); 3658 3662 } 3659 3663 3660 - static struct mlx5_ib_flow_prio *_get_flow_table(struct mlx5_ib_dev *dev, 3661 - int priority, bool mcast) 3664 + static struct mlx5_ib_flow_prio * 3665 + 
_get_flow_table(struct mlx5_ib_dev *dev, 3666 + struct mlx5_ib_flow_matcher *fs_matcher, 3667 + bool mcast) 3662 3668 { 3663 - int max_table_size; 3664 3669 struct mlx5_flow_namespace *ns = NULL; 3665 3670 struct mlx5_ib_flow_prio *prio; 3671 + int max_table_size; 3672 + u32 flags = 0; 3673 + int priority; 3666 3674 3667 - max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, 3668 - log_max_ft_size)); 3675 + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) { 3676 + max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, 3677 + log_max_ft_size)); 3678 + if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap)) 3679 + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; 3680 + if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, 3681 + reformat_l3_tunnel_to_l2)) 3682 + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; 3683 + } else { /* Can only be MLX5_FLOW_NAMESPACE_EGRESS */ 3684 + max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, 3685 + log_max_ft_size)); 3686 + if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat)) 3687 + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; 3688 + } 3689 + 3669 3690 if (max_table_size < MLX5_FS_MAX_ENTRIES) 3670 3691 return ERR_PTR(-ENOMEM); 3671 3692 3672 3693 if (mcast) 3673 3694 priority = MLX5_IB_FLOW_MCAST_PRIO; 3674 3695 else 3675 - priority = ib_prio_to_core_prio(priority, false); 3696 + priority = ib_prio_to_core_prio(fs_matcher->priority, false); 3676 3697 3677 - ns = mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS); 3698 + ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type); 3678 3699 if (!ns) 3679 3700 return ERR_PTR(-ENOTSUPP); 3680 3701 3681 - prio = &dev->flow_db->prios[priority]; 3702 + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) 3703 + prio = &dev->flow_db->prios[priority]; 3704 + else 3705 + prio = &dev->flow_db->egress_prios[priority]; 3682 3706 3683 3707 if (prio->flow_table) 3684 3708 return prio; 3685 3709 3686 3710 return _get_prio(ns, prio, priority, MLX5_FS_MAX_ENTRIES, 3687 - MLX5_FS_MAX_TYPES); 
3711 + MLX5_FS_MAX_TYPES, flags); 3688 3712 } 3689 3713 3690 3714 static struct mlx5_ib_flow_handler * ··· 3712 3696 struct mlx5_ib_flow_prio *ft_prio, 3713 3697 struct mlx5_flow_destination *dst, 3714 3698 struct mlx5_ib_flow_matcher *fs_matcher, 3699 + struct mlx5_flow_act *flow_act, 3715 3700 void *cmd_in, int inlen) 3716 3701 { 3717 3702 struct mlx5_ib_flow_handler *handler; 3718 - struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG}; 3719 3703 struct mlx5_flow_spec *spec; 3720 3704 struct mlx5_flow_table *ft = ft_prio->flow_table; 3721 3705 int err = 0; ··· 3734 3718 fs_matcher->mask_len); 3735 3719 spec->match_criteria_enable = fs_matcher->match_criteria_enable; 3736 3720 3737 - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 3738 3721 handler->rule = mlx5_add_flow_rules(ft, spec, 3739 - &flow_act, dst, 1); 3722 + flow_act, dst, 1); 3740 3723 3741 3724 if (IS_ERR(handler->rule)) { 3742 3725 err = PTR_ERR(handler->rule); ··· 3797 3782 struct mlx5_ib_flow_handler * 3798 3783 mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, 3799 3784 struct mlx5_ib_flow_matcher *fs_matcher, 3785 + struct mlx5_flow_act *flow_act, 3800 3786 void *cmd_in, int inlen, int dest_id, 3801 3787 int dest_type) 3802 3788 { 3803 3789 struct mlx5_flow_destination *dst; 3804 3790 struct mlx5_ib_flow_prio *ft_prio; 3805 - int priority = fs_matcher->priority; 3806 3791 struct mlx5_ib_flow_handler *handler; 3807 3792 bool mcast; 3808 3793 int err; ··· 3820 3805 mcast = raw_fs_is_multicast(fs_matcher, cmd_in); 3821 3806 mutex_lock(&dev->flow_db->lock); 3822 3807 3823 - ft_prio = _get_flow_table(dev, priority, mcast); 3808 + ft_prio = _get_flow_table(dev, fs_matcher, mcast); 3824 3809 if (IS_ERR(ft_prio)) { 3825 3810 err = PTR_ERR(ft_prio); 3826 3811 goto unlock; ··· 3829 3814 if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) { 3830 3815 dst->type = dest_type; 3831 3816 dst->tir_num = dest_id; 3832 - } else { 3817 + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 3818 
+ } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) { 3833 3819 dst->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM; 3834 3820 dst->ft_num = dest_id; 3821 + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 3822 + } else { 3823 + dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT; 3824 + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; 3835 3825 } 3836 3826 3837 - handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, cmd_in, 3838 - inlen); 3827 + handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, flow_act, 3828 + cmd_in, inlen); 3839 3829 3840 3830 if (IS_ERR(handler)) { 3841 3831 err = PTR_ERR(handler); ··· 4018 3998 */ 4019 3999 mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx); 4020 4000 break; 4001 + case IB_FLOW_ACTION_UNSPECIFIED: 4002 + mlx5_ib_destroy_flow_action_raw(maction); 4003 + break; 4021 4004 default: 4022 4005 WARN_ON(true); 4023 4006 break; ··· 4035 4012 struct mlx5_ib_dev *dev = to_mdev(ibqp->device); 4036 4013 struct mlx5_ib_qp *mqp = to_mqp(ibqp); 4037 4014 int err; 4015 + u16 uid; 4016 + 4017 + uid = ibqp->pd ? 4018 + to_mpd(ibqp->pd)->uid : 0; 4038 4019 4039 4020 if (mqp->flags & MLX5_IB_QP_UNDERLAY) { 4040 4021 mlx5_ib_dbg(dev, "Attaching a multi cast group to underlay QP is not supported\n"); 4041 4022 return -EOPNOTSUPP; 4042 4023 } 4043 4024 4044 - err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num); 4025 + err = mlx5_cmd_attach_mcg(dev->mdev, gid, ibqp->qp_num, uid); 4045 4026 if (err) 4046 4027 mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n", 4047 4028 ibqp->qp_num, gid->raw); ··· 4057 4030 { 4058 4031 struct mlx5_ib_dev *dev = to_mdev(ibqp->device); 4059 4032 int err; 4033 + u16 uid; 4060 4034 4061 - err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num); 4035 + uid = ibqp->pd ? 
4036 + to_mpd(ibqp->pd)->uid : 0; 4037 + err = mlx5_cmd_detach_mcg(dev->mdev, gid, ibqp->qp_num, uid); 4062 4038 if (err) 4063 4039 mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n", 4064 4040 ibqp->qp_num, gid->raw); ··· 4082 4052 return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid); 4083 4053 } 4084 4054 4085 - static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr, 4086 - char *buf) 4055 + static ssize_t fw_pages_show(struct device *device, 4056 + struct device_attribute *attr, char *buf) 4087 4057 { 4088 4058 struct mlx5_ib_dev *dev = 4089 4059 container_of(device, struct mlx5_ib_dev, ib_dev.dev); 4090 4060 4091 4061 return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages); 4092 4062 } 4063 + static DEVICE_ATTR_RO(fw_pages); 4093 4064 4094 - static ssize_t show_reg_pages(struct device *device, 4065 + static ssize_t reg_pages_show(struct device *device, 4095 4066 struct device_attribute *attr, char *buf) 4096 4067 { 4097 4068 struct mlx5_ib_dev *dev = ··· 4100 4069 4101 4070 return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages)); 4102 4071 } 4072 + static DEVICE_ATTR_RO(reg_pages); 4103 4073 4104 - static ssize_t show_hca(struct device *device, struct device_attribute *attr, 4105 - char *buf) 4074 + static ssize_t hca_type_show(struct device *device, 4075 + struct device_attribute *attr, char *buf) 4106 4076 { 4107 4077 struct mlx5_ib_dev *dev = 4108 4078 container_of(device, struct mlx5_ib_dev, ib_dev.dev); 4109 4079 return sprintf(buf, "MT%d\n", dev->mdev->pdev->device); 4110 4080 } 4081 + static DEVICE_ATTR_RO(hca_type); 4111 4082 4112 - static ssize_t show_rev(struct device *device, struct device_attribute *attr, 4113 - char *buf) 4083 + static ssize_t hw_rev_show(struct device *device, 4084 + struct device_attribute *attr, char *buf) 4114 4085 { 4115 4086 struct mlx5_ib_dev *dev = 4116 4087 container_of(device, struct mlx5_ib_dev, ib_dev.dev); 4117 4088 return sprintf(buf, "%x\n", dev->mdev->rev_id); 4118 
4089 } 4090 + static DEVICE_ATTR_RO(hw_rev); 4119 4091 4120 - static ssize_t show_board(struct device *device, struct device_attribute *attr, 4121 - char *buf) 4092 + static ssize_t board_id_show(struct device *device, 4093 + struct device_attribute *attr, char *buf) 4122 4094 { 4123 4095 struct mlx5_ib_dev *dev = 4124 4096 container_of(device, struct mlx5_ib_dev, ib_dev.dev); 4125 4097 return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN, 4126 4098 dev->mdev->board_id); 4127 4099 } 4100 + static DEVICE_ATTR_RO(board_id); 4128 4101 4129 - static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 4130 - static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); 4131 - static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); 4132 - static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL); 4133 - static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL); 4102 + static struct attribute *mlx5_class_attributes[] = { 4103 + &dev_attr_hw_rev.attr, 4104 + &dev_attr_hca_type.attr, 4105 + &dev_attr_board_id.attr, 4106 + &dev_attr_fw_pages.attr, 4107 + &dev_attr_reg_pages.attr, 4108 + NULL, 4109 + }; 4134 4110 4135 - static struct device_attribute *mlx5_class_attributes[] = { 4136 - &dev_attr_hw_rev, 4137 - &dev_attr_hca_type, 4138 - &dev_attr_board_id, 4139 - &dev_attr_fw_pages, 4140 - &dev_attr_reg_pages, 4111 + static const struct attribute_group mlx5_attr_group = { 4112 + .attrs = mlx5_class_attributes, 4141 4113 }; 4142 4114 4143 4115 static void pkey_change_handler(struct work_struct *work) ··· 5665 5631 int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) 5666 5632 { 5667 5633 struct mlx5_core_dev *mdev = dev->mdev; 5668 - const char *name; 5669 5634 int err; 5670 5635 int i; 5671 5636 ··· 5697 5664 if (mlx5_use_mad_ifc(dev)) 5698 5665 get_ext_port_caps(dev); 5699 5666 5700 - if (!mlx5_lag_is_active(mdev)) 5701 - name = "mlx5_%d"; 5702 - else 5703 - name = "mlx5_bond_%d"; 5704 - 5705 - strlcpy(dev->ib_dev.name, name, IB_DEVICE_NAME_MAX); 5706 5667 dev->ib_dev.owner = 
THIS_MODULE; 5707 5668 dev->ib_dev.node_type = RDMA_NODE_IB_CA; 5708 5669 dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; ··· 5903 5876 if ((MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && 5904 5877 (MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) || 5905 5878 MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc))) 5906 - mutex_init(&dev->lb_mutex); 5879 + mutex_init(&dev->lb.mutex); 5907 5880 5908 5881 return 0; 5909 5882 } ··· 6110 6083 6111 6084 int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) 6112 6085 { 6113 - return ib_register_device(&dev->ib_dev, NULL); 6086 + const char *name; 6087 + 6088 + rdma_set_device_sysfs_group(&dev->ib_dev, &mlx5_attr_group); 6089 + if (!mlx5_lag_is_active(dev->mdev)) 6090 + name = "mlx5_%d"; 6091 + else 6092 + name = "mlx5_bond_%d"; 6093 + return ib_register_device(&dev->ib_dev, name, NULL); 6114 6094 } 6115 6095 6116 6096 void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) ··· 6147 6113 cancel_delay_drop(dev); 6148 6114 } 6149 6115 6150 - int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev) 6151 - { 6152 - int err; 6153 - int i; 6154 - 6155 - for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) { 6156 - err = device_create_file(&dev->ib_dev.dev, 6157 - mlx5_class_attributes[i]); 6158 - if (err) 6159 - return err; 6160 - } 6161 - 6162 - return 0; 6163 - } 6164 - 6165 6116 static int mlx5_ib_stage_rep_reg_init(struct mlx5_ib_dev *dev) 6166 6117 { 6167 6118 mlx5_ib_register_vport_reps(dev); ··· 6170 6151 profile->stage[stage].cleanup(dev); 6171 6152 } 6172 6153 6154 + if (dev->devx_whitelist_uid) 6155 + mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid); 6173 6156 ib_dealloc_device((struct ib_device *)dev); 6174 6157 } 6175 6158 ··· 6180 6159 { 6181 6160 int err; 6182 6161 int i; 6183 - 6184 - printk_once(KERN_INFO "%s", mlx5_version); 6162 + int uid; 6185 6163 6186 6164 for (i = 0; i < MLX5_IB_STAGE_MAX; i++) { 6187 6165 if (profile->stage[i].init) { ··· 6189 6169 goto err_out; 6190 
6170 } 6191 6171 } 6172 + 6173 + uid = mlx5_ib_devx_create(dev); 6174 + if (uid > 0) 6175 + dev->devx_whitelist_uid = uid; 6192 6176 6193 6177 dev->profile = profile; 6194 6178 dev->ib_active = true; ··· 6254 6230 STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP, 6255 6231 mlx5_ib_stage_delay_drop_init, 6256 6232 mlx5_ib_stage_delay_drop_cleanup), 6257 - STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR, 6258 - mlx5_ib_stage_class_attr_init, 6259 - NULL), 6260 6233 }; 6261 6234 6262 6235 static const struct mlx5_ib_profile nic_rep_profile = { ··· 6295 6274 mlx5_ib_stage_ib_reg_cleanup), 6296 6275 STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR, 6297 6276 mlx5_ib_stage_post_ib_reg_umr_init, 6298 - NULL), 6299 - STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR, 6300 - mlx5_ib_stage_class_attr_init, 6301 6277 NULL), 6302 6278 STAGE_CREATE(MLX5_IB_STAGE_REP_REG, 6303 6279 mlx5_ib_stage_rep_reg_init,
+4 -5
drivers/infiniband/hw/mlx5/mem.c
··· 57 57 int entry; 58 58 unsigned long page_shift = umem->page_shift; 59 59 60 - if (umem->odp_data) { 60 + if (umem->is_odp) { 61 61 *ncont = ib_umem_page_count(umem); 62 62 *count = *ncont << (page_shift - PAGE_SHIFT); 63 63 *shift = page_shift; ··· 152 152 struct scatterlist *sg; 153 153 int entry; 154 154 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 155 - const bool odp = umem->odp_data != NULL; 156 - 157 - if (odp) { 155 + if (umem->is_odp) { 158 156 WARN_ON(shift != 0); 159 157 WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)); 160 158 161 159 for (i = 0; i < num_pages; ++i) { 162 - dma_addr_t pa = umem->odp_data->dma_list[offset + i]; 160 + dma_addr_t pa = 161 + to_ib_umem_odp(umem)->dma_list[offset + i]; 163 162 164 163 pas[i] = cpu_to_be64(umem_dma_to_mtt(pa)); 165 164 }
+61 -37
drivers/infiniband/hw/mlx5/mlx5_ib.h
··· 39 39 #include <rdma/ib_smi.h> 40 40 #include <linux/mlx5/driver.h> 41 41 #include <linux/mlx5/cq.h> 42 + #include <linux/mlx5/fs.h> 42 43 #include <linux/mlx5/qp.h> 43 44 #include <linux/mlx5/srq.h> 45 + #include <linux/mlx5/fs.h> 44 46 #include <linux/types.h> 45 47 #include <linux/mlx5/transobj.h> 46 48 #include <rdma/ib_user_verbs.h> ··· 50 48 #include <rdma/uverbs_ioctl.h> 51 49 #include <rdma/mlx5_user_ioctl_cmds.h> 52 50 53 - #define mlx5_ib_dbg(dev, format, arg...) \ 54 - pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ 55 - __LINE__, current->pid, ##arg) 51 + #define mlx5_ib_dbg(_dev, format, arg...) \ 52 + dev_dbg(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \ 53 + __LINE__, current->pid, ##arg) 56 54 57 - #define mlx5_ib_err(dev, format, arg...) \ 58 - pr_err("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ 59 - __LINE__, current->pid, ##arg) 55 + #define mlx5_ib_err(_dev, format, arg...) \ 56 + dev_err(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \ 57 + __LINE__, current->pid, ##arg) 60 58 61 - #define mlx5_ib_warn(dev, format, arg...) \ 62 - pr_warn("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ 63 - __LINE__, current->pid, ##arg) 59 + #define mlx5_ib_warn(_dev, format, arg...) 
\ 60 + dev_warn(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \ 61 + __LINE__, current->pid, ##arg) 64 62 65 63 #define field_avail(type, fld, sz) (offsetof(type, fld) + \ 66 64 sizeof(((type *)0)->fld) <= (sz)) ··· 116 114 MLX5_MEMIC_BASE_SIZE = 1 << MLX5_MEMIC_BASE_ALIGN, 117 115 }; 118 116 119 - struct mlx5_ib_vma_private_data { 120 - struct list_head list; 121 - struct vm_area_struct *vma; 122 - /* protect vma_private_list add/del */ 123 - struct mutex *vma_private_list_mutex; 124 - }; 125 - 126 117 struct mlx5_ib_ucontext { 127 118 struct ib_ucontext ibucontext; 128 119 struct list_head db_page_list; ··· 127 132 u8 cqe_version; 128 133 /* Transport Domain number */ 129 134 u32 tdn; 130 - struct list_head vma_private_list; 131 - /* protect vma_private_list add/del */ 132 - struct mutex vma_private_list_mutex; 133 135 134 136 u64 lib_caps; 135 137 DECLARE_BITMAP(dm_pages, MLX5_MAX_MEMIC_PAGES); 136 138 u16 devx_uid; 139 + /* For RoCE LAG TX affinity */ 140 + atomic_t tx_port_affinity; 137 141 }; 138 142 139 143 static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext) ··· 143 149 struct mlx5_ib_pd { 144 150 struct ib_pd ibpd; 145 151 u32 pdn; 152 + u16 uid; 153 + }; 154 + 155 + enum { 156 + MLX5_IB_FLOW_ACTION_MODIFY_HEADER, 157 + MLX5_IB_FLOW_ACTION_PACKET_REFORMAT, 158 + MLX5_IB_FLOW_ACTION_DECAP, 146 159 }; 147 160 148 161 #define MLX5_IB_FLOW_MCAST_PRIO (MLX5_BY_PASS_NUM_PRIOS - 1) ··· 181 180 struct mlx5_ib_match_params matcher_mask; 182 181 int mask_len; 183 182 enum mlx5_ib_flow_type flow_type; 183 + enum mlx5_flow_namespace_type ns_type; 184 184 u16 priority; 185 185 struct mlx5_core_dev *mdev; 186 186 atomic_t usecnt; ··· 190 188 191 189 struct mlx5_ib_flow_db { 192 190 struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT]; 191 + struct mlx5_ib_flow_prio egress_prios[MLX5_IB_NUM_FLOW_FT]; 193 192 struct mlx5_ib_flow_prio sniffer[MLX5_IB_NUM_SNIFFER_FTS]; 194 193 struct mlx5_ib_flow_prio egress[MLX5_IB_NUM_EGRESS_FTS]; 
195 194 struct mlx5_flow_table *lag_demux_ft; ··· 325 322 struct mlx5_ib_rwq_ind_table { 326 323 struct ib_rwq_ind_table ib_rwq_ind_tbl; 327 324 u32 rqtn; 325 + u16 uid; 328 326 }; 329 327 330 328 struct mlx5_ib_ubuffer { ··· 432 428 struct list_head cq_send_list; 433 429 struct mlx5_rate_limit rl; 434 430 u32 underlay_qpn; 435 - bool tunnel_offload_en; 431 + u32 flags_en; 436 432 /* storage for qp sub type when core qp type is IB_QPT_DRIVER */ 437 433 enum ib_qp_type qp_sub_type; 438 434 }; ··· 540 536 struct mlx5_ib_xrcd { 541 537 struct ib_xrcd ibxrcd; 542 538 u32 xrcdn; 539 + u16 uid; 543 540 }; 544 541 545 542 enum mlx5_ib_mtt_access_flags { ··· 705 700 rwlock_t netdev_lock; 706 701 struct net_device *netdev; 707 702 struct notifier_block nb; 708 - atomic_t next_port; 703 + atomic_t tx_port_affinity; 709 704 enum ib_port_state last_port_state; 710 705 struct mlx5_ib_dev *dev; 711 706 u8 native_port_num; ··· 820 815 u64 ib_flags; 821 816 struct mlx5_accel_esp_xfrm *ctx; 822 817 } esp_aes_gcm; 818 + struct { 819 + struct mlx5_ib_dev *dev; 820 + u32 sub_type; 821 + u32 action_id; 822 + } flow_action_raw; 823 823 }; 824 824 }; 825 825 ··· 869 859 return container_of(ibcntrs, struct mlx5_ib_mcounters, ibcntrs); 870 860 } 871 861 862 + int parse_flow_flow_action(struct mlx5_ib_flow_action *maction, 863 + bool is_egress, 864 + struct mlx5_flow_act *action); 865 + struct mlx5_ib_lb_state { 866 + /* protect the user_td */ 867 + struct mutex mutex; 868 + u32 user_td; 869 + int qps; 870 + bool enabled; 871 + }; 872 + 872 873 struct mlx5_ib_dev { 873 874 struct ib_device ib_dev; 874 - const struct uverbs_object_tree_def *driver_trees[6]; 875 + const struct uverbs_object_tree_def *driver_trees[7]; 875 876 struct mlx5_core_dev *mdev; 876 877 struct mlx5_roce roce[MLX5_MAX_PORTS]; 877 878 int num_ports; ··· 921 900 const struct mlx5_ib_profile *profile; 922 901 struct mlx5_eswitch_rep *rep; 923 902 924 - /* protect the user_td */ 925 - struct mutex lb_mutex; 926 - u32 
user_td; 903 + struct mlx5_ib_lb_state lb; 927 904 u8 umr_fence; 928 905 struct list_head ib_dev_list; 929 906 u64 sys_image_guid; 930 907 struct mlx5_memic memic; 908 + u16 devx_whitelist_uid; 931 909 }; 932 910 933 911 static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) ··· 1037 1017 int mlx5_ib_destroy_srq(struct ib_srq *srq); 1038 1018 int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, 1039 1019 const struct ib_recv_wr **bad_wr); 1020 + int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp); 1021 + void mlx5_ib_disable_lb(struct mlx5_ib_dev *dev, bool td, bool qp); 1040 1022 struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, 1041 1023 struct ib_qp_init_attr *init_attr, 1042 1024 struct ib_udata *udata); ··· 1128 1106 void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, 1129 1107 int page_shift, __be64 *pas, int access_flags); 1130 1108 void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num); 1131 - int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq); 1109 + int mlx5_ib_get_cqe_size(struct ib_cq *ibcq); 1132 1110 int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); 1133 1111 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); 1134 1112 ··· 1163 1141 int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev); 1164 1142 int __init mlx5_ib_odp_init(void); 1165 1143 void mlx5_ib_odp_cleanup(void); 1166 - void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start, 1144 + void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, 1167 1145 unsigned long end); 1168 1146 void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent); 1169 1147 void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, ··· 1202 1180 int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev); 1203 1181 void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev); 1204 1182 int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev); 1205 - int 
mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev); 1206 1183 void __mlx5_ib_remove(struct mlx5_ib_dev *dev, 1207 1184 const struct mlx5_ib_profile *profile, 1208 1185 int stage); ··· 1250 1229 u8 port_num); 1251 1230 1252 1231 #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) 1253 - int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, 1254 - struct mlx5_ib_ucontext *context); 1255 - void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, 1256 - struct mlx5_ib_ucontext *context); 1232 + int mlx5_ib_devx_create(struct mlx5_ib_dev *dev); 1233 + void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid); 1257 1234 const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void); 1258 1235 struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add( 1259 1236 struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, 1260 - void *cmd_in, int inlen, int dest_id, int dest_type); 1237 + struct mlx5_flow_act *flow_act, void *cmd_in, int inlen, 1238 + int dest_id, int dest_type); 1261 1239 bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type); 1262 1240 int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root); 1241 + void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction); 1263 1242 #else 1264 1243 static inline int 1265 - mlx5_ib_devx_create(struct mlx5_ib_dev *dev, 1266 - struct mlx5_ib_ucontext *context) { return -EOPNOTSUPP; }; 1267 - static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, 1268 - struct mlx5_ib_ucontext *context) {} 1244 + mlx5_ib_devx_create(struct mlx5_ib_dev *dev) { return -EOPNOTSUPP; }; 1245 + static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {} 1269 1246 static inline const struct uverbs_object_tree_def * 1270 1247 mlx5_ib_get_devx_tree(void) { return NULL; } 1271 1248 static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, ··· 1276 1257 { 1277 1258 return 0; 1278 1259 } 1260 + static inline void 1261 + mlx5_ib_destroy_flow_action_raw(struct 
mlx5_ib_flow_action *maction) 1262 + { 1263 + return; 1264 + }; 1279 1265 #endif 1280 1266 static inline void init_query_mad(struct ib_smp *mad) 1281 1267 {
+8 -6
drivers/infiniband/hw/mlx5/mr.c
··· 98 98 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 99 99 static void update_odp_mr(struct mlx5_ib_mr *mr) 100 100 { 101 - if (mr->umem->odp_data) { 101 + if (mr->umem->is_odp) { 102 102 /* 103 103 * This barrier prevents the compiler from moving the 104 104 * setting of umem->odp_data->private to point to our ··· 107 107 * handle invalidations. 108 108 */ 109 109 smp_wmb(); 110 - mr->umem->odp_data->private = mr; 110 + to_ib_umem_odp(mr->umem)->private = mr; 111 111 /* 112 112 * Make sure we will see the new 113 113 * umem->odp_data->private value in the invalidation ··· 691 691 init_completion(&ent->compl); 692 692 INIT_WORK(&ent->work, cache_work_func); 693 693 INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); 694 - queue_work(cache->wq, &ent->work); 695 694 696 695 if (i > MR_CACHE_LAST_STD_ENTRY) { 697 696 mlx5_odp_init_mr_cache_entry(ent); ··· 710 711 ent->limit = dev->mdev->profile->mr_cache[i].limit; 711 712 else 712 713 ent->limit = 0; 714 + queue_work(cache->wq, &ent->work); 713 715 } 714 716 715 717 err = mlx5_mr_cache_debugfs_init(dev); ··· 1627 1627 struct ib_umem *umem = mr->umem; 1628 1628 1629 1629 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 1630 - if (umem && umem->odp_data) { 1630 + if (umem && umem->is_odp) { 1631 + struct ib_umem_odp *umem_odp = to_ib_umem_odp(umem); 1632 + 1631 1633 /* Prevent new page faults from succeeding */ 1632 1634 mr->live = 0; 1633 1635 /* Wait for all running page-fault handlers to finish. */ 1634 1636 synchronize_srcu(&dev->mr_srcu); 1635 1637 /* Destroy all page mappings */ 1636 - if (umem->odp_data->page_list) 1637 - mlx5_ib_invalidate_range(umem, ib_umem_start(umem), 1638 + if (umem_odp->page_list) 1639 + mlx5_ib_invalidate_range(umem_odp, ib_umem_start(umem), 1638 1640 ib_umem_end(umem)); 1639 1641 else 1640 1642 mlx5_ib_free_implicit_mr(mr);
+67 -56
drivers/infiniband/hw/mlx5/odp.c
··· 61 61 return mr && mr->parent == parent && !odp->dying; 62 62 } 63 63 64 + struct ib_ucontext_per_mm *mr_to_per_mm(struct mlx5_ib_mr *mr) 65 + { 66 + if (WARN_ON(!mr || !mr->umem || !mr->umem->is_odp)) 67 + return NULL; 68 + 69 + return to_ib_umem_odp(mr->umem)->per_mm; 70 + } 71 + 64 72 static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp) 65 73 { 66 74 struct mlx5_ib_mr *mr = odp->private, *parent = mr->parent; 67 - struct ib_ucontext *ctx = odp->umem->context; 75 + struct ib_ucontext_per_mm *per_mm = odp->per_mm; 68 76 struct rb_node *rb; 69 77 70 - down_read(&ctx->umem_rwsem); 78 + down_read(&per_mm->umem_rwsem); 71 79 while (1) { 72 80 rb = rb_next(&odp->interval_tree.rb); 73 81 if (!rb) ··· 87 79 not_found: 88 80 odp = NULL; 89 81 end: 90 - up_read(&ctx->umem_rwsem); 82 + up_read(&per_mm->umem_rwsem); 91 83 return odp; 92 84 } 93 85 94 - static struct ib_umem_odp *odp_lookup(struct ib_ucontext *ctx, 95 - u64 start, u64 length, 86 + static struct ib_umem_odp *odp_lookup(u64 start, u64 length, 96 87 struct mlx5_ib_mr *parent) 97 88 { 89 + struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(parent); 98 90 struct ib_umem_odp *odp; 99 91 struct rb_node *rb; 100 92 101 - down_read(&ctx->umem_rwsem); 102 - odp = rbt_ib_umem_lookup(&ctx->umem_tree, start, length); 93 + down_read(&per_mm->umem_rwsem); 94 + odp = rbt_ib_umem_lookup(&per_mm->umem_tree, start, length); 103 95 if (!odp) 104 96 goto end; 105 97 ··· 110 102 if (!rb) 111 103 goto not_found; 112 104 odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb); 113 - if (ib_umem_start(odp->umem) > start + length) 105 + if (ib_umem_start(&odp->umem) > start + length) 114 106 goto not_found; 115 107 } 116 108 not_found: 117 109 odp = NULL; 118 110 end: 119 - up_read(&ctx->umem_rwsem); 111 + up_read(&per_mm->umem_rwsem); 120 112 return odp; 121 113 } 122 114 ··· 124 116 size_t nentries, struct mlx5_ib_mr *mr, int flags) 125 117 { 126 118 struct ib_pd *pd = mr->ibmr.pd; 127 - struct ib_ucontext *ctx = 
pd->uobject->context; 128 119 struct mlx5_ib_dev *dev = to_mdev(pd->device); 129 120 struct ib_umem_odp *odp; 130 121 unsigned long va; ··· 138 131 return; 139 132 } 140 133 141 - odp = odp_lookup(ctx, offset * MLX5_IMR_MTT_SIZE, 142 - nentries * MLX5_IMR_MTT_SIZE, mr); 134 + odp = odp_lookup(offset * MLX5_IMR_MTT_SIZE, 135 + nentries * MLX5_IMR_MTT_SIZE, mr); 143 136 144 137 for (i = 0; i < nentries; i++, pklm++) { 145 138 pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE); 146 139 va = (offset + i) * MLX5_IMR_MTT_SIZE; 147 - if (odp && odp->umem->address == va) { 140 + if (odp && odp->umem.address == va) { 148 141 struct mlx5_ib_mr *mtt = odp->private; 149 142 150 143 pklm->key = cpu_to_be32(mtt->ibmr.lkey); ··· 160 153 static void mr_leaf_free_action(struct work_struct *work) 161 154 { 162 155 struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work); 163 - int idx = ib_umem_start(odp->umem) >> MLX5_IMR_MTT_SHIFT; 156 + int idx = ib_umem_start(&odp->umem) >> MLX5_IMR_MTT_SHIFT; 164 157 struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent; 165 158 166 159 mr->parent = NULL; 167 160 synchronize_srcu(&mr->dev->mr_srcu); 168 161 169 - ib_umem_release(odp->umem); 162 + ib_umem_release(&odp->umem); 170 163 if (imr->live) 171 164 mlx5_ib_update_xlt(imr, idx, 1, 0, 172 165 MLX5_IB_UPD_XLT_INDIRECT | ··· 177 170 wake_up(&imr->q_leaf_free); 178 171 } 179 172 180 - void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start, 173 + void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, 181 174 unsigned long end) 182 175 { 183 176 struct mlx5_ib_mr *mr; 184 177 const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT / 185 178 sizeof(struct mlx5_mtt)) - 1; 186 179 u64 idx = 0, blk_start_idx = 0; 180 + struct ib_umem *umem; 187 181 int in_block = 0; 188 182 u64 addr; 189 183 190 - if (!umem || !umem->odp_data) { 184 + if (!umem_odp) { 191 185 pr_err("invalidation called on NULL umem or non-ODP umem\n"); 192 186 return; 193 187 } 188 
+ umem = &umem_odp->umem; 194 189 195 - mr = umem->odp_data->private; 190 + mr = umem_odp->private; 196 191 197 192 if (!mr || !mr->ibmr.pd) 198 193 return; ··· 217 208 * estimate the cost of another UMR vs. the cost of bigger 218 209 * UMR. 219 210 */ 220 - if (umem->odp_data->dma_list[idx] & 211 + if (umem_odp->dma_list[idx] & 221 212 (ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) { 222 213 if (!in_block) { 223 214 blk_start_idx = idx; ··· 246 237 * needed. 247 238 */ 248 239 249 - ib_umem_odp_unmap_dma_pages(umem, start, end); 240 + ib_umem_odp_unmap_dma_pages(umem_odp, start, end); 250 241 251 242 if (unlikely(!umem->npages && mr->parent && 252 - !umem->odp_data->dying)) { 253 - WRITE_ONCE(umem->odp_data->dying, 1); 243 + !umem_odp->dying)) { 244 + WRITE_ONCE(umem_odp->dying, 1); 254 245 atomic_inc(&mr->parent->num_leaf_free); 255 - schedule_work(&umem->odp_data->work); 246 + schedule_work(&umem_odp->work); 256 247 } 257 248 } 258 249 ··· 375 366 static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *mr, 376 367 u64 io_virt, size_t bcnt) 377 368 { 378 - struct ib_ucontext *ctx = mr->ibmr.pd->uobject->context; 379 369 struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.pd->device); 380 370 struct ib_umem_odp *odp, *result = NULL; 371 + struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem); 381 372 u64 addr = io_virt & MLX5_IMR_MTT_MASK; 382 373 int nentries = 0, start_idx = 0, ret; 383 374 struct mlx5_ib_mr *mtt; 384 - struct ib_umem *umem; 385 375 386 - mutex_lock(&mr->umem->odp_data->umem_mutex); 387 - odp = odp_lookup(ctx, addr, 1, mr); 376 + mutex_lock(&odp_mr->umem_mutex); 377 + odp = odp_lookup(addr, 1, mr); 388 378 389 379 mlx5_ib_dbg(dev, "io_virt:%llx bcnt:%zx addr:%llx odp:%p\n", 390 380 io_virt, bcnt, addr, odp); ··· 393 385 if (nentries) 394 386 nentries++; 395 387 } else { 396 - umem = ib_alloc_odp_umem(ctx, addr, MLX5_IMR_MTT_SIZE); 397 - if (IS_ERR(umem)) { 398 - mutex_unlock(&mr->umem->odp_data->umem_mutex); 399 - return ERR_CAST(umem); 388 + 
odp = ib_alloc_odp_umem(odp_mr->per_mm, addr, 389 + MLX5_IMR_MTT_SIZE); 390 + if (IS_ERR(odp)) { 391 + mutex_unlock(&odp_mr->umem_mutex); 392 + return ERR_CAST(odp); 400 393 } 401 394 402 - mtt = implicit_mr_alloc(mr->ibmr.pd, umem, 0, mr->access_flags); 395 + mtt = implicit_mr_alloc(mr->ibmr.pd, &odp->umem, 0, 396 + mr->access_flags); 403 397 if (IS_ERR(mtt)) { 404 - mutex_unlock(&mr->umem->odp_data->umem_mutex); 405 - ib_umem_release(umem); 398 + mutex_unlock(&odp_mr->umem_mutex); 399 + ib_umem_release(&odp->umem); 406 400 return ERR_CAST(mtt); 407 401 } 408 402 409 - odp = umem->odp_data; 410 403 odp->private = mtt; 411 - mtt->umem = umem; 404 + mtt->umem = &odp->umem; 412 405 mtt->mmkey.iova = addr; 413 406 mtt->parent = mr; 414 407 INIT_WORK(&odp->work, mr_leaf_free_action); ··· 426 417 addr += MLX5_IMR_MTT_SIZE; 427 418 if (unlikely(addr < io_virt + bcnt)) { 428 419 odp = odp_next(odp); 429 - if (odp && odp->umem->address != addr) 420 + if (odp && odp->umem.address != addr) 430 421 odp = NULL; 431 422 goto next_mr; 432 423 } ··· 441 432 } 442 433 } 443 434 444 - mutex_unlock(&mr->umem->odp_data->umem_mutex); 435 + mutex_unlock(&odp_mr->umem_mutex); 445 436 return result; 446 437 } 447 438 ··· 469 460 return imr; 470 461 } 471 462 472 - static int mr_leaf_free(struct ib_umem *umem, u64 start, 473 - u64 end, void *cookie) 463 + static int mr_leaf_free(struct ib_umem_odp *umem_odp, u64 start, u64 end, 464 + void *cookie) 474 465 { 475 - struct mlx5_ib_mr *mr = umem->odp_data->private, *imr = cookie; 466 + struct mlx5_ib_mr *mr = umem_odp->private, *imr = cookie; 467 + struct ib_umem *umem = &umem_odp->umem; 476 468 477 469 if (mr->parent != imr) 478 470 return 0; 479 471 480 - ib_umem_odp_unmap_dma_pages(umem, 481 - ib_umem_start(umem), 472 + ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem), 482 473 ib_umem_end(umem)); 483 474 484 - if (umem->odp_data->dying) 475 + if (umem_odp->dying) 485 476 return 0; 486 477 487 - WRITE_ONCE(umem->odp_data->dying, 
1); 478 + WRITE_ONCE(umem_odp->dying, 1); 488 479 atomic_inc(&imr->num_leaf_free); 489 - schedule_work(&umem->odp_data->work); 480 + schedule_work(&umem_odp->work); 490 481 491 482 return 0; 492 483 } 493 484 494 485 void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) 495 486 { 496 - struct ib_ucontext *ctx = imr->ibmr.pd->uobject->context; 487 + struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(imr); 497 488 498 - down_read(&ctx->umem_rwsem); 499 - rbt_ib_umem_for_each_in_range(&ctx->umem_tree, 0, ULLONG_MAX, 489 + down_read(&per_mm->umem_rwsem); 490 + rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, 0, ULLONG_MAX, 500 491 mr_leaf_free, true, imr); 501 - up_read(&ctx->umem_rwsem); 492 + up_read(&per_mm->umem_rwsem); 502 493 503 494 wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free)); 504 495 } ··· 506 497 static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, 507 498 u64 io_virt, size_t bcnt, u32 *bytes_mapped) 508 499 { 500 + struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem); 509 501 u64 access_mask = ODP_READ_ALLOWED_BIT; 510 502 int npages = 0, page_shift, np; 511 503 u64 start_idx, page_mask; ··· 515 505 size_t size; 516 506 int ret; 517 507 518 - if (!mr->umem->odp_data->page_list) { 508 + if (!odp_mr->page_list) { 519 509 odp = implicit_mr_get_data(mr, io_virt, bcnt); 520 510 521 511 if (IS_ERR(odp)) ··· 523 513 mr = odp->private; 524 514 525 515 } else { 526 - odp = mr->umem->odp_data; 516 + odp = odp_mr; 527 517 } 528 518 529 519 next_mr: 530 - size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt); 520 + size = min_t(size_t, bcnt, ib_umem_end(&odp->umem) - io_virt); 531 521 532 522 page_shift = mr->umem->page_shift; 533 523 page_mask = ~(BIT(page_shift) - 1); ··· 543 533 */ 544 534 smp_rmb(); 545 535 546 - ret = ib_umem_odp_map_dma_pages(mr->umem, io_virt, size, 536 + ret = ib_umem_odp_map_dma_pages(to_ib_umem_odp(mr->umem), io_virt, size, 547 537 access_mask, current_seq); 548 538 549 539 if (ret < 0) ··· 552 542 
np = ret; 553 543 554 544 mutex_lock(&odp->umem_mutex); 555 - if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) { 545 + if (!ib_umem_mmu_notifier_retry(to_ib_umem_odp(mr->umem), 546 + current_seq)) { 556 547 /* 557 548 * No need to check whether the MTTs really belong to 558 549 * this MR, since ib_umem_odp_map_dma_pages already ··· 586 575 587 576 io_virt += size; 588 577 next = odp_next(odp); 589 - if (unlikely(!next || next->umem->address != io_virt)) { 578 + if (unlikely(!next || next->umem.address != io_virt)) { 590 579 mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n", 591 580 io_virt, next); 592 581 return -EAGAIN;
+384 -109
drivers/infiniband/hw/mlx5/qp.c
··· 37 37 #include <linux/mlx5/fs.h> 38 38 #include "mlx5_ib.h" 39 39 #include "ib_rep.h" 40 + #include "cmd.h" 40 41 41 42 /* not supported currently */ 42 43 static int wq_signature; ··· 851 850 goto err_umem; 852 851 } 853 852 853 + MLX5_SET(create_qp_in, *in, uid, to_mpd(pd)->uid); 854 854 pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas); 855 855 if (ubuffer->umem) 856 856 mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0); ··· 1053 1051 1054 1052 static int is_connected(enum ib_qp_type qp_type) 1055 1053 { 1056 - if (qp_type == IB_QPT_RC || qp_type == IB_QPT_UC) 1054 + if (qp_type == IB_QPT_RC || qp_type == IB_QPT_UC || 1055 + qp_type == MLX5_IB_QPT_DCI) 1057 1056 return 1; 1058 1057 1059 1058 return 0; ··· 1062 1059 1063 1060 static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev, 1064 1061 struct mlx5_ib_qp *qp, 1065 - struct mlx5_ib_sq *sq, u32 tdn) 1062 + struct mlx5_ib_sq *sq, u32 tdn, 1063 + struct ib_pd *pd) 1066 1064 { 1067 1065 u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0}; 1068 1066 void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); 1069 1067 1068 + MLX5_SET(create_tis_in, in, uid, to_mpd(pd)->uid); 1070 1069 MLX5_SET(tisc, tisc, transport_domain, tdn); 1071 1070 if (qp->flags & MLX5_IB_QP_UNDERLAY) 1072 1071 MLX5_SET(tisc, tisc, underlay_qpn, qp->underlay_qpn); ··· 1077 1072 } 1078 1073 1079 1074 static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev, 1080 - struct mlx5_ib_sq *sq) 1075 + struct mlx5_ib_sq *sq, struct ib_pd *pd) 1081 1076 { 1082 - mlx5_core_destroy_tis(dev->mdev, sq->tisn); 1077 + mlx5_cmd_destroy_tis(dev->mdev, sq->tisn, to_mpd(pd)->uid); 1083 1078 } 1084 1079 1085 1080 static void destroy_flow_rule_vport_sq(struct mlx5_ib_dev *dev, ··· 1119 1114 goto err_umem; 1120 1115 } 1121 1116 1117 + MLX5_SET(create_sq_in, in, uid, to_mpd(pd)->uid); 1122 1118 sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); 1123 1119 MLX5_SET(sqc, sqc, flush_in_error_en, 1); 1124 1120 if (MLX5_CAP_ETH(dev->mdev, multi_pkt_send_wqe)) ··· 1194 
1188 1195 1189 static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, 1196 1190 struct mlx5_ib_rq *rq, void *qpin, 1197 - size_t qpinlen) 1191 + size_t qpinlen, struct ib_pd *pd) 1198 1192 { 1199 1193 struct mlx5_ib_qp *mqp = rq->base.container_mibqp; 1200 1194 __be64 *pas; ··· 1215 1209 if (!in) 1216 1210 return -ENOMEM; 1217 1211 1212 + MLX5_SET(create_rq_in, in, uid, to_mpd(pd)->uid); 1218 1213 rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); 1219 1214 if (!(rq->flags & MLX5_IB_RQ_CVLAN_STRIPPING)) 1220 1215 MLX5_SET(rqc, rqc, vsd, 1); ··· 1263 1256 MLX5_CAP_ETH(dev, tunnel_stateless_geneve_rx)); 1264 1257 } 1265 1258 1259 + static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev, 1260 + struct mlx5_ib_rq *rq, 1261 + u32 qp_flags_en, 1262 + struct ib_pd *pd) 1263 + { 1264 + if (qp_flags_en & (MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC | 1265 + MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC)) 1266 + mlx5_ib_disable_lb(dev, false, true); 1267 + mlx5_cmd_destroy_tir(dev->mdev, rq->tirn, to_mpd(pd)->uid); 1268 + } 1269 + 1266 1270 static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev, 1267 1271 struct mlx5_ib_rq *rq, u32 tdn, 1268 - bool tunnel_offload_en) 1272 + u32 *qp_flags_en, 1273 + struct ib_pd *pd) 1269 1274 { 1275 + u8 lb_flag = 0; 1270 1276 u32 *in; 1271 1277 void *tirc; 1272 1278 int inlen; ··· 1290 1270 if (!in) 1291 1271 return -ENOMEM; 1292 1272 1273 + MLX5_SET(create_tir_in, in, uid, to_mpd(pd)->uid); 1293 1274 tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); 1294 1275 MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT); 1295 1276 MLX5_SET(tirc, tirc, inline_rqn, rq->base.mqp.qpn); 1296 1277 MLX5_SET(tirc, tirc, transport_domain, tdn); 1297 - if (tunnel_offload_en) 1278 + if (*qp_flags_en & MLX5_QP_FLAG_TUNNEL_OFFLOADS) 1298 1279 MLX5_SET(tirc, tirc, tunneled_offload_en, 1); 1299 1280 1300 - if (dev->rep) 1301 - MLX5_SET(tirc, tirc, self_lb_block, 1302 - MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST); 1281 + if (*qp_flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC) 1282 
+ lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST; 1283 + 1284 + if (*qp_flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC) 1285 + lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST; 1286 + 1287 + if (dev->rep) { 1288 + lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST; 1289 + *qp_flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC; 1290 + } 1291 + 1292 + MLX5_SET(tirc, tirc, self_lb_block, lb_flag); 1303 1293 1304 1294 err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn); 1305 1295 1296 + if (!err && MLX5_GET(tirc, tirc, self_lb_block)) { 1297 + err = mlx5_ib_enable_lb(dev, false, true); 1298 + 1299 + if (err) 1300 + destroy_raw_packet_qp_tir(dev, rq, 0, pd); 1301 + } 1306 1302 kvfree(in); 1307 1303 1308 1304 return err; 1309 1305 } 1310 1306 1311 - static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev, 1312 - struct mlx5_ib_rq *rq) 1313 - { 1314 - mlx5_core_destroy_tir(dev->mdev, rq->tirn); 1315 - } 1316 - 1317 1307 static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, 1318 1308 u32 *in, size_t inlen, 1319 - struct ib_pd *pd) 1309 + struct ib_pd *pd, 1310 + struct ib_udata *udata, 1311 + struct mlx5_ib_create_qp_resp *resp) 1320 1312 { 1321 1313 struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; 1322 1314 struct mlx5_ib_sq *sq = &raw_packet_qp->sq; ··· 1338 1306 struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext); 1339 1307 int err; 1340 1308 u32 tdn = mucontext->tdn; 1309 + u16 uid = to_mpd(pd)->uid; 1341 1310 1342 1311 if (qp->sq.wqe_cnt) { 1343 - err = create_raw_packet_qp_tis(dev, qp, sq, tdn); 1312 + err = create_raw_packet_qp_tis(dev, qp, sq, tdn, pd); 1344 1313 if (err) 1345 1314 return err; 1346 1315 1347 1316 err = create_raw_packet_qp_sq(dev, sq, in, pd); 1348 1317 if (err) 1349 1318 goto err_destroy_tis; 1319 + 1320 + if (uid) { 1321 + resp->tisn = sq->tisn; 1322 + resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TISN; 1323 + resp->sqn = sq->base.mqp.qpn; 1324 + resp->comp_mask |= 
MLX5_IB_CREATE_QP_RESP_MASK_SQN; 1325 + } 1350 1326 1351 1327 sq->base.container_mibqp = qp; 1352 1328 sq->base.mqp.event = mlx5_ib_qp_event; ··· 1367 1327 rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING; 1368 1328 if (qp->flags & MLX5_IB_QP_PCI_WRITE_END_PADDING) 1369 1329 rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING; 1370 - err = create_raw_packet_qp_rq(dev, rq, in, inlen); 1330 + err = create_raw_packet_qp_rq(dev, rq, in, inlen, pd); 1371 1331 if (err) 1372 1332 goto err_destroy_sq; 1373 1333 1374 - 1375 - err = create_raw_packet_qp_tir(dev, rq, tdn, 1376 - qp->tunnel_offload_en); 1334 + err = create_raw_packet_qp_tir(dev, rq, tdn, &qp->flags_en, pd); 1377 1335 if (err) 1378 1336 goto err_destroy_rq; 1337 + 1338 + if (uid) { 1339 + resp->rqn = rq->base.mqp.qpn; 1340 + resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_RQN; 1341 + resp->tirn = rq->tirn; 1342 + resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN; 1343 + } 1379 1344 } 1380 1345 1381 1346 qp->trans_qp.base.mqp.qpn = qp->sq.wqe_cnt ? 
sq->base.mqp.qpn : 1382 1347 rq->base.mqp.qpn; 1348 + err = ib_copy_to_udata(udata, resp, min(udata->outlen, sizeof(*resp))); 1349 + if (err) 1350 + goto err_destroy_tir; 1383 1351 1384 1352 return 0; 1385 1353 1354 + err_destroy_tir: 1355 + destroy_raw_packet_qp_tir(dev, rq, qp->flags_en, pd); 1386 1356 err_destroy_rq: 1387 1357 destroy_raw_packet_qp_rq(dev, rq); 1388 1358 err_destroy_sq: ··· 1400 1350 return err; 1401 1351 destroy_raw_packet_qp_sq(dev, sq); 1402 1352 err_destroy_tis: 1403 - destroy_raw_packet_qp_tis(dev, sq); 1353 + destroy_raw_packet_qp_tis(dev, sq, pd); 1404 1354 1405 1355 return err; 1406 1356 } ··· 1413 1363 struct mlx5_ib_rq *rq = &raw_packet_qp->rq; 1414 1364 1415 1365 if (qp->rq.wqe_cnt) { 1416 - destroy_raw_packet_qp_tir(dev, rq); 1366 + destroy_raw_packet_qp_tir(dev, rq, qp->flags_en, qp->ibqp.pd); 1417 1367 destroy_raw_packet_qp_rq(dev, rq); 1418 1368 } 1419 1369 1420 1370 if (qp->sq.wqe_cnt) { 1421 1371 destroy_raw_packet_qp_sq(dev, sq); 1422 - destroy_raw_packet_qp_tis(dev, sq); 1372 + destroy_raw_packet_qp_tis(dev, sq, qp->ibqp.pd); 1423 1373 } 1424 1374 } 1425 1375 ··· 1437 1387 1438 1388 static void destroy_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) 1439 1389 { 1440 - mlx5_core_destroy_tir(dev->mdev, qp->rss_qp.tirn); 1390 + if (qp->flags_en & (MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC | 1391 + MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC)) 1392 + mlx5_ib_disable_lb(dev, false, true); 1393 + mlx5_cmd_destroy_tir(dev->mdev, qp->rss_qp.tirn, 1394 + to_mpd(qp->ibqp.pd)->uid); 1441 1395 } 1442 1396 1443 1397 static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, ··· 1464 1410 u32 tdn = mucontext->tdn; 1465 1411 struct mlx5_ib_create_qp_rss ucmd = {}; 1466 1412 size_t required_cmd_sz; 1413 + u8 lb_flag = 0; 1467 1414 1468 1415 if (init_attr->qp_type != IB_QPT_RAW_PACKET) 1469 1416 return -EOPNOTSUPP; ··· 1499 1444 return -EOPNOTSUPP; 1500 1445 } 1501 1446 1502 - if (ucmd.flags & ~MLX5_QP_FLAG_TUNNEL_OFFLOADS) 
{ 1447 + if (ucmd.flags & ~(MLX5_QP_FLAG_TUNNEL_OFFLOADS | 1448 + MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC | 1449 + MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC)) { 1503 1450 mlx5_ib_dbg(dev, "invalid flags\n"); 1504 1451 return -EOPNOTSUPP; 1505 1452 } ··· 1518 1461 return -EOPNOTSUPP; 1519 1462 } 1520 1463 1464 + if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC || dev->rep) { 1465 + lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST; 1466 + qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC; 1467 + } 1468 + 1469 + if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC) { 1470 + lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST; 1471 + qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC; 1472 + } 1473 + 1521 1474 err = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))); 1522 1475 if (err) { 1523 1476 mlx5_ib_dbg(dev, "copy failed\n"); ··· 1539 1472 if (!in) 1540 1473 return -ENOMEM; 1541 1474 1475 + MLX5_SET(create_tir_in, in, uid, to_mpd(pd)->uid); 1542 1476 tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); 1543 1477 MLX5_SET(tirc, tirc, disp_type, 1544 1478 MLX5_TIRC_DISP_TYPE_INDIRECT); ··· 1551 1483 1552 1484 if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS) 1553 1485 MLX5_SET(tirc, tirc, tunneled_offload_en, 1); 1486 + 1487 + MLX5_SET(tirc, tirc, self_lb_block, lb_flag); 1554 1488 1555 1489 if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_INNER) 1556 1490 hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner); ··· 1650 1580 MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields); 1651 1581 1652 1582 create_tir: 1653 - if (dev->rep) 1654 - MLX5_SET(tirc, tirc, self_lb_block, 1655 - MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST); 1656 - 1657 1583 err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn); 1584 + 1585 + if (!err && MLX5_GET(tirc, tirc, self_lb_block)) { 1586 + err = mlx5_ib_enable_lb(dev, false, true); 1587 + 1588 + if (err) 1589 + mlx5_cmd_destroy_tir(dev->mdev, qp->rss_qp.tirn, 1590 + to_mpd(pd)->uid); 1591 + } 1658 1592 1659 1593 if (err) 1660 
1594 goto err; 1595 + 1596 + if (mucontext->devx_uid) { 1597 + resp.comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN; 1598 + resp.tirn = qp->rss_qp.tirn; 1599 + } 1600 + 1601 + err = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))); 1602 + if (err) 1603 + goto err_copy; 1661 1604 1662 1605 kvfree(in); 1663 1606 /* qpn is reserved for that QP */ ··· 1678 1595 qp->flags |= MLX5_IB_QP_RSS; 1679 1596 return 0; 1680 1597 1598 + err_copy: 1599 + mlx5_cmd_destroy_tir(dev->mdev, qp->rss_qp.tirn, mucontext->devx_uid); 1681 1600 err: 1682 1601 kvfree(in); 1683 1602 return err; 1603 + } 1604 + 1605 + static void configure_responder_scat_cqe(struct ib_qp_init_attr *init_attr, 1606 + void *qpc) 1607 + { 1608 + int rcqe_sz; 1609 + 1610 + if (init_attr->qp_type == MLX5_IB_QPT_DCI) 1611 + return; 1612 + 1613 + rcqe_sz = mlx5_ib_get_cqe_size(init_attr->recv_cq); 1614 + 1615 + if (rcqe_sz == 128) { 1616 + MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE); 1617 + return; 1618 + } 1619 + 1620 + if (init_attr->qp_type != MLX5_IB_QPT_DCT) 1621 + MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA32_CQE); 1622 + } 1623 + 1624 + static void configure_requester_scat_cqe(struct mlx5_ib_dev *dev, 1625 + struct ib_qp_init_attr *init_attr, 1626 + struct mlx5_ib_create_qp *ucmd, 1627 + void *qpc) 1628 + { 1629 + enum ib_qp_type qpt = init_attr->qp_type; 1630 + int scqe_sz; 1631 + bool allow_scat_cqe = 0; 1632 + 1633 + if (qpt == IB_QPT_UC || qpt == IB_QPT_UD) 1634 + return; 1635 + 1636 + if (ucmd) 1637 + allow_scat_cqe = ucmd->flags & MLX5_QP_FLAG_ALLOW_SCATTER_CQE; 1638 + 1639 + if (!allow_scat_cqe && init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) 1640 + return; 1641 + 1642 + scqe_sz = mlx5_ib_get_cqe_size(init_attr->send_cq); 1643 + if (scqe_sz == 128) { 1644 + MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA64_CQE); 1645 + return; 1646 + } 1647 + 1648 + if (init_attr->qp_type != MLX5_IB_QPT_DCI || 1649 + MLX5_CAP_GEN(dev->mdev, dc_req_scat_data_cqe)) 1650 + MLX5_SET(qpc, qpc, cs_req, 
MLX5_REQ_SCAT_DATA32_CQE); 1651 + } 1652 + 1653 + static int atomic_size_to_mode(int size_mask) 1654 + { 1655 + /* driver does not support atomic_size > 256B 1656 + * and does not know how to translate bigger sizes 1657 + */ 1658 + int supported_size_mask = size_mask & 0x1ff; 1659 + int log_max_size; 1660 + 1661 + if (!supported_size_mask) 1662 + return -EOPNOTSUPP; 1663 + 1664 + log_max_size = __fls(supported_size_mask); 1665 + 1666 + if (log_max_size > 3) 1667 + return log_max_size; 1668 + 1669 + return MLX5_ATOMIC_MODE_8B; 1670 + } 1671 + 1672 + static int get_atomic_mode(struct mlx5_ib_dev *dev, 1673 + enum ib_qp_type qp_type) 1674 + { 1675 + u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations); 1676 + u8 atomic = MLX5_CAP_GEN(dev->mdev, atomic); 1677 + int atomic_mode = -EOPNOTSUPP; 1678 + int atomic_size_mask; 1679 + 1680 + if (!atomic) 1681 + return -EOPNOTSUPP; 1682 + 1683 + if (qp_type == MLX5_IB_QPT_DCT) 1684 + atomic_size_mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc); 1685 + else 1686 + atomic_size_mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp); 1687 + 1688 + if ((atomic_operations & MLX5_ATOMIC_OPS_EXTENDED_CMP_SWAP) || 1689 + (atomic_operations & MLX5_ATOMIC_OPS_EXTENDED_FETCH_ADD)) 1690 + atomic_mode = atomic_size_to_mode(atomic_size_mask); 1691 + 1692 + if (atomic_mode <= 0 && 1693 + (atomic_operations & MLX5_ATOMIC_OPS_CMP_SWAP && 1694 + atomic_operations & MLX5_ATOMIC_OPS_FETCH_ADD)) 1695 + atomic_mode = MLX5_ATOMIC_MODE_IB_COMP; 1696 + 1697 + return atomic_mode; 1698 + } 1699 + 1700 + static inline bool check_flags_mask(uint64_t input, uint64_t supported) 1701 + { 1702 + return (input & ~supported) == 0; 1684 1703 } 1685 1704 1686 1705 static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, ··· 1882 1697 return -EFAULT; 1883 1698 } 1884 1699 1700 + if (!check_flags_mask(ucmd.flags, 1701 + MLX5_QP_FLAG_SIGNATURE | 1702 + MLX5_QP_FLAG_SCATTER_CQE | 1703 + MLX5_QP_FLAG_TUNNEL_OFFLOADS | 1704 + 
MLX5_QP_FLAG_BFREG_INDEX | 1705 + MLX5_QP_FLAG_TYPE_DCT | 1706 + MLX5_QP_FLAG_TYPE_DCI | 1707 + MLX5_QP_FLAG_ALLOW_SCATTER_CQE)) 1708 + return -EINVAL; 1709 + 1885 1710 err = get_qp_user_index(to_mucontext(pd->uobject->context), 1886 1711 &ucmd, udata->inlen, &uidx); 1887 1712 if (err) 1888 1713 return err; 1889 1714 1890 1715 qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE); 1891 - qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE); 1716 + if (MLX5_CAP_GEN(dev->mdev, sctr_data_cqe)) 1717 + qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE); 1892 1718 if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS) { 1893 1719 if (init_attr->qp_type != IB_QPT_RAW_PACKET || 1894 1720 !tunnel_offload_supported(mdev)) { 1895 1721 mlx5_ib_dbg(dev, "Tunnel offload isn't supported\n"); 1896 1722 return -EOPNOTSUPP; 1897 1723 } 1898 - qp->tunnel_offload_en = true; 1724 + qp->flags_en |= MLX5_QP_FLAG_TUNNEL_OFFLOADS; 1725 + } 1726 + 1727 + if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC) { 1728 + if (init_attr->qp_type != IB_QPT_RAW_PACKET) { 1729 + mlx5_ib_dbg(dev, "Self-LB UC isn't supported\n"); 1730 + return -EOPNOTSUPP; 1731 + } 1732 + qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC; 1733 + } 1734 + 1735 + if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC) { 1736 + if (init_attr->qp_type != IB_QPT_RAW_PACKET) { 1737 + mlx5_ib_dbg(dev, "Self-LB UM isn't supported\n"); 1738 + return -EOPNOTSUPP; 1739 + } 1740 + qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC; 1899 1741 } 1900 1742 1901 1743 if (init_attr->create_flags & IB_QP_CREATE_SOURCE_QPN) { ··· 2023 1811 MLX5_SET(qpc, qpc, cd_slave_receive, 1); 2024 1812 2025 1813 if (qp->scat_cqe && is_connected(init_attr->qp_type)) { 2026 - int rcqe_sz; 2027 - int scqe_sz; 2028 - 2029 - rcqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->recv_cq); 2030 - scqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->send_cq); 2031 - 2032 - if (rcqe_sz == 128) 2033 - MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE); 2034 - else 2035 - 
MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA32_CQE); 2036 - 2037 - if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) { 2038 - if (scqe_sz == 128) 2039 - MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA64_CQE); 2040 - else 2041 - MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA32_CQE); 2042 - } 1814 + configure_responder_scat_cqe(init_attr, qpc); 1815 + configure_requester_scat_cqe(dev, init_attr, 1816 + (pd && pd->uobject) ? &ucmd : NULL, 1817 + qpc); 2043 1818 } 2044 1819 2045 1820 if (qp->rq.wqe_cnt) { ··· 2110 1911 qp->flags & MLX5_IB_QP_UNDERLAY) { 2111 1912 qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr; 2112 1913 raw_packet_qp_copy_info(qp, &qp->raw_packet_qp); 2113 - err = create_raw_packet_qp(dev, qp, in, inlen, pd); 1914 + err = create_raw_packet_qp(dev, qp, in, inlen, pd, udata, 1915 + &resp); 2114 1916 } else { 2115 1917 err = mlx5_core_create_qp(dev->mdev, &base->mqp, in, inlen); 2116 1918 } ··· 2392 2192 goto err_free; 2393 2193 } 2394 2194 2195 + MLX5_SET(create_dct_in, qp->dct.in, uid, to_mpd(pd)->uid); 2395 2196 dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry); 2396 2197 qp->qp_sub_type = MLX5_IB_QPT_DCT; 2397 2198 MLX5_SET(dctc, dctc, pd, to_mpd(pd)->pdn); ··· 2400 2199 MLX5_SET(dctc, dctc, cqn, to_mcq(attr->recv_cq)->mcq.cqn); 2401 2200 MLX5_SET64(dctc, dctc, dc_access_key, ucmd->access_key); 2402 2201 MLX5_SET(dctc, dctc, user_index, uidx); 2202 + 2203 + if (ucmd->flags & MLX5_QP_FLAG_SCATTER_CQE) 2204 + configure_responder_scat_cqe(attr, dctc); 2403 2205 2404 2206 qp->state = IB_QPS_RESET; 2405 2207 ··· 2609 2405 return 0; 2610 2406 } 2611 2407 2612 - static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_attr *attr, 2613 - int attr_mask) 2408 + static int to_mlx5_access_flags(struct mlx5_ib_qp *qp, 2409 + const struct ib_qp_attr *attr, 2410 + int attr_mask, __be32 *hw_access_flags) 2614 2411 { 2615 - u32 hw_access_flags = 0; 2616 2412 u8 dest_rd_atomic; 2617 2413 u32 access_flags; 2414 + 2415 + struct 
mlx5_ib_dev *dev = to_mdev(qp->ibqp.device); 2618 2416 2619 2417 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) 2620 2418 dest_rd_atomic = attr->max_dest_rd_atomic; ··· 2632 2426 access_flags &= IB_ACCESS_REMOTE_WRITE; 2633 2427 2634 2428 if (access_flags & IB_ACCESS_REMOTE_READ) 2635 - hw_access_flags |= MLX5_QP_BIT_RRE; 2636 - if (access_flags & IB_ACCESS_REMOTE_ATOMIC) 2637 - hw_access_flags |= (MLX5_QP_BIT_RAE | MLX5_ATOMIC_MODE_CX); 2638 - if (access_flags & IB_ACCESS_REMOTE_WRITE) 2639 - hw_access_flags |= MLX5_QP_BIT_RWE; 2429 + *hw_access_flags |= MLX5_QP_BIT_RRE; 2430 + if ((access_flags & IB_ACCESS_REMOTE_ATOMIC) && 2431 + qp->ibqp.qp_type == IB_QPT_RC) { 2432 + int atomic_mode; 2640 2433 2641 - return cpu_to_be32(hw_access_flags); 2434 + atomic_mode = get_atomic_mode(dev, qp->ibqp.qp_type); 2435 + if (atomic_mode < 0) 2436 + return -EOPNOTSUPP; 2437 + 2438 + *hw_access_flags |= MLX5_QP_BIT_RAE; 2439 + *hw_access_flags |= atomic_mode << MLX5_ATOMIC_MODE_OFFSET; 2440 + } 2441 + 2442 + if (access_flags & IB_ACCESS_REMOTE_WRITE) 2443 + *hw_access_flags |= MLX5_QP_BIT_RWE; 2444 + 2445 + *hw_access_flags = cpu_to_be32(*hw_access_flags); 2446 + 2447 + return 0; 2642 2448 } 2643 2449 2644 2450 enum { ··· 2676 2458 } 2677 2459 2678 2460 static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev, 2679 - struct mlx5_ib_sq *sq, u8 sl) 2461 + struct mlx5_ib_sq *sq, u8 sl, 2462 + struct ib_pd *pd) 2680 2463 { 2681 2464 void *in; 2682 2465 void *tisc; ··· 2690 2471 return -ENOMEM; 2691 2472 2692 2473 MLX5_SET(modify_tis_in, in, bitmask.prio, 1); 2474 + MLX5_SET(modify_tis_in, in, uid, to_mpd(pd)->uid); 2693 2475 2694 2476 tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx); 2695 2477 MLX5_SET(tisc, tisc, prio, ((sl & 0x7) << 1)); ··· 2703 2483 } 2704 2484 2705 2485 static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev, 2706 - struct mlx5_ib_sq *sq, u8 tx_affinity) 2486 + struct mlx5_ib_sq *sq, u8 tx_affinity, 2487 + struct ib_pd *pd) 2707 2488 { 2708 2489 void 
*in; 2709 2490 void *tisc; ··· 2717 2496 return -ENOMEM; 2718 2497 2719 2498 MLX5_SET(modify_tis_in, in, bitmask.lag_tx_port_affinity, 1); 2499 + MLX5_SET(modify_tis_in, in, uid, to_mpd(pd)->uid); 2720 2500 2721 2501 tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx); 2722 2502 MLX5_SET(tisc, tisc, lag_tx_port_affinity, tx_affinity); ··· 2802 2580 if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt) 2803 2581 return modify_raw_packet_eth_prio(dev->mdev, 2804 2582 &qp->raw_packet_qp.sq, 2805 - sl & 0xf); 2583 + sl & 0xf, qp->ibqp.pd); 2806 2584 2807 2585 return 0; 2808 2586 } ··· 2950 2728 return result; 2951 2729 } 2952 2730 2953 - static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev, 2954 - struct mlx5_ib_rq *rq, int new_state, 2955 - const struct mlx5_modify_raw_qp_param *raw_qp_param) 2731 + static int modify_raw_packet_qp_rq( 2732 + struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, int new_state, 2733 + const struct mlx5_modify_raw_qp_param *raw_qp_param, struct ib_pd *pd) 2956 2734 { 2957 2735 void *in; 2958 2736 void *rqc; ··· 2965 2743 return -ENOMEM; 2966 2744 2967 2745 MLX5_SET(modify_rq_in, in, rq_state, rq->state); 2746 + MLX5_SET(modify_rq_in, in, uid, to_mpd(pd)->uid); 2968 2747 2969 2748 rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); 2970 2749 MLX5_SET(rqc, rqc, state, new_state); ··· 2976 2753 MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID); 2977 2754 MLX5_SET(rqc, rqc, counter_set_id, raw_qp_param->rq_q_ctr_id); 2978 2755 } else 2979 - pr_info_once("%s: RAW PACKET QP counters are not supported on current FW\n", 2980 - dev->ib_dev.name); 2756 + dev_info_once( 2757 + &dev->ib_dev.dev, 2758 + "RAW PACKET QP counters are not supported on current FW\n"); 2981 2759 } 2982 2760 2983 2761 err = mlx5_core_modify_rq(dev->mdev, rq->base.mqp.qpn, in, inlen); ··· 2992 2768 return err; 2993 2769 } 2994 2770 2995 - static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev, 2996 - struct mlx5_ib_sq *sq, 2997 - int new_state, 2998 - const struct 
mlx5_modify_raw_qp_param *raw_qp_param) 2771 + static int modify_raw_packet_qp_sq( 2772 + struct mlx5_core_dev *dev, struct mlx5_ib_sq *sq, int new_state, 2773 + const struct mlx5_modify_raw_qp_param *raw_qp_param, struct ib_pd *pd) 2999 2774 { 3000 2775 struct mlx5_ib_qp *ibqp = sq->base.container_mibqp; 3001 2776 struct mlx5_rate_limit old_rl = ibqp->rl; ··· 3011 2788 if (!in) 3012 2789 return -ENOMEM; 3013 2790 2791 + MLX5_SET(modify_sq_in, in, uid, to_mpd(pd)->uid); 3014 2792 MLX5_SET(modify_sq_in, in, sq_state, sq->state); 3015 2793 3016 2794 sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); ··· 3114 2890 } 3115 2891 3116 2892 if (modify_rq) { 3117 - err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param); 2893 + err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param, 2894 + qp->ibqp.pd); 3118 2895 if (err) 3119 2896 return err; 3120 2897 } ··· 3123 2898 if (modify_sq) { 3124 2899 if (tx_affinity) { 3125 2900 err = modify_raw_packet_tx_affinity(dev->mdev, sq, 3126 - tx_affinity); 2901 + tx_affinity, 2902 + qp->ibqp.pd); 3127 2903 if (err) 3128 2904 return err; 3129 2905 } 3130 2906 3131 - return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state, raw_qp_param); 2907 + return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state, 2908 + raw_qp_param, qp->ibqp.pd); 3132 2909 } 3133 2910 3134 2911 return 0; 2912 + } 2913 + 2914 + static unsigned int get_tx_affinity(struct mlx5_ib_dev *dev, 2915 + struct mlx5_ib_pd *pd, 2916 + struct mlx5_ib_qp_base *qp_base, 2917 + u8 port_num) 2918 + { 2919 + struct mlx5_ib_ucontext *ucontext = NULL; 2920 + unsigned int tx_port_affinity; 2921 + 2922 + if (pd && pd->ibpd.uobject && pd->ibpd.uobject->context) 2923 + ucontext = to_mucontext(pd->ibpd.uobject->context); 2924 + 2925 + if (ucontext) { 2926 + tx_port_affinity = (unsigned int)atomic_add_return( 2927 + 1, &ucontext->tx_port_affinity) % 2928 + MLX5_MAX_PORTS + 2929 + 1; 2930 + mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x ucontext %p\n", 2931 + tx_port_affinity, 
qp_base->mqp.qpn, ucontext); 2932 + } else { 2933 + tx_port_affinity = 2934 + (unsigned int)atomic_add_return( 2935 + 1, &dev->roce[port_num].tx_port_affinity) % 2936 + MLX5_MAX_PORTS + 2937 + 1; 2938 + mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x\n", 2939 + tx_port_affinity, qp_base->mqp.qpn); 2940 + } 2941 + 2942 + return tx_port_affinity; 3135 2943 } 3136 2944 3137 2945 static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, ··· 3232 2974 if (!context) 3233 2975 return -ENOMEM; 3234 2976 2977 + pd = get_pd(qp); 3235 2978 context->flags = cpu_to_be32(mlx5_st << 16); 3236 2979 3237 2980 if (!(attr_mask & IB_QP_PATH_MIG_STATE)) { ··· 3261 3002 (ibqp->qp_type == IB_QPT_XRC_TGT)) { 3262 3003 if (mlx5_lag_is_active(dev->mdev)) { 3263 3004 u8 p = mlx5_core_native_port_num(dev->mdev); 3264 - tx_affinity = (unsigned int)atomic_add_return(1, 3265 - &dev->roce[p].next_port) % 3266 - MLX5_MAX_PORTS + 1; 3005 + tx_affinity = get_tx_affinity(dev, pd, base, p); 3267 3006 context->flags |= cpu_to_be32(tx_affinity << 24); 3268 3007 } 3269 3008 } ··· 3319 3062 goto out; 3320 3063 } 3321 3064 3322 - pd = get_pd(qp); 3323 3065 get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq, 3324 3066 &send_cq, &recv_cq); 3325 3067 ··· 3348 3092 cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21); 3349 3093 } 3350 3094 3351 - if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) 3352 - context->params2 |= to_mlx5_access_flags(qp, attr, attr_mask); 3095 + if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) { 3096 + __be32 access_flags = 0; 3097 + 3098 + err = to_mlx5_access_flags(qp, attr, attr_mask, &access_flags); 3099 + if (err) 3100 + goto out; 3101 + 3102 + context->params2 |= access_flags; 3103 + } 3353 3104 3354 3105 if (attr_mask & IB_QP_MIN_RNR_TIMER) 3355 3106 context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24); ··· 3506 3243 int req = IB_QP_STATE; 3507 3244 int opt = 0; 3508 3245 3509 - if (cur_state == IB_QPS_RESET && new_state == 
IB_QPS_INIT) { 3246 + if (new_state == IB_QPS_RESET) { 3247 + return is_valid_mask(attr_mask, req, opt); 3248 + } else if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { 3510 3249 req |= IB_QP_PKEY_INDEX | IB_QP_PORT; 3511 3250 return is_valid_mask(attr_mask, req, opt); 3512 3251 } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { ··· 3572 3307 if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE) 3573 3308 MLX5_SET(dctc, dctc, rwe, 1); 3574 3309 if (attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) { 3575 - if (!mlx5_ib_dc_atomic_is_supported(dev)) 3310 + int atomic_mode; 3311 + 3312 + atomic_mode = get_atomic_mode(dev, MLX5_IB_QPT_DCT); 3313 + if (atomic_mode < 0) 3576 3314 return -EOPNOTSUPP; 3315 + 3316 + MLX5_SET(dctc, dctc, atomic_mode, atomic_mode); 3577 3317 MLX5_SET(dctc, dctc, rae, 1); 3578 - MLX5_SET(dctc, dctc, atomic_mode, MLX5_ATOMIC_MODE_DCT_CX); 3579 3318 } 3580 3319 MLX5_SET(dctc, dctc, pkey_index, attr->pkey_index); 3581 3320 MLX5_SET(dctc, dctc, port, attr->port_num); ··· 3636 3367 size_t required_cmd_sz; 3637 3368 int err = -EINVAL; 3638 3369 int port; 3639 - enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED; 3640 3370 3641 3371 if (ibqp->rwq_ind_tbl) 3642 3372 return -ENOSYS; ··· 3681 3413 3682 3414 if (!(cur_state == new_state && cur_state == IB_QPS_RESET)) { 3683 3415 port = attr_mask & IB_QP_PORT ? 
attr->port_num : qp->port; 3684 - ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port); 3685 3416 } 3686 3417 3687 3418 if (qp->flags & MLX5_IB_QP_UNDERLAY) { ··· 3691 3424 } 3692 3425 } else if (qp_type != MLX5_IB_QPT_REG_UMR && 3693 3426 qp_type != MLX5_IB_QPT_DCI && 3694 - !ib_modify_qp_is_ok(cur_state, new_state, qp_type, attr_mask, ll)) { 3427 + !ib_modify_qp_is_ok(cur_state, new_state, qp_type, 3428 + attr_mask)) { 3695 3429 mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n", 3696 3430 cur_state, new_state, ibqp->qp_type, attr_mask); 3697 3431 goto out; ··· 4639 4371 u8 next_fence = 0; 4640 4372 u8 fence; 4641 4373 4374 + if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && 4375 + !drain)) { 4376 + *bad_wr = wr; 4377 + return -EIO; 4378 + } 4379 + 4642 4380 if (unlikely(ibqp->qp_type == IB_QPT_GSI)) 4643 4381 return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr); 4644 4382 ··· 4653 4379 qend = qp->sq.qend; 4654 4380 4655 4381 spin_lock_irqsave(&qp->sq.lock, flags); 4656 - 4657 - if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) { 4658 - err = -EIO; 4659 - *bad_wr = wr; 4660 - nreq = 0; 4661 - goto out; 4662 - } 4663 4382 4664 4383 for (nreq = 0; wr; nreq++, wr = wr->next) { 4665 4384 if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) { ··· 4967 4700 int ind; 4968 4701 int i; 4969 4702 4703 + if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && 4704 + !drain)) { 4705 + *bad_wr = wr; 4706 + return -EIO; 4707 + } 4708 + 4970 4709 if (unlikely(ibqp->qp_type == IB_QPT_GSI)) 4971 4710 return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr); 4972 4711 4973 4712 spin_lock_irqsave(&qp->rq.lock, flags); 4974 - 4975 - if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) { 4976 - err = -EIO; 4977 - *bad_wr = wr; 4978 - nreq = 0; 4979 - goto out; 4980 - } 4981 4713 4982 4714 ind = qp->rq.head & (qp->rq.wqe_cnt - 1); 4983 4715 ··· 5441 5175 struct mlx5_ib_dev *dev = to_mdev(ibdev); 5442 5176 struct 
mlx5_ib_xrcd *xrcd; 5443 5177 int err; 5178 + u16 uid; 5444 5179 5445 5180 if (!MLX5_CAP_GEN(dev->mdev, xrc)) 5446 5181 return ERR_PTR(-ENOSYS); ··· 5450 5183 if (!xrcd) 5451 5184 return ERR_PTR(-ENOMEM); 5452 5185 5453 - err = mlx5_core_xrcd_alloc(dev->mdev, &xrcd->xrcdn); 5186 + uid = context ? to_mucontext(context)->devx_uid : 0; 5187 + err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, uid); 5454 5188 if (err) { 5455 5189 kfree(xrcd); 5456 5190 return ERR_PTR(-ENOMEM); 5457 5191 } 5458 5192 5193 + xrcd->uid = uid; 5459 5194 return &xrcd->ibxrcd; 5460 5195 } 5461 5196 ··· 5465 5196 { 5466 5197 struct mlx5_ib_dev *dev = to_mdev(xrcd->device); 5467 5198 u32 xrcdn = to_mxrcd(xrcd)->xrcdn; 5199 + u16 uid = to_mxrcd(xrcd)->uid; 5468 5200 int err; 5469 5201 5470 - err = mlx5_core_xrcd_dealloc(dev->mdev, xrcdn); 5202 + err = mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, uid); 5471 5203 if (err) 5472 5204 mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn); 5473 5205 ··· 5538 5268 if (!in) 5539 5269 return -ENOMEM; 5540 5270 5271 + MLX5_SET(create_rq_in, in, uid, to_mpd(pd)->uid); 5541 5272 rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); 5542 5273 MLX5_SET(rqc, rqc, mem_rq_type, 5543 5274 MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE); ··· 5714 5443 err = create_user_rq(dev, pd, rwq, &ucmd); 5715 5444 if (err) { 5716 5445 mlx5_ib_dbg(dev, "err %d\n", err); 5717 - if (err) 5718 - return err; 5446 + return err; 5719 5447 } 5720 5448 5721 5449 rwq->user_index = ucmd.user_index; ··· 5843 5573 for (i = 0; i < sz; i++) 5844 5574 MLX5_SET(rqtc, rqtc, rq_num[i], init_attr->ind_tbl[i]->wq_num); 5845 5575 5576 + rwq_ind_tbl->uid = to_mpd(init_attr->ind_tbl[0]->pd)->uid; 5577 + MLX5_SET(create_rqt_in, in, uid, rwq_ind_tbl->uid); 5578 + 5846 5579 err = mlx5_core_create_rqt(dev->mdev, in, inlen, &rwq_ind_tbl->rqtn); 5847 5580 kvfree(in); 5848 5581 ··· 5864 5591 return &rwq_ind_tbl->ib_rwq_ind_tbl; 5865 5592 5866 5593 err_copy: 5867 - mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn); 5594 + 
mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid); 5868 5595 err: 5869 5596 kfree(rwq_ind_tbl); 5870 5597 return ERR_PTR(err); ··· 5875 5602 struct mlx5_ib_rwq_ind_table *rwq_ind_tbl = to_mrwq_ind_table(ib_rwq_ind_tbl); 5876 5603 struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_tbl->device); 5877 5604 5878 - mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn); 5605 + mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid); 5879 5606 5880 5607 kfree(rwq_ind_tbl); 5881 5608 return 0; ··· 5926 5653 if (wq_state == IB_WQS_ERR) 5927 5654 wq_state = MLX5_RQC_STATE_ERR; 5928 5655 MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state); 5656 + MLX5_SET(modify_rq_in, in, uid, to_mpd(wq->pd)->uid); 5929 5657 MLX5_SET(rqc, rqc, state, wq_state); 5930 5658 5931 5659 if (wq_attr_mask & IB_WQ_FLAGS) { ··· 5958 5684 MLX5_SET(rqc, rqc, counter_set_id, 5959 5685 dev->port->cnts.set_id); 5960 5686 } else 5961 - pr_info_once("%s: Receive WQ counters are not supported on current FW\n", 5962 - dev->ib_dev.name); 5687 + dev_info_once( 5688 + &dev->ib_dev.dev, 5689 + "Receive WQ counters are not supported on current FW\n"); 5963 5690 } 5964 5691 5965 5692 err = mlx5_core_modify_rq(dev->mdev, rwq->core_qp.qpn, in, inlen);
+1
drivers/infiniband/hw/mlx5/srq.c
··· 144 144 145 145 in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; 146 146 in->page_offset = offset; 147 + in->uid = to_mpd(pd)->uid; 147 148 if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && 148 149 in->type != IB_SRQT_BASIC) 149 150 in->user_index = uidx;
+3 -2
drivers/infiniband/hw/mthca/mthca_mad.c
··· 58 58 59 59 ret = ib_query_port(&dev->ib_dev, port_num, tprops); 60 60 if (ret) { 61 - printk(KERN_WARNING "ib_query_port failed (%d) for %s port %d\n", 62 - ret, dev->ib_dev.name, port_num); 61 + dev_warn(&dev->ib_dev.dev, 62 + "ib_query_port failed (%d) forport %d\n", ret, 63 + port_num); 63 64 goto out; 64 65 } 65 66
+3 -3
drivers/infiniband/hw/mthca/mthca_main.c
··· 986 986 goto err_free_dev; 987 987 } 988 988 989 - if (mthca_cmd_init(mdev)) { 989 + err = mthca_cmd_init(mdev); 990 + if (err) { 990 991 mthca_err(mdev, "Failed to init command interface, aborting.\n"); 991 992 goto err_free_dev; 992 993 } ··· 1015 1014 1016 1015 err = mthca_setup_hca(mdev); 1017 1016 if (err == -EBUSY && (mdev->mthca_flags & MTHCA_FLAG_MSI_X)) { 1018 - if (mdev->mthca_flags & MTHCA_FLAG_MSI_X) 1019 - pci_free_irq_vectors(pdev); 1017 + pci_free_irq_vectors(pdev); 1020 1018 mdev->mthca_flags &= ~MTHCA_FLAG_MSI_X; 1021 1019 1022 1020 err = mthca_setup_hca(mdev);
+19 -25
drivers/infiniband/hw/mthca/mthca_provider.c
··· 1076 1076 return err; 1077 1077 } 1078 1078 1079 - static ssize_t show_rev(struct device *device, struct device_attribute *attr, 1080 - char *buf) 1079 + static ssize_t hw_rev_show(struct device *device, 1080 + struct device_attribute *attr, char *buf) 1081 1081 { 1082 1082 struct mthca_dev *dev = 1083 1083 container_of(device, struct mthca_dev, ib_dev.dev); 1084 1084 return sprintf(buf, "%x\n", dev->rev_id); 1085 1085 } 1086 + static DEVICE_ATTR_RO(hw_rev); 1086 1087 1087 - static ssize_t show_hca(struct device *device, struct device_attribute *attr, 1088 - char *buf) 1088 + static ssize_t hca_type_show(struct device *device, 1089 + struct device_attribute *attr, char *buf) 1089 1090 { 1090 1091 struct mthca_dev *dev = 1091 1092 container_of(device, struct mthca_dev, ib_dev.dev); ··· 1104 1103 return sprintf(buf, "unknown\n"); 1105 1104 } 1106 1105 } 1106 + static DEVICE_ATTR_RO(hca_type); 1107 1107 1108 - static ssize_t show_board(struct device *device, struct device_attribute *attr, 1109 - char *buf) 1108 + static ssize_t board_id_show(struct device *device, 1109 + struct device_attribute *attr, char *buf) 1110 1110 { 1111 1111 struct mthca_dev *dev = 1112 1112 container_of(device, struct mthca_dev, ib_dev.dev); 1113 1113 return sprintf(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id); 1114 1114 } 1115 + static DEVICE_ATTR_RO(board_id); 1115 1116 1116 - static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 1117 - static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); 1118 - static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); 1117 + static struct attribute *mthca_dev_attributes[] = { 1118 + &dev_attr_hw_rev.attr, 1119 + &dev_attr_hca_type.attr, 1120 + &dev_attr_board_id.attr, 1121 + NULL 1122 + }; 1119 1123 1120 - static struct device_attribute *mthca_dev_attributes[] = { 1121 - &dev_attr_hw_rev, 1122 - &dev_attr_hca_type, 1123 - &dev_attr_board_id 1124 + static const struct attribute_group mthca_attr_group = { 1125 + .attrs = mthca_dev_attributes, 1124 
1126 }; 1125 1127 1126 1128 static int mthca_init_node_data(struct mthca_dev *dev) ··· 1196 1192 int mthca_register_device(struct mthca_dev *dev) 1197 1193 { 1198 1194 int ret; 1199 - int i; 1200 1195 1201 1196 ret = mthca_init_node_data(dev); 1202 1197 if (ret) 1203 1198 return ret; 1204 1199 1205 - strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX); 1206 1200 dev->ib_dev.owner = THIS_MODULE; 1207 1201 1208 1202 dev->ib_dev.uverbs_abi_ver = MTHCA_UVERBS_ABI_VERSION; ··· 1298 1296 1299 1297 mutex_init(&dev->cap_mask_mutex); 1300 1298 1299 + rdma_set_device_sysfs_group(&dev->ib_dev, &mthca_attr_group); 1301 1300 dev->ib_dev.driver_id = RDMA_DRIVER_MTHCA; 1302 - ret = ib_register_device(&dev->ib_dev, NULL); 1301 + ret = ib_register_device(&dev->ib_dev, "mthca%d", NULL); 1303 1302 if (ret) 1304 1303 return ret; 1305 - 1306 - for (i = 0; i < ARRAY_SIZE(mthca_dev_attributes); ++i) { 1307 - ret = device_create_file(&dev->ib_dev.dev, 1308 - mthca_dev_attributes[i]); 1309 - if (ret) { 1310 - ib_unregister_device(&dev->ib_dev); 1311 - return ret; 1312 - } 1313 - } 1314 1304 1315 1305 mthca_start_catas_poll(dev); 1316 1306
+2 -2
drivers/infiniband/hw/mthca/mthca_qp.c
··· 872 872 873 873 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; 874 874 875 - if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask, 876 - IB_LINK_LAYER_UNSPECIFIED)) { 875 + if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, 876 + attr_mask)) { 877 877 mthca_dbg(dev, "Bad QP transition (transport %d) " 878 878 "%d->%d with attr 0x%08x\n", 879 879 qp->transport, cur_state, new_state,
-3
drivers/infiniband/hw/nes/nes.c
··· 456 456 void __iomem *mmio_regs = NULL; 457 457 u8 hw_rev; 458 458 459 - assert(pcidev != NULL); 460 - assert(ent != NULL); 461 - 462 459 printk(KERN_INFO PFX "NetEffect RNIC driver v%s loading. (%s)\n", 463 460 DRV_VERSION, pci_name(pcidev)); 464 461
-9
drivers/infiniband/hw/nes/nes.h
··· 149 149 printk(KERN_ERR PFX "%s[%u]: " fmt, __func__, __LINE__, ##args); \ 150 150 } while (0) 151 151 152 - #define assert(expr) \ 153 - do { \ 154 - if (!(expr)) { \ 155 - printk(KERN_ERR PFX "Assertion failed! %s, %s, %s, line %d\n", \ 156 - #expr, __FILE__, __func__, __LINE__); \ 157 - } \ 158 - } while (0) 159 - 160 152 #define NES_EVENT_TIMEOUT 1200000 161 153 #else 162 154 #define nes_debug(level, fmt, args...) no_printk(fmt, ##args) 163 - #define assert(expr) do {} while (0) 164 155 165 156 #define NES_EVENT_TIMEOUT 100000 166 157 #endif
+1 -1
drivers/infiniband/hw/nes/nes_hw.c
··· 1443 1443 mdelay(1); 1444 1444 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); 1445 1445 temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); 1446 - } while ((temp_phy_data2 == temp_phy_data)); 1446 + } while (temp_phy_data2 == temp_phy_data); 1447 1447 1448 1448 /* wait for tracking */ 1449 1449 counter = 0;
-2
drivers/infiniband/hw/nes/nes_nic.c
··· 146 146 struct list_head *list_pos, *list_temp; 147 147 unsigned long flags; 148 148 149 - assert(nesdev != NULL); 150 - 151 149 if (nesvnic->netdev_open == 1) 152 150 return 0; 153 151
+22 -41
drivers/infiniband/hw/nes/nes_verbs.c
··· 687 687 } 688 688 689 689 nes_debug(NES_DBG_PD, "Allocating PD (%p) for ib device %s\n", 690 - nespd, nesvnic->nesibdev->ibdev.name); 690 + nespd, dev_name(&nesvnic->nesibdev->ibdev.dev)); 691 691 692 692 nespd->pd_id = (pd_num << (PAGE_SHIFT-12)) + nesadapter->base_pd; 693 693 ··· 2556 2556 /** 2557 2557 * show_rev 2558 2558 */ 2559 - static ssize_t show_rev(struct device *dev, struct device_attribute *attr, 2560 - char *buf) 2559 + static ssize_t hw_rev_show(struct device *dev, 2560 + struct device_attribute *attr, char *buf) 2561 2561 { 2562 2562 struct nes_ib_device *nesibdev = 2563 2563 container_of(dev, struct nes_ib_device, ibdev.dev); ··· 2566 2566 nes_debug(NES_DBG_INIT, "\n"); 2567 2567 return sprintf(buf, "%x\n", nesvnic->nesdev->nesadapter->hw_rev); 2568 2568 } 2569 - 2569 + static DEVICE_ATTR_RO(hw_rev); 2570 2570 2571 2571 /** 2572 2572 * show_hca 2573 2573 */ 2574 - static ssize_t show_hca(struct device *dev, struct device_attribute *attr, 2575 - char *buf) 2574 + static ssize_t hca_type_show(struct device *dev, 2575 + struct device_attribute *attr, char *buf) 2576 2576 { 2577 2577 nes_debug(NES_DBG_INIT, "\n"); 2578 2578 return sprintf(buf, "NES020\n"); 2579 2579 } 2580 - 2580 + static DEVICE_ATTR_RO(hca_type); 2581 2581 2582 2582 /** 2583 2583 * show_board 2584 2584 */ 2585 - static ssize_t show_board(struct device *dev, struct device_attribute *attr, 2586 - char *buf) 2585 + static ssize_t board_id_show(struct device *dev, 2586 + struct device_attribute *attr, char *buf) 2587 2587 { 2588 2588 nes_debug(NES_DBG_INIT, "\n"); 2589 2589 return sprintf(buf, "%.*s\n", 32, "NES020 Board ID"); 2590 2590 } 2591 + static DEVICE_ATTR_RO(board_id); 2591 2592 2592 - 2593 - static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 2594 - static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); 2595 - static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); 2596 - 2597 - static struct device_attribute *nes_dev_attributes[] = { 2598 - &dev_attr_hw_rev, 2599 - 
&dev_attr_hca_type, 2600 - &dev_attr_board_id 2593 + static struct attribute *nes_dev_attributes[] = { 2594 + &dev_attr_hw_rev.attr, 2595 + &dev_attr_hca_type.attr, 2596 + &dev_attr_board_id.attr, 2597 + NULL 2601 2598 }; 2602 2599 2600 + static const struct attribute_group nes_attr_group = { 2601 + .attrs = nes_dev_attributes, 2602 + }; 2603 2603 2604 2604 /** 2605 2605 * nes_query_qp ··· 3640 3640 if (nesibdev == NULL) { 3641 3641 return NULL; 3642 3642 } 3643 - strlcpy(nesibdev->ibdev.name, "nes%d", IB_DEVICE_NAME_MAX); 3644 3643 nesibdev->ibdev.owner = THIS_MODULE; 3645 3644 3646 3645 nesibdev->ibdev.node_type = RDMA_NODE_RNIC; ··· 3794 3795 struct nes_vnic *nesvnic = nesibdev->nesvnic; 3795 3796 struct nes_device *nesdev = nesvnic->nesdev; 3796 3797 struct nes_adapter *nesadapter = nesdev->nesadapter; 3797 - int i, ret; 3798 + int ret; 3798 3799 3800 + rdma_set_device_sysfs_group(&nesvnic->nesibdev->ibdev, &nes_attr_group); 3799 3801 nesvnic->nesibdev->ibdev.driver_id = RDMA_DRIVER_NES; 3800 - ret = ib_register_device(&nesvnic->nesibdev->ibdev, NULL); 3802 + ret = ib_register_device(&nesvnic->nesibdev->ibdev, "nes%d", NULL); 3801 3803 if (ret) { 3802 3804 return ret; 3803 3805 } ··· 3808 3808 nesibdev->max_mr = nesadapter->max_mr / nesadapter->port_count; 3809 3809 nesibdev->max_qp = (nesadapter->max_qp-NES_FIRST_QPN) / nesadapter->port_count; 3810 3810 nesibdev->max_pd = nesadapter->max_pd / nesadapter->port_count; 3811 - 3812 - for (i = 0; i < ARRAY_SIZE(nes_dev_attributes); ++i) { 3813 - ret = device_create_file(&nesibdev->ibdev.dev, nes_dev_attributes[i]); 3814 - if (ret) { 3815 - while (i > 0) { 3816 - i--; 3817 - device_remove_file(&nesibdev->ibdev.dev, 3818 - nes_dev_attributes[i]); 3819 - } 3820 - ib_unregister_device(&nesibdev->ibdev); 3821 - return ret; 3822 - } 3823 - } 3824 3811 3825 3812 nesvnic->of_device_registered = 1; 3826 3813 ··· 3821 3834 static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev) 3822 3835 { 3823 3836 struct 
nes_vnic *nesvnic = nesibdev->nesvnic; 3824 - int i; 3825 3837 3826 - for (i = 0; i < ARRAY_SIZE(nes_dev_attributes); ++i) { 3827 - device_remove_file(&nesibdev->ibdev.dev, nes_dev_attributes[i]); 3828 - } 3829 - 3830 - if (nesvnic->of_device_registered) { 3838 + if (nesvnic->of_device_registered) 3831 3839 ib_unregister_device(&nesibdev->ibdev); 3832 - } 3833 3840 3834 3841 nesvnic->of_device_registered = 0; 3835 3842 }
+1 -1
drivers/infiniband/hw/ocrdma/ocrdma_hw.c
··· 792 792 qp->srq->ibsrq. 793 793 srq_context); 794 794 } else if (dev_event) { 795 - pr_err("%s: Fatal event received\n", dev->ibdev.name); 795 + dev_err(&dev->ibdev.dev, "Fatal event received\n"); 796 796 ib_dispatch_event(&ib_evt); 797 797 } 798 798
+32 -42
drivers/infiniband/hw/ocrdma/ocrdma_main.c
··· 114 114 snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", &dev->attr.fw_ver[0]); 115 115 } 116 116 117 + /* OCRDMA sysfs interface */ 118 + static ssize_t hw_rev_show(struct device *device, 119 + struct device_attribute *attr, char *buf) 120 + { 121 + struct ocrdma_dev *dev = dev_get_drvdata(device); 122 + 123 + return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor); 124 + } 125 + static DEVICE_ATTR_RO(hw_rev); 126 + 127 + static ssize_t hca_type_show(struct device *device, 128 + struct device_attribute *attr, char *buf) 129 + { 130 + struct ocrdma_dev *dev = dev_get_drvdata(device); 131 + 132 + return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->model_number[0]); 133 + } 134 + static DEVICE_ATTR_RO(hca_type); 135 + 136 + static struct attribute *ocrdma_attributes[] = { 137 + &dev_attr_hw_rev.attr, 138 + &dev_attr_hca_type.attr, 139 + NULL 140 + }; 141 + 142 + static const struct attribute_group ocrdma_attr_group = { 143 + .attrs = ocrdma_attributes, 144 + }; 145 + 117 146 static int ocrdma_register_device(struct ocrdma_dev *dev) 118 147 { 119 - strlcpy(dev->ibdev.name, "ocrdma%d", IB_DEVICE_NAME_MAX); 120 148 ocrdma_get_guid(dev, (u8 *)&dev->ibdev.node_guid); 121 149 BUILD_BUG_ON(sizeof(OCRDMA_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); 122 150 memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC, ··· 241 213 dev->ibdev.destroy_srq = ocrdma_destroy_srq; 242 214 dev->ibdev.post_srq_recv = ocrdma_post_srq_recv; 243 215 } 216 + rdma_set_device_sysfs_group(&dev->ibdev, &ocrdma_attr_group); 244 217 dev->ibdev.driver_id = RDMA_DRIVER_OCRDMA; 245 - return ib_register_device(&dev->ibdev, NULL); 218 + return ib_register_device(&dev->ibdev, "ocrdma%d", NULL); 246 219 } 247 220 248 221 static int ocrdma_alloc_resources(struct ocrdma_dev *dev) ··· 289 260 kfree(dev->cq_tbl); 290 261 } 291 262 292 - /* OCRDMA sysfs interface */ 293 - static ssize_t show_rev(struct device *device, struct device_attribute *attr, 294 - char *buf) 295 - { 296 - struct ocrdma_dev *dev = 
dev_get_drvdata(device); 297 - 298 - return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor); 299 - } 300 - 301 - static ssize_t show_hca_type(struct device *device, 302 - struct device_attribute *attr, char *buf) 303 - { 304 - struct ocrdma_dev *dev = dev_get_drvdata(device); 305 - 306 - return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->model_number[0]); 307 - } 308 - 309 - static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 310 - static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL); 311 - 312 - static struct device_attribute *ocrdma_attributes[] = { 313 - &dev_attr_hw_rev, 314 - &dev_attr_hca_type 315 - }; 316 - 317 - static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev) 318 - { 319 - int i; 320 - 321 - for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++) 322 - device_remove_file(&dev->ibdev.dev, ocrdma_attributes[i]); 323 - } 324 - 325 263 static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) 326 264 { 327 - int status = 0, i; 265 + int status = 0; 328 266 u8 lstate = 0; 329 267 struct ocrdma_dev *dev; 330 268 ··· 327 331 if (!status) 328 332 ocrdma_update_link_state(dev, lstate); 329 333 330 - for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++) 331 - if (device_create_file(&dev->ibdev.dev, ocrdma_attributes[i])) 332 - goto sysfs_err; 333 334 /* Init stats */ 334 335 ocrdma_add_port_stats(dev); 335 336 /* Interrupt Moderation */ ··· 341 348 dev_name(&dev->nic_info.pdev->dev), dev->id); 342 349 return dev; 343 350 344 - sysfs_err: 345 - ocrdma_remove_sysfiles(dev); 346 351 alloc_err: 347 352 ocrdma_free_resources(dev); 348 353 ocrdma_cleanup_hw(dev); ··· 367 376 * of the registered clients. 368 377 */ 369 378 cancel_delayed_work_sync(&dev->eqd_work); 370 - ocrdma_remove_sysfiles(dev); 371 379 ib_unregister_device(&dev->ibdev); 372 380 373 381 ocrdma_rem_port_stats(dev);
+2 -1
drivers/infiniband/hw/ocrdma/ocrdma_stats.c
··· 764 764 return; 765 765 766 766 /* Create post stats base dir */ 767 - dev->dir = debugfs_create_dir(dev->ibdev.name, ocrdma_dbgfs_dir); 767 + dev->dir = 768 + debugfs_create_dir(dev_name(&dev->ibdev.dev), ocrdma_dbgfs_dir); 768 769 if (!dev->dir) 769 770 goto err; 770 771
+1 -2
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
··· 1480 1480 new_qps = old_qps; 1481 1481 spin_unlock_irqrestore(&qp->q_lock, flags); 1482 1482 1483 - if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask, 1484 - IB_LINK_LAYER_ETHERNET)) { 1483 + if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask)) { 1485 1484 pr_err("%s(%d) invalid attribute mask=0x%x specified for\n" 1486 1485 "qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n", 1487 1486 __func__, dev->id, attr_mask, qp->id, ibqp->qp_type,
+30 -43
drivers/infiniband/hw/qedr/main.c
··· 133 133 return 0; 134 134 } 135 135 136 + /* QEDR sysfs interface */ 137 + static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, 138 + char *buf) 139 + { 140 + struct qedr_dev *dev = dev_get_drvdata(device); 141 + 142 + return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->pdev->vendor); 143 + } 144 + static DEVICE_ATTR_RO(hw_rev); 145 + 146 + static ssize_t hca_type_show(struct device *device, 147 + struct device_attribute *attr, char *buf) 148 + { 149 + return scnprintf(buf, PAGE_SIZE, "%s\n", "HCA_TYPE_TO_SET"); 150 + } 151 + static DEVICE_ATTR_RO(hca_type); 152 + 153 + static struct attribute *qedr_attributes[] = { 154 + &dev_attr_hw_rev.attr, 155 + &dev_attr_hca_type.attr, 156 + NULL 157 + }; 158 + 159 + static const struct attribute_group qedr_attr_group = { 160 + .attrs = qedr_attributes, 161 + }; 162 + 136 163 static int qedr_iw_register_device(struct qedr_dev *dev) 137 164 { 138 165 dev->ibdev.node_type = RDMA_NODE_RNIC; ··· 196 169 static int qedr_register_device(struct qedr_dev *dev) 197 170 { 198 171 int rc; 199 - 200 - strlcpy(dev->ibdev.name, "qedr%d", IB_DEVICE_NAME_MAX); 201 172 202 173 dev->ibdev.node_guid = dev->attr.node_guid; 203 174 memcpy(dev->ibdev.node_desc, QEDR_NODE_DESC, sizeof(QEDR_NODE_DESC)); ··· 287 262 288 263 dev->ibdev.get_link_layer = qedr_link_layer; 289 264 dev->ibdev.get_dev_fw_str = qedr_get_dev_fw_str; 290 - 265 + rdma_set_device_sysfs_group(&dev->ibdev, &qedr_attr_group); 291 266 dev->ibdev.driver_id = RDMA_DRIVER_QEDR; 292 - return ib_register_device(&dev->ibdev, NULL); 267 + return ib_register_device(&dev->ibdev, "qedr%d", NULL); 293 268 } 294 269 295 270 /* This function allocates fast-path status block memory */ ··· 427 402 err1: 428 403 kfree(dev->sgid_tbl); 429 404 return rc; 430 - } 431 - 432 - /* QEDR sysfs interface */ 433 - static ssize_t show_rev(struct device *device, struct device_attribute *attr, 434 - char *buf) 435 - { 436 - struct qedr_dev *dev = dev_get_drvdata(device); 437 - 438 - 
return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->pdev->vendor); 439 - } 440 - 441 - static ssize_t show_hca_type(struct device *device, 442 - struct device_attribute *attr, char *buf) 443 - { 444 - return scnprintf(buf, PAGE_SIZE, "%s\n", "HCA_TYPE_TO_SET"); 445 - } 446 - 447 - static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 448 - static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL); 449 - 450 - static struct device_attribute *qedr_attributes[] = { 451 - &dev_attr_hw_rev, 452 - &dev_attr_hca_type 453 - }; 454 - 455 - static void qedr_remove_sysfiles(struct qedr_dev *dev) 456 - { 457 - int i; 458 - 459 - for (i = 0; i < ARRAY_SIZE(qedr_attributes); i++) 460 - device_remove_file(&dev->ibdev.dev, qedr_attributes[i]); 461 405 } 462 406 463 407 static void qedr_pci_set_atomic(struct qedr_dev *dev, struct pci_dev *pdev) ··· 849 855 { 850 856 struct qed_dev_rdma_info dev_info; 851 857 struct qedr_dev *dev; 852 - int rc = 0, i; 858 + int rc = 0; 853 859 854 860 dev = (struct qedr_dev *)ib_alloc_device(sizeof(*dev)); 855 861 if (!dev) { ··· 908 914 goto reg_err; 909 915 } 910 916 911 - for (i = 0; i < ARRAY_SIZE(qedr_attributes); i++) 912 - if (device_create_file(&dev->ibdev.dev, qedr_attributes[i])) 913 - goto sysfs_err; 914 - 915 917 if (!test_and_set_bit(QEDR_ENET_STATE_BIT, &dev->enet_state)) 916 918 qedr_ib_dispatch_event(dev, QEDR_PORT, IB_EVENT_PORT_ACTIVE); 917 919 918 920 DP_DEBUG(dev, QEDR_MSG_INIT, "qedr driver loaded successfully\n"); 919 921 return dev; 920 922 921 - sysfs_err: 922 - ib_unregister_device(&dev->ibdev); 923 923 reg_err: 924 924 qedr_sync_free_irqs(dev); 925 925 irq_err: ··· 932 944 /* First unregister with stack to stop all the active traffic 933 945 * of the registered clients. 934 946 */ 935 - qedr_remove_sysfiles(dev); 936 947 ib_unregister_device(&dev->ibdev); 937 948 938 949 qedr_stop_hw(dev);
+1 -1
drivers/infiniband/hw/qedr/qedr.h
··· 43 43 #include "qedr_hsi_rdma.h" 44 44 45 45 #define QEDR_NODE_DESC "QLogic 579xx RoCE HCA" 46 - #define DP_NAME(dev) ((dev)->ibdev.name) 46 + #define DP_NAME(_dev) dev_name(&(_dev)->ibdev.dev) 47 47 #define IS_IWARP(_dev) ((_dev)->rdma_type == QED_RDMA_TYPE_IWARP) 48 48 #define IS_ROCE(_dev) ((_dev)->rdma_type == QED_RDMA_TYPE_ROCE) 49 49
+2 -2
drivers/infiniband/hw/qedr/qedr_roce_cm.c
··· 519 519 } 520 520 521 521 if (ether_addr_equal(udh.eth.smac_h, udh.eth.dmac_h)) 522 - packet->tx_dest = QED_ROCE_LL2_TX_DEST_LB; 522 + packet->tx_dest = QED_LL2_TX_DEST_LB; 523 523 else 524 - packet->tx_dest = QED_ROCE_LL2_TX_DEST_NW; 524 + packet->tx_dest = QED_LL2_TX_DEST_NW; 525 525 526 526 packet->roce_mode = roce_mode; 527 527 memcpy(packet->header.vaddr, ud_header_buffer, header_size);
+1 -4
drivers/infiniband/hw/qedr/verbs.c
··· 1447 1447 u64 pbl_base_addr, phy_prod_pair_addr; 1448 1448 struct ib_ucontext *ib_ctx = NULL; 1449 1449 struct qedr_srq_hwq_info *hw_srq; 1450 - struct qedr_ucontext *ctx = NULL; 1451 1450 u32 page_cnt, page_size; 1452 1451 struct qedr_srq *srq; 1453 1452 int rc = 0; ··· 1472 1473 1473 1474 if (udata && ibpd->uobject && ibpd->uobject->context) { 1474 1475 ib_ctx = ibpd->uobject->context; 1475 - ctx = get_qedr_ucontext(ib_ctx); 1476 1476 1477 1477 if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) { 1478 1478 DP_ERR(dev, ··· 2238 2240 2239 2241 if (rdma_protocol_roce(&dev->ibdev, 1)) { 2240 2242 if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state, 2241 - ibqp->qp_type, attr_mask, 2242 - IB_LINK_LAYER_ETHERNET)) { 2243 + ibqp->qp_type, attr_mask)) { 2243 2244 DP_ERR(dev, 2244 2245 "modify qp: invalid attribute mask=0x%x specified for\n" 2245 2246 "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
+1 -1
drivers/infiniband/hw/qib/qib.h
··· 1390 1390 */ 1391 1391 1392 1392 extern const char ib_qib_version[]; 1393 + extern const struct attribute_group qib_attr_group; 1393 1394 1394 1395 int qib_device_create(struct qib_devdata *); 1395 1396 void qib_device_remove(struct qib_devdata *); 1396 1397 1397 1398 int qib_create_port_files(struct ib_device *ibdev, u8 port_num, 1398 1399 struct kobject *kobj); 1399 - int qib_verbs_register_sysfs(struct qib_devdata *); 1400 1400 void qib_verbs_unregister_sysfs(struct qib_devdata *); 1401 1401 /* Hook for sysfs read of QSFP */ 1402 1402 extern int qib_qsfp_dump(struct qib_pportdata *ppd, char *buf, int len);
+7 -10
drivers/infiniband/hw/qib/qib_qp.c
··· 378 378 * qib_check_send_wqe - validate wr/wqe 379 379 * @qp - The qp 380 380 * @wqe - The built wqe 381 + * @call_send - Determine if the send should be posted or scheduled 381 382 * 382 - * validate wr/wqe. This is called 383 - * prior to inserting the wqe into 384 - * the ring but after the wqe has been 385 - * setup. 386 - * 387 - * Returns 1 to force direct progress, 0 otherwise, -EINVAL on failure 383 + * Returns 0 on success, -EINVAL on failure 388 384 */ 389 385 int qib_check_send_wqe(struct rvt_qp *qp, 390 - struct rvt_swqe *wqe) 386 + struct rvt_swqe *wqe, bool *call_send) 391 387 { 392 388 struct rvt_ah *ah; 393 - int ret = 0; 394 389 395 390 switch (qp->ibqp.qp_type) { 396 391 case IB_QPT_RC: 397 392 case IB_QPT_UC: 398 393 if (wqe->length > 0x80000000U) 399 394 return -EINVAL; 395 + if (wqe->length > qp->pmtu) 396 + *call_send = false; 400 397 break; 401 398 case IB_QPT_SMI: 402 399 case IB_QPT_GSI: ··· 402 405 if (wqe->length > (1 << ah->log_pmtu)) 403 406 return -EINVAL; 404 407 /* progress hint */ 405 - ret = 1; 408 + *call_send = true; 406 409 break; 407 410 default: 408 411 break; 409 412 } 410 - return ret; 413 + return 0; 411 414 } 412 415 413 416 #ifdef CONFIG_DEBUG_FS
+10 -8
drivers/infiniband/hw/qib/qib_rc.c
··· 254 254 goto bail; 255 255 } 256 256 wqe = rvt_get_swqe_ptr(qp, qp->s_last); 257 - qib_send_complete(qp, wqe, qp->s_last != qp->s_acked ? 257 + rvt_send_complete(qp, wqe, qp->s_last != qp->s_acked ? 258 258 IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR); 259 259 /* will get called again */ 260 260 goto done; ··· 838 838 qib_migrate_qp(qp); 839 839 qp->s_retry = qp->s_retry_cnt; 840 840 } else if (qp->s_last == qp->s_acked) { 841 - qib_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR); 841 + rvt_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR); 842 842 rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); 843 843 return; 844 844 } else /* XXX need to handle delayed completion */ ··· 1221 1221 ibp->rvp.n_other_naks++; 1222 1222 class_b: 1223 1223 if (qp->s_last == qp->s_acked) { 1224 - qib_send_complete(qp, wqe, status); 1224 + rvt_send_complete(qp, wqe, status); 1225 1225 rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); 1226 1226 } 1227 1227 break; ··· 1425 1425 qp->s_rdma_read_len -= pmtu; 1426 1426 update_last_psn(qp, psn); 1427 1427 spin_unlock_irqrestore(&qp->s_lock, flags); 1428 - qib_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0); 1428 + rvt_copy_sge(qp, &qp->s_rdma_read_sge, 1429 + data, pmtu, false, false); 1429 1430 goto bail; 1430 1431 1431 1432 case OP(RDMA_READ_RESPONSE_ONLY): ··· 1472 1471 if (unlikely(tlen != qp->s_rdma_read_len)) 1473 1472 goto ack_len_err; 1474 1473 aeth = be32_to_cpu(ohdr->u.aeth); 1475 - qib_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0); 1474 + rvt_copy_sge(qp, &qp->s_rdma_read_sge, 1475 + data, tlen, false, false); 1476 1476 WARN_ON(qp->s_rdma_read_sge.num_sge); 1477 1477 (void) do_rc_ack(qp, aeth, psn, 1478 1478 OP(RDMA_READ_RESPONSE_LAST), 0, rcd); ··· 1492 1490 status = IB_WC_LOC_LEN_ERR; 1493 1491 ack_err: 1494 1492 if (qp->s_last == qp->s_acked) { 1495 - qib_send_complete(qp, wqe, status); 1493 + rvt_send_complete(qp, wqe, status); 1496 1494 rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); 1497 1495 } 1498 1496 ack_done: ··· 1846 1844 qp->r_rcv_len += pmtu; 1847 1845 if 
(unlikely(qp->r_rcv_len > qp->r_len)) 1848 1846 goto nack_inv; 1849 - qib_copy_sge(&qp->r_sge, data, pmtu, 1); 1847 + rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false); 1850 1848 break; 1851 1849 1852 1850 case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): ··· 1892 1890 wc.byte_len = tlen + qp->r_rcv_len; 1893 1891 if (unlikely(wc.byte_len > qp->r_len)) 1894 1892 goto nack_inv; 1895 - qib_copy_sge(&qp->r_sge, data, tlen, 1); 1893 + rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false); 1896 1894 rvt_put_ss(&qp->r_sge); 1897 1895 qp->r_msn++; 1898 1896 if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
+1 -341
drivers/infiniband/hw/qib/qib_ruc.c
··· 171 171 } 172 172 173 173 /** 174 - * qib_ruc_loopback - handle UC and RC lookback requests 175 - * @sqp: the sending QP 176 - * 177 - * This is called from qib_do_send() to 178 - * forward a WQE addressed to the same HCA. 179 - * Note that although we are single threaded due to the tasklet, we still 180 - * have to protect against post_send(). We don't have to worry about 181 - * receive interrupts since this is a connected protocol and all packets 182 - * will pass through here. 183 - */ 184 - static void qib_ruc_loopback(struct rvt_qp *sqp) 185 - { 186 - struct qib_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num); 187 - struct qib_pportdata *ppd = ppd_from_ibp(ibp); 188 - struct qib_devdata *dd = ppd->dd; 189 - struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; 190 - struct rvt_qp *qp; 191 - struct rvt_swqe *wqe; 192 - struct rvt_sge *sge; 193 - unsigned long flags; 194 - struct ib_wc wc; 195 - u64 sdata; 196 - atomic64_t *maddr; 197 - enum ib_wc_status send_status; 198 - int release; 199 - int ret; 200 - 201 - rcu_read_lock(); 202 - /* 203 - * Note that we check the responder QP state after 204 - * checking the requester's state. 205 - */ 206 - qp = rvt_lookup_qpn(rdi, &ibp->rvp, sqp->remote_qpn); 207 - if (!qp) 208 - goto done; 209 - 210 - spin_lock_irqsave(&sqp->s_lock, flags); 211 - 212 - /* Return if we are already busy processing a work request. */ 213 - if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) || 214 - !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND)) 215 - goto unlock; 216 - 217 - sqp->s_flags |= RVT_S_BUSY; 218 - 219 - again: 220 - if (sqp->s_last == READ_ONCE(sqp->s_head)) 221 - goto clr_busy; 222 - wqe = rvt_get_swqe_ptr(sqp, sqp->s_last); 223 - 224 - /* Return if it is not OK to start a new work reqeust. */ 225 - if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) { 226 - if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND)) 227 - goto clr_busy; 228 - /* We are in the error state, flush the work request. 
*/ 229 - send_status = IB_WC_WR_FLUSH_ERR; 230 - goto flush_send; 231 - } 232 - 233 - /* 234 - * We can rely on the entry not changing without the s_lock 235 - * being held until we update s_last. 236 - * We increment s_cur to indicate s_last is in progress. 237 - */ 238 - if (sqp->s_last == sqp->s_cur) { 239 - if (++sqp->s_cur >= sqp->s_size) 240 - sqp->s_cur = 0; 241 - } 242 - spin_unlock_irqrestore(&sqp->s_lock, flags); 243 - 244 - if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) || 245 - qp->ibqp.qp_type != sqp->ibqp.qp_type) { 246 - ibp->rvp.n_pkt_drops++; 247 - /* 248 - * For RC, the requester would timeout and retry so 249 - * shortcut the timeouts and just signal too many retries. 250 - */ 251 - if (sqp->ibqp.qp_type == IB_QPT_RC) 252 - send_status = IB_WC_RETRY_EXC_ERR; 253 - else 254 - send_status = IB_WC_SUCCESS; 255 - goto serr; 256 - } 257 - 258 - memset(&wc, 0, sizeof(wc)); 259 - send_status = IB_WC_SUCCESS; 260 - 261 - release = 1; 262 - sqp->s_sge.sge = wqe->sg_list[0]; 263 - sqp->s_sge.sg_list = wqe->sg_list + 1; 264 - sqp->s_sge.num_sge = wqe->wr.num_sge; 265 - sqp->s_len = wqe->length; 266 - switch (wqe->wr.opcode) { 267 - case IB_WR_SEND_WITH_IMM: 268 - wc.wc_flags = IB_WC_WITH_IMM; 269 - wc.ex.imm_data = wqe->wr.ex.imm_data; 270 - /* FALLTHROUGH */ 271 - case IB_WR_SEND: 272 - ret = rvt_get_rwqe(qp, false); 273 - if (ret < 0) 274 - goto op_err; 275 - if (!ret) 276 - goto rnr_nak; 277 - break; 278 - 279 - case IB_WR_RDMA_WRITE_WITH_IMM: 280 - if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) 281 - goto inv_err; 282 - wc.wc_flags = IB_WC_WITH_IMM; 283 - wc.ex.imm_data = wqe->wr.ex.imm_data; 284 - ret = rvt_get_rwqe(qp, true); 285 - if (ret < 0) 286 - goto op_err; 287 - if (!ret) 288 - goto rnr_nak; 289 - /* FALLTHROUGH */ 290 - case IB_WR_RDMA_WRITE: 291 - if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) 292 - goto inv_err; 293 - if (wqe->length == 0) 294 - break; 295 - if (unlikely(!rvt_rkey_ok(qp, 
&qp->r_sge.sge, wqe->length, 296 - wqe->rdma_wr.remote_addr, 297 - wqe->rdma_wr.rkey, 298 - IB_ACCESS_REMOTE_WRITE))) 299 - goto acc_err; 300 - qp->r_sge.sg_list = NULL; 301 - qp->r_sge.num_sge = 1; 302 - qp->r_sge.total_len = wqe->length; 303 - break; 304 - 305 - case IB_WR_RDMA_READ: 306 - if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) 307 - goto inv_err; 308 - if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length, 309 - wqe->rdma_wr.remote_addr, 310 - wqe->rdma_wr.rkey, 311 - IB_ACCESS_REMOTE_READ))) 312 - goto acc_err; 313 - release = 0; 314 - sqp->s_sge.sg_list = NULL; 315 - sqp->s_sge.num_sge = 1; 316 - qp->r_sge.sge = wqe->sg_list[0]; 317 - qp->r_sge.sg_list = wqe->sg_list + 1; 318 - qp->r_sge.num_sge = wqe->wr.num_sge; 319 - qp->r_sge.total_len = wqe->length; 320 - break; 321 - 322 - case IB_WR_ATOMIC_CMP_AND_SWP: 323 - case IB_WR_ATOMIC_FETCH_AND_ADD: 324 - if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) 325 - goto inv_err; 326 - if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), 327 - wqe->atomic_wr.remote_addr, 328 - wqe->atomic_wr.rkey, 329 - IB_ACCESS_REMOTE_ATOMIC))) 330 - goto acc_err; 331 - /* Perform atomic OP and save result. */ 332 - maddr = (atomic64_t *) qp->r_sge.sge.vaddr; 333 - sdata = wqe->atomic_wr.compare_add; 334 - *(u64 *) sqp->s_sge.sge.vaddr = 335 - (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ? 
336 - (u64) atomic64_add_return(sdata, maddr) - sdata : 337 - (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, 338 - sdata, wqe->atomic_wr.swap); 339 - rvt_put_mr(qp->r_sge.sge.mr); 340 - qp->r_sge.num_sge = 0; 341 - goto send_comp; 342 - 343 - default: 344 - send_status = IB_WC_LOC_QP_OP_ERR; 345 - goto serr; 346 - } 347 - 348 - sge = &sqp->s_sge.sge; 349 - while (sqp->s_len) { 350 - u32 len = sqp->s_len; 351 - 352 - if (len > sge->length) 353 - len = sge->length; 354 - if (len > sge->sge_length) 355 - len = sge->sge_length; 356 - BUG_ON(len == 0); 357 - qib_copy_sge(&qp->r_sge, sge->vaddr, len, release); 358 - sge->vaddr += len; 359 - sge->length -= len; 360 - sge->sge_length -= len; 361 - if (sge->sge_length == 0) { 362 - if (!release) 363 - rvt_put_mr(sge->mr); 364 - if (--sqp->s_sge.num_sge) 365 - *sge = *sqp->s_sge.sg_list++; 366 - } else if (sge->length == 0 && sge->mr->lkey) { 367 - if (++sge->n >= RVT_SEGSZ) { 368 - if (++sge->m >= sge->mr->mapsz) 369 - break; 370 - sge->n = 0; 371 - } 372 - sge->vaddr = 373 - sge->mr->map[sge->m]->segs[sge->n].vaddr; 374 - sge->length = 375 - sge->mr->map[sge->m]->segs[sge->n].length; 376 - } 377 - sqp->s_len -= len; 378 - } 379 - if (release) 380 - rvt_put_ss(&qp->r_sge); 381 - 382 - if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) 383 - goto send_comp; 384 - 385 - if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM) 386 - wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; 387 - else 388 - wc.opcode = IB_WC_RECV; 389 - wc.wr_id = qp->r_wr_id; 390 - wc.status = IB_WC_SUCCESS; 391 - wc.byte_len = wqe->length; 392 - wc.qp = &qp->ibqp; 393 - wc.src_qp = qp->remote_qpn; 394 - wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr); 395 - wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr); 396 - wc.port_num = 1; 397 - /* Signal completion event if the solicited bit is set. 
*/ 398 - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 399 - wqe->wr.send_flags & IB_SEND_SOLICITED); 400 - 401 - send_comp: 402 - spin_lock_irqsave(&sqp->s_lock, flags); 403 - ibp->rvp.n_loop_pkts++; 404 - flush_send: 405 - sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; 406 - qib_send_complete(sqp, wqe, send_status); 407 - goto again; 408 - 409 - rnr_nak: 410 - /* Handle RNR NAK */ 411 - if (qp->ibqp.qp_type == IB_QPT_UC) 412 - goto send_comp; 413 - ibp->rvp.n_rnr_naks++; 414 - /* 415 - * Note: we don't need the s_lock held since the BUSY flag 416 - * makes this single threaded. 417 - */ 418 - if (sqp->s_rnr_retry == 0) { 419 - send_status = IB_WC_RNR_RETRY_EXC_ERR; 420 - goto serr; 421 - } 422 - if (sqp->s_rnr_retry_cnt < 7) 423 - sqp->s_rnr_retry--; 424 - spin_lock_irqsave(&sqp->s_lock, flags); 425 - if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK)) 426 - goto clr_busy; 427 - rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer << 428 - IB_AETH_CREDIT_SHIFT); 429 - goto clr_busy; 430 - 431 - op_err: 432 - send_status = IB_WC_REM_OP_ERR; 433 - wc.status = IB_WC_LOC_QP_OP_ERR; 434 - goto err; 435 - 436 - inv_err: 437 - send_status = IB_WC_REM_INV_REQ_ERR; 438 - wc.status = IB_WC_LOC_QP_OP_ERR; 439 - goto err; 440 - 441 - acc_err: 442 - send_status = IB_WC_REM_ACCESS_ERR; 443 - wc.status = IB_WC_LOC_PROT_ERR; 444 - err: 445 - /* responder goes to error state */ 446 - rvt_rc_error(qp, wc.status); 447 - 448 - serr: 449 - spin_lock_irqsave(&sqp->s_lock, flags); 450 - qib_send_complete(sqp, wqe, send_status); 451 - if (sqp->ibqp.qp_type == IB_QPT_RC) { 452 - int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR); 453 - 454 - sqp->s_flags &= ~RVT_S_BUSY; 455 - spin_unlock_irqrestore(&sqp->s_lock, flags); 456 - if (lastwqe) { 457 - struct ib_event ev; 458 - 459 - ev.device = sqp->ibqp.device; 460 - ev.element.qp = &sqp->ibqp; 461 - ev.event = IB_EVENT_QP_LAST_WQE_REACHED; 462 - sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context); 463 - } 464 - goto done; 465 - } 466 - 
clr_busy: 467 - sqp->s_flags &= ~RVT_S_BUSY; 468 - unlock: 469 - spin_unlock_irqrestore(&sqp->s_lock, flags); 470 - done: 471 - rcu_read_unlock(); 472 - } 473 - 474 - /** 475 174 * qib_make_grh - construct a GRH header 476 175 * @ibp: a pointer to the IB port 477 176 * @hdr: a pointer to the GRH header being constructed ··· 272 573 qp->ibqp.qp_type == IB_QPT_UC) && 273 574 (rdma_ah_get_dlid(&qp->remote_ah_attr) & 274 575 ~((1 << ppd->lmc) - 1)) == ppd->lid) { 275 - qib_ruc_loopback(qp); 576 + rvt_ruc_loopback(qp); 276 577 return; 277 578 } 278 579 ··· 311 612 } while (make_req(qp, &flags)); 312 613 313 614 spin_unlock_irqrestore(&qp->s_lock, flags); 314 - } 315 - 316 - /* 317 - * This should be called with s_lock held. 318 - */ 319 - void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, 320 - enum ib_wc_status status) 321 - { 322 - u32 old_last, last; 323 - 324 - if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND)) 325 - return; 326 - 327 - last = qp->s_last; 328 - old_last = last; 329 - if (++last >= qp->s_size) 330 - last = 0; 331 - qp->s_last = last; 332 - /* See post_send() */ 333 - barrier(); 334 - rvt_put_swqe(wqe); 335 - if (qp->ibqp.qp_type == IB_QPT_UD || 336 - qp->ibqp.qp_type == IB_QPT_SMI || 337 - qp->ibqp.qp_type == IB_QPT_GSI) 338 - atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount); 339 - 340 - rvt_qp_swqe_complete(qp, 341 - wqe, 342 - ib_qib_wc_opcode[wqe->wr.opcode], 343 - status); 344 - 345 - if (qp->s_acked == old_last) 346 - qp->s_acked = last; 347 - if (qp->s_cur == old_last) 348 - qp->s_cur = last; 349 - if (qp->s_tail == old_last) 350 - qp->s_tail = last; 351 - if (qp->state == IB_QPS_SQD && last == qp->s_cur) 352 - qp->s_draining = 0; 353 615 }
+1 -1
drivers/infiniband/hw/qib/qib_sdma.c
··· 651 651 if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) 652 652 rvt_error_qp(qp, IB_WC_GENERAL_ERR); 653 653 } else if (qp->s_wqe) 654 - qib_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR); 654 + rvt_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR); 655 655 spin_unlock(&qp->s_lock); 656 656 spin_unlock(&qp->r_lock); 657 657 /* return zero to process the next send work request */
+40 -59
drivers/infiniband/hw/qib/qib_sysfs.c
··· 551 551 * Start of per-unit (or driver, in some cases, but replicated 552 552 * per unit) functions (these get a device *) 553 553 */ 554 - static ssize_t show_rev(struct device *device, struct device_attribute *attr, 555 - char *buf) 554 + static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, 555 + char *buf) 556 556 { 557 557 struct qib_ibdev *dev = 558 558 container_of(device, struct qib_ibdev, rdi.ibdev.dev); 559 559 560 560 return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev); 561 561 } 562 + static DEVICE_ATTR_RO(hw_rev); 562 563 563 - static ssize_t show_hca(struct device *device, struct device_attribute *attr, 564 - char *buf) 564 + static ssize_t hca_type_show(struct device *device, 565 + struct device_attribute *attr, char *buf) 565 566 { 566 567 struct qib_ibdev *dev = 567 568 container_of(device, struct qib_ibdev, rdi.ibdev.dev); ··· 575 574 ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname); 576 575 return ret; 577 576 } 577 + static DEVICE_ATTR_RO(hca_type); 578 + static DEVICE_ATTR(board_id, 0444, hca_type_show, NULL); 578 579 579 - static ssize_t show_version(struct device *device, 580 + static ssize_t version_show(struct device *device, 580 581 struct device_attribute *attr, char *buf) 581 582 { 582 583 /* The string printed here is already newline-terminated. */ 583 584 return scnprintf(buf, PAGE_SIZE, "%s", (char *)ib_qib_version); 584 585 } 586 + static DEVICE_ATTR_RO(version); 585 587 586 - static ssize_t show_boardversion(struct device *device, 588 + static ssize_t boardversion_show(struct device *device, 587 589 struct device_attribute *attr, char *buf) 588 590 { 589 591 struct qib_ibdev *dev = ··· 596 592 /* The string printed here is already newline-terminated. 
*/ 597 593 return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion); 598 594 } 595 + static DEVICE_ATTR_RO(boardversion); 599 596 600 - 601 - static ssize_t show_localbus_info(struct device *device, 597 + static ssize_t localbus_info_show(struct device *device, 602 598 struct device_attribute *attr, char *buf) 603 599 { 604 600 struct qib_ibdev *dev = ··· 608 604 /* The string printed here is already newline-terminated. */ 609 605 return scnprintf(buf, PAGE_SIZE, "%s", dd->lbus_info); 610 606 } 607 + static DEVICE_ATTR_RO(localbus_info); 611 608 612 - 613 - static ssize_t show_nctxts(struct device *device, 609 + static ssize_t nctxts_show(struct device *device, 614 610 struct device_attribute *attr, char *buf) 615 611 { 616 612 struct qib_ibdev *dev = ··· 624 620 (dd->first_user_ctxt > dd->cfgctxts) ? 0 : 625 621 (dd->cfgctxts - dd->first_user_ctxt)); 626 622 } 623 + static DEVICE_ATTR_RO(nctxts); 627 624 628 - static ssize_t show_nfreectxts(struct device *device, 629 - struct device_attribute *attr, char *buf) 625 + static ssize_t nfreectxts_show(struct device *device, 626 + struct device_attribute *attr, char *buf) 630 627 { 631 628 struct qib_ibdev *dev = 632 629 container_of(device, struct qib_ibdev, rdi.ibdev.dev); ··· 636 631 /* Return the number of free user ports (contexts) available. 
*/ 637 632 return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts); 638 633 } 634 + static DEVICE_ATTR_RO(nfreectxts); 639 635 640 - static ssize_t show_serial(struct device *device, 636 + static ssize_t serial_show(struct device *device, 641 637 struct device_attribute *attr, char *buf) 642 638 { 643 639 struct qib_ibdev *dev = ··· 650 644 strcat(buf, "\n"); 651 645 return strlen(buf); 652 646 } 647 + static DEVICE_ATTR_RO(serial); 653 648 654 - static ssize_t store_chip_reset(struct device *device, 649 + static ssize_t chip_reset_store(struct device *device, 655 650 struct device_attribute *attr, const char *buf, 656 651 size_t count) 657 652 { ··· 670 663 bail: 671 664 return ret < 0 ? ret : count; 672 665 } 666 + static DEVICE_ATTR_WO(chip_reset); 673 667 674 668 /* 675 669 * Dump tempsense regs. in decimal, to ease shell-scripts. 676 670 */ 677 - static ssize_t show_tempsense(struct device *device, 671 + static ssize_t tempsense_show(struct device *device, 678 672 struct device_attribute *attr, char *buf) 679 673 { 680 674 struct qib_ibdev *dev = ··· 703 695 *(signed char *)(regvals + 7)); 704 696 return ret; 705 697 } 698 + static DEVICE_ATTR_RO(tempsense); 706 699 707 700 /* 708 701 * end of per-unit (or driver, in some cases, but replicated ··· 711 702 */ 712 703 713 704 /* start of per-unit file structures and support code */ 714 - static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 715 - static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); 716 - static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL); 717 - static DEVICE_ATTR(version, S_IRUGO, show_version, NULL); 718 - static DEVICE_ATTR(nctxts, S_IRUGO, show_nctxts, NULL); 719 - static DEVICE_ATTR(nfreectxts, S_IRUGO, show_nfreectxts, NULL); 720 - static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL); 721 - static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL); 722 - static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL); 723 - static DEVICE_ATTR(localbus_info, S_IRUGO, 
show_localbus_info, NULL); 724 - static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset); 705 + static struct attribute *qib_attributes[] = { 706 + &dev_attr_hw_rev.attr, 707 + &dev_attr_hca_type.attr, 708 + &dev_attr_board_id.attr, 709 + &dev_attr_version.attr, 710 + &dev_attr_nctxts.attr, 711 + &dev_attr_nfreectxts.attr, 712 + &dev_attr_serial.attr, 713 + &dev_attr_boardversion.attr, 714 + &dev_attr_tempsense.attr, 715 + &dev_attr_localbus_info.attr, 716 + &dev_attr_chip_reset.attr, 717 + NULL, 718 + }; 725 719 726 - static struct device_attribute *qib_attributes[] = { 727 - &dev_attr_hw_rev, 728 - &dev_attr_hca_type, 729 - &dev_attr_board_id, 730 - &dev_attr_version, 731 - &dev_attr_nctxts, 732 - &dev_attr_nfreectxts, 733 - &dev_attr_serial, 734 - &dev_attr_boardversion, 735 - &dev_attr_tempsense, 736 - &dev_attr_localbus_info, 737 - &dev_attr_chip_reset, 720 + const struct attribute_group qib_attr_group = { 721 + .attrs = qib_attributes, 738 722 }; 739 723 740 724 int qib_create_port_files(struct ib_device *ibdev, u8 port_num, ··· 825 823 bail_link: 826 824 kobject_put(&ppd->pport_kobj); 827 825 bail: 828 - return ret; 829 - } 830 - 831 - /* 832 - * Register and create our files in /sys/class/infiniband. 833 - */ 834 - int qib_verbs_register_sysfs(struct qib_devdata *dd) 835 - { 836 - struct ib_device *dev = &dd->verbs_dev.rdi.ibdev; 837 - int i, ret; 838 - 839 - for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i) { 840 - ret = device_create_file(&dev->dev, qib_attributes[i]); 841 - if (ret) 842 - goto bail; 843 - } 844 - 845 - return 0; 846 - bail: 847 - for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i) 848 - device_remove_file(&dev->dev, qib_attributes[i]); 849 826 return ret; 850 827 } 851 828
+6 -6
drivers/infiniband/hw/qib/qib_uc.c
··· 68 68 goto bail; 69 69 } 70 70 wqe = rvt_get_swqe_ptr(qp, qp->s_last); 71 - qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); 71 + rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); 72 72 goto done; 73 73 } 74 74 ··· 359 359 qp->r_rcv_len += pmtu; 360 360 if (unlikely(qp->r_rcv_len > qp->r_len)) 361 361 goto rewind; 362 - qib_copy_sge(&qp->r_sge, data, pmtu, 0); 362 + rvt_copy_sge(qp, &qp->r_sge, data, pmtu, false, false); 363 363 break; 364 364 365 365 case OP(SEND_LAST_WITH_IMMEDIATE): ··· 385 385 if (unlikely(wc.byte_len > qp->r_len)) 386 386 goto rewind; 387 387 wc.opcode = IB_WC_RECV; 388 - qib_copy_sge(&qp->r_sge, data, tlen, 0); 388 + rvt_copy_sge(qp, &qp->r_sge, data, tlen, false, false); 389 389 rvt_put_ss(&qp->s_rdma_read_sge); 390 390 last_imm: 391 391 wc.wr_id = qp->r_wr_id; ··· 449 449 qp->r_rcv_len += pmtu; 450 450 if (unlikely(qp->r_rcv_len > qp->r_len)) 451 451 goto drop; 452 - qib_copy_sge(&qp->r_sge, data, pmtu, 1); 452 + rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false); 453 453 break; 454 454 455 455 case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): ··· 479 479 } 480 480 wc.byte_len = qp->r_len; 481 481 wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; 482 - qib_copy_sge(&qp->r_sge, data, tlen, 1); 482 + rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false); 483 483 rvt_put_ss(&qp->r_sge); 484 484 goto last_imm; 485 485 ··· 495 495 tlen -= (hdrsize + pad + 4); 496 496 if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) 497 497 goto drop; 498 - qib_copy_sge(&qp->r_sge, data, tlen, 1); 498 + rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false); 499 499 rvt_put_ss(&qp->r_sge); 500 500 break; 501 501
+9 -8
drivers/infiniband/hw/qib/qib_ud.c
··· 162 162 const struct ib_global_route *grd = rdma_ah_read_grh(ah_attr); 163 163 164 164 qib_make_grh(ibp, &grh, grd, 0, 0); 165 - qib_copy_sge(&qp->r_sge, &grh, 166 - sizeof(grh), 1); 165 + rvt_copy_sge(qp, &qp->r_sge, &grh, 166 + sizeof(grh), true, false); 167 167 wc.wc_flags |= IB_WC_GRH; 168 168 } else 169 169 rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true); ··· 179 179 if (len > sge->sge_length) 180 180 len = sge->sge_length; 181 181 BUG_ON(len == 0); 182 - qib_copy_sge(&qp->r_sge, sge->vaddr, len, 1); 182 + rvt_copy_sge(qp, &qp->r_sge, sge->vaddr, len, true, false); 183 183 sge->vaddr += len; 184 184 sge->length -= len; 185 185 sge->sge_length -= len; ··· 260 260 goto bail; 261 261 } 262 262 wqe = rvt_get_swqe_ptr(qp, qp->s_last); 263 - qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); 263 + rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); 264 264 goto done; 265 265 } 266 266 ··· 304 304 qib_ud_loopback(qp, wqe); 305 305 spin_lock_irqsave(&qp->s_lock, tflags); 306 306 *flags = tflags; 307 - qib_send_complete(qp, wqe, IB_WC_SUCCESS); 307 + rvt_send_complete(qp, wqe, IB_WC_SUCCESS); 308 308 goto done; 309 309 } 310 310 } ··· 551 551 goto drop; 552 552 } 553 553 if (has_grh) { 554 - qib_copy_sge(&qp->r_sge, &hdr->u.l.grh, 555 - sizeof(struct ib_grh), 1); 554 + rvt_copy_sge(qp, &qp->r_sge, &hdr->u.l.grh, 555 + sizeof(struct ib_grh), true, false); 556 556 wc.wc_flags |= IB_WC_GRH; 557 557 } else 558 558 rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true); 559 - qib_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1); 559 + rvt_copy_sge(qp, &qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 560 + true, false); 560 561 rvt_put_ss(&qp->r_sge); 561 562 if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) 562 563 return;
+13 -34
drivers/infiniband/hw/qib/qib_verbs.c
··· 131 131 */ 132 132 __be64 ib_qib_sys_image_guid; 133 133 134 - /** 135 - * qib_copy_sge - copy data to SGE memory 136 - * @ss: the SGE state 137 - * @data: the data to copy 138 - * @length: the length of the data 139 - */ 140 - void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length, int release) 141 - { 142 - struct rvt_sge *sge = &ss->sge; 143 - 144 - while (length) { 145 - u32 len = rvt_get_sge_length(sge, length); 146 - 147 - WARN_ON_ONCE(len == 0); 148 - memcpy(sge->vaddr, data, len); 149 - rvt_update_sge(ss, len, release); 150 - data += len; 151 - length -= len; 152 - } 153 - } 154 - 155 134 /* 156 135 * Count the number of DMA descriptors needed to send length bytes of data. 157 136 * Don't modify the qib_sge_state to get the count. ··· 731 752 732 753 spin_lock(&qp->s_lock); 733 754 if (tx->wqe) 734 - qib_send_complete(qp, tx->wqe, IB_WC_SUCCESS); 755 + rvt_send_complete(qp, tx->wqe, IB_WC_SUCCESS); 735 756 else if (qp->ibqp.qp_type == IB_QPT_RC) { 736 757 struct ib_header *hdr; 737 758 ··· 1004 1025 } 1005 1026 if (qp->s_wqe) { 1006 1027 spin_lock_irqsave(&qp->s_lock, flags); 1007 - qib_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS); 1028 + rvt_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS); 1008 1029 spin_unlock_irqrestore(&qp->s_lock, flags); 1009 1030 } else if (qp->ibqp.qp_type == IB_QPT_RC) { 1010 1031 spin_lock_irqsave(&qp->s_lock, flags); ··· 1491 1512 rdi->dparms.props.max_mcast_grp; 1492 1513 /* post send table */ 1493 1514 dd->verbs_dev.rdi.post_parms = qib_post_parms; 1515 + 1516 + /* opcode translation table */ 1517 + dd->verbs_dev.rdi.wc_opcode = ib_qib_wc_opcode; 1494 1518 } 1495 1519 1496 1520 /** ··· 1570 1588 dd->verbs_dev.rdi.driver_f.port_callback = qib_create_port_files; 1571 1589 dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev; 1572 1590 dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah; 1573 - dd->verbs_dev.rdi.driver_f.check_send_wqe = qib_check_send_wqe; 1591 + dd->verbs_dev.rdi.driver_f.setup_wqe = 
qib_check_send_wqe; 1574 1592 dd->verbs_dev.rdi.driver_f.notify_new_ah = qib_notify_new_ah; 1575 1593 dd->verbs_dev.rdi.driver_f.alloc_qpn = qib_alloc_qpn; 1576 1594 dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qib_qp_priv_alloc; ··· 1613 1631 dd->verbs_dev.rdi.dparms.node = dd->assigned_node_id; 1614 1632 dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_IBA_IB; 1615 1633 dd->verbs_dev.rdi.dparms.max_mad_size = IB_MGMT_MAD_SIZE; 1634 + dd->verbs_dev.rdi.dparms.sge_copy_mode = RVT_SGE_COPY_MEMCPY; 1616 1635 1617 1636 qib_fill_device_attr(dd); 1618 1637 ··· 1625 1642 i, 1626 1643 dd->rcd[ctxt]->pkeys); 1627 1644 } 1645 + rdma_set_device_sysfs_group(&dd->verbs_dev.rdi.ibdev, &qib_attr_group); 1628 1646 1629 1647 ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_QIB); 1630 1648 if (ret) 1631 1649 goto err_tx; 1632 1650 1633 - ret = qib_verbs_register_sysfs(dd); 1634 - if (ret) 1635 - goto err_class; 1636 - 1637 1651 return ret; 1638 1652 1639 - err_class: 1640 - rvt_unregister_device(&dd->verbs_dev.rdi); 1641 1653 err_tx: 1642 1654 while (!list_empty(&dev->txreq_free)) { 1643 1655 struct list_head *l = dev->txreq_free.next; ··· 1694 1716 * It is only used in post send, which doesn't hold 1695 1717 * the s_lock. 1696 1718 */ 1697 - void _qib_schedule_send(struct rvt_qp *qp) 1719 + bool _qib_schedule_send(struct rvt_qp *qp) 1698 1720 { 1699 1721 struct qib_ibport *ibp = 1700 1722 to_iport(qp->ibqp.device, qp->port_num); 1701 1723 struct qib_pportdata *ppd = ppd_from_ibp(ibp); 1702 1724 struct qib_qp_priv *priv = qp->priv; 1703 1725 1704 - queue_work(ppd->qib_wq, &priv->s_work); 1726 + return queue_work(ppd->qib_wq, &priv->s_work); 1705 1727 } 1706 1728 1707 1729 /** ··· 1711 1733 * This schedules qp progress. The s_lock 1712 1734 * should be held. 
1713 1735 */ 1714 - void qib_schedule_send(struct rvt_qp *qp) 1736 + bool qib_schedule_send(struct rvt_qp *qp) 1715 1737 { 1716 1738 if (qib_send_ok(qp)) 1717 - _qib_schedule_send(qp); 1739 + return _qib_schedule_send(qp); 1740 + return false; 1718 1741 }
+5 -10
drivers/infiniband/hw/qib/qib_verbs.h
··· 1 1 /* 2 - * Copyright (c) 2012 - 2017 Intel Corporation. All rights reserved. 2 + * Copyright (c) 2012 - 2018 Intel Corporation. All rights reserved. 3 3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. 4 4 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 5 5 * ··· 223 223 !(qp->s_flags & RVT_S_ANY_WAIT_SEND)); 224 224 } 225 225 226 - void _qib_schedule_send(struct rvt_qp *qp); 227 - void qib_schedule_send(struct rvt_qp *qp); 226 + bool _qib_schedule_send(struct rvt_qp *qp); 227 + bool qib_schedule_send(struct rvt_qp *qp); 228 228 229 229 static inline int qib_pkey_ok(u16 pkey1, u16 pkey2) 230 230 { ··· 292 292 int qib_verbs_send(struct rvt_qp *qp, struct ib_header *hdr, 293 293 u32 hdrwords, struct rvt_sge_state *ss, u32 len); 294 294 295 - void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length, 296 - int release); 297 - 298 295 void qib_uc_rcv(struct qib_ibport *ibp, struct ib_header *hdr, 299 296 int has_grh, void *data, u32 tlen, struct rvt_qp *qp); 300 297 ··· 300 303 301 304 int qib_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr); 302 305 303 - int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe); 306 + int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, 307 + bool *call_send); 304 308 305 309 struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid); 306 310 ··· 330 332 void _qib_do_send(struct work_struct *work); 331 333 332 334 void qib_do_send(struct rvt_qp *qp); 333 - 334 - void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, 335 - enum ib_wc_status status); 336 335 337 336 void qib_send_rc_ack(struct rvt_qp *qp); 338 337
+1 -2
drivers/infiniband/hw/usnic/usnic_debugfs.c
··· 165 165 166 166 void usnic_debugfs_flow_remove(struct usnic_ib_qp_grp_flow *qp_flow) 167 167 { 168 - if (!IS_ERR_OR_NULL(qp_flow->dbgfs_dentry)) 169 - debugfs_remove(qp_flow->dbgfs_dentry); 168 + debugfs_remove(qp_flow->dbgfs_dentry); 170 169 }
+21 -18
drivers/infiniband/hw/usnic/usnic_ib_main.c
··· 76 76 static int usnic_ib_dump_vf_hdr(void *obj, char *buf, int buf_sz) 77 77 { 78 78 struct usnic_ib_vf *vf = obj; 79 - return scnprintf(buf, buf_sz, "PF: %s ", vf->pf->ib_dev.name); 79 + return scnprintf(buf, buf_sz, "PF: %s ", dev_name(&vf->pf->ib_dev.dev)); 80 80 } 81 81 /* End callback dump funcs */ 82 82 ··· 138 138 netdev = us_ibdev->netdev; 139 139 switch (event) { 140 140 case NETDEV_REBOOT: 141 - usnic_info("PF Reset on %s\n", us_ibdev->ib_dev.name); 141 + usnic_info("PF Reset on %s\n", dev_name(&us_ibdev->ib_dev.dev)); 142 142 usnic_ib_qp_grp_modify_active_to_err(us_ibdev); 143 143 ib_event.event = IB_EVENT_PORT_ERR; 144 144 ib_event.device = &us_ibdev->ib_dev; ··· 151 151 if (!us_ibdev->ufdev->link_up && 152 152 netif_carrier_ok(netdev)) { 153 153 usnic_fwd_carrier_up(us_ibdev->ufdev); 154 - usnic_info("Link UP on %s\n", us_ibdev->ib_dev.name); 154 + usnic_info("Link UP on %s\n", 155 + dev_name(&us_ibdev->ib_dev.dev)); 155 156 ib_event.event = IB_EVENT_PORT_ACTIVE; 156 157 ib_event.device = &us_ibdev->ib_dev; 157 158 ib_event.element.port_num = 1; ··· 160 159 } else if (us_ibdev->ufdev->link_up && 161 160 !netif_carrier_ok(netdev)) { 162 161 usnic_fwd_carrier_down(us_ibdev->ufdev); 163 - usnic_info("Link DOWN on %s\n", us_ibdev->ib_dev.name); 162 + usnic_info("Link DOWN on %s\n", 163 + dev_name(&us_ibdev->ib_dev.dev)); 164 164 usnic_ib_qp_grp_modify_active_to_err(us_ibdev); 165 165 ib_event.event = IB_EVENT_PORT_ERR; 166 166 ib_event.device = &us_ibdev->ib_dev; ··· 170 168 } else { 171 169 usnic_dbg("Ignoring %s on %s\n", 172 170 netdev_cmd_to_name(event), 173 - us_ibdev->ib_dev.name); 171 + dev_name(&us_ibdev->ib_dev.dev)); 174 172 } 175 173 break; 176 174 case NETDEV_CHANGEADDR: 177 175 if (!memcmp(us_ibdev->ufdev->mac, netdev->dev_addr, 178 176 sizeof(us_ibdev->ufdev->mac))) { 179 177 usnic_dbg("Ignoring addr change on %s\n", 180 - us_ibdev->ib_dev.name); 178 + dev_name(&us_ibdev->ib_dev.dev)); 181 179 } else { 182 180 usnic_info(" %s old mac: 
%pM new mac: %pM\n", 183 - us_ibdev->ib_dev.name, 181 + dev_name(&us_ibdev->ib_dev.dev), 184 182 us_ibdev->ufdev->mac, 185 183 netdev->dev_addr); 186 184 usnic_fwd_set_mac(us_ibdev->ufdev, netdev->dev_addr); ··· 195 193 case NETDEV_CHANGEMTU: 196 194 if (us_ibdev->ufdev->mtu != netdev->mtu) { 197 195 usnic_info("MTU Change on %s old: %u new: %u\n", 198 - us_ibdev->ib_dev.name, 196 + dev_name(&us_ibdev->ib_dev.dev), 199 197 us_ibdev->ufdev->mtu, netdev->mtu); 200 198 usnic_fwd_set_mtu(us_ibdev->ufdev, netdev->mtu); 201 199 usnic_ib_qp_grp_modify_active_to_err(us_ibdev); 202 200 } else { 203 201 usnic_dbg("Ignoring MTU change on %s\n", 204 - us_ibdev->ib_dev.name); 202 + dev_name(&us_ibdev->ib_dev.dev)); 205 203 } 206 204 break; 207 205 default: 208 206 usnic_dbg("Ignoring event %s on %s", 209 207 netdev_cmd_to_name(event), 210 - us_ibdev->ib_dev.name); 208 + dev_name(&us_ibdev->ib_dev.dev)); 211 209 } 212 210 mutex_unlock(&us_ibdev->usdev_lock); 213 211 } ··· 269 267 default: 270 268 usnic_info("Ignoring event %s on %s", 271 269 netdev_cmd_to_name(event), 272 - us_ibdev->ib_dev.name); 270 + dev_name(&us_ibdev->ib_dev.dev)); 273 271 } 274 272 mutex_unlock(&us_ibdev->usdev_lock); 275 273 ··· 366 364 us_ibdev->ib_dev.num_comp_vectors = USNIC_IB_NUM_COMP_VECTORS; 367 365 us_ibdev->ib_dev.dev.parent = &dev->dev; 368 366 us_ibdev->ib_dev.uverbs_abi_ver = USNIC_UVERBS_ABI_VERSION; 369 - strlcpy(us_ibdev->ib_dev.name, "usnic_%d", IB_DEVICE_NAME_MAX); 370 367 371 368 us_ibdev->ib_dev.uverbs_cmd_mask = 372 369 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | ··· 417 416 418 417 419 418 us_ibdev->ib_dev.driver_id = RDMA_DRIVER_USNIC; 420 - if (ib_register_device(&us_ibdev->ib_dev, NULL)) 419 + rdma_set_device_sysfs_group(&us_ibdev->ib_dev, &usnic_attr_group); 420 + 421 + if (ib_register_device(&us_ibdev->ib_dev, "usnic_%d", NULL)) 421 422 goto err_fwd_dealloc; 422 423 423 424 usnic_fwd_set_mtu(us_ibdev->ufdev, us_ibdev->netdev->mtu); ··· 440 437 kref_init(&us_ibdev->vf_cnt); 441 438 
442 439 usnic_info("Added ibdev: %s netdev: %s with mac %pM Link: %u MTU: %u\n", 443 - us_ibdev->ib_dev.name, netdev_name(us_ibdev->netdev), 444 - us_ibdev->ufdev->mac, us_ibdev->ufdev->link_up, 445 - us_ibdev->ufdev->mtu); 440 + dev_name(&us_ibdev->ib_dev.dev), 441 + netdev_name(us_ibdev->netdev), us_ibdev->ufdev->mac, 442 + us_ibdev->ufdev->link_up, us_ibdev->ufdev->mtu); 446 443 return us_ibdev; 447 444 448 445 err_fwd_dealloc: ··· 455 452 456 453 static void usnic_ib_device_remove(struct usnic_ib_dev *us_ibdev) 457 454 { 458 - usnic_info("Unregistering %s\n", us_ibdev->ib_dev.name); 455 + usnic_info("Unregistering %s\n", dev_name(&us_ibdev->ib_dev.dev)); 459 456 usnic_ib_sysfs_unregister_usdev(us_ibdev); 460 457 usnic_fwd_dev_free(us_ibdev->ufdev); 461 458 ib_unregister_device(&us_ibdev->ib_dev); ··· 594 591 mutex_unlock(&pf->usdev_lock); 595 592 596 593 usnic_info("Registering usnic VF %s into PF %s\n", pci_name(pdev), 597 - pf->ib_dev.name); 594 + dev_name(&pf->ib_dev.dev)); 598 595 usnic_ib_log_vf(vf); 599 596 return 0; 600 597
+26 -46
drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
··· 46 46 #include "usnic_ib_sysfs.h" 47 47 #include "usnic_log.h" 48 48 49 - static ssize_t usnic_ib_show_board(struct device *device, 50 - struct device_attribute *attr, 51 - char *buf) 49 + static ssize_t board_id_show(struct device *device, 50 + struct device_attribute *attr, char *buf) 52 51 { 53 52 struct usnic_ib_dev *us_ibdev = 54 53 container_of(device, struct usnic_ib_dev, ib_dev.dev); ··· 59 60 60 61 return scnprintf(buf, PAGE_SIZE, "%hu\n", subsystem_device_id); 61 62 } 63 + static DEVICE_ATTR_RO(board_id); 62 64 63 65 /* 64 66 * Report the configuration for this PF 65 67 */ 66 68 static ssize_t 67 - usnic_ib_show_config(struct device *device, struct device_attribute *attr, 68 - char *buf) 69 + config_show(struct device *device, struct device_attribute *attr, char *buf) 69 70 { 70 71 struct usnic_ib_dev *us_ibdev; 71 72 char *ptr; ··· 93 94 94 95 n = scnprintf(ptr, left, 95 96 "%s: %s:%d.%d, %s, %pM, %u VFs\n Per VF:", 96 - us_ibdev->ib_dev.name, 97 + dev_name(&us_ibdev->ib_dev.dev), 97 98 busname, 98 99 PCI_SLOT(us_ibdev->pdev->devfn), 99 100 PCI_FUNC(us_ibdev->pdev->devfn), ··· 118 119 UPDATE_PTR_LEFT(n, ptr, left); 119 120 } else { 120 121 n = scnprintf(ptr, left, "%s: no VFs\n", 121 - us_ibdev->ib_dev.name); 122 + dev_name(&us_ibdev->ib_dev.dev)); 122 123 UPDATE_PTR_LEFT(n, ptr, left); 123 124 } 124 125 mutex_unlock(&us_ibdev->usdev_lock); 125 126 126 127 return ptr - buf; 127 128 } 129 + static DEVICE_ATTR_RO(config); 128 130 129 131 static ssize_t 130 - usnic_ib_show_iface(struct device *device, struct device_attribute *attr, 131 - char *buf) 132 + iface_show(struct device *device, struct device_attribute *attr, char *buf) 132 133 { 133 134 struct usnic_ib_dev *us_ibdev; 134 135 ··· 137 138 return scnprintf(buf, PAGE_SIZE, "%s\n", 138 139 netdev_name(us_ibdev->netdev)); 139 140 } 141 + static DEVICE_ATTR_RO(iface); 140 142 141 143 static ssize_t 142 - usnic_ib_show_max_vf(struct device *device, struct device_attribute *attr, 143 - char *buf) 144 + 
max_vf_show(struct device *device, struct device_attribute *attr, char *buf) 144 145 { 145 146 struct usnic_ib_dev *us_ibdev; 146 147 ··· 149 150 return scnprintf(buf, PAGE_SIZE, "%u\n", 150 151 kref_read(&us_ibdev->vf_cnt)); 151 152 } 153 + static DEVICE_ATTR_RO(max_vf); 152 154 153 155 static ssize_t 154 - usnic_ib_show_qp_per_vf(struct device *device, struct device_attribute *attr, 155 - char *buf) 156 + qp_per_vf_show(struct device *device, struct device_attribute *attr, char *buf) 156 157 { 157 158 struct usnic_ib_dev *us_ibdev; 158 159 int qp_per_vf; ··· 164 165 return scnprintf(buf, PAGE_SIZE, 165 166 "%d\n", qp_per_vf); 166 167 } 168 + static DEVICE_ATTR_RO(qp_per_vf); 167 169 168 170 static ssize_t 169 - usnic_ib_show_cq_per_vf(struct device *device, struct device_attribute *attr, 170 - char *buf) 171 + cq_per_vf_show(struct device *device, struct device_attribute *attr, char *buf) 171 172 { 172 173 struct usnic_ib_dev *us_ibdev; 173 174 ··· 176 177 return scnprintf(buf, PAGE_SIZE, "%d\n", 177 178 us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]); 178 179 } 180 + static DEVICE_ATTR_RO(cq_per_vf); 179 181 180 - static DEVICE_ATTR(board_id, S_IRUGO, usnic_ib_show_board, NULL); 181 - static DEVICE_ATTR(config, S_IRUGO, usnic_ib_show_config, NULL); 182 - static DEVICE_ATTR(iface, S_IRUGO, usnic_ib_show_iface, NULL); 183 - static DEVICE_ATTR(max_vf, S_IRUGO, usnic_ib_show_max_vf, NULL); 184 - static DEVICE_ATTR(qp_per_vf, S_IRUGO, usnic_ib_show_qp_per_vf, NULL); 185 - static DEVICE_ATTR(cq_per_vf, S_IRUGO, usnic_ib_show_cq_per_vf, NULL); 182 + static struct attribute *usnic_class_attributes[] = { 183 + &dev_attr_board_id.attr, 184 + &dev_attr_config.attr, 185 + &dev_attr_iface.attr, 186 + &dev_attr_max_vf.attr, 187 + &dev_attr_qp_per_vf.attr, 188 + &dev_attr_cq_per_vf.attr, 189 + NULL 190 + }; 186 191 187 - static struct device_attribute *usnic_class_attributes[] = { 188 - &dev_attr_board_id, 189 - &dev_attr_config, 190 - &dev_attr_iface, 191 - &dev_attr_max_vf, 
192 - &dev_attr_qp_per_vf, 193 - &dev_attr_cq_per_vf, 192 + const struct attribute_group usnic_attr_group = { 193 + .attrs = usnic_class_attributes, 194 194 }; 195 195 196 196 struct qpn_attribute { ··· 276 278 277 279 int usnic_ib_sysfs_register_usdev(struct usnic_ib_dev *us_ibdev) 278 280 { 279 - int i; 280 - int err; 281 - for (i = 0; i < ARRAY_SIZE(usnic_class_attributes); ++i) { 282 - err = device_create_file(&us_ibdev->ib_dev.dev, 283 - usnic_class_attributes[i]); 284 - if (err) { 285 - usnic_err("Failed to create device file %d for %s eith err %d", 286 - i, us_ibdev->ib_dev.name, err); 287 - return -EINVAL; 288 - } 289 - } 290 - 291 281 /* create kernel object for looking at individual QPs */ 292 282 kobject_get(&us_ibdev->ib_dev.dev.kobj); 293 283 us_ibdev->qpn_kobj = kobject_create_and_add("qpn", ··· 290 304 291 305 void usnic_ib_sysfs_unregister_usdev(struct usnic_ib_dev *us_ibdev) 292 306 { 293 - int i; 294 - for (i = 0; i < ARRAY_SIZE(usnic_class_attributes); ++i) { 295 - device_remove_file(&us_ibdev->ib_dev.dev, 296 - usnic_class_attributes[i]); 297 - } 298 - 299 307 kobject_put(us_ibdev->qpn_kobj); 300 308 } 301 309
+2
drivers/infiniband/hw/usnic/usnic_ib_sysfs.h
··· 41 41 void usnic_ib_sysfs_qpn_add(struct usnic_ib_qp_grp *qp_grp); 42 42 void usnic_ib_sysfs_qpn_remove(struct usnic_ib_qp_grp *qp_grp); 43 43 44 + extern const struct attribute_group usnic_attr_group; 45 + 44 46 #endif /* !USNIC_IB_SYSFS_H_ */
+9 -7
drivers/infiniband/hw/usnic/usnic_ib_verbs.c
··· 159 159 160 160 err = ib_copy_to_udata(udata, &resp, sizeof(resp)); 161 161 if (err) { 162 - usnic_err("Failed to copy udata for %s", us_ibdev->ib_dev.name); 162 + usnic_err("Failed to copy udata for %s", 163 + dev_name(&us_ibdev->ib_dev.dev)); 163 164 return err; 164 165 } 165 166 ··· 198 197 vnic = vf->vnic; 199 198 if (!usnic_vnic_check_room(vnic, res_spec)) { 200 199 usnic_dbg("Found used vnic %s from %s\n", 201 - us_ibdev->ib_dev.name, 200 + dev_name(&us_ibdev->ib_dev.dev), 202 201 pci_name(usnic_vnic_get_pdev( 203 202 vnic))); 204 203 qp_grp = usnic_ib_qp_grp_create(us_ibdev->ufdev, ··· 231 230 spin_unlock(&vf->lock); 232 231 } 233 232 234 - usnic_info("No free qp grp found on %s\n", us_ibdev->ib_dev.name); 233 + usnic_info("No free qp grp found on %s\n", 234 + dev_name(&us_ibdev->ib_dev.dev)); 235 235 return ERR_PTR(-ENOMEM); 236 236 237 237 qp_grp_check: ··· 473 471 } 474 472 475 473 usnic_info("domain 0x%p allocated for context 0x%p and device %s\n", 476 - pd, context, ibdev->name); 474 + pd, context, dev_name(&ibdev->dev)); 477 475 return &pd->ibpd; 478 476 } 479 477 ··· 510 508 err = ib_copy_from_udata(&cmd, udata, sizeof(cmd)); 511 509 if (err) { 512 510 usnic_err("%s: cannot copy udata for create_qp\n", 513 - us_ibdev->ib_dev.name); 511 + dev_name(&us_ibdev->ib_dev.dev)); 514 512 return ERR_PTR(-EINVAL); 515 513 } 516 514 517 515 err = create_qp_validate_user_data(cmd); 518 516 if (err) { 519 517 usnic_err("%s: Failed to validate user data\n", 520 - us_ibdev->ib_dev.name); 518 + dev_name(&us_ibdev->ib_dev.dev)); 521 519 return ERR_PTR(-EINVAL); 522 520 } 523 521 524 522 if (init_attr->qp_type != IB_QPT_UD) { 525 523 usnic_err("%s asked to make a non-UD QP: %d\n", 526 - us_ibdev->ib_dev.name, init_attr->qp_type); 524 + dev_name(&us_ibdev->ib_dev.dev), init_attr->qp_type); 527 525 return ERR_PTR(-EINVAL); 528 526 } 529 527
+1 -1
drivers/infiniband/hw/usnic/usnic_transport.c
··· 121 121 if (type == USNIC_TRANSPORT_ROCE_CUSTOM) { 122 122 spin_lock(&roce_bitmap_lock); 123 123 if (!port_num) { 124 - usnic_err("Unreserved unvalid port num 0 for %s\n", 124 + usnic_err("Unreserved invalid port num 0 for %s\n", 125 125 usnic_transport_to_str(type)); 126 126 goto out_roce_custom; 127 127 }
+46 -47
drivers/infiniband/hw/usnic/usnic_uiom.c
··· 54 54 ((void *) &((struct usnic_uiom_chunk *) 0)->page_list[1] - \ 55 55 (void *) &((struct usnic_uiom_chunk *) 0)->page_list[0])) 56 56 57 - static void usnic_uiom_reg_account(struct work_struct *work) 58 - { 59 - struct usnic_uiom_reg *umem = container_of(work, 60 - struct usnic_uiom_reg, work); 61 - 62 - down_write(&umem->mm->mmap_sem); 63 - umem->mm->locked_vm -= umem->diff; 64 - up_write(&umem->mm->mmap_sem); 65 - mmput(umem->mm); 66 - kfree(umem); 67 - } 68 - 69 57 static int usnic_uiom_dma_fault(struct iommu_domain *domain, 70 58 struct device *dev, 71 59 unsigned long iova, int flags, ··· 87 99 } 88 100 89 101 static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, 90 - int dmasync, struct list_head *chunk_list) 102 + int dmasync, struct usnic_uiom_reg *uiomr) 91 103 { 104 + struct list_head *chunk_list = &uiomr->chunk_list; 92 105 struct page **page_list; 93 106 struct scatterlist *sg; 94 107 struct usnic_uiom_chunk *chunk; ··· 103 114 int flags; 104 115 dma_addr_t pa; 105 116 unsigned int gup_flags; 117 + struct mm_struct *mm; 106 118 107 119 /* 108 120 * If the combination of the addr and size requested for this memory ··· 126 136 127 137 npages = PAGE_ALIGN(size + (addr & ~PAGE_MASK)) >> PAGE_SHIFT; 128 138 129 - down_write(&current->mm->mmap_sem); 139 + uiomr->owning_mm = mm = current->mm; 140 + down_write(&mm->mmap_sem); 130 141 131 142 locked = npages + current->mm->pinned_vm; 132 143 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; ··· 187 196 out: 188 197 if (ret < 0) 189 198 usnic_uiom_put_pages(chunk_list, 0); 190 - else 191 - current->mm->pinned_vm = locked; 199 + else { 200 + mm->pinned_vm = locked; 201 + mmgrab(uiomr->owning_mm); 202 + } 192 203 193 - up_write(&current->mm->mmap_sem); 204 + up_write(&mm->mmap_sem); 194 205 free_page((unsigned long) page_list); 195 206 return ret; 196 207 } ··· 372 379 uiomr->pd = pd; 373 380 374 381 err = usnic_uiom_get_pages(addr, size, writable, dmasync, 375 - 
&uiomr->chunk_list); 382 + uiomr); 376 383 if (err) { 377 384 usnic_err("Failed get_pages vpn [0x%lx,0x%lx] err %d\n", 378 385 vpn_start, vpn_last, err); ··· 419 426 out_put_pages: 420 427 usnic_uiom_put_pages(&uiomr->chunk_list, 0); 421 428 spin_unlock(&pd->lock); 429 + mmdrop(uiomr->owning_mm); 422 430 out_free_uiomr: 423 431 kfree(uiomr); 424 432 return ERR_PTR(err); 425 433 } 426 434 427 - void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, 428 - struct ib_ucontext *ucontext) 435 + static void __usnic_uiom_release_tail(struct usnic_uiom_reg *uiomr) 429 436 { 430 - struct task_struct *task; 431 - struct mm_struct *mm; 432 - unsigned long diff; 437 + mmdrop(uiomr->owning_mm); 438 + kfree(uiomr); 439 + } 433 440 441 + static inline size_t usnic_uiom_num_pages(struct usnic_uiom_reg *uiomr) 442 + { 443 + return PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT; 444 + } 445 + 446 + static void usnic_uiom_release_defer(struct work_struct *work) 447 + { 448 + struct usnic_uiom_reg *uiomr = 449 + container_of(work, struct usnic_uiom_reg, work); 450 + 451 + down_write(&uiomr->owning_mm->mmap_sem); 452 + uiomr->owning_mm->pinned_vm -= usnic_uiom_num_pages(uiomr); 453 + up_write(&uiomr->owning_mm->mmap_sem); 454 + 455 + __usnic_uiom_release_tail(uiomr); 456 + } 457 + 458 + void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, 459 + struct ib_ucontext *context) 460 + { 434 461 __usnic_uiom_reg_release(uiomr->pd, uiomr, 1); 435 - 436 - task = get_pid_task(ucontext->tgid, PIDTYPE_PID); 437 - if (!task) 438 - goto out; 439 - mm = get_task_mm(task); 440 - put_task_struct(task); 441 - if (!mm) 442 - goto out; 443 - 444 - diff = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT; 445 462 446 463 /* 447 464 * We may be called with the mm's mmap_sem already held. This ··· 459 456 * the last reference to our file and calls our release 460 457 * method. If there are memory regions to destroy, we'll end 461 458 * up here and not be able to take the mmap_sem. 
In that case 462 - * we defer the vm_locked accounting to the system workqueue. 459 + * we defer the vm_locked accounting to a workqueue. 463 460 */ 464 - if (ucontext->closing) { 465 - if (!down_write_trylock(&mm->mmap_sem)) { 466 - INIT_WORK(&uiomr->work, usnic_uiom_reg_account); 467 - uiomr->mm = mm; 468 - uiomr->diff = diff; 469 - 461 + if (context->closing) { 462 + if (!down_write_trylock(&uiomr->owning_mm->mmap_sem)) { 463 + INIT_WORK(&uiomr->work, usnic_uiom_release_defer); 470 464 queue_work(usnic_uiom_wq, &uiomr->work); 471 465 return; 472 466 } 473 - } else 474 - down_write(&mm->mmap_sem); 467 + } else { 468 + down_write(&uiomr->owning_mm->mmap_sem); 469 + } 470 + uiomr->owning_mm->pinned_vm -= usnic_uiom_num_pages(uiomr); 471 + up_write(&uiomr->owning_mm->mmap_sem); 475 472 476 - mm->pinned_vm -= diff; 477 - up_write(&mm->mmap_sem); 478 - mmput(mm); 479 - out: 480 - kfree(uiomr); 473 + __usnic_uiom_release_tail(uiomr); 481 474 } 482 475 483 476 struct usnic_uiom_pd *usnic_uiom_alloc_pd(void)
+1 -2
drivers/infiniband/hw/usnic/usnic_uiom.h
··· 71 71 int writable; 72 72 struct list_head chunk_list; 73 73 struct work_struct work; 74 - struct mm_struct *mm; 75 - unsigned long diff; 74 + struct mm_struct *owning_mm; 76 75 }; 77 76 78 77 struct usnic_uiom_chunk {
+20 -26
drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
··· 65 65 static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context); 66 66 static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context); 67 67 68 - static ssize_t show_hca(struct device *device, struct device_attribute *attr, 69 - char *buf) 68 + static ssize_t hca_type_show(struct device *device, 69 + struct device_attribute *attr, char *buf) 70 70 { 71 71 return sprintf(buf, "VMW_PVRDMA-%s\n", DRV_VERSION); 72 72 } 73 + static DEVICE_ATTR_RO(hca_type); 73 74 74 - static ssize_t show_rev(struct device *device, struct device_attribute *attr, 75 - char *buf) 75 + static ssize_t hw_rev_show(struct device *device, 76 + struct device_attribute *attr, char *buf) 76 77 { 77 78 return sprintf(buf, "%d\n", PVRDMA_REV_ID); 78 79 } 80 + static DEVICE_ATTR_RO(hw_rev); 79 81 80 - static ssize_t show_board(struct device *device, struct device_attribute *attr, 81 - char *buf) 82 + static ssize_t board_id_show(struct device *device, 83 + struct device_attribute *attr, char *buf) 82 84 { 83 85 return sprintf(buf, "%d\n", PVRDMA_BOARD_ID); 84 86 } 87 + static DEVICE_ATTR_RO(board_id); 85 88 86 - static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 87 - static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); 88 - static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); 89 + static struct attribute *pvrdma_class_attributes[] = { 90 + &dev_attr_hw_rev.attr, 91 + &dev_attr_hca_type.attr, 92 + &dev_attr_board_id.attr, 93 + NULL, 94 + }; 89 95 90 - static struct device_attribute *pvrdma_class_attributes[] = { 91 - &dev_attr_hw_rev, 92 - &dev_attr_hca_type, 93 - &dev_attr_board_id 96 + static const struct attribute_group pvrdma_attr_group = { 97 + .attrs = pvrdma_class_attributes, 94 98 }; 95 99 96 100 static void pvrdma_get_fw_ver_str(struct ib_device *device, char *str) ··· 164 160 static int pvrdma_register_device(struct pvrdma_dev *dev) 165 161 { 166 162 int ret = -1; 167 - int i = 0; 168 163 169 - strlcpy(dev->ib_dev.name, "vmw_pvrdma%d", 
IB_DEVICE_NAME_MAX); 170 164 dev->ib_dev.node_guid = dev->dsr->caps.node_guid; 171 165 dev->sys_image_guid = dev->dsr->caps.sys_image_guid; 172 166 dev->flags = 0; ··· 268 266 } 269 267 dev->ib_dev.driver_id = RDMA_DRIVER_VMW_PVRDMA; 270 268 spin_lock_init(&dev->srq_tbl_lock); 269 + rdma_set_device_sysfs_group(&dev->ib_dev, &pvrdma_attr_group); 271 270 272 - ret = ib_register_device(&dev->ib_dev, NULL); 271 + ret = ib_register_device(&dev->ib_dev, "vmw_pvrdma%d", NULL); 273 272 if (ret) 274 273 goto err_srq_free; 275 - 276 - for (i = 0; i < ARRAY_SIZE(pvrdma_class_attributes); ++i) { 277 - ret = device_create_file(&dev->ib_dev.dev, 278 - pvrdma_class_attributes[i]); 279 - if (ret) 280 - goto err_class; 281 - } 282 274 283 275 dev->ib_active = true; 284 276 285 277 return 0; 286 278 287 - err_class: 288 - ib_unregister_device(&dev->ib_dev); 289 279 err_srq_free: 290 280 kfree(dev->srq_tbl); 291 281 err_qp_free: ··· 729 735 730 736 default: 731 737 dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n", 732 - event, dev->ib_dev.name); 738 + event, dev_name(&dev->ib_dev.dev)); 733 739 break; 734 740 } 735 741 }
+1 -1
drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
··· 499 499 next_state = (attr_mask & IB_QP_STATE) ? attr->qp_state : cur_state; 500 500 501 501 if (!ib_modify_qp_is_ok(cur_state, next_state, ibqp->qp_type, 502 - attr_mask, IB_LINK_LAYER_ETHERNET)) { 502 + attr_mask)) { 503 503 ret = -EINVAL; 504 504 goto out; 505 505 }
+1 -1
drivers/infiniband/sw/rdmavt/Kconfig
··· 1 1 config INFINIBAND_RDMAVT 2 2 tristate "RDMA verbs transport library" 3 - depends on 64BIT && ARCH_DMA_ADDR_T_64BIT 3 + depends on X86_64 && ARCH_DMA_ADDR_T_64BIT 4 4 depends on PCI 5 5 select DMA_VIRT_OPS 6 6 ---help---
+660 -17
drivers/infiniband/sw/rdmavt/qp.c
··· 118 118 }; 119 119 EXPORT_SYMBOL(ib_rvt_state_ops); 120 120 121 + /* platform specific: return the last level cache (llc) size, in KiB */ 122 + static int rvt_wss_llc_size(void) 123 + { 124 + /* assume that the boot CPU value is universal for all CPUs */ 125 + return boot_cpu_data.x86_cache_size; 126 + } 127 + 128 + /* platform specific: cacheless copy */ 129 + static void cacheless_memcpy(void *dst, void *src, size_t n) 130 + { 131 + /* 132 + * Use the only available X64 cacheless copy. Add a __user cast 133 + * to quiet sparse. The src agument is already in the kernel so 134 + * there are no security issues. The extra fault recovery machinery 135 + * is not invoked. 136 + */ 137 + __copy_user_nocache(dst, (void __user *)src, n, 0); 138 + } 139 + 140 + void rvt_wss_exit(struct rvt_dev_info *rdi) 141 + { 142 + struct rvt_wss *wss = rdi->wss; 143 + 144 + if (!wss) 145 + return; 146 + 147 + /* coded to handle partially initialized and repeat callers */ 148 + kfree(wss->entries); 149 + wss->entries = NULL; 150 + kfree(rdi->wss); 151 + rdi->wss = NULL; 152 + } 153 + 154 + /** 155 + * rvt_wss_init - Init wss data structures 156 + * 157 + * Return: 0 on success 158 + */ 159 + int rvt_wss_init(struct rvt_dev_info *rdi) 160 + { 161 + unsigned int sge_copy_mode = rdi->dparms.sge_copy_mode; 162 + unsigned int wss_threshold = rdi->dparms.wss_threshold; 163 + unsigned int wss_clean_period = rdi->dparms.wss_clean_period; 164 + long llc_size; 165 + long llc_bits; 166 + long table_size; 167 + long table_bits; 168 + struct rvt_wss *wss; 169 + int node = rdi->dparms.node; 170 + 171 + if (sge_copy_mode != RVT_SGE_COPY_ADAPTIVE) { 172 + rdi->wss = NULL; 173 + return 0; 174 + } 175 + 176 + rdi->wss = kzalloc_node(sizeof(*rdi->wss), GFP_KERNEL, node); 177 + if (!rdi->wss) 178 + return -ENOMEM; 179 + wss = rdi->wss; 180 + 181 + /* check for a valid percent range - default to 80 if none or invalid */ 182 + if (wss_threshold < 1 || wss_threshold > 100) 183 + wss_threshold = 80; 184 + 
185 + /* reject a wildly large period */ 186 + if (wss_clean_period > 1000000) 187 + wss_clean_period = 256; 188 + 189 + /* reject a zero period */ 190 + if (wss_clean_period == 0) 191 + wss_clean_period = 1; 192 + 193 + /* 194 + * Calculate the table size - the next power of 2 larger than the 195 + * LLC size. LLC size is in KiB. 196 + */ 197 + llc_size = rvt_wss_llc_size() * 1024; 198 + table_size = roundup_pow_of_two(llc_size); 199 + 200 + /* one bit per page in rounded up table */ 201 + llc_bits = llc_size / PAGE_SIZE; 202 + table_bits = table_size / PAGE_SIZE; 203 + wss->pages_mask = table_bits - 1; 204 + wss->num_entries = table_bits / BITS_PER_LONG; 205 + 206 + wss->threshold = (llc_bits * wss_threshold) / 100; 207 + if (wss->threshold == 0) 208 + wss->threshold = 1; 209 + 210 + wss->clean_period = wss_clean_period; 211 + atomic_set(&wss->clean_counter, wss_clean_period); 212 + 213 + wss->entries = kcalloc_node(wss->num_entries, sizeof(*wss->entries), 214 + GFP_KERNEL, node); 215 + if (!wss->entries) { 216 + rvt_wss_exit(rdi); 217 + return -ENOMEM; 218 + } 219 + 220 + return 0; 221 + } 222 + 223 + /* 224 + * Advance the clean counter. When the clean period has expired, 225 + * clean an entry. 226 + * 227 + * This is implemented in atomics to avoid locking. Because multiple 228 + * variables are involved, it can be racy which can lead to slightly 229 + * inaccurate information. Since this is only a heuristic, this is 230 + * OK. Any innaccuracies will clean themselves out as the counter 231 + * advances. That said, it is unlikely the entry clean operation will 232 + * race - the next possible racer will not start until the next clean 233 + * period. 234 + * 235 + * The clean counter is implemented as a decrement to zero. When zero 236 + * is reached an entry is cleaned. 
237 + */ 238 + static void wss_advance_clean_counter(struct rvt_wss *wss) 239 + { 240 + int entry; 241 + int weight; 242 + unsigned long bits; 243 + 244 + /* become the cleaner if we decrement the counter to zero */ 245 + if (atomic_dec_and_test(&wss->clean_counter)) { 246 + /* 247 + * Set, not add, the clean period. This avoids an issue 248 + * where the counter could decrement below the clean period. 249 + * Doing a set can result in lost decrements, slowing the 250 + * clean advance. Since this a heuristic, this possible 251 + * slowdown is OK. 252 + * 253 + * An alternative is to loop, advancing the counter by a 254 + * clean period until the result is > 0. However, this could 255 + * lead to several threads keeping another in the clean loop. 256 + * This could be mitigated by limiting the number of times 257 + * we stay in the loop. 258 + */ 259 + atomic_set(&wss->clean_counter, wss->clean_period); 260 + 261 + /* 262 + * Uniquely grab the entry to clean and move to next. 263 + * The current entry is always the lower bits of 264 + * wss.clean_entry. The table size, wss.num_entries, 265 + * is always a power-of-2. 266 + */ 267 + entry = (atomic_inc_return(&wss->clean_entry) - 1) 268 + & (wss->num_entries - 1); 269 + 270 + /* clear the entry and count the bits */ 271 + bits = xchg(&wss->entries[entry], 0); 272 + weight = hweight64((u64)bits); 273 + /* only adjust the contended total count if needed */ 274 + if (weight) 275 + atomic_sub(weight, &wss->total_count); 276 + } 277 + } 278 + 279 + /* 280 + * Insert the given address into the working set array. 
281 + */ 282 + static void wss_insert(struct rvt_wss *wss, void *address) 283 + { 284 + u32 page = ((unsigned long)address >> PAGE_SHIFT) & wss->pages_mask; 285 + u32 entry = page / BITS_PER_LONG; /* assumes this ends up a shift */ 286 + u32 nr = page & (BITS_PER_LONG - 1); 287 + 288 + if (!test_and_set_bit(nr, &wss->entries[entry])) 289 + atomic_inc(&wss->total_count); 290 + 291 + wss_advance_clean_counter(wss); 292 + } 293 + 294 + /* 295 + * Is the working set larger than the threshold? 296 + */ 297 + static inline bool wss_exceeds_threshold(struct rvt_wss *wss) 298 + { 299 + return atomic_read(&wss->total_count) >= wss->threshold; 300 + } 301 + 121 302 static void get_map_page(struct rvt_qpn_table *qpt, 122 303 struct rvt_qpn_map *map) 123 304 { ··· 1345 1164 int lastwqe = 0; 1346 1165 int mig = 0; 1347 1166 int pmtu = 0; /* for gcc warning only */ 1348 - enum rdma_link_layer link; 1349 1167 int opa_ah; 1350 - 1351 - link = rdma_port_get_link_layer(ibqp->device, qp->port_num); 1352 1168 1353 1169 spin_lock_irq(&qp->r_lock); 1354 1170 spin_lock(&qp->s_hlock); ··· 1357 1179 opa_ah = rdma_cap_opa_ah(ibqp->device, qp->port_num); 1358 1180 1359 1181 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, 1360 - attr_mask, link)) 1182 + attr_mask)) 1361 1183 goto inval; 1362 1184 1363 1185 if (rdi->driver_f.check_modify_qp && ··· 1896 1718 */ 1897 1719 static int rvt_post_one_wr(struct rvt_qp *qp, 1898 1720 const struct ib_send_wr *wr, 1899 - int *call_send) 1721 + bool *call_send) 1900 1722 { 1901 1723 struct rvt_swqe *wqe; 1902 1724 u32 next; ··· 2001 1823 wqe->wr.num_sge = j; 2002 1824 } 2003 1825 2004 - /* general part of wqe valid - allow for driver checks */ 2005 - if (rdi->driver_f.check_send_wqe) { 2006 - ret = rdi->driver_f.check_send_wqe(qp, wqe); 2007 - if (ret < 0) 2008 - goto bail_inval_free; 2009 - if (ret) 2010 - *call_send = ret; 2011 - } 2012 - 1826 + /* 1827 + * Calculate and set SWQE PSN values prior to handing it off 1828 + * to the driver's 
check routine. This give the driver the 1829 + * opportunity to adjust PSN values based on internal checks. 1830 + */ 2013 1831 log_pmtu = qp->log_pmtu; 2014 1832 if (qp->ibqp.qp_type != IB_QPT_UC && 2015 1833 qp->ibqp.qp_type != IB_QPT_RC) { ··· 2030 1856 (wqe->length ? 2031 1857 ((wqe->length - 1) >> log_pmtu) : 2032 1858 0); 2033 - qp->s_next_psn = wqe->lpsn + 1; 2034 1859 } 1860 + 1861 + /* general part of wqe valid - allow for driver checks */ 1862 + if (rdi->driver_f.setup_wqe) { 1863 + ret = rdi->driver_f.setup_wqe(qp, wqe, call_send); 1864 + if (ret < 0) 1865 + goto bail_inval_free_ref; 1866 + } 1867 + 1868 + if (!(rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL)) 1869 + qp->s_next_psn = wqe->lpsn + 1; 1870 + 2035 1871 if (unlikely(reserved_op)) { 2036 1872 wqe->wr.send_flags |= RVT_SEND_RESERVE_USED; 2037 1873 rvt_qp_wqe_reserve(qp, wqe); ··· 2055 1871 2056 1872 return 0; 2057 1873 1874 + bail_inval_free_ref: 1875 + if (qp->ibqp.qp_type != IB_QPT_UC && 1876 + qp->ibqp.qp_type != IB_QPT_RC) 1877 + atomic_dec(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount); 2058 1878 bail_inval_free: 2059 1879 /* release mr holds */ 2060 1880 while (j) { ··· 2085 1897 struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); 2086 1898 struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); 2087 1899 unsigned long flags = 0; 2088 - int call_send; 1900 + bool call_send; 2089 1901 unsigned nreq = 0; 2090 1902 int err = 0; 2091 1903 ··· 2118 1930 bail: 2119 1931 spin_unlock_irqrestore(&qp->s_hlock, flags); 2120 1932 if (nreq) { 2121 - if (call_send) 1933 + /* 1934 + * Only call do_send if there is exactly one packet, and the 1935 + * driver said it was ok. 1936 + */ 1937 + if (nreq == 1 && call_send) 2122 1938 rdi->driver_f.do_send(qp); 2123 1939 else 2124 1940 rdi->driver_f.schedule_send_no_lock(qp); ··· 2657 2465 rcu_read_unlock(); 2658 2466 } 2659 2467 EXPORT_SYMBOL(rvt_qp_iter); 2468 + 2469 + /* 2470 + * This should be called with s_lock held. 
2471 + */ 2472 + void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, 2473 + enum ib_wc_status status) 2474 + { 2475 + u32 old_last, last; 2476 + struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); 2477 + 2478 + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND)) 2479 + return; 2480 + 2481 + last = qp->s_last; 2482 + old_last = last; 2483 + trace_rvt_qp_send_completion(qp, wqe, last); 2484 + if (++last >= qp->s_size) 2485 + last = 0; 2486 + trace_rvt_qp_send_completion(qp, wqe, last); 2487 + qp->s_last = last; 2488 + /* See post_send() */ 2489 + barrier(); 2490 + rvt_put_swqe(wqe); 2491 + if (qp->ibqp.qp_type == IB_QPT_UD || 2492 + qp->ibqp.qp_type == IB_QPT_SMI || 2493 + qp->ibqp.qp_type == IB_QPT_GSI) 2494 + atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount); 2495 + 2496 + rvt_qp_swqe_complete(qp, 2497 + wqe, 2498 + rdi->wc_opcode[wqe->wr.opcode], 2499 + status); 2500 + 2501 + if (qp->s_acked == old_last) 2502 + qp->s_acked = last; 2503 + if (qp->s_cur == old_last) 2504 + qp->s_cur = last; 2505 + if (qp->s_tail == old_last) 2506 + qp->s_tail = last; 2507 + if (qp->state == IB_QPS_SQD && last == qp->s_cur) 2508 + qp->s_draining = 0; 2509 + } 2510 + EXPORT_SYMBOL(rvt_send_complete); 2511 + 2512 + /** 2513 + * rvt_copy_sge - copy data to SGE memory 2514 + * @qp: associated QP 2515 + * @ss: the SGE state 2516 + * @data: the data to copy 2517 + * @length: the length of the data 2518 + * @release: boolean to release MR 2519 + * @copy_last: do a separate copy of the last 8 bytes 2520 + */ 2521 + void rvt_copy_sge(struct rvt_qp *qp, struct rvt_sge_state *ss, 2522 + void *data, u32 length, 2523 + bool release, bool copy_last) 2524 + { 2525 + struct rvt_sge *sge = &ss->sge; 2526 + int i; 2527 + bool in_last = false; 2528 + bool cacheless_copy = false; 2529 + struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); 2530 + struct rvt_wss *wss = rdi->wss; 2531 + unsigned int sge_copy_mode = rdi->dparms.sge_copy_mode; 2532 + 2533 + if (sge_copy_mode == 
RVT_SGE_COPY_CACHELESS) { 2534 + cacheless_copy = length >= PAGE_SIZE; 2535 + } else if (sge_copy_mode == RVT_SGE_COPY_ADAPTIVE) { 2536 + if (length >= PAGE_SIZE) { 2537 + /* 2538 + * NOTE: this *assumes*: 2539 + * o The first vaddr is the dest. 2540 + * o If multiple pages, then vaddr is sequential. 2541 + */ 2542 + wss_insert(wss, sge->vaddr); 2543 + if (length >= (2 * PAGE_SIZE)) 2544 + wss_insert(wss, (sge->vaddr + PAGE_SIZE)); 2545 + 2546 + cacheless_copy = wss_exceeds_threshold(wss); 2547 + } else { 2548 + wss_advance_clean_counter(wss); 2549 + } 2550 + } 2551 + 2552 + if (copy_last) { 2553 + if (length > 8) { 2554 + length -= 8; 2555 + } else { 2556 + copy_last = false; 2557 + in_last = true; 2558 + } 2559 + } 2560 + 2561 + again: 2562 + while (length) { 2563 + u32 len = rvt_get_sge_length(sge, length); 2564 + 2565 + WARN_ON_ONCE(len == 0); 2566 + if (unlikely(in_last)) { 2567 + /* enforce byte transfer ordering */ 2568 + for (i = 0; i < len; i++) 2569 + ((u8 *)sge->vaddr)[i] = ((u8 *)data)[i]; 2570 + } else if (cacheless_copy) { 2571 + cacheless_memcpy(sge->vaddr, data, len); 2572 + } else { 2573 + memcpy(sge->vaddr, data, len); 2574 + } 2575 + rvt_update_sge(ss, len, release); 2576 + data += len; 2577 + length -= len; 2578 + } 2579 + 2580 + if (copy_last) { 2581 + copy_last = false; 2582 + in_last = true; 2583 + length = 8; 2584 + goto again; 2585 + } 2586 + } 2587 + EXPORT_SYMBOL(rvt_copy_sge); 2588 + 2589 + /** 2590 + * ruc_loopback - handle UC and RC loopback requests 2591 + * @sqp: the sending QP 2592 + * 2593 + * This is called from rvt_do_send() to forward a WQE addressed to the same HFI 2594 + * Note that although we are single threaded due to the send engine, we still 2595 + * have to protect against post_send(). We don't have to worry about 2596 + * receive interrupts since this is a connected protocol and all packets 2597 + * will pass through here. 
2598 + */ 2599 + void rvt_ruc_loopback(struct rvt_qp *sqp) 2600 + { 2601 + struct rvt_ibport *rvp = NULL; 2602 + struct rvt_dev_info *rdi = ib_to_rvt(sqp->ibqp.device); 2603 + struct rvt_qp *qp; 2604 + struct rvt_swqe *wqe; 2605 + struct rvt_sge *sge; 2606 + unsigned long flags; 2607 + struct ib_wc wc; 2608 + u64 sdata; 2609 + atomic64_t *maddr; 2610 + enum ib_wc_status send_status; 2611 + bool release; 2612 + int ret; 2613 + bool copy_last = false; 2614 + int local_ops = 0; 2615 + 2616 + rcu_read_lock(); 2617 + rvp = rdi->ports[sqp->port_num - 1]; 2618 + 2619 + /* 2620 + * Note that we check the responder QP state after 2621 + * checking the requester's state. 2622 + */ 2623 + 2624 + qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), rvp, 2625 + sqp->remote_qpn); 2626 + 2627 + spin_lock_irqsave(&sqp->s_lock, flags); 2628 + 2629 + /* Return if we are already busy processing a work request. */ 2630 + if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) || 2631 + !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND)) 2632 + goto unlock; 2633 + 2634 + sqp->s_flags |= RVT_S_BUSY; 2635 + 2636 + again: 2637 + if (sqp->s_last == READ_ONCE(sqp->s_head)) 2638 + goto clr_busy; 2639 + wqe = rvt_get_swqe_ptr(sqp, sqp->s_last); 2640 + 2641 + /* Return if it is not OK to start a new work request. */ 2642 + if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) { 2643 + if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND)) 2644 + goto clr_busy; 2645 + /* We are in the error state, flush the work request. */ 2646 + send_status = IB_WC_WR_FLUSH_ERR; 2647 + goto flush_send; 2648 + } 2649 + 2650 + /* 2651 + * We can rely on the entry not changing without the s_lock 2652 + * being held until we update s_last. 2653 + * We increment s_cur to indicate s_last is in progress. 
2654 + */ 2655 + if (sqp->s_last == sqp->s_cur) { 2656 + if (++sqp->s_cur >= sqp->s_size) 2657 + sqp->s_cur = 0; 2658 + } 2659 + spin_unlock_irqrestore(&sqp->s_lock, flags); 2660 + 2661 + if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) || 2662 + qp->ibqp.qp_type != sqp->ibqp.qp_type) { 2663 + rvp->n_pkt_drops++; 2664 + /* 2665 + * For RC, the requester would timeout and retry so 2666 + * shortcut the timeouts and just signal too many retries. 2667 + */ 2668 + if (sqp->ibqp.qp_type == IB_QPT_RC) 2669 + send_status = IB_WC_RETRY_EXC_ERR; 2670 + else 2671 + send_status = IB_WC_SUCCESS; 2672 + goto serr; 2673 + } 2674 + 2675 + memset(&wc, 0, sizeof(wc)); 2676 + send_status = IB_WC_SUCCESS; 2677 + 2678 + release = true; 2679 + sqp->s_sge.sge = wqe->sg_list[0]; 2680 + sqp->s_sge.sg_list = wqe->sg_list + 1; 2681 + sqp->s_sge.num_sge = wqe->wr.num_sge; 2682 + sqp->s_len = wqe->length; 2683 + switch (wqe->wr.opcode) { 2684 + case IB_WR_REG_MR: 2685 + goto send_comp; 2686 + 2687 + case IB_WR_LOCAL_INV: 2688 + if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) { 2689 + if (rvt_invalidate_rkey(sqp, 2690 + wqe->wr.ex.invalidate_rkey)) 2691 + send_status = IB_WC_LOC_PROT_ERR; 2692 + local_ops = 1; 2693 + } 2694 + goto send_comp; 2695 + 2696 + case IB_WR_SEND_WITH_INV: 2697 + if (!rvt_invalidate_rkey(qp, wqe->wr.ex.invalidate_rkey)) { 2698 + wc.wc_flags = IB_WC_WITH_INVALIDATE; 2699 + wc.ex.invalidate_rkey = wqe->wr.ex.invalidate_rkey; 2700 + } 2701 + goto send; 2702 + 2703 + case IB_WR_SEND_WITH_IMM: 2704 + wc.wc_flags = IB_WC_WITH_IMM; 2705 + wc.ex.imm_data = wqe->wr.ex.imm_data; 2706 + /* FALLTHROUGH */ 2707 + case IB_WR_SEND: 2708 + send: 2709 + ret = rvt_get_rwqe(qp, false); 2710 + if (ret < 0) 2711 + goto op_err; 2712 + if (!ret) 2713 + goto rnr_nak; 2714 + break; 2715 + 2716 + case IB_WR_RDMA_WRITE_WITH_IMM: 2717 + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) 2718 + goto inv_err; 2719 + wc.wc_flags = IB_WC_WITH_IMM; 2720 + 
wc.ex.imm_data = wqe->wr.ex.imm_data; 2721 + ret = rvt_get_rwqe(qp, true); 2722 + if (ret < 0) 2723 + goto op_err; 2724 + if (!ret) 2725 + goto rnr_nak; 2726 + /* skip copy_last set and qp_access_flags recheck */ 2727 + goto do_write; 2728 + case IB_WR_RDMA_WRITE: 2729 + copy_last = rvt_is_user_qp(qp); 2730 + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) 2731 + goto inv_err; 2732 + do_write: 2733 + if (wqe->length == 0) 2734 + break; 2735 + if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length, 2736 + wqe->rdma_wr.remote_addr, 2737 + wqe->rdma_wr.rkey, 2738 + IB_ACCESS_REMOTE_WRITE))) 2739 + goto acc_err; 2740 + qp->r_sge.sg_list = NULL; 2741 + qp->r_sge.num_sge = 1; 2742 + qp->r_sge.total_len = wqe->length; 2743 + break; 2744 + 2745 + case IB_WR_RDMA_READ: 2746 + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) 2747 + goto inv_err; 2748 + if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length, 2749 + wqe->rdma_wr.remote_addr, 2750 + wqe->rdma_wr.rkey, 2751 + IB_ACCESS_REMOTE_READ))) 2752 + goto acc_err; 2753 + release = false; 2754 + sqp->s_sge.sg_list = NULL; 2755 + sqp->s_sge.num_sge = 1; 2756 + qp->r_sge.sge = wqe->sg_list[0]; 2757 + qp->r_sge.sg_list = wqe->sg_list + 1; 2758 + qp->r_sge.num_sge = wqe->wr.num_sge; 2759 + qp->r_sge.total_len = wqe->length; 2760 + break; 2761 + 2762 + case IB_WR_ATOMIC_CMP_AND_SWP: 2763 + case IB_WR_ATOMIC_FETCH_AND_ADD: 2764 + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) 2765 + goto inv_err; 2766 + if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), 2767 + wqe->atomic_wr.remote_addr, 2768 + wqe->atomic_wr.rkey, 2769 + IB_ACCESS_REMOTE_ATOMIC))) 2770 + goto acc_err; 2771 + /* Perform atomic OP and save result. */ 2772 + maddr = (atomic64_t *)qp->r_sge.sge.vaddr; 2773 + sdata = wqe->atomic_wr.compare_add; 2774 + *(u64 *)sqp->s_sge.sge.vaddr = 2775 + (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ? 
2776 + (u64)atomic64_add_return(sdata, maddr) - sdata : 2777 + (u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr, 2778 + sdata, wqe->atomic_wr.swap); 2779 + rvt_put_mr(qp->r_sge.sge.mr); 2780 + qp->r_sge.num_sge = 0; 2781 + goto send_comp; 2782 + 2783 + default: 2784 + send_status = IB_WC_LOC_QP_OP_ERR; 2785 + goto serr; 2786 + } 2787 + 2788 + sge = &sqp->s_sge.sge; 2789 + while (sqp->s_len) { 2790 + u32 len = sqp->s_len; 2791 + 2792 + if (len > sge->length) 2793 + len = sge->length; 2794 + if (len > sge->sge_length) 2795 + len = sge->sge_length; 2796 + WARN_ON_ONCE(len == 0); 2797 + rvt_copy_sge(qp, &qp->r_sge, sge->vaddr, 2798 + len, release, copy_last); 2799 + sge->vaddr += len; 2800 + sge->length -= len; 2801 + sge->sge_length -= len; 2802 + if (sge->sge_length == 0) { 2803 + if (!release) 2804 + rvt_put_mr(sge->mr); 2805 + if (--sqp->s_sge.num_sge) 2806 + *sge = *sqp->s_sge.sg_list++; 2807 + } else if (sge->length == 0 && sge->mr->lkey) { 2808 + if (++sge->n >= RVT_SEGSZ) { 2809 + if (++sge->m >= sge->mr->mapsz) 2810 + break; 2811 + sge->n = 0; 2812 + } 2813 + sge->vaddr = 2814 + sge->mr->map[sge->m]->segs[sge->n].vaddr; 2815 + sge->length = 2816 + sge->mr->map[sge->m]->segs[sge->n].length; 2817 + } 2818 + sqp->s_len -= len; 2819 + } 2820 + if (release) 2821 + rvt_put_ss(&qp->r_sge); 2822 + 2823 + if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) 2824 + goto send_comp; 2825 + 2826 + if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM) 2827 + wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; 2828 + else 2829 + wc.opcode = IB_WC_RECV; 2830 + wc.wr_id = qp->r_wr_id; 2831 + wc.status = IB_WC_SUCCESS; 2832 + wc.byte_len = wqe->length; 2833 + wc.qp = &qp->ibqp; 2834 + wc.src_qp = qp->remote_qpn; 2835 + wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX; 2836 + wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr); 2837 + wc.port_num = 1; 2838 + /* Signal completion event if the solicited bit is set. 
*/ 2839 + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 2840 + wqe->wr.send_flags & IB_SEND_SOLICITED); 2841 + 2842 + send_comp: 2843 + spin_lock_irqsave(&sqp->s_lock, flags); 2844 + rvp->n_loop_pkts++; 2845 + flush_send: 2846 + sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; 2847 + rvt_send_complete(sqp, wqe, send_status); 2848 + if (local_ops) { 2849 + atomic_dec(&sqp->local_ops_pending); 2850 + local_ops = 0; 2851 + } 2852 + goto again; 2853 + 2854 + rnr_nak: 2855 + /* Handle RNR NAK */ 2856 + if (qp->ibqp.qp_type == IB_QPT_UC) 2857 + goto send_comp; 2858 + rvp->n_rnr_naks++; 2859 + /* 2860 + * Note: we don't need the s_lock held since the BUSY flag 2861 + * makes this single threaded. 2862 + */ 2863 + if (sqp->s_rnr_retry == 0) { 2864 + send_status = IB_WC_RNR_RETRY_EXC_ERR; 2865 + goto serr; 2866 + } 2867 + if (sqp->s_rnr_retry_cnt < 7) 2868 + sqp->s_rnr_retry--; 2869 + spin_lock_irqsave(&sqp->s_lock, flags); 2870 + if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK)) 2871 + goto clr_busy; 2872 + rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer << 2873 + IB_AETH_CREDIT_SHIFT); 2874 + goto clr_busy; 2875 + 2876 + op_err: 2877 + send_status = IB_WC_REM_OP_ERR; 2878 + wc.status = IB_WC_LOC_QP_OP_ERR; 2879 + goto err; 2880 + 2881 + inv_err: 2882 + send_status = IB_WC_REM_INV_REQ_ERR; 2883 + wc.status = IB_WC_LOC_QP_OP_ERR; 2884 + goto err; 2885 + 2886 + acc_err: 2887 + send_status = IB_WC_REM_ACCESS_ERR; 2888 + wc.status = IB_WC_LOC_PROT_ERR; 2889 + err: 2890 + /* responder goes to error state */ 2891 + rvt_rc_error(qp, wc.status); 2892 + 2893 + serr: 2894 + spin_lock_irqsave(&sqp->s_lock, flags); 2895 + rvt_send_complete(sqp, wqe, send_status); 2896 + if (sqp->ibqp.qp_type == IB_QPT_RC) { 2897 + int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR); 2898 + 2899 + sqp->s_flags &= ~RVT_S_BUSY; 2900 + spin_unlock_irqrestore(&sqp->s_lock, flags); 2901 + if (lastwqe) { 2902 + struct ib_event ev; 2903 + 2904 + ev.device = sqp->ibqp.device; 2905 + ev.element.qp = 
&sqp->ibqp; 2906 + ev.event = IB_EVENT_QP_LAST_WQE_REACHED; 2907 + sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context); 2908 + } 2909 + goto done; 2910 + } 2911 + clr_busy: 2912 + sqp->s_flags &= ~RVT_S_BUSY; 2913 + unlock: 2914 + spin_unlock_irqrestore(&sqp->s_lock, flags); 2915 + done: 2916 + rcu_read_unlock(); 2917 + } 2918 + EXPORT_SYMBOL(rvt_ruc_loopback);
+2
drivers/infiniband/sw/rdmavt/qp.h
··· 66 66 const struct ib_send_wr **bad_wr); 67 67 int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, 68 68 const struct ib_recv_wr **bad_wr); 69 + int rvt_wss_init(struct rvt_dev_info *rdi); 70 + void rvt_wss_exit(struct rvt_dev_info *rdi); 69 71 #endif /* DEF_RVTQP_H */
+42
drivers/infiniband/sw/rdmavt/trace_tx.h
··· 153 153 ) 154 154 ); 155 155 156 + TRACE_EVENT( 157 + rvt_qp_send_completion, 158 + TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe, u32 idx), 159 + TP_ARGS(qp, wqe, idx), 160 + TP_STRUCT__entry( 161 + RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device)) 162 + __field(struct rvt_swqe *, wqe) 163 + __field(u64, wr_id) 164 + __field(u32, qpn) 165 + __field(u32, qpt) 166 + __field(u32, length) 167 + __field(u32, idx) 168 + __field(u32, ssn) 169 + __field(enum ib_wr_opcode, opcode) 170 + __field(int, send_flags) 171 + ), 172 + TP_fast_assign( 173 + RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) 174 + __entry->wqe = wqe; 175 + __entry->wr_id = wqe->wr.wr_id; 176 + __entry->qpn = qp->ibqp.qp_num; 177 + __entry->qpt = qp->ibqp.qp_type; 178 + __entry->length = wqe->length; 179 + __entry->idx = idx; 180 + __entry->ssn = wqe->ssn; 181 + __entry->opcode = wqe->wr.opcode; 182 + __entry->send_flags = wqe->wr.send_flags; 183 + ), 184 + TP_printk( 185 + "[%s] qpn 0x%x qpt %u wqe %p idx %u wr_id %llx length %u ssn %u opcode %x send_flags %x", 186 + __get_str(dev), 187 + __entry->qpn, 188 + __entry->qpt, 189 + __entry->wqe, 190 + __entry->idx, 191 + __entry->wr_id, 192 + __entry->length, 193 + __entry->ssn, 194 + __entry->opcode, 195 + __entry->send_flags 196 + ) 197 + ); 156 198 #endif /* __RVT_TRACE_TX_H */ 157 199 158 200 #undef TRACE_INCLUDE_PATH
+13 -2
drivers/infiniband/sw/rdmavt/vt.c
··· 774 774 goto bail_no_mr; 775 775 } 776 776 777 + /* Memory Working Set Size */ 778 + ret = rvt_wss_init(rdi); 779 + if (ret) { 780 + rvt_pr_err(rdi, "Error in WSS init.\n"); 781 + goto bail_mr; 782 + } 783 + 777 784 /* Completion queues */ 778 785 spin_lock_init(&rdi->n_cqs_lock); 779 786 ··· 835 828 836 829 rdi->ibdev.driver_id = driver_id; 837 830 /* We are now good to announce we exist */ 838 - ret = ib_register_device(&rdi->ibdev, rdi->driver_f.port_callback); 831 + ret = ib_register_device(&rdi->ibdev, dev_name(&rdi->ibdev.dev), 832 + rdi->driver_f.port_callback); 839 833 if (ret) { 840 834 rvt_pr_err(rdi, "Failed to register driver with ib core.\n"); 841 - goto bail_mr; 835 + goto bail_wss; 842 836 } 843 837 844 838 rvt_create_mad_agents(rdi); ··· 847 839 rvt_pr_info(rdi, "Registration with rdmavt done.\n"); 848 840 return ret; 849 841 842 + bail_wss: 843 + rvt_wss_exit(rdi); 850 844 bail_mr: 851 845 rvt_mr_exit(rdi); 852 846 ··· 872 862 rvt_free_mad_agents(rdi); 873 863 874 864 ib_unregister_device(&rdi->ibdev); 865 + rvt_wss_exit(rdi); 875 866 rvt_mr_exit(rdi); 876 867 rvt_qp_exit(rdi); 877 868 }
+6 -7
drivers/infiniband/sw/rxe/rxe.c
··· 103 103 rxe->attr.max_res_rd_atom = RXE_MAX_RES_RD_ATOM; 104 104 rxe->attr.max_qp_init_rd_atom = RXE_MAX_QP_INIT_RD_ATOM; 105 105 rxe->attr.max_ee_init_rd_atom = RXE_MAX_EE_INIT_RD_ATOM; 106 - rxe->attr.atomic_cap = RXE_ATOMIC_CAP; 106 + rxe->attr.atomic_cap = IB_ATOMIC_HCA; 107 107 rxe->attr.max_ee = RXE_MAX_EE; 108 108 rxe->attr.max_rdd = RXE_MAX_RDD; 109 109 rxe->attr.max_mw = RXE_MAX_MW; ··· 128 128 /* initialize port attributes */ 129 129 static int rxe_init_port_param(struct rxe_port *port) 130 130 { 131 - port->attr.state = RXE_PORT_STATE; 132 - port->attr.max_mtu = RXE_PORT_MAX_MTU; 133 - port->attr.active_mtu = RXE_PORT_ACTIVE_MTU; 131 + port->attr.state = IB_PORT_DOWN; 132 + port->attr.max_mtu = IB_MTU_4096; 133 + port->attr.active_mtu = IB_MTU_256; 134 134 port->attr.gid_tbl_len = RXE_PORT_GID_TBL_LEN; 135 135 port->attr.port_cap_flags = RXE_PORT_PORT_CAP_FLAGS; 136 136 port->attr.max_msg_sz = RXE_PORT_MAX_MSG_SZ; ··· 147 147 port->attr.active_width = RXE_PORT_ACTIVE_WIDTH; 148 148 port->attr.active_speed = RXE_PORT_ACTIVE_SPEED; 149 149 port->attr.phys_state = RXE_PORT_PHYS_STATE; 150 - port->mtu_cap = 151 - ib_mtu_enum_to_int(RXE_PORT_ACTIVE_MTU); 150 + port->mtu_cap = ib_mtu_enum_to_int(IB_MTU_256); 152 151 port->subnet_prefix = cpu_to_be64(RXE_PORT_SUBNET_PREFIX); 153 152 154 153 return 0; ··· 299 300 mtu = eth_mtu_int_to_enum(ndev_mtu); 300 301 301 302 /* Make sure that new MTU in range */ 302 - mtu = mtu ? min_t(enum ib_mtu, mtu, RXE_PORT_MAX_MTU) : IB_MTU_256; 303 + mtu = mtu ? min_t(enum ib_mtu, mtu, IB_MTU_4096) : IB_MTU_256; 303 304 304 305 port->attr.active_mtu = mtu; 305 306 port->mtu_cap = ib_mtu_enum_to_int(mtu);
+33 -6
drivers/infiniband/sw/rxe/rxe_comp.c
··· 191 191 { 192 192 qp->comp.retry_cnt = qp->attr.retry_cnt; 193 193 qp->comp.rnr_retry = qp->attr.rnr_retry; 194 + qp->comp.started_retry = 0; 194 195 } 195 196 196 197 static inline enum comp_state check_psn(struct rxe_qp *qp, ··· 254 253 case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE: 255 254 if (pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE && 256 255 pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST) { 256 + /* read retries of partial data may restart from 257 + * read response first or response only. 258 + */ 259 + if ((pkt->psn == wqe->first_psn && 260 + pkt->opcode == 261 + IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) || 262 + (wqe->first_psn == wqe->last_psn && 263 + pkt->opcode == 264 + IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY)) 265 + break; 266 + 257 267 return COMPST_ERROR; 258 268 } 259 269 break; ··· 511 499 struct rxe_pkt_info *pkt, 512 500 struct rxe_send_wqe *wqe) 513 501 { 514 - qp->comp.opcode = -1; 515 - 516 - if (pkt) { 517 - if (psn_compare(pkt->psn, qp->comp.psn) >= 0) 518 - qp->comp.psn = (pkt->psn + 1) & BTH_PSN_MASK; 502 + if (pkt && wqe->state == wqe_state_pending) { 503 + if (psn_compare(wqe->last_psn, qp->comp.psn) >= 0) { 504 + qp->comp.psn = (wqe->last_psn + 1) & BTH_PSN_MASK; 505 + qp->comp.opcode = -1; 506 + } 519 507 520 508 if (qp->req.wait_psn) { 521 509 qp->req.wait_psn = 0; ··· 688 676 goto exit; 689 677 } 690 678 679 + /* if we've started a retry, don't start another 680 + * retry sequence, unless this is a timeout. 
681 + */ 682 + if (qp->comp.started_retry && 683 + !qp->comp.timeout_retry) { 684 + if (pkt) { 685 + rxe_drop_ref(pkt->qp); 686 + kfree_skb(skb); 687 + skb = NULL; 688 + } 689 + 690 + goto done; 691 + } 692 + 691 693 if (qp->comp.retry_cnt > 0) { 692 694 if (qp->comp.retry_cnt != 7) 693 695 qp->comp.retry_cnt--; ··· 718 692 rxe_counter_inc(rxe, 719 693 RXE_CNT_COMP_RETRY); 720 694 qp->req.need_retry = 1; 695 + qp->comp.started_retry = 1; 721 696 rxe_run_task(&qp->req.task, 1); 722 697 } 723 698 ··· 728 701 skb = NULL; 729 702 } 730 703 731 - goto exit; 704 + goto done; 732 705 733 706 } else { 734 707 rxe_counter_inc(rxe, RXE_CNT_RETRY_EXCEEDED);
+2 -2
drivers/infiniband/sw/rxe/rxe_cq.c
··· 30 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 31 * SOFTWARE. 32 32 */ 33 - 33 + #include <linux/vmalloc.h> 34 34 #include "rxe.h" 35 35 #include "rxe_loc.h" 36 36 #include "rxe_queue.h" ··· 97 97 err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, context, 98 98 cq->queue->buf, cq->queue->buf_size, &cq->queue->ip); 99 99 if (err) { 100 - kvfree(cq->queue->buf); 100 + vfree(cq->queue->buf); 101 101 kfree(cq->queue); 102 102 return err; 103 103 }
+2 -3
drivers/infiniband/sw/rxe/rxe_loc.h
··· 144 144 int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb); 145 145 struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, 146 146 int paylen, struct rxe_pkt_info *pkt); 147 - int rxe_prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, 148 - struct sk_buff *skb, u32 *crc); 147 + int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb, u32 *crc); 149 148 enum rdma_link_layer rxe_link_layer(struct rxe_dev *rxe, unsigned int port_num); 150 149 const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num); 151 150 struct device *rxe_dma_device(struct rxe_dev *rxe); ··· 195 196 if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) 196 197 return qp->attr.path_mtu; 197 198 else 198 - return RXE_PORT_MAX_MTU; 199 + return IB_MTU_4096; 199 200 } 200 201 201 202 static inline int rcv_wqe_size(int max_sge)
+11 -24
drivers/infiniband/sw/rxe/rxe_mr.c
··· 573 573 struct rxe_dev *rxe = to_rdev(pd->ibpd.device); 574 574 int index = key >> 8; 575 575 576 - if (index >= RXE_MIN_MR_INDEX && index <= RXE_MAX_MR_INDEX) { 577 - mem = rxe_pool_get_index(&rxe->mr_pool, index); 578 - if (!mem) 579 - goto err1; 580 - } else { 581 - goto err1; 576 + mem = rxe_pool_get_index(&rxe->mr_pool, index); 577 + if (!mem) 578 + return NULL; 579 + 580 + if (unlikely((type == lookup_local && mem->lkey != key) || 581 + (type == lookup_remote && mem->rkey != key) || 582 + mem->pd != pd || 583 + (access && !(access & mem->access)) || 584 + mem->state != RXE_MEM_STATE_VALID)) { 585 + rxe_drop_ref(mem); 586 + mem = NULL; 582 587 } 583 588 584 - if ((type == lookup_local && mem->lkey != key) || 585 - (type == lookup_remote && mem->rkey != key)) 586 - goto err2; 587 - 588 - if (mem->pd != pd) 589 - goto err2; 590 - 591 - if (access && !(access & mem->access)) 592 - goto err2; 593 - 594 - if (mem->state != RXE_MEM_STATE_VALID) 595 - goto err2; 596 - 597 589 return mem; 598 - 599 - err2: 600 - rxe_drop_ref(mem); 601 - err1: 602 - return NULL; 603 590 } 604 591 605 592 int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem,
+17 -32
drivers/infiniband/sw/rxe/rxe_net.c
··· 72 72 73 73 spin_lock_bh(&dev_list_lock); 74 74 list_for_each_entry(rxe, &rxe_dev_list, list) { 75 - if (!strcmp(name, rxe->ib_dev.name)) { 75 + if (!strcmp(name, dev_name(&rxe->ib_dev.dev))) { 76 76 found = rxe; 77 77 break; 78 78 } ··· 182 182 183 183 #endif 184 184 185 - static struct dst_entry *rxe_find_route(struct rxe_dev *rxe, 185 + static struct dst_entry *rxe_find_route(struct net_device *ndev, 186 186 struct rxe_qp *qp, 187 187 struct rxe_av *av) 188 188 { 189 - const struct ib_gid_attr *attr; 190 189 struct dst_entry *dst = NULL; 191 - struct net_device *ndev; 192 - 193 - attr = rdma_get_gid_attr(&rxe->ib_dev, qp->attr.port_num, 194 - av->grh.sgid_index); 195 - if (IS_ERR(attr)) 196 - return NULL; 197 - ndev = attr->ndev; 198 190 199 191 if (qp_type(qp) == IB_QPT_RC) 200 192 dst = sk_dst_get(qp->sk->sk); ··· 221 229 sk_dst_set(qp->sk->sk, dst); 222 230 } 223 231 } 224 - rdma_put_gid_attr(attr); 225 232 return dst; 226 233 } 227 234 ··· 368 377 ip6h->payload_len = htons(skb->len - sizeof(*ip6h)); 369 378 } 370 379 371 - static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, 372 - struct sk_buff *skb, struct rxe_av *av) 380 + static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb, 381 + struct rxe_av *av) 373 382 { 374 383 struct rxe_qp *qp = pkt->qp; 375 384 struct dst_entry *dst; ··· 378 387 struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr; 379 388 struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr; 380 389 381 - dst = rxe_find_route(rxe, qp, av); 390 + dst = rxe_find_route(skb->dev, qp, av); 382 391 if (!dst) { 383 392 pr_err("Host not reachable\n"); 384 393 return -EHOSTUNREACH; ··· 387 396 if (!memcmp(saddr, daddr, sizeof(*daddr))) 388 397 pkt->mask |= RXE_LOOPBACK_MASK; 389 398 390 - prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT), 391 - htons(ROCE_V2_UDP_DPORT)); 399 + prepare_udp_hdr(skb, cpu_to_be16(qp->src_port), 400 + cpu_to_be16(ROCE_V2_UDP_DPORT)); 392 401 393 402 prepare_ipv4_hdr(dst, skb, 
saddr->s_addr, daddr->s_addr, IPPROTO_UDP, 394 403 av->grh.traffic_class, av->grh.hop_limit, df, xnet); ··· 397 406 return 0; 398 407 } 399 408 400 - static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, 401 - struct sk_buff *skb, struct rxe_av *av) 409 + static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb, 410 + struct rxe_av *av) 402 411 { 403 412 struct rxe_qp *qp = pkt->qp; 404 413 struct dst_entry *dst; 405 414 struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr; 406 415 struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr; 407 416 408 - dst = rxe_find_route(rxe, qp, av); 417 + dst = rxe_find_route(skb->dev, qp, av); 409 418 if (!dst) { 410 419 pr_err("Host not reachable\n"); 411 420 return -EHOSTUNREACH; ··· 414 423 if (!memcmp(saddr, daddr, sizeof(*daddr))) 415 424 pkt->mask |= RXE_LOOPBACK_MASK; 416 425 417 - prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT), 418 - htons(ROCE_V2_UDP_DPORT)); 426 + prepare_udp_hdr(skb, cpu_to_be16(qp->src_port), 427 + cpu_to_be16(ROCE_V2_UDP_DPORT)); 419 428 420 429 prepare_ipv6_hdr(dst, skb, saddr, daddr, IPPROTO_UDP, 421 430 av->grh.traffic_class, ··· 425 434 return 0; 426 435 } 427 436 428 - int rxe_prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, 429 - struct sk_buff *skb, u32 *crc) 437 + int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb, u32 *crc) 430 438 { 431 439 int err = 0; 432 440 struct rxe_av *av = rxe_get_av(pkt); 433 441 434 442 if (av->network_type == RDMA_NETWORK_IPV4) 435 - err = prepare4(rxe, pkt, skb, av); 443 + err = prepare4(pkt, skb, av); 436 444 else if (av->network_type == RDMA_NETWORK_IPV6) 437 - err = prepare6(rxe, pkt, skb, av); 445 + err = prepare6(pkt, skb, av); 438 446 439 447 *crc = rxe_icrc_hdr(pkt, skb); 440 448 ··· 489 499 void rxe_loopback(struct sk_buff *skb) 490 500 { 491 501 rxe_rcv(skb); 492 - } 493 - 494 - static inline int addr_same(struct rxe_dev *rxe, struct rxe_av *av) 495 - { 496 - return rxe->port.port_guid == 
av->grh.dgid.global.interface_id; 497 502 } 498 503 499 504 struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, ··· 610 625 port->attr.phys_state = IB_PHYS_STATE_LINK_UP; 611 626 612 627 rxe_port_event(rxe, IB_EVENT_PORT_ACTIVE); 613 - pr_info("set %s active\n", rxe->ib_dev.name); 628 + dev_info(&rxe->ib_dev.dev, "set active\n"); 614 629 } 615 630 616 631 /* Caller must hold net_info_lock */ ··· 623 638 port->attr.phys_state = IB_PHYS_STATE_LINK_DOWN; 624 639 625 640 rxe_port_event(rxe, IB_EVENT_PORT_ERR); 626 - pr_info("set %s down\n", rxe->ib_dev.name); 641 + dev_info(&rxe->ib_dev.dev, "set down\n"); 627 642 } 628 643 629 644 static int rxe_notify(struct notifier_block *not_blk,
-4
drivers/infiniband/sw/rxe/rxe_param.h
··· 90 90 RXE_MAX_RES_RD_ATOM = 0x3f000, 91 91 RXE_MAX_QP_INIT_RD_ATOM = 128, 92 92 RXE_MAX_EE_INIT_RD_ATOM = 0, 93 - RXE_ATOMIC_CAP = 1, 94 93 RXE_MAX_EE = 0, 95 94 RXE_MAX_RDD = 0, 96 95 RXE_MAX_MW = 0, ··· 138 139 139 140 /* default/initial rxe port parameters */ 140 141 enum rxe_port_param { 141 - RXE_PORT_STATE = IB_PORT_DOWN, 142 - RXE_PORT_MAX_MTU = IB_MTU_4096, 143 - RXE_PORT_ACTIVE_MTU = IB_MTU_256, 144 142 RXE_PORT_GID_TBL_LEN = 1024, 145 143 RXE_PORT_PORT_CAP_FLAGS = RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP, 146 144 RXE_PORT_MAX_MSG_SZ = 0x800000,
+27 -28
drivers/infiniband/sw/rxe/rxe_pool.c
··· 207 207 208 208 kref_init(&pool->ref_cnt); 209 209 210 - spin_lock_init(&pool->pool_lock); 210 + rwlock_init(&pool->pool_lock); 211 211 212 212 if (rxe_type_info[type].flags & RXE_POOL_INDEX) { 213 213 err = rxe_pool_init_index(pool, ··· 222 222 pool->key_size = rxe_type_info[type].key_size; 223 223 } 224 224 225 - pool->state = rxe_pool_valid; 225 + pool->state = RXE_POOL_STATE_VALID; 226 226 227 227 out: 228 228 return err; ··· 232 232 { 233 233 struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt); 234 234 235 - pool->state = rxe_pool_invalid; 235 + pool->state = RXE_POOL_STATE_INVALID; 236 236 kfree(pool->table); 237 237 } 238 238 ··· 245 245 { 246 246 unsigned long flags; 247 247 248 - spin_lock_irqsave(&pool->pool_lock, flags); 249 - pool->state = rxe_pool_invalid; 248 + write_lock_irqsave(&pool->pool_lock, flags); 249 + pool->state = RXE_POOL_STATE_INVALID; 250 250 if (atomic_read(&pool->num_elem) > 0) 251 251 pr_warn("%s pool destroyed with unfree'd elem\n", 252 252 pool_name(pool)); 253 - spin_unlock_irqrestore(&pool->pool_lock, flags); 253 + write_unlock_irqrestore(&pool->pool_lock, flags); 254 254 255 255 rxe_pool_put(pool); 256 256 ··· 336 336 struct rxe_pool *pool = elem->pool; 337 337 unsigned long flags; 338 338 339 - spin_lock_irqsave(&pool->pool_lock, flags); 339 + write_lock_irqsave(&pool->pool_lock, flags); 340 340 memcpy((u8 *)elem + pool->key_offset, key, pool->key_size); 341 341 insert_key(pool, elem); 342 - spin_unlock_irqrestore(&pool->pool_lock, flags); 342 + write_unlock_irqrestore(&pool->pool_lock, flags); 343 343 } 344 344 345 345 void rxe_drop_key(void *arg) ··· 348 348 struct rxe_pool *pool = elem->pool; 349 349 unsigned long flags; 350 350 351 - spin_lock_irqsave(&pool->pool_lock, flags); 351 + write_lock_irqsave(&pool->pool_lock, flags); 352 352 rb_erase(&elem->node, &pool->tree); 353 - spin_unlock_irqrestore(&pool->pool_lock, flags); 353 + write_unlock_irqrestore(&pool->pool_lock, flags); 354 354 } 355 355 356 356 
void rxe_add_index(void *arg) ··· 359 359 struct rxe_pool *pool = elem->pool; 360 360 unsigned long flags; 361 361 362 - spin_lock_irqsave(&pool->pool_lock, flags); 362 + write_lock_irqsave(&pool->pool_lock, flags); 363 363 elem->index = alloc_index(pool); 364 364 insert_index(pool, elem); 365 - spin_unlock_irqrestore(&pool->pool_lock, flags); 365 + write_unlock_irqrestore(&pool->pool_lock, flags); 366 366 } 367 367 368 368 void rxe_drop_index(void *arg) ··· 371 371 struct rxe_pool *pool = elem->pool; 372 372 unsigned long flags; 373 373 374 - spin_lock_irqsave(&pool->pool_lock, flags); 374 + write_lock_irqsave(&pool->pool_lock, flags); 375 375 clear_bit(elem->index - pool->min_index, pool->table); 376 376 rb_erase(&elem->node, &pool->tree); 377 - spin_unlock_irqrestore(&pool->pool_lock, flags); 377 + write_unlock_irqrestore(&pool->pool_lock, flags); 378 378 } 379 379 380 380 void *rxe_alloc(struct rxe_pool *pool) ··· 384 384 385 385 might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC)); 386 386 387 - spin_lock_irqsave(&pool->pool_lock, flags); 388 - if (pool->state != rxe_pool_valid) { 389 - spin_unlock_irqrestore(&pool->pool_lock, flags); 387 + read_lock_irqsave(&pool->pool_lock, flags); 388 + if (pool->state != RXE_POOL_STATE_VALID) { 389 + read_unlock_irqrestore(&pool->pool_lock, flags); 390 390 return NULL; 391 391 } 392 392 kref_get(&pool->ref_cnt); 393 - spin_unlock_irqrestore(&pool->pool_lock, flags); 393 + read_unlock_irqrestore(&pool->pool_lock, flags); 394 394 395 395 kref_get(&pool->rxe->ref_cnt); 396 396 ··· 436 436 struct rxe_pool_entry *elem = NULL; 437 437 unsigned long flags; 438 438 439 - spin_lock_irqsave(&pool->pool_lock, flags); 439 + read_lock_irqsave(&pool->pool_lock, flags); 440 440 441 - if (pool->state != rxe_pool_valid) 441 + if (pool->state != RXE_POOL_STATE_VALID) 442 442 goto out; 443 443 444 444 node = pool->tree.rb_node; ··· 450 450 node = node->rb_left; 451 451 else if (elem->index < index) 452 452 node = node->rb_right; 453 - else 453 + 
else { 454 + kref_get(&elem->ref_cnt); 454 455 break; 456 + } 455 457 } 456 458 457 - if (node) 458 - kref_get(&elem->ref_cnt); 459 - 460 459 out: 461 - spin_unlock_irqrestore(&pool->pool_lock, flags); 460 + read_unlock_irqrestore(&pool->pool_lock, flags); 462 461 return node ? elem : NULL; 463 462 } 464 463 ··· 468 469 int cmp; 469 470 unsigned long flags; 470 471 471 - spin_lock_irqsave(&pool->pool_lock, flags); 472 + read_lock_irqsave(&pool->pool_lock, flags); 472 473 473 - if (pool->state != rxe_pool_valid) 474 + if (pool->state != RXE_POOL_STATE_VALID) 474 475 goto out; 475 476 476 477 node = pool->tree.rb_node; ··· 493 494 kref_get(&elem->ref_cnt); 494 495 495 496 out: 496 - spin_unlock_irqrestore(&pool->pool_lock, flags); 497 + read_unlock_irqrestore(&pool->pool_lock, flags); 497 498 return node ? elem : NULL; 498 499 }
+3 -3
drivers/infiniband/sw/rxe/rxe_pool.h
··· 74 74 extern struct rxe_type_info rxe_type_info[]; 75 75 76 76 enum rxe_pool_state { 77 - rxe_pool_invalid, 78 - rxe_pool_valid, 77 + RXE_POOL_STATE_INVALID, 78 + RXE_POOL_STATE_VALID, 79 79 }; 80 80 81 81 struct rxe_pool_entry { ··· 90 90 91 91 struct rxe_pool { 92 92 struct rxe_dev *rxe; 93 - spinlock_t pool_lock; /* pool spinlock */ 93 + rwlock_t pool_lock; /* protects pool add/del/search */ 94 94 size_t elem_size; 95 95 struct kref ref_cnt; 96 96 void (*cleanup)(struct rxe_pool_entry *obj);
+14 -4
drivers/infiniband/sw/rxe/rxe_qp.c
··· 34 34 #include <linux/skbuff.h> 35 35 #include <linux/delay.h> 36 36 #include <linux/sched.h> 37 + #include <linux/vmalloc.h> 37 38 38 39 #include "rxe.h" 39 40 #include "rxe_loc.h" ··· 228 227 return err; 229 228 qp->sk->sk->sk_user_data = qp; 230 229 230 + /* pick a source UDP port number for this QP based on 231 + * the source QPN. this spreads traffic for different QPs 232 + * across different NIC RX queues (while using a single 233 + * flow for a given QP to maintain packet order). 234 + * the port number must be in the Dynamic Ports range 235 + * (0xc000 - 0xffff). 236 + */ 237 + qp->src_port = RXE_ROCE_V2_SPORT + 238 + (hash_32_generic(qp_num(qp), 14) & 0x3fff); 239 + 231 240 qp->sq.max_wr = init->cap.max_send_wr; 232 241 qp->sq.max_sge = init->cap.max_send_sge; 233 242 qp->sq.max_inline = init->cap.max_inline_data; ··· 258 247 &qp->sq.queue->ip); 259 248 260 249 if (err) { 261 - kvfree(qp->sq.queue->buf); 250 + vfree(qp->sq.queue->buf); 262 251 kfree(qp->sq.queue); 263 252 return err; 264 253 } ··· 311 300 qp->rq.queue->buf, qp->rq.queue->buf_size, 312 301 &qp->rq.queue->ip); 313 302 if (err) { 314 - kvfree(qp->rq.queue->buf); 303 + vfree(qp->rq.queue->buf); 315 304 kfree(qp->rq.queue); 316 305 return err; 317 306 } ··· 419 408 enum ib_qp_state new_state = (mask & IB_QP_STATE) ? 420 409 attr->qp_state : cur_state; 421 410 422 - if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask, 423 - IB_LINK_LAYER_ETHERNET)) { 411 + if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask)) { 424 412 pr_warn("invalid mask or state for qp\n"); 425 413 goto err1; 426 414 }
+2 -2
drivers/infiniband/sw/rxe/rxe_recv.c
··· 122 122 set_bad_pkey_cntr(port); 123 123 goto err1; 124 124 } 125 - } else if (qpn != 0) { 125 + } else { 126 126 if (unlikely(!pkey_match(pkey, 127 127 port->pkey_tbl[qp->attr.pkey_index] 128 128 ))) { ··· 134 134 } 135 135 136 136 if ((qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI) && 137 - qpn != 0 && pkt->mask) { 137 + pkt->mask) { 138 138 u32 qkey = (qpn == 1) ? GSI_QKEY : qp->attr.qkey; 139 139 140 140 if (unlikely(deth_qkey(pkt) != qkey)) {
+10 -7
drivers/infiniband/sw/rxe/rxe_req.c
··· 73 73 int npsn; 74 74 int first = 1; 75 75 76 - wqe = queue_head(qp->sq.queue); 77 - npsn = (qp->comp.psn - wqe->first_psn) & BTH_PSN_MASK; 78 - 79 76 qp->req.wqe_index = consumer_index(qp->sq.queue); 80 77 qp->req.psn = qp->comp.psn; 81 78 qp->req.opcode = -1; ··· 104 107 if (first) { 105 108 first = 0; 106 109 107 - if (mask & WR_WRITE_OR_SEND_MASK) 110 + if (mask & WR_WRITE_OR_SEND_MASK) { 111 + npsn = (qp->comp.psn - wqe->first_psn) & 112 + BTH_PSN_MASK; 108 113 retry_first_write_send(qp, wqe, mask, npsn); 114 + } 109 115 110 - if (mask & WR_READ_MASK) 116 + if (mask & WR_READ_MASK) { 117 + npsn = (wqe->dma.length - wqe->dma.resid) / 118 + qp->mtu; 111 119 wqe->iova += npsn * qp->mtu; 120 + } 112 121 } 113 122 114 123 wqe->state = wqe_state_posted; ··· 438 435 if (pkt->mask & RXE_RETH_MASK) { 439 436 reth_set_rkey(pkt, ibwr->wr.rdma.rkey); 440 437 reth_set_va(pkt, wqe->iova); 441 - reth_set_len(pkt, wqe->dma.length); 438 + reth_set_len(pkt, wqe->dma.resid); 442 439 } 443 440 444 441 if (pkt->mask & RXE_IMMDT_MASK) ··· 479 476 u32 *p; 480 477 int err; 481 478 482 - err = rxe_prepare(rxe, pkt, skb, &crc); 479 + err = rxe_prepare(pkt, skb, &crc); 483 480 if (err) 484 481 return err; 485 482
+7 -3
drivers/infiniband/sw/rxe/rxe_resp.c
··· 637 637 if (ack->mask & RXE_ATMACK_MASK) 638 638 atmack_set_orig(ack, qp->resp.atomic_orig); 639 639 640 - err = rxe_prepare(rxe, ack, skb, &crc); 640 + err = rxe_prepare(ack, skb, &crc); 641 641 if (err) { 642 642 kfree_skb(skb); 643 643 return NULL; ··· 682 682 rxe_advance_resp_resource(qp); 683 683 684 684 res->type = RXE_READ_MASK; 685 + res->replay = 0; 685 686 686 687 res->read.va = qp->resp.va; 687 688 res->read.va_org = qp->resp.va; ··· 753 752 state = RESPST_DONE; 754 753 } else { 755 754 qp->resp.res = NULL; 756 - qp->resp.opcode = -1; 755 + if (!res->replay) 756 + qp->resp.opcode = -1; 757 757 if (psn_compare(res->cur_psn, qp->resp.psn) >= 0) 758 758 qp->resp.psn = res->cur_psn; 759 759 state = RESPST_CLEANUP; ··· 816 814 817 815 /* next expected psn, read handles this separately */ 818 816 qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; 817 + qp->resp.ack_psn = qp->resp.psn; 819 818 820 819 qp->resp.opcode = pkt->opcode; 821 820 qp->resp.status = IB_WC_SUCCESS; ··· 1068 1065 struct rxe_pkt_info *pkt) 1069 1066 { 1070 1067 enum resp_states rc; 1071 - u32 prev_psn = (qp->resp.psn - 1) & BTH_PSN_MASK; 1068 + u32 prev_psn = (qp->resp.ack_psn - 1) & BTH_PSN_MASK; 1072 1069 1073 1070 if (pkt->mask & RXE_SEND_MASK || 1074 1071 pkt->mask & RXE_WRITE_MASK) { ··· 1111 1108 res->state = (pkt->psn == res->first_psn) ? 1112 1109 rdatm_res_state_new : 1113 1110 rdatm_res_state_replay; 1111 + res->replay = 1; 1114 1112 1115 1113 /* Reset the resource, except length. */ 1116 1114 res->read.va_org = iova;
+8 -2
drivers/infiniband/sw/rxe/rxe_srq.c
··· 31 31 * SOFTWARE. 32 32 */ 33 33 34 + #include <linux/vmalloc.h> 34 35 #include "rxe.h" 35 36 #include "rxe_loc.h" 36 37 #include "rxe_queue.h" ··· 130 129 131 130 err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, context, q->buf, 132 131 q->buf_size, &q->ip); 133 - if (err) 132 + if (err) { 133 + vfree(q->buf); 134 + kfree(q); 134 135 return err; 136 + } 135 137 136 138 if (uresp) { 137 139 if (copy_to_user(&uresp->srq_num, &srq->srq_num, 138 - sizeof(uresp->srq_num))) 140 + sizeof(uresp->srq_num))) { 141 + rxe_queue_cleanup(q); 139 142 return -EFAULT; 143 + } 140 144 } 141 145 142 146 return 0;
+1 -1
drivers/infiniband/sw/rxe/rxe_sysfs.c
··· 105 105 } 106 106 107 107 rxe_set_port_state(ndev); 108 - pr_info("added %s to %s\n", rxe->ib_dev.name, intf); 108 + dev_info(&rxe->ib_dev.dev, "added %s\n", intf); 109 109 err: 110 110 if (ndev) 111 111 dev_put(ndev);
+9 -20
drivers/infiniband/sw/rxe/rxe_verbs.c
··· 1148 1148 1149 1149 static DEVICE_ATTR_RO(parent); 1150 1150 1151 - static struct device_attribute *rxe_dev_attributes[] = { 1152 - &dev_attr_parent, 1151 + static struct attribute *rxe_dev_attributes[] = { 1152 + &dev_attr_parent.attr, 1153 + NULL 1154 + }; 1155 + 1156 + static const struct attribute_group rxe_attr_group = { 1157 + .attrs = rxe_dev_attributes, 1153 1158 }; 1154 1159 1155 1160 int rxe_register_device(struct rxe_dev *rxe) 1156 1161 { 1157 1162 int err; 1158 - int i; 1159 1163 struct ib_device *dev = &rxe->ib_dev; 1160 1164 struct crypto_shash *tfm; 1161 1165 1162 - strlcpy(dev->name, "rxe%d", IB_DEVICE_NAME_MAX); 1163 1166 strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc)); 1164 1167 1165 1168 dev->owner = THIS_MODULE; ··· 1263 1260 } 1264 1261 rxe->tfm = tfm; 1265 1262 1263 + rdma_set_device_sysfs_group(dev, &rxe_attr_group); 1266 1264 dev->driver_id = RDMA_DRIVER_RXE; 1267 - err = ib_register_device(dev, NULL); 1265 + err = ib_register_device(dev, "rxe%d", NULL); 1268 1266 if (err) { 1269 1267 pr_warn("%s failed with error %d\n", __func__, err); 1270 1268 goto err1; 1271 1269 } 1272 1270 1273 - for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i) { 1274 - err = device_create_file(&dev->dev, rxe_dev_attributes[i]); 1275 - if (err) { 1276 - pr_warn("%s failed with error %d for attr number %d\n", 1277 - __func__, err, i); 1278 - goto err2; 1279 - } 1280 - } 1281 - 1282 1271 return 0; 1283 1272 1284 - err2: 1285 - ib_unregister_device(dev); 1286 1273 err1: 1287 1274 crypto_free_shash(rxe->tfm); 1288 1275 ··· 1281 1288 1282 1289 int rxe_unregister_device(struct rxe_dev *rxe) 1283 1290 { 1284 - int i; 1285 1291 struct ib_device *dev = &rxe->ib_dev; 1286 - 1287 - for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i) 1288 - device_remove_file(&dev->dev, rxe_dev_attributes[i]); 1289 1292 1290 1293 ib_unregister_device(dev); 1291 1294
+4
drivers/infiniband/sw/rxe/rxe_verbs.h
··· 158 158 int opcode; 159 159 int timeout; 160 160 int timeout_retry; 161 + int started_retry; 161 162 u32 retry_cnt; 162 163 u32 rnr_retry; 163 164 struct rxe_task task; ··· 172 171 173 172 struct resp_res { 174 173 int type; 174 + int replay; 175 175 u32 first_psn; 176 176 u32 last_psn; 177 177 u32 cur_psn; ··· 197 195 enum rxe_qp_state state; 198 196 u32 msn; 199 197 u32 psn; 198 + u32 ack_psn; 200 199 int opcode; 201 200 int drop_msg; 202 201 int goto_error; ··· 251 248 252 249 struct socket *sk; 253 250 u32 dst_cookie; 251 + u16 src_port; 254 252 255 253 struct rxe_av pri_av; 256 254 struct rxe_av alt_av;
+6 -2
drivers/infiniband/ulp/ipoib/ipoib_cm.c
··· 1438 1438 spin_unlock_irqrestore(&priv->lock, flags); 1439 1439 netif_tx_unlock_bh(dev); 1440 1440 1441 - if (skb->protocol == htons(ETH_P_IP)) 1441 + if (skb->protocol == htons(ETH_P_IP)) { 1442 + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); 1442 1443 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 1444 + } 1443 1445 #if IS_ENABLED(CONFIG_IPV6) 1444 - else if (skb->protocol == htons(ETH_P_IPV6)) 1446 + else if (skb->protocol == htons(ETH_P_IPV6)) { 1447 + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); 1445 1448 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 1449 + } 1446 1450 #endif 1447 1451 dev_kfree_skb_any(skb); 1448 1452
+35 -1
drivers/infiniband/ulp/ipoib/ipoib_main.c
··· 243 243 return 0; 244 244 } 245 245 246 - if (new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu)) 246 + if (new_mtu < (ETH_MIN_MTU + IPOIB_ENCAP_LEN) || 247 + new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu)) 247 248 return -EINVAL; 248 249 249 250 priv->admin_mtu = new_mtu; ··· 1881 1880 sizeof(union ib_gid)); 1882 1881 1883 1882 SET_NETDEV_DEV(priv->dev, priv->ca->dev.parent); 1883 + priv->dev->dev_port = priv->port - 1; 1884 + /* Let's set this one too for backwards compatibility. */ 1884 1885 priv->dev->dev_id = priv->port - 1; 1885 1886 1886 1887 return 0; ··· 2388 2385 return device_create_file(&dev->dev, &dev_attr_pkey); 2389 2386 } 2390 2387 2388 + /* 2389 + * We erroneously exposed the iface's port number in the dev_id 2390 + * sysfs field long after dev_port was introduced for that purpose[1], 2391 + * and we need to stop everyone from relying on that. 2392 + * Let's overload the shower routine for the dev_id file here 2393 + * to gently bring the issue up. 2394 + * 2395 + * [1] https://www.spinics.net/lists/netdev/msg272123.html 2396 + */ 2397 + static ssize_t dev_id_show(struct device *dev, 2398 + struct device_attribute *attr, char *buf) 2399 + { 2400 + struct net_device *ndev = to_net_dev(dev); 2401 + 2402 + if (ndev->dev_id == ndev->dev_port) 2403 + netdev_info_once(ndev, 2404 + "\"%s\" wants to know my dev_id. Should it look at dev_port instead? 
See Documentation/ABI/testing/sysfs-class-net for more info.\n", 2405 + current->comm); 2406 + 2407 + return sprintf(buf, "%#x\n", ndev->dev_id); 2408 + } 2409 + static DEVICE_ATTR_RO(dev_id); 2410 + 2411 + int ipoib_intercept_dev_id_attr(struct net_device *dev) 2412 + { 2413 + device_remove_file(&dev->dev, &dev_attr_dev_id); 2414 + return device_create_file(&dev->dev, &dev_attr_dev_id); 2415 + } 2416 + 2391 2417 static struct net_device *ipoib_add_port(const char *format, 2392 2418 struct ib_device *hca, u8 port) 2393 2419 { ··· 2469 2437 */ 2470 2438 ndev->priv_destructor = ipoib_intf_free; 2471 2439 2440 + if (ipoib_intercept_dev_id_attr(ndev)) 2441 + goto sysfs_failed; 2472 2442 if (ipoib_cm_add_mode_attr(ndev)) 2473 2443 goto sysfs_failed; 2474 2444 if (ipoib_add_pkey_attr(ndev))
+1 -1
drivers/infiniband/ulp/ipoib/ipoib_verbs.c
··· 277 277 return; 278 278 279 279 ipoib_dbg(priv, "Event %d on device %s port %d\n", record->event, 280 - record->device->name, record->element.port_num); 280 + dev_name(&record->device->dev), record->element.port_num); 281 281 282 282 if (record->event == IB_EVENT_SM_CHANGE || 283 283 record->event == IB_EVENT_CLIENT_REREGISTER) {
+13 -5
drivers/infiniband/ulp/iser/iser_initiator.c
··· 589 589 ib_conn->post_recv_buf_count--; 590 590 } 591 591 592 - static inline void 592 + static inline int 593 593 iser_inv_desc(struct iser_fr_desc *desc, u32 rkey) 594 594 { 595 - if (likely(rkey == desc->rsc.mr->rkey)) 595 + if (likely(rkey == desc->rsc.mr->rkey)) { 596 596 desc->rsc.mr_valid = 0; 597 - else if (likely(rkey == desc->pi_ctx->sig_mr->rkey)) 597 + } else if (likely(desc->pi_ctx && rkey == desc->pi_ctx->sig_mr->rkey)) { 598 598 desc->pi_ctx->sig_mr_valid = 0; 599 + } else { 600 + iser_err("Bogus remote invalidation for rkey %#x\n", rkey); 601 + return -EINVAL; 602 + } 603 + 604 + return 0; 599 605 } 600 606 601 607 static int ··· 629 623 630 624 if (iser_task->dir[ISER_DIR_IN]) { 631 625 desc = iser_task->rdma_reg[ISER_DIR_IN].mem_h; 632 - iser_inv_desc(desc, rkey); 626 + if (unlikely(iser_inv_desc(desc, rkey))) 627 + return -EINVAL; 633 628 } 634 629 635 630 if (iser_task->dir[ISER_DIR_OUT]) { 636 631 desc = iser_task->rdma_reg[ISER_DIR_OUT].mem_h; 637 - iser_inv_desc(desc, rkey); 632 + if (unlikely(iser_inv_desc(desc, rkey))) 633 + return -EINVAL; 638 634 } 639 635 } else { 640 636 iser_err("failed to get task for itt=%d\n", hdr->itt);
+5 -4
drivers/infiniband/ulp/iser/iser_verbs.c
··· 55 55 { 56 56 iser_err("async event %s (%d) on device %s port %d\n", 57 57 ib_event_msg(event->event), event->event, 58 - event->device->name, event->element.port_num); 58 + dev_name(&event->device->dev), event->element.port_num); 59 59 } 60 60 61 61 /** ··· 85 85 max_cqe = min(ISER_MAX_CQ_LEN, ib_dev->attrs.max_cqe); 86 86 87 87 iser_info("using %d CQs, device %s supports %d vectors max_cqe %d\n", 88 - device->comps_used, ib_dev->name, 88 + device->comps_used, dev_name(&ib_dev->dev), 89 89 ib_dev->num_comp_vectors, max_cqe); 90 90 91 91 device->pd = ib_alloc_pd(ib_dev, ··· 468 468 iser_conn->max_cmds = 469 469 ISER_GET_MAX_XMIT_CMDS(ib_dev->attrs.max_qp_wr); 470 470 iser_dbg("device %s supports max_send_wr %d\n", 471 - device->ib_device->name, ib_dev->attrs.max_qp_wr); 471 + dev_name(&device->ib_device->dev), 472 + ib_dev->attrs.max_qp_wr); 472 473 } 473 474 } 474 475 ··· 765 764 IB_DEVICE_SIGNATURE_HANDOVER)) { 766 765 iser_warn("T10-PI requested but not supported on %s, " 767 766 "continue without T10-PI\n", 768 - ib_conn->device->ib_device->name); 767 + dev_name(&ib_conn->device->ib_device->dev)); 769 768 ib_conn->pi_support = false; 770 769 } else { 771 770 ib_conn->pi_support = true;
+1 -1
drivers/infiniband/ulp/isert/ib_isert.c
··· 262 262 263 263 isert_info("Using %d CQs, %s supports %d vectors support " 264 264 "pi_capable %d\n", 265 - device->comps_used, device->ib_device->name, 265 + device->comps_used, dev_name(&device->ib_device->dev), 266 266 device->ib_device->num_comp_vectors, 267 267 device->pi_capable); 268 268
+2 -1
drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
··· 351 351 if (unlikely(!dlid)) 352 352 v_warn("Null dlid in MAC address\n"); 353 353 } else if (def_port != OPA_VNIC_INVALID_PORT) { 354 - dlid = info->vesw.u_ucast_dlid[def_port]; 354 + if (def_port < OPA_VESW_MAX_NUM_DEF_PORT) 355 + dlid = info->vesw.u_ucast_dlid[def_port]; 355 356 } 356 357 } 357 358
+2 -1
drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
··· 888 888 return; 889 889 890 890 c_dbg("OPA_VNIC received event %d on device %s port %d\n", 891 - record->event, record->device->name, record->element.port_num); 891 + record->event, dev_name(&record->device->dev), 892 + record->element.port_num); 892 893 893 894 if (record->event == IB_EVENT_PORT_ERR) 894 895 idr_for_each(&port->vport_idr, vema_disable_vport, NULL);
+6 -13
drivers/infiniband/ulp/srp/ib_srp.c
··· 1330 1330 { 1331 1331 struct srp_target_port *target = rport->lld_data; 1332 1332 struct srp_rdma_ch *ch; 1333 - struct Scsi_Host *shost = target->scsi_host; 1334 - struct scsi_device *sdev; 1335 1333 int i, j; 1336 - 1337 - /* 1338 - * Invoking srp_terminate_io() while srp_queuecommand() is running 1339 - * is not safe. Hence the warning statement below. 1340 - */ 1341 - shost_for_each_device(sdev, shost) 1342 - WARN_ON_ONCE(sdev->request_queue->request_fn_active); 1343 1334 1344 1335 for (i = 0; i < target->ch_count; i++) { 1345 1336 ch = &target->ch[i]; ··· 3115 3124 { 3116 3125 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3117 3126 3118 - return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name); 3127 + return sprintf(buf, "%s\n", 3128 + dev_name(&target->srp_host->srp_dev->dev->dev)); 3119 3129 } 3120 3130 3121 3131 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr, ··· 3979 3987 { 3980 3988 struct srp_host *host = container_of(dev, struct srp_host, dev); 3981 3989 3982 - return sprintf(buf, "%s\n", host->srp_dev->dev->name); 3990 + return sprintf(buf, "%s\n", dev_name(&host->srp_dev->dev->dev)); 3983 3991 } 3984 3992 3985 3993 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); ··· 4011 4019 4012 4020 host->dev.class = &srp_class; 4013 4021 host->dev.parent = device->dev->dev.parent; 4014 - dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port); 4022 + dev_set_name(&host->dev, "srp-%s-%d", dev_name(&device->dev->dev), 4023 + port); 4015 4024 4016 4025 if (device_register(&host->dev)) 4017 4026 goto free_host; ··· 4088 4095 srp_dev->mr_max_size = srp_dev->mr_page_size * 4089 4096 srp_dev->max_pages_per_mr; 4090 4097 pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n", 4091 - device->name, mr_page_shift, attr->max_mr_size, 4098 + dev_name(&device->dev), mr_page_shift, attr->max_mr_size, 4092 
4099 attr->max_fast_reg_page_list_len, 4093 4100 srp_dev->max_pages_per_mr, srp_dev->mr_max_size); 4094 4101
+15 -13
drivers/infiniband/ulp/srpt/ib_srpt.c
··· 148 148 return; 149 149 150 150 pr_debug("ASYNC event= %d on device= %s\n", event->event, 151 - sdev->device->name); 151 + dev_name(&sdev->device->dev)); 152 152 153 153 switch (event->event) { 154 154 case IB_EVENT_PORT_ERR: ··· 1941 1941 if (srpt_disconnect_ch(ch) >= 0) 1942 1942 pr_info("Closing channel %s because target %s_%d has been disabled\n", 1943 1943 ch->sess_name, 1944 - sport->sdev->device->name, sport->port); 1944 + dev_name(&sport->sdev->device->dev), 1945 + sport->port); 1945 1946 srpt_close_ch(ch); 1946 1947 } 1947 1948 } ··· 2128 2127 if (!sport->enabled) { 2129 2128 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); 2130 2129 pr_info("rejected SRP_LOGIN_REQ because target port %s_%d has not yet been enabled\n", 2131 - sport->sdev->device->name, port_num); 2130 + dev_name(&sport->sdev->device->dev), port_num); 2132 2131 goto reject; 2133 2132 } 2134 2133 ··· 2268 2267 rej->reason = cpu_to_be32( 2269 2268 SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); 2270 2269 pr_info("rejected SRP_LOGIN_REQ because target %s_%d is not enabled\n", 2271 - sdev->device->name, port_num); 2270 + dev_name(&sdev->device->dev), port_num); 2272 2271 mutex_unlock(&sport->mutex); 2273 2272 goto reject; 2274 2273 } ··· 2709 2708 break; 2710 2709 } 2711 2710 2712 - if (unlikely(WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT))) 2711 + if (WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT)) 2713 2712 return; 2714 2713 2715 2714 /* For read commands, transfer the data to the initiator. 
*/ ··· 2843 2842 while (wait_event_timeout(sport->ch_releaseQ, 2844 2843 srpt_ch_list_empty(sport), 5 * HZ) <= 0) { 2845 2844 pr_info("%s_%d: waiting for session unregistration ...\n", 2846 - sport->sdev->device->name, sport->port); 2845 + dev_name(&sport->sdev->device->dev), sport->port); 2847 2846 rcu_read_lock(); 2848 2847 list_for_each_entry(nexus, &sport->nexus_list, entry) { 2849 2848 list_for_each_entry(ch, &nexus->ch_list, list) { ··· 2933 2932 } 2934 2933 2935 2934 pr_debug("create SRQ #wr= %d max_allow=%d dev= %s\n", sdev->srq_size, 2936 - sdev->device->attrs.max_srq_wr, device->name); 2935 + sdev->device->attrs.max_srq_wr, dev_name(&device->dev)); 2937 2936 2938 2937 sdev->ioctx_ring = (struct srpt_recv_ioctx **) 2939 2938 srpt_alloc_ioctx_ring(sdev, sdev->srq_size, ··· 2966 2965 } else if (use_srq && !sdev->srq) { 2967 2966 ret = srpt_alloc_srq(sdev); 2968 2967 } 2969 - pr_debug("%s(%s): use_srq = %d; ret = %d\n", __func__, device->name, 2970 - sdev->use_srq, ret); 2968 + pr_debug("%s(%s): use_srq = %d; ret = %d\n", __func__, 2969 + dev_name(&device->dev), sdev->use_srq, ret); 2971 2970 return ret; 2972 2971 } 2973 2972 ··· 3053 3052 3054 3053 if (srpt_refresh_port(sport)) { 3055 3054 pr_err("MAD registration failed for %s-%d.\n", 3056 - sdev->device->name, i); 3055 + dev_name(&sdev->device->dev), i); 3057 3056 goto err_event; 3058 3057 } 3059 3058 } ··· 3064 3063 3065 3064 out: 3066 3065 ib_set_client_data(device, &srpt_client, sdev); 3067 - pr_debug("added %s.\n", device->name); 3066 + pr_debug("added %s.\n", dev_name(&device->dev)); 3068 3067 return; 3069 3068 3070 3069 err_event: ··· 3079 3078 kfree(sdev); 3080 3079 err: 3081 3080 sdev = NULL; 3082 - pr_info("%s(%s) failed.\n", __func__, device->name); 3081 + pr_info("%s(%s) failed.\n", __func__, dev_name(&device->dev)); 3083 3082 goto out; 3084 3083 } 3085 3084 ··· 3094 3093 int i; 3095 3094 3096 3095 if (!sdev) { 3097 - pr_info("%s(%s): nothing to do.\n", __func__, device->name); 3096 + 
pr_info("%s(%s): nothing to do.\n", __func__, 3097 + dev_name(&device->dev)); 3098 3098 return; 3099 3099 } 3100 3100
+11 -12
include/linux/mlx5/driver.h
··· 97 97 }; 98 98 99 99 enum { 100 - MLX5_ATOMIC_MODE_IB_COMP = 1 << 16, 101 - MLX5_ATOMIC_MODE_CX = 2 << 16, 102 - MLX5_ATOMIC_MODE_8B = 3 << 16, 103 - MLX5_ATOMIC_MODE_16B = 4 << 16, 104 - MLX5_ATOMIC_MODE_32B = 5 << 16, 105 - MLX5_ATOMIC_MODE_64B = 6 << 16, 106 - MLX5_ATOMIC_MODE_128B = 7 << 16, 107 - MLX5_ATOMIC_MODE_256B = 8 << 16, 100 + MLX5_ATOMIC_MODE_OFFSET = 16, 101 + MLX5_ATOMIC_MODE_IB_COMP = 1, 102 + MLX5_ATOMIC_MODE_CX = 2, 103 + MLX5_ATOMIC_MODE_8B = 3, 104 + MLX5_ATOMIC_MODE_16B = 4, 105 + MLX5_ATOMIC_MODE_32B = 5, 106 + MLX5_ATOMIC_MODE_64B = 6, 107 + MLX5_ATOMIC_MODE_128B = 7, 108 + MLX5_ATOMIC_MODE_256B = 8, 108 109 }; 109 110 110 111 enum { ··· 164 163 MLX5E_DCBX_PARAM_VER_OPER_AUTO = 0x3, 165 164 }; 166 165 167 - enum mlx5_dct_atomic_mode { 168 - MLX5_ATOMIC_MODE_DCT_CX = 2, 169 - }; 170 - 171 166 enum { 172 167 MLX5_ATOMIC_OPS_CMP_SWAP = 1 << 0, 173 168 MLX5_ATOMIC_OPS_FETCH_ADD = 1 << 1, 169 + MLX5_ATOMIC_OPS_EXTENDED_CMP_SWAP = 1 << 2, 170 + MLX5_ATOMIC_OPS_EXTENDED_FETCH_ADD = 1 << 3, 174 171 }; 175 172 176 173 enum mlx5_page_fault_resume_flags {
+1 -10
include/linux/qed/qed_rdma_if.h
··· 39 39 #include <linux/qed/qed_ll2_if.h> 40 40 #include <linux/qed/rdma_common.h> 41 41 42 - enum qed_roce_ll2_tx_dest { 43 - /* Light L2 TX Destination to the Network */ 44 - QED_ROCE_LL2_TX_DEST_NW, 45 - 46 - /* Light L2 TX Destination to the Loopback */ 47 - QED_ROCE_LL2_TX_DEST_LB, 48 - QED_ROCE_LL2_TX_DEST_MAX 49 - }; 50 - 51 42 #define QED_RDMA_MAX_CNQ_SIZE (0xFFFF) 52 43 53 44 /* rdma interface */ ··· 572 581 int n_seg; 573 582 struct qed_roce_ll2_buffer payload[RDMA_MAX_SGE_PER_SQ_WQE]; 574 583 int roce_mode; 575 - enum qed_roce_ll2_tx_dest tx_dest; 584 + enum qed_ll2_tx_dest tx_dest; 576 585 }; 577 586 578 587 enum qed_rdma_type {
+4 -7
include/rdma/ib_addr.h
··· 46 46 #include <net/ip.h> 47 47 #include <rdma/ib_verbs.h> 48 48 #include <rdma/ib_pack.h> 49 - #include <net/ipv6.h> 50 49 #include <net/net_namespace.h> 51 50 52 51 /** ··· 94 95 * @timeout_ms: Amount of time to wait for the address resolution to complete. 95 96 * @callback: Call invoked once address resolution has completed, timed out, 96 97 * or been canceled. A status of 0 indicates success. 98 + * @resolve_by_gid_attr: Resolve the ip based on the GID attribute from 99 + * rdma_dev_addr. 97 100 * @context: User-specified context associated with the call. 98 101 */ 99 102 int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr, 100 - struct rdma_dev_addr *addr, int timeout_ms, 103 + struct rdma_dev_addr *addr, unsigned long timeout_ms, 101 104 void (*callback)(int status, struct sockaddr *src_addr, 102 105 struct rdma_dev_addr *addr, void *context), 103 - void *context); 106 + bool resolve_by_gid_attr, void *context); 104 107 105 108 void rdma_addr_cancel(struct rdma_dev_addr *addr); 106 - 107 - void rdma_copy_addr(struct rdma_dev_addr *dev_addr, 108 - const struct net_device *dev, 109 - const unsigned char *dst_dev_addr); 110 109 111 110 int rdma_addr_size(const struct sockaddr *addr); 112 111 int rdma_addr_size_in6(struct sockaddr_in6 *addr);
+1 -1
include/rdma/ib_cm.h
··· 583 583 struct sa_path_rec *path; 584 584 const struct ib_gid_attr *sgid_attr; 585 585 __be64 service_id; 586 - int timeout_ms; 586 + unsigned long timeout_ms; 587 587 const void *private_data; 588 588 u8 private_data_len; 589 589 u8 max_cm_retries;
+16 -22
include/rdma/ib_sa.h
··· 449 449 450 450 void ib_sa_cancel_query(int id, struct ib_sa_query *query); 451 451 452 - int ib_sa_path_rec_get(struct ib_sa_client *client, 453 - struct ib_device *device, u8 port_num, 454 - struct sa_path_rec *rec, 455 - ib_sa_comp_mask comp_mask, 456 - int timeout_ms, gfp_t gfp_mask, 457 - void (*callback)(int status, 458 - struct sa_path_rec *resp, 452 + int ib_sa_path_rec_get(struct ib_sa_client *client, struct ib_device *device, 453 + u8 port_num, struct sa_path_rec *rec, 454 + ib_sa_comp_mask comp_mask, unsigned long timeout_ms, 455 + gfp_t gfp_mask, 456 + void (*callback)(int status, struct sa_path_rec *resp, 459 457 void *context), 460 - void *context, 461 - struct ib_sa_query **query); 458 + void *context, struct ib_sa_query **query); 462 459 463 460 int ib_sa_service_rec_query(struct ib_sa_client *client, 464 - struct ib_device *device, u8 port_num, 465 - u8 method, 466 - struct ib_sa_service_rec *rec, 467 - ib_sa_comp_mask comp_mask, 468 - int timeout_ms, gfp_t gfp_mask, 469 - void (*callback)(int status, 470 - struct ib_sa_service_rec *resp, 471 - void *context), 472 - void *context, 473 - struct ib_sa_query **sa_query); 461 + struct ib_device *device, u8 port_num, u8 method, 462 + struct ib_sa_service_rec *rec, 463 + ib_sa_comp_mask comp_mask, unsigned long timeout_ms, 464 + gfp_t gfp_mask, 465 + void (*callback)(int status, 466 + struct ib_sa_service_rec *resp, 467 + void *context), 468 + void *context, struct ib_sa_query **sa_query); 474 469 475 470 struct ib_sa_multicast { 476 471 struct ib_sa_mcmember_rec rec; ··· 568 573 struct ib_device *device, u8 port_num, 569 574 struct ib_sa_guidinfo_rec *rec, 570 575 ib_sa_comp_mask comp_mask, u8 method, 571 - int timeout_ms, gfp_t gfp_mask, 576 + unsigned long timeout_ms, gfp_t gfp_mask, 572 577 void (*callback)(int status, 573 578 struct ib_sa_guidinfo_rec *resp, 574 579 void *context), 575 - void *context, 576 - struct ib_sa_query **sa_query); 580 + void *context, struct ib_sa_query **sa_query); 577 
581 578 582 bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client, 579 583 struct ib_device *device,
+4 -5
include/rdma/ib_umem.h
··· 42 42 43 43 struct ib_umem { 44 44 struct ib_ucontext *context; 45 + struct mm_struct *owning_mm; 45 46 size_t length; 46 47 unsigned long address; 47 48 int page_shift; 48 - int writable; 49 - int hugetlb; 49 + u32 writable : 1; 50 + u32 hugetlb : 1; 51 + u32 is_odp : 1; 50 52 struct work_struct work; 51 - struct mm_struct *mm; 52 - unsigned long diff; 53 - struct ib_umem_odp *odp_data; 54 53 struct sg_table sg_head; 55 54 int nmap; 56 55 int npages;
+41 -34
include/rdma/ib_umem_odp.h
··· 43 43 }; 44 44 45 45 struct ib_umem_odp { 46 + struct ib_umem umem; 47 + struct ib_ucontext_per_mm *per_mm; 48 + 46 49 /* 47 50 * An array of the pages included in the on-demand paging umem. 48 51 * Indices of pages that are currently not mapped into the device will ··· 67 64 struct mutex umem_mutex; 68 65 void *private; /* for the HW driver to use. */ 69 66 70 - /* When false, use the notifier counter in the ucontext struct. */ 71 - bool mn_counters_active; 72 67 int notifiers_seq; 73 68 int notifiers_count; 74 - 75 - /* A linked list of umems that don't have private mmu notifier 76 - * counters yet. */ 77 - struct list_head no_private_counters; 78 - struct ib_umem *umem; 79 69 80 70 /* Tree tracking */ 81 71 struct umem_odp_node interval_tree; ··· 78 82 struct work_struct work; 79 83 }; 80 84 85 + static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem) 86 + { 87 + return container_of(umem, struct ib_umem_odp, umem); 88 + } 89 + 81 90 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 82 91 83 - int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem, 84 - int access); 85 - struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context, 86 - unsigned long addr, 87 - size_t size); 92 + struct ib_ucontext_per_mm { 93 + struct ib_ucontext *context; 94 + struct mm_struct *mm; 95 + struct pid *tgid; 96 + bool active; 88 97 89 - void ib_umem_odp_release(struct ib_umem *umem); 98 + struct rb_root_cached umem_tree; 99 + /* Protects umem_tree */ 100 + struct rw_semaphore umem_rwsem; 101 + 102 + struct mmu_notifier mn; 103 + unsigned int odp_mrs_count; 104 + 105 + struct list_head ucontext_list; 106 + struct rcu_head rcu; 107 + }; 108 + 109 + int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access); 110 + struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm, 111 + unsigned long addr, size_t size); 112 + void ib_umem_odp_release(struct ib_umem_odp *umem_odp); 90 113 91 114 /* 92 115 * The lower 2 bits of the DMA address signal 
the R/W permissions for ··· 120 105 121 106 #define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) 122 107 123 - int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bcnt, 124 - u64 access_mask, unsigned long current_seq); 108 + int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset, 109 + u64 bcnt, u64 access_mask, 110 + unsigned long current_seq); 125 111 126 - void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 start_offset, 112 + void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset, 127 113 u64 bound); 128 114 129 - typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end, 115 + typedef int (*umem_call_back)(struct ib_umem_odp *item, u64 start, u64 end, 130 116 void *cookie); 131 117 /* 132 118 * Call the callback on each ib_umem in the range. Returns the logical or of ··· 145 129 struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root, 146 130 u64 addr, u64 length); 147 131 148 - static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item, 132 + static inline int ib_umem_mmu_notifier_retry(struct ib_umem_odp *umem_odp, 149 133 unsigned long mmu_seq) 150 134 { 151 135 /* 152 136 * This code is strongly based on the KVM code from 153 137 * mmu_notifier_retry. Should be called with 154 - * the relevant locks taken (item->odp_data->umem_mutex 138 + * the relevant locks taken (umem_odp->umem_mutex 155 139 * and the ucontext umem_mutex semaphore locked for read). 156 140 */ 157 141 158 - /* Do not allow page faults while the new ib_umem hasn't seen a state 159 - * with zero notifiers yet, and doesn't have its own valid set of 160 - * private counters. 
*/ 161 - if (!item->odp_data->mn_counters_active) 142 + if (unlikely(umem_odp->notifiers_count)) 162 143 return 1; 163 - 164 - if (unlikely(item->odp_data->notifiers_count)) 165 - return 1; 166 - if (item->odp_data->notifiers_seq != mmu_seq) 144 + if (umem_odp->notifiers_seq != mmu_seq) 167 145 return 1; 168 146 return 0; 169 147 } 170 148 171 149 #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ 172 150 173 - static inline int ib_umem_odp_get(struct ib_ucontext *context, 174 - struct ib_umem *umem, 175 - int access) 151 + static inline int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access) 176 152 { 177 153 return -EINVAL; 178 154 } 179 155 180 - static inline struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context, 181 - unsigned long addr, 182 - size_t size) 156 + static inline struct ib_umem_odp * 157 + ib_alloc_odp_umem(struct ib_ucontext *context, unsigned long addr, size_t size) 183 158 { 184 159 return ERR_PTR(-EINVAL); 185 160 } 186 161 187 - static inline void ib_umem_odp_release(struct ib_umem *umem) {} 162 + static inline void ib_umem_odp_release(struct ib_umem_odp *umem_odp) {} 188 163 189 164 #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ 190 165
+93 -56
include/rdma/ib_verbs.h
··· 69 69 70 70 #define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN 71 71 72 + struct ib_umem_odp; 73 + 72 74 extern struct workqueue_struct *ib_wq; 73 75 extern struct workqueue_struct *ib_comp_wq; 76 + extern struct workqueue_struct *ib_comp_unbound_wq; 74 77 75 78 union ib_gid { 76 79 u8 raw[16]; ··· 1140 1137 */ 1141 1138 1142 1139 struct ib_qp_init_attr { 1140 + /* Consumer's event_handler callback must not block */ 1143 1141 void (*event_handler)(struct ib_event *, void *); 1142 + 1144 1143 void *qp_context; 1145 1144 struct ib_cq *send_cq; 1146 1145 struct ib_cq *recv_cq; ··· 1151 1146 struct ib_qp_cap cap; 1152 1147 enum ib_sig_type sq_sig_type; 1153 1148 enum ib_qp_type qp_type; 1154 - enum ib_qp_create_flags create_flags; 1149 + u32 create_flags; 1155 1150 1156 1151 /* 1157 1152 * Only needed for special QP types, or when using the RW API. ··· 1283 1278 }; 1284 1279 1285 1280 enum ib_wr_opcode { 1286 - IB_WR_RDMA_WRITE, 1287 - IB_WR_RDMA_WRITE_WITH_IMM, 1288 - IB_WR_SEND, 1289 - IB_WR_SEND_WITH_IMM, 1290 - IB_WR_RDMA_READ, 1291 - IB_WR_ATOMIC_CMP_AND_SWP, 1292 - IB_WR_ATOMIC_FETCH_AND_ADD, 1293 - IB_WR_LSO, 1294 - IB_WR_SEND_WITH_INV, 1295 - IB_WR_RDMA_READ_WITH_INV, 1296 - IB_WR_LOCAL_INV, 1297 - IB_WR_REG_MR, 1298 - IB_WR_MASKED_ATOMIC_CMP_AND_SWP, 1299 - IB_WR_MASKED_ATOMIC_FETCH_AND_ADD, 1281 + /* These are shared with userspace */ 1282 + IB_WR_RDMA_WRITE = IB_UVERBS_WR_RDMA_WRITE, 1283 + IB_WR_RDMA_WRITE_WITH_IMM = IB_UVERBS_WR_RDMA_WRITE_WITH_IMM, 1284 + IB_WR_SEND = IB_UVERBS_WR_SEND, 1285 + IB_WR_SEND_WITH_IMM = IB_UVERBS_WR_SEND_WITH_IMM, 1286 + IB_WR_RDMA_READ = IB_UVERBS_WR_RDMA_READ, 1287 + IB_WR_ATOMIC_CMP_AND_SWP = IB_UVERBS_WR_ATOMIC_CMP_AND_SWP, 1288 + IB_WR_ATOMIC_FETCH_AND_ADD = IB_UVERBS_WR_ATOMIC_FETCH_AND_ADD, 1289 + IB_WR_LSO = IB_UVERBS_WR_TSO, 1290 + IB_WR_SEND_WITH_INV = IB_UVERBS_WR_SEND_WITH_INV, 1291 + IB_WR_RDMA_READ_WITH_INV = IB_UVERBS_WR_RDMA_READ_WITH_INV, 1292 + IB_WR_LOCAL_INV = IB_UVERBS_WR_LOCAL_INV, 1293 + 
IB_WR_MASKED_ATOMIC_CMP_AND_SWP = 1294 + IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP, 1295 + IB_WR_MASKED_ATOMIC_FETCH_AND_ADD = 1296 + IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD, 1297 + 1298 + /* These are kernel only and can not be issued by userspace */ 1299 + IB_WR_REG_MR = 0x20, 1300 1300 IB_WR_REG_SIG_MR, 1301 + 1301 1302 /* reserve values for low level drivers' internal use. 1302 1303 * These values will not be used at all in the ib core layer. 1303 1304 */ ··· 1496 1485 * it is set when we are closing the file descriptor and indicates 1497 1486 * that mm_sem may be locked. 1498 1487 */ 1499 - int closing; 1488 + bool closing; 1500 1489 1501 1490 bool cleanup_retryable; 1502 1491 1503 - struct pid *tgid; 1504 1492 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 1505 - struct rb_root_cached umem_tree; 1506 - /* 1507 - * Protects .umem_rbroot and tree, as well as odp_mrs_count and 1508 - * mmu notifiers registration. 1509 - */ 1510 - struct rw_semaphore umem_rwsem; 1511 - void (*invalidate_range)(struct ib_umem *umem, 1493 + void (*invalidate_range)(struct ib_umem_odp *umem_odp, 1512 1494 unsigned long start, unsigned long end); 1513 - 1514 - struct mmu_notifier mn; 1515 - atomic_t notifier_count; 1516 - /* A list of umems that don't have private mmu notifier counters yet. 
*/ 1517 - struct list_head no_private_counters; 1518 - int odp_mrs_count; 1495 + struct mutex per_mm_list_lock; 1496 + struct list_head per_mm_list; 1519 1497 #endif 1520 1498 1521 1499 struct ib_rdmacg_object cg_obj; ··· 1570 1570 typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context); 1571 1571 1572 1572 enum ib_poll_context { 1573 - IB_POLL_DIRECT, /* caller context, no hw completions */ 1574 - IB_POLL_SOFTIRQ, /* poll from softirq context */ 1575 - IB_POLL_WORKQUEUE, /* poll from workqueue */ 1573 + IB_POLL_DIRECT, /* caller context, no hw completions */ 1574 + IB_POLL_SOFTIRQ, /* poll from softirq context */ 1575 + IB_POLL_WORKQUEUE, /* poll from workqueue */ 1576 + IB_POLL_UNBOUND_WORKQUEUE, /* poll from unbound workqueue */ 1576 1577 }; 1577 1578 1578 1579 struct ib_cq { ··· 1590 1589 struct irq_poll iop; 1591 1590 struct work_struct work; 1592 1591 }; 1592 + struct workqueue_struct *comp_wq; 1593 1593 /* 1594 1594 * Implementation details of the RDMA core, don't use in drivers: 1595 1595 */ ··· 2265 2263 struct list_head event_handler_list; 2266 2264 spinlock_t event_handler_lock; 2267 2265 2268 - spinlock_t client_data_lock; 2266 + rwlock_t client_data_lock; 2269 2267 struct list_head core_list; 2270 2268 /* Access to the client_data_list is protected by the client_data_lock 2271 - * spinlock and the lists_rwsem read-write semaphore */ 2269 + * rwlock and the lists_rwsem read-write semaphore 2270 + */ 2272 2271 struct list_head client_data_list; 2273 2272 2274 2273 struct ib_cache cache; ··· 2553 2550 2554 2551 struct module *owner; 2555 2552 struct device dev; 2556 - struct kobject *ports_parent; 2553 + /* First group for device attributes, 2554 + * Second group for driver provided attributes (optional). 2555 + * It is NULL terminated array. 
2556 + */ 2557 + const struct attribute_group *groups[3]; 2558 + 2559 + struct kobject *ports_kobj; 2557 2560 struct list_head port_list; 2558 2561 2559 2562 enum { ··· 2642 2633 2643 2634 void ib_get_device_fw_str(struct ib_device *device, char *str); 2644 2635 2645 - int ib_register_device(struct ib_device *device, 2646 - int (*port_callback)(struct ib_device *, 2647 - u8, struct kobject *)); 2636 + int ib_register_device(struct ib_device *device, const char *name, 2637 + int (*port_callback)(struct ib_device *, u8, 2638 + struct kobject *)); 2648 2639 void ib_unregister_device(struct ib_device *device); 2649 2640 2650 2641 int ib_register_client (struct ib_client *client); ··· 2653 2644 void *ib_get_client_data(struct ib_device *device, struct ib_client *client); 2654 2645 void ib_set_client_data(struct ib_device *device, struct ib_client *client, 2655 2646 void *data); 2647 + 2648 + #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) 2649 + int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, 2650 + unsigned long pfn, unsigned long size, pgprot_t prot); 2651 + int rdma_user_mmap_page(struct ib_ucontext *ucontext, 2652 + struct vm_area_struct *vma, struct page *page, 2653 + unsigned long size); 2654 + #else 2655 + static inline int rdma_user_mmap_io(struct ib_ucontext *ucontext, 2656 + struct vm_area_struct *vma, 2657 + unsigned long pfn, unsigned long size, 2658 + pgprot_t prot) 2659 + { 2660 + return -EINVAL; 2661 + } 2662 + static inline int rdma_user_mmap_page(struct ib_ucontext *ucontext, 2663 + struct vm_area_struct *vma, struct page *page, 2664 + unsigned long size) 2665 + { 2666 + return -EINVAL; 2667 + } 2668 + #endif 2656 2669 2657 2670 static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len) 2658 2671 { ··· 2759 2728 * @next_state: Next QP state 2760 2729 * @type: QP type 2761 2730 * @mask: Mask of supplied QP attributes 2762 - * @ll : link layer of port 2763 2731 * 2764 2732 * This function is a helper 
function that a low-level driver's 2765 2733 * modify_qp method can use to validate the consumer's input. It ··· 2767 2737 * and that the attribute mask supplied is allowed for the transition. 2768 2738 */ 2769 2739 bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, 2770 - enum ib_qp_type type, enum ib_qp_attr_mask mask, 2771 - enum rdma_link_layer ll); 2740 + enum ib_qp_type type, enum ib_qp_attr_mask mask); 2772 2741 2773 2742 void ib_register_event_handler(struct ib_event_handler *event_handler); 2774 2743 void ib_unregister_event_handler(struct ib_event_handler *event_handler); ··· 4196 4167 4197 4168 } 4198 4169 4199 - static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow, 4200 - struct ib_qp *qp, struct ib_device *device) 4201 - { 4202 - uobj->object = ibflow; 4203 - ibflow->uobject = uobj; 4204 - 4205 - if (qp) { 4206 - atomic_inc(&qp->usecnt); 4207 - ibflow->qp = qp; 4208 - } 4209 - 4210 - ibflow->device = device; 4211 - } 4212 - 4213 4170 /** 4214 4171 * rdma_roce_rescan_device - Rescan all of the network devices in the system 4215 4172 * and add their gids, as needed, to the relevant RoCE devices. ··· 4219 4204 unsigned char name_assign_type, 4220 4205 void (*setup)(struct net_device *), 4221 4206 struct net_device *netdev); 4207 + 4208 + /** 4209 + * rdma_set_device_sysfs_group - Set device attributes group to have 4210 + * driver specific sysfs entries at 4211 + * for infiniband class. 4212 + * 4213 + * @device: device pointer for which attributes to be created 4214 + * @group: Pointer to group which should be added when device 4215 + * is registered with sysfs. 4216 + * rdma_set_device_sysfs_group() allows existing drivers to expose one 4217 + * group per device to have sysfs attributes. 4218 + * 4219 + * NOTE: New drivers should not make use of this API; instead new device 4220 + * parameter should be exposed via netlink command. This API and mechanism 4221 + * exist only for existing drivers. 
4222 + */ 4223 + static inline void 4224 + rdma_set_device_sysfs_group(struct ib_device *dev, 4225 + const struct attribute_group *group) 4226 + { 4227 + dev->groups[1] = group; 4228 + } 4222 4229 4223 4230 #endif /* IB_VERBS_H */
+8 -3
include/rdma/rdma_cm.h
··· 152 152 * @ps: RDMA port space. 153 153 * @qp_type: type of queue pair associated with the id. 154 154 * 155 - * The id holds a reference on the network namespace until it is destroyed. 155 + * Returns a new rdma_cm_id. The id holds a reference on the network 156 + * namespace until it is destroyed. 157 + * 158 + * The event handler callback serializes on the id's mutex and is 159 + * allowed to sleep. 156 160 */ 157 161 #define rdma_create_id(net, event_handler, context, ps, qp_type) \ 158 162 __rdma_create_id((net), (event_handler), (context), (ps), (qp_type), \ ··· 196 192 * @timeout_ms: Time to wait for resolution to complete. 197 193 */ 198 194 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, 199 - const struct sockaddr *dst_addr, int timeout_ms); 195 + const struct sockaddr *dst_addr, 196 + unsigned long timeout_ms); 200 197 201 198 /** 202 199 * rdma_resolve_route - Resolve the RDMA address bound to the RDMA identifier ··· 207 202 * Users must have first called rdma_resolve_addr to resolve a dst_addr 208 203 * into an RDMA address before calling this routine. 209 204 */ 210 - int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms); 205 + int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms); 211 206 212 207 /** 213 208 * rdma_create_qp - Allocate a QP and associate it with the specified RDMA
+2 -2
include/rdma/rdma_netlink.h
··· 96 96 /** 97 97 * Check if there are any listeners to the netlink group 98 98 * @group: the netlink group ID 99 - * Returns 0 on success or a negative for no listeners. 99 + * Returns true on success or false if no listeners. 100 100 */ 101 - int rdma_nl_chk_listeners(unsigned int group); 101 + bool rdma_nl_chk_listeners(unsigned int group); 102 102 #endif /* _RDMA_NETLINK_H */
+45 -6
include/rdma/rdma_vt.h
··· 149 149 150 150 #define RVT_CQN_MAX 16 /* maximum length of cq name */ 151 151 152 + #define RVT_SGE_COPY_MEMCPY 0 153 + #define RVT_SGE_COPY_CACHELESS 1 154 + #define RVT_SGE_COPY_ADAPTIVE 2 155 + 152 156 /* 153 157 * Things that are driver specific, module parameters in hfi1 and qib 154 158 */ ··· 165 161 */ 166 162 unsigned int lkey_table_size; 167 163 unsigned int qp_table_size; 164 + unsigned int sge_copy_mode; 165 + unsigned int wss_threshold; 166 + unsigned int wss_clean_period; 168 167 int qpn_start; 169 168 int qpn_inc; 170 169 int qpn_res_start; ··· 200 193 u8 log_pmtu; 201 194 }; 202 195 196 + /* memory working set size */ 197 + struct rvt_wss { 198 + unsigned long *entries; 199 + atomic_t total_count; 200 + atomic_t clean_counter; 201 + atomic_t clean_entry; 202 + 203 + int threshold; 204 + int num_entries; 205 + long pages_mask; 206 + unsigned int clean_period; 207 + }; 208 + 203 209 struct rvt_dev_info; 204 210 struct rvt_swqe; 205 211 struct rvt_driver_provided { ··· 231 211 * version requires the s_lock not to be held. The other assumes the 232 212 * s_lock is held. 233 213 */ 234 - void (*schedule_send)(struct rvt_qp *qp); 235 - void (*schedule_send_no_lock)(struct rvt_qp *qp); 214 + bool (*schedule_send)(struct rvt_qp *qp); 215 + bool (*schedule_send_no_lock)(struct rvt_qp *qp); 236 216 237 - /* Driver specific work request checking */ 238 - int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe); 217 + /* 218 + * Driver specific work request setup and checking. 219 + * This function is allowed to perform any setup, checks, or 220 + * adjustments required to the SWQE in order to be usable by 221 + * underlying protocols. This includes private data structure 222 + * allocations. 223 + */ 224 + int (*setup_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe, 225 + bool *call_send); 239 226 240 227 /* 241 228 * Sometimes rdmavt needs to kick the driver's send progress. 
That is ··· 398 371 /* post send table */ 399 372 const struct rvt_operation_params *post_parms; 400 373 374 + /* opcode translation table */ 375 + const enum ib_wc_opcode *wc_opcode; 376 + 401 377 /* Driver specific helper functions */ 402 378 struct rvt_driver_provided driver_f; 403 379 ··· 441 411 u32 n_mcast_grps_allocated; /* number of mcast groups allocated */ 442 412 spinlock_t n_mcast_grps_lock; 443 413 414 + /* Memory Working Set Size */ 415 + struct rvt_wss *wss; 444 416 }; 445 417 446 418 /** ··· 455 423 const char *fmt, const char *name, 456 424 const int unit) 457 425 { 458 - snprintf(rdi->ibdev.name, sizeof(rdi->ibdev.name), fmt, name, unit); 426 + /* 427 + * FIXME: rvt and its users want to touch the ibdev before 428 + * registration and have things like the name work. We don't have the 429 + * infrastructure in the core to support this directly today, hack it 430 + * to work by setting the name manually here. 431 + */ 432 + dev_set_name(&rdi->ibdev.dev, fmt, name, unit); 433 + strlcpy(rdi->ibdev.name, dev_name(&rdi->ibdev.dev), IB_DEVICE_NAME_MAX); 459 434 } 460 435 461 436 /** ··· 473 434 */ 474 435 static inline const char *rvt_get_ibdev_name(const struct rvt_dev_info *rdi) 475 436 { 476 - return rdi->ibdev.name; 437 + return dev_name(&rdi->ibdev.dev); 477 438 } 478 439 479 440 static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd)
+7
include/rdma/rdmavt_qp.h
··· 678 678 void rvt_stop_rc_timers(struct rvt_qp *qp); 679 679 void rvt_add_retry_timer(struct rvt_qp *qp); 680 680 681 + void rvt_copy_sge(struct rvt_qp *qp, struct rvt_sge_state *ss, 682 + void *data, u32 length, 683 + bool release, bool copy_last); 684 + void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, 685 + enum ib_wc_status status); 686 + void rvt_ruc_loopback(struct rvt_qp *qp); 687 + 681 688 /** 682 689 * struct rvt_qp_iter - the iterator for QPs 683 690 * @qp - the current QP
+3 -9
include/rdma/restrack.h
··· 173 173 /** 174 174 * rdma_restrack_set_task() - set the task for this resource 175 175 * @res: resource entry 176 - * @task: task struct 176 + * @caller: kernel name, the current task will be used if the caller is NULL. 177 177 */ 178 - static inline void rdma_restrack_set_task(struct rdma_restrack_entry *res, 179 - struct task_struct *task) 180 - { 181 - if (res->task) 182 - put_task_struct(res->task); 183 - get_task_struct(task); 184 - res->task = task; 185 - } 178 + void rdma_restrack_set_task(struct rdma_restrack_entry *res, 179 + const char *caller); 186 180 187 181 /* 188 182 * Helper functions for rdma drivers when filling out
+110 -1
include/rdma/uverbs_ioctl.h
··· 52 52 UVERBS_ATTR_TYPE_IDR, 53 53 UVERBS_ATTR_TYPE_FD, 54 54 UVERBS_ATTR_TYPE_ENUM_IN, 55 + UVERBS_ATTR_TYPE_IDRS_ARRAY, 55 56 }; 56 57 57 58 enum uverbs_obj_access { ··· 102 101 } enum_def; 103 102 } u; 104 103 105 - /* This weird split of the enum lets us remove some padding */ 104 + /* This weird split lets us remove some padding */ 106 105 union { 107 106 struct { 108 107 /* ··· 112 111 */ 113 112 const struct uverbs_attr_spec *ids; 114 113 } enum_def; 114 + 115 + struct { 116 + /* 117 + * higher bits mean the namespace and lower bits mean 118 + * the type id within the namespace. 119 + */ 120 + u16 obj_type; 121 + u16 min_len; 122 + u16 max_len; 123 + u8 access; 124 + } objs_arr; 115 125 } u2; 116 126 }; 117 127 ··· 263 251 return attr_key - 1; 264 252 } 265 253 254 + static inline __attribute_const__ u32 uapi_bkey_to_key_attr(u32 attr_bkey) 255 + { 256 + return attr_bkey + 1; 257 + } 258 + 266 259 /* 267 260 * ======================================= 268 261 * Verbs definitions ··· 340 323 #define UA_MANDATORY .mandatory = 1 341 324 #define UA_OPTIONAL .mandatory = 0 342 325 326 + /* 327 + * min_len must be bigger than 0 and _max_len must be smaller than 4095. Only 328 + * READ\WRITE accesses are supported. 329 + */ 330 + #define UVERBS_ATTR_IDRS_ARR(_attr_id, _idr_type, _access, _min_len, _max_len, \ 331 + ...) \ 332 + (&(const struct uverbs_attr_def){ \ 333 + .id = (_attr_id) + \ 334 + BUILD_BUG_ON_ZERO((_min_len) == 0 || \ 335 + (_max_len) > \ 336 + PAGE_SIZE / sizeof(void *) || \ 337 + (_min_len) > (_max_len) || \ 338 + (_access) == UVERBS_ACCESS_NEW || \ 339 + (_access) == UVERBS_ACCESS_DESTROY), \ 340 + .attr = { .type = UVERBS_ATTR_TYPE_IDRS_ARRAY, \ 341 + .u2.objs_arr.obj_type = _idr_type, \ 342 + .u2.objs_arr.access = _access, \ 343 + .u2.objs_arr.min_len = _min_len, \ 344 + .u2.objs_arr.max_len = _max_len, \ 345 + __VA_ARGS__ } }) 346 + 343 347 #define UVERBS_ATTR_IDR(_attr_id, _idr_type, _access, ...) 
\ 344 348 (&(const struct uverbs_attr_def){ \ 345 349 .id = _attr_id, \ ··· 402 364 .u.enum_def.num_elems = ARRAY_SIZE(_enum_arr), \ 403 365 __VA_ARGS__ }, \ 404 366 }) 367 + 368 + /* An input value that is a member in the enum _enum_type. */ 369 + #define UVERBS_ATTR_CONST_IN(_attr_id, _enum_type, ...) \ 370 + UVERBS_ATTR_PTR_IN( \ 371 + _attr_id, \ 372 + UVERBS_ATTR_SIZE( \ 373 + sizeof(u64) + BUILD_BUG_ON_ZERO(!sizeof(_enum_type)), \ 374 + sizeof(u64)), \ 375 + __VA_ARGS__) 405 376 406 377 /* 407 378 * An input value that is a bitwise combination of values of _enum_type. ··· 478 431 const struct uverbs_api_attr *attr_elm; 479 432 }; 480 433 434 + struct uverbs_objs_arr_attr { 435 + struct ib_uobject **uobjects; 436 + u16 len; 437 + }; 438 + 481 439 struct uverbs_attr { 482 440 union { 483 441 struct uverbs_ptr_attr ptr_attr; 484 442 struct uverbs_obj_attr obj_attr; 443 + struct uverbs_objs_arr_attr objs_arr_attr; 485 444 }; 486 445 }; 487 446 ··· 558 505 return PTR_ERR(attr); 559 506 560 507 return attr->ptr_attr.len; 508 + } 509 + 510 + /** 511 + * uverbs_attr_get_uobjs_arr() - Provides array's properties for attribute for 512 + * UVERBS_ATTR_TYPE_IDRS_ARRAY. 513 + * @arr: Returned pointer to array of pointers for uobjects or NULL if 514 + * the attribute isn't provided. 515 + * 516 + * Return: The array length or 0 if no attribute was provided. 
517 + */ 518 + static inline int uverbs_attr_get_uobjs_arr( 519 + const struct uverbs_attr_bundle *attrs_bundle, u16 attr_idx, 520 + struct ib_uobject ***arr) 521 + { 522 + const struct uverbs_attr *attr = 523 + uverbs_attr_get(attrs_bundle, attr_idx); 524 + 525 + if (IS_ERR(attr)) { 526 + *arr = NULL; 527 + return 0; 528 + } 529 + 530 + *arr = attr->objs_arr_attr.uobjects; 531 + 532 + return attr->objs_arr_attr.len; 561 533 } 562 534 563 535 static inline bool uverbs_attr_ptr_is_inline(const struct uverbs_attr *attr) ··· 681 603 { 682 604 return _uverbs_alloc(bundle, size, GFP_KERNEL | __GFP_ZERO); 683 605 } 606 + int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle, 607 + size_t idx, s64 lower_bound, u64 upper_bound, 608 + s64 *def_val); 684 609 #else 685 610 static inline int 686 611 uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle, ··· 712 631 { 713 632 return ERR_PTR(-EINVAL); 714 633 } 634 + static inline int 635 + _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle, 636 + size_t idx, s64 lower_bound, u64 upper_bound, 637 + s64 *def_val) 638 + { 639 + return -EINVAL; 640 + } 715 641 #endif 716 642 643 + #define uverbs_get_const(_to, _attrs_bundle, _idx) \ 644 + ({ \ 645 + s64 _val; \ 646 + int _ret = _uverbs_get_const(&_val, _attrs_bundle, _idx, \ 647 + type_min(typeof(*_to)), \ 648 + type_max(typeof(*_to)), NULL); \ 649 + (*_to) = _val; \ 650 + _ret; \ 651 + }) 652 + 653 + #define uverbs_get_const_default(_to, _attrs_bundle, _idx, _default) \ 654 + ({ \ 655 + s64 _val; \ 656 + s64 _def_val = _default; \ 657 + int _ret = \ 658 + _uverbs_get_const(&_val, _attrs_bundle, _idx, \ 659 + type_min(typeof(*_to)), \ 660 + type_max(typeof(*_to)), &_def_val); \ 661 + (*_to) = _val; \ 662 + _ret; \ 663 + }) 717 664 #endif
+51
include/rdma/uverbs_std_types.h
··· 140 140 #define uobj_alloc(_type, _ufile, _ib_dev) \ 141 141 __uobj_alloc(uobj_get_type(_ufile, _type), _ufile, _ib_dev) 142 142 143 + static inline void uverbs_flow_action_fill_action(struct ib_flow_action *action, 144 + struct ib_uobject *uobj, 145 + struct ib_device *ib_dev, 146 + enum ib_flow_action_type type) 147 + { 148 + atomic_set(&action->usecnt, 0); 149 + action->device = ib_dev; 150 + action->type = type; 151 + action->uobject = uobj; 152 + uobj->object = action; 153 + } 154 + 155 + struct ib_uflow_resources { 156 + size_t max; 157 + size_t num; 158 + size_t collection_num; 159 + size_t counters_num; 160 + struct ib_counters **counters; 161 + struct ib_flow_action **collection; 162 + }; 163 + 164 + struct ib_uflow_object { 165 + struct ib_uobject uobject; 166 + struct ib_uflow_resources *resources; 167 + }; 168 + 169 + struct ib_uflow_resources *flow_resources_alloc(size_t num_specs); 170 + void flow_resources_add(struct ib_uflow_resources *uflow_res, 171 + enum ib_flow_spec_type type, 172 + void *ibobj); 173 + void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res); 174 + 175 + static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow, 176 + struct ib_qp *qp, struct ib_device *device, 177 + struct ib_uflow_resources *uflow_res) 178 + { 179 + struct ib_uflow_object *uflow; 180 + 181 + uobj->object = ibflow; 182 + ibflow->uobject = uobj; 183 + 184 + if (qp) { 185 + atomic_inc(&qp->usecnt); 186 + ibflow->qp = qp; 187 + } 188 + 189 + ibflow->device = device; 190 + uflow = container_of(uobj, typeof(*uflow), uobject); 191 + uflow->resources = uflow_res; 192 + } 193 + 143 194 #endif 144 195
+19 -1
include/uapi/rdma/ib_user_verbs.h
··· 763 763 __u32 lkey; 764 764 }; 765 765 766 + enum ib_uverbs_wr_opcode { 767 + IB_UVERBS_WR_RDMA_WRITE = 0, 768 + IB_UVERBS_WR_RDMA_WRITE_WITH_IMM = 1, 769 + IB_UVERBS_WR_SEND = 2, 770 + IB_UVERBS_WR_SEND_WITH_IMM = 3, 771 + IB_UVERBS_WR_RDMA_READ = 4, 772 + IB_UVERBS_WR_ATOMIC_CMP_AND_SWP = 5, 773 + IB_UVERBS_WR_ATOMIC_FETCH_AND_ADD = 6, 774 + IB_UVERBS_WR_LOCAL_INV = 7, 775 + IB_UVERBS_WR_BIND_MW = 8, 776 + IB_UVERBS_WR_SEND_WITH_INV = 9, 777 + IB_UVERBS_WR_TSO = 10, 778 + IB_UVERBS_WR_RDMA_READ_WITH_INV = 11, 779 + IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP = 12, 780 + IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD = 13, 781 + /* Review enum ib_wr_opcode before modifying this */ 782 + }; 783 + 766 784 struct ib_uverbs_send_wr { 767 785 __aligned_u64 wr_id; 768 786 __u32 num_sge; 769 - __u32 opcode; 787 + __u32 opcode; /* see enum ib_uverbs_wr_opcode */ 770 788 __u32 send_flags; 771 789 union { 772 790 __be32 imm_data;
+16
include/uapi/rdma/mlx5-abi.h
··· 45 45 MLX5_QP_FLAG_BFREG_INDEX = 1 << 3, 46 46 MLX5_QP_FLAG_TYPE_DCT = 1 << 4, 47 47 MLX5_QP_FLAG_TYPE_DCI = 1 << 5, 48 + MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC = 1 << 6, 49 + MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC = 1 << 7, 50 + MLX5_QP_FLAG_ALLOW_SCATTER_CQE = 1 << 8, 48 51 }; 49 52 50 53 enum { ··· 352 349 __u32 flags; 353 350 }; 354 351 352 + enum mlx5_ib_create_qp_resp_mask { 353 + MLX5_IB_CREATE_QP_RESP_MASK_TIRN = 1UL << 0, 354 + MLX5_IB_CREATE_QP_RESP_MASK_TISN = 1UL << 1, 355 + MLX5_IB_CREATE_QP_RESP_MASK_RQN = 1UL << 2, 356 + MLX5_IB_CREATE_QP_RESP_MASK_SQN = 1UL << 3, 357 + }; 358 + 355 359 struct mlx5_ib_create_qp_resp { 356 360 __u32 bfreg_index; 357 361 __u32 reserved; 362 + __u32 comp_mask; 363 + __u32 tirn; 364 + __u32 tisn; 365 + __u32 rqn; 366 + __u32 sqn; 367 + __u32 reserved1; 358 368 }; 359 369 360 370 struct mlx5_ib_alloc_mw {
+21
include/uapi/rdma/mlx5_user_ioctl_cmds.h
··· 125 125 MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK, 126 126 MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE, 127 127 MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA, 128 + MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, 128 129 }; 129 130 130 131 enum mlx5_ib_flow_matcher_destroy_attrs { ··· 156 155 MLX5_IB_ATTR_CREATE_FLOW_DEST_QP, 157 156 MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX, 158 157 MLX5_IB_ATTR_CREATE_FLOW_MATCHER, 158 + MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, 159 + MLX5_IB_ATTR_CREATE_FLOW_TAG, 159 160 }; 160 161 161 162 enum mlx5_ib_destoy_flow_attrs { ··· 167 164 enum mlx5_ib_flow_methods { 168 165 MLX5_IB_METHOD_CREATE_FLOW = (1U << UVERBS_ID_NS_SHIFT), 169 166 MLX5_IB_METHOD_DESTROY_FLOW, 167 + }; 168 + 169 + enum mlx5_ib_flow_action_methods { 170 + MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER = (1U << UVERBS_ID_NS_SHIFT), 171 + MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT, 172 + }; 173 + 174 + enum mlx5_ib_create_flow_action_create_modify_header_attrs { 175 + MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE = (1U << UVERBS_ID_NS_SHIFT), 176 + MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM, 177 + MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE, 178 + }; 179 + 180 + enum mlx5_ib_create_flow_action_create_packet_reformat_attrs { 181 + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE = (1U << UVERBS_ID_NS_SHIFT), 182 + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE, 183 + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE, 184 + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF, 170 185 }; 171 186 172 187 #endif
+12
include/uapi/rdma/mlx5_user_ioctl_verbs.h
··· 39 39 MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA = 1 << 0, 40 40 }; 41 41 42 + enum mlx5_ib_uapi_flow_table_type { 43 + MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX = 0x0, 44 + MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX = 0x1, 45 + }; 46 + 47 + enum mlx5_ib_uapi_flow_action_packet_reformat_type { 48 + MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2 = 0x0, 49 + MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL = 0x1, 50 + MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2 = 0x2, 51 + MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x3, 52 + }; 53 + 42 54 #endif 43 55
+2 -1
include/uapi/rdma/rdma_netlink.h
··· 227 227 RDMA_NLDEV_CMD_UNSPEC, 228 228 229 229 RDMA_NLDEV_CMD_GET, /* can dump */ 230 + RDMA_NLDEV_CMD_SET, 230 231 231 - /* 2 - 4 are free to use */ 232 + /* 3 - 4 are free to use */ 232 233 233 234 RDMA_NLDEV_CMD_PORT_GET = 5, /* can dump */ 234 235
+5 -2
include/uapi/rdma/rdma_user_ioctl_cmds.h
··· 53 53 54 54 struct ib_uverbs_attr { 55 55 __u16 attr_id; /* command specific type attribute */ 56 - __u16 len; /* only for pointers */ 56 + __u16 len; /* only for pointers and IDRs array */ 57 57 __u16 flags; /* combination of UVERBS_ATTR_F_XXXX */ 58 58 union { 59 59 struct { ··· 63 63 __u16 reserved; 64 64 } attr_data; 65 65 union { 66 - /* Used by PTR_IN/OUT, ENUM_IN and IDR */ 66 + /* 67 + * ptr to command, inline data, idr/fd or 68 + * ptr to __u32 array of IDRs 69 + */ 67 70 __aligned_u64 data; 68 71 /* Used by FD_IN and FD_OUT */ 69 72 __s64 data_s64;