Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband:
  mlx4_core: Increase command timeout for INIT_HCA to 10 seconds
  IPoIB/cm: Use common CQ for CM send completions
  IB/uverbs: Fix checking of userspace object ownership
  IB/mlx4: Sanity check userspace send queue sizes
  IPoIB: Rewrite "if (!likely(...))" as "if (unlikely(!(...)))"
  IB/ehca: Enable large page MRs by default
  IB/ehca: Change meaning of hca_cap_mr_pgsize
  IB/ehca: Fix ehca_encode_hwpage_size() and alloc_fmr()
  IB/ehca: Fix masking error in {,re}reg_phys_mr()
  IB/ehca: Supply QP token for SRQ base QPs
  IPoIB: Use round_jiffies() for ah_reap_task
  RDMA/cma: Fix deadlock destroying listen requests
  RDMA/cma: Add locking around QP accesses
  IB/mthca: Avoid alignment traps when writing doorbells
  mlx4_core: Kill mlx4_write64_raw()

19 files changed: +287 -327
+83 -77
drivers/infiniband/core/cma.c
··· 114 114 115 115 struct rdma_bind_list *bind_list; 116 116 struct hlist_node node; 117 - struct list_head list; 118 - struct list_head listen_list; 117 + struct list_head list; /* listen_any_list or cma_device.list */ 118 + struct list_head listen_list; /* per device listens */ 119 119 struct cma_device *cma_dev; 120 120 struct list_head mc_list; 121 121 122 + int internal_id; 122 123 enum cma_state state; 123 124 spinlock_t lock; 125 + struct mutex qp_mutex; 126 + 124 127 struct completion comp; 125 128 atomic_t refcount; 126 129 wait_queue_head_t wait_remove; ··· 392 389 id_priv->id.event_handler = event_handler; 393 390 id_priv->id.ps = ps; 394 391 spin_lock_init(&id_priv->lock); 392 + mutex_init(&id_priv->qp_mutex); 395 393 init_completion(&id_priv->comp); 396 394 atomic_set(&id_priv->refcount, 1); 397 395 init_waitqueue_head(&id_priv->wait_remove); ··· 478 474 479 475 void rdma_destroy_qp(struct rdma_cm_id *id) 480 476 { 481 - ib_destroy_qp(id->qp); 477 + struct rdma_id_private *id_priv; 478 + 479 + id_priv = container_of(id, struct rdma_id_private, id); 480 + mutex_lock(&id_priv->qp_mutex); 481 + ib_destroy_qp(id_priv->id.qp); 482 + id_priv->id.qp = NULL; 483 + mutex_unlock(&id_priv->qp_mutex); 482 484 } 483 485 EXPORT_SYMBOL(rdma_destroy_qp); 484 486 485 - static int cma_modify_qp_rtr(struct rdma_cm_id *id) 487 + static int cma_modify_qp_rtr(struct rdma_id_private *id_priv) 486 488 { 487 489 struct ib_qp_attr qp_attr; 488 490 int qp_attr_mask, ret; 489 491 490 - if (!id->qp) 491 - return 0; 492 + mutex_lock(&id_priv->qp_mutex); 493 + if (!id_priv->id.qp) { 494 + ret = 0; 495 + goto out; 496 + } 492 497 493 498 /* Need to update QP attributes from default values. */ 494 499 qp_attr.qp_state = IB_QPS_INIT; 495 - ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask); 500 + ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 496 501 if (ret) 497 - return ret; 502 + goto out; 498 503 499 - ret = ib_modify_qp(id->qp, &qp_attr, qp_attr_mask); 504 + ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); 500 505 if (ret) 501 - return ret; 506 + goto out; 502 507 503 508 qp_attr.qp_state = IB_QPS_RTR; 504 - ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask); 509 + ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 505 510 if (ret) 506 - return ret; 511 + goto out; 507 512 508 - return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask); 513 + ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); 514 + out: 515 + mutex_unlock(&id_priv->qp_mutex); 516 + return ret; 509 517 } 510 518 511 - static int cma_modify_qp_rts(struct rdma_cm_id *id) 519 + static int cma_modify_qp_rts(struct rdma_id_private *id_priv) 512 520 { 513 521 struct ib_qp_attr qp_attr; 514 522 int qp_attr_mask, ret; 515 523 516 - if (!id->qp) 517 - return 0; 524 + mutex_lock(&id_priv->qp_mutex); 525 + if (!id_priv->id.qp) { 526 + ret = 0; 527 + goto out; 528 + } 518 529 519 530 qp_attr.qp_state = IB_QPS_RTS; 520 - ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask); 531 + ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 521 532 if (ret) 522 - return ret; 533 + goto out; 523 534 524 - return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask); 535 + ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); 536 + out: 537 + mutex_unlock(&id_priv->qp_mutex); 538 + return ret; 525 539 } 526 540 527 - static int cma_modify_qp_err(struct rdma_cm_id *id) 541 + static int cma_modify_qp_err(struct rdma_id_private *id_priv) 528 542 { 529 543 struct ib_qp_attr qp_attr; 544 + int ret; 530 545 531 - 
if (!id->qp) 532 - return 0; 546 + mutex_lock(&id_priv->qp_mutex); 547 + if (!id_priv->id.qp) { 548 + ret = 0; 549 + goto out; 550 + } 533 551 534 552 qp_attr.qp_state = IB_QPS_ERR; 535 - return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE); 553 + ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE); 554 + out: 555 + mutex_unlock(&id_priv->qp_mutex); 556 + return ret; 536 557 } 537 558 538 559 static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv, ··· 746 717 } 747 718 } 748 719 749 - static inline int cma_internal_listen(struct rdma_id_private *id_priv) 750 - { 751 - return (id_priv->state == CMA_LISTEN) && id_priv->cma_dev && 752 - cma_any_addr(&id_priv->id.route.addr.src_addr); 753 - } 754 - 755 - static void cma_destroy_listen(struct rdma_id_private *id_priv) 756 - { 757 - cma_exch(id_priv, CMA_DESTROYING); 758 - 759 - if (id_priv->cma_dev) { 760 - switch (rdma_node_get_transport(id_priv->id.device->node_type)) { 761 - case RDMA_TRANSPORT_IB: 762 - if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) 763 - ib_destroy_cm_id(id_priv->cm_id.ib); 764 - break; 765 - case RDMA_TRANSPORT_IWARP: 766 - if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw)) 767 - iw_destroy_cm_id(id_priv->cm_id.iw); 768 - break; 769 - default: 770 - break; 771 - } 772 - cma_detach_from_dev(id_priv); 773 - } 774 - list_del(&id_priv->listen_list); 775 - 776 - cma_deref_id(id_priv); 777 - wait_for_completion(&id_priv->comp); 778 - 779 - kfree(id_priv); 780 - } 781 - 782 720 static void cma_cancel_listens(struct rdma_id_private *id_priv) 783 721 { 784 722 struct rdma_id_private *dev_id_priv; 785 723 724 + /* 725 + * Remove from listen_any_list to prevent added devices from spawning 726 + * additional listen requests. 727 + */ 786 728 mutex_lock(&lock); 787 729 list_del(&id_priv->list); 788 730 789 731 while (!list_empty(&id_priv->listen_list)) { 790 732 dev_id_priv = list_entry(id_priv->listen_list.next, 791 733 struct rdma_id_private, listen_list); 792 - cma_destroy_listen(dev_id_priv); 734 + /* sync with device removal to avoid duplicate destruction */ 735 + list_del_init(&dev_id_priv->list); 736 + list_del(&dev_id_priv->listen_list); 737 + mutex_unlock(&lock); 738 + 739 + rdma_destroy_id(&dev_id_priv->id); 740 + mutex_lock(&lock); 793 741 } 794 742 mutex_unlock(&lock); 795 743 } ··· 854 848 cma_deref_id(id_priv); 855 849 wait_for_completion(&id_priv->comp); 856 850 851 + if (id_priv->internal_id) 852 + cma_deref_id(id_priv->id.context); 853 + 857 854 kfree(id_priv->id.route.path_rec); 858 855 kfree(id_priv); 859 856 } ··· 866 857 { 867 858 int ret; 868 859 869 - ret = cma_modify_qp_rtr(&id_priv->id); 860 + ret = cma_modify_qp_rtr(id_priv); 870 861 if (ret) 871 862 goto reject; 872 863 873 - ret = cma_modify_qp_rts(&id_priv->id); 864 + ret = cma_modify_qp_rts(id_priv); 874 865 if (ret) 875 866 goto reject; 876 867 ··· 880 871 881 872 return 0; 882 873 reject: 883 - cma_modify_qp_err(&id_priv->id); 874 + cma_modify_qp_err(id_priv); 884 875 ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, 885 876 NULL, 0, NULL, 0); 886 877 return ret; ··· 956 947 /* ignore event */ 957 948 goto out; 958 949 case IB_CM_REJ_RECEIVED: 959 - cma_modify_qp_err(&id_priv->id); 950 + cma_modify_qp_err(id_priv); 960 951 event.status = ib_event->param.rej_rcvd.reason; 961 952 event.event = RDMA_CM_EVENT_REJECTED; 962 953 event.param.conn.private_data = ib_event->private_data; ··· 1413 1404 1414 1405 cma_attach_to_dev(dev_id_priv, cma_dev); 1415 1406 list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); 1407 + 
atomic_inc(&id_priv->refcount); 1408 + dev_id_priv->internal_id = 1; 1416 1409 1417 1410 ret = rdma_listen(id, id_priv->backlog); 1418 1411 if (ret) 1419 - goto err; 1420 - 1421 - return; 1422 - err: 1423 - cma_destroy_listen(dev_id_priv); 1412 + printk(KERN_WARNING "RDMA CMA: cma_listen_on_dev, error %d, " 1413 + "listening on device %s", ret, cma_dev->device->name); 1424 1414 } 1425 1415 1426 1416 static void cma_listen_on_all(struct rdma_id_private *id_priv) ··· 2272 2264 sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr; 2273 2265 cm_id->remote_addr = *sin; 2274 2266 2275 - ret = cma_modify_qp_rtr(&id_priv->id); 2267 + ret = cma_modify_qp_rtr(id_priv); 2276 2268 if (ret) 2277 2269 goto out; 2278 2270 ··· 2339 2331 int qp_attr_mask, ret; 2340 2332 2341 2333 if (id_priv->id.qp) { 2342 - ret = cma_modify_qp_rtr(&id_priv->id); 2334 + ret = cma_modify_qp_rtr(id_priv); 2343 2335 if (ret) 2344 2336 goto out; 2345 2337 ··· 2378 2370 struct iw_cm_conn_param iw_param; 2379 2371 int ret; 2380 2372 2381 - ret = cma_modify_qp_rtr(&id_priv->id); 2373 + ret = cma_modify_qp_rtr(id_priv); 2382 2374 if (ret) 2383 2375 return ret; 2384 2376 ··· 2450 2442 2451 2443 return 0; 2452 2444 reject: 2453 - cma_modify_qp_err(id); 2445 + cma_modify_qp_err(id_priv); 2454 2446 rdma_reject(id, NULL, 0); 2455 2447 return ret; 2456 2448 } ··· 2520 2512 2521 2513 switch (rdma_node_get_transport(id->device->node_type)) { 2522 2514 case RDMA_TRANSPORT_IB: 2523 - ret = cma_modify_qp_err(id); 2515 + ret = cma_modify_qp_err(id_priv); 2524 2516 if (ret) 2525 2517 goto out; 2526 2518 /* Initiate or respond to a disconnect. */ ··· 2551 2543 cma_disable_remove(id_priv, CMA_ADDR_RESOLVED)) 2552 2544 return 0; 2553 2545 2546 + mutex_lock(&id_priv->qp_mutex); 2554 2547 if (!status && id_priv->id.qp) 2555 2548 status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, 2556 2549 multicast->rec.mlid); 2550 + mutex_unlock(&id_priv->qp_mutex); 2557 2551 2558 2552 memset(&event, 0, sizeof event); 2559 2553 event.status = status; ··· 2767 2757 id_priv = list_entry(cma_dev->id_list.next, 2768 2758 struct rdma_id_private, list); 2769 2759 2770 - if (cma_internal_listen(id_priv)) { 2771 - cma_destroy_listen(id_priv); 2772 - continue; 2773 - } 2774 - 2760 + list_del(&id_priv->listen_list); 2775 2761 list_del_init(&id_priv->list); 2776 2762 atomic_inc(&id_priv->refcount); 2777 2763 mutex_unlock(&lock); 2778 2764 2779 - ret = cma_remove_id_dev(id_priv); 2765 + ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv); 2780 2766 cma_deref_id(id_priv); 2781 2767 if (ret) 2782 2768 rdma_destroy_id(&id_priv->id);
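
The cma.c hunks above route every QP access through a new per-id qp_mutex and make rdma_destroy_qp() clear id->qp under that same mutex, so a modify can no longer race with destruction. Below is a stripped-down user-space model of that pattern; the structs and the pthread mutex are stand-ins for illustration, not rdma_cm types.

#include <pthread.h>
#include <stdlib.h>

struct qp_example { int state; };

struct cm_id_example {
        pthread_mutex_t qp_mutex;
        struct qp_example *qp;          /* NULL once destroyed */
};

/* Mirrors cma_modify_qp_*(): check for a QP and act on it under the lock. */
static int modify_qp_example(struct cm_id_example *id, int new_state)
{
        int ret = 0;

        pthread_mutex_lock(&id->qp_mutex);
        if (!id->qp)
                goto out;               /* already torn down: nothing to do */
        id->qp->state = new_state;      /* destroy cannot run concurrently */
out:
        pthread_mutex_unlock(&id->qp_mutex);
        return ret;
}

/* Mirrors rdma_destroy_qp(): free and clear the pointer under the lock. */
static void destroy_qp_example(struct cm_id_example *id)
{
        pthread_mutex_lock(&id->qp_mutex);
        free(id->qp);
        id->qp = NULL;
        pthread_mutex_unlock(&id->qp_mutex);
}
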
+6 -2
drivers/infiniband/core/uverbs_cmd.c
··· 147 147 148 148 spin_lock(&ib_uverbs_idr_lock); 149 149 uobj = idr_find(idr, id); 150 - if (uobj) 151 - kref_get(&uobj->ref); 150 + if (uobj) { 151 + if (uobj->context == context) 152 + kref_get(&uobj->ref); 153 + else 154 + uobj = NULL; 155 + } 152 156 spin_unlock(&ib_uverbs_idr_lock); 153 157 154 158 return uobj;
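
The uverbs_cmd.c change above only takes a reference when the object found in the idr was created by the caller's own context; otherwise the lookup now behaves as if the object did not exist. A toy version of that ownership check over a plain array (no idr, no kref; every name here is illustrative):

#include <stddef.h>

struct uobj_example {
        const void *context;            /* owner that created the object */
        int refcount;
};

static struct uobj_example *lookup_owned_example(struct uobj_example **table,
                                                 size_t nr, size_t id,
                                                 const void *context)
{
        struct uobj_example *uobj = (id < nr) ? table[id] : NULL;

        if (uobj) {
                if (uobj->context == context)
                        uobj->refcount++;       /* take a reference */
                else
                        uobj = NULL;            /* someone else's object */
        }
        return uobj;
}
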
-1
drivers/infiniband/hw/ehca/ehca_classes.h
··· 323 323 extern int ehca_port_act_time; 324 324 extern int ehca_use_hp_mr; 325 325 extern int ehca_scaling_code; 326 - extern int ehca_mr_largepage; 327 326 328 327 struct ipzu_queue_resp { 329 328 u32 qe_size; /* queue entry size */
+1
drivers/infiniband/hw/ehca/ehca_hca.c
··· 77 77 } 78 78 79 79 memset(props, 0, sizeof(struct ib_device_attr)); 80 + props->page_size_cap = shca->hca_cap_mr_pgsize; 80 81 props->fw_ver = rblock->hw_ver; 81 82 props->max_mr_size = rblock->max_mr_size; 82 83 props->vendor_id = rblock->vendor_id >> 8;
+17 -3
drivers/infiniband/hw/ehca/ehca_main.c
··· 65 65 int ehca_poll_all_eqs = 1; 66 66 int ehca_static_rate = -1; 67 67 int ehca_scaling_code = 0; 68 - int ehca_mr_largepage = 0; 68 + int ehca_mr_largepage = 1; 69 69 70 70 module_param_named(open_aqp1, ehca_open_aqp1, int, S_IRUGO); 71 71 module_param_named(debug_level, ehca_debug_level, int, S_IRUGO); ··· 260 260 { HCA_CAP_MINI_QP, "HCA_CAP_MINI_QP" }, 261 261 }; 262 262 263 - int ehca_sense_attributes(struct ehca_shca *shca) 263 + static int ehca_sense_attributes(struct ehca_shca *shca) 264 264 { 265 265 int i, ret = 0; 266 266 u64 h_ret; 267 267 struct hipz_query_hca *rblock; 268 268 struct hipz_query_port *port; 269 + 270 + static const u32 pgsize_map[] = { 271 + HCA_CAP_MR_PGSIZE_4K, 0x1000, 272 + HCA_CAP_MR_PGSIZE_64K, 0x10000, 273 + HCA_CAP_MR_PGSIZE_1M, 0x100000, 274 + HCA_CAP_MR_PGSIZE_16M, 0x1000000, 275 + }; 269 276 270 277 rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); 271 278 if (!rblock) { ··· 336 329 if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap)) 337 330 ehca_gen_dbg(" %s", hca_cap_descr[i].descr); 338 331 339 - shca->hca_cap_mr_pgsize = rblock->memory_page_size_supported; 332 + /* translate supported MR page sizes; always support 4K */ 333 + shca->hca_cap_mr_pgsize = EHCA_PAGESIZE; 334 + if (ehca_mr_largepage) { /* support extra sizes only if enabled */ 335 + for (i = 0; i < ARRAY_SIZE(pgsize_map); i += 2) 336 + if (rblock->memory_page_size_supported & pgsize_map[i]) 337 + shca->hca_cap_mr_pgsize |= pgsize_map[i + 1]; 338 + } 340 339 340 + /* query max MTU from first port -- it's the same for all ports */ 341 341 port = (struct hipz_query_port *)rblock; 342 342 h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port); 343 343 if (h_ret != H_SUCCESS) {
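
ehca_sense_attributes() above now translates the firmware's page-size capability bits into a bitmask of supported MR page sizes in bytes, always keeping 4K and adding the larger sizes only when ehca_mr_largepage is set. A compact stand-alone illustration of that translation; the CAP_* constants are invented for the example and do not match the eHCA firmware encoding:

#include <stddef.h>
#include <stdint.h>

#define CAP_PG_4K_EXAMPLE   0x1
#define CAP_PG_64K_EXAMPLE  0x2
#define CAP_PG_1M_EXAMPLE   0x4
#define CAP_PG_16M_EXAMPLE  0x8

static uint64_t supported_mr_pgsizes_example(uint32_t fw_caps, int largepage)
{
        static const struct { uint32_t cap; uint64_t size; } map[] = {
                { CAP_PG_4K_EXAMPLE,  0x1000    },
                { CAP_PG_64K_EXAMPLE, 0x10000   },
                { CAP_PG_1M_EXAMPLE,  0x100000  },
                { CAP_PG_16M_EXAMPLE, 0x1000000 },
        };
        uint64_t mask = 0x1000;         /* 4K is always supported */
        size_t i;

        if (!largepage)
                return mask;

        for (i = 0; i < sizeof(map) / sizeof(map[0]); i++)
                if (fw_caps & map[i].cap)
                        mask |= map[i].size;

        return mask;
}
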
+25 -32
drivers/infiniband/hw/ehca/ehca_mrmw.c
··· 72 72 73 73 static u32 ehca_encode_hwpage_size(u32 pgsize) 74 74 { 75 - u32 idx = 0; 76 - pgsize >>= 12; 77 - /* 78 - * map mr page size into hw code: 79 - * 0, 1, 2, 3 for 4K, 64K, 1M, 64M 80 - */ 81 - while (!(pgsize & 1)) { 82 - idx++; 83 - pgsize >>= 4; 84 - } 85 - return idx; 75 + int log = ilog2(pgsize); 76 + WARN_ON(log < 12 || log > 24 || log & 3); 77 + return (log - 12) / 4; 86 78 } 87 79 88 80 static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca) 89 81 { 90 - if (shca->hca_cap_mr_pgsize & HCA_CAP_MR_PGSIZE_16M) 91 - return EHCA_MR_PGSIZE16M; 92 - return EHCA_MR_PGSIZE4K; 82 + return 1UL << ilog2(shca->hca_cap_mr_pgsize); 93 83 } 94 84 95 85 static struct ehca_mr *ehca_mr_new(void) ··· 249 259 pginfo.u.phy.num_phys_buf = num_phys_buf; 250 260 pginfo.u.phy.phys_buf_array = phys_buf_array; 251 261 pginfo.next_hwpage = 252 - ((u64)iova_start & ~(hw_pgsize - 1)) / hw_pgsize; 262 + ((u64)iova_start & ~PAGE_MASK) / hw_pgsize; 253 263 254 264 ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags, 255 265 e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, ··· 286 296 container_of(pd->device, struct ehca_shca, ib_device); 287 297 struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); 288 298 struct ehca_mr_pginfo pginfo; 289 - int ret; 299 + int ret, page_shift; 290 300 u32 num_kpages; 291 301 u32 num_hwpages; 292 302 u64 hwpage_size; ··· 341 351 /* determine number of MR pages */ 342 352 num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE); 343 353 /* select proper hw_pgsize */ 344 - if (ehca_mr_largepage && 345 - (shca->hca_cap_mr_pgsize & HCA_CAP_MR_PGSIZE_16M)) { 346 - int page_shift = PAGE_SHIFT; 347 - if (e_mr->umem->hugetlb) { 348 - /* determine page_shift, clamp between 4K and 16M */ 349 - page_shift = (fls64(length - 1) + 3) & ~3; 350 - page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K), 351 - EHCA_MR_PGSHIFT16M); 352 - } 353 - hwpage_size = 1UL << page_shift; 354 - } else 355 - hwpage_size = EHCA_MR_PGSIZE4K; /* ehca1 only supports 4k */ 356 - ehca_dbg(pd->device, "hwpage_size=%lx", hwpage_size); 354 + page_shift = PAGE_SHIFT; 355 + if (e_mr->umem->hugetlb) { 356 + /* determine page_shift, clamp between 4K and 16M */ 357 + page_shift = (fls64(length - 1) + 3) & ~3; 358 + page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K), 359 + EHCA_MR_PGSHIFT16M); 360 + } 361 + hwpage_size = 1UL << page_shift; 362 + 363 + /* now that we have the desired page size, shift until it's 364 + * supported, too. 4K is always supported, so this terminates. 
365 + */ 366 + while (!(hwpage_size & shca->hca_cap_mr_pgsize)) 367 + hwpage_size >>= 4; 357 368 358 369 reg_user_mr_fallback: 359 370 num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size); ··· 538 547 pginfo.u.phy.num_phys_buf = num_phys_buf; 539 548 pginfo.u.phy.phys_buf_array = phys_buf_array; 540 549 pginfo.next_hwpage = 541 - ((u64)iova_start & ~(hw_pgsize - 1)) / hw_pgsize; 550 + ((u64)iova_start & ~PAGE_MASK) / hw_pgsize; 542 551 } 543 552 if (mr_rereg_mask & IB_MR_REREG_ACCESS) 544 553 new_acl = mr_access_flags; ··· 800 809 ib_fmr = ERR_PTR(-EINVAL); 801 810 goto alloc_fmr_exit0; 802 811 } 803 - hw_pgsize = ehca_get_max_hwpage_size(shca); 804 - if ((1 << fmr_attr->page_shift) != hw_pgsize) { 812 + 813 + hw_pgsize = 1 << fmr_attr->page_shift; 814 + if (!(hw_pgsize & shca->hca_cap_mr_pgsize)) { 805 815 ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x", 806 816 fmr_attr->page_shift); 807 817 ib_fmr = ERR_PTR(-EINVAL); ··· 818 826 819 827 /* register MR on HCA */ 820 828 memset(&pginfo, 0, sizeof(pginfo)); 829 + pginfo.hwpage_size = hw_pgsize; 821 830 /* 822 831 * pginfo.num_hwpages==0, ie register_rpages() will not be called 823 832 * but deferred to map_phys_fmr()
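
The rewritten ehca_encode_hwpage_size() above reduces to "(log2(size) - 12) / 4", mapping 4K, 64K, 1M and 16M onto the hardware codes 0..3, and the registration path now shifts the chosen page size down by factors of 16 until it reaches a size the HCA actually supports. A small sketch of the encoding step; ilog2_example() is a local helper standing in for the kernel's ilog2():

#include <assert.h>
#include <stdint.h>

static unsigned int ilog2_example(uint64_t v)
{
        unsigned int log = 0;

        while (v >>= 1)
                ++log;
        return log;
}

/* 4K -> 0, 64K -> 1, 1M -> 2, 16M -> 3 */
static uint32_t encode_hwpage_size_example(uint64_t pgsize)
{
        unsigned int log = ilog2_example(pgsize);

        assert(log >= 12 && log <= 24 && (log & 3) == 0);
        return (log - 12) / 4;
}
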
+3 -1
drivers/infiniband/hw/ehca/ehca_qp.c
··· 451 451 has_srq = 1; 452 452 parms.ext_type = EQPT_SRQBASE; 453 453 parms.srq_qpn = my_srq->real_qp_num; 454 - parms.srq_token = my_srq->token; 455 454 } 456 455 457 456 if (is_llqp && has_srq) { ··· 581 582 ehca_err(pd->device, "Invalid number of qp"); 582 583 goto create_qp_exit1; 583 584 } 585 + 586 + if (has_srq) 587 + parms.srq_token = my_qp->token; 584 588 585 589 parms.servicetype = ibqptype2servicetype(qp_type); 586 590 if (parms.servicetype < 0) {
+14 -2
drivers/infiniband/hw/mlx4/qp.c
··· 63 63 u8 header_buf[MLX4_IB_UD_HEADER_SIZE]; 64 64 }; 65 65 66 + enum { 67 + MLX4_IB_MIN_SQ_STRIDE = 6 68 + }; 69 + 66 70 static const __be32 mlx4_ib_opcode[] = { 67 71 [IB_WR_SEND] = __constant_cpu_to_be32(MLX4_OPCODE_SEND), 68 72 [IB_WR_SEND_WITH_IMM] = __constant_cpu_to_be32(MLX4_OPCODE_SEND_IMM), ··· 289 285 return 0; 290 286 } 291 287 292 - static int set_user_sq_size(struct mlx4_ib_qp *qp, 288 + static int set_user_sq_size(struct mlx4_ib_dev *dev, 289 + struct mlx4_ib_qp *qp, 293 290 struct mlx4_ib_create_qp *ucmd) 294 291 { 292 + /* Sanity check SQ size before proceeding */ 293 + if ((1 << ucmd->log_sq_bb_count) > dev->dev->caps.max_wqes || 294 + ucmd->log_sq_stride > 295 + ilog2(roundup_pow_of_two(dev->dev->caps.max_sq_desc_sz)) || 296 + ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE) 297 + return -EINVAL; 298 + 295 299 qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count; 296 300 qp->sq.wqe_shift = ucmd->log_sq_stride; 297 301 ··· 342 330 343 331 qp->sq_no_prefetch = ucmd.sq_no_prefetch; 344 332 345 - err = set_user_sq_size(qp, &ucmd); 333 + err = set_user_sq_size(dev, qp, &ucmd); 346 334 if (err) 347 335 goto err; 348 336
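
set_user_sq_size() above now range-checks the log-encoded queue parameters handed in from userspace before they are used in shifts or allocations. A stand-alone sketch of the same kind of check; the struct and constant names are placeholders, and the device limits are assumed to be precomputed:

#include <errno.h>
#include <stdint.h>

#define MIN_SQ_STRIDE_EXAMPLE 6         /* smallest WQE stride: 64 bytes */

struct sq_caps_example {
        unsigned int max_wqes;          /* device limit on SQ entries */
        unsigned int max_desc_shift;    /* log2 of the largest WQE size */
};

static int validate_user_sq_size_example(const struct sq_caps_example *caps,
                                         uint8_t log_sq_bb_count,
                                         uint8_t log_sq_stride)
{
        /* Reject values that would overflow the shift or exceed the caps. */
        if (log_sq_bb_count >= 32 ||
            (1U << log_sq_bb_count) > caps->max_wqes)
                return -EINVAL;

        if (log_sq_stride > caps->max_desc_shift ||
            log_sq_stride < MIN_SQ_STRIDE_EXAMPLE)
                return -EINVAL;

        return 0;
}
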
+19 -34
drivers/infiniband/hw/mthca/mthca_cq.c
··· 204 204 static inline void update_cons_index(struct mthca_dev *dev, struct mthca_cq *cq, 205 205 int incr) 206 206 { 207 - __be32 doorbell[2]; 208 - 209 207 if (mthca_is_memfree(dev)) { 210 208 *cq->set_ci_db = cpu_to_be32(cq->cons_index); 211 209 wmb(); 212 210 } else { 213 - doorbell[0] = cpu_to_be32(MTHCA_TAVOR_CQ_DB_INC_CI | cq->cqn); 214 - doorbell[1] = cpu_to_be32(incr - 1); 215 - 216 - mthca_write64(doorbell, 211 + mthca_write64(MTHCA_TAVOR_CQ_DB_INC_CI | cq->cqn, incr - 1, 217 212 dev->kar + MTHCA_CQ_DOORBELL, 218 213 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); 219 214 /* ··· 726 731 727 732 int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags) 728 733 { 729 - __be32 doorbell[2]; 734 + u32 dbhi = ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? 735 + MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL : 736 + MTHCA_TAVOR_CQ_DB_REQ_NOT) | 737 + to_mcq(cq)->cqn; 730 738 731 - doorbell[0] = cpu_to_be32(((flags & IB_CQ_SOLICITED_MASK) == 732 - IB_CQ_SOLICITED ? 733 - MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL : 734 - MTHCA_TAVOR_CQ_DB_REQ_NOT) | 735 - to_mcq(cq)->cqn); 736 - doorbell[1] = (__force __be32) 0xffffffff; 737 - 738 - mthca_write64(doorbell, 739 - to_mdev(cq->device)->kar + MTHCA_CQ_DOORBELL, 739 + mthca_write64(dbhi, 0xffffffff, to_mdev(cq->device)->kar + MTHCA_CQ_DOORBELL, 740 740 MTHCA_GET_DOORBELL_LOCK(&to_mdev(cq->device)->doorbell_lock)); 741 741 742 742 return 0; ··· 740 750 int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) 741 751 { 742 752 struct mthca_cq *cq = to_mcq(ibcq); 743 - __be32 doorbell[2]; 744 - u32 sn; 745 - __be32 ci; 753 + __be32 db_rec[2]; 754 + u32 dbhi; 755 + u32 sn = cq->arm_sn & 3; 746 756 747 - sn = cq->arm_sn & 3; 748 - ci = cpu_to_be32(cq->cons_index); 757 + db_rec[0] = cpu_to_be32(cq->cons_index); 758 + db_rec[1] = cpu_to_be32((cq->cqn << 8) | (2 << 5) | (sn << 3) | 759 + ((flags & IB_CQ_SOLICITED_MASK) == 760 + IB_CQ_SOLICITED ? 1 : 2)); 749 761 750 - doorbell[0] = ci; 751 - doorbell[1] = cpu_to_be32((cq->cqn << 8) | (2 << 5) | (sn << 3) | 752 - ((flags & IB_CQ_SOLICITED_MASK) == 753 - IB_CQ_SOLICITED ? 1 : 2)); 754 - 755 - mthca_write_db_rec(doorbell, cq->arm_db); 762 + mthca_write_db_rec(db_rec, cq->arm_db); 756 763 757 764 /* 758 765 * Make sure that the doorbell record in host memory is ··· 757 770 */ 758 771 wmb(); 759 772 760 - doorbell[0] = cpu_to_be32((sn << 28) | 761 - ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? 762 - MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL : 763 - MTHCA_ARBEL_CQ_DB_REQ_NOT) | 764 - cq->cqn); 765 - doorbell[1] = ci; 773 + dbhi = (sn << 28) | 774 + ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? 775 + MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL : 776 + MTHCA_ARBEL_CQ_DB_REQ_NOT) | cq->cqn; 766 777 767 - mthca_write64(doorbell, 778 + mthca_write64(dbhi, cq->cons_index, 768 779 to_mdev(ibcq->device)->kar + MTHCA_CQ_DOORBELL, 769 780 MTHCA_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->doorbell_lock)); 770 781
+8 -5
drivers/infiniband/hw/mthca/mthca_doorbell.h
··· 58 58 __raw_writeq((__force u64) val, dest); 59 59 } 60 60 61 - static inline void mthca_write64(__be32 val[2], void __iomem *dest, 61 + static inline void mthca_write64(u32 hi, u32 lo, void __iomem *dest, 62 62 spinlock_t *doorbell_lock) 63 63 { 64 - __raw_writeq(*(u64 *) val, dest); 64 + __raw_writeq((__force u64) cpu_to_be64((u64) hi << 32 | lo), dest); 65 65 } 66 66 67 67 static inline void mthca_write_db_rec(__be32 val[2], __be32 *db) ··· 87 87 __raw_writel(((__force u32 *) &val)[1], dest + 4); 88 88 } 89 89 90 - static inline void mthca_write64(__be32 val[2], void __iomem *dest, 90 + static inline void mthca_write64(u32 hi, u32 lo, void __iomem *dest, 91 91 spinlock_t *doorbell_lock) 92 92 { 93 93 unsigned long flags; 94 94 95 + hi = (__force u32) cpu_to_be32(hi); 96 + lo = (__force u32) cpu_to_be32(lo); 97 + 95 98 spin_lock_irqsave(doorbell_lock, flags); 96 - __raw_writel((__force u32) val[0], dest); 97 - __raw_writel((__force u32) val[1], dest + 4); 99 + __raw_writel(hi, dest); 100 + __raw_writel(lo, dest + 4); 98 101 spin_unlock_irqrestore(doorbell_lock, flags); 99 102 } 100 103
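
The old 64-bit mthca_write64() reinterpreted a __be32 val[2] array as a u64, and since the array is only guaranteed 4-byte alignment that cast could trap on alignment-strict machines. The new signature takes the two halves as plain u32 and composes the big-endian doorbell value in a register before the single 64-bit store. A user-space sketch of that composition; htobe64() stands in for the kernel's cpu_to_be64() and the destination pointer is a placeholder for the ioremapped doorbell register:

#include <endian.h>
#include <stdint.h>

static inline void write_doorbell_example(uint32_t hi, uint32_t lo,
                                          volatile uint64_t *dest)
{
        /* Build the value in a register: no misaligned memory access. */
        uint64_t db = htobe64(((uint64_t) hi << 32) | lo);

        *dest = db;     /* one 64-bit store to the doorbell register */
}
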
+3 -18
drivers/infiniband/hw/mthca/mthca_eq.c
··· 173 173 174 174 static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci) 175 175 { 176 - __be32 doorbell[2]; 177 - 178 - doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_SET_CI | eq->eqn); 179 - doorbell[1] = cpu_to_be32(ci & (eq->nent - 1)); 180 - 181 176 /* 182 177 * This barrier makes sure that all updates to ownership bits 183 178 * done by set_eqe_hw() hit memory before the consumer index ··· 182 187 * having set_eqe_hw() overwrite the owner field. 183 188 */ 184 189 wmb(); 185 - mthca_write64(doorbell, 190 + mthca_write64(MTHCA_EQ_DB_SET_CI | eq->eqn, ci & (eq->nent - 1), 186 191 dev->kar + MTHCA_EQ_DOORBELL, 187 192 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); 188 193 } ··· 207 212 208 213 static inline void tavor_eq_req_not(struct mthca_dev *dev, int eqn) 209 214 { 210 - __be32 doorbell[2]; 211 - 212 - doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_REQ_NOT | eqn); 213 - doorbell[1] = 0; 214 - 215 - mthca_write64(doorbell, 215 + mthca_write64(MTHCA_EQ_DB_REQ_NOT | eqn, 0, 216 216 dev->kar + MTHCA_EQ_DOORBELL, 217 217 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); 218 218 } ··· 220 230 static inline void disarm_cq(struct mthca_dev *dev, int eqn, int cqn) 221 231 { 222 232 if (!mthca_is_memfree(dev)) { 223 - __be32 doorbell[2]; 224 - 225 - doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_DISARM_CQ | eqn); 226 - doorbell[1] = cpu_to_be32(cqn); 227 - 228 - mthca_write64(doorbell, 233 + mthca_write64(MTHCA_EQ_DB_DISARM_CQ | eqn, cqn, 229 234 dev->kar + MTHCA_EQ_DOORBELL, 230 235 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); 231 236 }
+15 -30
drivers/infiniband/hw/mthca/mthca_qp.c
··· 1799 1799 1800 1800 out: 1801 1801 if (likely(nreq)) { 1802 - __be32 doorbell[2]; 1803 - 1804 - doorbell[0] = cpu_to_be32(((qp->sq.next_ind << qp->sq.wqe_shift) + 1805 - qp->send_wqe_offset) | f0 | op0); 1806 - doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0); 1807 - 1808 1802 wmb(); 1809 1803 1810 - mthca_write64(doorbell, 1804 + mthca_write64(((qp->sq.next_ind << qp->sq.wqe_shift) + 1805 + qp->send_wqe_offset) | f0 | op0, 1806 + (qp->qpn << 8) | size0, 1811 1807 dev->kar + MTHCA_SEND_DOORBELL, 1812 1808 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); 1813 1809 /* ··· 1825 1829 { 1826 1830 struct mthca_dev *dev = to_mdev(ibqp->device); 1827 1831 struct mthca_qp *qp = to_mqp(ibqp); 1828 - __be32 doorbell[2]; 1829 1832 unsigned long flags; 1830 1833 int err = 0; 1831 1834 int nreq; ··· 1902 1907 if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) { 1903 1908 nreq = 0; 1904 1909 1905 - doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0); 1906 - doorbell[1] = cpu_to_be32(qp->qpn << 8); 1907 - 1908 1910 wmb(); 1909 1911 1910 - mthca_write64(doorbell, 1911 - dev->kar + MTHCA_RECEIVE_DOORBELL, 1912 + mthca_write64((qp->rq.next_ind << qp->rq.wqe_shift) | size0, 1913 + qp->qpn << 8, dev->kar + MTHCA_RECEIVE_DOORBELL, 1912 1914 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); 1913 1915 1914 1916 qp->rq.next_ind = ind; ··· 1915 1923 1916 1924 out: 1917 1925 if (likely(nreq)) { 1918 - doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0); 1919 - doorbell[1] = cpu_to_be32((qp->qpn << 8) | nreq); 1920 - 1921 1926 wmb(); 1922 1927 1923 - mthca_write64(doorbell, 1924 - dev->kar + MTHCA_RECEIVE_DOORBELL, 1928 + mthca_write64((qp->rq.next_ind << qp->rq.wqe_shift) | size0, 1929 + qp->qpn << 8 | nreq, dev->kar + MTHCA_RECEIVE_DOORBELL, 1925 1930 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); 1926 1931 } 1927 1932 ··· 1940 1951 { 1941 1952 struct mthca_dev *dev = to_mdev(ibqp->device); 1942 1953 struct mthca_qp *qp = to_mqp(ibqp); 1943 - __be32 doorbell[2]; 1954 + u32 dbhi; 1944 1955 void *wqe; 1945 1956 void *prev_wqe; 1946 1957 unsigned long flags; ··· 1970 1981 if (unlikely(nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB)) { 1971 1982 nreq = 0; 1972 1983 1973 - doorbell[0] = cpu_to_be32((MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) | 1974 - ((qp->sq.head & 0xffff) << 8) | 1975 - f0 | op0); 1976 - doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0); 1984 + dbhi = (MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) | 1985 + ((qp->sq.head & 0xffff) << 8) | f0 | op0; 1977 1986 1978 1987 qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB; 1979 1988 ··· 1987 2000 * write MMIO send doorbell. 1988 2001 */ 1989 2002 wmb(); 1990 - mthca_write64(doorbell, 2003 + 2004 + mthca_write64(dbhi, (qp->qpn << 8) | size0, 1991 2005 dev->kar + MTHCA_SEND_DOORBELL, 1992 2006 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); 1993 2007 } ··· 2142 2154 2143 2155 out: 2144 2156 if (likely(nreq)) { 2145 - doorbell[0] = cpu_to_be32((nreq << 24) | 2146 - ((qp->sq.head & 0xffff) << 8) | 2147 - f0 | op0); 2148 - doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0); 2157 + dbhi = (nreq << 24) | ((qp->sq.head & 0xffff) << 8) | f0 | op0; 2149 2158 2150 2159 qp->sq.head += nreq; 2151 2160 ··· 2158 2173 * write MMIO send doorbell. 2159 2174 */ 2160 2175 wmb(); 2161 - mthca_write64(doorbell, 2162 - dev->kar + MTHCA_SEND_DOORBELL, 2176 + 2177 + mthca_write64(dbhi, (qp->qpn << 8) | size0, dev->kar + MTHCA_SEND_DOORBELL, 2163 2178 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); 2164 2179 } 2165 2180
+2 -9
drivers/infiniband/hw/mthca/mthca_srq.c
··· 491 491 { 492 492 struct mthca_dev *dev = to_mdev(ibsrq->device); 493 493 struct mthca_srq *srq = to_msrq(ibsrq); 494 - __be32 doorbell[2]; 495 494 unsigned long flags; 496 495 int err = 0; 497 496 int first_ind; ··· 562 563 if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) { 563 564 nreq = 0; 564 565 565 - doorbell[0] = cpu_to_be32(first_ind << srq->wqe_shift); 566 - doorbell[1] = cpu_to_be32(srq->srqn << 8); 567 - 568 566 /* 569 567 * Make sure that descriptors are written 570 568 * before doorbell is rung. 571 569 */ 572 570 wmb(); 573 571 574 - mthca_write64(doorbell, 572 + mthca_write64(first_ind << srq->wqe_shift, srq->srqn << 8, 575 573 dev->kar + MTHCA_RECEIVE_DOORBELL, 576 574 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); 577 575 ··· 577 581 } 578 582 579 583 if (likely(nreq)) { 580 - doorbell[0] = cpu_to_be32(first_ind << srq->wqe_shift); 581 - doorbell[1] = cpu_to_be32((srq->srqn << 8) | nreq); 582 - 583 584 /* 584 585 * Make sure that descriptors are written before 585 586 * doorbell is rung. 586 587 */ 587 588 wmb(); 588 589 589 - mthca_write64(doorbell, 590 + mthca_write64(first_ind << srq->wqe_shift, (srq->srqn << 8) | nreq, 590 591 dev->kar + MTHCA_RECEIVE_DOORBELL, 591 592 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); 592 593 }
+9 -6
drivers/infiniband/ulp/ipoib/ipoib.h
··· 84 84 IPOIB_MCAST_RUN = 6, 85 85 IPOIB_STOP_REAPER = 7, 86 86 IPOIB_MCAST_STARTED = 8, 87 - IPOIB_FLAG_NETIF_STOPPED = 9, 88 - IPOIB_FLAG_ADMIN_CM = 10, 89 - IPOIB_FLAG_UMCAST = 11, 87 + IPOIB_FLAG_ADMIN_CM = 9, 88 + IPOIB_FLAG_UMCAST = 10, 90 89 91 90 IPOIB_MAX_BACKOFF_SECONDS = 16, 92 91 ··· 97 98 98 99 #define IPOIB_OP_RECV (1ul << 31) 99 100 #ifdef CONFIG_INFINIBAND_IPOIB_CM 100 - #define IPOIB_CM_OP_SRQ (1ul << 30) 101 + #define IPOIB_OP_CM (1ul << 30) 101 102 #else 102 - #define IPOIB_CM_OP_SRQ (0) 103 + #define IPOIB_OP_CM (0) 103 104 #endif 104 105 105 106 /* structs */ ··· 196 197 197 198 struct ipoib_cm_tx { 198 199 struct ib_cm_id *id; 199 - struct ib_cq *cq; 200 200 struct ib_qp *qp; 201 201 struct list_head list; 202 202 struct net_device *dev; ··· 292 294 unsigned tx_tail; 293 295 struct ib_sge tx_sge; 294 296 struct ib_send_wr tx_wr; 297 + unsigned tx_outstanding; 295 298 296 299 struct ib_wc ibwc[IPOIB_NUM_WC]; 297 300 ··· 503 504 void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb, 504 505 unsigned int mtu); 505 506 void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc); 507 + void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc); 506 508 #else 507 509 508 510 struct ipoib_cm_tx; ··· 592 592 { 593 593 } 594 594 595 + static inline void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) 596 + { 597 + } 595 598 #endif 596 599 597 600 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
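
With the per-connection send CQ gone, CM send completions land on the shared CQ, so the top two bits of each wr_id now encode what kind of completion it is: IPOIB_OP_RECV (bit 31) for receives and IPOIB_OP_CM (bit 30) for connected-mode traffic, with the low bits left for the ring index. A rough stand-alone sketch of that dispatch; the messages and names are placeholders, not IPoIB functions:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define OP_RECV_EXAMPLE (UINT64_C(1) << 31)     /* receive completion */
#define OP_CM_EXAMPLE   (UINT64_C(1) << 30)     /* connected-mode traffic */

static void dispatch_completion_example(uint64_t wr_id)
{
        uint64_t idx = wr_id & ~(OP_RECV_EXAMPLE | OP_CM_EXAMPLE);

        if (wr_id & OP_RECV_EXAMPLE) {
                if (wr_id & OP_CM_EXAMPLE)
                        printf("CM receive, ring index %" PRIu64 "\n", idx);
                else
                        printf("datagram receive, ring index %" PRIu64 "\n", idx);
        } else {
                if (wr_id & OP_CM_EXAMPLE)
                        printf("CM send, ring index %" PRIu64 "\n", idx);
                else
                        printf("datagram send, ring index %" PRIu64 "\n", idx);
        }
}
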
+49 -71
drivers/infiniband/ulp/ipoib/ipoib_cm.c
··· 87 87 struct ib_recv_wr *bad_wr; 88 88 int i, ret; 89 89 90 - priv->cm.rx_wr.wr_id = id | IPOIB_CM_OP_SRQ; 90 + priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; 91 91 92 92 for (i = 0; i < IPOIB_CM_RX_SG; ++i) 93 93 priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i]; ··· 401 401 void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) 402 402 { 403 403 struct ipoib_dev_priv *priv = netdev_priv(dev); 404 - unsigned int wr_id = wc->wr_id & ~IPOIB_CM_OP_SRQ; 404 + unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV); 405 405 struct sk_buff *skb, *newskb; 406 406 struct ipoib_cm_rx *p; 407 407 unsigned long flags; ··· 412 412 wr_id, wc->status); 413 413 414 414 if (unlikely(wr_id >= ipoib_recvq_size)) { 415 - if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~IPOIB_CM_OP_SRQ)) { 415 + if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~(IPOIB_OP_CM | IPOIB_OP_RECV))) { 416 416 spin_lock_irqsave(&priv->lock, flags); 417 417 list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list); 418 418 ipoib_cm_start_rx_drain(priv); ··· 434 434 goto repost; 435 435 } 436 436 437 - if (!likely(wr_id & IPOIB_CM_RX_UPDATE_MASK)) { 437 + if (unlikely(!(wr_id & IPOIB_CM_RX_UPDATE_MASK))) { 438 438 p = wc->qp->qp_context; 439 439 if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) { 440 440 spin_lock_irqsave(&priv->lock, flags); ··· 498 498 priv->tx_sge.addr = addr; 499 499 priv->tx_sge.length = len; 500 500 501 - priv->tx_wr.wr_id = wr_id; 501 + priv->tx_wr.wr_id = wr_id | IPOIB_OP_CM; 502 502 503 503 return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr); 504 504 } ··· 549 549 dev->trans_start = jiffies; 550 550 ++tx->tx_head; 551 551 552 - if (tx->tx_head - tx->tx_tail == ipoib_sendq_size) { 552 + if (++priv->tx_outstanding == ipoib_sendq_size) { 553 553 ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n", 554 554 tx->qp->qp_num); 555 555 netif_stop_queue(dev); 556 - set_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags); 557 556 } 558 557 } 559 558 } 560 559 561 - static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx, 562 - struct ib_wc *wc) 560 + void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) 563 561 { 564 562 struct ipoib_dev_priv *priv = netdev_priv(dev); 565 - unsigned int wr_id = wc->wr_id; 563 + struct ipoib_cm_tx *tx = wc->qp->qp_context; 564 + unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM; 566 565 struct ipoib_tx_buf *tx_req; 567 566 unsigned long flags; 568 567 ··· 586 587 587 588 spin_lock_irqsave(&priv->tx_lock, flags); 588 589 ++tx->tx_tail; 589 - if (unlikely(test_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags)) && 590 - tx->tx_head - tx->tx_tail <= ipoib_sendq_size >> 1) { 591 - clear_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags); 590 + if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) && 591 + netif_queue_stopped(dev) && 592 + test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) 592 593 netif_wake_queue(dev); 593 - } 594 594 595 595 if (wc->status != IB_WC_SUCCESS && 596 596 wc->status != IB_WC_WR_FLUSH_ERR) { ··· 612 614 tx->neigh = NULL; 613 615 } 614 616 615 - /* queue would be re-started anyway when TX is destroyed, 616 - * but it makes sense to do it ASAP here. 
*/ 617 - if (test_and_clear_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags)) 618 - netif_wake_queue(dev); 619 - 620 617 if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { 621 618 list_move(&tx->list, &priv->cm.reap_list); 622 619 queue_work(ipoib_workqueue, &priv->cm.reap_task); ··· 623 630 } 624 631 625 632 spin_unlock_irqrestore(&priv->tx_lock, flags); 626 - } 627 - 628 - static void ipoib_cm_tx_completion(struct ib_cq *cq, void *tx_ptr) 629 - { 630 - struct ipoib_cm_tx *tx = tx_ptr; 631 - int n, i; 632 - 633 - ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); 634 - do { 635 - n = ib_poll_cq(cq, IPOIB_NUM_WC, tx->ibwc); 636 - for (i = 0; i < n; ++i) 637 - ipoib_cm_handle_tx_wc(tx->dev, tx, tx->ibwc + i); 638 - } while (n == IPOIB_NUM_WC); 639 633 } 640 634 641 635 int ipoib_cm_dev_open(struct net_device *dev) ··· 787 807 return 0; 788 808 } 789 809 790 - static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ib_cq *cq) 810 + static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_cm_tx *tx) 791 811 { 792 812 struct ipoib_dev_priv *priv = netdev_priv(dev); 793 813 struct ib_qp_init_attr attr = { 794 - .send_cq = cq, 814 + .send_cq = priv->cq, 795 815 .recv_cq = priv->cq, 796 816 .srq = priv->cm.srq, 797 817 .cap.max_send_wr = ipoib_sendq_size, 798 818 .cap.max_send_sge = 1, 799 819 .sq_sig_type = IB_SIGNAL_ALL_WR, 800 820 .qp_type = IB_QPT_RC, 821 + .qp_context = tx 801 822 }; 802 823 803 824 return ib_create_qp(priv->pd, &attr); ··· 880 899 goto err_tx; 881 900 } 882 901 883 - p->cq = ib_create_cq(priv->ca, ipoib_cm_tx_completion, NULL, p, 884 - ipoib_sendq_size + 1, 0); 885 - if (IS_ERR(p->cq)) { 886 - ret = PTR_ERR(p->cq); 887 - ipoib_warn(priv, "failed to allocate tx cq: %d\n", ret); 888 - goto err_cq; 889 - } 890 - 891 - ret = ib_req_notify_cq(p->cq, IB_CQ_NEXT_COMP); 892 - if (ret) { 893 - ipoib_warn(priv, "failed to request completion notification: %d\n", ret); 894 - goto err_req_notify; 895 - } 896 - 897 - p->qp = ipoib_cm_create_tx_qp(p->dev, p->cq); 902 + p->qp = ipoib_cm_create_tx_qp(p->dev, p); 898 903 if (IS_ERR(p->qp)) { 899 904 ret = PTR_ERR(p->qp); 900 905 ipoib_warn(priv, "failed to allocate tx qp: %d\n", ret); ··· 917 950 err_id: 918 951 p->id = NULL; 919 952 ib_destroy_qp(p->qp); 920 - err_req_notify: 921 953 err_qp: 922 954 p->qp = NULL; 923 - ib_destroy_cq(p->cq); 924 - err_cq: 925 - p->cq = NULL; 926 955 err_tx: 927 956 return ret; 928 957 } ··· 927 964 { 928 965 struct ipoib_dev_priv *priv = netdev_priv(p->dev); 929 966 struct ipoib_tx_buf *tx_req; 967 + unsigned long flags; 968 + unsigned long begin; 930 969 931 970 ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n", 932 971 p->qp ? 
p->qp->qp_num : 0, p->tx_head, p->tx_tail); ··· 936 971 if (p->id) 937 972 ib_destroy_cm_id(p->id); 938 973 974 + if (p->tx_ring) { 975 + /* Wait for all sends to complete */ 976 + begin = jiffies; 977 + while ((int) p->tx_tail - (int) p->tx_head < 0) { 978 + if (time_after(jiffies, begin + 5 * HZ)) { 979 + ipoib_warn(priv, "timing out; %d sends not completed\n", 980 + p->tx_head - p->tx_tail); 981 + goto timeout; 982 + } 983 + 984 + msleep(1); 985 + } 986 + } 987 + 988 + timeout: 989 + 990 + while ((int) p->tx_tail - (int) p->tx_head < 0) { 991 + tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)]; 992 + ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, 993 + DMA_TO_DEVICE); 994 + dev_kfree_skb_any(tx_req->skb); 995 + ++p->tx_tail; 996 + spin_lock_irqsave(&priv->tx_lock, flags); 997 + if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) && 998 + netif_queue_stopped(p->dev) && 999 + test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) 1000 + netif_wake_queue(p->dev); 1001 + spin_unlock_irqrestore(&priv->tx_lock, flags); 1002 + } 1003 + 939 1004 if (p->qp) 940 1005 ib_destroy_qp(p->qp); 941 1006 942 - if (p->cq) 943 - ib_destroy_cq(p->cq); 944 - 945 - if (test_bit(IPOIB_FLAG_NETIF_STOPPED, &p->flags)) 946 - netif_wake_queue(p->dev); 947 - 948 - if (p->tx_ring) { 949 - while ((int) p->tx_tail - (int) p->tx_head < 0) { 950 - tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)]; 951 - ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, 952 - DMA_TO_DEVICE); 953 - dev_kfree_skb_any(tx_req->skb); 954 - ++p->tx_tail; 955 - } 956 - 957 - kfree(p->tx_ring); 958 - } 959 - 1007 + kfree(p->tx_ring); 960 1008 kfree(p); 961 1009 } 962 1010
+31 -21
drivers/infiniband/ulp/ipoib/ipoib_ib.c
··· 267 267 268 268 spin_lock_irqsave(&priv->tx_lock, flags); 269 269 ++priv->tx_tail; 270 - if (unlikely(test_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags)) && 271 - priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1) { 272 - clear_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags); 270 + if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) && 271 + netif_queue_stopped(dev) && 272 + test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) 273 273 netif_wake_queue(dev); 274 - } 275 274 spin_unlock_irqrestore(&priv->tx_lock, flags); 276 275 277 276 if (wc->status != IB_WC_SUCCESS && ··· 300 301 for (i = 0; i < n; i++) { 301 302 struct ib_wc *wc = priv->ibwc + i; 302 303 303 - if (wc->wr_id & IPOIB_CM_OP_SRQ) { 304 + if (wc->wr_id & IPOIB_OP_RECV) { 304 305 ++done; 305 - ipoib_cm_handle_rx_wc(dev, wc); 306 - } else if (wc->wr_id & IPOIB_OP_RECV) { 307 - ++done; 308 - ipoib_ib_handle_rx_wc(dev, wc); 309 - } else 310 - ipoib_ib_handle_tx_wc(dev, wc); 306 + if (wc->wr_id & IPOIB_OP_CM) 307 + ipoib_cm_handle_rx_wc(dev, wc); 308 + else 309 + ipoib_ib_handle_rx_wc(dev, wc); 310 + } else { 311 + if (wc->wr_id & IPOIB_OP_CM) 312 + ipoib_cm_handle_tx_wc(dev, wc); 313 + else 314 + ipoib_ib_handle_tx_wc(dev, wc); 315 + } 311 316 } 312 317 313 318 if (n != t) ··· 404 401 address->last_send = priv->tx_head; 405 402 ++priv->tx_head; 406 403 407 - if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) { 404 + if (++priv->tx_outstanding == ipoib_sendq_size) { 408 405 ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n"); 409 406 netif_stop_queue(dev); 410 - set_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags); 411 407 } 412 408 } 413 409 } ··· 438 436 __ipoib_reap_ah(dev); 439 437 440 438 if (!test_bit(IPOIB_STOP_REAPER, &priv->flags)) 441 - queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ); 439 + queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, 440 + round_jiffies_relative(HZ)); 442 441 } 443 442 444 443 int ipoib_ib_dev_open(struct net_device *dev) ··· 475 472 } 476 473 477 474 clear_bit(IPOIB_STOP_REAPER, &priv->flags); 478 - queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ); 475 + queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, 476 + round_jiffies_relative(HZ)); 479 477 480 478 set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); 481 479 ··· 565 561 if (priv->ibwc[i].status == IB_WC_SUCCESS) 566 562 priv->ibwc[i].status = IB_WC_WR_FLUSH_ERR; 567 563 568 - if (priv->ibwc[i].wr_id & IPOIB_CM_OP_SRQ) 569 - ipoib_cm_handle_rx_wc(dev, priv->ibwc + i); 570 - else if (priv->ibwc[i].wr_id & IPOIB_OP_RECV) 571 - ipoib_ib_handle_rx_wc(dev, priv->ibwc + i); 572 - else 573 - ipoib_ib_handle_tx_wc(dev, priv->ibwc + i); 564 + if (priv->ibwc[i].wr_id & IPOIB_OP_RECV) { 565 + if (priv->ibwc[i].wr_id & IPOIB_OP_CM) 566 + ipoib_cm_handle_rx_wc(dev, priv->ibwc + i); 567 + else 568 + ipoib_ib_handle_rx_wc(dev, priv->ibwc + i); 569 + } else { 570 + if (priv->ibwc[i].wr_id & IPOIB_OP_CM) 571 + ipoib_cm_handle_tx_wc(dev, priv->ibwc + i); 572 + else 573 + ipoib_ib_handle_tx_wc(dev, priv->ibwc + i); 574 + } 574 575 } 575 576 } while (n == IPOIB_NUM_WC); 576 577 } ··· 621 612 DMA_TO_DEVICE); 622 613 dev_kfree_skb_any(tx_req->skb); 623 614 ++priv->tx_tail; 615 + --priv->tx_outstanding; 624 616 } 625 617 626 618 for (i = 0; i < ipoib_recvq_size; ++i) {
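
Both send paths now share one priv->tx_outstanding counter instead of the per-path IPOIB_FLAG_NETIF_STOPPED bit: the net queue is stopped when the counter reaches the send-ring size and woken again once completions drain it back to half. A minimal sketch of that watermark scheme, with a stopped flag standing in for netif_stop_queue()/netif_wake_queue():

#include <stdbool.h>

#define SENDQ_SIZE_EXAMPLE 128

struct txq_example {
        unsigned int outstanding;       /* posted but not yet completed */
        bool stopped;
};

/* Called after posting one send work request. */
static void tx_post_example(struct txq_example *q)
{
        if (++q->outstanding == SENDQ_SIZE_EXAMPLE)
                q->stopped = true;      /* netif_stop_queue() */
}

/* Called for every send completion. */
static void tx_complete_example(struct txq_example *q)
{
        if (--q->outstanding == SENDQ_SIZE_EXAMPLE / 2 && q->stopped)
                q->stopped = false;     /* netif_wake_queue() */
}
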
+1 -3
drivers/infiniband/ulp/ipoib/ipoib_main.c
··· 148 148 149 149 netif_stop_queue(dev); 150 150 151 - clear_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags); 152 - 153 151 /* 154 152 * Now flush workqueue to make sure a scheduled task doesn't 155 153 * bring our internal state back up. ··· 900 902 goto out_rx_ring_cleanup; 901 903 } 902 904 903 - /* priv->tx_head & tx_tail are already 0 */ 905 + /* priv->tx_head, tx_tail & tx_outstanding are already 0 */ 904 906 905 907 if (ipoib_ib_dev_init(dev, ca, port)) 906 908 goto out_tx_ring_cleanup;
+1 -1
drivers/net/mlx4/fw.c
··· 736 736 MLX4_PUT(inbox, (u8) (PAGE_SHIFT - 12), INIT_HCA_UAR_PAGE_SZ_OFFSET); 737 737 MLX4_PUT(inbox, param->log_uar_sz, INIT_HCA_LOG_UAR_SZ_OFFSET); 738 738 739 - err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, 1000); 739 + err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, 10000); 740 740 741 741 if (err) 742 742 mlx4_err(dev, "INIT_HCA returns %d\n", err);
-11
include/linux/mlx4/doorbell.h
··· 52 52 #define MLX4_INIT_DOORBELL_LOCK(ptr) do { } while (0) 53 53 #define MLX4_GET_DOORBELL_LOCK(ptr) (NULL) 54 54 55 - static inline void mlx4_write64_raw(__be64 val, void __iomem *dest) 56 - { 57 - __raw_writeq((__force u64) val, dest); 58 - } 59 - 60 55 static inline void mlx4_write64(__be32 val[2], void __iomem *dest, 61 56 spinlock_t *doorbell_lock) 62 57 { ··· 69 74 #define MLX4_DECLARE_DOORBELL_LOCK(name) spinlock_t name; 70 75 #define MLX4_INIT_DOORBELL_LOCK(ptr) spin_lock_init(ptr) 71 76 #define MLX4_GET_DOORBELL_LOCK(ptr) (ptr) 72 - 73 - static inline void mlx4_write64_raw(__be64 val, void __iomem *dest) 74 - { 75 - __raw_writel(((__force u32 *) &val)[0], dest); 76 - __raw_writel(((__force u32 *) &val)[1], dest + 4); 77 - } 78 77 79 78 static inline void mlx4_write64(__be32 val[2], void __iomem *dest, 80 79 spinlock_t *doorbell_lock)