Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge branches 'ib_core', 'ib_ipoib', 'srpt', 'drain-cq-v4' and 'net/9p' into k.o/for-4.6

+621 -747
+164
drivers/infiniband/core/verbs.c
···
 	return i;
 }
 EXPORT_SYMBOL(ib_sg_to_pages);
+
+struct ib_drain_cqe {
+	struct ib_cqe cqe;
+	struct completion done;
+};
+
+static void ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct ib_drain_cqe *cqe = container_of(wc->wr_cqe, struct ib_drain_cqe,
+						cqe);
+
+	complete(&cqe->done);
+}
+
+/*
+ * Post a WR and block until its completion is reaped for the SQ.
+ */
+static void __ib_drain_sq(struct ib_qp *qp)
+{
+	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
+	struct ib_drain_cqe sdrain;
+	struct ib_send_wr swr = {}, *bad_swr;
+	int ret;
+
+	if (qp->send_cq->poll_ctx == IB_POLL_DIRECT) {
+		WARN_ONCE(qp->send_cq->poll_ctx == IB_POLL_DIRECT,
+			  "IB_POLL_DIRECT poll_ctx not supported for drain\n");
+		return;
+	}
+
+	swr.wr_cqe = &sdrain.cqe;
+	sdrain.cqe.done = ib_drain_qp_done;
+	init_completion(&sdrain.done);
+
+	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+	if (ret) {
+		WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
+		return;
+	}
+
+	ret = ib_post_send(qp, &swr, &bad_swr);
+	if (ret) {
+		WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
+		return;
+	}
+
+	wait_for_completion(&sdrain.done);
+}
+
+/*
+ * Post a WR and block until its completion is reaped for the RQ.
+ */
+static void __ib_drain_rq(struct ib_qp *qp)
+{
+	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
+	struct ib_drain_cqe rdrain;
+	struct ib_recv_wr rwr = {}, *bad_rwr;
+	int ret;
+
+	if (qp->recv_cq->poll_ctx == IB_POLL_DIRECT) {
+		WARN_ONCE(qp->recv_cq->poll_ctx == IB_POLL_DIRECT,
+			  "IB_POLL_DIRECT poll_ctx not supported for drain\n");
+		return;
+	}
+
+	rwr.wr_cqe = &rdrain.cqe;
+	rdrain.cqe.done = ib_drain_qp_done;
+	init_completion(&rdrain.done);
+
+	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+	if (ret) {
+		WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
+		return;
+	}
+
+	ret = ib_post_recv(qp, &rwr, &bad_rwr);
+	if (ret) {
+		WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
+		return;
+	}
+
+	wait_for_completion(&rdrain.done);
+}
+
+/**
+ * ib_drain_sq() - Block until all SQ CQEs have been consumed by the
+ *		   application.
+ * @qp:            queue pair to drain
+ *
+ * If the device has a provider-specific drain function, then
+ * call that.  Otherwise call the generic drain function
+ * __ib_drain_sq().
+ *
+ * The caller must:
+ *
+ * ensure there is room in the CQ and SQ for the drain work request and
+ * completion.
+ *
+ * allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be
+ * IB_POLL_DIRECT.
+ *
+ * ensure that there are no other contexts that are posting WRs concurrently.
+ * Otherwise the drain is not guaranteed.
+ */
+void ib_drain_sq(struct ib_qp *qp)
+{
+	if (qp->device->drain_sq)
+		qp->device->drain_sq(qp);
+	else
+		__ib_drain_sq(qp);
+}
+EXPORT_SYMBOL(ib_drain_sq);
+
+/**
+ * ib_drain_rq() - Block until all RQ CQEs have been consumed by the
+ *		   application.
+ * @qp:            queue pair to drain
+ *
+ * If the device has a provider-specific drain function, then
+ * call that.  Otherwise call the generic drain function
+ * __ib_drain_rq().
+ *
+ * The caller must:
+ *
+ * ensure there is room in the CQ and RQ for the drain work request and
+ * completion.
+ *
+ * allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be
+ * IB_POLL_DIRECT.
+ *
+ * ensure that there are no other contexts that are posting WRs concurrently.
+ * Otherwise the drain is not guaranteed.
+ */
+void ib_drain_rq(struct ib_qp *qp)
+{
+	if (qp->device->drain_rq)
+		qp->device->drain_rq(qp);
+	else
+		__ib_drain_rq(qp);
+}
+EXPORT_SYMBOL(ib_drain_rq);
+
+/**
+ * ib_drain_qp() - Block until all CQEs have been consumed by the
+ *		   application on both the RQ and SQ.
+ * @qp:            queue pair to drain
+ *
+ * The caller must:
+ *
+ * ensure there is room in the CQ(s), SQ, and RQ for drain work requests
+ * and completions.
+ *
+ * allocate the CQs using ib_alloc_cq() and the CQ poll context cannot be
+ * IB_POLL_DIRECT.
+ *
+ * ensure that there are no other contexts that are posting WRs concurrently.
+ * Otherwise the drain is not guaranteed.
+ */
+void ib_drain_qp(struct ib_qp *qp)
+{
+	ib_drain_sq(qp);
+	ib_drain_rq(qp);
+}
+EXPORT_SYMBOL(ib_drain_qp);
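The new API is self-contained, so a consumer needs a single call at teardown time. A minimal usage sketch, not taken from this merge: example_conn and its fields are hypothetical, while ib_drain_qp(), ib_destroy_qp() and ib_free_cq() are real verbs. Per the kernel-doc above, the CQs must have been allocated with ib_alloc_cq() using a poll context other than IB_POLL_DIRECT, and the SQ, RQ and CQ(s) each need one spare slot for the drain work request.

static void example_conn_teardown(struct example_conn *conn)
{
	/*
	 * Moves the QP into the error state, posts one marker WR on the
	 * SQ and one on the RQ, and blocks until both marker completions
	 * have been reaped, which proves all earlier CQEs were consumed.
	 */
	ib_drain_qp(conn->qp);

	/* No completion handler can touch the QP past this point. */
	ib_destroy_qp(conn->qp);
	ib_free_cq(conn->cq);
}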
+8 -1
drivers/infiniband/hw/cxgb4/cq.c
···
 	}
 }
 out:
-	if (wq)
+	if (wq) {
+		if (unlikely(qhp->attr.state != C4IW_QP_STATE_RTS)) {
+			if (t4_sq_empty(wq))
+				complete(&qhp->sq_drained);
+			if (t4_rq_empty(wq))
+				complete(&qhp->rq_drained);
+		}
 		spin_unlock(&qhp->lock);
+	}
 	return ret;
 }
+4
drivers/infiniband/hw/cxgb4/iw_cxgb4.h
···
 	wait_queue_head_t wait;
 	struct timer_list timer;
 	int sq_sig_all;
+	struct completion rq_drained;
+	struct completion sq_drained;
 };

 static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp)
···
 extern int db_fc_threshold;
 extern int db_coalescing_threshold;
 extern int use_dsgl;
+void c4iw_drain_rq(struct ib_qp *qp);
+void c4iw_drain_sq(struct ib_qp *qp);


 #endif
+2
drivers/infiniband/hw/cxgb4/provider.c
···
 	dev->ibdev.get_protocol_stats = c4iw_get_mib;
 	dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
 	dev->ibdev.get_port_immutable = c4iw_port_immutable;
+	dev->ibdev.drain_sq = c4iw_drain_sq;
+	dev->ibdev.drain_rq = c4iw_drain_rq;

 	dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
 	if (!dev->ibdev.iwcm)
+16
drivers/infiniband/hw/cxgb4/qp.c
···
 	qhp->attr.max_ird = 0;
 	qhp->sq_sig_all = attrs->sq_sig_type == IB_SIGNAL_ALL_WR;
 	spin_lock_init(&qhp->lock);
+	init_completion(&qhp->sq_drained);
+	init_completion(&qhp->rq_drained);
 	mutex_init(&qhp->mutex);
 	init_waitqueue_head(&qhp->wait);
 	atomic_set(&qhp->refcnt, 1);
···
 	init_attr->cap.max_inline_data = T4_MAX_SEND_INLINE;
 	init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0;
 	return 0;
+}
+
+void c4iw_drain_sq(struct ib_qp *ibqp)
+{
+	struct c4iw_qp *qp = to_c4iw_qp(ibqp);
+
+	wait_for_completion(&qp->sq_drained);
+}
+
+void c4iw_drain_rq(struct ib_qp *ibqp)
+{
+	struct c4iw_qp *qp = to_c4iw_qp(ibqp);
+
+	wait_for_completion(&qp->rq_drained);
 }
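Taken together, the cxgb4 hunks show the provider-specific half of the contract: the driver's CQ poll path completes a per-QP completion object once the QP has left RTS and the queue is empty, and drain_sq()/drain_rq() simply wait on it. A condensed sketch of that pattern for a hypothetical driver; the foo_* names, including the queue-state helpers, stand in for driver internals such as t4_sq_empty():

struct foo_qp {
	struct ib_qp ibqp;
	struct completion sq_drained;	/* init_completion() at QP create */
};

bool foo_qp_in_rts(struct foo_qp *qp);	/* driver-internal state tests */
bool foo_sq_empty(struct foo_qp *qp);

/* Called at the tail of the driver's CQ poll loop, under the QP lock. */
static void foo_poll_tail(struct foo_qp *qp)
{
	/* Out of RTS plus an empty SQ means every SQ CQE was reaped. */
	if (unlikely(!foo_qp_in_rts(qp)) && foo_sq_empty(qp))
		complete(&qp->sq_drained);
}

/* Hooked up via dev->ibdev.drain_sq, mirroring c4iw_drain_sq() above. */
static void foo_drain_sq(struct ib_qp *ibqp)
{
	struct foo_qp *qp = container_of(ibqp, struct foo_qp, ibqp);

	wait_for_completion(&qp->sq_drained);
}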
+2
drivers/infiniband/ulp/ipoib/ipoib.h
···
 	unsigned	     tx_tail;
 	unsigned long	     flags;
 	u32		     mtu;
+	unsigned	     max_send_sge;
 };

 struct ipoib_cm_rx_buf {
···
 	int	hca_caps;
 	struct ipoib_ethtool_st ethtool;
 	struct timer_list poll_timer;
+	unsigned max_send_sge;
 };

 struct ipoib_ah {
+21 -2
drivers/infiniband/ulp/ipoib/ipoib_cm.c
···
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ipoib_tx_buf *tx_req;
 	int rc;
+	unsigned usable_sge = tx->max_send_sge - !!skb_headlen(skb);

 	if (unlikely(skb->len > tx->mtu)) {
 		ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
···
 		ipoib_cm_skb_too_long(dev, skb, tx->mtu - IPOIB_ENCAP_LEN);
 		return;
 	}
-
+	if (skb_shinfo(skb)->nr_frags > usable_sge) {
+		if (skb_linearize(skb) < 0) {
+			ipoib_warn(priv, "skb could not be linearized\n");
+			++dev->stats.tx_dropped;
+			++dev->stats.tx_errors;
+			dev_kfree_skb_any(skb);
+			return;
+		}
+		/* Does skb_linearize return ok without reducing nr_frags? */
+		if (skb_shinfo(skb)->nr_frags > usable_sge) {
+			ipoib_warn(priv, "too many frags after skb linearize\n");
+			++dev->stats.tx_dropped;
+			++dev->stats.tx_errors;
+			dev_kfree_skb_any(skb);
+			return;
+		}
+	}
 	ipoib_dbg_data(priv, "sending packet: head 0x%x length %d connection 0x%x\n",
 		       tx->tx_head, skb->len, tx->qp->qp_num);
···
 	struct ib_qp *tx_qp;

 	if (dev->features & NETIF_F_SG)
-		attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;
+		attr.cap.max_send_sge =
+			min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1);

 	tx_qp = ib_create_qp(priv->pd, &attr);
 	if (PTR_ERR(tx_qp) == -EINVAL) {
···
 		attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO;
 		tx_qp = ib_create_qp(priv->pd, &attr);
 	}
+	tx->max_send_sge = attr.cap.max_send_sge;
 	return tx_qp;
 }
+18
drivers/infiniband/ulp/ipoib/ipoib_ib.c
···
 	struct ipoib_tx_buf *tx_req;
 	int hlen, rc;
 	void *phead;
+	unsigned usable_sge = priv->max_send_sge - !!skb_headlen(skb);

 	if (skb_is_gso(skb)) {
 		hlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
···
 		}
 		phead = NULL;
 		hlen  = 0;
+	}
+	if (skb_shinfo(skb)->nr_frags > usable_sge) {
+		if (skb_linearize(skb) < 0) {
+			ipoib_warn(priv, "skb could not be linearized\n");
+			++dev->stats.tx_dropped;
+			++dev->stats.tx_errors;
+			dev_kfree_skb_any(skb);
+			return;
+		}
+		/* Does skb_linearize return ok without reducing nr_frags? */
+		if (skb_shinfo(skb)->nr_frags > usable_sge) {
+			ipoib_warn(priv, "too many frags after skb linearize\n");
+			++dev->stats.tx_dropped;
+			++dev->stats.tx_errors;
+			dev_kfree_skb_any(skb);
+			return;
+		}
 	}

 	ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n",
+4 -1
drivers/infiniband/ulp/ipoib/ipoib_verbs.c
···
 		init_attr.create_flags |= IB_QP_CREATE_NETIF_QP;

 	if (dev->features & NETIF_F_SG)
-		init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;
+		init_attr.cap.max_send_sge =
+			min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1);

 	priv->qp = ib_create_qp(priv->pd, &init_attr);
 	if (IS_ERR(priv->qp)) {
···
 	priv->rx_wr.next = NULL;
 	priv->rx_wr.sg_list = priv->rx_sge;
+
+	priv->max_send_sge = init_attr.cap.max_send_sge;

 	return 0;
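The three ipoib files cooperate: the QP is created with max_send_sge clamped to what the HCA reports instead of assuming MAX_SKB_FRAGS + 1, the granted value is cached, and both the connected-mode and datagram send paths linearize any skb whose fragment count exceeds the usable SGE budget. A condensed sketch of the create-time half, assuming example_priv stands in for the driver's private structure (ca is its ib_device pointer):

static int example_create_tx_qp(struct example_priv *priv, struct ib_pd *pd,
				struct ib_qp_init_attr *init_attr)
{
	struct ib_qp *qp;

	/* Never ask for more SGEs than the device can provide. */
	init_attr->cap.max_send_sge =
		min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1);

	qp = ib_create_qp(pd, init_attr);
	if (IS_ERR(qp))
		return PTR_ERR(qp);

	/*
	 * ib_create_qp() writes the actually granted capabilities back
	 * into init_attr; the send path compares nr_frags against this
	 * cached value and falls back to skb_linearize() when exceeded.
	 */
	priv->max_send_sge = init_attr->cap.max_send_sge;
	priv->qp = qp;
	return 0;
}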
-7
drivers/infiniband/ulp/iser/iscsi_iser.h
···
  * @comp:                iser completion context
  * @fr_pool:             connection fast registration poool
  * @pi_support:          Indicate device T10-PI support
- * @last:                last send wr to signal all flush errors were drained
- * @last_cqe:            cqe handler for last wr
- * @last_comp:           completes when all connection completions consumed
  */
 struct ib_conn {
 	struct rdma_cm_id           *cma_id;
···
 	struct iser_comp	     *comp;
 	struct iser_fr_pool          fr_pool;
 	bool			     pi_support;
-	struct ib_send_wr	     last;
-	struct ib_cqe		     last_cqe;
 	struct ib_cqe		     reg_cqe;
-	struct completion	     last_comp;
 };

 /**
···
 void iser_ctrl_comp(struct ib_cq *cq, struct ib_wc *wc);
 void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc);
 void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc);
-void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc);

 void iser_task_rdma_init(struct iscsi_iser_task *task);
-7
drivers/infiniband/ulp/iser/iser_initiator.c
···
 	kmem_cache_free(ig.desc_cache, desc);
 }

-void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc)
-{
-	struct ib_conn *ib_conn = wc->qp->qp_context;
-
-	complete(&ib_conn->last_comp);
-}
-
 void iser_task_rdma_init(struct iscsi_iser_task *iser_task)

 {
+2 -13
drivers/infiniband/ulp/iser/iser_verbs.c
···
 int iser_conn_terminate(struct iser_conn *iser_conn)
 {
 	struct ib_conn *ib_conn = &iser_conn->ib_conn;
-	struct ib_send_wr *bad_wr;
 	int err = 0;

 	/* terminate the iser conn only if the conn state is UP */
···
 			iser_err("Failed to disconnect, conn: 0x%p err %d\n",
 				 iser_conn, err);

-		/* post an indication that all flush errors were consumed */
-		err = ib_post_send(ib_conn->qp, &ib_conn->last, &bad_wr);
-		if (err) {
-			iser_err("conn %p failed to post last wr", ib_conn);
-			return 1;
-		}
-
-		wait_for_completion(&ib_conn->last_comp);
+		/* block until all flush errors are consumed */
+		ib_drain_sq(ib_conn->qp);
 	}

 	return 1;
···
 	ib_conn->post_recv_buf_count = 0;
 	ib_conn->reg_cqe.done = iser_reg_comp;
-	ib_conn->last_cqe.done = iser_last_comp;
-	ib_conn->last.wr_cqe = &ib_conn->last_cqe;
-	ib_conn->last.opcode = IB_WR_SEND;
-	init_completion(&ib_conn->last_comp);
 }

 /**
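The iser hunks are the motivating conversion for the new core API: a hand-rolled drain, consisting of a sentinel send WR, its CQE handler and a completion object, collapses into a single call. Condensed from the removed and added lines above (error handling elided):

/* Before: post a marker WR and wait for its flush completion. */
ib_conn->last.opcode = IB_WR_SEND;
ib_conn->last.wr_cqe = &ib_conn->last_cqe;  /* ->done completes last_comp */
err = ib_post_send(ib_conn->qp, &ib_conn->last, &bad_wr);
if (!err)
	wait_for_completion(&ib_conn->last_comp);

/* After: the core owns the WR, the CQE handler and the completion. */
ib_drain_sq(ib_conn->qp);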
+4 -36
drivers/infiniband/ulp/srp/ib_srp.c
···
 		 dev->max_pages_per_mr);
 }

-static void srp_drain_done(struct ib_cq *cq, struct ib_wc *wc)
-{
-	struct srp_rdma_ch *ch = cq->cq_context;
-
-	complete(&ch->done);
-}
-
-static struct ib_cqe srp_drain_cqe = {
-	.done = srp_drain_done,
-};
-
 /**
  * srp_destroy_qp() - destroy an RDMA queue pair
  * @ch: SRP RDMA channel.
  *
- * Change a queue pair into the error state and wait until all receive
- * completions have been processed before destroying it. This avoids that
- * the receive completion handler can access the queue pair while it is
+ * Drain the qp before destroying it. This avoids that the receive
+ * completion handler can access the queue pair while it is
  * being destroyed.
  */
 static void srp_destroy_qp(struct srp_rdma_ch *ch)
 {
-	static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
-	static struct ib_recv_wr wr = { 0 };
-	struct ib_recv_wr *bad_wr;
-	int ret;
-
-	wr.wr_cqe = &srp_drain_cqe;
-	/* Destroying a QP and reusing ch->done is only safe if not connected */
-	WARN_ON_ONCE(ch->connected);
-
-	ret = ib_modify_qp(ch->qp, &attr, IB_QP_STATE);
-	WARN_ONCE(ret, "ib_cm_init_qp_attr() returned %d\n", ret);
-	if (ret)
-		goto out;
-
-	init_completion(&ch->done);
-	ret = ib_post_recv(ch->qp, &wr, &bad_wr);
-	WARN_ONCE(ret, "ib_post_recv() returned %d\n", ret);
-	if (ret == 0)
-		wait_for_completion(&ch->done);
-
-out:
+	ib_drain_rq(ch->qp);
 	ib_destroy_qp(ch->qp);
 }
···
 	if (!init_attr)
 		return -ENOMEM;

-	/* queue_size + 1 for ib_drain_qp */
+	/* queue_size + 1 for ib_drain_rq() */
 	recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
 			      ch->comp_vector, IB_POLL_SOFTIRQ);
 	if (IS_ERR(recv_cq)) {
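Note the sizing rule visible in the last hunk: ib_drain_rq() posts one extra receive WR, so both the RQ and its CQ must leave room for it, which is why srp allocates queue_size + 1 entries. A sketch of the rule; ch, dev and target come from the surrounding srp context, and the max_recv_wr assignment is a condensed assumption about how the driver sizes its RQ rather than a line shown in the hunks above:

/* One spare CQE for the drain WR that ib_drain_rq() will post ... */
recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
		      ch->comp_vector, IB_POLL_SOFTIRQ);

/* ... and a matching spare RQ slot so that posting it cannot fail. */
init_attr->cap.max_recv_wr = target->queue_size + 1;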
+328 -606
drivers/infiniband/ulp/srpt/ib_srpt.c
··· 91 91 " instead of using the node_guid of the first HCA."); 92 92 93 93 static struct ib_client srpt_client; 94 - static void srpt_release_channel(struct srpt_rdma_ch *ch); 94 + static void srpt_release_cmd(struct se_cmd *se_cmd); 95 + static void srpt_free_ch(struct kref *kref); 95 96 static int srpt_queue_status(struct se_cmd *cmd); 96 97 static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc); 97 98 static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc); 99 + static void srpt_process_wait_list(struct srpt_rdma_ch *ch); 98 100 99 - /** 100 - * opposite_dma_dir() - Swap DMA_TO_DEVICE and DMA_FROM_DEVICE. 101 + /* 102 + * The only allowed channel state changes are those that change the channel 103 + * state into a state with a higher numerical value. Hence the new > prev test. 101 104 */ 102 - static inline 103 - enum dma_data_direction opposite_dma_dir(enum dma_data_direction dir) 104 - { 105 - switch (dir) { 106 - case DMA_TO_DEVICE: return DMA_FROM_DEVICE; 107 - case DMA_FROM_DEVICE: return DMA_TO_DEVICE; 108 - default: return dir; 109 - } 110 - } 111 - 112 - /** 113 - * srpt_sdev_name() - Return the name associated with the HCA. 114 - * 115 - * Examples are ib0, ib1, ... 116 - */ 117 - static inline const char *srpt_sdev_name(struct srpt_device *sdev) 118 - { 119 - return sdev->device->name; 120 - } 121 - 122 - static enum rdma_ch_state srpt_get_ch_state(struct srpt_rdma_ch *ch) 123 - { 124 - unsigned long flags; 125 - enum rdma_ch_state state; 126 - 127 - spin_lock_irqsave(&ch->spinlock, flags); 128 - state = ch->state; 129 - spin_unlock_irqrestore(&ch->spinlock, flags); 130 - return state; 131 - } 132 - 133 - static enum rdma_ch_state 134 - srpt_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state new_state) 105 + static bool srpt_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state new) 135 106 { 136 107 unsigned long flags; 137 108 enum rdma_ch_state prev; 109 + bool changed = false; 138 110 139 111 spin_lock_irqsave(&ch->spinlock, flags); 140 112 prev = ch->state; 141 - ch->state = new_state; 142 - spin_unlock_irqrestore(&ch->spinlock, flags); 143 - return prev; 144 - } 145 - 146 - /** 147 - * srpt_test_and_set_ch_state() - Test and set the channel state. 148 - * 149 - * Returns true if and only if the channel state has been set to the new state. 
150 - */ 151 - static bool 152 - srpt_test_and_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state old, 153 - enum rdma_ch_state new) 154 - { 155 - unsigned long flags; 156 - enum rdma_ch_state prev; 157 - 158 - spin_lock_irqsave(&ch->spinlock, flags); 159 - prev = ch->state; 160 - if (prev == old) 113 + if (new > prev) { 161 114 ch->state = new; 115 + changed = true; 116 + } 162 117 spin_unlock_irqrestore(&ch->spinlock, flags); 163 - return prev == old; 118 + 119 + return changed; 164 120 } 165 121 166 122 /** ··· 138 182 return; 139 183 140 184 pr_debug("ASYNC event= %d on device= %s\n", event->event, 141 - srpt_sdev_name(sdev)); 185 + sdev->device->name); 142 186 143 187 switch (event->event) { 144 188 case IB_EVENT_PORT_ERR: ··· 176 220 pr_info("SRQ event %d\n", event->event); 177 221 } 178 222 223 + static const char *get_ch_state_name(enum rdma_ch_state s) 224 + { 225 + switch (s) { 226 + case CH_CONNECTING: 227 + return "connecting"; 228 + case CH_LIVE: 229 + return "live"; 230 + case CH_DISCONNECTING: 231 + return "disconnecting"; 232 + case CH_DRAINING: 233 + return "draining"; 234 + case CH_DISCONNECTED: 235 + return "disconnected"; 236 + } 237 + return "???"; 238 + } 239 + 179 240 /** 180 241 * srpt_qp_event() - QP event callback function. 181 242 */ 182 243 static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch) 183 244 { 184 245 pr_debug("QP event %d on cm_id=%p sess_name=%s state=%d\n", 185 - event->event, ch->cm_id, ch->sess_name, srpt_get_ch_state(ch)); 246 + event->event, ch->cm_id, ch->sess_name, ch->state); 186 247 187 248 switch (event->event) { 188 249 case IB_EVENT_COMM_EST: 189 250 ib_cm_notify(ch->cm_id, event->event); 190 251 break; 191 252 case IB_EVENT_QP_LAST_WQE_REACHED: 192 - if (srpt_test_and_set_ch_state(ch, CH_DRAINING, 193 - CH_RELEASING)) 194 - srpt_release_channel(ch); 195 - else 196 - pr_debug("%s: state %d - ignored LAST_WQE.\n", 197 - ch->sess_name, srpt_get_ch_state(ch)); 253 + pr_debug("%s-%d, state %s: received Last WQE event.\n", 254 + ch->sess_name, ch->qp->qp_num, 255 + get_ch_state_name(ch->state)); 198 256 break; 199 257 default: 200 258 pr_err("received unrecognized IB QP event %d\n", event->event); ··· 251 281 struct ib_class_port_info *cif; 252 282 253 283 cif = (struct ib_class_port_info *)mad->data; 254 - memset(cif, 0, sizeof *cif); 284 + memset(cif, 0, sizeof(*cif)); 255 285 cif->base_version = 1; 256 286 cif->class_version = 1; 257 287 cif->resp_time_value = 20; ··· 310 340 return; 311 341 } 312 342 313 - memset(iocp, 0, sizeof *iocp); 343 + memset(iocp, 0, sizeof(*iocp)); 314 344 strcpy(iocp->id_string, SRPT_ID_STRING); 315 345 iocp->guid = cpu_to_be64(srpt_service_guid); 316 346 iocp->vendor_id = cpu_to_be32(sdev->device->attrs.vendor_id); ··· 360 390 } 361 391 362 392 svc_entries = (struct ib_dm_svc_entries *)mad->data; 363 - memset(svc_entries, 0, sizeof *svc_entries); 393 + memset(svc_entries, 0, sizeof(*svc_entries)); 364 394 svc_entries->service_entries[0].id = cpu_to_be64(ioc_guid); 365 395 snprintf(svc_entries->service_entries[0].name, 366 396 sizeof(svc_entries->service_entries[0].name), ··· 454 484 rsp->ah = ah; 455 485 456 486 dm_mad = rsp->mad; 457 - memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof *dm_mad); 487 + memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof(*dm_mad)); 458 488 dm_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP; 459 489 dm_mad->mad_hdr.status = 0; 460 490 ··· 502 532 struct ib_port_attr port_attr; 503 533 int ret; 504 534 505 - memset(&port_modify, 0, sizeof port_modify); 535 + 
memset(&port_modify, 0, sizeof(port_modify)); 506 536 port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP; 507 537 port_modify.clr_port_cap_mask = 0; 508 538 ··· 523 553 goto err_query_port; 524 554 525 555 if (!sport->mad_agent) { 526 - memset(&reg_req, 0, sizeof reg_req); 556 + memset(&reg_req, 0, sizeof(reg_req)); 527 557 reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT; 528 558 reg_req.mgmt_class_version = IB_MGMT_BASE_VERSION; 529 559 set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask); ··· 811 841 } 812 842 813 843 /** 844 + * srpt_zerolength_write() - Perform a zero-length RDMA write. 845 + * 846 + * A quote from the InfiniBand specification: C9-88: For an HCA responder 847 + * using Reliable Connection service, for each zero-length RDMA READ or WRITE 848 + * request, the R_Key shall not be validated, even if the request includes 849 + * Immediate data. 850 + */ 851 + static int srpt_zerolength_write(struct srpt_rdma_ch *ch) 852 + { 853 + struct ib_send_wr wr, *bad_wr; 854 + 855 + memset(&wr, 0, sizeof(wr)); 856 + wr.opcode = IB_WR_RDMA_WRITE; 857 + wr.wr_cqe = &ch->zw_cqe; 858 + wr.send_flags = IB_SEND_SIGNALED; 859 + return ib_post_send(ch->qp, &wr, &bad_wr); 860 + } 861 + 862 + static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc) 863 + { 864 + struct srpt_rdma_ch *ch = cq->cq_context; 865 + 866 + if (wc->status == IB_WC_SUCCESS) { 867 + srpt_process_wait_list(ch); 868 + } else { 869 + if (srpt_set_ch_state(ch, CH_DISCONNECTED)) 870 + schedule_work(&ch->release_work); 871 + else 872 + WARN_ONCE("%s-%d\n", ch->sess_name, ch->qp->qp_num); 873 + } 874 + } 875 + 876 + /** 814 877 * srpt_get_desc_tbl() - Parse the data descriptors of an SRP_CMD request. 815 878 * @ioctx: Pointer to the I/O context associated with the request. 816 879 * @srp_cmd: Pointer to the SRP_CMD request data. 
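Before the remainder of the ib_srpt.c diff, an aside on srpt_zerolength_write() added above: the completion of a signaled zero-length RDMA write is delivered in CQ order, so srpt uses it both to kick the command wait list once the channel goes live and, via a flush error, to detect that a draining channel has consumed all prior work. The delivery mechanism is the generic wr_cqe pattern, sketched here with a hypothetical my_ctx container; the ib_* calls and types are the real verbs API as used in this merge:

struct my_ctx {
	struct ib_cqe cqe;		/* cqe.done = my_write_done */
	struct completion done;
};

static void my_write_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct my_ctx *ctx = container_of(wc->wr_cqe, struct my_ctx, cqe);

	/* wc->status: IB_WC_SUCCESS, or a flush error on a dead queue. */
	complete(&ctx->done);
}

static int my_post_zerolength_write(struct ib_qp *qp, struct my_ctx *ctx)
{
	struct ib_send_wr wr = {}, *bad_wr;

	wr.opcode = IB_WR_RDMA_WRITE;	/* no SGEs, so nothing is written */
	wr.wr_cqe = &ctx->cqe;		/* routes the CQE to my_write_done */
	wr.send_flags = IB_SEND_SIGNALED;
	return ib_post_send(qp, &wr, &bad_wr);
}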
···
 		db = (struct srp_direct_buf *)(srp_cmd->add_data
 					       + add_cdb_offset);
-		memcpy(ioctx->rbufs, db, sizeof *db);
+		memcpy(ioctx->rbufs, db, sizeof(*db));
 		*data_len = be32_to_cpu(db->len);
 	} else if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_INDIRECT) ||
 		   ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_INDIRECT)) {
 		idb = (struct srp_indirect_buf *)(srp_cmd->add_data
 						  + add_cdb_offset);

-		ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof *db;
+		ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof(*db);

 		if (ioctx->n_rbuf >
 		    (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
···
 			ioctx->rbufs = &ioctx->single_rbuf;
 		else {
 			ioctx->rbufs =
-				kmalloc(ioctx->n_rbuf * sizeof *db, GFP_ATOMIC);
+				kmalloc(ioctx->n_rbuf * sizeof(*db), GFP_ATOMIC);
 			if (!ioctx->rbufs) {
 				ioctx->n_rbuf = 0;
 				ret = -ENOMEM;
···
 		}

 		db = idb->desc_list;
-		memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof *db);
+		memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof(*db));
 		*data_len = be32_to_cpu(idb->len);
 	}
 out:
···
 	struct ib_qp_attr *attr;
 	int ret;

-	attr = kzalloc(sizeof *attr, GFP_KERNEL);
+	attr = kzalloc(sizeof(*attr), GFP_KERNEL);
 	if (!attr)
 		return -ENOMEM;
···
 		dir = ioctx->cmd.data_direction;
 		BUG_ON(dir == DMA_NONE);
 		ib_dma_unmap_sg(ch->sport->sdev->device, sg, ioctx->sg_cnt,
-				opposite_dma_dir(dir));
+				target_reverse_dma_direction(&ioctx->cmd));
 		ioctx->mapped_sg_count = 0;
 	}
 }
···
 	ioctx->sg_cnt = sg_cnt = cmd->t_data_nents;

 	count = ib_dma_map_sg(ch->sport->sdev->device, sg, sg_cnt,
-			      opposite_dma_dir(dir));
+			      target_reverse_dma_direction(cmd));
 	if (unlikely(!count))
 		return -EAGAIN;
···
 	/*
 	 * If the command is in a state where the target core is waiting for
-	 * the ib_srpt driver, change the state to the next state. Changing
-	 * the state of the command from SRPT_STATE_NEED_DATA to
-	 * SRPT_STATE_DATA_IN ensures that srpt_xmit_response() will call this
-	 * function a second time.
+	 * the ib_srpt driver, change the state to the next state.
 	 */

 	spin_lock_irqsave(&ioctx->spinlock, flags);
···
 	case SRPT_STATE_NEED_DATA:
 		ioctx->state = SRPT_STATE_DATA_IN;
 		break;
-	case SRPT_STATE_DATA_IN:
 	case SRPT_STATE_CMD_RSP_SENT:
 	case SRPT_STATE_MGMT_RSP_SENT:
 		ioctx->state = SRPT_STATE_DONE;
 		break;
 	default:
+		WARN_ONCE(true, "%s: unexpected I/O context state %d\n",
+			  __func__, state);
 		break;
 	}
 	spin_unlock_irqrestore(&ioctx->spinlock, flags);
-
-	if (state == SRPT_STATE_DONE) {
-		struct srpt_rdma_ch *ch = ioctx->ch;
-
-		BUG_ON(ch->sess == NULL);
-
-		target_put_sess_cmd(&ioctx->cmd);
-		goto out;
-	}

 	pr_debug("Aborting cmd with state %d and tag %lld\n", state,
 		 ioctx->cmd.tag);
···
 	case SRPT_STATE_NEW:
 	case SRPT_STATE_DATA_IN:
 	case SRPT_STATE_MGMT:
+	case SRPT_STATE_DONE:
 		/*
 		 * Do nothing - defer abort processing until
 		 * srpt_queue_response() is invoked.
 		 */
-		WARN_ON(!transport_check_aborted_status(&ioctx->cmd, false));
 		break;
 	case SRPT_STATE_NEED_DATA:
-		/* DMA_TO_DEVICE (write) - RDMA read error. */
-
-		/* XXX(hch): this is a horrible layering violation.. */
-		spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags);
-		ioctx->cmd.transport_state &= ~CMD_T_ACTIVE;
-		spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags);
+		pr_debug("tag %#llx: RDMA read error\n", ioctx->cmd.tag);
+		transport_generic_request_failure(&ioctx->cmd,
+					TCM_CHECK_CONDITION_ABORT_CMD);
 		break;
 	case SRPT_STATE_CMD_RSP_SENT:
 		/*
···
 		 * not been received in time.
 		 */
 		srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx);
-		target_put_sess_cmd(&ioctx->cmd);
+		transport_generic_free_cmd(&ioctx->cmd, 0);
 		break;
 	case SRPT_STATE_MGMT_RSP_SENT:
-		srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
-		target_put_sess_cmd(&ioctx->cmd);
+		transport_generic_free_cmd(&ioctx->cmd, 0);
 		break;
 	default:
 		WARN(1, "Unexpected command state (%d)", state);
 		break;
 	}

-out:
 	return state;
 }
···
 		container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe);

 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+		/*
+		 * Note: if an RDMA write error completion is received that
+		 * means that a SEND also has been posted. Defer further
+		 * processing of the associated command until the send error
+		 * completion has been received.
+		 */
 		pr_info("RDMA_WRITE for ioctx 0x%p failed with status %d\n",
 			ioctx, wc->status);
-		srpt_abort_cmd(ioctx);
 	}
 }
···
 	sense_data_len = ioctx->cmd.scsi_sense_length;
 	WARN_ON(sense_data_len > sizeof(ioctx->sense_data));

-	memset(srp_rsp, 0, sizeof *srp_rsp);
+	memset(srp_rsp, 0, sizeof(*srp_rsp));
 	srp_rsp->opcode = SRP_RSP;
 	srp_rsp->req_lim_delta =
 		cpu_to_be32(1 + atomic_xchg(&ch->req_lim_delta, 0));
···
 	srp_rsp = ioctx->ioctx.buf;
 	BUG_ON(!srp_rsp);
-	memset(srp_rsp, 0, sizeof *srp_rsp);
+	memset(srp_rsp, 0, sizeof(*srp_rsp));

 	srp_rsp->opcode = SRP_RSP;
 	srp_rsp->req_lim_delta =
···
 	return resp_len;
 }

-#define NO_SUCH_LUN ((uint64_t)-1LL)
-
-/*
- * SCSI LUN addressing method. See also SAM-2 and the section about
- * eight byte LUNs.
- */
-enum scsi_lun_addr_method {
-	SCSI_LUN_ADDR_METHOD_PERIPHERAL = 0,
-	SCSI_LUN_ADDR_METHOD_FLAT = 1,
-	SCSI_LUN_ADDR_METHOD_LUN = 2,
-	SCSI_LUN_ADDR_METHOD_EXTENDED_LUN = 3,
-};
-
-/*
- * srpt_unpack_lun() - Convert from network LUN to linear LUN.
- *
- * Convert an 2-byte, 4-byte, 6-byte or 8-byte LUN structure in network byte
- * order (big endian) to a linear LUN. Supports three LUN addressing methods:
- * peripheral, flat and logical unit. See also SAM-2, section 4.9.4 (page 40).
- */
-static uint64_t srpt_unpack_lun(const uint8_t *lun, int len)
-{
-	uint64_t res = NO_SUCH_LUN;
-	int addressing_method;
-
-	if (unlikely(len < 2)) {
-		pr_err("Illegal LUN length %d, expected 2 bytes or more\n",
-		       len);
-		goto out;
-	}
-
-	switch (len) {
-	case 8:
-		if ((*((__be64 *)lun) &
-		     cpu_to_be64(0x0000FFFFFFFFFFFFLL)) != 0)
-			goto out_err;
-		break;
-	case 4:
-		if (*((__be16 *)&lun[2]) != 0)
-			goto out_err;
-		break;
-	case 6:
-		if (*((__be32 *)&lun[2]) != 0)
-			goto out_err;
-		break;
-	case 2:
-		break;
-	default:
-		goto out_err;
-	}
-
-	addressing_method = (*lun) >> 6; /* highest two bits of byte 0 */
-	switch (addressing_method) {
-	case SCSI_LUN_ADDR_METHOD_PERIPHERAL:
-	case SCSI_LUN_ADDR_METHOD_FLAT:
-	case SCSI_LUN_ADDR_METHOD_LUN:
-		res = *(lun + 1) | (((*lun) & 0x3f) << 8);
-		break;
-
-	case SCSI_LUN_ADDR_METHOD_EXTENDED_LUN:
-	default:
-		pr_err("Unimplemented LUN addressing method %u\n",
-		       addressing_method);
-		break;
-	}
-
-out:
-	return res;
-
-out_err:
-	pr_err("Support for multi-level LUNs has not yet been implemented\n");
-	goto out;
-}
-
 static int srpt_check_stop_free(struct se_cmd *cmd)
 {
 	struct srpt_send_ioctx *ioctx = container_of(cmd,
···
 /**
  * srpt_handle_cmd() - Process SRP_CMD.
  */
-static int srpt_handle_cmd(struct srpt_rdma_ch *ch,
-			   struct srpt_recv_ioctx *recv_ioctx,
-			   struct srpt_send_ioctx *send_ioctx)
+static void srpt_handle_cmd(struct srpt_rdma_ch *ch,
+			    struct srpt_recv_ioctx *recv_ioctx,
+			    struct srpt_send_ioctx *send_ioctx)
 {
 	struct se_cmd *cmd;
 	struct srp_cmd *srp_cmd;
-	uint64_t unpacked_lun;
 	u64 data_len;
 	enum dma_data_direction dir;
-	sense_reason_t ret;
 	int rc;

 	BUG_ON(!send_ioctx);
···
 	if (srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &data_len)) {
 		pr_err("0x%llx: parsing SRP descriptor table failed.\n",
 		       srp_cmd->tag);
-		ret = TCM_INVALID_CDB_FIELD;
-		goto send_sense;
+		goto release_ioctx;
 	}

-	unpacked_lun = srpt_unpack_lun((uint8_t *)&srp_cmd->lun,
-				       sizeof(srp_cmd->lun));
 	rc = target_submit_cmd(cmd, ch->sess, srp_cmd->cdb,
-			&send_ioctx->sense_data[0], unpacked_lun, data_len,
-			TCM_SIMPLE_TAG, dir, TARGET_SCF_ACK_KREF);
+			       &send_ioctx->sense_data[0],
+			       scsilun_to_int(&srp_cmd->lun), data_len,
+			       TCM_SIMPLE_TAG, dir, TARGET_SCF_ACK_KREF);
 	if (rc != 0) {
-		ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
-		goto send_sense;
+		pr_debug("target_submit_cmd() returned %d for tag %#llx\n", rc,
+			 srp_cmd->tag);
+		goto release_ioctx;
 	}
-	return 0;
+	return;

-send_sense:
-	transport_send_check_condition_and_sense(cmd, ret, 0);
-	return -1;
-}
-
-/**
- * srpt_rx_mgmt_fn_tag() - Process a task management function by tag.
- * @ch: RDMA channel of the task management request.
- * @fn: Task management function to perform.
- * @req_tag: Tag of the SRP task management request.
- * @mgmt_ioctx: I/O context of the task management request.
- *
- * Returns zero if the target core will process the task management
- * request asynchronously.
- *
- * Note: It is assumed that the initiator serializes tag-based task management
- * requests.
- */
-static int srpt_rx_mgmt_fn_tag(struct srpt_send_ioctx *ioctx, u64 tag)
-{
-	struct srpt_device *sdev;
-	struct srpt_rdma_ch *ch;
-	struct srpt_send_ioctx *target;
-	int ret, i;
-
-	ret = -EINVAL;
-	ch = ioctx->ch;
-	BUG_ON(!ch);
-	BUG_ON(!ch->sport);
-	sdev = ch->sport->sdev;
-	BUG_ON(!sdev);
-	spin_lock_irq(&sdev->spinlock);
-	for (i = 0; i < ch->rq_size; ++i) {
-		target = ch->ioctx_ring[i];
-		if (target->cmd.se_lun == ioctx->cmd.se_lun &&
-		    target->cmd.tag == tag &&
-		    srpt_get_cmd_state(target) != SRPT_STATE_DONE) {
-			ret = 0;
-			/* now let the target core abort &target->cmd; */
-			break;
-		}
-	}
-	spin_unlock_irq(&sdev->spinlock);
-	return ret;
+release_ioctx:
+	send_ioctx->state = SRPT_STATE_DONE;
+	srpt_release_cmd(cmd);
 }

 static int srp_tmr_to_tcm(int fn)
···
 	struct srp_tsk_mgmt *srp_tsk;
 	struct se_cmd *cmd;
 	struct se_session *sess = ch->sess;
-	uint64_t unpacked_lun;
-	uint32_t tag = 0;
 	int tcm_tmr;
 	int rc;
···
 	srpt_set_cmd_state(send_ioctx, SRPT_STATE_MGMT);
 	send_ioctx->cmd.tag = srp_tsk->tag;
 	tcm_tmr = srp_tmr_to_tcm(srp_tsk->tsk_mgmt_func);
-	if (tcm_tmr < 0) {
-		send_ioctx->cmd.se_tmr_req->response =
-			TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED;
-		goto fail;
-	}
-	unpacked_lun = srpt_unpack_lun((uint8_t *)&srp_tsk->lun,
-				       sizeof(srp_tsk->lun));
-
-	if (srp_tsk->tsk_mgmt_func == SRP_TSK_ABORT_TASK) {
-		rc = srpt_rx_mgmt_fn_tag(send_ioctx, srp_tsk->task_tag);
-		if (rc < 0) {
-			send_ioctx->cmd.se_tmr_req->response =
-				TMR_TASK_DOES_NOT_EXIST;
-			goto fail;
-		}
-		tag = srp_tsk->task_tag;
-	}
-	rc = target_submit_tmr(&send_ioctx->cmd, sess, NULL, unpacked_lun,
-				srp_tsk, tcm_tmr, GFP_KERNEL, tag,
-				TARGET_SCF_ACK_KREF);
+	rc = target_submit_tmr(&send_ioctx->cmd, sess, NULL,
+			       scsilun_to_int(&srp_tsk->lun), srp_tsk, tcm_tmr,
+			       GFP_KERNEL, srp_tsk->task_tag,
+			       TARGET_SCF_ACK_KREF);
 	if (rc != 0) {
 		send_ioctx->cmd.se_tmr_req->response = TMR_FUNCTION_REJECTED;
 		goto fail;
···
 			 struct srpt_send_ioctx *send_ioctx)
 {
 	struct srp_cmd *srp_cmd;
-	enum rdma_ch_state ch_state;

 	BUG_ON(!ch);
 	BUG_ON(!recv_ioctx);
···
 				   recv_ioctx->ioctx.dma, srp_max_req_size,
 				   DMA_FROM_DEVICE);

-	ch_state = srpt_get_ch_state(ch);
-	if (unlikely(ch_state == CH_CONNECTING)) {
+	if (unlikely(ch->state == CH_CONNECTING)) {
 		list_add_tail(&recv_ioctx->wait_list, &ch->cmd_wait_list);
 		goto out;
 	}

-	if (unlikely(ch_state != CH_LIVE))
+	if (unlikely(ch->state != CH_LIVE))
 		goto out;

 	srp_cmd = recv_ioctx->ioctx.buf;
···
 	}
 }

+/*
+ * This function must be called from the context in which RDMA completions are
+ * processed because it accesses the wait list without protection against
+ * access from other threads.
+ */
+static void srpt_process_wait_list(struct srpt_rdma_ch *ch)
+{
+	struct srpt_send_ioctx *ioctx;
+
+	while (!list_empty(&ch->cmd_wait_list) &&
+	       ch->state >= CH_LIVE &&
+	       (ioctx = srpt_get_send_ioctx(ch)) != NULL) {
+		struct srpt_recv_ioctx *recv_ioctx;
+
+		recv_ioctx = list_first_entry(&ch->cmd_wait_list,
+					      struct srpt_recv_ioctx,
+					      wait_list);
+		list_del(&recv_ioctx->wait_list);
+		srpt_handle_new_iu(ch, recv_ioctx, ioctx);
+	}
+}
+
 /**
  * Note: Although this has not yet been observed during tests, at least in
  * theory it is possible that the srpt_get_send_ioctx() call invoked by
···
 	atomic_inc(&ch->sq_wr_avail);

-	if (wc->status != IB_WC_SUCCESS) {
+	if (wc->status != IB_WC_SUCCESS)
 		pr_info("sending response for ioctx 0x%p failed"
 			" with status %d\n", ioctx, wc->status);
-
-		atomic_dec(&ch->req_lim);
-		srpt_abort_cmd(ioctx);
-		goto out;
-	}

 	if (state != SRPT_STATE_DONE) {
 		srpt_unmap_sg_to_ib_sge(ch, ioctx);
···
 			" wr_id = %u.\n", ioctx->ioctx.index);
 	}

-out:
-	while (!list_empty(&ch->cmd_wait_list) &&
-	       srpt_get_ch_state(ch) == CH_LIVE &&
-	       (ioctx = srpt_get_send_ioctx(ch)) != NULL) {
-		struct srpt_recv_ioctx *recv_ioctx;
-
-		recv_ioctx = list_first_entry(&ch->cmd_wait_list,
-					      struct srpt_recv_ioctx,
-					      wait_list);
-		list_del(&recv_ioctx->wait_list);
-		srpt_handle_new_iu(ch, recv_ioctx, ioctx);
-	}
+	srpt_process_wait_list(ch);
 }
···
 	WARN_ON(ch->rq_size < 1);

 	ret = -ENOMEM;
-	qp_init = kzalloc(sizeof *qp_init, GFP_KERNEL);
+	qp_init = kzalloc(sizeof(*qp_init), GFP_KERNEL);
 	if (!qp_init)
 		goto out;
···
 }

 /**
- * __srpt_close_ch() - Close an RDMA channel by setting the QP error state.
+ * srpt_close_ch() - Close an RDMA channel.
  *
- * Reset the QP and make sure all resources associated with the channel will
- * be deallocated at an appropriate time.
+ * Make sure all resources associated with the channel will be deallocated at
+ * an appropriate time.
  *
- * Note: The caller must hold ch->sport->sdev->spinlock.
+ * Returns true if and only if the channel state has been modified into
+ * CH_DRAINING.
  */
-static void __srpt_close_ch(struct srpt_rdma_ch *ch)
+static bool srpt_close_ch(struct srpt_rdma_ch *ch)
 {
-	enum rdma_ch_state prev_state;
-	unsigned long flags;
+	int ret;

-	spin_lock_irqsave(&ch->spinlock, flags);
-	prev_state = ch->state;
-	switch (prev_state) {
-	case CH_CONNECTING:
-	case CH_LIVE:
-		ch->state = CH_DISCONNECTING;
-		break;
-	default:
-		break;
+	if (!srpt_set_ch_state(ch, CH_DRAINING)) {
+		pr_debug("%s-%d: already closed\n", ch->sess_name,
+			 ch->qp->qp_num);
+		return false;
 	}
-	spin_unlock_irqrestore(&ch->spinlock, flags);

-	switch (prev_state) {
-	case CH_CONNECTING:
-		ib_send_cm_rej(ch->cm_id, IB_CM_REJ_NO_RESOURCES, NULL, 0,
-			       NULL, 0);
-		/* fall through */
-	case CH_LIVE:
-		if (ib_send_cm_dreq(ch->cm_id, NULL, 0) < 0)
-			pr_err("sending CM DREQ failed.\n");
-		break;
-	case CH_DISCONNECTING:
-		break;
-	case CH_DRAINING:
-	case CH_RELEASING:
-		break;
+	kref_get(&ch->kref);
+
+	ret = srpt_ch_qp_err(ch);
+	if (ret < 0)
+		pr_err("%s-%d: changing queue pair into error state failed: %d\n",
+		       ch->sess_name, ch->qp->qp_num, ret);
+
+	pr_debug("%s-%d: queued zerolength write\n", ch->sess_name,
+		 ch->qp->qp_num);
+	ret = srpt_zerolength_write(ch);
+	if (ret < 0) {
+		pr_err("%s-%d: queuing zero-length write failed: %d\n",
+		       ch->sess_name, ch->qp->qp_num, ret);
+		if (srpt_set_ch_state(ch, CH_DISCONNECTED))
+			schedule_work(&ch->release_work);
+		else
+			WARN_ON_ONCE(true);
 	}
+
+	kref_put(&ch->kref, srpt_free_ch);
+
+	return true;
 }

-/**
- * srpt_close_ch() - Close an RDMA channel.
+/*
+ * Change the channel state into CH_DISCONNECTING. If a channel has not yet
+ * reached the connected state, close it. If a channel is in the connected
+ * state, send a DREQ. If a DREQ has been received, send a DREP. Note: it is
+ * the responsibility of the caller to ensure that this function is not
+ * invoked concurrently with the code that accepts a connection. This means
+ * that this function must either be invoked from inside a CM callback
+ * function or that it must be invoked with the srpt_port.mutex held.
  */
-static void srpt_close_ch(struct srpt_rdma_ch *ch)
+static int srpt_disconnect_ch(struct srpt_rdma_ch *ch)
 {
-	struct srpt_device *sdev;
+	int ret;

-	sdev = ch->sport->sdev;
-	spin_lock_irq(&sdev->spinlock);
-	__srpt_close_ch(ch);
-	spin_unlock_irq(&sdev->spinlock);
+	if (!srpt_set_ch_state(ch, CH_DISCONNECTING))
+		return -ENOTCONN;
+
+	ret = ib_send_cm_dreq(ch->cm_id, NULL, 0);
+	if (ret < 0)
+		ret = ib_send_cm_drep(ch->cm_id, NULL, 0);
+
+	if (ret < 0 && srpt_close_ch(ch))
+		ret = 0;
+
+	return ret;
+}
+
+static void __srpt_close_all_ch(struct srpt_device *sdev)
+{
+	struct srpt_rdma_ch *ch;
+
+	lockdep_assert_held(&sdev->mutex);
+
+	list_for_each_entry(ch, &sdev->rch_list, list) {
+		if (srpt_disconnect_ch(ch) >= 0)
+			pr_info("Closing channel %s-%d because target %s has been disabled\n",
+				ch->sess_name, ch->qp->qp_num,
+				sdev->device->name);
+		srpt_close_ch(ch);
+	}
 }
···
  */
 static int srpt_shutdown_session(struct se_session *se_sess)
 {
-	struct srpt_rdma_ch *ch = se_sess->fabric_sess_ptr;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ch->spinlock, flags);
-	if (ch->in_shutdown) {
-		spin_unlock_irqrestore(&ch->spinlock, flags);
-		return true;
-	}
-
-	ch->in_shutdown = true;
-	target_sess_cmd_list_set_waiting(se_sess);
-	spin_unlock_irqrestore(&ch->spinlock, flags);
-
-	return true;
+	return 1;
 }

-/**
- * srpt_drain_channel() - Drain a channel by resetting the IB queue pair.
- * @cm_id: Pointer to the CM ID of the channel to be drained.
- *
- * Note: Must be called from inside srpt_cm_handler to avoid a race between
- * accessing sdev->spinlock and the call to kfree(sdev) in srpt_remove_one()
- * (the caller of srpt_cm_handler holds the cm_id spinlock; srpt_remove_one()
- * waits until all target sessions for the associated IB device have been
- * unregistered and target session registration involves a call to
- * ib_destroy_cm_id(), which locks the cm_id spinlock and hence waits until
- * this function has finished).
- */
-static void srpt_drain_channel(struct ib_cm_id *cm_id)
+static void srpt_free_ch(struct kref *kref)
 {
-	struct srpt_device *sdev;
-	struct srpt_rdma_ch *ch;
-	int ret;
-	bool do_reset = false;
+	struct srpt_rdma_ch *ch = container_of(kref, struct srpt_rdma_ch, kref);

-	WARN_ON_ONCE(irqs_disabled());
-
-	sdev = cm_id->context;
-	BUG_ON(!sdev);
-	spin_lock_irq(&sdev->spinlock);
-	list_for_each_entry(ch, &sdev->rch_list, list) {
-		if (ch->cm_id == cm_id) {
-			do_reset = srpt_test_and_set_ch_state(ch,
-					CH_CONNECTING, CH_DRAINING) ||
-				   srpt_test_and_set_ch_state(ch,
-					CH_LIVE, CH_DRAINING) ||
-				   srpt_test_and_set_ch_state(ch,
-					CH_DISCONNECTING, CH_DRAINING);
-			break;
-		}
-	}
-	spin_unlock_irq(&sdev->spinlock);
-
-	if (do_reset) {
-		if (ch->sess)
-			srpt_shutdown_session(ch->sess);
-
-		ret = srpt_ch_qp_err(ch);
-		if (ret < 0)
-			pr_err("Setting queue pair in error state"
-			       " failed: %d\n", ret);
-	}
-}
-
-/**
- * srpt_find_channel() - Look up an RDMA channel.
- * @cm_id: Pointer to the CM ID of the channel to be looked up.
- *
- * Return NULL if no matching RDMA channel has been found.
- */
-static struct srpt_rdma_ch *srpt_find_channel(struct srpt_device *sdev,
-					      struct ib_cm_id *cm_id)
-{
-	struct srpt_rdma_ch *ch;
-	bool found;
-
-	WARN_ON_ONCE(irqs_disabled());
-	BUG_ON(!sdev);
-
-	found = false;
-	spin_lock_irq(&sdev->spinlock);
-	list_for_each_entry(ch, &sdev->rch_list, list) {
-		if (ch->cm_id == cm_id) {
-			found = true;
-			break;
-		}
-	}
-	spin_unlock_irq(&sdev->spinlock);
-
-	return found ? ch : NULL;
-}
-
-/**
- * srpt_release_channel() - Release channel resources.
- *
- * Schedules the actual release because:
- * - Calling the ib_destroy_cm_id() call from inside an IB CM callback would
-
-   trigger a deadlock.
- * - It is not safe to call TCM transport_* functions from interrupt context.
- */
-static void srpt_release_channel(struct srpt_rdma_ch *ch)
-{
-	schedule_work(&ch->release_work);
+	kfree(ch);
 }

 static void srpt_release_channel_work(struct work_struct *w)
···
 	struct se_session *se_sess;

 	ch = container_of(w, struct srpt_rdma_ch, release_work);
-	pr_debug("ch = %p; ch->sess = %p; release_done = %p\n", ch, ch->sess,
-		 ch->release_done);
+	pr_debug("%s: %s-%d; release_done = %p\n", __func__, ch->sess_name,
+		 ch->qp->qp_num, ch->release_done);

 	sdev = ch->sport->sdev;
 	BUG_ON(!sdev);
···
 	se_sess = ch->sess;
 	BUG_ON(!se_sess);

+	target_sess_cmd_list_set_waiting(se_sess);
 	target_wait_for_sess_cmds(se_sess);

 	transport_deregister_session_configfs(se_sess);
···
 			     ch->sport->sdev, ch->rq_size,
 			     ch->rsp_size, DMA_TO_DEVICE);

-	spin_lock_irq(&sdev->spinlock);
-	list_del(&ch->list);
-	spin_unlock_irq(&sdev->spinlock);
-
+	mutex_lock(&sdev->mutex);
+	list_del_init(&ch->list);
 	if (ch->release_done)
 		complete(ch->release_done);
+	mutex_unlock(&sdev->mutex);

 	wake_up(&sdev->ch_releaseQ);

-	kfree(ch);
+	kref_put(&ch->kref, srpt_free_ch);
 }
···
 	       be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[0]),
 	       be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[8]));

-	rsp = kzalloc(sizeof *rsp, GFP_KERNEL);
-	rej = kzalloc(sizeof *rej, GFP_KERNEL);
-	rep_param = kzalloc(sizeof *rep_param, GFP_KERNEL);
+	rsp = kzalloc(sizeof(*rsp), GFP_KERNEL);
+	rej = kzalloc(sizeof(*rej), GFP_KERNEL);
+	rep_param = kzalloc(sizeof(*rep_param), GFP_KERNEL);

 	if (!rsp || !rej || !rep_param) {
 		ret = -ENOMEM;
···
 	if ((req->req_flags & SRP_MTCH_ACTION) == SRP_MULTICHAN_SINGLE) {
 		rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN;

-		spin_lock_irq(&sdev->spinlock);
+		mutex_lock(&sdev->mutex);

 		list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
 			if (!memcmp(ch->i_port_id, req->initiator_port_id, 16)
···
 			    && param->port == ch->sport->port
 			    && param->listen_id == ch->sport->sdev->cm_id
 			    && ch->cm_id) {
-				enum rdma_ch_state ch_state;
-
-				ch_state = srpt_get_ch_state(ch);
-				if (ch_state != CH_CONNECTING
-				    && ch_state != CH_LIVE)
+				if (srpt_disconnect_ch(ch) < 0)
 					continue;
-
-				/* found an existing channel */
-				pr_debug("Found existing channel %s"
-					 " cm_id= %p state= %d\n",
-					 ch->sess_name, ch->cm_id, ch_state);
-
-				__srpt_close_ch(ch);
-
+				pr_info("Relogin - closed existing channel %s\n",
+					ch->sess_name);
 				rsp->rsp_flags =
 					SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
 			}
 		}

-		spin_unlock_irq(&sdev->spinlock);
+		mutex_unlock(&sdev->mutex);

 	} else
 		rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
···
 		goto reject;
 	}

-	ch = kzalloc(sizeof *ch, GFP_KERNEL);
+	ch = kzalloc(sizeof(*ch), GFP_KERNEL);
 	if (!ch) {
 		rej->reason = cpu_to_be32(
 			      SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
···
 		goto reject;
 	}

+	kref_init(&ch->kref);
+	ch->zw_cqe.done = srpt_zerolength_write_done;
 	INIT_WORK(&ch->release_work, srpt_release_channel_work);
 	memcpy(ch->i_port_id, req->initiator_port_id, 16);
 	memcpy(ch->t_port_id, req->target_port_id, 16);
 	ch->sport = &sdev->port[param->port - 1];
 	ch->cm_id = cm_id;
+	cm_id->context = ch;
 	/*
 	 * Avoid QUEUE_FULL conditions by limiting the number of buffers used
 	 * for the SRP protocol to the command queue size.
···
 	/* create cm reply */
 	rep_param->qp_num = ch->qp->qp_num;
 	rep_param->private_data = (void *)rsp;
-	rep_param->private_data_len = sizeof *rsp;
+	rep_param->private_data_len = sizeof(*rsp);
 	rep_param->rnr_retry_count = 7;
 	rep_param->flow_control = 1;
 	rep_param->failover_accepted = 0;
···
 		goto release_channel;
 	}

-	spin_lock_irq(&sdev->spinlock);
+	mutex_lock(&sdev->mutex);
 	list_add_tail(&ch->list, &sdev->rch_list);
-	spin_unlock_irq(&sdev->spinlock);
+	mutex_unlock(&sdev->mutex);

 	goto out;

 release_channel:
-	srpt_set_ch_state(ch, CH_RELEASING);
+	srpt_disconnect_ch(ch);
 	transport_deregister_session_configfs(ch->sess);
 	transport_deregister_session(ch->sess);
 	ch->sess = NULL;
···
 				   | SRP_BUF_FORMAT_INDIRECT);

 	ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
-		       (void *)rej, sizeof *rej);
+		       (void *)rej, sizeof(*rej));

 out:
 	kfree(rep_param);
···
 	return ret;
 }

-static void srpt_cm_rej_recv(struct ib_cm_id *cm_id)
+static void srpt_cm_rej_recv(struct srpt_rdma_ch *ch,
+			     enum ib_cm_rej_reason reason,
+			     const u8 *private_data,
+			     u8 private_data_len)
 {
-	pr_info("Received IB REJ for cm_id %p.\n", cm_id);
-	srpt_drain_channel(cm_id);
+	char *priv = NULL;
+	int i;
+
+	if (private_data_len && (priv = kmalloc(private_data_len * 3 + 1,
+						GFP_KERNEL))) {
+		for (i = 0; i < private_data_len; i++)
+			sprintf(priv + 3 * i, " %02x", private_data[i]);
+	}
+	pr_info("Received CM REJ for ch %s-%d; reason %d%s%s.\n",
+		ch->sess_name, ch->qp->qp_num, reason, private_data_len ?
+		"; private data" : "", priv ? priv : " (?)");
+	kfree(priv);
 }

 /**
···
  * An IB_CM_RTU_RECEIVED message indicates that the connection is established
  * and that the recipient may begin transmitting (RTU = ready to use).
  */
-static void srpt_cm_rtu_recv(struct ib_cm_id *cm_id)
+static void srpt_cm_rtu_recv(struct srpt_rdma_ch *ch)
 {
-	struct srpt_rdma_ch *ch;
 	int ret;

-	ch = srpt_find_channel(cm_id->context, cm_id);
-	BUG_ON(!ch);
-
-	if (srpt_test_and_set_ch_state(ch, CH_CONNECTING, CH_LIVE)) {
-		struct srpt_recv_ioctx *ioctx, *ioctx_tmp;
-
+	if (srpt_set_ch_state(ch, CH_LIVE)) {
 		ret = srpt_ch_qp_rts(ch, ch->qp);

-		list_for_each_entry_safe(ioctx, ioctx_tmp, &ch->cmd_wait_list,
-					 wait_list) {
-			list_del(&ioctx->wait_list);
-			srpt_handle_new_iu(ch, ioctx, NULL);
-		}
-		if (ret)
+		if (ret == 0) {
+			/* Trigger wait list processing. */
+			ret = srpt_zerolength_write(ch);
+			WARN_ONCE(ret < 0, "%d\n", ret);
+		} else {
 			srpt_close_ch(ch);
+		}
 	}
-}
-
-static void srpt_cm_timewait_exit(struct ib_cm_id *cm_id)
-{
-	pr_info("Received IB TimeWait exit for cm_id %p.\n", cm_id);
-	srpt_drain_channel(cm_id);
-}
-
-static void srpt_cm_rep_error(struct ib_cm_id *cm_id)
-{
-	pr_info("Received IB REP error for cm_id %p.\n", cm_id);
-	srpt_drain_channel(cm_id);
-}
-
-/**
- * srpt_cm_dreq_recv() - Process reception of a DREQ message.
- */
-static void srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
-{
-	struct srpt_rdma_ch *ch;
-	unsigned long flags;
-	bool send_drep = false;
-
-	ch = srpt_find_channel(cm_id->context, cm_id);
-	BUG_ON(!ch);
-
-	pr_debug("cm_id= %p ch->state= %d\n", cm_id, srpt_get_ch_state(ch));
-
-	spin_lock_irqsave(&ch->spinlock, flags);
-	switch (ch->state) {
-	case CH_CONNECTING:
-	case CH_LIVE:
-		send_drep = true;
-		ch->state = CH_DISCONNECTING;
-		break;
-	case CH_DISCONNECTING:
-	case CH_DRAINING:
-	case CH_RELEASING:
-		WARN(true, "unexpected channel state %d\n", ch->state);
-		break;
-	}
-	spin_unlock_irqrestore(&ch->spinlock, flags);
-
-	if (send_drep) {
-		if (ib_send_cm_drep(ch->cm_id, NULL, 0) < 0)
-			pr_err("Sending IB DREP failed.\n");
-		pr_info("Received DREQ and sent DREP for session %s.\n",
-			ch->sess_name);
-	}
-}
-
-/**
- * srpt_cm_drep_recv() - Process reception of a DREP message.
- */
-static void srpt_cm_drep_recv(struct ib_cm_id *cm_id)
-{
-	pr_info("Received InfiniBand DREP message for cm_id %p.\n", cm_id);
-	srpt_drain_channel(cm_id);
 }

 /**
···
  */
 static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
 {
+	struct srpt_rdma_ch *ch = cm_id->context;
 	int ret;

 	ret = 0;
···
 				       event->private_data);
 		break;
 	case IB_CM_REJ_RECEIVED:
-		srpt_cm_rej_recv(cm_id);
+		srpt_cm_rej_recv(ch, event->param.rej_rcvd.reason,
+				 event->private_data,
+				 IB_CM_REJ_PRIVATE_DATA_SIZE);
 		break;
 	case IB_CM_RTU_RECEIVED:
 	case IB_CM_USER_ESTABLISHED:
-		srpt_cm_rtu_recv(cm_id);
+		srpt_cm_rtu_recv(ch);
 		break;
 	case IB_CM_DREQ_RECEIVED:
-		srpt_cm_dreq_recv(cm_id);
+		srpt_disconnect_ch(ch);
 		break;
 	case IB_CM_DREP_RECEIVED:
-		srpt_cm_drep_recv(cm_id);
+		pr_info("Received CM DREP message for ch %s-%d.\n",
+			ch->sess_name, ch->qp->qp_num);
+		srpt_close_ch(ch);
 		break;
 	case IB_CM_TIMEWAIT_EXIT:
-		srpt_cm_timewait_exit(cm_id);
+		pr_info("Received CM TimeWait exit for ch %s-%d.\n",
+			ch->sess_name, ch->qp->qp_num);
+		srpt_close_ch(ch);
 		break;
 	case IB_CM_REP_ERROR:
-		srpt_cm_rep_error(cm_id);
+		pr_info("Received CM REP error for ch %s-%d.\n", ch->sess_name,
+			ch->qp->qp_num);
 		break;
 	case IB_CM_DREQ_ERROR:
-		pr_info("Received IB DREQ ERROR event.\n");
+		pr_info("Received CM DREQ ERROR event.\n");
 		break;
 	case IB_CM_MRA_RECEIVED:
-		pr_info("Received IB MRA event\n");
+		pr_info("Received CM MRA event\n");
 		break;
 	default:
-		pr_err("received unrecognized IB CM event %d\n", event->event);
+		pr_err("received unrecognized CM event %d\n", event->event);
 		break;
 	}
···
  */
 static int srpt_write_pending(struct se_cmd *se_cmd)
 {
-	struct srpt_rdma_ch *ch;
-	struct srpt_send_ioctx *ioctx;
+	struct srpt_send_ioctx *ioctx =
+		container_of(se_cmd, struct srpt_send_ioctx, cmd);
+	struct srpt_rdma_ch *ch = ioctx->ch;
 	enum srpt_command_state new_state;
-	enum rdma_ch_state ch_state;
-	int ret;
-
-	ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd);

 	new_state = srpt_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA);
 	WARN_ON(new_state == SRPT_STATE_DONE);
-
-	ch = ioctx->ch;
-	BUG_ON(!ch);
-
-	ch_state = srpt_get_ch_state(ch);
-	switch (ch_state) {
-	case CH_CONNECTING:
-		WARN(true, "unexpected channel state %d\n", ch_state);
-		ret = -EINVAL;
-		goto out;
-	case CH_LIVE:
-		break;
-	case CH_DISCONNECTING:
-	case CH_DRAINING:
-	case CH_RELEASING:
-		pr_debug("cmd with tag %lld: channel disconnecting\n",
-			 ioctx->cmd.tag);
-		srpt_set_cmd_state(ioctx, SRPT_STATE_DATA_IN);
-		ret = -EINVAL;
-		goto out;
-	}
-	ret = srpt_xfer_data(ch, ioctx);
-
-out:
-	return ret;
+	return srpt_xfer_data(ch, ioctx);
 }

 static u8 tcm_to_srp_tsk_mgmt_status(const int tcm_mgmt_status)
···
 	srpt_refresh_port(sport);
 }

-static int srpt_ch_list_empty(struct srpt_device *sdev)
-{
-	int res;
-
-	spin_lock_irq(&sdev->spinlock);
-	res = list_empty(&sdev->rch_list);
-	spin_unlock_irq(&sdev->spinlock);
-
-	return res;
-}
-
 /**
  * srpt_release_sdev() - Free the channel resources associated with a target.
  */
 static int srpt_release_sdev(struct srpt_device *sdev)
 {
-	struct srpt_rdma_ch *ch, *tmp_ch;
-	int res;
+	int i, res;

 	WARN_ON_ONCE(irqs_disabled());

 	BUG_ON(!sdev);

-	spin_lock_irq(&sdev->spinlock);
-	list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list)
-		__srpt_close_ch(ch);
-	spin_unlock_irq(&sdev->spinlock);
+	mutex_lock(&sdev->mutex);
+	for (i = 0; i < ARRAY_SIZE(sdev->port); i++)
+		sdev->port[i].enabled = false;
+	__srpt_close_all_ch(sdev);
+	mutex_unlock(&sdev->mutex);

 	res = wait_event_interruptible(sdev->ch_releaseQ,
-				       srpt_ch_list_empty(sdev));
+				       list_empty_careful(&sdev->rch_list));
 	if (res)
 		pr_err("%s: interrupted.\n", __func__);
···
 	pr_debug("device = %p, device->dma_ops = %p\n", device,
 		 device->dma_ops);

-	sdev = kzalloc(sizeof *sdev, GFP_KERNEL);
+	sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
 	if (!sdev)
 		goto err;

 	sdev->device = device;
 	INIT_LIST_HEAD(&sdev->rch_list);
 	init_waitqueue_head(&sdev->ch_releaseQ);
-	spin_lock_init(&sdev->spinlock);
+	mutex_init(&sdev->mutex);

 	sdev->pd = ib_alloc_pd(device);
 	if (IS_ERR(sdev->pd))
···
 	if (srpt_refresh_port(sport)) {
 		pr_err("MAD registration failed for %s-%d.\n",
-		       srpt_sdev_name(sdev), i);
+		       sdev->device->name, i);
 		goto err_ring;
 	}
 	snprintf(sport->port_guid, sizeof(sport->port_guid),
···
 static void srpt_close_session(struct se_session *se_sess)
 {
 	DECLARE_COMPLETION_ONSTACK(release_done);
-	struct srpt_rdma_ch *ch;
-	struct srpt_device *sdev;
-	unsigned long res;
+	struct srpt_rdma_ch *ch = se_sess->fabric_sess_ptr;
+	struct srpt_device *sdev = ch->sport->sdev;
+	bool wait;

-	ch = se_sess->fabric_sess_ptr;
-	WARN_ON(ch->sess != se_sess);
+	pr_debug("ch %s-%d state %d\n", ch->sess_name, ch->qp->qp_num,
+		 ch->state);

-	pr_debug("ch %p state %d\n", ch, srpt_get_ch_state(ch));
-
-	sdev = ch->sport->sdev;
-	spin_lock_irq(&sdev->spinlock);
+	mutex_lock(&sdev->mutex);
 	BUG_ON(ch->release_done);
 	ch->release_done = &release_done;
-	__srpt_close_ch(ch);
-	spin_unlock_irq(&sdev->spinlock);
+	wait = !list_empty(&ch->list);
+	srpt_disconnect_ch(ch);
+	mutex_unlock(&sdev->mutex);

-	res = wait_for_completion_timeout(&release_done, 60 * HZ);
-	WARN_ON(res == 0);
+	if (!wait)
+		return;
+
+	while (wait_for_completion_timeout(&release_done, 180 * HZ) == 0)
+		pr_info("%s(%s-%d state %d): still waiting ...\n", __func__,
+			ch->sess_name, ch->qp->qp_num, ch->state);
 }
···
 {
 	struct se_portal_group *se_tpg = to_tpg(item);
 	struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+	struct srpt_device *sdev = sport->sdev;
+	struct srpt_rdma_ch *ch;
 	unsigned long tmp;
 	int ret;
···
 		pr_err("Illegal value for srpt_tpg_store_enable: %lu\n", tmp);
 		return -EINVAL;
 	}
-	if (tmp == 1)
-		sport->enabled = true;
-	else
-		sport->enabled = false;
+	if (sport->enabled == tmp)
3473 + goto out; 3474 + sport->enabled = tmp; 3475 + if (sport->enabled) 3476 + goto out; 3186 3477 3478 + mutex_lock(&sdev->mutex); 3479 + list_for_each_entry(ch, &sdev->rch_list, list) { 3480 + if (ch->sport == sport) { 3481 + pr_debug("%s: ch %p %s-%d\n", __func__, ch, 3482 + ch->sess_name, ch->qp->qp_num); 3483 + srpt_disconnect_ch(ch); 3484 + srpt_close_ch(ch); 3485 + } 3486 + } 3487 + mutex_unlock(&sdev->mutex); 3488 + 3489 + out: 3187 3490 return count; 3188 3491 } 3189 3492 ··· 3288 3565 static const struct target_core_fabric_ops srpt_template = { 3289 3566 .module = THIS_MODULE, 3290 3567 .name = "srpt", 3291 - .node_acl_size = sizeof(struct srpt_node_acl), 3292 3568 .get_fabric_name = srpt_get_fabric_name, 3293 3569 .tpg_get_wwn = srpt_get_fabric_wwn, 3294 3570 .tpg_get_tag = srpt_get_tag,
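The srpt_zerolength_write() call in srpt_cm_rtu_recv() above refers to a helper defined earlier in ib_srpt.c, outside this hunk. A minimal sketch of that pattern, assuming only the ch->zw_cqe member added by this patch (see the ib_srpt.h hunk below) and the srpt_zerolength_write_done() callback installed just after kref_init() at the start of these changes, could look like this:

static int srpt_zerolength_write(struct srpt_rdma_ch *ch)
{
        struct ib_send_wr wr, *bad_wr;

        /* Zero-length, signalled RDMA WRITE: no SG list, only a completion.
         * The CQE is routed through ch->zw_cqe to the done callback.
         */
        memset(&wr, 0, sizeof(wr));
        wr.opcode = IB_WR_RDMA_WRITE;
        wr.wr_cqe = &ch->zw_cqe;
        wr.send_flags = IB_SEND_SIGNALED;
        return ib_post_send(ch->qp, &wr, &bad_wr);
}

The resulting completion is reaped through ch->zw_cqe and handled by srpt_zerolength_write_done(), the callback set near the top of this hunk.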
+12 -19
drivers/infiniband/ulp/srpt/ib_srpt.h
··· 218 218 219 219 /** 220 220 * enum rdma_ch_state - SRP channel state. 221 - * @CH_CONNECTING: QP is in RTR state; waiting for RTU. 222 - * @CH_LIVE: QP is in RTS state. 223 - * @CH_DISCONNECTING: DREQ has been received; waiting for DREP 224 - * or DREQ has been send and waiting for DREP 225 - * or . 226 - * @CH_DRAINING: QP is in ERR state; waiting for last WQE event. 227 - * @CH_RELEASING: Last WQE event has been received; releasing resources. 221 + * @CH_CONNECTING: QP is in RTR state; waiting for RTU. 222 + * @CH_LIVE: QP is in RTS state. 223 + * @CH_DISCONNECTING: DREQ has been sent and waiting for DREP or DREQ has 224 + * been received. 225 + * @CH_DRAINING: DREP has been received or waiting for DREP timed out 226 + * and last work request has been queued. 227 + * @CH_DISCONNECTED: Last completion has been received. 228 228 */ 229 229 enum rdma_ch_state { 230 230 CH_CONNECTING, 231 231 CH_LIVE, 232 232 CH_DISCONNECTING, 233 233 CH_DRAINING, 234 - CH_RELEASING 234 + CH_DISCONNECTED, 235 235 }; 236 236 237 237 /** ··· 267 267 struct ib_cm_id *cm_id; 268 268 struct ib_qp *qp; 269 269 struct ib_cq *cq; 270 + struct ib_cqe zw_cqe; 271 + struct kref kref; 270 272 int rq_size; 271 273 u32 rsp_size; 272 274 atomic_t sq_wr_avail; ··· 288 286 u8 sess_name[36]; 289 287 struct work_struct release_work; 290 288 struct completion *release_done; 291 - bool in_shutdown; 292 289 }; 293 290 294 291 /** ··· 344 343 * @ioctx_ring: Per-HCA SRQ. 345 344 * @rch_list: Per-device channel list -- see also srpt_rdma_ch.list. 346 345 * @ch_releaseQ: Enables waiting for removal from rch_list. 347 - * @spinlock: Protects rch_list and tpg. 346 + * @mutex: Protects rch_list. 348 347 * @port: Information about the ports owned by this HCA. 349 348 * @event_handler: Per-HCA asynchronous IB event handler. 350 349 * @list: Node in srpt_dev_list. ··· 358 357 struct srpt_recv_ioctx **ioctx_ring; 359 358 struct list_head rch_list; 360 359 wait_queue_head_t ch_releaseQ; 361 - spinlock_t spinlock; 360 + struct mutex mutex; 362 361 struct srpt_port port[2]; 363 362 struct ib_event_handler event_handler; 364 363 struct list_head list; 365 - }; 366 - 367 - /** 368 - * struct srpt_node_acl - Per-initiator ACL data (managed via configfs). 369 - * @nacl: Target core node ACL information. 370 - */ 371 - struct srpt_node_acl { 372 - struct se_node_acl nacl; 373 364 }; 374 365 375 366 #endif /* IB_SRPT_H */
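The rdma_ch_state values above are declared in strictly increasing order, so a state change can be expressed as "move forward only". A sketch of a srpt_set_ch_state() helper consistent with the calls shown in the ib_srpt.c hunk (it returns true only when the state actually advanced); the ch->state and ch->spinlock members used here belong to struct srpt_rdma_ch but are not visible in this excerpt:

static bool srpt_set_ch_state(struct srpt_rdma_ch *ch,
                              enum rdma_ch_state new_state)
{
        enum rdma_ch_state prev;
        unsigned long flags;
        bool changed = false;

        spin_lock_irqsave(&ch->spinlock, flags);
        prev = ch->state;
        if (new_state > prev) {
                /* Forward-only: e.g. CH_CONNECTING -> CH_LIVE, or any
                 * state -> CH_DISCONNECTED, never backwards.
                 */
                ch->state = new_state;
                changed = true;
        }
        spin_unlock_irqrestore(&ch->spinlock, flags);

        return changed;
}

One benefit of forward-only transitions is that concurrent disconnect and close attempts are harmless: only the caller that actually advances the state proceeds with the corresponding action.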
+5
include/rdma/ib_verbs.h
··· 1846 1846 int (*check_mr_status)(struct ib_mr *mr, u32 check_mask, 1847 1847 struct ib_mr_status *mr_status); 1848 1848 void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); 1849 + void (*drain_rq)(struct ib_qp *qp); 1850 + void (*drain_sq)(struct ib_qp *qp); 1849 1851 1850 1852 struct ib_dma_mapping_ops *dma_ops; 1851 1853 ··· 3096 3094 int sg_nents, 3097 3095 int (*set_page)(struct ib_mr *, u64)); 3098 3096 3097 + void ib_drain_rq(struct ib_qp *qp); 3098 + void ib_drain_sq(struct ib_qp *qp); 3099 + void ib_drain_qp(struct ib_qp *qp); 3099 3100 #endif /* IB_VERBS_H */
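These declarations expose the new drain helpers to ULPs so that individual drivers no longer need their own flush logic on teardown. A purely illustrative use, with hypothetical names, assuming the CQ was created with ib_alloc_cq() as in the net/9p conversion below:

/* Illustrative only: flush outstanding work requests before tearing
 * down the QP and its completion queue.
 */
static void example_teardown(struct ib_qp *qp, struct ib_cq *cq)
{
        ib_drain_qp(qp);        /* wait until all posted WRs have completed */
        ib_destroy_qp(qp);
        ib_free_cq(cq);
}

The drain_rq/drain_sq members added to struct ib_device above are the per-provider override points for these calls.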
+31 -55
net/9p/trans_rdma.c
··· 109 109 /** 110 110 * p9_rdma_context - Keeps track of in-process WR 111 111 * 112 - * @wc_op: The original WR op for when the CQE completes in error. 113 112 * @busa: Bus address to unmap when the WR completes 114 113 * @req: Keeps track of requests (send) 115 114 * @rc: Keepts track of replies (receive) 116 115 */ 117 116 struct p9_rdma_req; 118 117 struct p9_rdma_context { 119 - enum ib_wc_opcode wc_op; 118 + struct ib_cqe cqe; 120 119 dma_addr_t busa; 121 120 union { 122 121 struct p9_req_t *req; ··· 283 284 } 284 285 285 286 static void 286 - handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma, 287 - struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len) 287 + recv_done(struct ib_cq *cq, struct ib_wc *wc) 288 288 { 289 + struct p9_client *client = cq->cq_context; 290 + struct p9_trans_rdma *rdma = client->trans; 291 + struct p9_rdma_context *c = 292 + container_of(wc->wr_cqe, struct p9_rdma_context, cqe); 289 293 struct p9_req_t *req; 290 294 int err = 0; 291 295 int16_t tag; ··· 297 295 ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize, 298 296 DMA_FROM_DEVICE); 299 297 300 - if (status != IB_WC_SUCCESS) 298 + if (wc->status != IB_WC_SUCCESS) 301 299 goto err_out; 302 300 303 301 err = p9_parse_header(c->rc, NULL, NULL, &tag, 1); ··· 318 316 req->rc = c->rc; 319 317 p9_client_cb(client, req, REQ_STATUS_RCVD); 320 318 319 + out: 320 + up(&rdma->rq_sem); 321 + kfree(c); 321 322 return; 322 323 323 324 err_out: 324 - p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n", req, err, status); 325 + p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n", 326 + req, err, wc->status); 325 327 rdma->state = P9_RDMA_FLUSHING; 326 328 client->status = Disconnected; 329 + goto out; 327 330 } 328 331 329 332 static void 330 - handle_send(struct p9_client *client, struct p9_trans_rdma *rdma, 331 - struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len) 333 + send_done(struct ib_cq *cq, struct ib_wc *wc) 332 334 { 335 + struct p9_client *client = cq->cq_context; 336 + struct p9_trans_rdma *rdma = client->trans; 337 + struct p9_rdma_context *c = 338 + container_of(wc->wr_cqe, struct p9_rdma_context, cqe); 339 + 333 340 ib_dma_unmap_single(rdma->cm_id->device, 334 341 c->busa, c->req->tc->size, 335 342 DMA_TO_DEVICE); 343 + up(&rdma->sq_sem); 344 + kfree(c); 336 345 } 337 346 338 347 static void qp_event_handler(struct ib_event *event, void *context) 339 348 { 340 349 p9_debug(P9_DEBUG_ERROR, "QP event %d context %p\n", 341 350 event->event, context); 342 - } 343 - 344 - static void cq_comp_handler(struct ib_cq *cq, void *cq_context) 345 - { 346 - struct p9_client *client = cq_context; 347 - struct p9_trans_rdma *rdma = client->trans; 348 - int ret; 349 - struct ib_wc wc; 350 - 351 - ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP); 352 - while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { 353 - struct p9_rdma_context *c = (void *) (unsigned long) wc.wr_id; 354 - 355 - switch (c->wc_op) { 356 - case IB_WC_RECV: 357 - handle_recv(client, rdma, c, wc.status, wc.byte_len); 358 - up(&rdma->rq_sem); 359 - break; 360 - 361 - case IB_WC_SEND: 362 - handle_send(client, rdma, c, wc.status, wc.byte_len); 363 - up(&rdma->sq_sem); 364 - break; 365 - 366 - default: 367 - pr_err("unexpected completion type, c->wc_op=%d, wc.opcode=%d, status=%d\n", 368 - c->wc_op, wc.opcode, wc.status); 369 - break; 370 - } 371 - kfree(c); 372 - } 373 - } 374 - 375 - static void cq_event_handler(struct ib_event *e, void *v) 376 - { 377 - p9_debug(P9_DEBUG_ERROR, "CQ event %d context %p\n", 
e->event, v); 378 351 } 379 352 380 353 static void rdma_destroy_trans(struct p9_trans_rdma *rdma) ··· 364 387 ib_dealloc_pd(rdma->pd); 365 388 366 389 if (rdma->cq && !IS_ERR(rdma->cq)) 367 - ib_destroy_cq(rdma->cq); 390 + ib_free_cq(rdma->cq); 368 391 369 392 if (rdma->cm_id && !IS_ERR(rdma->cm_id)) 370 393 rdma_destroy_id(rdma->cm_id); ··· 385 408 if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) 386 409 goto error; 387 410 411 + c->cqe.done = recv_done; 412 + 388 413 sge.addr = c->busa; 389 414 sge.length = client->msize; 390 415 sge.lkey = rdma->pd->local_dma_lkey; 391 416 392 417 wr.next = NULL; 393 - c->wc_op = IB_WC_RECV; 394 - wr.wr_id = (unsigned long) c; 418 + wr.wr_cqe = &c->cqe; 395 419 wr.sg_list = &sge; 396 420 wr.num_sge = 1; 397 421 return ib_post_recv(rdma->qp, &wr, &bad_wr); ··· 477 499 goto send_error; 478 500 } 479 501 502 + c->cqe.done = send_done; 503 + 480 504 sge.addr = c->busa; 481 505 sge.length = c->req->tc->size; 482 506 sge.lkey = rdma->pd->local_dma_lkey; 483 507 484 508 wr.next = NULL; 485 - c->wc_op = IB_WC_SEND; 486 - wr.wr_id = (unsigned long) c; 509 + wr.wr_cqe = &c->cqe; 487 510 wr.opcode = IB_WR_SEND; 488 511 wr.send_flags = IB_SEND_SIGNALED; 489 512 wr.sg_list = &sge; ··· 621 642 struct p9_trans_rdma *rdma; 622 643 struct rdma_conn_param conn_param; 623 644 struct ib_qp_init_attr qp_attr; 624 - struct ib_cq_init_attr cq_attr = {}; 625 645 626 646 /* Parse the transport specific mount options */ 627 647 err = parse_opts(args, &opts); ··· 673 695 goto error; 674 696 675 697 /* Create the Completion Queue */ 676 - cq_attr.cqe = opts.sq_depth + opts.rq_depth + 1; 677 - rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler, 678 - cq_event_handler, client, 679 - &cq_attr); 698 + rdma->cq = ib_alloc_cq(rdma->cm_id->device, client, 699 + opts.sq_depth + opts.rq_depth + 1, 700 + 0, IB_POLL_SOFTIRQ); 680 701 if (IS_ERR(rdma->cq)) 681 702 goto error; 682 - ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP); 683 703 684 704 /* Create the Protection Domain */ 685 705 rdma->pd = ib_alloc_pd(rdma->cm_id->device);
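The trans_rdma.c changes replace the wr_id cast plus opcode switch in the old cq_comp_handler() with per-work-request ib_cqe callbacks dispatched by the ib_alloc_cq() polling machinery. Reduced to a self-contained sketch (all names here are hypothetical, not from the patch):

#include <linux/slab.h>
#include <rdma/ib_verbs.h>

struct example_ctx {
        struct ib_cqe cqe;              /* embedded completion cookie */
        /* per-request state goes here */
};

static void example_done(struct ib_cq *cq, struct ib_wc *wc)
{
        struct example_ctx *ctx =
                container_of(wc->wr_cqe, struct example_ctx, cqe);

        if (wc->status != IB_WC_SUCCESS)
                pr_err("example: completion failed, status %d\n", wc->status);
        kfree(ctx);
}

static int example_post_send(struct ib_qp *qp, struct example_ctx *ctx,
                             struct ib_sge *sge)
{
        struct ib_send_wr wr = {}, *bad_wr;

        /* sge must describe an already DMA-mapped buffer. */
        ctx->cqe.done = example_done;   /* invoked by the CQ polling context */
        wr.wr_cqe = &ctx->cqe;
        wr.opcode = IB_WR_SEND;
        wr.send_flags = IB_SEND_SIGNALED;
        wr.sg_list = sge;
        wr.num_sge = 1;
        return ib_post_send(qp, &wr, &bad_wr);
}

Compared with the removed handler, there is no manual ib_req_notify_cq() re-arming and no opcode dispatch: each posted work request carries its own completion function.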