Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

IB/iser: Chain all iser transaction send work requests

Chaining send work requests benefits performance by reducing
send queue lock contention (the lock is acquired in
ib_post_send) and by saving HW doorbells, since the whole
chain is posted with a single doorbell.

Currently, in normal IO flows iser does not chain the CDB send
work request with the registration work request. In PI flows,
signature work requests are not chained either.

Let's chain those and post only once.

Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
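
For readers less familiar with the verbs API, chaining here simply means linking struct ib_send_wr entries through their ->next pointers so that a single ib_post_send() call posts the whole list. A minimal sketch of the idea (illustrative only, not part of the patch; qp is assumed to be a connected queue pair and the per-WR payload setup is omitted):

struct ib_send_wr inv_wr = {}, reg_wr = {}, send_wr = {};
struct ib_send_wr *bad_wr;
int ret;

/* local invalidate -> fast registration -> PDU send, linked via ->next */
inv_wr.opcode = IB_WR_LOCAL_INV;
inv_wr.next = &reg_wr;

reg_wr.opcode = IB_WR_FAST_REG_MR;
reg_wr.next = &send_wr;

send_wr.opcode = IB_WR_SEND;
send_wr.send_flags = IB_SEND_SIGNALED;
send_wr.next = NULL;

/* one call: the send queue lock is taken once and the HW doorbell
 * is rung once for the entire chain
 */
ret = ib_post_send(qp, &inv_wr, &bad_wr);
if (ret)
	pr_err("post failed, first bad wr opcode %d\n", bad_wr->opcode);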


 drivers/infiniband/ulp/iser/iscsi_iser.c  |   1 +
 drivers/infiniband/ulp/iser/iscsi_iser.h  |  34 ++++++
 drivers/infiniband/ulp/iser/iser_memory.c | 120 ++++++++++-----------
 drivers/infiniband/ulp/iser/iser_verbs.c  |  21 ++---
 4 files changed, 99 insertions(+), 77 deletions(-)
drivers/infiniband/ulp/iser/iscsi_iser.c

···
 		goto out;
 	}
 
+	tx_desc->wr_idx = 0;
 	tx_desc->mapped = true;
 	tx_desc->dma_addr = dma_addr;
 	tx_desc->tx_sg[0].addr = tx_desc->dma_addr;
drivers/infiniband/ulp/iser/iscsi_iser.h

···
 	ISCSI_TX_DATAOUT
 };
 
+/* Maximum number of work requests per task:
+ * Data memory region local invalidate + fast registration
+ * Protection memory region local invalidate + fast registration
+ * Signature memory region local invalidate + fast registration
+ * PDU send
+ */
+#define ISER_MAX_WRS	7
+
 /**
  * struct iser_tx_desc - iSER TX descriptor (for send wr_id)
  *
···
  *               unsolicited data-out or control
  * @num_sge:     number sges used on this TX task
  * @mapped:      Is the task header mapped
+ * @wr_idx:      Current WR index
+ * @wrs:         Array of WRs per task
+ * @data_reg:    Data buffer registration details
+ * @prot_reg:    Protection buffer registration details
+ * @sig_attrs:   Signature attributes
  */
 struct iser_tx_desc {
 	struct iser_hdr              iser_header;
···
 	struct ib_sge                tx_sg[2];
 	int                          num_sge;
 	bool                         mapped;
+	u8                           wr_idx;
+	struct ib_send_wr            wrs[ISER_MAX_WRS];
+	struct iser_mem_reg          data_reg;
+	struct iser_mem_reg          prot_reg;
+	struct ib_sig_attrs          sig_attrs;
 };
 
 #define ISER_RX_PAD_SIZE	(256 - (ISER_RX_PAYLOAD_SIZE + \
···
 void
 iser_reg_desc_put_fmr(struct ib_conn *ib_conn,
 		      struct iser_fr_desc *desc);
+
+static inline struct ib_send_wr *
+iser_tx_next_wr(struct iser_tx_desc *tx_desc)
+{
+	struct ib_send_wr *cur_wr = &tx_desc->wrs[tx_desc->wr_idx];
+	struct ib_send_wr *last_wr;
+
+	if (tx_desc->wr_idx) {
+		last_wr = &tx_desc->wrs[tx_desc->wr_idx - 1];
+		last_wr->next = cur_wr;
+	}
+	tx_desc->wr_idx++;
+
+	return cur_wr;
+}
+
 #endif
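
The inline helper above carries the whole scheme: each call returns the next free slot in tx_desc->wrs[] and links the previous slot to it, so callers never touch ->next directly. A short usage sketch (hypothetical caller; assumes wr_idx was reset to 0 at descriptor init, as the iscsi_iser.c hunk does):

struct ib_send_wr *inv_wr, *reg_wr;

inv_wr = iser_tx_next_wr(tx_desc);	/* returns &wrs[0], nothing linked yet */
inv_wr->opcode = IB_WR_LOCAL_INV;

reg_wr = iser_tx_next_wr(tx_desc);	/* returns &wrs[1], sets wrs[0].next */
reg_wr->opcode = IB_WR_FAST_REG_MR;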
drivers/infiniband/ulp/iser/iser_memory.c

···
 {
 	u32 rkey;
 
-	memset(inv_wr, 0, sizeof(*inv_wr));
 	inv_wr->opcode = IB_WR_LOCAL_INV;
 	inv_wr->wr_id = ISER_FASTREG_LI_WRID;
 	inv_wr->ex.invalidate_rkey = mr->rkey;
+	inv_wr->send_flags = 0;
+	inv_wr->num_sge = 0;
 
 	rkey = ib_inc_rkey(mr->rkey);
 	ib_update_fast_reg_key(mr, rkey);
···
 		struct iser_mem_reg *prot_reg,
 		struct iser_mem_reg *sig_reg)
 {
-	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
-	struct ib_send_wr sig_wr, inv_wr;
-	struct ib_send_wr *bad_wr, *wr = NULL;
-	struct ib_sig_attrs sig_attrs;
+	struct iser_tx_desc *tx_desc = &iser_task->desc;
+	struct ib_sig_attrs *sig_attrs = &tx_desc->sig_attrs;
+	struct ib_send_wr *wr;
 	int ret;
 
-	memset(&sig_attrs, 0, sizeof(sig_attrs));
-	ret = iser_set_sig_attrs(iser_task->sc, &sig_attrs);
+	memset(sig_attrs, 0, sizeof(*sig_attrs));
+	ret = iser_set_sig_attrs(iser_task->sc, sig_attrs);
 	if (ret)
 		goto err;
 
-	iser_set_prot_checks(iser_task->sc, &sig_attrs.check_mask);
+	iser_set_prot_checks(iser_task->sc, &sig_attrs->check_mask);
 
 	if (!pi_ctx->sig_mr_valid) {
-		iser_inv_rkey(&inv_wr, pi_ctx->sig_mr);
-		wr = &inv_wr;
+		wr = iser_tx_next_wr(tx_desc);
+		iser_inv_rkey(wr, pi_ctx->sig_mr);
 	}
 
-	memset(&sig_wr, 0, sizeof(sig_wr));
-	sig_wr.opcode = IB_WR_REG_SIG_MR;
-	sig_wr.wr_id = ISER_FASTREG_LI_WRID;
-	sig_wr.sg_list = &data_reg->sge;
-	sig_wr.num_sge = 1;
-	sig_wr.wr.sig_handover.sig_attrs = &sig_attrs;
-	sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr;
+	wr = iser_tx_next_wr(tx_desc);
+	wr->opcode = IB_WR_REG_SIG_MR;
+	wr->wr_id = ISER_FASTREG_LI_WRID;
+	wr->sg_list = &data_reg->sge;
+	wr->num_sge = 1;
+	wr->send_flags = 0;
+	wr->wr.sig_handover.sig_attrs = sig_attrs;
+	wr->wr.sig_handover.sig_mr = pi_ctx->sig_mr;
 	if (scsi_prot_sg_count(iser_task->sc))
-		sig_wr.wr.sig_handover.prot = &prot_reg->sge;
-	sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE |
-					      IB_ACCESS_REMOTE_READ |
-					      IB_ACCESS_REMOTE_WRITE;
-
-	if (!wr)
-		wr = &sig_wr;
+		wr->wr.sig_handover.prot = &prot_reg->sge;
 	else
-		wr->next = &sig_wr;
-
-	ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
-	if (ret) {
-		iser_err("reg_sig_mr failed, ret:%d\n", ret);
-		goto err;
-	}
+		wr->wr.sig_handover.prot = NULL;
+	wr->wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE |
+					   IB_ACCESS_REMOTE_READ |
+					   IB_ACCESS_REMOTE_WRITE;
 	pi_ctx->sig_mr_valid = 0;
 
 	sig_reg->sge.lkey = pi_ctx->sig_mr->lkey;
···
 	struct iser_device *device = ib_conn->device;
 	struct ib_mr *mr = rsc->mr;
 	struct ib_fast_reg_page_list *frpl = rsc->frpl;
-	struct ib_send_wr fastreg_wr, inv_wr;
-	struct ib_send_wr *bad_wr, *wr = NULL;
-	int ret, offset, size, plen;
+	struct iser_tx_desc *tx_desc = &iser_task->desc;
+	struct ib_send_wr *wr;
+	int offset, size, plen;
 
 	plen = iser_sg_to_page_vec(mem, device->ib_device, frpl->page_list,
 				   &offset, &size);
···
 	}
 
 	if (!rsc->mr_valid) {
-		iser_inv_rkey(&inv_wr, mr);
-		wr = &inv_wr;
+		wr = iser_tx_next_wr(tx_desc);
+		iser_inv_rkey(wr, mr);
 	}
 
-	/* Prepare FASTREG WR */
-	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
-	fastreg_wr.wr_id = ISER_FASTREG_LI_WRID;
-	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
-	fastreg_wr.wr.fast_reg.iova_start = frpl->page_list[0] + offset;
-	fastreg_wr.wr.fast_reg.page_list = frpl;
-	fastreg_wr.wr.fast_reg.page_list_len = plen;
-	fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K;
-	fastreg_wr.wr.fast_reg.length = size;
-	fastreg_wr.wr.fast_reg.rkey = mr->rkey;
-	fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE |
-					       IB_ACCESS_REMOTE_WRITE |
-					       IB_ACCESS_REMOTE_READ);
-
-	if (!wr)
-		wr = &fastreg_wr;
-	else
-		wr->next = &fastreg_wr;
-
-	ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
-	if (ret) {
-		iser_err("fast registration failed, ret:%d\n", ret);
-		return ret;
-	}
+	wr = iser_tx_next_wr(tx_desc);
+	wr->opcode = IB_WR_FAST_REG_MR;
+	wr->wr_id = ISER_FASTREG_LI_WRID;
+	wr->send_flags = 0;
+	wr->wr.fast_reg.iova_start = frpl->page_list[0] + offset;
+	wr->wr.fast_reg.page_list = frpl;
+	wr->wr.fast_reg.page_list_len = plen;
+	wr->wr.fast_reg.page_shift = SHIFT_4K;
+	wr->wr.fast_reg.length = size;
+	wr->wr.fast_reg.rkey = mr->rkey;
+	wr->wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE |
+					IB_ACCESS_REMOTE_WRITE |
+					IB_ACCESS_REMOTE_READ);
 	rsc->mr_valid = 0;
 
 	reg->sge.lkey = mr->lkey;
···
 		 " length=0x%x\n", reg->sge.lkey, reg->rkey,
 		 reg->sge.addr, reg->sge.length);
 
-	return ret;
+	return 0;
 }
 
 static int
···
 	struct iser_device *device = ib_conn->device;
 	struct iser_data_buf *mem = &task->data[dir];
 	struct iser_mem_reg *reg = &task->rdma_reg[dir];
+	struct iser_mem_reg *data_reg;
 	struct iser_fr_desc *desc = NULL;
 	int err;
···
 		reg->mem_h = desc;
 	}
 
-	err = iser_reg_data_sg(task, mem, desc, reg);
+	if (scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL)
+		data_reg = reg;
+	else
+		data_reg = &task->desc.data_reg;
+
+	err = iser_reg_data_sg(task, mem, desc, data_reg);
 	if (unlikely(err))
 		goto err_reg;
 
 	if (scsi_get_prot_op(task->sc) != SCSI_PROT_NORMAL) {
-		struct iser_mem_reg prot_reg;
+		struct iser_mem_reg *prot_reg = &task->desc.prot_reg;
 
-		memset(&prot_reg, 0, sizeof(prot_reg));
 		if (scsi_prot_sg_count(task->sc)) {
 			mem = &task->prot[dir];
 			err = iser_handle_unaligned_buf(task, mem, dir);
 			if (unlikely(err))
 				goto err_reg;
 
-			err = iser_reg_prot_sg(task, mem, desc, &prot_reg);
+			err = iser_reg_prot_sg(task, mem, desc, prot_reg);
 			if (unlikely(err))
 				goto err_reg;
 		}
 
-		err = iser_reg_sig_mr(task, desc->pi_ctx, reg,
-				      &prot_reg, reg);
+		err = iser_reg_sig_mr(task, desc->pi_ctx, data_reg,
+				      prot_reg, reg);
 		if (unlikely(err))
 			goto err_reg;
 
drivers/infiniband/ulp/iser/iser_verbs.c

···
 int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
 		   bool signal)
 {
-	int ib_ret;
-	struct ib_send_wr send_wr, *send_wr_failed;
+	struct ib_send_wr *bad_wr, *wr = iser_tx_next_wr(tx_desc);
+	int ib_ret;
 
 	ib_dma_sync_single_for_device(ib_conn->device->ib_device,
 				      tx_desc->dma_addr, ISER_HEADERS_LEN,
 				      DMA_TO_DEVICE);
 
-	send_wr.next = NULL;
-	send_wr.wr_id = (uintptr_t)tx_desc;
-	send_wr.sg_list = tx_desc->tx_sg;
-	send_wr.num_sge = tx_desc->num_sge;
-	send_wr.opcode = IB_WR_SEND;
-	send_wr.send_flags = signal ? IB_SEND_SIGNALED : 0;
+	wr->next = NULL;
+	wr->wr_id = (uintptr_t)tx_desc;
+	wr->sg_list = tx_desc->tx_sg;
+	wr->num_sge = tx_desc->num_sge;
+	wr->opcode = IB_WR_SEND;
+	wr->send_flags = signal ? IB_SEND_SIGNALED : 0;
 
-	ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed);
+	ib_ret = ib_post_send(ib_conn->qp, &tx_desc->wrs[0], &bad_wr);
 	if (ib_ret)
-		iser_err("ib_post_send failed, ret:%d\n", ib_ret);
+		iser_err("ib_post_send failed, ret:%d opcode:%d\n",
+			 ib_ret, bad_wr->opcode);
 
 	return ib_ret;
 }
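
Putting the hunks together: a worst-case protection-enabled command fills all seven slots declared by ISER_MAX_WRS, and the single ib_post_send() in iser_post_send() rings one doorbell for the whole chain. The slot layout below follows the ISER_MAX_WRS comment; the per-slot ordering is a reading of the code paths above:

/* Worst-case wrs[] layout for a PI command, posted in one ib_post_send():
 *
 *   wrs[0] IB_WR_LOCAL_INV     invalidate data MR
 *   wrs[1] IB_WR_FAST_REG_MR   register data buffer
 *   wrs[2] IB_WR_LOCAL_INV     invalidate protection MR
 *   wrs[3] IB_WR_FAST_REG_MR   register protection buffer
 *   wrs[4] IB_WR_LOCAL_INV     invalidate signature MR
 *   wrs[5] IB_WR_REG_SIG_MR    signature handover
 *   wrs[6] IB_WR_SEND          the iSCSI PDU
 */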