Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

iw_cxgb4: RDMA write with immediate support

Adds iw_cxgb4 functionality to support the RDMA_WRITE_WITH_IMMEDIATE opcode.

Signed-off-by: Potnuri Bharat Teja <bharat@chelsio.com>
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>

authored by

Potnuri Bharat Teja and committed by
Jason Gunthorpe
b9855f4c 8001b717

+81 -16
+20 -3
drivers/infiniband/hw/cxgb4/cq.c
··· 791 791 wc->byte_len = CQE_LEN(&cqe); 792 792 else 793 793 wc->byte_len = 0; 794 - wc->opcode = IB_WC_RECV; 795 - if (CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_INV || 796 - CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) { 794 + 795 + switch (CQE_OPCODE(&cqe)) { 796 + case FW_RI_SEND: 797 + wc->opcode = IB_WC_RECV; 798 + break; 799 + case FW_RI_SEND_WITH_INV: 800 + case FW_RI_SEND_WITH_SE_INV: 801 + wc->opcode = IB_WC_RECV; 797 802 wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe); 798 803 wc->wc_flags |= IB_WC_WITH_INVALIDATE; 799 804 c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey); 805 + break; 806 + case FW_RI_WRITE_IMMEDIATE: 807 + wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; 808 + wc->ex.imm_data = CQE_IMM_DATA(&cqe); 809 + wc->wc_flags |= IB_WC_WITH_IMM; 810 + break; 811 + default: 812 + pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n", 813 + CQE_OPCODE(&cqe), CQE_QPID(&cqe)); 814 + ret = -EINVAL; 815 + goto out; 800 816 } 801 817 } else { 802 818 switch (CQE_OPCODE(&cqe)) { 819 + case FW_RI_WRITE_IMMEDIATE: 803 820 case FW_RI_RDMA_WRITE: 804 821 wc->opcode = IB_WC_RDMA_WRITE; 805 822 break;
+29 -8
drivers/infiniband/hw/cxgb4/qp.c
··· 555 555 556 556 if (wr->num_sge > T4_MAX_SEND_SGE) 557 557 return -EINVAL; 558 - wqe->write.r2 = 0; 558 + 559 + /* 560 + * iWARP protocol supports 64 bit immediate data but rdma api 561 + * limits it to 32bit. 562 + */ 563 + if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) 564 + wqe->write.iw_imm_data.ib_imm_data.imm_data32 = wr->ex.imm_data; 565 + else 566 + wqe->write.iw_imm_data.ib_imm_data.imm_data32 = 0; 559 567 wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey); 560 568 wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr); 561 569 if (wr->num_sge) { ··· 856 848 case IB_WR_RDMA_WRITE: 857 849 opcode = FW_RI_RDMA_WRITE; 858 850 break; 851 + case IB_WR_RDMA_WRITE_WITH_IMM: 852 + opcode = FW_RI_WRITE_IMMEDIATE; 853 + break; 859 854 case IB_WR_RDMA_READ: 860 855 case IB_WR_RDMA_READ_WITH_INV: 861 856 opcode = FW_RI_READ_REQ; ··· 981 970 enum fw_wr_opcodes fw_opcode = 0; 982 971 enum fw_ri_wr_flags fw_flags; 983 972 struct c4iw_qp *qhp; 973 + struct c4iw_dev *rhp; 984 974 union t4_wr *wqe = NULL; 985 975 u32 num_wrs; 986 976 struct t4_swsqe *swsqe; ··· 989 977 u16 idx = 0; 990 978 991 979 qhp = to_c4iw_qp(ibqp); 980 + rhp = qhp->rhp; 992 981 spin_lock_irqsave(&qhp->lock, flag); 993 982 994 983 /* ··· 1034 1021 swsqe->opcode = FW_RI_SEND_WITH_INV; 1035 1022 err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16); 1036 1023 break; 1024 + case IB_WR_RDMA_WRITE_WITH_IMM: 1025 + if (unlikely(!rhp->rdev.lldi.write_w_imm_support)) { 1026 + err = -EINVAL; 1027 + break; 1028 + } 1029 + fw_flags |= FW_RI_RDMA_WRITE_WITH_IMMEDIATE; 1030 + /*FALLTHROUGH*/ 1037 1031 case IB_WR_RDMA_WRITE: 1038 1032 fw_opcode = FW_RI_RDMA_WRITE_WR; 1039 1033 swsqe->opcode = FW_RI_RDMA_WRITE; ··· 1051 1031 fw_opcode = FW_RI_RDMA_READ_WR; 1052 1032 swsqe->opcode = FW_RI_READ_REQ; 1053 1033 if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) { 1054 - c4iw_invalidate_mr(qhp->rhp, 1055 - wr->sg_list[0].lkey); 1034 + c4iw_invalidate_mr(rhp, wr->sg_list[0].lkey); 1056 1035 fw_flags = 
FW_RI_RDMA_READ_INVALIDATE; 1057 1036 } else { 1058 1037 fw_flags = 0; ··· 1067 1048 struct c4iw_mr *mhp = to_c4iw_mr(reg_wr(wr)->mr); 1068 1049 1069 1050 swsqe->opcode = FW_RI_FAST_REGISTER; 1070 - if (qhp->rhp->rdev.lldi.fr_nsmr_tpte_wr_support && 1051 + if (rhp->rdev.lldi.fr_nsmr_tpte_wr_support && 1071 1052 !mhp->attr.state && mhp->mpl_len <= 2) { 1072 1053 fw_opcode = FW_RI_FR_NSMR_TPTE_WR; 1073 1054 build_tpte_memreg(&wqe->fr_tpte, reg_wr(wr), ··· 1076 1057 fw_opcode = FW_RI_FR_NSMR_WR; 1077 1058 err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr), 1078 1059 mhp, &len16, 1079 - qhp->rhp->rdev.lldi.ulptx_memwrite_dsgl); 1060 + rhp->rdev.lldi.ulptx_memwrite_dsgl); 1080 1061 if (err) 1081 1062 break; 1082 1063 } ··· 1089 1070 fw_opcode = FW_RI_INV_LSTAG_WR; 1090 1071 swsqe->opcode = FW_RI_LOCAL_INV; 1091 1072 err = build_inv_stag(wqe, wr, &len16); 1092 - c4iw_invalidate_mr(qhp->rhp, wr->ex.invalidate_rkey); 1073 + c4iw_invalidate_mr(rhp, wr->ex.invalidate_rkey); 1093 1074 break; 1094 1075 default: 1095 1076 pr_warn("%s post of type=%d TBD!\n", __func__, ··· 1108 1089 swsqe->wr_id = wr->wr_id; 1109 1090 if (c4iw_wr_log) { 1110 1091 swsqe->sge_ts = cxgb4_read_sge_timestamp( 1111 - qhp->rhp->rdev.lldi.ports[0]); 1092 + rhp->rdev.lldi.ports[0]); 1112 1093 swsqe->host_time = ktime_get(); 1113 1094 } 1114 1095 ··· 1122 1103 t4_sq_produce(&qhp->wq, len16); 1123 1104 idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); 1124 1105 } 1125 - if (!qhp->rhp->rdev.status_page->db_off) { 1106 + if (!rhp->rdev.status_page->db_off) { 1126 1107 t4_ring_sq_db(&qhp->wq, idx, wqe); 1127 1108 spin_unlock_irqrestore(&qhp->lock, flag); 1128 1109 } else { ··· 2117 2098 } 2118 2099 uresp.flags = C4IW_QPF_ONCHIP; 2119 2100 } 2101 + if (rhp->rdev.lldi.write_w_imm_support) 2102 + uresp.flags |= C4IW_QPF_WRITE_W_IMM; 2120 2103 uresp.qid_mask = rhp->rdev.qpmask; 2121 2104 uresp.sqid = qhp->wq.sq.qid; 2122 2105 uresp.sq_size = qhp->wq.sq.size;
+15 -1
drivers/infiniband/hw/cxgb4/t4.h
··· 190 190 __be32 abs_rqe_idx; 191 191 } srcqe; 192 192 struct { 193 - __be64 imm_data; 193 + __be32 mo; 194 + __be32 msn; 195 + /* 196 + * Use union for immediate data to be consistent with 197 + * stack's 32 bit data and iWARP spec's 64 bit data. 198 + */ 199 + union { 200 + struct { 201 + __be32 imm_data32; 202 + u32 reserved; 203 + } ib_imm_data; 204 + __be64 imm_data64; 205 + } iw_imm_data; 194 206 } imm_data_rcqe; 195 207 196 208 u64 drain_cookie; ··· 265 253 #define CQE_WRID_STAG(x) (be32_to_cpu((x)->u.rcqe.stag)) 266 254 #define CQE_WRID_MSN(x) (be32_to_cpu((x)->u.rcqe.msn)) 267 255 #define CQE_ABS_RQE_IDX(x) (be32_to_cpu((x)->u.srcqe.abs_rqe_idx)) 256 + #define CQE_IMM_DATA(x)( \ 257 + (x)->u.imm_data_rcqe.iw_imm_data.ib_imm_data.imm_data32) 268 258 269 259 /* used for SQ completion processing */ 270 260 #define CQE_WRID_SQ_IDX(x) ((x)->u.scqe.cidx)
+15 -3
drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
··· 50 50 FW_RI_BYPASS = 0xd, 51 51 FW_RI_RECEIVE = 0xe, 52 52 53 - FW_RI_SGE_EC_CR_RETURN = 0xf 53 + FW_RI_SGE_EC_CR_RETURN = 0xf, 54 + FW_RI_WRITE_IMMEDIATE = FW_RI_RDMA_INIT 54 55 }; 55 56 56 57 enum fw_ri_wr_flags { ··· 60 59 FW_RI_SOLICITED_EVENT_FLAG = 0x04, 61 60 FW_RI_READ_FENCE_FLAG = 0x08, 62 61 FW_RI_LOCAL_FENCE_FLAG = 0x10, 63 - FW_RI_RDMA_READ_INVALIDATE = 0x20 62 + FW_RI_RDMA_READ_INVALIDATE = 0x20, 63 + FW_RI_RDMA_WRITE_WITH_IMMEDIATE = 0x40 64 64 }; 65 65 66 66 enum fw_ri_mpa_attrs { ··· 548 546 __u16 wrid; 549 547 __u8 r1[3]; 550 548 __u8 len16; 551 - __be64 r2; 549 + /* 550 + * Use union for immediate data to be consistent with stack's 32 bit 551 + * data and iWARP spec's 64 bit data. 552 + */ 553 + union { 554 + struct { 555 + __be32 imm_data32; 556 + u32 reserved; 557 + } ib_imm_data; 558 + __be64 imm_data64; 559 + } iw_imm_data; 552 560 __be32 plen; 553 561 __be32 stag_sink; 554 562 __be64 to_sink;
+2 -1
include/uapi/rdma/cxgb4-abi.h
··· 65 65 }; 66 66 67 67 enum { 68 - C4IW_QPF_ONCHIP = (1 << 0) 68 + C4IW_QPF_ONCHIP = (1 << 0), 69 + C4IW_QPF_WRITE_W_IMM = (1 << 1) 69 70 }; 70 71 71 72 struct c4iw_create_qp_resp {