rdma: SVCRDMA Header File

This file defines the data types used by the SVCRDMA transport module.
The principal data structure is the transport-specific extension to
the svcxprt structure.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

authored by Tom Tucker and committed by J. Bruce Fields d21b05f1 9571af18

+262
+262
include/linux/sunrpc/svc_rdma.h
··· 1 + /* 2 + * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. 3 + * 4 + * This software is available to you under a choice of one of two 5 + * licenses. You may choose to be licensed under the terms of the GNU 6 + * General Public License (GPL) Version 2, available from the file 7 + * COPYING in the main directory of this source tree, or the BSD-type 8 + * license below: 9 + * 10 + * Redistribution and use in source and binary forms, with or without 11 + * modification, are permitted provided that the following conditions 12 + * are met: 13 + * 14 + * Redistributions of source code must retain the above copyright 15 + * notice, this list of conditions and the following disclaimer. 16 + * 17 + * Redistributions in binary form must reproduce the above 18 + * copyright notice, this list of conditions and the following 19 + * disclaimer in the documentation and/or other materials provided 20 + * with the distribution. 21 + * 22 + * Neither the name of the Network Appliance, Inc. nor the names of 23 + * its contributors may be used to endorse or promote products 24 + * derived from this software without specific prior written 25 + * permission. 26 + * 27 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 28 + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 29 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 30 + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 31 + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 32 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 33 + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 34 + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 35 + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 36 + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 37 + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 38 + * 39 + * Author: Tom Tucker <tom@opengridcomputing.com> 40 + */ 41 + 42 + #ifndef SVC_RDMA_H 43 + #define SVC_RDMA_H 44 + #include <linux/sunrpc/xdr.h> 45 + #include <linux/sunrpc/svcsock.h> 46 + #include <linux/sunrpc/rpc_rdma.h> 47 + #include <rdma/ib_verbs.h> 48 + #include <rdma/rdma_cm.h> 49 + #define SVCRDMA_DEBUG 50 + 51 + /* RPC/RDMA parameters and stats */ 52 + extern unsigned int svcrdma_ord; 53 + extern unsigned int svcrdma_max_requests; 54 + extern unsigned int svcrdma_max_req_size; 55 + 56 + extern atomic_t rdma_stat_recv; 57 + extern atomic_t rdma_stat_read; 58 + extern atomic_t rdma_stat_write; 59 + extern atomic_t rdma_stat_sq_starve; 60 + extern atomic_t rdma_stat_rq_starve; 61 + extern atomic_t rdma_stat_rq_poll; 62 + extern atomic_t rdma_stat_rq_prod; 63 + extern atomic_t rdma_stat_sq_poll; 64 + extern atomic_t rdma_stat_sq_prod; 65 + 66 + #define RPCRDMA_VERSION 1 67 + 68 + /* 69 + * Contexts are built when an RDMA request is created and are a 70 + * record of the resources that can be recovered when the request 71 + * completes. 
72 + */ 73 + struct svc_rdma_op_ctxt { 74 + struct svc_rdma_op_ctxt *next; 75 + struct xdr_buf arg; 76 + struct list_head dto_q; 77 + enum ib_wr_opcode wr_op; 78 + enum ib_wc_status wc_status; 79 + u32 byte_len; 80 + struct svcxprt_rdma *xprt; 81 + unsigned long flags; 82 + enum dma_data_direction direction; 83 + int count; 84 + struct ib_sge sge[RPCSVC_MAXPAGES]; 85 + struct page *pages[RPCSVC_MAXPAGES]; 86 + }; 87 + 88 + #define RDMACTXT_F_READ_DONE 1 89 + #define RDMACTXT_F_LAST_CTXT 2 90 + 91 + struct svcxprt_rdma { 92 + struct svc_xprt sc_xprt; /* SVC transport structure */ 93 + struct rdma_cm_id *sc_cm_id; /* RDMA connection id */ 94 + struct list_head sc_accept_q; /* Conn. waiting accept */ 95 + int sc_ord; /* RDMA read limit */ 96 + wait_queue_head_t sc_read_wait; 97 + int sc_max_sge; 98 + 99 + int sc_sq_depth; /* Depth of SQ */ 100 + atomic_t sc_sq_count; /* Number of SQ WR on queue */ 101 + 102 + int sc_max_requests; /* Depth of RQ */ 103 + int sc_max_req_size; /* Size of each RQ WR buf */ 104 + 105 + struct ib_pd *sc_pd; 106 + 107 + struct svc_rdma_op_ctxt *sc_ctxt_head; 108 + int sc_ctxt_cnt; 109 + int sc_ctxt_bump; 110 + int sc_ctxt_max; 111 + spinlock_t sc_ctxt_lock; 112 + struct list_head sc_rq_dto_q; 113 + spinlock_t sc_rq_dto_lock; 114 + struct ib_qp *sc_qp; 115 + struct ib_cq *sc_rq_cq; 116 + struct ib_cq *sc_sq_cq; 117 + struct ib_mr *sc_phys_mr; /* MR for server memory */ 118 + 119 + spinlock_t sc_lock; /* transport lock */ 120 + 121 + wait_queue_head_t sc_send_wait; /* SQ exhaustion waitlist */ 122 + unsigned long sc_flags; 123 + struct list_head sc_dto_q; /* DTO tasklet I/O pending Q */ 124 + struct list_head sc_read_complete_q; 125 + spinlock_t sc_read_complete_lock; 126 + }; 127 + /* sc_flags */ 128 + #define RDMAXPRT_RQ_PENDING 1 129 + #define RDMAXPRT_SQ_PENDING 2 130 + #define RDMAXPRT_CONN_PENDING 3 131 + 132 + #define RPCRDMA_LISTEN_BACKLOG 10 133 + /* The default ORD value is based on two outstanding full-size writes with a 134 + * 
page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. */ 135 + #define RPCRDMA_ORD (64/4) 136 + #define RPCRDMA_SQ_DEPTH_MULT 8 137 + #define RPCRDMA_MAX_THREADS 16 138 + #define RPCRDMA_MAX_REQUESTS 16 139 + #define RPCRDMA_MAX_REQ_SIZE 4096 140 + 141 + /* svc_rdma_marshal.c */ 142 + extern void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *, 143 + int *, int *); 144 + extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *); 145 + extern int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *); 146 + extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *, 147 + struct rpcrdma_msg *, 148 + enum rpcrdma_errcode, u32 *); 149 + extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int); 150 + extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int); 151 + extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int, 152 + u32, u64, u32); 153 + extern void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *, 154 + struct rpcrdma_msg *, 155 + struct rpcrdma_msg *, 156 + enum rpcrdma_proc); 157 + extern int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *); 158 + 159 + /* svc_rdma_recvfrom.c */ 160 + extern int svc_rdma_recvfrom(struct svc_rqst *); 161 + 162 + /* svc_rdma_sendto.c */ 163 + extern int svc_rdma_sendto(struct svc_rqst *); 164 + 165 + /* svc_rdma_transport.c */ 166 + extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *); 167 + extern int svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *, 168 + enum rpcrdma_errcode); 169 + struct page *svc_rdma_get_page(void); 170 + extern int svc_rdma_post_recv(struct svcxprt_rdma *); 171 + extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); 172 + extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); 173 + extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int); 174 + extern void svc_sq_reap(struct svcxprt_rdma *); 175 + extern void 
svc_rq_reap(struct svcxprt_rdma *); 176 + extern struct svc_xprt_class svc_rdma_class; 177 + extern void svc_rdma_prep_reply_hdr(struct svc_rqst *); 178 + 179 + /* svc_rdma.c */ 180 + extern int svc_rdma_init(void); 181 + extern void svc_rdma_cleanup(void); 182 + 183 + /* 184 + * Returns the address of the first read chunk or <nul> if no read chunk is 185 + * present 186 + */ 187 + static inline struct rpcrdma_read_chunk * 188 + svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp) 189 + { 190 + struct rpcrdma_read_chunk *ch = 191 + (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; 192 + 193 + if (ch->rc_discrim == 0) 194 + return NULL; 195 + 196 + return ch; 197 + } 198 + 199 + /* 200 + * Returns the address of the first read write array element or <nul> if no 201 + * write array list is present 202 + */ 203 + static inline struct rpcrdma_write_array * 204 + svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp) 205 + { 206 + if (rmsgp->rm_body.rm_chunks[0] != 0 207 + || rmsgp->rm_body.rm_chunks[1] == 0) 208 + return NULL; 209 + 210 + return (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[1]; 211 + } 212 + 213 + /* 214 + * Returns the address of the first reply array element or <nul> if no 215 + * reply array is present 216 + */ 217 + static inline struct rpcrdma_write_array * 218 + svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp) 219 + { 220 + struct rpcrdma_read_chunk *rch; 221 + struct rpcrdma_write_array *wr_ary; 222 + struct rpcrdma_write_array *rp_ary; 223 + 224 + /* XXX: Need to fix when reply list may occur with read-list and/or 225 + * write list */ 226 + if (rmsgp->rm_body.rm_chunks[0] != 0 || 227 + rmsgp->rm_body.rm_chunks[1] != 0) 228 + return NULL; 229 + 230 + rch = svc_rdma_get_read_chunk(rmsgp); 231 + if (rch) { 232 + while (rch->rc_discrim) 233 + rch++; 234 + 235 + /* The reply list follows an empty write array located 236 + * at 'rc_position' here. The reply array is at rc_target. 
237 + */ 238 + rp_ary = (struct rpcrdma_write_array *)&rch->rc_target; 239 + 240 + goto found_it; 241 + } 242 + 243 + wr_ary = svc_rdma_get_write_array(rmsgp); 244 + if (wr_ary) { 245 + rp_ary = (struct rpcrdma_write_array *) 246 + &wr_ary-> 247 + wc_array[wr_ary->wc_nchunks].wc_target.rs_length; 248 + 249 + goto found_it; 250 + } 251 + 252 + /* No read list, no write list */ 253 + rp_ary = (struct rpcrdma_write_array *) 254 + &rmsgp->rm_body.rm_chunks[2]; 255 + 256 + found_it: 257 + if (rp_ary->wc_discrim == 0) 258 + return NULL; 259 + 260 + return rp_ary; 261 + } 262 + #endif