tangled
alpha
login
or
join now
tjh.dev
/
kernel
1
fork
atom
Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1
fork
atom
overview
issues
pulls
pipelines
Merge branch 'from-tomtucker' into for-2.6.28
J. Bruce Fields
17 years ago
107e0008
29373913
+710
-123
4 changed files
expand all
collapse all
unified
split
include
linux
sunrpc
svc_rdma.h
net
sunrpc
xprtrdma
svc_rdma_recvfrom.c
svc_rdma_sendto.c
svc_rdma_transport.c
+26
-1
include/linux/sunrpc/svc_rdma.h
reviewed
···
72
72
*/
73
73
struct svc_rdma_op_ctxt {
74
74
struct svc_rdma_op_ctxt *read_hdr;
75
75
+
struct svc_rdma_fastreg_mr *frmr;
75
76
int hdr_count;
76
77
struct xdr_buf arg;
77
78
struct list_head dto_q;
···
104
103
int start; /* sge no for this chunk */
105
104
int count; /* sge count for this chunk */
106
105
};
106
106
+
struct svc_rdma_fastreg_mr {
107
107
+
struct ib_mr *mr;
108
108
+
void *kva;
109
109
+
struct ib_fast_reg_page_list *page_list;
110
110
+
int page_list_len;
111
111
+
unsigned long access_flags;
112
112
+
unsigned long map_len;
113
113
+
enum dma_data_direction direction;
114
114
+
struct list_head frmr_list;
115
115
+
};
107
116
struct svc_rdma_req_map {
117
117
+
struct svc_rdma_fastreg_mr *frmr;
108
118
unsigned long count;
109
119
union {
110
120
struct kvec sge[RPCSVC_MAXPAGES];
111
121
struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES];
112
122
};
113
123
};
114
114
-
124
124
+
#define RDMACTXT_F_FAST_UNREG 1
115
125
#define RDMACTXT_F_LAST_CTXT 2
126
126
+
127
127
+
#define SVCRDMA_DEVCAP_FAST_REG 1 /* fast mr registration */
128
128
+
#define SVCRDMA_DEVCAP_READ_W_INV 2 /* read w/ invalidate */
116
129
117
130
struct svcxprt_rdma {
118
131
struct svc_xprt sc_xprt; /* SVC transport structure */
···
151
136
struct ib_cq *sc_rq_cq;
152
137
struct ib_cq *sc_sq_cq;
153
138
struct ib_mr *sc_phys_mr; /* MR for server memory */
139
139
+
u32 sc_dev_caps; /* distilled device caps */
140
140
+
u32 sc_dma_lkey; /* local dma key */
141
141
+
unsigned int sc_frmr_pg_list_len;
142
142
+
struct list_head sc_frmr_q;
143
143
+
spinlock_t sc_frmr_q_lock;
154
144
155
145
spinlock_t sc_lock; /* transport lock */
156
146
···
212
192
extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
213
193
extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
214
194
extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
195
195
+
extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt);
215
196
extern struct svc_rdma_req_map *svc_rdma_get_req_map(void);
216
197
extern void svc_rdma_put_req_map(struct svc_rdma_req_map *);
198
198
+
extern int svc_rdma_fastreg(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *);
199
199
+
extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *);
200
200
+
extern void svc_rdma_put_frmr(struct svcxprt_rdma *,
201
201
+
struct svc_rdma_fastreg_mr *);
217
202
extern void svc_sq_reap(struct svcxprt_rdma *);
218
203
extern void svc_rq_reap(struct svcxprt_rdma *);
219
204
extern struct svc_xprt_class svc_rdma_class;
+166
-21
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
reviewed
···
116
116
*
117
117
* Assumptions:
118
118
* - chunk[0]->position points to pages[0] at an offset of 0
119
119
-
* - pages[] is not physically or virtually contigous and consists of
119
119
+
* - pages[] is not physically or virtually contiguous and consists of
120
120
* PAGE_SIZE elements.
121
121
*
122
122
* Output:
···
125
125
* chunk in the read list
126
126
*
127
127
*/
128
128
-
static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
128
128
+
static int map_read_chunks(struct svcxprt_rdma *xprt,
129
129
struct svc_rqst *rqstp,
130
130
struct svc_rdma_op_ctxt *head,
131
131
struct rpcrdma_msg *rmsgp,
···
211
211
return sge_no;
212
212
}
213
213
214
214
-
static void rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
215
215
-
struct svc_rdma_op_ctxt *ctxt,
216
216
-
struct kvec *vec,
217
217
-
u64 *sgl_offset,
218
218
-
int count)
214
214
+
/* Map a read-chunk-list to an XDR and fast register the page-list.
215
215
+
*
216
216
+
* Assumptions:
217
217
+
* - chunk[0] position points to pages[0] at an offset of 0
218
218
+
* - pages[] will be made physically contiguous by creating a one-off memory
219
219
+
* region using the fastreg verb.
220
220
+
* - byte_count is # of bytes in read-chunk-list
221
221
+
* - ch_count is # of chunks in read-chunk-list
222
222
+
*
223
223
+
* Output:
224
224
+
* - sge array pointing into pages[] array.
225
225
+
* - chunk_sge array specifying sge index and count for each
226
226
+
* chunk in the read list
227
227
+
*/
228
228
+
static int fast_reg_read_chunks(struct svcxprt_rdma *xprt,
229
229
+
struct svc_rqst *rqstp,
230
230
+
struct svc_rdma_op_ctxt *head,
231
231
+
struct rpcrdma_msg *rmsgp,
232
232
+
struct svc_rdma_req_map *rpl_map,
233
233
+
struct svc_rdma_req_map *chl_map,
234
234
+
int ch_count,
235
235
+
int byte_count)
236
236
+
{
237
237
+
int page_no;
238
238
+
int ch_no;
239
239
+
u32 offset;
240
240
+
struct rpcrdma_read_chunk *ch;
241
241
+
struct svc_rdma_fastreg_mr *frmr;
242
242
+
int ret = 0;
243
243
+
244
244
+
frmr = svc_rdma_get_frmr(xprt);
245
245
+
if (IS_ERR(frmr))
246
246
+
return -ENOMEM;
247
247
+
248
248
+
head->frmr = frmr;
249
249
+
head->arg.head[0] = rqstp->rq_arg.head[0];
250
250
+
head->arg.tail[0] = rqstp->rq_arg.tail[0];
251
251
+
head->arg.pages = &head->pages[head->count];
252
252
+
head->hdr_count = head->count; /* save count of hdr pages */
253
253
+
head->arg.page_base = 0;
254
254
+
head->arg.page_len = byte_count;
255
255
+
head->arg.len = rqstp->rq_arg.len + byte_count;
256
256
+
head->arg.buflen = rqstp->rq_arg.buflen + byte_count;
257
257
+
258
258
+
/* Fast register the page list */
259
259
+
frmr->kva = page_address(rqstp->rq_arg.pages[0]);
260
260
+
frmr->direction = DMA_FROM_DEVICE;
261
261
+
frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
262
262
+
frmr->map_len = byte_count;
263
263
+
frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT;
264
264
+
for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
265
265
+
frmr->page_list->page_list[page_no] =
266
266
+
ib_dma_map_single(xprt->sc_cm_id->device,
267
267
+
page_address(rqstp->rq_arg.pages[page_no]),
268
268
+
PAGE_SIZE, DMA_TO_DEVICE);
269
269
+
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
270
270
+
frmr->page_list->page_list[page_no]))
271
271
+
goto fatal_err;
272
272
+
atomic_inc(&xprt->sc_dma_used);
273
273
+
head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no];
274
274
+
}
275
275
+
head->count += page_no;
276
276
+
277
277
+
/* rq_respages points one past arg pages */
278
278
+
rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];
279
279
+
280
280
+
/* Create the reply and chunk maps */
281
281
+
offset = 0;
282
282
+
ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
283
283
+
for (ch_no = 0; ch_no < ch_count; ch_no++) {
284
284
+
rpl_map->sge[ch_no].iov_base = frmr->kva + offset;
285
285
+
rpl_map->sge[ch_no].iov_len = ch->rc_target.rs_length;
286
286
+
chl_map->ch[ch_no].count = 1;
287
287
+
chl_map->ch[ch_no].start = ch_no;
288
288
+
offset += ch->rc_target.rs_length;
289
289
+
ch++;
290
290
+
}
291
291
+
292
292
+
ret = svc_rdma_fastreg(xprt, frmr);
293
293
+
if (ret)
294
294
+
goto fatal_err;
295
295
+
296
296
+
return ch_no;
297
297
+
298
298
+
fatal_err:
299
299
+
printk("svcrdma: error fast registering xdr for xprt %p", xprt);
300
300
+
svc_rdma_put_frmr(xprt, frmr);
301
301
+
return -EIO;
302
302
+
}
303
303
+
304
304
+
static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
305
305
+
struct svc_rdma_op_ctxt *ctxt,
306
306
+
struct svc_rdma_fastreg_mr *frmr,
307
307
+
struct kvec *vec,
308
308
+
u64 *sgl_offset,
309
309
+
int count)
219
310
{
220
311
int i;
221
312
222
313
ctxt->count = count;
223
314
ctxt->direction = DMA_FROM_DEVICE;
224
315
for (i = 0; i < count; i++) {
225
225
-
atomic_inc(&xprt->sc_dma_used);
226
226
-
ctxt->sge[i].addr =
227
227
-
ib_dma_map_single(xprt->sc_cm_id->device,
228
228
-
vec[i].iov_base, vec[i].iov_len,
229
229
-
DMA_FROM_DEVICE);
316
316
+
ctxt->sge[i].length = 0; /* in case map fails */
317
317
+
if (!frmr) {
318
318
+
ctxt->sge[i].addr =
319
319
+
ib_dma_map_single(xprt->sc_cm_id->device,
320
320
+
vec[i].iov_base,
321
321
+
vec[i].iov_len,
322
322
+
DMA_FROM_DEVICE);
323
323
+
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
324
324
+
ctxt->sge[i].addr))
325
325
+
return -EINVAL;
326
326
+
ctxt->sge[i].lkey = xprt->sc_dma_lkey;
327
327
+
atomic_inc(&xprt->sc_dma_used);
328
328
+
} else {
329
329
+
ctxt->sge[i].addr = (unsigned long)vec[i].iov_base;
330
330
+
ctxt->sge[i].lkey = frmr->mr->lkey;
331
331
+
}
230
332
ctxt->sge[i].length = vec[i].iov_len;
231
231
-
ctxt->sge[i].lkey = xprt->sc_phys_mr->lkey;
232
333
*sgl_offset = *sgl_offset + vec[i].iov_len;
233
334
}
335
335
+
return 0;
234
336
}
235
337
236
338
static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
···
380
278
struct svc_rdma_op_ctxt *hdr_ctxt)
381
279
{
382
280
struct ib_send_wr read_wr;
281
281
+
struct ib_send_wr inv_wr;
383
282
int err = 0;
384
283
int ch_no;
385
284
int ch_count;
···
404
301
svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
405
302
if (ch_count > RPCSVC_MAXPAGES)
406
303
return -EINVAL;
407
407
-
sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp,
408
408
-
rpl_map, chl_map,
409
409
-
ch_count, byte_count);
304
304
+
305
305
+
if (!xprt->sc_frmr_pg_list_len)
306
306
+
sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
307
307
+
rpl_map, chl_map, ch_count,
308
308
+
byte_count);
309
309
+
else
310
310
+
sge_count = fast_reg_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
311
311
+
rpl_map, chl_map, ch_count,
312
312
+
byte_count);
313
313
+
if (sge_count < 0) {
314
314
+
err = -EIO;
315
315
+
goto out;
316
316
+
}
317
317
+
410
318
sgl_offset = 0;
411
319
ch_no = 0;
412
320
···
426
312
next_sge:
427
313
ctxt = svc_rdma_get_context(xprt);
428
314
ctxt->direction = DMA_FROM_DEVICE;
315
315
+
ctxt->frmr = hdr_ctxt->frmr;
316
316
+
ctxt->read_hdr = NULL;
429
317
clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
318
318
+
clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
430
319
431
320
/* Prepare READ WR */
432
321
memset(&read_wr, 0, sizeof read_wr);
433
433
-
ctxt->wr_op = IB_WR_RDMA_READ;
434
322
read_wr.wr_id = (unsigned long)ctxt;
435
323
read_wr.opcode = IB_WR_RDMA_READ;
324
324
+
ctxt->wr_op = read_wr.opcode;
436
325
read_wr.send_flags = IB_SEND_SIGNALED;
437
326
read_wr.wr.rdma.rkey = ch->rc_target.rs_handle;
438
327
read_wr.wr.rdma.remote_addr =
···
444
327
read_wr.sg_list = ctxt->sge;
445
328
read_wr.num_sge =
446
329
rdma_read_max_sge(xprt, chl_map->ch[ch_no].count);
447
447
-
rdma_set_ctxt_sge(xprt, ctxt,
448
448
-
&rpl_map->sge[chl_map->ch[ch_no].start],
449
449
-
&sgl_offset,
450
450
-
read_wr.num_sge);
330
330
+
err = rdma_set_ctxt_sge(xprt, ctxt, hdr_ctxt->frmr,
331
331
+
&rpl_map->sge[chl_map->ch[ch_no].start],
332
332
+
&sgl_offset,
333
333
+
read_wr.num_sge);
334
334
+
if (err) {
335
335
+
svc_rdma_unmap_dma(ctxt);
336
336
+
svc_rdma_put_context(ctxt, 0);
337
337
+
goto out;
338
338
+
}
451
339
if (((ch+1)->rc_discrim == 0) &&
452
340
(read_wr.num_sge == chl_map->ch[ch_no].count)) {
453
341
/*
···
461
339
* the client and the RPC needs to be enqueued.
462
340
*/
463
341
set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
342
342
+
if (hdr_ctxt->frmr) {
343
343
+
set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
344
344
+
/*
345
345
+
* Invalidate the local MR used to map the data
346
346
+
* sink.
347
347
+
*/
348
348
+
if (xprt->sc_dev_caps &
349
349
+
SVCRDMA_DEVCAP_READ_W_INV) {
350
350
+
read_wr.opcode =
351
351
+
IB_WR_RDMA_READ_WITH_INV;
352
352
+
ctxt->wr_op = read_wr.opcode;
353
353
+
read_wr.ex.invalidate_rkey =
354
354
+
ctxt->frmr->mr->lkey;
355
355
+
} else {
356
356
+
/* Prepare INVALIDATE WR */
357
357
+
memset(&inv_wr, 0, sizeof inv_wr);
358
358
+
inv_wr.opcode = IB_WR_LOCAL_INV;
359
359
+
inv_wr.send_flags = IB_SEND_SIGNALED;
360
360
+
inv_wr.ex.invalidate_rkey =
361
361
+
hdr_ctxt->frmr->mr->lkey;
362
362
+
read_wr.next = &inv_wr;
363
363
+
}
364
364
+
}
464
365
ctxt->read_hdr = hdr_ctxt;
465
366
}
466
367
/* Post the read */
+215
-40
net/sunrpc/xprtrdma/svc_rdma_sendto.c
reviewed
···
69
69
* array is only concerned with the reply we are assured that we have
70
70
* one extra page for the RPCRDMA header.
71
71
*/
72
72
-
static void xdr_to_sge(struct svcxprt_rdma *xprt,
73
73
-
struct xdr_buf *xdr,
74
74
-
struct svc_rdma_req_map *vec)
72
72
+
int fast_reg_xdr(struct svcxprt_rdma *xprt,
73
73
+
struct xdr_buf *xdr,
74
74
+
struct svc_rdma_req_map *vec)
75
75
+
{
76
76
+
int sge_no;
77
77
+
u32 sge_bytes;
78
78
+
u32 page_bytes;
79
79
+
u32 page_off;
80
80
+
int page_no = 0;
81
81
+
u8 *frva;
82
82
+
struct svc_rdma_fastreg_mr *frmr;
83
83
+
84
84
+
frmr = svc_rdma_get_frmr(xprt);
85
85
+
if (IS_ERR(frmr))
86
86
+
return -ENOMEM;
87
87
+
vec->frmr = frmr;
88
88
+
89
89
+
/* Skip the RPCRDMA header */
90
90
+
sge_no = 1;
91
91
+
92
92
+
/* Map the head. */
93
93
+
frva = (void *)((unsigned long)(xdr->head[0].iov_base) & PAGE_MASK);
94
94
+
vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
95
95
+
vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
96
96
+
vec->count = 2;
97
97
+
sge_no++;
98
98
+
99
99
+
/* Build the FRMR */
100
100
+
frmr->kva = frva;
101
101
+
frmr->direction = DMA_TO_DEVICE;
102
102
+
frmr->access_flags = 0;
103
103
+
frmr->map_len = PAGE_SIZE;
104
104
+
frmr->page_list_len = 1;
105
105
+
frmr->page_list->page_list[page_no] =
106
106
+
ib_dma_map_single(xprt->sc_cm_id->device,
107
107
+
(void *)xdr->head[0].iov_base,
108
108
+
PAGE_SIZE, DMA_TO_DEVICE);
109
109
+
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
110
110
+
frmr->page_list->page_list[page_no]))
111
111
+
goto fatal_err;
112
112
+
atomic_inc(&xprt->sc_dma_used);
113
113
+
114
114
+
page_off = xdr->page_base;
115
115
+
page_bytes = xdr->page_len + page_off;
116
116
+
if (!page_bytes)
117
117
+
goto encode_tail;
118
118
+
119
119
+
/* Map the pages */
120
120
+
vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
121
121
+
vec->sge[sge_no].iov_len = page_bytes;
122
122
+
sge_no++;
123
123
+
while (page_bytes) {
124
124
+
struct page *page;
125
125
+
126
126
+
page = xdr->pages[page_no++];
127
127
+
sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
128
128
+
page_bytes -= sge_bytes;
129
129
+
130
130
+
frmr->page_list->page_list[page_no] =
131
131
+
ib_dma_map_page(xprt->sc_cm_id->device, page, 0,
132
132
+
PAGE_SIZE, DMA_TO_DEVICE);
133
133
+
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
134
134
+
frmr->page_list->page_list[page_no]))
135
135
+
goto fatal_err;
136
136
+
137
137
+
atomic_inc(&xprt->sc_dma_used);
138
138
+
page_off = 0; /* reset for next time through loop */
139
139
+
frmr->map_len += PAGE_SIZE;
140
140
+
frmr->page_list_len++;
141
141
+
}
142
142
+
vec->count++;
143
143
+
144
144
+
encode_tail:
145
145
+
/* Map tail */
146
146
+
if (0 == xdr->tail[0].iov_len)
147
147
+
goto done;
148
148
+
149
149
+
vec->count++;
150
150
+
vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;
151
151
+
152
152
+
if (((unsigned long)xdr->tail[0].iov_base & PAGE_MASK) ==
153
153
+
((unsigned long)xdr->head[0].iov_base & PAGE_MASK)) {
154
154
+
/*
155
155
+
* If head and tail use the same page, we don't need
156
156
+
* to map it again.
157
157
+
*/
158
158
+
vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
159
159
+
} else {
160
160
+
void *va;
161
161
+
162
162
+
/* Map another page for the tail */
163
163
+
page_off = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
164
164
+
va = (void *)((unsigned long)xdr->tail[0].iov_base & PAGE_MASK);
165
165
+
vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
166
166
+
167
167
+
frmr->page_list->page_list[page_no] =
168
168
+
ib_dma_map_single(xprt->sc_cm_id->device, va, PAGE_SIZE,
169
169
+
DMA_TO_DEVICE);
170
170
+
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
171
171
+
frmr->page_list->page_list[page_no]))
172
172
+
goto fatal_err;
173
173
+
atomic_inc(&xprt->sc_dma_used);
174
174
+
frmr->map_len += PAGE_SIZE;
175
175
+
frmr->page_list_len++;
176
176
+
}
177
177
+
178
178
+
done:
179
179
+
if (svc_rdma_fastreg(xprt, frmr))
180
180
+
goto fatal_err;
181
181
+
182
182
+
return 0;
183
183
+
184
184
+
fatal_err:
185
185
+
printk("svcrdma: Error fast registering memory for xprt %p\n", xprt);
186
186
+
svc_rdma_put_frmr(xprt, frmr);
187
187
+
return -EIO;
188
188
+
}
189
189
+
190
190
+
static int map_xdr(struct svcxprt_rdma *xprt,
191
191
+
struct xdr_buf *xdr,
192
192
+
struct svc_rdma_req_map *vec)
75
193
{
76
194
int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3;
77
195
int sge_no;
···
200
82
201
83
BUG_ON(xdr->len !=
202
84
(xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len));
85
85
+
86
86
+
if (xprt->sc_frmr_pg_list_len)
87
87
+
return fast_reg_xdr(xprt, xdr, vec);
203
88
204
89
/* Skip the first sge, this is for the RPCRDMA header */
205
90
sge_no = 1;
···
237
116
238
117
BUG_ON(sge_no > sge_max);
239
118
vec->count = sge_no;
119
119
+
return 0;
240
120
}
241
121
242
122
/* Assumptions:
123
123
+
* - We are using FRMR
124
124
+
* - or -
243
125
* - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
244
126
*/
245
127
static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
···
282
158
sge_no = 0;
283
159
284
160
/* Copy the remaining SGE */
285
285
-
while (bc != 0 && xdr_sge_no < vec->count) {
286
286
-
sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
287
287
-
sge_bytes = min((size_t)bc,
288
288
-
(size_t)(vec->sge[xdr_sge_no].iov_len-sge_off));
161
161
+
while (bc != 0) {
162
162
+
sge_bytes = min_t(size_t,
163
163
+
bc, vec->sge[xdr_sge_no].iov_len-sge_off);
289
164
sge[sge_no].length = sge_bytes;
290
290
-
atomic_inc(&xprt->sc_dma_used);
291
291
-
sge[sge_no].addr =
292
292
-
ib_dma_map_single(xprt->sc_cm_id->device,
293
293
-
(void *)
294
294
-
vec->sge[xdr_sge_no].iov_base + sge_off,
295
295
-
sge_bytes, DMA_TO_DEVICE);
296
296
-
if (dma_mapping_error(xprt->sc_cm_id->device->dma_device,
297
297
-
sge[sge_no].addr))
298
298
-
goto err;
165
165
+
if (!vec->frmr) {
166
166
+
sge[sge_no].addr =
167
167
+
ib_dma_map_single(xprt->sc_cm_id->device,
168
168
+
(void *)
169
169
+
vec->sge[xdr_sge_no].iov_base + sge_off,
170
170
+
sge_bytes, DMA_TO_DEVICE);
171
171
+
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
172
172
+
sge[sge_no].addr))
173
173
+
goto err;
174
174
+
atomic_inc(&xprt->sc_dma_used);
175
175
+
sge[sge_no].lkey = xprt->sc_dma_lkey;
176
176
+
} else {
177
177
+
sge[sge_no].addr = (unsigned long)
178
178
+
vec->sge[xdr_sge_no].iov_base + sge_off;
179
179
+
sge[sge_no].lkey = vec->frmr->mr->lkey;
180
180
+
}
181
181
+
ctxt->count++;
182
182
+
ctxt->frmr = vec->frmr;
299
183
sge_off = 0;
300
184
sge_no++;
301
301
-
ctxt->count++;
302
185
xdr_sge_no++;
186
186
+
BUG_ON(xdr_sge_no > vec->count);
303
187
bc -= sge_bytes;
304
188
}
305
305
-
306
306
-
BUG_ON(bc != 0);
307
307
-
BUG_ON(xdr_sge_no > vec->count);
308
189
309
190
/* Prepare WRITE WR */
310
191
memset(&write_wr, 0, sizeof write_wr);
···
355
226
res_ary = (struct rpcrdma_write_array *)
356
227
&rdma_resp->rm_body.rm_chunks[1];
357
228
358
358
-
max_write = xprt->sc_max_sge * PAGE_SIZE;
229
229
+
if (vec->frmr)
230
230
+
max_write = vec->frmr->map_len;
231
231
+
else
232
232
+
max_write = xprt->sc_max_sge * PAGE_SIZE;
359
233
360
234
/* Write chunks start at the pagelist */
361
235
for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
···
429
297
res_ary = (struct rpcrdma_write_array *)
430
298
&rdma_resp->rm_body.rm_chunks[2];
431
299
432
432
-
max_write = xprt->sc_max_sge * PAGE_SIZE;
300
300
+
if (vec->frmr)
301
301
+
max_write = vec->frmr->map_len;
302
302
+
else
303
303
+
max_write = xprt->sc_max_sge * PAGE_SIZE;
433
304
434
305
/* xdr offset starts at RPC message */
435
306
for (xdr_off = 0, chunk_no = 0;
···
441
306
u64 rs_offset;
442
307
ch = &arg_ary->wc_array[chunk_no].wc_target;
443
308
write_len = min(xfer_len, ch->rs_length);
444
444
-
445
309
446
310
/* Prepare the reply chunk given the length actually
447
311
* written */
···
500
366
int byte_count)
501
367
{
502
368
struct ib_send_wr send_wr;
369
369
+
struct ib_send_wr inv_wr;
503
370
int sge_no;
504
371
int sge_bytes;
505
372
int page_no;
···
520
385
/* Prepare the context */
521
386
ctxt->pages[0] = page;
522
387
ctxt->count = 1;
388
388
+
ctxt->frmr = vec->frmr;
389
389
+
if (vec->frmr)
390
390
+
set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
391
391
+
else
392
392
+
clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
523
393
524
394
/* Prepare the SGE for the RPCRDMA Header */
525
525
-
atomic_inc(&rdma->sc_dma_used);
526
395
ctxt->sge[0].addr =
527
396
ib_dma_map_page(rdma->sc_cm_id->device,
528
397
page, 0, PAGE_SIZE, DMA_TO_DEVICE);
398
398
+
if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
399
399
+
goto err;
400
400
+
atomic_inc(&rdma->sc_dma_used);
401
401
+
529
402
ctxt->direction = DMA_TO_DEVICE;
403
403
+
530
404
ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
531
531
-
ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey;
405
405
+
ctxt->sge[0].lkey = rdma->sc_dma_lkey;
532
406
533
407
/* Determine how many of our SGE are to be transmitted */
534
408
for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
535
409
sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
536
410
byte_count -= sge_bytes;
537
537
-
atomic_inc(&rdma->sc_dma_used);
538
538
-
ctxt->sge[sge_no].addr =
539
539
-
ib_dma_map_single(rdma->sc_cm_id->device,
540
540
-
vec->sge[sge_no].iov_base,
541
541
-
sge_bytes, DMA_TO_DEVICE);
411
411
+
if (!vec->frmr) {
412
412
+
ctxt->sge[sge_no].addr =
413
413
+
ib_dma_map_single(rdma->sc_cm_id->device,
414
414
+
vec->sge[sge_no].iov_base,
415
415
+
sge_bytes, DMA_TO_DEVICE);
416
416
+
if (ib_dma_mapping_error(rdma->sc_cm_id->device,
417
417
+
ctxt->sge[sge_no].addr))
418
418
+
goto err;
419
419
+
atomic_inc(&rdma->sc_dma_used);
420
420
+
ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
421
421
+
} else {
422
422
+
ctxt->sge[sge_no].addr = (unsigned long)
423
423
+
vec->sge[sge_no].iov_base;
424
424
+
ctxt->sge[sge_no].lkey = vec->frmr->mr->lkey;
425
425
+
}
542
426
ctxt->sge[sge_no].length = sge_bytes;
543
543
-
ctxt->sge[sge_no].lkey = rdma->sc_phys_mr->lkey;
544
427
}
545
428
BUG_ON(byte_count != 0);
546
429
···
570
417
ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
571
418
ctxt->count++;
572
419
rqstp->rq_respages[page_no] = NULL;
573
573
-
/* If there are more pages than SGE, terminate SGE list */
420
420
+
/*
421
421
+
* If there are more pages than SGE, terminate SGE
422
422
+
* list so that svc_rdma_unmap_dma doesn't attempt to
423
423
+
* unmap garbage.
424
424
+
*/
574
425
if (page_no+1 >= sge_no)
575
426
ctxt->sge[page_no+1].length = 0;
576
427
}
577
428
BUG_ON(sge_no > rdma->sc_max_sge);
429
429
+
BUG_ON(sge_no > ctxt->count);
578
430
memset(&send_wr, 0, sizeof send_wr);
579
431
ctxt->wr_op = IB_WR_SEND;
580
432
send_wr.wr_id = (unsigned long)ctxt;
···
587
429
send_wr.num_sge = sge_no;
588
430
send_wr.opcode = IB_WR_SEND;
589
431
send_wr.send_flags = IB_SEND_SIGNALED;
432
432
+
if (vec->frmr) {
433
433
+
/* Prepare INVALIDATE WR */
434
434
+
memset(&inv_wr, 0, sizeof inv_wr);
435
435
+
inv_wr.opcode = IB_WR_LOCAL_INV;
436
436
+
inv_wr.send_flags = IB_SEND_SIGNALED;
437
437
+
inv_wr.ex.invalidate_rkey =
438
438
+
vec->frmr->mr->lkey;
439
439
+
send_wr.next = &inv_wr;
440
440
+
}
590
441
591
442
ret = svc_rdma_send(rdma, &send_wr);
592
443
if (ret)
593
593
-
svc_rdma_put_context(ctxt, 1);
444
444
+
goto err;
594
445
595
595
-
return ret;
446
446
+
return 0;
447
447
+
448
448
+
err:
449
449
+
svc_rdma_put_frmr(rdma, vec->frmr);
450
450
+
svc_rdma_put_context(ctxt, 1);
451
451
+
return -EIO;
596
452
}
597
453
598
454
void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
···
649
477
ctxt = svc_rdma_get_context(rdma);
650
478
ctxt->direction = DMA_TO_DEVICE;
651
479
vec = svc_rdma_get_req_map();
652
652
-
xdr_to_sge(rdma, &rqstp->rq_res, vec);
653
653
-
480
480
+
ret = map_xdr(rdma, &rqstp->rq_res, vec);
481
481
+
if (ret)
482
482
+
goto err0;
654
483
inline_bytes = rqstp->rq_res.len;
655
484
656
485
/* Create the RDMA response header */
···
671
498
if (ret < 0) {
672
499
printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n",
673
500
ret);
674
674
-
goto error;
501
501
+
goto err1;
675
502
}
676
503
inline_bytes -= ret;
677
504
···
681
508
if (ret < 0) {
682
509
printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n",
683
510
ret);
684
684
-
goto error;
511
511
+
goto err1;
685
512
}
686
513
inline_bytes -= ret;
687
514
···
690
517
svc_rdma_put_req_map(vec);
691
518
dprintk("svcrdma: send_reply returns %d\n", ret);
692
519
return ret;
693
693
-
error:
520
520
+
521
521
+
err1:
522
522
+
put_page(res_page);
523
523
+
err0:
694
524
svc_rdma_put_req_map(vec);
695
525
svc_rdma_put_context(ctxt, 0);
696
696
-
put_page(res_page);
697
526
return ret;
698
527
}
+303
-61
net/sunrpc/xprtrdma/svc_rdma_transport.c
reviewed
···
100
100
ctxt->xprt = xprt;
101
101
INIT_LIST_HEAD(&ctxt->dto_q);
102
102
ctxt->count = 0;
103
103
+
ctxt->frmr = NULL;
103
104
atomic_inc(&xprt->sc_ctxt_used);
104
105
return ctxt;
105
106
}
106
107
107
107
-
static void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
108
108
+
void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
108
109
{
109
110
struct svcxprt_rdma *xprt = ctxt->xprt;
110
111
int i;
111
112
for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) {
112
112
-
atomic_dec(&xprt->sc_dma_used);
113
113
-
ib_dma_unmap_single(xprt->sc_cm_id->device,
114
114
-
ctxt->sge[i].addr,
115
115
-
ctxt->sge[i].length,
116
116
-
ctxt->direction);
113
113
+
/*
114
114
+
* Unmap the DMA addr in the SGE if the lkey matches
115
115
+
* the sc_dma_lkey, otherwise, ignore it since it is
116
116
+
* an FRMR lkey and will be unmapped later when the
117
117
+
* last WR that uses it completes.
118
118
+
*/
119
119
+
if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) {
120
120
+
atomic_dec(&xprt->sc_dma_used);
121
121
+
ib_dma_unmap_single(xprt->sc_cm_id->device,
122
122
+
ctxt->sge[i].addr,
123
123
+
ctxt->sge[i].length,
124
124
+
ctxt->direction);
125
125
+
}
117
126
}
118
127
}
119
128
···
159
150
schedule_timeout_uninterruptible(msecs_to_jiffies(500));
160
151
}
161
152
map->count = 0;
153
153
+
map->frmr = NULL;
162
154
return map;
163
155
}
164
156
···
326
316
}
327
317
328
318
/*
319
319
+
* Process a completion context
320
320
+
*/
321
321
+
static void process_context(struct svcxprt_rdma *xprt,
322
322
+
struct svc_rdma_op_ctxt *ctxt)
323
323
+
{
324
324
+
svc_rdma_unmap_dma(ctxt);
325
325
+
326
326
+
switch (ctxt->wr_op) {
327
327
+
case IB_WR_SEND:
328
328
+
if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
329
329
+
svc_rdma_put_frmr(xprt, ctxt->frmr);
330
330
+
svc_rdma_put_context(ctxt, 1);
331
331
+
break;
332
332
+
333
333
+
case IB_WR_RDMA_WRITE:
334
334
+
svc_rdma_put_context(ctxt, 0);
335
335
+
break;
336
336
+
337
337
+
case IB_WR_RDMA_READ:
338
338
+
case IB_WR_RDMA_READ_WITH_INV:
339
339
+
if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
340
340
+
struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
341
341
+
BUG_ON(!read_hdr);
342
342
+
if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
343
343
+
svc_rdma_put_frmr(xprt, ctxt->frmr);
344
344
+
spin_lock_bh(&xprt->sc_rq_dto_lock);
345
345
+
set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
346
346
+
list_add_tail(&read_hdr->dto_q,
347
347
+
&xprt->sc_read_complete_q);
348
348
+
spin_unlock_bh(&xprt->sc_rq_dto_lock);
349
349
+
svc_xprt_enqueue(&xprt->sc_xprt);
350
350
+
}
351
351
+
svc_rdma_put_context(ctxt, 0);
352
352
+
break;
353
353
+
354
354
+
default:
355
355
+
printk(KERN_ERR "svcrdma: unexpected completion type, "
356
356
+
"opcode=%d\n",
357
357
+
ctxt->wr_op);
358
358
+
break;
359
359
+
}
360
360
+
}
361
361
+
362
362
+
/*
329
363
* Send Queue Completion Handler - potentially called on interrupt context.
330
364
*
331
365
* Note that caller must hold a transport reference.
···
381
327
struct ib_cq *cq = xprt->sc_sq_cq;
382
328
int ret;
383
329
384
384
-
385
330
if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
386
331
return;
387
332
388
333
ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
389
334
atomic_inc(&rdma_stat_sq_poll);
390
335
while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
391
391
-
ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
392
392
-
xprt = ctxt->xprt;
393
393
-
394
394
-
svc_rdma_unmap_dma(ctxt);
395
336
if (wc.status != IB_WC_SUCCESS)
396
337
/* Close the transport */
397
338
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
···
395
346
atomic_dec(&xprt->sc_sq_count);
396
347
wake_up(&xprt->sc_send_wait);
397
348
398
398
-
switch (ctxt->wr_op) {
399
399
-
case IB_WR_SEND:
400
400
-
svc_rdma_put_context(ctxt, 1);
401
401
-
break;
349
349
+
ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
350
350
+
if (ctxt)
351
351
+
process_context(xprt, ctxt);
402
352
403
403
-
case IB_WR_RDMA_WRITE:
404
404
-
svc_rdma_put_context(ctxt, 0);
405
405
-
break;
406
406
-
407
407
-
case IB_WR_RDMA_READ:
408
408
-
if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
409
409
-
struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
410
410
-
BUG_ON(!read_hdr);
411
411
-
spin_lock_bh(&xprt->sc_rq_dto_lock);
412
412
-
set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
413
413
-
list_add_tail(&read_hdr->dto_q,
414
414
-
&xprt->sc_read_complete_q);
415
415
-
spin_unlock_bh(&xprt->sc_rq_dto_lock);
416
416
-
svc_xprt_enqueue(&xprt->sc_xprt);
417
417
-
}
418
418
-
svc_rdma_put_context(ctxt, 0);
419
419
-
break;
420
420
-
421
421
-
default:
422
422
-
printk(KERN_ERR "svcrdma: unexpected completion type, "
423
423
-
"opcode=%d, status=%d\n",
424
424
-
wc.opcode, wc.status);
425
425
-
break;
426
426
-
}
427
353
svc_xprt_put(&xprt->sc_xprt);
428
354
}
429
355
···
449
425
INIT_LIST_HEAD(&cma_xprt->sc_dto_q);
450
426
INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
451
427
INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
428
428
+
INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
452
429
init_waitqueue_head(&cma_xprt->sc_send_wait);
453
430
454
431
spin_lock_init(&cma_xprt->sc_lock);
455
432
spin_lock_init(&cma_xprt->sc_rq_dto_lock);
433
433
+
spin_lock_init(&cma_xprt->sc_frmr_q_lock);
456
434
457
435
cma_xprt->sc_ord = svcrdma_ord;
458
436
···
488
462
struct ib_recv_wr recv_wr, *bad_recv_wr;
489
463
struct svc_rdma_op_ctxt *ctxt;
490
464
struct page *page;
491
491
-
unsigned long pa;
465
465
+
dma_addr_t pa;
492
466
int sge_no;
493
467
int buflen;
494
468
int ret;
···
500
474
BUG_ON(sge_no >= xprt->sc_max_sge);
501
475
page = svc_rdma_get_page();
502
476
ctxt->pages[sge_no] = page;
503
503
-
atomic_inc(&xprt->sc_dma_used);
504
477
pa = ib_dma_map_page(xprt->sc_cm_id->device,
505
478
page, 0, PAGE_SIZE,
506
479
DMA_FROM_DEVICE);
480
480
+
if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
481
481
+
goto err_put_ctxt;
482
482
+
atomic_inc(&xprt->sc_dma_used);
507
483
ctxt->sge[sge_no].addr = pa;
508
484
ctxt->sge[sge_no].length = PAGE_SIZE;
509
509
-
ctxt->sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
485
485
+
ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey;
510
486
buflen += PAGE_SIZE;
511
487
}
512
488
ctxt->count = sge_no;
···
524
496
svc_rdma_put_context(ctxt, 1);
525
497
}
526
498
return ret;
499
499
+
500
500
+
err_put_ctxt:
501
501
+
svc_rdma_put_context(ctxt, 1);
502
502
+
return -ENOMEM;
527
503
}
528
504
529
505
/*
···
598
566
dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, "
599
567
"event=%d\n", cma_id, cma_id->context, event->event);
600
568
handle_connect_req(cma_id,
601
601
-
event->param.conn.responder_resources);
569
569
+
event->param.conn.initiator_depth);
602
570
break;
603
571
604
572
case RDMA_CM_EVENT_ESTABLISHED:
···
718
686
return ERR_PTR(ret);
719
687
}
720
688
689
689
+
static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
690
690
+
{
691
691
+
struct ib_mr *mr;
692
692
+
struct ib_fast_reg_page_list *pl;
693
693
+
struct svc_rdma_fastreg_mr *frmr;
694
694
+
695
695
+
frmr = kmalloc(sizeof(*frmr), GFP_KERNEL);
696
696
+
if (!frmr)
697
697
+
goto err;
698
698
+
699
699
+
mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES);
700
700
+
if (!mr)
701
701
+
goto err_free_frmr;
702
702
+
703
703
+
pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device,
704
704
+
RPCSVC_MAXPAGES);
705
705
+
if (!pl)
706
706
+
goto err_free_mr;
707
707
+
708
708
+
frmr->mr = mr;
709
709
+
frmr->page_list = pl;
710
710
+
INIT_LIST_HEAD(&frmr->frmr_list);
711
711
+
return frmr;
712
712
+
713
713
+
err_free_mr:
714
714
+
ib_dereg_mr(mr);
715
715
+
err_free_frmr:
716
716
+
kfree(frmr);
717
717
+
err:
718
718
+
return ERR_PTR(-ENOMEM);
719
719
+
}
720
720
+
721
721
+
static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt)
722
722
+
{
723
723
+
struct svc_rdma_fastreg_mr *frmr;
724
724
+
725
725
+
while (!list_empty(&xprt->sc_frmr_q)) {
726
726
+
frmr = list_entry(xprt->sc_frmr_q.next,
727
727
+
struct svc_rdma_fastreg_mr, frmr_list);
728
728
+
list_del_init(&frmr->frmr_list);
729
729
+
ib_dereg_mr(frmr->mr);
730
730
+
ib_free_fast_reg_page_list(frmr->page_list);
731
731
+
kfree(frmr);
732
732
+
}
733
733
+
}
734
734
+
735
735
+
struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
736
736
+
{
737
737
+
struct svc_rdma_fastreg_mr *frmr = NULL;
738
738
+
739
739
+
spin_lock_bh(&rdma->sc_frmr_q_lock);
740
740
+
if (!list_empty(&rdma->sc_frmr_q)) {
741
741
+
frmr = list_entry(rdma->sc_frmr_q.next,
742
742
+
struct svc_rdma_fastreg_mr, frmr_list);
743
743
+
list_del_init(&frmr->frmr_list);
744
744
+
frmr->map_len = 0;
745
745
+
frmr->page_list_len = 0;
746
746
+
}
747
747
+
spin_unlock_bh(&rdma->sc_frmr_q_lock);
748
748
+
if (frmr)
749
749
+
return frmr;
750
750
+
751
751
+
return rdma_alloc_frmr(rdma);
752
752
+
}
753
753
+
754
754
+
static void frmr_unmap_dma(struct svcxprt_rdma *xprt,
755
755
+
struct svc_rdma_fastreg_mr *frmr)
756
756
+
{
757
757
+
int page_no;
758
758
+
for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
759
759
+
dma_addr_t addr = frmr->page_list->page_list[page_no];
760
760
+
if (ib_dma_mapping_error(frmr->mr->device, addr))
761
761
+
continue;
762
762
+
atomic_dec(&xprt->sc_dma_used);
763
763
+
ib_dma_unmap_single(frmr->mr->device, addr, PAGE_SIZE,
764
764
+
frmr->direction);
765
765
+
}
766
766
+
}
767
767
+
768
768
+
void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
769
769
+
struct svc_rdma_fastreg_mr *frmr)
770
770
+
{
771
771
+
if (frmr) {
772
772
+
frmr_unmap_dma(rdma, frmr);
773
773
+
spin_lock_bh(&rdma->sc_frmr_q_lock);
774
774
+
BUG_ON(!list_empty(&frmr->frmr_list));
775
775
+
list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
776
776
+
spin_unlock_bh(&rdma->sc_frmr_q_lock);
777
777
+
}
778
778
+
}
779
779
+
721
780
/*
722
781
* This is the xpo_recvfrom function for listening endpoints. Its
723
782
* purpose is to accept incoming connections. The CMA callback handler
···
827
704
struct rdma_conn_param conn_param;
828
705
struct ib_qp_init_attr qp_attr;
829
706
struct ib_device_attr devattr;
707
707
+
int dma_mr_acc;
708
708
+
int need_dma_mr;
830
709
int ret;
831
710
int i;
832
711
···
944
819
}
945
820
newxprt->sc_qp = newxprt->sc_cm_id->qp;
946
821
947
947
-
/* Register all of physical memory */
948
948
-
newxprt->sc_phys_mr = ib_get_dma_mr(newxprt->sc_pd,
949
949
-
IB_ACCESS_LOCAL_WRITE |
950
950
-
IB_ACCESS_REMOTE_WRITE);
951
951
-
if (IS_ERR(newxprt->sc_phys_mr)) {
952
952
-
dprintk("svcrdma: Failed to create DMA MR ret=%d\n", ret);
822
822
+
/*
823
823
+
* Use the most secure set of MR resources based on the
824
824
+
* transport type and available memory management features in
825
825
+
* the device. Here's the table implemented below:
826
826
+
*
827
827
+
* Fast Global DMA Remote WR
828
828
+
* Reg LKEY MR Access
829
829
+
* Sup'd Sup'd Needed Needed
830
830
+
*
831
831
+
* IWARP N N Y Y
832
832
+
* N Y Y Y
833
833
+
* Y N Y N
834
834
+
* Y Y N -
835
835
+
*
836
836
+
* IB N N Y N
837
837
+
* N Y N -
838
838
+
* Y N Y N
839
839
+
* Y Y N -
840
840
+
*
841
841
+
* NB: iWARP requires remote write access for the data sink
842
842
+
* of an RDMA_READ. IB does not.
843
843
+
*/
844
844
+
if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
845
845
+
newxprt->sc_frmr_pg_list_len =
846
846
+
devattr.max_fast_reg_page_list_len;
847
847
+
newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
848
848
+
}
849
849
+
850
850
+
/*
851
851
+
* Determine if a DMA MR is required and if so, what privs are required
852
852
+
*/
853
853
+
switch (rdma_node_get_transport(newxprt->sc_cm_id->device->node_type)) {
854
854
+
case RDMA_TRANSPORT_IWARP:
855
855
+
newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV;
856
856
+
if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) {
857
857
+
need_dma_mr = 1;
858
858
+
dma_mr_acc =
859
859
+
(IB_ACCESS_LOCAL_WRITE |
860
860
+
IB_ACCESS_REMOTE_WRITE);
861
861
+
} else if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
862
862
+
need_dma_mr = 1;
863
863
+
dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
864
864
+
} else
865
865
+
need_dma_mr = 0;
866
866
+
break;
867
867
+
case RDMA_TRANSPORT_IB:
868
868
+
if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
869
869
+
need_dma_mr = 1;
870
870
+
dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
871
871
+
} else
872
872
+
need_dma_mr = 0;
873
873
+
break;
874
874
+
default:
953
875
goto errout;
954
876
}
877
877
+
878
878
+
/* Create the DMA MR if needed, otherwise, use the DMA LKEY */
879
879
+
if (need_dma_mr) {
880
880
+
/* Register all of physical memory */
881
881
+
newxprt->sc_phys_mr =
882
882
+
ib_get_dma_mr(newxprt->sc_pd, dma_mr_acc);
883
883
+
if (IS_ERR(newxprt->sc_phys_mr)) {
884
884
+
dprintk("svcrdma: Failed to create DMA MR ret=%d\n",
885
885
+
ret);
886
886
+
goto errout;
887
887
+
}
888
888
+
newxprt->sc_dma_lkey = newxprt->sc_phys_mr->lkey;
889
889
+
} else
890
890
+
newxprt->sc_dma_lkey =
891
891
+
newxprt->sc_cm_id->device->local_dma_lkey;
955
892
956
893
/* Post receive buffers */
957
894
for (i = 0; i < newxprt->sc_max_requests; i++) {
···
1148
961
WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0);
1149
962
WARN_ON(atomic_read(&rdma->sc_dma_used) != 0);
1150
963
964
964
+
/* De-allocate fastreg mr */
965
965
+
rdma_dealloc_frmr_q(rdma);
966
966
+
1151
967
/* Destroy the QP if present (not a listener) */
1152
968
if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
1153
969
ib_destroy_qp(rdma->sc_qp);
···
1204
1014
return 1;
1205
1015
}
1206
1016
1017
1017
+
/*
1018
1018
+
* Attempt to register the kvec representing the RPC memory with the
1019
1019
+
* device.
1020
1020
+
*
1021
1021
+
* Returns:
1022
1022
+
* NULL : The device does not support fastreg or there were no more
1023
1023
+
* fastreg mr.
1024
1024
+
* frmr : The kvec register request was successfully posted.
1025
1025
+
* <0 : An error was encountered attempting to register the kvec.
1026
1026
+
*/
1027
1027
+
int svc_rdma_fastreg(struct svcxprt_rdma *xprt,
1028
1028
+
struct svc_rdma_fastreg_mr *frmr)
1029
1029
+
{
1030
1030
+
struct ib_send_wr fastreg_wr;
1031
1031
+
u8 key;
1032
1032
+
1033
1033
+
/* Bump the key */
1034
1034
+
key = (u8)(frmr->mr->lkey & 0x000000FF);
1035
1035
+
ib_update_fast_reg_key(frmr->mr, ++key);
1036
1036
+
1037
1037
+
/* Prepare FASTREG WR */
1038
1038
+
memset(&fastreg_wr, 0, sizeof fastreg_wr);
1039
1039
+
fastreg_wr.opcode = IB_WR_FAST_REG_MR;
1040
1040
+
fastreg_wr.send_flags = IB_SEND_SIGNALED;
1041
1041
+
fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva;
1042
1042
+
fastreg_wr.wr.fast_reg.page_list = frmr->page_list;
1043
1043
+
fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len;
1044
1044
+
fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
1045
1045
+
fastreg_wr.wr.fast_reg.length = frmr->map_len;
1046
1046
+
fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
1047
1047
+
fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
1048
1048
+
return svc_rdma_send(xprt, &fastreg_wr);
1049
1049
+
}
1050
1050
+
1207
1051
int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
1208
1052
{
1209
1209
-
struct ib_send_wr *bad_wr;
1053
1053
+
struct ib_send_wr *bad_wr, *n_wr;
1054
1054
+
int wr_count;
1055
1055
+
int i;
1210
1056
int ret;
1211
1057
1212
1058
if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
1213
1059
return -ENOTCONN;
1214
1060
1215
1061
BUG_ON(wr->send_flags != IB_SEND_SIGNALED);
1216
1216
-
BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op !=
1217
1217
-
wr->opcode);
1062
1062
+
wr_count = 1;
1063
1063
+
for (n_wr = wr->next; n_wr; n_wr = n_wr->next)
1064
1064
+
wr_count++;
1065
1065
+
1218
1066
/* If the SQ is full, wait until an SQ entry is available */
1219
1067
while (1) {
1220
1068
spin_lock_bh(&xprt->sc_lock);
1221
1221
-
if (xprt->sc_sq_depth == atomic_read(&xprt->sc_sq_count)) {
1069
1069
+
if (xprt->sc_sq_depth < atomic_read(&xprt->sc_sq_count) + wr_count) {
1222
1070
spin_unlock_bh(&xprt->sc_lock);
1223
1071
atomic_inc(&rdma_stat_sq_starve);
1224
1072
···
1271
1043
return 0;
1272
1044
continue;
1273
1045
}
1274
1274
-
/* Bumped used SQ WR count and post */
1275
1275
-
svc_xprt_get(&xprt->sc_xprt);
1046
1046
+
/* Take a transport ref for each WR posted */
1047
1047
+
for (i = 0; i < wr_count; i++)
1048
1048
+
svc_xprt_get(&xprt->sc_xprt);
1049
1049
+
1050
1050
+
/* Bump used SQ WR count and post */
1051
1051
+
atomic_add(wr_count, &xprt->sc_sq_count);
1276
1052
ret = ib_post_send(xprt->sc_qp, wr, &bad_wr);
1277
1277
-
if (!ret)
1278
1278
-
atomic_inc(&xprt->sc_sq_count);
1279
1279
-
else {
1280
1280
-
svc_xprt_put(&xprt->sc_xprt);
1053
1053
+
if (ret) {
1054
1054
+
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
1055
1055
+
atomic_sub(wr_count, &xprt->sc_sq_count);
1056
1056
+
for (i = 0; i < wr_count; i ++)
1057
1057
+
svc_xprt_put(&xprt->sc_xprt);
1281
1058
dprintk("svcrdma: failed to post SQ WR rc=%d, "
1282
1059
"sc_sq_count=%d, sc_sq_depth=%d\n",
1283
1060
ret, atomic_read(&xprt->sc_sq_count),
1284
1061
xprt->sc_sq_depth);
1285
1062
}
1286
1063
spin_unlock_bh(&xprt->sc_lock);
1064
1064
+
if (ret)
1065
1065
+
wake_up(&xprt->sc_send_wait);
1287
1066
break;
1288
1067
}
1289
1068
return ret;
···
1314
1079
length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
1315
1080
1316
1081
/* Prepare SGE for local address */
1317
1317
-
atomic_inc(&xprt->sc_dma_used);
1318
1082
sge.addr = ib_dma_map_page(xprt->sc_cm_id->device,
1319
1083
p, 0, PAGE_SIZE, DMA_FROM_DEVICE);
1320
1320
-
sge.lkey = xprt->sc_phys_mr->lkey;
1084
1084
+
if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) {
1085
1085
+
put_page(p);
1086
1086
+
return;
1087
1087
+
}
1088
1088
+
atomic_inc(&xprt->sc_dma_used);
1089
1089
+
sge.lkey = xprt->sc_dma_lkey;
1321
1090
sge.length = length;
1322
1091
1323
1092
ctxt = svc_rdma_get_context(xprt);
···
1342
1103
if (ret) {
1343
1104
dprintk("svcrdma: Error %d posting send for protocol error\n",
1344
1105
ret);
1106
1106
+
ib_dma_unmap_page(xprt->sc_cm_id->device,
1107
1107
+
sge.addr, PAGE_SIZE,
1108
1108
+
DMA_FROM_DEVICE);
1345
1109
svc_rdma_put_context(ctxt, 1);
1346
1110
}
1347
1111
}