Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xprtrdma: Use gathered Send for large inline messages

An RPC Call message that is sent inline but that has a data payload
(ie, one or more items in rq_snd_buf's page list) must be "pulled
up:"

- call_allocate has to reserve enough RPC Call buffer space to
accommodate the data payload

- call_transmit has to memcpy the rq_snd_buf's page list and tail
into its head iovec before it is sent

As the inline threshold is increased beyond its current 1KB default,
however, this means data payloads of more than a few KB are copied
by the host CPU. For example, if the inline threshold is increased
just to 4KB, then NFS WRITE requests up to 4KB would involve a
memcpy of the NFS WRITE's payload data into the RPC Call buffer.
This is an undesirable amount of participation by the host CPU.

The inline threshold may be much larger than 4KB in the future,
after negotiation with a peer server.

Instead of copying the components of rq_snd_buf into its head iovec,
construct a gather list of these components, and send them all in
place. The same approach is already used in the Linux server's
RPC-over-RDMA reply path.

This mechanism also eliminates the need for rpcrdma_tail_pullup,
which is used to manage the XDR pad and trailing inline content when
a Read list is present.

This requires that the pages in rq_snd_buf's page list be DMA-mapped
during marshaling, and unmapped when a data-bearing RPC is
completed. This is slightly less efficient for very small I/O
payloads, but significantly more efficient as data payload size and
inline threshold increase past a kilobyte.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>

Authored by Chuck Lever; committed by Anna Schumaker.
655fec69 c8b920bb

+207 -185
+3 -30
net/sunrpc/xprtrdma/backchannel.c
··· 206 206 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 207 207 struct rpcrdma_req *req = rpcr_to_rdmar(rqst); 208 208 struct rpcrdma_msg *headerp; 209 - size_t rpclen; 210 209 211 210 headerp = rdmab_to_msg(req->rl_rdmabuf); 212 211 headerp->rm_xid = rqst->rq_xid; ··· 217 218 headerp->rm_body.rm_chunks[1] = xdr_zero; 218 219 headerp->rm_body.rm_chunks[2] = xdr_zero; 219 220 220 - rpclen = rqst->rq_svec[0].iov_len; 221 - 222 - #ifdef RPCRDMA_BACKCHANNEL_DEBUG 223 - pr_info("RPC: %s: rpclen %zd headerp 0x%p lkey 0x%x\n", 224 - __func__, rpclen, headerp, rdmab_lkey(req->rl_rdmabuf)); 225 - pr_info("RPC: %s: RPC/RDMA: %*ph\n", 226 - __func__, (int)RPCRDMA_HDRLEN_MIN, headerp); 227 - pr_info("RPC: %s: RPC: %*ph\n", 228 - __func__, (int)rpclen, rqst->rq_svec[0].iov_base); 229 - #endif 230 - 231 - if (!rpcrdma_dma_map_regbuf(&r_xprt->rx_ia, req->rl_rdmabuf)) 232 - goto out_map; 233 - req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf); 234 - req->rl_send_iov[0].length = RPCRDMA_HDRLEN_MIN; 235 - req->rl_send_iov[0].lkey = rdmab_lkey(req->rl_rdmabuf); 236 - 237 - if (!rpcrdma_dma_map_regbuf(&r_xprt->rx_ia, req->rl_sendbuf)) 238 - goto out_map; 239 - req->rl_send_iov[1].addr = rdmab_addr(req->rl_sendbuf); 240 - req->rl_send_iov[1].length = rpclen; 241 - req->rl_send_iov[1].lkey = rdmab_lkey(req->rl_sendbuf); 242 - 243 - req->rl_send_wr.num_sge = 2; 244 - 221 + if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, RPCRDMA_HDRLEN_MIN, 222 + &rqst->rq_snd_buf, rpcrdma_noch)) 223 + return -EIO; 245 224 return 0; 246 - 247 - out_map: 248 - pr_err("rpcrdma: failed to DMA map a Send buffer\n"); 249 - return -EIO; 250 225 } 251 226 252 227 /**
+170 -131
net/sunrpc/xprtrdma/rpc_rdma.c
··· 53 53 # define RPCDBG_FACILITY RPCDBG_TRANS 54 54 #endif 55 55 56 - enum rpcrdma_chunktype { 57 - rpcrdma_noch = 0, 58 - rpcrdma_readch, 59 - rpcrdma_areadch, 60 - rpcrdma_writech, 61 - rpcrdma_replych 62 - }; 63 - 64 56 static const char transfertypes[][12] = { 65 57 "inline", /* no chunks */ 66 58 "read list", /* some argument via rdma read */ ··· 147 155 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 148 156 149 157 return rqst->rq_rcv_buf.buflen <= ia->ri_max_inline_read; 150 - } 151 - 152 - static int 153 - rpcrdma_tail_pullup(struct xdr_buf *buf) 154 - { 155 - size_t tlen = buf->tail[0].iov_len; 156 - size_t skip = tlen & 3; 157 - 158 - /* Do not include the tail if it is only an XDR pad */ 159 - if (tlen < 4) 160 - return 0; 161 - 162 - /* xdr_write_pages() adds a pad at the beginning of the tail 163 - * if the content in "buf->pages" is unaligned. Force the 164 - * tail's actual content to land at the next XDR position 165 - * after the head instead. 166 - */ 167 - if (skip) { 168 - unsigned char *src, *dst; 169 - unsigned int count; 170 - 171 - src = buf->tail[0].iov_base; 172 - dst = buf->head[0].iov_base; 173 - dst += buf->head[0].iov_len; 174 - 175 - src += skip; 176 - tlen -= skip; 177 - 178 - dprintk("RPC: %s: skip=%zu, memmove(%p, %p, %zu)\n", 179 - __func__, skip, dst, src, tlen); 180 - 181 - for (count = tlen; count; count--) 182 - *dst++ = *src++; 183 - } 184 - 185 - return tlen; 186 158 } 187 159 188 160 /* Split "vec" on page boundaries into segments. FMR registers pages, ··· 459 503 return iptr; 460 504 } 461 505 462 - /* 463 - * Copy write data inline. 464 - * This function is used for "small" requests. Data which is passed 465 - * to RPC via iovecs (or page list) is copied directly into the 466 - * pre-registered memory buffer for this request. For small amounts 467 - * of data, this is efficient. The cutoff value is tunable. 506 + /* Prepare the RPC-over-RDMA header SGE. 
468 507 */ 469 - static void rpcrdma_inline_pullup(struct rpc_rqst *rqst) 508 + static bool 509 + rpcrdma_prepare_hdr_sge(struct rpcrdma_ia *ia, struct rpcrdma_req *req, 510 + u32 len) 470 511 { 471 - int i, npages, curlen; 472 - int copy_len; 473 - unsigned char *srcp, *destp; 474 - struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); 475 - int page_base; 476 - struct page **ppages; 512 + struct rpcrdma_regbuf *rb = req->rl_rdmabuf; 513 + struct ib_sge *sge = &req->rl_send_sge[0]; 477 514 478 - destp = rqst->rq_svec[0].iov_base; 479 - curlen = rqst->rq_svec[0].iov_len; 480 - destp += curlen; 515 + if (unlikely(!rpcrdma_regbuf_is_mapped(rb))) { 516 + if (!__rpcrdma_dma_map_regbuf(ia, rb)) 517 + return false; 518 + sge->addr = rdmab_addr(rb); 519 + sge->lkey = rdmab_lkey(rb); 520 + } 521 + sge->length = len; 481 522 482 - dprintk("RPC: %s: destp 0x%p len %d hdrlen %d\n", 483 - __func__, destp, rqst->rq_slen, curlen); 523 + ib_dma_sync_single_for_device(ia->ri_device, sge->addr, 524 + sge->length, DMA_TO_DEVICE); 525 + req->rl_send_wr.num_sge++; 526 + return true; 527 + } 484 528 485 - copy_len = rqst->rq_snd_buf.page_len; 529 + /* Prepare the Send SGEs. The head and tail iovec, and each entry 530 + * in the page list, gets its own SGE. 
531 + */ 532 + static bool 533 + rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req, 534 + struct xdr_buf *xdr, enum rpcrdma_chunktype rtype) 535 + { 536 + unsigned int sge_no, page_base, len, remaining; 537 + struct rpcrdma_regbuf *rb = req->rl_sendbuf; 538 + struct ib_device *device = ia->ri_device; 539 + struct ib_sge *sge = req->rl_send_sge; 540 + u32 lkey = ia->ri_pd->local_dma_lkey; 541 + struct page *page, **ppages; 486 542 487 - if (rqst->rq_snd_buf.tail[0].iov_len) { 488 - curlen = rqst->rq_snd_buf.tail[0].iov_len; 489 - if (destp + copy_len != rqst->rq_snd_buf.tail[0].iov_base) { 490 - memmove(destp + copy_len, 491 - rqst->rq_snd_buf.tail[0].iov_base, curlen); 492 - r_xprt->rx_stats.pullup_copy_count += curlen; 543 + /* The head iovec is straightforward, as it is already 544 + * DMA-mapped. Sync the content that has changed. 545 + */ 546 + if (!rpcrdma_dma_map_regbuf(ia, rb)) 547 + return false; 548 + sge_no = 1; 549 + sge[sge_no].addr = rdmab_addr(rb); 550 + sge[sge_no].length = xdr->head[0].iov_len; 551 + sge[sge_no].lkey = rdmab_lkey(rb); 552 + ib_dma_sync_single_for_device(device, sge[sge_no].addr, 553 + sge[sge_no].length, DMA_TO_DEVICE); 554 + 555 + /* If there is a Read chunk, the page list is being handled 556 + * via explicit RDMA, and thus is skipped here. However, the 557 + * tail iovec may include an XDR pad for the page list, as 558 + * well as additional content, and may not reside in the 559 + * same page as the head iovec. 560 + */ 561 + if (rtype == rpcrdma_readch) { 562 + len = xdr->tail[0].iov_len; 563 + 564 + /* Do not include the tail if it is only an XDR pad */ 565 + if (len < 4) 566 + goto out; 567 + 568 + page = virt_to_page(xdr->tail[0].iov_base); 569 + page_base = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK; 570 + 571 + /* If the content in the page list is an odd length, 572 + * xdr_write_pages() has added a pad at the beginning 573 + * of the tail iovec. 
Force the tail's non-pad content 574 + * to land at the next XDR position in the Send message. 575 + */ 576 + page_base += len & 3; 577 + len -= len & 3; 578 + goto map_tail; 579 + } 580 + 581 + /* If there is a page list present, temporarily DMA map 582 + * and prepare an SGE for each page to be sent. 583 + */ 584 + if (xdr->page_len) { 585 + ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT); 586 + page_base = xdr->page_base & ~PAGE_MASK; 587 + remaining = xdr->page_len; 588 + while (remaining) { 589 + sge_no++; 590 + if (sge_no > RPCRDMA_MAX_SEND_SGES - 2) 591 + goto out_mapping_overflow; 592 + 593 + len = min_t(u32, PAGE_SIZE - page_base, remaining); 594 + sge[sge_no].addr = ib_dma_map_page(device, *ppages, 595 + page_base, len, 596 + DMA_TO_DEVICE); 597 + if (ib_dma_mapping_error(device, sge[sge_no].addr)) 598 + goto out_mapping_err; 599 + sge[sge_no].length = len; 600 + sge[sge_no].lkey = lkey; 601 + 602 + req->rl_mapped_sges++; 603 + ppages++; 604 + remaining -= len; 605 + page_base = 0; 493 606 } 494 - dprintk("RPC: %s: tail destp 0x%p len %d\n", 495 - __func__, destp + copy_len, curlen); 496 - rqst->rq_svec[0].iov_len += curlen; 497 607 } 498 - r_xprt->rx_stats.pullup_copy_count += copy_len; 499 608 500 - page_base = rqst->rq_snd_buf.page_base; 501 - ppages = rqst->rq_snd_buf.pages + (page_base >> PAGE_SHIFT); 502 - page_base &= ~PAGE_MASK; 503 - npages = PAGE_ALIGN(page_base+copy_len) >> PAGE_SHIFT; 504 - for (i = 0; copy_len && i < npages; i++) { 505 - curlen = PAGE_SIZE - page_base; 506 - if (curlen > copy_len) 507 - curlen = copy_len; 508 - dprintk("RPC: %s: page %d destp 0x%p len %d curlen %d\n", 509 - __func__, i, destp, copy_len, curlen); 510 - srcp = kmap_atomic(ppages[i]); 511 - memcpy(destp, srcp+page_base, curlen); 512 - kunmap_atomic(srcp); 513 - rqst->rq_svec[0].iov_len += curlen; 514 - destp += curlen; 515 - copy_len -= curlen; 516 - page_base = 0; 609 + /* The tail iovec is not always constructed in the same 610 + * page where the head 
iovec resides (see, for example, 611 + * gss_wrap_req_priv). To neatly accommodate that case, 612 + * DMA map it separately. 613 + */ 614 + if (xdr->tail[0].iov_len) { 615 + page = virt_to_page(xdr->tail[0].iov_base); 616 + page_base = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK; 617 + len = xdr->tail[0].iov_len; 618 + 619 + map_tail: 620 + sge_no++; 621 + sge[sge_no].addr = ib_dma_map_page(device, page, 622 + page_base, len, 623 + DMA_TO_DEVICE); 624 + if (ib_dma_mapping_error(device, sge[sge_no].addr)) 625 + goto out_mapping_err; 626 + sge[sge_no].length = len; 627 + sge[sge_no].lkey = lkey; 628 + req->rl_mapped_sges++; 517 629 } 518 - /* header now contains entire send message */ 630 + 631 + out: 632 + req->rl_send_wr.num_sge = sge_no + 1; 633 + return true; 634 + 635 + out_mapping_overflow: 636 + pr_err("rpcrdma: too many Send SGEs (%u)\n", sge_no); 637 + return false; 638 + 639 + out_mapping_err: 640 + pr_err("rpcrdma: Send mapping error\n"); 641 + return false; 642 + } 643 + 644 + bool 645 + rpcrdma_prepare_send_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req, 646 + u32 hdrlen, struct xdr_buf *xdr, 647 + enum rpcrdma_chunktype rtype) 648 + { 649 + req->rl_send_wr.num_sge = 0; 650 + req->rl_mapped_sges = 0; 651 + 652 + if (!rpcrdma_prepare_hdr_sge(ia, req, hdrlen)) 653 + goto out_map; 654 + 655 + if (rtype != rpcrdma_areadch) 656 + if (!rpcrdma_prepare_msg_sges(ia, req, xdr, rtype)) 657 + goto out_map; 658 + 659 + return true; 660 + 661 + out_map: 662 + pr_err("rpcrdma: failed to DMA map a Send buffer\n"); 663 + return false; 664 + } 665 + 666 + void 667 + rpcrdma_unmap_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req) 668 + { 669 + struct ib_device *device = ia->ri_device; 670 + struct ib_sge *sge; 671 + int count; 672 + 673 + sge = &req->rl_send_sge[2]; 674 + for (count = req->rl_mapped_sges; count--; sge++) 675 + ib_dma_unmap_page(device, sge->addr, sge->length, 676 + DMA_TO_DEVICE); 677 + req->rl_mapped_sges = 0; 519 678 } 520 679 521 680 /* 
522 681 * Marshal a request: the primary job of this routine is to choose 523 682 * the transfer modes. See comments below. 524 - * 525 - * Prepares up to two IOVs per Call message: 526 - * 527 - * [0] -- RPC RDMA header 528 - * [1] -- the RPC header/data 529 683 * 530 684 * Returns zero on success, otherwise a negative errno. 531 685 */ ··· 704 638 */ 705 639 if (rpcrdma_args_inline(r_xprt, rqst)) { 706 640 rtype = rpcrdma_noch; 707 - rpcrdma_inline_pullup(rqst); 708 - rpclen = rqst->rq_svec[0].iov_len; 641 + rpclen = rqst->rq_snd_buf.len; 709 642 } else if (ddp_allowed && rqst->rq_snd_buf.flags & XDRBUF_WRITE) { 710 643 rtype = rpcrdma_readch; 711 - rpclen = rqst->rq_svec[0].iov_len; 712 - rpclen += rpcrdma_tail_pullup(&rqst->rq_snd_buf); 644 + rpclen = rqst->rq_snd_buf.head[0].iov_len + 645 + rqst->rq_snd_buf.tail[0].iov_len; 713 646 } else { 714 647 r_xprt->rx_stats.nomsg_call_count++; 715 648 headerp->rm_type = htonl(RDMA_NOMSG); ··· 750 685 goto out_unmap; 751 686 hdrlen = (unsigned char *)iptr - (unsigned char *)headerp; 752 687 753 - if (hdrlen + rpclen > r_xprt->rx_data.inline_wsize) 754 - goto out_overflow; 755 - 756 688 dprintk("RPC: %5u %s: %s/%s: hdrlen %zd rpclen %zd\n", 757 689 rqst->rq_task->tk_pid, __func__, 758 690 transfertypes[rtype], transfertypes[wtype], 759 691 hdrlen, rpclen); 760 692 761 - if (!rpcrdma_dma_map_regbuf(&r_xprt->rx_ia, req->rl_rdmabuf)) 762 - goto out_map; 763 - req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf); 764 - req->rl_send_iov[0].length = hdrlen; 765 - req->rl_send_iov[0].lkey = rdmab_lkey(req->rl_rdmabuf); 766 - 767 - req->rl_send_wr.num_sge = 1; 768 - if (rtype == rpcrdma_areadch) 769 - return 0; 770 - 771 - if (!rpcrdma_dma_map_regbuf(&r_xprt->rx_ia, req->rl_sendbuf)) 772 - goto out_map; 773 - req->rl_send_iov[1].addr = rdmab_addr(req->rl_sendbuf); 774 - req->rl_send_iov[1].length = rpclen; 775 - req->rl_send_iov[1].lkey = rdmab_lkey(req->rl_sendbuf); 776 - 777 - req->rl_send_wr.num_sge = 2; 778 - 693 + if 
(!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, hdrlen, 694 + &rqst->rq_snd_buf, rtype)) { 695 + iptr = ERR_PTR(-EIO); 696 + goto out_unmap; 697 + } 779 698 return 0; 780 - 781 - out_overflow: 782 - pr_err("rpcrdma: send overflow: hdrlen %zd rpclen %zu %s/%s\n", 783 - hdrlen, rpclen, transfertypes[rtype], transfertypes[wtype]); 784 - iptr = ERR_PTR(-EIO); 785 699 786 700 out_unmap: 787 701 r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false); 788 702 return PTR_ERR(iptr); 789 - 790 - out_map: 791 - pr_err("rpcrdma: failed to DMA map a Send buffer\n"); 792 - iptr = ERR_PTR(-EIO); 793 - goto out_unmap; 794 703 } 795 704 796 705 /*
+5 -13
net/sunrpc/xprtrdma/transport.c
··· 499 499 return true; 500 500 } 501 501 502 - /* RPC/RDMA marshaling may choose to send payload bearing ops inline, 503 - * if the resulting Call message is smaller than the inline threshold. 504 - * The value of the "rq_callsize" argument accounts for RPC header 505 - * requirements, but not for the data payload in these cases. 506 - * 507 - * See rpcrdma_inline_pullup. 508 - */ 509 502 static bool 510 503 rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, 511 504 size_t size, gfp_t flags) 512 505 { 513 506 struct rpcrdma_regbuf *rb; 514 - size_t min_size; 515 507 516 508 if (req->rl_sendbuf && rdmab_length(req->rl_sendbuf) >= size) 517 509 return true; 518 510 519 - min_size = max_t(size_t, size, r_xprt->rx_data.inline_wsize); 520 - rb = rpcrdma_alloc_regbuf(min_size, DMA_TO_DEVICE, flags); 511 + rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, flags); 521 512 if (IS_ERR(rb)) 522 513 return false; 523 514 524 515 rpcrdma_free_regbuf(req->rl_sendbuf); 525 - r_xprt->rx_stats.hardway_register_count += min_size; 516 + r_xprt->rx_stats.hardway_register_count += size; 526 517 req->rl_sendbuf = rb; 527 518 return true; 528 519 } ··· 614 623 struct rpc_rqst *rqst = task->tk_rqstp; 615 624 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); 616 625 struct rpcrdma_req *req = rpcr_to_rdmar(rqst); 626 + struct rpcrdma_ia *ia = &r_xprt->rx_ia; 617 627 618 628 if (req->rl_backchannel) 619 629 return; 620 630 621 631 dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply); 622 632 623 - r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, 624 - !RPC_IS_ASYNC(task)); 633 + ia->ri_ops->ro_unmap_safe(r_xprt, req, !RPC_IS_ASYNC(task)); 634 + rpcrdma_unmap_sges(ia, req); 625 635 rpcrdma_buffer_put(req); 626 636 } 627 637
+4 -9
net/sunrpc/xprtrdma/verbs.c
··· 493 493 unsigned int max_qp_wr; 494 494 int rc; 495 495 496 - if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_IOVS) { 496 + if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_SEND_SGES) { 497 497 dprintk("RPC: %s: insufficient sge's available\n", 498 498 __func__); 499 499 return -ENOMEM; ··· 522 522 ep->rep_attr.cap.max_recv_wr = cdata->max_requests; 523 523 ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; 524 524 ep->rep_attr.cap.max_recv_wr += 1; /* drain cqe */ 525 - ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_IOVS; 525 + ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_SEND_SGES; 526 526 ep->rep_attr.cap.max_recv_sge = 1; 527 527 ep->rep_attr.cap.max_inline_data = 0; 528 528 ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR; ··· 891 891 INIT_LIST_HEAD(&req->rl_registered); 892 892 req->rl_send_wr.next = NULL; 893 893 req->rl_send_wr.wr_cqe = &req->rl_cqe; 894 - req->rl_send_wr.sg_list = req->rl_send_iov; 894 + req->rl_send_wr.sg_list = req->rl_send_sge; 895 895 req->rl_send_wr.opcode = IB_WR_SEND; 896 896 return req; 897 897 } ··· 1306 1306 struct rpcrdma_ep *ep, 1307 1307 struct rpcrdma_req *req) 1308 1308 { 1309 - struct ib_device *device = ia->ri_device; 1310 1309 struct ib_send_wr *send_wr = &req->rl_send_wr; 1311 1310 struct ib_send_wr *send_wr_fail; 1312 - struct ib_sge *sge = req->rl_send_iov; 1313 - int i, rc; 1311 + int rc; 1314 1312 1315 1313 if (req->rl_reply) { 1316 1314 rc = rpcrdma_ep_post_recv(ia, req->rl_reply); ··· 1317 1319 req->rl_reply = NULL; 1318 1320 } 1319 1321 1320 - for (i = 0; i < send_wr->num_sge; i++) 1321 - ib_dma_sync_single_for_device(device, sge[i].addr, 1322 - sge[i].length, DMA_TO_DEVICE); 1323 1322 dprintk("RPC: %s: posting %d s/g entries\n", 1324 1323 __func__, send_wr->num_sge); 1325 1324
+25 -2
net/sunrpc/xprtrdma/xprt_rdma.h
··· 285 285 char *mr_offset; /* kva if no page, else offset */ 286 286 }; 287 287 288 - #define RPCRDMA_MAX_IOVS (2) 288 + /* Reserve enough Send SGEs to send a maximum size inline request: 289 + * - RPC-over-RDMA header 290 + * - xdr_buf head iovec 291 + * - RPCRDMA_MAX_INLINE bytes, possibly unaligned, in pages 292 + * - xdr_buf tail iovec 293 + */ 294 + enum { 295 + RPCRDMA_MAX_SEND_PAGES = PAGE_SIZE + RPCRDMA_MAX_INLINE - 1, 296 + RPCRDMA_MAX_PAGE_SGES = (RPCRDMA_MAX_SEND_PAGES >> PAGE_SHIFT) + 1, 297 + RPCRDMA_MAX_SEND_SGES = 1 + 1 + RPCRDMA_MAX_PAGE_SGES + 1, 298 + }; 289 299 290 300 struct rpcrdma_buffer; 291 301 struct rpcrdma_req { 292 302 struct list_head rl_free; 303 + unsigned int rl_mapped_sges; 293 304 unsigned int rl_connect_cookie; 294 305 struct rpcrdma_buffer *rl_buffer; 295 306 struct rpcrdma_rep *rl_reply; 296 307 struct ib_send_wr rl_send_wr; 297 - struct ib_sge rl_send_iov[RPCRDMA_MAX_IOVS]; 308 + struct ib_sge rl_send_sge[RPCRDMA_MAX_SEND_SGES]; 298 309 struct rpcrdma_regbuf *rl_rdmabuf; /* xprt header */ 299 310 struct rpcrdma_regbuf *rl_sendbuf; /* rq_snd_buf */ 300 311 struct rpcrdma_regbuf *rl_recvbuf; /* rq_rcv_buf */ ··· 540 529 /* 541 530 * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c 542 531 */ 532 + 533 + enum rpcrdma_chunktype { 534 + rpcrdma_noch = 0, 535 + rpcrdma_readch, 536 + rpcrdma_areadch, 537 + rpcrdma_writech, 538 + rpcrdma_replych 539 + }; 540 + 541 + bool rpcrdma_prepare_send_sges(struct rpcrdma_ia *, struct rpcrdma_req *, 542 + u32, struct xdr_buf *, enum rpcrdma_chunktype); 543 + void rpcrdma_unmap_sges(struct rpcrdma_ia *, struct rpcrdma_req *); 543 544 int rpcrdma_marshal_req(struct rpc_rqst *); 544 545 void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *); 545 546