Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

xprtrdma: Refactor rpcrdma_prepare_msg_sges()

Refactor: Replace spaghetti with code that makes it plain what needs
to be done for each rtype. This makes it easier to add features and
optimizations.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>

Authored by Chuck Lever and committed by Anna Schumaker
d6764bbd dc15c3d5

+155 -126
net/sunrpc/xprtrdma/rpc_rdma.c
···
 {
 	struct rpcrdma_sendctx *sc = req->rl_sendctx;
 	struct rpcrdma_regbuf *rb = req->rl_rdmabuf;
-	struct ib_sge *sge = sc->sc_sges;
+	struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++];
 
 	if (!rpcrdma_regbuf_dma_map(r_xprt, rb))
-		goto out_regbuf;
+		return false;
 	sge->addr = rdmab_addr(rb);
 	sge->length = len;
 	sge->lkey = rdmab_lkey(rb);
 
 	ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length,
 				      DMA_TO_DEVICE);
-	req->rl_wr.num_sge++;
+	return true;
+}
+
+/* The head iovec is straightforward, as it is usually already
+ * DMA-mapped. Sync the content that has changed.
+ */
+static bool rpcrdma_prepare_head_iov(struct rpcrdma_xprt *r_xprt,
+				     struct rpcrdma_req *req, unsigned int len)
+{
+	struct rpcrdma_sendctx *sc = req->rl_sendctx;
+	struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++];
+	struct rpcrdma_regbuf *rb = req->rl_sendbuf;
+
+	if (!rpcrdma_regbuf_dma_map(r_xprt, rb))
+		return false;
+
+	sge->addr = rdmab_addr(rb);
+	sge->length = len;
+	sge->lkey = rdmab_lkey(rb);
+
+	ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length,
+				      DMA_TO_DEVICE);
+	return true;
+}
+
+/* If there is a page list present, DMA map and prepare an
+ * SGE for each page to be sent.
+ */
+static bool rpcrdma_prepare_pagelist(struct rpcrdma_req *req,
+				     struct xdr_buf *xdr)
+{
+	struct rpcrdma_sendctx *sc = req->rl_sendctx;
+	struct rpcrdma_regbuf *rb = req->rl_sendbuf;
+	unsigned int page_base, len, remaining;
+	struct page **ppages;
+	struct ib_sge *sge;
+
+	ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
+	page_base = offset_in_page(xdr->page_base);
+	remaining = xdr->page_len;
+	while (remaining) {
+		sge = &sc->sc_sges[req->rl_wr.num_sge++];
+		len = min_t(unsigned int, PAGE_SIZE - page_base, remaining);
+		sge->addr = ib_dma_map_page(rdmab_device(rb), *ppages,
+					    page_base, len, DMA_TO_DEVICE);
+		if (ib_dma_mapping_error(rdmab_device(rb), sge->addr))
+			goto out_mapping_err;
+
+		sge->length = len;
+		sge->lkey = rdmab_lkey(rb);
+
+		sc->sc_unmap_count++;
+		ppages++;
+		remaining -= len;
+		page_base = 0;
+	}
+
 	return true;
 
-out_regbuf:
-	pr_err("rpcrdma: failed to DMA map a Send buffer\n");
+out_mapping_err:
+	trace_xprtrdma_dma_maperr(sge->addr);
 	return false;
 }
 
-/* Prepare the Send SGEs. The head and tail iovec, and each entry
- * in the page list, gets its own SGE.
+/* The tail iovec may include an XDR pad for the page list,
+ * as well as additional content, and may not reside in the
+ * same page as the head iovec.
  */
-static bool rpcrdma_prepare_msg_sges(struct rpcrdma_xprt *r_xprt,
-				     struct rpcrdma_req *req,
+static bool rpcrdma_prepare_tail_iov(struct rpcrdma_req *req,
 				     struct xdr_buf *xdr,
-				     enum rpcrdma_chunktype rtype)
+				     unsigned int page_base, unsigned int len)
 {
 	struct rpcrdma_sendctx *sc = req->rl_sendctx;
-	unsigned int sge_no, page_base, len, remaining;
+	struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++];
 	struct rpcrdma_regbuf *rb = req->rl_sendbuf;
-	struct ib_sge *sge = sc->sc_sges;
-	struct page *page, **ppages;
+	struct page *page = virt_to_page(xdr->tail[0].iov_base);
 
-	/* The head iovec is straightforward, as it is already
-	 * DMA-mapped. Sync the content that has changed.
-	 */
-	if (!rpcrdma_regbuf_dma_map(r_xprt, rb))
-		goto out_regbuf;
-	sge_no = 1;
-	sge[sge_no].addr = rdmab_addr(rb);
-	sge[sge_no].length = xdr->head[0].iov_len;
-	sge[sge_no].lkey = rdmab_lkey(rb);
-	ib_dma_sync_single_for_device(rdmab_device(rb), sge[sge_no].addr,
-				      sge[sge_no].length, DMA_TO_DEVICE);
+	sge->addr = ib_dma_map_page(rdmab_device(rb), page, page_base, len,
+				    DMA_TO_DEVICE);
+	if (ib_dma_mapping_error(rdmab_device(rb), sge->addr))
+		goto out_mapping_err;
 
-	/* If there is a Read chunk, the page list is being handled
-	 * via explicit RDMA, and thus is skipped here. However, the
-	 * tail iovec may include an XDR pad for the page list, as
-	 * well as additional content, and may not reside in the
-	 * same page as the head iovec.
-	 */
-	if (rtype == rpcrdma_readch) {
-		len = xdr->tail[0].iov_len;
-
-		/* Do not include the tail if it is only an XDR pad */
-		if (len < 4)
-			goto out;
-
-		page = virt_to_page(xdr->tail[0].iov_base);
-		page_base = offset_in_page(xdr->tail[0].iov_base);
-
-		/* If the content in the page list is an odd length,
-		 * xdr_write_pages() has added a pad at the beginning
-		 * of the tail iovec. Force the tail's non-pad content
-		 * to land at the next XDR position in the Send message.
-		 */
-		page_base += len & 3;
-		len -= len & 3;
-		goto map_tail;
-	}
-
-	/* If there is a page list present, temporarily DMA map
-	 * and prepare an SGE for each page to be sent.
-	 */
-	if (xdr->page_len) {
-		ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
-		page_base = offset_in_page(xdr->page_base);
-		remaining = xdr->page_len;
-		while (remaining) {
-			sge_no++;
-			if (sge_no > RPCRDMA_MAX_SEND_SGES - 2)
-				goto out_mapping_overflow;
-
-			len = min_t(u32, PAGE_SIZE - page_base, remaining);
-			sge[sge_no].addr =
-				ib_dma_map_page(rdmab_device(rb), *ppages,
-						page_base, len, DMA_TO_DEVICE);
-			if (ib_dma_mapping_error(rdmab_device(rb),
-						 sge[sge_no].addr))
-				goto out_mapping_err;
-			sge[sge_no].length = len;
-			sge[sge_no].lkey = rdmab_lkey(rb);
-
-			sc->sc_unmap_count++;
-			ppages++;
-			remaining -= len;
-			page_base = 0;
-		}
-	}
-
-	/* The tail iovec is not always constructed in the same
-	 * page where the head iovec resides (see, for example,
-	 * gss_wrap_req_priv). To neatly accommodate that case,
-	 * DMA map it separately.
-	 */
-	if (xdr->tail[0].iov_len) {
-		page = virt_to_page(xdr->tail[0].iov_base);
-		page_base = offset_in_page(xdr->tail[0].iov_base);
-		len = xdr->tail[0].iov_len;
-
-map_tail:
-		sge_no++;
-		sge[sge_no].addr =
-			ib_dma_map_page(rdmab_device(rb), page, page_base, len,
-					DMA_TO_DEVICE);
-		if (ib_dma_mapping_error(rdmab_device(rb), sge[sge_no].addr))
-			goto out_mapping_err;
-		sge[sge_no].length = len;
-		sge[sge_no].lkey = rdmab_lkey(rb);
-		sc->sc_unmap_count++;
-	}
-
-out:
-	req->rl_wr.num_sge += sge_no;
-	if (sc->sc_unmap_count)
-		kref_get(&req->rl_kref);
+	sge->length = len;
+	sge->lkey = rdmab_lkey(rb);
+	++sc->sc_unmap_count;
 	return true;
 
-out_regbuf:
-	pr_err("rpcrdma: failed to DMA map a Send buffer\n");
-	return false;
-
-out_mapping_overflow:
-	rpcrdma_sendctx_unmap(sc);
-	pr_err("rpcrdma: too many Send SGEs (%u)\n", sge_no);
-	return false;
-
 out_mapping_err:
-	rpcrdma_sendctx_unmap(sc);
-	trace_xprtrdma_dma_maperr(sge[sge_no].addr);
+	trace_xprtrdma_dma_maperr(sge->addr);
 	return false;
+}
+
+static bool rpcrdma_prepare_noch_mapped(struct rpcrdma_xprt *r_xprt,
+					struct rpcrdma_req *req,
+					struct xdr_buf *xdr)
+{
+	struct kvec *tail = &xdr->tail[0];
+
+	if (!rpcrdma_prepare_head_iov(r_xprt, req, xdr->head[0].iov_len))
+		return false;
+	if (xdr->page_len)
+		if (!rpcrdma_prepare_pagelist(req, xdr))
+			return false;
+	if (tail->iov_len)
+		if (!rpcrdma_prepare_tail_iov(req, xdr,
+					      offset_in_page(tail->iov_base),
+					      tail->iov_len))
+			return false;
+
+	if (req->rl_sendctx->sc_unmap_count)
+		kref_get(&req->rl_kref);
+	return true;
+}
+
+static bool rpcrdma_prepare_readch(struct rpcrdma_xprt *r_xprt,
+				   struct rpcrdma_req *req,
+				   struct xdr_buf *xdr)
+{
+	if (!rpcrdma_prepare_head_iov(r_xprt, req, xdr->head[0].iov_len))
+		return false;
+
+	/* If there is a Read chunk, the page list is being handled
+	 * via explicit RDMA, and thus is skipped here.
+	 */
+
+	/* Do not include the tail if it is only an XDR pad */
+	if (xdr->tail[0].iov_len > 3) {
+		unsigned int page_base, len;
+
+		/* If the content in the page list is an odd length,
+		 * xdr_write_pages() adds a pad at the beginning of
+		 * the tail iovec. Force the tail's non-pad content to
+		 * land at the next XDR position in the Send message.
+		 */
+		page_base = offset_in_page(xdr->tail[0].iov_base);
+		len = xdr->tail[0].iov_len;
+		page_base += len & 3;
+		len -= len & 3;
+		if (!rpcrdma_prepare_tail_iov(req, xdr, page_base, len))
+			return false;
+		kref_get(&req->rl_kref);
+	}
+
+	return true;
 }
 
 /**
···
  *
  * Returns 0 on success; otherwise a negative errno is returned.
  */
-int
-rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
-			  struct rpcrdma_req *req, u32 hdrlen,
-			  struct xdr_buf *xdr, enum rpcrdma_chunktype rtype)
+inline int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
+				     struct rpcrdma_req *req, u32 hdrlen,
+				     struct xdr_buf *xdr,
+				     enum rpcrdma_chunktype rtype)
 {
 	int ret;
 
 	ret = -EAGAIN;
 	req->rl_sendctx = rpcrdma_sendctx_get_locked(r_xprt);
 	if (!req->rl_sendctx)
-		goto err;
+		goto out_nosc;
 	req->rl_sendctx->sc_unmap_count = 0;
 	req->rl_sendctx->sc_req = req;
 	kref_init(&req->rl_kref);
···
 
 	ret = -EIO;
 	if (!rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen))
-		goto err;
-	if (rtype != rpcrdma_areadch)
-		if (!rpcrdma_prepare_msg_sges(r_xprt, req, xdr, rtype))
-			goto err;
+		goto out_unmap;
+
+	switch (rtype) {
+	case rpcrdma_noch:
+		if (!rpcrdma_prepare_noch_mapped(r_xprt, req, xdr))
+			goto out_unmap;
+		break;
+	case rpcrdma_readch:
+		if (!rpcrdma_prepare_readch(r_xprt, req, xdr))
+			goto out_unmap;
+		break;
+	case rpcrdma_areadch:
+		break;
+	default:
+		goto out_unmap;
+	}
+
 	return 0;
 
-err:
+out_unmap:
+	rpcrdma_sendctx_unmap(req->rl_sendctx);
+out_nosc:
 	trace_xprtrdma_prepsend_failed(&req->rl_slot, ret);
 	return ret;
 }
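
For orientation, here is a minimal, compilable userspace mock of the per-rtype dispatch this patch introduces in rpcrdma_prepare_send_sges(). It is a sketch only: the enum is truncated (the kernel's rpcrdma_chunktype defines additional values), the prepare_* stubs stand in for the new rpcrdma_prepare_noch_mapped() and rpcrdma_prepare_readch() helpers, and -5 stands in for the kernel's -EIO; only the control flow mirrors the patch.

/* Mock of the new rtype dispatch; all names below are stand-ins. */
#include <stdbool.h>
#include <stdio.h>

enum rpcrdma_chunktype { rpcrdma_noch, rpcrdma_readch, rpcrdma_areadch };

static bool prepare_noch_mapped(void) { return true; } /* stub */
static bool prepare_readch(void)      { return true; } /* stub */

static int prepare_send_sges(enum rpcrdma_chunktype rtype)
{
	switch (rtype) {
	case rpcrdma_noch:	/* whole message is Sent inline */
		if (!prepare_noch_mapped())
			goto out_unmap;
		break;
	case rpcrdma_readch:	/* page list moves via explicit RDMA */
		if (!prepare_readch())
			goto out_unmap;
		break;
	case rpcrdma_areadch:	/* entire message moves via RDMA Read */
		break;
	default:
		goto out_unmap;
	}
	return 0;

out_unmap:
	/* the real code unmaps the send context here */
	return -5;	/* -EIO in the kernel */
}

int main(void)
{
	printf("noch: %d, areadch: %d\n",
	       prepare_send_sges(rpcrdma_noch),
	       prepare_send_sges(rpcrdma_areadch));
	return 0;
}

The design point the commit message calls out is visible in this shape: each rtype gets its own case arm and its own helper, so adding a chunk type or an optimization for one path means adding a case, not threading another goto through a single monolithic function.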