Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

IB/core: Enhance ib_map_mr_sg()

The SRP initiator allows setting max_sectors to a value that exceeds
the largest amount of data that can be mapped at once with an mlx4
HCA using fast registration and a page size of 4 KB. Hence modify
ib_map_mr_sg() such that it can map partial sg-elements. If an
sg-element has been mapped partially, let the caller know which
fraction has been mapped by adjusting *sg_offset.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Tested-by: Laurence Oberman <loberman@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Doug Ledford <dledford@redhat.com>
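
For context, this change turns sg_offset into an in/out parameter: ib_map_mr_sg() now reports, through *sg_offset, how far into the first unmapped sg element it got when the MR filled up. Below is a minimal, hypothetical caller-side sketch (not part of this patch) of how that could be used to spread one DMA-mapped scatterlist over several MRs; the function name map_sg_over_mrs() and its error handling are illustrative only.

#include <linux/scatterlist.h>
#include <rdma/ib_verbs.h>

/*
 * Hypothetical sketch, not part of this patch: map a DMA-mapped
 * scatterlist with as many MRs as needed, resuming from *sg_offset
 * whenever ib_map_mr_sg() stops partway through an sg element.
 */
static int map_sg_over_mrs(struct ib_mr **mrs, int num_mrs,
			   struct scatterlist *sg, int sg_nents,
			   unsigned int page_size)
{
	unsigned int sg_offset = 0;
	int i, n;

	for (i = 0; i < num_mrs && sg_nents > 0; i++) {
		n = ib_map_mr_sg(mrs[i], sg, sg_nents, &sg_offset, page_size);
		if (n < 0)
			return n;
		/*
		 * n sg elements have been fully consumed. If sg_offset is
		 * non-zero, element n was mapped only partially and the
		 * next call resumes at that byte offset.
		 */
		sg_nents -= n;
		while (n-- > 0)
			sg = sg_next(sg);
	}

	return sg_nents ? -EINVAL : 0;
}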

Authored by Bart Van Assche, committed by Doug Ledford
9aa8b321 f83b2561

+47 -27
+1 -1
drivers/infiniband/core/rw.c
 		reg->inv_wr.next = NULL;
 	}

-	ret = ib_map_mr_sg(reg->mr, sg, nents, offset, PAGE_SIZE);
+	ret = ib_map_mr_sg(reg->mr, sg, nents, &offset, PAGE_SIZE);
 	if (ret < nents) {
 		ib_mr_pool_put(qp, &qp->rdma_mrs, reg->mr);
 		return -EINVAL;
+21 -5
drivers/infiniband/core/verbs.c
  * is ready for registration.
  */
 int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
-		unsigned int sg_offset, unsigned int page_size)
+		unsigned int *sg_offset, unsigned int page_size)
 {
 	if (unlikely(!mr->device->map_mr_sg))
 		return -ENOSYS;
···
  * @mr: memory region
  * @sgl: dma mapped scatterlist
  * @sg_nents: number of entries in sg
- * @sg_offset: offset in bytes into sg
+ * @sg_offset_p: IN:  start offset in bytes into sg
+ *               OUT: offset in bytes for element n of the sg of the first
+ *                    byte that has not been processed where n is the return
+ *                    value of this function.
  * @set_page: driver page assignment function pointer
  *
  * Core service helper for drivers to convert the largest
···
  * a page vector.
  */
 int ib_sg_to_pages(struct ib_mr *mr, struct scatterlist *sgl, int sg_nents,
-		unsigned int sg_offset, int (*set_page)(struct ib_mr *, u64))
+		unsigned int *sg_offset_p, int (*set_page)(struct ib_mr *, u64))
 {
 	struct scatterlist *sg;
 	u64 last_end_dma_addr = 0;
+	unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
 	unsigned int last_page_off = 0;
 	u64 page_mask = ~((u64)mr->page_size - 1);
 	int i, ret;
+
+	if (unlikely(sg_nents <= 0 || sg_offset > sg_dma_len(&sgl[0])))
+		return -EINVAL;

 	mr->iova = sg_dma_address(&sgl[0]) + sg_offset;
 	mr->length = 0;

 	for_each_sg(sgl, sg, sg_nents, i) {
 		u64 dma_addr = sg_dma_address(sg) + sg_offset;
+		u64 prev_addr = dma_addr;
 		unsigned int dma_len = sg_dma_len(sg) - sg_offset;
 		u64 end_dma_addr = dma_addr + dma_len;
 		u64 page_addr = dma_addr & page_mask;
···

 		do {
 			ret = set_page(mr, page_addr);
-			if (unlikely(ret < 0))
-				return i ? : ret;
+			if (unlikely(ret < 0)) {
+				sg_offset = prev_addr - sg_dma_address(sg);
+				mr->length += prev_addr - dma_addr;
+				if (sg_offset_p)
+					*sg_offset_p = sg_offset;
+				return i || sg_offset ? i : ret;
+			}
+			prev_addr = page_addr;
 next_page:
 			page_addr += mr->page_size;
 		} while (page_addr < end_dma_addr);
···
 		sg_offset = 0;
 	}

+	if (sg_offset_p)
+		*sg_offset_p = 0;
 	return i;
 }
 EXPORT_SYMBOL(ib_sg_to_pages);
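
The new partial-mapping path in ib_sg_to_pages() above is taken when the driver's set_page callback runs out of page-list slots. A rough sketch of such a callback is shown below; it is loosely modeled on the existing drivers, and the structure and field names (example_mr, pages, npages, max_pages) are illustrative assumptions, not code from this patch.

#include <linux/kernel.h>
#include <linux/errno.h>
#include <rdma/ib_verbs.h>

/* Illustrative only: the struct and its fields are made up for this sketch. */
struct example_mr {
	struct ib_mr ibmr;
	u64 *pages;
	u32 npages;
	u32 max_pages;
};

static int example_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct example_mr *mr = container_of(ibmr, struct example_mr, ibmr);

	/*
	 * No page-list slot left: with this patch, ib_sg_to_pages() turns
	 * this error into a partial mapping, records the resume point in
	 * *sg_offset_p and returns the number of fully mapped sg elements.
	 */
	if (unlikely(mr->npages == mr->max_pages))
		return -ENOMEM;

	mr->pages[mr->npages++] = addr;
	return 0;
}

Such a callback would be passed to ib_sg_to_pages() from the driver's map_mr_sg implementation, the same way the mlx4 and cxgb4 drivers wire up their own set_page helpers.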
+1 -1
drivers/infiniband/hw/cxgb3/iwch_provider.c
 }

 static int iwch_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
-		int sg_nents, unsigned sg_offset)
+		int sg_nents, unsigned int *sg_offset)
 {
 	struct iwch_mr *mhp = to_iwch_mr(ibmr);

+1 -1
drivers/infiniband/hw/cxgb4/iw_cxgb4.h
 			enum ib_mr_type mr_type,
 			u32 max_num_sg);
 int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
-		unsigned int sg_offset);
+		unsigned int *sg_offset);
 int c4iw_dealloc_mw(struct ib_mw *mw);
 struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
 			struct ib_udata *udata);
+1 -1
drivers/infiniband/hw/cxgb4/mem.c
 }

 int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
-		unsigned int sg_offset)
+		unsigned int *sg_offset)
 {
 	struct c4iw_mr *mhp = to_c4iw_mr(ibmr);

+1 -1
drivers/infiniband/hw/i40iw/i40iw_verbs.c
  * @sg_nents: number of sg pages
  */
 static int i40iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
-		int sg_nents, unsigned int sg_offset)
+		int sg_nents, unsigned int *sg_offset)
 {
 	struct i40iw_mr *iwmr = to_iwmr(ibmr);

+1 -1
drivers/infiniband/hw/mlx4/mlx4_ib.h
 			enum ib_mr_type mr_type,
 			u32 max_num_sg);
 int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
-		unsigned int sg_offset);
+		unsigned int *sg_offset);
 int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
 int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
 struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
+1 -1
drivers/infiniband/hw/mlx4/mr.c
 }

 int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
-		unsigned int sg_offset)
+		unsigned int *sg_offset)
 {
 	struct mlx4_ib_mr *mr = to_mmr(ibmr);
 	int rc;
+1 -1
drivers/infiniband/hw/mlx5/mlx5_ib.h
 			enum ib_mr_type mr_type,
 			u32 max_num_sg);
 int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
-		unsigned int sg_offset);
+		unsigned int *sg_offset);
 int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
 			const struct ib_wc *in_wc, const struct ib_grh *in_grh,
 			const struct ib_mad_hdr *in, size_t in_mad_size,
+6 -2
drivers/infiniband/hw/mlx5/mr.c
 mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
 		   struct scatterlist *sgl,
 		   unsigned short sg_nents,
-		   unsigned int sg_offset)
+		   unsigned int *sg_offset_p)
 {
 	struct scatterlist *sg = sgl;
 	struct mlx5_klm *klms = mr->descs;
+	unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
 	u32 lkey = mr->ibmr.pd->local_dma_lkey;
 	int i;

···

 		sg_offset = 0;
 	}
+
+	if (sg_offset_p)
+		*sg_offset_p = sg_offset;

 	return i;
 }
···
 }

 int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
-		unsigned int sg_offset)
+		unsigned int *sg_offset)
 {
 	struct mlx5_ib_mr *mr = to_mmr(ibmr);
 	int n;
+1 -1
drivers/infiniband/hw/nes/nes_verbs.c
 }

 static int nes_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
-		int sg_nents, unsigned int sg_offset)
+		int sg_nents, unsigned int *sg_offset)
 {
 	struct nes_mr *nesmr = to_nesmr(ibmr);

+1 -1
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
 }

 int ocrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
-		unsigned int sg_offset)
+		unsigned int *sg_offset)
 {
 	struct ocrdma_mr *mr = get_ocrdma_mr(ibmr);

+1 -1
drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
 			enum ib_mr_type mr_type,
 			u32 max_num_sg);
 int ocrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
-		unsigned sg_offset);
+		unsigned int *sg_offset);

 #endif /* __OCRDMA_VERBS_H__ */
+2 -2
drivers/infiniband/ulp/iser/iser_memory.c
 	page_vec->npages = 0;
 	page_vec->fake_mr.page_size = SIZE_4K;
 	plen = ib_sg_to_pages(&page_vec->fake_mr, mem->sg,
-			      mem->size, 0, iser_set_page);
+			      mem->size, NULL, iser_set_page);
 	if (unlikely(plen < mem->size)) {
 		iser_err("page vec too short to hold this SG\n");
 		iser_data_buf_dump(mem, device->ib_device);
···

 	ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));

-	n = ib_map_mr_sg(mr, mem->sg, mem->size, 0, SIZE_4K);
+	n = ib_map_mr_sg(mr, mem->sg, mem->size, NULL, SIZE_4K);
 	if (unlikely(n != mem->size)) {
 		iser_err("failed to map sg (%d/%d)\n",
 			 n, mem->size);
+1 -1
drivers/infiniband/ulp/srp/ib_srp.c
 	rkey = ib_inc_rkey(desc->mr->rkey);
 	ib_update_fast_reg_key(desc->mr, rkey);

-	n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, 0, dev->mr_page_size);
+	n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, NULL, dev->mr_page_size);
 	if (unlikely(n < 0)) {
 		srp_fr_pool_put(ch->fr_pool, &desc, 1);
 		pr_debug("%s: ib_map_mr_sg(%d) returned %d.\n",
+4 -4
include/rdma/ib_verbs.h
 	int (*map_mr_sg)(struct ib_mr *mr,
 			 struct scatterlist *sg,
 			 int sg_nents,
-			 unsigned sg_offset);
+			 unsigned int *sg_offset);
 	struct ib_mw * (*alloc_mw)(struct ib_pd *pd,
 				   enum ib_mw_type type,
 				   struct ib_udata *udata);
···
 				    const struct sockaddr *addr);

 int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
-		 unsigned int sg_offset, unsigned int page_size);
+		 unsigned int *sg_offset, unsigned int page_size);

 static inline int
 ib_map_mr_sg_zbva(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
-		  unsigned int sg_offset, unsigned int page_size)
+		  unsigned int *sg_offset, unsigned int page_size)
 {
 	int n;

···
 }

 int ib_sg_to_pages(struct ib_mr *mr, struct scatterlist *sgl, int sg_nents,
-		   unsigned int sg_offset, int (*set_page)(struct ib_mr *, u64));
+		   unsigned int *sg_offset, int (*set_page)(struct ib_mr *, u64));

 void ib_drain_rq(struct ib_qp *qp);
 void ib_drain_sq(struct ib_qp *qp);
+1 -1
net/sunrpc/xprtrdma/frwr_ops.c
 		return -ENOMEM;
 	}

-	n = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, 0, PAGE_SIZE);
+	n = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, NULL, PAGE_SIZE);
 	if (unlikely(n != frmr->sg_nents)) {
 		pr_err("RPC: %s: failed to map mr %p (%u/%u)\n",
 		       __func__, frmr->fr_mr, n, frmr->sg_nents);
+1 -1
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
 	}
 	atomic_inc(&xprt->sc_dma_used);

-	n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, 0, PAGE_SIZE);
+	n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, NULL, PAGE_SIZE);
 	if (unlikely(n != frmr->sg_nents)) {
 		pr_err("svcrdma: failed to map mr %p (%d/%d elements)\n",
 		       frmr->mr, n, frmr->sg_nents);