Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

cifs: Build the RDMA SGE list directly from an iterator

In the depths of the cifs RDMA code, extract part of an iov iterator
directly into an SGE list without going through an intermediate
scatterlist.

Note that this doesn't support extraction from an IOBUF- or UBUF-type
iterator (i.e. a user-supplied buffer). The assumption is that the higher
layers will extract those to a BVEC-type iterator first and do whatever is
required to stop the pages from going away.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Steve French <sfrench@samba.org>
cc: Shyam Prasad N <nspmangalore@gmail.com>
cc: Rohith Surabattula <rohiths.msft@gmail.com>
cc: Tom Talpey <tom@talpey.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: linux-cifs@vger.kernel.org
cc: linux-rdma@vger.kernel.org

Link: https://lore.kernel.org/r/166697260361.61150.5064013393408112197.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/166732032518.3186319.1859601819981624629.stgit@warthog.procyon.org.uk/ # rfc
Signed-off-by: Steve French <stfrench@microsoft.com>

Authored by David Howells; committed by Steve French.
3d78fe73 d08089f6

+63 -93
+62 -91
fs/cifs/smbdirect.c
··· 828 828 return rc; 829 829 } 830 830 831 - static int smbd_post_send_sgl(struct smbd_connection *info, 832 - struct scatterlist *sgl, int data_length, int remaining_data_length) 831 + static int smbd_post_send_iter(struct smbd_connection *info, 832 + struct iov_iter *iter, 833 + int *_remaining_data_length) 833 834 { 834 - int num_sgs; 835 835 int i, rc; 836 836 int header_length; 837 + int data_length; 837 838 struct smbd_request *request; 838 839 struct smbd_data_transfer *packet; 839 840 int new_credits; 840 - struct scatterlist *sg; 841 841 842 842 wait_credit: 843 843 /* Wait for send credits. A SMBD packet needs one credit */ ··· 881 881 } 882 882 883 883 request->info = info; 884 + memset(request->sge, 0, sizeof(request->sge)); 885 + 886 + /* Fill in the data payload to find out how much data we can add */ 887 + if (iter) { 888 + struct smb_extract_to_rdma extract = { 889 + .nr_sge = 1, 890 + .max_sge = SMBDIRECT_MAX_SEND_SGE, 891 + .sge = request->sge, 892 + .device = info->id->device, 893 + .local_dma_lkey = info->pd->local_dma_lkey, 894 + .direction = DMA_TO_DEVICE, 895 + }; 896 + 897 + rc = smb_extract_iter_to_rdma(iter, *_remaining_data_length, 898 + &extract); 899 + if (rc < 0) 900 + goto err_dma; 901 + data_length = rc; 902 + request->num_sge = extract.nr_sge; 903 + *_remaining_data_length -= data_length; 904 + } else { 905 + data_length = 0; 906 + request->num_sge = 1; 907 + } 884 908 885 909 /* Fill in the packet header */ 886 910 packet = smbd_request_payload(request); ··· 926 902 else 927 903 packet->data_offset = cpu_to_le32(24); 928 904 packet->data_length = cpu_to_le32(data_length); 929 - packet->remaining_data_length = cpu_to_le32(remaining_data_length); 905 + packet->remaining_data_length = cpu_to_le32(*_remaining_data_length); 930 906 packet->padding = 0; 931 907 932 908 log_outgoing(INFO, "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n", ··· 942 918 if (!data_length) 943 919 
header_length = offsetof(struct smbd_data_transfer, padding); 944 920 945 - request->num_sge = 1; 946 921 request->sge[0].addr = ib_dma_map_single(info->id->device, 947 922 (void *)packet, 948 923 header_length, ··· 954 931 955 932 request->sge[0].length = header_length; 956 933 request->sge[0].lkey = info->pd->local_dma_lkey; 957 - 958 - /* Fill in the packet data payload */ 959 - num_sgs = sgl ? sg_nents(sgl) : 0; 960 - for_each_sg(sgl, sg, num_sgs, i) { 961 - request->sge[i+1].addr = 962 - ib_dma_map_page(info->id->device, sg_page(sg), 963 - sg->offset, sg->length, DMA_TO_DEVICE); 964 - if (ib_dma_mapping_error( 965 - info->id->device, request->sge[i+1].addr)) { 966 - rc = -EIO; 967 - request->sge[i+1].addr = 0; 968 - goto err_dma; 969 - } 970 - request->sge[i+1].length = sg->length; 971 - request->sge[i+1].lkey = info->pd->local_dma_lkey; 972 - request->num_sge++; 973 - } 974 934 975 935 rc = smbd_post_send(info, request); 976 936 if (!rc) ··· 993 987 */ 994 988 static int smbd_post_send_empty(struct smbd_connection *info) 995 989 { 990 + int remaining_data_length = 0; 991 + 996 992 info->count_send_empty++; 997 - return smbd_post_send_sgl(info, NULL, 0, 0); 993 + return smbd_post_send_iter(info, NULL, &remaining_data_length); 998 994 } 999 995 1000 996 /* ··· 1943 1935 } 1944 1936 1945 1937 /* 1946 - * Send the contents of an iterator 1947 - * @iter: The iterator to send 1948 - * @_remaining_data_length: remaining data to send in this payload 1949 - */ 1950 - static int smbd_post_send_iter(struct smbd_connection *info, 1951 - struct iov_iter *iter, 1952 - int *_remaining_data_length) 1953 - { 1954 - struct scatterlist sgl[SMBDIRECT_MAX_SEND_SGE - 1]; 1955 - unsigned int max_payload = info->max_send_size - sizeof(struct smbd_data_transfer); 1956 - ssize_t rc; 1957 - 1958 - /* We're not expecting a user-backed iter */ 1959 - WARN_ON(iov_iter_extract_will_pin(iter)); 1960 - 1961 - do { 1962 - struct sg_table sgtable = { .sgl = sgl }; 1963 - size_t maxlen = 
min_t(size_t, *_remaining_data_length, max_payload); 1964 - 1965 - sg_init_table(sgtable.sgl, ARRAY_SIZE(sgl)); 1966 - rc = netfs_extract_iter_to_sg(iter, maxlen, 1967 - &sgtable, ARRAY_SIZE(sgl), 0); 1968 - if (rc < 0) 1969 - break; 1970 - if (WARN_ON_ONCE(sgtable.nents == 0)) 1971 - return -EIO; 1972 - 1973 - sg_mark_end(&sgl[sgtable.nents - 1]); 1974 - *_remaining_data_length -= rc; 1975 - rc = smbd_post_send_sgl(info, sgl, rc, *_remaining_data_length); 1976 - } while (rc == 0 && iov_iter_count(iter) > 0); 1977 - 1978 - return rc; 1979 - } 1980 - 1981 - /* 1982 1938 * Send data to transport 1983 1939 * Each rqst is transported as a SMBDirect payload 1984 1940 * rqst: the data to write ··· 2102 2130 cancel_work_sync(&info->mr_recovery_work); 2103 2131 list_for_each_entry_safe(mr, tmp, &info->mr_list, list) { 2104 2132 if (mr->state == MR_INVALIDATED) 2105 - ib_dma_unmap_sg(info->id->device, mr->sgl, 2106 - mr->sgl_count, mr->dir); 2133 + ib_dma_unmap_sg(info->id->device, mr->sgt.sgl, 2134 + mr->sgt.nents, mr->dir); 2107 2135 ib_dereg_mr(mr->mr); 2108 - kfree(mr->sgl); 2136 + kfree(mr->sgt.sgl); 2109 2137 kfree(mr); 2110 2138 } 2111 2139 } ··· 2141 2169 info->mr_type, info->max_frmr_depth); 2142 2170 goto out; 2143 2171 } 2144 - smbdirect_mr->sgl = kcalloc( 2145 - info->max_frmr_depth, 2146 - sizeof(struct scatterlist), 2147 - GFP_KERNEL); 2148 - if (!smbdirect_mr->sgl) { 2172 + smbdirect_mr->sgt.sgl = kcalloc(info->max_frmr_depth, 2173 + sizeof(struct scatterlist), 2174 + GFP_KERNEL); 2175 + if (!smbdirect_mr->sgt.sgl) { 2149 2176 log_rdma_mr(ERR, "failed to allocate sgl\n"); 2150 2177 ib_dereg_mr(smbdirect_mr->mr); 2151 2178 goto out; ··· 2163 2192 list_for_each_entry_safe(smbdirect_mr, tmp, &info->mr_list, list) { 2164 2193 list_del(&smbdirect_mr->list); 2165 2194 ib_dereg_mr(smbdirect_mr->mr); 2166 - kfree(smbdirect_mr->sgl); 2195 + kfree(smbdirect_mr->sgt.sgl); 2167 2196 kfree(smbdirect_mr); 2168 2197 } 2169 2198 return -ENOMEM; ··· 2217 2246 2218 2247 /* 
2219 2248 * Transcribe the pages from an iterator into an MR scatterlist. 2220 - * @iter: The iterator to transcribe 2221 - * @_remaining_data_length: remaining data to send in this payload 2222 2249 */ 2223 2250 static int smbd_iter_to_mr(struct smbd_connection *info, 2224 2251 struct iov_iter *iter, 2225 - struct scatterlist *sgl, 2226 - unsigned int num_pages) 2252 + struct sg_table *sgt, 2253 + unsigned int max_sg) 2227 2254 { 2228 - struct sg_table sgtable = { .sgl = sgl }; 2229 2255 int ret; 2230 2256 2231 - sg_init_table(sgl, num_pages); 2257 + memset(sgt->sgl, 0, max_sg * sizeof(struct scatterlist)); 2232 2258 2233 - ret = netfs_extract_iter_to_sg(iter, iov_iter_count(iter), 2234 - &sgtable, num_pages, 0); 2259 + ret = netfs_extract_iter_to_sg(iter, iov_iter_count(iter), sgt, max_sg, 0); 2235 2260 WARN_ON(ret < 0); 2261 + if (sgt->nents > 0) 2262 + sg_mark_end(&sgt->sgl[sgt->nents - 1]); 2236 2263 return ret; 2237 2264 } 2238 2265 ··· 2267 2298 dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; 2268 2299 smbdirect_mr->dir = dir; 2269 2300 smbdirect_mr->need_invalidate = need_invalidate; 2270 - smbdirect_mr->sgl_count = num_pages; 2301 + smbdirect_mr->sgt.nents = 0; 2302 + smbdirect_mr->sgt.orig_nents = 0; 2271 2303 2272 - log_rdma_mr(INFO, "num_pages=0x%x count=0x%zx\n", 2273 - num_pages, iov_iter_count(iter)); 2274 - smbd_iter_to_mr(info, iter, smbdirect_mr->sgl, num_pages); 2304 + log_rdma_mr(INFO, "num_pages=0x%x count=0x%zx depth=%u\n", 2305 + num_pages, iov_iter_count(iter), info->max_frmr_depth); 2306 + smbd_iter_to_mr(info, iter, &smbdirect_mr->sgt, info->max_frmr_depth); 2275 2307 2276 - rc = ib_dma_map_sg(info->id->device, smbdirect_mr->sgl, num_pages, dir); 2308 + rc = ib_dma_map_sg(info->id->device, smbdirect_mr->sgt.sgl, 2309 + smbdirect_mr->sgt.nents, dir); 2277 2310 if (!rc) { 2278 2311 log_rdma_mr(ERR, "ib_dma_map_sg num_pages=%x dir=%x rc=%x\n", 2279 2312 num_pages, dir, rc); 2280 2313 goto dma_map_error; 2281 2314 } 2282 2315 2283 - rc = 
ib_map_mr_sg(smbdirect_mr->mr, smbdirect_mr->sgl, num_pages, 2284 - NULL, PAGE_SIZE); 2285 - if (rc != num_pages) { 2316 + rc = ib_map_mr_sg(smbdirect_mr->mr, smbdirect_mr->sgt.sgl, 2317 + smbdirect_mr->sgt.nents, NULL, PAGE_SIZE); 2318 + if (rc != smbdirect_mr->sgt.nents) { 2286 2319 log_rdma_mr(ERR, 2287 - "ib_map_mr_sg failed rc = %d num_pages = %x\n", 2288 - rc, num_pages); 2320 + "ib_map_mr_sg failed rc = %d nents = %x\n", 2321 + rc, smbdirect_mr->sgt.nents); 2289 2322 goto map_mr_error; 2290 2323 } 2291 2324 ··· 2319 2348 2320 2349 /* If all failed, attempt to recover this MR by setting it MR_ERROR*/ 2321 2350 map_mr_error: 2322 - ib_dma_unmap_sg(info->id->device, smbdirect_mr->sgl, 2323 - smbdirect_mr->sgl_count, smbdirect_mr->dir); 2351 + ib_dma_unmap_sg(info->id->device, smbdirect_mr->sgt.sgl, 2352 + smbdirect_mr->sgt.nents, smbdirect_mr->dir); 2324 2353 2325 2354 dma_map_error: 2326 2355 smbdirect_mr->state = MR_ERROR; ··· 2387 2416 2388 2417 if (smbdirect_mr->state == MR_INVALIDATED) { 2389 2418 ib_dma_unmap_sg( 2390 - info->id->device, smbdirect_mr->sgl, 2391 - smbdirect_mr->sgl_count, 2419 + info->id->device, smbdirect_mr->sgt.sgl, 2420 + smbdirect_mr->sgt.nents, 2392 2421 smbdirect_mr->dir); 2393 2422 smbdirect_mr->state = MR_READY; 2394 2423 if (atomic_inc_return(&info->mr_ready_count) == 1)
+1 -2
fs/cifs/smbdirect.h
··· 288 288 struct list_head list; 289 289 enum mr_state state; 290 290 struct ib_mr *mr; 291 - struct scatterlist *sgl; 292 - int sgl_count; 291 + struct sg_table sgt; 293 292 enum dma_data_direction dir; 294 293 union { 295 294 struct ib_reg_wr wr;