
cifs: Add a function to build an RDMA SGE list from an iterator

Add a function that appends elements to an RDMA SGE list, each describing
a page fragment extracted from a BVEC-, KVEC- or XARRAY-type iterator and
DMA-mapped, until the maximum number of list elements is reached.

Nothing is done to make sure the pages remain present - that must be done
by the caller.
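
For context, the expected calling pattern looks roughly like this (a
minimal sketch, not part of this patch; the connection object "info" with
its "id->device" and "pd->local_dma_lkey" fields, and the SGE array size,
are assumptions drawn from the existing smbdirect code):

	/* Sketch: map up to @len bytes of @iter into an SGE list.  The
	 * pages behind @iter must already be held by the caller.
	 */
	struct ib_sge sges[8];		/* capacity picked arbitrarily */
	struct smb_extract_to_rdma rdma = {
		.sge		= sges,
		.nr_sge		= 0,
		.max_sge	= ARRAY_SIZE(sges),
		.device		= info->id->device,	/* assumed fields */
		.local_dma_lkey	= info->pd->local_dma_lkey,
		.direction	= DMA_TO_DEVICE,
	};
	ssize_t ret;

	ret = smb_extract_iter_to_rdma(iter, len, &rdma);
	if (ret < 0)
		return ret;	/* the helper unwinds its own mappings */

	/* rdma.sge[0..rdma.nr_sge - 1] can now be hung off a work request;
	 * unmapping after completion is the caller's job.
	 */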

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Steve French <sfrench@samba.org>
cc: Shyam Prasad N <nspmangalore@gmail.com>
cc: Rohith Surabattula <rohiths.msft@gmail.com>
cc: Tom Talpey <tom@talpey.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: linux-cifs@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
cc: linux-rdma@vger.kernel.org

Link: https://lore.kernel.org/r/166697256704.61150.17388516338310645808.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/166732028840.3186319.8512284239779728860.stgit@warthog.procyon.org.uk/ # rfc
Signed-off-by: Steve French <stfrench@microsoft.com>

Authored by David Howells, committed by Steve French
e5fbdde4 01858469

 fs/cifs/smbdirect.c | 214 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 214 insertions(+)
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -44,6 +44,17 @@
 static void destroy_mr_list(struct smbd_connection *info);
 static int allocate_mr_list(struct smbd_connection *info);
 
+struct smb_extract_to_rdma {
+	struct ib_sge		*sge;
+	unsigned int		nr_sge;
+	unsigned int		max_sge;
+	struct ib_device	*device;
+	u32			local_dma_lkey;
+	enum dma_data_direction direction;
+};
+static ssize_t smb_extract_iter_to_rdma(struct iov_iter *iter, size_t len,
+					struct smb_extract_to_rdma *rdma);
+
 /* SMBD version number */
 #define SMBD_V1	0x0100
@@ -2502,4 +2491,207 @@
 	wake_up(&info->wait_for_mr_cleanup);
 
 	return rc;
+}
+
+static bool smb_set_sge(struct smb_extract_to_rdma *rdma,
+			struct page *lowest_page, size_t off, size_t len)
+{
+	struct ib_sge *sge = &rdma->sge[rdma->nr_sge];
+	u64 addr;
+
+	addr = ib_dma_map_page(rdma->device, lowest_page,
+			       off, len, rdma->direction);
+	if (ib_dma_mapping_error(rdma->device, addr))
+		return false;
+
+	sge->addr   = addr;
+	sge->length = len;
+	sge->lkey   = rdma->local_dma_lkey;
+	rdma->nr_sge++;
+	return true;
+}
+
+/*
+ * Extract page fragments from a BVEC-class iterator and add them to an RDMA
+ * element list.  The pages are not pinned.
+ */
+static ssize_t smb_extract_bvec_to_rdma(struct iov_iter *iter,
+					struct smb_extract_to_rdma *rdma,
+					ssize_t maxsize)
+{
+	const struct bio_vec *bv = iter->bvec;
+	unsigned long start = iter->iov_offset;
+	unsigned int i;
+	ssize_t ret = 0;
+
+	for (i = 0; i < iter->nr_segs; i++) {
+		size_t off, len;
+
+		len = bv[i].bv_len;
+		if (start >= len) {
+			start -= len;
+			continue;
+		}
+
+		len = min_t(size_t, maxsize, len - start);
+		off = bv[i].bv_offset + start;
+
+		if (!smb_set_sge(rdma, bv[i].bv_page, off, len))
+			return -EIO;
+
+		ret += len;
+		maxsize -= len;
+		if (rdma->nr_sge >= rdma->max_sge || maxsize <= 0)
+			break;
+		start = 0;
+	}
+
+	return ret;
+}
+
+/*
+ * Extract fragments from a KVEC-class iterator and add them to an RDMA list.
+ * This can deal with vmalloc'd buffers as well as kmalloc'd or static buffers.
+ * The pages are not pinned.
+ */
+static ssize_t smb_extract_kvec_to_rdma(struct iov_iter *iter,
+					struct smb_extract_to_rdma *rdma,
+					ssize_t maxsize)
+{
+	const struct kvec *kv = iter->kvec;
+	unsigned long start = iter->iov_offset;
+	unsigned int i;
+	ssize_t ret = 0;
+
+	for (i = 0; i < iter->nr_segs; i++) {
+		struct page *page;
+		unsigned long kaddr;
+		size_t off, len, seg;
+
+		len = kv[i].iov_len;
+		if (start >= len) {
+			start -= len;
+			continue;
+		}
+
+		kaddr = (unsigned long)kv[i].iov_base + start;
+		off = kaddr & ~PAGE_MASK;
+		len = min_t(size_t, maxsize, len - start);
+		kaddr &= PAGE_MASK;
+
+		maxsize -= len;
+		do {
+			seg = min_t(size_t, len, PAGE_SIZE - off);
+
+			if (is_vmalloc_or_module_addr((void *)kaddr))
+				page = vmalloc_to_page((void *)kaddr);
+			else
+				page = virt_to_page(kaddr);
+
+			if (!smb_set_sge(rdma, page, off, seg))
+				return -EIO;
+
+			ret += seg;
+			len -= seg;
+			kaddr += PAGE_SIZE;
+			off = 0;
+		} while (len > 0 && rdma->nr_sge < rdma->max_sge);
+
+		if (rdma->nr_sge >= rdma->max_sge || maxsize <= 0)
+			break;
+		start = 0;
+	}
+
+	return ret;
+}
+
+/*
+ * Extract folio fragments from an XARRAY-class iterator and add them to an
+ * RDMA list.  The folios are not pinned.
+ */
+static ssize_t smb_extract_xarray_to_rdma(struct iov_iter *iter,
+					  struct smb_extract_to_rdma *rdma,
+					  ssize_t maxsize)
+{
+	struct xarray *xa = iter->xarray;
+	struct folio *folio;
+	loff_t start = iter->xarray_start + iter->iov_offset;
+	pgoff_t index = start / PAGE_SIZE;
+	ssize_t ret = 0;
+	size_t off, len;
+	XA_STATE(xas, xa, index);
+
+	rcu_read_lock();
+
+	xas_for_each(&xas, folio, ULONG_MAX) {
+		if (xas_retry(&xas, folio))
+			continue;
+		if (WARN_ON(xa_is_value(folio)))
+			break;
+		if (WARN_ON(folio_test_hugetlb(folio)))
+			break;
+
+		off = offset_in_folio(folio, start);
+		len = min_t(size_t, maxsize, folio_size(folio) - off);
+
+		if (!smb_set_sge(rdma, folio_page(folio, 0), off, len)) {
+			rcu_read_unlock();
+			return -EIO;
+		}
+
+		maxsize -= len;
+		ret += len;
+		if (rdma->nr_sge >= rdma->max_sge || maxsize <= 0)
+			break;
+	}
+
+	rcu_read_unlock();
+	return ret;
+}
+
+/*
+ * Extract page fragments from up to the given amount of the source iterator
+ * and build up an RDMA list that refers to all of those bits.  The RDMA list
+ * is appended to, up to the maximum number of elements set in the parameter
+ * block.
+ *
+ * The extracted page fragments are not pinned or ref'd in any way; if an
+ * IOVEC/UBUF-type iterator is to be used, it should be converted to a
+ * BVEC-type iterator and the pages pinned, ref'd or otherwise held in some
+ * way.
+ */
+static ssize_t smb_extract_iter_to_rdma(struct iov_iter *iter, size_t len,
+					struct smb_extract_to_rdma *rdma)
+{
+	ssize_t ret;
+	int before = rdma->nr_sge;
+
+	switch (iov_iter_type(iter)) {
+	case ITER_BVEC:
+		ret = smb_extract_bvec_to_rdma(iter, rdma, len);
+		break;
+	case ITER_KVEC:
+		ret = smb_extract_kvec_to_rdma(iter, rdma, len);
+		break;
+	case ITER_XARRAY:
+		ret = smb_extract_xarray_to_rdma(iter, rdma, len);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		return -EIO;
+	}
+
+	if (ret > 0) {
+		iov_iter_advance(iter, ret);
+	} else if (ret < 0) {
+		while (rdma->nr_sge > before) {
+			struct ib_sge *sge = &rdma->sge[rdma->nr_sge--];
+
+			ib_dma_unmap_single(rdma->device, sge->addr, sge->length,
+					    rdma->direction);
+			sge->addr = 0;
+		}
+	}
+
+	return ret;
 }
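
Note that on success the DMA mappings are deliberately left in place; only
the failure path unwinds them, so teardown after the RDMA operation
completes belongs to the caller.  A teardown counterpart could look like
this (a hedged sketch; smb_unmap_rdma_sges is a hypothetical name, not
something this patch adds):

	/* Hypothetical helper: unmap everything that a prior call to
	 * smb_extract_iter_to_rdma() mapped into @rdma->sge[].
	 */
	static void smb_unmap_rdma_sges(struct smb_extract_to_rdma *rdma)
	{
		while (rdma->nr_sge > 0) {
			struct ib_sge *sge = &rdma->sge[--rdma->nr_sge];

			ib_dma_unmap_single(rdma->device, sge->addr,
					    sge->length, rdma->direction);
			sge->addr = 0;
		}
	}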