Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

virtiofs: set up virtio_fs dax_device

Set up a dax device.

Use the shm capability to find the cache entry and map it.

The DAX window is accessed by the fs/dax.c infrastructure and must have
struct pages (at least on x86). Use devm_memremap_pages() to map the
DAX window PCI BAR and allocate struct page.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>

authored by

Stefan Hajnoczi and committed by
Miklos Szeredi
22f3787e f4fd4ae3

+141
+138
fs/fuse/virtio_fs.c
··· 5 5 */ 6 6 7 7 #include <linux/fs.h> 8 + #include <linux/dax.h> 9 + #include <linux/pci.h> 10 + #include <linux/pfn_t.h> 8 11 #include <linux/module.h> 9 12 #include <linux/virtio.h> 10 13 #include <linux/virtio_fs.h> 11 14 #include <linux/delay.h> 12 15 #include <linux/fs_context.h> 13 16 #include <linux/highmem.h> 17 + #include <linux/uio.h> 14 18 #include "fuse_i.h" 15 19 16 20 /* List of virtio-fs device instances and a lock for the list. Also provides ··· 53 49 struct virtio_fs_vq *vqs; 54 50 unsigned int nvqs; /* number of virtqueues */ 55 51 unsigned int num_request_queues; /* number of request queues */ 52 + struct dax_device *dax_dev; 53 + 54 + /* DAX memory window where file contents are mapped */ 55 + void *window_kaddr; 56 + phys_addr_t window_phys_addr; 57 + size_t window_len; 56 58 }; 57 59 58 60 struct virtio_fs_forget_req { ··· 696 686 vdev->config->del_vqs(vdev); 697 687 } 698 688 689 + /* Map a window offset to a page frame number. The window offset will have 690 + * been produced by .iomap_begin(), which maps a file offset to a window 691 + * offset. 692 + */ 693 + static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, 694 + long nr_pages, void **kaddr, pfn_t *pfn) 695 + { 696 + struct virtio_fs *fs = dax_get_private(dax_dev); 697 + phys_addr_t offset = PFN_PHYS(pgoff); 698 + size_t max_nr_pages = fs->window_len/PAGE_SIZE - pgoff; 699 + 700 + if (kaddr) 701 + *kaddr = fs->window_kaddr + offset; 702 + if (pfn) 703 + *pfn = phys_to_pfn_t(fs->window_phys_addr + offset, 704 + PFN_DEV | PFN_MAP); 705 + return nr_pages > max_nr_pages ? 
max_nr_pages : nr_pages; 706 + } 707 + 708 + static size_t virtio_fs_copy_from_iter(struct dax_device *dax_dev, 709 + pgoff_t pgoff, void *addr, 710 + size_t bytes, struct iov_iter *i) 711 + { 712 + return copy_from_iter(addr, bytes, i); 713 + } 714 + 715 + static size_t virtio_fs_copy_to_iter(struct dax_device *dax_dev, 716 + pgoff_t pgoff, void *addr, 717 + size_t bytes, struct iov_iter *i) 718 + { 719 + return copy_to_iter(addr, bytes, i); 720 + } 721 + 722 + static int virtio_fs_zero_page_range(struct dax_device *dax_dev, 723 + pgoff_t pgoff, size_t nr_pages) 724 + { 725 + long rc; 726 + void *kaddr; 727 + 728 + rc = dax_direct_access(dax_dev, pgoff, nr_pages, &kaddr, NULL); 729 + if (rc < 0) 730 + return rc; 731 + memset(kaddr, 0, nr_pages << PAGE_SHIFT); 732 + dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT); 733 + return 0; 734 + } 735 + 736 + static const struct dax_operations virtio_fs_dax_ops = { 737 + .direct_access = virtio_fs_direct_access, 738 + .copy_from_iter = virtio_fs_copy_from_iter, 739 + .copy_to_iter = virtio_fs_copy_to_iter, 740 + .zero_page_range = virtio_fs_zero_page_range, 741 + }; 742 + 743 + static void virtio_fs_cleanup_dax(void *data) 744 + { 745 + struct dax_device *dax_dev = data; 746 + 747 + kill_dax(dax_dev); 748 + put_dax(dax_dev); 749 + } 750 + 751 + static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs) 752 + { 753 + struct virtio_shm_region cache_reg; 754 + struct dev_pagemap *pgmap; 755 + bool have_cache; 756 + 757 + if (!IS_ENABLED(CONFIG_FUSE_DAX)) 758 + return 0; 759 + 760 + /* Get cache region */ 761 + have_cache = virtio_get_shm_region(vdev, &cache_reg, 762 + (u8)VIRTIO_FS_SHMCAP_ID_CACHE); 763 + if (!have_cache) { 764 + dev_notice(&vdev->dev, "%s: No cache capability\n", __func__); 765 + return 0; 766 + } 767 + 768 + if (!devm_request_mem_region(&vdev->dev, cache_reg.addr, cache_reg.len, 769 + dev_name(&vdev->dev))) { 770 + dev_warn(&vdev->dev, "could not reserve region addr=0x%llx 
len=0x%llx\n", 771 + cache_reg.addr, cache_reg.len); 772 + return -EBUSY; 773 + } 774 + 775 + dev_notice(&vdev->dev, "Cache len: 0x%llx @ 0x%llx\n", cache_reg.len, 776 + cache_reg.addr); 777 + 778 + pgmap = devm_kzalloc(&vdev->dev, sizeof(*pgmap), GFP_KERNEL); 779 + if (!pgmap) 780 + return -ENOMEM; 781 + 782 + pgmap->type = MEMORY_DEVICE_FS_DAX; 783 + 784 + /* Ideally we would directly use the PCI BAR resource but 785 + * devm_memremap_pages() wants its own copy in pgmap. So 786 + * initialize a struct resource from scratch (only the start 787 + * and end fields will be used). 788 + */ 789 + pgmap->res = (struct resource){ 790 + .name = "virtio-fs dax window", 791 + .start = (phys_addr_t) cache_reg.addr, 792 + .end = (phys_addr_t) cache_reg.addr + cache_reg.len - 1, 793 + }; 794 + 795 + fs->window_kaddr = devm_memremap_pages(&vdev->dev, pgmap); 796 + if (IS_ERR(fs->window_kaddr)) 797 + return PTR_ERR(fs->window_kaddr); 798 + 799 + fs->window_phys_addr = (phys_addr_t) cache_reg.addr; 800 + fs->window_len = (phys_addr_t) cache_reg.len; 801 + 802 + dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n", 803 + __func__, fs->window_kaddr, cache_reg.addr, cache_reg.len); 804 + 805 + fs->dax_dev = alloc_dax(fs, NULL, &virtio_fs_dax_ops, 0); 806 + if (IS_ERR(fs->dax_dev)) 807 + return PTR_ERR(fs->dax_dev); 808 + 809 + return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax, 810 + fs->dax_dev); 811 + } 812 + 699 813 static int virtio_fs_probe(struct virtio_device *vdev) 700 814 { 701 815 struct virtio_fs *fs; ··· 840 706 goto out; 841 707 842 708 /* TODO vq affinity */ 709 + 710 + ret = virtio_fs_setup_dax(vdev, fs); 711 + if (ret < 0) 712 + goto out_vqs; 843 713 844 714 /* Bring the device online in case the filesystem is mounted and 845 715 * requests need to be sent before we return.
+3
include/uapi/linux/virtio_fs.h
··· 16 16 __le32 num_request_queues; 17 17 } __attribute__((packed)); 18 18 19 + /* For the id field in virtio_pci_shm_cap */ 20 + #define VIRTIO_FS_SHMCAP_ID_CACHE 0 21 + 19 22 #endif /* _UAPI_LINUX_VIRTIO_FS_H */