Merge tag 'fuse-update-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse

Pull fuse updates from Miklos Szeredi:

- Support directly accessing host page cache from virtiofs. This can
   improve I/O performance for various workloads, as well as reduce
   the memory requirement by eliminating double caching. Thanks to
   Vivek Goyal for doing most of the work on this. (See the mount
   sketch after this list.)

- Allow automatic submounting inside virtiofs. This allows unique
   st_dev/st_ino values to be assigned inside the guest to files
   residing on different filesystems on the host. Thanks to Max Reitz
   for the patches. (See the stat() sketch after this list.)

- Fix an old use-after-free bug found by Pradeep P V K.
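
A mount sketch for the new DAX support: the series adds a "dax" mount
option for virtiofs (see the Kconfig hunk and the "add a mount option to
enable dax" commit below). This is a minimal, hypothetical illustration;
the tag "myfs" and the mount point /mnt/virtiofs are made-up names.

    #include <stdio.h>
    #include <sys/mount.h>

    int main(void)
    {
            /* Roughly equivalent to: mount -t virtiofs myfs /mnt/virtiofs -o dax
             * The tag and mount point are placeholders; "dax" is the
             * virtiofs mount option introduced by this series.
             */
            if (mount("myfs", "/mnt/virtiofs", "virtiofs", 0, "dax") != 0) {
                    perror("mount");
                    return 1;
            }
            return 0;
    }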
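
A stat() sketch for the submount change: with submounts, files residing on
different host filesystems should show distinct st_dev values inside the
guest. The two paths below are purely illustrative and assume one file sits
on the host's root filesystem and the other on a filesystem mounted
underneath the shared directory.

    #include <stdint.h>
    #include <stdio.h>
    #include <sys/stat.h>

    int main(void)
    {
            struct stat a, b;

            /* Hypothetical guest paths crossing a host mount boundary */
            if (stat("/mnt/virtiofs/file-on-root", &a) ||
                stat("/mnt/virtiofs/other-fs/file", &b)) {
                    perror("stat");
                    return 1;
            }

            printf("st_dev: %ju vs %ju (%s)\n",
                   (uintmax_t)a.st_dev, (uintmax_t)b.st_dev,
                   a.st_dev == b.st_dev ? "same" : "different");
            return 0;
    }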

* tag 'fuse-update-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse: (25 commits)
virtiofs: calculate number of scatter-gather elements accurately
fuse: connection remove fix
fuse: implement crossmounts
fuse: Allow fuse_fill_super_common() for submounts
fuse: split fuse_mount off of fuse_conn
fuse: drop fuse_conn parameter where possible
fuse: store fuse_conn in fuse_req
fuse: add submount support to <uapi/linux/fuse.h>
fuse: fix page dereference after free
virtiofs: add logic to free up a memory range
virtiofs: maintain a list of busy elements
virtiofs: serialize truncate/punch_hole and dax fault path
virtiofs: define dax address space operations
virtiofs: add DAX mmap support
virtiofs: implement dax read/write operations
virtiofs: introduce setupmapping/removemapping commands
virtiofs: implement FUSE_INIT map_alignment field
virtiofs: keep a list of free dax memory ranges
virtiofs: add a mount option to enable dax
virtiofs: set up virtio_fs dax_device
...

+2690 -497
+1 -1
Documentation/filesystems/fuse.rst
··· 47 47 using the sftp protocol. 48 48 49 49 The userspace library and utilities are available from the 50 - `FUSE homepage: <http://fuse.sourceforge.net/>`_ 50 + `FUSE homepage: <https://github.com/libfuse/>`_ 51 51 52 52 Filesystem type 53 53 ===============
+1 -1
MAINTAINERS
··· 7238 7238 M: Miklos Szeredi <miklos@szeredi.hu> 7239 7239 L: linux-fsdevel@vger.kernel.org 7240 7240 S: Maintained 7241 - W: http://fuse.sourceforge.net/ 7241 + W: https://github.com/libfuse/ 7242 7242 T: git git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git 7243 7243 F: Documentation/filesystems/fuse.rst 7244 7244 F: fs/fuse/
+2 -1
drivers/dax/super.c
··· 46 46 int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size, 47 47 pgoff_t *pgoff) 48 48 { 49 - phys_addr_t phys_off = (get_start_sect(bdev) + sector) * 512; 49 + sector_t start_sect = bdev ? get_start_sect(bdev) : 0; 50 + phys_addr_t phys_off = (start_sect + sector) * 512; 50 51 51 52 if (pgoff) 52 53 *pgoff = PHYS_PFN(phys_off);
+23 -6
fs/dax.c
··· 559 559 } 560 560 561 561 /** 562 - * dax_layout_busy_page - find first pinned page in @mapping 562 + * dax_layout_busy_page_range - find first pinned page in @mapping 563 563 * @mapping: address space to scan for a page with ref count > 1 564 + * @start: Starting offset. Page containing 'start' is included. 565 + * @end: End offset. Page containing 'end' is included. If 'end' is LLONG_MAX, 566 + * pages from 'start' till the end of file are included. 564 567 * 565 568 * DAX requires ZONE_DEVICE mapped pages. These pages are never 566 569 * 'onlined' to the page allocator so they are considered idle when ··· 576 573 * to be able to run unmap_mapping_range() and subsequently not race 577 574 * mapping_mapped() becoming true. 578 575 */ 579 - struct page *dax_layout_busy_page(struct address_space *mapping) 576 + struct page *dax_layout_busy_page_range(struct address_space *mapping, 577 + loff_t start, loff_t end) 580 578 { 581 - XA_STATE(xas, &mapping->i_pages, 0); 582 579 void *entry; 583 580 unsigned int scanned = 0; 584 581 struct page *page = NULL; 582 + pgoff_t start_idx = start >> PAGE_SHIFT; 583 + pgoff_t end_idx; 584 + XA_STATE(xas, &mapping->i_pages, start_idx); 585 585 586 586 /* 587 587 * In the 'limited' case get_user_pages() for dax is disabled. ··· 595 589 if (!dax_mapping(mapping) || !mapping_mapped(mapping)) 596 590 return NULL; 597 591 592 + /* If end == LLONG_MAX, all pages from start to till end of file */ 593 + if (end == LLONG_MAX) 594 + end_idx = ULONG_MAX; 595 + else 596 + end_idx = end >> PAGE_SHIFT; 598 597 /* 599 598 * If we race get_user_pages_fast() here either we'll see the 600 599 * elevated page count in the iteration and wait, or ··· 607 596 * against is no longer mapped in the page tables and bail to the 608 597 * get_user_pages() slow path. The slow path is protected by 609 598 * pte_lock() and pmd_lock(). New references are not taken without 610 - * holding those locks, and unmap_mapping_range() will not zero the 599 + * holding those locks, and unmap_mapping_pages() will not zero the 611 600 * pte or pmd without holding the respective lock, so we are 612 601 * guaranteed to either see new references or prevent new 613 602 * references from being established. 614 603 */ 615 - unmap_mapping_range(mapping, 0, 0, 0); 604 + unmap_mapping_pages(mapping, start_idx, end_idx - start_idx + 1, 0); 616 605 617 606 xas_lock_irq(&xas); 618 - xas_for_each(&xas, entry, ULONG_MAX) { 607 + xas_for_each(&xas, entry, end_idx) { 619 608 if (WARN_ON_ONCE(!xa_is_value(entry))) 620 609 continue; 621 610 if (unlikely(dax_is_locked(entry))) ··· 635 624 } 636 625 xas_unlock_irq(&xas); 637 626 return page; 627 + } 628 + EXPORT_SYMBOL_GPL(dax_layout_busy_page_range); 629 + 630 + struct page *dax_layout_busy_page(struct address_space *mapping) 631 + { 632 + return dax_layout_busy_page_range(mapping, 0, LLONG_MAX); 638 633 } 639 634 EXPORT_SYMBOL_GPL(dax_layout_busy_page); 640 635
+15 -1
fs/fuse/Kconfig
··· 8 8 9 9 There's also a companion library: libfuse2. This library is available 10 10 from the FUSE homepage: 11 - <http://fuse.sourceforge.net/> 11 + <https://github.com/libfuse/> 12 12 although chances are your distribution already has that library 13 13 installed if you've installed the "fuse" package itself. 14 14 ··· 38 38 39 39 If you want to share files between guests or with the host, answer Y 40 40 or M. 41 + 42 + config FUSE_DAX 43 + bool "Virtio Filesystem Direct Host Memory Access support" 44 + default y 45 + select INTERVAL_TREE 46 + depends on VIRTIO_FS 47 + depends on FS_DAX 48 + depends on DAX_DRIVER 49 + help 50 + This allows bypassing guest page cache and allows mapping host page 51 + cache directly in guest address space. 52 + 53 + If you want to allow mounting a Virtio Filesystem with the "dax" 54 + option, answer Y.
+4 -2
fs/fuse/Makefile
··· 7 7 obj-$(CONFIG_CUSE) += cuse.o 8 8 obj-$(CONFIG_VIRTIO_FS) += virtiofs.o 9 9 10 - fuse-objs := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o 11 - virtiofs-y += virtio_fs.o 10 + fuse-y := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o 11 + fuse-$(CONFIG_FUSE_DAX) += dax.o 12 + 13 + virtiofs-y := virtio_fs.o
+15 -5
fs/fuse/control.c
··· 164 164 { 165 165 unsigned val; 166 166 struct fuse_conn *fc; 167 + struct fuse_mount *fm; 167 168 ssize_t ret; 168 169 169 170 ret = fuse_conn_limit_write(file, buf, count, ppos, &val, ··· 175 174 if (!fc) 176 175 goto out; 177 176 177 + down_read(&fc->killsb); 178 178 spin_lock(&fc->bg_lock); 179 179 fc->congestion_threshold = val; 180 - if (fc->sb) { 180 + 181 + /* 182 + * Get any fuse_mount belonging to this fuse_conn; s_bdi is 183 + * shared between all of them 184 + */ 185 + 186 + if (!list_empty(&fc->mounts)) { 187 + fm = list_first_entry(&fc->mounts, struct fuse_mount, fc_entry); 181 188 if (fc->num_background < fc->congestion_threshold) { 182 - clear_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC); 183 - clear_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC); 189 + clear_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC); 190 + clear_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC); 184 191 } else { 185 - set_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC); 186 - set_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC); 192 + set_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC); 193 + set_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC); 187 194 } 188 195 } 189 196 spin_unlock(&fc->bg_lock); 197 + up_read(&fc->killsb); 190 198 fuse_conn_put(fc); 191 199 out: 192 200 return ret;
+12 -9
fs/fuse/cuse.c
··· 57 57 58 58 struct cuse_conn { 59 59 struct list_head list; /* linked on cuse_conntbl */ 60 + struct fuse_mount fm; /* Dummy mount referencing fc */ 60 61 struct fuse_conn fc; /* fuse connection */ 61 62 struct cdev *cdev; /* associated character device */ 62 63 struct device *dev; /* device representing @cdev */ ··· 135 134 * Generic permission check is already done against the chrdev 136 135 * file, proceed to open. 137 136 */ 138 - rc = fuse_do_open(&cc->fc, 0, file, 0); 137 + rc = fuse_do_open(&cc->fm, 0, file, 0); 139 138 if (rc) 140 139 fuse_conn_put(&cc->fc); 141 140 return rc; ··· 144 143 static int cuse_release(struct inode *inode, struct file *file) 145 144 { 146 145 struct fuse_file *ff = file->private_data; 147 - struct fuse_conn *fc = ff->fc; 146 + struct fuse_mount *fm = ff->fm; 148 147 149 148 fuse_sync_release(NULL, ff, file->f_flags); 150 - fuse_conn_put(fc); 149 + fuse_conn_put(fm->fc); 151 150 152 151 return 0; 153 152 } ··· 156 155 unsigned long arg) 157 156 { 158 157 struct fuse_file *ff = file->private_data; 159 - struct cuse_conn *cc = fc_to_cc(ff->fc); 158 + struct cuse_conn *cc = fc_to_cc(ff->fm->fc); 160 159 unsigned int flags = 0; 161 160 162 161 if (cc->unrestricted_ioctl) ··· 169 168 unsigned long arg) 170 169 { 171 170 struct fuse_file *ff = file->private_data; 172 - struct cuse_conn *cc = fc_to_cc(ff->fc); 171 + struct cuse_conn *cc = fc_to_cc(ff->fm->fc); 173 172 unsigned int flags = FUSE_IOCTL_COMPAT; 174 173 175 174 if (cc->unrestricted_ioctl) ··· 314 313 * required data structures for it. Please read the comment at the 315 314 * top of this file for high level overview. 316 315 */ 317 - static void cuse_process_init_reply(struct fuse_conn *fc, 316 + static void cuse_process_init_reply(struct fuse_mount *fm, 318 317 struct fuse_args *args, int error) 319 318 { 319 + struct fuse_conn *fc = fm->fc; 320 320 struct cuse_init_args *ia = container_of(args, typeof(*ia), ap.args); 321 321 struct fuse_args_pages *ap = &ia->ap; 322 322 struct cuse_conn *cc = fc_to_cc(fc), *pos; ··· 426 424 { 427 425 int rc; 428 426 struct page *page; 429 - struct fuse_conn *fc = &cc->fc; 427 + struct fuse_mount *fm = &cc->fm; 430 428 struct cuse_init_args *ia; 431 429 struct fuse_args_pages *ap; 432 430 ··· 462 460 ia->desc.length = ap->args.out_args[1].size; 463 461 ap->args.end = cuse_process_init_reply; 464 462 465 - rc = fuse_simple_background(fc, &ap->args, GFP_KERNEL); 463 + rc = fuse_simple_background(fm, &ap->args, GFP_KERNEL); 466 464 if (rc) { 467 465 kfree(ia); 468 466 err_free_page: ··· 508 506 * Limit the cuse channel to requests that can 509 507 * be represented in file->f_cred->user_ns. 510 508 */ 511 - fuse_conn_init(&cc->fc, file->f_cred->user_ns, &fuse_dev_fiq_ops, NULL); 509 + fuse_conn_init(&cc->fc, &cc->fm, file->f_cred->user_ns, 510 + &fuse_dev_fiq_ops, NULL); 512 511 513 512 fud = fuse_dev_alloc_install(&cc->fc); 514 513 if (!fud) {
+1365
fs/fuse/dax.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * dax: direct host memory access 4 + * Copyright (C) 2020 Red Hat, Inc. 5 + */ 6 + 7 + #include "fuse_i.h" 8 + 9 + #include <linux/delay.h> 10 + #include <linux/dax.h> 11 + #include <linux/uio.h> 12 + #include <linux/pfn_t.h> 13 + #include <linux/iomap.h> 14 + #include <linux/interval_tree.h> 15 + 16 + /* 17 + * Default memory range size. A power of 2 so it agrees with common FUSE_INIT 18 + * map_alignment values 4KB and 64KB. 19 + */ 20 + #define FUSE_DAX_SHIFT 21 21 + #define FUSE_DAX_SZ (1 << FUSE_DAX_SHIFT) 22 + #define FUSE_DAX_PAGES (FUSE_DAX_SZ / PAGE_SIZE) 23 + 24 + /* Number of ranges reclaimer will try to free in one invocation */ 25 + #define FUSE_DAX_RECLAIM_CHUNK (10) 26 + 27 + /* 28 + * Dax memory reclaim threshold in percetage of total ranges. When free 29 + * number of free ranges drops below this threshold, reclaim can trigger 30 + * Default is 20% 31 + */ 32 + #define FUSE_DAX_RECLAIM_THRESHOLD (20) 33 + 34 + /** Translation information for file offsets to DAX window offsets */ 35 + struct fuse_dax_mapping { 36 + /* Pointer to inode where this memory range is mapped */ 37 + struct inode *inode; 38 + 39 + /* Will connect in fcd->free_ranges to keep track of free memory */ 40 + struct list_head list; 41 + 42 + /* For interval tree in file/inode */ 43 + struct interval_tree_node itn; 44 + 45 + /* Will connect in fc->busy_ranges to keep track busy memory */ 46 + struct list_head busy_list; 47 + 48 + /** Position in DAX window */ 49 + u64 window_offset; 50 + 51 + /** Length of mapping, in bytes */ 52 + loff_t length; 53 + 54 + /* Is this mapping read-only or read-write */ 55 + bool writable; 56 + 57 + /* reference count when the mapping is used by dax iomap. */ 58 + refcount_t refcnt; 59 + }; 60 + 61 + /* Per-inode dax map */ 62 + struct fuse_inode_dax { 63 + /* Semaphore to protect modifications to the dmap tree */ 64 + struct rw_semaphore sem; 65 + 66 + /* Sorted rb tree of struct fuse_dax_mapping elements */ 67 + struct rb_root_cached tree; 68 + unsigned long nr; 69 + }; 70 + 71 + struct fuse_conn_dax { 72 + /* DAX device */ 73 + struct dax_device *dev; 74 + 75 + /* Lock protecting accessess to members of this structure */ 76 + spinlock_t lock; 77 + 78 + /* List of memory ranges which are busy */ 79 + unsigned long nr_busy_ranges; 80 + struct list_head busy_ranges; 81 + 82 + /* Worker to free up memory ranges */ 83 + struct delayed_work free_work; 84 + 85 + /* Wait queue for a dax range to become free */ 86 + wait_queue_head_t range_waitq; 87 + 88 + /* DAX Window Free Ranges */ 89 + long nr_free_ranges; 90 + struct list_head free_ranges; 91 + 92 + unsigned long nr_ranges; 93 + }; 94 + 95 + static inline struct fuse_dax_mapping * 96 + node_to_dmap(struct interval_tree_node *node) 97 + { 98 + if (!node) 99 + return NULL; 100 + 101 + return container_of(node, struct fuse_dax_mapping, itn); 102 + } 103 + 104 + static struct fuse_dax_mapping * 105 + alloc_dax_mapping_reclaim(struct fuse_conn_dax *fcd, struct inode *inode); 106 + 107 + static void 108 + __kick_dmap_free_worker(struct fuse_conn_dax *fcd, unsigned long delay_ms) 109 + { 110 + unsigned long free_threshold; 111 + 112 + /* If number of free ranges are below threshold, start reclaim */ 113 + free_threshold = max_t(unsigned long, fcd->nr_ranges * FUSE_DAX_RECLAIM_THRESHOLD / 100, 114 + 1); 115 + if (fcd->nr_free_ranges < free_threshold) 116 + queue_delayed_work(system_long_wq, &fcd->free_work, 117 + msecs_to_jiffies(delay_ms)); 118 + } 119 + 120 + static void 
kick_dmap_free_worker(struct fuse_conn_dax *fcd, 121 + unsigned long delay_ms) 122 + { 123 + spin_lock(&fcd->lock); 124 + __kick_dmap_free_worker(fcd, delay_ms); 125 + spin_unlock(&fcd->lock); 126 + } 127 + 128 + static struct fuse_dax_mapping *alloc_dax_mapping(struct fuse_conn_dax *fcd) 129 + { 130 + struct fuse_dax_mapping *dmap; 131 + 132 + spin_lock(&fcd->lock); 133 + dmap = list_first_entry_or_null(&fcd->free_ranges, 134 + struct fuse_dax_mapping, list); 135 + if (dmap) { 136 + list_del_init(&dmap->list); 137 + WARN_ON(fcd->nr_free_ranges <= 0); 138 + fcd->nr_free_ranges--; 139 + } 140 + spin_unlock(&fcd->lock); 141 + 142 + kick_dmap_free_worker(fcd, 0); 143 + return dmap; 144 + } 145 + 146 + /* This assumes fcd->lock is held */ 147 + static void __dmap_remove_busy_list(struct fuse_conn_dax *fcd, 148 + struct fuse_dax_mapping *dmap) 149 + { 150 + list_del_init(&dmap->busy_list); 151 + WARN_ON(fcd->nr_busy_ranges == 0); 152 + fcd->nr_busy_ranges--; 153 + } 154 + 155 + static void dmap_remove_busy_list(struct fuse_conn_dax *fcd, 156 + struct fuse_dax_mapping *dmap) 157 + { 158 + spin_lock(&fcd->lock); 159 + __dmap_remove_busy_list(fcd, dmap); 160 + spin_unlock(&fcd->lock); 161 + } 162 + 163 + /* This assumes fcd->lock is held */ 164 + static void __dmap_add_to_free_pool(struct fuse_conn_dax *fcd, 165 + struct fuse_dax_mapping *dmap) 166 + { 167 + list_add_tail(&dmap->list, &fcd->free_ranges); 168 + fcd->nr_free_ranges++; 169 + wake_up(&fcd->range_waitq); 170 + } 171 + 172 + static void dmap_add_to_free_pool(struct fuse_conn_dax *fcd, 173 + struct fuse_dax_mapping *dmap) 174 + { 175 + /* Return fuse_dax_mapping to free list */ 176 + spin_lock(&fcd->lock); 177 + __dmap_add_to_free_pool(fcd, dmap); 178 + spin_unlock(&fcd->lock); 179 + } 180 + 181 + static int fuse_setup_one_mapping(struct inode *inode, unsigned long start_idx, 182 + struct fuse_dax_mapping *dmap, bool writable, 183 + bool upgrade) 184 + { 185 + struct fuse_mount *fm = get_fuse_mount(inode); 186 + struct fuse_conn_dax *fcd = fm->fc->dax; 187 + struct fuse_inode *fi = get_fuse_inode(inode); 188 + struct fuse_setupmapping_in inarg; 189 + loff_t offset = start_idx << FUSE_DAX_SHIFT; 190 + FUSE_ARGS(args); 191 + ssize_t err; 192 + 193 + WARN_ON(fcd->nr_free_ranges < 0); 194 + 195 + /* Ask fuse daemon to setup mapping */ 196 + memset(&inarg, 0, sizeof(inarg)); 197 + inarg.foffset = offset; 198 + inarg.fh = -1; 199 + inarg.moffset = dmap->window_offset; 200 + inarg.len = FUSE_DAX_SZ; 201 + inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ; 202 + if (writable) 203 + inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE; 204 + args.opcode = FUSE_SETUPMAPPING; 205 + args.nodeid = fi->nodeid; 206 + args.in_numargs = 1; 207 + args.in_args[0].size = sizeof(inarg); 208 + args.in_args[0].value = &inarg; 209 + err = fuse_simple_request(fm, &args); 210 + if (err < 0) 211 + return err; 212 + dmap->writable = writable; 213 + if (!upgrade) { 214 + /* 215 + * We don't take a refernce on inode. inode is valid right now 216 + * and when inode is going away, cleanup logic should first 217 + * cleanup dmap entries. 
218 + */ 219 + dmap->inode = inode; 220 + dmap->itn.start = dmap->itn.last = start_idx; 221 + /* Protected by fi->dax->sem */ 222 + interval_tree_insert(&dmap->itn, &fi->dax->tree); 223 + fi->dax->nr++; 224 + spin_lock(&fcd->lock); 225 + list_add_tail(&dmap->busy_list, &fcd->busy_ranges); 226 + fcd->nr_busy_ranges++; 227 + spin_unlock(&fcd->lock); 228 + } 229 + return 0; 230 + } 231 + 232 + static int fuse_send_removemapping(struct inode *inode, 233 + struct fuse_removemapping_in *inargp, 234 + struct fuse_removemapping_one *remove_one) 235 + { 236 + struct fuse_inode *fi = get_fuse_inode(inode); 237 + struct fuse_mount *fm = get_fuse_mount(inode); 238 + FUSE_ARGS(args); 239 + 240 + args.opcode = FUSE_REMOVEMAPPING; 241 + args.nodeid = fi->nodeid; 242 + args.in_numargs = 2; 243 + args.in_args[0].size = sizeof(*inargp); 244 + args.in_args[0].value = inargp; 245 + args.in_args[1].size = inargp->count * sizeof(*remove_one); 246 + args.in_args[1].value = remove_one; 247 + return fuse_simple_request(fm, &args); 248 + } 249 + 250 + static int dmap_removemapping_list(struct inode *inode, unsigned int num, 251 + struct list_head *to_remove) 252 + { 253 + struct fuse_removemapping_one *remove_one, *ptr; 254 + struct fuse_removemapping_in inarg; 255 + struct fuse_dax_mapping *dmap; 256 + int ret, i = 0, nr_alloc; 257 + 258 + nr_alloc = min_t(unsigned int, num, FUSE_REMOVEMAPPING_MAX_ENTRY); 259 + remove_one = kmalloc_array(nr_alloc, sizeof(*remove_one), GFP_NOFS); 260 + if (!remove_one) 261 + return -ENOMEM; 262 + 263 + ptr = remove_one; 264 + list_for_each_entry(dmap, to_remove, list) { 265 + ptr->moffset = dmap->window_offset; 266 + ptr->len = dmap->length; 267 + ptr++; 268 + i++; 269 + num--; 270 + if (i >= nr_alloc || num == 0) { 271 + memset(&inarg, 0, sizeof(inarg)); 272 + inarg.count = i; 273 + ret = fuse_send_removemapping(inode, &inarg, 274 + remove_one); 275 + if (ret) 276 + goto out; 277 + ptr = remove_one; 278 + i = 0; 279 + } 280 + } 281 + out: 282 + kfree(remove_one); 283 + return ret; 284 + } 285 + 286 + /* 287 + * Cleanup dmap entry and add back to free list. This should be called with 288 + * fcd->lock held. 289 + */ 290 + static void dmap_reinit_add_to_free_pool(struct fuse_conn_dax *fcd, 291 + struct fuse_dax_mapping *dmap) 292 + { 293 + pr_debug("fuse: freeing memory range start_idx=0x%lx end_idx=0x%lx window_offset=0x%llx length=0x%llx\n", 294 + dmap->itn.start, dmap->itn.last, dmap->window_offset, 295 + dmap->length); 296 + __dmap_remove_busy_list(fcd, dmap); 297 + dmap->inode = NULL; 298 + dmap->itn.start = dmap->itn.last = 0; 299 + __dmap_add_to_free_pool(fcd, dmap); 300 + } 301 + 302 + /* 303 + * Free inode dmap entries whose range falls inside [start, end]. 304 + * Does not take any locks. At this point of time it should only be 305 + * called from evict_inode() path where we know all dmap entries can be 306 + * reclaimed. 307 + */ 308 + static void inode_reclaim_dmap_range(struct fuse_conn_dax *fcd, 309 + struct inode *inode, 310 + loff_t start, loff_t end) 311 + { 312 + struct fuse_inode *fi = get_fuse_inode(inode); 313 + struct fuse_dax_mapping *dmap, *n; 314 + int err, num = 0; 315 + LIST_HEAD(to_remove); 316 + unsigned long start_idx = start >> FUSE_DAX_SHIFT; 317 + unsigned long end_idx = end >> FUSE_DAX_SHIFT; 318 + struct interval_tree_node *node; 319 + 320 + while (1) { 321 + node = interval_tree_iter_first(&fi->dax->tree, start_idx, 322 + end_idx); 323 + if (!node) 324 + break; 325 + dmap = node_to_dmap(node); 326 + /* inode is going away. 
There should not be any users of dmap */ 327 + WARN_ON(refcount_read(&dmap->refcnt) > 1); 328 + interval_tree_remove(&dmap->itn, &fi->dax->tree); 329 + num++; 330 + list_add(&dmap->list, &to_remove); 331 + } 332 + 333 + /* Nothing to remove */ 334 + if (list_empty(&to_remove)) 335 + return; 336 + 337 + WARN_ON(fi->dax->nr < num); 338 + fi->dax->nr -= num; 339 + err = dmap_removemapping_list(inode, num, &to_remove); 340 + if (err && err != -ENOTCONN) { 341 + pr_warn("Failed to removemappings. start=0x%llx end=0x%llx\n", 342 + start, end); 343 + } 344 + spin_lock(&fcd->lock); 345 + list_for_each_entry_safe(dmap, n, &to_remove, list) { 346 + list_del_init(&dmap->list); 347 + dmap_reinit_add_to_free_pool(fcd, dmap); 348 + } 349 + spin_unlock(&fcd->lock); 350 + } 351 + 352 + static int dmap_removemapping_one(struct inode *inode, 353 + struct fuse_dax_mapping *dmap) 354 + { 355 + struct fuse_removemapping_one forget_one; 356 + struct fuse_removemapping_in inarg; 357 + 358 + memset(&inarg, 0, sizeof(inarg)); 359 + inarg.count = 1; 360 + memset(&forget_one, 0, sizeof(forget_one)); 361 + forget_one.moffset = dmap->window_offset; 362 + forget_one.len = dmap->length; 363 + 364 + return fuse_send_removemapping(inode, &inarg, &forget_one); 365 + } 366 + 367 + /* 368 + * It is called from evict_inode() and by that time inode is going away. So 369 + * this function does not take any locks like fi->dax->sem for traversing 370 + * that fuse inode interval tree. If that lock is taken then lock validator 371 + * complains of deadlock situation w.r.t fs_reclaim lock. 372 + */ 373 + void fuse_dax_inode_cleanup(struct inode *inode) 374 + { 375 + struct fuse_conn *fc = get_fuse_conn(inode); 376 + struct fuse_inode *fi = get_fuse_inode(inode); 377 + 378 + /* 379 + * fuse_evict_inode() has already called truncate_inode_pages_final() 380 + * before we arrive here. So we should not have to worry about any 381 + * pages/exception entries still associated with inode. 382 + */ 383 + inode_reclaim_dmap_range(fc->dax, inode, 0, -1); 384 + WARN_ON(fi->dax->nr); 385 + } 386 + 387 + static void fuse_fill_iomap_hole(struct iomap *iomap, loff_t length) 388 + { 389 + iomap->addr = IOMAP_NULL_ADDR; 390 + iomap->length = length; 391 + iomap->type = IOMAP_HOLE; 392 + } 393 + 394 + static void fuse_fill_iomap(struct inode *inode, loff_t pos, loff_t length, 395 + struct iomap *iomap, struct fuse_dax_mapping *dmap, 396 + unsigned int flags) 397 + { 398 + loff_t offset, len; 399 + loff_t i_size = i_size_read(inode); 400 + 401 + offset = pos - (dmap->itn.start << FUSE_DAX_SHIFT); 402 + len = min(length, dmap->length - offset); 403 + 404 + /* If length is beyond end of file, truncate further */ 405 + if (pos + len > i_size) 406 + len = i_size - pos; 407 + 408 + if (len > 0) { 409 + iomap->addr = dmap->window_offset + offset; 410 + iomap->length = len; 411 + if (flags & IOMAP_FAULT) 412 + iomap->length = ALIGN(len, PAGE_SIZE); 413 + iomap->type = IOMAP_MAPPED; 414 + /* 415 + * increace refcnt so that reclaim code knows this dmap is in 416 + * use. This assumes fi->dax->sem mutex is held either 417 + * shared/exclusive. 
418 + */ 419 + refcount_inc(&dmap->refcnt); 420 + 421 + /* iomap->private should be NULL */ 422 + WARN_ON_ONCE(iomap->private); 423 + iomap->private = dmap; 424 + } else { 425 + /* Mapping beyond end of file is hole */ 426 + fuse_fill_iomap_hole(iomap, length); 427 + } 428 + } 429 + 430 + static int fuse_setup_new_dax_mapping(struct inode *inode, loff_t pos, 431 + loff_t length, unsigned int flags, 432 + struct iomap *iomap) 433 + { 434 + struct fuse_inode *fi = get_fuse_inode(inode); 435 + struct fuse_conn *fc = get_fuse_conn(inode); 436 + struct fuse_conn_dax *fcd = fc->dax; 437 + struct fuse_dax_mapping *dmap, *alloc_dmap = NULL; 438 + int ret; 439 + bool writable = flags & IOMAP_WRITE; 440 + unsigned long start_idx = pos >> FUSE_DAX_SHIFT; 441 + struct interval_tree_node *node; 442 + 443 + /* 444 + * Can't do inline reclaim in fault path. We call 445 + * dax_layout_busy_page() before we free a range. And 446 + * fuse_wait_dax_page() drops fi->i_mmap_sem lock and requires it. 447 + * In fault path we enter with fi->i_mmap_sem held and can't drop 448 + * it. Also in fault path we hold fi->i_mmap_sem shared and not 449 + * exclusive, so that creates further issues with fuse_wait_dax_page(). 450 + * Hence return -EAGAIN and fuse_dax_fault() will wait for a memory 451 + * range to become free and retry. 452 + */ 453 + if (flags & IOMAP_FAULT) { 454 + alloc_dmap = alloc_dax_mapping(fcd); 455 + if (!alloc_dmap) 456 + return -EAGAIN; 457 + } else { 458 + alloc_dmap = alloc_dax_mapping_reclaim(fcd, inode); 459 + if (IS_ERR(alloc_dmap)) 460 + return PTR_ERR(alloc_dmap); 461 + } 462 + 463 + /* If we are here, we should have memory allocated */ 464 + if (WARN_ON(!alloc_dmap)) 465 + return -EIO; 466 + 467 + /* 468 + * Take write lock so that only one caller can try to setup mapping 469 + * and other waits. 470 + */ 471 + down_write(&fi->dax->sem); 472 + /* 473 + * We dropped lock. Check again if somebody else setup 474 + * mapping already. 475 + */ 476 + node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx); 477 + if (node) { 478 + dmap = node_to_dmap(node); 479 + fuse_fill_iomap(inode, pos, length, iomap, dmap, flags); 480 + dmap_add_to_free_pool(fcd, alloc_dmap); 481 + up_write(&fi->dax->sem); 482 + return 0; 483 + } 484 + 485 + /* Setup one mapping */ 486 + ret = fuse_setup_one_mapping(inode, pos >> FUSE_DAX_SHIFT, alloc_dmap, 487 + writable, false); 488 + if (ret < 0) { 489 + dmap_add_to_free_pool(fcd, alloc_dmap); 490 + up_write(&fi->dax->sem); 491 + return ret; 492 + } 493 + fuse_fill_iomap(inode, pos, length, iomap, alloc_dmap, flags); 494 + up_write(&fi->dax->sem); 495 + return 0; 496 + } 497 + 498 + static int fuse_upgrade_dax_mapping(struct inode *inode, loff_t pos, 499 + loff_t length, unsigned int flags, 500 + struct iomap *iomap) 501 + { 502 + struct fuse_inode *fi = get_fuse_inode(inode); 503 + struct fuse_dax_mapping *dmap; 504 + int ret; 505 + unsigned long idx = pos >> FUSE_DAX_SHIFT; 506 + struct interval_tree_node *node; 507 + 508 + /* 509 + * Take exclusive lock so that only one caller can try to setup 510 + * mapping and others wait. 511 + */ 512 + down_write(&fi->dax->sem); 513 + node = interval_tree_iter_first(&fi->dax->tree, idx, idx); 514 + 515 + /* We are holding either inode lock or i_mmap_sem, and that should 516 + * ensure that dmap can't be truncated. We are holding a reference 517 + * on dmap and that should make sure it can't be reclaimed. 
So dmap 518 + * should still be there in tree despite the fact we dropped and 519 + * re-acquired the fi->dax->sem lock. 520 + */ 521 + ret = -EIO; 522 + if (WARN_ON(!node)) 523 + goto out_err; 524 + 525 + dmap = node_to_dmap(node); 526 + 527 + /* We took an extra reference on dmap to make sure its not reclaimd. 528 + * Now we hold fi->dax->sem lock and that reference is not needed 529 + * anymore. Drop it. 530 + */ 531 + if (refcount_dec_and_test(&dmap->refcnt)) { 532 + /* refcount should not hit 0. This object only goes 533 + * away when fuse connection goes away 534 + */ 535 + WARN_ON_ONCE(1); 536 + } 537 + 538 + /* Maybe another thread already upgraded mapping while we were not 539 + * holding lock. 540 + */ 541 + if (dmap->writable) { 542 + ret = 0; 543 + goto out_fill_iomap; 544 + } 545 + 546 + ret = fuse_setup_one_mapping(inode, pos >> FUSE_DAX_SHIFT, dmap, true, 547 + true); 548 + if (ret < 0) 549 + goto out_err; 550 + out_fill_iomap: 551 + fuse_fill_iomap(inode, pos, length, iomap, dmap, flags); 552 + out_err: 553 + up_write(&fi->dax->sem); 554 + return ret; 555 + } 556 + 557 + /* This is just for DAX and the mapping is ephemeral, do not use it for other 558 + * purposes since there is no block device with a permanent mapping. 559 + */ 560 + static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length, 561 + unsigned int flags, struct iomap *iomap, 562 + struct iomap *srcmap) 563 + { 564 + struct fuse_inode *fi = get_fuse_inode(inode); 565 + struct fuse_conn *fc = get_fuse_conn(inode); 566 + struct fuse_dax_mapping *dmap; 567 + bool writable = flags & IOMAP_WRITE; 568 + unsigned long start_idx = pos >> FUSE_DAX_SHIFT; 569 + struct interval_tree_node *node; 570 + 571 + /* We don't support FIEMAP */ 572 + if (WARN_ON(flags & IOMAP_REPORT)) 573 + return -EIO; 574 + 575 + iomap->offset = pos; 576 + iomap->flags = 0; 577 + iomap->bdev = NULL; 578 + iomap->dax_dev = fc->dax->dev; 579 + 580 + /* 581 + * Both read/write and mmap path can race here. So we need something 582 + * to make sure if we are setting up mapping, then other path waits 583 + * 584 + * For now, use a semaphore for this. It probably needs to be 585 + * optimized later. 586 + */ 587 + down_read(&fi->dax->sem); 588 + node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx); 589 + if (node) { 590 + dmap = node_to_dmap(node); 591 + if (writable && !dmap->writable) { 592 + /* Upgrade read-only mapping to read-write. This will 593 + * require exclusive fi->dax->sem lock as we don't want 594 + * two threads to be trying to this simultaneously 595 + * for same dmap. So drop shared lock and acquire 596 + * exclusive lock. 597 + * 598 + * Before dropping fi->dax->sem lock, take reference 599 + * on dmap so that its not freed by range reclaim. 
600 + */ 601 + refcount_inc(&dmap->refcnt); 602 + up_read(&fi->dax->sem); 603 + pr_debug("%s: Upgrading mapping at offset 0x%llx length 0x%llx\n", 604 + __func__, pos, length); 605 + return fuse_upgrade_dax_mapping(inode, pos, length, 606 + flags, iomap); 607 + } else { 608 + fuse_fill_iomap(inode, pos, length, iomap, dmap, flags); 609 + up_read(&fi->dax->sem); 610 + return 0; 611 + } 612 + } else { 613 + up_read(&fi->dax->sem); 614 + pr_debug("%s: no mapping at offset 0x%llx length 0x%llx\n", 615 + __func__, pos, length); 616 + if (pos >= i_size_read(inode)) 617 + goto iomap_hole; 618 + 619 + return fuse_setup_new_dax_mapping(inode, pos, length, flags, 620 + iomap); 621 + } 622 + 623 + /* 624 + * If read beyond end of file happnes, fs code seems to return 625 + * it as hole 626 + */ 627 + iomap_hole: 628 + fuse_fill_iomap_hole(iomap, length); 629 + pr_debug("%s returning hole mapping. pos=0x%llx length_asked=0x%llx length_returned=0x%llx\n", 630 + __func__, pos, length, iomap->length); 631 + return 0; 632 + } 633 + 634 + static int fuse_iomap_end(struct inode *inode, loff_t pos, loff_t length, 635 + ssize_t written, unsigned int flags, 636 + struct iomap *iomap) 637 + { 638 + struct fuse_dax_mapping *dmap = iomap->private; 639 + 640 + if (dmap) { 641 + if (refcount_dec_and_test(&dmap->refcnt)) { 642 + /* refcount should not hit 0. This object only goes 643 + * away when fuse connection goes away 644 + */ 645 + WARN_ON_ONCE(1); 646 + } 647 + } 648 + 649 + /* DAX writes beyond end-of-file aren't handled using iomap, so the 650 + * file size is unchanged and there is nothing to do here. 651 + */ 652 + return 0; 653 + } 654 + 655 + static const struct iomap_ops fuse_iomap_ops = { 656 + .iomap_begin = fuse_iomap_begin, 657 + .iomap_end = fuse_iomap_end, 658 + }; 659 + 660 + static void fuse_wait_dax_page(struct inode *inode) 661 + { 662 + struct fuse_inode *fi = get_fuse_inode(inode); 663 + 664 + up_write(&fi->i_mmap_sem); 665 + schedule(); 666 + down_write(&fi->i_mmap_sem); 667 + } 668 + 669 + /* Should be called with fi->i_mmap_sem lock held exclusively */ 670 + static int __fuse_dax_break_layouts(struct inode *inode, bool *retry, 671 + loff_t start, loff_t end) 672 + { 673 + struct page *page; 674 + 675 + page = dax_layout_busy_page_range(inode->i_mapping, start, end); 676 + if (!page) 677 + return 0; 678 + 679 + *retry = true; 680 + return ___wait_var_event(&page->_refcount, 681 + atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE, 682 + 0, 0, fuse_wait_dax_page(inode)); 683 + } 684 + 685 + /* dmap_end == 0 leads to unmapping of whole file */ 686 + int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start, 687 + u64 dmap_end) 688 + { 689 + bool retry; 690 + int ret; 691 + 692 + do { 693 + retry = false; 694 + ret = __fuse_dax_break_layouts(inode, &retry, dmap_start, 695 + dmap_end); 696 + } while (ret == 0 && retry); 697 + 698 + return ret; 699 + } 700 + 701 + ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to) 702 + { 703 + struct inode *inode = file_inode(iocb->ki_filp); 704 + ssize_t ret; 705 + 706 + if (iocb->ki_flags & IOCB_NOWAIT) { 707 + if (!inode_trylock_shared(inode)) 708 + return -EAGAIN; 709 + } else { 710 + inode_lock_shared(inode); 711 + } 712 + 713 + ret = dax_iomap_rw(iocb, to, &fuse_iomap_ops); 714 + inode_unlock_shared(inode); 715 + 716 + /* TODO file_accessed(iocb->f_filp) */ 717 + return ret; 718 + } 719 + 720 + static bool file_extending_write(struct kiocb *iocb, struct iov_iter *from) 721 + { 722 + struct inode *inode = 
file_inode(iocb->ki_filp); 723 + 724 + return (iov_iter_rw(from) == WRITE && 725 + ((iocb->ki_pos) >= i_size_read(inode) || 726 + (iocb->ki_pos + iov_iter_count(from) > i_size_read(inode)))); 727 + } 728 + 729 + static ssize_t fuse_dax_direct_write(struct kiocb *iocb, struct iov_iter *from) 730 + { 731 + struct inode *inode = file_inode(iocb->ki_filp); 732 + struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb); 733 + ssize_t ret; 734 + 735 + ret = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE); 736 + if (ret < 0) 737 + return ret; 738 + 739 + fuse_invalidate_attr(inode); 740 + fuse_write_update_size(inode, iocb->ki_pos); 741 + return ret; 742 + } 743 + 744 + ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from) 745 + { 746 + struct inode *inode = file_inode(iocb->ki_filp); 747 + ssize_t ret; 748 + 749 + if (iocb->ki_flags & IOCB_NOWAIT) { 750 + if (!inode_trylock(inode)) 751 + return -EAGAIN; 752 + } else { 753 + inode_lock(inode); 754 + } 755 + 756 + ret = generic_write_checks(iocb, from); 757 + if (ret <= 0) 758 + goto out; 759 + 760 + ret = file_remove_privs(iocb->ki_filp); 761 + if (ret) 762 + goto out; 763 + /* TODO file_update_time() but we don't want metadata I/O */ 764 + 765 + /* Do not use dax for file extending writes as write and on 766 + * disk i_size increase are not atomic otherwise. 767 + */ 768 + if (file_extending_write(iocb, from)) 769 + ret = fuse_dax_direct_write(iocb, from); 770 + else 771 + ret = dax_iomap_rw(iocb, from, &fuse_iomap_ops); 772 + 773 + out: 774 + inode_unlock(inode); 775 + 776 + if (ret > 0) 777 + ret = generic_write_sync(iocb, ret); 778 + return ret; 779 + } 780 + 781 + static int fuse_dax_writepages(struct address_space *mapping, 782 + struct writeback_control *wbc) 783 + { 784 + 785 + struct inode *inode = mapping->host; 786 + struct fuse_conn *fc = get_fuse_conn(inode); 787 + 788 + return dax_writeback_mapping_range(mapping, fc->dax->dev, wbc); 789 + } 790 + 791 + static vm_fault_t __fuse_dax_fault(struct vm_fault *vmf, 792 + enum page_entry_size pe_size, bool write) 793 + { 794 + vm_fault_t ret; 795 + struct inode *inode = file_inode(vmf->vma->vm_file); 796 + struct super_block *sb = inode->i_sb; 797 + pfn_t pfn; 798 + int error = 0; 799 + struct fuse_conn *fc = get_fuse_conn(inode); 800 + struct fuse_conn_dax *fcd = fc->dax; 801 + bool retry = false; 802 + 803 + if (write) 804 + sb_start_pagefault(sb); 805 + retry: 806 + if (retry && !(fcd->nr_free_ranges > 0)) 807 + wait_event(fcd->range_waitq, (fcd->nr_free_ranges > 0)); 808 + 809 + /* 810 + * We need to serialize against not only truncate but also against 811 + * fuse dax memory range reclaim. While a range is being reclaimed, 812 + * we do not want any read/write/mmap to make progress and try 813 + * to populate page cache or access memory we are trying to free. 
814 + */ 815 + down_read(&get_fuse_inode(inode)->i_mmap_sem); 816 + ret = dax_iomap_fault(vmf, pe_size, &pfn, &error, &fuse_iomap_ops); 817 + if ((ret & VM_FAULT_ERROR) && error == -EAGAIN) { 818 + error = 0; 819 + retry = true; 820 + up_read(&get_fuse_inode(inode)->i_mmap_sem); 821 + goto retry; 822 + } 823 + 824 + if (ret & VM_FAULT_NEEDDSYNC) 825 + ret = dax_finish_sync_fault(vmf, pe_size, pfn); 826 + up_read(&get_fuse_inode(inode)->i_mmap_sem); 827 + 828 + if (write) 829 + sb_end_pagefault(sb); 830 + 831 + return ret; 832 + } 833 + 834 + static vm_fault_t fuse_dax_fault(struct vm_fault *vmf) 835 + { 836 + return __fuse_dax_fault(vmf, PE_SIZE_PTE, 837 + vmf->flags & FAULT_FLAG_WRITE); 838 + } 839 + 840 + static vm_fault_t fuse_dax_huge_fault(struct vm_fault *vmf, 841 + enum page_entry_size pe_size) 842 + { 843 + return __fuse_dax_fault(vmf, pe_size, vmf->flags & FAULT_FLAG_WRITE); 844 + } 845 + 846 + static vm_fault_t fuse_dax_page_mkwrite(struct vm_fault *vmf) 847 + { 848 + return __fuse_dax_fault(vmf, PE_SIZE_PTE, true); 849 + } 850 + 851 + static vm_fault_t fuse_dax_pfn_mkwrite(struct vm_fault *vmf) 852 + { 853 + return __fuse_dax_fault(vmf, PE_SIZE_PTE, true); 854 + } 855 + 856 + static const struct vm_operations_struct fuse_dax_vm_ops = { 857 + .fault = fuse_dax_fault, 858 + .huge_fault = fuse_dax_huge_fault, 859 + .page_mkwrite = fuse_dax_page_mkwrite, 860 + .pfn_mkwrite = fuse_dax_pfn_mkwrite, 861 + }; 862 + 863 + int fuse_dax_mmap(struct file *file, struct vm_area_struct *vma) 864 + { 865 + file_accessed(file); 866 + vma->vm_ops = &fuse_dax_vm_ops; 867 + vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; 868 + return 0; 869 + } 870 + 871 + static int dmap_writeback_invalidate(struct inode *inode, 872 + struct fuse_dax_mapping *dmap) 873 + { 874 + int ret; 875 + loff_t start_pos = dmap->itn.start << FUSE_DAX_SHIFT; 876 + loff_t end_pos = (start_pos + FUSE_DAX_SZ - 1); 877 + 878 + ret = filemap_fdatawrite_range(inode->i_mapping, start_pos, end_pos); 879 + if (ret) { 880 + pr_debug("fuse: filemap_fdatawrite_range() failed. err=%d start_pos=0x%llx, end_pos=0x%llx\n", 881 + ret, start_pos, end_pos); 882 + return ret; 883 + } 884 + 885 + ret = invalidate_inode_pages2_range(inode->i_mapping, 886 + start_pos >> PAGE_SHIFT, 887 + end_pos >> PAGE_SHIFT); 888 + if (ret) 889 + pr_debug("fuse: invalidate_inode_pages2_range() failed err=%d\n", 890 + ret); 891 + 892 + return ret; 893 + } 894 + 895 + static int reclaim_one_dmap_locked(struct inode *inode, 896 + struct fuse_dax_mapping *dmap) 897 + { 898 + int ret; 899 + struct fuse_inode *fi = get_fuse_inode(inode); 900 + 901 + /* 902 + * igrab() was done to make sure inode won't go under us, and this 903 + * further avoids the race with evict(). 904 + */ 905 + ret = dmap_writeback_invalidate(inode, dmap); 906 + if (ret) 907 + return ret; 908 + 909 + /* Remove dax mapping from inode interval tree now */ 910 + interval_tree_remove(&dmap->itn, &fi->dax->tree); 911 + fi->dax->nr--; 912 + 913 + /* It is possible that umount/shutdown has killed the fuse connection 914 + * and worker thread is trying to reclaim memory in parallel. Don't 915 + * warn in that case. 916 + */ 917 + ret = dmap_removemapping_one(inode, dmap); 918 + if (ret && ret != -ENOTCONN) { 919 + pr_warn("Failed to remove mapping. offset=0x%llx len=0x%llx ret=%d\n", 920 + dmap->window_offset, dmap->length, ret); 921 + } 922 + return 0; 923 + } 924 + 925 + /* Find first mapped dmap for an inode and return file offset. Caller needs 926 + * to hold fi->dax->sem lock either shared or exclusive. 
927 + */ 928 + static struct fuse_dax_mapping *inode_lookup_first_dmap(struct inode *inode) 929 + { 930 + struct fuse_inode *fi = get_fuse_inode(inode); 931 + struct fuse_dax_mapping *dmap; 932 + struct interval_tree_node *node; 933 + 934 + for (node = interval_tree_iter_first(&fi->dax->tree, 0, -1); node; 935 + node = interval_tree_iter_next(node, 0, -1)) { 936 + dmap = node_to_dmap(node); 937 + /* still in use. */ 938 + if (refcount_read(&dmap->refcnt) > 1) 939 + continue; 940 + 941 + return dmap; 942 + } 943 + 944 + return NULL; 945 + } 946 + 947 + /* 948 + * Find first mapping in the tree and free it and return it. Do not add 949 + * it back to free pool. 950 + */ 951 + static struct fuse_dax_mapping * 952 + inode_inline_reclaim_one_dmap(struct fuse_conn_dax *fcd, struct inode *inode, 953 + bool *retry) 954 + { 955 + struct fuse_inode *fi = get_fuse_inode(inode); 956 + struct fuse_dax_mapping *dmap; 957 + u64 dmap_start, dmap_end; 958 + unsigned long start_idx; 959 + int ret; 960 + struct interval_tree_node *node; 961 + 962 + down_write(&fi->i_mmap_sem); 963 + 964 + /* Lookup a dmap and corresponding file offset to reclaim. */ 965 + down_read(&fi->dax->sem); 966 + dmap = inode_lookup_first_dmap(inode); 967 + if (dmap) { 968 + start_idx = dmap->itn.start; 969 + dmap_start = start_idx << FUSE_DAX_SHIFT; 970 + dmap_end = dmap_start + FUSE_DAX_SZ - 1; 971 + } 972 + up_read(&fi->dax->sem); 973 + 974 + if (!dmap) 975 + goto out_mmap_sem; 976 + /* 977 + * Make sure there are no references to inode pages using 978 + * get_user_pages() 979 + */ 980 + ret = fuse_dax_break_layouts(inode, dmap_start, dmap_end); 981 + if (ret) { 982 + pr_debug("fuse: fuse_dax_break_layouts() failed. err=%d\n", 983 + ret); 984 + dmap = ERR_PTR(ret); 985 + goto out_mmap_sem; 986 + } 987 + 988 + down_write(&fi->dax->sem); 989 + node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx); 990 + /* Range already got reclaimed by somebody else */ 991 + if (!node) { 992 + if (retry) 993 + *retry = true; 994 + goto out_write_dmap_sem; 995 + } 996 + 997 + dmap = node_to_dmap(node); 998 + /* still in use. */ 999 + if (refcount_read(&dmap->refcnt) > 1) { 1000 + dmap = NULL; 1001 + if (retry) 1002 + *retry = true; 1003 + goto out_write_dmap_sem; 1004 + } 1005 + 1006 + ret = reclaim_one_dmap_locked(inode, dmap); 1007 + if (ret < 0) { 1008 + dmap = ERR_PTR(ret); 1009 + goto out_write_dmap_sem; 1010 + } 1011 + 1012 + /* Clean up dmap. Do not add back to free list */ 1013 + dmap_remove_busy_list(fcd, dmap); 1014 + dmap->inode = NULL; 1015 + dmap->itn.start = dmap->itn.last = 0; 1016 + 1017 + pr_debug("fuse: %s: inline reclaimed memory range. inode=%p, window_offset=0x%llx, length=0x%llx\n", 1018 + __func__, inode, dmap->window_offset, dmap->length); 1019 + 1020 + out_write_dmap_sem: 1021 + up_write(&fi->dax->sem); 1022 + out_mmap_sem: 1023 + up_write(&fi->i_mmap_sem); 1024 + return dmap; 1025 + } 1026 + 1027 + static struct fuse_dax_mapping * 1028 + alloc_dax_mapping_reclaim(struct fuse_conn_dax *fcd, struct inode *inode) 1029 + { 1030 + struct fuse_dax_mapping *dmap; 1031 + struct fuse_inode *fi = get_fuse_inode(inode); 1032 + 1033 + while (1) { 1034 + bool retry = false; 1035 + 1036 + dmap = alloc_dax_mapping(fcd); 1037 + if (dmap) 1038 + return dmap; 1039 + 1040 + dmap = inode_inline_reclaim_one_dmap(fcd, inode, &retry); 1041 + /* 1042 + * Either we got a mapping or it is an error, return in both 1043 + * the cases. 
1044 + */ 1045 + if (dmap) 1046 + return dmap; 1047 + 1048 + /* If we could not reclaim a mapping because it 1049 + * had a reference or some other temporary failure, 1050 + * Try again. We want to give up inline reclaim only 1051 + * if there is no range assigned to this node. Otherwise 1052 + * if a deadlock is possible if we sleep with fi->i_mmap_sem 1053 + * held and worker to free memory can't make progress due 1054 + * to unavailability of fi->i_mmap_sem lock. So sleep 1055 + * only if fi->dax->nr=0 1056 + */ 1057 + if (retry) 1058 + continue; 1059 + /* 1060 + * There are no mappings which can be reclaimed. Wait for one. 1061 + * We are not holding fi->dax->sem. So it is possible 1062 + * that range gets added now. But as we are not holding 1063 + * fi->i_mmap_sem, worker should still be able to free up 1064 + * a range and wake us up. 1065 + */ 1066 + if (!fi->dax->nr && !(fcd->nr_free_ranges > 0)) { 1067 + if (wait_event_killable_exclusive(fcd->range_waitq, 1068 + (fcd->nr_free_ranges > 0))) { 1069 + return ERR_PTR(-EINTR); 1070 + } 1071 + } 1072 + } 1073 + } 1074 + 1075 + static int lookup_and_reclaim_dmap_locked(struct fuse_conn_dax *fcd, 1076 + struct inode *inode, 1077 + unsigned long start_idx) 1078 + { 1079 + int ret; 1080 + struct fuse_inode *fi = get_fuse_inode(inode); 1081 + struct fuse_dax_mapping *dmap; 1082 + struct interval_tree_node *node; 1083 + 1084 + /* Find fuse dax mapping at file offset inode. */ 1085 + node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx); 1086 + 1087 + /* Range already got cleaned up by somebody else */ 1088 + if (!node) 1089 + return 0; 1090 + dmap = node_to_dmap(node); 1091 + 1092 + /* still in use. */ 1093 + if (refcount_read(&dmap->refcnt) > 1) 1094 + return 0; 1095 + 1096 + ret = reclaim_one_dmap_locked(inode, dmap); 1097 + if (ret < 0) 1098 + return ret; 1099 + 1100 + /* Cleanup dmap entry and add back to free list */ 1101 + spin_lock(&fcd->lock); 1102 + dmap_reinit_add_to_free_pool(fcd, dmap); 1103 + spin_unlock(&fcd->lock); 1104 + return ret; 1105 + } 1106 + 1107 + /* 1108 + * Free a range of memory. 1109 + * Locking: 1110 + * 1. Take fi->i_mmap_sem to block dax faults. 1111 + * 2. Take fi->dax->sem to protect interval tree and also to make sure 1112 + * read/write can not reuse a dmap which we might be freeing. 1113 + */ 1114 + static int lookup_and_reclaim_dmap(struct fuse_conn_dax *fcd, 1115 + struct inode *inode, 1116 + unsigned long start_idx, 1117 + unsigned long end_idx) 1118 + { 1119 + int ret; 1120 + struct fuse_inode *fi = get_fuse_inode(inode); 1121 + loff_t dmap_start = start_idx << FUSE_DAX_SHIFT; 1122 + loff_t dmap_end = (dmap_start + FUSE_DAX_SZ) - 1; 1123 + 1124 + down_write(&fi->i_mmap_sem); 1125 + ret = fuse_dax_break_layouts(inode, dmap_start, dmap_end); 1126 + if (ret) { 1127 + pr_debug("virtio_fs: fuse_dax_break_layouts() failed. 
err=%d\n", 1128 + ret); 1129 + goto out_mmap_sem; 1130 + } 1131 + 1132 + down_write(&fi->dax->sem); 1133 + ret = lookup_and_reclaim_dmap_locked(fcd, inode, start_idx); 1134 + up_write(&fi->dax->sem); 1135 + out_mmap_sem: 1136 + up_write(&fi->i_mmap_sem); 1137 + return ret; 1138 + } 1139 + 1140 + static int try_to_free_dmap_chunks(struct fuse_conn_dax *fcd, 1141 + unsigned long nr_to_free) 1142 + { 1143 + struct fuse_dax_mapping *dmap, *pos, *temp; 1144 + int ret, nr_freed = 0; 1145 + unsigned long start_idx = 0, end_idx = 0; 1146 + struct inode *inode = NULL; 1147 + 1148 + /* Pick first busy range and free it for now*/ 1149 + while (1) { 1150 + if (nr_freed >= nr_to_free) 1151 + break; 1152 + 1153 + dmap = NULL; 1154 + spin_lock(&fcd->lock); 1155 + 1156 + if (!fcd->nr_busy_ranges) { 1157 + spin_unlock(&fcd->lock); 1158 + return 0; 1159 + } 1160 + 1161 + list_for_each_entry_safe(pos, temp, &fcd->busy_ranges, 1162 + busy_list) { 1163 + /* skip this range if it's in use. */ 1164 + if (refcount_read(&pos->refcnt) > 1) 1165 + continue; 1166 + 1167 + inode = igrab(pos->inode); 1168 + /* 1169 + * This inode is going away. That will free 1170 + * up all the ranges anyway, continue to 1171 + * next range. 1172 + */ 1173 + if (!inode) 1174 + continue; 1175 + /* 1176 + * Take this element off list and add it tail. If 1177 + * this element can't be freed, it will help with 1178 + * selecting new element in next iteration of loop. 1179 + */ 1180 + dmap = pos; 1181 + list_move_tail(&dmap->busy_list, &fcd->busy_ranges); 1182 + start_idx = end_idx = dmap->itn.start; 1183 + break; 1184 + } 1185 + spin_unlock(&fcd->lock); 1186 + if (!dmap) 1187 + return 0; 1188 + 1189 + ret = lookup_and_reclaim_dmap(fcd, inode, start_idx, end_idx); 1190 + iput(inode); 1191 + if (ret) 1192 + return ret; 1193 + nr_freed++; 1194 + } 1195 + return 0; 1196 + } 1197 + 1198 + static void fuse_dax_free_mem_worker(struct work_struct *work) 1199 + { 1200 + int ret; 1201 + struct fuse_conn_dax *fcd = container_of(work, struct fuse_conn_dax, 1202 + free_work.work); 1203 + ret = try_to_free_dmap_chunks(fcd, FUSE_DAX_RECLAIM_CHUNK); 1204 + if (ret) { 1205 + pr_debug("fuse: try_to_free_dmap_chunks() failed with err=%d\n", 1206 + ret); 1207 + } 1208 + 1209 + /* If number of free ranges are still below threhold, requeue */ 1210 + kick_dmap_free_worker(fcd, 1); 1211 + } 1212 + 1213 + static void fuse_free_dax_mem_ranges(struct list_head *mem_list) 1214 + { 1215 + struct fuse_dax_mapping *range, *temp; 1216 + 1217 + /* Free All allocated elements */ 1218 + list_for_each_entry_safe(range, temp, mem_list, list) { 1219 + list_del(&range->list); 1220 + if (!list_empty(&range->busy_list)) 1221 + list_del(&range->busy_list); 1222 + kfree(range); 1223 + } 1224 + } 1225 + 1226 + void fuse_dax_conn_free(struct fuse_conn *fc) 1227 + { 1228 + if (fc->dax) { 1229 + fuse_free_dax_mem_ranges(&fc->dax->free_ranges); 1230 + kfree(fc->dax); 1231 + } 1232 + } 1233 + 1234 + static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd) 1235 + { 1236 + long nr_pages, nr_ranges; 1237 + void *kaddr; 1238 + pfn_t pfn; 1239 + struct fuse_dax_mapping *range; 1240 + int ret, id; 1241 + size_t dax_size = -1; 1242 + unsigned long i; 1243 + 1244 + init_waitqueue_head(&fcd->range_waitq); 1245 + INIT_LIST_HEAD(&fcd->free_ranges); 1246 + INIT_LIST_HEAD(&fcd->busy_ranges); 1247 + INIT_DELAYED_WORK(&fcd->free_work, fuse_dax_free_mem_worker); 1248 + 1249 + id = dax_read_lock(); 1250 + nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), &kaddr, 1251 + &pfn); 1252 + 
dax_read_unlock(id); 1253 + if (nr_pages < 0) { 1254 + pr_debug("dax_direct_access() returned %ld\n", nr_pages); 1255 + return nr_pages; 1256 + } 1257 + 1258 + nr_ranges = nr_pages/FUSE_DAX_PAGES; 1259 + pr_debug("%s: dax mapped %ld pages. nr_ranges=%ld\n", 1260 + __func__, nr_pages, nr_ranges); 1261 + 1262 + for (i = 0; i < nr_ranges; i++) { 1263 + range = kzalloc(sizeof(struct fuse_dax_mapping), GFP_KERNEL); 1264 + ret = -ENOMEM; 1265 + if (!range) 1266 + goto out_err; 1267 + 1268 + /* TODO: This offset only works if virtio-fs driver is not 1269 + * having some memory hidden at the beginning. This needs 1270 + * better handling 1271 + */ 1272 + range->window_offset = i * FUSE_DAX_SZ; 1273 + range->length = FUSE_DAX_SZ; 1274 + INIT_LIST_HEAD(&range->busy_list); 1275 + refcount_set(&range->refcnt, 1); 1276 + list_add_tail(&range->list, &fcd->free_ranges); 1277 + } 1278 + 1279 + fcd->nr_free_ranges = nr_ranges; 1280 + fcd->nr_ranges = nr_ranges; 1281 + return 0; 1282 + out_err: 1283 + /* Free All allocated elements */ 1284 + fuse_free_dax_mem_ranges(&fcd->free_ranges); 1285 + return ret; 1286 + } 1287 + 1288 + int fuse_dax_conn_alloc(struct fuse_conn *fc, struct dax_device *dax_dev) 1289 + { 1290 + struct fuse_conn_dax *fcd; 1291 + int err; 1292 + 1293 + if (!dax_dev) 1294 + return 0; 1295 + 1296 + fcd = kzalloc(sizeof(*fcd), GFP_KERNEL); 1297 + if (!fcd) 1298 + return -ENOMEM; 1299 + 1300 + spin_lock_init(&fcd->lock); 1301 + fcd->dev = dax_dev; 1302 + err = fuse_dax_mem_range_init(fcd); 1303 + if (err) { 1304 + kfree(fcd); 1305 + return err; 1306 + } 1307 + 1308 + fc->dax = fcd; 1309 + return 0; 1310 + } 1311 + 1312 + bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi) 1313 + { 1314 + struct fuse_conn *fc = get_fuse_conn_super(sb); 1315 + 1316 + fi->dax = NULL; 1317 + if (fc->dax) { 1318 + fi->dax = kzalloc(sizeof(*fi->dax), GFP_KERNEL_ACCOUNT); 1319 + if (!fi->dax) 1320 + return false; 1321 + 1322 + init_rwsem(&fi->dax->sem); 1323 + fi->dax->tree = RB_ROOT_CACHED; 1324 + } 1325 + 1326 + return true; 1327 + } 1328 + 1329 + static const struct address_space_operations fuse_dax_file_aops = { 1330 + .writepages = fuse_dax_writepages, 1331 + .direct_IO = noop_direct_IO, 1332 + .set_page_dirty = noop_set_page_dirty, 1333 + .invalidatepage = noop_invalidatepage, 1334 + }; 1335 + 1336 + void fuse_dax_inode_init(struct inode *inode) 1337 + { 1338 + struct fuse_conn *fc = get_fuse_conn(inode); 1339 + 1340 + if (!fc->dax) 1341 + return; 1342 + 1343 + inode->i_flags |= S_DAX; 1344 + inode->i_data.a_ops = &fuse_dax_file_aops; 1345 + } 1346 + 1347 + bool fuse_dax_check_alignment(struct fuse_conn *fc, unsigned int map_alignment) 1348 + { 1349 + if (fc->dax && (map_alignment > FUSE_DAX_SHIFT)) { 1350 + pr_warn("FUSE: map_alignment %u incompatible with dax mem range size %u\n", 1351 + map_alignment, FUSE_DAX_SZ); 1352 + return false; 1353 + } 1354 + return true; 1355 + } 1356 + 1357 + void fuse_dax_cancel_work(struct fuse_conn *fc) 1358 + { 1359 + struct fuse_conn_dax *fcd = fc->dax; 1360 + 1361 + if (fcd) 1362 + cancel_delayed_work_sync(&fcd->free_work); 1363 + 1364 + } 1365 + EXPORT_SYMBOL_GPL(fuse_dax_cancel_work);
+100 -89
fs/fuse/dev.c
··· 40 40 return READ_ONCE(file->private_data); 41 41 } 42 42 43 - static void fuse_request_init(struct fuse_req *req) 43 + static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req) 44 44 { 45 45 INIT_LIST_HEAD(&req->list); 46 46 INIT_LIST_HEAD(&req->intr_entry); 47 47 init_waitqueue_head(&req->waitq); 48 48 refcount_set(&req->count, 1); 49 49 __set_bit(FR_PENDING, &req->flags); 50 + req->fm = fm; 50 51 } 51 52 52 - static struct fuse_req *fuse_request_alloc(gfp_t flags) 53 + static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags) 53 54 { 54 55 struct fuse_req *req = kmem_cache_zalloc(fuse_req_cachep, flags); 55 56 if (req) 56 - fuse_request_init(req); 57 + fuse_request_init(fm, req); 57 58 58 59 return req; 59 60 } ··· 101 100 } 102 101 } 103 102 104 - static void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req); 103 + static void fuse_put_request(struct fuse_req *req); 105 104 106 - static struct fuse_req *fuse_get_req(struct fuse_conn *fc, bool for_background) 105 + static struct fuse_req *fuse_get_req(struct fuse_mount *fm, bool for_background) 107 106 { 107 + struct fuse_conn *fc = fm->fc; 108 108 struct fuse_req *req; 109 109 int err; 110 110 atomic_inc(&fc->num_waiting); ··· 127 125 if (fc->conn_error) 128 126 goto out; 129 127 130 - req = fuse_request_alloc(GFP_KERNEL); 128 + req = fuse_request_alloc(fm, GFP_KERNEL); 131 129 err = -ENOMEM; 132 130 if (!req) { 133 131 if (for_background) ··· 145 143 146 144 if (unlikely(req->in.h.uid == ((uid_t)-1) || 147 145 req->in.h.gid == ((gid_t)-1))) { 148 - fuse_put_request(fc, req); 146 + fuse_put_request(req); 149 147 return ERR_PTR(-EOVERFLOW); 150 148 } 151 149 return req; ··· 155 153 return ERR_PTR(err); 156 154 } 157 155 158 - static void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) 156 + static void fuse_put_request(struct fuse_req *req) 159 157 { 158 + struct fuse_conn *fc = req->fm->fc; 159 + 160 160 if (refcount_dec_and_test(&req->count)) { 161 161 if (test_bit(FR_BACKGROUND, &req->flags)) { 162 162 /* ··· 277 273 * the 'end' callback is called if given, else the reference to the 278 274 * request is released 279 275 */ 280 - void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req) 276 + void fuse_request_end(struct fuse_req *req) 281 277 { 278 + struct fuse_mount *fm = req->fm; 279 + struct fuse_conn *fc = fm->fc; 282 280 struct fuse_iqueue *fiq = &fc->iq; 283 281 284 282 if (test_and_set_bit(FR_FINISHED, &req->flags)) ··· 315 309 wake_up(&fc->blocked_waitq); 316 310 } 317 311 318 - if (fc->num_background == fc->congestion_threshold && fc->sb) { 319 - clear_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC); 320 - clear_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC); 312 + if (fc->num_background == fc->congestion_threshold && fm->sb) { 313 + clear_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC); 314 + clear_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC); 321 315 } 322 316 fc->num_background--; 323 317 fc->active_background--; ··· 329 323 } 330 324 331 325 if (test_bit(FR_ASYNC, &req->flags)) 332 - req->args->end(fc, req->args, req->out.h.error); 326 + req->args->end(fm, req->args, req->out.h.error); 333 327 put_request: 334 - fuse_put_request(fc, req); 328 + fuse_put_request(req); 335 329 } 336 330 EXPORT_SYMBOL_GPL(fuse_request_end); 337 331 338 - static int queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) 332 + static int queue_interrupt(struct fuse_req *req) 339 333 { 334 + struct fuse_iqueue *fiq = &req->fm->fc->iq; 335 + 340 336 spin_lock(&fiq->lock); 341 337 
/* Check for we've sent request to interrupt this req */ 342 338 if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags))) { ··· 365 357 return 0; 366 358 } 367 359 368 - static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) 360 + static void request_wait_answer(struct fuse_req *req) 369 361 { 362 + struct fuse_conn *fc = req->fm->fc; 370 363 struct fuse_iqueue *fiq = &fc->iq; 371 364 int err; 372 365 ··· 382 373 /* matches barrier in fuse_dev_do_read() */ 383 374 smp_mb__after_atomic(); 384 375 if (test_bit(FR_SENT, &req->flags)) 385 - queue_interrupt(fiq, req); 376 + queue_interrupt(req); 386 377 } 387 378 388 379 if (!test_bit(FR_FORCE, &req->flags)) { ··· 411 402 wait_event(req->waitq, test_bit(FR_FINISHED, &req->flags)); 412 403 } 413 404 414 - static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) 405 + static void __fuse_request_send(struct fuse_req *req) 415 406 { 416 - struct fuse_iqueue *fiq = &fc->iq; 407 + struct fuse_iqueue *fiq = &req->fm->fc->iq; 417 408 418 409 BUG_ON(test_bit(FR_BACKGROUND, &req->flags)); 419 410 spin_lock(&fiq->lock); ··· 427 418 __fuse_get_request(req); 428 419 queue_request_and_unlock(fiq, req); 429 420 430 - request_wait_answer(fc, req); 421 + request_wait_answer(req); 431 422 /* Pairs with smp_wmb() in fuse_request_end() */ 432 423 smp_rmb(); 433 424 } ··· 466 457 } 467 458 } 468 459 469 - static void fuse_force_creds(struct fuse_conn *fc, struct fuse_req *req) 460 + static void fuse_force_creds(struct fuse_req *req) 470 461 { 462 + struct fuse_conn *fc = req->fm->fc; 463 + 471 464 req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid()); 472 465 req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid()); 473 466 req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns); ··· 484 473 __set_bit(FR_ASYNC, &req->flags); 485 474 } 486 475 487 - ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args) 476 + ssize_t fuse_simple_request(struct fuse_mount *fm, struct fuse_args *args) 488 477 { 478 + struct fuse_conn *fc = fm->fc; 489 479 struct fuse_req *req; 490 480 ssize_t ret; 491 481 492 482 if (args->force) { 493 483 atomic_inc(&fc->num_waiting); 494 - req = fuse_request_alloc(GFP_KERNEL | __GFP_NOFAIL); 484 + req = fuse_request_alloc(fm, GFP_KERNEL | __GFP_NOFAIL); 495 485 496 486 if (!args->nocreds) 497 - fuse_force_creds(fc, req); 487 + fuse_force_creds(req); 498 488 499 489 __set_bit(FR_WAITING, &req->flags); 500 490 __set_bit(FR_FORCE, &req->flags); 501 491 } else { 502 492 WARN_ON(args->nocreds); 503 - req = fuse_get_req(fc, false); 493 + req = fuse_get_req(fm, false); 504 494 if (IS_ERR(req)) 505 495 return PTR_ERR(req); 506 496 } ··· 512 500 513 501 if (!args->noreply) 514 502 __set_bit(FR_ISREPLY, &req->flags); 515 - __fuse_request_send(fc, req); 503 + __fuse_request_send(req); 516 504 ret = req->out.h.error; 517 505 if (!ret && args->out_argvar) { 518 506 BUG_ON(args->out_numargs == 0); 519 507 ret = args->out_args[args->out_numargs - 1].size; 520 508 } 521 - fuse_put_request(fc, req); 509 + fuse_put_request(req); 522 510 523 511 return ret; 524 512 } 525 513 526 - static bool fuse_request_queue_background(struct fuse_conn *fc, 527 - struct fuse_req *req) 514 + static bool fuse_request_queue_background(struct fuse_req *req) 528 515 { 516 + struct fuse_mount *fm = req->fm; 517 + struct fuse_conn *fc = fm->fc; 529 518 bool queued = false; 530 519 531 520 WARN_ON(!test_bit(FR_BACKGROUND, &req->flags)); ··· 540 527 fc->num_background++; 541 528 if (fc->num_background == 
fc->max_background) 542 529 fc->blocked = 1; 543 - if (fc->num_background == fc->congestion_threshold && fc->sb) { 544 - set_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC); 545 - set_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC); 530 + if (fc->num_background == fc->congestion_threshold && fm->sb) { 531 + set_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC); 532 + set_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC); 546 533 } 547 534 list_add_tail(&req->list, &fc->bg_queue); 548 535 flush_bg_queue(fc); ··· 553 540 return queued; 554 541 } 555 542 556 - int fuse_simple_background(struct fuse_conn *fc, struct fuse_args *args, 543 + int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args, 557 544 gfp_t gfp_flags) 558 545 { 559 546 struct fuse_req *req; 560 547 561 548 if (args->force) { 562 549 WARN_ON(!args->nocreds); 563 - req = fuse_request_alloc(gfp_flags); 550 + req = fuse_request_alloc(fm, gfp_flags); 564 551 if (!req) 565 552 return -ENOMEM; 566 553 __set_bit(FR_BACKGROUND, &req->flags); 567 554 } else { 568 555 WARN_ON(args->nocreds); 569 - req = fuse_get_req(fc, true); 556 + req = fuse_get_req(fm, true); 570 557 if (IS_ERR(req)) 571 558 return PTR_ERR(req); 572 559 } 573 560 574 561 fuse_args_to_req(req, args); 575 562 576 - if (!fuse_request_queue_background(fc, req)) { 577 - fuse_put_request(fc, req); 563 + if (!fuse_request_queue_background(req)) { 564 + fuse_put_request(req); 578 565 return -ENOTCONN; 579 566 } 580 567 ··· 582 569 } 583 570 EXPORT_SYMBOL_GPL(fuse_simple_background); 584 571 585 - static int fuse_simple_notify_reply(struct fuse_conn *fc, 572 + static int fuse_simple_notify_reply(struct fuse_mount *fm, 586 573 struct fuse_args *args, u64 unique) 587 574 { 588 575 struct fuse_req *req; 589 - struct fuse_iqueue *fiq = &fc->iq; 576 + struct fuse_iqueue *fiq = &fm->fc->iq; 590 577 int err = 0; 591 578 592 - req = fuse_get_req(fc, false); 579 + req = fuse_get_req(fm, false); 593 580 if (IS_ERR(req)) 594 581 return PTR_ERR(req); 595 582 ··· 604 591 } else { 605 592 err = -ENODEV; 606 593 spin_unlock(&fiq->lock); 607 - fuse_put_request(fc, req); 594 + fuse_put_request(req); 608 595 } 609 596 610 597 return err; ··· 798 785 struct page *newpage; 799 786 struct pipe_buffer *buf = cs->pipebufs; 800 787 788 + get_page(oldpage); 801 789 err = unlock_request(cs->req); 802 790 if (err) 803 - return err; 791 + goto out_put_old; 804 792 805 793 fuse_copy_finish(cs); 806 794 807 795 err = pipe_buf_confirm(cs->pipe, buf); 808 796 if (err) 809 - return err; 797 + goto out_put_old; 810 798 811 799 BUG_ON(!cs->nr_segs); 812 800 cs->currbuf = buf; ··· 847 833 err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL); 848 834 if (err) { 849 835 unlock_page(newpage); 850 - return err; 836 + goto out_put_old; 851 837 } 852 838 853 839 get_page(newpage); ··· 866 852 if (err) { 867 853 unlock_page(newpage); 868 854 put_page(newpage); 869 - return err; 855 + goto out_put_old; 870 856 } 871 857 872 858 unlock_page(oldpage); 859 + /* Drop ref for ap->pages[] array */ 873 860 put_page(oldpage); 874 861 cs->len = 0; 875 862 876 - return 0; 863 + err = 0; 864 + out_put_old: 865 + /* Drop ref obtained in this function */ 866 + put_page(oldpage); 867 + return err; 877 868 878 869 out_fallback_unlock: 879 870 unlock_page(newpage); ··· 887 868 cs->offset = buf->offset; 888 869 889 870 err = lock_request(cs->req); 890 - if (err) 891 - return err; 871 + if (!err) 872 + err = 1; 892 873 893 - return 1; 874 + goto out_put_old; 894 875 } 895 876 896 877 static int fuse_ref_page(struct fuse_copy_state *cs, 
struct page *page, ··· 902 883 if (cs->nr_segs >= cs->pipe->max_usage) 903 884 return -EIO; 904 885 886 + get_page(page); 905 887 err = unlock_request(cs->req); 906 - if (err) 888 + if (err) { 889 + put_page(page); 907 890 return err; 891 + } 908 892 909 893 fuse_copy_finish(cs); 910 894 911 895 buf = cs->pipebufs; 912 - get_page(page); 913 896 buf->page = page; 914 897 buf->offset = offset; 915 898 buf->len = count; ··· 1271 1250 /* SETXATTR is special, since it may contain too large data */ 1272 1251 if (args->opcode == FUSE_SETXATTR) 1273 1252 req->out.h.error = -E2BIG; 1274 - fuse_request_end(fc, req); 1253 + fuse_request_end(req); 1275 1254 goto restart; 1276 1255 } 1277 1256 spin_lock(&fpq->lock); ··· 1305 1284 /* matches barrier in request_wait_answer() */ 1306 1285 smp_mb__after_atomic(); 1307 1286 if (test_bit(FR_INTERRUPTED, &req->flags)) 1308 - queue_interrupt(fiq, req); 1309 - fuse_put_request(fc, req); 1287 + queue_interrupt(req); 1288 + fuse_put_request(req); 1310 1289 1311 1290 return reqsize; 1312 1291 ··· 1314 1293 if (!test_bit(FR_PRIVATE, &req->flags)) 1315 1294 list_del_init(&req->list); 1316 1295 spin_unlock(&fpq->lock); 1317 - fuse_request_end(fc, req); 1296 + fuse_request_end(req); 1318 1297 return err; 1319 1298 1320 1299 err_unlock: ··· 1437 1416 fuse_copy_finish(cs); 1438 1417 1439 1418 down_read(&fc->killsb); 1440 - err = -ENOENT; 1441 - if (fc->sb) { 1442 - err = fuse_reverse_inval_inode(fc->sb, outarg.ino, 1443 - outarg.off, outarg.len); 1444 - } 1419 + err = fuse_reverse_inval_inode(fc, outarg.ino, 1420 + outarg.off, outarg.len); 1445 1421 up_read(&fc->killsb); 1446 1422 return err; 1447 1423 ··· 1484 1466 buf[outarg.namelen] = 0; 1485 1467 1486 1468 down_read(&fc->killsb); 1487 - err = -ENOENT; 1488 - if (fc->sb) 1489 - err = fuse_reverse_inval_entry(fc->sb, outarg.parent, 0, &name); 1469 + err = fuse_reverse_inval_entry(fc, outarg.parent, 0, &name); 1490 1470 up_read(&fc->killsb); 1491 1471 kfree(buf); 1492 1472 return err; ··· 1532 1516 buf[outarg.namelen] = 0; 1533 1517 1534 1518 down_read(&fc->killsb); 1535 - err = -ENOENT; 1536 - if (fc->sb) 1537 - err = fuse_reverse_inval_entry(fc->sb, outarg.parent, 1538 - outarg.child, &name); 1519 + err = fuse_reverse_inval_entry(fc, outarg.parent, outarg.child, &name); 1539 1520 up_read(&fc->killsb); 1540 1521 kfree(buf); 1541 1522 return err; ··· 1574 1561 down_read(&fc->killsb); 1575 1562 1576 1563 err = -ENOENT; 1577 - if (!fc->sb) 1578 - goto out_up_killsb; 1579 - 1580 - inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid); 1564 + inode = fuse_ilookup(fc, nodeid, NULL); 1581 1565 if (!inode) 1582 1566 goto out_up_killsb; 1583 1567 ··· 1631 1621 struct fuse_notify_retrieve_in inarg; 1632 1622 }; 1633 1623 1634 - static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_args *args, 1624 + static void fuse_retrieve_end(struct fuse_mount *fm, struct fuse_args *args, 1635 1625 int error) 1636 1626 { 1637 1627 struct fuse_retrieve_args *ra = ··· 1641 1631 kfree(ra); 1642 1632 } 1643 1633 1644 - static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, 1634 + static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode, 1645 1635 struct fuse_notify_retrieve_out *outarg) 1646 1636 { 1647 1637 int err; ··· 1652 1642 unsigned int offset; 1653 1643 size_t total_len = 0; 1654 1644 unsigned int num_pages; 1645 + struct fuse_conn *fc = fm->fc; 1655 1646 struct fuse_retrieve_args *ra; 1656 1647 size_t args_size = sizeof(*ra); 1657 1648 struct fuse_args_pages *ap; ··· 1714 1703 args->in_args[0].value 
= &ra->inarg; 1715 1704 args->in_args[1].size = total_len; 1716 1705 1717 - err = fuse_simple_notify_reply(fc, args, outarg->notify_unique); 1706 + err = fuse_simple_notify_reply(fm, args, outarg->notify_unique); 1718 1707 if (err) 1719 - fuse_retrieve_end(fc, args, err); 1708 + fuse_retrieve_end(fm, args, err); 1720 1709 1721 1710 return err; 1722 1711 } ··· 1725 1714 struct fuse_copy_state *cs) 1726 1715 { 1727 1716 struct fuse_notify_retrieve_out outarg; 1717 + struct fuse_mount *fm; 1728 1718 struct inode *inode; 1719 + u64 nodeid; 1729 1720 int err; 1730 1721 1731 1722 err = -EINVAL; ··· 1742 1729 1743 1730 down_read(&fc->killsb); 1744 1731 err = -ENOENT; 1745 - if (fc->sb) { 1746 - u64 nodeid = outarg.nodeid; 1732 + nodeid = outarg.nodeid; 1747 1733 1748 - inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid); 1749 - if (inode) { 1750 - err = fuse_retrieve(fc, inode, &outarg); 1751 - iput(inode); 1752 - } 1734 + inode = fuse_ilookup(fc, nodeid, &fm); 1735 + if (inode) { 1736 + err = fuse_retrieve(fm, inode, &outarg); 1737 + iput(inode); 1753 1738 } 1754 1739 up_read(&fc->killsb); 1755 1740 ··· 1886 1875 else if (oh.error == -ENOSYS) 1887 1876 fc->no_interrupt = 1; 1888 1877 else if (oh.error == -EAGAIN) 1889 - err = queue_interrupt(&fc->iq, req); 1878 + err = queue_interrupt(req); 1890 1879 1891 - fuse_put_request(fc, req); 1880 + fuse_put_request(req); 1892 1881 1893 1882 goto copy_finish; 1894 1883 } ··· 1918 1907 list_del_init(&req->list); 1919 1908 spin_unlock(&fpq->lock); 1920 1909 1921 - fuse_request_end(fc, req); 1910 + fuse_request_end(req); 1922 1911 out: 1923 1912 return err ? err : nbytes; 1924 1913 ··· 2056 2045 } 2057 2046 2058 2047 /* Abort all requests on the given list (pending or processing) */ 2059 - static void end_requests(struct fuse_conn *fc, struct list_head *head) 2048 + static void end_requests(struct list_head *head) 2060 2049 { 2061 2050 while (!list_empty(head)) { 2062 2051 struct fuse_req *req; ··· 2064 2053 req->out.h.error = -ECONNABORTED; 2065 2054 clear_bit(FR_SENT, &req->flags); 2066 2055 list_del_init(&req->list); 2067 - fuse_request_end(fc, req); 2056 + fuse_request_end(req); 2068 2057 } 2069 2058 } 2070 2059 ··· 2159 2148 wake_up_all(&fc->blocked_waitq); 2160 2149 spin_unlock(&fc->lock); 2161 2150 2162 - end_requests(fc, &to_end); 2151 + end_requests(&to_end); 2163 2152 } else { 2164 2153 spin_unlock(&fc->lock); 2165 2154 } ··· 2189 2178 list_splice_init(&fpq->processing[i], &to_end); 2190 2179 spin_unlock(&fpq->lock); 2191 2180 2192 - end_requests(fc, &to_end); 2181 + end_requests(&to_end); 2193 2182 2194 2183 /* Are we the last open device? */ 2195 2184 if (atomic_dec_and_test(&fc->dev_count)) {
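The fs/fuse/dev.c conversion above revolves around the new fuse_conn/fuse_mount split: one connection may now back several superblocks (the virtiofs submounts), and each request records the mount it belongs to in req->fm, so per-superblock state (fm->sb) and the shared connection (fm->fc) are both reachable from the request itself. A minimal user-space model of that relationship, with simplified stand-in structs and helper names rather than the kernel definitions:

#include <stdio.h>
#include <stdlib.h>

/* stand-in for struct fuse_conn: state shared by everything on one connection */
struct fuse_conn {
	int num_background;
	int congestion_threshold;
};

/* stand-in for struct fuse_mount: one mounted superblock on that connection */
struct fuse_mount {
	struct fuse_conn *fc;
	const char *sb_id;		/* plays the role of fm->sb */
};

/* stand-in for struct fuse_req: a request remembers its mount, not just the conn */
struct fuse_req {
	struct fuse_mount *fm;
	int error;
};

static struct fuse_req *request_alloc(struct fuse_mount *fm)
{
	struct fuse_req *req = calloc(1, sizeof(*req));

	if (req)
		req->fm = fm;		/* mirrors fuse_request_init(fm, req) above */
	return req;
}

static void request_end(struct fuse_req *req)
{
	/* superblock state via req->fm, connection state via req->fm->fc */
	printf("request done on sb %s (conn %p)\n",
	       req->fm->sb_id, (void *)req->fm->fc);
	free(req);
}

int main(void)
{
	struct fuse_conn fc = { .congestion_threshold = 8 };
	struct fuse_mount root = { .fc = &fc, .sb_id = "root" };
	struct fuse_mount sub  = { .fc = &fc, .sb_id = "submount" };
	struct fuse_req *a = request_alloc(&root);
	struct fuse_req *b = request_alloc(&sub);

	if (a)
		request_end(a);
	if (b)
		request_end(b);
	return 0;
}

This is also why fuse_request_end() above clears bdi congestion on fm->sb rather than fc->sb: with submounts there is no longer a single superblock per connection, but a completing request always knows which one it was issued against.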
+158 -62
fs/fuse/dir.c
··· 10 10 11 11 #include <linux/pagemap.h> 12 12 #include <linux/file.h> 13 + #include <linux/fs_context.h> 13 14 #include <linux/sched.h> 14 15 #include <linux/namei.h> 15 16 #include <linux/slab.h> ··· 197 196 { 198 197 struct inode *inode; 199 198 struct dentry *parent; 200 - struct fuse_conn *fc; 199 + struct fuse_mount *fm; 201 200 struct fuse_inode *fi; 202 201 int ret; 203 202 ··· 219 218 if (flags & LOOKUP_RCU) 220 219 goto out; 221 220 222 - fc = get_fuse_conn(inode); 221 + fm = get_fuse_mount(inode); 223 222 224 223 forget = fuse_alloc_forget(); 225 224 ret = -ENOMEM; 226 225 if (!forget) 227 226 goto out; 228 227 229 - attr_version = fuse_get_attr_version(fc); 228 + attr_version = fuse_get_attr_version(fm->fc); 230 229 231 230 parent = dget_parent(entry); 232 - fuse_lookup_init(fc, &args, get_node_id(d_inode(parent)), 231 + fuse_lookup_init(fm->fc, &args, get_node_id(d_inode(parent)), 233 232 &entry->d_name, &outarg); 234 - ret = fuse_simple_request(fc, &args); 233 + ret = fuse_simple_request(fm, &args); 235 234 dput(parent); 236 235 /* Zero nodeid is same as -ENOENT */ 237 236 if (!ret && !outarg.nodeid) 238 237 ret = -ENOENT; 239 238 if (!ret) { 240 239 fi = get_fuse_inode(inode); 241 - if (outarg.nodeid != get_node_id(inode)) { 242 - fuse_queue_forget(fc, forget, outarg.nodeid, 1); 240 + if (outarg.nodeid != get_node_id(inode) || 241 + (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) { 242 + fuse_queue_forget(fm->fc, forget, 243 + outarg.nodeid, 1); 243 244 goto invalid; 244 245 } 245 246 spin_lock(&fi->lock); ··· 301 298 return time_before64(fuse_dentry_time(dentry), get_jiffies_64()); 302 299 } 303 300 301 + /* 302 + * Create a fuse_mount object with a new superblock (with path->dentry 303 + * as the root), and return that mount so it can be auto-mounted on 304 + * @path. 
305 + */ 306 + static struct vfsmount *fuse_dentry_automount(struct path *path) 307 + { 308 + struct fs_context *fsc; 309 + struct fuse_mount *parent_fm = get_fuse_mount_super(path->mnt->mnt_sb); 310 + struct fuse_conn *fc = parent_fm->fc; 311 + struct fuse_mount *fm; 312 + struct vfsmount *mnt; 313 + struct fuse_inode *mp_fi = get_fuse_inode(d_inode(path->dentry)); 314 + struct super_block *sb; 315 + int err; 316 + 317 + fsc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry); 318 + if (IS_ERR(fsc)) { 319 + err = PTR_ERR(fsc); 320 + goto out; 321 + } 322 + 323 + err = -ENOMEM; 324 + fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL); 325 + if (!fm) 326 + goto out_put_fsc; 327 + 328 + refcount_set(&fm->count, 1); 329 + fsc->s_fs_info = fm; 330 + sb = sget_fc(fsc, NULL, set_anon_super_fc); 331 + if (IS_ERR(sb)) { 332 + err = PTR_ERR(sb); 333 + fuse_mount_put(fm); 334 + goto out_put_fsc; 335 + } 336 + fm->fc = fuse_conn_get(fc); 337 + 338 + /* Initialize superblock, making @mp_fi its root */ 339 + err = fuse_fill_super_submount(sb, mp_fi); 340 + if (err) 341 + goto out_put_sb; 342 + 343 + sb->s_flags |= SB_ACTIVE; 344 + fsc->root = dget(sb->s_root); 345 + /* We are done configuring the superblock, so unlock it */ 346 + up_write(&sb->s_umount); 347 + 348 + down_write(&fc->killsb); 349 + list_add_tail(&fm->fc_entry, &fc->mounts); 350 + up_write(&fc->killsb); 351 + 352 + /* Create the submount */ 353 + mnt = vfs_create_mount(fsc); 354 + if (IS_ERR(mnt)) { 355 + err = PTR_ERR(mnt); 356 + goto out_put_fsc; 357 + } 358 + mntget(mnt); 359 + put_fs_context(fsc); 360 + return mnt; 361 + 362 + out_put_sb: 363 + /* 364 + * Only jump here when fsc->root is NULL and sb is still locked 365 + * (otherwise put_fs_context() will put the superblock) 366 + */ 367 + deactivate_locked_super(sb); 368 + out_put_fsc: 369 + put_fs_context(fsc); 370 + out: 371 + return ERR_PTR(err); 372 + } 373 + 304 374 const struct dentry_operations fuse_dentry_operations = { 305 375 .d_revalidate = fuse_dentry_revalidate, 306 376 .d_delete = fuse_dentry_delete, ··· 381 305 .d_init = fuse_dentry_init, 382 306 .d_release = fuse_dentry_release, 383 307 #endif 308 + .d_automount = fuse_dentry_automount, 384 309 }; 385 310 386 311 const struct dentry_operations fuse_root_dentry_operations = { ··· 406 329 int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name, 407 330 struct fuse_entry_out *outarg, struct inode **inode) 408 331 { 409 - struct fuse_conn *fc = get_fuse_conn_super(sb); 332 + struct fuse_mount *fm = get_fuse_mount_super(sb); 410 333 FUSE_ARGS(args); 411 334 struct fuse_forget_link *forget; 412 335 u64 attr_version; ··· 423 346 if (!forget) 424 347 goto out; 425 348 426 - attr_version = fuse_get_attr_version(fc); 349 + attr_version = fuse_get_attr_version(fm->fc); 427 350 428 - fuse_lookup_init(fc, &args, nodeid, name, outarg); 429 - err = fuse_simple_request(fc, &args); 351 + fuse_lookup_init(fm->fc, &args, nodeid, name, outarg); 352 + err = fuse_simple_request(fm, &args); 430 353 /* Zero nodeid is same as -ENOENT, but with valid timeout */ 431 354 if (err || !outarg->nodeid) 432 355 goto out_put_forget; ··· 442 365 attr_version); 443 366 err = -ENOMEM; 444 367 if (!*inode) { 445 - fuse_queue_forget(fc, forget, outarg->nodeid, 1); 368 + fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1); 446 369 goto out; 447 370 } 448 371 err = 0; ··· 511 434 { 512 435 int err; 513 436 struct inode *inode; 514 - struct fuse_conn *fc = get_fuse_conn(dir); 437 + struct fuse_mount *fm = 
get_fuse_mount(dir); 515 438 FUSE_ARGS(args); 516 439 struct fuse_forget_link *forget; 517 440 struct fuse_create_in inarg; ··· 529 452 goto out_err; 530 453 531 454 err = -ENOMEM; 532 - ff = fuse_file_alloc(fc); 455 + ff = fuse_file_alloc(fm); 533 456 if (!ff) 534 457 goto out_put_forget_req; 535 458 536 - if (!fc->dont_mask) 459 + if (!fm->fc->dont_mask) 537 460 mode &= ~current_umask(); 538 461 539 462 flags &= ~O_NOCTTY; ··· 554 477 args.out_args[0].value = &outentry; 555 478 args.out_args[1].size = sizeof(outopen); 556 479 args.out_args[1].value = &outopen; 557 - err = fuse_simple_request(fc, &args); 480 + err = fuse_simple_request(fm, &args); 558 481 if (err) 559 482 goto out_free_ff; 560 483 ··· 571 494 if (!inode) { 572 495 flags &= ~(O_CREAT | O_EXCL | O_TRUNC); 573 496 fuse_sync_release(NULL, ff, flags); 574 - fuse_queue_forget(fc, forget, outentry.nodeid, 1); 497 + fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1); 575 498 err = -ENOMEM; 576 499 goto out_err; 577 500 } ··· 644 567 /* 645 568 * Code shared between mknod, mkdir, symlink and link 646 569 */ 647 - static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args, 570 + static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args, 648 571 struct inode *dir, struct dentry *entry, 649 572 umode_t mode) 650 573 { ··· 663 586 args->out_numargs = 1; 664 587 args->out_args[0].size = sizeof(outarg); 665 588 args->out_args[0].value = &outarg; 666 - err = fuse_simple_request(fc, args); 589 + err = fuse_simple_request(fm, args); 667 590 if (err) 668 591 goto out_put_forget_req; 669 592 ··· 677 600 inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, 678 601 &outarg.attr, entry_attr_timeout(&outarg), 0); 679 602 if (!inode) { 680 - fuse_queue_forget(fc, forget, outarg.nodeid, 1); 603 + fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1); 681 604 return -ENOMEM; 682 605 } 683 606 kfree(forget); ··· 705 628 dev_t rdev) 706 629 { 707 630 struct fuse_mknod_in inarg; 708 - struct fuse_conn *fc = get_fuse_conn(dir); 631 + struct fuse_mount *fm = get_fuse_mount(dir); 709 632 FUSE_ARGS(args); 710 633 711 - if (!fc->dont_mask) 634 + if (!fm->fc->dont_mask) 712 635 mode &= ~current_umask(); 713 636 714 637 memset(&inarg, 0, sizeof(inarg)); ··· 721 644 args.in_args[0].value = &inarg; 722 645 args.in_args[1].size = entry->d_name.len + 1; 723 646 args.in_args[1].value = entry->d_name.name; 724 - return create_new_entry(fc, &args, dir, entry, mode); 647 + return create_new_entry(fm, &args, dir, entry, mode); 725 648 } 726 649 727 650 static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode, ··· 733 656 static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode) 734 657 { 735 658 struct fuse_mkdir_in inarg; 736 - struct fuse_conn *fc = get_fuse_conn(dir); 659 + struct fuse_mount *fm = get_fuse_mount(dir); 737 660 FUSE_ARGS(args); 738 661 739 - if (!fc->dont_mask) 662 + if (!fm->fc->dont_mask) 740 663 mode &= ~current_umask(); 741 664 742 665 memset(&inarg, 0, sizeof(inarg)); ··· 748 671 args.in_args[0].value = &inarg; 749 672 args.in_args[1].size = entry->d_name.len + 1; 750 673 args.in_args[1].value = entry->d_name.name; 751 - return create_new_entry(fc, &args, dir, entry, S_IFDIR); 674 + return create_new_entry(fm, &args, dir, entry, S_IFDIR); 752 675 } 753 676 754 677 static int fuse_symlink(struct inode *dir, struct dentry *entry, 755 678 const char *link) 756 679 { 757 - struct fuse_conn *fc = get_fuse_conn(dir); 680 + struct fuse_mount *fm = get_fuse_mount(dir); 758 
681 unsigned len = strlen(link) + 1; 759 682 FUSE_ARGS(args); 760 683 ··· 764 687 args.in_args[0].value = entry->d_name.name; 765 688 args.in_args[1].size = len; 766 689 args.in_args[1].value = link; 767 - return create_new_entry(fc, &args, dir, entry, S_IFLNK); 690 + return create_new_entry(fm, &args, dir, entry, S_IFLNK); 768 691 } 769 692 770 693 void fuse_update_ctime(struct inode *inode) ··· 778 701 static int fuse_unlink(struct inode *dir, struct dentry *entry) 779 702 { 780 703 int err; 781 - struct fuse_conn *fc = get_fuse_conn(dir); 704 + struct fuse_mount *fm = get_fuse_mount(dir); 782 705 FUSE_ARGS(args); 783 706 784 707 args.opcode = FUSE_UNLINK; ··· 786 709 args.in_numargs = 1; 787 710 args.in_args[0].size = entry->d_name.len + 1; 788 711 args.in_args[0].value = entry->d_name.name; 789 - err = fuse_simple_request(fc, &args); 712 + err = fuse_simple_request(fm, &args); 790 713 if (!err) { 791 714 struct inode *inode = d_inode(entry); 792 715 struct fuse_inode *fi = get_fuse_inode(inode); 793 716 794 717 spin_lock(&fi->lock); 795 - fi->attr_version = atomic64_inc_return(&fc->attr_version); 718 + fi->attr_version = atomic64_inc_return(&fm->fc->attr_version); 796 719 /* 797 720 * If i_nlink == 0 then unlink doesn't make sense, yet this can 798 721 * happen if userspace filesystem is careless. It would be ··· 814 737 static int fuse_rmdir(struct inode *dir, struct dentry *entry) 815 738 { 816 739 int err; 817 - struct fuse_conn *fc = get_fuse_conn(dir); 740 + struct fuse_mount *fm = get_fuse_mount(dir); 818 741 FUSE_ARGS(args); 819 742 820 743 args.opcode = FUSE_RMDIR; ··· 822 745 args.in_numargs = 1; 823 746 args.in_args[0].size = entry->d_name.len + 1; 824 747 args.in_args[0].value = entry->d_name.name; 825 - err = fuse_simple_request(fc, &args); 748 + err = fuse_simple_request(fm, &args); 826 749 if (!err) { 827 750 clear_nlink(d_inode(entry)); 828 751 fuse_dir_changed(dir); ··· 838 761 { 839 762 int err; 840 763 struct fuse_rename2_in inarg; 841 - struct fuse_conn *fc = get_fuse_conn(olddir); 764 + struct fuse_mount *fm = get_fuse_mount(olddir); 842 765 FUSE_ARGS(args); 843 766 844 767 memset(&inarg, 0, argsize); ··· 853 776 args.in_args[1].value = oldent->d_name.name; 854 777 args.in_args[2].size = newent->d_name.len + 1; 855 778 args.in_args[2].value = newent->d_name.name; 856 - err = fuse_simple_request(fc, &args); 779 + err = fuse_simple_request(fm, &args); 857 780 if (!err) { 858 781 /* ctime changes */ 859 782 fuse_invalidate_attr(d_inode(oldent)); ··· 924 847 int err; 925 848 struct fuse_link_in inarg; 926 849 struct inode *inode = d_inode(entry); 927 - struct fuse_conn *fc = get_fuse_conn(inode); 850 + struct fuse_mount *fm = get_fuse_mount(inode); 928 851 FUSE_ARGS(args); 929 852 930 853 memset(&inarg, 0, sizeof(inarg)); ··· 935 858 args.in_args[0].value = &inarg; 936 859 args.in_args[1].size = newent->d_name.len + 1; 937 860 args.in_args[1].value = newent->d_name.name; 938 - err = create_new_entry(fc, &args, newdir, newent, inode->i_mode); 861 + err = create_new_entry(fm, &args, newdir, newent, inode->i_mode); 939 862 /* Contrary to "normal" filesystems it can happen that link 940 863 makes two "logical" inodes point to the same "physical" 941 864 inode. 
We invalidate the attributes of the old one, so it ··· 946 869 struct fuse_inode *fi = get_fuse_inode(inode); 947 870 948 871 spin_lock(&fi->lock); 949 - fi->attr_version = atomic64_inc_return(&fc->attr_version); 872 + fi->attr_version = atomic64_inc_return(&fm->fc->attr_version); 950 873 if (likely(inode->i_nlink < UINT_MAX)) 951 874 inc_nlink(inode); 952 875 spin_unlock(&fi->lock); ··· 1003 926 int err; 1004 927 struct fuse_getattr_in inarg; 1005 928 struct fuse_attr_out outarg; 1006 - struct fuse_conn *fc = get_fuse_conn(inode); 929 + struct fuse_mount *fm = get_fuse_mount(inode); 1007 930 FUSE_ARGS(args); 1008 931 u64 attr_version; 1009 932 1010 - attr_version = fuse_get_attr_version(fc); 933 + attr_version = fuse_get_attr_version(fm->fc); 1011 934 1012 935 memset(&inarg, 0, sizeof(inarg)); 1013 936 memset(&outarg, 0, sizeof(outarg)); ··· 1026 949 args.out_numargs = 1; 1027 950 args.out_args[0].size = sizeof(outarg); 1028 951 args.out_args[0].value = &outarg; 1029 - err = fuse_simple_request(fc, &args); 952 + err = fuse_simple_request(fm, &args); 1030 953 if (!err) { 1031 954 if (fuse_invalid_attr(&outarg.attr) || 1032 955 (inode->i_mode ^ outarg.attr.mode) & S_IFMT) { ··· 1079 1002 STATX_BASIC_STATS & ~STATX_ATIME, 0); 1080 1003 } 1081 1004 1082 - int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, 1005 + int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid, 1083 1006 u64 child_nodeid, struct qstr *name) 1084 1007 { 1085 1008 int err = -ENOTDIR; ··· 1087 1010 struct dentry *dir; 1088 1011 struct dentry *entry; 1089 1012 1090 - parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid); 1013 + parent = fuse_ilookup(fc, parent_nodeid, NULL); 1091 1014 if (!parent) 1092 1015 return -ENOENT; 1093 1016 ··· 1179 1102 1180 1103 static int fuse_access(struct inode *inode, int mask) 1181 1104 { 1182 - struct fuse_conn *fc = get_fuse_conn(inode); 1105 + struct fuse_mount *fm = get_fuse_mount(inode); 1183 1106 FUSE_ARGS(args); 1184 1107 struct fuse_access_in inarg; 1185 1108 int err; 1186 1109 1187 1110 BUG_ON(mask & MAY_NOT_BLOCK); 1188 1111 1189 - if (fc->no_access) 1112 + if (fm->fc->no_access) 1190 1113 return 0; 1191 1114 1192 1115 memset(&inarg, 0, sizeof(inarg)); ··· 1196 1119 args.in_numargs = 1; 1197 1120 args.in_args[0].size = sizeof(inarg); 1198 1121 args.in_args[0].value = &inarg; 1199 - err = fuse_simple_request(fc, &args); 1122 + err = fuse_simple_request(fm, &args); 1200 1123 if (err == -ENOSYS) { 1201 - fc->no_access = 1; 1124 + fm->fc->no_access = 1; 1202 1125 err = 0; 1203 1126 } 1204 1127 return err; ··· 1286 1209 1287 1210 static int fuse_readlink_page(struct inode *inode, struct page *page) 1288 1211 { 1289 - struct fuse_conn *fc = get_fuse_conn(inode); 1212 + struct fuse_mount *fm = get_fuse_mount(inode); 1290 1213 struct fuse_page_desc desc = { .length = PAGE_SIZE - 1 }; 1291 1214 struct fuse_args_pages ap = { 1292 1215 .num_pages = 1, ··· 1303 1226 ap.args.page_zeroing = true; 1304 1227 ap.args.out_numargs = 1; 1305 1228 ap.args.out_args[0].size = desc.length; 1306 - res = fuse_simple_request(fc, &ap.args); 1229 + res = fuse_simple_request(fm, &ap.args); 1307 1230 1308 1231 fuse_invalidate_atime(inode); 1309 1232 ··· 1531 1454 */ 1532 1455 int fuse_flush_times(struct inode *inode, struct fuse_file *ff) 1533 1456 { 1534 - struct fuse_conn *fc = get_fuse_conn(inode); 1457 + struct fuse_mount *fm = get_fuse_mount(inode); 1535 1458 FUSE_ARGS(args); 1536 1459 struct fuse_setattr_in inarg; 1537 1460 struct fuse_attr_out outarg; ··· 
1542 1465 inarg.valid = FATTR_MTIME; 1543 1466 inarg.mtime = inode->i_mtime.tv_sec; 1544 1467 inarg.mtimensec = inode->i_mtime.tv_nsec; 1545 - if (fc->minor >= 23) { 1468 + if (fm->fc->minor >= 23) { 1546 1469 inarg.valid |= FATTR_CTIME; 1547 1470 inarg.ctime = inode->i_ctime.tv_sec; 1548 1471 inarg.ctimensec = inode->i_ctime.tv_nsec; ··· 1551 1474 inarg.valid |= FATTR_FH; 1552 1475 inarg.fh = ff->fh; 1553 1476 } 1554 - fuse_setattr_fill(fc, &args, inode, &inarg, &outarg); 1477 + fuse_setattr_fill(fm->fc, &args, inode, &inarg, &outarg); 1555 1478 1556 - return fuse_simple_request(fc, &args); 1479 + return fuse_simple_request(fm, &args); 1557 1480 } 1558 1481 1559 1482 /* ··· 1568 1491 struct file *file) 1569 1492 { 1570 1493 struct inode *inode = d_inode(dentry); 1571 - struct fuse_conn *fc = get_fuse_conn(inode); 1494 + struct fuse_mount *fm = get_fuse_mount(inode); 1495 + struct fuse_conn *fc = fm->fc; 1572 1496 struct fuse_inode *fi = get_fuse_inode(inode); 1573 1497 FUSE_ARGS(args); 1574 1498 struct fuse_setattr_in inarg; ··· 1579 1501 loff_t oldsize; 1580 1502 int err; 1581 1503 bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode); 1504 + bool fault_blocked = false; 1582 1505 1583 1506 if (!fc->default_permissions) 1584 1507 attr->ia_valid |= ATTR_FORCE; ··· 1587 1508 err = setattr_prepare(dentry, attr); 1588 1509 if (err) 1589 1510 return err; 1511 + 1512 + if (attr->ia_valid & ATTR_SIZE) { 1513 + if (WARN_ON(!S_ISREG(inode->i_mode))) 1514 + return -EIO; 1515 + is_truncate = true; 1516 + } 1517 + 1518 + if (FUSE_IS_DAX(inode) && is_truncate) { 1519 + down_write(&fi->i_mmap_sem); 1520 + fault_blocked = true; 1521 + err = fuse_dax_break_layouts(inode, 0, 0); 1522 + if (err) { 1523 + up_write(&fi->i_mmap_sem); 1524 + return err; 1525 + } 1526 + } 1590 1527 1591 1528 if (attr->ia_valid & ATTR_OPEN) { 1592 1529 /* This is coming from open(..., ... | O_TRUNC); */ ··· 1616 1521 */ 1617 1522 i_size_write(inode, 0); 1618 1523 truncate_pagecache(inode, 0); 1619 - return 0; 1524 + goto out; 1620 1525 } 1621 1526 file = NULL; 1622 - } 1623 - 1624 - if (attr->ia_valid & ATTR_SIZE) { 1625 - if (WARN_ON(!S_ISREG(inode->i_mode))) 1626 - return -EIO; 1627 - is_truncate = true; 1628 1527 } 1629 1528 1630 1529 /* Flush dirty data/metadata before non-truncate SETATTR */ ··· 1655 1566 inarg.lock_owner = fuse_lock_owner_id(fc, current->files); 1656 1567 } 1657 1568 fuse_setattr_fill(fc, &args, inode, &inarg, &outarg); 1658 - err = fuse_simple_request(fc, &args); 1569 + err = fuse_simple_request(fm, &args); 1659 1570 if (err) { 1660 1571 if (err == -EINTR) 1661 1572 fuse_invalidate_attr(inode); ··· 1703 1614 } 1704 1615 1705 1616 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); 1617 + out: 1618 + if (fault_blocked) 1619 + up_write(&fi->i_mmap_sem); 1620 + 1706 1621 return 0; 1707 1622 1708 1623 error: ··· 1714 1621 fuse_release_nowrite(inode); 1715 1622 1716 1623 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); 1624 + 1625 + if (fault_blocked) 1626 + up_write(&fi->i_mmap_sem); 1717 1627 return err; 1718 1628 } 1719 1629
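The setattr changes above serialize truncation against the DAX fault path: when FUSE_IS_DAX(inode) and the size is changing, fi->i_mmap_sem is taken for write, fuse_dax_break_layouts() waits for pinned DAX pages to go away, and the semaphore is held until the size change has completed (both the out: and error paths drop it). A rough stand-alone sketch of that ordering, using a pthread rwlock in place of i_mmap_sem and a stub in place of the layout-breaking helper:

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t i_mmap_sem = PTHREAD_RWLOCK_INITIALIZER;

/*
 * Stand-in: the real fuse_dax_break_layouts() waits until no DAX page in
 * the affected range has an elevated reference count.
 */
static int break_dax_layouts(void)
{
	return 0;
}

static int truncate_with_dax(long new_size)
{
	int err;

	pthread_rwlock_wrlock(&i_mmap_sem);	/* new page faults are blocked */
	err = break_dax_layouts();		/* wait out existing references */
	if (!err)
		printf("size change to %ld with no live DAX mappings\n",
		       new_size);
	pthread_rwlock_unlock(&i_mmap_sem);	/* faults may proceed again */
	return err;
}

int main(void)
{
	return truncate_with_dax(0);
}

The exclusive hold is what makes this sufficient: the DAX fault path added elsewhere in this series takes the same semaphore shared, so no new mapping can be established while the size change is in flight.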
+149 -106
fs/fuse/file.c
··· 32 32 return pages; 33 33 } 34 34 35 - static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file, 35 + static int fuse_send_open(struct fuse_mount *fm, u64 nodeid, struct file *file, 36 36 int opcode, struct fuse_open_out *outargp) 37 37 { 38 38 struct fuse_open_in inarg; ··· 40 40 41 41 memset(&inarg, 0, sizeof(inarg)); 42 42 inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY); 43 - if (!fc->atomic_o_trunc) 43 + if (!fm->fc->atomic_o_trunc) 44 44 inarg.flags &= ~O_TRUNC; 45 45 args.opcode = opcode; 46 46 args.nodeid = nodeid; ··· 51 51 args.out_args[0].size = sizeof(*outargp); 52 52 args.out_args[0].value = outargp; 53 53 54 - return fuse_simple_request(fc, &args); 54 + return fuse_simple_request(fm, &args); 55 55 } 56 56 57 57 struct fuse_release_args { ··· 60 60 struct inode *inode; 61 61 }; 62 62 63 - struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) 63 + struct fuse_file *fuse_file_alloc(struct fuse_mount *fm) 64 64 { 65 65 struct fuse_file *ff; 66 66 ··· 68 68 if (unlikely(!ff)) 69 69 return NULL; 70 70 71 - ff->fc = fc; 71 + ff->fm = fm; 72 72 ff->release_args = kzalloc(sizeof(*ff->release_args), 73 73 GFP_KERNEL_ACCOUNT); 74 74 if (!ff->release_args) { ··· 82 82 RB_CLEAR_NODE(&ff->polled_node); 83 83 init_waitqueue_head(&ff->poll_wait); 84 84 85 - ff->kh = atomic64_inc_return(&fc->khctr); 85 + ff->kh = atomic64_inc_return(&fm->fc->khctr); 86 86 87 87 return ff; 88 88 } ··· 100 100 return ff; 101 101 } 102 102 103 - static void fuse_release_end(struct fuse_conn *fc, struct fuse_args *args, 103 + static void fuse_release_end(struct fuse_mount *fm, struct fuse_args *args, 104 104 int error) 105 105 { 106 106 struct fuse_release_args *ra = container_of(args, typeof(*ra), args); ··· 114 114 if (refcount_dec_and_test(&ff->count)) { 115 115 struct fuse_args *args = &ff->release_args->args; 116 116 117 - if (isdir ? ff->fc->no_opendir : ff->fc->no_open) { 117 + if (isdir ? ff->fm->fc->no_opendir : ff->fm->fc->no_open) { 118 118 /* Do nothing when client does not implement 'open' */ 119 - fuse_release_end(ff->fc, args, 0); 119 + fuse_release_end(ff->fm, args, 0); 120 120 } else if (sync) { 121 - fuse_simple_request(ff->fc, args); 122 - fuse_release_end(ff->fc, args, 0); 121 + fuse_simple_request(ff->fm, args); 122 + fuse_release_end(ff->fm, args, 0); 123 123 } else { 124 124 args->end = fuse_release_end; 125 - if (fuse_simple_background(ff->fc, args, 125 + if (fuse_simple_background(ff->fm, args, 126 126 GFP_KERNEL | __GFP_NOFAIL)) 127 - fuse_release_end(ff->fc, args, -ENOTCONN); 127 + fuse_release_end(ff->fm, args, -ENOTCONN); 128 128 } 129 129 kfree(ff); 130 130 } 131 131 } 132 132 133 - int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, 133 + int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file, 134 134 bool isdir) 135 135 { 136 + struct fuse_conn *fc = fm->fc; 136 137 struct fuse_file *ff; 137 138 int opcode = isdir ? 
FUSE_OPENDIR : FUSE_OPEN; 138 139 139 - ff = fuse_file_alloc(fc); 140 + ff = fuse_file_alloc(fm); 140 141 if (!ff) 141 142 return -ENOMEM; 142 143 ··· 148 147 struct fuse_open_out outarg; 149 148 int err; 150 149 151 - err = fuse_send_open(fc, nodeid, file, opcode, &outarg); 150 + err = fuse_send_open(fm, nodeid, file, opcode, &outarg); 152 151 if (!err) { 153 152 ff->fh = outarg.fh; 154 153 ff->open_flags = outarg.open_flags; ··· 217 216 218 217 int fuse_open_common(struct inode *inode, struct file *file, bool isdir) 219 218 { 220 - struct fuse_conn *fc = get_fuse_conn(inode); 219 + struct fuse_mount *fm = get_fuse_mount(inode); 220 + struct fuse_conn *fc = fm->fc; 221 221 int err; 222 222 bool is_wb_truncate = (file->f_flags & O_TRUNC) && 223 223 fc->atomic_o_trunc && 224 224 fc->writeback_cache; 225 + bool dax_truncate = (file->f_flags & O_TRUNC) && 226 + fc->atomic_o_trunc && FUSE_IS_DAX(inode); 225 227 226 228 err = generic_file_open(inode, file); 227 229 if (err) 228 230 return err; 229 231 230 - if (is_wb_truncate) { 232 + if (is_wb_truncate || dax_truncate) { 231 233 inode_lock(inode); 232 234 fuse_set_nowrite(inode); 233 235 } 234 236 235 - err = fuse_do_open(fc, get_node_id(inode), file, isdir); 237 + if (dax_truncate) { 238 + down_write(&get_fuse_inode(inode)->i_mmap_sem); 239 + err = fuse_dax_break_layouts(inode, 0, 0); 240 + if (err) 241 + goto out; 242 + } 236 243 244 + err = fuse_do_open(fm, get_node_id(inode), file, isdir); 237 245 if (!err) 238 246 fuse_finish_open(inode, file); 239 247 240 - if (is_wb_truncate) { 248 + out: 249 + if (dax_truncate) 250 + up_write(&get_fuse_inode(inode)->i_mmap_sem); 251 + 252 + if (is_wb_truncate | dax_truncate) { 241 253 fuse_release_nowrite(inode); 242 254 inode_unlock(inode); 243 255 } ··· 261 247 static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff, 262 248 int flags, int opcode) 263 249 { 264 - struct fuse_conn *fc = ff->fc; 250 + struct fuse_conn *fc = ff->fm->fc; 265 251 struct fuse_release_args *ra = ff->release_args; 266 252 267 253 /* Inode is NULL on error path of fuse_create_open() */ ··· 299 285 300 286 if (ff->flock) { 301 287 ra->inarg.release_flags |= FUSE_RELEASE_FLOCK_UNLOCK; 302 - ra->inarg.lock_owner = fuse_lock_owner_id(ff->fc, 288 + ra->inarg.lock_owner = fuse_lock_owner_id(ff->fm->fc, 303 289 (fl_owner_t) file); 304 290 } 305 291 /* Hold inode until release is finished */ ··· 314 300 * synchronous RELEASE is allowed (and desirable) in this case 315 301 * because the server can be trusted not to screw up. 
316 302 */ 317 - fuse_file_put(ff, ff->fc->destroy, isdir); 303 + fuse_file_put(ff, ff->fm->fc->destroy, isdir); 318 304 } 319 305 320 306 static int fuse_open(struct inode *inode, struct file *file) ··· 457 443 static int fuse_flush(struct file *file, fl_owner_t id) 458 444 { 459 445 struct inode *inode = file_inode(file); 460 - struct fuse_conn *fc = get_fuse_conn(inode); 446 + struct fuse_mount *fm = get_fuse_mount(inode); 461 447 struct fuse_file *ff = file->private_data; 462 448 struct fuse_flush_in inarg; 463 449 FUSE_ARGS(args); ··· 479 465 return err; 480 466 481 467 err = 0; 482 - if (fc->no_flush) 468 + if (fm->fc->no_flush) 483 469 goto inval_attr_out; 484 470 485 471 memset(&inarg, 0, sizeof(inarg)); 486 472 inarg.fh = ff->fh; 487 - inarg.lock_owner = fuse_lock_owner_id(fc, id); 473 + inarg.lock_owner = fuse_lock_owner_id(fm->fc, id); 488 474 args.opcode = FUSE_FLUSH; 489 475 args.nodeid = get_node_id(inode); 490 476 args.in_numargs = 1; ··· 492 478 args.in_args[0].value = &inarg; 493 479 args.force = true; 494 480 495 - err = fuse_simple_request(fc, &args); 481 + err = fuse_simple_request(fm, &args); 496 482 if (err == -ENOSYS) { 497 - fc->no_flush = 1; 483 + fm->fc->no_flush = 1; 498 484 err = 0; 499 485 } 500 486 ··· 503 489 * In memory i_blocks is not maintained by fuse, if writeback cache is 504 490 * enabled, i_blocks from cached attr may not be accurate. 505 491 */ 506 - if (!err && fc->writeback_cache) 492 + if (!err && fm->fc->writeback_cache) 507 493 fuse_invalidate_attr(inode); 508 494 return err; 509 495 } ··· 512 498 int datasync, int opcode) 513 499 { 514 500 struct inode *inode = file->f_mapping->host; 515 - struct fuse_conn *fc = get_fuse_conn(inode); 501 + struct fuse_mount *fm = get_fuse_mount(inode); 516 502 struct fuse_file *ff = file->private_data; 517 503 FUSE_ARGS(args); 518 504 struct fuse_fsync_in inarg; ··· 525 511 args.in_numargs = 1; 526 512 args.in_args[0].size = sizeof(inarg); 527 513 args.in_args[0].value = &inarg; 528 - return fuse_simple_request(fc, &args); 514 + return fuse_simple_request(fm, &args); 529 515 } 530 516 531 517 static int fuse_fsync(struct file *file, loff_t start, loff_t end, ··· 700 686 kfree(ia); 701 687 } 702 688 703 - static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_args *args, 689 + static void fuse_aio_complete_req(struct fuse_mount *fm, struct fuse_args *args, 704 690 int err) 705 691 { 706 692 struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args); ··· 729 715 fuse_io_free(ia); 730 716 } 731 717 732 - static ssize_t fuse_async_req_send(struct fuse_conn *fc, 718 + static ssize_t fuse_async_req_send(struct fuse_mount *fm, 733 719 struct fuse_io_args *ia, size_t num_bytes) 734 720 { 735 721 ssize_t err; ··· 743 729 744 730 ia->ap.args.end = fuse_aio_complete_req; 745 731 ia->ap.args.may_block = io->should_dirty; 746 - err = fuse_simple_background(fc, &ia->ap.args, GFP_KERNEL); 732 + err = fuse_simple_background(fm, &ia->ap.args, GFP_KERNEL); 747 733 if (err) 748 - fuse_aio_complete_req(fc, &ia->ap.args, err); 734 + fuse_aio_complete_req(fm, &ia->ap.args, err); 749 735 750 736 return num_bytes; 751 737 } ··· 755 741 { 756 742 struct file *file = ia->io->iocb->ki_filp; 757 743 struct fuse_file *ff = file->private_data; 758 - struct fuse_conn *fc = ff->fc; 744 + struct fuse_mount *fm = ff->fm; 759 745 760 746 fuse_read_args_fill(ia, file, pos, count, FUSE_READ); 761 747 if (owner != NULL) { 762 748 ia->read.in.read_flags |= FUSE_READ_LOCKOWNER; 763 - ia->read.in.lock_owner = 
fuse_lock_owner_id(fc, owner); 749 + ia->read.in.lock_owner = fuse_lock_owner_id(fm->fc, owner); 764 750 } 765 751 766 752 if (ia->io->async) 767 - return fuse_async_req_send(fc, ia, count); 753 + return fuse_async_req_send(fm, ia, count); 768 754 769 - return fuse_simple_request(fc, &ia->ap.args); 755 + return fuse_simple_request(fm, &ia->ap.args); 770 756 } 771 757 772 758 static void fuse_read_update_size(struct inode *inode, loff_t size, ··· 812 798 static int fuse_do_readpage(struct file *file, struct page *page) 813 799 { 814 800 struct inode *inode = page->mapping->host; 815 - struct fuse_conn *fc = get_fuse_conn(inode); 801 + struct fuse_mount *fm = get_fuse_mount(inode); 816 802 loff_t pos = page_offset(page); 817 803 struct fuse_page_desc desc = { .length = PAGE_SIZE }; 818 804 struct fuse_io_args ia = { ··· 832 818 */ 833 819 fuse_wait_on_page_writeback(inode, page->index); 834 820 835 - attr_ver = fuse_get_attr_version(fc); 821 + attr_ver = fuse_get_attr_version(fm->fc); 836 822 837 823 /* Don't overflow end offset */ 838 824 if (pos + (desc.length - 1) == LLONG_MAX) 839 825 desc.length--; 840 826 841 827 fuse_read_args_fill(&ia, file, pos, desc.length, FUSE_READ); 842 - res = fuse_simple_request(fc, &ia.ap.args); 828 + res = fuse_simple_request(fm, &ia.ap.args); 843 829 if (res < 0) 844 830 return res; 845 831 /* ··· 869 855 return err; 870 856 } 871 857 872 - static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_args *args, 858 + static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args, 873 859 int err) 874 860 { 875 861 int i; ··· 913 899 static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file) 914 900 { 915 901 struct fuse_file *ff = file->private_data; 916 - struct fuse_conn *fc = ff->fc; 902 + struct fuse_mount *fm = ff->fm; 917 903 struct fuse_args_pages *ap = &ia->ap; 918 904 loff_t pos = page_offset(ap->pages[0]); 919 905 size_t count = ap->num_pages << PAGE_SHIFT; ··· 932 918 WARN_ON((loff_t) (pos + count) < 0); 933 919 934 920 fuse_read_args_fill(ia, file, pos, count, FUSE_READ); 935 - ia->read.attr_ver = fuse_get_attr_version(fc); 936 - if (fc->async_read) { 921 + ia->read.attr_ver = fuse_get_attr_version(fm->fc); 922 + if (fm->fc->async_read) { 937 923 ia->ff = fuse_file_get(ff); 938 924 ap->args.end = fuse_readpages_end; 939 - err = fuse_simple_background(fc, &ap->args, GFP_KERNEL); 925 + err = fuse_simple_background(fm, &ap->args, GFP_KERNEL); 940 926 if (!err) 941 927 return; 942 928 } else { 943 - res = fuse_simple_request(fc, &ap->args); 929 + res = fuse_simple_request(fm, &ap->args); 944 930 err = res < 0 ? 
res : 0; 945 931 } 946 - fuse_readpages_end(fc, &ap->args, err); 932 + fuse_readpages_end(fm, &ap->args, err); 947 933 } 948 934 949 935 static void fuse_readahead(struct readahead_control *rac) ··· 1014 1000 args->opcode = FUSE_WRITE; 1015 1001 args->nodeid = ff->nodeid; 1016 1002 args->in_numargs = 2; 1017 - if (ff->fc->minor < 9) 1003 + if (ff->fm->fc->minor < 9) 1018 1004 args->in_args[0].size = FUSE_COMPAT_WRITE_IN_SIZE; 1019 1005 else 1020 1006 args->in_args[0].size = sizeof(ia->write.in); ··· 1043 1029 struct kiocb *iocb = ia->io->iocb; 1044 1030 struct file *file = iocb->ki_filp; 1045 1031 struct fuse_file *ff = file->private_data; 1046 - struct fuse_conn *fc = ff->fc; 1032 + struct fuse_mount *fm = ff->fm; 1047 1033 struct fuse_write_in *inarg = &ia->write.in; 1048 1034 ssize_t err; 1049 1035 ··· 1051 1037 inarg->flags = fuse_write_flags(iocb); 1052 1038 if (owner != NULL) { 1053 1039 inarg->write_flags |= FUSE_WRITE_LOCKOWNER; 1054 - inarg->lock_owner = fuse_lock_owner_id(fc, owner); 1040 + inarg->lock_owner = fuse_lock_owner_id(fm->fc, owner); 1055 1041 } 1056 1042 1057 1043 if (ia->io->async) 1058 - return fuse_async_req_send(fc, ia, count); 1044 + return fuse_async_req_send(fm, ia, count); 1059 1045 1060 - err = fuse_simple_request(fc, &ia->ap.args); 1046 + err = fuse_simple_request(fm, &ia->ap.args); 1061 1047 if (!err && ia->write.out.size > count) 1062 1048 err = -EIO; 1063 1049 ··· 1088 1074 struct fuse_args_pages *ap = &ia->ap; 1089 1075 struct file *file = iocb->ki_filp; 1090 1076 struct fuse_file *ff = file->private_data; 1091 - struct fuse_conn *fc = ff->fc; 1077 + struct fuse_mount *fm = ff->fm; 1092 1078 unsigned int offset, i; 1093 1079 int err; 1094 1080 ··· 1098 1084 fuse_write_args_fill(ia, ff, pos, count); 1099 1085 ia->write.in.flags = fuse_write_flags(iocb); 1100 1086 1101 - err = fuse_simple_request(fc, &ap->args); 1087 + err = fuse_simple_request(fm, &ap->args); 1102 1088 if (!err && ia->write.out.size > count) 1103 1089 err = -EIO; 1104 1090 ··· 1413 1399 struct file *file = io->iocb->ki_filp; 1414 1400 struct inode *inode = file->f_mapping->host; 1415 1401 struct fuse_file *ff = file->private_data; 1416 - struct fuse_conn *fc = ff->fc; 1402 + struct fuse_conn *fc = ff->fm->fc; 1417 1403 size_t nmax = write ? 
fc->max_write : fc->max_read; 1418 1404 loff_t pos = *ppos; 1419 1405 size_t count = iov_iter_count(iter); ··· 1553 1539 { 1554 1540 struct file *file = iocb->ki_filp; 1555 1541 struct fuse_file *ff = file->private_data; 1542 + struct inode *inode = file_inode(file); 1556 1543 1557 - if (is_bad_inode(file_inode(file))) 1544 + if (is_bad_inode(inode)) 1558 1545 return -EIO; 1546 + 1547 + if (FUSE_IS_DAX(inode)) 1548 + return fuse_dax_read_iter(iocb, to); 1559 1549 1560 1550 if (!(ff->open_flags & FOPEN_DIRECT_IO)) 1561 1551 return fuse_cache_read_iter(iocb, to); ··· 1571 1553 { 1572 1554 struct file *file = iocb->ki_filp; 1573 1555 struct fuse_file *ff = file->private_data; 1556 + struct inode *inode = file_inode(file); 1574 1557 1575 - if (is_bad_inode(file_inode(file))) 1558 + if (is_bad_inode(inode)) 1576 1559 return -EIO; 1560 + 1561 + if (FUSE_IS_DAX(inode)) 1562 + return fuse_dax_write_iter(iocb, from); 1577 1563 1578 1564 if (!(ff->open_flags & FOPEN_DIRECT_IO)) 1579 1565 return fuse_cache_write_iter(iocb, from); ··· 1600 1578 kfree(wpa); 1601 1579 } 1602 1580 1603 - static void fuse_writepage_finish(struct fuse_conn *fc, 1581 + static void fuse_writepage_finish(struct fuse_mount *fm, 1604 1582 struct fuse_writepage_args *wpa) 1605 1583 { 1606 1584 struct fuse_args_pages *ap = &wpa->ia.ap; ··· 1618 1596 } 1619 1597 1620 1598 /* Called under fi->lock, may release and reacquire it */ 1621 - static void fuse_send_writepage(struct fuse_conn *fc, 1599 + static void fuse_send_writepage(struct fuse_mount *fm, 1622 1600 struct fuse_writepage_args *wpa, loff_t size) 1623 1601 __releases(fi->lock) 1624 1602 __acquires(fi->lock) ··· 1644 1622 args->force = true; 1645 1623 args->nocreds = true; 1646 1624 1647 - err = fuse_simple_background(fc, args, GFP_ATOMIC); 1625 + err = fuse_simple_background(fm, args, GFP_ATOMIC); 1648 1626 if (err == -ENOMEM) { 1649 1627 spin_unlock(&fi->lock); 1650 - err = fuse_simple_background(fc, args, GFP_NOFS | __GFP_NOFAIL); 1628 + err = fuse_simple_background(fm, args, GFP_NOFS | __GFP_NOFAIL); 1651 1629 spin_lock(&fi->lock); 1652 1630 } 1653 1631 ··· 1660 1638 out_free: 1661 1639 fi->writectr--; 1662 1640 rb_erase(&wpa->writepages_entry, &fi->writepages); 1663 - fuse_writepage_finish(fc, wpa); 1641 + fuse_writepage_finish(fm, wpa); 1664 1642 spin_unlock(&fi->lock); 1665 1643 1666 1644 /* After fuse_writepage_finish() aux request list is private */ ··· 1684 1662 __releases(fi->lock) 1685 1663 __acquires(fi->lock) 1686 1664 { 1687 - struct fuse_conn *fc = get_fuse_conn(inode); 1665 + struct fuse_mount *fm = get_fuse_mount(inode); 1688 1666 struct fuse_inode *fi = get_fuse_inode(inode); 1689 1667 loff_t crop = i_size_read(inode); 1690 1668 struct fuse_writepage_args *wpa; ··· 1693 1671 wpa = list_entry(fi->queued_writes.next, 1694 1672 struct fuse_writepage_args, queue_entry); 1695 1673 list_del_init(&wpa->queue_entry); 1696 - fuse_send_writepage(fc, wpa, crop); 1674 + fuse_send_writepage(fm, wpa, crop); 1697 1675 } 1698 1676 } 1699 1677 ··· 1734 1712 WARN_ON(fuse_insert_writeback(root, wpa)); 1735 1713 } 1736 1714 1737 - static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args, 1715 + static void fuse_writepage_end(struct fuse_mount *fm, struct fuse_args *args, 1738 1716 int error) 1739 1717 { 1740 1718 struct fuse_writepage_args *wpa = ··· 1746 1724 spin_lock(&fi->lock); 1747 1725 rb_erase(&wpa->writepages_entry, &fi->writepages); 1748 1726 while (wpa->next) { 1749 - struct fuse_conn *fc = get_fuse_conn(inode); 1727 + struct fuse_mount *fm = 
get_fuse_mount(inode); 1750 1728 struct fuse_write_in *inarg = &wpa->ia.write.in; 1751 1729 struct fuse_writepage_args *next = wpa->next; 1752 1730 ··· 1778 1756 * no invocations of fuse_writepage_end() while we're in 1779 1757 * fuse_set_nowrite..fuse_release_nowrite section. 1780 1758 */ 1781 - fuse_send_writepage(fc, next, inarg->offset + inarg->size); 1759 + fuse_send_writepage(fm, next, inarg->offset + inarg->size); 1782 1760 } 1783 1761 fi->writectr--; 1784 - fuse_writepage_finish(fc, wpa); 1762 + fuse_writepage_finish(fm, wpa); 1785 1763 spin_unlock(&fi->lock); 1786 1764 fuse_writepage_free(wpa); 1787 1765 } ··· 2339 2317 { 2340 2318 struct fuse_file *ff = file->private_data; 2341 2319 2320 + /* DAX mmap is superior to direct_io mmap */ 2321 + if (FUSE_IS_DAX(file_inode(file))) 2322 + return fuse_dax_mmap(file, vma); 2323 + 2342 2324 if (ff->open_flags & FOPEN_DIRECT_IO) { 2343 2325 /* Can't provide the coherency needed for MAP_SHARED */ 2344 2326 if (vma->vm_flags & VM_MAYSHARE) ··· 2421 2395 static int fuse_getlk(struct file *file, struct file_lock *fl) 2422 2396 { 2423 2397 struct inode *inode = file_inode(file); 2424 - struct fuse_conn *fc = get_fuse_conn(inode); 2398 + struct fuse_mount *fm = get_fuse_mount(inode); 2425 2399 FUSE_ARGS(args); 2426 2400 struct fuse_lk_in inarg; 2427 2401 struct fuse_lk_out outarg; ··· 2431 2405 args.out_numargs = 1; 2432 2406 args.out_args[0].size = sizeof(outarg); 2433 2407 args.out_args[0].value = &outarg; 2434 - err = fuse_simple_request(fc, &args); 2408 + err = fuse_simple_request(fm, &args); 2435 2409 if (!err) 2436 - err = convert_fuse_file_lock(fc, &outarg.lk, fl); 2410 + err = convert_fuse_file_lock(fm->fc, &outarg.lk, fl); 2437 2411 2438 2412 return err; 2439 2413 } ··· 2441 2415 static int fuse_setlk(struct file *file, struct file_lock *fl, int flock) 2442 2416 { 2443 2417 struct inode *inode = file_inode(file); 2444 - struct fuse_conn *fc = get_fuse_conn(inode); 2418 + struct fuse_mount *fm = get_fuse_mount(inode); 2445 2419 FUSE_ARGS(args); 2446 2420 struct fuse_lk_in inarg; 2447 2421 int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK; 2448 2422 struct pid *pid = fl->fl_type != F_UNLCK ? 
task_tgid(current) : NULL; 2449 - pid_t pid_nr = pid_nr_ns(pid, fc->pid_ns); 2423 + pid_t pid_nr = pid_nr_ns(pid, fm->fc->pid_ns); 2450 2424 int err; 2451 2425 2452 2426 if (fl->fl_lmops && fl->fl_lmops->lm_grant) { ··· 2459 2433 return 0; 2460 2434 2461 2435 fuse_lk_fill(&args, file, fl, opcode, pid_nr, flock, &inarg); 2462 - err = fuse_simple_request(fc, &args); 2436 + err = fuse_simple_request(fm, &args); 2463 2437 2464 2438 /* locking is restartable */ 2465 2439 if (err == -EINTR) ··· 2513 2487 static sector_t fuse_bmap(struct address_space *mapping, sector_t block) 2514 2488 { 2515 2489 struct inode *inode = mapping->host; 2516 - struct fuse_conn *fc = get_fuse_conn(inode); 2490 + struct fuse_mount *fm = get_fuse_mount(inode); 2517 2491 FUSE_ARGS(args); 2518 2492 struct fuse_bmap_in inarg; 2519 2493 struct fuse_bmap_out outarg; 2520 2494 int err; 2521 2495 2522 - if (!inode->i_sb->s_bdev || fc->no_bmap) 2496 + if (!inode->i_sb->s_bdev || fm->fc->no_bmap) 2523 2497 return 0; 2524 2498 2525 2499 memset(&inarg, 0, sizeof(inarg)); ··· 2533 2507 args.out_numargs = 1; 2534 2508 args.out_args[0].size = sizeof(outarg); 2535 2509 args.out_args[0].value = &outarg; 2536 - err = fuse_simple_request(fc, &args); 2510 + err = fuse_simple_request(fm, &args); 2537 2511 if (err == -ENOSYS) 2538 - fc->no_bmap = 1; 2512 + fm->fc->no_bmap = 1; 2539 2513 2540 2514 return err ? 0 : outarg.block; 2541 2515 } ··· 2543 2517 static loff_t fuse_lseek(struct file *file, loff_t offset, int whence) 2544 2518 { 2545 2519 struct inode *inode = file->f_mapping->host; 2546 - struct fuse_conn *fc = get_fuse_conn(inode); 2520 + struct fuse_mount *fm = get_fuse_mount(inode); 2547 2521 struct fuse_file *ff = file->private_data; 2548 2522 FUSE_ARGS(args); 2549 2523 struct fuse_lseek_in inarg = { ··· 2554 2528 struct fuse_lseek_out outarg; 2555 2529 int err; 2556 2530 2557 - if (fc->no_lseek) 2531 + if (fm->fc->no_lseek) 2558 2532 goto fallback; 2559 2533 2560 2534 args.opcode = FUSE_LSEEK; ··· 2565 2539 args.out_numargs = 1; 2566 2540 args.out_args[0].size = sizeof(outarg); 2567 2541 args.out_args[0].value = &outarg; 2568 - err = fuse_simple_request(fc, &args); 2542 + err = fuse_simple_request(fm, &args); 2569 2543 if (err) { 2570 2544 if (err == -ENOSYS) { 2571 - fc->no_lseek = 1; 2545 + fm->fc->no_lseek = 1; 2572 2546 goto fallback; 2573 2547 } 2574 2548 return err; ··· 2754 2728 unsigned int flags) 2755 2729 { 2756 2730 struct fuse_file *ff = file->private_data; 2757 - struct fuse_conn *fc = ff->fc; 2731 + struct fuse_mount *fm = ff->fm; 2758 2732 struct fuse_ioctl_in inarg = { 2759 2733 .fh = ff->fh, 2760 2734 .cmd = cmd, ··· 2787 2761 BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); 2788 2762 2789 2763 err = -ENOMEM; 2790 - ap.pages = fuse_pages_alloc(fc->max_pages, GFP_KERNEL, &ap.descs); 2764 + ap.pages = fuse_pages_alloc(fm->fc->max_pages, GFP_KERNEL, &ap.descs); 2791 2765 iov_page = (struct iovec *) __get_free_page(GFP_KERNEL); 2792 2766 if (!ap.pages || !iov_page) 2793 2767 goto out; 2794 2768 2795 - fuse_page_descs_length_init(ap.descs, 0, fc->max_pages); 2769 + fuse_page_descs_length_init(ap.descs, 0, fm->fc->max_pages); 2796 2770 2797 2771 /* 2798 2772 * If restricted, initialize IO parameters as encoded in @cmd. 
··· 2837 2811 2838 2812 /* make sure there are enough buffer pages and init request with them */ 2839 2813 err = -ENOMEM; 2840 - if (max_pages > fc->max_pages) 2814 + if (max_pages > fm->fc->max_pages) 2841 2815 goto out; 2842 2816 while (ap.num_pages < max_pages) { 2843 2817 ap.pages[ap.num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); ··· 2874 2848 ap.args.out_pages = true; 2875 2849 ap.args.out_argvar = true; 2876 2850 2877 - transferred = fuse_simple_request(fc, &ap.args); 2851 + transferred = fuse_simple_request(fm, &ap.args); 2878 2852 err = transferred; 2879 2853 if (transferred < 0) 2880 2854 goto out; ··· 2902 2876 goto out; 2903 2877 2904 2878 vaddr = kmap_atomic(ap.pages[0]); 2905 - err = fuse_copy_ioctl_iovec(fc, iov_page, vaddr, 2879 + err = fuse_copy_ioctl_iovec(fm->fc, iov_page, vaddr, 2906 2880 transferred, in_iovs + out_iovs, 2907 2881 (flags & FUSE_IOCTL_COMPAT) != 0); 2908 2882 kunmap_atomic(vaddr); ··· 2912 2886 in_iov = iov_page; 2913 2887 out_iov = in_iov + in_iovs; 2914 2888 2915 - err = fuse_verify_ioctl_iov(fc, in_iov, in_iovs); 2889 + err = fuse_verify_ioctl_iov(fm->fc, in_iov, in_iovs); 2916 2890 if (err) 2917 2891 goto out; 2918 2892 2919 - err = fuse_verify_ioctl_iov(fc, out_iov, out_iovs); 2893 + err = fuse_verify_ioctl_iov(fm->fc, out_iov, out_iovs); 2920 2894 if (err) 2921 2895 goto out; 2922 2896 ··· 3026 3000 __poll_t fuse_file_poll(struct file *file, poll_table *wait) 3027 3001 { 3028 3002 struct fuse_file *ff = file->private_data; 3029 - struct fuse_conn *fc = ff->fc; 3003 + struct fuse_mount *fm = ff->fm; 3030 3004 struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh }; 3031 3005 struct fuse_poll_out outarg; 3032 3006 FUSE_ARGS(args); 3033 3007 int err; 3034 3008 3035 - if (fc->no_poll) 3009 + if (fm->fc->no_poll) 3036 3010 return DEFAULT_POLLMASK; 3037 3011 3038 3012 poll_wait(file, &ff->poll_wait, wait); ··· 3044 3018 */ 3045 3019 if (waitqueue_active(&ff->poll_wait)) { 3046 3020 inarg.flags |= FUSE_POLL_SCHEDULE_NOTIFY; 3047 - fuse_register_polled_file(fc, ff); 3021 + fuse_register_polled_file(fm->fc, ff); 3048 3022 } 3049 3023 3050 3024 args.opcode = FUSE_POLL; ··· 3055 3029 args.out_numargs = 1; 3056 3030 args.out_args[0].size = sizeof(outarg); 3057 3031 args.out_args[0].value = &outarg; 3058 - err = fuse_simple_request(fc, &args); 3032 + err = fuse_simple_request(fm, &args); 3059 3033 3060 3034 if (!err) 3061 3035 return demangle_poll(outarg.revents); 3062 3036 if (err == -ENOSYS) { 3063 - fc->no_poll = 1; 3037 + fm->fc->no_poll = 1; 3064 3038 return DEFAULT_POLLMASK; 3065 3039 } 3066 3040 return EPOLLERR; ··· 3146 3120 * By default, we want to optimize all I/Os with async request 3147 3121 * submission to the client filesystem if supported. 
3148 3122 */ 3149 - io->async = ff->fc->async_dio; 3123 + io->async = ff->fm->fc->async_dio; 3150 3124 io->iocb = iocb; 3151 3125 io->blocking = is_sync_kiocb(iocb); 3152 3126 3153 3127 /* optimization for short read */ 3154 3128 if (io->async && !io->write && offset + count > i_size) { 3155 - iov_iter_truncate(iter, fuse_round_up(ff->fc, i_size - offset)); 3129 + iov_iter_truncate(iter, fuse_round_up(ff->fm->fc, i_size - offset)); 3156 3130 shortened = count - iov_iter_count(iter); 3157 3131 count -= shortened; 3158 3132 } ··· 3222 3196 struct fuse_file *ff = file->private_data; 3223 3197 struct inode *inode = file_inode(file); 3224 3198 struct fuse_inode *fi = get_fuse_inode(inode); 3225 - struct fuse_conn *fc = ff->fc; 3199 + struct fuse_mount *fm = ff->fm; 3226 3200 FUSE_ARGS(args); 3227 3201 struct fuse_fallocate_in inarg = { 3228 3202 .fh = ff->fh, ··· 3234 3208 bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) || 3235 3209 (mode & FALLOC_FL_PUNCH_HOLE); 3236 3210 3211 + bool block_faults = FUSE_IS_DAX(inode) && lock_inode; 3212 + 3237 3213 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 3238 3214 return -EOPNOTSUPP; 3239 3215 3240 - if (fc->no_fallocate) 3216 + if (fm->fc->no_fallocate) 3241 3217 return -EOPNOTSUPP; 3242 3218 3243 3219 if (lock_inode) { 3244 3220 inode_lock(inode); 3221 + if (block_faults) { 3222 + down_write(&fi->i_mmap_sem); 3223 + err = fuse_dax_break_layouts(inode, 0, 0); 3224 + if (err) 3225 + goto out; 3226 + } 3227 + 3245 3228 if (mode & FALLOC_FL_PUNCH_HOLE) { 3246 3229 loff_t endbyte = offset + length - 1; 3247 3230 ··· 3275 3240 args.in_numargs = 1; 3276 3241 args.in_args[0].size = sizeof(inarg); 3277 3242 args.in_args[0].value = &inarg; 3278 - err = fuse_simple_request(fc, &args); 3243 + err = fuse_simple_request(fm, &args); 3279 3244 if (err == -ENOSYS) { 3280 - fc->no_fallocate = 1; 3245 + fm->fc->no_fallocate = 1; 3281 3246 err = -EOPNOTSUPP; 3282 3247 } 3283 3248 if (err) ··· 3287 3252 if (!(mode & FALLOC_FL_KEEP_SIZE)) { 3288 3253 bool changed = fuse_write_update_size(inode, offset + length); 3289 3254 3290 - if (changed && fc->writeback_cache) 3255 + if (changed && fm->fc->writeback_cache) 3291 3256 file_update_time(file); 3292 3257 } 3293 3258 ··· 3299 3264 out: 3300 3265 if (!(mode & FALLOC_FL_KEEP_SIZE)) 3301 3266 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); 3267 + 3268 + if (block_faults) 3269 + up_write(&fi->i_mmap_sem); 3302 3270 3303 3271 if (lock_inode) 3304 3272 inode_unlock(inode); ··· 3318 3280 struct inode *inode_in = file_inode(file_in); 3319 3281 struct inode *inode_out = file_inode(file_out); 3320 3282 struct fuse_inode *fi_out = get_fuse_inode(inode_out); 3321 - struct fuse_conn *fc = ff_in->fc; 3283 + struct fuse_mount *fm = ff_in->fm; 3284 + struct fuse_conn *fc = fm->fc; 3322 3285 FUSE_ARGS(args); 3323 3286 struct fuse_copy_file_range_in inarg = { 3324 3287 .fh_in = ff_in->fh, ··· 3388 3349 args.out_numargs = 1; 3389 3350 args.out_args[0].size = sizeof(outarg); 3390 3351 args.out_args[0].value = &outarg; 3391 - err = fuse_simple_request(fc, &args); 3352 + err = fuse_simple_request(fm, &args); 3392 3353 if (err == -ENOSYS) { 3393 3354 fc->no_copy_file_range = 1; 3394 3355 err = -EOPNOTSUPP; ··· 3443 3404 .release = fuse_release, 3444 3405 .fsync = fuse_fsync, 3445 3406 .lock = fuse_file_lock, 3407 + .get_unmapped_area = thp_get_unmapped_area, 3446 3408 .flock = fuse_file_flock, 3447 3409 .splice_read = generic_file_splice_read, 3448 3410 .splice_write = iter_file_splice_write, ··· 3479 3439 fi->writectr = 0; 3480 3440 
init_waitqueue_head(&fi->page_waitq); 3481 3441 fi->writepages = RB_ROOT; 3442 + 3443 + if (IS_ENABLED(CONFIG_FUSE_DAX)) 3444 + fuse_dax_inode_init(inode); 3482 3445 }
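The fallocate hunk above is the template this series uses to serialize DAX faults against operations that change a file's mapping: the fault path takes the new fi->i_mmap_sem shared, while truncate and hole punch take it for write and call fuse_dax_break_layouts() so that no page of the DAX window is still pinned when the range is changed on the host. A minimal sketch of that ordering (fuse_shrink_file() is an illustrative name, not an in-tree function; i_mmap_sem and fuse_dax_break_layouts() are the real primitives added here):

static int fuse_shrink_file(struct inode *inode, loff_t offset, loff_t len)
{
        struct fuse_inode *fi = get_fuse_inode(inode);
        bool block_faults = FUSE_IS_DAX(inode);
        int err = 0;

        inode_lock(inode);
        if (block_faults) {
                /* DAX faults take i_mmap_sem shared; this stops new faults */
                down_write(&fi->i_mmap_sem);
                /* wait for pinned pages, as the fallocate path above does */
                err = fuse_dax_break_layouts(inode, 0, 0);
        }
        if (!err) {
                /* ... now safe to send FUSE_FALLOCATE for [offset, offset+len) ... */
        }
        if (block_faults)
                up_write(&fi->i_mmap_sem);
        inode_unlock(inode);
        return err;
}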
+152 -35
fs/fuse/fuse_i.h
··· 148 148 149 149 /** Lock to protect write related fields */ 150 150 spinlock_t lock; 151 + 152 + /** 153 + * Can't take inode lock in fault path (leads to circular dependency). 154 + * Introduce another semaphore which can be taken in fault path and 155 + * then other filesystem paths can take this to block faults. 156 + */ 157 + struct rw_semaphore i_mmap_sem; 158 + 159 + #ifdef CONFIG_FUSE_DAX 160 + /* 161 + * Dax specific inode data 162 + */ 163 + struct fuse_inode_dax *dax; 164 + #endif 151 165 }; 152 166 153 167 /** FUSE inode state bits */ ··· 175 161 }; 176 162 177 163 struct fuse_conn; 164 + struct fuse_mount; 178 165 struct fuse_release_args; 179 166 180 167 /** FUSE specific file data */ 181 168 struct fuse_file { 182 169 /** Fuse connection for this file */ 183 - struct fuse_conn *fc; 170 + struct fuse_mount *fm; 184 171 185 172 /* Argument space reserved for release */ 186 173 struct fuse_release_args *release_args; ··· 267 252 bool may_block:1; 268 253 struct fuse_in_arg in_args[3]; 269 254 struct fuse_arg out_args[2]; 270 - void (*end)(struct fuse_conn *fc, struct fuse_args *args, int error); 255 + void (*end)(struct fuse_mount *fm, struct fuse_args *args, int error); 271 256 }; 272 257 273 258 struct fuse_args_pages { ··· 375 360 /** virtio-fs's physically contiguous buffer for in and out args */ 376 361 void *argbuf; 377 362 #endif 363 + 364 + /** fuse_mount this request belongs to */ 365 + struct fuse_mount *fm; 378 366 }; 379 367 380 368 struct fuse_iqueue; ··· 500 482 bool destroy:1; 501 483 bool no_control:1; 502 484 bool no_force_umount:1; 503 - bool no_mount_options:1; 485 + bool legacy_opts_show:1; 486 + bool dax:1; 504 487 unsigned int max_read; 505 488 unsigned int blksize; 506 489 const char *subtype; 490 + 491 + /* DAX device, may be NULL */ 492 + struct dax_device *dax_dev; 507 493 508 494 /* fuse_dev pointer to fill in, should contain NULL on entry */ 509 495 void **fudptr; ··· 516 494 /** 517 495 * A Fuse connection. 518 496 * 519 - * This structure is created, when the filesystem is mounted, and is 520 - * destroyed, when the client device is closed and the filesystem is 521 - * unmounted. 497 + * This structure is created, when the root filesystem is mounted, and 498 + * is destroyed, when the client device is closed and the last 499 + * fuse_mount is destroyed. 
522 500 */ 523 501 struct fuse_conn { 524 502 /** Lock protecting accessess to members of this structure */ ··· 632 610 /** cache READLINK responses in page cache */ 633 611 unsigned cache_symlinks:1; 634 612 613 + /* show legacy mount options */ 614 + unsigned int legacy_opts_show:1; 615 + 635 616 /* 636 617 * The following bitfields are only for optimization purposes 637 618 * and hence races in setting them will not cause malfunction ··· 742 717 /** Do not allow MNT_FORCE umount */ 743 718 unsigned int no_force_umount:1; 744 719 745 - /* Do not show mount options */ 746 - unsigned int no_mount_options:1; 720 + /* Auto-mount submounts announced by the server */ 721 + unsigned int auto_submounts:1; 747 722 748 723 /** The number of requests waiting for completion */ 749 724 atomic_t num_waiting; ··· 751 726 /** Negotiated minor version */ 752 727 unsigned minor; 753 728 754 - /** Entry on the fuse_conn_list */ 729 + /** Entry on the fuse_mount_list */ 755 730 struct list_head entry; 756 731 757 - /** Device ID from super block */ 732 + /** Device ID from the root super block */ 758 733 dev_t dev; 759 734 760 735 /** Dentries in the control filesystem */ ··· 772 747 /** Called on final put */ 773 748 void (*release)(struct fuse_conn *); 774 749 775 - /** Super block for this connection. */ 776 - struct super_block *sb; 777 - 778 - /** Read/write semaphore to hold when accessing sb. */ 750 + /** 751 + * Read/write semaphore to hold when accessing the sb of any 752 + * fuse_mount belonging to this connection 753 + */ 779 754 struct rw_semaphore killsb; 780 755 781 756 /** List of device instances belonging to this connection */ 782 757 struct list_head devices; 758 + 759 + #ifdef CONFIG_FUSE_DAX 760 + /* Dax specific conn data, non-NULL if DAX is enabled */ 761 + struct fuse_conn_dax *dax; 762 + #endif 763 + 764 + /** List of filesystems using this connection */ 765 + struct list_head mounts; 783 766 }; 784 767 785 - static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) 768 + /* 769 + * Represents a mounted filesystem, potentially a submount. 770 + * 771 + * This object allows sharing a fuse_conn between separate mounts to 772 + * allow submounts with dedicated superblocks and thus separate device 773 + * IDs. 774 + */ 775 + struct fuse_mount { 776 + /* Underlying (potentially shared) connection to the FUSE server */ 777 + struct fuse_conn *fc; 778 + 779 + /* Refcount */ 780 + refcount_t count; 781 + 782 + /* 783 + * Super block for this connection (fc->killsb must be held when 784 + * accessing this). 785 + */ 786 + struct super_block *sb; 787 + 788 + /* Entry on fc->mounts */ 789 + struct list_head fc_entry; 790 + }; 791 + 792 + static inline struct fuse_mount *get_fuse_mount_super(struct super_block *sb) 786 793 { 787 794 return sb->s_fs_info; 788 795 } 789 796 797 + static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) 798 + { 799 + struct fuse_mount *fm = get_fuse_mount_super(sb); 800 + 801 + return fm ? fm->fc : NULL; 802 + } 803 + 804 + static inline struct fuse_mount *get_fuse_mount(struct inode *inode) 805 + { 806 + return get_fuse_mount_super(inode->i_sb); 807 + } 808 + 790 809 static inline struct fuse_conn *get_fuse_conn(struct inode *inode) 791 810 { 792 - return get_fuse_conn_super(inode->i_sb); 811 + struct fuse_mount *fm = get_fuse_mount(inode); 812 + 813 + return fm ? 
fm->fc : NULL; 793 814 } 794 815 795 816 static inline struct fuse_inode *get_fuse_inode(struct inode *inode) ··· 863 792 864 793 extern const struct dentry_operations fuse_dentry_operations; 865 794 extern const struct dentry_operations fuse_root_dentry_operations; 866 - 867 - /** 868 - * Inode to nodeid comparison. 869 - */ 870 - int fuse_inode_eq(struct inode *inode, void *_nodeidp); 871 795 872 796 /** 873 797 * Get a filled in inode ··· 914 848 */ 915 849 int fuse_open_common(struct inode *inode, struct file *file, bool isdir); 916 850 917 - struct fuse_file *fuse_file_alloc(struct fuse_conn *fc); 851 + struct fuse_file *fuse_file_alloc(struct fuse_mount *fm); 918 852 void fuse_file_free(struct fuse_file *ff); 919 853 void fuse_finish_open(struct inode *inode, struct file *file); 920 854 ··· 982 916 /** 983 917 * Simple request sending that does request allocation and freeing 984 918 */ 985 - ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args); 986 - int fuse_simple_background(struct fuse_conn *fc, struct fuse_args *args, 919 + ssize_t fuse_simple_request(struct fuse_mount *fm, struct fuse_args *args); 920 + int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args, 987 921 gfp_t gfp_flags); 988 922 989 923 /** 990 924 * End a finished request 991 925 */ 992 - void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req); 926 + void fuse_request_end(struct fuse_req *req); 993 927 994 928 /* Abort all requests */ 995 929 void fuse_abort_conn(struct fuse_conn *fc); ··· 1015 949 /** 1016 950 * Initialize fuse_conn 1017 951 */ 1018 - void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns, 952 + void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, 953 + struct user_namespace *user_ns, 1019 954 const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv); 1020 955 1021 956 /** ··· 1024 957 */ 1025 958 void fuse_conn_put(struct fuse_conn *fc); 1026 959 960 + /** 961 + * Acquire reference to fuse_mount 962 + */ 963 + struct fuse_mount *fuse_mount_get(struct fuse_mount *fm); 964 + 965 + /** 966 + * Release reference to fuse_mount 967 + */ 968 + void fuse_mount_put(struct fuse_mount *fm); 969 + 1027 970 struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc); 1028 971 struct fuse_dev *fuse_dev_alloc(void); 1029 972 void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc); 1030 973 void fuse_dev_free(struct fuse_dev *fud); 1031 - void fuse_send_init(struct fuse_conn *fc); 974 + void fuse_send_init(struct fuse_mount *fm); 1032 975 1033 976 /** 1034 977 * Fill in superblock and initialize fuse connection ··· 1047 970 */ 1048 971 int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx); 1049 972 1050 - /** 1051 - * Disassociate fuse connection from superblock and kill the superblock 1052 - * 1053 - * Calls kill_anon_super(), do not use with bdev mounts. 973 + /* 974 + * Fill in superblock for submounts 975 + * @sb: partially-initialized superblock to fill in 976 + * @parent_fi: The fuse_inode of the parent filesystem where this submount is 977 + * mounted 1054 978 */ 1055 - void fuse_kill_sb_anon(struct super_block *sb); 979 + int fuse_fill_super_submount(struct super_block *sb, 980 + struct fuse_inode *parent_fi); 981 + 982 + /* 983 + * Remove the mount from the connection 984 + * 985 + * Returns whether this was the last mount 986 + */ 987 + bool fuse_mount_remove(struct fuse_mount *fm); 988 + 989 + /* 990 + * Shut down the connection (possibly sending DESTROY request). 
991 + */ 992 + void fuse_conn_destroy(struct fuse_mount *fm); 1056 993 1057 994 /** 1058 995 * Add connection to control filesystem ··· 1102 1011 void fuse_release_nowrite(struct inode *inode); 1103 1012 1104 1013 /** 1014 + * Scan all fuse_mounts belonging to fc to find the first where 1015 + * ilookup5() returns a result. Return that result and the 1016 + * respective fuse_mount in *fm (unless fm is NULL). 1017 + * 1018 + * The caller must hold fc->killsb. 1019 + */ 1020 + struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid, 1021 + struct fuse_mount **fm); 1022 + 1023 + /** 1105 1024 * File-system tells the kernel to invalidate cache for the given node id. 1106 1025 */ 1107 - int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid, 1026 + int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid, 1108 1027 loff_t offset, loff_t len); 1109 1028 1110 1029 /** ··· 1127 1026 * - is a file or oan empty directory 1128 1027 * then the dentry is unhashed (d_delete()). 1129 1028 */ 1130 - int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, 1029 + int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid, 1131 1030 u64 child_nodeid, struct qstr *name); 1132 1031 1133 - int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, 1032 + int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file, 1134 1033 bool isdir); 1135 1034 1136 1035 /** ··· 1193 1092 */ 1194 1093 u64 fuse_get_unique(struct fuse_iqueue *fiq); 1195 1094 void fuse_free_conn(struct fuse_conn *fc); 1095 + 1096 + /* dax.c */ 1097 + 1098 + #define FUSE_IS_DAX(inode) (IS_ENABLED(CONFIG_FUSE_DAX) && IS_DAX(inode)) 1099 + 1100 + ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to); 1101 + ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from); 1102 + int fuse_dax_mmap(struct file *file, struct vm_area_struct *vma); 1103 + int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start, u64 dmap_end); 1104 + int fuse_dax_conn_alloc(struct fuse_conn *fc, struct dax_device *dax_dev); 1105 + void fuse_dax_conn_free(struct fuse_conn *fc); 1106 + bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi); 1107 + void fuse_dax_inode_init(struct inode *inode); 1108 + void fuse_dax_inode_cleanup(struct inode *inode); 1109 + bool fuse_dax_check_alignment(struct fuse_conn *fc, unsigned int map_alignment); 1110 + void fuse_dax_cancel_work(struct fuse_conn *fc); 1196 1111 1197 1112 #endif /* _FS_FUSE_I_H */
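The core of the submount work is this split: struct fuse_conn keeps the connection-wide state (negotiated features, queues, DAX state) shared by every mount on the device, while the new struct fuse_mount carries the per-superblock part, and fc->killsb now guards the sb pointer of every mount on fc->mounts. Every helper that sends a request was converted from taking a fuse_conn to taking a fuse_mount. The shape of that conversion, with loudly hypothetical names for the opcode and feature bit (FUSE_EXAMPLE_OP and no_example do not exist; the surrounding helpers do), looks like this:

static int fuse_example_op(struct inode *inode)
{
        struct fuse_mount *fm = get_fuse_mount(inode);  /* was: get_fuse_conn(inode) */
        FUSE_ARGS(args);
        int err;

        if (fm->fc->no_example)                 /* feature bits stay per-connection */
                return -EOPNOTSUPP;

        args.opcode = FUSE_EXAMPLE_OP;          /* placeholder opcode */
        args.nodeid = get_node_id(inode);
        args.in_numargs = 0;
        err = fuse_simple_request(fm, &args);   /* was: fuse_simple_request(fc, &args) */
        if (err == -ENOSYS) {
                fm->fc->no_example = 1;         /* placeholder feature bit */
                err = -EOPNOTSUPP;
        }
        return err;
}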
+303 -88
fs/fuse/inode.c
··· 85 85 fi->orig_ino = 0; 86 86 fi->state = 0; 87 87 mutex_init(&fi->mutex); 88 + init_rwsem(&fi->i_mmap_sem); 88 89 spin_lock_init(&fi->lock); 89 90 fi->forget = fuse_alloc_forget(); 90 - if (!fi->forget) { 91 - kmem_cache_free(fuse_inode_cachep, fi); 92 - return NULL; 93 - } 91 + if (!fi->forget) 92 + goto out_free; 93 + 94 + if (IS_ENABLED(CONFIG_FUSE_DAX) && !fuse_dax_inode_alloc(sb, fi)) 95 + goto out_free_forget; 94 96 95 97 return &fi->inode; 98 + 99 + out_free_forget: 100 + kfree(fi->forget); 101 + out_free: 102 + kmem_cache_free(fuse_inode_cachep, fi); 103 + return NULL; 96 104 } 97 105 98 106 static void fuse_free_inode(struct inode *inode) ··· 109 101 110 102 mutex_destroy(&fi->mutex); 111 103 kfree(fi->forget); 104 + #ifdef CONFIG_FUSE_DAX 105 + kfree(fi->dax); 106 + #endif 112 107 kmem_cache_free(fuse_inode_cachep, fi); 113 108 } 114 109 ··· 123 112 clear_inode(inode); 124 113 if (inode->i_sb->s_flags & SB_ACTIVE) { 125 114 struct fuse_conn *fc = get_fuse_conn(inode); 126 - fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup); 127 - fi->forget = NULL; 115 + 116 + if (FUSE_IS_DAX(inode)) 117 + fuse_dax_inode_cleanup(inode); 118 + if (fi->nlookup) { 119 + fuse_queue_forget(fc, fi->forget, fi->nodeid, 120 + fi->nlookup); 121 + fi->forget = NULL; 122 + } 128 123 } 129 124 if (S_ISREG(inode->i_mode) && !is_bad_inode(inode)) { 130 125 WARN_ON(!list_empty(&fi->write_files)); ··· 285 268 BUG(); 286 269 } 287 270 288 - int fuse_inode_eq(struct inode *inode, void *_nodeidp) 271 + static int fuse_inode_eq(struct inode *inode, void *_nodeidp) 289 272 { 290 273 u64 nodeid = *(u64 *) _nodeidp; 291 274 if (get_node_id(inode) == nodeid) ··· 309 292 struct fuse_inode *fi; 310 293 struct fuse_conn *fc = get_fuse_conn_super(sb); 311 294 312 - retry: 295 + /* 296 + * Auto mount points get their node id from the submount root, which is 297 + * not a unique identifier within this filesystem. 298 + * 299 + * To avoid conflicts, do not place submount points into the inode hash 300 + * table. 
301 + */ 302 + if (fc->auto_submounts && (attr->flags & FUSE_ATTR_SUBMOUNT) && 303 + S_ISDIR(attr->mode)) { 304 + inode = new_inode(sb); 305 + if (!inode) 306 + return NULL; 307 + 308 + fuse_init_inode(inode, attr); 309 + get_fuse_inode(inode)->nodeid = nodeid; 310 + inode->i_flags |= S_AUTOMOUNT; 311 + goto done; 312 + } 313 + 314 + retry: 313 315 inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid); 314 316 if (!inode) 315 317 return NULL; ··· 346 310 iput(inode); 347 311 goto retry; 348 312 } 349 - 313 + done: 350 314 fi = get_fuse_inode(inode); 351 315 spin_lock(&fi->lock); 352 316 fi->nlookup++; ··· 356 320 return inode; 357 321 } 358 322 359 - int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid, 323 + struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid, 324 + struct fuse_mount **fm) 325 + { 326 + struct fuse_mount *fm_iter; 327 + struct inode *inode; 328 + 329 + WARN_ON(!rwsem_is_locked(&fc->killsb)); 330 + list_for_each_entry(fm_iter, &fc->mounts, fc_entry) { 331 + if (!fm_iter->sb) 332 + continue; 333 + 334 + inode = ilookup5(fm_iter->sb, nodeid, fuse_inode_eq, &nodeid); 335 + if (inode) { 336 + if (fm) 337 + *fm = fm_iter; 338 + return inode; 339 + } 340 + } 341 + 342 + return NULL; 343 + } 344 + 345 + int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid, 360 346 loff_t offset, loff_t len) 361 347 { 362 - struct fuse_conn *fc = get_fuse_conn_super(sb); 363 348 struct fuse_inode *fi; 364 349 struct inode *inode; 365 350 pgoff_t pg_start; 366 351 pgoff_t pg_end; 367 352 368 - inode = ilookup5(sb, nodeid, fuse_inode_eq, &nodeid); 353 + inode = fuse_ilookup(fc, nodeid, NULL); 369 354 if (!inode) 370 355 return -ENOENT; 371 356 ··· 436 379 fuse_abort_conn(fc); 437 380 } 438 381 439 - static void fuse_send_destroy(struct fuse_conn *fc) 382 + static void fuse_send_destroy(struct fuse_mount *fm) 440 383 { 441 - if (fc->conn_init) { 384 + if (fm->fc->conn_init) { 442 385 FUSE_ARGS(args); 443 386 444 387 args.opcode = FUSE_DESTROY; 445 388 args.force = true; 446 389 args.nocreds = true; 447 - fuse_simple_request(fc, &args); 390 + fuse_simple_request(fm, &args); 448 391 } 449 392 } 450 393 451 394 static void fuse_put_super(struct super_block *sb) 452 395 { 453 - struct fuse_conn *fc = get_fuse_conn_super(sb); 396 + struct fuse_mount *fm = get_fuse_mount_super(sb); 454 397 455 - mutex_lock(&fuse_mutex); 456 - list_del(&fc->entry); 457 - fuse_ctl_remove_conn(fc); 458 - mutex_unlock(&fuse_mutex); 459 - 460 - fuse_conn_put(fc); 398 + fuse_mount_put(fm); 461 399 } 462 400 463 401 static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr) ··· 472 420 static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) 473 421 { 474 422 struct super_block *sb = dentry->d_sb; 475 - struct fuse_conn *fc = get_fuse_conn_super(sb); 423 + struct fuse_mount *fm = get_fuse_mount_super(sb); 476 424 FUSE_ARGS(args); 477 425 struct fuse_statfs_out outarg; 478 426 int err; 479 427 480 - if (!fuse_allow_current_process(fc)) { 428 + if (!fuse_allow_current_process(fm->fc)) { 481 429 buf->f_type = FUSE_SUPER_MAGIC; 482 430 return 0; 483 431 } ··· 489 437 args.out_numargs = 1; 490 438 args.out_args[0].size = sizeof(outarg); 491 439 args.out_args[0].value = &outarg; 492 - err = fuse_simple_request(fc, &args); 440 + err = fuse_simple_request(fm, &args); 493 441 if (!err) 494 442 convert_fuse_statfs(buf, &outarg.st); 495 443 return err; ··· 625 573 struct super_block *sb = root->d_sb; 626 574 struct fuse_conn *fc = get_fuse_conn_super(sb); 627 
575 628 - if (fc->no_mount_options) 629 - return 0; 576 + if (fc->legacy_opts_show) { 577 + seq_printf(m, ",user_id=%u", 578 + from_kuid_munged(fc->user_ns, fc->user_id)); 579 + seq_printf(m, ",group_id=%u", 580 + from_kgid_munged(fc->user_ns, fc->group_id)); 581 + if (fc->default_permissions) 582 + seq_puts(m, ",default_permissions"); 583 + if (fc->allow_other) 584 + seq_puts(m, ",allow_other"); 585 + if (fc->max_read != ~0) 586 + seq_printf(m, ",max_read=%u", fc->max_read); 587 + if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE) 588 + seq_printf(m, ",blksize=%lu", sb->s_blocksize); 589 + } 590 + #ifdef CONFIG_FUSE_DAX 591 + if (fc->dax) 592 + seq_puts(m, ",dax"); 593 + #endif 630 594 631 - seq_printf(m, ",user_id=%u", from_kuid_munged(fc->user_ns, fc->user_id)); 632 - seq_printf(m, ",group_id=%u", from_kgid_munged(fc->user_ns, fc->group_id)); 633 - if (fc->default_permissions) 634 - seq_puts(m, ",default_permissions"); 635 - if (fc->allow_other) 636 - seq_puts(m, ",allow_other"); 637 - if (fc->max_read != ~0) 638 - seq_printf(m, ",max_read=%u", fc->max_read); 639 - if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE) 640 - seq_printf(m, ",blksize=%lu", sb->s_blocksize); 641 595 return 0; 642 596 } 643 597 ··· 673 615 fpq->connected = 1; 674 616 } 675 617 676 - void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns, 618 + void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, 619 + struct user_namespace *user_ns, 677 620 const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv) 678 621 { 679 622 memset(fc, 0, sizeof(*fc)); ··· 701 642 fc->pid_ns = get_pid_ns(task_active_pid_ns(current)); 702 643 fc->user_ns = get_user_ns(user_ns); 703 644 fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ; 645 + 646 + INIT_LIST_HEAD(&fc->mounts); 647 + list_add(&fm->fc_entry, &fc->mounts); 648 + fm->fc = fc; 649 + refcount_set(&fm->count, 1); 704 650 } 705 651 EXPORT_SYMBOL_GPL(fuse_conn_init); 706 652 ··· 714 650 if (refcount_dec_and_test(&fc->count)) { 715 651 struct fuse_iqueue *fiq = &fc->iq; 716 652 653 + if (IS_ENABLED(CONFIG_FUSE_DAX)) 654 + fuse_dax_conn_free(fc); 717 655 if (fiq->ops->release) 718 656 fiq->ops->release(fiq); 719 657 put_pid_ns(fc->pid_ns); ··· 731 665 return fc; 732 666 } 733 667 EXPORT_SYMBOL_GPL(fuse_conn_get); 668 + 669 + void fuse_mount_put(struct fuse_mount *fm) 670 + { 671 + if (refcount_dec_and_test(&fm->count)) { 672 + if (fm->fc) 673 + fuse_conn_put(fm->fc); 674 + kfree(fm); 675 + } 676 + } 677 + EXPORT_SYMBOL_GPL(fuse_mount_put); 678 + 679 + struct fuse_mount *fuse_mount_get(struct fuse_mount *fm) 680 + { 681 + refcount_inc(&fm->count); 682 + return fm; 683 + } 684 + EXPORT_SYMBOL_GPL(fuse_mount_get); 734 685 735 686 static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode) 736 687 { ··· 978 895 struct fuse_init_out out; 979 896 }; 980 897 981 - static void process_init_reply(struct fuse_conn *fc, struct fuse_args *args, 898 + static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, 982 899 int error) 983 900 { 901 + struct fuse_conn *fc = fm->fc; 984 902 struct fuse_init_args *ia = container_of(args, typeof(*ia), args); 985 903 struct fuse_init_out *arg = &ia->out; 904 + bool ok = true; 986 905 987 906 if (error || arg->major != FUSE_KERNEL_VERSION) 988 - fc->conn_error = 1; 907 + ok = false; 989 908 else { 990 909 unsigned long ra_pages; 991 910 ··· 1035 950 if (arg->flags & FUSE_HANDLE_KILLPRIV) 1036 951 fc->handle_killpriv = 1; 1037 952 if (arg->time_gran && arg->time_gran <= 1000000000) 
1038 - fc->sb->s_time_gran = arg->time_gran; 953 + fm->sb->s_time_gran = arg->time_gran; 1039 954 if ((arg->flags & FUSE_POSIX_ACL)) { 1040 955 fc->default_permissions = 1; 1041 956 fc->posix_acl = 1; 1042 - fc->sb->s_xattr = fuse_acl_xattr_handlers; 957 + fm->sb->s_xattr = fuse_acl_xattr_handlers; 1043 958 } 1044 959 if (arg->flags & FUSE_CACHE_SYMLINKS) 1045 960 fc->cache_symlinks = 1; ··· 1050 965 min_t(unsigned int, FUSE_MAX_MAX_PAGES, 1051 966 max_t(unsigned int, arg->max_pages, 1)); 1052 967 } 968 + if (IS_ENABLED(CONFIG_FUSE_DAX) && 969 + arg->flags & FUSE_MAP_ALIGNMENT && 970 + !fuse_dax_check_alignment(fc, arg->map_alignment)) { 971 + ok = false; 972 + } 1053 973 } else { 1054 974 ra_pages = fc->max_read / PAGE_SIZE; 1055 975 fc->no_lock = 1; 1056 976 fc->no_flock = 1; 1057 977 } 1058 978 1059 - fc->sb->s_bdi->ra_pages = 1060 - min(fc->sb->s_bdi->ra_pages, ra_pages); 979 + fm->sb->s_bdi->ra_pages = 980 + min(fm->sb->s_bdi->ra_pages, ra_pages); 1061 981 fc->minor = arg->minor; 1062 982 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; 1063 983 fc->max_write = max_t(unsigned, 4096, fc->max_write); ··· 1070 980 } 1071 981 kfree(ia); 1072 982 983 + if (!ok) { 984 + fc->conn_init = 0; 985 + fc->conn_error = 1; 986 + } 987 + 1073 988 fuse_set_initialized(fc); 1074 989 wake_up_all(&fc->blocked_waitq); 1075 990 } 1076 991 1077 - void fuse_send_init(struct fuse_conn *fc) 992 + void fuse_send_init(struct fuse_mount *fm) 1078 993 { 1079 994 struct fuse_init_args *ia; 1080 995 ··· 1087 992 1088 993 ia->in.major = FUSE_KERNEL_VERSION; 1089 994 ia->in.minor = FUSE_KERNEL_MINOR_VERSION; 1090 - ia->in.max_readahead = fc->sb->s_bdi->ra_pages * PAGE_SIZE; 995 + ia->in.max_readahead = fm->sb->s_bdi->ra_pages * PAGE_SIZE; 1091 996 ia->in.flags |= 1092 997 FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | 1093 998 FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | ··· 1098 1003 FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL | 1099 1004 FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS | 1100 1005 FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA; 1006 + #ifdef CONFIG_FUSE_DAX 1007 + if (fm->fc->dax) 1008 + ia->in.flags |= FUSE_MAP_ALIGNMENT; 1009 + #endif 1010 + if (fm->fc->auto_submounts) 1011 + ia->in.flags |= FUSE_SUBMOUNTS; 1012 + 1101 1013 ia->args.opcode = FUSE_INIT; 1102 1014 ia->args.in_numargs = 1; 1103 1015 ia->args.in_args[0].size = sizeof(ia->in); ··· 1120 1018 ia->args.nocreds = true; 1121 1019 ia->args.end = process_init_reply; 1122 1020 1123 - if (fuse_simple_background(fc, &ia->args, GFP_KERNEL) != 0) 1124 - process_init_reply(fc, &ia->args, -ENOTCONN); 1021 + if (fuse_simple_background(fm, &ia->args, GFP_KERNEL) != 0) 1022 + process_init_reply(fm, &ia->args, -ENOTCONN); 1125 1023 } 1126 1024 EXPORT_SYMBOL_GPL(fuse_send_init); 1127 1025 ··· 1232 1130 } 1233 1131 EXPORT_SYMBOL_GPL(fuse_dev_free); 1234 1132 1133 + static void fuse_fill_attr_from_inode(struct fuse_attr *attr, 1134 + const struct fuse_inode *fi) 1135 + { 1136 + *attr = (struct fuse_attr){ 1137 + .ino = fi->inode.i_ino, 1138 + .size = fi->inode.i_size, 1139 + .blocks = fi->inode.i_blocks, 1140 + .atime = fi->inode.i_atime.tv_sec, 1141 + .mtime = fi->inode.i_mtime.tv_sec, 1142 + .ctime = fi->inode.i_ctime.tv_sec, 1143 + .atimensec = fi->inode.i_atime.tv_nsec, 1144 + .mtimensec = fi->inode.i_mtime.tv_nsec, 1145 + .ctimensec = fi->inode.i_ctime.tv_nsec, 1146 + .mode = fi->inode.i_mode, 1147 + .nlink = fi->inode.i_nlink, 1148 + .uid = fi->inode.i_uid.val, 1149 + .gid = 
fi->inode.i_gid.val, 1150 + .rdev = fi->inode.i_rdev, 1151 + .blksize = 1u << fi->inode.i_blkbits, 1152 + }; 1153 + } 1154 + 1155 + static void fuse_sb_defaults(struct super_block *sb) 1156 + { 1157 + sb->s_magic = FUSE_SUPER_MAGIC; 1158 + sb->s_op = &fuse_super_operations; 1159 + sb->s_xattr = fuse_xattr_handlers; 1160 + sb->s_maxbytes = MAX_LFS_FILESIZE; 1161 + sb->s_time_gran = 1; 1162 + sb->s_export_op = &fuse_export_operations; 1163 + sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE; 1164 + if (sb->s_user_ns != &init_user_ns) 1165 + sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER; 1166 + sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION); 1167 + 1168 + /* 1169 + * If we are not in the initial user namespace posix 1170 + * acls must be translated. 1171 + */ 1172 + if (sb->s_user_ns != &init_user_ns) 1173 + sb->s_xattr = fuse_no_acl_xattr_handlers; 1174 + } 1175 + 1176 + int fuse_fill_super_submount(struct super_block *sb, 1177 + struct fuse_inode *parent_fi) 1178 + { 1179 + struct fuse_mount *fm = get_fuse_mount_super(sb); 1180 + struct super_block *parent_sb = parent_fi->inode.i_sb; 1181 + struct fuse_attr root_attr; 1182 + struct inode *root; 1183 + 1184 + fuse_sb_defaults(sb); 1185 + fm->sb = sb; 1186 + 1187 + WARN_ON(sb->s_bdi != &noop_backing_dev_info); 1188 + sb->s_bdi = bdi_get(parent_sb->s_bdi); 1189 + 1190 + sb->s_xattr = parent_sb->s_xattr; 1191 + sb->s_time_gran = parent_sb->s_time_gran; 1192 + sb->s_blocksize = parent_sb->s_blocksize; 1193 + sb->s_blocksize_bits = parent_sb->s_blocksize_bits; 1194 + sb->s_subtype = kstrdup(parent_sb->s_subtype, GFP_KERNEL); 1195 + if (parent_sb->s_subtype && !sb->s_subtype) 1196 + return -ENOMEM; 1197 + 1198 + fuse_fill_attr_from_inode(&root_attr, parent_fi); 1199 + root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0); 1200 + /* 1201 + * This inode is just a duplicate, so it is not looked up and 1202 + * its nlookup should not be incremented. fuse_iget() does 1203 + * that, though, so undo it here. 1204 + */ 1205 + get_fuse_inode(root)->nlookup--; 1206 + sb->s_d_op = &fuse_dentry_operations; 1207 + sb->s_root = d_make_root(root); 1208 + if (!sb->s_root) 1209 + return -ENOMEM; 1210 + 1211 + return 0; 1212 + } 1213 + 1235 1214 int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) 1236 1215 { 1237 1216 struct fuse_dev *fud = NULL; 1238 - struct fuse_conn *fc = get_fuse_conn_super(sb); 1217 + struct fuse_mount *fm = get_fuse_mount_super(sb); 1218 + struct fuse_conn *fc = fm->fc; 1239 1219 struct inode *root; 1240 1220 struct dentry *root_dentry; 1241 1221 int err; ··· 1326 1142 if (sb->s_flags & SB_MANDLOCK) 1327 1143 goto err; 1328 1144 1329 - sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION); 1145 + fuse_sb_defaults(sb); 1330 1146 1331 1147 if (ctx->is_bdev) { 1332 1148 #ifdef CONFIG_BLOCK ··· 1341 1157 1342 1158 sb->s_subtype = ctx->subtype; 1343 1159 ctx->subtype = NULL; 1344 - sb->s_magic = FUSE_SUPER_MAGIC; 1345 - sb->s_op = &fuse_super_operations; 1346 - sb->s_xattr = fuse_xattr_handlers; 1347 - sb->s_maxbytes = MAX_LFS_FILESIZE; 1348 - sb->s_time_gran = 1; 1349 - sb->s_export_op = &fuse_export_operations; 1350 - sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE; 1351 - if (sb->s_user_ns != &init_user_ns) 1352 - sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER; 1353 - 1354 - /* 1355 - * If we are not in the initial user namespace posix 1356 - * acls must be translated. 
1357 - */ 1358 - if (sb->s_user_ns != &init_user_ns) 1359 - sb->s_xattr = fuse_no_acl_xattr_handlers; 1160 + if (IS_ENABLED(CONFIG_FUSE_DAX)) { 1161 + err = fuse_dax_conn_alloc(fc, ctx->dax_dev); 1162 + if (err) 1163 + goto err; 1164 + } 1360 1165 1361 1166 if (ctx->fudptr) { 1362 1167 err = -ENOMEM; 1363 1168 fud = fuse_dev_alloc_install(fc); 1364 1169 if (!fud) 1365 - goto err; 1170 + goto err_free_dax; 1366 1171 } 1367 1172 1368 1173 fc->dev = sb->s_dev; 1369 - fc->sb = sb; 1174 + fm->sb = sb; 1370 1175 err = fuse_bdi_init(fc, sb); 1371 1176 if (err) 1372 1177 goto err_dev_free; ··· 1369 1196 fc->allow_other = ctx->allow_other; 1370 1197 fc->user_id = ctx->user_id; 1371 1198 fc->group_id = ctx->group_id; 1372 - fc->max_read = max_t(unsigned, 4096, ctx->max_read); 1199 + fc->legacy_opts_show = ctx->legacy_opts_show; 1200 + fc->max_read = max_t(unsigned int, 4096, ctx->max_read); 1373 1201 fc->destroy = ctx->destroy; 1374 1202 fc->no_control = ctx->no_control; 1375 1203 fc->no_force_umount = ctx->no_force_umount; 1376 - fc->no_mount_options = ctx->no_mount_options; 1377 1204 1378 1205 err = -ENOMEM; 1379 1206 root = fuse_get_root_inode(sb, ctx->rootmode); ··· 1406 1233 err_dev_free: 1407 1234 if (fud) 1408 1235 fuse_dev_free(fud); 1236 + err_free_dax: 1237 + if (IS_ENABLED(CONFIG_FUSE_DAX)) 1238 + fuse_dax_conn_free(fc); 1409 1239 err: 1410 1240 return err; 1411 1241 } ··· 1420 1244 struct file *file; 1421 1245 int err; 1422 1246 struct fuse_conn *fc; 1247 + struct fuse_mount *fm; 1423 1248 1424 1249 err = -EINVAL; 1425 1250 file = fget(ctx->fd); ··· 1441 1264 if (!fc) 1442 1265 goto err_fput; 1443 1266 1444 - fuse_conn_init(fc, sb->s_user_ns, &fuse_dev_fiq_ops, NULL); 1267 + fm = kzalloc(sizeof(*fm), GFP_KERNEL); 1268 + if (!fm) { 1269 + kfree(fc); 1270 + goto err_fput; 1271 + } 1272 + 1273 + fuse_conn_init(fc, fm, sb->s_user_ns, &fuse_dev_fiq_ops, NULL); 1445 1274 fc->release = fuse_free_conn; 1446 - sb->s_fs_info = fc; 1275 + 1276 + sb->s_fs_info = fm; 1447 1277 1448 1278 err = fuse_fill_super_common(sb, ctx); 1449 1279 if (err) ··· 1461 1277 * CPUs after this 1462 1278 */ 1463 1279 fput(file); 1464 - fuse_send_init(get_fuse_conn_super(sb)); 1280 + fuse_send_init(get_fuse_mount_super(sb)); 1465 1281 return 0; 1466 1282 1467 1283 err_put_conn: 1468 - fuse_conn_put(fc); 1284 + fuse_mount_put(fm); 1469 1285 sb->s_fs_info = NULL; 1470 1286 err_fput: 1471 1287 fput(file); ··· 1509 1325 1510 1326 ctx->max_read = ~0; 1511 1327 ctx->blksize = FUSE_DEFAULT_BLKSIZE; 1328 + ctx->legacy_opts_show = true; 1512 1329 1513 1330 #ifdef CONFIG_BLOCK 1514 1331 if (fc->fs_type == &fuseblk_fs_type) { ··· 1523 1338 return 0; 1524 1339 } 1525 1340 1526 - static void fuse_sb_destroy(struct super_block *sb) 1341 + bool fuse_mount_remove(struct fuse_mount *fm) 1527 1342 { 1528 - struct fuse_conn *fc = get_fuse_conn_super(sb); 1343 + struct fuse_conn *fc = fm->fc; 1344 + bool last = false; 1529 1345 1530 - if (fc) { 1531 - if (fc->destroy) 1532 - fuse_send_destroy(fc); 1346 + down_write(&fc->killsb); 1347 + list_del_init(&fm->fc_entry); 1348 + if (list_empty(&fc->mounts)) 1349 + last = true; 1350 + up_write(&fc->killsb); 1533 1351 1534 - fuse_abort_conn(fc); 1535 - fuse_wait_aborted(fc); 1352 + return last; 1353 + } 1354 + EXPORT_SYMBOL_GPL(fuse_mount_remove); 1536 1355 1537 - down_write(&fc->killsb); 1538 - fc->sb = NULL; 1539 - up_write(&fc->killsb); 1356 + void fuse_conn_destroy(struct fuse_mount *fm) 1357 + { 1358 + struct fuse_conn *fc = fm->fc; 1359 + 1360 + if (fc->destroy) 1361 + fuse_send_destroy(fm); 
1362 + 1363 + fuse_abort_conn(fc); 1364 + fuse_wait_aborted(fc); 1365 + 1366 + if (!list_empty(&fc->entry)) { 1367 + mutex_lock(&fuse_mutex); 1368 + list_del(&fc->entry); 1369 + fuse_ctl_remove_conn(fc); 1370 + mutex_unlock(&fuse_mutex); 1540 1371 } 1541 1372 } 1373 + EXPORT_SYMBOL_GPL(fuse_conn_destroy); 1542 1374 1543 - void fuse_kill_sb_anon(struct super_block *sb) 1375 + static void fuse_kill_sb_anon(struct super_block *sb) 1544 1376 { 1545 - fuse_sb_destroy(sb); 1377 + struct fuse_mount *fm = get_fuse_mount_super(sb); 1378 + bool last; 1379 + 1380 + if (fm) { 1381 + last = fuse_mount_remove(fm); 1382 + if (last) 1383 + fuse_conn_destroy(fm); 1384 + } 1546 1385 kill_anon_super(sb); 1547 1386 } 1548 - EXPORT_SYMBOL_GPL(fuse_kill_sb_anon); 1549 1387 1550 1388 static struct file_system_type fuse_fs_type = { 1551 1389 .owner = THIS_MODULE, ··· 1583 1375 #ifdef CONFIG_BLOCK 1584 1376 static void fuse_kill_sb_blk(struct super_block *sb) 1585 1377 { 1586 - fuse_sb_destroy(sb); 1378 + struct fuse_mount *fm = get_fuse_mount_super(sb); 1379 + bool last; 1380 + 1381 + if (fm) { 1382 + last = fuse_mount_remove(fm); 1383 + if (last) 1384 + fuse_conn_destroy(fm); 1385 + } 1587 1386 kill_block_super(sb); 1588 1387 } 1589 1388
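Submounts are driven from the server side: when the client has advertised FUSE_SUBMOUNTS in FUSE_INIT, a directory whose attributes carry FUSE_ATTR_SUBMOUNT is created by fuse_iget() above as an unhashed S_AUTOMOUNT inode, and crossing it later gets a superblock of its own via fuse_fill_super_submount(), so files on different host filesystems end up with distinct st_dev/st_ino in the guest. A rough server-side fragment under stated assumptions (want_submounts, handle_init() and fill_attr() are hypothetical daemon code, not part of this kernel series):

#include <stdbool.h>
#include <sys/stat.h>
#include <linux/fuse.h>

static bool want_submounts;

static void handle_init(const struct fuse_init_in *in)
{
        /* remember whether the client can automount submounts */
        want_submounts = in->flags & FUSE_SUBMOUNTS;
}

static void fill_attr(struct fuse_attr *attr, const struct stat *st,
                      bool is_mount_root)
{
        attr->ino  = st->st_ino;
        attr->mode = st->st_mode;
        /* ... size, times, uid/gid, nlink, etc. ... */
        attr->flags = 0;
        if (want_submounts && is_mount_root && S_ISDIR(st->st_mode))
                attr->flags |= FUSE_ATTR_SUBMOUNT;      /* client automounts here */
}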
+5 -5
fs/fuse/readdir.c
··· 252 252 static void fuse_force_forget(struct file *file, u64 nodeid) 253 253 { 254 254 struct inode *inode = file_inode(file); 255 - struct fuse_conn *fc = get_fuse_conn(inode); 255 + struct fuse_mount *fm = get_fuse_mount(inode); 256 256 struct fuse_forget_in inarg; 257 257 FUSE_ARGS(args); 258 258 ··· 266 266 args.force = true; 267 267 args.noreply = true; 268 268 269 - fuse_simple_request(fc, &args); 269 + fuse_simple_request(fm, &args); 270 270 /* ignore errors */ 271 271 } 272 272 ··· 320 320 ssize_t res; 321 321 struct page *page; 322 322 struct inode *inode = file_inode(file); 323 - struct fuse_conn *fc = get_fuse_conn(inode); 323 + struct fuse_mount *fm = get_fuse_mount(inode); 324 324 struct fuse_io_args ia = {}; 325 325 struct fuse_args_pages *ap = &ia.ap; 326 326 struct fuse_page_desc desc = { .length = PAGE_SIZE }; ··· 337 337 ap->pages = &page; 338 338 ap->descs = &desc; 339 339 if (plus) { 340 - attr_version = fuse_get_attr_version(fc); 340 + attr_version = fuse_get_attr_version(fm->fc); 341 341 fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE, 342 342 FUSE_READDIRPLUS); 343 343 } else { ··· 345 345 FUSE_READDIR); 346 346 } 347 347 locked = fuse_lock_inode(inode); 348 - res = fuse_simple_request(fc, &ap->args); 348 + res = fuse_simple_request(fm, &ap->args); 349 349 fuse_unlock_inode(inode, locked); 350 350 if (res >= 0) { 351 351 if (!res) {
+312 -66
fs/fuse/virtio_fs.c
··· 5 5 */ 6 6 7 7 #include <linux/fs.h> 8 + #include <linux/dax.h> 9 + #include <linux/pci.h> 10 + #include <linux/pfn_t.h> 8 11 #include <linux/module.h> 9 12 #include <linux/virtio.h> 10 13 #include <linux/virtio_fs.h> 11 14 #include <linux/delay.h> 12 15 #include <linux/fs_context.h> 16 + #include <linux/fs_parser.h> 13 17 #include <linux/highmem.h> 18 + #include <linux/uio.h> 14 19 #include "fuse_i.h" 15 20 16 21 /* List of virtio-fs device instances and a lock for the list. Also provides ··· 29 24 VQ_REQUEST 30 25 }; 31 26 27 + #define VQ_NAME_LEN 24 28 + 32 29 /* Per-virtqueue state */ 33 30 struct virtio_fs_vq { 34 31 spinlock_t lock; ··· 43 36 bool connected; 44 37 long in_flight; 45 38 struct completion in_flight_zero; /* No inflight requests */ 46 - char name[24]; 39 + char name[VQ_NAME_LEN]; 47 40 } ____cacheline_aligned_in_smp; 48 41 49 42 /* A virtio-fs device instance */ ··· 54 47 struct virtio_fs_vq *vqs; 55 48 unsigned int nvqs; /* number of virtqueues */ 56 49 unsigned int num_request_queues; /* number of request queues */ 50 + struct dax_device *dax_dev; 51 + 52 + /* DAX memory window where file contents are mapped */ 53 + void *window_kaddr; 54 + phys_addr_t window_phys_addr; 55 + size_t window_len; 57 56 }; 58 57 59 58 struct virtio_fs_forget_req { ··· 81 68 82 69 static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, 83 70 struct fuse_req *req, bool in_flight); 71 + 72 + enum { 73 + OPT_DAX, 74 + }; 75 + 76 + static const struct fs_parameter_spec virtio_fs_parameters[] = { 77 + fsparam_flag("dax", OPT_DAX), 78 + {} 79 + }; 80 + 81 + static int virtio_fs_parse_param(struct fs_context *fc, 82 + struct fs_parameter *param) 83 + { 84 + struct fs_parse_result result; 85 + struct fuse_fs_context *ctx = fc->fs_private; 86 + int opt; 87 + 88 + opt = fs_parse(fc, virtio_fs_parameters, param, &result); 89 + if (opt < 0) 90 + return opt; 91 + 92 + switch (opt) { 93 + case OPT_DAX: 94 + ctx->dax = 1; 95 + break; 96 + default: 97 + return -EINVAL; 98 + } 99 + 100 + return 0; 101 + } 102 + 103 + static void virtio_fs_free_fc(struct fs_context *fc) 104 + { 105 + struct fuse_fs_context *ctx = fc->fs_private; 106 + 107 + kfree(ctx); 108 + } 84 109 85 110 static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq) 86 111 { ··· 340 289 struct fuse_req *req; 341 290 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 342 291 dispatch_work.work); 343 - struct fuse_conn *fc = fsvq->fud->fc; 344 292 int ret; 345 293 346 294 pr_debug("virtio-fs: worker %s called.\n", __func__); ··· 354 304 355 305 list_del_init(&req->list); 356 306 spin_unlock(&fsvq->lock); 357 - fuse_request_end(fc, req); 307 + fuse_request_end(req); 358 308 } 359 309 360 310 /* Dispatch pending requests */ ··· 385 335 spin_unlock(&fsvq->lock); 386 336 pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", 387 337 ret); 388 - fuse_request_end(fc, req); 338 + fuse_request_end(req); 389 339 } 390 340 } 391 341 } ··· 545 495 struct virtio_fs_vq *fsvq) 546 496 { 547 497 struct fuse_pqueue *fpq = &fsvq->fud->pq; 548 - struct fuse_conn *fc = fsvq->fud->fc; 549 498 struct fuse_args *args; 550 499 struct fuse_args_pages *ap; 551 500 unsigned int len, i, thislen; ··· 577 528 clear_bit(FR_SENT, &req->flags); 578 529 spin_unlock(&fpq->lock); 579 530 580 - fuse_request_end(fc, req); 531 + fuse_request_end(req); 581 532 spin_lock(&fsvq->lock); 582 533 dec_in_flight_req(fsvq); 583 534 spin_unlock(&fsvq->lock); ··· 645 596 schedule_work(&fsvq->done_work); 646 597 } 647 598 599 + static void 
virtio_fs_init_vq(struct virtio_fs_vq *fsvq, char *name, 600 + int vq_type) 601 + { 602 + strncpy(fsvq->name, name, VQ_NAME_LEN); 603 + spin_lock_init(&fsvq->lock); 604 + INIT_LIST_HEAD(&fsvq->queued_reqs); 605 + INIT_LIST_HEAD(&fsvq->end_reqs); 606 + init_completion(&fsvq->in_flight_zero); 607 + 608 + if (vq_type == VQ_REQUEST) { 609 + INIT_WORK(&fsvq->done_work, virtio_fs_requests_done_work); 610 + INIT_DELAYED_WORK(&fsvq->dispatch_work, 611 + virtio_fs_request_dispatch_work); 612 + } else { 613 + INIT_WORK(&fsvq->done_work, virtio_fs_hiprio_done_work); 614 + INIT_DELAYED_WORK(&fsvq->dispatch_work, 615 + virtio_fs_hiprio_dispatch_work); 616 + } 617 + } 618 + 648 619 /* Initialize virtqueues */ 649 620 static int virtio_fs_setup_vqs(struct virtio_device *vdev, 650 621 struct virtio_fs *fs) ··· 680 611 if (fs->num_request_queues == 0) 681 612 return -EINVAL; 682 613 683 - fs->nvqs = 1 + fs->num_request_queues; 614 + fs->nvqs = VQ_REQUEST + fs->num_request_queues; 684 615 fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL); 685 616 if (!fs->vqs) 686 617 return -ENOMEM; ··· 694 625 goto out; 695 626 } 696 627 628 + /* Initialize the hiprio/forget request virtqueue */ 697 629 callbacks[VQ_HIPRIO] = virtio_fs_vq_done; 698 - snprintf(fs->vqs[VQ_HIPRIO].name, sizeof(fs->vqs[VQ_HIPRIO].name), 699 - "hiprio"); 630 + virtio_fs_init_vq(&fs->vqs[VQ_HIPRIO], "hiprio", VQ_HIPRIO); 700 631 names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name; 701 - INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work); 702 - INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs); 703 - INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].end_reqs); 704 - INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work, 705 - virtio_fs_hiprio_dispatch_work); 706 - init_completion(&fs->vqs[VQ_HIPRIO].in_flight_zero); 707 - spin_lock_init(&fs->vqs[VQ_HIPRIO].lock); 708 632 709 633 /* Initialize the requests virtqueues */ 710 634 for (i = VQ_REQUEST; i < fs->nvqs; i++) { 711 - spin_lock_init(&fs->vqs[i].lock); 712 - INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work); 713 - INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work, 714 - virtio_fs_request_dispatch_work); 715 - INIT_LIST_HEAD(&fs->vqs[i].queued_reqs); 716 - INIT_LIST_HEAD(&fs->vqs[i].end_reqs); 717 - init_completion(&fs->vqs[i].in_flight_zero); 718 - snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name), 719 - "requests.%u", i - VQ_REQUEST); 635 + char vq_name[VQ_NAME_LEN]; 636 + 637 + snprintf(vq_name, VQ_NAME_LEN, "requests.%u", i - VQ_REQUEST); 638 + virtio_fs_init_vq(&fs->vqs[i], vq_name, VQ_REQUEST); 720 639 callbacks[i] = virtio_fs_vq_done; 721 640 names[i] = fs->vqs[i].name; 722 641 } ··· 733 676 vdev->config->del_vqs(vdev); 734 677 } 735 678 679 + /* Map a window offset to a page frame number. The window offset will have 680 + * been produced by .iomap_begin(), which maps a file offset to a window 681 + * offset. 682 + */ 683 + static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, 684 + long nr_pages, void **kaddr, pfn_t *pfn) 685 + { 686 + struct virtio_fs *fs = dax_get_private(dax_dev); 687 + phys_addr_t offset = PFN_PHYS(pgoff); 688 + size_t max_nr_pages = fs->window_len/PAGE_SIZE - pgoff; 689 + 690 + if (kaddr) 691 + *kaddr = fs->window_kaddr + offset; 692 + if (pfn) 693 + *pfn = phys_to_pfn_t(fs->window_phys_addr + offset, 694 + PFN_DEV | PFN_MAP); 695 + return nr_pages > max_nr_pages ? 
max_nr_pages : nr_pages; 696 + } 697 + 698 + static size_t virtio_fs_copy_from_iter(struct dax_device *dax_dev, 699 + pgoff_t pgoff, void *addr, 700 + size_t bytes, struct iov_iter *i) 701 + { 702 + return copy_from_iter(addr, bytes, i); 703 + } 704 + 705 + static size_t virtio_fs_copy_to_iter(struct dax_device *dax_dev, 706 + pgoff_t pgoff, void *addr, 707 + size_t bytes, struct iov_iter *i) 708 + { 709 + return copy_to_iter(addr, bytes, i); 710 + } 711 + 712 + static int virtio_fs_zero_page_range(struct dax_device *dax_dev, 713 + pgoff_t pgoff, size_t nr_pages) 714 + { 715 + long rc; 716 + void *kaddr; 717 + 718 + rc = dax_direct_access(dax_dev, pgoff, nr_pages, &kaddr, NULL); 719 + if (rc < 0) 720 + return rc; 721 + memset(kaddr, 0, nr_pages << PAGE_SHIFT); 722 + dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT); 723 + return 0; 724 + } 725 + 726 + static const struct dax_operations virtio_fs_dax_ops = { 727 + .direct_access = virtio_fs_direct_access, 728 + .copy_from_iter = virtio_fs_copy_from_iter, 729 + .copy_to_iter = virtio_fs_copy_to_iter, 730 + .zero_page_range = virtio_fs_zero_page_range, 731 + }; 732 + 733 + static void virtio_fs_cleanup_dax(void *data) 734 + { 735 + struct dax_device *dax_dev = data; 736 + 737 + kill_dax(dax_dev); 738 + put_dax(dax_dev); 739 + } 740 + 741 + static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs) 742 + { 743 + struct virtio_shm_region cache_reg; 744 + struct dev_pagemap *pgmap; 745 + bool have_cache; 746 + 747 + if (!IS_ENABLED(CONFIG_FUSE_DAX)) 748 + return 0; 749 + 750 + /* Get cache region */ 751 + have_cache = virtio_get_shm_region(vdev, &cache_reg, 752 + (u8)VIRTIO_FS_SHMCAP_ID_CACHE); 753 + if (!have_cache) { 754 + dev_notice(&vdev->dev, "%s: No cache capability\n", __func__); 755 + return 0; 756 + } 757 + 758 + if (!devm_request_mem_region(&vdev->dev, cache_reg.addr, cache_reg.len, 759 + dev_name(&vdev->dev))) { 760 + dev_warn(&vdev->dev, "could not reserve region addr=0x%llx len=0x%llx\n", 761 + cache_reg.addr, cache_reg.len); 762 + return -EBUSY; 763 + } 764 + 765 + dev_notice(&vdev->dev, "Cache len: 0x%llx @ 0x%llx\n", cache_reg.len, 766 + cache_reg.addr); 767 + 768 + pgmap = devm_kzalloc(&vdev->dev, sizeof(*pgmap), GFP_KERNEL); 769 + if (!pgmap) 770 + return -ENOMEM; 771 + 772 + pgmap->type = MEMORY_DEVICE_FS_DAX; 773 + 774 + /* Ideally we would directly use the PCI BAR resource but 775 + * devm_memremap_pages() wants its own copy in pgmap. So 776 + * initialize a struct resource from scratch (only the start 777 + * and end fields will be used). 
778 + */ 779 + pgmap->range = (struct range) { 780 + .start = (phys_addr_t) cache_reg.addr, 781 + .end = (phys_addr_t) cache_reg.addr + cache_reg.len - 1, 782 + }; 783 + pgmap->nr_range = 1; 784 + 785 + fs->window_kaddr = devm_memremap_pages(&vdev->dev, pgmap); 786 + if (IS_ERR(fs->window_kaddr)) 787 + return PTR_ERR(fs->window_kaddr); 788 + 789 + fs->window_phys_addr = (phys_addr_t) cache_reg.addr; 790 + fs->window_len = (phys_addr_t) cache_reg.len; 791 + 792 + dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n", 793 + __func__, fs->window_kaddr, cache_reg.addr, cache_reg.len); 794 + 795 + fs->dax_dev = alloc_dax(fs, NULL, &virtio_fs_dax_ops, 0); 796 + if (IS_ERR(fs->dax_dev)) 797 + return PTR_ERR(fs->dax_dev); 798 + 799 + return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax, 800 + fs->dax_dev); 801 + } 802 + 736 803 static int virtio_fs_probe(struct virtio_device *vdev) 737 804 { 738 805 struct virtio_fs *fs; ··· 877 696 goto out; 878 697 879 698 /* TODO vq affinity */ 699 + 700 + ret = virtio_fs_setup_dax(vdev, fs); 701 + if (ret < 0) 702 + goto out_vqs; 880 703 881 704 /* Bring the device online in case the filesystem is mounted and 882 705 * requests need to be sent before we return. ··· 1018 833 spin_unlock(&fiq->lock); 1019 834 } 1020 835 836 + /* Count number of scatter-gather elements required */ 837 + static unsigned int sg_count_fuse_pages(struct fuse_page_desc *page_descs, 838 + unsigned int num_pages, 839 + unsigned int total_len) 840 + { 841 + unsigned int i; 842 + unsigned int this_len; 843 + 844 + for (i = 0; i < num_pages && total_len; i++) { 845 + this_len = min(page_descs[i].length, total_len); 846 + total_len -= this_len; 847 + } 848 + 849 + return i; 850 + } 851 + 1021 852 /* Return the number of scatter-gather list elements required */ 1022 853 static unsigned int sg_count_fuse_req(struct fuse_req *req) 1023 854 { 1024 855 struct fuse_args *args = req->args; 1025 856 struct fuse_args_pages *ap = container_of(args, typeof(*ap), args); 1026 - unsigned int total_sgs = 1 /* fuse_in_header */; 857 + unsigned int size, total_sgs = 1 /* fuse_in_header */; 1027 858 1028 859 if (args->in_numargs - args->in_pages) 1029 860 total_sgs += 1; 1030 861 1031 - if (args->in_pages) 1032 - total_sgs += ap->num_pages; 862 + if (args->in_pages) { 863 + size = args->in_args[args->in_numargs - 1].size; 864 + total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages, 865 + size); 866 + } 1033 867 1034 868 if (!test_bit(FR_ISREPLY, &req->flags)) 1035 869 return total_sgs; ··· 1058 854 if (args->out_numargs - args->out_pages) 1059 855 total_sgs += 1; 1060 856 1061 - if (args->out_pages) 1062 - total_sgs += ap->num_pages; 857 + if (args->out_pages) { 858 + size = args->out_args[args->out_numargs - 1].size; 859 + total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages, 860 + size); 861 + } 1063 862 1064 863 return total_sgs; 1065 864 } ··· 1278 1071 .release = virtio_fs_fiq_release, 1279 1072 }; 1280 1073 1281 - static int virtio_fs_fill_super(struct super_block *sb) 1074 + static inline void virtio_fs_ctx_set_defaults(struct fuse_fs_context *ctx) 1282 1075 { 1283 - struct fuse_conn *fc = get_fuse_conn_super(sb); 1076 + ctx->rootmode = S_IFDIR; 1077 + ctx->default_permissions = 1; 1078 + ctx->allow_other = 1; 1079 + ctx->max_read = UINT_MAX; 1080 + ctx->blksize = 512; 1081 + ctx->destroy = true; 1082 + ctx->no_control = true; 1083 + ctx->no_force_umount = true; 1084 + } 1085 + 1086 + static int virtio_fs_fill_super(struct super_block *sb, struct fs_context 
*fsc) 1087 + { 1088 + struct fuse_mount *fm = get_fuse_mount_super(sb); 1089 + struct fuse_conn *fc = fm->fc; 1284 1090 struct virtio_fs *fs = fc->iq.priv; 1091 + struct fuse_fs_context *ctx = fsc->fs_private; 1285 1092 unsigned int i; 1286 1093 int err; 1287 - struct fuse_fs_context ctx = { 1288 - .rootmode = S_IFDIR, 1289 - .default_permissions = 1, 1290 - .allow_other = 1, 1291 - .max_read = UINT_MAX, 1292 - .blksize = 512, 1293 - .destroy = true, 1294 - .no_control = true, 1295 - .no_force_umount = true, 1296 - .no_mount_options = true, 1297 - }; 1298 1094 1095 + virtio_fs_ctx_set_defaults(ctx); 1299 1096 mutex_lock(&virtio_fs_mutex); 1300 1097 1301 1098 /* After holding mutex, make sure virtiofs device is still there. ··· 1323 1112 } 1324 1113 1325 1114 /* virtiofs allocates and installs its own fuse devices */ 1326 - ctx.fudptr = NULL; 1327 - err = fuse_fill_super_common(sb, &ctx); 1115 + ctx->fudptr = NULL; 1116 + if (ctx->dax) 1117 + ctx->dax_dev = fs->dax_dev; 1118 + err = fuse_fill_super_common(sb, ctx); 1328 1119 if (err < 0) 1329 1120 goto err_free_fuse_devs; 1330 1121 ··· 1338 1125 1339 1126 /* Previous unmount will stop all queues. Start these again */ 1340 1127 virtio_fs_start_all_queues(fs); 1341 - fuse_send_init(fc); 1128 + fuse_send_init(fm); 1342 1129 mutex_unlock(&virtio_fs_mutex); 1343 1130 return 0; 1344 1131 ··· 1349 1136 return err; 1350 1137 } 1351 1138 1352 - static void virtio_kill_sb(struct super_block *sb) 1139 + static void virtio_fs_conn_destroy(struct fuse_mount *fm) 1353 1140 { 1354 - struct fuse_conn *fc = get_fuse_conn_super(sb); 1355 - struct virtio_fs *vfs; 1356 - struct virtio_fs_vq *fsvq; 1141 + struct fuse_conn *fc = fm->fc; 1142 + struct virtio_fs *vfs = fc->iq.priv; 1143 + struct virtio_fs_vq *fsvq = &vfs->vqs[VQ_HIPRIO]; 1357 1144 1358 - /* If mount failed, we can still be called without any fc */ 1359 - if (!fc) 1360 - return fuse_kill_sb_anon(sb); 1361 - 1362 - vfs = fc->iq.priv; 1363 - fsvq = &vfs->vqs[VQ_HIPRIO]; 1145 + /* Stop dax worker. Soon evict_inodes() will be called which 1146 + * will free all memory ranges belonging to all inodes. 1147 + */ 1148 + if (IS_ENABLED(CONFIG_FUSE_DAX)) 1149 + fuse_dax_cancel_work(fc); 1364 1150 1365 1151 /* Stop forget queue. Soon destroy will be sent */ 1366 1152 spin_lock(&fsvq->lock); ··· 1367 1155 spin_unlock(&fsvq->lock); 1368 1156 virtio_fs_drain_all_queues(vfs); 1369 1157 1370 - fuse_kill_sb_anon(sb); 1158 + fuse_conn_destroy(fm); 1371 1159 1372 - /* fuse_kill_sb_anon() must have sent destroy. Stop all queues 1160 + /* fuse_conn_destroy() must have sent destroy. Stop all queues 1373 1161 * and drain one more time and free fuse devices. Freeing fuse 1374 1162 * devices will drop their reference on fuse_conn and that in 1375 1163 * turn will drop its reference on virtio_fs object. 
··· 1379 1167 virtio_fs_free_devs(vfs); 1380 1168 } 1381 1169 1170 + static void virtio_kill_sb(struct super_block *sb) 1171 + { 1172 + struct fuse_mount *fm = get_fuse_mount_super(sb); 1173 + bool last; 1174 + 1175 + /* If mount failed, we can still be called without any fc */ 1176 + if (fm) { 1177 + last = fuse_mount_remove(fm); 1178 + if (last) 1179 + virtio_fs_conn_destroy(fm); 1180 + } 1181 + kill_anon_super(sb); 1182 + } 1183 + 1382 1184 static int virtio_fs_test_super(struct super_block *sb, 1383 1185 struct fs_context *fsc) 1384 1186 { 1385 - struct fuse_conn *fc = fsc->s_fs_info; 1187 + struct fuse_mount *fsc_fm = fsc->s_fs_info; 1188 + struct fuse_mount *sb_fm = get_fuse_mount_super(sb); 1386 1189 1387 - return fc->iq.priv == get_fuse_conn_super(sb)->iq.priv; 1190 + return fsc_fm->fc->iq.priv == sb_fm->fc->iq.priv; 1388 1191 } 1389 1192 1390 1193 static int virtio_fs_set_super(struct super_block *sb, ··· 1409 1182 1410 1183 err = get_anon_bdev(&sb->s_dev); 1411 1184 if (!err) 1412 - fuse_conn_get(fsc->s_fs_info); 1185 + fuse_mount_get(fsc->s_fs_info); 1413 1186 1414 1187 return err; 1415 1188 } ··· 1419 1192 struct virtio_fs *fs; 1420 1193 struct super_block *sb; 1421 1194 struct fuse_conn *fc; 1195 + struct fuse_mount *fm; 1422 1196 int err; 1423 1197 1424 1198 /* This gets a reference on virtio_fs object. This ptr gets installed ··· 1440 1212 return -ENOMEM; 1441 1213 } 1442 1214 1443 - fuse_conn_init(fc, get_user_ns(current_user_ns()), &virtio_fs_fiq_ops, 1444 - fs); 1215 + fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL); 1216 + if (!fm) { 1217 + mutex_lock(&virtio_fs_mutex); 1218 + virtio_fs_put(fs); 1219 + mutex_unlock(&virtio_fs_mutex); 1220 + kfree(fc); 1221 + return -ENOMEM; 1222 + } 1223 + 1224 + fuse_conn_init(fc, fm, get_user_ns(current_user_ns()), 1225 + &virtio_fs_fiq_ops, fs); 1445 1226 fc->release = fuse_free_conn; 1446 1227 fc->delete_stale = true; 1228 + fc->auto_submounts = true; 1447 1229 1448 - fsc->s_fs_info = fc; 1230 + fsc->s_fs_info = fm; 1449 1231 sb = sget_fc(fsc, virtio_fs_test_super, virtio_fs_set_super); 1450 - fuse_conn_put(fc); 1232 + fuse_mount_put(fm); 1451 1233 if (IS_ERR(sb)) 1452 1234 return PTR_ERR(sb); 1453 1235 1454 1236 if (!sb->s_root) { 1455 - err = virtio_fs_fill_super(sb); 1237 + err = virtio_fs_fill_super(sb, fsc); 1456 1238 if (err) { 1457 1239 deactivate_locked_super(sb); 1458 1240 return err; ··· 1477 1239 } 1478 1240 1479 1241 static const struct fs_context_operations virtio_fs_context_ops = { 1242 + .free = virtio_fs_free_fc, 1243 + .parse_param = virtio_fs_parse_param, 1480 1244 .get_tree = virtio_fs_get_tree, 1481 1245 }; 1482 1246 1483 1247 static int virtio_fs_init_fs_context(struct fs_context *fsc) 1484 1248 { 1249 + struct fuse_fs_context *ctx; 1250 + 1251 + ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL); 1252 + if (!ctx) 1253 + return -ENOMEM; 1254 + fsc->fs_private = ctx; 1485 1255 fsc->ops = &virtio_fs_context_ops; 1486 1256 return 0; 1487 1257 }
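Once the cache shared-memory region has been memremapped and wrapped by alloc_dax() above, the rest of the kernel treats it like any other DAX device; fs/fuse/dax.c (not part of this file's hunk) reaches the window through the regular dax_direct_access() interface, the same call virtio_fs_zero_page_range() uses. A minimal consumer-side fragment (example_read_from_window() is illustrative only):

static int example_read_from_window(struct dax_device *dax_dev,
                                    pgoff_t pgoff, void *dest, size_t len)
{
        void *kaddr;
        long nr;

        /* resolve one page of the window to a directly usable address */
        nr = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL);
        if (nr < 1)
                return nr < 0 ? nr : -EIO;
        /* reads hit the host page cache directly; len must fit in one page here */
        memcpy(dest, kaddr, len);
        return 0;
}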
+17 -17
fs/fuse/xattr.c
··· 14 14 int fuse_setxattr(struct inode *inode, const char *name, const void *value, 15 15 size_t size, int flags) 16 16 { 17 - struct fuse_conn *fc = get_fuse_conn(inode); 17 + struct fuse_mount *fm = get_fuse_mount(inode); 18 18 FUSE_ARGS(args); 19 19 struct fuse_setxattr_in inarg; 20 20 int err; 21 21 22 - if (fc->no_setxattr) 22 + if (fm->fc->no_setxattr) 23 23 return -EOPNOTSUPP; 24 24 25 25 memset(&inarg, 0, sizeof(inarg)); ··· 34 34 args.in_args[1].value = name; 35 35 args.in_args[2].size = size; 36 36 args.in_args[2].value = value; 37 - err = fuse_simple_request(fc, &args); 37 + err = fuse_simple_request(fm, &args); 38 38 if (err == -ENOSYS) { 39 - fc->no_setxattr = 1; 39 + fm->fc->no_setxattr = 1; 40 40 err = -EOPNOTSUPP; 41 41 } 42 42 if (!err) { ··· 49 49 ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value, 50 50 size_t size) 51 51 { 52 - struct fuse_conn *fc = get_fuse_conn(inode); 52 + struct fuse_mount *fm = get_fuse_mount(inode); 53 53 FUSE_ARGS(args); 54 54 struct fuse_getxattr_in inarg; 55 55 struct fuse_getxattr_out outarg; 56 56 ssize_t ret; 57 57 58 - if (fc->no_getxattr) 58 + if (fm->fc->no_getxattr) 59 59 return -EOPNOTSUPP; 60 60 61 61 memset(&inarg, 0, sizeof(inarg)); ··· 77 77 args.out_args[0].size = sizeof(outarg); 78 78 args.out_args[0].value = &outarg; 79 79 } 80 - ret = fuse_simple_request(fc, &args); 80 + ret = fuse_simple_request(fm, &args); 81 81 if (!ret && !size) 82 82 ret = min_t(ssize_t, outarg.size, XATTR_SIZE_MAX); 83 83 if (ret == -ENOSYS) { 84 - fc->no_getxattr = 1; 84 + fm->fc->no_getxattr = 1; 85 85 ret = -EOPNOTSUPP; 86 86 } 87 87 return ret; ··· 107 107 ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) 108 108 { 109 109 struct inode *inode = d_inode(entry); 110 - struct fuse_conn *fc = get_fuse_conn(inode); 110 + struct fuse_mount *fm = get_fuse_mount(inode); 111 111 FUSE_ARGS(args); 112 112 struct fuse_getxattr_in inarg; 113 113 struct fuse_getxattr_out outarg; 114 114 ssize_t ret; 115 115 116 - if (!fuse_allow_current_process(fc)) 116 + if (!fuse_allow_current_process(fm->fc)) 117 117 return -EACCES; 118 118 119 - if (fc->no_listxattr) 119 + if (fm->fc->no_listxattr) 120 120 return -EOPNOTSUPP; 121 121 122 122 memset(&inarg, 0, sizeof(inarg)); ··· 136 136 args.out_args[0].size = sizeof(outarg); 137 137 args.out_args[0].value = &outarg; 138 138 } 139 - ret = fuse_simple_request(fc, &args); 139 + ret = fuse_simple_request(fm, &args); 140 140 if (!ret && !size) 141 141 ret = min_t(ssize_t, outarg.size, XATTR_LIST_MAX); 142 142 if (ret > 0 && size) 143 143 ret = fuse_verify_xattr_list(list, ret); 144 144 if (ret == -ENOSYS) { 145 - fc->no_listxattr = 1; 145 + fm->fc->no_listxattr = 1; 146 146 ret = -EOPNOTSUPP; 147 147 } 148 148 return ret; ··· 150 150 151 151 int fuse_removexattr(struct inode *inode, const char *name) 152 152 { 153 - struct fuse_conn *fc = get_fuse_conn(inode); 153 + struct fuse_mount *fm = get_fuse_mount(inode); 154 154 FUSE_ARGS(args); 155 155 int err; 156 156 157 - if (fc->no_removexattr) 157 + if (fm->fc->no_removexattr) 158 158 return -EOPNOTSUPP; 159 159 160 160 args.opcode = FUSE_REMOVEXATTR; ··· 162 162 args.in_numargs = 1; 163 163 args.in_args[0].size = strlen(name) + 1; 164 164 args.in_args[0].value = name; 165 - err = fuse_simple_request(fc, &args); 165 + err = fuse_simple_request(fm, &args); 166 166 if (err == -ENOSYS) { 167 - fc->no_removexattr = 1; 167 + fm->fc->no_removexattr = 1; 168 168 err = -EOPNOTSUPP; 169 169 } 170 170 if (!err) {
+6
include/linux/dax.h
··· 149 149 struct dax_device *dax_dev, struct writeback_control *wbc);
 150 150 
 151 151 struct page *dax_layout_busy_page(struct address_space *mapping);
 152 + struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end);
 152 153 dax_entry_t dax_lock_page(struct page *page);
 153 154 void dax_unlock_page(struct page *page, dax_entry_t cookie);
 154 155 #else
··· 176 175 }
 177 176 
 178 177 static inline struct page *dax_layout_busy_page(struct address_space *mapping)
 178 + {
 179 + return NULL;
 180 + }
 181 + 
 182 + static inline struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end)
 179 183 {
 180 184 return NULL;
 181 185 }
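dax_layout_busy_page_range() lets a filesystem look for pinned DAX pages in just the byte range affected by a truncate or hole punch, rather than scanning and unmapping the whole file, and the virtiofs DAX code in this series uses it to serialize those operations against the fault path. A rough kernel-style sketch of how a caller might drive it follows; break_layouts_wait() is a hypothetical stand-in for the filesystem-specific wait until the page's extra references are dropped.

#include <linux/dax.h>
#include <linux/fs.h>

/* Sketch only: wait out pinned pages in [start, end] before a layout change. */
static int break_dax_layouts(struct inode *inode, loff_t start, loff_t end)
{
	struct page *page;
	int ret;

	while ((page = dax_layout_busy_page_range(inode->i_mapping,
						  start, end))) {
		ret = break_layouts_wait(inode, page);	/* hypothetical helper */
		if (ret < 0)
			return ret;
	}
	return 0;
}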
+47 -3
include/uapi/linux/fuse.h
··· 172 172 * - add FUSE_WRITE_KILL_PRIV flag
 173 173 * - add FUSE_SETUPMAPPING and FUSE_REMOVEMAPPING
 174 174 * - add map_alignment to fuse_init_out, add FUSE_MAP_ALIGNMENT flag
 175 + *
 176 + * 7.32
 177 + * - add flags to fuse_attr, add FUSE_ATTR_SUBMOUNT, add FUSE_SUBMOUNTS
 175 178 */
 176 179 
 177 180 #ifndef _LINUX_FUSE_H
··· 210 207 #define FUSE_KERNEL_VERSION 7
 211 208 
 212 209 /** Minor version number of this interface */
 213 - #define FUSE_KERNEL_MINOR_VERSION 31
 210 + #define FUSE_KERNEL_MINOR_VERSION 32
 214 211 
 215 212 /** The node ID of the root inode */
 216 213 #define FUSE_ROOT_ID 1
··· 234 231 uint32_t gid;
 235 232 uint32_t rdev;
 236 233 uint32_t blksize;
 237 - uint32_t padding;
 234 + uint32_t flags;
 238 235 };
 239 236 
 240 237 struct fuse_kstatfs {
··· 316 313 * FUSE_CACHE_SYMLINKS: cache READLINK responses
 317 314 * FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir
 318 315 * FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request
 319 - * FUSE_MAP_ALIGNMENT: map_alignment field is valid
 316 + * FUSE_MAP_ALIGNMENT: init_out.map_alignment contains log2(byte alignment) for
 317 + * foffset and moffset fields in struct
 318 + * fuse_setupmapping_in and fuse_removemapping_one.
 319 + * FUSE_SUBMOUNTS: kernel supports auto-mounting directory submounts
 320 320 */
 321 321 #define FUSE_ASYNC_READ (1 << 0)
 322 322 #define FUSE_POSIX_LOCKS (1 << 1)
··· 348 342 #define FUSE_NO_OPENDIR_SUPPORT (1 << 24)
 349 343 #define FUSE_EXPLICIT_INVAL_DATA (1 << 25)
 350 344 #define FUSE_MAP_ALIGNMENT (1 << 26)
 345 + #define FUSE_SUBMOUNTS (1 << 27)
 351 346 
 352 347 /**
 353 348 * CUSE INIT request/reply flags
··· 423 416 * FUSE_FSYNC_FDATASYNC: Sync data only, not metadata
 424 417 */
 425 418 #define FUSE_FSYNC_FDATASYNC (1 << 0)
 419 + 
 420 + /**
 421 + * fuse_attr flags
 422 + *
 423 + * FUSE_ATTR_SUBMOUNT: Object is a submount root
 424 + */
 425 + #define FUSE_ATTR_SUBMOUNT (1 << 0)
 426 426 
 427 427 enum fuse_opcode {
 428 428 FUSE_LOOKUP = 1,
··· 905 891 uint64_t len;
 906 892 uint64_t flags;
 907 893 };
 894 + 
 895 + #define FUSE_SETUPMAPPING_FLAG_WRITE (1ull << 0)
 896 + #define FUSE_SETUPMAPPING_FLAG_READ (1ull << 1)
 897 + struct fuse_setupmapping_in {
 898 + /* An already open handle */
 899 + uint64_t fh;
 900 + /* Offset into the file to start the mapping */
 901 + uint64_t foffset;
 902 + /* Length of mapping required */
 903 + uint64_t len;
 904 + /* Flags, FUSE_SETUPMAPPING_FLAG_* */
 905 + uint64_t flags;
 906 + /* Offset in Memory Window */
 907 + uint64_t moffset;
 908 + };
 909 + 
 910 + struct fuse_removemapping_in {
 911 + /* number of fuse_removemapping_one entries that follow */
 912 + uint32_t count;
 913 + };
 914 + 
 915 + struct fuse_removemapping_one {
 916 + /* Offset into the dax window to start the unmapping */
 917 + uint64_t moffset;
 918 + /* Length of mapping to remove */
 919 + uint64_t len;
 920 + };
 921 + 
 922 + #define FUSE_REMOVEMAPPING_MAX_ENTRY \
 923 + (PAGE_SIZE / sizeof(struct fuse_removemapping_one))
 908 924 
 909 925 #endif /* _LINUX_FUSE_H */
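The new 7.32 additions carry the DAX mapping protocol: FUSE_SETUPMAPPING asks the server to map a file range at a given offset in the DAX window, and FUSE_REMOVEMAPPING batches unmappings as a struct fuse_removemapping_in header followed by count struct fuse_removemapping_one entries, capped at FUSE_REMOVEMAPPING_MAX_ENTRY per request. The sketch below shows how such a payload is laid out; encode_removemapping() is an illustrative helper, not part of any API, and it assumes a <linux/fuse.h> new enough to carry the 7.32 structures.

#include <linux/fuse.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Pack a header plus 'count' entries into 'buf'; returns bytes written. */
static size_t encode_removemapping(void *buf,
				   const struct fuse_removemapping_one *ents,
				   uint32_t count)
{
	struct fuse_removemapping_in hdr = { .count = count };
	uint8_t *p = buf;

	memcpy(p, &hdr, sizeof(hdr));
	memcpy(p + sizeof(hdr), ents, count * sizeof(*ents));
	return sizeof(hdr) + count * sizeof(*ents);
}

int main(void)
{
	unsigned char buf[64];
	struct fuse_removemapping_one one = { .moffset = 0, .len = 2 * 1024 * 1024 };

	printf("payload is %zu bytes\n", encode_removemapping(buf, &one, 1));
	return 0;
}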
+3
include/uapi/linux/virtio_fs.h
··· 16 16 __le32 num_request_queues; 17 17 } __attribute__((packed)); 18 18 19 + /* For the id field in virtio_pci_shm_cap */ 20 + #define VIRTIO_FS_SHMCAP_ID_CACHE 0 21 + 19 22 #endif /* _UAPI_LINUX_VIRTIO_FS_H */
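VIRTIO_FS_SHMCAP_ID_CACHE names the virtio shared memory region through which the device exposes its DAX window; the guest driver looks the region up by this ID and then maps it for direct access when the dax mount option is enabled. A kernel-style sketch of that lookup follows; find_dax_window() is an illustrative name, and the error handling plus the memremap/dax_device setup done by this series are omitted.

#include <linux/errno.h>
#include <linux/virtio_config.h>
#include <uapi/linux/virtio_fs.h>

/* Sketch: ask the device for the DAX window advertised as shm region ID 0. */
static int find_dax_window(struct virtio_device *vdev,
			   struct virtio_shm_region *cache_reg)
{
	if (!virtio_get_shm_region(vdev, cache_reg, VIRTIO_FS_SHMCAP_ID_CACHE))
		return -ENXIO;	/* no DAX window offered; mount without dax */

	dev_dbg(&vdev->dev, "DAX window: addr 0x%llx len 0x%llx\n",
		(unsigned long long)cache_reg->addr,
		(unsigned long long)cache_reg->len);
	return 0;
}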