Merge tag 'fuse-update-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse

Pull fuse updates from Miklos Szeredi:

- Support directly accessing the host page cache from virtiofs. This can
  improve I/O performance for various workloads, as well as reduce the
  memory requirement by eliminating double caching. Thanks to Vivek
  Goyal for doing most of the work on this.
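  As an illustration only (not part of the series itself), the new DAX code
  paths are exercised by mounting a virtiofs filesystem with the "dax"
  option from inside the guest; the tag "myfs" and the mount point below
  are placeholders:

    /*
     * Minimal sketch: request DAX at mount time via the fs-specific
     * option string.  Requires CONFIG_FUSE_DAX and a DAX window on the
     * virtio-fs device; tag and target are hypothetical.
     */
    #include <sys/mount.h>
    #include <stdio.h>

    int main(void)
    {
            if (mount("myfs", "/mnt", "virtiofs", 0, "dax")) {
                    perror("mount");
                    return 1;
            }
            return 0;
    }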

- Allow automatic submounting inside virtiofs. This allows unique
  st_dev/st_ino values to be assigned inside the guest to files
  residing on different filesystems on the host. Thanks to Max Reitz
  for the patches.
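  A hedged sketch of the visible effect: with submount support, files that
  live on different host filesystems report distinct st_dev values inside
  the guest. The paths below are hypothetical.

    #include <sys/stat.h>
    #include <stdio.h>

    int main(void)
    {
            struct stat a, b;

            /* Two files that reside on different filesystems on the host */
            if (stat("/mnt/virtiofs/root-fs/file", &a) ||
                stat("/mnt/virtiofs/other-fs/file", &b))
                    return 1;

            printf("st_dev: %llu vs %llu\n",
                   (unsigned long long)a.st_dev,
                   (unsigned long long)b.st_dev);
            return 0;
    }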

- Fix an old use-after-free bug found by Pradeep P V K.

* tag 'fuse-update-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse: (25 commits)
virtiofs: calculate number of scatter-gather elements accurately
fuse: connection remove fix
fuse: implement crossmounts
fuse: Allow fuse_fill_super_common() for submounts
fuse: split fuse_mount off of fuse_conn
fuse: drop fuse_conn parameter where possible
fuse: store fuse_conn in fuse_req
fuse: add submount support to <uapi/linux/fuse.h>
fuse: fix page dereference after free
virtiofs: add logic to free up a memory range
virtiofs: maintain a list of busy elements
virtiofs: serialize truncate/punch_hole and dax fault path
virtiofs: define dax address space operations
virtiofs: add DAX mmap support
virtiofs: implement dax read/write operations
virtiofs: introduce setupmapping/removemapping commands
virtiofs: implement FUSE_INIT map_alignment field
virtiofs: keep a list of free dax memory ranges
virtiofs: add a mount option to enable dax
virtiofs: set up virtio_fs dax_device
...

+2690 -497
+1 -1
Documentation/filesystems/fuse.rst
··· 47 using the sftp protocol. 48 49 The userspace library and utilities are available from the 50 - `FUSE homepage: <http://fuse.sourceforge.net/>`_ 51 52 Filesystem type 53 ===============
··· 47 using the sftp protocol. 48 49 The userspace library and utilities are available from the 50 + `FUSE homepage: <https://github.com/libfuse/>`_ 51 52 Filesystem type 53 ===============
+1 -1
MAINTAINERS
··· 7238 M: Miklos Szeredi <miklos@szeredi.hu> 7239 L: linux-fsdevel@vger.kernel.org 7240 S: Maintained 7241 - W: http://fuse.sourceforge.net/ 7242 T: git git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git 7243 F: Documentation/filesystems/fuse.rst 7244 F: fs/fuse/
··· 7238 M: Miklos Szeredi <miklos@szeredi.hu> 7239 L: linux-fsdevel@vger.kernel.org 7240 S: Maintained 7241 + W: https://github.com/libfuse/ 7242 T: git git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git 7243 F: Documentation/filesystems/fuse.rst 7244 F: fs/fuse/
+2 -1
drivers/dax/super.c
··· 46 int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size, 47 pgoff_t *pgoff) 48 { 49 - phys_addr_t phys_off = (get_start_sect(bdev) + sector) * 512; 50 51 if (pgoff) 52 *pgoff = PHYS_PFN(phys_off);
··· 46 int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size, 47 pgoff_t *pgoff) 48 { 49 + sector_t start_sect = bdev ? get_start_sect(bdev) : 0; 50 + phys_addr_t phys_off = (start_sect + sector) * 512; 51 52 if (pgoff) 53 *pgoff = PHYS_PFN(phys_off);
+23 -6
fs/dax.c
··· 559 } 560 561 /** 562 - * dax_layout_busy_page - find first pinned page in @mapping 563 * @mapping: address space to scan for a page with ref count > 1 564 * 565 * DAX requires ZONE_DEVICE mapped pages. These pages are never 566 * 'onlined' to the page allocator so they are considered idle when ··· 576 * to be able to run unmap_mapping_range() and subsequently not race 577 * mapping_mapped() becoming true. 578 */ 579 - struct page *dax_layout_busy_page(struct address_space *mapping) 580 { 581 - XA_STATE(xas, &mapping->i_pages, 0); 582 void *entry; 583 unsigned int scanned = 0; 584 struct page *page = NULL; 585 586 /* 587 * In the 'limited' case get_user_pages() for dax is disabled. ··· 595 if (!dax_mapping(mapping) || !mapping_mapped(mapping)) 596 return NULL; 597 598 /* 599 * If we race get_user_pages_fast() here either we'll see the 600 * elevated page count in the iteration and wait, or ··· 607 * against is no longer mapped in the page tables and bail to the 608 * get_user_pages() slow path. The slow path is protected by 609 * pte_lock() and pmd_lock(). New references are not taken without 610 - * holding those locks, and unmap_mapping_range() will not zero the 611 * pte or pmd without holding the respective lock, so we are 612 * guaranteed to either see new references or prevent new 613 * references from being established. 614 */ 615 - unmap_mapping_range(mapping, 0, 0, 0); 616 617 xas_lock_irq(&xas); 618 - xas_for_each(&xas, entry, ULONG_MAX) { 619 if (WARN_ON_ONCE(!xa_is_value(entry))) 620 continue; 621 if (unlikely(dax_is_locked(entry))) ··· 635 } 636 xas_unlock_irq(&xas); 637 return page; 638 } 639 EXPORT_SYMBOL_GPL(dax_layout_busy_page); 640
··· 559 } 560 561 /** 562 + * dax_layout_busy_page_range - find first pinned page in @mapping 563 * @mapping: address space to scan for a page with ref count > 1 564 + * @start: Starting offset. Page containing 'start' is included. 565 + * @end: End offset. Page containing 'end' is included. If 'end' is LLONG_MAX, 566 + * pages from 'start' till the end of file are included. 567 * 568 * DAX requires ZONE_DEVICE mapped pages. These pages are never 569 * 'onlined' to the page allocator so they are considered idle when ··· 573 * to be able to run unmap_mapping_range() and subsequently not race 574 * mapping_mapped() becoming true. 575 */ 576 + struct page *dax_layout_busy_page_range(struct address_space *mapping, 577 + loff_t start, loff_t end) 578 { 579 void *entry; 580 unsigned int scanned = 0; 581 struct page *page = NULL; 582 + pgoff_t start_idx = start >> PAGE_SHIFT; 583 + pgoff_t end_idx; 584 + XA_STATE(xas, &mapping->i_pages, start_idx); 585 586 /* 587 * In the 'limited' case get_user_pages() for dax is disabled. ··· 589 if (!dax_mapping(mapping) || !mapping_mapped(mapping)) 590 return NULL; 591 592 + /* If end == LLONG_MAX, all pages from start to till end of file */ 593 + if (end == LLONG_MAX) 594 + end_idx = ULONG_MAX; 595 + else 596 + end_idx = end >> PAGE_SHIFT; 597 /* 598 * If we race get_user_pages_fast() here either we'll see the 599 * elevated page count in the iteration and wait, or ··· 596 * against is no longer mapped in the page tables and bail to the 597 * get_user_pages() slow path. The slow path is protected by 598 * pte_lock() and pmd_lock(). New references are not taken without 599 + * holding those locks, and unmap_mapping_pages() will not zero the 600 * pte or pmd without holding the respective lock, so we are 601 * guaranteed to either see new references or prevent new 602 * references from being established. 603 */ 604 + unmap_mapping_pages(mapping, start_idx, end_idx - start_idx + 1, 0); 605 606 xas_lock_irq(&xas); 607 + xas_for_each(&xas, entry, end_idx) { 608 if (WARN_ON_ONCE(!xa_is_value(entry))) 609 continue; 610 if (unlikely(dax_is_locked(entry))) ··· 624 } 625 xas_unlock_irq(&xas); 626 return page; 627 + } 628 + EXPORT_SYMBOL_GPL(dax_layout_busy_page_range); 629 + 630 + struct page *dax_layout_busy_page(struct address_space *mapping) 631 + { 632 + return dax_layout_busy_page_range(mapping, 0, LLONG_MAX); 633 } 634 EXPORT_SYMBOL_GPL(dax_layout_busy_page); 635
+15 -1
fs/fuse/Kconfig
··· 8 9 There's also a companion library: libfuse2. This library is available 10 from the FUSE homepage: 11 - <http://fuse.sourceforge.net/> 12 although chances are your distribution already has that library 13 installed if you've installed the "fuse" package itself. 14 ··· 38 39 If you want to share files between guests or with the host, answer Y 40 or M.
··· 8 9 There's also a companion library: libfuse2. This library is available 10 from the FUSE homepage: 11 + <https://github.com/libfuse/> 12 although chances are your distribution already has that library 13 installed if you've installed the "fuse" package itself. 14 ··· 38 39 If you want to share files between guests or with the host, answer Y 40 or M. 41 + 42 + config FUSE_DAX 43 + bool "Virtio Filesystem Direct Host Memory Access support" 44 + default y 45 + select INTERVAL_TREE 46 + depends on VIRTIO_FS 47 + depends on FS_DAX 48 + depends on DAX_DRIVER 49 + help 50 + This allows bypassing guest page cache and allows mapping host page 51 + cache directly in guest address space. 52 + 53 + If you want to allow mounting a Virtio Filesystem with the "dax" 54 + option, answer Y.
+4 -2
fs/fuse/Makefile
··· 7 obj-$(CONFIG_CUSE) += cuse.o 8 obj-$(CONFIG_VIRTIO_FS) += virtiofs.o 9 10 - fuse-objs := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o 11 - virtiofs-y += virtio_fs.o
··· 7 obj-$(CONFIG_CUSE) += cuse.o 8 obj-$(CONFIG_VIRTIO_FS) += virtiofs.o 9 10 + fuse-y := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o 11 + fuse-$(CONFIG_FUSE_DAX) += dax.o 12 + 13 + virtiofs-y := virtio_fs.o
+15 -5
fs/fuse/control.c
··· 164 { 165 unsigned val; 166 struct fuse_conn *fc; 167 ssize_t ret; 168 169 ret = fuse_conn_limit_write(file, buf, count, ppos, &val, ··· 175 if (!fc) 176 goto out; 177 178 spin_lock(&fc->bg_lock); 179 fc->congestion_threshold = val; 180 - if (fc->sb) { 181 if (fc->num_background < fc->congestion_threshold) { 182 - clear_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC); 183 - clear_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC); 184 } else { 185 - set_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC); 186 - set_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC); 187 } 188 } 189 spin_unlock(&fc->bg_lock); 190 fuse_conn_put(fc); 191 out: 192 return ret;
··· 164 { 165 unsigned val; 166 struct fuse_conn *fc; 167 + struct fuse_mount *fm; 168 ssize_t ret; 169 170 ret = fuse_conn_limit_write(file, buf, count, ppos, &val, ··· 174 if (!fc) 175 goto out; 176 177 + down_read(&fc->killsb); 178 spin_lock(&fc->bg_lock); 179 fc->congestion_threshold = val; 180 + 181 + /* 182 + * Get any fuse_mount belonging to this fuse_conn; s_bdi is 183 + * shared between all of them 184 + */ 185 + 186 + if (!list_empty(&fc->mounts)) { 187 + fm = list_first_entry(&fc->mounts, struct fuse_mount, fc_entry); 188 if (fc->num_background < fc->congestion_threshold) { 189 + clear_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC); 190 + clear_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC); 191 } else { 192 + set_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC); 193 + set_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC); 194 } 195 } 196 spin_unlock(&fc->bg_lock); 197 + up_read(&fc->killsb); 198 fuse_conn_put(fc); 199 out: 200 return ret;
+12 -9
fs/fuse/cuse.c
··· 57 58 struct cuse_conn { 59 struct list_head list; /* linked on cuse_conntbl */ 60 struct fuse_conn fc; /* fuse connection */ 61 struct cdev *cdev; /* associated character device */ 62 struct device *dev; /* device representing @cdev */ ··· 135 * Generic permission check is already done against the chrdev 136 * file, proceed to open. 137 */ 138 - rc = fuse_do_open(&cc->fc, 0, file, 0); 139 if (rc) 140 fuse_conn_put(&cc->fc); 141 return rc; ··· 144 static int cuse_release(struct inode *inode, struct file *file) 145 { 146 struct fuse_file *ff = file->private_data; 147 - struct fuse_conn *fc = ff->fc; 148 149 fuse_sync_release(NULL, ff, file->f_flags); 150 - fuse_conn_put(fc); 151 152 return 0; 153 } ··· 156 unsigned long arg) 157 { 158 struct fuse_file *ff = file->private_data; 159 - struct cuse_conn *cc = fc_to_cc(ff->fc); 160 unsigned int flags = 0; 161 162 if (cc->unrestricted_ioctl) ··· 169 unsigned long arg) 170 { 171 struct fuse_file *ff = file->private_data; 172 - struct cuse_conn *cc = fc_to_cc(ff->fc); 173 unsigned int flags = FUSE_IOCTL_COMPAT; 174 175 if (cc->unrestricted_ioctl) ··· 314 * required data structures for it. Please read the comment at the 315 * top of this file for high level overview. 316 */ 317 - static void cuse_process_init_reply(struct fuse_conn *fc, 318 struct fuse_args *args, int error) 319 { 320 struct cuse_init_args *ia = container_of(args, typeof(*ia), ap.args); 321 struct fuse_args_pages *ap = &ia->ap; 322 struct cuse_conn *cc = fc_to_cc(fc), *pos; ··· 426 { 427 int rc; 428 struct page *page; 429 - struct fuse_conn *fc = &cc->fc; 430 struct cuse_init_args *ia; 431 struct fuse_args_pages *ap; 432 ··· 462 ia->desc.length = ap->args.out_args[1].size; 463 ap->args.end = cuse_process_init_reply; 464 465 - rc = fuse_simple_background(fc, &ap->args, GFP_KERNEL); 466 if (rc) { 467 kfree(ia); 468 err_free_page: ··· 508 * Limit the cuse channel to requests that can 509 * be represented in file->f_cred->user_ns. 510 */ 511 - fuse_conn_init(&cc->fc, file->f_cred->user_ns, &fuse_dev_fiq_ops, NULL); 512 513 fud = fuse_dev_alloc_install(&cc->fc); 514 if (!fud) {
··· 57 58 struct cuse_conn { 59 struct list_head list; /* linked on cuse_conntbl */ 60 + struct fuse_mount fm; /* Dummy mount referencing fc */ 61 struct fuse_conn fc; /* fuse connection */ 62 struct cdev *cdev; /* associated character device */ 63 struct device *dev; /* device representing @cdev */ ··· 134 * Generic permission check is already done against the chrdev 135 * file, proceed to open. 136 */ 137 + rc = fuse_do_open(&cc->fm, 0, file, 0); 138 if (rc) 139 fuse_conn_put(&cc->fc); 140 return rc; ··· 143 static int cuse_release(struct inode *inode, struct file *file) 144 { 145 struct fuse_file *ff = file->private_data; 146 + struct fuse_mount *fm = ff->fm; 147 148 fuse_sync_release(NULL, ff, file->f_flags); 149 + fuse_conn_put(fm->fc); 150 151 return 0; 152 } ··· 155 unsigned long arg) 156 { 157 struct fuse_file *ff = file->private_data; 158 + struct cuse_conn *cc = fc_to_cc(ff->fm->fc); 159 unsigned int flags = 0; 160 161 if (cc->unrestricted_ioctl) ··· 168 unsigned long arg) 169 { 170 struct fuse_file *ff = file->private_data; 171 + struct cuse_conn *cc = fc_to_cc(ff->fm->fc); 172 unsigned int flags = FUSE_IOCTL_COMPAT; 173 174 if (cc->unrestricted_ioctl) ··· 313 * required data structures for it. Please read the comment at the 314 * top of this file for high level overview. 315 */ 316 + static void cuse_process_init_reply(struct fuse_mount *fm, 317 struct fuse_args *args, int error) 318 { 319 + struct fuse_conn *fc = fm->fc; 320 struct cuse_init_args *ia = container_of(args, typeof(*ia), ap.args); 321 struct fuse_args_pages *ap = &ia->ap; 322 struct cuse_conn *cc = fc_to_cc(fc), *pos; ··· 424 { 425 int rc; 426 struct page *page; 427 + struct fuse_mount *fm = &cc->fm; 428 struct cuse_init_args *ia; 429 struct fuse_args_pages *ap; 430 ··· 460 ia->desc.length = ap->args.out_args[1].size; 461 ap->args.end = cuse_process_init_reply; 462 463 + rc = fuse_simple_background(fm, &ap->args, GFP_KERNEL); 464 if (rc) { 465 kfree(ia); 466 err_free_page: ··· 506 * Limit the cuse channel to requests that can 507 * be represented in file->f_cred->user_ns. 508 */ 509 + fuse_conn_init(&cc->fc, &cc->fm, file->f_cred->user_ns, 510 + &fuse_dev_fiq_ops, NULL); 511 512 fud = fuse_dev_alloc_install(&cc->fc); 513 if (!fud) {
+1365
fs/fuse/dax.c
···
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * dax: direct host memory access 4 + * Copyright (C) 2020 Red Hat, Inc. 5 + */ 6 + 7 + #include "fuse_i.h" 8 + 9 + #include <linux/delay.h> 10 + #include <linux/dax.h> 11 + #include <linux/uio.h> 12 + #include <linux/pfn_t.h> 13 + #include <linux/iomap.h> 14 + #include <linux/interval_tree.h> 15 + 16 + /* 17 + * Default memory range size. A power of 2 so it agrees with common FUSE_INIT 18 + * map_alignment values 4KB and 64KB. 19 + */ 20 + #define FUSE_DAX_SHIFT 21 21 + #define FUSE_DAX_SZ (1 << FUSE_DAX_SHIFT) 22 + #define FUSE_DAX_PAGES (FUSE_DAX_SZ / PAGE_SIZE) 23 + 24 + /* Number of ranges reclaimer will try to free in one invocation */ 25 + #define FUSE_DAX_RECLAIM_CHUNK (10) 26 + 27 + /* 28 + * Dax memory reclaim threshold in percetage of total ranges. When free 29 + * number of free ranges drops below this threshold, reclaim can trigger 30 + * Default is 20% 31 + */ 32 + #define FUSE_DAX_RECLAIM_THRESHOLD (20) 33 + 34 + /** Translation information for file offsets to DAX window offsets */ 35 + struct fuse_dax_mapping { 36 + /* Pointer to inode where this memory range is mapped */ 37 + struct inode *inode; 38 + 39 + /* Will connect in fcd->free_ranges to keep track of free memory */ 40 + struct list_head list; 41 + 42 + /* For interval tree in file/inode */ 43 + struct interval_tree_node itn; 44 + 45 + /* Will connect in fc->busy_ranges to keep track busy memory */ 46 + struct list_head busy_list; 47 + 48 + /** Position in DAX window */ 49 + u64 window_offset; 50 + 51 + /** Length of mapping, in bytes */ 52 + loff_t length; 53 + 54 + /* Is this mapping read-only or read-write */ 55 + bool writable; 56 + 57 + /* reference count when the mapping is used by dax iomap. */ 58 + refcount_t refcnt; 59 + }; 60 + 61 + /* Per-inode dax map */ 62 + struct fuse_inode_dax { 63 + /* Semaphore to protect modifications to the dmap tree */ 64 + struct rw_semaphore sem; 65 + 66 + /* Sorted rb tree of struct fuse_dax_mapping elements */ 67 + struct rb_root_cached tree; 68 + unsigned long nr; 69 + }; 70 + 71 + struct fuse_conn_dax { 72 + /* DAX device */ 73 + struct dax_device *dev; 74 + 75 + /* Lock protecting accessess to members of this structure */ 76 + spinlock_t lock; 77 + 78 + /* List of memory ranges which are busy */ 79 + unsigned long nr_busy_ranges; 80 + struct list_head busy_ranges; 81 + 82 + /* Worker to free up memory ranges */ 83 + struct delayed_work free_work; 84 + 85 + /* Wait queue for a dax range to become free */ 86 + wait_queue_head_t range_waitq; 87 + 88 + /* DAX Window Free Ranges */ 89 + long nr_free_ranges; 90 + struct list_head free_ranges; 91 + 92 + unsigned long nr_ranges; 93 + }; 94 + 95 + static inline struct fuse_dax_mapping * 96 + node_to_dmap(struct interval_tree_node *node) 97 + { 98 + if (!node) 99 + return NULL; 100 + 101 + return container_of(node, struct fuse_dax_mapping, itn); 102 + } 103 + 104 + static struct fuse_dax_mapping * 105 + alloc_dax_mapping_reclaim(struct fuse_conn_dax *fcd, struct inode *inode); 106 + 107 + static void 108 + __kick_dmap_free_worker(struct fuse_conn_dax *fcd, unsigned long delay_ms) 109 + { 110 + unsigned long free_threshold; 111 + 112 + /* If number of free ranges are below threshold, start reclaim */ 113 + free_threshold = max_t(unsigned long, fcd->nr_ranges * FUSE_DAX_RECLAIM_THRESHOLD / 100, 114 + 1); 115 + if (fcd->nr_free_ranges < free_threshold) 116 + queue_delayed_work(system_long_wq, &fcd->free_work, 117 + msecs_to_jiffies(delay_ms)); 118 + } 119 + 120 + static void 
kick_dmap_free_worker(struct fuse_conn_dax *fcd, 121 + unsigned long delay_ms) 122 + { 123 + spin_lock(&fcd->lock); 124 + __kick_dmap_free_worker(fcd, delay_ms); 125 + spin_unlock(&fcd->lock); 126 + } 127 + 128 + static struct fuse_dax_mapping *alloc_dax_mapping(struct fuse_conn_dax *fcd) 129 + { 130 + struct fuse_dax_mapping *dmap; 131 + 132 + spin_lock(&fcd->lock); 133 + dmap = list_first_entry_or_null(&fcd->free_ranges, 134 + struct fuse_dax_mapping, list); 135 + if (dmap) { 136 + list_del_init(&dmap->list); 137 + WARN_ON(fcd->nr_free_ranges <= 0); 138 + fcd->nr_free_ranges--; 139 + } 140 + spin_unlock(&fcd->lock); 141 + 142 + kick_dmap_free_worker(fcd, 0); 143 + return dmap; 144 + } 145 + 146 + /* This assumes fcd->lock is held */ 147 + static void __dmap_remove_busy_list(struct fuse_conn_dax *fcd, 148 + struct fuse_dax_mapping *dmap) 149 + { 150 + list_del_init(&dmap->busy_list); 151 + WARN_ON(fcd->nr_busy_ranges == 0); 152 + fcd->nr_busy_ranges--; 153 + } 154 + 155 + static void dmap_remove_busy_list(struct fuse_conn_dax *fcd, 156 + struct fuse_dax_mapping *dmap) 157 + { 158 + spin_lock(&fcd->lock); 159 + __dmap_remove_busy_list(fcd, dmap); 160 + spin_unlock(&fcd->lock); 161 + } 162 + 163 + /* This assumes fcd->lock is held */ 164 + static void __dmap_add_to_free_pool(struct fuse_conn_dax *fcd, 165 + struct fuse_dax_mapping *dmap) 166 + { 167 + list_add_tail(&dmap->list, &fcd->free_ranges); 168 + fcd->nr_free_ranges++; 169 + wake_up(&fcd->range_waitq); 170 + } 171 + 172 + static void dmap_add_to_free_pool(struct fuse_conn_dax *fcd, 173 + struct fuse_dax_mapping *dmap) 174 + { 175 + /* Return fuse_dax_mapping to free list */ 176 + spin_lock(&fcd->lock); 177 + __dmap_add_to_free_pool(fcd, dmap); 178 + spin_unlock(&fcd->lock); 179 + } 180 + 181 + static int fuse_setup_one_mapping(struct inode *inode, unsigned long start_idx, 182 + struct fuse_dax_mapping *dmap, bool writable, 183 + bool upgrade) 184 + { 185 + struct fuse_mount *fm = get_fuse_mount(inode); 186 + struct fuse_conn_dax *fcd = fm->fc->dax; 187 + struct fuse_inode *fi = get_fuse_inode(inode); 188 + struct fuse_setupmapping_in inarg; 189 + loff_t offset = start_idx << FUSE_DAX_SHIFT; 190 + FUSE_ARGS(args); 191 + ssize_t err; 192 + 193 + WARN_ON(fcd->nr_free_ranges < 0); 194 + 195 + /* Ask fuse daemon to setup mapping */ 196 + memset(&inarg, 0, sizeof(inarg)); 197 + inarg.foffset = offset; 198 + inarg.fh = -1; 199 + inarg.moffset = dmap->window_offset; 200 + inarg.len = FUSE_DAX_SZ; 201 + inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ; 202 + if (writable) 203 + inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE; 204 + args.opcode = FUSE_SETUPMAPPING; 205 + args.nodeid = fi->nodeid; 206 + args.in_numargs = 1; 207 + args.in_args[0].size = sizeof(inarg); 208 + args.in_args[0].value = &inarg; 209 + err = fuse_simple_request(fm, &args); 210 + if (err < 0) 211 + return err; 212 + dmap->writable = writable; 213 + if (!upgrade) { 214 + /* 215 + * We don't take a refernce on inode. inode is valid right now 216 + * and when inode is going away, cleanup logic should first 217 + * cleanup dmap entries. 
218 + */ 219 + dmap->inode = inode; 220 + dmap->itn.start = dmap->itn.last = start_idx; 221 + /* Protected by fi->dax->sem */ 222 + interval_tree_insert(&dmap->itn, &fi->dax->tree); 223 + fi->dax->nr++; 224 + spin_lock(&fcd->lock); 225 + list_add_tail(&dmap->busy_list, &fcd->busy_ranges); 226 + fcd->nr_busy_ranges++; 227 + spin_unlock(&fcd->lock); 228 + } 229 + return 0; 230 + } 231 + 232 + static int fuse_send_removemapping(struct inode *inode, 233 + struct fuse_removemapping_in *inargp, 234 + struct fuse_removemapping_one *remove_one) 235 + { 236 + struct fuse_inode *fi = get_fuse_inode(inode); 237 + struct fuse_mount *fm = get_fuse_mount(inode); 238 + FUSE_ARGS(args); 239 + 240 + args.opcode = FUSE_REMOVEMAPPING; 241 + args.nodeid = fi->nodeid; 242 + args.in_numargs = 2; 243 + args.in_args[0].size = sizeof(*inargp); 244 + args.in_args[0].value = inargp; 245 + args.in_args[1].size = inargp->count * sizeof(*remove_one); 246 + args.in_args[1].value = remove_one; 247 + return fuse_simple_request(fm, &args); 248 + } 249 + 250 + static int dmap_removemapping_list(struct inode *inode, unsigned int num, 251 + struct list_head *to_remove) 252 + { 253 + struct fuse_removemapping_one *remove_one, *ptr; 254 + struct fuse_removemapping_in inarg; 255 + struct fuse_dax_mapping *dmap; 256 + int ret, i = 0, nr_alloc; 257 + 258 + nr_alloc = min_t(unsigned int, num, FUSE_REMOVEMAPPING_MAX_ENTRY); 259 + remove_one = kmalloc_array(nr_alloc, sizeof(*remove_one), GFP_NOFS); 260 + if (!remove_one) 261 + return -ENOMEM; 262 + 263 + ptr = remove_one; 264 + list_for_each_entry(dmap, to_remove, list) { 265 + ptr->moffset = dmap->window_offset; 266 + ptr->len = dmap->length; 267 + ptr++; 268 + i++; 269 + num--; 270 + if (i >= nr_alloc || num == 0) { 271 + memset(&inarg, 0, sizeof(inarg)); 272 + inarg.count = i; 273 + ret = fuse_send_removemapping(inode, &inarg, 274 + remove_one); 275 + if (ret) 276 + goto out; 277 + ptr = remove_one; 278 + i = 0; 279 + } 280 + } 281 + out: 282 + kfree(remove_one); 283 + return ret; 284 + } 285 + 286 + /* 287 + * Cleanup dmap entry and add back to free list. This should be called with 288 + * fcd->lock held. 289 + */ 290 + static void dmap_reinit_add_to_free_pool(struct fuse_conn_dax *fcd, 291 + struct fuse_dax_mapping *dmap) 292 + { 293 + pr_debug("fuse: freeing memory range start_idx=0x%lx end_idx=0x%lx window_offset=0x%llx length=0x%llx\n", 294 + dmap->itn.start, dmap->itn.last, dmap->window_offset, 295 + dmap->length); 296 + __dmap_remove_busy_list(fcd, dmap); 297 + dmap->inode = NULL; 298 + dmap->itn.start = dmap->itn.last = 0; 299 + __dmap_add_to_free_pool(fcd, dmap); 300 + } 301 + 302 + /* 303 + * Free inode dmap entries whose range falls inside [start, end]. 304 + * Does not take any locks. At this point of time it should only be 305 + * called from evict_inode() path where we know all dmap entries can be 306 + * reclaimed. 307 + */ 308 + static void inode_reclaim_dmap_range(struct fuse_conn_dax *fcd, 309 + struct inode *inode, 310 + loff_t start, loff_t end) 311 + { 312 + struct fuse_inode *fi = get_fuse_inode(inode); 313 + struct fuse_dax_mapping *dmap, *n; 314 + int err, num = 0; 315 + LIST_HEAD(to_remove); 316 + unsigned long start_idx = start >> FUSE_DAX_SHIFT; 317 + unsigned long end_idx = end >> FUSE_DAX_SHIFT; 318 + struct interval_tree_node *node; 319 + 320 + while (1) { 321 + node = interval_tree_iter_first(&fi->dax->tree, start_idx, 322 + end_idx); 323 + if (!node) 324 + break; 325 + dmap = node_to_dmap(node); 326 + /* inode is going away. 
There should not be any users of dmap */ 327 + WARN_ON(refcount_read(&dmap->refcnt) > 1); 328 + interval_tree_remove(&dmap->itn, &fi->dax->tree); 329 + num++; 330 + list_add(&dmap->list, &to_remove); 331 + } 332 + 333 + /* Nothing to remove */ 334 + if (list_empty(&to_remove)) 335 + return; 336 + 337 + WARN_ON(fi->dax->nr < num); 338 + fi->dax->nr -= num; 339 + err = dmap_removemapping_list(inode, num, &to_remove); 340 + if (err && err != -ENOTCONN) { 341 + pr_warn("Failed to removemappings. start=0x%llx end=0x%llx\n", 342 + start, end); 343 + } 344 + spin_lock(&fcd->lock); 345 + list_for_each_entry_safe(dmap, n, &to_remove, list) { 346 + list_del_init(&dmap->list); 347 + dmap_reinit_add_to_free_pool(fcd, dmap); 348 + } 349 + spin_unlock(&fcd->lock); 350 + } 351 + 352 + static int dmap_removemapping_one(struct inode *inode, 353 + struct fuse_dax_mapping *dmap) 354 + { 355 + struct fuse_removemapping_one forget_one; 356 + struct fuse_removemapping_in inarg; 357 + 358 + memset(&inarg, 0, sizeof(inarg)); 359 + inarg.count = 1; 360 + memset(&forget_one, 0, sizeof(forget_one)); 361 + forget_one.moffset = dmap->window_offset; 362 + forget_one.len = dmap->length; 363 + 364 + return fuse_send_removemapping(inode, &inarg, &forget_one); 365 + } 366 + 367 + /* 368 + * It is called from evict_inode() and by that time inode is going away. So 369 + * this function does not take any locks like fi->dax->sem for traversing 370 + * that fuse inode interval tree. If that lock is taken then lock validator 371 + * complains of deadlock situation w.r.t fs_reclaim lock. 372 + */ 373 + void fuse_dax_inode_cleanup(struct inode *inode) 374 + { 375 + struct fuse_conn *fc = get_fuse_conn(inode); 376 + struct fuse_inode *fi = get_fuse_inode(inode); 377 + 378 + /* 379 + * fuse_evict_inode() has already called truncate_inode_pages_final() 380 + * before we arrive here. So we should not have to worry about any 381 + * pages/exception entries still associated with inode. 382 + */ 383 + inode_reclaim_dmap_range(fc->dax, inode, 0, -1); 384 + WARN_ON(fi->dax->nr); 385 + } 386 + 387 + static void fuse_fill_iomap_hole(struct iomap *iomap, loff_t length) 388 + { 389 + iomap->addr = IOMAP_NULL_ADDR; 390 + iomap->length = length; 391 + iomap->type = IOMAP_HOLE; 392 + } 393 + 394 + static void fuse_fill_iomap(struct inode *inode, loff_t pos, loff_t length, 395 + struct iomap *iomap, struct fuse_dax_mapping *dmap, 396 + unsigned int flags) 397 + { 398 + loff_t offset, len; 399 + loff_t i_size = i_size_read(inode); 400 + 401 + offset = pos - (dmap->itn.start << FUSE_DAX_SHIFT); 402 + len = min(length, dmap->length - offset); 403 + 404 + /* If length is beyond end of file, truncate further */ 405 + if (pos + len > i_size) 406 + len = i_size - pos; 407 + 408 + if (len > 0) { 409 + iomap->addr = dmap->window_offset + offset; 410 + iomap->length = len; 411 + if (flags & IOMAP_FAULT) 412 + iomap->length = ALIGN(len, PAGE_SIZE); 413 + iomap->type = IOMAP_MAPPED; 414 + /* 415 + * increace refcnt so that reclaim code knows this dmap is in 416 + * use. This assumes fi->dax->sem mutex is held either 417 + * shared/exclusive. 
418 + */ 419 + refcount_inc(&dmap->refcnt); 420 + 421 + /* iomap->private should be NULL */ 422 + WARN_ON_ONCE(iomap->private); 423 + iomap->private = dmap; 424 + } else { 425 + /* Mapping beyond end of file is hole */ 426 + fuse_fill_iomap_hole(iomap, length); 427 + } 428 + } 429 + 430 + static int fuse_setup_new_dax_mapping(struct inode *inode, loff_t pos, 431 + loff_t length, unsigned int flags, 432 + struct iomap *iomap) 433 + { 434 + struct fuse_inode *fi = get_fuse_inode(inode); 435 + struct fuse_conn *fc = get_fuse_conn(inode); 436 + struct fuse_conn_dax *fcd = fc->dax; 437 + struct fuse_dax_mapping *dmap, *alloc_dmap = NULL; 438 + int ret; 439 + bool writable = flags & IOMAP_WRITE; 440 + unsigned long start_idx = pos >> FUSE_DAX_SHIFT; 441 + struct interval_tree_node *node; 442 + 443 + /* 444 + * Can't do inline reclaim in fault path. We call 445 + * dax_layout_busy_page() before we free a range. And 446 + * fuse_wait_dax_page() drops fi->i_mmap_sem lock and requires it. 447 + * In fault path we enter with fi->i_mmap_sem held and can't drop 448 + * it. Also in fault path we hold fi->i_mmap_sem shared and not 449 + * exclusive, so that creates further issues with fuse_wait_dax_page(). 450 + * Hence return -EAGAIN and fuse_dax_fault() will wait for a memory 451 + * range to become free and retry. 452 + */ 453 + if (flags & IOMAP_FAULT) { 454 + alloc_dmap = alloc_dax_mapping(fcd); 455 + if (!alloc_dmap) 456 + return -EAGAIN; 457 + } else { 458 + alloc_dmap = alloc_dax_mapping_reclaim(fcd, inode); 459 + if (IS_ERR(alloc_dmap)) 460 + return PTR_ERR(alloc_dmap); 461 + } 462 + 463 + /* If we are here, we should have memory allocated */ 464 + if (WARN_ON(!alloc_dmap)) 465 + return -EIO; 466 + 467 + /* 468 + * Take write lock so that only one caller can try to setup mapping 469 + * and other waits. 470 + */ 471 + down_write(&fi->dax->sem); 472 + /* 473 + * We dropped lock. Check again if somebody else setup 474 + * mapping already. 475 + */ 476 + node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx); 477 + if (node) { 478 + dmap = node_to_dmap(node); 479 + fuse_fill_iomap(inode, pos, length, iomap, dmap, flags); 480 + dmap_add_to_free_pool(fcd, alloc_dmap); 481 + up_write(&fi->dax->sem); 482 + return 0; 483 + } 484 + 485 + /* Setup one mapping */ 486 + ret = fuse_setup_one_mapping(inode, pos >> FUSE_DAX_SHIFT, alloc_dmap, 487 + writable, false); 488 + if (ret < 0) { 489 + dmap_add_to_free_pool(fcd, alloc_dmap); 490 + up_write(&fi->dax->sem); 491 + return ret; 492 + } 493 + fuse_fill_iomap(inode, pos, length, iomap, alloc_dmap, flags); 494 + up_write(&fi->dax->sem); 495 + return 0; 496 + } 497 + 498 + static int fuse_upgrade_dax_mapping(struct inode *inode, loff_t pos, 499 + loff_t length, unsigned int flags, 500 + struct iomap *iomap) 501 + { 502 + struct fuse_inode *fi = get_fuse_inode(inode); 503 + struct fuse_dax_mapping *dmap; 504 + int ret; 505 + unsigned long idx = pos >> FUSE_DAX_SHIFT; 506 + struct interval_tree_node *node; 507 + 508 + /* 509 + * Take exclusive lock so that only one caller can try to setup 510 + * mapping and others wait. 511 + */ 512 + down_write(&fi->dax->sem); 513 + node = interval_tree_iter_first(&fi->dax->tree, idx, idx); 514 + 515 + /* We are holding either inode lock or i_mmap_sem, and that should 516 + * ensure that dmap can't be truncated. We are holding a reference 517 + * on dmap and that should make sure it can't be reclaimed. 
So dmap 518 + * should still be there in tree despite the fact we dropped and 519 + * re-acquired the fi->dax->sem lock. 520 + */ 521 + ret = -EIO; 522 + if (WARN_ON(!node)) 523 + goto out_err; 524 + 525 + dmap = node_to_dmap(node); 526 + 527 + /* We took an extra reference on dmap to make sure its not reclaimd. 528 + * Now we hold fi->dax->sem lock and that reference is not needed 529 + * anymore. Drop it. 530 + */ 531 + if (refcount_dec_and_test(&dmap->refcnt)) { 532 + /* refcount should not hit 0. This object only goes 533 + * away when fuse connection goes away 534 + */ 535 + WARN_ON_ONCE(1); 536 + } 537 + 538 + /* Maybe another thread already upgraded mapping while we were not 539 + * holding lock. 540 + */ 541 + if (dmap->writable) { 542 + ret = 0; 543 + goto out_fill_iomap; 544 + } 545 + 546 + ret = fuse_setup_one_mapping(inode, pos >> FUSE_DAX_SHIFT, dmap, true, 547 + true); 548 + if (ret < 0) 549 + goto out_err; 550 + out_fill_iomap: 551 + fuse_fill_iomap(inode, pos, length, iomap, dmap, flags); 552 + out_err: 553 + up_write(&fi->dax->sem); 554 + return ret; 555 + } 556 + 557 + /* This is just for DAX and the mapping is ephemeral, do not use it for other 558 + * purposes since there is no block device with a permanent mapping. 559 + */ 560 + static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length, 561 + unsigned int flags, struct iomap *iomap, 562 + struct iomap *srcmap) 563 + { 564 + struct fuse_inode *fi = get_fuse_inode(inode); 565 + struct fuse_conn *fc = get_fuse_conn(inode); 566 + struct fuse_dax_mapping *dmap; 567 + bool writable = flags & IOMAP_WRITE; 568 + unsigned long start_idx = pos >> FUSE_DAX_SHIFT; 569 + struct interval_tree_node *node; 570 + 571 + /* We don't support FIEMAP */ 572 + if (WARN_ON(flags & IOMAP_REPORT)) 573 + return -EIO; 574 + 575 + iomap->offset = pos; 576 + iomap->flags = 0; 577 + iomap->bdev = NULL; 578 + iomap->dax_dev = fc->dax->dev; 579 + 580 + /* 581 + * Both read/write and mmap path can race here. So we need something 582 + * to make sure if we are setting up mapping, then other path waits 583 + * 584 + * For now, use a semaphore for this. It probably needs to be 585 + * optimized later. 586 + */ 587 + down_read(&fi->dax->sem); 588 + node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx); 589 + if (node) { 590 + dmap = node_to_dmap(node); 591 + if (writable && !dmap->writable) { 592 + /* Upgrade read-only mapping to read-write. This will 593 + * require exclusive fi->dax->sem lock as we don't want 594 + * two threads to be trying to this simultaneously 595 + * for same dmap. So drop shared lock and acquire 596 + * exclusive lock. 597 + * 598 + * Before dropping fi->dax->sem lock, take reference 599 + * on dmap so that its not freed by range reclaim. 
600 + */ 601 + refcount_inc(&dmap->refcnt); 602 + up_read(&fi->dax->sem); 603 + pr_debug("%s: Upgrading mapping at offset 0x%llx length 0x%llx\n", 604 + __func__, pos, length); 605 + return fuse_upgrade_dax_mapping(inode, pos, length, 606 + flags, iomap); 607 + } else { 608 + fuse_fill_iomap(inode, pos, length, iomap, dmap, flags); 609 + up_read(&fi->dax->sem); 610 + return 0; 611 + } 612 + } else { 613 + up_read(&fi->dax->sem); 614 + pr_debug("%s: no mapping at offset 0x%llx length 0x%llx\n", 615 + __func__, pos, length); 616 + if (pos >= i_size_read(inode)) 617 + goto iomap_hole; 618 + 619 + return fuse_setup_new_dax_mapping(inode, pos, length, flags, 620 + iomap); 621 + } 622 + 623 + /* 624 + * If read beyond end of file happnes, fs code seems to return 625 + * it as hole 626 + */ 627 + iomap_hole: 628 + fuse_fill_iomap_hole(iomap, length); 629 + pr_debug("%s returning hole mapping. pos=0x%llx length_asked=0x%llx length_returned=0x%llx\n", 630 + __func__, pos, length, iomap->length); 631 + return 0; 632 + } 633 + 634 + static int fuse_iomap_end(struct inode *inode, loff_t pos, loff_t length, 635 + ssize_t written, unsigned int flags, 636 + struct iomap *iomap) 637 + { 638 + struct fuse_dax_mapping *dmap = iomap->private; 639 + 640 + if (dmap) { 641 + if (refcount_dec_and_test(&dmap->refcnt)) { 642 + /* refcount should not hit 0. This object only goes 643 + * away when fuse connection goes away 644 + */ 645 + WARN_ON_ONCE(1); 646 + } 647 + } 648 + 649 + /* DAX writes beyond end-of-file aren't handled using iomap, so the 650 + * file size is unchanged and there is nothing to do here. 651 + */ 652 + return 0; 653 + } 654 + 655 + static const struct iomap_ops fuse_iomap_ops = { 656 + .iomap_begin = fuse_iomap_begin, 657 + .iomap_end = fuse_iomap_end, 658 + }; 659 + 660 + static void fuse_wait_dax_page(struct inode *inode) 661 + { 662 + struct fuse_inode *fi = get_fuse_inode(inode); 663 + 664 + up_write(&fi->i_mmap_sem); 665 + schedule(); 666 + down_write(&fi->i_mmap_sem); 667 + } 668 + 669 + /* Should be called with fi->i_mmap_sem lock held exclusively */ 670 + static int __fuse_dax_break_layouts(struct inode *inode, bool *retry, 671 + loff_t start, loff_t end) 672 + { 673 + struct page *page; 674 + 675 + page = dax_layout_busy_page_range(inode->i_mapping, start, end); 676 + if (!page) 677 + return 0; 678 + 679 + *retry = true; 680 + return ___wait_var_event(&page->_refcount, 681 + atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE, 682 + 0, 0, fuse_wait_dax_page(inode)); 683 + } 684 + 685 + /* dmap_end == 0 leads to unmapping of whole file */ 686 + int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start, 687 + u64 dmap_end) 688 + { 689 + bool retry; 690 + int ret; 691 + 692 + do { 693 + retry = false; 694 + ret = __fuse_dax_break_layouts(inode, &retry, dmap_start, 695 + dmap_end); 696 + } while (ret == 0 && retry); 697 + 698 + return ret; 699 + } 700 + 701 + ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to) 702 + { 703 + struct inode *inode = file_inode(iocb->ki_filp); 704 + ssize_t ret; 705 + 706 + if (iocb->ki_flags & IOCB_NOWAIT) { 707 + if (!inode_trylock_shared(inode)) 708 + return -EAGAIN; 709 + } else { 710 + inode_lock_shared(inode); 711 + } 712 + 713 + ret = dax_iomap_rw(iocb, to, &fuse_iomap_ops); 714 + inode_unlock_shared(inode); 715 + 716 + /* TODO file_accessed(iocb->f_filp) */ 717 + return ret; 718 + } 719 + 720 + static bool file_extending_write(struct kiocb *iocb, struct iov_iter *from) 721 + { 722 + struct inode *inode = 
file_inode(iocb->ki_filp); 723 + 724 + return (iov_iter_rw(from) == WRITE && 725 + ((iocb->ki_pos) >= i_size_read(inode) || 726 + (iocb->ki_pos + iov_iter_count(from) > i_size_read(inode)))); 727 + } 728 + 729 + static ssize_t fuse_dax_direct_write(struct kiocb *iocb, struct iov_iter *from) 730 + { 731 + struct inode *inode = file_inode(iocb->ki_filp); 732 + struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb); 733 + ssize_t ret; 734 + 735 + ret = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE); 736 + if (ret < 0) 737 + return ret; 738 + 739 + fuse_invalidate_attr(inode); 740 + fuse_write_update_size(inode, iocb->ki_pos); 741 + return ret; 742 + } 743 + 744 + ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from) 745 + { 746 + struct inode *inode = file_inode(iocb->ki_filp); 747 + ssize_t ret; 748 + 749 + if (iocb->ki_flags & IOCB_NOWAIT) { 750 + if (!inode_trylock(inode)) 751 + return -EAGAIN; 752 + } else { 753 + inode_lock(inode); 754 + } 755 + 756 + ret = generic_write_checks(iocb, from); 757 + if (ret <= 0) 758 + goto out; 759 + 760 + ret = file_remove_privs(iocb->ki_filp); 761 + if (ret) 762 + goto out; 763 + /* TODO file_update_time() but we don't want metadata I/O */ 764 + 765 + /* Do not use dax for file extending writes as write and on 766 + * disk i_size increase are not atomic otherwise. 767 + */ 768 + if (file_extending_write(iocb, from)) 769 + ret = fuse_dax_direct_write(iocb, from); 770 + else 771 + ret = dax_iomap_rw(iocb, from, &fuse_iomap_ops); 772 + 773 + out: 774 + inode_unlock(inode); 775 + 776 + if (ret > 0) 777 + ret = generic_write_sync(iocb, ret); 778 + return ret; 779 + } 780 + 781 + static int fuse_dax_writepages(struct address_space *mapping, 782 + struct writeback_control *wbc) 783 + { 784 + 785 + struct inode *inode = mapping->host; 786 + struct fuse_conn *fc = get_fuse_conn(inode); 787 + 788 + return dax_writeback_mapping_range(mapping, fc->dax->dev, wbc); 789 + } 790 + 791 + static vm_fault_t __fuse_dax_fault(struct vm_fault *vmf, 792 + enum page_entry_size pe_size, bool write) 793 + { 794 + vm_fault_t ret; 795 + struct inode *inode = file_inode(vmf->vma->vm_file); 796 + struct super_block *sb = inode->i_sb; 797 + pfn_t pfn; 798 + int error = 0; 799 + struct fuse_conn *fc = get_fuse_conn(inode); 800 + struct fuse_conn_dax *fcd = fc->dax; 801 + bool retry = false; 802 + 803 + if (write) 804 + sb_start_pagefault(sb); 805 + retry: 806 + if (retry && !(fcd->nr_free_ranges > 0)) 807 + wait_event(fcd->range_waitq, (fcd->nr_free_ranges > 0)); 808 + 809 + /* 810 + * We need to serialize against not only truncate but also against 811 + * fuse dax memory range reclaim. While a range is being reclaimed, 812 + * we do not want any read/write/mmap to make progress and try 813 + * to populate page cache or access memory we are trying to free. 
814 + */ 815 + down_read(&get_fuse_inode(inode)->i_mmap_sem); 816 + ret = dax_iomap_fault(vmf, pe_size, &pfn, &error, &fuse_iomap_ops); 817 + if ((ret & VM_FAULT_ERROR) && error == -EAGAIN) { 818 + error = 0; 819 + retry = true; 820 + up_read(&get_fuse_inode(inode)->i_mmap_sem); 821 + goto retry; 822 + } 823 + 824 + if (ret & VM_FAULT_NEEDDSYNC) 825 + ret = dax_finish_sync_fault(vmf, pe_size, pfn); 826 + up_read(&get_fuse_inode(inode)->i_mmap_sem); 827 + 828 + if (write) 829 + sb_end_pagefault(sb); 830 + 831 + return ret; 832 + } 833 + 834 + static vm_fault_t fuse_dax_fault(struct vm_fault *vmf) 835 + { 836 + return __fuse_dax_fault(vmf, PE_SIZE_PTE, 837 + vmf->flags & FAULT_FLAG_WRITE); 838 + } 839 + 840 + static vm_fault_t fuse_dax_huge_fault(struct vm_fault *vmf, 841 + enum page_entry_size pe_size) 842 + { 843 + return __fuse_dax_fault(vmf, pe_size, vmf->flags & FAULT_FLAG_WRITE); 844 + } 845 + 846 + static vm_fault_t fuse_dax_page_mkwrite(struct vm_fault *vmf) 847 + { 848 + return __fuse_dax_fault(vmf, PE_SIZE_PTE, true); 849 + } 850 + 851 + static vm_fault_t fuse_dax_pfn_mkwrite(struct vm_fault *vmf) 852 + { 853 + return __fuse_dax_fault(vmf, PE_SIZE_PTE, true); 854 + } 855 + 856 + static const struct vm_operations_struct fuse_dax_vm_ops = { 857 + .fault = fuse_dax_fault, 858 + .huge_fault = fuse_dax_huge_fault, 859 + .page_mkwrite = fuse_dax_page_mkwrite, 860 + .pfn_mkwrite = fuse_dax_pfn_mkwrite, 861 + }; 862 + 863 + int fuse_dax_mmap(struct file *file, struct vm_area_struct *vma) 864 + { 865 + file_accessed(file); 866 + vma->vm_ops = &fuse_dax_vm_ops; 867 + vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; 868 + return 0; 869 + } 870 + 871 + static int dmap_writeback_invalidate(struct inode *inode, 872 + struct fuse_dax_mapping *dmap) 873 + { 874 + int ret; 875 + loff_t start_pos = dmap->itn.start << FUSE_DAX_SHIFT; 876 + loff_t end_pos = (start_pos + FUSE_DAX_SZ - 1); 877 + 878 + ret = filemap_fdatawrite_range(inode->i_mapping, start_pos, end_pos); 879 + if (ret) { 880 + pr_debug("fuse: filemap_fdatawrite_range() failed. err=%d start_pos=0x%llx, end_pos=0x%llx\n", 881 + ret, start_pos, end_pos); 882 + return ret; 883 + } 884 + 885 + ret = invalidate_inode_pages2_range(inode->i_mapping, 886 + start_pos >> PAGE_SHIFT, 887 + end_pos >> PAGE_SHIFT); 888 + if (ret) 889 + pr_debug("fuse: invalidate_inode_pages2_range() failed err=%d\n", 890 + ret); 891 + 892 + return ret; 893 + } 894 + 895 + static int reclaim_one_dmap_locked(struct inode *inode, 896 + struct fuse_dax_mapping *dmap) 897 + { 898 + int ret; 899 + struct fuse_inode *fi = get_fuse_inode(inode); 900 + 901 + /* 902 + * igrab() was done to make sure inode won't go under us, and this 903 + * further avoids the race with evict(). 904 + */ 905 + ret = dmap_writeback_invalidate(inode, dmap); 906 + if (ret) 907 + return ret; 908 + 909 + /* Remove dax mapping from inode interval tree now */ 910 + interval_tree_remove(&dmap->itn, &fi->dax->tree); 911 + fi->dax->nr--; 912 + 913 + /* It is possible that umount/shutdown has killed the fuse connection 914 + * and worker thread is trying to reclaim memory in parallel. Don't 915 + * warn in that case. 916 + */ 917 + ret = dmap_removemapping_one(inode, dmap); 918 + if (ret && ret != -ENOTCONN) { 919 + pr_warn("Failed to remove mapping. offset=0x%llx len=0x%llx ret=%d\n", 920 + dmap->window_offset, dmap->length, ret); 921 + } 922 + return 0; 923 + } 924 + 925 + /* Find first mapped dmap for an inode and return file offset. Caller needs 926 + * to hold fi->dax->sem lock either shared or exclusive. 
927 + */ 928 + static struct fuse_dax_mapping *inode_lookup_first_dmap(struct inode *inode) 929 + { 930 + struct fuse_inode *fi = get_fuse_inode(inode); 931 + struct fuse_dax_mapping *dmap; 932 + struct interval_tree_node *node; 933 + 934 + for (node = interval_tree_iter_first(&fi->dax->tree, 0, -1); node; 935 + node = interval_tree_iter_next(node, 0, -1)) { 936 + dmap = node_to_dmap(node); 937 + /* still in use. */ 938 + if (refcount_read(&dmap->refcnt) > 1) 939 + continue; 940 + 941 + return dmap; 942 + } 943 + 944 + return NULL; 945 + } 946 + 947 + /* 948 + * Find first mapping in the tree and free it and return it. Do not add 949 + * it back to free pool. 950 + */ 951 + static struct fuse_dax_mapping * 952 + inode_inline_reclaim_one_dmap(struct fuse_conn_dax *fcd, struct inode *inode, 953 + bool *retry) 954 + { 955 + struct fuse_inode *fi = get_fuse_inode(inode); 956 + struct fuse_dax_mapping *dmap; 957 + u64 dmap_start, dmap_end; 958 + unsigned long start_idx; 959 + int ret; 960 + struct interval_tree_node *node; 961 + 962 + down_write(&fi->i_mmap_sem); 963 + 964 + /* Lookup a dmap and corresponding file offset to reclaim. */ 965 + down_read(&fi->dax->sem); 966 + dmap = inode_lookup_first_dmap(inode); 967 + if (dmap) { 968 + start_idx = dmap->itn.start; 969 + dmap_start = start_idx << FUSE_DAX_SHIFT; 970 + dmap_end = dmap_start + FUSE_DAX_SZ - 1; 971 + } 972 + up_read(&fi->dax->sem); 973 + 974 + if (!dmap) 975 + goto out_mmap_sem; 976 + /* 977 + * Make sure there are no references to inode pages using 978 + * get_user_pages() 979 + */ 980 + ret = fuse_dax_break_layouts(inode, dmap_start, dmap_end); 981 + if (ret) { 982 + pr_debug("fuse: fuse_dax_break_layouts() failed. err=%d\n", 983 + ret); 984 + dmap = ERR_PTR(ret); 985 + goto out_mmap_sem; 986 + } 987 + 988 + down_write(&fi->dax->sem); 989 + node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx); 990 + /* Range already got reclaimed by somebody else */ 991 + if (!node) { 992 + if (retry) 993 + *retry = true; 994 + goto out_write_dmap_sem; 995 + } 996 + 997 + dmap = node_to_dmap(node); 998 + /* still in use. */ 999 + if (refcount_read(&dmap->refcnt) > 1) { 1000 + dmap = NULL; 1001 + if (retry) 1002 + *retry = true; 1003 + goto out_write_dmap_sem; 1004 + } 1005 + 1006 + ret = reclaim_one_dmap_locked(inode, dmap); 1007 + if (ret < 0) { 1008 + dmap = ERR_PTR(ret); 1009 + goto out_write_dmap_sem; 1010 + } 1011 + 1012 + /* Clean up dmap. Do not add back to free list */ 1013 + dmap_remove_busy_list(fcd, dmap); 1014 + dmap->inode = NULL; 1015 + dmap->itn.start = dmap->itn.last = 0; 1016 + 1017 + pr_debug("fuse: %s: inline reclaimed memory range. inode=%p, window_offset=0x%llx, length=0x%llx\n", 1018 + __func__, inode, dmap->window_offset, dmap->length); 1019 + 1020 + out_write_dmap_sem: 1021 + up_write(&fi->dax->sem); 1022 + out_mmap_sem: 1023 + up_write(&fi->i_mmap_sem); 1024 + return dmap; 1025 + } 1026 + 1027 + static struct fuse_dax_mapping * 1028 + alloc_dax_mapping_reclaim(struct fuse_conn_dax *fcd, struct inode *inode) 1029 + { 1030 + struct fuse_dax_mapping *dmap; 1031 + struct fuse_inode *fi = get_fuse_inode(inode); 1032 + 1033 + while (1) { 1034 + bool retry = false; 1035 + 1036 + dmap = alloc_dax_mapping(fcd); 1037 + if (dmap) 1038 + return dmap; 1039 + 1040 + dmap = inode_inline_reclaim_one_dmap(fcd, inode, &retry); 1041 + /* 1042 + * Either we got a mapping or it is an error, return in both 1043 + * the cases. 
1044 + */ 1045 + if (dmap) 1046 + return dmap; 1047 + 1048 + /* If we could not reclaim a mapping because it 1049 + * had a reference or some other temporary failure, 1050 + * Try again. We want to give up inline reclaim only 1051 + * if there is no range assigned to this node. Otherwise 1052 + * if a deadlock is possible if we sleep with fi->i_mmap_sem 1053 + * held and worker to free memory can't make progress due 1054 + * to unavailability of fi->i_mmap_sem lock. So sleep 1055 + * only if fi->dax->nr=0 1056 + */ 1057 + if (retry) 1058 + continue; 1059 + /* 1060 + * There are no mappings which can be reclaimed. Wait for one. 1061 + * We are not holding fi->dax->sem. So it is possible 1062 + * that range gets added now. But as we are not holding 1063 + * fi->i_mmap_sem, worker should still be able to free up 1064 + * a range and wake us up. 1065 + */ 1066 + if (!fi->dax->nr && !(fcd->nr_free_ranges > 0)) { 1067 + if (wait_event_killable_exclusive(fcd->range_waitq, 1068 + (fcd->nr_free_ranges > 0))) { 1069 + return ERR_PTR(-EINTR); 1070 + } 1071 + } 1072 + } 1073 + } 1074 + 1075 + static int lookup_and_reclaim_dmap_locked(struct fuse_conn_dax *fcd, 1076 + struct inode *inode, 1077 + unsigned long start_idx) 1078 + { 1079 + int ret; 1080 + struct fuse_inode *fi = get_fuse_inode(inode); 1081 + struct fuse_dax_mapping *dmap; 1082 + struct interval_tree_node *node; 1083 + 1084 + /* Find fuse dax mapping at file offset inode. */ 1085 + node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx); 1086 + 1087 + /* Range already got cleaned up by somebody else */ 1088 + if (!node) 1089 + return 0; 1090 + dmap = node_to_dmap(node); 1091 + 1092 + /* still in use. */ 1093 + if (refcount_read(&dmap->refcnt) > 1) 1094 + return 0; 1095 + 1096 + ret = reclaim_one_dmap_locked(inode, dmap); 1097 + if (ret < 0) 1098 + return ret; 1099 + 1100 + /* Cleanup dmap entry and add back to free list */ 1101 + spin_lock(&fcd->lock); 1102 + dmap_reinit_add_to_free_pool(fcd, dmap); 1103 + spin_unlock(&fcd->lock); 1104 + return ret; 1105 + } 1106 + 1107 + /* 1108 + * Free a range of memory. 1109 + * Locking: 1110 + * 1. Take fi->i_mmap_sem to block dax faults. 1111 + * 2. Take fi->dax->sem to protect interval tree and also to make sure 1112 + * read/write can not reuse a dmap which we might be freeing. 1113 + */ 1114 + static int lookup_and_reclaim_dmap(struct fuse_conn_dax *fcd, 1115 + struct inode *inode, 1116 + unsigned long start_idx, 1117 + unsigned long end_idx) 1118 + { 1119 + int ret; 1120 + struct fuse_inode *fi = get_fuse_inode(inode); 1121 + loff_t dmap_start = start_idx << FUSE_DAX_SHIFT; 1122 + loff_t dmap_end = (dmap_start + FUSE_DAX_SZ) - 1; 1123 + 1124 + down_write(&fi->i_mmap_sem); 1125 + ret = fuse_dax_break_layouts(inode, dmap_start, dmap_end); 1126 + if (ret) { 1127 + pr_debug("virtio_fs: fuse_dax_break_layouts() failed. 
err=%d\n", 1128 + ret); 1129 + goto out_mmap_sem; 1130 + } 1131 + 1132 + down_write(&fi->dax->sem); 1133 + ret = lookup_and_reclaim_dmap_locked(fcd, inode, start_idx); 1134 + up_write(&fi->dax->sem); 1135 + out_mmap_sem: 1136 + up_write(&fi->i_mmap_sem); 1137 + return ret; 1138 + } 1139 + 1140 + static int try_to_free_dmap_chunks(struct fuse_conn_dax *fcd, 1141 + unsigned long nr_to_free) 1142 + { 1143 + struct fuse_dax_mapping *dmap, *pos, *temp; 1144 + int ret, nr_freed = 0; 1145 + unsigned long start_idx = 0, end_idx = 0; 1146 + struct inode *inode = NULL; 1147 + 1148 + /* Pick first busy range and free it for now*/ 1149 + while (1) { 1150 + if (nr_freed >= nr_to_free) 1151 + break; 1152 + 1153 + dmap = NULL; 1154 + spin_lock(&fcd->lock); 1155 + 1156 + if (!fcd->nr_busy_ranges) { 1157 + spin_unlock(&fcd->lock); 1158 + return 0; 1159 + } 1160 + 1161 + list_for_each_entry_safe(pos, temp, &fcd->busy_ranges, 1162 + busy_list) { 1163 + /* skip this range if it's in use. */ 1164 + if (refcount_read(&pos->refcnt) > 1) 1165 + continue; 1166 + 1167 + inode = igrab(pos->inode); 1168 + /* 1169 + * This inode is going away. That will free 1170 + * up all the ranges anyway, continue to 1171 + * next range. 1172 + */ 1173 + if (!inode) 1174 + continue; 1175 + /* 1176 + * Take this element off list and add it tail. If 1177 + * this element can't be freed, it will help with 1178 + * selecting new element in next iteration of loop. 1179 + */ 1180 + dmap = pos; 1181 + list_move_tail(&dmap->busy_list, &fcd->busy_ranges); 1182 + start_idx = end_idx = dmap->itn.start; 1183 + break; 1184 + } 1185 + spin_unlock(&fcd->lock); 1186 + if (!dmap) 1187 + return 0; 1188 + 1189 + ret = lookup_and_reclaim_dmap(fcd, inode, start_idx, end_idx); 1190 + iput(inode); 1191 + if (ret) 1192 + return ret; 1193 + nr_freed++; 1194 + } 1195 + return 0; 1196 + } 1197 + 1198 + static void fuse_dax_free_mem_worker(struct work_struct *work) 1199 + { 1200 + int ret; 1201 + struct fuse_conn_dax *fcd = container_of(work, struct fuse_conn_dax, 1202 + free_work.work); 1203 + ret = try_to_free_dmap_chunks(fcd, FUSE_DAX_RECLAIM_CHUNK); 1204 + if (ret) { 1205 + pr_debug("fuse: try_to_free_dmap_chunks() failed with err=%d\n", 1206 + ret); 1207 + } 1208 + 1209 + /* If number of free ranges are still below threhold, requeue */ 1210 + kick_dmap_free_worker(fcd, 1); 1211 + } 1212 + 1213 + static void fuse_free_dax_mem_ranges(struct list_head *mem_list) 1214 + { 1215 + struct fuse_dax_mapping *range, *temp; 1216 + 1217 + /* Free All allocated elements */ 1218 + list_for_each_entry_safe(range, temp, mem_list, list) { 1219 + list_del(&range->list); 1220 + if (!list_empty(&range->busy_list)) 1221 + list_del(&range->busy_list); 1222 + kfree(range); 1223 + } 1224 + } 1225 + 1226 + void fuse_dax_conn_free(struct fuse_conn *fc) 1227 + { 1228 + if (fc->dax) { 1229 + fuse_free_dax_mem_ranges(&fc->dax->free_ranges); 1230 + kfree(fc->dax); 1231 + } 1232 + } 1233 + 1234 + static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd) 1235 + { 1236 + long nr_pages, nr_ranges; 1237 + void *kaddr; 1238 + pfn_t pfn; 1239 + struct fuse_dax_mapping *range; 1240 + int ret, id; 1241 + size_t dax_size = -1; 1242 + unsigned long i; 1243 + 1244 + init_waitqueue_head(&fcd->range_waitq); 1245 + INIT_LIST_HEAD(&fcd->free_ranges); 1246 + INIT_LIST_HEAD(&fcd->busy_ranges); 1247 + INIT_DELAYED_WORK(&fcd->free_work, fuse_dax_free_mem_worker); 1248 + 1249 + id = dax_read_lock(); 1250 + nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), &kaddr, 1251 + &pfn); 1252 + 
dax_read_unlock(id); 1253 + if (nr_pages < 0) { 1254 + pr_debug("dax_direct_access() returned %ld\n", nr_pages); 1255 + return nr_pages; 1256 + } 1257 + 1258 + nr_ranges = nr_pages/FUSE_DAX_PAGES; 1259 + pr_debug("%s: dax mapped %ld pages. nr_ranges=%ld\n", 1260 + __func__, nr_pages, nr_ranges); 1261 + 1262 + for (i = 0; i < nr_ranges; i++) { 1263 + range = kzalloc(sizeof(struct fuse_dax_mapping), GFP_KERNEL); 1264 + ret = -ENOMEM; 1265 + if (!range) 1266 + goto out_err; 1267 + 1268 + /* TODO: This offset only works if virtio-fs driver is not 1269 + * having some memory hidden at the beginning. This needs 1270 + * better handling 1271 + */ 1272 + range->window_offset = i * FUSE_DAX_SZ; 1273 + range->length = FUSE_DAX_SZ; 1274 + INIT_LIST_HEAD(&range->busy_list); 1275 + refcount_set(&range->refcnt, 1); 1276 + list_add_tail(&range->list, &fcd->free_ranges); 1277 + } 1278 + 1279 + fcd->nr_free_ranges = nr_ranges; 1280 + fcd->nr_ranges = nr_ranges; 1281 + return 0; 1282 + out_err: 1283 + /* Free All allocated elements */ 1284 + fuse_free_dax_mem_ranges(&fcd->free_ranges); 1285 + return ret; 1286 + } 1287 + 1288 + int fuse_dax_conn_alloc(struct fuse_conn *fc, struct dax_device *dax_dev) 1289 + { 1290 + struct fuse_conn_dax *fcd; 1291 + int err; 1292 + 1293 + if (!dax_dev) 1294 + return 0; 1295 + 1296 + fcd = kzalloc(sizeof(*fcd), GFP_KERNEL); 1297 + if (!fcd) 1298 + return -ENOMEM; 1299 + 1300 + spin_lock_init(&fcd->lock); 1301 + fcd->dev = dax_dev; 1302 + err = fuse_dax_mem_range_init(fcd); 1303 + if (err) { 1304 + kfree(fcd); 1305 + return err; 1306 + } 1307 + 1308 + fc->dax = fcd; 1309 + return 0; 1310 + } 1311 + 1312 + bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi) 1313 + { 1314 + struct fuse_conn *fc = get_fuse_conn_super(sb); 1315 + 1316 + fi->dax = NULL; 1317 + if (fc->dax) { 1318 + fi->dax = kzalloc(sizeof(*fi->dax), GFP_KERNEL_ACCOUNT); 1319 + if (!fi->dax) 1320 + return false; 1321 + 1322 + init_rwsem(&fi->dax->sem); 1323 + fi->dax->tree = RB_ROOT_CACHED; 1324 + } 1325 + 1326 + return true; 1327 + } 1328 + 1329 + static const struct address_space_operations fuse_dax_file_aops = { 1330 + .writepages = fuse_dax_writepages, 1331 + .direct_IO = noop_direct_IO, 1332 + .set_page_dirty = noop_set_page_dirty, 1333 + .invalidatepage = noop_invalidatepage, 1334 + }; 1335 + 1336 + void fuse_dax_inode_init(struct inode *inode) 1337 + { 1338 + struct fuse_conn *fc = get_fuse_conn(inode); 1339 + 1340 + if (!fc->dax) 1341 + return; 1342 + 1343 + inode->i_flags |= S_DAX; 1344 + inode->i_data.a_ops = &fuse_dax_file_aops; 1345 + } 1346 + 1347 + bool fuse_dax_check_alignment(struct fuse_conn *fc, unsigned int map_alignment) 1348 + { 1349 + if (fc->dax && (map_alignment > FUSE_DAX_SHIFT)) { 1350 + pr_warn("FUSE: map_alignment %u incompatible with dax mem range size %u\n", 1351 + map_alignment, FUSE_DAX_SZ); 1352 + return false; 1353 + } 1354 + return true; 1355 + } 1356 + 1357 + void fuse_dax_cancel_work(struct fuse_conn *fc) 1358 + { 1359 + struct fuse_conn_dax *fcd = fc->dax; 1360 + 1361 + if (fcd) 1362 + cancel_delayed_work_sync(&fcd->free_work); 1363 + 1364 + } 1365 + EXPORT_SYMBOL_GPL(fuse_dax_cancel_work);
+100 -89
fs/fuse/dev.c
··· 40 return READ_ONCE(file->private_data); 41 } 42 43 - static void fuse_request_init(struct fuse_req *req) 44 { 45 INIT_LIST_HEAD(&req->list); 46 INIT_LIST_HEAD(&req->intr_entry); 47 init_waitqueue_head(&req->waitq); 48 refcount_set(&req->count, 1); 49 __set_bit(FR_PENDING, &req->flags); 50 } 51 52 - static struct fuse_req *fuse_request_alloc(gfp_t flags) 53 { 54 struct fuse_req *req = kmem_cache_zalloc(fuse_req_cachep, flags); 55 if (req) 56 - fuse_request_init(req); 57 58 return req; 59 } ··· 101 } 102 } 103 104 - static void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req); 105 106 - static struct fuse_req *fuse_get_req(struct fuse_conn *fc, bool for_background) 107 { 108 struct fuse_req *req; 109 int err; 110 atomic_inc(&fc->num_waiting); ··· 127 if (fc->conn_error) 128 goto out; 129 130 - req = fuse_request_alloc(GFP_KERNEL); 131 err = -ENOMEM; 132 if (!req) { 133 if (for_background) ··· 145 146 if (unlikely(req->in.h.uid == ((uid_t)-1) || 147 req->in.h.gid == ((gid_t)-1))) { 148 - fuse_put_request(fc, req); 149 return ERR_PTR(-EOVERFLOW); 150 } 151 return req; ··· 155 return ERR_PTR(err); 156 } 157 158 - static void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) 159 { 160 if (refcount_dec_and_test(&req->count)) { 161 if (test_bit(FR_BACKGROUND, &req->flags)) { 162 /* ··· 277 * the 'end' callback is called if given, else the reference to the 278 * request is released 279 */ 280 - void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req) 281 { 282 struct fuse_iqueue *fiq = &fc->iq; 283 284 if (test_and_set_bit(FR_FINISHED, &req->flags)) ··· 315 wake_up(&fc->blocked_waitq); 316 } 317 318 - if (fc->num_background == fc->congestion_threshold && fc->sb) { 319 - clear_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC); 320 - clear_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC); 321 } 322 fc->num_background--; 323 fc->active_background--; ··· 329 } 330 331 if (test_bit(FR_ASYNC, &req->flags)) 332 - req->args->end(fc, req->args, req->out.h.error); 333 put_request: 334 - fuse_put_request(fc, req); 335 } 336 EXPORT_SYMBOL_GPL(fuse_request_end); 337 338 - static int queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) 339 { 340 spin_lock(&fiq->lock); 341 /* Check for we've sent request to interrupt this req */ 342 if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags))) { ··· 365 return 0; 366 } 367 368 - static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) 369 { 370 struct fuse_iqueue *fiq = &fc->iq; 371 int err; 372 ··· 382 /* matches barrier in fuse_dev_do_read() */ 383 smp_mb__after_atomic(); 384 if (test_bit(FR_SENT, &req->flags)) 385 - queue_interrupt(fiq, req); 386 } 387 388 if (!test_bit(FR_FORCE, &req->flags)) { ··· 411 wait_event(req->waitq, test_bit(FR_FINISHED, &req->flags)); 412 } 413 414 - static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) 415 { 416 - struct fuse_iqueue *fiq = &fc->iq; 417 418 BUG_ON(test_bit(FR_BACKGROUND, &req->flags)); 419 spin_lock(&fiq->lock); ··· 427 __fuse_get_request(req); 428 queue_request_and_unlock(fiq, req); 429 430 - request_wait_answer(fc, req); 431 /* Pairs with smp_wmb() in fuse_request_end() */ 432 smp_rmb(); 433 } ··· 466 } 467 } 468 469 - static void fuse_force_creds(struct fuse_conn *fc, struct fuse_req *req) 470 { 471 req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid()); 472 req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid()); 473 req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns); ··· 484 __set_bit(FR_ASYNC, &req->flags); 485 } 486 487 - 
ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args) 488 { 489 struct fuse_req *req; 490 ssize_t ret; 491 492 if (args->force) { 493 atomic_inc(&fc->num_waiting); 494 - req = fuse_request_alloc(GFP_KERNEL | __GFP_NOFAIL); 495 496 if (!args->nocreds) 497 - fuse_force_creds(fc, req); 498 499 __set_bit(FR_WAITING, &req->flags); 500 __set_bit(FR_FORCE, &req->flags); 501 } else { 502 WARN_ON(args->nocreds); 503 - req = fuse_get_req(fc, false); 504 if (IS_ERR(req)) 505 return PTR_ERR(req); 506 } ··· 512 513 if (!args->noreply) 514 __set_bit(FR_ISREPLY, &req->flags); 515 - __fuse_request_send(fc, req); 516 ret = req->out.h.error; 517 if (!ret && args->out_argvar) { 518 BUG_ON(args->out_numargs == 0); 519 ret = args->out_args[args->out_numargs - 1].size; 520 } 521 - fuse_put_request(fc, req); 522 523 return ret; 524 } 525 526 - static bool fuse_request_queue_background(struct fuse_conn *fc, 527 - struct fuse_req *req) 528 { 529 bool queued = false; 530 531 WARN_ON(!test_bit(FR_BACKGROUND, &req->flags)); ··· 540 fc->num_background++; 541 if (fc->num_background == fc->max_background) 542 fc->blocked = 1; 543 - if (fc->num_background == fc->congestion_threshold && fc->sb) { 544 - set_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC); 545 - set_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC); 546 } 547 list_add_tail(&req->list, &fc->bg_queue); 548 flush_bg_queue(fc); ··· 553 return queued; 554 } 555 556 - int fuse_simple_background(struct fuse_conn *fc, struct fuse_args *args, 557 gfp_t gfp_flags) 558 { 559 struct fuse_req *req; 560 561 if (args->force) { 562 WARN_ON(!args->nocreds); 563 - req = fuse_request_alloc(gfp_flags); 564 if (!req) 565 return -ENOMEM; 566 __set_bit(FR_BACKGROUND, &req->flags); 567 } else { 568 WARN_ON(args->nocreds); 569 - req = fuse_get_req(fc, true); 570 if (IS_ERR(req)) 571 return PTR_ERR(req); 572 } 573 574 fuse_args_to_req(req, args); 575 576 - if (!fuse_request_queue_background(fc, req)) { 577 - fuse_put_request(fc, req); 578 return -ENOTCONN; 579 } 580 ··· 582 } 583 EXPORT_SYMBOL_GPL(fuse_simple_background); 584 585 - static int fuse_simple_notify_reply(struct fuse_conn *fc, 586 struct fuse_args *args, u64 unique) 587 { 588 struct fuse_req *req; 589 - struct fuse_iqueue *fiq = &fc->iq; 590 int err = 0; 591 592 - req = fuse_get_req(fc, false); 593 if (IS_ERR(req)) 594 return PTR_ERR(req); 595 ··· 604 } else { 605 err = -ENODEV; 606 spin_unlock(&fiq->lock); 607 - fuse_put_request(fc, req); 608 } 609 610 return err; ··· 798 struct page *newpage; 799 struct pipe_buffer *buf = cs->pipebufs; 800 801 err = unlock_request(cs->req); 802 if (err) 803 - return err; 804 805 fuse_copy_finish(cs); 806 807 err = pipe_buf_confirm(cs->pipe, buf); 808 if (err) 809 - return err; 810 811 BUG_ON(!cs->nr_segs); 812 cs->currbuf = buf; ··· 847 err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL); 848 if (err) { 849 unlock_page(newpage); 850 - return err; 851 } 852 853 get_page(newpage); ··· 866 if (err) { 867 unlock_page(newpage); 868 put_page(newpage); 869 - return err; 870 } 871 872 unlock_page(oldpage); 873 put_page(oldpage); 874 cs->len = 0; 875 876 - return 0; 877 878 out_fallback_unlock: 879 unlock_page(newpage); ··· 887 cs->offset = buf->offset; 888 889 err = lock_request(cs->req); 890 - if (err) 891 - return err; 892 893 - return 1; 894 } 895 896 static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page, ··· 902 if (cs->nr_segs >= cs->pipe->max_usage) 903 return -EIO; 904 905 err = unlock_request(cs->req); 906 - if (err) 907 return err; 908 909 
fuse_copy_finish(cs); 910 911 buf = cs->pipebufs; 912 - get_page(page); 913 buf->page = page; 914 buf->offset = offset; 915 buf->len = count; ··· 1271 /* SETXATTR is special, since it may contain too large data */ 1272 if (args->opcode == FUSE_SETXATTR) 1273 req->out.h.error = -E2BIG; 1274 - fuse_request_end(fc, req); 1275 goto restart; 1276 } 1277 spin_lock(&fpq->lock); ··· 1305 /* matches barrier in request_wait_answer() */ 1306 smp_mb__after_atomic(); 1307 if (test_bit(FR_INTERRUPTED, &req->flags)) 1308 - queue_interrupt(fiq, req); 1309 - fuse_put_request(fc, req); 1310 1311 return reqsize; 1312 ··· 1314 if (!test_bit(FR_PRIVATE, &req->flags)) 1315 list_del_init(&req->list); 1316 spin_unlock(&fpq->lock); 1317 - fuse_request_end(fc, req); 1318 return err; 1319 1320 err_unlock: ··· 1437 fuse_copy_finish(cs); 1438 1439 down_read(&fc->killsb); 1440 - err = -ENOENT; 1441 - if (fc->sb) { 1442 - err = fuse_reverse_inval_inode(fc->sb, outarg.ino, 1443 - outarg.off, outarg.len); 1444 - } 1445 up_read(&fc->killsb); 1446 return err; 1447 ··· 1484 buf[outarg.namelen] = 0; 1485 1486 down_read(&fc->killsb); 1487 - err = -ENOENT; 1488 - if (fc->sb) 1489 - err = fuse_reverse_inval_entry(fc->sb, outarg.parent, 0, &name); 1490 up_read(&fc->killsb); 1491 kfree(buf); 1492 return err; ··· 1532 buf[outarg.namelen] = 0; 1533 1534 down_read(&fc->killsb); 1535 - err = -ENOENT; 1536 - if (fc->sb) 1537 - err = fuse_reverse_inval_entry(fc->sb, outarg.parent, 1538 - outarg.child, &name); 1539 up_read(&fc->killsb); 1540 kfree(buf); 1541 return err; ··· 1574 down_read(&fc->killsb); 1575 1576 err = -ENOENT; 1577 - if (!fc->sb) 1578 - goto out_up_killsb; 1579 - 1580 - inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid); 1581 if (!inode) 1582 goto out_up_killsb; 1583 ··· 1631 struct fuse_notify_retrieve_in inarg; 1632 }; 1633 1634 - static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_args *args, 1635 int error) 1636 { 1637 struct fuse_retrieve_args *ra = ··· 1641 kfree(ra); 1642 } 1643 1644 - static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, 1645 struct fuse_notify_retrieve_out *outarg) 1646 { 1647 int err; ··· 1652 unsigned int offset; 1653 size_t total_len = 0; 1654 unsigned int num_pages; 1655 struct fuse_retrieve_args *ra; 1656 size_t args_size = sizeof(*ra); 1657 struct fuse_args_pages *ap; ··· 1714 args->in_args[0].value = &ra->inarg; 1715 args->in_args[1].size = total_len; 1716 1717 - err = fuse_simple_notify_reply(fc, args, outarg->notify_unique); 1718 if (err) 1719 - fuse_retrieve_end(fc, args, err); 1720 1721 return err; 1722 } ··· 1725 struct fuse_copy_state *cs) 1726 { 1727 struct fuse_notify_retrieve_out outarg; 1728 struct inode *inode; 1729 int err; 1730 1731 err = -EINVAL; ··· 1742 1743 down_read(&fc->killsb); 1744 err = -ENOENT; 1745 - if (fc->sb) { 1746 - u64 nodeid = outarg.nodeid; 1747 1748 - inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid); 1749 - if (inode) { 1750 - err = fuse_retrieve(fc, inode, &outarg); 1751 - iput(inode); 1752 - } 1753 } 1754 up_read(&fc->killsb); 1755 ··· 1886 else if (oh.error == -ENOSYS) 1887 fc->no_interrupt = 1; 1888 else if (oh.error == -EAGAIN) 1889 - err = queue_interrupt(&fc->iq, req); 1890 1891 - fuse_put_request(fc, req); 1892 1893 goto copy_finish; 1894 } ··· 1918 list_del_init(&req->list); 1919 spin_unlock(&fpq->lock); 1920 1921 - fuse_request_end(fc, req); 1922 out: 1923 return err ? 
err : nbytes; 1924 ··· 2056 } 2057 2058 /* Abort all requests on the given list (pending or processing) */ 2059 - static void end_requests(struct fuse_conn *fc, struct list_head *head) 2060 { 2061 while (!list_empty(head)) { 2062 struct fuse_req *req; ··· 2064 req->out.h.error = -ECONNABORTED; 2065 clear_bit(FR_SENT, &req->flags); 2066 list_del_init(&req->list); 2067 - fuse_request_end(fc, req); 2068 } 2069 } 2070 ··· 2159 wake_up_all(&fc->blocked_waitq); 2160 spin_unlock(&fc->lock); 2161 2162 - end_requests(fc, &to_end); 2163 } else { 2164 spin_unlock(&fc->lock); 2165 } ··· 2189 list_splice_init(&fpq->processing[i], &to_end); 2190 spin_unlock(&fpq->lock); 2191 2192 - end_requests(fc, &to_end); 2193 2194 /* Are we the last open device? */ 2195 if (atomic_dec_and_test(&fc->dev_count)) {
··· 40 return READ_ONCE(file->private_data); 41 } 42 43 + static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req) 44 { 45 INIT_LIST_HEAD(&req->list); 46 INIT_LIST_HEAD(&req->intr_entry); 47 init_waitqueue_head(&req->waitq); 48 refcount_set(&req->count, 1); 49 __set_bit(FR_PENDING, &req->flags); 50 + req->fm = fm; 51 } 52 53 + static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags) 54 { 55 struct fuse_req *req = kmem_cache_zalloc(fuse_req_cachep, flags); 56 if (req) 57 + fuse_request_init(fm, req); 58 59 return req; 60 } ··· 100 } 101 } 102 103 + static void fuse_put_request(struct fuse_req *req); 104 105 + static struct fuse_req *fuse_get_req(struct fuse_mount *fm, bool for_background) 106 { 107 + struct fuse_conn *fc = fm->fc; 108 struct fuse_req *req; 109 int err; 110 atomic_inc(&fc->num_waiting); ··· 125 if (fc->conn_error) 126 goto out; 127 128 + req = fuse_request_alloc(fm, GFP_KERNEL); 129 err = -ENOMEM; 130 if (!req) { 131 if (for_background) ··· 143 144 if (unlikely(req->in.h.uid == ((uid_t)-1) || 145 req->in.h.gid == ((gid_t)-1))) { 146 + fuse_put_request(req); 147 return ERR_PTR(-EOVERFLOW); 148 } 149 return req; ··· 153 return ERR_PTR(err); 154 } 155 156 + static void fuse_put_request(struct fuse_req *req) 157 { 158 + struct fuse_conn *fc = req->fm->fc; 159 + 160 if (refcount_dec_and_test(&req->count)) { 161 if (test_bit(FR_BACKGROUND, &req->flags)) { 162 /* ··· 273 * the 'end' callback is called if given, else the reference to the 274 * request is released 275 */ 276 + void fuse_request_end(struct fuse_req *req) 277 { 278 + struct fuse_mount *fm = req->fm; 279 + struct fuse_conn *fc = fm->fc; 280 struct fuse_iqueue *fiq = &fc->iq; 281 282 if (test_and_set_bit(FR_FINISHED, &req->flags)) ··· 309 wake_up(&fc->blocked_waitq); 310 } 311 312 + if (fc->num_background == fc->congestion_threshold && fm->sb) { 313 + clear_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC); 314 + clear_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC); 315 } 316 fc->num_background--; 317 fc->active_background--; ··· 323 } 324 325 if (test_bit(FR_ASYNC, &req->flags)) 326 + req->args->end(fm, req->args, req->out.h.error); 327 put_request: 328 + fuse_put_request(req); 329 } 330 EXPORT_SYMBOL_GPL(fuse_request_end); 331 332 + static int queue_interrupt(struct fuse_req *req) 333 { 334 + struct fuse_iqueue *fiq = &req->fm->fc->iq; 335 + 336 spin_lock(&fiq->lock); 337 /* Check for we've sent request to interrupt this req */ 338 if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags))) { ··· 357 return 0; 358 } 359 360 + static void request_wait_answer(struct fuse_req *req) 361 { 362 + struct fuse_conn *fc = req->fm->fc; 363 struct fuse_iqueue *fiq = &fc->iq; 364 int err; 365 ··· 373 /* matches barrier in fuse_dev_do_read() */ 374 smp_mb__after_atomic(); 375 if (test_bit(FR_SENT, &req->flags)) 376 + queue_interrupt(req); 377 } 378 379 if (!test_bit(FR_FORCE, &req->flags)) { ··· 402 wait_event(req->waitq, test_bit(FR_FINISHED, &req->flags)); 403 } 404 405 + static void __fuse_request_send(struct fuse_req *req) 406 { 407 + struct fuse_iqueue *fiq = &req->fm->fc->iq; 408 409 BUG_ON(test_bit(FR_BACKGROUND, &req->flags)); 410 spin_lock(&fiq->lock); ··· 418 __fuse_get_request(req); 419 queue_request_and_unlock(fiq, req); 420 421 + request_wait_answer(req); 422 /* Pairs with smp_wmb() in fuse_request_end() */ 423 smp_rmb(); 424 } ··· 457 } 458 } 459 460 + static void fuse_force_creds(struct fuse_req *req) 461 { 462 + struct fuse_conn *fc = req->fm->fc; 463 + 464 req->in.h.uid = 
from_kuid_munged(fc->user_ns, current_fsuid()); 465 req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid()); 466 req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns); ··· 473 __set_bit(FR_ASYNC, &req->flags); 474 } 475 476 + ssize_t fuse_simple_request(struct fuse_mount *fm, struct fuse_args *args) 477 { 478 + struct fuse_conn *fc = fm->fc; 479 struct fuse_req *req; 480 ssize_t ret; 481 482 if (args->force) { 483 atomic_inc(&fc->num_waiting); 484 + req = fuse_request_alloc(fm, GFP_KERNEL | __GFP_NOFAIL); 485 486 if (!args->nocreds) 487 + fuse_force_creds(req); 488 489 __set_bit(FR_WAITING, &req->flags); 490 __set_bit(FR_FORCE, &req->flags); 491 } else { 492 WARN_ON(args->nocreds); 493 + req = fuse_get_req(fm, false); 494 if (IS_ERR(req)) 495 return PTR_ERR(req); 496 } ··· 500 501 if (!args->noreply) 502 __set_bit(FR_ISREPLY, &req->flags); 503 + __fuse_request_send(req); 504 ret = req->out.h.error; 505 if (!ret && args->out_argvar) { 506 BUG_ON(args->out_numargs == 0); 507 ret = args->out_args[args->out_numargs - 1].size; 508 } 509 + fuse_put_request(req); 510 511 return ret; 512 } 513 514 + static bool fuse_request_queue_background(struct fuse_req *req) 515 { 516 + struct fuse_mount *fm = req->fm; 517 + struct fuse_conn *fc = fm->fc; 518 bool queued = false; 519 520 WARN_ON(!test_bit(FR_BACKGROUND, &req->flags)); ··· 527 fc->num_background++; 528 if (fc->num_background == fc->max_background) 529 fc->blocked = 1; 530 + if (fc->num_background == fc->congestion_threshold && fm->sb) { 531 + set_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC); 532 + set_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC); 533 } 534 list_add_tail(&req->list, &fc->bg_queue); 535 flush_bg_queue(fc); ··· 540 return queued; 541 } 542 543 + int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args, 544 gfp_t gfp_flags) 545 { 546 struct fuse_req *req; 547 548 if (args->force) { 549 WARN_ON(!args->nocreds); 550 + req = fuse_request_alloc(fm, gfp_flags); 551 if (!req) 552 return -ENOMEM; 553 __set_bit(FR_BACKGROUND, &req->flags); 554 } else { 555 WARN_ON(args->nocreds); 556 + req = fuse_get_req(fm, true); 557 if (IS_ERR(req)) 558 return PTR_ERR(req); 559 } 560 561 fuse_args_to_req(req, args); 562 563 + if (!fuse_request_queue_background(req)) { 564 + fuse_put_request(req); 565 return -ENOTCONN; 566 } 567 ··· 569 } 570 EXPORT_SYMBOL_GPL(fuse_simple_background); 571 572 + static int fuse_simple_notify_reply(struct fuse_mount *fm, 573 struct fuse_args *args, u64 unique) 574 { 575 struct fuse_req *req; 576 + struct fuse_iqueue *fiq = &fm->fc->iq; 577 int err = 0; 578 579 + req = fuse_get_req(fm, false); 580 if (IS_ERR(req)) 581 return PTR_ERR(req); 582 ··· 591 } else { 592 err = -ENODEV; 593 spin_unlock(&fiq->lock); 594 + fuse_put_request(req); 595 } 596 597 return err; ··· 785 struct page *newpage; 786 struct pipe_buffer *buf = cs->pipebufs; 787 788 + get_page(oldpage); 789 err = unlock_request(cs->req); 790 if (err) 791 + goto out_put_old; 792 793 fuse_copy_finish(cs); 794 795 err = pipe_buf_confirm(cs->pipe, buf); 796 if (err) 797 + goto out_put_old; 798 799 BUG_ON(!cs->nr_segs); 800 cs->currbuf = buf; ··· 833 err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL); 834 if (err) { 835 unlock_page(newpage); 836 + goto out_put_old; 837 } 838 839 get_page(newpage); ··· 852 if (err) { 853 unlock_page(newpage); 854 put_page(newpage); 855 + goto out_put_old; 856 } 857 858 unlock_page(oldpage); 859 + /* Drop ref for ap->pages[] array */ 860 put_page(oldpage); 861 cs->len = 0; 862 863 + err = 0; 864 + out_put_old: 
865 + /* Drop ref obtained in this function */ 866 + put_page(oldpage); 867 + return err; 868 869 out_fallback_unlock: 870 unlock_page(newpage); ··· 868 cs->offset = buf->offset; 869 870 err = lock_request(cs->req); 871 + if (!err) 872 + err = 1; 873 874 + goto out_put_old; 875 } 876 877 static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page, ··· 883 if (cs->nr_segs >= cs->pipe->max_usage) 884 return -EIO; 885 886 + get_page(page); 887 err = unlock_request(cs->req); 888 + if (err) { 889 + put_page(page); 890 return err; 891 + } 892 893 fuse_copy_finish(cs); 894 895 buf = cs->pipebufs; 896 buf->page = page; 897 buf->offset = offset; 898 buf->len = count; ··· 1250 /* SETXATTR is special, since it may contain too large data */ 1251 if (args->opcode == FUSE_SETXATTR) 1252 req->out.h.error = -E2BIG; 1253 + fuse_request_end(req); 1254 goto restart; 1255 } 1256 spin_lock(&fpq->lock); ··· 1284 /* matches barrier in request_wait_answer() */ 1285 smp_mb__after_atomic(); 1286 if (test_bit(FR_INTERRUPTED, &req->flags)) 1287 + queue_interrupt(req); 1288 + fuse_put_request(req); 1289 1290 return reqsize; 1291 ··· 1293 if (!test_bit(FR_PRIVATE, &req->flags)) 1294 list_del_init(&req->list); 1295 spin_unlock(&fpq->lock); 1296 + fuse_request_end(req); 1297 return err; 1298 1299 err_unlock: ··· 1416 fuse_copy_finish(cs); 1417 1418 down_read(&fc->killsb); 1419 + err = fuse_reverse_inval_inode(fc, outarg.ino, 1420 + outarg.off, outarg.len); 1421 up_read(&fc->killsb); 1422 return err; 1423 ··· 1466 buf[outarg.namelen] = 0; 1467 1468 down_read(&fc->killsb); 1469 + err = fuse_reverse_inval_entry(fc, outarg.parent, 0, &name); 1470 up_read(&fc->killsb); 1471 kfree(buf); 1472 return err; ··· 1516 buf[outarg.namelen] = 0; 1517 1518 down_read(&fc->killsb); 1519 + err = fuse_reverse_inval_entry(fc, outarg.parent, outarg.child, &name); 1520 up_read(&fc->killsb); 1521 kfree(buf); 1522 return err; ··· 1561 down_read(&fc->killsb); 1562 1563 err = -ENOENT; 1564 + inode = fuse_ilookup(fc, nodeid, NULL); 1565 if (!inode) 1566 goto out_up_killsb; 1567 ··· 1621 struct fuse_notify_retrieve_in inarg; 1622 }; 1623 1624 + static void fuse_retrieve_end(struct fuse_mount *fm, struct fuse_args *args, 1625 int error) 1626 { 1627 struct fuse_retrieve_args *ra = ··· 1631 kfree(ra); 1632 } 1633 1634 + static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode, 1635 struct fuse_notify_retrieve_out *outarg) 1636 { 1637 int err; ··· 1642 unsigned int offset; 1643 size_t total_len = 0; 1644 unsigned int num_pages; 1645 + struct fuse_conn *fc = fm->fc; 1646 struct fuse_retrieve_args *ra; 1647 size_t args_size = sizeof(*ra); 1648 struct fuse_args_pages *ap; ··· 1703 args->in_args[0].value = &ra->inarg; 1704 args->in_args[1].size = total_len; 1705 1706 + err = fuse_simple_notify_reply(fm, args, outarg->notify_unique); 1707 if (err) 1708 + fuse_retrieve_end(fm, args, err); 1709 1710 return err; 1711 } ··· 1714 struct fuse_copy_state *cs) 1715 { 1716 struct fuse_notify_retrieve_out outarg; 1717 + struct fuse_mount *fm; 1718 struct inode *inode; 1719 + u64 nodeid; 1720 int err; 1721 1722 err = -EINVAL; ··· 1729 1730 down_read(&fc->killsb); 1731 err = -ENOENT; 1732 + nodeid = outarg.nodeid; 1733 1734 + inode = fuse_ilookup(fc, nodeid, &fm); 1735 + if (inode) { 1736 + err = fuse_retrieve(fm, inode, &outarg); 1737 + iput(inode); 1738 } 1739 up_read(&fc->killsb); 1740 ··· 1875 else if (oh.error == -ENOSYS) 1876 fc->no_interrupt = 1; 1877 else if (oh.error == -EAGAIN) 1878 + err = queue_interrupt(req); 1879 1880 + 
fuse_put_request(req); 1881 1882 goto copy_finish; 1883 } ··· 1907 list_del_init(&req->list); 1908 spin_unlock(&fpq->lock); 1909 1910 + fuse_request_end(req); 1911 out: 1912 return err ? err : nbytes; 1913 ··· 2045 } 2046 2047 /* Abort all requests on the given list (pending or processing) */ 2048 + static void end_requests(struct list_head *head) 2049 { 2050 while (!list_empty(head)) { 2051 struct fuse_req *req; ··· 2053 req->out.h.error = -ECONNABORTED; 2054 clear_bit(FR_SENT, &req->flags); 2055 list_del_init(&req->list); 2056 + fuse_request_end(req); 2057 } 2058 } 2059 ··· 2148 wake_up_all(&fc->blocked_waitq); 2149 spin_unlock(&fc->lock); 2150 2151 + end_requests(&to_end); 2152 } else { 2153 spin_unlock(&fc->lock); 2154 } ··· 2178 list_splice_init(&fpq->processing[i], &to_end); 2179 spin_unlock(&fpq->lock); 2180 2181 + end_requests(&to_end); 2182 2183 /* Are we the last open device? */ 2184 if (atomic_dec_and_test(&fc->dev_count)) {
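The dev.c hunks above swap the pervasive struct fuse_conn * parameter for either a struct fuse_mount * or just the request, now that each request records its owning mount. A hedged, compilable C sketch of that "context travels with the request" shape, with made-up types standing in for the kernel's:

/*
 * Userspace sketch of the calling-convention change above: the request
 * records which mount it belongs to, so the end-of-request path needs
 * only the request itself.  The types and functions here are made up
 * for illustration; they are not the kernel's.
 */
#include <stdio.h>
#include <stdlib.h>

struct conn  { int num_background; };
struct mount { struct conn *fc; const char *tag; };
struct req   { struct mount *fm; int error; };

static struct req *request_alloc(struct mount *fm)
{
	struct req *req = calloc(1, sizeof(*req));

	if (req)
		req->fm = fm;	/* previously reached via a fuse_conn argument */
	return req;
}

static void request_end(struct req *req)
{
	/* Per-mount and per-connection state are both reachable from req. */
	printf("ending request on %s, err=%d\n", req->fm->tag, req->error);
	req->fm->fc->num_background--;
	free(req);
}

int main(void)
{
	struct conn fc = { .num_background = 1 };
	struct mount fm = { .fc = &fc, .tag = "submount-a" };
	struct req *req = request_alloc(&fm);

	if (!req)
		return 1;
	request_end(req);
	return 0;
}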
+158 -62
fs/fuse/dir.c
··· 10 11 #include <linux/pagemap.h> 12 #include <linux/file.h> 13 #include <linux/sched.h> 14 #include <linux/namei.h> 15 #include <linux/slab.h> ··· 197 { 198 struct inode *inode; 199 struct dentry *parent; 200 - struct fuse_conn *fc; 201 struct fuse_inode *fi; 202 int ret; 203 ··· 219 if (flags & LOOKUP_RCU) 220 goto out; 221 222 - fc = get_fuse_conn(inode); 223 224 forget = fuse_alloc_forget(); 225 ret = -ENOMEM; 226 if (!forget) 227 goto out; 228 229 - attr_version = fuse_get_attr_version(fc); 230 231 parent = dget_parent(entry); 232 - fuse_lookup_init(fc, &args, get_node_id(d_inode(parent)), 233 &entry->d_name, &outarg); 234 - ret = fuse_simple_request(fc, &args); 235 dput(parent); 236 /* Zero nodeid is same as -ENOENT */ 237 if (!ret && !outarg.nodeid) 238 ret = -ENOENT; 239 if (!ret) { 240 fi = get_fuse_inode(inode); 241 - if (outarg.nodeid != get_node_id(inode)) { 242 - fuse_queue_forget(fc, forget, outarg.nodeid, 1); 243 goto invalid; 244 } 245 spin_lock(&fi->lock); ··· 301 return time_before64(fuse_dentry_time(dentry), get_jiffies_64()); 302 } 303 304 const struct dentry_operations fuse_dentry_operations = { 305 .d_revalidate = fuse_dentry_revalidate, 306 .d_delete = fuse_dentry_delete, ··· 381 .d_init = fuse_dentry_init, 382 .d_release = fuse_dentry_release, 383 #endif 384 }; 385 386 const struct dentry_operations fuse_root_dentry_operations = { ··· 406 int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name, 407 struct fuse_entry_out *outarg, struct inode **inode) 408 { 409 - struct fuse_conn *fc = get_fuse_conn_super(sb); 410 FUSE_ARGS(args); 411 struct fuse_forget_link *forget; 412 u64 attr_version; ··· 423 if (!forget) 424 goto out; 425 426 - attr_version = fuse_get_attr_version(fc); 427 428 - fuse_lookup_init(fc, &args, nodeid, name, outarg); 429 - err = fuse_simple_request(fc, &args); 430 /* Zero nodeid is same as -ENOENT, but with valid timeout */ 431 if (err || !outarg->nodeid) 432 goto out_put_forget; ··· 442 attr_version); 443 err = -ENOMEM; 444 if (!*inode) { 445 - fuse_queue_forget(fc, forget, outarg->nodeid, 1); 446 goto out; 447 } 448 err = 0; ··· 511 { 512 int err; 513 struct inode *inode; 514 - struct fuse_conn *fc = get_fuse_conn(dir); 515 FUSE_ARGS(args); 516 struct fuse_forget_link *forget; 517 struct fuse_create_in inarg; ··· 529 goto out_err; 530 531 err = -ENOMEM; 532 - ff = fuse_file_alloc(fc); 533 if (!ff) 534 goto out_put_forget_req; 535 536 - if (!fc->dont_mask) 537 mode &= ~current_umask(); 538 539 flags &= ~O_NOCTTY; ··· 554 args.out_args[0].value = &outentry; 555 args.out_args[1].size = sizeof(outopen); 556 args.out_args[1].value = &outopen; 557 - err = fuse_simple_request(fc, &args); 558 if (err) 559 goto out_free_ff; 560 ··· 571 if (!inode) { 572 flags &= ~(O_CREAT | O_EXCL | O_TRUNC); 573 fuse_sync_release(NULL, ff, flags); 574 - fuse_queue_forget(fc, forget, outentry.nodeid, 1); 575 err = -ENOMEM; 576 goto out_err; 577 } ··· 644 /* 645 * Code shared between mknod, mkdir, symlink and link 646 */ 647 - static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args, 648 struct inode *dir, struct dentry *entry, 649 umode_t mode) 650 { ··· 663 args->out_numargs = 1; 664 args->out_args[0].size = sizeof(outarg); 665 args->out_args[0].value = &outarg; 666 - err = fuse_simple_request(fc, args); 667 if (err) 668 goto out_put_forget_req; 669 ··· 677 inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, 678 &outarg.attr, entry_attr_timeout(&outarg), 0); 679 if (!inode) { 680 - fuse_queue_forget(fc, forget, 
outarg.nodeid, 1); 681 return -ENOMEM; 682 } 683 kfree(forget); ··· 705 dev_t rdev) 706 { 707 struct fuse_mknod_in inarg; 708 - struct fuse_conn *fc = get_fuse_conn(dir); 709 FUSE_ARGS(args); 710 711 - if (!fc->dont_mask) 712 mode &= ~current_umask(); 713 714 memset(&inarg, 0, sizeof(inarg)); ··· 721 args.in_args[0].value = &inarg; 722 args.in_args[1].size = entry->d_name.len + 1; 723 args.in_args[1].value = entry->d_name.name; 724 - return create_new_entry(fc, &args, dir, entry, mode); 725 } 726 727 static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode, ··· 733 static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode) 734 { 735 struct fuse_mkdir_in inarg; 736 - struct fuse_conn *fc = get_fuse_conn(dir); 737 FUSE_ARGS(args); 738 739 - if (!fc->dont_mask) 740 mode &= ~current_umask(); 741 742 memset(&inarg, 0, sizeof(inarg)); ··· 748 args.in_args[0].value = &inarg; 749 args.in_args[1].size = entry->d_name.len + 1; 750 args.in_args[1].value = entry->d_name.name; 751 - return create_new_entry(fc, &args, dir, entry, S_IFDIR); 752 } 753 754 static int fuse_symlink(struct inode *dir, struct dentry *entry, 755 const char *link) 756 { 757 - struct fuse_conn *fc = get_fuse_conn(dir); 758 unsigned len = strlen(link) + 1; 759 FUSE_ARGS(args); 760 ··· 764 args.in_args[0].value = entry->d_name.name; 765 args.in_args[1].size = len; 766 args.in_args[1].value = link; 767 - return create_new_entry(fc, &args, dir, entry, S_IFLNK); 768 } 769 770 void fuse_update_ctime(struct inode *inode) ··· 778 static int fuse_unlink(struct inode *dir, struct dentry *entry) 779 { 780 int err; 781 - struct fuse_conn *fc = get_fuse_conn(dir); 782 FUSE_ARGS(args); 783 784 args.opcode = FUSE_UNLINK; ··· 786 args.in_numargs = 1; 787 args.in_args[0].size = entry->d_name.len + 1; 788 args.in_args[0].value = entry->d_name.name; 789 - err = fuse_simple_request(fc, &args); 790 if (!err) { 791 struct inode *inode = d_inode(entry); 792 struct fuse_inode *fi = get_fuse_inode(inode); 793 794 spin_lock(&fi->lock); 795 - fi->attr_version = atomic64_inc_return(&fc->attr_version); 796 /* 797 * If i_nlink == 0 then unlink doesn't make sense, yet this can 798 * happen if userspace filesystem is careless. 
It would be ··· 814 static int fuse_rmdir(struct inode *dir, struct dentry *entry) 815 { 816 int err; 817 - struct fuse_conn *fc = get_fuse_conn(dir); 818 FUSE_ARGS(args); 819 820 args.opcode = FUSE_RMDIR; ··· 822 args.in_numargs = 1; 823 args.in_args[0].size = entry->d_name.len + 1; 824 args.in_args[0].value = entry->d_name.name; 825 - err = fuse_simple_request(fc, &args); 826 if (!err) { 827 clear_nlink(d_inode(entry)); 828 fuse_dir_changed(dir); ··· 838 { 839 int err; 840 struct fuse_rename2_in inarg; 841 - struct fuse_conn *fc = get_fuse_conn(olddir); 842 FUSE_ARGS(args); 843 844 memset(&inarg, 0, argsize); ··· 853 args.in_args[1].value = oldent->d_name.name; 854 args.in_args[2].size = newent->d_name.len + 1; 855 args.in_args[2].value = newent->d_name.name; 856 - err = fuse_simple_request(fc, &args); 857 if (!err) { 858 /* ctime changes */ 859 fuse_invalidate_attr(d_inode(oldent)); ··· 924 int err; 925 struct fuse_link_in inarg; 926 struct inode *inode = d_inode(entry); 927 - struct fuse_conn *fc = get_fuse_conn(inode); 928 FUSE_ARGS(args); 929 930 memset(&inarg, 0, sizeof(inarg)); ··· 935 args.in_args[0].value = &inarg; 936 args.in_args[1].size = newent->d_name.len + 1; 937 args.in_args[1].value = newent->d_name.name; 938 - err = create_new_entry(fc, &args, newdir, newent, inode->i_mode); 939 /* Contrary to "normal" filesystems it can happen that link 940 makes two "logical" inodes point to the same "physical" 941 inode. We invalidate the attributes of the old one, so it ··· 946 struct fuse_inode *fi = get_fuse_inode(inode); 947 948 spin_lock(&fi->lock); 949 - fi->attr_version = atomic64_inc_return(&fc->attr_version); 950 if (likely(inode->i_nlink < UINT_MAX)) 951 inc_nlink(inode); 952 spin_unlock(&fi->lock); ··· 1003 int err; 1004 struct fuse_getattr_in inarg; 1005 struct fuse_attr_out outarg; 1006 - struct fuse_conn *fc = get_fuse_conn(inode); 1007 FUSE_ARGS(args); 1008 u64 attr_version; 1009 1010 - attr_version = fuse_get_attr_version(fc); 1011 1012 memset(&inarg, 0, sizeof(inarg)); 1013 memset(&outarg, 0, sizeof(outarg)); ··· 1026 args.out_numargs = 1; 1027 args.out_args[0].size = sizeof(outarg); 1028 args.out_args[0].value = &outarg; 1029 - err = fuse_simple_request(fc, &args); 1030 if (!err) { 1031 if (fuse_invalid_attr(&outarg.attr) || 1032 (inode->i_mode ^ outarg.attr.mode) & S_IFMT) { ··· 1079 STATX_BASIC_STATS & ~STATX_ATIME, 0); 1080 } 1081 1082 - int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, 1083 u64 child_nodeid, struct qstr *name) 1084 { 1085 int err = -ENOTDIR; ··· 1087 struct dentry *dir; 1088 struct dentry *entry; 1089 1090 - parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid); 1091 if (!parent) 1092 return -ENOENT; 1093 ··· 1179 1180 static int fuse_access(struct inode *inode, int mask) 1181 { 1182 - struct fuse_conn *fc = get_fuse_conn(inode); 1183 FUSE_ARGS(args); 1184 struct fuse_access_in inarg; 1185 int err; 1186 1187 BUG_ON(mask & MAY_NOT_BLOCK); 1188 1189 - if (fc->no_access) 1190 return 0; 1191 1192 memset(&inarg, 0, sizeof(inarg)); ··· 1196 args.in_numargs = 1; 1197 args.in_args[0].size = sizeof(inarg); 1198 args.in_args[0].value = &inarg; 1199 - err = fuse_simple_request(fc, &args); 1200 if (err == -ENOSYS) { 1201 - fc->no_access = 1; 1202 err = 0; 1203 } 1204 return err; ··· 1286 1287 static int fuse_readlink_page(struct inode *inode, struct page *page) 1288 { 1289 - struct fuse_conn *fc = get_fuse_conn(inode); 1290 struct fuse_page_desc desc = { .length = PAGE_SIZE - 1 }; 1291 struct fuse_args_pages ap = { 1292 
.num_pages = 1, ··· 1303 ap.args.page_zeroing = true; 1304 ap.args.out_numargs = 1; 1305 ap.args.out_args[0].size = desc.length; 1306 - res = fuse_simple_request(fc, &ap.args); 1307 1308 fuse_invalidate_atime(inode); 1309 ··· 1531 */ 1532 int fuse_flush_times(struct inode *inode, struct fuse_file *ff) 1533 { 1534 - struct fuse_conn *fc = get_fuse_conn(inode); 1535 FUSE_ARGS(args); 1536 struct fuse_setattr_in inarg; 1537 struct fuse_attr_out outarg; ··· 1542 inarg.valid = FATTR_MTIME; 1543 inarg.mtime = inode->i_mtime.tv_sec; 1544 inarg.mtimensec = inode->i_mtime.tv_nsec; 1545 - if (fc->minor >= 23) { 1546 inarg.valid |= FATTR_CTIME; 1547 inarg.ctime = inode->i_ctime.tv_sec; 1548 inarg.ctimensec = inode->i_ctime.tv_nsec; ··· 1551 inarg.valid |= FATTR_FH; 1552 inarg.fh = ff->fh; 1553 } 1554 - fuse_setattr_fill(fc, &args, inode, &inarg, &outarg); 1555 1556 - return fuse_simple_request(fc, &args); 1557 } 1558 1559 /* ··· 1568 struct file *file) 1569 { 1570 struct inode *inode = d_inode(dentry); 1571 - struct fuse_conn *fc = get_fuse_conn(inode); 1572 struct fuse_inode *fi = get_fuse_inode(inode); 1573 FUSE_ARGS(args); 1574 struct fuse_setattr_in inarg; ··· 1579 loff_t oldsize; 1580 int err; 1581 bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode); 1582 1583 if (!fc->default_permissions) 1584 attr->ia_valid |= ATTR_FORCE; ··· 1587 err = setattr_prepare(dentry, attr); 1588 if (err) 1589 return err; 1590 1591 if (attr->ia_valid & ATTR_OPEN) { 1592 /* This is coming from open(..., ... | O_TRUNC); */ ··· 1616 */ 1617 i_size_write(inode, 0); 1618 truncate_pagecache(inode, 0); 1619 - return 0; 1620 } 1621 file = NULL; 1622 - } 1623 - 1624 - if (attr->ia_valid & ATTR_SIZE) { 1625 - if (WARN_ON(!S_ISREG(inode->i_mode))) 1626 - return -EIO; 1627 - is_truncate = true; 1628 } 1629 1630 /* Flush dirty data/metadata before non-truncate SETATTR */ ··· 1655 inarg.lock_owner = fuse_lock_owner_id(fc, current->files); 1656 } 1657 fuse_setattr_fill(fc, &args, inode, &inarg, &outarg); 1658 - err = fuse_simple_request(fc, &args); 1659 if (err) { 1660 if (err == -EINTR) 1661 fuse_invalidate_attr(inode); ··· 1703 } 1704 1705 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); 1706 return 0; 1707 1708 error: ··· 1714 fuse_release_nowrite(inode); 1715 1716 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); 1717 return err; 1718 } 1719
··· 10 11 #include <linux/pagemap.h> 12 #include <linux/file.h> 13 + #include <linux/fs_context.h> 14 #include <linux/sched.h> 15 #include <linux/namei.h> 16 #include <linux/slab.h> ··· 196 { 197 struct inode *inode; 198 struct dentry *parent; 199 + struct fuse_mount *fm; 200 struct fuse_inode *fi; 201 int ret; 202 ··· 218 if (flags & LOOKUP_RCU) 219 goto out; 220 221 + fm = get_fuse_mount(inode); 222 223 forget = fuse_alloc_forget(); 224 ret = -ENOMEM; 225 if (!forget) 226 goto out; 227 228 + attr_version = fuse_get_attr_version(fm->fc); 229 230 parent = dget_parent(entry); 231 + fuse_lookup_init(fm->fc, &args, get_node_id(d_inode(parent)), 232 &entry->d_name, &outarg); 233 + ret = fuse_simple_request(fm, &args); 234 dput(parent); 235 /* Zero nodeid is same as -ENOENT */ 236 if (!ret && !outarg.nodeid) 237 ret = -ENOENT; 238 if (!ret) { 239 fi = get_fuse_inode(inode); 240 + if (outarg.nodeid != get_node_id(inode) || 241 + (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) { 242 + fuse_queue_forget(fm->fc, forget, 243 + outarg.nodeid, 1); 244 goto invalid; 245 } 246 spin_lock(&fi->lock); ··· 298 return time_before64(fuse_dentry_time(dentry), get_jiffies_64()); 299 } 300 301 + /* 302 + * Create a fuse_mount object with a new superblock (with path->dentry 303 + * as the root), and return that mount so it can be auto-mounted on 304 + * @path. 305 + */ 306 + static struct vfsmount *fuse_dentry_automount(struct path *path) 307 + { 308 + struct fs_context *fsc; 309 + struct fuse_mount *parent_fm = get_fuse_mount_super(path->mnt->mnt_sb); 310 + struct fuse_conn *fc = parent_fm->fc; 311 + struct fuse_mount *fm; 312 + struct vfsmount *mnt; 313 + struct fuse_inode *mp_fi = get_fuse_inode(d_inode(path->dentry)); 314 + struct super_block *sb; 315 + int err; 316 + 317 + fsc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry); 318 + if (IS_ERR(fsc)) { 319 + err = PTR_ERR(fsc); 320 + goto out; 321 + } 322 + 323 + err = -ENOMEM; 324 + fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL); 325 + if (!fm) 326 + goto out_put_fsc; 327 + 328 + refcount_set(&fm->count, 1); 329 + fsc->s_fs_info = fm; 330 + sb = sget_fc(fsc, NULL, set_anon_super_fc); 331 + if (IS_ERR(sb)) { 332 + err = PTR_ERR(sb); 333 + fuse_mount_put(fm); 334 + goto out_put_fsc; 335 + } 336 + fm->fc = fuse_conn_get(fc); 337 + 338 + /* Initialize superblock, making @mp_fi its root */ 339 + err = fuse_fill_super_submount(sb, mp_fi); 340 + if (err) 341 + goto out_put_sb; 342 + 343 + sb->s_flags |= SB_ACTIVE; 344 + fsc->root = dget(sb->s_root); 345 + /* We are done configuring the superblock, so unlock it */ 346 + up_write(&sb->s_umount); 347 + 348 + down_write(&fc->killsb); 349 + list_add_tail(&fm->fc_entry, &fc->mounts); 350 + up_write(&fc->killsb); 351 + 352 + /* Create the submount */ 353 + mnt = vfs_create_mount(fsc); 354 + if (IS_ERR(mnt)) { 355 + err = PTR_ERR(mnt); 356 + goto out_put_fsc; 357 + } 358 + mntget(mnt); 359 + put_fs_context(fsc); 360 + return mnt; 361 + 362 + out_put_sb: 363 + /* 364 + * Only jump here when fsc->root is NULL and sb is still locked 365 + * (otherwise put_fs_context() will put the superblock) 366 + */ 367 + deactivate_locked_super(sb); 368 + out_put_fsc: 369 + put_fs_context(fsc); 370 + out: 371 + return ERR_PTR(err); 372 + } 373 + 374 const struct dentry_operations fuse_dentry_operations = { 375 .d_revalidate = fuse_dentry_revalidate, 376 .d_delete = fuse_dentry_delete, ··· 305 .d_init = fuse_dentry_init, 306 .d_release = fuse_dentry_release, 307 #endif 308 + .d_automount 
= fuse_dentry_automount, 309 }; 310 311 const struct dentry_operations fuse_root_dentry_operations = { ··· 329 int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name, 330 struct fuse_entry_out *outarg, struct inode **inode) 331 { 332 + struct fuse_mount *fm = get_fuse_mount_super(sb); 333 FUSE_ARGS(args); 334 struct fuse_forget_link *forget; 335 u64 attr_version; ··· 346 if (!forget) 347 goto out; 348 349 + attr_version = fuse_get_attr_version(fm->fc); 350 351 + fuse_lookup_init(fm->fc, &args, nodeid, name, outarg); 352 + err = fuse_simple_request(fm, &args); 353 /* Zero nodeid is same as -ENOENT, but with valid timeout */ 354 if (err || !outarg->nodeid) 355 goto out_put_forget; ··· 365 attr_version); 366 err = -ENOMEM; 367 if (!*inode) { 368 + fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1); 369 goto out; 370 } 371 err = 0; ··· 434 { 435 int err; 436 struct inode *inode; 437 + struct fuse_mount *fm = get_fuse_mount(dir); 438 FUSE_ARGS(args); 439 struct fuse_forget_link *forget; 440 struct fuse_create_in inarg; ··· 452 goto out_err; 453 454 err = -ENOMEM; 455 + ff = fuse_file_alloc(fm); 456 if (!ff) 457 goto out_put_forget_req; 458 459 + if (!fm->fc->dont_mask) 460 mode &= ~current_umask(); 461 462 flags &= ~O_NOCTTY; ··· 477 args.out_args[0].value = &outentry; 478 args.out_args[1].size = sizeof(outopen); 479 args.out_args[1].value = &outopen; 480 + err = fuse_simple_request(fm, &args); 481 if (err) 482 goto out_free_ff; 483 ··· 494 if (!inode) { 495 flags &= ~(O_CREAT | O_EXCL | O_TRUNC); 496 fuse_sync_release(NULL, ff, flags); 497 + fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1); 498 err = -ENOMEM; 499 goto out_err; 500 } ··· 567 /* 568 * Code shared between mknod, mkdir, symlink and link 569 */ 570 + static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args, 571 struct inode *dir, struct dentry *entry, 572 umode_t mode) 573 { ··· 586 args->out_numargs = 1; 587 args->out_args[0].size = sizeof(outarg); 588 args->out_args[0].value = &outarg; 589 + err = fuse_simple_request(fm, args); 590 if (err) 591 goto out_put_forget_req; 592 ··· 600 inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, 601 &outarg.attr, entry_attr_timeout(&outarg), 0); 602 if (!inode) { 603 + fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1); 604 return -ENOMEM; 605 } 606 kfree(forget); ··· 628 dev_t rdev) 629 { 630 struct fuse_mknod_in inarg; 631 + struct fuse_mount *fm = get_fuse_mount(dir); 632 FUSE_ARGS(args); 633 634 + if (!fm->fc->dont_mask) 635 mode &= ~current_umask(); 636 637 memset(&inarg, 0, sizeof(inarg)); ··· 644 args.in_args[0].value = &inarg; 645 args.in_args[1].size = entry->d_name.len + 1; 646 args.in_args[1].value = entry->d_name.name; 647 + return create_new_entry(fm, &args, dir, entry, mode); 648 } 649 650 static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode, ··· 656 static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode) 657 { 658 struct fuse_mkdir_in inarg; 659 + struct fuse_mount *fm = get_fuse_mount(dir); 660 FUSE_ARGS(args); 661 662 + if (!fm->fc->dont_mask) 663 mode &= ~current_umask(); 664 665 memset(&inarg, 0, sizeof(inarg)); ··· 671 args.in_args[0].value = &inarg; 672 args.in_args[1].size = entry->d_name.len + 1; 673 args.in_args[1].value = entry->d_name.name; 674 + return create_new_entry(fm, &args, dir, entry, S_IFDIR); 675 } 676 677 static int fuse_symlink(struct inode *dir, struct dentry *entry, 678 const char *link) 679 { 680 + struct fuse_mount *fm = get_fuse_mount(dir); 681 
unsigned len = strlen(link) + 1; 682 FUSE_ARGS(args); 683 ··· 687 args.in_args[0].value = entry->d_name.name; 688 args.in_args[1].size = len; 689 args.in_args[1].value = link; 690 + return create_new_entry(fm, &args, dir, entry, S_IFLNK); 691 } 692 693 void fuse_update_ctime(struct inode *inode) ··· 701 static int fuse_unlink(struct inode *dir, struct dentry *entry) 702 { 703 int err; 704 + struct fuse_mount *fm = get_fuse_mount(dir); 705 FUSE_ARGS(args); 706 707 args.opcode = FUSE_UNLINK; ··· 709 args.in_numargs = 1; 710 args.in_args[0].size = entry->d_name.len + 1; 711 args.in_args[0].value = entry->d_name.name; 712 + err = fuse_simple_request(fm, &args); 713 if (!err) { 714 struct inode *inode = d_inode(entry); 715 struct fuse_inode *fi = get_fuse_inode(inode); 716 717 spin_lock(&fi->lock); 718 + fi->attr_version = atomic64_inc_return(&fm->fc->attr_version); 719 /* 720 * If i_nlink == 0 then unlink doesn't make sense, yet this can 721 * happen if userspace filesystem is careless. It would be ··· 737 static int fuse_rmdir(struct inode *dir, struct dentry *entry) 738 { 739 int err; 740 + struct fuse_mount *fm = get_fuse_mount(dir); 741 FUSE_ARGS(args); 742 743 args.opcode = FUSE_RMDIR; ··· 745 args.in_numargs = 1; 746 args.in_args[0].size = entry->d_name.len + 1; 747 args.in_args[0].value = entry->d_name.name; 748 + err = fuse_simple_request(fm, &args); 749 if (!err) { 750 clear_nlink(d_inode(entry)); 751 fuse_dir_changed(dir); ··· 761 { 762 int err; 763 struct fuse_rename2_in inarg; 764 + struct fuse_mount *fm = get_fuse_mount(olddir); 765 FUSE_ARGS(args); 766 767 memset(&inarg, 0, argsize); ··· 776 args.in_args[1].value = oldent->d_name.name; 777 args.in_args[2].size = newent->d_name.len + 1; 778 args.in_args[2].value = newent->d_name.name; 779 + err = fuse_simple_request(fm, &args); 780 if (!err) { 781 /* ctime changes */ 782 fuse_invalidate_attr(d_inode(oldent)); ··· 847 int err; 848 struct fuse_link_in inarg; 849 struct inode *inode = d_inode(entry); 850 + struct fuse_mount *fm = get_fuse_mount(inode); 851 FUSE_ARGS(args); 852 853 memset(&inarg, 0, sizeof(inarg)); ··· 858 args.in_args[0].value = &inarg; 859 args.in_args[1].size = newent->d_name.len + 1; 860 args.in_args[1].value = newent->d_name.name; 861 + err = create_new_entry(fm, &args, newdir, newent, inode->i_mode); 862 /* Contrary to "normal" filesystems it can happen that link 863 makes two "logical" inodes point to the same "physical" 864 inode. 
We invalidate the attributes of the old one, so it ··· 869 struct fuse_inode *fi = get_fuse_inode(inode); 870 871 spin_lock(&fi->lock); 872 + fi->attr_version = atomic64_inc_return(&fm->fc->attr_version); 873 if (likely(inode->i_nlink < UINT_MAX)) 874 inc_nlink(inode); 875 spin_unlock(&fi->lock); ··· 926 int err; 927 struct fuse_getattr_in inarg; 928 struct fuse_attr_out outarg; 929 + struct fuse_mount *fm = get_fuse_mount(inode); 930 FUSE_ARGS(args); 931 u64 attr_version; 932 933 + attr_version = fuse_get_attr_version(fm->fc); 934 935 memset(&inarg, 0, sizeof(inarg)); 936 memset(&outarg, 0, sizeof(outarg)); ··· 949 args.out_numargs = 1; 950 args.out_args[0].size = sizeof(outarg); 951 args.out_args[0].value = &outarg; 952 + err = fuse_simple_request(fm, &args); 953 if (!err) { 954 if (fuse_invalid_attr(&outarg.attr) || 955 (inode->i_mode ^ outarg.attr.mode) & S_IFMT) { ··· 1002 STATX_BASIC_STATS & ~STATX_ATIME, 0); 1003 } 1004 1005 + int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid, 1006 u64 child_nodeid, struct qstr *name) 1007 { 1008 int err = -ENOTDIR; ··· 1010 struct dentry *dir; 1011 struct dentry *entry; 1012 1013 + parent = fuse_ilookup(fc, parent_nodeid, NULL); 1014 if (!parent) 1015 return -ENOENT; 1016 ··· 1102 1103 static int fuse_access(struct inode *inode, int mask) 1104 { 1105 + struct fuse_mount *fm = get_fuse_mount(inode); 1106 FUSE_ARGS(args); 1107 struct fuse_access_in inarg; 1108 int err; 1109 1110 BUG_ON(mask & MAY_NOT_BLOCK); 1111 1112 + if (fm->fc->no_access) 1113 return 0; 1114 1115 memset(&inarg, 0, sizeof(inarg)); ··· 1119 args.in_numargs = 1; 1120 args.in_args[0].size = sizeof(inarg); 1121 args.in_args[0].value = &inarg; 1122 + err = fuse_simple_request(fm, &args); 1123 if (err == -ENOSYS) { 1124 + fm->fc->no_access = 1; 1125 err = 0; 1126 } 1127 return err; ··· 1209 1210 static int fuse_readlink_page(struct inode *inode, struct page *page) 1211 { 1212 + struct fuse_mount *fm = get_fuse_mount(inode); 1213 struct fuse_page_desc desc = { .length = PAGE_SIZE - 1 }; 1214 struct fuse_args_pages ap = { 1215 .num_pages = 1, ··· 1226 ap.args.page_zeroing = true; 1227 ap.args.out_numargs = 1; 1228 ap.args.out_args[0].size = desc.length; 1229 + res = fuse_simple_request(fm, &ap.args); 1230 1231 fuse_invalidate_atime(inode); 1232 ··· 1454 */ 1455 int fuse_flush_times(struct inode *inode, struct fuse_file *ff) 1456 { 1457 + struct fuse_mount *fm = get_fuse_mount(inode); 1458 FUSE_ARGS(args); 1459 struct fuse_setattr_in inarg; 1460 struct fuse_attr_out outarg; ··· 1465 inarg.valid = FATTR_MTIME; 1466 inarg.mtime = inode->i_mtime.tv_sec; 1467 inarg.mtimensec = inode->i_mtime.tv_nsec; 1468 + if (fm->fc->minor >= 23) { 1469 inarg.valid |= FATTR_CTIME; 1470 inarg.ctime = inode->i_ctime.tv_sec; 1471 inarg.ctimensec = inode->i_ctime.tv_nsec; ··· 1474 inarg.valid |= FATTR_FH; 1475 inarg.fh = ff->fh; 1476 } 1477 + fuse_setattr_fill(fm->fc, &args, inode, &inarg, &outarg); 1478 1479 + return fuse_simple_request(fm, &args); 1480 } 1481 1482 /* ··· 1491 struct file *file) 1492 { 1493 struct inode *inode = d_inode(dentry); 1494 + struct fuse_mount *fm = get_fuse_mount(inode); 1495 + struct fuse_conn *fc = fm->fc; 1496 struct fuse_inode *fi = get_fuse_inode(inode); 1497 FUSE_ARGS(args); 1498 struct fuse_setattr_in inarg; ··· 1501 loff_t oldsize; 1502 int err; 1503 bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode); 1504 + bool fault_blocked = false; 1505 1506 if (!fc->default_permissions) 1507 attr->ia_valid |= ATTR_FORCE; ··· 1508 err = setattr_prepare(dentry, 
attr); 1509 if (err) 1510 return err; 1511 + 1512 + if (attr->ia_valid & ATTR_SIZE) { 1513 + if (WARN_ON(!S_ISREG(inode->i_mode))) 1514 + return -EIO; 1515 + is_truncate = true; 1516 + } 1517 + 1518 + if (FUSE_IS_DAX(inode) && is_truncate) { 1519 + down_write(&fi->i_mmap_sem); 1520 + fault_blocked = true; 1521 + err = fuse_dax_break_layouts(inode, 0, 0); 1522 + if (err) { 1523 + up_write(&fi->i_mmap_sem); 1524 + return err; 1525 + } 1526 + } 1527 1528 if (attr->ia_valid & ATTR_OPEN) { 1529 /* This is coming from open(..., ... | O_TRUNC); */ ··· 1521 */ 1522 i_size_write(inode, 0); 1523 truncate_pagecache(inode, 0); 1524 + goto out; 1525 } 1526 file = NULL; 1527 } 1528 1529 /* Flush dirty data/metadata before non-truncate SETATTR */ ··· 1566 inarg.lock_owner = fuse_lock_owner_id(fc, current->files); 1567 } 1568 fuse_setattr_fill(fc, &args, inode, &inarg, &outarg); 1569 + err = fuse_simple_request(fm, &args); 1570 if (err) { 1571 if (err == -EINTR) 1572 fuse_invalidate_attr(inode); ··· 1614 } 1615 1616 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); 1617 + out: 1618 + if (fault_blocked) 1619 + up_write(&fi->i_mmap_sem); 1620 + 1621 return 0; 1622 1623 error: ··· 1621 fuse_release_nowrite(inode); 1622 1623 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); 1624 + 1625 + if (fault_blocked) 1626 + up_write(&fi->i_mmap_sem); 1627 return err; 1628 } 1629
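dir.c grows fuse_dentry_automount(), which sets up a fresh superblock on the same fuse_conn whenever a lookup reports FUSE_ATTR_SUBMOUNT, giving each submount its own st_dev inside the guest. A toy sketch of that one-connection, many-superblocks relationship (hypothetical types, not the VFS or fuse API):

/*
 * Toy model of the submount relationship: one connection can back
 * several superblocks, so each submount presents its own st_dev to the
 * guest.  Hypothetical types, not the VFS or fuse interfaces.
 */
#include <stdio.h>

struct conn { int next_dev; };
struct superblock { int s_dev; struct conn *fc; };

/* Creating a submount amounts to a new superblock on the same connection. */
static struct superblock make_submount(struct conn *fc)
{
	struct superblock sb = { .s_dev = fc->next_dev++, .fc = fc };

	return sb;
}

int main(void)
{
	struct conn fc = { .next_dev = 42 };
	struct superblock root = make_submount(&fc);
	struct superblock sub = make_submount(&fc);

	printf("root st_dev=%d, submount st_dev=%d, same connection\n",
	       root.s_dev, sub.s_dev);
	return 0;
}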
+149 -106
fs/fuse/file.c
··· 32 return pages; 33 } 34 35 - static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file, 36 int opcode, struct fuse_open_out *outargp) 37 { 38 struct fuse_open_in inarg; ··· 40 41 memset(&inarg, 0, sizeof(inarg)); 42 inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY); 43 - if (!fc->atomic_o_trunc) 44 inarg.flags &= ~O_TRUNC; 45 args.opcode = opcode; 46 args.nodeid = nodeid; ··· 51 args.out_args[0].size = sizeof(*outargp); 52 args.out_args[0].value = outargp; 53 54 - return fuse_simple_request(fc, &args); 55 } 56 57 struct fuse_release_args { ··· 60 struct inode *inode; 61 }; 62 63 - struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) 64 { 65 struct fuse_file *ff; 66 ··· 68 if (unlikely(!ff)) 69 return NULL; 70 71 - ff->fc = fc; 72 ff->release_args = kzalloc(sizeof(*ff->release_args), 73 GFP_KERNEL_ACCOUNT); 74 if (!ff->release_args) { ··· 82 RB_CLEAR_NODE(&ff->polled_node); 83 init_waitqueue_head(&ff->poll_wait); 84 85 - ff->kh = atomic64_inc_return(&fc->khctr); 86 87 return ff; 88 } ··· 100 return ff; 101 } 102 103 - static void fuse_release_end(struct fuse_conn *fc, struct fuse_args *args, 104 int error) 105 { 106 struct fuse_release_args *ra = container_of(args, typeof(*ra), args); ··· 114 if (refcount_dec_and_test(&ff->count)) { 115 struct fuse_args *args = &ff->release_args->args; 116 117 - if (isdir ? ff->fc->no_opendir : ff->fc->no_open) { 118 /* Do nothing when client does not implement 'open' */ 119 - fuse_release_end(ff->fc, args, 0); 120 } else if (sync) { 121 - fuse_simple_request(ff->fc, args); 122 - fuse_release_end(ff->fc, args, 0); 123 } else { 124 args->end = fuse_release_end; 125 - if (fuse_simple_background(ff->fc, args, 126 GFP_KERNEL | __GFP_NOFAIL)) 127 - fuse_release_end(ff->fc, args, -ENOTCONN); 128 } 129 kfree(ff); 130 } 131 } 132 133 - int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, 134 bool isdir) 135 { 136 struct fuse_file *ff; 137 int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN; 138 139 - ff = fuse_file_alloc(fc); 140 if (!ff) 141 return -ENOMEM; 142 ··· 148 struct fuse_open_out outarg; 149 int err; 150 151 - err = fuse_send_open(fc, nodeid, file, opcode, &outarg); 152 if (!err) { 153 ff->fh = outarg.fh; 154 ff->open_flags = outarg.open_flags; ··· 217 218 int fuse_open_common(struct inode *inode, struct file *file, bool isdir) 219 { 220 - struct fuse_conn *fc = get_fuse_conn(inode); 221 int err; 222 bool is_wb_truncate = (file->f_flags & O_TRUNC) && 223 fc->atomic_o_trunc && 224 fc->writeback_cache; 225 226 err = generic_file_open(inode, file); 227 if (err) 228 return err; 229 230 - if (is_wb_truncate) { 231 inode_lock(inode); 232 fuse_set_nowrite(inode); 233 } 234 235 - err = fuse_do_open(fc, get_node_id(inode), file, isdir); 236 237 if (!err) 238 fuse_finish_open(inode, file); 239 240 - if (is_wb_truncate) { 241 fuse_release_nowrite(inode); 242 inode_unlock(inode); 243 } ··· 261 static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff, 262 int flags, int opcode) 263 { 264 - struct fuse_conn *fc = ff->fc; 265 struct fuse_release_args *ra = ff->release_args; 266 267 /* Inode is NULL on error path of fuse_create_open() */ ··· 299 300 if (ff->flock) { 301 ra->inarg.release_flags |= FUSE_RELEASE_FLOCK_UNLOCK; 302 - ra->inarg.lock_owner = fuse_lock_owner_id(ff->fc, 303 (fl_owner_t) file); 304 } 305 /* Hold inode until release is finished */ ··· 314 * synchronous RELEASE is allowed (and desirable) in this case 315 * because the server can be trusted not to screw up. 
316 */ 317 - fuse_file_put(ff, ff->fc->destroy, isdir); 318 } 319 320 static int fuse_open(struct inode *inode, struct file *file) ··· 457 static int fuse_flush(struct file *file, fl_owner_t id) 458 { 459 struct inode *inode = file_inode(file); 460 - struct fuse_conn *fc = get_fuse_conn(inode); 461 struct fuse_file *ff = file->private_data; 462 struct fuse_flush_in inarg; 463 FUSE_ARGS(args); ··· 479 return err; 480 481 err = 0; 482 - if (fc->no_flush) 483 goto inval_attr_out; 484 485 memset(&inarg, 0, sizeof(inarg)); 486 inarg.fh = ff->fh; 487 - inarg.lock_owner = fuse_lock_owner_id(fc, id); 488 args.opcode = FUSE_FLUSH; 489 args.nodeid = get_node_id(inode); 490 args.in_numargs = 1; ··· 492 args.in_args[0].value = &inarg; 493 args.force = true; 494 495 - err = fuse_simple_request(fc, &args); 496 if (err == -ENOSYS) { 497 - fc->no_flush = 1; 498 err = 0; 499 } 500 ··· 503 * In memory i_blocks is not maintained by fuse, if writeback cache is 504 * enabled, i_blocks from cached attr may not be accurate. 505 */ 506 - if (!err && fc->writeback_cache) 507 fuse_invalidate_attr(inode); 508 return err; 509 } ··· 512 int datasync, int opcode) 513 { 514 struct inode *inode = file->f_mapping->host; 515 - struct fuse_conn *fc = get_fuse_conn(inode); 516 struct fuse_file *ff = file->private_data; 517 FUSE_ARGS(args); 518 struct fuse_fsync_in inarg; ··· 525 args.in_numargs = 1; 526 args.in_args[0].size = sizeof(inarg); 527 args.in_args[0].value = &inarg; 528 - return fuse_simple_request(fc, &args); 529 } 530 531 static int fuse_fsync(struct file *file, loff_t start, loff_t end, ··· 700 kfree(ia); 701 } 702 703 - static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_args *args, 704 int err) 705 { 706 struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args); ··· 729 fuse_io_free(ia); 730 } 731 732 - static ssize_t fuse_async_req_send(struct fuse_conn *fc, 733 struct fuse_io_args *ia, size_t num_bytes) 734 { 735 ssize_t err; ··· 743 744 ia->ap.args.end = fuse_aio_complete_req; 745 ia->ap.args.may_block = io->should_dirty; 746 - err = fuse_simple_background(fc, &ia->ap.args, GFP_KERNEL); 747 if (err) 748 - fuse_aio_complete_req(fc, &ia->ap.args, err); 749 750 return num_bytes; 751 } ··· 755 { 756 struct file *file = ia->io->iocb->ki_filp; 757 struct fuse_file *ff = file->private_data; 758 - struct fuse_conn *fc = ff->fc; 759 760 fuse_read_args_fill(ia, file, pos, count, FUSE_READ); 761 if (owner != NULL) { 762 ia->read.in.read_flags |= FUSE_READ_LOCKOWNER; 763 - ia->read.in.lock_owner = fuse_lock_owner_id(fc, owner); 764 } 765 766 if (ia->io->async) 767 - return fuse_async_req_send(fc, ia, count); 768 769 - return fuse_simple_request(fc, &ia->ap.args); 770 } 771 772 static void fuse_read_update_size(struct inode *inode, loff_t size, ··· 812 static int fuse_do_readpage(struct file *file, struct page *page) 813 { 814 struct inode *inode = page->mapping->host; 815 - struct fuse_conn *fc = get_fuse_conn(inode); 816 loff_t pos = page_offset(page); 817 struct fuse_page_desc desc = { .length = PAGE_SIZE }; 818 struct fuse_io_args ia = { ··· 832 */ 833 fuse_wait_on_page_writeback(inode, page->index); 834 835 - attr_ver = fuse_get_attr_version(fc); 836 837 /* Don't overflow end offset */ 838 if (pos + (desc.length - 1) == LLONG_MAX) 839 desc.length--; 840 841 fuse_read_args_fill(&ia, file, pos, desc.length, FUSE_READ); 842 - res = fuse_simple_request(fc, &ia.ap.args); 843 if (res < 0) 844 return res; 845 /* ··· 869 return err; 870 } 871 872 - static void fuse_readpages_end(struct fuse_conn 
*fc, struct fuse_args *args, 873 int err) 874 { 875 int i; ··· 913 static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file) 914 { 915 struct fuse_file *ff = file->private_data; 916 - struct fuse_conn *fc = ff->fc; 917 struct fuse_args_pages *ap = &ia->ap; 918 loff_t pos = page_offset(ap->pages[0]); 919 size_t count = ap->num_pages << PAGE_SHIFT; ··· 932 WARN_ON((loff_t) (pos + count) < 0); 933 934 fuse_read_args_fill(ia, file, pos, count, FUSE_READ); 935 - ia->read.attr_ver = fuse_get_attr_version(fc); 936 - if (fc->async_read) { 937 ia->ff = fuse_file_get(ff); 938 ap->args.end = fuse_readpages_end; 939 - err = fuse_simple_background(fc, &ap->args, GFP_KERNEL); 940 if (!err) 941 return; 942 } else { 943 - res = fuse_simple_request(fc, &ap->args); 944 err = res < 0 ? res : 0; 945 } 946 - fuse_readpages_end(fc, &ap->args, err); 947 } 948 949 static void fuse_readahead(struct readahead_control *rac) ··· 1014 args->opcode = FUSE_WRITE; 1015 args->nodeid = ff->nodeid; 1016 args->in_numargs = 2; 1017 - if (ff->fc->minor < 9) 1018 args->in_args[0].size = FUSE_COMPAT_WRITE_IN_SIZE; 1019 else 1020 args->in_args[0].size = sizeof(ia->write.in); ··· 1043 struct kiocb *iocb = ia->io->iocb; 1044 struct file *file = iocb->ki_filp; 1045 struct fuse_file *ff = file->private_data; 1046 - struct fuse_conn *fc = ff->fc; 1047 struct fuse_write_in *inarg = &ia->write.in; 1048 ssize_t err; 1049 ··· 1051 inarg->flags = fuse_write_flags(iocb); 1052 if (owner != NULL) { 1053 inarg->write_flags |= FUSE_WRITE_LOCKOWNER; 1054 - inarg->lock_owner = fuse_lock_owner_id(fc, owner); 1055 } 1056 1057 if (ia->io->async) 1058 - return fuse_async_req_send(fc, ia, count); 1059 1060 - err = fuse_simple_request(fc, &ia->ap.args); 1061 if (!err && ia->write.out.size > count) 1062 err = -EIO; 1063 ··· 1088 struct fuse_args_pages *ap = &ia->ap; 1089 struct file *file = iocb->ki_filp; 1090 struct fuse_file *ff = file->private_data; 1091 - struct fuse_conn *fc = ff->fc; 1092 unsigned int offset, i; 1093 int err; 1094 ··· 1098 fuse_write_args_fill(ia, ff, pos, count); 1099 ia->write.in.flags = fuse_write_flags(iocb); 1100 1101 - err = fuse_simple_request(fc, &ap->args); 1102 if (!err && ia->write.out.size > count) 1103 err = -EIO; 1104 ··· 1413 struct file *file = io->iocb->ki_filp; 1414 struct inode *inode = file->f_mapping->host; 1415 struct fuse_file *ff = file->private_data; 1416 - struct fuse_conn *fc = ff->fc; 1417 size_t nmax = write ? 
fc->max_write : fc->max_read; 1418 loff_t pos = *ppos; 1419 size_t count = iov_iter_count(iter); ··· 1553 { 1554 struct file *file = iocb->ki_filp; 1555 struct fuse_file *ff = file->private_data; 1556 1557 - if (is_bad_inode(file_inode(file))) 1558 return -EIO; 1559 1560 if (!(ff->open_flags & FOPEN_DIRECT_IO)) 1561 return fuse_cache_read_iter(iocb, to); ··· 1571 { 1572 struct file *file = iocb->ki_filp; 1573 struct fuse_file *ff = file->private_data; 1574 1575 - if (is_bad_inode(file_inode(file))) 1576 return -EIO; 1577 1578 if (!(ff->open_flags & FOPEN_DIRECT_IO)) 1579 return fuse_cache_write_iter(iocb, from); ··· 1600 kfree(wpa); 1601 } 1602 1603 - static void fuse_writepage_finish(struct fuse_conn *fc, 1604 struct fuse_writepage_args *wpa) 1605 { 1606 struct fuse_args_pages *ap = &wpa->ia.ap; ··· 1618 } 1619 1620 /* Called under fi->lock, may release and reacquire it */ 1621 - static void fuse_send_writepage(struct fuse_conn *fc, 1622 struct fuse_writepage_args *wpa, loff_t size) 1623 __releases(fi->lock) 1624 __acquires(fi->lock) ··· 1644 args->force = true; 1645 args->nocreds = true; 1646 1647 - err = fuse_simple_background(fc, args, GFP_ATOMIC); 1648 if (err == -ENOMEM) { 1649 spin_unlock(&fi->lock); 1650 - err = fuse_simple_background(fc, args, GFP_NOFS | __GFP_NOFAIL); 1651 spin_lock(&fi->lock); 1652 } 1653 ··· 1660 out_free: 1661 fi->writectr--; 1662 rb_erase(&wpa->writepages_entry, &fi->writepages); 1663 - fuse_writepage_finish(fc, wpa); 1664 spin_unlock(&fi->lock); 1665 1666 /* After fuse_writepage_finish() aux request list is private */ ··· 1684 __releases(fi->lock) 1685 __acquires(fi->lock) 1686 { 1687 - struct fuse_conn *fc = get_fuse_conn(inode); 1688 struct fuse_inode *fi = get_fuse_inode(inode); 1689 loff_t crop = i_size_read(inode); 1690 struct fuse_writepage_args *wpa; ··· 1693 wpa = list_entry(fi->queued_writes.next, 1694 struct fuse_writepage_args, queue_entry); 1695 list_del_init(&wpa->queue_entry); 1696 - fuse_send_writepage(fc, wpa, crop); 1697 } 1698 } 1699 ··· 1734 WARN_ON(fuse_insert_writeback(root, wpa)); 1735 } 1736 1737 - static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args, 1738 int error) 1739 { 1740 struct fuse_writepage_args *wpa = ··· 1746 spin_lock(&fi->lock); 1747 rb_erase(&wpa->writepages_entry, &fi->writepages); 1748 while (wpa->next) { 1749 - struct fuse_conn *fc = get_fuse_conn(inode); 1750 struct fuse_write_in *inarg = &wpa->ia.write.in; 1751 struct fuse_writepage_args *next = wpa->next; 1752 ··· 1778 * no invocations of fuse_writepage_end() while we're in 1779 * fuse_set_nowrite..fuse_release_nowrite section. 
1780 */ 1781 - fuse_send_writepage(fc, next, inarg->offset + inarg->size); 1782 } 1783 fi->writectr--; 1784 - fuse_writepage_finish(fc, wpa); 1785 spin_unlock(&fi->lock); 1786 fuse_writepage_free(wpa); 1787 } ··· 2339 { 2340 struct fuse_file *ff = file->private_data; 2341 2342 if (ff->open_flags & FOPEN_DIRECT_IO) { 2343 /* Can't provide the coherency needed for MAP_SHARED */ 2344 if (vma->vm_flags & VM_MAYSHARE) ··· 2421 static int fuse_getlk(struct file *file, struct file_lock *fl) 2422 { 2423 struct inode *inode = file_inode(file); 2424 - struct fuse_conn *fc = get_fuse_conn(inode); 2425 FUSE_ARGS(args); 2426 struct fuse_lk_in inarg; 2427 struct fuse_lk_out outarg; ··· 2431 args.out_numargs = 1; 2432 args.out_args[0].size = sizeof(outarg); 2433 args.out_args[0].value = &outarg; 2434 - err = fuse_simple_request(fc, &args); 2435 if (!err) 2436 - err = convert_fuse_file_lock(fc, &outarg.lk, fl); 2437 2438 return err; 2439 } ··· 2441 static int fuse_setlk(struct file *file, struct file_lock *fl, int flock) 2442 { 2443 struct inode *inode = file_inode(file); 2444 - struct fuse_conn *fc = get_fuse_conn(inode); 2445 FUSE_ARGS(args); 2446 struct fuse_lk_in inarg; 2447 int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK; 2448 struct pid *pid = fl->fl_type != F_UNLCK ? task_tgid(current) : NULL; 2449 - pid_t pid_nr = pid_nr_ns(pid, fc->pid_ns); 2450 int err; 2451 2452 if (fl->fl_lmops && fl->fl_lmops->lm_grant) { ··· 2459 return 0; 2460 2461 fuse_lk_fill(&args, file, fl, opcode, pid_nr, flock, &inarg); 2462 - err = fuse_simple_request(fc, &args); 2463 2464 /* locking is restartable */ 2465 if (err == -EINTR) ··· 2513 static sector_t fuse_bmap(struct address_space *mapping, sector_t block) 2514 { 2515 struct inode *inode = mapping->host; 2516 - struct fuse_conn *fc = get_fuse_conn(inode); 2517 FUSE_ARGS(args); 2518 struct fuse_bmap_in inarg; 2519 struct fuse_bmap_out outarg; 2520 int err; 2521 2522 - if (!inode->i_sb->s_bdev || fc->no_bmap) 2523 return 0; 2524 2525 memset(&inarg, 0, sizeof(inarg)); ··· 2533 args.out_numargs = 1; 2534 args.out_args[0].size = sizeof(outarg); 2535 args.out_args[0].value = &outarg; 2536 - err = fuse_simple_request(fc, &args); 2537 if (err == -ENOSYS) 2538 - fc->no_bmap = 1; 2539 2540 return err ? 
0 : outarg.block; 2541 } ··· 2543 static loff_t fuse_lseek(struct file *file, loff_t offset, int whence) 2544 { 2545 struct inode *inode = file->f_mapping->host; 2546 - struct fuse_conn *fc = get_fuse_conn(inode); 2547 struct fuse_file *ff = file->private_data; 2548 FUSE_ARGS(args); 2549 struct fuse_lseek_in inarg = { ··· 2554 struct fuse_lseek_out outarg; 2555 int err; 2556 2557 - if (fc->no_lseek) 2558 goto fallback; 2559 2560 args.opcode = FUSE_LSEEK; ··· 2565 args.out_numargs = 1; 2566 args.out_args[0].size = sizeof(outarg); 2567 args.out_args[0].value = &outarg; 2568 - err = fuse_simple_request(fc, &args); 2569 if (err) { 2570 if (err == -ENOSYS) { 2571 - fc->no_lseek = 1; 2572 goto fallback; 2573 } 2574 return err; ··· 2754 unsigned int flags) 2755 { 2756 struct fuse_file *ff = file->private_data; 2757 - struct fuse_conn *fc = ff->fc; 2758 struct fuse_ioctl_in inarg = { 2759 .fh = ff->fh, 2760 .cmd = cmd, ··· 2787 BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); 2788 2789 err = -ENOMEM; 2790 - ap.pages = fuse_pages_alloc(fc->max_pages, GFP_KERNEL, &ap.descs); 2791 iov_page = (struct iovec *) __get_free_page(GFP_KERNEL); 2792 if (!ap.pages || !iov_page) 2793 goto out; 2794 2795 - fuse_page_descs_length_init(ap.descs, 0, fc->max_pages); 2796 2797 /* 2798 * If restricted, initialize IO parameters as encoded in @cmd. ··· 2837 2838 /* make sure there are enough buffer pages and init request with them */ 2839 err = -ENOMEM; 2840 - if (max_pages > fc->max_pages) 2841 goto out; 2842 while (ap.num_pages < max_pages) { 2843 ap.pages[ap.num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); ··· 2874 ap.args.out_pages = true; 2875 ap.args.out_argvar = true; 2876 2877 - transferred = fuse_simple_request(fc, &ap.args); 2878 err = transferred; 2879 if (transferred < 0) 2880 goto out; ··· 2902 goto out; 2903 2904 vaddr = kmap_atomic(ap.pages[0]); 2905 - err = fuse_copy_ioctl_iovec(fc, iov_page, vaddr, 2906 transferred, in_iovs + out_iovs, 2907 (flags & FUSE_IOCTL_COMPAT) != 0); 2908 kunmap_atomic(vaddr); ··· 2912 in_iov = iov_page; 2913 out_iov = in_iov + in_iovs; 2914 2915 - err = fuse_verify_ioctl_iov(fc, in_iov, in_iovs); 2916 if (err) 2917 goto out; 2918 2919 - err = fuse_verify_ioctl_iov(fc, out_iov, out_iovs); 2920 if (err) 2921 goto out; 2922 ··· 3026 __poll_t fuse_file_poll(struct file *file, poll_table *wait) 3027 { 3028 struct fuse_file *ff = file->private_data; 3029 - struct fuse_conn *fc = ff->fc; 3030 struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh }; 3031 struct fuse_poll_out outarg; 3032 FUSE_ARGS(args); 3033 int err; 3034 3035 - if (fc->no_poll) 3036 return DEFAULT_POLLMASK; 3037 3038 poll_wait(file, &ff->poll_wait, wait); ··· 3044 */ 3045 if (waitqueue_active(&ff->poll_wait)) { 3046 inarg.flags |= FUSE_POLL_SCHEDULE_NOTIFY; 3047 - fuse_register_polled_file(fc, ff); 3048 } 3049 3050 args.opcode = FUSE_POLL; ··· 3055 args.out_numargs = 1; 3056 args.out_args[0].size = sizeof(outarg); 3057 args.out_args[0].value = &outarg; 3058 - err = fuse_simple_request(fc, &args); 3059 3060 if (!err) 3061 return demangle_poll(outarg.revents); 3062 if (err == -ENOSYS) { 3063 - fc->no_poll = 1; 3064 return DEFAULT_POLLMASK; 3065 } 3066 return EPOLLERR; ··· 3146 * By default, we want to optimize all I/Os with async request 3147 * submission to the client filesystem if supported. 
3148 */ 3149 - io->async = ff->fc->async_dio; 3150 io->iocb = iocb; 3151 io->blocking = is_sync_kiocb(iocb); 3152 3153 /* optimization for short read */ 3154 if (io->async && !io->write && offset + count > i_size) { 3155 - iov_iter_truncate(iter, fuse_round_up(ff->fc, i_size - offset)); 3156 shortened = count - iov_iter_count(iter); 3157 count -= shortened; 3158 } ··· 3222 struct fuse_file *ff = file->private_data; 3223 struct inode *inode = file_inode(file); 3224 struct fuse_inode *fi = get_fuse_inode(inode); 3225 - struct fuse_conn *fc = ff->fc; 3226 FUSE_ARGS(args); 3227 struct fuse_fallocate_in inarg = { 3228 .fh = ff->fh, ··· 3234 bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) || 3235 (mode & FALLOC_FL_PUNCH_HOLE); 3236 3237 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 3238 return -EOPNOTSUPP; 3239 3240 - if (fc->no_fallocate) 3241 return -EOPNOTSUPP; 3242 3243 if (lock_inode) { 3244 inode_lock(inode); 3245 if (mode & FALLOC_FL_PUNCH_HOLE) { 3246 loff_t endbyte = offset + length - 1; 3247 ··· 3275 args.in_numargs = 1; 3276 args.in_args[0].size = sizeof(inarg); 3277 args.in_args[0].value = &inarg; 3278 - err = fuse_simple_request(fc, &args); 3279 if (err == -ENOSYS) { 3280 - fc->no_fallocate = 1; 3281 err = -EOPNOTSUPP; 3282 } 3283 if (err) ··· 3287 if (!(mode & FALLOC_FL_KEEP_SIZE)) { 3288 bool changed = fuse_write_update_size(inode, offset + length); 3289 3290 - if (changed && fc->writeback_cache) 3291 file_update_time(file); 3292 } 3293 ··· 3299 out: 3300 if (!(mode & FALLOC_FL_KEEP_SIZE)) 3301 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); 3302 3303 if (lock_inode) 3304 inode_unlock(inode); ··· 3318 struct inode *inode_in = file_inode(file_in); 3319 struct inode *inode_out = file_inode(file_out); 3320 struct fuse_inode *fi_out = get_fuse_inode(inode_out); 3321 - struct fuse_conn *fc = ff_in->fc; 3322 FUSE_ARGS(args); 3323 struct fuse_copy_file_range_in inarg = { 3324 .fh_in = ff_in->fh, ··· 3388 args.out_numargs = 1; 3389 args.out_args[0].size = sizeof(outarg); 3390 args.out_args[0].value = &outarg; 3391 - err = fuse_simple_request(fc, &args); 3392 if (err == -ENOSYS) { 3393 fc->no_copy_file_range = 1; 3394 err = -EOPNOTSUPP; ··· 3443 .release = fuse_release, 3444 .fsync = fuse_fsync, 3445 .lock = fuse_file_lock, 3446 .flock = fuse_file_flock, 3447 .splice_read = generic_file_splice_read, 3448 .splice_write = iter_file_splice_write, ··· 3479 fi->writectr = 0; 3480 init_waitqueue_head(&fi->page_waitq); 3481 fi->writepages = RB_ROOT; 3482 }
··· 32 return pages; 33 } 34 35 + static int fuse_send_open(struct fuse_mount *fm, u64 nodeid, struct file *file, 36 int opcode, struct fuse_open_out *outargp) 37 { 38 struct fuse_open_in inarg; ··· 40 41 memset(&inarg, 0, sizeof(inarg)); 42 inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY); 43 + if (!fm->fc->atomic_o_trunc) 44 inarg.flags &= ~O_TRUNC; 45 args.opcode = opcode; 46 args.nodeid = nodeid; ··· 51 args.out_args[0].size = sizeof(*outargp); 52 args.out_args[0].value = outargp; 53 54 + return fuse_simple_request(fm, &args); 55 } 56 57 struct fuse_release_args { ··· 60 struct inode *inode; 61 }; 62 63 + struct fuse_file *fuse_file_alloc(struct fuse_mount *fm) 64 { 65 struct fuse_file *ff; 66 ··· 68 if (unlikely(!ff)) 69 return NULL; 70 71 + ff->fm = fm; 72 ff->release_args = kzalloc(sizeof(*ff->release_args), 73 GFP_KERNEL_ACCOUNT); 74 if (!ff->release_args) { ··· 82 RB_CLEAR_NODE(&ff->polled_node); 83 init_waitqueue_head(&ff->poll_wait); 84 85 + ff->kh = atomic64_inc_return(&fm->fc->khctr); 86 87 return ff; 88 } ··· 100 return ff; 101 } 102 103 + static void fuse_release_end(struct fuse_mount *fm, struct fuse_args *args, 104 int error) 105 { 106 struct fuse_release_args *ra = container_of(args, typeof(*ra), args); ··· 114 if (refcount_dec_and_test(&ff->count)) { 115 struct fuse_args *args = &ff->release_args->args; 116 117 + if (isdir ? ff->fm->fc->no_opendir : ff->fm->fc->no_open) { 118 /* Do nothing when client does not implement 'open' */ 119 + fuse_release_end(ff->fm, args, 0); 120 } else if (sync) { 121 + fuse_simple_request(ff->fm, args); 122 + fuse_release_end(ff->fm, args, 0); 123 } else { 124 args->end = fuse_release_end; 125 + if (fuse_simple_background(ff->fm, args, 126 GFP_KERNEL | __GFP_NOFAIL)) 127 + fuse_release_end(ff->fm, args, -ENOTCONN); 128 } 129 kfree(ff); 130 } 131 } 132 133 + int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file, 134 bool isdir) 135 { 136 + struct fuse_conn *fc = fm->fc; 137 struct fuse_file *ff; 138 int opcode = isdir ? 
FUSE_OPENDIR : FUSE_OPEN; 139 140 + ff = fuse_file_alloc(fm); 141 if (!ff) 142 return -ENOMEM; 143 ··· 147 struct fuse_open_out outarg; 148 int err; 149 150 + err = fuse_send_open(fm, nodeid, file, opcode, &outarg); 151 if (!err) { 152 ff->fh = outarg.fh; 153 ff->open_flags = outarg.open_flags; ··· 216 217 int fuse_open_common(struct inode *inode, struct file *file, bool isdir) 218 { 219 + struct fuse_mount *fm = get_fuse_mount(inode); 220 + struct fuse_conn *fc = fm->fc; 221 int err; 222 bool is_wb_truncate = (file->f_flags & O_TRUNC) && 223 fc->atomic_o_trunc && 224 fc->writeback_cache; 225 + bool dax_truncate = (file->f_flags & O_TRUNC) && 226 + fc->atomic_o_trunc && FUSE_IS_DAX(inode); 227 228 err = generic_file_open(inode, file); 229 if (err) 230 return err; 231 232 + if (is_wb_truncate || dax_truncate) { 233 inode_lock(inode); 234 fuse_set_nowrite(inode); 235 } 236 237 + if (dax_truncate) { 238 + down_write(&get_fuse_inode(inode)->i_mmap_sem); 239 + err = fuse_dax_break_layouts(inode, 0, 0); 240 + if (err) 241 + goto out; 242 + } 243 244 + err = fuse_do_open(fm, get_node_id(inode), file, isdir); 245 if (!err) 246 fuse_finish_open(inode, file); 247 248 + out: 249 + if (dax_truncate) 250 + up_write(&get_fuse_inode(inode)->i_mmap_sem); 251 + 252 + if (is_wb_truncate | dax_truncate) { 253 fuse_release_nowrite(inode); 254 inode_unlock(inode); 255 } ··· 247 static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff, 248 int flags, int opcode) 249 { 250 + struct fuse_conn *fc = ff->fm->fc; 251 struct fuse_release_args *ra = ff->release_args; 252 253 /* Inode is NULL on error path of fuse_create_open() */ ··· 285 286 if (ff->flock) { 287 ra->inarg.release_flags |= FUSE_RELEASE_FLOCK_UNLOCK; 288 + ra->inarg.lock_owner = fuse_lock_owner_id(ff->fm->fc, 289 (fl_owner_t) file); 290 } 291 /* Hold inode until release is finished */ ··· 300 * synchronous RELEASE is allowed (and desirable) in this case 301 * because the server can be trusted not to screw up. 302 */ 303 + fuse_file_put(ff, ff->fm->fc->destroy, isdir); 304 } 305 306 static int fuse_open(struct inode *inode, struct file *file) ··· 443 static int fuse_flush(struct file *file, fl_owner_t id) 444 { 445 struct inode *inode = file_inode(file); 446 + struct fuse_mount *fm = get_fuse_mount(inode); 447 struct fuse_file *ff = file->private_data; 448 struct fuse_flush_in inarg; 449 FUSE_ARGS(args); ··· 465 return err; 466 467 err = 0; 468 + if (fm->fc->no_flush) 469 goto inval_attr_out; 470 471 memset(&inarg, 0, sizeof(inarg)); 472 inarg.fh = ff->fh; 473 + inarg.lock_owner = fuse_lock_owner_id(fm->fc, id); 474 args.opcode = FUSE_FLUSH; 475 args.nodeid = get_node_id(inode); 476 args.in_numargs = 1; ··· 478 args.in_args[0].value = &inarg; 479 args.force = true; 480 481 + err = fuse_simple_request(fm, &args); 482 if (err == -ENOSYS) { 483 + fm->fc->no_flush = 1; 484 err = 0; 485 } 486 ··· 489 * In memory i_blocks is not maintained by fuse, if writeback cache is 490 * enabled, i_blocks from cached attr may not be accurate. 
491 */ 492 + if (!err && fm->fc->writeback_cache) 493 fuse_invalidate_attr(inode); 494 return err; 495 } ··· 498 int datasync, int opcode) 499 { 500 struct inode *inode = file->f_mapping->host; 501 + struct fuse_mount *fm = get_fuse_mount(inode); 502 struct fuse_file *ff = file->private_data; 503 FUSE_ARGS(args); 504 struct fuse_fsync_in inarg; ··· 511 args.in_numargs = 1; 512 args.in_args[0].size = sizeof(inarg); 513 args.in_args[0].value = &inarg; 514 + return fuse_simple_request(fm, &args); 515 } 516 517 static int fuse_fsync(struct file *file, loff_t start, loff_t end, ··· 686 kfree(ia); 687 } 688 689 + static void fuse_aio_complete_req(struct fuse_mount *fm, struct fuse_args *args, 690 int err) 691 { 692 struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args); ··· 715 fuse_io_free(ia); 716 } 717 718 + static ssize_t fuse_async_req_send(struct fuse_mount *fm, 719 struct fuse_io_args *ia, size_t num_bytes) 720 { 721 ssize_t err; ··· 729 730 ia->ap.args.end = fuse_aio_complete_req; 731 ia->ap.args.may_block = io->should_dirty; 732 + err = fuse_simple_background(fm, &ia->ap.args, GFP_KERNEL); 733 if (err) 734 + fuse_aio_complete_req(fm, &ia->ap.args, err); 735 736 return num_bytes; 737 } ··· 741 { 742 struct file *file = ia->io->iocb->ki_filp; 743 struct fuse_file *ff = file->private_data; 744 + struct fuse_mount *fm = ff->fm; 745 746 fuse_read_args_fill(ia, file, pos, count, FUSE_READ); 747 if (owner != NULL) { 748 ia->read.in.read_flags |= FUSE_READ_LOCKOWNER; 749 + ia->read.in.lock_owner = fuse_lock_owner_id(fm->fc, owner); 750 } 751 752 if (ia->io->async) 753 + return fuse_async_req_send(fm, ia, count); 754 755 + return fuse_simple_request(fm, &ia->ap.args); 756 } 757 758 static void fuse_read_update_size(struct inode *inode, loff_t size, ··· 798 static int fuse_do_readpage(struct file *file, struct page *page) 799 { 800 struct inode *inode = page->mapping->host; 801 + struct fuse_mount *fm = get_fuse_mount(inode); 802 loff_t pos = page_offset(page); 803 struct fuse_page_desc desc = { .length = PAGE_SIZE }; 804 struct fuse_io_args ia = { ··· 818 */ 819 fuse_wait_on_page_writeback(inode, page->index); 820 821 + attr_ver = fuse_get_attr_version(fm->fc); 822 823 /* Don't overflow end offset */ 824 if (pos + (desc.length - 1) == LLONG_MAX) 825 desc.length--; 826 827 fuse_read_args_fill(&ia, file, pos, desc.length, FUSE_READ); 828 + res = fuse_simple_request(fm, &ia.ap.args); 829 if (res < 0) 830 return res; 831 /* ··· 855 return err; 856 } 857 858 + static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args, 859 int err) 860 { 861 int i; ··· 899 static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file) 900 { 901 struct fuse_file *ff = file->private_data; 902 + struct fuse_mount *fm = ff->fm; 903 struct fuse_args_pages *ap = &ia->ap; 904 loff_t pos = page_offset(ap->pages[0]); 905 size_t count = ap->num_pages << PAGE_SHIFT; ··· 918 WARN_ON((loff_t) (pos + count) < 0); 919 920 fuse_read_args_fill(ia, file, pos, count, FUSE_READ); 921 + ia->read.attr_ver = fuse_get_attr_version(fm->fc); 922 + if (fm->fc->async_read) { 923 ia->ff = fuse_file_get(ff); 924 ap->args.end = fuse_readpages_end; 925 + err = fuse_simple_background(fm, &ap->args, GFP_KERNEL); 926 if (!err) 927 return; 928 } else { 929 + res = fuse_simple_request(fm, &ap->args); 930 err = res < 0 ? 
res : 0; 931 } 932 + fuse_readpages_end(fm, &ap->args, err); 933 } 934 935 static void fuse_readahead(struct readahead_control *rac) ··· 1000 args->opcode = FUSE_WRITE; 1001 args->nodeid = ff->nodeid; 1002 args->in_numargs = 2; 1003 + if (ff->fm->fc->minor < 9) 1004 args->in_args[0].size = FUSE_COMPAT_WRITE_IN_SIZE; 1005 else 1006 args->in_args[0].size = sizeof(ia->write.in); ··· 1029 struct kiocb *iocb = ia->io->iocb; 1030 struct file *file = iocb->ki_filp; 1031 struct fuse_file *ff = file->private_data; 1032 + struct fuse_mount *fm = ff->fm; 1033 struct fuse_write_in *inarg = &ia->write.in; 1034 ssize_t err; 1035 ··· 1037 inarg->flags = fuse_write_flags(iocb); 1038 if (owner != NULL) { 1039 inarg->write_flags |= FUSE_WRITE_LOCKOWNER; 1040 + inarg->lock_owner = fuse_lock_owner_id(fm->fc, owner); 1041 } 1042 1043 if (ia->io->async) 1044 + return fuse_async_req_send(fm, ia, count); 1045 1046 + err = fuse_simple_request(fm, &ia->ap.args); 1047 if (!err && ia->write.out.size > count) 1048 err = -EIO; 1049 ··· 1074 struct fuse_args_pages *ap = &ia->ap; 1075 struct file *file = iocb->ki_filp; 1076 struct fuse_file *ff = file->private_data; 1077 + struct fuse_mount *fm = ff->fm; 1078 unsigned int offset, i; 1079 int err; 1080 ··· 1084 fuse_write_args_fill(ia, ff, pos, count); 1085 ia->write.in.flags = fuse_write_flags(iocb); 1086 1087 + err = fuse_simple_request(fm, &ap->args); 1088 if (!err && ia->write.out.size > count) 1089 err = -EIO; 1090 ··· 1399 struct file *file = io->iocb->ki_filp; 1400 struct inode *inode = file->f_mapping->host; 1401 struct fuse_file *ff = file->private_data; 1402 + struct fuse_conn *fc = ff->fm->fc; 1403 size_t nmax = write ? fc->max_write : fc->max_read; 1404 loff_t pos = *ppos; 1405 size_t count = iov_iter_count(iter); ··· 1539 { 1540 struct file *file = iocb->ki_filp; 1541 struct fuse_file *ff = file->private_data; 1542 + struct inode *inode = file_inode(file); 1543 1544 + if (is_bad_inode(inode)) 1545 return -EIO; 1546 + 1547 + if (FUSE_IS_DAX(inode)) 1548 + return fuse_dax_read_iter(iocb, to); 1549 1550 if (!(ff->open_flags & FOPEN_DIRECT_IO)) 1551 return fuse_cache_read_iter(iocb, to); ··· 1553 { 1554 struct file *file = iocb->ki_filp; 1555 struct fuse_file *ff = file->private_data; 1556 + struct inode *inode = file_inode(file); 1557 1558 + if (is_bad_inode(inode)) 1559 return -EIO; 1560 + 1561 + if (FUSE_IS_DAX(inode)) 1562 + return fuse_dax_write_iter(iocb, from); 1563 1564 if (!(ff->open_flags & FOPEN_DIRECT_IO)) 1565 return fuse_cache_write_iter(iocb, from); ··· 1578 kfree(wpa); 1579 } 1580 1581 + static void fuse_writepage_finish(struct fuse_mount *fm, 1582 struct fuse_writepage_args *wpa) 1583 { 1584 struct fuse_args_pages *ap = &wpa->ia.ap; ··· 1596 } 1597 1598 /* Called under fi->lock, may release and reacquire it */ 1599 + static void fuse_send_writepage(struct fuse_mount *fm, 1600 struct fuse_writepage_args *wpa, loff_t size) 1601 __releases(fi->lock) 1602 __acquires(fi->lock) ··· 1622 args->force = true; 1623 args->nocreds = true; 1624 1625 + err = fuse_simple_background(fm, args, GFP_ATOMIC); 1626 if (err == -ENOMEM) { 1627 spin_unlock(&fi->lock); 1628 + err = fuse_simple_background(fm, args, GFP_NOFS | __GFP_NOFAIL); 1629 spin_lock(&fi->lock); 1630 } 1631 ··· 1638 out_free: 1639 fi->writectr--; 1640 rb_erase(&wpa->writepages_entry, &fi->writepages); 1641 + fuse_writepage_finish(fm, wpa); 1642 spin_unlock(&fi->lock); 1643 1644 /* After fuse_writepage_finish() aux request list is private */ ··· 1662 __releases(fi->lock) 1663 __acquires(fi->lock) 1664 
{ 1665 + struct fuse_mount *fm = get_fuse_mount(inode); 1666 struct fuse_inode *fi = get_fuse_inode(inode); 1667 loff_t crop = i_size_read(inode); 1668 struct fuse_writepage_args *wpa; ··· 1671 wpa = list_entry(fi->queued_writes.next, 1672 struct fuse_writepage_args, queue_entry); 1673 list_del_init(&wpa->queue_entry); 1674 + fuse_send_writepage(fm, wpa, crop); 1675 } 1676 } 1677 ··· 1712 WARN_ON(fuse_insert_writeback(root, wpa)); 1713 } 1714 1715 + static void fuse_writepage_end(struct fuse_mount *fm, struct fuse_args *args, 1716 int error) 1717 { 1718 struct fuse_writepage_args *wpa = ··· 1724 spin_lock(&fi->lock); 1725 rb_erase(&wpa->writepages_entry, &fi->writepages); 1726 while (wpa->next) { 1727 + struct fuse_mount *fm = get_fuse_mount(inode); 1728 struct fuse_write_in *inarg = &wpa->ia.write.in; 1729 struct fuse_writepage_args *next = wpa->next; 1730 ··· 1756 * no invocations of fuse_writepage_end() while we're in 1757 * fuse_set_nowrite..fuse_release_nowrite section. 1758 */ 1759 + fuse_send_writepage(fm, next, inarg->offset + inarg->size); 1760 } 1761 fi->writectr--; 1762 + fuse_writepage_finish(fm, wpa); 1763 spin_unlock(&fi->lock); 1764 fuse_writepage_free(wpa); 1765 } ··· 2317 { 2318 struct fuse_file *ff = file->private_data; 2319 2320 + /* DAX mmap is superior to direct_io mmap */ 2321 + if (FUSE_IS_DAX(file_inode(file))) 2322 + return fuse_dax_mmap(file, vma); 2323 + 2324 if (ff->open_flags & FOPEN_DIRECT_IO) { 2325 /* Can't provide the coherency needed for MAP_SHARED */ 2326 if (vma->vm_flags & VM_MAYSHARE) ··· 2395 static int fuse_getlk(struct file *file, struct file_lock *fl) 2396 { 2397 struct inode *inode = file_inode(file); 2398 + struct fuse_mount *fm = get_fuse_mount(inode); 2399 FUSE_ARGS(args); 2400 struct fuse_lk_in inarg; 2401 struct fuse_lk_out outarg; ··· 2405 args.out_numargs = 1; 2406 args.out_args[0].size = sizeof(outarg); 2407 args.out_args[0].value = &outarg; 2408 + err = fuse_simple_request(fm, &args); 2409 if (!err) 2410 + err = convert_fuse_file_lock(fm->fc, &outarg.lk, fl); 2411 2412 return err; 2413 } ··· 2415 static int fuse_setlk(struct file *file, struct file_lock *fl, int flock) 2416 { 2417 struct inode *inode = file_inode(file); 2418 + struct fuse_mount *fm = get_fuse_mount(inode); 2419 FUSE_ARGS(args); 2420 struct fuse_lk_in inarg; 2421 int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK; 2422 struct pid *pid = fl->fl_type != F_UNLCK ? task_tgid(current) : NULL; 2423 + pid_t pid_nr = pid_nr_ns(pid, fm->fc->pid_ns); 2424 int err; 2425 2426 if (fl->fl_lmops && fl->fl_lmops->lm_grant) { ··· 2433 return 0; 2434 2435 fuse_lk_fill(&args, file, fl, opcode, pid_nr, flock, &inarg); 2436 + err = fuse_simple_request(fm, &args); 2437 2438 /* locking is restartable */ 2439 if (err == -EINTR) ··· 2487 static sector_t fuse_bmap(struct address_space *mapping, sector_t block) 2488 { 2489 struct inode *inode = mapping->host; 2490 + struct fuse_mount *fm = get_fuse_mount(inode); 2491 FUSE_ARGS(args); 2492 struct fuse_bmap_in inarg; 2493 struct fuse_bmap_out outarg; 2494 int err; 2495 2496 + if (!inode->i_sb->s_bdev || fm->fc->no_bmap) 2497 return 0; 2498 2499 memset(&inarg, 0, sizeof(inarg)); ··· 2507 args.out_numargs = 1; 2508 args.out_args[0].size = sizeof(outarg); 2509 args.out_args[0].value = &outarg; 2510 + err = fuse_simple_request(fm, &args); 2511 if (err == -ENOSYS) 2512 + fm->fc->no_bmap = 1; 2513 2514 return err ? 
0 : outarg.block; 2515 } ··· 2517 static loff_t fuse_lseek(struct file *file, loff_t offset, int whence) 2518 { 2519 struct inode *inode = file->f_mapping->host; 2520 + struct fuse_mount *fm = get_fuse_mount(inode); 2521 struct fuse_file *ff = file->private_data; 2522 FUSE_ARGS(args); 2523 struct fuse_lseek_in inarg = { ··· 2528 struct fuse_lseek_out outarg; 2529 int err; 2530 2531 + if (fm->fc->no_lseek) 2532 goto fallback; 2533 2534 args.opcode = FUSE_LSEEK; ··· 2539 args.out_numargs = 1; 2540 args.out_args[0].size = sizeof(outarg); 2541 args.out_args[0].value = &outarg; 2542 + err = fuse_simple_request(fm, &args); 2543 if (err) { 2544 if (err == -ENOSYS) { 2545 + fm->fc->no_lseek = 1; 2546 goto fallback; 2547 } 2548 return err; ··· 2728 unsigned int flags) 2729 { 2730 struct fuse_file *ff = file->private_data; 2731 + struct fuse_mount *fm = ff->fm; 2732 struct fuse_ioctl_in inarg = { 2733 .fh = ff->fh, 2734 .cmd = cmd, ··· 2761 BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); 2762 2763 err = -ENOMEM; 2764 + ap.pages = fuse_pages_alloc(fm->fc->max_pages, GFP_KERNEL, &ap.descs); 2765 iov_page = (struct iovec *) __get_free_page(GFP_KERNEL); 2766 if (!ap.pages || !iov_page) 2767 goto out; 2768 2769 + fuse_page_descs_length_init(ap.descs, 0, fm->fc->max_pages); 2770 2771 /* 2772 * If restricted, initialize IO parameters as encoded in @cmd. ··· 2811 2812 /* make sure there are enough buffer pages and init request with them */ 2813 err = -ENOMEM; 2814 + if (max_pages > fm->fc->max_pages) 2815 goto out; 2816 while (ap.num_pages < max_pages) { 2817 ap.pages[ap.num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); ··· 2848 ap.args.out_pages = true; 2849 ap.args.out_argvar = true; 2850 2851 + transferred = fuse_simple_request(fm, &ap.args); 2852 err = transferred; 2853 if (transferred < 0) 2854 goto out; ··· 2876 goto out; 2877 2878 vaddr = kmap_atomic(ap.pages[0]); 2879 + err = fuse_copy_ioctl_iovec(fm->fc, iov_page, vaddr, 2880 transferred, in_iovs + out_iovs, 2881 (flags & FUSE_IOCTL_COMPAT) != 0); 2882 kunmap_atomic(vaddr); ··· 2886 in_iov = iov_page; 2887 out_iov = in_iov + in_iovs; 2888 2889 + err = fuse_verify_ioctl_iov(fm->fc, in_iov, in_iovs); 2890 if (err) 2891 goto out; 2892 2893 + err = fuse_verify_ioctl_iov(fm->fc, out_iov, out_iovs); 2894 if (err) 2895 goto out; 2896 ··· 3000 __poll_t fuse_file_poll(struct file *file, poll_table *wait) 3001 { 3002 struct fuse_file *ff = file->private_data; 3003 + struct fuse_mount *fm = ff->fm; 3004 struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh }; 3005 struct fuse_poll_out outarg; 3006 FUSE_ARGS(args); 3007 int err; 3008 3009 + if (fm->fc->no_poll) 3010 return DEFAULT_POLLMASK; 3011 3012 poll_wait(file, &ff->poll_wait, wait); ··· 3018 */ 3019 if (waitqueue_active(&ff->poll_wait)) { 3020 inarg.flags |= FUSE_POLL_SCHEDULE_NOTIFY; 3021 + fuse_register_polled_file(fm->fc, ff); 3022 } 3023 3024 args.opcode = FUSE_POLL; ··· 3029 args.out_numargs = 1; 3030 args.out_args[0].size = sizeof(outarg); 3031 args.out_args[0].value = &outarg; 3032 + err = fuse_simple_request(fm, &args); 3033 3034 if (!err) 3035 return demangle_poll(outarg.revents); 3036 if (err == -ENOSYS) { 3037 + fm->fc->no_poll = 1; 3038 return DEFAULT_POLLMASK; 3039 } 3040 return EPOLLERR; ··· 3120 * By default, we want to optimize all I/Os with async request 3121 * submission to the client filesystem if supported. 
3122 */ 3123 + io->async = ff->fm->fc->async_dio; 3124 io->iocb = iocb; 3125 io->blocking = is_sync_kiocb(iocb); 3126 3127 /* optimization for short read */ 3128 if (io->async && !io->write && offset + count > i_size) { 3129 + iov_iter_truncate(iter, fuse_round_up(ff->fm->fc, i_size - offset)); 3130 shortened = count - iov_iter_count(iter); 3131 count -= shortened; 3132 } ··· 3196 struct fuse_file *ff = file->private_data; 3197 struct inode *inode = file_inode(file); 3198 struct fuse_inode *fi = get_fuse_inode(inode); 3199 + struct fuse_mount *fm = ff->fm; 3200 FUSE_ARGS(args); 3201 struct fuse_fallocate_in inarg = { 3202 .fh = ff->fh, ··· 3208 bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) || 3209 (mode & FALLOC_FL_PUNCH_HOLE); 3210 3211 + bool block_faults = FUSE_IS_DAX(inode) && lock_inode; 3212 + 3213 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 3214 return -EOPNOTSUPP; 3215 3216 + if (fm->fc->no_fallocate) 3217 return -EOPNOTSUPP; 3218 3219 if (lock_inode) { 3220 inode_lock(inode); 3221 + if (block_faults) { 3222 + down_write(&fi->i_mmap_sem); 3223 + err = fuse_dax_break_layouts(inode, 0, 0); 3224 + if (err) 3225 + goto out; 3226 + } 3227 + 3228 if (mode & FALLOC_FL_PUNCH_HOLE) { 3229 loff_t endbyte = offset + length - 1; 3230 ··· 3240 args.in_numargs = 1; 3241 args.in_args[0].size = sizeof(inarg); 3242 args.in_args[0].value = &inarg; 3243 + err = fuse_simple_request(fm, &args); 3244 if (err == -ENOSYS) { 3245 + fm->fc->no_fallocate = 1; 3246 err = -EOPNOTSUPP; 3247 } 3248 if (err) ··· 3252 if (!(mode & FALLOC_FL_KEEP_SIZE)) { 3253 bool changed = fuse_write_update_size(inode, offset + length); 3254 3255 + if (changed && fm->fc->writeback_cache) 3256 file_update_time(file); 3257 } 3258 ··· 3264 out: 3265 if (!(mode & FALLOC_FL_KEEP_SIZE)) 3266 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); 3267 + 3268 + if (block_faults) 3269 + up_write(&fi->i_mmap_sem); 3270 3271 if (lock_inode) 3272 inode_unlock(inode); ··· 3280 struct inode *inode_in = file_inode(file_in); 3281 struct inode *inode_out = file_inode(file_out); 3282 struct fuse_inode *fi_out = get_fuse_inode(inode_out); 3283 + struct fuse_mount *fm = ff_in->fm; 3284 + struct fuse_conn *fc = fm->fc; 3285 FUSE_ARGS(args); 3286 struct fuse_copy_file_range_in inarg = { 3287 .fh_in = ff_in->fh, ··· 3349 args.out_numargs = 1; 3350 args.out_args[0].size = sizeof(outarg); 3351 args.out_args[0].value = &outarg; 3352 + err = fuse_simple_request(fm, &args); 3353 if (err == -ENOSYS) { 3354 fc->no_copy_file_range = 1; 3355 err = -EOPNOTSUPP; ··· 3404 .release = fuse_release, 3405 .fsync = fuse_fsync, 3406 .lock = fuse_file_lock, 3407 + .get_unmapped_area = thp_get_unmapped_area, 3408 .flock = fuse_file_flock, 3409 .splice_read = generic_file_splice_read, 3410 .splice_write = iter_file_splice_write, ··· 3439 fi->writectr = 0; 3440 init_waitqueue_head(&fi->page_waitq); 3441 fi->writepages = RB_ROOT; 3442 + 3443 + if (IS_ENABLED(CONFIG_FUSE_DAX)) 3444 + fuse_dax_inode_init(inode); 3445 }
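The fs/fuse/file.c changes above serialize DAX truncating opens (and punch-hole fallocate) against page faults by taking the new per-inode i_mmap_sem for write before breaking established DAX layouts, and only then sending the request. Below is a rough, stand-alone userspace sketch of that lock ordering, with pthread locks standing in for the kernel primitives; every toy_* name is invented for illustration and this is not the kernel implementation:

/*
 * Simplified model of the ordering used for DAX truncating opens:
 * inode lock first, then the write side of i_mmap_sem to fence off
 * page faults, then break established DAX layouts, do the operation,
 * and release in reverse order.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_inode {
	pthread_mutex_t i_lock;       /* models inode_lock()/inode_unlock() */
	pthread_rwlock_t i_mmap_sem;  /* models the new fuse_inode i_mmap_sem */
	bool is_dax;
};

/* stand-in for breaking layouts: wait until busy DAX mappings drain */
static int toy_break_layouts(struct toy_inode *inode)
{
	(void)inode;
	printf("waiting for busy DAX mappings to drain\n");
	return 0;
}

static int toy_truncating_open(struct toy_inode *inode)
{
	int err = 0;

	pthread_mutex_lock(&inode->i_lock);
	if (inode->is_dax) {
		/* the fault path takes i_mmap_sem for read, so this blocks it */
		pthread_rwlock_wrlock(&inode->i_mmap_sem);
		err = toy_break_layouts(inode);
		if (err)
			goto out_mmap;
	}

	printf("sending the open request with O_TRUNC\n");

out_mmap:
	if (inode->is_dax)
		pthread_rwlock_unlock(&inode->i_mmap_sem);
	pthread_mutex_unlock(&inode->i_lock);
	return err;
}

int main(void)
{
	struct toy_inode inode = {
		.i_lock = PTHREAD_MUTEX_INITIALIZER,
		.i_mmap_sem = PTHREAD_RWLOCK_INITIALIZER,
		.is_dax = true,
	};

	return toy_truncating_open(&inode);
}

The read side of the semaphore is what the fault path would take, so holding it for write here fences off new faults while the file size is changing, matching the comment the patch adds to fuse_inode.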
+152 -35
fs/fuse/fuse_i.h
··· 148 149 /** Lock to protect write related fields */ 150 spinlock_t lock; 151 }; 152 153 /** FUSE inode state bits */ ··· 175 }; 176 177 struct fuse_conn; 178 struct fuse_release_args; 179 180 /** FUSE specific file data */ 181 struct fuse_file { 182 /** Fuse connection for this file */ 183 - struct fuse_conn *fc; 184 185 /* Argument space reserved for release */ 186 struct fuse_release_args *release_args; ··· 267 bool may_block:1; 268 struct fuse_in_arg in_args[3]; 269 struct fuse_arg out_args[2]; 270 - void (*end)(struct fuse_conn *fc, struct fuse_args *args, int error); 271 }; 272 273 struct fuse_args_pages { ··· 375 /** virtio-fs's physically contiguous buffer for in and out args */ 376 void *argbuf; 377 #endif 378 }; 379 380 struct fuse_iqueue; ··· 500 bool destroy:1; 501 bool no_control:1; 502 bool no_force_umount:1; 503 - bool no_mount_options:1; 504 unsigned int max_read; 505 unsigned int blksize; 506 const char *subtype; 507 508 /* fuse_dev pointer to fill in, should contain NULL on entry */ 509 void **fudptr; ··· 516 /** 517 * A Fuse connection. 518 * 519 - * This structure is created, when the filesystem is mounted, and is 520 - * destroyed, when the client device is closed and the filesystem is 521 - * unmounted. 522 */ 523 struct fuse_conn { 524 /** Lock protecting accessess to members of this structure */ ··· 632 /** cache READLINK responses in page cache */ 633 unsigned cache_symlinks:1; 634 635 /* 636 * The following bitfields are only for optimization purposes 637 * and hence races in setting them will not cause malfunction ··· 742 /** Do not allow MNT_FORCE umount */ 743 unsigned int no_force_umount:1; 744 745 - /* Do not show mount options */ 746 - unsigned int no_mount_options:1; 747 748 /** The number of requests waiting for completion */ 749 atomic_t num_waiting; ··· 751 /** Negotiated minor version */ 752 unsigned minor; 753 754 - /** Entry on the fuse_conn_list */ 755 struct list_head entry; 756 757 - /** Device ID from super block */ 758 dev_t dev; 759 760 /** Dentries in the control filesystem */ ··· 772 /** Called on final put */ 773 void (*release)(struct fuse_conn *); 774 775 - /** Super block for this connection. */ 776 - struct super_block *sb; 777 - 778 - /** Read/write semaphore to hold when accessing sb. */ 779 struct rw_semaphore killsb; 780 781 /** List of device instances belonging to this connection */ 782 struct list_head devices; 783 }; 784 785 - static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) 786 { 787 return sb->s_fs_info; 788 } 789 790 static inline struct fuse_conn *get_fuse_conn(struct inode *inode) 791 { 792 - return get_fuse_conn_super(inode->i_sb); 793 } 794 795 static inline struct fuse_inode *get_fuse_inode(struct inode *inode) ··· 863 864 extern const struct dentry_operations fuse_dentry_operations; 865 extern const struct dentry_operations fuse_root_dentry_operations; 866 - 867 - /** 868 - * Inode to nodeid comparison. 
869 - */ 870 - int fuse_inode_eq(struct inode *inode, void *_nodeidp); 871 872 /** 873 * Get a filled in inode ··· 914 */ 915 int fuse_open_common(struct inode *inode, struct file *file, bool isdir); 916 917 - struct fuse_file *fuse_file_alloc(struct fuse_conn *fc); 918 void fuse_file_free(struct fuse_file *ff); 919 void fuse_finish_open(struct inode *inode, struct file *file); 920 ··· 982 /** 983 * Simple request sending that does request allocation and freeing 984 */ 985 - ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args); 986 - int fuse_simple_background(struct fuse_conn *fc, struct fuse_args *args, 987 gfp_t gfp_flags); 988 989 /** 990 * End a finished request 991 */ 992 - void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req); 993 994 /* Abort all requests */ 995 void fuse_abort_conn(struct fuse_conn *fc); ··· 1015 /** 1016 * Initialize fuse_conn 1017 */ 1018 - void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns, 1019 const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv); 1020 1021 /** ··· 1024 */ 1025 void fuse_conn_put(struct fuse_conn *fc); 1026 1027 struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc); 1028 struct fuse_dev *fuse_dev_alloc(void); 1029 void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc); 1030 void fuse_dev_free(struct fuse_dev *fud); 1031 - void fuse_send_init(struct fuse_conn *fc); 1032 1033 /** 1034 * Fill in superblock and initialize fuse connection ··· 1047 */ 1048 int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx); 1049 1050 - /** 1051 - * Disassociate fuse connection from superblock and kill the superblock 1052 - * 1053 - * Calls kill_anon_super(), do not use with bdev mounts. 1054 */ 1055 - void fuse_kill_sb_anon(struct super_block *sb); 1056 1057 /** 1058 * Add connection to control filesystem ··· 1102 void fuse_release_nowrite(struct inode *inode); 1103 1104 /** 1105 * File-system tells the kernel to invalidate cache for the given node id. 1106 */ 1107 - int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid, 1108 loff_t offset, loff_t len); 1109 1110 /** ··· 1127 * - is a file or oan empty directory 1128 * then the dentry is unhashed (d_delete()). 1129 */ 1130 - int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, 1131 u64 child_nodeid, struct qstr *name); 1132 1133 - int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, 1134 bool isdir); 1135 1136 /** ··· 1193 */ 1194 u64 fuse_get_unique(struct fuse_iqueue *fiq); 1195 void fuse_free_conn(struct fuse_conn *fc); 1196 1197 #endif /* _FS_FUSE_I_H */
··· 148 149 /** Lock to protect write related fields */ 150 spinlock_t lock; 151 + 152 + /** 153 + * Can't take inode lock in fault path (leads to circular dependency). 154 + * Introduce another semaphore which can be taken in fault path and 155 + * then other filesystem paths can take this to block faults. 156 + */ 157 + struct rw_semaphore i_mmap_sem; 158 + 159 + #ifdef CONFIG_FUSE_DAX 160 + /* 161 + * Dax specific inode data 162 + */ 163 + struct fuse_inode_dax *dax; 164 + #endif 165 }; 166 167 /** FUSE inode state bits */ ··· 161 }; 162 163 struct fuse_conn; 164 + struct fuse_mount; 165 struct fuse_release_args; 166 167 /** FUSE specific file data */ 168 struct fuse_file { 169 /** Fuse connection for this file */ 170 + struct fuse_mount *fm; 171 172 /* Argument space reserved for release */ 173 struct fuse_release_args *release_args; ··· 252 bool may_block:1; 253 struct fuse_in_arg in_args[3]; 254 struct fuse_arg out_args[2]; 255 + void (*end)(struct fuse_mount *fm, struct fuse_args *args, int error); 256 }; 257 258 struct fuse_args_pages { ··· 360 /** virtio-fs's physically contiguous buffer for in and out args */ 361 void *argbuf; 362 #endif 363 + 364 + /** fuse_mount this request belongs to */ 365 + struct fuse_mount *fm; 366 }; 367 368 struct fuse_iqueue; ··· 482 bool destroy:1; 483 bool no_control:1; 484 bool no_force_umount:1; 485 + bool legacy_opts_show:1; 486 + bool dax:1; 487 unsigned int max_read; 488 unsigned int blksize; 489 const char *subtype; 490 + 491 + /* DAX device, may be NULL */ 492 + struct dax_device *dax_dev; 493 494 /* fuse_dev pointer to fill in, should contain NULL on entry */ 495 void **fudptr; ··· 494 /** 495 * A Fuse connection. 496 * 497 + * This structure is created, when the root filesystem is mounted, and 498 + * is destroyed, when the client device is closed and the last 499 + * fuse_mount is destroyed. 500 */ 501 struct fuse_conn { 502 /** Lock protecting accessess to members of this structure */ ··· 610 /** cache READLINK responses in page cache */ 611 unsigned cache_symlinks:1; 612 613 + /* show legacy mount options */ 614 + unsigned int legacy_opts_show:1; 615 + 616 /* 617 * The following bitfields are only for optimization purposes 618 * and hence races in setting them will not cause malfunction ··· 717 /** Do not allow MNT_FORCE umount */ 718 unsigned int no_force_umount:1; 719 720 + /* Auto-mount submounts announced by the server */ 721 + unsigned int auto_submounts:1; 722 723 /** The number of requests waiting for completion */ 724 atomic_t num_waiting; ··· 726 /** Negotiated minor version */ 727 unsigned minor; 728 729 + /** Entry on the fuse_mount_list */ 730 struct list_head entry; 731 732 + /** Device ID from the root super block */ 733 dev_t dev; 734 735 /** Dentries in the control filesystem */ ··· 747 /** Called on final put */ 748 void (*release)(struct fuse_conn *); 749 750 + /** 751 + * Read/write semaphore to hold when accessing the sb of any 752 + * fuse_mount belonging to this connection 753 + */ 754 struct rw_semaphore killsb; 755 756 /** List of device instances belonging to this connection */ 757 struct list_head devices; 758 + 759 + #ifdef CONFIG_FUSE_DAX 760 + /* Dax specific conn data, non-NULL if DAX is enabled */ 761 + struct fuse_conn_dax *dax; 762 + #endif 763 + 764 + /** List of filesystems using this connection */ 765 + struct list_head mounts; 766 }; 767 768 + /* 769 + * Represents a mounted filesystem, potentially a submount. 
770 + * 771 + * This object allows sharing a fuse_conn between separate mounts to 772 + * allow submounts with dedicated superblocks and thus separate device 773 + * IDs. 774 + */ 775 + struct fuse_mount { 776 + /* Underlying (potentially shared) connection to the FUSE server */ 777 + struct fuse_conn *fc; 778 + 779 + /* Refcount */ 780 + refcount_t count; 781 + 782 + /* 783 + * Super block for this connection (fc->killsb must be held when 784 + * accessing this). 785 + */ 786 + struct super_block *sb; 787 + 788 + /* Entry on fc->mounts */ 789 + struct list_head fc_entry; 790 + }; 791 + 792 + static inline struct fuse_mount *get_fuse_mount_super(struct super_block *sb) 793 { 794 return sb->s_fs_info; 795 } 796 797 + static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) 798 + { 799 + struct fuse_mount *fm = get_fuse_mount_super(sb); 800 + 801 + return fm ? fm->fc : NULL; 802 + } 803 + 804 + static inline struct fuse_mount *get_fuse_mount(struct inode *inode) 805 + { 806 + return get_fuse_mount_super(inode->i_sb); 807 + } 808 + 809 static inline struct fuse_conn *get_fuse_conn(struct inode *inode) 810 { 811 + struct fuse_mount *fm = get_fuse_mount(inode); 812 + 813 + return fm ? fm->fc : NULL; 814 } 815 816 static inline struct fuse_inode *get_fuse_inode(struct inode *inode) ··· 792 793 extern const struct dentry_operations fuse_dentry_operations; 794 extern const struct dentry_operations fuse_root_dentry_operations; 795 796 /** 797 * Get a filled in inode ··· 848 */ 849 int fuse_open_common(struct inode *inode, struct file *file, bool isdir); 850 851 + struct fuse_file *fuse_file_alloc(struct fuse_mount *fm); 852 void fuse_file_free(struct fuse_file *ff); 853 void fuse_finish_open(struct inode *inode, struct file *file); 854 ··· 916 /** 917 * Simple request sending that does request allocation and freeing 918 */ 919 + ssize_t fuse_simple_request(struct fuse_mount *fm, struct fuse_args *args); 920 + int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args, 921 gfp_t gfp_flags); 922 923 /** 924 * End a finished request 925 */ 926 + void fuse_request_end(struct fuse_req *req); 927 928 /* Abort all requests */ 929 void fuse_abort_conn(struct fuse_conn *fc); ··· 949 /** 950 * Initialize fuse_conn 951 */ 952 + void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, 953 + struct user_namespace *user_ns, 954 const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv); 955 956 /** ··· 957 */ 958 void fuse_conn_put(struct fuse_conn *fc); 959 960 + /** 961 + * Acquire reference to fuse_mount 962 + */ 963 + struct fuse_mount *fuse_mount_get(struct fuse_mount *fm); 964 + 965 + /** 966 + * Release reference to fuse_mount 967 + */ 968 + void fuse_mount_put(struct fuse_mount *fm); 969 + 970 struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc); 971 struct fuse_dev *fuse_dev_alloc(void); 972 void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc); 973 void fuse_dev_free(struct fuse_dev *fud); 974 + void fuse_send_init(struct fuse_mount *fm); 975 976 /** 977 * Fill in superblock and initialize fuse connection ··· 970 */ 971 int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx); 972 973 + /* 974 + * Fill in superblock for submounts 975 + * @sb: partially-initialized superblock to fill in 976 + * @parent_fi: The fuse_inode of the parent filesystem where this submount is 977 + * mounted 978 */ 979 + int fuse_fill_super_submount(struct super_block *sb, 980 + struct fuse_inode *parent_fi); 981 + 982 + /* 983 + * Remove the mount 
from the connection 984 + * 985 + * Returns whether this was the last mount 986 + */ 987 + bool fuse_mount_remove(struct fuse_mount *fm); 988 + 989 + /* 990 + * Shut down the connection (possibly sending DESTROY request). 991 + */ 992 + void fuse_conn_destroy(struct fuse_mount *fm); 993 994 /** 995 * Add connection to control filesystem ··· 1011 void fuse_release_nowrite(struct inode *inode); 1012 1013 /** 1014 + * Scan all fuse_mounts belonging to fc to find the first where 1015 + * ilookup5() returns a result. Return that result and the 1016 + * respective fuse_mount in *fm (unless fm is NULL). 1017 + * 1018 + * The caller must hold fc->killsb. 1019 + */ 1020 + struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid, 1021 + struct fuse_mount **fm); 1022 + 1023 + /** 1024 * File-system tells the kernel to invalidate cache for the given node id. 1025 */ 1026 + int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid, 1027 loff_t offset, loff_t len); 1028 1029 /** ··· 1026 * - is a file or oan empty directory 1027 * then the dentry is unhashed (d_delete()). 1028 */ 1029 + int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid, 1030 u64 child_nodeid, struct qstr *name); 1031 1032 + int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file, 1033 bool isdir); 1034 1035 /** ··· 1092 */ 1093 u64 fuse_get_unique(struct fuse_iqueue *fiq); 1094 void fuse_free_conn(struct fuse_conn *fc); 1095 + 1096 + /* dax.c */ 1097 + 1098 + #define FUSE_IS_DAX(inode) (IS_ENABLED(CONFIG_FUSE_DAX) && IS_DAX(inode)) 1099 + 1100 + ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to); 1101 + ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from); 1102 + int fuse_dax_mmap(struct file *file, struct vm_area_struct *vma); 1103 + int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start, u64 dmap_end); 1104 + int fuse_dax_conn_alloc(struct fuse_conn *fc, struct dax_device *dax_dev); 1105 + void fuse_dax_conn_free(struct fuse_conn *fc); 1106 + bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi); 1107 + void fuse_dax_inode_init(struct inode *inode); 1108 + void fuse_dax_inode_cleanup(struct inode *inode); 1109 + bool fuse_dax_check_alignment(struct fuse_conn *fc, unsigned int map_alignment); 1110 + void fuse_dax_cancel_work(struct fuse_conn *fc); 1111 1112 #endif /* _FS_FUSE_I_H */
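The fuse_i.h changes introduce struct fuse_mount so that several mounts (submounts with their own superblocks and thus their own device IDs) can share a single fuse_conn, with the connection torn down only after the last mount reference is dropped. A minimal stand-alone sketch of that ownership model follows; it uses plain integers instead of refcount_t, is single-threaded, and all toy_* names are invented for illustration:

/*
 * One connection shared by several mounts: each mount pins the
 * connection, and the connection is destroyed only when the last
 * reference (mounts plus the initial one) goes away.
 */
#include <stdio.h>
#include <stdlib.h>

struct toy_conn {
	int refcount;
	int nmounts;
};

struct toy_mount {
	struct toy_conn *fc;	/* shared connection, like fm->fc */
	int refcount;
};

static struct toy_mount *toy_mount_new(struct toy_conn *fc)
{
	struct toy_mount *fm = calloc(1, sizeof(*fm));

	fm->fc = fc;
	fm->refcount = 1;
	fc->refcount++;		/* each mount holds a connection reference */
	fc->nmounts++;
	return fm;
}

static void toy_conn_put(struct toy_conn *fc)
{
	if (--fc->refcount == 0) {
		printf("connection destroyed\n");
		free(fc);
	}
}

static void toy_mount_put(struct toy_mount *fm)
{
	if (--fm->refcount == 0) {
		fm->fc->nmounts--;
		toy_conn_put(fm->fc);
		free(fm);
	}
}

int main(void)
{
	struct toy_conn *fc = calloc(1, sizeof(*fc));
	struct toy_mount *root, *sub;

	fc->refcount = 1;		/* initial reference held by the setup code */
	root = toy_mount_new(fc);
	sub = toy_mount_new(fc);	/* submount sharing the same connection */
	printf("connection shared by %d mounts\n", fc->nmounts);

	toy_mount_put(sub);
	toy_mount_put(root);
	toy_conn_put(fc);		/* drop the initial reference last */
	return 0;
}

In the real patches each request also records its fuse_mount (the new fm field in fuse_req), which is how fuse_simple_request(), fuse_simple_background() and the ->end() callbacks now receive the mount instead of the bare connection.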
+303 -88
fs/fuse/inode.c
··· 85 fi->orig_ino = 0; 86 fi->state = 0; 87 mutex_init(&fi->mutex); 88 spin_lock_init(&fi->lock); 89 fi->forget = fuse_alloc_forget(); 90 - if (!fi->forget) { 91 - kmem_cache_free(fuse_inode_cachep, fi); 92 - return NULL; 93 - } 94 95 return &fi->inode; 96 } 97 98 static void fuse_free_inode(struct inode *inode) ··· 109 110 mutex_destroy(&fi->mutex); 111 kfree(fi->forget); 112 kmem_cache_free(fuse_inode_cachep, fi); 113 } 114 ··· 123 clear_inode(inode); 124 if (inode->i_sb->s_flags & SB_ACTIVE) { 125 struct fuse_conn *fc = get_fuse_conn(inode); 126 - fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup); 127 - fi->forget = NULL; 128 } 129 if (S_ISREG(inode->i_mode) && !is_bad_inode(inode)) { 130 WARN_ON(!list_empty(&fi->write_files)); ··· 285 BUG(); 286 } 287 288 - int fuse_inode_eq(struct inode *inode, void *_nodeidp) 289 { 290 u64 nodeid = *(u64 *) _nodeidp; 291 if (get_node_id(inode) == nodeid) ··· 309 struct fuse_inode *fi; 310 struct fuse_conn *fc = get_fuse_conn_super(sb); 311 312 - retry: 313 inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid); 314 if (!inode) 315 return NULL; ··· 346 iput(inode); 347 goto retry; 348 } 349 - 350 fi = get_fuse_inode(inode); 351 spin_lock(&fi->lock); 352 fi->nlookup++; ··· 356 return inode; 357 } 358 359 - int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid, 360 loff_t offset, loff_t len) 361 { 362 - struct fuse_conn *fc = get_fuse_conn_super(sb); 363 struct fuse_inode *fi; 364 struct inode *inode; 365 pgoff_t pg_start; 366 pgoff_t pg_end; 367 368 - inode = ilookup5(sb, nodeid, fuse_inode_eq, &nodeid); 369 if (!inode) 370 return -ENOENT; 371 ··· 436 fuse_abort_conn(fc); 437 } 438 439 - static void fuse_send_destroy(struct fuse_conn *fc) 440 { 441 - if (fc->conn_init) { 442 FUSE_ARGS(args); 443 444 args.opcode = FUSE_DESTROY; 445 args.force = true; 446 args.nocreds = true; 447 - fuse_simple_request(fc, &args); 448 } 449 } 450 451 static void fuse_put_super(struct super_block *sb) 452 { 453 - struct fuse_conn *fc = get_fuse_conn_super(sb); 454 455 - mutex_lock(&fuse_mutex); 456 - list_del(&fc->entry); 457 - fuse_ctl_remove_conn(fc); 458 - mutex_unlock(&fuse_mutex); 459 - 460 - fuse_conn_put(fc); 461 } 462 463 static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr) ··· 472 static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) 473 { 474 struct super_block *sb = dentry->d_sb; 475 - struct fuse_conn *fc = get_fuse_conn_super(sb); 476 FUSE_ARGS(args); 477 struct fuse_statfs_out outarg; 478 int err; 479 480 - if (!fuse_allow_current_process(fc)) { 481 buf->f_type = FUSE_SUPER_MAGIC; 482 return 0; 483 } ··· 489 args.out_numargs = 1; 490 args.out_args[0].size = sizeof(outarg); 491 args.out_args[0].value = &outarg; 492 - err = fuse_simple_request(fc, &args); 493 if (!err) 494 convert_fuse_statfs(buf, &outarg.st); 495 return err; ··· 625 struct super_block *sb = root->d_sb; 626 struct fuse_conn *fc = get_fuse_conn_super(sb); 627 628 - if (fc->no_mount_options) 629 - return 0; 630 631 - seq_printf(m, ",user_id=%u", from_kuid_munged(fc->user_ns, fc->user_id)); 632 - seq_printf(m, ",group_id=%u", from_kgid_munged(fc->user_ns, fc->group_id)); 633 - if (fc->default_permissions) 634 - seq_puts(m, ",default_permissions"); 635 - if (fc->allow_other) 636 - seq_puts(m, ",allow_other"); 637 - if (fc->max_read != ~0) 638 - seq_printf(m, ",max_read=%u", fc->max_read); 639 - if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE) 640 - seq_printf(m, ",blksize=%lu", sb->s_blocksize); 641 return 0; 
642 } 643 ··· 673 fpq->connected = 1; 674 } 675 676 - void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns, 677 const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv) 678 { 679 memset(fc, 0, sizeof(*fc)); ··· 701 fc->pid_ns = get_pid_ns(task_active_pid_ns(current)); 702 fc->user_ns = get_user_ns(user_ns); 703 fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ; 704 } 705 EXPORT_SYMBOL_GPL(fuse_conn_init); 706 ··· 714 if (refcount_dec_and_test(&fc->count)) { 715 struct fuse_iqueue *fiq = &fc->iq; 716 717 if (fiq->ops->release) 718 fiq->ops->release(fiq); 719 put_pid_ns(fc->pid_ns); ··· 731 return fc; 732 } 733 EXPORT_SYMBOL_GPL(fuse_conn_get); 734 735 static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode) 736 { ··· 978 struct fuse_init_out out; 979 }; 980 981 - static void process_init_reply(struct fuse_conn *fc, struct fuse_args *args, 982 int error) 983 { 984 struct fuse_init_args *ia = container_of(args, typeof(*ia), args); 985 struct fuse_init_out *arg = &ia->out; 986 987 if (error || arg->major != FUSE_KERNEL_VERSION) 988 - fc->conn_error = 1; 989 else { 990 unsigned long ra_pages; 991 ··· 1035 if (arg->flags & FUSE_HANDLE_KILLPRIV) 1036 fc->handle_killpriv = 1; 1037 if (arg->time_gran && arg->time_gran <= 1000000000) 1038 - fc->sb->s_time_gran = arg->time_gran; 1039 if ((arg->flags & FUSE_POSIX_ACL)) { 1040 fc->default_permissions = 1; 1041 fc->posix_acl = 1; 1042 - fc->sb->s_xattr = fuse_acl_xattr_handlers; 1043 } 1044 if (arg->flags & FUSE_CACHE_SYMLINKS) 1045 fc->cache_symlinks = 1; ··· 1050 min_t(unsigned int, FUSE_MAX_MAX_PAGES, 1051 max_t(unsigned int, arg->max_pages, 1)); 1052 } 1053 } else { 1054 ra_pages = fc->max_read / PAGE_SIZE; 1055 fc->no_lock = 1; 1056 fc->no_flock = 1; 1057 } 1058 1059 - fc->sb->s_bdi->ra_pages = 1060 - min(fc->sb->s_bdi->ra_pages, ra_pages); 1061 fc->minor = arg->minor; 1062 fc->max_write = arg->minor < 5 ? 
4096 : arg->max_write; 1063 fc->max_write = max_t(unsigned, 4096, fc->max_write); ··· 1070 } 1071 kfree(ia); 1072 1073 fuse_set_initialized(fc); 1074 wake_up_all(&fc->blocked_waitq); 1075 } 1076 1077 - void fuse_send_init(struct fuse_conn *fc) 1078 { 1079 struct fuse_init_args *ia; 1080 ··· 1087 1088 ia->in.major = FUSE_KERNEL_VERSION; 1089 ia->in.minor = FUSE_KERNEL_MINOR_VERSION; 1090 - ia->in.max_readahead = fc->sb->s_bdi->ra_pages * PAGE_SIZE; 1091 ia->in.flags |= 1092 FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | 1093 FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | ··· 1098 FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL | 1099 FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS | 1100 FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA; 1101 ia->args.opcode = FUSE_INIT; 1102 ia->args.in_numargs = 1; 1103 ia->args.in_args[0].size = sizeof(ia->in); ··· 1120 ia->args.nocreds = true; 1121 ia->args.end = process_init_reply; 1122 1123 - if (fuse_simple_background(fc, &ia->args, GFP_KERNEL) != 0) 1124 - process_init_reply(fc, &ia->args, -ENOTCONN); 1125 } 1126 EXPORT_SYMBOL_GPL(fuse_send_init); 1127 ··· 1232 } 1233 EXPORT_SYMBOL_GPL(fuse_dev_free); 1234 1235 int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) 1236 { 1237 struct fuse_dev *fud = NULL; 1238 - struct fuse_conn *fc = get_fuse_conn_super(sb); 1239 struct inode *root; 1240 struct dentry *root_dentry; 1241 int err; ··· 1326 if (sb->s_flags & SB_MANDLOCK) 1327 goto err; 1328 1329 - sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION); 1330 1331 if (ctx->is_bdev) { 1332 #ifdef CONFIG_BLOCK ··· 1341 1342 sb->s_subtype = ctx->subtype; 1343 ctx->subtype = NULL; 1344 - sb->s_magic = FUSE_SUPER_MAGIC; 1345 - sb->s_op = &fuse_super_operations; 1346 - sb->s_xattr = fuse_xattr_handlers; 1347 - sb->s_maxbytes = MAX_LFS_FILESIZE; 1348 - sb->s_time_gran = 1; 1349 - sb->s_export_op = &fuse_export_operations; 1350 - sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE; 1351 - if (sb->s_user_ns != &init_user_ns) 1352 - sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER; 1353 - 1354 - /* 1355 - * If we are not in the initial user namespace posix 1356 - * acls must be translated. 
1357 - */ 1358 - if (sb->s_user_ns != &init_user_ns) 1359 - sb->s_xattr = fuse_no_acl_xattr_handlers; 1360 1361 if (ctx->fudptr) { 1362 err = -ENOMEM; 1363 fud = fuse_dev_alloc_install(fc); 1364 if (!fud) 1365 - goto err; 1366 } 1367 1368 fc->dev = sb->s_dev; 1369 - fc->sb = sb; 1370 err = fuse_bdi_init(fc, sb); 1371 if (err) 1372 goto err_dev_free; ··· 1369 fc->allow_other = ctx->allow_other; 1370 fc->user_id = ctx->user_id; 1371 fc->group_id = ctx->group_id; 1372 - fc->max_read = max_t(unsigned, 4096, ctx->max_read); 1373 fc->destroy = ctx->destroy; 1374 fc->no_control = ctx->no_control; 1375 fc->no_force_umount = ctx->no_force_umount; 1376 - fc->no_mount_options = ctx->no_mount_options; 1377 1378 err = -ENOMEM; 1379 root = fuse_get_root_inode(sb, ctx->rootmode); ··· 1406 err_dev_free: 1407 if (fud) 1408 fuse_dev_free(fud); 1409 err: 1410 return err; 1411 } ··· 1420 struct file *file; 1421 int err; 1422 struct fuse_conn *fc; 1423 1424 err = -EINVAL; 1425 file = fget(ctx->fd); ··· 1441 if (!fc) 1442 goto err_fput; 1443 1444 - fuse_conn_init(fc, sb->s_user_ns, &fuse_dev_fiq_ops, NULL); 1445 fc->release = fuse_free_conn; 1446 - sb->s_fs_info = fc; 1447 1448 err = fuse_fill_super_common(sb, ctx); 1449 if (err) ··· 1461 * CPUs after this 1462 */ 1463 fput(file); 1464 - fuse_send_init(get_fuse_conn_super(sb)); 1465 return 0; 1466 1467 err_put_conn: 1468 - fuse_conn_put(fc); 1469 sb->s_fs_info = NULL; 1470 err_fput: 1471 fput(file); ··· 1509 1510 ctx->max_read = ~0; 1511 ctx->blksize = FUSE_DEFAULT_BLKSIZE; 1512 1513 #ifdef CONFIG_BLOCK 1514 if (fc->fs_type == &fuseblk_fs_type) { ··· 1523 return 0; 1524 } 1525 1526 - static void fuse_sb_destroy(struct super_block *sb) 1527 { 1528 - struct fuse_conn *fc = get_fuse_conn_super(sb); 1529 1530 - if (fc) { 1531 - if (fc->destroy) 1532 - fuse_send_destroy(fc); 1533 1534 - fuse_abort_conn(fc); 1535 - fuse_wait_aborted(fc); 1536 1537 - down_write(&fc->killsb); 1538 - fc->sb = NULL; 1539 - up_write(&fc->killsb); 1540 } 1541 } 1542 1543 - void fuse_kill_sb_anon(struct super_block *sb) 1544 { 1545 - fuse_sb_destroy(sb); 1546 kill_anon_super(sb); 1547 } 1548 - EXPORT_SYMBOL_GPL(fuse_kill_sb_anon); 1549 1550 static struct file_system_type fuse_fs_type = { 1551 .owner = THIS_MODULE, ··· 1583 #ifdef CONFIG_BLOCK 1584 static void fuse_kill_sb_blk(struct super_block *sb) 1585 { 1586 - fuse_sb_destroy(sb); 1587 kill_block_super(sb); 1588 } 1589
··· 85 fi->orig_ino = 0; 86 fi->state = 0; 87 mutex_init(&fi->mutex); 88 + init_rwsem(&fi->i_mmap_sem); 89 spin_lock_init(&fi->lock); 90 fi->forget = fuse_alloc_forget(); 91 + if (!fi->forget) 92 + goto out_free; 93 + 94 + if (IS_ENABLED(CONFIG_FUSE_DAX) && !fuse_dax_inode_alloc(sb, fi)) 95 + goto out_free_forget; 96 97 return &fi->inode; 98 + 99 + out_free_forget: 100 + kfree(fi->forget); 101 + out_free: 102 + kmem_cache_free(fuse_inode_cachep, fi); 103 + return NULL; 104 } 105 106 static void fuse_free_inode(struct inode *inode) ··· 101 102 mutex_destroy(&fi->mutex); 103 kfree(fi->forget); 104 + #ifdef CONFIG_FUSE_DAX 105 + kfree(fi->dax); 106 + #endif 107 kmem_cache_free(fuse_inode_cachep, fi); 108 } 109 ··· 112 clear_inode(inode); 113 if (inode->i_sb->s_flags & SB_ACTIVE) { 114 struct fuse_conn *fc = get_fuse_conn(inode); 115 + 116 + if (FUSE_IS_DAX(inode)) 117 + fuse_dax_inode_cleanup(inode); 118 + if (fi->nlookup) { 119 + fuse_queue_forget(fc, fi->forget, fi->nodeid, 120 + fi->nlookup); 121 + fi->forget = NULL; 122 + } 123 } 124 if (S_ISREG(inode->i_mode) && !is_bad_inode(inode)) { 125 WARN_ON(!list_empty(&fi->write_files)); ··· 268 BUG(); 269 } 270 271 + static int fuse_inode_eq(struct inode *inode, void *_nodeidp) 272 { 273 u64 nodeid = *(u64 *) _nodeidp; 274 if (get_node_id(inode) == nodeid) ··· 292 struct fuse_inode *fi; 293 struct fuse_conn *fc = get_fuse_conn_super(sb); 294 295 + /* 296 + * Auto mount points get their node id from the submount root, which is 297 + * not a unique identifier within this filesystem. 298 + * 299 + * To avoid conflicts, do not place submount points into the inode hash 300 + * table. 301 + */ 302 + if (fc->auto_submounts && (attr->flags & FUSE_ATTR_SUBMOUNT) && 303 + S_ISDIR(attr->mode)) { 304 + inode = new_inode(sb); 305 + if (!inode) 306 + return NULL; 307 + 308 + fuse_init_inode(inode, attr); 309 + get_fuse_inode(inode)->nodeid = nodeid; 310 + inode->i_flags |= S_AUTOMOUNT; 311 + goto done; 312 + } 313 + 314 + retry: 315 inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid); 316 if (!inode) 317 return NULL; ··· 310 iput(inode); 311 goto retry; 312 } 313 + done: 314 fi = get_fuse_inode(inode); 315 spin_lock(&fi->lock); 316 fi->nlookup++; ··· 320 return inode; 321 } 322 323 + struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid, 324 + struct fuse_mount **fm) 325 + { 326 + struct fuse_mount *fm_iter; 327 + struct inode *inode; 328 + 329 + WARN_ON(!rwsem_is_locked(&fc->killsb)); 330 + list_for_each_entry(fm_iter, &fc->mounts, fc_entry) { 331 + if (!fm_iter->sb) 332 + continue; 333 + 334 + inode = ilookup5(fm_iter->sb, nodeid, fuse_inode_eq, &nodeid); 335 + if (inode) { 336 + if (fm) 337 + *fm = fm_iter; 338 + return inode; 339 + } 340 + } 341 + 342 + return NULL; 343 + } 344 + 345 + int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid, 346 loff_t offset, loff_t len) 347 { 348 struct fuse_inode *fi; 349 struct inode *inode; 350 pgoff_t pg_start; 351 pgoff_t pg_end; 352 353 + inode = fuse_ilookup(fc, nodeid, NULL); 354 if (!inode) 355 return -ENOENT; 356 ··· 379 fuse_abort_conn(fc); 380 } 381 382 + static void fuse_send_destroy(struct fuse_mount *fm) 383 { 384 + if (fm->fc->conn_init) { 385 FUSE_ARGS(args); 386 387 args.opcode = FUSE_DESTROY; 388 args.force = true; 389 args.nocreds = true; 390 + fuse_simple_request(fm, &args); 391 } 392 } 393 394 static void fuse_put_super(struct super_block *sb) 395 { 396 + struct fuse_mount *fm = get_fuse_mount_super(sb); 397 398 + fuse_mount_put(fm); 399 } 400 401 static void 
convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr) ··· 420 static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) 421 { 422 struct super_block *sb = dentry->d_sb; 423 + struct fuse_mount *fm = get_fuse_mount_super(sb); 424 FUSE_ARGS(args); 425 struct fuse_statfs_out outarg; 426 int err; 427 428 + if (!fuse_allow_current_process(fm->fc)) { 429 buf->f_type = FUSE_SUPER_MAGIC; 430 return 0; 431 } ··· 437 args.out_numargs = 1; 438 args.out_args[0].size = sizeof(outarg); 439 args.out_args[0].value = &outarg; 440 + err = fuse_simple_request(fm, &args); 441 if (!err) 442 convert_fuse_statfs(buf, &outarg.st); 443 return err; ··· 573 struct super_block *sb = root->d_sb; 574 struct fuse_conn *fc = get_fuse_conn_super(sb); 575 576 + if (fc->legacy_opts_show) { 577 + seq_printf(m, ",user_id=%u", 578 + from_kuid_munged(fc->user_ns, fc->user_id)); 579 + seq_printf(m, ",group_id=%u", 580 + from_kgid_munged(fc->user_ns, fc->group_id)); 581 + if (fc->default_permissions) 582 + seq_puts(m, ",default_permissions"); 583 + if (fc->allow_other) 584 + seq_puts(m, ",allow_other"); 585 + if (fc->max_read != ~0) 586 + seq_printf(m, ",max_read=%u", fc->max_read); 587 + if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE) 588 + seq_printf(m, ",blksize=%lu", sb->s_blocksize); 589 + } 590 + #ifdef CONFIG_FUSE_DAX 591 + if (fc->dax) 592 + seq_puts(m, ",dax"); 593 + #endif 594 595 return 0; 596 } 597 ··· 615 fpq->connected = 1; 616 } 617 618 + void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, 619 + struct user_namespace *user_ns, 620 const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv) 621 { 622 memset(fc, 0, sizeof(*fc)); ··· 642 fc->pid_ns = get_pid_ns(task_active_pid_ns(current)); 643 fc->user_ns = get_user_ns(user_ns); 644 fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ; 645 + 646 + INIT_LIST_HEAD(&fc->mounts); 647 + list_add(&fm->fc_entry, &fc->mounts); 648 + fm->fc = fc; 649 + refcount_set(&fm->count, 1); 650 } 651 EXPORT_SYMBOL_GPL(fuse_conn_init); 652 ··· 650 if (refcount_dec_and_test(&fc->count)) { 651 struct fuse_iqueue *fiq = &fc->iq; 652 653 + if (IS_ENABLED(CONFIG_FUSE_DAX)) 654 + fuse_dax_conn_free(fc); 655 if (fiq->ops->release) 656 fiq->ops->release(fiq); 657 put_pid_ns(fc->pid_ns); ··· 665 return fc; 666 } 667 EXPORT_SYMBOL_GPL(fuse_conn_get); 668 + 669 + void fuse_mount_put(struct fuse_mount *fm) 670 + { 671 + if (refcount_dec_and_test(&fm->count)) { 672 + if (fm->fc) 673 + fuse_conn_put(fm->fc); 674 + kfree(fm); 675 + } 676 + } 677 + EXPORT_SYMBOL_GPL(fuse_mount_put); 678 + 679 + struct fuse_mount *fuse_mount_get(struct fuse_mount *fm) 680 + { 681 + refcount_inc(&fm->count); 682 + return fm; 683 + } 684 + EXPORT_SYMBOL_GPL(fuse_mount_get); 685 686 static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode) 687 { ··· 895 struct fuse_init_out out; 896 }; 897 898 + static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, 899 int error) 900 { 901 + struct fuse_conn *fc = fm->fc; 902 struct fuse_init_args *ia = container_of(args, typeof(*ia), args); 903 struct fuse_init_out *arg = &ia->out; 904 + bool ok = true; 905 906 if (error || arg->major != FUSE_KERNEL_VERSION) 907 + ok = false; 908 else { 909 unsigned long ra_pages; 910 ··· 950 if (arg->flags & FUSE_HANDLE_KILLPRIV) 951 fc->handle_killpriv = 1; 952 if (arg->time_gran && arg->time_gran <= 1000000000) 953 + fm->sb->s_time_gran = arg->time_gran; 954 if ((arg->flags & FUSE_POSIX_ACL)) { 955 fc->default_permissions = 1; 956 fc->posix_acl = 1; 957 + 
fm->sb->s_xattr = fuse_acl_xattr_handlers; 958 } 959 if (arg->flags & FUSE_CACHE_SYMLINKS) 960 fc->cache_symlinks = 1; ··· 965 min_t(unsigned int, FUSE_MAX_MAX_PAGES, 966 max_t(unsigned int, arg->max_pages, 1)); 967 } 968 + if (IS_ENABLED(CONFIG_FUSE_DAX) && 969 + arg->flags & FUSE_MAP_ALIGNMENT && 970 + !fuse_dax_check_alignment(fc, arg->map_alignment)) { 971 + ok = false; 972 + } 973 } else { 974 ra_pages = fc->max_read / PAGE_SIZE; 975 fc->no_lock = 1; 976 fc->no_flock = 1; 977 } 978 979 + fm->sb->s_bdi->ra_pages = 980 + min(fm->sb->s_bdi->ra_pages, ra_pages); 981 fc->minor = arg->minor; 982 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; 983 fc->max_write = max_t(unsigned, 4096, fc->max_write); ··· 980 } 981 kfree(ia); 982 983 + if (!ok) { 984 + fc->conn_init = 0; 985 + fc->conn_error = 1; 986 + } 987 + 988 fuse_set_initialized(fc); 989 wake_up_all(&fc->blocked_waitq); 990 } 991 992 + void fuse_send_init(struct fuse_mount *fm) 993 { 994 struct fuse_init_args *ia; 995 ··· 992 993 ia->in.major = FUSE_KERNEL_VERSION; 994 ia->in.minor = FUSE_KERNEL_MINOR_VERSION; 995 + ia->in.max_readahead = fm->sb->s_bdi->ra_pages * PAGE_SIZE; 996 ia->in.flags |= 997 FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | 998 FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | ··· 1003 FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL | 1004 FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS | 1005 FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA; 1006 + #ifdef CONFIG_FUSE_DAX 1007 + if (fm->fc->dax) 1008 + ia->in.flags |= FUSE_MAP_ALIGNMENT; 1009 + #endif 1010 + if (fm->fc->auto_submounts) 1011 + ia->in.flags |= FUSE_SUBMOUNTS; 1012 + 1013 ia->args.opcode = FUSE_INIT; 1014 ia->args.in_numargs = 1; 1015 ia->args.in_args[0].size = sizeof(ia->in); ··· 1018 ia->args.nocreds = true; 1019 ia->args.end = process_init_reply; 1020 1021 + if (fuse_simple_background(fm, &ia->args, GFP_KERNEL) != 0) 1022 + process_init_reply(fm, &ia->args, -ENOTCONN); 1023 } 1024 EXPORT_SYMBOL_GPL(fuse_send_init); 1025 ··· 1130 } 1131 EXPORT_SYMBOL_GPL(fuse_dev_free); 1132 1133 + static void fuse_fill_attr_from_inode(struct fuse_attr *attr, 1134 + const struct fuse_inode *fi) 1135 + { 1136 + *attr = (struct fuse_attr){ 1137 + .ino = fi->inode.i_ino, 1138 + .size = fi->inode.i_size, 1139 + .blocks = fi->inode.i_blocks, 1140 + .atime = fi->inode.i_atime.tv_sec, 1141 + .mtime = fi->inode.i_mtime.tv_sec, 1142 + .ctime = fi->inode.i_ctime.tv_sec, 1143 + .atimensec = fi->inode.i_atime.tv_nsec, 1144 + .mtimensec = fi->inode.i_mtime.tv_nsec, 1145 + .ctimensec = fi->inode.i_ctime.tv_nsec, 1146 + .mode = fi->inode.i_mode, 1147 + .nlink = fi->inode.i_nlink, 1148 + .uid = fi->inode.i_uid.val, 1149 + .gid = fi->inode.i_gid.val, 1150 + .rdev = fi->inode.i_rdev, 1151 + .blksize = 1u << fi->inode.i_blkbits, 1152 + }; 1153 + } 1154 + 1155 + static void fuse_sb_defaults(struct super_block *sb) 1156 + { 1157 + sb->s_magic = FUSE_SUPER_MAGIC; 1158 + sb->s_op = &fuse_super_operations; 1159 + sb->s_xattr = fuse_xattr_handlers; 1160 + sb->s_maxbytes = MAX_LFS_FILESIZE; 1161 + sb->s_time_gran = 1; 1162 + sb->s_export_op = &fuse_export_operations; 1163 + sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE; 1164 + if (sb->s_user_ns != &init_user_ns) 1165 + sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER; 1166 + sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION); 1167 + 1168 + /* 1169 + * If we are not in the initial user namespace posix 1170 + * acls must be translated. 
1171 + */ 1172 + if (sb->s_user_ns != &init_user_ns) 1173 + sb->s_xattr = fuse_no_acl_xattr_handlers; 1174 + } 1175 + 1176 + int fuse_fill_super_submount(struct super_block *sb, 1177 + struct fuse_inode *parent_fi) 1178 + { 1179 + struct fuse_mount *fm = get_fuse_mount_super(sb); 1180 + struct super_block *parent_sb = parent_fi->inode.i_sb; 1181 + struct fuse_attr root_attr; 1182 + struct inode *root; 1183 + 1184 + fuse_sb_defaults(sb); 1185 + fm->sb = sb; 1186 + 1187 + WARN_ON(sb->s_bdi != &noop_backing_dev_info); 1188 + sb->s_bdi = bdi_get(parent_sb->s_bdi); 1189 + 1190 + sb->s_xattr = parent_sb->s_xattr; 1191 + sb->s_time_gran = parent_sb->s_time_gran; 1192 + sb->s_blocksize = parent_sb->s_blocksize; 1193 + sb->s_blocksize_bits = parent_sb->s_blocksize_bits; 1194 + sb->s_subtype = kstrdup(parent_sb->s_subtype, GFP_KERNEL); 1195 + if (parent_sb->s_subtype && !sb->s_subtype) 1196 + return -ENOMEM; 1197 + 1198 + fuse_fill_attr_from_inode(&root_attr, parent_fi); 1199 + root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0); 1200 + /* 1201 + * This inode is just a duplicate, so it is not looked up and 1202 + * its nlookup should not be incremented. fuse_iget() does 1203 + * that, though, so undo it here. 1204 + */ 1205 + get_fuse_inode(root)->nlookup--; 1206 + sb->s_d_op = &fuse_dentry_operations; 1207 + sb->s_root = d_make_root(root); 1208 + if (!sb->s_root) 1209 + return -ENOMEM; 1210 + 1211 + return 0; 1212 + } 1213 + 1214 int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) 1215 { 1216 struct fuse_dev *fud = NULL; 1217 + struct fuse_mount *fm = get_fuse_mount_super(sb); 1218 + struct fuse_conn *fc = fm->fc; 1219 struct inode *root; 1220 struct dentry *root_dentry; 1221 int err; ··· 1142 if (sb->s_flags & SB_MANDLOCK) 1143 goto err; 1144 1145 + fuse_sb_defaults(sb); 1146 1147 if (ctx->is_bdev) { 1148 #ifdef CONFIG_BLOCK ··· 1157 1158 sb->s_subtype = ctx->subtype; 1159 ctx->subtype = NULL; 1160 + if (IS_ENABLED(CONFIG_FUSE_DAX)) { 1161 + err = fuse_dax_conn_alloc(fc, ctx->dax_dev); 1162 + if (err) 1163 + goto err; 1164 + } 1165 1166 if (ctx->fudptr) { 1167 err = -ENOMEM; 1168 fud = fuse_dev_alloc_install(fc); 1169 if (!fud) 1170 + goto err_free_dax; 1171 } 1172 1173 fc->dev = sb->s_dev; 1174 + fm->sb = sb; 1175 err = fuse_bdi_init(fc, sb); 1176 if (err) 1177 goto err_dev_free; ··· 1196 fc->allow_other = ctx->allow_other; 1197 fc->user_id = ctx->user_id; 1198 fc->group_id = ctx->group_id; 1199 + fc->legacy_opts_show = ctx->legacy_opts_show; 1200 + fc->max_read = max_t(unsigned int, 4096, ctx->max_read); 1201 fc->destroy = ctx->destroy; 1202 fc->no_control = ctx->no_control; 1203 fc->no_force_umount = ctx->no_force_umount; 1204 1205 err = -ENOMEM; 1206 root = fuse_get_root_inode(sb, ctx->rootmode); ··· 1233 err_dev_free: 1234 if (fud) 1235 fuse_dev_free(fud); 1236 + err_free_dax: 1237 + if (IS_ENABLED(CONFIG_FUSE_DAX)) 1238 + fuse_dax_conn_free(fc); 1239 err: 1240 return err; 1241 } ··· 1244 struct file *file; 1245 int err; 1246 struct fuse_conn *fc; 1247 + struct fuse_mount *fm; 1248 1249 err = -EINVAL; 1250 file = fget(ctx->fd); ··· 1264 if (!fc) 1265 goto err_fput; 1266 1267 + fm = kzalloc(sizeof(*fm), GFP_KERNEL); 1268 + if (!fm) { 1269 + kfree(fc); 1270 + goto err_fput; 1271 + } 1272 + 1273 + fuse_conn_init(fc, fm, sb->s_user_ns, &fuse_dev_fiq_ops, NULL); 1274 fc->release = fuse_free_conn; 1275 + 1276 + sb->s_fs_info = fm; 1277 1278 err = fuse_fill_super_common(sb, ctx); 1279 if (err) ··· 1277 * CPUs after this 1278 */ 1279 fput(file); 1280 + 
fuse_send_init(get_fuse_mount_super(sb)); 1281 return 0; 1282 1283 err_put_conn: 1284 + fuse_mount_put(fm); 1285 sb->s_fs_info = NULL; 1286 err_fput: 1287 fput(file); ··· 1325 1326 ctx->max_read = ~0; 1327 ctx->blksize = FUSE_DEFAULT_BLKSIZE; 1328 + ctx->legacy_opts_show = true; 1329 1330 #ifdef CONFIG_BLOCK 1331 if (fc->fs_type == &fuseblk_fs_type) { ··· 1338 return 0; 1339 } 1340 1341 + bool fuse_mount_remove(struct fuse_mount *fm) 1342 { 1343 + struct fuse_conn *fc = fm->fc; 1344 + bool last = false; 1345 1346 + down_write(&fc->killsb); 1347 + list_del_init(&fm->fc_entry); 1348 + if (list_empty(&fc->mounts)) 1349 + last = true; 1350 + up_write(&fc->killsb); 1351 1352 + return last; 1353 + } 1354 + EXPORT_SYMBOL_GPL(fuse_mount_remove); 1355 1356 + void fuse_conn_destroy(struct fuse_mount *fm) 1357 + { 1358 + struct fuse_conn *fc = fm->fc; 1359 + 1360 + if (fc->destroy) 1361 + fuse_send_destroy(fm); 1362 + 1363 + fuse_abort_conn(fc); 1364 + fuse_wait_aborted(fc); 1365 + 1366 + if (!list_empty(&fc->entry)) { 1367 + mutex_lock(&fuse_mutex); 1368 + list_del(&fc->entry); 1369 + fuse_ctl_remove_conn(fc); 1370 + mutex_unlock(&fuse_mutex); 1371 } 1372 } 1373 + EXPORT_SYMBOL_GPL(fuse_conn_destroy); 1374 1375 + static void fuse_kill_sb_anon(struct super_block *sb) 1376 { 1377 + struct fuse_mount *fm = get_fuse_mount_super(sb); 1378 + bool last; 1379 + 1380 + if (fm) { 1381 + last = fuse_mount_remove(fm); 1382 + if (last) 1383 + fuse_conn_destroy(fm); 1384 + } 1385 kill_anon_super(sb); 1386 } 1387 1388 static struct file_system_type fuse_fs_type = { 1389 .owner = THIS_MODULE, ··· 1375 #ifdef CONFIG_BLOCK 1376 static void fuse_kill_sb_blk(struct super_block *sb) 1377 { 1378 + struct fuse_mount *fm = get_fuse_mount_super(sb); 1379 + bool last; 1380 + 1381 + if (fm) { 1382 + last = fuse_mount_remove(fm); 1383 + if (last) 1384 + fuse_conn_destroy(fm); 1385 + } 1386 kill_block_super(sb); 1387 } 1388
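The fs/fuse/inode.c changes above split struct fuse_mount off of struct fuse_conn: every superblock (including each automatic submount) now owns a refcounted fuse_mount, all mounts of one connection are linked on fc->mounts, and the connection is only torn down once the last mount is removed. The toy userspace model below sketches that ownership pattern; the names mirror fuse_mount_get/put(), fuse_mount_remove() and fuse_conn_destroy() from the patch, but the bodies are simplified stand-ins (single-threaded, error handling omitted), not the kernel code.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_conn {
	int count;       /* references held by mounts (cf. fc->count) */
	int nr_mounts;   /* stands in for the fc->mounts list */
};

struct toy_mount {
	struct toy_conn *fc;
	int count;       /* cf. fm->count */
};

static struct toy_mount *toy_mount_new(struct toy_conn *fc)
{
	struct toy_mount *fm = calloc(1, sizeof(*fm));

	fm->fc = fc;
	fm->count = 1;
	fc->count++;        /* each mount pins the shared connection */
	fc->nr_mounts++;
	return fm;
}

/* Like fuse_mount_remove(): unlink the mount and tell the caller whether
 * it was the last one, so the connection can be shut down. */
static bool toy_mount_remove(struct toy_mount *fm)
{
	return --fm->fc->nr_mounts == 0;
}

static void toy_conn_put(struct toy_conn *fc)
{
	if (--fc->count == 0) {
		printf("connection freed\n");
		free(fc);
	}
}

/* Like fuse_mount_put(): the last mount reference also drops the mount's
 * reference on the shared connection. */
static void toy_mount_put(struct toy_mount *fm)
{
	if (--fm->count == 0) {
		toy_conn_put(fm->fc);
		free(fm);
	}
}

int main(void)
{
	struct toy_conn *fc = calloc(1, sizeof(*fc));
	struct toy_mount *root = toy_mount_new(fc);
	struct toy_mount *submount = toy_mount_new(fc);  /* crossmount sharing fc */

	if (toy_mount_remove(submount))
		printf("last mount gone, would destroy connection\n");
	toy_mount_put(submount);

	if (toy_mount_remove(root))
		printf("last mount gone, would destroy connection\n");
	toy_mount_put(root);
	return 0;
}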
+5 -5
fs/fuse/readdir.c
··· 252 static void fuse_force_forget(struct file *file, u64 nodeid) 253 { 254 struct inode *inode = file_inode(file); 255 - struct fuse_conn *fc = get_fuse_conn(inode); 256 struct fuse_forget_in inarg; 257 FUSE_ARGS(args); 258 ··· 266 args.force = true; 267 args.noreply = true; 268 269 - fuse_simple_request(fc, &args); 270 /* ignore errors */ 271 } 272 ··· 320 ssize_t res; 321 struct page *page; 322 struct inode *inode = file_inode(file); 323 - struct fuse_conn *fc = get_fuse_conn(inode); 324 struct fuse_io_args ia = {}; 325 struct fuse_args_pages *ap = &ia.ap; 326 struct fuse_page_desc desc = { .length = PAGE_SIZE }; ··· 337 ap->pages = &page; 338 ap->descs = &desc; 339 if (plus) { 340 - attr_version = fuse_get_attr_version(fc); 341 fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE, 342 FUSE_READDIRPLUS); 343 } else { ··· 345 FUSE_READDIR); 346 } 347 locked = fuse_lock_inode(inode); 348 - res = fuse_simple_request(fc, &ap->args); 349 fuse_unlock_inode(inode, locked); 350 if (res >= 0) { 351 if (!res) {
··· 252 static void fuse_force_forget(struct file *file, u64 nodeid) 253 { 254 struct inode *inode = file_inode(file); 255 + struct fuse_mount *fm = get_fuse_mount(inode); 256 struct fuse_forget_in inarg; 257 FUSE_ARGS(args); 258 ··· 266 args.force = true; 267 args.noreply = true; 268 269 + fuse_simple_request(fm, &args); 270 /* ignore errors */ 271 } 272 ··· 320 ssize_t res; 321 struct page *page; 322 struct inode *inode = file_inode(file); 323 + struct fuse_mount *fm = get_fuse_mount(inode); 324 struct fuse_io_args ia = {}; 325 struct fuse_args_pages *ap = &ia.ap; 326 struct fuse_page_desc desc = { .length = PAGE_SIZE }; ··· 337 ap->pages = &page; 338 ap->descs = &desc; 339 if (plus) { 340 + attr_version = fuse_get_attr_version(fm->fc); 341 fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE, 342 FUSE_READDIRPLUS); 343 } else { ··· 345 FUSE_READDIR); 346 } 347 locked = fuse_lock_inode(inode); 348 + res = fuse_simple_request(fm, &ap->args); 349 fuse_unlock_inode(inode, locked); 350 if (res >= 0) { 351 if (!res) {
+312 -66
fs/fuse/virtio_fs.c
··· 5 */ 6 7 #include <linux/fs.h> 8 #include <linux/module.h> 9 #include <linux/virtio.h> 10 #include <linux/virtio_fs.h> 11 #include <linux/delay.h> 12 #include <linux/fs_context.h> 13 #include <linux/highmem.h> 14 #include "fuse_i.h" 15 16 /* List of virtio-fs device instances and a lock for the list. Also provides ··· 29 VQ_REQUEST 30 }; 31 32 /* Per-virtqueue state */ 33 struct virtio_fs_vq { 34 spinlock_t lock; ··· 43 bool connected; 44 long in_flight; 45 struct completion in_flight_zero; /* No inflight requests */ 46 - char name[24]; 47 } ____cacheline_aligned_in_smp; 48 49 /* A virtio-fs device instance */ ··· 54 struct virtio_fs_vq *vqs; 55 unsigned int nvqs; /* number of virtqueues */ 56 unsigned int num_request_queues; /* number of request queues */ 57 }; 58 59 struct virtio_fs_forget_req { ··· 81 82 static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, 83 struct fuse_req *req, bool in_flight); 84 85 static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq) 86 { ··· 340 struct fuse_req *req; 341 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 342 dispatch_work.work); 343 - struct fuse_conn *fc = fsvq->fud->fc; 344 int ret; 345 346 pr_debug("virtio-fs: worker %s called.\n", __func__); ··· 354 355 list_del_init(&req->list); 356 spin_unlock(&fsvq->lock); 357 - fuse_request_end(fc, req); 358 } 359 360 /* Dispatch pending requests */ ··· 385 spin_unlock(&fsvq->lock); 386 pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", 387 ret); 388 - fuse_request_end(fc, req); 389 } 390 } 391 } ··· 545 struct virtio_fs_vq *fsvq) 546 { 547 struct fuse_pqueue *fpq = &fsvq->fud->pq; 548 - struct fuse_conn *fc = fsvq->fud->fc; 549 struct fuse_args *args; 550 struct fuse_args_pages *ap; 551 unsigned int len, i, thislen; ··· 577 clear_bit(FR_SENT, &req->flags); 578 spin_unlock(&fpq->lock); 579 580 - fuse_request_end(fc, req); 581 spin_lock(&fsvq->lock); 582 dec_in_flight_req(fsvq); 583 spin_unlock(&fsvq->lock); ··· 645 schedule_work(&fsvq->done_work); 646 } 647 648 /* Initialize virtqueues */ 649 static int virtio_fs_setup_vqs(struct virtio_device *vdev, 650 struct virtio_fs *fs) ··· 680 if (fs->num_request_queues == 0) 681 return -EINVAL; 682 683 - fs->nvqs = 1 + fs->num_request_queues; 684 fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL); 685 if (!fs->vqs) 686 return -ENOMEM; ··· 694 goto out; 695 } 696 697 callbacks[VQ_HIPRIO] = virtio_fs_vq_done; 698 - snprintf(fs->vqs[VQ_HIPRIO].name, sizeof(fs->vqs[VQ_HIPRIO].name), 699 - "hiprio"); 700 names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name; 701 - INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work); 702 - INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs); 703 - INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].end_reqs); 704 - INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work, 705 - virtio_fs_hiprio_dispatch_work); 706 - init_completion(&fs->vqs[VQ_HIPRIO].in_flight_zero); 707 - spin_lock_init(&fs->vqs[VQ_HIPRIO].lock); 708 709 /* Initialize the requests virtqueues */ 710 for (i = VQ_REQUEST; i < fs->nvqs; i++) { 711 - spin_lock_init(&fs->vqs[i].lock); 712 - INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work); 713 - INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work, 714 - virtio_fs_request_dispatch_work); 715 - INIT_LIST_HEAD(&fs->vqs[i].queued_reqs); 716 - INIT_LIST_HEAD(&fs->vqs[i].end_reqs); 717 - init_completion(&fs->vqs[i].in_flight_zero); 718 - snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name), 719 - "requests.%u", i - VQ_REQUEST); 720 callbacks[i] = virtio_fs_vq_done; 721 names[i] = 
fs->vqs[i].name; 722 } ··· 733 vdev->config->del_vqs(vdev); 734 } 735 736 static int virtio_fs_probe(struct virtio_device *vdev) 737 { 738 struct virtio_fs *fs; ··· 877 goto out; 878 879 /* TODO vq affinity */ 880 881 /* Bring the device online in case the filesystem is mounted and 882 * requests need to be sent before we return. ··· 1018 spin_unlock(&fiq->lock); 1019 } 1020 1021 /* Return the number of scatter-gather list elements required */ 1022 static unsigned int sg_count_fuse_req(struct fuse_req *req) 1023 { 1024 struct fuse_args *args = req->args; 1025 struct fuse_args_pages *ap = container_of(args, typeof(*ap), args); 1026 - unsigned int total_sgs = 1 /* fuse_in_header */; 1027 1028 if (args->in_numargs - args->in_pages) 1029 total_sgs += 1; 1030 1031 - if (args->in_pages) 1032 - total_sgs += ap->num_pages; 1033 1034 if (!test_bit(FR_ISREPLY, &req->flags)) 1035 return total_sgs; ··· 1058 if (args->out_numargs - args->out_pages) 1059 total_sgs += 1; 1060 1061 - if (args->out_pages) 1062 - total_sgs += ap->num_pages; 1063 1064 return total_sgs; 1065 } ··· 1278 .release = virtio_fs_fiq_release, 1279 }; 1280 1281 - static int virtio_fs_fill_super(struct super_block *sb) 1282 { 1283 - struct fuse_conn *fc = get_fuse_conn_super(sb); 1284 struct virtio_fs *fs = fc->iq.priv; 1285 unsigned int i; 1286 int err; 1287 - struct fuse_fs_context ctx = { 1288 - .rootmode = S_IFDIR, 1289 - .default_permissions = 1, 1290 - .allow_other = 1, 1291 - .max_read = UINT_MAX, 1292 - .blksize = 512, 1293 - .destroy = true, 1294 - .no_control = true, 1295 - .no_force_umount = true, 1296 - .no_mount_options = true, 1297 - }; 1298 1299 mutex_lock(&virtio_fs_mutex); 1300 1301 /* After holding mutex, make sure virtiofs device is still there. ··· 1323 } 1324 1325 /* virtiofs allocates and installs its own fuse devices */ 1326 - ctx.fudptr = NULL; 1327 - err = fuse_fill_super_common(sb, &ctx); 1328 if (err < 0) 1329 goto err_free_fuse_devs; 1330 ··· 1338 1339 /* Previous unmount will stop all queues. Start these again */ 1340 virtio_fs_start_all_queues(fs); 1341 - fuse_send_init(fc); 1342 mutex_unlock(&virtio_fs_mutex); 1343 return 0; 1344 ··· 1349 return err; 1350 } 1351 1352 - static void virtio_kill_sb(struct super_block *sb) 1353 { 1354 - struct fuse_conn *fc = get_fuse_conn_super(sb); 1355 - struct virtio_fs *vfs; 1356 - struct virtio_fs_vq *fsvq; 1357 1358 - /* If mount failed, we can still be called without any fc */ 1359 - if (!fc) 1360 - return fuse_kill_sb_anon(sb); 1361 - 1362 - vfs = fc->iq.priv; 1363 - fsvq = &vfs->vqs[VQ_HIPRIO]; 1364 1365 /* Stop forget queue. Soon destroy will be sent */ 1366 spin_lock(&fsvq->lock); ··· 1367 spin_unlock(&fsvq->lock); 1368 virtio_fs_drain_all_queues(vfs); 1369 1370 - fuse_kill_sb_anon(sb); 1371 1372 - /* fuse_kill_sb_anon() must have sent destroy. Stop all queues 1373 * and drain one more time and free fuse devices. Freeing fuse 1374 * devices will drop their reference on fuse_conn and that in 1375 * turn will drop its reference on virtio_fs object. 
··· 1379 virtio_fs_free_devs(vfs); 1380 } 1381 1382 static int virtio_fs_test_super(struct super_block *sb, 1383 struct fs_context *fsc) 1384 { 1385 - struct fuse_conn *fc = fsc->s_fs_info; 1386 1387 - return fc->iq.priv == get_fuse_conn_super(sb)->iq.priv; 1388 } 1389 1390 static int virtio_fs_set_super(struct super_block *sb, ··· 1409 1410 err = get_anon_bdev(&sb->s_dev); 1411 if (!err) 1412 - fuse_conn_get(fsc->s_fs_info); 1413 1414 return err; 1415 } ··· 1419 struct virtio_fs *fs; 1420 struct super_block *sb; 1421 struct fuse_conn *fc; 1422 int err; 1423 1424 /* This gets a reference on virtio_fs object. This ptr gets installed ··· 1440 return -ENOMEM; 1441 } 1442 1443 - fuse_conn_init(fc, get_user_ns(current_user_ns()), &virtio_fs_fiq_ops, 1444 - fs); 1445 fc->release = fuse_free_conn; 1446 fc->delete_stale = true; 1447 1448 - fsc->s_fs_info = fc; 1449 sb = sget_fc(fsc, virtio_fs_test_super, virtio_fs_set_super); 1450 - fuse_conn_put(fc); 1451 if (IS_ERR(sb)) 1452 return PTR_ERR(sb); 1453 1454 if (!sb->s_root) { 1455 - err = virtio_fs_fill_super(sb); 1456 if (err) { 1457 deactivate_locked_super(sb); 1458 return err; ··· 1477 } 1478 1479 static const struct fs_context_operations virtio_fs_context_ops = { 1480 .get_tree = virtio_fs_get_tree, 1481 }; 1482 1483 static int virtio_fs_init_fs_context(struct fs_context *fsc) 1484 { 1485 fsc->ops = &virtio_fs_context_ops; 1486 return 0; 1487 }
··· 5 */ 6 7 #include <linux/fs.h> 8 + #include <linux/dax.h> 9 + #include <linux/pci.h> 10 + #include <linux/pfn_t.h> 11 #include <linux/module.h> 12 #include <linux/virtio.h> 13 #include <linux/virtio_fs.h> 14 #include <linux/delay.h> 15 #include <linux/fs_context.h> 16 + #include <linux/fs_parser.h> 17 #include <linux/highmem.h> 18 + #include <linux/uio.h> 19 #include "fuse_i.h" 20 21 /* List of virtio-fs device instances and a lock for the list. Also provides ··· 24 VQ_REQUEST 25 }; 26 27 + #define VQ_NAME_LEN 24 28 + 29 /* Per-virtqueue state */ 30 struct virtio_fs_vq { 31 spinlock_t lock; ··· 36 bool connected; 37 long in_flight; 38 struct completion in_flight_zero; /* No inflight requests */ 39 + char name[VQ_NAME_LEN]; 40 } ____cacheline_aligned_in_smp; 41 42 /* A virtio-fs device instance */ ··· 47 struct virtio_fs_vq *vqs; 48 unsigned int nvqs; /* number of virtqueues */ 49 unsigned int num_request_queues; /* number of request queues */ 50 + struct dax_device *dax_dev; 51 + 52 + /* DAX memory window where file contents are mapped */ 53 + void *window_kaddr; 54 + phys_addr_t window_phys_addr; 55 + size_t window_len; 56 }; 57 58 struct virtio_fs_forget_req { ··· 68 69 static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, 70 struct fuse_req *req, bool in_flight); 71 + 72 + enum { 73 + OPT_DAX, 74 + }; 75 + 76 + static const struct fs_parameter_spec virtio_fs_parameters[] = { 77 + fsparam_flag("dax", OPT_DAX), 78 + {} 79 + }; 80 + 81 + static int virtio_fs_parse_param(struct fs_context *fc, 82 + struct fs_parameter *param) 83 + { 84 + struct fs_parse_result result; 85 + struct fuse_fs_context *ctx = fc->fs_private; 86 + int opt; 87 + 88 + opt = fs_parse(fc, virtio_fs_parameters, param, &result); 89 + if (opt < 0) 90 + return opt; 91 + 92 + switch (opt) { 93 + case OPT_DAX: 94 + ctx->dax = 1; 95 + break; 96 + default: 97 + return -EINVAL; 98 + } 99 + 100 + return 0; 101 + } 102 + 103 + static void virtio_fs_free_fc(struct fs_context *fc) 104 + { 105 + struct fuse_fs_context *ctx = fc->fs_private; 106 + 107 + kfree(ctx); 108 + } 109 110 static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq) 111 { ··· 289 struct fuse_req *req; 290 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 291 dispatch_work.work); 292 int ret; 293 294 pr_debug("virtio-fs: worker %s called.\n", __func__); ··· 304 305 list_del_init(&req->list); 306 spin_unlock(&fsvq->lock); 307 + fuse_request_end(req); 308 } 309 310 /* Dispatch pending requests */ ··· 335 spin_unlock(&fsvq->lock); 336 pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", 337 ret); 338 + fuse_request_end(req); 339 } 340 } 341 } ··· 495 struct virtio_fs_vq *fsvq) 496 { 497 struct fuse_pqueue *fpq = &fsvq->fud->pq; 498 struct fuse_args *args; 499 struct fuse_args_pages *ap; 500 unsigned int len, i, thislen; ··· 528 clear_bit(FR_SENT, &req->flags); 529 spin_unlock(&fpq->lock); 530 531 + fuse_request_end(req); 532 spin_lock(&fsvq->lock); 533 dec_in_flight_req(fsvq); 534 spin_unlock(&fsvq->lock); ··· 596 schedule_work(&fsvq->done_work); 597 } 598 599 + static void virtio_fs_init_vq(struct virtio_fs_vq *fsvq, char *name, 600 + int vq_type) 601 + { 602 + strncpy(fsvq->name, name, VQ_NAME_LEN); 603 + spin_lock_init(&fsvq->lock); 604 + INIT_LIST_HEAD(&fsvq->queued_reqs); 605 + INIT_LIST_HEAD(&fsvq->end_reqs); 606 + init_completion(&fsvq->in_flight_zero); 607 + 608 + if (vq_type == VQ_REQUEST) { 609 + INIT_WORK(&fsvq->done_work, virtio_fs_requests_done_work); 610 + INIT_DELAYED_WORK(&fsvq->dispatch_work, 611 + 
virtio_fs_request_dispatch_work); 612 + } else { 613 + INIT_WORK(&fsvq->done_work, virtio_fs_hiprio_done_work); 614 + INIT_DELAYED_WORK(&fsvq->dispatch_work, 615 + virtio_fs_hiprio_dispatch_work); 616 + } 617 + } 618 + 619 /* Initialize virtqueues */ 620 static int virtio_fs_setup_vqs(struct virtio_device *vdev, 621 struct virtio_fs *fs) ··· 611 if (fs->num_request_queues == 0) 612 return -EINVAL; 613 614 + fs->nvqs = VQ_REQUEST + fs->num_request_queues; 615 fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL); 616 if (!fs->vqs) 617 return -ENOMEM; ··· 625 goto out; 626 } 627 628 + /* Initialize the hiprio/forget request virtqueue */ 629 callbacks[VQ_HIPRIO] = virtio_fs_vq_done; 630 + virtio_fs_init_vq(&fs->vqs[VQ_HIPRIO], "hiprio", VQ_HIPRIO); 631 names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name; 632 633 /* Initialize the requests virtqueues */ 634 for (i = VQ_REQUEST; i < fs->nvqs; i++) { 635 + char vq_name[VQ_NAME_LEN]; 636 + 637 + snprintf(vq_name, VQ_NAME_LEN, "requests.%u", i - VQ_REQUEST); 638 + virtio_fs_init_vq(&fs->vqs[i], vq_name, VQ_REQUEST); 639 callbacks[i] = virtio_fs_vq_done; 640 names[i] = fs->vqs[i].name; 641 } ··· 676 vdev->config->del_vqs(vdev); 677 } 678 679 + /* Map a window offset to a page frame number. The window offset will have 680 + * been produced by .iomap_begin(), which maps a file offset to a window 681 + * offset. 682 + */ 683 + static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, 684 + long nr_pages, void **kaddr, pfn_t *pfn) 685 + { 686 + struct virtio_fs *fs = dax_get_private(dax_dev); 687 + phys_addr_t offset = PFN_PHYS(pgoff); 688 + size_t max_nr_pages = fs->window_len/PAGE_SIZE - pgoff; 689 + 690 + if (kaddr) 691 + *kaddr = fs->window_kaddr + offset; 692 + if (pfn) 693 + *pfn = phys_to_pfn_t(fs->window_phys_addr + offset, 694 + PFN_DEV | PFN_MAP); 695 + return nr_pages > max_nr_pages ? 
max_nr_pages : nr_pages; 696 + } 697 + 698 + static size_t virtio_fs_copy_from_iter(struct dax_device *dax_dev, 699 + pgoff_t pgoff, void *addr, 700 + size_t bytes, struct iov_iter *i) 701 + { 702 + return copy_from_iter(addr, bytes, i); 703 + } 704 + 705 + static size_t virtio_fs_copy_to_iter(struct dax_device *dax_dev, 706 + pgoff_t pgoff, void *addr, 707 + size_t bytes, struct iov_iter *i) 708 + { 709 + return copy_to_iter(addr, bytes, i); 710 + } 711 + 712 + static int virtio_fs_zero_page_range(struct dax_device *dax_dev, 713 + pgoff_t pgoff, size_t nr_pages) 714 + { 715 + long rc; 716 + void *kaddr; 717 + 718 + rc = dax_direct_access(dax_dev, pgoff, nr_pages, &kaddr, NULL); 719 + if (rc < 0) 720 + return rc; 721 + memset(kaddr, 0, nr_pages << PAGE_SHIFT); 722 + dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT); 723 + return 0; 724 + } 725 + 726 + static const struct dax_operations virtio_fs_dax_ops = { 727 + .direct_access = virtio_fs_direct_access, 728 + .copy_from_iter = virtio_fs_copy_from_iter, 729 + .copy_to_iter = virtio_fs_copy_to_iter, 730 + .zero_page_range = virtio_fs_zero_page_range, 731 + }; 732 + 733 + static void virtio_fs_cleanup_dax(void *data) 734 + { 735 + struct dax_device *dax_dev = data; 736 + 737 + kill_dax(dax_dev); 738 + put_dax(dax_dev); 739 + } 740 + 741 + static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs) 742 + { 743 + struct virtio_shm_region cache_reg; 744 + struct dev_pagemap *pgmap; 745 + bool have_cache; 746 + 747 + if (!IS_ENABLED(CONFIG_FUSE_DAX)) 748 + return 0; 749 + 750 + /* Get cache region */ 751 + have_cache = virtio_get_shm_region(vdev, &cache_reg, 752 + (u8)VIRTIO_FS_SHMCAP_ID_CACHE); 753 + if (!have_cache) { 754 + dev_notice(&vdev->dev, "%s: No cache capability\n", __func__); 755 + return 0; 756 + } 757 + 758 + if (!devm_request_mem_region(&vdev->dev, cache_reg.addr, cache_reg.len, 759 + dev_name(&vdev->dev))) { 760 + dev_warn(&vdev->dev, "could not reserve region addr=0x%llx len=0x%llx\n", 761 + cache_reg.addr, cache_reg.len); 762 + return -EBUSY; 763 + } 764 + 765 + dev_notice(&vdev->dev, "Cache len: 0x%llx @ 0x%llx\n", cache_reg.len, 766 + cache_reg.addr); 767 + 768 + pgmap = devm_kzalloc(&vdev->dev, sizeof(*pgmap), GFP_KERNEL); 769 + if (!pgmap) 770 + return -ENOMEM; 771 + 772 + pgmap->type = MEMORY_DEVICE_FS_DAX; 773 + 774 + /* Ideally we would directly use the PCI BAR resource but 775 + * devm_memremap_pages() wants its own copy in pgmap. So 776 + * initialize a struct resource from scratch (only the start 777 + * and end fields will be used). 
778 + */ 779 + pgmap->range = (struct range) { 780 + .start = (phys_addr_t) cache_reg.addr, 781 + .end = (phys_addr_t) cache_reg.addr + cache_reg.len - 1, 782 + }; 783 + pgmap->nr_range = 1; 784 + 785 + fs->window_kaddr = devm_memremap_pages(&vdev->dev, pgmap); 786 + if (IS_ERR(fs->window_kaddr)) 787 + return PTR_ERR(fs->window_kaddr); 788 + 789 + fs->window_phys_addr = (phys_addr_t) cache_reg.addr; 790 + fs->window_len = (phys_addr_t) cache_reg.len; 791 + 792 + dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n", 793 + __func__, fs->window_kaddr, cache_reg.addr, cache_reg.len); 794 + 795 + fs->dax_dev = alloc_dax(fs, NULL, &virtio_fs_dax_ops, 0); 796 + if (IS_ERR(fs->dax_dev)) 797 + return PTR_ERR(fs->dax_dev); 798 + 799 + return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax, 800 + fs->dax_dev); 801 + } 802 + 803 static int virtio_fs_probe(struct virtio_device *vdev) 804 { 805 struct virtio_fs *fs; ··· 696 goto out; 697 698 /* TODO vq affinity */ 699 + 700 + ret = virtio_fs_setup_dax(vdev, fs); 701 + if (ret < 0) 702 + goto out_vqs; 703 704 /* Bring the device online in case the filesystem is mounted and 705 * requests need to be sent before we return. ··· 833 spin_unlock(&fiq->lock); 834 } 835 836 + /* Count number of scatter-gather elements required */ 837 + static unsigned int sg_count_fuse_pages(struct fuse_page_desc *page_descs, 838 + unsigned int num_pages, 839 + unsigned int total_len) 840 + { 841 + unsigned int i; 842 + unsigned int this_len; 843 + 844 + for (i = 0; i < num_pages && total_len; i++) { 845 + this_len = min(page_descs[i].length, total_len); 846 + total_len -= this_len; 847 + } 848 + 849 + return i; 850 + } 851 + 852 /* Return the number of scatter-gather list elements required */ 853 static unsigned int sg_count_fuse_req(struct fuse_req *req) 854 { 855 struct fuse_args *args = req->args; 856 struct fuse_args_pages *ap = container_of(args, typeof(*ap), args); 857 + unsigned int size, total_sgs = 1 /* fuse_in_header */; 858 859 if (args->in_numargs - args->in_pages) 860 total_sgs += 1; 861 862 + if (args->in_pages) { 863 + size = args->in_args[args->in_numargs - 1].size; 864 + total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages, 865 + size); 866 + } 867 868 if (!test_bit(FR_ISREPLY, &req->flags)) 869 return total_sgs; ··· 854 if (args->out_numargs - args->out_pages) 855 total_sgs += 1; 856 857 + if (args->out_pages) { 858 + size = args->out_args[args->out_numargs - 1].size; 859 + total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages, 860 + size); 861 + } 862 863 return total_sgs; 864 } ··· 1071 .release = virtio_fs_fiq_release, 1072 }; 1073 1074 + static inline void virtio_fs_ctx_set_defaults(struct fuse_fs_context *ctx) 1075 { 1076 + ctx->rootmode = S_IFDIR; 1077 + ctx->default_permissions = 1; 1078 + ctx->allow_other = 1; 1079 + ctx->max_read = UINT_MAX; 1080 + ctx->blksize = 512; 1081 + ctx->destroy = true; 1082 + ctx->no_control = true; 1083 + ctx->no_force_umount = true; 1084 + } 1085 + 1086 + static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc) 1087 + { 1088 + struct fuse_mount *fm = get_fuse_mount_super(sb); 1089 + struct fuse_conn *fc = fm->fc; 1090 struct virtio_fs *fs = fc->iq.priv; 1091 + struct fuse_fs_context *ctx = fsc->fs_private; 1092 unsigned int i; 1093 int err; 1094 1095 + virtio_fs_ctx_set_defaults(ctx); 1096 mutex_lock(&virtio_fs_mutex); 1097 1098 /* After holding mutex, make sure virtiofs device is still there. 
··· 1112 } 1113 1114 /* virtiofs allocates and installs its own fuse devices */ 1115 + ctx->fudptr = NULL; 1116 + if (ctx->dax) 1117 + ctx->dax_dev = fs->dax_dev; 1118 + err = fuse_fill_super_common(sb, ctx); 1119 if (err < 0) 1120 goto err_free_fuse_devs; 1121 ··· 1125 1126 /* Previous unmount will stop all queues. Start these again */ 1127 virtio_fs_start_all_queues(fs); 1128 + fuse_send_init(fm); 1129 mutex_unlock(&virtio_fs_mutex); 1130 return 0; 1131 ··· 1136 return err; 1137 } 1138 1139 + static void virtio_fs_conn_destroy(struct fuse_mount *fm) 1140 { 1141 + struct fuse_conn *fc = fm->fc; 1142 + struct virtio_fs *vfs = fc->iq.priv; 1143 + struct virtio_fs_vq *fsvq = &vfs->vqs[VQ_HIPRIO]; 1144 1145 + /* Stop dax worker. Soon evict_inodes() will be called which 1146 + * will free all memory ranges belonging to all inodes. 1147 + */ 1148 + if (IS_ENABLED(CONFIG_FUSE_DAX)) 1149 + fuse_dax_cancel_work(fc); 1150 1151 /* Stop forget queue. Soon destroy will be sent */ 1152 spin_lock(&fsvq->lock); ··· 1155 spin_unlock(&fsvq->lock); 1156 virtio_fs_drain_all_queues(vfs); 1157 1158 + fuse_conn_destroy(fm); 1159 1160 + /* fuse_conn_destroy() must have sent destroy. Stop all queues 1161 * and drain one more time and free fuse devices. Freeing fuse 1162 * devices will drop their reference on fuse_conn and that in 1163 * turn will drop its reference on virtio_fs object. ··· 1167 virtio_fs_free_devs(vfs); 1168 } 1169 1170 + static void virtio_kill_sb(struct super_block *sb) 1171 + { 1172 + struct fuse_mount *fm = get_fuse_mount_super(sb); 1173 + bool last; 1174 + 1175 + /* If mount failed, we can still be called without any fc */ 1176 + if (fm) { 1177 + last = fuse_mount_remove(fm); 1178 + if (last) 1179 + virtio_fs_conn_destroy(fm); 1180 + } 1181 + kill_anon_super(sb); 1182 + } 1183 + 1184 static int virtio_fs_test_super(struct super_block *sb, 1185 struct fs_context *fsc) 1186 { 1187 + struct fuse_mount *fsc_fm = fsc->s_fs_info; 1188 + struct fuse_mount *sb_fm = get_fuse_mount_super(sb); 1189 1190 + return fsc_fm->fc->iq.priv == sb_fm->fc->iq.priv; 1191 } 1192 1193 static int virtio_fs_set_super(struct super_block *sb, ··· 1182 1183 err = get_anon_bdev(&sb->s_dev); 1184 if (!err) 1185 + fuse_mount_get(fsc->s_fs_info); 1186 1187 return err; 1188 } ··· 1192 struct virtio_fs *fs; 1193 struct super_block *sb; 1194 struct fuse_conn *fc; 1195 + struct fuse_mount *fm; 1196 int err; 1197 1198 /* This gets a reference on virtio_fs object. 
This ptr gets installed ··· 1212 return -ENOMEM; 1213 } 1214 1215 + fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL); 1216 + if (!fm) { 1217 + mutex_lock(&virtio_fs_mutex); 1218 + virtio_fs_put(fs); 1219 + mutex_unlock(&virtio_fs_mutex); 1220 + kfree(fc); 1221 + return -ENOMEM; 1222 + } 1223 + 1224 + fuse_conn_init(fc, fm, get_user_ns(current_user_ns()), 1225 + &virtio_fs_fiq_ops, fs); 1226 fc->release = fuse_free_conn; 1227 fc->delete_stale = true; 1228 + fc->auto_submounts = true; 1229 1230 + fsc->s_fs_info = fm; 1231 sb = sget_fc(fsc, virtio_fs_test_super, virtio_fs_set_super); 1232 + fuse_mount_put(fm); 1233 if (IS_ERR(sb)) 1234 return PTR_ERR(sb); 1235 1236 if (!sb->s_root) { 1237 + err = virtio_fs_fill_super(sb, fsc); 1238 if (err) { 1239 deactivate_locked_super(sb); 1240 return err; ··· 1239 } 1240 1241 static const struct fs_context_operations virtio_fs_context_ops = { 1242 + .free = virtio_fs_free_fc, 1243 + .parse_param = virtio_fs_parse_param, 1244 .get_tree = virtio_fs_get_tree, 1245 }; 1246 1247 static int virtio_fs_init_fs_context(struct fs_context *fsc) 1248 { 1249 + struct fuse_fs_context *ctx; 1250 + 1251 + ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL); 1252 + if (!ctx) 1253 + return -ENOMEM; 1254 + fsc->fs_private = ctx; 1255 fsc->ops = &virtio_fs_context_ops; 1256 return 0; 1257 }
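virtio_fs_direct_access() above is essentially offset arithmetic on the memremapped cache window: the page offset supplied by the DAX core is converted to a byte offset, added to window_kaddr/window_phys_addr, and the request is clamped so it never runs past window_len. The standalone sketch below reproduces just the clamping step, assuming a 4 KiB page size; the toy_* names are invented for illustration and are not part of the patch.

#include <stdint.h>
#include <stdio.h>

#define TOY_PAGE_SIZE 4096UL

/* Clamp an access of nr_pages starting at page offset pgoff so that it
 * stays inside a DAX window of window_len bytes; returns the number of
 * pages that may actually be accessed, mirroring the arithmetic in
 * virtio_fs_direct_access() above. */
static long toy_dax_clamp(uint64_t window_len, uint64_t pgoff, long nr_pages)
{
	uint64_t max_nr_pages = window_len / TOY_PAGE_SIZE - pgoff;

	return nr_pages > (long)max_nr_pages ? (long)max_nr_pages : nr_pages;
}

int main(void)
{
	/* 8 MiB window (2048 pages): an 8-page access at page offset 2046
	 * is cut down to the 2 pages that remain in the window. */
	printf("%ld\n", toy_dax_clamp(8UL << 20, 2046, 8));
	return 0;
}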
+17 -17
fs/fuse/xattr.c
··· 14 int fuse_setxattr(struct inode *inode, const char *name, const void *value, 15 size_t size, int flags) 16 { 17 - struct fuse_conn *fc = get_fuse_conn(inode); 18 FUSE_ARGS(args); 19 struct fuse_setxattr_in inarg; 20 int err; 21 22 - if (fc->no_setxattr) 23 return -EOPNOTSUPP; 24 25 memset(&inarg, 0, sizeof(inarg)); ··· 34 args.in_args[1].value = name; 35 args.in_args[2].size = size; 36 args.in_args[2].value = value; 37 - err = fuse_simple_request(fc, &args); 38 if (err == -ENOSYS) { 39 - fc->no_setxattr = 1; 40 err = -EOPNOTSUPP; 41 } 42 if (!err) { ··· 49 ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value, 50 size_t size) 51 { 52 - struct fuse_conn *fc = get_fuse_conn(inode); 53 FUSE_ARGS(args); 54 struct fuse_getxattr_in inarg; 55 struct fuse_getxattr_out outarg; 56 ssize_t ret; 57 58 - if (fc->no_getxattr) 59 return -EOPNOTSUPP; 60 61 memset(&inarg, 0, sizeof(inarg)); ··· 77 args.out_args[0].size = sizeof(outarg); 78 args.out_args[0].value = &outarg; 79 } 80 - ret = fuse_simple_request(fc, &args); 81 if (!ret && !size) 82 ret = min_t(ssize_t, outarg.size, XATTR_SIZE_MAX); 83 if (ret == -ENOSYS) { 84 - fc->no_getxattr = 1; 85 ret = -EOPNOTSUPP; 86 } 87 return ret; ··· 107 ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) 108 { 109 struct inode *inode = d_inode(entry); 110 - struct fuse_conn *fc = get_fuse_conn(inode); 111 FUSE_ARGS(args); 112 struct fuse_getxattr_in inarg; 113 struct fuse_getxattr_out outarg; 114 ssize_t ret; 115 116 - if (!fuse_allow_current_process(fc)) 117 return -EACCES; 118 119 - if (fc->no_listxattr) 120 return -EOPNOTSUPP; 121 122 memset(&inarg, 0, sizeof(inarg)); ··· 136 args.out_args[0].size = sizeof(outarg); 137 args.out_args[0].value = &outarg; 138 } 139 - ret = fuse_simple_request(fc, &args); 140 if (!ret && !size) 141 ret = min_t(ssize_t, outarg.size, XATTR_LIST_MAX); 142 if (ret > 0 && size) 143 ret = fuse_verify_xattr_list(list, ret); 144 if (ret == -ENOSYS) { 145 - fc->no_listxattr = 1; 146 ret = -EOPNOTSUPP; 147 } 148 return ret; ··· 150 151 int fuse_removexattr(struct inode *inode, const char *name) 152 { 153 - struct fuse_conn *fc = get_fuse_conn(inode); 154 FUSE_ARGS(args); 155 int err; 156 157 - if (fc->no_removexattr) 158 return -EOPNOTSUPP; 159 160 args.opcode = FUSE_REMOVEXATTR; ··· 162 args.in_numargs = 1; 163 args.in_args[0].size = strlen(name) + 1; 164 args.in_args[0].value = name; 165 - err = fuse_simple_request(fc, &args); 166 if (err == -ENOSYS) { 167 - fc->no_removexattr = 1; 168 err = -EOPNOTSUPP; 169 } 170 if (!err) {
··· 14 int fuse_setxattr(struct inode *inode, const char *name, const void *value, 15 size_t size, int flags) 16 { 17 + struct fuse_mount *fm = get_fuse_mount(inode); 18 FUSE_ARGS(args); 19 struct fuse_setxattr_in inarg; 20 int err; 21 22 + if (fm->fc->no_setxattr) 23 return -EOPNOTSUPP; 24 25 memset(&inarg, 0, sizeof(inarg)); ··· 34 args.in_args[1].value = name; 35 args.in_args[2].size = size; 36 args.in_args[2].value = value; 37 + err = fuse_simple_request(fm, &args); 38 if (err == -ENOSYS) { 39 + fm->fc->no_setxattr = 1; 40 err = -EOPNOTSUPP; 41 } 42 if (!err) { ··· 49 ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value, 50 size_t size) 51 { 52 + struct fuse_mount *fm = get_fuse_mount(inode); 53 FUSE_ARGS(args); 54 struct fuse_getxattr_in inarg; 55 struct fuse_getxattr_out outarg; 56 ssize_t ret; 57 58 + if (fm->fc->no_getxattr) 59 return -EOPNOTSUPP; 60 61 memset(&inarg, 0, sizeof(inarg)); ··· 77 args.out_args[0].size = sizeof(outarg); 78 args.out_args[0].value = &outarg; 79 } 80 + ret = fuse_simple_request(fm, &args); 81 if (!ret && !size) 82 ret = min_t(ssize_t, outarg.size, XATTR_SIZE_MAX); 83 if (ret == -ENOSYS) { 84 + fm->fc->no_getxattr = 1; 85 ret = -EOPNOTSUPP; 86 } 87 return ret; ··· 107 ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) 108 { 109 struct inode *inode = d_inode(entry); 110 + struct fuse_mount *fm = get_fuse_mount(inode); 111 FUSE_ARGS(args); 112 struct fuse_getxattr_in inarg; 113 struct fuse_getxattr_out outarg; 114 ssize_t ret; 115 116 + if (!fuse_allow_current_process(fm->fc)) 117 return -EACCES; 118 119 + if (fm->fc->no_listxattr) 120 return -EOPNOTSUPP; 121 122 memset(&inarg, 0, sizeof(inarg)); ··· 136 args.out_args[0].size = sizeof(outarg); 137 args.out_args[0].value = &outarg; 138 } 139 + ret = fuse_simple_request(fm, &args); 140 if (!ret && !size) 141 ret = min_t(ssize_t, outarg.size, XATTR_LIST_MAX); 142 if (ret > 0 && size) 143 ret = fuse_verify_xattr_list(list, ret); 144 if (ret == -ENOSYS) { 145 + fm->fc->no_listxattr = 1; 146 ret = -EOPNOTSUPP; 147 } 148 return ret; ··· 150 151 int fuse_removexattr(struct inode *inode, const char *name) 152 { 153 + struct fuse_mount *fm = get_fuse_mount(inode); 154 FUSE_ARGS(args); 155 int err; 156 157 + if (fm->fc->no_removexattr) 158 return -EOPNOTSUPP; 159 160 args.opcode = FUSE_REMOVEXATTR; ··· 162 args.in_numargs = 1; 163 args.in_args[0].size = strlen(name) + 1; 164 args.in_args[0].value = name; 165 + err = fuse_simple_request(fm, &args); 166 if (err == -ENOSYS) { 167 + fm->fc->no_removexattr = 1; 168 err = -EOPNOTSUPP; 169 } 170 if (!err) {
+6
include/linux/dax.h
··· 149 struct dax_device *dax_dev, struct writeback_control *wbc); 150 151 struct page *dax_layout_busy_page(struct address_space *mapping); 152 dax_entry_t dax_lock_page(struct page *page); 153 void dax_unlock_page(struct page *page, dax_entry_t cookie); 154 #else ··· 176 } 177 178 static inline struct page *dax_layout_busy_page(struct address_space *mapping) 179 { 180 return NULL; 181 }
··· 149 struct dax_device *dax_dev, struct writeback_control *wbc); 150 151 struct page *dax_layout_busy_page(struct address_space *mapping); 152 + struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end); 153 dax_entry_t dax_lock_page(struct page *page); 154 void dax_unlock_page(struct page *page, dax_entry_t cookie); 155 #else ··· 175 } 176 177 static inline struct page *dax_layout_busy_page(struct address_space *mapping) 178 + { 179 + return NULL; 180 + } 181 + 182 + static inline struct page *dax_layout_busy_page_range(struct address_space *mapping, pgoff_t start, pgoff_t nr_pages) 183 { 184 return NULL; 185 }
+47 -3
include/uapi/linux/fuse.h
··· 172 * - add FUSE_WRITE_KILL_PRIV flag 173 * - add FUSE_SETUPMAPPING and FUSE_REMOVEMAPPING 174 * - add map_alignment to fuse_init_out, add FUSE_MAP_ALIGNMENT flag 175 */ 176 177 #ifndef _LINUX_FUSE_H ··· 210 #define FUSE_KERNEL_VERSION 7 211 212 /** Minor version number of this interface */ 213 - #define FUSE_KERNEL_MINOR_VERSION 31 214 215 /** The node ID of the root inode */ 216 #define FUSE_ROOT_ID 1 ··· 234 uint32_t gid; 235 uint32_t rdev; 236 uint32_t blksize; 237 - uint32_t padding; 238 }; 239 240 struct fuse_kstatfs { ··· 316 * FUSE_CACHE_SYMLINKS: cache READLINK responses 317 * FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir 318 * FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request 319 - * FUSE_MAP_ALIGNMENT: map_alignment field is valid 320 */ 321 #define FUSE_ASYNC_READ (1 << 0) 322 #define FUSE_POSIX_LOCKS (1 << 1) ··· 348 #define FUSE_NO_OPENDIR_SUPPORT (1 << 24) 349 #define FUSE_EXPLICIT_INVAL_DATA (1 << 25) 350 #define FUSE_MAP_ALIGNMENT (1 << 26) 351 352 /** 353 * CUSE INIT request/reply flags ··· 423 * FUSE_FSYNC_FDATASYNC: Sync data only, not metadata 424 */ 425 #define FUSE_FSYNC_FDATASYNC (1 << 0) 426 427 enum fuse_opcode { 428 FUSE_LOOKUP = 1, ··· 905 uint64_t len; 906 uint64_t flags; 907 }; 908 909 #endif /* _LINUX_FUSE_H */
··· 172 * - add FUSE_WRITE_KILL_PRIV flag 173 * - add FUSE_SETUPMAPPING and FUSE_REMOVEMAPPING 174 * - add map_alignment to fuse_init_out, add FUSE_MAP_ALIGNMENT flag 175 + * 176 + * 7.32 177 + * - add flags to fuse_attr, add FUSE_ATTR_SUBMOUNT, add FUSE_SUBMOUNTS 178 */ 179 180 #ifndef _LINUX_FUSE_H ··· 207 #define FUSE_KERNEL_VERSION 7 208 209 /** Minor version number of this interface */ 210 + #define FUSE_KERNEL_MINOR_VERSION 32 211 212 /** The node ID of the root inode */ 213 #define FUSE_ROOT_ID 1 ··· 231 uint32_t gid; 232 uint32_t rdev; 233 uint32_t blksize; 234 + uint32_t flags; 235 }; 236 237 struct fuse_kstatfs { ··· 313 * FUSE_CACHE_SYMLINKS: cache READLINK responses 314 * FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir 315 * FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request 316 + * FUSE_MAP_ALIGNMENT: init_out.map_alignment contains log2(byte alignment) for 317 + * foffset and moffset fields in struct 318 + * fuse_setupmapping_out and fuse_removemapping_one. 319 + * FUSE_SUBMOUNTS: kernel supports auto-mounting directory submounts 320 */ 321 #define FUSE_ASYNC_READ (1 << 0) 322 #define FUSE_POSIX_LOCKS (1 << 1) ··· 342 #define FUSE_NO_OPENDIR_SUPPORT (1 << 24) 343 #define FUSE_EXPLICIT_INVAL_DATA (1 << 25) 344 #define FUSE_MAP_ALIGNMENT (1 << 26) 345 + #define FUSE_SUBMOUNTS (1 << 27) 346 347 /** 348 * CUSE INIT request/reply flags ··· 416 * FUSE_FSYNC_FDATASYNC: Sync data only, not metadata 417 */ 418 #define FUSE_FSYNC_FDATASYNC (1 << 0) 419 + 420 + /** 421 + * fuse_attr flags 422 + * 423 + * FUSE_ATTR_SUBMOUNT: Object is a submount root 424 + */ 425 + #define FUSE_ATTR_SUBMOUNT (1 << 0) 426 427 enum fuse_opcode { 428 FUSE_LOOKUP = 1, ··· 891 uint64_t len; 892 uint64_t flags; 893 }; 894 + 895 + #define FUSE_SETUPMAPPING_FLAG_WRITE (1ull << 0) 896 + #define FUSE_SETUPMAPPING_FLAG_READ (1ull << 1) 897 + struct fuse_setupmapping_in { 898 + /* An already open handle */ 899 + uint64_t fh; 900 + /* Offset into the file to start the mapping */ 901 + uint64_t foffset; 902 + /* Length of mapping required */ 903 + uint64_t len; 904 + /* Flags, FUSE_SETUPMAPPING_FLAG_* */ 905 + uint64_t flags; 906 + /* Offset in Memory Window */ 907 + uint64_t moffset; 908 + }; 909 + 910 + struct fuse_removemapping_in { 911 + /* number of fuse_removemapping_one follows */ 912 + uint32_t count; 913 + }; 914 + 915 + struct fuse_removemapping_one { 916 + /* Offset into the dax window start the unmapping */ 917 + uint64_t moffset; 918 + /* Length of mapping required */ 919 + uint64_t len; 920 + }; 921 + 922 + #define FUSE_REMOVEMAPPING_MAX_ENTRY \ 923 + (PAGE_SIZE / sizeof(struct fuse_removemapping_one)) 924 925 #endif /* _LINUX_FUSE_H */
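For reference, the new setupmapping/removemapping structures above are plain fixed-width structs on the wire. The snippet below copies the removemapping definitions from this hunk and shows how a FUSE_REMOVEMAPPING payload is shaped: a fuse_removemapping_in header whose count says how many fuse_removemapping_one entries follow. The flat-buffer serialization and the 2 MiB extent values are assumptions made for illustration, not taken from the patch.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Copied from the <uapi/linux/fuse.h> hunk above. */
struct fuse_removemapping_in {
	uint32_t count;
};

struct fuse_removemapping_one {
	uint64_t moffset;
	uint64_t len;
};

int main(void)
{
	/* Hypothetical batch: unmap two 2 MiB extents of the DAX window.
	 * In the real protocol this payload follows the usual fuse_in_header
	 * of a FUSE_REMOVEMAPPING request. */
	struct fuse_removemapping_in hdr = { .count = 2 };
	struct fuse_removemapping_one one[2] = {
		{ .moffset = 0,       .len = 2 << 20 },
		{ .moffset = 4 << 20, .len = 2 << 20 },
	};
	unsigned char buf[sizeof(hdr) + sizeof(one)];

	memcpy(buf, &hdr, sizeof(hdr));
	memcpy(buf + sizeof(hdr), one, sizeof(one));
	printf("payload is %zu bytes for %u entries\n", sizeof(buf), hdr.count);
	return 0;
}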
+3
include/uapi/linux/virtio_fs.h
··· 16 __le32 num_request_queues; 17 } __attribute__((packed)); 18 19 #endif /* _UAPI_LINUX_VIRTIO_FS_H */
··· 16 __le32 num_request_queues; 17 } __attribute__((packed)); 18 19 + /* For the id field in virtio_pci_shm_cap */ 20 + #define VIRTIO_FS_SHMCAP_ID_CACHE 0 21 + 22 #endif /* _UAPI_LINUX_VIRTIO_FS_H */