Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'fuse-update-6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse

Pull fuse updates from Miklos Szeredi:

- Add page -> folio conversions (Joanne Koong, Josef Bacik)

- Allow max size of fuse requests to be configurable with a sysctl
(Joanne Koong)

- Allow FOPEN_DIRECT_IO to take advantage of async code path (yangyun)

- Fix large kernel reads (like a module load) in virtio_fs (Hou Tao)

- Fix attribute inconsistency when readdirplus (and plain lookup in
corner cases) races with inode eviction (Zhang Tianci)

- Fix a WARN_ON triggered by virtio_fs (Asahi Lina)

* tag 'fuse-update-6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse: (30 commits)
virtiofs: dax: remove ->writepages() callback
fuse: check attributes staleness on fuse_iget()
fuse: remove pages for requests and exclusively use folios
fuse: convert direct io to use folios
mm/writeback: add folio_mark_dirty_lock()
fuse: convert writebacks to use folios
fuse: convert retrieves to use folios
fuse: convert ioctls to use folios
fuse: convert writes (non-writeback) to use folios
fuse: convert reads to use folios
fuse: convert readdir to use folios
fuse: convert readlink to use folios
fuse: convert cuse to use folios
fuse: add support in virtio for requests using folios
fuse: support folios in struct fuse_args_pages and fuse_copy_pages()
fuse: convert fuse_notify_store to use folios
fuse: convert fuse_retrieve to use folios
fuse: use the folio based vmstat helpers
fuse: convert fuse_writepage_need_send to take a folio
fuse: convert fuse_do_readpage to use folios
...

+579 -375
+10
Documentation/admin-guide/sysctl/fs.rst
···
337 337 on a 64-bit one.
338 338 The current default value for ``max_user_watches`` is 4% of the
339 339 available low memory, divided by the "watch" cost in bytes.
340 + 
341 + 5. /proc/sys/fs/fuse - Configuration options for FUSE filesystems
342 + =====================================================================
343 + 
344 + This directory contains the following configuration options for FUSE
345 + filesystems:
346 + 
347 + ``/proc/sys/fs/fuse/max_pages_limit`` is a read/write file for
348 + setting/getting the maximum number of pages that can be used for servicing
349 + requests in FUSE.
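The documentation hunk above fully specifies the new knob: a single unsigned integer in procfs. A userspace sketch of reading and raising it (not part of the series; 1024 is an arbitrary example value, and the write requires root):

	#include <stdio.h>

	int main(void)
	{
		const char *path = "/proc/sys/fs/fuse/max_pages_limit";
		unsigned int limit;
		FILE *f = fopen(path, "r+");

		if (!f) {
			perror(path);
			return 1;
		}
		/* Current limit; 256 is the compiled-in default per fuse/inode.c below. */
		if (fscanf(f, "%u", &limit) == 1)
			printf("current max_pages_limit: %u\n", limit);
		rewind(f);
		fprintf(f, "1024\n");	/* raise the per-request page cap */
		fclose(f);
		return 0;
	}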
+1
fs/fuse/Makefile
···
14 14 fuse-y += iomode.o
15 15 fuse-$(CONFIG_FUSE_DAX) += dax.o
16 16 fuse-$(CONFIG_FUSE_PASSTHROUGH) += passthrough.o
17 + fuse-$(CONFIG_SYSCTL) += sysctl.o
17 18 
18 19 virtiofs-y := virtio_fs.o
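The new fs/fuse/sysctl.c itself is not shown in this view. Based on the Makefile hunk and the fuse_max_pages_limit / fuse_sysctl_register() declarations in fuse_i.h below, a plausible sketch of what it registers; the exact table layout and bounds here are assumptions, not the actual commit:

	#include <linux/sysctl.h>
	#include "fuse_i.h"

	static struct ctl_table_header *fuse_table_header;

	static struct ctl_table fuse_sysctl_table[] = {
		{
			.procname	= "max_pages_limit",
			.data		= &fuse_max_pages_limit,
			.maxlen		= sizeof(fuse_max_pages_limit),
			.mode		= 0644,
			.proc_handler	= proc_douintvec_minmax,
			.extra1		= SYSCTL_ONE,		/* assumed lower bound */
			.extra2		= SYSCTL_INT_MAX,	/* assumed upper bound */
		},
	};

	int fuse_sysctl_register(void)
	{
		/* Creates /proc/sys/fs/fuse/max_pages_limit */
		fuse_table_header = register_sysctl("fs/fuse", fuse_sysctl_table);
		if (!fuse_table_header)
			return -ENOMEM;
		return 0;
	}

	void fuse_sysctl_unregister(void)
	{
		unregister_sysctl_table(fuse_table_header);
	}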
+15 -14
fs/fuse/cuse.c
···
303 303 	struct fuse_args_pages ap;
304 304 	struct cuse_init_in in;
305 305 	struct cuse_init_out out;
306 - 	struct page *page;
307 - 	struct fuse_page_desc desc;
306 + 	struct folio *folio;
307 + 	struct fuse_folio_desc desc;
308 308 };
309 309 
310 310 /**
···
326 326 	struct fuse_args_pages *ap = &ia->ap;
327 327 	struct cuse_conn *cc = fc_to_cc(fc), *pos;
328 328 	struct cuse_init_out *arg = &ia->out;
329 - 	struct page *page = ap->pages[0];
329 + 	struct folio *folio = ap->folios[0];
330 330 	struct cuse_devinfo devinfo = { };
331 331 	struct device *dev;
332 332 	struct cdev *cdev;
···
343 343 	/* parse init reply */
344 344 	cc->unrestricted_ioctl = arg->flags & CUSE_UNRESTRICTED_IOCTL;
345 345 
346 - 	rc = cuse_parse_devinfo(page_address(page), ap->args.out_args[1].size,
346 + 	rc = cuse_parse_devinfo(folio_address(folio), ap->args.out_args[1].size,
347 347 				&devinfo);
348 348 	if (rc)
349 349 		goto err;
···
411 411 	kobject_uevent(&dev->kobj, KOBJ_ADD);
412 412 out:
413 413 	kfree(ia);
414 - 	__free_page(page);
414 + 	folio_put(folio);
415 415 	return;
416 416 
417 417 err_cdev:
···
429 429 static int cuse_send_init(struct cuse_conn *cc)
430 430 {
431 431 	int rc;
432 - 	struct page *page;
432 + 	struct folio *folio;
433 433 	struct fuse_mount *fm = &cc->fm;
434 434 	struct cuse_init_args *ia;
435 435 	struct fuse_args_pages *ap;
···
437 437 	BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE);
438 438 
439 439 	rc = -ENOMEM;
440 - 	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
441 - 	if (!page)
440 + 
441 + 	folio = folio_alloc(GFP_KERNEL | __GFP_ZERO, 0);
442 + 	if (!folio)
442 443 		goto err;
443 444 
444 445 	ia = kzalloc(sizeof(*ia), GFP_KERNEL);
445 446 	if (!ia)
446 - 		goto err_free_page;
447 + 		goto err_free_folio;
447 448 
448 449 	ap = &ia->ap;
449 450 	ia->in.major = FUSE_KERNEL_VERSION;
···
460 459 	ap->args.out_args[1].size = CUSE_INIT_INFO_MAX;
461 460 	ap->args.out_argvar = true;
462 461 	ap->args.out_pages = true;
463 - 	ap->num_pages = 1;
464 - 	ap->pages = &ia->page;
462 + 	ap->num_folios = 1;
463 + 	ap->folios = &ia->folio;
465 464 	ap->descs = &ia->desc;
466 - 	ia->page = page;
465 + 	ia->folio = folio;
467 466 	ia->desc.length = ap->args.out_args[1].size;
468 467 	ap->args.end = cuse_process_init_reply;
469 468 
470 469 	rc = fuse_simple_background(fm, &ap->args, GFP_KERNEL);
471 470 	if (rc) {
472 471 		kfree(ia);
473 - err_free_page:
474 - 		__free_page(page);
472 + err_free_folio:
473 + 		folio_put(folio);
475 474 	}
476 475 err:
477 476 	return rc;
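The cuse.c hunk is representative of the mechanical page to folio conversion applied across the series: order-0 folios replace bare pages, folio_address() replaces page_address(), and folio_put() replaces __free_page(). A minimal before/after sketch of that allocation pattern (the fuse_example_* name is hypothetical, not from the diff):

	#include <linux/gfp.h>
	#include <linux/mm.h>

	static void *fuse_example_alloc_buf(struct folio **foliop)
	{
		/* Before: struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO); */
		struct folio *folio = folio_alloc(GFP_KERNEL | __GFP_ZERO, 0);

		if (!folio)
			return NULL;
		*foliop = folio;
		/* Before: page_address(page), later freed with __free_page(page). */
		return folio_address(folio);	/* later freed with folio_put() */
	}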
-11
fs/fuse/dax.c
···
774 774 	return ret;
775 775 }
776 776 
777 - static int fuse_dax_writepages(struct address_space *mapping,
778 - 			       struct writeback_control *wbc)
779 - {
780 - 
781 - 	struct inode *inode = mapping->host;
782 - 	struct fuse_conn *fc = get_fuse_conn(inode);
783 - 
784 - 	return dax_writeback_mapping_range(mapping, fc->dax->dev, wbc);
785 - }
786 - 
787 777 static vm_fault_t __fuse_dax_fault(struct vm_fault *vmf, unsigned int order,
788 778 				   bool write)
789 779 {
···
1313 1323 }
1314 1324 
1315 1325 static const struct address_space_operations fuse_dax_file_aops = {
1316 - 	.writepages	= fuse_dax_writepages,
1317 1326 	.direct_IO	= noop_direct_IO,
1318 1327 	.dirty_folio	= noop_dirty_folio,
1319 1328 };
+39 -27
fs/fuse/dev.c
···
1028 1028 	struct fuse_req *req = cs->req;
1029 1029 	struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
1030 1030 
1031 - 
1032 - 	for (i = 0; i < ap->num_pages && (nbytes || zeroing); i++) {
1031 + 	for (i = 0; i < ap->num_folios && (nbytes || zeroing); i++) {
1033 1032 		int err;
1034 1033 		unsigned int offset = ap->descs[i].offset;
1035 1034 		unsigned int count = min(nbytes, ap->descs[i].length);
1035 + 		struct page *orig, *pagep;
1036 1036 
1037 - 		err = fuse_copy_page(cs, &ap->pages[i], offset, count, zeroing);
1037 + 		orig = pagep = &ap->folios[i]->page;
1038 + 
1039 + 		err = fuse_copy_page(cs, &pagep, offset, count, zeroing);
1038 1040 		if (err)
1039 1041 			return err;
1040 1042 
1041 1043 		nbytes -= count;
1044 + 
1045 + 		/*
1046 + 		 * fuse_copy_page may have moved a page from a pipe instead of
1047 + 		 * copying into our given page, so update the folios if it was
1048 + 		 * replaced.
1049 + 		 */
1050 + 		if (pagep != orig)
1051 + 			ap->folios[i] = page_folio(pagep);
1042 1052 	}
1043 1053 	return 0;
1044 1054 }
···
1664 1654 
1665 1655 	num = outarg.size;
1666 1656 	while (num) {
1657 + 		struct folio *folio;
1667 1658 		struct page *page;
1668 1659 		unsigned int this_num;
1669 1660 
1670 - 		err = -ENOMEM;
1671 - 		page = find_or_create_page(mapping, index,
1672 - 					   mapping_gfp_mask(mapping));
1673 - 		if (!page)
1661 + 		folio = filemap_grab_folio(mapping, index);
1662 + 		err = PTR_ERR(folio);
1663 + 		if (IS_ERR(folio))
1674 1664 			goto out_iput;
1675 1665 
1676 - 		this_num = min_t(unsigned, num, PAGE_SIZE - offset);
1666 + 		page = &folio->page;
1667 + 		this_num = min_t(unsigned, num, folio_size(folio) - offset);
1677 1668 		err = fuse_copy_page(cs, &page, offset, this_num, 0);
1678 - 		if (!PageUptodate(page) && !err && offset == 0 &&
1679 - 		    (this_num == PAGE_SIZE || file_size == end)) {
1680 - 			zero_user_segment(page, this_num, PAGE_SIZE);
1681 - 			SetPageUptodate(page);
1669 + 		if (!folio_test_uptodate(folio) && !err && offset == 0 &&
1670 + 		    (this_num == folio_size(folio) || file_size == end)) {
1671 + 			folio_zero_segment(folio, this_num, folio_size(folio));
1672 + 			folio_mark_uptodate(folio);
1682 1673 		}
1683 - 		unlock_page(page);
1684 - 		put_page(page);
1674 + 		folio_unlock(folio);
1675 + 		folio_put(folio);
1685 1676 
1686 1677 		if (err)
1687 1678 			goto out_iput;
···
1714 1703 	struct fuse_retrieve_args *ra =
1715 1704 		container_of(args, typeof(*ra), ap.args);
1716 1705 
1717 - 	release_pages(ra->ap.pages, ra->ap.num_pages);
1706 + 	release_pages(ra->ap.folios, ra->ap.num_folios);
1718 1707 	kfree(ra);
1719 1708 }
···
1728 1717 	unsigned int num;
1729 1718 	unsigned int offset;
1730 1719 	size_t total_len = 0;
1731 - 	unsigned int num_pages;
1720 + 	unsigned int num_pages, cur_pages = 0;
1732 1721 	struct fuse_conn *fc = fm->fc;
1733 1722 	struct fuse_retrieve_args *ra;
1734 1723 	size_t args_size = sizeof(*ra);
···
1747 1736 	num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
1748 1737 	num_pages = min(num_pages, fc->max_pages);
1749 1738 
1750 - 	args_size += num_pages * (sizeof(ap->pages[0]) + sizeof(ap->descs[0]));
1739 + 	args_size += num_pages * (sizeof(ap->folios[0]) + sizeof(ap->descs[0]));
1751 1740 
1752 1741 	ra = kzalloc(args_size, GFP_KERNEL);
1753 1742 	if (!ra)
1754 1743 		return -ENOMEM;
1755 1744 
1756 1745 	ap = &ra->ap;
1757 - 	ap->pages = (void *) (ra + 1);
1758 - 	ap->descs = (void *) (ap->pages + num_pages);
1746 + 	ap->folios = (void *) (ra + 1);
1747 + 	ap->descs = (void *) (ap->folios + num_pages);
1759 1748 
1760 1749 	args = &ap->args;
1761 1750 	args->nodeid = outarg->nodeid;
···
1766 1755 
1767 1756 	index = outarg->offset >> PAGE_SHIFT;
1768 1757 
1769 - 	while (num && ap->num_pages < num_pages) {
1770 - 		struct page *page;
1758 + 	while (num && cur_pages < num_pages) {
1759 + 		struct folio *folio;
1771 1760 		unsigned int this_num;
1772 1761 
1773 - 		page = find_get_page(mapping, index);
1774 - 		if (!page)
1762 + 		folio = filemap_get_folio(mapping, index);
1763 + 		if (IS_ERR(folio))
1775 1764 			break;
1776 1765 
1777 1766 		this_num = min_t(unsigned, num, PAGE_SIZE - offset);
1778 - 		ap->pages[ap->num_pages] = page;
1779 - 		ap->descs[ap->num_pages].offset = offset;
1780 - 		ap->descs[ap->num_pages].length = this_num;
1781 - 		ap->num_pages++;
1767 + 		ap->folios[ap->num_folios] = folio;
1768 + 		ap->descs[ap->num_folios].offset = offset;
1769 + 		ap->descs[ap->num_folios].length = this_num;
1770 + 		ap->num_folios++;
1771 + 		cur_pages++;
1782 1772 
1783 1773 		offset = 0;
1784 1774 		num -= this_num;
+19 -18
fs/fuse/dir.c
···
366 366 	struct fuse_mount *fm = get_fuse_mount_super(sb);
367 367 	FUSE_ARGS(args);
368 368 	struct fuse_forget_link *forget;
369 - 	u64 attr_version;
369 + 	u64 attr_version, evict_ctr;
370 370 	int err;
371 371 
372 372 	*inode = NULL;
···
381 381 		goto out;
382 382 
383 383 	attr_version = fuse_get_attr_version(fm->fc);
384 + 	evict_ctr = fuse_get_evict_ctr(fm->fc);
384 385 
385 386 	fuse_lookup_init(fm->fc, &args, nodeid, name, outarg);
386 387 	err = fuse_simple_request(fm, &args);
···
399 398 
400 399 	*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
401 400 			   &outarg->attr, ATTR_TIMEOUT(outarg),
402 - 			   attr_version);
401 + 			   attr_version, evict_ctr);
403 402 	err = -ENOMEM;
404 403 	if (!*inode) {
405 404 		fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1);
···
692 691 	ff->nodeid = outentry.nodeid;
693 692 	ff->open_flags = outopenp->open_flags;
694 693 	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
695 - 			  &outentry.attr, ATTR_TIMEOUT(&outentry), 0);
694 + 			  &outentry.attr, ATTR_TIMEOUT(&outentry), 0, 0);
696 695 	if (!inode) {
697 696 		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
698 697 		fuse_sync_release(NULL, ff, flags);
···
823 822 		goto out_put_forget_req;
824 823 
825 824 	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
826 - 			  &outarg.attr, ATTR_TIMEOUT(&outarg), 0);
825 + 			  &outarg.attr, ATTR_TIMEOUT(&outarg), 0, 0);
827 826 	if (!inode) {
828 827 		fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
829 828 		return -ENOMEM;
···
1586 1585 	return err;
1587 1586 }
1588 1587 
1589 - static int fuse_readlink_page(struct inode *inode, struct page *page)
1588 + static int fuse_readlink_page(struct inode *inode, struct folio *folio)
1590 1589 {
1591 1590 	struct fuse_mount *fm = get_fuse_mount(inode);
1592 - 	struct fuse_page_desc desc = { .length = PAGE_SIZE - 1 };
1591 + 	struct fuse_folio_desc desc = { .length = PAGE_SIZE - 1 };
1593 1592 	struct fuse_args_pages ap = {
1594 - 		.num_pages = 1,
1595 - 		.pages = &page,
1593 + 		.num_folios = 1,
1594 + 		.folios = &folio,
1596 1595 		.descs = &desc,
1597 1596 	};
1598 1597 	char *link;
···
1615 1614 	if (WARN_ON(res >= PAGE_SIZE))
1616 1615 		return -EIO;
1617 1616 
1618 - 	link = page_address(page);
1617 + 	link = folio_address(folio);
1619 1618 	link[res] = '\0';
1620 1619 
1621 1620 	return 0;
···
1625 1624 				 struct delayed_call *callback)
1626 1625 {
1627 1626 	struct fuse_conn *fc = get_fuse_conn(inode);
1628 - 	struct page *page;
1627 + 	struct folio *folio;
1629 1628 	int err;
1630 1629 
1631 1630 	err = -EIO;
···
1639 1638 	if (!dentry)
1640 1639 		goto out_err;
1641 1640 
1642 - 	page = alloc_page(GFP_KERNEL);
1641 + 	folio = folio_alloc(GFP_KERNEL, 0);
1643 1642 	err = -ENOMEM;
1644 - 	if (!page)
1643 + 	if (!folio)
1645 1644 		goto out_err;
1646 1645 
1647 - 	err = fuse_readlink_page(inode, page);
1646 + 	err = fuse_readlink_page(inode, folio);
1648 1647 	if (err) {
1649 - 		__free_page(page);
1648 + 		folio_put(folio);
1650 1649 		goto out_err;
1651 1650 	}
1652 1651 
1653 - 	set_delayed_call(callback, page_put_link, page);
1652 + 	set_delayed_call(callback, page_put_link, &folio->page);
1654 1653 
1655 - 	return page_address(page);
1654 + 	return folio_address(folio);
1656 1655 
1657 1656 out_err:
1658 1657 	return ERR_PTR(err);
···
2029 2028 
2030 2029 	fuse_change_attributes_common(inode, &outarg.attr, NULL,
2031 2030 				      ATTR_TIMEOUT(&outarg),
2032 - 				      fuse_get_cache_mask(inode));
2031 + 				      fuse_get_cache_mask(inode), 0);
2033 2032 	oldsize = inode->i_size;
2034 2033 	/* see the comment in fuse_change_attributes() */
2035 2034 	if (!is_wb || is_truncate)
···
2232 2231 
2233 2232 static int fuse_symlink_read_folio(struct file *null, struct folio *folio)
2234 2233 {
2235 - 	int err = fuse_readlink_page(folio->mapping->host, &folio->page);
2234 + 	int err = fuse_readlink_page(folio->mapping->host, folio);
2236 2235 
2237 2236 	if (!err)
2238 2237 		folio_mark_uptodate(folio);
+259 -192
fs/fuse/file.c
···
436 436 		wpa = rb_entry(n, struct fuse_writepage_args, writepages_entry);
437 437 		WARN_ON(get_fuse_inode(wpa->inode) != fi);
438 438 		curr_index = wpa->ia.write.in.offset >> PAGE_SHIFT;
439 - 		if (idx_from >= curr_index + wpa->ia.ap.num_pages)
439 + 		if (idx_from >= curr_index + wpa->ia.ap.num_folios)
440 440 			n = n->rb_right;
441 441 		else if (idx_to < curr_index)
442 442 			n = n->rb_left;
···
481 481 	struct fuse_inode *fi = get_fuse_inode(inode);
482 482 
483 483 	wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index));
484 + }
485 + 
486 + static inline bool fuse_folio_is_writeback(struct inode *inode,
487 + 					   struct folio *folio)
488 + {
489 + 	pgoff_t last = folio_next_index(folio) - 1;
490 + 	return fuse_range_is_writeback(inode, folio_index(folio), last);
491 + }
492 + 
493 + static void fuse_wait_on_folio_writeback(struct inode *inode,
494 + 					 struct folio *folio)
495 + {
496 + 	struct fuse_inode *fi = get_fuse_inode(inode);
497 + 
498 + 	wait_event(fi->page_waitq, !fuse_folio_is_writeback(inode, folio));
484 499 }
485 500 
486 501 /*
···
660 645 	args->out_args[0].size = count;
661 646 }
662 647 
663 - static void fuse_release_user_pages(struct fuse_args_pages *ap,
648 + static void fuse_release_user_pages(struct fuse_args_pages *ap, ssize_t nres,
664 649 				    bool should_dirty)
665 650 {
666 651 	unsigned int i;
667 652 
668 - 	for (i = 0; i < ap->num_pages; i++) {
653 + 	for (i = 0; i < ap->num_folios; i++) {
669 654 		if (should_dirty)
670 - 			set_page_dirty_lock(ap->pages[i]);
655 + 			folio_mark_dirty_lock(ap->folios[i]);
671 656 		if (ap->args.is_pinned)
672 - 			unpin_user_page(ap->pages[i]);
657 + 			unpin_folio(ap->folios[i]);
673 658 	}
659 + 
660 + 	if (nres > 0 && ap->args.invalidate_vmap)
661 + 		invalidate_kernel_vmap_range(ap->args.vmap_base, nres);
674 662 }
···
743 725 }
744 726 
745 727 static struct fuse_io_args *fuse_io_alloc(struct fuse_io_priv *io,
746 - 					  unsigned int npages)
728 + 					  unsigned int nfolios)
747 729 {
748 730 	struct fuse_io_args *ia;
749 731 
750 732 	ia = kzalloc(sizeof(*ia), GFP_KERNEL);
751 733 	if (ia) {
752 734 		ia->io = io;
753 - 		ia->ap.pages = fuse_pages_alloc(npages, GFP_KERNEL,
754 - 						&ia->ap.descs);
755 - 		if (!ia->ap.pages) {
735 + 		ia->ap.folios = fuse_folios_alloc(nfolios, GFP_KERNEL,
736 + 						  &ia->ap.descs);
737 + 		if (!ia->ap.folios) {
756 738 			kfree(ia);
757 739 			ia = NULL;
758 740 		}
···
762 744 
763 745 static void fuse_io_free(struct fuse_io_args *ia)
764 746 {
765 - 	kfree(ia->ap.pages);
747 + 	kfree(ia->ap.folios);
766 748 	kfree(ia);
767 749 }
···
772 754 	struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args);
773 755 	struct fuse_io_priv *io = ia->io;
774 756 	ssize_t pos = -1;
775 - 
776 - 	fuse_release_user_pages(&ia->ap, io->should_dirty);
757 + 	size_t nres;
777 758 
778 759 	if (err) {
779 760 		/* Nothing */
780 761 	} else if (io->write) {
781 762 		if (ia->write.out.size > ia->write.in.size) {
782 763 			err = -EIO;
783 - 		} else if (ia->write.in.size != ia->write.out.size) {
784 - 			pos = ia->write.in.offset - io->offset +
785 - 				ia->write.out.size;
764 + 		} else {
765 + 			nres = ia->write.out.size;
766 + 			if (ia->write.in.size != ia->write.out.size)
767 + 				pos = ia->write.in.offset - io->offset +
768 + 					ia->write.out.size;
786 769 		}
787 770 	} else {
788 771 		u32 outsize = args->out_args[0].size;
789 772 
773 + 		nres = outsize;
790 774 		if (ia->read.in.size != outsize)
791 775 			pos = ia->read.in.offset - io->offset + outsize;
792 776 	}
777 + 
778 + 	fuse_release_user_pages(&ia->ap, err ?: nres, io->should_dirty);
793 779 
794 780 	fuse_aio_complete(io, err, pos);
795 781 	fuse_io_free(ia);
···
865 843 	 * reached the client fs yet.  So the hole is not present there.
866 844 	 */
867 845 	if (!fc->writeback_cache) {
868 - 		loff_t pos = page_offset(ap->pages[0]) + num_read;
846 + 		loff_t pos = folio_pos(ap->folios[0]) + num_read;
869 847 		fuse_read_update_size(inode, pos, attr_ver);
870 848 	}
871 849 }
872 850 
873 - static int fuse_do_readpage(struct file *file, struct page *page)
851 + static int fuse_do_readfolio(struct file *file, struct folio *folio)
874 852 {
875 - 	struct inode *inode = page->mapping->host;
853 + 	struct inode *inode = folio->mapping->host;
876 854 	struct fuse_mount *fm = get_fuse_mount(inode);
877 - 	loff_t pos = page_offset(page);
878 - 	struct fuse_page_desc desc = { .length = PAGE_SIZE };
855 + 	loff_t pos = folio_pos(folio);
856 + 	struct fuse_folio_desc desc = { .length = PAGE_SIZE };
879 857 	struct fuse_io_args ia = {
880 858 		.ap.args.page_zeroing = true,
881 859 		.ap.args.out_pages = true,
882 - 		.ap.num_pages = 1,
883 - 		.ap.pages = &page,
860 + 		.ap.num_folios = 1,
861 + 		.ap.folios = &folio,
884 862 		.ap.descs = &desc,
885 863 	};
886 864 	ssize_t res;
887 865 	u64 attr_ver;
888 866 
889 867 	/*
890 - 	 * Page writeback can extend beyond the lifetime of the
891 - 	 * page-cache page, so make sure we read a properly synced
892 - 	 * page.
868 + 	 * With the temporary pages that are used to complete writeback, we can
869 + 	 * have writeback that extends beyond the lifetime of the folio.  So
870 + 	 * make sure we read a properly synced folio.
893 871 	 */
894 - 	fuse_wait_on_page_writeback(inode, page->index);
872 + 	fuse_wait_on_folio_writeback(inode, folio);
895 873 
896 874 	attr_ver = fuse_get_attr_version(fm->fc);
···
909 887 	if (res < desc.length)
910 888 		fuse_short_read(inode, attr_ver, res, &ia.ap);
911 889 
912 - 	SetPageUptodate(page);
890 + 	folio_mark_uptodate(folio);
913 891 
914 892 	return 0;
915 893 }
916 894 
917 895 static int fuse_read_folio(struct file *file, struct folio *folio)
918 896 {
919 - 	struct page *page = &folio->page;
920 - 	struct inode *inode = page->mapping->host;
897 + 	struct inode *inode = folio->mapping->host;
921 898 	int err;
922 899 
923 900 	err = -EIO;
924 901 	if (fuse_is_bad(inode))
925 902 		goto out;
926 903 
927 - 	err = fuse_do_readpage(file, page);
904 + 	err = fuse_do_readfolio(file, folio);
928 905 	fuse_invalidate_atime(inode);
929 906 out:
930 - 	unlock_page(page);
907 + 	folio_unlock(folio);
931 908 	return err;
932 909 }
···
940 919 	size_t num_read = args->out_args[0].size;
941 920 	struct address_space *mapping = NULL;
942 921 
943 - 	for (i = 0; mapping == NULL && i < ap->num_pages; i++)
944 - 		mapping = ap->pages[i]->mapping;
922 + 	for (i = 0; mapping == NULL && i < ap->num_folios; i++)
923 + 		mapping = ap->folios[i]->mapping;
945 924 
946 925 	if (mapping) {
947 926 		struct inode *inode = mapping->host;
···
955 934 		fuse_invalidate_atime(inode);
956 935 	}
957 936 
958 - 	for (i = 0; i < ap->num_pages; i++) {
959 - 		struct folio *folio = page_folio(ap->pages[i]);
960 - 
961 - 		folio_end_read(folio, !err);
962 - 		folio_put(folio);
963 - 	}
937 + 	for (i = 0; i < ap->num_folios; i++)
938 + 		folio_end_read(ap->folios[i], !err);
964 939 	if (ia->ff)
965 940 		fuse_file_put(ia->ff, false);
966 941 
···
968 951 	struct fuse_file *ff = file->private_data;
969 952 	struct fuse_mount *fm = ff->fm;
970 953 	struct fuse_args_pages *ap = &ia->ap;
971 - 	loff_t pos = page_offset(ap->pages[0]);
972 - 	size_t count = ap->num_pages << PAGE_SHIFT;
954 + 	loff_t pos = folio_pos(ap->folios[0]);
955 + 	/* Currently, all folios in FUSE are one page */
956 + 	size_t count = ap->num_folios << PAGE_SHIFT;
973 957 	ssize_t res;
974 958 	int err;
···
981 963 	/* Don't overflow end offset */
982 964 	if (pos + (count - 1) == LLONG_MAX) {
983 965 		count--;
984 - 		ap->descs[ap->num_pages - 1].length--;
966 + 		ap->descs[ap->num_folios - 1].length--;
985 967 	}
986 968 	WARN_ON((loff_t) (pos + count) < 0);
···
1003 985 static void fuse_readahead(struct readahead_control *rac)
1004 986 {
1005 987 	struct inode *inode = rac->mapping->host;
988 + 	struct fuse_inode *fi = get_fuse_inode(inode);
1006 989 	struct fuse_conn *fc = get_fuse_conn(inode);
1007 - 	unsigned int i, max_pages, nr_pages = 0;
990 + 	unsigned int max_pages, nr_pages;
991 + 	pgoff_t first = readahead_index(rac);
992 + 	pgoff_t last = first + readahead_count(rac) - 1;
1008 993 
1009 994 	if (fuse_is_bad(inode))
1010 995 		return;
1011 996 
997 + 	wait_event(fi->page_waitq, !fuse_range_is_writeback(inode, first, last));
998 + 
1012 999 	max_pages = min_t(unsigned int, fc->max_pages,
1013 1000 			  fc->max_read / PAGE_SIZE);
1014 1001 
1015 - 	for (;;) {
1002 + 	/*
1003 + 	 * This is only accurate the first time through, since readahead_folio()
1004 + 	 * doesn't update readahead_count() from the previous folio until the
1005 + 	 * next call.  Grab nr_pages here so we know how many pages we're going
1006 + 	 * to have to process.  This means that we will exit here with
1007 + 	 * readahead_count() == folio_nr_pages(last_folio), but we will have
1008 + 	 * consumed all of the folios, and read_pages() will call
1009 + 	 * readahead_folio() again which will clean up the rac.
1010 + 	 */
1011 + 	nr_pages = readahead_count(rac);
1012 + 
1013 + 	while (nr_pages) {
1016 1014 		struct fuse_io_args *ia;
1017 1015 		struct fuse_args_pages *ap;
1016 + 		struct folio *folio;
1017 + 		unsigned cur_pages = min(max_pages, nr_pages);
1018 1018 
1019 1019 		if (fc->num_background >= fc->congestion_threshold &&
1020 1020 		    rac->ra->async_size >= readahead_count(rac))
···
1042 1006 			 */
1043 1007 			break;
1044 1008 
1045 - 		nr_pages = readahead_count(rac) - nr_pages;
1046 - 		if (nr_pages > max_pages)
1047 - 			nr_pages = max_pages;
1048 - 		if (nr_pages == 0)
1049 - 			break;
1050 - 		ia = fuse_io_alloc(NULL, nr_pages);
1009 + 		ia = fuse_io_alloc(NULL, cur_pages);
1051 1010 		if (!ia)
1052 1011 			return;
1053 1012 		ap = &ia->ap;
1054 - 		nr_pages = __readahead_batch(rac, ap->pages, nr_pages);
1055 - 		for (i = 0; i < nr_pages; i++) {
1056 - 			fuse_wait_on_page_writeback(inode,
1057 - 						    readahead_index(rac) + i);
1058 - 			ap->descs[i].length = PAGE_SIZE;
1013 + 
1014 + 		while (ap->num_folios < cur_pages) {
1015 + 			folio = readahead_folio(rac);
1016 + 			ap->folios[ap->num_folios] = folio;
1017 + 			ap->descs[ap->num_folios].length = folio_size(folio);
1018 + 			ap->num_folios++;
1059 1019 		}
1060 - 		ap->num_pages = nr_pages;
1061 1020 		fuse_send_readpages(ia, rac->file);
1021 + 		nr_pages -= cur_pages;
1062 1022 	}
1063 1023 }
···
1171 1139 	bool short_write;
1172 1140 	int err;
1173 1141 
1174 - 	for (i = 0; i < ap->num_pages; i++)
1175 - 		fuse_wait_on_page_writeback(inode, ap->pages[i]->index);
1142 + 	for (i = 0; i < ap->num_folios; i++)
1143 + 		fuse_wait_on_folio_writeback(inode, ap->folios[i]);
1176 1144 
1177 1145 	fuse_write_args_fill(ia, ff, pos, count);
1178 1146 	ia->write.in.flags = fuse_write_flags(iocb);
···
1186 1154 	short_write = ia->write.out.size < count;
1187 1155 	offset = ap->descs[0].offset;
1188 1156 	count = ia->write.out.size;
1189 - 	for (i = 0; i < ap->num_pages; i++) {
1190 - 		struct page *page = ap->pages[i];
1157 + 	for (i = 0; i < ap->num_folios; i++) {
1158 + 		struct folio *folio = ap->folios[i];
1191 1159 
1192 1160 		if (err) {
1193 - 			ClearPageUptodate(page);
1161 + 			folio_clear_uptodate(folio);
1194 1162 		} else {
1195 - 			if (count >= PAGE_SIZE - offset)
1196 - 				count -= PAGE_SIZE - offset;
1163 + 			if (count >= folio_size(folio) - offset)
1164 + 				count -= folio_size(folio) - offset;
1197 1165 			else {
1198 1166 				if (short_write)
1199 - 					ClearPageUptodate(page);
1167 + 					folio_clear_uptodate(folio);
1200 1168 				count = 0;
1201 1169 			}
1202 1170 			offset = 0;
1203 1171 		}
1204 - 		if (ia->write.page_locked && (i == ap->num_pages - 1))
1205 - 			unlock_page(page);
1206 - 		put_page(page);
1172 + 		if (ia->write.folio_locked && (i == ap->num_folios - 1))
1173 + 			folio_unlock(folio);
1174 + 		folio_put(folio);
1207 1175 	}
1208 1176 
1209 1177 	return err;
···
1217 1185 	struct fuse_args_pages *ap = &ia->ap;
1218 1186 	struct fuse_conn *fc = get_fuse_conn(mapping->host);
1219 1187 	unsigned offset = pos & (PAGE_SIZE - 1);
1188 + 	unsigned int nr_pages = 0;
1220 1189 	size_t count = 0;
1221 1190 	int err;
···
1226 1193 
1227 1194 	do {
1228 1195 		size_t tmp;
1229 - 		struct page *page;
1196 + 		struct folio *folio;
1230 1197 		pgoff_t index = pos >> PAGE_SHIFT;
1231 1198 		size_t bytes = min_t(size_t, PAGE_SIZE - offset,
1232 1199 				     iov_iter_count(ii));
···
1238 1205 		if (fault_in_iov_iter_readable(ii, bytes))
1239 1206 			break;
1240 1207 
1241 - 		err = -ENOMEM;
1242 - 		page = grab_cache_page_write_begin(mapping, index);
1243 - 		if (!page)
1208 + 		folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
1209 + 					    mapping_gfp_mask(mapping));
1210 + 		if (IS_ERR(folio)) {
1211 + 			err = PTR_ERR(folio);
1244 1212 			break;
1213 + 		}
1245 1214 
1246 1215 		if (mapping_writably_mapped(mapping))
1247 - 			flush_dcache_page(page);
1216 + 			flush_dcache_folio(folio);
1248 1217 
1249 - 		tmp = copy_page_from_iter_atomic(page, offset, bytes, ii);
1250 - 		flush_dcache_page(page);
1218 + 		tmp = copy_folio_from_iter_atomic(folio, offset, bytes, ii);
1219 + 		flush_dcache_folio(folio);
1251 1220 
1252 1221 		if (!tmp) {
1253 - 			unlock_page(page);
1254 - 			put_page(page);
1222 + 			folio_unlock(folio);
1223 + 			folio_put(folio);
1255 1224 			goto again;
1256 1225 		}
1257 1226 
1258 1227 		err = 0;
1259 - 		ap->pages[ap->num_pages] = page;
1260 - 		ap->descs[ap->num_pages].length = tmp;
1261 - 		ap->num_pages++;
1228 + 		ap->folios[ap->num_folios] = folio;
1229 + 		ap->descs[ap->num_folios].length = tmp;
1230 + 		ap->num_folios++;
1231 + 		nr_pages++;
1262 1232 
1263 1233 		count += tmp;
1264 1234 		pos += tmp;
···
1271 1235 
1272 1236 		/* If we copied full page, mark it uptodate */
1273 1237 		if (tmp == PAGE_SIZE)
1274 - 			SetPageUptodate(page);
1238 + 			folio_mark_uptodate(folio);
1275 1239 
1276 - 		if (PageUptodate(page)) {
1277 - 			unlock_page(page);
1240 + 		if (folio_test_uptodate(folio)) {
1241 + 			folio_unlock(folio);
1278 1242 		} else {
1279 - 			ia->write.page_locked = true;
1243 + 			ia->write.folio_locked = true;
1280 1244 			break;
1281 1245 		}
1282 1246 		if (!fc->big_writes)
1283 1247 			break;
1284 1248 	} while (iov_iter_count(ii) && count < fc->max_write &&
1285 - 		 ap->num_pages < max_pages && offset == 0);
1249 + 		 nr_pages < max_pages && offset == 0);
1286 1250 
1287 1251 	return count > 0 ? count : err;
1288 1252 }
···
1316 1280 		unsigned int nr_pages = fuse_wr_pages(pos, iov_iter_count(ii),
1317 1281 						      fc->max_pages);
1318 1282 
1319 - 		ap->pages = fuse_pages_alloc(nr_pages, GFP_KERNEL, &ap->descs);
1320 - 		if (!ap->pages) {
1283 + 		ap->folios = fuse_folios_alloc(nr_pages, GFP_KERNEL, &ap->descs);
1284 + 		if (!ap->folios) {
1321 1285 			err = -ENOMEM;
1322 1286 			break;
1323 1287 		}
···
1339 1303 				err = -EIO;
1340 1304 			}
1341 1305 		}
1342 - 		kfree(ap->pages);
1306 + 		kfree(ap->folios);
1343 1307 	} while (!err && iov_iter_count(ii));
1344 1308 
1345 1309 	fuse_write_update_attr(inode, pos, res);
···
1466 1430 
1467 1431 	task_io_account_write(count);
1468 1432 
1469 - 	err = file_remove_privs(file);
1470 - 	if (err)
1471 - 		goto out;
1472 - 
1473 - 	err = file_update_time(file);
1433 + 	err = kiocb_modified(iocb);
1474 1434 	if (err)
1475 1435 		goto out;
···
1500 1468 
1501 1469 static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
1502 1470 			       size_t *nbytesp, int write,
1503 - 			       unsigned int max_pages)
1471 + 			       unsigned int max_pages,
1472 + 			       bool use_pages_for_kvec_io)
1504 1473 {
1474 + 	bool flush_or_invalidate = false;
1475 + 	unsigned int nr_pages = 0;
1505 1476 	size_t nbytes = 0;  /* # bytes already packed in req */
1506 1477 	ssize_t ret = 0;
1507 1478 
1508 - 	/* Special case for kernel I/O: can copy directly into the buffer */
1479 + 	/* Special case for kernel I/O: can copy directly into the buffer.
1480 + 	 * However if the implementation of fuse_conn requires pages instead of
1481 + 	 * pointer (e.g., virtio-fs), use iov_iter_extract_pages() instead.
1482 + 	 */
1509 1483 	if (iov_iter_is_kvec(ii)) {
1510 - 		unsigned long user_addr = fuse_get_user_addr(ii);
1511 - 		size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
1484 + 		void *user_addr = (void *)fuse_get_user_addr(ii);
1512 1485 
1513 - 		if (write)
1514 - 			ap->args.in_args[1].value = (void *) user_addr;
1515 - 		else
1516 - 			ap->args.out_args[0].value = (void *) user_addr;
1486 + 		if (!use_pages_for_kvec_io) {
1487 + 			size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
1517 1488 
1518 - 		iov_iter_advance(ii, frag_size);
1519 - 		*nbytesp = frag_size;
1520 - 		return 0;
1489 + 			if (write)
1490 + 				ap->args.in_args[1].value = user_addr;
1491 + 			else
1492 + 				ap->args.out_args[0].value = user_addr;
1493 + 
1494 + 			iov_iter_advance(ii, frag_size);
1495 + 			*nbytesp = frag_size;
1496 + 			return 0;
1497 + 		}
1498 + 
1499 + 		if (is_vmalloc_addr(user_addr)) {
1500 + 			ap->args.vmap_base = user_addr;
1501 + 			flush_or_invalidate = true;
1502 + 		}
1521 1503 	}
1522 1504 
1523 - 	while (nbytes < *nbytesp && ap->num_pages < max_pages) {
1524 - 		unsigned npages;
1525 - 		size_t start;
1526 - 		struct page **pt_pages;
1505 + 	/*
1506 + 	 * Until there is support for iov_iter_extract_folios(), we have to
1507 + 	 * manually extract pages using iov_iter_extract_pages() and then
1508 + 	 * copy that to a folios array.
1509 + 	 */
1510 + 	struct page **pages = kzalloc(max_pages * sizeof(struct page *),
1511 + 				      GFP_KERNEL);
1512 + 	if (!pages)
1513 + 		return -ENOMEM;
1527 1514 
1528 - 		pt_pages = &ap->pages[ap->num_pages];
1529 - 		ret = iov_iter_extract_pages(ii, &pt_pages,
1515 + 	while (nbytes < *nbytesp && nr_pages < max_pages) {
1516 + 		unsigned nfolios, i;
1517 + 		size_t start;
1518 + 
1519 + 		ret = iov_iter_extract_pages(ii, &pages,
1530 1520 					     *nbytesp - nbytes,
1531 - 					     max_pages - ap->num_pages,
1521 + 					     max_pages - nr_pages,
1532 1522 					     0, &start);
1533 1523 		if (ret < 0)
1534 1524 			break;
···
1558 1504 		nbytes += ret;
1559 1505 
1560 1506 		ret += start;
1561 - 		npages = DIV_ROUND_UP(ret, PAGE_SIZE);
1507 + 		/* Currently, all folios in FUSE are one page */
1508 + 		nfolios = DIV_ROUND_UP(ret, PAGE_SIZE);
1562 1509 
1563 - 		ap->descs[ap->num_pages].offset = start;
1564 - 		fuse_page_descs_length_init(ap->descs, ap->num_pages, npages);
1510 + 		ap->descs[ap->num_folios].offset = start;
1511 + 		fuse_folio_descs_length_init(ap->descs, ap->num_folios, nfolios);
1512 + 		for (i = 0; i < nfolios; i++)
1513 + 			ap->folios[i + ap->num_folios] = page_folio(pages[i]);
1565 1514 
1566 - 		ap->num_pages += npages;
1567 - 		ap->descs[ap->num_pages - 1].length -=
1515 + 		ap->num_folios += nfolios;
1516 + 		ap->descs[ap->num_folios - 1].length -=
1568 1517 			(PAGE_SIZE - ret) & (PAGE_SIZE - 1);
1518 + 		nr_pages += nfolios;
1569 1519 	}
1520 + 	kfree(pages);
1570 1521 
1522 + 	if (write && flush_or_invalidate)
1523 + 		flush_kernel_vmap_range(ap->args.vmap_base, nbytes);
1524 + 
1525 + 	ap->args.invalidate_vmap = !write && flush_or_invalidate;
1571 1526 	ap->args.is_pinned = iov_iter_extract_will_pin(ii);
1572 1527 	ap->args.user_pages = true;
1573 1528 	if (write)
···
1645 1582 		size_t nbytes = min(count, nmax);
1646 1583 
1647 1584 		err = fuse_get_user_pages(&ia->ap, iter, &nbytes, write,
1648 - 					  max_pages);
1585 + 					  max_pages, fc->use_pages_for_kvec_io);
1649 1586 		if (err && !nbytes)
1650 1587 			break;
···
1659 1596 		}
1660 1597 
1661 1598 		if (!io->async || nres < 0) {
1662 - 			fuse_release_user_pages(&ia->ap, io->should_dirty);
1599 + 			fuse_release_user_pages(&ia->ap, nres, io->should_dirty);
1663 1600 			fuse_io_free(ia);
1664 1601 		}
1665 1602 		ia = NULL;
···
1713 1650 {
1714 1651 	ssize_t res;
1715 1652 
1716 - 	if (!is_sync_kiocb(iocb) && iocb->ki_flags & IOCB_DIRECT) {
1653 + 	if (!is_sync_kiocb(iocb)) {
1717 1654 		res = fuse_direct_IO(iocb, to);
1718 1655 	} else {
1719 1656 		struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
···
1727 1664 static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
1728 1665 {
1729 1666 	struct inode *inode = file_inode(iocb->ki_filp);
1730 - 	struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
1731 1667 	ssize_t res;
1732 1668 	bool exclusive;
···
1734 1672 	res = generic_write_checks(iocb, from);
1735 1673 	if (res > 0) {
1736 1674 		task_io_account_write(res);
1737 - 		if (!is_sync_kiocb(iocb) && iocb->ki_flags & IOCB_DIRECT) {
1675 + 		if (!is_sync_kiocb(iocb)) {
1738 1676 			res = fuse_direct_IO(iocb, from);
1739 1677 		} else {
1678 + 			struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
1679 + 
1740 1680 			res = fuse_direct_io(&io, from, &iocb->ki_pos,
1741 1681 					     FUSE_DIO_WRITE);
1742 1682 			fuse_write_update_attr(inode, iocb->ki_pos, res);
···
1824 1760 	if (wpa->bucket)
1825 1761 		fuse_sync_bucket_dec(wpa->bucket);
1826 1762 
1827 - 	for (i = 0; i < ap->num_pages; i++)
1828 - 		__free_page(ap->pages[i]);
1763 + 	for (i = 0; i < ap->num_folios; i++)
1764 + 		folio_put(ap->folios[i]);
1829 1765 
1830 1766 	fuse_file_put(wpa->ia.ff, false);
1831 1767 
1832 - 	kfree(ap->pages);
1768 + 	kfree(ap->folios);
1833 1769 	kfree(wpa);
1834 1770 }
1835 1771 
1836 - static void fuse_writepage_finish_stat(struct inode *inode, struct page *page)
1772 + static void fuse_writepage_finish_stat(struct inode *inode, struct folio *folio)
1837 1773 {
1838 1774 	struct backing_dev_info *bdi = inode_to_bdi(inode);
1839 1775 
1840 1776 	dec_wb_stat(&bdi->wb, WB_WRITEBACK);
1841 - 	dec_node_page_state(page, NR_WRITEBACK_TEMP);
1777 + 	node_stat_sub_folio(folio, NR_WRITEBACK_TEMP);
1842 1778 	wb_writeout_inc(&bdi->wb);
1843 1779 }
···
1849 1785 	struct fuse_inode *fi = get_fuse_inode(inode);
1850 1786 	int i;
1851 1787 
1852 - 	for (i = 0; i < ap->num_pages; i++)
1853 - 		fuse_writepage_finish_stat(inode, ap->pages[i]);
1788 + 	for (i = 0; i < ap->num_folios; i++)
1789 + 		fuse_writepage_finish_stat(inode, ap->folios[i]);
1854 1790 
1855 1791 	wake_up(&fi->page_waitq);
1856 1792 }
···
1865 1801 	struct fuse_inode *fi = get_fuse_inode(wpa->inode);
1866 1802 	struct fuse_write_in *inarg = &wpa->ia.write.in;
1867 1803 	struct fuse_args *args = &wpa->ia.ap.args;
1868 - 	__u64 data_size = wpa->ia.ap.num_pages * PAGE_SIZE;
1804 + 	/* Currently, all folios in FUSE are one page */
1805 + 	__u64 data_size = wpa->ia.ap.num_folios * PAGE_SIZE;
1869 1806 	int err;
1870 1807 
1871 1808 	fi->writectr++;
···
1906 1841 	for (aux = wpa->next; aux; aux = next) {
1907 1842 		next = aux->next;
1908 1843 		aux->next = NULL;
1909 - 		fuse_writepage_finish_stat(aux->inode, aux->ia.ap.pages[0]);
1844 + 		fuse_writepage_finish_stat(aux->inode,
1845 + 					   aux->ia.ap.folios[0]);
1910 1846 		fuse_writepage_free(aux);
1911 1847 	}
···
1942 1876 				     struct fuse_writepage_args *wpa)
1943 1877 {
1944 1878 	pgoff_t idx_from = wpa->ia.write.in.offset >> PAGE_SHIFT;
1945 - 	pgoff_t idx_to = idx_from + wpa->ia.ap.num_pages - 1;
1879 + 	pgoff_t idx_to = idx_from + wpa->ia.ap.num_folios - 1;
1946 1880 	struct rb_node **p = &root->rb_node;
1947 1881 	struct rb_node *parent = NULL;
1948 1882 
1949 - 	WARN_ON(!wpa->ia.ap.num_pages);
1883 + 	WARN_ON(!wpa->ia.ap.num_folios);
1950 1884 	while (*p) {
1951 1885 		struct fuse_writepage_args *curr;
1952 1886 		pgoff_t curr_index;
···
1957 1891 		WARN_ON(curr->inode != wpa->inode);
1958 1892 		curr_index = curr->ia.write.in.offset >> PAGE_SHIFT;
1959 1893 
1960 - 		if (idx_from >= curr_index + curr->ia.ap.num_pages)
1894 + 		if (idx_from >= curr_index + curr->ia.ap.num_folios)
1961 1895 			p = &(*p)->rb_right;
1962 1896 		else if (idx_to < curr_index)
1963 1897 			p = &(*p)->rb_left;
···
2089 2023 	wpa = kzalloc(sizeof(*wpa), GFP_NOFS);
2090 2024 	if (wpa) {
2091 2025 		ap = &wpa->ia.ap;
2092 - 		ap->num_pages = 0;
2093 - 		ap->pages = fuse_pages_alloc(1, GFP_NOFS, &ap->descs);
2094 - 		if (!ap->pages) {
2026 + 		ap->num_folios = 0;
2027 + 		ap->folios = fuse_folios_alloc(1, GFP_NOFS, &ap->descs);
2028 + 		if (!ap->folios) {
2095 2029 			kfree(wpa);
2096 2030 			wpa = NULL;
2097 2031 		}
···
2115 2049 }
2116 2050 
2117 2051 static void fuse_writepage_args_page_fill(struct fuse_writepage_args *wpa, struct folio *folio,
2118 - 					  struct folio *tmp_folio, uint32_t page_index)
2052 + 					  struct folio *tmp_folio, uint32_t folio_index)
2119 2053 {
2120 2054 	struct inode *inode = folio->mapping->host;
2121 2055 	struct fuse_args_pages *ap = &wpa->ia.ap;
2122 2056 
2123 2057 	folio_copy(tmp_folio, folio);
2124 2058 
2125 - 	ap->pages[page_index] = &tmp_folio->page;
2126 - 	ap->descs[page_index].offset = 0;
2127 - 	ap->descs[page_index].length = PAGE_SIZE;
2059 + 	ap->folios[folio_index] = tmp_folio;
2060 + 	ap->descs[folio_index].offset = 0;
2061 + 	ap->descs[folio_index].length = PAGE_SIZE;
2128 2062 
2129 2063 	inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
2130 - 	inc_node_page_state(&tmp_folio->page, NR_WRITEBACK_TEMP);
2064 + 	node_stat_add_folio(tmp_folio, NR_WRITEBACK_TEMP);
2131 2065 }
···
2181 2115 		goto err_writepage_args;
2182 2116 
2183 2117 	ap = &wpa->ia.ap;
2184 - 	ap->num_pages = 1;
2118 + 	ap->num_folios = 1;
2185 2119 
2186 2120 	folio_start_writeback(folio);
2187 2121 	fuse_writepage_args_page_fill(wpa, folio, tmp_folio, 0);
···
2209 2143 	struct fuse_writepage_args *wpa;
2210 2144 	struct fuse_file *ff;
2211 2145 	struct inode *inode;
2212 - 	struct page **orig_pages;
2213 - 	unsigned int max_pages;
2146 + 	struct folio **orig_folios;
2147 + 	unsigned int max_folios;
2214 2148 };
2215 2149 
2216 2150 static bool fuse_pages_realloc(struct fuse_fill_wb_data *data)
2217 2151 {
2218 2152 	struct fuse_args_pages *ap = &data->wpa->ia.ap;
2219 2153 	struct fuse_conn *fc = get_fuse_conn(data->inode);
2220 - 	struct page **pages;
2221 - 	struct fuse_page_desc *descs;
2222 - 	unsigned int npages = min_t(unsigned int,
2223 - 				    max_t(unsigned int, data->max_pages * 2,
2224 - 					  FUSE_DEFAULT_MAX_PAGES_PER_REQ),
2154 + 	struct folio **folios;
2155 + 	struct fuse_folio_desc *descs;
2156 + 	unsigned int nfolios = min_t(unsigned int,
2157 + 				     max_t(unsigned int, data->max_folios * 2,
2158 + 					   FUSE_DEFAULT_MAX_PAGES_PER_REQ),
2225 2159 				    fc->max_pages);
2226 - 	WARN_ON(npages <= data->max_pages);
2160 + 	WARN_ON(nfolios <= data->max_folios);
2227 2161 
2228 - 	pages = fuse_pages_alloc(npages, GFP_NOFS, &descs);
2229 - 	if (!pages)
2162 + 	folios = fuse_folios_alloc(nfolios, GFP_NOFS, &descs);
2163 + 	if (!folios)
2230 2164 		return false;
2231 2165 
2232 - 	memcpy(pages, ap->pages, sizeof(struct page *) * ap->num_pages);
2233 - 	memcpy(descs, ap->descs, sizeof(struct fuse_page_desc) * ap->num_pages);
2234 - 	kfree(ap->pages);
2235 - 	ap->pages = pages;
2166 + 	memcpy(folios, ap->folios, sizeof(struct folio *) * ap->num_folios);
2167 + 	memcpy(descs, ap->descs, sizeof(struct fuse_folio_desc) * ap->num_folios);
2168 + 	kfree(ap->folios);
2169 + 	ap->folios = folios;
2236 2170 	ap->descs = descs;
2237 - 	data->max_pages = npages;
2171 + 	data->max_folios = nfolios;
2238 2172 
2239 2173 	return true;
2240 2174 }
···
2244 2178 	struct fuse_writepage_args *wpa = data->wpa;
2245 2179 	struct inode *inode = data->inode;
2246 2180 	struct fuse_inode *fi = get_fuse_inode(inode);
2247 - 	int num_pages = wpa->ia.ap.num_pages;
2181 + 	int num_folios = wpa->ia.ap.num_folios;
2248 2182 	int i;
2249 2183 
2250 2184 	spin_lock(&fi->lock);
···
2252 2186 	fuse_flush_writepages(inode);
2253 2187 	spin_unlock(&fi->lock);
2254 2188 
2255 - 	for (i = 0; i < num_pages; i++)
2256 - 		end_page_writeback(data->orig_pages[i]);
2189 + 	for (i = 0; i < num_folios; i++)
2190 + 		folio_end_writeback(data->orig_folios[i]);
2257 2191 }
2258 2192 
2259 2193 /*
···
2264 2198  * swapping the new temp page with the old one.
2265 2199  */
2266 2200 static bool fuse_writepage_add(struct fuse_writepage_args *new_wpa,
2267 - 			       struct page *page)
2201 + 			       struct folio *folio)
2268 2202 {
2269 2203 	struct fuse_inode *fi = get_fuse_inode(new_wpa->inode);
2270 2204 	struct fuse_writepage_args *tmp;
2271 2205 	struct fuse_writepage_args *old_wpa;
2272 2206 	struct fuse_args_pages *new_ap = &new_wpa->ia.ap;
2273 2207 
2274 - 	WARN_ON(new_ap->num_pages != 0);
2275 - 	new_ap->num_pages = 1;
2208 + 	WARN_ON(new_ap->num_folios != 0);
2209 + 	new_ap->num_folios = 1;
2276 2210 
2277 2211 	spin_lock(&fi->lock);
2278 2212 	old_wpa = fuse_insert_writeback(&fi->writepages, new_wpa);
···
2286 2220 
2287 2221 		WARN_ON(tmp->inode != new_wpa->inode);
2288 2222 		curr_index = tmp->ia.write.in.offset >> PAGE_SHIFT;
2289 - 		if (curr_index == page->index) {
2290 - 			WARN_ON(tmp->ia.ap.num_pages != 1);
2291 - 			swap(tmp->ia.ap.pages[0], new_ap->pages[0]);
2223 + 		if (curr_index == folio->index) {
2224 + 			WARN_ON(tmp->ia.ap.num_folios != 1);
2225 + 			swap(tmp->ia.ap.folios[0], new_ap->folios[0]);
2292 2226 			break;
2293 2227 		}
2294 2228 	}
···
2301 2235 	spin_unlock(&fi->lock);
2302 2236 
2303 2237 	if (tmp) {
2304 - 		fuse_writepage_finish_stat(new_wpa->inode, new_ap->pages[0]);
2238 + 		fuse_writepage_finish_stat(new_wpa->inode,
2239 + 					   folio);
2305 2240 		fuse_writepage_free(new_wpa);
2306 2241 	}
2307 2242 
2308 2243 	return false;
2309 2244 }
2310 2245 
2311 - static bool fuse_writepage_need_send(struct fuse_conn *fc, struct page *page,
2246 + static bool fuse_writepage_need_send(struct fuse_conn *fc, struct folio *folio,
2312 2247 				     struct fuse_args_pages *ap,
2313 2248 				     struct fuse_fill_wb_data *data)
2314 2249 {
2315 - 	WARN_ON(!ap->num_pages);
2250 + 	WARN_ON(!ap->num_folios);
2316 2251 
2317 2252 	/*
2318 2253 	 * Being under writeback is unlikely but possible.  For example direct
···
2321 2254 	 * the pages are faulted with get_user_pages(), and then after the read
2322 2255 	 * completed.
2323 2256 	 */
2324 - 	if (fuse_page_is_writeback(data->inode, page->index))
2257 + 	if (fuse_folio_is_writeback(data->inode, folio))
2325 2258 		return true;
2326 2259 
2327 2260 	/* Reached max pages */
2328 - 	if (ap->num_pages == fc->max_pages)
2261 + 	if (ap->num_folios == fc->max_pages)
2329 2262 		return true;
2330 2263 
2331 2264 	/* Reached max write bytes */
2332 - 	if ((ap->num_pages + 1) * PAGE_SIZE > fc->max_write)
2265 + 	if ((ap->num_folios + 1) * PAGE_SIZE > fc->max_write)
2333 2266 		return true;
2334 2267 
2335 2268 	/* Discontinuity */
2336 - 	if (data->orig_pages[ap->num_pages - 1]->index + 1 != page->index)
2269 + 	if (data->orig_folios[ap->num_folios - 1]->index + 1 != folio_index(folio))
2337 2270 		return true;
2338 2271 
2339 2272 	/* Need to grow the pages array?  If so, did the expansion fail? */
2340 - 	if (ap->num_pages == data->max_pages && !fuse_pages_realloc(data))
2273 + 	if (ap->num_folios == data->max_folios && !fuse_pages_realloc(data))
2341 2274 		return true;
2342 2275 
2343 2276 	return false;
···
2362 2295 		goto out_unlock;
2363 2296 	}
2364 2297 
2365 - 	if (wpa && fuse_writepage_need_send(fc, &folio->page, ap, data)) {
2298 + 	if (wpa && fuse_writepage_need_send(fc, folio, ap, data)) {
2366 2299 		fuse_writepages_send(data);
2367 2300 		data->wpa = NULL;
2368 2301 	}
···
2381 2314 	 * This is ensured by holding the page lock in page_mkwrite() while
2382 2315 	 * checking fuse_page_is_writeback().  We already hold the page lock
2383 2316 	 * since clear_page_dirty_for_io() and keep it held until we add the
2384 - 	 * request to the fi->writepages list and increment ap->num_pages.
2317 + 	 * request to the fi->writepages list and increment ap->num_folios.
2385 2318 	 * After this fuse_page_is_writeback() will indicate that the page is
2386 2319 	 * under writeback, so we can release the page lock.
2387 2320 	 */
···
2393 2326 			goto out_unlock;
2394 2327 		}
2395 2328 		fuse_file_get(wpa->ia.ff);
2396 - 		data->max_pages = 1;
2329 + 		data->max_folios = 1;
2397 2330 		ap = &wpa->ia.ap;
2398 2331 	}
2399 2332 	folio_start_writeback(folio);
2400 2333 
2401 - 	fuse_writepage_args_page_fill(wpa, folio, tmp_folio, ap->num_pages);
2402 - 	data->orig_pages[ap->num_pages] = &folio->page;
2334 + 	fuse_writepage_args_page_fill(wpa, folio, tmp_folio, ap->num_folios);
2335 + 	data->orig_folios[ap->num_folios] = folio;
2403 2336 
2404 2337 	err = 0;
2405 2338 	if (data->wpa) {
···
2408 2341 		 * fuse_page_is_writeback().
2409 2342 		 */
2410 2343 		spin_lock(&fi->lock);
2411 - 		ap->num_pages++;
2344 + 		ap->num_folios++;
2412 2345 		spin_unlock(&fi->lock);
2413 - 	} else if (fuse_writepage_add(wpa, &folio->page)) {
2346 + 	} else if (fuse_writepage_add(wpa, folio)) {
2414 2347 		data->wpa = wpa;
2415 2348 	} else {
2416 2349 		folio_end_writeback(folio);
···
2442 2375 	data.ff = NULL;
2443 2376 
2444 2377 	err = -ENOMEM;
2445 - 	data.orig_pages = kcalloc(fc->max_pages,
2446 - 				  sizeof(struct page *),
2447 - 				  GFP_NOFS);
2448 - 	if (!data.orig_pages)
2378 + 	data.orig_folios = kcalloc(fc->max_pages,
2379 + 				   sizeof(struct folio *),
2380 + 				   GFP_NOFS);
2381 + 	if (!data.orig_folios)
2449 2382 		goto out;
2450 2383 
2451 2384 	err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data);
2452 2385 	if (data.wpa) {
2453 - 		WARN_ON(!data.wpa->ia.ap.num_pages);
2386 + 		WARN_ON(!data.wpa->ia.ap.num_folios);
2454 2387 		fuse_writepages_send(&data);
2455 2388 	}
2456 2389 	if (data.ff)
2457 2390 		fuse_file_put(data.ff, false);
2458 2391 
2459 - 	kfree(data.orig_pages);
2392 + 	kfree(data.orig_folios);
2460 2393 out:
2461 2394 	return err;
2462 2395 }
···
2496 2429 		folio_zero_segment(folio, 0, off);
2497 2430 		goto success;
2498 2431 	}
2499 - 	err = fuse_do_readpage(file, &folio->page);
2432 + 	err = fuse_do_readfolio(file, folio);
2500 2433 	if (err)
2501 2434 		goto cleanup;
2502 2435 success:
···
2585 2518  */
2586 2519 static vm_fault_t fuse_page_mkwrite(struct vm_fault *vmf)
2587 2520 {
2588 - 	struct page *page = vmf->page;
2521 + 	struct folio *folio = page_folio(vmf->page);
2589 2522 	struct inode *inode = file_inode(vmf->vma->vm_file);
2590 2523 
2591 2524 	file_update_time(vmf->vma->vm_file);
2592 - 	lock_page(page);
2593 - 	if (page->mapping != inode->i_mapping) {
2594 - 		unlock_page(page);
2525 + 	folio_lock(folio);
2526 + 	if (folio->mapping != inode->i_mapping) {
2527 + 		folio_unlock(folio);
2595 2528 		return VM_FAULT_NOPAGE;
2596 2529 	}
2597 2530 
2598 - 	fuse_wait_on_page_writeback(inode, page->index);
2531 + 	fuse_wait_on_folio_writeback(inode, folio);
2599 2532 	return VM_FAULT_LOCKED;
2600 2533 }
2601 2534 
+46 -22
fs/fuse/fuse_i.h
···
35 35 /** Default max number of pages that can be used in a single read request */
36 36 #define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32
37 37 
38 - /** Maximum of max_pages received in init_out */
39 - #define FUSE_MAX_MAX_PAGES 256
40 - 
41 38 /** Bias for fi->writectr, meaning new writepages must not be sent */
42 39 #define FUSE_NOWRITE INT_MIN
43 40 
···
43 46 
44 47 /** Number of dentries for each connection in the control filesystem */
45 48 #define FUSE_CTL_NUM_DENTRIES 5
49 + 
50 + /** Maximum of max_pages received in init_out */
51 + extern unsigned int fuse_max_pages_limit;
46 52 
47 53 /** List of active connections */
48 54 extern struct list_head fuse_conn_list;
···
285 285 	void *value;
286 286 };
287 287 
288 - /** FUSE page descriptor */
289 - struct fuse_page_desc {
288 + /** FUSE folio descriptor */
289 + struct fuse_folio_desc {
290 290 	unsigned int length;
291 291 	unsigned int offset;
292 292 };
···
309 309 	bool may_block:1;
310 310 	bool is_ext:1;
311 311 	bool is_pinned:1;
312 + 	bool invalidate_vmap:1;
312 313 	struct fuse_in_arg in_args[3];
313 314 	struct fuse_arg out_args[2];
314 315 	void (*end)(struct fuse_mount *fm, struct fuse_args *args, int error);
316 + 	/* Used for kvec iter backed by vmalloc address */
317 + 	void *vmap_base;
315 318 };
316 319 
317 320 struct fuse_args_pages {
318 321 	struct fuse_args args;
319 - 	struct page **pages;
320 - 	struct fuse_page_desc *descs;
321 - 	unsigned int num_pages;
322 + 	struct folio **folios;
323 + 	struct fuse_folio_desc *descs;
324 + 	unsigned int num_folios;
322 325 };
323 326 
324 327 struct fuse_release_args {
···
860 857 	/** Passthrough support for read/write IO */
861 858 	unsigned int passthrough:1;
862 859 
860 + 	/* Use pages instead of pointer for kernel I/O */
861 + 	unsigned int use_pages_for_kvec_io:1;
862 + 
863 863 	/** Maximum stack depth for passthrough backing files */
864 864 	int max_stack_depth;
···
889 883 
890 884 	/** Version counter for attribute changes */
891 885 	atomic64_t attr_version;
886 + 
887 + 	/** Version counter for evict inode */
888 + 	atomic64_t evict_ctr;
892 889 
893 890 	/** Called on final put */
894 891 	void (*release)(struct fuse_conn *);
···
987 978 	return atomic64_read(&fc->attr_version);
988 979 }
989 980 
981 + static inline u64 fuse_get_evict_ctr(struct fuse_conn *fc)
982 + {
983 + 	return atomic64_read(&fc->evict_ctr);
984 + }
985 + 
990 986 static inline bool fuse_stale_inode(const struct inode *inode, int generation,
991 987 				    struct fuse_attr *attr)
992 988 {
···
1009 995 	return unlikely(test_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state));
1010 996 }
1011 997 
1012 - static inline struct page **fuse_pages_alloc(unsigned int npages, gfp_t flags,
1013 - 					     struct fuse_page_desc **desc)
998 + static inline struct folio **fuse_folios_alloc(unsigned int nfolios, gfp_t flags,
999 + 					       struct fuse_folio_desc **desc)
1014 1000 {
1015 - 	struct page **pages;
1001 + 	struct folio **folios;
1016 1002 
1017 - 	pages = kzalloc(npages * (sizeof(struct page *) +
1018 - 				  sizeof(struct fuse_page_desc)), flags);
1019 - 	*desc = (void *) (pages + npages);
1003 + 	folios = kzalloc(nfolios * (sizeof(struct folio *) +
1004 + 				    sizeof(struct fuse_folio_desc)), flags);
1005 + 	*desc = (void *) (folios + nfolios);
1020 1006 
1021 - 	return pages;
1007 + 	return folios;
1022 1008 }
1023 1009 
1024 - static inline void fuse_page_descs_length_init(struct fuse_page_desc *descs,
1025 - 					       unsigned int index,
1026 - 					       unsigned int nr_pages)
1010 + static inline void fuse_folio_descs_length_init(struct fuse_folio_desc *descs,
1011 + 						unsigned int index,
1012 + 						unsigned int nr_folios)
1027 1013 {
1028 1014 	int i;
1029 1015 
1030 - 	for (i = index; i < index + nr_pages; i++)
1016 + 	for (i = index; i < index + nr_folios; i++)
1031 1017 		descs[i].length = PAGE_SIZE - descs[i].offset;
1032 1018 }
···
1051 1037  */
1052 1038 struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
1053 1039 			int generation, struct fuse_attr *attr,
1054 - 			u64 attr_valid, u64 attr_version);
1040 + 			u64 attr_valid, u64 attr_version,
1041 + 			u64 evict_ctr);
1055 1042 
1056 1043 int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
1057 1044 		     struct fuse_entry_out *outarg, struct inode **inode);
···
1077 1062 		struct {
1078 1063 			struct fuse_write_in in;
1079 1064 			struct fuse_write_out out;
1080 - 			bool page_locked;
1065 + 			bool folio_locked;
1081 1066 		} write;
1082 1067 	};
1083 1068 	struct fuse_args_pages ap;
···
1142 1127 
1143 1128 void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
1144 1129 				   struct fuse_statx *sx,
1145 - 				   u64 attr_valid, u32 cache_mask);
1130 + 				   u64 attr_valid, u32 cache_mask,
1131 + 				   u64 evict_ctr);
1146 1132 
1147 1133 u32 fuse_get_cache_mask(struct inode *inode);
···
1495 1479 				   struct file *out, loff_t *ppos,
1496 1480 				   size_t len, unsigned int flags);
1497 1481 ssize_t fuse_passthrough_mmap(struct file *file, struct vm_area_struct *vma);
1482 + 
1483 + #ifdef CONFIG_SYSCTL
1484 + extern int fuse_sysctl_register(void);
1485 + extern void fuse_sysctl_unregister(void);
1486 + #else
1487 + #define fuse_sysctl_register()		(0)
1488 + #define fuse_sysctl_unregister()	do { } while (0)
1489 + #endif /* CONFIG_SYSCTL */
1498 1490 
1499 1491 #endif /* _FS_FUSE_I_H */
+54 -13
fs/fuse/inode.c
···
35 35 
36 36 static int set_global_limit(const char *val, const struct kernel_param *kp);
37 37 
38 + unsigned int fuse_max_pages_limit = 256;
39 + 
38 40 unsigned max_user_bgreq;
39 41 module_param_call(max_user_bgreq, set_global_limit, param_get_uint,
40 42 		  &max_user_bgreq, 0644);
···
175 173 		fuse_cleanup_submount_lookup(fc, fi->submount_lookup);
176 174 		fi->submount_lookup = NULL;
177 175 	}
176 + 	/*
177 + 	 * Evict of non-deleted inode may race with outstanding
178 + 	 * LOOKUP/READDIRPLUS requests and result in inconsistency when
179 + 	 * the request finishes.  Deal with that here by bumping a
180 + 	 * counter that can be compared to the starting value.
181 + 	 */
182 + 	if (inode->i_nlink > 0)
183 + 		atomic64_inc(&fc->evict_ctr);
178 184 }
179 185 if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) {
180 186 	WARN_ON(fi->iocachectr != 0);
···
216 206 
217 207 void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
218 208 				   struct fuse_statx *sx,
219 - 				   u64 attr_valid, u32 cache_mask)
209 + 				   u64 attr_valid, u32 cache_mask,
210 + 				   u64 evict_ctr)
220 211 {
221 212 	struct fuse_conn *fc = get_fuse_conn(inode);
222 213 	struct fuse_inode *fi = get_fuse_inode(inode);
223 214 
224 215 	lockdep_assert_held(&fi->lock);
225 216 
217 + 	/*
218 + 	 * Clear basic stats from invalid mask.
219 + 	 *
220 + 	 * Don't do this if this is coming from a fuse_iget() call and there
221 + 	 * might have been a racing evict which would've invalidated the result
222 + 	 * if the attr_version would've been preserved.
223 + 	 *
224 + 	 * !evict_ctr -> this is create
225 + 	 * fi->attr_version != 0 -> this is not a new inode
226 + 	 * evict_ctr == fuse_get_evict_ctr() -> no evicts while during request
227 + 	 */
228 + 	if (!evict_ctr || fi->attr_version || evict_ctr == fuse_get_evict_ctr(fc))
229 + 		set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0);
230 + 
226 231 	fi->attr_version = atomic64_inc_return(&fc->attr_version);
227 232 	fi->i_time = attr_valid;
228 - 	/* Clear basic stats from invalid mask */
229 - 	set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0);
230 233 
231 234 	inode->i_ino     = fuse_squash_ino(attr->ino);
232 235 	inode->i_mode    = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
···
318 295 	return STATX_MTIME | STATX_CTIME | STATX_SIZE;
319 296 }
320 297 
321 - void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
322 - 			    struct fuse_statx *sx,
323 - 			    u64 attr_valid, u64 attr_version)
298 + static void fuse_change_attributes_i(struct inode *inode, struct fuse_attr *attr,
299 + 				     struct fuse_statx *sx, u64 attr_valid,
300 + 				     u64 attr_version, u64 evict_ctr)
324 301 {
325 302 	struct fuse_conn *fc = get_fuse_conn(inode);
326 303 	struct fuse_inode *fi = get_fuse_inode(inode);
···
354 331 	}
355 332 
356 333 	old_mtime = inode_get_mtime(inode);
357 - 	fuse_change_attributes_common(inode, attr, sx, attr_valid, cache_mask);
334 + 	fuse_change_attributes_common(inode, attr, sx, attr_valid, cache_mask,
335 + 				      evict_ctr);
358 336 
359 337 	oldsize = inode->i_size;
360 338 	/*
···
394 370 
395 371 	if (IS_ENABLED(CONFIG_FUSE_DAX))
396 372 		fuse_dax_dontcache(inode, attr->flags);
373 + }
374 + 
375 + void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
376 + 			    struct fuse_statx *sx, u64 attr_valid,
377 + 			    u64 attr_version)
378 + {
379 + 	fuse_change_attributes_i(inode, attr, sx, attr_valid, attr_version, 0);
397 380 }
398 381 
399 382 static void fuse_init_submount_lookup(struct fuse_submount_lookup *sl,
···
457 426 
458 427 struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
459 428 			int generation, struct fuse_attr *attr,
460 - 			u64 attr_valid, u64 attr_version)
429 + 			u64 attr_valid, u64 attr_version,
430 + 			u64 evict_ctr)
461 431 {
462 432 	struct inode *inode;
463 433 	struct fuse_inode *fi;
···
519 487 		fi->nlookup++;
520 488 		spin_unlock(&fi->lock);
521 489 done:
522 - 	fuse_change_attributes(inode, attr, NULL, attr_valid, attr_version);
523 - 
490 + 	fuse_change_attributes_i(inode, attr, NULL, attr_valid, attr_version,
491 + 				 evict_ctr);
524 492 	return inode;
525 493 }
···
972 940 	fc->initialized = 0;
973 941 	fc->connected = 1;
974 942 	atomic64_set(&fc->attr_version, 1);
943 + 	atomic64_set(&fc->evict_ctr, 1);
975 944 	get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
976 945 	fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
977 946 	fc->user_ns = get_user_ns(user_ns);
978 947 	fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
979 - 	fc->max_pages_limit = FUSE_MAX_MAX_PAGES;
948 + 	fc->max_pages_limit = fuse_max_pages_limit;
980 949 
981 950 	if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
982 951 		fuse_backing_files_init(fc);
···
1034 1001 	attr.mode = mode;
1035 1002 	attr.ino = FUSE_ROOT_ID;
1036 1003 	attr.nlink = 1;
1037 - 	return fuse_iget(sb, FUSE_ROOT_ID, 0, &attr, 0, 0);
1004 + 	return fuse_iget(sb, FUSE_ROOT_ID, 0, &attr, 0, 0, 0);
1038 1005 }
1039 1006 
1040 1007 struct fuse_inode_handle {
···
1643 1610 		return -ENOMEM;
1644 1611 
1645 1612 	fuse_fill_attr_from_inode(&root_attr, parent_fi);
1646 - 	root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0);
1613 + 	root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0,
1614 + 			 fuse_get_evict_ctr(fm->fc));
1647 1615 	/*
1648 1616 	 * This inode is just a duplicate, so it is not looked up and
1649 1617 	 * its nlookup should not be incremented.  fuse_iget() does
···
2097 2063 	if (err)
2098 2064 		goto out3;
2099 2065 
2066 + 	err = fuse_sysctl_register();
2067 + 	if (err)
2068 + 		goto out4;
2069 + 
2100 2070 	return 0;
2101 2071 
2072 + out4:
2073 + 	unregister_filesystem(&fuse_fs_type);
2102 2074 out3:
2103 2075 	unregister_fuseblk();
2104 2076 out2:
···
2115 2075 
2116 2076 static void fuse_fs_cleanup(void)
2117 2077 {
2078 + 	fuse_sysctl_unregister();
2118 2079 	unregister_filesystem(&fuse_fs_type);
2119 2080 	unregister_fuseblk();
2120 2081 
+18 -17
fs/fuse/ioctl.c
··· 10 10 #include <linux/fileattr.h> 11 11 #include <linux/fsverity.h> 12 12 13 + #define FUSE_VERITY_ENABLE_ARG_MAX_PAGES 256 14 + 13 15 static ssize_t fuse_send_ioctl(struct fuse_mount *fm, struct fuse_args *args, 14 16 struct fuse_ioctl_out *outarg) 15 17 { ··· 142 140 { 143 141 struct fsverity_enable_arg enable; 144 142 struct fsverity_enable_arg __user *uarg = (void __user *)arg; 145 - const __u32 max_buffer_len = FUSE_MAX_MAX_PAGES * PAGE_SIZE; 143 + const __u32 max_buffer_len = FUSE_VERITY_ENABLE_ARG_MAX_PAGES * PAGE_SIZE; 146 144 147 145 if (copy_from_user(&enable, uarg, sizeof(enable))) 148 146 return -EFAULT; ··· 251 249 BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); 252 250 253 251 err = -ENOMEM; 254 - ap.pages = fuse_pages_alloc(fm->fc->max_pages, GFP_KERNEL, &ap.descs); 252 + ap.folios = fuse_folios_alloc(fm->fc->max_pages, GFP_KERNEL, &ap.descs); 255 253 iov_page = (struct iovec *) __get_free_page(GFP_KERNEL); 256 - if (!ap.pages || !iov_page) 254 + if (!ap.folios || !iov_page) 257 255 goto out; 258 256 259 - fuse_page_descs_length_init(ap.descs, 0, fm->fc->max_pages); 257 + fuse_folio_descs_length_init(ap.descs, 0, fm->fc->max_pages); 260 258 261 259 /* 262 260 * If restricted, initialize IO parameters as encoded in @cmd. ··· 306 304 err = -ENOMEM; 307 305 if (max_pages > fm->fc->max_pages) 308 306 goto out; 309 - while (ap.num_pages < max_pages) { 310 - ap.pages[ap.num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); 311 - if (!ap.pages[ap.num_pages]) 307 + while (ap.num_folios < max_pages) { 308 + ap.folios[ap.num_folios] = folio_alloc(GFP_KERNEL | __GFP_HIGHMEM, 0); 309 + if (!ap.folios[ap.num_folios]) 312 310 goto out; 313 - ap.num_pages++; 311 + ap.num_folios++; 314 312 } 315 - 316 313 317 314 /* okay, let's send it to the client */ 318 315 ap.args.opcode = FUSE_IOCTL; ··· 326 325 327 326 err = -EFAULT; 328 327 iov_iter_init(&ii, ITER_SOURCE, in_iov, in_iovs, in_size); 329 - for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) { 330 - c = copy_page_from_iter(ap.pages[i], 0, PAGE_SIZE, &ii); 328 + for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_folios); i++) { 329 + c = copy_folio_from_iter(ap.folios[i], 0, PAGE_SIZE, &ii); 331 330 if (c != PAGE_SIZE && iov_iter_count(&ii)) 332 331 goto out; 333 332 } ··· 365 364 in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV) 366 365 goto out; 367 366 368 - vaddr = kmap_local_page(ap.pages[0]); 367 + vaddr = kmap_local_folio(ap.folios[0], 0); 369 368 err = fuse_copy_ioctl_iovec(fm->fc, iov_page, vaddr, 370 369 transferred, in_iovs + out_iovs, 371 370 (flags & FUSE_IOCTL_COMPAT) != 0); ··· 393 392 394 393 err = -EFAULT; 395 394 iov_iter_init(&ii, ITER_DEST, out_iov, out_iovs, transferred); 396 - for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) { 397 - c = copy_page_to_iter(ap.pages[i], 0, PAGE_SIZE, &ii); 395 + for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_folios); i++) { 396 + c = copy_folio_to_iter(ap.folios[i], 0, PAGE_SIZE, &ii); 398 397 if (c != PAGE_SIZE && iov_iter_count(&ii)) 399 398 goto out; 400 399 } 401 400 err = 0; 402 401 out: 403 402 free_page((unsigned long) iov_page); 404 - while (ap.num_pages) 405 - __free_page(ap.pages[--ap.num_pages]); 406 - kfree(ap.pages); 403 + while (ap.num_folios) 404 + folio_put(ap.folios[--ap.num_folios]); 405 + kfree(ap.folios); 407 406 408 407 return err ? err : outarg.result; 409 408 }
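The ioctl conversion above is the same mechanical substitution used throughout the series: one page becomes one order-0 folio, and each page helper has a folio counterpart. Kernel-context sketch of the pattern in isolation (folio_pattern_demo is illustrative, error handling trimmed):

#include <linux/mm.h>
#include <linux/string.h>

static int folio_pattern_demo(void)
{
	struct folio *folio;

	/* alloc_page(gfp) becomes folio_alloc(gfp, 0): an order-0 folio */
	folio = folio_alloc(GFP_KERNEL, 0);
	if (!folio)
		return -ENOMEM;

	/* page_address(page) becomes folio_address(folio) */
	memset(folio_address(folio), 0, PAGE_SIZE);

	/* __free_page(page) becomes folio_put(folio): drop the reference */
	folio_put(folio);
	return 0;
}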
+18 -15
fs/fuse/readdir.c
··· 149 149 150 150 static int fuse_direntplus_link(struct file *file, 151 151 struct fuse_direntplus *direntplus, 152 - u64 attr_version) 152 + u64 attr_version, u64 evict_ctr) 153 153 { 154 154 struct fuse_entry_out *o = &direntplus->entry_out; 155 155 struct fuse_dirent *dirent = &direntplus->dirent; ··· 233 233 } else { 234 234 inode = fuse_iget(dir->i_sb, o->nodeid, o->generation, 235 235 &o->attr, ATTR_TIMEOUT(o), 236 - attr_version); 236 + attr_version, evict_ctr); 237 237 if (!inode) 238 238 inode = ERR_PTR(-ENOMEM); 239 239 ··· 284 284 } 285 285 286 286 static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, 287 - struct dir_context *ctx, u64 attr_version) 287 + struct dir_context *ctx, u64 attr_version, 288 + u64 evict_ctr) 288 289 { 289 290 struct fuse_direntplus *direntplus; 290 291 struct fuse_dirent *dirent; ··· 320 319 buf += reclen; 321 320 nbytes -= reclen; 322 321 323 - ret = fuse_direntplus_link(file, direntplus, attr_version); 322 + ret = fuse_direntplus_link(file, direntplus, attr_version, evict_ctr); 324 323 if (ret) 325 324 fuse_force_forget(file, direntplus->entry_out.nodeid); 326 325 } ··· 332 331 { 333 332 int plus; 334 333 ssize_t res; 335 - struct page *page; 334 + struct folio *folio; 336 335 struct inode *inode = file_inode(file); 337 336 struct fuse_mount *fm = get_fuse_mount(inode); 338 337 struct fuse_io_args ia = {}; 339 338 struct fuse_args_pages *ap = &ia.ap; 340 - struct fuse_page_desc desc = { .length = PAGE_SIZE }; 341 - u64 attr_version = 0; 339 + struct fuse_folio_desc desc = { .length = PAGE_SIZE }; 340 + u64 attr_version = 0, evict_ctr = 0; 342 341 bool locked; 343 342 344 - page = alloc_page(GFP_KERNEL); 345 - if (!page) 343 + folio = folio_alloc(GFP_KERNEL, 0); 344 + if (!folio) 346 345 return -ENOMEM; 347 346 348 347 plus = fuse_use_readdirplus(inode, ctx); 349 348 ap->args.out_pages = true; 350 - ap->num_pages = 1; 351 - ap->pages = &page; 349 + ap->num_folios = 1; 350 + ap->folios = &folio; 352 351 ap->descs = &desc; 353 352 if (plus) { 354 353 attr_version = fuse_get_attr_version(fm->fc); 354 + evict_ctr = fuse_get_evict_ctr(fm->fc); 355 355 fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE, 356 356 FUSE_READDIRPLUS); 357 357 } else { ··· 369 367 if (ff->open_flags & FOPEN_CACHE_DIR) 370 368 fuse_readdir_cache_end(file, ctx->pos); 371 369 } else if (plus) { 372 - res = parse_dirplusfile(page_address(page), res, 373 - file, ctx, attr_version); 370 + res = parse_dirplusfile(folio_address(folio), res, 371 + file, ctx, attr_version, 372 + evict_ctr); 374 373 } else { 375 - res = parse_dirfile(page_address(page), res, file, 374 + res = parse_dirfile(folio_address(folio), res, file, 376 375 ctx); 377 376 } 378 377 } 379 378 380 - __free_page(page); 379 + folio_put(folio); 381 380 fuse_invalidate_atime(inode); 382 381 return res; 383 382 }
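For context on what parse_dirfile()/parse_dirplusfile() are walking: a FUSE readdir reply is a packed sequence of variable-length struct fuse_dirent records, each padded to 8 bytes. A userspace sketch using the uapi header (walk_dirents is illustrative; it shows the buffer layout, not the kernel parser):

#include <linux/fuse.h>
#include <stddef.h>
#include <stdio.h>

static void walk_dirents(const char *buf, size_t nbytes)
{
	while (nbytes >= FUSE_NAME_OFFSET) {
		const struct fuse_dirent *d = (const void *)buf;
		size_t reclen = FUSE_DIRENT_SIZE(d);	/* 8-byte aligned */

		if (reclen > nbytes)	/* truncated record: stop */
			break;
		printf("ino=%llu type=%u name=%.*s\n",
		       (unsigned long long)d->ino, d->type,
		       (int)d->namelen, d->name);
		buf += reclen;
		nbytes -= reclen;
	}
}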
+40
fs/fuse/sysctl.c
··· 1 + // SPDX-License-Identifier: GPL-2.0
2 + /*
3 + * linux/fs/fuse/sysctl.c
4 + *
5 + * Sysctl interface to fuse parameters
6 + */
7 + #include <linux/sysctl.h>
8 + 
9 + #include "fuse_i.h"
10 + 
11 + static struct ctl_table_header *fuse_table_header;
12 + 
13 + /* Bound by fuse_init_out max_pages, which is a u16 */
14 + static unsigned int sysctl_fuse_max_pages_limit = 65535;
15 + 
16 + static struct ctl_table fuse_sysctl_table[] = {
17 + {
18 + .procname = "max_pages_limit",
19 + .data = &fuse_max_pages_limit,
20 + .maxlen = sizeof(fuse_max_pages_limit),
21 + .mode = 0644,
22 + .proc_handler = proc_douintvec_minmax,
23 + .extra1 = SYSCTL_ONE,
24 + .extra2 = &sysctl_fuse_max_pages_limit,
25 + },
26 + };
27 + 
28 + int fuse_sysctl_register(void)
29 + {
30 + fuse_table_header = register_sysctl("fs/fuse", fuse_sysctl_table);
31 + if (!fuse_table_header)
32 + return -ENOMEM;
33 + return 0;
34 + }
35 + 
36 + void fuse_sysctl_unregister(void)
37 + {
38 + unregister_sysctl_table(fuse_table_header);
39 + fuse_table_header = NULL;
40 + }
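With this file built in, the knob appears at /proc/sys/fs/fuse/max_pages_limit; per the table above, proc_douintvec_minmax clamps writes to [1, 65535], and the 0644 mode makes it root-writable, world-readable. A throwaway reader (illustrative; the bytes-per-request figure assumes 4 KiB pages):

#include <stdio.h>

int main(void)
{
	unsigned int limit;
	FILE *f = fopen("/proc/sys/fs/fuse/max_pages_limit", "r");

	if (!f)
		return 1;
	if (fscanf(f, "%u", &limit) != 1) {
		fclose(f);
		return 1;
	}
	fclose(f);
	printf("fuse max_pages_limit = %u pages (%lu bytes per request)\n",
	       limit, (unsigned long)limit * 4096);
	return 0;
}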
+42 -35
fs/fuse/virtio_fs.c
··· 97 97 }; 98 98 99 99 static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, 100 - struct fuse_req *req, bool in_flight); 100 + struct fuse_req *req, bool in_flight, 101 + gfp_t gfp); 101 102 102 103 static const struct constant_table dax_param_enums[] = { 103 104 {"always", FUSE_DAX_ALWAYS }, ··· 576 575 577 576 /* Dispatch pending requests */ 578 577 while (1) { 578 + unsigned int flags; 579 + 579 580 spin_lock(&fsvq->lock); 580 581 req = list_first_entry_or_null(&fsvq->queued_reqs, 581 582 struct fuse_req, list); ··· 588 585 list_del_init(&req->list); 589 586 spin_unlock(&fsvq->lock); 590 587 591 - ret = virtio_fs_enqueue_req(fsvq, req, true); 588 + flags = memalloc_nofs_save(); 589 + ret = virtio_fs_enqueue_req(fsvq, req, true, GFP_KERNEL); 590 + memalloc_nofs_restore(flags); 592 591 if (ret < 0) { 593 592 if (ret == -ENOSPC) { 594 593 spin_lock(&fsvq->lock); ··· 691 686 } 692 687 693 688 /* Allocate and copy args into req->argbuf */ 694 - static int copy_args_to_argbuf(struct fuse_req *req) 689 + static int copy_args_to_argbuf(struct fuse_req *req, gfp_t gfp) 695 690 { 696 691 struct fuse_args *args = req->args; 697 692 unsigned int offset = 0; ··· 705 700 len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) + 706 701 fuse_len_args(num_out, args->out_args); 707 702 708 - req->argbuf = kmalloc(len, GFP_ATOMIC); 703 + req->argbuf = kmalloc(len, gfp); 709 704 if (!req->argbuf) 710 705 return -ENOMEM; 711 706 ··· 765 760 struct fuse_args *args; 766 761 struct fuse_args_pages *ap; 767 762 unsigned int len, i, thislen; 768 - struct page *page; 763 + struct folio *folio; 769 764 770 765 /* 771 766 * TODO verify that server properly follows FUSE protocol ··· 777 772 if (args->out_pages && args->page_zeroing) { 778 773 len = args->out_args[args->out_numargs - 1].size; 779 774 ap = container_of(args, typeof(*ap), args); 780 - for (i = 0; i < ap->num_pages; i++) { 775 + for (i = 0; i < ap->num_folios; i++) { 781 776 thislen = ap->descs[i].length; 782 777 if (len < thislen) { 783 778 WARN_ON(ap->descs[i].offset); 784 - page = ap->pages[i]; 785 - zero_user_segment(page, len, thislen); 779 + folio = ap->folios[i]; 780 + folio_zero_segment(folio, len, thislen); 786 781 len = 0; 787 782 } else { 788 783 len -= thislen; ··· 1272 1267 } 1273 1268 1274 1269 /* Count number of scatter-gather elements required */ 1275 - static unsigned int sg_count_fuse_pages(struct fuse_page_desc *page_descs, 1276 - unsigned int num_pages, 1277 - unsigned int total_len) 1270 + static unsigned int sg_count_fuse_folios(struct fuse_folio_desc *folio_descs, 1271 + unsigned int num_folios, 1272 + unsigned int total_len) 1278 1273 { 1279 1274 unsigned int i; 1280 1275 unsigned int this_len; 1281 1276 1282 - for (i = 0; i < num_pages && total_len; i++) { 1283 - this_len = min(page_descs[i].length, total_len); 1277 + for (i = 0; i < num_folios && total_len; i++) { 1278 + this_len = min(folio_descs[i].length, total_len); 1284 1279 total_len -= this_len; 1285 1280 } 1286 1281 ··· 1299 1294 1300 1295 if (args->in_pages) { 1301 1296 size = args->in_args[args->in_numargs - 1].size; 1302 - total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages, 1303 - size); 1297 + total_sgs += sg_count_fuse_folios(ap->descs, ap->num_folios, 1298 + size); 1304 1299 } 1305 1300 1306 1301 if (!test_bit(FR_ISREPLY, &req->flags)) ··· 1313 1308 1314 1309 if (args->out_pages) { 1315 1310 size = args->out_args[args->out_numargs - 1].size; 1316 - total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages, 1317 - size); 1311 + total_sgs += 
sg_count_fuse_folios(ap->descs, ap->num_folios, 1312 + size); 1318 1313 } 1319 1314 1320 1315 return total_sgs; 1321 1316 } 1322 1317 1323 - /* Add pages to scatter-gather list and return number of elements used */ 1324 - static unsigned int sg_init_fuse_pages(struct scatterlist *sg, 1325 - struct page **pages, 1326 - struct fuse_page_desc *page_descs, 1327 - unsigned int num_pages, 1328 - unsigned int total_len) 1318 + /* Add folios to scatter-gather list and return number of elements used */ 1319 + static unsigned int sg_init_fuse_folios(struct scatterlist *sg, 1320 + struct folio **folios, 1321 + struct fuse_folio_desc *folio_descs, 1322 + unsigned int num_folios, 1323 + unsigned int total_len) 1329 1324 { 1330 1325 unsigned int i; 1331 1326 unsigned int this_len; 1332 1327 1333 - for (i = 0; i < num_pages && total_len; i++) { 1328 + for (i = 0; i < num_folios && total_len; i++) { 1334 1329 sg_init_table(&sg[i], 1); 1335 - this_len = min(page_descs[i].length, total_len); 1336 - sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset); 1330 + this_len = min(folio_descs[i].length, total_len); 1331 + sg_set_folio(&sg[i], folios[i], this_len, folio_descs[i].offset); 1337 1332 total_len -= this_len; 1338 1333 } 1339 1334 ··· 1358 1353 sg_init_one(&sg[total_sgs++], argbuf, len); 1359 1354 1360 1355 if (argpages) 1361 - total_sgs += sg_init_fuse_pages(&sg[total_sgs], 1362 - ap->pages, ap->descs, 1363 - ap->num_pages, 1364 - args[numargs - 1].size); 1356 + total_sgs += sg_init_fuse_folios(&sg[total_sgs], 1357 + ap->folios, ap->descs, 1358 + ap->num_folios, 1359 + args[numargs - 1].size); 1365 1360 1366 1361 if (len_used) 1367 1362 *len_used = len; ··· 1371 1366 1372 1367 /* Add a request to a virtqueue and kick the device */ 1373 1368 static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, 1374 - struct fuse_req *req, bool in_flight) 1369 + struct fuse_req *req, bool in_flight, 1370 + gfp_t gfp) 1375 1371 { 1376 1372 /* requests need at least 4 elements */ 1377 1373 struct scatterlist *stack_sgs[6]; ··· 1393 1387 /* Does the sglist fit on the stack? */ 1394 1388 total_sgs = sg_count_fuse_req(req); 1395 1389 if (total_sgs > ARRAY_SIZE(stack_sgs)) { 1396 - sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC); 1397 - sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC); 1390 + sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), gfp); 1391 + sg = kmalloc_array(total_sgs, sizeof(sg[0]), gfp); 1398 1392 if (!sgs || !sg) { 1399 1393 ret = -ENOMEM; 1400 1394 goto out; ··· 1402 1396 } 1403 1397 1404 1398 /* Use a bounce buffer since stack args cannot be mapped */ 1405 - ret = copy_args_to_argbuf(req); 1399 + ret = copy_args_to_argbuf(req, gfp); 1406 1400 if (ret < 0) 1407 1401 goto out; 1408 1402 ··· 1496 1490 queue_id); 1497 1491 1498 1492 fsvq = &fs->vqs[queue_id]; 1499 - ret = virtio_fs_enqueue_req(fsvq, req, false); 1493 + ret = virtio_fs_enqueue_req(fsvq, req, false, GFP_ATOMIC); 1500 1494 if (ret < 0) { 1501 1495 if (ret == -ENOSPC) { 1502 1496 /* ··· 1697 1691 fc->delete_stale = true; 1698 1692 fc->auto_submounts = true; 1699 1693 fc->sync_fs = true; 1694 + fc->use_pages_for_kvec_io = true; 1700 1695 1701 1696 /* Tell FUSE to split requests that exceed the virtqueue's size */ 1702 1697 fc->max_pages_limit = min_t(unsigned int, fc->max_pages_limit,
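The gfp_t plumbing above lets the retry worker allocate with GFP_KERNEL instead of GFP_ATOMIC (the initial enqueue, still in atomic context, keeps GFP_ATOMIC), while the memalloc_nofs_save()/memalloc_nofs_restore() pair scopes those allocations so direct reclaim cannot recurse back into the filesystem. The scoping pattern in isolation (alloc_from_fs_worker is an illustrative kernel-context sketch):

#include <linux/sched/mm.h>
#include <linux/slab.h>

static void *alloc_from_fs_worker(size_t len)
{
	unsigned int flags = memalloc_nofs_save();
	/* Within the scope, GFP_KERNEL implicitly behaves like GFP_NOFS. */
	void *buf = kmalloc(len, GFP_KERNEL);

	memalloc_nofs_restore(flags);
	return buf;
}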
+1
include/linux/mm.h
··· 2550 2550 struct page *get_dump_page(unsigned long addr); 2551 2551 2552 2552 bool folio_mark_dirty(struct folio *folio); 2553 + bool folio_mark_dirty_lock(struct folio *folio); 2553 2554 bool set_page_dirty(struct page *page); 2554 2555 int set_page_dirty_lock(struct page *page); 2555 2556
+6
mm/folio-compat.c
··· 52 52 } 53 53 EXPORT_SYMBOL(set_page_dirty); 54 54 55 + int set_page_dirty_lock(struct page *page) 56 + { 57 + return folio_mark_dirty_lock(page_folio(page)); 58 + } 59 + EXPORT_SYMBOL(set_page_dirty_lock); 60 + 55 61 bool clear_page_dirty_for_io(struct page *page) 56 62 { 57 63 return folio_clear_dirty_for_io(page_folio(page));
+11 -11
mm/page-writeback.c
··· 2925 2925 EXPORT_SYMBOL(folio_mark_dirty); 2926 2926 2927 2927 /* 2928 - * set_page_dirty() is racy if the caller has no reference against 2929 - * page->mapping->host, and if the page is unlocked. This is because another 2930 - * CPU could truncate the page off the mapping and then free the mapping. 2928 + * folio_mark_dirty() is racy if the caller has no reference against 2929 + * folio->mapping->host, and if the folio is unlocked. This is because another 2930 + * CPU could truncate the folio off the mapping and then free the mapping. 2931 2931 * 2932 - * Usually, the page _is_ locked, or the caller is a user-space process which 2932 + * Usually, the folio _is_ locked, or the caller is a user-space process which 2933 2933 * holds a reference on the inode by having an open file. 2934 2934 * 2935 - * In other cases, the page should be locked before running set_page_dirty(). 2935 + * In other cases, the folio should be locked before running folio_mark_dirty(). 2936 2936 */ 2937 - int set_page_dirty_lock(struct page *page) 2937 + bool folio_mark_dirty_lock(struct folio *folio) 2938 2938 { 2939 - int ret; 2939 + bool ret; 2940 2940 2941 - lock_page(page); 2942 - ret = set_page_dirty(page); 2943 - unlock_page(page); 2941 + folio_lock(folio); 2942 + ret = folio_mark_dirty(folio); 2943 + folio_unlock(folio); 2944 2944 return ret; 2945 2945 } 2946 - EXPORT_SYMBOL(set_page_dirty_lock); 2946 + EXPORT_SYMBOL(folio_mark_dirty_lock); 2947 2947 2948 2948 /* 2949 2949 * This cancels just the dirty bit on the kernel page itself, it does NOT
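folio_mark_dirty_lock() is the folio counterpart of set_page_dirty_lock(): it takes the folio lock around folio_mark_dirty() for callers that hold only a reference (for example, after pinning user pages for I/O) and therefore cannot assume the folio is already locked. Typical caller shape (put_written_folio is an illustrative sketch):

#include <linux/mm.h>
#include <linux/pagemap.h>

/* After writing into a pinned folio, dirty it and drop the reference. */
static void put_written_folio(struct folio *folio)
{
	folio_mark_dirty_lock(folio);	/* lock + folio_mark_dirty + unlock */
	folio_put(folio);
}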