Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'vfs-6.10-rc2.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs fixes from Christian Brauner:

- Fix io_uring based write-through after converting cifs to use the
netfs library

- Fix aio error handling when doing write-through via netfs library

- Fix performance regression in iomap when used with non-large folio
mappings

- Fix signalfd error code

- Remove obsolete comment in signalfd code

- Fix async request indication in netfs_perform_write() by raising
BDP_ASYNC when IOCB_NOWAIT is set

- Yield swap device immediately to prevent spurious EBUSY errors

- Don't cross a .backup mountpoint from backup volumes in afs to avoid
infinite loops

- Fix a race between umount and async request completion in 9p after 9p
was converted to use the netfs library

* tag 'vfs-6.10-rc2.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
netfs, 9p: Fix race between umount and async request completion
afs: Don't cross .backup mountpoint from backup volume
swap: yield device immediately
netfs: Fix setting of BDP_ASYNC from iocb flags
signalfd: drop an obsolete comment
signalfd: fix error return code
iomap: fault in smaller chunks for non-large folio mappings
filemap: add helper mapping_max_folio_size()
netfs: Fix AIO error handling when doing write-through
netfs: Fix io_uring based write-through

+68 -27
+1
fs/9p/vfs_inode.c
@@
 	__le32 __maybe_unused version;

 	if (!is_bad_inode(inode)) {
+		netfs_wait_for_outstanding_io(inode);
 		truncate_inode_pages_final(&inode->i_data);

 		version = cpu_to_le32(v9inode->qid.version);
+1
fs/afs/inode.c
@@

 	ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode);

+	netfs_wait_for_outstanding_io(inode);
 	truncate_inode_pages_final(&inode->i_data);

 	afs_set_cache_aux(vnode, &aux);
+5
fs/afs/mntpt.c
@@
 		put_page(page);
 		if (ret < 0)
 			return ret;
+
+		/* Don't cross a backup volume mountpoint from a backup volume */
+		if (src_as->volume && src_as->volume->type == AFSVL_BACKVOL &&
+		    ctx->type == AFSVL_BACKVOL)
+			return -ENODEV;
 	}

 	return 0;
+1 -1
fs/iomap/buffered-io.c
@@
 static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
 {
 	loff_t length = iomap_length(iter);
-	size_t chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
 	loff_t pos = iter->pos;
 	ssize_t total_written = 0;
 	long status = 0;
 	struct address_space *mapping = iter->inode->i_mapping;
+	size_t chunk = mapping_max_folio_size(mapping);
 	unsigned int bdp_flags = (iter->flags & IOMAP_NOWAIT) ? BDP_ASYNC : 0;

 	do {
+1 -1
fs/netfs/buffered_write.c
@@
 	struct folio *folio, *writethrough = NULL;
 	enum netfs_how_to_modify howto;
 	enum netfs_folio_trace trace;
-	unsigned int bdp_flags = (iocb->ki_flags & IOCB_SYNC) ? 0: BDP_ASYNC;
+	unsigned int bdp_flags = (iocb->ki_flags & IOCB_NOWAIT) ? BDP_ASYNC : 0;
 	ssize_t written = 0, ret, ret2;
 	loff_t i_size, pos = iocb->ki_pos, from, to;
 	size_t max_chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
+1 -1
fs/netfs/direct_write.c
@@
 static void netfs_cleanup_dio_write(struct netfs_io_request *wreq)
 {
 	struct inode *inode = wreq->inode;
-	unsigned long long end = wreq->start + wreq->len;
+	unsigned long long end = wreq->start + wreq->transferred;

 	if (!wreq->error &&
 	    i_size_read(inode) < end) {
+5
fs/netfs/objects.c
@@
 		}
 	}

+	atomic_inc(&ctx->io_count);
 	trace_netfs_rreq_ref(rreq->debug_id, 1, netfs_rreq_trace_new);
 	netfs_proc_add_rreq(rreq);
 	netfs_stat(&netfs_n_rh_rreq);
@@
 {
 	struct netfs_io_request *rreq =
 		container_of(work, struct netfs_io_request, work);
+	struct netfs_inode *ictx = netfs_inode(rreq->inode);
 	unsigned int i;

 	trace_netfs_rreq(rreq, netfs_rreq_trace_free);
@@
 		}
 		kvfree(rreq->direct_bv);
 	}
+
+	if (atomic_dec_and_test(&ictx->io_count))
+		wake_up_var(&ictx->io_count);
 	call_rcu(&rreq->rcu, netfs_free_request_rcu);
 }
+4 -3
fs/netfs/write_collect.c
@@
 	 * stream has a gap that can be jumped.
 	 */
 	if (notes & SOME_EMPTY) {
-		unsigned long long jump_to = wreq->start + wreq->len;
+		unsigned long long jump_to = wreq->start + READ_ONCE(wreq->submitted);

 		for (s = 0; s < NR_IO_STREAMS; s++) {
 			stream = &wreq->io_streams[s];
@@
 	wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);

 	if (wreq->iocb) {
-		wreq->iocb->ki_pos += wreq->transferred;
+		size_t written = min(wreq->transferred, wreq->len);
+		wreq->iocb->ki_pos += written;
 		if (wreq->iocb->ki_complete)
 			wreq->iocb->ki_complete(
-				wreq->iocb, wreq->error ? wreq->error : wreq->transferred);
+				wreq->iocb, wreq->error ? wreq->error : written);
 		wreq->iocb = VFS_PTR_POISON;
 	}
+7 -2
fs/netfs/write_issue.c
@@
 	stream->construct = NULL;

 	if (subreq->start + subreq->len > wreq->start + wreq->submitted)
-		wreq->len = wreq->submitted = subreq->start + subreq->len - wreq->start;
+		WRITE_ONCE(wreq->submitted, subreq->start + subreq->len - wreq->start);
 	netfs_do_issue_write(stream, subreq);
 }
@@

 	mutex_unlock(&ictx->wb_lock);

-	ret = wreq->error;
+	if (wreq->iocb) {
+		ret = -EIOCBQUEUED;
+	} else {
+		wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS, TASK_UNINTERRUPTIBLE);
+		ret = wreq->error;
+	}
 	netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
 	return ret;
 }
+1 -5
fs/signalfd.c
@@
 		if (IS_ERR(file)) {
 			put_unused_fd(ufd);
 			kfree(ctx);
-			return ufd;
+			return PTR_ERR(file);
 		}
 		file->f_mode |= FMODE_NOWAIT;

-		/*
-		 * When we call this, the initialization must be complete, since
-		 * anon_inode_getfd() will install the fd.
-		 */
 		fd_install(ufd, file);
 	} else {
 		struct fd f = fdget(ufd);
+1
fs/smb/client/cifsfs.c
@@
 static void
 cifs_evict_inode(struct inode *inode)
 {
+	netfs_wait_for_outstanding_io(inode);
 	truncate_inode_pages_final(&inode->i_data);
 	if (inode->i_state & I_PINNING_NETFS_WB)
 		cifs_fscache_unuse_inode_cookie(inode, true);
+18
include/linux/netfs.h
@@
 	loff_t remote_i_size;		/* Size of the remote file */
 	loff_t zero_point;		/* Size after which we assume there's no data
 					 * on the server */
+	atomic_t io_count;		/* Number of outstanding reqs */
 	unsigned long flags;
 #define NETFS_ICTX_ODIRECT 0		/* The file has DIO in progress */
 #define NETFS_ICTX_UNBUFFERED 1		/* I/O should not use the pagecache */
@@
 	ctx->remote_i_size = i_size_read(&ctx->inode);
 	ctx->zero_point = LLONG_MAX;
 	ctx->flags = 0;
+	atomic_set(&ctx->io_count, 0);
 #if IS_ENABLED(CONFIG_FSCACHE)
 	ctx->cache = NULL;
 #endif
@@
 #else
 	return NULL;
 #endif
+}
+
+/**
+ * netfs_wait_for_outstanding_io - Wait for outstanding I/O to complete
+ * @ctx: The netfs inode to wait on
+ *
+ * Wait for outstanding I/O requests of any type to complete. This is intended
+ * to be called from inode eviction routines. This makes sure that any
+ * resources held by those requests are cleaned up before we let the inode get
+ * cleaned up.
+ */
+static inline void netfs_wait_for_outstanding_io(struct inode *inode)
+{
+	struct netfs_inode *ictx = netfs_inode(inode);
+
+	wait_var_event(&ictx->io_count, atomic_read(&ictx->io_count) == 0);
 }

 #endif /* _LINUX_NETFS_H */
+21 -13
include/linux/pagemap.h
@@
 	m->gfp_mask = mask;
 }

+/*
+ * There are some parts of the kernel which assume that PMD entries
+ * are exactly HPAGE_PMD_ORDER. Those should be fixed, but until then,
+ * limit the maximum allocation order to PMD size. I'm not aware of any
+ * assumptions about maximum order if THP are disabled, but 8 seems like
+ * a good order (that's 1MB if you're using 4kB pages)
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define MAX_PAGECACHE_ORDER	HPAGE_PMD_ORDER
+#else
+#define MAX_PAGECACHE_ORDER	8
+#endif
+
 /**
  * mapping_set_large_folios() - Indicate the file supports large folios.
  * @mapping: The file.
@@
 {
 	return IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
 		test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
+}
+
+/* Return the maximum folio size for this pagecache mapping, in bytes. */
+static inline size_t mapping_max_folio_size(struct address_space *mapping)
+{
+	if (mapping_large_folio_support(mapping))
+		return PAGE_SIZE << MAX_PAGECACHE_ORDER;
+	return PAGE_SIZE;
 }
@@
 {
 	return folio_detach_private(page_folio(page));
 }
-
-/*
- * There are some parts of the kernel which assume that PMD entries
- * are exactly HPAGE_PMD_ORDER. Those should be fixed, but until then,
- * limit the maximum allocation order to PMD size. I'm not aware of any
- * assumptions about maximum order if THP are disabled, but 8 seems like
- * a good order (that's 1MB if you're using 4kB pages)
- */
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define MAX_PAGECACHE_ORDER	HPAGE_PMD_ORDER
-#else
-#define MAX_PAGECACHE_ORDER	8
-#endif

 #ifdef CONFIG_NUMA
 struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order);
+1 -1
kernel/power/swap.c
@@

 put:
 	if (error)
-		fput(hib_resume_bdev_file);
+		bdev_fput(hib_resume_bdev_file);
 	else
 		pr_debug("Image signature found, resuming\n");
 	} else {