Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'vfs-6.12-rc5.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs fixes from Christian Brauner:
"afs:
- Fix a lock recursion in afs_wake_up_async_call() on ->notify_lock

netfs:
- Drop the references to a folio immediately after the folio has been
extracted to prevent races with future I/O collection

- Fix a documentation build error

- Downgrade the i_rwsem for buffered writes to fix a cifs reported
performance regression when switching to netfslib

vfs:
- Explicitly return -E2BIG from openat2() if the specified size is
unexpectedly large. This aligns openat2() with other extensible
struct based system calls

- When copying a mount namespace ensure that we only try to remove
the new copy from the mount namespace rbtree if it has already been
added to it

nilfs:
- Clear the buffer delay flag when clearing the buffer state flags
when a buffer head is discarded to prevent a kernel OOPs

ocfs2:
- Fix an uninitialized value warning in ocfs2_setattr()

proc:
- Fix a kernel doc warning"

* tag 'vfs-6.12-rc5.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
proc: Fix W=1 build kernel-doc warning
afs: Fix lock recursion
fs: Fix uninitialized value issue in from_kuid and from_kgid
fs: don't try and remove empty rbtree node
netfs: Downgrade i_rwsem for a buffered write
nilfs2: fix kernel bug due to missing clearing of buffer delay flag
openat2: explicitly return -E2BIG for (usize > PAGE_SIZE)
netfs: fix documentation build error
netfs: In readahead, put the folio refs as soon extracted

+95 -67
-1
Documentation/filesystems/netfs_library.rst
··· 592 592 593 593 .. kernel-doc:: include/linux/netfs.h 594 594 .. kernel-doc:: fs/netfs/buffered_read.c 595 - .. kernel-doc:: fs/netfs/io.c
+2
fs/afs/internal.h
··· 130 130 wait_queue_head_t waitq; /* processes awaiting completion */ 131 131 struct work_struct async_work; /* async I/O processor */ 132 132 struct work_struct work; /* actual work processor */ 133 + struct work_struct free_work; /* Deferred free processor */ 133 134 struct rxrpc_call *rxcall; /* RxRPC call handle */ 134 135 struct rxrpc_peer *peer; /* Remote endpoint */ 135 136 struct key *key; /* security for this call */ ··· 1332 1331 extern void __net_exit afs_close_socket(struct afs_net *); 1333 1332 extern void afs_charge_preallocation(struct work_struct *); 1334 1333 extern void afs_put_call(struct afs_call *); 1334 + void afs_deferred_put_call(struct afs_call *call); 1335 1335 void afs_make_call(struct afs_call *call, gfp_t gfp); 1336 1336 void afs_wait_for_call_to_complete(struct afs_call *call); 1337 1337 extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
+59 -24
fs/afs/rxrpc.c
··· 18 18 19 19 struct workqueue_struct *afs_async_calls; 20 20 21 + static void afs_deferred_free_worker(struct work_struct *work); 21 22 static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long); 22 23 static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long); 23 24 static void afs_process_async_call(struct work_struct *); ··· 150 149 call->debug_id = atomic_inc_return(&rxrpc_debug_id); 151 150 refcount_set(&call->ref, 1); 152 151 INIT_WORK(&call->async_work, afs_process_async_call); 152 + INIT_WORK(&call->free_work, afs_deferred_free_worker); 153 153 init_waitqueue_head(&call->waitq); 154 154 spin_lock_init(&call->state_lock); 155 155 call->iter = &call->def_iter; ··· 159 157 trace_afs_call(call->debug_id, afs_call_trace_alloc, 1, o, 160 158 __builtin_return_address(0)); 161 159 return call; 160 + } 161 + 162 + static void afs_free_call(struct afs_call *call) 163 + { 164 + struct afs_net *net = call->net; 165 + int o; 166 + 167 + ASSERT(!work_pending(&call->async_work)); 168 + 169 + rxrpc_kernel_put_peer(call->peer); 170 + 171 + if (call->rxcall) { 172 + rxrpc_kernel_shutdown_call(net->socket, call->rxcall); 173 + rxrpc_kernel_put_call(net->socket, call->rxcall); 174 + call->rxcall = NULL; 175 + } 176 + if (call->type->destructor) 177 + call->type->destructor(call); 178 + 179 + afs_unuse_server_notime(call->net, call->server, afs_server_trace_put_call); 180 + kfree(call->request); 181 + 182 + o = atomic_read(&net->nr_outstanding_calls); 183 + trace_afs_call(call->debug_id, afs_call_trace_free, 0, o, 184 + __builtin_return_address(0)); 185 + kfree(call); 186 + 187 + o = atomic_dec_return(&net->nr_outstanding_calls); 188 + if (o == 0) 189 + wake_up_var(&net->nr_outstanding_calls); 162 190 } 163 191 164 192 /* ··· 205 173 o = atomic_read(&net->nr_outstanding_calls); 206 174 trace_afs_call(debug_id, afs_call_trace_put, r - 1, o, 207 175 __builtin_return_address(0)); 176 + if (zero) 177 + afs_free_call(call); 
178 + } 208 179 209 - if (zero) { 210 - ASSERT(!work_pending(&call->async_work)); 211 - ASSERT(call->type->name != NULL); 180 + static void afs_deferred_free_worker(struct work_struct *work) 181 + { 182 + struct afs_call *call = container_of(work, struct afs_call, free_work); 212 183 213 - rxrpc_kernel_put_peer(call->peer); 184 + afs_free_call(call); 185 + } 214 186 215 - if (call->rxcall) { 216 - rxrpc_kernel_shutdown_call(net->socket, call->rxcall); 217 - rxrpc_kernel_put_call(net->socket, call->rxcall); 218 - call->rxcall = NULL; 219 - } 220 - if (call->type->destructor) 221 - call->type->destructor(call); 187 + /* 188 + * Dispose of a reference on a call, deferring the cleanup to a workqueue 189 + * to avoid lock recursion. 190 + */ 191 + void afs_deferred_put_call(struct afs_call *call) 192 + { 193 + struct afs_net *net = call->net; 194 + unsigned int debug_id = call->debug_id; 195 + bool zero; 196 + int r, o; 222 197 223 - afs_unuse_server_notime(call->net, call->server, afs_server_trace_put_call); 224 - kfree(call->request); 225 - 226 - trace_afs_call(call->debug_id, afs_call_trace_free, 0, o, 227 - __builtin_return_address(0)); 228 - kfree(call); 229 - 230 - o = atomic_dec_return(&net->nr_outstanding_calls); 231 - if (o == 0) 232 - wake_up_var(&net->nr_outstanding_calls); 233 - } 198 + zero = __refcount_dec_and_test(&call->ref, &r); 199 + o = atomic_read(&net->nr_outstanding_calls); 200 + trace_afs_call(debug_id, afs_call_trace_put, r - 1, o, 201 + __builtin_return_address(0)); 202 + if (zero) 203 + schedule_work(&call->free_work); 234 204 } 235 205 236 206 static struct afs_call *afs_get_call(struct afs_call *call, ··· 674 640 } 675 641 676 642 /* 677 - * wake up an asynchronous call 643 + * Wake up an asynchronous call. The caller is holding the call notify 644 + * spinlock around this, so we can't call afs_put_call(). 
678 645 */ 679 646 static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall, 680 647 unsigned long call_user_ID) ··· 692 657 __builtin_return_address(0)); 693 658 694 659 if (!queue_work(afs_async_calls, &call->async_work)) 695 - afs_put_call(call); 660 + afs_deferred_put_call(call); 696 661 } 697 662 } 698 663
+3 -1
fs/namespace.c
··· 3944 3944 new = copy_tree(old, old->mnt.mnt_root, copy_flags); 3945 3945 if (IS_ERR(new)) { 3946 3946 namespace_unlock(); 3947 - free_mnt_ns(new_ns); 3947 + ns_free_inum(&new_ns->ns); 3948 + dec_mnt_namespaces(new_ns->ucounts); 3949 + mnt_ns_release(new_ns); 3948 3950 return ERR_CAST(new); 3949 3951 } 3950 3952 if (user_ns != ns->user_ns) {
+14 -33
fs/netfs/buffered_read.c
··· 67 67 * Decant the list of folios to read into a rolling buffer. 68 68 */ 69 69 static size_t netfs_load_buffer_from_ra(struct netfs_io_request *rreq, 70 - struct folio_queue *folioq) 70 + struct folio_queue *folioq, 71 + struct folio_batch *put_batch) 71 72 { 72 73 unsigned int order, nr; 73 74 size_t size = 0; ··· 83 82 order = folio_order(folio); 84 83 folioq->orders[i] = order; 85 84 size += PAGE_SIZE << order; 85 + 86 + if (!folio_batch_add(put_batch, folio)) 87 + folio_batch_release(put_batch); 86 88 } 87 89 88 90 for (int i = nr; i < folioq_nr_slots(folioq); i++) ··· 124 120 * that we will need to release later - but we don't want to do 125 121 * that until after we've started the I/O. 126 122 */ 123 + struct folio_batch put_batch; 124 + 125 + folio_batch_init(&put_batch); 127 126 while (rreq->submitted < subreq->start + rsize) { 128 127 struct folio_queue *tail = rreq->buffer_tail, *new; 129 128 size_t added; ··· 139 132 new->prev = tail; 140 133 tail->next = new; 141 134 rreq->buffer_tail = new; 142 - added = netfs_load_buffer_from_ra(rreq, new); 135 + added = netfs_load_buffer_from_ra(rreq, new, &put_batch); 143 136 rreq->iter.count += added; 144 137 rreq->submitted += added; 145 138 } 139 + folio_batch_release(&put_batch); 146 140 } 147 141 148 142 subreq->len = rsize; ··· 356 348 static int netfs_prime_buffer(struct netfs_io_request *rreq) 357 349 { 358 350 struct folio_queue *folioq; 351 + struct folio_batch put_batch; 359 352 size_t added; 360 353 361 354 folioq = kmalloc(sizeof(*folioq), GFP_KERNEL); ··· 369 360 rreq->submitted = rreq->start; 370 361 iov_iter_folio_queue(&rreq->iter, ITER_DEST, folioq, 0, 0, 0); 371 362 372 - added = netfs_load_buffer_from_ra(rreq, folioq); 363 + folio_batch_init(&put_batch); 364 + added = netfs_load_buffer_from_ra(rreq, folioq, &put_batch); 365 + folio_batch_release(&put_batch); 373 366 rreq->iter.count += added; 374 367 rreq->submitted += added; 375 368 return 0; 376 - } 377 - 378 - /* 379 - * Drop the ref on 
each folio that we inherited from the VM readahead code. We 380 - * still have the folio locks to pin the page until we complete the I/O. 381 - * 382 - * Note that we can't just release the batch in each queue struct as we use the 383 - * occupancy count in other places. 384 - */ 385 - static void netfs_put_ra_refs(struct folio_queue *folioq) 386 - { 387 - struct folio_batch fbatch; 388 - 389 - folio_batch_init(&fbatch); 390 - while (folioq) { 391 - for (unsigned int slot = 0; slot < folioq_count(folioq); slot++) { 392 - struct folio *folio = folioq_folio(folioq, slot); 393 - if (!folio) 394 - continue; 395 - trace_netfs_folio(folio, netfs_folio_trace_read_put); 396 - if (!folio_batch_add(&fbatch, folio)) 397 - folio_batch_release(&fbatch); 398 - } 399 - folioq = folioq->next; 400 - } 401 - 402 - folio_batch_release(&fbatch); 403 369 } 404 370 405 371 /** ··· 419 435 if (netfs_prime_buffer(rreq) < 0) 420 436 goto cleanup_free; 421 437 netfs_read_to_pagecache(rreq); 422 - 423 - /* Release the folio refs whilst we're waiting for the I/O. */ 424 - netfs_put_ra_refs(rreq->buffer); 425 438 426 439 netfs_put_request(rreq, true, netfs_rreq_trace_put_return); 427 440 return;
+2 -1
fs/netfs/locking.c
··· 109 109 up_write(&inode->i_rwsem); 110 110 return -ERESTARTSYS; 111 111 } 112 + downgrade_write(&inode->i_rwsem); 112 113 return 0; 113 114 } 114 115 EXPORT_SYMBOL(netfs_start_io_write); ··· 124 123 void netfs_end_io_write(struct inode *inode) 125 124 __releases(inode->i_rwsem) 126 125 { 127 - up_write(&inode->i_rwsem); 126 + up_read(&inode->i_rwsem); 128 127 } 129 128 EXPORT_SYMBOL(netfs_end_io_write); 130 129
+2
fs/netfs/read_collect.c
··· 77 77 folio_unlock(folio); 78 78 } 79 79 } 80 + 81 + folioq_clear(folioq, slot); 80 82 } 81 83 82 84 /*
+4 -2
fs/nilfs2/page.c
··· 77 77 const unsigned long clear_bits = 78 78 (BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) | 79 79 BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) | 80 - BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected)); 80 + BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected) | 81 + BIT(BH_Delay)); 81 82 82 83 lock_buffer(bh); 83 84 set_mask_bits(&bh->b_state, clear_bits, 0); ··· 407 406 const unsigned long clear_bits = 408 407 (BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) | 409 408 BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) | 410 - BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected)); 409 + BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected) | 410 + BIT(BH_Delay)); 411 411 412 412 bh = head; 413 413 do {
+6 -3
fs/ocfs2/file.c
··· 1129 1129 trace_ocfs2_setattr(inode, dentry, 1130 1130 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1131 1131 dentry->d_name.len, dentry->d_name.name, 1132 - attr->ia_valid, attr->ia_mode, 1133 - from_kuid(&init_user_ns, attr->ia_uid), 1134 - from_kgid(&init_user_ns, attr->ia_gid)); 1132 + attr->ia_valid, 1133 + attr->ia_valid & ATTR_MODE ? attr->ia_mode : 0, 1134 + attr->ia_valid & ATTR_UID ? 1135 + from_kuid(&init_user_ns, attr->ia_uid) : 0, 1136 + attr->ia_valid & ATTR_GID ? 1137 + from_kgid(&init_user_ns, attr->ia_gid) : 0); 1135 1138 1136 1139 /* ensuring we don't even attempt to truncate a symlink */ 1137 1140 if (S_ISLNK(inode->i_mode))
+2
fs/open.c
··· 1457 1457 1458 1458 if (unlikely(usize < OPEN_HOW_SIZE_VER0)) 1459 1459 return -EINVAL; 1460 + if (unlikely(usize > PAGE_SIZE)) 1461 + return -E2BIG; 1460 1462 1461 1463 err = copy_struct_from_user(&tmp, sizeof(tmp), how, usize); 1462 1464 if (err)
+1 -1
fs/proc/fd.c
··· 77 77 return single_open(file, seq_show, inode); 78 78 } 79 79 80 - /** 80 + /* 81 81 * Shared /proc/pid/fdinfo and /proc/pid/fdinfo/fd permission helper to ensure 82 82 * that the current task has PTRACE_MODE_READ in addition to the normal 83 83 * POSIX-like checks.
-1
include/trace/events/netfs.h
··· 172 172 EM(netfs_folio_trace_read, "read") \ 173 173 EM(netfs_folio_trace_read_done, "read-done") \ 174 174 EM(netfs_folio_trace_read_gaps, "read-gaps") \ 175 - EM(netfs_folio_trace_read_put, "read-put") \ 176 175 EM(netfs_folio_trace_read_unlock, "read-unlock") \ 177 176 EM(netfs_folio_trace_redirtied, "redirtied") \ 178 177 EM(netfs_folio_trace_store, "store") \