Revert "fsnotify: generate pre-content permission event on page fault"

This reverts commit 8392bc2ff8c8bf7c4c5e6dfa71ccd893a3c046f6.

When a buffered write uses an input buffer that is an mmapped file on a
filesystem with a pre-content mark, prefaulting of the buffer can happen
under the filesystem freeze protection (obtained in vfs_write()). This
breaks the assumptions of the pre-content hook and introduces a potential
deadlock between the HSM handler in userspace and filesystem freezing.

Now that we have pre-content hooks at file mmap() time, disable the
pre-content event hooks on page fault to avoid the potential deadlock.

Reported-by: syzbot+7229071b47908b19d5b7@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/linux-fsdevel/7ehxrhbvehlrjwvrduoxsao5k3x4aw275patsb3krkwuq573yv@o2hskrfawbnc/
Fixes: 8392bc2ff8c8 ("fsnotify: generate pre-content permission event on page fault")
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20250312073852.2123409-5-amir73il@gmail.com

authored by Amir Goldstein and committed by Jan Kara 955fbe0e 27773ce1

-82
-1
include/linux/mm.h
··· 3420 extern vm_fault_t filemap_map_pages(struct vm_fault *vmf, 3421 pgoff_t start_pgoff, pgoff_t end_pgoff); 3422 extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf); 3423 - extern vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf); 3424 3425 extern unsigned long stack_guard_gap; 3426 /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
··· 3420 extern vm_fault_t filemap_map_pages(struct vm_fault *vmf, 3421 pgoff_t start_pgoff, pgoff_t end_pgoff); 3422 extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf); 3423 3424 extern unsigned long stack_guard_gap; 3425 /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
-74
mm/filemap.c
··· 47 #include <linux/splice.h> 48 #include <linux/rcupdate_wait.h> 49 #include <linux/sched/mm.h> 50 - #include <linux/fsnotify.h> 51 #include <asm/pgalloc.h> 52 #include <asm/tlbflush.h> 53 #include "internal.h" ··· 3336 } 3337 3338 /** 3339 - * filemap_fsnotify_fault - maybe emit a pre-content event. 3340 - * @vmf: struct vm_fault containing details of the fault. 3341 - * 3342 - * If we have a pre-content watch on this file we will emit an event for this 3343 - * range. If we return anything the fault caller should return immediately, we 3344 - * will return VM_FAULT_RETRY if we had to emit an event, which will trigger the 3345 - * fault again and then the fault handler will run the second time through. 3346 - * 3347 - * Return: a bitwise-OR of %VM_FAULT_ codes, 0 if nothing happened. 3348 - */ 3349 - vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf) 3350 - { 3351 - struct file *fpin = NULL; 3352 - int mask = (vmf->flags & FAULT_FLAG_WRITE) ? MAY_WRITE : MAY_ACCESS; 3353 - loff_t pos = vmf->pgoff >> PAGE_SHIFT; 3354 - size_t count = PAGE_SIZE; 3355 - int err; 3356 - 3357 - /* 3358 - * We already did this and now we're retrying with everything locked, 3359 - * don't emit the event and continue. 3360 - */ 3361 - if (vmf->flags & FAULT_FLAG_TRIED) 3362 - return 0; 3363 - 3364 - /* No watches, we're done. */ 3365 - if (likely(!FMODE_FSNOTIFY_HSM(vmf->vma->vm_file->f_mode))) 3366 - return 0; 3367 - 3368 - fpin = maybe_unlock_mmap_for_io(vmf, fpin); 3369 - if (!fpin) 3370 - return VM_FAULT_SIGBUS; 3371 - 3372 - err = fsnotify_file_area_perm(fpin, mask, &pos, count); 3373 - fput(fpin); 3374 - if (err) 3375 - return VM_FAULT_SIGBUS; 3376 - return VM_FAULT_RETRY; 3377 - } 3378 - EXPORT_SYMBOL_GPL(filemap_fsnotify_fault); 3379 - 3380 - /** 3381 * filemap_fault - read in file data for page fault handling 3382 * @vmf: struct vm_fault containing details of the fault 3383 * ··· 3438 * or because readahead was otherwise unable to retrieve it. 
3439 */ 3440 if (unlikely(!folio_test_uptodate(folio))) { 3441 - /* 3442 - * If this is a precontent file we have can now emit an event to 3443 - * try and populate the folio. 3444 - */ 3445 - if (!(vmf->flags & FAULT_FLAG_TRIED) && 3446 - unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) { 3447 - loff_t pos = folio_pos(folio); 3448 - size_t count = folio_size(folio); 3449 - 3450 - /* We're NOWAIT, we have to retry. */ 3451 - if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT) { 3452 - folio_unlock(folio); 3453 - goto out_retry; 3454 - } 3455 - 3456 - if (mapping_locked) 3457 - filemap_invalidate_unlock_shared(mapping); 3458 - mapping_locked = false; 3459 - 3460 - folio_unlock(folio); 3461 - fpin = maybe_unlock_mmap_for_io(vmf, fpin); 3462 - if (!fpin) 3463 - goto out_retry; 3464 - 3465 - error = fsnotify_file_area_perm(fpin, MAY_ACCESS, &pos, 3466 - count); 3467 - if (error) 3468 - ret = VM_FAULT_SIGBUS; 3469 - goto out_retry; 3470 - } 3471 - 3472 /* 3473 * If the invalidate lock is not held, the folio was in cache 3474 * and uptodate and now it is not. Strange but possible since we
··· 47 #include <linux/splice.h> 48 #include <linux/rcupdate_wait.h> 49 #include <linux/sched/mm.h> 50 #include <asm/pgalloc.h> 51 #include <asm/tlbflush.h> 52 #include "internal.h" ··· 3337 } 3338 3339 /** 3340 * filemap_fault - read in file data for page fault handling 3341 * @vmf: struct vm_fault containing details of the fault 3342 * ··· 3481 * or because readahead was otherwise unable to retrieve it. 3482 */ 3483 if (unlikely(!folio_test_uptodate(folio))) { 3484 /* 3485 * If the invalidate lock is not held, the folio was in cache 3486 * and uptodate and now it is not. Strange but possible since we
-7
mm/nommu.c
··· 1613 } 1614 EXPORT_SYMBOL(remap_vmalloc_range); 1615 1616 - vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf) 1617 - { 1618 - BUG(); 1619 - return 0; 1620 - } 1621 - EXPORT_SYMBOL_GPL(filemap_fsnotify_fault); 1622 - 1623 vm_fault_t filemap_fault(struct vm_fault *vmf) 1624 { 1625 BUG();
··· 1613 } 1614 EXPORT_SYMBOL(remap_vmalloc_range); 1615 1616 vm_fault_t filemap_fault(struct vm_fault *vmf) 1617 { 1618 BUG();