Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

fs: replace mmap hook with .mmap_prepare for simple mappings

Since commit c84bf6dd2b83 ("mm: introduce new .mmap_prepare() file
callback"), the f_op->mmap() hook has been deprecated in favour of
f_op->mmap_prepare().

This callback is invoked in the mmap() logic far earlier, so error handling
can be performed more safely without complicated and bug-prone state
unwinding required should an error arise.

This hook also avoids passing a pointer to a not-yet-correctly-established
VMA, avoiding any issues with referencing this data structure.

It rather provides a pointer to the new struct vm_area_desc descriptor type
which contains all required state and allows easy setting of required
parameters without any consideration needing to be paid to locking or
reference counts.

Note that nested filesystems like overlayfs are compatible with an
.mmap_prepare() callback since commit bb666b7c2707 ("mm: add mmap_prepare()
compatibility layer for nested file systems").

In this patch we apply this change to file systems with relatively simple
mmap() hook logic - exfat, ceph, f2fs, bcachefs, zonefs, btrfs, ocfs2,
orangefs, nilfs2, romfs, ramfs and aio.

Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Link: https://lore.kernel.org/f528ac4f35b9378931bd800920fee53fc0c5c74d.1750099179.git.lorenzo.stoakes@oracle.com
Acked-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>

Authored by Lorenzo Stoakes and committed by Christian Brauner
2e3b37a7 9d5403b1

+58 -49
+4 -4
fs/aio.c
··· 392 392 #endif 393 393 }; 394 394 395 - static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma) 395 + static int aio_ring_mmap_prepare(struct vm_area_desc *desc) 396 396 { 397 - vm_flags_set(vma, VM_DONTEXPAND); 398 - vma->vm_ops = &aio_ring_vm_ops; 397 + desc->vm_flags |= VM_DONTEXPAND; 398 + desc->vm_ops = &aio_ring_vm_ops; 399 399 return 0; 400 400 } 401 401 402 402 static const struct file_operations aio_ring_fops = { 403 - .mmap = aio_ring_mmap, 403 + .mmap_prepare = aio_ring_mmap_prepare, 404 404 }; 405 405 406 406 #if IS_ENABLED(CONFIG_MIGRATION)
+4 -4
fs/bcachefs/fs.c
··· 1553 1553 .page_mkwrite = bch2_page_mkwrite, 1554 1554 }; 1555 1555 1556 - static int bch2_mmap(struct file *file, struct vm_area_struct *vma) 1556 + static int bch2_mmap_prepare(struct vm_area_desc *desc) 1557 1557 { 1558 - file_accessed(file); 1558 + file_accessed(desc->file); 1559 1559 1560 - vma->vm_ops = &bch_vm_ops; 1560 + desc->vm_ops = &bch_vm_ops; 1561 1561 return 0; 1562 1562 } 1563 1563 ··· 1740 1740 .llseek = bch2_llseek, 1741 1741 .read_iter = bch2_read_iter, 1742 1742 .write_iter = bch2_write_iter, 1743 - .mmap = bch2_mmap, 1743 + .mmap_prepare = bch2_mmap_prepare, 1744 1744 .get_unmapped_area = thp_get_unmapped_area, 1745 1745 .fsync = bch2_fsync, 1746 1746 .splice_read = filemap_splice_read,
+4 -3
fs/btrfs/file.c
··· 1978 1978 .page_mkwrite = btrfs_page_mkwrite, 1979 1979 }; 1980 1980 1981 - static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) 1981 + static int btrfs_file_mmap_prepare(struct vm_area_desc *desc) 1982 1982 { 1983 + struct file *filp = desc->file; 1983 1984 struct address_space *mapping = filp->f_mapping; 1984 1985 1985 1986 if (!mapping->a_ops->read_folio) 1986 1987 return -ENOEXEC; 1987 1988 1988 1989 file_accessed(filp); 1989 - vma->vm_ops = &btrfs_file_vm_ops; 1990 + desc->vm_ops = &btrfs_file_vm_ops; 1990 1991 1991 1992 return 0; 1992 1993 } ··· 3766 3765 .splice_read = filemap_splice_read, 3767 3766 .write_iter = btrfs_file_write_iter, 3768 3767 .splice_write = iter_file_splice_write, 3769 - .mmap = btrfs_file_mmap, 3768 + .mmap_prepare = btrfs_file_mmap_prepare, 3770 3769 .open = btrfs_file_open, 3771 3770 .release = btrfs_release_file, 3772 3771 .get_unmapped_area = thp_get_unmapped_area,
+3 -3
fs/ceph/addr.c
··· 2330 2330 .page_mkwrite = ceph_page_mkwrite, 2331 2331 }; 2332 2332 2333 - int ceph_mmap(struct file *file, struct vm_area_struct *vma) 2333 + int ceph_mmap_prepare(struct vm_area_desc *desc) 2334 2334 { 2335 - struct address_space *mapping = file->f_mapping; 2335 + struct address_space *mapping = desc->file->f_mapping; 2336 2336 2337 2337 if (!mapping->a_ops->read_folio) 2338 2338 return -ENOEXEC; 2339 - vma->vm_ops = &ceph_vmops; 2339 + desc->vm_ops = &ceph_vmops; 2340 2340 return 0; 2341 2341 } 2342 2342
+1 -1
fs/ceph/file.c
··· 3171 3171 .llseek = ceph_llseek, 3172 3172 .read_iter = ceph_read_iter, 3173 3173 .write_iter = ceph_write_iter, 3174 - .mmap = ceph_mmap, 3174 + .mmap_prepare = ceph_mmap_prepare, 3175 3175 .fsync = ceph_fsync, 3176 3176 .lock = ceph_lock, 3177 3177 .setlease = simple_nosetlease,
+1 -1
fs/ceph/super.h
··· 1286 1286 /* addr.c */ 1287 1287 extern const struct address_space_operations ceph_aops; 1288 1288 extern const struct netfs_request_ops ceph_netfs_ops; 1289 - extern int ceph_mmap(struct file *file, struct vm_area_struct *vma); 1289 + int ceph_mmap_prepare(struct vm_area_desc *desc); 1290 1290 extern int ceph_uninline_data(struct file *file); 1291 1291 extern int ceph_pool_perm_check(struct inode *inode, int need); 1292 1292 extern void ceph_pool_perm_destroy(struct ceph_mds_client* mdsc);
+6 -4
fs/exfat/file.c
··· 683 683 .page_mkwrite = exfat_page_mkwrite, 684 684 }; 685 685 686 - static int exfat_file_mmap(struct file *file, struct vm_area_struct *vma) 686 + static int exfat_file_mmap_prepare(struct vm_area_desc *desc) 687 687 { 688 - if (unlikely(exfat_forced_shutdown(file_inode(file)->i_sb))) 688 + struct file *file = desc->file; 689 + 690 + if (unlikely(exfat_forced_shutdown(file_inode(desc->file)->i_sb))) 689 691 return -EIO; 690 692 691 693 file_accessed(file); 692 - vma->vm_ops = &exfat_file_vm_ops; 694 + desc->vm_ops = &exfat_file_vm_ops; 693 695 return 0; 694 696 } 695 697 ··· 712 710 #ifdef CONFIG_COMPAT 713 711 .compat_ioctl = exfat_compat_ioctl, 714 712 #endif 715 - .mmap = exfat_file_mmap, 713 + .mmap_prepare = exfat_file_mmap_prepare, 716 714 .fsync = exfat_file_fsync, 717 715 .splice_read = exfat_splice_read, 718 716 .splice_write = iter_file_splice_write,
+4 -3
fs/f2fs/file.c
··· 532 532 return -EINVAL; 533 533 } 534 534 535 - static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) 535 + static int f2fs_file_mmap_prepare(struct vm_area_desc *desc) 536 536 { 537 + struct file *file = desc->file; 537 538 struct inode *inode = file_inode(file); 538 539 539 540 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) ··· 544 543 return -EOPNOTSUPP; 545 544 546 545 file_accessed(file); 547 - vma->vm_ops = &f2fs_file_vm_ops; 546 + desc->vm_ops = &f2fs_file_vm_ops; 548 547 549 548 f2fs_down_read(&F2FS_I(inode)->i_sem); 550 549 set_inode_flag(inode, FI_MMAP_FILE); ··· 5377 5376 .iopoll = iocb_bio_iopoll, 5378 5377 .open = f2fs_file_open, 5379 5378 .release = f2fs_release_file, 5380 - .mmap = f2fs_file_mmap, 5379 + .mmap_prepare = f2fs_file_mmap_prepare, 5381 5380 .flush = f2fs_file_flush, 5382 5381 .fsync = f2fs_sync_file, 5383 5382 .fallocate = f2fs_fallocate,
+4 -4
fs/nilfs2/file.c
··· 125 125 .page_mkwrite = nilfs_page_mkwrite, 126 126 }; 127 127 128 - static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma) 128 + static int nilfs_file_mmap_prepare(struct vm_area_desc *desc) 129 129 { 130 - file_accessed(file); 131 - vma->vm_ops = &nilfs_file_vm_ops; 130 + file_accessed(desc->file); 131 + desc->vm_ops = &nilfs_file_vm_ops; 132 132 return 0; 133 133 } 134 134 ··· 144 144 #ifdef CONFIG_COMPAT 145 145 .compat_ioctl = nilfs_compat_ioctl, 146 146 #endif /* CONFIG_COMPAT */ 147 - .mmap = nilfs_file_mmap, 147 + .mmap_prepare = nilfs_file_mmap_prepare, 148 148 .open = generic_file_open, 149 149 /* .release = nilfs_release_file, */ 150 150 .fsync = nilfs_sync_file,
+2 -2
fs/ocfs2/file.c
··· 2800 2800 */ 2801 2801 const struct file_operations ocfs2_fops = { 2802 2802 .llseek = ocfs2_file_llseek, 2803 - .mmap = ocfs2_mmap, 2803 + .mmap_prepare = ocfs2_mmap_prepare, 2804 2804 .fsync = ocfs2_sync_file, 2805 2805 .release = ocfs2_file_release, 2806 2806 .open = ocfs2_file_open, ··· 2850 2850 */ 2851 2851 const struct file_operations ocfs2_fops_no_plocks = { 2852 2852 .llseek = ocfs2_file_llseek, 2853 - .mmap = ocfs2_mmap, 2853 + .mmap_prepare = ocfs2_mmap_prepare, 2854 2854 .fsync = ocfs2_sync_file, 2855 2855 .release = ocfs2_file_release, 2856 2856 .open = ocfs2_file_open,
+3 -2
fs/ocfs2/mmap.c
··· 159 159 .page_mkwrite = ocfs2_page_mkwrite, 160 160 }; 161 161 162 - int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) 162 + int ocfs2_mmap_prepare(struct vm_area_desc *desc) 163 163 { 164 + struct file *file = desc->file; 164 165 int ret = 0, lock_level = 0; 165 166 166 167 ret = ocfs2_inode_lock_atime(file_inode(file), ··· 172 171 } 173 172 ocfs2_inode_unlock(file_inode(file), lock_level); 174 173 out: 175 - vma->vm_ops = &ocfs2_file_vm_ops; 174 + desc->vm_ops = &ocfs2_file_vm_ops; 176 175 return 0; 177 176 } 178 177
+1 -1
fs/ocfs2/mmap.h
··· 2 2 #ifndef OCFS2_MMAP_H 3 3 #define OCFS2_MMAP_H 4 4 5 - int ocfs2_mmap(struct file *file, struct vm_area_struct *vma); 5 + int ocfs2_mmap_prepare(struct vm_area_desc *desc); 6 6 7 7 #endif /* OCFS2_MMAP_H */
+6 -4
fs/orangefs/file.c
··· 398 398 /* 399 399 * Memory map a region of a file. 400 400 */ 401 - static int orangefs_file_mmap(struct file *file, struct vm_area_struct *vma) 401 + static int orangefs_file_mmap_prepare(struct vm_area_desc *desc) 402 402 { 403 + struct file *file = desc->file; 403 404 int ret; 404 405 405 406 ret = orangefs_revalidate_mapping(file_inode(file)); ··· 411 410 "orangefs_file_mmap: called on %pD\n", file); 412 411 413 412 /* set the sequential readahead hint */ 414 - vm_flags_mod(vma, VM_SEQ_READ, VM_RAND_READ); 413 + desc->vm_flags |= VM_SEQ_READ; 414 + desc->vm_flags &= ~VM_RAND_READ; 415 415 416 416 file_accessed(file); 417 - vma->vm_ops = &orangefs_file_vm_ops; 417 + desc->vm_ops = &orangefs_file_vm_ops; 418 418 return 0; 419 419 } 420 420 ··· 576 574 .read_iter = orangefs_file_read_iter, 577 575 .write_iter = orangefs_file_write_iter, 578 576 .lock = orangefs_lock, 579 - .mmap = orangefs_file_mmap, 577 + .mmap_prepare = orangefs_file_mmap_prepare, 580 578 .open = generic_file_open, 581 579 .splice_read = orangefs_file_splice_read, 582 580 .splice_write = iter_file_splice_write,
+6 -6
fs/ramfs/file-nommu.c
··· 28 28 unsigned long len, 29 29 unsigned long pgoff, 30 30 unsigned long flags); 31 - static int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma); 31 + static int ramfs_nommu_mmap_prepare(struct vm_area_desc *desc); 32 32 33 33 static unsigned ramfs_mmap_capabilities(struct file *file) 34 34 { ··· 38 38 39 39 const struct file_operations ramfs_file_operations = { 40 40 .mmap_capabilities = ramfs_mmap_capabilities, 41 - .mmap = ramfs_nommu_mmap, 41 + .mmap_prepare = ramfs_nommu_mmap_prepare, 42 42 .get_unmapped_area = ramfs_nommu_get_unmapped_area, 43 43 .read_iter = generic_file_read_iter, 44 44 .write_iter = generic_file_write_iter, ··· 262 262 /* 263 263 * set up a mapping for shared memory segments 264 264 */ 265 - static int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma) 265 + static int ramfs_nommu_mmap_prepare(struct vm_area_desc *desc) 266 266 { 267 - if (!is_nommu_shared_mapping(vma->vm_flags)) 267 + if (!is_nommu_shared_mapping(desc->vm_flags)) 268 268 return -ENOSYS; 269 269 270 - file_accessed(file); 271 - vma->vm_ops = &generic_file_vm_ops; 270 + file_accessed(desc->file); 271 + desc->vm_ops = &generic_file_vm_ops; 272 272 return 0; 273 273 }
+3 -3
fs/romfs/mmap-nommu.c
··· 61 61 * permit a R/O mapping to be made directly through onto an MTD device if 62 62 * possible 63 63 */ 64 - static int romfs_mmap(struct file *file, struct vm_area_struct *vma) 64 + static int romfs_mmap_prepare(struct vm_area_desc *desc) 65 65 { 66 - return is_nommu_shared_mapping(vma->vm_flags) ? 0 : -ENOSYS; 66 + return is_nommu_shared_mapping(desc->vm_flags) ? 0 : -ENOSYS; 67 67 } 68 68 69 69 static unsigned romfs_mmap_capabilities(struct file *file) ··· 79 79 .llseek = generic_file_llseek, 80 80 .read_iter = generic_file_read_iter, 81 81 .splice_read = filemap_splice_read, 82 - .mmap = romfs_mmap, 82 + .mmap_prepare = romfs_mmap_prepare, 83 83 .get_unmapped_area = romfs_get_unmapped_area, 84 84 .mmap_capabilities = romfs_mmap_capabilities, 85 85 };
+6 -4
fs/zonefs/file.c
··· 312 312 .page_mkwrite = zonefs_filemap_page_mkwrite, 313 313 }; 314 314 315 - static int zonefs_file_mmap(struct file *file, struct vm_area_struct *vma) 315 + static int zonefs_file_mmap_prepare(struct vm_area_desc *desc) 316 316 { 317 + struct file *file = desc->file; 318 + 317 319 /* 318 320 * Conventional zones accept random writes, so their files can support 319 321 * shared writable mappings. For sequential zone files, only read ··· 323 321 * ordering between msync() and page cache writeback. 324 322 */ 325 323 if (zonefs_inode_is_seq(file_inode(file)) && 326 - (vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) 324 + (desc->vm_flags & VM_SHARED) && (desc->vm_flags & VM_MAYWRITE)) 327 325 return -EINVAL; 328 326 329 327 file_accessed(file); 330 - vma->vm_ops = &zonefs_file_vm_ops; 328 + desc->vm_ops = &zonefs_file_vm_ops; 331 329 332 330 return 0; 333 331 } ··· 852 850 .open = zonefs_file_open, 853 851 .release = zonefs_file_release, 854 852 .fsync = zonefs_file_fsync, 855 - .mmap = zonefs_file_mmap, 853 + .mmap_prepare = zonefs_file_mmap_prepare, 856 854 .llseek = zonefs_file_llseek, 857 855 .read_iter = zonefs_file_read_iter, 858 856 .write_iter = zonefs_file_write_iter,