Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

mm: remove enum page_entry_size

Remove the unnecessary encoding of page order into an enum and pass the
page order directly. That lets us get rid of pe_order().
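
For reference, the removed pe_order() helper mapped the three enum values to orders 0, PMD_ORDER and PUD_ORDER; callers now pass those values directly. A minimal standalone sketch of that mapping, using illustrative x86-64 4K-page constants rather than the kernel's real headers:

#include <assert.h>

#define PAGE_SHIFT      12
#define PMD_SHIFT       21
#define PUD_SHIFT       30
#define PMD_ORDER       (PMD_SHIFT - PAGE_SHIFT)        /* 9: a 2MB mapping */
#define PUD_ORDER       (PUD_SHIFT - PAGE_SHIFT)        /* 18: a 1GB mapping */

int main(void)
{
        /* What pe_order() used to compute for each enum value: */
        assert(PAGE_SHIFT - PAGE_SHIFT == 0);   /* PE_SIZE_PTE -> order 0 */
        assert(PMD_ORDER == 9);                 /* PE_SIZE_PMD -> PMD_ORDER */
        assert(PUD_ORDER == 18);                /* PE_SIZE_PUD -> PUD_ORDER */
        return 0;
}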

The switch constructs have to be changed to if/else constructs to prevent
GCC from warning on builds with 3-level page tables where PMD_ORDER and
PUD_ORDER have the same value.
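
To illustrate the build problem (with made-up values, not the kernel's real configuration, and a purely illustrative helper): when both macros expand to the same constant, the switch form ends up with two case labels carrying the same value, which GCC flags as a duplicate case value, while an if/else chain is tolerant of the aliasing and simply lets the first matching branch win.

#define PMD_ORDER       9
#define PUD_ORDER       9       /* folded level: same value as PMD_ORDER */

#if 0   /* does not build: GCC reports a duplicate case value */
        switch (order) {
        case PMD_ORDER: ...
        case PUD_ORDER: ...
        }
#endif

int fault_order_class(unsigned int order)
{
        /* Equal constants are harmless here; the PMD branch shadows PUD. */
        if (order == 0)
                return 0;
        else if (order == PMD_ORDER)
                return 1;
        else if (order == PUD_ORDER)
                return 2;
        else
                return -1;
}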

If you are looking at this commit because your driver stopped compiling,
look at the previous commit as well and audit your driver to be sure it
doesn't depend on mmap_lock being held in its ->huge_fault method.
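
As a hedged sketch only (the mydrv_* names are hypothetical, not an in-tree driver), an affected ->huge_fault implementation would be converted along these lines, and must not rely on mmap_lock being held when it is called:

static vm_fault_t mydrv_huge_fault(struct vm_fault *vmf, unsigned int order)
{
        if (order == 0)
                return mydrv_pte_fault(vmf);    /* hypothetical helper */
        else if (order == PMD_ORDER)
                return mydrv_pmd_fault(vmf);    /* hypothetical helper */
        else
                return VM_FAULT_FALLBACK;
}

static const struct vm_operations_struct mydrv_vm_ops = {
        .fault          = mydrv_fault,          /* hypothetical */
        .huge_fault     = mydrv_huge_fault,
};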

[willy@infradead.org: use "order %u" to match the (non dev_t) style]
Link: https://lkml.kernel.org/r/ZOUYekbtTv+n8hYf@casper.infradead.org
Link: https://lkml.kernel.org/r/20230818202335.2739663-4-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

Authored by Matthew Wilcox (Oracle), committed by Andrew Morton
1d024e7a 40d49a3c

11 files changed, +59 -98
drivers/dax/device.c  +8 -14
···
 }
 #endif /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */

-static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
-                enum page_entry_size pe_size)
+static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf, unsigned int order)
 {
         struct file *filp = vmf->vma->vm_file;
         vm_fault_t rc = VM_FAULT_SIGBUS;
         int id;
         struct dev_dax *dev_dax = filp->private_data;

-        dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) size = %d\n", current->comm,
+        dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) order:%d\n", current->comm,
                 (vmf->flags & FAULT_FLAG_WRITE) ? "write" : "read",
-                vmf->vma->vm_start, vmf->vma->vm_end, pe_size);
+                vmf->vma->vm_start, vmf->vma->vm_end, order);

         id = dax_read_lock();
-        switch (pe_size) {
-        case PE_SIZE_PTE:
+        if (order == 0)
                 rc = __dev_dax_pte_fault(dev_dax, vmf);
-                break;
-        case PE_SIZE_PMD:
+        else if (order == PMD_ORDER)
                 rc = __dev_dax_pmd_fault(dev_dax, vmf);
-                break;
-        case PE_SIZE_PUD:
+        else if (order == PUD_ORDER)
                 rc = __dev_dax_pud_fault(dev_dax, vmf);
-                break;
-        default:
+        else
                 rc = VM_FAULT_SIGBUS;
-        }

         dax_read_unlock(id);
···

 static vm_fault_t dev_dax_fault(struct vm_fault *vmf)
 {
-        return dev_dax_huge_fault(vmf, PE_SIZE_PTE);
+        return dev_dax_huge_fault(vmf, 0);
 }

 static int dev_dax_may_split(struct vm_area_struct *vma, unsigned long addr)
fs/dax.c  +8 -22
···
 #define CREATE_TRACE_POINTS
 #include <trace/events/fs_dax.h>

-static inline unsigned int pe_order(enum page_entry_size pe_size)
-{
-        if (pe_size == PE_SIZE_PTE)
-                return PAGE_SHIFT - PAGE_SHIFT;
-        if (pe_size == PE_SIZE_PMD)
-                return PMD_SHIFT - PAGE_SHIFT;
-        if (pe_size == PE_SIZE_PUD)
-                return PUD_SHIFT - PAGE_SHIFT;
-        return ~0;
-}
-
 /* We choose 4096 entries - same as per-zone page wait tables */
 #define DAX_WAIT_TABLE_BITS 12
 #define DAX_WAIT_TABLE_ENTRIES (1 << DAX_WAIT_TABLE_BITS)
···
 /**
  * dax_iomap_fault - handle a page fault on a DAX file
  * @vmf: The description of the fault
- * @pe_size: Size of the page to fault in
+ * @order: Order of the page to fault in
  * @pfnp: PFN to insert for synchronous faults if fsync is required
  * @iomap_errp: Storage for detailed error code in case of error
  * @ops: Iomap ops passed from the file system
···
  * has done all the necessary locking for page fault to proceed
  * successfully.
  */
-vm_fault_t dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
+vm_fault_t dax_iomap_fault(struct vm_fault *vmf, unsigned int order,
                 pfn_t *pfnp, int *iomap_errp, const struct iomap_ops *ops)
 {
-        switch (pe_size) {
-        case PE_SIZE_PTE:
+        if (order == 0)
                 return dax_iomap_pte_fault(vmf, pfnp, iomap_errp, ops);
-        case PE_SIZE_PMD:
+        else if (order == PMD_ORDER)
                 return dax_iomap_pmd_fault(vmf, pfnp, ops);
-        default:
+        else
                 return VM_FAULT_FALLBACK;
-        }
 }
 EXPORT_SYMBOL_GPL(dax_iomap_fault);

···
 /**
  * dax_finish_sync_fault - finish synchronous page fault
  * @vmf: The description of the fault
- * @pe_size: Size of entry to be inserted
+ * @order: Order of entry to be inserted
  * @pfn: PFN to insert
  *
  * This function ensures that the file range touched by the page fault is
  * stored persistently on the media and handles inserting of appropriate page
  * table entry.
  */
-vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
-                enum page_entry_size pe_size, pfn_t pfn)
+vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, unsigned int order,
+                pfn_t pfn)
 {
         int err;
         loff_t start = ((loff_t)vmf->pgoff) << PAGE_SHIFT;
-        unsigned int order = pe_order(pe_size);
         size_t len = PAGE_SIZE << order;

         err = vfs_fsync_range(vmf->vma->vm_file, start, start + len - 1, 1);
fs/erofs/data.c  +3 -3
···

 #ifdef CONFIG_FS_DAX
 static vm_fault_t erofs_dax_huge_fault(struct vm_fault *vmf,
-                enum page_entry_size pe_size)
+                unsigned int order)
 {
-        return dax_iomap_fault(vmf, pe_size, NULL, NULL, &erofs_iomap_ops);
+        return dax_iomap_fault(vmf, order, NULL, NULL, &erofs_iomap_ops);
 }

 static vm_fault_t erofs_dax_fault(struct vm_fault *vmf)
 {
-        return erofs_dax_huge_fault(vmf, PE_SIZE_PTE);
+        return erofs_dax_huge_fault(vmf, 0);
 }

 static const struct vm_operations_struct erofs_dax_vm_ops = {
fs/ext2/file.c  +1 -1
···
         }
         filemap_invalidate_lock_shared(inode->i_mapping);

-        ret = dax_iomap_fault(vmf, PE_SIZE_PTE, NULL, NULL, &ext2_iomap_ops);
+        ret = dax_iomap_fault(vmf, 0, NULL, NULL, &ext2_iomap_ops);

         filemap_invalidate_unlock_shared(inode->i_mapping);
         if (write)
fs/ext4/file.c  +5 -6
···
 }

 #ifdef CONFIG_FS_DAX
-static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf,
-                enum page_entry_size pe_size)
+static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf, unsigned int order)
 {
         int error = 0;
         vm_fault_t result;
···
  * read-only.
  *
  * We check for VM_SHARED rather than vmf->cow_page since the latter is
- * unset for pe_size != PE_SIZE_PTE (i.e. only in do_cow_fault); for
+ * unset for order != 0 (i.e. only in do_cow_fault); for
  * other sizes, dax_iomap_fault will handle splitting / fallback so that
  * we eventually come back with a COW page.
  */
···
         } else {
                 filemap_invalidate_lock_shared(mapping);
         }
-        result = dax_iomap_fault(vmf, pe_size, &pfn, &error, &ext4_iomap_ops);
+        result = dax_iomap_fault(vmf, order, &pfn, &error, &ext4_iomap_ops);
         if (write) {
                 ext4_journal_stop(handle);
···
                         goto retry;
                 /* Handling synchronous page fault? */
                 if (result & VM_FAULT_NEEDDSYNC)
-                        result = dax_finish_sync_fault(vmf, pe_size, pfn);
+                        result = dax_finish_sync_fault(vmf, order, pfn);
                 filemap_invalidate_unlock_shared(mapping);
                 sb_end_pagefault(sb);
         } else {
···

 static vm_fault_t ext4_dax_fault(struct vm_fault *vmf)
 {
-        return ext4_dax_huge_fault(vmf, PE_SIZE_PTE);
+        return ext4_dax_huge_fault(vmf, 0);
 }

 static const struct vm_operations_struct ext4_dax_vm_ops = {
fs/fuse/dax.c  +9 -11
···
         return dax_writeback_mapping_range(mapping, fc->dax->dev, wbc);
 }

-static vm_fault_t __fuse_dax_fault(struct vm_fault *vmf,
-                enum page_entry_size pe_size, bool write)
+static vm_fault_t __fuse_dax_fault(struct vm_fault *vmf, unsigned int order,
+                bool write)
 {
         vm_fault_t ret;
         struct inode *inode = file_inode(vmf->vma->vm_file);
···
          * to populate page cache or access memory we are trying to free.
          */
         filemap_invalidate_lock_shared(inode->i_mapping);
-        ret = dax_iomap_fault(vmf, pe_size, &pfn, &error, &fuse_iomap_ops);
+        ret = dax_iomap_fault(vmf, order, &pfn, &error, &fuse_iomap_ops);
         if ((ret & VM_FAULT_ERROR) && error == -EAGAIN) {
                 error = 0;
                 retry = true;
···
         }

         if (ret & VM_FAULT_NEEDDSYNC)
-                ret = dax_finish_sync_fault(vmf, pe_size, pfn);
+                ret = dax_finish_sync_fault(vmf, order, pfn);
         filemap_invalidate_unlock_shared(inode->i_mapping);

         if (write)
···

 static vm_fault_t fuse_dax_fault(struct vm_fault *vmf)
 {
-        return __fuse_dax_fault(vmf, PE_SIZE_PTE,
-                        vmf->flags & FAULT_FLAG_WRITE);
+        return __fuse_dax_fault(vmf, 0, vmf->flags & FAULT_FLAG_WRITE);
 }

-static vm_fault_t fuse_dax_huge_fault(struct vm_fault *vmf,
-                enum page_entry_size pe_size)
+static vm_fault_t fuse_dax_huge_fault(struct vm_fault *vmf, unsigned int order)
 {
-        return __fuse_dax_fault(vmf, pe_size, vmf->flags & FAULT_FLAG_WRITE);
+        return __fuse_dax_fault(vmf, order, vmf->flags & FAULT_FLAG_WRITE);
 }

 static vm_fault_t fuse_dax_page_mkwrite(struct vm_fault *vmf)
 {
-        return __fuse_dax_fault(vmf, PE_SIZE_PTE, true);
+        return __fuse_dax_fault(vmf, 0, true);
 }

 static vm_fault_t fuse_dax_pfn_mkwrite(struct vm_fault *vmf)
 {
-        return __fuse_dax_fault(vmf, PE_SIZE_PTE, true);
+        return __fuse_dax_fault(vmf, 0, true);
 }

 static const struct vm_operations_struct fuse_dax_vm_ops = {
fs/xfs/xfs_file.c  +12 -12
···
 static inline vm_fault_t
 xfs_dax_fault(
         struct vm_fault *vmf,
-        enum page_entry_size pe_size,
+        unsigned int order,
         bool write_fault,
         pfn_t *pfn)
 {
-        return dax_iomap_fault(vmf, pe_size, pfn, NULL,
+        return dax_iomap_fault(vmf, order, pfn, NULL,
                         (write_fault && !vmf->cow_page) ?
                         &xfs_dax_write_iomap_ops :
                         &xfs_read_iomap_ops);
···
 static inline vm_fault_t
 xfs_dax_fault(
         struct vm_fault *vmf,
-        enum page_entry_size pe_size,
+        unsigned int order,
         bool write_fault,
         pfn_t *pfn)
 {
···
 static vm_fault_t
 __xfs_filemap_fault(
         struct vm_fault *vmf,
-        enum page_entry_size pe_size,
+        unsigned int order,
         bool write_fault)
 {
         struct inode *inode = file_inode(vmf->vma->vm_file);
         struct xfs_inode *ip = XFS_I(inode);
         vm_fault_t ret;

-        trace_xfs_filemap_fault(ip, pe_size, write_fault);
+        trace_xfs_filemap_fault(ip, order, write_fault);

         if (write_fault) {
                 sb_start_pagefault(inode->i_sb);
···
                 pfn_t pfn;

                 xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
-                ret = xfs_dax_fault(vmf, pe_size, write_fault, &pfn);
+                ret = xfs_dax_fault(vmf, order, write_fault, &pfn);
                 if (ret & VM_FAULT_NEEDDSYNC)
-                        ret = dax_finish_sync_fault(vmf, pe_size, pfn);
+                        ret = dax_finish_sync_fault(vmf, order, pfn);
                 xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
         } else {
                 if (write_fault) {
···
         struct vm_fault *vmf)
 {
         /* DAX can shortcut the normal fault path on write faults! */
-        return __xfs_filemap_fault(vmf, PE_SIZE_PTE,
+        return __xfs_filemap_fault(vmf, 0,
                         IS_DAX(file_inode(vmf->vma->vm_file)) &&
                         xfs_is_write_fault(vmf));
 }
···
 static vm_fault_t
 xfs_filemap_huge_fault(
         struct vm_fault *vmf,
-        enum page_entry_size pe_size)
+        unsigned int order)
 {
         if (!IS_DAX(file_inode(vmf->vma->vm_file)))
                 return VM_FAULT_FALLBACK;

         /* DAX can shortcut the normal fault path on write faults! */
-        return __xfs_filemap_fault(vmf, pe_size,
+        return __xfs_filemap_fault(vmf, order,
                         xfs_is_write_fault(vmf));
 }

···
 xfs_filemap_page_mkwrite(
         struct vm_fault *vmf)
 {
-        return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
+        return __xfs_filemap_fault(vmf, 0, true);
 }

 /*
···
         struct vm_fault *vmf)
 {

-        return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
+        return __xfs_filemap_fault(vmf, 0, true);
 }

 static const struct vm_operations_struct xfs_file_vm_ops = {
fs/xfs/xfs_trace.h  +6 -14
···
  * ring buffer. Somehow this was only worth mentioning in the ftrace sample
  * code.
  */
-TRACE_DEFINE_ENUM(PE_SIZE_PTE);
-TRACE_DEFINE_ENUM(PE_SIZE_PMD);
-TRACE_DEFINE_ENUM(PE_SIZE_PUD);
-
 TRACE_DEFINE_ENUM(XFS_REFC_DOMAIN_SHARED);
 TRACE_DEFINE_ENUM(XFS_REFC_DOMAIN_COW);

 TRACE_EVENT(xfs_filemap_fault,
-        TP_PROTO(struct xfs_inode *ip, enum page_entry_size pe_size,
-                 bool write_fault),
-        TP_ARGS(ip, pe_size, write_fault),
+        TP_PROTO(struct xfs_inode *ip, unsigned int order, bool write_fault),
+        TP_ARGS(ip, order, write_fault),
         TP_STRUCT__entry(
                 __field(dev_t, dev)
                 __field(xfs_ino_t, ino)
-                __field(enum page_entry_size, pe_size)
+                __field(unsigned int, order)
                 __field(bool, write_fault)
         ),
         TP_fast_assign(
                 __entry->dev = VFS_I(ip)->i_sb->s_dev;
                 __entry->ino = ip->i_ino;
-                __entry->pe_size = pe_size;
+                __entry->order = order;
                 __entry->write_fault = write_fault;
         ),
-        TP_printk("dev %d:%d ino 0x%llx %s write_fault %d",
+        TP_printk("dev %d:%d ino 0x%llx order %u write_fault %d",
                   MAJOR(__entry->dev), MINOR(__entry->dev),
                   __entry->ino,
-                  __print_symbolic(__entry->pe_size,
-                        { PE_SIZE_PTE, "PTE" },
-                        { PE_SIZE_PMD, "PMD" },
-                        { PE_SIZE_PUD, "PUD" }),
+                  __entry->order,
                   __entry->write_fault)
 )

include/linux/dax.h  +2 -2
···

 ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
                 const struct iomap_ops *ops);
-vm_fault_t dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
+vm_fault_t dax_iomap_fault(struct vm_fault *vmf, unsigned int order,
                 pfn_t *pfnp, int *errp, const struct iomap_ops *ops);
 vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
-                enum page_entry_size pe_size, pfn_t pfn);
+                unsigned int order, pfn_t pfn);
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
 int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
                 pgoff_t index);
include/linux/mm.h  +1 -9
···
          */
 };

-/* page entry size for vm->huge_fault() */
-enum page_entry_size {
-        PE_SIZE_PTE = 0,
-        PE_SIZE_PMD,
-        PE_SIZE_PUD,
-};
-
 /*
  * These are the virtual MM functions - opening of an area, closing and
  * unmapping it (needed to keep files on disk up-to-date etc), pointer
···
         int (*mprotect)(struct vm_area_struct *vma, unsigned long start,
                         unsigned long end, unsigned long newflags);
         vm_fault_t (*fault)(struct vm_fault *vmf);
-        vm_fault_t (*huge_fault)(struct vm_fault *vmf,
-                        enum page_entry_size pe_size);
+        vm_fault_t (*huge_fault)(struct vm_fault *vmf, unsigned int order);
         vm_fault_t (*map_pages)(struct vm_fault *vmf,
                         pgoff_t start_pgoff, pgoff_t end_pgoff);
         unsigned long (*pagesize)(struct vm_area_struct * area);
mm/memory.c  +4 -4
···
         if (vma_is_anonymous(vma))
                 return do_huge_pmd_anonymous_page(vmf);
         if (vma->vm_ops->huge_fault)
-                return vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD);
+                return vma->vm_ops->huge_fault(vmf, PMD_ORDER);
         return VM_FAULT_FALLBACK;
 }

···

         if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) {
                 if (vma->vm_ops->huge_fault) {
-                        ret = vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD);
+                        ret = vma->vm_ops->huge_fault(vmf, PMD_ORDER);
                         if (!(ret & VM_FAULT_FALLBACK))
                                 return ret;
                 }
···
         if (vma_is_anonymous(vma))
                 return VM_FAULT_FALLBACK;
         if (vma->vm_ops->huge_fault)
-                return vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
+                return vma->vm_ops->huge_fault(vmf, PUD_ORDER);
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
         return VM_FAULT_FALLBACK;
 }
···
                 goto split;
         if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) {
                 if (vma->vm_ops->huge_fault) {
-                        ret = vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
+                        ret = vma->vm_ops->huge_fault(vmf, PUD_ORDER);
                         if (!(ret & VM_FAULT_FALLBACK))
                                 return ret;
                 }