
mm,tmpfs: consider end of file write in shmem_is_huge

Take the end of a file write into consideration when deciding whether or
not to use huge pages for tmpfs files when the tmpfs filesystem is mounted
with huge=within_size.

This allows large writes that append to the end of a file to automatically
use large pages.
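In effect, huge=within_size now compares the huge page boundary against
max(write_end, i_size) instead of i_size alone, so an appending write can
qualify for a huge page before i_size has been updated. Below is a minimal
userspace sketch of that decision, not kernel code: within_size_allows_huge()
and round_up_ul() are illustrative helpers, and 4kB pages with 2MB PMD-sized
huge pages (x86-64) are assumed.

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define HPAGE_PMD_NR	512			/* 2MB huge page / 4kB page */

static unsigned long round_up_ul(unsigned long x, unsigned long align)
{
	return (x + align - 1) & ~(align - 1);
}

/* Mirrors the patched SHMEM_HUGE_WITHIN_SIZE case: write_end now counts. */
static int within_size_allows_huge(unsigned long i_size,
				   unsigned long write_end,
				   unsigned long index)
{
	unsigned long size = write_end > i_size ? write_end : i_size; /* max() */

	index = round_up_ul(index + 1, HPAGE_PMD_NR);
	size = round_up_ul(size, PAGE_SIZE);
	return (size >> PAGE_SHIFT) >= index;
}

int main(void)
{
	/* A 4MB write appending at offset 0 of an empty file (i_size == 0). */
	printf("write_end ignored (old): %d\n",
	       within_size_allows_huge(0, 0, 0));
	printf("write_end = 4MB (new):   %d\n",
	       within_size_allows_huge(0, 4UL << 20, 0));
	return 0;
}

For a 4MB write landing at the start of an empty file, the old check (i_size
only) refuses a huge page while the new one accepts it, which is exactly the
case the fio numbers below exercise.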

With fio doing 4MB sequential writes without fallocate to a 16GB tmpfs
file, the numbers without THP or with huge=always stay the same, but the
performance with huge=within_size now matches that of huge=always.

huge		before		after
4kB pages	1560 MB/s	1560 MB/s
within_size	1560 MB/s	4720 MB/s
always		4720 MB/s	4720 MB/s
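A reproduction along these lines should show the same behavior; the mount
options and fio parameters below are assumptions inferred from the
description above (4MB sequential writes, 16GB file, no fallocate), not
taken from the commit:

# assumed reproduction sketch: tmpfs mounted huge=within_size,
# 4MB sequential appending writes, no preallocation
mount -t tmpfs -o size=17g,huge=within_size tmpfs /mnt/test
fio --name=seqwrite --directory=/mnt/test --rw=write --bs=4m \
    --size=16g --ioengine=psync --fallocate=none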

[akpm@linux-foundation.org: coding-style cleanups]
Link: https://lkml.kernel.org/r/20240903111928.7171e60c@imladris.surriel.com
Signed-off-by: Rik van Riel <riel@surriel.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Tested-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Darrick J. Wong <djwong@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

Authored by Rik van Riel, committed by Andrew Morton (e1e4cfd0 e899007a)

7 files changed, +42 -39
fs/xfs/scrub/xfile.c (+3 -3)
···
 	unsigned int	len;
 	unsigned int	offset;
 
-	if (shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio,
+	if (shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
 			SGP_READ) < 0)
 		break;
 	if (!folio) {
···
 	unsigned int	len;
 	unsigned int	offset;
 
-	if (shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio,
+	if (shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
 			SGP_CACHE) < 0)
 		break;
 	if (filemap_check_wb_err(inode->i_mapping, 0)) {
···
 	i_size_write(inode, pos + len);
 
 	pflags = memalloc_nofs_save();
-	error = shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio,
+	error = shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
 			(flags & XFILE_ALLOC) ? SGP_CACHE : SGP_READ);
 	memalloc_nofs_restore(pflags);
 	if (error)
fs/xfs/xfs_buf_mem.c (+1 -1)
···
 		return -ENOMEM;
 	}
 
-	error = shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio, SGP_CACHE);
+	error = shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio, SGP_CACHE);
 	if (error)
 		return error;
include/linux/shmem_fs.h (+4 -4)
···
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 unsigned long shmem_allowable_huge_orders(struct inode *inode,
 				struct vm_area_struct *vma, pgoff_t index,
-				bool shmem_huge_force);
+				loff_t write_end, bool shmem_huge_force);
 #else
 static inline unsigned long shmem_allowable_huge_orders(struct inode *inode,
 				struct vm_area_struct *vma, pgoff_t index,
-				bool shmem_huge_force)
+				loff_t write_end, bool shmem_huge_force)
 {
 	return 0;
 }
···
 	SGP_FALLOC,	/* like SGP_WRITE, but make existing page Uptodate */
 };
 
-int shmem_get_folio(struct inode *inode, pgoff_t index, struct folio **foliop,
-		enum sgp_type sgp);
+int shmem_get_folio(struct inode *inode, pgoff_t index, loff_t write_end,
+		struct folio **foliop, enum sgp_type sgp);
 struct folio *shmem_read_folio_gfp(struct address_space *mapping,
 		pgoff_t index, gfp_t gfp);
mm/huge_memory.c (+1 -1)
···
 	 */
 	if (!in_pf && shmem_file(vma->vm_file))
 		return shmem_allowable_huge_orders(file_inode(vma->vm_file),
-						   vma, vma->vm_pgoff,
+						   vma, vma->vm_pgoff, 0,
 						   !enforce_sysfs);
 
 	if (!vma_is_anonymous(vma)) {
mm/khugepaged.c (+1 -1)
···
 		if (xa_is_value(folio) || !folio_test_uptodate(folio)) {
 			xas_unlock_irq(&xas);
 			/* swap in or instantiate fallocated page */
-			if (shmem_get_folio(mapping->host, index,
+			if (shmem_get_folio(mapping->host, index, 0,
 					&folio, SGP_NOALLOC)) {
 				result = SCAN_FAIL;
 				goto xa_unlocked;
mm/shmem.c (+31 -28)
···
 static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER;
 
 static bool __shmem_huge_global_enabled(struct inode *inode, pgoff_t index,
-					bool shmem_huge_force, struct vm_area_struct *vma,
+					loff_t write_end, bool shmem_huge_force,
+					struct vm_area_struct *vma,
 					unsigned long vm_flags)
 {
 	struct mm_struct *mm = vma ? vma->vm_mm : NULL;
···
 		return true;
 	case SHMEM_HUGE_WITHIN_SIZE:
 		index = round_up(index + 1, HPAGE_PMD_NR);
-		i_size = round_up(i_size_read(inode), PAGE_SIZE);
+		i_size = max(write_end, i_size_read(inode));
+		i_size = round_up(i_size, PAGE_SIZE);
 		if (i_size >> PAGE_SHIFT >= index)
 			return true;
 		fallthrough;
···
 }
 
 static bool shmem_huge_global_enabled(struct inode *inode, pgoff_t index,
-		bool shmem_huge_force, struct vm_area_struct *vma,
-		unsigned long vm_flags)
+		loff_t write_end, bool shmem_huge_force,
+		struct vm_area_struct *vma, unsigned long vm_flags)
 {
 	if (HPAGE_PMD_ORDER > MAX_PAGECACHE_ORDER)
 		return false;
 
-	return __shmem_huge_global_enabled(inode, index, shmem_huge_force,
-					   vma, vm_flags);
+	return __shmem_huge_global_enabled(inode, index, write_end,
+					   shmem_huge_force, vma, vm_flags);
 }
···
 }
 
 static bool shmem_huge_global_enabled(struct inode *inode, pgoff_t index,
-		bool shmem_huge_force, struct vm_area_struct *vma,
-		unsigned long vm_flags)
+		loff_t write_end, bool shmem_huge_force,
+		struct vm_area_struct *vma, unsigned long vm_flags)
 {
 	return false;
 }
···
 	 * (although in some cases this is just a waste of time).
 	 */
 	folio = NULL;
-	shmem_get_folio(inode, index, &folio, SGP_READ);
+	shmem_get_folio(inode, index, 0, &folio, SGP_READ);
 	return folio;
 }
···
 			 STATX_ATTR_NODUMP);
 	generic_fillattr(idmap, request_mask, inode, stat);
 
-	if (shmem_huge_global_enabled(inode, 0, false, NULL, 0))
+	if (shmem_huge_global_enabled(inode, 0, 0, false, NULL, 0))
 		stat->blksize = HPAGE_PMD_SIZE;
 
 	if (request_mask & STATX_BTIME) {
···
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 unsigned long shmem_allowable_huge_orders(struct inode *inode,
 				struct vm_area_struct *vma, pgoff_t index,
-				bool shmem_huge_force)
+				loff_t write_end, bool shmem_huge_force)
 {
 	unsigned long mask = READ_ONCE(huge_shmem_orders_always);
 	unsigned long within_size_orders = READ_ONCE(huge_shmem_orders_within_size);
···
 	if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_UNSUPPORTED))
 		return 0;
 
-	global_huge = shmem_huge_global_enabled(inode, index, shmem_huge_force,
-						vma, vm_flags);
+	global_huge = shmem_huge_global_enabled(inode, index, write_end,
+						shmem_huge_force, vma, vm_flags);
 	if (!vma || !vma_is_anon_shmem(vma)) {
 		/*
 		 * For tmpfs, we now only support PMD sized THP if huge page
···
  * vmf and fault_type are only supplied by shmem_fault: otherwise they are NULL.
  */
 static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
-		struct folio **foliop, enum sgp_type sgp, gfp_t gfp,
-		struct vm_fault *vmf, vm_fault_t *fault_type)
+		loff_t write_end, struct folio **foliop, enum sgp_type sgp,
+		gfp_t gfp, struct vm_fault *vmf, vm_fault_t *fault_type)
 {
 	struct vm_area_struct *vma = vmf ? vmf->vma : NULL;
 	struct mm_struct *fault_mm;
···
 	}
 
 	/* Find hugepage orders that are allowed for anonymous shmem and tmpfs. */
-	orders = shmem_allowable_huge_orders(inode, vma, index, false);
+	orders = shmem_allowable_huge_orders(inode, vma, index, write_end, false);
 	if (orders > 0) {
 		gfp_t huge_gfp;
···
  * shmem_get_folio - find, and lock a shmem folio.
  * @inode:	inode to search
  * @index:	the page index.
+ * @write_end:	end of a write, could extend inode size
  * @foliop:	pointer to the folio if found
  * @sgp:	SGP_* flags to control behavior
···
  * Context: May sleep.
  * Return: 0 if successful, else a negative error code.
  */
-int shmem_get_folio(struct inode *inode, pgoff_t index, struct folio **foliop,
-		enum sgp_type sgp)
+int shmem_get_folio(struct inode *inode, pgoff_t index, loff_t write_end,
+		struct folio **foliop, enum sgp_type sgp)
 {
-	return shmem_get_folio_gfp(inode, index, foliop, sgp,
+	return shmem_get_folio_gfp(inode, index, write_end, foliop, sgp,
 			mapping_gfp_mask(inode->i_mapping), NULL, NULL);
 }
 EXPORT_SYMBOL_GPL(shmem_get_folio);
···
 	}
 
 	WARN_ON_ONCE(vmf->page != NULL);
-	err = shmem_get_folio_gfp(inode, vmf->pgoff, &folio, SGP_CACHE,
+	err = shmem_get_folio_gfp(inode, vmf->pgoff, 0, &folio, SGP_CACHE,
 				  gfp, vmf, &ret);
 	if (err)
 		return vmf_error(err);
···
 			return -EPERM;
 	}
 
-	ret = shmem_get_folio(inode, index, &folio, SGP_WRITE);
+	ret = shmem_get_folio(inode, index, pos + len, &folio, SGP_WRITE);
 	if (ret)
 		return ret;
···
 			break;
 		}
 
-		error = shmem_get_folio(inode, index, &folio, SGP_READ);
+		error = shmem_get_folio(inode, index, 0, &folio, SGP_READ);
 		if (error) {
 			if (error == -EINVAL)
 				error = 0;
···
 		if (*ppos >= i_size_read(inode))
 			break;
 
-		error = shmem_get_folio(inode, *ppos / PAGE_SIZE, &folio,
+		error = shmem_get_folio(inode, *ppos / PAGE_SIZE, 0, &folio,
 					SGP_READ);
 		if (error) {
 			if (error == -EINVAL)
···
 		else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced)
 			error = -ENOMEM;
 		else
-			error = shmem_get_folio(inode, index, &folio,
-						SGP_FALLOC);
+			error = shmem_get_folio(inode, index, offset + len,
+						&folio, SGP_FALLOC);
 		if (error) {
 			info->fallocend = undo_fallocend;
 			/* Remove the !uptodate folios we added */
···
 	} else {
 		inode_nohighmem(inode);
 		inode->i_mapping->a_ops = &shmem_aops;
-		error = shmem_get_folio(inode, 0, &folio, SGP_WRITE);
+		error = shmem_get_folio(inode, 0, 0, &folio, SGP_WRITE);
 		if (error)
 			goto out_remove_offset;
 		inode->i_op = &shmem_symlink_inode_operations;
···
 			return ERR_PTR(-ECHILD);
 		}
 	} else {
-		error = shmem_get_folio(inode, 0, &folio, SGP_READ);
+		error = shmem_get_folio(inode, 0, 0, &folio, SGP_READ);
 		if (error)
 			return ERR_PTR(error);
 		if (!folio)
···
 	struct folio *folio;
 	int error;
 
-	error = shmem_get_folio_gfp(inode, index, &folio, SGP_CACHE,
+	error = shmem_get_folio_gfp(inode, index, 0, &folio, SGP_CACHE,
 				    gfp, NULL, NULL);
 	if (error)
 		return ERR_PTR(error);
mm/userfaultfd.c (+1 -1)
···
 	struct page *page;
 	int ret;
 
-	ret = shmem_get_folio(inode, pgoff, &folio, SGP_NOALLOC);
+	ret = shmem_get_folio(inode, pgoff, 0, &folio, SGP_NOALLOC);
 	/* Our caller expects us to return -EFAULT if we failed to find folio */
 	if (ret == -ENOENT)
 		ret = -EFAULT;