Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'vfs-6.19-rc1.folio' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull folio updates from Christian Brauner:
"Add a new folio_next_pos() helper function that returns the file
position of the first byte after the current folio. This is a common
operation in filesystems when needing to know the end of the current
folio.

The helper is lifted from btrfs which already had its own version, and
is now used across multiple filesystems and subsystems:
- btrfs
- buffer
- ext4
- f2fs
- gfs2
- iomap
- netfs
- xfs
- mm

This fixes a long-standing bug in ocfs2 on 32-bit systems with files
larger than 2GiB. Presumably this is not a common configuration, but
the fix is backported anyway. The other filesystems did not have bugs,
they were just mildly inefficient.

This also introduces uoff_t as the unsigned version of loff_t. A recent
commit inadvertently changed a comparison from being unsigned (on
64-bit systems) to being signed (which it had always been on 32-bit
systems), leading to sporadic fstests failures.

Generally file sizes are restricted to being a signed integer, but in
places where -1 is passed to indicate "up to the end of the file", it
is convenient to have an unsigned type to ensure comparisons are
always unsigned regardless of architecture"

* tag 'vfs-6.19-rc1.folio' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
fs: Add uoff_t
mm: Use folio_next_pos()
xfs: Use folio_next_pos()
netfs: Use folio_next_pos()
iomap: Use folio_next_pos()
gfs2: Use folio_next_pos()
f2fs: Use folio_next_pos()
ext4: Use folio_next_pos()
buffer: Use folio_next_pos()
btrfs: Use folio_next_pos()
filemap: Add folio_next_pos()

+70 -61
+2 -2
fs/btrfs/compression.h
··· 85 85 { 86 86 /* @cur must be inside the folio. */ 87 87 ASSERT(folio_pos(folio) <= cur); 88 - ASSERT(cur < folio_end(folio)); 89 - return min(range_end, folio_end(folio)) - cur; 88 + ASSERT(cur < folio_next_pos(folio)); 89 + return umin(range_end, folio_next_pos(folio)) - cur; 90 90 } 91 91 92 92 int btrfs_alloc_compress_wsm(struct btrfs_fs_info *fs_info);
+4 -3
fs/btrfs/defrag.c
··· 886 886 } 887 887 888 888 lock_start = folio_pos(folio); 889 - lock_end = folio_end(folio) - 1; 889 + lock_end = folio_next_pos(folio) - 1; 890 890 /* Wait for any existing ordered extent in the range */ 891 891 while (1) { 892 892 struct btrfs_ordered_extent *ordered; ··· 1178 1178 1179 1179 if (!folio) 1180 1180 break; 1181 - if (start >= folio_end(folio) || start + len <= folio_pos(folio)) 1181 + if (start >= folio_next_pos(folio) || 1182 + start + len <= folio_pos(folio)) 1182 1183 continue; 1183 1184 btrfs_folio_clamp_clear_checked(fs_info, folio, start, len); 1184 1185 btrfs_folio_clamp_set_dirty(fs_info, folio, start, len); ··· 1220 1219 folios[i] = NULL; 1221 1220 goto free_folios; 1222 1221 } 1223 - cur = folio_end(folios[i]); 1222 + cur = folio_next_pos(folios[i]); 1224 1223 } 1225 1224 for (int i = 0; i < nr_pages; i++) { 1226 1225 if (!folios[i])
+8 -8
fs/btrfs/extent_io.c
··· 333 333 goto out; 334 334 } 335 335 range_start = max_t(u64, folio_pos(folio), start); 336 - range_len = min_t(u64, folio_end(folio), end + 1) - range_start; 336 + range_len = min_t(u64, folio_next_pos(folio), end + 1) - range_start; 337 337 btrfs_folio_set_lock(fs_info, folio, range_start, range_len); 338 338 339 339 processed_end = range_start + range_len - 1; ··· 387 387 ASSERT(orig_end > orig_start); 388 388 389 389 /* The range should at least cover part of the folio */ 390 - ASSERT(!(orig_start >= folio_end(locked_folio) || 390 + ASSERT(!(orig_start >= folio_next_pos(locked_folio) || 391 391 orig_end <= folio_pos(locked_folio))); 392 392 again: 393 393 /* step one, find a bunch of delalloc bytes starting at start */ ··· 493 493 struct btrfs_fs_info *fs_info = folio_to_fs_info(folio); 494 494 495 495 ASSERT(folio_pos(folio) <= start && 496 - start + len <= folio_end(folio)); 496 + start + len <= folio_next_pos(folio)); 497 497 498 498 if (uptodate && btrfs_verify_folio(folio, start, len)) 499 499 btrfs_folio_set_uptodate(fs_info, folio, start, len); ··· 1201 1201 * finished our folio read and unlocked the folio. 1202 1202 */ 1203 1203 if (btrfs_folio_test_dirty(fs_info, folio, cur, blocksize)) { 1204 - u64 range_len = min(folio_end(folio), 1204 + u64 range_len = umin(folio_next_pos(folio), 1205 1205 ordered->file_offset + ordered->num_bytes) - cur; 1206 1206 1207 1207 ret = true; ··· 1223 1223 * So we return true and update @next_ret to the OE/folio boundary. 
1224 1224 */ 1225 1225 if (btrfs_folio_test_uptodate(fs_info, folio, cur, blocksize)) { 1226 - u64 range_len = min(folio_end(folio), 1226 + u64 range_len = umin(folio_next_pos(folio), 1227 1227 ordered->file_offset + ordered->num_bytes) - cur; 1228 1228 1229 1229 /* ··· 2215 2215 for (int i = 0; i < num_extent_folios(eb); i++) { 2216 2216 struct folio *folio = eb->folios[i]; 2217 2217 u64 range_start = max_t(u64, eb->start, folio_pos(folio)); 2218 - u32 range_len = min_t(u64, folio_end(folio), 2218 + u32 range_len = min_t(u64, folio_next_pos(folio), 2219 2219 eb->start + eb->len) - range_start; 2220 2220 2221 2221 folio_lock(folio); ··· 2624 2624 continue; 2625 2625 } 2626 2626 2627 - cur_end = min_t(u64, folio_end(folio) - 1, end); 2627 + cur_end = min_t(u64, folio_next_pos(folio) - 1, end); 2628 2628 cur_len = cur_end + 1 - cur; 2629 2629 2630 2630 ASSERT(folio_test_locked(folio)); ··· 3865 3865 for (int i = 0; i < num_extent_folios(eb); i++) { 3866 3866 struct folio *folio = eb->folios[i]; 3867 3867 u64 range_start = max_t(u64, eb->start, folio_pos(folio)); 3868 - u32 range_len = min_t(u64, folio_end(folio), 3868 + u32 range_len = min_t(u64, folio_next_pos(folio), 3869 3869 eb->start + eb->len) - range_start; 3870 3870 3871 3871 bio_add_folio_nofail(&bbio->bio, folio, range_len,
+5 -4
fs/btrfs/file.c
··· 89 89 num_bytes = round_up(write_bytes + pos - start_pos, 90 90 fs_info->sectorsize); 91 91 ASSERT(num_bytes <= U32_MAX); 92 - ASSERT(folio_pos(folio) <= pos && folio_end(folio) >= pos + write_bytes); 92 + ASSERT(folio_pos(folio) <= pos && 93 + folio_next_pos(folio) >= pos + write_bytes); 93 94 94 95 end_of_last_block = start_pos + num_bytes - 1; 95 96 ··· 800 799 u64 len) 801 800 { 802 801 u64 clamp_start = max_t(u64, pos, folio_pos(folio)); 803 - u64 clamp_end = min_t(u64, pos + len, folio_end(folio)); 802 + u64 clamp_end = min_t(u64, pos + len, folio_next_pos(folio)); 804 803 const u32 blocksize = inode_to_fs_info(inode)->sectorsize; 805 804 int ret = 0; 806 805 ··· 1255 1254 * The reserved range goes beyond the current folio, shrink the reserved 1256 1255 * space to the folio boundary. 1257 1256 */ 1258 - if (reserved_start + reserved_len > folio_end(folio)) { 1259 - const u64 last_block = folio_end(folio); 1257 + if (reserved_start + reserved_len > folio_next_pos(folio)) { 1258 + const u64 last_block = folio_next_pos(folio); 1260 1259 1261 1260 shrink_reserved_space(inode, *data_reserved, reserved_start, 1262 1261 reserved_len, last_block - reserved_start,
+6 -5
fs/btrfs/inode.c
··· 411 411 continue; 412 412 } 413 413 414 - index = folio_end(folio) >> PAGE_SHIFT; 414 + index = folio_next_index(folio); 415 415 /* 416 416 * Here we just clear all Ordered bits for every page in the 417 417 * range, then btrfs_mark_ordered_io_finished() will handle ··· 2338 2338 * The range must cover part of the @locked_folio, or a return of 1 2339 2339 * can confuse the caller. 2340 2340 */ 2341 - ASSERT(!(end <= folio_pos(locked_folio) || start >= folio_end(locked_folio))); 2341 + ASSERT(!(end <= folio_pos(locked_folio) || 2342 + start >= folio_next_pos(locked_folio))); 2342 2343 2343 2344 if (should_nocow(inode, start, end)) { 2344 2345 ret = run_delalloc_nocow(inode, locked_folio, start, end); ··· 2746 2745 struct btrfs_inode *inode = fixup->inode; 2747 2746 struct btrfs_fs_info *fs_info = inode->root->fs_info; 2748 2747 u64 page_start = folio_pos(folio); 2749 - u64 page_end = folio_end(folio) - 1; 2748 + u64 page_end = folio_next_pos(folio) - 1; 2750 2749 int ret = 0; 2751 2750 bool free_delalloc_space = true; 2752 2751 ··· 4858 4857 */ 4859 4858 4860 4859 zero_start = max_t(u64, folio_pos(folio), start); 4861 - zero_end = folio_end(folio); 4860 + zero_end = folio_next_pos(folio); 4862 4861 folio_zero_range(folio, zero_start - folio_pos(folio), 4863 4862 zero_end - zero_start); 4864 4863 ··· 5041 5040 * not reach disk, it still affects our page caches. 5042 5041 */ 5043 5042 zero_start = max_t(u64, folio_pos(folio), start); 5044 - zero_end = min_t(u64, folio_end(folio) - 1, end); 5043 + zero_end = min_t(u64, folio_next_pos(folio) - 1, end); 5045 5044 } else { 5046 5045 zero_start = max_t(u64, block_start, start); 5047 5046 zero_end = min_t(u64, block_end, end);
-5
fs/btrfs/misc.h
··· 209 209 return (found_set == start + nbits); 210 210 } 211 211 212 - static inline u64 folio_end(struct folio *folio) 213 - { 214 - return folio_pos(folio) + folio_size(folio); 215 - } 216 - 217 212 #endif
+1 -1
fs/btrfs/ordered-data.c
··· 359 359 if (folio) { 360 360 ASSERT(folio->mapping); 361 361 ASSERT(folio_pos(folio) <= file_offset); 362 - ASSERT(file_offset + len <= folio_end(folio)); 362 + ASSERT(file_offset + len <= folio_next_pos(folio)); 363 363 364 364 /* 365 365 * Ordered flag indicates whether we still have
+3 -2
fs/btrfs/subpage.c
··· 186 186 * unmapped page like dummy extent buffer pages. 187 187 */ 188 188 if (folio->mapping) 189 - ASSERT(folio_pos(folio) <= start && start + len <= folio_end(folio), 189 + ASSERT(folio_pos(folio) <= start && 190 + start + len <= folio_next_pos(folio), 190 191 "start=%llu len=%u folio_pos=%llu folio_size=%zu", 191 192 start, len, folio_pos(folio), folio_size(folio)); 192 193 } ··· 218 217 if (folio_pos(folio) >= orig_start + orig_len) 219 218 *len = 0; 220 219 else 221 - *len = min_t(u64, folio_end(folio), orig_start + orig_len) - *start; 220 + *len = min_t(u64, folio_next_pos(folio), orig_start + orig_len) - *start; 222 221 } 223 222 224 223 static bool btrfs_subpage_end_and_test_lock(const struct btrfs_fs_info *fs_info,
+1 -1
fs/buffer.c
··· 2732 2732 loff_t i_size = i_size_read(inode); 2733 2733 2734 2734 /* Is the folio fully inside i_size? */ 2735 - if (folio_pos(folio) + folio_size(folio) <= i_size) 2735 + if (folio_next_pos(folio) <= i_size) 2736 2736 return __block_write_full_folio(inode, folio, get_block, wbc); 2737 2737 2738 2738 /* Is the folio fully outside i_size? (truncate in progress) */
+5 -5
fs/ext4/inode.c
··· 1318 1318 if (IS_ERR(folio)) 1319 1319 return PTR_ERR(folio); 1320 1320 1321 - if (pos + len > folio_pos(folio) + folio_size(folio)) 1322 - len = folio_pos(folio) + folio_size(folio) - pos; 1321 + if (len > folio_next_pos(folio) - pos) 1322 + len = folio_next_pos(folio) - pos; 1323 1323 1324 1324 from = offset_in_folio(folio, pos); 1325 1325 to = from + len; ··· 2700 2700 2701 2701 if (mpd->map.m_len == 0) 2702 2702 mpd->start_pos = folio_pos(folio); 2703 - mpd->next_pos = folio_pos(folio) + folio_size(folio); 2703 + mpd->next_pos = folio_next_pos(folio); 2704 2704 /* 2705 2705 * Writeout when we cannot modify metadata is simple. 2706 2706 * Just submit the page. For data=journal mode we ··· 3142 3142 if (IS_ERR(folio)) 3143 3143 return PTR_ERR(folio); 3144 3144 3145 - if (pos + len > folio_pos(folio) + folio_size(folio)) 3146 - len = folio_pos(folio) + folio_size(folio) - pos; 3145 + if (len > folio_next_pos(folio) - pos) 3146 + len = folio_next_pos(folio) - pos; 3147 3147 3148 3148 ret = ext4_block_write_begin(NULL, folio, pos, len, 3149 3149 ext4_da_get_block_prep);
+1 -1
fs/f2fs/compress.c
··· 1329 1329 } 1330 1330 1331 1331 folio = page_folio(cc->rpages[last_index]); 1332 - psize = folio_pos(folio) + folio_size(folio); 1332 + psize = folio_next_pos(folio); 1333 1333 1334 1334 err = f2fs_get_node_info(fio.sbi, dn.nid, &ni, false); 1335 1335 if (err)
+1 -2
fs/gfs2/aops.c
··· 81 81 * the page size, the remaining memory is zeroed when mapped, and 82 82 * writes to that region are not written out to the file." 83 83 */ 84 - if (folio_pos(folio) < i_size && 85 - i_size < folio_pos(folio) + folio_size(folio)) 84 + if (folio_pos(folio) < i_size && i_size < folio_next_pos(folio)) 86 85 folio_zero_segment(folio, offset_in_folio(folio, i_size), 87 86 folio_size(folio)); 88 87
+4 -6
fs/iomap/buffered-io.c
··· 775 775 * are not changing pagecache contents. 776 776 */ 777 777 if (!(iter->flags & IOMAP_UNSHARE) && pos <= folio_pos(folio) && 778 - pos + len >= folio_pos(folio) + folio_size(folio)) 778 + pos + len >= folio_next_pos(folio)) 779 779 return 0; 780 780 781 781 ifs = ifs_alloc(iter->inode, folio, iter->flags); ··· 1214 1214 if (!ifs) 1215 1215 return; 1216 1216 1217 - last_byte = min_t(loff_t, end_byte - 1, 1218 - folio_pos(folio) + folio_size(folio) - 1); 1217 + last_byte = min_t(loff_t, end_byte - 1, folio_next_pos(folio) - 1); 1219 1218 first_blk = offset_in_folio(folio, start_byte) >> blkbits; 1220 1219 last_blk = offset_in_folio(folio, last_byte) >> blkbits; 1221 1220 while ((first_blk = ifs_next_clean_block(folio, first_blk, last_blk)) ··· 1246 1247 * Make sure the next punch start is correctly bound to 1247 1248 * the end of this data range, not the end of the folio. 1248 1249 */ 1249 - *punch_start_byte = min_t(loff_t, end_byte, 1250 - folio_pos(folio) + folio_size(folio)); 1250 + *punch_start_byte = min_t(loff_t, end_byte, folio_next_pos(folio)); 1251 1251 } 1252 1252 1253 1253 /* ··· 1286 1288 start_byte, end_byte, iomap, punch); 1287 1289 1288 1290 /* move offset to start of next folio in range */ 1289 - start_byte = folio_pos(folio) + folio_size(folio); 1291 + start_byte = folio_next_pos(folio); 1290 1292 folio_unlock(folio); 1291 1293 folio_put(folio); 1292 1294 }
+1 -1
fs/netfs/buffered_write.c
··· 535 535 folio_unlock(folio); 536 536 err = filemap_fdatawrite_range(mapping, 537 537 folio_pos(folio), 538 - folio_pos(folio) + folio_size(folio)); 538 + folio_next_pos(folio)); 539 539 switch (err) { 540 540 case 0: 541 541 ret = VM_FAULT_RETRY;
+1 -1
fs/netfs/misc.c
··· 298 298 if (folio_test_dirty(folio)) 299 299 return false; 300 300 301 - end = umin(folio_pos(folio) + folio_size(folio), i_size_read(&ctx->inode)); 301 + end = umin(folio_next_pos(folio), i_size_read(&ctx->inode)); 302 302 if (end > ctx->zero_point) 303 303 ctx->zero_point = end; 304 304
+1 -1
fs/ocfs2/alloc.c
··· 6892 6892 ocfs2_map_and_dirty_folio(inode, handle, from, to, folio, 1, 6893 6893 &phys); 6894 6894 6895 - start = folio_next_index(folio) << PAGE_SHIFT; 6895 + start = folio_next_pos(folio); 6896 6896 } 6897 6897 out: 6898 6898 if (folios)
+1 -1
fs/xfs/scrub/xfarray.c
··· 834 834 si->first_folio_idx = xfarray_idx(si->array, 835 835 folio_pos(si->folio) + si->array->obj_size - 1); 836 836 837 - next_pos = folio_pos(si->folio) + folio_size(si->folio); 837 + next_pos = folio_next_pos(si->folio); 838 838 si->last_folio_idx = xfarray_idx(si->array, next_pos - 1); 839 839 if (xfarray_pos(si->array, si->last_folio_idx + 1) > next_pos) 840 840 si->last_folio_idx--;
+1 -1
fs/xfs/xfs_aops.c
··· 271 271 * folio itself and not the start offset that is passed in. 272 272 */ 273 273 xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK, pos, 274 - folio_pos(folio) + folio_size(folio), NULL); 274 + folio_next_pos(folio), NULL); 275 275 } 276 276 277 277 /*
+4 -4
include/linux/mm.h
··· 3502 3502 extern unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info); 3503 3503 3504 3504 /* truncate.c */ 3505 - extern void truncate_inode_pages(struct address_space *, loff_t); 3506 - extern void truncate_inode_pages_range(struct address_space *, 3507 - loff_t lstart, loff_t lend); 3508 - extern void truncate_inode_pages_final(struct address_space *); 3505 + void truncate_inode_pages(struct address_space *mapping, loff_t lstart); 3506 + void truncate_inode_pages_range(struct address_space *mapping, loff_t lstart, 3507 + uoff_t lend); 3508 + void truncate_inode_pages_final(struct address_space *mapping); 3509 3509 3510 3510 /* generic vm_area_ops exported for stackable file systems */ 3511 3511 extern vm_fault_t filemap_fault(struct vm_fault *vmf);
+11
include/linux/pagemap.h
··· 939 939 } 940 940 941 941 /** 942 + * folio_next_pos - Get the file position of the next folio. 943 + * @folio: The current folio. 944 + * 945 + * Return: The position of the folio which follows this folio in the file. 946 + */ 947 + static inline loff_t folio_next_pos(const struct folio *folio) 948 + { 949 + return (loff_t)folio_next_index(folio) << PAGE_SHIFT; 950 + } 951 + 952 + /** 942 953 * folio_file_page - The page for a particular index. 943 954 * @folio: The folio which contains this index. 944 955 * @index: The index we want to look up.
+1 -1
include/linux/shmem_fs.h
··· 111 111 pgoff_t index, gfp_t gfp_mask); 112 112 int shmem_writeout(struct folio *folio, struct swap_iocb **plug, 113 113 struct list_head *folio_list); 114 - void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); 114 + void shmem_truncate_range(struct inode *inode, loff_t start, uoff_t end); 115 115 int shmem_unuse(unsigned int type); 116 116 117 117 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+1
include/linux/types.h
··· 50 50 51 51 #if defined(__GNUC__) 52 52 typedef __kernel_loff_t loff_t; 53 + typedef __kernel_uoff_t uoff_t; 53 54 #endif 54 55 55 56 /*
+1
include/uapi/asm-generic/posix_types.h
··· 86 86 */ 87 87 typedef __kernel_long_t __kernel_off_t; 88 88 typedef long long __kernel_loff_t; 89 + typedef unsigned long long __kernel_uoff_t; 89 90 typedef __kernel_long_t __kernel_old_time_t; 90 91 #ifndef __KERNEL__ 91 92 typedef __kernel_long_t __kernel_time_t;
+4 -4
mm/shmem.c
··· 1075 1075 * Remove range of pages and swap entries from page cache, and free them. 1076 1076 * If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate. 1077 1077 */ 1078 - static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, 1078 + static void shmem_undo_range(struct inode *inode, loff_t lstart, uoff_t lend, 1079 1079 bool unfalloc) 1080 1080 { 1081 1081 struct address_space *mapping = inode->i_mapping; ··· 1132 1132 same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT); 1133 1133 folio = shmem_get_partial_folio(inode, lstart >> PAGE_SHIFT); 1134 1134 if (folio) { 1135 - same_folio = lend < folio_pos(folio) + folio_size(folio); 1135 + same_folio = lend < folio_next_pos(folio); 1136 1136 folio_mark_dirty(folio); 1137 1137 if (!truncate_inode_partial_folio(folio, lstart, lend)) { 1138 1138 start = folio_next_index(folio); ··· 1226 1226 shmem_recalc_inode(inode, 0, -nr_swaps_freed); 1227 1227 } 1228 1228 1229 - void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) 1229 + void shmem_truncate_range(struct inode *inode, loff_t lstart, uoff_t lend) 1230 1230 { 1231 1231 shmem_undo_range(inode, lstart, lend, false); 1232 1232 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); ··· 5778 5778 } 5779 5779 #endif 5780 5780 5781 - void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) 5781 + void shmem_truncate_range(struct inode *inode, loff_t lstart, uoff_t lend) 5782 5782 { 5783 5783 truncate_inode_pages_range(inode->i_mapping, lstart, lend); 5784 5784 }
+2 -2
mm/truncate.c
··· 364 364 * page aligned properly. 365 365 */ 366 366 void truncate_inode_pages_range(struct address_space *mapping, 367 - loff_t lstart, loff_t lend) 367 + loff_t lstart, uoff_t lend) 368 368 { 369 369 pgoff_t start; /* inclusive */ 370 370 pgoff_t end; /* exclusive */ ··· 412 412 same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT); 413 413 folio = __filemap_get_folio(mapping, lstart >> PAGE_SHIFT, FGP_LOCK, 0); 414 414 if (!IS_ERR(folio)) { 415 - same_folio = lend < folio_pos(folio) + folio_size(folio); 415 + same_folio = lend < folio_next_pos(folio); 416 416 if (!truncate_inode_partial_folio(folio, lstart, lend)) { 417 417 start = folio_next_index(folio); 418 418 if (same_folio)