Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bcachefs: Fix excess transaction restarts in __bchfs_fallocate()

drop_locks_do() should not be used in a fastpath without first trying
the operation (its _do argument) in nonblocking mode - the unlock and
relock will cause excessive transaction restarts and can potentially
livelock with other threads that are contending for the same locks.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>

+35 -16
+5
fs/bcachefs/btree_iter.h
··· 819 819 #define for_each_btree_key_continue_norestart(_iter, _flags, _k, _ret) \ 820 820 for_each_btree_key_upto_continue_norestart(_iter, SPOS_MAX, _flags, _k, _ret) 821 821 822 + /* 823 + * This should not be used in a fastpath, without first trying _do in 824 + * nonblocking mode - it will cause excessive transaction restarts and 825 + * potentially livelocking: 826 + */ 822 827 #define drop_locks_do(_trans, _do) \ 823 828 ({ \ 824 829 bch2_trans_unlock(_trans); \
+24 -13
fs/bcachefs/fs-io-pagecache.c
··· 309 309 } 310 310 } 311 311 312 - void bch2_mark_pagecache_reserved(struct bch_inode_info *inode, 313 - u64 start, u64 end) 312 + int bch2_mark_pagecache_reserved(struct bch_inode_info *inode, 313 + u64 *start, u64 end, 314 + bool nonblocking) 314 315 { 315 316 struct bch_fs *c = inode->v.i_sb->s_fs_info; 316 - pgoff_t index = start >> PAGE_SECTORS_SHIFT; 317 + pgoff_t index = *start >> PAGE_SECTORS_SHIFT; 317 318 pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT; 318 319 struct folio_batch fbatch; 319 320 s64 i_sectors_delta = 0; 320 - unsigned i, j; 321 + int ret = 0; 321 322 322 - if (end <= start) 323 - return; 323 + if (end <= *start) 324 + return 0; 324 325 325 326 folio_batch_init(&fbatch); 326 327 327 328 while (filemap_get_folios(inode->v.i_mapping, 328 329 &index, end_index, &fbatch)) { 329 - for (i = 0; i < folio_batch_count(&fbatch); i++) { 330 + for (unsigned i = 0; i < folio_batch_count(&fbatch); i++) { 330 331 struct folio *folio = fbatch.folios[i]; 332 + 333 + if (!nonblocking) 334 + folio_lock(folio); 335 + else if (!folio_trylock(folio)) { 336 + folio_batch_release(&fbatch); 337 + ret = -EAGAIN; 338 + break; 339 + } 340 + 331 341 u64 folio_start = folio_sector(folio); 332 342 u64 folio_end = folio_end_sector(folio); 333 - unsigned folio_offset = max(start, folio_start) - folio_start; 334 - unsigned folio_len = min(end, folio_end) - folio_offset - folio_start; 335 - struct bch_folio *s; 336 343 337 344 BUG_ON(end <= folio_start); 338 345 339 - folio_lock(folio); 340 - s = bch2_folio(folio); 346 + *start = min(end, folio_end); 341 347 348 + struct bch_folio *s = bch2_folio(folio); 342 349 if (s) { 350 + unsigned folio_offset = max(*start, folio_start) - folio_start; 351 + unsigned folio_len = min(end, folio_end) - folio_offset - folio_start; 352 + 343 353 spin_lock(&s->lock); 344 - for (j = folio_offset; j < folio_offset + folio_len; j++) { 354 + for (unsigned j = folio_offset; j < folio_offset + folio_len; j++) { 345 355 i_sectors_delta -= 
s->s[j].state == SECTOR_dirty; 346 356 bch2_folio_sector_set(folio, s, j, 347 357 folio_sector_reserve(s->s[j].state)); ··· 366 356 } 367 357 368 358 bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta); 359 + return ret; 369 360 } 370 361 371 362 static inline unsigned sectors_to_reserve(struct bch_folio_sector *s,
+1 -1
fs/bcachefs/fs-io-pagecache.h
··· 143 143 void bch2_bio_page_state_set(struct bio *, struct bkey_s_c); 144 144 145 145 void bch2_mark_pagecache_unallocated(struct bch_inode_info *, u64, u64); 146 - void bch2_mark_pagecache_reserved(struct bch_inode_info *, u64, u64); 146 + int bch2_mark_pagecache_reserved(struct bch_inode_info *, u64 *, u64, bool); 147 147 148 148 int bch2_get_folio_disk_reservation(struct bch_fs *, 149 149 struct bch_inode_info *,
+5 -2
fs/bcachefs/fs-io.c
··· 675 675 676 676 bch2_i_sectors_acct(c, inode, &quota_res, i_sectors_delta); 677 677 678 - drop_locks_do(trans, 679 - (bch2_mark_pagecache_reserved(inode, hole_start, iter.pos.offset), 0)); 678 + if (bch2_mark_pagecache_reserved(inode, &hole_start, 679 + iter.pos.offset, true)) 680 + drop_locks_do(trans, 681 + bch2_mark_pagecache_reserved(inode, &hole_start, 682 + iter.pos.offset, false)); 680 683 bkey_err: 681 684 bch2_quota_reservation_put(c, inode, &quota_res); 682 685 if (bch2_err_matches(ret, BCH_ERR_transaction_restart))