Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bcachefs: New io_misc.c helpers

This pulls the non-VFS-specific parts of truncate and finsert/fcollapse
out of fs-io.c, and moves them to io_misc.c.

This is prep work for logging these operations, to make them atomic in
the event of a crash.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>

+251 -193
+20 -191
fs/bcachefs/fs-io.c
··· 391 391 return bch2_setattr_nonsize(idmap, inode, iattr); 392 392 } 393 393 394 - static int bch2_truncate_finish_fn(struct btree_trans *trans, 395 - struct bch_inode_info *inode, 396 - struct bch_inode_unpacked *bi, 397 - void *p) 398 - { 399 - bi->bi_flags &= ~BCH_INODE_I_SIZE_DIRTY; 400 - return 0; 401 - } 402 - 403 - static int bch2_truncate_start_fn(struct btree_trans *trans, 404 - struct bch_inode_info *inode, 405 - struct bch_inode_unpacked *bi, void *p) 406 - { 407 - u64 *new_i_size = p; 408 - 409 - bi->bi_flags |= BCH_INODE_I_SIZE_DIRTY; 410 - bi->bi_size = *new_i_size; 411 - return 0; 412 - } 413 - 414 - int bch2_truncate(struct mnt_idmap *idmap, 394 + int bchfs_truncate(struct mnt_idmap *idmap, 415 395 struct bch_inode_info *inode, struct iattr *iattr) 416 396 { 417 397 struct bch_fs *c = inode->v.i_sb->s_fs_info; 418 398 struct address_space *mapping = inode->v.i_mapping; 419 399 struct bch_inode_unpacked inode_u; 420 - u64 new_i_size = iattr->ia_size; 421 400 s64 i_sectors_delta = 0; 422 401 int ret = 0; 423 402 ··· 445 466 if (unlikely(ret < 0)) 446 467 goto err; 447 468 469 + truncate_setsize(&inode->v, iattr->ia_size); 470 + 448 471 /* 449 472 * When extending, we're going to write the new i_size to disk 450 473 * immediately so we need to flush anything above the current on disk ··· 468 487 if (ret) 469 488 goto err; 470 489 471 - mutex_lock(&inode->ei_update_lock); 472 - ret = bch2_write_inode(c, inode, bch2_truncate_start_fn, 473 - &new_i_size, 0); 474 - mutex_unlock(&inode->ei_update_lock); 475 - 476 - if (unlikely(ret)) 477 - goto err; 478 - 479 - truncate_setsize(&inode->v, iattr->ia_size); 480 - 481 - ret = bch2_fpunch(c, inode_inum(inode), 482 - round_up(iattr->ia_size, block_bytes(c)) >> 9, 483 - U64_MAX, &i_sectors_delta); 490 + ret = bch2_truncate(c, inode_inum(inode), iattr->ia_size, &i_sectors_delta); 484 491 bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta); 492 + 493 + if (unlikely(ret)) { 494 + /* 495 + * If we error here, VFS 
caches are now inconsistent with btree 496 + */ 497 + set_bit(EI_INODE_ERROR, &inode->ei_flags); 498 + goto err; 499 + } 485 500 486 501 bch2_fs_inconsistent_on(!inode->v.i_size && inode->v.i_blocks && 487 502 !bch2_journal_error(&c->journal), c, 488 503 "inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)", 489 504 inode->v.i_ino, (u64) inode->v.i_blocks, 490 505 inode->ei_inode.bi_sectors); 491 - if (unlikely(ret)) 492 - goto err; 493 - 494 - mutex_lock(&inode->ei_update_lock); 495 - ret = bch2_write_inode(c, inode, bch2_truncate_finish_fn, NULL, 0); 496 - mutex_unlock(&inode->ei_update_lock); 497 506 498 507 ret = bch2_setattr_nonsize(idmap, inode, iattr); 499 508 err: ··· 548 577 { 549 578 struct bch_fs *c = inode->v.i_sb->s_fs_info; 550 579 struct address_space *mapping = inode->v.i_mapping; 551 - struct bkey_buf copy; 552 - struct btree_trans trans; 553 - struct btree_iter src, dst, del; 554 - loff_t shift, new_size; 555 - u64 src_start; 580 + s64 i_sectors_delta = 0; 556 581 int ret = 0; 557 582 558 583 if ((offset | len) & (block_bytes(c) - 1)) 559 584 return -EINVAL; 560 585 561 586 if (insert) { 562 - if (inode->v.i_sb->s_maxbytes - inode->v.i_size < len) 563 - return -EFBIG; 564 - 565 587 if (offset >= inode->v.i_size) 566 588 return -EINVAL; 567 - 568 - src_start = U64_MAX; 569 - shift = len; 570 589 } else { 571 590 if (offset + len >= inode->v.i_size) 572 591 return -EINVAL; 573 - 574 - src_start = offset + len; 575 - shift = -len; 576 592 } 577 - 578 - new_size = inode->v.i_size + shift; 579 593 580 594 ret = bch2_write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX); 581 595 if (ret) 582 596 return ret; 583 597 584 - if (insert) { 585 - i_size_write(&inode->v, new_size); 586 - mutex_lock(&inode->ei_update_lock); 587 - ret = bch2_write_inode_size(c, inode, new_size, 588 - ATTR_MTIME|ATTR_CTIME); 589 - mutex_unlock(&inode->ei_update_lock); 590 - } else { 591 - s64 i_sectors_delta = 0; 598 + if (insert) 599 + i_size_write(&inode->v, 
inode->v.i_size + len); 592 600 593 - ret = bch2_fpunch(c, inode_inum(inode), 594 - offset >> 9, (offset + len) >> 9, 595 - &i_sectors_delta); 596 - bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta); 601 + ret = bch2_fcollapse_finsert(c, inode_inum(inode), offset >> 9, len >> 9, 602 + insert, &i_sectors_delta); 603 + if (!ret && !insert) 604 + i_size_write(&inode->v, inode->v.i_size - len); 605 + bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta); 597 606 598 - if (ret) 599 - return ret; 600 - } 601 - 602 - bch2_bkey_buf_init(&copy); 603 - bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); 604 - bch2_trans_iter_init(&trans, &src, BTREE_ID_extents, 605 - POS(inode->v.i_ino, src_start >> 9), 606 - BTREE_ITER_INTENT); 607 - bch2_trans_copy_iter(&dst, &src); 608 - bch2_trans_copy_iter(&del, &src); 609 - 610 - while (ret == 0 || 611 - bch2_err_matches(ret, BCH_ERR_transaction_restart)) { 612 - struct disk_reservation disk_res = 613 - bch2_disk_reservation_init(c, 0); 614 - struct bkey_i delete; 615 - struct bkey_s_c k; 616 - struct bpos next_pos; 617 - struct bpos move_pos = POS(inode->v.i_ino, offset >> 9); 618 - struct bpos atomic_end; 619 - unsigned trigger_flags = 0; 620 - u32 snapshot; 621 - 622 - bch2_trans_begin(&trans); 623 - 624 - ret = bch2_subvolume_get_snapshot(&trans, 625 - inode->ei_subvol, &snapshot); 626 - if (ret) 627 - continue; 628 - 629 - bch2_btree_iter_set_snapshot(&src, snapshot); 630 - bch2_btree_iter_set_snapshot(&dst, snapshot); 631 - bch2_btree_iter_set_snapshot(&del, snapshot); 632 - 633 - bch2_trans_begin(&trans); 634 - 635 - k = insert 636 - ? 
bch2_btree_iter_peek_prev(&src) 637 - : bch2_btree_iter_peek_upto(&src, POS(inode->v.i_ino, U64_MAX)); 638 - if ((ret = bkey_err(k))) 639 - continue; 640 - 641 - if (!k.k || k.k->p.inode != inode->v.i_ino) 642 - break; 643 - 644 - if (insert && 645 - bkey_le(k.k->p, POS(inode->v.i_ino, offset >> 9))) 646 - break; 647 - reassemble: 648 - bch2_bkey_buf_reassemble(&copy, c, k); 649 - 650 - if (insert && 651 - bkey_lt(bkey_start_pos(k.k), move_pos)) 652 - bch2_cut_front(move_pos, copy.k); 653 - 654 - copy.k->k.p.offset += shift >> 9; 655 - bch2_btree_iter_set_pos(&dst, bkey_start_pos(&copy.k->k)); 656 - 657 - ret = bch2_extent_atomic_end(&trans, &dst, copy.k, &atomic_end); 658 - if (ret) 659 - continue; 660 - 661 - if (!bkey_eq(atomic_end, copy.k->k.p)) { 662 - if (insert) { 663 - move_pos = atomic_end; 664 - move_pos.offset -= shift >> 9; 665 - goto reassemble; 666 - } else { 667 - bch2_cut_back(atomic_end, copy.k); 668 - } 669 - } 670 - 671 - bkey_init(&delete.k); 672 - delete.k.p = copy.k->k.p; 673 - delete.k.size = copy.k->k.size; 674 - delete.k.p.offset -= shift >> 9; 675 - bch2_btree_iter_set_pos(&del, bkey_start_pos(&delete.k)); 676 - 677 - next_pos = insert ? 
bkey_start_pos(&delete.k) : delete.k.p; 678 - 679 - if (copy.k->k.size != k.k->size) { 680 - /* We might end up splitting compressed extents: */ 681 - unsigned nr_ptrs = 682 - bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(copy.k)); 683 - 684 - ret = bch2_disk_reservation_get(c, &disk_res, 685 - copy.k->k.size, nr_ptrs, 686 - BCH_DISK_RESERVATION_NOFAIL); 687 - BUG_ON(ret); 688 - } 689 - 690 - ret = bch2_btree_iter_traverse(&del) ?: 691 - bch2_trans_update(&trans, &del, &delete, trigger_flags) ?: 692 - bch2_trans_update(&trans, &dst, copy.k, trigger_flags) ?: 693 - bch2_trans_commit(&trans, &disk_res, NULL, 694 - BTREE_INSERT_NOFAIL); 695 - bch2_disk_reservation_put(c, &disk_res); 696 - 697 - if (!ret) 698 - bch2_btree_iter_set_pos(&src, next_pos); 699 - } 700 - bch2_trans_iter_exit(&trans, &del); 701 - bch2_trans_iter_exit(&trans, &dst); 702 - bch2_trans_iter_exit(&trans, &src); 703 - bch2_trans_exit(&trans); 704 - bch2_bkey_buf_exit(&copy, c); 705 - 706 - if (ret) 707 - return ret; 708 - 709 - mutex_lock(&inode->ei_update_lock); 710 - if (!insert) { 711 - i_size_write(&inode->v, new_size); 712 - ret = bch2_write_inode_size(c, inode, new_size, 713 - ATTR_MTIME|ATTR_CTIME); 714 - } else { 715 - /* We need an inode update to update bi_journal_seq for fsync: */ 716 - ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, 717 - ATTR_MTIME|ATTR_CTIME); 718 - } 719 - mutex_unlock(&inode->ei_update_lock); 720 607 return ret; 721 608 } 722 609
+1 -1
fs/bcachefs/fs-io.h
··· 165 165 166 166 int bch2_fsync(struct file *, loff_t, loff_t, int); 167 167 168 - int bch2_truncate(struct mnt_idmap *, 168 + int bchfs_truncate(struct mnt_idmap *, 169 169 struct bch_inode_info *, struct iattr *); 170 170 long bch2_fallocate_dispatch(struct file *, int, loff_t, loff_t); 171 171
+1 -1
fs/bcachefs/fs.c
··· 798 798 return ret; 799 799 800 800 return iattr->ia_valid & ATTR_SIZE 801 - ? bch2_truncate(idmap, inode, iattr) 801 + ? bchfs_truncate(idmap, inode, iattr) 802 802 : bch2_setattr_nonsize(idmap, inode, iattr); 803 803 } 804 804
+226
fs/bcachefs/io_misc.c
··· 9 9 #include "btree_update.h" 10 10 #include "buckets.h" 11 11 #include "clock.h" 12 + #include "error.h" 12 13 #include "extents.h" 14 + #include "extent_update.h" 15 + #include "inode.h" 13 16 #include "io_misc.h" 14 17 #include "io_write.h" 15 18 #include "subvolume.h" ··· 214 211 if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 215 212 ret = 0; 216 213 214 + return ret; 215 + } 216 + 217 + static int truncate_set_isize(struct btree_trans *trans, 218 + subvol_inum inum, 219 + u64 new_i_size) 220 + { 221 + struct btree_iter iter = { NULL }; 222 + struct bch_inode_unpacked inode_u; 223 + int ret; 224 + 225 + ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_INTENT) ?: 226 + (inode_u.bi_size = new_i_size, 0) ?: 227 + bch2_inode_write(trans, &iter, &inode_u); 228 + 229 + bch2_trans_iter_exit(trans, &iter); 230 + return ret; 231 + } 232 + 233 + int bch2_truncate(struct bch_fs *c, subvol_inum inum, u64 new_i_size, u64 *i_sectors_delta) 234 + { 235 + struct btree_trans trans; 236 + struct btree_iter fpunch_iter; 237 + int ret; 238 + 239 + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); 240 + bch2_trans_iter_init(&trans, &fpunch_iter, BTREE_ID_extents, 241 + POS(inum.inum, round_up(new_i_size, block_bytes(c)) >> 9), 242 + BTREE_ITER_INTENT); 243 + 244 + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL, 245 + truncate_set_isize(&trans, inum, new_i_size)); 246 + if (ret) 247 + goto err; 248 + 249 + ret = bch2_fpunch_at(&trans, &fpunch_iter, inum, U64_MAX, i_sectors_delta); 250 + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 251 + ret = 0; 252 + if (ret) 253 + goto err; 254 + err: 255 + bch2_trans_iter_exit(&trans, &fpunch_iter); 256 + bch2_trans_exit(&trans); 257 + 258 + bch2_fs_fatal_err_on(ret, c, "%s: error truncating %u:%llu: %s", 259 + __func__, inum.subvol, inum.inum, bch2_err_str(ret)); 260 + return ret; 261 + } 262 + 263 + static int adjust_i_size(struct btree_trans *trans, subvol_inum inum, u64 offset, s64 len) 264 + { 
265 + struct btree_iter iter; 266 + struct bch_inode_unpacked inode_u; 267 + int ret; 268 + 269 + offset <<= 9; 270 + len <<= 9; 271 + 272 + ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_INTENT); 273 + if (ret) 274 + return ret; 275 + 276 + if (len > 0) { 277 + if (MAX_LFS_FILESIZE - inode_u.bi_size < len) { 278 + ret = -EFBIG; 279 + goto err; 280 + } 281 + 282 + if (offset >= inode_u.bi_size) { 283 + ret = -EINVAL; 284 + goto err; 285 + } 286 + } 287 + 288 + inode_u.bi_size += len; 289 + inode_u.bi_mtime = inode_u.bi_ctime = bch2_current_time(trans->c); 290 + 291 + ret = bch2_inode_write(trans, &iter, &inode_u); 292 + err: 293 + bch2_trans_iter_exit(trans, &iter); 294 + return ret; 295 + } 296 + 297 + int bch2_fcollapse_finsert(struct bch_fs *c, subvol_inum inum, 298 + u64 offset, u64 len, bool insert, 299 + s64 *i_sectors_delta) 300 + { 301 + struct bkey_buf copy; 302 + struct btree_trans trans; 303 + struct btree_iter src = { NULL }, dst = { NULL }, del = { NULL }; 304 + s64 shift = insert ? 
len : -len; 305 + int ret = 0; 306 + 307 + bch2_bkey_buf_init(&copy); 308 + bch2_trans_init(&trans, c, 0, 1024); 309 + 310 + bch2_trans_iter_init(&trans, &src, BTREE_ID_extents, 311 + POS(inum.inum, U64_MAX), 312 + BTREE_ITER_INTENT); 313 + bch2_trans_copy_iter(&dst, &src); 314 + bch2_trans_copy_iter(&del, &src); 315 + 316 + if (insert) { 317 + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL, 318 + adjust_i_size(&trans, inum, offset, len)); 319 + if (ret) 320 + goto err; 321 + } else { 322 + bch2_btree_iter_set_pos(&src, POS(inum.inum, offset)); 323 + 324 + ret = bch2_fpunch_at(&trans, &src, inum, offset + len, i_sectors_delta); 325 + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) 326 + goto err; 327 + 328 + bch2_btree_iter_set_pos(&src, POS(inum.inum, offset + len)); 329 + } 330 + 331 + while (ret == 0 || bch2_err_matches(ret, BCH_ERR_transaction_restart)) { 332 + struct disk_reservation disk_res = 333 + bch2_disk_reservation_init(c, 0); 334 + struct bkey_i delete; 335 + struct bkey_s_c k; 336 + struct bpos next_pos; 337 + struct bpos move_pos = POS(inum.inum, offset); 338 + struct bpos atomic_end; 339 + unsigned trigger_flags = 0; 340 + u32 snapshot; 341 + 342 + bch2_trans_begin(&trans); 343 + 344 + ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); 345 + if (ret) 346 + continue; 347 + 348 + bch2_btree_iter_set_snapshot(&src, snapshot); 349 + bch2_btree_iter_set_snapshot(&dst, snapshot); 350 + bch2_btree_iter_set_snapshot(&del, snapshot); 351 + 352 + bch2_trans_begin(&trans); 353 + 354 + k = insert 355 + ? 
bch2_btree_iter_peek_prev(&src) 356 + : bch2_btree_iter_peek_upto(&src, POS(inum.inum, U64_MAX)); 357 + if ((ret = bkey_err(k))) 358 + continue; 359 + 360 + if (!k.k || k.k->p.inode != inum.inum) 361 + break; 362 + 363 + if (insert && 364 + bkey_le(k.k->p, POS(inum.inum, offset))) 365 + break; 366 + reassemble: 367 + bch2_bkey_buf_reassemble(&copy, c, k); 368 + 369 + if (insert && 370 + bkey_lt(bkey_start_pos(k.k), move_pos)) 371 + bch2_cut_front(move_pos, copy.k); 372 + 373 + copy.k->k.p.offset += shift; 374 + bch2_btree_iter_set_pos(&dst, bkey_start_pos(&copy.k->k)); 375 + 376 + ret = bch2_extent_atomic_end(&trans, &dst, copy.k, &atomic_end); 377 + if (ret) 378 + continue; 379 + 380 + if (!bkey_eq(atomic_end, copy.k->k.p)) { 381 + if (insert) { 382 + move_pos = atomic_end; 383 + move_pos.offset -= shift; 384 + goto reassemble; 385 + } else { 386 + bch2_cut_back(atomic_end, copy.k); 387 + } 388 + } 389 + 390 + bkey_init(&delete.k); 391 + delete.k.p = copy.k->k.p; 392 + delete.k.size = copy.k->k.size; 393 + delete.k.p.offset -= shift; 394 + bch2_btree_iter_set_pos(&del, bkey_start_pos(&delete.k)); 395 + 396 + next_pos = insert ? 
bkey_start_pos(&delete.k) : delete.k.p; 397 + 398 + if (copy.k->k.size != k.k->size) { 399 + /* We might end up splitting compressed extents: */ 400 + unsigned nr_ptrs = 401 + bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(copy.k)); 402 + 403 + ret = bch2_disk_reservation_get(c, &disk_res, 404 + copy.k->k.size, nr_ptrs, 405 + BCH_DISK_RESERVATION_NOFAIL); 406 + BUG_ON(ret); 407 + } 408 + 409 + ret = bch2_btree_iter_traverse(&del) ?: 410 + bch2_trans_update(&trans, &del, &delete, trigger_flags) ?: 411 + bch2_trans_update(&trans, &dst, copy.k, trigger_flags) ?: 412 + bch2_trans_commit(&trans, &disk_res, NULL, 413 + BTREE_INSERT_NOFAIL); 414 + bch2_disk_reservation_put(c, &disk_res); 415 + 416 + if (!ret) 417 + bch2_btree_iter_set_pos(&src, next_pos); 418 + } 419 + 420 + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) 421 + goto err; 422 + 423 + if (!insert) { 424 + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL, 425 + adjust_i_size(&trans, inum, offset, -len)); 426 + } else { 427 + /* We need an inode update to update bi_journal_seq for fsync: */ 428 + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL, 429 + adjust_i_size(&trans, inum, 0, 0)); 430 + } 431 + err: 432 + bch2_trans_iter_exit(&trans, &del); 433 + bch2_trans_iter_exit(&trans, &dst); 434 + bch2_trans_iter_exit(&trans, &src); 435 + bch2_trans_exit(&trans); 436 + bch2_bkey_buf_exit(&copy, c); 217 437 return ret; 218 438 }
+3
fs/bcachefs/io_misc.h
··· 9 9 subvol_inum, u64, s64 *); 10 10 int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, s64 *); 11 11 12 + int bch2_truncate(struct bch_fs *, subvol_inum, u64, u64 *); 13 + int bch2_fcollapse_finsert(struct bch_fs *, subvol_inum, u64, u64, bool, s64 *); 14 + 12 15 #endif /* _BCACHEFS_IO_MISC_H */