Merge tag 'xfs-4.15-fixes-8' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Darrick Wong:
"Here are some XFS fixes for 4.15-rc5. Apologies for the unusually
large number of patches this late, but I wanted to make sure the
corruption fixes were really ready to go.

Changes since last update:

- Fix a locking problem during xattr block conversion that could
cause the log checkpointing thread to try to write an incomplete
buffer to disk, leading to a corruption shutdown

- Fix a null pointer dereference when removing delayed allocation
extents

- Remove post-eof speculative allocations when reflinking a block
past the current inode size so that we don't just leave them there
and trip an assert on inode reclaim

- Relax an assert that didn't accurately reflect the way locking
works and would trigger under heavy I/O load

- Avoid infinite loop when cancelling copy on write extents after a
writeback failure

- Try to avoid copy on write transaction reservation overflows when
remapping after a successful write

- Fix various problems with the copy-on-write reservation automatic
garbage collection not being cleaned up properly during a ro
remount

- Fix problems with rmap log items being processed in the wrong
order, leading to corruption shutdowns

- Fix problems with EFI recovery wherein the "remove any rmapping if
present" mechanism wasn't actually doing anything, which would
corrupt the filesystem later when the extent was reallocated and
left multiple rmaps for the same extent"

* tag 'xfs-4.15-fixes-8' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
xfs: only skip rmap owner checks for unknown-owner rmap removal
xfs: always honor OWN_UNKNOWN rmap removal requests
xfs: queue deferred rmap ops for cow staging extent alloc/free in the right order
xfs: set cowblocks tag for direct cow writes too
xfs: remove leftover CoW reservations when remounting ro
xfs: don't be so eager to clear the cowblocks tag on truncate
xfs: track cowblocks separately in i_flags
xfs: allow CoW remap transactions to use reserve blocks
xfs: avoid infinite loop when cancelling CoW blocks after writeback failure
xfs: relax is_reflink_inode assert in xfs_reflink_find_cow_mapping
xfs: remove dest file's post-eof preallocations before reflinking
xfs: move xfs_iext_insert tracepoint to report useful information
xfs: account for null transactions in bunmapi
xfs: hold xfs_buf locked between shortform->leaf conversion and the addition of an attribute
xfs: add the ability to join a held buffer to a defer_ops

+2 -2
fs/xfs/libxfs/xfs_alloc.c
···
 	ASSERT(args->agbno % args->alignment == 0);
 
 	/* if not file data, insert new block into the reverse map btree */
-	if (args->oinfo.oi_owner != XFS_RMAP_OWN_UNKNOWN) {
+	if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
 		error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
 				args->agbno, args->len, &args->oinfo);
 		if (error)
···
 	bno_cur = cnt_cur = NULL;
 	mp = tp->t_mountp;
 
-	if (oinfo->oi_owner != XFS_RMAP_OWN_UNKNOWN) {
+	if (!xfs_rmap_should_skip_owner_update(oinfo)) {
 		error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
 		if (error)
 			goto error0;
+15 -5
fs/xfs/libxfs/xfs_attr.c
···
 	int			flags)
 {
 	struct xfs_mount	*mp = dp->i_mount;
+	struct xfs_buf		*leaf_bp = NULL;
 	struct xfs_da_args	args;
 	struct xfs_defer_ops	dfops;
 	struct xfs_trans_res	tres;
···
 		 * GROT: another possible req'mt for a double-split btree op.
 		 */
 		xfs_defer_init(args.dfops, args.firstblock);
-		error = xfs_attr_shortform_to_leaf(&args);
+		error = xfs_attr_shortform_to_leaf(&args, &leaf_bp);
 		if (error)
 			goto out_defer_cancel;
+		/*
+		 * Prevent the leaf buffer from being unlocked so that a
+		 * concurrent AIL push cannot grab the half-baked leaf
+		 * buffer and run into problems with the write verifier.
+		 */
+		xfs_trans_bhold(args.trans, leaf_bp);
+		xfs_defer_bjoin(args.dfops, leaf_bp);
 		xfs_defer_ijoin(args.dfops, dp);
 		error = xfs_defer_finish(&args.trans, args.dfops);
 		if (error)
···
 
 		/*
 		 * Commit the leaf transformation. We'll need another (linked)
-		 * transaction to add the new attribute to the leaf.
+		 * transaction to add the new attribute to the leaf, which
+		 * means that we have to hold & join the leaf buffer here too.
 		 */
-
 		error = xfs_trans_roll_inode(&args.trans, dp);
 		if (error)
 			goto out;
-
+		xfs_trans_bjoin(args.trans, leaf_bp);
+		leaf_bp = NULL;
 	}
 
 	if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
···
 
 out_defer_cancel:
 	xfs_defer_cancel(&dfops);
-	args.trans = NULL;
 out:
+	if (leaf_bp)
+		xfs_trans_brelse(args.trans, leaf_bp);
 	if (args.trans)
 		xfs_trans_cancel(args.trans);
 	xfs_iunlock(dp, XFS_ILOCK_EXCL);
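Distilled from the hunks above, the lifecycle of the held leaf buffer across the transaction roll (a reading aid, not upstream code; error handling elided):

	error = xfs_attr_shortform_to_leaf(&args, &leaf_bp);	/* leaf_bp comes back locked */
	xfs_trans_bhold(args.trans, leaf_bp);	/* keep it locked when this trans commits */
	xfs_defer_bjoin(args.dfops, leaf_bp);	/* relog it on every defer roll */
	error = xfs_defer_finish(&args.trans, args.dfops);
	error = xfs_trans_roll_inode(&args.trans, dp);
	xfs_trans_bjoin(args.trans, leaf_bp);	/* hand it to the final transaction */
	leaf_bp = NULL;				/* ownership transferred; don't brelse */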
+6 -3
fs/xfs/libxfs/xfs_attr_leaf.c
···
 }
 
 /*
- * Convert from using the shortform to the leaf.
+ * Convert from using the shortform to the leaf. On success, return the
+ * buffer so that we can keep it locked until we're totally done with it.
  */
 int
-xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
+xfs_attr_shortform_to_leaf(
+	struct xfs_da_args	*args,
+	struct xfs_buf		**leaf_bp)
 {
 	xfs_inode_t *dp;
 	xfs_attr_shortform_t *sf;
···
 		sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
 	}
 	error = 0;
-
+	*leaf_bp = bp;
 out:
 	kmem_free(tmpbuffer);
 	return error;
+2 -1
fs/xfs/libxfs/xfs_attr_leaf.h
···
 void	xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff);
 int	xfs_attr_shortform_lookup(struct xfs_da_args *args);
 int	xfs_attr_shortform_getvalue(struct xfs_da_args *args);
-int	xfs_attr_shortform_to_leaf(struct xfs_da_args *args);
+int	xfs_attr_shortform_to_leaf(struct xfs_da_args *args,
+			struct xfs_buf **leaf_bp);
 int	xfs_attr_shortform_remove(struct xfs_da_args *args);
 int	xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
 int	xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes);
+1 -1
fs/xfs/libxfs/xfs_bmap.c
···
 	 * blowing out the transaction with a mix of EFIs and reflink
 	 * adjustments.
 	 */
-	if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
+	if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
 		max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
 	else
 		max_len = len;
+36 -3
fs/xfs/libxfs/xfs_defer.c
···
 	for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
 		xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE);
 
+	/* Hold the (previously bjoin'd) buffer locked across the roll. */
+	for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++)
+		xfs_trans_dirty_buf(*tp, dop->dop_bufs[i]);
+
 	trace_xfs_defer_trans_roll((*tp)->t_mountp, dop);
 
 	/* Roll the transaction. */
···
 	/* Rejoin the joined inodes. */
 	for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
 		xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0);
+
+	/* Rejoin the buffers and dirty them so the log moves forward. */
+	for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) {
+		xfs_trans_bjoin(*tp, dop->dop_bufs[i]);
+		xfs_trans_bhold(*tp, dop->dop_bufs[i]);
+	}
 
 	return error;
 }
···
 		}
 	}
 
+	ASSERT(0);
+	return -EFSCORRUPTED;
+}
+
+/*
+ * Add this buffer to the deferred op. Each joined buffer is relogged
+ * each time we roll the transaction.
+ */
+int
+xfs_defer_bjoin(
+	struct xfs_defer_ops		*dop,
+	struct xfs_buf			*bp)
+{
+	int				i;
+
+	for (i = 0; i < XFS_DEFER_OPS_NR_BUFS; i++) {
+		if (dop->dop_bufs[i] == bp)
+			return 0;
+		else if (dop->dop_bufs[i] == NULL) {
+			dop->dop_bufs[i] = bp;
+			return 0;
+		}
+	}
+
+	ASSERT(0);
 	return -EFSCORRUPTED;
 }
···
 	struct xfs_defer_ops		*dop,
 	xfs_fsblock_t			*fbp)
 {
-	dop->dop_committed = false;
-	dop->dop_low = false;
-	memset(&dop->dop_inodes, 0, sizeof(dop->dop_inodes));
+	memset(dop, 0, sizeof(struct xfs_defer_ops));
 	*fbp = NULLFSBLOCK;
 	INIT_LIST_HEAD(&dop->dop_intake);
 	INIT_LIST_HEAD(&dop->dop_pending);
+4 -1
fs/xfs/libxfs/xfs_defer.h
···
 };
 
 #define XFS_DEFER_OPS_NR_INODES	2	/* join up to two inodes */
+#define XFS_DEFER_OPS_NR_BUFS		2	/* join up to two buffers */
 
 struct xfs_defer_ops {
 	bool			dop_committed;	/* did any trans commit? */
···
 	struct list_head	dop_intake;	/* unlogged pending work */
 	struct list_head	dop_pending;	/* logged pending work */
 
-	/* relog these inodes with each roll */
+	/* relog these with each roll */
 	struct xfs_inode	*dop_inodes[XFS_DEFER_OPS_NR_INODES];
+	struct xfs_buf		*dop_bufs[XFS_DEFER_OPS_NR_BUFS];
 };
 
 void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type,
···
 void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp);
 bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop);
 int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip);
+int xfs_defer_bjoin(struct xfs_defer_ops *dop, struct xfs_buf *bp);
 
 /* Description of a deferred type. */
 struct xfs_defer_op_type {
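In short, the contract of the new interface, paraphrasing the xfs_defer.c hunk above:

	/*
	 * xfs_defer_bjoin() records bp in dop->dop_bufs so that every
	 * xfs_defer_trans_roll() dirties, rejoins, and re-holds it,
	 * keeping the buffer locked while the log moves forward. Only
	 * XFS_DEFER_OPS_NR_BUFS (2) buffers fit; overflow trips an
	 * ASSERT and returns -EFSCORRUPTED. The caller is expected to
	 * already hold the buffer (xfs_trans_bhold) in the current
	 * transaction, as in the xfs_attr.c caller above.
	 */
	int xfs_defer_bjoin(struct xfs_defer_ops *dop, struct xfs_buf *bp);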
+2 -2
fs/xfs/libxfs/xfs_iext_tree.c
···
 	struct xfs_iext_leaf	*new = NULL;
 	int			nr_entries, i;
 
-	trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
-
 	if (ifp->if_height == 0)
 		xfs_iext_alloc_root(ifp, cur);
 	else if (ifp->if_height == 1)
···
 		cur->leaf->recs[i] = cur->leaf->recs[i - 1];
 	xfs_iext_set(cur_rec(cur), irec);
 	ifp->if_bytes += sizeof(struct xfs_iext_rec);
+
+	trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
 
 	if (new)
 		xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2);
+19 -33
fs/xfs/libxfs/xfs_refcount.c
···
 	xfs_extlen_t		aglen,
 	struct xfs_defer_ops	*dfops)
 {
-	int			error;
-
 	trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno,
 			agbno, aglen);
 
 	/* Add refcount btree reservation */
-	error = xfs_refcount_adjust_cow(rcur, agbno, aglen,
+	return xfs_refcount_adjust_cow(rcur, agbno, aglen,
 			XFS_REFCOUNT_ADJUST_COW_ALLOC, dfops);
-	if (error)
-		return error;
-
-	/* Add rmap entry */
-	if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) {
-		error = xfs_rmap_alloc_extent(rcur->bc_mp, dfops,
-				rcur->bc_private.a.agno,
-				agbno, aglen, XFS_RMAP_OWN_COW);
-		if (error)
-			return error;
-	}
-
-	return error;
 }
 
 /*
···
 	xfs_extlen_t		aglen,
 	struct xfs_defer_ops	*dfops)
 {
-	int			error;
-
 	trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno,
 			agbno, aglen);
 
 	/* Remove refcount btree reservation */
-	error = xfs_refcount_adjust_cow(rcur, agbno, aglen,
+	return xfs_refcount_adjust_cow(rcur, agbno, aglen,
 			XFS_REFCOUNT_ADJUST_COW_FREE, dfops);
-	if (error)
-		return error;
-
-	/* Remove rmap entry */
-	if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) {
-		error = xfs_rmap_free_extent(rcur->bc_mp, dfops,
-				rcur->bc_private.a.agno,
-				agbno, aglen, XFS_RMAP_OWN_COW);
-		if (error)
-			return error;
-	}
-
-	return error;
 }
 
 /* Record a CoW staging extent in the refcount btree. */
···
 	xfs_fsblock_t		fsb,
 	xfs_extlen_t		len)
 {
+	int			error;
+
 	if (!xfs_sb_version_hasreflink(&mp->m_sb))
 		return 0;
 
-	return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW,
+	error = __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW,
 			fsb, len);
+	if (error)
+		return error;
+
+	/* Add rmap entry */
+	return xfs_rmap_alloc_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
+			XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
 }
 
 /* Forget a CoW staging event in the refcount btree. */
···
 	xfs_fsblock_t		fsb,
 	xfs_extlen_t		len)
 {
+	int			error;
+
 	if (!xfs_sb_version_hasreflink(&mp->m_sb))
 		return 0;
+
+	/* Remove rmap entry */
+	error = xfs_rmap_free_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
+			XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
+	if (error)
+		return error;
 
 	return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW,
 			fsb, len);
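The point of the reshuffle, sketched (call chain simplified; this assumes deferred ops are logged in the order they are queued): the rmap update is now queued together with the refcount update, instead of being created only later when the refcount work item runs, so both intents land in the same transaction and replay in the right order:

	/* One call now queues both deferred ops up front: */
	error = xfs_refcount_alloc_cow_extent(mp, dfops, fsb, len);
		/* queues __xfs_refcount_add(..., XFS_REFCOUNT_ALLOC_COW, ...) */
		/* then queues xfs_rmap_alloc_extent(..., XFS_RMAP_OWN_COW)   */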
+76 -23
fs/xfs/libxfs/xfs_rmap.c
···
 }
 
 /*
+ * Perform all the relevant owner checks for a removal op. If we're doing an
+ * unknown-owner removal then we have no owner information to check.
+ */
+static int
+xfs_rmap_free_check_owner(
+	struct xfs_mount	*mp,
+	uint64_t		ltoff,
+	struct xfs_rmap_irec	*rec,
+	xfs_fsblock_t		bno,
+	xfs_filblks_t		len,
+	uint64_t		owner,
+	uint64_t		offset,
+	unsigned int		flags)
+{
+	int			error = 0;
+
+	if (owner == XFS_RMAP_OWN_UNKNOWN)
+		return 0;
+
+	/* Make sure the unwritten flag matches. */
+	XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) ==
+			(rec->rm_flags & XFS_RMAP_UNWRITTEN), out);
+
+	/* Make sure the owner matches what we expect to find in the tree. */
+	XFS_WANT_CORRUPTED_GOTO(mp, owner == rec->rm_owner, out);
+
+	/* Check the offset, if necessary. */
+	if (XFS_RMAP_NON_INODE_OWNER(owner))
+		goto out;
+
+	if (flags & XFS_RMAP_BMBT_BLOCK) {
+		XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_flags & XFS_RMAP_BMBT_BLOCK,
+				out);
+	} else {
+		XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_offset <= offset, out);
+		XFS_WANT_CORRUPTED_GOTO(mp,
+				ltoff + rec->rm_blockcount >= offset + len,
+				out);
+	}
+
+out:
+	return error;
+}
+
+/*
  * Find the extent in the rmap btree and remove it.
  *
  * The record we find should always be an exact match for the extent that we're
···
 		goto out_done;
 	}
 
-	/* Make sure the unwritten flag matches. */
-	XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) ==
-			(ltrec.rm_flags & XFS_RMAP_UNWRITTEN), out_error);
+	/*
+	 * If we're doing an unknown-owner removal for EFI recovery, we expect
+	 * to find the full range in the rmapbt or nothing at all. If we
+	 * don't find any rmaps overlapping either end of the range, we're
+	 * done. Hopefully this means that the EFI creator already queued
+	 * (and finished) a RUI to remove the rmap.
+	 */
+	if (owner == XFS_RMAP_OWN_UNKNOWN &&
+	    ltrec.rm_startblock + ltrec.rm_blockcount <= bno) {
+		struct xfs_rmap_irec	rtrec;
+
+		error = xfs_btree_increment(cur, 0, &i);
+		if (error)
+			goto out_error;
+		if (i == 0)
+			goto out_done;
+		error = xfs_rmap_get_rec(cur, &rtrec, &i);
+		if (error)
+			goto out_error;
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+		if (rtrec.rm_startblock >= bno + len)
+			goto out_done;
+	}
 
 	/* Make sure the extent we found covers the entire freeing range. */
 	XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno &&
-		ltrec.rm_startblock + ltrec.rm_blockcount >=
-		bno + len, out_error);
+			ltrec.rm_startblock + ltrec.rm_blockcount >=
+			bno + len, out_error);
 
-	/* Make sure the owner matches what we expect to find in the tree. */
-	XFS_WANT_CORRUPTED_GOTO(mp, owner == ltrec.rm_owner ||
-				    XFS_RMAP_NON_INODE_OWNER(owner), out_error);
-
-	/* Check the offset, if necessary. */
-	if (!XFS_RMAP_NON_INODE_OWNER(owner)) {
-		if (flags & XFS_RMAP_BMBT_BLOCK) {
-			XFS_WANT_CORRUPTED_GOTO(mp,
-					ltrec.rm_flags & XFS_RMAP_BMBT_BLOCK,
-					out_error);
-		} else {
-			XFS_WANT_CORRUPTED_GOTO(mp,
-					ltrec.rm_offset <= offset, out_error);
-			XFS_WANT_CORRUPTED_GOTO(mp,
-				ltoff + ltrec.rm_blockcount >= offset + len,
-				out_error);
-		}
-	}
+	/* Check owner information. */
+	error = xfs_rmap_free_check_owner(mp, ltoff, &ltrec, bno, len, owner,
+			offset, flags);
+	if (error)
+		goto out_error;
 
 	if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) {
 		/* exact match, simply remove the record from rmap tree */
···
 		flags |= XFS_RMAP_UNWRITTEN;
 	trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len,
 			unwritten, oinfo);
+	ASSERT(!xfs_rmap_should_skip_owner_update(oinfo));
 
 	/*
 	 * For the initial lookup, look for an exact match or the left-adjacent
+15 -1
fs/xfs/libxfs/xfs_rmap.h
···
 xfs_rmap_skip_owner_update(
 	struct xfs_owner_info	*oi)
 {
-	oi->oi_owner = XFS_RMAP_OWN_UNKNOWN;
+	xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_NULL);
+}
+
+static inline bool
+xfs_rmap_should_skip_owner_update(
+	struct xfs_owner_info	*oi)
+{
+	return oi->oi_owner == XFS_RMAP_OWN_NULL;
+}
+
+static inline void
+xfs_rmap_any_owner_update(
+	struct xfs_owner_info	*oi)
+{
+	xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_UNKNOWN);
 }
 
 /* Reverse mapping functions. */
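A quick usage sketch of the three helpers; the pairings follow from the xfs_fsops.c and xfs_extfree_item.c hunks elsewhere in this pull:

	struct xfs_owner_info	oinfo;

	/* "No rmap record exists, leave the rmapbt alone" (e.g. growfs): */
	xfs_rmap_skip_owner_update(&oinfo);		/* owner = OWN_NULL */
	ASSERT(xfs_rmap_should_skip_owner_update(&oinfo));

	/* "Remove whatever rmap covers this extent" (EFI recovery): */
	xfs_rmap_any_owner_update(&oinfo);		/* owner = OWN_UNKNOWN */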
+1 -1
fs/xfs/xfs_extfree_item.c
···
 		return error;
 	efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
 
-	xfs_rmap_skip_owner_update(&oinfo);
+	xfs_rmap_any_owner_update(&oinfo);
 	for (i = 0; i < efip->efi_format.efi_nextents; i++) {
 		extp = &efip->efi_format.efi_extents[i];
 		error = xfs_trans_free_extent(tp, efdp, extp->ext_start,
+5
fs/xfs/xfs_fsops.c
···
 	 * this doesn't actually exist in the rmap btree.
 	 */
 	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL);
+	error = xfs_rmap_free(tp, bp, agno,
+			be32_to_cpu(agf->agf_length) - new,
+			new, &oinfo);
+	if (error)
+		goto error0;
 	error = xfs_free_extent(tp,
 			XFS_AGB_TO_FSB(mp, agno,
 				be32_to_cpu(agf->agf_length) - new),
+25 -10
fs/xfs/xfs_icache.c
···
  * based on the 'speculative_cow_prealloc_lifetime' tunable (5m by default).
  * (We'll just piggyback on the post-EOF prealloc space workqueue.)
  */
-STATIC void
+void
 xfs_queue_cowblocks(
 	struct xfs_mount *mp)
 {
···
 	return __xfs_inode_free_quota_eofblocks(ip, xfs_icache_free_eofblocks);
 }
 
+static inline unsigned long
+xfs_iflag_for_tag(
+	int		tag)
+{
+	switch (tag) {
+	case XFS_ICI_EOFBLOCKS_TAG:
+		return XFS_IEOFBLOCKS;
+	case XFS_ICI_COWBLOCKS_TAG:
+		return XFS_ICOWBLOCKS;
+	default:
+		ASSERT(0);
+		return 0;
+	}
+}
+
 static void
-__xfs_inode_set_eofblocks_tag(
+__xfs_inode_set_blocks_tag(
 	xfs_inode_t	*ip,
 	void		(*execute)(struct xfs_mount *mp),
 	void		(*set_tp)(struct xfs_mount *mp, xfs_agnumber_t agno,
···
 	 * Don't bother locking the AG and looking up in the radix trees
 	 * if we already know that we have the tag set.
 	 */
-	if (ip->i_flags & XFS_IEOFBLOCKS)
+	if (ip->i_flags & xfs_iflag_for_tag(tag))
 		return;
 	spin_lock(&ip->i_flags_lock);
-	ip->i_flags |= XFS_IEOFBLOCKS;
+	ip->i_flags |= xfs_iflag_for_tag(tag);
 	spin_unlock(&ip->i_flags_lock);
 
 	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
···
 	xfs_inode_t	*ip)
 {
 	trace_xfs_inode_set_eofblocks_tag(ip);
-	return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_eofblocks,
+	return __xfs_inode_set_blocks_tag(ip, xfs_queue_eofblocks,
 			trace_xfs_perag_set_eofblocks,
 			XFS_ICI_EOFBLOCKS_TAG);
 }
 
 static void
-__xfs_inode_clear_eofblocks_tag(
+__xfs_inode_clear_blocks_tag(
 	xfs_inode_t	*ip,
 	void		(*clear_tp)(struct xfs_mount *mp, xfs_agnumber_t agno,
 			int error, unsigned long caller_ip),
···
 	struct xfs_perag *pag;
 
 	spin_lock(&ip->i_flags_lock);
-	ip->i_flags &= ~XFS_IEOFBLOCKS;
+	ip->i_flags &= ~xfs_iflag_for_tag(tag);
 	spin_unlock(&ip->i_flags_lock);
 
 	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
···
 	xfs_inode_t	*ip)
 {
 	trace_xfs_inode_clear_eofblocks_tag(ip);
-	return __xfs_inode_clear_eofblocks_tag(ip,
+	return __xfs_inode_clear_blocks_tag(ip,
 			trace_xfs_perag_clear_eofblocks, XFS_ICI_EOFBLOCKS_TAG);
 }
···
 	xfs_inode_t	*ip)
 {
 	trace_xfs_inode_set_cowblocks_tag(ip);
-	return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks,
+	return __xfs_inode_set_blocks_tag(ip, xfs_queue_cowblocks,
 			trace_xfs_perag_set_cowblocks,
 			XFS_ICI_COWBLOCKS_TAG);
 }
···
 	xfs_inode_t	*ip)
 {
 	trace_xfs_inode_clear_cowblocks_tag(ip);
-	return __xfs_inode_clear_eofblocks_tag(ip,
+	return __xfs_inode_clear_blocks_tag(ip,
 			trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG);
 }
+1
fs/xfs/xfs_icache.h
···
 int xfs_icache_free_cowblocks(struct xfs_mount *, struct xfs_eofblocks *);
 int xfs_inode_free_quota_cowblocks(struct xfs_inode *ip);
 void xfs_cowblocks_worker(struct work_struct *);
+void xfs_queue_cowblocks(struct xfs_mount *);
 
 int xfs_inode_ag_iterator(struct xfs_mount *mp,
 	int (*execute)(struct xfs_inode *ip, int flags, void *args),
+19 -9
fs/xfs/xfs_inode.c
···
 	return error;
 }
 
+/* Clear the reflink flag and the cowblocks tag if possible. */
+static void
+xfs_itruncate_clear_reflink_flags(
+	struct xfs_inode	*ip)
+{
+	struct xfs_ifork	*dfork;
+	struct xfs_ifork	*cfork;
+
+	if (!xfs_is_reflink_inode(ip))
+		return;
+	dfork = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+	cfork = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+	if (dfork->if_bytes == 0 && cfork->if_bytes == 0)
+		ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
+	if (cfork->if_bytes == 0)
+		xfs_inode_clear_cowblocks_tag(ip);
+}
+
 /*
  * Free up the underlying blocks past new_size. The new size must be smaller
  * than the current size. This routine can be used both for the attribute and
···
 	if (error)
 		goto out;
 
-	/*
-	 * Clear the reflink flag if there are no data fork blocks and
-	 * there are no extents staged in the cow fork.
-	 */
-	if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) {
-		if (ip->i_d.di_nblocks == 0)
-			ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
-		xfs_inode_clear_cowblocks_tag(ip);
-	}
+	xfs_itruncate_clear_reflink_flags(ip);
 
 	/*
 	 * Always re-log the inode so that our permanent transaction can keep
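The helper's decision matrix, restated for quick reference (df/cf are the data and CoW fork if_bytes counts):

	/*
	 * df == 0 && cf == 0:  clear XFS_DIFLAG2_REFLINK and the cowblocks tag
	 * df >  0 && cf == 0:  keep the reflink flag, clear the cowblocks tag
	 *            cf >  0:  keep both; CoW extents are still staged
	 */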
+1
fs/xfs/xfs_inode.h
···
  * log recovery to replay a bmap operation on the inode.
  */
 #define XFS_IRECOVERY		(1 << 11)
+#define XFS_ICOWBLOCKS		(1 << 12)/* has the cowblocks tag set */
 
 /*
  * Per-lifetime flags need to be reset when re-using a reclaimable inode during
+19 -2
fs/xfs/xfs_reflink.c
···
 	if (error)
 		goto out_bmap_cancel;
 
+	xfs_inode_set_cowblocks_tag(ip);
+
 	/* Finish up. */
 	error = xfs_defer_finish(&tp, &dfops);
 	if (error)
···
 	struct xfs_iext_cursor		icur;
 
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
-	ASSERT(xfs_is_reflink_inode(ip));
 
+	if (!xfs_is_reflink_inode(ip))
+		return false;
 	offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
 	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got))
 		return false;
···
 
 		/* Remove the mapping from the CoW fork. */
 		xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
+	} else {
+		/* Didn't do anything, push cursor back. */
+		xfs_iext_prev(ifp, &icur);
 	}
 next_extent:
 	if (!xfs_iext_get_extent(ifp, &icur, &got))
···
 			(unsigned int)(end_fsb - offset_fsb),
 			XFS_DATA_FORK);
 	error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
-			resblks, 0, 0, &tp);
+			resblks, 0, XFS_TRANS_RESERVE, &tp);
 	if (error)
 		goto out;
···
 		goto out_unlock;
 
 	trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
+
+	/*
+	 * Clear out post-eof preallocations because we don't have page cache
+	 * backing the delayed allocations and they'll never get freed on
+	 * their own.
+	 */
+	if (xfs_can_free_eofblocks(dest, true)) {
+		ret = xfs_free_eofblocks(dest);
+		if (ret)
+			goto out_unlock;
+	}
 
 	/* Set flags and remap blocks. */
 	ret = xfs_reflink_set_inode_flag(src, dest);
+9
fs/xfs/xfs_super.c
···
 			xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 			return error;
 		}
+		xfs_queue_cowblocks(mp);
 
 		/* Create the per-AG metadata reservation pool .*/
 		error = xfs_fs_reserve_ag_blocks(mp);
···
 
 	/* rw -> ro */
 	if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) {
+		/* Get rid of any leftover CoW reservations... */
+		cancel_delayed_work_sync(&mp->m_cowblocks_work);
+		error = xfs_icache_free_cowblocks(mp, NULL);
+		if (error) {
+			xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+			return error;
+		}
+
 		/* Free the per-AG metadata reservation pool. */
 		error = xfs_fs_unreserve_ag_blocks(mp);
 		if (error) {