Merge tag 'xfs-4.15-fixes-8' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Darrick Wong:
"Here are some XFS fixes for 4.15-rc5. Apologies for the unusually
large number of patches this late, but I wanted to make sure the
corruption fixes were really ready to go.

Changes since last update:

- Fix a locking problem during xattr block conversion that could
  cause the log checkpointing thread to write an incomplete buffer
  to disk, leading to a corruption shutdown

- Fix a null pointer dereference when removing delayed allocation
extents

- Remove post-eof speculative allocations when reflinking a block
  past the current inode size so that we don't just leave them there
  and assert on inode reclaim

- Relax an assert that didn't accurately reflect how locking works
  and would trigger under heavy I/O load

- Avoid an infinite loop when cancelling copy-on-write extents after
  a writeback failure

- Try to avoid copy-on-write transaction reservation overflows when
  remapping after a successful write

- Fix various problems with the automatic garbage collection of
  copy-on-write reservations not being shut down properly during a
  read-only remount

- Fix problems with rmap log items being processed in the wrong
order, leading to corruption shutdowns

- Fix problems with EFI recovery wherein the "remove any rmapping if
  present" mechanism wasn't actually doing anything; the stale entry
  would later produce multiple rmaps for the same extent once it was
  reallocated, corrupting the filesystem"

* tag 'xfs-4.15-fixes-8' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
xfs: only skip rmap owner checks for unknown-owner rmap removal
xfs: always honor OWN_UNKNOWN rmap removal requests
xfs: queue deferred rmap ops for cow staging extent alloc/free in the right order
xfs: set cowblocks tag for direct cow writes too
xfs: remove leftover CoW reservations when remounting ro
xfs: don't be so eager to clear the cowblocks tag on truncate
xfs: track cowblocks separately in i_flags
xfs: allow CoW remap transactions to use reserve blocks
xfs: avoid infinite loop when cancelling CoW blocks after writeback failure
xfs: relax is_reflink_inode assert in xfs_reflink_find_cow_mapping
xfs: remove dest file's post-eof preallocations before reflinking
xfs: move xfs_iext_insert tracepoint to report useful information
xfs: account for null transactions in bunmapi
xfs: hold xfs_buf locked between shortform->leaf conversion and the addition of an attribute
xfs: add the ability to join a held buffer to a defer_ops

+2 -2
fs/xfs/libxfs/xfs_alloc.c
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -702,7 +702,7 @@
 	ASSERT(args->agbno % args->alignment == 0);
 
 	/* if not file data, insert new block into the reverse map btree */
-	if (args->oinfo.oi_owner != XFS_RMAP_OWN_UNKNOWN) {
+	if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
 		error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
 				args->agbno, args->len, &args->oinfo);
 		if (error)
@@ -1682,7 +1682,7 @@
 	bno_cur = cnt_cur = NULL;
 	mp = tp->t_mountp;
 
-	if (oinfo->oi_owner != XFS_RMAP_OWN_UNKNOWN) {
+	if (!xfs_rmap_should_skip_owner_update(oinfo)) {
 		error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
 		if (error)
			goto error0;
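The new predicate replaces the open-coded OWN_UNKNOWN comparisons at both
allocation and free time. A minimal caller-side sketch of the convention
(illustrative only, not part of the patch):

	struct xfs_owner_info	oinfo;

	/* mark the extent "no rmapbt update wanted"; this now sets
	 * XFS_RMAP_OWN_NULL rather than XFS_RMAP_OWN_UNKNOWN */
	xfs_rmap_skip_owner_update(&oinfo);
	ASSERT(xfs_rmap_should_skip_owner_update(&oinfo));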
+15 -5
fs/xfs/libxfs/xfs_attr.c
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -212,6 +212,7 @@
 	int			flags)
 {
 	struct xfs_mount	*mp = dp->i_mount;
+	struct xfs_buf		*leaf_bp = NULL;
 	struct xfs_da_args	args;
 	struct xfs_defer_ops	dfops;
 	struct xfs_trans_res	tres;
@@ -328,9 +329,16 @@
		 * GROT: another possible req'mt for a double-split btree op.
		 */
		xfs_defer_init(args.dfops, args.firstblock);
-		error = xfs_attr_shortform_to_leaf(&args);
+		error = xfs_attr_shortform_to_leaf(&args, &leaf_bp);
		if (error)
			goto out_defer_cancel;
+		/*
+		 * Prevent the leaf buffer from being unlocked so that a
+		 * concurrent AIL push cannot grab the half-baked leaf
+		 * buffer and run into problems with the write verifier.
+		 */
+		xfs_trans_bhold(args.trans, leaf_bp);
+		xfs_defer_bjoin(args.dfops, leaf_bp);
		xfs_defer_ijoin(args.dfops, dp);
		error = xfs_defer_finish(&args.trans, args.dfops);
		if (error)
@@ -346,12 +338,13 @@
		/*
		 * Commit the leaf transformation. We'll need another (linked)
-		 * transaction to add the new attribute to the leaf.
+		 * transaction to add the new attribute to the leaf, which
+		 * means that we have to hold & join the leaf buffer here too.
		 */
-
		error = xfs_trans_roll_inode(&args.trans, dp);
		if (error)
			goto out;
-
+		xfs_trans_bjoin(args.trans, leaf_bp);
+		leaf_bp = NULL;
	}
 
	if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
@@ -384,7 +375,8 @@
 out_defer_cancel:
	xfs_defer_cancel(&dfops);
-	args.trans = NULL;
 out:
+	if (leaf_bp)
+		xfs_trans_brelse(args.trans, leaf_bp);
	if (args.trans)
		xfs_trans_cancel(args.trans);
	xfs_iunlock(dp, XFS_ILOCK_EXCL);
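The heart of the fix is the hold/rejoin idiom that keeps a buffer locked
while its transaction commits and rolls. A condensed sketch of that idiom,
using the same helpers as the hunk above (hypothetical caller):

	xfs_trans_bhold(tp, bp);	/* bp stays locked when tp commits */
	error = xfs_trans_roll(&tp);	/* commit tp, get a follow-up trans */
	if (error)
		return error;
	xfs_trans_bjoin(tp, bp);	/* attach bp to the new transaction */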
+6 -3
fs/xfs/libxfs/xfs_attr_leaf.c
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -735,10 +735,13 @@
 }
 
 /*
- * Convert from using the shortform to the leaf.
+ * Convert from using the shortform to the leaf. On success, return the
+ * buffer so that we can keep it locked until we're totally done with it.
 */
 int
-xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
+xfs_attr_shortform_to_leaf(
+	struct xfs_da_args	*args,
+	struct xfs_buf		**leaf_bp)
 {
	xfs_inode_t *dp;
	xfs_attr_shortform_t *sf;
@@ -821,7 +818,7 @@
		sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
	}
	error = 0;
-
+	*leaf_bp = bp;
 out:
	kmem_free(tmpbuffer);
	return error;
+2 -1
fs/xfs/libxfs/xfs_attr_leaf.h
--- a/fs/xfs/libxfs/xfs_attr_leaf.h
+++ b/fs/xfs/libxfs/xfs_attr_leaf.h
@@ -48,7 +48,8 @@
 void	xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff);
 int	xfs_attr_shortform_lookup(struct xfs_da_args *args);
 int	xfs_attr_shortform_getvalue(struct xfs_da_args *args);
-int	xfs_attr_shortform_to_leaf(struct xfs_da_args *args);
+int	xfs_attr_shortform_to_leaf(struct xfs_da_args *args,
+			struct xfs_buf **leaf_bp);
 int	xfs_attr_shortform_remove(struct xfs_da_args *args);
 int	xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
 int	xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes);
+1 -1
fs/xfs/libxfs/xfs_bmap.c
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5136,7 +5136,7 @@
	 * blowing out the transaction with a mix of EFIs and reflink
	 * adjustments.
	 */
-	if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
+	if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
		max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
	else
		max_len = len;
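The null check matters because __xfs_bunmapi() can run without a transaction
at all; punching out delayed allocations is one such path, and dereferencing
tp->t_log_res there would crash. A sketch of that kind of call (assumed to
follow the delalloc-punch callers; the exact argument list is from memory):

	/* delalloc extents never hit the log, so no transaction is needed;
	 * tp is NULL here (modelled on xfs_bmap_punch_delalloc_range) */
	error = xfs_bunmapi(NULL, ip, start_fsb, 1, XFS_BMAPI_ENTIRE, 0,
			&firstblock, &dfops, &done);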
+36 -3
fs/xfs/libxfs/xfs_defer.c
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -249,6 +249,10 @@
	for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
		xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE);
 
+	/* Hold the (previously bjoin'd) buffer locked across the roll. */
+	for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++)
+		xfs_trans_dirty_buf(*tp, dop->dop_bufs[i]);
+
	trace_xfs_defer_trans_roll((*tp)->t_mountp, dop);
 
	/* Roll the transaction. */
@@ -267,6 +263,12 @@
	/* Rejoin the joined inodes. */
	for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
		xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0);
+
+	/* Rejoin the buffers and dirty them so the log moves forward. */
+	for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) {
+		xfs_trans_bjoin(*tp, dop->dop_bufs[i]);
+		xfs_trans_bhold(*tp, dop->dop_bufs[i]);
+	}
 
	return error;
 }
@@ -305,5 +295,30 @@
		}
	}
 
+	ASSERT(0);
+	return -EFSCORRUPTED;
+}
+
+/*
+ * Add this buffer to the deferred op.  Each joined buffer is relogged
+ * each time we roll the transaction.
+ */
+int
+xfs_defer_bjoin(
+	struct xfs_defer_ops		*dop,
+	struct xfs_buf			*bp)
+{
+	int				i;
+
+	for (i = 0; i < XFS_DEFER_OPS_NR_BUFS; i++) {
+		if (dop->dop_bufs[i] == bp)
+			return 0;
+		else if (dop->dop_bufs[i] == NULL) {
+			dop->dop_bufs[i] = bp;
+			return 0;
+		}
+	}
+
+	ASSERT(0);
	return -EFSCORRUPTED;
 }
@@ -528,9 +493,7 @@
	struct xfs_defer_ops		*dop,
	xfs_fsblock_t			*fbp)
 {
-	dop->dop_committed = false;
-	dop->dop_low = false;
-	memset(&dop->dop_inodes, 0, sizeof(dop->dop_inodes));
+	memset(dop, 0, sizeof(struct xfs_defer_ops));
	*fbp = NULLFSBLOCK;
	INIT_LIST_HEAD(&dop->dop_intake);
	INIT_LIST_HEAD(&dop->dop_pending);
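Usage mirrors xfs_defer_ijoin(): the caller bholds the buffer in the current
transaction and registers it with the defer_ops, and every subsequent roll
rejoins, re-holds, and redirties it so the pinned buffer cannot stall the log
tail. Sketch (same sequence as the xfs_attr.c hunk above):

	xfs_trans_bhold(tp, bp);		/* keep bp locked across commits */
	error = xfs_defer_bjoin(dfops, bp);	/* relog bp on every roll */
	if (error)
		goto out_defer_cancel;
	error = xfs_defer_finish(&tp, dfops);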
+4 -1
fs/xfs/libxfs/xfs_defer.h
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -59,6 +59,7 @@
 };
 
 #define XFS_DEFER_OPS_NR_INODES	2	/* join up to two inodes */
+#define XFS_DEFER_OPS_NR_BUFS	2	/* join up to two buffers */
 
 struct xfs_defer_ops {
	bool			dop_committed;	/* did any trans commit? */
@@ -67,8 +66,9 @@
	struct list_head	dop_intake;	/* unlogged pending work */
	struct list_head	dop_pending;	/* logged pending work */
 
-	/* relog these inodes with each roll */
+	/* relog these with each roll */
	struct xfs_inode	*dop_inodes[XFS_DEFER_OPS_NR_INODES];
+	struct xfs_buf		*dop_bufs[XFS_DEFER_OPS_NR_BUFS];
 };
 
 void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type,
@@ -79,6 +77,7 @@
 void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp);
 bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop);
 int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip);
+int xfs_defer_bjoin(struct xfs_defer_ops *dop, struct xfs_buf *bp);
 
 /* Description of a deferred type. */
 struct xfs_defer_op_type {
+2 -2
fs/xfs/libxfs/xfs_iext_tree.c
--- a/fs/xfs/libxfs/xfs_iext_tree.c
+++ b/fs/xfs/libxfs/xfs_iext_tree.c
@@ -632,8 +632,6 @@
	struct xfs_iext_leaf	*new = NULL;
	int			nr_entries, i;
 
-	trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
-
	if (ifp->if_height == 0)
		xfs_iext_alloc_root(ifp, cur);
	else if (ifp->if_height == 1)
@@ -658,6 +660,8 @@
		cur->leaf->recs[i] = cur->leaf->recs[i - 1];
	xfs_iext_set(cur_rec(cur), irec);
	ifp->if_bytes += sizeof(struct xfs_iext_rec);
+
+	trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
 
	if (new)
		xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2);
+19 -33
fs/xfs/libxfs/xfs_refcount.c
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1488,25 +1488,10 @@
	xfs_extlen_t			aglen,
	struct xfs_defer_ops		*dfops)
 {
-	int				error;
-
	trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno,
			agbno, aglen);
 
	/* Add refcount btree reservation */
-	error = xfs_refcount_adjust_cow(rcur, agbno, aglen,
+	return xfs_refcount_adjust_cow(rcur, agbno, aglen,
			XFS_REFCOUNT_ADJUST_COW_ALLOC, dfops);
-	if (error)
-		return error;
-
-	/* Add rmap entry */
-	if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) {
-		error = xfs_rmap_alloc_extent(rcur->bc_mp, dfops,
-				rcur->bc_private.a.agno,
-				agbno, aglen, XFS_RMAP_OWN_COW);
-		if (error)
-			return error;
-	}
-
-	return error;
 }
@@ -1506,25 +1521,10 @@
	xfs_extlen_t			aglen,
	struct xfs_defer_ops		*dfops)
 {
-	int				error;
-
	trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno,
			agbno, aglen);
 
	/* Remove refcount btree reservation */
-	error = xfs_refcount_adjust_cow(rcur, agbno, aglen,
+	return xfs_refcount_adjust_cow(rcur, agbno, aglen,
			XFS_REFCOUNT_ADJUST_COW_FREE, dfops);
-	if (error)
-		return error;
-
-	/* Remove rmap entry */
-	if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) {
-		error = xfs_rmap_free_extent(rcur->bc_mp, dfops,
-				rcur->bc_private.a.agno,
-				agbno, aglen, XFS_RMAP_OWN_COW);
-		if (error)
-			return error;
-	}
-
-	return error;
 }
@@ -1522,9 +1552,17 @@
	xfs_fsblock_t			fsb,
	xfs_extlen_t			len)
 {
+	int				error;
+
	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return 0;
 
-	return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW,
+	error = __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW,
			fsb, len);
+	if (error)
+		return error;
+
+	/* Add rmap entry */
+	return xfs_rmap_alloc_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
+			XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
 }
@@ -1545,8 +1567,16 @@
	xfs_fsblock_t			fsb,
	xfs_extlen_t			len)
 {
+	int				error;
+
	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return 0;
+
+	/* Remove rmap entry */
+	error = xfs_rmap_free_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
+			XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
+	if (error)
+		return error;
 
	return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW,
			fsb, len);
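Hoisting the rmap calls out of the adjust helpers and into
xfs_refcount_{alloc,free}_cow_extent() is what fixes the ordering: the
deferred rmap items are now queued alongside the refcount items rather than
while an earlier item is being finished. Illustrative caller (surrounding
context assumed):

	/* Staging a CoW extent queues the XFS_REFCOUNT_ALLOC_COW item and
	 * then the XFS_RMAP_OWN_COW addition; freeing queues the rmap
	 * removal first, so replay sees the two btrees change in step. */
	error = xfs_refcount_alloc_cow_extent(mp, dfops, fsb, len);
	if (error)
		goto out_defer_cancel;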
+76 -23
fs/xfs/libxfs/xfs_rmap.c
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -368,6 +368,51 @@
 }
 
 /*
+ * Perform all the relevant owner checks for a removal op.  If we're doing an
+ * unknown-owner removal then we have no owner information to check.
+ */
+static int
+xfs_rmap_free_check_owner(
+	struct xfs_mount	*mp,
+	uint64_t		ltoff,
+	struct xfs_rmap_irec	*rec,
+	xfs_fsblock_t		bno,
+	xfs_filblks_t		len,
+	uint64_t		owner,
+	uint64_t		offset,
+	unsigned int		flags)
+{
+	int			error = 0;
+
+	if (owner == XFS_RMAP_OWN_UNKNOWN)
+		return 0;
+
+	/* Make sure the unwritten flag matches. */
+	XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) ==
+			(rec->rm_flags & XFS_RMAP_UNWRITTEN), out);
+
+	/* Make sure the owner matches what we expect to find in the tree. */
+	XFS_WANT_CORRUPTED_GOTO(mp, owner == rec->rm_owner, out);
+
+	/* Check the offset, if necessary. */
+	if (XFS_RMAP_NON_INODE_OWNER(owner))
+		goto out;
+
+	if (flags & XFS_RMAP_BMBT_BLOCK) {
+		XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_flags & XFS_RMAP_BMBT_BLOCK,
+				out);
+	} else {
+		XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_offset <= offset, out);
+		XFS_WANT_CORRUPTED_GOTO(mp,
+				ltoff + rec->rm_blockcount >= offset + len,
+				out);
+	}
+
+out:
+	return error;
+}
+
+/*
 * Find the extent in the rmap btree and remove it.
 *
 * The record we find should always be an exact match for the extent that we're
@@ -489,32 +444,39 @@
		goto out_done;
	}
 
-	/* Make sure the unwritten flag matches. */
-	XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) ==
-			(ltrec.rm_flags & XFS_RMAP_UNWRITTEN), out_error);
+	/*
+	 * If we're doing an unknown-owner removal for EFI recovery, we expect
+	 * to find the full range in the rmapbt or nothing at all.  If we
+	 * don't find any rmaps overlapping either end of the range, we're
+	 * done.  Hopefully this means that the EFI creator already queued
+	 * (and finished) a RUI to remove the rmap.
+	 */
+	if (owner == XFS_RMAP_OWN_UNKNOWN &&
+	    ltrec.rm_startblock + ltrec.rm_blockcount <= bno) {
+		struct xfs_rmap_irec	rtrec;
+
+		error = xfs_btree_increment(cur, 0, &i);
+		if (error)
+			goto out_error;
+		if (i == 0)
+			goto out_done;
+		error = xfs_rmap_get_rec(cur, &rtrec, &i);
+		if (error)
+			goto out_error;
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+		if (rtrec.rm_startblock >= bno + len)
+			goto out_done;
+	}
 
	/* Make sure the extent we found covers the entire freeing range. */
	XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno &&
-		ltrec.rm_startblock + ltrec.rm_blockcount >=
-		bno + len, out_error);
+			ltrec.rm_startblock + ltrec.rm_blockcount >=
+			bno + len, out_error);
 
-	/* Make sure the owner matches what we expect to find in the tree. */
-	XFS_WANT_CORRUPTED_GOTO(mp, owner == ltrec.rm_owner ||
-				XFS_RMAP_NON_INODE_OWNER(owner), out_error);
-
-	/* Check the offset, if necessary. */
-	if (!XFS_RMAP_NON_INODE_OWNER(owner)) {
-		if (flags & XFS_RMAP_BMBT_BLOCK) {
-			XFS_WANT_CORRUPTED_GOTO(mp,
-					ltrec.rm_flags & XFS_RMAP_BMBT_BLOCK,
-					out_error);
-		} else {
-			XFS_WANT_CORRUPTED_GOTO(mp,
-					ltrec.rm_offset <= offset, out_error);
-			XFS_WANT_CORRUPTED_GOTO(mp,
-					ltoff + ltrec.rm_blockcount >= offset + len,
-					out_error);
-		}
-	}
+	/* Check owner information. */
+	error = xfs_rmap_free_check_owner(mp, ltoff, &ltrec, bno, len, owner,
+			offset, flags);
+	if (error)
+		goto out_error;
 
	if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) {
		/* exact match, simply remove the record from rmap tree */
@@ -716,6 +664,7 @@
		flags |= XFS_RMAP_UNWRITTEN;
	trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len,
			unwritten, oinfo);
+	ASSERT(!xfs_rmap_should_skip_owner_update(oinfo));
 
	/*
	 * For the initial lookup, look for an exact match or the left-adjacent
+15 -1
fs/xfs/libxfs/xfs_rmap.h
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -61,7 +61,21 @@
 xfs_rmap_skip_owner_update(
	struct xfs_owner_info	*oi)
 {
-	oi->oi_owner = XFS_RMAP_OWN_UNKNOWN;
+	xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_NULL);
+}
+
+static inline bool
+xfs_rmap_should_skip_owner_update(
+	struct xfs_owner_info	*oi)
+{
+	return oi->oi_owner == XFS_RMAP_OWN_NULL;
+}
+
+static inline void
+xfs_rmap_any_owner_update(
+	struct xfs_owner_info	*oi)
+{
+	xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_UNKNOWN);
 }
 
 /* Reverse mapping functions. */
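There are now three distinct owner conventions, and which one a caller picks
controls both whether the rmapbt is touched at all and how strictly a removal
is checked. A sketch of the distinction (illustrative; XFS_RMAP_OWN_INODES
stands in for any concrete owner):

	struct xfs_owner_info	oinfo;

	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
				/* exact owner: removal must match */
	xfs_rmap_any_owner_update(&oinfo);
				/* OWN_UNKNOWN: remove whatever rmap
				 * covers the range (EFI recovery) */
	xfs_rmap_skip_owner_update(&oinfo);
				/* OWN_NULL: make no rmapbt update at all */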
+1 -1
fs/xfs/xfs_extfree_item.c
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -538,7 +538,7 @@
		return error;
	efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
 
-	xfs_rmap_skip_owner_update(&oinfo);
+	xfs_rmap_any_owner_update(&oinfo);
	for (i = 0; i < efip->efi_format.efi_nextents; i++) {
		extp = &efip->efi_format.efi_extents[i];
		error = xfs_trans_free_extent(tp, efdp, extp->ext_start,
+5 -0
fs/xfs/xfs_fsops.c
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -571,6 +571,11 @@
	 * this doesn't actually exist in the rmap btree.
	 */
	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL);
+	error = xfs_rmap_free(tp, bp, agno,
+			be32_to_cpu(agf->agf_length) - new,
+			new, &oinfo);
+	if (error)
+		goto error0;
	error = xfs_free_extent(tp,
			XFS_AGB_TO_FSB(mp, agno,
				be32_to_cpu(agf->agf_length) - new),
+25 -10
fs/xfs/xfs_icache.c
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -870,7 +870,7 @@
 * based on the 'speculative_cow_prealloc_lifetime' tunable (5m by default).
 * (We'll just piggyback on the post-EOF prealloc space workqueue.)
 */
-STATIC void
+void
 xfs_queue_cowblocks(
	struct xfs_mount *mp)
 {
@@ -1536,8 +1536,23 @@
	return __xfs_inode_free_quota_eofblocks(ip, xfs_icache_free_eofblocks);
 }
 
+static inline unsigned long
+xfs_iflag_for_tag(
+	int		tag)
+{
+	switch (tag) {
+	case XFS_ICI_EOFBLOCKS_TAG:
+		return XFS_IEOFBLOCKS;
+	case XFS_ICI_COWBLOCKS_TAG:
+		return XFS_ICOWBLOCKS;
+	default:
+		ASSERT(0);
+		return 0;
+	}
+}
+
 static void
-__xfs_inode_set_eofblocks_tag(
+__xfs_inode_set_blocks_tag(
	xfs_inode_t	*ip,
	void		(*execute)(struct xfs_mount *mp),
	void		(*set_tp)(struct xfs_mount *mp, xfs_agnumber_t agno,
@@ -1567,10 +1552,10 @@
	 * Don't bother locking the AG and looking up in the radix trees
	 * if we already know that we have the tag set.
	 */
-	if (ip->i_flags & XFS_IEOFBLOCKS)
+	if (ip->i_flags & xfs_iflag_for_tag(tag))
		return;
	spin_lock(&ip->i_flags_lock);
-	ip->i_flags |= XFS_IEOFBLOCKS;
+	ip->i_flags |= xfs_iflag_for_tag(tag);
	spin_unlock(&ip->i_flags_lock);
 
	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
@@ -1602,13 +1587,13 @@
	xfs_inode_t	*ip)
 {
	trace_xfs_inode_set_eofblocks_tag(ip);
-	return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_eofblocks,
+	return __xfs_inode_set_blocks_tag(ip, xfs_queue_eofblocks,
			trace_xfs_perag_set_eofblocks,
			XFS_ICI_EOFBLOCKS_TAG);
 }
 
 static void
-__xfs_inode_clear_eofblocks_tag(
+__xfs_inode_clear_blocks_tag(
	xfs_inode_t	*ip,
	void		(*clear_tp)(struct xfs_mount *mp, xfs_agnumber_t agno,
			int error, unsigned long caller_ip),
@@ -1618,7 +1603,7 @@
	struct xfs_perag *pag;
 
	spin_lock(&ip->i_flags_lock);
-	ip->i_flags &= ~XFS_IEOFBLOCKS;
+	ip->i_flags &= ~xfs_iflag_for_tag(tag);
	spin_unlock(&ip->i_flags_lock);
 
	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
@@ -1645,6 +1630,6 @@
	xfs_inode_t	*ip)
 {
	trace_xfs_inode_clear_eofblocks_tag(ip);
-	return __xfs_inode_clear_eofblocks_tag(ip,
+	return __xfs_inode_clear_blocks_tag(ip,
			trace_xfs_perag_clear_eofblocks, XFS_ICI_EOFBLOCKS_TAG);
 }
@@ -1739,7 +1724,7 @@
	xfs_inode_t	*ip)
 {
	trace_xfs_inode_set_cowblocks_tag(ip);
-	return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks,
+	return __xfs_inode_set_blocks_tag(ip, xfs_queue_cowblocks,
			trace_xfs_perag_set_cowblocks,
			XFS_ICI_COWBLOCKS_TAG);
 }
@@ -1749,6 +1734,6 @@
	xfs_inode_t	*ip)
 {
	trace_xfs_inode_clear_cowblocks_tag(ip);
-	return __xfs_inode_clear_eofblocks_tag(ip,
+	return __xfs_inode_clear_blocks_tag(ip,
			trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG);
 }
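With the shared helpers, the only per-tag difference is which i_flags bit
caches the radix-tree state, so the two tags can no longer clobber each
other's cached bit. Sketch of the now-independent pair (illustrative):

	xfs_inode_set_eofblocks_tag(ip);	/* caches XFS_IEOFBLOCKS */
	xfs_inode_set_cowblocks_tag(ip);	/* caches XFS_ICOWBLOCKS */
	xfs_inode_clear_eofblocks_tag(ip);	/* XFS_ICOWBLOCKS unaffected */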
+1 -0
fs/xfs/xfs_icache.h
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -81,6 +81,7 @@
 int xfs_icache_free_cowblocks(struct xfs_mount *, struct xfs_eofblocks *);
 int xfs_inode_free_quota_cowblocks(struct xfs_inode *ip);
 void xfs_cowblocks_worker(struct work_struct *);
+void xfs_queue_cowblocks(struct xfs_mount *);
 
 int xfs_inode_ag_iterator(struct xfs_mount *mp,
	int (*execute)(struct xfs_inode *ip, int flags, void *args),
+19 -9
fs/xfs/xfs_inode.c
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1487,6 +1487,24 @@
	return error;
 }
 
+/* Clear the reflink flag and the cowblocks tag if possible. */
+static void
+xfs_itruncate_clear_reflink_flags(
+	struct xfs_inode	*ip)
+{
+	struct xfs_ifork	*dfork;
+	struct xfs_ifork	*cfork;
+
+	if (!xfs_is_reflink_inode(ip))
+		return;
+	dfork = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+	cfork = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+	if (dfork->if_bytes == 0 && cfork->if_bytes == 0)
+		ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
+	if (cfork->if_bytes == 0)
+		xfs_inode_clear_cowblocks_tag(ip);
+}
+
 /*
 * Free up the underlying blocks past new_size. The new size must be smaller
 * than the current size. This routine can be used both for the attribute and
@@ -1601,15 +1583,7 @@
	if (error)
		goto out;
 
-	/*
-	 * Clear the reflink flag if there are no data fork blocks and
-	 * there are no extents staged in the cow fork.
-	 */
-	if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) {
-		if (ip->i_d.di_nblocks == 0)
-			ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
-		xfs_inode_clear_cowblocks_tag(ip);
-	}
+	xfs_itruncate_clear_reflink_flags(ip);
 
	/*
	 * Always re-log the inode so that our permanent transaction can keep
+1 -0
fs/xfs/xfs_inode.h
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -232,6 +232,7 @@
 * log recovery to replay a bmap operation on the inode.
 */
 #define XFS_IRECOVERY		(1 << 11)
+#define XFS_ICOWBLOCKS		(1 << 12)/* has the cowblocks tag set */
 
 /*
 * Per-lifetime flags need to be reset when re-using a reclaimable inode during
+19 -2
fs/xfs/xfs_reflink.c
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -454,6 +454,8 @@
	if (error)
		goto out_bmap_cancel;
 
+	xfs_inode_set_cowblocks_tag(ip);
+
	/* Finish up. */
	error = xfs_defer_finish(&tp, &dfops);
	if (error)
@@ -492,8 +490,9 @@
	struct xfs_iext_cursor		icur;
 
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
-	ASSERT(xfs_is_reflink_inode(ip));
 
+	if (!xfs_is_reflink_inode(ip))
+		return false;
	offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got))
		return false;
@@ -614,5 +611,8 @@
			/* Remove the mapping from the CoW fork. */
			xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
+		} else {
+			/* Didn't do anything, push cursor back. */
+			xfs_iext_prev(ifp, &icur);
		}
 next_extent:
		if (!xfs_iext_get_extent(ifp, &icur, &got))
@@ -731,6 +725,6 @@
			(unsigned int)(end_fsb - offset_fsb),
			XFS_DATA_FORK);
	error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
-			resblks, 0, 0, &tp);
+			resblks, 0, XFS_TRANS_RESERVE, &tp);
	if (error)
		goto out;
@@ -1296,6 +1290,17 @@
		goto out_unlock;
 
	trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
+
+	/*
+	 * Clear out post-eof preallocations because we don't have page cache
+	 * backing the delayed allocations and they'll never get freed on
+	 * their own.
+	 */
+	if (xfs_can_free_eofblocks(dest, true)) {
+		ret = xfs_free_eofblocks(dest);
+		if (ret)
+			goto out_unlock;
+	}
 
	/* Set flags and remap blocks. */
	ret = xfs_reflink_set_inode_flag(src, dest);
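The XFS_TRANS_RESERVE change is the "CoW remap transactions may use reserve
blocks" fix: once data has already been written into the staging extent, the
post-write remap must not fail with ENOSPC, so the transaction is allowed to
dip into the reserve pool. Sketch (same call shape as the hunk above):

	/* post-write remap: allowed to consume reserve blocks */
	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
			XFS_TRANS_RESERVE, &tp);
	if (error)
		goto out;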
+9 -0
fs/xfs/xfs_super.c
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1360,6 +1360,7 @@
			xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
			return error;
		}
+		xfs_queue_cowblocks(mp);
 
		/* Create the per-AG metadata reservation pool .*/
		error = xfs_fs_reserve_ag_blocks(mp);
@@ -1371,5 +1372,13 @@
	/* rw -> ro */
	if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) {
+		/* Get rid of any leftover CoW reservations... */
+		cancel_delayed_work_sync(&mp->m_cowblocks_work);
+		error = xfs_icache_free_cowblocks(mp, NULL);
+		if (error) {
+			xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+			return error;
+		}
+
		/* Free the per-AG metadata reservation pool. */
		error = xfs_fs_unreserve_ag_blocks(mp);
		if (error) {