Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xfs: relog dirty buffers during swapext bmbt owner change

The owner change bmbt scan that occurs during extent swap operations
does not handle ordered buffer failures. Buffers that cannot be
marked ordered must be physically logged so previously dirty ranges
of the buffer can be relogged in the transaction.

Since the bmbt scan may need to process and potentially log a large
number of blocks, we can't expect to complete this operation in a
single transaction. Update extent swap to use a permanent
transaction with enough log reservation to physically log a buffer.
Update the bmbt scan to physically log any buffers that cannot be
ordered and to terminate the scan with -EAGAIN. On -EAGAIN, the
caller rolls the transaction and restarts the scan. Finally, update
the bmbt scan helper function to skip bmbt blocks that already match
the expected owner so they are not reprocessed after scan restarts.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
[darrick: fix the xfs_trans_roll call]
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>

authored by Brian Foster and committed by Darrick J. Wong
2dd3d709 a5814bce

+65 -18
+18 -8
fs/xfs/libxfs/xfs_btree.c
··· 4452 4452 4453 4453 /* modify the owner */ 4454 4454 block = xfs_btree_get_block(cur, level, &bp); 4455 - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 4455 + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 4456 + if (block->bb_u.l.bb_owner == cpu_to_be64(bbcoi->new_owner)) 4457 + return 0; 4456 4458 block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner); 4457 - else 4459 + } else { 4460 + if (block->bb_u.s.bb_owner == cpu_to_be32(bbcoi->new_owner)) 4461 + return 0; 4458 4462 block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner); 4463 + } 4459 4464 4460 4465 /* 4461 4466 * If the block is a root block hosted in an inode, we might not have a ··· 4469 4464 * block is formatted into the on-disk inode fork. We still change it, 4470 4465 * though, so everything is consistent in memory. 4471 4466 */ 4472 - if (bp) { 4473 - if (cur->bc_tp) 4474 - xfs_trans_ordered_buf(cur->bc_tp, bp); 4475 - else 4476 - xfs_buf_delwri_queue(bp, bbcoi->buffer_list); 4477 - } else { 4467 + if (!bp) { 4478 4468 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); 4479 4469 ASSERT(level == cur->bc_nlevels - 1); 4470 + return 0; 4471 + } 4472 + 4473 + if (cur->bc_tp) { 4474 + if (!xfs_trans_ordered_buf(cur->bc_tp, bp)) { 4475 + xfs_btree_log_block(cur, bp, XFS_BB_OWNER); 4476 + return -EAGAIN; 4477 + } 4478 + } else { 4479 + xfs_buf_delwri_queue(bp, bbcoi->buffer_list); 4480 4480 } 4481 4481 4482 4482 return 0;
+47 -10
fs/xfs/xfs_bmap_util.c
··· 1931 1931 return 0; 1932 1932 } 1933 1933 1934 + /* 1935 + * Fix up the owners of the bmbt blocks to refer to the current inode. The 1936 + * change owner scan attempts to order all modified buffers in the current 1937 + * transaction. In the event of ordered buffer failure, the offending buffer is 1938 + * physically logged as a fallback and the scan returns -EAGAIN. We must roll 1939 + * the transaction in this case to replenish the fallback log reservation and 1940 + * restart the scan. This process repeats until the scan completes. 1941 + */ 1942 + static int 1943 + xfs_swap_change_owner( 1944 + struct xfs_trans **tpp, 1945 + struct xfs_inode *ip, 1946 + struct xfs_inode *tmpip) 1947 + { 1948 + int error; 1949 + struct xfs_trans *tp = *tpp; 1950 + 1951 + do { 1952 + error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK, ip->i_ino, 1953 + NULL); 1954 + /* success or fatal error */ 1955 + if (error != -EAGAIN) 1956 + break; 1957 + 1958 + error = xfs_trans_roll(tpp); 1959 + if (error) 1960 + break; 1961 + tp = *tpp; 1962 + 1963 + /* 1964 + * Redirty both inodes so they can relog and keep the log tail 1965 + * moving forward. 
1966 + */ 1967 + xfs_trans_ijoin(tp, ip, 0); 1968 + xfs_trans_ijoin(tp, tmpip, 0); 1969 + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1970 + xfs_trans_log_inode(tp, tmpip, XFS_ILOG_CORE); 1971 + } while (true); 1972 + 1973 + return error; 1974 + } 1975 + 1934 1976 int 1935 1977 xfs_swap_extents( 1936 1978 struct xfs_inode *ip, /* target inode */ ··· 1987 1945 int lock_flags; 1988 1946 struct xfs_ifork *cowfp; 1989 1947 uint64_t f; 1990 - int resblks; 1948 + int resblks = 0; 1991 1949 1992 1950 /* 1993 1951 * Lock the inodes against other IO, page faults and truncate to ··· 2035 1993 XFS_SWAP_RMAP_SPACE_RES(mp, 2036 1994 XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK), 2037 1995 XFS_DATA_FORK); 2038 - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 2039 - 0, 0, &tp); 2040 - } else 2041 - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 2042 - 0, 0, &tp); 1996 + } 1997 + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); 2043 1998 if (error) 2044 1999 goto out_unlock; 2045 2000 ··· 2128 2089 * inode number of the current inode. 2129 2090 */ 2130 2091 if (src_log_flags & XFS_ILOG_DOWNER) { 2131 - error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK, 2132 - ip->i_ino, NULL); 2092 + error = xfs_swap_change_owner(&tp, ip, tip); 2133 2093 if (error) 2134 2094 goto out_trans_cancel; 2135 2095 } 2136 2096 if (target_log_flags & XFS_ILOG_DOWNER) { 2137 - error = xfs_bmbt_change_owner(tp, tip, XFS_DATA_FORK, 2138 - tip->i_ino, NULL); 2097 + error = xfs_swap_change_owner(&tp, tip, ip); 2139 2098 if (error) 2140 2099 goto out_trans_cancel; 2141 2100 }