Merge tag 'xfs-5.13-fixes-3' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Darrick Wong:
"This week's pile mitigates some decades-old problems in how extent
size hints interact with realtime volumes, fixes some failures in
online shrink, and fixes a problem where directory and symlink
shrinking on extremely fragmented filesystems could fail.

The most user-notable change here is to point users at our (new) IRC
channel on OFTC. Freedom isn't free, it costs folks like you and me;
and if you don't kowtow, they'll expel everyone and take over your
channel. (Ok, ok, that didn't fit the song lyrics...)

Summary:

- Fix a bug where unmapping operations end earlier than expected,
which can cause chaos on multi-block directory and symlink shrink
operations.

- Fix an erroneous assert that can trigger if we try to transition a
bmap structure from btree format to extents format with zero
extents. This was exposed by xfs/538"

* tag 'xfs-5.13-fixes-3' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
xfs: bunmapi has unnecessary AG lock ordering issues
xfs: btree format inode forks can have zero extents
xfs: add new IRC channel to MAINTAINERS
xfs: validate extsz hints against rt extent size when rtinherit is set
xfs: standardize extent size hint validation
xfs: check free AG space when making per-AG reservations

+1
MAINTAINERS
··· 20014 20014 F: drivers/xen/*swiotlb* 20015 20015 20016 20016 XFS FILESYSTEM 20017 + C: irc://irc.oftc.net/xfs 20017 20018 M: Darrick J. Wong <djwong@kernel.org> 20018 20019 M: linux-xfs@vger.kernel.org 20019 20020 L: linux-xfs@vger.kernel.org
+15 -3
fs/xfs/libxfs/xfs_ag_resv.c
··· 325 325 error2 = xfs_alloc_pagf_init(mp, tp, pag->pag_agno, 0); 326 326 if (error2) 327 327 return error2; 328 - ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved + 329 - xfs_perag_resv(pag, XFS_AG_RESV_RMAPBT)->ar_reserved <= 330 - pag->pagf_freeblks + pag->pagf_flcount); 328 + 329 + /* 330 + * If there isn't enough space in the AG to satisfy the 331 + * reservation, let the caller know that there wasn't enough 332 + * space. Callers are responsible for deciding what to do 333 + * next, since (in theory) we can stumble along with 334 + * insufficient reservation if data blocks are being freed to 335 + * replenish the AG's free space. 336 + */ 337 + if (!error && 338 + xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved + 339 + xfs_perag_resv(pag, XFS_AG_RESV_RMAPBT)->ar_reserved > 340 + pag->pagf_freeblks + pag->pagf_flcount) 341 + error = -ENOSPC; 331 342 } 343 + 332 344 return error; 333 345 } 334 346
-12
fs/xfs/libxfs/xfs_bmap.c
··· 605 605 606 606 ASSERT(cur); 607 607 ASSERT(whichfork != XFS_COW_FORK); 608 - ASSERT(!xfs_need_iread_extents(ifp)); 609 608 ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE); 610 609 ASSERT(be16_to_cpu(rblock->bb_level) == 1); 611 610 ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1); ··· 5349 5350 xfs_fsblock_t sum; 5350 5351 xfs_filblks_t len = *rlen; /* length to unmap in file */ 5351 5352 xfs_fileoff_t max_len; 5352 - xfs_agnumber_t prev_agno = NULLAGNUMBER, agno; 5353 5353 xfs_fileoff_t end; 5354 5354 struct xfs_iext_cursor icur; 5355 5355 bool done = false; ··· 5440 5442 del = got; 5441 5443 wasdel = isnullstartblock(del.br_startblock); 5442 5444 5443 - /* 5444 - * Make sure we don't touch multiple AGF headers out of order 5445 - * in a single transaction, as that could cause AB-BA deadlocks. 5446 - */ 5447 - if (!wasdel && !isrt) { 5448 - agno = XFS_FSB_TO_AGNO(mp, del.br_startblock); 5449 - if (prev_agno != NULLAGNUMBER && prev_agno > agno) 5450 - break; 5451 - prev_agno = agno; 5452 - } 5453 5445 if (got.br_startoff < start) { 5454 5446 del.br_startoff = start; 5455 5447 del.br_blockcount -= start - got.br_startoff;
+42 -4
fs/xfs/libxfs/xfs_inode_buf.c
··· 559 559 /* 560 560 * Validate di_extsize hint. 561 561 * 562 - * The rules are documented at xfs_ioctl_setattr_check_extsize(). 563 - * These functions must be kept in sync with each other. 562 + * 1. Extent size hint is only valid for directories and regular files. 563 + * 2. FS_XFLAG_EXTSIZE is only valid for regular files. 564 + * 3. FS_XFLAG_EXTSZINHERIT is only valid for directories. 565 + * 4. Hint cannot be larger than MAXEXTLEN. 566 + * 5. Can be changed on directories at any time. 567 + * 6. Hint value of 0 turns off hints, clears inode flags. 568 + * 7. Extent size must be a multiple of the appropriate block size. 569 + * For realtime files, this is the rt extent size. 570 + * 8. For non-realtime files, the extent size hint must be limited 571 + * to half the AG size to avoid alignment extending the extent beyond the 572 + * limits of the AG. 564 573 */ 565 574 xfs_failaddr_t 566 575 xfs_inode_validate_extsize( ··· 588 579 hint_flag = (flags & XFS_DIFLAG_EXTSIZE); 589 580 inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT); 590 581 extsize_bytes = XFS_FSB_TO_B(mp, extsize); 582 + 583 + /* 584 + * This comment describes a historic gap in this verifier function. 585 + * 586 + * On older kernels, the extent size hint verifier doesn't check that 587 + * the extent size hint is an integer multiple of the realtime extent 588 + * size on a directory with both RTINHERIT and EXTSZINHERIT flags set. 589 + * The verifier has always enforced the alignment rule for regular 590 + * files with the REALTIME flag set. 591 + * 592 + * If a directory with a misaligned extent size hint is allowed to 593 + * propagate that hint into a new regular realtime file, the result 594 + * is that the inode cluster buffer verifier will trigger a corruption 595 + * shutdown the next time it is run. 
596 + * 597 + * Unfortunately, there could be filesystems with these misconfigured 598 + * directories in the wild, so we cannot add a check to this verifier 599 + * at this time because that will result in a new source of directory 600 + * corruption errors when reading an existing filesystem. Instead, we 601 + * permit the misconfiguration to pass through the verifiers so that 602 + * callers of this function can correct and mitigate externally. 603 + */ 591 604 592 605 if (rt_flag) 593 606 blocksize_bytes = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog; ··· 647 616 /* 648 617 * Validate di_cowextsize hint. 649 618 * 650 - * The rules are documented at xfs_ioctl_setattr_check_cowextsize(). 651 - * These functions must be kept in sync with each other. 619 + * 1. CoW extent size hint can only be set if reflink is enabled on the fs. 620 + * The inode does not have to have any shared blocks, but it must be a v3. 621 + * 2. FS_XFLAG_COWEXTSIZE is only valid for directories and regular files; 622 + * for a directory, the hint is propagated to new files. 623 + * 3. Can be changed on files & directories at any time. 624 + * 4. Hint value of 0 turns off hints, clears inode flags. 625 + * 5. Extent size must be a multiple of the appropriate block size. 626 + * 6. The extent size hint must be limited to half the AG size to avoid 627 + * alignment extending the extent beyond the limits of the AG. 652 628 */ 653 629 xfs_failaddr_t 654 630 xfs_inode_validate_cowextsize(
+17
fs/xfs/libxfs/xfs_trans_inode.c
··· 143 143 } 144 144 145 145 /* 146 + * Inode verifiers on older kernels don't check that the extent size 147 + * hint is an integer multiple of the rt extent size on a directory 148 + * with both rtinherit and extszinherit flags set. If we're logging a 149 + * directory that is misconfigured in this way, clear the hint. 150 + */ 151 + if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) && 152 + (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) && 153 + (ip->i_extsize % ip->i_mount->m_sb.sb_rextsize) > 0) { 154 + xfs_info_once(ip->i_mount, 155 + "Correcting misaligned extent size hint in inode 0x%llx.", ip->i_ino); 156 + ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE | 157 + XFS_DIFLAG_EXTSZINHERIT); 158 + ip->i_extsize = 0; 159 + flags |= XFS_ILOG_CORE; 160 + } 161 + 162 + /* 146 163 * Record the specific change for fdatasync optimisation. This allows 147 164 * fdatasync to skip log forces for inodes that are only timestamp 148 165 * dirty.
+29
fs/xfs/xfs_inode.c
··· 690 690 const struct xfs_inode *pip) 691 691 { 692 692 unsigned int di_flags = 0; 693 + xfs_failaddr_t failaddr; 693 694 umode_t mode = VFS_I(ip)->i_mode; 694 695 695 696 if (S_ISDIR(mode)) { ··· 730 729 di_flags |= XFS_DIFLAG_FILESTREAM; 731 730 732 731 ip->i_diflags |= di_flags; 732 + 733 + /* 734 + * Inode verifiers on older kernels only check that the extent size 735 + * hint is an integer multiple of the rt extent size on realtime files. 736 + * They did not check the hint alignment on a directory with both 737 + * rtinherit and extszinherit flags set. If the misaligned hint is 738 + * propagated from a directory into a new realtime file, new file 739 + * allocations will fail due to math errors in the rt allocator and/or 740 + * trip the verifiers. Validate the hint settings in the new file so 741 + * that we don't let broken hints propagate. 742 + */ 743 + failaddr = xfs_inode_validate_extsize(ip->i_mount, ip->i_extsize, 744 + VFS_I(ip)->i_mode, ip->i_diflags); 745 + if (failaddr) { 746 + ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE | 747 + XFS_DIFLAG_EXTSZINHERIT); 748 + ip->i_extsize = 0; 749 + } 733 750 } 734 751 735 752 /* Propagate di_flags2 from a parent inode to a child inode. */ ··· 756 737 struct xfs_inode *ip, 757 738 const struct xfs_inode *pip) 758 739 { 740 + xfs_failaddr_t failaddr; 741 + 759 742 if (pip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) { 760 743 ip->i_diflags2 |= XFS_DIFLAG2_COWEXTSIZE; 761 744 ip->i_cowextsize = pip->i_cowextsize; 762 745 } 763 746 if (pip->i_diflags2 & XFS_DIFLAG2_DAX) 764 747 ip->i_diflags2 |= XFS_DIFLAG2_DAX; 748 + 749 + /* Don't let invalid cowextsize hints propagate. */ 750 + failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize, 751 + VFS_I(ip)->i_mode, ip->i_diflags, ip->i_diflags2); 752 + if (failaddr) { 753 + ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE; 754 + ip->i_cowextsize = 0; 755 + } 765 756 } 766 757 767 758 /*
+34 -67
fs/xfs/xfs_ioctl.c
··· 1267 1267 } 1268 1268 1269 1269 /* 1270 - * extent size hint validation is somewhat cumbersome. Rules are: 1271 - * 1272 - * 1. extent size hint is only valid for directories and regular files 1273 - * 2. FS_XFLAG_EXTSIZE is only valid for regular files 1274 - * 3. FS_XFLAG_EXTSZINHERIT is only valid for directories. 1275 - * 4. can only be changed on regular files if no extents are allocated 1276 - * 5. can be changed on directories at any time 1277 - * 6. extsize hint of 0 turns off hints, clears inode flags. 1278 - * 7. Extent size must be a multiple of the appropriate block size. 1279 - * 8. for non-realtime files, the extent size hint must be limited 1280 - * to half the AG size to avoid alignment extending the extent beyond the 1281 - * limits of the AG. 1282 - * 1283 - * Please keep this function in sync with xfs_scrub_inode_extsize. 1270 + * Validate a proposed extent size hint. For regular files, the hint can only 1271 + * be changed if no extents are allocated. 1284 1272 */ 1285 1273 static int 1286 1274 xfs_ioctl_setattr_check_extsize( ··· 1276 1288 struct fileattr *fa) 1277 1289 { 1278 1290 struct xfs_mount *mp = ip->i_mount; 1279 - xfs_extlen_t size; 1280 - xfs_fsblock_t extsize_fsb; 1291 + xfs_failaddr_t failaddr; 1292 + uint16_t new_diflags; 1281 1293 1282 1294 if (!fa->fsx_valid) 1283 1295 return 0; 1284 1296 1285 1297 if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_df.if_nextents && 1286 - ((ip->i_extsize << mp->m_sb.sb_blocklog) != fa->fsx_extsize)) 1298 + XFS_FSB_TO_B(mp, ip->i_extsize) != fa->fsx_extsize) 1287 1299 return -EINVAL; 1288 1300 1289 - if (fa->fsx_extsize == 0) 1290 - return 0; 1291 - 1292 - extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize); 1293 - if (extsize_fsb > MAXEXTLEN) 1301 + if (fa->fsx_extsize & mp->m_blockmask) 1294 1302 return -EINVAL; 1295 1303 1296 - if (XFS_IS_REALTIME_INODE(ip) || 1297 - (fa->fsx_xflags & FS_XFLAG_REALTIME)) { 1298 - size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog; 1299 - } else { 1300 - size = 
mp->m_sb.sb_blocksize; 1301 - if (extsize_fsb > mp->m_sb.sb_agblocks / 2) 1304 + new_diflags = xfs_flags2diflags(ip, fa->fsx_xflags); 1305 + 1306 + /* 1307 + * Inode verifiers on older kernels don't check that the extent size 1308 + * hint is an integer multiple of the rt extent size on a directory 1309 + * with both rtinherit and extszinherit flags set. Don't let sysadmins 1310 + * misconfigure directories. 1311 + */ 1312 + if ((new_diflags & XFS_DIFLAG_RTINHERIT) && 1313 + (new_diflags & XFS_DIFLAG_EXTSZINHERIT)) { 1314 + unsigned int rtextsize_bytes; 1315 + 1316 + rtextsize_bytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize); 1317 + if (fa->fsx_extsize % rtextsize_bytes) 1302 1318 return -EINVAL; 1303 1319 } 1304 1320 1305 - if (fa->fsx_extsize % size) 1306 - return -EINVAL; 1307 - 1308 - return 0; 1321 + failaddr = xfs_inode_validate_extsize(ip->i_mount, 1322 + XFS_B_TO_FSB(mp, fa->fsx_extsize), 1323 + VFS_I(ip)->i_mode, new_diflags); 1324 + return failaddr != NULL ? -EINVAL : 0; 1309 1325 } 1310 1326 1311 - /* 1312 - * CoW extent size hint validation rules are: 1313 - * 1314 - * 1. CoW extent size hint can only be set if reflink is enabled on the fs. 1315 - * The inode does not have to have any shared blocks, but it must be a v3. 1316 - * 2. FS_XFLAG_COWEXTSIZE is only valid for directories and regular files; 1317 - * for a directory, the hint is propagated to new files. 1318 - * 3. Can be changed on files & directories at any time. 1319 - * 4. CoW extsize hint of 0 turns off hints, clears inode flags. 1320 - * 5. Extent size must be a multiple of the appropriate block size. 1321 - * 6. The extent size hint must be limited to half the AG size to avoid 1322 - * alignment extending the extent beyond the limits of the AG. 1323 - * 1324 - * Please keep this function in sync with xfs_scrub_inode_cowextsize. 
1325 - */ 1326 1327 static int 1327 1328 xfs_ioctl_setattr_check_cowextsize( 1328 1329 struct xfs_inode *ip, 1329 1330 struct fileattr *fa) 1330 1331 { 1331 1332 struct xfs_mount *mp = ip->i_mount; 1332 - xfs_extlen_t size; 1333 - xfs_fsblock_t cowextsize_fsb; 1333 + xfs_failaddr_t failaddr; 1334 + uint64_t new_diflags2; 1335 + uint16_t new_diflags; 1334 1336 1335 1337 if (!fa->fsx_valid) 1336 1338 return 0; 1337 1339 1338 - if (!(fa->fsx_xflags & FS_XFLAG_COWEXTSIZE)) 1339 - return 0; 1340 - 1341 - if (!xfs_sb_version_hasreflink(&ip->i_mount->m_sb)) 1340 + if (fa->fsx_cowextsize & mp->m_blockmask) 1342 1341 return -EINVAL; 1343 1342 1344 - if (fa->fsx_cowextsize == 0) 1345 - return 0; 1343 + new_diflags = xfs_flags2diflags(ip, fa->fsx_xflags); 1344 + new_diflags2 = xfs_flags2diflags2(ip, fa->fsx_xflags); 1346 1345 1347 - cowextsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_cowextsize); 1348 - if (cowextsize_fsb > MAXEXTLEN) 1349 - return -EINVAL; 1350 - 1351 - size = mp->m_sb.sb_blocksize; 1352 - if (cowextsize_fsb > mp->m_sb.sb_agblocks / 2) 1353 - return -EINVAL; 1354 - 1355 - if (fa->fsx_cowextsize % size) 1356 - return -EINVAL; 1357 - 1358 - return 0; 1346 + failaddr = xfs_inode_validate_cowextsize(ip->i_mount, 1347 + XFS_B_TO_FSB(mp, fa->fsx_cowextsize), 1348 + VFS_I(ip)->i_mode, new_diflags, new_diflags2); 1349 + return failaddr != NULL ? -EINVAL : 0; 1359 1350 } 1360 1351 1361 1352 static int
+2
fs/xfs/xfs_message.h
··· 73 73 xfs_printk_once(xfs_warn, dev, fmt, ##__VA_ARGS__) 74 74 #define xfs_notice_once(dev, fmt, ...) \ 75 75 xfs_printk_once(xfs_notice, dev, fmt, ##__VA_ARGS__) 76 + #define xfs_info_once(dev, fmt, ...) \ 77 + xfs_printk_once(xfs_info, dev, fmt, ##__VA_ARGS__) 76 78 77 79 void assfail(struct xfs_mount *mp, char *expr, char *f, int l); 78 80 void asswarn(struct xfs_mount *mp, char *expr, char *f, int l);