Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'xfs-4.16-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs updates from Darrick Wong:
"This merge cycle, we're making some substantive changes to XFS.

Metadata verifiers have been restructured to provide more detail about
which part of a metadata structure failed checks, and we've enhanced
the new online fsck feature to cross-reference extent allocation
information with the other metadata structures. With this pull, the
metadata verification part of online fsck is more or less finished,
though the feature is still experimental and still disabled by
default.

We're also preparing to remove the EXPERIMENTAL tag from a couple of
features this cycle. This week we're committing a bunch of space
accounting fixes for reflink and removing the EXPERIMENTAL tag from
reflink; I anticipate that we'll be ready to do the same for the
reverse mapping feature next week. (I don't have any pending fixes for
rmap; however I wish to remove the tags one at a time.)

This giant pile of patches has been run through a full xfstests run
over the weekend and through a quick xfstests run against this
morning's master, with no major failures reported. Let me know if
there are any merge problems -- git merge reported that one of our
patches touched the same function as the i_version series, but it
resolved things cleanly.

Summary:

- Log faulting code locations when verifiers fail, for improved
diagnosis of corrupt filesystems.

- Implement metadata verifiers for local format inode fork data.

- Online scrub now cross-references metadata records with other
metadata.

- Refactor the fs geometry ioctl generation functions.

- Harden various metadata verifiers.

- Fix various accounting problems.

- Fix uncancelled transactions leaking when xattr functions fail.

- Prevent the copy-on-write speculative preallocation garbage
collector from racing with writeback.

- Emit log reservation type information as trace data so that we can
compare against xfsprogs.

- Fix some erroneous asserts in the online scrub code.

- Clean up the transaction reservation calculations.

- Fix various minor bugs in online scrub.

- Log complaints about mixed dio/buffered writes once per day and
less noisily than before.

- Refactor buffer log item lists to use list_head.

- Break PNFS leases before reflinking blocks.

- Reduce lock contention on reflink source files.

- Fix some quota accounting problems with reflink.

- Fix a serious corruption problem in the direct cow write code where
we fed bad iomaps to the vfs iomap consumers.

- Various other refactorings.

- Remove EXPERIMENTAL tag from reflink!"

* tag 'xfs-4.16-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (94 commits)
xfs: remove experimental tag for reflinks
xfs: don't screw up direct writes when freesp is fragmented
xfs: check reflink allocation mappings
iomap: warn on zero-length mappings
xfs: treat CoW fork operations as delalloc for quota accounting
xfs: only grab shared inode locks for source file during reflink
xfs: allow xfs_lock_two_inodes to take different EXCL/SHARED modes
xfs: reflink should break pnfs leases before sharing blocks
xfs: don't clobber inobt/finobt cursors when xref with rmap
xfs: skip CoW writes past EOF when writeback races with truncate
xfs: preserve i_rdev when recycling a reclaimable inode
xfs: refactor accounting updates out of xfs_bmap_btalloc
xfs: refactor inode verifier corruption error printing
xfs: make tracepoint inode number format consistent
xfs: always zero di_flags2 when we free the inode
xfs: call xfs_qm_dqattach before performing reflink operations
xfs: bmap code cleanup
Use list_head infra-structure for buffer's log items list
Split buffer's b_fspriv field
Get rid of xfs_buf_log_item_t typedef
...

+4661 -1613
+23 -1
fs/direct-io.c
··· 219 219 return dio->pages[sdio->head]; 220 220 } 221 221 222 + /* 223 + * Warn about a page cache invalidation failure during a direct io write. 224 + */ 225 + void dio_warn_stale_pagecache(struct file *filp) 226 + { 227 + static DEFINE_RATELIMIT_STATE(_rs, 86400 * HZ, DEFAULT_RATELIMIT_BURST); 228 + char pathname[128]; 229 + struct inode *inode = file_inode(filp); 230 + char *path; 231 + 232 + errseq_set(&inode->i_mapping->wb_err, -EIO); 233 + if (__ratelimit(&_rs)) { 234 + path = file_path(filp, pathname, sizeof(pathname)); 235 + if (IS_ERR(path)) 236 + path = "(unknown)"; 237 + pr_crit("Page cache invalidation failure on direct I/O. Possible data corruption due to collision with buffered I/O!\n"); 238 + pr_crit("File: %s PID: %d Comm: %.20s\n", path, current->pid, 239 + current->comm); 240 + } 241 + } 242 + 222 243 /** 223 244 * dio_complete() - called when all DIO BIO I/O has been completed 224 245 * @offset: the byte offset in the file of the completed operation ··· 311 290 err = invalidate_inode_pages2_range(dio->inode->i_mapping, 312 291 offset >> PAGE_SHIFT, 313 292 (offset + ret - 1) >> PAGE_SHIFT); 314 - WARN_ON_ONCE(err); 293 + if (err) 294 + dio_warn_stale_pagecache(dio->iocb->ki_filp); 315 295 } 316 296 317 297 if (!(dio->flags & DIO_SKIP_DIO_COUNT))
+12 -2
fs/iomap.c
··· 65 65 return ret; 66 66 if (WARN_ON(iomap.offset > pos)) 67 67 return -EIO; 68 + if (WARN_ON(iomap.length == 0)) 69 + return -EIO; 68 70 69 71 /* 70 72 * Cut down the length to the one actually provided by the filesystem, ··· 755 753 err = invalidate_inode_pages2_range(inode->i_mapping, 756 754 offset >> PAGE_SHIFT, 757 755 (offset + dio->size - 1) >> PAGE_SHIFT); 758 - WARN_ON_ONCE(err); 756 + if (err) 757 + dio_warn_stale_pagecache(iocb->ki_filp); 759 758 } 760 759 761 760 inode_dio_end(file_inode(iocb->ki_filp)); ··· 1021 1018 if (ret) 1022 1019 goto out_free_dio; 1023 1020 1021 + /* 1022 + * Try to invalidate cache pages for the range we're direct 1023 + * writing. If this invalidation fails, tough, the write will 1024 + * still work, but racing two incompatible write paths is a 1025 + * pretty crazy thing to do, so we don't support it 100%. 1026 + */ 1024 1027 ret = invalidate_inode_pages2_range(mapping, 1025 1028 start >> PAGE_SHIFT, end >> PAGE_SHIFT); 1026 - WARN_ON_ONCE(ret); 1029 + if (ret) 1030 + dio_warn_stale_pagecache(iocb->ki_filp); 1027 1031 ret = 0; 1028 1032 1029 1033 if (iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
+79 -45
fs/xfs/libxfs/xfs_alloc.c
··· 167 167 * Lookup the first record less than or equal to [bno, len] 168 168 * in the btree given by cur. 169 169 */ 170 - static int /* error */ 170 + int /* error */ 171 171 xfs_alloc_lookup_le( 172 172 struct xfs_btree_cur *cur, /* btree cursor */ 173 173 xfs_agblock_t bno, /* starting block of extent */ ··· 520 520 return 0; 521 521 } 522 522 523 - static bool 523 + static xfs_failaddr_t 524 524 xfs_agfl_verify( 525 525 struct xfs_buf *bp) 526 526 { ··· 528 528 struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); 529 529 int i; 530 530 531 + /* 532 + * There is no verification of non-crc AGFLs because mkfs does not 533 + * initialise the AGFL to zero or NULL. Hence the only valid part of the 534 + * AGFL is what the AGF says is active. We can't get to the AGF, so we 535 + * can't verify just those entries are valid. 536 + */ 537 + if (!xfs_sb_version_hascrc(&mp->m_sb)) 538 + return NULL; 539 + 531 540 if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid)) 532 - return false; 541 + return __this_address; 533 542 if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC) 534 - return false; 543 + return __this_address; 535 544 /* 536 545 * during growfs operations, the perag is not fully initialised, 537 546 * so we can't use it for any useful checking. growfs ensures we can't ··· 548 539 * so we can detect and avoid this problem. 
549 540 */ 550 541 if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno) 551 - return false; 542 + return __this_address; 552 543 553 544 for (i = 0; i < XFS_AGFL_SIZE(mp); i++) { 554 545 if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK && 555 546 be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks) 556 - return false; 547 + return __this_address; 557 548 } 558 549 559 - return xfs_log_check_lsn(mp, 560 - be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)); 550 + if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn))) 551 + return __this_address; 552 + return NULL; 561 553 } 562 554 563 555 static void ··· 566 556 struct xfs_buf *bp) 567 557 { 568 558 struct xfs_mount *mp = bp->b_target->bt_mount; 559 + xfs_failaddr_t fa; 569 560 570 561 /* 571 562 * There is no verification of non-crc AGFLs because mkfs does not ··· 578 567 return; 579 568 580 569 if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF)) 581 - xfs_buf_ioerror(bp, -EFSBADCRC); 582 - else if (!xfs_agfl_verify(bp)) 583 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 584 - 585 - if (bp->b_error) 586 - xfs_verifier_error(bp); 570 + xfs_verifier_error(bp, -EFSBADCRC, __this_address); 571 + else { 572 + fa = xfs_agfl_verify(bp); 573 + if (fa) 574 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 575 + } 587 576 } 588 577 589 578 static void 590 579 xfs_agfl_write_verify( 591 580 struct xfs_buf *bp) 592 581 { 593 - struct xfs_mount *mp = bp->b_target->bt_mount; 594 - struct xfs_buf_log_item *bip = bp->b_fspriv; 582 + struct xfs_mount *mp = bp->b_target->bt_mount; 583 + struct xfs_buf_log_item *bip = bp->b_log_item; 584 + xfs_failaddr_t fa; 595 585 596 586 /* no verification of non-crc AGFLs */ 597 587 if (!xfs_sb_version_hascrc(&mp->m_sb)) 598 588 return; 599 589 600 - if (!xfs_agfl_verify(bp)) { 601 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 602 - xfs_verifier_error(bp); 590 + fa = xfs_agfl_verify(bp); 591 + if (fa) { 592 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 603 593 return; 604 594 } 605 595 ··· 
614 602 .name = "xfs_agfl", 615 603 .verify_read = xfs_agfl_read_verify, 616 604 .verify_write = xfs_agfl_write_verify, 605 + .verify_struct = xfs_agfl_verify, 617 606 }; 618 607 619 608 /* ··· 2410 2397 return 0; 2411 2398 } 2412 2399 2413 - static bool 2400 + static xfs_failaddr_t 2414 2401 xfs_agf_verify( 2415 - struct xfs_mount *mp, 2416 - struct xfs_buf *bp) 2417 - { 2418 - struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); 2402 + struct xfs_buf *bp) 2403 + { 2404 + struct xfs_mount *mp = bp->b_target->bt_mount; 2405 + struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); 2419 2406 2420 2407 if (xfs_sb_version_hascrc(&mp->m_sb)) { 2421 2408 if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid)) 2422 - return false; 2409 + return __this_address; 2423 2410 if (!xfs_log_check_lsn(mp, 2424 2411 be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn))) 2425 - return false; 2412 + return __this_address; 2426 2413 } 2427 2414 2428 2415 if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) && ··· 2431 2418 be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && 2432 2419 be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) && 2433 2420 be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp))) 2434 - return false; 2421 + return __this_address; 2435 2422 2436 2423 if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 || 2437 2424 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 || 2438 2425 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS || 2439 2426 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS) 2440 - return false; 2427 + return __this_address; 2441 2428 2442 2429 if (xfs_sb_version_hasrmapbt(&mp->m_sb) && 2443 2430 (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 || 2444 2431 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS)) 2445 - return false; 2432 + return __this_address; 2446 2433 2447 2434 /* 2448 2435 * during growfs operations, the perag is not fully initialised, ··· 2451 2438 * so we can detect and avoid this problem. 
2452 2439 */ 2453 2440 if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno) 2454 - return false; 2441 + return __this_address; 2455 2442 2456 2443 if (xfs_sb_version_haslazysbcount(&mp->m_sb) && 2457 2444 be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length)) 2458 - return false; 2445 + return __this_address; 2459 2446 2460 2447 if (xfs_sb_version_hasreflink(&mp->m_sb) && 2461 2448 (be32_to_cpu(agf->agf_refcount_level) < 1 || 2462 2449 be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS)) 2463 - return false; 2450 + return __this_address; 2464 2451 2465 - return true;; 2452 + return NULL; 2466 2453 2467 2454 } 2468 2455 ··· 2471 2458 struct xfs_buf *bp) 2472 2459 { 2473 2460 struct xfs_mount *mp = bp->b_target->bt_mount; 2461 + xfs_failaddr_t fa; 2474 2462 2475 2463 if (xfs_sb_version_hascrc(&mp->m_sb) && 2476 2464 !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF)) 2477 - xfs_buf_ioerror(bp, -EFSBADCRC); 2478 - else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp, 2479 - XFS_ERRTAG_ALLOC_READ_AGF)) 2480 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 2481 - 2482 - if (bp->b_error) 2483 - xfs_verifier_error(bp); 2465 + xfs_verifier_error(bp, -EFSBADCRC, __this_address); 2466 + else { 2467 + fa = xfs_agf_verify(bp); 2468 + if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF)) 2469 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 2470 + } 2484 2471 } 2485 2472 2486 2473 static void 2487 2474 xfs_agf_write_verify( 2488 2475 struct xfs_buf *bp) 2489 2476 { 2490 - struct xfs_mount *mp = bp->b_target->bt_mount; 2491 - struct xfs_buf_log_item *bip = bp->b_fspriv; 2477 + struct xfs_mount *mp = bp->b_target->bt_mount; 2478 + struct xfs_buf_log_item *bip = bp->b_log_item; 2479 + xfs_failaddr_t fa; 2492 2480 2493 - if (!xfs_agf_verify(mp, bp)) { 2494 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 2495 - xfs_verifier_error(bp); 2481 + fa = xfs_agf_verify(bp); 2482 + if (fa) { 2483 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 2496 2484 return; 2497 2485 } 2498 2486 ··· 2510 
2496 .name = "xfs_agf", 2511 2497 .verify_read = xfs_agf_read_verify, 2512 2498 .verify_write = xfs_agf_write_verify, 2499 + .verify_struct = xfs_agf_verify, 2513 2500 }; 2514 2501 2515 2502 /* ··· 2995 2980 if (agno >= mp->m_sb.sb_agcount) 2996 2981 return false; 2997 2982 return xfs_verify_agbno(mp, agno, XFS_FSB_TO_AGBNO(mp, fsbno)); 2983 + } 2984 + 2985 + /* Is there a record covering a given extent? */ 2986 + int 2987 + xfs_alloc_has_record( 2988 + struct xfs_btree_cur *cur, 2989 + xfs_agblock_t bno, 2990 + xfs_extlen_t len, 2991 + bool *exists) 2992 + { 2993 + union xfs_btree_irec low; 2994 + union xfs_btree_irec high; 2995 + 2996 + memset(&low, 0, sizeof(low)); 2997 + low.a.ar_startblock = bno; 2998 + memset(&high, 0xFF, sizeof(high)); 2999 + high.a.ar_startblock = bno + len - 1; 3000 + 3001 + return xfs_btree_has_record(cur, &low, &high, exists); 2998 3002 }
+10
fs/xfs/libxfs/xfs_alloc.h
··· 198 198 enum xfs_ag_resv_type type); /* block reservation type */ 199 199 200 200 int /* error */ 201 + xfs_alloc_lookup_le( 202 + struct xfs_btree_cur *cur, /* btree cursor */ 203 + xfs_agblock_t bno, /* starting block of extent */ 204 + xfs_extlen_t len, /* length of extent */ 205 + int *stat); /* success/failure */ 206 + 207 + int /* error */ 201 208 xfs_alloc_lookup_ge( 202 209 struct xfs_btree_cur *cur, /* btree cursor */ 203 210 xfs_agblock_t bno, /* starting block of extent */ ··· 243 236 bool xfs_verify_agbno(struct xfs_mount *mp, xfs_agnumber_t agno, 244 237 xfs_agblock_t agbno); 245 238 bool xfs_verify_fsbno(struct xfs_mount *mp, xfs_fsblock_t fsbno); 239 + 240 + int xfs_alloc_has_record(struct xfs_btree_cur *cur, xfs_agblock_t bno, 241 + xfs_extlen_t len, bool *exist); 246 242 247 243 #endif /* __XFS_ALLOC_H__ */
+29 -20
fs/xfs/libxfs/xfs_alloc_btree.c
··· 307 307 be32_to_cpu(k2->alloc.ar_startblock); 308 308 } 309 309 310 - static bool 310 + static xfs_failaddr_t 311 311 xfs_allocbt_verify( 312 312 struct xfs_buf *bp) 313 313 { 314 314 struct xfs_mount *mp = bp->b_target->bt_mount; 315 315 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 316 316 struct xfs_perag *pag = bp->b_pag; 317 + xfs_failaddr_t fa; 317 318 unsigned int level; 318 319 319 320 /* ··· 332 331 level = be16_to_cpu(block->bb_level); 333 332 switch (block->bb_magic) { 334 333 case cpu_to_be32(XFS_ABTB_CRC_MAGIC): 335 - if (!xfs_btree_sblock_v5hdr_verify(bp)) 336 - return false; 334 + fa = xfs_btree_sblock_v5hdr_verify(bp); 335 + if (fa) 336 + return fa; 337 337 /* fall through */ 338 338 case cpu_to_be32(XFS_ABTB_MAGIC): 339 339 if (pag && pag->pagf_init) { 340 340 if (level >= pag->pagf_levels[XFS_BTNUM_BNOi]) 341 - return false; 341 + return __this_address; 342 342 } else if (level >= mp->m_ag_maxlevels) 343 - return false; 343 + return __this_address; 344 344 break; 345 345 case cpu_to_be32(XFS_ABTC_CRC_MAGIC): 346 - if (!xfs_btree_sblock_v5hdr_verify(bp)) 347 - return false; 346 + fa = xfs_btree_sblock_v5hdr_verify(bp); 347 + if (fa) 348 + return fa; 348 349 /* fall through */ 349 350 case cpu_to_be32(XFS_ABTC_MAGIC): 350 351 if (pag && pag->pagf_init) { 351 352 if (level >= pag->pagf_levels[XFS_BTNUM_CNTi]) 352 - return false; 353 + return __this_address; 353 354 } else if (level >= mp->m_ag_maxlevels) 354 - return false; 355 + return __this_address; 355 356 break; 356 357 default: 357 - return false; 358 + return __this_address; 358 359 } 359 360 360 361 return xfs_btree_sblock_verify(bp, mp->m_alloc_mxr[level != 0]); ··· 366 363 xfs_allocbt_read_verify( 367 364 struct xfs_buf *bp) 368 365 { 369 - if (!xfs_btree_sblock_verify_crc(bp)) 370 - xfs_buf_ioerror(bp, -EFSBADCRC); 371 - else if (!xfs_allocbt_verify(bp)) 372 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 366 + xfs_failaddr_t fa; 373 367 374 - if (bp->b_error) { 375 - 
trace_xfs_btree_corrupt(bp, _RET_IP_); 376 - xfs_verifier_error(bp); 368 + if (!xfs_btree_sblock_verify_crc(bp)) 369 + xfs_verifier_error(bp, -EFSBADCRC, __this_address); 370 + else { 371 + fa = xfs_allocbt_verify(bp); 372 + if (fa) 373 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 377 374 } 375 + 376 + if (bp->b_error) 377 + trace_xfs_btree_corrupt(bp, _RET_IP_); 378 378 } 379 379 380 380 static void 381 381 xfs_allocbt_write_verify( 382 382 struct xfs_buf *bp) 383 383 { 384 - if (!xfs_allocbt_verify(bp)) { 384 + xfs_failaddr_t fa; 385 + 386 + fa = xfs_allocbt_verify(bp); 387 + if (fa) { 385 388 trace_xfs_btree_corrupt(bp, _RET_IP_); 386 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 387 - xfs_verifier_error(bp); 389 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 388 390 return; 389 391 } 390 392 xfs_btree_sblock_calc_crc(bp); ··· 400 392 .name = "xfs_allocbt", 401 393 .verify_read = xfs_allocbt_read_verify, 402 394 .verify_write = xfs_allocbt_write_verify, 395 + .verify_struct = xfs_allocbt_verify, 403 396 }; 404 397 405 398
-4
fs/xfs/libxfs/xfs_attr.c
··· 717 717 return error; 718 718 out_defer_cancel: 719 719 xfs_defer_cancel(args->dfops); 720 - args->trans = NULL; 721 720 return error; 722 721 } 723 722 ··· 769 770 return 0; 770 771 out_defer_cancel: 771 772 xfs_defer_cancel(args->dfops); 772 - args->trans = NULL; 773 773 return error; 774 774 } 775 775 ··· 1043 1045 return retval; 1044 1046 out_defer_cancel: 1045 1047 xfs_defer_cancel(args->dfops); 1046 - args->trans = NULL; 1047 1048 goto out; 1048 1049 } 1049 1050 ··· 1183 1186 return error; 1184 1187 out_defer_cancel: 1185 1188 xfs_defer_cancel(args->dfops); 1186 - args->trans = NULL; 1187 1189 goto out; 1188 1190 } 1189 1191
+122 -26
fs/xfs/libxfs/xfs_attr_leaf.c
··· 247 247 } 248 248 } 249 249 250 - static bool 250 + static xfs_failaddr_t 251 251 xfs_attr3_leaf_verify( 252 - struct xfs_buf *bp) 252 + struct xfs_buf *bp) 253 253 { 254 - struct xfs_mount *mp = bp->b_target->bt_mount; 255 - struct xfs_attr_leafblock *leaf = bp->b_addr; 256 - struct xfs_perag *pag = bp->b_pag; 257 - struct xfs_attr3_icleaf_hdr ichdr; 254 + struct xfs_attr3_icleaf_hdr ichdr; 255 + struct xfs_mount *mp = bp->b_target->bt_mount; 256 + struct xfs_attr_leafblock *leaf = bp->b_addr; 257 + struct xfs_perag *pag = bp->b_pag; 258 + struct xfs_attr_leaf_entry *entries; 258 259 259 260 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf); 260 261 ··· 263 262 struct xfs_da3_node_hdr *hdr3 = bp->b_addr; 264 263 265 264 if (ichdr.magic != XFS_ATTR3_LEAF_MAGIC) 266 - return false; 265 + return __this_address; 267 266 268 267 if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid)) 269 - return false; 268 + return __this_address; 270 269 if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) 271 - return false; 270 + return __this_address; 272 271 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn))) 273 - return false; 272 + return __this_address; 274 273 } else { 275 274 if (ichdr.magic != XFS_ATTR_LEAF_MAGIC) 276 - return false; 275 + return __this_address; 277 276 } 278 277 /* 279 278 * In recovery there is a transient state where count == 0 is valid ··· 281 280 * if the attr didn't fit in shortform. 282 281 */ 283 282 if (pag && pag->pagf_init && ichdr.count == 0) 284 - return false; 283 + return __this_address; 284 + 285 + /* 286 + * firstused is the block offset of the first name info structure. 287 + * Make sure it doesn't go off the block or crash into the header. 288 + */ 289 + if (ichdr.firstused > mp->m_attr_geo->blksize) 290 + return __this_address; 291 + if (ichdr.firstused < xfs_attr3_leaf_hdr_size(leaf)) 292 + return __this_address; 293 + 294 + /* Make sure the entries array doesn't crash into the name info. 
*/ 295 + entries = xfs_attr3_leaf_entryp(bp->b_addr); 296 + if ((char *)&entries[ichdr.count] > 297 + (char *)bp->b_addr + ichdr.firstused) 298 + return __this_address; 285 299 286 300 /* XXX: need to range check rest of attr header values */ 287 301 /* XXX: hash order check? */ 288 302 289 - return true; 303 + return NULL; 290 304 } 291 305 292 306 static void ··· 309 293 struct xfs_buf *bp) 310 294 { 311 295 struct xfs_mount *mp = bp->b_target->bt_mount; 312 - struct xfs_buf_log_item *bip = bp->b_fspriv; 296 + struct xfs_buf_log_item *bip = bp->b_log_item; 313 297 struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr; 298 + xfs_failaddr_t fa; 314 299 315 - if (!xfs_attr3_leaf_verify(bp)) { 316 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 317 - xfs_verifier_error(bp); 300 + fa = xfs_attr3_leaf_verify(bp); 301 + if (fa) { 302 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 318 303 return; 319 304 } 320 305 ··· 339 322 struct xfs_buf *bp) 340 323 { 341 324 struct xfs_mount *mp = bp->b_target->bt_mount; 325 + xfs_failaddr_t fa; 342 326 343 327 if (xfs_sb_version_hascrc(&mp->m_sb) && 344 328 !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF)) 345 - xfs_buf_ioerror(bp, -EFSBADCRC); 346 - else if (!xfs_attr3_leaf_verify(bp)) 347 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 348 - 349 - if (bp->b_error) 350 - xfs_verifier_error(bp); 329 + xfs_verifier_error(bp, -EFSBADCRC, __this_address); 330 + else { 331 + fa = xfs_attr3_leaf_verify(bp); 332 + if (fa) 333 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 334 + } 351 335 } 352 336 353 337 const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = { 354 338 .name = "xfs_attr3_leaf", 355 339 .verify_read = xfs_attr3_leaf_read_verify, 356 340 .verify_write = xfs_attr3_leaf_write_verify, 341 + .verify_struct = xfs_attr3_leaf_verify, 357 342 }; 358 343 359 344 int ··· 887 868 (bytes == sizeof(struct xfs_attr_sf_hdr))) 888 869 return -1; 889 870 return xfs_attr_shortform_bytesfit(dp, bytes); 871 + } 872 + 873 + /* Verify the consistency of an inline attribute fork. 
*/ 874 + xfs_failaddr_t 875 + xfs_attr_shortform_verify( 876 + struct xfs_inode *ip) 877 + { 878 + struct xfs_attr_shortform *sfp; 879 + struct xfs_attr_sf_entry *sfep; 880 + struct xfs_attr_sf_entry *next_sfep; 881 + char *endp; 882 + struct xfs_ifork *ifp; 883 + int i; 884 + int size; 885 + 886 + ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL); 887 + ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK); 888 + sfp = (struct xfs_attr_shortform *)ifp->if_u1.if_data; 889 + size = ifp->if_bytes; 890 + 891 + /* 892 + * Give up if the attribute is way too short. 893 + */ 894 + if (size < sizeof(struct xfs_attr_sf_hdr)) 895 + return __this_address; 896 + 897 + endp = (char *)sfp + size; 898 + 899 + /* Check all reported entries */ 900 + sfep = &sfp->list[0]; 901 + for (i = 0; i < sfp->hdr.count; i++) { 902 + /* 903 + * struct xfs_attr_sf_entry has a variable length. 904 + * Check the fixed-offset parts of the structure are 905 + * within the data buffer. 906 + */ 907 + if (((char *)sfep + sizeof(*sfep)) >= endp) 908 + return __this_address; 909 + 910 + /* Don't allow names with known bad length. */ 911 + if (sfep->namelen == 0) 912 + return __this_address; 913 + 914 + /* 915 + * Check that the variable-length part of the structure is 916 + * within the data buffer. The next entry starts after the 917 + * name component, so nextentry is an acceptable test. 918 + */ 919 + next_sfep = XFS_ATTR_SF_NEXTENTRY(sfep); 920 + if ((char *)next_sfep > endp) 921 + return __this_address; 922 + 923 + /* 924 + * Check for unknown flags. Short form doesn't support 925 + * the incomplete or local bits, so we can use the namespace 926 + * mask here. 927 + */ 928 + if (sfep->flags & ~XFS_ATTR_NSP_ONDISK_MASK) 929 + return __this_address; 930 + 931 + /* 932 + * Check for invalid namespace combinations. We only allow 933 + * one namespace flag per xattr, so we can just count the 934 + * bits (i.e. hweight) here. 
935 + */ 936 + if (hweight8(sfep->flags & XFS_ATTR_NSP_ONDISK_MASK) > 1) 937 + return __this_address; 938 + 939 + sfep = next_sfep; 940 + } 941 + if ((void *)sfep != (void *)endp) 942 + return __this_address; 943 + 944 + return NULL; 890 945 } 891 946 892 947 /* ··· 2266 2173 leaf = bp->b_addr; 2267 2174 xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf); 2268 2175 entries = xfs_attr3_leaf_entryp(leaf); 2269 - ASSERT(ichdr.count < args->geo->blksize / 8); 2176 + if (ichdr.count >= args->geo->blksize / 8) 2177 + return -EFSCORRUPTED; 2270 2178 2271 2179 /* 2272 2180 * Binary search. (note: small blocks will skip this loop) ··· 2283 2189 else 2284 2190 break; 2285 2191 } 2286 - ASSERT(probe >= 0 && (!ichdr.count || probe < ichdr.count)); 2287 - ASSERT(span <= 4 || be32_to_cpu(entry->hashval) == hashval); 2192 + if (!(probe >= 0 && (!ichdr.count || probe < ichdr.count))) 2193 + return -EFSCORRUPTED; 2194 + if (!(span <= 4 || be32_to_cpu(entry->hashval) == hashval)) 2195 + return -EFSCORRUPTED; 2288 2196 2289 2197 /* 2290 2198 * Since we may have duplicate hashval's, find the first matching
+1
fs/xfs/libxfs/xfs_attr_leaf.h
··· 53 53 int xfs_attr_shortform_remove(struct xfs_da_args *args); 54 54 int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp); 55 55 int xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes); 56 + xfs_failaddr_t xfs_attr_shortform_verify(struct xfs_inode *ip); 56 57 void xfs_attr_fork_remove(struct xfs_inode *ip, struct xfs_trans *tp); 57 58 58 59 /*
+67 -37
fs/xfs/libxfs/xfs_attr_remote.c
··· 65 65 * does CRC, location and bounds checking, the unpacking function checks the 66 66 * attribute parameters and owner. 67 67 */ 68 - static bool 68 + static xfs_failaddr_t 69 69 xfs_attr3_rmt_hdr_ok( 70 70 void *ptr, 71 71 xfs_ino_t ino, ··· 76 76 struct xfs_attr3_rmt_hdr *rmt = ptr; 77 77 78 78 if (bno != be64_to_cpu(rmt->rm_blkno)) 79 - return false; 79 + return __this_address; 80 80 if (offset != be32_to_cpu(rmt->rm_offset)) 81 - return false; 81 + return __this_address; 82 82 if (size != be32_to_cpu(rmt->rm_bytes)) 83 - return false; 83 + return __this_address; 84 84 if (ino != be64_to_cpu(rmt->rm_owner)) 85 - return false; 85 + return __this_address; 86 86 87 87 /* ok */ 88 - return true; 88 + return NULL; 89 89 } 90 90 91 - static bool 91 + static xfs_failaddr_t 92 92 xfs_attr3_rmt_verify( 93 93 struct xfs_mount *mp, 94 94 void *ptr, ··· 98 98 struct xfs_attr3_rmt_hdr *rmt = ptr; 99 99 100 100 if (!xfs_sb_version_hascrc(&mp->m_sb)) 101 - return false; 101 + return __this_address; 102 102 if (rmt->rm_magic != cpu_to_be32(XFS_ATTR3_RMT_MAGIC)) 103 - return false; 103 + return __this_address; 104 104 if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid)) 105 - return false; 105 + return __this_address; 106 106 if (be64_to_cpu(rmt->rm_blkno) != bno) 107 - return false; 107 + return __this_address; 108 108 if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt)) 109 - return false; 109 + return __this_address; 110 110 if (be32_to_cpu(rmt->rm_offset) + 111 111 be32_to_cpu(rmt->rm_bytes) > XFS_XATTR_SIZE_MAX) 112 - return false; 112 + return __this_address; 113 113 if (rmt->rm_owner == 0) 114 - return false; 114 + return __this_address; 115 115 116 - return true; 116 + return NULL; 117 117 } 118 118 119 - static void 120 - xfs_attr3_rmt_read_verify( 121 - struct xfs_buf *bp) 119 + static int 120 + __xfs_attr3_rmt_read_verify( 121 + struct xfs_buf *bp, 122 + bool check_crc, 123 + xfs_failaddr_t *failaddr) 122 124 { 123 125 struct xfs_mount *mp = 
bp->b_target->bt_mount; 124 126 char *ptr; ··· 130 128 131 129 /* no verification of non-crc buffers */ 132 130 if (!xfs_sb_version_hascrc(&mp->m_sb)) 133 - return; 131 + return 0; 134 132 135 133 ptr = bp->b_addr; 136 134 bno = bp->b_bn; ··· 138 136 ASSERT(len >= blksize); 139 137 140 138 while (len > 0) { 141 - if (!xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) { 142 - xfs_buf_ioerror(bp, -EFSBADCRC); 143 - break; 139 + if (check_crc && 140 + !xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) { 141 + *failaddr = __this_address; 142 + return -EFSBADCRC; 144 143 } 145 - if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { 146 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 147 - break; 148 - } 144 + *failaddr = xfs_attr3_rmt_verify(mp, ptr, blksize, bno); 145 + if (*failaddr) 146 + return -EFSCORRUPTED; 149 147 len -= blksize; 150 148 ptr += blksize; 151 149 bno += BTOBB(blksize); 152 150 } 153 151 154 - if (bp->b_error) 155 - xfs_verifier_error(bp); 156 - else 157 - ASSERT(len == 0); 152 + if (len != 0) { 153 + *failaddr = __this_address; 154 + return -EFSCORRUPTED; 155 + } 156 + 157 + return 0; 158 + } 159 + 160 + static void 161 + xfs_attr3_rmt_read_verify( 162 + struct xfs_buf *bp) 163 + { 164 + xfs_failaddr_t fa; 165 + int error; 166 + 167 + error = __xfs_attr3_rmt_read_verify(bp, true, &fa); 168 + if (error) 169 + xfs_verifier_error(bp, error, fa); 170 + } 171 + 172 + static xfs_failaddr_t 173 + xfs_attr3_rmt_verify_struct( 174 + struct xfs_buf *bp) 175 + { 176 + xfs_failaddr_t fa; 177 + int error; 178 + 179 + error = __xfs_attr3_rmt_read_verify(bp, false, &fa); 180 + return error ? 
fa : NULL; 158 181 } 159 182 160 183 static void ··· 187 160 struct xfs_buf *bp) 188 161 { 189 162 struct xfs_mount *mp = bp->b_target->bt_mount; 163 + xfs_failaddr_t fa; 190 164 int blksize = mp->m_attr_geo->blksize; 191 165 char *ptr; 192 166 int len; ··· 205 177 while (len > 0) { 206 178 struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr; 207 179 208 - if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { 209 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 210 - xfs_verifier_error(bp); 180 + fa = xfs_attr3_rmt_verify(mp, ptr, blksize, bno); 181 + if (fa) { 182 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 211 183 return; 212 184 } 213 185 ··· 216 188 * xfs_attr3_rmt_hdr_set() for the explanation. 217 189 */ 218 190 if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) { 219 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 220 - xfs_verifier_error(bp); 191 + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); 221 192 return; 222 193 } 223 194 xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF); ··· 225 198 ptr += blksize; 226 199 bno += BTOBB(blksize); 227 200 } 228 - ASSERT(len == 0); 201 + 202 + if (len != 0) 203 + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); 229 204 } 230 205 231 206 const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = { 232 207 .name = "xfs_attr3_rmt", 233 208 .verify_read = xfs_attr3_rmt_read_verify, 234 209 .verify_write = xfs_attr3_rmt_write_verify, 210 + .verify_struct = xfs_attr3_rmt_verify_struct, 235 211 }; 236 212 237 213 STATIC int ··· 299 269 byte_cnt = min(*valuelen, byte_cnt); 300 270 301 271 if (xfs_sb_version_hascrc(&mp->m_sb)) { 302 - if (!xfs_attr3_rmt_hdr_ok(src, ino, *offset, 272 + if (xfs_attr3_rmt_hdr_ok(src, ino, *offset, 303 273 byte_cnt, bno)) { 304 274 xfs_alert(mp, 305 275 "remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)",
+82 -38
fs/xfs/libxfs/xfs_bmap.c
··· 400 400 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); 401 401 bno = be64_to_cpu(*pp); 402 402 XFS_WANT_CORRUPTED_GOTO(mp, 403 - XFS_FSB_SANITY_CHECK(mp, bno), error0); 403 + xfs_verify_fsbno(mp, bno), error0); 404 404 if (bp_release) { 405 405 bp_release = 0; 406 406 xfs_trans_brelse(NULL, bp); ··· 1220 1220 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); 1221 1221 bno = be64_to_cpu(*pp); 1222 1222 XFS_WANT_CORRUPTED_GOTO(mp, 1223 - XFS_FSB_SANITY_CHECK(mp, bno), out_brelse); 1223 + xfs_verify_fsbno(mp, bno), out_brelse); 1224 1224 xfs_trans_brelse(tp, bp); 1225 1225 } 1226 1226 ··· 3337 3337 return 0; 3338 3338 } 3339 3339 3340 + /* Update all inode and quota accounting for the allocation we just did. */ 3341 + static void 3342 + xfs_bmap_btalloc_accounting( 3343 + struct xfs_bmalloca *ap, 3344 + struct xfs_alloc_arg *args) 3345 + { 3346 + if (ap->flags & XFS_BMAPI_COWFORK) { 3347 + /* 3348 + * COW fork blocks are in-core only and thus are treated as 3349 + * in-core quota reservation (like delalloc blocks) even when 3350 + * converted to real blocks. The quota reservation is not 3351 + * accounted to disk until blocks are remapped to the data 3352 + * fork. So if these blocks were previously delalloc, we 3353 + * already have quota reservation and there's nothing to do 3354 + * yet. 3355 + */ 3356 + if (ap->wasdel) 3357 + return; 3358 + 3359 + /* 3360 + * Otherwise, we've allocated blocks in a hole. The transaction 3361 + * has acquired in-core quota reservation for this extent. 3362 + * Rather than account these as real blocks, however, we reduce 3363 + * the transaction quota reservation based on the allocation. 3364 + * This essentially transfers the transaction quota reservation 3365 + * to that of a delalloc extent. 
3366 + */ 3367 + ap->ip->i_delayed_blks += args->len; 3368 + xfs_trans_mod_dquot_byino(ap->tp, ap->ip, XFS_TRANS_DQ_RES_BLKS, 3369 + -(long)args->len); 3370 + return; 3371 + } 3372 + 3373 + /* data/attr fork only */ 3374 + ap->ip->i_d.di_nblocks += args->len; 3375 + xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); 3376 + if (ap->wasdel) 3377 + ap->ip->i_delayed_blks -= args->len; 3378 + xfs_trans_mod_dquot_byino(ap->tp, ap->ip, 3379 + ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : XFS_TRANS_DQ_BCOUNT, 3380 + args->len); 3381 + } 3382 + 3340 3383 STATIC int 3341 3384 xfs_bmap_btalloc( 3342 3385 struct xfs_bmalloca *ap) /* bmap alloc argument struct */ ··· 3390 3347 xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */ 3391 3348 xfs_agnumber_t ag; 3392 3349 xfs_alloc_arg_t args; 3350 + xfs_fileoff_t orig_offset; 3351 + xfs_extlen_t orig_length; 3393 3352 xfs_extlen_t blen; 3394 3353 xfs_extlen_t nextminlen = 0; 3395 3354 int nullfb; /* true if ap->firstblock isn't set */ ··· 3401 3356 int stripe_align; 3402 3357 3403 3358 ASSERT(ap->length); 3359 + orig_offset = ap->offset; 3360 + orig_length = ap->length; 3404 3361 3405 3362 mp = ap->ip->i_mount; 3406 3363 ··· 3618 3571 *ap->firstblock = args.fsbno; 3619 3572 ASSERT(nullfb || fb_agno <= args.agno); 3620 3573 ap->length = args.len; 3621 - if (!(ap->flags & XFS_BMAPI_COWFORK)) 3622 - ap->ip->i_d.di_nblocks += args.len; 3623 - xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); 3624 - if (ap->wasdel) 3625 - ap->ip->i_delayed_blks -= args.len; 3626 3574 /* 3627 - * Adjust the disk quota also. This was reserved 3628 - * earlier. 3575 + * If the extent size hint is active, we tried to round the 3576 + * caller's allocation request offset down to extsz and the 3577 + * length up to another extsz boundary. If we found a free 3578 + * extent we mapped it in starting at this new offset. 
If the 3579 + * newly mapped space isn't long enough to cover any of the 3580 + * range of offsets that was originally requested, move the 3581 + * mapping up so that we can fill as much of the caller's 3582 + * original request as possible. Free space is apparently 3583 + * very fragmented so we're unlikely to be able to satisfy the 3584 + * hints anyway. 3629 3585 */ 3630 - xfs_trans_mod_dquot_byino(ap->tp, ap->ip, 3631 - ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : 3632 - XFS_TRANS_DQ_BCOUNT, 3633 - (long) args.len); 3586 + if (ap->length <= orig_length) 3587 + ap->offset = orig_offset; 3588 + else if (ap->offset + ap->length < orig_offset + orig_length) 3589 + ap->offset = orig_offset + orig_length - ap->length; 3590 + xfs_bmap_btalloc_accounting(ap, &args); 3634 3591 } else { 3635 3592 ap->blkno = NULLFSBLOCK; 3636 3593 ap->length = 0; ··· 3927 3876 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); 3928 3877 xfs_extlen_t alen; 3929 3878 xfs_extlen_t indlen; 3930 - char rt = XFS_IS_REALTIME_INODE(ip); 3931 - xfs_extlen_t extsz; 3932 3879 int error; 3933 3880 xfs_fileoff_t aoff = off; 3934 3881 ··· 3941 3892 prealloc = alen - len; 3942 3893 3943 3894 /* Figure out the extent size, adjust alen */ 3944 - if (whichfork == XFS_COW_FORK) 3945 - extsz = xfs_get_cowextsz_hint(ip); 3946 - else 3947 - extsz = xfs_get_extsz_hint(ip); 3948 - if (extsz) { 3895 + if (whichfork == XFS_COW_FORK) { 3949 3896 struct xfs_bmbt_irec prev; 3897 + xfs_extlen_t extsz = xfs_get_cowextsz_hint(ip); 3950 3898 3951 3899 if (!xfs_iext_peek_prev_extent(ifp, icur, &prev)) 3952 3900 prev.br_startoff = NULLFILEOFF; 3953 3901 3954 - error = xfs_bmap_extsize_align(mp, got, &prev, extsz, rt, eof, 3902 + error = xfs_bmap_extsize_align(mp, got, &prev, extsz, 0, eof, 3955 3903 1, 0, &aoff, &alen); 3956 3904 ASSERT(!error); 3957 3905 } 3958 - 3959 - if (rt) 3960 - extsz = alen / mp->m_sb.sb_rextsize; 3961 3906 3962 3907 /* 3963 3908 * Make a transaction-less quota reservation for delayed allocation ··· 
3959 3916 * allocated blocks already inside this loop. 3960 3917 */ 3961 3918 error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0, 3962 - rt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); 3919 + XFS_QMOPT_RES_REGBLKS); 3963 3920 if (error) 3964 3921 return error; 3965 3922 ··· 3970 3927 indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen); 3971 3928 ASSERT(indlen > 0); 3972 3929 3973 - if (rt) { 3974 - error = xfs_mod_frextents(mp, -((int64_t)extsz)); 3975 - } else { 3976 - error = xfs_mod_fdblocks(mp, -((int64_t)alen), false); 3977 - } 3978 - 3930 + error = xfs_mod_fdblocks(mp, -((int64_t)alen), false); 3979 3931 if (error) 3980 3932 goto out_unreserve_quota; 3981 3933 ··· 4001 3963 return 0; 4002 3964 4003 3965 out_unreserve_blocks: 4004 - if (rt) 4005 - xfs_mod_frextents(mp, extsz); 4006 - else 4007 - xfs_mod_fdblocks(mp, alen, false); 3966 + xfs_mod_fdblocks(mp, alen, false); 4008 3967 out_unreserve_quota: 4009 3968 if (XFS_IS_QUOTA_ON(mp)) 4010 - xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ? 4011 - XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); 3969 + xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, 3970 + XFS_QMOPT_RES_REGBLKS); 4012 3971 return error; 4013 3972 } 4014 3973 ··· 4339 4304 while (bno < end && n < *nmap) { 4340 4305 bool need_alloc = false, wasdelay = false; 4341 4306 4342 - /* in hole or beyoned EOF? */ 4307 + /* in hole or beyond EOF? */ 4343 4308 if (eof || bma.got.br_startoff > bno) { 4309 + /* 4310 + * CoW fork conversions should /never/ hit EOF or 4311 + * holes. There should always be something for us 4312 + * to work on. 4313 + */ 4314 + ASSERT(!((flags & XFS_BMAPI_CONVERT) && 4315 + (flags & XFS_BMAPI_COWFORK))); 4316 + 4344 4317 if (flags & XFS_BMAPI_DELALLOC) { 4345 4318 /* 4346 4319 * For the COW fork we can reasonably get a ··· 4867 4824 xfs_iext_insert(ip, icur, &new, state); 4868 4825 break; 4869 4826 } 4827 + ip->i_delayed_blks -= del->br_blockcount; 4870 4828 } 4871 4829 4872 4830 /*
+24 -34
fs/xfs/libxfs/xfs_bmap_btree.c
··· 425 425 be64_to_cpu(k2->bmbt.br_startoff); 426 426 } 427 427 428 - static bool 428 + static xfs_failaddr_t 429 429 xfs_bmbt_verify( 430 430 struct xfs_buf *bp) 431 431 { 432 432 struct xfs_mount *mp = bp->b_target->bt_mount; 433 433 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 434 + xfs_failaddr_t fa; 434 435 unsigned int level; 435 436 436 437 switch (block->bb_magic) { 437 438 case cpu_to_be32(XFS_BMAP_CRC_MAGIC): 438 - if (!xfs_sb_version_hascrc(&mp->m_sb)) 439 - return false; 440 - if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid)) 441 - return false; 442 - if (be64_to_cpu(block->bb_u.l.bb_blkno) != bp->b_bn) 443 - return false; 444 439 /* 445 440 * XXX: need a better way of verifying the owner here. Right now 446 441 * just make sure there has been one set. 447 442 */ 448 - if (be64_to_cpu(block->bb_u.l.bb_owner) == 0) 449 - return false; 443 + fa = xfs_btree_lblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN); 444 + if (fa) 445 + return fa; 450 446 /* fall through */ 451 447 case cpu_to_be32(XFS_BMAP_MAGIC): 452 448 break; 453 449 default: 454 - return false; 450 + return __this_address; 455 451 } 456 452 457 453 /* ··· 459 463 */ 460 464 level = be16_to_cpu(block->bb_level); 461 465 if (level > max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1])) 462 - return false; 463 - if (be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0]) 464 - return false; 466 + return __this_address; 465 467 466 - /* sibling pointer verification */ 467 - if (!block->bb_u.l.bb_leftsib || 468 - (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) && 469 - !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib)))) 470 - return false; 471 - if (!block->bb_u.l.bb_rightsib || 472 - (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) && 473 - !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib)))) 474 - return false; 475 - 476 - return true; 468 + return xfs_btree_lblock_verify(bp, mp->m_bmap_dmxr[level != 0]); 477 469 } 478 470 479 471 
static void 480 472 xfs_bmbt_read_verify( 481 473 struct xfs_buf *bp) 482 474 { 483 - if (!xfs_btree_lblock_verify_crc(bp)) 484 - xfs_buf_ioerror(bp, -EFSBADCRC); 485 - else if (!xfs_bmbt_verify(bp)) 486 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 475 + xfs_failaddr_t fa; 487 476 488 - if (bp->b_error) { 489 - trace_xfs_btree_corrupt(bp, _RET_IP_); 490 - xfs_verifier_error(bp); 477 + if (!xfs_btree_lblock_verify_crc(bp)) 478 + xfs_verifier_error(bp, -EFSBADCRC, __this_address); 479 + else { 480 + fa = xfs_bmbt_verify(bp); 481 + if (fa) 482 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 491 483 } 484 + 485 + if (bp->b_error) 486 + trace_xfs_btree_corrupt(bp, _RET_IP_); 492 487 } 493 488 494 489 static void 495 490 xfs_bmbt_write_verify( 496 491 struct xfs_buf *bp) 497 492 { 498 - if (!xfs_bmbt_verify(bp)) { 493 + xfs_failaddr_t fa; 494 + 495 + fa = xfs_bmbt_verify(bp); 496 + if (fa) { 499 497 trace_xfs_btree_corrupt(bp, _RET_IP_); 500 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 501 - xfs_verifier_error(bp); 498 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 502 499 return; 503 500 } 504 501 xfs_btree_lblock_calc_crc(bp); ··· 501 512 .name = "xfs_bmbt", 502 513 .verify_read = xfs_bmbt_read_verify, 503 514 .verify_write = xfs_bmbt_write_verify, 515 + .verify_struct = xfs_bmbt_verify, 504 516 }; 505 517 506 518
+117 -42
fs/xfs/libxfs/xfs_btree.c
··· 273 273 struct xfs_buf *bp) 274 274 { 275 275 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 276 - struct xfs_buf_log_item *bip = bp->b_fspriv; 276 + struct xfs_buf_log_item *bip = bp->b_log_item; 277 277 278 278 if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) 279 279 return; ··· 311 311 struct xfs_buf *bp) 312 312 { 313 313 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 314 - struct xfs_buf_log_item *bip = bp->b_fspriv; 314 + struct xfs_buf_log_item *bip = bp->b_log_item; 315 315 316 316 if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) 317 317 return; ··· 329 329 330 330 if (xfs_sb_version_hascrc(&mp->m_sb)) { 331 331 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn))) 332 - return false; 332 + return __this_address; 333 333 return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); 334 334 } 335 335 ··· 853 853 xfs_daddr_t d; /* real disk block address */ 854 854 int error; 855 855 856 - if (!XFS_FSB_SANITY_CHECK(mp, fsbno)) 856 + if (!xfs_verify_fsbno(mp, fsbno)) 857 857 return -EFSCORRUPTED; 858 858 d = XFS_FSB_TO_DADDR(mp, fsbno); 859 859 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, ··· 4529 4529 &bbcoi); 4530 4530 } 4531 4531 4532 - /** 4533 - * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format 4534 - * btree block 4535 - * 4536 - * @bp: buffer containing the btree block 4537 - * @max_recs: pointer to the m_*_mxr max records field in the xfs mount 4538 - * @pag_max_level: pointer to the per-ag max level field 4539 - */ 4540 - bool 4541 - xfs_btree_sblock_v5hdr_verify( 4542 - struct xfs_buf *bp) 4532 + /* Verify the v5 fields of a long-format btree block. 
*/ 4533 + xfs_failaddr_t 4534 + xfs_btree_lblock_v5hdr_verify( 4535 + struct xfs_buf *bp, 4536 + uint64_t owner) 4543 4537 { 4544 4538 struct xfs_mount *mp = bp->b_target->bt_mount; 4545 4539 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4546 - struct xfs_perag *pag = bp->b_pag; 4547 4540 4548 4541 if (!xfs_sb_version_hascrc(&mp->m_sb)) 4549 - return false; 4550 - if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) 4551 - return false; 4552 - if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) 4553 - return false; 4554 - if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) 4555 - return false; 4556 - return true; 4542 + return __this_address; 4543 + if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid)) 4544 + return __this_address; 4545 + if (block->bb_u.l.bb_blkno != cpu_to_be64(bp->b_bn)) 4546 + return __this_address; 4547 + if (owner != XFS_RMAP_OWN_UNKNOWN && 4548 + be64_to_cpu(block->bb_u.l.bb_owner) != owner) 4549 + return __this_address; 4550 + return NULL; 4557 4551 } 4558 4552 4559 - /** 4560 - * xfs_btree_sblock_verify() -- verify a short-format btree block 4561 - * 4562 - * @bp: buffer containing the btree block 4563 - * @max_recs: maximum records allowed in this btree node 4564 - */ 4565 - bool 4566 - xfs_btree_sblock_verify( 4553 + /* Verify a long-format btree block. 
*/ 4554 + xfs_failaddr_t 4555 + xfs_btree_lblock_verify( 4567 4556 struct xfs_buf *bp, 4568 4557 unsigned int max_recs) 4569 4558 { ··· 4561 4572 4562 4573 /* numrecs verification */ 4563 4574 if (be16_to_cpu(block->bb_numrecs) > max_recs) 4564 - return false; 4575 + return __this_address; 4565 4576 4566 4577 /* sibling pointer verification */ 4567 - if (!block->bb_u.s.bb_leftsib || 4568 - (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks && 4569 - block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK))) 4570 - return false; 4571 - if (!block->bb_u.s.bb_rightsib || 4572 - (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks && 4573 - block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK))) 4574 - return false; 4578 + if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) && 4579 + !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_leftsib))) 4580 + return __this_address; 4581 + if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) && 4582 + !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_rightsib))) 4583 + return __this_address; 4575 4584 4576 - return true; 4585 + return NULL; 4586 + } 4587 + 4588 + /** 4589 + * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format 4590 + * btree block 4591 + * 4592 + * @bp: buffer containing the btree block 4593 + * @max_recs: pointer to the m_*_mxr max records field in the xfs mount 4594 + * @pag_max_level: pointer to the per-ag max level field 4595 + */ 4596 + xfs_failaddr_t 4597 + xfs_btree_sblock_v5hdr_verify( 4598 + struct xfs_buf *bp) 4599 + { 4600 + struct xfs_mount *mp = bp->b_target->bt_mount; 4601 + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4602 + struct xfs_perag *pag = bp->b_pag; 4603 + 4604 + if (!xfs_sb_version_hascrc(&mp->m_sb)) 4605 + return __this_address; 4606 + if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) 4607 + return __this_address; 4608 + if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) 4609 + return __this_address; 4610 + 
if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) 4611 + return __this_address; 4612 + return NULL; 4613 + } 4614 + 4615 + /** 4616 + * xfs_btree_sblock_verify() -- verify a short-format btree block 4617 + * 4618 + * @bp: buffer containing the btree block 4619 + * @max_recs: maximum records allowed in this btree node 4620 + */ 4621 + xfs_failaddr_t 4622 + xfs_btree_sblock_verify( 4623 + struct xfs_buf *bp, 4624 + unsigned int max_recs) 4625 + { 4626 + struct xfs_mount *mp = bp->b_target->bt_mount; 4627 + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4628 + xfs_agblock_t agno; 4629 + 4630 + /* numrecs verification */ 4631 + if (be16_to_cpu(block->bb_numrecs) > max_recs) 4632 + return __this_address; 4633 + 4634 + /* sibling pointer verification */ 4635 + agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp)); 4636 + if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) && 4637 + !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_leftsib))) 4638 + return __this_address; 4639 + if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) && 4640 + !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_rightsib))) 4641 + return __this_address; 4642 + 4643 + return NULL; 4577 4644 } 4578 4645 4579 4646 /* ··· 4997 4952 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 4998 4953 return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l); 4999 4954 return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s); 4955 + } 4956 + 4957 + /* If there's an extent, we're done. */ 4958 + STATIC int 4959 + xfs_btree_has_record_helper( 4960 + struct xfs_btree_cur *cur, 4961 + union xfs_btree_rec *rec, 4962 + void *priv) 4963 + { 4964 + return XFS_BTREE_QUERY_RANGE_ABORT; 4965 + } 4966 + 4967 + /* Is there a record covering a given range of keys? 
*/ 4968 + int 4969 + xfs_btree_has_record( 4970 + struct xfs_btree_cur *cur, 4971 + union xfs_btree_irec *low, 4972 + union xfs_btree_irec *high, 4973 + bool *exists) 4974 + { 4975 + int error; 4976 + 4977 + error = xfs_btree_query_range(cur, low, high, 4978 + &xfs_btree_has_record_helper, NULL); 4979 + if (error == XFS_BTREE_QUERY_RANGE_ABORT) { 4980 + *exists = true; 4981 + return 0; 4982 + } 4983 + *exists = false; 4984 + return error; 5000 4985 }
+10 -6
fs/xfs/libxfs/xfs_btree.h
··· 473 473 #define XFS_FILBLKS_MIN(a,b) min_t(xfs_filblks_t, (a), (b)) 474 474 #define XFS_FILBLKS_MAX(a,b) max_t(xfs_filblks_t, (a), (b)) 475 475 476 - #define XFS_FSB_SANITY_CHECK(mp,fsb) \ 477 - (fsb && XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \ 478 - XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks) 479 - 480 476 /* 481 477 * Trace hooks. Currently not implemented as they need to be ported 482 478 * over to the generic tracing functionality, which is some effort. ··· 492 496 #define XFS_BTREE_TRACE_ARGR(c, r) 493 497 #define XFS_BTREE_TRACE_CURSOR(c, t) 494 498 495 - bool xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp); 496 - bool xfs_btree_sblock_verify(struct xfs_buf *bp, unsigned int max_recs); 499 + xfs_failaddr_t xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp); 500 + xfs_failaddr_t xfs_btree_sblock_verify(struct xfs_buf *bp, 501 + unsigned int max_recs); 502 + xfs_failaddr_t xfs_btree_lblock_v5hdr_verify(struct xfs_buf *bp, 503 + uint64_t owner); 504 + xfs_failaddr_t xfs_btree_lblock_verify(struct xfs_buf *bp, 505 + unsigned int max_recs); 506 + 497 507 uint xfs_btree_compute_maxlevels(struct xfs_mount *mp, uint *limits, 498 508 unsigned long len); 499 509 xfs_extlen_t xfs_btree_calc_size(struct xfs_mount *mp, uint *limits, ··· 547 545 struct xfs_btree_block *block, union xfs_btree_key *key); 548 546 union xfs_btree_key *xfs_btree_high_key_from_key(struct xfs_btree_cur *cur, 549 547 union xfs_btree_key *key); 548 + int xfs_btree_has_record(struct xfs_btree_cur *cur, union xfs_btree_irec *low, 549 + union xfs_btree_irec *high, bool *exists); 550 550 551 551 #endif /* __XFS_BTREE_H__ */
+47 -23
fs/xfs/libxfs/xfs_da_btree.c
··· 128 128 kmem_zone_free(xfs_da_state_zone, state); 129 129 } 130 130 131 - static bool 131 + static xfs_failaddr_t 132 132 xfs_da3_node_verify( 133 133 struct xfs_buf *bp) 134 134 { ··· 145 145 struct xfs_da3_node_hdr *hdr3 = bp->b_addr; 146 146 147 147 if (ichdr.magic != XFS_DA3_NODE_MAGIC) 148 - return false; 148 + return __this_address; 149 149 150 150 if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid)) 151 - return false; 151 + return __this_address; 152 152 if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) 153 - return false; 153 + return __this_address; 154 154 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn))) 155 - return false; 155 + return __this_address; 156 156 } else { 157 157 if (ichdr.magic != XFS_DA_NODE_MAGIC) 158 - return false; 158 + return __this_address; 159 159 } 160 160 if (ichdr.level == 0) 161 - return false; 161 + return __this_address; 162 162 if (ichdr.level > XFS_DA_NODE_MAXDEPTH) 163 - return false; 163 + return __this_address; 164 164 if (ichdr.count == 0) 165 - return false; 165 + return __this_address; 166 166 167 167 /* 168 168 * we don't know if the node is for and attribute or directory tree, ··· 170 170 */ 171 171 if (ichdr.count > mp->m_dir_geo->node_ents && 172 172 ichdr.count > mp->m_attr_geo->node_ents) 173 - return false; 173 + return __this_address; 174 174 175 175 /* XXX: hash order check? 
*/ 176 176 177 - return true; 177 + return NULL; 178 178 } 179 179 180 180 static void ··· 182 182 struct xfs_buf *bp) 183 183 { 184 184 struct xfs_mount *mp = bp->b_target->bt_mount; 185 - struct xfs_buf_log_item *bip = bp->b_fspriv; 185 + struct xfs_buf_log_item *bip = bp->b_log_item; 186 186 struct xfs_da3_node_hdr *hdr3 = bp->b_addr; 187 + xfs_failaddr_t fa; 187 188 188 - if (!xfs_da3_node_verify(bp)) { 189 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 190 - xfs_verifier_error(bp); 189 + fa = xfs_da3_node_verify(bp); 190 + if (fa) { 191 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 191 192 return; 192 193 } 193 194 ··· 212 211 struct xfs_buf *bp) 213 212 { 214 213 struct xfs_da_blkinfo *info = bp->b_addr; 214 + xfs_failaddr_t fa; 215 215 216 216 switch (be16_to_cpu(info->magic)) { 217 217 case XFS_DA3_NODE_MAGIC: 218 218 if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) { 219 - xfs_buf_ioerror(bp, -EFSBADCRC); 219 + xfs_verifier_error(bp, -EFSBADCRC, 220 + __this_address); 220 221 break; 221 222 } 222 223 /* fall through */ 223 224 case XFS_DA_NODE_MAGIC: 224 - if (!xfs_da3_node_verify(bp)) { 225 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 226 - break; 227 - } 225 + fa = xfs_da3_node_verify(bp); 226 + if (fa) 227 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 228 228 return; 229 229 case XFS_ATTR_LEAF_MAGIC: 230 230 case XFS_ATTR3_LEAF_MAGIC: ··· 238 236 bp->b_ops->verify_read(bp); 239 237 return; 240 238 default: 241 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 239 + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); 242 240 break; 243 241 } 242 + } 244 243 245 - /* corrupt block */ 246 - xfs_verifier_error(bp); 244 + /* Verify the structure of a da3 block. 
*/ 245 + static xfs_failaddr_t 246 + xfs_da3_node_verify_struct( 247 + struct xfs_buf *bp) 248 + { 249 + struct xfs_da_blkinfo *info = bp->b_addr; 250 + 251 + switch (be16_to_cpu(info->magic)) { 252 + case XFS_DA3_NODE_MAGIC: 253 + case XFS_DA_NODE_MAGIC: 254 + return xfs_da3_node_verify(bp); 255 + case XFS_ATTR_LEAF_MAGIC: 256 + case XFS_ATTR3_LEAF_MAGIC: 257 + bp->b_ops = &xfs_attr3_leaf_buf_ops; 258 + return bp->b_ops->verify_struct(bp); 259 + case XFS_DIR2_LEAFN_MAGIC: 260 + case XFS_DIR3_LEAFN_MAGIC: 261 + bp->b_ops = &xfs_dir3_leafn_buf_ops; 262 + return bp->b_ops->verify_struct(bp); 263 + default: 264 + return __this_address; 265 + } 247 266 } 248 267 249 268 const struct xfs_buf_ops xfs_da3_node_buf_ops = { 250 269 .name = "xfs_da3_node", 251 270 .verify_read = xfs_da3_node_read_verify, 252 271 .verify_write = xfs_da3_node_write_verify, 272 + .verify_struct = xfs_da3_node_verify_struct, 253 273 }; 254 274 255 275 int
+6
fs/xfs/libxfs/xfs_da_format.h
··· 875 875 ((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \ 876 876 sizeof(struct xfs_attr3_rmt_hdr) : 0)) 877 877 878 + /* Number of bytes in a directory block. */ 879 + static inline unsigned int xfs_dir2_dirblock_bytes(struct xfs_sb *sbp) 880 + { 881 + return 1 << (sbp->sb_blocklog + sbp->sb_dirblklog); 882 + } 883 + 878 884 #endif /* __XFS_DA_FORMAT_H__ */
+2 -3
fs/xfs/libxfs/xfs_dir2.c
··· 119 119 120 120 121 121 ASSERT(mp->m_sb.sb_versionnum & XFS_SB_VERSION_DIRV2BIT); 122 - ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <= 123 - XFS_MAX_BLOCKSIZE); 122 + ASSERT(xfs_dir2_dirblock_bytes(&mp->m_sb) <= XFS_MAX_BLOCKSIZE); 124 123 125 124 mp->m_dir_inode_ops = xfs_dir_get_ops(mp, NULL); 126 125 mp->m_nondir_inode_ops = xfs_nondir_get_ops(mp, NULL); ··· 139 140 dageo = mp->m_dir_geo; 140 141 dageo->blklog = mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog; 141 142 dageo->fsblog = mp->m_sb.sb_blocklog; 142 - dageo->blksize = 1 << dageo->blklog; 143 + dageo->blksize = xfs_dir2_dirblock_bytes(&mp->m_sb); 143 144 dageo->fsbcount = 1 << mp->m_sb.sb_dirblklog; 144 145 145 146 /*
+2
fs/xfs/libxfs/xfs_dir2.h
··· 340 340 #define XFS_READDIR_BUFSIZE (32768) 341 341 342 342 unsigned char xfs_dir3_get_dtype(struct xfs_mount *mp, uint8_t filetype); 343 + void *xfs_dir3_data_endp(struct xfs_da_geometry *geo, 344 + struct xfs_dir2_data_hdr *hdr); 343 345 344 346 #endif /* __XFS_DIR2_H__ */
+20 -19
fs/xfs/libxfs/xfs_dir2_block.c
··· 58 58 xfs_dir_hash_dotdot = xfs_da_hashname((unsigned char *)"..", 2); 59 59 } 60 60 61 - static bool 61 + static xfs_failaddr_t 62 62 xfs_dir3_block_verify( 63 63 struct xfs_buf *bp) 64 64 { ··· 67 67 68 68 if (xfs_sb_version_hascrc(&mp->m_sb)) { 69 69 if (hdr3->magic != cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) 70 - return false; 70 + return __this_address; 71 71 if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) 72 - return false; 72 + return __this_address; 73 73 if (be64_to_cpu(hdr3->blkno) != bp->b_bn) 74 - return false; 74 + return __this_address; 75 75 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) 76 - return false; 76 + return __this_address; 77 77 } else { 78 78 if (hdr3->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) 79 - return false; 79 + return __this_address; 80 80 } 81 - if (__xfs_dir3_data_check(NULL, bp)) 82 - return false; 83 - return true; 81 + return __xfs_dir3_data_check(NULL, bp); 84 82 } 85 83 86 84 static void ··· 86 88 struct xfs_buf *bp) 87 89 { 88 90 struct xfs_mount *mp = bp->b_target->bt_mount; 91 + xfs_failaddr_t fa; 89 92 90 93 if (xfs_sb_version_hascrc(&mp->m_sb) && 91 94 !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) 92 - xfs_buf_ioerror(bp, -EFSBADCRC); 93 - else if (!xfs_dir3_block_verify(bp)) 94 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 95 - 96 - if (bp->b_error) 97 - xfs_verifier_error(bp); 95 + xfs_verifier_error(bp, -EFSBADCRC, __this_address); 96 + else { 97 + fa = xfs_dir3_block_verify(bp); 98 + if (fa) 99 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 100 + } 98 101 } 99 102 100 103 static void ··· 103 104 struct xfs_buf *bp) 104 105 { 105 106 struct xfs_mount *mp = bp->b_target->bt_mount; 106 - struct xfs_buf_log_item *bip = bp->b_fspriv; 107 + struct xfs_buf_log_item *bip = bp->b_log_item; 107 108 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 109 + xfs_failaddr_t fa; 108 110 109 - if (!xfs_dir3_block_verify(bp)) { 110 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 111 - xfs_verifier_error(bp); 111 + fa = xfs_dir3_block_verify(bp); 
112 + if (fa) { 113 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 112 114 return; 113 115 } 114 116 ··· 126 126 .name = "xfs_dir3_block", 127 127 .verify_read = xfs_dir3_block_read_verify, 128 128 .verify_write = xfs_dir3_block_write_verify, 129 + .verify_struct = xfs_dir3_block_verify, 129 130 }; 130 131 131 132 int
+122 -86
fs/xfs/libxfs/xfs_dir2_data.c
··· 36 36 /* 37 37 * Check the consistency of the data block. 38 38 * The input can also be a block-format directory. 39 - * Return 0 is the buffer is good, otherwise an error. 39 + * Return NULL if the buffer is good, otherwise the address of the error. 40 40 */ 41 - int 41 + xfs_failaddr_t 42 42 __xfs_dir3_data_check( 43 43 struct xfs_inode *dp, /* incore inode pointer */ 44 44 struct xfs_buf *bp) /* data block's buffer */ ··· 73 73 */ 74 74 ops = xfs_dir_get_ops(mp, dp); 75 75 76 + /* 77 + * If this isn't a directory, or we don't get handed the dir ops, 78 + * something is seriously wrong. Bail out. 79 + */ 80 + if ((dp && !S_ISDIR(VFS_I(dp)->i_mode)) || 81 + ops != xfs_dir_get_ops(mp, NULL)) 82 + return __this_address; 83 + 76 84 hdr = bp->b_addr; 77 85 p = (char *)ops->data_entry_p(hdr); 78 86 ··· 89 81 case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): 90 82 btp = xfs_dir2_block_tail_p(geo, hdr); 91 83 lep = xfs_dir2_block_leaf_p(btp); 92 - endp = (char *)lep; 93 84 94 85 /* 95 86 * The number of leaf entries is limited by the size of the ··· 97 90 * so just ensure that the count falls somewhere inside the 98 91 * block right now. 99 92 */ 100 - XFS_WANT_CORRUPTED_RETURN(mp, be32_to_cpu(btp->count) < 101 - ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry)); 93 + if (be32_to_cpu(btp->count) >= 94 + ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry)) 95 + return __this_address; 102 96 break; 103 97 case cpu_to_be32(XFS_DIR3_DATA_MAGIC): 104 98 case cpu_to_be32(XFS_DIR2_DATA_MAGIC): 105 - endp = (char *)hdr + geo->blksize; 106 99 break; 107 100 default: 108 - XFS_ERROR_REPORT("Bad Magic", XFS_ERRLEVEL_LOW, mp); 109 - return -EFSCORRUPTED; 101 + return __this_address; 110 102 } 103 + endp = xfs_dir3_data_endp(geo, hdr); 104 + if (!endp) 105 + return __this_address; 111 106 112 107 /* 113 108 * Account for zero bestfree entries. 
··· 117 108 bf = ops->data_bestfree_p(hdr); 118 109 count = lastfree = freeseen = 0; 119 110 if (!bf[0].length) { 120 - XFS_WANT_CORRUPTED_RETURN(mp, !bf[0].offset); 111 + if (bf[0].offset) 112 + return __this_address; 121 113 freeseen |= 1 << 0; 122 114 } 123 115 if (!bf[1].length) { 124 - XFS_WANT_CORRUPTED_RETURN(mp, !bf[1].offset); 116 + if (bf[1].offset) 117 + return __this_address; 125 118 freeseen |= 1 << 1; 126 119 } 127 120 if (!bf[2].length) { 128 - XFS_WANT_CORRUPTED_RETURN(mp, !bf[2].offset); 121 + if (bf[2].offset) 122 + return __this_address; 129 123 freeseen |= 1 << 2; 130 124 } 131 125 132 - XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[0].length) >= 133 - be16_to_cpu(bf[1].length)); 134 - XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[1].length) >= 135 - be16_to_cpu(bf[2].length)); 126 + if (be16_to_cpu(bf[0].length) < be16_to_cpu(bf[1].length)) 127 + return __this_address; 128 + if (be16_to_cpu(bf[1].length) < be16_to_cpu(bf[2].length)) 129 + return __this_address; 136 130 /* 137 131 * Loop over the data/unused entries. 138 132 */ ··· 147 135 * doesn't need to be there. 
148 136 */ 149 137 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { 150 - XFS_WANT_CORRUPTED_RETURN(mp, lastfree == 0); 151 - XFS_WANT_CORRUPTED_RETURN(mp, endp >= 152 - p + be16_to_cpu(dup->length)); 153 - XFS_WANT_CORRUPTED_RETURN(mp, 154 - be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) == 155 - (char *)dup - (char *)hdr); 138 + if (lastfree != 0) 139 + return __this_address; 140 + if (endp < p + be16_to_cpu(dup->length)) 141 + return __this_address; 142 + if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) != 143 + (char *)dup - (char *)hdr) 144 + return __this_address; 156 145 dfp = xfs_dir2_data_freefind(hdr, bf, dup); 157 146 if (dfp) { 158 147 i = (int)(dfp - bf); 159 - XFS_WANT_CORRUPTED_RETURN(mp, 160 - (freeseen & (1 << i)) == 0); 148 + if ((freeseen & (1 << i)) != 0) 149 + return __this_address; 161 150 freeseen |= 1 << i; 162 151 } else { 163 - XFS_WANT_CORRUPTED_RETURN(mp, 164 - be16_to_cpu(dup->length) <= 165 - be16_to_cpu(bf[2].length)); 152 + if (be16_to_cpu(dup->length) > 153 + be16_to_cpu(bf[2].length)) 154 + return __this_address; 166 155 } 167 156 p += be16_to_cpu(dup->length); 168 157 lastfree = 1; ··· 176 163 * The linear search is crude but this is DEBUG code. 
177 164 */ 178 165 dep = (xfs_dir2_data_entry_t *)p; 179 - XFS_WANT_CORRUPTED_RETURN(mp, dep->namelen != 0); 180 - XFS_WANT_CORRUPTED_RETURN(mp, 181 - !xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber))); 182 - XFS_WANT_CORRUPTED_RETURN(mp, endp >= 183 - p + ops->data_entsize(dep->namelen)); 184 - XFS_WANT_CORRUPTED_RETURN(mp, 185 - be16_to_cpu(*ops->data_entry_tag_p(dep)) == 186 - (char *)dep - (char *)hdr); 187 - XFS_WANT_CORRUPTED_RETURN(mp, 188 - ops->data_get_ftype(dep) < XFS_DIR3_FT_MAX); 166 + if (dep->namelen == 0) 167 + return __this_address; 168 + if (xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber))) 169 + return __this_address; 170 + if (endp < p + ops->data_entsize(dep->namelen)) 171 + return __this_address; 172 + if (be16_to_cpu(*ops->data_entry_tag_p(dep)) != 173 + (char *)dep - (char *)hdr) 174 + return __this_address; 175 + if (ops->data_get_ftype(dep) >= XFS_DIR3_FT_MAX) 176 + return __this_address; 189 177 count++; 190 178 lastfree = 0; 191 179 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || ··· 202 188 be32_to_cpu(lep[i].hashval) == hash) 203 189 break; 204 190 } 205 - XFS_WANT_CORRUPTED_RETURN(mp, 206 - i < be32_to_cpu(btp->count)); 191 + if (i >= be32_to_cpu(btp->count)) 192 + return __this_address; 207 193 } 208 194 p += ops->data_entsize(dep->namelen); 209 195 } 210 196 /* 211 197 * Need to have seen all the entries and all the bestfree slots. 
212 198 */ 213 - XFS_WANT_CORRUPTED_RETURN(mp, freeseen == 7); 199 + if (freeseen != 7) 200 + return __this_address; 214 201 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || 215 202 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { 216 203 for (i = stale = 0; i < be32_to_cpu(btp->count); i++) { 217 204 if (lep[i].address == 218 205 cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) 219 206 stale++; 220 - if (i > 0) 221 - XFS_WANT_CORRUPTED_RETURN(mp, 222 - be32_to_cpu(lep[i].hashval) >= 223 - be32_to_cpu(lep[i - 1].hashval)); 207 + if (i > 0 && be32_to_cpu(lep[i].hashval) < 208 + be32_to_cpu(lep[i - 1].hashval)) 209 + return __this_address; 224 210 } 225 - XFS_WANT_CORRUPTED_RETURN(mp, count == 226 - be32_to_cpu(btp->count) - be32_to_cpu(btp->stale)); 227 - XFS_WANT_CORRUPTED_RETURN(mp, stale == be32_to_cpu(btp->stale)); 211 + if (count != be32_to_cpu(btp->count) - be32_to_cpu(btp->stale)) 212 + return __this_address; 213 + if (stale != be32_to_cpu(btp->stale)) 214 + return __this_address; 228 215 } 229 - return 0; 216 + return NULL; 230 217 } 231 218 232 - static bool 219 + #ifdef DEBUG 220 + void 221 + xfs_dir3_data_check( 222 + struct xfs_inode *dp, 223 + struct xfs_buf *bp) 224 + { 225 + xfs_failaddr_t fa; 226 + 227 + fa = __xfs_dir3_data_check(dp, bp); 228 + if (!fa) 229 + return; 230 + xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, dp->i_mount, 231 + bp->b_addr, __FILE__, __LINE__, fa); 232 + ASSERT(0); 233 + } 234 + #endif 235 + 236 + static xfs_failaddr_t 233 237 xfs_dir3_data_verify( 234 238 struct xfs_buf *bp) 235 239 { ··· 256 224 257 225 if (xfs_sb_version_hascrc(&mp->m_sb)) { 258 226 if (hdr3->magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC)) 259 - return false; 227 + return __this_address; 260 228 if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) 261 - return false; 229 + return __this_address; 262 230 if (be64_to_cpu(hdr3->blkno) != bp->b_bn) 263 - return false; 231 + return __this_address; 264 232 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) 265 - 
return false; 233 + return __this_address; 266 234 } else { 267 235 if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC)) 268 - return false; 236 + return __this_address; 269 237 } 270 - if (__xfs_dir3_data_check(NULL, bp)) 271 - return false; 272 - return true; 238 + return __xfs_dir3_data_check(NULL, bp); 273 239 } 274 240 275 241 /* ··· 293 263 bp->b_ops->verify_read(bp); 294 264 return; 295 265 default: 296 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 297 - xfs_verifier_error(bp); 266 + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); 298 267 break; 299 268 } 300 269 } ··· 303 274 struct xfs_buf *bp) 304 275 { 305 276 struct xfs_mount *mp = bp->b_target->bt_mount; 277 + xfs_failaddr_t fa; 306 278 307 279 if (xfs_sb_version_hascrc(&mp->m_sb) && 308 - !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) 309 - xfs_buf_ioerror(bp, -EFSBADCRC); 310 - else if (!xfs_dir3_data_verify(bp)) 311 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 312 - 313 - if (bp->b_error) 314 - xfs_verifier_error(bp); 280 + !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) 281 + xfs_verifier_error(bp, -EFSBADCRC, __this_address); 282 + else { 283 + fa = xfs_dir3_data_verify(bp); 284 + if (fa) 285 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 286 + } 315 287 } 316 288 317 289 static void ··· 320 290 struct xfs_buf *bp) 321 291 { 322 292 struct xfs_mount *mp = bp->b_target->bt_mount; 323 - struct xfs_buf_log_item *bip = bp->b_fspriv; 293 + struct xfs_buf_log_item *bip = bp->b_log_item; 324 294 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 295 + xfs_failaddr_t fa; 325 296 326 - if (!xfs_dir3_data_verify(bp)) { 327 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 328 - xfs_verifier_error(bp); 297 + fa = xfs_dir3_data_verify(bp); 298 + if (fa) { 299 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 329 300 return; 330 301 } 331 302 ··· 343 312 .name = "xfs_dir3_data", 344 313 .verify_read = xfs_dir3_data_read_verify, 345 314 .verify_write = xfs_dir3_data_write_verify, 315 + .verify_struct = xfs_dir3_data_verify, 346 316 }; 347 
317 348 318 static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = { ··· 547 515 struct xfs_dir2_data_hdr *hdr, 548 516 int *loghead) 549 517 { 550 - xfs_dir2_block_tail_t *btp; /* block tail */ 551 518 xfs_dir2_data_entry_t *dep; /* active data entry */ 552 519 xfs_dir2_data_unused_t *dup; /* unused data entry */ 553 520 struct xfs_dir2_data_free *bf; ··· 568 537 * Set up pointers. 569 538 */ 570 539 p = (char *)ops->data_entry_p(hdr); 571 - if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || 572 - hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { 573 - btp = xfs_dir2_block_tail_p(geo, hdr); 574 - endp = (char *)xfs_dir2_block_leaf_p(btp); 575 - } else 576 - endp = (char *)hdr + geo->blksize; 540 + endp = xfs_dir3_data_endp(geo, hdr); 577 541 /* 578 542 * Loop over the block's entries. 579 543 */ ··· 781 755 /* 782 756 * Figure out where the end of the data area is. 783 757 */ 784 - if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || 785 - hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC)) 786 - endptr = (char *)hdr + args->geo->blksize; 787 - else { 788 - xfs_dir2_block_tail_t *btp; /* block tail */ 758 + endptr = xfs_dir3_data_endp(args->geo, hdr); 759 + ASSERT(endptr != NULL); 789 760 790 - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || 791 - hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); 792 - btp = xfs_dir2_block_tail_p(args->geo, hdr); 793 - endptr = (char *)xfs_dir2_block_leaf_p(btp); 794 - } 795 761 /* 796 762 * If this isn't the start of the block, then back up to 797 763 * the previous entry and see if it's free. ··· 1084 1066 } 1085 1067 } 1086 1068 *needscanp = needscan; 1069 + } 1070 + 1071 + /* Find the end of the entry data in a data/block format dir block. 
*/ 1072 + void * 1073 + xfs_dir3_data_endp( 1074 + struct xfs_da_geometry *geo, 1075 + struct xfs_dir2_data_hdr *hdr) 1076 + { 1077 + switch (hdr->magic) { 1078 + case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC): 1079 + case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): 1080 + return xfs_dir2_block_leaf_p(xfs_dir2_block_tail_p(geo, hdr)); 1081 + case cpu_to_be32(XFS_DIR3_DATA_MAGIC): 1082 + case cpu_to_be32(XFS_DIR2_DATA_MAGIC): 1083 + return (char *)hdr + geo->blksize; 1084 + default: 1085 + return NULL; 1086 + } 1087 1087 }
+58 -31
fs/xfs/libxfs/xfs_dir2_leaf.c
··· 50 50 * Pop an assert if something is wrong. 51 51 */ 52 52 #ifdef DEBUG 53 - #define xfs_dir3_leaf_check(dp, bp) \ 54 - do { \ 55 - if (!xfs_dir3_leaf1_check((dp), (bp))) \ 56 - ASSERT(0); \ 57 - } while (0); 58 - 59 - STATIC bool 53 + static xfs_failaddr_t 60 54 xfs_dir3_leaf1_check( 61 55 struct xfs_inode *dp, 62 56 struct xfs_buf *bp) ··· 63 69 if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) { 64 70 struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; 65 71 if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) 66 - return false; 72 + return __this_address; 67 73 } else if (leafhdr.magic != XFS_DIR2_LEAF1_MAGIC) 68 - return false; 74 + return __this_address; 69 75 70 76 return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf); 77 + } 78 + 79 + static inline void 80 + xfs_dir3_leaf_check( 81 + struct xfs_inode *dp, 82 + struct xfs_buf *bp) 83 + { 84 + xfs_failaddr_t fa; 85 + 86 + fa = xfs_dir3_leaf1_check(dp, bp); 87 + if (!fa) 88 + return; 89 + xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, dp->i_mount, 90 + bp->b_addr, __FILE__, __LINE__, fa); 91 + ASSERT(0); 71 92 } 72 93 #else 73 94 #define xfs_dir3_leaf_check(dp, bp) 74 95 #endif 75 96 76 - bool 97 + xfs_failaddr_t 77 98 xfs_dir3_leaf_check_int( 78 99 struct xfs_mount *mp, 79 100 struct xfs_inode *dp, ··· 123 114 * We can deduce a value for that from di_size. 124 115 */ 125 116 if (hdr->count > ops->leaf_max_ents(geo)) 126 - return false; 117 + return __this_address; 127 118 128 119 /* Leaves and bests don't overlap in leaf format. */ 129 120 if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC || 130 121 hdr->magic == XFS_DIR3_LEAF1_MAGIC) && 131 122 (char *)&ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp)) 132 - return false; 123 + return __this_address; 133 124 134 125 /* Check hash value order, count stale entries. 
*/ 135 126 for (i = stale = 0; i < hdr->count; i++) { 136 127 if (i + 1 < hdr->count) { 137 128 if (be32_to_cpu(ents[i].hashval) > 138 129 be32_to_cpu(ents[i + 1].hashval)) 139 - return false; 130 + return __this_address; 140 131 } 141 132 if (ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) 142 133 stale++; 143 134 } 144 135 if (hdr->stale != stale) 145 - return false; 146 - return true; 136 + return __this_address; 137 + return NULL; 147 138 } 148 139 149 140 /* ··· 151 142 * kernels we don't get assertion failures in xfs_dir3_leaf_hdr_from_disk() due 152 143 * to incorrect magic numbers. 153 144 */ 154 - static bool 145 + static xfs_failaddr_t 155 146 xfs_dir3_leaf_verify( 156 147 struct xfs_buf *bp, 157 148 uint16_t magic) ··· 169 160 : XFS_DIR3_LEAFN_MAGIC; 170 161 171 162 if (leaf3->info.hdr.magic != cpu_to_be16(magic3)) 172 - return false; 163 + return __this_address; 173 164 if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid)) 174 - return false; 165 + return __this_address; 175 166 if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) 176 - return false; 167 + return __this_address; 177 168 if (!xfs_log_check_lsn(mp, be64_to_cpu(leaf3->info.lsn))) 178 - return false; 169 + return __this_address; 179 170 } else { 180 171 if (leaf->hdr.info.magic != cpu_to_be16(magic)) 181 - return false; 172 + return __this_address; 182 173 } 183 174 184 175 return xfs_dir3_leaf_check_int(mp, NULL, NULL, leaf); ··· 190 181 uint16_t magic) 191 182 { 192 183 struct xfs_mount *mp = bp->b_target->bt_mount; 184 + xfs_failaddr_t fa; 193 185 194 186 if (xfs_sb_version_hascrc(&mp->m_sb) && 195 187 !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF)) 196 - xfs_buf_ioerror(bp, -EFSBADCRC); 197 - else if (!xfs_dir3_leaf_verify(bp, magic)) 198 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 199 - 200 - if (bp->b_error) 201 - xfs_verifier_error(bp); 188 + xfs_verifier_error(bp, -EFSBADCRC, __this_address); 189 + else { 190 + fa = xfs_dir3_leaf_verify(bp, magic); 191 + if (fa) 192 + 
xfs_verifier_error(bp, -EFSCORRUPTED, fa); 193 + } 202 194 } 203 195 204 196 static void ··· 208 198 uint16_t magic) 209 199 { 210 200 struct xfs_mount *mp = bp->b_target->bt_mount; 211 - struct xfs_buf_log_item *bip = bp->b_fspriv; 201 + struct xfs_buf_log_item *bip = bp->b_log_item; 212 202 struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; 203 + xfs_failaddr_t fa; 213 204 214 - if (!xfs_dir3_leaf_verify(bp, magic)) { 215 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 216 - xfs_verifier_error(bp); 205 + fa = xfs_dir3_leaf_verify(bp, magic); 206 + if (fa) { 207 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 217 208 return; 218 209 } 219 210 ··· 225 214 hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); 226 215 227 216 xfs_buf_update_cksum(bp, XFS_DIR3_LEAF_CRC_OFF); 217 + } 218 + 219 + static xfs_failaddr_t 220 + xfs_dir3_leaf1_verify( 221 + struct xfs_buf *bp) 222 + { 223 + return xfs_dir3_leaf_verify(bp, XFS_DIR2_LEAF1_MAGIC); 228 224 } 229 225 230 226 static void ··· 246 228 struct xfs_buf *bp) 247 229 { 248 230 __write_verify(bp, XFS_DIR2_LEAF1_MAGIC); 231 + } 232 + 233 + static xfs_failaddr_t 234 + xfs_dir3_leafn_verify( 235 + struct xfs_buf *bp) 236 + { 237 + return xfs_dir3_leaf_verify(bp, XFS_DIR2_LEAFN_MAGIC); 249 238 } 250 239 251 240 static void ··· 273 248 .name = "xfs_dir3_leaf1", 274 249 .verify_read = xfs_dir3_leaf1_read_verify, 275 250 .verify_write = xfs_dir3_leaf1_write_verify, 251 + .verify_struct = xfs_dir3_leaf1_verify, 276 252 }; 277 253 278 254 const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = { 279 255 .name = "xfs_dir3_leafn", 280 256 .verify_read = xfs_dir3_leafn_read_verify, 281 257 .verify_write = xfs_dir3_leafn_write_verify, 258 + .verify_struct = xfs_dir3_leafn_verify, 282 259 }; 283 260 284 261 int
+51 -38
fs/xfs/libxfs/xfs_dir2_node.c
··· 53 53 * Check internal consistency of a leafn block. 54 54 */ 55 55 #ifdef DEBUG 56 - #define xfs_dir3_leaf_check(dp, bp) \ 57 - do { \ 58 - if (!xfs_dir3_leafn_check((dp), (bp))) \ 59 - ASSERT(0); \ 60 - } while (0); 61 - 62 - static bool 56 + static xfs_failaddr_t 63 57 xfs_dir3_leafn_check( 64 58 struct xfs_inode *dp, 65 59 struct xfs_buf *bp) ··· 66 72 if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC) { 67 73 struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; 68 74 if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) 69 - return false; 75 + return __this_address; 70 76 } else if (leafhdr.magic != XFS_DIR2_LEAFN_MAGIC) 71 - return false; 77 + return __this_address; 72 78 73 79 return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf); 80 + } 81 + 82 + static inline void 83 + xfs_dir3_leaf_check( 84 + struct xfs_inode *dp, 85 + struct xfs_buf *bp) 86 + { 87 + xfs_failaddr_t fa; 88 + 89 + fa = xfs_dir3_leafn_check(dp, bp); 90 + if (!fa) 91 + return; 92 + xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, dp->i_mount, 93 + bp->b_addr, __FILE__, __LINE__, fa); 94 + ASSERT(0); 74 95 } 75 96 #else 76 97 #define xfs_dir3_leaf_check(dp, bp) 77 98 #endif 78 99 79 - static bool 100 + static xfs_failaddr_t 80 101 xfs_dir3_free_verify( 81 102 struct xfs_buf *bp) 82 103 { ··· 102 93 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 103 94 104 95 if (hdr3->magic != cpu_to_be32(XFS_DIR3_FREE_MAGIC)) 105 - return false; 96 + return __this_address; 106 97 if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) 107 - return false; 98 + return __this_address; 108 99 if (be64_to_cpu(hdr3->blkno) != bp->b_bn) 109 - return false; 100 + return __this_address; 110 101 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) 111 - return false; 102 + return __this_address; 112 103 } else { 113 104 if (hdr->magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)) 114 - return false; 105 + return __this_address; 115 106 } 116 107 117 108 /* XXX: should bounds check the xfs_dir3_icfree_hdr here */ 118 109 119 - return true; 
110 + return NULL; 120 111 } 121 112 122 113 static void ··· 124 115 struct xfs_buf *bp) 125 116 { 126 117 struct xfs_mount *mp = bp->b_target->bt_mount; 118 + xfs_failaddr_t fa; 127 119 128 120 if (xfs_sb_version_hascrc(&mp->m_sb) && 129 121 !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF)) 130 - xfs_buf_ioerror(bp, -EFSBADCRC); 131 - else if (!xfs_dir3_free_verify(bp)) 132 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 133 - 134 - if (bp->b_error) 135 - xfs_verifier_error(bp); 122 + xfs_verifier_error(bp, -EFSBADCRC, __this_address); 123 + else { 124 + fa = xfs_dir3_free_verify(bp); 125 + if (fa) 126 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 127 + } 136 128 } 137 129 138 130 static void ··· 141 131 struct xfs_buf *bp) 142 132 { 143 133 struct xfs_mount *mp = bp->b_target->bt_mount; 144 - struct xfs_buf_log_item *bip = bp->b_fspriv; 134 + struct xfs_buf_log_item *bip = bp->b_log_item; 145 135 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 136 + xfs_failaddr_t fa; 146 137 147 - if (!xfs_dir3_free_verify(bp)) { 148 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 149 - xfs_verifier_error(bp); 138 + fa = xfs_dir3_free_verify(bp); 139 + if (fa) { 140 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 150 141 return; 151 142 } 152 143 ··· 164 153 .name = "xfs_dir3_free", 165 154 .verify_read = xfs_dir3_free_read_verify, 166 155 .verify_write = xfs_dir3_free_write_verify, 156 + .verify_struct = xfs_dir3_free_verify, 167 157 }; 168 158 169 159 /* Everything ok in the free block header? 
*/ 170 - static bool 160 + static xfs_failaddr_t 171 161 xfs_dir3_free_header_check( 172 162 struct xfs_inode *dp, 173 163 xfs_dablk_t fbno, ··· 186 174 struct xfs_dir3_free_hdr *hdr3 = bp->b_addr; 187 175 188 176 if (be32_to_cpu(hdr3->firstdb) != firstdb) 189 - return false; 177 + return __this_address; 190 178 if (be32_to_cpu(hdr3->nvalid) > maxbests) 191 - return false; 179 + return __this_address; 192 180 if (be32_to_cpu(hdr3->nvalid) < be32_to_cpu(hdr3->nused)) 193 - return false; 181 + return __this_address; 194 182 } else { 195 183 struct xfs_dir2_free_hdr *hdr = bp->b_addr; 196 184 197 185 if (be32_to_cpu(hdr->firstdb) != firstdb) 198 - return false; 186 + return __this_address; 199 187 if (be32_to_cpu(hdr->nvalid) > maxbests) 200 - return false; 188 + return __this_address; 201 189 if (be32_to_cpu(hdr->nvalid) < be32_to_cpu(hdr->nused)) 202 - return false; 190 + return __this_address; 203 191 } 204 - return true; 192 + return NULL; 205 193 } 206 194 207 195 static int ··· 212 200 xfs_daddr_t mappedbno, 213 201 struct xfs_buf **bpp) 214 202 { 203 + xfs_failaddr_t fa; 215 204 int err; 216 205 217 206 err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, ··· 221 208 return err; 222 209 223 210 /* Check things that we can't do in the verifier. */ 224 - if (!xfs_dir3_free_header_check(dp, fbno, *bpp)) { 225 - xfs_buf_ioerror(*bpp, -EFSCORRUPTED); 226 - xfs_verifier_error(*bpp); 211 + fa = xfs_dir3_free_header_check(dp, fbno, *bpp); 212 + if (fa) { 213 + xfs_verifier_error(*bpp, -EFSCORRUPTED, fa); 227 214 xfs_trans_brelse(tp, *bpp); 228 215 return -EFSCORRUPTED; 229 216 } ··· 1919 1906 (unsigned long long)ifbno, lastfbno); 1920 1907 if (fblk) { 1921 1908 xfs_alert(mp, 1922 - " fblk 0x%p blkno %llu index %d magic 0x%x", 1909 + " fblk "PTR_FMT" blkno %llu index %d magic 0x%x", 1923 1910 fblk, 1924 1911 (unsigned long long)fblk->blkno, 1925 1912 fblk->index,
+7 -5
fs/xfs/libxfs/xfs_dir2_priv.h
··· 39 39 40 40 /* xfs_dir2_data.c */ 41 41 #ifdef DEBUG 42 - #define xfs_dir3_data_check(dp,bp) __xfs_dir3_data_check(dp, bp); 42 + extern void xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp); 43 43 #else 44 44 #define xfs_dir3_data_check(dp,bp) 45 45 #endif 46 46 47 - extern int __xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp); 47 + extern xfs_failaddr_t __xfs_dir3_data_check(struct xfs_inode *dp, 48 + struct xfs_buf *bp); 48 49 extern int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp, 49 50 xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp); 50 51 extern int xfs_dir3_data_readahead(struct xfs_inode *dp, xfs_dablk_t bno, ··· 90 89 int lowstale, int highstale, int *lfloglow, int *lfloghigh); 91 90 extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state); 92 91 93 - extern bool xfs_dir3_leaf_check_int(struct xfs_mount *mp, struct xfs_inode *dp, 94 - struct xfs_dir3_icleaf_hdr *hdr, struct xfs_dir2_leaf *leaf); 92 + extern xfs_failaddr_t xfs_dir3_leaf_check_int(struct xfs_mount *mp, 93 + struct xfs_inode *dp, struct xfs_dir3_icleaf_hdr *hdr, 94 + struct xfs_dir2_leaf *leaf); 95 95 96 96 /* xfs_dir2_node.c */ 97 97 extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args, ··· 129 127 extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); 130 128 extern int xfs_dir2_sf_removename(struct xfs_da_args *args); 131 129 extern int xfs_dir2_sf_replace(struct xfs_da_args *args); 132 - extern int xfs_dir2_sf_verify(struct xfs_inode *ip); 130 + extern xfs_failaddr_t xfs_dir2_sf_verify(struct xfs_inode *ip); 133 131 134 132 /* xfs_dir2_readdir.c */ 135 133 extern int xfs_readdir(struct xfs_trans *tp, struct xfs_inode *dp,
+14 -16
fs/xfs/libxfs/xfs_dir2_sf.c
··· 156 156 xfs_dir2_sf_hdr_t *sfhp) /* shortform directory hdr */ 157 157 { 158 158 xfs_dir2_data_hdr_t *hdr; /* block header */ 159 - xfs_dir2_block_tail_t *btp; /* block tail pointer */ 160 159 xfs_dir2_data_entry_t *dep; /* data entry pointer */ 161 160 xfs_inode_t *dp; /* incore directory inode */ 162 161 xfs_dir2_data_unused_t *dup; /* unused data pointer */ ··· 191 192 /* 192 193 * Set up to loop over the block's entries. 193 194 */ 194 - btp = xfs_dir2_block_tail_p(args->geo, hdr); 195 195 ptr = (char *)dp->d_ops->data_entry_p(hdr); 196 - endptr = (char *)xfs_dir2_block_leaf_p(btp); 196 + endptr = xfs_dir3_data_endp(args->geo, hdr); 197 197 sfep = xfs_dir2_sf_firstentry(sfp); 198 198 /* 199 199 * Loop over the active and unused entries. ··· 628 630 #endif /* DEBUG */ 629 631 630 632 /* Verify the consistency of an inline directory. */ 631 - int 633 + xfs_failaddr_t 632 634 xfs_dir2_sf_verify( 633 635 struct xfs_inode *ip) 634 636 { ··· 663 665 */ 664 666 if (size <= offsetof(struct xfs_dir2_sf_hdr, parent) || 665 667 size < xfs_dir2_sf_hdr_size(sfp->i8count)) 666 - return -EFSCORRUPTED; 668 + return __this_address; 667 669 668 670 endp = (char *)sfp + size; 669 671 ··· 672 674 i8count = ino > XFS_DIR2_MAX_SHORT_INUM; 673 675 error = xfs_dir_ino_validate(mp, ino); 674 676 if (error) 675 - return error; 677 + return __this_address; 676 678 offset = dops->data_first_offset; 677 679 678 680 /* Check all reported entries */ ··· 684 686 * within the data buffer. 685 687 */ 686 688 if (((char *)sfep + sizeof(*sfep)) >= endp) 687 - return -EFSCORRUPTED; 689 + return __this_address; 688 690 689 691 /* Don't allow names with known bad length. 
*/ 690 692 if (sfep->namelen == 0) 691 - return -EFSCORRUPTED; 693 + return __this_address; 692 694 693 695 /* 694 696 * Check that the variable-length part of the structure is ··· 697 699 */ 698 700 next_sfep = dops->sf_nextentry(sfp, sfep); 699 701 if (endp < (char *)next_sfep) 700 - return -EFSCORRUPTED; 702 + return __this_address; 701 703 702 704 /* Check that the offsets always increase. */ 703 705 if (xfs_dir2_sf_get_offset(sfep) < offset) 704 - return -EFSCORRUPTED; 706 + return __this_address; 705 707 706 708 /* Check the inode number. */ 707 709 ino = dops->sf_get_ino(sfp, sfep); 708 710 i8count += ino > XFS_DIR2_MAX_SHORT_INUM; 709 711 error = xfs_dir_ino_validate(mp, ino); 710 712 if (error) 711 - return error; 713 + return __this_address; 712 714 713 715 /* Check the file type. */ 714 716 filetype = dops->sf_get_ftype(sfep); 715 717 if (filetype >= XFS_DIR3_FT_MAX) 716 - return -EFSCORRUPTED; 718 + return __this_address; 717 719 718 720 offset = xfs_dir2_sf_get_offset(sfep) + 719 721 dops->data_entsize(sfep->namelen); ··· 721 723 sfep = next_sfep; 722 724 } 723 725 if (i8count != sfp->i8count) 724 - return -EFSCORRUPTED; 726 + return __this_address; 725 727 if ((void *)sfep != (void *)endp) 726 - return -EFSCORRUPTED; 728 + return __this_address; 727 729 728 730 /* Make sure this whole thing ought to be in local format. */ 729 731 if (offset + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + 730 732 (uint)sizeof(xfs_dir2_block_tail_t) > mp->m_dir_geo->blksize) 731 - return -EFSCORRUPTED; 733 + return __this_address; 732 734 733 - return 0; 735 + return NULL; 734 736 } 735 737 736 738 /*
+75 -99
fs/xfs/libxfs/xfs_dquot_buf.c
··· 42 42 /* 43 43 * Do some primitive error checking on ondisk dquot data structures. 44 44 */ 45 - int 46 - xfs_dqcheck( 45 + xfs_failaddr_t 46 + xfs_dquot_verify( 47 47 struct xfs_mount *mp, 48 48 xfs_disk_dquot_t *ddq, 49 49 xfs_dqid_t id, 50 50 uint type, /* used only when IO_dorepair is true */ 51 - uint flags, 52 - const char *str) 51 + uint flags) 53 52 { 54 - xfs_dqblk_t *d = (xfs_dqblk_t *)ddq; 55 - int errs = 0; 56 - 57 53 /* 58 54 * We can encounter an uninitialized dquot buffer for 2 reasons: 59 55 * 1. If we crash while deleting the quotainode(s), and those blks got ··· 65 69 * This is all fine; things are still consistent, and we haven't lost 66 70 * any quota information. Just don't complain about bad dquot blks. 67 71 */ 68 - if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) { 69 - if (flags & XFS_QMOPT_DOWARN) 70 - xfs_alert(mp, 71 - "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", 72 - str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC); 73 - errs++; 74 - } 75 - if (ddq->d_version != XFS_DQUOT_VERSION) { 76 - if (flags & XFS_QMOPT_DOWARN) 77 - xfs_alert(mp, 78 - "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x", 79 - str, id, ddq->d_version, XFS_DQUOT_VERSION); 80 - errs++; 81 - } 72 + if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) 73 + return __this_address; 74 + if (ddq->d_version != XFS_DQUOT_VERSION) 75 + return __this_address; 82 76 83 77 if (ddq->d_flags != XFS_DQ_USER && 84 78 ddq->d_flags != XFS_DQ_PROJ && 85 - ddq->d_flags != XFS_DQ_GROUP) { 86 - if (flags & XFS_QMOPT_DOWARN) 87 - xfs_alert(mp, 88 - "%s : XFS dquot ID 0x%x, unknown flags 0x%x", 89 - str, id, ddq->d_flags); 90 - errs++; 91 - } 79 + ddq->d_flags != XFS_DQ_GROUP) 80 + return __this_address; 92 81 93 - if (id != -1 && id != be32_to_cpu(ddq->d_id)) { 94 - if (flags & XFS_QMOPT_DOWARN) 95 - xfs_alert(mp, 96 - "%s : ondisk-dquot 0x%p, ID mismatch: " 97 - "0x%x expected, found id 0x%x", 98 - str, ddq, id, be32_to_cpu(ddq->d_id)); 99 - errs++; 100 - } 82 + if (id != -1 && id != 
be32_to_cpu(ddq->d_id)) 83 + return __this_address; 101 84 102 - if (!errs && ddq->d_id) { 103 - if (ddq->d_blk_softlimit && 104 - be64_to_cpu(ddq->d_bcount) > 105 - be64_to_cpu(ddq->d_blk_softlimit)) { 106 - if (!ddq->d_btimer) { 107 - if (flags & XFS_QMOPT_DOWARN) 108 - xfs_alert(mp, 109 - "%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED", 110 - str, (int)be32_to_cpu(ddq->d_id), ddq); 111 - errs++; 112 - } 113 - } 114 - if (ddq->d_ino_softlimit && 115 - be64_to_cpu(ddq->d_icount) > 116 - be64_to_cpu(ddq->d_ino_softlimit)) { 117 - if (!ddq->d_itimer) { 118 - if (flags & XFS_QMOPT_DOWARN) 119 - xfs_alert(mp, 120 - "%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED", 121 - str, (int)be32_to_cpu(ddq->d_id), ddq); 122 - errs++; 123 - } 124 - } 125 - if (ddq->d_rtb_softlimit && 126 - be64_to_cpu(ddq->d_rtbcount) > 127 - be64_to_cpu(ddq->d_rtb_softlimit)) { 128 - if (!ddq->d_rtbtimer) { 129 - if (flags & XFS_QMOPT_DOWARN) 130 - xfs_alert(mp, 131 - "%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED", 132 - str, (int)be32_to_cpu(ddq->d_id), ddq); 133 - errs++; 134 - } 135 - } 136 - } 85 + if (!ddq->d_id) 86 + return NULL; 137 87 138 - if (!errs || !(flags & XFS_QMOPT_DQREPAIR)) 139 - return errs; 88 + if (ddq->d_blk_softlimit && 89 + be64_to_cpu(ddq->d_bcount) > be64_to_cpu(ddq->d_blk_softlimit) && 90 + !ddq->d_btimer) 91 + return __this_address; 140 92 141 - if (flags & XFS_QMOPT_DOWARN) 142 - xfs_notice(mp, "Re-initializing dquot ID 0x%x", id); 93 + if (ddq->d_ino_softlimit && 94 + be64_to_cpu(ddq->d_icount) > be64_to_cpu(ddq->d_ino_softlimit) && 95 + !ddq->d_itimer) 96 + return __this_address; 97 + 98 + if (ddq->d_rtb_softlimit && 99 + be64_to_cpu(ddq->d_rtbcount) > be64_to_cpu(ddq->d_rtb_softlimit) && 100 + !ddq->d_rtbtimer) 101 + return __this_address; 102 + 103 + return NULL; 104 + } 105 + 106 + /* 107 + * Do some primitive error checking on ondisk dquot data structures. 
108 + */ 109 + int 110 + xfs_dquot_repair( 111 + struct xfs_mount *mp, 112 + struct xfs_disk_dquot *ddq, 113 + xfs_dqid_t id, 114 + uint type) 115 + { 116 + struct xfs_dqblk *d = (struct xfs_dqblk *)ddq; 117 + 143 118 144 119 /* 145 120 * Typically, a repair is only requested by quotacheck. 146 121 */ 147 122 ASSERT(id != -1); 148 - ASSERT(flags & XFS_QMOPT_DQREPAIR); 149 123 memset(d, 0, sizeof(xfs_dqblk_t)); 150 124 151 125 d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); ··· 129 163 XFS_DQUOT_CRC_OFF); 130 164 } 131 165 132 - return errs; 166 + return 0; 133 167 } 134 168 135 169 STATIC bool ··· 164 198 return true; 165 199 } 166 200 167 - STATIC bool 201 + STATIC xfs_failaddr_t 168 202 xfs_dquot_buf_verify( 169 203 struct xfs_mount *mp, 170 - struct xfs_buf *bp, 171 - int warn) 204 + struct xfs_buf *bp) 172 205 { 173 206 struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; 207 + xfs_failaddr_t fa; 174 208 xfs_dqid_t id = 0; 175 209 int ndquots; 176 210 int i; ··· 194 228 */ 195 229 for (i = 0; i < ndquots; i++) { 196 230 struct xfs_disk_dquot *ddq; 197 - int error; 198 231 199 232 ddq = &d[i].dd_diskdq; 200 233 201 234 if (i == 0) 202 235 id = be32_to_cpu(ddq->d_id); 203 236 204 - error = xfs_dqcheck(mp, ddq, id + i, 0, warn, __func__); 205 - if (error) 206 - return false; 237 + fa = xfs_dquot_verify(mp, ddq, id + i, 0, 0); 238 + if (fa) 239 + return fa; 207 240 } 208 - return true; 241 + 242 + return NULL; 243 + } 244 + 245 + static xfs_failaddr_t 246 + xfs_dquot_buf_verify_struct( 247 + struct xfs_buf *bp) 248 + { 249 + struct xfs_mount *mp = bp->b_target->bt_mount; 250 + 251 + return xfs_dquot_buf_verify(mp, bp); 209 252 } 210 253 211 254 static void 212 255 xfs_dquot_buf_read_verify( 213 - struct xfs_buf *bp) 256 + struct xfs_buf *bp) 214 257 { 215 258 struct xfs_mount *mp = bp->b_target->bt_mount; 259 + xfs_failaddr_t fa; 216 260 217 261 if (!xfs_dquot_buf_verify_crc(mp, bp)) 218 - xfs_buf_ioerror(bp, -EFSBADCRC); 219 - else if 
(!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) 220 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 221 - 222 - if (bp->b_error) 223 - xfs_verifier_error(bp); 262 + xfs_verifier_error(bp, -EFSBADCRC, __this_address); 263 + else { 264 + fa = xfs_dquot_buf_verify(mp, bp); 265 + if (fa) 266 + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); 267 + } 224 268 } 225 269 226 270 /* ··· 246 270 struct xfs_mount *mp = bp->b_target->bt_mount; 247 271 248 272 if (!xfs_dquot_buf_verify_crc(mp, bp) || 249 - !xfs_dquot_buf_verify(mp, bp, 0)) { 273 + xfs_dquot_buf_verify(mp, bp) != NULL) { 250 274 xfs_buf_ioerror(bp, -EIO); 251 275 bp->b_flags &= ~XBF_DONE; 252 276 } ··· 259 283 */ 260 284 static void 261 285 xfs_dquot_buf_write_verify( 262 - struct xfs_buf *bp) 286 + struct xfs_buf *bp) 263 287 { 264 288 struct xfs_mount *mp = bp->b_target->bt_mount; 289 + xfs_failaddr_t fa; 265 290 266 - if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) { 267 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 268 - xfs_verifier_error(bp); 269 - return; 270 - } 291 + fa = xfs_dquot_buf_verify(mp, bp); 292 + if (fa) 293 + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); 271 294 } 272 295 273 296 const struct xfs_buf_ops xfs_dquot_buf_ops = { 274 297 .name = "xfs_dquot", 275 298 .verify_read = xfs_dquot_buf_read_verify, 276 299 .verify_write = xfs_dquot_buf_write_verify, 300 + .verify_struct = xfs_dquot_buf_verify_struct, 277 301 }; 278 302 279 303 const struct xfs_buf_ops xfs_dquot_buf_ra_ops = {
+7
fs/xfs/libxfs/xfs_fs.h
··· 233 233 #define XFS_MAX_LOG_BLOCKS (1024 * 1024ULL) 234 234 #define XFS_MIN_LOG_BYTES (10 * 1024 * 1024ULL) 235 235 236 + /* 237 + * Limits on sb_agblocks/sb_agblklog -- mkfs won't format AGs smaller than 238 + * 16MB or larger than 1TB. 239 + */ 240 + #define XFS_MIN_AG_BYTES (1ULL << 24) /* 16 MB */ 241 + #define XFS_MAX_AG_BYTES (1ULL << 40) /* 1 TB */ 242 + 236 243 /* keep the maximum size under 2^31 by a small amount */ 237 244 #define XFS_MAX_LOG_BYTES \ 238 245 ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES)
+122 -21
fs/xfs/libxfs/xfs_ialloc.c
··· 2491 2491 #define xfs_check_agi_unlinked(agi) 2492 2492 #endif 2493 2493 2494 - static bool 2494 + static xfs_failaddr_t 2495 2495 xfs_agi_verify( 2496 2496 struct xfs_buf *bp) 2497 2497 { ··· 2500 2500 2501 2501 if (xfs_sb_version_hascrc(&mp->m_sb)) { 2502 2502 if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid)) 2503 - return false; 2503 + return __this_address; 2504 2504 if (!xfs_log_check_lsn(mp, 2505 2505 be64_to_cpu(XFS_BUF_TO_AGI(bp)->agi_lsn))) 2506 - return false; 2506 + return __this_address; 2507 2507 } 2508 2508 2509 2509 /* 2510 2510 * Validate the magic number of the agi block. 2511 2511 */ 2512 2512 if (agi->agi_magicnum != cpu_to_be32(XFS_AGI_MAGIC)) 2513 - return false; 2513 + return __this_address; 2514 2514 if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum))) 2515 - return false; 2515 + return __this_address; 2516 2516 2517 2517 if (be32_to_cpu(agi->agi_level) < 1 || 2518 2518 be32_to_cpu(agi->agi_level) > XFS_BTREE_MAXLEVELS) 2519 - return false; 2519 + return __this_address; 2520 2520 2521 2521 if (xfs_sb_version_hasfinobt(&mp->m_sb) && 2522 2522 (be32_to_cpu(agi->agi_free_level) < 1 || 2523 2523 be32_to_cpu(agi->agi_free_level) > XFS_BTREE_MAXLEVELS)) 2524 - return false; 2524 + return __this_address; 2525 2525 2526 2526 /* 2527 2527 * during growfs operations, the perag is not fully initialised, ··· 2530 2530 * so we can detect and avoid this problem. 
2531 2531 */ 2532 2532 if (bp->b_pag && be32_to_cpu(agi->agi_seqno) != bp->b_pag->pag_agno) 2533 - return false; 2533 + return __this_address; 2534 2534 2535 2535 xfs_check_agi_unlinked(agi); 2536 - return true; 2536 + return NULL; 2537 2537 } 2538 2538 2539 2539 static void ··· 2541 2541 struct xfs_buf *bp) 2542 2542 { 2543 2543 struct xfs_mount *mp = bp->b_target->bt_mount; 2544 + xfs_failaddr_t fa; 2544 2545 2545 2546 if (xfs_sb_version_hascrc(&mp->m_sb) && 2546 2547 !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF)) 2547 - xfs_buf_ioerror(bp, -EFSBADCRC); 2548 - else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp, 2549 - XFS_ERRTAG_IALLOC_READ_AGI)) 2550 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 2551 - 2552 - if (bp->b_error) 2553 - xfs_verifier_error(bp); 2548 + xfs_verifier_error(bp, -EFSBADCRC, __this_address); 2549 + else { 2550 + fa = xfs_agi_verify(bp); 2551 + if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_IALLOC_READ_AGI)) 2552 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 2553 + } 2554 2554 } 2555 2555 2556 2556 static void 2557 2557 xfs_agi_write_verify( 2558 2558 struct xfs_buf *bp) 2559 2559 { 2560 - struct xfs_mount *mp = bp->b_target->bt_mount; 2561 - struct xfs_buf_log_item *bip = bp->b_fspriv; 2560 + struct xfs_mount *mp = bp->b_target->bt_mount; 2561 + struct xfs_buf_log_item *bip = bp->b_log_item; 2562 + xfs_failaddr_t fa; 2562 2563 2563 - if (!xfs_agi_verify(bp)) { 2564 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 2565 - xfs_verifier_error(bp); 2564 + fa = xfs_agi_verify(bp); 2565 + if (fa) { 2566 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 2566 2567 return; 2567 2568 } 2568 2569 ··· 2579 2578 .name = "xfs_agi", 2580 2579 .verify_read = xfs_agi_read_verify, 2581 2580 .verify_write = xfs_agi_write_verify, 2581 + .verify_struct = xfs_agi_verify, 2582 2582 }; 2583 2583 2584 2584 /* ··· 2752 2750 if (xfs_internal_inum(mp, ino)) 2753 2751 return false; 2754 2752 return xfs_verify_ino(mp, ino); 2753 + } 2754 + 2755 + /* Is there an inode record covering a given range of inode 
numbers? */ 2756 + int 2757 + xfs_ialloc_has_inode_record( 2758 + struct xfs_btree_cur *cur, 2759 + xfs_agino_t low, 2760 + xfs_agino_t high, 2761 + bool *exists) 2762 + { 2763 + struct xfs_inobt_rec_incore irec; 2764 + xfs_agino_t agino; 2765 + uint16_t holemask; 2766 + int has_record; 2767 + int i; 2768 + int error; 2769 + 2770 + *exists = false; 2771 + error = xfs_inobt_lookup(cur, low, XFS_LOOKUP_LE, &has_record); 2772 + while (error == 0 && has_record) { 2773 + error = xfs_inobt_get_rec(cur, &irec, &has_record); 2774 + if (error || irec.ir_startino > high) 2775 + break; 2776 + 2777 + agino = irec.ir_startino; 2778 + holemask = irec.ir_holemask; 2779 + for (i = 0; i < XFS_INOBT_HOLEMASK_BITS; holemask >>= 1, 2780 + i++, agino += XFS_INODES_PER_HOLEMASK_BIT) { 2781 + if (holemask & 1) 2782 + continue; 2783 + if (agino + XFS_INODES_PER_HOLEMASK_BIT > low && 2784 + agino <= high) { 2785 + *exists = true; 2786 + return 0; 2787 + } 2788 + } 2789 + 2790 + error = xfs_btree_increment(cur, 0, &has_record); 2791 + } 2792 + return error; 2793 + } 2794 + 2795 + /* Is there an inode record covering a given extent? */ 2796 + int 2797 + xfs_ialloc_has_inodes_at_extent( 2798 + struct xfs_btree_cur *cur, 2799 + xfs_agblock_t bno, 2800 + xfs_extlen_t len, 2801 + bool *exists) 2802 + { 2803 + xfs_agino_t low; 2804 + xfs_agino_t high; 2805 + 2806 + low = XFS_OFFBNO_TO_AGINO(cur->bc_mp, bno, 0); 2807 + high = XFS_OFFBNO_TO_AGINO(cur->bc_mp, bno + len, 0) - 1; 2808 + 2809 + return xfs_ialloc_has_inode_record(cur, low, high, exists); 2810 + } 2811 + 2812 + struct xfs_ialloc_count_inodes { 2813 + xfs_agino_t count; 2814 + xfs_agino_t freecount; 2815 + }; 2816 + 2817 + /* Record inode counts across all inobt records. 
*/ 2818 + STATIC int 2819 + xfs_ialloc_count_inodes_rec( 2820 + struct xfs_btree_cur *cur, 2821 + union xfs_btree_rec *rec, 2822 + void *priv) 2823 + { 2824 + struct xfs_inobt_rec_incore irec; 2825 + struct xfs_ialloc_count_inodes *ci = priv; 2826 + 2827 + xfs_inobt_btrec_to_irec(cur->bc_mp, rec, &irec); 2828 + ci->count += irec.ir_count; 2829 + ci->freecount += irec.ir_freecount; 2830 + 2831 + return 0; 2832 + } 2833 + 2834 + /* Count allocated and free inodes under an inobt. */ 2835 + int 2836 + xfs_ialloc_count_inodes( 2837 + struct xfs_btree_cur *cur, 2838 + xfs_agino_t *count, 2839 + xfs_agino_t *freecount) 2840 + { 2841 + struct xfs_ialloc_count_inodes ci = {0}; 2842 + int error; 2843 + 2844 + ASSERT(cur->bc_btnum == XFS_BTNUM_INO); 2845 + error = xfs_btree_query_all(cur, xfs_ialloc_count_inodes_rec, &ci); 2846 + if (error) 2847 + return error; 2848 + 2849 + *count = ci.count; 2850 + *freecount = ci.freecount; 2851 + return 0; 2755 2852 }
+6
fs/xfs/libxfs/xfs_ialloc.h
··· 170 170 union xfs_btree_rec; 171 171 void xfs_inobt_btrec_to_irec(struct xfs_mount *mp, union xfs_btree_rec *rec, 172 172 struct xfs_inobt_rec_incore *irec); 173 + int xfs_ialloc_has_inodes_at_extent(struct xfs_btree_cur *cur, 174 + xfs_agblock_t bno, xfs_extlen_t len, bool *exists); 175 + int xfs_ialloc_has_inode_record(struct xfs_btree_cur *cur, xfs_agino_t low, 176 + xfs_agino_t high, bool *exists); 177 + int xfs_ialloc_count_inodes(struct xfs_btree_cur *cur, xfs_agino_t *count, 178 + xfs_agino_t *freecount); 173 179 174 180 int xfs_ialloc_cluster_alignment(struct xfs_mount *mp); 175 181 void xfs_ialloc_agino_range(struct xfs_mount *mp, xfs_agnumber_t agno,
+50 -21
fs/xfs/libxfs/xfs_ialloc_btree.c
··· 141 141 union xfs_btree_ptr *new, 142 142 int *stat) 143 143 { 144 + if (cur->bc_mp->m_inotbt_nores) 145 + return xfs_inobt_alloc_block(cur, start, new, stat); 144 146 return __xfs_inobt_alloc_block(cur, start, new, stat, 145 147 XFS_AG_RESV_METADATA); 148 + } 149 + 150 + STATIC int 151 + __xfs_inobt_free_block( 152 + struct xfs_btree_cur *cur, 153 + struct xfs_buf *bp, 154 + enum xfs_ag_resv_type resv) 155 + { 156 + struct xfs_owner_info oinfo; 157 + 158 + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT); 159 + return xfs_free_extent(cur->bc_tp, 160 + XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1, 161 + &oinfo, resv); 146 162 } 147 163 148 164 STATIC int ··· 166 150 struct xfs_btree_cur *cur, 167 151 struct xfs_buf *bp) 168 152 { 169 - struct xfs_owner_info oinfo; 153 + return __xfs_inobt_free_block(cur, bp, XFS_AG_RESV_NONE); 154 + } 170 155 171 - xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT); 172 - return xfs_free_extent(cur->bc_tp, 173 - XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1, 174 - &oinfo, XFS_AG_RESV_NONE); 156 + STATIC int 157 + xfs_finobt_free_block( 158 + struct xfs_btree_cur *cur, 159 + struct xfs_buf *bp) 160 + { 161 + if (cur->bc_mp->m_inotbt_nores) 162 + return xfs_inobt_free_block(cur, bp); 163 + return __xfs_inobt_free_block(cur, bp, XFS_AG_RESV_METADATA); 175 164 } 176 165 177 166 STATIC int ··· 271 250 be32_to_cpu(k2->inobt.ir_startino); 272 251 } 273 252 274 - static int 253 + static xfs_failaddr_t 275 254 xfs_inobt_verify( 276 255 struct xfs_buf *bp) 277 256 { 278 257 struct xfs_mount *mp = bp->b_target->bt_mount; 279 258 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 259 + xfs_failaddr_t fa; 280 260 unsigned int level; 281 261 282 262 /* ··· 293 271 switch (block->bb_magic) { 294 272 case cpu_to_be32(XFS_IBT_CRC_MAGIC): 295 273 case cpu_to_be32(XFS_FIBT_CRC_MAGIC): 296 - if (!xfs_btree_sblock_v5hdr_verify(bp)) 297 - return false; 274 + fa = xfs_btree_sblock_v5hdr_verify(bp); 275 + if (fa) 276 + return fa; 298 277 /* fall 
through */ 299 278 case cpu_to_be32(XFS_IBT_MAGIC): 300 279 case cpu_to_be32(XFS_FIBT_MAGIC): 301 280 break; 302 281 default: 303 - return 0; 282 + return NULL; 304 283 } 305 284 306 285 /* level verification */ 307 286 level = be16_to_cpu(block->bb_level); 308 287 if (level >= mp->m_in_maxlevels) 309 - return false; 288 + return __this_address; 310 289 311 290 return xfs_btree_sblock_verify(bp, mp->m_inobt_mxr[level != 0]); 312 291 } ··· 316 293 xfs_inobt_read_verify( 317 294 struct xfs_buf *bp) 318 295 { 319 - if (!xfs_btree_sblock_verify_crc(bp)) 320 - xfs_buf_ioerror(bp, -EFSBADCRC); 321 - else if (!xfs_inobt_verify(bp)) 322 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 296 + xfs_failaddr_t fa; 323 297 324 - if (bp->b_error) { 325 - trace_xfs_btree_corrupt(bp, _RET_IP_); 326 - xfs_verifier_error(bp); 298 + if (!xfs_btree_sblock_verify_crc(bp)) 299 + xfs_verifier_error(bp, -EFSBADCRC, __this_address); 300 + else { 301 + fa = xfs_inobt_verify(bp); 302 + if (fa) 303 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 327 304 } 305 + 306 + if (bp->b_error) 307 + trace_xfs_btree_corrupt(bp, _RET_IP_); 328 308 } 329 309 330 310 static void 331 311 xfs_inobt_write_verify( 332 312 struct xfs_buf *bp) 333 313 { 334 - if (!xfs_inobt_verify(bp)) { 314 + xfs_failaddr_t fa; 315 + 316 + fa = xfs_inobt_verify(bp); 317 + if (fa) { 335 318 trace_xfs_btree_corrupt(bp, _RET_IP_); 336 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 337 - xfs_verifier_error(bp); 319 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 338 320 return; 339 321 } 340 322 xfs_btree_sblock_calc_crc(bp); ··· 350 322 .name = "xfs_inobt", 351 323 .verify_read = xfs_inobt_read_verify, 352 324 .verify_write = xfs_inobt_write_verify, 325 + .verify_struct = xfs_inobt_verify, 353 326 }; 354 327 355 328 STATIC int ··· 401 372 .dup_cursor = xfs_inobt_dup_cursor, 402 373 .set_root = xfs_finobt_set_root, 403 374 .alloc_block = xfs_finobt_alloc_block, 404 - .free_block = xfs_inobt_free_block, 375 + .free_block = xfs_finobt_free_block, 405 376 
.get_minrecs = xfs_inobt_get_minrecs, 406 377 .get_maxrecs = xfs_inobt_get_maxrecs, 407 378 .init_key_from_rec = xfs_inobt_init_key_from_rec,
+98 -30
fs/xfs/libxfs/xfs_inode_buf.c
··· 115 115 return; 116 116 } 117 117 118 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 119 - xfs_verifier_error(bp); 118 + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); 120 119 #ifdef DEBUG 121 120 xfs_alert(mp, 122 121 "bad inode magic/vsn daddr %lld #%d (magic=%x)", ··· 383 384 } 384 385 } 385 386 386 - bool 387 + xfs_failaddr_t 387 388 xfs_dinode_verify( 388 389 struct xfs_mount *mp, 389 390 xfs_ino_t ino, ··· 392 393 uint16_t mode; 393 394 uint16_t flags; 394 395 uint64_t flags2; 396 + uint64_t di_size; 395 397 396 398 if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) 397 - return false; 399 + return __this_address; 400 + 401 + /* Verify v3 integrity information first */ 402 + if (dip->di_version >= 3) { 403 + if (!xfs_sb_version_hascrc(&mp->m_sb)) 404 + return __this_address; 405 + if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, 406 + XFS_DINODE_CRC_OFF)) 407 + return __this_address; 408 + if (be64_to_cpu(dip->di_ino) != ino) 409 + return __this_address; 410 + if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid)) 411 + return __this_address; 412 + } 398 413 399 414 /* don't allow invalid i_size */ 400 - if (be64_to_cpu(dip->di_size) & (1ULL << 63)) 401 - return false; 415 + di_size = be64_to_cpu(dip->di_size); 416 + if (di_size & (1ULL << 63)) 417 + return __this_address; 402 418 403 419 mode = be16_to_cpu(dip->di_mode); 404 420 if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN) 405 - return false; 421 + return __this_address; 406 422 407 423 /* No zero-length symlinks/dirs. 
*/ 408 - if ((S_ISLNK(mode) || S_ISDIR(mode)) && dip->di_size == 0) 409 - return false; 424 + if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) 425 + return __this_address; 426 + 427 + /* Fork checks carried over from xfs_iformat_fork */ 428 + if (mode && 429 + be32_to_cpu(dip->di_nextents) + be16_to_cpu(dip->di_anextents) > 430 + be64_to_cpu(dip->di_nblocks)) 431 + return __this_address; 432 + 433 + if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize) 434 + return __this_address; 435 + 436 + flags = be16_to_cpu(dip->di_flags); 437 + 438 + if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp) 439 + return __this_address; 440 + 441 + /* Do we have appropriate data fork formats for the mode? */ 442 + switch (mode & S_IFMT) { 443 + case S_IFIFO: 444 + case S_IFCHR: 445 + case S_IFBLK: 446 + case S_IFSOCK: 447 + if (dip->di_format != XFS_DINODE_FMT_DEV) 448 + return __this_address; 449 + break; 450 + case S_IFREG: 451 + case S_IFLNK: 452 + case S_IFDIR: 453 + switch (dip->di_format) { 454 + case XFS_DINODE_FMT_LOCAL: 455 + /* 456 + * no local regular files yet 457 + */ 458 + if (S_ISREG(mode)) 459 + return __this_address; 460 + if (di_size > XFS_DFORK_DSIZE(dip, mp)) 461 + return __this_address; 462 + /* fall through */ 463 + case XFS_DINODE_FMT_EXTENTS: 464 + case XFS_DINODE_FMT_BTREE: 465 + break; 466 + default: 467 + return __this_address; 468 + } 469 + break; 470 + case 0: 471 + /* Uninitialized inode ok. 
*/ 472 + break; 473 + default: 474 + return __this_address; 475 + } 476 + 477 + if (XFS_DFORK_Q(dip)) { 478 + switch (dip->di_aformat) { 479 + case XFS_DINODE_FMT_LOCAL: 480 + case XFS_DINODE_FMT_EXTENTS: 481 + case XFS_DINODE_FMT_BTREE: 482 + break; 483 + default: 484 + return __this_address; 485 + } 486 + } 410 487 411 488 /* only version 3 or greater inodes are extensively verified here */ 412 489 if (dip->di_version < 3) 413 - return true; 490 + return NULL; 414 491 415 - if (!xfs_sb_version_hascrc(&mp->m_sb)) 416 - return false; 417 - if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, 418 - XFS_DINODE_CRC_OFF)) 419 - return false; 420 - if (be64_to_cpu(dip->di_ino) != ino) 421 - return false; 422 - if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid)) 423 - return false; 424 - 425 - flags = be16_to_cpu(dip->di_flags); 426 492 flags2 = be64_to_cpu(dip->di_flags2); 427 493 428 494 /* don't allow reflink/cowextsize if we don't have reflink */ 429 495 if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) && 430 496 !xfs_sb_version_hasreflink(&mp->m_sb)) 431 - return false; 497 + return __this_address; 498 + 499 + /* only regular files get reflink */ 500 + if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG) 501 + return __this_address; 432 502 433 503 /* don't let reflink and realtime mix */ 434 504 if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME)) 435 - return false; 505 + return __this_address; 436 506 437 507 /* don't let reflink and dax mix */ 438 508 if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags2 & XFS_DIFLAG2_DAX)) 439 - return false; 509 + return __this_address; 440 510 441 - return true; 511 + return NULL; 442 512 } 443 513 444 514 void ··· 547 479 { 548 480 xfs_buf_t *bp; 549 481 xfs_dinode_t *dip; 482 + xfs_failaddr_t fa; 550 483 int error; 551 484 552 485 /* ··· 579 510 return error; 580 511 581 512 /* even unallocated inodes are verified */ 582 - if (!xfs_dinode_verify(mp, ip->i_ino, dip)) { 583 - 
xfs_alert(mp, "%s: validation failed for inode %lld", 584 - __func__, ip->i_ino); 585 - 586 - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip); 513 + fa = xfs_dinode_verify(mp, ip->i_ino, dip); 514 + if (fa) { 515 + xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", dip, 516 + sizeof(*dip), fa); 587 517 error = -EFSCORRUPTED; 588 518 goto out_brelse; 589 519 }
+2 -2
fs/xfs/libxfs/xfs_inode_buf.h
··· 82 82 #define xfs_inobp_check(mp, bp) 83 83 #endif /* DEBUG */ 84 84 85 - bool xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino, 86 - struct xfs_dinode *dip); 85 + xfs_failaddr_t xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino, 86 + struct xfs_dinode *dip); 87 87 88 88 #endif /* __XFS_INODE_BUF_H__ */
+45 -107
fs/xfs/libxfs/xfs_inode_fork.c
··· 35 35 #include "xfs_da_format.h" 36 36 #include "xfs_da_btree.h" 37 37 #include "xfs_dir2_priv.h" 38 + #include "xfs_attr_leaf.h" 39 + #include "xfs_shared.h" 38 40 39 41 kmem_zone_t *xfs_ifork_zone; 40 42 ··· 64 62 int error = 0; 65 63 xfs_fsize_t di_size; 66 64 67 - if (unlikely(be32_to_cpu(dip->di_nextents) + 68 - be16_to_cpu(dip->di_anextents) > 69 - be64_to_cpu(dip->di_nblocks))) { 70 - xfs_warn(ip->i_mount, 71 - "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.", 72 - (unsigned long long)ip->i_ino, 73 - (int)(be32_to_cpu(dip->di_nextents) + 74 - be16_to_cpu(dip->di_anextents)), 75 - (unsigned long long) 76 - be64_to_cpu(dip->di_nblocks)); 77 - XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW, 78 - ip->i_mount, dip); 79 - return -EFSCORRUPTED; 80 - } 81 - 82 - if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { 83 - xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.", 84 - (unsigned long long)ip->i_ino, 85 - dip->di_forkoff); 86 - XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, 87 - ip->i_mount, dip); 88 - return -EFSCORRUPTED; 89 - } 90 - 91 - if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && 92 - !ip->i_mount->m_rtdev_targp)) { 93 - xfs_warn(ip->i_mount, 94 - "corrupt dinode %Lu, has realtime flag set.", 95 - ip->i_ino); 96 - XFS_CORRUPTION_ERROR("xfs_iformat(realtime)", 97 - XFS_ERRLEVEL_LOW, ip->i_mount, dip); 98 - return -EFSCORRUPTED; 99 - } 100 - 101 - if (unlikely(xfs_is_reflink_inode(ip) && !S_ISREG(inode->i_mode))) { 102 - xfs_warn(ip->i_mount, 103 - "corrupt dinode %llu, wrong file type for reflink.", 104 - ip->i_ino); 105 - XFS_CORRUPTION_ERROR("xfs_iformat(reflink)", 106 - XFS_ERRLEVEL_LOW, ip->i_mount, dip); 107 - return -EFSCORRUPTED; 108 - } 109 - 110 - if (unlikely(xfs_is_reflink_inode(ip) && 111 - (ip->i_d.di_flags & XFS_DIFLAG_REALTIME))) { 112 - xfs_warn(ip->i_mount, 113 - "corrupt dinode %llu, has reflink+realtime flag set.", 114 - ip->i_ino); 115 - 
XFS_CORRUPTION_ERROR("xfs_iformat(reflink)", 116 - XFS_ERRLEVEL_LOW, ip->i_mount, dip); 117 - return -EFSCORRUPTED; 118 - } 119 - 120 65 switch (inode->i_mode & S_IFMT) { 121 66 case S_IFIFO: 122 67 case S_IFCHR: 123 68 case S_IFBLK: 124 69 case S_IFSOCK: 125 - if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) { 126 - XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW, 127 - ip->i_mount, dip); 128 - return -EFSCORRUPTED; 129 - } 130 70 ip->i_d.di_size = 0; 131 71 inode->i_rdev = xfs_to_linux_dev_t(xfs_dinode_get_rdev(dip)); 132 72 break; ··· 78 134 case S_IFDIR: 79 135 switch (dip->di_format) { 80 136 case XFS_DINODE_FMT_LOCAL: 81 - /* 82 - * no local regular files yet 83 - */ 84 - if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) { 85 - xfs_warn(ip->i_mount, 86 - "corrupt inode %Lu (local format for regular file).", 87 - (unsigned long long) ip->i_ino); 88 - XFS_CORRUPTION_ERROR("xfs_iformat(4)", 89 - XFS_ERRLEVEL_LOW, 90 - ip->i_mount, dip); 91 - return -EFSCORRUPTED; 92 - } 93 - 94 137 di_size = be64_to_cpu(dip->di_size); 95 - if (unlikely(di_size < 0 || 96 - di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { 97 - xfs_warn(ip->i_mount, 98 - "corrupt inode %Lu (bad size %Ld for local inode).", 99 - (unsigned long long) ip->i_ino, 100 - (long long) di_size); 101 - XFS_CORRUPTION_ERROR("xfs_iformat(5)", 102 - XFS_ERRLEVEL_LOW, 103 - ip->i_mount, dip); 104 - return -EFSCORRUPTED; 105 - } 106 - 107 138 size = (int)di_size; 108 139 error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size); 109 140 break; ··· 89 170 error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK); 90 171 break; 91 172 default: 92 - XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW, 93 - ip->i_mount); 94 173 return -EFSCORRUPTED; 95 174 } 96 175 break; 97 176 98 177 default: 99 - XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount); 100 178 return -EFSCORRUPTED; 101 179 } 102 180 if (error) 103 181 return error; 104 - 105 - /* Check inline dir contents. 
*/ 106 - if (S_ISDIR(inode->i_mode) && dip->di_format == XFS_DINODE_FMT_LOCAL) { 107 - error = xfs_dir2_sf_verify(ip); 108 - if (error) { 109 - xfs_idestroy_fork(ip, XFS_DATA_FORK); 110 - return error; 111 - } 112 - } 113 182 114 183 if (xfs_is_reflink_inode(ip)) { 115 184 ASSERT(ip->i_cowfp == NULL); ··· 114 207 case XFS_DINODE_FMT_LOCAL: 115 208 atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); 116 209 size = be16_to_cpu(atp->hdr.totsize); 117 - 118 - if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) { 119 - xfs_warn(ip->i_mount, 120 - "corrupt inode %Lu (bad attr fork size %Ld).", 121 - (unsigned long long) ip->i_ino, 122 - (long long) size); 123 - XFS_CORRUPTION_ERROR("xfs_iformat(8)", 124 - XFS_ERRLEVEL_LOW, 125 - ip->i_mount, dip); 126 - error = -EFSCORRUPTED; 127 - break; 128 - } 129 210 130 211 error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size); 131 212 break; ··· 298 403 */ 299 404 if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= 300 405 XFS_IFORK_MAXEXT(ip, whichfork) || 406 + nrecs == 0 || 301 407 XFS_BMDR_SPACE_CALC(nrecs) > 302 408 XFS_DFORK_SIZE(dip, mp, whichfork) || 303 409 XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks) || ··· 722 826 ip->i_cowfp->if_flags = XFS_IFEXTENTS; 723 827 ip->i_cformat = XFS_DINODE_FMT_EXTENTS; 724 828 ip->i_cnextents = 0; 829 + } 830 + 831 + /* Default fork content verifiers. */ 832 + struct xfs_ifork_ops xfs_default_ifork_ops = { 833 + .verify_attr = xfs_attr_shortform_verify, 834 + .verify_dir = xfs_dir2_sf_verify, 835 + .verify_symlink = xfs_symlink_shortform_verify, 836 + }; 837 + 838 + /* Verify the inline contents of the data fork of an inode. */ 839 + xfs_failaddr_t 840 + xfs_ifork_verify_data( 841 + struct xfs_inode *ip, 842 + struct xfs_ifork_ops *ops) 843 + { 844 + /* Non-local data fork, we're done. */ 845 + if (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) 846 + return NULL; 847 + 848 + /* Check the inline data fork if there is one. 
*/ 849 + switch (VFS_I(ip)->i_mode & S_IFMT) { 850 + case S_IFDIR: 851 + return ops->verify_dir(ip); 852 + case S_IFLNK: 853 + return ops->verify_symlink(ip); 854 + default: 855 + return NULL; 856 + } 857 + } 858 + 859 + /* Verify the inline contents of the attr fork of an inode. */ 860 + xfs_failaddr_t 861 + xfs_ifork_verify_attr( 862 + struct xfs_inode *ip, 863 + struct xfs_ifork_ops *ops) 864 + { 865 + /* There has to be an attr fork allocated if aformat is local. */ 866 + if (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) 867 + return NULL; 868 + if (!XFS_IFORK_PTR(ip, XFS_ATTR_FORK)) 869 + return __this_address; 870 + return ops->verify_attr(ip); 725 871 }
+14
fs/xfs/libxfs/xfs_inode_fork.h
··· 186 186 187 187 extern void xfs_ifork_init_cow(struct xfs_inode *ip); 188 188 189 + typedef xfs_failaddr_t (*xfs_ifork_verifier_t)(struct xfs_inode *); 190 + 191 + struct xfs_ifork_ops { 192 + xfs_ifork_verifier_t verify_symlink; 193 + xfs_ifork_verifier_t verify_dir; 194 + xfs_ifork_verifier_t verify_attr; 195 + }; 196 + extern struct xfs_ifork_ops xfs_default_ifork_ops; 197 + 198 + xfs_failaddr_t xfs_ifork_verify_data(struct xfs_inode *ip, 199 + struct xfs_ifork_ops *ops); 200 + xfs_failaddr_t xfs_ifork_verify_attr(struct xfs_inode *ip, 201 + struct xfs_ifork_ops *ops); 202 + 189 203 #endif /* __XFS_INODE_FORK_H__ */
+1 -1
fs/xfs/libxfs/xfs_log_rlimit.c
··· 55 55 * the maximum one in terms of the pre-calculated values which were done 56 56 * at mount time. 57 57 */ 58 - STATIC void 58 + void 59 59 xfs_log_get_max_trans_res( 60 60 struct xfs_mount *mp, 61 61 struct xfs_trans_res *max_resp)
+5 -4
fs/xfs/libxfs/xfs_quota_defs.h
··· 112 112 #define XFS_QMOPT_PQUOTA 0x0000008 /* project dquot requested */ 113 113 #define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */ 114 114 #define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */ 115 - #define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */ 116 - #define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */ 117 115 #define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */ 118 116 #define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */ 119 117 #define XFS_QMOPT_DQNEXT 0x0008000 /* return next dquot >= this ID */ ··· 151 153 (XFS_QMOPT_UQUOTA | XFS_QMOPT_PQUOTA | XFS_QMOPT_GQUOTA) 152 154 #define XFS_QMOPT_RESBLK_MASK (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS) 153 155 154 - extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq, 155 - xfs_dqid_t id, uint type, uint flags, const char *str); 156 + extern xfs_failaddr_t xfs_dquot_verify(struct xfs_mount *mp, 157 + struct xfs_disk_dquot *ddq, xfs_dqid_t id, uint type, 158 + uint flags); 156 159 extern int xfs_calc_dquots_per_chunk(unsigned int nbblks); 160 + extern int xfs_dquot_repair(struct xfs_mount *mp, struct xfs_disk_dquot *ddq, 161 + xfs_dqid_t id, uint type); 157 162 158 163 #endif /* __XFS_QUOTA_H__ */
+19
fs/xfs/libxfs/xfs_refcount.c
··· 1696 1696 xfs_trans_brelse(tp, agbp); 1697 1697 goto out_trans; 1698 1698 } 1699 + 1700 + /* Is there a record covering a given extent? */ 1701 + int 1702 + xfs_refcount_has_record( 1703 + struct xfs_btree_cur *cur, 1704 + xfs_agblock_t bno, 1705 + xfs_extlen_t len, 1706 + bool *exists) 1707 + { 1708 + union xfs_btree_irec low; 1709 + union xfs_btree_irec high; 1710 + 1711 + memset(&low, 0, sizeof(low)); 1712 + low.rc.rc_startblock = bno; 1713 + memset(&high, 0xFF, sizeof(high)); 1714 + high.rc.rc_startblock = bno + len - 1; 1715 + 1716 + return xfs_btree_has_record(cur, &low, &high, exists); 1717 + }
+3
fs/xfs/libxfs/xfs_refcount.h
··· 83 83 return (log_res * 3 / 4) / XFS_REFCOUNT_ITEM_OVERHEAD; 84 84 } 85 85 86 + extern int xfs_refcount_has_record(struct xfs_btree_cur *cur, 87 + xfs_agblock_t bno, xfs_extlen_t len, bool *exists); 88 + 86 89 #endif /* __XFS_REFCOUNT_H__ */
+25 -17
fs/xfs/libxfs/xfs_refcount_btree.c
··· 223 223 be32_to_cpu(k2->refc.rc_startblock); 224 224 } 225 225 226 - STATIC bool 226 + STATIC xfs_failaddr_t 227 227 xfs_refcountbt_verify( 228 228 struct xfs_buf *bp) 229 229 { 230 230 struct xfs_mount *mp = bp->b_target->bt_mount; 231 231 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 232 232 struct xfs_perag *pag = bp->b_pag; 233 + xfs_failaddr_t fa; 233 234 unsigned int level; 234 235 235 236 if (block->bb_magic != cpu_to_be32(XFS_REFC_CRC_MAGIC)) 236 - return false; 237 + return __this_address; 237 238 238 239 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 239 - return false; 240 - if (!xfs_btree_sblock_v5hdr_verify(bp)) 241 - return false; 240 + return __this_address; 241 + fa = xfs_btree_sblock_v5hdr_verify(bp); 242 + if (fa) 243 + return fa; 242 244 243 245 level = be16_to_cpu(block->bb_level); 244 246 if (pag && pag->pagf_init) { 245 247 if (level >= pag->pagf_refcount_level) 246 - return false; 248 + return __this_address; 247 249 } else if (level >= mp->m_refc_maxlevels) 248 - return false; 250 + return __this_address; 249 251 250 252 return xfs_btree_sblock_verify(bp, mp->m_refc_mxr[level != 0]); 251 253 } ··· 256 254 xfs_refcountbt_read_verify( 257 255 struct xfs_buf *bp) 258 256 { 259 - if (!xfs_btree_sblock_verify_crc(bp)) 260 - xfs_buf_ioerror(bp, -EFSBADCRC); 261 - else if (!xfs_refcountbt_verify(bp)) 262 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 257 + xfs_failaddr_t fa; 263 258 264 - if (bp->b_error) { 265 - trace_xfs_btree_corrupt(bp, _RET_IP_); 266 - xfs_verifier_error(bp); 259 + if (!xfs_btree_sblock_verify_crc(bp)) 260 + xfs_verifier_error(bp, -EFSBADCRC, __this_address); 261 + else { 262 + fa = xfs_refcountbt_verify(bp); 263 + if (fa) 264 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 267 265 } 266 + 267 + if (bp->b_error) 268 + trace_xfs_btree_corrupt(bp, _RET_IP_); 268 269 } 269 270 270 271 STATIC void 271 272 xfs_refcountbt_write_verify( 272 273 struct xfs_buf *bp) 273 274 { 274 - if (!xfs_refcountbt_verify(bp)) { 275 + xfs_failaddr_t fa; 
276 + 277 + fa = xfs_refcountbt_verify(bp); 278 + if (fa) { 275 279 trace_xfs_btree_corrupt(bp, _RET_IP_); 276 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 277 - xfs_verifier_error(bp); 280 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 278 281 return; 279 282 } 280 283 xfs_btree_sblock_calc_crc(bp); ··· 290 283 .name = "xfs_refcountbt", 291 284 .verify_read = xfs_refcountbt_read_verify, 292 285 .verify_write = xfs_refcountbt_write_verify, 286 + .verify_struct = xfs_refcountbt_verify, 293 287 }; 294 288 295 289 STATIC int
+67
fs/xfs/libxfs/xfs_rmap.c
··· 2387 2387 else 2388 2388 return 0; 2389 2389 } 2390 + 2391 + /* Is there a record covering a given extent? */ 2392 + int 2393 + xfs_rmap_has_record( 2394 + struct xfs_btree_cur *cur, 2395 + xfs_agblock_t bno, 2396 + xfs_extlen_t len, 2397 + bool *exists) 2398 + { 2399 + union xfs_btree_irec low; 2400 + union xfs_btree_irec high; 2401 + 2402 + memset(&low, 0, sizeof(low)); 2403 + low.r.rm_startblock = bno; 2404 + memset(&high, 0xFF, sizeof(high)); 2405 + high.r.rm_startblock = bno + len - 1; 2406 + 2407 + return xfs_btree_has_record(cur, &low, &high, exists); 2408 + } 2409 + 2410 + /* 2411 + * Is there a record for this owner completely covering a given physical 2412 + * extent? If so, *has_rmap will be set to true. If there is no record 2413 + * or the record only covers part of the range, we set *has_rmap to false. 2414 + * This function doesn't perform range lookups or offset checks, so it is 2415 + * not suitable for checking data fork blocks. 2416 + */ 2417 + int 2418 + xfs_rmap_record_exists( 2419 + struct xfs_btree_cur *cur, 2420 + xfs_agblock_t bno, 2421 + xfs_extlen_t len, 2422 + struct xfs_owner_info *oinfo, 2423 + bool *has_rmap) 2424 + { 2425 + uint64_t owner; 2426 + uint64_t offset; 2427 + unsigned int flags; 2428 + int has_record; 2429 + struct xfs_rmap_irec irec; 2430 + int error; 2431 + 2432 + xfs_owner_info_unpack(oinfo, &owner, &offset, &flags); 2433 + ASSERT(XFS_RMAP_NON_INODE_OWNER(owner) || 2434 + (flags & XFS_RMAP_BMBT_BLOCK)); 2435 + 2436 + error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags, 2437 + &has_record); 2438 + if (error) 2439 + return error; 2440 + if (!has_record) { 2441 + *has_rmap = false; 2442 + return 0; 2443 + } 2444 + 2445 + error = xfs_rmap_get_rec(cur, &irec, &has_record); 2446 + if (error) 2447 + return error; 2448 + if (!has_record) { 2449 + *has_rmap = false; 2450 + return 0; 2451 + } 2452 + 2453 + *has_rmap = (irec.rm_owner == owner && irec.rm_startblock <= bno && 2454 + irec.rm_startblock + 
irec.rm_blockcount >= bno + len); 2455 + return 0; 2456 + }
+5
fs/xfs/libxfs/xfs_rmap.h
··· 233 233 union xfs_btree_rec; 234 234 int xfs_rmap_btrec_to_irec(union xfs_btree_rec *rec, 235 235 struct xfs_rmap_irec *irec); 236 + int xfs_rmap_has_record(struct xfs_btree_cur *cur, xfs_agblock_t bno, 237 + xfs_extlen_t len, bool *exists); 238 + int xfs_rmap_record_exists(struct xfs_btree_cur *cur, xfs_agblock_t bno, 239 + xfs_extlen_t len, struct xfs_owner_info *oinfo, 240 + bool *has_rmap); 236 241 237 242 #endif /* __XFS_RMAP_H__ */
+25 -17
fs/xfs/libxfs/xfs_rmap_btree.c
··· 303 303 return 0; 304 304 } 305 305 306 - static bool 306 + static xfs_failaddr_t 307 307 xfs_rmapbt_verify( 308 308 struct xfs_buf *bp) 309 309 { 310 310 struct xfs_mount *mp = bp->b_target->bt_mount; 311 311 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 312 312 struct xfs_perag *pag = bp->b_pag; 313 + xfs_failaddr_t fa; 313 314 unsigned int level; 314 315 315 316 /* ··· 326 325 * in this case. 327 326 */ 328 327 if (block->bb_magic != cpu_to_be32(XFS_RMAP_CRC_MAGIC)) 329 - return false; 328 + return __this_address; 330 329 331 330 if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) 332 - return false; 333 - if (!xfs_btree_sblock_v5hdr_verify(bp)) 334 - return false; 331 + return __this_address; 332 + fa = xfs_btree_sblock_v5hdr_verify(bp); 333 + if (fa) 334 + return fa; 335 335 336 336 level = be16_to_cpu(block->bb_level); 337 337 if (pag && pag->pagf_init) { 338 338 if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi]) 339 - return false; 339 + return __this_address; 340 340 } else if (level >= mp->m_rmap_maxlevels) 341 - return false; 341 + return __this_address; 342 342 343 343 return xfs_btree_sblock_verify(bp, mp->m_rmap_mxr[level != 0]); 344 344 } ··· 348 346 xfs_rmapbt_read_verify( 349 347 struct xfs_buf *bp) 350 348 { 351 - if (!xfs_btree_sblock_verify_crc(bp)) 352 - xfs_buf_ioerror(bp, -EFSBADCRC); 353 - else if (!xfs_rmapbt_verify(bp)) 354 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 349 + xfs_failaddr_t fa; 355 350 356 - if (bp->b_error) { 357 - trace_xfs_btree_corrupt(bp, _RET_IP_); 358 - xfs_verifier_error(bp); 351 + if (!xfs_btree_sblock_verify_crc(bp)) 352 + xfs_verifier_error(bp, -EFSBADCRC, __this_address); 353 + else { 354 + fa = xfs_rmapbt_verify(bp); 355 + if (fa) 356 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 359 357 } 358 + 359 + if (bp->b_error) 360 + trace_xfs_btree_corrupt(bp, _RET_IP_); 360 361 } 361 362 362 363 static void 363 364 xfs_rmapbt_write_verify( 364 365 struct xfs_buf *bp) 365 366 { 366 - if (!xfs_rmapbt_verify(bp)) { 367 + 
xfs_failaddr_t fa; 368 + 369 + fa = xfs_rmapbt_verify(bp); 370 + if (fa) { 367 371 trace_xfs_btree_corrupt(bp, _RET_IP_); 368 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 369 - xfs_verifier_error(bp); 372 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 370 373 return; 371 374 } 372 375 xfs_btree_sblock_calc_crc(bp); ··· 382 375 .name = "xfs_rmapbt", 383 376 .verify_read = xfs_rmapbt_read_verify, 384 377 .verify_write = xfs_rmapbt_write_verify, 378 + .verify_struct = xfs_rmapbt_verify, 385 379 }; 386 380 387 381 STATIC int
+21
fs/xfs/libxfs/xfs_rtbitmap.c
··· 1097 1097 { 1098 1098 return rtbno < mp->m_sb.sb_rblocks; 1099 1099 } 1100 + 1101 + /* Is the given extent all free? */ 1102 + int 1103 + xfs_rtalloc_extent_is_free( 1104 + struct xfs_mount *mp, 1105 + struct xfs_trans *tp, 1106 + xfs_rtblock_t start, 1107 + xfs_extlen_t len, 1108 + bool *is_free) 1109 + { 1110 + xfs_rtblock_t end; 1111 + int matches; 1112 + int error; 1113 + 1114 + error = xfs_rtcheck_range(mp, tp, start, len, 1, &end, &matches); 1115 + if (error) 1116 + return error; 1117 + 1118 + *is_free = matches; 1119 + return 0; 1120 + }
+106 -7
fs/xfs/libxfs/xfs_sb.c
··· 40 40 #include "xfs_rmap_btree.h" 41 41 #include "xfs_bmap.h" 42 42 #include "xfs_refcount_btree.h" 43 + #include "xfs_da_format.h" 44 + #include "xfs_da_btree.h" 43 45 44 46 /* 45 47 * Physical superblock buffer manipulations. Shared with libxfs in userspace. ··· 118 116 bool check_inprogress, 119 117 bool check_version) 120 118 { 119 + u32 agcount = 0; 120 + u32 rem; 121 + 121 122 if (sbp->sb_magicnum != XFS_SB_MAGIC) { 122 123 xfs_warn(mp, "bad magic number"); 123 124 return -EWRONGFS; ··· 231 226 return -EINVAL; 232 227 } 233 228 229 + /* Compute agcount for this number of dblocks and agblocks */ 230 + if (sbp->sb_agblocks) { 231 + agcount = div_u64_rem(sbp->sb_dblocks, sbp->sb_agblocks, &rem); 232 + if (rem) 233 + agcount++; 234 + } 235 + 234 236 /* 235 237 * More sanity checking. Most of these were stolen directly from 236 238 * xfs_repair. ··· 262 250 sbp->sb_inodesize != (1 << sbp->sb_inodelog) || 263 251 sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE || 264 252 sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) || 253 + XFS_FSB_TO_B(mp, sbp->sb_agblocks) < XFS_MIN_AG_BYTES || 254 + XFS_FSB_TO_B(mp, sbp->sb_agblocks) > XFS_MAX_AG_BYTES || 255 + sbp->sb_agblklog != xfs_highbit32(sbp->sb_agblocks - 1) + 1 || 256 + agcount == 0 || agcount != sbp->sb_agcount || 265 257 (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || 266 258 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || 267 259 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || ··· 656 640 error = xfs_sb_verify(bp, true); 657 641 658 642 out_error: 659 - if (error) { 643 + if (error == -EFSCORRUPTED || error == -EFSBADCRC) 644 + xfs_verifier_error(bp, error, __this_address); 645 + else if (error) 660 646 xfs_buf_ioerror(bp, error); 661 - if (error == -EFSCORRUPTED || error == -EFSBADCRC) 662 - xfs_verifier_error(bp); 663 - } 664 647 } 665 648 666 649 /* ··· 688 673 struct xfs_buf *bp) 689 674 { 690 675 struct xfs_mount *mp = bp->b_target->bt_mount; 691 - 
struct xfs_buf_log_item *bip = bp->b_fspriv; 676 + struct xfs_buf_log_item *bip = bp->b_log_item; 692 677 int error; 693 678 694 679 error = xfs_sb_verify(bp, false); 695 680 if (error) { 696 - xfs_buf_ioerror(bp, error); 697 - xfs_verifier_error(bp); 681 + xfs_verifier_error(bp, error, __this_address); 698 682 return; 699 683 } 700 684 ··· 889 875 if (wait) 890 876 xfs_trans_set_sync(tp); 891 877 return xfs_trans_commit(tp); 878 + } 879 + 880 + int 881 + xfs_fs_geometry( 882 + struct xfs_sb *sbp, 883 + struct xfs_fsop_geom *geo, 884 + int struct_version) 885 + { 886 + memset(geo, 0, sizeof(struct xfs_fsop_geom)); 887 + 888 + geo->blocksize = sbp->sb_blocksize; 889 + geo->rtextsize = sbp->sb_rextsize; 890 + geo->agblocks = sbp->sb_agblocks; 891 + geo->agcount = sbp->sb_agcount; 892 + geo->logblocks = sbp->sb_logblocks; 893 + geo->sectsize = sbp->sb_sectsize; 894 + geo->inodesize = sbp->sb_inodesize; 895 + geo->imaxpct = sbp->sb_imax_pct; 896 + geo->datablocks = sbp->sb_dblocks; 897 + geo->rtblocks = sbp->sb_rblocks; 898 + geo->rtextents = sbp->sb_rextents; 899 + geo->logstart = sbp->sb_logstart; 900 + BUILD_BUG_ON(sizeof(geo->uuid) != sizeof(sbp->sb_uuid)); 901 + memcpy(geo->uuid, &sbp->sb_uuid, sizeof(sbp->sb_uuid)); 902 + 903 + if (struct_version < 2) 904 + return 0; 905 + 906 + geo->sunit = sbp->sb_unit; 907 + geo->swidth = sbp->sb_width; 908 + 909 + if (struct_version < 3) 910 + return 0; 911 + 912 + geo->version = XFS_FSOP_GEOM_VERSION; 913 + geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK | 914 + XFS_FSOP_GEOM_FLAGS_DIRV2; 915 + if (xfs_sb_version_hasattr(sbp)) 916 + geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR; 917 + if (xfs_sb_version_hasquota(sbp)) 918 + geo->flags |= XFS_FSOP_GEOM_FLAGS_QUOTA; 919 + if (xfs_sb_version_hasalign(sbp)) 920 + geo->flags |= XFS_FSOP_GEOM_FLAGS_IALIGN; 921 + if (xfs_sb_version_hasdalign(sbp)) 922 + geo->flags |= XFS_FSOP_GEOM_FLAGS_DALIGN; 923 + if (xfs_sb_version_hasextflgbit(sbp)) 924 + geo->flags |= XFS_FSOP_GEOM_FLAGS_EXTFLG; 925 + if 
(xfs_sb_version_hassector(sbp)) 926 + geo->flags |= XFS_FSOP_GEOM_FLAGS_SECTOR; 927 + if (xfs_sb_version_hasasciici(sbp)) 928 + geo->flags |= XFS_FSOP_GEOM_FLAGS_DIRV2CI; 929 + if (xfs_sb_version_haslazysbcount(sbp)) 930 + geo->flags |= XFS_FSOP_GEOM_FLAGS_LAZYSB; 931 + if (xfs_sb_version_hasattr2(sbp)) 932 + geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR2; 933 + if (xfs_sb_version_hasprojid32bit(sbp)) 934 + geo->flags |= XFS_FSOP_GEOM_FLAGS_PROJID32; 935 + if (xfs_sb_version_hascrc(sbp)) 936 + geo->flags |= XFS_FSOP_GEOM_FLAGS_V5SB; 937 + if (xfs_sb_version_hasftype(sbp)) 938 + geo->flags |= XFS_FSOP_GEOM_FLAGS_FTYPE; 939 + if (xfs_sb_version_hasfinobt(sbp)) 940 + geo->flags |= XFS_FSOP_GEOM_FLAGS_FINOBT; 941 + if (xfs_sb_version_hassparseinodes(sbp)) 942 + geo->flags |= XFS_FSOP_GEOM_FLAGS_SPINODES; 943 + if (xfs_sb_version_hasrmapbt(sbp)) 944 + geo->flags |= XFS_FSOP_GEOM_FLAGS_RMAPBT; 945 + if (xfs_sb_version_hasreflink(sbp)) 946 + geo->flags |= XFS_FSOP_GEOM_FLAGS_REFLINK; 947 + if (xfs_sb_version_hassector(sbp)) 948 + geo->logsectsize = sbp->sb_logsectsize; 949 + else 950 + geo->logsectsize = BBSIZE; 951 + geo->rtsectsize = sbp->sb_blocksize; 952 + geo->dirblocksize = xfs_dir2_dirblock_bytes(sbp); 953 + 954 + if (struct_version < 4) 955 + return 0; 956 + 957 + if (xfs_sb_version_haslogv2(sbp)) 958 + geo->flags |= XFS_FSOP_GEOM_FLAGS_LOGV2; 959 + 960 + geo->logsunit = sbp->sb_logsunit; 961 + 962 + return 0; 892 963 }
+4
fs/xfs/libxfs/xfs_sb.h
··· 34 34 extern void xfs_sb_to_disk(struct xfs_dsb *to, struct xfs_sb *from); 35 35 extern void xfs_sb_quota_from_disk(struct xfs_sb *sbp); 36 36 37 + #define XFS_FS_GEOM_MAX_STRUCT_VER (4) 38 + extern int xfs_fs_geometry(struct xfs_sb *sbp, struct xfs_fsop_geom *geo, 39 + int struct_version); 40 + 37 41 #endif /* __XFS_SB_H__ */
+4
fs/xfs/libxfs/xfs_shared.h
··· 76 76 int xfs_log_calc_unit_res(struct xfs_mount *mp, int unit_bytes); 77 77 int xfs_log_calc_minimum_size(struct xfs_mount *); 78 78 79 + struct xfs_trans_res; 80 + void xfs_log_get_max_trans_res(struct xfs_mount *mp, 81 + struct xfs_trans_res *max_resp); 79 82 80 83 /* 81 84 * Values for t_flags. ··· 146 143 uint32_t size, struct xfs_buf *bp); 147 144 void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp, 148 145 struct xfs_inode *ip, struct xfs_ifork *ifp); 146 + xfs_failaddr_t xfs_symlink_shortform_verify(struct xfs_inode *ip); 149 147 150 148 #endif /* __XFS_SHARED_H__ */
+94 -107
fs/xfs/libxfs/xfs_trans_resv.c
··· 34 34 #include "xfs_trans_space.h" 35 35 #include "xfs_trace.h" 36 36 37 + #define _ALLOC true 38 + #define _FREE false 39 + 37 40 /* 38 41 * A buffer has a format structure overhead in the log in addition 39 42 * to the data, so we need to take this into account when reserving ··· 135 132 } 136 133 137 134 /* 138 - * The free inode btree is a conditional feature and the log reservation 139 - * requirements differ slightly from that of the traditional inode allocation 140 - * btree. The finobt tracks records for inode chunks with at least one free 141 - * inode. A record can be removed from the tree for an inode allocation 142 - * or free and thus the finobt reservation is unconditional across: 135 + * Inode btree record insertion/removal modifies the inode btree and free space 136 + * btrees (since the inobt does not use the agfl). This requires the following 137 + * reservation: 143 138 * 144 - * - inode allocation 145 - * - inode free 146 - * - inode chunk allocation 147 - * 148 - * The 'modify' param indicates to include the record modification scenario. The 149 - * 'alloc' param indicates to include the reservation for free space btree 150 - * modifications on behalf of finobt modifications. This is required only for 151 - * transactions that do not already account for free space btree modifications. 152 - * 153 - * the free inode btree: max depth * block size 139 + * the inode btree: max depth * blocksize 154 140 * the allocation btrees: 2 trees * (max depth - 1) * block size 155 - * the free inode btree entry: block size 141 + * 142 + * The caller must account for SB and AG header modifications, etc. 143 + */ 144 + STATIC uint 145 + xfs_calc_inobt_res( 146 + struct xfs_mount *mp) 147 + { 148 + return xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + 149 + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), 150 + XFS_FSB_TO_B(mp, 1)); 151 + } 152 + 153 + /* 154 + * The free inode btree is a conditional feature. 
The behavior differs slightly 155 + * from that of the traditional inode btree in that the finobt tracks records 156 + * for inode chunks with at least one free inode. A record can be removed from 157 + * the tree during individual inode allocation. Therefore the finobt 158 + * reservation is unconditional for both the inode chunk allocation and 159 + * individual inode allocation (modify) cases. 160 + * 161 + * Behavior aside, the reservation for finobt modification is equivalent to the 162 + * traditional inobt: cover a full finobt shape change plus block allocation. 156 163 */ 157 164 STATIC uint 158 165 xfs_calc_finobt_res( 159 - struct xfs_mount *mp, 160 - int alloc, 161 - int modify) 166 + struct xfs_mount *mp) 162 167 { 163 - uint res; 164 - 165 168 if (!xfs_sb_version_hasfinobt(&mp->m_sb)) 166 169 return 0; 167 170 168 - res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)); 169 - if (alloc) 170 - res += xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), 171 - XFS_FSB_TO_B(mp, 1)); 172 - if (modify) 173 - res += (uint)XFS_FSB_TO_B(mp, 1); 171 + return xfs_calc_inobt_res(mp); 172 + } 174 173 174 + /* 175 + * Calculate the reservation required to allocate or free an inode chunk. This 176 + * includes: 177 + * 178 + * the allocation btrees: 2 trees * (max depth - 1) * block size 179 + * the inode chunk: m_ialloc_blks * N 180 + * 181 + * The size N of the inode chunk reservation depends on whether it is for 182 + * allocation or free and which type of create transaction is in use. An inode 183 + * chunk free always invalidates the buffers and only requires reservation for 184 + * headers (N == 0). An inode chunk allocation requires a chunk sized 185 + * reservation on v4 and older superblocks to initialize the chunk. No chunk 186 + * reservation is required for allocation on v5 supers, which use ordered 187 + * buffers to initialize. 
188 + */ 189 + STATIC uint 190 + xfs_calc_inode_chunk_res( 191 + struct xfs_mount *mp, 192 + bool alloc) 193 + { 194 + uint res, size = 0; 195 + 196 + res = xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), 197 + XFS_FSB_TO_B(mp, 1)); 198 + if (alloc) { 199 + /* icreate tx uses ordered buffers */ 200 + if (xfs_sb_version_hascrc(&mp->m_sb)) 201 + return res; 202 + size = XFS_FSB_TO_B(mp, 1); 203 + } 204 + 205 + res += xfs_calc_buf_res(mp->m_ialloc_blks, size); 175 206 return res; 176 207 } 177 208 ··· 269 232 * the super block to reflect the freed blocks: sector size 270 233 * worst case split in allocation btrees per extent assuming 4 extents: 271 234 * 4 exts * 2 trees * (2 * max depth - 1) * block size 272 - * the inode btree: max depth * blocksize 273 - * the allocation btrees: 2 trees * (max depth - 1) * block size 274 235 */ 275 236 STATIC uint 276 237 xfs_calc_itruncate_reservation( ··· 280 245 XFS_FSB_TO_B(mp, 1))), 281 246 (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + 282 247 xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4), 283 - XFS_FSB_TO_B(mp, 1)) + 284 - xfs_calc_buf_res(5, 0) + 285 - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), 286 - XFS_FSB_TO_B(mp, 1)) + 287 - xfs_calc_buf_res(2 + mp->m_ialloc_blks + 288 - mp->m_in_maxlevels, 0))); 248 + XFS_FSB_TO_B(mp, 1)))); 289 249 } 290 250 291 251 /* ··· 312 282 * For removing an inode from unlinked list at first, we can modify: 313 283 * the agi hash list and counters: sector size 314 284 * the on disk inode before ours in the agi hash list: inode cluster size 285 + * the on disk inode in the agi hash list: inode cluster size 315 286 */ 316 287 STATIC uint 317 288 xfs_calc_iunlink_remove_reservation( 318 289 struct xfs_mount *mp) 319 290 { 320 291 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + 321 - max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); 292 + 2 * max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); 322 293 } 323 294 324 295 /* ··· 351 320 /* 352 321 * For adding an inode to 
unlinked list we can modify: 353 322 * the agi hash list: sector size 354 - * the unlinked inode: inode size 323 + * the on disk inode: inode cluster size 355 324 */ 356 325 STATIC uint 357 326 xfs_calc_iunlink_add_reservation(xfs_mount_t *mp) 358 327 { 359 328 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + 360 - xfs_calc_inode_res(mp, 1); 329 + max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); 361 330 } 362 331 363 332 /* ··· 410 379 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + 411 380 (uint)XFS_FSB_TO_B(mp, 1) + 412 381 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) + 413 - xfs_calc_finobt_res(mp, 1, 1); 414 - } 415 - 416 - /* 417 - * For create we can allocate some inodes giving: 418 - * the agi and agf of the ag getting the new inodes: 2 * sectorsize 419 - * the superblock for the nlink flag: sector size 420 - * the inode blocks allocated: mp->m_ialloc_blks * blocksize 421 - * the inode btree: max depth * blocksize 422 - * the allocation btrees: 2 trees * (max depth - 1) * block size 423 - */ 424 - STATIC uint 425 - xfs_calc_create_resv_alloc( 426 - struct xfs_mount *mp) 427 - { 428 - return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + 429 - mp->m_sb.sb_sectsize + 430 - xfs_calc_buf_res(mp->m_ialloc_blks, XFS_FSB_TO_B(mp, 1)) + 431 - xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + 432 - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), 433 - XFS_FSB_TO_B(mp, 1)); 434 - } 435 - 436 - STATIC uint 437 - __xfs_calc_create_reservation( 438 - struct xfs_mount *mp) 439 - { 440 - return XFS_DQUOT_LOGRES(mp) + 441 - MAX(xfs_calc_create_resv_alloc(mp), 442 - xfs_calc_create_resv_modify(mp)); 382 + xfs_calc_finobt_res(mp); 443 383 } 444 384 445 385 /* 446 386 * For icreate we can allocate some inodes giving: 447 387 * the agi and agf of the ag getting the new inodes: 2 * sectorsize 448 388 * the superblock for the nlink flag: sector size 449 - * the inode btree: max depth * blocksize 450 - * the allocation btrees: 2 trees * (max 
depth - 1) * block size 451 - * the finobt (record insertion) 389 + * the inode chunk (allocation, optional init) 390 + * the inobt (record insertion) 391 + * the finobt (optional, record insertion) 452 392 */ 453 393 STATIC uint 454 394 xfs_calc_icreate_resv_alloc( ··· 427 425 { 428 426 return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + 429 427 mp->m_sb.sb_sectsize + 430 - xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + 431 - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), 432 - XFS_FSB_TO_B(mp, 1)) + 433 - xfs_calc_finobt_res(mp, 0, 0); 428 + xfs_calc_inode_chunk_res(mp, _ALLOC) + 429 + xfs_calc_inobt_res(mp) + 430 + xfs_calc_finobt_res(mp); 434 431 } 435 432 436 433 STATIC uint ··· 441 440 } 442 441 443 442 STATIC uint 444 - xfs_calc_create_reservation( 445 - struct xfs_mount *mp) 446 - { 447 - if (xfs_sb_version_hascrc(&mp->m_sb)) 448 - return xfs_calc_icreate_reservation(mp); 449 - return __xfs_calc_create_reservation(mp); 450 - 451 - } 452 - 453 - STATIC uint 454 443 xfs_calc_create_tmpfile_reservation( 455 444 struct xfs_mount *mp) 456 445 { 457 446 uint res = XFS_DQUOT_LOGRES(mp); 458 447 459 - if (xfs_sb_version_hascrc(&mp->m_sb)) 460 - res += xfs_calc_icreate_resv_alloc(mp); 461 - else 462 - res += xfs_calc_create_resv_alloc(mp); 463 - 448 + res += xfs_calc_icreate_resv_alloc(mp); 464 449 return res + xfs_calc_iunlink_add_reservation(mp); 465 450 } 466 451 ··· 457 470 xfs_calc_mkdir_reservation( 458 471 struct xfs_mount *mp) 459 472 { 460 - return xfs_calc_create_reservation(mp); 473 + return xfs_calc_icreate_reservation(mp); 461 474 } 462 475 463 476 ··· 470 483 xfs_calc_symlink_reservation( 471 484 struct xfs_mount *mp) 472 485 { 473 - return xfs_calc_create_reservation(mp) + 486 + return xfs_calc_icreate_reservation(mp) + 474 487 xfs_calc_buf_res(1, XFS_SYMLINK_MAXLEN); 475 488 } 476 489 477 490 /* 478 491 * In freeing an inode we can modify: 479 492 * the inode being freed: inode size 480 - * the super block free inode counter: sector 
size 481 - * the agi hash list and counters: sector size 482 - * the inode btree entry: block size 483 - * the on disk inode before ours in the agi hash list: inode cluster size 484 - * the inode btree: max depth * blocksize 485 - * the allocation btrees: 2 trees * (max depth - 1) * block size 493 + * the super block free inode counter, AGF and AGFL: sector size 494 + * the on disk inode (agi unlinked list removal) 495 + * the inode chunk (invalidated, headers only) 496 + * the inode btree 486 497 * the finobt (record insertion, removal or modification) 498 + * 499 + * Note that the inode chunk res. includes an allocfree res. for freeing of the 500 + * inode chunk. This is technically extraneous because the inode chunk free is 501 + * deferred (it occurs after a transaction roll). Include the extra reservation 502 + * anyways since we've had reports of ifree transaction overruns due to too many 503 + * agfl fixups during inode chunk frees. 487 504 */ 488 505 STATIC uint 489 506 xfs_calc_ifree_reservation( ··· 495 504 { 496 505 return XFS_DQUOT_LOGRES(mp) + 497 506 xfs_calc_inode_res(mp, 1) + 498 - xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + 499 - xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + 507 + xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + 500 508 xfs_calc_iunlink_remove_reservation(mp) + 501 - xfs_calc_buf_res(1, 0) + 502 - xfs_calc_buf_res(2 + mp->m_ialloc_blks + 503 - mp->m_in_maxlevels, 0) + 504 - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), 505 - XFS_FSB_TO_B(mp, 1)) + 506 - xfs_calc_finobt_res(mp, 0, 1); 509 + xfs_calc_inode_chunk_res(mp, _FREE) + 510 + xfs_calc_inobt_res(mp) + 511 + xfs_calc_finobt_res(mp); 507 512 } 508 513 509 514 /* ··· 829 842 resp->tr_symlink.tr_logcount = XFS_SYMLINK_LOG_COUNT; 830 843 resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES; 831 844 832 - resp->tr_create.tr_logres = xfs_calc_create_reservation(mp); 845 + resp->tr_create.tr_logres = xfs_calc_icreate_reservation(mp); 833 846 resp->tr_create.tr_logcount = 
XFS_CREATE_LOG_COUNT; 834 847 resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES; 835 848
+324 -16
fs/xfs/scrub/agheader.c
··· 32 32 #include "xfs_inode.h" 33 33 #include "xfs_alloc.h" 34 34 #include "xfs_ialloc.h" 35 + #include "xfs_rmap.h" 35 36 #include "scrub/xfs_scrub.h" 36 37 #include "scrub/scrub.h" 37 38 #include "scrub/common.h" 38 39 #include "scrub/trace.h" 39 40 40 41 /* 41 - * Set up scrub to check all the static metadata in each AG. 42 - * This means the SB, AGF, AGI, and AGFL headers. 42 + * Walk all the blocks in the AGFL. The fn function can return any negative 43 + * error code or XFS_BTREE_QUERY_RANGE_ABORT. 43 44 */ 44 - int 45 - xfs_scrub_setup_ag_header( 46 - struct xfs_scrub_context *sc, 47 - struct xfs_inode *ip) 48 - { 49 - struct xfs_mount *mp = sc->mp; 50 - 51 - if (sc->sm->sm_agno >= mp->m_sb.sb_agcount || 52 - sc->sm->sm_ino || sc->sm->sm_gen) 53 - return -EINVAL; 54 - return xfs_scrub_setup_fs(sc, ip); 55 - } 56 - 57 - /* Walk all the blocks in the AGFL. */ 58 45 int 59 46 xfs_scrub_walk_agfl( 60 47 struct xfs_scrub_context *sc, ··· 102 115 103 116 /* Superblock */ 104 117 118 + /* Cross-reference with the other btrees. */ 119 + STATIC void 120 + xfs_scrub_superblock_xref( 121 + struct xfs_scrub_context *sc, 122 + struct xfs_buf *bp) 123 + { 124 + struct xfs_owner_info oinfo; 125 + struct xfs_mount *mp = sc->mp; 126 + xfs_agnumber_t agno = sc->sm->sm_agno; 127 + xfs_agblock_t agbno; 128 + int error; 129 + 130 + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 131 + return; 132 + 133 + agbno = XFS_SB_BLOCK(mp); 134 + 135 + error = xfs_scrub_ag_init(sc, agno, &sc->sa); 136 + if (!xfs_scrub_xref_process_error(sc, agno, agbno, &error)) 137 + return; 138 + 139 + xfs_scrub_xref_is_used_space(sc, agbno, 1); 140 + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); 141 + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS); 142 + xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); 143 + xfs_scrub_xref_is_not_shared(sc, agbno, 1); 144 + 145 + /* scrub teardown will take care of sc->sa for us */ 146 + } 147 + 105 148 /* 106 149 * Scrub the filesystem superblock. 
107 150 * ··· 160 143 error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp, 161 144 XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), 162 145 XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_sb_buf_ops); 146 + /* 147 + * The superblock verifier can return several different error codes 148 + * if it thinks the superblock doesn't look right. For a mount these 149 + * would all get bounced back to userspace, but if we're here then the 150 + * fs mounted successfully, which means that this secondary superblock 151 + * is simply incorrect. Treat all these codes the same way we treat 152 + * any corruption. 153 + */ 154 + switch (error) { 155 + case -EINVAL: /* also -EWRONGFS */ 156 + case -ENOSYS: 157 + case -EFBIG: 158 + error = -EFSCORRUPTED; 159 + default: 160 + break; 161 + } 163 162 if (!xfs_scrub_process_error(sc, agno, XFS_SB_BLOCK(mp), &error)) 164 163 return error; 165 164 ··· 420 387 BBTOB(bp->b_length) - sizeof(struct xfs_dsb))) 421 388 xfs_scrub_block_set_corrupt(sc, bp); 422 389 390 + xfs_scrub_superblock_xref(sc, bp); 391 + 423 392 return error; 424 393 } 425 394 426 395 /* AGF */ 396 + 397 + /* Tally freespace record lengths. 
*/ 398 + STATIC int 399 + xfs_scrub_agf_record_bno_lengths( 400 + struct xfs_btree_cur *cur, 401 + struct xfs_alloc_rec_incore *rec, 402 + void *priv) 403 + { 404 + xfs_extlen_t *blocks = priv; 405 + 406 + (*blocks) += rec->ar_blockcount; 407 + return 0; 408 + } 409 + 410 + /* Check agf_freeblks */ 411 + static inline void 412 + xfs_scrub_agf_xref_freeblks( 413 + struct xfs_scrub_context *sc) 414 + { 415 + struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); 416 + xfs_extlen_t blocks = 0; 417 + int error; 418 + 419 + if (!sc->sa.bno_cur) 420 + return; 421 + 422 + error = xfs_alloc_query_all(sc->sa.bno_cur, 423 + xfs_scrub_agf_record_bno_lengths, &blocks); 424 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur)) 425 + return; 426 + if (blocks != be32_to_cpu(agf->agf_freeblks)) 427 + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); 428 + } 429 + 430 + /* Cross reference the AGF with the cntbt (freespace by length btree) */ 431 + static inline void 432 + xfs_scrub_agf_xref_cntbt( 433 + struct xfs_scrub_context *sc) 434 + { 435 + struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); 436 + xfs_agblock_t agbno; 437 + xfs_extlen_t blocks; 438 + int have; 439 + int error; 440 + 441 + if (!sc->sa.cnt_cur) 442 + return; 443 + 444 + /* Any freespace at all? */ 445 + error = xfs_alloc_lookup_le(sc->sa.cnt_cur, 0, -1U, &have); 446 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur)) 447 + return; 448 + if (!have) { 449 + if (agf->agf_freeblks != be32_to_cpu(0)) 450 + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); 451 + return; 452 + } 453 + 454 + /* Check agf_longest */ 455 + error = xfs_alloc_get_rec(sc->sa.cnt_cur, &agbno, &blocks, &have); 456 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur)) 457 + return; 458 + if (!have || blocks != be32_to_cpu(agf->agf_longest)) 459 + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); 460 + } 461 + 462 + /* Check the btree block counts in the AGF against the btrees. 
*/ 463 + STATIC void 464 + xfs_scrub_agf_xref_btreeblks( 465 + struct xfs_scrub_context *sc) 466 + { 467 + struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); 468 + struct xfs_mount *mp = sc->mp; 469 + xfs_agblock_t blocks; 470 + xfs_agblock_t btreeblks; 471 + int error; 472 + 473 + /* Check agf_rmap_blocks; set up for agf_btreeblks check */ 474 + if (sc->sa.rmap_cur) { 475 + error = xfs_btree_count_blocks(sc->sa.rmap_cur, &blocks); 476 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) 477 + return; 478 + btreeblks = blocks - 1; 479 + if (blocks != be32_to_cpu(agf->agf_rmap_blocks)) 480 + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); 481 + } else { 482 + btreeblks = 0; 483 + } 484 + 485 + /* 486 + * No rmap cursor; we can't xref if we have the rmapbt feature. 487 + * We also can't do it if we're missing the free space btree cursors. 488 + */ 489 + if ((xfs_sb_version_hasrmapbt(&mp->m_sb) && !sc->sa.rmap_cur) || 490 + !sc->sa.bno_cur || !sc->sa.cnt_cur) 491 + return; 492 + 493 + /* Check agf_btreeblks */ 494 + error = xfs_btree_count_blocks(sc->sa.bno_cur, &blocks); 495 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur)) 496 + return; 497 + btreeblks += blocks - 1; 498 + 499 + error = xfs_btree_count_blocks(sc->sa.cnt_cur, &blocks); 500 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur)) 501 + return; 502 + btreeblks += blocks - 1; 503 + 504 + if (btreeblks != be32_to_cpu(agf->agf_btreeblks)) 505 + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); 506 + } 507 + 508 + /* Check agf_refcount_blocks against tree size */ 509 + static inline void 510 + xfs_scrub_agf_xref_refcblks( 511 + struct xfs_scrub_context *sc) 512 + { 513 + struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); 514 + xfs_agblock_t blocks; 515 + int error; 516 + 517 + if (!sc->sa.refc_cur) 518 + return; 519 + 520 + error = xfs_btree_count_blocks(sc->sa.refc_cur, &blocks); 521 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur)) 522 + 
return; 523 + if (blocks != be32_to_cpu(agf->agf_refcount_blocks)) 524 + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); 525 + } 526 + 527 + /* Cross-reference with the other btrees. */ 528 + STATIC void 529 + xfs_scrub_agf_xref( 530 + struct xfs_scrub_context *sc) 531 + { 532 + struct xfs_owner_info oinfo; 533 + struct xfs_mount *mp = sc->mp; 534 + xfs_agblock_t agbno; 535 + int error; 536 + 537 + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 538 + return; 539 + 540 + agbno = XFS_AGF_BLOCK(mp); 541 + 542 + error = xfs_scrub_ag_btcur_init(sc, &sc->sa); 543 + if (error) 544 + return; 545 + 546 + xfs_scrub_xref_is_used_space(sc, agbno, 1); 547 + xfs_scrub_agf_xref_freeblks(sc); 548 + xfs_scrub_agf_xref_cntbt(sc); 549 + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); 550 + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS); 551 + xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); 552 + xfs_scrub_agf_xref_btreeblks(sc); 553 + xfs_scrub_xref_is_not_shared(sc, agbno, 1); 554 + xfs_scrub_agf_xref_refcblks(sc); 555 + 556 + /* scrub teardown will take care of sc->sa for us */ 557 + } 427 558 428 559 /* Scrub the AGF. */ 429 560 int ··· 611 414 &sc->sa.agf_bp, &sc->sa.agfl_bp); 612 415 if (!xfs_scrub_process_error(sc, agno, XFS_AGF_BLOCK(sc->mp), &error)) 613 416 goto out; 417 + xfs_scrub_buffer_recheck(sc, sc->sa.agf_bp); 614 418 615 419 agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); 616 420 ··· 668 470 if (agfl_count != 0 && fl_count != agfl_count) 669 471 xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); 670 472 473 + xfs_scrub_agf_xref(sc); 671 474 out: 672 475 return error; 673 476 } ··· 676 477 /* AGFL */ 677 478 678 479 struct xfs_scrub_agfl_info { 480 + struct xfs_owner_info oinfo; 679 481 unsigned int sz_entries; 680 482 unsigned int nr_entries; 681 483 xfs_agblock_t *entries; 682 484 }; 485 + 486 + /* Cross-reference with the other btrees. 
*/ 487 + STATIC void 488 + xfs_scrub_agfl_block_xref( 489 + struct xfs_scrub_context *sc, 490 + xfs_agblock_t agbno, 491 + struct xfs_owner_info *oinfo) 492 + { 493 + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 494 + return; 495 + 496 + xfs_scrub_xref_is_used_space(sc, agbno, 1); 497 + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); 498 + xfs_scrub_xref_is_owned_by(sc, agbno, 1, oinfo); 499 + xfs_scrub_xref_is_not_shared(sc, agbno, 1); 500 + } 683 501 684 502 /* Scrub an AGFL block. */ 685 503 STATIC int ··· 715 499 else 716 500 xfs_scrub_block_set_corrupt(sc, sc->sa.agfl_bp); 717 501 502 + xfs_scrub_agfl_block_xref(sc, agbno, priv); 503 + 718 504 return 0; 719 505 } 720 506 ··· 729 511 const xfs_agblock_t *b = pb; 730 512 731 513 return (int)*a - (int)*b; 514 + } 515 + 516 + /* Cross-reference with the other btrees. */ 517 + STATIC void 518 + xfs_scrub_agfl_xref( 519 + struct xfs_scrub_context *sc) 520 + { 521 + struct xfs_owner_info oinfo; 522 + struct xfs_mount *mp = sc->mp; 523 + xfs_agblock_t agbno; 524 + int error; 525 + 526 + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 527 + return; 528 + 529 + agbno = XFS_AGFL_BLOCK(mp); 530 + 531 + error = xfs_scrub_ag_btcur_init(sc, &sc->sa); 532 + if (error) 533 + return; 534 + 535 + xfs_scrub_xref_is_used_space(sc, agbno, 1); 536 + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); 537 + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS); 538 + xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); 539 + xfs_scrub_xref_is_not_shared(sc, agbno, 1); 540 + 541 + /* 542 + * Scrub teardown will take care of sc->sa for us. Leave sc->sa 543 + * active so that the agfl block xref can use it too. 544 + */ 732 545 } 733 546 734 547 /* Scrub the AGFL. 
*/ ··· 781 532 goto out; 782 533 if (!sc->sa.agf_bp) 783 534 return -EFSCORRUPTED; 535 + xfs_scrub_buffer_recheck(sc, sc->sa.agfl_bp); 536 + 537 + xfs_scrub_agfl_xref(sc); 538 + 539 + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 540 + goto out; 784 541 785 542 /* Allocate buffer to ensure uniqueness of AGFL entries. */ 786 543 agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); ··· 803 548 } 804 549 805 550 /* Check the blocks in the AGFL. */ 551 + xfs_rmap_ag_owner(&sai.oinfo, XFS_RMAP_OWN_AG); 806 552 error = xfs_scrub_walk_agfl(sc, xfs_scrub_agfl_block, &sai); 807 553 if (error) 808 554 goto out_free; ··· 831 575 832 576 /* AGI */ 833 577 578 + /* Check agi_count/agi_freecount */ 579 + static inline void 580 + xfs_scrub_agi_xref_icounts( 581 + struct xfs_scrub_context *sc) 582 + { 583 + struct xfs_agi *agi = XFS_BUF_TO_AGI(sc->sa.agi_bp); 584 + xfs_agino_t icount; 585 + xfs_agino_t freecount; 586 + int error; 587 + 588 + if (!sc->sa.ino_cur) 589 + return; 590 + 591 + error = xfs_ialloc_count_inodes(sc->sa.ino_cur, &icount, &freecount); 592 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.ino_cur)) 593 + return; 594 + if (be32_to_cpu(agi->agi_count) != icount || 595 + be32_to_cpu(agi->agi_freecount) != freecount) 596 + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agi_bp); 597 + } 598 + 599 + /* Cross-reference with the other btrees. 
*/ 600 + STATIC void 601 + xfs_scrub_agi_xref( 602 + struct xfs_scrub_context *sc) 603 + { 604 + struct xfs_owner_info oinfo; 605 + struct xfs_mount *mp = sc->mp; 606 + xfs_agblock_t agbno; 607 + int error; 608 + 609 + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 610 + return; 611 + 612 + agbno = XFS_AGI_BLOCK(mp); 613 + 614 + error = xfs_scrub_ag_btcur_init(sc, &sc->sa); 615 + if (error) 616 + return; 617 + 618 + xfs_scrub_xref_is_used_space(sc, agbno, 1); 619 + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); 620 + xfs_scrub_agi_xref_icounts(sc); 621 + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS); 622 + xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); 623 + xfs_scrub_xref_is_not_shared(sc, agbno, 1); 624 + 625 + /* scrub teardown will take care of sc->sa for us */ 626 + } 627 + 834 628 /* Scrub the AGI. */ 835 629 int 836 630 xfs_scrub_agi( ··· 904 598 &sc->sa.agf_bp, &sc->sa.agfl_bp); 905 599 if (!xfs_scrub_process_error(sc, agno, XFS_AGI_BLOCK(sc->mp), &error)) 906 600 goto out; 601 + xfs_scrub_buffer_recheck(sc, sc->sa.agi_bp); 907 602 908 603 agi = XFS_BUF_TO_AGI(sc->sa.agi_bp); 909 604 ··· 960 653 if (agi->agi_pad32 != cpu_to_be32(0)) 961 654 xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp); 962 655 656 + xfs_scrub_agi_xref(sc); 963 657 out: 964 658 return error; 965 659 }
+81
fs/xfs/scrub/alloc.c
··· 31 31 #include "xfs_sb.h" 32 32 #include "xfs_alloc.h" 33 33 #include "xfs_rmap.h" 34 + #include "xfs_alloc.h" 34 35 #include "scrub/xfs_scrub.h" 35 36 #include "scrub/scrub.h" 36 37 #include "scrub/common.h" ··· 50 49 } 51 50 52 51 /* Free space btree scrubber. */ 52 + /* 53 + * Ensure there's a corresponding cntbt/bnobt record matching this 54 + * bnobt/cntbt record, respectively. 55 + */ 56 + STATIC void 57 + xfs_scrub_allocbt_xref_other( 58 + struct xfs_scrub_context *sc, 59 + xfs_agblock_t agbno, 60 + xfs_extlen_t len) 61 + { 62 + struct xfs_btree_cur **pcur; 63 + xfs_agblock_t fbno; 64 + xfs_extlen_t flen; 65 + int has_otherrec; 66 + int error; 67 + 68 + if (sc->sm->sm_type == XFS_SCRUB_TYPE_BNOBT) 69 + pcur = &sc->sa.cnt_cur; 70 + else 71 + pcur = &sc->sa.bno_cur; 72 + if (!*pcur) 73 + return; 74 + 75 + error = xfs_alloc_lookup_le(*pcur, agbno, len, &has_otherrec); 76 + if (!xfs_scrub_should_check_xref(sc, &error, pcur)) 77 + return; 78 + if (!has_otherrec) { 79 + xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0); 80 + return; 81 + } 82 + 83 + error = xfs_alloc_get_rec(*pcur, &fbno, &flen, &has_otherrec); 84 + if (!xfs_scrub_should_check_xref(sc, &error, pcur)) 85 + return; 86 + if (!has_otherrec) { 87 + xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0); 88 + return; 89 + } 90 + 91 + if (fbno != agbno || flen != len) 92 + xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0); 93 + } 94 + 95 + /* Cross-reference with the other btrees. */ 96 + STATIC void 97 + xfs_scrub_allocbt_xref( 98 + struct xfs_scrub_context *sc, 99 + xfs_agblock_t agbno, 100 + xfs_extlen_t len) 101 + { 102 + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 103 + return; 104 + 105 + xfs_scrub_allocbt_xref_other(sc, agbno, len); 106 + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len); 107 + xfs_scrub_xref_has_no_owner(sc, agbno, len); 108 + xfs_scrub_xref_is_not_shared(sc, agbno, len); 109 + } 53 110 54 111 /* Scrub a bnobt/cntbt record. 
*/ 55 112 STATIC int ··· 128 69 !xfs_verify_agbno(mp, agno, bno) || 129 70 !xfs_verify_agbno(mp, agno, bno + len - 1)) 130 71 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 72 + 73 + xfs_scrub_allocbt_xref(bs->sc, bno, len); 131 74 132 75 return error; 133 76 } ··· 160 99 struct xfs_scrub_context *sc) 161 100 { 162 101 return xfs_scrub_allocbt(sc, XFS_BTNUM_CNT); 102 + } 103 + 104 + /* xref check that the extent is not free */ 105 + void 106 + xfs_scrub_xref_is_used_space( 107 + struct xfs_scrub_context *sc, 108 + xfs_agblock_t agbno, 109 + xfs_extlen_t len) 110 + { 111 + bool is_freesp; 112 + int error; 113 + 114 + if (!sc->sa.bno_cur) 115 + return; 116 + 117 + error = xfs_alloc_has_record(sc->sa.bno_cur, agbno, len, &is_freesp); 118 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur)) 119 + return; 120 + if (is_freesp) 121 + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.bno_cur, 0); 163 122 }
+211 -8
fs/xfs/scrub/bmap.c
··· 37 37 #include "xfs_bmap_util.h" 38 38 #include "xfs_bmap_btree.h" 39 39 #include "xfs_rmap.h" 40 + #include "xfs_refcount.h" 40 41 #include "scrub/xfs_scrub.h" 41 42 #include "scrub/scrub.h" 42 43 #include "scrub/common.h" ··· 100 99 int whichfork; 101 100 }; 102 101 102 + /* Look for a corresponding rmap for this irec. */ 103 + static inline bool 104 + xfs_scrub_bmap_get_rmap( 105 + struct xfs_scrub_bmap_info *info, 106 + struct xfs_bmbt_irec *irec, 107 + xfs_agblock_t agbno, 108 + uint64_t owner, 109 + struct xfs_rmap_irec *rmap) 110 + { 111 + xfs_fileoff_t offset; 112 + unsigned int rflags = 0; 113 + int has_rmap; 114 + int error; 115 + 116 + if (info->whichfork == XFS_ATTR_FORK) 117 + rflags |= XFS_RMAP_ATTR_FORK; 118 + 119 + /* 120 + * CoW staging extents are owned (on disk) by the refcountbt, so 121 + * their rmaps do not have offsets. 122 + */ 123 + if (info->whichfork == XFS_COW_FORK) 124 + offset = 0; 125 + else 126 + offset = irec->br_startoff; 127 + 128 + /* 129 + * If the caller thinks this could be a shared bmbt extent (IOWs, 130 + * any data fork extent of a reflink inode) then we have to use the 131 + * range rmap lookup to make sure we get the correct owner/offset. 132 + */ 133 + if (info->is_shared) { 134 + error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno, 135 + owner, offset, rflags, rmap, &has_rmap); 136 + if (!xfs_scrub_should_check_xref(info->sc, &error, 137 + &info->sc->sa.rmap_cur)) 138 + return false; 139 + goto out; 140 + } 141 + 142 + /* 143 + * Otherwise, use the (faster) regular lookup. 
144 + */ 145 + error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno, 0, owner, 146 + offset, rflags, &has_rmap); 147 + if (!xfs_scrub_should_check_xref(info->sc, &error, 148 + &info->sc->sa.rmap_cur)) 149 + return false; 150 + if (!has_rmap) 151 + goto out; 152 + 153 + error = xfs_rmap_get_rec(info->sc->sa.rmap_cur, rmap, &has_rmap); 154 + if (!xfs_scrub_should_check_xref(info->sc, &error, 155 + &info->sc->sa.rmap_cur)) 156 + return false; 157 + 158 + out: 159 + if (!has_rmap) 160 + xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, 161 + irec->br_startoff); 162 + return has_rmap; 163 + } 164 + 165 + /* Make sure that we have rmapbt records for this extent. */ 166 + STATIC void 167 + xfs_scrub_bmap_xref_rmap( 168 + struct xfs_scrub_bmap_info *info, 169 + struct xfs_bmbt_irec *irec, 170 + xfs_agblock_t agbno) 171 + { 172 + struct xfs_rmap_irec rmap; 173 + unsigned long long rmap_end; 174 + uint64_t owner; 175 + 176 + if (!info->sc->sa.rmap_cur) 177 + return; 178 + 179 + if (info->whichfork == XFS_COW_FORK) 180 + owner = XFS_RMAP_OWN_COW; 181 + else 182 + owner = info->sc->ip->i_ino; 183 + 184 + /* Find the rmap record for this irec. */ 185 + if (!xfs_scrub_bmap_get_rmap(info, irec, agbno, owner, &rmap)) 186 + return; 187 + 188 + /* Check the rmap. */ 189 + rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount; 190 + if (rmap.rm_startblock > agbno || 191 + agbno + irec->br_blockcount > rmap_end) 192 + xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, 193 + irec->br_startoff); 194 + 195 + /* 196 + * Check the logical offsets if applicable. CoW staging extents 197 + * don't track logical offsets since the mappings only exist in 198 + * memory. 
199 + */ 200 + if (info->whichfork != XFS_COW_FORK) { 201 + rmap_end = (unsigned long long)rmap.rm_offset + 202 + rmap.rm_blockcount; 203 + if (rmap.rm_offset > irec->br_startoff || 204 + irec->br_startoff + irec->br_blockcount > rmap_end) 205 + xfs_scrub_fblock_xref_set_corrupt(info->sc, 206 + info->whichfork, irec->br_startoff); 207 + } 208 + 209 + if (rmap.rm_owner != owner) 210 + xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, 211 + irec->br_startoff); 212 + 213 + /* 214 + * Check for discrepancies between the unwritten flag in the irec and 215 + * the rmap. Note that the (in-memory) CoW fork distinguishes between 216 + * unwritten and written extents, but we don't track that in the rmap 217 + * records because the blocks are owned (on-disk) by the refcountbt, 218 + * which doesn't track unwritten state. 219 + */ 220 + if (owner != XFS_RMAP_OWN_COW && 221 + irec->br_state == XFS_EXT_UNWRITTEN && 222 + !(rmap.rm_flags & XFS_RMAP_UNWRITTEN)) 223 + xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, 224 + irec->br_startoff); 225 + 226 + if (info->whichfork == XFS_ATTR_FORK && 227 + !(rmap.rm_flags & XFS_RMAP_ATTR_FORK)) 228 + xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, 229 + irec->br_startoff); 230 + if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK) 231 + xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, 232 + irec->br_startoff); 233 + } 234 + 235 + /* Cross-reference a single rtdev extent record. */ 236 + STATIC void 237 + xfs_scrub_bmap_rt_extent_xref( 238 + struct xfs_scrub_bmap_info *info, 239 + struct xfs_inode *ip, 240 + struct xfs_btree_cur *cur, 241 + struct xfs_bmbt_irec *irec) 242 + { 243 + if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 244 + return; 245 + 246 + xfs_scrub_xref_is_used_rt_space(info->sc, irec->br_startblock, 247 + irec->br_blockcount); 248 + } 249 + 250 + /* Cross-reference a single datadev extent record. 
*/ 251 + STATIC void 252 + xfs_scrub_bmap_extent_xref( 253 + struct xfs_scrub_bmap_info *info, 254 + struct xfs_inode *ip, 255 + struct xfs_btree_cur *cur, 256 + struct xfs_bmbt_irec *irec) 257 + { 258 + struct xfs_mount *mp = info->sc->mp; 259 + xfs_agnumber_t agno; 260 + xfs_agblock_t agbno; 261 + xfs_extlen_t len; 262 + int error; 263 + 264 + if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 265 + return; 266 + 267 + agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock); 268 + agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock); 269 + len = irec->br_blockcount; 270 + 271 + error = xfs_scrub_ag_init(info->sc, agno, &info->sc->sa); 272 + if (!xfs_scrub_fblock_process_error(info->sc, info->whichfork, 273 + irec->br_startoff, &error)) 274 + return; 275 + 276 + xfs_scrub_xref_is_used_space(info->sc, agbno, len); 277 + xfs_scrub_xref_is_not_inode_chunk(info->sc, agbno, len); 278 + xfs_scrub_bmap_xref_rmap(info, irec, agbno); 279 + switch (info->whichfork) { 280 + case XFS_DATA_FORK: 281 + if (xfs_is_reflink_inode(info->sc->ip)) 282 + break; 283 + /* fall through */ 284 + case XFS_ATTR_FORK: 285 + xfs_scrub_xref_is_not_shared(info->sc, agbno, 286 + irec->br_blockcount); 287 + break; 288 + case XFS_COW_FORK: 289 + xfs_scrub_xref_is_cow_staging(info->sc, agbno, 290 + irec->br_blockcount); 291 + break; 292 + } 293 + 294 + xfs_scrub_ag_free(info->sc, &info->sc->sa); 295 + } 296 + 103 297 /* Scrub a single extent record. */ 104 298 STATIC int 105 299 xfs_scrub_bmap_extent( ··· 305 109 { 306 110 struct xfs_mount *mp = info->sc->mp; 307 111 struct xfs_buf *bp = NULL; 112 + xfs_filblks_t end; 308 113 int error = 0; 309 114 310 115 if (cur) ··· 333 136 irec->br_startoff); 334 137 335 138 /* Make sure the extent points to a valid place. 
*/ 139 + if (irec->br_blockcount > MAXEXTLEN) 140 + xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, 141 + irec->br_startoff); 336 142 if (irec->br_startblock + irec->br_blockcount <= irec->br_startblock) 337 143 xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, 338 144 irec->br_startoff); 145 + end = irec->br_startblock + irec->br_blockcount - 1; 339 146 if (info->is_rt && 340 147 (!xfs_verify_rtbno(mp, irec->br_startblock) || 341 - !xfs_verify_rtbno(mp, irec->br_startblock + 342 - irec->br_blockcount - 1))) 148 + !xfs_verify_rtbno(mp, end))) 343 149 xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, 344 150 irec->br_startoff); 345 151 if (!info->is_rt && 346 152 (!xfs_verify_fsbno(mp, irec->br_startblock) || 347 - !xfs_verify_fsbno(mp, irec->br_startblock + 348 - irec->br_blockcount - 1))) 153 + !xfs_verify_fsbno(mp, end) || 154 + XFS_FSB_TO_AGNO(mp, irec->br_startblock) != 155 + XFS_FSB_TO_AGNO(mp, end))) 349 156 xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, 350 157 irec->br_startoff); 351 158 ··· 358 157 info->whichfork == XFS_ATTR_FORK) 359 158 xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, 360 159 irec->br_startoff); 160 + 161 + if (info->is_rt) 162 + xfs_scrub_bmap_rt_extent_xref(info, ip, cur, irec); 163 + else 164 + xfs_scrub_bmap_extent_xref(info, ip, cur, irec); 361 165 362 166 info->lastoff = irec->br_startoff + irec->br_blockcount; 363 167 return error; ··· 441 235 struct xfs_ifork *ifp; 442 236 xfs_fileoff_t endoff; 443 237 struct xfs_iext_cursor icur; 444 - bool found; 445 238 int error = 0; 446 239 447 240 ifp = XFS_IFORK_PTR(ip, whichfork); ··· 519 314 /* Scrub extent records. 
*/ 520 315 info.lastoff = 0; 521 316 ifp = XFS_IFORK_PTR(ip, whichfork); 522 - for (found = xfs_iext_lookup_extent(ip, ifp, 0, &icur, &irec); 523 - found != 0; 524 - found = xfs_iext_next_extent(ifp, &icur, &irec)) { 317 + for_each_xfs_iext(ifp, &icur, &irec) { 525 318 if (xfs_scrub_should_terminate(sc, &error)) 526 319 break; 527 320 if (isnullstartblock(irec.br_startblock))
+173 -13
fs/xfs/scrub/btree.c
··· 42 42 * Check for btree operation errors. See the section about handling 43 43 * operational errors in common.c. 44 44 */ 45 - bool 46 - xfs_scrub_btree_process_error( 45 + static bool 46 + __xfs_scrub_btree_process_error( 47 47 struct xfs_scrub_context *sc, 48 48 struct xfs_btree_cur *cur, 49 49 int level, 50 - int *error) 50 + int *error, 51 + __u32 errflag, 52 + void *ret_ip) 51 53 { 52 54 if (*error == 0) 53 55 return true; ··· 62 60 case -EFSBADCRC: 63 61 case -EFSCORRUPTED: 64 62 /* Note the badness but don't abort. */ 65 - sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 63 + sc->sm->sm_flags |= errflag; 66 64 *error = 0; 67 65 /* fall through */ 68 66 default: 69 67 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) 70 68 trace_xfs_scrub_ifork_btree_op_error(sc, cur, level, 71 - *error, __return_address); 69 + *error, ret_ip); 72 70 else 73 71 trace_xfs_scrub_btree_op_error(sc, cur, level, 74 - *error, __return_address); 72 + *error, ret_ip); 75 73 break; 76 74 } 77 75 return false; 78 76 } 79 77 78 + bool 79 + xfs_scrub_btree_process_error( 80 + struct xfs_scrub_context *sc, 81 + struct xfs_btree_cur *cur, 82 + int level, 83 + int *error) 84 + { 85 + return __xfs_scrub_btree_process_error(sc, cur, level, error, 86 + XFS_SCRUB_OFLAG_CORRUPT, __return_address); 87 + } 88 + 89 + bool 90 + xfs_scrub_btree_xref_process_error( 91 + struct xfs_scrub_context *sc, 92 + struct xfs_btree_cur *cur, 93 + int level, 94 + int *error) 95 + { 96 + return __xfs_scrub_btree_process_error(sc, cur, level, error, 97 + XFS_SCRUB_OFLAG_XFAIL, __return_address); 98 + } 99 + 80 100 /* Record btree block corruption. 
*/ 101 + static void 102 + __xfs_scrub_btree_set_corrupt( 103 + struct xfs_scrub_context *sc, 104 + struct xfs_btree_cur *cur, 105 + int level, 106 + __u32 errflag, 107 + void *ret_ip) 108 + { 109 + sc->sm->sm_flags |= errflag; 110 + 111 + if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) 112 + trace_xfs_scrub_ifork_btree_error(sc, cur, level, 113 + ret_ip); 114 + else 115 + trace_xfs_scrub_btree_error(sc, cur, level, 116 + ret_ip); 117 + } 118 + 81 119 void 82 120 xfs_scrub_btree_set_corrupt( 83 121 struct xfs_scrub_context *sc, 84 122 struct xfs_btree_cur *cur, 85 123 int level) 86 124 { 87 - sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 125 + __xfs_scrub_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT, 126 + __return_address); 127 + } 88 128 89 - if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) 90 - trace_xfs_scrub_ifork_btree_error(sc, cur, level, 91 - __return_address); 92 - else 93 - trace_xfs_scrub_btree_error(sc, cur, level, 94 - __return_address); 129 + void 130 + xfs_scrub_btree_xref_set_corrupt( 131 + struct xfs_scrub_context *sc, 132 + struct xfs_btree_cur *cur, 133 + int level) 134 + { 135 + __xfs_scrub_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT, 136 + __return_address); 95 137 } 96 138 97 139 /* ··· 314 268 pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock); 315 269 if (!xfs_scrub_btree_ptr_ok(bs, level + 1, pp)) 316 270 goto out; 271 + if (pbp) 272 + xfs_scrub_buffer_recheck(bs->sc, pbp); 317 273 318 274 if (xfs_btree_diff_two_ptrs(cur, pp, sibling)) 319 275 xfs_scrub_btree_set_corrupt(bs->sc, cur, level); ··· 363 315 return error; 364 316 } 365 317 318 + struct check_owner { 319 + struct list_head list; 320 + xfs_daddr_t daddr; 321 + int level; 322 + }; 323 + 324 + /* 325 + * Make sure this btree block isn't in the free list and that there's 326 + * an rmap record for it. 
327 + */ 328 + STATIC int 329 + xfs_scrub_btree_check_block_owner( 330 + struct xfs_scrub_btree *bs, 331 + int level, 332 + xfs_daddr_t daddr) 333 + { 334 + xfs_agnumber_t agno; 335 + xfs_agblock_t agbno; 336 + xfs_btnum_t btnum; 337 + bool init_sa; 338 + int error = 0; 339 + 340 + if (!bs->cur) 341 + return 0; 342 + 343 + btnum = bs->cur->bc_btnum; 344 + agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr); 345 + agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr); 346 + 347 + init_sa = bs->cur->bc_flags & XFS_BTREE_LONG_PTRS; 348 + if (init_sa) { 349 + error = xfs_scrub_ag_init(bs->sc, agno, &bs->sc->sa); 350 + if (!xfs_scrub_btree_xref_process_error(bs->sc, bs->cur, 351 + level, &error)) 352 + return error; 353 + } 354 + 355 + xfs_scrub_xref_is_used_space(bs->sc, agbno, 1); 356 + /* 357 + * The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we 358 + * have to nullify it (to shut down further block owner checks) if 359 + * self-xref encounters problems. 360 + */ 361 + if (!bs->sc->sa.bno_cur && btnum == XFS_BTNUM_BNO) 362 + bs->cur = NULL; 363 + 364 + xfs_scrub_xref_is_owned_by(bs->sc, agbno, 1, bs->oinfo); 365 + if (!bs->sc->sa.rmap_cur && btnum == XFS_BTNUM_RMAP) 366 + bs->cur = NULL; 367 + 368 + if (init_sa) 369 + xfs_scrub_ag_free(bs->sc, &bs->sc->sa); 370 + 371 + return error; 372 + } 373 + 374 + /* Check the owner of a btree block. */ 375 + STATIC int 376 + xfs_scrub_btree_check_owner( 377 + struct xfs_scrub_btree *bs, 378 + int level, 379 + struct xfs_buf *bp) 380 + { 381 + struct xfs_btree_cur *cur = bs->cur; 382 + struct check_owner *co; 383 + 384 + if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && bp == NULL) 385 + return 0; 386 + 387 + /* 388 + * We want to cross-reference each btree block with the bnobt 389 + * and the rmapbt. We cannot cross-reference the bnobt or 390 + * rmapbt while scanning the bnobt or rmapbt, respectively, 391 + * because we cannot alter the cursor and we'd prefer not to 392 + * duplicate cursors. 
Therefore, save the buffer daddr for 393 + * later scanning. 394 + */ 395 + if (cur->bc_btnum == XFS_BTNUM_BNO || cur->bc_btnum == XFS_BTNUM_RMAP) { 396 + co = kmem_alloc(sizeof(struct check_owner), 397 + KM_MAYFAIL | KM_NOFS); 398 + if (!co) 399 + return -ENOMEM; 400 + co->level = level; 401 + co->daddr = XFS_BUF_ADDR(bp); 402 + list_add_tail(&co->list, &bs->to_check); 403 + return 0; 404 + } 405 + 406 + return xfs_scrub_btree_check_block_owner(bs, level, XFS_BUF_ADDR(bp)); 407 + } 408 + 366 409 /* 367 410 * Grab and scrub a btree block given a btree pointer. Returns block 368 411 * and buffer pointers (if applicable) if they're ok to use. ··· 488 349 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level); 489 350 return 0; 490 351 } 352 + if (*pbp) 353 + xfs_scrub_buffer_recheck(bs->sc, *pbp); 354 + 355 + /* 356 + * Check the block's owner; this function absorbs error codes 357 + * for us. 358 + */ 359 + error = xfs_scrub_btree_check_owner(bs, level, *pbp); 360 + if (error) 361 + return error; 491 362 492 363 /* 493 364 * Check the block's siblings; this function absorbs error codes ··· 570 421 struct xfs_btree_block *block; 571 422 int level; 572 423 struct xfs_buf *bp; 424 + struct check_owner *co; 425 + struct check_owner *n; 573 426 int i; 574 427 int error = 0; 575 428 ··· 663 512 } 664 513 665 514 out: 515 + /* Process deferred owner checks on btree blocks. */ 516 + list_for_each_entry_safe(co, n, &bs.to_check, list) { 517 + if (!error && bs.cur) 518 + error = xfs_scrub_btree_check_block_owner(&bs, 519 + co->level, co->daddr); 520 + list_del(&co->list); 521 + kmem_free(co); 522 + } 523 + 666 524 return error; 667 525 }
+9
fs/xfs/scrub/btree.h
··· 26 26 bool xfs_scrub_btree_process_error(struct xfs_scrub_context *sc, 27 27 struct xfs_btree_cur *cur, int level, int *error); 28 28 29 + /* Check for btree xref operation errors. */ 30 + bool xfs_scrub_btree_xref_process_error(struct xfs_scrub_context *sc, 31 + struct xfs_btree_cur *cur, int level, 32 + int *error); 33 + 29 34 /* Check for btree corruption. */ 30 35 void xfs_scrub_btree_set_corrupt(struct xfs_scrub_context *sc, 36 + struct xfs_btree_cur *cur, int level); 37 + 38 + /* Check for btree xref discrepancies. */ 39 + void xfs_scrub_btree_xref_set_corrupt(struct xfs_scrub_context *sc, 31 40 struct xfs_btree_cur *cur, int level); 32 41 33 42 struct xfs_scrub_btree;
+245 -38
fs/xfs/scrub/common.c
··· 78 78 */ 79 79 80 80 /* Check for operational errors. */ 81 + static bool 82 + __xfs_scrub_process_error( 83 + struct xfs_scrub_context *sc, 84 + xfs_agnumber_t agno, 85 + xfs_agblock_t bno, 86 + int *error, 87 + __u32 errflag, 88 + void *ret_ip) 89 + { 90 + switch (*error) { 91 + case 0: 92 + return true; 93 + case -EDEADLOCK: 94 + /* Used to restart an op with deadlock avoidance. */ 95 + trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error); 96 + break; 97 + case -EFSBADCRC: 98 + case -EFSCORRUPTED: 99 + /* Note the badness but don't abort. */ 100 + sc->sm->sm_flags |= errflag; 101 + *error = 0; 102 + /* fall through */ 103 + default: 104 + trace_xfs_scrub_op_error(sc, agno, bno, *error, 105 + ret_ip); 106 + break; 107 + } 108 + return false; 109 + } 110 + 81 111 bool 82 112 xfs_scrub_process_error( 83 113 struct xfs_scrub_context *sc, ··· 115 85 xfs_agblock_t bno, 116 86 int *error) 117 87 { 118 - switch (*error) { 119 - case 0: 120 - return true; 121 - case -EDEADLOCK: 122 - /* Used to restart an op with deadlock avoidance. */ 123 - trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error); 124 - break; 125 - case -EFSBADCRC: 126 - case -EFSCORRUPTED: 127 - /* Note the badness but don't abort. */ 128 - sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 129 - *error = 0; 130 - /* fall through */ 131 - default: 132 - trace_xfs_scrub_op_error(sc, agno, bno, *error, 133 - __return_address); 134 - break; 135 - } 136 - return false; 88 + return __xfs_scrub_process_error(sc, agno, bno, error, 89 + XFS_SCRUB_OFLAG_CORRUPT, __return_address); 90 + } 91 + 92 + bool 93 + xfs_scrub_xref_process_error( 94 + struct xfs_scrub_context *sc, 95 + xfs_agnumber_t agno, 96 + xfs_agblock_t bno, 97 + int *error) 98 + { 99 + return __xfs_scrub_process_error(sc, agno, bno, error, 100 + XFS_SCRUB_OFLAG_XFAIL, __return_address); 137 101 } 138 102 139 103 /* Check for operational errors for a file offset. 
*/ 140 - bool 141 - xfs_scrub_fblock_process_error( 104 + static bool 105 + __xfs_scrub_fblock_process_error( 142 106 struct xfs_scrub_context *sc, 143 107 int whichfork, 144 108 xfs_fileoff_t offset, 145 - int *error) 109 + int *error, 110 + __u32 errflag, 111 + void *ret_ip) 146 112 { 147 113 switch (*error) { 148 114 case 0: ··· 150 124 case -EFSBADCRC: 151 125 case -EFSCORRUPTED: 152 126 /* Note the badness but don't abort. */ 153 - sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 127 + sc->sm->sm_flags |= errflag; 154 128 *error = 0; 155 129 /* fall through */ 156 130 default: 157 131 trace_xfs_scrub_file_op_error(sc, whichfork, offset, *error, 158 - __return_address); 132 + ret_ip); 159 133 break; 160 134 } 161 135 return false; 136 + } 137 + 138 + bool 139 + xfs_scrub_fblock_process_error( 140 + struct xfs_scrub_context *sc, 141 + int whichfork, 142 + xfs_fileoff_t offset, 143 + int *error) 144 + { 145 + return __xfs_scrub_fblock_process_error(sc, whichfork, offset, error, 146 + XFS_SCRUB_OFLAG_CORRUPT, __return_address); 147 + } 148 + 149 + bool 150 + xfs_scrub_fblock_xref_process_error( 151 + struct xfs_scrub_context *sc, 152 + int whichfork, 153 + xfs_fileoff_t offset, 154 + int *error) 155 + { 156 + return __xfs_scrub_fblock_process_error(sc, whichfork, offset, error, 157 + XFS_SCRUB_OFLAG_XFAIL, __return_address); 162 158 } 163 159 164 160 /* ··· 231 183 trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address); 232 184 } 233 185 186 + /* Record a corruption while cross-referencing. */ 187 + void 188 + xfs_scrub_block_xref_set_corrupt( 189 + struct xfs_scrub_context *sc, 190 + struct xfs_buf *bp) 191 + { 192 + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT; 193 + trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address); 194 + } 195 + 234 196 /* 235 197 * Record a corrupt inode. The trace data will include the block given 236 198 * by bp if bp is given; otherwise it will use the block location of the ··· 256 198 trace_xfs_scrub_ino_error(sc, ino, bp ? 
bp->b_bn : 0, __return_address); 257 199 } 258 200 201 + /* Record a corruption while cross-referencing with an inode. */ 202 + void 203 + xfs_scrub_ino_xref_set_corrupt( 204 + struct xfs_scrub_context *sc, 205 + xfs_ino_t ino, 206 + struct xfs_buf *bp) 207 + { 208 + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT; 209 + trace_xfs_scrub_ino_error(sc, ino, bp ? bp->b_bn : 0, __return_address); 210 + } 211 + 259 212 /* Record corruption in a block indexed by a file fork. */ 260 213 void 261 214 xfs_scrub_fblock_set_corrupt( ··· 275 206 xfs_fileoff_t offset) 276 207 { 277 208 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 209 + trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address); 210 + } 211 + 212 + /* Record a corruption while cross-referencing a fork block. */ 213 + void 214 + xfs_scrub_fblock_xref_set_corrupt( 215 + struct xfs_scrub_context *sc, 216 + int whichfork, 217 + xfs_fileoff_t offset) 218 + { 219 + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT; 278 220 trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address); 279 221 } 280 222 ··· 322 242 { 323 243 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_INCOMPLETE; 324 244 trace_xfs_scrub_incomplete(sc, __return_address); 245 + } 246 + 247 + /* 248 + * rmap scrubbing -- compute the number of blocks with a given owner, 249 + * at least according to the reverse mapping data. 
250 + */ 251 + 252 + struct xfs_scrub_rmap_ownedby_info { 253 + struct xfs_owner_info *oinfo; 254 + xfs_filblks_t *blocks; 255 + }; 256 + 257 + STATIC int 258 + xfs_scrub_count_rmap_ownedby_irec( 259 + struct xfs_btree_cur *cur, 260 + struct xfs_rmap_irec *rec, 261 + void *priv) 262 + { 263 + struct xfs_scrub_rmap_ownedby_info *sroi = priv; 264 + bool irec_attr; 265 + bool oinfo_attr; 266 + 267 + irec_attr = rec->rm_flags & XFS_RMAP_ATTR_FORK; 268 + oinfo_attr = sroi->oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK; 269 + 270 + if (rec->rm_owner != sroi->oinfo->oi_owner) 271 + return 0; 272 + 273 + if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) || irec_attr == oinfo_attr) 274 + (*sroi->blocks) += rec->rm_blockcount; 275 + 276 + return 0; 277 + } 278 + 279 + /* 280 + * Calculate the number of blocks the rmap thinks are owned by something. 281 + * The caller should pass us an rmapbt cursor. 282 + */ 283 + int 284 + xfs_scrub_count_rmap_ownedby_ag( 285 + struct xfs_scrub_context *sc, 286 + struct xfs_btree_cur *cur, 287 + struct xfs_owner_info *oinfo, 288 + xfs_filblks_t *blocks) 289 + { 290 + struct xfs_scrub_rmap_ownedby_info sroi; 291 + 292 + sroi.oinfo = oinfo; 293 + *blocks = 0; 294 + sroi.blocks = blocks; 295 + 296 + return xfs_rmap_query_all(cur, xfs_scrub_count_rmap_ownedby_irec, 297 + &sroi); 325 298 } 326 299 327 300 /* ··· 435 302 error = xfs_alloc_read_agfl(mp, sc->tp, agno, agfl); 436 303 if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL)) 437 304 goto out; 438 - 305 + error = 0; 439 306 out: 440 307 return error; 441 308 } ··· 605 472 return error; 606 473 } 607 474 608 - error = xfs_scrub_setup_ag_header(sc, ip); 475 + error = xfs_scrub_setup_fs(sc, ip); 609 476 if (error) 610 477 return error; 611 478 ··· 636 503 struct xfs_scrub_context *sc, 637 504 struct xfs_inode *ip_in) 638 505 { 506 + struct xfs_imap imap; 639 507 struct xfs_mount *mp = sc->mp; 640 508 struct xfs_inode *ip = NULL; 641 509 int error; 642 - 643 - /* 644 - * If userspace 
passed us an AG number or a generation number 645 - * without an inode number, they haven't got a clue so bail out 646 - * immediately. 647 - */ 648 - if (sc->sm->sm_agno || (sc->sm->sm_gen && !sc->sm->sm_ino)) 649 - return -EINVAL; 650 510 651 511 /* We want to scan the inode we already had opened. */ 652 512 if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) { ··· 652 526 return -ENOENT; 653 527 error = xfs_iget(mp, NULL, sc->sm->sm_ino, 654 528 XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE, 0, &ip); 655 - if (error == -ENOENT || error == -EINVAL) { 656 - /* inode doesn't exist... */ 657 - return -ENOENT; 658 - } else if (error) { 529 + switch (error) { 530 + case -ENOENT: 531 + /* Inode doesn't exist, just bail out. */ 532 + return error; 533 + case 0: 534 + /* Got an inode, continue. */ 535 + break; 536 + case -EINVAL: 537 + /* 538 + * -EINVAL with IGET_UNTRUSTED could mean one of several 539 + * things: userspace gave us an inode number that doesn't 540 + * correspond to fs space, or doesn't have an inobt entry; 541 + * or it could simply mean that the inode buffer failed the 542 + * read verifiers. 543 + * 544 + * Try just the inode mapping lookup -- if it succeeds, then 545 + * the inode buffer verifier failed and something needs fixing. 546 + * Otherwise, we really couldn't find it so tell userspace 547 + * that it no longer exists. 548 + */ 549 + error = xfs_imap(sc->mp, sc->tp, sc->sm->sm_ino, &imap, 550 + XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE); 551 + if (error) 552 + return -ENOENT; 553 + error = -EFSCORRUPTED; 554 + /* fall through */ 555 + default: 659 556 trace_xfs_scrub_op_error(sc, 660 557 XFS_INO_TO_AGNO(mp, sc->sm->sm_ino), 661 558 XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino), ··· 720 571 out: 721 572 /* scrub teardown will unlock and release the inode for us */ 722 573 return error; 574 + } 575 + 576 + /* 577 + * Predicate that decides if we need to evaluate the cross-reference check. 
578 + * If there was an error accessing the cross-reference btree, just delete 579 + * the cursor and skip the check. 580 + */ 581 + bool 582 + xfs_scrub_should_check_xref( 583 + struct xfs_scrub_context *sc, 584 + int *error, 585 + struct xfs_btree_cur **curpp) 586 + { 587 + if (*error == 0) 588 + return true; 589 + 590 + if (curpp) { 591 + /* If we've already given up on xref, just bail out. */ 592 + if (!*curpp) 593 + return false; 594 + 595 + /* xref error, delete cursor and bail out. */ 596 + xfs_btree_del_cursor(*curpp, XFS_BTREE_ERROR); 597 + *curpp = NULL; 598 + } 599 + 600 + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL; 601 + trace_xfs_scrub_xref_error(sc, *error, __return_address); 602 + 603 + /* 604 + * Errors encountered during cross-referencing with another 605 + * data structure should not cause this scrubber to abort. 606 + */ 607 + *error = 0; 608 + return false; 609 + } 610 + 611 + /* Run the structure verifiers on in-memory buffers to detect bad memory. */ 612 + void 613 + xfs_scrub_buffer_recheck( 614 + struct xfs_scrub_context *sc, 615 + struct xfs_buf *bp) 616 + { 617 + xfs_failaddr_t fa; 618 + 619 + if (bp->b_ops == NULL) { 620 + xfs_scrub_block_set_corrupt(sc, bp); 621 + return; 622 + } 623 + if (bp->b_ops->verify_struct == NULL) { 624 + xfs_scrub_set_incomplete(sc); 625 + return; 626 + } 627 + fa = bp->b_ops->verify_struct(bp); 628 + if (!fa) 629 + return; 630 + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 631 + trace_xfs_scrub_block_error(sc, bp->b_bn, fa); 723 632 }
+21 -2
fs/xfs/scrub/common.h
··· 56 56 bool xfs_scrub_fblock_process_error(struct xfs_scrub_context *sc, int whichfork, 57 57 xfs_fileoff_t offset, int *error); 58 58 59 + bool xfs_scrub_xref_process_error(struct xfs_scrub_context *sc, 60 + xfs_agnumber_t agno, xfs_agblock_t bno, int *error); 61 + bool xfs_scrub_fblock_xref_process_error(struct xfs_scrub_context *sc, 62 + int whichfork, xfs_fileoff_t offset, int *error); 63 + 59 64 void xfs_scrub_block_set_preen(struct xfs_scrub_context *sc, 60 65 struct xfs_buf *bp); 61 66 void xfs_scrub_ino_set_preen(struct xfs_scrub_context *sc, xfs_ino_t ino, ··· 73 68 void xfs_scrub_fblock_set_corrupt(struct xfs_scrub_context *sc, int whichfork, 74 69 xfs_fileoff_t offset); 75 70 71 + void xfs_scrub_block_xref_set_corrupt(struct xfs_scrub_context *sc, 72 + struct xfs_buf *bp); 73 + void xfs_scrub_ino_xref_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino, 74 + struct xfs_buf *bp); 75 + void xfs_scrub_fblock_xref_set_corrupt(struct xfs_scrub_context *sc, 76 + int whichfork, xfs_fileoff_t offset); 77 + 76 78 void xfs_scrub_ino_set_warning(struct xfs_scrub_context *sc, xfs_ino_t ino, 77 79 struct xfs_buf *bp); 78 80 void xfs_scrub_fblock_set_warning(struct xfs_scrub_context *sc, int whichfork, ··· 88 76 void xfs_scrub_set_incomplete(struct xfs_scrub_context *sc); 89 77 int xfs_scrub_checkpoint_log(struct xfs_mount *mp); 90 78 79 + /* Are we set up for a cross-referencing check? 
*/ 80 + bool xfs_scrub_should_check_xref(struct xfs_scrub_context *sc, int *error, 81 + struct xfs_btree_cur **curpp); 82 + 91 83 /* Setup functions */ 92 84 int xfs_scrub_setup_fs(struct xfs_scrub_context *sc, struct xfs_inode *ip); 93 - int xfs_scrub_setup_ag_header(struct xfs_scrub_context *sc, 94 - struct xfs_inode *ip); 95 85 int xfs_scrub_setup_ag_allocbt(struct xfs_scrub_context *sc, 96 86 struct xfs_inode *ip); 97 87 int xfs_scrub_setup_ag_iallocbt(struct xfs_scrub_context *sc, ··· 148 134 int (*fn)(struct xfs_scrub_context *, xfs_agblock_t bno, 149 135 void *), 150 136 void *priv); 137 + int xfs_scrub_count_rmap_ownedby_ag(struct xfs_scrub_context *sc, 138 + struct xfs_btree_cur *cur, 139 + struct xfs_owner_info *oinfo, 140 + xfs_filblks_t *blocks); 151 141 152 142 int xfs_scrub_setup_ag_btree(struct xfs_scrub_context *sc, 153 143 struct xfs_inode *ip, bool force_log); 154 144 int xfs_scrub_get_inode(struct xfs_scrub_context *sc, struct xfs_inode *ip_in); 155 145 int xfs_scrub_setup_inode_contents(struct xfs_scrub_context *sc, 156 146 struct xfs_inode *ip, unsigned int resblks); 147 + void xfs_scrub_buffer_recheck(struct xfs_scrub_context *sc, struct xfs_buf *bp); 157 148 158 149 #endif /* __XFS_SCRUB_COMMON_H__ */
+22
fs/xfs/scrub/dabtree.c
··· 233 233 return; 234 234 } 235 235 } 236 + static void * 237 + xfs_scrub_da_btree_verify( 238 + struct xfs_buf *bp) 239 + { 240 + struct xfs_da_blkinfo *info = bp->b_addr; 241 + 242 + switch (be16_to_cpu(info->magic)) { 243 + case XFS_DIR2_LEAF1_MAGIC: 244 + case XFS_DIR3_LEAF1_MAGIC: 245 + bp->b_ops = &xfs_dir3_leaf1_buf_ops; 246 + return bp->b_ops->verify_struct(bp); 247 + default: 248 + bp->b_ops = &xfs_da3_node_buf_ops; 249 + return bp->b_ops->verify_struct(bp); 250 + } 251 + } 236 252 237 253 static const struct xfs_buf_ops xfs_scrub_da_btree_buf_ops = { 238 254 .name = "xfs_scrub_da_btree", 239 255 .verify_read = xfs_scrub_da_btree_read_verify, 240 256 .verify_write = xfs_scrub_da_btree_write_verify, 257 + .verify_struct = xfs_scrub_da_btree_verify, 241 258 }; 242 259 243 260 /* Check a block's sibling. */ ··· 293 276 xfs_scrub_da_set_corrupt(ds, level); 294 277 return error; 295 278 } 279 + if (ds->state->altpath.blk[level].bp) 280 + xfs_scrub_buffer_recheck(ds->sc, 281 + ds->state->altpath.blk[level].bp); 296 282 297 283 /* Compare upper level pointer to sibling pointer. */ 298 284 if (ds->state->altpath.blk[level].blkno != sibling) ··· 378 358 &xfs_scrub_da_btree_buf_ops); 379 359 if (!xfs_scrub_da_process_error(ds, level, &error)) 380 360 goto out_nobuf; 361 + if (blk->bp) 362 + xfs_scrub_buffer_recheck(ds->sc, blk->bp); 381 363 382 364 /* 383 365 * We didn't find a dir btree root block, which means that
+35 -9
fs/xfs/scrub/dir.c
··· 92 92 * inodes can trigger immediate inactive cleanup of the inode. 93 93 */ 94 94 error = xfs_iget(mp, sdc->sc->tp, inum, 0, 0, &ip); 95 - if (!xfs_scrub_fblock_process_error(sdc->sc, XFS_DATA_FORK, offset, 95 + if (!xfs_scrub_fblock_xref_process_error(sdc->sc, XFS_DATA_FORK, offset, 96 96 &error)) 97 97 goto out; 98 98 ··· 200 200 struct xfs_inode *dp = ds->dargs.dp; 201 201 struct xfs_dir2_data_entry *dent; 202 202 struct xfs_buf *bp; 203 + char *p, *endp; 203 204 xfs_ino_t ino; 204 205 xfs_dablk_t rec_bno; 205 206 xfs_dir2_db_t db; ··· 238 237 xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); 239 238 goto out; 240 239 } 240 + xfs_scrub_buffer_recheck(ds->sc, bp); 241 + 242 + dent = (struct xfs_dir2_data_entry *)(((char *)bp->b_addr) + off); 243 + 244 + /* Make sure we got a real directory entry. */ 245 + p = (char *)mp->m_dir_inode_ops->data_entry_p(bp->b_addr); 246 + endp = xfs_dir3_data_endp(mp->m_dir_geo, bp->b_addr); 247 + if (!endp) { 248 + xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); 249 + goto out_relse; 250 + } 251 + while (p < endp) { 252 + struct xfs_dir2_data_entry *dep; 253 + struct xfs_dir2_data_unused *dup; 254 + 255 + dup = (struct xfs_dir2_data_unused *)p; 256 + if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { 257 + p += be16_to_cpu(dup->length); 258 + continue; 259 + } 260 + dep = (struct xfs_dir2_data_entry *)p; 261 + if (dep == dent) 262 + break; 263 + p += mp->m_dir_inode_ops->data_entsize(dep->namelen); 264 + } 265 + if (p >= endp) { 266 + xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); 267 + goto out_relse; 268 + } 241 269 242 270 /* Retrieve the entry, sanity check it, and compare hashes. 
*/ 243 - dent = (struct xfs_dir2_data_entry *)(((char *)bp->b_addr) + off); 244 271 ino = be64_to_cpu(dent->inumber); 245 272 hash = be32_to_cpu(ent->hashval); 246 273 tag = be16_to_cpup(dp->d_ops->data_entry_tag_p(dent)); ··· 353 324 } 354 325 if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) 355 326 goto out; 327 + xfs_scrub_buffer_recheck(sc, bp); 356 328 357 329 /* XXX: Check xfs_dir3_data_hdr.pad is zero once we start setting it. */ 358 330 ··· 391 361 392 362 /* Make sure the bestfrees are actually the best free spaces. */ 393 363 ptr = (char *)d_ops->data_entry_p(bp->b_addr); 394 - if (is_block) { 395 - struct xfs_dir2_block_tail *btp; 396 - 397 - btp = xfs_dir2_block_tail_p(mp->m_dir_geo, bp->b_addr); 398 - endptr = (char *)xfs_dir2_block_leaf_p(btp); 399 - } else 400 - endptr = (char *)bp->b_addr + BBTOB(bp->b_length); 364 + endptr = xfs_dir3_data_endp(mp->m_dir_geo, bp->b_addr); 401 365 402 366 /* Iterate the entries, stopping when we hit or go past the end. */ 403 367 while (ptr < endptr) { ··· 498 474 error = xfs_dir3_leaf_read(sc->tp, sc->ip, lblk, -1, &bp); 499 475 if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) 500 476 goto out; 477 + xfs_scrub_buffer_recheck(sc, bp); 501 478 502 479 leaf = bp->b_addr; 503 480 d_ops->leaf_hdr_from_disk(&leafhdr, leaf); ··· 584 559 error = xfs_dir2_free_read(sc->tp, sc->ip, lblk, &bp); 585 560 if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) 586 561 goto out; 562 + xfs_scrub_buffer_recheck(sc, bp); 587 563 588 564 if (xfs_sb_version_hascrc(&sc->mp->m_sb)) { 589 565 struct xfs_dir3_free_hdr *hdr3 = bp->b_addr;
+192 -2
fs/xfs/scrub/ialloc.c
··· 58 58 59 59 /* Inode btree scrubber. */ 60 60 61 + /* 62 + * If we're checking the finobt, cross-reference with the inobt. 63 + * Otherwise we're checking the inobt; if there is an finobt, make sure 64 + * we have a record or not depending on freecount. 65 + */ 66 + static inline void 67 + xfs_scrub_iallocbt_chunk_xref_other( 68 + struct xfs_scrub_context *sc, 69 + struct xfs_inobt_rec_incore *irec, 70 + xfs_agino_t agino) 71 + { 72 + struct xfs_btree_cur **pcur; 73 + bool has_irec; 74 + int error; 75 + 76 + if (sc->sm->sm_type == XFS_SCRUB_TYPE_FINOBT) 77 + pcur = &sc->sa.ino_cur; 78 + else 79 + pcur = &sc->sa.fino_cur; 80 + if (!(*pcur)) 81 + return; 82 + error = xfs_ialloc_has_inode_record(*pcur, agino, agino, &has_irec); 83 + if (!xfs_scrub_should_check_xref(sc, &error, pcur)) 84 + return; 85 + if (((irec->ir_freecount > 0 && !has_irec) || 86 + (irec->ir_freecount == 0 && has_irec))) 87 + xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0); 88 + } 89 + 90 + /* Cross-reference with the other btrees. */ 91 + STATIC void 92 + xfs_scrub_iallocbt_chunk_xref( 93 + struct xfs_scrub_context *sc, 94 + struct xfs_inobt_rec_incore *irec, 95 + xfs_agino_t agino, 96 + xfs_agblock_t agbno, 97 + xfs_extlen_t len) 98 + { 99 + struct xfs_owner_info oinfo; 100 + 101 + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 102 + return; 103 + 104 + xfs_scrub_xref_is_used_space(sc, agbno, len); 105 + xfs_scrub_iallocbt_chunk_xref_other(sc, irec, agino); 106 + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES); 107 + xfs_scrub_xref_is_owned_by(sc, agbno, len, &oinfo); 108 + xfs_scrub_xref_is_not_shared(sc, agbno, len); 109 + } 110 + 61 111 /* Is this chunk worth checking? 
*/ 62 112 STATIC bool 63 113 xfs_scrub_iallocbt_chunk( ··· 125 75 !xfs_verify_agbno(mp, agno, bno) || 126 76 !xfs_verify_agbno(mp, agno, bno + len - 1)) 127 77 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 78 + 79 + xfs_scrub_iallocbt_chunk_xref(bs->sc, irec, agino, bno, len); 128 80 129 81 return true; 130 82 } ··· 242 190 } 243 191 244 192 /* If any part of this is a hole, skip it. */ 245 - if (ir_holemask) 193 + if (ir_holemask) { 194 + xfs_scrub_xref_is_not_owned_by(bs->sc, agbno, 195 + blks_per_cluster, &oinfo); 246 196 continue; 197 + } 198 + 199 + xfs_scrub_xref_is_owned_by(bs->sc, agbno, blks_per_cluster, 200 + &oinfo); 247 201 248 202 /* Grab the inode cluster buffer. */ 249 203 imap.im_blkno = XFS_AGB_TO_DADDR(mp, bs->cur->bc_private.a.agno, ··· 285 227 union xfs_btree_rec *rec) 286 228 { 287 229 struct xfs_mount *mp = bs->cur->bc_mp; 230 + xfs_filblks_t *inode_blocks = bs->private; 288 231 struct xfs_inobt_rec_incore irec; 289 232 uint64_t holes; 290 233 xfs_agnumber_t agno = bs->cur->bc_private.a.agno; ··· 322 263 if ((agbno & (xfs_ialloc_cluster_alignment(mp) - 1)) || 323 264 (agbno & (xfs_icluster_size_fsb(mp) - 1))) 324 265 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 266 + 267 + *inode_blocks += XFS_B_TO_FSB(mp, 268 + irec.ir_count * mp->m_sb.sb_inodesize); 325 269 326 270 /* Handle non-sparse inodes */ 327 271 if (!xfs_inobt_issparse(irec.ir_holemask)) { ··· 370 308 return error; 371 309 } 372 310 311 + /* 312 + * Make sure the inode btrees are as large as the rmap thinks they are. 313 + * Don't bother if we're missing btree cursors, as we're already corrupt. 
314 + */ 315 + STATIC void 316 + xfs_scrub_iallocbt_xref_rmap_btreeblks( 317 + struct xfs_scrub_context *sc, 318 + int which) 319 + { 320 + struct xfs_owner_info oinfo; 321 + xfs_filblks_t blocks; 322 + xfs_extlen_t inobt_blocks = 0; 323 + xfs_extlen_t finobt_blocks = 0; 324 + int error; 325 + 326 + if (!sc->sa.ino_cur || !sc->sa.rmap_cur || 327 + (xfs_sb_version_hasfinobt(&sc->mp->m_sb) && !sc->sa.fino_cur)) 328 + return; 329 + 330 + /* Check that we saw as many inobt blocks as the rmap says. */ 331 + error = xfs_btree_count_blocks(sc->sa.ino_cur, &inobt_blocks); 332 + if (!xfs_scrub_process_error(sc, 0, 0, &error)) 333 + return; 334 + 335 + if (sc->sa.fino_cur) { 336 + error = xfs_btree_count_blocks(sc->sa.fino_cur, &finobt_blocks); 337 + if (!xfs_scrub_process_error(sc, 0, 0, &error)) 338 + return; 339 + } 340 + 341 + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT); 342 + error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, &oinfo, 343 + &blocks); 344 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) 345 + return; 346 + if (blocks != inobt_blocks + finobt_blocks) 347 + xfs_scrub_btree_set_corrupt(sc, sc->sa.ino_cur, 0); 348 + } 349 + 350 + /* 351 + * Make sure that the inobt records point to the same number of blocks as 352 + * the rmap says are owned by inodes. 353 + */ 354 + STATIC void 355 + xfs_scrub_iallocbt_xref_rmap_inodes( 356 + struct xfs_scrub_context *sc, 357 + int which, 358 + xfs_filblks_t inode_blocks) 359 + { 360 + struct xfs_owner_info oinfo; 361 + xfs_filblks_t blocks; 362 + int error; 363 + 364 + if (!sc->sa.rmap_cur) 365 + return; 366 + 367 + /* Check that we saw as many inode blocks as the rmap knows about. 
*/ 368 + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES); 369 + error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, &oinfo, 370 + &blocks); 371 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) 372 + return; 373 + if (blocks != inode_blocks) 374 + xfs_scrub_btree_set_corrupt(sc, sc->sa.ino_cur, 0); 375 + } 376 + 373 377 /* Scrub the inode btrees for some AG. */ 374 378 STATIC int 375 379 xfs_scrub_iallocbt( ··· 444 316 { 445 317 struct xfs_btree_cur *cur; 446 318 struct xfs_owner_info oinfo; 319 + xfs_filblks_t inode_blocks = 0; 320 + int error; 447 321 448 322 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT); 449 323 cur = which == XFS_BTNUM_INO ? sc->sa.ino_cur : sc->sa.fino_cur; 450 - return xfs_scrub_btree(sc, cur, xfs_scrub_iallocbt_rec, &oinfo, NULL); 324 + error = xfs_scrub_btree(sc, cur, xfs_scrub_iallocbt_rec, &oinfo, 325 + &inode_blocks); 326 + if (error) 327 + return error; 328 + 329 + xfs_scrub_iallocbt_xref_rmap_btreeblks(sc, which); 330 + 331 + /* 332 + * If we're scrubbing the inode btree, inode_blocks is the number of 333 + * blocks pointed to by all the inode chunk records. Therefore, we 334 + * should compare to the number of inode chunk blocks that the rmap 335 + * knows about. We can't do this for the finobt since it only points 336 + * to inode chunks with free inodes. 337 + */ 338 + if (which == XFS_BTNUM_INO) 339 + xfs_scrub_iallocbt_xref_rmap_inodes(sc, which, inode_blocks); 340 + 341 + return error; 451 342 } 452 343 453 344 int ··· 481 334 struct xfs_scrub_context *sc) 482 335 { 483 336 return xfs_scrub_iallocbt(sc, XFS_BTNUM_FINO); 337 + } 338 + 339 + /* See if an inode btree has (or doesn't have) an inode chunk record. 
*/ 340 + static inline void 341 + xfs_scrub_xref_inode_check( 342 + struct xfs_scrub_context *sc, 343 + xfs_agblock_t agbno, 344 + xfs_extlen_t len, 345 + struct xfs_btree_cur **icur, 346 + bool should_have_inodes) 347 + { 348 + bool has_inodes; 349 + int error; 350 + 351 + if (!(*icur)) 352 + return; 353 + 354 + error = xfs_ialloc_has_inodes_at_extent(*icur, agbno, len, &has_inodes); 355 + if (!xfs_scrub_should_check_xref(sc, &error, icur)) 356 + return; 357 + if (has_inodes != should_have_inodes) 358 + xfs_scrub_btree_xref_set_corrupt(sc, *icur, 0); 359 + } 360 + 361 + /* xref check that the extent is not covered by inodes */ 362 + void 363 + xfs_scrub_xref_is_not_inode_chunk( 364 + struct xfs_scrub_context *sc, 365 + xfs_agblock_t agbno, 366 + xfs_extlen_t len) 367 + { 368 + xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur, false); 369 + xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.fino_cur, false); 370 + } 371 + 372 + /* xref check that the extent is covered by inodes */ 373 + void 374 + xfs_scrub_xref_is_inode_chunk( 375 + struct xfs_scrub_context *sc, 376 + xfs_agblock_t agbno, 377 + xfs_extlen_t len) 378 + { 379 + xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur, true); 484 380 }
+162 -16
fs/xfs/scrub/inode.c
··· 36 36 #include "xfs_ialloc.h" 37 37 #include "xfs_da_format.h" 38 38 #include "xfs_reflink.h" 39 + #include "xfs_rmap.h" 40 + #include "xfs_bmap.h" 41 + #include "xfs_bmap_util.h" 39 42 #include "scrub/xfs_scrub.h" 40 43 #include "scrub/scrub.h" 41 44 #include "scrub/common.h" 45 + #include "scrub/btree.h" 42 46 #include "scrub/trace.h" 43 47 44 48 /* ··· 68 64 break; 69 65 case -EFSCORRUPTED: 70 66 case -EFSBADCRC: 71 - return 0; 67 + return xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp); 72 68 default: 73 69 return error; 74 70 } ··· 396 392 break; 397 393 } 398 394 395 + /* di_[amc]time.nsec */ 396 + if (be32_to_cpu(dip->di_atime.t_nsec) >= NSEC_PER_SEC) 397 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 398 + if (be32_to_cpu(dip->di_mtime.t_nsec) >= NSEC_PER_SEC) 399 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 400 + if (be32_to_cpu(dip->di_ctime.t_nsec) >= NSEC_PER_SEC) 401 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 402 + 399 403 /* 400 404 * di_size. xfs_dinode_verify checks for things that screw up 401 405 * the VFS such as the upper bit being set and zero-length ··· 507 495 } 508 496 509 497 if (dip->di_version >= 3) { 498 + if (be32_to_cpu(dip->di_crtime.t_nsec) >= NSEC_PER_SEC) 499 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 510 500 xfs_scrub_inode_flags2(sc, bp, dip, ino, mode, flags, flags2); 511 501 xfs_scrub_inode_cowextsize(sc, bp, dip, ino, mode, flags, 512 502 flags2); ··· 560 546 */ 561 547 bp->b_ops = &xfs_inode_buf_ops; 562 548 dip = xfs_buf_offset(bp, imap.im_boffset); 563 - if (!xfs_dinode_verify(mp, ino, dip) || 549 + if (xfs_dinode_verify(mp, ino, dip) != NULL || 564 550 !xfs_dinode_good_version(mp, dip->di_version)) { 565 551 xfs_scrub_ino_set_corrupt(sc, ino, bp); 566 552 goto out_buf; ··· 581 567 return error; 582 568 } 583 569 570 + /* 571 + * Make sure the finobt doesn't think this inode is free. 572 + * We don't have to check the inobt ourselves because we got the inode via 573 + * IGET_UNTRUSTED, which checks the inobt for us. 
574 + */ 575 + static void 576 + xfs_scrub_inode_xref_finobt( 577 + struct xfs_scrub_context *sc, 578 + xfs_ino_t ino) 579 + { 580 + struct xfs_inobt_rec_incore rec; 581 + xfs_agino_t agino; 582 + int has_record; 583 + int error; 584 + 585 + if (!sc->sa.fino_cur) 586 + return; 587 + 588 + agino = XFS_INO_TO_AGINO(sc->mp, ino); 589 + 590 + /* 591 + * Try to get the finobt record. If we can't get it, then we're 592 + * in good shape. 593 + */ 594 + error = xfs_inobt_lookup(sc->sa.fino_cur, agino, XFS_LOOKUP_LE, 595 + &has_record); 596 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.fino_cur) || 597 + !has_record) 598 + return; 599 + 600 + error = xfs_inobt_get_rec(sc->sa.fino_cur, &rec, &has_record); 601 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.fino_cur) || 602 + !has_record) 603 + return; 604 + 605 + /* 606 + * Otherwise, make sure this record either doesn't cover this inode, 607 + * or that it does but it's marked present. 608 + */ 609 + if (rec.ir_startino > agino || 610 + rec.ir_startino + XFS_INODES_PER_CHUNK <= agino) 611 + return; 612 + 613 + if (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)) 614 + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.fino_cur, 0); 615 + } 616 + 617 + /* Cross reference the inode fields with the forks. */ 618 + STATIC void 619 + xfs_scrub_inode_xref_bmap( 620 + struct xfs_scrub_context *sc, 621 + struct xfs_dinode *dip) 622 + { 623 + xfs_extnum_t nextents; 624 + xfs_filblks_t count; 625 + xfs_filblks_t acount; 626 + int error; 627 + 628 + /* Walk all the extents to check nextents/naextents/nblocks. 
*/ 629 + error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK, 630 + &nextents, &count); 631 + if (!xfs_scrub_should_check_xref(sc, &error, NULL)) 632 + return; 633 + if (nextents < be32_to_cpu(dip->di_nextents)) 634 + xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL); 635 + 636 + error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK, 637 + &nextents, &acount); 638 + if (!xfs_scrub_should_check_xref(sc, &error, NULL)) 639 + return; 640 + if (nextents != be16_to_cpu(dip->di_anextents)) 641 + xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL); 642 + 643 + /* Check nblocks against the inode. */ 644 + if (count + acount != be64_to_cpu(dip->di_nblocks)) 645 + xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL); 646 + } 647 + 648 + /* Cross-reference with the other btrees. */ 649 + STATIC void 650 + xfs_scrub_inode_xref( 651 + struct xfs_scrub_context *sc, 652 + xfs_ino_t ino, 653 + struct xfs_dinode *dip) 654 + { 655 + struct xfs_owner_info oinfo; 656 + xfs_agnumber_t agno; 657 + xfs_agblock_t agbno; 658 + int error; 659 + 660 + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 661 + return; 662 + 663 + agno = XFS_INO_TO_AGNO(sc->mp, ino); 664 + agbno = XFS_INO_TO_AGBNO(sc->mp, ino); 665 + 666 + error = xfs_scrub_ag_init(sc, agno, &sc->sa); 667 + if (!xfs_scrub_xref_process_error(sc, agno, agbno, &error)) 668 + return; 669 + 670 + xfs_scrub_xref_is_used_space(sc, agbno, 1); 671 + xfs_scrub_inode_xref_finobt(sc, ino); 672 + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES); 673 + xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); 674 + xfs_scrub_xref_is_not_shared(sc, agbno, 1); 675 + xfs_scrub_inode_xref_bmap(sc, dip); 676 + 677 + xfs_scrub_ag_free(sc, &sc->sa); 678 + } 679 + 680 + /* 681 + * If the reflink iflag disagrees with a scan for shared data fork extents, 682 + * either flag an error (shared extents w/ no flag) or a preen (flag set w/o 683 + * any shared extents). 
We already checked for reflink iflag set on a non 684 + * reflink filesystem. 685 + */ 686 + static void 687 + xfs_scrub_inode_check_reflink_iflag( 688 + struct xfs_scrub_context *sc, 689 + xfs_ino_t ino, 690 + struct xfs_buf *bp) 691 + { 692 + struct xfs_mount *mp = sc->mp; 693 + bool has_shared; 694 + int error; 695 + 696 + if (!xfs_sb_version_hasreflink(&mp->m_sb)) 697 + return; 698 + 699 + error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip, 700 + &has_shared); 701 + if (!xfs_scrub_xref_process_error(sc, XFS_INO_TO_AGNO(mp, ino), 702 + XFS_INO_TO_AGBNO(mp, ino), &error)) 703 + return; 704 + if (xfs_is_reflink_inode(sc->ip) && !has_shared) 705 + xfs_scrub_ino_set_preen(sc, ino, bp); 706 + else if (!xfs_is_reflink_inode(sc->ip) && has_shared) 707 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 708 + } 709 + 584 710 /* Scrub an inode. */ 585 711 int 586 712 xfs_scrub_inode( 587 713 struct xfs_scrub_context *sc) 588 714 { 589 715 struct xfs_dinode di; 590 - struct xfs_mount *mp = sc->mp; 591 716 struct xfs_buf *bp = NULL; 592 717 struct xfs_dinode *dip; 593 718 xfs_ino_t ino; 594 - 595 - bool has_shared; 596 719 int error = 0; 597 720 598 721 /* Did we get the in-core inode, or are we doing this manually? */ ··· 754 603 goto out; 755 604 756 605 /* 757 - * Does this inode have the reflink flag set but no shared extents? 758 - * Set the preening flag if this is the case. 606 + * Look for discrepancies between file's data blocks and the reflink 607 + * iflag. We already checked the iflag against the file mode when 608 + * we scrubbed the dinode. 
759 609 */ 760 - if (xfs_is_reflink_inode(sc->ip)) { 761 - error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip, 762 - &has_shared); 763 - if (!xfs_scrub_process_error(sc, XFS_INO_TO_AGNO(mp, ino), 764 - XFS_INO_TO_AGBNO(mp, ino), &error)) 765 - goto out; 766 - if (!has_shared) 767 - xfs_scrub_ino_set_preen(sc, ino, bp); 768 - } 610 + if (S_ISREG(VFS_I(sc->ip)->i_mode)) 611 + xfs_scrub_inode_check_reflink_iflag(sc, ino, bp); 769 612 613 + xfs_scrub_inode_xref(sc, ino, dip); 770 614 out: 771 615 if (bp) 772 616 xfs_trans_brelse(sc->tp, bp);
+4 -4
fs/xfs/scrub/parent.c
··· 169 169 * immediate inactive cleanup of the inode. 170 170 */ 171 171 error = xfs_iget(mp, sc->tp, dnum, 0, 0, &dp); 172 - if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) 172 + if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) 173 173 goto out; 174 - if (dp == sc->ip) { 174 + if (dp == sc->ip || !S_ISDIR(VFS_I(dp)->i_mode)) { 175 175 xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); 176 176 goto out_rele; 177 177 } ··· 185 185 */ 186 186 if (xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) { 187 187 error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink); 188 - if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, 188 + if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, 189 189 &error)) 190 190 goto out_unlock; 191 191 if (nlink != expected_nlink) ··· 205 205 206 206 /* Go looking for our dentry. */ 207 207 error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink); 208 - if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) 208 + if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) 209 209 goto out_unlock; 210 210 211 211 /* Drop the parent lock, relock this inode. */
-7
fs/xfs/scrub/quota.c
··· 67 67 { 68 68 uint dqtype; 69 69 70 - /* 71 - * If userspace gave us an AG number or inode data, they don't 72 - * know what they're doing. Get out. 73 - */ 74 - if (sc->sm->sm_agno || sc->sm->sm_ino || sc->sm->sm_gen) 75 - return -EINVAL; 76 - 77 70 dqtype = xfs_scrub_quota_to_dqtype(sc); 78 71 if (dqtype == 0) 79 72 return -EINVAL;
+418 -2
fs/xfs/scrub/refcount.c
··· 31 31 #include "xfs_sb.h" 32 32 #include "xfs_alloc.h" 33 33 #include "xfs_rmap.h" 34 + #include "xfs_refcount.h" 34 35 #include "scrub/xfs_scrub.h" 35 36 #include "scrub/scrub.h" 36 37 #include "scrub/common.h" ··· 51 50 52 51 /* Reference count btree scrubber. */ 53 52 53 + /* 54 + * Confirming Reference Counts via Reverse Mappings 55 + * 56 + * We want to count the reverse mappings overlapping a refcount record 57 + * (bno, len, refcount), allowing for the possibility that some of the 58 + * overlap may come from smaller adjoining reverse mappings, while some 59 + * comes from single extents which overlap the range entirely. The 60 + * outer loop is as follows: 61 + * 62 + * 1. For all reverse mappings overlapping the refcount extent, 63 + * a. If a given rmap completely overlaps, mark it as seen. 64 + * b. Otherwise, record the fragment (in agbno order) for later 65 + * processing. 66 + * 67 + * Once we've seen all the rmaps, we know that for all blocks in the 68 + * refcount record we want to find $refcount owners and we've already 69 + * visited $seen extents that overlap all the blocks. Therefore, we 70 + * need to find ($refcount - $seen) owners for every block in the 71 + * extent; call that quantity $target_nr. Proceed as follows: 72 + * 73 + * 2. Pull the first $target_nr fragments from the list; all of them 74 + * should start at or before the start of the extent. 75 + * Call this subset of fragments the working set. 76 + * 3. Until there are no more unprocessed fragments, 77 + * a. Find the shortest fragments in the set and remove them. 78 + * b. Note the block number of the end of these fragments. 79 + * c. Pull the same number of fragments from the list. All of these 80 + * fragments should start at the block number recorded in the 81 + * previous step. 82 + * d. Put those fragments in the set. 83 + * 4. Check that there are $target_nr fragments remaining in the list, 84 + * and that they all end at or beyond the end of the refcount extent. 
85 + * 86 + * If the refcount is correct, all the check conditions in the algorithm 87 + * should always hold true. If not, the refcount is incorrect. 88 + */ 89 + struct xfs_scrub_refcnt_frag { 90 + struct list_head list; 91 + struct xfs_rmap_irec rm; 92 + }; 93 + 94 + struct xfs_scrub_refcnt_check { 95 + struct xfs_scrub_context *sc; 96 + struct list_head fragments; 97 + 98 + /* refcount extent we're examining */ 99 + xfs_agblock_t bno; 100 + xfs_extlen_t len; 101 + xfs_nlink_t refcount; 102 + 103 + /* number of owners seen */ 104 + xfs_nlink_t seen; 105 + }; 106 + 107 + /* 108 + * Decide if the given rmap is large enough that we can redeem it 109 + * towards refcount verification now, or if it's a fragment, in 110 + * which case we'll hang onto it in the hopes that we'll later 111 + * discover that we've collected exactly the correct number of 112 + * fragments as the refcountbt says we should have. 113 + */ 114 + STATIC int 115 + xfs_scrub_refcountbt_rmap_check( 116 + struct xfs_btree_cur *cur, 117 + struct xfs_rmap_irec *rec, 118 + void *priv) 119 + { 120 + struct xfs_scrub_refcnt_check *refchk = priv; 121 + struct xfs_scrub_refcnt_frag *frag; 122 + xfs_agblock_t rm_last; 123 + xfs_agblock_t rc_last; 124 + int error = 0; 125 + 126 + if (xfs_scrub_should_terminate(refchk->sc, &error)) 127 + return error; 128 + 129 + rm_last = rec->rm_startblock + rec->rm_blockcount - 1; 130 + rc_last = refchk->bno + refchk->len - 1; 131 + 132 + /* Confirm that a single-owner refc extent is a CoW stage. */ 133 + if (refchk->refcount == 1 && rec->rm_owner != XFS_RMAP_OWN_COW) { 134 + xfs_scrub_btree_xref_set_corrupt(refchk->sc, cur, 0); 135 + return 0; 136 + } 137 + 138 + if (rec->rm_startblock <= refchk->bno && rm_last >= rc_last) { 139 + /* 140 + * The rmap overlaps the refcount record, so we can confirm 141 + * one refcount owner seen. 
142 + */ 143 + refchk->seen++; 144 + } else { 145 + /* 146 + * This rmap covers only part of the refcount record, so 147 + * save the fragment for later processing. If the rmapbt 148 + * is healthy each rmap_irec we see will be in agbno order 149 + * so we don't need insertion sort here. 150 + */ 151 + frag = kmem_alloc(sizeof(struct xfs_scrub_refcnt_frag), 152 + KM_MAYFAIL | KM_NOFS); 153 + if (!frag) 154 + return -ENOMEM; 155 + memcpy(&frag->rm, rec, sizeof(frag->rm)); 156 + list_add_tail(&frag->list, &refchk->fragments); 157 + } 158 + 159 + return 0; 160 + } 161 + 162 + /* 163 + * Given a bunch of rmap fragments, iterate through them, keeping 164 + * a running tally of the refcount. If this ever deviates from 165 + * what we expect (which is the refcountbt's refcount minus the 166 + * number of extents that totally covered the refcountbt extent), 167 + * we have a refcountbt error. 168 + */ 169 + STATIC void 170 + xfs_scrub_refcountbt_process_rmap_fragments( 171 + struct xfs_scrub_refcnt_check *refchk) 172 + { 173 + struct list_head worklist; 174 + struct xfs_scrub_refcnt_frag *frag; 175 + struct xfs_scrub_refcnt_frag *n; 176 + xfs_agblock_t bno; 177 + xfs_agblock_t rbno; 178 + xfs_agblock_t next_rbno; 179 + xfs_nlink_t nr; 180 + xfs_nlink_t target_nr; 181 + 182 + target_nr = refchk->refcount - refchk->seen; 183 + if (target_nr == 0) 184 + return; 185 + 186 + /* 187 + * There are (refchk->rc.rc_refcount - refchk->nr refcount) 188 + * references we haven't found yet. Pull that many off the 189 + * fragment list and figure out where the smallest rmap ends 190 + * (and therefore the next rmap should start). All the rmaps 191 + * we pull off should start at or before the beginning of the 192 + * refcount record's range. 193 + */ 194 + INIT_LIST_HEAD(&worklist); 195 + rbno = NULLAGBLOCK; 196 + nr = 1; 197 + 198 + /* Make sure the fragments actually /are/ in agbno order. 
*/ 199 + bno = 0; 200 + list_for_each_entry(frag, &refchk->fragments, list) { 201 + if (frag->rm.rm_startblock < bno) 202 + goto done; 203 + bno = frag->rm.rm_startblock; 204 + } 205 + 206 + /* 207 + * Find all the rmaps that start at or before the refc extent, 208 + * and put them on the worklist. 209 + */ 210 + list_for_each_entry_safe(frag, n, &refchk->fragments, list) { 211 + if (frag->rm.rm_startblock > refchk->bno) 212 + goto done; 213 + bno = frag->rm.rm_startblock + frag->rm.rm_blockcount; 214 + if (bno < rbno) 215 + rbno = bno; 216 + list_move_tail(&frag->list, &worklist); 217 + if (nr == target_nr) 218 + break; 219 + nr++; 220 + } 221 + 222 + /* 223 + * We should have found exactly $target_nr rmap fragments starting 224 + * at or before the refcount extent. 225 + */ 226 + if (nr != target_nr) 227 + goto done; 228 + 229 + while (!list_empty(&refchk->fragments)) { 230 + /* Discard any fragments ending at rbno from the worklist. */ 231 + nr = 0; 232 + next_rbno = NULLAGBLOCK; 233 + list_for_each_entry_safe(frag, n, &worklist, list) { 234 + bno = frag->rm.rm_startblock + frag->rm.rm_blockcount; 235 + if (bno != rbno) { 236 + if (bno < next_rbno) 237 + next_rbno = bno; 238 + continue; 239 + } 240 + list_del(&frag->list); 241 + kmem_free(frag); 242 + nr++; 243 + } 244 + 245 + /* Try to add nr rmaps starting at rbno to the worklist. */ 246 + list_for_each_entry_safe(frag, n, &refchk->fragments, list) { 247 + bno = frag->rm.rm_startblock + frag->rm.rm_blockcount; 248 + if (frag->rm.rm_startblock != rbno) 249 + goto done; 250 + list_move_tail(&frag->list, &worklist); 251 + if (next_rbno > bno) 252 + next_rbno = bno; 253 + nr--; 254 + if (nr == 0) 255 + break; 256 + } 257 + 258 + /* 259 + * If we get here and nr > 0, this means that we added fewer 260 + * items to the worklist than we discarded because the fragment 261 + * list ran out of items. Therefore, we cannot maintain the 262 + * required refcount. Something is wrong, so we're done. 
263 + */ 264 + if (nr) 265 + goto done; 266 + 267 + rbno = next_rbno; 268 + } 269 + 270 + /* 271 + * Make sure the last extent we processed ends at or beyond 272 + * the end of the refcount extent. 273 + */ 274 + if (rbno < refchk->bno + refchk->len) 275 + goto done; 276 + 277 + /* Actually record us having seen the remaining refcount. */ 278 + refchk->seen = refchk->refcount; 279 + done: 280 + /* Delete fragments and work list. */ 281 + list_for_each_entry_safe(frag, n, &worklist, list) { 282 + list_del(&frag->list); 283 + kmem_free(frag); 284 + } 285 + list_for_each_entry_safe(frag, n, &refchk->fragments, list) { 286 + list_del(&frag->list); 287 + kmem_free(frag); 288 + } 289 + } 290 + 291 + /* Use the rmap entries covering this extent to verify the refcount. */ 292 + STATIC void 293 + xfs_scrub_refcountbt_xref_rmap( 294 + struct xfs_scrub_context *sc, 295 + xfs_agblock_t bno, 296 + xfs_extlen_t len, 297 + xfs_nlink_t refcount) 298 + { 299 + struct xfs_scrub_refcnt_check refchk = { 300 + .sc = sc, 301 + .bno = bno, 302 + .len = len, 303 + .refcount = refcount, 304 + .seen = 0, 305 + }; 306 + struct xfs_rmap_irec low; 307 + struct xfs_rmap_irec high; 308 + struct xfs_scrub_refcnt_frag *frag; 309 + struct xfs_scrub_refcnt_frag *n; 310 + int error; 311 + 312 + if (!sc->sa.rmap_cur) 313 + return; 314 + 315 + /* Cross-reference with the rmapbt to confirm the refcount. 
*/ 316 + memset(&low, 0, sizeof(low)); 317 + low.rm_startblock = bno; 318 + memset(&high, 0xFF, sizeof(high)); 319 + high.rm_startblock = bno + len - 1; 320 + 321 + INIT_LIST_HEAD(&refchk.fragments); 322 + error = xfs_rmap_query_range(sc->sa.rmap_cur, &low, &high, 323 + &xfs_scrub_refcountbt_rmap_check, &refchk); 324 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) 325 + goto out_free; 326 + 327 + xfs_scrub_refcountbt_process_rmap_fragments(&refchk); 328 + if (refcount != refchk.seen) 329 + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); 330 + 331 + out_free: 332 + list_for_each_entry_safe(frag, n, &refchk.fragments, list) { 333 + list_del(&frag->list); 334 + kmem_free(frag); 335 + } 336 + } 337 + 338 + /* Cross-reference with the other btrees. */ 339 + STATIC void 340 + xfs_scrub_refcountbt_xref( 341 + struct xfs_scrub_context *sc, 342 + xfs_agblock_t agbno, 343 + xfs_extlen_t len, 344 + xfs_nlink_t refcount) 345 + { 346 + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 347 + return; 348 + 349 + xfs_scrub_xref_is_used_space(sc, agbno, len); 350 + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len); 351 + xfs_scrub_refcountbt_xref_rmap(sc, agbno, len, refcount); 352 + } 353 + 54 354 /* Scrub a refcountbt record. */ 55 355 STATIC int 56 356 xfs_scrub_refcountbt_rec( ··· 359 57 union xfs_btree_rec *rec) 360 58 { 361 59 struct xfs_mount *mp = bs->cur->bc_mp; 60 + xfs_agblock_t *cow_blocks = bs->private; 362 61 xfs_agnumber_t agno = bs->cur->bc_private.a.agno; 363 62 xfs_agblock_t bno; 364 63 xfs_extlen_t len; ··· 375 72 has_cowflag = (bno & XFS_REFC_COW_START); 376 73 if ((refcount == 1 && !has_cowflag) || (refcount != 1 && has_cowflag)) 377 74 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 75 + if (has_cowflag) 76 + (*cow_blocks) += len; 378 77 379 78 /* Check the extent. 
*/ 380 79 bno &= ~XFS_REFC_COW_START; ··· 388 83 if (refcount == 0) 389 84 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 390 85 86 + xfs_scrub_refcountbt_xref(bs->sc, bno, len, refcount); 87 + 391 88 return error; 89 + } 90 + 91 + /* Make sure we have as many refc blocks as the rmap says. */ 92 + STATIC void 93 + xfs_scrub_refcount_xref_rmap( 94 + struct xfs_scrub_context *sc, 95 + struct xfs_owner_info *oinfo, 96 + xfs_filblks_t cow_blocks) 97 + { 98 + xfs_extlen_t refcbt_blocks = 0; 99 + xfs_filblks_t blocks; 100 + int error; 101 + 102 + if (!sc->sa.rmap_cur) 103 + return; 104 + 105 + /* Check that we saw as many refcbt blocks as the rmap knows about. */ 106 + error = xfs_btree_count_blocks(sc->sa.refc_cur, &refcbt_blocks); 107 + if (!xfs_scrub_btree_process_error(sc, sc->sa.refc_cur, 0, &error)) 108 + return; 109 + error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo, 110 + &blocks); 111 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) 112 + return; 113 + if (blocks != refcbt_blocks) 114 + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); 115 + 116 + /* Check that we saw as many cow blocks as the rmap knows about. */ 117 + xfs_rmap_ag_owner(oinfo, XFS_RMAP_OWN_COW); 118 + error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo, 119 + &blocks); 120 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) 121 + return; 122 + if (blocks != cow_blocks) 123 + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); 392 124 } 393 125 394 126 /* Scrub the refcount btree for some AG. 
*/ ··· 434 92 struct xfs_scrub_context *sc) 435 93 { 436 94 struct xfs_owner_info oinfo; 95 + xfs_agblock_t cow_blocks = 0; 96 + int error; 437 97 438 98 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_REFC); 439 - return xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_rec, 440 - &oinfo, NULL); 99 + error = xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_rec, 100 + &oinfo, &cow_blocks); 101 + if (error) 102 + return error; 103 + 104 + xfs_scrub_refcount_xref_rmap(sc, &oinfo, cow_blocks); 105 + 106 + return 0; 107 + } 108 + 109 + /* xref check that a cow staging extent is marked in the refcountbt. */ 110 + void 111 + xfs_scrub_xref_is_cow_staging( 112 + struct xfs_scrub_context *sc, 113 + xfs_agblock_t agbno, 114 + xfs_extlen_t len) 115 + { 116 + struct xfs_refcount_irec rc; 117 + bool has_cowflag; 118 + int has_refcount; 119 + int error; 120 + 121 + if (!sc->sa.refc_cur) 122 + return; 123 + 124 + /* Find the CoW staging extent. */ 125 + error = xfs_refcount_lookup_le(sc->sa.refc_cur, 126 + agbno + XFS_REFC_COW_START, &has_refcount); 127 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur)) 128 + return; 129 + if (!has_refcount) { 130 + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); 131 + return; 132 + } 133 + 134 + error = xfs_refcount_get_rec(sc->sa.refc_cur, &rc, &has_refcount); 135 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur)) 136 + return; 137 + if (!has_refcount) { 138 + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); 139 + return; 140 + } 141 + 142 + /* CoW flag must be set, refcount must be 1. 
*/ 143 + has_cowflag = (rc.rc_startblock & XFS_REFC_COW_START); 144 + if (!has_cowflag || rc.rc_refcount != 1) 145 + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); 146 + 147 + /* Must be at least as long as what was passed in */ 148 + if (rc.rc_blockcount < len) 149 + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); 150 + } 151 + 152 + /* 153 + * xref check that the extent is not shared. Only file data blocks 154 + * can have multiple owners. 155 + */ 156 + void 157 + xfs_scrub_xref_is_not_shared( 158 + struct xfs_scrub_context *sc, 159 + xfs_agblock_t agbno, 160 + xfs_extlen_t len) 161 + { 162 + bool shared; 163 + int error; 164 + 165 + if (!sc->sa.refc_cur) 166 + return; 167 + 168 + error = xfs_refcount_has_record(sc->sa.refc_cur, agbno, len, &shared); 169 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur)) 170 + return; 171 + if (shared) 172 + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); 441 173 }
+123
fs/xfs/scrub/rmap.c
··· 32 32 #include "xfs_alloc.h" 33 33 #include "xfs_ialloc.h" 34 34 #include "xfs_rmap.h" 35 + #include "xfs_refcount.h" 35 36 #include "scrub/xfs_scrub.h" 36 37 #include "scrub/scrub.h" 37 38 #include "scrub/common.h" ··· 51 50 } 52 51 53 52 /* Reverse-mapping scrubber. */ 53 + 54 + /* Cross-reference a rmap against the refcount btree. */ 55 + STATIC void 56 + xfs_scrub_rmapbt_xref_refc( 57 + struct xfs_scrub_context *sc, 58 + struct xfs_rmap_irec *irec) 59 + { 60 + xfs_agblock_t fbno; 61 + xfs_extlen_t flen; 62 + bool non_inode; 63 + bool is_bmbt; 64 + bool is_attr; 65 + bool is_unwritten; 66 + int error; 67 + 68 + if (!sc->sa.refc_cur) 69 + return; 70 + 71 + non_inode = XFS_RMAP_NON_INODE_OWNER(irec->rm_owner); 72 + is_bmbt = irec->rm_flags & XFS_RMAP_BMBT_BLOCK; 73 + is_attr = irec->rm_flags & XFS_RMAP_ATTR_FORK; 74 + is_unwritten = irec->rm_flags & XFS_RMAP_UNWRITTEN; 75 + 76 + /* If this is shared, must be a data fork extent. */ 77 + error = xfs_refcount_find_shared(sc->sa.refc_cur, irec->rm_startblock, 78 + irec->rm_blockcount, &fbno, &flen, false); 79 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur)) 80 + return; 81 + if (flen != 0 && (non_inode || is_attr || is_bmbt || is_unwritten)) 82 + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); 83 + } 84 + 85 + /* Cross-reference with the other btrees. 
*/ 86 + STATIC void 87 + xfs_scrub_rmapbt_xref( 88 + struct xfs_scrub_context *sc, 89 + struct xfs_rmap_irec *irec) 90 + { 91 + xfs_agblock_t agbno = irec->rm_startblock; 92 + xfs_extlen_t len = irec->rm_blockcount; 93 + 94 + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 95 + return; 96 + 97 + xfs_scrub_xref_is_used_space(sc, agbno, len); 98 + if (irec->rm_owner == XFS_RMAP_OWN_INODES) 99 + xfs_scrub_xref_is_inode_chunk(sc, agbno, len); 100 + else 101 + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len); 102 + if (irec->rm_owner == XFS_RMAP_OWN_COW) 103 + xfs_scrub_xref_is_cow_staging(sc, irec->rm_startblock, 104 + irec->rm_blockcount); 105 + else 106 + xfs_scrub_rmapbt_xref_refc(sc, irec); 107 + } 54 108 55 109 /* Scrub an rmapbt record. */ 56 110 STATIC int ··· 177 121 irec.rm_owner > XFS_RMAP_OWN_FS) 178 122 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 179 123 } 124 + 125 + xfs_scrub_rmapbt_xref(bs->sc, &irec); 180 126 out: 181 127 return error; 182 128 } ··· 193 135 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG); 194 136 return xfs_scrub_btree(sc, sc->sa.rmap_cur, xfs_scrub_rmapbt_rec, 195 137 &oinfo, NULL); 138 + } 139 + 140 + /* xref check that the extent is owned by a given owner */ 141 + static inline void 142 + xfs_scrub_xref_check_owner( 143 + struct xfs_scrub_context *sc, 144 + xfs_agblock_t bno, 145 + xfs_extlen_t len, 146 + struct xfs_owner_info *oinfo, 147 + bool should_have_rmap) 148 + { 149 + bool has_rmap; 150 + int error; 151 + 152 + if (!sc->sa.rmap_cur) 153 + return; 154 + 155 + error = xfs_rmap_record_exists(sc->sa.rmap_cur, bno, len, oinfo, 156 + &has_rmap); 157 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) 158 + return; 159 + if (has_rmap != should_have_rmap) 160 + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); 161 + } 162 + 163 + /* xref check that the extent is owned by a given owner */ 164 + void 165 + xfs_scrub_xref_is_owned_by( 166 + struct xfs_scrub_context *sc, 167 + xfs_agblock_t bno, 168 + xfs_extlen_t 
len, 169 + struct xfs_owner_info *oinfo) 170 + { 171 + xfs_scrub_xref_check_owner(sc, bno, len, oinfo, true); 172 + } 173 + 174 + /* xref check that the extent is not owned by a given owner */ 175 + void 176 + xfs_scrub_xref_is_not_owned_by( 177 + struct xfs_scrub_context *sc, 178 + xfs_agblock_t bno, 179 + xfs_extlen_t len, 180 + struct xfs_owner_info *oinfo) 181 + { 182 + xfs_scrub_xref_check_owner(sc, bno, len, oinfo, false); 183 + } 184 + 185 + /* xref check that the extent has no reverse mapping at all */ 186 + void 187 + xfs_scrub_xref_has_no_owner( 188 + struct xfs_scrub_context *sc, 189 + xfs_agblock_t bno, 190 + xfs_extlen_t len) 191 + { 192 + bool has_rmap; 193 + int error; 194 + 195 + if (!sc->sa.rmap_cur) 196 + return; 197 + 198 + error = xfs_rmap_has_record(sc->sa.rmap_cur, bno, len, &has_rmap); 199 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) 200 + return; 201 + if (has_rmap) 202 + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); 196 203 }
+25 -10
fs/xfs/scrub/rtbitmap.c
··· 43 43 struct xfs_scrub_context *sc, 44 44 struct xfs_inode *ip) 45 45 { 46 - struct xfs_mount *mp = sc->mp; 47 - int error = 0; 48 - 49 - /* 50 - * If userspace gave us an AG number or inode data, they don't 51 - * know what they're doing. Get out. 52 - */ 53 - if (sc->sm->sm_agno || sc->sm->sm_ino || sc->sm->sm_gen) 54 - return -EINVAL; 46 + int error; 55 47 56 48 error = xfs_scrub_setup_fs(sc, ip); 57 49 if (error) 58 50 return error; 59 51 60 52 sc->ilock_flags = XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP; 61 - sc->ip = mp->m_rbmip; 53 + sc->ip = sc->mp->m_rbmip; 62 54 xfs_ilock(sc->ip, sc->ilock_flags); 63 55 64 56 return 0; ··· 97 105 { 98 106 /* XXX: implement this some day */ 99 107 return -ENOENT; 108 + } 109 + 110 + 111 + /* xref check that the extent is not free in the rtbitmap */ 112 + void 113 + xfs_scrub_xref_is_used_rt_space( 114 + struct xfs_scrub_context *sc, 115 + xfs_rtblock_t fsbno, 116 + xfs_extlen_t len) 117 + { 118 + bool is_free; 119 + int error; 120 + 121 + xfs_ilock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); 122 + error = xfs_rtalloc_extent_is_free(sc->mp, sc->tp, fsbno, len, 123 + &is_free); 124 + if (!xfs_scrub_should_check_xref(sc, &error, NULL)) 125 + goto out_unlock; 126 + if (is_free) 127 + xfs_scrub_ino_xref_set_corrupt(sc, sc->mp->m_rbmip->i_ino, 128 + NULL); 129 + out_unlock: 130 + xfs_iunlock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); 100 131 }
+147 -76
fs/xfs/scrub/scrub.c
··· 110 110 * structure itself is corrupt, the CORRUPT flag will be set. If 111 111 * the metadata is correct but otherwise suboptimal, the PREEN flag 112 112 * will be set. 113 + * 114 + * We perform secondary validation of filesystem metadata by 115 + * cross-referencing every record with all other available metadata. 116 + * For example, for block mapping extents, we verify that there are no 117 + * records in the free space and inode btrees corresponding to that 118 + * space extent and that there is a corresponding entry in the reverse 119 + * mapping btree. Inconsistent metadata is noted by setting the 120 + * XCORRUPT flag; btree query function errors are noted by setting the 121 + * XFAIL flag and deleting the cursor to prevent further attempts to 122 + * cross-reference with a defective btree. 113 123 */ 114 124 115 125 /* ··· 138 128 { 139 129 int error = 0; 140 130 141 - if (sc->sm->sm_ino || sc->sm->sm_agno) 142 - return -EINVAL; 143 131 if (xfs_scrub_should_terminate(sc, &error)) 144 132 return error; 145 133 ··· 159 151 sc->tp = NULL; 160 152 } 161 153 if (sc->ip) { 162 - xfs_iunlock(sc->ip, sc->ilock_flags); 154 + if (sc->ilock_flags) 155 + xfs_iunlock(sc->ip, sc->ilock_flags); 163 156 if (sc->ip != ip_in && 164 157 !xfs_internal_inum(sc->mp, sc->ip->i_ino)) 165 158 iput(VFS_I(sc->ip)); ··· 176 167 /* Scrubbing dispatch. 
*/ 177 168 178 169 static const struct xfs_scrub_meta_ops meta_scrub_ops[] = { 179 - { /* ioctl presence test */ 170 + [XFS_SCRUB_TYPE_PROBE] = { /* ioctl presence test */ 171 + .type = ST_NONE, 180 172 .setup = xfs_scrub_setup_fs, 181 173 .scrub = xfs_scrub_probe, 182 174 }, 183 - { /* superblock */ 184 - .setup = xfs_scrub_setup_ag_header, 175 + [XFS_SCRUB_TYPE_SB] = { /* superblock */ 176 + .type = ST_PERAG, 177 + .setup = xfs_scrub_setup_fs, 185 178 .scrub = xfs_scrub_superblock, 186 179 }, 187 - { /* agf */ 188 - .setup = xfs_scrub_setup_ag_header, 180 + [XFS_SCRUB_TYPE_AGF] = { /* agf */ 181 + .type = ST_PERAG, 182 + .setup = xfs_scrub_setup_fs, 189 183 .scrub = xfs_scrub_agf, 190 184 }, 191 - { /* agfl */ 192 - .setup = xfs_scrub_setup_ag_header, 185 + [XFS_SCRUB_TYPE_AGFL]= { /* agfl */ 186 + .type = ST_PERAG, 187 + .setup = xfs_scrub_setup_fs, 193 188 .scrub = xfs_scrub_agfl, 194 189 }, 195 - { /* agi */ 196 - .setup = xfs_scrub_setup_ag_header, 190 + [XFS_SCRUB_TYPE_AGI] = { /* agi */ 191 + .type = ST_PERAG, 192 + .setup = xfs_scrub_setup_fs, 197 193 .scrub = xfs_scrub_agi, 198 194 }, 199 - { /* bnobt */ 195 + [XFS_SCRUB_TYPE_BNOBT] = { /* bnobt */ 196 + .type = ST_PERAG, 200 197 .setup = xfs_scrub_setup_ag_allocbt, 201 198 .scrub = xfs_scrub_bnobt, 202 199 }, 203 - { /* cntbt */ 200 + [XFS_SCRUB_TYPE_CNTBT] = { /* cntbt */ 201 + .type = ST_PERAG, 204 202 .setup = xfs_scrub_setup_ag_allocbt, 205 203 .scrub = xfs_scrub_cntbt, 206 204 }, 207 - { /* inobt */ 205 + [XFS_SCRUB_TYPE_INOBT] = { /* inobt */ 206 + .type = ST_PERAG, 208 207 .setup = xfs_scrub_setup_ag_iallocbt, 209 208 .scrub = xfs_scrub_inobt, 210 209 }, 211 - { /* finobt */ 210 + [XFS_SCRUB_TYPE_FINOBT] = { /* finobt */ 211 + .type = ST_PERAG, 212 212 .setup = xfs_scrub_setup_ag_iallocbt, 213 213 .scrub = xfs_scrub_finobt, 214 214 .has = xfs_sb_version_hasfinobt, 215 215 }, 216 - { /* rmapbt */ 216 + [XFS_SCRUB_TYPE_RMAPBT] = { /* rmapbt */ 217 + .type = ST_PERAG, 217 218 .setup = 
xfs_scrub_setup_ag_rmapbt, 218 219 .scrub = xfs_scrub_rmapbt, 219 220 .has = xfs_sb_version_hasrmapbt, 220 221 }, 221 - { /* refcountbt */ 222 + [XFS_SCRUB_TYPE_REFCNTBT] = { /* refcountbt */ 223 + .type = ST_PERAG, 222 224 .setup = xfs_scrub_setup_ag_refcountbt, 223 225 .scrub = xfs_scrub_refcountbt, 224 226 .has = xfs_sb_version_hasreflink, 225 227 }, 226 - { /* inode record */ 228 + [XFS_SCRUB_TYPE_INODE] = { /* inode record */ 229 + .type = ST_INODE, 227 230 .setup = xfs_scrub_setup_inode, 228 231 .scrub = xfs_scrub_inode, 229 232 }, 230 - { /* inode data fork */ 233 + [XFS_SCRUB_TYPE_BMBTD] = { /* inode data fork */ 234 + .type = ST_INODE, 231 235 .setup = xfs_scrub_setup_inode_bmap, 232 236 .scrub = xfs_scrub_bmap_data, 233 237 }, 234 - { /* inode attr fork */ 238 + [XFS_SCRUB_TYPE_BMBTA] = { /* inode attr fork */ 239 + .type = ST_INODE, 235 240 .setup = xfs_scrub_setup_inode_bmap, 236 241 .scrub = xfs_scrub_bmap_attr, 237 242 }, 238 - { /* inode CoW fork */ 243 + [XFS_SCRUB_TYPE_BMBTC] = { /* inode CoW fork */ 244 + .type = ST_INODE, 239 245 .setup = xfs_scrub_setup_inode_bmap, 240 246 .scrub = xfs_scrub_bmap_cow, 241 247 }, 242 - { /* directory */ 248 + [XFS_SCRUB_TYPE_DIR] = { /* directory */ 249 + .type = ST_INODE, 243 250 .setup = xfs_scrub_setup_directory, 244 251 .scrub = xfs_scrub_directory, 245 252 }, 246 - { /* extended attributes */ 253 + [XFS_SCRUB_TYPE_XATTR] = { /* extended attributes */ 254 + .type = ST_INODE, 247 255 .setup = xfs_scrub_setup_xattr, 248 256 .scrub = xfs_scrub_xattr, 249 257 }, 250 - { /* symbolic link */ 258 + [XFS_SCRUB_TYPE_SYMLINK] = { /* symbolic link */ 259 + .type = ST_INODE, 251 260 .setup = xfs_scrub_setup_symlink, 252 261 .scrub = xfs_scrub_symlink, 253 262 }, 254 - { /* parent pointers */ 263 + [XFS_SCRUB_TYPE_PARENT] = { /* parent pointers */ 264 + .type = ST_INODE, 255 265 .setup = xfs_scrub_setup_parent, 256 266 .scrub = xfs_scrub_parent, 257 267 }, 258 - { /* realtime bitmap */ 268 + [XFS_SCRUB_TYPE_RTBITMAP] = { 
/* realtime bitmap */ 269 + .type = ST_FS, 259 270 .setup = xfs_scrub_setup_rt, 260 271 .scrub = xfs_scrub_rtbitmap, 261 272 .has = xfs_sb_version_hasrealtime, 262 273 }, 263 - { /* realtime summary */ 274 + [XFS_SCRUB_TYPE_RTSUM] = { /* realtime summary */ 275 + .type = ST_FS, 264 276 .setup = xfs_scrub_setup_rt, 265 277 .scrub = xfs_scrub_rtsummary, 266 278 .has = xfs_sb_version_hasrealtime, 267 279 }, 268 - { /* user quota */ 269 - .setup = xfs_scrub_setup_quota, 270 - .scrub = xfs_scrub_quota, 280 + [XFS_SCRUB_TYPE_UQUOTA] = { /* user quota */ 281 + .type = ST_FS, 282 + .setup = xfs_scrub_setup_quota, 283 + .scrub = xfs_scrub_quota, 271 284 }, 272 - { /* group quota */ 273 - .setup = xfs_scrub_setup_quota, 274 - .scrub = xfs_scrub_quota, 285 + [XFS_SCRUB_TYPE_GQUOTA] = { /* group quota */ 286 + .type = ST_FS, 287 + .setup = xfs_scrub_setup_quota, 288 + .scrub = xfs_scrub_quota, 275 289 }, 276 - { /* project quota */ 277 - .setup = xfs_scrub_setup_quota, 278 - .scrub = xfs_scrub_quota, 290 + [XFS_SCRUB_TYPE_PQUOTA] = { /* project quota */ 291 + .type = ST_FS, 292 + .setup = xfs_scrub_setup_quota, 293 + .scrub = xfs_scrub_quota, 279 294 }, 280 295 }; 281 296 ··· 317 284 "EXPERIMENTAL online scrub feature in use. Use at your own risk!"); 318 285 } 319 286 287 + static int 288 + xfs_scrub_validate_inputs( 289 + struct xfs_mount *mp, 290 + struct xfs_scrub_metadata *sm) 291 + { 292 + int error; 293 + const struct xfs_scrub_meta_ops *ops; 294 + 295 + error = -EINVAL; 296 + /* Check our inputs. */ 297 + sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT; 298 + if (sm->sm_flags & ~XFS_SCRUB_FLAGS_IN) 299 + goto out; 300 + /* sm_reserved[] must be zero */ 301 + if (memchr_inv(sm->sm_reserved, 0, sizeof(sm->sm_reserved))) 302 + goto out; 303 + 304 + error = -ENOENT; 305 + /* Do we know about this type of metadata? 
*/ 306 + if (sm->sm_type >= XFS_SCRUB_TYPE_NR) 307 + goto out; 308 + ops = &meta_scrub_ops[sm->sm_type]; 309 + if (ops->setup == NULL || ops->scrub == NULL) 310 + goto out; 311 + /* Does this fs even support this type of metadata? */ 312 + if (ops->has && !ops->has(&mp->m_sb)) 313 + goto out; 314 + 315 + error = -EINVAL; 316 + /* restricting fields must be appropriate for type */ 317 + switch (ops->type) { 318 + case ST_NONE: 319 + case ST_FS: 320 + if (sm->sm_ino || sm->sm_gen || sm->sm_agno) 321 + goto out; 322 + break; 323 + case ST_PERAG: 324 + if (sm->sm_ino || sm->sm_gen || 325 + sm->sm_agno >= mp->m_sb.sb_agcount) 326 + goto out; 327 + break; 328 + case ST_INODE: 329 + if (sm->sm_agno || (sm->sm_gen && !sm->sm_ino)) 330 + goto out; 331 + break; 332 + default: 333 + goto out; 334 + } 335 + 336 + error = -EOPNOTSUPP; 337 + /* 338 + * We won't scrub any filesystem that doesn't have the ability 339 + * to record unwritten extents. The option was made default in 340 + * 2003, removed from mkfs in 2007, and cannot be disabled in 341 + * v5, so if we find a filesystem without this flag it's either 342 + * really old or totally unsupported. Avoid it either way. 343 + * We also don't support v1-v3 filesystems, which aren't 344 + * mountable. 345 + */ 346 + if (!xfs_sb_version_hasextflgbit(&mp->m_sb)) 347 + goto out; 348 + 349 + /* We don't know how to repair anything yet. */ 350 + if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) 351 + goto out; 352 + 353 + error = 0; 354 + out: 355 + return error; 356 + } 357 + 320 358 /* Dispatch metadata scrubbing. 
*/ 321 359 int 322 360 xfs_scrub_metadata( ··· 396 292 { 397 293 struct xfs_scrub_context sc; 398 294 struct xfs_mount *mp = ip->i_mount; 399 - const struct xfs_scrub_meta_ops *ops; 400 295 bool try_harder = false; 401 296 int error = 0; 297 + 298 + BUILD_BUG_ON(sizeof(meta_scrub_ops) != 299 + (sizeof(struct xfs_scrub_meta_ops) * XFS_SCRUB_TYPE_NR)); 402 300 403 301 trace_xfs_scrub_start(ip, sm, error); 404 302 ··· 412 306 if (mp->m_flags & XFS_MOUNT_NORECOVERY) 413 307 goto out; 414 308 415 - /* Check our inputs. */ 416 - error = -EINVAL; 417 - sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT; 418 - if (sm->sm_flags & ~XFS_SCRUB_FLAGS_IN) 419 - goto out; 420 - if (memchr_inv(sm->sm_reserved, 0, sizeof(sm->sm_reserved))) 421 - goto out; 422 - 423 - /* Do we know about this type of metadata? */ 424 - error = -ENOENT; 425 - if (sm->sm_type >= XFS_SCRUB_TYPE_NR) 426 - goto out; 427 - ops = &meta_scrub_ops[sm->sm_type]; 428 - if (ops->scrub == NULL) 429 - goto out; 430 - 431 - /* 432 - * We won't scrub any filesystem that doesn't have the ability 433 - * to record unwritten extents. The option was made default in 434 - * 2003, removed from mkfs in 2007, and cannot be disabled in 435 - * v5, so if we find a filesystem without this flag it's either 436 - * really old or totally unsupported. Avoid it either way. 437 - * We also don't support v1-v3 filesystems, which aren't 438 - * mountable. 439 - */ 440 - error = -EOPNOTSUPP; 441 - if (!xfs_sb_version_hasextflgbit(&mp->m_sb)) 442 - goto out; 443 - 444 - /* Does this fs even support this type of metadata? */ 445 - error = -ENOENT; 446 - if (ops->has && !ops->has(&mp->m_sb)) 447 - goto out; 448 - 449 - /* We don't know how to repair anything yet. 
*/ 450 - error = -EOPNOTSUPP; 451 - if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) 309 + error = xfs_scrub_validate_inputs(mp, sm); 310 + if (error) 452 311 goto out; 453 312 454 313 xfs_scrub_experimental_warning(mp); ··· 423 352 memset(&sc, 0, sizeof(sc)); 424 353 sc.mp = ip->i_mount; 425 354 sc.sm = sm; 426 - sc.ops = ops; 355 + sc.ops = &meta_scrub_ops[sm->sm_type]; 427 356 sc.try_harder = try_harder; 428 357 sc.sa.agno = NULLAGNUMBER; 429 358 error = sc.ops->setup(&sc, ip);
+37
fs/xfs/scrub/scrub.h
··· 22 22 23 23 struct xfs_scrub_context; 24 24 25 + /* Type info and names for the scrub types. */ 26 + enum xfs_scrub_type { 27 + ST_NONE = 1, /* disabled */ 28 + ST_PERAG, /* per-AG metadata */ 29 + ST_FS, /* per-FS metadata */ 30 + ST_INODE, /* per-inode metadata */ 31 + }; 32 + 25 33 struct xfs_scrub_meta_ops { 26 34 /* Acquire whatever resources are needed for the operation. */ 27 35 int (*setup)(struct xfs_scrub_context *, ··· 40 32 41 33 /* Decide if we even have this piece of metadata. */ 42 34 bool (*has)(struct xfs_sb *); 35 + 36 + /* type describing required/allowed inputs */ 37 + enum xfs_scrub_type type; 43 38 }; 44 39 45 40 /* Buffer pointers and btree cursors for an entire AG. */ ··· 121 110 { 122 111 return -ENOENT; 123 112 } 113 + #endif 114 + 115 + /* cross-referencing helpers */ 116 + void xfs_scrub_xref_is_used_space(struct xfs_scrub_context *sc, 117 + xfs_agblock_t agbno, xfs_extlen_t len); 118 + void xfs_scrub_xref_is_not_inode_chunk(struct xfs_scrub_context *sc, 119 + xfs_agblock_t agbno, xfs_extlen_t len); 120 + void xfs_scrub_xref_is_inode_chunk(struct xfs_scrub_context *sc, 121 + xfs_agblock_t agbno, xfs_extlen_t len); 122 + void xfs_scrub_xref_is_owned_by(struct xfs_scrub_context *sc, 123 + xfs_agblock_t agbno, xfs_extlen_t len, 124 + struct xfs_owner_info *oinfo); 125 + void xfs_scrub_xref_is_not_owned_by(struct xfs_scrub_context *sc, 126 + xfs_agblock_t agbno, xfs_extlen_t len, 127 + struct xfs_owner_info *oinfo); 128 + void xfs_scrub_xref_has_no_owner(struct xfs_scrub_context *sc, 129 + xfs_agblock_t agbno, xfs_extlen_t len); 130 + void xfs_scrub_xref_is_cow_staging(struct xfs_scrub_context *sc, 131 + xfs_agblock_t bno, xfs_extlen_t len); 132 + void xfs_scrub_xref_is_not_shared(struct xfs_scrub_context *sc, 133 + xfs_agblock_t bno, xfs_extlen_t len); 134 + #ifdef CONFIG_XFS_RT 135 + void xfs_scrub_xref_is_used_rt_space(struct xfs_scrub_context *sc, 136 + xfs_rtblock_t rtbno, xfs_extlen_t len); 137 + #else 138 + # define 
xfs_scrub_xref_is_used_rt_space(sc, rtbno, len) do { } while (0) 124 139 #endif 125 140 126 141 #endif /* __XFS_SCRUB_SCRUB_H__ */
+33 -11
fs/xfs/scrub/trace.h
··· 50 50 __entry->flags = sm->sm_flags; 51 51 __entry->error = error; 52 52 ), 53 - TP_printk("dev %d:%d ino %llu type %u agno %u inum %llu gen %u flags 0x%x error %d", 53 + TP_printk("dev %d:%d ino 0x%llx type %u agno %u inum %llu gen %u flags 0x%x error %d", 54 54 MAJOR(__entry->dev), MINOR(__entry->dev), 55 55 __entry->ino, 56 56 __entry->type, ··· 90 90 __entry->error = error; 91 91 __entry->ret_ip = ret_ip; 92 92 ), 93 - TP_printk("dev %d:%d type %u agno %u agbno %u error %d ret_ip %pF", 93 + TP_printk("dev %d:%d type %u agno %u agbno %u error %d ret_ip %pS", 94 94 MAJOR(__entry->dev), MINOR(__entry->dev), 95 95 __entry->type, 96 96 __entry->agno, ··· 121 121 __entry->error = error; 122 122 __entry->ret_ip = ret_ip; 123 123 ), 124 - TP_printk("dev %d:%d ino %llu fork %d type %u offset %llu error %d ret_ip %pF", 124 + TP_printk("dev %d:%d ino 0x%llx fork %d type %u offset %llu error %d ret_ip %pS", 125 125 MAJOR(__entry->dev), MINOR(__entry->dev), 126 126 __entry->ino, 127 127 __entry->whichfork, ··· 156 156 __entry->bno = bno; 157 157 __entry->ret_ip = ret_ip; 158 158 ), 159 - TP_printk("dev %d:%d type %u agno %u agbno %u ret_ip %pF", 159 + TP_printk("dev %d:%d type %u agno %u agbno %u ret_ip %pS", 160 160 MAJOR(__entry->dev), MINOR(__entry->dev), 161 161 __entry->type, 162 162 __entry->agno, ··· 207 207 __entry->bno = bno; 208 208 __entry->ret_ip = ret_ip; 209 209 ), 210 - TP_printk("dev %d:%d ino %llu type %u agno %u agbno %u ret_ip %pF", 210 + TP_printk("dev %d:%d ino 0x%llx type %u agno %u agbno %u ret_ip %pS", 211 211 MAJOR(__entry->dev), MINOR(__entry->dev), 212 212 __entry->ino, 213 213 __entry->type, ··· 246 246 __entry->offset = offset; 247 247 __entry->ret_ip = ret_ip; 248 248 ), 249 - TP_printk("dev %d:%d ino %llu fork %d type %u offset %llu ret_ip %pF", 249 + TP_printk("dev %d:%d ino 0x%llx fork %d type %u offset %llu ret_ip %pS", 250 250 MAJOR(__entry->dev), MINOR(__entry->dev), 251 251 __entry->ino, 252 252 __entry->whichfork, ··· 277 277 
__entry->type = sc->sm->sm_type; 278 278 __entry->ret_ip = ret_ip; 279 279 ), 280 - TP_printk("dev %d:%d type %u ret_ip %pF", 280 + TP_printk("dev %d:%d type %u ret_ip %pS", 281 281 MAJOR(__entry->dev), MINOR(__entry->dev), 282 282 __entry->type, 283 283 __entry->ret_ip) ··· 311 311 __entry->error = error; 312 312 __entry->ret_ip = ret_ip; 313 313 ), 314 - TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pF", 314 + TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pS", 315 315 MAJOR(__entry->dev), MINOR(__entry->dev), 316 316 __entry->type, 317 317 __entry->btnum, ··· 354 354 __entry->error = error; 355 355 __entry->ret_ip = ret_ip; 356 356 ), 357 - TP_printk("dev %d:%d ino %llu fork %d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pF", 357 + TP_printk("dev %d:%d ino 0x%llx fork %d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pS", 358 358 MAJOR(__entry->dev), MINOR(__entry->dev), 359 359 __entry->ino, 360 360 __entry->whichfork, ··· 393 393 __entry->ptr = cur->bc_ptrs[level]; 394 394 __entry->ret_ip = ret_ip; 395 395 ), 396 - TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pF", 396 + TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pS", 397 397 MAJOR(__entry->dev), MINOR(__entry->dev), 398 398 __entry->type, 399 399 __entry->btnum, ··· 433 433 __entry->ptr = cur->bc_ptrs[level]; 434 434 __entry->ret_ip = ret_ip; 435 435 ), 436 - TP_printk("dev %d:%d ino %llu fork %d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pF", 436 + TP_printk("dev %d:%d ino 0x%llx fork %d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pS", 437 437 MAJOR(__entry->dev), MINOR(__entry->dev), 438 438 __entry->ino, 439 439 __entry->whichfork, ··· 490 490 491 491 DEFINE_SCRUB_SBTREE_EVENT(xfs_scrub_btree_rec); 492 492 DEFINE_SCRUB_SBTREE_EVENT(xfs_scrub_btree_key); 493 + 494 + 
TRACE_EVENT(xfs_scrub_xref_error, 495 + TP_PROTO(struct xfs_scrub_context *sc, int error, void *ret_ip), 496 + TP_ARGS(sc, error, ret_ip), 497 + TP_STRUCT__entry( 498 + __field(dev_t, dev) 499 + __field(int, type) 500 + __field(int, error) 501 + __field(void *, ret_ip) 502 + ), 503 + TP_fast_assign( 504 + __entry->dev = sc->mp->m_super->s_dev; 505 + __entry->type = sc->sm->sm_type; 506 + __entry->error = error; 507 + __entry->ret_ip = ret_ip; 508 + ), 509 + TP_printk("dev %d:%d type %u xref error %d ret_ip %pF", 510 + MAJOR(__entry->dev), MINOR(__entry->dev), 511 + __entry->type, 512 + __entry->error, 513 + __entry->ret_ip) 514 + ); 493 515 494 516 #endif /* _TRACE_XFS_SCRUB_TRACE_H */ 495 517
+14 -1
fs/xfs/xfs_aops.c
··· 390 390 if (XFS_FORCED_SHUTDOWN(mp)) 391 391 return -EIO; 392 392 393 + /* 394 + * Truncate can race with writeback since writeback doesn't take the 395 + * iolock and truncate decreases the file size before it starts 396 + * truncating the pages between new_size and old_size. Therefore, we 397 + * can end up in the situation where writeback gets a CoW fork mapping 398 + * but the truncate makes the mapping invalid and we end up in here 399 + * trying to get a new mapping. Bail out here so that we simply never 400 + * get a valid mapping and so we drop the write altogether. The page 401 + * truncation will kill the contents anyway. 402 + */ 403 + if (type == XFS_IO_COW && offset > i_size_read(inode)) 404 + return 0; 405 + 393 406 ASSERT(type != XFS_IO_COW); 394 407 if (type == XFS_IO_UNWRITTEN) 395 408 bmapi_flags |= XFS_BMAPI_IGSTATE; ··· 804 791 goto out_invalidate; 805 792 806 793 xfs_alert(ip->i_mount, 807 - "page discard on page %p, inode 0x%llx, offset %llu.", 794 + "page discard on page "PTR_FMT", inode 0x%llx, offset %llu.", 808 795 page, ip->i_ino, offset); 809 796 810 797 xfs_ilock(ip, XFS_ILOCK_EXCL);
+2 -2
fs/xfs/xfs_bmap_util.c
··· 1872 1872 */ 1873 1873 lock_two_nondirectories(VFS_I(ip), VFS_I(tip)); 1874 1874 lock_flags = XFS_MMAPLOCK_EXCL; 1875 - xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL); 1875 + xfs_lock_two_inodes(ip, XFS_MMAPLOCK_EXCL, tip, XFS_MMAPLOCK_EXCL); 1876 1876 1877 1877 /* Verify that both files have the same format */ 1878 1878 if ((VFS_I(ip)->i_mode & S_IFMT) != (VFS_I(tip)->i_mode & S_IFMT)) { ··· 1919 1919 * Lock and join the inodes to the tansaction so that transaction commit 1920 1920 * or cancel will unlock the inodes from this point onwards. 1921 1921 */ 1922 - xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); 1922 + xfs_lock_two_inodes(ip, XFS_ILOCK_EXCL, tip, XFS_ILOCK_EXCL); 1923 1923 lock_flags |= XFS_ILOCK_EXCL; 1924 1924 xfs_trans_ijoin(tp, ip, 0); 1925 1925 xfs_trans_ijoin(tp, tip, 0);
+13 -9
fs/xfs/xfs_buf.c
··· 236 236 init_completion(&bp->b_iowait); 237 237 INIT_LIST_HEAD(&bp->b_lru); 238 238 INIT_LIST_HEAD(&bp->b_list); 239 + INIT_LIST_HEAD(&bp->b_li_list); 239 240 sema_init(&bp->b_sema, 0); /* held, no waiters */ 240 241 spin_lock_init(&bp->b_lock); 241 242 XB_SET_OWNER(bp); ··· 586 585 * returning a specific error on buffer lookup failures. 587 586 */ 588 587 xfs_alert(btp->bt_mount, 589 - "%s: Block out of range: block 0x%llx, EOFS 0x%llx ", 588 + "%s: daddr 0x%llx out of range, EOFS 0x%llx", 590 589 __func__, cmap.bm_bn, eofs); 591 590 WARN_ON(1); 592 591 return NULL; ··· 1181 1180 } 1182 1181 1183 1182 void 1184 - xfs_buf_ioerror( 1183 + __xfs_buf_ioerror( 1185 1184 xfs_buf_t *bp, 1186 - int error) 1185 + int error, 1186 + xfs_failaddr_t failaddr) 1187 1187 { 1188 1188 ASSERT(error <= 0 && error >= -1000); 1189 1189 bp->b_error = error; 1190 - trace_xfs_buf_ioerror(bp, error, _RET_IP_); 1190 + trace_xfs_buf_ioerror(bp, error, failaddr); 1191 1191 } 1192 1192 1193 1193 void ··· 1197 1195 const char *func) 1198 1196 { 1199 1197 xfs_alert(bp->b_target->bt_mount, 1200 - "metadata I/O error: block 0x%llx (\"%s\") error %d numblks %d", 1201 - (uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length); 1198 + "metadata I/O error in \"%s\" at daddr 0x%llx len %d error %d", 1199 + func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length, 1200 + -bp->b_error); 1202 1201 } 1203 1202 1204 1203 int ··· 1381 1378 */ 1382 1379 if (xfs_sb_version_hascrc(&mp->m_sb)) { 1383 1380 xfs_warn(mp, 1384 - "%s: no ops on block 0x%llx/0x%x", 1381 + "%s: no buf ops on daddr 0x%llx len %d", 1385 1382 __func__, bp->b_bn, bp->b_length); 1386 - xfs_hex_dump(bp->b_addr, 64); 1383 + xfs_hex_dump(bp->b_addr, 1384 + XFS_CORRUPTION_DUMP_LEN); 1387 1385 dump_stack(); 1388 1386 } 1389 1387 } ··· 1675 1671 list_del_init(&bp->b_lru); 1676 1672 if (bp->b_flags & XBF_WRITE_FAIL) { 1677 1673 xfs_alert(btp->bt_mount, 1678 - "Corruption Alert: Buffer at block 0x%llx had permanent write failures!", 1674 + 
"Corruption Alert: Buffer at daddr 0x%llx had permanent write failures!", 1679 1675 (long long)bp->b_bn); 1680 1676 xfs_alert(btp->bt_mount, 1681 1677 "Please run xfs_repair to determine the extent of the problem.");
+6 -2
fs/xfs/xfs_buf.h
··· 140 140 char *name; 141 141 void (*verify_read)(struct xfs_buf *); 142 142 void (*verify_write)(struct xfs_buf *); 143 + xfs_failaddr_t (*verify_struct)(struct xfs_buf *bp); 143 144 }; 144 145 145 146 typedef struct xfs_buf { ··· 176 175 struct workqueue_struct *b_ioend_wq; /* I/O completion wq */ 177 176 xfs_buf_iodone_t b_iodone; /* I/O completion function */ 178 177 struct completion b_iowait; /* queue for I/O waiters */ 179 - void *b_fspriv; 178 + void *b_log_item; 179 + struct list_head b_li_list; /* Log items list head */ 180 180 struct xfs_trans *b_transp; 181 181 struct page **b_pages; /* array of page pointers */ 182 182 struct page *b_page_array[XB_PAGES]; /* inline pages */ ··· 317 315 /* Buffer Read and Write Routines */ 318 316 extern int xfs_bwrite(struct xfs_buf *bp); 319 317 extern void xfs_buf_ioend(struct xfs_buf *bp); 320 - extern void xfs_buf_ioerror(xfs_buf_t *, int); 318 + extern void __xfs_buf_ioerror(struct xfs_buf *bp, int error, 319 + xfs_failaddr_t failaddr); 320 + #define xfs_buf_ioerror(bp, err) __xfs_buf_ioerror((bp), (err), __this_address) 321 321 extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func); 322 322 extern void xfs_buf_submit(struct xfs_buf *bp); 323 323 extern int xfs_buf_submit_wait(struct xfs_buf *bp);
+82 -74
fs/xfs/xfs_buf_item.c
··· 61 61 */ 62 62 STATIC void 63 63 xfs_buf_item_size_segment( 64 - struct xfs_buf_log_item *bip, 65 - struct xfs_buf_log_format *blfp, 66 - int *nvecs, 67 - int *nbytes) 64 + struct xfs_buf_log_item *bip, 65 + struct xfs_buf_log_format *blfp, 66 + int *nvecs, 67 + int *nbytes) 68 68 { 69 - struct xfs_buf *bp = bip->bli_buf; 70 - int next_bit; 71 - int last_bit; 69 + struct xfs_buf *bp = bip->bli_buf; 70 + int next_bit; 71 + int last_bit; 72 72 73 73 last_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0); 74 74 if (last_bit == -1) ··· 218 218 uint offset, 219 219 struct xfs_buf_log_format *blfp) 220 220 { 221 - struct xfs_buf *bp = bip->bli_buf; 222 - uint base_size; 223 - int first_bit; 224 - int last_bit; 225 - int next_bit; 226 - uint nbits; 221 + struct xfs_buf *bp = bip->bli_buf; 222 + uint base_size; 223 + int first_bit; 224 + int last_bit; 225 + int next_bit; 226 + uint nbits; 227 227 228 228 /* copy the flags across from the base format item */ 229 229 blfp->blf_flags = bip->__bli_format.blf_flags; ··· 406 406 int remove) 407 407 { 408 408 struct xfs_buf_log_item *bip = BUF_ITEM(lip); 409 - xfs_buf_t *bp = bip->bli_buf; 410 - struct xfs_ail *ailp = lip->li_ailp; 411 - int stale = bip->bli_flags & XFS_BLI_STALE; 412 - int freed; 409 + xfs_buf_t *bp = bip->bli_buf; 410 + struct xfs_ail *ailp = lip->li_ailp; 411 + int stale = bip->bli_flags & XFS_BLI_STALE; 412 + int freed; 413 413 414 - ASSERT(bp->b_fspriv == bip); 414 + ASSERT(bp->b_log_item == bip); 415 415 ASSERT(atomic_read(&bip->bli_refcount) > 0); 416 416 417 417 trace_xfs_buf_item_unpin(bip); ··· 456 456 */ 457 457 if (bip->bli_flags & XFS_BLI_STALE_INODE) { 458 458 xfs_buf_do_callbacks(bp); 459 - bp->b_fspriv = NULL; 459 + bp->b_log_item = NULL; 460 + list_del_init(&bp->b_li_list); 460 461 bp->b_iodone = NULL; 461 462 } else { 462 463 spin_lock(&ailp->xa_lock); 463 464 xfs_trans_ail_delete(ailp, lip, SHUTDOWN_LOG_IO_ERROR); 464 465 xfs_buf_item_relse(bp); 465 - ASSERT(bp->b_fspriv == 
NULL); 466 + ASSERT(bp->b_log_item == NULL); 466 467 } 467 468 xfs_buf_relse(bp); 468 469 } else if (freed && remove) { ··· 723 722 724 723 /* 725 724 * Allocate a new buf log item to go with the given buffer. 726 - * Set the buffer's b_fsprivate field to point to the new 727 - * buf log item. If there are other item's attached to the 728 - * buffer (see xfs_buf_attach_iodone() below), then put the 729 - * buf log item at the front. 725 + * Set the buffer's b_log_item field to point to the new 726 + * buf log item. 730 727 */ 731 728 int 732 729 xfs_buf_item_init( 733 730 struct xfs_buf *bp, 734 731 struct xfs_mount *mp) 735 732 { 736 - struct xfs_log_item *lip = bp->b_fspriv; 737 - struct xfs_buf_log_item *bip; 733 + struct xfs_buf_log_item *bip = bp->b_log_item; 738 734 int chunks; 739 735 int map_size; 740 736 int error; ··· 739 741 740 742 /* 741 743 * Check to see if there is already a buf log item for 742 - * this buffer. If there is, it is guaranteed to be 743 - * the first. If we do already have one, there is 744 + * this buffer. If we do already have one, there is 744 745 * nothing to do here so return. 745 746 */ 746 747 ASSERT(bp->b_target->bt_mount == mp); 747 - if (lip != NULL && lip->li_type == XFS_LI_BUF) 748 + if (bip != NULL) { 749 + ASSERT(bip->bli_item.li_type == XFS_LI_BUF); 748 750 return 0; 751 + } 749 752 750 753 bip = kmem_zone_zalloc(xfs_buf_item_zone, KM_SLEEP); 751 754 xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops); ··· 780 781 bip->bli_formats[i].blf_map_size = map_size; 781 782 } 782 783 783 - /* 784 - * Put the buf item into the list of items attached to the 785 - * buffer at the front. 
786 - */ 787 - if (bp->b_fspriv) 788 - bip->bli_item.li_bio_list = bp->b_fspriv; 789 - bp->b_fspriv = bip; 784 + bp->b_log_item = bip; 790 785 xfs_buf_hold(bp); 791 786 return 0; 792 787 } ··· 873 880 */ 874 881 void 875 882 xfs_buf_item_log( 876 - xfs_buf_log_item_t *bip, 883 + struct xfs_buf_log_item *bip, 877 884 uint first, 878 885 uint last) 879 886 { ··· 936 943 937 944 STATIC void 938 945 xfs_buf_item_free( 939 - xfs_buf_log_item_t *bip) 946 + struct xfs_buf_log_item *bip) 940 947 { 941 948 xfs_buf_item_free_format(bip); 942 949 kmem_free(bip->bli_item.li_lv_shadow); ··· 954 961 xfs_buf_item_relse( 955 962 xfs_buf_t *bp) 956 963 { 957 - xfs_buf_log_item_t *bip = bp->b_fspriv; 964 + struct xfs_buf_log_item *bip = bp->b_log_item; 958 965 959 966 trace_xfs_buf_item_relse(bp, _RET_IP_); 960 967 ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL)); 961 968 962 - bp->b_fspriv = bip->bli_item.li_bio_list; 963 - if (bp->b_fspriv == NULL) 969 + bp->b_log_item = NULL; 970 + if (list_empty(&bp->b_li_list)) 964 971 bp->b_iodone = NULL; 965 972 966 973 xfs_buf_rele(bp); ··· 973 980 * to be called when the buffer's I/O completes. If it is not set 974 981 * already, set the buffer's b_iodone() routine to be 975 982 * xfs_buf_iodone_callbacks() and link the log item into the list of 976 - * items rooted at b_fsprivate. Items are always added as the second 977 - * entry in the list if there is a first, because the buf item code 978 - * assumes that the buf log item is first. 983 + * items rooted at b_li_list. 
979 984 */ 980 985 void 981 986 xfs_buf_attach_iodone( ··· 981 990 void (*cb)(xfs_buf_t *, xfs_log_item_t *), 982 991 xfs_log_item_t *lip) 983 992 { 984 - xfs_log_item_t *head_lip; 985 - 986 993 ASSERT(xfs_buf_islocked(bp)); 987 994 988 995 lip->li_cb = cb; 989 - head_lip = bp->b_fspriv; 990 - if (head_lip) { 991 - lip->li_bio_list = head_lip->li_bio_list; 992 - head_lip->li_bio_list = lip; 993 - } else { 994 - bp->b_fspriv = lip; 995 - } 996 + list_add_tail(&lip->li_bio_list, &bp->b_li_list); 996 997 997 998 ASSERT(bp->b_iodone == NULL || 998 999 bp->b_iodone == xfs_buf_iodone_callbacks); ··· 994 1011 /* 995 1012 * We can have many callbacks on a buffer. Running the callbacks individually 996 1013 * can cause a lot of contention on the AIL lock, so we allow for a single 997 - * callback to be able to scan the remaining lip->li_bio_list for other items 998 - * of the same type and callback to be processed in the first call. 1014 + * callback to be able to scan the remaining items in bp->b_li_list for other 1015 + * items of the same type and callback to be processed in the first call. 999 1016 * 1000 1017 * As a result, the loop walking the callback list below will also modify the 1001 1018 * list. it removes the first item from the list and then runs the callback. 1002 - * The loop then restarts from the new head of the list. This allows the 1019 + * The loop then restarts from the new first item int the list. This allows the 1003 1020 * callback to scan and modify the list attached to the buffer and we don't 1004 1021 * have to care about maintaining a next item pointer. 
1005 1022 */ ··· 1007 1024 xfs_buf_do_callbacks( 1008 1025 struct xfs_buf *bp) 1009 1026 { 1027 + struct xfs_buf_log_item *blip = bp->b_log_item; 1010 1028 struct xfs_log_item *lip; 1011 1029 1012 - while ((lip = bp->b_fspriv) != NULL) { 1013 - bp->b_fspriv = lip->li_bio_list; 1014 - ASSERT(lip->li_cb != NULL); 1030 + /* If there is a buf_log_item attached, run its callback */ 1031 + if (blip) { 1032 + lip = &blip->bli_item; 1033 + lip->li_cb(bp, lip); 1034 + } 1035 + 1036 + while (!list_empty(&bp->b_li_list)) { 1037 + lip = list_first_entry(&bp->b_li_list, struct xfs_log_item, 1038 + li_bio_list); 1039 + 1015 1040 /* 1016 - * Clear the next pointer so we don't have any 1041 + * Remove the item from the list, so we don't have any 1017 1042 * confusion if the item is added to another buf. 1018 1043 * Don't touch the log item after calling its 1019 1044 * callback, because it could have freed itself. 1020 1045 */ 1021 - lip->li_bio_list = NULL; 1046 + list_del_init(&lip->li_bio_list); 1022 1047 lip->li_cb(bp, lip); 1023 1048 } 1024 1049 } ··· 1043 1052 xfs_buf_do_callbacks_fail( 1044 1053 struct xfs_buf *bp) 1045 1054 { 1046 - struct xfs_log_item *next; 1047 - struct xfs_log_item *lip = bp->b_fspriv; 1048 - struct xfs_ail *ailp = lip->li_ailp; 1055 + struct xfs_log_item *lip; 1056 + struct xfs_ail *ailp; 1049 1057 1058 + /* 1059 + * Buffer log item errors are handled directly by xfs_buf_item_push() 1060 + * and xfs_buf_iodone_callback_error, and they have no IO error 1061 + * callbacks. Check only for items in b_li_list. 
1062 + */ 1063 + if (list_empty(&bp->b_li_list)) 1064 + return; 1065 + 1066 + lip = list_first_entry(&bp->b_li_list, struct xfs_log_item, 1067 + li_bio_list); 1068 + ailp = lip->li_ailp; 1050 1069 spin_lock(&ailp->xa_lock); 1051 - for (; lip; lip = next) { 1052 - next = lip->li_bio_list; 1070 + list_for_each_entry(lip, &bp->b_li_list, li_bio_list) { 1053 1071 if (lip->li_ops->iop_error) 1054 1072 lip->li_ops->iop_error(lip, bp); 1055 1073 } ··· 1069 1069 xfs_buf_iodone_callback_error( 1070 1070 struct xfs_buf *bp) 1071 1071 { 1072 - struct xfs_log_item *lip = bp->b_fspriv; 1073 - struct xfs_mount *mp = lip->li_mountp; 1072 + struct xfs_buf_log_item *bip = bp->b_log_item; 1073 + struct xfs_log_item *lip; 1074 + struct xfs_mount *mp; 1074 1075 static ulong lasttime; 1075 1076 static xfs_buftarg_t *lasttarg; 1076 1077 struct xfs_error_cfg *cfg; 1078 + 1079 + /* 1080 + * The failed buffer might not have a buf_log_item attached or the 1081 + * log_item list might be empty. Get the mp from the available 1082 + * xfs_log_item 1083 + */ 1084 + lip = list_first_entry_or_null(&bp->b_li_list, struct xfs_log_item, 1085 + li_bio_list); 1086 + mp = lip ? 
lip->li_mountp : bip->bli_item.li_mountp; 1077 1087 1078 1088 /* 1079 1089 * If we've already decided to shutdown the filesystem because of ··· 1193 1183 bp->b_first_retry_time = 0; 1194 1184 1195 1185 xfs_buf_do_callbacks(bp); 1196 - bp->b_fspriv = NULL; 1186 + bp->b_log_item = NULL; 1187 + list_del_init(&bp->b_li_list); 1197 1188 bp->b_iodone = NULL; 1198 1189 xfs_buf_ioend(bp); 1199 1190 } ··· 1239 1228 bool 1240 1229 xfs_buf_resubmit_failed_buffers( 1241 1230 struct xfs_buf *bp, 1242 - struct xfs_log_item *lip, 1243 1231 struct list_head *buffer_list) 1244 1232 { 1245 - struct xfs_log_item *next; 1233 + struct xfs_log_item *lip; 1246 1234 1247 1235 /* 1248 1236 * Clear XFS_LI_FAILED flag from all items before resubmit ··· 1249 1239 * XFS_LI_FAILED set/clear is protected by xa_lock, caller this 1250 1240 * function already have it acquired 1251 1241 */ 1252 - for (; lip; lip = next) { 1253 - next = lip->li_bio_list; 1242 + list_for_each_entry(lip, &bp->b_li_list, li_bio_list) 1254 1243 xfs_clear_li_failed(lip); 1255 - } 1256 1244 1257 1245 /* Add this buffer back to the delayed write list */ 1258 1246 return xfs_buf_delwri_queue(bp, buffer_list);
+3 -4
fs/xfs/xfs_buf_item.h
··· 50 50 * needed to log buffers. It tracks how many times the lock has been 51 51 * locked, and which 128 byte chunks of the buffer are dirty. 52 52 */ 53 - typedef struct xfs_buf_log_item { 53 + struct xfs_buf_log_item { 54 54 xfs_log_item_t bli_item; /* common item structure */ 55 55 struct xfs_buf *bli_buf; /* real buffer pointer */ 56 56 unsigned int bli_flags; /* misc flags */ ··· 59 59 int bli_format_count; /* count of headers */ 60 60 struct xfs_buf_log_format *bli_formats; /* array of in-log header ptrs */ 61 61 struct xfs_buf_log_format __bli_format; /* embedded in-log header */ 62 - } xfs_buf_log_item_t; 62 + }; 63 63 64 64 int xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); 65 65 void xfs_buf_item_relse(struct xfs_buf *); 66 - void xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint); 66 + void xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint); 67 67 bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *); 68 68 void xfs_buf_attach_iodone(struct xfs_buf *, 69 69 void(*)(struct xfs_buf *, xfs_log_item_t *), ··· 71 71 void xfs_buf_iodone_callbacks(struct xfs_buf *); 72 72 void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *); 73 73 bool xfs_buf_resubmit_failed_buffers(struct xfs_buf *, 74 - struct xfs_log_item *, 75 74 struct list_head *); 76 75 77 76 extern kmem_zone_t *xfs_buf_item_zone;
+1 -3
fs/xfs/xfs_dir2_readdir.c
··· 152 152 struct xfs_inode *dp = args->dp; /* incore directory inode */ 153 153 xfs_dir2_data_hdr_t *hdr; /* block header */ 154 154 struct xfs_buf *bp; /* buffer for block */ 155 - xfs_dir2_block_tail_t *btp; /* block tail */ 156 155 xfs_dir2_data_entry_t *dep; /* block data entry */ 157 156 xfs_dir2_data_unused_t *dup; /* block unused entry */ 158 157 char *endptr; /* end of the data entries */ ··· 184 185 /* 185 186 * Set up values for the loop. 186 187 */ 187 - btp = xfs_dir2_block_tail_p(geo, hdr); 188 188 ptr = (char *)dp->d_ops->data_entry_p(hdr); 189 - endptr = (char *)xfs_dir2_block_leaf_p(btp); 189 + endptr = xfs_dir3_data_endp(geo, hdr); 190 190 191 191 /* 192 192 * Loop over the data portion of the block.
+5 -57
fs/xfs/xfs_dquot.c
··· 399 399 return error; 400 400 } 401 401 402 - STATIC int 403 - xfs_qm_dqrepair( 404 - struct xfs_mount *mp, 405 - struct xfs_trans *tp, 406 - struct xfs_dquot *dqp, 407 - xfs_dqid_t firstid, 408 - struct xfs_buf **bpp) 409 - { 410 - int error; 411 - struct xfs_disk_dquot *ddq; 412 - struct xfs_dqblk *d; 413 - int i; 414 - 415 - /* 416 - * Read the buffer without verification so we get the corrupted 417 - * buffer returned to us. make sure we verify it on write, though. 418 - */ 419 - error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, dqp->q_blkno, 420 - mp->m_quotainfo->qi_dqchunklen, 421 - 0, bpp, NULL); 422 - 423 - if (error) { 424 - ASSERT(*bpp == NULL); 425 - return error; 426 - } 427 - (*bpp)->b_ops = &xfs_dquot_buf_ops; 428 - 429 - ASSERT(xfs_buf_islocked(*bpp)); 430 - d = (struct xfs_dqblk *)(*bpp)->b_addr; 431 - 432 - /* Do the actual repair of dquots in this buffer */ 433 - for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++) { 434 - ddq = &d[i].dd_diskdq; 435 - error = xfs_dqcheck(mp, ddq, firstid + i, 436 - dqp->dq_flags & XFS_DQ_ALLTYPES, 437 - XFS_QMOPT_DQREPAIR, "xfs_qm_dqrepair"); 438 - if (error) { 439 - /* repair failed, we're screwed */ 440 - xfs_trans_brelse(tp, *bpp); 441 - return -EIO; 442 - } 443 - } 444 - 445 - return 0; 446 - } 447 - 448 402 /* 449 403 * Maps a dquot to the buffer containing its on-disk version. 
450 404 * This returns a ptr to the buffer containing the on-disk dquot ··· 480 526 dqp->q_blkno, 481 527 mp->m_quotainfo->qi_dqchunklen, 482 528 0, &bp, &xfs_dquot_buf_ops); 483 - 484 - if (error == -EFSCORRUPTED && (flags & XFS_QMOPT_DQREPAIR)) { 485 - xfs_dqid_t firstid = (xfs_dqid_t)map.br_startoff * 486 - mp->m_quotainfo->qi_dqperchunk; 487 - ASSERT(bp == NULL); 488 - error = xfs_qm_dqrepair(mp, tp, dqp, firstid, &bp); 489 - } 490 - 491 529 if (error) { 492 530 ASSERT(bp == NULL); 493 531 return error; ··· 956 1010 struct xfs_mount *mp = dqp->q_mount; 957 1011 struct xfs_buf *bp; 958 1012 struct xfs_disk_dquot *ddqp; 1013 + xfs_failaddr_t fa; 959 1014 int error; 960 1015 961 1016 ASSERT(XFS_DQ_IS_LOCKED(dqp)); ··· 1003 1056 /* 1004 1057 * A simple sanity check in case we got a corrupted dquot.. 1005 1058 */ 1006 - error = xfs_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0, 1007 - XFS_QMOPT_DOWARN, "dqflush (incore copy)"); 1008 - if (error) { 1059 + fa = xfs_dquot_verify(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0, 0); 1060 + if (fa) { 1061 + xfs_alert(mp, "corrupt dquot ID 0x%x in memory at %pS", 1062 + be32_to_cpu(ddqp->d_id), fa); 1009 1063 xfs_buf_relse(bp); 1010 1064 xfs_dqfunlock(dqp); 1011 1065 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+3 -6
fs/xfs/xfs_dquot_item.c
··· 150 150 struct xfs_log_item *lip, 151 151 struct xfs_buf *bp) 152 152 { 153 - struct xfs_dquot *dqp; 154 - 155 - dqp = DQUOT_ITEM(lip)->qli_dquot; 156 - ASSERT(!completion_done(&dqp->q_flush)); 153 + ASSERT(!completion_done(&DQUOT_ITEM(lip)->qli_dquot->q_flush)); 157 154 xfs_set_li_failed(lip, bp); 158 155 } 159 156 ··· 176 179 if (!xfs_buf_trylock(bp)) 177 180 return XFS_ITEM_LOCKED; 178 181 179 - if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list)) 182 + if (!xfs_buf_resubmit_failed_buffers(bp, buffer_list)) 180 183 rval = XFS_ITEM_FLUSHING; 181 184 182 185 xfs_buf_unlock(bp); ··· 209 212 210 213 error = xfs_qm_dqflush(dqp, &bp); 211 214 if (error) { 212 - xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p", 215 + xfs_warn(dqp->q_mount, "%s: push error %d on dqp "PTR_FMT, 213 216 __func__, error, dqp); 214 217 } else { 215 218 if (!xfs_buf_delwri_queue(bp, buffer_list))
+54 -10
fs/xfs/xfs_error.c
··· 24 24 #include "xfs_errortag.h" 25 25 #include "xfs_error.h" 26 26 #include "xfs_sysfs.h" 27 + #include "xfs_inode.h" 27 28 28 29 #ifdef DEBUG 29 30 ··· 315 314 struct xfs_mount *mp, 316 315 const char *filename, 317 316 int linenum, 318 - void *ra) 317 + xfs_failaddr_t failaddr) 319 318 { 320 319 if (level <= xfs_error_level) { 321 320 xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT, 322 321 "Internal error %s at line %d of file %s. Caller %pS", 323 - tag, linenum, filename, ra); 322 + tag, linenum, filename, failaddr); 324 323 325 324 xfs_stack_trace(); 326 325 } ··· 334 333 void *p, 335 334 const char *filename, 336 335 int linenum, 337 - void *ra) 336 + xfs_failaddr_t failaddr) 338 337 { 339 338 if (level <= xfs_error_level) 340 - xfs_hex_dump(p, 64); 341 - xfs_error_report(tag, level, mp, filename, linenum, ra); 339 + xfs_hex_dump(p, XFS_CORRUPTION_DUMP_LEN); 340 + xfs_error_report(tag, level, mp, filename, linenum, failaddr); 342 341 xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair"); 343 342 } 344 343 ··· 348 347 */ 349 348 void 350 349 xfs_verifier_error( 351 - struct xfs_buf *bp) 350 + struct xfs_buf *bp, 351 + int error, 352 + xfs_failaddr_t failaddr) 352 353 { 353 - struct xfs_mount *mp = bp->b_target->bt_mount; 354 + struct xfs_mount *mp = bp->b_target->bt_mount; 355 + xfs_failaddr_t fa; 356 + 357 + fa = failaddr ? failaddr : __return_address; 358 + __xfs_buf_ioerror(bp, error, fa); 354 359 355 360 xfs_alert(mp, "Metadata %s detected at %pS, %s block 0x%llx", 356 361 bp->b_error == -EFSBADCRC ? 
"CRC error" : "corruption", 357 - __return_address, bp->b_ops->name, bp->b_bn); 362 + fa, bp->b_ops->name, bp->b_bn); 358 363 359 364 xfs_alert(mp, "Unmount and run xfs_repair"); 360 365 361 366 if (xfs_error_level >= XFS_ERRLEVEL_LOW) { 362 - xfs_alert(mp, "First 64 bytes of corrupted metadata buffer:"); 363 - xfs_hex_dump(xfs_buf_offset(bp, 0), 64); 367 + xfs_alert(mp, "First %d bytes of corrupted metadata buffer:", 368 + XFS_CORRUPTION_DUMP_LEN); 369 + xfs_hex_dump(xfs_buf_offset(bp, 0), XFS_CORRUPTION_DUMP_LEN); 370 + } 371 + 372 + if (xfs_error_level >= XFS_ERRLEVEL_HIGH) 373 + xfs_stack_trace(); 374 + } 375 + 376 + /* 377 + * Warnings for inode corruption problems. Don't bother with the stack 378 + * trace unless the error level is turned up high. 379 + */ 380 + void 381 + xfs_inode_verifier_error( 382 + struct xfs_inode *ip, 383 + int error, 384 + const char *name, 385 + void *buf, 386 + size_t bufsz, 387 + xfs_failaddr_t failaddr) 388 + { 389 + struct xfs_mount *mp = ip->i_mount; 390 + xfs_failaddr_t fa; 391 + int sz; 392 + 393 + fa = failaddr ? failaddr : __return_address; 394 + 395 + xfs_alert(mp, "Metadata %s detected at %pS, inode 0x%llx %s", 396 + error == -EFSBADCRC ? "CRC error" : "corruption", 397 + fa, ip->i_ino, name); 398 + 399 + xfs_alert(mp, "Unmount and run xfs_repair"); 400 + 401 + if (buf && xfs_error_level >= XFS_ERRLEVEL_LOW) { 402 + sz = min_t(size_t, XFS_CORRUPTION_DUMP_LEN, bufsz); 403 + xfs_alert(mp, "First %d bytes of corrupted metadata buffer:", 404 + sz); 405 + xfs_hex_dump(buf, sz); 364 406 } 365 407 366 408 if (xfs_error_level >= XFS_ERRLEVEL_HIGH)
+11 -3
fs/xfs/xfs_error.h
··· 21 21 struct xfs_mount; 22 22 23 23 extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp, 24 - const char *filename, int linenum, void *ra); 24 + const char *filename, int linenum, 25 + xfs_failaddr_t failaddr); 25 26 extern void xfs_corruption_error(const char *tag, int level, 26 27 struct xfs_mount *mp, void *p, const char *filename, 27 - int linenum, void *ra); 28 - extern void xfs_verifier_error(struct xfs_buf *bp); 28 + int linenum, xfs_failaddr_t failaddr); 29 + extern void xfs_verifier_error(struct xfs_buf *bp, int error, 30 + xfs_failaddr_t failaddr); 31 + extern void xfs_inode_verifier_error(struct xfs_inode *ip, int error, 32 + const char *name, void *buf, size_t bufsz, 33 + xfs_failaddr_t failaddr); 29 34 30 35 #define XFS_ERROR_REPORT(e, lvl, mp) \ 31 36 xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address) ··· 41 36 #define XFS_ERRLEVEL_OFF 0 42 37 #define XFS_ERRLEVEL_LOW 1 43 38 #define XFS_ERRLEVEL_HIGH 5 39 + 40 + /* Dump 128 bytes of any corrupt buffer */ 41 + #define XFS_CORRUPTION_DUMP_LEN (128) 44 42 45 43 /* 46 44 * Macros to set EFSCORRUPTED & return/branch.
+1 -78
fs/xfs/xfs_fsops.c
··· 49 49 * File system operations 50 50 */ 51 51 52 - int 53 - xfs_fs_geometry( 54 - xfs_mount_t *mp, 55 - xfs_fsop_geom_t *geo, 56 - int new_version) 57 - { 58 - 59 - memset(geo, 0, sizeof(*geo)); 60 - 61 - geo->blocksize = mp->m_sb.sb_blocksize; 62 - geo->rtextsize = mp->m_sb.sb_rextsize; 63 - geo->agblocks = mp->m_sb.sb_agblocks; 64 - geo->agcount = mp->m_sb.sb_agcount; 65 - geo->logblocks = mp->m_sb.sb_logblocks; 66 - geo->sectsize = mp->m_sb.sb_sectsize; 67 - geo->inodesize = mp->m_sb.sb_inodesize; 68 - geo->imaxpct = mp->m_sb.sb_imax_pct; 69 - geo->datablocks = mp->m_sb.sb_dblocks; 70 - geo->rtblocks = mp->m_sb.sb_rblocks; 71 - geo->rtextents = mp->m_sb.sb_rextents; 72 - geo->logstart = mp->m_sb.sb_logstart; 73 - ASSERT(sizeof(geo->uuid)==sizeof(mp->m_sb.sb_uuid)); 74 - memcpy(geo->uuid, &mp->m_sb.sb_uuid, sizeof(mp->m_sb.sb_uuid)); 75 - if (new_version >= 2) { 76 - geo->sunit = mp->m_sb.sb_unit; 77 - geo->swidth = mp->m_sb.sb_width; 78 - } 79 - if (new_version >= 3) { 80 - geo->version = XFS_FSOP_GEOM_VERSION; 81 - geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK | 82 - XFS_FSOP_GEOM_FLAGS_DIRV2 | 83 - (xfs_sb_version_hasattr(&mp->m_sb) ? 84 - XFS_FSOP_GEOM_FLAGS_ATTR : 0) | 85 - (xfs_sb_version_hasquota(&mp->m_sb) ? 86 - XFS_FSOP_GEOM_FLAGS_QUOTA : 0) | 87 - (xfs_sb_version_hasalign(&mp->m_sb) ? 88 - XFS_FSOP_GEOM_FLAGS_IALIGN : 0) | 89 - (xfs_sb_version_hasdalign(&mp->m_sb) ? 90 - XFS_FSOP_GEOM_FLAGS_DALIGN : 0) | 91 - (xfs_sb_version_hasextflgbit(&mp->m_sb) ? 92 - XFS_FSOP_GEOM_FLAGS_EXTFLG : 0) | 93 - (xfs_sb_version_hassector(&mp->m_sb) ? 94 - XFS_FSOP_GEOM_FLAGS_SECTOR : 0) | 95 - (xfs_sb_version_hasasciici(&mp->m_sb) ? 96 - XFS_FSOP_GEOM_FLAGS_DIRV2CI : 0) | 97 - (xfs_sb_version_haslazysbcount(&mp->m_sb) ? 98 - XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) | 99 - (xfs_sb_version_hasattr2(&mp->m_sb) ? 100 - XFS_FSOP_GEOM_FLAGS_ATTR2 : 0) | 101 - (xfs_sb_version_hasprojid32bit(&mp->m_sb) ? 102 - XFS_FSOP_GEOM_FLAGS_PROJID32 : 0) | 103 - (xfs_sb_version_hascrc(&mp->m_sb) ? 
104 - XFS_FSOP_GEOM_FLAGS_V5SB : 0) | 105 - (xfs_sb_version_hasftype(&mp->m_sb) ? 106 - XFS_FSOP_GEOM_FLAGS_FTYPE : 0) | 107 - (xfs_sb_version_hasfinobt(&mp->m_sb) ? 108 - XFS_FSOP_GEOM_FLAGS_FINOBT : 0) | 109 - (xfs_sb_version_hassparseinodes(&mp->m_sb) ? 110 - XFS_FSOP_GEOM_FLAGS_SPINODES : 0) | 111 - (xfs_sb_version_hasrmapbt(&mp->m_sb) ? 112 - XFS_FSOP_GEOM_FLAGS_RMAPBT : 0) | 113 - (xfs_sb_version_hasreflink(&mp->m_sb) ? 114 - XFS_FSOP_GEOM_FLAGS_REFLINK : 0); 115 - geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ? 116 - mp->m_sb.sb_logsectsize : BBSIZE; 117 - geo->rtsectsize = mp->m_sb.sb_blocksize; 118 - geo->dirblocksize = mp->m_dir_geo->blksize; 119 - } 120 - if (new_version >= 4) { 121 - geo->flags |= 122 - (xfs_sb_version_haslogv2(&mp->m_sb) ? 123 - XFS_FSOP_GEOM_FLAGS_LOGV2 : 0); 124 - geo->logsunit = mp->m_sb.sb_logsunit; 125 - } 126 - return 0; 127 - } 128 - 129 52 static struct xfs_buf * 130 53 xfs_growfs_get_hdr_buf( 131 54 struct xfs_mount *mp, ··· 878 955 879 956 if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { 880 957 xfs_notice(mp, 881 - "%s(0x%x) called from line %d of file %s. Return address = 0x%p", 958 + "%s(0x%x) called from line %d of file %s. Return address = "PTR_FMT, 882 959 __func__, flags, lnnum, fname, __return_address); 883 960 } 884 961 /*
-1
fs/xfs/xfs_fsops.h
··· 18 18 #ifndef __XFS_FSOPS_H__ 19 19 #define __XFS_FSOPS_H__ 20 20 21 - extern int xfs_fs_geometry(xfs_mount_t *mp, xfs_fsop_geom_t *geo, int nversion); 22 21 extern int xfs_growfs_data(xfs_mount_t *mp, xfs_growfs_data_t *in); 23 22 extern int xfs_growfs_log(xfs_mount_t *mp, xfs_growfs_log_t *in); 24 23 extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt);
+50 -22
fs/xfs/xfs_icache.c
··· 296 296 uint32_t generation = inode->i_generation; 297 297 uint64_t version = inode_peek_iversion(inode); 298 298 umode_t mode = inode->i_mode; 299 + dev_t dev = inode->i_rdev; 299 300 300 301 error = inode_init_always(mp->m_super, inode); 301 302 ··· 304 303 inode->i_generation = generation; 305 304 inode_set_iversion_queried(inode, version); 306 305 inode->i_mode = mode; 306 + inode->i_rdev = dev; 307 307 return error; 308 308 } 309 309 ··· 475 473 error = xfs_iread(mp, tp, ip, flags); 476 474 if (error) 477 475 goto out_destroy; 476 + 477 + if (!xfs_inode_verify_forks(ip)) { 478 + error = -EFSCORRUPTED; 479 + goto out_destroy; 480 + } 478 481 479 482 trace_xfs_iget_miss(ip); 480 483 ··· 1658 1651 } 1659 1652 1660 1653 /* 1654 + * Set ourselves up to free CoW blocks from this file. If it's already clean 1655 + * then we can bail out quickly, but otherwise we must back off if the file 1656 + * is undergoing some kind of write. 1657 + */ 1658 + static bool 1659 + xfs_prep_free_cowblocks( 1660 + struct xfs_inode *ip, 1661 + struct xfs_ifork *ifp) 1662 + { 1663 + /* 1664 + * Just clear the tag if we have an empty cow fork or none at all. It's 1665 + * possible the inode was fully unshared since it was originally tagged. 1666 + */ 1667 + if (!xfs_is_reflink_inode(ip) || !ifp->if_bytes) { 1668 + trace_xfs_inode_free_cowblocks_invalid(ip); 1669 + xfs_inode_clear_cowblocks_tag(ip); 1670 + return false; 1671 + } 1672 + 1673 + /* 1674 + * If the mapping is dirty or under writeback we cannot touch the 1675 + * CoW fork. Leave it alone if we're in the midst of a directio. 
1676 + */ 1677 + if ((VFS_I(ip)->i_state & I_DIRTY_PAGES) || 1678 + mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) || 1679 + mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) || 1680 + atomic_read(&VFS_I(ip)->i_dio_count)) 1681 + return false; 1682 + 1683 + return true; 1684 + } 1685 + 1686 + /* 1661 1687 * Automatic CoW Reservation Freeing 1662 1688 * 1663 1689 * These functions automatically garbage collect leftover CoW reservations ··· 1708 1668 int flags, 1709 1669 void *args) 1710 1670 { 1711 - int ret; 1712 - struct xfs_eofblocks *eofb = args; 1713 - int match; 1671 + struct xfs_eofblocks *eofb = args; 1714 1672 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); 1673 + int match; 1674 + int ret = 0; 1715 1675 1716 - /* 1717 - * Just clear the tag if we have an empty cow fork or none at all. It's 1718 - * possible the inode was fully unshared since it was originally tagged. 1719 - */ 1720 - if (!xfs_is_reflink_inode(ip) || !ifp->if_bytes) { 1721 - trace_xfs_inode_free_cowblocks_invalid(ip); 1722 - xfs_inode_clear_cowblocks_tag(ip); 1723 - return 0; 1724 - } 1725 - 1726 - /* 1727 - * If the mapping is dirty or under writeback we cannot touch the 1728 - * CoW fork. Leave it alone if we're in the midst of a directio. 1729 - */ 1730 - if ((VFS_I(ip)->i_state & I_DIRTY_PAGES) || 1731 - mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) || 1732 - mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) || 1733 - atomic_read(&VFS_I(ip)->i_dio_count)) 1676 + if (!xfs_prep_free_cowblocks(ip, ifp)) 1734 1677 return 0; 1735 1678 1736 1679 if (eofb) { ··· 1734 1711 xfs_ilock(ip, XFS_IOLOCK_EXCL); 1735 1712 xfs_ilock(ip, XFS_MMAPLOCK_EXCL); 1736 1713 1737 - ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false); 1714 + /* 1715 + * Check again, nobody else should be able to dirty blocks or change 1716 + * the reflink iflag now that we have the first two locks held. 
1717 + */ 1718 + if (xfs_prep_free_cowblocks(ip, ifp)) 1719 + ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false); 1738 1720 1739 1721 xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); 1740 1722 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+73 -31
fs/xfs/xfs_inode.c
··· 547 547 548 548 /* 549 549 * xfs_lock_two_inodes() can only be used to lock one type of lock at a time - 550 - * the iolock, the mmaplock or the ilock, but not more than one at a time. If we 551 - * lock more than one at a time, lockdep will report false positives saying we 552 - * have violated locking orders. 550 + * the mmaplock or the ilock, but not more than one type at a time. If we lock 551 + * more than one at a time, lockdep will report false positives saying we have 552 + * violated locking orders. The iolock must be double-locked separately since 553 + * we use i_rwsem for that. We now support taking one lock EXCL and the other 554 + * SHARED. 553 555 */ 554 556 void 555 557 xfs_lock_two_inodes( 556 - xfs_inode_t *ip0, 557 - xfs_inode_t *ip1, 558 - uint lock_mode) 558 + struct xfs_inode *ip0, 559 + uint ip0_mode, 560 + struct xfs_inode *ip1, 561 + uint ip1_mode) 559 562 { 560 - xfs_inode_t *temp; 563 + struct xfs_inode *temp; 564 + uint mode_temp; 561 565 int attempts = 0; 562 566 xfs_log_item_t *lp; 563 567 564 - ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))); 565 - if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) 566 - ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); 568 + ASSERT(hweight32(ip0_mode) == 1); 569 + ASSERT(hweight32(ip1_mode) == 1); 570 + ASSERT(!(ip0_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))); 571 + ASSERT(!(ip1_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))); 572 + ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) || 573 + !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); 574 + ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) || 575 + !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); 576 + ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) || 577 + !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); 578 + ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) || 579 + !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); 567 580 568 581 ASSERT(ip0->i_ino != ip1->i_ino); 569 
582 ··· 584 571 temp = ip0; 585 572 ip0 = ip1; 586 573 ip1 = temp; 574 + mode_temp = ip0_mode; 575 + ip0_mode = ip1_mode; 576 + ip1_mode = mode_temp; 587 577 } 588 578 589 579 again: 590 - xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0)); 580 + xfs_ilock(ip0, xfs_lock_inumorder(ip0_mode, 0)); 591 581 592 582 /* 593 583 * If the first lock we have locked is in the AIL, we must TRY to get ··· 599 583 */ 600 584 lp = (xfs_log_item_t *)ip0->i_itemp; 601 585 if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { 602 - if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) { 603 - xfs_iunlock(ip0, lock_mode); 586 + if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(ip1_mode, 1))) { 587 + xfs_iunlock(ip0, ip0_mode); 604 588 if ((++attempts % 5) == 0) 605 589 delay(1); /* Don't just spin the CPU */ 606 590 goto again; 607 591 } 608 592 } else { 609 - xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1)); 593 + xfs_ilock(ip1, xfs_lock_inumorder(ip1_mode, 1)); 610 594 } 611 595 } 612 - 613 596 614 597 void 615 598 __xfs_iflock( ··· 1437 1422 if (error) 1438 1423 goto std_return; 1439 1424 1440 - xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL); 1425 + xfs_lock_two_inodes(sip, XFS_ILOCK_EXCL, tdp, XFS_ILOCK_EXCL); 1441 1426 1442 1427 xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); 1443 1428 xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); ··· 2230 2215 xfs_buf_t *bp; 2231 2216 xfs_inode_t *ip; 2232 2217 xfs_inode_log_item_t *iip; 2233 - xfs_log_item_t *lip; 2218 + struct xfs_log_item *lip; 2234 2219 struct xfs_perag *pag; 2235 2220 xfs_ino_t inum; 2236 2221 ··· 2288 2273 * stale first, we will not attempt to lock them in the loop 2289 2274 * below as the XFS_ISTALE flag will be set. 
2290 2275 */ 2291 - lip = bp->b_fspriv; 2292 - while (lip) { 2276 + list_for_each_entry(lip, &bp->b_li_list, li_bio_list) { 2293 2277 if (lip->li_type == XFS_LI_INODE) { 2294 2278 iip = (xfs_inode_log_item_t *)lip; 2295 2279 ASSERT(iip->ili_logged == 1); ··· 2298 2284 &iip->ili_item.li_lsn); 2299 2285 xfs_iflags_set(iip->ili_inode, XFS_ISTALE); 2300 2286 } 2301 - lip = lip->li_bio_list; 2302 2287 } 2303 2288 2304 2289 ··· 2465 2452 2466 2453 VFS_I(ip)->i_mode = 0; /* mark incore inode as free */ 2467 2454 ip->i_d.di_flags = 0; 2455 + ip->i_d.di_flags2 = 0; 2468 2456 ip->i_d.di_dmevmask = 0; 2469 2457 ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */ 2470 2458 ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; ··· 2601 2587 goto std_return; 2602 2588 } 2603 2589 2604 - xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL); 2590 + xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL); 2605 2591 2606 2592 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 2607 2593 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); ··· 3494 3480 return error; 3495 3481 } 3496 3482 3483 + /* 3484 + * If there are inline format data / attr forks attached to this inode, 3485 + * make sure they're not corrupt. 3486 + */ 3487 + bool 3488 + xfs_inode_verify_forks( 3489 + struct xfs_inode *ip) 3490 + { 3491 + struct xfs_ifork *ifp; 3492 + xfs_failaddr_t fa; 3493 + 3494 + fa = xfs_ifork_verify_data(ip, &xfs_default_ifork_ops); 3495 + if (fa) { 3496 + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); 3497 + xfs_inode_verifier_error(ip, -EFSCORRUPTED, "data fork", 3498 + ifp->if_u1.if_data, ifp->if_bytes, fa); 3499 + return false; 3500 + } 3501 + 3502 + fa = xfs_ifork_verify_attr(ip, &xfs_default_ifork_ops); 3503 + if (fa) { 3504 + ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK); 3505 + xfs_inode_verifier_error(ip, -EFSCORRUPTED, "attr fork", 3506 + ifp ? ifp->if_u1.if_data : NULL, 3507 + ifp ? 
ifp->if_bytes : 0, fa); 3508 + return false; 3509 + } 3510 + return true; 3511 + } 3512 + 3497 3513 STATIC int 3498 3514 xfs_iflush_int( 3499 3515 struct xfs_inode *ip, ··· 3546 3502 if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC), 3547 3503 mp, XFS_ERRTAG_IFLUSH_1)) { 3548 3504 xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 3549 - "%s: Bad inode %Lu magic number 0x%x, ptr 0x%p", 3505 + "%s: Bad inode %Lu magic number 0x%x, ptr "PTR_FMT, 3550 3506 __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip); 3551 3507 goto corrupt_out; 3552 3508 } ··· 3556 3512 (ip->i_d.di_format != XFS_DINODE_FMT_BTREE), 3557 3513 mp, XFS_ERRTAG_IFLUSH_3)) { 3558 3514 xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 3559 - "%s: Bad regular inode %Lu, ptr 0x%p", 3515 + "%s: Bad regular inode %Lu, ptr "PTR_FMT, 3560 3516 __func__, ip->i_ino, ip); 3561 3517 goto corrupt_out; 3562 3518 } ··· 3567 3523 (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL), 3568 3524 mp, XFS_ERRTAG_IFLUSH_4)) { 3569 3525 xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 3570 - "%s: Bad directory inode %Lu, ptr 0x%p", 3526 + "%s: Bad directory inode %Lu, ptr "PTR_FMT, 3571 3527 __func__, ip->i_ino, ip); 3572 3528 goto corrupt_out; 3573 3529 } ··· 3576 3532 ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) { 3577 3533 xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 3578 3534 "%s: detected corrupt incore inode %Lu, " 3579 - "total extents = %d, nblocks = %Ld, ptr 0x%p", 3535 + "total extents = %d, nblocks = %Ld, ptr "PTR_FMT, 3580 3536 __func__, ip->i_ino, 3581 3537 ip->i_d.di_nextents + ip->i_d.di_anextents, 3582 3538 ip->i_d.di_nblocks, ip); ··· 3585 3541 if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize, 3586 3542 mp, XFS_ERRTAG_IFLUSH_6)) { 3587 3543 xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 3588 - "%s: bad inode %Lu, forkoff 0x%x, ptr 0x%p", 3544 + "%s: bad inode %Lu, forkoff 0x%x, ptr "PTR_FMT, 3589 3545 __func__, ip->i_ino, ip->i_d.di_forkoff, ip); 3590 3546 goto corrupt_out; 3591 3547 } ··· 3602 3558 if (ip->i_d.di_version < 3) 3603 3559 
ip->i_d.di_flushiter++; 3604 3560 3605 - /* Check the inline directory data. */ 3606 - if (S_ISDIR(VFS_I(ip)->i_mode) && 3607 - ip->i_d.di_format == XFS_DINODE_FMT_LOCAL && 3608 - xfs_dir2_sf_verify(ip)) 3561 + /* Check the inline fork data before we write out. */ 3562 + if (!xfs_inode_verify_forks(ip)) 3609 3563 goto corrupt_out; 3610 3564 3611 3565 /* ··· 3666 3624 /* generate the checksum. */ 3667 3625 xfs_dinode_calc_crc(mp, dip); 3668 3626 3669 - ASSERT(bp->b_fspriv != NULL); 3627 + ASSERT(!list_empty(&bp->b_li_list)); 3670 3628 ASSERT(bp->b_iodone != NULL); 3671 3629 return 0; 3672 3630
+4 -1
fs/xfs/xfs_inode.h
··· 423 423 #define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount)) 424 424 425 425 int xfs_iflush(struct xfs_inode *, struct xfs_buf **); 426 - void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); 426 + void xfs_lock_two_inodes(struct xfs_inode *ip0, uint ip0_mode, 427 + struct xfs_inode *ip1, uint ip1_mode); 427 428 428 429 xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip); 429 430 xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip); ··· 491 490 492 491 /* The default CoW extent size hint. */ 493 492 #define XFS_DEFAULT_COWEXTSZ_HINT 32 493 + 494 + bool xfs_inode_verify_forks(struct xfs_inode *ip); 494 495 495 496 #endif /* __XFS_INODE_H__ */
+13 -30
fs/xfs/xfs_inode_item.c
··· 522 522 if (!xfs_buf_trylock(bp)) 523 523 return XFS_ITEM_LOCKED; 524 524 525 - if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list)) 525 + if (!xfs_buf_resubmit_failed_buffers(bp, buffer_list)) 526 526 rval = XFS_ITEM_FLUSHING; 527 527 528 528 xfs_buf_unlock(bp); ··· 713 713 struct xfs_log_item *lip) 714 714 { 715 715 struct xfs_inode_log_item *iip; 716 - struct xfs_log_item *blip; 717 - struct xfs_log_item *next; 718 - struct xfs_log_item *prev; 716 + struct xfs_log_item *blip, *n; 719 717 struct xfs_ail *ailp = lip->li_ailp; 720 718 int need_ail = 0; 719 + LIST_HEAD(tmp); 721 720 722 721 /* 723 722 * Scan the buffer IO completions for other inodes being completed and 724 723 * attach them to the current inode log item. 725 724 */ 726 - blip = bp->b_fspriv; 727 - prev = NULL; 728 - while (blip != NULL) { 729 - if (blip->li_cb != xfs_iflush_done) { 730 - prev = blip; 731 - blip = blip->li_bio_list; 725 + 726 + list_add_tail(&lip->li_bio_list, &tmp); 727 + 728 + list_for_each_entry_safe(blip, n, &bp->b_li_list, li_bio_list) { 729 + if (lip->li_cb != xfs_iflush_done) 732 730 continue; 733 - } 734 731 735 - /* remove from list */ 736 - next = blip->li_bio_list; 737 - if (!prev) { 738 - bp->b_fspriv = next; 739 - } else { 740 - prev->li_bio_list = next; 741 - } 742 - 743 - /* add to current list */ 744 - blip->li_bio_list = lip->li_bio_list; 745 - lip->li_bio_list = blip; 746 - 732 + list_move_tail(&blip->li_bio_list, &tmp); 747 733 /* 748 734 * while we have the item, do the unlocked check for needing 749 735 * the AIL lock. ··· 738 752 if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) || 739 753 (blip->li_flags & XFS_LI_FAILED)) 740 754 need_ail++; 741 - 742 - blip = next; 743 755 } 744 756 745 757 /* make sure we capture the state of the initial inode. 
*/ ··· 760 776 761 777 /* this is an opencoded batch version of xfs_trans_ail_delete */ 762 778 spin_lock(&ailp->xa_lock); 763 - for (blip = lip; blip; blip = blip->li_bio_list) { 779 + list_for_each_entry(blip, &tmp, li_bio_list) { 764 780 if (INODE_ITEM(blip)->ili_logged && 765 781 blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn) 766 782 mlip_changed |= xfs_ail_delete_one(ailp, blip); ··· 786 802 * ili_last_fields bits now that we know that the data corresponding to 787 803 * them is safely on disk. 788 804 */ 789 - for (blip = lip; blip; blip = next) { 790 - next = blip->li_bio_list; 791 - blip->li_bio_list = NULL; 792 - 805 + list_for_each_entry_safe(blip, n, &tmp, li_bio_list) { 806 + list_del_init(&blip->li_bio_list); 793 807 iip = INODE_ITEM(blip); 794 808 iip->ili_logged = 0; 795 809 iip->ili_last_fields = 0; 796 810 xfs_ifunlock(iip->ili_inode); 797 811 } 812 + list_del(&tmp); 798 813 } 799 814 800 815 /*
+3 -2
fs/xfs/xfs_ioctl.c
··· 45 45 #include <linux/fsmap.h> 46 46 #include "xfs_fsmap.h" 47 47 #include "scrub/xfs_scrub.h" 48 + #include "xfs_sb.h" 48 49 49 50 #include <linux/capability.h> 50 51 #include <linux/cred.h> ··· 810 809 xfs_fsop_geom_t fsgeo; 811 810 int error; 812 811 813 - error = xfs_fs_geometry(mp, &fsgeo, 3); 812 + error = xfs_fs_geometry(&mp->m_sb, &fsgeo, 3); 814 813 if (error) 815 814 return error; 816 815 ··· 832 831 xfs_fsop_geom_t fsgeo; 833 832 int error; 834 833 835 - error = xfs_fs_geometry(mp, &fsgeo, 4); 834 + error = xfs_fs_geometry(&mp->m_sb, &fsgeo, 4); 836 835 if (error) 837 836 return error; 838 837
+2 -1
fs/xfs/xfs_ioctl32.c
··· 37 37 #include "xfs_ioctl.h" 38 38 #include "xfs_ioctl32.h" 39 39 #include "xfs_trace.h" 40 + #include "xfs_sb.h" 40 41 41 42 #define _NATIVE_IOC(cmd, type) \ 42 43 _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type)) ··· 67 66 xfs_fsop_geom_t fsgeo; 68 67 int error; 69 68 70 - error = xfs_fs_geometry(mp, &fsgeo, 3); 69 + error = xfs_fs_geometry(&mp->m_sb, &fsgeo, 3); 71 70 if (error) 72 71 return error; 73 72 /* The 32-bit variant simply has some padding at the end */
+14
fs/xfs/xfs_linux.h
··· 285 285 #define XFS_IS_REALTIME_INODE(ip) \ 286 286 (((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME) && \ 287 287 (ip)->i_mount->m_rtdev_targp) 288 + #define XFS_IS_REALTIME_MOUNT(mp) ((mp)->m_rtdev_targp ? 1 : 0) 288 289 #else 289 290 #define XFS_IS_REALTIME_INODE(ip) (0) 291 + #define XFS_IS_REALTIME_MOUNT(mp) (0) 292 + #endif 293 + 294 + /* 295 + * Starting in Linux 4.15, the %p (raw pointer value) printk modifier 296 + * prints a hashed version of the pointer to avoid leaking kernel 297 + * pointers into dmesg. If we're trying to debug the kernel we want the 298 + * raw values, so override this behavior as best we can. 299 + */ 300 + #ifdef DEBUG 301 + # define PTR_FMT "%px" 302 + #else 303 + # define PTR_FMT "%p" 290 304 #endif 291 305 292 306 #endif /* __XFS_LINUX__ */
+10 -7
fs/xfs/xfs_log.c
··· 1047 1047 1048 1048 INIT_LIST_HEAD(&item->li_ail); 1049 1049 INIT_LIST_HEAD(&item->li_cil); 1050 + INIT_LIST_HEAD(&item->li_bio_list); 1050 1051 } 1051 1052 1052 1053 /* ··· 1243 1242 static void 1244 1243 xlog_iodone(xfs_buf_t *bp) 1245 1244 { 1246 - struct xlog_in_core *iclog = bp->b_fspriv; 1245 + struct xlog_in_core *iclog = bp->b_log_item; 1247 1246 struct xlog *l = iclog->ic_log; 1248 1247 int aborted = 0; 1249 1248 ··· 1774 1773 xlog_bdstrat( 1775 1774 struct xfs_buf *bp) 1776 1775 { 1777 - struct xlog_in_core *iclog = bp->b_fspriv; 1776 + struct xlog_in_core *iclog = bp->b_log_item; 1778 1777 1779 1778 xfs_buf_lock(bp); 1780 1779 if (iclog->ic_state & XLOG_STATE_IOERROR) { ··· 1920 1919 } 1921 1920 1922 1921 bp->b_io_length = BTOBB(count); 1923 - bp->b_fspriv = iclog; 1922 + bp->b_log_item = iclog; 1924 1923 bp->b_flags &= ~XBF_FLUSH; 1925 1924 bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA); 1926 1925 ··· 1959 1958 XFS_BUF_SET_ADDR(bp, 0); /* logical 0 */ 1960 1959 xfs_buf_associate_memory(bp, 1961 1960 (char *)&iclog->ic_header + count, split); 1962 - bp->b_fspriv = iclog; 1961 + bp->b_log_item = iclog; 1963 1962 bp->b_flags &= ~XBF_FLUSH; 1964 1963 bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA); 1965 1964 ··· 2118 2117 2119 2118 /* dump core transaction and ticket info */ 2120 2119 xfs_warn(mp, "transaction summary:"); 2121 - xfs_warn(mp, " flags = 0x%x", tp->t_flags); 2120 + xfs_warn(mp, " log res = %d", tp->t_log_res); 2121 + xfs_warn(mp, " log count = %d", tp->t_log_count); 2122 + xfs_warn(mp, " flags = 0x%x", tp->t_flags); 2122 2123 2123 2124 xlog_print_tic_res(mp, tp->t_ticket); 2124 2125 ··· 2245 2242 break; 2246 2243 default: 2247 2244 xfs_warn(log->l_mp, 2248 - "Bad XFS transaction clientid 0x%x in ticket 0x%p", 2245 + "Bad XFS transaction clientid 0x%x in ticket "PTR_FMT, 2249 2246 ophdr->oh_clientid, ticket); 2250 2247 return NULL; 2251 2248 } ··· 3927 3924 } 3928 3925 if (clientid != XFS_TRANSACTION && clientid 
!= XFS_LOG) 3929 3926 xfs_warn(log->l_mp, 3930 - "%s: invalid clientid %d op 0x%p offset 0x%lx", 3927 + "%s: invalid clientid %d op "PTR_FMT" offset 0x%lx", 3931 3928 __func__, clientid, ophead, 3932 3929 (unsigned long)field_offset); 3933 3930
+34 -24
fs/xfs/xfs_log_recover.c
··· 400 400 * On v5 supers, a bli could be attached to update the metadata LSN. 401 401 * Clean it up. 402 402 */ 403 - if (bp->b_fspriv) 403 + if (bp->b_log_item) 404 404 xfs_buf_item_relse(bp); 405 - ASSERT(bp->b_fspriv == NULL); 405 + ASSERT(bp->b_log_item == NULL); 406 406 407 407 bp->b_iodone = NULL; 408 408 xfs_buf_ioend(bp); ··· 2218 2218 next_unlinked_offset - reg_buf_offset; 2219 2219 if (unlikely(*logged_nextp == 0)) { 2220 2220 xfs_alert(mp, 2221 - "Bad inode buffer log record (ptr = 0x%p, bp = 0x%p). " 2221 + "Bad inode buffer log record (ptr = "PTR_FMT", bp = "PTR_FMT"). " 2222 2222 "Trying to replay bad (0) inode di_next_unlinked field.", 2223 2223 item, bp); 2224 2224 XFS_ERROR_REPORT("xlog_recover_do_inode_buf", ··· 2630 2630 ASSERT(!bp->b_iodone || bp->b_iodone == xlog_recover_iodone); 2631 2631 bp->b_iodone = xlog_recover_iodone; 2632 2632 xfs_buf_item_init(bp, mp); 2633 - bip = bp->b_fspriv; 2633 + bip = bp->b_log_item; 2634 2634 bip->bli_item.li_lsn = current_lsn; 2635 2635 } 2636 2636 } ··· 2652 2652 int i; 2653 2653 int bit; 2654 2654 int nbits; 2655 - int error; 2655 + xfs_failaddr_t fa; 2656 2656 2657 2657 trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f); 2658 2658 ··· 2687 2687 * the first dquot in the buffer should do. XXXThis is 2688 2688 * probably a good thing to do for other buf types also. 
2689 2689 */ 2690 - error = 0; 2690 + fa = NULL; 2691 2691 if (buf_f->blf_flags & 2692 2692 (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { 2693 2693 if (item->ri_buf[i].i_addr == NULL) { ··· 2701 2701 item->ri_buf[i].i_len, __func__); 2702 2702 goto next; 2703 2703 } 2704 - error = xfs_dqcheck(mp, item->ri_buf[i].i_addr, 2705 - -1, 0, XFS_QMOPT_DOWARN, 2706 - "dquot_buf_recover"); 2707 - if (error) 2704 + fa = xfs_dquot_verify(mp, item->ri_buf[i].i_addr, 2705 + -1, 0, 0); 2706 + if (fa) { 2707 + xfs_alert(mp, 2708 + "dquot corrupt at %pS trying to replay into block 0x%llx", 2709 + fa, bp->b_bn); 2708 2710 goto next; 2711 + } 2709 2712 } 2710 2713 2711 2714 memcpy(xfs_buf_offset(bp, ··· 2960 2957 if (error) 2961 2958 goto out_free_ip; 2962 2959 2960 + if (!xfs_inode_verify_forks(ip)) { 2961 + error = -EFSCORRUPTED; 2962 + goto out_free_ip; 2963 + } 2963 2964 2964 2965 if (in_f->ilf_fields & XFS_ILOG_DOWNER) { 2965 2966 ASSERT(in_f->ilf_fields & XFS_ILOG_DBROOT); ··· 3049 3042 */ 3050 3043 if (unlikely(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))) { 3051 3044 xfs_alert(mp, 3052 - "%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld", 3045 + "%s: Bad inode magic number, dip = "PTR_FMT", dino bp = "PTR_FMT", ino = %Ld", 3053 3046 __func__, dip, bp, in_f->ilf_ino); 3054 3047 XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)", 3055 3048 XFS_ERRLEVEL_LOW, mp); ··· 3059 3052 ldip = item->ri_buf[1].i_addr; 3060 3053 if (unlikely(ldip->di_magic != XFS_DINODE_MAGIC)) { 3061 3054 xfs_alert(mp, 3062 - "%s: Bad inode log record, rec ptr 0x%p, ino %Ld", 3055 + "%s: Bad inode log record, rec ptr "PTR_FMT", ino %Ld", 3063 3056 __func__, item, in_f->ilf_ino); 3064 3057 XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)", 3065 3058 XFS_ERRLEVEL_LOW, mp); ··· 3117 3110 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)", 3118 3111 XFS_ERRLEVEL_LOW, mp, ldip); 3119 3112 xfs_alert(mp, 3120 - "%s: Bad regular inode log record, rec ptr 0x%p, " 3121 - "ino ptr = 0x%p, 
ino bp = 0x%p, ino %Ld", 3113 + "%s: Bad regular inode log record, rec ptr "PTR_FMT", " 3114 + "ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld", 3122 3115 __func__, item, dip, bp, in_f->ilf_ino); 3123 3116 error = -EFSCORRUPTED; 3124 3117 goto out_release; ··· 3130 3123 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)", 3131 3124 XFS_ERRLEVEL_LOW, mp, ldip); 3132 3125 xfs_alert(mp, 3133 - "%s: Bad dir inode log record, rec ptr 0x%p, " 3134 - "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", 3126 + "%s: Bad dir inode log record, rec ptr "PTR_FMT", " 3127 + "ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld", 3135 3128 __func__, item, dip, bp, in_f->ilf_ino); 3136 3129 error = -EFSCORRUPTED; 3137 3130 goto out_release; ··· 3141 3134 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)", 3142 3135 XFS_ERRLEVEL_LOW, mp, ldip); 3143 3136 xfs_alert(mp, 3144 - "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " 3145 - "dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld", 3137 + "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", " 3138 + "dino bp "PTR_FMT", ino %Ld, total extents = %d, nblocks = %Ld", 3146 3139 __func__, item, dip, bp, in_f->ilf_ino, 3147 3140 ldip->di_nextents + ldip->di_anextents, 3148 3141 ldip->di_nblocks); ··· 3153 3146 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)", 3154 3147 XFS_ERRLEVEL_LOW, mp, ldip); 3155 3148 xfs_alert(mp, 3156 - "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " 3157 - "dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__, 3149 + "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", " 3150 + "dino bp "PTR_FMT", ino %Ld, forkoff 0x%x", __func__, 3158 3151 item, dip, bp, in_f->ilf_ino, ldip->di_forkoff); 3159 3152 error = -EFSCORRUPTED; 3160 3153 goto out_release; ··· 3164 3157 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", 3165 3158 XFS_ERRLEVEL_LOW, mp, ldip); 3166 3159 xfs_alert(mp, 3167 - "%s: Bad inode log record length %d, rec ptr 0x%p", 3160 + "%s: Bad inode log record length %d, rec 
ptr "PTR_FMT, 3168 3161 __func__, item->ri_buf[1].i_len, item); 3169 3162 error = -EFSCORRUPTED; 3170 3163 goto out_release; ··· 3310 3303 xfs_mount_t *mp = log->l_mp; 3311 3304 xfs_buf_t *bp; 3312 3305 struct xfs_disk_dquot *ddq, *recddq; 3306 + xfs_failaddr_t fa; 3313 3307 int error; 3314 3308 xfs_dq_logformat_t *dq_f; 3315 3309 uint type; ··· 3353 3345 */ 3354 3346 dq_f = item->ri_buf[0].i_addr; 3355 3347 ASSERT(dq_f); 3356 - error = xfs_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, 3357 - "xlog_recover_dquot_pass2 (log copy)"); 3358 - if (error) 3348 + fa = xfs_dquot_verify(mp, recddq, dq_f->qlf_id, 0, 0); 3349 + if (fa) { 3350 + xfs_alert(mp, "corrupt dquot ID 0x%x in log at %pS", 3351 + dq_f->qlf_id, fa); 3359 3352 return -EIO; 3353 + } 3360 3354 ASSERT(dq_f->qlf_len == 1); 3361 3355 3362 3356 /*
+3
fs/xfs/xfs_mount.c
··· 162 162 ASSERT(pag); 163 163 ASSERT(atomic_read(&pag->pag_ref) == 0); 164 164 xfs_buf_hash_destroy(pag); 165 + mutex_destroy(&pag->pag_ici_reclaim_lock); 165 166 call_rcu(&pag->rcu_head, __xfs_free_perag); 166 167 } 167 168 } ··· 249 248 out_hash_destroy: 250 249 xfs_buf_hash_destroy(pag); 251 250 out_free_pag: 251 + mutex_destroy(&pag->pag_ici_reclaim_lock); 252 252 kmem_free(pag); 253 253 out_unwind_new_pags: 254 254 /* unwind any prior newly initialized pags */ ··· 258 256 if (!pag) 259 257 break; 260 258 xfs_buf_hash_destroy(pag); 259 + mutex_destroy(&pag->pag_ici_reclaim_lock); 261 260 kmem_free(pag); 262 261 } 263 262 return error;
+16 -17
fs/xfs/xfs_qm.c
··· 162 162 */ 163 163 error = xfs_qm_dqflush(dqp, &bp); 164 164 if (error) { 165 - xfs_warn(mp, "%s: dquot %p flush failed", 165 + xfs_warn(mp, "%s: dquot "PTR_FMT" flush failed", 166 166 __func__, dqp); 167 167 } else { 168 168 error = xfs_bwrite(bp); ··· 291 291 * exist on disk and we didn't ask it to allocate; ESRCH if quotas got 292 292 * turned off suddenly. 293 293 */ 294 - error = xfs_qm_dqget(ip->i_mount, ip, id, type, 295 - doalloc | XFS_QMOPT_DOWARN, &dqp); 294 + error = xfs_qm_dqget(ip->i_mount, ip, id, type, doalloc, &dqp); 296 295 if (error) 297 296 return error; 298 297 ··· 480 481 481 482 error = xfs_qm_dqflush(dqp, &bp); 482 483 if (error) { 483 - xfs_warn(dqp->q_mount, "%s: dquot %p flush failed", 484 + xfs_warn(dqp->q_mount, "%s: dquot "PTR_FMT" flush failed", 484 485 __func__, dqp); 485 486 goto out_unlock_dirty; 486 487 } ··· 573 574 struct xfs_def_quota *defq; 574 575 int error; 575 576 576 - error = xfs_qm_dqread(mp, 0, type, XFS_QMOPT_DOWARN, &dqp); 577 + error = xfs_qm_dqread(mp, 0, type, 0, &dqp); 577 578 578 579 if (!error) { 579 580 xfs_disk_dquot_t *ddqp = &dqp->q_core; ··· 651 652 XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : 652 653 (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP : 653 654 XFS_DQ_PROJ), 654 - XFS_QMOPT_DOWARN, &dqp); 655 + 0, &dqp); 655 656 656 657 if (!error) { 657 658 xfs_disk_dquot_t *ddqp = &dqp->q_core; ··· 842 843 { 843 844 struct xfs_dqblk *dqb; 844 845 int j; 846 + xfs_failaddr_t fa; 845 847 846 848 trace_xfs_reset_dqcounts(bp, _RET_IP_); 847 849 ··· 864 864 /* 865 865 * Do a sanity check, and if needed, repair the dqblk. Don't 866 866 * output any warnings because it's perfectly possible to 867 - * find uninitialised dquot blks. See comment in xfs_dqcheck. 867 + * find uninitialised dquot blks. See comment in 868 + * xfs_dquot_verify. 
868 869 */ 869 - xfs_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR, 870 - "xfs_quotacheck"); 870 + fa = xfs_dquot_verify(mp, ddq, id + j, type, 0); 871 + if (fa) 872 + xfs_dquot_repair(mp, ddq, id + j, type); 873 + 871 874 /* 872 875 * Reset type in case we are reusing group quota file for 873 876 * project quotas or vice versa ··· 1077 1074 struct xfs_dquot *dqp; 1078 1075 int error; 1079 1076 1080 - error = xfs_qm_dqget(mp, ip, id, type, 1081 - XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp); 1077 + error = xfs_qm_dqget(mp, ip, id, type, XFS_QMOPT_DQALLOC, &dqp); 1082 1078 if (error) { 1083 1079 /* 1084 1080 * Shouldn't be able to turn off quotas here. ··· 1698 1696 xfs_iunlock(ip, lockflags); 1699 1697 error = xfs_qm_dqget(mp, NULL, uid, 1700 1698 XFS_DQ_USER, 1701 - XFS_QMOPT_DQALLOC | 1702 - XFS_QMOPT_DOWARN, 1699 + XFS_QMOPT_DQALLOC, 1703 1700 &uq); 1704 1701 if (error) { 1705 1702 ASSERT(error != -ENOENT); ··· 1724 1723 xfs_iunlock(ip, lockflags); 1725 1724 error = xfs_qm_dqget(mp, NULL, gid, 1726 1725 XFS_DQ_GROUP, 1727 - XFS_QMOPT_DQALLOC | 1728 - XFS_QMOPT_DOWARN, 1726 + XFS_QMOPT_DQALLOC, 1729 1727 &gq); 1730 1728 if (error) { 1731 1729 ASSERT(error != -ENOENT); ··· 1743 1743 xfs_iunlock(ip, lockflags); 1744 1744 error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid, 1745 1745 XFS_DQ_PROJ, 1746 - XFS_QMOPT_DQALLOC | 1747 - XFS_QMOPT_DOWARN, 1746 + XFS_QMOPT_DQALLOC, 1748 1747 &pq); 1749 1748 if (error) { 1750 1749 ASSERT(error != -ENOENT);
+83 -12
fs/xfs/xfs_reflink.c
··· 464 464 error = xfs_trans_commit(tp); 465 465 if (error) 466 466 return error; 467 + 468 + /* 469 + * Allocation succeeded but the requested range was not even partially 470 + * satisfied? Bail out! 471 + */ 472 + if (nimaps == 0) 473 + return -ENOSPC; 467 474 convert: 468 475 return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb, 469 476 &dfops); ··· 606 599 del.br_startblock, del.br_blockcount, 607 600 NULL); 608 601 609 - /* Update quota accounting */ 610 - xfs_trans_mod_dquot_byino(*tpp, ip, XFS_TRANS_DQ_BCOUNT, 611 - -(long)del.br_blockcount); 612 - 613 602 /* Roll the transaction */ 614 603 xfs_defer_ijoin(&dfops, ip); 615 604 error = xfs_defer_finish(tpp, &dfops); ··· 616 613 617 614 /* Remove the mapping from the CoW fork. */ 618 615 xfs_bmap_del_extent_cow(ip, &icur, &got, &del); 616 + 617 + /* Remove the quota reservation */ 618 + error = xfs_trans_reserve_quota_nblks(NULL, ip, 619 + -(long)del.br_blockcount, 0, 620 + XFS_QMOPT_RES_REGBLKS); 621 + if (error) 622 + break; 619 623 } else { 620 624 /* Didn't do anything, push cursor back. */ 621 625 xfs_iext_prev(ifp, &icur); ··· 805 795 if (error) 806 796 goto out_defer; 807 797 798 + /* Charge this new data fork mapping to the on-disk quota. */ 799 + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_DELBCOUNT, 800 + (long)del.br_blockcount); 801 + 808 802 /* Remove the mapping from the CoW fork. 
*/ 809 803 xfs_bmap_del_extent_cow(ip, &icur, &got, &del); 810 804 ··· 958 944 if (src->i_ino == dest->i_ino) 959 945 xfs_ilock(src, XFS_ILOCK_EXCL); 960 946 else 961 - xfs_lock_two_inodes(src, dest, XFS_ILOCK_EXCL); 947 + xfs_lock_two_inodes(src, XFS_ILOCK_EXCL, dest, XFS_ILOCK_EXCL); 962 948 963 949 if (!xfs_is_reflink_inode(src)) { 964 950 trace_xfs_reflink_set_inode_flag(src); ··· 1216 1202 1217 1203 /* drange = (destoff, destoff + len); srange = (srcoff, srcoff + len) */ 1218 1204 while (len) { 1205 + uint lock_mode; 1206 + 1219 1207 trace_xfs_reflink_remap_blocks_loop(src, srcoff, len, 1220 1208 dest, destoff); 1209 + 1221 1210 /* Read extent from the source file */ 1222 1211 nimaps = 1; 1223 - xfs_ilock(src, XFS_ILOCK_EXCL); 1212 + lock_mode = xfs_ilock_data_map_shared(src); 1224 1213 error = xfs_bmapi_read(src, srcoff, len, &imap, &nimaps, 0); 1225 - xfs_iunlock(src, XFS_ILOCK_EXCL); 1214 + xfs_iunlock(src, lock_mode); 1226 1215 if (error) 1227 1216 goto err; 1228 1217 ASSERT(nimaps == 1); ··· 1262 1245 } 1263 1246 1264 1247 /* 1248 + * Grab the exclusive iolock for a data copy from src to dest, making 1249 + * sure to abide vfs locking order (lowest pointer value goes first) and 1250 + * breaking the pnfs layout leases on dest before proceeding. The loop 1251 + * is needed because we cannot call the blocking break_layout() with the 1252 + * src iolock held, and therefore have to back out both locks. 
1253 + */ 1254 + static int 1255 + xfs_iolock_two_inodes_and_break_layout( 1256 + struct inode *src, 1257 + struct inode *dest) 1258 + { 1259 + int error; 1260 + 1261 + retry: 1262 + if (src < dest) { 1263 + inode_lock_shared(src); 1264 + inode_lock_nested(dest, I_MUTEX_NONDIR2); 1265 + } else { 1266 + /* src >= dest */ 1267 + inode_lock(dest); 1268 + } 1269 + 1270 + error = break_layout(dest, false); 1271 + if (error == -EWOULDBLOCK) { 1272 + inode_unlock(dest); 1273 + if (src < dest) 1274 + inode_unlock_shared(src); 1275 + error = break_layout(dest, true); 1276 + if (error) 1277 + return error; 1278 + goto retry; 1279 + } 1280 + if (error) { 1281 + inode_unlock(dest); 1282 + if (src < dest) 1283 + inode_unlock_shared(src); 1284 + return error; 1285 + } 1286 + if (src > dest) 1287 + inode_lock_shared_nested(src, I_MUTEX_NONDIR2); 1288 + return 0; 1289 + } 1290 + 1291 + /* 1265 1292 * Link a range of blocks from one file to another. 1266 1293 */ 1267 1294 int ··· 1335 1274 return -EIO; 1336 1275 1337 1276 /* Lock both files against IO */ 1338 - lock_two_nondirectories(inode_in, inode_out); 1277 + ret = xfs_iolock_two_inodes_and_break_layout(inode_in, inode_out); 1278 + if (ret) 1279 + return ret; 1339 1280 if (same_inode) 1340 1281 xfs_ilock(src, XFS_MMAPLOCK_EXCL); 1341 1282 else 1342 - xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL); 1283 + xfs_lock_two_inodes(src, XFS_MMAPLOCK_SHARED, dest, 1284 + XFS_MMAPLOCK_EXCL); 1343 1285 1344 1286 /* Check file eligibility and prepare for block sharing. 
*/ 1345 1287 ret = -EINVAL; ··· 1357 1293 ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out, 1358 1294 &len, is_dedupe); 1359 1295 if (ret <= 0) 1296 + goto out_unlock; 1297 + 1298 + /* Attach dquots to dest inode before changing block map */ 1299 + ret = xfs_qm_dqattach(dest, 0); 1300 + if (ret) 1360 1301 goto out_unlock; 1361 1302 1362 1303 trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); ··· 1410 1341 is_dedupe); 1411 1342 1412 1343 out_unlock: 1413 - xfs_iunlock(src, XFS_MMAPLOCK_EXCL); 1344 + xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); 1414 1345 if (!same_inode) 1415 - xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); 1416 - unlock_two_nondirectories(inode_in, inode_out); 1346 + xfs_iunlock(src, XFS_MMAPLOCK_SHARED); 1347 + inode_unlock(inode_out); 1348 + if (!same_inode) 1349 + inode_unlock_shared(inode_in); 1417 1350 if (ret) 1418 1351 trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); 1419 1352 return ret;
+4
fs/xfs/xfs_rtalloc.h
··· 139 139 xfs_rtalloc_query_range_fn fn, 140 140 void *priv); 141 141 bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno); 142 + int xfs_rtalloc_extent_is_free(struct xfs_mount *mp, struct xfs_trans *tp, 143 + xfs_rtblock_t start, xfs_extlen_t len, 144 + bool *is_free); 142 145 #else 143 146 # define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (ENOSYS) 144 147 # define xfs_rtfree_extent(t,b,l) (ENOSYS) ··· 151 148 # define xfs_rtalloc_query_all(t,f,p) (ENOSYS) 152 149 # define xfs_rtbuf_get(m,t,b,i,p) (ENOSYS) 153 150 # define xfs_verify_rtbno(m, r) (false) 151 + # define xfs_rtalloc_extent_is_free(m,t,s,l,i) (ENOSYS) 154 152 static inline int /* error */ 155 153 xfs_rtmount_init( 156 154 xfs_mount_t *mp) /* file system mount structure */
+9 -5
fs/xfs/xfs_super.c
··· 1153 1153 ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) == 1154 1154 (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD)) 1155 1155 xfs_qm_statvfs(ip, statp); 1156 + 1157 + if (XFS_IS_REALTIME_MOUNT(mp) && 1158 + (ip->i_d.di_flags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) { 1159 + statp->f_blocks = sbp->sb_rblocks; 1160 + statp->f_bavail = statp->f_bfree = 1161 + sbp->sb_frextents * sbp->sb_rextsize; 1162 + } 1163 + 1156 1164 return 0; 1157 1165 } 1158 1166 ··· 1668 1660 } 1669 1661 if (xfs_sb_version_hasreflink(&mp->m_sb)) 1670 1662 xfs_alert(mp, 1671 - "DAX and reflink have not been tested together!"); 1663 + "DAX and reflink cannot be used together!"); 1672 1664 } 1673 1665 1674 1666 if (mp->m_flags & XFS_MOUNT_DISCARD) { ··· 1691 1683 xfs_alert(mp, 1692 1684 "EXPERIMENTAL reverse mapping btree feature enabled. Use at your own risk!"); 1693 1685 } 1694 - 1695 - if (xfs_sb_version_hasreflink(&mp->m_sb)) 1696 - xfs_alert(mp, 1697 - "EXPERIMENTAL reflink feature enabled. Use at your own risk!"); 1698 1686 1699 1687 error = xfs_mountfs(mp); 1700 1688 if (error)
+47 -21
fs/xfs/xfs_trace.h
··· 72 72 __entry->flags = ctx->flags; 73 73 ), 74 74 TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u " 75 - "alist 0x%p size %u count %u firstu %u flags %d %s", 75 + "alist %p size %u count %u firstu %u flags %d %s", 76 76 MAJOR(__entry->dev), MINOR(__entry->dev), 77 77 __entry->ino, 78 78 __entry->hashval, ··· 119 119 __entry->refcount = refcount; 120 120 __entry->caller_ip = caller_ip; 121 121 ), 122 - TP_printk("dev %d:%d agno %u refcount %d caller %ps", 122 + TP_printk("dev %d:%d agno %u refcount %d caller %pS", 123 123 MAJOR(__entry->dev), MINOR(__entry->dev), 124 124 __entry->agno, 125 125 __entry->refcount, ··· 200 200 __entry->bt_before = be32_to_cpu(btree->before); 201 201 ), 202 202 TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u " 203 - "alist 0x%p size %u count %u firstu %u flags %d %s " 203 + "alist %p size %u count %u firstu %u flags %d %s " 204 204 "node hashval %u, node before %u", 205 205 MAJOR(__entry->dev), MINOR(__entry->dev), 206 206 __entry->ino, ··· 251 251 __entry->bmap_state = state; 252 252 __entry->caller_ip = caller_ip; 253 253 ), 254 - TP_printk("dev %d:%d ino 0x%llx state %s cur 0x%p/%d " 255 - "offset %lld block %lld count %lld flag %d caller %ps", 254 + TP_printk("dev %d:%d ino 0x%llx state %s cur %p/%d " 255 + "offset %lld block %lld count %lld flag %d caller %pS", 256 256 MAJOR(__entry->dev), MINOR(__entry->dev), 257 257 __entry->ino, 258 258 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), ··· 301 301 __entry->caller_ip = caller_ip; 302 302 ), 303 303 TP_printk("dev %d:%d bno 0x%llx nblks 0x%x hold %d pincount %d " 304 - "lock %d flags %s caller %ps", 304 + "lock %d flags %s caller %pS", 305 305 MAJOR(__entry->dev), MINOR(__entry->dev), 306 306 (unsigned long long)__entry->bno, 307 307 __entry->nblks, ··· 370 370 __entry->caller_ip = caller_ip; 371 371 ), 372 372 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " 373 - "lock %d flags %s caller %ps", 373 + "lock %d 
flags %s caller %pS", 374 374 MAJOR(__entry->dev), MINOR(__entry->dev), 375 375 (unsigned long long)__entry->bno, 376 376 __entry->buffer_length, ··· 390 390 DEFINE_BUF_FLAGS_EVENT(xfs_buf_read); 391 391 392 392 TRACE_EVENT(xfs_buf_ioerror, 393 - TP_PROTO(struct xfs_buf *bp, int error, unsigned long caller_ip), 393 + TP_PROTO(struct xfs_buf *bp, int error, xfs_failaddr_t caller_ip), 394 394 TP_ARGS(bp, error, caller_ip), 395 395 TP_STRUCT__entry( 396 396 __field(dev_t, dev) ··· 401 401 __field(int, pincount) 402 402 __field(unsigned, lockval) 403 403 __field(int, error) 404 - __field(unsigned long, caller_ip) 404 + __field(xfs_failaddr_t, caller_ip) 405 405 ), 406 406 TP_fast_assign( 407 407 __entry->dev = bp->b_target->bt_dev; ··· 415 415 __entry->caller_ip = caller_ip; 416 416 ), 417 417 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " 418 - "lock %d error %d flags %s caller %ps", 418 + "lock %d error %d flags %s caller %pS", 419 419 MAJOR(__entry->dev), MINOR(__entry->dev), 420 420 (unsigned long long)__entry->bno, 421 421 __entry->buffer_length, ··· 460 460 ), 461 461 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " 462 462 "lock %d flags %s recur %d refcount %d bliflags %s " 463 - "lidesc 0x%p liflags %s", 463 + "lidesc %p liflags %s", 464 464 MAJOR(__entry->dev), MINOR(__entry->dev), 465 465 (unsigned long long)__entry->buf_bno, 466 466 __entry->buf_len, ··· 579 579 __entry->lock_flags = lock_flags; 580 580 __entry->caller_ip = caller_ip; 581 581 ), 582 - TP_printk("dev %d:%d ino 0x%llx flags %s caller %ps", 582 + TP_printk("dev %d:%d ino 0x%llx flags %s caller %pS", 583 583 MAJOR(__entry->dev), MINOR(__entry->dev), 584 584 __entry->ino, 585 585 __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS), ··· 697 697 __entry->pincount = atomic_read(&ip->i_pincount); 698 698 __entry->caller_ip = caller_ip; 699 699 ), 700 - TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %ps", 700 + TP_printk("dev %d:%d ino 0x%llx count %d 
pincount %d caller %pS", 701 701 MAJOR(__entry->dev), MINOR(__entry->dev), 702 702 __entry->ino, 703 703 __entry->count, ··· 1028 1028 __entry->flags = lip->li_flags; 1029 1029 __entry->lsn = lip->li_lsn; 1030 1030 ), 1031 - TP_printk("dev %d:%d lip 0x%p lsn %d/%d type %s flags %s", 1031 + TP_printk("dev %d:%d lip %p lsn %d/%d type %s flags %s", 1032 1032 MAJOR(__entry->dev), MINOR(__entry->dev), 1033 1033 __entry->lip, 1034 1034 CYCLE_LSN(__entry->lsn), BLOCK_LSN(__entry->lsn), ··· 1049 1049 __entry->lsn = lsn; 1050 1050 __entry->caller_ip = caller_ip; 1051 1051 ), 1052 - TP_printk("dev %d:%d lsn 0x%llx caller %ps", 1052 + TP_printk("dev %d:%d lsn 0x%llx caller %pS", 1053 1053 MAJOR(__entry->dev), MINOR(__entry->dev), 1054 1054 __entry->lsn, (void *)__entry->caller_ip) 1055 1055 ) ··· 1082 1082 __entry->old_lsn = old_lsn; 1083 1083 __entry->new_lsn = new_lsn; 1084 1084 ), 1085 - TP_printk("dev %d:%d lip 0x%p old lsn %d/%d new lsn %d/%d type %s flags %s", 1085 + TP_printk("dev %d:%d lip %p old lsn %d/%d new lsn %d/%d type %s flags %s", 1086 1086 MAJOR(__entry->dev), MINOR(__entry->dev), 1087 1087 __entry->lip, 1088 1088 CYCLE_LSN(__entry->old_lsn), BLOCK_LSN(__entry->old_lsn), ··· 1403 1403 __entry->flags = flags; 1404 1404 ), 1405 1405 TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx" 1406 - "flags %s caller %ps", 1406 + "flags %s caller %pS", 1407 1407 MAJOR(__entry->dev), MINOR(__entry->dev), 1408 1408 __entry->ino, 1409 1409 __entry->size, ··· 1517 1517 ), 1518 1518 TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u " 1519 1519 "levels b %u c %u flfirst %u fllast %u flcount %u " 1520 - "freeblks %u longest %u caller %ps", 1520 + "freeblks %u longest %u caller %pS", 1521 1521 MAJOR(__entry->dev), MINOR(__entry->dev), 1522 1522 __entry->agno, 1523 1523 __print_flags(__entry->flags, "|", XFS_AGF_FLAGS), ··· 2014 2014 __entry->count = item->ri_cnt; 2015 2015 __entry->total = item->ri_total; 2016 2016 ), 2017 - TP_printk("dev %d:%d tid 
0x%x lsn 0x%llx, pass %d, item 0x%p, " 2017 + TP_printk("dev %d:%d tid 0x%x lsn 0x%llx, pass %d, item %p, " 2018 2018 "item type %s item region count/total %d/%d", 2019 2019 MAJOR(__entry->dev), MINOR(__entry->dev), 2020 2020 __entry->tid, ··· 2486 2486 __entry->error = error; 2487 2487 __entry->caller_ip = caller_ip; 2488 2488 ), 2489 - TP_printk("dev %d:%d agno %u error %d caller %ps", 2489 + TP_printk("dev %d:%d agno %u error %d caller %pS", 2490 2490 MAJOR(__entry->dev), MINOR(__entry->dev), 2491 2491 __entry->agno, 2492 2492 __entry->error, ··· 2977 2977 __entry->error = error; 2978 2978 __entry->caller_ip = caller_ip; 2979 2979 ), 2980 - TP_printk("dev %d:%d ino %llx error %d caller %ps", 2980 + TP_printk("dev %d:%d ino %llx error %d caller %pS", 2981 2981 MAJOR(__entry->dev), MINOR(__entry->dev), 2982 2982 __entry->ino, 2983 2983 __entry->error, ··· 3312 3312 DEFINE_GETFSMAP_EVENT(xfs_getfsmap_low_key); 3313 3313 DEFINE_GETFSMAP_EVENT(xfs_getfsmap_high_key); 3314 3314 DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping); 3315 + 3316 + TRACE_EVENT(xfs_trans_resv_calc, 3317 + TP_PROTO(struct xfs_mount *mp, unsigned int type, 3318 + struct xfs_trans_res *res), 3319 + TP_ARGS(mp, type, res), 3320 + TP_STRUCT__entry( 3321 + __field(dev_t, dev) 3322 + __field(int, type) 3323 + __field(uint, logres) 3324 + __field(int, logcount) 3325 + __field(int, logflags) 3326 + ), 3327 + TP_fast_assign( 3328 + __entry->dev = mp->m_super->s_dev; 3329 + __entry->type = type; 3330 + __entry->logres = res->tr_logres; 3331 + __entry->logcount = res->tr_logcount; 3332 + __entry->logflags = res->tr_logflags; 3333 + ), 3334 + TP_printk("dev %d:%d type %d logres %u logcount %d flags 0x%x", 3335 + MAJOR(__entry->dev), MINOR(__entry->dev), 3336 + __entry->type, 3337 + __entry->logres, 3338 + __entry->logcount, 3339 + __entry->logflags) 3340 + ); 3315 3341 3316 3342 #endif /* _TRACE_XFS_H */ 3317 3343
+22
fs/xfs/xfs_trans.c
··· 35 35 kmem_zone_t *xfs_trans_zone; 36 36 kmem_zone_t *xfs_log_item_desc_zone; 37 37 38 + #if defined(CONFIG_TRACEPOINTS) 39 + static void 40 + xfs_trans_trace_reservations( 41 + struct xfs_mount *mp) 42 + { 43 + struct xfs_trans_res resv; 44 + struct xfs_trans_res *res; 45 + struct xfs_trans_res *end_res; 46 + int i; 47 + 48 + res = (struct xfs_trans_res *)M_RES(mp); 49 + end_res = (struct xfs_trans_res *)(M_RES(mp) + 1); 50 + for (i = 0; res < end_res; i++, res++) 51 + trace_xfs_trans_resv_calc(mp, i, res); 52 + xfs_log_get_max_trans_res(mp, &resv); 53 + trace_xfs_trans_resv_calc(mp, -1, &resv); 54 + } 55 + #else 56 + # define xfs_trans_trace_reservations(mp) 57 + #endif 58 + 38 59 /* 39 60 * Initialize the precomputed transaction reservation values 40 61 * in the mount structure. ··· 65 44 struct xfs_mount *mp) 66 45 { 67 46 xfs_trans_resv_calc(mp, M_RES(mp)); 47 + xfs_trans_trace_reservations(mp); 68 48 } 69 49 70 50 /*
+1 -1
fs/xfs/xfs_trans.h
··· 50 50 uint li_type; /* item type */ 51 51 uint li_flags; /* misc flags */ 52 52 struct xfs_buf *li_buf; /* real buffer pointer */ 53 - struct xfs_log_item *li_bio_list; /* buffer item list */ 53 + struct list_head li_bio_list; /* buffer item list */ 54 54 void (*li_cb)(struct xfs_buf *, 55 55 struct xfs_log_item *); 56 56 /* buffer item iodone */
+51 -47
fs/xfs/xfs_trans_buf.c
··· 82 82 ASSERT(bp->b_transp == NULL); 83 83 84 84 /* 85 - * The xfs_buf_log_item pointer is stored in b_fsprivate. If 85 + * The xfs_buf_log_item pointer is stored in b_log_item. If 86 86 * it doesn't have one yet, then allocate one and initialize it. 87 87 * The checks to see if one is there are in xfs_buf_item_init(). 88 88 */ 89 89 xfs_buf_item_init(bp, tp->t_mountp); 90 - bip = bp->b_fspriv; 90 + bip = bp->b_log_item; 91 91 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 92 92 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL)); 93 93 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); ··· 118 118 struct xfs_buf *bp) 119 119 { 120 120 _xfs_trans_bjoin(tp, bp, 0); 121 - trace_xfs_trans_bjoin(bp->b_fspriv); 121 + trace_xfs_trans_bjoin(bp->b_log_item); 122 122 } 123 123 124 124 /* ··· 139 139 xfs_buf_flags_t flags) 140 140 { 141 141 xfs_buf_t *bp; 142 - xfs_buf_log_item_t *bip; 142 + struct xfs_buf_log_item *bip; 143 143 144 144 if (!tp) 145 145 return xfs_buf_get_map(target, map, nmaps, flags); ··· 159 159 } 160 160 161 161 ASSERT(bp->b_transp == tp); 162 - bip = bp->b_fspriv; 162 + bip = bp->b_log_item; 163 163 ASSERT(bip != NULL); 164 164 ASSERT(atomic_read(&bip->bli_refcount) > 0); 165 165 bip->bli_recur++; ··· 175 175 ASSERT(!bp->b_error); 176 176 177 177 _xfs_trans_bjoin(tp, bp, 1); 178 - trace_xfs_trans_get_buf(bp->b_fspriv); 178 + trace_xfs_trans_get_buf(bp->b_log_item); 179 179 return bp; 180 180 } 181 181 ··· 188 188 * mount structure. 
189 189 */ 190 190 xfs_buf_t * 191 - xfs_trans_getsb(xfs_trans_t *tp, 192 - struct xfs_mount *mp, 193 - int flags) 191 + xfs_trans_getsb( 192 + xfs_trans_t *tp, 193 + struct xfs_mount *mp, 194 + int flags) 194 195 { 195 196 xfs_buf_t *bp; 196 - xfs_buf_log_item_t *bip; 197 + struct xfs_buf_log_item *bip; 197 198 198 199 /* 199 200 * Default to just trying to lock the superblock buffer ··· 211 210 */ 212 211 bp = mp->m_sb_bp; 213 212 if (bp->b_transp == tp) { 214 - bip = bp->b_fspriv; 213 + bip = bp->b_log_item; 215 214 ASSERT(bip != NULL); 216 215 ASSERT(atomic_read(&bip->bli_refcount) > 0); 217 216 bip->bli_recur++; ··· 224 223 return NULL; 225 224 226 225 _xfs_trans_bjoin(tp, bp, 1); 227 - trace_xfs_trans_getsb(bp->b_fspriv); 226 + trace_xfs_trans_getsb(bp->b_log_item); 228 227 return bp; 229 228 } 230 229 ··· 267 266 if (bp) { 268 267 ASSERT(xfs_buf_islocked(bp)); 269 268 ASSERT(bp->b_transp == tp); 270 - ASSERT(bp->b_fspriv != NULL); 269 + ASSERT(bp->b_log_item != NULL); 271 270 ASSERT(!bp->b_error); 272 271 ASSERT(bp->b_flags & XBF_DONE); 273 272 ··· 280 279 return -EIO; 281 280 } 282 281 283 - bip = bp->b_fspriv; 282 + bip = bp->b_log_item; 284 283 bip->bli_recur++; 285 284 286 285 ASSERT(atomic_read(&bip->bli_refcount) > 0); ··· 330 329 331 330 if (tp) { 332 331 _xfs_trans_bjoin(tp, bp, 1); 333 - trace_xfs_trans_read_buf(bp->b_fspriv); 332 + trace_xfs_trans_read_buf(bp->b_log_item); 334 333 } 335 334 *bpp = bp; 336 335 return 0; ··· 353 352 * brelse() call. 
354 353 */ 355 354 void 356 - xfs_trans_brelse(xfs_trans_t *tp, 357 - xfs_buf_t *bp) 355 + xfs_trans_brelse( 356 + xfs_trans_t *tp, 357 + xfs_buf_t *bp) 358 358 { 359 - xfs_buf_log_item_t *bip; 359 + struct xfs_buf_log_item *bip; 360 360 int freed; 361 361 362 362 /* ··· 370 368 } 371 369 372 370 ASSERT(bp->b_transp == tp); 373 - bip = bp->b_fspriv; 371 + bip = bp->b_log_item; 374 372 ASSERT(bip->bli_item.li_type == XFS_LI_BUF); 375 373 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 376 374 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL)); ··· 458 456 */ 459 457 /* ARGSUSED */ 460 458 void 461 - xfs_trans_bhold(xfs_trans_t *tp, 462 - xfs_buf_t *bp) 459 + xfs_trans_bhold( 460 + xfs_trans_t *tp, 461 + xfs_buf_t *bp) 463 462 { 464 - xfs_buf_log_item_t *bip = bp->b_fspriv; 463 + struct xfs_buf_log_item *bip = bp->b_log_item; 465 464 466 465 ASSERT(bp->b_transp == tp); 467 466 ASSERT(bip != NULL); ··· 479 476 * for this transaction. 480 477 */ 481 478 void 482 - xfs_trans_bhold_release(xfs_trans_t *tp, 483 - xfs_buf_t *bp) 479 + xfs_trans_bhold_release( 480 + xfs_trans_t *tp, 481 + xfs_buf_t *bp) 484 482 { 485 - xfs_buf_log_item_t *bip = bp->b_fspriv; 483 + struct xfs_buf_log_item *bip = bp->b_log_item; 486 484 487 485 ASSERT(bp->b_transp == tp); 488 486 ASSERT(bip != NULL); ··· 504 500 struct xfs_trans *tp, 505 501 struct xfs_buf *bp) 506 502 { 507 - struct xfs_buf_log_item *bip = bp->b_fspriv; 503 + struct xfs_buf_log_item *bip = bp->b_log_item; 508 504 509 505 ASSERT(bp->b_transp == tp); 510 506 ASSERT(bip != NULL); ··· 561 557 uint first, 562 558 uint last) 563 559 { 564 - struct xfs_buf_log_item *bip = bp->b_fspriv; 560 + struct xfs_buf_log_item *bip = bp->b_log_item; 565 561 566 562 ASSERT(first <= last && last < BBTOB(bp->b_length)); 567 563 ASSERT(!(bip->bli_flags & XFS_BLI_ORDERED)); ··· 604 600 */ 605 601 void 606 602 xfs_trans_binval( 607 - xfs_trans_t *tp, 608 - xfs_buf_t *bp) 603 + xfs_trans_t *tp, 604 + xfs_buf_t *bp) 609 605 { 610 - xfs_buf_log_item_t 
*bip = bp->b_fspriv; 606 + struct xfs_buf_log_item *bip = bp->b_log_item; 611 607 int i; 612 608 613 609 ASSERT(bp->b_transp == tp); ··· 659 655 */ 660 656 void 661 657 xfs_trans_inode_buf( 662 - xfs_trans_t *tp, 663 - xfs_buf_t *bp) 658 + xfs_trans_t *tp, 659 + xfs_buf_t *bp) 664 660 { 665 - xfs_buf_log_item_t *bip = bp->b_fspriv; 661 + struct xfs_buf_log_item *bip = bp->b_log_item; 666 662 667 663 ASSERT(bp->b_transp == tp); 668 664 ASSERT(bip != NULL); ··· 683 679 */ 684 680 void 685 681 xfs_trans_stale_inode_buf( 686 - xfs_trans_t *tp, 687 - xfs_buf_t *bp) 682 + xfs_trans_t *tp, 683 + xfs_buf_t *bp) 688 684 { 689 - xfs_buf_log_item_t *bip = bp->b_fspriv; 685 + struct xfs_buf_log_item *bip = bp->b_log_item; 690 686 691 687 ASSERT(bp->b_transp == tp); 692 688 ASSERT(bip != NULL); ··· 708 704 /* ARGSUSED */ 709 705 void 710 706 xfs_trans_inode_alloc_buf( 711 - xfs_trans_t *tp, 712 - xfs_buf_t *bp) 707 + xfs_trans_t *tp, 708 + xfs_buf_t *bp) 713 709 { 714 - xfs_buf_log_item_t *bip = bp->b_fspriv; 710 + struct xfs_buf_log_item *bip = bp->b_log_item; 715 711 716 712 ASSERT(bp->b_transp == tp); 717 713 ASSERT(bip != NULL); ··· 733 729 struct xfs_trans *tp, 734 730 struct xfs_buf *bp) 735 731 { 736 - struct xfs_buf_log_item *bip = bp->b_fspriv; 732 + struct xfs_buf_log_item *bip = bp->b_log_item; 737 733 738 734 ASSERT(bp->b_transp == tp); 739 735 ASSERT(bip != NULL); ··· 763 759 struct xfs_buf *bp, 764 760 enum xfs_blft type) 765 761 { 766 - struct xfs_buf_log_item *bip = bp->b_fspriv; 762 + struct xfs_buf_log_item *bip = bp->b_log_item; 767 763 768 764 if (!tp) 769 765 return; ··· 780 776 struct xfs_buf *dst_bp, 781 777 struct xfs_buf *src_bp) 782 778 { 783 - struct xfs_buf_log_item *sbip = src_bp->b_fspriv; 784 - struct xfs_buf_log_item *dbip = dst_bp->b_fspriv; 779 + struct xfs_buf_log_item *sbip = src_bp->b_log_item; 780 + struct xfs_buf_log_item *dbip = dst_bp->b_log_item; 785 781 enum xfs_blft type; 786 782 787 783 type = 
xfs_blft_from_flags(&sbip->__bli_format); ··· 801 797 /* ARGSUSED */ 802 798 void 803 799 xfs_trans_dquot_buf( 804 - xfs_trans_t *tp, 805 - xfs_buf_t *bp, 806 - uint type) 800 + xfs_trans_t *tp, 801 + xfs_buf_t *bp, 802 + uint type) 807 803 { 808 - struct xfs_buf_log_item *bip = bp->b_fspriv; 804 + struct xfs_buf_log_item *bip = bp->b_log_item; 809 805 810 806 ASSERT(type == XFS_BLF_UDQUOT_BUF || 811 807 type == XFS_BLF_PDQUOT_BUF ||
+6
include/linux/fs.h
··· 748 748 down_write_nested(&inode->i_rwsem, subclass); 749 749 } 750 750 751 + static inline void inode_lock_shared_nested(struct inode *inode, unsigned subclass) 752 + { 753 + down_read_nested(&inode->i_rwsem, subclass); 754 + } 755 + 751 756 void lock_two_nondirectories(struct inode *, struct inode*); 752 757 void unlock_two_nondirectories(struct inode *, struct inode*); 753 758 ··· 2985 2980 }; 2986 2981 2987 2982 void dio_end_io(struct bio *bio); 2983 + void dio_warn_stale_pagecache(struct file *filp); 2988 2984 2989 2985 ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, 2990 2986 struct block_device *bdev, struct iov_iter *iter,