Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'xfs-4.16-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs updates from Darrick Wong:
"This merge cycle, we're making some substantive changes to XFS.

Metadata verifiers have been restructured to provide more detail about
which part of a metadata structure failed checks, and we've enhanced
the new online fsck feature to cross-reference extent allocation
information with the other metadata structures. With this pull, the
metadata verification part of online fsck is more or less finished,
though the feature is still experimental and still disabled by
default.

We're also preparing to remove the EXPERIMENTAL tag from a couple of
features this cycle. This week we're committing a bunch of space
accounting fixes for reflink and removing the EXPERIMENTAL tag from
reflink; I anticipate that we'll be ready to do the same for the
reverse mapping feature next week. (I don't have any pending fixes for
rmap; however I wish to remove the tags one at a time.)

This giant pile of patches has been run through a full xfstests run
over the weekend and through a quick xfstests run against this
morning's master, with no major failures reported. Let me know if
there are any merge problems -- git merge reported that one of our
patches touched the same function as the i_version series, but it
resolved things cleanly.

Summary:

- Log faulting code locations when verifiers fail, for improved
diagnosis of corrupt filesystems.

- Implement metadata verifiers for local format inode fork data.

- Online scrub now cross-references metadata records with other
metadata.

- Refactor the fs geometry ioctl generation functions.

- Harden various metadata verifiers.

- Fix various accounting problems.

- Fix uncancelled transactions leaking when xattr functions fail.

- Prevent the copy-on-write speculative preallocation garbage
collector from racing with writeback.

- Emit log reservation type information as trace data so that we can
compare against xfsprogs.

- Fix some erroneous asserts in the online scrub code.

- Clean up the transaction reservation calculations.

- Fix various minor bugs in online scrub.

- Log complaints about mixed dio/buffered writes once per day and
less noisily than before.

- Refactor buffer log item lists to use list_head.

- Break PNFS leases before reflinking blocks.

- Reduce lock contention on reflink source files.

- Fix some quota accounting problems with reflink.

- Fix a serious corruption problem in the direct cow write code where
we fed bad iomaps to the vfs iomap consumers.

- Various other refactorings.

- Remove EXPERIMENTAL tag from reflink!"

* tag 'xfs-4.16-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (94 commits)
xfs: remove experimental tag for reflinks
xfs: don't screw up direct writes when freesp is fragmented
xfs: check reflink allocation mappings
iomap: warn on zero-length mappings
xfs: treat CoW fork operations as delalloc for quota accounting
xfs: only grab shared inode locks for source file during reflink
xfs: allow xfs_lock_two_inodes to take different EXCL/SHARED modes
xfs: reflink should break pnfs leases before sharing blocks
xfs: don't clobber inobt/finobt cursors when xref with rmap
xfs: skip CoW writes past EOF when writeback races with truncate
xfs: preserve i_rdev when recycling a reclaimable inode
xfs: refactor accounting updates out of xfs_bmap_btalloc
xfs: refactor inode verifier corruption error printing
xfs: make tracepoint inode number format consistent
xfs: always zero di_flags2 when we free the inode
xfs: call xfs_qm_dqattach before performing reflink operations
xfs: bmap code cleanup
Use list_head infra-structure for buffer's log items list
Split buffer's b_fspriv field
Get rid of xfs_buf_log_item_t typedef
...

+4661 -1613
+23 -1
fs/direct-io.c
··· 219 219 return dio->pages[sdio->head]; 220 220 } 221 221 222 + /* 223 + * Warn about a page cache invalidation failure during a direct io write. 224 + */ 225 + void dio_warn_stale_pagecache(struct file *filp) 226 + { 227 + static DEFINE_RATELIMIT_STATE(_rs, 86400 * HZ, DEFAULT_RATELIMIT_BURST); 228 + char pathname[128]; 229 + struct inode *inode = file_inode(filp); 230 + char *path; 231 + 232 + errseq_set(&inode->i_mapping->wb_err, -EIO); 233 + if (__ratelimit(&_rs)) { 234 + path = file_path(filp, pathname, sizeof(pathname)); 235 + if (IS_ERR(path)) 236 + path = "(unknown)"; 237 + pr_crit("Page cache invalidation failure on direct I/O. Possible data corruption due to collision with buffered I/O!\n"); 238 + pr_crit("File: %s PID: %d Comm: %.20s\n", path, current->pid, 239 + current->comm); 240 + } 241 + } 242 + 222 243 /** 223 244 * dio_complete() - called when all DIO BIO I/O has been completed 224 245 * @offset: the byte offset in the file of the completed operation ··· 311 290 err = invalidate_inode_pages2_range(dio->inode->i_mapping, 312 291 offset >> PAGE_SHIFT, 313 292 (offset + ret - 1) >> PAGE_SHIFT); 314 - WARN_ON_ONCE(err); 293 + if (err) 294 + dio_warn_stale_pagecache(dio->iocb->ki_filp); 315 295 } 316 296 317 297 if (!(dio->flags & DIO_SKIP_DIO_COUNT))
+12 -2
fs/iomap.c
··· 65 65 return ret; 66 66 if (WARN_ON(iomap.offset > pos)) 67 67 return -EIO; 68 + if (WARN_ON(iomap.length == 0)) 69 + return -EIO; 68 70 69 71 /* 70 72 * Cut down the length to the one actually provided by the filesystem, ··· 755 753 err = invalidate_inode_pages2_range(inode->i_mapping, 756 754 offset >> PAGE_SHIFT, 757 755 (offset + dio->size - 1) >> PAGE_SHIFT); 758 - WARN_ON_ONCE(err); 756 + if (err) 757 + dio_warn_stale_pagecache(iocb->ki_filp); 759 758 } 760 759 761 760 inode_dio_end(file_inode(iocb->ki_filp)); ··· 1021 1018 if (ret) 1022 1019 goto out_free_dio; 1023 1020 1021 + /* 1022 + * Try to invalidate cache pages for the range we're direct 1023 + * writing. If this invalidation fails, tough, the write will 1024 + * still work, but racing two incompatible write paths is a 1025 + * pretty crazy thing to do, so we don't support it 100%. 1026 + */ 1024 1027 ret = invalidate_inode_pages2_range(mapping, 1025 1028 start >> PAGE_SHIFT, end >> PAGE_SHIFT); 1026 - WARN_ON_ONCE(ret); 1029 + if (ret) 1030 + dio_warn_stale_pagecache(iocb->ki_filp); 1027 1031 ret = 0; 1028 1032 1029 1033 if (iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
+79 -45
fs/xfs/libxfs/xfs_alloc.c
··· 167 167 * Lookup the first record less than or equal to [bno, len] 168 168 * in the btree given by cur. 169 169 */ 170 - static int /* error */ 170 + int /* error */ 171 171 xfs_alloc_lookup_le( 172 172 struct xfs_btree_cur *cur, /* btree cursor */ 173 173 xfs_agblock_t bno, /* starting block of extent */ ··· 520 520 return 0; 521 521 } 522 522 523 - static bool 523 + static xfs_failaddr_t 524 524 xfs_agfl_verify( 525 525 struct xfs_buf *bp) 526 526 { ··· 528 528 struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); 529 529 int i; 530 530 531 + /* 532 + * There is no verification of non-crc AGFLs because mkfs does not 533 + * initialise the AGFL to zero or NULL. Hence the only valid part of the 534 + * AGFL is what the AGF says is active. We can't get to the AGF, so we 535 + * can't verify just those entries are valid. 536 + */ 537 + if (!xfs_sb_version_hascrc(&mp->m_sb)) 538 + return NULL; 539 + 531 540 if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid)) 532 - return false; 541 + return __this_address; 533 542 if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC) 534 - return false; 543 + return __this_address; 535 544 /* 536 545 * during growfs operations, the perag is not fully initialised, 537 546 * so we can't use it for any useful checking. growfs ensures we can't ··· 548 539 * so we can detect and avoid this problem. 
549 540 */ 550 541 if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno) 551 - return false; 542 + return __this_address; 552 543 553 544 for (i = 0; i < XFS_AGFL_SIZE(mp); i++) { 554 545 if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK && 555 546 be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks) 556 - return false; 547 + return __this_address; 557 548 } 558 549 559 - return xfs_log_check_lsn(mp, 560 - be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)); 550 + if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn))) 551 + return __this_address; 552 + return NULL; 561 553 } 562 554 563 555 static void ··· 566 556 struct xfs_buf *bp) 567 557 { 568 558 struct xfs_mount *mp = bp->b_target->bt_mount; 559 + xfs_failaddr_t fa; 569 560 570 561 /* 571 562 * There is no verification of non-crc AGFLs because mkfs does not ··· 578 567 return; 579 568 580 569 if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF)) 581 - xfs_buf_ioerror(bp, -EFSBADCRC); 582 - else if (!xfs_agfl_verify(bp)) 583 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 584 - 585 - if (bp->b_error) 586 - xfs_verifier_error(bp); 570 + xfs_verifier_error(bp, -EFSBADCRC, __this_address); 571 + else { 572 + fa = xfs_agfl_verify(bp); 573 + if (fa) 574 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 575 + } 587 576 } 588 577 589 578 static void 590 579 xfs_agfl_write_verify( 591 580 struct xfs_buf *bp) 592 581 { 593 - struct xfs_mount *mp = bp->b_target->bt_mount; 594 - struct xfs_buf_log_item *bip = bp->b_fspriv; 582 + struct xfs_mount *mp = bp->b_target->bt_mount; 583 + struct xfs_buf_log_item *bip = bp->b_log_item; 584 + xfs_failaddr_t fa; 595 585 596 586 /* no verification of non-crc AGFLs */ 597 587 if (!xfs_sb_version_hascrc(&mp->m_sb)) 598 588 return; 599 589 600 - if (!xfs_agfl_verify(bp)) { 601 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 602 - xfs_verifier_error(bp); 590 + fa = xfs_agfl_verify(bp); 591 + if (fa) { 592 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 603 593 return; 604 594 } 605 595 ··· 
614 602 .name = "xfs_agfl", 615 603 .verify_read = xfs_agfl_read_verify, 616 604 .verify_write = xfs_agfl_write_verify, 605 + .verify_struct = xfs_agfl_verify, 617 606 }; 618 607 619 608 /* ··· 2410 2397 return 0; 2411 2398 } 2412 2399 2413 - static bool 2400 + static xfs_failaddr_t 2414 2401 xfs_agf_verify( 2415 - struct xfs_mount *mp, 2416 - struct xfs_buf *bp) 2417 - { 2418 - struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); 2402 + struct xfs_buf *bp) 2403 + { 2404 + struct xfs_mount *mp = bp->b_target->bt_mount; 2405 + struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); 2419 2406 2420 2407 if (xfs_sb_version_hascrc(&mp->m_sb)) { 2421 2408 if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid)) 2422 - return false; 2409 + return __this_address; 2423 2410 if (!xfs_log_check_lsn(mp, 2424 2411 be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn))) 2425 - return false; 2412 + return __this_address; 2426 2413 } 2427 2414 2428 2415 if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) && ··· 2431 2418 be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && 2432 2419 be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) && 2433 2420 be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp))) 2434 - return false; 2421 + return __this_address; 2435 2422 2436 2423 if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 || 2437 2424 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 || 2438 2425 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS || 2439 2426 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS) 2440 - return false; 2427 + return __this_address; 2441 2428 2442 2429 if (xfs_sb_version_hasrmapbt(&mp->m_sb) && 2443 2430 (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 || 2444 2431 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS)) 2445 - return false; 2432 + return __this_address; 2446 2433 2447 2434 /* 2448 2435 * during growfs operations, the perag is not fully initialised, ··· 2451 2438 * so we can detect and avoid this problem. 
2452 2439 */ 2453 2440 if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno) 2454 - return false; 2441 + return __this_address; 2455 2442 2456 2443 if (xfs_sb_version_haslazysbcount(&mp->m_sb) && 2457 2444 be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length)) 2458 - return false; 2445 + return __this_address; 2459 2446 2460 2447 if (xfs_sb_version_hasreflink(&mp->m_sb) && 2461 2448 (be32_to_cpu(agf->agf_refcount_level) < 1 || 2462 2449 be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS)) 2463 - return false; 2450 + return __this_address; 2464 2451 2465 - return true;; 2452 + return NULL; 2466 2453 2467 2454 } 2468 2455 ··· 2471 2458 struct xfs_buf *bp) 2472 2459 { 2473 2460 struct xfs_mount *mp = bp->b_target->bt_mount; 2461 + xfs_failaddr_t fa; 2474 2462 2475 2463 if (xfs_sb_version_hascrc(&mp->m_sb) && 2476 2464 !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF)) 2477 - xfs_buf_ioerror(bp, -EFSBADCRC); 2478 - else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp, 2479 - XFS_ERRTAG_ALLOC_READ_AGF)) 2480 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 2481 - 2482 - if (bp->b_error) 2483 - xfs_verifier_error(bp); 2465 + xfs_verifier_error(bp, -EFSBADCRC, __this_address); 2466 + else { 2467 + fa = xfs_agf_verify(bp); 2468 + if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF)) 2469 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 2470 + } 2484 2471 } 2485 2472 2486 2473 static void 2487 2474 xfs_agf_write_verify( 2488 2475 struct xfs_buf *bp) 2489 2476 { 2490 - struct xfs_mount *mp = bp->b_target->bt_mount; 2491 - struct xfs_buf_log_item *bip = bp->b_fspriv; 2477 + struct xfs_mount *mp = bp->b_target->bt_mount; 2478 + struct xfs_buf_log_item *bip = bp->b_log_item; 2479 + xfs_failaddr_t fa; 2492 2480 2493 - if (!xfs_agf_verify(mp, bp)) { 2494 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 2495 - xfs_verifier_error(bp); 2481 + fa = xfs_agf_verify(bp); 2482 + if (fa) { 2483 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 2496 2484 return; 2497 2485 } 2498 2486 ··· 2510 
2496 .name = "xfs_agf", 2511 2497 .verify_read = xfs_agf_read_verify, 2512 2498 .verify_write = xfs_agf_write_verify, 2499 + .verify_struct = xfs_agf_verify, 2513 2500 }; 2514 2501 2515 2502 /* ··· 2995 2980 if (agno >= mp->m_sb.sb_agcount) 2996 2981 return false; 2997 2982 return xfs_verify_agbno(mp, agno, XFS_FSB_TO_AGBNO(mp, fsbno)); 2983 + } 2984 + 2985 + /* Is there a record covering a given extent? */ 2986 + int 2987 + xfs_alloc_has_record( 2988 + struct xfs_btree_cur *cur, 2989 + xfs_agblock_t bno, 2990 + xfs_extlen_t len, 2991 + bool *exists) 2992 + { 2993 + union xfs_btree_irec low; 2994 + union xfs_btree_irec high; 2995 + 2996 + memset(&low, 0, sizeof(low)); 2997 + low.a.ar_startblock = bno; 2998 + memset(&high, 0xFF, sizeof(high)); 2999 + high.a.ar_startblock = bno + len - 1; 3000 + 3001 + return xfs_btree_has_record(cur, &low, &high, exists); 2998 3002 }
+10
fs/xfs/libxfs/xfs_alloc.h
··· 198 198 enum xfs_ag_resv_type type); /* block reservation type */ 199 199 200 200 int /* error */ 201 + xfs_alloc_lookup_le( 202 + struct xfs_btree_cur *cur, /* btree cursor */ 203 + xfs_agblock_t bno, /* starting block of extent */ 204 + xfs_extlen_t len, /* length of extent */ 205 + int *stat); /* success/failure */ 206 + 207 + int /* error */ 201 208 xfs_alloc_lookup_ge( 202 209 struct xfs_btree_cur *cur, /* btree cursor */ 203 210 xfs_agblock_t bno, /* starting block of extent */ ··· 243 236 bool xfs_verify_agbno(struct xfs_mount *mp, xfs_agnumber_t agno, 244 237 xfs_agblock_t agbno); 245 238 bool xfs_verify_fsbno(struct xfs_mount *mp, xfs_fsblock_t fsbno); 239 + 240 + int xfs_alloc_has_record(struct xfs_btree_cur *cur, xfs_agblock_t bno, 241 + xfs_extlen_t len, bool *exist); 246 242 247 243 #endif /* __XFS_ALLOC_H__ */
+29 -20
fs/xfs/libxfs/xfs_alloc_btree.c
··· 307 307 be32_to_cpu(k2->alloc.ar_startblock); 308 308 } 309 309 310 - static bool 310 + static xfs_failaddr_t 311 311 xfs_allocbt_verify( 312 312 struct xfs_buf *bp) 313 313 { 314 314 struct xfs_mount *mp = bp->b_target->bt_mount; 315 315 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 316 316 struct xfs_perag *pag = bp->b_pag; 317 + xfs_failaddr_t fa; 317 318 unsigned int level; 318 319 319 320 /* ··· 332 331 level = be16_to_cpu(block->bb_level); 333 332 switch (block->bb_magic) { 334 333 case cpu_to_be32(XFS_ABTB_CRC_MAGIC): 335 - if (!xfs_btree_sblock_v5hdr_verify(bp)) 336 - return false; 334 + fa = xfs_btree_sblock_v5hdr_verify(bp); 335 + if (fa) 336 + return fa; 337 337 /* fall through */ 338 338 case cpu_to_be32(XFS_ABTB_MAGIC): 339 339 if (pag && pag->pagf_init) { 340 340 if (level >= pag->pagf_levels[XFS_BTNUM_BNOi]) 341 - return false; 341 + return __this_address; 342 342 } else if (level >= mp->m_ag_maxlevels) 343 - return false; 343 + return __this_address; 344 344 break; 345 345 case cpu_to_be32(XFS_ABTC_CRC_MAGIC): 346 - if (!xfs_btree_sblock_v5hdr_verify(bp)) 347 - return false; 346 + fa = xfs_btree_sblock_v5hdr_verify(bp); 347 + if (fa) 348 + return fa; 348 349 /* fall through */ 349 350 case cpu_to_be32(XFS_ABTC_MAGIC): 350 351 if (pag && pag->pagf_init) { 351 352 if (level >= pag->pagf_levels[XFS_BTNUM_CNTi]) 352 - return false; 353 + return __this_address; 353 354 } else if (level >= mp->m_ag_maxlevels) 354 - return false; 355 + return __this_address; 355 356 break; 356 357 default: 357 - return false; 358 + return __this_address; 358 359 } 359 360 360 361 return xfs_btree_sblock_verify(bp, mp->m_alloc_mxr[level != 0]); ··· 366 363 xfs_allocbt_read_verify( 367 364 struct xfs_buf *bp) 368 365 { 369 - if (!xfs_btree_sblock_verify_crc(bp)) 370 - xfs_buf_ioerror(bp, -EFSBADCRC); 371 - else if (!xfs_allocbt_verify(bp)) 372 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 366 + xfs_failaddr_t fa; 373 367 374 - if (bp->b_error) { 375 - 
trace_xfs_btree_corrupt(bp, _RET_IP_); 376 - xfs_verifier_error(bp); 368 + if (!xfs_btree_sblock_verify_crc(bp)) 369 + xfs_verifier_error(bp, -EFSBADCRC, __this_address); 370 + else { 371 + fa = xfs_allocbt_verify(bp); 372 + if (fa) 373 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 377 374 } 375 + 376 + if (bp->b_error) 377 + trace_xfs_btree_corrupt(bp, _RET_IP_); 378 378 } 379 379 380 380 static void 381 381 xfs_allocbt_write_verify( 382 382 struct xfs_buf *bp) 383 383 { 384 - if (!xfs_allocbt_verify(bp)) { 384 + xfs_failaddr_t fa; 385 + 386 + fa = xfs_allocbt_verify(bp); 387 + if (fa) { 385 388 trace_xfs_btree_corrupt(bp, _RET_IP_); 386 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 387 - xfs_verifier_error(bp); 389 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 388 390 return; 389 391 } 390 392 xfs_btree_sblock_calc_crc(bp); ··· 400 392 .name = "xfs_allocbt", 401 393 .verify_read = xfs_allocbt_read_verify, 402 394 .verify_write = xfs_allocbt_write_verify, 395 + .verify_struct = xfs_allocbt_verify, 403 396 }; 404 397 405 398
-4
fs/xfs/libxfs/xfs_attr.c
··· 717 717 return error; 718 718 out_defer_cancel: 719 719 xfs_defer_cancel(args->dfops); 720 - args->trans = NULL; 721 720 return error; 722 721 } 723 722 ··· 769 770 return 0; 770 771 out_defer_cancel: 771 772 xfs_defer_cancel(args->dfops); 772 - args->trans = NULL; 773 773 return error; 774 774 } 775 775 ··· 1043 1045 return retval; 1044 1046 out_defer_cancel: 1045 1047 xfs_defer_cancel(args->dfops); 1046 - args->trans = NULL; 1047 1048 goto out; 1048 1049 } 1049 1050 ··· 1183 1186 return error; 1184 1187 out_defer_cancel: 1185 1188 xfs_defer_cancel(args->dfops); 1186 - args->trans = NULL; 1187 1189 goto out; 1188 1190 } 1189 1191
+122 -26
fs/xfs/libxfs/xfs_attr_leaf.c
··· 247 247 } 248 248 } 249 249 250 - static bool 250 + static xfs_failaddr_t 251 251 xfs_attr3_leaf_verify( 252 - struct xfs_buf *bp) 252 + struct xfs_buf *bp) 253 253 { 254 - struct xfs_mount *mp = bp->b_target->bt_mount; 255 - struct xfs_attr_leafblock *leaf = bp->b_addr; 256 - struct xfs_perag *pag = bp->b_pag; 257 - struct xfs_attr3_icleaf_hdr ichdr; 254 + struct xfs_attr3_icleaf_hdr ichdr; 255 + struct xfs_mount *mp = bp->b_target->bt_mount; 256 + struct xfs_attr_leafblock *leaf = bp->b_addr; 257 + struct xfs_perag *pag = bp->b_pag; 258 + struct xfs_attr_leaf_entry *entries; 258 259 259 260 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf); 260 261 ··· 263 262 struct xfs_da3_node_hdr *hdr3 = bp->b_addr; 264 263 265 264 if (ichdr.magic != XFS_ATTR3_LEAF_MAGIC) 266 - return false; 265 + return __this_address; 267 266 268 267 if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid)) 269 - return false; 268 + return __this_address; 270 269 if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) 271 - return false; 270 + return __this_address; 272 271 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn))) 273 - return false; 272 + return __this_address; 274 273 } else { 275 274 if (ichdr.magic != XFS_ATTR_LEAF_MAGIC) 276 - return false; 275 + return __this_address; 277 276 } 278 277 /* 279 278 * In recovery there is a transient state where count == 0 is valid ··· 281 280 * if the attr didn't fit in shortform. 282 281 */ 283 282 if (pag && pag->pagf_init && ichdr.count == 0) 284 - return false; 283 + return __this_address; 284 + 285 + /* 286 + * firstused is the block offset of the first name info structure. 287 + * Make sure it doesn't go off the block or crash into the header. 288 + */ 289 + if (ichdr.firstused > mp->m_attr_geo->blksize) 290 + return __this_address; 291 + if (ichdr.firstused < xfs_attr3_leaf_hdr_size(leaf)) 292 + return __this_address; 293 + 294 + /* Make sure the entries array doesn't crash into the name info. 
*/ 295 + entries = xfs_attr3_leaf_entryp(bp->b_addr); 296 + if ((char *)&entries[ichdr.count] > 297 + (char *)bp->b_addr + ichdr.firstused) 298 + return __this_address; 285 299 286 300 /* XXX: need to range check rest of attr header values */ 287 301 /* XXX: hash order check? */ 288 302 289 - return true; 303 + return NULL; 290 304 } 291 305 292 306 static void ··· 309 293 struct xfs_buf *bp) 310 294 { 311 295 struct xfs_mount *mp = bp->b_target->bt_mount; 312 - struct xfs_buf_log_item *bip = bp->b_fspriv; 296 + struct xfs_buf_log_item *bip = bp->b_log_item; 313 297 struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr; 298 + xfs_failaddr_t fa; 314 299 315 - if (!xfs_attr3_leaf_verify(bp)) { 316 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 317 - xfs_verifier_error(bp); 300 + fa = xfs_attr3_leaf_verify(bp); 301 + if (fa) { 302 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 318 303 return; 319 304 } 320 305 ··· 339 322 struct xfs_buf *bp) 340 323 { 341 324 struct xfs_mount *mp = bp->b_target->bt_mount; 325 + xfs_failaddr_t fa; 342 326 343 327 if (xfs_sb_version_hascrc(&mp->m_sb) && 344 328 !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF)) 345 - xfs_buf_ioerror(bp, -EFSBADCRC); 346 - else if (!xfs_attr3_leaf_verify(bp)) 347 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 348 - 349 - if (bp->b_error) 350 - xfs_verifier_error(bp); 329 + xfs_verifier_error(bp, -EFSBADCRC, __this_address); 330 + else { 331 + fa = xfs_attr3_leaf_verify(bp); 332 + if (fa) 333 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 334 + } 351 335 } 352 336 353 337 const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = { 354 338 .name = "xfs_attr3_leaf", 355 339 .verify_read = xfs_attr3_leaf_read_verify, 356 340 .verify_write = xfs_attr3_leaf_write_verify, 341 + .verify_struct = xfs_attr3_leaf_verify, 357 342 }; 358 343 359 344 int ··· 887 868 (bytes == sizeof(struct xfs_attr_sf_hdr))) 888 869 return -1; 889 870 return xfs_attr_shortform_bytesfit(dp, bytes); 871 + } 872 + 873 + /* Verify the consistency of an inline attribute fork. 
*/ 874 + xfs_failaddr_t 875 + xfs_attr_shortform_verify( 876 + struct xfs_inode *ip) 877 + { 878 + struct xfs_attr_shortform *sfp; 879 + struct xfs_attr_sf_entry *sfep; 880 + struct xfs_attr_sf_entry *next_sfep; 881 + char *endp; 882 + struct xfs_ifork *ifp; 883 + int i; 884 + int size; 885 + 886 + ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL); 887 + ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK); 888 + sfp = (struct xfs_attr_shortform *)ifp->if_u1.if_data; 889 + size = ifp->if_bytes; 890 + 891 + /* 892 + * Give up if the attribute is way too short. 893 + */ 894 + if (size < sizeof(struct xfs_attr_sf_hdr)) 895 + return __this_address; 896 + 897 + endp = (char *)sfp + size; 898 + 899 + /* Check all reported entries */ 900 + sfep = &sfp->list[0]; 901 + for (i = 0; i < sfp->hdr.count; i++) { 902 + /* 903 + * struct xfs_attr_sf_entry has a variable length. 904 + * Check the fixed-offset parts of the structure are 905 + * within the data buffer. 906 + */ 907 + if (((char *)sfep + sizeof(*sfep)) >= endp) 908 + return __this_address; 909 + 910 + /* Don't allow names with known bad length. */ 911 + if (sfep->namelen == 0) 912 + return __this_address; 913 + 914 + /* 915 + * Check that the variable-length part of the structure is 916 + * within the data buffer. The next entry starts after the 917 + * name component, so nextentry is an acceptable test. 918 + */ 919 + next_sfep = XFS_ATTR_SF_NEXTENTRY(sfep); 920 + if ((char *)next_sfep > endp) 921 + return __this_address; 922 + 923 + /* 924 + * Check for unknown flags. Short form doesn't support 925 + * the incomplete or local bits, so we can use the namespace 926 + * mask here. 927 + */ 928 + if (sfep->flags & ~XFS_ATTR_NSP_ONDISK_MASK) 929 + return __this_address; 930 + 931 + /* 932 + * Check for invalid namespace combinations. We only allow 933 + * one namespace flag per xattr, so we can just count the 934 + * bits (i.e. hweight) here. 
935 + */ 936 + if (hweight8(sfep->flags & XFS_ATTR_NSP_ONDISK_MASK) > 1) 937 + return __this_address; 938 + 939 + sfep = next_sfep; 940 + } 941 + if ((void *)sfep != (void *)endp) 942 + return __this_address; 943 + 944 + return NULL; 890 945 } 891 946 892 947 /* ··· 2266 2173 leaf = bp->b_addr; 2267 2174 xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf); 2268 2175 entries = xfs_attr3_leaf_entryp(leaf); 2269 - ASSERT(ichdr.count < args->geo->blksize / 8); 2176 + if (ichdr.count >= args->geo->blksize / 8) 2177 + return -EFSCORRUPTED; 2270 2178 2271 2179 /* 2272 2180 * Binary search. (note: small blocks will skip this loop) ··· 2283 2189 else 2284 2190 break; 2285 2191 } 2286 - ASSERT(probe >= 0 && (!ichdr.count || probe < ichdr.count)); 2287 - ASSERT(span <= 4 || be32_to_cpu(entry->hashval) == hashval); 2192 + if (!(probe >= 0 && (!ichdr.count || probe < ichdr.count))) 2193 + return -EFSCORRUPTED; 2194 + if (!(span <= 4 || be32_to_cpu(entry->hashval) == hashval)) 2195 + return -EFSCORRUPTED; 2288 2196 2289 2197 /* 2290 2198 * Since we may have duplicate hashval's, find the first matching
+1
fs/xfs/libxfs/xfs_attr_leaf.h
··· 53 53 int xfs_attr_shortform_remove(struct xfs_da_args *args); 54 54 int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp); 55 55 int xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes); 56 + xfs_failaddr_t xfs_attr_shortform_verify(struct xfs_inode *ip); 56 57 void xfs_attr_fork_remove(struct xfs_inode *ip, struct xfs_trans *tp); 57 58 58 59 /*
+67 -37
fs/xfs/libxfs/xfs_attr_remote.c
··· 65 65 * does CRC, location and bounds checking, the unpacking function checks the 66 66 * attribute parameters and owner. 67 67 */ 68 - static bool 68 + static xfs_failaddr_t 69 69 xfs_attr3_rmt_hdr_ok( 70 70 void *ptr, 71 71 xfs_ino_t ino, ··· 76 76 struct xfs_attr3_rmt_hdr *rmt = ptr; 77 77 78 78 if (bno != be64_to_cpu(rmt->rm_blkno)) 79 - return false; 79 + return __this_address; 80 80 if (offset != be32_to_cpu(rmt->rm_offset)) 81 - return false; 81 + return __this_address; 82 82 if (size != be32_to_cpu(rmt->rm_bytes)) 83 - return false; 83 + return __this_address; 84 84 if (ino != be64_to_cpu(rmt->rm_owner)) 85 - return false; 85 + return __this_address; 86 86 87 87 /* ok */ 88 - return true; 88 + return NULL; 89 89 } 90 90 91 - static bool 91 + static xfs_failaddr_t 92 92 xfs_attr3_rmt_verify( 93 93 struct xfs_mount *mp, 94 94 void *ptr, ··· 98 98 struct xfs_attr3_rmt_hdr *rmt = ptr; 99 99 100 100 if (!xfs_sb_version_hascrc(&mp->m_sb)) 101 - return false; 101 + return __this_address; 102 102 if (rmt->rm_magic != cpu_to_be32(XFS_ATTR3_RMT_MAGIC)) 103 - return false; 103 + return __this_address; 104 104 if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid)) 105 - return false; 105 + return __this_address; 106 106 if (be64_to_cpu(rmt->rm_blkno) != bno) 107 - return false; 107 + return __this_address; 108 108 if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt)) 109 - return false; 109 + return __this_address; 110 110 if (be32_to_cpu(rmt->rm_offset) + 111 111 be32_to_cpu(rmt->rm_bytes) > XFS_XATTR_SIZE_MAX) 112 - return false; 112 + return __this_address; 113 113 if (rmt->rm_owner == 0) 114 - return false; 114 + return __this_address; 115 115 116 - return true; 116 + return NULL; 117 117 } 118 118 119 - static void 120 - xfs_attr3_rmt_read_verify( 121 - struct xfs_buf *bp) 119 + static int 120 + __xfs_attr3_rmt_read_verify( 121 + struct xfs_buf *bp, 122 + bool check_crc, 123 + xfs_failaddr_t *failaddr) 122 124 { 123 125 struct xfs_mount *mp = 
bp->b_target->bt_mount; 124 126 char *ptr; ··· 130 128 131 129 /* no verification of non-crc buffers */ 132 130 if (!xfs_sb_version_hascrc(&mp->m_sb)) 133 - return; 131 + return 0; 134 132 135 133 ptr = bp->b_addr; 136 134 bno = bp->b_bn; ··· 138 136 ASSERT(len >= blksize); 139 137 140 138 while (len > 0) { 141 - if (!xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) { 142 - xfs_buf_ioerror(bp, -EFSBADCRC); 143 - break; 139 + if (check_crc && 140 + !xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) { 141 + *failaddr = __this_address; 142 + return -EFSBADCRC; 144 143 } 145 - if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { 146 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 147 - break; 148 - } 144 + *failaddr = xfs_attr3_rmt_verify(mp, ptr, blksize, bno); 145 + if (*failaddr) 146 + return -EFSCORRUPTED; 149 147 len -= blksize; 150 148 ptr += blksize; 151 149 bno += BTOBB(blksize); 152 150 } 153 151 154 - if (bp->b_error) 155 - xfs_verifier_error(bp); 156 - else 157 - ASSERT(len == 0); 152 + if (len != 0) { 153 + *failaddr = __this_address; 154 + return -EFSCORRUPTED; 155 + } 156 + 157 + return 0; 158 + } 159 + 160 + static void 161 + xfs_attr3_rmt_read_verify( 162 + struct xfs_buf *bp) 163 + { 164 + xfs_failaddr_t fa; 165 + int error; 166 + 167 + error = __xfs_attr3_rmt_read_verify(bp, true, &fa); 168 + if (error) 169 + xfs_verifier_error(bp, error, fa); 170 + } 171 + 172 + static xfs_failaddr_t 173 + xfs_attr3_rmt_verify_struct( 174 + struct xfs_buf *bp) 175 + { 176 + xfs_failaddr_t fa; 177 + int error; 178 + 179 + error = __xfs_attr3_rmt_read_verify(bp, false, &fa); 180 + return error ? 
fa : NULL; 158 181 } 159 182 160 183 static void ··· 187 160 struct xfs_buf *bp) 188 161 { 189 162 struct xfs_mount *mp = bp->b_target->bt_mount; 163 + xfs_failaddr_t fa; 190 164 int blksize = mp->m_attr_geo->blksize; 191 165 char *ptr; 192 166 int len; ··· 205 177 while (len > 0) { 206 178 struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr; 207 179 208 - if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { 209 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 210 - xfs_verifier_error(bp); 180 + fa = xfs_attr3_rmt_verify(mp, ptr, blksize, bno); 181 + if (fa) { 182 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 211 183 return; 212 184 } 213 185 ··· 216 188 * xfs_attr3_rmt_hdr_set() for the explanation. 217 189 */ 218 190 if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) { 219 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 220 - xfs_verifier_error(bp); 191 + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); 221 192 return; 222 193 } 223 194 xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF); ··· 225 198 ptr += blksize; 226 199 bno += BTOBB(blksize); 227 200 } 228 - ASSERT(len == 0); 201 + 202 + if (len != 0) 203 + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); 229 204 } 230 205 231 206 const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = { 232 207 .name = "xfs_attr3_rmt", 233 208 .verify_read = xfs_attr3_rmt_read_verify, 234 209 .verify_write = xfs_attr3_rmt_write_verify, 210 + .verify_struct = xfs_attr3_rmt_verify_struct, 235 211 }; 236 212 237 213 STATIC int ··· 299 269 byte_cnt = min(*valuelen, byte_cnt); 300 270 301 271 if (xfs_sb_version_hascrc(&mp->m_sb)) { 302 - if (!xfs_attr3_rmt_hdr_ok(src, ino, *offset, 272 + if (xfs_attr3_rmt_hdr_ok(src, ino, *offset, 303 273 byte_cnt, bno)) { 304 274 xfs_alert(mp, 305 275 "remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)",
+82 -38
fs/xfs/libxfs/xfs_bmap.c
··· 400 400 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); 401 401 bno = be64_to_cpu(*pp); 402 402 XFS_WANT_CORRUPTED_GOTO(mp, 403 - XFS_FSB_SANITY_CHECK(mp, bno), error0); 403 + xfs_verify_fsbno(mp, bno), error0); 404 404 if (bp_release) { 405 405 bp_release = 0; 406 406 xfs_trans_brelse(NULL, bp); ··· 1220 1220 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); 1221 1221 bno = be64_to_cpu(*pp); 1222 1222 XFS_WANT_CORRUPTED_GOTO(mp, 1223 - XFS_FSB_SANITY_CHECK(mp, bno), out_brelse); 1223 + xfs_verify_fsbno(mp, bno), out_brelse); 1224 1224 xfs_trans_brelse(tp, bp); 1225 1225 } 1226 1226 ··· 3337 3337 return 0; 3338 3338 } 3339 3339 3340 + /* Update all inode and quota accounting for the allocation we just did. */ 3341 + static void 3342 + xfs_bmap_btalloc_accounting( 3343 + struct xfs_bmalloca *ap, 3344 + struct xfs_alloc_arg *args) 3345 + { 3346 + if (ap->flags & XFS_BMAPI_COWFORK) { 3347 + /* 3348 + * COW fork blocks are in-core only and thus are treated as 3349 + * in-core quota reservation (like delalloc blocks) even when 3350 + * converted to real blocks. The quota reservation is not 3351 + * accounted to disk until blocks are remapped to the data 3352 + * fork. So if these blocks were previously delalloc, we 3353 + * already have quota reservation and there's nothing to do 3354 + * yet. 3355 + */ 3356 + if (ap->wasdel) 3357 + return; 3358 + 3359 + /* 3360 + * Otherwise, we've allocated blocks in a hole. The transaction 3361 + * has acquired in-core quota reservation for this extent. 3362 + * Rather than account these as real blocks, however, we reduce 3363 + * the transaction quota reservation based on the allocation. 3364 + * This essentially transfers the transaction quota reservation 3365 + * to that of a delalloc extent. 
3366 + */ 3367 + ap->ip->i_delayed_blks += args->len; 3368 + xfs_trans_mod_dquot_byino(ap->tp, ap->ip, XFS_TRANS_DQ_RES_BLKS, 3369 + -(long)args->len); 3370 + return; 3371 + } 3372 + 3373 + /* data/attr fork only */ 3374 + ap->ip->i_d.di_nblocks += args->len; 3375 + xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); 3376 + if (ap->wasdel) 3377 + ap->ip->i_delayed_blks -= args->len; 3378 + xfs_trans_mod_dquot_byino(ap->tp, ap->ip, 3379 + ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : XFS_TRANS_DQ_BCOUNT, 3380 + args->len); 3381 + } 3382 + 3340 3383 STATIC int 3341 3384 xfs_bmap_btalloc( 3342 3385 struct xfs_bmalloca *ap) /* bmap alloc argument struct */ ··· 3390 3347 xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */ 3391 3348 xfs_agnumber_t ag; 3392 3349 xfs_alloc_arg_t args; 3350 + xfs_fileoff_t orig_offset; 3351 + xfs_extlen_t orig_length; 3393 3352 xfs_extlen_t blen; 3394 3353 xfs_extlen_t nextminlen = 0; 3395 3354 int nullfb; /* true if ap->firstblock isn't set */ ··· 3401 3356 int stripe_align; 3402 3357 3403 3358 ASSERT(ap->length); 3359 + orig_offset = ap->offset; 3360 + orig_length = ap->length; 3404 3361 3405 3362 mp = ap->ip->i_mount; 3406 3363 ··· 3618 3571 *ap->firstblock = args.fsbno; 3619 3572 ASSERT(nullfb || fb_agno <= args.agno); 3620 3573 ap->length = args.len; 3621 - if (!(ap->flags & XFS_BMAPI_COWFORK)) 3622 - ap->ip->i_d.di_nblocks += args.len; 3623 - xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); 3624 - if (ap->wasdel) 3625 - ap->ip->i_delayed_blks -= args.len; 3626 3574 /* 3627 - * Adjust the disk quota also. This was reserved 3628 - * earlier. 3575 + * If the extent size hint is active, we tried to round the 3576 + * caller's allocation request offset down to extsz and the 3577 + * length up to another extsz boundary. If we found a free 3578 + * extent we mapped it in starting at this new offset. 
If the 3579 + * newly mapped space isn't long enough to cover any of the 3580 + * range of offsets that was originally requested, move the 3581 + * mapping up so that we can fill as much of the caller's 3582 + * original request as possible. Free space is apparently 3583 + * very fragmented so we're unlikely to be able to satisfy the 3584 + * hints anyway. 3629 3585 */ 3630 - xfs_trans_mod_dquot_byino(ap->tp, ap->ip, 3631 - ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : 3632 - XFS_TRANS_DQ_BCOUNT, 3633 - (long) args.len); 3586 + if (ap->length <= orig_length) 3587 + ap->offset = orig_offset; 3588 + else if (ap->offset + ap->length < orig_offset + orig_length) 3589 + ap->offset = orig_offset + orig_length - ap->length; 3590 + xfs_bmap_btalloc_accounting(ap, &args); 3634 3591 } else { 3635 3592 ap->blkno = NULLFSBLOCK; 3636 3593 ap->length = 0; ··· 3927 3876 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); 3928 3877 xfs_extlen_t alen; 3929 3878 xfs_extlen_t indlen; 3930 - char rt = XFS_IS_REALTIME_INODE(ip); 3931 - xfs_extlen_t extsz; 3932 3879 int error; 3933 3880 xfs_fileoff_t aoff = off; 3934 3881 ··· 3941 3892 prealloc = alen - len; 3942 3893 3943 3894 /* Figure out the extent size, adjust alen */ 3944 - if (whichfork == XFS_COW_FORK) 3945 - extsz = xfs_get_cowextsz_hint(ip); 3946 - else 3947 - extsz = xfs_get_extsz_hint(ip); 3948 - if (extsz) { 3895 + if (whichfork == XFS_COW_FORK) { 3949 3896 struct xfs_bmbt_irec prev; 3897 + xfs_extlen_t extsz = xfs_get_cowextsz_hint(ip); 3950 3898 3951 3899 if (!xfs_iext_peek_prev_extent(ifp, icur, &prev)) 3952 3900 prev.br_startoff = NULLFILEOFF; 3953 3901 3954 - error = xfs_bmap_extsize_align(mp, got, &prev, extsz, rt, eof, 3902 + error = xfs_bmap_extsize_align(mp, got, &prev, extsz, 0, eof, 3955 3903 1, 0, &aoff, &alen); 3956 3904 ASSERT(!error); 3957 3905 } 3958 - 3959 - if (rt) 3960 - extsz = alen / mp->m_sb.sb_rextsize; 3961 3906 3962 3907 /* 3963 3908 * Make a transaction-less quota reservation for delayed allocation ··· 
3959 3916 * allocated blocks already inside this loop. 3960 3917 */ 3961 3918 error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0, 3962 - rt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); 3919 + XFS_QMOPT_RES_REGBLKS); 3963 3920 if (error) 3964 3921 return error; 3965 3922 ··· 3970 3927 indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen); 3971 3928 ASSERT(indlen > 0); 3972 3929 3973 - if (rt) { 3974 - error = xfs_mod_frextents(mp, -((int64_t)extsz)); 3975 - } else { 3976 - error = xfs_mod_fdblocks(mp, -((int64_t)alen), false); 3977 - } 3978 - 3930 + error = xfs_mod_fdblocks(mp, -((int64_t)alen), false); 3979 3931 if (error) 3980 3932 goto out_unreserve_quota; 3981 3933 ··· 4001 3963 return 0; 4002 3964 4003 3965 out_unreserve_blocks: 4004 - if (rt) 4005 - xfs_mod_frextents(mp, extsz); 4006 - else 4007 - xfs_mod_fdblocks(mp, alen, false); 3966 + xfs_mod_fdblocks(mp, alen, false); 4008 3967 out_unreserve_quota: 4009 3968 if (XFS_IS_QUOTA_ON(mp)) 4010 - xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ? 4011 - XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); 3969 + xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, 3970 + XFS_QMOPT_RES_REGBLKS); 4012 3971 return error; 4013 3972 } 4014 3973 ··· 4339 4304 while (bno < end && n < *nmap) { 4340 4305 bool need_alloc = false, wasdelay = false; 4341 4306 4342 - /* in hole or beyoned EOF? */ 4307 + /* in hole or beyond EOF? */ 4343 4308 if (eof || bma.got.br_startoff > bno) { 4309 + /* 4310 + * CoW fork conversions should /never/ hit EOF or 4311 + * holes. There should always be something for us 4312 + * to work on. 4313 + */ 4314 + ASSERT(!((flags & XFS_BMAPI_CONVERT) && 4315 + (flags & XFS_BMAPI_COWFORK))); 4316 + 4344 4317 if (flags & XFS_BMAPI_DELALLOC) { 4345 4318 /* 4346 4319 * For the COW fork we can reasonably get a ··· 4867 4824 xfs_iext_insert(ip, icur, &new, state); 4868 4825 break; 4869 4826 } 4827 + ip->i_delayed_blks -= del->br_blockcount; 4870 4828 } 4871 4829 4872 4830 /*
+24 -34
fs/xfs/libxfs/xfs_bmap_btree.c
··· 425 425 be64_to_cpu(k2->bmbt.br_startoff); 426 426 } 427 427 428 - static bool 428 + static xfs_failaddr_t 429 429 xfs_bmbt_verify( 430 430 struct xfs_buf *bp) 431 431 { 432 432 struct xfs_mount *mp = bp->b_target->bt_mount; 433 433 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 434 + xfs_failaddr_t fa; 434 435 unsigned int level; 435 436 436 437 switch (block->bb_magic) { 437 438 case cpu_to_be32(XFS_BMAP_CRC_MAGIC): 438 - if (!xfs_sb_version_hascrc(&mp->m_sb)) 439 - return false; 440 - if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid)) 441 - return false; 442 - if (be64_to_cpu(block->bb_u.l.bb_blkno) != bp->b_bn) 443 - return false; 444 439 /* 445 440 * XXX: need a better way of verifying the owner here. Right now 446 441 * just make sure there has been one set. 447 442 */ 448 - if (be64_to_cpu(block->bb_u.l.bb_owner) == 0) 449 - return false; 443 + fa = xfs_btree_lblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN); 444 + if (fa) 445 + return fa; 450 446 /* fall through */ 451 447 case cpu_to_be32(XFS_BMAP_MAGIC): 452 448 break; 453 449 default: 454 - return false; 450 + return __this_address; 455 451 } 456 452 457 453 /* ··· 459 463 */ 460 464 level = be16_to_cpu(block->bb_level); 461 465 if (level > max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1])) 462 - return false; 463 - if (be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0]) 464 - return false; 466 + return __this_address; 465 467 466 - /* sibling pointer verification */ 467 - if (!block->bb_u.l.bb_leftsib || 468 - (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) && 469 - !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib)))) 470 - return false; 471 - if (!block->bb_u.l.bb_rightsib || 472 - (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) && 473 - !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib)))) 474 - return false; 475 - 476 - return true; 468 + return xfs_btree_lblock_verify(bp, mp->m_bmap_dmxr[level != 0]); 477 469 } 478 470 479 471 
static void 480 472 xfs_bmbt_read_verify( 481 473 struct xfs_buf *bp) 482 474 { 483 - if (!xfs_btree_lblock_verify_crc(bp)) 484 - xfs_buf_ioerror(bp, -EFSBADCRC); 485 - else if (!xfs_bmbt_verify(bp)) 486 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 475 + xfs_failaddr_t fa; 487 476 488 - if (bp->b_error) { 489 - trace_xfs_btree_corrupt(bp, _RET_IP_); 490 - xfs_verifier_error(bp); 477 + if (!xfs_btree_lblock_verify_crc(bp)) 478 + xfs_verifier_error(bp, -EFSBADCRC, __this_address); 479 + else { 480 + fa = xfs_bmbt_verify(bp); 481 + if (fa) 482 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 491 483 } 484 + 485 + if (bp->b_error) 486 + trace_xfs_btree_corrupt(bp, _RET_IP_); 492 487 } 493 488 494 489 static void 495 490 xfs_bmbt_write_verify( 496 491 struct xfs_buf *bp) 497 492 { 498 - if (!xfs_bmbt_verify(bp)) { 493 + xfs_failaddr_t fa; 494 + 495 + fa = xfs_bmbt_verify(bp); 496 + if (fa) { 499 497 trace_xfs_btree_corrupt(bp, _RET_IP_); 500 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 501 - xfs_verifier_error(bp); 498 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 502 499 return; 503 500 } 504 501 xfs_btree_lblock_calc_crc(bp); ··· 501 512 .name = "xfs_bmbt", 502 513 .verify_read = xfs_bmbt_read_verify, 503 514 .verify_write = xfs_bmbt_write_verify, 515 + .verify_struct = xfs_bmbt_verify, 504 516 }; 505 517 506 518
+117 -42
fs/xfs/libxfs/xfs_btree.c
··· 273 273 struct xfs_buf *bp) 274 274 { 275 275 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 276 - struct xfs_buf_log_item *bip = bp->b_fspriv; 276 + struct xfs_buf_log_item *bip = bp->b_log_item; 277 277 278 278 if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) 279 279 return; ··· 311 311 struct xfs_buf *bp) 312 312 { 313 313 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 314 - struct xfs_buf_log_item *bip = bp->b_fspriv; 314 + struct xfs_buf_log_item *bip = bp->b_log_item; 315 315 316 316 if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) 317 317 return; ··· 329 329 330 330 if (xfs_sb_version_hascrc(&mp->m_sb)) { 331 331 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn))) 332 - return false; 332 + return __this_address; 333 333 return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); 334 334 } 335 335 ··· 853 853 xfs_daddr_t d; /* real disk block address */ 854 854 int error; 855 855 856 - if (!XFS_FSB_SANITY_CHECK(mp, fsbno)) 856 + if (!xfs_verify_fsbno(mp, fsbno)) 857 857 return -EFSCORRUPTED; 858 858 d = XFS_FSB_TO_DADDR(mp, fsbno); 859 859 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, ··· 4529 4529 &bbcoi); 4530 4530 } 4531 4531 4532 - /** 4533 - * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format 4534 - * btree block 4535 - * 4536 - * @bp: buffer containing the btree block 4537 - * @max_recs: pointer to the m_*_mxr max records field in the xfs mount 4538 - * @pag_max_level: pointer to the per-ag max level field 4539 - */ 4540 - bool 4541 - xfs_btree_sblock_v5hdr_verify( 4542 - struct xfs_buf *bp) 4532 + /* Verify the v5 fields of a long-format btree block. 
*/ 4533 + xfs_failaddr_t 4534 + xfs_btree_lblock_v5hdr_verify( 4535 + struct xfs_buf *bp, 4536 + uint64_t owner) 4543 4537 { 4544 4538 struct xfs_mount *mp = bp->b_target->bt_mount; 4545 4539 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4546 - struct xfs_perag *pag = bp->b_pag; 4547 4540 4548 4541 if (!xfs_sb_version_hascrc(&mp->m_sb)) 4549 - return false; 4550 - if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) 4551 - return false; 4552 - if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) 4553 - return false; 4554 - if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) 4555 - return false; 4556 - return true; 4542 + return __this_address; 4543 + if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid)) 4544 + return __this_address; 4545 + if (block->bb_u.l.bb_blkno != cpu_to_be64(bp->b_bn)) 4546 + return __this_address; 4547 + if (owner != XFS_RMAP_OWN_UNKNOWN && 4548 + be64_to_cpu(block->bb_u.l.bb_owner) != owner) 4549 + return __this_address; 4550 + return NULL; 4557 4551 } 4558 4552 4559 - /** 4560 - * xfs_btree_sblock_verify() -- verify a short-format btree block 4561 - * 4562 - * @bp: buffer containing the btree block 4563 - * @max_recs: maximum records allowed in this btree node 4564 - */ 4565 - bool 4566 - xfs_btree_sblock_verify( 4553 + /* Verify a long-format btree block. 
*/ 4554 + xfs_failaddr_t 4555 + xfs_btree_lblock_verify( 4567 4556 struct xfs_buf *bp, 4568 4557 unsigned int max_recs) 4569 4558 { ··· 4561 4572 4562 4573 /* numrecs verification */ 4563 4574 if (be16_to_cpu(block->bb_numrecs) > max_recs) 4564 - return false; 4575 + return __this_address; 4565 4576 4566 4577 /* sibling pointer verification */ 4567 - if (!block->bb_u.s.bb_leftsib || 4568 - (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks && 4569 - block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK))) 4570 - return false; 4571 - if (!block->bb_u.s.bb_rightsib || 4572 - (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks && 4573 - block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK))) 4574 - return false; 4578 + if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) && 4579 + !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_leftsib))) 4580 + return __this_address; 4581 + if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) && 4582 + !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_rightsib))) 4583 + return __this_address; 4575 4584 4576 - return true; 4585 + return NULL; 4586 + } 4587 + 4588 + /** 4589 + * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format 4590 + * btree block 4591 + * 4592 + * @bp: buffer containing the btree block 4593 + * @max_recs: pointer to the m_*_mxr max records field in the xfs mount 4594 + * @pag_max_level: pointer to the per-ag max level field 4595 + */ 4596 + xfs_failaddr_t 4597 + xfs_btree_sblock_v5hdr_verify( 4598 + struct xfs_buf *bp) 4599 + { 4600 + struct xfs_mount *mp = bp->b_target->bt_mount; 4601 + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4602 + struct xfs_perag *pag = bp->b_pag; 4603 + 4604 + if (!xfs_sb_version_hascrc(&mp->m_sb)) 4605 + return __this_address; 4606 + if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) 4607 + return __this_address; 4608 + if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) 4609 + return __this_address; 4610 + 
if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) 4611 + return __this_address; 4612 + return NULL; 4613 + } 4614 + 4615 + /** 4616 + * xfs_btree_sblock_verify() -- verify a short-format btree block 4617 + * 4618 + * @bp: buffer containing the btree block 4619 + * @max_recs: maximum records allowed in this btree node 4620 + */ 4621 + xfs_failaddr_t 4622 + xfs_btree_sblock_verify( 4623 + struct xfs_buf *bp, 4624 + unsigned int max_recs) 4625 + { 4626 + struct xfs_mount *mp = bp->b_target->bt_mount; 4627 + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4628 + xfs_agblock_t agno; 4629 + 4630 + /* numrecs verification */ 4631 + if (be16_to_cpu(block->bb_numrecs) > max_recs) 4632 + return __this_address; 4633 + 4634 + /* sibling pointer verification */ 4635 + agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp)); 4636 + if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) && 4637 + !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_leftsib))) 4638 + return __this_address; 4639 + if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) && 4640 + !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_rightsib))) 4641 + return __this_address; 4642 + 4643 + return NULL; 4577 4644 } 4578 4645 4579 4646 /* ··· 4997 4952 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 4998 4953 return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l); 4999 4954 return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s); 4955 + } 4956 + 4957 + /* If there's an extent, we're done. */ 4958 + STATIC int 4959 + xfs_btree_has_record_helper( 4960 + struct xfs_btree_cur *cur, 4961 + union xfs_btree_rec *rec, 4962 + void *priv) 4963 + { 4964 + return XFS_BTREE_QUERY_RANGE_ABORT; 4965 + } 4966 + 4967 + /* Is there a record covering a given range of keys? 
*/ 4968 + int 4969 + xfs_btree_has_record( 4970 + struct xfs_btree_cur *cur, 4971 + union xfs_btree_irec *low, 4972 + union xfs_btree_irec *high, 4973 + bool *exists) 4974 + { 4975 + int error; 4976 + 4977 + error = xfs_btree_query_range(cur, low, high, 4978 + &xfs_btree_has_record_helper, NULL); 4979 + if (error == XFS_BTREE_QUERY_RANGE_ABORT) { 4980 + *exists = true; 4981 + return 0; 4982 + } 4983 + *exists = false; 4984 + return error; 5000 4985 }
+10 -6
fs/xfs/libxfs/xfs_btree.h
··· 473 473 #define XFS_FILBLKS_MIN(a,b) min_t(xfs_filblks_t, (a), (b)) 474 474 #define XFS_FILBLKS_MAX(a,b) max_t(xfs_filblks_t, (a), (b)) 475 475 476 - #define XFS_FSB_SANITY_CHECK(mp,fsb) \ 477 - (fsb && XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \ 478 - XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks) 479 - 480 476 /* 481 477 * Trace hooks. Currently not implemented as they need to be ported 482 478 * over to the generic tracing functionality, which is some effort. ··· 492 496 #define XFS_BTREE_TRACE_ARGR(c, r) 493 497 #define XFS_BTREE_TRACE_CURSOR(c, t) 494 498 495 - bool xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp); 496 - bool xfs_btree_sblock_verify(struct xfs_buf *bp, unsigned int max_recs); 499 + xfs_failaddr_t xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp); 500 + xfs_failaddr_t xfs_btree_sblock_verify(struct xfs_buf *bp, 501 + unsigned int max_recs); 502 + xfs_failaddr_t xfs_btree_lblock_v5hdr_verify(struct xfs_buf *bp, 503 + uint64_t owner); 504 + xfs_failaddr_t xfs_btree_lblock_verify(struct xfs_buf *bp, 505 + unsigned int max_recs); 506 + 497 507 uint xfs_btree_compute_maxlevels(struct xfs_mount *mp, uint *limits, 498 508 unsigned long len); 499 509 xfs_extlen_t xfs_btree_calc_size(struct xfs_mount *mp, uint *limits, ··· 547 545 struct xfs_btree_block *block, union xfs_btree_key *key); 548 546 union xfs_btree_key *xfs_btree_high_key_from_key(struct xfs_btree_cur *cur, 549 547 union xfs_btree_key *key); 548 + int xfs_btree_has_record(struct xfs_btree_cur *cur, union xfs_btree_irec *low, 549 + union xfs_btree_irec *high, bool *exists); 550 550 551 551 #endif /* __XFS_BTREE_H__ */
+47 -23
fs/xfs/libxfs/xfs_da_btree.c
··· 128 128 kmem_zone_free(xfs_da_state_zone, state); 129 129 } 130 130 131 - static bool 131 + static xfs_failaddr_t 132 132 xfs_da3_node_verify( 133 133 struct xfs_buf *bp) 134 134 { ··· 145 145 struct xfs_da3_node_hdr *hdr3 = bp->b_addr; 146 146 147 147 if (ichdr.magic != XFS_DA3_NODE_MAGIC) 148 - return false; 148 + return __this_address; 149 149 150 150 if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid)) 151 - return false; 151 + return __this_address; 152 152 if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) 153 - return false; 153 + return __this_address; 154 154 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn))) 155 - return false; 155 + return __this_address; 156 156 } else { 157 157 if (ichdr.magic != XFS_DA_NODE_MAGIC) 158 - return false; 158 + return __this_address; 159 159 } 160 160 if (ichdr.level == 0) 161 - return false; 161 + return __this_address; 162 162 if (ichdr.level > XFS_DA_NODE_MAXDEPTH) 163 - return false; 163 + return __this_address; 164 164 if (ichdr.count == 0) 165 - return false; 165 + return __this_address; 166 166 167 167 /* 168 168 * we don't know if the node is for and attribute or directory tree, ··· 170 170 */ 171 171 if (ichdr.count > mp->m_dir_geo->node_ents && 172 172 ichdr.count > mp->m_attr_geo->node_ents) 173 - return false; 173 + return __this_address; 174 174 175 175 /* XXX: hash order check? 
*/ 176 176 177 - return true; 177 + return NULL; 178 178 } 179 179 180 180 static void ··· 182 182 struct xfs_buf *bp) 183 183 { 184 184 struct xfs_mount *mp = bp->b_target->bt_mount; 185 - struct xfs_buf_log_item *bip = bp->b_fspriv; 185 + struct xfs_buf_log_item *bip = bp->b_log_item; 186 186 struct xfs_da3_node_hdr *hdr3 = bp->b_addr; 187 + xfs_failaddr_t fa; 187 188 188 - if (!xfs_da3_node_verify(bp)) { 189 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 190 - xfs_verifier_error(bp); 189 + fa = xfs_da3_node_verify(bp); 190 + if (fa) { 191 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 191 192 return; 192 193 } 193 194 ··· 212 211 struct xfs_buf *bp) 213 212 { 214 213 struct xfs_da_blkinfo *info = bp->b_addr; 214 + xfs_failaddr_t fa; 215 215 216 216 switch (be16_to_cpu(info->magic)) { 217 217 case XFS_DA3_NODE_MAGIC: 218 218 if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) { 219 - xfs_buf_ioerror(bp, -EFSBADCRC); 219 + xfs_verifier_error(bp, -EFSBADCRC, 220 + __this_address); 220 221 break; 221 222 } 222 223 /* fall through */ 223 224 case XFS_DA_NODE_MAGIC: 224 - if (!xfs_da3_node_verify(bp)) { 225 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 226 - break; 227 - } 225 + fa = xfs_da3_node_verify(bp); 226 + if (fa) 227 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 228 228 return; 229 229 case XFS_ATTR_LEAF_MAGIC: 230 230 case XFS_ATTR3_LEAF_MAGIC: ··· 238 236 bp->b_ops->verify_read(bp); 239 237 return; 240 238 default: 241 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 239 + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); 242 240 break; 243 241 } 242 + } 244 243 245 - /* corrupt block */ 246 - xfs_verifier_error(bp); 244 + /* Verify the structure of a da3 block. 
*/ 245 + static xfs_failaddr_t 246 + xfs_da3_node_verify_struct( 247 + struct xfs_buf *bp) 248 + { 249 + struct xfs_da_blkinfo *info = bp->b_addr; 250 + 251 + switch (be16_to_cpu(info->magic)) { 252 + case XFS_DA3_NODE_MAGIC: 253 + case XFS_DA_NODE_MAGIC: 254 + return xfs_da3_node_verify(bp); 255 + case XFS_ATTR_LEAF_MAGIC: 256 + case XFS_ATTR3_LEAF_MAGIC: 257 + bp->b_ops = &xfs_attr3_leaf_buf_ops; 258 + return bp->b_ops->verify_struct(bp); 259 + case XFS_DIR2_LEAFN_MAGIC: 260 + case XFS_DIR3_LEAFN_MAGIC: 261 + bp->b_ops = &xfs_dir3_leafn_buf_ops; 262 + return bp->b_ops->verify_struct(bp); 263 + default: 264 + return __this_address; 265 + } 247 266 } 248 267 249 268 const struct xfs_buf_ops xfs_da3_node_buf_ops = { 250 269 .name = "xfs_da3_node", 251 270 .verify_read = xfs_da3_node_read_verify, 252 271 .verify_write = xfs_da3_node_write_verify, 272 + .verify_struct = xfs_da3_node_verify_struct, 253 273 }; 254 274 255 275 int
+6
fs/xfs/libxfs/xfs_da_format.h
··· 875 875 ((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \ 876 876 sizeof(struct xfs_attr3_rmt_hdr) : 0)) 877 877 878 + /* Number of bytes in a directory block. */ 879 + static inline unsigned int xfs_dir2_dirblock_bytes(struct xfs_sb *sbp) 880 + { 881 + return 1 << (sbp->sb_blocklog + sbp->sb_dirblklog); 882 + } 883 + 878 884 #endif /* __XFS_DA_FORMAT_H__ */
+2 -3
fs/xfs/libxfs/xfs_dir2.c
··· 119 119 120 120 121 121 ASSERT(mp->m_sb.sb_versionnum & XFS_SB_VERSION_DIRV2BIT); 122 - ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <= 123 - XFS_MAX_BLOCKSIZE); 122 + ASSERT(xfs_dir2_dirblock_bytes(&mp->m_sb) <= XFS_MAX_BLOCKSIZE); 124 123 125 124 mp->m_dir_inode_ops = xfs_dir_get_ops(mp, NULL); 126 125 mp->m_nondir_inode_ops = xfs_nondir_get_ops(mp, NULL); ··· 139 140 dageo = mp->m_dir_geo; 140 141 dageo->blklog = mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog; 141 142 dageo->fsblog = mp->m_sb.sb_blocklog; 142 - dageo->blksize = 1 << dageo->blklog; 143 + dageo->blksize = xfs_dir2_dirblock_bytes(&mp->m_sb); 143 144 dageo->fsbcount = 1 << mp->m_sb.sb_dirblklog; 144 145 145 146 /*
+2
fs/xfs/libxfs/xfs_dir2.h
··· 340 340 #define XFS_READDIR_BUFSIZE (32768) 341 341 342 342 unsigned char xfs_dir3_get_dtype(struct xfs_mount *mp, uint8_t filetype); 343 + void *xfs_dir3_data_endp(struct xfs_da_geometry *geo, 344 + struct xfs_dir2_data_hdr *hdr); 343 345 344 346 #endif /* __XFS_DIR2_H__ */
+20 -19
fs/xfs/libxfs/xfs_dir2_block.c
··· 58 58 xfs_dir_hash_dotdot = xfs_da_hashname((unsigned char *)"..", 2); 59 59 } 60 60 61 - static bool 61 + static xfs_failaddr_t 62 62 xfs_dir3_block_verify( 63 63 struct xfs_buf *bp) 64 64 { ··· 67 67 68 68 if (xfs_sb_version_hascrc(&mp->m_sb)) { 69 69 if (hdr3->magic != cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) 70 - return false; 70 + return __this_address; 71 71 if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) 72 - return false; 72 + return __this_address; 73 73 if (be64_to_cpu(hdr3->blkno) != bp->b_bn) 74 - return false; 74 + return __this_address; 75 75 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) 76 - return false; 76 + return __this_address; 77 77 } else { 78 78 if (hdr3->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) 79 - return false; 79 + return __this_address; 80 80 } 81 - if (__xfs_dir3_data_check(NULL, bp)) 82 - return false; 83 - return true; 81 + return __xfs_dir3_data_check(NULL, bp); 84 82 } 85 83 86 84 static void ··· 86 88 struct xfs_buf *bp) 87 89 { 88 90 struct xfs_mount *mp = bp->b_target->bt_mount; 91 + xfs_failaddr_t fa; 89 92 90 93 if (xfs_sb_version_hascrc(&mp->m_sb) && 91 94 !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) 92 - xfs_buf_ioerror(bp, -EFSBADCRC); 93 - else if (!xfs_dir3_block_verify(bp)) 94 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 95 - 96 - if (bp->b_error) 97 - xfs_verifier_error(bp); 95 + xfs_verifier_error(bp, -EFSBADCRC, __this_address); 96 + else { 97 + fa = xfs_dir3_block_verify(bp); 98 + if (fa) 99 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 100 + } 98 101 } 99 102 100 103 static void ··· 103 104 struct xfs_buf *bp) 104 105 { 105 106 struct xfs_mount *mp = bp->b_target->bt_mount; 106 - struct xfs_buf_log_item *bip = bp->b_fspriv; 107 + struct xfs_buf_log_item *bip = bp->b_log_item; 107 108 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 109 + xfs_failaddr_t fa; 108 110 109 - if (!xfs_dir3_block_verify(bp)) { 110 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 111 - xfs_verifier_error(bp); 111 + fa = xfs_dir3_block_verify(bp); 
112 + if (fa) { 113 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 112 114 return; 113 115 } 114 116 ··· 126 126 .name = "xfs_dir3_block", 127 127 .verify_read = xfs_dir3_block_read_verify, 128 128 .verify_write = xfs_dir3_block_write_verify, 129 + .verify_struct = xfs_dir3_block_verify, 129 130 }; 130 131 131 132 int
+122 -86
fs/xfs/libxfs/xfs_dir2_data.c
··· 36 36 /* 37 37 * Check the consistency of the data block. 38 38 * The input can also be a block-format directory. 39 - * Return 0 is the buffer is good, otherwise an error. 39 + * Return NULL if the buffer is good, otherwise the address of the error. 40 40 */ 41 - int 41 + xfs_failaddr_t 42 42 __xfs_dir3_data_check( 43 43 struct xfs_inode *dp, /* incore inode pointer */ 44 44 struct xfs_buf *bp) /* data block's buffer */ ··· 73 73 */ 74 74 ops = xfs_dir_get_ops(mp, dp); 75 75 76 + /* 77 + * If this isn't a directory, or we don't get handed the dir ops, 78 + * something is seriously wrong. Bail out. 79 + */ 80 + if ((dp && !S_ISDIR(VFS_I(dp)->i_mode)) || 81 + ops != xfs_dir_get_ops(mp, NULL)) 82 + return __this_address; 83 + 76 84 hdr = bp->b_addr; 77 85 p = (char *)ops->data_entry_p(hdr); 78 86 ··· 89 81 case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): 90 82 btp = xfs_dir2_block_tail_p(geo, hdr); 91 83 lep = xfs_dir2_block_leaf_p(btp); 92 - endp = (char *)lep; 93 84 94 85 /* 95 86 * The number of leaf entries is limited by the size of the ··· 97 90 * so just ensure that the count falls somewhere inside the 98 91 * block right now. 99 92 */ 100 - XFS_WANT_CORRUPTED_RETURN(mp, be32_to_cpu(btp->count) < 101 - ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry)); 93 + if (be32_to_cpu(btp->count) >= 94 + ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry)) 95 + return __this_address; 102 96 break; 103 97 case cpu_to_be32(XFS_DIR3_DATA_MAGIC): 104 98 case cpu_to_be32(XFS_DIR2_DATA_MAGIC): 105 - endp = (char *)hdr + geo->blksize; 106 99 break; 107 100 default: 108 - XFS_ERROR_REPORT("Bad Magic", XFS_ERRLEVEL_LOW, mp); 109 - return -EFSCORRUPTED; 101 + return __this_address; 110 102 } 103 + endp = xfs_dir3_data_endp(geo, hdr); 104 + if (!endp) 105 + return __this_address; 111 106 112 107 /* 113 108 * Account for zero bestfree entries. 
··· 117 108 bf = ops->data_bestfree_p(hdr); 118 109 count = lastfree = freeseen = 0; 119 110 if (!bf[0].length) { 120 - XFS_WANT_CORRUPTED_RETURN(mp, !bf[0].offset); 111 + if (bf[0].offset) 112 + return __this_address; 121 113 freeseen |= 1 << 0; 122 114 } 123 115 if (!bf[1].length) { 124 - XFS_WANT_CORRUPTED_RETURN(mp, !bf[1].offset); 116 + if (bf[1].offset) 117 + return __this_address; 125 118 freeseen |= 1 << 1; 126 119 } 127 120 if (!bf[2].length) { 128 - XFS_WANT_CORRUPTED_RETURN(mp, !bf[2].offset); 121 + if (bf[2].offset) 122 + return __this_address; 129 123 freeseen |= 1 << 2; 130 124 } 131 125 132 - XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[0].length) >= 133 - be16_to_cpu(bf[1].length)); 134 - XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[1].length) >= 135 - be16_to_cpu(bf[2].length)); 126 + if (be16_to_cpu(bf[0].length) < be16_to_cpu(bf[1].length)) 127 + return __this_address; 128 + if (be16_to_cpu(bf[1].length) < be16_to_cpu(bf[2].length)) 129 + return __this_address; 136 130 /* 137 131 * Loop over the data/unused entries. 138 132 */ ··· 147 135 * doesn't need to be there. 
148 136 */ 149 137 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { 150 - XFS_WANT_CORRUPTED_RETURN(mp, lastfree == 0); 151 - XFS_WANT_CORRUPTED_RETURN(mp, endp >= 152 - p + be16_to_cpu(dup->length)); 153 - XFS_WANT_CORRUPTED_RETURN(mp, 154 - be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) == 155 - (char *)dup - (char *)hdr); 138 + if (lastfree != 0) 139 + return __this_address; 140 + if (endp < p + be16_to_cpu(dup->length)) 141 + return __this_address; 142 + if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) != 143 + (char *)dup - (char *)hdr) 144 + return __this_address; 156 145 dfp = xfs_dir2_data_freefind(hdr, bf, dup); 157 146 if (dfp) { 158 147 i = (int)(dfp - bf); 159 - XFS_WANT_CORRUPTED_RETURN(mp, 160 - (freeseen & (1 << i)) == 0); 148 + if ((freeseen & (1 << i)) != 0) 149 + return __this_address; 161 150 freeseen |= 1 << i; 162 151 } else { 163 - XFS_WANT_CORRUPTED_RETURN(mp, 164 - be16_to_cpu(dup->length) <= 165 - be16_to_cpu(bf[2].length)); 152 + if (be16_to_cpu(dup->length) > 153 + be16_to_cpu(bf[2].length)) 154 + return __this_address; 166 155 } 167 156 p += be16_to_cpu(dup->length); 168 157 lastfree = 1; ··· 176 163 * The linear search is crude but this is DEBUG code. 
177 164 */ 178 165 dep = (xfs_dir2_data_entry_t *)p; 179 - XFS_WANT_CORRUPTED_RETURN(mp, dep->namelen != 0); 180 - XFS_WANT_CORRUPTED_RETURN(mp, 181 - !xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber))); 182 - XFS_WANT_CORRUPTED_RETURN(mp, endp >= 183 - p + ops->data_entsize(dep->namelen)); 184 - XFS_WANT_CORRUPTED_RETURN(mp, 185 - be16_to_cpu(*ops->data_entry_tag_p(dep)) == 186 - (char *)dep - (char *)hdr); 187 - XFS_WANT_CORRUPTED_RETURN(mp, 188 - ops->data_get_ftype(dep) < XFS_DIR3_FT_MAX); 166 + if (dep->namelen == 0) 167 + return __this_address; 168 + if (xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber))) 169 + return __this_address; 170 + if (endp < p + ops->data_entsize(dep->namelen)) 171 + return __this_address; 172 + if (be16_to_cpu(*ops->data_entry_tag_p(dep)) != 173 + (char *)dep - (char *)hdr) 174 + return __this_address; 175 + if (ops->data_get_ftype(dep) >= XFS_DIR3_FT_MAX) 176 + return __this_address; 189 177 count++; 190 178 lastfree = 0; 191 179 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || ··· 202 188 be32_to_cpu(lep[i].hashval) == hash) 203 189 break; 204 190 } 205 - XFS_WANT_CORRUPTED_RETURN(mp, 206 - i < be32_to_cpu(btp->count)); 191 + if (i >= be32_to_cpu(btp->count)) 192 + return __this_address; 207 193 } 208 194 p += ops->data_entsize(dep->namelen); 209 195 } 210 196 /* 211 197 * Need to have seen all the entries and all the bestfree slots. 
212 198 */ 213 - XFS_WANT_CORRUPTED_RETURN(mp, freeseen == 7); 199 + if (freeseen != 7) 200 + return __this_address; 214 201 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || 215 202 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { 216 203 for (i = stale = 0; i < be32_to_cpu(btp->count); i++) { 217 204 if (lep[i].address == 218 205 cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) 219 206 stale++; 220 - if (i > 0) 221 - XFS_WANT_CORRUPTED_RETURN(mp, 222 - be32_to_cpu(lep[i].hashval) >= 223 - be32_to_cpu(lep[i - 1].hashval)); 207 + if (i > 0 && be32_to_cpu(lep[i].hashval) < 208 + be32_to_cpu(lep[i - 1].hashval)) 209 + return __this_address; 224 210 } 225 - XFS_WANT_CORRUPTED_RETURN(mp, count == 226 - be32_to_cpu(btp->count) - be32_to_cpu(btp->stale)); 227 - XFS_WANT_CORRUPTED_RETURN(mp, stale == be32_to_cpu(btp->stale)); 211 + if (count != be32_to_cpu(btp->count) - be32_to_cpu(btp->stale)) 212 + return __this_address; 213 + if (stale != be32_to_cpu(btp->stale)) 214 + return __this_address; 228 215 } 229 - return 0; 216 + return NULL; 230 217 } 231 218 232 - static bool 219 + #ifdef DEBUG 220 + void 221 + xfs_dir3_data_check( 222 + struct xfs_inode *dp, 223 + struct xfs_buf *bp) 224 + { 225 + xfs_failaddr_t fa; 226 + 227 + fa = __xfs_dir3_data_check(dp, bp); 228 + if (!fa) 229 + return; 230 + xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, dp->i_mount, 231 + bp->b_addr, __FILE__, __LINE__, fa); 232 + ASSERT(0); 233 + } 234 + #endif 235 + 236 + static xfs_failaddr_t 233 237 xfs_dir3_data_verify( 234 238 struct xfs_buf *bp) 235 239 { ··· 256 224 257 225 if (xfs_sb_version_hascrc(&mp->m_sb)) { 258 226 if (hdr3->magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC)) 259 - return false; 227 + return __this_address; 260 228 if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) 261 - return false; 229 + return __this_address; 262 230 if (be64_to_cpu(hdr3->blkno) != bp->b_bn) 263 - return false; 231 + return __this_address; 264 232 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) 265 - 
return false; 233 + return __this_address; 266 234 } else { 267 235 if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC)) 268 - return false; 236 + return __this_address; 269 237 } 270 - if (__xfs_dir3_data_check(NULL, bp)) 271 - return false; 272 - return true; 238 + return __xfs_dir3_data_check(NULL, bp); 273 239 } 274 240 275 241 /* ··· 293 263 bp->b_ops->verify_read(bp); 294 264 return; 295 265 default: 296 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 297 - xfs_verifier_error(bp); 266 + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); 298 267 break; 299 268 } 300 269 } ··· 303 274 struct xfs_buf *bp) 304 275 { 305 276 struct xfs_mount *mp = bp->b_target->bt_mount; 277 + xfs_failaddr_t fa; 306 278 307 279 if (xfs_sb_version_hascrc(&mp->m_sb) && 308 - !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) 309 - xfs_buf_ioerror(bp, -EFSBADCRC); 310 - else if (!xfs_dir3_data_verify(bp)) 311 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 312 - 313 - if (bp->b_error) 314 - xfs_verifier_error(bp); 280 + !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) 281 + xfs_verifier_error(bp, -EFSBADCRC, __this_address); 282 + else { 283 + fa = xfs_dir3_data_verify(bp); 284 + if (fa) 285 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 286 + } 315 287 } 316 288 317 289 static void ··· 320 290 struct xfs_buf *bp) 321 291 { 322 292 struct xfs_mount *mp = bp->b_target->bt_mount; 323 - struct xfs_buf_log_item *bip = bp->b_fspriv; 293 + struct xfs_buf_log_item *bip = bp->b_log_item; 324 294 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 295 + xfs_failaddr_t fa; 325 296 326 - if (!xfs_dir3_data_verify(bp)) { 327 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 328 - xfs_verifier_error(bp); 297 + fa = xfs_dir3_data_verify(bp); 298 + if (fa) { 299 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 329 300 return; 330 301 } 331 302 ··· 343 312 .name = "xfs_dir3_data", 344 313 .verify_read = xfs_dir3_data_read_verify, 345 314 .verify_write = xfs_dir3_data_write_verify, 315 + .verify_struct = xfs_dir3_data_verify, 346 316 }; 347 
317 348 318 static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = { ··· 547 515 struct xfs_dir2_data_hdr *hdr, 548 516 int *loghead) 549 517 { 550 - xfs_dir2_block_tail_t *btp; /* block tail */ 551 518 xfs_dir2_data_entry_t *dep; /* active data entry */ 552 519 xfs_dir2_data_unused_t *dup; /* unused data entry */ 553 520 struct xfs_dir2_data_free *bf; ··· 568 537 * Set up pointers. 569 538 */ 570 539 p = (char *)ops->data_entry_p(hdr); 571 - if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || 572 - hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { 573 - btp = xfs_dir2_block_tail_p(geo, hdr); 574 - endp = (char *)xfs_dir2_block_leaf_p(btp); 575 - } else 576 - endp = (char *)hdr + geo->blksize; 540 + endp = xfs_dir3_data_endp(geo, hdr); 577 541 /* 578 542 * Loop over the block's entries. 579 543 */ ··· 781 755 /* 782 756 * Figure out where the end of the data area is. 783 757 */ 784 - if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || 785 - hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC)) 786 - endptr = (char *)hdr + args->geo->blksize; 787 - else { 788 - xfs_dir2_block_tail_t *btp; /* block tail */ 758 + endptr = xfs_dir3_data_endp(args->geo, hdr); 759 + ASSERT(endptr != NULL); 789 760 790 - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || 791 - hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); 792 - btp = xfs_dir2_block_tail_p(args->geo, hdr); 793 - endptr = (char *)xfs_dir2_block_leaf_p(btp); 794 - } 795 761 /* 796 762 * If this isn't the start of the block, then back up to 797 763 * the previous entry and see if it's free. ··· 1084 1066 } 1085 1067 } 1086 1068 *needscanp = needscan; 1069 + } 1070 + 1071 + /* Find the end of the entry data in a data/block format dir block. 
*/ 1072 + void * 1073 + xfs_dir3_data_endp( 1074 + struct xfs_da_geometry *geo, 1075 + struct xfs_dir2_data_hdr *hdr) 1076 + { 1077 + switch (hdr->magic) { 1078 + case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC): 1079 + case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): 1080 + return xfs_dir2_block_leaf_p(xfs_dir2_block_tail_p(geo, hdr)); 1081 + case cpu_to_be32(XFS_DIR3_DATA_MAGIC): 1082 + case cpu_to_be32(XFS_DIR2_DATA_MAGIC): 1083 + return (char *)hdr + geo->blksize; 1084 + default: 1085 + return NULL; 1086 + } 1087 1087 }
+58 -31
fs/xfs/libxfs/xfs_dir2_leaf.c
··· 50 50 * Pop an assert if something is wrong. 51 51 */ 52 52 #ifdef DEBUG 53 - #define xfs_dir3_leaf_check(dp, bp) \ 54 - do { \ 55 - if (!xfs_dir3_leaf1_check((dp), (bp))) \ 56 - ASSERT(0); \ 57 - } while (0); 58 - 59 - STATIC bool 53 + static xfs_failaddr_t 60 54 xfs_dir3_leaf1_check( 61 55 struct xfs_inode *dp, 62 56 struct xfs_buf *bp) ··· 63 69 if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) { 64 70 struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; 65 71 if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) 66 - return false; 72 + return __this_address; 67 73 } else if (leafhdr.magic != XFS_DIR2_LEAF1_MAGIC) 68 - return false; 74 + return __this_address; 69 75 70 76 return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf); 77 + } 78 + 79 + static inline void 80 + xfs_dir3_leaf_check( 81 + struct xfs_inode *dp, 82 + struct xfs_buf *bp) 83 + { 84 + xfs_failaddr_t fa; 85 + 86 + fa = xfs_dir3_leaf1_check(dp, bp); 87 + if (!fa) 88 + return; 89 + xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, dp->i_mount, 90 + bp->b_addr, __FILE__, __LINE__, fa); 91 + ASSERT(0); 71 92 } 72 93 #else 73 94 #define xfs_dir3_leaf_check(dp, bp) 74 95 #endif 75 96 76 - bool 97 + xfs_failaddr_t 77 98 xfs_dir3_leaf_check_int( 78 99 struct xfs_mount *mp, 79 100 struct xfs_inode *dp, ··· 123 114 * We can deduce a value for that from di_size. 124 115 */ 125 116 if (hdr->count > ops->leaf_max_ents(geo)) 126 - return false; 117 + return __this_address; 127 118 128 119 /* Leaves and bests don't overlap in leaf format. */ 129 120 if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC || 130 121 hdr->magic == XFS_DIR3_LEAF1_MAGIC) && 131 122 (char *)&ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp)) 132 - return false; 123 + return __this_address; 133 124 134 125 /* Check hash value order, count stale entries. 
*/ 135 126 for (i = stale = 0; i < hdr->count; i++) { 136 127 if (i + 1 < hdr->count) { 137 128 if (be32_to_cpu(ents[i].hashval) > 138 129 be32_to_cpu(ents[i + 1].hashval)) 139 - return false; 130 + return __this_address; 140 131 } 141 132 if (ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) 142 133 stale++; 143 134 } 144 135 if (hdr->stale != stale) 145 - return false; 146 - return true; 136 + return __this_address; 137 + return NULL; 147 138 } 148 139 149 140 /* ··· 151 142 * kernels we don't get assertion failures in xfs_dir3_leaf_hdr_from_disk() due 152 143 * to incorrect magic numbers. 153 144 */ 154 - static bool 145 + static xfs_failaddr_t 155 146 xfs_dir3_leaf_verify( 156 147 struct xfs_buf *bp, 157 148 uint16_t magic) ··· 169 160 : XFS_DIR3_LEAFN_MAGIC; 170 161 171 162 if (leaf3->info.hdr.magic != cpu_to_be16(magic3)) 172 - return false; 163 + return __this_address; 173 164 if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid)) 174 - return false; 165 + return __this_address; 175 166 if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) 176 - return false; 167 + return __this_address; 177 168 if (!xfs_log_check_lsn(mp, be64_to_cpu(leaf3->info.lsn))) 178 - return false; 169 + return __this_address; 179 170 } else { 180 171 if (leaf->hdr.info.magic != cpu_to_be16(magic)) 181 - return false; 172 + return __this_address; 182 173 } 183 174 184 175 return xfs_dir3_leaf_check_int(mp, NULL, NULL, leaf); ··· 190 181 uint16_t magic) 191 182 { 192 183 struct xfs_mount *mp = bp->b_target->bt_mount; 184 + xfs_failaddr_t fa; 193 185 194 186 if (xfs_sb_version_hascrc(&mp->m_sb) && 195 187 !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF)) 196 - xfs_buf_ioerror(bp, -EFSBADCRC); 197 - else if (!xfs_dir3_leaf_verify(bp, magic)) 198 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 199 - 200 - if (bp->b_error) 201 - xfs_verifier_error(bp); 188 + xfs_verifier_error(bp, -EFSBADCRC, __this_address); 189 + else { 190 + fa = xfs_dir3_leaf_verify(bp, magic); 191 + if (fa) 192 + 
xfs_verifier_error(bp, -EFSCORRUPTED, fa); 193 + } 202 194 } 203 195 204 196 static void ··· 208 198 uint16_t magic) 209 199 { 210 200 struct xfs_mount *mp = bp->b_target->bt_mount; 211 - struct xfs_buf_log_item *bip = bp->b_fspriv; 201 + struct xfs_buf_log_item *bip = bp->b_log_item; 212 202 struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; 203 + xfs_failaddr_t fa; 213 204 214 - if (!xfs_dir3_leaf_verify(bp, magic)) { 215 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 216 - xfs_verifier_error(bp); 205 + fa = xfs_dir3_leaf_verify(bp, magic); 206 + if (fa) { 207 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 217 208 return; 218 209 } 219 210 ··· 225 214 hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); 226 215 227 216 xfs_buf_update_cksum(bp, XFS_DIR3_LEAF_CRC_OFF); 217 + } 218 + 219 + static xfs_failaddr_t 220 + xfs_dir3_leaf1_verify( 221 + struct xfs_buf *bp) 222 + { 223 + return xfs_dir3_leaf_verify(bp, XFS_DIR2_LEAF1_MAGIC); 228 224 } 229 225 230 226 static void ··· 246 228 struct xfs_buf *bp) 247 229 { 248 230 __write_verify(bp, XFS_DIR2_LEAF1_MAGIC); 231 + } 232 + 233 + static xfs_failaddr_t 234 + xfs_dir3_leafn_verify( 235 + struct xfs_buf *bp) 236 + { 237 + return xfs_dir3_leaf_verify(bp, XFS_DIR2_LEAFN_MAGIC); 249 238 } 250 239 251 240 static void ··· 273 248 .name = "xfs_dir3_leaf1", 274 249 .verify_read = xfs_dir3_leaf1_read_verify, 275 250 .verify_write = xfs_dir3_leaf1_write_verify, 251 + .verify_struct = xfs_dir3_leaf1_verify, 276 252 }; 277 253 278 254 const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = { 279 255 .name = "xfs_dir3_leafn", 280 256 .verify_read = xfs_dir3_leafn_read_verify, 281 257 .verify_write = xfs_dir3_leafn_write_verify, 258 + .verify_struct = xfs_dir3_leafn_verify, 282 259 }; 283 260 284 261 int
+51 -38
fs/xfs/libxfs/xfs_dir2_node.c
··· 53 53 * Check internal consistency of a leafn block. 54 54 */ 55 55 #ifdef DEBUG 56 - #define xfs_dir3_leaf_check(dp, bp) \ 57 - do { \ 58 - if (!xfs_dir3_leafn_check((dp), (bp))) \ 59 - ASSERT(0); \ 60 - } while (0); 61 - 62 - static bool 56 + static xfs_failaddr_t 63 57 xfs_dir3_leafn_check( 64 58 struct xfs_inode *dp, 65 59 struct xfs_buf *bp) ··· 66 72 if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC) { 67 73 struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; 68 74 if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) 69 - return false; 75 + return __this_address; 70 76 } else if (leafhdr.magic != XFS_DIR2_LEAFN_MAGIC) 71 - return false; 77 + return __this_address; 72 78 73 79 return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf); 80 + } 81 + 82 + static inline void 83 + xfs_dir3_leaf_check( 84 + struct xfs_inode *dp, 85 + struct xfs_buf *bp) 86 + { 87 + xfs_failaddr_t fa; 88 + 89 + fa = xfs_dir3_leafn_check(dp, bp); 90 + if (!fa) 91 + return; 92 + xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, dp->i_mount, 93 + bp->b_addr, __FILE__, __LINE__, fa); 94 + ASSERT(0); 74 95 } 75 96 #else 76 97 #define xfs_dir3_leaf_check(dp, bp) 77 98 #endif 78 99 79 - static bool 100 + static xfs_failaddr_t 80 101 xfs_dir3_free_verify( 81 102 struct xfs_buf *bp) 82 103 { ··· 102 93 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 103 94 104 95 if (hdr3->magic != cpu_to_be32(XFS_DIR3_FREE_MAGIC)) 105 - return false; 96 + return __this_address; 106 97 if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) 107 - return false; 98 + return __this_address; 108 99 if (be64_to_cpu(hdr3->blkno) != bp->b_bn) 109 - return false; 100 + return __this_address; 110 101 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) 111 - return false; 102 + return __this_address; 112 103 } else { 113 104 if (hdr->magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)) 114 - return false; 105 + return __this_address; 115 106 } 116 107 117 108 /* XXX: should bounds check the xfs_dir3_icfree_hdr here */ 118 109 119 - return true; 
110 + return NULL; 120 111 } 121 112 122 113 static void ··· 124 115 struct xfs_buf *bp) 125 116 { 126 117 struct xfs_mount *mp = bp->b_target->bt_mount; 118 + xfs_failaddr_t fa; 127 119 128 120 if (xfs_sb_version_hascrc(&mp->m_sb) && 129 121 !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF)) 130 - xfs_buf_ioerror(bp, -EFSBADCRC); 131 - else if (!xfs_dir3_free_verify(bp)) 132 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 133 - 134 - if (bp->b_error) 135 - xfs_verifier_error(bp); 122 + xfs_verifier_error(bp, -EFSBADCRC, __this_address); 123 + else { 124 + fa = xfs_dir3_free_verify(bp); 125 + if (fa) 126 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 127 + } 136 128 } 137 129 138 130 static void ··· 141 131 struct xfs_buf *bp) 142 132 { 143 133 struct xfs_mount *mp = bp->b_target->bt_mount; 144 - struct xfs_buf_log_item *bip = bp->b_fspriv; 134 + struct xfs_buf_log_item *bip = bp->b_log_item; 145 135 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 136 + xfs_failaddr_t fa; 146 137 147 - if (!xfs_dir3_free_verify(bp)) { 148 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 149 - xfs_verifier_error(bp); 138 + fa = xfs_dir3_free_verify(bp); 139 + if (fa) { 140 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 150 141 return; 151 142 } 152 143 ··· 164 153 .name = "xfs_dir3_free", 165 154 .verify_read = xfs_dir3_free_read_verify, 166 155 .verify_write = xfs_dir3_free_write_verify, 156 + .verify_struct = xfs_dir3_free_verify, 167 157 }; 168 158 169 159 /* Everything ok in the free block header? 
*/ 170 - static bool 160 + static xfs_failaddr_t 171 161 xfs_dir3_free_header_check( 172 162 struct xfs_inode *dp, 173 163 xfs_dablk_t fbno, ··· 186 174 struct xfs_dir3_free_hdr *hdr3 = bp->b_addr; 187 175 188 176 if (be32_to_cpu(hdr3->firstdb) != firstdb) 189 - return false; 177 + return __this_address; 190 178 if (be32_to_cpu(hdr3->nvalid) > maxbests) 191 - return false; 179 + return __this_address; 192 180 if (be32_to_cpu(hdr3->nvalid) < be32_to_cpu(hdr3->nused)) 193 - return false; 181 + return __this_address; 194 182 } else { 195 183 struct xfs_dir2_free_hdr *hdr = bp->b_addr; 196 184 197 185 if (be32_to_cpu(hdr->firstdb) != firstdb) 198 - return false; 186 + return __this_address; 199 187 if (be32_to_cpu(hdr->nvalid) > maxbests) 200 - return false; 188 + return __this_address; 201 189 if (be32_to_cpu(hdr->nvalid) < be32_to_cpu(hdr->nused)) 202 - return false; 190 + return __this_address; 203 191 } 204 - return true; 192 + return NULL; 205 193 } 206 194 207 195 static int ··· 212 200 xfs_daddr_t mappedbno, 213 201 struct xfs_buf **bpp) 214 202 { 203 + xfs_failaddr_t fa; 215 204 int err; 216 205 217 206 err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, ··· 221 208 return err; 222 209 223 210 /* Check things that we can't do in the verifier. */ 224 - if (!xfs_dir3_free_header_check(dp, fbno, *bpp)) { 225 - xfs_buf_ioerror(*bpp, -EFSCORRUPTED); 226 - xfs_verifier_error(*bpp); 211 + fa = xfs_dir3_free_header_check(dp, fbno, *bpp); 212 + if (fa) { 213 + xfs_verifier_error(*bpp, -EFSCORRUPTED, fa); 227 214 xfs_trans_brelse(tp, *bpp); 228 215 return -EFSCORRUPTED; 229 216 } ··· 1919 1906 (unsigned long long)ifbno, lastfbno); 1920 1907 if (fblk) { 1921 1908 xfs_alert(mp, 1922 - " fblk 0x%p blkno %llu index %d magic 0x%x", 1909 + " fblk "PTR_FMT" blkno %llu index %d magic 0x%x", 1923 1910 fblk, 1924 1911 (unsigned long long)fblk->blkno, 1925 1912 fblk->index,
+7 -5
fs/xfs/libxfs/xfs_dir2_priv.h
··· 39 39 40 40 /* xfs_dir2_data.c */ 41 41 #ifdef DEBUG 42 - #define xfs_dir3_data_check(dp,bp) __xfs_dir3_data_check(dp, bp); 42 + extern void xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp); 43 43 #else 44 44 #define xfs_dir3_data_check(dp,bp) 45 45 #endif 46 46 47 - extern int __xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp); 47 + extern xfs_failaddr_t __xfs_dir3_data_check(struct xfs_inode *dp, 48 + struct xfs_buf *bp); 48 49 extern int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp, 49 50 xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp); 50 51 extern int xfs_dir3_data_readahead(struct xfs_inode *dp, xfs_dablk_t bno, ··· 90 89 int lowstale, int highstale, int *lfloglow, int *lfloghigh); 91 90 extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state); 92 91 93 - extern bool xfs_dir3_leaf_check_int(struct xfs_mount *mp, struct xfs_inode *dp, 94 - struct xfs_dir3_icleaf_hdr *hdr, struct xfs_dir2_leaf *leaf); 92 + extern xfs_failaddr_t xfs_dir3_leaf_check_int(struct xfs_mount *mp, 93 + struct xfs_inode *dp, struct xfs_dir3_icleaf_hdr *hdr, 94 + struct xfs_dir2_leaf *leaf); 95 95 96 96 /* xfs_dir2_node.c */ 97 97 extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args, ··· 129 127 extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); 130 128 extern int xfs_dir2_sf_removename(struct xfs_da_args *args); 131 129 extern int xfs_dir2_sf_replace(struct xfs_da_args *args); 132 - extern int xfs_dir2_sf_verify(struct xfs_inode *ip); 130 + extern xfs_failaddr_t xfs_dir2_sf_verify(struct xfs_inode *ip); 133 131 134 132 /* xfs_dir2_readdir.c */ 135 133 extern int xfs_readdir(struct xfs_trans *tp, struct xfs_inode *dp,
+14 -16
fs/xfs/libxfs/xfs_dir2_sf.c
··· 156 156 xfs_dir2_sf_hdr_t *sfhp) /* shortform directory hdr */ 157 157 { 158 158 xfs_dir2_data_hdr_t *hdr; /* block header */ 159 - xfs_dir2_block_tail_t *btp; /* block tail pointer */ 160 159 xfs_dir2_data_entry_t *dep; /* data entry pointer */ 161 160 xfs_inode_t *dp; /* incore directory inode */ 162 161 xfs_dir2_data_unused_t *dup; /* unused data pointer */ ··· 191 192 /* 192 193 * Set up to loop over the block's entries. 193 194 */ 194 - btp = xfs_dir2_block_tail_p(args->geo, hdr); 195 195 ptr = (char *)dp->d_ops->data_entry_p(hdr); 196 - endptr = (char *)xfs_dir2_block_leaf_p(btp); 196 + endptr = xfs_dir3_data_endp(args->geo, hdr); 197 197 sfep = xfs_dir2_sf_firstentry(sfp); 198 198 /* 199 199 * Loop over the active and unused entries. ··· 628 630 #endif /* DEBUG */ 629 631 630 632 /* Verify the consistency of an inline directory. */ 631 - int 633 + xfs_failaddr_t 632 634 xfs_dir2_sf_verify( 633 635 struct xfs_inode *ip) 634 636 { ··· 663 665 */ 664 666 if (size <= offsetof(struct xfs_dir2_sf_hdr, parent) || 665 667 size < xfs_dir2_sf_hdr_size(sfp->i8count)) 666 - return -EFSCORRUPTED; 668 + return __this_address; 667 669 668 670 endp = (char *)sfp + size; 669 671 ··· 672 674 i8count = ino > XFS_DIR2_MAX_SHORT_INUM; 673 675 error = xfs_dir_ino_validate(mp, ino); 674 676 if (error) 675 - return error; 677 + return __this_address; 676 678 offset = dops->data_first_offset; 677 679 678 680 /* Check all reported entries */ ··· 684 686 * within the data buffer. 685 687 */ 686 688 if (((char *)sfep + sizeof(*sfep)) >= endp) 687 - return -EFSCORRUPTED; 689 + return __this_address; 688 690 689 691 /* Don't allow names with known bad length. 
*/ 690 692 if (sfep->namelen == 0) 691 - return -EFSCORRUPTED; 693 + return __this_address; 692 694 693 695 /* 694 696 * Check that the variable-length part of the structure is ··· 697 699 */ 698 700 next_sfep = dops->sf_nextentry(sfp, sfep); 699 701 if (endp < (char *)next_sfep) 700 - return -EFSCORRUPTED; 702 + return __this_address; 701 703 702 704 /* Check that the offsets always increase. */ 703 705 if (xfs_dir2_sf_get_offset(sfep) < offset) 704 - return -EFSCORRUPTED; 706 + return __this_address; 705 707 706 708 /* Check the inode number. */ 707 709 ino = dops->sf_get_ino(sfp, sfep); 708 710 i8count += ino > XFS_DIR2_MAX_SHORT_INUM; 709 711 error = xfs_dir_ino_validate(mp, ino); 710 712 if (error) 711 - return error; 713 + return __this_address; 712 714 713 715 /* Check the file type. */ 714 716 filetype = dops->sf_get_ftype(sfep); 715 717 if (filetype >= XFS_DIR3_FT_MAX) 716 - return -EFSCORRUPTED; 718 + return __this_address; 717 719 718 720 offset = xfs_dir2_sf_get_offset(sfep) + 719 721 dops->data_entsize(sfep->namelen); ··· 721 723 sfep = next_sfep; 722 724 } 723 725 if (i8count != sfp->i8count) 724 - return -EFSCORRUPTED; 726 + return __this_address; 725 727 if ((void *)sfep != (void *)endp) 726 - return -EFSCORRUPTED; 728 + return __this_address; 727 729 728 730 /* Make sure this whole thing ought to be in local format. */ 729 731 if (offset + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + 730 732 (uint)sizeof(xfs_dir2_block_tail_t) > mp->m_dir_geo->blksize) 731 - return -EFSCORRUPTED; 733 + return __this_address; 732 734 733 - return 0; 735 + return NULL; 734 736 } 735 737 736 738 /*
+75 -99
fs/xfs/libxfs/xfs_dquot_buf.c
··· 42 42 /* 43 43 * Do some primitive error checking on ondisk dquot data structures. 44 44 */ 45 - int 46 - xfs_dqcheck( 45 + xfs_failaddr_t 46 + xfs_dquot_verify( 47 47 struct xfs_mount *mp, 48 48 xfs_disk_dquot_t *ddq, 49 49 xfs_dqid_t id, 50 50 uint type, /* used only when IO_dorepair is true */ 51 - uint flags, 52 - const char *str) 51 + uint flags) 53 52 { 54 - xfs_dqblk_t *d = (xfs_dqblk_t *)ddq; 55 - int errs = 0; 56 - 57 53 /* 58 54 * We can encounter an uninitialized dquot buffer for 2 reasons: 59 55 * 1. If we crash while deleting the quotainode(s), and those blks got ··· 65 69 * This is all fine; things are still consistent, and we haven't lost 66 70 * any quota information. Just don't complain about bad dquot blks. 67 71 */ 68 - if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) { 69 - if (flags & XFS_QMOPT_DOWARN) 70 - xfs_alert(mp, 71 - "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", 72 - str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC); 73 - errs++; 74 - } 75 - if (ddq->d_version != XFS_DQUOT_VERSION) { 76 - if (flags & XFS_QMOPT_DOWARN) 77 - xfs_alert(mp, 78 - "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x", 79 - str, id, ddq->d_version, XFS_DQUOT_VERSION); 80 - errs++; 81 - } 72 + if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) 73 + return __this_address; 74 + if (ddq->d_version != XFS_DQUOT_VERSION) 75 + return __this_address; 82 76 83 77 if (ddq->d_flags != XFS_DQ_USER && 84 78 ddq->d_flags != XFS_DQ_PROJ && 85 - ddq->d_flags != XFS_DQ_GROUP) { 86 - if (flags & XFS_QMOPT_DOWARN) 87 - xfs_alert(mp, 88 - "%s : XFS dquot ID 0x%x, unknown flags 0x%x", 89 - str, id, ddq->d_flags); 90 - errs++; 91 - } 79 + ddq->d_flags != XFS_DQ_GROUP) 80 + return __this_address; 92 81 93 - if (id != -1 && id != be32_to_cpu(ddq->d_id)) { 94 - if (flags & XFS_QMOPT_DOWARN) 95 - xfs_alert(mp, 96 - "%s : ondisk-dquot 0x%p, ID mismatch: " 97 - "0x%x expected, found id 0x%x", 98 - str, ddq, id, be32_to_cpu(ddq->d_id)); 99 - errs++; 100 - } 82 + if (id != -1 && id != 
be32_to_cpu(ddq->d_id)) 83 + return __this_address; 101 84 102 - if (!errs && ddq->d_id) { 103 - if (ddq->d_blk_softlimit && 104 - be64_to_cpu(ddq->d_bcount) > 105 - be64_to_cpu(ddq->d_blk_softlimit)) { 106 - if (!ddq->d_btimer) { 107 - if (flags & XFS_QMOPT_DOWARN) 108 - xfs_alert(mp, 109 - "%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED", 110 - str, (int)be32_to_cpu(ddq->d_id), ddq); 111 - errs++; 112 - } 113 - } 114 - if (ddq->d_ino_softlimit && 115 - be64_to_cpu(ddq->d_icount) > 116 - be64_to_cpu(ddq->d_ino_softlimit)) { 117 - if (!ddq->d_itimer) { 118 - if (flags & XFS_QMOPT_DOWARN) 119 - xfs_alert(mp, 120 - "%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED", 121 - str, (int)be32_to_cpu(ddq->d_id), ddq); 122 - errs++; 123 - } 124 - } 125 - if (ddq->d_rtb_softlimit && 126 - be64_to_cpu(ddq->d_rtbcount) > 127 - be64_to_cpu(ddq->d_rtb_softlimit)) { 128 - if (!ddq->d_rtbtimer) { 129 - if (flags & XFS_QMOPT_DOWARN) 130 - xfs_alert(mp, 131 - "%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED", 132 - str, (int)be32_to_cpu(ddq->d_id), ddq); 133 - errs++; 134 - } 135 - } 136 - } 85 + if (!ddq->d_id) 86 + return NULL; 137 87 138 - if (!errs || !(flags & XFS_QMOPT_DQREPAIR)) 139 - return errs; 88 + if (ddq->d_blk_softlimit && 89 + be64_to_cpu(ddq->d_bcount) > be64_to_cpu(ddq->d_blk_softlimit) && 90 + !ddq->d_btimer) 91 + return __this_address; 140 92 141 - if (flags & XFS_QMOPT_DOWARN) 142 - xfs_notice(mp, "Re-initializing dquot ID 0x%x", id); 93 + if (ddq->d_ino_softlimit && 94 + be64_to_cpu(ddq->d_icount) > be64_to_cpu(ddq->d_ino_softlimit) && 95 + !ddq->d_itimer) 96 + return __this_address; 97 + 98 + if (ddq->d_rtb_softlimit && 99 + be64_to_cpu(ddq->d_rtbcount) > be64_to_cpu(ddq->d_rtb_softlimit) && 100 + !ddq->d_rtbtimer) 101 + return __this_address; 102 + 103 + return NULL; 104 + } 105 + 106 + /* 107 + * Do some primitive error checking on ondisk dquot data structures. 
108 + */ 109 + int 110 + xfs_dquot_repair( 111 + struct xfs_mount *mp, 112 + struct xfs_disk_dquot *ddq, 113 + xfs_dqid_t id, 114 + uint type) 115 + { 116 + struct xfs_dqblk *d = (struct xfs_dqblk *)ddq; 117 + 143 118 144 119 /* 145 120 * Typically, a repair is only requested by quotacheck. 146 121 */ 147 122 ASSERT(id != -1); 148 - ASSERT(flags & XFS_QMOPT_DQREPAIR); 149 123 memset(d, 0, sizeof(xfs_dqblk_t)); 150 124 151 125 d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); ··· 129 163 XFS_DQUOT_CRC_OFF); 130 164 } 131 165 132 - return errs; 166 + return 0; 133 167 } 134 168 135 169 STATIC bool ··· 164 198 return true; 165 199 } 166 200 167 - STATIC bool 201 + STATIC xfs_failaddr_t 168 202 xfs_dquot_buf_verify( 169 203 struct xfs_mount *mp, 170 - struct xfs_buf *bp, 171 - int warn) 204 + struct xfs_buf *bp) 172 205 { 173 206 struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; 207 + xfs_failaddr_t fa; 174 208 xfs_dqid_t id = 0; 175 209 int ndquots; 176 210 int i; ··· 194 228 */ 195 229 for (i = 0; i < ndquots; i++) { 196 230 struct xfs_disk_dquot *ddq; 197 - int error; 198 231 199 232 ddq = &d[i].dd_diskdq; 200 233 201 234 if (i == 0) 202 235 id = be32_to_cpu(ddq->d_id); 203 236 204 - error = xfs_dqcheck(mp, ddq, id + i, 0, warn, __func__); 205 - if (error) 206 - return false; 237 + fa = xfs_dquot_verify(mp, ddq, id + i, 0, 0); 238 + if (fa) 239 + return fa; 207 240 } 208 - return true; 241 + 242 + return NULL; 243 + } 244 + 245 + static xfs_failaddr_t 246 + xfs_dquot_buf_verify_struct( 247 + struct xfs_buf *bp) 248 + { 249 + struct xfs_mount *mp = bp->b_target->bt_mount; 250 + 251 + return xfs_dquot_buf_verify(mp, bp); 209 252 } 210 253 211 254 static void 212 255 xfs_dquot_buf_read_verify( 213 - struct xfs_buf *bp) 256 + struct xfs_buf *bp) 214 257 { 215 258 struct xfs_mount *mp = bp->b_target->bt_mount; 259 + xfs_failaddr_t fa; 216 260 217 261 if (!xfs_dquot_buf_verify_crc(mp, bp)) 218 - xfs_buf_ioerror(bp, -EFSBADCRC); 219 - else if 
(!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) 220 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 221 - 222 - if (bp->b_error) 223 - xfs_verifier_error(bp); 262 + xfs_verifier_error(bp, -EFSBADCRC, __this_address); 263 + else { 264 + fa = xfs_dquot_buf_verify(mp, bp); 265 + if (fa) 266 + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); 267 + } 224 268 } 225 269 226 270 /* ··· 246 270 struct xfs_mount *mp = bp->b_target->bt_mount; 247 271 248 272 if (!xfs_dquot_buf_verify_crc(mp, bp) || 249 - !xfs_dquot_buf_verify(mp, bp, 0)) { 273 + xfs_dquot_buf_verify(mp, bp) != NULL) { 250 274 xfs_buf_ioerror(bp, -EIO); 251 275 bp->b_flags &= ~XBF_DONE; 252 276 } ··· 259 283 */ 260 284 static void 261 285 xfs_dquot_buf_write_verify( 262 - struct xfs_buf *bp) 286 + struct xfs_buf *bp) 263 287 { 264 288 struct xfs_mount *mp = bp->b_target->bt_mount; 289 + xfs_failaddr_t fa; 265 290 266 - if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) { 267 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 268 - xfs_verifier_error(bp); 269 - return; 270 - } 291 + fa = xfs_dquot_buf_verify(mp, bp); 292 + if (fa) 293 + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); 271 294 } 272 295 273 296 const struct xfs_buf_ops xfs_dquot_buf_ops = { 274 297 .name = "xfs_dquot", 275 298 .verify_read = xfs_dquot_buf_read_verify, 276 299 .verify_write = xfs_dquot_buf_write_verify, 300 + .verify_struct = xfs_dquot_buf_verify_struct, 277 301 }; 278 302 279 303 const struct xfs_buf_ops xfs_dquot_buf_ra_ops = {
+7
fs/xfs/libxfs/xfs_fs.h
··· 233 233 #define XFS_MAX_LOG_BLOCKS (1024 * 1024ULL) 234 234 #define XFS_MIN_LOG_BYTES (10 * 1024 * 1024ULL) 235 235 236 + /* 237 + * Limits on sb_agblocks/sb_agblklog -- mkfs won't format AGs smaller than 238 + * 16MB or larger than 1TB. 239 + */ 240 + #define XFS_MIN_AG_BYTES (1ULL << 24) /* 16 MB */ 241 + #define XFS_MAX_AG_BYTES (1ULL << 40) /* 1 TB */ 242 + 236 243 /* keep the maximum size under 2^31 by a small amount */ 237 244 #define XFS_MAX_LOG_BYTES \ 238 245 ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES)
+122 -21
fs/xfs/libxfs/xfs_ialloc.c
··· 2491 2491 #define xfs_check_agi_unlinked(agi) 2492 2492 #endif 2493 2493 2494 - static bool 2494 + static xfs_failaddr_t 2495 2495 xfs_agi_verify( 2496 2496 struct xfs_buf *bp) 2497 2497 { ··· 2500 2500 2501 2501 if (xfs_sb_version_hascrc(&mp->m_sb)) { 2502 2502 if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid)) 2503 - return false; 2503 + return __this_address; 2504 2504 if (!xfs_log_check_lsn(mp, 2505 2505 be64_to_cpu(XFS_BUF_TO_AGI(bp)->agi_lsn))) 2506 - return false; 2506 + return __this_address; 2507 2507 } 2508 2508 2509 2509 /* 2510 2510 * Validate the magic number of the agi block. 2511 2511 */ 2512 2512 if (agi->agi_magicnum != cpu_to_be32(XFS_AGI_MAGIC)) 2513 - return false; 2513 + return __this_address; 2514 2514 if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum))) 2515 - return false; 2515 + return __this_address; 2516 2516 2517 2517 if (be32_to_cpu(agi->agi_level) < 1 || 2518 2518 be32_to_cpu(agi->agi_level) > XFS_BTREE_MAXLEVELS) 2519 - return false; 2519 + return __this_address; 2520 2520 2521 2521 if (xfs_sb_version_hasfinobt(&mp->m_sb) && 2522 2522 (be32_to_cpu(agi->agi_free_level) < 1 || 2523 2523 be32_to_cpu(agi->agi_free_level) > XFS_BTREE_MAXLEVELS)) 2524 - return false; 2524 + return __this_address; 2525 2525 2526 2526 /* 2527 2527 * during growfs operations, the perag is not fully initialised, ··· 2530 2530 * so we can detect and avoid this problem. 
2531 2531 */ 2532 2532 if (bp->b_pag && be32_to_cpu(agi->agi_seqno) != bp->b_pag->pag_agno) 2533 - return false; 2533 + return __this_address; 2534 2534 2535 2535 xfs_check_agi_unlinked(agi); 2536 - return true; 2536 + return NULL; 2537 2537 } 2538 2538 2539 2539 static void ··· 2541 2541 struct xfs_buf *bp) 2542 2542 { 2543 2543 struct xfs_mount *mp = bp->b_target->bt_mount; 2544 + xfs_failaddr_t fa; 2544 2545 2545 2546 if (xfs_sb_version_hascrc(&mp->m_sb) && 2546 2547 !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF)) 2547 - xfs_buf_ioerror(bp, -EFSBADCRC); 2548 - else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp, 2549 - XFS_ERRTAG_IALLOC_READ_AGI)) 2550 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 2551 - 2552 - if (bp->b_error) 2553 - xfs_verifier_error(bp); 2548 + xfs_verifier_error(bp, -EFSBADCRC, __this_address); 2549 + else { 2550 + fa = xfs_agi_verify(bp); 2551 + if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_IALLOC_READ_AGI)) 2552 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 2553 + } 2554 2554 } 2555 2555 2556 2556 static void 2557 2557 xfs_agi_write_verify( 2558 2558 struct xfs_buf *bp) 2559 2559 { 2560 - struct xfs_mount *mp = bp->b_target->bt_mount; 2561 - struct xfs_buf_log_item *bip = bp->b_fspriv; 2560 + struct xfs_mount *mp = bp->b_target->bt_mount; 2561 + struct xfs_buf_log_item *bip = bp->b_log_item; 2562 + xfs_failaddr_t fa; 2562 2563 2563 - if (!xfs_agi_verify(bp)) { 2564 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 2565 - xfs_verifier_error(bp); 2564 + fa = xfs_agi_verify(bp); 2565 + if (fa) { 2566 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 2566 2567 return; 2567 2568 } 2568 2569 ··· 2579 2578 .name = "xfs_agi", 2580 2579 .verify_read = xfs_agi_read_verify, 2581 2580 .verify_write = xfs_agi_write_verify, 2581 + .verify_struct = xfs_agi_verify, 2582 2582 }; 2583 2583 2584 2584 /* ··· 2752 2750 if (xfs_internal_inum(mp, ino)) 2753 2751 return false; 2754 2752 return xfs_verify_ino(mp, ino); 2753 + } 2754 + 2755 + /* Is there an inode record covering a given range of inode 
numbers? */ 2756 + int 2757 + xfs_ialloc_has_inode_record( 2758 + struct xfs_btree_cur *cur, 2759 + xfs_agino_t low, 2760 + xfs_agino_t high, 2761 + bool *exists) 2762 + { 2763 + struct xfs_inobt_rec_incore irec; 2764 + xfs_agino_t agino; 2765 + uint16_t holemask; 2766 + int has_record; 2767 + int i; 2768 + int error; 2769 + 2770 + *exists = false; 2771 + error = xfs_inobt_lookup(cur, low, XFS_LOOKUP_LE, &has_record); 2772 + while (error == 0 && has_record) { 2773 + error = xfs_inobt_get_rec(cur, &irec, &has_record); 2774 + if (error || irec.ir_startino > high) 2775 + break; 2776 + 2777 + agino = irec.ir_startino; 2778 + holemask = irec.ir_holemask; 2779 + for (i = 0; i < XFS_INOBT_HOLEMASK_BITS; holemask >>= 1, 2780 + i++, agino += XFS_INODES_PER_HOLEMASK_BIT) { 2781 + if (holemask & 1) 2782 + continue; 2783 + if (agino + XFS_INODES_PER_HOLEMASK_BIT > low && 2784 + agino <= high) { 2785 + *exists = true; 2786 + return 0; 2787 + } 2788 + } 2789 + 2790 + error = xfs_btree_increment(cur, 0, &has_record); 2791 + } 2792 + return error; 2793 + } 2794 + 2795 + /* Is there an inode record covering a given extent? */ 2796 + int 2797 + xfs_ialloc_has_inodes_at_extent( 2798 + struct xfs_btree_cur *cur, 2799 + xfs_agblock_t bno, 2800 + xfs_extlen_t len, 2801 + bool *exists) 2802 + { 2803 + xfs_agino_t low; 2804 + xfs_agino_t high; 2805 + 2806 + low = XFS_OFFBNO_TO_AGINO(cur->bc_mp, bno, 0); 2807 + high = XFS_OFFBNO_TO_AGINO(cur->bc_mp, bno + len, 0) - 1; 2808 + 2809 + return xfs_ialloc_has_inode_record(cur, low, high, exists); 2810 + } 2811 + 2812 + struct xfs_ialloc_count_inodes { 2813 + xfs_agino_t count; 2814 + xfs_agino_t freecount; 2815 + }; 2816 + 2817 + /* Record inode counts across all inobt records. 
*/ 2818 + STATIC int 2819 + xfs_ialloc_count_inodes_rec( 2820 + struct xfs_btree_cur *cur, 2821 + union xfs_btree_rec *rec, 2822 + void *priv) 2823 + { 2824 + struct xfs_inobt_rec_incore irec; 2825 + struct xfs_ialloc_count_inodes *ci = priv; 2826 + 2827 + xfs_inobt_btrec_to_irec(cur->bc_mp, rec, &irec); 2828 + ci->count += irec.ir_count; 2829 + ci->freecount += irec.ir_freecount; 2830 + 2831 + return 0; 2832 + } 2833 + 2834 + /* Count allocated and free inodes under an inobt. */ 2835 + int 2836 + xfs_ialloc_count_inodes( 2837 + struct xfs_btree_cur *cur, 2838 + xfs_agino_t *count, 2839 + xfs_agino_t *freecount) 2840 + { 2841 + struct xfs_ialloc_count_inodes ci = {0}; 2842 + int error; 2843 + 2844 + ASSERT(cur->bc_btnum == XFS_BTNUM_INO); 2845 + error = xfs_btree_query_all(cur, xfs_ialloc_count_inodes_rec, &ci); 2846 + if (error) 2847 + return error; 2848 + 2849 + *count = ci.count; 2850 + *freecount = ci.freecount; 2851 + return 0; 2755 2852 }
+6
fs/xfs/libxfs/xfs_ialloc.h
··· 170 170 union xfs_btree_rec; 171 171 void xfs_inobt_btrec_to_irec(struct xfs_mount *mp, union xfs_btree_rec *rec, 172 172 struct xfs_inobt_rec_incore *irec); 173 + int xfs_ialloc_has_inodes_at_extent(struct xfs_btree_cur *cur, 174 + xfs_agblock_t bno, xfs_extlen_t len, bool *exists); 175 + int xfs_ialloc_has_inode_record(struct xfs_btree_cur *cur, xfs_agino_t low, 176 + xfs_agino_t high, bool *exists); 177 + int xfs_ialloc_count_inodes(struct xfs_btree_cur *cur, xfs_agino_t *count, 178 + xfs_agino_t *freecount); 173 179 174 180 int xfs_ialloc_cluster_alignment(struct xfs_mount *mp); 175 181 void xfs_ialloc_agino_range(struct xfs_mount *mp, xfs_agnumber_t agno,
+50 -21
fs/xfs/libxfs/xfs_ialloc_btree.c
··· 141 141 union xfs_btree_ptr *new, 142 142 int *stat) 143 143 { 144 + if (cur->bc_mp->m_inotbt_nores) 145 + return xfs_inobt_alloc_block(cur, start, new, stat); 144 146 return __xfs_inobt_alloc_block(cur, start, new, stat, 145 147 XFS_AG_RESV_METADATA); 148 + } 149 + 150 + STATIC int 151 + __xfs_inobt_free_block( 152 + struct xfs_btree_cur *cur, 153 + struct xfs_buf *bp, 154 + enum xfs_ag_resv_type resv) 155 + { 156 + struct xfs_owner_info oinfo; 157 + 158 + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT); 159 + return xfs_free_extent(cur->bc_tp, 160 + XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1, 161 + &oinfo, resv); 146 162 } 147 163 148 164 STATIC int ··· 166 150 struct xfs_btree_cur *cur, 167 151 struct xfs_buf *bp) 168 152 { 169 - struct xfs_owner_info oinfo; 153 + return __xfs_inobt_free_block(cur, bp, XFS_AG_RESV_NONE); 154 + } 170 155 171 - xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT); 172 - return xfs_free_extent(cur->bc_tp, 173 - XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1, 174 - &oinfo, XFS_AG_RESV_NONE); 156 + STATIC int 157 + xfs_finobt_free_block( 158 + struct xfs_btree_cur *cur, 159 + struct xfs_buf *bp) 160 + { 161 + if (cur->bc_mp->m_inotbt_nores) 162 + return xfs_inobt_free_block(cur, bp); 163 + return __xfs_inobt_free_block(cur, bp, XFS_AG_RESV_METADATA); 175 164 } 176 165 177 166 STATIC int ··· 271 250 be32_to_cpu(k2->inobt.ir_startino); 272 251 } 273 252 274 - static int 253 + static xfs_failaddr_t 275 254 xfs_inobt_verify( 276 255 struct xfs_buf *bp) 277 256 { 278 257 struct xfs_mount *mp = bp->b_target->bt_mount; 279 258 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 259 + xfs_failaddr_t fa; 280 260 unsigned int level; 281 261 282 262 /* ··· 293 271 switch (block->bb_magic) { 294 272 case cpu_to_be32(XFS_IBT_CRC_MAGIC): 295 273 case cpu_to_be32(XFS_FIBT_CRC_MAGIC): 296 - if (!xfs_btree_sblock_v5hdr_verify(bp)) 297 - return false; 274 + fa = xfs_btree_sblock_v5hdr_verify(bp); 275 + if (fa) 276 + return fa; 298 277 /* fall 
through */ 299 278 case cpu_to_be32(XFS_IBT_MAGIC): 300 279 case cpu_to_be32(XFS_FIBT_MAGIC): 301 280 break; 302 281 default: 303 - return 0; 282 + return NULL; 304 283 } 305 284 306 285 /* level verification */ 307 286 level = be16_to_cpu(block->bb_level); 308 287 if (level >= mp->m_in_maxlevels) 309 - return false; 288 + return __this_address; 310 289 311 290 return xfs_btree_sblock_verify(bp, mp->m_inobt_mxr[level != 0]); 312 291 } ··· 316 293 xfs_inobt_read_verify( 317 294 struct xfs_buf *bp) 318 295 { 319 - if (!xfs_btree_sblock_verify_crc(bp)) 320 - xfs_buf_ioerror(bp, -EFSBADCRC); 321 - else if (!xfs_inobt_verify(bp)) 322 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 296 + xfs_failaddr_t fa; 323 297 324 - if (bp->b_error) { 325 - trace_xfs_btree_corrupt(bp, _RET_IP_); 326 - xfs_verifier_error(bp); 298 + if (!xfs_btree_sblock_verify_crc(bp)) 299 + xfs_verifier_error(bp, -EFSBADCRC, __this_address); 300 + else { 301 + fa = xfs_inobt_verify(bp); 302 + if (fa) 303 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 327 304 } 305 + 306 + if (bp->b_error) 307 + trace_xfs_btree_corrupt(bp, _RET_IP_); 328 308 } 329 309 330 310 static void 331 311 xfs_inobt_write_verify( 332 312 struct xfs_buf *bp) 333 313 { 334 - if (!xfs_inobt_verify(bp)) { 314 + xfs_failaddr_t fa; 315 + 316 + fa = xfs_inobt_verify(bp); 317 + if (fa) { 335 318 trace_xfs_btree_corrupt(bp, _RET_IP_); 336 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 337 - xfs_verifier_error(bp); 319 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 338 320 return; 339 321 } 340 322 xfs_btree_sblock_calc_crc(bp); ··· 350 322 .name = "xfs_inobt", 351 323 .verify_read = xfs_inobt_read_verify, 352 324 .verify_write = xfs_inobt_write_verify, 325 + .verify_struct = xfs_inobt_verify, 353 326 }; 354 327 355 328 STATIC int ··· 401 372 .dup_cursor = xfs_inobt_dup_cursor, 402 373 .set_root = xfs_finobt_set_root, 403 374 .alloc_block = xfs_finobt_alloc_block, 404 - .free_block = xfs_inobt_free_block, 375 + .free_block = xfs_finobt_free_block, 405 376 
.get_minrecs = xfs_inobt_get_minrecs, 406 377 .get_maxrecs = xfs_inobt_get_maxrecs, 407 378 .init_key_from_rec = xfs_inobt_init_key_from_rec,
+98 -30
fs/xfs/libxfs/xfs_inode_buf.c
··· 115 115 return; 116 116 } 117 117 118 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 119 - xfs_verifier_error(bp); 118 + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); 120 119 #ifdef DEBUG 121 120 xfs_alert(mp, 122 121 "bad inode magic/vsn daddr %lld #%d (magic=%x)", ··· 383 384 } 384 385 } 385 386 386 - bool 387 + xfs_failaddr_t 387 388 xfs_dinode_verify( 388 389 struct xfs_mount *mp, 389 390 xfs_ino_t ino, ··· 392 393 uint16_t mode; 393 394 uint16_t flags; 394 395 uint64_t flags2; 396 + uint64_t di_size; 395 397 396 398 if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) 397 - return false; 399 + return __this_address; 400 + 401 + /* Verify v3 integrity information first */ 402 + if (dip->di_version >= 3) { 403 + if (!xfs_sb_version_hascrc(&mp->m_sb)) 404 + return __this_address; 405 + if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, 406 + XFS_DINODE_CRC_OFF)) 407 + return __this_address; 408 + if (be64_to_cpu(dip->di_ino) != ino) 409 + return __this_address; 410 + if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid)) 411 + return __this_address; 412 + } 398 413 399 414 /* don't allow invalid i_size */ 400 - if (be64_to_cpu(dip->di_size) & (1ULL << 63)) 401 - return false; 415 + di_size = be64_to_cpu(dip->di_size); 416 + if (di_size & (1ULL << 63)) 417 + return __this_address; 402 418 403 419 mode = be16_to_cpu(dip->di_mode); 404 420 if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN) 405 - return false; 421 + return __this_address; 406 422 407 423 /* No zero-length symlinks/dirs. 
*/ 408 - if ((S_ISLNK(mode) || S_ISDIR(mode)) && dip->di_size == 0) 409 - return false; 424 + if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) 425 + return __this_address; 426 + 427 + /* Fork checks carried over from xfs_iformat_fork */ 428 + if (mode && 429 + be32_to_cpu(dip->di_nextents) + be16_to_cpu(dip->di_anextents) > 430 + be64_to_cpu(dip->di_nblocks)) 431 + return __this_address; 432 + 433 + if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize) 434 + return __this_address; 435 + 436 + flags = be16_to_cpu(dip->di_flags); 437 + 438 + if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp) 439 + return __this_address; 440 + 441 + /* Do we have appropriate data fork formats for the mode? */ 442 + switch (mode & S_IFMT) { 443 + case S_IFIFO: 444 + case S_IFCHR: 445 + case S_IFBLK: 446 + case S_IFSOCK: 447 + if (dip->di_format != XFS_DINODE_FMT_DEV) 448 + return __this_address; 449 + break; 450 + case S_IFREG: 451 + case S_IFLNK: 452 + case S_IFDIR: 453 + switch (dip->di_format) { 454 + case XFS_DINODE_FMT_LOCAL: 455 + /* 456 + * no local regular files yet 457 + */ 458 + if (S_ISREG(mode)) 459 + return __this_address; 460 + if (di_size > XFS_DFORK_DSIZE(dip, mp)) 461 + return __this_address; 462 + /* fall through */ 463 + case XFS_DINODE_FMT_EXTENTS: 464 + case XFS_DINODE_FMT_BTREE: 465 + break; 466 + default: 467 + return __this_address; 468 + } 469 + break; 470 + case 0: 471 + /* Uninitialized inode ok. 
*/ 472 + break; 473 + default: 474 + return __this_address; 475 + } 476 + 477 + if (XFS_DFORK_Q(dip)) { 478 + switch (dip->di_aformat) { 479 + case XFS_DINODE_FMT_LOCAL: 480 + case XFS_DINODE_FMT_EXTENTS: 481 + case XFS_DINODE_FMT_BTREE: 482 + break; 483 + default: 484 + return __this_address; 485 + } 486 + } 410 487 411 488 /* only version 3 or greater inodes are extensively verified here */ 412 489 if (dip->di_version < 3) 413 - return true; 490 + return NULL; 414 491 415 - if (!xfs_sb_version_hascrc(&mp->m_sb)) 416 - return false; 417 - if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, 418 - XFS_DINODE_CRC_OFF)) 419 - return false; 420 - if (be64_to_cpu(dip->di_ino) != ino) 421 - return false; 422 - if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid)) 423 - return false; 424 - 425 - flags = be16_to_cpu(dip->di_flags); 426 492 flags2 = be64_to_cpu(dip->di_flags2); 427 493 428 494 /* don't allow reflink/cowextsize if we don't have reflink */ 429 495 if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) && 430 496 !xfs_sb_version_hasreflink(&mp->m_sb)) 431 - return false; 497 + return __this_address; 498 + 499 + /* only regular files get reflink */ 500 + if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG) 501 + return __this_address; 432 502 433 503 /* don't let reflink and realtime mix */ 434 504 if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME)) 435 - return false; 505 + return __this_address; 436 506 437 507 /* don't let reflink and dax mix */ 438 508 if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags2 & XFS_DIFLAG2_DAX)) 439 - return false; 509 + return __this_address; 440 510 441 - return true; 511 + return NULL; 442 512 } 443 513 444 514 void ··· 547 479 { 548 480 xfs_buf_t *bp; 549 481 xfs_dinode_t *dip; 482 + xfs_failaddr_t fa; 550 483 int error; 551 484 552 485 /* ··· 579 510 return error; 580 511 581 512 /* even unallocated inodes are verified */ 582 - if (!xfs_dinode_verify(mp, ip->i_ino, dip)) { 583 - 
xfs_alert(mp, "%s: validation failed for inode %lld", 584 - __func__, ip->i_ino); 585 - 586 - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip); 513 + fa = xfs_dinode_verify(mp, ip->i_ino, dip); 514 + if (fa) { 515 + xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", dip, 516 + sizeof(*dip), fa); 587 517 error = -EFSCORRUPTED; 588 518 goto out_brelse; 589 519 }
+2 -2
fs/xfs/libxfs/xfs_inode_buf.h
··· 82 82 #define xfs_inobp_check(mp, bp) 83 83 #endif /* DEBUG */ 84 84 85 - bool xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino, 86 - struct xfs_dinode *dip); 85 + xfs_failaddr_t xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino, 86 + struct xfs_dinode *dip); 87 87 88 88 #endif /* __XFS_INODE_BUF_H__ */
+45 -107
fs/xfs/libxfs/xfs_inode_fork.c
··· 35 35 #include "xfs_da_format.h" 36 36 #include "xfs_da_btree.h" 37 37 #include "xfs_dir2_priv.h" 38 + #include "xfs_attr_leaf.h" 39 + #include "xfs_shared.h" 38 40 39 41 kmem_zone_t *xfs_ifork_zone; 40 42 ··· 64 62 int error = 0; 65 63 xfs_fsize_t di_size; 66 64 67 - if (unlikely(be32_to_cpu(dip->di_nextents) + 68 - be16_to_cpu(dip->di_anextents) > 69 - be64_to_cpu(dip->di_nblocks))) { 70 - xfs_warn(ip->i_mount, 71 - "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.", 72 - (unsigned long long)ip->i_ino, 73 - (int)(be32_to_cpu(dip->di_nextents) + 74 - be16_to_cpu(dip->di_anextents)), 75 - (unsigned long long) 76 - be64_to_cpu(dip->di_nblocks)); 77 - XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW, 78 - ip->i_mount, dip); 79 - return -EFSCORRUPTED; 80 - } 81 - 82 - if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { 83 - xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.", 84 - (unsigned long long)ip->i_ino, 85 - dip->di_forkoff); 86 - XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, 87 - ip->i_mount, dip); 88 - return -EFSCORRUPTED; 89 - } 90 - 91 - if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && 92 - !ip->i_mount->m_rtdev_targp)) { 93 - xfs_warn(ip->i_mount, 94 - "corrupt dinode %Lu, has realtime flag set.", 95 - ip->i_ino); 96 - XFS_CORRUPTION_ERROR("xfs_iformat(realtime)", 97 - XFS_ERRLEVEL_LOW, ip->i_mount, dip); 98 - return -EFSCORRUPTED; 99 - } 100 - 101 - if (unlikely(xfs_is_reflink_inode(ip) && !S_ISREG(inode->i_mode))) { 102 - xfs_warn(ip->i_mount, 103 - "corrupt dinode %llu, wrong file type for reflink.", 104 - ip->i_ino); 105 - XFS_CORRUPTION_ERROR("xfs_iformat(reflink)", 106 - XFS_ERRLEVEL_LOW, ip->i_mount, dip); 107 - return -EFSCORRUPTED; 108 - } 109 - 110 - if (unlikely(xfs_is_reflink_inode(ip) && 111 - (ip->i_d.di_flags & XFS_DIFLAG_REALTIME))) { 112 - xfs_warn(ip->i_mount, 113 - "corrupt dinode %llu, has reflink+realtime flag set.", 114 - ip->i_ino); 115 - 
XFS_CORRUPTION_ERROR("xfs_iformat(reflink)", 116 - XFS_ERRLEVEL_LOW, ip->i_mount, dip); 117 - return -EFSCORRUPTED; 118 - } 119 - 120 65 switch (inode->i_mode & S_IFMT) { 121 66 case S_IFIFO: 122 67 case S_IFCHR: 123 68 case S_IFBLK: 124 69 case S_IFSOCK: 125 - if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) { 126 - XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW, 127 - ip->i_mount, dip); 128 - return -EFSCORRUPTED; 129 - } 130 70 ip->i_d.di_size = 0; 131 71 inode->i_rdev = xfs_to_linux_dev_t(xfs_dinode_get_rdev(dip)); 132 72 break; ··· 78 134 case S_IFDIR: 79 135 switch (dip->di_format) { 80 136 case XFS_DINODE_FMT_LOCAL: 81 - /* 82 - * no local regular files yet 83 - */ 84 - if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) { 85 - xfs_warn(ip->i_mount, 86 - "corrupt inode %Lu (local format for regular file).", 87 - (unsigned long long) ip->i_ino); 88 - XFS_CORRUPTION_ERROR("xfs_iformat(4)", 89 - XFS_ERRLEVEL_LOW, 90 - ip->i_mount, dip); 91 - return -EFSCORRUPTED; 92 - } 93 - 94 137 di_size = be64_to_cpu(dip->di_size); 95 - if (unlikely(di_size < 0 || 96 - di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { 97 - xfs_warn(ip->i_mount, 98 - "corrupt inode %Lu (bad size %Ld for local inode).", 99 - (unsigned long long) ip->i_ino, 100 - (long long) di_size); 101 - XFS_CORRUPTION_ERROR("xfs_iformat(5)", 102 - XFS_ERRLEVEL_LOW, 103 - ip->i_mount, dip); 104 - return -EFSCORRUPTED; 105 - } 106 - 107 138 size = (int)di_size; 108 139 error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size); 109 140 break; ··· 89 170 error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK); 90 171 break; 91 172 default: 92 - XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW, 93 - ip->i_mount); 94 173 return -EFSCORRUPTED; 95 174 } 96 175 break; 97 176 98 177 default: 99 - XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount); 100 178 return -EFSCORRUPTED; 101 179 } 102 180 if (error) 103 181 return error; 104 - 105 - /* Check inline dir contents. 
*/ 106 - if (S_ISDIR(inode->i_mode) && dip->di_format == XFS_DINODE_FMT_LOCAL) { 107 - error = xfs_dir2_sf_verify(ip); 108 - if (error) { 109 - xfs_idestroy_fork(ip, XFS_DATA_FORK); 110 - return error; 111 - } 112 - } 113 182 114 183 if (xfs_is_reflink_inode(ip)) { 115 184 ASSERT(ip->i_cowfp == NULL); ··· 114 207 case XFS_DINODE_FMT_LOCAL: 115 208 atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); 116 209 size = be16_to_cpu(atp->hdr.totsize); 117 - 118 - if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) { 119 - xfs_warn(ip->i_mount, 120 - "corrupt inode %Lu (bad attr fork size %Ld).", 121 - (unsigned long long) ip->i_ino, 122 - (long long) size); 123 - XFS_CORRUPTION_ERROR("xfs_iformat(8)", 124 - XFS_ERRLEVEL_LOW, 125 - ip->i_mount, dip); 126 - error = -EFSCORRUPTED; 127 - break; 128 - } 129 210 130 211 error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size); 131 212 break; ··· 298 403 */ 299 404 if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= 300 405 XFS_IFORK_MAXEXT(ip, whichfork) || 406 + nrecs == 0 || 301 407 XFS_BMDR_SPACE_CALC(nrecs) > 302 408 XFS_DFORK_SIZE(dip, mp, whichfork) || 303 409 XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks) || ··· 722 826 ip->i_cowfp->if_flags = XFS_IFEXTENTS; 723 827 ip->i_cformat = XFS_DINODE_FMT_EXTENTS; 724 828 ip->i_cnextents = 0; 829 + } 830 + 831 + /* Default fork content verifiers. */ 832 + struct xfs_ifork_ops xfs_default_ifork_ops = { 833 + .verify_attr = xfs_attr_shortform_verify, 834 + .verify_dir = xfs_dir2_sf_verify, 835 + .verify_symlink = xfs_symlink_shortform_verify, 836 + }; 837 + 838 + /* Verify the inline contents of the data fork of an inode. */ 839 + xfs_failaddr_t 840 + xfs_ifork_verify_data( 841 + struct xfs_inode *ip, 842 + struct xfs_ifork_ops *ops) 843 + { 844 + /* Non-local data fork, we're done. */ 845 + if (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) 846 + return NULL; 847 + 848 + /* Check the inline data fork if there is one. 
*/ 849 + switch (VFS_I(ip)->i_mode & S_IFMT) { 850 + case S_IFDIR: 851 + return ops->verify_dir(ip); 852 + case S_IFLNK: 853 + return ops->verify_symlink(ip); 854 + default: 855 + return NULL; 856 + } 857 + } 858 + 859 + /* Verify the inline contents of the attr fork of an inode. */ 860 + xfs_failaddr_t 861 + xfs_ifork_verify_attr( 862 + struct xfs_inode *ip, 863 + struct xfs_ifork_ops *ops) 864 + { 865 + /* There has to be an attr fork allocated if aformat is local. */ 866 + if (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) 867 + return NULL; 868 + if (!XFS_IFORK_PTR(ip, XFS_ATTR_FORK)) 869 + return __this_address; 870 + return ops->verify_attr(ip); 725 871 }
+14
fs/xfs/libxfs/xfs_inode_fork.h
··· 186 186 187 187 extern void xfs_ifork_init_cow(struct xfs_inode *ip); 188 188 189 + typedef xfs_failaddr_t (*xfs_ifork_verifier_t)(struct xfs_inode *); 190 + 191 + struct xfs_ifork_ops { 192 + xfs_ifork_verifier_t verify_symlink; 193 + xfs_ifork_verifier_t verify_dir; 194 + xfs_ifork_verifier_t verify_attr; 195 + }; 196 + extern struct xfs_ifork_ops xfs_default_ifork_ops; 197 + 198 + xfs_failaddr_t xfs_ifork_verify_data(struct xfs_inode *ip, 199 + struct xfs_ifork_ops *ops); 200 + xfs_failaddr_t xfs_ifork_verify_attr(struct xfs_inode *ip, 201 + struct xfs_ifork_ops *ops); 202 + 189 203 #endif /* __XFS_INODE_FORK_H__ */
+1 -1
fs/xfs/libxfs/xfs_log_rlimit.c
··· 55 55 * the maximum one in terms of the pre-calculated values which were done 56 56 * at mount time. 57 57 */ 58 - STATIC void 58 + void 59 59 xfs_log_get_max_trans_res( 60 60 struct xfs_mount *mp, 61 61 struct xfs_trans_res *max_resp)
+5 -4
fs/xfs/libxfs/xfs_quota_defs.h
··· 112 112 #define XFS_QMOPT_PQUOTA 0x0000008 /* project dquot requested */ 113 113 #define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */ 114 114 #define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */ 115 - #define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */ 116 - #define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */ 117 115 #define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */ 118 116 #define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */ 119 117 #define XFS_QMOPT_DQNEXT 0x0008000 /* return next dquot >= this ID */ ··· 151 153 (XFS_QMOPT_UQUOTA | XFS_QMOPT_PQUOTA | XFS_QMOPT_GQUOTA) 152 154 #define XFS_QMOPT_RESBLK_MASK (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS) 153 155 154 - extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq, 155 - xfs_dqid_t id, uint type, uint flags, const char *str); 156 + extern xfs_failaddr_t xfs_dquot_verify(struct xfs_mount *mp, 157 + struct xfs_disk_dquot *ddq, xfs_dqid_t id, uint type, 158 + uint flags); 156 159 extern int xfs_calc_dquots_per_chunk(unsigned int nbblks); 160 + extern int xfs_dquot_repair(struct xfs_mount *mp, struct xfs_disk_dquot *ddq, 161 + xfs_dqid_t id, uint type); 157 162 158 163 #endif /* __XFS_QUOTA_H__ */
+19
fs/xfs/libxfs/xfs_refcount.c
··· 1696 1696 xfs_trans_brelse(tp, agbp); 1697 1697 goto out_trans; 1698 1698 } 1699 + 1700 + /* Is there a record covering a given extent? */ 1701 + int 1702 + xfs_refcount_has_record( 1703 + struct xfs_btree_cur *cur, 1704 + xfs_agblock_t bno, 1705 + xfs_extlen_t len, 1706 + bool *exists) 1707 + { 1708 + union xfs_btree_irec low; 1709 + union xfs_btree_irec high; 1710 + 1711 + memset(&low, 0, sizeof(low)); 1712 + low.rc.rc_startblock = bno; 1713 + memset(&high, 0xFF, sizeof(high)); 1714 + high.rc.rc_startblock = bno + len - 1; 1715 + 1716 + return xfs_btree_has_record(cur, &low, &high, exists); 1717 + }
+3
fs/xfs/libxfs/xfs_refcount.h
··· 83 83 return (log_res * 3 / 4) / XFS_REFCOUNT_ITEM_OVERHEAD; 84 84 } 85 85 86 + extern int xfs_refcount_has_record(struct xfs_btree_cur *cur, 87 + xfs_agblock_t bno, xfs_extlen_t len, bool *exists); 88 + 86 89 #endif /* __XFS_REFCOUNT_H__ */
+25 -17
fs/xfs/libxfs/xfs_refcount_btree.c
··· 223 223 be32_to_cpu(k2->refc.rc_startblock); 224 224 } 225 225 226 - STATIC bool 226 + STATIC xfs_failaddr_t 227 227 xfs_refcountbt_verify( 228 228 struct xfs_buf *bp) 229 229 { 230 230 struct xfs_mount *mp = bp->b_target->bt_mount; 231 231 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 232 232 struct xfs_perag *pag = bp->b_pag; 233 + xfs_failaddr_t fa; 233 234 unsigned int level; 234 235 235 236 if (block->bb_magic != cpu_to_be32(XFS_REFC_CRC_MAGIC)) 236 - return false; 237 + return __this_address; 237 238 238 239 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 239 - return false; 240 - if (!xfs_btree_sblock_v5hdr_verify(bp)) 241 - return false; 240 + return __this_address; 241 + fa = xfs_btree_sblock_v5hdr_verify(bp); 242 + if (fa) 243 + return fa; 242 244 243 245 level = be16_to_cpu(block->bb_level); 244 246 if (pag && pag->pagf_init) { 245 247 if (level >= pag->pagf_refcount_level) 246 - return false; 248 + return __this_address; 247 249 } else if (level >= mp->m_refc_maxlevels) 248 - return false; 250 + return __this_address; 249 251 250 252 return xfs_btree_sblock_verify(bp, mp->m_refc_mxr[level != 0]); 251 253 } ··· 256 254 xfs_refcountbt_read_verify( 257 255 struct xfs_buf *bp) 258 256 { 259 - if (!xfs_btree_sblock_verify_crc(bp)) 260 - xfs_buf_ioerror(bp, -EFSBADCRC); 261 - else if (!xfs_refcountbt_verify(bp)) 262 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 257 + xfs_failaddr_t fa; 263 258 264 - if (bp->b_error) { 265 - trace_xfs_btree_corrupt(bp, _RET_IP_); 266 - xfs_verifier_error(bp); 259 + if (!xfs_btree_sblock_verify_crc(bp)) 260 + xfs_verifier_error(bp, -EFSBADCRC, __this_address); 261 + else { 262 + fa = xfs_refcountbt_verify(bp); 263 + if (fa) 264 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 267 265 } 266 + 267 + if (bp->b_error) 268 + trace_xfs_btree_corrupt(bp, _RET_IP_); 268 269 } 269 270 270 271 STATIC void 271 272 xfs_refcountbt_write_verify( 272 273 struct xfs_buf *bp) 273 274 { 274 - if (!xfs_refcountbt_verify(bp)) { 275 + xfs_failaddr_t fa; 
276 + 277 + fa = xfs_refcountbt_verify(bp); 278 + if (fa) { 275 279 trace_xfs_btree_corrupt(bp, _RET_IP_); 276 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 277 - xfs_verifier_error(bp); 280 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 278 281 return; 279 282 } 280 283 xfs_btree_sblock_calc_crc(bp); ··· 290 283 .name = "xfs_refcountbt", 291 284 .verify_read = xfs_refcountbt_read_verify, 292 285 .verify_write = xfs_refcountbt_write_verify, 286 + .verify_struct = xfs_refcountbt_verify, 293 287 }; 294 288 295 289 STATIC int
+67
fs/xfs/libxfs/xfs_rmap.c
··· 2387 2387 else 2388 2388 return 0; 2389 2389 } 2390 + 2391 + /* Is there a record covering a given extent? */ 2392 + int 2393 + xfs_rmap_has_record( 2394 + struct xfs_btree_cur *cur, 2395 + xfs_agblock_t bno, 2396 + xfs_extlen_t len, 2397 + bool *exists) 2398 + { 2399 + union xfs_btree_irec low; 2400 + union xfs_btree_irec high; 2401 + 2402 + memset(&low, 0, sizeof(low)); 2403 + low.r.rm_startblock = bno; 2404 + memset(&high, 0xFF, sizeof(high)); 2405 + high.r.rm_startblock = bno + len - 1; 2406 + 2407 + return xfs_btree_has_record(cur, &low, &high, exists); 2408 + } 2409 + 2410 + /* 2411 + * Is there a record for this owner completely covering a given physical 2412 + * extent? If so, *has_rmap will be set to true. If there is no record 2413 + * or the record only covers part of the range, we set *has_rmap to false. 2414 + * This function doesn't perform range lookups or offset checks, so it is 2415 + * not suitable for checking data fork blocks. 2416 + */ 2417 + int 2418 + xfs_rmap_record_exists( 2419 + struct xfs_btree_cur *cur, 2420 + xfs_agblock_t bno, 2421 + xfs_extlen_t len, 2422 + struct xfs_owner_info *oinfo, 2423 + bool *has_rmap) 2424 + { 2425 + uint64_t owner; 2426 + uint64_t offset; 2427 + unsigned int flags; 2428 + int has_record; 2429 + struct xfs_rmap_irec irec; 2430 + int error; 2431 + 2432 + xfs_owner_info_unpack(oinfo, &owner, &offset, &flags); 2433 + ASSERT(XFS_RMAP_NON_INODE_OWNER(owner) || 2434 + (flags & XFS_RMAP_BMBT_BLOCK)); 2435 + 2436 + error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags, 2437 + &has_record); 2438 + if (error) 2439 + return error; 2440 + if (!has_record) { 2441 + *has_rmap = false; 2442 + return 0; 2443 + } 2444 + 2445 + error = xfs_rmap_get_rec(cur, &irec, &has_record); 2446 + if (error) 2447 + return error; 2448 + if (!has_record) { 2449 + *has_rmap = false; 2450 + return 0; 2451 + } 2452 + 2453 + *has_rmap = (irec.rm_owner == owner && irec.rm_startblock <= bno && 2454 + irec.rm_startblock + 
irec.rm_blockcount >= bno + len); 2455 + return 0; 2456 + }
+5
fs/xfs/libxfs/xfs_rmap.h
··· 233 233 union xfs_btree_rec; 234 234 int xfs_rmap_btrec_to_irec(union xfs_btree_rec *rec, 235 235 struct xfs_rmap_irec *irec); 236 + int xfs_rmap_has_record(struct xfs_btree_cur *cur, xfs_agblock_t bno, 237 + xfs_extlen_t len, bool *exists); 238 + int xfs_rmap_record_exists(struct xfs_btree_cur *cur, xfs_agblock_t bno, 239 + xfs_extlen_t len, struct xfs_owner_info *oinfo, 240 + bool *has_rmap); 236 241 237 242 #endif /* __XFS_RMAP_H__ */
+25 -17
fs/xfs/libxfs/xfs_rmap_btree.c
··· 303 303 return 0; 304 304 } 305 305 306 - static bool 306 + static xfs_failaddr_t 307 307 xfs_rmapbt_verify( 308 308 struct xfs_buf *bp) 309 309 { 310 310 struct xfs_mount *mp = bp->b_target->bt_mount; 311 311 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 312 312 struct xfs_perag *pag = bp->b_pag; 313 + xfs_failaddr_t fa; 313 314 unsigned int level; 314 315 315 316 /* ··· 326 325 * in this case. 327 326 */ 328 327 if (block->bb_magic != cpu_to_be32(XFS_RMAP_CRC_MAGIC)) 329 - return false; 328 + return __this_address; 330 329 331 330 if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) 332 - return false; 333 - if (!xfs_btree_sblock_v5hdr_verify(bp)) 334 - return false; 331 + return __this_address; 332 + fa = xfs_btree_sblock_v5hdr_verify(bp); 333 + if (fa) 334 + return fa; 335 335 336 336 level = be16_to_cpu(block->bb_level); 337 337 if (pag && pag->pagf_init) { 338 338 if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi]) 339 - return false; 339 + return __this_address; 340 340 } else if (level >= mp->m_rmap_maxlevels) 341 - return false; 341 + return __this_address; 342 342 343 343 return xfs_btree_sblock_verify(bp, mp->m_rmap_mxr[level != 0]); 344 344 } ··· 348 346 xfs_rmapbt_read_verify( 349 347 struct xfs_buf *bp) 350 348 { 351 - if (!xfs_btree_sblock_verify_crc(bp)) 352 - xfs_buf_ioerror(bp, -EFSBADCRC); 353 - else if (!xfs_rmapbt_verify(bp)) 354 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 349 + xfs_failaddr_t fa; 355 350 356 - if (bp->b_error) { 357 - trace_xfs_btree_corrupt(bp, _RET_IP_); 358 - xfs_verifier_error(bp); 351 + if (!xfs_btree_sblock_verify_crc(bp)) 352 + xfs_verifier_error(bp, -EFSBADCRC, __this_address); 353 + else { 354 + fa = xfs_rmapbt_verify(bp); 355 + if (fa) 356 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 359 357 } 358 + 359 + if (bp->b_error) 360 + trace_xfs_btree_corrupt(bp, _RET_IP_); 360 361 } 361 362 362 363 static void 363 364 xfs_rmapbt_write_verify( 364 365 struct xfs_buf *bp) 365 366 { 366 - if (!xfs_rmapbt_verify(bp)) { 367 + 
xfs_failaddr_t fa; 368 + 369 + fa = xfs_rmapbt_verify(bp); 370 + if (fa) { 367 371 trace_xfs_btree_corrupt(bp, _RET_IP_); 368 - xfs_buf_ioerror(bp, -EFSCORRUPTED); 369 - xfs_verifier_error(bp); 372 + xfs_verifier_error(bp, -EFSCORRUPTED, fa); 370 373 return; 371 374 } 372 375 xfs_btree_sblock_calc_crc(bp); ··· 382 375 .name = "xfs_rmapbt", 383 376 .verify_read = xfs_rmapbt_read_verify, 384 377 .verify_write = xfs_rmapbt_write_verify, 378 + .verify_struct = xfs_rmapbt_verify, 385 379 }; 386 380 387 381 STATIC int
+21
fs/xfs/libxfs/xfs_rtbitmap.c
··· 1097 1097 { 1098 1098 return rtbno < mp->m_sb.sb_rblocks; 1099 1099 } 1100 + 1101 + /* Is the given extent all free? */ 1102 + int 1103 + xfs_rtalloc_extent_is_free( 1104 + struct xfs_mount *mp, 1105 + struct xfs_trans *tp, 1106 + xfs_rtblock_t start, 1107 + xfs_extlen_t len, 1108 + bool *is_free) 1109 + { 1110 + xfs_rtblock_t end; 1111 + int matches; 1112 + int error; 1113 + 1114 + error = xfs_rtcheck_range(mp, tp, start, len, 1, &end, &matches); 1115 + if (error) 1116 + return error; 1117 + 1118 + *is_free = matches; 1119 + return 0; 1120 + }
+106 -7
fs/xfs/libxfs/xfs_sb.c
··· 40 40 #include "xfs_rmap_btree.h" 41 41 #include "xfs_bmap.h" 42 42 #include "xfs_refcount_btree.h" 43 + #include "xfs_da_format.h" 44 + #include "xfs_da_btree.h" 43 45 44 46 /* 45 47 * Physical superblock buffer manipulations. Shared with libxfs in userspace. ··· 118 116 bool check_inprogress, 119 117 bool check_version) 120 118 { 119 + u32 agcount = 0; 120 + u32 rem; 121 + 121 122 if (sbp->sb_magicnum != XFS_SB_MAGIC) { 122 123 xfs_warn(mp, "bad magic number"); 123 124 return -EWRONGFS; ··· 231 226 return -EINVAL; 232 227 } 233 228 229 + /* Compute agcount for this number of dblocks and agblocks */ 230 + if (sbp->sb_agblocks) { 231 + agcount = div_u64_rem(sbp->sb_dblocks, sbp->sb_agblocks, &rem); 232 + if (rem) 233 + agcount++; 234 + } 235 + 234 236 /* 235 237 * More sanity checking. Most of these were stolen directly from 236 238 * xfs_repair. ··· 262 250 sbp->sb_inodesize != (1 << sbp->sb_inodelog) || 263 251 sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE || 264 252 sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) || 253 + XFS_FSB_TO_B(mp, sbp->sb_agblocks) < XFS_MIN_AG_BYTES || 254 + XFS_FSB_TO_B(mp, sbp->sb_agblocks) > XFS_MAX_AG_BYTES || 255 + sbp->sb_agblklog != xfs_highbit32(sbp->sb_agblocks - 1) + 1 || 256 + agcount == 0 || agcount != sbp->sb_agcount || 265 257 (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || 266 258 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || 267 259 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || ··· 656 640 error = xfs_sb_verify(bp, true); 657 641 658 642 out_error: 659 - if (error) { 643 + if (error == -EFSCORRUPTED || error == -EFSBADCRC) 644 + xfs_verifier_error(bp, error, __this_address); 645 + else if (error) 660 646 xfs_buf_ioerror(bp, error); 661 - if (error == -EFSCORRUPTED || error == -EFSBADCRC) 662 - xfs_verifier_error(bp); 663 - } 664 647 } 665 648 666 649 /* ··· 688 673 struct xfs_buf *bp) 689 674 { 690 675 struct xfs_mount *mp = bp->b_target->bt_mount; 691 - 
struct xfs_buf_log_item *bip = bp->b_fspriv; 676 + struct xfs_buf_log_item *bip = bp->b_log_item; 692 677 int error; 693 678 694 679 error = xfs_sb_verify(bp, false); 695 680 if (error) { 696 - xfs_buf_ioerror(bp, error); 697 - xfs_verifier_error(bp); 681 + xfs_verifier_error(bp, error, __this_address); 698 682 return; 699 683 } 700 684 ··· 889 875 if (wait) 890 876 xfs_trans_set_sync(tp); 891 877 return xfs_trans_commit(tp); 878 + } 879 + 880 + int 881 + xfs_fs_geometry( 882 + struct xfs_sb *sbp, 883 + struct xfs_fsop_geom *geo, 884 + int struct_version) 885 + { 886 + memset(geo, 0, sizeof(struct xfs_fsop_geom)); 887 + 888 + geo->blocksize = sbp->sb_blocksize; 889 + geo->rtextsize = sbp->sb_rextsize; 890 + geo->agblocks = sbp->sb_agblocks; 891 + geo->agcount = sbp->sb_agcount; 892 + geo->logblocks = sbp->sb_logblocks; 893 + geo->sectsize = sbp->sb_sectsize; 894 + geo->inodesize = sbp->sb_inodesize; 895 + geo->imaxpct = sbp->sb_imax_pct; 896 + geo->datablocks = sbp->sb_dblocks; 897 + geo->rtblocks = sbp->sb_rblocks; 898 + geo->rtextents = sbp->sb_rextents; 899 + geo->logstart = sbp->sb_logstart; 900 + BUILD_BUG_ON(sizeof(geo->uuid) != sizeof(sbp->sb_uuid)); 901 + memcpy(geo->uuid, &sbp->sb_uuid, sizeof(sbp->sb_uuid)); 902 + 903 + if (struct_version < 2) 904 + return 0; 905 + 906 + geo->sunit = sbp->sb_unit; 907 + geo->swidth = sbp->sb_width; 908 + 909 + if (struct_version < 3) 910 + return 0; 911 + 912 + geo->version = XFS_FSOP_GEOM_VERSION; 913 + geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK | 914 + XFS_FSOP_GEOM_FLAGS_DIRV2; 915 + if (xfs_sb_version_hasattr(sbp)) 916 + geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR; 917 + if (xfs_sb_version_hasquota(sbp)) 918 + geo->flags |= XFS_FSOP_GEOM_FLAGS_QUOTA; 919 + if (xfs_sb_version_hasalign(sbp)) 920 + geo->flags |= XFS_FSOP_GEOM_FLAGS_IALIGN; 921 + if (xfs_sb_version_hasdalign(sbp)) 922 + geo->flags |= XFS_FSOP_GEOM_FLAGS_DALIGN; 923 + if (xfs_sb_version_hasextflgbit(sbp)) 924 + geo->flags |= XFS_FSOP_GEOM_FLAGS_EXTFLG; 925 + if 
(xfs_sb_version_hassector(sbp)) 926 + geo->flags |= XFS_FSOP_GEOM_FLAGS_SECTOR; 927 + if (xfs_sb_version_hasasciici(sbp)) 928 + geo->flags |= XFS_FSOP_GEOM_FLAGS_DIRV2CI; 929 + if (xfs_sb_version_haslazysbcount(sbp)) 930 + geo->flags |= XFS_FSOP_GEOM_FLAGS_LAZYSB; 931 + if (xfs_sb_version_hasattr2(sbp)) 932 + geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR2; 933 + if (xfs_sb_version_hasprojid32bit(sbp)) 934 + geo->flags |= XFS_FSOP_GEOM_FLAGS_PROJID32; 935 + if (xfs_sb_version_hascrc(sbp)) 936 + geo->flags |= XFS_FSOP_GEOM_FLAGS_V5SB; 937 + if (xfs_sb_version_hasftype(sbp)) 938 + geo->flags |= XFS_FSOP_GEOM_FLAGS_FTYPE; 939 + if (xfs_sb_version_hasfinobt(sbp)) 940 + geo->flags |= XFS_FSOP_GEOM_FLAGS_FINOBT; 941 + if (xfs_sb_version_hassparseinodes(sbp)) 942 + geo->flags |= XFS_FSOP_GEOM_FLAGS_SPINODES; 943 + if (xfs_sb_version_hasrmapbt(sbp)) 944 + geo->flags |= XFS_FSOP_GEOM_FLAGS_RMAPBT; 945 + if (xfs_sb_version_hasreflink(sbp)) 946 + geo->flags |= XFS_FSOP_GEOM_FLAGS_REFLINK; 947 + if (xfs_sb_version_hassector(sbp)) 948 + geo->logsectsize = sbp->sb_logsectsize; 949 + else 950 + geo->logsectsize = BBSIZE; 951 + geo->rtsectsize = sbp->sb_blocksize; 952 + geo->dirblocksize = xfs_dir2_dirblock_bytes(sbp); 953 + 954 + if (struct_version < 4) 955 + return 0; 956 + 957 + if (xfs_sb_version_haslogv2(sbp)) 958 + geo->flags |= XFS_FSOP_GEOM_FLAGS_LOGV2; 959 + 960 + geo->logsunit = sbp->sb_logsunit; 961 + 962 + return 0; 892 963 }
+4
fs/xfs/libxfs/xfs_sb.h
··· 34 34 extern void xfs_sb_to_disk(struct xfs_dsb *to, struct xfs_sb *from); 35 35 extern void xfs_sb_quota_from_disk(struct xfs_sb *sbp); 36 36 37 + #define XFS_FS_GEOM_MAX_STRUCT_VER (4) 38 + extern int xfs_fs_geometry(struct xfs_sb *sbp, struct xfs_fsop_geom *geo, 39 + int struct_version); 40 + 37 41 #endif /* __XFS_SB_H__ */
+4
fs/xfs/libxfs/xfs_shared.h
··· 76 76 int xfs_log_calc_unit_res(struct xfs_mount *mp, int unit_bytes); 77 77 int xfs_log_calc_minimum_size(struct xfs_mount *); 78 78 79 + struct xfs_trans_res; 80 + void xfs_log_get_max_trans_res(struct xfs_mount *mp, 81 + struct xfs_trans_res *max_resp); 79 82 80 83 /* 81 84 * Values for t_flags. ··· 146 143 uint32_t size, struct xfs_buf *bp); 147 144 void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp, 148 145 struct xfs_inode *ip, struct xfs_ifork *ifp); 146 + xfs_failaddr_t xfs_symlink_shortform_verify(struct xfs_inode *ip); 149 147 150 148 #endif /* __XFS_SHARED_H__ */
+94 -107
fs/xfs/libxfs/xfs_trans_resv.c
··· 34 34 #include "xfs_trans_space.h" 35 35 #include "xfs_trace.h" 36 36 37 + #define _ALLOC true 38 + #define _FREE false 39 + 37 40 /* 38 41 * A buffer has a format structure overhead in the log in addition 39 42 * to the data, so we need to take this into account when reserving ··· 135 132 } 136 133 137 134 /* 138 - * The free inode btree is a conditional feature and the log reservation 139 - * requirements differ slightly from that of the traditional inode allocation 140 - * btree. The finobt tracks records for inode chunks with at least one free 141 - * inode. A record can be removed from the tree for an inode allocation 142 - * or free and thus the finobt reservation is unconditional across: 135 + * Inode btree record insertion/removal modifies the inode btree and free space 136 + * btrees (since the inobt does not use the agfl). This requires the following 137 + * reservation: 143 138 * 144 - * - inode allocation 145 - * - inode free 146 - * - inode chunk allocation 147 - * 148 - * The 'modify' param indicates to include the record modification scenario. The 149 - * 'alloc' param indicates to include the reservation for free space btree 150 - * modifications on behalf of finobt modifications. This is required only for 151 - * transactions that do not already account for free space btree modifications. 152 - * 153 - * the free inode btree: max depth * block size 139 + * the inode btree: max depth * blocksize 154 140 * the allocation btrees: 2 trees * (max depth - 1) * block size 155 - * the free inode btree entry: block size 141 + * 142 + * The caller must account for SB and AG header modifications, etc. 143 + */ 144 + STATIC uint 145 + xfs_calc_inobt_res( 146 + struct xfs_mount *mp) 147 + { 148 + return xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + 149 + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), 150 + XFS_FSB_TO_B(mp, 1)); 151 + } 152 + 153 + /* 154 + * The free inode btree is a conditional feature. 
The behavior differs slightly 155 + * from that of the traditional inode btree in that the finobt tracks records 156 + * for inode chunks with at least one free inode. A record can be removed from 157 + * the tree during individual inode allocation. Therefore the finobt 158 + * reservation is unconditional for both the inode chunk allocation and 159 + * individual inode allocation (modify) cases. 160 + * 161 + * Behavior aside, the reservation for finobt modification is equivalent to the 162 + * traditional inobt: cover a full finobt shape change plus block allocation. 156 163 */ 157 164 STATIC uint 158 165 xfs_calc_finobt_res( 159 - struct xfs_mount *mp, 160 - int alloc, 161 - int modify) 166 + struct xfs_mount *mp) 162 167 { 163 - uint res; 164 - 165 168 if (!xfs_sb_version_hasfinobt(&mp->m_sb)) 166 169 return 0; 167 170 168 - res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)); 169 - if (alloc) 170 - res += xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), 171 - XFS_FSB_TO_B(mp, 1)); 172 - if (modify) 173 - res += (uint)XFS_FSB_TO_B(mp, 1); 171 + return xfs_calc_inobt_res(mp); 172 + } 174 173 174 + /* 175 + * Calculate the reservation required to allocate or free an inode chunk. This 176 + * includes: 177 + * 178 + * the allocation btrees: 2 trees * (max depth - 1) * block size 179 + * the inode chunk: m_ialloc_blks * N 180 + * 181 + * The size N of the inode chunk reservation depends on whether it is for 182 + * allocation or free and which type of create transaction is in use. An inode 183 + * chunk free always invalidates the buffers and only requires reservation for 184 + * headers (N == 0). An inode chunk allocation requires a chunk sized 185 + * reservation on v4 and older superblocks to initialize the chunk. No chunk 186 + * reservation is required for allocation on v5 supers, which use ordered 187 + * buffers to initialize. 
188 + */ 189 + STATIC uint 190 + xfs_calc_inode_chunk_res( 191 + struct xfs_mount *mp, 192 + bool alloc) 193 + { 194 + uint res, size = 0; 195 + 196 + res = xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), 197 + XFS_FSB_TO_B(mp, 1)); 198 + if (alloc) { 199 + /* icreate tx uses ordered buffers */ 200 + if (xfs_sb_version_hascrc(&mp->m_sb)) 201 + return res; 202 + size = XFS_FSB_TO_B(mp, 1); 203 + } 204 + 205 + res += xfs_calc_buf_res(mp->m_ialloc_blks, size); 175 206 return res; 176 207 } 177 208 ··· 269 232 * the super block to reflect the freed blocks: sector size 270 233 * worst case split in allocation btrees per extent assuming 4 extents: 271 234 * 4 exts * 2 trees * (2 * max depth - 1) * block size 272 - * the inode btree: max depth * blocksize 273 - * the allocation btrees: 2 trees * (max depth - 1) * block size 274 235 */ 275 236 STATIC uint 276 237 xfs_calc_itruncate_reservation( ··· 280 245 XFS_FSB_TO_B(mp, 1))), 281 246 (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + 282 247 xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4), 283 - XFS_FSB_TO_B(mp, 1)) + 284 - xfs_calc_buf_res(5, 0) + 285 - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), 286 - XFS_FSB_TO_B(mp, 1)) + 287 - xfs_calc_buf_res(2 + mp->m_ialloc_blks + 288 - mp->m_in_maxlevels, 0))); 248 + XFS_FSB_TO_B(mp, 1)))); 289 249 } 290 250 291 251 /* ··· 312 282 * For removing an inode from unlinked list at first, we can modify: 313 283 * the agi hash list and counters: sector size 314 284 * the on disk inode before ours in the agi hash list: inode cluster size 285 + * the on disk inode in the agi hash list: inode cluster size 315 286 */ 316 287 STATIC uint 317 288 xfs_calc_iunlink_remove_reservation( 318 289 struct xfs_mount *mp) 319 290 { 320 291 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + 321 - max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); 292 + 2 * max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); 322 293 } 323 294 324 295 /* ··· 351 320 /* 352 321 * For adding an inode to 
unlinked list we can modify: 353 322 * the agi hash list: sector size 354 - * the unlinked inode: inode size 323 + * the on disk inode: inode cluster size 355 324 */ 356 325 STATIC uint 357 326 xfs_calc_iunlink_add_reservation(xfs_mount_t *mp) 358 327 { 359 328 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + 360 - xfs_calc_inode_res(mp, 1); 329 + max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); 361 330 } 362 331 363 332 /* ··· 410 379 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + 411 380 (uint)XFS_FSB_TO_B(mp, 1) + 412 381 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) + 413 - xfs_calc_finobt_res(mp, 1, 1); 414 - } 415 - 416 - /* 417 - * For create we can allocate some inodes giving: 418 - * the agi and agf of the ag getting the new inodes: 2 * sectorsize 419 - * the superblock for the nlink flag: sector size 420 - * the inode blocks allocated: mp->m_ialloc_blks * blocksize 421 - * the inode btree: max depth * blocksize 422 - * the allocation btrees: 2 trees * (max depth - 1) * block size 423 - */ 424 - STATIC uint 425 - xfs_calc_create_resv_alloc( 426 - struct xfs_mount *mp) 427 - { 428 - return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + 429 - mp->m_sb.sb_sectsize + 430 - xfs_calc_buf_res(mp->m_ialloc_blks, XFS_FSB_TO_B(mp, 1)) + 431 - xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + 432 - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), 433 - XFS_FSB_TO_B(mp, 1)); 434 - } 435 - 436 - STATIC uint 437 - __xfs_calc_create_reservation( 438 - struct xfs_mount *mp) 439 - { 440 - return XFS_DQUOT_LOGRES(mp) + 441 - MAX(xfs_calc_create_resv_alloc(mp), 442 - xfs_calc_create_resv_modify(mp)); 382 + xfs_calc_finobt_res(mp); 443 383 } 444 384 445 385 /* 446 386 * For icreate we can allocate some inodes giving: 447 387 * the agi and agf of the ag getting the new inodes: 2 * sectorsize 448 388 * the superblock for the nlink flag: sector size 449 - * the inode btree: max depth * blocksize 450 - * the allocation btrees: 2 trees * (max 
depth - 1) * block size 451 - * the finobt (record insertion) 389 + * the inode chunk (allocation, optional init) 390 + * the inobt (record insertion) 391 + * the finobt (optional, record insertion) 452 392 */ 453 393 STATIC uint 454 394 xfs_calc_icreate_resv_alloc( ··· 427 425 { 428 426 return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + 429 427 mp->m_sb.sb_sectsize + 430 - xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + 431 - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), 432 - XFS_FSB_TO_B(mp, 1)) + 433 - xfs_calc_finobt_res(mp, 0, 0); 428 + xfs_calc_inode_chunk_res(mp, _ALLOC) + 429 + xfs_calc_inobt_res(mp) + 430 + xfs_calc_finobt_res(mp); 434 431 } 435 432 436 433 STATIC uint ··· 441 440 } 442 441 443 442 STATIC uint 444 - xfs_calc_create_reservation( 445 - struct xfs_mount *mp) 446 - { 447 - if (xfs_sb_version_hascrc(&mp->m_sb)) 448 - return xfs_calc_icreate_reservation(mp); 449 - return __xfs_calc_create_reservation(mp); 450 - 451 - } 452 - 453 - STATIC uint 454 443 xfs_calc_create_tmpfile_reservation( 455 444 struct xfs_mount *mp) 456 445 { 457 446 uint res = XFS_DQUOT_LOGRES(mp); 458 447 459 - if (xfs_sb_version_hascrc(&mp->m_sb)) 460 - res += xfs_calc_icreate_resv_alloc(mp); 461 - else 462 - res += xfs_calc_create_resv_alloc(mp); 463 - 448 + res += xfs_calc_icreate_resv_alloc(mp); 464 449 return res + xfs_calc_iunlink_add_reservation(mp); 465 450 } 466 451 ··· 457 470 xfs_calc_mkdir_reservation( 458 471 struct xfs_mount *mp) 459 472 { 460 - return xfs_calc_create_reservation(mp); 473 + return xfs_calc_icreate_reservation(mp); 461 474 } 462 475 463 476 ··· 470 483 xfs_calc_symlink_reservation( 471 484 struct xfs_mount *mp) 472 485 { 473 - return xfs_calc_create_reservation(mp) + 486 + return xfs_calc_icreate_reservation(mp) + 474 487 xfs_calc_buf_res(1, XFS_SYMLINK_MAXLEN); 475 488 } 476 489 477 490 /* 478 491 * In freeing an inode we can modify: 479 492 * the inode being freed: inode size 480 - * the super block free inode counter: sector 
size 481 - * the agi hash list and counters: sector size 482 - * the inode btree entry: block size 483 - * the on disk inode before ours in the agi hash list: inode cluster size 484 - * the inode btree: max depth * blocksize 485 - * the allocation btrees: 2 trees * (max depth - 1) * block size 493 + * the super block free inode counter, AGF and AGFL: sector size 494 + * the on disk inode (agi unlinked list removal) 495 + * the inode chunk (invalidated, headers only) 496 + * the inode btree 486 497 * the finobt (record insertion, removal or modification) 498 + * 499 + * Note that the inode chunk res. includes an allocfree res. for freeing of the 500 + * inode chunk. This is technically extraneous because the inode chunk free is 501 + * deferred (it occurs after a transaction roll). Include the extra reservation 502 + * anyways since we've had reports of ifree transaction overruns due to too many 503 + * agfl fixups during inode chunk frees. 487 504 */ 488 505 STATIC uint 489 506 xfs_calc_ifree_reservation( ··· 495 504 { 496 505 return XFS_DQUOT_LOGRES(mp) + 497 506 xfs_calc_inode_res(mp, 1) + 498 - xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + 499 - xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + 507 + xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + 500 508 xfs_calc_iunlink_remove_reservation(mp) + 501 - xfs_calc_buf_res(1, 0) + 502 - xfs_calc_buf_res(2 + mp->m_ialloc_blks + 503 - mp->m_in_maxlevels, 0) + 504 - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), 505 - XFS_FSB_TO_B(mp, 1)) + 506 - xfs_calc_finobt_res(mp, 0, 1); 509 + xfs_calc_inode_chunk_res(mp, _FREE) + 510 + xfs_calc_inobt_res(mp) + 511 + xfs_calc_finobt_res(mp); 507 512 } 508 513 509 514 /* ··· 829 842 resp->tr_symlink.tr_logcount = XFS_SYMLINK_LOG_COUNT; 830 843 resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES; 831 844 832 - resp->tr_create.tr_logres = xfs_calc_create_reservation(mp); 845 + resp->tr_create.tr_logres = xfs_calc_icreate_reservation(mp); 833 846 resp->tr_create.tr_logcount = 
XFS_CREATE_LOG_COUNT; 834 847 resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES; 835 848
+324 -16
fs/xfs/scrub/agheader.c
··· 32 32 #include "xfs_inode.h" 33 33 #include "xfs_alloc.h" 34 34 #include "xfs_ialloc.h" 35 + #include "xfs_rmap.h" 35 36 #include "scrub/xfs_scrub.h" 36 37 #include "scrub/scrub.h" 37 38 #include "scrub/common.h" 38 39 #include "scrub/trace.h" 39 40 40 41 /* 41 - * Set up scrub to check all the static metadata in each AG. 42 - * This means the SB, AGF, AGI, and AGFL headers. 42 + * Walk all the blocks in the AGFL. The fn function can return any negative 43 + * error code or XFS_BTREE_QUERY_RANGE_ABORT. 43 44 */ 44 - int 45 - xfs_scrub_setup_ag_header( 46 - struct xfs_scrub_context *sc, 47 - struct xfs_inode *ip) 48 - { 49 - struct xfs_mount *mp = sc->mp; 50 - 51 - if (sc->sm->sm_agno >= mp->m_sb.sb_agcount || 52 - sc->sm->sm_ino || sc->sm->sm_gen) 53 - return -EINVAL; 54 - return xfs_scrub_setup_fs(sc, ip); 55 - } 56 - 57 - /* Walk all the blocks in the AGFL. */ 58 45 int 59 46 xfs_scrub_walk_agfl( 60 47 struct xfs_scrub_context *sc, ··· 102 115 103 116 /* Superblock */ 104 117 118 + /* Cross-reference with the other btrees. */ 119 + STATIC void 120 + xfs_scrub_superblock_xref( 121 + struct xfs_scrub_context *sc, 122 + struct xfs_buf *bp) 123 + { 124 + struct xfs_owner_info oinfo; 125 + struct xfs_mount *mp = sc->mp; 126 + xfs_agnumber_t agno = sc->sm->sm_agno; 127 + xfs_agblock_t agbno; 128 + int error; 129 + 130 + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 131 + return; 132 + 133 + agbno = XFS_SB_BLOCK(mp); 134 + 135 + error = xfs_scrub_ag_init(sc, agno, &sc->sa); 136 + if (!xfs_scrub_xref_process_error(sc, agno, agbno, &error)) 137 + return; 138 + 139 + xfs_scrub_xref_is_used_space(sc, agbno, 1); 140 + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); 141 + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS); 142 + xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); 143 + xfs_scrub_xref_is_not_shared(sc, agbno, 1); 144 + 145 + /* scrub teardown will take care of sc->sa for us */ 146 + } 147 + 105 148 /* 106 149 * Scrub the filesystem superblock. 
107 150 * ··· 160 143 error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp, 161 144 XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), 162 145 XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_sb_buf_ops); 146 + /* 147 + * The superblock verifier can return several different error codes 148 + * if it thinks the superblock doesn't look right. For a mount these 149 + * would all get bounced back to userspace, but if we're here then the 150 + * fs mounted successfully, which means that this secondary superblock 151 + * is simply incorrect. Treat all these codes the same way we treat 152 + * any corruption. 153 + */ 154 + switch (error) { 155 + case -EINVAL: /* also -EWRONGFS */ 156 + case -ENOSYS: 157 + case -EFBIG: 158 + error = -EFSCORRUPTED; 159 + default: 160 + break; 161 + } 163 162 if (!xfs_scrub_process_error(sc, agno, XFS_SB_BLOCK(mp), &error)) 164 163 return error; 165 164 ··· 420 387 BBTOB(bp->b_length) - sizeof(struct xfs_dsb))) 421 388 xfs_scrub_block_set_corrupt(sc, bp); 422 389 390 + xfs_scrub_superblock_xref(sc, bp); 391 + 423 392 return error; 424 393 } 425 394 426 395 /* AGF */ 396 + 397 + /* Tally freespace record lengths. 
*/ 398 + STATIC int 399 + xfs_scrub_agf_record_bno_lengths( 400 + struct xfs_btree_cur *cur, 401 + struct xfs_alloc_rec_incore *rec, 402 + void *priv) 403 + { 404 + xfs_extlen_t *blocks = priv; 405 + 406 + (*blocks) += rec->ar_blockcount; 407 + return 0; 408 + } 409 + 410 + /* Check agf_freeblks */ 411 + static inline void 412 + xfs_scrub_agf_xref_freeblks( 413 + struct xfs_scrub_context *sc) 414 + { 415 + struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); 416 + xfs_extlen_t blocks = 0; 417 + int error; 418 + 419 + if (!sc->sa.bno_cur) 420 + return; 421 + 422 + error = xfs_alloc_query_all(sc->sa.bno_cur, 423 + xfs_scrub_agf_record_bno_lengths, &blocks); 424 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur)) 425 + return; 426 + if (blocks != be32_to_cpu(agf->agf_freeblks)) 427 + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); 428 + } 429 + 430 + /* Cross reference the AGF with the cntbt (freespace by length btree) */ 431 + static inline void 432 + xfs_scrub_agf_xref_cntbt( 433 + struct xfs_scrub_context *sc) 434 + { 435 + struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); 436 + xfs_agblock_t agbno; 437 + xfs_extlen_t blocks; 438 + int have; 439 + int error; 440 + 441 + if (!sc->sa.cnt_cur) 442 + return; 443 + 444 + /* Any freespace at all? */ 445 + error = xfs_alloc_lookup_le(sc->sa.cnt_cur, 0, -1U, &have); 446 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur)) 447 + return; 448 + if (!have) { 449 + if (agf->agf_freeblks != be32_to_cpu(0)) 450 + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); 451 + return; 452 + } 453 + 454 + /* Check agf_longest */ 455 + error = xfs_alloc_get_rec(sc->sa.cnt_cur, &agbno, &blocks, &have); 456 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur)) 457 + return; 458 + if (!have || blocks != be32_to_cpu(agf->agf_longest)) 459 + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); 460 + } 461 + 462 + /* Check the btree block counts in the AGF against the btrees. 
*/ 463 + STATIC void 464 + xfs_scrub_agf_xref_btreeblks( 465 + struct xfs_scrub_context *sc) 466 + { 467 + struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); 468 + struct xfs_mount *mp = sc->mp; 469 + xfs_agblock_t blocks; 470 + xfs_agblock_t btreeblks; 471 + int error; 472 + 473 + /* Check agf_rmap_blocks; set up for agf_btreeblks check */ 474 + if (sc->sa.rmap_cur) { 475 + error = xfs_btree_count_blocks(sc->sa.rmap_cur, &blocks); 476 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) 477 + return; 478 + btreeblks = blocks - 1; 479 + if (blocks != be32_to_cpu(agf->agf_rmap_blocks)) 480 + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); 481 + } else { 482 + btreeblks = 0; 483 + } 484 + 485 + /* 486 + * No rmap cursor; we can't xref if we have the rmapbt feature. 487 + * We also can't do it if we're missing the free space btree cursors. 488 + */ 489 + if ((xfs_sb_version_hasrmapbt(&mp->m_sb) && !sc->sa.rmap_cur) || 490 + !sc->sa.bno_cur || !sc->sa.cnt_cur) 491 + return; 492 + 493 + /* Check agf_btreeblks */ 494 + error = xfs_btree_count_blocks(sc->sa.bno_cur, &blocks); 495 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur)) 496 + return; 497 + btreeblks += blocks - 1; 498 + 499 + error = xfs_btree_count_blocks(sc->sa.cnt_cur, &blocks); 500 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur)) 501 + return; 502 + btreeblks += blocks - 1; 503 + 504 + if (btreeblks != be32_to_cpu(agf->agf_btreeblks)) 505 + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); 506 + } 507 + 508 + /* Check agf_refcount_blocks against tree size */ 509 + static inline void 510 + xfs_scrub_agf_xref_refcblks( 511 + struct xfs_scrub_context *sc) 512 + { 513 + struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); 514 + xfs_agblock_t blocks; 515 + int error; 516 + 517 + if (!sc->sa.refc_cur) 518 + return; 519 + 520 + error = xfs_btree_count_blocks(sc->sa.refc_cur, &blocks); 521 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur)) 522 + 
return; 523 + if (blocks != be32_to_cpu(agf->agf_refcount_blocks)) 524 + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); 525 + } 526 + 527 + /* Cross-reference with the other btrees. */ 528 + STATIC void 529 + xfs_scrub_agf_xref( 530 + struct xfs_scrub_context *sc) 531 + { 532 + struct xfs_owner_info oinfo; 533 + struct xfs_mount *mp = sc->mp; 534 + xfs_agblock_t agbno; 535 + int error; 536 + 537 + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 538 + return; 539 + 540 + agbno = XFS_AGF_BLOCK(mp); 541 + 542 + error = xfs_scrub_ag_btcur_init(sc, &sc->sa); 543 + if (error) 544 + return; 545 + 546 + xfs_scrub_xref_is_used_space(sc, agbno, 1); 547 + xfs_scrub_agf_xref_freeblks(sc); 548 + xfs_scrub_agf_xref_cntbt(sc); 549 + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); 550 + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS); 551 + xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); 552 + xfs_scrub_agf_xref_btreeblks(sc); 553 + xfs_scrub_xref_is_not_shared(sc, agbno, 1); 554 + xfs_scrub_agf_xref_refcblks(sc); 555 + 556 + /* scrub teardown will take care of sc->sa for us */ 557 + } 427 558 428 559 /* Scrub the AGF. */ 429 560 int ··· 611 414 &sc->sa.agf_bp, &sc->sa.agfl_bp); 612 415 if (!xfs_scrub_process_error(sc, agno, XFS_AGF_BLOCK(sc->mp), &error)) 613 416 goto out; 417 + xfs_scrub_buffer_recheck(sc, sc->sa.agf_bp); 614 418 615 419 agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); 616 420 ··· 668 470 if (agfl_count != 0 && fl_count != agfl_count) 669 471 xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); 670 472 473 + xfs_scrub_agf_xref(sc); 671 474 out: 672 475 return error; 673 476 } ··· 676 477 /* AGFL */ 677 478 678 479 struct xfs_scrub_agfl_info { 480 + struct xfs_owner_info oinfo; 679 481 unsigned int sz_entries; 680 482 unsigned int nr_entries; 681 483 xfs_agblock_t *entries; 682 484 }; 485 + 486 + /* Cross-reference with the other btrees. 
*/ 487 + STATIC void 488 + xfs_scrub_agfl_block_xref( 489 + struct xfs_scrub_context *sc, 490 + xfs_agblock_t agbno, 491 + struct xfs_owner_info *oinfo) 492 + { 493 + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 494 + return; 495 + 496 + xfs_scrub_xref_is_used_space(sc, agbno, 1); 497 + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); 498 + xfs_scrub_xref_is_owned_by(sc, agbno, 1, oinfo); 499 + xfs_scrub_xref_is_not_shared(sc, agbno, 1); 500 + } 683 501 684 502 /* Scrub an AGFL block. */ 685 503 STATIC int ··· 715 499 else 716 500 xfs_scrub_block_set_corrupt(sc, sc->sa.agfl_bp); 717 501 502 + xfs_scrub_agfl_block_xref(sc, agbno, priv); 503 + 718 504 return 0; 719 505 } 720 506 ··· 729 511 const xfs_agblock_t *b = pb; 730 512 731 513 return (int)*a - (int)*b; 514 + } 515 + 516 + /* Cross-reference with the other btrees. */ 517 + STATIC void 518 + xfs_scrub_agfl_xref( 519 + struct xfs_scrub_context *sc) 520 + { 521 + struct xfs_owner_info oinfo; 522 + struct xfs_mount *mp = sc->mp; 523 + xfs_agblock_t agbno; 524 + int error; 525 + 526 + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 527 + return; 528 + 529 + agbno = XFS_AGFL_BLOCK(mp); 530 + 531 + error = xfs_scrub_ag_btcur_init(sc, &sc->sa); 532 + if (error) 533 + return; 534 + 535 + xfs_scrub_xref_is_used_space(sc, agbno, 1); 536 + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); 537 + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS); 538 + xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); 539 + xfs_scrub_xref_is_not_shared(sc, agbno, 1); 540 + 541 + /* 542 + * Scrub teardown will take care of sc->sa for us. Leave sc->sa 543 + * active so that the agfl block xref can use it too. 544 + */ 732 545 } 733 546 734 547 /* Scrub the AGFL. 
*/ ··· 781 532 goto out; 782 533 if (!sc->sa.agf_bp) 783 534 return -EFSCORRUPTED; 535 + xfs_scrub_buffer_recheck(sc, sc->sa.agfl_bp); 536 + 537 + xfs_scrub_agfl_xref(sc); 538 + 539 + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 540 + goto out; 784 541 785 542 /* Allocate buffer to ensure uniqueness of AGFL entries. */ 786 543 agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); ··· 803 548 } 804 549 805 550 /* Check the blocks in the AGFL. */ 551 + xfs_rmap_ag_owner(&sai.oinfo, XFS_RMAP_OWN_AG); 806 552 error = xfs_scrub_walk_agfl(sc, xfs_scrub_agfl_block, &sai); 807 553 if (error) 808 554 goto out_free; ··· 831 575 832 576 /* AGI */ 833 577 578 + /* Check agi_count/agi_freecount */ 579 + static inline void 580 + xfs_scrub_agi_xref_icounts( 581 + struct xfs_scrub_context *sc) 582 + { 583 + struct xfs_agi *agi = XFS_BUF_TO_AGI(sc->sa.agi_bp); 584 + xfs_agino_t icount; 585 + xfs_agino_t freecount; 586 + int error; 587 + 588 + if (!sc->sa.ino_cur) 589 + return; 590 + 591 + error = xfs_ialloc_count_inodes(sc->sa.ino_cur, &icount, &freecount); 592 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.ino_cur)) 593 + return; 594 + if (be32_to_cpu(agi->agi_count) != icount || 595 + be32_to_cpu(agi->agi_freecount) != freecount) 596 + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agi_bp); 597 + } 598 + 599 + /* Cross-reference with the other btrees. 
*/ 600 + STATIC void 601 + xfs_scrub_agi_xref( 602 + struct xfs_scrub_context *sc) 603 + { 604 + struct xfs_owner_info oinfo; 605 + struct xfs_mount *mp = sc->mp; 606 + xfs_agblock_t agbno; 607 + int error; 608 + 609 + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 610 + return; 611 + 612 + agbno = XFS_AGI_BLOCK(mp); 613 + 614 + error = xfs_scrub_ag_btcur_init(sc, &sc->sa); 615 + if (error) 616 + return; 617 + 618 + xfs_scrub_xref_is_used_space(sc, agbno, 1); 619 + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); 620 + xfs_scrub_agi_xref_icounts(sc); 621 + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS); 622 + xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); 623 + xfs_scrub_xref_is_not_shared(sc, agbno, 1); 624 + 625 + /* scrub teardown will take care of sc->sa for us */ 626 + } 627 + 834 628 /* Scrub the AGI. */ 835 629 int 836 630 xfs_scrub_agi( ··· 904 598 &sc->sa.agf_bp, &sc->sa.agfl_bp); 905 599 if (!xfs_scrub_process_error(sc, agno, XFS_AGI_BLOCK(sc->mp), &error)) 906 600 goto out; 601 + xfs_scrub_buffer_recheck(sc, sc->sa.agi_bp); 907 602 908 603 agi = XFS_BUF_TO_AGI(sc->sa.agi_bp); 909 604 ··· 960 653 if (agi->agi_pad32 != cpu_to_be32(0)) 961 654 xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp); 962 655 656 + xfs_scrub_agi_xref(sc); 963 657 out: 964 658 return error; 965 659 }
+81
fs/xfs/scrub/alloc.c
··· 31 31 #include "xfs_sb.h" 32 32 #include "xfs_alloc.h" 33 33 #include "xfs_rmap.h" 34 + #include "xfs_alloc.h" 34 35 #include "scrub/xfs_scrub.h" 35 36 #include "scrub/scrub.h" 36 37 #include "scrub/common.h" ··· 50 49 } 51 50 52 51 /* Free space btree scrubber. */ 52 + /* 53 + * Ensure there's a corresponding cntbt/bnobt record matching this 54 + * bnobt/cntbt record, respectively. 55 + */ 56 + STATIC void 57 + xfs_scrub_allocbt_xref_other( 58 + struct xfs_scrub_context *sc, 59 + xfs_agblock_t agbno, 60 + xfs_extlen_t len) 61 + { 62 + struct xfs_btree_cur **pcur; 63 + xfs_agblock_t fbno; 64 + xfs_extlen_t flen; 65 + int has_otherrec; 66 + int error; 67 + 68 + if (sc->sm->sm_type == XFS_SCRUB_TYPE_BNOBT) 69 + pcur = &sc->sa.cnt_cur; 70 + else 71 + pcur = &sc->sa.bno_cur; 72 + if (!*pcur) 73 + return; 74 + 75 + error = xfs_alloc_lookup_le(*pcur, agbno, len, &has_otherrec); 76 + if (!xfs_scrub_should_check_xref(sc, &error, pcur)) 77 + return; 78 + if (!has_otherrec) { 79 + xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0); 80 + return; 81 + } 82 + 83 + error = xfs_alloc_get_rec(*pcur, &fbno, &flen, &has_otherrec); 84 + if (!xfs_scrub_should_check_xref(sc, &error, pcur)) 85 + return; 86 + if (!has_otherrec) { 87 + xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0); 88 + return; 89 + } 90 + 91 + if (fbno != agbno || flen != len) 92 + xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0); 93 + } 94 + 95 + /* Cross-reference with the other btrees. */ 96 + STATIC void 97 + xfs_scrub_allocbt_xref( 98 + struct xfs_scrub_context *sc, 99 + xfs_agblock_t agbno, 100 + xfs_extlen_t len) 101 + { 102 + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 103 + return; 104 + 105 + xfs_scrub_allocbt_xref_other(sc, agbno, len); 106 + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len); 107 + xfs_scrub_xref_has_no_owner(sc, agbno, len); 108 + xfs_scrub_xref_is_not_shared(sc, agbno, len); 109 + } 53 110 54 111 /* Scrub a bnobt/cntbt record. 
*/ 55 112 STATIC int ··· 128 69 !xfs_verify_agbno(mp, agno, bno) || 129 70 !xfs_verify_agbno(mp, agno, bno + len - 1)) 130 71 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 72 + 73 + xfs_scrub_allocbt_xref(bs->sc, bno, len); 131 74 132 75 return error; 133 76 } ··· 160 99 struct xfs_scrub_context *sc) 161 100 { 162 101 return xfs_scrub_allocbt(sc, XFS_BTNUM_CNT); 102 + } 103 + 104 + /* xref check that the extent is not free */ 105 + void 106 + xfs_scrub_xref_is_used_space( 107 + struct xfs_scrub_context *sc, 108 + xfs_agblock_t agbno, 109 + xfs_extlen_t len) 110 + { 111 + bool is_freesp; 112 + int error; 113 + 114 + if (!sc->sa.bno_cur) 115 + return; 116 + 117 + error = xfs_alloc_has_record(sc->sa.bno_cur, agbno, len, &is_freesp); 118 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur)) 119 + return; 120 + if (is_freesp) 121 + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.bno_cur, 0); 163 122 }
+211 -8
fs/xfs/scrub/bmap.c
··· 37 37 #include "xfs_bmap_util.h" 38 38 #include "xfs_bmap_btree.h" 39 39 #include "xfs_rmap.h" 40 + #include "xfs_refcount.h" 40 41 #include "scrub/xfs_scrub.h" 41 42 #include "scrub/scrub.h" 42 43 #include "scrub/common.h" ··· 100 99 int whichfork; 101 100 }; 102 101 102 + /* Look for a corresponding rmap for this irec. */ 103 + static inline bool 104 + xfs_scrub_bmap_get_rmap( 105 + struct xfs_scrub_bmap_info *info, 106 + struct xfs_bmbt_irec *irec, 107 + xfs_agblock_t agbno, 108 + uint64_t owner, 109 + struct xfs_rmap_irec *rmap) 110 + { 111 + xfs_fileoff_t offset; 112 + unsigned int rflags = 0; 113 + int has_rmap; 114 + int error; 115 + 116 + if (info->whichfork == XFS_ATTR_FORK) 117 + rflags |= XFS_RMAP_ATTR_FORK; 118 + 119 + /* 120 + * CoW staging extents are owned (on disk) by the refcountbt, so 121 + * their rmaps do not have offsets. 122 + */ 123 + if (info->whichfork == XFS_COW_FORK) 124 + offset = 0; 125 + else 126 + offset = irec->br_startoff; 127 + 128 + /* 129 + * If the caller thinks this could be a shared bmbt extent (IOWs, 130 + * any data fork extent of a reflink inode) then we have to use the 131 + * range rmap lookup to make sure we get the correct owner/offset. 132 + */ 133 + if (info->is_shared) { 134 + error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno, 135 + owner, offset, rflags, rmap, &has_rmap); 136 + if (!xfs_scrub_should_check_xref(info->sc, &error, 137 + &info->sc->sa.rmap_cur)) 138 + return false; 139 + goto out; 140 + } 141 + 142 + /* 143 + * Otherwise, use the (faster) regular lookup. 
144 + */ 145 + error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno, 0, owner, 146 + offset, rflags, &has_rmap); 147 + if (!xfs_scrub_should_check_xref(info->sc, &error, 148 + &info->sc->sa.rmap_cur)) 149 + return false; 150 + if (!has_rmap) 151 + goto out; 152 + 153 + error = xfs_rmap_get_rec(info->sc->sa.rmap_cur, rmap, &has_rmap); 154 + if (!xfs_scrub_should_check_xref(info->sc, &error, 155 + &info->sc->sa.rmap_cur)) 156 + return false; 157 + 158 + out: 159 + if (!has_rmap) 160 + xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, 161 + irec->br_startoff); 162 + return has_rmap; 163 + } 164 + 165 + /* Make sure that we have rmapbt records for this extent. */ 166 + STATIC void 167 + xfs_scrub_bmap_xref_rmap( 168 + struct xfs_scrub_bmap_info *info, 169 + struct xfs_bmbt_irec *irec, 170 + xfs_agblock_t agbno) 171 + { 172 + struct xfs_rmap_irec rmap; 173 + unsigned long long rmap_end; 174 + uint64_t owner; 175 + 176 + if (!info->sc->sa.rmap_cur) 177 + return; 178 + 179 + if (info->whichfork == XFS_COW_FORK) 180 + owner = XFS_RMAP_OWN_COW; 181 + else 182 + owner = info->sc->ip->i_ino; 183 + 184 + /* Find the rmap record for this irec. */ 185 + if (!xfs_scrub_bmap_get_rmap(info, irec, agbno, owner, &rmap)) 186 + return; 187 + 188 + /* Check the rmap. */ 189 + rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount; 190 + if (rmap.rm_startblock > agbno || 191 + agbno + irec->br_blockcount > rmap_end) 192 + xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, 193 + irec->br_startoff); 194 + 195 + /* 196 + * Check the logical offsets if applicable. CoW staging extents 197 + * don't track logical offsets since the mappings only exist in 198 + * memory. 
199 + */ 200 + if (info->whichfork != XFS_COW_FORK) { 201 + rmap_end = (unsigned long long)rmap.rm_offset + 202 + rmap.rm_blockcount; 203 + if (rmap.rm_offset > irec->br_startoff || 204 + irec->br_startoff + irec->br_blockcount > rmap_end) 205 + xfs_scrub_fblock_xref_set_corrupt(info->sc, 206 + info->whichfork, irec->br_startoff); 207 + } 208 + 209 + if (rmap.rm_owner != owner) 210 + xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, 211 + irec->br_startoff); 212 + 213 + /* 214 + * Check for discrepancies between the unwritten flag in the irec and 215 + * the rmap. Note that the (in-memory) CoW fork distinguishes between 216 + * unwritten and written extents, but we don't track that in the rmap 217 + * records because the blocks are owned (on-disk) by the refcountbt, 218 + * which doesn't track unwritten state. 219 + */ 220 + if (owner != XFS_RMAP_OWN_COW && 221 + irec->br_state == XFS_EXT_UNWRITTEN && 222 + !(rmap.rm_flags & XFS_RMAP_UNWRITTEN)) 223 + xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, 224 + irec->br_startoff); 225 + 226 + if (info->whichfork == XFS_ATTR_FORK && 227 + !(rmap.rm_flags & XFS_RMAP_ATTR_FORK)) 228 + xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, 229 + irec->br_startoff); 230 + if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK) 231 + xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, 232 + irec->br_startoff); 233 + } 234 + 235 + /* Cross-reference a single rtdev extent record. */ 236 + STATIC void 237 + xfs_scrub_bmap_rt_extent_xref( 238 + struct xfs_scrub_bmap_info *info, 239 + struct xfs_inode *ip, 240 + struct xfs_btree_cur *cur, 241 + struct xfs_bmbt_irec *irec) 242 + { 243 + if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 244 + return; 245 + 246 + xfs_scrub_xref_is_used_rt_space(info->sc, irec->br_startblock, 247 + irec->br_blockcount); 248 + } 249 + 250 + /* Cross-reference a single datadev extent record. 
*/ 251 + STATIC void 252 + xfs_scrub_bmap_extent_xref( 253 + struct xfs_scrub_bmap_info *info, 254 + struct xfs_inode *ip, 255 + struct xfs_btree_cur *cur, 256 + struct xfs_bmbt_irec *irec) 257 + { 258 + struct xfs_mount *mp = info->sc->mp; 259 + xfs_agnumber_t agno; 260 + xfs_agblock_t agbno; 261 + xfs_extlen_t len; 262 + int error; 263 + 264 + if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 265 + return; 266 + 267 + agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock); 268 + agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock); 269 + len = irec->br_blockcount; 270 + 271 + error = xfs_scrub_ag_init(info->sc, agno, &info->sc->sa); 272 + if (!xfs_scrub_fblock_process_error(info->sc, info->whichfork, 273 + irec->br_startoff, &error)) 274 + return; 275 + 276 + xfs_scrub_xref_is_used_space(info->sc, agbno, len); 277 + xfs_scrub_xref_is_not_inode_chunk(info->sc, agbno, len); 278 + xfs_scrub_bmap_xref_rmap(info, irec, agbno); 279 + switch (info->whichfork) { 280 + case XFS_DATA_FORK: 281 + if (xfs_is_reflink_inode(info->sc->ip)) 282 + break; 283 + /* fall through */ 284 + case XFS_ATTR_FORK: 285 + xfs_scrub_xref_is_not_shared(info->sc, agbno, 286 + irec->br_blockcount); 287 + break; 288 + case XFS_COW_FORK: 289 + xfs_scrub_xref_is_cow_staging(info->sc, agbno, 290 + irec->br_blockcount); 291 + break; 292 + } 293 + 294 + xfs_scrub_ag_free(info->sc, &info->sc->sa); 295 + } 296 + 103 297 /* Scrub a single extent record. */ 104 298 STATIC int 105 299 xfs_scrub_bmap_extent( ··· 305 109 { 306 110 struct xfs_mount *mp = info->sc->mp; 307 111 struct xfs_buf *bp = NULL; 112 + xfs_filblks_t end; 308 113 int error = 0; 309 114 310 115 if (cur) ··· 333 136 irec->br_startoff); 334 137 335 138 /* Make sure the extent points to a valid place. 
*/ 139 + if (irec->br_blockcount > MAXEXTLEN) 140 + xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, 141 + irec->br_startoff); 336 142 if (irec->br_startblock + irec->br_blockcount <= irec->br_startblock) 337 143 xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, 338 144 irec->br_startoff); 145 + end = irec->br_startblock + irec->br_blockcount - 1; 339 146 if (info->is_rt && 340 147 (!xfs_verify_rtbno(mp, irec->br_startblock) || 341 - !xfs_verify_rtbno(mp, irec->br_startblock + 342 - irec->br_blockcount - 1))) 148 + !xfs_verify_rtbno(mp, end))) 343 149 xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, 344 150 irec->br_startoff); 345 151 if (!info->is_rt && 346 152 (!xfs_verify_fsbno(mp, irec->br_startblock) || 347 - !xfs_verify_fsbno(mp, irec->br_startblock + 348 - irec->br_blockcount - 1))) 153 + !xfs_verify_fsbno(mp, end) || 154 + XFS_FSB_TO_AGNO(mp, irec->br_startblock) != 155 + XFS_FSB_TO_AGNO(mp, end))) 349 156 xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, 350 157 irec->br_startoff); 351 158 ··· 358 157 info->whichfork == XFS_ATTR_FORK) 359 158 xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, 360 159 irec->br_startoff); 160 + 161 + if (info->is_rt) 162 + xfs_scrub_bmap_rt_extent_xref(info, ip, cur, irec); 163 + else 164 + xfs_scrub_bmap_extent_xref(info, ip, cur, irec); 361 165 362 166 info->lastoff = irec->br_startoff + irec->br_blockcount; 363 167 return error; ··· 441 235 struct xfs_ifork *ifp; 442 236 xfs_fileoff_t endoff; 443 237 struct xfs_iext_cursor icur; 444 - bool found; 445 238 int error = 0; 446 239 447 240 ifp = XFS_IFORK_PTR(ip, whichfork); ··· 519 314 /* Scrub extent records. 
*/ 520 315 info.lastoff = 0; 521 316 ifp = XFS_IFORK_PTR(ip, whichfork); 522 - for (found = xfs_iext_lookup_extent(ip, ifp, 0, &icur, &irec); 523 - found != 0; 524 - found = xfs_iext_next_extent(ifp, &icur, &irec)) { 317 + for_each_xfs_iext(ifp, &icur, &irec) { 525 318 if (xfs_scrub_should_terminate(sc, &error)) 526 319 break; 527 320 if (isnullstartblock(irec.br_startblock))
+173 -13
fs/xfs/scrub/btree.c
··· 42 42 * Check for btree operation errors. See the section about handling 43 43 * operational errors in common.c. 44 44 */ 45 - bool 46 - xfs_scrub_btree_process_error( 45 + static bool 46 + __xfs_scrub_btree_process_error( 47 47 struct xfs_scrub_context *sc, 48 48 struct xfs_btree_cur *cur, 49 49 int level, 50 - int *error) 50 + int *error, 51 + __u32 errflag, 52 + void *ret_ip) 51 53 { 52 54 if (*error == 0) 53 55 return true; ··· 62 60 case -EFSBADCRC: 63 61 case -EFSCORRUPTED: 64 62 /* Note the badness but don't abort. */ 65 - sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 63 + sc->sm->sm_flags |= errflag; 66 64 *error = 0; 67 65 /* fall through */ 68 66 default: 69 67 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) 70 68 trace_xfs_scrub_ifork_btree_op_error(sc, cur, level, 71 - *error, __return_address); 69 + *error, ret_ip); 72 70 else 73 71 trace_xfs_scrub_btree_op_error(sc, cur, level, 74 - *error, __return_address); 72 + *error, ret_ip); 75 73 break; 76 74 } 77 75 return false; 78 76 } 79 77 78 + bool 79 + xfs_scrub_btree_process_error( 80 + struct xfs_scrub_context *sc, 81 + struct xfs_btree_cur *cur, 82 + int level, 83 + int *error) 84 + { 85 + return __xfs_scrub_btree_process_error(sc, cur, level, error, 86 + XFS_SCRUB_OFLAG_CORRUPT, __return_address); 87 + } 88 + 89 + bool 90 + xfs_scrub_btree_xref_process_error( 91 + struct xfs_scrub_context *sc, 92 + struct xfs_btree_cur *cur, 93 + int level, 94 + int *error) 95 + { 96 + return __xfs_scrub_btree_process_error(sc, cur, level, error, 97 + XFS_SCRUB_OFLAG_XFAIL, __return_address); 98 + } 99 + 80 100 /* Record btree block corruption. 
*/ 101 + static void 102 + __xfs_scrub_btree_set_corrupt( 103 + struct xfs_scrub_context *sc, 104 + struct xfs_btree_cur *cur, 105 + int level, 106 + __u32 errflag, 107 + void *ret_ip) 108 + { 109 + sc->sm->sm_flags |= errflag; 110 + 111 + if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) 112 + trace_xfs_scrub_ifork_btree_error(sc, cur, level, 113 + ret_ip); 114 + else 115 + trace_xfs_scrub_btree_error(sc, cur, level, 116 + ret_ip); 117 + } 118 + 81 119 void 82 120 xfs_scrub_btree_set_corrupt( 83 121 struct xfs_scrub_context *sc, 84 122 struct xfs_btree_cur *cur, 85 123 int level) 86 124 { 87 - sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 125 + __xfs_scrub_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT, 126 + __return_address); 127 + } 88 128 89 - if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) 90 - trace_xfs_scrub_ifork_btree_error(sc, cur, level, 91 - __return_address); 92 - else 93 - trace_xfs_scrub_btree_error(sc, cur, level, 94 - __return_address); 129 + void 130 + xfs_scrub_btree_xref_set_corrupt( 131 + struct xfs_scrub_context *sc, 132 + struct xfs_btree_cur *cur, 133 + int level) 134 + { 135 + __xfs_scrub_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT, 136 + __return_address); 95 137 } 96 138 97 139 /* ··· 314 268 pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock); 315 269 if (!xfs_scrub_btree_ptr_ok(bs, level + 1, pp)) 316 270 goto out; 271 + if (pbp) 272 + xfs_scrub_buffer_recheck(bs->sc, pbp); 317 273 318 274 if (xfs_btree_diff_two_ptrs(cur, pp, sibling)) 319 275 xfs_scrub_btree_set_corrupt(bs->sc, cur, level); ··· 363 315 return error; 364 316 } 365 317 318 + struct check_owner { 319 + struct list_head list; 320 + xfs_daddr_t daddr; 321 + int level; 322 + }; 323 + 324 + /* 325 + * Make sure this btree block isn't in the free list and that there's 326 + * an rmap record for it. 
327 + */ 328 + STATIC int 329 + xfs_scrub_btree_check_block_owner( 330 + struct xfs_scrub_btree *bs, 331 + int level, 332 + xfs_daddr_t daddr) 333 + { 334 + xfs_agnumber_t agno; 335 + xfs_agblock_t agbno; 336 + xfs_btnum_t btnum; 337 + bool init_sa; 338 + int error = 0; 339 + 340 + if (!bs->cur) 341 + return 0; 342 + 343 + btnum = bs->cur->bc_btnum; 344 + agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr); 345 + agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr); 346 + 347 + init_sa = bs->cur->bc_flags & XFS_BTREE_LONG_PTRS; 348 + if (init_sa) { 349 + error = xfs_scrub_ag_init(bs->sc, agno, &bs->sc->sa); 350 + if (!xfs_scrub_btree_xref_process_error(bs->sc, bs->cur, 351 + level, &error)) 352 + return error; 353 + } 354 + 355 + xfs_scrub_xref_is_used_space(bs->sc, agbno, 1); 356 + /* 357 + * The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we 358 + * have to nullify it (to shut down further block owner checks) if 359 + * self-xref encounters problems. 360 + */ 361 + if (!bs->sc->sa.bno_cur && btnum == XFS_BTNUM_BNO) 362 + bs->cur = NULL; 363 + 364 + xfs_scrub_xref_is_owned_by(bs->sc, agbno, 1, bs->oinfo); 365 + if (!bs->sc->sa.rmap_cur && btnum == XFS_BTNUM_RMAP) 366 + bs->cur = NULL; 367 + 368 + if (init_sa) 369 + xfs_scrub_ag_free(bs->sc, &bs->sc->sa); 370 + 371 + return error; 372 + } 373 + 374 + /* Check the owner of a btree block. */ 375 + STATIC int 376 + xfs_scrub_btree_check_owner( 377 + struct xfs_scrub_btree *bs, 378 + int level, 379 + struct xfs_buf *bp) 380 + { 381 + struct xfs_btree_cur *cur = bs->cur; 382 + struct check_owner *co; 383 + 384 + if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && bp == NULL) 385 + return 0; 386 + 387 + /* 388 + * We want to cross-reference each btree block with the bnobt 389 + * and the rmapbt. We cannot cross-reference the bnobt or 390 + * rmapbt while scanning the bnobt or rmapbt, respectively, 391 + * because we cannot alter the cursor and we'd prefer not to 392 + * duplicate cursors. 
Therefore, save the buffer daddr for 393 + * later scanning. 394 + */ 395 + if (cur->bc_btnum == XFS_BTNUM_BNO || cur->bc_btnum == XFS_BTNUM_RMAP) { 396 + co = kmem_alloc(sizeof(struct check_owner), 397 + KM_MAYFAIL | KM_NOFS); 398 + if (!co) 399 + return -ENOMEM; 400 + co->level = level; 401 + co->daddr = XFS_BUF_ADDR(bp); 402 + list_add_tail(&co->list, &bs->to_check); 403 + return 0; 404 + } 405 + 406 + return xfs_scrub_btree_check_block_owner(bs, level, XFS_BUF_ADDR(bp)); 407 + } 408 + 366 409 /* 367 410 * Grab and scrub a btree block given a btree pointer. Returns block 368 411 * and buffer pointers (if applicable) if they're ok to use. ··· 488 349 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level); 489 350 return 0; 490 351 } 352 + if (*pbp) 353 + xfs_scrub_buffer_recheck(bs->sc, *pbp); 354 + 355 + /* 356 + * Check the block's owner; this function absorbs error codes 357 + * for us. 358 + */ 359 + error = xfs_scrub_btree_check_owner(bs, level, *pbp); 360 + if (error) 361 + return error; 491 362 492 363 /* 493 364 * Check the block's siblings; this function absorbs error codes ··· 570 421 struct xfs_btree_block *block; 571 422 int level; 572 423 struct xfs_buf *bp; 424 + struct check_owner *co; 425 + struct check_owner *n; 573 426 int i; 574 427 int error = 0; 575 428 ··· 663 512 } 664 513 665 514 out: 515 + /* Process deferred owner checks on btree blocks. */ 516 + list_for_each_entry_safe(co, n, &bs.to_check, list) { 517 + if (!error && bs.cur) 518 + error = xfs_scrub_btree_check_block_owner(&bs, 519 + co->level, co->daddr); 520 + list_del(&co->list); 521 + kmem_free(co); 522 + } 523 + 666 524 return error; 667 525 }
+9
fs/xfs/scrub/btree.h
··· 26 26 bool xfs_scrub_btree_process_error(struct xfs_scrub_context *sc, 27 27 struct xfs_btree_cur *cur, int level, int *error); 28 28 29 + /* Check for btree xref operation errors. */ 30 + bool xfs_scrub_btree_xref_process_error(struct xfs_scrub_context *sc, 31 + struct xfs_btree_cur *cur, int level, 32 + int *error); 33 + 29 34 /* Check for btree corruption. */ 30 35 void xfs_scrub_btree_set_corrupt(struct xfs_scrub_context *sc, 36 + struct xfs_btree_cur *cur, int level); 37 + 38 + /* Check for btree xref discrepancies. */ 39 + void xfs_scrub_btree_xref_set_corrupt(struct xfs_scrub_context *sc, 31 40 struct xfs_btree_cur *cur, int level); 32 41 33 42 struct xfs_scrub_btree;
+245 -38
fs/xfs/scrub/common.c
··· 78 78 */ 79 79 80 80 /* Check for operational errors. */ 81 + static bool 82 + __xfs_scrub_process_error( 83 + struct xfs_scrub_context *sc, 84 + xfs_agnumber_t agno, 85 + xfs_agblock_t bno, 86 + int *error, 87 + __u32 errflag, 88 + void *ret_ip) 89 + { 90 + switch (*error) { 91 + case 0: 92 + return true; 93 + case -EDEADLOCK: 94 + /* Used to restart an op with deadlock avoidance. */ 95 + trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error); 96 + break; 97 + case -EFSBADCRC: 98 + case -EFSCORRUPTED: 99 + /* Note the badness but don't abort. */ 100 + sc->sm->sm_flags |= errflag; 101 + *error = 0; 102 + /* fall through */ 103 + default: 104 + trace_xfs_scrub_op_error(sc, agno, bno, *error, 105 + ret_ip); 106 + break; 107 + } 108 + return false; 109 + } 110 + 81 111 bool 82 112 xfs_scrub_process_error( 83 113 struct xfs_scrub_context *sc, ··· 115 85 xfs_agblock_t bno, 116 86 int *error) 117 87 { 118 - switch (*error) { 119 - case 0: 120 - return true; 121 - case -EDEADLOCK: 122 - /* Used to restart an op with deadlock avoidance. */ 123 - trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error); 124 - break; 125 - case -EFSBADCRC: 126 - case -EFSCORRUPTED: 127 - /* Note the badness but don't abort. */ 128 - sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 129 - *error = 0; 130 - /* fall through */ 131 - default: 132 - trace_xfs_scrub_op_error(sc, agno, bno, *error, 133 - __return_address); 134 - break; 135 - } 136 - return false; 88 + return __xfs_scrub_process_error(sc, agno, bno, error, 89 + XFS_SCRUB_OFLAG_CORRUPT, __return_address); 90 + } 91 + 92 + bool 93 + xfs_scrub_xref_process_error( 94 + struct xfs_scrub_context *sc, 95 + xfs_agnumber_t agno, 96 + xfs_agblock_t bno, 97 + int *error) 98 + { 99 + return __xfs_scrub_process_error(sc, agno, bno, error, 100 + XFS_SCRUB_OFLAG_XFAIL, __return_address); 137 101 } 138 102 139 103 /* Check for operational errors for a file offset. 
*/ 140 - bool 141 - xfs_scrub_fblock_process_error( 104 + static bool 105 + __xfs_scrub_fblock_process_error( 142 106 struct xfs_scrub_context *sc, 143 107 int whichfork, 144 108 xfs_fileoff_t offset, 145 - int *error) 109 + int *error, 110 + __u32 errflag, 111 + void *ret_ip) 146 112 { 147 113 switch (*error) { 148 114 case 0: ··· 150 124 case -EFSBADCRC: 151 125 case -EFSCORRUPTED: 152 126 /* Note the badness but don't abort. */ 153 - sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 127 + sc->sm->sm_flags |= errflag; 154 128 *error = 0; 155 129 /* fall through */ 156 130 default: 157 131 trace_xfs_scrub_file_op_error(sc, whichfork, offset, *error, 158 - __return_address); 132 + ret_ip); 159 133 break; 160 134 } 161 135 return false; 136 + } 137 + 138 + bool 139 + xfs_scrub_fblock_process_error( 140 + struct xfs_scrub_context *sc, 141 + int whichfork, 142 + xfs_fileoff_t offset, 143 + int *error) 144 + { 145 + return __xfs_scrub_fblock_process_error(sc, whichfork, offset, error, 146 + XFS_SCRUB_OFLAG_CORRUPT, __return_address); 147 + } 148 + 149 + bool 150 + xfs_scrub_fblock_xref_process_error( 151 + struct xfs_scrub_context *sc, 152 + int whichfork, 153 + xfs_fileoff_t offset, 154 + int *error) 155 + { 156 + return __xfs_scrub_fblock_process_error(sc, whichfork, offset, error, 157 + XFS_SCRUB_OFLAG_XFAIL, __return_address); 162 158 } 163 159 164 160 /* ··· 231 183 trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address); 232 184 } 233 185 186 + /* Record a corruption while cross-referencing. */ 187 + void 188 + xfs_scrub_block_xref_set_corrupt( 189 + struct xfs_scrub_context *sc, 190 + struct xfs_buf *bp) 191 + { 192 + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT; 193 + trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address); 194 + } 195 + 234 196 /* 235 197 * Record a corrupt inode. The trace data will include the block given 236 198 * by bp if bp is given; otherwise it will use the block location of the ··· 256 198 trace_xfs_scrub_ino_error(sc, ino, bp ? 
bp->b_bn : 0, __return_address); 257 199 } 258 200 201 + /* Record a corruption while cross-referencing with an inode. */ 202 + void 203 + xfs_scrub_ino_xref_set_corrupt( 204 + struct xfs_scrub_context *sc, 205 + xfs_ino_t ino, 206 + struct xfs_buf *bp) 207 + { 208 + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT; 209 + trace_xfs_scrub_ino_error(sc, ino, bp ? bp->b_bn : 0, __return_address); 210 + } 211 + 259 212 /* Record corruption in a block indexed by a file fork. */ 260 213 void 261 214 xfs_scrub_fblock_set_corrupt( ··· 275 206 xfs_fileoff_t offset) 276 207 { 277 208 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 209 + trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address); 210 + } 211 + 212 + /* Record a corruption while cross-referencing a fork block. */ 213 + void 214 + xfs_scrub_fblock_xref_set_corrupt( 215 + struct xfs_scrub_context *sc, 216 + int whichfork, 217 + xfs_fileoff_t offset) 218 + { 219 + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT; 278 220 trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address); 279 221 } 280 222 ··· 322 242 { 323 243 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_INCOMPLETE; 324 244 trace_xfs_scrub_incomplete(sc, __return_address); 245 + } 246 + 247 + /* 248 + * rmap scrubbing -- compute the number of blocks with a given owner, 249 + * at least according to the reverse mapping data. 
250 + */ 251 + 252 + struct xfs_scrub_rmap_ownedby_info { 253 + struct xfs_owner_info *oinfo; 254 + xfs_filblks_t *blocks; 255 + }; 256 + 257 + STATIC int 258 + xfs_scrub_count_rmap_ownedby_irec( 259 + struct xfs_btree_cur *cur, 260 + struct xfs_rmap_irec *rec, 261 + void *priv) 262 + { 263 + struct xfs_scrub_rmap_ownedby_info *sroi = priv; 264 + bool irec_attr; 265 + bool oinfo_attr; 266 + 267 + irec_attr = rec->rm_flags & XFS_RMAP_ATTR_FORK; 268 + oinfo_attr = sroi->oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK; 269 + 270 + if (rec->rm_owner != sroi->oinfo->oi_owner) 271 + return 0; 272 + 273 + if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) || irec_attr == oinfo_attr) 274 + (*sroi->blocks) += rec->rm_blockcount; 275 + 276 + return 0; 277 + } 278 + 279 + /* 280 + * Calculate the number of blocks the rmap thinks are owned by something. 281 + * The caller should pass us an rmapbt cursor. 282 + */ 283 + int 284 + xfs_scrub_count_rmap_ownedby_ag( 285 + struct xfs_scrub_context *sc, 286 + struct xfs_btree_cur *cur, 287 + struct xfs_owner_info *oinfo, 288 + xfs_filblks_t *blocks) 289 + { 290 + struct xfs_scrub_rmap_ownedby_info sroi; 291 + 292 + sroi.oinfo = oinfo; 293 + *blocks = 0; 294 + sroi.blocks = blocks; 295 + 296 + return xfs_rmap_query_all(cur, xfs_scrub_count_rmap_ownedby_irec, 297 + &sroi); 325 298 } 326 299 327 300 /* ··· 435 302 error = xfs_alloc_read_agfl(mp, sc->tp, agno, agfl); 436 303 if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL)) 437 304 goto out; 438 - 305 + error = 0; 439 306 out: 440 307 return error; 441 308 } ··· 605 472 return error; 606 473 } 607 474 608 - error = xfs_scrub_setup_ag_header(sc, ip); 475 + error = xfs_scrub_setup_fs(sc, ip); 609 476 if (error) 610 477 return error; 611 478 ··· 636 503 struct xfs_scrub_context *sc, 637 504 struct xfs_inode *ip_in) 638 505 { 506 + struct xfs_imap imap; 639 507 struct xfs_mount *mp = sc->mp; 640 508 struct xfs_inode *ip = NULL; 641 509 int error; 642 - 643 - /* 644 - * If userspace 
passed us an AG number or a generation number 645 - * without an inode number, they haven't got a clue so bail out 646 - * immediately. 647 - */ 648 - if (sc->sm->sm_agno || (sc->sm->sm_gen && !sc->sm->sm_ino)) 649 - return -EINVAL; 650 510 651 511 /* We want to scan the inode we already had opened. */ 652 512 if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) { ··· 652 526 return -ENOENT; 653 527 error = xfs_iget(mp, NULL, sc->sm->sm_ino, 654 528 XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE, 0, &ip); 655 - if (error == -ENOENT || error == -EINVAL) { 656 - /* inode doesn't exist... */ 657 - return -ENOENT; 658 - } else if (error) { 529 + switch (error) { 530 + case -ENOENT: 531 + /* Inode doesn't exist, just bail out. */ 532 + return error; 533 + case 0: 534 + /* Got an inode, continue. */ 535 + break; 536 + case -EINVAL: 537 + /* 538 + * -EINVAL with IGET_UNTRUSTED could mean one of several 539 + * things: userspace gave us an inode number that doesn't 540 + * correspond to fs space, or doesn't have an inobt entry; 541 + * or it could simply mean that the inode buffer failed the 542 + * read verifiers. 543 + * 544 + * Try just the inode mapping lookup -- if it succeeds, then 545 + * the inode buffer verifier failed and something needs fixing. 546 + * Otherwise, we really couldn't find it so tell userspace 547 + * that it no longer exists. 548 + */ 549 + error = xfs_imap(sc->mp, sc->tp, sc->sm->sm_ino, &imap, 550 + XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE); 551 + if (error) 552 + return -ENOENT; 553 + error = -EFSCORRUPTED; 554 + /* fall through */ 555 + default: 659 556 trace_xfs_scrub_op_error(sc, 660 557 XFS_INO_TO_AGNO(mp, sc->sm->sm_ino), 661 558 XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino), ··· 720 571 out: 721 572 /* scrub teardown will unlock and release the inode for us */ 722 573 return error; 574 + } 575 + 576 + /* 577 + * Predicate that decides if we need to evaluate the cross-reference check. 
578 + * If there was an error accessing the cross-reference btree, just delete 579 + * the cursor and skip the check. 580 + */ 581 + bool 582 + xfs_scrub_should_check_xref( 583 + struct xfs_scrub_context *sc, 584 + int *error, 585 + struct xfs_btree_cur **curpp) 586 + { 587 + if (*error == 0) 588 + return true; 589 + 590 + if (curpp) { 591 + /* If we've already given up on xref, just bail out. */ 592 + if (!*curpp) 593 + return false; 594 + 595 + /* xref error, delete cursor and bail out. */ 596 + xfs_btree_del_cursor(*curpp, XFS_BTREE_ERROR); 597 + *curpp = NULL; 598 + } 599 + 600 + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL; 601 + trace_xfs_scrub_xref_error(sc, *error, __return_address); 602 + 603 + /* 604 + * Errors encountered during cross-referencing with another 605 + * data structure should not cause this scrubber to abort. 606 + */ 607 + *error = 0; 608 + return false; 609 + } 610 + 611 + /* Run the structure verifiers on in-memory buffers to detect bad memory. */ 612 + void 613 + xfs_scrub_buffer_recheck( 614 + struct xfs_scrub_context *sc, 615 + struct xfs_buf *bp) 616 + { 617 + xfs_failaddr_t fa; 618 + 619 + if (bp->b_ops == NULL) { 620 + xfs_scrub_block_set_corrupt(sc, bp); 621 + return; 622 + } 623 + if (bp->b_ops->verify_struct == NULL) { 624 + xfs_scrub_set_incomplete(sc); 625 + return; 626 + } 627 + fa = bp->b_ops->verify_struct(bp); 628 + if (!fa) 629 + return; 630 + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 631 + trace_xfs_scrub_block_error(sc, bp->b_bn, fa); 723 632 }
+21 -2
fs/xfs/scrub/common.h
··· 56 56 bool xfs_scrub_fblock_process_error(struct xfs_scrub_context *sc, int whichfork, 57 57 xfs_fileoff_t offset, int *error); 58 58 59 + bool xfs_scrub_xref_process_error(struct xfs_scrub_context *sc, 60 + xfs_agnumber_t agno, xfs_agblock_t bno, int *error); 61 + bool xfs_scrub_fblock_xref_process_error(struct xfs_scrub_context *sc, 62 + int whichfork, xfs_fileoff_t offset, int *error); 63 + 59 64 void xfs_scrub_block_set_preen(struct xfs_scrub_context *sc, 60 65 struct xfs_buf *bp); 61 66 void xfs_scrub_ino_set_preen(struct xfs_scrub_context *sc, xfs_ino_t ino, ··· 73 68 void xfs_scrub_fblock_set_corrupt(struct xfs_scrub_context *sc, int whichfork, 74 69 xfs_fileoff_t offset); 75 70 71 + void xfs_scrub_block_xref_set_corrupt(struct xfs_scrub_context *sc, 72 + struct xfs_buf *bp); 73 + void xfs_scrub_ino_xref_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino, 74 + struct xfs_buf *bp); 75 + void xfs_scrub_fblock_xref_set_corrupt(struct xfs_scrub_context *sc, 76 + int whichfork, xfs_fileoff_t offset); 77 + 76 78 void xfs_scrub_ino_set_warning(struct xfs_scrub_context *sc, xfs_ino_t ino, 77 79 struct xfs_buf *bp); 78 80 void xfs_scrub_fblock_set_warning(struct xfs_scrub_context *sc, int whichfork, ··· 88 76 void xfs_scrub_set_incomplete(struct xfs_scrub_context *sc); 89 77 int xfs_scrub_checkpoint_log(struct xfs_mount *mp); 90 78 79 + /* Are we set up for a cross-referencing check? 
*/ 80 + bool xfs_scrub_should_check_xref(struct xfs_scrub_context *sc, int *error, 81 + struct xfs_btree_cur **curpp); 82 + 91 83 /* Setup functions */ 92 84 int xfs_scrub_setup_fs(struct xfs_scrub_context *sc, struct xfs_inode *ip); 93 - int xfs_scrub_setup_ag_header(struct xfs_scrub_context *sc, 94 - struct xfs_inode *ip); 95 85 int xfs_scrub_setup_ag_allocbt(struct xfs_scrub_context *sc, 96 86 struct xfs_inode *ip); 97 87 int xfs_scrub_setup_ag_iallocbt(struct xfs_scrub_context *sc, ··· 148 134 int (*fn)(struct xfs_scrub_context *, xfs_agblock_t bno, 149 135 void *), 150 136 void *priv); 137 + int xfs_scrub_count_rmap_ownedby_ag(struct xfs_scrub_context *sc, 138 + struct xfs_btree_cur *cur, 139 + struct xfs_owner_info *oinfo, 140 + xfs_filblks_t *blocks); 151 141 152 142 int xfs_scrub_setup_ag_btree(struct xfs_scrub_context *sc, 153 143 struct xfs_inode *ip, bool force_log); 154 144 int xfs_scrub_get_inode(struct xfs_scrub_context *sc, struct xfs_inode *ip_in); 155 145 int xfs_scrub_setup_inode_contents(struct xfs_scrub_context *sc, 156 146 struct xfs_inode *ip, unsigned int resblks); 147 + void xfs_scrub_buffer_recheck(struct xfs_scrub_context *sc, struct xfs_buf *bp); 157 148 158 149 #endif /* __XFS_SCRUB_COMMON_H__ */
+22
fs/xfs/scrub/dabtree.c
··· 233 233 return; 234 234 } 235 235 } 236 + static void * 237 + xfs_scrub_da_btree_verify( 238 + struct xfs_buf *bp) 239 + { 240 + struct xfs_da_blkinfo *info = bp->b_addr; 241 + 242 + switch (be16_to_cpu(info->magic)) { 243 + case XFS_DIR2_LEAF1_MAGIC: 244 + case XFS_DIR3_LEAF1_MAGIC: 245 + bp->b_ops = &xfs_dir3_leaf1_buf_ops; 246 + return bp->b_ops->verify_struct(bp); 247 + default: 248 + bp->b_ops = &xfs_da3_node_buf_ops; 249 + return bp->b_ops->verify_struct(bp); 250 + } 251 + } 236 252 237 253 static const struct xfs_buf_ops xfs_scrub_da_btree_buf_ops = { 238 254 .name = "xfs_scrub_da_btree", 239 255 .verify_read = xfs_scrub_da_btree_read_verify, 240 256 .verify_write = xfs_scrub_da_btree_write_verify, 257 + .verify_struct = xfs_scrub_da_btree_verify, 241 258 }; 242 259 243 260 /* Check a block's sibling. */ ··· 293 276 xfs_scrub_da_set_corrupt(ds, level); 294 277 return error; 295 278 } 279 + if (ds->state->altpath.blk[level].bp) 280 + xfs_scrub_buffer_recheck(ds->sc, 281 + ds->state->altpath.blk[level].bp); 296 282 297 283 /* Compare upper level pointer to sibling pointer. */ 298 284 if (ds->state->altpath.blk[level].blkno != sibling) ··· 378 358 &xfs_scrub_da_btree_buf_ops); 379 359 if (!xfs_scrub_da_process_error(ds, level, &error)) 380 360 goto out_nobuf; 361 + if (blk->bp) 362 + xfs_scrub_buffer_recheck(ds->sc, blk->bp); 381 363 382 364 /* 383 365 * We didn't find a dir btree root block, which means that
+35 -9
fs/xfs/scrub/dir.c
··· 92 92 * inodes can trigger immediate inactive cleanup of the inode. 93 93 */ 94 94 error = xfs_iget(mp, sdc->sc->tp, inum, 0, 0, &ip); 95 - if (!xfs_scrub_fblock_process_error(sdc->sc, XFS_DATA_FORK, offset, 95 + if (!xfs_scrub_fblock_xref_process_error(sdc->sc, XFS_DATA_FORK, offset, 96 96 &error)) 97 97 goto out; 98 98 ··· 200 200 struct xfs_inode *dp = ds->dargs.dp; 201 201 struct xfs_dir2_data_entry *dent; 202 202 struct xfs_buf *bp; 203 + char *p, *endp; 203 204 xfs_ino_t ino; 204 205 xfs_dablk_t rec_bno; 205 206 xfs_dir2_db_t db; ··· 238 237 xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); 239 238 goto out; 240 239 } 240 + xfs_scrub_buffer_recheck(ds->sc, bp); 241 + 242 + dent = (struct xfs_dir2_data_entry *)(((char *)bp->b_addr) + off); 243 + 244 + /* Make sure we got a real directory entry. */ 245 + p = (char *)mp->m_dir_inode_ops->data_entry_p(bp->b_addr); 246 + endp = xfs_dir3_data_endp(mp->m_dir_geo, bp->b_addr); 247 + if (!endp) { 248 + xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); 249 + goto out_relse; 250 + } 251 + while (p < endp) { 252 + struct xfs_dir2_data_entry *dep; 253 + struct xfs_dir2_data_unused *dup; 254 + 255 + dup = (struct xfs_dir2_data_unused *)p; 256 + if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { 257 + p += be16_to_cpu(dup->length); 258 + continue; 259 + } 260 + dep = (struct xfs_dir2_data_entry *)p; 261 + if (dep == dent) 262 + break; 263 + p += mp->m_dir_inode_ops->data_entsize(dep->namelen); 264 + } 265 + if (p >= endp) { 266 + xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); 267 + goto out_relse; 268 + } 241 269 242 270 /* Retrieve the entry, sanity check it, and compare hashes. 
*/ 243 - dent = (struct xfs_dir2_data_entry *)(((char *)bp->b_addr) + off); 244 271 ino = be64_to_cpu(dent->inumber); 245 272 hash = be32_to_cpu(ent->hashval); 246 273 tag = be16_to_cpup(dp->d_ops->data_entry_tag_p(dent)); ··· 353 324 } 354 325 if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) 355 326 goto out; 327 + xfs_scrub_buffer_recheck(sc, bp); 356 328 357 329 /* XXX: Check xfs_dir3_data_hdr.pad is zero once we start setting it. */ 358 330 ··· 391 361 392 362 /* Make sure the bestfrees are actually the best free spaces. */ 393 363 ptr = (char *)d_ops->data_entry_p(bp->b_addr); 394 - if (is_block) { 395 - struct xfs_dir2_block_tail *btp; 396 - 397 - btp = xfs_dir2_block_tail_p(mp->m_dir_geo, bp->b_addr); 398 - endptr = (char *)xfs_dir2_block_leaf_p(btp); 399 - } else 400 - endptr = (char *)bp->b_addr + BBTOB(bp->b_length); 364 + endptr = xfs_dir3_data_endp(mp->m_dir_geo, bp->b_addr); 401 365 402 366 /* Iterate the entries, stopping when we hit or go past the end. */ 403 367 while (ptr < endptr) { ··· 498 474 error = xfs_dir3_leaf_read(sc->tp, sc->ip, lblk, -1, &bp); 499 475 if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) 500 476 goto out; 477 + xfs_scrub_buffer_recheck(sc, bp); 501 478 502 479 leaf = bp->b_addr; 503 480 d_ops->leaf_hdr_from_disk(&leafhdr, leaf); ··· 584 559 error = xfs_dir2_free_read(sc->tp, sc->ip, lblk, &bp); 585 560 if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) 586 561 goto out; 562 + xfs_scrub_buffer_recheck(sc, bp); 587 563 588 564 if (xfs_sb_version_hascrc(&sc->mp->m_sb)) { 589 565 struct xfs_dir3_free_hdr *hdr3 = bp->b_addr;
+192 -2
fs/xfs/scrub/ialloc.c
··· 58 58 59 59 /* Inode btree scrubber. */ 60 60 61 + /* 62 + * If we're checking the finobt, cross-reference with the inobt. 63 + * Otherwise we're checking the inobt; if there is an finobt, make sure 64 + * we have a record or not depending on freecount. 65 + */ 66 + static inline void 67 + xfs_scrub_iallocbt_chunk_xref_other( 68 + struct xfs_scrub_context *sc, 69 + struct xfs_inobt_rec_incore *irec, 70 + xfs_agino_t agino) 71 + { 72 + struct xfs_btree_cur **pcur; 73 + bool has_irec; 74 + int error; 75 + 76 + if (sc->sm->sm_type == XFS_SCRUB_TYPE_FINOBT) 77 + pcur = &sc->sa.ino_cur; 78 + else 79 + pcur = &sc->sa.fino_cur; 80 + if (!(*pcur)) 81 + return; 82 + error = xfs_ialloc_has_inode_record(*pcur, agino, agino, &has_irec); 83 + if (!xfs_scrub_should_check_xref(sc, &error, pcur)) 84 + return; 85 + if (((irec->ir_freecount > 0 && !has_irec) || 86 + (irec->ir_freecount == 0 && has_irec))) 87 + xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0); 88 + } 89 + 90 + /* Cross-reference with the other btrees. */ 91 + STATIC void 92 + xfs_scrub_iallocbt_chunk_xref( 93 + struct xfs_scrub_context *sc, 94 + struct xfs_inobt_rec_incore *irec, 95 + xfs_agino_t agino, 96 + xfs_agblock_t agbno, 97 + xfs_extlen_t len) 98 + { 99 + struct xfs_owner_info oinfo; 100 + 101 + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 102 + return; 103 + 104 + xfs_scrub_xref_is_used_space(sc, agbno, len); 105 + xfs_scrub_iallocbt_chunk_xref_other(sc, irec, agino); 106 + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES); 107 + xfs_scrub_xref_is_owned_by(sc, agbno, len, &oinfo); 108 + xfs_scrub_xref_is_not_shared(sc, agbno, len); 109 + } 110 + 61 111 /* Is this chunk worth checking? 
*/ 62 112 STATIC bool 63 113 xfs_scrub_iallocbt_chunk( ··· 125 75 !xfs_verify_agbno(mp, agno, bno) || 126 76 !xfs_verify_agbno(mp, agno, bno + len - 1)) 127 77 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 78 + 79 + xfs_scrub_iallocbt_chunk_xref(bs->sc, irec, agino, bno, len); 128 80 129 81 return true; 130 82 } ··· 242 190 } 243 191 244 192 /* If any part of this is a hole, skip it. */ 245 - if (ir_holemask) 193 + if (ir_holemask) { 194 + xfs_scrub_xref_is_not_owned_by(bs->sc, agbno, 195 + blks_per_cluster, &oinfo); 246 196 continue; 197 + } 198 + 199 + xfs_scrub_xref_is_owned_by(bs->sc, agbno, blks_per_cluster, 200 + &oinfo); 247 201 248 202 /* Grab the inode cluster buffer. */ 249 203 imap.im_blkno = XFS_AGB_TO_DADDR(mp, bs->cur->bc_private.a.agno, ··· 285 227 union xfs_btree_rec *rec) 286 228 { 287 229 struct xfs_mount *mp = bs->cur->bc_mp; 230 + xfs_filblks_t *inode_blocks = bs->private; 288 231 struct xfs_inobt_rec_incore irec; 289 232 uint64_t holes; 290 233 xfs_agnumber_t agno = bs->cur->bc_private.a.agno; ··· 322 263 if ((agbno & (xfs_ialloc_cluster_alignment(mp) - 1)) || 323 264 (agbno & (xfs_icluster_size_fsb(mp) - 1))) 324 265 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 266 + 267 + *inode_blocks += XFS_B_TO_FSB(mp, 268 + irec.ir_count * mp->m_sb.sb_inodesize); 325 269 326 270 /* Handle non-sparse inodes */ 327 271 if (!xfs_inobt_issparse(irec.ir_holemask)) { ··· 370 308 return error; 371 309 } 372 310 311 + /* 312 + * Make sure the inode btrees are as large as the rmap thinks they are. 313 + * Don't bother if we're missing btree cursors, as we're already corrupt. 
314 + */ 315 + STATIC void 316 + xfs_scrub_iallocbt_xref_rmap_btreeblks( 317 + struct xfs_scrub_context *sc, 318 + int which) 319 + { 320 + struct xfs_owner_info oinfo; 321 + xfs_filblks_t blocks; 322 + xfs_extlen_t inobt_blocks = 0; 323 + xfs_extlen_t finobt_blocks = 0; 324 + int error; 325 + 326 + if (!sc->sa.ino_cur || !sc->sa.rmap_cur || 327 + (xfs_sb_version_hasfinobt(&sc->mp->m_sb) && !sc->sa.fino_cur)) 328 + return; 329 + 330 + /* Check that we saw as many inobt blocks as the rmap says. */ 331 + error = xfs_btree_count_blocks(sc->sa.ino_cur, &inobt_blocks); 332 + if (!xfs_scrub_process_error(sc, 0, 0, &error)) 333 + return; 334 + 335 + if (sc->sa.fino_cur) { 336 + error = xfs_btree_count_blocks(sc->sa.fino_cur, &finobt_blocks); 337 + if (!xfs_scrub_process_error(sc, 0, 0, &error)) 338 + return; 339 + } 340 + 341 + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT); 342 + error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, &oinfo, 343 + &blocks); 344 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) 345 + return; 346 + if (blocks != inobt_blocks + finobt_blocks) 347 + xfs_scrub_btree_set_corrupt(sc, sc->sa.ino_cur, 0); 348 + } 349 + 350 + /* 351 + * Make sure that the inobt records point to the same number of blocks as 352 + * the rmap says are owned by inodes. 353 + */ 354 + STATIC void 355 + xfs_scrub_iallocbt_xref_rmap_inodes( 356 + struct xfs_scrub_context *sc, 357 + int which, 358 + xfs_filblks_t inode_blocks) 359 + { 360 + struct xfs_owner_info oinfo; 361 + xfs_filblks_t blocks; 362 + int error; 363 + 364 + if (!sc->sa.rmap_cur) 365 + return; 366 + 367 + /* Check that we saw as many inode blocks as the rmap knows about. 
*/ 368 + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES); 369 + error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, &oinfo, 370 + &blocks); 371 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) 372 + return; 373 + if (blocks != inode_blocks) 374 + xfs_scrub_btree_set_corrupt(sc, sc->sa.ino_cur, 0); 375 + } 376 + 373 377 /* Scrub the inode btrees for some AG. */ 374 378 STATIC int 375 379 xfs_scrub_iallocbt( ··· 444 316 { 445 317 struct xfs_btree_cur *cur; 446 318 struct xfs_owner_info oinfo; 319 + xfs_filblks_t inode_blocks = 0; 320 + int error; 447 321 448 322 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT); 449 323 cur = which == XFS_BTNUM_INO ? sc->sa.ino_cur : sc->sa.fino_cur; 450 - return xfs_scrub_btree(sc, cur, xfs_scrub_iallocbt_rec, &oinfo, NULL); 324 + error = xfs_scrub_btree(sc, cur, xfs_scrub_iallocbt_rec, &oinfo, 325 + &inode_blocks); 326 + if (error) 327 + return error; 328 + 329 + xfs_scrub_iallocbt_xref_rmap_btreeblks(sc, which); 330 + 331 + /* 332 + * If we're scrubbing the inode btree, inode_blocks is the number of 333 + * blocks pointed to by all the inode chunk records. Therefore, we 334 + * should compare to the number of inode chunk blocks that the rmap 335 + * knows about. We can't do this for the finobt since it only points 336 + * to inode chunks with free inodes. 337 + */ 338 + if (which == XFS_BTNUM_INO) 339 + xfs_scrub_iallocbt_xref_rmap_inodes(sc, which, inode_blocks); 340 + 341 + return error; 451 342 } 452 343 453 344 int ··· 481 334 struct xfs_scrub_context *sc) 482 335 { 483 336 return xfs_scrub_iallocbt(sc, XFS_BTNUM_FINO); 337 + } 338 + 339 + /* See if an inode btree has (or doesn't have) an inode chunk record. 
*/ 340 + static inline void 341 + xfs_scrub_xref_inode_check( 342 + struct xfs_scrub_context *sc, 343 + xfs_agblock_t agbno, 344 + xfs_extlen_t len, 345 + struct xfs_btree_cur **icur, 346 + bool should_have_inodes) 347 + { 348 + bool has_inodes; 349 + int error; 350 + 351 + if (!(*icur)) 352 + return; 353 + 354 + error = xfs_ialloc_has_inodes_at_extent(*icur, agbno, len, &has_inodes); 355 + if (!xfs_scrub_should_check_xref(sc, &error, icur)) 356 + return; 357 + if (has_inodes != should_have_inodes) 358 + xfs_scrub_btree_xref_set_corrupt(sc, *icur, 0); 359 + } 360 + 361 + /* xref check that the extent is not covered by inodes */ 362 + void 363 + xfs_scrub_xref_is_not_inode_chunk( 364 + struct xfs_scrub_context *sc, 365 + xfs_agblock_t agbno, 366 + xfs_extlen_t len) 367 + { 368 + xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur, false); 369 + xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.fino_cur, false); 370 + } 371 + 372 + /* xref check that the extent is covered by inodes */ 373 + void 374 + xfs_scrub_xref_is_inode_chunk( 375 + struct xfs_scrub_context *sc, 376 + xfs_agblock_t agbno, 377 + xfs_extlen_t len) 378 + { 379 + xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur, true); 484 380 }
+162 -16
fs/xfs/scrub/inode.c
··· 36 36 #include "xfs_ialloc.h" 37 37 #include "xfs_da_format.h" 38 38 #include "xfs_reflink.h" 39 + #include "xfs_rmap.h" 40 + #include "xfs_bmap.h" 41 + #include "xfs_bmap_util.h" 39 42 #include "scrub/xfs_scrub.h" 40 43 #include "scrub/scrub.h" 41 44 #include "scrub/common.h" 45 + #include "scrub/btree.h" 42 46 #include "scrub/trace.h" 43 47 44 48 /* ··· 68 64 break; 69 65 case -EFSCORRUPTED: 70 66 case -EFSBADCRC: 71 - return 0; 67 + return xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp); 72 68 default: 73 69 return error; 74 70 } ··· 396 392 break; 397 393 } 398 394 395 + /* di_[amc]time.nsec */ 396 + if (be32_to_cpu(dip->di_atime.t_nsec) >= NSEC_PER_SEC) 397 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 398 + if (be32_to_cpu(dip->di_mtime.t_nsec) >= NSEC_PER_SEC) 399 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 400 + if (be32_to_cpu(dip->di_ctime.t_nsec) >= NSEC_PER_SEC) 401 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 402 + 399 403 /* 400 404 * di_size. xfs_dinode_verify checks for things that screw up 401 405 * the VFS such as the upper bit being set and zero-length ··· 507 495 } 508 496 509 497 if (dip->di_version >= 3) { 498 + if (be32_to_cpu(dip->di_crtime.t_nsec) >= NSEC_PER_SEC) 499 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 510 500 xfs_scrub_inode_flags2(sc, bp, dip, ino, mode, flags, flags2); 511 501 xfs_scrub_inode_cowextsize(sc, bp, dip, ino, mode, flags, 512 502 flags2); ··· 560 546 */ 561 547 bp->b_ops = &xfs_inode_buf_ops; 562 548 dip = xfs_buf_offset(bp, imap.im_boffset); 563 - if (!xfs_dinode_verify(mp, ino, dip) || 549 + if (xfs_dinode_verify(mp, ino, dip) != NULL || 564 550 !xfs_dinode_good_version(mp, dip->di_version)) { 565 551 xfs_scrub_ino_set_corrupt(sc, ino, bp); 566 552 goto out_buf; ··· 581 567 return error; 582 568 } 583 569 570 + /* 571 + * Make sure the finobt doesn't think this inode is free. 572 + * We don't have to check the inobt ourselves because we got the inode via 573 + * IGET_UNTRUSTED, which checks the inobt for us. 
574 + */ 575 + static void 576 + xfs_scrub_inode_xref_finobt( 577 + struct xfs_scrub_context *sc, 578 + xfs_ino_t ino) 579 + { 580 + struct xfs_inobt_rec_incore rec; 581 + xfs_agino_t agino; 582 + int has_record; 583 + int error; 584 + 585 + if (!sc->sa.fino_cur) 586 + return; 587 + 588 + agino = XFS_INO_TO_AGINO(sc->mp, ino); 589 + 590 + /* 591 + * Try to get the finobt record. If we can't get it, then we're 592 + * in good shape. 593 + */ 594 + error = xfs_inobt_lookup(sc->sa.fino_cur, agino, XFS_LOOKUP_LE, 595 + &has_record); 596 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.fino_cur) || 597 + !has_record) 598 + return; 599 + 600 + error = xfs_inobt_get_rec(sc->sa.fino_cur, &rec, &has_record); 601 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.fino_cur) || 602 + !has_record) 603 + return; 604 + 605 + /* 606 + * Otherwise, make sure this record either doesn't cover this inode, 607 + * or that it does but it's marked present. 608 + */ 609 + if (rec.ir_startino > agino || 610 + rec.ir_startino + XFS_INODES_PER_CHUNK <= agino) 611 + return; 612 + 613 + if (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)) 614 + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.fino_cur, 0); 615 + } 616 + 617 + /* Cross reference the inode fields with the forks. */ 618 + STATIC void 619 + xfs_scrub_inode_xref_bmap( 620 + struct xfs_scrub_context *sc, 621 + struct xfs_dinode *dip) 622 + { 623 + xfs_extnum_t nextents; 624 + xfs_filblks_t count; 625 + xfs_filblks_t acount; 626 + int error; 627 + 628 + /* Walk all the extents to check nextents/naextents/nblocks. 
*/ 629 + error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK, 630 + &nextents, &count); 631 + if (!xfs_scrub_should_check_xref(sc, &error, NULL)) 632 + return; 633 + if (nextents < be32_to_cpu(dip->di_nextents)) 634 + xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL); 635 + 636 + error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK, 637 + &nextents, &acount); 638 + if (!xfs_scrub_should_check_xref(sc, &error, NULL)) 639 + return; 640 + if (nextents != be16_to_cpu(dip->di_anextents)) 641 + xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL); 642 + 643 + /* Check nblocks against the inode. */ 644 + if (count + acount != be64_to_cpu(dip->di_nblocks)) 645 + xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL); 646 + } 647 + 648 + /* Cross-reference with the other btrees. */ 649 + STATIC void 650 + xfs_scrub_inode_xref( 651 + struct xfs_scrub_context *sc, 652 + xfs_ino_t ino, 653 + struct xfs_dinode *dip) 654 + { 655 + struct xfs_owner_info oinfo; 656 + xfs_agnumber_t agno; 657 + xfs_agblock_t agbno; 658 + int error; 659 + 660 + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 661 + return; 662 + 663 + agno = XFS_INO_TO_AGNO(sc->mp, ino); 664 + agbno = XFS_INO_TO_AGBNO(sc->mp, ino); 665 + 666 + error = xfs_scrub_ag_init(sc, agno, &sc->sa); 667 + if (!xfs_scrub_xref_process_error(sc, agno, agbno, &error)) 668 + return; 669 + 670 + xfs_scrub_xref_is_used_space(sc, agbno, 1); 671 + xfs_scrub_inode_xref_finobt(sc, ino); 672 + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES); 673 + xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); 674 + xfs_scrub_xref_is_not_shared(sc, agbno, 1); 675 + xfs_scrub_inode_xref_bmap(sc, dip); 676 + 677 + xfs_scrub_ag_free(sc, &sc->sa); 678 + } 679 + 680 + /* 681 + * If the reflink iflag disagrees with a scan for shared data fork extents, 682 + * either flag an error (shared extents w/ no flag) or a preen (flag set w/o 683 + * any shared extents). 
We already checked for reflink iflag set on a non 684 + * reflink filesystem. 685 + */ 686 + static void 687 + xfs_scrub_inode_check_reflink_iflag( 688 + struct xfs_scrub_context *sc, 689 + xfs_ino_t ino, 690 + struct xfs_buf *bp) 691 + { 692 + struct xfs_mount *mp = sc->mp; 693 + bool has_shared; 694 + int error; 695 + 696 + if (!xfs_sb_version_hasreflink(&mp->m_sb)) 697 + return; 698 + 699 + error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip, 700 + &has_shared); 701 + if (!xfs_scrub_xref_process_error(sc, XFS_INO_TO_AGNO(mp, ino), 702 + XFS_INO_TO_AGBNO(mp, ino), &error)) 703 + return; 704 + if (xfs_is_reflink_inode(sc->ip) && !has_shared) 705 + xfs_scrub_ino_set_preen(sc, ino, bp); 706 + else if (!xfs_is_reflink_inode(sc->ip) && has_shared) 707 + xfs_scrub_ino_set_corrupt(sc, ino, bp); 708 + } 709 + 584 710 /* Scrub an inode. */ 585 711 int 586 712 xfs_scrub_inode( 587 713 struct xfs_scrub_context *sc) 588 714 { 589 715 struct xfs_dinode di; 590 - struct xfs_mount *mp = sc->mp; 591 716 struct xfs_buf *bp = NULL; 592 717 struct xfs_dinode *dip; 593 718 xfs_ino_t ino; 594 - 595 - bool has_shared; 596 719 int error = 0; 597 720 598 721 /* Did we get the in-core inode, or are we doing this manually? */ ··· 754 603 goto out; 755 604 756 605 /* 757 - * Does this inode have the reflink flag set but no shared extents? 758 - * Set the preening flag if this is the case. 606 + * Look for discrepancies between file's data blocks and the reflink 607 + * iflag. We already checked the iflag against the file mode when 608 + * we scrubbed the dinode. 
759 609 */ 760 - if (xfs_is_reflink_inode(sc->ip)) { 761 - error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip, 762 - &has_shared); 763 - if (!xfs_scrub_process_error(sc, XFS_INO_TO_AGNO(mp, ino), 764 - XFS_INO_TO_AGBNO(mp, ino), &error)) 765 - goto out; 766 - if (!has_shared) 767 - xfs_scrub_ino_set_preen(sc, ino, bp); 768 - } 610 + if (S_ISREG(VFS_I(sc->ip)->i_mode)) 611 + xfs_scrub_inode_check_reflink_iflag(sc, ino, bp); 769 612 613 + xfs_scrub_inode_xref(sc, ino, dip); 770 614 out: 771 615 if (bp) 772 616 xfs_trans_brelse(sc->tp, bp);
+4 -4
fs/xfs/scrub/parent.c
··· 169 169 * immediate inactive cleanup of the inode. 170 170 */ 171 171 error = xfs_iget(mp, sc->tp, dnum, 0, 0, &dp); 172 - if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) 172 + if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) 173 173 goto out; 174 - if (dp == sc->ip) { 174 + if (dp == sc->ip || !S_ISDIR(VFS_I(dp)->i_mode)) { 175 175 xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); 176 176 goto out_rele; 177 177 } ··· 185 185 */ 186 186 if (xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) { 187 187 error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink); 188 - if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, 188 + if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, 189 189 &error)) 190 190 goto out_unlock; 191 191 if (nlink != expected_nlink) ··· 205 205 206 206 /* Go looking for our dentry. */ 207 207 error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink); 208 - if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) 208 + if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) 209 209 goto out_unlock; 210 210 211 211 /* Drop the parent lock, relock this inode. */
-7
fs/xfs/scrub/quota.c
··· 67 67 { 68 68 uint dqtype; 69 69 70 - /* 71 - * If userspace gave us an AG number or inode data, they don't 72 - * know what they're doing. Get out. 73 - */ 74 - if (sc->sm->sm_agno || sc->sm->sm_ino || sc->sm->sm_gen) 75 - return -EINVAL; 76 - 77 70 dqtype = xfs_scrub_quota_to_dqtype(sc); 78 71 if (dqtype == 0) 79 72 return -EINVAL;
+418 -2
fs/xfs/scrub/refcount.c
··· 31 31 #include "xfs_sb.h" 32 32 #include "xfs_alloc.h" 33 33 #include "xfs_rmap.h" 34 + #include "xfs_refcount.h" 34 35 #include "scrub/xfs_scrub.h" 35 36 #include "scrub/scrub.h" 36 37 #include "scrub/common.h" ··· 51 50 52 51 /* Reference count btree scrubber. */ 53 52 53 + /* 54 + * Confirming Reference Counts via Reverse Mappings 55 + * 56 + * We want to count the reverse mappings overlapping a refcount record 57 + * (bno, len, refcount), allowing for the possibility that some of the 58 + * overlap may come from smaller adjoining reverse mappings, while some 59 + * comes from single extents which overlap the range entirely. The 60 + * outer loop is as follows: 61 + * 62 + * 1. For all reverse mappings overlapping the refcount extent, 63 + * a. If a given rmap completely overlaps, mark it as seen. 64 + * b. Otherwise, record the fragment (in agbno order) for later 65 + * processing. 66 + * 67 + * Once we've seen all the rmaps, we know that for all blocks in the 68 + * refcount record we want to find $refcount owners and we've already 69 + * visited $seen extents that overlap all the blocks. Therefore, we 70 + * need to find ($refcount - $seen) owners for every block in the 71 + * extent; call that quantity $target_nr. Proceed as follows: 72 + * 73 + * 2. Pull the first $target_nr fragments from the list; all of them 74 + * should start at or before the start of the extent. 75 + * Call this subset of fragments the working set. 76 + * 3. Until there are no more unprocessed fragments, 77 + * a. Find the shortest fragments in the set and remove them. 78 + * b. Note the block number of the end of these fragments. 79 + * c. Pull the same number of fragments from the list. All of these 80 + * fragments should start at the block number recorded in the 81 + * previous step. 82 + * d. Put those fragments in the set. 83 + * 4. Check that there are $target_nr fragments remaining in the list, 84 + * and that they all end at or beyond the end of the refcount extent. 
85 + * 86 + * If the refcount is correct, all the check conditions in the algorithm 87 + * should always hold true. If not, the refcount is incorrect. 88 + */ 89 + struct xfs_scrub_refcnt_frag { 90 + struct list_head list; 91 + struct xfs_rmap_irec rm; 92 + }; 93 + 94 + struct xfs_scrub_refcnt_check { 95 + struct xfs_scrub_context *sc; 96 + struct list_head fragments; 97 + 98 + /* refcount extent we're examining */ 99 + xfs_agblock_t bno; 100 + xfs_extlen_t len; 101 + xfs_nlink_t refcount; 102 + 103 + /* number of owners seen */ 104 + xfs_nlink_t seen; 105 + }; 106 + 107 + /* 108 + * Decide if the given rmap is large enough that we can redeem it 109 + * towards refcount verification now, or if it's a fragment, in 110 + * which case we'll hang onto it in the hopes that we'll later 111 + * discover that we've collected exactly the correct number of 112 + * fragments as the refcountbt says we should have. 113 + */ 114 + STATIC int 115 + xfs_scrub_refcountbt_rmap_check( 116 + struct xfs_btree_cur *cur, 117 + struct xfs_rmap_irec *rec, 118 + void *priv) 119 + { 120 + struct xfs_scrub_refcnt_check *refchk = priv; 121 + struct xfs_scrub_refcnt_frag *frag; 122 + xfs_agblock_t rm_last; 123 + xfs_agblock_t rc_last; 124 + int error = 0; 125 + 126 + if (xfs_scrub_should_terminate(refchk->sc, &error)) 127 + return error; 128 + 129 + rm_last = rec->rm_startblock + rec->rm_blockcount - 1; 130 + rc_last = refchk->bno + refchk->len - 1; 131 + 132 + /* Confirm that a single-owner refc extent is a CoW stage. */ 133 + if (refchk->refcount == 1 && rec->rm_owner != XFS_RMAP_OWN_COW) { 134 + xfs_scrub_btree_xref_set_corrupt(refchk->sc, cur, 0); 135 + return 0; 136 + } 137 + 138 + if (rec->rm_startblock <= refchk->bno && rm_last >= rc_last) { 139 + /* 140 + * The rmap overlaps the refcount record, so we can confirm 141 + * one refcount owner seen. 
142 + */ 143 + refchk->seen++; 144 + } else { 145 + /* 146 + * This rmap covers only part of the refcount record, so 147 + * save the fragment for later processing. If the rmapbt 148 + * is healthy each rmap_irec we see will be in agbno order 149 + * so we don't need insertion sort here. 150 + */ 151 + frag = kmem_alloc(sizeof(struct xfs_scrub_refcnt_frag), 152 + KM_MAYFAIL | KM_NOFS); 153 + if (!frag) 154 + return -ENOMEM; 155 + memcpy(&frag->rm, rec, sizeof(frag->rm)); 156 + list_add_tail(&frag->list, &refchk->fragments); 157 + } 158 + 159 + return 0; 160 + } 161 + 162 + /* 163 + * Given a bunch of rmap fragments, iterate through them, keeping 164 + * a running tally of the refcount. If this ever deviates from 165 + * what we expect (which is the refcountbt's refcount minus the 166 + * number of extents that totally covered the refcountbt extent), 167 + * we have a refcountbt error. 168 + */ 169 + STATIC void 170 + xfs_scrub_refcountbt_process_rmap_fragments( 171 + struct xfs_scrub_refcnt_check *refchk) 172 + { 173 + struct list_head worklist; 174 + struct xfs_scrub_refcnt_frag *frag; 175 + struct xfs_scrub_refcnt_frag *n; 176 + xfs_agblock_t bno; 177 + xfs_agblock_t rbno; 178 + xfs_agblock_t next_rbno; 179 + xfs_nlink_t nr; 180 + xfs_nlink_t target_nr; 181 + 182 + target_nr = refchk->refcount - refchk->seen; 183 + if (target_nr == 0) 184 + return; 185 + 186 + /* 187 + * There are (refchk->rc.rc_refcount - refchk->nr refcount) 188 + * references we haven't found yet. Pull that many off the 189 + * fragment list and figure out where the smallest rmap ends 190 + * (and therefore the next rmap should start). All the rmaps 191 + * we pull off should start at or before the beginning of the 192 + * refcount record's range. 193 + */ 194 + INIT_LIST_HEAD(&worklist); 195 + rbno = NULLAGBLOCK; 196 + nr = 1; 197 + 198 + /* Make sure the fragments actually /are/ in agbno order. 
*/ 199 + bno = 0; 200 + list_for_each_entry(frag, &refchk->fragments, list) { 201 + if (frag->rm.rm_startblock < bno) 202 + goto done; 203 + bno = frag->rm.rm_startblock; 204 + } 205 + 206 + /* 207 + * Find all the rmaps that start at or before the refc extent, 208 + * and put them on the worklist. 209 + */ 210 + list_for_each_entry_safe(frag, n, &refchk->fragments, list) { 211 + if (frag->rm.rm_startblock > refchk->bno) 212 + goto done; 213 + bno = frag->rm.rm_startblock + frag->rm.rm_blockcount; 214 + if (bno < rbno) 215 + rbno = bno; 216 + list_move_tail(&frag->list, &worklist); 217 + if (nr == target_nr) 218 + break; 219 + nr++; 220 + } 221 + 222 + /* 223 + * We should have found exactly $target_nr rmap fragments starting 224 + * at or before the refcount extent. 225 + */ 226 + if (nr != target_nr) 227 + goto done; 228 + 229 + while (!list_empty(&refchk->fragments)) { 230 + /* Discard any fragments ending at rbno from the worklist. */ 231 + nr = 0; 232 + next_rbno = NULLAGBLOCK; 233 + list_for_each_entry_safe(frag, n, &worklist, list) { 234 + bno = frag->rm.rm_startblock + frag->rm.rm_blockcount; 235 + if (bno != rbno) { 236 + if (bno < next_rbno) 237 + next_rbno = bno; 238 + continue; 239 + } 240 + list_del(&frag->list); 241 + kmem_free(frag); 242 + nr++; 243 + } 244 + 245 + /* Try to add nr rmaps starting at rbno to the worklist. */ 246 + list_for_each_entry_safe(frag, n, &refchk->fragments, list) { 247 + bno = frag->rm.rm_startblock + frag->rm.rm_blockcount; 248 + if (frag->rm.rm_startblock != rbno) 249 + goto done; 250 + list_move_tail(&frag->list, &worklist); 251 + if (next_rbno > bno) 252 + next_rbno = bno; 253 + nr--; 254 + if (nr == 0) 255 + break; 256 + } 257 + 258 + /* 259 + * If we get here and nr > 0, this means that we added fewer 260 + * items to the worklist than we discarded because the fragment 261 + * list ran out of items. Therefore, we cannot maintain the 262 + * required refcount. Something is wrong, so we're done. 
263 + */ 264 + if (nr) 265 + goto done; 266 + 267 + rbno = next_rbno; 268 + } 269 + 270 + /* 271 + * Make sure the last extent we processed ends at or beyond 272 + * the end of the refcount extent. 273 + */ 274 + if (rbno < refchk->bno + refchk->len) 275 + goto done; 276 + 277 + /* Actually record us having seen the remaining refcount. */ 278 + refchk->seen = refchk->refcount; 279 + done: 280 + /* Delete fragments and work list. */ 281 + list_for_each_entry_safe(frag, n, &worklist, list) { 282 + list_del(&frag->list); 283 + kmem_free(frag); 284 + } 285 + list_for_each_entry_safe(frag, n, &refchk->fragments, list) { 286 + list_del(&frag->list); 287 + kmem_free(frag); 288 + } 289 + } 290 + 291 + /* Use the rmap entries covering this extent to verify the refcount. */ 292 + STATIC void 293 + xfs_scrub_refcountbt_xref_rmap( 294 + struct xfs_scrub_context *sc, 295 + xfs_agblock_t bno, 296 + xfs_extlen_t len, 297 + xfs_nlink_t refcount) 298 + { 299 + struct xfs_scrub_refcnt_check refchk = { 300 + .sc = sc, 301 + .bno = bno, 302 + .len = len, 303 + .refcount = refcount, 304 + .seen = 0, 305 + }; 306 + struct xfs_rmap_irec low; 307 + struct xfs_rmap_irec high; 308 + struct xfs_scrub_refcnt_frag *frag; 309 + struct xfs_scrub_refcnt_frag *n; 310 + int error; 311 + 312 + if (!sc->sa.rmap_cur) 313 + return; 314 + 315 + /* Cross-reference with the rmapbt to confirm the refcount. 
*/ 316 + memset(&low, 0, sizeof(low)); 317 + low.rm_startblock = bno; 318 + memset(&high, 0xFF, sizeof(high)); 319 + high.rm_startblock = bno + len - 1; 320 + 321 + INIT_LIST_HEAD(&refchk.fragments); 322 + error = xfs_rmap_query_range(sc->sa.rmap_cur, &low, &high, 323 + &xfs_scrub_refcountbt_rmap_check, &refchk); 324 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) 325 + goto out_free; 326 + 327 + xfs_scrub_refcountbt_process_rmap_fragments(&refchk); 328 + if (refcount != refchk.seen) 329 + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); 330 + 331 + out_free: 332 + list_for_each_entry_safe(frag, n, &refchk.fragments, list) { 333 + list_del(&frag->list); 334 + kmem_free(frag); 335 + } 336 + } 337 + 338 + /* Cross-reference with the other btrees. */ 339 + STATIC void 340 + xfs_scrub_refcountbt_xref( 341 + struct xfs_scrub_context *sc, 342 + xfs_agblock_t agbno, 343 + xfs_extlen_t len, 344 + xfs_nlink_t refcount) 345 + { 346 + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 347 + return; 348 + 349 + xfs_scrub_xref_is_used_space(sc, agbno, len); 350 + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len); 351 + xfs_scrub_refcountbt_xref_rmap(sc, agbno, len, refcount); 352 + } 353 + 54 354 /* Scrub a refcountbt record. */ 55 355 STATIC int 56 356 xfs_scrub_refcountbt_rec( ··· 359 57 union xfs_btree_rec *rec) 360 58 { 361 59 struct xfs_mount *mp = bs->cur->bc_mp; 60 + xfs_agblock_t *cow_blocks = bs->private; 362 61 xfs_agnumber_t agno = bs->cur->bc_private.a.agno; 363 62 xfs_agblock_t bno; 364 63 xfs_extlen_t len; ··· 375 72 has_cowflag = (bno & XFS_REFC_COW_START); 376 73 if ((refcount == 1 && !has_cowflag) || (refcount != 1 && has_cowflag)) 377 74 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 75 + if (has_cowflag) 76 + (*cow_blocks) += len; 378 77 379 78 /* Check the extent. 
*/ 380 79 bno &= ~XFS_REFC_COW_START; ··· 388 83 if (refcount == 0) 389 84 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 390 85 86 + xfs_scrub_refcountbt_xref(bs->sc, bno, len, refcount); 87 + 391 88 return error; 89 + } 90 + 91 + /* Make sure we have as many refc blocks as the rmap says. */ 92 + STATIC void 93 + xfs_scrub_refcount_xref_rmap( 94 + struct xfs_scrub_context *sc, 95 + struct xfs_owner_info *oinfo, 96 + xfs_filblks_t cow_blocks) 97 + { 98 + xfs_extlen_t refcbt_blocks = 0; 99 + xfs_filblks_t blocks; 100 + int error; 101 + 102 + if (!sc->sa.rmap_cur) 103 + return; 104 + 105 + /* Check that we saw as many refcbt blocks as the rmap knows about. */ 106 + error = xfs_btree_count_blocks(sc->sa.refc_cur, &refcbt_blocks); 107 + if (!xfs_scrub_btree_process_error(sc, sc->sa.refc_cur, 0, &error)) 108 + return; 109 + error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo, 110 + &blocks); 111 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) 112 + return; 113 + if (blocks != refcbt_blocks) 114 + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); 115 + 116 + /* Check that we saw as many cow blocks as the rmap knows about. */ 117 + xfs_rmap_ag_owner(oinfo, XFS_RMAP_OWN_COW); 118 + error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo, 119 + &blocks); 120 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) 121 + return; 122 + if (blocks != cow_blocks) 123 + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); 392 124 } 393 125 394 126 /* Scrub the refcount btree for some AG. 
*/ ··· 434 92 struct xfs_scrub_context *sc) 435 93 { 436 94 struct xfs_owner_info oinfo; 95 + xfs_agblock_t cow_blocks = 0; 96 + int error; 437 97 438 98 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_REFC); 439 - return xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_rec, 440 - &oinfo, NULL); 99 + error = xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_rec, 100 + &oinfo, &cow_blocks); 101 + if (error) 102 + return error; 103 + 104 + xfs_scrub_refcount_xref_rmap(sc, &oinfo, cow_blocks); 105 + 106 + return 0; 107 + } 108 + 109 + /* xref check that a cow staging extent is marked in the refcountbt. */ 110 + void 111 + xfs_scrub_xref_is_cow_staging( 112 + struct xfs_scrub_context *sc, 113 + xfs_agblock_t agbno, 114 + xfs_extlen_t len) 115 + { 116 + struct xfs_refcount_irec rc; 117 + bool has_cowflag; 118 + int has_refcount; 119 + int error; 120 + 121 + if (!sc->sa.refc_cur) 122 + return; 123 + 124 + /* Find the CoW staging extent. */ 125 + error = xfs_refcount_lookup_le(sc->sa.refc_cur, 126 + agbno + XFS_REFC_COW_START, &has_refcount); 127 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur)) 128 + return; 129 + if (!has_refcount) { 130 + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); 131 + return; 132 + } 133 + 134 + error = xfs_refcount_get_rec(sc->sa.refc_cur, &rc, &has_refcount); 135 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur)) 136 + return; 137 + if (!has_refcount) { 138 + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); 139 + return; 140 + } 141 + 142 + /* CoW flag must be set, refcount must be 1. 
*/ 143 + has_cowflag = (rc.rc_startblock & XFS_REFC_COW_START); 144 + if (!has_cowflag || rc.rc_refcount != 1) 145 + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); 146 + 147 + /* Must be at least as long as what was passed in */ 148 + if (rc.rc_blockcount < len) 149 + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); 150 + } 151 + 152 + /* 153 + * xref check that the extent is not shared. Only file data blocks 154 + * can have multiple owners. 155 + */ 156 + void 157 + xfs_scrub_xref_is_not_shared( 158 + struct xfs_scrub_context *sc, 159 + xfs_agblock_t agbno, 160 + xfs_extlen_t len) 161 + { 162 + bool shared; 163 + int error; 164 + 165 + if (!sc->sa.refc_cur) 166 + return; 167 + 168 + error = xfs_refcount_has_record(sc->sa.refc_cur, agbno, len, &shared); 169 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur)) 170 + return; 171 + if (shared) 172 + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); 441 173 }
+123
fs/xfs/scrub/rmap.c
··· 32 32 #include "xfs_alloc.h" 33 33 #include "xfs_ialloc.h" 34 34 #include "xfs_rmap.h" 35 + #include "xfs_refcount.h" 35 36 #include "scrub/xfs_scrub.h" 36 37 #include "scrub/scrub.h" 37 38 #include "scrub/common.h" ··· 51 50 } 52 51 53 52 /* Reverse-mapping scrubber. */ 53 + 54 + /* Cross-reference a rmap against the refcount btree. */ 55 + STATIC void 56 + xfs_scrub_rmapbt_xref_refc( 57 + struct xfs_scrub_context *sc, 58 + struct xfs_rmap_irec *irec) 59 + { 60 + xfs_agblock_t fbno; 61 + xfs_extlen_t flen; 62 + bool non_inode; 63 + bool is_bmbt; 64 + bool is_attr; 65 + bool is_unwritten; 66 + int error; 67 + 68 + if (!sc->sa.refc_cur) 69 + return; 70 + 71 + non_inode = XFS_RMAP_NON_INODE_OWNER(irec->rm_owner); 72 + is_bmbt = irec->rm_flags & XFS_RMAP_BMBT_BLOCK; 73 + is_attr = irec->rm_flags & XFS_RMAP_ATTR_FORK; 74 + is_unwritten = irec->rm_flags & XFS_RMAP_UNWRITTEN; 75 + 76 + /* If this is shared, must be a data fork extent. */ 77 + error = xfs_refcount_find_shared(sc->sa.refc_cur, irec->rm_startblock, 78 + irec->rm_blockcount, &fbno, &flen, false); 79 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur)) 80 + return; 81 + if (flen != 0 && (non_inode || is_attr || is_bmbt || is_unwritten)) 82 + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); 83 + } 84 + 85 + /* Cross-reference with the other btrees. 
*/ 86 + STATIC void 87 + xfs_scrub_rmapbt_xref( 88 + struct xfs_scrub_context *sc, 89 + struct xfs_rmap_irec *irec) 90 + { 91 + xfs_agblock_t agbno = irec->rm_startblock; 92 + xfs_extlen_t len = irec->rm_blockcount; 93 + 94 + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 95 + return; 96 + 97 + xfs_scrub_xref_is_used_space(sc, agbno, len); 98 + if (irec->rm_owner == XFS_RMAP_OWN_INODES) 99 + xfs_scrub_xref_is_inode_chunk(sc, agbno, len); 100 + else 101 + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len); 102 + if (irec->rm_owner == XFS_RMAP_OWN_COW) 103 + xfs_scrub_xref_is_cow_staging(sc, irec->rm_startblock, 104 + irec->rm_blockcount); 105 + else 106 + xfs_scrub_rmapbt_xref_refc(sc, irec); 107 + } 54 108 55 109 /* Scrub an rmapbt record. */ 56 110 STATIC int ··· 177 121 irec.rm_owner > XFS_RMAP_OWN_FS) 178 122 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 179 123 } 124 + 125 + xfs_scrub_rmapbt_xref(bs->sc, &irec); 180 126 out: 181 127 return error; 182 128 } ··· 193 135 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG); 194 136 return xfs_scrub_btree(sc, sc->sa.rmap_cur, xfs_scrub_rmapbt_rec, 195 137 &oinfo, NULL); 138 + } 139 + 140 + /* xref check that the extent is owned by a given owner */ 141 + static inline void 142 + xfs_scrub_xref_check_owner( 143 + struct xfs_scrub_context *sc, 144 + xfs_agblock_t bno, 145 + xfs_extlen_t len, 146 + struct xfs_owner_info *oinfo, 147 + bool should_have_rmap) 148 + { 149 + bool has_rmap; 150 + int error; 151 + 152 + if (!sc->sa.rmap_cur) 153 + return; 154 + 155 + error = xfs_rmap_record_exists(sc->sa.rmap_cur, bno, len, oinfo, 156 + &has_rmap); 157 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) 158 + return; 159 + if (has_rmap != should_have_rmap) 160 + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); 161 + } 162 + 163 + /* xref check that the extent is owned by a given owner */ 164 + void 165 + xfs_scrub_xref_is_owned_by( 166 + struct xfs_scrub_context *sc, 167 + xfs_agblock_t bno, 168 + xfs_extlen_t 
len, 169 + struct xfs_owner_info *oinfo) 170 + { 171 + xfs_scrub_xref_check_owner(sc, bno, len, oinfo, true); 172 + } 173 + 174 + /* xref check that the extent is not owned by a given owner */ 175 + void 176 + xfs_scrub_xref_is_not_owned_by( 177 + struct xfs_scrub_context *sc, 178 + xfs_agblock_t bno, 179 + xfs_extlen_t len, 180 + struct xfs_owner_info *oinfo) 181 + { 182 + xfs_scrub_xref_check_owner(sc, bno, len, oinfo, false); 183 + } 184 + 185 + /* xref check that the extent has no reverse mapping at all */ 186 + void 187 + xfs_scrub_xref_has_no_owner( 188 + struct xfs_scrub_context *sc, 189 + xfs_agblock_t bno, 190 + xfs_extlen_t len) 191 + { 192 + bool has_rmap; 193 + int error; 194 + 195 + if (!sc->sa.rmap_cur) 196 + return; 197 + 198 + error = xfs_rmap_has_record(sc->sa.rmap_cur, bno, len, &has_rmap); 199 + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) 200 + return; 201 + if (has_rmap) 202 + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); 196 203 }
+25 -10
fs/xfs/scrub/rtbitmap.c
··· 43 43 struct xfs_scrub_context *sc, 44 44 struct xfs_inode *ip) 45 45 { 46 - struct xfs_mount *mp = sc->mp; 47 - int error = 0; 48 - 49 - /* 50 - * If userspace gave us an AG number or inode data, they don't 51 - * know what they're doing. Get out. 52 - */ 53 - if (sc->sm->sm_agno || sc->sm->sm_ino || sc->sm->sm_gen) 54 - return -EINVAL; 46 + int error; 55 47 56 48 error = xfs_scrub_setup_fs(sc, ip); 57 49 if (error) 58 50 return error; 59 51 60 52 sc->ilock_flags = XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP; 61 - sc->ip = mp->m_rbmip; 53 + sc->ip = sc->mp->m_rbmip; 62 54 xfs_ilock(sc->ip, sc->ilock_flags); 63 55 64 56 return 0; ··· 97 105 { 98 106 /* XXX: implement this some day */ 99 107 return -ENOENT; 108 + } 109 + 110 + 111 + /* xref check that the extent is not free in the rtbitmap */ 112 + void 113 + xfs_scrub_xref_is_used_rt_space( 114 + struct xfs_scrub_context *sc, 115 + xfs_rtblock_t fsbno, 116 + xfs_extlen_t len) 117 + { 118 + bool is_free; 119 + int error; 120 + 121 + xfs_ilock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); 122 + error = xfs_rtalloc_extent_is_free(sc->mp, sc->tp, fsbno, len, 123 + &is_free); 124 + if (!xfs_scrub_should_check_xref(sc, &error, NULL)) 125 + goto out_unlock; 126 + if (is_free) 127 + xfs_scrub_ino_xref_set_corrupt(sc, sc->mp->m_rbmip->i_ino, 128 + NULL); 129 + out_unlock: 130 + xfs_iunlock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); 100 131 }
+147 -76
fs/xfs/scrub/scrub.c
··· 110 110 * structure itself is corrupt, the CORRUPT flag will be set. If 111 111 * the metadata is correct but otherwise suboptimal, the PREEN flag 112 112 * will be set. 113 + * 114 + * We perform secondary validation of filesystem metadata by 115 + * cross-referencing every record with all other available metadata. 116 + * For example, for block mapping extents, we verify that there are no 117 + * records in the free space and inode btrees corresponding to that 118 + * space extent and that there is a corresponding entry in the reverse 119 + * mapping btree. Inconsistent metadata is noted by setting the 120 + * XCORRUPT flag; btree query function errors are noted by setting the 121 + * XFAIL flag and deleting the cursor to prevent further attempts to 122 + * cross-reference with a defective btree. 113 123 */ 114 124 115 125 /* ··· 138 128 { 139 129 int error = 0; 140 130 141 - if (sc->sm->sm_ino || sc->sm->sm_agno) 142 - return -EINVAL; 143 131 if (xfs_scrub_should_terminate(sc, &error)) 144 132 return error; 145 133 ··· 159 151 sc->tp = NULL; 160 152 } 161 153 if (sc->ip) { 162 - xfs_iunlock(sc->ip, sc->ilock_flags); 154 + if (sc->ilock_flags) 155 + xfs_iunlock(sc->ip, sc->ilock_flags); 163 156 if (sc->ip != ip_in && 164 157 !xfs_internal_inum(sc->mp, sc->ip->i_ino)) 165 158 iput(VFS_I(sc->ip)); ··· 176 167 /* Scrubbing dispatch. 
*/ 177 168 178 169 static const struct xfs_scrub_meta_ops meta_scrub_ops[] = { 179 - { /* ioctl presence test */ 170 + [XFS_SCRUB_TYPE_PROBE] = { /* ioctl presence test */ 171 + .type = ST_NONE, 180 172 .setup = xfs_scrub_setup_fs, 181 173 .scrub = xfs_scrub_probe, 182 174 }, 183 - { /* superblock */ 184 - .setup = xfs_scrub_setup_ag_header, 175 + [XFS_SCRUB_TYPE_SB] = { /* superblock */ 176 + .type = ST_PERAG, 177 + .setup = xfs_scrub_setup_fs, 185 178 .scrub = xfs_scrub_superblock, 186 179 }, 187 - { /* agf */ 188 - .setup = xfs_scrub_setup_ag_header, 180 + [XFS_SCRUB_TYPE_AGF] = { /* agf */ 181 + .type = ST_PERAG, 182 + .setup = xfs_scrub_setup_fs, 189 183 .scrub = xfs_scrub_agf, 190 184 }, 191 - { /* agfl */ 192 - .setup = xfs_scrub_setup_ag_header, 185 + [XFS_SCRUB_TYPE_AGFL]= { /* agfl */ 186 + .type = ST_PERAG, 187 + .setup = xfs_scrub_setup_fs, 193 188 .scrub = xfs_scrub_agfl, 194 189 }, 195 - { /* agi */ 196 - .setup = xfs_scrub_setup_ag_header, 190 + [XFS_SCRUB_TYPE_AGI] = { /* agi */ 191 + .type = ST_PERAG, 192 + .setup = xfs_scrub_setup_fs, 197 193 .scrub = xfs_scrub_agi, 198 194 }, 199 - { /* bnobt */ 195 + [XFS_SCRUB_TYPE_BNOBT] = { /* bnobt */ 196 + .type = ST_PERAG, 200 197 .setup = xfs_scrub_setup_ag_allocbt, 201 198 .scrub = xfs_scrub_bnobt, 202 199 }, 203 - { /* cntbt */ 200 + [XFS_SCRUB_TYPE_CNTBT] = { /* cntbt */ 201 + .type = ST_PERAG, 204 202 .setup = xfs_scrub_setup_ag_allocbt, 205 203 .scrub = xfs_scrub_cntbt, 206 204 }, 207 - { /* inobt */ 205 + [XFS_SCRUB_TYPE_INOBT] = { /* inobt */ 206 + .type = ST_PERAG, 208 207 .setup = xfs_scrub_setup_ag_iallocbt, 209 208 .scrub = xfs_scrub_inobt, 210 209 }, 211 - { /* finobt */ 210 + [XFS_SCRUB_TYPE_FINOBT] = { /* finobt */ 211 + .type = ST_PERAG, 212 212 .setup = xfs_scrub_setup_ag_iallocbt, 213 213 .scrub = xfs_scrub_finobt, 214 214 .has = xfs_sb_version_hasfinobt, 215 215 }, 216 - { /* rmapbt */ 216 + [XFS_SCRUB_TYPE_RMAPBT] = { /* rmapbt */ 217 + .type = ST_PERAG, 217 218 .setup = 
xfs_scrub_setup_ag_rmapbt, 218 219 .scrub = xfs_scrub_rmapbt, 219 220 .has = xfs_sb_version_hasrmapbt, 220 221 }, 221 - { /* refcountbt */ 222 + [XFS_SCRUB_TYPE_REFCNTBT] = { /* refcountbt */ 223 + .type = ST_PERAG, 222 224 .setup = xfs_scrub_setup_ag_refcountbt, 223 225 .scrub = xfs_scrub_refcountbt, 224 226 .has = xfs_sb_version_hasreflink, 225 227 }, 226 - { /* inode record */ 228 + [XFS_SCRUB_TYPE_INODE] = { /* inode record */ 229 + .type = ST_INODE, 227 230 .setup = xfs_scrub_setup_inode, 228 231 .scrub = xfs_scrub_inode, 229 232 }, 230 - { /* inode data fork */ 233 + [XFS_SCRUB_TYPE_BMBTD] = { /* inode data fork */ 234 + .type = ST_INODE, 231 235 .setup = xfs_scrub_setup_inode_bmap, 232 236 .scrub = xfs_scrub_bmap_data, 233 237 }, 234 - { /* inode attr fork */ 238 + [XFS_SCRUB_TYPE_BMBTA] = { /* inode attr fork */ 239 + .type = ST_INODE, 235 240 .setup = xfs_scrub_setup_inode_bmap, 236 241 .scrub = xfs_scrub_bmap_attr, 237 242 }, 238 - { /* inode CoW fork */ 243 + [XFS_SCRUB_TYPE_BMBTC] = { /* inode CoW fork */ 244 + .type = ST_INODE, 239 245 .setup = xfs_scrub_setup_inode_bmap, 240 246 .scrub = xfs_scrub_bmap_cow, 241 247 }, 242 - { /* directory */ 248 + [XFS_SCRUB_TYPE_DIR] = { /* directory */ 249 + .type = ST_INODE, 243 250 .setup = xfs_scrub_setup_directory, 244 251 .scrub = xfs_scrub_directory, 245 252 }, 246 - { /* extended attributes */ 253 + [XFS_SCRUB_TYPE_XATTR] = { /* extended attributes */ 254 + .type = ST_INODE, 247 255 .setup = xfs_scrub_setup_xattr, 248 256 .scrub = xfs_scrub_xattr, 249 257 }, 250 - { /* symbolic link */ 258 + [XFS_SCRUB_TYPE_SYMLINK] = { /* symbolic link */ 259 + .type = ST_INODE, 251 260 .setup = xfs_scrub_setup_symlink, 252 261 .scrub = xfs_scrub_symlink, 253 262 }, 254 - { /* parent pointers */ 263 + [XFS_SCRUB_TYPE_PARENT] = { /* parent pointers */ 264 + .type = ST_INODE, 255 265 .setup = xfs_scrub_setup_parent, 256 266 .scrub = xfs_scrub_parent, 257 267 }, 258 - { /* realtime bitmap */ 268 + [XFS_SCRUB_TYPE_RTBITMAP] = { 
/* realtime bitmap */ 269 + .type = ST_FS, 259 270 .setup = xfs_scrub_setup_rt, 260 271 .scrub = xfs_scrub_rtbitmap, 261 272 .has = xfs_sb_version_hasrealtime, 262 273 }, 263 - { /* realtime summary */ 274 + [XFS_SCRUB_TYPE_RTSUM] = { /* realtime summary */ 275 + .type = ST_FS, 264 276 .setup = xfs_scrub_setup_rt, 265 277 .scrub = xfs_scrub_rtsummary, 266 278 .has = xfs_sb_version_hasrealtime, 267 279 }, 268 - { /* user quota */ 269 - .setup = xfs_scrub_setup_quota, 270 - .scrub = xfs_scrub_quota, 280 + [XFS_SCRUB_TYPE_UQUOTA] = { /* user quota */ 281 + .type = ST_FS, 282 + .setup = xfs_scrub_setup_quota, 283 + .scrub = xfs_scrub_quota, 271 284 }, 272 - { /* group quota */ 273 - .setup = xfs_scrub_setup_quota, 274 - .scrub = xfs_scrub_quota, 285 + [XFS_SCRUB_TYPE_GQUOTA] = { /* group quota */ 286 + .type = ST_FS, 287 + .setup = xfs_scrub_setup_quota, 288 + .scrub = xfs_scrub_quota, 275 289 }, 276 - { /* project quota */ 277 - .setup = xfs_scrub_setup_quota, 278 - .scrub = xfs_scrub_quota, 290 + [XFS_SCRUB_TYPE_PQUOTA] = { /* project quota */ 291 + .type = ST_FS, 292 + .setup = xfs_scrub_setup_quota, 293 + .scrub = xfs_scrub_quota, 279 294 }, 280 295 }; 281 296 ··· 317 284 "EXPERIMENTAL online scrub feature in use. Use at your own risk!"); 318 285 } 319 286 287 + static int 288 + xfs_scrub_validate_inputs( 289 + struct xfs_mount *mp, 290 + struct xfs_scrub_metadata *sm) 291 + { 292 + int error; 293 + const struct xfs_scrub_meta_ops *ops; 294 + 295 + error = -EINVAL; 296 + /* Check our inputs. */ 297 + sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT; 298 + if (sm->sm_flags & ~XFS_SCRUB_FLAGS_IN) 299 + goto out; 300 + /* sm_reserved[] must be zero */ 301 + if (memchr_inv(sm->sm_reserved, 0, sizeof(sm->sm_reserved))) 302 + goto out; 303 + 304 + error = -ENOENT; 305 + /* Do we know about this type of metadata? 
*/ 306 + if (sm->sm_type >= XFS_SCRUB_TYPE_NR) 307 + goto out; 308 + ops = &meta_scrub_ops[sm->sm_type]; 309 + if (ops->setup == NULL || ops->scrub == NULL) 310 + goto out; 311 + /* Does this fs even support this type of metadata? */ 312 + if (ops->has && !ops->has(&mp->m_sb)) 313 + goto out; 314 + 315 + error = -EINVAL; 316 + /* restricting fields must be appropriate for type */ 317 + switch (ops->type) { 318 + case ST_NONE: 319 + case ST_FS: 320 + if (sm->sm_ino || sm->sm_gen || sm->sm_agno) 321 + goto out; 322 + break; 323 + case ST_PERAG: 324 + if (sm->sm_ino || sm->sm_gen || 325 + sm->sm_agno >= mp->m_sb.sb_agcount) 326 + goto out; 327 + break; 328 + case ST_INODE: 329 + if (sm->sm_agno || (sm->sm_gen && !sm->sm_ino)) 330 + goto out; 331 + break; 332 + default: 333 + goto out; 334 + } 335 + 336 + error = -EOPNOTSUPP; 337 + /* 338 + * We won't scrub any filesystem that doesn't have the ability 339 + * to record unwritten extents. The option was made default in 340 + * 2003, removed from mkfs in 2007, and cannot be disabled in 341 + * v5, so if we find a filesystem without this flag it's either 342 + * really old or totally unsupported. Avoid it either way. 343 + * We also don't support v1-v3 filesystems, which aren't 344 + * mountable. 345 + */ 346 + if (!xfs_sb_version_hasextflgbit(&mp->m_sb)) 347 + goto out; 348 + 349 + /* We don't know how to repair anything yet. */ 350 + if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) 351 + goto out; 352 + 353 + error = 0; 354 + out: 355 + return error; 356 + } 357 + 320 358 /* Dispatch metadata scrubbing. 
*/ 321 359 int 322 360 xfs_scrub_metadata( ··· 396 292 { 397 293 struct xfs_scrub_context sc; 398 294 struct xfs_mount *mp = ip->i_mount; 399 - const struct xfs_scrub_meta_ops *ops; 400 295 bool try_harder = false; 401 296 int error = 0; 297 + 298 + BUILD_BUG_ON(sizeof(meta_scrub_ops) != 299 + (sizeof(struct xfs_scrub_meta_ops) * XFS_SCRUB_TYPE_NR)); 402 300 403 301 trace_xfs_scrub_start(ip, sm, error); 404 302 ··· 412 306 if (mp->m_flags & XFS_MOUNT_NORECOVERY) 413 307 goto out; 414 308 415 - /* Check our inputs. */ 416 - error = -EINVAL; 417 - sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT; 418 - if (sm->sm_flags & ~XFS_SCRUB_FLAGS_IN) 419 - goto out; 420 - if (memchr_inv(sm->sm_reserved, 0, sizeof(sm->sm_reserved))) 421 - goto out; 422 - 423 - /* Do we know about this type of metadata? */ 424 - error = -ENOENT; 425 - if (sm->sm_type >= XFS_SCRUB_TYPE_NR) 426 - goto out; 427 - ops = &meta_scrub_ops[sm->sm_type]; 428 - if (ops->scrub == NULL) 429 - goto out; 430 - 431 - /* 432 - * We won't scrub any filesystem that doesn't have the ability 433 - * to record unwritten extents. The option was made default in 434 - * 2003, removed from mkfs in 2007, and cannot be disabled in 435 - * v5, so if we find a filesystem without this flag it's either 436 - * really old or totally unsupported. Avoid it either way. 437 - * We also don't support v1-v3 filesystems, which aren't 438 - * mountable. 439 - */ 440 - error = -EOPNOTSUPP; 441 - if (!xfs_sb_version_hasextflgbit(&mp->m_sb)) 442 - goto out; 443 - 444 - /* Does this fs even support this type of metadata? */ 445 - error = -ENOENT; 446 - if (ops->has && !ops->has(&mp->m_sb)) 447 - goto out; 448 - 449 - /* We don't know how to repair anything yet. 
*/ 450 - error = -EOPNOTSUPP; 451 - if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) 309 + error = xfs_scrub_validate_inputs(mp, sm); 310 + if (error) 452 311 goto out; 453 312 454 313 xfs_scrub_experimental_warning(mp); ··· 423 352 memset(&sc, 0, sizeof(sc)); 424 353 sc.mp = ip->i_mount; 425 354 sc.sm = sm; 426 - sc.ops = ops; 355 + sc.ops = &meta_scrub_ops[sm->sm_type]; 427 356 sc.try_harder = try_harder; 428 357 sc.sa.agno = NULLAGNUMBER; 429 358 error = sc.ops->setup(&sc, ip);
+37
fs/xfs/scrub/scrub.h
··· 22 22 23 23 struct xfs_scrub_context; 24 24 25 + /* Type info and names for the scrub types. */ 26 + enum xfs_scrub_type { 27 + ST_NONE = 1, /* disabled */ 28 + ST_PERAG, /* per-AG metadata */ 29 + ST_FS, /* per-FS metadata */ 30 + ST_INODE, /* per-inode metadata */ 31 + }; 32 + 25 33 struct xfs_scrub_meta_ops { 26 34 /* Acquire whatever resources are needed for the operation. */ 27 35 int (*setup)(struct xfs_scrub_context *, ··· 40 32 41 33 /* Decide if we even have this piece of metadata. */ 42 34 bool (*has)(struct xfs_sb *); 35 + 36 + /* type describing required/allowed inputs */ 37 + enum xfs_scrub_type type; 43 38 }; 44 39 45 40 /* Buffer pointers and btree cursors for an entire AG. */ ··· 121 110 { 122 111 return -ENOENT; 123 112 } 113 + #endif 114 + 115 + /* cross-referencing helpers */ 116 + void xfs_scrub_xref_is_used_space(struct xfs_scrub_context *sc, 117 + xfs_agblock_t agbno, xfs_extlen_t len); 118 + void xfs_scrub_xref_is_not_inode_chunk(struct xfs_scrub_context *sc, 119 + xfs_agblock_t agbno, xfs_extlen_t len); 120 + void xfs_scrub_xref_is_inode_chunk(struct xfs_scrub_context *sc, 121 + xfs_agblock_t agbno, xfs_extlen_t len); 122 + void xfs_scrub_xref_is_owned_by(struct xfs_scrub_context *sc, 123 + xfs_agblock_t agbno, xfs_extlen_t len, 124 + struct xfs_owner_info *oinfo); 125 + void xfs_scrub_xref_is_not_owned_by(struct xfs_scrub_context *sc, 126 + xfs_agblock_t agbno, xfs_extlen_t len, 127 + struct xfs_owner_info *oinfo); 128 + void xfs_scrub_xref_has_no_owner(struct xfs_scrub_context *sc, 129 + xfs_agblock_t agbno, xfs_extlen_t len); 130 + void xfs_scrub_xref_is_cow_staging(struct xfs_scrub_context *sc, 131 + xfs_agblock_t bno, xfs_extlen_t len); 132 + void xfs_scrub_xref_is_not_shared(struct xfs_scrub_context *sc, 133 + xfs_agblock_t bno, xfs_extlen_t len); 134 + #ifdef CONFIG_XFS_RT 135 + void xfs_scrub_xref_is_used_rt_space(struct xfs_scrub_context *sc, 136 + xfs_rtblock_t rtbno, xfs_extlen_t len); 137 + #else 138 + # define 
xfs_scrub_xref_is_used_rt_space(sc, rtbno, len) do { } while (0) 124 139 #endif 125 140 126 141 #endif /* __XFS_SCRUB_SCRUB_H__ */
+33 -11
fs/xfs/scrub/trace.h
··· 50 50 __entry->flags = sm->sm_flags; 51 51 __entry->error = error; 52 52 ), 53 - TP_printk("dev %d:%d ino %llu type %u agno %u inum %llu gen %u flags 0x%x error %d", 53 + TP_printk("dev %d:%d ino 0x%llx type %u agno %u inum %llu gen %u flags 0x%x error %d", 54 54 MAJOR(__entry->dev), MINOR(__entry->dev), 55 55 __entry->ino, 56 56 __entry->type, ··· 90 90 __entry->error = error; 91 91 __entry->ret_ip = ret_ip; 92 92 ), 93 - TP_printk("dev %d:%d type %u agno %u agbno %u error %d ret_ip %pF", 93 + TP_printk("dev %d:%d type %u agno %u agbno %u error %d ret_ip %pS", 94 94 MAJOR(__entry->dev), MINOR(__entry->dev), 95 95 __entry->type, 96 96 __entry->agno, ··· 121 121 __entry->error = error; 122 122 __entry->ret_ip = ret_ip; 123 123 ), 124 - TP_printk("dev %d:%d ino %llu fork %d type %u offset %llu error %d ret_ip %pF", 124 + TP_printk("dev %d:%d ino 0x%llx fork %d type %u offset %llu error %d ret_ip %pS", 125 125 MAJOR(__entry->dev), MINOR(__entry->dev), 126 126 __entry->ino, 127 127 __entry->whichfork, ··· 156 156 __entry->bno = bno; 157 157 __entry->ret_ip = ret_ip; 158 158 ), 159 - TP_printk("dev %d:%d type %u agno %u agbno %u ret_ip %pF", 159 + TP_printk("dev %d:%d type %u agno %u agbno %u ret_ip %pS", 160 160 MAJOR(__entry->dev), MINOR(__entry->dev), 161 161 __entry->type, 162 162 __entry->agno, ··· 207 207 __entry->bno = bno; 208 208 __entry->ret_ip = ret_ip; 209 209 ), 210 - TP_printk("dev %d:%d ino %llu type %u agno %u agbno %u ret_ip %pF", 210 + TP_printk("dev %d:%d ino 0x%llx type %u agno %u agbno %u ret_ip %pS", 211 211 MAJOR(__entry->dev), MINOR(__entry->dev), 212 212 __entry->ino, 213 213 __entry->type, ··· 246 246 __entry->offset = offset; 247 247 __entry->ret_ip = ret_ip; 248 248 ), 249 - TP_printk("dev %d:%d ino %llu fork %d type %u offset %llu ret_ip %pF", 249 + TP_printk("dev %d:%d ino 0x%llx fork %d type %u offset %llu ret_ip %pS", 250 250 MAJOR(__entry->dev), MINOR(__entry->dev), 251 251 __entry->ino, 252 252 __entry->whichfork, ··· 277 277 
__entry->type = sc->sm->sm_type; 278 278 __entry->ret_ip = ret_ip; 279 279 ), 280 - TP_printk("dev %d:%d type %u ret_ip %pF", 280 + TP_printk("dev %d:%d type %u ret_ip %pS", 281 281 MAJOR(__entry->dev), MINOR(__entry->dev), 282 282 __entry->type, 283 283 __entry->ret_ip) ··· 311 311 __entry->error = error; 312 312 __entry->ret_ip = ret_ip; 313 313 ), 314 - TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pF", 314 + TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pS", 315 315 MAJOR(__entry->dev), MINOR(__entry->dev), 316 316 __entry->type, 317 317 __entry->btnum, ··· 354 354 __entry->error = error; 355 355 __entry->ret_ip = ret_ip; 356 356 ), 357 - TP_printk("dev %d:%d ino %llu fork %d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pF", 357 + TP_printk("dev %d:%d ino 0x%llx fork %d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pS", 358 358 MAJOR(__entry->dev), MINOR(__entry->dev), 359 359 __entry->ino, 360 360 __entry->whichfork, ··· 393 393 __entry->ptr = cur->bc_ptrs[level]; 394 394 __entry->ret_ip = ret_ip; 395 395 ), 396 - TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pF", 396 + TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pS", 397 397 MAJOR(__entry->dev), MINOR(__entry->dev), 398 398 __entry->type, 399 399 __entry->btnum, ··· 433 433 __entry->ptr = cur->bc_ptrs[level]; 434 434 __entry->ret_ip = ret_ip; 435 435 ), 436 - TP_printk("dev %d:%d ino %llu fork %d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pF", 436 + TP_printk("dev %d:%d ino 0x%llx fork %d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pS", 437 437 MAJOR(__entry->dev), MINOR(__entry->dev), 438 438 __entry->ino, 439 439 __entry->whichfork, ··· 490 490 491 491 DEFINE_SCRUB_SBTREE_EVENT(xfs_scrub_btree_rec); 492 492 DEFINE_SCRUB_SBTREE_EVENT(xfs_scrub_btree_key); 493 + 494 + 
TRACE_EVENT(xfs_scrub_xref_error, 495 + TP_PROTO(struct xfs_scrub_context *sc, int error, void *ret_ip), 496 + TP_ARGS(sc, error, ret_ip), 497 + TP_STRUCT__entry( 498 + __field(dev_t, dev) 499 + __field(int, type) 500 + __field(int, error) 501 + __field(void *, ret_ip) 502 + ), 503 + TP_fast_assign( 504 + __entry->dev = sc->mp->m_super->s_dev; 505 + __entry->type = sc->sm->sm_type; 506 + __entry->error = error; 507 + __entry->ret_ip = ret_ip; 508 + ), 509 + TP_printk("dev %d:%d type %u xref error %d ret_ip %pF", 510 + MAJOR(__entry->dev), MINOR(__entry->dev), 511 + __entry->type, 512 + __entry->error, 513 + __entry->ret_ip) 514 + ); 493 515 494 516 #endif /* _TRACE_XFS_SCRUB_TRACE_H */ 495 517
+14 -1
fs/xfs/xfs_aops.c
··· 390 390 if (XFS_FORCED_SHUTDOWN(mp)) 391 391 return -EIO; 392 392 393 + /* 394 + * Truncate can race with writeback since writeback doesn't take the 395 + * iolock and truncate decreases the file size before it starts 396 + * truncating the pages between new_size and old_size. Therefore, we 397 + * can end up in the situation where writeback gets a CoW fork mapping 398 + * but the truncate makes the mapping invalid and we end up in here 399 + * trying to get a new mapping. Bail out here so that we simply never 400 + * get a valid mapping and so we drop the write altogether. The page 401 + * truncation will kill the contents anyway. 402 + */ 403 + if (type == XFS_IO_COW && offset > i_size_read(inode)) 404 + return 0; 405 + 393 406 ASSERT(type != XFS_IO_COW); 394 407 if (type == XFS_IO_UNWRITTEN) 395 408 bmapi_flags |= XFS_BMAPI_IGSTATE; ··· 804 791 goto out_invalidate; 805 792 806 793 xfs_alert(ip->i_mount, 807 - "page discard on page %p, inode 0x%llx, offset %llu.", 794 + "page discard on page "PTR_FMT", inode 0x%llx, offset %llu.", 808 795 page, ip->i_ino, offset); 809 796 810 797 xfs_ilock(ip, XFS_ILOCK_EXCL);
+2 -2
fs/xfs/xfs_bmap_util.c
··· 1872 1872 */ 1873 1873 lock_two_nondirectories(VFS_I(ip), VFS_I(tip)); 1874 1874 lock_flags = XFS_MMAPLOCK_EXCL; 1875 - xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL); 1875 + xfs_lock_two_inodes(ip, XFS_MMAPLOCK_EXCL, tip, XFS_MMAPLOCK_EXCL); 1876 1876 1877 1877 /* Verify that both files have the same format */ 1878 1878 if ((VFS_I(ip)->i_mode & S_IFMT) != (VFS_I(tip)->i_mode & S_IFMT)) { ··· 1919 1919 * Lock and join the inodes to the tansaction so that transaction commit 1920 1920 * or cancel will unlock the inodes from this point onwards. 1921 1921 */ 1922 - xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); 1922 + xfs_lock_two_inodes(ip, XFS_ILOCK_EXCL, tip, XFS_ILOCK_EXCL); 1923 1923 lock_flags |= XFS_ILOCK_EXCL; 1924 1924 xfs_trans_ijoin(tp, ip, 0); 1925 1925 xfs_trans_ijoin(tp, tip, 0);
+13 -9
fs/xfs/xfs_buf.c
··· 236 236 init_completion(&bp->b_iowait); 237 237 INIT_LIST_HEAD(&bp->b_lru); 238 238 INIT_LIST_HEAD(&bp->b_list); 239 + INIT_LIST_HEAD(&bp->b_li_list); 239 240 sema_init(&bp->b_sema, 0); /* held, no waiters */ 240 241 spin_lock_init(&bp->b_lock); 241 242 XB_SET_OWNER(bp); ··· 586 585 * returning a specific error on buffer lookup failures. 587 586 */ 588 587 xfs_alert(btp->bt_mount, 589 - "%s: Block out of range: block 0x%llx, EOFS 0x%llx ", 588 + "%s: daddr 0x%llx out of range, EOFS 0x%llx", 590 589 __func__, cmap.bm_bn, eofs); 591 590 WARN_ON(1); 592 591 return NULL; ··· 1181 1180 } 1182 1181 1183 1182 void 1184 - xfs_buf_ioerror( 1183 + __xfs_buf_ioerror( 1185 1184 xfs_buf_t *bp, 1186 - int error) 1185 + int error, 1186 + xfs_failaddr_t failaddr) 1187 1187 { 1188 1188 ASSERT(error <= 0 && error >= -1000); 1189 1189 bp->b_error = error; 1190 - trace_xfs_buf_ioerror(bp, error, _RET_IP_); 1190 + trace_xfs_buf_ioerror(bp, error, failaddr); 1191 1191 } 1192 1192 1193 1193 void ··· 1197 1195 const char *func) 1198 1196 { 1199 1197 xfs_alert(bp->b_target->bt_mount, 1200 - "metadata I/O error: block 0x%llx (\"%s\") error %d numblks %d", 1201 - (uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length); 1198 + "metadata I/O error in \"%s\" at daddr 0x%llx len %d error %d", 1199 + func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length, 1200 + -bp->b_error); 1202 1201 } 1203 1202 1204 1203 int ··· 1381 1378 */ 1382 1379 if (xfs_sb_version_hascrc(&mp->m_sb)) { 1383 1380 xfs_warn(mp, 1384 - "%s: no ops on block 0x%llx/0x%x", 1381 + "%s: no buf ops on daddr 0x%llx len %d", 1385 1382 __func__, bp->b_bn, bp->b_length); 1386 - xfs_hex_dump(bp->b_addr, 64); 1383 + xfs_hex_dump(bp->b_addr, 1384 + XFS_CORRUPTION_DUMP_LEN); 1387 1385 dump_stack(); 1388 1386 } 1389 1387 } ··· 1675 1671 list_del_init(&bp->b_lru); 1676 1672 if (bp->b_flags & XBF_WRITE_FAIL) { 1677 1673 xfs_alert(btp->bt_mount, 1678 - "Corruption Alert: Buffer at block 0x%llx had permanent write failures!", 1674 + 
"Corruption Alert: Buffer at daddr 0x%llx had permanent write failures!", 1679 1675 (long long)bp->b_bn); 1680 1676 xfs_alert(btp->bt_mount, 1681 1677 "Please run xfs_repair to determine the extent of the problem.");
+6 -2
fs/xfs/xfs_buf.h
··· 140 140 char *name; 141 141 void (*verify_read)(struct xfs_buf *); 142 142 void (*verify_write)(struct xfs_buf *); 143 + xfs_failaddr_t (*verify_struct)(struct xfs_buf *bp); 143 144 }; 144 145 145 146 typedef struct xfs_buf { ··· 176 175 struct workqueue_struct *b_ioend_wq; /* I/O completion wq */ 177 176 xfs_buf_iodone_t b_iodone; /* I/O completion function */ 178 177 struct completion b_iowait; /* queue for I/O waiters */ 179 - void *b_fspriv; 178 + void *b_log_item; 179 + struct list_head b_li_list; /* Log items list head */ 180 180 struct xfs_trans *b_transp; 181 181 struct page **b_pages; /* array of page pointers */ 182 182 struct page *b_page_array[XB_PAGES]; /* inline pages */ ··· 317 315 /* Buffer Read and Write Routines */ 318 316 extern int xfs_bwrite(struct xfs_buf *bp); 319 317 extern void xfs_buf_ioend(struct xfs_buf *bp); 320 - extern void xfs_buf_ioerror(xfs_buf_t *, int); 318 + extern void __xfs_buf_ioerror(struct xfs_buf *bp, int error, 319 + xfs_failaddr_t failaddr); 320 + #define xfs_buf_ioerror(bp, err) __xfs_buf_ioerror((bp), (err), __this_address) 321 321 extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func); 322 322 extern void xfs_buf_submit(struct xfs_buf *bp); 323 323 extern int xfs_buf_submit_wait(struct xfs_buf *bp);
+82 -74
fs/xfs/xfs_buf_item.c
··· 61 61 */ 62 62 STATIC void 63 63 xfs_buf_item_size_segment( 64 - struct xfs_buf_log_item *bip, 65 - struct xfs_buf_log_format *blfp, 66 - int *nvecs, 67 - int *nbytes) 64 + struct xfs_buf_log_item *bip, 65 + struct xfs_buf_log_format *blfp, 66 + int *nvecs, 67 + int *nbytes) 68 68 { 69 - struct xfs_buf *bp = bip->bli_buf; 70 - int next_bit; 71 - int last_bit; 69 + struct xfs_buf *bp = bip->bli_buf; 70 + int next_bit; 71 + int last_bit; 72 72 73 73 last_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0); 74 74 if (last_bit == -1) ··· 218 218 uint offset, 219 219 struct xfs_buf_log_format *blfp) 220 220 { 221 - struct xfs_buf *bp = bip->bli_buf; 222 - uint base_size; 223 - int first_bit; 224 - int last_bit; 225 - int next_bit; 226 - uint nbits; 221 + struct xfs_buf *bp = bip->bli_buf; 222 + uint base_size; 223 + int first_bit; 224 + int last_bit; 225 + int next_bit; 226 + uint nbits; 227 227 228 228 /* copy the flags across from the base format item */ 229 229 blfp->blf_flags = bip->__bli_format.blf_flags; ··· 406 406 int remove) 407 407 { 408 408 struct xfs_buf_log_item *bip = BUF_ITEM(lip); 409 - xfs_buf_t *bp = bip->bli_buf; 410 - struct xfs_ail *ailp = lip->li_ailp; 411 - int stale = bip->bli_flags & XFS_BLI_STALE; 412 - int freed; 409 + xfs_buf_t *bp = bip->bli_buf; 410 + struct xfs_ail *ailp = lip->li_ailp; 411 + int stale = bip->bli_flags & XFS_BLI_STALE; 412 + int freed; 413 413 414 - ASSERT(bp->b_fspriv == bip); 414 + ASSERT(bp->b_log_item == bip); 415 415 ASSERT(atomic_read(&bip->bli_refcount) > 0); 416 416 417 417 trace_xfs_buf_item_unpin(bip); ··· 456 456 */ 457 457 if (bip->bli_flags & XFS_BLI_STALE_INODE) { 458 458 xfs_buf_do_callbacks(bp); 459 - bp->b_fspriv = NULL; 459 + bp->b_log_item = NULL; 460 + list_del_init(&bp->b_li_list); 460 461 bp->b_iodone = NULL; 461 462 } else { 462 463 spin_lock(&ailp->xa_lock); 463 464 xfs_trans_ail_delete(ailp, lip, SHUTDOWN_LOG_IO_ERROR); 464 465 xfs_buf_item_relse(bp); 465 - ASSERT(bp->b_fspriv == 
NULL); 466 + ASSERT(bp->b_log_item == NULL); 466 467 } 467 468 xfs_buf_relse(bp); 468 469 } else if (freed && remove) { ··· 723 722 724 723 /* 725 724 * Allocate a new buf log item to go with the given buffer. 726 - * Set the buffer's b_fsprivate field to point to the new 727 - * buf log item. If there are other item's attached to the 728 - * buffer (see xfs_buf_attach_iodone() below), then put the 729 - * buf log item at the front. 725 + * Set the buffer's b_log_item field to point to the new 726 + * buf log item. 730 727 */ 731 728 int 732 729 xfs_buf_item_init( 733 730 struct xfs_buf *bp, 734 731 struct xfs_mount *mp) 735 732 { 736 - struct xfs_log_item *lip = bp->b_fspriv; 737 - struct xfs_buf_log_item *bip; 733 + struct xfs_buf_log_item *bip = bp->b_log_item; 738 734 int chunks; 739 735 int map_size; 740 736 int error; ··· 739 741 740 742 /* 741 743 * Check to see if there is already a buf log item for 742 - * this buffer. If there is, it is guaranteed to be 743 - * the first. If we do already have one, there is 744 + * this buffer. If we do already have one, there is 744 745 * nothing to do here so return. 745 746 */ 746 747 ASSERT(bp->b_target->bt_mount == mp); 747 - if (lip != NULL && lip->li_type == XFS_LI_BUF) 748 + if (bip != NULL) { 749 + ASSERT(bip->bli_item.li_type == XFS_LI_BUF); 748 750 return 0; 751 + } 749 752 750 753 bip = kmem_zone_zalloc(xfs_buf_item_zone, KM_SLEEP); 751 754 xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops); ··· 780 781 bip->bli_formats[i].blf_map_size = map_size; 781 782 } 782 783 783 - /* 784 - * Put the buf item into the list of items attached to the 785 - * buffer at the front. 
786 - */ 787 - if (bp->b_fspriv) 788 - bip->bli_item.li_bio_list = bp->b_fspriv; 789 - bp->b_fspriv = bip; 784 + bp->b_log_item = bip; 790 785 xfs_buf_hold(bp); 791 786 return 0; 792 787 } ··· 873 880 */ 874 881 void 875 882 xfs_buf_item_log( 876 - xfs_buf_log_item_t *bip, 883 + struct xfs_buf_log_item *bip, 877 884 uint first, 878 885 uint last) 879 886 { ··· 936 943 937 944 STATIC void 938 945 xfs_buf_item_free( 939 - xfs_buf_log_item_t *bip) 946 + struct xfs_buf_log_item *bip) 940 947 { 941 948 xfs_buf_item_free_format(bip); 942 949 kmem_free(bip->bli_item.li_lv_shadow); ··· 954 961 xfs_buf_item_relse( 955 962 xfs_buf_t *bp) 956 963 { 957 - xfs_buf_log_item_t *bip = bp->b_fspriv; 964 + struct xfs_buf_log_item *bip = bp->b_log_item; 958 965 959 966 trace_xfs_buf_item_relse(bp, _RET_IP_); 960 967 ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL)); 961 968 962 - bp->b_fspriv = bip->bli_item.li_bio_list; 963 - if (bp->b_fspriv == NULL) 969 + bp->b_log_item = NULL; 970 + if (list_empty(&bp->b_li_list)) 964 971 bp->b_iodone = NULL; 965 972 966 973 xfs_buf_rele(bp); ··· 973 980 * to be called when the buffer's I/O completes. If it is not set 974 981 * already, set the buffer's b_iodone() routine to be 975 982 * xfs_buf_iodone_callbacks() and link the log item into the list of 976 - * items rooted at b_fsprivate. Items are always added as the second 977 - * entry in the list if there is a first, because the buf item code 978 - * assumes that the buf log item is first. 983 + * items rooted at b_li_list. 
979 984 */ 980 985 void 981 986 xfs_buf_attach_iodone( ··· 981 990 void (*cb)(xfs_buf_t *, xfs_log_item_t *), 982 991 xfs_log_item_t *lip) 983 992 { 984 - xfs_log_item_t *head_lip; 985 - 986 993 ASSERT(xfs_buf_islocked(bp)); 987 994 988 995 lip->li_cb = cb; 989 - head_lip = bp->b_fspriv; 990 - if (head_lip) { 991 - lip->li_bio_list = head_lip->li_bio_list; 992 - head_lip->li_bio_list = lip; 993 - } else { 994 - bp->b_fspriv = lip; 995 - } 996 + list_add_tail(&lip->li_bio_list, &bp->b_li_list); 996 997 997 998 ASSERT(bp->b_iodone == NULL || 998 999 bp->b_iodone == xfs_buf_iodone_callbacks); ··· 994 1011 /* 995 1012 * We can have many callbacks on a buffer. Running the callbacks individually 996 1013 * can cause a lot of contention on the AIL lock, so we allow for a single 997 - * callback to be able to scan the remaining lip->li_bio_list for other items 998 - * of the same type and callback to be processed in the first call. 1014 + * callback to be able to scan the remaining items in bp->b_li_list for other 1015 + * items of the same type and callback to be processed in the first call. 999 1016 * 1000 1017 * As a result, the loop walking the callback list below will also modify the 1001 1018 * list. it removes the first item from the list and then runs the callback. 1002 - * The loop then restarts from the new head of the list. This allows the 1019 + * The loop then restarts from the new first item int the list. This allows the 1003 1020 * callback to scan and modify the list attached to the buffer and we don't 1004 1021 * have to care about maintaining a next item pointer. 
1005 1022 */ ··· 1007 1024 xfs_buf_do_callbacks( 1008 1025 struct xfs_buf *bp) 1009 1026 { 1027 + struct xfs_buf_log_item *blip = bp->b_log_item; 1010 1028 struct xfs_log_item *lip; 1011 1029 1012 - while ((lip = bp->b_fspriv) != NULL) { 1013 - bp->b_fspriv = lip->li_bio_list; 1014 - ASSERT(lip->li_cb != NULL); 1030 + /* If there is a buf_log_item attached, run its callback */ 1031 + if (blip) { 1032 + lip = &blip->bli_item; 1033 + lip->li_cb(bp, lip); 1034 + } 1035 + 1036 + while (!list_empty(&bp->b_li_list)) { 1037 + lip = list_first_entry(&bp->b_li_list, struct xfs_log_item, 1038 + li_bio_list); 1039 + 1015 1040 /* 1016 - * Clear the next pointer so we don't have any 1041 + * Remove the item from the list, so we don't have any 1017 1042 * confusion if the item is added to another buf. 1018 1043 * Don't touch the log item after calling its 1019 1044 * callback, because it could have freed itself. 1020 1045 */ 1021 - lip->li_bio_list = NULL; 1046 + list_del_init(&lip->li_bio_list); 1022 1047 lip->li_cb(bp, lip); 1023 1048 } 1024 1049 } ··· 1043 1052 xfs_buf_do_callbacks_fail( 1044 1053 struct xfs_buf *bp) 1045 1054 { 1046 - struct xfs_log_item *next; 1047 - struct xfs_log_item *lip = bp->b_fspriv; 1048 - struct xfs_ail *ailp = lip->li_ailp; 1055 + struct xfs_log_item *lip; 1056 + struct xfs_ail *ailp; 1049 1057 1058 + /* 1059 + * Buffer log item errors are handled directly by xfs_buf_item_push() 1060 + * and xfs_buf_iodone_callback_error, and they have no IO error 1061 + * callbacks. Check only for items in b_li_list. 
1062 + */ 1063 + if (list_empty(&bp->b_li_list)) 1064 + return; 1065 + 1066 + lip = list_first_entry(&bp->b_li_list, struct xfs_log_item, 1067 + li_bio_list); 1068 + ailp = lip->li_ailp; 1050 1069 spin_lock(&ailp->xa_lock); 1051 - for (; lip; lip = next) { 1052 - next = lip->li_bio_list; 1070 + list_for_each_entry(lip, &bp->b_li_list, li_bio_list) { 1053 1071 if (lip->li_ops->iop_error) 1054 1072 lip->li_ops->iop_error(lip, bp); 1055 1073 } ··· 1069 1069 xfs_buf_iodone_callback_error( 1070 1070 struct xfs_buf *bp) 1071 1071 { 1072 - struct xfs_log_item *lip = bp->b_fspriv; 1073 - struct xfs_mount *mp = lip->li_mountp; 1072 + struct xfs_buf_log_item *bip = bp->b_log_item; 1073 + struct xfs_log_item *lip; 1074 + struct xfs_mount *mp; 1074 1075 static ulong lasttime; 1075 1076 static xfs_buftarg_t *lasttarg; 1076 1077 struct xfs_error_cfg *cfg; 1078 + 1079 + /* 1080 + * The failed buffer might not have a buf_log_item attached or the 1081 + * log_item list might be empty. Get the mp from the available 1082 + * xfs_log_item 1083 + */ 1084 + lip = list_first_entry_or_null(&bp->b_li_list, struct xfs_log_item, 1085 + li_bio_list); 1086 + mp = lip ? 
lip->li_mountp : bip->bli_item.li_mountp; 1077 1087 1078 1088 /* 1079 1089 * If we've already decided to shutdown the filesystem because of ··· 1193 1183 bp->b_first_retry_time = 0; 1194 1184 1195 1185 xfs_buf_do_callbacks(bp); 1196 - bp->b_fspriv = NULL; 1186 + bp->b_log_item = NULL; 1187 + list_del_init(&bp->b_li_list); 1197 1188 bp->b_iodone = NULL; 1198 1189 xfs_buf_ioend(bp); 1199 1190 } ··· 1239 1228 bool 1240 1229 xfs_buf_resubmit_failed_buffers( 1241 1230 struct xfs_buf *bp, 1242 - struct xfs_log_item *lip, 1243 1231 struct list_head *buffer_list) 1244 1232 { 1245 - struct xfs_log_item *next; 1233 + struct xfs_log_item *lip; 1246 1234 1247 1235 /* 1248 1236 * Clear XFS_LI_FAILED flag from all items before resubmit ··· 1249 1239 * XFS_LI_FAILED set/clear is protected by xa_lock, caller this 1250 1240 * function already have it acquired 1251 1241 */ 1252 - for (; lip; lip = next) { 1253 - next = lip->li_bio_list; 1242 + list_for_each_entry(lip, &bp->b_li_list, li_bio_list) 1254 1243 xfs_clear_li_failed(lip); 1255 - } 1256 1244 1257 1245 /* Add this buffer back to the delayed write list */ 1258 1246 return xfs_buf_delwri_queue(bp, buffer_list);
+3 -4
fs/xfs/xfs_buf_item.h
··· 50 50 * needed to log buffers. It tracks how many times the lock has been 51 51 * locked, and which 128 byte chunks of the buffer are dirty. 52 52 */ 53 - typedef struct xfs_buf_log_item { 53 + struct xfs_buf_log_item { 54 54 xfs_log_item_t bli_item; /* common item structure */ 55 55 struct xfs_buf *bli_buf; /* real buffer pointer */ 56 56 unsigned int bli_flags; /* misc flags */ ··· 59 59 int bli_format_count; /* count of headers */ 60 60 struct xfs_buf_log_format *bli_formats; /* array of in-log header ptrs */ 61 61 struct xfs_buf_log_format __bli_format; /* embedded in-log header */ 62 - } xfs_buf_log_item_t; 62 + }; 63 63 64 64 int xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); 65 65 void xfs_buf_item_relse(struct xfs_buf *); 66 - void xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint); 66 + void xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint); 67 67 bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *); 68 68 void xfs_buf_attach_iodone(struct xfs_buf *, 69 69 void(*)(struct xfs_buf *, xfs_log_item_t *), ··· 71 71 void xfs_buf_iodone_callbacks(struct xfs_buf *); 72 72 void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *); 73 73 bool xfs_buf_resubmit_failed_buffers(struct xfs_buf *, 74 - struct xfs_log_item *, 75 74 struct list_head *); 76 75 77 76 extern kmem_zone_t *xfs_buf_item_zone;
+1 -3
fs/xfs/xfs_dir2_readdir.c
··· 152 152 struct xfs_inode *dp = args->dp; /* incore directory inode */ 153 153 xfs_dir2_data_hdr_t *hdr; /* block header */ 154 154 struct xfs_buf *bp; /* buffer for block */ 155 - xfs_dir2_block_tail_t *btp; /* block tail */ 156 155 xfs_dir2_data_entry_t *dep; /* block data entry */ 157 156 xfs_dir2_data_unused_t *dup; /* block unused entry */ 158 157 char *endptr; /* end of the data entries */ ··· 184 185 /* 185 186 * Set up values for the loop. 186 187 */ 187 - btp = xfs_dir2_block_tail_p(geo, hdr); 188 188 ptr = (char *)dp->d_ops->data_entry_p(hdr); 189 - endptr = (char *)xfs_dir2_block_leaf_p(btp); 189 + endptr = xfs_dir3_data_endp(geo, hdr); 190 190 191 191 /* 192 192 * Loop over the data portion of the block.
+5 -57
fs/xfs/xfs_dquot.c
··· 399 399 return error; 400 400 } 401 401 402 - STATIC int 403 - xfs_qm_dqrepair( 404 - struct xfs_mount *mp, 405 - struct xfs_trans *tp, 406 - struct xfs_dquot *dqp, 407 - xfs_dqid_t firstid, 408 - struct xfs_buf **bpp) 409 - { 410 - int error; 411 - struct xfs_disk_dquot *ddq; 412 - struct xfs_dqblk *d; 413 - int i; 414 - 415 - /* 416 - * Read the buffer without verification so we get the corrupted 417 - * buffer returned to us. make sure we verify it on write, though. 418 - */ 419 - error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, dqp->q_blkno, 420 - mp->m_quotainfo->qi_dqchunklen, 421 - 0, bpp, NULL); 422 - 423 - if (error) { 424 - ASSERT(*bpp == NULL); 425 - return error; 426 - } 427 - (*bpp)->b_ops = &xfs_dquot_buf_ops; 428 - 429 - ASSERT(xfs_buf_islocked(*bpp)); 430 - d = (struct xfs_dqblk *)(*bpp)->b_addr; 431 - 432 - /* Do the actual repair of dquots in this buffer */ 433 - for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++) { 434 - ddq = &d[i].dd_diskdq; 435 - error = xfs_dqcheck(mp, ddq, firstid + i, 436 - dqp->dq_flags & XFS_DQ_ALLTYPES, 437 - XFS_QMOPT_DQREPAIR, "xfs_qm_dqrepair"); 438 - if (error) { 439 - /* repair failed, we're screwed */ 440 - xfs_trans_brelse(tp, *bpp); 441 - return -EIO; 442 - } 443 - } 444 - 445 - return 0; 446 - } 447 - 448 402 /* 449 403 * Maps a dquot to the buffer containing its on-disk version. 
450 404 * This returns a ptr to the buffer containing the on-disk dquot ··· 480 526 dqp->q_blkno, 481 527 mp->m_quotainfo->qi_dqchunklen, 482 528 0, &bp, &xfs_dquot_buf_ops); 483 - 484 - if (error == -EFSCORRUPTED && (flags & XFS_QMOPT_DQREPAIR)) { 485 - xfs_dqid_t firstid = (xfs_dqid_t)map.br_startoff * 486 - mp->m_quotainfo->qi_dqperchunk; 487 - ASSERT(bp == NULL); 488 - error = xfs_qm_dqrepair(mp, tp, dqp, firstid, &bp); 489 - } 490 - 491 529 if (error) { 492 530 ASSERT(bp == NULL); 493 531 return error; ··· 956 1010 struct xfs_mount *mp = dqp->q_mount; 957 1011 struct xfs_buf *bp; 958 1012 struct xfs_disk_dquot *ddqp; 1013 + xfs_failaddr_t fa; 959 1014 int error; 960 1015 961 1016 ASSERT(XFS_DQ_IS_LOCKED(dqp)); ··· 1003 1056 /* 1004 1057 * A simple sanity check in case we got a corrupted dquot.. 1005 1058 */ 1006 - error = xfs_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0, 1007 - XFS_QMOPT_DOWARN, "dqflush (incore copy)"); 1008 - if (error) { 1059 + fa = xfs_dquot_verify(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0, 0); 1060 + if (fa) { 1061 + xfs_alert(mp, "corrupt dquot ID 0x%x in memory at %pS", 1062 + be32_to_cpu(ddqp->d_id), fa); 1009 1063 xfs_buf_relse(bp); 1010 1064 xfs_dqfunlock(dqp); 1011 1065 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+3 -6
fs/xfs/xfs_dquot_item.c
··· 150 150 struct xfs_log_item *lip, 151 151 struct xfs_buf *bp) 152 152 { 153 - struct xfs_dquot *dqp; 154 - 155 - dqp = DQUOT_ITEM(lip)->qli_dquot; 156 - ASSERT(!completion_done(&dqp->q_flush)); 153 + ASSERT(!completion_done(&DQUOT_ITEM(lip)->qli_dquot->q_flush)); 157 154 xfs_set_li_failed(lip, bp); 158 155 } 159 156 ··· 176 179 if (!xfs_buf_trylock(bp)) 177 180 return XFS_ITEM_LOCKED; 178 181 179 - if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list)) 182 + if (!xfs_buf_resubmit_failed_buffers(bp, buffer_list)) 180 183 rval = XFS_ITEM_FLUSHING; 181 184 182 185 xfs_buf_unlock(bp); ··· 209 212 210 213 error = xfs_qm_dqflush(dqp, &bp); 211 214 if (error) { 212 - xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p", 215 + xfs_warn(dqp->q_mount, "%s: push error %d on dqp "PTR_FMT, 213 216 __func__, error, dqp); 214 217 } else { 215 218 if (!xfs_buf_delwri_queue(bp, buffer_list))
+54 -10
fs/xfs/xfs_error.c
··· 24 24 #include "xfs_errortag.h" 25 25 #include "xfs_error.h" 26 26 #include "xfs_sysfs.h" 27 + #include "xfs_inode.h" 27 28 28 29 #ifdef DEBUG 29 30 ··· 315 314 struct xfs_mount *mp, 316 315 const char *filename, 317 316 int linenum, 318 - void *ra) 317 + xfs_failaddr_t failaddr) 319 318 { 320 319 if (level <= xfs_error_level) { 321 320 xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT, 322 321 "Internal error %s at line %d of file %s. Caller %pS", 323 - tag, linenum, filename, ra); 322 + tag, linenum, filename, failaddr); 324 323 325 324 xfs_stack_trace(); 326 325 } ··· 334 333 void *p, 335 334 const char *filename, 336 335 int linenum, 337 - void *ra) 336 + xfs_failaddr_t failaddr) 338 337 { 339 338 if (level <= xfs_error_level) 340 - xfs_hex_dump(p, 64); 341 - xfs_error_report(tag, level, mp, filename, linenum, ra); 339 + xfs_hex_dump(p, XFS_CORRUPTION_DUMP_LEN); 340 + xfs_error_report(tag, level, mp, filename, linenum, failaddr); 342 341 xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair"); 343 342 } 344 343 ··· 348 347 */ 349 348 void 350 349 xfs_verifier_error( 351 - struct xfs_buf *bp) 350 + struct xfs_buf *bp, 351 + int error, 352 + xfs_failaddr_t failaddr) 352 353 { 353 - struct xfs_mount *mp = bp->b_target->bt_mount; 354 + struct xfs_mount *mp = bp->b_target->bt_mount; 355 + xfs_failaddr_t fa; 356 + 357 + fa = failaddr ? failaddr : __return_address; 358 + __xfs_buf_ioerror(bp, error, fa); 354 359 355 360 xfs_alert(mp, "Metadata %s detected at %pS, %s block 0x%llx", 356 361 bp->b_error == -EFSBADCRC ? 
"CRC error" : "corruption", 357 - __return_address, bp->b_ops->name, bp->b_bn); 362 + fa, bp->b_ops->name, bp->b_bn); 358 363 359 364 xfs_alert(mp, "Unmount and run xfs_repair"); 360 365 361 366 if (xfs_error_level >= XFS_ERRLEVEL_LOW) { 362 - xfs_alert(mp, "First 64 bytes of corrupted metadata buffer:"); 363 - xfs_hex_dump(xfs_buf_offset(bp, 0), 64); 367 + xfs_alert(mp, "First %d bytes of corrupted metadata buffer:", 368 + XFS_CORRUPTION_DUMP_LEN); 369 + xfs_hex_dump(xfs_buf_offset(bp, 0), XFS_CORRUPTION_DUMP_LEN); 370 + } 371 + 372 + if (xfs_error_level >= XFS_ERRLEVEL_HIGH) 373 + xfs_stack_trace(); 374 + } 375 + 376 + /* 377 + * Warnings for inode corruption problems. Don't bother with the stack 378 + * trace unless the error level is turned up high. 379 + */ 380 + void 381 + xfs_inode_verifier_error( 382 + struct xfs_inode *ip, 383 + int error, 384 + const char *name, 385 + void *buf, 386 + size_t bufsz, 387 + xfs_failaddr_t failaddr) 388 + { 389 + struct xfs_mount *mp = ip->i_mount; 390 + xfs_failaddr_t fa; 391 + int sz; 392 + 393 + fa = failaddr ? failaddr : __return_address; 394 + 395 + xfs_alert(mp, "Metadata %s detected at %pS, inode 0x%llx %s", 396 + error == -EFSBADCRC ? "CRC error" : "corruption", 397 + fa, ip->i_ino, name); 398 + 399 + xfs_alert(mp, "Unmount and run xfs_repair"); 400 + 401 + if (buf && xfs_error_level >= XFS_ERRLEVEL_LOW) { 402 + sz = min_t(size_t, XFS_CORRUPTION_DUMP_LEN, bufsz); 403 + xfs_alert(mp, "First %d bytes of corrupted metadata buffer:", 404 + sz); 405 + xfs_hex_dump(buf, sz); 364 406 } 365 407 366 408 if (xfs_error_level >= XFS_ERRLEVEL_HIGH)
+11 -3
fs/xfs/xfs_error.h
··· 21 21 struct xfs_mount; 22 22 23 23 extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp, 24 - const char *filename, int linenum, void *ra); 24 + const char *filename, int linenum, 25 + xfs_failaddr_t failaddr); 25 26 extern void xfs_corruption_error(const char *tag, int level, 26 27 struct xfs_mount *mp, void *p, const char *filename, 27 - int linenum, void *ra); 28 - extern void xfs_verifier_error(struct xfs_buf *bp); 28 + int linenum, xfs_failaddr_t failaddr); 29 + extern void xfs_verifier_error(struct xfs_buf *bp, int error, 30 + xfs_failaddr_t failaddr); 31 + extern void xfs_inode_verifier_error(struct xfs_inode *ip, int error, 32 + const char *name, void *buf, size_t bufsz, 33 + xfs_failaddr_t failaddr); 29 34 30 35 #define XFS_ERROR_REPORT(e, lvl, mp) \ 31 36 xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address) ··· 41 36 #define XFS_ERRLEVEL_OFF 0 42 37 #define XFS_ERRLEVEL_LOW 1 43 38 #define XFS_ERRLEVEL_HIGH 5 39 + 40 + /* Dump 128 bytes of any corrupt buffer */ 41 + #define XFS_CORRUPTION_DUMP_LEN (128) 44 42 45 43 /* 46 44 * Macros to set EFSCORRUPTED & return/branch.
+1 -78
fs/xfs/xfs_fsops.c
··· 49 49 * File system operations 50 50 */ 51 51 52 - int 53 - xfs_fs_geometry( 54 - xfs_mount_t *mp, 55 - xfs_fsop_geom_t *geo, 56 - int new_version) 57 - { 58 - 59 - memset(geo, 0, sizeof(*geo)); 60 - 61 - geo->blocksize = mp->m_sb.sb_blocksize; 62 - geo->rtextsize = mp->m_sb.sb_rextsize; 63 - geo->agblocks = mp->m_sb.sb_agblocks; 64 - geo->agcount = mp->m_sb.sb_agcount; 65 - geo->logblocks = mp->m_sb.sb_logblocks; 66 - geo->sectsize = mp->m_sb.sb_sectsize; 67 - geo->inodesize = mp->m_sb.sb_inodesize; 68 - geo->imaxpct = mp->m_sb.sb_imax_pct; 69 - geo->datablocks = mp->m_sb.sb_dblocks; 70 - geo->rtblocks = mp->m_sb.sb_rblocks; 71 - geo->rtextents = mp->m_sb.sb_rextents; 72 - geo->logstart = mp->m_sb.sb_logstart; 73 - ASSERT(sizeof(geo->uuid)==sizeof(mp->m_sb.sb_uuid)); 74 - memcpy(geo->uuid, &mp->m_sb.sb_uuid, sizeof(mp->m_sb.sb_uuid)); 75 - if (new_version >= 2) { 76 - geo->sunit = mp->m_sb.sb_unit; 77 - geo->swidth = mp->m_sb.sb_width; 78 - } 79 - if (new_version >= 3) { 80 - geo->version = XFS_FSOP_GEOM_VERSION; 81 - geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK | 82 - XFS_FSOP_GEOM_FLAGS_DIRV2 | 83 - (xfs_sb_version_hasattr(&mp->m_sb) ? 84 - XFS_FSOP_GEOM_FLAGS_ATTR : 0) | 85 - (xfs_sb_version_hasquota(&mp->m_sb) ? 86 - XFS_FSOP_GEOM_FLAGS_QUOTA : 0) | 87 - (xfs_sb_version_hasalign(&mp->m_sb) ? 88 - XFS_FSOP_GEOM_FLAGS_IALIGN : 0) | 89 - (xfs_sb_version_hasdalign(&mp->m_sb) ? 90 - XFS_FSOP_GEOM_FLAGS_DALIGN : 0) | 91 - (xfs_sb_version_hasextflgbit(&mp->m_sb) ? 92 - XFS_FSOP_GEOM_FLAGS_EXTFLG : 0) | 93 - (xfs_sb_version_hassector(&mp->m_sb) ? 94 - XFS_FSOP_GEOM_FLAGS_SECTOR : 0) | 95 - (xfs_sb_version_hasasciici(&mp->m_sb) ? 96 - XFS_FSOP_GEOM_FLAGS_DIRV2CI : 0) | 97 - (xfs_sb_version_haslazysbcount(&mp->m_sb) ? 98 - XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) | 99 - (xfs_sb_version_hasattr2(&mp->m_sb) ? 100 - XFS_FSOP_GEOM_FLAGS_ATTR2 : 0) | 101 - (xfs_sb_version_hasprojid32bit(&mp->m_sb) ? 102 - XFS_FSOP_GEOM_FLAGS_PROJID32 : 0) | 103 - (xfs_sb_version_hascrc(&mp->m_sb) ? 
104 - XFS_FSOP_GEOM_FLAGS_V5SB : 0) | 105 - (xfs_sb_version_hasftype(&mp->m_sb) ? 106 - XFS_FSOP_GEOM_FLAGS_FTYPE : 0) | 107 - (xfs_sb_version_hasfinobt(&mp->m_sb) ? 108 - XFS_FSOP_GEOM_FLAGS_FINOBT : 0) | 109 - (xfs_sb_version_hassparseinodes(&mp->m_sb) ? 110 - XFS_FSOP_GEOM_FLAGS_SPINODES : 0) | 111 - (xfs_sb_version_hasrmapbt(&mp->m_sb) ? 112 - XFS_FSOP_GEOM_FLAGS_RMAPBT : 0) | 113 - (xfs_sb_version_hasreflink(&mp->m_sb) ? 114 - XFS_FSOP_GEOM_FLAGS_REFLINK : 0); 115 - geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ? 116 - mp->m_sb.sb_logsectsize : BBSIZE; 117 - geo->rtsectsize = mp->m_sb.sb_blocksize; 118 - geo->dirblocksize = mp->m_dir_geo->blksize; 119 - } 120 - if (new_version >= 4) { 121 - geo->flags |= 122 - (xfs_sb_version_haslogv2(&mp->m_sb) ? 123 - XFS_FSOP_GEOM_FLAGS_LOGV2 : 0); 124 - geo->logsunit = mp->m_sb.sb_logsunit; 125 - } 126 - return 0; 127 - } 128 - 129 52 static struct xfs_buf * 130 53 xfs_growfs_get_hdr_buf( 131 54 struct xfs_mount *mp, ··· 878 955 879 956 if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { 880 957 xfs_notice(mp, 881 - "%s(0x%x) called from line %d of file %s. Return address = 0x%p", 958 + "%s(0x%x) called from line %d of file %s. Return address = "PTR_FMT, 882 959 __func__, flags, lnnum, fname, __return_address); 883 960 } 884 961 /*
-1
fs/xfs/xfs_fsops.h
··· 18 18 #ifndef __XFS_FSOPS_H__ 19 19 #define __XFS_FSOPS_H__ 20 20 21 - extern int xfs_fs_geometry(xfs_mount_t *mp, xfs_fsop_geom_t *geo, int nversion); 22 21 extern int xfs_growfs_data(xfs_mount_t *mp, xfs_growfs_data_t *in); 23 22 extern int xfs_growfs_log(xfs_mount_t *mp, xfs_growfs_log_t *in); 24 23 extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt);
+50 -22
fs/xfs/xfs_icache.c
··· 296 296 uint32_t generation = inode->i_generation; 297 297 uint64_t version = inode_peek_iversion(inode); 298 298 umode_t mode = inode->i_mode; 299 + dev_t dev = inode->i_rdev; 299 300 300 301 error = inode_init_always(mp->m_super, inode); 301 302 ··· 304 303 inode->i_generation = generation; 305 304 inode_set_iversion_queried(inode, version); 306 305 inode->i_mode = mode; 306 + inode->i_rdev = dev; 307 307 return error; 308 308 } 309 309 ··· 475 473 error = xfs_iread(mp, tp, ip, flags); 476 474 if (error) 477 475 goto out_destroy; 476 + 477 + if (!xfs_inode_verify_forks(ip)) { 478 + error = -EFSCORRUPTED; 479 + goto out_destroy; 480 + } 478 481 479 482 trace_xfs_iget_miss(ip); 480 483 ··· 1658 1651 } 1659 1652 1660 1653 /* 1654 + * Set ourselves up to free CoW blocks from this file. If it's already clean 1655 + * then we can bail out quickly, but otherwise we must back off if the file 1656 + * is undergoing some kind of write. 1657 + */ 1658 + static bool 1659 + xfs_prep_free_cowblocks( 1660 + struct xfs_inode *ip, 1661 + struct xfs_ifork *ifp) 1662 + { 1663 + /* 1664 + * Just clear the tag if we have an empty cow fork or none at all. It's 1665 + * possible the inode was fully unshared since it was originally tagged. 1666 + */ 1667 + if (!xfs_is_reflink_inode(ip) || !ifp->if_bytes) { 1668 + trace_xfs_inode_free_cowblocks_invalid(ip); 1669 + xfs_inode_clear_cowblocks_tag(ip); 1670 + return false; 1671 + } 1672 + 1673 + /* 1674 + * If the mapping is dirty or under writeback we cannot touch the 1675 + * CoW fork. Leave it alone if we're in the midst of a directio. 
1676 + */ 1677 + if ((VFS_I(ip)->i_state & I_DIRTY_PAGES) || 1678 + mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) || 1679 + mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) || 1680 + atomic_read(&VFS_I(ip)->i_dio_count)) 1681 + return false; 1682 + 1683 + return true; 1684 + } 1685 + 1686 + /* 1661 1687 * Automatic CoW Reservation Freeing 1662 1688 * 1663 1689 * These functions automatically garbage collect leftover CoW reservations ··· 1708 1668 int flags, 1709 1669 void *args) 1710 1670 { 1711 - int ret; 1712 - struct xfs_eofblocks *eofb = args; 1713 - int match; 1671 + struct xfs_eofblocks *eofb = args; 1714 1672 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); 1673 + int match; 1674 + int ret = 0; 1715 1675 1716 - /* 1717 - * Just clear the tag if we have an empty cow fork or none at all. It's 1718 - * possible the inode was fully unshared since it was originally tagged. 1719 - */ 1720 - if (!xfs_is_reflink_inode(ip) || !ifp->if_bytes) { 1721 - trace_xfs_inode_free_cowblocks_invalid(ip); 1722 - xfs_inode_clear_cowblocks_tag(ip); 1723 - return 0; 1724 - } 1725 - 1726 - /* 1727 - * If the mapping is dirty or under writeback we cannot touch the 1728 - * CoW fork. Leave it alone if we're in the midst of a directio. 1729 - */ 1730 - if ((VFS_I(ip)->i_state & I_DIRTY_PAGES) || 1731 - mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) || 1732 - mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) || 1733 - atomic_read(&VFS_I(ip)->i_dio_count)) 1676 + if (!xfs_prep_free_cowblocks(ip, ifp)) 1734 1677 return 0; 1735 1678 1736 1679 if (eofb) { ··· 1734 1711 xfs_ilock(ip, XFS_IOLOCK_EXCL); 1735 1712 xfs_ilock(ip, XFS_MMAPLOCK_EXCL); 1736 1713 1737 - ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false); 1714 + /* 1715 + * Check again, nobody else should be able to dirty blocks or change 1716 + * the reflink iflag now that we have the first two locks held. 
1717 + */ 1718 + if (xfs_prep_free_cowblocks(ip, ifp)) 1719 + ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false); 1738 1720 1739 1721 xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); 1740 1722 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+73 -31
fs/xfs/xfs_inode.c
··· 547 547 548 548 /* 549 549 * xfs_lock_two_inodes() can only be used to lock one type of lock at a time - 550 - * the iolock, the mmaplock or the ilock, but not more than one at a time. If we 551 - * lock more than one at a time, lockdep will report false positives saying we 552 - * have violated locking orders. 550 + * the mmaplock or the ilock, but not more than one type at a time. If we lock 551 + * more than one at a time, lockdep will report false positives saying we have 552 + * violated locking orders. The iolock must be double-locked separately since 553 + * we use i_rwsem for that. We now support taking one lock EXCL and the other 554 + * SHARED. 553 555 */ 554 556 void 555 557 xfs_lock_two_inodes( 556 - xfs_inode_t *ip0, 557 - xfs_inode_t *ip1, 558 - uint lock_mode) 558 + struct xfs_inode *ip0, 559 + uint ip0_mode, 560 + struct xfs_inode *ip1, 561 + uint ip1_mode) 559 562 { 560 - xfs_inode_t *temp; 563 + struct xfs_inode *temp; 564 + uint mode_temp; 561 565 int attempts = 0; 562 566 xfs_log_item_t *lp; 563 567 564 - ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))); 565 - if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) 566 - ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); 568 + ASSERT(hweight32(ip0_mode) == 1); 569 + ASSERT(hweight32(ip1_mode) == 1); 570 + ASSERT(!(ip0_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))); 571 + ASSERT(!(ip1_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))); 572 + ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) || 573 + !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); 574 + ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) || 575 + !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); 576 + ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) || 577 + !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); 578 + ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) || 579 + !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); 567 580 568 581 ASSERT(ip0->i_ino != ip1->i_ino); 569 
582 ··· 584 571 temp = ip0; 585 572 ip0 = ip1; 586 573 ip1 = temp; 574 + mode_temp = ip0_mode; 575 + ip0_mode = ip1_mode; 576 + ip1_mode = mode_temp; 587 577 } 588 578 589 579 again: 590 - xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0)); 580 + xfs_ilock(ip0, xfs_lock_inumorder(ip0_mode, 0)); 591 581 592 582 /* 593 583 * If the first lock we have locked is in the AIL, we must TRY to get ··· 599 583 */ 600 584 lp = (xfs_log_item_t *)ip0->i_itemp; 601 585 if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { 602 - if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) { 603 - xfs_iunlock(ip0, lock_mode); 586 + if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(ip1_mode, 1))) { 587 + xfs_iunlock(ip0, ip0_mode); 604 588 if ((++attempts % 5) == 0) 605 589 delay(1); /* Don't just spin the CPU */ 606 590 goto again; 607 591 } 608 592 } else { 609 - xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1)); 593 + xfs_ilock(ip1, xfs_lock_inumorder(ip1_mode, 1)); 610 594 } 611 595 } 612 - 613 596 614 597 void 615 598 __xfs_iflock( ··· 1437 1422 if (error) 1438 1423 goto std_return; 1439 1424 1440 - xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL); 1425 + xfs_lock_two_inodes(sip, XFS_ILOCK_EXCL, tdp, XFS_ILOCK_EXCL); 1441 1426 1442 1427 xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); 1443 1428 xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); ··· 2230 2215 xfs_buf_t *bp; 2231 2216 xfs_inode_t *ip; 2232 2217 xfs_inode_log_item_t *iip; 2233 - xfs_log_item_t *lip; 2218 + struct xfs_log_item *lip; 2234 2219 struct xfs_perag *pag; 2235 2220 xfs_ino_t inum; 2236 2221 ··· 2288 2273 * stale first, we will not attempt to lock them in the loop 2289 2274 * below as the XFS_ISTALE flag will be set. 
2290 2275 */ 2291 - lip = bp->b_fspriv; 2292 - while (lip) { 2276 + list_for_each_entry(lip, &bp->b_li_list, li_bio_list) { 2293 2277 if (lip->li_type == XFS_LI_INODE) { 2294 2278 iip = (xfs_inode_log_item_t *)lip; 2295 2279 ASSERT(iip->ili_logged == 1); ··· 2298 2284 &iip->ili_item.li_lsn); 2299 2285 xfs_iflags_set(iip->ili_inode, XFS_ISTALE); 2300 2286 } 2301 - lip = lip->li_bio_list; 2302 2287 } 2303 2288 2304 2289 ··· 2465 2452 2466 2453 VFS_I(ip)->i_mode = 0; /* mark incore inode as free */ 2467 2454 ip->i_d.di_flags = 0; 2455 + ip->i_d.di_flags2 = 0; 2468 2456 ip->i_d.di_dmevmask = 0; 2469 2457 ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */ 2470 2458 ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; ··· 2601 2587 goto std_return; 2602 2588 } 2603 2589 2604 - xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL); 2590 + xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL); 2605 2591 2606 2592 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 2607 2593 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); ··· 3494 3480 return error; 3495 3481 } 3496 3482 3483 + /* 3484 + * If there are inline format data / attr forks attached to this inode, 3485 + * make sure they're not corrupt. 3486 + */ 3487 + bool 3488 + xfs_inode_verify_forks( 3489 + struct xfs_inode *ip) 3490 + { 3491 + struct xfs_ifork *ifp; 3492 + xfs_failaddr_t fa; 3493 + 3494 + fa = xfs_ifork_verify_data(ip, &xfs_default_ifork_ops); 3495 + if (fa) { 3496 + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); 3497 + xfs_inode_verifier_error(ip, -EFSCORRUPTED, "data fork", 3498 + ifp->if_u1.if_data, ifp->if_bytes, fa); 3499 + return false; 3500 + } 3501 + 3502 + fa = xfs_ifork_verify_attr(ip, &xfs_default_ifork_ops); 3503 + if (fa) { 3504 + ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK); 3505 + xfs_inode_verifier_error(ip, -EFSCORRUPTED, "attr fork", 3506 + ifp ? ifp->if_u1.if_data : NULL, 3507 + ifp ? 
ifp->if_bytes : 0, fa); 3508 + return false; 3509 + } 3510 + return true; 3511 + } 3512 + 3497 3513 STATIC int 3498 3514 xfs_iflush_int( 3499 3515 struct xfs_inode *ip, ··· 3546 3502 if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC), 3547 3503 mp, XFS_ERRTAG_IFLUSH_1)) { 3548 3504 xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 3549 - "%s: Bad inode %Lu magic number 0x%x, ptr 0x%p", 3505 + "%s: Bad inode %Lu magic number 0x%x, ptr "PTR_FMT, 3550 3506 __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip); 3551 3507 goto corrupt_out; 3552 3508 } ··· 3556 3512 (ip->i_d.di_format != XFS_DINODE_FMT_BTREE), 3557 3513 mp, XFS_ERRTAG_IFLUSH_3)) { 3558 3514 xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 3559 - "%s: Bad regular inode %Lu, ptr 0x%p", 3515 + "%s: Bad regular inode %Lu, ptr "PTR_FMT, 3560 3516 __func__, ip->i_ino, ip); 3561 3517 goto corrupt_out; 3562 3518 } ··· 3567 3523 (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL), 3568 3524 mp, XFS_ERRTAG_IFLUSH_4)) { 3569 3525 xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 3570 - "%s: Bad directory inode %Lu, ptr 0x%p", 3526 + "%s: Bad directory inode %Lu, ptr "PTR_FMT, 3571 3527 __func__, ip->i_ino, ip); 3572 3528 goto corrupt_out; 3573 3529 } ··· 3576 3532 ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) { 3577 3533 xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 3578 3534 "%s: detected corrupt incore inode %Lu, " 3579 - "total extents = %d, nblocks = %Ld, ptr 0x%p", 3535 + "total extents = %d, nblocks = %Ld, ptr "PTR_FMT, 3580 3536 __func__, ip->i_ino, 3581 3537 ip->i_d.di_nextents + ip->i_d.di_anextents, 3582 3538 ip->i_d.di_nblocks, ip); ··· 3585 3541 if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize, 3586 3542 mp, XFS_ERRTAG_IFLUSH_6)) { 3587 3543 xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 3588 - "%s: bad inode %Lu, forkoff 0x%x, ptr 0x%p", 3544 + "%s: bad inode %Lu, forkoff 0x%x, ptr "PTR_FMT, 3589 3545 __func__, ip->i_ino, ip->i_d.di_forkoff, ip); 3590 3546 goto corrupt_out; 3591 3547 } ··· 3602 3558 if (ip->i_d.di_version < 3) 3603 3559 
ip->i_d.di_flushiter++; 3604 3560 3605 - /* Check the inline directory data. */ 3606 - if (S_ISDIR(VFS_I(ip)->i_mode) && 3607 - ip->i_d.di_format == XFS_DINODE_FMT_LOCAL && 3608 - xfs_dir2_sf_verify(ip)) 3561 + /* Check the inline fork data before we write out. */ 3562 + if (!xfs_inode_verify_forks(ip)) 3609 3563 goto corrupt_out; 3610 3564 3611 3565 /* ··· 3666 3624 /* generate the checksum. */ 3667 3625 xfs_dinode_calc_crc(mp, dip); 3668 3626 3669 - ASSERT(bp->b_fspriv != NULL); 3627 + ASSERT(!list_empty(&bp->b_li_list)); 3670 3628 ASSERT(bp->b_iodone != NULL); 3671 3629 return 0; 3672 3630
+4 -1
fs/xfs/xfs_inode.h
··· 423 423 #define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount)) 424 424 425 425 int xfs_iflush(struct xfs_inode *, struct xfs_buf **); 426 - void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); 426 + void xfs_lock_two_inodes(struct xfs_inode *ip0, uint ip0_mode, 427 + struct xfs_inode *ip1, uint ip1_mode); 427 428 428 429 xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip); 429 430 xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip); ··· 491 490 492 491 /* The default CoW extent size hint. */ 493 492 #define XFS_DEFAULT_COWEXTSZ_HINT 32 493 + 494 + bool xfs_inode_verify_forks(struct xfs_inode *ip); 494 495 495 496 #endif /* __XFS_INODE_H__ */
+13 -30
fs/xfs/xfs_inode_item.c
··· 522 522 if (!xfs_buf_trylock(bp)) 523 523 return XFS_ITEM_LOCKED; 524 524 525 - if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list)) 525 + if (!xfs_buf_resubmit_failed_buffers(bp, buffer_list)) 526 526 rval = XFS_ITEM_FLUSHING; 527 527 528 528 xfs_buf_unlock(bp); ··· 713 713 struct xfs_log_item *lip) 714 714 { 715 715 struct xfs_inode_log_item *iip; 716 - struct xfs_log_item *blip; 717 - struct xfs_log_item *next; 718 - struct xfs_log_item *prev; 716 + struct xfs_log_item *blip, *n; 719 717 struct xfs_ail *ailp = lip->li_ailp; 720 718 int need_ail = 0; 719 + LIST_HEAD(tmp); 721 720 722 721 /* 723 722 * Scan the buffer IO completions for other inodes being completed and 724 723 * attach them to the current inode log item. 725 724 */ 726 - blip = bp->b_fspriv; 727 - prev = NULL; 728 - while (blip != NULL) { 729 - if (blip->li_cb != xfs_iflush_done) { 730 - prev = blip; 731 - blip = blip->li_bio_list; 725 + 726 + list_add_tail(&lip->li_bio_list, &tmp); 727 + 728 + list_for_each_entry_safe(blip, n, &bp->b_li_list, li_bio_list) { 729 + if (lip->li_cb != xfs_iflush_done) 732 730 continue; 733 - } 734 731 735 - /* remove from list */ 736 - next = blip->li_bio_list; 737 - if (!prev) { 738 - bp->b_fspriv = next; 739 - } else { 740 - prev->li_bio_list = next; 741 - } 742 - 743 - /* add to current list */ 744 - blip->li_bio_list = lip->li_bio_list; 745 - lip->li_bio_list = blip; 746 - 732 + list_move_tail(&blip->li_bio_list, &tmp); 747 733 /* 748 734 * while we have the item, do the unlocked check for needing 749 735 * the AIL lock. ··· 738 752 if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) || 739 753 (blip->li_flags & XFS_LI_FAILED)) 740 754 need_ail++; 741 - 742 - blip = next; 743 755 } 744 756 745 757 /* make sure we capture the state of the initial inode. 
*/ ··· 760 776 761 777 /* this is an opencoded batch version of xfs_trans_ail_delete */ 762 778 spin_lock(&ailp->xa_lock); 763 - for (blip = lip; blip; blip = blip->li_bio_list) { 779 + list_for_each_entry(blip, &tmp, li_bio_list) { 764 780 if (INODE_ITEM(blip)->ili_logged && 765 781 blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn) 766 782 mlip_changed |= xfs_ail_delete_one(ailp, blip); ··· 786 802 * ili_last_fields bits now that we know that the data corresponding to 787 803 * them is safely on disk. 788 804 */ 789 - for (blip = lip; blip; blip = next) { 790 - next = blip->li_bio_list; 791 - blip->li_bio_list = NULL; 792 - 805 + list_for_each_entry_safe(blip, n, &tmp, li_bio_list) { 806 + list_del_init(&blip->li_bio_list); 793 807 iip = INODE_ITEM(blip); 794 808 iip->ili_logged = 0; 795 809 iip->ili_last_fields = 0; 796 810 xfs_ifunlock(iip->ili_inode); 797 811 } 812 + list_del(&tmp); 798 813 } 799 814 800 815 /*
+3 -2
fs/xfs/xfs_ioctl.c
··· 45 45 #include <linux/fsmap.h> 46 46 #include "xfs_fsmap.h" 47 47 #include "scrub/xfs_scrub.h" 48 + #include "xfs_sb.h" 48 49 49 50 #include <linux/capability.h> 50 51 #include <linux/cred.h> ··· 810 809 xfs_fsop_geom_t fsgeo; 811 810 int error; 812 811 813 - error = xfs_fs_geometry(mp, &fsgeo, 3); 812 + error = xfs_fs_geometry(&mp->m_sb, &fsgeo, 3); 814 813 if (error) 815 814 return error; 816 815 ··· 832 831 xfs_fsop_geom_t fsgeo; 833 832 int error; 834 833 835 - error = xfs_fs_geometry(mp, &fsgeo, 4); 834 + error = xfs_fs_geometry(&mp->m_sb, &fsgeo, 4); 836 835 if (error) 837 836 return error; 838 837
+2 -1
fs/xfs/xfs_ioctl32.c
··· 37 37 #include "xfs_ioctl.h" 38 38 #include "xfs_ioctl32.h" 39 39 #include "xfs_trace.h" 40 + #include "xfs_sb.h" 40 41 41 42 #define _NATIVE_IOC(cmd, type) \ 42 43 _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type)) ··· 67 66 xfs_fsop_geom_t fsgeo; 68 67 int error; 69 68 70 - error = xfs_fs_geometry(mp, &fsgeo, 3); 69 + error = xfs_fs_geometry(&mp->m_sb, &fsgeo, 3); 71 70 if (error) 72 71 return error; 73 72 /* The 32-bit variant simply has some padding at the end */
+14
fs/xfs/xfs_linux.h
··· 285 285 #define XFS_IS_REALTIME_INODE(ip) \ 286 286 (((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME) && \ 287 287 (ip)->i_mount->m_rtdev_targp) 288 + #define XFS_IS_REALTIME_MOUNT(mp) ((mp)->m_rtdev_targp ? 1 : 0) 288 289 #else 289 290 #define XFS_IS_REALTIME_INODE(ip) (0) 291 + #define XFS_IS_REALTIME_MOUNT(mp) (0) 292 + #endif 293 + 294 + /* 295 + * Starting in Linux 4.15, the %p (raw pointer value) printk modifier 296 + * prints a hashed version of the pointer to avoid leaking kernel 297 + * pointers into dmesg. If we're trying to debug the kernel we want the 298 + * raw values, so override this behavior as best we can. 299 + */ 300 + #ifdef DEBUG 301 + # define PTR_FMT "%px" 302 + #else 303 + # define PTR_FMT "%p" 290 304 #endif 291 305 292 306 #endif /* __XFS_LINUX__ */
+10 -7
fs/xfs/xfs_log.c
··· 1047 1047 1048 1048 INIT_LIST_HEAD(&item->li_ail); 1049 1049 INIT_LIST_HEAD(&item->li_cil); 1050 + INIT_LIST_HEAD(&item->li_bio_list); 1050 1051 } 1051 1052 1052 1053 /* ··· 1243 1242 static void 1244 1243 xlog_iodone(xfs_buf_t *bp) 1245 1244 { 1246 - struct xlog_in_core *iclog = bp->b_fspriv; 1245 + struct xlog_in_core *iclog = bp->b_log_item; 1247 1246 struct xlog *l = iclog->ic_log; 1248 1247 int aborted = 0; 1249 1248 ··· 1774 1773 xlog_bdstrat( 1775 1774 struct xfs_buf *bp) 1776 1775 { 1777 - struct xlog_in_core *iclog = bp->b_fspriv; 1776 + struct xlog_in_core *iclog = bp->b_log_item; 1778 1777 1779 1778 xfs_buf_lock(bp); 1780 1779 if (iclog->ic_state & XLOG_STATE_IOERROR) { ··· 1920 1919 } 1921 1920 1922 1921 bp->b_io_length = BTOBB(count); 1923 - bp->b_fspriv = iclog; 1922 + bp->b_log_item = iclog; 1924 1923 bp->b_flags &= ~XBF_FLUSH; 1925 1924 bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA); 1926 1925 ··· 1959 1958 XFS_BUF_SET_ADDR(bp, 0); /* logical 0 */ 1960 1959 xfs_buf_associate_memory(bp, 1961 1960 (char *)&iclog->ic_header + count, split); 1962 - bp->b_fspriv = iclog; 1961 + bp->b_log_item = iclog; 1963 1962 bp->b_flags &= ~XBF_FLUSH; 1964 1963 bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA); 1965 1964 ··· 2118 2117 2119 2118 /* dump core transaction and ticket info */ 2120 2119 xfs_warn(mp, "transaction summary:"); 2121 - xfs_warn(mp, " flags = 0x%x", tp->t_flags); 2120 + xfs_warn(mp, " log res = %d", tp->t_log_res); 2121 + xfs_warn(mp, " log count = %d", tp->t_log_count); 2122 + xfs_warn(mp, " flags = 0x%x", tp->t_flags); 2122 2123 2123 2124 xlog_print_tic_res(mp, tp->t_ticket); 2124 2125 ··· 2245 2242 break; 2246 2243 default: 2247 2244 xfs_warn(log->l_mp, 2248 - "Bad XFS transaction clientid 0x%x in ticket 0x%p", 2245 + "Bad XFS transaction clientid 0x%x in ticket "PTR_FMT, 2249 2246 ophdr->oh_clientid, ticket); 2250 2247 return NULL; 2251 2248 } ··· 3927 3924 } 3928 3925 if (clientid != XFS_TRANSACTION && clientid 
!= XFS_LOG) 3929 3926 xfs_warn(log->l_mp, 3930 - "%s: invalid clientid %d op 0x%p offset 0x%lx", 3927 + "%s: invalid clientid %d op "PTR_FMT" offset 0x%lx", 3931 3928 __func__, clientid, ophead, 3932 3929 (unsigned long)field_offset); 3933 3930
+34 -24
fs/xfs/xfs_log_recover.c
··· 400 400 * On v5 supers, a bli could be attached to update the metadata LSN. 401 401 * Clean it up. 402 402 */ 403 - if (bp->b_fspriv) 403 + if (bp->b_log_item) 404 404 xfs_buf_item_relse(bp); 405 - ASSERT(bp->b_fspriv == NULL); 405 + ASSERT(bp->b_log_item == NULL); 406 406 407 407 bp->b_iodone = NULL; 408 408 xfs_buf_ioend(bp); ··· 2218 2218 next_unlinked_offset - reg_buf_offset; 2219 2219 if (unlikely(*logged_nextp == 0)) { 2220 2220 xfs_alert(mp, 2221 - "Bad inode buffer log record (ptr = 0x%p, bp = 0x%p). " 2221 + "Bad inode buffer log record (ptr = "PTR_FMT", bp = "PTR_FMT"). " 2222 2222 "Trying to replay bad (0) inode di_next_unlinked field.", 2223 2223 item, bp); 2224 2224 XFS_ERROR_REPORT("xlog_recover_do_inode_buf", ··· 2630 2630 ASSERT(!bp->b_iodone || bp->b_iodone == xlog_recover_iodone); 2631 2631 bp->b_iodone = xlog_recover_iodone; 2632 2632 xfs_buf_item_init(bp, mp); 2633 - bip = bp->b_fspriv; 2633 + bip = bp->b_log_item; 2634 2634 bip->bli_item.li_lsn = current_lsn; 2635 2635 } 2636 2636 } ··· 2652 2652 int i; 2653 2653 int bit; 2654 2654 int nbits; 2655 - int error; 2655 + xfs_failaddr_t fa; 2656 2656 2657 2657 trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f); 2658 2658 ··· 2687 2687 * the first dquot in the buffer should do. XXXThis is 2688 2688 * probably a good thing to do for other buf types also. 
2689 2689 */ 2690 - error = 0; 2690 + fa = NULL; 2691 2691 if (buf_f->blf_flags & 2692 2692 (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { 2693 2693 if (item->ri_buf[i].i_addr == NULL) { ··· 2701 2701 item->ri_buf[i].i_len, __func__); 2702 2702 goto next; 2703 2703 } 2704 - error = xfs_dqcheck(mp, item->ri_buf[i].i_addr, 2705 - -1, 0, XFS_QMOPT_DOWARN, 2706 - "dquot_buf_recover"); 2707 - if (error) 2704 + fa = xfs_dquot_verify(mp, item->ri_buf[i].i_addr, 2705 + -1, 0, 0); 2706 + if (fa) { 2707 + xfs_alert(mp, 2708 + "dquot corrupt at %pS trying to replay into block 0x%llx", 2709 + fa, bp->b_bn); 2708 2710 goto next; 2711 + } 2709 2712 } 2710 2713 2711 2714 memcpy(xfs_buf_offset(bp, ··· 2960 2957 if (error) 2961 2958 goto out_free_ip; 2962 2959 2960 + if (!xfs_inode_verify_forks(ip)) { 2961 + error = -EFSCORRUPTED; 2962 + goto out_free_ip; 2963 + } 2963 2964 2964 2965 if (in_f->ilf_fields & XFS_ILOG_DOWNER) { 2965 2966 ASSERT(in_f->ilf_fields & XFS_ILOG_DBROOT); ··· 3049 3042 */ 3050 3043 if (unlikely(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))) { 3051 3044 xfs_alert(mp, 3052 - "%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld", 3045 + "%s: Bad inode magic number, dip = "PTR_FMT", dino bp = "PTR_FMT", ino = %Ld", 3053 3046 __func__, dip, bp, in_f->ilf_ino); 3054 3047 XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)", 3055 3048 XFS_ERRLEVEL_LOW, mp); ··· 3059 3052 ldip = item->ri_buf[1].i_addr; 3060 3053 if (unlikely(ldip->di_magic != XFS_DINODE_MAGIC)) { 3061 3054 xfs_alert(mp, 3062 - "%s: Bad inode log record, rec ptr 0x%p, ino %Ld", 3055 + "%s: Bad inode log record, rec ptr "PTR_FMT", ino %Ld", 3063 3056 __func__, item, in_f->ilf_ino); 3064 3057 XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)", 3065 3058 XFS_ERRLEVEL_LOW, mp); ··· 3117 3110 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)", 3118 3111 XFS_ERRLEVEL_LOW, mp, ldip); 3119 3112 xfs_alert(mp, 3120 - "%s: Bad regular inode log record, rec ptr 0x%p, " 3121 - "ino ptr = 0x%p, 
ino bp = 0x%p, ino %Ld", 3113 + "%s: Bad regular inode log record, rec ptr "PTR_FMT", " 3114 + "ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld", 3122 3115 __func__, item, dip, bp, in_f->ilf_ino); 3123 3116 error = -EFSCORRUPTED; 3124 3117 goto out_release; ··· 3130 3123 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)", 3131 3124 XFS_ERRLEVEL_LOW, mp, ldip); 3132 3125 xfs_alert(mp, 3133 - "%s: Bad dir inode log record, rec ptr 0x%p, " 3134 - "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", 3126 + "%s: Bad dir inode log record, rec ptr "PTR_FMT", " 3127 + "ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld", 3135 3128 __func__, item, dip, bp, in_f->ilf_ino); 3136 3129 error = -EFSCORRUPTED; 3137 3130 goto out_release; ··· 3141 3134 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)", 3142 3135 XFS_ERRLEVEL_LOW, mp, ldip); 3143 3136 xfs_alert(mp, 3144 - "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " 3145 - "dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld", 3137 + "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", " 3138 + "dino bp "PTR_FMT", ino %Ld, total extents = %d, nblocks = %Ld", 3146 3139 __func__, item, dip, bp, in_f->ilf_ino, 3147 3140 ldip->di_nextents + ldip->di_anextents, 3148 3141 ldip->di_nblocks); ··· 3153 3146 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)", 3154 3147 XFS_ERRLEVEL_LOW, mp, ldip); 3155 3148 xfs_alert(mp, 3156 - "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " 3157 - "dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__, 3149 + "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", " 3150 + "dino bp "PTR_FMT", ino %Ld, forkoff 0x%x", __func__, 3158 3151 item, dip, bp, in_f->ilf_ino, ldip->di_forkoff); 3159 3152 error = -EFSCORRUPTED; 3160 3153 goto out_release; ··· 3164 3157 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", 3165 3158 XFS_ERRLEVEL_LOW, mp, ldip); 3166 3159 xfs_alert(mp, 3167 - "%s: Bad inode log record length %d, rec ptr 0x%p", 3160 + "%s: Bad inode log record length %d, rec 
ptr "PTR_FMT, 3168 3161 __func__, item->ri_buf[1].i_len, item); 3169 3162 error = -EFSCORRUPTED; 3170 3163 goto out_release; ··· 3310 3303 xfs_mount_t *mp = log->l_mp; 3311 3304 xfs_buf_t *bp; 3312 3305 struct xfs_disk_dquot *ddq, *recddq; 3306 + xfs_failaddr_t fa; 3313 3307 int error; 3314 3308 xfs_dq_logformat_t *dq_f; 3315 3309 uint type; ··· 3353 3345 */ 3354 3346 dq_f = item->ri_buf[0].i_addr; 3355 3347 ASSERT(dq_f); 3356 - error = xfs_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, 3357 - "xlog_recover_dquot_pass2 (log copy)"); 3358 - if (error) 3348 + fa = xfs_dquot_verify(mp, recddq, dq_f->qlf_id, 0, 0); 3349 + if (fa) { 3350 + xfs_alert(mp, "corrupt dquot ID 0x%x in log at %pS", 3351 + dq_f->qlf_id, fa); 3359 3352 return -EIO; 3353 + } 3360 3354 ASSERT(dq_f->qlf_len == 1); 3361 3355 3362 3356 /*
+3
fs/xfs/xfs_mount.c
··· 162 162 ASSERT(pag); 163 163 ASSERT(atomic_read(&pag->pag_ref) == 0); 164 164 xfs_buf_hash_destroy(pag); 165 + mutex_destroy(&pag->pag_ici_reclaim_lock); 165 166 call_rcu(&pag->rcu_head, __xfs_free_perag); 166 167 } 167 168 } ··· 249 248 out_hash_destroy: 250 249 xfs_buf_hash_destroy(pag); 251 250 out_free_pag: 251 + mutex_destroy(&pag->pag_ici_reclaim_lock); 252 252 kmem_free(pag); 253 253 out_unwind_new_pags: 254 254 /* unwind any prior newly initialized pags */ ··· 258 256 if (!pag) 259 257 break; 260 258 xfs_buf_hash_destroy(pag); 259 + mutex_destroy(&pag->pag_ici_reclaim_lock); 261 260 kmem_free(pag); 262 261 } 263 262 return error;
+16 -17
fs/xfs/xfs_qm.c
··· 162 162 */ 163 163 error = xfs_qm_dqflush(dqp, &bp); 164 164 if (error) { 165 - xfs_warn(mp, "%s: dquot %p flush failed", 165 + xfs_warn(mp, "%s: dquot "PTR_FMT" flush failed", 166 166 __func__, dqp); 167 167 } else { 168 168 error = xfs_bwrite(bp); ··· 291 291 * exist on disk and we didn't ask it to allocate; ESRCH if quotas got 292 292 * turned off suddenly. 293 293 */ 294 - error = xfs_qm_dqget(ip->i_mount, ip, id, type, 295 - doalloc | XFS_QMOPT_DOWARN, &dqp); 294 + error = xfs_qm_dqget(ip->i_mount, ip, id, type, doalloc, &dqp); 296 295 if (error) 297 296 return error; 298 297 ··· 480 481 481 482 error = xfs_qm_dqflush(dqp, &bp); 482 483 if (error) { 483 - xfs_warn(dqp->q_mount, "%s: dquot %p flush failed", 484 + xfs_warn(dqp->q_mount, "%s: dquot "PTR_FMT" flush failed", 484 485 __func__, dqp); 485 486 goto out_unlock_dirty; 486 487 } ··· 573 574 struct xfs_def_quota *defq; 574 575 int error; 575 576 576 - error = xfs_qm_dqread(mp, 0, type, XFS_QMOPT_DOWARN, &dqp); 577 + error = xfs_qm_dqread(mp, 0, type, 0, &dqp); 577 578 578 579 if (!error) { 579 580 xfs_disk_dquot_t *ddqp = &dqp->q_core; ··· 651 652 XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : 652 653 (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP : 653 654 XFS_DQ_PROJ), 654 - XFS_QMOPT_DOWARN, &dqp); 655 + 0, &dqp); 655 656 656 657 if (!error) { 657 658 xfs_disk_dquot_t *ddqp = &dqp->q_core; ··· 842 843 { 843 844 struct xfs_dqblk *dqb; 844 845 int j; 846 + xfs_failaddr_t fa; 845 847 846 848 trace_xfs_reset_dqcounts(bp, _RET_IP_); 847 849 ··· 864 864 /* 865 865 * Do a sanity check, and if needed, repair the dqblk. Don't 866 866 * output any warnings because it's perfectly possible to 867 - * find uninitialised dquot blks. See comment in xfs_dqcheck. 867 + * find uninitialised dquot blks. See comment in 868 + * xfs_dquot_verify. 
868 869 */ 869 - xfs_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR, 870 - "xfs_quotacheck"); 870 + fa = xfs_dquot_verify(mp, ddq, id + j, type, 0); 871 + if (fa) 872 + xfs_dquot_repair(mp, ddq, id + j, type); 873 + 871 874 /* 872 875 * Reset type in case we are reusing group quota file for 873 876 * project quotas or vice versa ··· 1077 1074 struct xfs_dquot *dqp; 1078 1075 int error; 1079 1076 1080 - error = xfs_qm_dqget(mp, ip, id, type, 1081 - XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp); 1077 + error = xfs_qm_dqget(mp, ip, id, type, XFS_QMOPT_DQALLOC, &dqp); 1082 1078 if (error) { 1083 1079 /* 1084 1080 * Shouldn't be able to turn off quotas here. ··· 1698 1696 xfs_iunlock(ip, lockflags); 1699 1697 error = xfs_qm_dqget(mp, NULL, uid, 1700 1698 XFS_DQ_USER, 1701 - XFS_QMOPT_DQALLOC | 1702 - XFS_QMOPT_DOWARN, 1699 + XFS_QMOPT_DQALLOC, 1703 1700 &uq); 1704 1701 if (error) { 1705 1702 ASSERT(error != -ENOENT); ··· 1724 1723 xfs_iunlock(ip, lockflags); 1725 1724 error = xfs_qm_dqget(mp, NULL, gid, 1726 1725 XFS_DQ_GROUP, 1727 - XFS_QMOPT_DQALLOC | 1728 - XFS_QMOPT_DOWARN, 1726 + XFS_QMOPT_DQALLOC, 1729 1727 &gq); 1730 1728 if (error) { 1731 1729 ASSERT(error != -ENOENT); ··· 1743 1743 xfs_iunlock(ip, lockflags); 1744 1744 error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid, 1745 1745 XFS_DQ_PROJ, 1746 - XFS_QMOPT_DQALLOC | 1747 - XFS_QMOPT_DOWARN, 1746 + XFS_QMOPT_DQALLOC, 1748 1747 &pq); 1749 1748 if (error) { 1750 1749 ASSERT(error != -ENOENT);
+83 -12
fs/xfs/xfs_reflink.c
··· 464 464 error = xfs_trans_commit(tp); 465 465 if (error) 466 466 return error; 467 + 468 + /* 469 + * Allocation succeeded but the requested range was not even partially 470 + * satisfied? Bail out! 471 + */ 472 + if (nimaps == 0) 473 + return -ENOSPC; 467 474 convert: 468 475 return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb, 469 476 &dfops); ··· 606 599 del.br_startblock, del.br_blockcount, 607 600 NULL); 608 601 609 - /* Update quota accounting */ 610 - xfs_trans_mod_dquot_byino(*tpp, ip, XFS_TRANS_DQ_BCOUNT, 611 - -(long)del.br_blockcount); 612 - 613 602 /* Roll the transaction */ 614 603 xfs_defer_ijoin(&dfops, ip); 615 604 error = xfs_defer_finish(tpp, &dfops); ··· 616 613 617 614 /* Remove the mapping from the CoW fork. */ 618 615 xfs_bmap_del_extent_cow(ip, &icur, &got, &del); 616 + 617 + /* Remove the quota reservation */ 618 + error = xfs_trans_reserve_quota_nblks(NULL, ip, 619 + -(long)del.br_blockcount, 0, 620 + XFS_QMOPT_RES_REGBLKS); 621 + if (error) 622 + break; 619 623 } else { 620 624 /* Didn't do anything, push cursor back. */ 621 625 xfs_iext_prev(ifp, &icur); ··· 805 795 if (error) 806 796 goto out_defer; 807 797 798 + /* Charge this new data fork mapping to the on-disk quota. */ 799 + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_DELBCOUNT, 800 + (long)del.br_blockcount); 801 + 808 802 /* Remove the mapping from the CoW fork. 
*/ 809 803 xfs_bmap_del_extent_cow(ip, &icur, &got, &del); 810 804 ··· 958 944 if (src->i_ino == dest->i_ino) 959 945 xfs_ilock(src, XFS_ILOCK_EXCL); 960 946 else 961 - xfs_lock_two_inodes(src, dest, XFS_ILOCK_EXCL); 947 + xfs_lock_two_inodes(src, XFS_ILOCK_EXCL, dest, XFS_ILOCK_EXCL); 962 948 963 949 if (!xfs_is_reflink_inode(src)) { 964 950 trace_xfs_reflink_set_inode_flag(src); ··· 1216 1202 1217 1203 /* drange = (destoff, destoff + len); srange = (srcoff, srcoff + len) */ 1218 1204 while (len) { 1205 + uint lock_mode; 1206 + 1219 1207 trace_xfs_reflink_remap_blocks_loop(src, srcoff, len, 1220 1208 dest, destoff); 1209 + 1221 1210 /* Read extent from the source file */ 1222 1211 nimaps = 1; 1223 - xfs_ilock(src, XFS_ILOCK_EXCL); 1212 + lock_mode = xfs_ilock_data_map_shared(src); 1224 1213 error = xfs_bmapi_read(src, srcoff, len, &imap, &nimaps, 0); 1225 - xfs_iunlock(src, XFS_ILOCK_EXCL); 1214 + xfs_iunlock(src, lock_mode); 1226 1215 if (error) 1227 1216 goto err; 1228 1217 ASSERT(nimaps == 1); ··· 1262 1245 } 1263 1246 1264 1247 /* 1248 + * Grab the exclusive iolock for a data copy from src to dest, making 1249 + * sure to abide vfs locking order (lowest pointer value goes first) and 1250 + * breaking the pnfs layout leases on dest before proceeding. The loop 1251 + * is needed because we cannot call the blocking break_layout() with the 1252 + * src iolock held, and therefore have to back out both locks. 
1253 + */ 1254 + static int 1255 + xfs_iolock_two_inodes_and_break_layout( 1256 + struct inode *src, 1257 + struct inode *dest) 1258 + { 1259 + int error; 1260 + 1261 + retry: 1262 + if (src < dest) { 1263 + inode_lock_shared(src); 1264 + inode_lock_nested(dest, I_MUTEX_NONDIR2); 1265 + } else { 1266 + /* src >= dest */ 1267 + inode_lock(dest); 1268 + } 1269 + 1270 + error = break_layout(dest, false); 1271 + if (error == -EWOULDBLOCK) { 1272 + inode_unlock(dest); 1273 + if (src < dest) 1274 + inode_unlock_shared(src); 1275 + error = break_layout(dest, true); 1276 + if (error) 1277 + return error; 1278 + goto retry; 1279 + } 1280 + if (error) { 1281 + inode_unlock(dest); 1282 + if (src < dest) 1283 + inode_unlock_shared(src); 1284 + return error; 1285 + } 1286 + if (src > dest) 1287 + inode_lock_shared_nested(src, I_MUTEX_NONDIR2); 1288 + return 0; 1289 + } 1290 + 1291 + /* 1265 1292 * Link a range of blocks from one file to another. 1266 1293 */ 1267 1294 int ··· 1335 1274 return -EIO; 1336 1275 1337 1276 /* Lock both files against IO */ 1338 - lock_two_nondirectories(inode_in, inode_out); 1277 + ret = xfs_iolock_two_inodes_and_break_layout(inode_in, inode_out); 1278 + if (ret) 1279 + return ret; 1339 1280 if (same_inode) 1340 1281 xfs_ilock(src, XFS_MMAPLOCK_EXCL); 1341 1282 else 1342 - xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL); 1283 + xfs_lock_two_inodes(src, XFS_MMAPLOCK_SHARED, dest, 1284 + XFS_MMAPLOCK_EXCL); 1343 1285 1344 1286 /* Check file eligibility and prepare for block sharing. 
*/ 1345 1287 ret = -EINVAL; ··· 1357 1293 ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out, 1358 1294 &len, is_dedupe); 1359 1295 if (ret <= 0) 1296 + goto out_unlock; 1297 + 1298 + /* Attach dquots to dest inode before changing block map */ 1299 + ret = xfs_qm_dqattach(dest, 0); 1300 + if (ret) 1360 1301 goto out_unlock; 1361 1302 1362 1303 trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); ··· 1410 1341 is_dedupe); 1411 1342 1412 1343 out_unlock: 1413 - xfs_iunlock(src, XFS_MMAPLOCK_EXCL); 1344 + xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); 1414 1345 if (!same_inode) 1415 - xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); 1416 - unlock_two_nondirectories(inode_in, inode_out); 1346 + xfs_iunlock(src, XFS_MMAPLOCK_SHARED); 1347 + inode_unlock(inode_out); 1348 + if (!same_inode) 1349 + inode_unlock_shared(inode_in); 1417 1350 if (ret) 1418 1351 trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); 1419 1352 return ret;
+4
fs/xfs/xfs_rtalloc.h
··· 139 139 xfs_rtalloc_query_range_fn fn, 140 140 void *priv); 141 141 bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno); 142 + int xfs_rtalloc_extent_is_free(struct xfs_mount *mp, struct xfs_trans *tp, 143 + xfs_rtblock_t start, xfs_extlen_t len, 144 + bool *is_free); 142 145 #else 143 146 # define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (ENOSYS) 144 147 # define xfs_rtfree_extent(t,b,l) (ENOSYS) ··· 151 148 # define xfs_rtalloc_query_all(t,f,p) (ENOSYS) 152 149 # define xfs_rtbuf_get(m,t,b,i,p) (ENOSYS) 153 150 # define xfs_verify_rtbno(m, r) (false) 151 + # define xfs_rtalloc_extent_is_free(m,t,s,l,i) (ENOSYS) 154 152 static inline int /* error */ 155 153 xfs_rtmount_init( 156 154 xfs_mount_t *mp) /* file system mount structure */
+9 -5
fs/xfs/xfs_super.c
··· 1153 1153 ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) == 1154 1154 (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD)) 1155 1155 xfs_qm_statvfs(ip, statp); 1156 + 1157 + if (XFS_IS_REALTIME_MOUNT(mp) && 1158 + (ip->i_d.di_flags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) { 1159 + statp->f_blocks = sbp->sb_rblocks; 1160 + statp->f_bavail = statp->f_bfree = 1161 + sbp->sb_frextents * sbp->sb_rextsize; 1162 + } 1163 + 1156 1164 return 0; 1157 1165 } 1158 1166 ··· 1668 1660 } 1669 1661 if (xfs_sb_version_hasreflink(&mp->m_sb)) 1670 1662 xfs_alert(mp, 1671 - "DAX and reflink have not been tested together!"); 1663 + "DAX and reflink cannot be used together!"); 1672 1664 } 1673 1665 1674 1666 if (mp->m_flags & XFS_MOUNT_DISCARD) { ··· 1691 1683 xfs_alert(mp, 1692 1684 "EXPERIMENTAL reverse mapping btree feature enabled. Use at your own risk!"); 1693 1685 } 1694 - 1695 - if (xfs_sb_version_hasreflink(&mp->m_sb)) 1696 - xfs_alert(mp, 1697 - "EXPERIMENTAL reflink feature enabled. Use at your own risk!"); 1698 1686 1699 1687 error = xfs_mountfs(mp); 1700 1688 if (error)
+47 -21
fs/xfs/xfs_trace.h
··· 72 72 __entry->flags = ctx->flags; 73 73 ), 74 74 TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u " 75 - "alist 0x%p size %u count %u firstu %u flags %d %s", 75 + "alist %p size %u count %u firstu %u flags %d %s", 76 76 MAJOR(__entry->dev), MINOR(__entry->dev), 77 77 __entry->ino, 78 78 __entry->hashval, ··· 119 119 __entry->refcount = refcount; 120 120 __entry->caller_ip = caller_ip; 121 121 ), 122 - TP_printk("dev %d:%d agno %u refcount %d caller %ps", 122 + TP_printk("dev %d:%d agno %u refcount %d caller %pS", 123 123 MAJOR(__entry->dev), MINOR(__entry->dev), 124 124 __entry->agno, 125 125 __entry->refcount, ··· 200 200 __entry->bt_before = be32_to_cpu(btree->before); 201 201 ), 202 202 TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u " 203 - "alist 0x%p size %u count %u firstu %u flags %d %s " 203 + "alist %p size %u count %u firstu %u flags %d %s " 204 204 "node hashval %u, node before %u", 205 205 MAJOR(__entry->dev), MINOR(__entry->dev), 206 206 __entry->ino, ··· 251 251 __entry->bmap_state = state; 252 252 __entry->caller_ip = caller_ip; 253 253 ), 254 - TP_printk("dev %d:%d ino 0x%llx state %s cur 0x%p/%d " 255 - "offset %lld block %lld count %lld flag %d caller %ps", 254 + TP_printk("dev %d:%d ino 0x%llx state %s cur %p/%d " 255 + "offset %lld block %lld count %lld flag %d caller %pS", 256 256 MAJOR(__entry->dev), MINOR(__entry->dev), 257 257 __entry->ino, 258 258 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), ··· 301 301 __entry->caller_ip = caller_ip; 302 302 ), 303 303 TP_printk("dev %d:%d bno 0x%llx nblks 0x%x hold %d pincount %d " 304 - "lock %d flags %s caller %ps", 304 + "lock %d flags %s caller %pS", 305 305 MAJOR(__entry->dev), MINOR(__entry->dev), 306 306 (unsigned long long)__entry->bno, 307 307 __entry->nblks, ··· 370 370 __entry->caller_ip = caller_ip; 371 371 ), 372 372 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " 373 - "lock %d flags %s caller %ps", 373 + "lock %d 
flags %s caller %pS", 374 374 MAJOR(__entry->dev), MINOR(__entry->dev), 375 375 (unsigned long long)__entry->bno, 376 376 __entry->buffer_length, ··· 390 390 DEFINE_BUF_FLAGS_EVENT(xfs_buf_read); 391 391 392 392 TRACE_EVENT(xfs_buf_ioerror, 393 - TP_PROTO(struct xfs_buf *bp, int error, unsigned long caller_ip), 393 + TP_PROTO(struct xfs_buf *bp, int error, xfs_failaddr_t caller_ip), 394 394 TP_ARGS(bp, error, caller_ip), 395 395 TP_STRUCT__entry( 396 396 __field(dev_t, dev) ··· 401 401 __field(int, pincount) 402 402 __field(unsigned, lockval) 403 403 __field(int, error) 404 - __field(unsigned long, caller_ip) 404 + __field(xfs_failaddr_t, caller_ip) 405 405 ), 406 406 TP_fast_assign( 407 407 __entry->dev = bp->b_target->bt_dev; ··· 415 415 __entry->caller_ip = caller_ip; 416 416 ), 417 417 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " 418 - "lock %d error %d flags %s caller %ps", 418 + "lock %d error %d flags %s caller %pS", 419 419 MAJOR(__entry->dev), MINOR(__entry->dev), 420 420 (unsigned long long)__entry->bno, 421 421 __entry->buffer_length, ··· 460 460 ), 461 461 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " 462 462 "lock %d flags %s recur %d refcount %d bliflags %s " 463 - "lidesc 0x%p liflags %s", 463 + "lidesc %p liflags %s", 464 464 MAJOR(__entry->dev), MINOR(__entry->dev), 465 465 (unsigned long long)__entry->buf_bno, 466 466 __entry->buf_len, ··· 579 579 __entry->lock_flags = lock_flags; 580 580 __entry->caller_ip = caller_ip; 581 581 ), 582 - TP_printk("dev %d:%d ino 0x%llx flags %s caller %ps", 582 + TP_printk("dev %d:%d ino 0x%llx flags %s caller %pS", 583 583 MAJOR(__entry->dev), MINOR(__entry->dev), 584 584 __entry->ino, 585 585 __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS), ··· 697 697 __entry->pincount = atomic_read(&ip->i_pincount); 698 698 __entry->caller_ip = caller_ip; 699 699 ), 700 - TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %ps", 700 + TP_printk("dev %d:%d ino 0x%llx count %d 
pincount %d caller %pS", 701 701 MAJOR(__entry->dev), MINOR(__entry->dev), 702 702 __entry->ino, 703 703 __entry->count, ··· 1028 1028 __entry->flags = lip->li_flags; 1029 1029 __entry->lsn = lip->li_lsn; 1030 1030 ), 1031 - TP_printk("dev %d:%d lip 0x%p lsn %d/%d type %s flags %s", 1031 + TP_printk("dev %d:%d lip %p lsn %d/%d type %s flags %s", 1032 1032 MAJOR(__entry->dev), MINOR(__entry->dev), 1033 1033 __entry->lip, 1034 1034 CYCLE_LSN(__entry->lsn), BLOCK_LSN(__entry->lsn), ··· 1049 1049 __entry->lsn = lsn; 1050 1050 __entry->caller_ip = caller_ip; 1051 1051 ), 1052 - TP_printk("dev %d:%d lsn 0x%llx caller %ps", 1052 + TP_printk("dev %d:%d lsn 0x%llx caller %pS", 1053 1053 MAJOR(__entry->dev), MINOR(__entry->dev), 1054 1054 __entry->lsn, (void *)__entry->caller_ip) 1055 1055 ) ··· 1082 1082 __entry->old_lsn = old_lsn; 1083 1083 __entry->new_lsn = new_lsn; 1084 1084 ), 1085 - TP_printk("dev %d:%d lip 0x%p old lsn %d/%d new lsn %d/%d type %s flags %s", 1085 + TP_printk("dev %d:%d lip %p old lsn %d/%d new lsn %d/%d type %s flags %s", 1086 1086 MAJOR(__entry->dev), MINOR(__entry->dev), 1087 1087 __entry->lip, 1088 1088 CYCLE_LSN(__entry->old_lsn), BLOCK_LSN(__entry->old_lsn), ··· 1403 1403 __entry->flags = flags; 1404 1404 ), 1405 1405 TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx" 1406 - "flags %s caller %ps", 1406 + "flags %s caller %pS", 1407 1407 MAJOR(__entry->dev), MINOR(__entry->dev), 1408 1408 __entry->ino, 1409 1409 __entry->size, ··· 1517 1517 ), 1518 1518 TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u " 1519 1519 "levels b %u c %u flfirst %u fllast %u flcount %u " 1520 - "freeblks %u longest %u caller %ps", 1520 + "freeblks %u longest %u caller %pS", 1521 1521 MAJOR(__entry->dev), MINOR(__entry->dev), 1522 1522 __entry->agno, 1523 1523 __print_flags(__entry->flags, "|", XFS_AGF_FLAGS), ··· 2014 2014 __entry->count = item->ri_cnt; 2015 2015 __entry->total = item->ri_total; 2016 2016 ), 2017 - TP_printk("dev %d:%d tid 
0x%x lsn 0x%llx, pass %d, item 0x%p, " 2017 + TP_printk("dev %d:%d tid 0x%x lsn 0x%llx, pass %d, item %p, " 2018 2018 "item type %s item region count/total %d/%d", 2019 2019 MAJOR(__entry->dev), MINOR(__entry->dev), 2020 2020 __entry->tid, ··· 2486 2486 __entry->error = error; 2487 2487 __entry->caller_ip = caller_ip; 2488 2488 ), 2489 - TP_printk("dev %d:%d agno %u error %d caller %ps", 2489 + TP_printk("dev %d:%d agno %u error %d caller %pS", 2490 2490 MAJOR(__entry->dev), MINOR(__entry->dev), 2491 2491 __entry->agno, 2492 2492 __entry->error, ··· 2977 2977 __entry->error = error; 2978 2978 __entry->caller_ip = caller_ip; 2979 2979 ), 2980 - TP_printk("dev %d:%d ino %llx error %d caller %ps", 2980 + TP_printk("dev %d:%d ino %llx error %d caller %pS", 2981 2981 MAJOR(__entry->dev), MINOR(__entry->dev), 2982 2982 __entry->ino, 2983 2983 __entry->error, ··· 3312 3312 DEFINE_GETFSMAP_EVENT(xfs_getfsmap_low_key); 3313 3313 DEFINE_GETFSMAP_EVENT(xfs_getfsmap_high_key); 3314 3314 DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping); 3315 + 3316 + TRACE_EVENT(xfs_trans_resv_calc, 3317 + TP_PROTO(struct xfs_mount *mp, unsigned int type, 3318 + struct xfs_trans_res *res), 3319 + TP_ARGS(mp, type, res), 3320 + TP_STRUCT__entry( 3321 + __field(dev_t, dev) 3322 + __field(int, type) 3323 + __field(uint, logres) 3324 + __field(int, logcount) 3325 + __field(int, logflags) 3326 + ), 3327 + TP_fast_assign( 3328 + __entry->dev = mp->m_super->s_dev; 3329 + __entry->type = type; 3330 + __entry->logres = res->tr_logres; 3331 + __entry->logcount = res->tr_logcount; 3332 + __entry->logflags = res->tr_logflags; 3333 + ), 3334 + TP_printk("dev %d:%d type %d logres %u logcount %d flags 0x%x", 3335 + MAJOR(__entry->dev), MINOR(__entry->dev), 3336 + __entry->type, 3337 + __entry->logres, 3338 + __entry->logcount, 3339 + __entry->logflags) 3340 + ); 3315 3341 3316 3342 #endif /* _TRACE_XFS_H */ 3317 3343
+22
fs/xfs/xfs_trans.c
··· 35 35 kmem_zone_t *xfs_trans_zone; 36 36 kmem_zone_t *xfs_log_item_desc_zone; 37 37 38 + #if defined(CONFIG_TRACEPOINTS) 39 + static void 40 + xfs_trans_trace_reservations( 41 + struct xfs_mount *mp) 42 + { 43 + struct xfs_trans_res resv; 44 + struct xfs_trans_res *res; 45 + struct xfs_trans_res *end_res; 46 + int i; 47 + 48 + res = (struct xfs_trans_res *)M_RES(mp); 49 + end_res = (struct xfs_trans_res *)(M_RES(mp) + 1); 50 + for (i = 0; res < end_res; i++, res++) 51 + trace_xfs_trans_resv_calc(mp, i, res); 52 + xfs_log_get_max_trans_res(mp, &resv); 53 + trace_xfs_trans_resv_calc(mp, -1, &resv); 54 + } 55 + #else 56 + # define xfs_trans_trace_reservations(mp) 57 + #endif 58 + 38 59 /* 39 60 * Initialize the precomputed transaction reservation values 40 61 * in the mount structure. ··· 65 44 struct xfs_mount *mp) 66 45 { 67 46 xfs_trans_resv_calc(mp, M_RES(mp)); 47 + xfs_trans_trace_reservations(mp); 68 48 } 69 49 70 50 /*
+1 -1
fs/xfs/xfs_trans.h
··· 50 50 uint li_type; /* item type */ 51 51 uint li_flags; /* misc flags */ 52 52 struct xfs_buf *li_buf; /* real buffer pointer */ 53 - struct xfs_log_item *li_bio_list; /* buffer item list */ 53 + struct list_head li_bio_list; /* buffer item list */ 54 54 void (*li_cb)(struct xfs_buf *, 55 55 struct xfs_log_item *); 56 56 /* buffer item iodone */
+51 -47
fs/xfs/xfs_trans_buf.c
··· 82 82 ASSERT(bp->b_transp == NULL); 83 83 84 84 /* 85 - * The xfs_buf_log_item pointer is stored in b_fsprivate. If 85 + * The xfs_buf_log_item pointer is stored in b_log_item. If 86 86 * it doesn't have one yet, then allocate one and initialize it. 87 87 * The checks to see if one is there are in xfs_buf_item_init(). 88 88 */ 89 89 xfs_buf_item_init(bp, tp->t_mountp); 90 - bip = bp->b_fspriv; 90 + bip = bp->b_log_item; 91 91 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 92 92 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL)); 93 93 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); ··· 118 118 struct xfs_buf *bp) 119 119 { 120 120 _xfs_trans_bjoin(tp, bp, 0); 121 - trace_xfs_trans_bjoin(bp->b_fspriv); 121 + trace_xfs_trans_bjoin(bp->b_log_item); 122 122 } 123 123 124 124 /* ··· 139 139 xfs_buf_flags_t flags) 140 140 { 141 141 xfs_buf_t *bp; 142 - xfs_buf_log_item_t *bip; 142 + struct xfs_buf_log_item *bip; 143 143 144 144 if (!tp) 145 145 return xfs_buf_get_map(target, map, nmaps, flags); ··· 159 159 } 160 160 161 161 ASSERT(bp->b_transp == tp); 162 - bip = bp->b_fspriv; 162 + bip = bp->b_log_item; 163 163 ASSERT(bip != NULL); 164 164 ASSERT(atomic_read(&bip->bli_refcount) > 0); 165 165 bip->bli_recur++; ··· 175 175 ASSERT(!bp->b_error); 176 176 177 177 _xfs_trans_bjoin(tp, bp, 1); 178 - trace_xfs_trans_get_buf(bp->b_fspriv); 178 + trace_xfs_trans_get_buf(bp->b_log_item); 179 179 return bp; 180 180 } 181 181 ··· 188 188 * mount structure. 
189 189 */ 190 190 xfs_buf_t * 191 - xfs_trans_getsb(xfs_trans_t *tp, 192 - struct xfs_mount *mp, 193 - int flags) 191 + xfs_trans_getsb( 192 + xfs_trans_t *tp, 193 + struct xfs_mount *mp, 194 + int flags) 194 195 { 195 196 xfs_buf_t *bp; 196 - xfs_buf_log_item_t *bip; 197 + struct xfs_buf_log_item *bip; 197 198 198 199 /* 199 200 * Default to just trying to lock the superblock buffer ··· 211 210 */ 212 211 bp = mp->m_sb_bp; 213 212 if (bp->b_transp == tp) { 214 - bip = bp->b_fspriv; 213 + bip = bp->b_log_item; 215 214 ASSERT(bip != NULL); 216 215 ASSERT(atomic_read(&bip->bli_refcount) > 0); 217 216 bip->bli_recur++; ··· 224 223 return NULL; 225 224 226 225 _xfs_trans_bjoin(tp, bp, 1); 227 - trace_xfs_trans_getsb(bp->b_fspriv); 226 + trace_xfs_trans_getsb(bp->b_log_item); 228 227 return bp; 229 228 } 230 229 ··· 267 266 if (bp) { 268 267 ASSERT(xfs_buf_islocked(bp)); 269 268 ASSERT(bp->b_transp == tp); 270 - ASSERT(bp->b_fspriv != NULL); 269 + ASSERT(bp->b_log_item != NULL); 271 270 ASSERT(!bp->b_error); 272 271 ASSERT(bp->b_flags & XBF_DONE); 273 272 ··· 280 279 return -EIO; 281 280 } 282 281 283 - bip = bp->b_fspriv; 282 + bip = bp->b_log_item; 284 283 bip->bli_recur++; 285 284 286 285 ASSERT(atomic_read(&bip->bli_refcount) > 0); ··· 330 329 331 330 if (tp) { 332 331 _xfs_trans_bjoin(tp, bp, 1); 333 - trace_xfs_trans_read_buf(bp->b_fspriv); 332 + trace_xfs_trans_read_buf(bp->b_log_item); 334 333 } 335 334 *bpp = bp; 336 335 return 0; ··· 353 352 * brelse() call. 
354 353 */ 355 354 void 356 - xfs_trans_brelse(xfs_trans_t *tp, 357 - xfs_buf_t *bp) 355 + xfs_trans_brelse( 356 + xfs_trans_t *tp, 357 + xfs_buf_t *bp) 358 358 { 359 - xfs_buf_log_item_t *bip; 359 + struct xfs_buf_log_item *bip; 360 360 int freed; 361 361 362 362 /* ··· 370 368 } 371 369 372 370 ASSERT(bp->b_transp == tp); 373 - bip = bp->b_fspriv; 371 + bip = bp->b_log_item; 374 372 ASSERT(bip->bli_item.li_type == XFS_LI_BUF); 375 373 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 376 374 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL)); ··· 458 456 */ 459 457 /* ARGSUSED */ 460 458 void 461 - xfs_trans_bhold(xfs_trans_t *tp, 462 - xfs_buf_t *bp) 459 + xfs_trans_bhold( 460 + xfs_trans_t *tp, 461 + xfs_buf_t *bp) 463 462 { 464 - xfs_buf_log_item_t *bip = bp->b_fspriv; 463 + struct xfs_buf_log_item *bip = bp->b_log_item; 465 464 466 465 ASSERT(bp->b_transp == tp); 467 466 ASSERT(bip != NULL); ··· 479 476 * for this transaction. 480 477 */ 481 478 void 482 - xfs_trans_bhold_release(xfs_trans_t *tp, 483 - xfs_buf_t *bp) 479 + xfs_trans_bhold_release( 480 + xfs_trans_t *tp, 481 + xfs_buf_t *bp) 484 482 { 485 - xfs_buf_log_item_t *bip = bp->b_fspriv; 483 + struct xfs_buf_log_item *bip = bp->b_log_item; 486 484 487 485 ASSERT(bp->b_transp == tp); 488 486 ASSERT(bip != NULL); ··· 504 500 struct xfs_trans *tp, 505 501 struct xfs_buf *bp) 506 502 { 507 - struct xfs_buf_log_item *bip = bp->b_fspriv; 503 + struct xfs_buf_log_item *bip = bp->b_log_item; 508 504 509 505 ASSERT(bp->b_transp == tp); 510 506 ASSERT(bip != NULL); ··· 561 557 uint first, 562 558 uint last) 563 559 { 564 - struct xfs_buf_log_item *bip = bp->b_fspriv; 560 + struct xfs_buf_log_item *bip = bp->b_log_item; 565 561 566 562 ASSERT(first <= last && last < BBTOB(bp->b_length)); 567 563 ASSERT(!(bip->bli_flags & XFS_BLI_ORDERED)); ··· 604 600 */ 605 601 void 606 602 xfs_trans_binval( 607 - xfs_trans_t *tp, 608 - xfs_buf_t *bp) 603 + xfs_trans_t *tp, 604 + xfs_buf_t *bp) 609 605 { 610 - xfs_buf_log_item_t 
*bip = bp->b_fspriv; 606 + struct xfs_buf_log_item *bip = bp->b_log_item; 611 607 int i; 612 608 613 609 ASSERT(bp->b_transp == tp); ··· 659 655 */ 660 656 void 661 657 xfs_trans_inode_buf( 662 - xfs_trans_t *tp, 663 - xfs_buf_t *bp) 658 + xfs_trans_t *tp, 659 + xfs_buf_t *bp) 664 660 { 665 - xfs_buf_log_item_t *bip = bp->b_fspriv; 661 + struct xfs_buf_log_item *bip = bp->b_log_item; 666 662 667 663 ASSERT(bp->b_transp == tp); 668 664 ASSERT(bip != NULL); ··· 683 679 */ 684 680 void 685 681 xfs_trans_stale_inode_buf( 686 - xfs_trans_t *tp, 687 - xfs_buf_t *bp) 682 + xfs_trans_t *tp, 683 + xfs_buf_t *bp) 688 684 { 689 - xfs_buf_log_item_t *bip = bp->b_fspriv; 685 + struct xfs_buf_log_item *bip = bp->b_log_item; 690 686 691 687 ASSERT(bp->b_transp == tp); 692 688 ASSERT(bip != NULL); ··· 708 704 /* ARGSUSED */ 709 705 void 710 706 xfs_trans_inode_alloc_buf( 711 - xfs_trans_t *tp, 712 - xfs_buf_t *bp) 707 + xfs_trans_t *tp, 708 + xfs_buf_t *bp) 713 709 { 714 - xfs_buf_log_item_t *bip = bp->b_fspriv; 710 + struct xfs_buf_log_item *bip = bp->b_log_item; 715 711 716 712 ASSERT(bp->b_transp == tp); 717 713 ASSERT(bip != NULL); ··· 733 729 struct xfs_trans *tp, 734 730 struct xfs_buf *bp) 735 731 { 736 - struct xfs_buf_log_item *bip = bp->b_fspriv; 732 + struct xfs_buf_log_item *bip = bp->b_log_item; 737 733 738 734 ASSERT(bp->b_transp == tp); 739 735 ASSERT(bip != NULL); ··· 763 759 struct xfs_buf *bp, 764 760 enum xfs_blft type) 765 761 { 766 - struct xfs_buf_log_item *bip = bp->b_fspriv; 762 + struct xfs_buf_log_item *bip = bp->b_log_item; 767 763 768 764 if (!tp) 769 765 return; ··· 780 776 struct xfs_buf *dst_bp, 781 777 struct xfs_buf *src_bp) 782 778 { 783 - struct xfs_buf_log_item *sbip = src_bp->b_fspriv; 784 - struct xfs_buf_log_item *dbip = dst_bp->b_fspriv; 779 + struct xfs_buf_log_item *sbip = src_bp->b_log_item; 780 + struct xfs_buf_log_item *dbip = dst_bp->b_log_item; 785 781 enum xfs_blft type; 786 782 787 783 type = 
xfs_blft_from_flags(&sbip->__bli_format); ··· 801 797 /* ARGSUSED */ 802 798 void 803 799 xfs_trans_dquot_buf( 804 - xfs_trans_t *tp, 805 - xfs_buf_t *bp, 806 - uint type) 800 + xfs_trans_t *tp, 801 + xfs_buf_t *bp, 802 + uint type) 807 803 { 808 - struct xfs_buf_log_item *bip = bp->b_fspriv; 804 + struct xfs_buf_log_item *bip = bp->b_log_item; 809 805 810 806 ASSERT(type == XFS_BLF_UDQUOT_BUF || 811 807 type == XFS_BLF_PDQUOT_BUF ||
+6
include/linux/fs.h
··· 748 748 down_write_nested(&inode->i_rwsem, subclass); 749 749 } 750 750 751 + static inline void inode_lock_shared_nested(struct inode *inode, unsigned subclass) 752 + { 753 + down_read_nested(&inode->i_rwsem, subclass); 754 + } 755 + 751 756 void lock_two_nondirectories(struct inode *, struct inode*); 752 757 void unlock_two_nondirectories(struct inode *, struct inode*); 753 758 ··· 2985 2980 }; 2986 2981 2987 2982 void dio_end_io(struct bio *bio); 2983 + void dio_warn_stale_pagecache(struct file *filp); 2988 2984 2989 2985 ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, 2990 2986 struct block_device *bdev, struct iov_iter *iter,