commit 0efdc097965bcf60d1db62f100ef544714714e88 · tjh.dev/kernel

tjh.dev / kernel

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

Merge tag 'xfs-6.11-fixes-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Chandan Babu:

- Do not call out v1 inodes with non-zero di_nlink field as being
corrupt

- Change xfs_finobt_count_blocks() to count "free inode btree" blocks
rather than "inode btree" blocks

- Don't report the number of trimmed bytes via FITRIM because the
underlying storage isn't required to do anything and failed discard
IOs aren't reported to the caller anyway

- Fix incorrect setting of rm_owner field in an rmap query

- Report missing disk offset range in an fsmap query

- Obtain m_growlock when extending realtime section of the filesystem

- Reset rootdir extent size hint after extending realtime section of
the filesystem

* tag 'xfs-6.11-fixes-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
xfs: reset rootdir extent size hint after growfsrt
xfs: take m_growlock when running growfsrt
xfs: Fix missing interval for missing_owner in xfs fsmap
xfs: use XFS_BUF_DADDR_NULL for daddrs in getfsmap code
xfs: Fix the owner setting issue for rmap query in xfs fsmap
xfs: don't bother reporting blocks trimmed via FITRIM
xfs: xfs_finobt_count_blocks() walks the wrong btree
xfs: fix folio dirtying for XFILE_ALLOC callers
xfs: fix di_onlink checking for V1/V2 inodes

Linus Torvalds 1 year ago 0efdc097 35667a29

+114 -48

6 changed files

expand all

unified split

xfs

libxfs

xfs_ialloc_btree.c

xfs_inode_buf.c

scrub

xfile.c

xfs_discard.c

xfs_fsmap.c

xfs_rtalloc.c

+1 -1

fs/xfs/libxfs/xfs_ialloc_btree.c

··· 749 749 if (error) 750 750 return error; 751 751 752 - cur = xfs_inobt_init_cursor(pag, tp, agbp); 752 + cur = xfs_finobt_init_cursor(pag, tp, agbp); 753 753 error = xfs_btree_count_blocks(cur, tree_blocks); 754 754 xfs_btree_del_cursor(cur, error); 755 755 xfs_trans_brelse(tp, agbp);

+10 -4

fs/xfs/libxfs/xfs_inode_buf.c

··· 514 514 return __this_address; 515 515 } 516 516 517 - if (dip->di_version > 1) { 517 + /* 518 + * Historical note: xfsprogs in the 3.2 era set up its incore inodes to 519 + * have di_nlink track the link count, even if the actual filesystem 520 + * only supported V1 inodes (i.e. di_onlink). When writing out the 521 + * ondisk inode, it would set both the ondisk di_nlink and di_onlink to 522 + * the incore di_nlink value, which is why we cannot check for 523 + * di_nlink==0 on a V1 inode. V2/3 inodes would get written out with 524 + * di_onlink==0, so we can check that. 525 + */ 526 + if (dip->di_version >= 2) { 518 527 if (dip->di_onlink) 519 - return __this_address; 520 - } else { 521 - if (dip->di_nlink) 522 528 return __this_address; 523 529 } 524 530

+1 -1

fs/xfs/scrub/xfile.c

··· 293 293 * (potentially last) reference in xfile_put_folio. 294 294 */ 295 295 if (flags & XFILE_ALLOC) 296 - folio_set_dirty(folio); 296 + folio_mark_dirty(folio); 297 297 return folio; 298 298 } 299 299

+11 -25

fs/xfs/xfs_discard.c

··· 158 158 xfs_trim_gather_extents( 159 159 struct xfs_perag *pag, 160 160 struct xfs_trim_cur *tcur, 161 - struct xfs_busy_extents *extents, 162 - uint64_t *blocks_trimmed) 161 + struct xfs_busy_extents *extents) 163 162 { 164 163 struct xfs_mount *mp = pag->pag_mount; 165 164 struct xfs_trans *tp; ··· 279 280 280 281 xfs_extent_busy_insert_discard(pag, fbno, flen, 281 282 &extents->extent_list); 282 - *blocks_trimmed += flen; 283 283 next_extent: 284 284 if (tcur->by_bno) 285 285 error = xfs_btree_increment(cur, 0, &i); ··· 325 327 struct xfs_perag *pag, 326 328 xfs_agblock_t start, 327 329 xfs_agblock_t end, 328 - xfs_extlen_t minlen, 329 - uint64_t *blocks_trimmed) 330 + xfs_extlen_t minlen) 330 331 { 331 332 struct xfs_trim_cur tcur = { 332 333 .start = start, ··· 351 354 extents->owner = extents; 352 355 INIT_LIST_HEAD(&extents->extent_list); 353 356 354 - error = xfs_trim_gather_extents(pag, &tcur, extents, 355 - blocks_trimmed); 357 + error = xfs_trim_gather_extents(pag, &tcur, extents); 356 358 if (error) { 357 359 kfree(extents); 358 360 break; ··· 385 389 struct xfs_mount *mp, 386 390 xfs_daddr_t start, 387 391 xfs_daddr_t end, 388 - xfs_extlen_t minlen, 389 - uint64_t *blocks_trimmed) 392 + xfs_extlen_t minlen) 390 393 { 391 394 xfs_agnumber_t start_agno, end_agno; 392 395 xfs_agblock_t start_agbno, end_agbno; ··· 406 411 407 412 if (start_agno == end_agno) 408 413 agend = end_agbno; 409 - error = xfs_trim_perag_extents(pag, start_agbno, agend, minlen, 410 - blocks_trimmed); 414 + error = xfs_trim_perag_extents(pag, start_agbno, agend, minlen); 411 415 if (error) 412 416 last_error = error; 413 417 ··· 424 430 struct xfs_trim_rtdev { 425 431 /* list of rt extents to free */ 426 432 struct list_head extent_list; 427 - 428 - /* pointer to count of blocks trimmed */ 429 - uint64_t *blocks_trimmed; 430 433 431 434 /* minimum length that caller allows us to trim */ 432 435 xfs_rtblock_t minlen_fsb; ··· 542 551 busyp->length = rlen; 543 552 
INIT_LIST_HEAD(&busyp->list); 544 553 list_add_tail(&busyp->list, &tr->extent_list); 545 - *tr->blocks_trimmed += rlen; 546 554 547 555 tr->restart_rtx = rec->ar_startext + rec->ar_extcount; 548 556 return 0; ··· 552 562 struct xfs_mount *mp, 553 563 xfs_daddr_t start, 554 564 xfs_daddr_t end, 555 - xfs_daddr_t minlen, 556 - uint64_t *blocks_trimmed) 565 + xfs_daddr_t minlen) 557 566 { 558 567 struct xfs_rtalloc_rec low = { }; 559 568 struct xfs_rtalloc_rec high = { }; 560 569 struct xfs_trim_rtdev tr = { 561 - .blocks_trimmed = blocks_trimmed, 562 570 .minlen_fsb = XFS_BB_TO_FSB(mp, minlen), 563 571 }; 564 572 struct xfs_trans *tp; ··· 622 634 return error; 623 635 } 624 636 #else 625 - # define xfs_trim_rtdev_extents(m,s,e,n,b) (-EOPNOTSUPP) 637 + # define xfs_trim_rtdev_extents(...) (-EOPNOTSUPP) 626 638 #endif /* CONFIG_XFS_RT */ 627 639 628 640 /* ··· 649 661 xfs_daddr_t start, end; 650 662 xfs_extlen_t minlen; 651 663 xfs_rfsblock_t max_blocks; 652 - uint64_t blocks_trimmed = 0; 653 664 int error, last_error = 0; 654 665 655 666 if (!capable(CAP_SYS_ADMIN)) ··· 693 706 end = start + BTOBBT(range.len) - 1; 694 707 695 708 if (bdev_max_discard_sectors(mp->m_ddev_targp->bt_bdev)) { 696 - error = xfs_trim_datadev_extents(mp, start, end, minlen, 697 - &blocks_trimmed); 709 + error = xfs_trim_datadev_extents(mp, start, end, minlen); 698 710 if (error) 699 711 last_error = error; 700 712 } 701 713 702 714 if (rt_bdev && !xfs_trim_should_stop()) { 703 - error = xfs_trim_rtdev_extents(mp, start, end, minlen, 704 - &blocks_trimmed); 715 + error = xfs_trim_rtdev_extents(mp, start, end, minlen); 705 716 if (error) 706 717 last_error = error; 707 718 } ··· 707 722 if (last_error) 708 723 return last_error; 709 724 710 - range.len = XFS_FSB_TO_B(mp, blocks_trimmed); 725 + range.len = min_t(unsigned long long, range.len, 726 + XFS_FSB_TO_B(mp, max_blocks)); 711 727 if (copy_to_user(urange, &range, sizeof(range))) 712 728 return -EFAULT; 713 729 return 0;

+26 -4

fs/xfs/xfs_fsmap.c

··· 71 71 switch (src->fmr_owner) { 72 72 case 0: /* "lowest owner id possible" */ 73 73 case -1ULL: /* "highest owner id possible" */ 74 - dest->rm_owner = 0; 74 + dest->rm_owner = src->fmr_owner; 75 75 break; 76 76 case XFS_FMR_OWN_FREE: 77 77 dest->rm_owner = XFS_RMAP_OWN_NULL; ··· 162 162 xfs_daddr_t next_daddr; /* next daddr we expect */ 163 163 /* daddr of low fsmap key when we're using the rtbitmap */ 164 164 xfs_daddr_t low_daddr; 165 + xfs_daddr_t end_daddr; /* daddr of high fsmap key */ 165 166 u64 missing_owner; /* owner of holes */ 166 167 u32 dev; /* device id */ 167 168 /* ··· 183 182 int (*fn)(struct xfs_trans *tp, 184 183 const struct xfs_fsmap *keys, 185 184 struct xfs_getfsmap_info *info); 185 + sector_t nr_sectors; 186 186 }; 187 187 188 188 /* Compare two getfsmap device handlers. */ ··· 254 252 const struct xfs_rmap_irec *rec, 255 253 xfs_daddr_t rec_daddr) 256 254 { 257 - if (info->low_daddr != -1ULL) 255 + if (info->low_daddr != XFS_BUF_DADDR_NULL) 258 256 return rec_daddr < info->low_daddr; 259 257 if (info->low.rm_blockcount) 260 258 return xfs_rmap_compare(rec, &info->low) < 0; ··· 295 293 info->next_daddr = rec_daddr; 296 294 return 0; 297 295 } 296 + 297 + /* 298 + * For an info->last query, we're looking for a gap between the last 299 + * mapping emitted and the high key specified by userspace. If the 300 + * user's query spans less than 1 fsblock, then info->high and 301 + * info->low will have the same rm_startblock, which causes rec_daddr 302 + * and next_daddr to be the same. Therefore, use the end_daddr that 303 + * we calculated from userspace's high key to synthesize the record. 304 + * Note that if the btree query found a mapping, there won't be a gap. 305 + */ 306 + if (info->last && info->end_daddr != XFS_BUF_DADDR_NULL) 307 + rec_daddr = info->end_daddr; 298 308 299 309 /* Are we just counting mappings? */ 300 310 if (info->head->fmh_count == 0) { ··· 918 904 919 905 /* Set up our device handlers. 
*/ 920 906 memset(handlers, 0, sizeof(handlers)); 907 + handlers[0].nr_sectors = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); 921 908 handlers[0].dev = new_encode_dev(mp->m_ddev_targp->bt_dev); 922 909 if (use_rmap) 923 910 handlers[0].fn = xfs_getfsmap_datadev_rmapbt; 924 911 else 925 912 handlers[0].fn = xfs_getfsmap_datadev_bnobt; 926 913 if (mp->m_logdev_targp != mp->m_ddev_targp) { 914 + handlers[1].nr_sectors = XFS_FSB_TO_BB(mp, 915 + mp->m_sb.sb_logblocks); 927 916 handlers[1].dev = new_encode_dev(mp->m_logdev_targp->bt_dev); 928 917 handlers[1].fn = xfs_getfsmap_logdev; 929 918 } 930 919 #ifdef CONFIG_XFS_RT 931 920 if (mp->m_rtdev_targp) { 921 + handlers[2].nr_sectors = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks); 932 922 handlers[2].dev = new_encode_dev(mp->m_rtdev_targp->bt_dev); 933 923 handlers[2].fn = xfs_getfsmap_rtdev_rtbitmap; 934 924 } ··· 964 946 965 947 info.next_daddr = head->fmh_keys[0].fmr_physical + 966 948 head->fmh_keys[0].fmr_length; 949 + info.end_daddr = XFS_BUF_DADDR_NULL; 967 950 info.fsmap_recs = fsmap_recs; 968 951 info.head = head; 969 952 ··· 985 966 * low key, zero out the low key so that we get 986 967 * everything from the beginning. 987 968 */ 988 - if (handlers[i].dev == head->fmh_keys[1].fmr_device) 969 + if (handlers[i].dev == head->fmh_keys[1].fmr_device) { 989 970 dkeys[1] = head->fmh_keys[1]; 971 + info.end_daddr = min(handlers[i].nr_sectors - 1, 972 + dkeys[1].fmr_physical); 973 + } 990 974 if (handlers[i].dev > head->fmh_keys[0].fmr_device) 991 975 memset(&dkeys[0], 0, sizeof(struct xfs_fsmap)); 992 976 ··· 1005 983 info.dev = handlers[i].dev; 1006 984 info.last = false; 1007 985 info.pag = NULL; 1008 - info.low_daddr = -1ULL; 986 + info.low_daddr = XFS_BUF_DADDR_NULL; 1009 987 info.low.rm_blockcount = 0; 1010 988 error = handlers[i].fn(tp, dkeys, &info); 1011 989 if (error)

+65 -13

fs/xfs/xfs_rtalloc.c

··· 785 785 } 786 786 787 787 /* 788 + * If we changed the rt extent size (meaning there was no rt volume previously) 789 + * and the root directory had EXTSZINHERIT and RTINHERIT set, it's possible 790 + * that the extent size hint on the root directory is no longer congruent with 791 + * the new rt extent size. Log the rootdir inode to fix this. 792 + */ 793 + static int 794 + xfs_growfs_rt_fixup_extsize( 795 + struct xfs_mount *mp) 796 + { 797 + struct xfs_inode *ip = mp->m_rootip; 798 + struct xfs_trans *tp; 799 + int error = 0; 800 + 801 + xfs_ilock(ip, XFS_IOLOCK_EXCL); 802 + if (!(ip->i_diflags & XFS_DIFLAG_RTINHERIT) || 803 + !(ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT)) 804 + goto out_iolock; 805 + 806 + error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_ichange, 0, 0, false, 807 + &tp); 808 + if (error) 809 + goto out_iolock; 810 + 811 + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 812 + error = xfs_trans_commit(tp); 813 + xfs_iunlock(ip, XFS_ILOCK_EXCL); 814 + 815 + out_iolock: 816 + xfs_iunlock(ip, XFS_IOLOCK_EXCL); 817 + return error; 818 + } 819 + 820 + /* 788 821 * Visible (exported) functions. 789 822 */ 790 823 ··· 845 812 xfs_extlen_t rsumblocks; /* current number of rt summary blks */ 846 813 xfs_sb_t *sbp; /* old superblock */ 847 814 uint8_t *rsum_cache; /* old summary cache */ 815 + xfs_agblock_t old_rextsize = mp->m_sb.sb_rextsize; 848 816 849 817 sbp = &mp->m_sb; 850 818 ··· 855 821 /* Needs to have been mounted with an rt device. */ 856 822 if (!XFS_IS_REALTIME_MOUNT(mp)) 857 823 return -EINVAL; 824 + 825 + if (!mutex_trylock(&mp->m_growlock)) 826 + return -EWOULDBLOCK; 858 827 /* 859 828 * Mount should fail if the rt bitmap/summary files don't load, but 860 829 * we'll check anyway. 861 830 */ 831 + error = -EINVAL; 862 832 if (!mp->m_rbmip || !mp->m_rsumip) 863 - return -EINVAL; 833 + goto out_unlock; 864 834 865 835 /* Shrink not supported. 
*/ 866 836 if (in->newblocks <= sbp->sb_rblocks) 867 - return -EINVAL; 837 + goto out_unlock; 868 838 869 839 /* Can only change rt extent size when adding rt volume. */ 870 840 if (sbp->sb_rblocks > 0 && in->extsize != sbp->sb_rextsize) 871 - return -EINVAL; 841 + goto out_unlock; 872 842 873 843 /* Range check the extent size. */ 874 844 if (XFS_FSB_TO_B(mp, in->extsize) > XFS_MAX_RTEXTSIZE || 875 845 XFS_FSB_TO_B(mp, in->extsize) < XFS_MIN_RTEXTSIZE) 876 - return -EINVAL; 846 + goto out_unlock; 877 847 878 848 /* Unsupported realtime features. */ 849 + error = -EOPNOTSUPP; 879 850 if (xfs_has_rmapbt(mp) || xfs_has_reflink(mp) || xfs_has_quota(mp)) 880 - return -EOPNOTSUPP; 851 + goto out_unlock; 881 852 882 853 nrblocks = in->newblocks; 883 854 error = xfs_sb_validate_fsb_count(sbp, nrblocks); 884 855 if (error) 885 - return error; 856 + goto out_unlock; 886 857 /* 887 858 * Read in the last block of the device, make sure it exists. 888 859 */ ··· 895 856 XFS_FSB_TO_BB(mp, nrblocks - 1), 896 857 XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL); 897 858 if (error) 898 - return error; 859 + goto out_unlock; 899 860 xfs_buf_relse(bp); 900 861 901 862 /* ··· 903 864 */ 904 865 nrextents = nrblocks; 905 866 do_div(nrextents, in->extsize); 906 - if (!xfs_validate_rtextents(nrextents)) 907 - return -EINVAL; 867 + if (!xfs_validate_rtextents(nrextents)) { 868 + error = -EINVAL; 869 + goto out_unlock; 870 + } 908 871 nrbmblocks = xfs_rtbitmap_blockcount(mp, nrextents); 909 872 nrextslog = xfs_compute_rextslog(nrextents); 910 873 nrsumlevels = nrextslog + 1; ··· 917 876 * the log. This prevents us from getting a log overflow, 918 877 * since we'll log basically the whole summary file at once. 919 878 */ 920 - if (nrsumblocks > (mp->m_sb.sb_logblocks >> 1)) 921 - return -EINVAL; 879 + if (nrsumblocks > (mp->m_sb.sb_logblocks >> 1)) { 880 + error = -EINVAL; 881 + goto out_unlock; 882 + } 883 + 922 884 /* 923 885 * Get the old block counts for bitmap and summary inodes. 
924 886 * These can't change since other growfs callers are locked out. ··· 933 889 */ 934 890 error = xfs_growfs_rt_alloc(mp, rbmblocks, nrbmblocks, mp->m_rbmip); 935 891 if (error) 936 - return error; 892 + goto out_unlock; 937 893 error = xfs_growfs_rt_alloc(mp, rsumblocks, nrsumblocks, mp->m_rsumip); 938 894 if (error) 939 - return error; 895 + goto out_unlock; 940 896 941 897 rsum_cache = mp->m_rsum_cache; 942 898 if (nrbmblocks != sbp->sb_rbmblocks) ··· 1080 1036 if (error) 1081 1037 goto out_free; 1082 1038 1039 + if (old_rextsize != in->extsize) { 1040 + error = xfs_growfs_rt_fixup_extsize(mp); 1041 + if (error) 1042 + goto out_free; 1043 + } 1044 + 1083 1045 /* Update secondary superblocks now the physical grow has completed */ 1084 1046 error = xfs_update_secondary_sbs(mp); 1085 1047 ··· 1109 1059 } 1110 1060 } 1111 1061 1062 + out_unlock: 1063 + mutex_unlock(&mp->m_growlock); 1112 1064 return error; 1113 1065 } 1114 1066